mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-23 16:54:56 +00:00
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
This commit is contained in:
parent
22884d2bf7
commit
d84e3c70b7
16 changed files with 270 additions and 143 deletions
|
|
@@ -36,7 +36,9 @@ class MMTBench(EvalBase):
|
|||
print(f"Warning: Full download failed, using streaming: {e}")
|
||||
# Fallback to streaming if full download fails (known column mismatch issue)
|
||||
try:
|
||||
dataset = load_dataset("OpenGVLab/MMT-Bench", split=split, streaming=True)
|
||||
dataset = load_dataset(
|
||||
"OpenGVLab/MMT-Bench", split=split, streaming=True
|
||||
)
|
||||
if max_samples:
|
||||
data = list(dataset.take(max_samples))
|
||||
else:
|
||||
|
|
@@ -46,7 +48,9 @@ class MMTBench(EvalBase):
|
|||
data.append(item)
|
||||
if i % 5000 == 0 and i > 0:
|
||||
print(f" Streamed {i} samples...")
|
||||
print(f"Loaded {len(data)} examples from MMT-Bench ({split}, streaming)")
|
||||
print(
|
||||
f"Loaded {len(data)} examples from MMT-Bench ({split}, streaming)"
|
||||
)
|
||||
return data
|
||||
except Exception:
|
||||
raise ValueError(f"Could not load MMT-Bench dataset: {e}")
|
||||
|
|
@@ -92,15 +96,19 @@ class MMTBench(EvalBase):
|
|||
|
||||
content = []
|
||||
if image_base64:
|
||||
content.append({
|
||||
"type": "image_url",
|
||||
"image_url": {"url": f"data:image/png;base64,{image_base64}"},
|
||||
})
|
||||
content.append(
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {"url": f"data:image/png;base64,{image_base64}"},
|
||||
}
|
||||
)
|
||||
content.append({"type": "text", "text": prompt})
|
||||
|
||||
return [{"role": "user", "content": content}]
|
||||
|
||||
def extract_answer(self, response: str, num_choices: int) -> Tuple[Optional[str], str]:
|
||||
def extract_answer(
|
||||
self, response: str, num_choices: int
|
||||
) -> Tuple[Optional[str], str]:
|
||||
valid_letters = set(ascii_uppercase[:num_choices])
|
||||
|
||||
letter, method = extract_letter_from_answer_tag(response, valid_letters)
|
||||
|
|
@@ -134,9 +142,12 @@ class MMTBench(EvalBase):
|
|||
answer = data_item.get("answer", "")
|
||||
|
||||
num_choices = sum(
|
||||
1 for letter in ascii_uppercase[:8]
|
||||
if letter in data_item and data_item[letter] is not None
|
||||
and isinstance(data_item[letter], str) and data_item[letter].strip()
|
||||
1
|
||||
for letter in ascii_uppercase[:8]
|
||||
if letter in data_item
|
||||
and data_item[letter] is not None
|
||||
and isinstance(data_item[letter], str)
|
||||
and data_item[letter].strip()
|
||||
)
|
||||
num_choices = max(num_choices, 4)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue