[pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
2026-04-23 16:54:56 +00:00 · 2026-01-12 23:29:15 +00:00 · 2026-01-12 23:29:15 +00:00 · d84e3c70b7
commit d84e3c70b7
parent 22884d2bf7
16 changed files with 270 additions and 143 deletions
--- a/environments/eval_environments/mmt_bench_environment.py
+++ b/environments/eval_environments/mmt_bench_environment.py
@@ -36,7 +36,9 @@ class MMTBench(EvalBase):
            print(f"Warning: Full download failed, using streaming: {e}")
            # Fallback to streaming if full download fails (known column mismatch issue)
            try:
-                dataset = load_dataset("OpenGVLab/MMT-Bench", split=split, streaming=True)
+                dataset = load_dataset(
+                    "OpenGVLab/MMT-Bench", split=split, streaming=True
+                )
                if max_samples:
                    data = list(dataset.take(max_samples))
                else:
@@ -46,7 +48,9 @@ class MMTBench(EvalBase):
                        data.append(item)
                        if i % 5000 == 0 and i > 0:
                            print(f"  Streamed {i} samples...")
-                print(f"Loaded {len(data)} examples from MMT-Bench ({split}, streaming)")
+                print(
+                    f"Loaded {len(data)} examples from MMT-Bench ({split}, streaming)"
+                )
                return data
            except Exception:
                raise ValueError(f"Could not load MMT-Bench dataset: {e}")
@@ -92,15 +96,19 @@ class MMTBench(EvalBase):

        content = []
        if image_base64:
-            content.append({
-                "type": "image_url",
-                "image_url": {"url": f"data:image/png;base64,{image_base64}"},
-            })
+            content.append(
+                {
+                    "type": "image_url",
+                    "image_url": {"url": f"data:image/png;base64,{image_base64}"},
+                }
+            )
        content.append({"type": "text", "text": prompt})

        return [{"role": "user", "content": content}]

-    def extract_answer(self, response: str, num_choices: int) -> Tuple[Optional[str], str]:
+    def extract_answer(
+        self, response: str, num_choices: int
+    ) -> Tuple[Optional[str], str]:
        valid_letters = set(ascii_uppercase[:num_choices])

        letter, method = extract_letter_from_answer_tag(response, valid_letters)
@@ -134,9 +142,12 @@ class MMTBench(EvalBase):
            answer = data_item.get("answer", "")

            num_choices = sum(
-                1 for letter in ascii_uppercase[:8]
-                if letter in data_item and data_item[letter] is not None
-                and isinstance(data_item[letter], str) and data_item[letter].strip()
+                1
+                for letter in ascii_uppercase[:8]
+                if letter in data_item
+                and data_item[letter] is not None
+                and isinstance(data_item[letter], str)
+                and data_item[letter].strip()
            )
            num_choices = max(num_choices, 4)