[feat] remove answer parsing since it's already handled

This commit is contained in:
theblackcat102 2025-02-20 16:57:51 +08:00
parent 9a2e9e949e
commit 407b212326
2 changed files with 10 additions and 28 deletions

View file

@ -112,33 +112,26 @@ def test_cryptarithm_score_answer():
correct_answer_str = puzzle["answer"] # e.g. "A=1,B=7,..."
# 1) Missing '<answer>' => score should be 0.0
score = dataset.score_answer(answer=None, answer_str=correct_answer_str)
assert score == 0.0, f"Expected 0.0 when missing '<answer>' prefix, got {score}"
# score = dataset.score_answer(answer=None, answer_str=correct_answer_str)
# assert score == 0.0, f"Expected 0.0 when missing '<answer>' prefix, got {score}"
# 2) Correct mapping => expecting 1.0
user_answer = f"<answer>{correct_answer_str}</answer>"
score = dataset.score_answer(answer=user_answer, answer_str=correct_answer_str)
score = dataset.score_answer(answer=correct_answer_str, answer_str=correct_answer_str)
assert score == 1.0, f"Expected 1.0 for perfectly correct answer, got {score}"
# 2.1) Missing end tag => expecting 0.0
user_answer = f"<answer>{correct_answer_str}"
score = dataset.score_answer(answer=user_answer, answer_str=correct_answer_str)
assert score == 0.0, f"Expected 0.0 for missing end answer tag, got {score}"
# 3) Mismatch number of pairs => score should be 0.1
# For instance, drop the last pair
splitted = correct_answer_str.split(",")
mismatch_str = ",".join(splitted[:-1])
user_answer = f"<answer>{mismatch_str}</answer>"
score = dataset.score_answer(answer=user_answer, answer_str=correct_answer_str)
score = dataset.score_answer(answer=mismatch_str, answer_str=correct_answer_str)
assert score == 0.1, f"Expected 0.1 when #pairs does not match, got {score}"
# 4) Parse error => 0.15 (e.g. remove '=' from the first pair)
splitted = correct_answer_str.split(",")
splitted[0] = splitted[0].replace("=", "") # remove '=' in the first pair
parse_error_str = ",".join(splitted)
user_answer = f"<answer>{parse_error_str}</answer>"
score = dataset.score_answer(answer=user_answer, answer_str=correct_answer_str)
score = dataset.score_answer(answer=parse_error_str, answer_str=correct_answer_str)
assert score == 0.15, f"Expected 0.15 when parsing fails on at least one pair, got {score}"
# 5) Correct number of pairs, but duplicate alphabets => 0.3
@ -147,8 +140,7 @@ def test_cryptarithm_score_answer():
if len(splitted) > 1:
splitted[0] = splitted[1] # Duplicate the second pair in the first position
duplicates_str = ",".join(splitted)
user_answer = f"<answer>{duplicates_str}</answer>"
score = dataset.score_answer(answer=user_answer, answer_str=correct_answer_str)
score = dataset.score_answer(answer=duplicates_str, answer_str=correct_answer_str)
assert score == 0.3, f"Expected 0.3 if the final dict has fewer unique alphabets, got {score}"
# 6) Partial correctness => some correct, some incorrect
@ -171,8 +163,7 @@ def test_cryptarithm_score_answer():
i += 1
partial_answer_str = ",".join(new_pairs)
user_answer = f"<answer>{partial_answer_str}</answer>"
score = dataset.score_answer(answer=user_answer, answer_str=correct_answer_str)
score = dataset.score_answer(answer=partial_answer_str, answer_str=correct_answer_str)
# The formula is (num_correct / total) * 0.7 + 0.3
expected_score = (half / total) * 0.7 + 0.3