InternBootcamp/examples/data/InternBootcamp_eval/korPuzzleArrowMaze.jsonl
2025-05-23 15:27:15 +08:00

64 lines
19 KiB
JSON
Executable file
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = r and C2 = ¬r ¬q, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["r"], "C2": ["¬r", "¬q"], "expected": "¬q"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = p ¬s and C2 = ¬p r, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["p", "¬s"], "C2": ["¬p", "r"], "expected": "r ¬s"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = q and C2 = ¬q s, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["q"], "C2": ["¬q", "s"], "expected": "s"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (q)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["q"]], "expected_output": "Plausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = s p and C2 = ¬s r be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["s", "p"], "C2": ["¬s", "r"], "expected": true}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = ¬s s q and C2 = q be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["¬s", "s", "q"], "C2": ["q"], "expected": false}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = s ¬r and C2 = s be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["s", "¬r"], "C2": ["s"], "expected": false}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (r s)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["r", "s"]], "expected_output": "Plausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (¬s)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["¬s"]], "expected_output": "Plausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (r ¬p q)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["r", "¬p", "q"]], "expected_output": "Plausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (p) ∧ (¬p)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["p"], ["¬p"]], "expected_output": "Implausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = q and C2 = ¬q be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["q"], "C2": ["¬q"], "expected": true}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = q r s and C2 = ¬q, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["q", "r", "s"], "C2": ["¬q"], "expected": "r s"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (p) ∧ (¬p)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["p"], ["¬p"]], "expected_output": "Implausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (¬r)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["¬r"]], "expected_output": "Plausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = r and C2 = q ¬s be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["r"], "C2": ["q", "¬s"], "expected": false}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = s p and C2 = ¬s r, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["s", "p"], "C2": ["¬s", "r"], "expected": "r p"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = p r and C2 = ¬p r, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["p", "r"], "C2": ["¬p", "r"], "expected": "r"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = p ¬s and C2 = ¬p ¬r, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["p", "¬s"], "C2": ["¬p", "¬r"], "expected": "¬r ¬s"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = ¬r and C2 = s p be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["¬r"], "C2": ["s", "p"], "expected": false}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (p) ∧ (¬p)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["p"], ["¬p"]], "expected_output": "Implausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = r and C2 = p ¬p q be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["r"], "C2": ["p", "¬p", "q"], "expected": false}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = p s and C2 = ¬p, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["p", "s"], "C2": ["¬p"], "expected": "s"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = ¬s and C2 = r ¬q be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["¬s"], "C2": ["r", "¬q"], "expected": false}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = p r ¬q and C2 = ¬p be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["p", "r", "¬q"], "C2": ["¬p"], "expected": true}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = q ¬p and C2 = ¬q r s be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["q", "¬p"], "C2": ["¬q", "r", "s"], "expected": true}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = r and C2 = ¬r, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["r"], "C2": ["¬r"], "expected": "0"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (r s)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["r", "s"]], "expected_output": "Plausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = s and C2 = ¬s r, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["s"], "C2": ["¬s", "r"], "expected": "r"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (r)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["r"]], "expected_output": "Plausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = r and C2 = ¬r, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["r"], "C2": ["¬r"], "expected": "0"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (p) ∧ (¬p)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["p"], ["¬p"]], "expected_output": "Implausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (s q)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["s", "q"]], "expected_output": "Plausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (p) ∧ (¬p)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["p"], ["¬p"]], "expected_output": "Implausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = r and C2 = ¬r ¬p, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["r"], "C2": ["¬r", "¬p"], "expected": "¬p"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (p) ∧ (¬p)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["p"], ["¬p"]], "expected_output": "Implausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = s and C2 = ¬s ¬q ¬r, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["s"], "C2": ["¬s", "¬q", "¬r"], "expected": "¬q ¬r"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = p q and C2 = ¬s s ¬r be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["p", "q"], "C2": ["¬s", "s", "¬r"], "expected": false}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = r s p and C2 = ¬r ¬q, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["r", "s", "p"], "C2": ["¬r", "¬q"], "expected": "¬q s p"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = r and C2 = ¬r be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["r"], "C2": ["¬r"], "expected": true}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = q and C2 = ¬q s ¬s, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["q"], "C2": ["¬q", "s", "¬s"], "expected": "s ¬s"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = q r ¬s and C2 = ¬q p ¬p, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["q", "r", "¬s"], "C2": ["¬q", "p", "¬p"], "expected": "r p ¬p ¬s"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = s p q and C2 = ¬s, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["s", "p", "q"], "C2": ["¬s"], "expected": "p q"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (p) ∧ (¬p)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["p"], ["¬p"]], "expected_output": "Implausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (q ¬q ¬r)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["q", "¬q", "¬r"]], "expected_output": "Plausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = p and C2 = ¬p be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["p"], "C2": ["¬p"], "expected": true}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = r q and C2 = ¬r, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["r", "q"], "C2": ["¬r"], "expected": "q"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = p ¬s and C2 = ¬p q, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["p", "¬s"], "C2": ["¬p", "q"], "expected": "q ¬s"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (p) ∧ (¬p)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["p"], ["¬p"]], "expected_output": "Implausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = s ¬s and C2 = ¬r be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["s", "¬s"], "C2": ["¬r"], "expected": false}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (¬p q)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["¬p", "q"]], "expected_output": "Plausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = q ¬p ¬s and C2 = ¬q r ¬r, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["q", "¬p", "¬s"], "C2": ["¬q", "r", "¬r"], "expected": "r ¬r ¬p ¬s"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (p) ∧ (¬p)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["p"], ["¬p"]], "expected_output": "Implausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = r and C2 = ¬r, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["r"], "C2": ["¬r"], "expected": "0"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = p ¬q and C2 = ¬p ¬q ¬r, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["p", "¬q"], "C2": ["¬p", "¬q", "¬r"], "expected": "¬q ¬r"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = r and C2 = ¬r q, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["r"], "C2": ["¬r", "q"], "expected": "q"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = p r and C2 = ¬p, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["p", "r"], "C2": ["¬p"], "expected": "r"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = p and C2 = ¬p ¬q, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["p"], "C2": ["¬p", "¬q"], "expected": "¬q"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = ¬q p and C2 = ¬q be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["¬q", "p"], "C2": ["¬q"], "expected": false}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = s ¬s and C2 = ¬r be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["s", "¬s"], "C2": ["¬r"], "expected": false}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = ¬q and C2 = ¬s be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["¬q"], "C2": ["¬s"], "expected": false}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (¬r)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["¬r"]], "expected_output": "Plausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = ¬s and C2 = q be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["¬s"], "C2": ["q"], "expected": false}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = ¬s and C2 = r ¬q ¬p be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["¬s"], "C2": ["r", "¬q", "¬p"], "expected": false}}