init-commit

This commit is contained in:
lilinyang 2025-05-23 15:27:15 +08:00
commit 18a552597a
3461 changed files with 1150579 additions and 0 deletions

View file

@ -0,0 +1,64 @@
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = r and C2 = ¬r ¬q, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["r"], "C2": ["¬r", "¬q"], "expected": "¬q"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = p ¬s and C2 = ¬p r, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["p", "¬s"], "C2": ["¬p", "r"], "expected": "r ¬s"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = q and C2 = ¬q s, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["q"], "C2": ["¬q", "s"], "expected": "s"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (q)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["q"]], "expected_output": "Plausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = s p and C2 = ¬s r be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["s", "p"], "C2": ["¬s", "r"], "expected": true}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = ¬s s q and C2 = q be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["¬s", "s", "q"], "C2": ["q"], "expected": false}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = s ¬r and C2 = s be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["s", "¬r"], "C2": ["s"], "expected": false}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (r s)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["r", "s"]], "expected_output": "Plausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (¬s)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["¬s"]], "expected_output": "Plausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (r ¬p q)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["r", "¬p", "q"]], "expected_output": "Plausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (p) ∧ (¬p)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["p"], ["¬p"]], "expected_output": "Implausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = q and C2 = ¬q be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["q"], "C2": ["¬q"], "expected": true}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = q r s and C2 = ¬q, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["q", "r", "s"], "C2": ["¬q"], "expected": "r s"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (p) ∧ (¬p)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["p"], ["¬p"]], "expected_output": "Implausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (¬r)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["¬r"]], "expected_output": "Plausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = r and C2 = q ¬s be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["r"], "C2": ["q", "¬s"], "expected": false}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = s p and C2 = ¬s r, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["s", "p"], "C2": ["¬s", "r"], "expected": "r p"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = p r and C2 = ¬p r, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["p", "r"], "C2": ["¬p", "r"], "expected": "r"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = p ¬s and C2 = ¬p ¬r, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["p", "¬s"], "C2": ["¬p", "¬r"], "expected": "¬r ¬s"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = ¬r and C2 = s p be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["¬r"], "C2": ["s", "p"], "expected": false}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (p) ∧ (¬p)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["p"], ["¬p"]], "expected_output": "Implausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = r and C2 = p ¬p q be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["r"], "C2": ["p", "¬p", "q"], "expected": false}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = p s and C2 = ¬p, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["p", "s"], "C2": ["¬p"], "expected": "s"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = ¬s and C2 = r ¬q be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["¬s"], "C2": ["r", "¬q"], "expected": false}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = p r ¬q and C2 = ¬p be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["p", "r", "¬q"], "C2": ["¬p"], "expected": true}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = q ¬p and C2 = ¬q r s be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["q", "¬p"], "C2": ["¬q", "r", "s"], "expected": true}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = r and C2 = ¬r, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["r"], "C2": ["¬r"], "expected": "0"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (r s)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["r", "s"]], "expected_output": "Plausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = s and C2 = ¬s r, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["s"], "C2": ["¬s", "r"], "expected": "r"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (r)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["r"]], "expected_output": "Plausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = r and C2 = ¬r, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["r"], "C2": ["¬r"], "expected": "0"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (p) ∧ (¬p)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["p"], ["¬p"]], "expected_output": "Implausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (s q)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["s", "q"]], "expected_output": "Plausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (p) ∧ (¬p)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["p"], ["¬p"]], "expected_output": "Implausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = r and C2 = ¬r ¬p, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["r"], "C2": ["¬r", "¬p"], "expected": "¬p"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (p) ∧ (¬p)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["p"], ["¬p"]], "expected_output": "Implausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = s and C2 = ¬s ¬q ¬r, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["s"], "C2": ["¬s", "¬q", "¬r"], "expected": "¬q ¬r"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = p q and C2 = ¬s s ¬r be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["p", "q"], "C2": ["¬s", "s", "¬r"], "expected": false}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = r s p and C2 = ¬r ¬q, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["r", "s", "p"], "C2": ["¬r", "¬q"], "expected": "¬q s p"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = r and C2 = ¬r be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["r"], "C2": ["¬r"], "expected": true}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = q and C2 = ¬q s ¬s, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["q"], "C2": ["¬q", "s", "¬s"], "expected": "s ¬s"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = q r ¬s and C2 = ¬q p ¬p, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["q", "r", "¬s"], "C2": ["¬q", "p", "¬p"], "expected": "r p ¬p ¬s"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = s p q and C2 = ¬s, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["s", "p", "q"], "C2": ["¬s"], "expected": "p q"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (p) ∧ (¬p)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["p"], ["¬p"]], "expected_output": "Implausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (q ¬q ¬r)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["q", "¬q", "¬r"]], "expected_output": "Plausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = p and C2 = ¬p be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["p"], "C2": ["¬p"], "expected": true}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = r q and C2 = ¬r, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["r", "q"], "C2": ["¬r"], "expected": "q"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = p ¬s and C2 = ¬p q, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["p", "¬s"], "C2": ["¬p", "q"], "expected": "q ¬s"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (p) ∧ (¬p)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["p"], ["¬p"]], "expected_output": "Implausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = s ¬s and C2 = ¬r be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["s", "¬s"], "C2": ["¬r"], "expected": false}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (¬p q)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["¬p", "q"]], "expected_output": "Plausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = q ¬p ¬s and C2 = ¬q r ¬r, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["q", "¬p", "¬s"], "C2": ["¬q", "r", "¬r"], "expected": "r ¬r ¬p ¬s"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (p) ∧ (¬p)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["p"], ["¬p"]], "expected_output": "Implausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = r and C2 = ¬r, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["r"], "C2": ["¬r"], "expected": "0"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = p ¬q and C2 = ¬p ¬q ¬r, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["p", "¬q"], "C2": ["¬p", "¬q", "¬r"], "expected": "¬q ¬r"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = r and C2 = ¬r q, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["r"], "C2": ["¬r", "q"], "expected": "q"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = p r and C2 = ¬p, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["p", "r"], "C2": ["¬p"], "expected": "r"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "If C1 = p and C2 = ¬p ¬q, what is dispel(C1, C2)?\nProvide answer in format [[result]].\nFor multiple results use [[result1;result2]].\nFor empty clause write [[0]].", "ground_truth": {"problem_type": "compute_dispel", "C1": ["p"], "C2": ["¬p", "¬q"], "expected": "¬q"}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = ¬q p and C2 = ¬q be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["¬q", "p"], "C2": ["¬q"], "expected": false}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = s ¬s and C2 = ¬r be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["s", "¬s"], "C2": ["¬r"], "expected": false}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = ¬q and C2 = ¬s be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["¬q"], "C2": ["¬s"], "expected": false}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Apply resolution algorithm to: (¬r)\nWhat is the output (Plausible/Implausible) and cycle count?\nAnswer format: [[output];[number]].", "ground_truth": {"problem_type": "algorithm_output", "cnf": [["¬r"]], "expected_output": "Plausible", "steps": 1}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = ¬s and C2 = q be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["¬s"], "C2": ["q"], "expected": false}}
{"data_source": "KorPuzzleArrowMaze", "prompt": "Can clauses C1 = ¬s and C2 = r ¬q ¬p be resolved?\nA. Yes\nB. No\nAnswer format: [[option]].", "ground_truth": {"problem_type": "can_resolve", "C1": ["¬s"], "C2": ["r", "¬q", "¬p"], "expected": false}}