mirror of
https://github.com/InternLM/InternBootcamp.git
synced 2026-04-19 12:58:04 +00:00
121 lines
4.4 KiB
Python
Executable file
121 lines
4.4 KiB
Python
Executable file
import ast
|
|
import re
|
|
import json
|
|
import distance
|
|
|
|
from internbootcamp.bootcamp.base import Basebootcamp
|
|
from internbootcamp.libs.cipher import *
|
|
from bootcamp_utils import catch_print
|
|
|
|
# Registry of the available cipher environments, keyed by cipher name.
# Each class in ``cipher_env_list`` is instantiated once so that its
# ``cipher_name`` can be read from the instance; the class itself (not the
# throwaway instance) is what gets stored. A later class with the same
# ``cipher_name`` replaces an earlier one.
cipher_env_dict = {}
for cipher_env in cipher_env_list:
    temp = cipher_env()
    cipher_env_dict.update({temp.cipher_name: cipher_env})
|
|
|
|
|
|
class Cipherbootcamp(Basebootcamp):
    """Bootcamp task for cipher encode/decode puzzles.

    The class builds the prompt shown to a model, extracts the model's final
    answer from its raw output, and scores that answer against a ground truth
    recomputed with the cipher environment named in the task identity.
    """

    @staticmethod
    def prompt_func(question_ori) -> str:
        """Append the answer-format instruction to a question.

        Args:
            question_ori: The question text to be shown to the model.

        Returns:
            str: The question, a newline, and a fixed instruction asking for
            step-by-step reasoning with the final answer placed in a
            ```` ```text ```` fenced block after ``Final-answer:``.
        """
        # Written as explicit pieces so the exact characters sent to the
        # model (including the leading and trailing newlines) are visible.
        instruction_following = (
            "\n"
            "Let's think step by step and output the final answer with an example markdown formatting:\n"
            "Final-answer: ```text\n"
            "BTWTBIGTKTGBGIKHGTBTBEME\n"
            "```\n"
        )
        return question_ori + '\n' + instruction_following

    @staticmethod
    def extract_output(output):
        """Extract the final answer from a model response.

        Args:
            output: Raw model output to be searched.

        Returns:
            The content of the last ```` ```text ... ``` ```` fenced block with
            surrounding whitespace stripped, single quotes replaced by double
            quotes and parentheses replaced by square brackets; ``None`` when
            the output contains no such block.
        """
        pattern = r'```text\s*([\s\S]*?)\s*```'
        matches = re.findall(pattern, output)
        if not matches:
            return None

        # The last fenced block is taken as the final answer; earlier blocks
        # may belong to the model's intermediate reasoning.
        answer = matches[-1]

        # Replace single quotes with double quotes and tuple-style parentheses
        # with list-style brackets. The answer is always a plain string, so it
        # is returned as-is afterwards (no JSON parsing is attempted).
        return answer.replace("'", '"').replace("(", "[").replace(")", "]")

    @staticmethod
    def _verify_correction(solution, identity) -> float:
        """Score an extracted answer against the recomputed ground truth.

        The ground truth is produced by the cipher environment registered in
        ``cipher_env_dict`` under ``identity['cipher_name']``. Whether the
        task is an encoding or a decoding task is decided by the substring
        ``'encode'`` / ``'decode'`` in ``identity['source_filename']``.

        Args:
            solution: The answer returned by ``extract_output`` (a string, or
                ``None`` when no answer block was found).
            identity: Task description. Reads the keys ``'input'``,
                ``'source_filename'``, ``'cipher_name'`` and the optional
                ``'extra_args'`` (keyword arguments forwarded to the cipher).
                The mapping is not modified.

        Returns:
            float: ``s * s`` where ``s`` is ``1 - edit_distance / len(truth)``
            clamped to ``[0, 1]``; the comparison is case-insensitive.
            ``0.0`` when ``solution`` is ``None``.

        Raises:
            KeyError: If a required key is missing from ``identity``.
            ValueError: If the cipher name is not registered, or the source
                filename names neither an encode nor a decode task.
        """
        # Read (rather than pop) so the caller's identity dict stays intact
        # and the same task can be verified more than once.
        input_str = identity['input']
        cipher_source = identity['source_filename']
        cipher_name = identity['cipher_name']
        extra_args = identity.get('extra_args', {})

        this_cipher_env = cipher_env_dict.get(cipher_name)
        if this_cipher_env is None:
            raise ValueError(
                f"cipher_name {cipher_name} (cipher_source {cipher_source}) is not supported"
            )
        this_cipher = this_cipher_env()

        # No answer block was found in the model output: nothing to compare.
        if solution is None:
            return 0.0

        # Compare case-insensitively: lowercase both sides.
        solution = solution.lower()

        if 'encode' in cipher_source:
            # generator() is expected to populate ``ciphertext`` on the
            # cipher instance, which is read back as the ground truth.
            this_cipher.generator(plaintext=input_str, **extra_args)
            ground_truth = str(this_cipher.ciphertext).lower()
        elif 'decode' in cipher_source:
            # catch_print returns a pair; only its second element (used here
            # as the decode result) is needed. The first is presumably the
            # captured printed output -- TODO confirm against bootcamp_utils.
            _, ground_truth = catch_print(this_cipher.decode, text=input_str, **extra_args)
            ground_truth = str(ground_truth).lower()
        else:
            raise ValueError(
                f"cipher_source {cipher_source} is neither an encode nor a decode task"
            )

        if not ground_truth:
            # Avoid dividing by zero: an empty ground truth is matched only
            # by an empty answer.
            score = 1.0 if not solution else 0.0
        else:
            # Normalized edit-distance similarity, clamped so answers much
            # longer than the ground truth cannot go below zero.
            edit_distance = distance.levenshtein(solution, ground_truth)
            score = 1 - min(edit_distance / len(ground_truth), 1.0)

        # Squaring penalizes partially-correct answers more heavily.
        return score * score
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: score one hand-written model response against a
    # sample decode task. The response contains a malformed block inside the
    # reasoning part and a well-formed ```text block after "Final-answer:".
    candidate_str = "`text\nULTXAO2OCLC2W2\n```\n</think>\n\nFinal-answer: ```text\nBBBB\n```"

    sample_identity = {
        "id": 28,
        "source_filename": "icl_with_rule_decode_Kor_rule25_SHACipher_cn.jsonl",
        "cipher_name": "Kor_rule25_SHACipher",
        "input": "26a37e1c9c2830e646b1163cfb",
        "extra_args": {},
        "output": "开始解密过程...\n加密的十六进制文本: 26a37e1c9c2830e646b1163cfb\n生成的SHA-256密钥: 73ef2a4edd7a7fbf07fd5f6faf99674dc0c25a025fd74c221f4c35849e5c0fb3\n十六进制转换为字节序列: b'&\\xa3~\\x1c\\x9c(0\\xe6F\\xb1\\x16<\\xfb'\n开始XOR解密...\n解密后的字节序列: b'ULTRAROYALIST'\n最终解密结果: ULTRAROYALIST\n",
        "ground_truth": "ULTRAROYALIST",
    }

    # verify_score is not defined in this file; it is presumably inherited
    # from Basebootcamp.
    is_valid = Cipherbootcamp.verify_score(
        model_output=candidate_str,
        identity=sample_identity,
        short_penalty=False,
    )

    print("Is the candidate path valid?", is_valid)
|