mirror of
https://github.com/InternLM/InternBootcamp.git
synced 2026-04-19 12:58:04 +00:00
init-commit
This commit is contained in:
commit
18a552597a
3461 changed files with 1150579 additions and 0 deletions
121
internbootcamp/bootcamp/cipher/cipher_default.py
Executable file
121
internbootcamp/bootcamp/cipher/cipher_default.py
Executable file
|
|
@ -0,0 +1,121 @@
|
|||
import ast
|
||||
import re
|
||||
import json
|
||||
import distance
|
||||
|
||||
from internbootcamp.bootcamp.base import Basebootcamp
|
||||
from internbootcamp.libs.cipher import *
|
||||
from bootcamp_utils import catch_print
|
||||
|
||||
cipher_env_dict = {}
|
||||
for cipher_env in cipher_env_list:
|
||||
temp = cipher_env()
|
||||
cipher_env_dict[temp.cipher_name] = cipher_env
|
||||
|
||||
|
||||
class Cipherbootcamp(Basebootcamp):
|
||||
|
||||
@staticmethod
|
||||
def prompt_func(question_ori) -> str:
|
||||
"""
|
||||
Process the input_data and return the processed prompt.
|
||||
|
||||
Args:
|
||||
question_ori: The question to be processed.
|
||||
|
||||
Returns:
|
||||
str: The processed prompt.
|
||||
"""
|
||||
instruction_following = """
|
||||
Let's think step by step and output the final answer with an example markdown formatting:
|
||||
Final-answer: ```text
|
||||
BTWTBIGTKTGBGIKHGTBTBEME
|
||||
```
|
||||
"""
|
||||
prompt = question_ori + '\n' + instruction_following
|
||||
return prompt
|
||||
|
||||
@staticmethod
|
||||
def extract_output(output):
|
||||
"""
|
||||
Extract the output from the solution.
|
||||
|
||||
Args:
|
||||
output: Model output to be processed.
|
||||
|
||||
Returns:
|
||||
The processed output.
|
||||
"""
|
||||
pattern = pattern = r'```text\s*([\s\S]*?)\s*```'
|
||||
matches = re.findall(pattern, output)
|
||||
|
||||
if matches:
|
||||
# 获取 JSON 字符串
|
||||
json_str = matches[-1]
|
||||
# print('match?', json_str)
|
||||
# print('solution generated? first lines', output[:200])
|
||||
# print('solution generated? last lines', output[-200:])
|
||||
# 替换单引号为双引号,将元组表示改为列表表示
|
||||
json_str = json_str.replace("'", '"').replace("(", "[").replace(")", "]")
|
||||
try:
|
||||
# 解析 JSON 字符串为 Python 字典
|
||||
result_dict = json.loads(json_str) if type(json_str) == dict else json_str
|
||||
return result_dict
|
||||
except json.JSONDecodeError as e:
|
||||
# print(f"JSON 解析错误: {e}")
|
||||
return json_str
|
||||
else:
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _verify_correction(solution, identity)->bool:
|
||||
|
||||
input_str = identity.pop('input')
|
||||
cipher_source = identity.pop('source_filename')
|
||||
cipher_name = identity.pop('cipher_name')
|
||||
extra_args = identity.pop('extra_args',{})
|
||||
|
||||
this_cipher_env = None
|
||||
for cipher_env_name,cipher_env in cipher_env_dict.items():
|
||||
if cipher_env_name == cipher_name:
|
||||
this_cipher_env = cipher_env
|
||||
break
|
||||
if not this_cipher_env:
|
||||
raise ValueError(f"cipher_source {cipher_source} is not supported")
|
||||
else:
|
||||
this_cipher = this_cipher_env()
|
||||
|
||||
# 将solution转为小写
|
||||
solution = solution.lower()
|
||||
|
||||
|
||||
if 'encode' in cipher_source:
|
||||
this_cipher.generator(plaintext=input_str, **extra_args)
|
||||
# ground_truth 小写
|
||||
ground_truth = str(this_cipher.ciphertext).lower()
|
||||
score = 1 - min(distance.levenshtein(solution, ground_truth) / len(ground_truth), 1.0)
|
||||
elif 'decode' in cipher_source:
|
||||
# if 'ASCII' in cipher_source:
|
||||
# input_str = ast.literal_eval(input_str)
|
||||
_,ground_truth = catch_print(this_cipher.decode,text=input_str, **extra_args)
|
||||
ground_truth = str(ground_truth).lower()
|
||||
score = 1 - min(distance.levenshtein(solution, ground_truth) / len(ground_truth), 1.0)
|
||||
|
||||
return score*score
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
|
||||
candidate_str = "`text\nULTXAO2OCLC2W2\n```\n</think>\n\nFinal-answer: ```text\nBBBB\n```"
|
||||
is_valid = Cipherbootcamp.verify_score(model_output=candidate_str,identity={
|
||||
"id": 28,
|
||||
"source_filename": "icl_with_rule_decode_Kor_rule25_SHACipher_cn.jsonl",
|
||||
"cipher_name": "Kor_rule25_SHACipher",
|
||||
"input": "26a37e1c9c2830e646b1163cfb",
|
||||
"extra_args": {},
|
||||
"output": "开始解密过程...\n加密的十六进制文本: 26a37e1c9c2830e646b1163cfb\n生成的SHA-256密钥: 73ef2a4edd7a7fbf07fd5f6faf99674dc0c25a025fd74c221f4c35849e5c0fb3\n十六进制转换为字节序列: b'&\\xa3~\\x1c\\x9c(0\\xe6F\\xb1\\x16<\\xfb'\n开始XOR解密...\n解密后的字节序列: b'ULTRAROYALIST'\n最终解密结果: ULTRAROYALIST\n",
|
||||
"ground_truth": "ULTRAROYALIST"
|
||||
}, short_penalty=False )
|
||||
|
||||
print("Is the candidate path valid?", is_valid)
|
||||
Loading…
Add table
Add a link
Reference in a new issue