init-commit

2026-04-19 12:58:04 +00:00 · 2025-05-23 15:27:15 +08:00 · 2025-05-23 15:27:15 +08:00 · 18a552597a
commit 18a552597a
3461 changed files with 1150579 additions and 0 deletions
--- a/internbootcamp/bootcamp/cipher/cipher_default.py
+++ b/internbootcamp/bootcamp/cipher/cipher_default.py
@ -0,0 +1,121 @@
+import ast
+import re
+import json
+import distance
+
+from internbootcamp.bootcamp.base import Basebootcamp
+from internbootcamp.libs.cipher import *
+from bootcamp_utils import catch_print
+
+cipher_env_dict = {}
+for cipher_env in cipher_env_list:
+    temp = cipher_env()
+    cipher_env_dict[temp.cipher_name] = cipher_env
+
+
+class Cipherbootcamp(Basebootcamp):
+    
+    @staticmethod
+    def prompt_func(question_ori) -> str:
+        """
+        Process the input_data and return the processed prompt.
+        
+        Args:
+            question_ori: The question to be processed.
+        
+        Returns:
+            str: The processed prompt.
+        """
+        instruction_following = """
+Let's think step by step and output the final answer with an example markdown formatting:
+Final-answer: ```text
+BTWTBIGTKTGBGIKHGTBTBEME
+```
+"""
+        prompt = question_ori + '\n' + instruction_following
+        return prompt
+    
+    @staticmethod
+    def extract_output(output):
+        """
+        Extract the output from the solution.
+        
+        Args:
+            output: Model output to be processed.
+        
+        Returns:
+            The processed output.
+        """
+        pattern = pattern = r'```text\s*([\s\S]*?)\s*```'
+        matches = re.findall(pattern, output)
+
+        if matches:
+            # 获取 JSON 字符串
+            json_str = matches[-1]
+            # print('match?', json_str)
+            # print('solution generated? first lines', output[:200])
+            # print('solution generated? last lines', output[-200:])
+            # 替换单引号为双引号，将元组表示改为列表表示
+            json_str = json_str.replace("'", '"').replace("(", "[").replace(")", "]")
+            try:
+                # 解析 JSON 字符串为 Python 字典
+                result_dict = json.loads(json_str) if type(json_str) == dict else json_str
+                return result_dict
+            except json.JSONDecodeError as e:
+                # print(f"JSON 解析错误: {e}")
+                return json_str
+        else:
+            return None
+        
+    @staticmethod 
+    def _verify_correction(solution, identity)->bool:
+        
+        input_str = identity.pop('input')
+        cipher_source = identity.pop('source_filename')
+        cipher_name = identity.pop('cipher_name')
+        extra_args = identity.pop('extra_args',{})
+        
+        this_cipher_env = None
+        for cipher_env_name,cipher_env in cipher_env_dict.items():
+            if cipher_env_name == cipher_name:
+                this_cipher_env = cipher_env
+                break
+        if not this_cipher_env:
+            raise ValueError(f"cipher_source {cipher_source} is not supported")
+        else:
+            this_cipher = this_cipher_env()
+            
+        # 将solution转为小写
+        solution = solution.lower()
+        
+            
+        if 'encode' in cipher_source:
+            this_cipher.generator(plaintext=input_str, **extra_args)
+            # ground_truth 小写
+            ground_truth = str(this_cipher.ciphertext).lower()
+            score = 1 - min(distance.levenshtein(solution, ground_truth) / len(ground_truth), 1.0)
+        elif 'decode' in cipher_source:
+            # if 'ASCII' in cipher_source:
+            #     input_str = ast.literal_eval(input_str)
+            _,ground_truth = catch_print(this_cipher.decode,text=input_str, **extra_args)
+            ground_truth = str(ground_truth).lower()
+            score = 1 - min(distance.levenshtein(solution, ground_truth) / len(ground_truth), 1.0)
+            
+        return score*score
+
+
+if __name__ == "__main__":
+
+
+    candidate_str = "`text\nULTXAO2OCLC2W2\n```\n</think>\n\nFinal-answer: ```text\nBBBB\n```"
+    is_valid = Cipherbootcamp.verify_score(model_output=candidate_str,identity={
+    "id": 28,
+    "source_filename": "icl_with_rule_decode_Kor_rule25_SHACipher_cn.jsonl",
+    "cipher_name": "Kor_rule25_SHACipher",
+    "input": "26a37e1c9c2830e646b1163cfb",
+    "extra_args": {},
+    "output": "开始解密过程...\n加密的十六进制文本: 26a37e1c9c2830e646b1163cfb\n生成的SHA-256密钥: 73ef2a4edd7a7fbf07fd5f6faf99674dc0c25a025fd74c221f4c35849e5c0fb3\n十六进制转换为字节序列: b'&\\xa3~\\x1c\\x9c(0\\xe6F\\xb1\\x16<\\xfb'\n开始XOR解密...\n解密后的字节序列: b'ULTRAROYALIST'\n最终解密结果: ULTRAROYALIST\n",
+    "ground_truth": "ULTRAROYALIST"
+  }, short_penalty=False )
+    
+    print("Is the candidate path valid?", is_valid)