init-commit

2026-04-19 12:58:04 +00:00 · 2025-05-23 15:27:15 +08:00 · 2025-05-23 15:27:15 +08:00 · 18a552597a
commit 18a552597a
3461 changed files with 1150579 additions and 0 deletions
--- a/internbootcamp/libs/bbeh_multistep_arithmetic/bbeh_arithmetic_generator.py
+++ b/internbootcamp/libs/bbeh_multistep_arithmetic/bbeh_arithmetic_generator.py
@ -0,0 +1,205 @@
+import random
+import math
+import logging
+from typing import Dict, Tuple
+
+
+class BBEHArithmeticGenerator:
+    def __init__(self):
+        self.operators = ['+', '-', '*', '/', '><', ';', '@', '<>', '[]', '#', '!', '~', '&', ':', '][']
+        self.precedence = {
+            '+': 1, '-': 1,
+            '*': 2, '/': 2,
+            '><': 3, ';': 3,
+            '@': 4, '<>': 4, '[]': 4,
+            '#': 5, '!': 5,
+            '~': 6, '&': 6,
+            ':': 7, '][': 7
+        }
+        self.number_words = {
+            'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5,
+            'six': 6, 'seven': 7, 'eight': 8, 'nine': 9, 'ten': 10
+        }
+        self.logger = logging.getLogger(__name__)
+        self.max_value = 1.7976931348623157e+308
+        self.min_value = -1.7976931348623157e+308
+        self.epsilon = 1e-10
+
+    def generate_case(self, min_depth: int = 3, max_depth: int = 6,
+                      max_length: int = 50, difficulty: str = "medium") -> Dict:
+        """生成一个算术表达式案例"""
+        # 根据难度调整参数
+        if difficulty == "easy":
+            min_depth = 2
+            max_depth = 4
+            max_length = 30
+        elif difficulty == "hard":
+            min_depth = 4
+            max_depth = 8
+            max_length = 70
+
+        expression, answer = self._generate_expression(min_depth, max_depth, max_length)
+
+        return {
+            "expression": expression,
+            "answer": answer,
+            "difficulty": difficulty
+        }
+
+    def _generate_subexpression(self, depth: int, current_length: int,
+                                max_length: int) -> Tuple[str, int]:
+        """生成子表达式"""
+        if depth == 0 or current_length >= max_length:
+            if random.random() < 0.3:
+                word = random.choice(list(self.number_words.keys()))
+                return word, current_length + 1
+            return str(random.randint(-100, 100)), current_length + 1
+
+        choice = random.random()
+
+        if choice < 0.4:
+            op = random.choice(self.operators)
+            left, left_length = self._generate_subexpression(depth - 1, current_length, max_length)
+            right, right_length = self._generate_subexpression(depth - 1, left_length + 1, max_length)
+            return f"({left} {op} {right})", right_length + 3
+        else:
+            op = random.choice(self.operators)
+            left, left_length = self._generate_subexpression(depth - 1, current_length, max_length)
+            right, right_length = self._generate_subexpression(depth - 1, left_length + 1, max_length)
+            return f"({left} {op} {right})", right_length + 3
+
+    def _generate_expression(self, min_depth: int, max_depth: int,
+                             max_length: int) -> Tuple[str, float]:
+        """生成完整表达式及其答案"""
+        from libs.bbeh_multistep_arithmetic.bbeh_arithmetic_solver import BBEHArithmeticSolver
+        solver = BBEHArithmeticSolver()
+
+        while True:
+            try:
+                expression, _ = self._generate_subexpression(
+                    random.randint(min_depth, max_depth), 0, max_length
+                )
+                answer = solver.solve(expression)
+
+                # 验证答案是否有效
+                if not math.isinf(answer) and not math.isnan(answer):
+                    return expression, answer
+            except Exception as e:
+                self.logger.warning(f"Expression generation failed: {str(e)}")
+                continue
+
+    def _validate_expression(self, tokens):
+        """验证表达式的有效性"""
+        stack = []
+        for token in tokens:
+            if token == '(':
+                stack.append(token)
+            elif token == ')':
+                if not stack:
+                    return False
+                stack.pop()
+        return len(stack) == 0
+
+    def _validate_expression_structure(self, tokens):
+        """验证表达式的结构是否合法"""
+        stack = []
+        operand_count = 0
+        operator_count = 0
+
+        for token in tokens:
+            if token == '(':
+                stack.append(token)
+            elif token == ')':
+                if not stack:
+                    return False
+                stack.pop()
+            elif token in self.operators:
+                operator_count += 1
+            else:
+                operand_count += 1
+
+        # 检查括号是否匹配
+        if stack:
+            return False
+
+        # 检查操作数和操作符的数量关系
+        return operand_count == operator_count + 1
+
+    def _tokenize(self, expr):
+        """Split an expression string into a list of string tokens.
+
+        Tokens are number literals (kept as the text that was scanned),
+        number words (replaced by the decimal string of their value), the
+        parentheses ``(`` / ``)`` and the operators in ``self.operators``.
+
+        Raises:
+            ValueError: on an unknown word, a malformed number, a character
+                that starts no known operator, or unbalanced parentheses.
+                Every exception is logged and then re-raised.
+        """
+        try:
+            tokens = []
+            i = 0
+            while i < len(expr):
+                char = expr[i]
+
+                # Skip whitespace.
+                if char.isspace():
+                    i += 1
+                    continue
+
+                # Number words: an alphabetic run that does not directly follow a digit.
+                if char.isalpha() and (i == 0 or not expr[i - 1].isdigit()):
+                    word = ''
+                    while i < len(expr) and expr[i].isalpha():
+                        word += expr[i]
+                        i += 1
+                    if word in self.number_words:
+                        tokens.append(str(self.number_words[word]))
+                    else:
+                        raise ValueError(f"Unknown word: {word}")
+                    continue
+
+                # Numbers (including scientific notation). A '-' is treated as a sign
+                # only at the start, after an operator, or after '('.
+                if char.isdigit() or (char == '-' and (not tokens or tokens[-1] in self.operators + ['('])):
+                    num = char
+                    i += 1
+                    while i < len(expr) and (expr[i].isdigit() or expr[i] == '.'):
+                        num += expr[i]
+                        i += 1
+
+                    # Optional exponent part: e/E, optional sign, digits.
+                    if i < len(expr) and (expr[i] == 'e' or expr[i] == 'E'):
+                        num += expr[i]
+                        i += 1
+                        if i < len(expr) and (expr[i] == '+' or expr[i] == '-'):
+                            num += expr[i]
+                            i += 1
+                        while i < len(expr) and expr[i].isdigit():
+                            num += expr[i]
+                            i += 1
+
+                    # float() is used only to validate; the token stays a string.
+                    # A lone '-' in sign position is rejected here.
+                    try:
+                        float(num)
+                        tokens.append(num)
+                    except ValueError:
+                        raise ValueError(f"Invalid number format: {num}")
+                    continue
+
+                # Parentheses.
+                if char in '()':
+                    tokens.append(char)
+                    i += 1
+                    continue
+
+                # Operators: try the longest candidate first so that two-character
+                # operators win over their one-character prefixes.
+                if i < len(expr):
+                    max_op_len = 3
+                    matched = False
+                    for length in range(max_op_len, 0, -1):
+                        if i + length <= len(expr):
+                            potential_op = expr[i:i + length]
+                            if potential_op in self.operators:
+                                tokens.append(potential_op)
+                                i += length
+                                matched = True
+                                break
+                    if not matched:
+                        raise ValueError(f"Invalid character: {char}")
+
+            # Final sanity check on parenthesis balance.
+            if not self._validate_expression(tokens):
+                raise ValueError("Invalid expression: Mismatched parentheses")
+
+            return tokens
+        except Exception as e:
+            self.logger.error(f"Error in tokenization: {str(e)}")
+            raise
--- a/internbootcamp/libs/bbeh_multistep_arithmetic/bbeh_arithmetic_solver.py
+++ b/internbootcamp/libs/bbeh_multistep_arithmetic/bbeh_arithmetic_solver.py
@ -0,0 +1,328 @@
+import math
+import logging
+from typing import Dict, List, Union, Optional
+
+class BBEHArithmeticSolver:
+    def __init__(self):
+        self.operators = ['+', '-', '*', '/', '><', ';', '@', '<>', '[]', '#', '!', '~', '&', ':', '][']
+        self.precedence = {
+            '+': 1, '-': 1,
+            '*': 2, '/': 2,
+            '><': 3, ';': 3,
+            '@': 4, '<>': 4, '[]': 4,
+            '#': 5, '!': 5,
+            '~': 6, '&': 6,
+            ':': 7, '][': 7
+        }
+        self.number_words = {
+            'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5,
+            'six': 6, 'seven': 7, 'eight': 8, 'nine': 9, 'ten': 10
+        }
+        self.logger = logging.getLogger(__name__)
+        self.epsilon = 1e-10
+        self.max_value = 1.7976931348623157e+308
+        self.min_value = -1.7976931348623157e+308
+
+    def solve(self, expression: str) -> float:
+        """求解算术表达式"""
+        try:
+            tokens = self._tokenize(expression)
+            if not tokens:
+                return float('inf')
+
+            if not self._validate_expression_structure(tokens):
+                return float('inf')
+
+            return self._evaluate_expression(tokens)
+        except Exception as e:
+            self.logger.error(f"Error solving expression: {str(e)}")
+            return float('inf')
+
+    def _tokenize(self, expr: str) -> List[str]:
+        """Split an expression string into a list of string tokens.
+
+        Tokens are number literals (kept as the text that was scanned),
+        number words (replaced by the decimal string of their value), the
+        parentheses ``(`` / ``)`` and the operators in ``self.operators``.
+
+        Raises:
+            ValueError: on an unknown word, a malformed number, a character
+                that starts no known operator, or unbalanced parentheses.
+                Every exception is logged and then re-raised.
+        """
+        try:
+            tokens = []
+            i = 0
+            while i < len(expr):
+                char = expr[i]
+
+                # Skip whitespace.
+                if char.isspace():
+                    i += 1
+                    continue
+
+                # Number words: an alphabetic run that does not directly follow a digit.
+                if char.isalpha() and (i == 0 or not expr[i - 1].isdigit()):
+                    word = ''
+                    while i < len(expr) and expr[i].isalpha():
+                        word += expr[i]
+                        i += 1
+                    if word in self.number_words:
+                        tokens.append(str(self.number_words[word]))
+                    else:
+                        raise ValueError(f"Unknown word: {word}")
+                    continue
+
+                # Numbers (including scientific notation). A '-' is treated as a sign
+                # only at the start, after an operator, or after '('.
+                if char.isdigit() or (char == '-' and (not tokens or tokens[-1] in self.operators + ['('])):
+                    num = char
+                    i += 1
+                    while i < len(expr) and (expr[i].isdigit() or expr[i] == '.'):
+                        num += expr[i]
+                        i += 1
+
+                    # Optional exponent part: e/E, optional sign, digits.
+                    if i < len(expr) and (expr[i] == 'e' or expr[i] == 'E'):
+                        num += expr[i]
+                        i += 1
+                        if i < len(expr) and (expr[i] == '+' or expr[i] == '-'):
+                            num += expr[i]
+                            i += 1
+                        while i < len(expr) and expr[i].isdigit():
+                            num += expr[i]
+                            i += 1
+
+                    # float() is used only to validate; the token stays a string.
+                    # A lone '-' in sign position is rejected here.
+                    try:
+                        float(num)
+                        tokens.append(num)
+                    except ValueError:
+                        raise ValueError(f"Invalid number format: {num}")
+                    continue
+
+                # Parentheses.
+                if char in '()':
+                    tokens.append(char)
+                    i += 1
+                    continue
+
+                # Operators: try the longest candidate first so that two-character
+                # operators win over their one-character prefixes.
+                if i < len(expr):
+                    max_op_len = 3
+                    matched = False
+                    for length in range(max_op_len, 0, -1):
+                        if i + length <= len(expr):
+                            potential_op = expr[i:i + length]
+                            if potential_op in self.operators:
+                                tokens.append(potential_op)
+                                i += length
+                                matched = True
+                                break
+                    if not matched:
+                        raise ValueError(f"Invalid character: {char}")
+
+            # Final sanity check on parenthesis balance.
+            if not self._validate_expression(tokens):
+                raise ValueError("Invalid expression: Mismatched parentheses")
+
+            return tokens
+        except Exception as e:
+            self.logger.error(f"Error in tokenization: {str(e)}")
+            raise
+
+    def _validate_expression(self, tokens: List[str]) -> bool:
+        """验证表达式的有效性"""
+        stack = []
+        for token in tokens:
+            if token == '(':
+                stack.append(token)
+            elif token == ')':
+                if not stack:
+                    return False
+                stack.pop()
+        return len(stack) == 0
+
+    def _validate_expression_structure(self, tokens: List[str]) -> bool:
+        """验证表达式的结构是否合法"""
+        stack = []
+        operand_count = 0
+        operator_count = 0
+
+        for token in tokens:
+            if token == '(':
+                stack.append(token)
+            elif token == ')':
+                if not stack:
+                    return False
+                stack.pop()
+            elif token in self.operators:
+                operator_count += 1
+            else:
+                operand_count += 1
+
+        if stack:
+            return False
+
+        return operand_count == operator_count + 1
+
+    def _safe_float_conversion(self, value: Union[str, int, float]) -> float:
+        """安全地将值转换为浮点数"""
+        try:
+            if isinstance(value, (int, float)):
+                if math.isinf(value) or math.isnan(value):
+                    return float('inf') if value > 0 else float('-inf')
+                return float(value)
+
+            if isinstance(value, str):
+                try:
+                    result = float(value)
+                    if math.isinf(result) or math.isnan(result):
+                        return float('inf') if result > 0 else float('-inf')
+                    if abs(result) > self.max_value:
+                        return float('inf') if result > 0 else float('-inf')
+                    if abs(result) != 0 and abs(result) < self.min_value:
+                        return 0.0
+                    return result
+                except ValueError:
+                    if value.lower() in self.number_words:
+                        return float(self.number_words[value.lower()])
+                    raise
+
+            raise ValueError(f"Cannot convert {type(value)} to float")
+        except (ValueError, TypeError) as e:
+            self.logger.error(f"Error in safe_float_conversion: {str(e)}")
+            return float('inf')
+
+    def _perform_operation(self, op: str, a: float, b: float) -> float:
+        """执行具体的运算"""
+        try:
+            if math.isinf(a) or math.isinf(b):
+                if op in ['+', '*', '@', '<>', '[]', '#', '&']:
+                    return float('inf') if (a > 0 or b > 0) else float('-inf')
+                elif op in ['-', '/', '><', ';', '!', '~', ':', '][']:
+                    return float('inf')
+
+            if op == '/' and abs(b) < self.epsilon:
+                return float('inf')
+
+            def safe_operation(func):
+                try:
+                    result = func()
+                    if math.isinf(result) or math.isnan(result):
+                        return float('inf') if result > 0 else float('-inf')
+                    if abs(result) > self.max_value:
+                        return float('inf') if result > 0 else float('-inf')
+                    if abs(result) != 0 and abs(result) < self.min_value:
+                        return 0.0
+                    return result
+                except (OverflowError, ValueError, ZeroDivisionError):
+                    return float('inf')
+
+            if op == '+':
+                result = safe_operation(lambda: a + b)
+            elif op == '-':
+                result = safe_operation(lambda: a - b)
+            elif op == '*':
+                result = safe_operation(lambda: a * b)
+            elif op == '/':
+                result = safe_operation(lambda: a / b if abs(b) > self.epsilon else float('inf'))
+            elif op == '><':
+                result = safe_operation(lambda: a - b if a * b > 0 else a + b)
+            elif op == ';':
+                result = safe_operation(lambda: (a - b) if a + b > 0 else a - b)
+            elif op == '@':
+                result = safe_operation(lambda: a * b if a > b else a + b)
+            elif op == '<>':
+                result = safe_operation(lambda: abs(a - b))
+            elif op == '[]':
+                result = safe_operation(lambda: max(a, b))
+            elif op == '#':
+                result = safe_operation(lambda: (a * b) if a > b else min(a, b))
+            elif op == '!':
+                result = safe_operation(lambda: (a - b) if a * b > 0 else (a + b))
+            elif op == '~':
+                result = safe_operation(lambda: (2 * b - a) if a > b else b)
+            elif op == '&':
+                result = safe_operation(lambda: (a + b) if a * b > 0 else (a - b))
+            elif op == ':':
+                result = safe_operation(lambda: (a * b) if a + b > 0 else (-a * -b))
+            elif op == '][':
+                try:
+                    if abs(a) < 1e10 and abs(b) < 1e10:
+                        gcd_value = self._gcd(abs(int(a)), abs(int(b)))
+                        return safe_operation(lambda: (a - b) if gcd_value == 1 else float(gcd_value))
+                    else:
+                        return float('inf')
+                except (OverflowError, ValueError):
+                    return float('inf')
+            else:
+                raise ValueError(f"Unknown operator: {op}")
+
+            return result
+        except Exception as e:
+            self.logger.error(f"Error in operation {op} with operands {a}, {b}: {str(e)}")
+            return float('inf')
+
+    def _gcd(self, a: int, b: int) -> int:
+        """计算最大公约数"""
+        while b:
+            a, b = b, a % b
+        return a
+
+    def _evaluate_expression(self, tokens: List[str]) -> float:
+        """Evaluate a token list using a value stack and an operator stack.
+
+        Operators are applied according to ``self.precedence``; operators of
+        equal precedence are applied left to right. Any error while
+        processing is logged and reported as ``float('inf')`` instead of
+        being raised.
+        """
+        try:
+            values = []
+            operators = []
+
+            for token in tokens:
+                try:
+                    if token == '(':
+                        operators.append(token)
+                    elif token == ')':
+                        # Apply everything stacked since the matching '('.
+                        while operators and operators[-1] != '(':
+                            if len(values) < 2:
+                                raise ValueError(f"Not enough operands for operator {operators[-1]}")
+                            op = operators.pop()
+                            b = self._safe_float_conversion(values.pop())
+                            a = self._safe_float_conversion(values.pop())
+                            result = self._perform_operation(op, a, b)
+                            values.append(result)
+                        # Discard the '(' itself, if one is present.
+                        if operators:
+                            operators.pop()
+                    elif token in self.operators:
+                        # Apply stacked operators of higher or equal precedence first.
+                        while (operators and operators[-1] != '(' and
+                               self.precedence.get(operators[-1], 0) >= self.precedence.get(token, 0)):
+                            if len(values) < 2:
+                                raise ValueError(f"Not enough operands for operator {operators[-1]}")
+                            op = operators.pop()
+                            b = self._safe_float_conversion(values.pop())
+                            a = self._safe_float_conversion(values.pop())
+                            result = self._perform_operation(op, a, b)
+                            values.append(result)
+                        operators.append(token)
+                    else:
+                        # Operand: a number word or a numeric literal.
+                        try:
+                            if isinstance(token, str):
+                                token_lower = token.lower()
+                                if token_lower in self.number_words:
+                                    value = float(self.number_words[token_lower])
+                                else:
+                                    value = self._safe_float_conversion(token)
+                                values.append(value)
+                            else:
+                                raise ValueError(f"Invalid token type: {type(token)}")
+                        except ValueError as ve:
+                            self.logger.error(f"Error converting number: {token}")
+                            raise ValueError(f"Invalid number format: {token}")
+
+                except Exception as e:
+                    # Any failure on a single token aborts the whole evaluation.
+                    self.logger.error(f"Error processing token {token}: {str(e)}")
+                    return float('inf')
+
+            # Apply whatever operators remain once all tokens are consumed.
+            while operators:
+                if len(values) < 2:
+                    raise ValueError("Not enough operands for remaining operators")
+                op = operators.pop()
+                if op == '(':
+                    raise ValueError("Mismatched parentheses")
+                b = self._safe_float_conversion(values.pop())
+                a = self._safe_float_conversion(values.pop())
+                result = self._perform_operation(op, a, b)
+                values.append(result)
+
+            # A well-formed expression leaves exactly one value behind.
+            if len(values) != 1:
+                raise ValueError("Invalid expression: too many values")
+
+            return values[0]
+
+        except Exception as e:
+            self.logger.error(f"Error evaluating expression: {str(e)}")
+            return float('inf')
--- a/internbootcamp/libs/bbeh_multistep_arithmetic/bbeh_arithmetic_validor.py
+++ b/internbootcamp/libs/bbeh_multistep_arithmetic/bbeh_arithmetic_validor.py
@ -0,0 +1,220 @@
+import math
+import logging
+from typing import Dict, Union
+
+
+class BBEHArithmeticVerifier:
+    def __init__(self):
+        self.epsilon = 1e-10
+        self.logger = logging.getLogger(__name__)
+        self.stats = {
+            "total": 0,
+            "correct": 0,
+            "by_difficulty": {
+                "easy": {"total": 0, "correct": 0},
+                "medium": {"total": 0, "correct": 0},
+                "hard": {"total": 0, "correct": 0}
+            },
+            "by_operator": {},
+            "by_expression_length": {
+                "short": {"total": 0, "correct": 0},
+                "medium": {"total": 0, "correct": 0},
+                "long": {"total": 0, "correct": 0}
+            }
+        }
+
+    def verify_answer(self, case: Dict, answer: float) -> bool:
+        """验证答案是否正确"""
+        try:
+            expected = case["answer"]
+            difficulty = case.get("difficulty", "medium")
+            expression = case.get("expression", "")
+
+            # 验证答案
+            is_correct = self._validate_solution(expected, answer)
+
+            # 更新统计信息
+            self._update_statistics(is_correct, difficulty, expression)
+
+            return is_correct
+
+        except Exception as e:
+            self.logger.error(f"Error in verification: {str(e)}")
+            return False
+
+    def _validate_solution(self, expected: float, calculated: float) -> bool:
+        """验证解决方案"""
+        try:
+            # 处理无穷大的情况
+            if math.isinf(expected) and math.isinf(calculated):
+                return 1 if expected * calculated > 0 else 0 # 确保符号相同
+
+            # 处理NaN的情况
+            if math.isnan(expected) or math.isnan(calculated):
+                return 0
+
+            # 处理零附近的值
+            if abs(expected) < self.epsilon and abs(calculated) < self.epsilon:
+                return 1
+
+            # 处理普通情况
+            if abs(expected) > self.epsilon:
+                error = abs(expected - calculated)
+                relative_error = 1 - min(abs((expected - calculated) / abs(expected)), 1.0)
+                return relative_error
+
+            return abs(expected - calculated) < self.epsilon
+
+        except Exception as e:
+            self.logger.error(f"Error in solution validation: {str(e)}")
+            return 0
+
+    def _update_statistics(self, is_correct: bool, difficulty: str, expression: str) -> None:
+        """更新统计信息"""
+        try:
+            # 更新总计数
+            self.stats["total"] += 1
+            if is_correct:
+                self.stats["correct"] += 1
+
+            # 更新难度统计
+            if difficulty in self.stats["by_difficulty"]:
+                self.stats["by_difficulty"][difficulty]["total"] += 1
+                if is_correct:
+                    self.stats["by_difficulty"][difficulty]["correct"] += 1
+
+            # 更新表达式长度统计
+            length_category = self._categorize_expression_length(expression)
+            self.stats["by_expression_length"][length_category]["total"] += 1
+            if is_correct:
+                self.stats["by_expression_length"][length_category]["correct"] += 1
+
+            # 更新运算符统计
+            operators = self._extract_operators(expression)
+            for op in operators:
+                if op not in self.stats["by_operator"]:
+                    self.stats["by_operator"][op] = {"total": 0, "correct": 0}
+                self.stats["by_operator"][op]["total"] += 1
+                if is_correct:
+                    self.stats["by_operator"][op]["correct"] += 1
+
+        except Exception as e:
+            self.logger.error(f"Error updating statistics: {str(e)}")
+
+    def _categorize_expression_length(self, expression: str) -> str:
+        """根据表达式长度进行分类"""
+        length = len(expression)
+        if length < 30:
+            return "short"
+        elif length < 60:
+            return "medium"
+        else:
+            return "long"
+
+    def _extract_operators(self, expression: str) -> set:
+        """提取表达式中的运算符"""
+        operators = set()
+        operator_chars = {'+', '-', '*', '/', '><', ';', '@', '<>', '[]', '#', '!', '~', '&', ':', ']['}
+
+        i = 0
+        while i < len(expression):
+            # 检查两字符运算符
+            if i + 1 < len(expression):
+                two_char = expression[i:i + 2]
+                if two_char in operator_chars:
+                    operators.add(two_char)
+                    i += 2
+                    continue
+
+            # 检查单字符运算符
+            if expression[i] in operator_chars:
+                operators.add(expression[i])
+
+            i += 1
+
+        return operators
+
+    def get_statistics(self) -> Dict:
+        """获取验证统计信息"""
+        stats = {
+            "total_cases": self.stats["total"],
+            "correct_answers": self.stats["correct"],
+            "success_rate": 0 if self.stats["total"] == 0 else
+            (self.stats["correct"] / self.stats["total"]) * 100,
+            "by_difficulty": {},
+            "by_expression_length": {},
+            "by_operator": {}
+        }
+
+        # 处理难度统计
+        for diff, counts in self.stats["by_difficulty"].items():
+            total = counts["total"]
+            correct = counts["correct"]
+            success_rate = 0 if total == 0 else (correct / total) * 100
+            stats["by_difficulty"][diff] = {
+                "total": total,
+                "correct": correct,
+                "success_rate": f"{success_rate:.2f}%"
+            }
+
+        # 处理表达式长度统计
+        for length, counts in self.stats["by_expression_length"].items():
+            total = counts["total"]
+            correct = counts["correct"]
+            success_rate = 0 if total == 0 else (correct / total) * 100
+            stats["by_expression_length"][length] = {
+                "total": total,
+                "correct": correct,
+                "success_rate": f"{success_rate:.2f}%"
+            }
+
+        # 处理运算符统计
+        for op, counts in self.stats["by_operator"].items():
+            total = counts["total"]
+            correct = counts["correct"]
+            success_rate = 0 if total == 0 else (correct / total) * 100
+            stats["by_operator"][op] = {
+                "total": total,
+                "correct": correct,
+                "success_rate": f"{success_rate:.2f}%"
+            }
+
+        return stats
+
+    def reset_statistics(self) -> None:
+        """重置统计信息"""
+        self.stats = {
+            "total": 0,
+            "correct": 0,
+            "by_difficulty": {
+                "easy": {"total": 0, "correct": 0},
+                "medium": {"total": 0, "correct": 0},
+                "hard": {"total": 0, "correct": 0}
+            },
+            "by_operator": {},
+            "by_expression_length": {
+                "short": {"total": 0, "correct": 0},
+                "medium": {"total": 0, "correct": 0},
+                "long": {"total": 0, "correct": 0}
+            }
+        }
+
+    def format_case(self, case: Dict, language: str = "en") -> str:
+        """格式化案例为可读文本"""
+        expression = case["expression"]
+        if language == "en":
+            return (
+                f"Please evaluate the following arithmetic expression:\n\n"
+                f"{expression}\n\n"
+                f"The expression uses standard arithmetic operators (+, -, *, /) "
+                f"and custom operators (><, ;, @, <>, [], #, !, ~, &, :, ][).\n"
+                f"Please provide your answer as a decimal number."
+            )
+        else:  # Chinese
+            return (
+                f"请计算下面的算术表达式：\n\n"
+                f"{expression}\n\n"
+                f"表达式使用标准算术运算符 (+, -, *, /) "
+                f"和自定义运算符 (><, ;, @, <>, [], #, !, ~, &, :, ][)。\n"
+                f"请以小数形式提供你的答案。"
+            )
--- a/internbootcamp/libs/bbeh_multistep_arithmetic/multistep_arithmetic.py
+++ b/internbootcamp/libs/bbeh_multistep_arithmetic/multistep_arithmetic.py
@ -0,0 +1,510 @@
+import random
+import json
+import math
+import re
+import logging
+import time
+
+# Numeric limits applied to evaluation results.
+# NOTE: MIN_VALUE is the most negative bound, not the smallest positive
+# magnitude; comparisons of the form ``abs(x) < MIN_VALUE`` can never be true.
+MAX_VALUE = 1e100
+MIN_VALUE = -1e100
+EPSILON = 1e-10  # tolerance used for floating-point comparisons
+
+# Logging setup (configures the root logger when this module is imported).
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# All supported operators: the four standard ones plus the custom ones.
+OPERATORS = ['+', '-', '*', '/', '><', ';', '@', '<>', '[]', '#', '!', '~', '&', ':', '][']
+
+# Operator precedence; a larger number binds more tightly.
+PRECEDENCE = {
+    '+': 1, '-': 1,
+    '*': 2, '/': 2,
+    '><': 3, ';': 3,
+    '@': 4, '<>': 4, '[]': 4,
+    '#': 5, '!': 5,
+    '~': 6, '&': 6,
+    ':': 7, '][': 7
+}
+
+# English number words accepted in place of numeric literals.
+NUMBER_WORDS = {
+    'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5,
+    'six': 6, 'seven': 7, 'eight': 8, 'nine': 9, 'ten': 10
+}
+
+def safe_pop(stack, error_msg="Empty stack"):
+    """Pop and return the last item of ``stack``.
+
+    Args:
+        stack: Container supporting truthiness and ``pop()``.
+        error_msg: Message for the error raised when ``stack`` is empty.
+
+    Raises:
+        ValueError: If ``stack`` is empty.
+    """
+    if stack:
+        return stack.pop()
+    raise ValueError(error_msg)
+
+
+def perform_operation(op, a, b):
+    """Apply the binary operator ``op`` to the operands ``a`` and ``b``.
+
+    No exception escapes this function: any failure (unknown operator,
+    arithmetic error, unsupported operand) is logged and reported as
+    ``float('inf')``.
+
+    Args:
+        op: Operator symbol, one of the standard or custom operators.
+        a: Left operand.
+        b: Right operand.
+
+    Returns:
+        The result of the operation; ``inf`` / ``-inf`` when the result is
+        non-finite or its magnitude exceeds ``MAX_VALUE``; ``inf`` on error.
+    """
+    def clamped(compute):
+        """Run ``compute`` and normalise non-finite or out-of-range results."""
+        try:
+            outcome = compute()
+            # NaN compares False against 0, so a NaN result is reported as -inf.
+            if math.isinf(outcome) or math.isnan(outcome) or abs(outcome) > MAX_VALUE:
+                return float('inf') if outcome > 0 else float('-inf')
+            # NOTE(review): with the negative MIN_VALUE defined in this module
+            # this underflow check can never fire.
+            if abs(outcome) != 0 and abs(outcome) < MIN_VALUE:
+                return 0.0
+            return outcome
+        except (OverflowError, ValueError, ZeroDivisionError):
+            return float('inf')
+
+    # One formula per operator; '][' needs integer pre-processing and is
+    # handled separately below.
+    formulas = {
+        '+': lambda: a + b,
+        '-': lambda: a - b,
+        '*': lambda: a * b,
+        '/': lambda: a / b if abs(b) > EPSILON else float('inf'),
+        '><': lambda: a - b if a * b > 0 else a + b,
+        # NOTE(review): both branches of ';' are the same subtraction.
+        ';': lambda: (a - b) if a + b > 0 else a - b,
+        '@': lambda: a * b if a > b else a + b,
+        '<>': lambda: abs(a - b),
+        '[]': lambda: max(a, b),
+        '#': lambda: (a * b) if a > b else min(a, b),
+        '!': lambda: (a - b) if a * b > 0 else (a + b),
+        '~': lambda: (2 * b - a) if a > b else b,
+        '&': lambda: (a + b) if a * b > 0 else (a - b),
+        # NOTE(review): both branches of ':' evaluate to the product a * b.
+        ':': lambda: (a * b) if a + b > 0 else (-a * -b),
+    }
+    sign_following_ops = ('+', '*', '@', '<>', '[]', '#', '&')
+    always_positive_ops = ('-', '/', '><', ';', '!', '~', ':', '][')
+
+    try:
+        # Infinite operands short-circuit to an infinite result.
+        if math.isinf(a) or math.isinf(b):
+            if op in sign_following_ops:
+                return float('inf') if (a > 0 or b > 0) else float('-inf')
+            elif op in always_positive_ops:
+                return float('inf')
+
+        # Division by a (near-)zero divisor is reported as infinity.
+        if op == '/' and abs(b) < EPSILON:
+            return float('inf')
+
+        if op == '][':
+            try:
+                # Written as a negated conjunction so NaN operands also land here.
+                if not (abs(a) < 1e10 and abs(b) < 1e10):
+                    return float('inf')
+                gcd_value = gcd(abs(int(a)), abs(int(b)))
+                return clamped(lambda: (a - b) if gcd_value == 1 else float(gcd_value))
+            except (OverflowError, ValueError):
+                return float('inf')
+
+        formula = formulas.get(op)
+        if formula is None:
+            raise ValueError(f"Unknown operator: {op}")
+        return clamped(formula)
+    except Exception as e:
+        logger.error(f"Error in operation {op} with operands {a}, {b}: {str(e)}")
+        return float('inf')
+
+def validate_expression(tokens):
+    """Check that the parentheses in ``tokens`` are balanced.
+
+    Args:
+        tokens: Iterable of token strings; only ``'('`` and ``')'`` matter.
+
+    Returns:
+        ``True`` when every ``')'`` closes an earlier ``'('`` and nothing is
+        left open at the end, ``False`` otherwise.
+    """
+    open_count = 0
+    for tok in tokens:
+        if tok == '(':
+            open_count += 1
+        elif tok == ')':
+            if open_count == 0:
+                # A closing parenthesis with nothing left to close.
+                return False
+            open_count -= 1
+    return open_count == 0
+
+
+def tokenize(expr):
+    """Split an expression string into a list of string tokens.
+
+    Recognised tokens are number words (emitted as their digit string),
+    numeric literals including scientific notation, parentheses, and the
+    operators listed in ``OPERATORS``. A ``-`` at the start of the
+    expression, after an operator, or after ``(`` begins a negative number.
+
+    Args:
+        expr: The expression text.
+
+    Returns:
+        The list of tokens.
+
+    Raises:
+        ValueError: On an unknown word, a malformed number, an unrecognised
+            character, or mismatched parentheses. Every error is logged
+            before it is re-raised.
+    """
+    try:
+        tokens = []
+        pos = 0
+        total = len(expr)
+        while pos < total:
+            ch = expr[pos]
+
+            # Whitespace separates tokens and is otherwise ignored.
+            if ch.isspace():
+                pos += 1
+                continue
+
+            # Number words; a letter directly after a digit is not a word start
+            # (it could be the 'e' of scientific notation).
+            if ch.isalpha() and (pos == 0 or not expr[pos - 1].isdigit()):
+                end = pos
+                while end < total and expr[end].isalpha():
+                    end += 1
+                word = expr[pos:end]
+                pos = end
+                if word not in NUMBER_WORDS:
+                    logger.warning(f"Unknown word encountered: {word}")
+                    raise ValueError(f"Unknown word: {word}")
+                tokens.append(str(NUMBER_WORDS[word]))
+                continue
+
+            # Numeric literals, optionally negative and/or in scientific notation.
+            starts_negative = ch == '-' and (not tokens or tokens[-1] in OPERATORS + ['('])
+            if ch.isdigit() or starts_negative:
+                end = pos + 1
+                # Integer and fractional part.
+                while end < total and (expr[end].isdigit() or expr[end] == '.'):
+                    end += 1
+                # Optional exponent with optional sign.
+                if end < total and expr[end] in ('e', 'E'):
+                    end += 1
+                    if end < total and expr[end] in ('+', '-'):
+                        end += 1
+                    while end < total and expr[end].isdigit():
+                        end += 1
+                literal = expr[pos:end]
+                pos = end
+                try:
+                    float(literal)  # only checks that the literal is well-formed
+                except ValueError:
+                    raise ValueError(f"Invalid number format: {literal}")
+                tokens.append(literal)
+                continue
+
+            # Parentheses.
+            if ch in '()':
+                tokens.append(ch)
+                pos += 1
+                continue
+
+            # Operators: try the longest candidate first.
+            for width in (3, 2, 1):
+                candidate = expr[pos:pos + width]
+                if pos + width <= total and candidate in OPERATORS:
+                    tokens.append(candidate)
+                    pos += width
+                    break
+            else:
+                raise ValueError(f"Invalid character: {ch}")
+
+        if not validate_expression(tokens):
+            raise ValueError("Invalid expression: Mismatched parentheses")
+
+        return tokens
+    except Exception as e:
+        logger.error(f"Error in tokenization: {str(e)}")
+        raise
+
+def validate_expression_structure(tokens):
+    """Check that ``tokens`` could form a well-structured binary expression.
+
+    Two conditions must hold: parentheses are balanced, and the number of
+    operands is exactly one more than the number of operators (every
+    operator is binary). Any token that is neither a parenthesis nor in
+    ``OPERATORS`` counts as an operand.
+
+    Args:
+        tokens: Iterable of token strings.
+
+    Returns:
+        ``True`` if both conditions hold, ``False`` otherwise.
+    """
+    depth = 0
+    operands = 0
+    operators_seen = 0
+
+    for tok in tokens:
+        if tok == '(':
+            depth += 1
+        elif tok == ')':
+            if depth == 0:
+                return False
+            depth -= 1
+        elif tok in OPERATORS:
+            operators_seen += 1
+        else:
+            operands += 1
+
+    # Unclosed parentheses invalidate the expression regardless of the counts.
+    if depth != 0:
+        return False
+    return operands == operators_seen + 1
+
+
+def evaluate_expression(expression):
+    """Evaluate an expression string using two stacks and ``PRECEDENCE``.
+
+    Operators of equal precedence are applied left to right. No exception
+    escapes: an empty expression, an invalid structure, or any error during
+    evaluation is logged and reported as ``float('inf')``.
+
+    Args:
+        expression: The expression text.
+
+    Returns:
+        The computed value, or ``float('inf')`` on any failure.
+    """
+    try:
+        tokens = tokenize(expression)
+        if not tokens:
+            logger.error(f"Empty expression: {expression}")
+            return float('inf')
+
+        if not validate_expression_structure(tokens):
+            logger.error(f"Invalid expression structure: {expression}")
+            return float('inf')
+
+        values = []
+        operators = []
+
+        def apply_pending():
+            """Pop one operator with its two operands and push the result."""
+            op = operators.pop()
+            right = safe_float_conversion(values.pop())
+            left = safe_float_conversion(values.pop())
+            values.append(perform_operation(op, left, right))
+
+        def drain_while(should_continue):
+            """Apply stacked operators for as long as ``should_continue()`` holds."""
+            while should_continue():
+                if len(values) < 2:
+                    raise ValueError(f"Not enough operands for operator {operators[-1]}")
+                apply_pending()
+
+        for token in tokens:
+            try:
+                if token == '(':
+                    operators.append(token)
+                elif token == ')':
+                    # Resolve everything back to the matching '('.
+                    drain_while(lambda: operators and operators[-1] != '(')
+                    if operators:
+                        operators.pop()  # discard the '('
+                elif token in OPERATORS:
+                    rank = PRECEDENCE.get(token, 0)
+                    # Operators that bind at least as tightly go first.
+                    drain_while(
+                        lambda: operators and operators[-1] != '('
+                        and PRECEDENCE.get(operators[-1], 0) >= rank
+                    )
+                    operators.append(token)
+                else:
+                    # Operand: a number word or a numeric literal.
+                    try:
+                        if not isinstance(token, str):
+                            raise ValueError(f"Invalid token type: {type(token)}")
+                        lowered = token.lower()
+                        if lowered in NUMBER_WORDS:
+                            values.append(float(NUMBER_WORDS[lowered]))
+                        else:
+                            values.append(safe_float_conversion(token))
+                    except ValueError:
+                        logger.error(f"Error converting number: {token}")
+                        raise ValueError(f"Invalid number format: {token}")
+
+            except Exception as e:
+                logger.error(f"Error processing token {token}: {str(e)}")
+                return float('inf')
+
+        # Apply whatever operators are still stacked.
+        while operators:
+            if len(values) < 2:
+                raise ValueError("Not enough operands for remaining operators")
+            if operators[-1] == '(':
+                raise ValueError("Mismatched parentheses")
+            apply_pending()
+
+        if len(values) != 1:
+            raise ValueError("Invalid expression: too many values")
+
+        return values[0]
+
+    except Exception as e:
+        logger.error(f"Error evaluating expression '{expression}': {str(e)}")
+        return float('inf')
+
+
+def safe_float_conversion(value):
+    """Convert ``value`` to a float without raising on bad input.
+
+    Accepts ints, floats, numeric strings and number words from
+    ``NUMBER_WORDS`` (case-insensitive). ``ValueError`` and ``TypeError``
+    are logged and reported as ``float('inf')``.
+
+    Args:
+        value: The value to convert.
+
+    Returns:
+        The float value; ``inf`` / ``-inf`` for non-finite input or parsed
+        strings whose magnitude exceeds ``MAX_VALUE``; ``inf`` on failure.
+    """
+    def signed_infinity(number):
+        # NaN compares False against 0, so NaN input is reported as -inf.
+        return float('inf') if number > 0 else float('-inf')
+
+    try:
+        if isinstance(value, (int, float)):
+            if math.isinf(value) or math.isnan(value):
+                return signed_infinity(value)
+            return float(value)
+
+        if not isinstance(value, str):
+            raise ValueError(f"Cannot convert {type(value)} to float")
+
+        try:
+            parsed = float(value)
+        except ValueError:
+            # Not a numeric literal: fall back to the number-word table.
+            word = value.lower()
+            if word in NUMBER_WORDS:
+                return float(NUMBER_WORDS[word])
+            raise
+
+        # Non-finite and oversized values collapse to a signed infinity.
+        if math.isinf(parsed) or math.isnan(parsed) or abs(parsed) > MAX_VALUE:
+            return signed_infinity(parsed)
+        # NOTE(review): with the negative MIN_VALUE defined in this module
+        # this underflow check can never fire.
+        if abs(parsed) != 0 and abs(parsed) < MIN_VALUE:
+            return 0.0
+        return parsed
+    except (ValueError, TypeError) as e:
+        logger.error(f"Error in safe_float_conversion: {str(e)}")
+        return float('inf')
+
+def gcd(a, b):
+    """Return the greatest common divisor of ``a`` and ``b`` (Euclid's algorithm).
+
+    ``gcd(x, 0)`` returns ``x`` unchanged, so ``gcd(0, 0)`` is ``0``.
+    """
+    while b:
+        remainder = a % b
+        a = b
+        b = remainder
+    return a
+
+def generate_expression(min_depth=3, max_depth=6, max_length=50):
+    """Generate a random, fully parenthesised expression with a finite answer.
+
+    Candidates are generated and evaluated until one yields a non-infinite
+    result, so this function loops for as long as that takes.
+
+    Args:
+        min_depth: Smallest nesting depth that may be chosen.
+        max_depth: Largest nesting depth that may be chosen.
+        max_length: Length budget; once the running length counter reaches
+            it, only leaves are generated.
+
+    Returns:
+        A ``(expression, answer)`` tuple.
+    """
+    def generate_subexpression(depth, current_length):
+        """Return ``(text, updated_length)`` for a random subtree."""
+        if depth == 0 or current_length >= max_length:
+            # Leaf: a number word 30% of the time, otherwise an integer.
+            if random.random() < 0.3:
+                return random.choice(list(NUMBER_WORDS.keys())), current_length + 1
+            return str(random.randint(-1000000, 1000000)), current_length + 1
+
+        # This draw used to select between two branches that built exactly
+        # the same node; the value is unused, but the draw is retained so
+        # that seeded random sequences stay unchanged.
+        random.random()
+
+        op = random.choice(OPERATORS)
+        left, left_length = generate_subexpression(depth - 1, current_length)
+        right, right_length = generate_subexpression(depth - 1, left_length + 1)
+        return f"({left} {op} {right})", right_length + 3
+
+    while True:
+        try:
+            expression, _ = generate_subexpression(random.randint(min_depth, max_depth), 0)
+            # Verify the candidate before accepting it.
+            tokens = tokenize(expression)
+            if validate_expression_structure(tokens):
+                answer = evaluate_expression(expression)
+                if not math.isinf(answer):
+                    return expression, answer
+        except Exception:
+            # Discard a candidate that failed to tokenize or evaluate and try
+            # again. Catching Exception (not a bare except) keeps
+            # KeyboardInterrupt and SystemExit able to stop this loop.
+            continue
+
+def generate_dataset(num_samples=100):
+    """Build a list of ``num_samples`` generated cases.
+
+    Returns:
+        A list of dicts, each with the keys ``"expression"`` and ``"answer"``.
+    """
+    generated = (generate_expression() for _ in range(num_samples))
+    return [{"expression": text, "answer": value} for text, value in generated]
+
+def solve_expression(expression):
+    """Return the value of ``expression`` as computed by ``evaluate_expression``."""
+    return evaluate_expression(expression)
+
+def validate_solution(expression, expected_answer, calculated_answer):
+    """Decide whether ``calculated_answer`` matches ``expected_answer``.
+
+    Args:
+        expression: The expression that was solved (not used in the check).
+        expected_answer: Reference value.
+        calculated_answer: Value to compare against the reference.
+
+    Returns:
+        ``True`` when both are infinities of the same sign, when both are
+        within ``EPSILON`` of zero, or when they agree within ``EPSILON``
+        (relative error if the reference magnitude exceeds ``EPSILON``,
+        absolute error otherwise).
+    """
+    # Two infinities match only when they have the same sign.
+    if math.isinf(expected_answer) and math.isinf(calculated_answer):
+        return expected_answer * calculated_answer > 0
+
+    expected_magnitude = abs(expected_answer)
+
+    # Both values are effectively zero.
+    if expected_magnitude < EPSILON and abs(calculated_answer) < EPSILON:
+        return True
+
+    if expected_magnitude > EPSILON:
+        # Regular case: compare by relative error.
+        return abs((expected_answer - calculated_answer) / expected_answer) < EPSILON
+
+    # Reference is tiny but the calculated value is not: absolute error.
+    return abs(expected_answer - calculated_answer) < EPSILON
+
+def performance_test(num_expressions=1000):
+    """Print the average time spent per generated-and-evaluated expression.
+
+    The measured interval covers both generating each expression and
+    evaluating it.
+
+    Args:
+        num_expressions: Number of expressions to generate and evaluate.
+            With a value of zero or less, nothing is run and an average of
+            0 is reported.
+    """
+    # perf_counter is monotonic, so the measurement is unaffected by
+    # system clock adjustments.
+    started = time.perf_counter()
+    for _ in range(num_expressions):
+        expression, _answer = generate_expression()
+        evaluate_expression(expression)
+    elapsed = time.perf_counter() - started
+    # Guard against division by zero when no expressions were requested.
+    avg_time = elapsed / num_expressions if num_expressions > 0 else 0.0
+    print(f"Average time per expression: {avg_time:.6f} seconds")
+
+def test_system():
+    """Run the self-check: dataset replay, edge cases, and a timing run.
+
+    Loads ``bbeh_arithmetic_dataset.json`` from the current directory,
+    re-solves every stored expression and prints the pass rate, then prints
+    the results of a fixed list of edge-case expressions and finally runs
+    ``performance_test``.
+    """
+    # Load the dataset from file.
+    with open("bbeh_arithmetic_dataset.json", "r", encoding="utf-8") as f:
+        dataset = json.load(f)
+
+    total_tests = len(dataset)
+    passed_tests = 0
+
+    for item in dataset:
+        expression = item["expression"]
+        expected_answer = item["answer"]
+
+        calculated_answer = solve_expression(expression)
+        is_correct = validate_solution(expression, expected_answer, calculated_answer)
+
+        if is_correct:
+            passed_tests += 1
+        else:
+            print(f"Failed test: {expression}")
+            print(f"Expected: {expected_answer}, Calculated: {calculated_answer}")
+
+    # An empty dataset has no meaningful rate; report 0% rather than
+    # dividing by zero.
+    success_rate = passed_tests / total_tests * 100 if total_tests else 0.0
+    print(f"Passed {passed_tests} out of {total_tests} tests.")
+    print(f"Success rate: {success_rate:.2f}%")
+
+    # Edge cases (each listed once).
+    edge_cases = [
+        # Boundaries of the basic operations
+        "1e308 + 1",  # close to the largest float
+        "-1e308 - 1",  # close to the most negative float
+        "1e-307 * 1e-307",  # close to the smallest positive float
+        "1e307 * 2",  # overflow check
+        "1 / 1e-307",  # division boundary
+        # Boundaries of the standard and custom operators at 1e100
+        "1e100 >< -1e100",
+        "1e100 ; 1e100",
+        "1e100 + 1e100",
+        "-1e100 - 1e100",
+        "1e-100 * 1e-100",
+        "1 / 1e-100",
+        "1e100 @ 1e100",
+        "1e100 <> -1e100",
+        "1e100 [] -1e100",
+        "1e100 # -1e100",
+        "1e100 ! -1e100",
+        "1e100 ~ -1e100",
+        "1e100 & -1e100",
+        "1e100 : -1e100",
+        "1e100 ][ -1e100"
+    ]
+
+    print("\nTesting edge cases:")
+    for case in edge_cases:
+        result = evaluate_expression(case)
+        print(f"{case} = {result}")
+
+    # Run the performance test.
+    print("\nRunning performance test:")
+    performance_test()
+
+# Generate a dataset and save it to a file in the current working directory.
+# NOTE(review): these statements are at module top level with no
+# ``if __name__ == "__main__":`` guard, so they also run whenever this module
+# is imported — confirm that this is intended.
+dataset = generate_dataset()
+with open("bbeh_arithmetic_dataset.json", "w") as f:
+    json.dump(dataset, f, indent=2)
+
+# Run the self-check against the file that was just written.
+test_system()