mirror of
https://github.com/InternLM/InternBootcamp.git
synced 2026-04-26 17:13:14 +00:00
init-commit
This commit is contained in:
commit
18a552597a
3461 changed files with 1150579 additions and 0 deletions
611
internbootcamp/bootcamp/word_sorting/word_sorting.py
Executable file
611
internbootcamp/bootcamp/word_sorting/word_sorting.py
Executable file
|
|
@ -0,0 +1,611 @@
|
|||
"""# 谜题训练场开发任务
|
||||
|
||||
## 任务概述
|
||||
你是一位资深程序员,我需要你帮我实现一个特定谜题的训练场环境类。这个类继承自`Basebootcamp`,用于生成谜题实例并验证解答。
|
||||
|
||||
## 背景说明
|
||||
我正在开发一系列谜题训练场,每个训练场对应一个特定类型的谜题。训练场类命名为`{PuzzleName}bootcamp`,其中`PuzzleName`是谜题的名称。
|
||||
|
||||
每个训练场类主要提供两个核心功能:
|
||||
1. 生成该谜题类型的问题实例
|
||||
2. 验证用户对问题的回答是否正确
|
||||
|
||||
## 技术接口规范
|
||||
|
||||
### 类方法实现要求
|
||||
|
||||
```python
|
||||
class {PuzzleName}bootcamp(Basebootcamp):
|
||||
def __init__(self, **params):
|
||||
\"\"\"
|
||||
请你自定义params,以保存该puzzle相关的参数,例如网格大小等,参数配有默认值
|
||||
\"\"\"
|
||||
pass
|
||||
|
||||
def case_generator(self):
|
||||
\"\"\"
|
||||
生成谜题实例,提示:为保证谜题有解,可以先生成结果再对结果处理得到谜题
|
||||
返回:一个可JSON序列化的字典(避免包含set等无法通过json.dumps处理的数据结构)
|
||||
\"\"\"
|
||||
pass
|
||||
|
||||
@staticmethod
|
||||
def prompt_func(question_case) -> str:
|
||||
\"\"\"
|
||||
将case_generator生成的谜题实例转换为文本形式的问题,问题中包含问题背景、对谜题规则的介绍、具体要解决的谜题实例、期望最终答案的格式,
|
||||
例如:你是xxxx,请你解答yyyy,规则如下:yyyy,最终答案放置在:zzzzz
|
||||
注意:规则应当描述详细,包括任务背景、具体任务操作规则、对题目格式和答案格式的含义介绍等
|
||||
|
||||
参数:
|
||||
question_case: 由case_generator生成的谜题实例
|
||||
|
||||
返回:
|
||||
str: 格式化的问题字符串
|
||||
|
||||
注意:
|
||||
1. 需考虑问题的格式,以便后续能正确提取
|
||||
2. 问题描述中应包含期望的答案格式说明,以便后续能正确提取,为了避免抽取时匹配出干扰项,请要求模型将答案放在特定标签,如[answer] [/answer]内
|
||||
\"\"\"
|
||||
pass
|
||||
|
||||
@staticmethod
|
||||
def extract_output(output):
|
||||
\"\"\"
|
||||
从LLM的回复中提取符合格式要求的答案,如有多个,请抽取最后一个,避免使用re.search等只抽取第一个结果的方式。
|
||||
|
||||
参数:
|
||||
output: LLM的完整输出(包含原始问题和回答)
|
||||
|
||||
返回:
|
||||
提取的答案,若未找到符合格式的答案则返回None
|
||||
\"\"\"
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def _verify_correction(cls, solution, identity):
|
||||
\"\"\"
|
||||
验证提取的答案是否正确,注意一个问题可以能有多个解,按照谜题规则进行检验,不要直接匹配可能的答案。
|
||||
|
||||
参数:
|
||||
solution: extract_output提取的答案
|
||||
identity: case_generator生成的谜题实例
|
||||
|
||||
返回:
|
||||
bool: 答案是否正确
|
||||
\"\"\"
|
||||
pass
|
||||
```
|
||||
|
||||
### 验证评分方法(基类已实现)
|
||||
|
||||
```python
|
||||
@classmethod
|
||||
def verify_score(cls, model_output, identity:dict, format_score=0.1) -> float:
|
||||
\"\"\"
|
||||
验证输出结果并评分。
|
||||
|
||||
参数:
|
||||
model_output: 模型的完整输出
|
||||
identity: 谜题实例(由case_generator生成)
|
||||
format_score: 答案格式正确时的基础分数
|
||||
|
||||
返回:
|
||||
float: 评分结果(0-1之间)
|
||||
\"\"\"
|
||||
score = 0.
|
||||
try:
|
||||
extract_solution = cls.extract_output(model_output)
|
||||
if extract_solution is None:
|
||||
return score
|
||||
else:
|
||||
score = format_score # 格式正确时的基础分数
|
||||
if cls._verify_correction(extract_solution, identity):
|
||||
score = 1. # 答案完全正确时的满分
|
||||
except Exception as e:
|
||||
# 处理异常情况
|
||||
pass
|
||||
return score
|
||||
```
|
||||
|
||||
### 使用示例
|
||||
|
||||
```python
|
||||
# 初始化谜题训练场
|
||||
bootcamp = Puzzlebootcamp()
|
||||
|
||||
# 生成谜题实例
|
||||
case = bootcamp.case_generator()
|
||||
|
||||
# 将谜题转换为文本问题
|
||||
prompt = Puzzlebootcamp.prompt_func(case)
|
||||
|
||||
# 获取LLM对问题的解答
|
||||
response = get_response(prompt, \"LLM\")
|
||||
|
||||
# 从完整对话中提取答案
|
||||
extracted_output = Puzzlebootcamp.extract_output(prompt + response)
|
||||
|
||||
# 验证答案并评分
|
||||
score = Puzzlebootcamp.verify_score(extracted_output, case)
|
||||
```
|
||||
|
||||
## 你的任务
|
||||
请根据以下谜题描述(谜题描述可能不完整,请先结合你的知识澄清规则),实现一个完整的谜题训练场类:
|
||||
|
||||
### 谜题描述
|
||||
[Description]
|
||||
# BBEH Word Sorting
|
||||
|
||||
This task is composed of two subtasks.
|
||||
|
||||
Subtask 1: This subtask is from the
|
||||
[BIG-Bench Mistake](https://arxiv.org/pdf/2311.08516). This task involves
|
||||
finding the first mistake in an existing chain-of-thought sequence, used to
|
||||
answer a Word Sorting question in the original BBH dataset. In each example,
|
||||
the target answer is either the number where the first mistake occurred, or
|
||||
that there are no mistakes in the CoT sequence. These CoT sequences are
|
||||
generated by prompting PaLM 2 Unicorn on the original BBH dataset at
|
||||
temperature = 0. The newline is used as a stop token so that each
|
||||
intermediate step can be prepended with *Thought 1:* , *Thought 2: *, etc.
|
||||
Further information on the prompting and generation process can be found in
|
||||
the original work.
|
||||
|
||||
Subtask 2: The second sub-task is sorting a list of words given a new
|
||||
alphabet order (examples include: an alphabet order that is the same as
|
||||
English but two letters are swapped in the order, an alphabet order that is
|
||||
the same as English but one/two letters are moved to the beginning/end of the
|
||||
order, or a completely new order). This task requires going against a strong
|
||||
prior and sorting words in a non-typical way.
|
||||
|
||||
[/Description]
|
||||
<example_1>
|
||||
|
||||
You are an expert in word sorting. You will be provided with a list of words and the steps taken, in the form of thoughts, to arrange them in alphabetical order.
|
||||
Your job is to identify the first step that was a mistake in reasoning about the order of the words. This can be misunderstanding the order of
|
||||
the alphabet or getting the word or letter wrong or missing a word entirely.
|
||||
Q: Sort the following words alphabetically: List: apocalypse tote apostle gay kauffman dicta hog prowl aunt calm carboxy dishevel orifice tearful karate ellen
|
||||
Thought 1: I should start by looking at the first letter of the words in the list. The first letter: \"apocalypse\": \"a\" (1). \"tote\": \"t\" (20). \"apostle\": \"a\" (1). \"gay\": \"g\" (7). \"kauffman\": \"k\" (11). \"dicta\": \"d\" (4). \"hog\": \"h\" (8). \"prowl\": \"p\" (16). \"aunt\": \"a\" (1). \"calm\": \"c\" (3). \"carboxy\": \"c\" (3). \"dishevel\": \"d\" (4). \"orifice\": \"o\" (15). \"tearful\": \"t\" (20). \"karate\": \"k\" (11). \"ellen\": \"e\" (5).
|
||||
Thought 2: We now have: (1) [\"apocalypse\" ? \"apostle\" ? \"aunt\" ? \"calm\" ? \"carboxy\"] < (3) \"dicta\" < (4) [\"dishevel\" ? \"dicta\"] < (5) \"ellen\" < (7) \"gay\" < (8) \"hog\" < (11) [\"kauffman\" ? \"karate\"] < (15) \"orifice\" < (16) \"prowl\" < (20) [\"tearful\" ? \"tote\"].
|
||||
Thought 3: Now let's sort this subpart [\"apocalypse\" ? \"apostle\" ? \"aunt\" ? \"calm\" ? \"carboxy\"] by looking at their second letters. The second letter: \"apocalypse\": \"p\" (16). \"apostle\": \"p\" (16). \"aunt\": \"u\" (21). \"calm\": \"a\" (1). \"carboxy\": \"a\" (1).
|
||||
Thought 4: We now have: (1) \"calm\" < (16) [\"apocalypse\" ? \"apostle\"] < (21) \"aunt\" < (26) \"carboxy\" for the subpart. Hence, we have \"calm\" < [\"apocalypse\" ? \"apostle\"] < \"aunt\" < \"carboxy\" < \"dicta\" < [\"dishevel\" ? \"dicta\"] < \"ellen\" < \"gay\" < \"hog\" < [\"kauffman\" ? \"karate\"] < \"orifice\" < \"prowl\" < [\"tearful\" ? \"tote\"].
|
||||
Thought 5: Now let's sort this subpart [\"apocalypse\" ? \"apostle\"] by looking at their third letters. The third letter: \"apocalypse\": \"o\" (15). \"apostle\": \"s\" (19).
|
||||
Thought 6: We now have: (15) \"apocalypse\" < (19) \"apostle\" for the subpart. Hence, we have \"calm\" < \"apocalypse\" < \"apostle\" < \"aunt\" < \"carboxy\" < \"dicta\" < [\"dishevel\" ? \"dicta\"] < \"ellen\" < \"gay\" < \"hog\" < [\"kauffman\" ? \"karate\"] < \"orifice\" < \"prowl\" < [\"tearful\" ? \"tote\"].
|
||||
Thought 7: Now let's sort this subpart [\"dishevel\" ? \"dicta\"] by looking at their second letters. The second letter: \"dishevel\": \"i\" (9). \"dicta\": \"i\" (9).
|
||||
Thought 8: We now have: (9) [\"dishevel\" ? \"dicta\"] for the subpart. Hence, we have \"calm\" < \"apocalypse\" < \"apostle\" < \"aunt\" < \"carboxy\" < \"dicta\" < [\"dishevel\" ? \"dicta\"] < \"ellen\" < \"gay\" < \"hog\" < [\"kauffman\" ? \"karate\"] < \"orifice\" < \"prowl\" < [\"tearful\" ? \"tote\"].
|
||||
Thought 9: Now let's sort this subpart [\"kauffman\" ? \"karate\"] by looking at their second letters. The second letter: \"kauffman\": \"a\" (1). \"karate\": \"a\" (1).
|
||||
Thought 10: We now have: (1) [\"kauffman\" ? \"karate\"] for the subpart. Hence, we have \"calm\" < \"apocalypse\" < \"apostle\" < \"aunt\" < \"carboxy\" < \"dicta\" < [\"dishevel\" ? \"dicta\"] < \"ellen\" < \"gay\" < \"hog\" < [\"kauffman\" ? \"karate\"] < \"orifice\" < \"prowl\" < [\"tearful\" ? \"tote\"].
|
||||
Thought 11: Now let's sort this subpart [\"tearful\" ? \"tote\"] by looking at their second letters. The second letter: \"tearful\": \"e\" (5). \"tote\": \"o\" (15).
|
||||
Thought 12: We now have: (5) \"tearful\" < (15) \"tote\" for the subpart. Hence, we have \"calm\" < \"apocalypse\" < \"apostle\" < \"aunt\" < \"carboxy\" < \"dicta\" < [\"dishevel\" ? \"dicta\"] < \"ellen\" < \"gay\" < \"hog\" < [\"kauffman\" ? \"karate\"] < \"orifice\" < \"prowl\" < \"tearful\" < \"tote\".
|
||||
Thought 13: I have now sorted all the words. The answer is apocalypse apostle aunt carboxy calm dicta dishevel ellen gay hog kauffman karate orifice prowl tearful tote
|
||||
Q: Is there a mistake in this sequence? Write \"No\" if there are no mistakes, or the number N if there is a mistake in Thought N.
|
||||
|
||||
</example_1>
|
||||
<example_2>
|
||||
|
||||
You are an expert in word sorting. You will be provided with a list of words and the steps taken, in the form of thoughts, to arrange them in alphabetical order.
|
||||
Your job is to identify the first step that was a mistake in reasoning about the order of the words. This can be misunderstanding the order of
|
||||
the alphabet or getting the word or letter wrong or missing a word entirely.
|
||||
Q: Sort the following words alphabetically: List: shutdown beebread conveyor courtyard semantic aqueduct lacewing grown mummy infinitude tennessee taillight monarch neural aristocrat topheavy flagler deviate
|
||||
Thought 1: I should start by looking at the first letter of the words in the list. The first letter: \"shutdown\": \"s\" (19). \"beebread\": \"b\" (2). \"conveyor\": \"c\" (3). \"courtyard\": \"c\" (3). \"semantic\": \"s\" (19). \"aqueduct\": \"a\" (1). \"lacewing\": \"l\" (12). \"grown\": \"g\" (7). \"mummy\": \"m\" (13). \"infinitude\": \"i\" (9). \"tennessee\": \"t\" (20). \"taillight\": \"t\" (20). \"monarch\": \"m\" (13). \"neural\": \"n\" (14). \"aristocrat\": \"a\" (1). \"topheavy\": \"t\" (20). \"flagler\": \"f\" (6). \"deviate\": \"d\" (4).
|
||||
Thought 2: We now have: (1) [\"aristocrat\" ? \"aqueduct\"] < (2) \"beebread\" < (3) [\"conveyor\" ? \"courtyard\"] < (4) \"deviate\" < (6) \"flagler\" < (7) \"grown\" < (9) \"infinitude\" < (12) \"lacewing\" < (13) [\"mummy\" ? \"monarch\"] < (14) \"neural\" < (19) [\"semantic\" ? \"shutdown\"] < (20) [\"tennessee\" ? \"taillight\" ? \"topheavy\"].
|
||||
Thought 3: Now let's sort this subpart [\"aristocrat\" ? \"aqueduct\"] by looking at their second letters. The second letter: \"aristocrat\": \"r\" (18). \"aqueduct\": \"q\" (17).
|
||||
Thought 4: We now have: (17) \"aqueduct\" < (18) \"aristocrat\" for the subpart. Hence, we have \"aqueduct\" < \"aristocrat\" < \"beebread\" < [\"conveyor\" ? \"courtyard\"] < \"deviate\" < \"flagler\" < \"grown\" < \"infinitude\" < \"lacewing\" < [\"mummy\" ? \"monarch\"] < \"neural\" < [\"semantic\" ? \"shutdown\"] < [\"tennessee\" ? \"taillight\" ? \"topheavy\"].
|
||||
Thought 5: Now let's sort this subpart [\"conveyor\" ? \"courtyard\"] by looking at their second letters. The second letter: \"conveyor\": \"o\" (15). \"courtyard\": \"o\" (15).
|
||||
Thought 6: We now have: (15) [\"conveyor\" ? \"courtyard\"] for the subpart. Hence, we have \"aqueduct\" < \"aristocrat\" < \"beebread\" < [\"conveyor\" ? \"courtyard\"] < \"deviate\" < \"flagler\" < \"grown\" < \"infinitude\" < \"lacewing\" < [\"mummy\" ? \"monarch\"] < \"neural\" < [\"semantic\" ? \"shutdown\"] < [\"tennessee\" ? \"taillight\" ? \"topheavy\"].
|
||||
Thought 7: Now let's sort this subpart [\"mummy\" ? \"monarch\"] by looking at their second letters. The second letter: \"mummy\": \"u\" (21). \"monarch\": \"o\" (15).
|
||||
Thought 8: We now have: (15) \"monarch\" < (21) \"mummy\" for the subpart. Hence, we have \"aqueduct\" < \"aristocrat\" < \"beebread\" < [\"conveyor\" ? \"courtyard\"] < \"deviate\" < \"flagler\" < \"grown\" < \"infinitude\" < \"lacewing\" < \"monarch\" < \"mummy\" < \"neural\" < [\"semantic\" ? \"shutdown\"] < [\"tennessee\" ? \"taillight\" ? \"topheavy\"].
|
||||
Thought 9: Now let's sort this subpart [\"semantic\" ? \"shutdown\"] by looking at their second letters. The second letter: \"semantic\": \"e\" (5). \"shutdown\": \"h\" (8).
|
||||
Thought 10: We now have: (5) \"semantic\" < (8) \"shutdown\" for the subpart. Hence, we have \"aqueduct\" < \"aristocrat\" < \"beebread\" < [\"conveyor\" ? \"courtyard\"] < \"deviate\" < \"flagler\" < \"grown\" < \"infinitude\" < \"lacewing\" < \"monarch\" < \"mummy\" < \"neural\" < \"semantic\" < \"shutdown\" < [\"tennessee\" ? \"taillight\" ? \"topheavy\"].
|
||||
Thought 11: Now let's sort this subpart [\"tennessee\" ? \"taillight\" ? \"topheavy\"] by looking at their second letters. The second letter: \"tennessee\": \"e\" (5). \"taillight\": \"a\" (1). \"topheavy\": \"o\" (15).
|
||||
Thought 12: We now have: (1) \"taillight\" < (5) \"tennessee\" < (15) \"topheavy\" for the subpart. Hence, we have \"aqueduct\" < \"aristocrat\" < \"beebread\" < [\"conveyor\" ? \"courtyard\"] < \"deviate\" < \"flagler\" < \"grown\" < \"infinitude\" < \"lacewing\" < \"monarch\" < \"mummy\" < \"neural\" < \"semantic\" < \"shutdown\" < \"taillight\" < \"tennessee\" < \"topheavy\".
|
||||
Thought 13: I have now sorted all the words. The answer is aqueduct aristocrat beebread conveyor courtyard deviate flagler grown infinitude lacewing mummy monarch neural semantic shutdown taillight tennessee topheavy
|
||||
Q: Is there a mistake in this sequence? Write \"No\" if there are no mistakes, or the number N if there is a mistake in Thought N.
|
||||
|
||||
</example_2>
|
||||
<example_3>
|
||||
|
||||
You are an expert in word sorting. You will be provided with a list of words and the steps taken, in the form of thoughts, to arrange them in alphabetical order.
|
||||
Your job is to identify the first step that was a mistake in reasoning about the order of the words. This can be misunderstanding the order of
|
||||
the alphabet or getting the word or letter wrong or missing a word entirely.
|
||||
Q: Sort the following words alphabetically: List: balletomane brindisi mona secondhand blackstone effluvium beman swabby durward horse lament camille appetite mattock disputant
|
||||
Thought 1: I should start by looking at the first letter of the words in the list. The first letter: \"balletomane\": \"b\" (2). \"brindisi\": \"b\" (2). \"mona\": \"m\" (13). \"secondhand\": \"s\" (19). \"blackstone\": \"b\" (2). \"effluvium\": \"e\" (5). \"beman\": \"b\" (2). \"swabby\": \"s\" (19). \"durward\": \"d\" (4). \"horse\": \"h\" (8). \"lament\": \"l\" (12). \"camille\": \"c\" (3). \"appetite\": \"a\" (1). \"mattock\": \"m\" (13). \"disputant\": \"d\" (4).
|
||||
Thought 2: We now have: (1) \"appetite\" < (2) [\"balletomane\" ? \"brindisi\" ? \"blackstone\" ? \"beman\"] < (3) \"camille\" < (4) [\"durward\" ? \"disputant\"] < (5) \"effluvium\" < (8) \"horse\" < (12) \"lament\" < (13) [\"mona\" ? \"mattock\"] < (19) [\"secondhand\" ? \"swabby\"].
|
||||
Thought 3: Now let's sort this subpart [\"balletomane\" ? \"brindisi\" ? \"blackstone\" ? \"beman\"] by looking at their second letters. The second letter: \"balletomane\": \"a\" (1). \"brindisi\": \"r\" (18). \"blackstone\": \"l\" (12). \"beman\": \"e\" (5).
|
||||
Thought 4: We now have: (1) \"balletomane\" < (5) \"beman\" < (12) \"blackstone\" < (18) \"brindisi\" for the subpart. Hence, we have \"appetite\" < \"balletomane\" < \"beman\" < \"camille\" < [\"durward\" ? \"disputant\"] < \"effluvium\" < \"horse\" < \"lament\" < [\"mona\" ? \"mattock\"] < [\"secondhand\" ? \"swabby\"].
|
||||
Thought 5: Now let's sort this subpart [\"durward\" ? \"disputant\"] by looking at their second letters. The second letter: \"durward\": \"u\" (21). \"disputant\": \"i\" (9).
|
||||
Thought 6: We now have: (9) \"disputant\" < (21) \"durward\" for the subpart. Hence, we have \"appetite\" < \"balletomane\" < \"beman\" < \"camille\" < \"disputant\" < \"effluvium\" < \"horse\" < \"lament\" < [\"mona\" ? \"mattock\"] < [\"secondhand\" ? \"swabby\"] < \"durward\".
|
||||
Thought 7: Now let's sort this subpart [\"mona\" ? \"mattock\"] by looking at their second letters. The second letter: \"mona\": \"o\" (15). \"mattock\": \"a\" (1).
|
||||
Thought 8: We now have: (1) \"mattock\" < (15) \"mona\" for the subpart. Hence, we have \"appetite\" < \"balletomane\" < \"beman\" < \"camille\" < \"disputant\" < \"effluvium\" < \"horse\" < \"lament\" < \"mattock\" < [\"secondhand\" ? \"swabby\"] < \"durward\" < \"mona\".
|
||||
Thought 9: Now let's sort this subpart [\"secondhand\" ? \"swabby\"] by looking at their second letters. The second letter: \"secondhand\": \"e\" (5). \"swabby\": \"w\" (23).
|
||||
Thought 10: We now have: (5) \"secondhand\" < (23) \"swabby\" for the subpart. Hence, we have \"appetite\" < \"balletomane\" < \"beman\" < \"camille\" < \"disputant\" < \"effluvium\" < \"horse\" < \"lament\" < \"mattock\" < \"secondhand\" < \"swabby\" < \"durward\" < \"mona\".
|
||||
Thought 11: I have now sorted all the words. The answer is appetite balletomane beman camille disputant effluvium horse lament mattock mona secondhand swabby durward
|
||||
Q: Is there a mistake in this sequence? Write \"No\" if there are no mistakes, or the number N if there is a mistake in Thought N.
|
||||
|
||||
</example_3>
|
||||
<example_4>
|
||||
|
||||
You are an expert in word sorting. You will be provided with a list of words and the steps taken, in the form of thoughts, to arrange them in alphabetical order.
|
||||
Your job is to identify the first step that was a mistake in reasoning about the order of the words. This can be misunderstanding the order of
|
||||
the alphabet or getting the word or letter wrong or missing a word entirely.
|
||||
Q: Sort the following words alphabetically: List: fetal trilingual incredulous boulder ail ar deride right degas snowy verb origami penguin monsanto compute awash dyadic
|
||||
Thought 1: I should start by looking at the first letter of the words in the list. The first letter: \"fetal\": \"f\" (6). \"trilingual\": \"t\" (20). \"incredulous\": \"i\" (9). \"boulder\": \"b\" (2). \"ail\": \"a\" (1). \"ar\": \"a\" (1). \"deride\": \"d\" (4). \"right\": \"r\" (18). \"degas\": \"d\" (4). \"snowy\": \"s\" (19). \"verb\": \"v\" (22). \"origami\": \"o\" (15). \"penguin\": \"p\" (16). \"monsanto\": \"m\" (13). \"compute\": \"c\" (3). \"awash\": \"a\" (1). \"dyadic\": \"d\" (4).
|
||||
Thought 2: We now have: (1) [\"ail\" ? \"ar\" ? \"awash\"] < (2) \"boulder\" < (3) \"compute\" < (4) [\"deride\" ? \"degas\" ? \"dyadic\"] < (6) \"fetal\" < (9) \"incredulous\" < (13) \"monsanto\" < (15) \"origami\" < (16) \"penguin\" < (18) \"right\" < (19) \"snowy\" < (20) \"trilingual\" < (22) \"verb\".
|
||||
Thought 3: Now let's sort this subpart [\"ail\" ? \"ar\" ? \"awash\"] by looking at their second letters. The second letter: \"ail\": \"i\" (9). \"ar\": \"r\" (18). \"awash\": \"w\" (23).
|
||||
Thought 4: We now have: (9) \"ail\" < (18) \"ar\" < (23) \"awash\" for the subpart. Hence, we have [\"ail\" ? \"ar\" ? \"awash\"] < (2) \"boulder\" < (3) \"compute\" < (4) [\"deride\" ? \"degas\" ? \"dyadic\"] < (6) \"fetal\" < (9) \"incredulous\" < (13) \"monsanto\" < (15) \"origami\" < (16) \"penguin\" < (18) \"right\" < (19) \"snowy\" < (20) \"trilingual\" < (22) \"verb\".
|
||||
Thought 5: Now let's sort this subpart [\"deride\" ? \"degas\" ? \"dyadic\"] by looking at their second letters. The second letter: \"deride\": \"e\" (5). \"degas\": \"e\" (5). \"dyadic\": \"y\" (25).
|
||||
Thought 6: We now have: (5) [\"deride\" ? \"degas\"] < (25) \"dyadic\" for the subpart. Hence, we have [\"ail\" ? \"ar\" ? \"awash\"] < (2) \"boulder\" < (3) \"compute\" < (4) [\"deride\" ? \"degas\"] < (5) \"dyadic\" < (6) \"fetal\" < (9) \"incredulous\" < (13) \"monsanto\" < (15) \"origami\" < (16) \"penguin\" < (18) \"right\" < (19) \"snowy\" < (20) \"trilingual\" < (22) \"verb\".
|
||||
Thought 7: Now let's sort this subpart [\"ail\" ? \"ar\" ? \"awash\"] by looking at their third letters. The third letter: \"ail\": \"l\" (12). \"ar\": \"r\" (18). \"awash\": \"s\" (19).
|
||||
Thought 8: We now have: (12) \"ail\" < (18) \"ar\" < (19) \"awash\" for the subpart. Hence, we have \"ail\" < \"ar\" < \"awash\" < (2) \"boulder\" < (3) \"compute\" < (4) [\"deride\" ? \"degas\"] < (5) \"dyadic\" < (6) \"fetal\" < (9) \"incredulous\" < (13) \"monsanto\" < (15) \"origami\" < (16) \"penguin\" < (18) \"right\" < (19) \"snowy\" < (20) \"trilingual\" < (22) \"verb\".
|
||||
Thought 9: Now let's sort this subpart [\"deride\" ? \"degas\"] by looking at their third letters. The third letter: \"deride\": \"r\" (18). \"degas\": \"s\" (19).
|
||||
Thought 10: We now have: (18) \"deride\" < (19) \"degas\" for the subpart. Hence, we have \"ail\" < \"ar\" < \"awash\" < (2) \"boulder\" < (3) \"compute\" < (4) \"deride\" < (5) \"dyadic\" < (6) \"fetal\" < (9) \"incredulous\" < (13) \"monsanto\" < (15) \"origami\" < (16) \"penguin\" < (18) \"right\" < (19) \"snowy\" < (20) \"trilingual\" < (22) \"verb\".
|
||||
Thought 11: I have now sorted all the words. The answer is ail ar awash boulder compute deride degas dyadic fetal incredulous monsanto origami penguin right snowy trilingual verb
|
||||
Q: Is there a mistake in this sequence? Write \"No\" if there are no mistakes, or the number N if there is a mistake in Thought N.
|
||||
|
||||
</example_4>
|
||||
<example_5>
|
||||
|
||||
You are an expert in word sorting. You will be provided with a list of words and the steps taken, in the form of thoughts, to arrange them in alphabetical order.
|
||||
Your job is to identify the first step that was a mistake in reasoning about the order of the words. This can be misunderstanding the order of
|
||||
the alphabet or getting the word or letter wrong or missing a word entirely.
|
||||
Q: Sort the following words alphabetically: List: yves butt scribe seaworthy manumitted clapeyron lift blackguard reconnaissance agile woodcut lightfooted sawbelly meager geophysical wiseacre mathieu cognoscenti purposive flamboyant
|
||||
Thought 1: I should start by looking at the first letter of the words in the list. The first letter: \"yves\": \"y\" (25). \"butt\": \"b\" (2). \"scribe\": \"s\" (19). \"seaworthy\": \"s\" (19). \"manumitted\": \"m\" (13). \"clapeyron\": \"c\" (3). \"lift\": \"l\" (12). \"blackguard\": \"b\" (2). \"reconnaissance\": \"r\" (18). \"agile\": \"a\" (1). \"woodcut\": \"w\" (23). \"lightfooted\": \"l\" (12). \"sawbelly\": \"s\" (19). \"meager\": \"m\" (13). \"geophysical\": \"g\" (7). \"wiseacre\": \"w\" (23). \"mathieu\": \"m\" (13). \"cognoscenti\": \"c\" (3). \"purposive\": \"p\" (16). \"flamboyant\": \"f\" (6).
|
||||
Thought 2: We now have: (1) \"agile\" < (2) [\"butt\" ? \"blackguard\"] < (3) [\"clapeyron\" ? \"cognoscenti\"] < (6) \"flamboyant\" < (7) \"geophysical\" < (12) [\"lift\" ? \"lightfooted\"] < (13) [\"manumitted\" ? \"mathieu\"] < (16) \"purposive\" < (18) \"reconnaissance\" < (19) [\"scribe\" ? \"seaworthy\" ? \"sawbelly\"] < (23) [\"woodcut\" ? \"wiseacre\"] < (25) \"yves\".
|
||||
Thought 3: Now let's sort this subpart [\"butt\" ? \"blackguard\"] by looking at their second letters. The second letter: \"butt\": \"u\" (21). \"blackguard\": \"l\" (12).
|
||||
Thought 4: We now have: (12) \"blackguard\" < (21) \"butt\" for the subpart. Hence, we have \"agile\" < \"blackguard\" < \"butt\" < [\"clapeyron\" ? \"cognoscenti\"] < \"flamboyant\" < \"geophysical\" < [\"lift\" ? \"lightfooted\"] < [\"manumitted\" ? \"mathieu\"] < \"purposive\" < \"reconnaissance\" < [\"scribe\" ? \"seaworthy\" ? \"sawbelly\"] < [\"woodcut\" ? \"wiseacre\"] < \"yves\".
|
||||
Thought 5: Now let's sort this subpart [\"clapeyron\" ? \"cognoscenti\"] by looking at their second letters. The second letter: \"clapeyron\": \"l\" (12). \"cognoscenti\": \"o\" (15).
|
||||
Thought 6: We now have: (12) \"clapeyron\" < (15) \"cognoscenti\" for the subpart. Hence, we have \"agile\" < \"blackguard\" < \"butt\" < \"clapeyron\" < \"cognoscenti\" < \"flamboyant\" < \"geophysical\" < [\"lift\" ? \"lightfooted\"] < [\"manumitted\" ? \"mathieu\"] < \"purposive\" < \"reconnaissance\" < [\"scribe\" ? \"seaworthy\" ? \"sawbelly\"] < [\"woodcut\" ? \"wiseacre\"] < \"yves\".
|
||||
Thought 7: Now let's sort this subpart [\"lift\" ? \"lightfooted\"] by looking at their second letters. The second letter: \"lift\": \"i\" (9). \"lightfooted\": \"i\" (9).
|
||||
Thought 8: We now have: (9) [\"lift\" ? \"lightfooted\"] for the subpart. Hence, we have \"agile\" < \"blackguard\" < \"butt\" < \"clapeyron\" < \"cognoscenti\" < \"flamboyant\" < \"geophysical\" < [\"lift\" ? \"lightfooted\"] < [\"manumitted\" ? \"mathieu\"] < \"purposive\" < \"reconnaissance\" < [\"scribe\" ? \"seaworthy\" ? \"sawbelly\"] < [\"woodcut\" ? \"wiseacre\"] < \"yves\".
|
||||
Thought 9: Now let's sort this subpart [\"manumitted\" ? \"mathieu\"] by looking at their second letters. The second letter: \"manumitted\": \"a\" (1). \"mathieu\": \"a\" (1).
|
||||
Thought 10: We now have: (1) [\"manumitted\" ? \"mathieu\"] for the subpart. Hence, we have \"agile\" < \"blackguard\" < \"butt\" < \"clapeyron\" < \"cognoscenti\" < \"flamboyant\" < \"geophysical\" < [\"lift\" ? \"lightfooted\"] < [\"manumitted\" ? \"mathieu\"] < \"purposive\" < \"reconnaissance\" < [\"scribe\" ? \"seaworthy\" ? \"sawbelly\"] < [\"woodcut\" ? \"wiseacre\"] < \"yves\".
|
||||
Thought 11: Now let's sort this subpart [\"scribe\" ? \"seaworthy\" ? \"sawbelly\"] by looking at their second letters. The second letter: \"scribe\": \"c\" (3). \"seaworthy\": \"e\" (5). \"sawbelly\": \"a\" (1).
|
||||
Thought 12: We now have: (1) \"sawbelly\" < (3) \"scribe\" < (5) \"seaworthy\" for the subpart. Hence, we have \"agile\" < \"blackguard\" < \"butt\" < \"clapeyron\" < \"cognoscenti\" < \"flamboyant\" < \"geophysical\" < [\"lift\" ? \"lightfooted\"] < [\"manumitted\" ? \"mathieu\"] < \"purposive\" < \"reconnaissance\" < \"sawbelly\" < \"scribe\" < \"seaworthy\" < [\"woodcut\" ? \"wiseacre\"] < \"yves\".
|
||||
Thought 13: Now let's sort this subpart [\"woodcut\" ? \"wiseacre\"] by looking at their second letters. The second letter: \"woodcut\": \"o\" (15). \"wiseacre\": \"i\" (9).
|
||||
Thought 14: We now have: (9) \"wiseacre\" < (15) \"woodcut\" for the subpart. Hence, we have \"agile\" < \"blackguard\" < \"butt\" < \"clapeyron\" < \"cognoscenti\" < \"flamboyant\" < \"geophysical\" < [\"lift\" ? \"lightfooted\"] < [\"manumitted\" ? \"mathieu\"] < \"purposive\" < \"reconnaissance\" < \"sawbelly\" < \"scribe\" < \"seaworthy\" < \"wiseacre\" < \"woodcut\" < \"yves\".
|
||||
Thought 15: Now let's sort this subpart [\"lift\" ? \"lightfooted\"] by looking at their third letters. The third letter: \"lift\": \"f\" (6). \"lightfooted\": \"t\" (20).
|
||||
Thought 16: We now have: (6) \"lift\" < (20) \"lightfooted\" for the subpart. Hence, we have \"agile\" < \"blackguard\" < \"butt\" < \"clapeyron\" < \"cognoscenti\" < \"flamboyant\" < \"geophysical\" < \"lift\" < \"lightfooted\" < [\"manumitted\" ? \"mathieu\"] < \"purposive\" < \"reconnaissance\" < \"sawbelly\" < \"scribe\" < \"seaworthy\" < \"wiseacre\" < \"woodcut\" < \"yves\".
|
||||
Thought 17: Now let's sort this subpart [\"manumitted\" ? \"mathieu\"] by looking at their third letters. The third letter: \"manumitted\": \"n\" (14). \"mathieu\": \"t\" (20).
|
||||
Thought 18: We now have: (14) \"manumitted\" < (20) \"mathieu\" for the subpart. Hence, we have \"agile\" < \"blackguard\" < \"butt\" < \"clapeyron\" < \"cognoscenti\" < \"flamboyant\" < \"geophysical\" < \"lift\" < \"lightfooted\" < \"manumitted\" < \"mathieu\" < \"purposive\" < \"reconnaissance\" < \"sawbelly\" < \"scribe\" < \"seaworthy\" < \"wiseacre\" < \"woodcut\" < \"yves\".
|
||||
Thought 19: I have now sorted all the words. The answer is agile blackguard butt clapeyron cognoscenti flamboyant geophysical lift lightfooted manumitted mathieu meager purposive reconnaissance sawbelly scribe seaworthy woodcut wiseacre yves
|
||||
Q: Is there a mistake in this sequence? Write \"No\" if there are no mistakes, or the number N if there is a mistake in Thought N.
|
||||
|
||||
</example_5>
|
||||
|
||||
|
||||
请完成上述谜题的训练场环境类实现,包括所有必要的方法。
|
||||
"""
|
||||
|
||||
from internbootcamp.bootcamp.base import Basebootcamp
|
||||
import re
|
||||
import random
|
||||
import json
|
||||
import string
|
||||
# from nltk.corpus import words as whole_words
|
||||
|
||||
|
||||
class BbehWordSortingbootcamp(Basebootcamp):
|
||||
SUPPORTED_SUBTASKS = ["error_detection", "pure_sort"]
|
||||
SORT_ORDER = ["normal_order", "custom_sorting"]
|
||||
MODES = ["normal", "reverse"]
|
||||
|
||||
def __init__(self, **params):
|
||||
super().__init__(**params)
|
||||
self.max_word_list_size = params.get("max_word_list_size", 16)
|
||||
self.min_word_list_size = params.get("min_word_list_size", 8)
|
||||
self.subtask_ratio = params.get("subtask_ratio", 0.5) # 子任务1占比
|
||||
self.mode_ratio = params.get("mode_ratio", 0.5)
|
||||
self.order_ratio = params.get("order_ratio", 0.5)
|
||||
# self.alphabet = self._generate_alphabet(params.get('letter_swaps', self.LETTER_SWAPS))
|
||||
internbootcamp_path = "/".join(__file__.split('/')[:-3])
|
||||
with open(f'{internbootcamp_path}/libs/data/words_alpha_370000.txt', 'r') as f:
|
||||
words = f.readlines()
|
||||
words = [w.strip() for w in words]
|
||||
self.word_pool = words
|
||||
|
||||
|
||||
def case_generator(self):
|
||||
"""生成包含两种子任务的案例"""
|
||||
subtask = random.choices(
|
||||
self.SUPPORTED_SUBTASKS,
|
||||
weights=[self.subtask_ratio, 1 - self.subtask_ratio],
|
||||
k=1,
|
||||
)[0]
|
||||
enable_error = True if subtask == "error_detection" else False
|
||||
mode = random.choices(
|
||||
self.MODES, weights=[self.mode_ratio, 1 - self.mode_ratio], k=1
|
||||
)[0]
|
||||
sort_order = random.choice(self.SORT_ORDER)
|
||||
|
||||
custom_order = None
|
||||
if sort_order == "custom_sorting":
|
||||
letters = list(string.ascii_lowercase)
|
||||
random.shuffle(letters)
|
||||
custom_order = letters
|
||||
|
||||
word_list_size = random.randint(
|
||||
self.min_word_list_size, self.max_word_list_size
|
||||
)
|
||||
words = random.sample(self.word_pool, word_list_size)
|
||||
words = [w.lower() for w in words]
|
||||
|
||||
meta_info = self._verbal_sort_words(
|
||||
word_list=words,
|
||||
mode=mode,
|
||||
custom_order=custom_order,
|
||||
enable_error=enable_error,
|
||||
)
|
||||
if subtask == "error_detection":
|
||||
while len(meta_info["thoughts"]) < word_list_size:
|
||||
words = random.sample(self.word_pool, word_list_size)
|
||||
words = [w.lower() for w in words]
|
||||
meta_info = self._verbal_sort_words(
|
||||
word_list=words,
|
||||
mode=mode,
|
||||
custom_order=custom_order,
|
||||
enable_error=enable_error,
|
||||
)
|
||||
|
||||
meta_info["words"] = words
|
||||
meta_info["subtask"] = subtask
|
||||
meta_info["mode"] = mode
|
||||
meta_info["sort_order"] = sort_order
|
||||
meta_info["custom_order"] = custom_order
|
||||
return meta_info
|
||||
|
||||
@staticmethod
|
||||
def prompt_func(case) -> str:
|
||||
|
||||
prompt = ""
|
||||
special_requirements = ""
|
||||
|
||||
if case["custom_order"] is not None:
|
||||
special_requirements += f"""
|
||||
You need to sort the words according to a custom alphabet. The custom order is: {', '.join(case['custom_order'])}
|
||||
"""
|
||||
|
||||
if case["mode"] == "reverse":
|
||||
special_requirements += f"""
|
||||
Add a new sorting rule. When sorting the words, you need to sort the letters in reverse order, comparing from right to left instead of left to right.
|
||||
"""
|
||||
if special_requirements != "":
|
||||
special_requirements = "There are some special requirements:\n" + special_requirements
|
||||
if case["subtask"] == "error_detection":
|
||||
prompt = """
|
||||
You are an expert in word sorting. You will be provided with a list of words and the steps taken, in the form of thoughts.
|
||||
Your job is to identify the first step that was a mistake in reasoning about the order of the words. This can be misunderstanding the order of
|
||||
the alphabet or getting the word or letter wrong or missing a word entirely.
|
||||
"""
|
||||
prompt += f"""Q: Sort the following words: List: {" ".join(case['words'])}
|
||||
{special_requirements}
|
||||
{chr(10).join(f"Thought {i+1}: {t}" for i, t in enumerate(case['thoughts']))}
|
||||
|
||||
Q: Is there a mistake in this sequence? Write \"No\" if there are no mistakes, or the number N if there is a mistake in Thought N.
|
||||
Please reason step by step and put your final answer within [answer] [/answer] tags.
|
||||
"""
|
||||
else:
|
||||
prompt = f"""Sort the following words with the new alphabet and separate them with comma: {", ".join(case['words'])}
|
||||
{special_requirements}
|
||||
Format your final answer as: [answer]<comma-separated words>[/answer].
|
||||
Please reason step by step and put your final answer within [answer] [/answer] tags."""
|
||||
|
||||
return prompt
|
||||
|
||||
@staticmethod
|
||||
def extract_output(output):
|
||||
"""提取两种子任务的答案"""
|
||||
answer_match = re.search(r"\[answer\](.*?)\[/answer\]", output, re.DOTALL)
|
||||
if not answer_match:
|
||||
return None
|
||||
|
||||
answer = answer_match.group(1).strip()
|
||||
|
||||
# 尝试解析为错误步骤编号
|
||||
if answer.lower() == "no":
|
||||
return "No"
|
||||
try:
|
||||
return int(answer)
|
||||
except ValueError:
|
||||
# 可能是子任务2的列表答案
|
||||
try:
|
||||
return json.loads(answer)
|
||||
except:
|
||||
return [w.strip() for w in answer.split(",")]
|
||||
|
||||
@classmethod
|
||||
def _verify_correction(cls, solution, case):
|
||||
"""根据子任务类型验证答案"""
|
||||
if case["subtask"] == "error_detection":
|
||||
expected = case["first_error_step"]
|
||||
return str(solution) == str(expected)
|
||||
else:
|
||||
# 验证排序结果与自定义字母表一致
|
||||
try:
|
||||
expected = case["sorted_words"]
|
||||
return solution == expected
|
||||
except:
|
||||
return False
|
||||
|
||||
def _verbal_sort_words(
|
||||
self, word_list, mode="normal", custom_order=None, enable_error=True
|
||||
):
|
||||
if custom_order is not None:
|
||||
key_map = {char: i for i, char in enumerate(custom_order)}
|
||||
key_map[""] = -1
|
||||
assert mode in ["normal", "reverse"], "Sort mode should in [normal, reverse]!"
|
||||
|
||||
def get_key(word, depth):
|
||||
if mode == "normal": # 正序
|
||||
return word[depth] if len(word) > depth else ""
|
||||
elif mode == "reverse": # 反序
|
||||
return word[-(depth + 1)] if len(word) > depth else ""
|
||||
|
||||
def verbal_sort_step(words, depth=0):
|
||||
if len(words) <= 1:
|
||||
return words
|
||||
|
||||
# 1. 提取当前字符进行分组
|
||||
grouped = {}
|
||||
reverve_group = {}
|
||||
for word in words:
|
||||
# generate error
|
||||
# missing the letter
|
||||
if enable_error and random.random() < 0.02:
|
||||
if verbal_sort_step.first_error_step is None:
|
||||
verbal_sort_step.first_error_step = verbal_sort_step.thought_count
|
||||
continue
|
||||
key = get_key(word, depth)
|
||||
if enable_error and random.random() < 0.02:
|
||||
error_key = random.choice(word)
|
||||
if error_key != key:
|
||||
key = error_key
|
||||
if verbal_sort_step.first_error_step is None:
|
||||
verbal_sort_step.first_error_step = verbal_sort_step.thought_count
|
||||
grouped.setdefault(key, []).append(word)
|
||||
reverve_group[word] = key
|
||||
# 2. 输出当前分组情况
|
||||
try:
|
||||
sorted_keys = sorted(
|
||||
grouped.keys(),
|
||||
key=lambda k: key_map[k] if custom_order and k in key_map else k,
|
||||
)
|
||||
except Exception as e:
|
||||
#
|
||||
pass
|
||||
|
||||
|
||||
verbal_letters = ""
|
||||
verbal_letters += f"The {number_to_ordinal(depth + 1)} letter: "
|
||||
for word in reverve_group.keys():
|
||||
verbal_letters += f"{word}: {reverve_group[word]}. "
|
||||
correct_sorted_keyes = sorted_keys.copy()
|
||||
|
||||
special_prompt = "" if mode == "normal" else " from right to left"
|
||||
if verbal_sort_step.thought_count == 1:
|
||||
thought = f"I should start by looking at the first letter of the words in the list. "
|
||||
else:
|
||||
thought = f"Now let's sort this subpart {words}. Looking at the {number_to_ordinal(depth + 1)} letters{special_prompt}."
|
||||
thought += verbal_letters
|
||||
verbal_sort_step.thought_list.append(thought)
|
||||
verbal_sort_step.thought_count += 1
|
||||
|
||||
# 3. 更新全局状态并输出
|
||||
new_global_state = []
|
||||
global_state = verbal_sort_step.global_state.copy()
|
||||
for group in global_state:
|
||||
if set(group) == set(words):
|
||||
for key in sorted_keys:
|
||||
new_global_state.append(grouped[key])
|
||||
else:
|
||||
new_global_state.append(group)
|
||||
|
||||
# 随机生成错误
|
||||
if enable_error and random.random() < 0.2: # 20% 的概率生成错误
|
||||
# 错误类型1: 错误的比较逻辑
|
||||
rnd = random.random()
|
||||
if rnd < 0.3:
|
||||
sorted_keys = sorted(
|
||||
grouped.keys(),
|
||||
reverse=True,
|
||||
key=lambda k: (
|
||||
key_map[k] if custom_order and k in key_map else k
|
||||
),
|
||||
) # 错误地反向排序
|
||||
elif rnd < 0.7:
|
||||
random.shuffle(sorted_keys)
|
||||
else:
|
||||
sorted_keys = list(grouped.keys()) # 错误地不排序
|
||||
if (
|
||||
verbal_sort_step.first_error_step is None
|
||||
and sorted_keys != correct_sorted_keyes
|
||||
):
|
||||
verbal_sort_step.first_error_step = verbal_sort_step.thought_count
|
||||
|
||||
# 随机生成错误
|
||||
if enable_error and random.random() < 0.2: # 20% 的概率生成错误
|
||||
# 错误类型2: 错误的全局状态更新
|
||||
# 随机打乱global state
|
||||
global_state = new_global_state
|
||||
random.shuffle(global_state)
|
||||
if verbal_sort_step.first_error_step is None:
|
||||
verbal_sort_step.first_error_step = verbal_sort_step.thought_count
|
||||
else:
|
||||
global_state = new_global_state
|
||||
verbal_sort_step.global_state = global_state
|
||||
|
||||
thought = f"We now have: "
|
||||
thought += " < ".join(
|
||||
[
|
||||
f"{grouped[k] if len(grouped[k]) > 1 else grouped[k][0]}"
|
||||
for k in sorted_keys
|
||||
]
|
||||
)
|
||||
if verbal_sort_step.thought_count != 2:
|
||||
thought += "for the subpart. Hence, we have "
|
||||
thought += " < ".join(
|
||||
[f"{group if len(group) > 1 else group[0]}" for group in global_state]
|
||||
)
|
||||
|
||||
verbal_sort_step.thought_list.append(thought)
|
||||
verbal_sort_step.thought_count += 1
|
||||
|
||||
# 4. 递归对子序列排序
|
||||
sorted_words = []
|
||||
for key in sorted_keys:
|
||||
new_words = verbal_sort_step(grouped[key], depth + 1)
|
||||
sorted_words.extend(new_words)
|
||||
|
||||
return sorted_words
|
||||
|
||||
verbal_sort_step.thought_count = 1
|
||||
verbal_sort_step.thought_list = []
|
||||
verbal_sort_step.first_error_step = None
|
||||
verbal_sort_step.global_state = [word_list.copy()]
|
||||
sorted_words = verbal_sort_step(word_list)
|
||||
answer_thought = "I have now sorted all the words. The answer is " + " ".join(sorted_words)
|
||||
verbal_sort_step.thought_list.append(answer_thought)
|
||||
return {
|
||||
"thoughts": verbal_sort_step.thought_list,
|
||||
"first_error_step": (
|
||||
"No"
|
||||
if verbal_sort_step.first_error_step is None
|
||||
else verbal_sort_step.first_error_step
|
||||
),
|
||||
"sorted_words": sorted_words,
|
||||
}
|
||||
|
||||
|
||||
def number_to_ordinal(n):
|
||||
ordinals = {
|
||||
1: "first",
|
||||
2: "second",
|
||||
3: "third",
|
||||
4: "fourth",
|
||||
5: "fifth",
|
||||
6: "sixth",
|
||||
7: "seventh",
|
||||
8: "eighth",
|
||||
9: "ninth",
|
||||
10: "tenth"
|
||||
}
|
||||
|
||||
return ordinals.get(n, f"{n}th")
|
||||
|
||||
if __name__ == "__main__":
|
||||
bootcamp = BbehWordSortingbootcamp()
|
||||
case = bootcamp.case_generator()
|
||||
print(case)
|
||||
prompt = bootcamp.prompt_func(case)
|
||||
print(f"prompt: {prompt}")
|
||||
|
||||
if case["subtask"] == "error_detection":
|
||||
answer = case["first_error_step"]
|
||||
else:
|
||||
answer = ", ".join(case["sorted_words"])
|
||||
answer = f"[answer]{answer}[/answer]"
|
||||
print(f"answer: {answer}")
|
||||
|
||||
extracted_output = bootcamp.extract_output(answer)
|
||||
print(f"extracted_output: {extracted_output}")
|
||||
|
||||
verify_result = bootcamp.verify_score(answer, case)
|
||||
print(f"verify_result: {verify_result}")
|
||||
Loading…
Add table
Add a link
Reference in a new issue