refactor(data_pipeline): optimize data generation pipeline; add multiple preset configurations for data generation

This commit is contained in:
chenyongkang 2025-08-26 18:40:21 +08:00
parent 1a8477c8d8
commit 8d493b35a0
2160 changed files with 69199 additions and 154 deletions

View file

@ -0,0 +1,32 @@
[
{
"difficulty": "easy",
"timeout": 30,
"language": "zh"
},
{
"difficulty": "medium",
"timeout": 30,
"language": "zh"
},
{
"difficulty": "hard",
"timeout": 30,
"language": "zh"
},
{
"difficulty": "easy",
"timeout": 30,
"language": "en"
},
{
"difficulty": "medium",
"timeout": 30,
"language": "en"
},
{
"difficulty": "hard",
"timeout": 30,
"language": "en"
}
]

View file

@ -0,0 +1,32 @@
[
{
"difficulty": "easy",
"timeout": 30,
"language": "zh"
},
{
"difficulty": "medium",
"timeout": 30,
"language": "zh"
},
{
"difficulty": "hard",
"timeout": 30,
"language": "zh"
},
{
"difficulty": "easy",
"timeout": 30,
"language": "en"
},
{
"difficulty": "medium",
"timeout": 30,
"language": "en"
},
{
"difficulty": "hard",
"timeout": 30,
"language": "en"
}
]

View file

@ -0,0 +1,80 @@
[
{
"apply_prob": 0.5,
"methods": [
"A",
"B",
"C"
],
"subjects": [
"natural numbers",
"products",
"college students",
"true revolutionaries",
"formally correct reasoning"
],
"predicates": [
"rational numbers",
"top-grade",
"patriots",
"upright and open",
"tested by practice"
]
},
{
"apply_prob": 0.5,
"methods": [
"A",
"B"
],
"subjects": [
"natural numbers",
"products",
"true revolutionaries"
],
"predicates": [
"rational numbers",
"top-grade",
"upright and open"
]
},
{
"apply_prob": 0.7,
"methods": [
"A",
"B",
"C"
],
"subjects": [
"natural numbers",
"products",
"college students",
"true revolutionaries",
"formally correct reasoning"
],
"predicates": [
"rational numbers",
"top-grade",
"patriots",
"upright and open",
"tested by practice"
]
},
{
"apply_prob": 0.6,
"methods": [
"B",
"C"
],
"subjects": [
"college students",
"true revolutionaries",
"formally correct reasoning"
],
"predicates": [
"patriots",
"upright and open",
"tested by practice"
]
}
]

View file

@ -0,0 +1,80 @@
[
{
"apply_prob": 0.5,
"methods": [
"A",
"B",
"C"
],
"subjects": [
"natural numbers",
"products",
"college students",
"true revolutionaries",
"formally correct reasoning"
],
"predicates": [
"rational numbers",
"top-grade",
"patriots",
"upright and open",
"tested by practice"
]
},
{
"apply_prob": 0.5,
"methods": [
"A",
"B"
],
"subjects": [
"natural numbers",
"products",
"true revolutionaries"
],
"predicates": [
"rational numbers",
"top-grade",
"upright and open"
]
},
{
"apply_prob": 0.7,
"methods": [
"A",
"B",
"C"
],
"subjects": [
"natural numbers",
"products",
"college students",
"true revolutionaries",
"formally correct reasoning"
],
"predicates": [
"rational numbers",
"top-grade",
"patriots",
"upright and open",
"tested by practice"
]
},
{
"apply_prob": 0.6,
"methods": [
"B",
"C"
],
"subjects": [
"college students",
"true revolutionaries",
"formally correct reasoning"
],
"predicates": [
"patriots",
"upright and open",
"tested by practice"
]
}
]

View file

@ -0,0 +1,22 @@
[
{
"max_variables": 3,
"problem_type": "classify",
"difficulty_level": "easy"
},
{
"max_variables": 2,
"problem_type": "classify",
"difficulty_level": "easy"
},
{
"max_variables": 3,
"problem_type": "denote",
"difficulty_level": "medium"
},
{
"max_variables": 4,
"problem_type": "classify",
"difficulty_level": "hard"
}
]

View file

@ -0,0 +1,22 @@
[
{
"max_variables": 3,
"problem_type": "classify",
"difficulty_level": "easy"
},
{
"max_variables": 2,
"problem_type": "classify",
"difficulty_level": "easy"
},
{
"max_variables": 3,
"problem_type": "denote",
"difficulty_level": "medium"
},
{
"max_variables": 4,
"problem_type": "classify",
"difficulty_level": "hard"
}
]

View file

@ -0,0 +1,26 @@
[
{
"max_attempts": 1,
"seed": 2,
"difficulty": "easy"
},
{
"max_attempts": 4,
"seed": 123,
"difficulty": "hard"
},
{
"max_attempts": 3,
"seed": 0,
"difficulty": "medium"
},
{
"max_attempts": 1,
"seed": 999
},
{
"max_attempts": 5,
"seed": 1,
"difficulty": "hard"
}
]

View file

@ -0,0 +1,26 @@
[
{
"max_attempts": 1,
"seed": 2,
"difficulty": "easy"
},
{
"max_attempts": 4,
"seed": 123,
"difficulty": "hard"
},
{
"max_attempts": 3,
"seed": 0,
"difficulty": "medium"
},
{
"max_attempts": 1,
"seed": 999
},
{
"max_attempts": 5,
"seed": 1,
"difficulty": "hard"
}
]

View file

@ -0,0 +1,54 @@
[
{
"question_types": [
"expression_conversion",
"equivalence_validation",
"expression_simplification"
],
"variables": [
"a",
"b",
"c"
],
"cities": [
"Beijing",
"Guangzhou",
"Shenzhen"
]
},
{
"question_types": [
"expression_conversion",
"equivalence_validation",
"puzzle_solution"
],
"variables": [
"p",
"q",
"r"
],
"cities": [
"Suzhou",
"Shanghai",
"Hangzhou"
]
},
{
"question_types": [
"expression_conversion",
"truth_table_completion",
"puzzle_solution"
],
"variables": [
"p",
"q",
"r",
"s"
],
"cities": [
"Shanghai",
"Beijing",
"Guangzhou"
]
}
]

View file

@ -0,0 +1,54 @@
[
{
"question_types": [
"expression_conversion",
"equivalence_validation",
"expression_simplification"
],
"variables": [
"a",
"b",
"c"
],
"cities": [
"Beijing",
"Guangzhou",
"Shenzhen"
]
},
{
"question_types": [
"expression_conversion",
"equivalence_validation",
"puzzle_solution"
],
"variables": [
"p",
"q",
"r"
],
"cities": [
"Suzhou",
"Shanghai",
"Hangzhou"
]
},
{
"question_types": [
"expression_conversion",
"truth_table_completion",
"puzzle_solution"
],
"variables": [
"p",
"q",
"r",
"s"
],
"cities": [
"Shanghai",
"Beijing",
"Guangzhou"
]
}
]

View file

@ -0,0 +1,20 @@
[
{
"enable_fill": true,
"seed": 42,
"difficulty": "easy",
"max_cases": 10
},
{
"enable_fill": false,
"seed": 888,
"difficulty": "easy",
"max_cases": 20
},
{
"enable_fill": true,
"seed": 777,
"difficulty": "hard",
"max_cases": 15
}
]

View file

@ -0,0 +1,20 @@
[
{
"enable_fill": true,
"seed": 42,
"difficulty": "easy",
"max_cases": 10
},
{
"enable_fill": false,
"seed": 888,
"difficulty": "easy",
"max_cases": 20
},
{
"enable_fill": true,
"seed": 777,
"difficulty": "hard",
"max_cases": 15
}
]

View file

@ -0,0 +1,40 @@
[
{
"fallacy_groups": [
"advanced"
],
"max_complexity": 5,
"template_variations": 5,
"include_analysis": true,
"shuffle_options": true
},
{
"fallacy_groups": [
"intermediate"
],
"max_complexity": 2,
"template_variations": 3,
"include_analysis": false,
"shuffle_options": false
},
{
"fallacy_groups": [
"basic"
],
"max_complexity": 1,
"template_variations": 1,
"include_analysis": false,
"shuffle_options": true
},
{
"fallacy_groups": [
"basic",
"intermediate",
"advanced"
],
"max_complexity": 3,
"template_variations": 2,
"include_analysis": true,
"shuffle_options": false
}
]

View file

@ -0,0 +1,40 @@
[
{
"fallacy_groups": [
"advanced"
],
"max_complexity": 5,
"template_variations": 5,
"include_analysis": true,
"shuffle_options": true
},
{
"fallacy_groups": [
"intermediate"
],
"max_complexity": 2,
"template_variations": 3,
"include_analysis": false,
"shuffle_options": false
},
{
"fallacy_groups": [
"basic"
],
"max_complexity": 1,
"template_variations": 1,
"include_analysis": false,
"shuffle_options": true
},
{
"fallacy_groups": [
"basic",
"intermediate",
"advanced"
],
"max_complexity": 3,
"template_variations": 2,
"include_analysis": true,
"shuffle_options": false
}
]

View file

@ -0,0 +1,74 @@
[
{
"s_list": [
"metals",
"products",
"students",
"mammals",
"pencils",
"stars",
"individual businesses"
],
"p_list": [
"conductive",
"qualified",
"like mathematics",
"warm-blooded animals",
"pens",
"planets",
"paid taxes"
],
"problem_types": [
"components",
"proposition_type",
"relationship_exists",
"relationship_type",
"truth_value"
]
},
{
"s_list": [
"students",
"animals",
"fruits",
"countries",
"books",
"cities"
],
"p_list": [
"happy",
"furry",
"sweet",
"large",
"interesting",
"populated"
],
"problem_types": [
"components",
"proposition_type",
"relationship_exists",
"truth_value"
]
},
{
"s_list": [
"plants",
"rocks",
"mountains",
"oceans",
"stars"
],
"p_list": [
"green",
"hard",
"tall",
"deep",
"bright"
],
"problem_types": [
"proposition_type",
"relationship_exists",
"truth_value"
]
}
]

View file

@ -0,0 +1,74 @@
[
{
"s_list": [
"metals",
"products",
"students",
"mammals",
"pencils",
"stars",
"individual businesses"
],
"p_list": [
"conductive",
"qualified",
"like mathematics",
"warm-blooded animals",
"pens",
"planets",
"paid taxes"
],
"problem_types": [
"components",
"proposition_type",
"relationship_exists",
"relationship_type",
"truth_value"
]
},
{
"s_list": [
"students",
"animals",
"fruits",
"countries",
"books",
"cities"
],
"p_list": [
"happy",
"furry",
"sweet",
"large",
"interesting",
"populated"
],
"problem_types": [
"components",
"proposition_type",
"relationship_exists",
"truth_value"
]
},
{
"s_list": [
"plants",
"rocks",
"mountains",
"oceans",
"stars"
],
"p_list": [
"green",
"hard",
"tall",
"deep",
"bright"
],
"problem_types": [
"proposition_type",
"relationship_exists",
"truth_value"
]
}
]

View file

@ -0,0 +1,26 @@
[
{
"problem_type_ratio": 0.3
},
{
"problem_type_ratio": 1.0
},
{
"problem_type_ratio": 0.8
},
{
"problem_type_ratio": 0.7
},
{
"problem_type_ratio": 0.0
},
{
"problem_type_ratio": 0.9
},
{
"problem_type_ratio": 0.1
},
{
"problem_type_ratio": 0.4
}
]

View file

@ -0,0 +1,26 @@
[
{
"problem_type_ratio": 0.3
},
{
"problem_type_ratio": 1.0
},
{
"problem_type_ratio": 0.8
},
{
"problem_type_ratio": 0.7
},
{
"problem_type_ratio": 0.0
},
{
"problem_type_ratio": 0.9
},
{
"problem_type_ratio": 0.1
},
{
"problem_type_ratio": 0.4
}
]

View file

@ -0,0 +1,70 @@
[
{
"symbolize_prob": 0.35,
"relationship_prob": 0.35,
"formula_inference_prob": 0.3,
"action_words": [
"study computer science",
"write novels",
"play piano",
"keep on painting"
],
"subjects": [
"Xiao Jin",
"Xiao Qian",
"Wang Qiang",
"Lin Min"
]
},
{
"symbolize_prob": 0.4,
"relationship_prob": 0.4,
"formula_inference_prob": 0.2,
"action_words": [
"work in Beijing",
"study abroad",
"settle permanently",
"win the championship"
],
"subjects": [
"Wang Qiang",
"Xiao Lin",
"Dr Lee",
"Old Zhao"
]
},
{
"symbolize_prob": 0.25,
"relationship_prob": 0.45,
"formula_inference_prob": 0.3,
"action_words": [
"work on farms",
"teach mathematics",
"travel abroad",
"keep on painting"
],
"subjects": [
"Old Zhao",
"Dr Lee",
"Xiao Bai",
"I"
]
},
{
"symbolize_prob": 0.45,
"relationship_prob": 0.25,
"formula_inference_prob": 0.3,
"action_words": [
"work diligently",
"settle in the US",
"invest in stocks",
"win the championship"
],
"subjects": [
"Mr Chen",
"Dr Lee",
"Xiao Lin",
"Old Zhao"
]
}
]

View file

@ -0,0 +1,70 @@
[
{
"symbolize_prob": 0.35,
"relationship_prob": 0.35,
"formula_inference_prob": 0.3,
"action_words": [
"study computer science",
"write novels",
"play piano",
"keep on painting"
],
"subjects": [
"Xiao Jin",
"Xiao Qian",
"Wang Qiang",
"Lin Min"
]
},
{
"symbolize_prob": 0.4,
"relationship_prob": 0.4,
"formula_inference_prob": 0.2,
"action_words": [
"work in Beijing",
"study abroad",
"settle permanently",
"win the championship"
],
"subjects": [
"Wang Qiang",
"Xiao Lin",
"Dr Lee",
"Old Zhao"
]
},
{
"symbolize_prob": 0.25,
"relationship_prob": 0.45,
"formula_inference_prob": 0.3,
"action_words": [
"work on farms",
"teach mathematics",
"travel abroad",
"keep on painting"
],
"subjects": [
"Old Zhao",
"Dr Lee",
"Xiao Bai",
"I"
]
},
{
"symbolize_prob": 0.45,
"relationship_prob": 0.25,
"formula_inference_prob": 0.3,
"action_words": [
"work diligently",
"settle in the US",
"invest in stocks",
"win the championship"
],
"subjects": [
"Mr Chen",
"Dr Lee",
"Xiao Lin",
"Old Zhao"
]
}
]

View file

@ -0,0 +1,89 @@
[
{
"problem_types": {
"relationship": 0.45,
"symbolization": 0.25,
"formula": 0.2,
"multiple_choice_formula": 0.1
},
"propositions": {
"p": [
"the store is open",
"the bank is closed",
"the library is quiet",
"the gym is busy",
"the café is empty",
"the museum is closed",
"the theater is showing a movie",
"the shop is having a sale",
"the office is quiet",
"the classroom is empty"
],
"q": [
"the cashier is busy",
"the librarian is available",
"the trainer is working",
"the barista is making coffee",
"the guard is on duty"
]
}
},
{
"problem_types": {
"relationship": 0.35,
"symbolization": 0.3,
"formula": 0.25,
"multiple_choice_formula": 0.1
},
"propositions": {
"p": [
"the train arrives on time",
"the meeting starts at 2 PM",
"the pizza is delicious",
"the book is interesting",
"the movie is exciting",
"the park is crowded",
"the concert is sold out",
"the weather is nice",
"the flight is delayed",
"the restaurant is busy"
],
"q": [
"the teacher arrives early",
"the student completes the homework",
"the car needs maintenance",
"the battery is charged",
"the internet is working"
]
}
},
{
"problem_types": {
"relationship": 0.3,
"symbolization": 0.35,
"formula": 0.2,
"multiple_choice_formula": 0.15
},
"propositions": {
"p": [
"the sun rises in the east",
"the moon shines at night",
"the earth orbits the sun",
"water boils at 100 degrees Celsius",
"ice melts at 0 degrees Celsius",
"plants need sunlight to grow",
"birds can fly",
"fish live in water",
"dogs are mammals",
"cats are carnivores"
],
"q": [
"the sky is blue",
"the grass is green",
"the tree is tall",
"the flower is red",
"the mountain is high"
]
}
}
]

View file

@ -0,0 +1,89 @@
[
{
"problem_types": {
"relationship": 0.45,
"symbolization": 0.25,
"formula": 0.2,
"multiple_choice_formula": 0.1
},
"propositions": {
"p": [
"the store is open",
"the bank is closed",
"the library is quiet",
"the gym is busy",
"the café is empty",
"the museum is closed",
"the theater is showing a movie",
"the shop is having a sale",
"the office is quiet",
"the classroom is empty"
],
"q": [
"the cashier is busy",
"the librarian is available",
"the trainer is working",
"the barista is making coffee",
"the guard is on duty"
]
}
},
{
"problem_types": {
"relationship": 0.35,
"symbolization": 0.3,
"formula": 0.25,
"multiple_choice_formula": 0.1
},
"propositions": {
"p": [
"the train arrives on time",
"the meeting starts at 2 PM",
"the pizza is delicious",
"the book is interesting",
"the movie is exciting",
"the park is crowded",
"the concert is sold out",
"the weather is nice",
"the flight is delayed",
"the restaurant is busy"
],
"q": [
"the teacher arrives early",
"the student completes the homework",
"the car needs maintenance",
"the battery is charged",
"the internet is working"
]
}
},
{
"problem_types": {
"relationship": 0.3,
"symbolization": 0.35,
"formula": 0.2,
"multiple_choice_formula": 0.15
},
"propositions": {
"p": [
"the sun rises in the east",
"the moon shines at night",
"the earth orbits the sun",
"water boils at 100 degrees Celsius",
"ice melts at 0 degrees Celsius",
"plants need sunlight to grow",
"birds can fly",
"fish live in water",
"dogs are mammals",
"cats are carnivores"
],
"q": [
"the sky is blue",
"the grass is green",
"the tree is tall",
"the flower is red",
"the mountain is high"
]
}
}
]

View file

@ -0,0 +1,8 @@
[
{
"grid_size": 4
},
{
"grid_size": 5
}
]

View file

@ -0,0 +1,8 @@
[
{
"grid_size": 4
},
{
"grid_size": 5
}
]

View file

@ -0,0 +1,97 @@
[
{
"num_entries": 5,
"years": [
1700,
1720,
1740,
1760,
1780
],
"attributes": {
"windmills": [
"Molen A",
"Molen B",
"Molen C",
"Molen D",
"Molen E"
],
"families": [
"Family 1",
"Family 2",
"Family 3",
"Family 4",
"Family 5"
]
}
},
{
"num_entries": 4,
"years": [
1683,
1706,
1729,
1752
],
"attributes": {
"windmills": [
"Vlietmolen",
"Westmolen",
"Zemelmolen",
"Other"
],
"families": [
"Van Dijk",
"Visser",
"De Jong",
"Other Family"
]
}
},
{
"num_entries": 5,
"years": [
1800,
1825,
1850,
1875,
1900
],
"attributes": {
"windmills": [
"A",
"B",
"C",
"D",
"E"
],
"families": [
"1",
"2",
"3",
"4",
"5"
]
}
},
{
"num_entries": 3,
"years": [
1600,
1650,
1700
],
"attributes": {
"windmills": [
"Old Molen",
"Middle Molen",
"New Molen"
],
"families": [
"Ancient Family",
"Middle Family",
"Modern Family"
]
}
}
]

View file

@ -0,0 +1,97 @@
[
{
"num_entries": 5,
"years": [
1700,
1720,
1740,
1760,
1780
],
"attributes": {
"windmills": [
"Molen A",
"Molen B",
"Molen C",
"Molen D",
"Molen E"
],
"families": [
"Family 1",
"Family 2",
"Family 3",
"Family 4",
"Family 5"
]
}
},
{
"num_entries": 4,
"years": [
1683,
1706,
1729,
1752
],
"attributes": {
"windmills": [
"Vlietmolen",
"Westmolen",
"Zemelmolen",
"Other"
],
"families": [
"Van Dijk",
"Visser",
"De Jong",
"Other Family"
]
}
},
{
"num_entries": 5,
"years": [
1800,
1825,
1850,
1875,
1900
],
"attributes": {
"windmills": [
"A",
"B",
"C",
"D",
"E"
],
"families": [
"1",
"2",
"3",
"4",
"5"
]
}
},
{
"num_entries": 3,
"years": [
1600,
1650,
1700
],
"attributes": {
"windmills": [
"Old Molen",
"Middle Molen",
"New Molen"
],
"families": [
"Ancient Family",
"Middle Family",
"Modern Family"
]
}
}
]

View file

@ -0,0 +1,3 @@
[
{"conf_file":"./internbootcamp/libs/med_calculator/med_calculator.json"}
]

View file

@ -0,0 +1,3 @@
[
{"conf_file":"./internbootcamp/libs/med_calculator/med_calculator.json"}
]