diff --git a/examples/pipelines/cipher_data_generator.py b/examples/pipelines/cipher_data_generator.py index ca9ee89..735c23d 100644 --- a/examples/pipelines/cipher_data_generator.py +++ b/examples/pipelines/cipher_data_generator.py @@ -170,12 +170,14 @@ if __name__ == '__main__': parser.add_argument('--split', type=str) parser.add_argument('--timestamp', type=str) parser.add_argument('--filepath', type=str) + parser.add_argument('--coef', type=float, default=30/144, help='Coef for balance cipher data tasks') args = parser.parse_args() # 获取传入的参数 nums = int(args.nums) if nums == 0: exit() + nums = int(nums * args.coef) split = str(args.split) timestamp = str(args.timestamp) # df = pd.read_csv(r'internbootcamp/libs/data/train.tsv', sep='\t') diff --git a/examples/pipelines/data_configs/data_config_test.jsonl b/examples/pipelines/data_configs/data_config_test.jsonl index 9fc370a..9b52ffe 100644 --- a/examples/pipelines/data_configs/data_config_test.jsonl +++ b/examples/pipelines/data_configs/data_config_test.jsonl @@ -1,81 +1,96 @@ -{"bootcamp_name": "cipher", "sample_number": 3, "config_file": "cipher", "bootcamp_cls_name": "Cipherbootcamp"} -{"bootcamp_name": "korLogicDefinitions", "sample_number": 64, "config_file": "korLogicDefinitions", "bootcamp_cls_name": "KorLogicDefinitionsbootcamp"} -{"bootcamp_name": "korLogicCooperativePrinciple", "sample_number": 64, "config_file": "korLogicCooperativePrinciple", "bootcamp_cls_name": "KorLogicCooperativePrinciplebootcamp"} -{"bootcamp_name": "korOperationUnicodeffe0", "sample_number": 64, "config_file": "korOperationUnicodeffe0", "bootcamp_cls_name": "KorOperationUnicodeffe0bootcamp"} -{"bootcamp_name": "korLogicEpistemicLogic", "sample_number": 64, "config_file": "korLogicEpistemicLogic", "bootcamp_cls_name": "KorLogicEpistemicLogicbootcamp"} -{"bootcamp_name": "korLogicDerivativeReasoningOfPropositionalLogic", "sample_number": 64, "config_file": "korLogicDerivativeReasoningOfPropositionalLogic", "bootcamp_cls_name": "KorLogicDerivativeReasoningOfPropositionalLogicbootcamp"} -{"bootcamp_name": "korLogicTruthValueModalPropositions", "sample_number": 64, "config_file": "korLogicTruthValueModalPropositions", "bootcamp_cls_name": "KorLogicTruthValueModalPropositionsbootcamp"} -{"bootcamp_name": "BBEHBuggyTables", "sample_number": 64, "config_file": "BBEHBuggyTables", "bootcamp_cls_name": "BBEHBuggyTablesbootcamp"} -{"bootcamp_name": "BbehWordSorting", "sample_number": 64, "config_file": "BbehWordSorting", "bootcamp_cls_name": "BbehWordSortingbootcamp"} -{"bootcamp_name": "korLogicFigureOfTheSyllogism", "sample_number": 64, "config_file": "korLogicFigureOfTheSyllogism", "bootcamp_cls_name": "KorLogicFigureOfTheSyllogismbootcamp"} -{"bootcamp_name": "korLogicResolution", "sample_number": 64, "config_file": "korLogicResolution", "bootcamp_cls_name": "KorLogicResolutionbootcamp"} -{"bootcamp_name": "thermometers", "sample_number": 64, "config_file": "thermometers", "bootcamp_cls_name": "Thermometersbootcamp"} -{"bootcamp_name": "korPuzzleCalcudoko", "sample_number": 64, "config_file": "korPuzzleCalcudoko", "bootcamp_cls_name": "KorPuzzleCalcudokobootcamp"} -{"bootcamp_name": "korPuzzle24Points", "sample_number": 64, "config_file": "korPuzzle24Points", "bootcamp_cls_name": "KorPuzzle24Pointsbootcamp"} -{"bootcamp_name": "korOperationUnicode2295", "sample_number": 64, "config_file": "korOperationUnicode2295", "bootcamp_cls_name": "KorOperationUnicode2295bootcamp"} -{"bootcamp_name": "nonograms", "sample_number": 64, "config_file": "nonograms", "bootcamp_cls_name": "Nonogramsbootcamp"} -{"bootcamp_name": "bbehboardgameqa", "sample_number": 64, "config_file": "bbeh_boardgame_qa", "bootcamp_cls_name": "Bbehboardgameqabootcamp"} -{"bootcamp_name": "korLogicEquivalenceCalculus", "sample_number": 64, "config_file": "korLogicEquivalenceCalculus", "bootcamp_cls_name": "KorLogicEquivalenceCalculusbootcamp"} -{"bootcamp_name": "dominosa", "sample_number": 64, "config_file": "dominosa", "bootcamp_cls_name": "Dominosabootcamp"} -{"bootcamp_name": "korPuzzleCryptoMath", "sample_number": 64, "config_file": "korPuzzleCryptoMath", "bootcamp_cls_name": "KorPuzzleCryptoMathbootcamp"} -{"bootcamp_name": "korLogicAnalogicalReasoning", "sample_number": 64, "config_file": "korLogicAnalogicalReasoning", "bootcamp_cls_name": "KorLogicAnalogicalReasoningbootcamp"} -{"bootcamp_name": "korOperationUnicode25bd", "sample_number": 64, "config_file": "korOperationUnicode25bd", "bootcamp_cls_name": "KorOperationUnicode25bdbootcamp"} -{"bootcamp_name": "korPuzzleWordBrainTeasers", "sample_number": 64, "config_file": "korPuzzleWordBrainTeasers", "bootcamp_cls_name": "KorPuzzleWordBrainTeasersbootcamp"} -{"bootcamp_name": "korOperationUnicode25a1", "sample_number": 64, "config_file": "korOperationUnicode25a1", "bootcamp_cls_name": "KorOperationUnicode25a1bootcamp"} -{"bootcamp_name": "kakurasu", "sample_number": 64, "config_file": "kakurasu", "bootcamp_cls_name": "Kakurasubootcamp"} -{"bootcamp_name": "korLogicLogicalMethodsForExploringCauseAndEffectRelationships", "sample_number": 64, "config_file": "korLogicLogicalMethodsForExploringCauseAndEffectRelationships", "bootcamp_cls_name": "KorLogicLogicalMethodsForExploringCauseAndEffectRelationshipsbootcamp"} -{"bootcamp_name": "binairo", "sample_number": 64, "config_file": "binairo", "bootcamp_cls_name": "Binairobootcamp"} -{"bootcamp_name": "korLogicInductionParadox", "sample_number": 64, "config_file": "korLogicInductionParadox", "bootcamp_cls_name": "KorLogicInductionParadoxbootcamp"} -{"bootcamp_name": "galaxies", "sample_number": 64, "config_file": "galaxies", "bootcamp_cls_name": "Galaxiesbootcamp"} -{"bootcamp_name": "BbehMultistepArithmetic", "sample_number": 64, "config_file": "BbehMultistepArithmetic", "bootcamp_cls_name": "BbehMultistepArithmeticbootcamp"} -{"bootcamp_name": "korLogicPropositionalLogicFormalization", "sample_number": 64, "config_file": "korLogicPropositionalLogicFormalization", "bootcamp_cls_name": "KorLogicPropositionalLogicFormalizationbootcamp"} -{"bootcamp_name": "korOperationUnicode25cb", "sample_number": 64, "config_file": "korOperationUnicode25cb", "bootcamp_cls_name": "KorOperationUnicode25cbbootcamp"} -{"bootcamp_name": "LightUp", "sample_number": 64, "config_file": "Light_Up", "bootcamp_cls_name": "LightUpbootcamp"} -{"bootcamp_name": "starbattle", "sample_number": 64, "config_file": "starbattle", "bootcamp_cls_name": "Starbattlebootcamp"} -{"bootcamp_name": "korOperationUnicode25b3", "sample_number": 64, "config_file": "korOperationUnicode25b3", "bootcamp_cls_name": "KorOperationUnicode25b3bootcamp"} -{"bootcamp_name": "sudoku", "sample_number": 64, "config_file": "sudoku", "bootcamp_cls_name": "Sudokubootcamp"} -{"bootcamp_name": "korLogicSpeechActs", "sample_number": 64, "config_file": "korLogicSpeechActs", "bootcamp_cls_name": "KorLogicSpeechActsbootcamp"} -{"bootcamp_name": "korOperationUnicode25a0", "sample_number": 64, "config_file": "korOperationUnicode25a0", "bootcamp_cls_name": "KorOperationUnicode25a0bootcamp"} -{"bootcamp_name": "korLogicStatisticalReasoning", "sample_number": 64, "config_file": "korLogicStatisticalReasoning", "bootcamp_cls_name": "KorLogicStatisticalReasoningbootcamp"} -{"bootcamp_name": "aquarium", "sample_number": 64, "config_file": "aquarium", "bootcamp_cls_name": "Aquariumbootcamp"} -{"bootcamp_name": "korLogicEnumerativeInductiveReasoning", "sample_number": 64, "config_file": "korLogicEnumerativeInductiveReasoning", "bootcamp_cls_name": "KorLogicEnumerativeInductiveReasoningbootcamp"} -{"bootcamp_name": "minesweeper", "sample_number": 64, "config_file": "minesweeper", "bootcamp_cls_name": "Minesweeperbootcamp"} -{"bootcamp_name": "futoshiki", "sample_number": 64, "config_file": "futoshiki", "bootcamp_cls_name": "Futoshikibootcamp"} -{"bootcamp_name": "BbehWebOfLies", "sample_number": 64, "config_file": "BbehWebOfLies", "bootcamp_cls_name": "BbehWebOfLiesbootcamp"} -{"bootcamp_name": "korOperationUnicode2605", "sample_number": 64, "config_file": "korOperationUnicode2605", "bootcamp_cls_name": "KorOperationUnicode2605bootcamp"} -{"bootcamp_name": "korPuzzleLogicPuzzle", "sample_number": 64, "config_file": "korPuzzleLogicPuzzle", "bootcamp_cls_name": "KorPuzzleLogicPuzzlebootcamp"} -{"bootcamp_name": "BbehGeometricShapes", "sample_number": 64, "config_file": "BbehGeometricShapes", "bootcamp_cls_name": "BbehGeometricShapesbootcamp"} -{"bootcamp_name": "korPuzzleWordRootsAndAffixes", "sample_number": 64, "config_file": "korPuzzleWordRootsAndAffixes", "bootcamp_cls_name": "KorPuzzleWordRootsAndAffixesbootcamp"} -{"bootcamp_name": "korPuzzleSkyscrapers", "sample_number": 64, "config_file": "korPuzzleSkyscrapers", "bootcamp_cls_name": "KorPuzzleSkyscrapersbootcamp"} -{"bootcamp_name": "korOperationUnicode25cf", "sample_number": 64, "config_file": "korOperationUnicode25cf", "bootcamp_cls_name": "KorOperationUnicode25cfbootcamp"} -{"bootcamp_name": "korLogicPredicateLogicFormalization", "sample_number": 64, "config_file": "korLogicPredicateLogicFormalization", "bootcamp_cls_name": "KorLogicPredicateLogicFormalizationbootcamp"} -{"bootcamp_name": "korPuzzleArrowMaze", "sample_number": 64, "config_file": "korPuzzleArrowMaze", "bootcamp_cls_name": "KorPuzzleArrowMazebootcamp"} -{"bootcamp_name": "bbehshuffobject", "sample_number": 64, "config_file": "bbeh_shuff_object", "bootcamp_cls_name": "Bbehshuffobjectbootcamp"} -{"bootcamp_name": "korPuzzleKukurasu", "sample_number": 64, "config_file": "korPuzzleKukurasu", "bootcamp_cls_name": "KorPuzzleKukurasubootcamp"} -{"bootcamp_name": "korLogicDynamicLogic", "sample_number": 64, "config_file": "korLogicDynamicLogic", "bootcamp_cls_name": "KorLogicDynamicLogicbootcamp"} -{"bootcamp_name": "korLogicDisjunctiveNormalFormAndConjunctiveNormalForm", "sample_number": 64, "config_file": "korLogicDisjunctiveNormalFormAndConjunctiveNormalForm", "bootcamp_cls_name": "KorLogicDisjunctiveNormalFormAndConjunctiveNormalFormbootcamp"} -{"bootcamp_name": "korPuzzleWordscapes", "sample_number": 64, "config_file": "korPuzzleWordscapes", "bootcamp_cls_name": "KorPuzzleWordscapesbootcamp"} -{"bootcamp_name": "bbehobjectproperties", "sample_number": 64, "config_file": "bbeh_object_properties", "bootcamp_cls_name": "Bbehobjectpropertiesbootcamp"} -{"bootcamp_name": "stitches", "sample_number": 64, "config_file": "stitches", "bootcamp_cls_name": "Stitchesbootcamp"} -{"bootcamp_name": "korPuzzleWordLadder", "sample_number": 64, "config_file": "korPuzzleWordLadder", "bootcamp_cls_name": "KorPuzzleWordLadderbootcamp"} -{"bootcamp_name": "BbehDyckLanguages", "sample_number": 64, "config_file": "BbehDyckLanguages", "bootcamp_cls_name": "BbehDyckLanguagesbootcamp"} -{"bootcamp_name": "calcudoku", "sample_number": 64, "config_file": "calcudoku", "bootcamp_cls_name": "Calcudokubootcamp"} -{"bootcamp_name": "korOperationUnicodeffe1", "sample_number": 64, "config_file": "korOperationUnicodeffe1", "bootcamp_cls_name": "KorOperationUnicodeffe1bootcamp"} -{"bootcamp_name": "korPuzzleCampsite", "sample_number": 64, "config_file": "korPuzzleCampsite", "bootcamp_cls_name": "KorPuzzleCampsitebootcamp"} -{"bootcamp_name": "korOperationUnicode20ac", "sample_number": 64, "config_file": "korOperationUnicode20ac", "bootcamp_cls_name": "KorOperationUnicode20acbootcamp"} -{"bootcamp_name": "korLogicPropositionalLogicConcepts", "sample_number": 64, "config_file": "korLogicPropositionalLogicConcepts", "bootcamp_cls_name": "KorLogicPropositionalLogicConceptsbootcamp"} -{"bootcamp_name": "arc", "sample_number": 64, "config_file": "arc", "bootcamp_cls_name": "Arcbootcamp"} -{"bootcamp_name": "korPuzzleWordSearch", "sample_number": 64, "config_file": "korPuzzleWordSearch", "bootcamp_cls_name": "KorPuzzleWordSearchbootcamp"} -{"bootcamp_name": "cryptomath", "sample_number": 64, "config_file": "crypto_math", "bootcamp_cls_name": "Cryptomathbootcamp"} -{"bootcamp_name": "arrowmaze", "sample_number": 64, "config_file": "arrowmaze", "bootcamp_cls_name": "Arrowmazebootcamp"} -{"bootcamp_name": "korLogicTemporalPropositions", "sample_number": 64, "config_file": "korLogicTemporalPropositions", "bootcamp_cls_name": "KorLogicTemporalPropositionsbootcamp"} -{"bootcamp_name": "bbehbooleanexpressions", "sample_number": 64, "config_file": "bbeh_boolean_expressions", "bootcamp_cls_name": "Bbehbooleanexpressionsbootcamp"} -{"bootcamp_name": "tents", "sample_number": 64, "config_file": "tents", "bootcamp_cls_name": "Tentsbootcamp"} -{"bootcamp_name": "bbehobjectcounting", "sample_number": 64, "config_file": "bbeh_object_counting", "bootcamp_cls_name": "Bbehobjectcountingbootcamp"} -{"bootcamp_name": "game24", "sample_number": 64, "config_file": "game24", "bootcamp_cls_name": "Game24bootcamp"} -{"bootcamp_name": "maze", "sample_number": 64, "config_file": "maze", "bootcamp_cls_name": "Mazebootcamp"} -{"bootcamp_name": "korOperationUnicode25ce", "sample_number": 64, "config_file": "korOperationUnicode25ce", "bootcamp_cls_name": "KorOperationUnicode25cebootcamp"} -{"bootcamp_name": "campsite", "sample_number": 64, "config_file": "campsite", "bootcamp_cls_name": "Campsitebootcamp"} -{"bootcamp_name": "korLogicCanonicalPropositions", "sample_number": 64, "config_file": "korLogicCanonicalPropositions", "bootcamp_cls_name": "KorLogicCanonicalPropositionsbootcamp"} -{"bootcamp_name": "korPuzzleConnectWords", "sample_number": 64, "config_file": "korPuzzleConnectWords", "bootcamp_cls_name": "KorPuzzleConnectWordsbootcamp"} -{"bootcamp_name": "korLogicFormalFallacies", "sample_number": 64, "config_file": "korLogicFormalFallacies", "bootcamp_cls_name": "KorLogicFormalFallaciesbootcamp"} +{"bootcamp_name": "aquarium", "sample_number": 100, "config_file": "aquarium", "bootcamp_cls_name": "Aquariumbootcamp"} +{"bootcamp_name": "arc", "sample_number": 100, "config_file": "arc", "bootcamp_cls_name": "Arcbootcamp"} +{"bootcamp_name": "arrowmaze", "sample_number": 100, "config_file": "arrowmaze", "bootcamp_cls_name": "Arrowmazebootcamp"} +{"bootcamp_name": "bbehboardgameqa", "sample_number": 100, "config_file": "bbeh_boardgame_qa", "bootcamp_cls_name": "Bbehboardgameqabootcamp"} +{"bootcamp_name": "bbehbooleanexpressions", "sample_number": 100, "config_file": "bbeh_boolean_expressions", "bootcamp_cls_name": "Bbehbooleanexpressionsbootcamp"} +{"bootcamp_name": "bbehobjectcounting", "sample_number": 100, "config_file": "bbeh_object_counting", "bootcamp_cls_name": "Bbehobjectcountingbootcamp"} +{"bootcamp_name": "bbehobjectproperties", "sample_number": 100, "config_file": "bbeh_object_properties", "bootcamp_cls_name": "Bbehobjectpropertiesbootcamp"} +{"bootcamp_name": "bbehshuffobject", "sample_number": 100, "config_file": "bbeh_shuff_object", "bootcamp_cls_name": "Bbehshuffobjectbootcamp"} +{"bootcamp_name": "BBEHBuggyTables", "sample_number": 100, "config_file": "BBEHBuggyTables", "bootcamp_cls_name": "BBEHBuggyTablesbootcamp"} +{"bootcamp_name": "BbehDyckLanguages", "sample_number": 100, "config_file": "BbehDyckLanguages", "bootcamp_cls_name": "BbehDyckLanguagesbootcamp"} +{"bootcamp_name": "BbehGeometricShapes", "sample_number": 100, "config_file": "BbehGeometricShapes", "bootcamp_cls_name": "BbehGeometricShapesbootcamp"} +{"bootcamp_name": "BbehMultistepArithmetic", "sample_number": 100, "config_file": "BbehMultistepArithmetic", "bootcamp_cls_name": "BbehMultistepArithmeticbootcamp"} +{"bootcamp_name": "BBEHMultistepArithmeticV2", "sample_number": 100, "config_file": "BBEHMultistepArithmeticV2", "bootcamp_cls_name": "BBEHMultistepArithmeticV2bootcamp"} +{"bootcamp_name": "BbehTemporalSequences", "sample_number": 100, "config_file": "BbehTemporalSequences", "bootcamp_cls_name": "BbehTemporalSequencesbootcamp"} +{"bootcamp_name": "BbehWebOfLies", "sample_number": 100, "config_file": "BbehWebOfLies", "bootcamp_cls_name": "BbehWebOfLiesbootcamp"} +{"bootcamp_name": "BbehWordSorting", "sample_number": 100, "config_file": "BbehWordSorting", "bootcamp_cls_name": "BbehWordSortingbootcamp"} +{"bootcamp_name": "binairo", "sample_number": 100, "config_file": "binairo", "bootcamp_cls_name": "Binairobootcamp"} +{"bootcamp_name": "calcudoku", "sample_number": 100, "config_file": "calcudoku", "bootcamp_cls_name": "Calcudokubootcamp"} +{"bootcamp_name": "campsite", "sample_number": 100, "config_file": "campsite", "bootcamp_cls_name": "Campsitebootcamp"} +{"bootcamp_name": "cipher", "sample_number": 100, "config_file": "cipher", "bootcamp_cls_name": "Cipherbootcamp"} +{"bootcamp_name": "cryptomath", "sample_number": 100, "config_file": "crypto_math", "bootcamp_cls_name": "Cryptomathbootcamp"} +{"bootcamp_name": "dominosa", "sample_number": 100, "config_file": "dominosa", "bootcamp_cls_name": "Dominosabootcamp"} +{"bootcamp_name": "futoshiki", "sample_number": 100, "config_file": "futoshiki", "bootcamp_cls_name": "Futoshikibootcamp"} +{"bootcamp_name": "galaxies", "sample_number": 100, "config_file": "galaxies", "bootcamp_cls_name": "Galaxiesbootcamp"} +{"bootcamp_name": "game24", "sample_number": 100, "config_file": "game24", "bootcamp_cls_name": "Game24bootcamp"} +{"bootcamp_name": "heyawake", "sample_number": 100, "config_file": "heyawake", "bootcamp_cls_name": "Heyawakebootcamp"} +{"bootcamp_name": "hitori", "sample_number": 100, "config_file": "hitori", "bootcamp_cls_name": "Hitoribootcamp"} +{"bootcamp_name": "kakurasu", "sample_number": 100, "config_file": "kakurasu", "bootcamp_cls_name": "Kakurasubootcamp"} +{"bootcamp_name": "kakuro", "sample_number": 100, "config_file": "kakuro", "bootcamp_cls_name": "Kakurobootcamp"} +{"bootcamp_name": "korCipherCustomInverseShiftSubstitutionCipher", "sample_number": 100, "config_file": "korCipherCustomInverseShiftSubstitutionCipher", "bootcamp_cls_name": "KorCipherCustomInverseShiftSubstitutionCipherbootcamp"} +{"bootcamp_name": "korLogicAnalogicalReasoning", "sample_number": 100, "config_file": "korLogicAnalogicalReasoning", "bootcamp_cls_name": "KorLogicAnalogicalReasoningbootcamp"} +{"bootcamp_name": "korLogicCanonicalPropositions", "sample_number": 100, "config_file": "korLogicCanonicalPropositions", "bootcamp_cls_name": "KorLogicCanonicalPropositionsbootcamp"} +{"bootcamp_name": "korLogicCooperativePrinciple", "sample_number": 100, "config_file": "korLogicCooperativePrinciple", "bootcamp_cls_name": "KorLogicCooperativePrinciplebootcamp"} +{"bootcamp_name": "korLogicDefinitions", "sample_number": 100, "config_file": "korLogicDefinitions", "bootcamp_cls_name": "KorLogicDefinitionsbootcamp"} +{"bootcamp_name": "korLogicDerivativeReasoningOfPropositionalLogic", "sample_number": 100, "config_file": "korLogicDerivativeReasoningOfPropositionalLogic", "bootcamp_cls_name": "KorLogicDerivativeReasoningOfPropositionalLogicbootcamp"} +{"bootcamp_name": "korLogicDisjunctiveNormalFormAndConjunctiveNormalForm", "sample_number": 100, "config_file": "korLogicDisjunctiveNormalFormAndConjunctiveNormalForm", "bootcamp_cls_name": "KorLogicDisjunctiveNormalFormAndConjunctiveNormalFormbootcamp"} +{"bootcamp_name": "korLogicDynamicLogic", "sample_number": 100, "config_file": "korLogicDynamicLogic", "bootcamp_cls_name": "KorLogicDynamicLogicbootcamp"} +{"bootcamp_name": "korLogicEnumerativeInductiveReasoning", "sample_number": 100, "config_file": "korLogicEnumerativeInductiveReasoning", "bootcamp_cls_name": "KorLogicEnumerativeInductiveReasoningbootcamp"} +{"bootcamp_name": "korLogicEpistemicLogic", "sample_number": 100, "config_file": "korLogicEpistemicLogic", "bootcamp_cls_name": "KorLogicEpistemicLogicbootcamp"} +{"bootcamp_name": "korLogicEquivalenceCalculus", "sample_number": 100, "config_file": "korLogicEquivalenceCalculus", "bootcamp_cls_name": "KorLogicEquivalenceCalculusbootcamp"} +{"bootcamp_name": "korLogicFigureOfTheSyllogism", "sample_number": 100, "config_file": "korLogicFigureOfTheSyllogism", "bootcamp_cls_name": "KorLogicFigureOfTheSyllogismbootcamp"} +{"bootcamp_name": "korLogicFormalFallacies", "sample_number": 100, "config_file": "korLogicFormalFallacies", "bootcamp_cls_name": "KorLogicFormalFallaciesbootcamp"} +{"bootcamp_name": "korLogicInductionParadox", "sample_number": 100, "config_file": "korLogicInductionParadox", "bootcamp_cls_name": "KorLogicInductionParadoxbootcamp"} +{"bootcamp_name": "korLogicLogicalMethodsForExploringCauseAndEffectRelationships", "sample_number": 100, "config_file": "korLogicLogicalMethodsForExploringCauseAndEffectRelationships", "bootcamp_cls_name": "KorLogicLogicalMethodsForExploringCauseAndEffectRelationshipsbootcamp"} +{"bootcamp_name": "korLogicPredicateLogicFormalization", "sample_number": 100, "config_file": "korLogicPredicateLogicFormalization", "bootcamp_cls_name": "KorLogicPredicateLogicFormalizationbootcamp"} +{"bootcamp_name": "korLogicPropositionalLogicConcepts", "sample_number": 100, "config_file": "korLogicPropositionalLogicConcepts", "bootcamp_cls_name": "KorLogicPropositionalLogicConceptsbootcamp"} +{"bootcamp_name": "korLogicPropositionalLogicFormalization", "sample_number": 100, "config_file": "korLogicPropositionalLogicFormalization", "bootcamp_cls_name": "KorLogicPropositionalLogicFormalizationbootcamp"} +{"bootcamp_name": "korLogicResolution", "sample_number": 100, "config_file": "korLogicResolution", "bootcamp_cls_name": "KorLogicResolutionbootcamp"} +{"bootcamp_name": "korLogicSpeechActs", "sample_number": 100, "config_file": "korLogicSpeechActs", "bootcamp_cls_name": "KorLogicSpeechActsbootcamp"} +{"bootcamp_name": "korLogicStatisticalReasoning", "sample_number": 100, "config_file": "korLogicStatisticalReasoning", "bootcamp_cls_name": "KorLogicStatisticalReasoningbootcamp"} +{"bootcamp_name": "korLogicTemporalPropositions", "sample_number": 100, "config_file": "korLogicTemporalPropositions", "bootcamp_cls_name": "KorLogicTemporalPropositionsbootcamp"} +{"bootcamp_name": "korLogicTruthValueModalPropositions", "sample_number": 100, "config_file": "korLogicTruthValueModalPropositions", "bootcamp_cls_name": "KorLogicTruthValueModalPropositionsbootcamp"} +{"bootcamp_name": "korOperationUnicode0032", "sample_number": 100, "config_file": "korOperationUnicode0032", "bootcamp_cls_name": "KorOperationUnicode0032bootcamp"} +{"bootcamp_name": "korOperationUnicode0033", "sample_number": 100, "config_file": "korOperationUnicode0033", "bootcamp_cls_name": "KorOperationUnicode0033bootcamp"} +{"bootcamp_name": "korOperationUnicode203b", "sample_number": 100, "config_file": "korOperationUnicode203b", "bootcamp_cls_name": "KorOperationUnicode203bbootcamp"} +{"bootcamp_name": "korOperationUnicode20ac", "sample_number": 100, "config_file": "korOperationUnicode20ac", "bootcamp_cls_name": "KorOperationUnicode20acbootcamp"} +{"bootcamp_name": "korOperationUnicode221e", "sample_number": 100, "config_file": "korOperationUnicode221e", "bootcamp_cls_name": "KorOperationUnicode221ebootcamp"} +{"bootcamp_name": "korOperationUnicode2295", "sample_number": 100, "config_file": "korOperationUnicode2295", "bootcamp_cls_name": "KorOperationUnicode2295bootcamp"} +{"bootcamp_name": "korOperationUnicode25a0", "sample_number": 100, "config_file": "korOperationUnicode25a0", "bootcamp_cls_name": "KorOperationUnicode25a0bootcamp"} +{"bootcamp_name": "korOperationUnicode25a1", "sample_number": 100, "config_file": "korOperationUnicode25a1", "bootcamp_cls_name": "KorOperationUnicode25a1bootcamp"} +{"bootcamp_name": "korOperationUnicode25b3", "sample_number": 100, "config_file": "korOperationUnicode25b3", "bootcamp_cls_name": "KorOperationUnicode25b3bootcamp"} +{"bootcamp_name": "korOperationUnicode25bd", "sample_number": 100, "config_file": "korOperationUnicode25bd", "bootcamp_cls_name": "KorOperationUnicode25bdbootcamp"} +{"bootcamp_name": "korOperationUnicode25cb", "sample_number": 100, "config_file": "korOperationUnicode25cb", "bootcamp_cls_name": "KorOperationUnicode25cbbootcamp"} +{"bootcamp_name": "korOperationUnicode25ce", "sample_number": 100, "config_file": "korOperationUnicode25ce", "bootcamp_cls_name": "KorOperationUnicode25cebootcamp"} +{"bootcamp_name": "korOperationUnicode25cf", "sample_number": 100, "config_file": "korOperationUnicode25cf", "bootcamp_cls_name": "KorOperationUnicode25cfbootcamp"} +{"bootcamp_name": "korOperationUnicode2605", "sample_number": 100, "config_file": "korOperationUnicode2605", "bootcamp_cls_name": "KorOperationUnicode2605bootcamp"} +{"bootcamp_name": "korOperationUnicodeffe0", "sample_number": 100, "config_file": "korOperationUnicodeffe0", "bootcamp_cls_name": "KorOperationUnicodeffe0bootcamp"} +{"bootcamp_name": "korOperationUnicodeffe1", "sample_number": 100, "config_file": "korOperationUnicodeffe1", "bootcamp_cls_name": "KorOperationUnicodeffe1bootcamp"} +{"bootcamp_name": "korPuzzle24Points", "sample_number": 100, "config_file": "korPuzzle24Points", "bootcamp_cls_name": "KorPuzzle24Pointsbootcamp"} +{"bootcamp_name": "korPuzzleArrowMaze", "sample_number": 100, "config_file": "korPuzzleArrowMaze", "bootcamp_cls_name": "KorPuzzleArrowMazebootcamp"} +{"bootcamp_name": "korPuzzleCalcudoko", "sample_number": 100, "config_file": "korPuzzleCalcudoko", "bootcamp_cls_name": "KorPuzzleCalcudokobootcamp"} +{"bootcamp_name": "korPuzzleCampsite", "sample_number": 100, "config_file": "korPuzzleCampsite", "bootcamp_cls_name": "KorPuzzleCampsitebootcamp"} +{"bootcamp_name": "korPuzzleConnectWords", "sample_number": 100, "config_file": "korPuzzleConnectWords", "bootcamp_cls_name": "KorPuzzleConnectWordsbootcamp"} +{"bootcamp_name": "korPuzzleCryptoMath", "sample_number": 100, "config_file": "korPuzzleCryptoMath", "bootcamp_cls_name": "KorPuzzleCryptoMathbootcamp"} +{"bootcamp_name": "korPuzzleKukurasu", "sample_number": 100, "config_file": "korPuzzleKukurasu", "bootcamp_cls_name": "KorPuzzleKukurasubootcamp"} +{"bootcamp_name": "korPuzzleLogicPuzzle", "sample_number": 100, "config_file": "korPuzzleLogicPuzzle", "bootcamp_cls_name": "KorPuzzleLogicPuzzlebootcamp"} +{"bootcamp_name": "korPuzzleMathPath", "sample_number": 100, "config_file": "korPuzzleMathPath", "bootcamp_cls_name": "KorPuzzleMathPathbootcamp"} +{"bootcamp_name": "korPuzzleMinesweeper", "sample_number": 100, "config_file": "korPuzzleMinesweeper", "bootcamp_cls_name": "KorPuzzleMinesweeperbootcamp"} +{"bootcamp_name": "korPuzzleSkyscrapers", "sample_number": 100, "config_file": "korPuzzleSkyscrapers", "bootcamp_cls_name": "KorPuzzleSkyscrapersbootcamp"} +{"bootcamp_name": "korPuzzleWordBrainTeasers", "sample_number": 100, "config_file": "korPuzzleWordBrainTeasers", "bootcamp_cls_name": "KorPuzzleWordBrainTeasersbootcamp"} +{"bootcamp_name": "korPuzzleWordLadder", "sample_number": 100, "config_file": "korPuzzleWordLadder", "bootcamp_cls_name": "KorPuzzleWordLadderbootcamp"} +{"bootcamp_name": "korPuzzleWordRootsAndAffixes", "sample_number": 100, "config_file": "korPuzzleWordRootsAndAffixes", "bootcamp_cls_name": "KorPuzzleWordRootsAndAffixesbootcamp"} +{"bootcamp_name": "korPuzzleWordscapes", "sample_number": 100, "config_file": "korPuzzleWordscapes", "bootcamp_cls_name": "KorPuzzleWordscapesbootcamp"} +{"bootcamp_name": "korPuzzleWordSearch", "sample_number": 100, "config_file": "korPuzzleWordSearch", "bootcamp_cls_name": "KorPuzzleWordSearchbootcamp"} +{"bootcamp_name": "LightUp", "sample_number": 100, "config_file": "Light_Up", "bootcamp_cls_name": "LightUpbootcamp"} +{"bootcamp_name": "maze", "sample_number": 100, "config_file": "maze", "bootcamp_cls_name": "Mazebootcamp"} +{"bootcamp_name": "minesweeper", "sample_number": 100, "config_file": "minesweeper", "bootcamp_cls_name": "Minesweeperbootcamp"} +{"bootcamp_name": "nonograms", "sample_number": 100, "config_file": "nonograms", "bootcamp_cls_name": "Nonogramsbootcamp"} +{"bootcamp_name": "pipes", "sample_number": 100, "config_file": "pipes", "bootcamp_cls_name": "Pipesbootcamp"} +{"bootcamp_name": "skyscrapers", "sample_number": 100, "config_file": "skyscrapers", "bootcamp_cls_name": "Skyscrapersbootcamp"} +{"bootcamp_name": "slitherlink", "sample_number": 100, "config_file": "slitherlink", "bootcamp_cls_name": "Slitherlinkbootcamp"} +{"bootcamp_name": "starbattle", "sample_number": 100, "config_file": "starbattle", "bootcamp_cls_name": "Starbattlebootcamp"} +{"bootcamp_name": "stitches", "sample_number": 100, "config_file": "stitches", "bootcamp_cls_name": "Stitchesbootcamp"} +{"bootcamp_name": "sudoku", "sample_number": 100, "config_file": "sudoku", "bootcamp_cls_name": "Sudokubootcamp"} +{"bootcamp_name": "tents", "sample_number": 100, "config_file": "tents", "bootcamp_cls_name": "Tentsbootcamp"} +{"bootcamp_name": "thermometers", "sample_number": 100, "config_file": "thermometers", "bootcamp_cls_name": "Thermometersbootcamp"} diff --git a/examples/pipelines/data_configs/data_config_train.jsonl b/examples/pipelines/data_configs/data_config_train.jsonl index eaa8bbe..35d358f 100644 --- a/examples/pipelines/data_configs/data_config_train.jsonl +++ b/examples/pipelines/data_configs/data_config_train.jsonl @@ -1,81 +1,96 @@ -{"bootcamp_name": "cipher", "sample_number": 200, "config_file": "cipher", "bootcamp_cls_name": "Cipherbootcamp"} -{"bootcamp_name": "korLogicDisjunctiveNormalFormAndConjunctiveNormalForm", "sample_number": 1000, "config_file": "korLogicDisjunctiveNormalFormAndConjunctiveNormalForm", "bootcamp_cls_name": "KorLogicDisjunctiveNormalFormAndConjunctiveNormalFormbootcamp"} -{"bootcamp_name": "korLogicFigureOfTheSyllogism", "sample_number": 1000, "config_file": "korLogicFigureOfTheSyllogism", "bootcamp_cls_name": "KorLogicFigureOfTheSyllogismbootcamp"} -{"bootcamp_name": "korPuzzle24Points", "sample_number": 1000, "config_file": "korPuzzle24Points", "bootcamp_cls_name": "KorPuzzle24Pointsbootcamp"} -{"bootcamp_name": "stitches", "sample_number": 2000, "config_file": "stitches", "bootcamp_cls_name": "Stitchesbootcamp"} -{"bootcamp_name": "bbehobjectproperties", "sample_number": 2000, "config_file": "bbeh_object_properties", "bootcamp_cls_name": "Bbehobjectpropertiesbootcamp"} -{"bootcamp_name": "thermometers", "sample_number": 2000, "config_file": "thermometers", "bootcamp_cls_name": "Thermometersbootcamp"} -{"bootcamp_name": "BBEHBuggyTables", "sample_number": 2000, "config_file": "BBEHBuggyTables", "bootcamp_cls_name": "BBEHBuggyTablesbootcamp"} -{"bootcamp_name": "korLogicDynamicLogic", "sample_number": 1000, "config_file": "korLogicDynamicLogic", "bootcamp_cls_name": "KorLogicDynamicLogicbootcamp"} -{"bootcamp_name": "futoshiki", "sample_number": 2000, "config_file": "futoshiki", "bootcamp_cls_name": "Futoshikibootcamp"} -{"bootcamp_name": "korLogicStatisticalReasoning", "sample_number": 1000, "config_file": "korLogicStatisticalReasoning", "bootcamp_cls_name": "KorLogicStatisticalReasoningbootcamp"} -{"bootcamp_name": "nonograms", "sample_number": 2000, "config_file": "nonograms", "bootcamp_cls_name": "Nonogramsbootcamp"} -{"bootcamp_name": "bbehboardgameqa", "sample_number": 2000, "config_file": "bbeh_boardgame_qa", "bootcamp_cls_name": "Bbehboardgameqabootcamp"} -{"bootcamp_name": "korPuzzleWordscapes", "sample_number": 1000, "config_file": "korPuzzleWordscapes", "bootcamp_cls_name": "KorPuzzleWordscapesbootcamp"} -{"bootcamp_name": "BbehDyckLanguages", "sample_number": 2000, "config_file": "BbehDyckLanguages", "bootcamp_cls_name": "BbehDyckLanguagesbootcamp"} -{"bootcamp_name": "korOperationUnicode2605", "sample_number": 1000, "config_file": "korOperationUnicode2605", "bootcamp_cls_name": "KorOperationUnicode2605bootcamp"} -{"bootcamp_name": "korLogicResolution", "sample_number": 1000, "config_file": "korLogicResolution", "bootcamp_cls_name": "KorLogicResolutionbootcamp"} -{"bootcamp_name": "korPuzzleCryptoMath", "sample_number": 1000, "config_file": "korPuzzleCryptoMath", "bootcamp_cls_name": "KorPuzzleCryptoMathbootcamp"} -{"bootcamp_name": "korPuzzleWordRootsAndAffixes", "sample_number": 1000, "config_file": "korPuzzleWordRootsAndAffixes", "bootcamp_cls_name": "KorPuzzleWordRootsAndAffixesbootcamp"} -{"bootcamp_name": "korPuzzleSkyscrapers", "sample_number": 1000, "config_file": "korPuzzleSkyscrapers", "bootcamp_cls_name": "KorPuzzleSkyscrapersbootcamp"} -{"bootcamp_name": "cryptomath", "sample_number": 2000, "config_file": "crypto_math", "bootcamp_cls_name": "Cryptomathbootcamp"} -{"bootcamp_name": "korLogicCanonicalPropositions", "sample_number": 1000, "config_file": "korLogicCanonicalPropositions", "bootcamp_cls_name": "KorLogicCanonicalPropositionsbootcamp"} -{"bootcamp_name": "korOperationUnicodeffe0", "sample_number": 1000, "config_file": "korOperationUnicodeffe0", "bootcamp_cls_name": "KorOperationUnicodeffe0bootcamp"} -{"bootcamp_name": "BbehWebOfLies", "sample_number": 1000, "config_file": "BbehWebOfLies", "bootcamp_cls_name": "BbehWebOfLiesbootcamp"} -{"bootcamp_name": "korLogicPropositionalLogicFormalization", "sample_number": 1000, "config_file": "korLogicPropositionalLogicFormalization", "bootcamp_cls_name": "KorLogicPropositionalLogicFormalizationbootcamp"} -{"bootcamp_name": "korLogicDefinitions", "sample_number": 1000, "config_file": "korLogicDefinitions", "bootcamp_cls_name": "KorLogicDefinitionsbootcamp"} -{"bootcamp_name": "minesweeper", "sample_number": 2000, "config_file": "minesweeper", "bootcamp_cls_name": "Minesweeperbootcamp"} -{"bootcamp_name": "BbehWordSorting", "sample_number": 800, "config_file": "BbehWordSorting", "bootcamp_cls_name": "BbehWordSortingbootcamp"} -{"bootcamp_name": "korLogicDerivativeReasoningOfPropositionalLogic", "sample_number": 1000, "config_file": "korLogicDerivativeReasoningOfPropositionalLogic", "bootcamp_cls_name": "KorLogicDerivativeReasoningOfPropositionalLogicbootcamp"} -{"bootcamp_name": "korOperationUnicode2295", "sample_number": 1000, "config_file": "korOperationUnicode2295", "bootcamp_cls_name": "KorOperationUnicode2295bootcamp"} -{"bootcamp_name": "bbehbooleanexpressions", "sample_number": 2000, "config_file": "bbeh_boolean_expressions", "bootcamp_cls_name": "Bbehbooleanexpressionsbootcamp"} -{"bootcamp_name": "starbattle", "sample_number": 2000, "config_file": "starbattle", "bootcamp_cls_name": "Starbattlebootcamp"} -{"bootcamp_name": "bbehobjectcounting", "sample_number": 2000, "config_file": "bbeh_object_counting", "bootcamp_cls_name": "Bbehobjectcountingbootcamp"} -{"bootcamp_name": "arrowmaze", "sample_number": 2000, "config_file": "arrowmaze", "bootcamp_cls_name": "Arrowmazebootcamp"} -{"bootcamp_name": "korPuzzleLogicPuzzle", "sample_number": 1000, "config_file": "korPuzzleLogicPuzzle", "bootcamp_cls_name": "KorPuzzleLogicPuzzlebootcamp"} -{"bootcamp_name": "sudoku", "sample_number": 2000, "config_file": "sudoku", "bootcamp_cls_name": "Sudokubootcamp"} -{"bootcamp_name": "arc", "sample_number": 10000, "config_file": "arc", "bootcamp_cls_name": "Arcbootcamp"} -{"bootcamp_name": "korOperationUnicode25a1", "sample_number": 1000, "config_file": "korOperationUnicode25a1", "bootcamp_cls_name": "KorOperationUnicode25a1bootcamp"} +{"bootcamp_name": "aquarium", "sample_number": 1000, "config_file": "aquarium", "bootcamp_cls_name": "Aquariumbootcamp"} +{"bootcamp_name": "arc", "sample_number": 1000, "config_file": "arc", "bootcamp_cls_name": "Arcbootcamp"} +{"bootcamp_name": "arrowmaze", "sample_number": 1000, "config_file": "arrowmaze", "bootcamp_cls_name": "Arrowmazebootcamp"} +{"bootcamp_name": "bbehboardgameqa", "sample_number": 1000, "config_file": "bbeh_boardgame_qa", "bootcamp_cls_name": "Bbehboardgameqabootcamp"} +{"bootcamp_name": "bbehbooleanexpressions", "sample_number": 1000, "config_file": "bbeh_boolean_expressions", "bootcamp_cls_name": "Bbehbooleanexpressionsbootcamp"} +{"bootcamp_name": "bbehobjectcounting", "sample_number": 1000, "config_file": "bbeh_object_counting", "bootcamp_cls_name": "Bbehobjectcountingbootcamp"} +{"bootcamp_name": "bbehobjectproperties", "sample_number": 1000, "config_file": "bbeh_object_properties", "bootcamp_cls_name": "Bbehobjectpropertiesbootcamp"} {"bootcamp_name": "bbehshuffobject", "sample_number": 1000, "config_file": "bbeh_shuff_object", "bootcamp_cls_name": "Bbehshuffobjectbootcamp"} -{"bootcamp_name": "game24", "sample_number": 8000, "config_file": "game24", "bootcamp_cls_name": "Game24bootcamp"} -{"bootcamp_name": "BbehGeometricShapes", "sample_number": 2000, "config_file": "BbehGeometricShapes", "bootcamp_cls_name": "BbehGeometricShapesbootcamp"} -{"bootcamp_name": "korPuzzleKukurasu", "sample_number": 1000, "config_file": "korPuzzleKukurasu", "bootcamp_cls_name": "KorPuzzleKukurasubootcamp"} -{"bootcamp_name": "korPuzzleWordSearch", "sample_number": 1000, "config_file": "korPuzzleWordSearch", "bootcamp_cls_name": "KorPuzzleWordSearchbootcamp"} -{"bootcamp_name": "korLogicCooperativePrinciple", "sample_number": 1000, "config_file": "korLogicCooperativePrinciple", "bootcamp_cls_name": "KorLogicCooperativePrinciplebootcamp"} -{"bootcamp_name": "korPuzzleCalcudoko", "sample_number": 1000, "config_file": "korPuzzleCalcudoko", "bootcamp_cls_name": "KorPuzzleCalcudokobootcamp"} -{"bootcamp_name": "BbehMultistepArithmetic", "sample_number": 2000, "config_file": "BbehMultistepArithmetic", "bootcamp_cls_name": "BbehMultistepArithmeticbootcamp"} -{"bootcamp_name": "korLogicSpeechActs", "sample_number": 1000, "config_file": "korLogicSpeechActs", "bootcamp_cls_name": "KorLogicSpeechActsbootcamp"} -{"bootcamp_name": "korPuzzleWordLadder", "sample_number": 1000, "config_file": "korPuzzleWordLadder", "bootcamp_cls_name": "KorPuzzleWordLadderbootcamp"} -{"bootcamp_name": "korPuzzleConnectWords", "sample_number": 1000, "config_file": "korPuzzleConnectWords", "bootcamp_cls_name": "KorPuzzleConnectWordsbootcamp"} -{"bootcamp_name": "korLogicLogicalMethodsForExploringCauseAndEffectRelationships", "sample_number": 1000, "config_file": "korLogicLogicalMethodsForExploringCauseAndEffectRelationships", "bootcamp_cls_name": "KorLogicLogicalMethodsForExploringCauseAndEffectRelationshipsbootcamp"} -{"bootcamp_name": "korPuzzleArrowMaze", "sample_number": 1000, "config_file": "korPuzzleArrowMaze", "bootcamp_cls_name": "KorPuzzleArrowMazebootcamp"} -{"bootcamp_name": "korPuzzleWordBrainTeasers", "sample_number": 1000, "config_file": "korPuzzleWordBrainTeasers", "bootcamp_cls_name": "KorPuzzleWordBrainTeasersbootcamp"} -{"bootcamp_name": "LightUp", "sample_number": 2000, "config_file": "Light_Up", "bootcamp_cls_name": "LightUpbootcamp"} -{"bootcamp_name": "korOperationUnicode25bd", "sample_number": 1000, "config_file": "korOperationUnicode25bd", "bootcamp_cls_name": "KorOperationUnicode25bdbootcamp"} -{"bootcamp_name": "binairo", "sample_number": 2000, "config_file": "binairo", "bootcamp_cls_name": "Binairobootcamp"} -{"bootcamp_name": "korOperationUnicode25b3", "sample_number": 1000, "config_file": "korOperationUnicode25b3", "bootcamp_cls_name": "KorOperationUnicode25b3bootcamp"} -{"bootcamp_name": "korLogicFormalFallacies", "sample_number": 1000, "config_file": "korLogicFormalFallacies", "bootcamp_cls_name": "KorLogicFormalFallaciesbootcamp"} -{"bootcamp_name": "korOperationUnicode25cb", "sample_number": 1000, "config_file": "korOperationUnicode25cb", "bootcamp_cls_name": "KorOperationUnicode25cbbootcamp"} -{"bootcamp_name": "korOperationUnicodeffe1", "sample_number": 1000, "config_file": "korOperationUnicodeffe1", "bootcamp_cls_name": "KorOperationUnicodeffe1bootcamp"} -{"bootcamp_name": "korLogicTemporalPropositions", "sample_number": 1000, "config_file": "korLogicTemporalPropositions", "bootcamp_cls_name": "KorLogicTemporalPropositionsbootcamp"} -{"bootcamp_name": "korLogicEquivalenceCalculus", "sample_number": 1000, "config_file": "korLogicEquivalenceCalculus", "bootcamp_cls_name": "KorLogicEquivalenceCalculusbootcamp"} -{"bootcamp_name": "korLogicEpistemicLogic", "sample_number": 1000, "config_file": "korLogicEpistemicLogic", "bootcamp_cls_name": "KorLogicEpistemicLogicbootcamp"} -{"bootcamp_name": "maze", "sample_number": 2000, "config_file": "maze", "bootcamp_cls_name": "Mazebootcamp"} -{"bootcamp_name": "calcudoku", "sample_number": 2000, "config_file": "calcudoku", "bootcamp_cls_name": "Calcudokubootcamp"} -{"bootcamp_name": "aquarium", "sample_number": 2000, "config_file": "aquarium", "bootcamp_cls_name": "Aquariumbootcamp"} -{"bootcamp_name": "kakurasu", "sample_number": 2000, "config_file": "kakurasu", "bootcamp_cls_name": "Kakurasubootcamp"} -{"bootcamp_name": "korLogicTruthValueModalPropositions", "sample_number": 1000, "config_file": "korLogicTruthValueModalPropositions", "bootcamp_cls_name": "KorLogicTruthValueModalPropositionsbootcamp"} -{"bootcamp_name": "tents", "sample_number": 2000, "config_file": "tents", "bootcamp_cls_name": "Tentsbootcamp"} -{"bootcamp_name": "korLogicInductionParadox", "sample_number": 1000, "config_file": "korLogicInductionParadox", "bootcamp_cls_name": "KorLogicInductionParadoxbootcamp"} -{"bootcamp_name": "korPuzzleCampsite", "sample_number": 1000, "config_file": "korPuzzleCampsite", "bootcamp_cls_name": "KorPuzzleCampsitebootcamp"} +{"bootcamp_name": "BBEHBuggyTables", "sample_number": 1000, "config_file": "BBEHBuggyTables", "bootcamp_cls_name": "BBEHBuggyTablesbootcamp"} +{"bootcamp_name": "BbehDyckLanguages", "sample_number": 1000, "config_file": "BbehDyckLanguages", "bootcamp_cls_name": "BbehDyckLanguagesbootcamp"} +{"bootcamp_name": "BbehGeometricShapes", "sample_number": 1000, "config_file": "BbehGeometricShapes", "bootcamp_cls_name": "BbehGeometricShapesbootcamp"} +{"bootcamp_name": "BbehMultistepArithmetic", "sample_number": 1000, "config_file": "BbehMultistepArithmetic", "bootcamp_cls_name": "BbehMultistepArithmeticbootcamp"} +{"bootcamp_name": "BBEHMultistepArithmeticV2", "sample_number": 1000, "config_file": "BBEHMultistepArithmeticV2", "bootcamp_cls_name": "BBEHMultistepArithmeticV2bootcamp"} +{"bootcamp_name": "BbehTemporalSequences", "sample_number": 1000, "config_file": "BbehTemporalSequences", "bootcamp_cls_name": "BbehTemporalSequencesbootcamp"} +{"bootcamp_name": "BbehWebOfLies", "sample_number": 1000, "config_file": "BbehWebOfLies", "bootcamp_cls_name": "BbehWebOfLiesbootcamp"} +{"bootcamp_name": "BbehWordSorting", "sample_number": 1000, "config_file": "BbehWordSorting", "bootcamp_cls_name": "BbehWordSortingbootcamp"} +{"bootcamp_name": "binairo", "sample_number": 1000, "config_file": "binairo", "bootcamp_cls_name": "Binairobootcamp"} +{"bootcamp_name": "calcudoku", "sample_number": 1000, "config_file": "calcudoku", "bootcamp_cls_name": "Calcudokubootcamp"} +{"bootcamp_name": "campsite", "sample_number": 1000, "config_file": "campsite", "bootcamp_cls_name": "Campsitebootcamp"} +{"bootcamp_name": "cipher", "sample_number": 1000, "config_file": "cipher", "bootcamp_cls_name": "Cipherbootcamp"} +{"bootcamp_name": "cryptomath", "sample_number": 1000, "config_file": "crypto_math", "bootcamp_cls_name": "Cryptomathbootcamp"} +{"bootcamp_name": "dominosa", "sample_number": 1000, "config_file": "dominosa", "bootcamp_cls_name": "Dominosabootcamp"} +{"bootcamp_name": "futoshiki", "sample_number": 1000, "config_file": "futoshiki", "bootcamp_cls_name": "Futoshikibootcamp"} +{"bootcamp_name": "galaxies", "sample_number": 1000, "config_file": "galaxies", "bootcamp_cls_name": "Galaxiesbootcamp"} +{"bootcamp_name": "game24", "sample_number": 1000, "config_file": "game24", "bootcamp_cls_name": "Game24bootcamp"} +{"bootcamp_name": "heyawake", "sample_number": 1000, "config_file": "heyawake", "bootcamp_cls_name": "Heyawakebootcamp"} +{"bootcamp_name": "hitori", "sample_number": 1000, "config_file": "hitori", "bootcamp_cls_name": "Hitoribootcamp"} +{"bootcamp_name": "kakurasu", "sample_number": 1000, "config_file": "kakurasu", "bootcamp_cls_name": "Kakurasubootcamp"} +{"bootcamp_name": "kakuro", "sample_number": 1000, "config_file": "kakuro", "bootcamp_cls_name": "Kakurobootcamp"} +{"bootcamp_name": "korCipherCustomInverseShiftSubstitutionCipher", "sample_number": 1000, "config_file": "korCipherCustomInverseShiftSubstitutionCipher", "bootcamp_cls_name": "KorCipherCustomInverseShiftSubstitutionCipherbootcamp"} {"bootcamp_name": "korLogicAnalogicalReasoning", "sample_number": 1000, "config_file": "korLogicAnalogicalReasoning", "bootcamp_cls_name": "KorLogicAnalogicalReasoningbootcamp"} -{"bootcamp_name": "campsite", "sample_number": 2000, "config_file": "campsite", "bootcamp_cls_name": "Campsitebootcamp"} -{"bootcamp_name": "korOperationUnicode25ce", "sample_number": 1000, "config_file": "korOperationUnicode25ce", "bootcamp_cls_name": "KorOperationUnicode25cebootcamp"} -{"bootcamp_name": "dominosa", "sample_number": 2000, "config_file": "dominosa", "bootcamp_cls_name": "Dominosabootcamp"} -{"bootcamp_name": "korOperationUnicode20ac", "sample_number": 1000, "config_file": "korOperationUnicode20ac", "bootcamp_cls_name": "KorOperationUnicode20acbootcamp"} +{"bootcamp_name": "korLogicCanonicalPropositions", "sample_number": 1000, "config_file": "korLogicCanonicalPropositions", "bootcamp_cls_name": "KorLogicCanonicalPropositionsbootcamp"} +{"bootcamp_name": "korLogicCooperativePrinciple", "sample_number": 1000, "config_file": "korLogicCooperativePrinciple", "bootcamp_cls_name": "KorLogicCooperativePrinciplebootcamp"} +{"bootcamp_name": "korLogicDefinitions", "sample_number": 1000, "config_file": "korLogicDefinitions", "bootcamp_cls_name": "KorLogicDefinitionsbootcamp"} +{"bootcamp_name": "korLogicDerivativeReasoningOfPropositionalLogic", "sample_number": 1000, "config_file": "korLogicDerivativeReasoningOfPropositionalLogic", "bootcamp_cls_name": "KorLogicDerivativeReasoningOfPropositionalLogicbootcamp"} +{"bootcamp_name": "korLogicDisjunctiveNormalFormAndConjunctiveNormalForm", "sample_number": 1000, "config_file": "korLogicDisjunctiveNormalFormAndConjunctiveNormalForm", "bootcamp_cls_name": "KorLogicDisjunctiveNormalFormAndConjunctiveNormalFormbootcamp"} +{"bootcamp_name": "korLogicDynamicLogic", "sample_number": 1000, "config_file": "korLogicDynamicLogic", "bootcamp_cls_name": "KorLogicDynamicLogicbootcamp"} {"bootcamp_name": "korLogicEnumerativeInductiveReasoning", "sample_number": 1000, "config_file": "korLogicEnumerativeInductiveReasoning", "bootcamp_cls_name": "KorLogicEnumerativeInductiveReasoningbootcamp"} -{"bootcamp_name": "korLogicPropositionalLogicConcepts", "sample_number": 1000, "config_file": "korLogicPropositionalLogicConcepts", "bootcamp_cls_name": "KorLogicPropositionalLogicConceptsbootcamp"} -{"bootcamp_name": "korOperationUnicode25cf", "sample_number": 1000, "config_file": "korOperationUnicode25cf", "bootcamp_cls_name": "KorOperationUnicode25cfbootcamp"} +{"bootcamp_name": "korLogicEpistemicLogic", "sample_number": 1000, "config_file": "korLogicEpistemicLogic", "bootcamp_cls_name": "KorLogicEpistemicLogicbootcamp"} +{"bootcamp_name": "korLogicEquivalenceCalculus", "sample_number": 1000, "config_file": "korLogicEquivalenceCalculus", "bootcamp_cls_name": "KorLogicEquivalenceCalculusbootcamp"} +{"bootcamp_name": "korLogicFigureOfTheSyllogism", "sample_number": 1000, "config_file": "korLogicFigureOfTheSyllogism", "bootcamp_cls_name": "KorLogicFigureOfTheSyllogismbootcamp"} +{"bootcamp_name": "korLogicFormalFallacies", "sample_number": 1000, "config_file": "korLogicFormalFallacies", "bootcamp_cls_name": "KorLogicFormalFallaciesbootcamp"} +{"bootcamp_name": "korLogicInductionParadox", "sample_number": 1000, "config_file": "korLogicInductionParadox", "bootcamp_cls_name": "KorLogicInductionParadoxbootcamp"} +{"bootcamp_name": "korLogicLogicalMethodsForExploringCauseAndEffectRelationships", "sample_number": 1000, "config_file": "korLogicLogicalMethodsForExploringCauseAndEffectRelationships", "bootcamp_cls_name": "KorLogicLogicalMethodsForExploringCauseAndEffectRelationshipsbootcamp"} {"bootcamp_name": "korLogicPredicateLogicFormalization", "sample_number": 1000, "config_file": "korLogicPredicateLogicFormalization", "bootcamp_cls_name": "KorLogicPredicateLogicFormalizationbootcamp"} -{"bootcamp_name": "galaxies", "sample_number": 2000, "config_file": "galaxies", "bootcamp_cls_name": "Galaxiesbootcamp"} +{"bootcamp_name": "korLogicPropositionalLogicConcepts", "sample_number": 1000, "config_file": "korLogicPropositionalLogicConcepts", "bootcamp_cls_name": "KorLogicPropositionalLogicConceptsbootcamp"} +{"bootcamp_name": "korLogicPropositionalLogicFormalization", "sample_number": 1000, "config_file": "korLogicPropositionalLogicFormalization", "bootcamp_cls_name": "KorLogicPropositionalLogicFormalizationbootcamp"} +{"bootcamp_name": "korLogicResolution", "sample_number": 1000, "config_file": "korLogicResolution", "bootcamp_cls_name": "KorLogicResolutionbootcamp"} +{"bootcamp_name": "korLogicSpeechActs", "sample_number": 1000, "config_file": "korLogicSpeechActs", "bootcamp_cls_name": "KorLogicSpeechActsbootcamp"} +{"bootcamp_name": "korLogicStatisticalReasoning", "sample_number": 1000, "config_file": "korLogicStatisticalReasoning", "bootcamp_cls_name": "KorLogicStatisticalReasoningbootcamp"} +{"bootcamp_name": "korLogicTemporalPropositions", "sample_number": 1000, "config_file": "korLogicTemporalPropositions", "bootcamp_cls_name": "KorLogicTemporalPropositionsbootcamp"} +{"bootcamp_name": "korLogicTruthValueModalPropositions", "sample_number": 1000, "config_file": "korLogicTruthValueModalPropositions", "bootcamp_cls_name": "KorLogicTruthValueModalPropositionsbootcamp"} +{"bootcamp_name": "korOperationUnicode0032", "sample_number": 1000, "config_file": "korOperationUnicode0032", "bootcamp_cls_name": "KorOperationUnicode0032bootcamp"} +{"bootcamp_name": "korOperationUnicode0033", "sample_number": 1000, "config_file": "korOperationUnicode0033", "bootcamp_cls_name": "KorOperationUnicode0033bootcamp"} +{"bootcamp_name": "korOperationUnicode203b", "sample_number": 1000, "config_file": "korOperationUnicode203b", "bootcamp_cls_name": "KorOperationUnicode203bbootcamp"} +{"bootcamp_name": "korOperationUnicode20ac", "sample_number": 1000, "config_file": "korOperationUnicode20ac", "bootcamp_cls_name": "KorOperationUnicode20acbootcamp"} +{"bootcamp_name": "korOperationUnicode221e", "sample_number": 1000, "config_file": "korOperationUnicode221e", "bootcamp_cls_name": "KorOperationUnicode221ebootcamp"} +{"bootcamp_name": "korOperationUnicode2295", "sample_number": 1000, "config_file": "korOperationUnicode2295", "bootcamp_cls_name": "KorOperationUnicode2295bootcamp"} {"bootcamp_name": "korOperationUnicode25a0", "sample_number": 1000, "config_file": "korOperationUnicode25a0", "bootcamp_cls_name": "KorOperationUnicode25a0bootcamp"} +{"bootcamp_name": "korOperationUnicode25a1", "sample_number": 1000, "config_file": "korOperationUnicode25a1", "bootcamp_cls_name": "KorOperationUnicode25a1bootcamp"} +{"bootcamp_name": "korOperationUnicode25b3", "sample_number": 1000, "config_file": "korOperationUnicode25b3", "bootcamp_cls_name": "KorOperationUnicode25b3bootcamp"} +{"bootcamp_name": "korOperationUnicode25bd", "sample_number": 1000, "config_file": "korOperationUnicode25bd", "bootcamp_cls_name": "KorOperationUnicode25bdbootcamp"} +{"bootcamp_name": "korOperationUnicode25cb", "sample_number": 1000, "config_file": "korOperationUnicode25cb", "bootcamp_cls_name": "KorOperationUnicode25cbbootcamp"} +{"bootcamp_name": "korOperationUnicode25ce", "sample_number": 1000, "config_file": "korOperationUnicode25ce", "bootcamp_cls_name": "KorOperationUnicode25cebootcamp"} +{"bootcamp_name": "korOperationUnicode25cf", "sample_number": 1000, "config_file": "korOperationUnicode25cf", "bootcamp_cls_name": "KorOperationUnicode25cfbootcamp"} +{"bootcamp_name": "korOperationUnicode2605", "sample_number": 1000, "config_file": "korOperationUnicode2605", "bootcamp_cls_name": "KorOperationUnicode2605bootcamp"} +{"bootcamp_name": "korOperationUnicodeffe0", "sample_number": 1000, "config_file": "korOperationUnicodeffe0", "bootcamp_cls_name": "KorOperationUnicodeffe0bootcamp"} +{"bootcamp_name": "korOperationUnicodeffe1", "sample_number": 1000, "config_file": "korOperationUnicodeffe1", "bootcamp_cls_name": "KorOperationUnicodeffe1bootcamp"} +{"bootcamp_name": "korPuzzle24Points", "sample_number": 1000, "config_file": "korPuzzle24Points", "bootcamp_cls_name": "KorPuzzle24Pointsbootcamp"} +{"bootcamp_name": "korPuzzleArrowMaze", "sample_number": 1000, "config_file": "korPuzzleArrowMaze", "bootcamp_cls_name": "KorPuzzleArrowMazebootcamp"} +{"bootcamp_name": "korPuzzleCalcudoko", "sample_number": 1000, "config_file": "korPuzzleCalcudoko", "bootcamp_cls_name": "KorPuzzleCalcudokobootcamp"} +{"bootcamp_name": "korPuzzleCampsite", "sample_number": 1000, "config_file": "korPuzzleCampsite", "bootcamp_cls_name": "KorPuzzleCampsitebootcamp"} +{"bootcamp_name": "korPuzzleConnectWords", "sample_number": 1000, "config_file": "korPuzzleConnectWords", "bootcamp_cls_name": "KorPuzzleConnectWordsbootcamp"} +{"bootcamp_name": "korPuzzleCryptoMath", "sample_number": 1000, "config_file": "korPuzzleCryptoMath", "bootcamp_cls_name": "KorPuzzleCryptoMathbootcamp"} +{"bootcamp_name": "korPuzzleKukurasu", "sample_number": 1000, "config_file": "korPuzzleKukurasu", "bootcamp_cls_name": "KorPuzzleKukurasubootcamp"} +{"bootcamp_name": "korPuzzleLogicPuzzle", "sample_number": 1000, "config_file": "korPuzzleLogicPuzzle", "bootcamp_cls_name": "KorPuzzleLogicPuzzlebootcamp"} +{"bootcamp_name": "korPuzzleMathPath", "sample_number": 1000, "config_file": "korPuzzleMathPath", "bootcamp_cls_name": "KorPuzzleMathPathbootcamp"} +{"bootcamp_name": "korPuzzleMinesweeper", "sample_number": 1000, "config_file": "korPuzzleMinesweeper", "bootcamp_cls_name": "KorPuzzleMinesweeperbootcamp"} +{"bootcamp_name": "korPuzzleSkyscrapers", "sample_number": 1000, "config_file": "korPuzzleSkyscrapers", "bootcamp_cls_name": "KorPuzzleSkyscrapersbootcamp"} +{"bootcamp_name": "korPuzzleWordBrainTeasers", "sample_number": 1000, "config_file": "korPuzzleWordBrainTeasers", "bootcamp_cls_name": "KorPuzzleWordBrainTeasersbootcamp"} +{"bootcamp_name": "korPuzzleWordLadder", "sample_number": 1000, "config_file": "korPuzzleWordLadder", "bootcamp_cls_name": "KorPuzzleWordLadderbootcamp"} +{"bootcamp_name": "korPuzzleWordRootsAndAffixes", "sample_number": 1000, "config_file": "korPuzzleWordRootsAndAffixes", "bootcamp_cls_name": "KorPuzzleWordRootsAndAffixesbootcamp"} +{"bootcamp_name": "korPuzzleWordscapes", "sample_number": 1000, "config_file": "korPuzzleWordscapes", "bootcamp_cls_name": "KorPuzzleWordscapesbootcamp"} +{"bootcamp_name": "korPuzzleWordSearch", "sample_number": 1000, "config_file": "korPuzzleWordSearch", "bootcamp_cls_name": "KorPuzzleWordSearchbootcamp"} +{"bootcamp_name": "LightUp", "sample_number": 1000, "config_file": "Light_Up", "bootcamp_cls_name": "LightUpbootcamp"} +{"bootcamp_name": "maze", "sample_number": 1000, "config_file": "maze", "bootcamp_cls_name": "Mazebootcamp"} +{"bootcamp_name": "minesweeper", "sample_number": 1000, "config_file": "minesweeper", "bootcamp_cls_name": "Minesweeperbootcamp"} +{"bootcamp_name": "nonograms", "sample_number": 1000, "config_file": "nonograms", "bootcamp_cls_name": "Nonogramsbootcamp"} +{"bootcamp_name": "pipes", "sample_number": 1000, "config_file": "pipes", "bootcamp_cls_name": "Pipesbootcamp"} +{"bootcamp_name": "skyscrapers", "sample_number": 1000, "config_file": "skyscrapers", "bootcamp_cls_name": "Skyscrapersbootcamp"} +{"bootcamp_name": "slitherlink", "sample_number": 1000, "config_file": "slitherlink", "bootcamp_cls_name": "Slitherlinkbootcamp"} +{"bootcamp_name": "starbattle", "sample_number": 1000, "config_file": "starbattle", "bootcamp_cls_name": "Starbattlebootcamp"} +{"bootcamp_name": "stitches", "sample_number": 1000, "config_file": "stitches", "bootcamp_cls_name": "Stitchesbootcamp"} +{"bootcamp_name": "sudoku", "sample_number": 1000, "config_file": "sudoku", "bootcamp_cls_name": "Sudokubootcamp"} +{"bootcamp_name": "tents", "sample_number": 1000, "config_file": "tents", "bootcamp_cls_name": "Tentsbootcamp"} +{"bootcamp_name": "thermometers", "sample_number": 1000, "config_file": "thermometers", "bootcamp_cls_name": "Thermometersbootcamp"} diff --git a/examples/pipelines/data_generator.py b/examples/pipelines/data_generator.py index c0c2e2d..c213662 100644 --- a/examples/pipelines/data_generator.py +++ b/examples/pipelines/data_generator.py @@ -34,7 +34,6 @@ def main_pipeline( configs = json.load(f) except: import pdb;pdb.set_trace() - if not configs: configs = [{}] n_per_config = [n // len(configs) for _ in configs] diff --git a/examples/pipelines/quickgen_data_configs.py b/examples/pipelines/quickgen_data_configs.py index ef8c186..ebdac49 100644 --- a/examples/pipelines/quickgen_data_configs.py +++ b/examples/pipelines/quickgen_data_configs.py @@ -9,7 +9,7 @@ import re # 每个puzzle的gen数量 train_sample_number = 1000 -test_sample_number = 64 +test_sample_number = 100 def checkpath(target_dir): # 检查目录是否存在 @@ -58,7 +58,7 @@ def process_data_config(): entry_train = { "bootcamp_name": bootcamp_name, - "sample_number": train_sample_number, + "sample_number": train_sample_number, "config_file": config_file, "bootcamp_cls_name": f"{bootcamp_cls_name}bootcamp" } diff --git a/examples/pipelines/run_pipeline.sh b/examples/pipelines/run_pipeline.sh index 9e01107..d1d3382 100755 --- a/examples/pipelines/run_pipeline.sh +++ b/examples/pipelines/run_pipeline.sh @@ -12,9 +12,9 @@ timestamp=$(date +"%Y-%m-%d-%H:%M:%S") # cipher输入集 cipher_input_file='internbootcamp/libs/data/words_alpha_370000.txt' -tokenizer="your tokenizer path" # tokenizer is used to calculate the sequence length of the prompt +tokenizer="/cpfs01/shared/llm_ddd/lipeiji/hf_hub_1/models--Qwen--Qwen2.5-32B-Instruct/snapshots/afb2829595f63efa3548e9d6b13aa66e61aa0f38" # tokenizer is used to calculate the sequence length of the prompt max_prompt_len=4096 -max_jobs=64 # 设置最大并发进程数 +max_jobs=60 # 设置最大并发进程数 jobs=() # 用于存储后台进程的PID cipher_test_nums_for_single_cipher=0 @@ -39,17 +39,6 @@ while IFS= read -r line || [ -n "$line" ]; do fi # 异步运行Python脚本 - python examples/pipelines/data_generator.py \ - --bootcamp_name "$bootcamp_name" \ - --n $sample_number \ - --save_file "examples/bootcamp_generator_outputs/$timestamp/train/${bootcamp_name}.jsonl" \ - --config_file "examples/pipelines/puzzle_configs/${config_file}_train.json" \ - --bootcamp_cls_name "$bootcamp_cls_name" \ - --tokenizer "$tokenizer" \ - --max_prompt_len $max_prompt_len \ - --shuffle - - # If there is no problem with the above command, you can use the following line to run it in multiple processes, replacing the above command # python examples/pipelines/data_generator.py \ # --bootcamp_name "$bootcamp_name" \ # --n $sample_number \ @@ -58,7 +47,18 @@ while IFS= read -r line || [ -n "$line" ]; do # --bootcamp_cls_name "$bootcamp_cls_name" \ # --tokenizer "$tokenizer" \ # --max_prompt_len $max_prompt_len \ - # --shuffle & + # --shuffle + + # If there is no problem with the above command, you can use the following line to run it in multiple processes, replacing the above command + python examples/pipelines/data_generator.py \ + --bootcamp_name "$bootcamp_name" \ + --n $sample_number \ + --save_file "examples/bootcamp_generator_outputs/$timestamp/train/${bootcamp_name}.jsonl" \ + --config_file "examples/pipelines/puzzle_configs/${config_file}_train.json" \ + --bootcamp_cls_name "$bootcamp_cls_name" \ + --tokenizer "$tokenizer" \ + --max_prompt_len $max_prompt_len \ + --shuffle & pid=$! # 获取后台进程的PID jobs+=("$pid") # 将PID加入数组 diff --git a/examples/unittests/run_eval.py b/examples/unittests/run_eval.py index 6d60131..0c1aede 100644 --- a/examples/unittests/run_eval.py +++ b/examples/unittests/run_eval.py @@ -19,6 +19,7 @@ TEMPLATE_MAP = { "r1": {"chat_template":"<|begin▁of▁sentence|><|User|>{input}<|Assistant|>\n","stop_words":["<|end▁of▁sentence|>"]}, # r1 new chat template "qwen": {"chat_template":"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{input}<|im_end|>\n<|im_start|>assistant\n","stop_words":["<|im_end|>", "<|endoftext|>"]}, # default qwen template "internthinker":{"chat_template":"<|im_start|>system\nYou are an expert reasoner with extensive experience in mathematical and code competitions. You approach problems through systematic thinking and rigorous reasoning. Your response should reflect deep understanding and precise logical thinking, making your solution path and reasoning clear to others. Please put your thinking process within ... tags.<|im_end|>\n<|im_start|>user\n{input}<|im_end|>\n<|im_start|>assistant\n","stop_words":["<|im_end|>", "<|endoftext|>"]}, + "internbootcamp":{"chat_template":"<|im_start|>system\nYou are an expert reasoner with extensive experience in mathematical and code competitions. You approach problems through systematic thinking and rigorous reasoning. Your response should reflect deep understanding and precise logical thinking, making your solution path and reasoning clear to others. Please put your thinking process within ... tags. After careful thought, present your final solution or answer clearly.<|im_end|>\n<|im_start|>user\n{input}<|im_end|>\n<|im_start|>assistant\n","stop_words":["<|im_end|>", "<|endoftext|>"]}, "chatml":{"chat_template":"<|im_start|>user\n{input}<|im_end|>\n<|im_start|>assistant\n","stop_words":["<|im_end|>", "<|endoftext|>"]}, # No sys prompt chatml } @@ -174,6 +175,9 @@ async def process_item(client, item, bootcamp, template, output_dir, semaphore, score = bootcamp.verify_score(output, item["ground_truth"], short_penalty=False, format_penalty=False) try: extracted = bootcamp.extract_output(output) + if type(extracted) is not str: + # Convert non-string extracted output to string, only in this way we can ensure that the output is JSON serializable + extracted = str(extracted) except: extracted = None output_len = response.usage.completion_tokens if 'usage' in response else len(output.split()) @@ -241,19 +245,19 @@ async def evaluate_dataset(file_path, bootcamp, output_dir, template, semaphore, async def main(): parser = argparse.ArgumentParser() - parser.add_argument('--url', default='http://{ip}:{port}/v1', + parser.add_argument('--url', default='http://10.130.133.35:8000/v1', help='Base URL of the OpenAI API compatible service. Default format is http://{ip}:{port}/v1.') parser.add_argument('--api_key', default='EMPTY', help='API key for accessing the model service. Set to "EMPTY" if no key is required.') - parser.add_argument('--model_name', required=True, + parser.add_argument('--model_name', default='DeepSeek-R1-Distill-Qwen-32B', help='Name of the model to be evaluated, e.g., r1_32B or other custom model name.') - parser.add_argument('--test_dir', required=True, + parser.add_argument('--test_dir', default='/cpfs01/shared/llm_ddd/lipeiji/InternBootcamp/examples/bootcamp_generator_outputs/2025-05-30-16:26:35/test', help='Path to the directory containing test JSONL files for evaluation.') - parser.add_argument('--max_concurrent_requests', type=int, default=128, + parser.add_argument('--max_concurrent_requests', type=int, default=144, help='Maximum number of concurrent requests allowed globally.') - parser.add_argument('--template', default='chatml',choices=['r1', 'qwen', 'internthinker', 'chatml'], + parser.add_argument('--template', default='r1',choices=['r1', 'qwen', 'internthinker', 'chatml'], help='Predefined conversation template used to format prompts. Only valid when api_mode is completion.') - parser.add_argument('--max_tokens', type=int, default=32768, + parser.add_argument('--max_tokens', type=int, default=8192, help='Maximum number of tokens the model can generate.') parser.add_argument('--temperature', type=float, default=0, help='Controls randomness in text generation. Lower values produce more deterministic outputs.') @@ -263,7 +267,7 @@ async def main(): help='API mode to use: "completion" for raw text generation or "chat_completion" for chat-style APIs.') parser.add_argument('--sys_prompt', type=str, help='System prompt content used in chat_completion mode. If not provided, uses the default from the template (if any).') - parser.add_argument('--max_retries', type=int, default=16, + parser.add_argument('--max_retries', type=int, default=8, help='Maximum number of retries for failed requests.') parser.add_argument('--max_retrying_delay', type=int, default=60, help='Maximum delay between retries in seconds (using exponential backoff).') diff --git a/examples/verl_usage/verl_data_preprocess.py b/examples/verl_usage/verl_data_preprocess.py index 3a0f715..109e026 100644 --- a/examples/verl_usage/verl_data_preprocess.py +++ b/examples/verl_usage/verl_data_preprocess.py @@ -91,17 +91,21 @@ def convert_to_parquet(src_jsonl, tgt_parquet, split, shuffle=True): # 构造目标格式的数据结构 formatted_data = { "data_source": 'bootcamp/' + data_source, - "prompt": [{ - "role": "user", - "content": prompt + "prompt": [ + { + "role": "system", + "content": "You are an expert reasoner with extensive experience in mathematical and code competitions. You approach problems through systematic thinking and rigorous reasoning. Your response should reflect deep understanding and precise logical thinking, making your solution path and reasoning clear to others. Please put your thinking process within ... tags. After careful thought, present your final solution or answer clearly." + }, + { + "role": "user", + "content": prompt }], "reward_model": { "style": "rule", "ground_truth": json.dumps(ground_truth, ensure_ascii=False) }, "extra_info": { - 'split': split, # 使用传入的 split 值 - 'index': idx + 'index': str(idx), # modify this to make it consistent with current verl format } } diff --git a/internbootcamp/bootcamp/GO/InternGOBootcampDev.py b/internbootcamp/bootcamp/GO/InternGOBootcampDev.py index e4f69b8..c4cf23c 100755 --- a/internbootcamp/bootcamp/GO/InternGOBootcampDev.py +++ b/internbootcamp/bootcamp/GO/InternGOBootcampDev.py @@ -5,7 +5,7 @@ from typing import List, Tuple, Dict, Set, Optional class InternGObootcamp(Basebootcamp): def __init__(self): - + pass def case_generator(self) -> Dict: """ Collection from game records diff --git a/internbootcamp/bootcamp/base.py b/internbootcamp/bootcamp/base.py index 29a4e0e..a0b336a 100755 --- a/internbootcamp/bootcamp/base.py +++ b/internbootcamp/bootcamp/base.py @@ -1,5 +1,6 @@ import re import json +import random class Basebootcamp: @@ -46,7 +47,7 @@ class Basebootcamp: @classmethod - def verify_score(cls, model_output, identity: dict, format_score=0, short_penalty=True, short_threshold=100, format_penalty=True) -> float: + def verify_score(cls, model_output, identity: dict, format_score=0, short_penalty=False, short_threshold=100, format_penalty=False) -> float: """ Verify the output against the ground truth. @@ -62,7 +63,10 @@ class Basebootcamp: if short_penalty and len(model_output) < short_threshold: # if the output is too short, consider it incorrect return score - if format_penalty and "" not in model_output: + if format_penalty and ("" not in model_output or "" not in model_output): + return score + if format_penalty and (model_output.count("") > 1 or model_output.count("") > 1 or model_output.count("") != model_output.count("") or not model_output.startswith("") or model_output.endswith("")): + # should not end with return score try: extract_solution = cls.extract_output(model_output) @@ -80,6 +84,13 @@ class Basebootcamp: except Exception as e: # print("Error in verify_score:", e) pass + if random.randint(1,1024) == 1: + print("=============DEBUG=============") + print("model_output:\n", model_output) + print("identity:\n", identity) + print("extract_solution:\n", extract_solution) + print("score:", score) + print("===============================") return score diff --git a/setup.py b/setup.py deleted file mode 100755 index 033418b..0000000 --- a/setup.py +++ /dev/null @@ -1,34 +0,0 @@ -import setuptools - -setuptools.setup( - name="internbootcamp", - version="0.1.0", - url="https://github.com/InternLM/InternBootcamp/tree/main", - packages=setuptools.find_packages(include=['internbootcamp',]), - install_requires=[ - "distance", - "matplotlib", - "datasets", - "jsonlines", - "fire", - "Faker", - "python-sat", - "sympy", - "openai", - "openpyxl", - "transformers", - "langdetect", - "pympler", - "shortuuid" - ], - - package_data={ - - }, - classifiers=[ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", - ], - python_requires='>=3.6', -) \ No newline at end of file