Completed: full example suite

This commit is contained in:
Cavit Erginsoy 2025-02-03 07:21:12 +00:00
parent c0a16d7f2b
commit de7d37f14f
13 changed files with 1309 additions and 220 deletions

View file

@@ -0,0 +1,84 @@
#!/usr/bin/env python3
"""
main.py - Orchestrates the overall flow:
    1. Generate word ladder sets
    2. Submit chain-of-thought reasoning requests in batches via the LLM
       (currently disabled in main())
    3. Upload the final dataset to HuggingFace Hub (if needed; not yet
       implemented in this file)
"""
import uuid
import sys
from pathlib import Path
from typing import Dict, Any
from examples.word_ladder.utils import create_word_ladders, generate_reasoning
def create_dataset(jsonl_path: Path, config: Dict[str, Any]) -> bool:
    """
    Create the word ladder dataset, handling potential exhaustion gracefully.

    Generation is delegated to
    ``create_word_ladders.create_word_ladder_dataset``; any failure is turned
    into a boolean so the caller can decide whether to continue.

    Args:
        jsonl_path: Destination of the JSONL dataset (passed on as a string).
        config: Dataset configuration, forwarded unchanged as ``config=``.

    Returns:
        bool: True if generation finished, or if it stopped early with an
        ``IndexError`` but left a non-empty file at ``jsonl_path``.
        False on any other exception, or when the early stop left no
        usable output behind.
    """
    try:
        print("Step 1: Algorithmically creating word ladder chains...")
        create_word_ladders.create_word_ladder_dataset(str(jsonl_path), config=config)
        return True
    except IndexError as e:
        # IndexError is treated as "ran out of unique puzzles": some examples
        # may already have been written before the generator gave up.
        print("\nNote: Dataset generation stopped early due to exhaustion of unique puzzles.")
        print(f"Reason: {e}")
        # Existence alone is not enough: the file may have been opened but
        # never written to, in which case there is no partial dataset to use.
        if jsonl_path.is_file() and jsonl_path.stat().st_size > 0:
            print("Continuing with the partial dataset that was successfully generated.")
            return True
        print("No examples were written before generation stopped; nothing to continue with.")
        return False
    except Exception as e:
        # Top-level boundary for this step: report the unexpected error and
        # let the caller decide how to exit.
        print(f"\nError: Failed to create dataset: {e}")
        return False
def main():
    """
    Run the word ladder example pipeline.

    Builds the dataset configuration, chooses a uniquely named JSONL file in
    an ``output`` directory next to this script, and generates the dataset.
    Exits the process with status 1 if dataset creation fails. The reasoning
    submission step is currently disabled, and the later steps are only
    placeholders in this function.
    """
    # Centralized configuration for the dataset
    config = {
        'dataset_name': 'word_ladder',
        'dataset_config': {
            'min_word_length': 3,
            'max_word_length': 5,
            'min_chain_length': -1,  # set to -1 for the shortest possible path
            'max_chain_length': 10,
            'size': 100,  # Generate a small-ish dataset for demonstration
        },
    }

    # Generate a friendly unique identifier and compose the file path, so
    # repeated runs do not overwrite each other's output.
    unique_id = uuid.uuid4().hex[:8]
    output_dir = Path(__file__).resolve().parent / "output"
    output_dir.mkdir(exist_ok=True)  # Create output directory if it doesn't exist
    jsonl_path = output_dir / f"word_ladders_{unique_id}.jsonl"

    # Step 1: Create the dataset
    if not create_dataset(jsonl_path, config):
        print("Exiting due to dataset creation failure.")
        sys.exit(1)

    # Step 2: Generate reasoning (currently disabled).
    # Kept as comments rather than a bare string literal, which would be a
    # no-op expression statement instead of a comment.
    #
    # try:
    #     print("\nStep 2: Submitting reasoning batches for the dataset...")
    #     generate_reasoning.submit_reasoning_batches(input_path=str(jsonl_path))
    # except Exception as e:
    #     print(f"\nError: Failed to submit reasoning batches: {str(e)}")
    #     sys.exit(1)

    # TODO: Step 3: Check Anthropic batch results
    # TODO: Step 4: Upload to HuggingFace 🤗

    print("\nComplete!")
# Run the pipeline only when this file is executed as a script, so importing
# the module has no side effects.
if __name__ == "__main__":
    main()