Completed: full example suite

2026-04-30 17:40:45 +00:00 · 2025-02-03 07:21:12 +00:00 · 2025-02-03 07:21:12 +00:00 · de7d37f14f
commit de7d37f14f
parent c0a16d7f2b
13 changed files with 1309 additions and 220 deletions
--- a/examples/word_ladder/main.py
+++ b/examples/word_ladder/main.py
@ -0,0 +1,84 @@
+#!/usr/bin/env python3
+"""
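+main.py – Orchestrates the overall flow:
+1. Generate word ladder sets
+2. Submit chain-of-thought reasoning requests in batches via the LLM
+3. Upload the final dataset to HuggingFace Hub (if needed)
+
+Note: only step 1 currently runs. Step 2 is disabled inside main(), and the
+later steps are still placeholders.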
+"""
+
+import uuid
+import sys
+from pathlib import Path
+from typing import Dict, Any
+
+from examples.word_ladder.utils import create_word_ladders, generate_reasoning
+
+
+def create_dataset(jsonl_path: Path, config: Dict[str, Any]) -> bool:
+    """
+    Creates the word ladder dataset, handling potential exhaustion gracefully.
+    
+    Returns:
+        bool: True if dataset was created (even if truncated), False if creation failed
+    """
+    try:
+        print("Step 1: Algorithmically creating word ladder chains...")
+        create_word_ladders.create_word_ladder_dataset(str(jsonl_path), config=config)
+        return True
+        
+    except IndexError as e:
+        # Dataset was exhausted but some examples were generated
+        print("\nNote: Dataset generation stopped early due to exhaustion of unique puzzles.")
+        print(f"Reason: {str(e)}")
+        if jsonl_path.exists():
+            print("Continuing with the partial dataset that was successfully generated.")
+            return True
+        return False
+        
+    except Exception as e:
+        # Unexpected error during dataset creation
+        print(f"\nError: Failed to create dataset: {str(e)}")
+        return False
+
+def main():
+    # Centralized configuration for the dataset
+    config = {
+        'dataset_name': 'word_ladder',
+        'dataset_config': {
+            'min_word_length': 3,
+            'max_word_length': 5,
+            'min_chain_length':-1,  # set to -1 for the shortest possible path
+            'max_chain_length':10,
+            'size': 100,  # Generate a small-ish dataset for demonstration
+        }
+    }
+
+    # Generate a friendly unique identifier and compose the file path
+    unique_id = uuid.uuid4().hex[:8]
+    output_dir = Path(__file__).resolve().parent / "output"
+    output_dir.mkdir(exist_ok=True)  # Create output directory if it doesn't exist
+    jsonl_path = output_dir / f"word_ladders_{unique_id}.jsonl"
+
+    # Step 1: Create the dataset
+    if not create_dataset(jsonl_path, config):
+        print("Exiting due to dataset creation failure.")
+        sys.exit(1)
+
+
+    # Step 2: Generate reasoning
+    '''
+    try:
+        print("\nStep 2: Submitting reasoning batches for the dataset...")
+        generate_reasoning.submit_reasoning_batches(input_path=str(jsonl_path))
+    except Exception as e:
+        print(f"\nError: Failed to submit reasoning batches: {str(e)}")
+        sys.exit(1)
+    '''
+
+    # Step 3: Check Anthropic batch results
+    # Step 4: Upload to HuggingFace 🤗
+    
+    print("\nComplete!")
+
+if __name__ == "__main__":  # run the pipeline only when executed as a script, not on import
+    main()