linting, moved env, updated contrib credit

This commit is contained in:
Shannon Sands 2025-05-26 14:35:16 +10:00
parent 81d1ebeaef
commit bf12e7df15
83 changed files with 1560 additions and 640 deletions

View file

@ -0,0 +1,16 @@
# Using FOB with NePS for HPO
Run all commands from the root of the FOB repository.
## Setup
```bash
conda create -n fob-neps python=3.10 -y
conda activate fob-neps
pip install -r requirements.txt
pip install -r examples/neps/requirements.txt # this will downgrade some packages
pip install -e .
```
## Example
```bash
python examples/neps/hpo.py examples/neps/experiment.yaml
```

View file

@ -0,0 +1,197 @@
import argparse
import logging
import time
from pathlib import Path
import lightning as L
import neps
import torch
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.loggers import TensorBoardLogger
from neps.utils.common import get_initial_directory, load_lightning_checkpoint
from pytorch_fob.engine.engine import Engine, Run
#############################################################
# Defining the seed for reproducibility
def set_seed(seed=42):
    """Seed every RNG (Python, NumPy, torch) through Lightning for reproducible runs."""
    L.seed_everything(seed)
#############################################################
# Define search space
def search_space(run: Run) -> dict:
    """Build the NePS pipeline space for the run's configured optimizer.

    Shared hyperparameters (learning rate, scheduler factors) are always
    included; optimizer-specific ones are added based on the optimizer name
    found in the run's config. ``epochs`` is added last as the fidelity
    parameter that NePS schedules over.

    Raises:
        ValueError: if the configured optimizer is not supported.
    """
    cfg = run.get_config()
    space = {
        "learning_rate": neps.FloatParameter(
            lower=1e-5, upper=1e-1, log=True, default=1e-3
        ),
        "eta_min_factor": neps.FloatParameter(lower=1e-3, upper=1e-1, log=True),
        "warmup_factor": neps.FloatParameter(lower=1e-3, upper=1e-0, log=True),
    }
    optimizer_name = cfg["optimizer"]["name"]
    if optimizer_name == "adamw_baseline":
        space["weight_decay"] = neps.FloatParameter(lower=1e-5, upper=1e-0, log=True)
        space["one_minus_beta1"] = neps.FloatParameter(lower=1e-2, upper=2e-1, log=True)
        space["beta2"] = neps.FloatParameter(lower=0.9, upper=0.999)
    elif optimizer_name == "sgd_baseline":
        space["weight_decay"] = neps.FloatParameter(lower=1e-5, upper=1e-0, log=True)
        space["momentum"] = neps.FloatParameter(lower=0, upper=1)
    elif optimizer_name == "adamcpr_fast":
        space["one_minus_beta1"] = neps.FloatParameter(lower=1e-2, upper=2e-1, log=True)
        space["beta2"] = neps.FloatParameter(lower=0.9, upper=0.999)
        space["kappa_init_param"] = neps.IntegerParameter(
            lower=1, upper=19550, log=True
        )
        space["kappa_init_method"] = neps.ConstantParameter("warm_start")
    else:
        raise ValueError("optimizer not supported")
    space["epochs"] = neps.IntegerParameter(
        lower=5,
        upper=cfg["task"]["max_epochs"],
        is_fidelity=True,  # IMPORTANT to set this to True for the fidelity parameter
    )
    return space
# NOTE(review): function name keeps the original "exmperiment" typo because it is
# called elsewhere in this file; renaming would break those call sites.
def create_exmperiment(run: "Run", config: dict) -> dict:
    """Merge a NePS-sampled hyperparameter config into the run's base config.

    Args:
        run: the FOB run whose ``get_config()`` supplies the base experiment config.
        config: hyperparameters sampled by NePS. ``one_minus_beta1`` is converted
            back to ``beta1``; ``epochs`` is the fidelity parameter and is handled
            by the trainer, so it is not written into the optimizer section.

    Returns:
        A new, independent config dict with the sampled optimizer hyperparameters
        applied.
    """
    import copy

    # BUGFIX: the original used a shallow .copy(), so writes to
    # new_config["optimizer"] mutated the nested dict shared with the base run
    # config, leaking hyperparameters between NePS trials. deepcopy isolates it.
    new_config = copy.deepcopy(run.get_config())
    for key, value in config.items():
        if key == "one_minus_beta1":
            # NePS samples 1 - beta1 (log scale); convert back to beta1.
            new_config["optimizer"]["beta1"] = 1 - value
        elif key != "epochs":
            new_config["optimizer"][key] = value
    return new_config
#############################################################
# Define the run pipeline function
def create_pipline(base_run: Run):
    """Build the NePS ``run_pipeline`` closure for *base_run*.

    The returned function trains one sampled configuration (resuming from a
    previous checkpoint when NePS provides one) and reports the validation
    loss plus accuracy metrics back to NePS.
    """
    def run_pipeline(pipeline_directory, previous_pipeline_directory, **config) -> dict:
        """Train/evaluate one NePS configuration and return its result dict.

        Args:
            pipeline_directory: directory NePS assigns to this evaluation.
            previous_pipeline_directory: directory of the lower-fidelity
                evaluation of the same config, or None on a fresh start.
            **config: the sampled hyperparameters, including ``epochs``.
        """
        # Initialize the first directory to store the event and checkpoints files
        init_dir = get_initial_directory(pipeline_directory)
        checkpoint_dir = init_dir / "checkpoints"
        # Initialize the model and checkpoint dir
        engine = Engine()
        # Apply the sampled hyperparameters on top of the base run's config.
        engine.parse_experiment(create_exmperiment(base_run, config))
        # Only the first run produced by the engine is used here.
        run = next(engine.runs())
        run.ensure_max_steps()
        model, datamodule = run.get_task()
        # Create the TensorBoard logger for logging
        logger = TensorBoardLogger(
            save_dir=init_dir, name="data", version="logs", default_hp_metric=False
        )
        # Add checkpoints at the end of training
        checkpoint_callback = ModelCheckpoint(
            dirpath=checkpoint_dir,
            filename="{epoch}-{val_loss:.2f}",
        )
        # Use this function to load the previous checkpoint if it exists
        checkpoint_path, checkpoint = load_lightning_checkpoint(
            previous_pipeline_directory=previous_pipeline_directory,
            checkpoint_dir=checkpoint_dir,
        )
        # Epochs already trained in the previous (lower-fidelity) evaluation;
        # used below so NePS is only billed for the *additional* epochs.
        if checkpoint is None:
            previously_spent_epochs = 0
        else:
            previously_spent_epochs = checkpoint["epoch"]
        # Create a PyTorch Lightning Trainer
        epochs = config["epochs"]
        trainer = L.Trainer(
            logger=logger,
            max_epochs=epochs,
            callbacks=[checkpoint_callback],
        )
        # Train the model and retrieve training/validation metrics
        # (resume from the checkpoint when one was found).
        if checkpoint_path:
            trainer.fit(model, datamodule=datamodule, ckpt_path=checkpoint_path)
        else:
            trainer.fit(model, datamodule=datamodule)
        # Metrics come back as tensors from Lightning; unwrap to plain floats
        # (or None when a metric was not logged) so the result dict is serializable.
        train_accuracy = trainer.logged_metrics.get("train_acc", None)
        train_accuracy = (
            train_accuracy.item()
            if isinstance(train_accuracy, torch.Tensor)
            else train_accuracy
        )
        val_loss = trainer.logged_metrics.get("val_loss", None)
        val_loss = val_loss.item() if isinstance(val_loss, torch.Tensor) else val_loss
        val_accuracy = trainer.logged_metrics.get("val_acc", None)
        val_accuracy = (
            val_accuracy.item()
            if isinstance(val_accuracy, torch.Tensor)
            else val_accuracy
        )
        # Test the model and retrieve test metrics
        trainer.test(model, datamodule=datamodule)
        test_accuracy = trainer.logged_metrics.get("test_acc", None)
        test_accuracy = (
            test_accuracy.item()
            if isinstance(test_accuracy, torch.Tensor)
            else test_accuracy
        )
        # NePS minimizes "loss"; "cost" is the incremental epoch budget spent here.
        return {
            "loss": val_loss,
            "cost": epochs - previously_spent_epochs,
            "info_dict": {
                "train_accuracy": train_accuracy,
                "val_accuracy": val_accuracy,
                "test_accuracy": test_accuracy,
            },
        }
    return run_pipeline
if __name__ == "__main__":
    # Command-line interface: the experiment yaml plus an optional trial budget.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "experiment_file", type=Path, help="The yaml file specifying the experiment."
    )
    parser.add_argument(
        "--n_trials",
        type=int,
        default=15,
        help="Number of different configurations to train",
    )
    args, extra_args = parser.parse_known_args()

    # Record the start time, seed the RNGs and configure logging.
    start_time = time.time()
    set_seed(42)
    logging.basicConfig(level=logging.INFO)

    # Load the base experiment; any unrecognized CLI args are forwarded to FOB.
    engine = Engine()
    engine.parse_experiment_from_file(args.experiment_file, extra_args)
    run = next(engine.runs())

    # Hand the pipeline and search space over to NePS (hyperband searcher).
    neps.run(
        run_pipeline=create_pipline(run),
        pipeline_space=search_space(run),
        root_directory=run.engine.output_dir,
        max_evaluations_total=args.n_trials,
        searcher="hyperband",
    )

    # Report the total wall-clock time of the HPO run.
    execution_time = time.time() - start_time
    logging.info(f"Execution time: {execution_time} seconds")

View file

@ -0,0 +1,5 @@
neural-pipeline-search
torch==2.0.0
torchvision
torchaudio
torchtext