mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-25 17:10:42 +00:00
linting, moved env, updated contrib credit
This commit is contained in:
parent
81d1ebeaef
commit
bf12e7df15
83 changed files with 1560 additions and 640 deletions
|
|
@ -0,0 +1,2 @@
|
|||
[pull]
|
||||
rebase = true
|
||||
8
environments/community/pytorch_optimizer_coding/FOB/.gitignore
vendored
Normal file
8
environments/community/pytorch_optimizer_coding/FOB/.gitignore
vendored
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
__pycache__
|
||||
.vscode
|
||||
data
|
||||
lightning_logs
|
||||
experiments
|
||||
*.ipynb
|
||||
experiment.yaml
|
||||
condaenv.*.requirements.txt
|
||||
189
environments/community/pytorch_optimizer_coding/FOB/LICENSE
Normal file
189
environments/community/pytorch_optimizer_coding/FOB/LICENSE
Normal file
|
|
@ -0,0 +1,189 @@
|
|||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
Copyright 2024 Simon Blauth, Tobias Bürger, Zacharias Häringer
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
295
environments/community/pytorch_optimizer_coding/FOB/README.md
Normal file
295
environments/community/pytorch_optimizer_coding/FOB/README.md
Normal file
|
|
@ -0,0 +1,295 @@
|
|||
# Fast Optimizer Benchmark
|
||||
|
||||
Fast and cheap Benchmark for HPO and Optimizer.
|
||||
|
||||
Master Project at Machine Learning Lab Freiburg,
|
||||
Simon Blauth, Tobias Bürger, Zacharias Häringer
|
||||
|
||||
This benchmark aims to be fast while maintaining a wide selection of different tasks. It also tries to be independent of the hardware used, however it requires a minimum of 4 gpus ideally capable of bfloat16 mixed precision.
|
||||
One run of all tasks in this suite on a single optimizer configuration should not take more than a day.
|
||||
A benchmark should state the following for each task: time taken per optimization step compared to baseline, best model performance, final model performance.
|
||||
|
||||
## Tasks
|
||||
|
||||
We try to cover a large range of deep learning tasks in this benchmark.
|
||||
|
||||
Instructions on how to write your own task can be found [here](pytorch_fob/tasks/README.md)
|
||||
|
||||
### Available Tasks
|
||||
|
||||
| Name | Dataset | Model | Task | Target Metric | Baseline Score | Baseline Runtime | Hardware |
|
||||
| ------- | ---- | ----- | ---- | ------------- | -------------- | ---------------- | -------- |
|
||||
| [mnist](pytorch_fob/tasks/mnist) | MNIST | MLP | Image Classification | Top-1 Accuracy | 0.97 | 1 min | 1 gpu |
|
||||
| [classification](pytorch_fob/tasks/classification) | [Imagenet-64x64](https://patrykchrabaszcz.github.io/Imagenet32/) | [Wide ResNet](https://arxiv.org/pdf/1605.07146.pdf) | Image Classification | Top-1 Accuracy | 0.69 | 4h | 4 gpu |
|
||||
| [classification_small](pytorch_fob/tasks/classification_small) | [CIFAR100](https://www.cs.toronto.edu/~kriz/cifar.html) | [Resnet18](https://arxiv.org/pdf/1512.03385.pdf) | Image Classification | Top-1 Accuracy | 0.77 | 10 min | 1 gpu |
|
||||
| [segmentation](pytorch_fob/tasks/segmentation) | [MIT Scene Parse](http://sceneparsing.csail.mit.edu/) | [SegFormer](https://arxiv.org/abs/2105.15203) | Semantic Segmentation | Intersection over Union (IoU) | 35.6 | 5h | 4 gpu |
|
||||
| [graph](pytorch_fob/tasks/graph) | [ogbg-molhiv](https://ogb.stanford.edu/docs/graphprop/#ogbg-mol) | [Graph Isomorphism Network (GIN)](https://arxiv.org/pdf/1810.00826.pdf) | Graph Property Prediction | ROC-AUC | 0.77 | 20min | 1 gpu |
|
||||
| [graph_tiny](pytorch_fob/tasks/graph_tiny) | [Cora](https://paperswithcode.com/sota/node-classification-on-cora) | [GCN](https://arxiv.org/abs/1609.02907) | Node Classification | Accuracy | 0.82 | 1min | 1 gpu |
|
||||
| [tabular](pytorch_fob/tasks/tabular) | [California Housing](https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.html) | [FT Transformer](https://arxiv.org/pdf/2106.11959.pdf) | Tabular Regression | Test RMSE | 0.40 | 2 min | 1 gpu |
|
||||
| [translation](pytorch_fob/tasks/translation) | [WMT17(en-de)](https://machinetranslate.org/wmt17) | [T5 small](https://jmlr.org/papers/volume21/20-074/20-074.pdf) | Machine Translation | BLEU (sacrebleu) | 26.3 | 6h | 4 gpus |
|
||||
|
||||
## Optimizer and Scheduler
|
||||
|
||||
An optimizer (together with the learning rate scheduler) contains the deep learning training algorithm to benchmark. Each optimizer has its own subfolder in the `optimizers` folder.
|
||||
We currently have the following optimizers:
|
||||
|
||||
| Name | Optimizer | LR Scheduler |
|
||||
| ---- | --------- | ------------ |
|
||||
| adamw_baseline | [AdamW](https://arxiv.org/abs/1711.05101) | [Cosine Annealing](https://arxiv.org/abs/1608.03983) with linear warmup |
|
||||
| adamcpr | [AdamCPR](https://arxiv.org/abs/2311.09058v2) | [Cosine Annealing](https://arxiv.org/abs/1608.03983) with linear warmup |
|
||||
| sgd_baseline | Stochastic Gradient Descent | [Cosine Annealing](https://arxiv.org/abs/1608.03983) |
|
||||
|
||||
Instructions on how to add your own optimizer can be found [here](pytorch_fob/optimizers/README.md)
|
||||
|
||||
## Usage Instructions
|
||||
|
||||
### Installation
|
||||
|
||||
This repo was tested with Python 3.10, Python 3.11 works as well.
|
||||
Some libraries are not updated, so Python 3.12 currently breaks.
|
||||
Create conda environment:
|
||||
```bash
|
||||
conda env create --file environment.yml
|
||||
```
|
||||
|
||||
or alternatively:
|
||||
```bash
|
||||
conda create -n fob python=3.10 -y
|
||||
```
|
||||
Activate and install requirements
|
||||
```bash
|
||||
conda activate fob
|
||||
pip install -r requirements.txt
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
#### Troubleshooting
|
||||
|
||||
Sometimes pip fails to install the correct version of `mmcv`. If you encounter errors, try to install the correct version of `mmcv` as instructed on their [website](https://mmcv.readthedocs.io/en/latest/get_started/installation.html).
|
||||
|
||||
### How to run an experiment
|
||||
|
||||
Make sure you have the conda environment set up and activated.
|
||||
Then you write an `experiment.yaml` (can be named differently) where you specify which optimizer and task you want to use. Every value can also be a list of values if you want to perform a gridsearch over them (more details below).
|
||||
|
||||
As an example we use this `experiment.yaml`:
|
||||
```yaml
|
||||
task:
|
||||
name:
|
||||
- mnist
|
||||
- classification_small
|
||||
optimizer:
|
||||
- name: adamw_baseline
|
||||
beta2: 0.98
|
||||
- name: sgd_baseline
|
||||
momentum: 0.5
|
||||
engine:
|
||||
seed: [42, 47]
|
||||
```
|
||||
This will produce 2x2x2=8 runs in total.
|
||||
Each undefined parameter will be set using either `engine/default.yaml`, `optimizers/<optimizer>/default.yaml` or `tasks/<task>/default.yaml`.
|
||||
|
||||
Before you run the experiment make sure the datasets are prepared:
|
||||
```bash
|
||||
python -m pytorch_fob.dataset_setup experiment.yaml
|
||||
```
|
||||
|
||||
Then you run the experiment:
|
||||
```bash
|
||||
python -m pytorch_fob.run_experiment experiment.yaml
|
||||
```
|
||||
This runs all tasks with all optimizers and hyperparameters specified inside `experiment.yaml` using grid search.
|
||||
You can either supply one value or a list of values for each entry. Grid-search combines each possible combination.
|
||||
For example: if you specified 3 tasks, 2 optimizers, 2 different learning rates and 4 seeds, then you need a total of 3 x 2 x 2 x 4 = 48 runs
|
||||
|
||||
You can additionally set values through the command line (this overrides existing values). For example you can set the `data_dir` where datasets are stored using either:
|
||||
```bash
|
||||
python -m script experiment.yaml "engine.data_dir=<path>"
|
||||
```
|
||||
or you can specify it inside the `experiment.yaml`:
|
||||
```yaml
|
||||
engine:
|
||||
data_dir: <path>
|
||||
```
|
||||
## Usage Examples
|
||||
|
||||
In the following you can find example use cases for experiments. Here we will focus on running the training and testing pipeline. For instructions on how to plot the results, refer to the [evaluation/README.md](pytorch_fob/evaluation/README.md).
|
||||
|
||||
In these examples we will perform 'dry-runs' by setting the following parameters in the `experiment.yaml`:
|
||||
|
||||
```yaml
|
||||
engine:
|
||||
train: false
|
||||
test: false
|
||||
plot: false
|
||||
```
|
||||
|
||||
(Note: it might be a good idea to perform a dry run locally before wasting compute resources)
|
||||
|
||||
### Example 1: Running a single task
|
||||
|
||||
This is a (quite) minimal example of how to run a single task. The model and training are customized. All other values will be taken from their respective `default.yaml`.
|
||||
|
||||
```yaml
|
||||
task:
|
||||
name: mnist
|
||||
max_epochs: 1
|
||||
model:
|
||||
num_hidden: 42
|
||||
```
|
||||
|
||||
Full experiment file: [examples/usage/1_single_task.yaml](examples/usage/1_single_task.yaml)
|
||||
|
||||
```bash
|
||||
python -m pytorch_fob.run_experiment examples/usage/1_single_task.yaml
|
||||
```
|
||||
|
||||
Take a look at the [output directory](examples/usage/outputs/experiment-1/) to see the results.
|
||||
|
||||
Note on the *folder name* of the runs:
|
||||
Any hyperparameter that differs from the default will be included in the directory name. This is helpful for example when observing runs with Tensorboard.
|
||||
|
||||
Note on the *directory structure* of the outputs:
|
||||
The individual runs will be placed
|
||||
|
||||
```
|
||||
examples/usage/outputs/experiment-1 # (customize via: engine.output_dir)
|
||||
└── taskname # (customize via: task.output_dir_name)
|
||||
└── optimizer name # (customize via: optimizer.output_dir_name)
|
||||
├── run_1 # (name includes non-default parameters)
|
||||
├── ...
|
||||
└── run_n
|
||||
```
|
||||
|
||||
### Example 2: Comparing optimizers
|
||||
|
||||
To quickly run multiple optimizers on multiple hyperparameters, you can declare a list of values. This will perform a grid search over the values.
|
||||
|
||||
```yaml
|
||||
optimizer:
|
||||
- name: adamw_baseline
|
||||
learning_rate: [1.0e-2, 1.0e-3]
|
||||
weight_decay: [0.1, 0.01]
|
||||
- name: adamcpr
|
||||
learning_rate: [1.0e-2, 1.0e-3]
|
||||
kappa_init_param: [0.5, 1, 2, 4, 8, 16, 32]
|
||||
```
|
||||
|
||||
AdamW is used 4 (= 2 x 2) times, AdamCPR is used 14 (= 2 x 7) times, for a total of 18 runs.
|
||||
|
||||
Full experiment file: [examples/usage/2_comparing_optimizers.yaml](examples/usage/2_comparing_optimizers.yaml)
|
||||
|
||||
```bash
|
||||
python -m pytorch_fob.run_experiment examples/usage/2_comparing_optimizers.yaml
|
||||
```
|
||||
|
||||
Take a look at the [output directory](examples/usage/outputs/experiment-2/) to see the 18 run folders.
|
||||
|
||||
### Example 3: Running multiple tasks
|
||||
|
||||
If you want to use this repository for benchmarking an optimizer you most likely want to run multiple tasks, on multiple seeds.
|
||||
|
||||
```yaml
|
||||
task:
|
||||
- classification
|
||||
- classification_small
|
||||
- graph
|
||||
- graph_tiny
|
||||
- mnist
|
||||
- segmentation
|
||||
- tabular
|
||||
- translation
|
||||
engine:
|
||||
seed: [1, 2, 3]
|
||||
```
|
||||
|
||||
You can use any subset of the full task list, if some tasks are not relevant for you.
|
||||
Every task will be run on every seed. By default, the benchmark uses deterministic algorithms wherever possible and logs a warning otherwise.
|
||||
|
||||
Full experiment file: [examples/usage/3_benchmark_optimizers.yaml](examples/usage/3_benchmark_optimizers.yaml)
|
||||
|
||||
```bash
|
||||
python -m pytorch_fob.run_experiment examples/usage/3_benchmark_optimizers.yaml
|
||||
```
|
||||
|
||||
Take a look at the [output directory](examples/usage/outputs/experiment-3/) to see the results.
|
||||
|
||||
### Example 4: Running different versions of the same task
|
||||
|
||||
You can also run different versions of the same task (or optimizer).
|
||||
This might be useful when you do not want a full grid search, but only want to combine certain groups.
|
||||
|
||||
The full grid search would be 2x2x2x2, we only want 8
|
||||
🟦: group1 normalizer=quantile and noise=1.e-3 (+optimizer)
|
||||
🟧: group2 normalizer=standard and noise=0
|
||||
⬜: unwanted parameter combinations
|
||||
|
||||
🟦⬜⬜🟧
|
||||
⬜🟦🟧⬜
|
||||
⬜🟧🟦⬜
|
||||
🟧⬜⬜🟦
|
||||
|
||||
```yaml
|
||||
task:
|
||||
- name: tabular
|
||||
output_dir_name: tabular_quantile
|
||||
train_transforms:
|
||||
normalizer: quantile
|
||||
noise: 1.e-3
|
||||
- name: tabular
|
||||
output_dir_name: tabular_standard
|
||||
train_transforms:
|
||||
normalizer: standard
|
||||
noise: 0
|
||||
optimizer:
|
||||
name: adamw_baseline
|
||||
learning_rate: [1.e-2, 1.e-3]
|
||||
weight_decay: [1.e-2, 1.e-3]
|
||||
```
|
||||
|
||||
Full experiment file: [examples/usage/4_multiple_task_versions.yaml](examples/usage/4_multiple_task_versions.yaml)
|
||||
|
||||
```bash
|
||||
python -m pytorch_fob.run_experiment examples/usage/4_multiple_task_versions.yaml
|
||||
```
|
||||
|
||||
Take a look at the [output directory](examples/usage/outputs/experiment-4/) to see the results.
|
||||
|
||||
### Example 5: Running experiments with SLURM (convenience)
|
||||
|
||||
You can run experiments with SLURM. This is a convenience feature that allows you to run experiments on remote clusters. It splits each run of the experiment into a separate job.
|
||||
|
||||
```yaml
|
||||
engine:
|
||||
run_scheduler: slurm_array
|
||||
sbatch_args:
|
||||
partition: my_gpu_partition # adapt to your cluster
|
||||
sbatch_script_template: path/to/template.sh
|
||||
```
|
||||
|
||||
- The `slurm_array` scheduler will put the runs into an array job. Therefore all slurm relevant parameters (e.g. devices, time, workers, ...) need to be equal across all runs. Using this scheduler is only recommended when running a single task.
|
||||
The `slurm_jobs` scheduler on the other hand will put each run into a separate job.
|
||||
- arguments put in `sbatch_args` will be passed to sbatch.
|
||||
e.g. `partition: my_gpu_partition` is parsed to `--partition=my_gpu_partition`
|
||||
|
||||
- per default gpus equal to `engine.devices` and a number of cpus according to `engine.workers` are requested.
|
||||
- The requested time is set according to the defaults per task. It is recommended to use the `engine.sbatch_time_factor` to scale the default time per task for slower / faster machines.
|
||||
- Wrap the FOB execution in your pre- and post commands (e.g. conda activation) with an `sbatch_script_template` the placeholder `__FOB_COMMAND__` in [examples/usage/sbatch_template.sh](examples/usage/sbatch_template.sh) will be replaced.
|
||||
|
||||
|
||||
Full experiment file: [examples/usage/5_slurm.yaml](examples/usage/5_slurm.yaml)
|
||||
|
||||
Running this command without slurm will crash, but save the individual slurm scripts into [`path/to/sbatch_scripts`](examples/usage/outputs/experiment-5/sbatch_scripts) for us to look at.
|
||||
|
||||
```bash
|
||||
python -m pytorch_fob.run_experiment examples/usage/5_slurm.yaml
|
||||
```
|
||||
|
||||
Take a look at the [output directory](examples/usage/outputs/experiment-5/) to see the results.
|
||||
|
||||
## License
|
||||
This repository is licensed under the Apache License 2.0.
|
||||
|
||||
However, please be aware that the repository includes various models and datasets, each of which may have its own licensing terms. It is the responsibility of the users to ensure that they comply with the specific licenses of these models and datasets.
|
||||
|
||||
By using this repository, you agree to respect and comply with all relevant licenses associated with the models and datasets. The Apache License 2.0 applies only to the original content and code provided in this repository.
|
||||
|
|
@ -0,0 +1,2 @@
|
|||
here you can find the configs that were used to create the baseline.
|
||||
Make sure to adapt the paths to data and the experiments output!
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
task:
|
||||
name: classification
|
||||
optimizer:
|
||||
- name: adamw_baseline
|
||||
learning_rate: [1.e-2, 1.e-3, 1.e-4]
|
||||
weight_decay: [10, 1.e-0, 1.e-1, 1.e-2]
|
||||
- name: adamcpr_fast
|
||||
learning_rate: [1.e-2, 1.e-3, 1.e-4]
|
||||
kappa_init_param: [0.125, 0.25, 0.5, 1]
|
||||
engine:
|
||||
seed: [1, 2, 3]
|
||||
# data_dir: ./data
|
||||
# output_dir: ./experiments
|
||||
plot: false
|
||||
silent: true
|
||||
sbatch_script_template: baselines/sbatch_template.sh # adapt the template to your needs
|
||||
run_scheduler: slurm_array
|
||||
sbatch_time_factor: 1.8 # increase this for slower machine
|
||||
sbatch_args:
|
||||
partition: single # adapt to your cluster
|
||||
evaluation:
|
||||
output_types: [pdf]
|
||||
plot:
|
||||
x_axis:
|
||||
- optimizer.kappa_init_param
|
||||
- optimizer.weight_decay
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
task:
|
||||
name: classification_small
|
||||
output_dir_name: classification_small_cpr_paper
|
||||
label_smoothing: 0.1
|
||||
train_transforms:
|
||||
trivial_augment:
|
||||
use: false
|
||||
optimizer:
|
||||
- name: adamw_baseline
|
||||
learning_rate: [1.e-1, 1.e-2, 1.e-3, 1.e-4]
|
||||
# learning_rate: [1.e-1, 3.16e-2, 1.e-2, 3.16e-3, 1.e-3] # finer grid
|
||||
weight_decay: [1, 1.e-1, 1.e-2, 1.e-3, 1.e-4, 0]
|
||||
warmup_factor: 0.025
|
||||
eta_min_factor: 0.1
|
||||
- name: adamcpr_fast
|
||||
learning_rate: [1.e-1, 1.e-2, 1.e-3, 1.e-4]
|
||||
# learning_rate: [1.e-1, 3.16e-2, 1.e-2, 3.16e-3, 1.e-3] # finer grid
|
||||
kappa_init_param: [0.5, 1, 2, 4, 8, 16, 32]
|
||||
warmup_factor: 0.025
|
||||
eta_min_factor: 0.1
|
||||
engine:
|
||||
seed: [1, 2, 3]
|
||||
# data_dir: ./data
|
||||
# output_dir: ./experiments
|
||||
plot: false
|
||||
silent: true
|
||||
sbatch_script_template: baselines/sbatch_template.sh # adapt the template to your needs
|
||||
run_scheduler: slurm_array
|
||||
sbatch_time_factor: 1.8 # increase this for slower machine
|
||||
sbatch_args:
|
||||
partition: single # adapt to your cluster
|
||||
evaluation:
|
||||
output_types: [pdf]
|
||||
plot:
|
||||
x_axis:
|
||||
- optimizer.kappa_init_param
|
||||
- optimizer.weight_decay
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
task:
|
||||
name: classification_small
|
||||
optimizer:
|
||||
- name: adamw_baseline
|
||||
learning_rate: [1.e-1, 1.e-2, 1.e-3, 1.e-4]
|
||||
# learning_rate: [1.e-1, 3.16e-2, 1.e-2, 3.16e-3, 1.e-3] # finer grid
|
||||
weight_decay: [10, 1.e-0, 1.e-1, 1.e-2, 1.e-3]
|
||||
- name: adamcpr_fast
|
||||
learning_rate: [1.e-1, 1.e-2, 1.e-3, 1.e-4]
|
||||
# learning_rate: [1.e-1, 3.16e-2, 1.e-2, 3.16e-3, 1.e-3] # finer grid
|
||||
kappa_init_param: [1, 2, 4, 8, 16]
|
||||
engine:
|
||||
seed: [1, 2, 3]
|
||||
# data_dir: ./data
|
||||
# output_dir: ./experiments
|
||||
plot: false
|
||||
silent: true
|
||||
sbatch_script_template: baselines/sbatch_template.sh # adapt the template to your needs
|
||||
run_scheduler: slurm_array
|
||||
sbatch_time_factor: 1.8 # increase this for slower machine
|
||||
sbatch_args:
|
||||
partition: single # adapt to your cluster
|
||||
evaluation:
|
||||
output_types: [pdf]
|
||||
plot:
|
||||
x_axis:
|
||||
- optimizer.kappa_init_param
|
||||
- optimizer.weight_decay
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
task:
|
||||
name: graph
|
||||
optimizer:
|
||||
- name: adamw_baseline
|
||||
learning_rate: [1.e-2, 1.e-3, 1.e-4]
|
||||
weight_decay: [1.e-1, 1.e-2, 1.e-3, 0]
|
||||
- name: adamcpr_fast
|
||||
learning_rate: [1.e-2, 1.e-3, 1.e-4]
|
||||
kappa_init_param: [0.5, 1, 2, 4]
|
||||
engine:
|
||||
seed: [1, 2, 3]
|
||||
# data_dir: ./data
|
||||
# output_dir: ./experiments
|
||||
plot: false
|
||||
silent: true
|
||||
sbatch_script_template: baselines/sbatch_template.sh # adapt the template to your needs
|
||||
run_scheduler: slurm_array
|
||||
sbatch_time_factor: 1.8 # increase this for slower machine
|
||||
sbatch_args:
|
||||
partition: single # adapt to your cluster
|
||||
evaluation:
|
||||
output_types: [pdf]
|
||||
plot:
|
||||
x_axis:
|
||||
- optimizer.kappa_init_param
|
||||
- optimizer.weight_decay
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
task:
|
||||
name: graph_tiny
|
||||
optimizer:
|
||||
- name: adamw_baseline
|
||||
learning_rate: [1.e-0, 1.e-1, 1.e-2, 1.e-3]
|
||||
weight_decay: [1, 1.e-1, 1.e-2, 1.e-3, 1.e-4, 0]
|
||||
- name: adamcpr_fast
|
||||
learning_rate: [1.e-0, 1.e-1, 1.e-2, 1.e-3]
|
||||
kappa_init_param: [0.5, 1, 2, 4, 8, 16, 32]
|
||||
engine:
|
||||
seed: [1, 2, 3]
|
||||
# data_dir: ./data
|
||||
# output_dir: ./experiments
|
||||
plot: false
|
||||
silent: true
|
||||
sbatch_script_template: baselines/sbatch_template.sh # adapt the template to your needs
|
||||
run_scheduler: slurm_array
|
||||
sbatch_time_factor: 1 # increase this for slower machine
|
||||
sbatch_args:
|
||||
partition: single # adapt to your cluster
|
||||
evaluation:
|
||||
output_types: [pdf]
|
||||
plot:
|
||||
x_axis:
|
||||
- optimizer.kappa_init_param
|
||||
- optimizer.weight_decay
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
task:
|
||||
name: mnist
|
||||
optimizer:
|
||||
- name: adamw_baseline
|
||||
learning_rate: [1.e-1, 1.e-2, 1.e-3]
|
||||
# learning_rate: [1.e-1, 3.16e-2, 1.e-2, 3.16e-3, 1.e-3] # finer grid
|
||||
weight_decay: [1.e-0, 1.e-1, 1.e-2, 1.e-3]
|
||||
- name: adamcpr_fast
|
||||
learning_rate: [1.e-1, 1.e-2, 1.e-3]
|
||||
# learning_rate: [1.e-1, 3.16e-2, 1.e-2, 3.16e-3, 1.e-3] # finer grid
|
||||
kappa_init_param: [0.5, 1, 2, 4, 8, 16, 32]
|
||||
engine:
|
||||
seed: [1, 2, 3]
|
||||
# data_dir: ./data
|
||||
# output_dir: ./experiments
|
||||
plot: false
|
||||
silent: true
|
||||
sbatch_script_template: baselines/sbatch_template.sh # adapt the template to your needs
|
||||
run_scheduler: slurm_array
|
||||
sbatch_time_factor: 1.8 # increase this for slower machine
|
||||
sbatch_args:
|
||||
partition: single # adapt to your cluster
|
||||
evaluation:
|
||||
output_types: [pdf]
|
||||
plot:
|
||||
x_axis:
|
||||
- optimizer.kappa_init_param
|
||||
- optimizer.weight_decay
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
module load devel/miniconda
|
||||
|
||||
nvidia-smi
|
||||
|
||||
source ~/.bashrc
|
||||
# some users reported issues with stacked conda environments; see https://en.wikipedia.org/wiki/Rule_of_three_(writing)
|
||||
conda deactivate
|
||||
conda deactivate
|
||||
conda deactivate
|
||||
conda activate fob
|
||||
|
||||
# Running the job
|
||||
|
||||
start=$(date +%s)
|
||||
|
||||
__FOB_COMMAND__
|
||||
|
||||
finish=$(date +%s)
|
||||
|
||||
runtime=$((finish-start))
|
||||
|
||||
echo Job execution complete.
|
||||
echo Total job runtime: $runtime seconds
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
task:
|
||||
name: segmentation
|
||||
optimizer:
|
||||
- name: adamw_baseline
|
||||
learning_rate: [3.16e-3, 1.e-3, 3.16e-4]
|
||||
weight_decay: [1.e-1, 1.e-2, 1.e-3, 0]
|
||||
- name: adamcpr_fast
|
||||
learning_rate: [3.16e-3, 1.e-3, 3.16e-4]
|
||||
kappa_init_param: [1, 4, 16, 64]
|
||||
engine:
|
||||
seed: [1, 2, 3]
|
||||
# data_dir: ./data
|
||||
# output_dir: ./experiments
|
||||
plot: false
|
||||
silent: true
|
||||
sbatch_script_template: baselines/sbatch_template.sh # adapt the template to your needs
|
||||
run_scheduler: slurm_array
|
||||
sbatch_time_factor: 1.5 # increase this for slower machine
|
||||
sbatch_args:
|
||||
partition: single # adapt to your cluster
|
||||
save_sbatch_scripts: slurm-scripts
|
||||
evaluation:
|
||||
output_types: [pdf]
|
||||
plot:
|
||||
x_axis:
|
||||
- optimizer.kappa_init_param
|
||||
- optimizer.weight_decay
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
task:
|
||||
name: tabular
|
||||
optimizer:
|
||||
- name: adamw_baseline
|
||||
learning_rate: [1.e-2, 1.e-3, 1.e-4]
|
||||
weight_decay: [10, 1.e-0, 1.e-1, 1.e-2, 1.e-3]
|
||||
- name: adamcpr_fast
|
||||
learning_rate: [1.e-2, 1.e-3, 1.e-4]
|
||||
kappa_init_param: [0.5, 1, 2, 4, 8]
|
||||
engine:
|
||||
seed: [1, 2, 3]
|
||||
# data_dir: ./data
|
||||
# output_dir: ./experiments
|
||||
plot: false
|
||||
silent: true
|
||||
sbatch_script_template: baselines/sbatch_template.sh # adapt the template to your needs
|
||||
run_scheduler: slurm_array
|
||||
sbatch_time_factor: 1.8 # increase this for slower machine
|
||||
sbatch_args:
|
||||
partition: single # adapt to your cluster
|
||||
evaluation:
|
||||
output_types: [pdf]
|
||||
plot:
|
||||
x_axis:
|
||||
- optimizer.kappa_init_param
|
||||
- optimizer.weight_decay
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
task:
|
||||
name: translation
|
||||
optimizer:
|
||||
- name: adamw_baseline
|
||||
learning_rate: [3.16e-3, 1.e-3, 3.16e-4]
|
||||
# learning_rate: [3.16e-3, 1.77e-3, 1.e-3, 5.16e-3, 3.16e-4] # finer grid
|
||||
weight_decay: [1.e-0, 1.e-1, 1.e-2]
|
||||
- name: adamcpr_fast
|
||||
learning_rate: [3.16e-3, 1.e-3, 3.16e-4]
|
||||
# learning_rate: [3.16e-3, 1.77e-3, 1.e-3, 5.16e-3, 3.16e-4] # finer grid
|
||||
kappa_init_param: [0.5, 1, 2]
|
||||
engine:
|
||||
seed: [1, 2, 3]
|
||||
# data_dir: ./data
|
||||
# output_dir: ./experiments
|
||||
plot: false
|
||||
silent: true
|
||||
sbatch_script_template: baselines/sbatch_template.sh # adapt the template to your needs
|
||||
run_scheduler: slurm_array
|
||||
sbatch_time_factor: 2.0 # increase this for slower machine
|
||||
sbatch_args:
|
||||
partition: single # adapt to your cluster
|
||||
evaluation:
|
||||
output_types: [pdf]
|
||||
plot:
|
||||
x_axis:
|
||||
- optimizer.kappa_init_param
|
||||
- optimizer.weight_decay
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
name: fob
|
||||
dependencies:
|
||||
- python=3.10
|
||||
- pip
|
||||
- pip:
|
||||
- -r requirements.txt
|
||||
- -e .
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
task:
|
||||
name: mnist
|
||||
optimizer:
|
||||
name: adamw_baseline
|
||||
2
environments/community/pytorch_optimizer_coding/FOB/examples/.gitignore
vendored
Normal file
2
environments/community/pytorch_optimizer_coding/FOB/examples/.gitignore
vendored
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
*.png
|
||||
outputs
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
# Using FOB with NePS for HPO
|
||||
Run all commands from the root of the FOB repository.
|
||||
|
||||
## Setup
|
||||
```bash
|
||||
conda create -n fob-neps python=3.10 -y
|
||||
conda activate fob-neps
|
||||
pip install -r requirements.txt
|
||||
pip install -r examples/neps/requirements.txt # this will downgrade some packages
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
## Example
|
||||
```bash
|
||||
python examples/neps/hpo.py examples/neps/experiment.yaml
|
||||
```
|
||||
|
|
@ -0,0 +1,197 @@
|
|||
import argparse
|
||||
import logging
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import lightning as L
|
||||
import neps
|
||||
import torch
|
||||
from lightning.pytorch.callbacks import ModelCheckpoint
|
||||
from lightning.pytorch.loggers import TensorBoardLogger
|
||||
from neps.utils.common import get_initial_directory, load_lightning_checkpoint
|
||||
from pytorch_fob.engine.engine import Engine, Run
|
||||
|
||||
#############################################################
|
||||
# Definig the seeds for reproducibility
|
||||
|
||||
|
||||
def set_seed(seed=42):
|
||||
L.seed_everything(seed)
|
||||
|
||||
|
||||
#############################################################
|
||||
# Define search space
|
||||
|
||||
|
||||
def search_space(run: Run) -> dict:
|
||||
config = run.get_config()
|
||||
space = dict()
|
||||
space["learning_rate"] = neps.FloatParameter(
|
||||
lower=1e-5, upper=1e-1, log=True, default=1e-3
|
||||
)
|
||||
space["eta_min_factor"] = neps.FloatParameter(lower=1e-3, upper=1e-1, log=True)
|
||||
space["warmup_factor"] = neps.FloatParameter(lower=1e-3, upper=1e-0, log=True)
|
||||
if config["optimizer"]["name"] == "adamw_baseline":
|
||||
space["weight_decay"] = neps.FloatParameter(lower=1e-5, upper=1e-0, log=True)
|
||||
space["one_minus_beta1"] = neps.FloatParameter(lower=1e-2, upper=2e-1, log=True)
|
||||
space["beta2"] = neps.FloatParameter(lower=0.9, upper=0.999)
|
||||
elif config["optimizer"]["name"] == "sgd_baseline":
|
||||
space["weight_decay"] = neps.FloatParameter(lower=1e-5, upper=1e-0, log=True)
|
||||
space["momentum"] = neps.FloatParameter(lower=0, upper=1)
|
||||
elif config["optimizer"]["name"] == "adamcpr_fast":
|
||||
space["one_minus_beta1"] = neps.FloatParameter(lower=1e-2, upper=2e-1, log=True)
|
||||
space["beta2"] = neps.FloatParameter(lower=0.9, upper=0.999)
|
||||
space["kappa_init_param"] = neps.IntegerParameter(
|
||||
lower=1, upper=19550, log=True
|
||||
)
|
||||
space["kappa_init_method"] = neps.ConstantParameter("warm_start")
|
||||
else:
|
||||
raise ValueError("optimizer not supported")
|
||||
space["epochs"] = neps.IntegerParameter(
|
||||
lower=5,
|
||||
upper=config["task"]["max_epochs"],
|
||||
is_fidelity=True, # IMPORTANT to set this to True for the fidelity parameter
|
||||
)
|
||||
return space
|
||||
|
||||
|
||||
def create_exmperiment(run: Run, config: dict) -> dict:
|
||||
new_config = run.get_config().copy()
|
||||
for k, v in config.items():
|
||||
if k == "one_minus_beta1":
|
||||
new_config["optimizer"]["beta1"] = 1 - v
|
||||
elif k != "epochs":
|
||||
new_config["optimizer"][k] = v
|
||||
return new_config
|
||||
|
||||
|
||||
#############################################################
|
||||
# Define the run pipeline function
|
||||
|
||||
|
||||
def create_pipline(base_run: Run):
|
||||
def run_pipeline(pipeline_directory, previous_pipeline_directory, **config) -> dict:
|
||||
# Initialize the first directory to store the event and checkpoints files
|
||||
init_dir = get_initial_directory(pipeline_directory)
|
||||
checkpoint_dir = init_dir / "checkpoints"
|
||||
|
||||
# Initialize the model and checkpoint dir
|
||||
engine = Engine()
|
||||
engine.parse_experiment(create_exmperiment(base_run, config))
|
||||
run = next(engine.runs())
|
||||
run.ensure_max_steps()
|
||||
model, datamodule = run.get_task()
|
||||
|
||||
# Create the TensorBoard logger for logging
|
||||
logger = TensorBoardLogger(
|
||||
save_dir=init_dir, name="data", version="logs", default_hp_metric=False
|
||||
)
|
||||
|
||||
# Add checkpoints at the end of training
|
||||
checkpoint_callback = ModelCheckpoint(
|
||||
dirpath=checkpoint_dir,
|
||||
filename="{epoch}-{val_loss:.2f}",
|
||||
)
|
||||
|
||||
# Use this function to load the previous checkpoint if it exists
|
||||
checkpoint_path, checkpoint = load_lightning_checkpoint(
|
||||
previous_pipeline_directory=previous_pipeline_directory,
|
||||
checkpoint_dir=checkpoint_dir,
|
||||
)
|
||||
|
||||
if checkpoint is None:
|
||||
previously_spent_epochs = 0
|
||||
else:
|
||||
previously_spent_epochs = checkpoint["epoch"]
|
||||
|
||||
# Create a PyTorch Lightning Trainer
|
||||
epochs = config["epochs"]
|
||||
|
||||
trainer = L.Trainer(
|
||||
logger=logger,
|
||||
max_epochs=epochs,
|
||||
callbacks=[checkpoint_callback],
|
||||
)
|
||||
|
||||
# Train the model and retrieve training/validation metrics
|
||||
if checkpoint_path:
|
||||
trainer.fit(model, datamodule=datamodule, ckpt_path=checkpoint_path)
|
||||
else:
|
||||
trainer.fit(model, datamodule=datamodule)
|
||||
|
||||
train_accuracy = trainer.logged_metrics.get("train_acc", None)
|
||||
train_accuracy = (
|
||||
train_accuracy.item()
|
||||
if isinstance(train_accuracy, torch.Tensor)
|
||||
else train_accuracy
|
||||
)
|
||||
val_loss = trainer.logged_metrics.get("val_loss", None)
|
||||
val_loss = val_loss.item() if isinstance(val_loss, torch.Tensor) else val_loss
|
||||
val_accuracy = trainer.logged_metrics.get("val_acc", None)
|
||||
val_accuracy = (
|
||||
val_accuracy.item()
|
||||
if isinstance(val_accuracy, torch.Tensor)
|
||||
else val_accuracy
|
||||
)
|
||||
|
||||
# Test the model and retrieve test metrics
|
||||
trainer.test(model, datamodule=datamodule)
|
||||
|
||||
test_accuracy = trainer.logged_metrics.get("test_acc", None)
|
||||
test_accuracy = (
|
||||
test_accuracy.item()
|
||||
if isinstance(test_accuracy, torch.Tensor)
|
||||
else test_accuracy
|
||||
)
|
||||
|
||||
return {
|
||||
"loss": val_loss,
|
||||
"cost": epochs - previously_spent_epochs,
|
||||
"info_dict": {
|
||||
"train_accuracy": train_accuracy,
|
||||
"val_accuracy": val_accuracy,
|
||||
"test_accuracy": test_accuracy,
|
||||
},
|
||||
}
|
||||
|
||||
return run_pipeline
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Parse command line arguments
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
"experiment_file", type=Path, help="The yaml file specifying the experiment."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--n_trials",
|
||||
type=int,
|
||||
default=15,
|
||||
help="Number of different configurations to train",
|
||||
)
|
||||
args, extra_args = parser.parse_known_args()
|
||||
|
||||
# Initialize the logger and record start time
|
||||
start_time = time.time()
|
||||
set_seed(42)
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
engine = Engine()
|
||||
engine.parse_experiment_from_file(args.experiment_file, extra_args)
|
||||
run = next(engine.runs())
|
||||
|
||||
# Run NePS with specified parameters
|
||||
neps.run(
|
||||
run_pipeline=create_pipline(run),
|
||||
pipeline_space=search_space(run),
|
||||
root_directory=run.engine.output_dir,
|
||||
max_evaluations_total=args.n_trials,
|
||||
searcher="hyperband",
|
||||
)
|
||||
|
||||
# Record the end time and calculate execution time
|
||||
end_time = time.time()
|
||||
execution_time = end_time - start_time
|
||||
|
||||
# Log the execution time
|
||||
logging.info(f"Execution time: {execution_time} seconds")
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
neural-pipeline-search
|
||||
torch==2.0.0
|
||||
torchvision
|
||||
torchaudio
|
||||
torchtext
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
task:
|
||||
name: mnist
|
||||
optimizer:
|
||||
name: adamw_baseline
|
||||
learning_rate: [1.0e-2, 1.0e-3]
|
||||
weight_decay: [0.1, 0.01]
|
||||
engine:
|
||||
data_dir: ./examples/data
|
||||
output_dir: ./examples/plotting/outputs
|
||||
seed: [1, 2]
|
||||
evaluation:
|
||||
output_dir: ./examples/plotting
|
||||
experiment_name: 1_mnist-adamw
|
||||
checkpoints:
|
||||
- last
|
||||
output_types:
|
||||
- png
|
||||
plot:
|
||||
std: True
|
||||
x_axis:
|
||||
- optimizer.weight_decay
|
||||
y_axis:
|
||||
- optimizer.learning_rate
|
||||
plotstyle:
|
||||
tight_layout: False # the title is a little bit squeezed with this
|
||||
text:
|
||||
usetex: True
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
task:
|
||||
name: mnist
|
||||
optimizer:
|
||||
- name: sgd_baseline
|
||||
learning_rate: [0.01, 0.001]
|
||||
weight_decay: [0.1, 0.01]
|
||||
- name: adamw_baseline
|
||||
learning_rate: [0.01, 0.001]
|
||||
weight_decay: [0.1, 0.01]
|
||||
engine:
|
||||
data_dir: ./examples/data
|
||||
output_dir: ./examples/plotting/outputs
|
||||
seed: [1, 2]
|
||||
evaluation:
|
||||
output_dir: ./examples/plotting
|
||||
experiment_name: 2_adamw-vs-sgd
|
||||
checkpoints:
|
||||
- last
|
||||
output_types:
|
||||
- png
|
||||
plot:
|
||||
std: True
|
||||
x_axis:
|
||||
- optimizer.weight_decay
|
||||
y_axis:
|
||||
- optimizer.learning_rate
|
||||
plotstyle:
|
||||
tight_layout: False # the title is a little bit squeezed with this
|
||||
text:
|
||||
usetex: True
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
task:
|
||||
- mnist
|
||||
- tabular
|
||||
optimizer:
|
||||
- name: sgd_baseline
|
||||
learning_rate: [0.01, 0.001]
|
||||
weight_decay: [0.1, 0.01]
|
||||
- name: adamw_baseline
|
||||
learning_rate: [0.01, 0.001]
|
||||
weight_decay: [0.1, 0.01]
|
||||
engine:
|
||||
data_dir: ./examples/data
|
||||
output_dir: ./examples/plotting/outputs
|
||||
seed: [1, 2]
|
||||
evaluation:
|
||||
output_dir: ./examples/plotting
|
||||
experiment_name: 3_mnist-and-tabular_adamw-vs-sgd
|
||||
checkpoints:
|
||||
- last
|
||||
output_types:
|
||||
- png
|
||||
split_groups: ["task.name"]
|
||||
plot:
|
||||
std: True
|
||||
x_axis:
|
||||
- optimizer.weight_decay
|
||||
y_axis:
|
||||
- optimizer.learning_rate
|
||||
plotstyle:
|
||||
tight_layout: False # the title is a little bit squeezed with this
|
||||
text:
|
||||
usetex: True
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
|
||||
task:
|
||||
- mnist
|
||||
- classification_small
|
||||
- tabular
|
||||
optimizer:
|
||||
- name: dummy_optimizer_yamltest
|
||||
engine:
|
||||
seed: [42]
|
||||
data_dir: examples/data
|
||||
output_dir: outputs/experiment_dummy_optimizer_yamltest
|
||||
train: true
|
||||
test: true
|
||||
plot: false
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
|
||||
task:
|
||||
- mnist
|
||||
- classification_small
|
||||
- tabular
|
||||
max_epochs: 1
|
||||
optimizer:
|
||||
- name: my_sgd_optimizer
|
||||
engine:
|
||||
seed: [42]
|
||||
data_dir: examples/data
|
||||
output_dir: outputs/experiment_my_sgd_optimizer
|
||||
train: true
|
||||
test: true
|
||||
plot: false
|
||||
|
|
@ -0,0 +1,120 @@
|
|||
"""
|
||||
This tool fixes folder names which are incorrect due to changes in the default.yaml
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from pprint import pprint
|
||||
|
||||
import yaml
|
||||
from pytorch_fob.engine.engine import Engine
|
||||
|
||||
|
||||
def deep_diff(dict1, dict2):
|
||||
diff = {}
|
||||
|
||||
# Check keys in dict1 but not in dict2
|
||||
for key in dict1:
|
||||
if key not in dict2:
|
||||
if dict1[key] is not None:
|
||||
diff[key] = {"old_value": dict1[key], "new_value": None}
|
||||
elif dict1[key] != dict2[key]:
|
||||
if isinstance(dict1[key], dict) and isinstance(dict2[key], dict):
|
||||
nested_diff = deep_diff(dict1[key], dict2[key])
|
||||
if nested_diff:
|
||||
diff[key] = nested_diff
|
||||
else:
|
||||
diff[key] = {"old_value": dict1[key], "new_value": dict2[key]}
|
||||
|
||||
# Check keys in dict2 but not in dict1
|
||||
for key in dict2:
|
||||
if key not in dict1:
|
||||
if dict2[key] is not None:
|
||||
diff[key] = {"old_value": None, "new_value": dict2[key]}
|
||||
|
||||
return diff
|
||||
|
||||
|
||||
def fix_recursive(path: Path, dry_run: bool, ignore_config_diff: bool):
|
||||
for file in path.iterdir():
|
||||
if file.name == "config.yaml":
|
||||
e = Engine()
|
||||
e.parse_experiment_from_file(file)
|
||||
runs = list(e.runs())
|
||||
if len(runs) != 1:
|
||||
print("Error config.yaml is invalid:", file, file=sys.stderr)
|
||||
sys.exit(1)
|
||||
r = runs[0]
|
||||
target = r.run_dir.name
|
||||
actual = file.parent.name
|
||||
if actual == target:
|
||||
continue
|
||||
target = file.parent.parent / target
|
||||
actual = file.parent
|
||||
print(
|
||||
"folder name is wrong and needs fixing:\ncurrent:",
|
||||
actual,
|
||||
"\ncalculated:",
|
||||
target,
|
||||
)
|
||||
computed_config = r.export_config_dict()
|
||||
with open(file, "r", encoding="utf8") as f:
|
||||
actual_config = yaml.safe_load(f)
|
||||
clean_computed_config = {
|
||||
"engine": {"devices": computed_config["engine"]["devices"]},
|
||||
"task": computed_config["task"],
|
||||
"optimizer": computed_config["optimizer"],
|
||||
}
|
||||
clean_actual_config = {
|
||||
"engine": {"devices": actual_config["engine"]["devices"]},
|
||||
"task": actual_config["task"],
|
||||
"optimizer": actual_config["optimizer"],
|
||||
}
|
||||
diff = deep_diff(clean_actual_config, clean_computed_config)
|
||||
if diff and (not ignore_config_diff):
|
||||
print("warning config dict differs!:")
|
||||
pprint(diff)
|
||||
print("skipping folder!")
|
||||
continue
|
||||
if not dry_run:
|
||||
print("renaming...")
|
||||
if target.exists():
|
||||
print("target path already exists, skipping...")
|
||||
continue
|
||||
actual.rename(target)
|
||||
|
||||
elif file.is_dir():
|
||||
fix_recursive(file, dry_run, ignore_config_diff)
|
||||
|
||||
|
||||
def main(args: argparse.Namespace):
|
||||
base_folder: Path = args.base_folder
|
||||
ignore_config_diff = args.ignore_config_diff
|
||||
if ignore_config_diff:
|
||||
res = input(
|
||||
"WARNING: ignoring the config dict diffs can be dangerous do you know what you are doing? [y/n]"
|
||||
)
|
||||
if res.strip().lower() != "y":
|
||||
return
|
||||
fix_recursive(base_folder, args.dry_run, ignore_config_diff)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="folder name fix tool")
|
||||
parser.add_argument(
|
||||
"base_folder", type=Path, help="Folder with experiments, will check recursively"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--dry_run",
|
||||
action="store_true",
|
||||
help="Just print what would be changed, do not change any files",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--ignore_config_diff",
|
||||
action="store_true",
|
||||
help="Ignores config dict difference WARNING: experiment could be totally different!\
|
||||
Use with care!",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
main(args)
|
||||
|
|
@ -0,0 +1,255 @@
|
|||
import glob
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class OptimizerBenchmarkEnv:
|
||||
def __init__(self, fob_root=None):
|
||||
# Root directory for FOB (should be the path to atropos/FOB)
|
||||
self.fob_root = Path(fob_root) if fob_root else Path(__file__).parent
|
||||
self.optimizers_dir = self.fob_root / "pytorch_fob" / "optimizers"
|
||||
self.tasks = ["mnist", "classification_small", "tabular"]
|
||||
self.optimizer_name = None
|
||||
self.optimizer_dir = None
|
||||
|
||||
def submit_optimizer(
|
||||
self, optimizer_code: str, optimizer_name: str, default_yaml: str = None
|
||||
):
|
||||
"""
|
||||
Registers a new optimizer by writing its code and default config to the optimizers directory.
|
||||
optimizer_code: Python code for optimizer.py (must define configure_optimizers)
|
||||
optimizer_name: Name for the optimizer (used as folder name)
|
||||
default_yaml: Optional YAML string for default.yaml (otherwise uses a minimal template)
|
||||
"""
|
||||
self.optimizer_name = optimizer_name
|
||||
self.optimizer_dir = self.optimizers_dir / optimizer_name
|
||||
os.makedirs(self.optimizer_dir, exist_ok=True)
|
||||
# Write optimizer.py
|
||||
with open(self.optimizer_dir / "optimizer.py", "w") as f:
|
||||
f.write(optimizer_code)
|
||||
# Write default.yaml
|
||||
if default_yaml is None:
|
||||
default_yaml = (
|
||||
f"""optimizer:\n name: {optimizer_name}\n learning_rate: 1.e-3\n"""
|
||||
)
|
||||
with open(self.optimizer_dir / "default.yaml", "w") as f:
|
||||
f.write(default_yaml)
|
||||
# Write __init__.py (empty)
|
||||
with open(self.optimizer_dir / "__init__.py", "w") as f:
|
||||
f.write("")
|
||||
print(f"Registered optimizer '{optimizer_name}' at {self.optimizer_dir}")
|
||||
|
||||
def generate_experiment_yaml(
|
||||
self, yaml_path=None, seeds=[42], data_dir="examples/data", output_dir=None
|
||||
):
|
||||
"""
|
||||
Generates an experiment YAML for the three tasks and the registered optimizer.
|
||||
yaml_path: where to write the YAML file (default: f"experiment_{optimizer_name}.yaml" in FOB root)
|
||||
seeds: list of seeds to use
|
||||
data_dir: directory for datasets
|
||||
output_dir: directory for outputs (default: outputs/experiment_{optimizer_name})
|
||||
"""
|
||||
if self.optimizer_name is None:
|
||||
raise ValueError("No optimizer registered. Call submit_optimizer first.")
|
||||
if output_dir is None:
|
||||
output_dir = f"FOB/outputs/experiment_{self.optimizer_name}"
|
||||
if yaml_path is None:
|
||||
yaml_path = self.fob_root / f"experiment_{self.optimizer_name}.yaml"
|
||||
else:
|
||||
yaml_path = Path(yaml_path)
|
||||
yaml_content = f"""
|
||||
task:
|
||||
- mnist
|
||||
- classification_small
|
||||
- tabular
|
||||
max_epochs: 1
|
||||
optimizer:
|
||||
- name: {self.optimizer_name}
|
||||
engine:
|
||||
seed: {seeds}
|
||||
data_dir: {data_dir}
|
||||
output_dir: {output_dir}
|
||||
train: true
|
||||
test: true
|
||||
plot: false
|
||||
"""
|
||||
with open(yaml_path, "w") as f:
|
||||
f.write(yaml_content)
|
||||
self.experiment_yaml_path = yaml_path
|
||||
print(f"Experiment YAML written to {yaml_path}")
|
||||
|
||||
def run_benchmark(self):
|
||||
"""
|
||||
Runs dataset setup and experiment using the generated YAML.
|
||||
"""
|
||||
if not hasattr(self, "experiment_yaml_path"):
|
||||
raise ValueError(
|
||||
"No experiment YAML found. Call generate_experiment_yaml first."
|
||||
)
|
||||
# Run dataset setup
|
||||
print("Running dataset setup...")
|
||||
subprocess.run(
|
||||
[
|
||||
"python3",
|
||||
"-m",
|
||||
"pytorch_fob.dataset_setup",
|
||||
str(self.experiment_yaml_path),
|
||||
],
|
||||
check=True,
|
||||
)
|
||||
# Run experiment
|
||||
print("Running experiment...")
|
||||
subprocess.run(
|
||||
[
|
||||
"python3",
|
||||
"-m",
|
||||
"pytorch_fob.run_experiment",
|
||||
str(self.experiment_yaml_path),
|
||||
],
|
||||
check=True,
|
||||
)
|
||||
print("Benchmark run complete.")
|
||||
|
||||
def get_reward(self, alpha=1.0, beta=1.0):
|
||||
"""
|
||||
Computes a reward based on training time and final loss/accuracy.
|
||||
For classification tasks (mnist, classification_small), uses accuracy (maximize).
|
||||
For regression/tabular tasks, uses loss (minimize).
|
||||
- alpha: weight for time
|
||||
- beta: weight for loss/accuracy
|
||||
"""
|
||||
import json
|
||||
|
||||
import yaml
|
||||
|
||||
with open(self.experiment_yaml_path, "r") as f:
|
||||
config = yaml.safe_load(f)
|
||||
output_dir = config["engine"]["output_dir"]
|
||||
if not os.path.isabs(output_dir):
|
||||
output_dir = self.fob_root / output_dir
|
||||
total_time = 0.0
|
||||
reward = 0.0
|
||||
for task in self.tasks:
|
||||
# Look for train_time.txt in the correct subdirectory
|
||||
pattern = os.path.join(
|
||||
output_dir, task, self.optimizer_name, "*", "train_time.txt"
|
||||
)
|
||||
files = glob.glob(str(pattern))
|
||||
if not files:
|
||||
print(
|
||||
f"Warning: No train_time.txt found for task {task}. Pattern: {pattern}"
|
||||
)
|
||||
continue
|
||||
for file in files:
|
||||
with open(file, "r") as f:
|
||||
content = f.read()
|
||||
match = re.search(r"([0-9]+\.?[0-9]*)", content)
|
||||
if match:
|
||||
t = float(match.group(1))
|
||||
total_time += t
|
||||
print(f"{task}: {t} seconds (from {file})")
|
||||
# Find scores.json
|
||||
scores_path = os.path.join(os.path.dirname(file), "scores.json")
|
||||
metric_val = None
|
||||
metric_used = None
|
||||
if os.path.exists(scores_path):
|
||||
with open(scores_path, "r") as f:
|
||||
scores = json.load(f)
|
||||
# Classification: use accuracy if present
|
||||
if task in ["mnist", "classification_small"]:
|
||||
for k in ["acc", "test_acc", "val_acc"]:
|
||||
for parent in ["test_final", "test_best"]:
|
||||
if (
|
||||
parent in scores
|
||||
and isinstance(scores[parent], list)
|
||||
and len(scores[parent]) > 0
|
||||
):
|
||||
if k in scores[parent][0]:
|
||||
metric_val = scores[parent][0][k]
|
||||
metric_used = k
|
||||
break
|
||||
if metric_val is not None:
|
||||
break
|
||||
if metric_val is not None:
|
||||
print(
|
||||
f"{task}: {metric_used} = {metric_val} (maximize, from {scores_path})"
|
||||
)
|
||||
reward += beta * metric_val
|
||||
else:
|
||||
print(
|
||||
f"Warning: No suitable accuracy metric found in {scores_path} for {task}"
|
||||
)
|
||||
# Tabular/regression: use loss if present
|
||||
elif task == "tabular":
|
||||
for k in ["loss", "test_loss", "val_loss", "test_rmse"]:
|
||||
for parent in ["test_final", "test_best"]:
|
||||
if (
|
||||
parent in scores
|
||||
and isinstance(scores[parent], list)
|
||||
and len(scores[parent]) > 0
|
||||
):
|
||||
if k in scores[parent][0]:
|
||||
metric_val = scores[parent][0][k]
|
||||
metric_used = k
|
||||
break
|
||||
if metric_val is not None:
|
||||
break
|
||||
if metric_val is not None:
|
||||
print(
|
||||
f"{task}: {metric_used} = {metric_val} (minimize, from {scores_path})"
|
||||
)
|
||||
reward += -beta * metric_val
|
||||
else:
|
||||
print(
|
||||
f"Warning: No suitable loss metric found in {scores_path} for {task}"
|
||||
)
|
||||
else:
|
||||
print(
|
||||
f"Warning: scores.json not found for task {task} at {scores_path}"
|
||||
)
|
||||
# Subtract time penalty
|
||||
reward -= alpha * total_time
|
||||
print(f"Total training time: {total_time} seconds. Reward: {reward}")
|
||||
return reward
|
||||
|
||||
|
||||
def test_optimizer_registration():
|
||||
dummy_optimizer_code = """
|
||||
from lightning.pytorch.utilities.types import OptimizerLRScheduler
|
||||
from torch.optim import SGD
|
||||
from pytorch_fob.engine.parameter_groups import GroupedModel
|
||||
from pytorch_fob.engine.configs import OptimizerConfig
|
||||
|
||||
def configure_optimizers(model: GroupedModel, config: OptimizerConfig) -> OptimizerLRScheduler:
|
||||
lr = config.learning_rate
|
||||
optimizer = SGD(model.grouped_parameters(lr=lr), lr=lr)
|
||||
return {"optimizer": optimizer}
|
||||
"""
|
||||
env = OptimizerBenchmarkEnv()
|
||||
env.submit_optimizer(dummy_optimizer_code, "dummy_optimizer")
|
||||
print("Test registration complete.")
|
||||
|
||||
|
||||
def test_yaml_generation():
|
||||
dummy_optimizer_code = """
|
||||
from lightning.pytorch.utilities.types import OptimizerLRScheduler
|
||||
from torch.optim import SGD
|
||||
from pytorch_fob.engine.parameter_groups import GroupedModel
|
||||
from pytorch_fob.engine.configs import OptimizerConfig
|
||||
|
||||
def configure_optimizers(model: GroupedModel, config: OptimizerConfig) -> OptimizerLRScheduler:
|
||||
lr = config.learning_rate
|
||||
optimizer = SGD(model.grouped_parameters(lr=lr), lr=lr)
|
||||
return {"optimizer": optimizer}
|
||||
"""
|
||||
env = OptimizerBenchmarkEnv()
|
||||
env.submit_optimizer(dummy_optimizer_code, "dummy_optimizer_yamltest")
|
||||
env.generate_experiment_yaml()
|
||||
print(f"YAML generated at: {env.experiment_yaml_path}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_optimizer_registration()
|
||||
test_yaml_generation()
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
[build-system]
|
||||
requires = ["hatchling"]
|
||||
build-backend = "hatchling.build"
|
||||
|
||||
[project]
|
||||
name = "pytorch-fob"
|
||||
version = "0.1.0"
|
||||
authors = [
|
||||
{ name="Simon Blauth", email="blauths@tf.uni-freiburg.de" },
|
||||
{ name="Tobias Bürger", email="buergert@tf.uni-freiburg.de" },
|
||||
{ name="Zacharias Häringer", email="haeringz@tf.uni-freiburg.de" },
|
||||
]
|
||||
description = "A fast optimizer benchmark."
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
classifiers = [
|
||||
"Programming Language :: Python :: 3",
|
||||
"License :: OSI Approved :: Apache Software License",
|
||||
"Operating System :: OS Independent",
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
Homepage = "https://github.com/automl/fob"
|
||||
Issues = "https://github.com/automl/fob/issues"
|
||||
|
||||
[tool.ruff]
|
||||
line-length = 120
|
||||
indent-width = 4
|
||||
target-version = "py310"
|
||||
|
||||
[tool.ruff.lint]
|
||||
select = ["E4", "E7", "E9", "F", "I"]
|
||||
|
||||
[tool.ruff.format]
|
||||
quote-style = "double"
|
||||
|
||||
# update pip package
|
||||
# https://packaging.python.org/en/latest/guides/distributing-packages-using-setuptools/
|
||||
#> pip install build twine
|
||||
#> python3 -m build --sdist
|
||||
#> python3 -m build --wheel
|
||||
#> twine check dist/*
|
||||
#> twine upload dist/*
|
||||
|
|
@ -0,0 +1 @@
|
|||
# from pytorch_fob.engine import Engine # Unused import
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
from pytorch_fob.engine.engine import Engine
|
||||
|
||||
|
||||
def get_parser():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
"experiment_file", type=Path, help="The yaml file specifying the experiment."
|
||||
)
|
||||
return parser
|
||||
|
||||
|
||||
def main(args: argparse.Namespace, extra_args: list[str]):
|
||||
engine = Engine()
|
||||
engine.parse_experiment_from_file(args.experiment_file, extra_args=extra_args)
|
||||
engine.prepare_data()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = get_parser()
|
||||
args, extra_args = parser.parse_known_args()
|
||||
main(args, extra_args)
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
from pathlib import Path
|
||||
|
||||
# from pytorch_fob.engine.engine import Engine # Unused import
|
||||
|
||||
|
||||
def repository_root() -> Path:
|
||||
return Path(__file__).resolve().parent.parent
|
||||
|
|
@ -0,0 +1,353 @@
|
|||
import math
|
||||
import time
|
||||
from typing import Iterable, Optional
|
||||
|
||||
import deepspeed
|
||||
import torch
|
||||
from lightning import Callback, LightningModule, Trainer
|
||||
from lightning_utilities.core.rank_zero import rank_zero_only
|
||||
from pytorch_fob.engine.utils import log_debug, log_info, log_warn, seconds_to_str
|
||||
from torch.linalg import vector_norm
|
||||
|
||||
|
||||
class RestrictTrainEpochs(Callback):
    """Counts epochs since the start of training and requests a stop once
    ``max_epochs`` have elapsed.

    Works with resumed training: the epoch budget is counted from the point
    training (re)starts, and the spurious epoch-end fired right after a
    checkpoint restore is ignored.
    """

    def __init__(self, max_epochs: int):
        super().__init__()
        self.max_epochs = max_epochs
        self.epochs = 0
        self.skip_first = False

    def on_train_start(self, trainer: Trainer, pl_module: LightningModule):
        log_debug(f"Training for {self.max_epochs} epochs...")
        self.epochs = 0
        trainer.should_stop = False

    def on_train_epoch_end(self, trainer: Trainer, pl_module: LightningModule):
        if self.skip_first:
            # this epoch-end belongs to the epoch restored from the checkpoint
            self.skip_first = False
            return
        self.epochs += 1
        log_debug(f"Epoch {self.epochs}/{self.max_epochs}")
        # TODO: test for DDP, do we need 'trainer.strategy.reduce_boolean_decision'?
        if self.epochs >= self.max_epochs:
            log_debug(f"Stopping training after {self.epochs} epochs")
            trainer.should_stop = True

    def on_load_checkpoint(self, trainer: Trainer, pl_module: LightningModule, checkpoint):
        # checkpoint loads the model at the end of the epoch, so we do not count the first epoch
        self.skip_first = True
|
||||
|
||||
|
||||
class OptimizerTime(Callback):
    """Tracks the mean ``optimizer.step()`` wall time per epoch and keeps an
    incremental running mean over all epochs.

    Expects the module to collect per-step timings in
    ``pl_module.optimizer_times_ms`` (a list of floats, reset here each epoch).
    """

    def __init__(self):
        super().__init__()
        self.total_mean_optimizer_step_time_ms: float = 0.0
        self.total_epochs: int = 0

    def on_train_epoch_end(self, trainer, pl_module):
        samples = pl_module.optimizer_times_ms
        if not samples:
            return
        epoch_mean = sum(samples) / len(samples)
        pl_module.log(
            "mean_optimizer_step_time_ms",
            epoch_mean,
            on_step=False,
            on_epoch=True,
            sync_dist=True,
        )

        # incremental update of the running mean across epochs
        self.total_epochs += 1
        previous_sum = self.total_mean_optimizer_step_time_ms * (self.total_epochs - 1)
        self.total_mean_optimizer_step_time_ms = (previous_sum + epoch_mean) / self.total_epochs

        # start collecting fresh samples for the next epoch
        pl_module.optimizer_times_ms = []  # type: ignore

    def state_dict(self) -> dict[str, float | int]:
        """Serialize the running mean so it survives checkpointing."""
        return {
            "running_mean": self.total_mean_optimizer_step_time_ms,
            "total_epochs": self.total_epochs,
        }

    def load_state_dict(self, state_dict: dict[str, float | int]):
        """Restore the running mean from a checkpoint."""
        self.total_mean_optimizer_step_time_ms = state_dict["running_mean"]
        self.total_epochs = state_dict["total_epochs"]  # type: ignore
|
||||
|
||||
|
||||
class PrintEpochWithTime(Callback):
    """Logs, once per epoch on rank zero, how much time was spent on training
    versus validation."""

    def __init__(self, active: bool = True):
        super().__init__()
        self.active: bool = active
        self.time: dict[str, Optional[float]]
        self.reset_time()

    def reset_time(self):
        """Clear all recorded timestamps."""
        self.time = {"train_start": None, "val_start": None, "val_end": None}

    @rank_zero_only
    def on_train_epoch_start(self, trainer: Trainer, pl_module: LightningModule):
        if not self.active:
            return
        self.time["train_start"] = time.time()

    @rank_zero_only
    def on_train_epoch_end(self, trainer: Trainer, pl_module: LightningModule):
        # need to print here since train epoch ends after validation is done
        if self.active and all(v is not None for v in self.time.values()):
            max_epochs = pl_module.config.max_epochs
            train_time = math.ceil(time.time() - self.time["train_start"])  # type: ignore
            val_time = math.ceil(self.time["val_end"] - self.time["val_start"])  # type: ignore
            log_info(
                f"Finished training epoch {trainer.current_epoch + 1} of {max_epochs}. "
                f"Time spent: training: {seconds_to_str(train_time - val_time)}, "
                f"validation: {seconds_to_str(val_time)}, total: {seconds_to_str(train_time)}."
            )
            self.reset_time()

    @rank_zero_only
    def on_validation_epoch_start(self, trainer: Trainer, pl_module: LightningModule):
        if not self.active:
            return
        self.time["val_start"] = time.time()

    @rank_zero_only
    def on_validation_epoch_end(self, trainer: Trainer, pl_module: LightningModule):
        if not self.active:
            return
        self.time["val_end"] = time.time()
|
||||
|
||||
|
||||
def metric_fn(metric: str, v: torch.Tensor, override: Optional[float] = None) -> float:
    """Reduce tensor *v* to a single float according to *metric*.

    Args:
        metric: one of mean, sum, abs_mean, std, abs_std, min, max,
            l1, l2, sq_mean, sq_sum.
        v: tensor to reduce.
        override: if given, returned immediately instead of computing anything.

    Raises:
        ValueError: for an unrecognized metric name.
    """
    if override is not None:
        return override
    reducers = {
        "mean": lambda t: t.mean().item(),
        "sum": lambda t: t.sum().item(),
        "abs_mean": lambda t: t.abs().mean().item(),
        "std": lambda t: t.std().item(),
        "abs_std": lambda t: t.abs().std().item(),
        "min": lambda t: t.min().item(),
        "max": lambda t: t.max().item(),
        "l1": lambda t: vector_norm(t, ord=1).item(),
        "l2": lambda t: vector_norm(t, ord=2).item(),
        "sq_mean": lambda t: (t**2).mean().item(),
        "sq_sum": lambda t: (t**2).sum().item(),
    }
    if metric not in reducers:
        raise ValueError(f"unknown metric {metric}")
    return reducers[metric](v)


def add_metrics_to_stats(
    stats: dict[str, float],
    prefix: str,
    name: str,
    v: torch.Tensor,
    metrics: Iterable[str],
    override: Optional[float] = None,
):
    """Insert one ``{prefix}/{name}/{metric}`` entry into *stats* per metric."""
    for m in metrics:
        stats[f"{prefix}/{name}/{m}"] = metric_fn(m, v, override=override)
|
||||
|
||||
|
||||
class LogTrainingStats(Callback):
    """Logs statistics of parameters, gradients, optimizer state and an
    effective-learning-rate proxy at a (possibly geometrically growing)
    step interval.

    NOTE(review): assumes each optimizer param_group carries a "names" list
    parallel to "params" (set up elsewhere in this project) -- confirm.
    """

    def __init__(
        self,
        log_gradient: bool = True,
        log_params: bool = True,
        log_quantiles: bool = False,
        log_momentum: bool = False,
        log_lrs: bool = True,
        log_every_n_steps: int = 50,
        change_log_interval_every_n_steps: Optional[int] = None,
        log_interval_factor: float = 2.0,
        min_log_interval: int = 1,
        max_log_interval: Optional[int] = None,
        metrics: Iterable[str] = (
            "mean",
            "abs_mean",
            "std",
            "abs_std",
            "min",
            "max",
            "l1",
            "l2",
            "sq_mean",
        ),
    ):
        super().__init__()
        # which quantities to log
        self.log_gradient = log_gradient
        self.log_params = log_params
        self.log_quantiles = log_quantiles
        self.log_momentum = log_momentum
        self.log_lrs = log_lrs
        # logging cadence; can grow geometrically over training (see below)
        self.log_every_n_steps = log_every_n_steps
        self.change_log_interval_every_n_steps = change_log_interval_every_n_steps
        self.log_interval_factor = log_interval_factor
        self.min_log_interval = min_log_interval
        self.max_log_interval = max_log_interval
        # names understood by metric_fn
        self.metrics = metrics

    def _check_and_adjust_log_interval(
        self, trainer: Trainer, pl_module: LightningModule
    ):
        # Optionally grow 'log_every_n_steps' by 'log_interval_factor' every
        # 'change_log_interval_every_n_steps' steps, clamped to
        # [min_log_interval, max_log_interval].
        if self.change_log_interval_every_n_steps is not None:
            if (
                trainer.global_step > 0
                and trainer.global_step % self.change_log_interval_every_n_steps == 0
            ):
                self.log_every_n_steps = math.ceil(
                    self.log_every_n_steps * self.log_interval_factor
                )
                self.log_every_n_steps = max(
                    self.log_every_n_steps, self.min_log_interval
                )
                if self.max_log_interval is not None:
                    self.log_every_n_steps = min(
                        self.log_every_n_steps, self.max_log_interval
                    )
                pl_module.log("logging_interval", self.log_every_n_steps)
        # True when the current global step should be logged
        return trainer.global_step % self.log_every_n_steps == 0

    @rank_zero_only
    def on_before_optimizer_step(
        self,
        trainer: Trainer,
        pl_module: LightningModule,
        optimizer: torch.optim.Optimizer,
    ):
        """Collect per-parameter statistics right before the optimizer step
        and forward them to all attached loggers."""
        if self._check_and_adjust_log_interval(trainer, pl_module):
            stats = {}
            # quantile levels 0.25, 0.5, 0.75
            q = torch.arange(0.25, 1, 0.25).round(decimals=2).to(trainer.model.device)
            for param_group in optimizer.param_groups:
                for name, param in zip(param_group["names"], param_group["params"]):
                    if self.log_params or self.log_lrs:
                        v_detached = param.detach()

                        if self.log_params:
                            # warn on non-finite parameter values
                            if torch.isnan(v_detached).sum() > 0:
                                log_warn(f"# NaN in param {name}")
                            if torch.isinf(v_detached).sum() > 0:
                                log_warn(f"# Inf in param {name}")

                            add_metrics_to_stats(
                                stats, "param", name, v_detached, self.metrics
                            )

                            # quantiles only for tensors below 10M elements
                            if self.log_quantiles and v_detached.size().numel() < 10000000:
                                deciles = torch.quantile(
                                    v_detached.float(), q, interpolation="linear"
                                )
                                for q_idx, d_val in enumerate(deciles):
                                    stats[f"param/{name}/quantile-{q[q_idx]}"] = (
                                        d_val.item()
                                    )

                    if (self.log_gradient or self.log_lrs) and param.requires_grad:
                        if trainer.num_devices > 1:
                            # multi-device: gradients may be sharded; fetch the full gradient
                            grad_data = deepspeed.utils.safe_get_full_grad(param)
                        else:
                            grad_data = param.grad
                    else:
                        grad_data = None

                    if grad_data is not None:
                        if torch.isnan(grad_data).sum() > 0:
                            log_warn(f"# NaN in grad {name}")
                        if torch.isinf(grad_data).sum() > 0:
                            log_warn(f"# Inf in grad {name}")

                        if self.log_gradient:
                            if (
                                torch.isnan(grad_data).sum() > 0
                                or torch.isinf(grad_data).sum() > 0
                            ):
                                # sentinel -10.0 marks non-finite gradients in the logs
                                add_metrics_to_stats(
                                    stats,
                                    "grad",
                                    name,
                                    grad_data,
                                    self.metrics,
                                    override=-10.0,
                                )
                                if (
                                    self.log_quantiles
                                    and grad_data.size().numel() < 10000000
                                ):
                                    for q_idx, _ in enumerate(q):
                                        # NOTE(review): writes under the "param/" prefix
                                        # although this is the gradient branch -- possibly
                                        # should be "grad/"; confirm before changing
                                        stats[f"param/{name}/quantile-{q[q_idx]}"] = -10

                            stats[f"grad/{name}/mean"] = grad_data.mean().item()
                            # full metric set only for non-scalar gradients
                            if len(grad_data.shape) > 1 or grad_data.shape[0] > 1:
                                add_metrics_to_stats(
                                    stats, "grad", name, grad_data, self.metrics
                                )

                                if (
                                    self.log_quantiles
                                    and grad_data.size().numel() < 10000000
                                ):
                                    deciles = torch.quantile(
                                        grad_data.float(), q, interpolation="linear"
                                    )
                                    for q_idx, d_val in enumerate(deciles):
                                        stats[f"grad/{name}/quantile-{q[q_idx]}"] = (
                                            d_val.item()
                                        )

                        if self.log_lrs:
                            # grad-to-param norm ratio as an effective-lr proxy
                            grad_norm = vector_norm(grad_data)
                            param_norm = vector_norm(v_detached)
                            effective_lr = (
                                (grad_norm / param_norm).item()
                                if param_norm != 0
                                else 0.0
                            )
                            stats[f"param/{name}/effective_lr"] = effective_lr

                    if self.log_momentum or self.log_lrs:
                        if param in optimizer.state:
                            state = optimizer.state[param]
                        else:
                            state = {}

                        if self.log_momentum:
                            # Adam-style first moment or classic SGD momentum buffer
                            if "exp_avg" in state:
                                moment1 = state["exp_avg"]
                            elif "momentum_buffer" in state:
                                moment1 = state["momentum_buffer"]
                            else:
                                moment1 = None
                            if moment1 is not None:
                                add_metrics_to_stats(
                                    stats, "1st_order_momentum", name, moment1, self.metrics
                                )
                            if "exp_avg_sq" in state:
                                add_metrics_to_stats(
                                    stats,
                                    "2nd_order_momentum",
                                    name,
                                    state["exp_avg_sq"],
                                    self.metrics,
                                )
                        # per-parameter learning rate, if the optimizer tracks one
                        if self.log_lrs and "lr" in state:
                            stats[f"param/{name}/lr"] = state["lr"].item()

            if trainer.loggers is not None:
                for logger in trainer.loggers:
                    logger.log_metrics(stats, step=trainer.global_step)
|
||||
|
|
@ -0,0 +1,186 @@
|
|||
from pathlib import Path
|
||||
from typing import Any, Literal, Optional
|
||||
|
||||
from .utils import (
|
||||
AttributeDict,
|
||||
EndlessList,
|
||||
convert_type_inside_dict,
|
||||
maybe_abspath,
|
||||
some,
|
||||
wrap_list,
|
||||
)
|
||||
|
||||
|
||||
class BaseConfig(AttributeDict):
    """AttributeDict that recursively converts all nested plain dicts into
    AttributeDicts, enabling attribute-style access at every level."""

    def __init__(self, config: dict):
        converted = convert_type_inside_dict(config, dict, AttributeDict)
        super().__init__(converted)
|
||||
|
||||
|
||||
class NamedConfig(BaseConfig):
    """Config with a mandatory name and an output directory name that
    defaults to the name itself."""

    def __init__(
        self,
        config: dict[str, Any],
        identifier_key: str = "name",
        outdir_key: str = "output_dir_name",
    ) -> None:
        super().__init__(config)
        self.name = config[identifier_key]
        # fall back to the component name when no output dir name is given
        self.output_dir_name = config.get(outdir_key, self.name)
|
||||
|
||||
|
||||
class OptimizerConfig(NamedConfig):
    """Optimizer section of the config, augmented with the task's
    step/epoch training budget."""

    def __init__(
        self,
        config: dict[str, Any],
        optimizer_key: str,
        task_key: str,
        identifier_key: str = "name",
        outdir_key: str = "output_dir_name",
    ) -> None:
        cfg = dict(config[optimizer_key])
        task_cfg = config[task_key]
        # scheduler stepping granularity, defaults to per-step
        self.lr_interval: Literal["step", "epoch"] = cfg.get("lr_interval", "step")
        self.max_steps: int = task_cfg.get("max_steps", None)
        self.max_epochs: int = task_cfg["max_epochs"]
        # mirror the training budget into the optimizer config dict
        cfg["max_steps"] = self.max_steps
        cfg["max_epochs"] = self.max_epochs
        super().__init__(cfg, identifier_key, outdir_key)
|
||||
|
||||
|
||||
class TaskConfig(NamedConfig):
    """Task section of the config, augmented with engine-level data settings
    (data directory and dataloader workers)."""

    def __init__(
        self,
        config: dict[str, Any],
        task_key: str,
        engine_key: str,
        identifier_key: str = "name",
        outdir_key: str = "output_dir_name",
    ) -> None:
        cfg = dict(config[task_key])
        engine_cfg = config[engine_key]
        self.batch_size: int = cfg["batch_size"]
        self.data_dir = Path(engine_cfg["data_dir"]).resolve()
        self.max_epochs: int = cfg["max_epochs"]
        self.max_steps: int = cfg.get("max_steps", None)
        self.target_metric: str = cfg["target_metric"]
        self.target_metric_mode: str = cfg["target_metric_mode"]
        self.workers = engine_cfg["workers"]
        # propagate engine-level settings into the task config dict
        cfg["data_dir"] = self.data_dir
        cfg["workers"] = self.workers
        super().__init__(cfg, identifier_key, outdir_key)
|
||||
|
||||
|
||||
class EngineConfig(BaseConfig):
    """Typed view of the 'engine' section of the configuration.

    Resolves filesystem paths, applies fallback defaults and writes the
    derived values back into the underlying dict before handing it to
    BaseConfig.
    """

    def __init__(self, config: dict[str, Any], task_key: str, engine_key: str) -> None:
        cfg = dict(config[engine_key])
        self.accelerator = cfg["accelerator"]
        self.deterministic: bool | Literal["warn"] = cfg["deterministic"]
        self.data_dir = Path(cfg["data_dir"]).resolve()
        self.detect_anomaly: bool = cfg["detect_anomaly"]
        # default to a single device when 'devices' is null
        self.devices: int = some(cfg["devices"], default=1)
        self.early_stopping: Optional[int] = cfg["early_stopping"]
        # early-stopping metric falls back to the task's target metric
        self.early_stopping_metric: str = some(
            cfg["early_stopping_metric"], default=config[task_key]["target_metric"]
        )
        self.gradient_clip_alg: str = cfg["gradient_clip_alg"]
        self.gradient_clip_val: Optional[float] = cfg["gradient_clip_val"]
        self.log_extra: bool | dict[str, bool] = cfg["log_extra"]
        # NOTE(review): attribute name has a typo ('inteval'); kept as-is
        # because renaming would break external readers -- confirm before fixing
        self.logging_inteval: int = cfg["logging_interval"]
        self.max_steps: int = config[task_key].get("max_steps", None)
        self.optimize_memory: bool = cfg["optimize_memory"]
        self.output_dir = Path(cfg["output_dir"]).resolve()
        self.plot: bool = cfg["plot"]
        self.precision: str = cfg["precision"]
        self.restrict_train_epochs: Optional[int] = cfg["restrict_train_epochs"]
        # 'resume' may be a bool or a checkpoint path string
        _resume = cfg.get("resume", False)
        self.resume: Optional[Path] | bool = (
            Path(_resume).resolve() if isinstance(_resume, str) else _resume
        )
        self.run_scheduler: str = cfg["run_scheduler"]
        self.seed: int = cfg["seed"]
        self.seed_mode: str = cfg["seed_mode"]
        self.save_sbatch_scripts: Optional[Path] = maybe_abspath(
            cfg["save_sbatch_scripts"]
        )
        self.sbatch_args: dict[str, str] = cfg["sbatch_args"]
        self.sbatch_script_template: Optional[Path] = maybe_abspath(
            cfg["sbatch_script_template"]
        )
        self.sbatch_time_factor: float = cfg["sbatch_time_factor"]
        self.slurm_log_dir: Optional[Path] = maybe_abspath(cfg["slurm_log_dir"])
        self.silent: bool = cfg.get("silent", False)
        self.test: bool = cfg.get("test", True)
        self.train: bool = cfg.get("train", True)
        self.validate: bool = cfg.get("validate", False)
        self.workers: int = cfg["workers"]
        # write resolved/derived values back so the dict matches the attributes
        cfg["data_dir"] = self.data_dir
        cfg["devices"] = self.devices
        cfg["early_stopping_metric"] = self.early_stopping_metric
        cfg["max_steps"] = self.max_steps
        cfg["output_dir"] = self.output_dir
        cfg["resume"] = self.resume
        cfg["slurm_log_dir"] = self.slurm_log_dir
        cfg["save_sbatch_scripts"] = self.save_sbatch_scripts
        cfg["sbatch_script_template"] = self.sbatch_script_template
        super().__init__(cfg)

    def outpath_relevant_engine_keys(self, prefix: str = "") -> list[str]:
        """Engine keys that influence training results and therefore belong
        in the output path; each key is prefixed with *prefix*."""
        keys = [
            "accelerator",
            "deterministic",
            "detect_anomaly",
            "devices",
            "early_stopping",
            "gradient_clip_alg",
            "gradient_clip_val",
            "optimize_memory",
            "precision",
            "seed",
        ]
        return [f"{prefix}{k}" for k in keys]

    def outpath_irrelevant_engine_keys(self, prefix: str = "") -> list[str]:
        """All remaining engine keys (the complement of the relevant ones)."""
        return [
            f"{prefix}{k}"
            for k in self.keys()
            if k not in self.outpath_relevant_engine_keys()
        ]
|
||||
|
||||
|
||||
class EvalConfig(BaseConfig):
    """Typed view of the 'evaluation' section of the configuration.

    Normalizes list-valued options, resolves the output directory and writes
    the derived values back into the dict before handing it to BaseConfig.

    Args:
        config: full experiment config.
        eval_key: key of the evaluation section in *config*.
        engine_key: key of the engine section (used for the default output dir).
        ignore_keys: keys to exclude during evaluation; defaults to [].
    """

    def __init__(
        self, config: dict[str, Any], eval_key: str, engine_key: str, ignore_keys=None
    ) -> None:
        cfg = dict(config[eval_key])
        # fixed file names produced by a finished run
        self.experiment_files = AttributeDict(
            dict(
                best_model="results_best_model.json",
                last_model="results_final_model.json",
                config="config.yaml",
            )
        )
        self.output_types: list[str] = wrap_list(cfg["output_types"])
        experiment_dir = Path(config[engine_key]["output_dir"]).resolve()
        # default output location: '<experiment output_dir>/plots'
        self.output_dir: Path = some(
            maybe_abspath(cfg["output_dir"]), default=experiment_dir / "plots"
        )
        self.experiment_name: str = cfg["experiment_name"]
        self.verbose: bool = cfg.get("verbose", False)
        split = cfg.get("split_groups", False)
        self.split_groups: bool | list[str] = (
            split if isinstance(split, bool) else wrap_list(split)
        )
        self.checkpoints: list[Literal["last", "best"]] = wrap_list(cfg["checkpoints"])
        self.column_split_key: Optional[str] = cfg.get("column_split_key", None)
        self.column_split_order: Optional[list[str]] = cfg.get(
            "column_split_order", None
        )
        self.ignore_keys: list[str] = some(ignore_keys, default=[])
        self.aggregate_groups: list[str] = wrap_list(cfg["aggregate_groups"])
        # write the normalized values back into the dict
        cfg["ignore_keys"] = self.ignore_keys
        # fix: the original assigned cfg["output_types"] twice; once suffices
        cfg["output_types"] = self.output_types
        cfg["output_dir"] = self.output_dir
        cfg["aggregate_groups"] = self.aggregate_groups
        # axis specs cycle endlessly so fewer entries than plots is allowed
        cfg["plot"]["x_axis"] = EndlessList(wrap_list(cfg["plot"]["x_axis"]))
        cfg["plot"]["y_axis"] = EndlessList(wrap_list(cfg["plot"]["y_axis"]))
        cfg["split_groups"] = self.split_groups
        super().__init__(cfg)
|
||||
|
|
@ -0,0 +1,41 @@
|
|||
engine:
|
||||
accelerator: gpu # Whether to train on cpu or gpu
|
||||
check_finite: true # Check if 'early_stopping_metric' is finite during training. Aborts training if not. Only active when 'early_stopping' is not null.
|
||||
data_dir: ./data # Where you want to store the training data
|
||||
deterministic: warn # 'warn' tries to use deterministic algorithms if possible, also accepts true or false.
|
||||
detect_anomaly: false # Lightning trainer argument with same name.
|
||||
devices: null # This is set by each task by default, but can be overridden
|
||||
early_stopping: null # The number of epochs to wait before stopping if no improvement is found. Set to null to disable.
|
||||
early_stopping_metric: null # Metric to use for early stopping. If null, uses 'task.target_metric'.
|
||||
gradient_clip_alg: norm # {value, norm} to disable gradient clipping: set 'gradient_clip_val' to null
|
||||
gradient_clip_val: null # DEFAULT: don't clip gradients, expects value in [0, 1]
|
||||
log_extra: false # Activate logging of gradients and more. Can be bool or a dict with the options supported by callback `LogTrainingStats` in `pytorch_fob/engine/callbacks.py`.
|
||||
logging_interval: 50 # Number of steps between each logging step.
|
||||
optimize_memory: false # Use nondeterministic, but memory-efficient algorithms for self-attention
|
||||
output_dir: ./experiments # Where you want to store the results
|
||||
plot: true # Whether to plot the results.
|
||||
precision: bf16-mixed # Floating precision of training, see https://lightning.ai/docs/pytorch/stable/common/precision_basic.html
|
||||
restrict_train_epochs: null # Only train for a specific number of epochs. Set to null to disable. The epochs set here are counted from start of training, so this works with 'resume'.
|
||||
resume: true # You can either pass the path to your checkpoint here or set to true, which loads the last checkpoint.
|
||||
run_scheduler: sequential # How to schedule the runs of the experiment. Supported values:
|
||||
# 'sequential': runs are performed sequentially
|
||||
# 'single:N' where N is the number of the run starting from 1.
|
||||
# 'slurm_array': runs are scheduled using a SLURM array job.
|
||||
# 'slurm_jobs': runs are scheduled using independent SLURM jobs
|
||||
save_sbatch_scripts: null # Path to directory where sbatch scripts will be saved. If null, sbatch scripts will not be saved.
|
||||
sbatch_time_factor: 1 # Time factor for SLURM. Multiplies all default times by this factor.
|
||||
sbatch_args: # Additional arguments to pass to sbatch. Only used if run_scheduler is 'slurm_array'.
|
||||
# ntasks-per-node and gres are set to 'devices' by default
|
||||
# cpus-per-task is set to 'workers' by default
|
||||
nodes: 1
|
||||
mem-per-cpu: 2gb
|
||||
time: 00:30:00 # Each task has their own default time (assumes A100 or similar gpu). Format: HH:MM:SS or seconds.
|
||||
sbatch_script_template: null # Path to template for the sbatch script. Script can contain placeholder '__FOB_COMMAND__'. Otherwise it will be executed before the experiment. 'sbatch_args' will be added to the beginning of the script.
|
||||
slurm_log_dir: null # Default: 'output_dir/slurm_logs' for run_scheduler 'slurm_array' and 'run_dir/slurm_logs' for run_scheduler 'slurm_jobs'
|
||||
seed: 42 # The seed to use for the experiment
|
||||
seed_mode: fixed # Currently only supports 'fixed'
|
||||
silent: false # whether to hide progress bars. Recommended when writing outputs to a log file.
|
||||
test: true # Whether to test the model.
|
||||
train: true # Whether to train the model.
|
||||
validate: false # Whether to validate the model after training (only useful if you are interested in the results, for example for HPO).
|
||||
workers: 16 # The number of processes to use for dataloading
|
||||
|
|
@ -0,0 +1,280 @@
|
|||
import json
|
||||
from copy import deepcopy
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Iterable, Iterator, Literal, Optional
|
||||
|
||||
from matplotlib.figure import Figure
|
||||
from pandas import DataFrame, concat, json_normalize
|
||||
from pytorch_fob.engine.configs import EvalConfig
|
||||
from pytorch_fob.engine.grid_search import grid_search
|
||||
from pytorch_fob.engine.parser import YAMLParser
|
||||
from pytorch_fob.engine.run import Run
|
||||
from pytorch_fob.engine.run_schedulers import sequential, slurm_array, slurm_jobs
|
||||
from pytorch_fob.engine.utils import (
|
||||
log_debug,
|
||||
log_info,
|
||||
log_warn,
|
||||
some,
|
||||
sort_dict_recursively,
|
||||
)
|
||||
from pytorch_fob.evaluation import evaluation_path
|
||||
from pytorch_fob.evaluation.plot import (
|
||||
create_figure,
|
||||
get_output_file_path,
|
||||
save_files,
|
||||
set_plotstyle,
|
||||
)
|
||||
from pytorch_fob.optimizers import lr_schedulers_path, optimizer_names, optimizer_path
|
||||
from pytorch_fob.tasks import task_names, task_path
|
||||
|
||||
|
||||
def engine_path() -> Path:
    """Return the absolute path of the directory containing this module."""
    here = Path(__file__).resolve()
    return here.parent
|
||||
|
||||
|
||||
class Engine:
    """Expands an experiment search space into concrete runs and schedules them.

    The engine grid-searches the experiment yaml, fills each resulting run
    with the task/optimizer/engine/evaluation defaults, and can prepare data,
    execute the runs and plot collected results.
    """

    def __init__(self) -> None:
        self._runs = []
        self._defaults = []
        self._experiment = {}
        self._experiment_file = None
        # set by the slurm schedulers: results are not available locally yet
        self._block_plotting = False
        # top-level section keys of the experiment yaml
        self.task_key = "task"
        self.optimizer_key = "optimizer"
        self.engine_key = "engine"
        self.eval_key = "evaluation"
        self.identifier_key = "name"
        self.default_file_name = "default.yaml"
        self.parser = YAMLParser()

    def run_experiment(self) -> Optional[list[int]]:
        """Execute all runs using the configured scheduler.

        Returns submitted job ids for the 'slurm_jobs' scheduler, else None.
        """
        assert (
            len(self._runs) > 0
        ), "No runs in experiment, make sure to call 'parse_experiment' first."
        scheduler = self._runs[0][self.engine_key]["run_scheduler"]
        # the scheduler must be identical across all runs
        assert all(
            map(lambda x: x[self.engine_key]["run_scheduler"] == scheduler, self._runs)
        ), "You cannot perform gridsearch on 'run_scheduler'."
        if scheduler == "sequential":
            sequential(self.runs(), len(self._runs), self._experiment)
        elif scheduler.startswith("single"):
            # format 'single:N': perform only the N-th run (1-based)
            n = int(scheduler.rsplit(":", 1)[-1])
            log_info(f"Starting run {n}/{len(self._runs)}.")
            run = self._make_run(n)
            run.start()
        elif scheduler == "slurm_array":
            self._block_plotting = True
            slurm_array(list(self.runs()), self._experiment)
        elif scheduler == "slurm_jobs":
            self._block_plotting = True
            return slurm_jobs(list(self.runs()), self._experiment)
        else:
            raise ValueError(f"Unsupported run_scheduler: {scheduler=}.")

    def parse_experiment_from_file(
        self, file: Path, extra_args: Iterable[str] = tuple()
    ):
        """Load the experiment yaml from *file* and parse it (see parse_experiment)."""
        self._experiment_file = file.resolve()
        searchspace: dict[str, Any] = self.parser.parse_yaml(self._experiment_file)
        self.parse_experiment(searchspace, extra_args)

    def parse_experiment(
        self, searchspace: dict[str, Any], extra_args: Iterable[str] = tuple()
    ):
        """Expand *searchspace* into runs via grid search and fill in defaults.

        *extra_args* are command-line overrides merged into the searchspace.
        """
        self.parser.parse_args_into_searchspace(searchspace, extra_args)
        # normalize experiment
        self._named_dicts_to_list(
            searchspace,
            [self.optimizer_key, self.task_key],
            [optimizer_names(), task_names()],
        )
        searchspace = sort_dict_recursively(searchspace)
        self._experiment = deepcopy(searchspace)
        # exclude plotting from gridsearch
        if self.eval_key in searchspace:
            eval_config = searchspace.pop(self.eval_key)
        else:
            eval_config = {}
        log_debug("Performing gridsearch...")
        self._runs = grid_search(searchspace)
        log_debug(f"Found {len(self._runs)} runs.")
        for run in self._runs:
            run[self.eval_key] = eval_config
        self._fill_runs_from_default(self._runs)
        self._fill_defaults()

    def runs(self) -> Iterator[Run]:
        """
        Creates and initializes runs from parsed run config.
        """
        for n, _ in enumerate(self._runs, start=1):
            yield self._make_run(n)

    def prepare_data(self):
        """Download and prepare data once per distinct task in the experiment."""
        prepared = set()
        for n, t in enumerate(self._runs, start=1):
            name = t["task"]["name"]
            if name not in prepared:
                run = self._make_run(n)
                log_info(f"Setting up data for {run.task_key} '{run.task.name}'...")
                run.get_datamodule().prepare_data()
                log_info("... finished.")
                prepared.add(name)

    def plot(self, save: bool = True) -> list[Figure]:
        """Create (and optionally save) result figures for each requested checkpoint."""
        run = next(self.runs())
        if self._block_plotting or not run.engine.plot:
            return []
        config = run.evaluation
        set_plotstyle(config)
        figs = []
        for mode in config.checkpoints:
            df = self.dataframe_from_runs(mode)
            if config.plot.single_file:
                fig, dfs = self.plot_one_fig(df, config)
                if save:
                    self.save_one_plot(fig, dfs, config, mode)
                figs.append(fig)
            else:
                # TODO: option to split into multiple files
                raise NotImplementedError(
                    "evaluation.plot.single_file=False is not implemented yet."
                )
        return figs

    def plot_one_fig(self, df: DataFrame, config: EvalConfig):
        """Build one figure from *df*, optionally split into column groups
        by ``config.column_split_key``."""
        if config.column_split_key is None:
            dfs = [df]
        else:
            groups = df.groupby(config.column_split_key)
            # default column order: sorted group keys
            order = some(
                config.column_split_order, default=map(lambda x: x[0], sorted(groups))
            )
            dfs: list[DataFrame] = [
                groups.get_group(group_name) for group_name in order
            ]
        fig, _ = create_figure(dfs, config)
        return fig, dfs

    def save_one_plot(
        self,
        fig,
        dfs: list[DataFrame],
        config: EvalConfig,
        mode: Literal["last", "best"],
    ):
        """Write the figure (and its data) for checkpoint *mode* to the
        configured output location."""
        output_file_path = get_output_file_path(dfs, config, suffix=mode)
        save_files(fig, dfs, output_file_path, config)

    def dataframe_from_runs(self, mode: Literal["last", "best"]) -> DataFrame:
        """Collect each run's config plus its target metric value into one DataFrame.

        Runs with a missing result file or metric are skipped with a warning.
        Raises ValueError if no run produced usable results.
        """
        dfs: list[DataFrame] = []
        for run in self.runs():
            df = json_normalize(run.get_config())
            if mode == "last":
                result_file = run.run_dir / run.evaluation.experiment_files.last_model
            elif mode == "best":
                result_file = run.run_dir / run.evaluation.experiment_files.best_model
            else:
                raise ValueError(f"mode {mode} not supported")
            if not result_file.is_file():
                log_warn(
                    f"result file {result_file} not found, skipping this hyperparameter setting"
                )
                continue
            metric = run.evaluation.plot.metric
            with open(result_file, "r", encoding="utf8") as f:
                content = json.load(f)
            if metric in content[0]:
                df.at[0, metric] = content[0][metric]
            else:
                log_warn(
                    f"could not find value for {metric} in json, skipping this hyperparameter setting"
                )
                continue
            dfs.append(df)
        if len(dfs) == 0:
            raise ValueError("no dataframes found, check your config")
        return concat(dfs, sort=False)

    def _make_run(self, n: int) -> Run:
        """Construct the Run object for run number *n* (1-based)."""
        i = n - 1
        return Run(
            self._runs[i],
            self._defaults[i],
            self.task_key,
            self.optimizer_key,
            self.engine_key,
            self.eval_key,
            self.identifier_key,
        )

    def _named_dicts_to_list(
        self,
        searchspace: dict[str, Any],
        keys: list[str],
        valid_options: list[list[str]],
    ):
        """Normalize '{name: cfg}' mappings under *keys* into lists of
        configs each carrying its name under the identifier key."""
        assert len(keys) == len(valid_options)
        for key, opts in zip(keys, valid_options):
            if key not in searchspace:
                continue
            if isinstance(searchspace[key], dict) and all(
                name in opts for name in searchspace[key]
            ):
                searchspace[key] = [
                    cfg | {self.identifier_key: name}
                    for name, cfg in searchspace[key].items()
                ]

    def _fill_defaults(self):
        """Build, per run, a minimal config holding only the task/optimizer
        names, then fill it with the component defaults."""
        self._defaults = []
        for run in self._runs:
            default_cfg = {
                k: {self.identifier_key: run[k][self.identifier_key]}
                for k in [self.task_key, self.optimizer_key]
            }
            self._defaults.append(default_cfg)
        self._fill_runs_from_default(self._defaults)

    def _fill_runs_from_default(self, runs: list[dict[str, Any]]):
        """Merge every component's yaml defaults into each run config (in place)."""
        for i, _ in enumerate(runs):
            # order from higher to lower in hierarchy
            runs[i] = self._fill_named_from_default(runs[i], self.task_key, task_path)
            runs[i] = self._fill_named_from_default(
                runs[i], self.optimizer_key, optimizer_path
            )
            runs[i] = self._fill_unnamed_from_default(runs[i], lr_schedulers_path)
            runs[i] = self._fill_unnamed_from_default(runs[i], engine_path)
            runs[i] = self._fill_unnamed_from_default(runs[i], evaluation_path)

    def _fill_unnamed_from_default(
        self, experiment: dict[str, Any], unnamed_root: Callable
    ) -> dict[str, Any]:
        """Merge *experiment* on top of the default.yaml under *unnamed_root()*."""
        default_path: Path = unnamed_root() / self.default_file_name
        default_config = self.parser.parse_yaml(default_path)
        self.parser.merge_dicts_hierarchical(default_config, experiment)
        return default_config

    def _fill_named_from_default(
        self, experiment: dict[str, Any], key: str, named_root: Callable
    ) -> dict[str, Any]:
        """Merge *experiment* on top of the default.yaml of the named
        component found at *key*."""
        self._argcheck_named(experiment, key, self.identifier_key)
        named = experiment[key]
        if isinstance(named, dict):
            named = named[self.identifier_key]
        else:
            # plain string shorthand: wrap into {'name': <str>}
            experiment[key] = {self.identifier_key: named}
        default_path: Path = named_root(named) / self.default_file_name
        default_config = self.parser.parse_yaml(default_path)
        self.parser.merge_dicts_hierarchical(default_config, experiment)
        return default_config

    def _argcheck_named(self, experiment: dict[str, Any], key: str, identifier: str):
        """Validate that *experiment* specifies component *key* either as a
        plain string or as a dict containing *identifier*."""
        assert key in experiment, f"You did not provide any {key}."
        assert (
            isinstance(experiment[key], str) or identifier in experiment[key]
        ), f"Unknown {key}, either specify only a string or provide a key '{identifier}'"
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
from typing import Any
|
||||
|
||||
|
||||
def unique(xs: list) -> list:
    """Return *xs* with duplicates removed, keeping first occurrences.

    Uses membership tests instead of hashing so unhashable elements
    (e.g. dicts) are supported.
    """
    deduped: list = []
    for item in xs:
        if item not in deduped:
            deduped.append(item)
    return deduped
|
||||
|
||||
|
||||
def grid_search(d: dict[str, Any]) -> list[dict[str, Any]]:
    """Expand a nested searchspace into the list of all concrete configs.

    Lists are treated as alternatives (union of expansions), dicts as a
    cartesian product over their keys, and any other value as a leaf.
    """
    if isinstance(d, dict):
        if not d:
            return [dict()]
        remaining = d.copy()
        key, value = remaining.popitem()
        choices = unique(grid_search(value))
        expanded = []
        for base in grid_search(remaining):
            for choice in choices:
                expanded.append({**base, key: choice})
        return expanded
    if isinstance(d, list):
        flattened = []
        for alternative in d:
            flattened.extend(grid_search(alternative))
        return flattened
    return [d]
|
||||
|
|
@ -0,0 +1,277 @@
|
|||
from dataclasses import dataclass, field
|
||||
from typing import Any, Callable, Iterable, Optional
|
||||
|
||||
from pytorch_fob.engine.utils import log_warn, some
|
||||
from torch import nn
|
||||
from torch.nn import Module
|
||||
from torch.nn.parameter import Parameter
|
||||
|
||||
|
||||
@dataclass
|
||||
class ParameterGroup:
|
||||
named_parameters: dict[str, Parameter]
|
||||
lr_multiplier: Optional[float] = field(default=None)
|
||||
weight_decay_multiplier: Optional[float] = field(default=None)
|
||||
optimizer_kwargs: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
def __and__(self, other) -> "ParameterGroup":
|
||||
assert isinstance(other, ParameterGroup)
|
||||
n1 = set(self.named_parameters.keys())
|
||||
n2 = set(other.named_parameters.keys())
|
||||
all_params = self.named_parameters | other.named_parameters
|
||||
n12 = n1 & n2
|
||||
new_params = {n: all_params[n] for n in n12}
|
||||
return ParameterGroup(
|
||||
named_parameters=new_params,
|
||||
lr_multiplier=some(other.lr_multiplier, default=self.lr_multiplier),
|
||||
weight_decay_multiplier=some(
|
||||
other.weight_decay_multiplier, default=self.weight_decay_multiplier
|
||||
),
|
||||
optimizer_kwargs=self.optimizer_kwargs | other.optimizer_kwargs,
|
||||
)
|
||||
|
||||
def __len__(self) -> int:
|
||||
return len(self.named_parameters)
|
||||
|
||||
def __bool__(self) -> bool:
|
||||
return not self.empty()
|
||||
|
||||
def empty(self) -> bool:
|
||||
return len(self.named_parameters) == 0
|
||||
|
||||
def to_optimizer_dict(
|
||||
self, lr: Optional[float] = None, weight_decay: Optional[float] = None
|
||||
) -> dict[str, list[Parameter] | Any]:
|
||||
names = sorted(self.named_parameters)
|
||||
d = {
|
||||
"params": [self.named_parameters[n] for n in names],
|
||||
"names": names,
|
||||
**self.optimizer_kwargs,
|
||||
}
|
||||
if lr is not None:
|
||||
d["lr"] = self.lr_multiplier * lr if self.lr_multiplier is not None else lr
|
||||
if weight_decay is not None:
|
||||
d["weight_decay"] = (
|
||||
self.weight_decay_multiplier * weight_decay
|
||||
if self.weight_decay_multiplier is not None
|
||||
else weight_decay
|
||||
)
|
||||
return d
|
||||
|
||||
|
||||
class GroupedModel(Module):
    """Wrapper around an ``nn.Module`` exposing per-group optimizer settings.

    Inherit from this class and override :meth:`parameter_groups` to use
    custom grouping for your task; wrap your model with it before passing it
    to the ``TaskModel`` superclass ``__init__``.
    """

    def __init__(self, model: Module) -> None:
        super().__init__()
        self.model = model

    def forward(self, *args, **kwargs):
        # delegate straight to the wrapped module
        return self.model.forward(*args, **kwargs)

    def parameter_groups(self) -> list[ParameterGroup]:
        # default grouping: split into weight-decay / no-weight-decay groups
        return wd_group_named_parameters(self.model)

    def grouped_parameters(
        self, lr: Optional[float] = None, weight_decay: Optional[float] = None
    ) -> list[dict[str, list[Parameter] | Any]]:
        """Parameter groups in torch-optimizer dict form."""
        groups = self.parameter_groups()
        return [group.to_optimizer_dict(lr, weight_decay) for group in groups]
|
||||
|
||||
|
||||
def merge_parameter_splits(
    split1: list[ParameterGroup], split2: list[ParameterGroup]
) -> list[ParameterGroup]:
    """
    Merge two lists of ParameterGroup objects into a single list.
    Assumes that both input lists partition the parameters; the result
    contains every non-empty pairwise intersection.
    """
    merged = []
    for first in split1:
        for second in split2:
            intersection = first & second
            if not intersection.empty():
                merged.append(intersection)
    return merged
|
||||
|
||||
|
||||
def group_named_parameters(
    model: Module,
    g1_conds: Iterable[Callable] = (lambda *_: True,),
    g2_conds: Iterable[Callable] = (lambda *_: True,),
    special_conds: Iterable[Callable] = tuple(),
    ignore_conds: Iterable[Callable] = tuple(),
    g1_kwargs: Optional[dict[str, Any]] = None,
    g2_kwargs: Optional[dict[str, Any]] = None,
    debug: bool = False,
) -> list[ParameterGroup]:
    """
    Group named parameters based on specified conditions and return a list of ParameterGroup objects.

    Each condition is a callable ``(module, param, full_param_name) -> bool``.
    Conditions are checked in priority order: ignore, special, group 1, group 2;
    the first match wins. Parameters matched by no rule end up in group 1.
    Parameters matched by ``special_conds`` are excluded from both groups.

    Args:
        model (Module): The neural network model.
        g1_conds (Iterable[Callable]): Conditions for selecting parameters for group 1.
        g2_conds (Iterable[Callable]): Conditions for selecting parameters for group 2.
        special_conds (Iterable[Callable]): Conditions for selecting special parameters that should not be grouped.
        ignore_conds (Iterable[Callable]): Conditions for ignoring parameters (e.g. if they occur in submodules).
        g1_kwargs (Optional[dict[str, Any]]): Additional keyword arguments for constructor of group 1.
        g2_kwargs (Optional[dict[str, Any]]): Additional keyword arguments for constructor of group 2.
        debug (bool): If True, warn about parameters matched by no rule.

    Returns:
        List[ParameterGroup]: A list of ParameterGroup objects containing named parameters.
    """
    # avoid mutable default arguments
    g1_kwargs = g1_kwargs if g1_kwargs is not None else {}
    g2_kwargs = g2_kwargs if g2_kwargs is not None else {}
    s1 = set()  # full names assigned to group 1
    s2 = set()  # full names assigned to group 2
    special = set()  # full names excluded from grouping
    # only trainable parameters are considered
    param_dict = {pn: p for pn, p in model.named_parameters() if p.requires_grad}
    for mn, m in model.named_modules():
        # NOTE: named_parameters() here is recursive, so a parameter can be
        # visited once per enclosing module; the first matching rule for any
        # visit adds it to a set (duplicates are harmless in sets).
        for pn, p in m.named_parameters():
            fpn = f"{mn}.{pn}" if mn else pn  # full param name
            if not p.requires_grad or fpn not in param_dict:
                continue  # frozen weights
            elif any(c(m, p, fpn) for c in ignore_conds):
                continue
            elif any(c(m, p, fpn) for c in special_conds):
                special.add(fpn)
            elif any(c(m, p, fpn) for c in g1_conds):
                s1.add(fpn)
            elif any(c(m, p, fpn) for c in g2_conds):
                s2.add(fpn)
            elif debug:
                log_warn(
                    "group_named_parameters: Not using any rule for ",
                    fpn,
                    " in ",
                    type(m),
                )

    # anything not claimed by group 2 or special defaults to group 1
    s1 |= param_dict.keys() - s2 - special

    # validate that we considered every parameter
    inter_params = s1 & s2
    union_params = s1 | s2
    assert (
        len(inter_params) == 0
    ), f"Parameters {str(inter_params)} made it into both s1/s2 sets!"
    assert (
        len(param_dict.keys() - special - union_params) == 0
    ), f"parameters {str(param_dict.keys() - union_params)} \
were not separated into either s1/s2 set!"

    if not s2:
        # no split needed: emit a single group with every grouped parameter
        param_groups = [
            ParameterGroup(
                named_parameters=dict(
                    zip(
                        sorted(union_params),
                        (param_dict[pn] for pn in sorted(union_params)),
                    )
                )
            )
        ]
    else:
        # two groups, each with its own constructor kwargs
        param_groups = [
            ParameterGroup(
                named_parameters=dict(
                    zip(sorted(s1), (param_dict[pn] for pn in sorted(s1)))
                ),
                **g1_kwargs,
            ),
            ParameterGroup(
                named_parameters=dict(
                    zip(sorted(s2), (param_dict[pn] for pn in sorted(s2)))
                ),
                **g2_kwargs,
            ),
        ]

    return param_groups
|
||||
|
||||
|
||||
def wd_group_named_parameters(model: Module) -> list[ParameterGroup]:
    """Split *model*'s parameters into weight-decay and no-weight-decay groups.

    Weights of linear/conv layers receive weight decay; biases and
    normalization/embedding parameters do not (multiplier 0.0). Parameters
    carrying an ``_optim`` attribute are treated as special and kept out of
    both groups; ``nn.Sequential`` containers are skipped during matching.
    """
    decayed_module_types = (
        nn.Linear,
        nn.modules.conv._ConvNd,
    )  # pylint: disable=protected-access # noqa
    undecayed_module_types = (
        nn.modules.batchnorm._NormBase,  # pylint: disable=protected-access # noqa
        nn.GroupNorm,
        nn.LayerNorm,
        nn.LocalResponseNorm,
        nn.Embedding,
    )
    skipped_module_types = (nn.Sequential,)

    def _wants_decay(module, _param, name):
        return name.endswith("weight") and isinstance(module, decayed_module_types)

    def _wants_no_decay(module, _param, name):
        return name.endswith("bias") or isinstance(module, undecayed_module_types)

    def _is_special(_module, param, _name):
        return hasattr(param, "_optim")

    def _is_skipped(module, _param, _name):
        return isinstance(module, skipped_module_types)

    return group_named_parameters(
        model,
        g1_conds=[_wants_decay],
        g2_conds=[_wants_no_decay],
        special_conds=[_is_special],
        ignore_conds=[_is_skipped],
        g2_kwargs={"weight_decay_multiplier": 0.0},
    )
|
||||
|
||||
|
||||
def resolve_parameter_dicts(
    dict1: dict[str, Any], dict2: dict[str, Any]
) -> list[dict[str, Any]]:
    """Split two optimizer param dicts into [only-in-1, only-in-2, in-both].

    Each input maps "params" to a list of parameters and "names" to their
    names (same order); any extra keys are optimizer kwargs. The shared part
    takes its params and kwargs from *dict2* where both define them.
    """
    names1, names2 = set(dict1["names"]), set(dict2["names"])
    by_name1 = dict(zip(dict1["names"], dict1["params"]))
    by_name2 = dict(zip(dict2["names"], dict2["params"]))
    # names must be unique and aligned with params
    assert len(names1) == len(dict1["params"])
    assert len(names2) == len(dict2["params"])
    extras1 = {k: v for k, v in dict1.items() if k not in ("params", "names")}
    extras2 = {k: v for k, v in dict2.items() if k not in ("params", "names")}
    shared = names1 & names2
    only1 = names1 - names2
    only2 = names2 - names1
    assert shared | only1 | only2 == names1 | names2

    def _as_dict(names, lookup, extras):
        ordered = sorted(names)
        return {"params": [lookup[n] for n in ordered], "names": ordered, **extras}

    merged_lookup = {**by_name1, **by_name2}
    return [
        _as_dict(only1, by_name1, extras1),
        _as_dict(only2, by_name2, extras2),
        # extras2 takes precedence if an arg is present in both dicts:
        _as_dict(shared, merged_lookup, {**extras1, **extras2}),
    ]
|
||||
|
||||
|
||||
def intersect_parameter_dicts(
    dict1: dict[str, Any], dict2: dict[str, Any]
) -> Optional[dict[str, Any]]:
    """Shared-name part of the two param dicts, or None if it is empty."""
    shared = resolve_parameter_dicts(dict1, dict2)[2]
    if len(shared["params"]) > 0:
        return shared
    return None
|
||||
|
||||
|
||||
def merge_parameter_dicts(
    dict1: dict[str, Any], dict2: dict[str, Any]
) -> list[dict[str, Any]]:
    """All resolved parts of the two param dicts, dropping empty ones."""
    parts = resolve_parameter_dicts(dict1, dict2)
    return [part for part in parts if len(part["params"]) > 0]
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterable, Optional
|
||||
|
||||
import yaml
|
||||
|
||||
|
||||
class YAMLParser:
    """Reads YAML config files and layers overrides (files or CLI args) on top."""

    def __init__(self) -> None:
        pass

    def parse_yaml(self, file: Path) -> Any:
        """
        Opens and parses a YAML file.
        """
        with open(file, "r", encoding="utf8") as f:
            return yaml.safe_load(f)

    def parse_yamls_and_extra_args(
        self,
        default_yaml: Path,
        custom_yaml: Optional[Path],
        additional_args: Iterable[str] = tuple(),
    ) -> dict:
        """Load defaults, overlay an optional custom YAML, then apply CLI args.

        Assumes that there is a dict at the top level of each YAML file.
        """
        config_to_use = self.parse_yaml(default_yaml)
        if custom_yaml is not None:
            user_yaml = self.parse_yaml(custom_yaml)
            # merge in place
            self.merge_dicts_hierarchical(lo=config_to_use, hi=user_yaml)
        self.parse_args_into_searchspace(config_to_use, additional_args)
        return config_to_use

    def parse_args_into_searchspace(
        self, searchspace: dict[str, Any], args: Iterable[str]
    ):
        """
        Overwrites args given in the form of 'this.that=something'. Also supports lists: 'this.that[0]=something'
        """
        for arg in args:
            self._parse_arg_into_searchspace(searchspace, arg)

    def _parse_arg_into_searchspace(self, searchspace: dict[str, Any], arg: str):
        """Apply one 'dotted.path=value' override to *searchspace* in place.

        The value is parsed as YAML (so numbers, booleans and lists work);
        missing intermediate dicts are created on the fly.
        """
        # split only on the first '=' so the value may itself contain '='
        # (e.g. "engine.template=run --flag=x"); split("=") would raise here
        key_spec, value = arg.split("=", 1)
        raw_keys = key_spec.split(".")
        keys_with_list_indices: list[Any] = []
        for key in raw_keys:
            # 'name[3]' / 'name[-1]' address an element of a list-valued key
            match = re.search(r"^(.*?)\[(\-?\d+)\]$", key)
            if match:
                keys_with_list_indices.append(match.group(1))
                keys_with_list_indices.append(int(match.group(2)))
            else:
                keys_with_list_indices.append(key)
        target = searchspace
        for key in keys_with_list_indices[:-1]:
            # create intermediate dicts as needed; list indices must exist
            if isinstance(target, dict) and key not in target:
                target[key] = {}
            target = target[key]
        target[keys_with_list_indices[-1]] = yaml.safe_load(value)

    def merge_dicts_hierarchical(self, lo: dict, hi: dict):
        """
        Overwrites values in `lo` with values from `hi` if they are present in both,
        recursing into dicts so sibling keys in `lo` are preserved.
        """
        for k, v in hi.items():
            if isinstance(v, dict) and isinstance(lo.get(k, None), dict):
                self.merge_dicts_hierarchical(lo[k], v)
            else:
                lo[k] = v
|
||||
|
|
@ -0,0 +1,395 @@
|
|||
import hashlib
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
|
||||
import torch
|
||||
import yaml
|
||||
from lightning import (
|
||||
Callback,
|
||||
LightningDataModule,
|
||||
LightningModule,
|
||||
Trainer,
|
||||
seed_everything,
|
||||
)
|
||||
from lightning.pytorch.callbacks import (
|
||||
EarlyStopping,
|
||||
LearningRateMonitor,
|
||||
ModelCheckpoint,
|
||||
)
|
||||
from lightning.pytorch.loggers import CSVLogger, Logger, TensorBoardLogger
|
||||
from lightning.pytorch.utilities.types import _EVALUATE_OUTPUT
|
||||
from pytorch_fob.engine.callbacks import (
|
||||
LogTrainingStats,
|
||||
OptimizerTime,
|
||||
PrintEpochWithTime,
|
||||
RestrictTrainEpochs,
|
||||
)
|
||||
from pytorch_fob.engine.configs import (
|
||||
EngineConfig,
|
||||
EvalConfig,
|
||||
OptimizerConfig,
|
||||
TaskConfig,
|
||||
)
|
||||
from pytorch_fob.engine.utils import (
|
||||
AttributeDict,
|
||||
EndlessList,
|
||||
calculate_steps,
|
||||
concatenate_dict_keys,
|
||||
convert_type_inside_dict,
|
||||
dict_differences,
|
||||
findfirst,
|
||||
log_info,
|
||||
log_warn,
|
||||
path_to_str_inside_dict,
|
||||
precision_with_fallback,
|
||||
seconds_to_str,
|
||||
trainer_strategy,
|
||||
write_results,
|
||||
)
|
||||
from pytorch_fob.optimizers.optimizers import Optimizer
|
||||
from pytorch_fob.tasks.tasks import TaskDataModule, TaskModel, import_task
|
||||
|
||||
|
||||
class Run:
    """One training/evaluation run: a resolved task + optimizer + engine config.

    Wires together the Lightning trainer, callbacks, loggers and the output
    directory layout, and executes train/validate/test as requested by the
    engine config.
    """

    def __init__(
        self,
        config: dict[str, Any],
        default_config: dict[str, Any],
        task_key: str,
        optimizer_key: str,
        engine_key: str,
        eval_key: str,
        identifier_key: str,
    ) -> None:
        """
        setup: download and prepare data before creating the Run
        """
        self._config = config
        self._default_config = default_config
        self.task_key = task_key
        self.optimizer_key = optimizer_key
        self.engine_key = engine_key
        self.eval_key = eval_key
        self.identifier_key = identifier_key
        self._generate_configs()
        self._set_outpath()
        # filled lazily by _init_callbacks() on first get_callbacks() call
        self._callbacks = AttributeDict({})

    def start(self) -> dict[str, _EVALUATE_OUTPUT]:
        """Execute the run and return all collected scores.

        Depending on the engine flags this trains, validates and/or tests;
        scores are also written to ``<run_dir>/scores.json``.
        """
        self.run_dir.mkdir(parents=True, exist_ok=True)
        self.export_config()
        scores: dict[str, _EVALUATE_OUTPUT] = {}
        if any([self.engine.train, self.engine.test]):
            self._ensure_resume_path()
            self.ensure_max_steps()
            torch.set_float32_matmul_precision("high")
            seed_everything(self.engine.seed, workers=True)
            model, data_module = self.get_task()
            if self.engine.train:
                trainer = self.get_trainer()
                self._train(trainer, model, data_module)
                scores["mean_optimizer_time_ms"] = self._callbacks[
                    "optimizer_time"
                ].total_mean_optimizer_step_time_ms
                if self.engine.validate:
                    scores["validation"] = self._validate(trainer, model, data_module)
            if self.engine.test:
                tester = self.get_tester()
                if (
                    self.engine.train
                ):  # no need to load last checkpoint, model is already loaded
                    ckpt = None
                elif self.engine.resume is not None:
                    ckpt = self.engine.resume
                else:
                    log_warn(
                        "No last checkpoint found, evaluating untrained model. "
                        + "If this is unexpected, try to set 'engine.resume=true'."
                    )
                    ckpt = None
                scores["test_final"] = self._test(
                    tester, model, data_module, ckpt=ckpt
                )  # type: ignore (see ensure_resume_path)
                best_path = self.get_best_checkpoint()
                if best_path is not None:
                    scores["test_best"] = self._test(
                        tester, model, data_module, Path(best_path)
                    )
                else:
                    log_info("No best checkpoint found, skipping test.")
        write_results(scores, self.run_dir / "scores.json")
        return scores

    def _train(
        self, trainer: Trainer, model: LightningModule, data_module: LightningDataModule
    ):
        """Fit the model, timing the whole training and persisting the duration."""
        start_time = time.time()
        if self.engine.accelerator == "gpu" and torch.cuda.is_available():
            # NOTE(review): torch.backends.cuda.sdp_kernel is deprecated in
            # newer torch in favor of torch.nn.attention.sdpa_kernel — confirm
            # against the pinned torch version.
            with torch.backends.cuda.sdp_kernel(
                enable_flash=True,
                enable_math=True,
                # mem-efficient attention is nondeterministic, so it is only
                # enabled when memory is prioritized or determinism is off
                enable_mem_efficient=(
                    self.engine.optimize_memory or not self.engine.deterministic
                ),
            ):
                trainer.fit(
                    model, datamodule=data_module, ckpt_path=self.engine.resume
                )  # type: ignore
        else:
            trainer.fit(
                model, datamodule=data_module, ckpt_path=self.engine.resume
            )  # type: ignore
        end_time = time.time()
        train_time = int(end_time - start_time)
        log_info(f"Finished training in {seconds_to_str(train_time)}.")

        # Write train_time.txt
        train_time_path = self.run_dir / "train_time.txt"
        with open(train_time_path, "w") as f:
            f.write(str(train_time) + "\n")

    def _validate(
        self, trainer: Trainer, model: LightningModule, data_module: LightningDataModule
    ) -> _EVALUATE_OUTPUT:
        """Run the validation loop and return its metrics."""
        score = trainer.validate(model, datamodule=data_module)
        return score

    def _test(
        self,
        tester: Trainer,
        model: LightningModule,
        data_module: LightningDataModule,
        ckpt: Optional[Path] = None,
    ) -> _EVALUATE_OUTPUT:
        """Test a checkpoint (falls back to ``engine.resume`` when *ckpt* is None).

        Writes results to ``results_<final|best>_model.json`` in the run dir.
        """
        ckpt_path = self.engine.resume if ckpt is None else ckpt
        # "final" for no/last checkpoint, "best" for a best-* checkpoint
        mode = "final" if ckpt_path is None or ckpt_path.stem.startswith("last") else "best"  # type: ignore
        log_info(f"Testing {mode} checkpoint...")
        score = tester.test(model, datamodule=data_module, ckpt_path=ckpt_path)  # type: ignore
        write_results(score, self.run_dir / f"results_{mode}_model.json")
        return score

    def export_config(self):
        """Dump the (serializable) run config to ``<run_dir>/config.yaml``."""
        with open(self.run_dir / "config.yaml", "w", encoding="utf8") as f:
            d = path_to_str_inside_dict(self._config)
            d = convert_type_inside_dict(d, EndlessList, list)
            yaml.safe_dump(d, f)

    def export_config_dict(self) -> dict[str, Any]:
        """Return the run config as a YAML-serializable dict (no file written)."""
        d = path_to_str_inside_dict(self._config)
        d = convert_type_inside_dict(d, EndlessList, list)
        return d

    def get_config(self) -> AttributeDict:
        """The raw config, wrapped for attribute-style access."""
        return AttributeDict(self._config)

    def get_optimizer(self) -> Optimizer:
        """Build the optimizer wrapper from the optimizer config."""
        return Optimizer(self.optimizer)

    def get_task(self) -> tuple[TaskModel, TaskDataModule]:
        """Import the task package and build its model + datamodule."""
        task_module = import_task(self.task.name)
        return task_module.get_task(self.get_optimizer(), self.task)

    def get_datamodule(self) -> TaskDataModule:
        """Import the task package and build only its datamodule."""
        task_module = import_task(self.task.name)
        return task_module.get_datamodule(self.task)

    def get_callbacks(self) -> list[Callback]:
        """Lightning callbacks for this run (created once, then cached)."""
        if len(self._callbacks) < 1:
            self._init_callbacks()
        return list(self._callbacks.values())

    def get_loggers(self) -> list[Logger]:
        """TensorBoard and CSV loggers, both rooted at the run dir."""
        return [
            TensorBoardLogger(save_dir=self.run_dir, name="tb_logs"),
            CSVLogger(save_dir=self.run_dir, name="csv_logs"),
        ]

    def get_trainer(self) -> Trainer:
        """The training Trainer, configured from the engine config."""
        return Trainer(
            max_steps=self.engine.max_steps,
            logger=self.get_loggers(),
            callbacks=self.get_callbacks(),
            devices=self.engine.devices,
            strategy=trainer_strategy(self.engine.devices),
            enable_progress_bar=(not self.engine.silent),
            deterministic=self.engine.deterministic,
            detect_anomaly=self.engine.detect_anomaly,
            gradient_clip_val=self.engine.gradient_clip_val,
            gradient_clip_algorithm=self.engine.gradient_clip_alg,
            precision=precision_with_fallback(self.engine.precision),  # type: ignore
            accelerator=self.engine.accelerator,
            # NOTE(review): 'logging_inteval' matches the config key spelling
            # used elsewhere in this project ("interval" misspelled) — verify
            # before renaming.
            log_every_n_steps=self.engine.logging_inteval,
        )

    def get_tester(self) -> Trainer:
        """A minimal single-device Trainer used only for testing."""
        return Trainer(
            devices=1,
            logger=False,
            enable_progress_bar=(not self.engine.silent),
            deterministic=self.engine.deterministic,
            precision=precision_with_fallback(self.engine.precision),  # type: ignore
            accelerator=self.engine.accelerator,
        )

    def get_best_checkpoint(self) -> Optional[Path]:
        """Path of the best checkpoint, or None if none exists.

        Prefers the live ModelCheckpoint callback; falls back to scanning the
        checkpoint dir for a file named ``best*``.
        """
        model_checkpoint = self._callbacks.get("best_model_checkpoint", None)
        if model_checkpoint is not None:
            model_checkpoint = Path(model_checkpoint.best_model_path)
            # best_model_path may be empty ("" -> a directory) before any save
            model_checkpoint = (
                model_checkpoint if not model_checkpoint.is_dir() else None
            )
        if model_checkpoint is None:
            available_checkpoints = self.get_available_checkpoints()
            model_checkpoint = findfirst(
                lambda x: x.stem.startswith("best"), available_checkpoints
            )
        return model_checkpoint

    def get_available_checkpoints(self) -> list[Path]:
        """All ``*.ckpt`` files in the checkpoint dir (empty list if absent)."""
        if self.checkpoint_dir.exists():
            return list(
                filter(lambda x: x.suffix == ".ckpt", self.checkpoint_dir.iterdir())
            )
        return []

    def ensure_max_steps(self):
        """
        Ensures that `self.task.max_steps` is calculated and set correctly.

        When unset, derives it from max_epochs, dataset size, batch size and
        device count, writes it back into both config dicts and regenerates
        the config objects.
        """
        if self.task.max_steps is None:
            max_steps = self._calc_max_steps()
            self._config[self.task_key]["max_steps"] = max_steps
            if self._default_config[self.task_key]["max_steps"] is None:
                self._default_config[self.task_key]["max_steps"] = max_steps
            self._generate_configs()
            log_info(
                f"'max_steps' not set explicitly, using {max_steps=} (calculated from "
                + f"max_epochs={self.task.max_epochs}, batch_size={self.task.batch_size}, "
                + f"devices={self.engine.devices})"
            )

    def _ensure_resume_path(self):
        """
        Ensures that `self.engine.resume` is either a valid Path or None.

        ``resume=True`` resolves to the last checkpoint if one exists;
        ``resume=False`` (or no checkpoint found) resolves to None.
        """
        if isinstance(self.engine.resume, Path):
            pass
        elif isinstance(self.engine.resume, bool):
            resume_path = None
            if self.engine.resume:
                available_checkpoints = self.get_available_checkpoints()
                if len(available_checkpoints) < 1:
                    log_warn(
                        "engine.resume=True but no checkpoint was found. Starting run from scratch."
                    )
                else:
                    resume_path = findfirst(
                        lambda x: x.stem == "last", available_checkpoints
                    )
            self._config[self.engine_key]["resume"] = resume_path
            self._generate_configs()
        else:
            raise TypeError(
                f"Unsupportet type for 'resume', got {type(self.engine.resume)=}."
            )

    def _calc_max_steps(self) -> int:
        """Total optimizer steps implied by epochs, dataset size and devices."""
        dm = self.get_datamodule()
        dm.setup("fit")
        train_samples = len(dm.data_train)
        return calculate_steps(
            self.task.max_epochs,
            train_samples,
            self.engine.devices,
            self.task.batch_size,
        )

    def _init_callbacks(self):
        """Populate ``self._callbacks`` according to the engine/task configs."""
        self._callbacks["optimizer_time"] = OptimizerTime()
        # tracks the best checkpoint by the task's target metric
        self._callbacks["best_model_checkpoint"] = ModelCheckpoint(
            dirpath=self.checkpoint_dir,
            filename="best-{epoch}-{step}",
            monitor=self.task.target_metric,
            mode=self.task.target_metric_mode,
        )
        # periodic "last" checkpoint for resuming
        self._callbacks["model_checkpoint"] = ModelCheckpoint(
            dirpath=self.checkpoint_dir,
            enable_version_counter=False,
            every_n_epochs=1,
            save_last=True,
        )
        if self.engine.early_stopping is not None:
            self._callbacks["early_stopping"] = EarlyStopping(
                monitor=self.engine.early_stopping_metric,
                mode=self.task.target_metric_mode,
                patience=self.engine.early_stopping,
                check_finite=self.engine.check_finite,
                log_rank_zero_only=True,
            )
        self._callbacks["lr_monitor"] = LearningRateMonitor(
            logging_interval=self.optimizer.lr_interval
        )
        if self.engine.log_extra:
            # log_extra may be a bool (enable with defaults) or a kwargs dict
            self._callbacks["extra"] = LogTrainingStats(
                log_every_n_steps=self.engine.logging_inteval,
                **(
                    self.engine.log_extra
                    if isinstance(self.engine.log_extra, dict)
                    else {}
                ),
            )
        self._callbacks["print_epoch"] = PrintEpochWithTime(self.engine.silent)
        if self.engine.restrict_train_epochs is not None:
            self._callbacks["restrict_train_epochs"] = RestrictTrainEpochs(
                self.engine.restrict_train_epochs
            )
        # TODO: callback for logging time per step

    def outpath_exclude_keys(self) -> list[str]:
        """Config keys that must not influence the output directory name."""
        return [self.eval_key, "output_dir_name"]

    def _set_outpath(self):
        """Derive ``run_dir``/``checkpoint_dir`` from the config's diff to defaults.

        The directory name encodes all non-default settings; names longer
        than the filesystem limit are replaced by their md5 hash.
        """
        base: Path = (
            self.engine.output_dir
            / self.task.output_dir_name
            / self.optimizer.output_dir_name
        )
        exclude_keys = self.outpath_exclude_keys()
        exclude_keys += self.engine.outpath_irrelevant_engine_keys()
        diffs = concatenate_dict_keys(
            dict_differences(self._config, self._default_config),
            exclude_keys=exclude_keys,
        )
        run_dir = (
            ",".join(f"{k}={str(v)}" for k, v in sorted(diffs.items()))
            if diffs
            else "default"
        )
        if len(run_dir) > 254:  # max file name length
            hashdir = hashlib.md5(run_dir.encode()).hexdigest()
            log_info(f"folder name {run_dir} is too long, using {hashdir} instead.")
            run_dir = hashdir
        self.run_dir = base / run_dir
        self.checkpoint_dir = self.run_dir / "checkpoints"

    def _generate_configs(self):
        """(Re)build the typed config views from the raw config dict.

        Called again whenever ``_config`` is mutated so the views stay in sync.
        """
        self.engine = EngineConfig(self._config, self.task_key, self.engine_key)
        self.optimizer = OptimizerConfig(
            self._config, self.optimizer_key, self.task_key, self.identifier_key
        )
        self.task = TaskConfig(
            self._config, self.task_key, self.engine_key, self.identifier_key
        )
        self.evaluation = EvalConfig(
            self._config,
            eval_key=self.eval_key,
            engine_key=self.engine_key,
            ignore_keys=self.engine.outpath_irrelevant_engine_keys(
                prefix=f"{self.engine_key}."
            )
            + [
                f"{self.optimizer_key}.output_dir_name",
                f"{self.task_key}.output_dir_name",
            ],
        )
|
@ -0,0 +1,239 @@
|
|||
import traceback
|
||||
from pathlib import Path
|
||||
from tempfile import TemporaryDirectory
|
||||
from typing import Any, Iterable, Optional, Sequence
|
||||
|
||||
import yaml
|
||||
from pytorch_fob.engine.run import Run
|
||||
from pytorch_fob.engine.slurm import Slurm
|
||||
from pytorch_fob.engine.utils import (
|
||||
log_info,
|
||||
log_warn,
|
||||
seconds_to_str,
|
||||
some,
|
||||
str_to_seconds,
|
||||
)
|
||||
|
||||
FOB_RUN_SCRIPT = "pytorch_fob.run_experiment"
|
||||
FOB_EVAL_SCRIPT = "pytorch_fob.evaluate_experiment"
|
||||
|
||||
|
||||
def argcheck_allequal_engine(
    runs: list[Run], keys: list[str], reason: str = "'engine.run_scheduler=slurm_array'"
) -> None:
    """Raise ValueError unless all runs agree on every given engine key."""
    reference = runs[0]
    for key in keys:
        if any(run.engine[key] != reference.engine[key] for run in runs[1:]):
            required = ", ".join("engine." + k for k in keys)
            raise ValueError(
                f"All runs must have the same values for {required} when using {reason}"
            )
|
||||
|
||||
|
||||
def export_experiment(run: Run, experiment: dict[str, Any]) -> Path:
    """Write the experiment config into the run directory and return its path."""
    run.run_dir.mkdir(parents=True, exist_ok=True)
    target = run.run_dir / "experiment.yaml"
    with open(target, "w", encoding="utf8") as f:
        yaml.safe_dump(experiment, f)
    return target
|
||||
|
||||
|
||||
def process_args(args: dict[str, str], run: Run) -> None:
    """Fill in derived sbatch arguments in place.

    Scales the requested time by the engine's sbatch time factor and adds
    GPU/ntasks/cpus defaults from the engine config unless already given.
    """
    if "time" in args:
        requested = args["time"]
        seconds = str_to_seconds(requested) if isinstance(requested, str) else requested
        scaled = int(run.engine.sbatch_time_factor * seconds)
        args["time"] = seconds_to_str(scaled)
    if "gres" not in args and "gpus" not in args:
        args["gres"] = f"gpu:{run.engine.devices}"
    has_ntasks = any(key.startswith("ntasks") for key in args)
    if not has_ntasks:
        args["ntasks-per-node"] = str(run.engine.devices)
    has_cpus = any(key.startswith("cpus") for key in args)
    if not has_cpus:
        args["cpus-per-task"] = str(run.engine.workers)
||||
|
||||
|
||||
def wrap_template(
    template_path: Optional[Path], command: str, placeholder: str = "__FOB_COMMAND__"
) -> str:
    """Embed *command* into the script template at *template_path*.

    If the template contains *placeholder*, the command replaces it;
    otherwise the command is appended after the template. With no template,
    the command is returned unchanged.
    """
    if template_path is None:
        return command
    with open(template_path, "r", encoding="utf8") as f:
        template = f.read()
    if placeholder in template:
        return template.replace(placeholder, command)
    return f"{template}\n{command}\n"
|
||||
|
||||
|
||||
def get_command(experiment_file: Path, index: Optional[str], plot: bool) -> str:
    """Build the srun command line for one run (or for the plotting job)."""
    module = FOB_EVAL_SCRIPT if plot else FOB_RUN_SCRIPT
    # run jobs disable plotting; a dedicated plot job is scheduled separately
    plot_flag = "" if plot else "engine.plot=false"
    scheduler_flag = "" if index is None else f"engine.run_scheduler=single:{index}"
    return f"srun python -m {module} {experiment_file} {scheduler_flag} {plot_flag}"
|
||||
|
||||
|
||||
def get_job_name(run: Run) -> str:
    """Slurm job name combining task and optimizer names."""
    return "FOB-{}-{}".format(run.task.name, run.optimizer.name)
|
||||
|
||||
|
||||
def get_slurm(
    job_name: str, args: dict[str, str], log_dir: Path, scripts_dir: Path
) -> Slurm:
    """Construct a Slurm launcher writing logs and sbatch scripts to the given dirs."""
    return Slurm(
        job_name,
        args,
        log_dir=str(log_dir.resolve()),
        scripts_dir=str(scripts_dir.resolve()),
        bash_strict=False,  # TODO: maybe add arg or just remove 'nounset'
    )
|
||||
|
||||
|
||||
def run_slurm(
    job_name: str,
    command: str,
    args: dict[str, str],
    log_dir: Path,
    save_sbatch_scripts: Optional[Path] = None,
    dependencies: Sequence[int] = tuple(),
    dependency_type: str = "afterok",
) -> Optional[int]:
    """Submit *command* as a SLURM batch job.

    Args:
        job_name: name shown by SLURM for this job.
        command: shell command placed into the sbatch script.
        args: sbatch options (option name -> value).
        log_dir: directory for the SLURM stdout/stderr logs.
        save_sbatch_scripts: keep the generated sbatch script here; if None,
            the script is written to a temporary directory and discarded.
        dependencies: job ids this job waits for.
        dependency_type: SLURM dependency mode (e.g. "afterok", "afterany").

    Returns:
        The submitted job id, or None if submission failed.
    """

    def _submit(scripts_dir: Path) -> Optional[int]:
        # Both script locations previously duplicated this identical call;
        # factored out so the submission logic exists only once.
        s = get_slurm(job_name, args, log_dir, scripts_dir=scripts_dir)
        return s.run(
            command,
            name_addition="",
            depends_on=dependencies,
            dependency_type=dependency_type,
        )

    if save_sbatch_scripts is None:
        with TemporaryDirectory() as tmpdir:
            return _submit(Path(tmpdir).resolve())
    return _submit(save_sbatch_scripts)
|
||||
|
||||
|
||||
def run_plotting_job(
    experiment_file: Path,
    args: dict[str, str],
    log_dir: Path,
    dependencies: Sequence[int],
    template: Optional[Path] = None,
) -> None:
    """Submit a small CPU-only SLURM job that plots the finished experiment.

    Mutates *args* in place: strips GPU/array/task options and requests a
    single small task instead. The job depends on *dependencies* with
    "afterany" so plotting also happens when some runs failed.
    """
    args["time"] = seconds_to_str(300)  # 5 minutes should be plenty of time to plot
    args.pop("array", None)
    # no gpus needed for plotting
    args.pop("gpus", None)
    args.pop("gres", None)
    # just one cpu per node for plotting
    remove_keys = [
        k for k in args.keys() if k.startswith("ntasks") or k.startswith("cpus")
    ]
    for k in remove_keys:
        args.pop(k)
    args["nodes"] = "1"
    args["ntasks-per-node"] = "1"
    args["cpus-per-task"] = "2"
    command = get_command(experiment_file, None, plot=True)
    command = wrap_template(template, command)
    run_slurm(
        "FOB-plot",
        command,
        args,
        log_dir,
        dependencies=dependencies,
        dependency_type="afterany",
    )
|
||||
|
||||
|
||||
def slurm_array(runs: list[Run], experiment: dict[str, Any]) -> None:
    """Submit all *runs* as one SLURM job array, one array task per run.

    All runs must share the same engine/resource configuration (checked
    below). When 'engine.plot' is enabled, a plotting job is chained after
    the whole array.
    """
    # these engine settings must be identical across runs to share one array
    equal_req = [
        "devices",
        "workers",
        "sbatch_args",
        "slurm_log_dir",
        "sbatch_script_template",
        "run_scheduler",
    ]
    argcheck_allequal_engine(runs, equal_req)
    run = runs[0]  # all runs have the same args
    args = run.engine.sbatch_args
    log_dir = some(
        run.engine.slurm_log_dir, default=run.engine.output_dir / "slurm_logs"
    )
    if "array" not in args:
        args["array"] = f"1-{len(runs)}"
    process_args(args, run)
    # export every run's experiment file; they all resolve to the same path
    experiment_file = [export_experiment(run, experiment).resolve() for run in runs][0]
    # each array task selects its run index via $SLURM_ARRAY_TASK_ID
    command = get_command(experiment_file, "$SLURM_ARRAY_TASK_ID", plot=False)
    command = wrap_template(run.engine.sbatch_script_template, command)
    job_id = run_slurm(
        get_job_name(run),
        command,
        args,
        log_dir,
        save_sbatch_scripts=run.engine.save_sbatch_scripts,
    )
    if job_id is not None and run.engine.plot:
        run_plotting_job(
            experiment_file,
            args,
            log_dir,
            [job_id],
            template=run.engine.sbatch_script_template,
        )
|
||||
|
||||
|
||||
def slurm_jobs(runs: list[Run], experiment: dict[str, Any]) -> list[int]:
    """Submit each run in *runs* as its own SLURM job.

    Returns the ids of successfully submitted jobs. If plotting is enabled
    for any run, a plotting job depending on all submitted jobs is chained
    afterwards (requires shared log dir / script template across runs).
    """
    job_ids = []
    experiment_file = Path()  # placeholder; overwritten in the loop below
    for i, run in enumerate(runs, start=1):
        args = run.engine.sbatch_args
        process_args(args, run)
        log_dir = some(run.engine.slurm_log_dir, default=run.run_dir / "slurm_logs")
        experiment_file = export_experiment(run, experiment).resolve()
        command = get_command(experiment_file, str(i), plot=False)
        command = wrap_template(run.engine.sbatch_script_template, command)
        job_id = run_slurm(
            get_job_name(run),
            command,
            args,
            log_dir,
            save_sbatch_scripts=run.engine.save_sbatch_scripts,
        )
        if job_id is not None:
            job_ids.append(job_id)
    if len(job_ids) > 0 and any(map(lambda r: r.engine.plot, runs)):
        equal_req = ["slurm_log_dir", "sbatch_script_template"]
        argcheck_allequal_engine(
            runs,
            equal_req,
            reason="'engine.plot=true' with 'engine.run_scheduler=slurm_jobs'",
        )
        # NOTE(review): args/log_dir here are those of the *last* run in the
        # loop above — confirm this is intended for the plotting job
        run_plotting_job(
            experiment_file,
            args,
            log_dir,
            job_ids,
            template=runs[0].engine.sbatch_script_template,
        )
    return job_ids
|
||||
|
||||
|
||||
def sequential(runs: Iterable[Run], n_runs: int, experiment: dict[str, Any]):
    """Execute *runs* one after another in the current process.

    A failing run (RuntimeError, e.g. raised by detect_anomaly) is logged
    with its traceback and skipped so the remaining runs still execute.
    """
    for i, run in enumerate(runs, start=1):
        log_info(f"Starting run {i}/{n_runs}.")
        export_experiment(run, experiment)
        try:
            run.start()
        except RuntimeError:  # detect_anomaly raises RuntimeError
            t = traceback.format_exc()
            log_warn(f"Run {i}/{n_runs} failed with {t}.")
|
||||
|
|
@ -0,0 +1,196 @@
|
|||
"""
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2015 Brent Pedersen - Bioinformatics
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
||||
Adapted from https://github.com/brentp/slurmpy
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import atexit
|
||||
import datetime
|
||||
import hashlib
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from typing import Optional, Sequence
|
||||
|
||||
TMPL = """\
|
||||
#!/bin/bash
|
||||
|
||||
#SBATCH -e {log_dir}/{name}.%J.err
|
||||
#SBATCH -o {log_dir}/{name}.%J.out
|
||||
#SBATCH -J {name}
|
||||
|
||||
{header}
|
||||
|
||||
{bash_setup}
|
||||
|
||||
__script__"""
|
||||
|
||||
|
||||
def tmp(suffix=".sh"):
    """Return the path of a temporary file that is deleted at interpreter exit.

    Uses ``tempfile.mkstemp`` (which actually creates the file) instead of
    the deprecated, race-prone ``tempfile.mktemp``. Callers can reopen the
    returned path for writing as before.
    """
    fd, path = tempfile.mkstemp(suffix=suffix)
    os.close(fd)  # only the path is needed; callers open it themselves
    atexit.register(os.unlink, path)
    return path
|
||||
|
||||
|
||||
class Slurm(object):
    """Renders an sbatch script from a template and submits it via SLURM.

    Adapted from slurmpy: fills ``TMPL`` with the job name, the ``#SBATCH``
    header lines derived from ``slurm_kwargs`` and optional strict-bash
    setup, then hands the resulting script to ``sbatch``.
    """

    def __init__(
        self,
        name,
        slurm_kwargs=None,
        tmpl=None,
        date_in_name=True,
        scripts_dir="slurm-scripts",
        log_dir="logs",
        bash_strict=True,
    ):
        # slurm_kwargs: mapping of sbatch option name -> value (no dashes)
        if slurm_kwargs is None:
            slurm_kwargs = {}
        if tmpl is None:
            tmpl = TMPL
        self.log_dir = log_dir
        self.bash_strict = bash_strict

        header = []
        if "time" not in slurm_kwargs.keys():
            # default wall-time limit if the caller did not request one
            slurm_kwargs["time"] = "84:00:00"
        for k, v in slurm_kwargs.items():
            # long options become "--key=value", single-letter ones "-k value"
            if len(k) > 1:
                k = "--" + k + "="
            else:
                k = "-" + k + " "
            header.append(f"#SBATCH {k}{v}")

        # add bash setup list to collect bash script config
        bash_setup = []
        if bash_strict:
            bash_setup.append("set -eo pipefail -o nounset")

        self.header = "\n".join(header)
        self.bash_setup = "\n".join(bash_setup)
        # sanitize the job name: keep only alphanumerics and dashes
        self.name = "".join(
            x for x in name.replace(" ", "-") if x.isalnum() or x == "-"
        )
        self.tmpl = tmpl
        self.slurm_kwargs = slurm_kwargs
        if scripts_dir is not None:
            self.scripts_dir = os.path.abspath(scripts_dir)
        else:
            self.scripts_dir = None
        self.date_in_name = bool(date_in_name)

    def __str__(self):
        """Render the sbatch script template (without the payload command)."""
        return self.tmpl.format(
            name=self.name,
            header=self.header,
            log_dir=self.log_dir,
            bash_setup=self.bash_setup,
        )

    def _tmpfile(self):
        # Path of the script file to write: a throwaway temp file unless a
        # scripts_dir was configured, in which case dirs are created first.
        if self.scripts_dir is None:
            return tmp()
        else:
            for _dir in [self.scripts_dir, self.log_dir]:
                if not os.path.exists(_dir):
                    os.makedirs(_dir)
            return f"{self.scripts_dir}/{self.name}.sh"

    def run(
        self,
        command: str,
        name_addition: Optional[str] = None,
        cmd_kwargs: Optional[dict[str, str]] = None,
        _cmd: str = "sbatch",
        tries: int = 1,
        depends_on: Optional[Sequence[int]] = None,
        dependency_type: str = "afterok",
    ) -> Optional[int]:
        """
        command: a bash command that you want to run
        name_addition: if not specified, the sha1 of the command to run
                       appended to job name. if it is "date", the yyyy-mm-dd
                       date will be added to the job name.
        cmd_kwargs: dict of extra arguments to fill in command
                    (so command itself can be a template).
        _cmd: submit command (change to "bash" for testing).
        tries: try to run a job either this many times or until the first
               success.
        depends_on: job ids that this depends on before it is run
        dependency_type: after, afterok, afterany, afternotok

        Returns the submitted job id, or None if submission failed.
        """
        if name_addition is None:
            name_addition = hashlib.sha1(command.encode("utf-8")).hexdigest()

        if self.date_in_name:
            name_addition += "-" + str(datetime.date.today())
        name_addition = name_addition.strip(" -")

        if cmd_kwargs is None:
            cmd_kwargs = {}

        n = self.name  # remember the base name; restored after submission
        self.name = self.name.strip(" -")
        self.name += "-" + name_addition.strip(" -")
        args = []
        for k, v in cmd_kwargs.items():
            # cmd_kwargs become environment exports preceding the command
            args.append(f"export {k}={v}")
        args = "\n".join(args)

        # splice exports + command into the rendered script template
        tmpl = str(self).replace("__script__", args + "\n###\n" + command)
        if depends_on is None or (len(depends_on) == 1 and depends_on[0] is None):
            depends_on = []

        with open(self._tmpfile(), "w", encoding="utf8") as sh:
            sh.write(tmpl)

        job_id = None
        for itry in range(1, tries + 1):
            args = [_cmd]
            if depends_on is not None and len(depends_on) > 0:
                dep = f"--dependency={dependency_type}:" + ":".join(
                    [str(x) for x in depends_on]
                )
                args.append(dep)
            if itry > 1:
                # retry attempts only run if the previous attempt failed
                mid = f"--dependency=afternotok:{job_id}"
                args.append(mid)
            args.append(sh.name)
            res = subprocess.check_output(args).strip()
            print(res.decode(), file=sys.stderr)
            self.name = n  # restore the base job name
            if not res.startswith(b"Submitted batch"):
                return None
            j_id = int(res.split()[-1])
            if itry == 1:
                job_id = j_id
        return job_id
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # run this module's doctests when executed directly
    import doctest

    doctest.testmod()
|
||||
|
|
@ -0,0 +1,236 @@
|
|||
import json
|
||||
import logging
|
||||
import math
|
||||
import signal
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Iterable, Optional, Type
|
||||
|
||||
import torch
|
||||
from lightning_utilities.core.rank_zero import (
|
||||
log,
|
||||
rank_zero_debug,
|
||||
rank_zero_info,
|
||||
rank_zero_only,
|
||||
)
|
||||
|
||||
|
||||
def set_loglevel(level: str):
    """Set the verbosity of the "lightning.pytorch" logger.

    Unknown level names leave the logger unchanged.
    """
    levels = {
        "debug": logging.DEBUG,
        "info": logging.INFO,
        "warn": logging.WARNING,
        "error": logging.ERROR,
        "silent": logging.CRITICAL,
    }
    pytorch_logger = logging.getLogger("lightning.pytorch")
    if level in levels:
        pytorch_logger.setLevel(levels[level])
|
||||
|
||||
|
||||
@rank_zero_only
def rank_zero_print(*args: Any, **kwargs: Any):
    """print() that only executes on the rank-zero process."""
    return print(*args, **kwargs)
|
||||
|
||||
|
||||
@rank_zero_only
def log_warn(msg: str, *args: Any, prefix: str = "[FOB WARNING] ", **kwargs: Any):
    """Log a prefixed warning message, emitted only on the rank-zero process."""
    return log.warning(f"{prefix}{msg}", *args, **kwargs)
|
||||
|
||||
|
||||
def log_info(msg: str, *args: Any, prefix: str = "[FOB INFO] ", **kwargs: Any):
    """Log a prefixed info message on the rank-zero process."""
    return rank_zero_info(f"{prefix}{msg}", *args, **kwargs)
|
||||
|
||||
|
||||
def log_debug(msg: str, *args: Any, prefix: str = "[FOB DEBUG] ", **kwargs: Any):
    """Log a prefixed debug message on the rank-zero process."""
    return rank_zero_debug(f"{prefix}{msg}", *args, **kwargs)
|
||||
|
||||
|
||||
def write_results(results, filepath: Path):
    """Serialize *results* as pretty-printed JSON to *filepath*."""
    with open(filepath, "w", encoding="utf8") as fp:
        json.dump(results, fp, indent=4)
    message = f"Saved results into {filepath}."
    print(message)
|
||||
|
||||
|
||||
def wrap_list(x: Any) -> list[Any]:
    """Return *x* unchanged if it is already a list, otherwise wrap it in one."""
    return x if isinstance(x, list) else [x]
|
||||
|
||||
|
||||
def calculate_steps(epochs: int, datapoints: int, devices: int, batch_size: int) -> int:
    """Total number of optimizer steps for the given training configuration."""
    steps_per_epoch = math.ceil(datapoints / batch_size / devices)
    return steps_per_epoch * epochs
|
||||
|
||||
|
||||
def some(*args, default):
    """
    Return the first argument that is not None; fall back to *default*.
    """
    for value in args:
        if value is not None:
            return value
    return default
|
||||
|
||||
|
||||
def maybe_abspath(path: Optional[str | Path]) -> Optional[Path]:
|
||||
if path is None:
|
||||
return None
|
||||
return Path(path).resolve()
|
||||
|
||||
|
||||
def findfirst(f: Callable, xs: Iterable):
    """Return the first element of *xs* satisfying predicate *f*, else None."""
    return next((x for x in xs if f(x)), None)
|
||||
|
||||
|
||||
def trainer_strategy(devices: int | list[int] | str) -> str:
|
||||
if isinstance(devices, str):
|
||||
return "auto"
|
||||
ndevices = devices if isinstance(devices, int) else len(devices)
|
||||
return "ddp" if ndevices > 1 else "auto"
|
||||
|
||||
|
||||
def gpu_suited_for_compile() -> bool:
    """Whether the current GPU has a compute capability suited to torch.compile.

    True only for compute capabilities (7, 0), (8, 0) and (9, 0). Returns an
    explicit False (instead of the previous implicit None) when CUDA is
    unavailable, so callers always get a bool.
    """
    if not torch.cuda.is_available():
        return False
    device_cap = torch.cuda.get_device_capability()
    return device_cap in ((7, 0), (8, 0), (9, 0))
|
||||
|
||||
|
||||
def precision_with_fallback(precision: str) -> str:
    """
    Check if cuda supports bf16; if CUDA is unavailable or bf16 unsupported,
    fall back to the equivalent non-bf precision (e.g. "bf16-mixed" -> "16-mixed").

    Fix: the previous code sliced off the first two characters
    unconditionally in the no-CUDA branch, corrupting non-"bf" precisions
    (e.g. "16" became ""). `removeprefix` only strips an actual "bf" prefix.
    """
    fallback = precision.removeprefix("bf")  # no-op for non-bf precisions
    if not torch.cuda.is_available():
        log_warn("Warning: No CUDA available. Results can be different!")
        return fallback
    if precision.startswith("bf") and not torch.cuda.is_bf16_supported():
        log_warn("Warning: GPU does not support bfloat16. Results can be different!")
        return fallback
    return precision
|
||||
|
||||
|
||||
def str_to_seconds(s: str) -> int:
    """Convert a duration string of the form 'HH:MM:SS' to total seconds.

    Raises:
        ValueError: if *s* does not consist of three ':'-separated integer
            fields. A ValueError is raised instead of using `assert`, which
            would be silently stripped when Python runs with -O.
    """
    parts = s.split(":")
    if len(parts) != 3:
        raise ValueError(f"Invalid time format: {s}. Use 'HH:MM:SS'.")
    hours, minutes, seconds = (int(p) for p in parts)
    return hours * 3600 + minutes * 60 + seconds
|
||||
|
||||
|
||||
def seconds_to_str(total_seconds: int, sep: str = ":") -> str:
    """Format *total_seconds* as a zero-padded 'HH:MM:SS' style string."""
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return sep.join(f"{part}".zfill(2) for part in (hours, minutes, seconds))
|
||||
|
||||
|
||||
def begin_timeout(delay=10, show_threads=False):
    """Schedule a SIGALRM after *delay* seconds (e.g. to break out of a hang).

    With *show_threads*, first dumps the current stack of every live thread
    to aid debugging. NOTE(review): relies on `signal.alarm`, which is only
    available on POSIX platforms — confirm this is never reached on Windows.
    """
    if show_threads:
        import sys
        import threading
        import traceback

        # map thread ids to readable names for the stack dump below
        thread_names = {t.ident: t.name for t in threading.enumerate()}
        for thread_id, frame in sys._current_frames().items():
            print(f"Thread {thread_names.get(thread_id, thread_id)}:")
            traceback.print_stack(frame)
            print()
    signal.alarm(delay)  # deliver SIGALRM after *delay* seconds
|
||||
|
||||
|
||||
def path_to_str_inside_dict(d: dict) -> dict:
    """Return a copy of *d* with every Path value converted to str (recursively)."""
    return convert_type_inside_dict(d, Path, str)
|
||||
|
||||
|
||||
def convert_type_inside_dict(d: dict, src: Type, tgt: Type) -> dict:
    """Return a copy of *d* where every value of type *src* is converted to
    *tgt*, descending recursively into nested dicts."""
    converted = {}
    for key, value in d.items():
        if isinstance(value, dict):
            value = convert_type_inside_dict(value, src, tgt)
        converted[key] = tgt(value) if isinstance(value, src) else value
    return converted
|
||||
|
||||
|
||||
def dict_differences(custom: dict[str, Any], default: dict[str, Any]) -> dict[str, Any]:
    """
    Recursively returns a dictionary with the items in `custom` that are different or missing from `default`.

    Example:
        >>> dict_differences({"hi": 3, "bla": {"a": 2, "b": 2}}, {"hi": 2, "bla": {"a": 1, "b": 2}})
        {'hi': 3, 'bla': {'a': 2}}
    """
    diff: dict[str, Any] = {}
    for key, value in custom.items():
        if key not in default:
            diff[key] = value
            continue
        base = default[key]
        if base == value:
            continue
        nested = isinstance(value, dict) and isinstance(base, dict)
        diff[key] = dict_differences(value, base) if nested else value
    return diff
|
||||
|
||||
|
||||
def concatenate_dict_keys(
    d: dict[str, Any],
    parent_key: str = "",
    sep: str = ".",
    exclude_keys: Iterable[str] = tuple(),
) -> dict[str, Any]:
    """
    Flatten nested dict keys into *sep*-joined paths, skipping *exclude_keys*.

    Example:
        >>> concatenate_dict_keys({ "A": { "B": { "C": 1, "D": 2 }, "E": { "F": 3 } } })
        {'A.B.C': 1, 'A.B.D': 2, 'A.E.F': 3}
        >>> concatenate_dict_keys({ "A": { "B": { "C": 1, "D": 2 }, "E": { "F": 3 } } }, exclude_keys=["B"])
        {'A.E.F': 3}
    """
    flat: dict[str, Any] = {}
    for key, value in d.items():
        if key in exclude_keys:
            continue
        full_key = f"{parent_key}{sep}{key}" if parent_key else key
        if not isinstance(value, dict):
            flat[full_key] = value
            continue
        flat.update(concatenate_dict_keys(value, full_key, sep, exclude_keys))
    return flat
|
||||
|
||||
|
||||
def sort_dict_recursively(d: dict) -> dict:
    """Return a copy of *d* with keys sorted at every nesting level."""
    return {
        key: sort_dict_recursively(value) if isinstance(value, dict) else value
        for key, value in sorted(d.items())
    }
|
||||
|
||||
|
||||
class EndlessList(list):
    """
    Returns the first element for any too-large integer index; otherwise
    behaves exactly like a list.

    Fix: the previous `index >= len(self)` comparison raised a TypeError for
    slice indices (e.g. `el[1:]`); the wrap-around now only applies to ints,
    so slicing works like on a normal list.
    """

    def __getitem__(self, index):
        if isinstance(index, int) and 0 < len(self) <= index:
            return self[0]
        return super().__getitem__(index)
|
||||
|
||||
|
||||
class AttributeDict(dict):
    """A dict whose entries can also be read via attribute access.

    Real attributes and dict methods (e.g. ``.keys``) take precedence over
    stored entries; missing names fall back to item lookup and therefore
    raise KeyError (not AttributeError) when absent.
    """

    def __getattribute__(self, key: str) -> Any:
        # first try a genuine attribute/method lookup
        try:
            return super().__getattribute__(key)
        except AttributeError:
            pass
        # not an attribute: treat the name as a dictionary key
        return super().__getitem__(key)
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
from pytorch_fob.engine.engine import Engine
|
||||
|
||||
if __name__ == "__main__":

    # parsing
    parser = argparse.ArgumentParser(
        description="Create a heatmap plot of benchmarking results."
    )
    parser.add_argument("settings", type=Path, help="Path to the experiment yaml file.")
    # unknown args are forwarded to the engine as config overrides
    args, extra_args = parser.parse_known_args()
    if not any(arg.startswith("engine.plot") for arg in extra_args):
        # force plotting on unless the user explicitly configured it
        extra_args += ["engine.plot=true"]
    engine = Engine()
    engine.parse_experiment_from_file(args.settings, extra_args=extra_args)
    engine.plot()
|
||||
|
|
@ -0,0 +1,131 @@
|
|||
# Evaluation
|
||||
|
||||
During training you can monitor your experiments with [Tensorboard](https://www.tensorflow.org/tensorboard).
|
||||
We also try to provide some useful functionality to quickly evaluate and compare the results of your experiments.
|
||||
|
||||
One can use the ```evaluate_experiment.py``` to get a quick first impression of a finished experiment run.
|
||||
|
||||
## Plotting vs. raw data
|
||||
|
||||
You can use the plotting pipeline with your customized setting (as shown in the usage examples).
|
||||
Alternatively you can use the script to export your data to a .csv and process the data to your own needs.
|
||||
|
||||
In this scenario, set ```evaluation.output_types: [csv] # no plotting, just the data``` in your experiment yaml.
|
||||
|
||||
## Usage Examples
|
||||
|
||||
In the following you can find 4 example use cases for experiments and how to visualize the results as heatmaps.
|
||||
|
||||
1. testing an optimizer on a task
|
||||
2. comparing two optimizers on the same task
|
||||
3. comparing multiple optimizers on different tasks
|
||||
4. comparing the influence of a single hyperparameter
|
||||
|
||||
Here we want to focus on the plotting. For instructions on how to run experiments, refer to the main [README](../../README.md). To get started right away, we provide the data for this example. If you want to reproduce it, refer to [this section](#reproducing-the-data).
|
||||
|
||||
### Plotting the experiment
|
||||
|
||||
By default, calling the `run_experiment.py` will plot the experiment after training and testing. To disable, set `engine.plot=false`.
|
||||
To plot your experiment afterwards, call the `evaluate_experiment.py` with the same experiment yaml. To adjust how to plot, change the values under the `evaluation` key of the experiment. Take a look at the [evaluation/default.yaml](default.yaml) to see which settings are available. Some of these keys are explained in the examples below to give the reader a first impression. Note that some default parameters are set in the respective tasks (e.g. in [tasks/mnist/default.yaml](../tasks/mnist/default.yaml)).
|
||||
|
||||
### Example use cases
|
||||
|
||||
Here are some example scenarios to give you an understanding of how our plotting works. Run the commands from the root of the repository. Take a look at the yaml files used in the command to see what is going on.
|
||||
|
||||
#### Example 1
|
||||
|
||||
This example is a good starting point; it shows the performance of a single default optimizer on one of the tasks.
|
||||
Experiment file: [examples/plotting/1_mnist-adamw.yaml](../../examples/plotting/1_mnist-adamw.yaml)
|
||||
|
||||
```python -m pytorch_fob.evaluate_experiment examples/plotting/1_mnist-adamw.yaml```
|
||||
|
||||

|
||||
|
||||
This example uses only the final model performance and only creates the plot as png.
|
||||
|
||||
Helpful settings:
|
||||
|
||||
- ```checkpoints: [last]``` # you could use [last, best] to additionally plot the model with the best validation
|
||||
- ```output_types: [png]``` # you could use [pdf, png] to also create a pdf
|
||||
|
||||
|
||||
#### Example 2
|
||||
|
||||
You can compare two different optimizers.
|
||||
Experiment file: [examples/plotting/2_adamw-vs-sgd.yaml](../../examples/plotting/2_adamw-vs-sgd.yaml)
|
||||
|
||||
```python -m pytorch_fob.evaluate_experiment examples/plotting/2_adamw-vs-sgd.yaml```
|
||||
|
||||

|
||||
|
||||
Helpful settings:
|
||||
|
||||
- ```plot.x_axis: [optimizer.weight_decay, optimizer.kappa_init_param]``` # the values given here are used as the value for the axis. The order in the list is used from left to right for the plot columns
|
||||
- `column_split_key: optimizer.name` This creates a column for each different optimizer (default behavior). You can set this to null to disable columns or choose a different key.
|
||||
|
||||
|
||||
#### Example 3
|
||||
|
||||
There are multiple tasks in the benchmark, this example shows how to get a quick overview over multiple at the same time.
|
||||
Experiment file: [examples/plotting/3_mnist-and-tabular_adamw-vs-sgd.yaml](../../examples/plotting/3_mnist-and-tabular_adamw-vs-sgd.yaml)
|
||||
|
||||
```python -m pytorch_fob.evaluate_experiment examples/plotting/3_mnist-and-tabular_adamw-vs-sgd.yaml```
|
||||
|
||||

|
||||
|
||||
Helpful settings:
|
||||
|
||||
- ```split_groups: ["task.name"]```
|
||||
|
||||
Every non-unique value for each parameter name in `split_groups` will create its own subplot.
|
||||
Instead of a list you can set to `false` to disable splitting or `true` to split on every parameter that is different between runs (except those already in `column_split_key` or `aggregate_groups`).
|
||||
This list is useful if there are just a few parameters you want to split.
|
||||
|
||||
#### Example 4
|
||||
|
||||
Any parameter that is neither on the x-axis nor y-axis will either be aggregated over or split into subplots.
|
||||
Any individual square of a heatmap shows the *mean* and *std* over multiple runs (as seen in the previous plots). Here we show how to choose the runs to aggregate.
|
||||
Experiment file: [examples/plotting/4_adamw-vs-sgd_seeds.yaml](../../examples/plotting/4_adamw-vs-sgd_seeds.yaml)
|
||||
|
||||
```python -m pytorch_fob.evaluate_experiment examples/plotting/4_adamw-vs-sgd_seeds.yaml```
|
||||
|
||||

|
||||
|
||||
Helpful settings:
|
||||
|
||||
- Control the std with
|
||||
- ```plot.std``` # toggle off with ```False```
|
||||
- ```plot.aggfunc: std``` # also try ```var```
|
||||
- control the rows with
|
||||
- ```split_groups: ["engine.seed"]```
|
||||
- ```aggregate_groups: []```
|
||||
|
||||
Per default the plot will display the *mean* and *std* calculated over the seeds.
|
||||
We need to remove the seed from the ```aggregate_groups``` list (by giving an empty list instead). This list is useful if there are additional parameters you want to aggregate over.
|
||||
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
|
||||
### Reproducing the Data
|
||||
|
||||
Let's create some data that we can plot; from the root directory call:
|
||||
|
||||
#### Data Download
|
||||
|
||||
first we make sure the data is already downloaded beforehand:
|
||||
|
||||
```python -m pytorch_fob.dataset_setup examples/plotting/3_mnist-and-tabular_adamw-vs-sgd.yaml```
|
||||
|
||||
This will download the mnist data (required for 1-4) and tabular (required for 3) into the [examples/data](../../examples/data) directory - path can be changed in the corresponding yaml you want to use (e.g. [examples/plotting/1_mnist-adamw.yaml](../../examples/plotting/1_mnist-adamw.yaml) if you have already set up your benchmark).
|
||||
|
||||
Estimated disk usage for the data: ~65M
|
||||
|
||||
#### Training
|
||||
|
||||
The 2 tasks will be run on 2x2 hyperparameter on 2 different seeds per optimizer for a total of 32 runs.
|
||||
|
||||
```python -m pytorch_fob.run_experiment examples/plotting/3_mnist-and-tabular_adamw-vs-sgd.yaml```
|
||||
|
||||
After training finished you should find 32 run directories in [examples/plotting/outputs](../../examples/plotting/outputs)
|
||||
|
||||
All parameters that differ from the default value are noted in the directory name.
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
from pathlib import Path
|
||||
|
||||
|
||||
def evaluation_path() -> Path:
    """Absolute path of the directory containing this module."""
    here = Path(__file__)
    return here.resolve().parent
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
evaluation:
|
||||
data_dirs: null # List of Paths
|
||||
output_dir: null # output filename is output_dir / experiment_name
|
||||
experiment_name: null
|
||||
split_groups: false # {True, False, [param.a, param.b, ...]} create additional plots where the data is grouped by the given parameter; True to detect all params with multiple unique values
|
||||
aggregate_groups: # groups over which to aggregate values and compute mean/std. Default: [engine.seed]
|
||||
- engine.seed
|
||||
depth: 1 # the depth of the trial dirs relative to the given data_dirs
|
||||
checkpoints: [best, last] # which model checkpoint to use
|
||||
output_types: [pdf, png, csv] # choose all you want from {csv, pdf, png} and put it in brackets
|
||||
verbose: False # debug prints
|
||||
column_split_key: optimizer.name # if set, will split the dataframe and plot it in columns. Default: optimizer.name
|
||||
column_split_order: null # sets the order in which the columns are plotted.
|
||||
|
||||
# keeping the values on null -> automatically figure it out if possible, or let matplotlib decide
|
||||
plot:
|
||||
  x_axis: # indices on x axis (same order as order of subfigures given in data_dirs)
|
||||
- optimizer.weight_decay
|
||||
  y_axis: # indices on y axis (same order as order of subfigures given in data_dirs)
|
||||
- optimizer.learning_rate
|
||||
metric: null # is automatically chosen from task name, this will overwrite it
|
||||
limits: null # sets the limits for the colormap, 2 ints, order does not matter, leave empty for automatic
|
||||
std: True # show std over aggregated values
|
||||
aggfunc: std # for example {std, var, sem} which function to use to aggregate over the seeds; will only be used when 'std' is set to true
|
||||
# format:
|
||||
  # string, how many digits to display, expects two values separated by a dot (e.g. "2.3")
|
||||
# to make accuracy -> percent use a '2' in front of the dot
|
||||
# to display 3 digits after the decimal point, write a '3' behind the dot
|
||||
format: null # for example {"2.0", "2.1", "2.3", "0.2", ...}
|
||||
single_file: true # if true, save all heatmaps in one file. 'split_groups' are represented as rows.
|
||||
|
||||
plotstyle:
|
||||
tight_layout: True
|
||||
text:
|
||||
  usetex: True # you can give latex code in the yaml: $\sqrt{\pi \cdot \sigma}$ but some clusters don't have it installed
|
||||
|
||||
# general font
|
||||
font:
|
||||
family: "serif" # matplotlib {serif, sans-serif, cursive, fantasy, monospace}
|
||||
size: 14
|
||||
|
||||
# the font in the tiles of the matrix
|
||||
matrix_font:
|
||||
size: 12
|
||||
|
||||
scale: 1.0 # scales *figsize* argument by this value, useful for ".png"
|
||||
color_palette: "rocket"
|
||||
dpi: 300
|
||||
|
||||
# the name of the files storing the hyperparameters of the experiments and the scores
|
||||
experiment_files:
|
||||
best_model: results_best_model.json
|
||||
last_model: results_final_model.json
|
||||
config: config.yaml
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
# pretty names for the plot
|
||||
names:
|
||||
# optimizer
|
||||
adamw_baseline: AdamW
|
||||
sgd_baseline: SGD
|
||||
adamcpr: AdamCPR
|
||||
adamcpr_fast: AdamCPR
|
||||
sgd_stepwise: SGD (stepwise)
|
||||
# metric
|
||||
test_acc: Test Accuracy
|
||||
test_loss: Test Loss
|
||||
test_mIoU: Test mean Intersection over Union
|
||||
test_mAcc: Test mean Accuracy
|
||||
test_rmse: Test Root Mean Square Error (RMSE)
|
||||
test_rocauc: Test ROC-AUC
|
||||
# parameter
|
||||
learning_rate: Learning Rate
|
||||
weight_decay: Weight Decay
|
||||
kappa_init_param: Kappa Init Param
|
||||
# tasks
|
||||
classification: classification
|
||||
classification_small: classification_small
|
||||
detection: detection
|
||||
graph: graph
|
||||
graph_tiny: graph_tiny
|
||||
mnist: mnist
|
||||
segmentation: segmentation
|
||||
tabular: tabular
|
||||
template: template
|
||||
translation: translation
|
||||
|
|
@ -0,0 +1,678 @@
|
|||
import json
|
||||
from itertools import repeat
|
||||
from os import PathLike
|
||||
from pathlib import Path
|
||||
from typing import List, Literal
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import pandas as pd
|
||||
import seaborn as sns
|
||||
from matplotlib.figure import Figure
|
||||
from pytorch_fob.engine.parser import YAMLParser
|
||||
from pytorch_fob.engine.utils import (
|
||||
AttributeDict,
|
||||
convert_type_inside_dict,
|
||||
log_debug,
|
||||
log_info,
|
||||
log_warn,
|
||||
)
|
||||
from pytorch_fob.evaluation import evaluation_path
|
||||
|
||||
|
||||
def get_available_trials(dirname: Path, config: AttributeDict, depth: int = 1):
    """finds the path for all trials in the *dirname* directory"""
    # RECURSIVELY FIND ALL DIRS IN DIRNAME (up to depth)
    assert isinstance(dirname, Path)
    subdirs: list[Path] = [dirname]
    all_results_must_be_same_depth = True
    for _ in range(depth):
        if all_results_must_be_same_depth:
            # replace the frontier with its children at each level
            new_subdirs: list[Path] = []
            for subdir in subdirs:
                new_subdirs += [x for x in subdir.iterdir() if x.is_dir()]
            subdirs = new_subdirs
        else:
            # NOTE(review): currently unreachable (flag above is always True);
            # also extends the list while iterating it — confirm before enabling
            for subdir in subdirs:
                subdirs += [x for x in subdir.iterdir() if x.is_dir()]
    format_str = "\n  "  # f-string expression part cannot include a backslash
    log_debug(
        f"found the following directories:{format_str}{format_str.join(str(i) for i in subdirs)}."
    )

    def is_trial(path: Path):
        # here we could do additional checks to filter the subdirectories
        # currently we only check if there is a config file
        for x in path.iterdir():
            found_a_config_file = x.name == config.experiment_files.config
            if found_a_config_file:
                return True
        return False

    # keep only directories that contain a config file (order reversed)
    subdirs = list(filter(is_trial, subdirs[::-1]))
    log_debug(
        f"We assume the following to be trials:{format_str}{format_str.join(str(i) for i in subdirs)}."
    )
    return subdirs
|
||||
|
||||
|
||||
def dataframe_from_trials(
    trial_dir_paths: List[Path], config: AttributeDict
) -> pd.DataFrame:
    """takes result from get_available_trials and packs them in a dataframe,
    does not filter duplicate hyperparameter settings.

    For every trial directory, the trial's yaml config is flattened into one
    dataframe row and the plot metric's value is read from the trial's result
    json (last or best checkpoint depending on ``config.last_instead_of_best``).

    Raises:
        ValueError: if ``config.plot.metric`` is unset, or no trial yielded a row.
    """
    dfs: List[pd.DataFrame] = []

    for path in trial_dir_paths:

        config_file = path / config.experiment_files.config
        if config.last_instead_of_best:
            result_file = path / config.experiment_files.last_model
        else:
            result_file = path / config.experiment_files.best_model
        all_files_exist = all([config_file.is_file(), result_file.is_file()])
        if not all_files_exist:
            # incomplete trials are skipped, not fatal
            log_warn(
                f"WARNING: one or more files are missing in {path}. Skipping this hyperparameter setting."
                + f" <{config_file}>: {config_file.is_file()} and\n <{result_file}>: {result_file.is_file()})"
            )
            continue

        yaml_parser = YAMLParser()
        yaml_content = yaml_parser.parse_yaml(config_file)
        # convert the sub dicts first, then the dict itself
        yaml_content = convert_type_inside_dict(
            yaml_content, src=dict, tgt=AttributeDict
        )
        yaml_content = AttributeDict(yaml_content)

        # use user given value
        metric_of_value_to_plot = config.plot.metric

        # compute it if user has not given a value
        if not metric_of_value_to_plot:
            raise ValueError("evaluation.plot.metric is not set")

        # flatten nested yaml keys into 'a.b.c' style columns (one row)
        data = pd.json_normalize(yaml_content)

        with open(result_file, "r", encoding="utf8") as f:
            content = json.load(f)
            # result json is a list; the metric is expected in its first entry
            if metric_of_value_to_plot in content[0]:
                data.at[0, metric_of_value_to_plot] = content[0][
                    metric_of_value_to_plot
                ]
            else:
                log_warn(f"could not find value for {metric_of_value_to_plot} in json")

        dfs.append(data)

    if len(dfs) == 0:
        raise ValueError("no dataframes found, check your config")
    df = pd.concat(dfs, sort=False)

    return df
|
||||
|
||||
|
||||
def create_matrix_plot(
    dataframe: pd.DataFrame,
    config: AttributeDict,
    cols: str,
    idx: str,
    ax=None,
    cbar: bool = True,
    vmin: None | int = None,
    vmax: None | int = None,
):
    """
    Creates one heatmap and puts it into the grid of subplots.
    Uses pd.pivot_table() and sns.heatmap().

    Args:
        dataframe: trials of one optimizer/task group (flattened yaml columns).
        config: the 'evaluation' section of the experiment config.
        cols: column used for the heatmap x-axis.
        idx: column used for the heatmap y-axis.
        ax: matplotlib axes to draw into (None lets seaborn pick).
        cbar: whether this subplot draws the colorbar.
        vmin, vmax: shared color limits across subplots (None -> automatic).

    Returns:
        The axes returned by sns.heatmap().
    """
    df_entry = dataframe.iloc[0]
    metric_name = df_entry["evaluation.plot.metric"]

    # CLEANING LAZY USER INPUT
    # cols are x-axis, idx are y-axis
    if cols not in dataframe.columns:
        log_warn(
            "x-axis value not present in the dataframe; did you forget to add a 'optimizer.' as a prefix?\n"
            + f" using '{'optimizer.' + cols}' as 'x-axis' instead."
        )
        cols = "optimizer." + cols
    if idx not in dataframe.columns:
        log_warn(
            "y-axis value not present in the dataframe; did you forget to add a 'optimizer.' as a prefix?\n"
            + f" using '{'optimizer.' + idx}' as 'y-axis' instead."
        )
        idx = "optimizer." + idx
    # create pivot table and format the score result
    pivot_table = pd.pivot_table(
        dataframe, columns=cols, index=idx, values=metric_name, aggfunc="mean"
    )

    format_string = dataframe["evaluation.plot.format"].iloc[0]

    # scaling the values given by the user to fit their format needs (-> and adapting the limits)
    value_exp_factor, decimal_points = format_string.split(".")
    value_exp_factor = int(value_exp_factor)
    decimal_points = int(decimal_points)
    # bugfix: previously `if vmin:` / `if vmax:`, which skipped the scaling
    # whenever the limit was exactly 0 (falsy) — test for None explicitly
    if vmin is not None:
        vmin *= 10**value_exp_factor
    if vmax is not None:
        vmax *= 10**value_exp_factor
    pivot_table = (pivot_table * (10**value_exp_factor)).round(decimal_points)
    fmt = f".{decimal_points}f"

    # up to here limits was the min and max over all dataframes,
    # usually we want to use user values
    if "evaluation.plot.limits" in dataframe.columns:
        limits = dataframe["evaluation.plot.limits"].iloc[0]
        if limits:
            # NOTE(review): user limits are applied unscaled while the table
            # values were multiplied by 10**value_exp_factor — confirm the
            # limits are expected in the already-scaled unit.
            vmin = min(limits)
            vmax = max(limits)
            log_debug(f"setting cbar limits to {vmin}, {vmax} ")

    colormap_name = config.plotstyle.color_palette
    low_is_better = dataframe["evaluation.plot.test_metric_mode"].iloc[0] == "min"
    if low_is_better:
        colormap_name += "_r"  # this will "inver" / "flip" the colorbar
    colormap = sns.color_palette(colormap_name, as_cmap=True)
    metric_legend = pretty_name(metric_name)

    # FINETUNE POSITION
    # left bottom width height
    # cbar_ax = fig.add_axes([0.92, 0.235, 0.02, 0.6])
    cbar_ax = None

    # kwargs shared by both heatmap variants (with and without std annotation)
    shared_kwargs = dict(
        ax=ax,
        cbar_ax=cbar_ax,
        annot_kws={"fontsize": config.plotstyle.matrix_font.size},
        cbar=cbar,
        vmin=vmin,
        vmax=vmax,
        cmap=colormap,
        cbar_kws={"label": f"{metric_legend}"},
    )

    if not config.plot.std:
        return sns.heatmap(pivot_table, annot=True, fmt=fmt, **shared_kwargs)

    # BUILD STD TABLE
    pivot_table_std = pd.pivot_table(
        dataframe,
        columns=cols,
        index=idx,
        values=metric_name,
        aggfunc=config.plot.aggfunc,
        fill_value=float("inf"),
        dropna=False,
    )
    # inf marks cells where the aggfunc could not be computed (e.g. single seed)
    if float("inf") in pivot_table_std.values.flatten():
        log_warn(
            "WARNING: Not enough data to calculate the std, skipping std in plot"
        )

    pivot_table_std = (pivot_table_std * (10**value_exp_factor)).round(
        decimal_points
    )

    # annotation strings "mean\n±(std)"; std is omitted where it is unavailable
    annot_matrix = pivot_table.copy().astype("string")
    for i in pivot_table.index:
        for j in pivot_table.columns:
            mean = pivot_table.loc[i, j]
            std = pivot_table_std.loc[i, j]
            std_string = f"\n±({round(std, decimal_points)})" if std != float("inf") else ""  # type: ignore
            annot_matrix.loc[i, j] = f"{round(mean, decimal_points)}{std_string}"  # type: ignore

    # cannot format like before, as we do not only have a number
    return sns.heatmap(pivot_table, annot=annot_matrix, fmt="", **shared_kwargs)
|
||||
|
||||
|
||||
def get_all_num_rows_and_their_names(dataframe_list: list[pd.DataFrame], config):
    """For each dataframe, compute how many plot rows it needs and their names.

    Columns used as axes, the metric columns, and user-ignored/aggregated
    columns are excluded before counting distinguishing hyperparameters.

    Returns:
        tuple of (row counts per dataframe, row names per dataframe).
    """
    row_counts: list[int] = []
    all_row_names: list[list[str]] = []
    shared_y_axis = config.plot.y_axis[0]

    for position, frame in enumerate(dataframe_list):
        ignored = [config.plot.x_axis[position], shared_y_axis]
        ignored.extend(frame["evaluation.plot.metric"].unique())
        ignored.extend(config.get("ignore_keys", []))
        ignored.extend(config.get("aggregate_groups", []))

        count, names = get_num_rows(frame, ignored, config)
        row_counts.append(count)
        # a single row has no distinguishing parameter -> placeholder name
        all_row_names.append(names if names else ["default"])

    return row_counts, all_row_names
|
||||
|
||||
|
||||
def get_num_rows(
    dataframe: pd.DataFrame, ignored_cols: list[str], config: AttributeDict
) -> tuple[int, list[str]]:
    """each matrix has 2 params (one for x and y each), one value, and we aggregate over seeds;
    if there are more than these 4 parameters with different values,
    we want to put that in separate rows instead of aggregating over them.
    returning: the number of rows (at least 1) and the names of the cols,
    each name formatted as '<column>=<value>'."""
    necesarry_rows = 0

    # the user might specify a value for the groups that we should split on in <split_groups>
    whitelisted_cols: list[str] | Literal["all"] = (
        "all"  # everything is whitelisted if this value stays 'all'
    )
    if isinstance(config.split_groups, list):
        whitelisted_cols = config.split_groups[:]
    elif config.split_groups is False:
        whitelisted_cols = []

    columns_with_non_unique_values = []
    for col in dataframe.columns:
        # skip evaluation settings, explicitly ignored columns, and
        # everything outside the user's split whitelist
        is_eval_key = col.startswith("evaluation.")
        is_ignored = col in ignored_cols
        is_whitelisted = whitelisted_cols == "all" or col in whitelisted_cols
        if any([is_ignored, is_eval_key, not is_whitelisted]):
            if is_whitelisted:
                # column was requested for splitting but is also ignored
                log_warn(
                    f"{col} is in the whitelist, but will be ignored. "
                    f"Probably {col} is in both 'split_groups' and 'aggregate_groups'."
                )
            log_debug(f"ignoring {col}")
            continue
        # dropna=False: NaN counts as its own value when counting variants
        nunique = dataframe[col].nunique(dropna=False)
        if nunique > 1:
            log_debug(f"adding {col} since there are {nunique} unique values")
            for unique_hp in dataframe[col].unique():
                columns_with_non_unique_values.append(f"{col}={unique_hp}")
            necesarry_rows += (
                nunique  # each unique parameter should be an individual plot
            )

    rows_number = max(necesarry_rows, 1)
    col_names = columns_with_non_unique_values
    log_debug(f"{rows_number=}")
    log_debug(f"{col_names=}")

    return rows_number, col_names
|
||||
|
||||
|
||||
def find_global_vmin_vmax(dataframe_list, config):
    """Find shared colorbar limits over all subplot pivot tables.

    With a single dataframe both limits stay None so matplotlib chooses them;
    with multiple dataframes the global min/max of the mean pivot tables is
    returned so all subplots share one color scale.
    """
    vmin: int | float | None = None
    vmax: int | float | None = None
    num_cols = len(dataframe_list)

    if num_cols > 1:
        # all subplots should have same colors -> we need to find the limits
        vmin = float("inf")
        vmax = float("-inf")

        for i in range(num_cols):
            dataframe = dataframe_list[i]
            cols = config.plot.x_axis[i]
            idx = config.plot.y_axis[0]
            key = config.plot.metric

            # same aggregation as the actual heatmap uses
            pivot_table = pd.pivot_table(
                dataframe, columns=cols, index=idx, values=key, aggfunc="mean"
            )

            # .min().min(): reduce over columns, then over the resulting series
            min_value_present_in_current_df = pivot_table.min().min()
            max_value_present_in_current_df = pivot_table.max().max()

            log_debug(
                "colorbar_limits:\n"
                + f"  subfigure number {i+1}, checking for metric {key}: \n"
                + f"  min value is {min_value_present_in_current_df},\n"
                + f"  max value is {max_value_present_in_current_df}"
            )
            vmin = min(vmin, min_value_present_in_current_df)
            vmax = max(vmax, max_value_present_in_current_df)

    return vmin, vmax
|
||||
|
||||
|
||||
def create_figure(dataframe_list: list[pd.DataFrame], config: AttributeDict):
    """
    Takes a list of dataframes. Each dataframe is processed into a column of heatmaps.

    Returns:
        tuple of (matplotlib Figure, 2d list of axes indexed as axs[row][col]).
    """
    num_cols: int = len(dataframe_list)

    # calculate the number of rows for each dataframe
    n_rows, row_names = get_all_num_rows_and_their_names(dataframe_list, config)

    # Handling of the number of rows in the plot
    # we could either create a full rectangular grid, or allow each subplot to nest subplots
    # for nesting we would need to create subfigures instead of subplots i think
    if config.split_groups is False:
        n_rows_max = 1
        row_names = [["default"] for _ in range(num_cols)]
    else:
        n_rows_max = max(n_rows)

    log_debug(f"{n_rows=} and {num_cols=}")

    # TODO, figsize was just hardcoded for (1, 2) grid and left to default for (1, 1) grid
    # probably not worth the hazzle to create something dynamic (atleast not now)
    # EDIT: it was slightly adapted to allow num rows without being completely unreadable
    # margin = (num_subfigures - 1) * 0.3
    # figsize=(5*n_cols + margin, 2.5)
    scale = config.plotstyle.scale
    if num_cols == 1 and n_rows_max > 1:
        figsize = (2**3 * scale, 2 * 3 * n_rows_max * scale)
    elif num_cols == 2:
        # TODO: after removing cbar from left subifgure, it is squished
        # there is an argument to share the legend, we should use that
        figsize = (12 * scale, 5.4 * n_rows_max * scale)
    elif num_cols > 2:
        figsize = (12 * (num_cols / 2) * scale, 5.4 * n_rows_max * scale)
    else:
        figsize = None

    # TODO: use seaborn FacetGrid
    fig, axs = plt.subplots(n_rows_max, num_cols, figsize=figsize)
    # normalize axs to a 2d list so indexing is always axs[row][col]
    if n_rows_max == 1:
        axs = [axs]
    if num_cols == 1:
        axs = [[ax] for ax in axs]  # adapt for special case so we have unified types

    # Adjust left and right margins as needed
    # fig.subplots_adjust(left=0.1, right=0.9, top=0.97, hspace=0.38, bottom=0.05,wspace=0.3)

    # None -> plt will chose vmin and vmax
    vmin, vmax = find_global_vmin_vmax(dataframe_list, config)

    for i in range(num_cols):
        num_nested_subfigures: int = n_rows[i]

        if not config.split_groups:
            # one heatmap per dataframe, no nested rows
            create_one_grid_element(
                dataframe_list,
                config,
                axs,
                i,
                j=0,
                max_i=num_cols,
                max_j=0,
                vmin=vmin,
                vmax=vmax,
                n_rows=n_rows,
                row_names=row_names,
            )
        else:
            # one heatmap per (dataframe, split-group value) combination
            for j in range(num_nested_subfigures):
                create_one_grid_element(
                    dataframe_list,
                    config,
                    axs,
                    i,
                    j,
                    max_i=num_cols,
                    max_j=num_nested_subfigures,
                    vmin=vmin,
                    vmax=vmax,
                    n_rows=n_rows,
                    row_names=row_names,
                )

    if config.plotstyle.tight_layout:
        fig.tight_layout()
    # SUPTITLE (the super title on top of the whole figure in the middle)
    # # TODO super title might be squished when used together with tight layout (removing for now)
    # if n_rows_max > 1 or num_cols > 1:
    #     # set experiment name as title when multiple matrices in image
    #     if config.experiment_name:
    #         fig.suptitle(config.experiment_name)
    return fig, axs
|
||||
|
||||
|
||||
def create_one_grid_element(
    dataframe_list: list[pd.DataFrame],
    config: AttributeDict,
    axs,
    i: int,
    j: int,
    max_i: int,
    max_j: int,
    vmin,
    vmax,
    n_rows,
    row_names,
):
    """does one 'axs' element as it is called in plt

    Draws the heatmap for grid cell (row j, column i): optionally filters the
    i-th dataframe down to the split-group value named in row_names[i][j],
    then delegates to create_matrix_plot and sets labels/title.

    Returns:
        False when the groupby filter failed (cell is skipped), else None.
    """
    num_nested_subfigures: int = n_rows[i]
    name_for_additional_subplots: list[str] = row_names[i]
    num_subfigures = max_i  # from left to right
    num_nested_subfigures = max_j  # from top to bottom
    dataframe = dataframe_list[i]

    cols = config.plot.x_axis[i]
    idx = config.plot.y_axis[0]
    # only include colorbar once
    include_cbar: bool = i == num_subfigures - 1

    # row name has the form "<column>=<value>", or "default" for a single row
    model_param = name_for_additional_subplots[j]
    if model_param == "default":
        current_dataframe = dataframe  # we do not need to do further grouping
    else:
        param_name, param_value = model_param.split("=", maxsplit=1)
        # the value was stringified when the name was built; cast back for
        # numeric columns so get_group can match it
        if pd.api.types.is_numeric_dtype(dataframe[param_name]):
            param_value = float(param_value)
        try:
            current_dataframe = dataframe.groupby([param_name]).get_group(
                (param_value,)
            )
        except KeyError:
            log_warn(
                f"WARNING: was not able to groupby '{param_name}',"
                + "maybe the data was created with different versions of fob; skipping this row"
            )
            log_debug(
                f"{param_name=}{param_value=}{dataframe.columns=}{dataframe[param_name]=}"
            )
            return False
    current_plot = create_matrix_plot(
        current_dataframe,
        config,
        cols,
        idx,
        ax=axs[j][i],
        cbar=include_cbar,
        vmin=vmin,
        vmax=vmax,
    )

    # LABELS
    # Pretty name for label "learning_rate" => "Learning Rate"
    # remove x_label of all but last row, remove y_label for all but first column
    if i > 0:
        current_plot.set_ylabel("", labelpad=8)
    else:
        current_plot.set_ylabel(pretty_name(current_plot.get_ylabel()))
    if j < num_nested_subfigures - 1:
        current_plot.set_xlabel("", labelpad=8)
    else:
        current_plot.set_xlabel(pretty_name(current_plot.get_xlabel()))

    # reading optimizer and task name after grouping
    df_entry = current_dataframe.iloc[0]  # just get an arbitrary trial
    opti_name = df_entry["optimizer.name"]
    task_name = df_entry["task.name"]

    # TITLE
    # title (heading) of the heatmap: <optimname> on <taskname> (+ additional info)
    title = f"{pretty_name(opti_name)} on {pretty_name(task_name)}"
    if max_i > 1 or max_j > 1:
        title += "" if model_param == "default" else f"\n{model_param}"
    current_plot.set_title(title)
|
||||
|
||||
|
||||
def extract_dataframes(
    workload_paths: List[Path], config: AttributeDict, depth: int = 1
) -> list[pd.DataFrame]:
    """Build one dataframe per workload directory.

    Each directory is scanned for trials, whose configs and results are
    combined into a single dataframe.
    """
    return [
        dataframe_from_trials(get_available_trials(workload, config, depth), config)
        for workload in workload_paths
    ]
|
||||
|
||||
|
||||
def get_output_file_path(
    dataframe_list: list[pd.DataFrame], config: AttributeDict, suffix: str = ""
) -> Path:
    """Derive the output path stem (no extension) for plots and csv files.

    Uses config.output_dir / config.experiment_name when given; otherwise
    falls back to this file's directory and an '<optimizers>-<tasks>' name.
    An optional suffix is appended with a dash.
    """
    task_part = "_".join(sorted({frame.iloc[0]["task.name"] for frame in dataframe_list}))
    optim_part = "_".join(sorted({frame.iloc[0]["optimizer.name"] for frame in dataframe_list}))

    fallback_dir = Path(__file__).parent.resolve()
    base_dir = Path(config.output_dir) if config.output_dir else fallback_dir

    if config.experiment_name:
        experiment_name = Path(config.experiment_name)
    else:
        experiment_name = f"{optim_part}-{task_part}"
    stem = base_dir / experiment_name

    if suffix:
        return Path(f"{stem}-{suffix}")
    return Path(stem)
|
||||
|
||||
|
||||
def set_plotstyle(config: AttributeDict):
    """Apply the global matplotlib settings from the evaluation config."""
    style_settings = {
        "text.usetex": config.plotstyle.text.usetex,
        "font.family": config.plotstyle.font.family,
        "font.size": config.plotstyle.font.size,
    }
    plt.rcParams.update(style_settings)
|
||||
|
||||
|
||||
def pretty_name(name: str, pretty_names: dict | str | None = None) -> str:
    """
    Tries to use a mapping for the name, else will do some general replacement.
    mapping can be a dictionary or a filename of a yaml file with 'names' key.

    Args:
        name: raw config key, possibly dotted (e.g. 'optimizer.learning_rate').
        pretty_names: explicit mapping, a yaml file path, or None to use the
            bundled labels.yaml.

    Returns:
        The mapped label, or the title-cased name as fallback.
    """
    if isinstance(pretty_names, dict) and pretty_names:
        names = pretty_names
    else:
        # default label file, or a user supplied yaml path
        label_file: Path = (
            Path(pretty_names)
            if isinstance(pretty_names, str)
            else evaluation_path() / "labels.yaml"
        )
        # bugfix: the previous implementation claimed to cache the parsed yaml
        # but only rebound its mutable-default parameter, so the file was
        # re-parsed on every call; cache parsed label files on the function.
        cache: dict[str, dict[str, str]] = getattr(pretty_name, "_label_cache", {})
        cache_key = str(label_file)
        if cache_key not in cache:
            cache[cache_key] = YAMLParser().parse_yaml(label_file)["names"]
            pretty_name._label_cache = cache  # type: ignore[attr-defined]
        names = cache[cache_key]

    # exact key first, then the key without its yaml prefix, else title-case
    name_without_yaml_prefix = name.split(".")[-1]
    if name in names:
        return names[name]
    if name_without_yaml_prefix in names:
        return names[name_without_yaml_prefix]
    return name.replace("_", " ").title()
|
||||
|
||||
|
||||
def save_csv(dfs: list[pd.DataFrame], output_filename: Path):
    """Write each dataframe to '<output_filename>-<i>.csv' without an index column."""
    base = output_filename.resolve()
    for index, frame in enumerate(dfs):
        csv_output_filename = f"{base}-{index}.csv"
        log_info(f"saving raw data as {csv_output_filename}")
        frame.to_csv(path_or_buf=csv_output_filename, index=False)
|
||||
|
||||
|
||||
def save_plot(fig: Figure, output_file_path: Path, file_type: str, dpi: int):
    """Save the figure as '<output_file_path>.<file_type>' at the given dpi."""
    plot_output_filename = f"{output_file_path.resolve()}.{file_type}"
    log_info(f"saving figure as <{plot_output_filename}>")
    fig.savefig(plot_output_filename, dpi=dpi)
|
||||
|
||||
|
||||
def save_files(
    fig, dfs: list[pd.DataFrame], output_file_path: Path, config: AttributeDict
):
    """Write all requested output artifacts (csv data, png/pdf plots).

    Args:
        fig: the assembled matplotlib figure.
        dfs: raw dataframes (one per workload) for csv export.
        output_file_path: path stem without extension; parents are created.
        config: evaluation config (reads output_types and plotstyle.dpi).
    """
    output_file_path.parent.mkdir(parents=True, exist_ok=True)

    for file_type in config.output_types:
        if file_type == "csv":
            save_csv(dfs, output_file_path)
        elif file_type in ("png", "pdf"):
            save_plot(fig, output_file_path, file_type, config.plotstyle.dpi)
        else:
            # previously unknown entries were ignored without any feedback
            log_warn(f"ignoring unknown output type '{file_type}'")
|
||||
|
||||
|
||||
def clean_config(config: AttributeDict) -> AttributeDict:
    """some processing that allows the user to be lazy, shortcut for the namespace,
    hidden values are found and config.all_values

    Narrows the config to its 'evaluation' section (keeping the full config
    under 'all_values'), validates data_dirs, and promotes scalar values of
    output_types / data_dirs / plot.x_axis / plot.y_axis to lists.

    Raises:
        ValueError: if evaluation.data_dirs is missing or has a wrong type.
    """
    if "evaluation" in config.keys():
        evaluation_config: AttributeDict = config.evaluation
        # keep the complete config reachable from the evaluation section
        evaluation_config["all_values"] = config
        config = evaluation_config
    else:
        log_warn("there is no 'evaluation' in the yaml provided!")
    if "data_dirs" in config.keys():
        value_is_none = not config.data_dirs
        value_has_wrong_type = not isinstance(config.data_dirs, (PathLike, str, list))
        if value_is_none or value_has_wrong_type:
            raise ValueError(
                f"Error: 'evaluation.data_dirs' was not provided correctly! "
                f"check for typos in the yaml provided! value given: {config.data_dirs}"
            )

    # allow the user to write a single string instead of a list of strings
    if not isinstance(config.output_types, list):
        config["output_types"] = [config.output_types]
        log_info("fixing value for key <config.output_types> to be a list[str]")

    if not isinstance(config.data_dirs, list):
        config["data_dirs"] = [Path(config.data_dirs)]
        log_info("fixing value for key <config.data_dirs> to be a list[Path]")

    # x_axis
    if not isinstance(config.plot.x_axis, list):
        config["plot"]["x_axis"] = [config.plot.x_axis]
        log_info("fixing value for key <config.plot.x_axis> to be a list[str]")
    if len(config.plot.x_axis) < len(config.data_dirs):
        # use same x axis for all if only one given
        missing_elements = len(config.data_dirs) - len(config.plot.x_axis)
        config["plot"]["x_axis"] += repeat(config.plot.x_axis[0], missing_elements)

    # y_axis
    if not isinstance(config.plot.y_axis, list):
        config["plot"]["y_axis"] = [config.plot.y_axis]
        log_info("fixing value for key <config.plot.y_axis> to be a list[str]")
    if len(config.plot.y_axis) < len(config.data_dirs):
        # use same y axis for all if only one given
        missing_elements = len(config.data_dirs) - len(config.plot.y_axis)
        config["plot"]["y_axis"] += repeat(config.plot.y_axis[0], missing_elements)

    return config
|
||||
|
||||
|
||||
def main(config: AttributeDict):
    """Evaluation entry point: clean the config, build all plots, save outputs.

    Args:
        config: full experiment config; its 'evaluation' section is extracted
            by clean_config.
    """
    config = clean_config(config)  # sets config to config.evaluation, cleans some data
    workloads: List[Path] = [Path(name) for name in config.data_dirs]
    # bugfix: was f"{workloads}=" which logged the list followed by a stray '=';
    # the '=' belongs inside the braces to get the 'workloads=[...]' debug form
    log_debug(f"{workloads=}")

    set_plotstyle(config)

    dfs = extract_dataframes(workloads, depth=config.depth, config=config)
    fig, _ = create_figure(dfs, config)

    output_file_path = get_output_file_path(dfs, config)

    save_files(fig, dfs, output_file_path, config)
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
# Optimizers
|
||||
We currently have the following optimizers:
|
||||
|
||||
| Name | Optimizer | LR Scheduler |
|
||||
| ---- | --------- | ------------ |
|
||||
| adamw_baseline | [AdamW](https://arxiv.org/abs/1711.05101) | [Cosine Annealing](https://arxiv.org/abs/1608.03983) with linear warmup |
|
||||
| adamcpr | [AdamCPR](https://arxiv.org/abs/2311.09058v2) | [Cosine Annealing](https://arxiv.org/abs/1608.03983) with linear warmup |
|
||||
| sgd_baseline | Stochastic Gradient Descent | [Cosine Annealing](https://arxiv.org/abs/1608.03983) |
|
||||
| sgd_stepwise | [Stochastic Gradient Descent](https://pytorch.org/docs/stable/generated/torch.optim.SGD.html#torch.optim.SGD) | [StepLR](https://pytorch.org/docs/stable/generated/torch.optim.lr_scheduler.StepLR.html) |
|
||||
| adafactor | [Adafactor](https://arxiv.org/abs/1804.04235) | Constant |
|
||||
|
||||
|
||||
## Creating your own optimizer
|
||||
To add your own optimizer, you need to create a subfolder in the `optimizers` directory. The name of that folder will be the name used to invoke the optimizer. Within the folder you need to provide two files: `optimizer.py` and `default.yaml`. There is a `template` optimizer with useful comments, which can be used as a starting point.
|
||||
|
||||
### optimizer.py
|
||||
Here you need to implement a function `configure_optimizers` with the following signature:
|
||||
```python
|
||||
configure_optimizers(model: GroupedModel, config: OptimizerConfig) -> OptimizerLRScheduler
|
||||
```
|
||||
- The return type is the same as described [here](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.core.LightningModule.html#lightning.pytorch.core.LightningModule.configure_optimizers).
|
||||
- The `GroupedModel` is a wrapper around a `torch.nn.Module`. It additionally provides a method `grouped_parameters`, which returns the model parameters grouped by their `weight_decay` and `learning_rate` settings. This is useful for some tasks that want to use e.g. lower learning rates for different parts of the model or to avoid applying weight decay to your norm layers. The underlying `torch.nn.Module` can be accessed with `model.model`.
|
||||
- The `OptimizerConfig` has the `lr_interval, max_steps, max_epochs` attributes. It also gains all attributes provided in the `optimizer` section of the `experiment.yaml`.
|
||||
|
||||
### default.yaml
|
||||
Here you can provide default values for all the hyperparameters your optimizer needs. These values will be added to the `OptimizerConfig` passed to the `configure_optimizers`. So if you have the following `default.yaml`:
|
||||
```yaml
|
||||
optimizer:
|
||||
name: my_awesome_optimizer
|
||||
output_dir_name: my_awesome_optimizer
|
||||
learning_rate: 1.e-3
|
||||
important:
|
||||
extra:
|
||||
parameter: 42
|
||||
```
|
||||
you could use `config.important.extra.parameter` in the `configure_optimizers` function.
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
from .lr_schedulers import lr_schedulers_path
|
||||
from .optimizers import Optimizer, optimizer_names, optimizer_path
|
||||
|
||||
__all__ = ["Optimizer", "optimizer_names", "optimizer_path", "lr_schedulers_path"]
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
import importlib
|
||||
from pathlib import Path
|
||||
|
||||
from lightning.pytorch.utilities.types import OptimizerLRScheduler
|
||||
from pytorch_fob.engine.configs import OptimizerConfig
|
||||
from pytorch_fob.engine.parameter_groups import GroupedModel
|
||||
|
||||
|
||||
def import_optimizer(name: str):
    """Dynamically import the 'optimizer' module of the named optimizer package."""
    module_path = f"pytorch_fob.optimizers.{name}.optimizer"
    return importlib.import_module(module_path)
|
||||
|
||||
|
||||
def optimizer_path(name: str) -> Path:
    """Return the directory containing the given optimizer's files."""
    package_root = Path(__file__).resolve().parent
    return package_root / name
|
||||
|
||||
|
||||
def optimizer_names() -> list[str]:
    """List the names of all optimizer subdirectories next to this module."""
    excluded = {"__pycache__", "lr_schedulers"}
    names = []
    for entry in Path(__file__).parent.iterdir():
        if entry.is_dir() and entry.name not in excluded:
            names.append(entry.name)
    return names
|
||||
|
||||
|
||||
class Optimizer:
    """Facade that lazily loads and delegates to the configured optimizer module."""

    def __init__(self, config: OptimizerConfig) -> None:
        # config.name selects which optimizers/<name>/optimizer.py module is used
        self.config = config

    def configure_optimizers(self, model: GroupedModel) -> OptimizerLRScheduler:
        """Import the configured optimizer module and build optimizer + scheduler for *model*."""
        optimizer_module = import_optimizer(self.config.name)
        return optimizer_module.configure_optimizers(model, self.config)
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
from pytorch_fob.engine.engine import Engine
|
||||
from pytorch_fob.engine.utils import set_loglevel
|
||||
|
||||
|
||||
def main(args: argparse.Namespace, extra_args: list[str]):
    """Parse the experiment file, run all its runs and create the evaluation plots.

    Args:
        args: parsed CLI arguments; only args.experiment_file is used here.
        extra_args: unrecognized CLI arguments, forwarded as config overrides.
    """
    engine = Engine()
    engine.parse_experiment_from_file(args.experiment_file, extra_args=extra_args)
    engine.run_experiment()
    engine.plot()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # CLI: <script> <experiment_file> [--log_level ...] [extra overrides...]
    parser = argparse.ArgumentParser(
        description="runs an experiment specified by a file"
    )
    parser.add_argument(
        "experiment_file", type=Path, help="The yaml file specifying the experiment."
    )
    parser.add_argument(
        "--log_level",
        type=str,
        choices=["debug", "info", "warn", "silent"],
        default="info",
        help="Set the log level",
    )
    # parse_known_args: anything unrecognized is passed through to the engine
    args, extra_args = parser.parse_known_args()
    set_loglevel(args.log_level)
    main(args, extra_args)
|
||||
|
|
@ -0,0 +1,98 @@
|
|||
# Tasks
|
||||
We provide a set of tasks to train and evaluate models. A task consists of a model and a dataset.
|
||||
|
||||
Each task has their own `README.md` file with more details.
|
||||
|
||||
We currently have the following tasks:
|
||||
|
||||
### Ready to use
|
||||
|
||||
| Name | Dataset | Model | Task | Target Metric | Baseline Score | Baseline Runtime | Hardware |
|
||||
| ------- | ---- | ----- | ---- | ------------- | -------------- | ---------------- | -------- |
|
||||
| [mnist](mnist) | MNIST | MLP | Image Classification | Top-1 Accuracy | 0.97 | 1 min | 1 gpu |
|
||||
| [classification](classification) | [Imagenet-64x64](https://patrykchrabaszcz.github.io/Imagenet32/) | [Wide ResNet](https://arxiv.org/pdf/1605.07146.pdf) | Image Classification | Top-1 Accuracy | 0.69 | 4h | 4 gpu |
|
||||
| [classification_small](classification_small) | [CIFAR100](https://www.cs.toronto.edu/~kriz/cifar.html) | [Resnet18](https://arxiv.org/pdf/1512.03385.pdf) | Image Classification | Top-1 Accuracy | 0.77 | 10 min | 1 gpu |
|
||||
| [segmentation](segmentation) | [MIT Scene Parse](http://sceneparsing.csail.mit.edu/) | [SegFormer](https://arxiv.org/abs/2105.15203) | Semantic Segmentation | Intersection over Union (IoU) | 0.35 | 5h | 4 gpu |
|
||||
| [graph](graph) | [ogbg-molhiv](https://ogb.stanford.edu/docs/graphprop/#ogbg-mol) | [Graph Isomorphism Network (GIN)](https://arxiv.org/pdf/1810.00826.pdf) | Graph Property Prediction | ROC-AUC | 0.77 | 20min | 1 gpu |
|
||||
| [graph_tiny](graph_tiny) | [Cora](https://paperswithcode.com/sota/node-classification-on-cora) | [GCN](https://arxiv.org/abs/1609.02907) | Node Classification | Accuracy | 0.82 | 1min | 1 gpu |
|
||||
| [tabular](tabular) | [California Housing](https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.html) | [FT Transformer](https://arxiv.org/pdf/2106.11959.pdf) | Tabular Regression | Test RMSE | 0.40 | 2 min | 1 gpu |
|
||||
| [translation](translation) | [WMT17(en-de)](https://machinetranslate.org/wmt17) | [T5 small](https://jmlr.org/papers/volume21/20-074/20-074.pdf) | Machine Translation | BLEU (sacrebleu) | 26.3 | 6h | 4 gpus |
|
||||
|
||||
|
||||
### Under Development
|
||||
|
||||
| Name | Dataset | Model | Task | Target Metric | Baseline Score | Baseline Runtime | Hardware |
|
||||
| ------- | ----- | ----- | ---- | ------------- | -------------- | ---------------- | -------- |
|
||||
| [detection](pytorch_fob/tasks/detection) | [COCO](https://cocodataset.org) | [Faster R-CNN](https://arxiv.org/abs/1506.01497) with [MobileNet v3](https://arxiv.org/abs/1905.02244) backbone | Object detection | Average Precision (IoU) | ? | ~4h | 4 gpus |
|
||||
| rna_folding | bpRNA | RNAformer | RNA secondary structure prediction | F1 | ? | ~4h | 4 gpus |
|
||||
|
||||
## Adding your own task
|
||||
To add your own task, you need to create a subfolder in the `tasks` directory. The name of that folder will be the name used to invoke the task. Within the folder you need to provide the following files: `task.py`, `model.py`, `data.py`, `default.yaml` and `README.md`.
|
||||
|
||||
There is a [template](template) task with useful comments, which can be used as a starting point.
|
||||
|
||||
### data.py
|
||||
Here you provide the code for interacting with your dataset. As we use [lightning](https://lightning.ai/docs/pytorch/stable/), you will need to create a [LightningDataModule Datamodule](https://lightning.ai/docs/pytorch/stable/data/datamodule.html).
|
||||
|
||||
The class you create must inherit from `TaskDataModule` which in turn inherits from `LightningDataModule`. The base `TaskDataModule` already defines some default methods for the dataloader methods, so if you do not need any custom dataloaders you can probably leave them.
|
||||
|
||||
The two methods you need to implement are [prepare_data](https://lightning.ai/docs/pytorch/stable/data/datamodule.html#prepare-data) and [setup](https://lightning.ai/docs/pytorch/stable/data/datamodule.html#setup). In `prepare_data` you need to put your downloading and data preprocessing logic. In `setup` you should load and split your dataset and set the `self.data_train, self.data_val, self.data_test` attributes in the appropriate stages.
|
||||
|
||||
### model.py
|
||||
Here you provide the code for the model. As we use [lightning](https://lightning.ai/docs/pytorch/stable/), you will need to create a [LightningModule](https://lightning.ai/docs/pytorch/stable/common/lightning_module.html).
|
||||
|
||||
The class you create must inherit from `TaskModel` which in turn inherits from `LightningModule`. The `__init__` method should have the following signature:
|
||||
```python
|
||||
def __init__(self, optimizer: Optimizer, config: TaskConfig):
|
||||
```
|
||||
In the `__init__` method you need to create your model, and pass it to the `super().__init__` call. There the model is wrapped into a `GroupedModel` which splits the model parameters into weight_decay and non-weight_decay groups. If you want to specify your own parameter groups (e.g. for different learning rates) you need to wrap your model in a `GroupedModel` yourself, before passing it to the `super().__init__` call.
|
||||
|
||||
The other methods you need to implement are [training_step](https://lightning.ai/docs/pytorch/stable/common/lightning_module.html#training-step), [validation_step](https://lightning.ai/docs/pytorch/stable/common/lightning_module.html#validation-step) and [test_step](https://lightning.ai/docs/pytorch/stable/common/lightning_module.html#test-step). Here you need to implement the training and evaluation logic.
|
||||
|
||||
### task.py
|
||||
Here you only need to provide two simple functions:
|
||||
```python
|
||||
def get_datamodule(config: TaskConfig) -> TaskDataModule
|
||||
```
|
||||
which returns an instance of your `DataModule` class, and
|
||||
```python
|
||||
def get_task(optimizer: Optimizer, config: TaskConfig) -> tuple[TaskModel, TaskDataModule]
|
||||
```
|
||||
which returns an instance of your `TaskModel` class and an instance of your `DataModule` class.
|
||||
|
||||
### default.yaml
|
||||
Here you can provide default values for all the hyperparameters your task needs. All keys under the `task` section will be added to the `TaskConfig`.
|
||||
|
||||
There are some required parameters you need to specify:
|
||||
```yaml
|
||||
task:
|
||||
name: my_awesome_task # same as directory name
|
||||
batch_size: 123
|
||||
max_epochs: 42
|
||||
max_steps: null # should be left null, use max_epochs instead
|
||||
target_metric: val_acc # choose a metric that is being logged in your LightningModule
|
||||
target_metric_mode: min # min or max
|
||||
engine:
|
||||
devices: 1 # number of devices to use
|
||||
sbatch_args:
|
||||
time: 00:05:00 # estimated time to train
|
||||
evaluation:
|
||||
plot:
|
||||
metric: test_acc
|
||||
test_metric_mode: min
|
||||
format: "2.1"
|
||||
limits: [0, 100] # colorbar limits
|
||||
optimizer:
|
||||
name: adamw_baseline # set the default optimizer
|
||||
```
|
||||
|
||||
You can optionally set and override optimizer defaults, e.g.:
|
||||
```yaml
|
||||
optimizer:
|
||||
name: adamw_baseline
|
||||
learning_rate: 0.1
|
||||
```
|
||||
would use a default learning rate of 0.1 instead of the one specified in the `default.yaml` of the optimizer. Note that this applies to all optimizers. So if the user chooses a different optimizer, they will still get the default learning rate specified here.
|
||||
|
||||
### README.md
|
||||
Here you should provide a short description of your task, and a baseline performance. Follow the template as seen in the existing tasks.
|
||||
|
|
@ -0,0 +1 @@
|
|||
# from .tasks import TaskDataModule, TaskModel, import_task, task_names, task_path # Unused imports
|
||||
|
|
@ -0,0 +1,126 @@
|
|||
import importlib
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Optional
|
||||
|
||||
import torch
|
||||
from lightning import LightningDataModule, LightningModule
|
||||
from lightning.pytorch.core.optimizer import LightningOptimizer
|
||||
from lightning.pytorch.utilities.types import OptimizerLRScheduler
|
||||
from pytorch_fob.engine.configs import TaskConfig
|
||||
from pytorch_fob.engine.parameter_groups import GroupedModel
|
||||
from pytorch_fob.optimizers import Optimizer
|
||||
from torch import nn
|
||||
from torch.utils.data import DataLoader
|
||||
|
||||
|
||||
def import_task(name: str):
    """Import and return the ``task`` module of the named task package."""
    module_path = f"pytorch_fob.tasks.{name}.task"
    return importlib.import_module(module_path)
|
||||
|
||||
|
||||
def task_path(name: str) -> Path:
    """Return the directory that holds the task package of the given name."""
    tasks_root = Path(__file__).resolve().parent
    return tasks_root.joinpath(name)
|
||||
|
||||
|
||||
def task_names() -> list[str]:
    """List the names of all available tasks (subdirectories of this module)."""
    excluded = ("__pycache__",)
    names = []
    for entry in Path(__file__).parent.iterdir():
        if entry.is_dir() and entry.name not in excluded:
            names.append(entry.name)
    return names
|
||||
|
||||
|
||||
class TaskModel(LightningModule):
    """Base LightningModule for benchmark tasks.

    Wraps a user model so its parameters are split into weight-decay /
    no-weight-decay groups, delegates optimizer construction to the
    benchmark's ``Optimizer``, and records per-step optimizer wall time.
    """

    def __init__(
        self,
        model: nn.Module | GroupedModel,
        optimizer: Optimizer,
        config: TaskConfig,
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        # Ensure the model is wrapped in a GroupedModel; tasks may pass a
        # pre-wrapped model to define custom parameter groups themselves.
        if isinstance(model, GroupedModel):
            self.model = model
        else:
            self.model = GroupedModel(model)
        self.optimizer = optimizer
        self.config = config
        # Wall-clock duration of each optimizer.step call, in milliseconds.
        self.optimizer_times_ms = []

    def forward(self, *args, **kwargs):
        """Delegate the forward pass to the wrapped model."""
        return self.model.forward(*args, **kwargs)

    def configure_optimizers(self) -> OptimizerLRScheduler:
        """Let the benchmark optimizer build its optimizer(s)/scheduler(s)."""
        return self.optimizer.configure_optimizers(self.model)

    def optimizer_step(
        self,
        epoch: int,
        batch_idx: int,
        optimizer: torch.optim.Optimizer | LightningOptimizer,
        optimizer_closure: Optional[Callable[[], Any]] = None,
    ) -> None:
        """Run the optimizer step while timing it (recorded in ms)."""
        t0 = time.time_ns()
        optimizer.step(closure=optimizer_closure)  # type: ignore
        t1 = time.time_ns()
        self.optimizer_times_ms.append((t1 - t0) / 1e6)
|
||||
|
||||
|
||||
class TaskDataModule(LightningDataModule):
    """Base LightningDataModule for benchmark tasks.

    Subclasses must set ``self.data_train``, ``self.data_val``,
    ``self.data_test`` (and optionally ``self.data_predict``) in ``setup``
    for the relevant stages; the default dataloader methods below then work
    without further overriding.
    """

    def __init__(self, config: TaskConfig) -> None:
        super().__init__()
        self.config = config
        # Cap the number of dataloader workers to avoid oversubscription.
        self.workers: int = min(config.workers, 16)
        self.data_dir: Path = config.data_dir / config.name
        self.batch_size: int = config.batch_size
        # Datasets to be populated by subclasses in ``setup``.
        self.data_train: Any
        self.data_val: Any
        self.data_test: Any
        self.data_predict: Any
        # Optional custom collate function used by every dataloader.
        self.collate_fn = None

    def check_dataset(self, data):
        """Make sure that all tasks have correctly configured their data sets"""
        if not data:
            raise NotImplementedError("Each task has its own data set")
        if not self.batch_size or self.batch_size < 1:
            # Fixed: the original message used a backslash continuation that
            # embedded a long run of literal spaces, and misspelled
            # "explicitly".
            raise NotImplementedError(
                "Each task configures its own batch_size. "
                "Please set it explicitly, to avoid confusion."
            )

    def _dataloader(self, data, shuffle: bool = False) -> DataLoader:
        """Validate the dataset and build a DataLoader with shared settings."""
        self.check_dataset(data)
        return DataLoader(
            data,
            shuffle=shuffle,
            batch_size=self.batch_size,
            num_workers=self.workers,
            collate_fn=self.collate_fn,
        )

    def train_dataloader(self):
        # Only the training loader shuffles.
        return self._dataloader(self.data_train, shuffle=True)

    def val_dataloader(self):
        return self._dataloader(self.data_val)

    def test_dataloader(self):
        return self._dataloader(self.data_test)

    def predict_dataloader(self):
        return self._dataloader(self.data_predict)
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
lightning~=2.2.2
|
||||
torch~=2.1.1
|
||||
torchvision~=0.16.1
|
||||
torchaudio~=2.1.1
|
||||
torchtext~=0.16.1
|
||||
tensorboard~=2.15.1
|
||||
datasets~=2.16.1
|
||||
pycocotools~=2.0.7
|
||||
tqdm~=4.66.1
|
||||
wget~=3.2
|
||||
deepspeed~=0.12.6
|
||||
rtdl_revisiting_models~=0.0.2
|
||||
scikit-learn~=1.5.0
|
||||
transformers~=4.38.0
|
||||
tokenizers~=0.15.0
|
||||
sentencepiece~=0.2.0
|
||||
sacrebleu~=2.4.1
|
||||
evaluate~=0.4.1
|
||||
seaborn~=0.13.1
|
||||
pandas~=2.2.0
|
||||
torch_geometric~=2.4.0
|
||||
spacy~=3.7.2
|
||||
ogb~=1.3.6
|
||||
timm~=0.9.12
|
||||
tensorflow~=2.15.0
|
||||
tensorflow-datasets~=4.9.4
|
||||
opencv-python~=4.9.0.80
|
||||
pytorch-cpr~=0.2.0
|
||||
mmsegmentation~=1.2.2
|
||||
mmcv~=2.1.0
|
||||
lion-pytorch~=0.2.2
|
||||
git+https://github.com/LiyuanLucasLiu/RAdam
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
# Example: submit a simple SGD optimizer to the FOB benchmark environment
# and print the resulting reward.
from optimizer_benchmark_env import OptimizerBenchmarkEnv


# Source code of the optimizer under test, kept as a string so it can be
# submitted to the environment. It must define a ``configure_optimizers``
# function with the signature pytorch_fob expects.
optimizer_code = """
from lightning.pytorch.utilities.types import OptimizerLRScheduler
from torch.optim import SGD
from pytorch_fob.engine.parameter_groups import GroupedModel
from pytorch_fob.engine.configs import OptimizerConfig

def configure_optimizers(model: GroupedModel, config: OptimizerConfig) -> OptimizerLRScheduler:
    lr = config.learning_rate
    optimizer = SGD(model.grouped_parameters(lr=lr), lr=lr)
    return {"optimizer": optimizer}
"""

env = OptimizerBenchmarkEnv()
# Register the optimizer source under a name, generate the experiment
# configuration, then run the benchmark end to end.
env.submit_optimizer(optimizer_code, "my_sgd_optimizer")
env.generate_experiment_yaml()
env.run_benchmark()
# NOTE(review): reward presumably summarizes benchmark performance
# (higher is better) — confirm against OptimizerBenchmarkEnv.get_reward.
reward = env.get_reward()
print("Final reward:", reward)
|
||||
15
environments/community/pytorch_optimizer_coding/FOB/setup.py
Normal file
15
environments/community/pytorch_optimizer_coding/FOB/setup.py
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
from setuptools import setup

# Parse requirements.txt, skipping blank lines, comments, and VCS URLs:
# a bare "git+https://..." line is valid for ``pip install -r`` but is not
# a valid PEP 508 specifier, so setuptools would reject it in
# install_requires.
with open("requirements.txt", "r", encoding="utf8") as f:
    requirements = [
        line.strip()
        for line in f
        if line.strip() and not line.strip().startswith(("#", "git+"))
    ]

setup(
    name="pytorch-fob",
    version="0.1.0",
    description="Fast Optimizer Benchmark",
    url="https://github.com/automl/fob",
    author="Simon Blauth, Tobias Bürger, Zacharias Häringer",
    # NOTE(review): the bundled LICENSE file is Apache 2.0, so the metadata
    # is corrected from "MIT" to match it.
    license="Apache-2.0",
    packages=["pytorch_fob"],
    install_requires=requirements,
)
|
||||
Loading…
Add table
Add a link
Reference in a new issue