From d557b1b4f9d70770fbbc4a8fd59cadcae251cb0c Mon Sep 17 00:00:00 2001
From: Zafir Stojanovski <zaf.stojano@gmail.com>
Date: Thu, 20 Feb 2025 09:54:26 +0100
Subject: [PATCH] Document optional DeepSeek R1 smoke test in CONTRIBUTING.md and add eval/r1/yaml/test.yaml

---
 CONTRIBUTING.md        | 19 +++++++++++++++++++
 eval/r1/yaml/test.yaml |  8 ++++++++
 2 files changed, 27 insertions(+)
 create mode 100644 eval/r1/yaml/test.yaml

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 22878294..e586b779 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -66,6 +66,25 @@ When creating new datasets, please follow these guidelines:
    - [Create a Pull Request](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork)
    - [Request review](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/requesting-a-pull-request-review)
    - Do not include changes to `GALLERY.md` (it's updated automatically)
+   - (Optional, but recommended) If you have an OpenRouter API key, please run DeepSeek R1 against 5-10 samples from your dataset to catch unexpected issues before review:
+      1. Update the configuration file `eval/r1/yaml/test.yaml` with your dataset:
+         ```yaml
+         # test.yaml
+         model: deepseek/deepseek-r1
+         category: test
+         datasets:
+           - YOUR_DATASET_NAME  # replace with the name of your dataset
+         eval_dir: eval/r1
+         dataset_size: 10
+         dataset_seed: 42
+         developer_role: system
+         ```
+      2. Run the evaluation script:
+         ```bash
+         python eval/r1/eval.py --yaml "eval/r1/yaml/test.yaml"
+         ```
+      3. Review the results in `eval/r1/test/{YOUR_DATASET_NAME}.json` and make sure there are no unexpected issues with the dataset generation, the model's instruction following, or the scoring function.
+      4. Include the results in your PR description.
 
 5. **Review Process**:
    - Address reviewer feedback promptly
diff --git a/eval/r1/yaml/test.yaml b/eval/r1/yaml/test.yaml
new file mode 100644
index 00000000..b6956a2a
--- /dev/null
+++ b/eval/r1/yaml/test.yaml
@@ -0,0 +1,8 @@
+model: deepseek/deepseek-r1
+category: test
+datasets:
+  - YOUR_DATASET_NAME  # replace with the name of your dataset
+eval_dir: eval/r1
+dataset_size: 10
+dataset_seed: 42
+developer_role: system