diff --git a/atroposlib/envs/reward_fns/cosine_scaled_reward.py b/atroposlib/envs/reward_fns/cosine_scaled_reward.py
index 0b620abe..3a34198b 100644
--- a/atroposlib/envs/reward_fns/cosine_scaled_reward.py
+++ b/atroposlib/envs/reward_fns/cosine_scaled_reward.py
@@ -4,7 +4,17 @@ import logging
 from typing import Any, List, Optional, Union
 
 import scipy
-import torch
+
+try:
+    import torch
+except ImportError as e:
+    logger = logging.getLogger(__name__)
+    logger.warning(
+        "torch not installed, please install atroposlib[rewardfns] to use this reward function"
+    )
+    raise e
+
+
 from transformers import AutoModel, AutoTokenizer
 
 from .registry import registry
diff --git a/atroposlib/tests/test_advantages.py b/atroposlib/tests/test_advantages.py
index 151ebd2b..2643f580 100644
--- a/atroposlib/tests/test_advantages.py
+++ b/atroposlib/tests/test_advantages.py
@@ -1,7 +1,7 @@
 import math
 
+import numpy as np
 import pytest
-import torch
 
 # Adjust the import below if your functions are in a different module.
 from atroposlib.utils.advantages import (
@@ -23,9 +23,9 @@ def test_allclose_to_first_vector():
     """Test that return_vector=True returns a tensor of booleans."""
     values = [1.0, 1.000000001, 1.000000002]
     result = allclose_to_first(values, return_vector=True)
-    assert isinstance(result, torch.Tensor)
+    assert isinstance(result, np.ndarray)
     # All comparisons should be True.
-    assert torch.all(result)
+    assert np.all(result)
 
 
 def test_allclose_to_first_not_close():
@@ -74,15 +74,15 @@ def test_compute_stats_jagged():
 
 def test_compute_discounted_returns():
     """Test compute_discounted_returns with a tensor input."""
-    rewards = torch.tensor([1.0, 1.0, 1.0])
+    rewards = np.array([1.0, 1.0, 1.0])
     gamma = 0.9
     returns = compute_discounted_returns(rewards, gamma)
     # For a 3-element vector:
     # t=2: 1.0
     # t=1: 1.0 + 0.9*1.0 = 1.9
     # t=0: 1.0 + 0.9*1.9 = 2.71
-    expected = torch.tensor([2.71, 1.9, 1.0])
-    assert torch.allclose(returns, expected, rtol=1e-5, atol=1e-8)
+    expected = np.array([2.71, 1.9, 1.0])
+    assert np.allclose(returns, expected, rtol=1e-5, atol=1e-8)
 
 
 def test_compute_discounted_returns_list_input():
@@ -90,8 +90,8 @@ def test_compute_discounted_returns_list_input():
     rewards = [1, 1, 1]
     gamma = 0.0  # With gamma=0, the returns should equal the rewards.
     returns = compute_discounted_returns(rewards, gamma)
-    expected = torch.tensor([1.0, 1.0, 1.0])
-    assert torch.allclose(returns, expected, rtol=1e-5, atol=1e-8)
+    expected = np.array([1.0, 1.0, 1.0])
+    assert np.allclose(returns, expected, rtol=1e-5, atol=1e-8)
 
 
 def test_compute_grpo_process_supervision_advantages_cumsum():
diff --git a/atroposlib/utils/advantages.py b/atroposlib/utils/advantages.py
index dcb31b60..93ec0575 100644
--- a/atroposlib/utils/advantages.py
+++ b/atroposlib/utils/advantages.py
@@ -1,31 +1,32 @@
 from typing import Sequence
 
-import torch
+import numpy as np
 
 from atroposlib.type_definitions import number
 
-TensorLike = torch.Tensor | Sequence[torch.Tensor] | Sequence[Sequence]
+NumpyArrayLike = np.ndarray | Sequence[np.ndarray] | Sequence[Sequence]
 # Type alias for vector of bools
-BoolVector = torch.Tensor
+BoolVector = np.ndarray
 
 
 def allclose_to_first(
-    values: TensorLike,
+    # values: TensorLike,
+    values: NumpyArrayLike,
     rtol: float = 1e-05,
     atol: float = 1e-08,
     equal_nan: bool = False,
     return_vector: bool = False,
 ) -> BoolVector | bool:
     """
-    Check if all tensors in `values` are close to the first tensor `values[0]` using a vectorized approach.
+    Check if all arrays in `values` are close to the first array `values[0]` using a vectorized approach.
 
     If `return_vector` is False (default), returns a single boolean indicating whether
-    every tensor is close to the first tensor. If `return_vector` is True, returns a list
-    of booleans where each element corresponds to whether the respective tensor in
-    `values` is close to the first tensor. The first element is always True.
+    every array is close to the first array. If `return_vector` is True, returns a list
+    of booleans where each element corresponds to whether the respective array in
+    `values` is close to the first array. The first element is always True.
 
     Args:
-        values (torch.Tensor | Sequence[torch.Tensor] | Sequence[Sequence]):
+        values (np.ndarray | Sequence[np.ndarray] | Sequence[Sequence]):
            Nested list of values to compare. Must be rectangular, but not necessarily 2D.
         rtol (float, optional): Relative tolerance. Defaults to 1e-05.
         atol (float, optional): Absolute tolerance. Defaults to 1e-08.
@@ -35,24 +36,22 @@ def allclose_to_first(
 
     Returns:
         bool or BoolVector:
-            - If `return_vector` is False, returns True if all tensors are close to the first tensor;
+            - If `return_vector` is False, returns True if all arrays are close to the first array;
              otherwise, returns False.
-            - If `return_vector` is True, returns a 1D tensor of bools where the first element is True
-              (as the reference tensor is trivially close to itself), and each subsequent element indicates
-              whether the corresponding tensor is close to the first tensor.
+            - If `return_vector` is True, returns a 1D array of bools where the first element is True
+              (as the reference array is trivially close to itself), and each subsequent element indicates
+              whether the corresponding array is close to the first array.
     """
-    if not isinstance(values, torch.Tensor):
-        values = torch.tensor(values)
+    if not isinstance(values, np.ndarray):
+        values = np.array(values)
 
     reference = values[0]
-    is_close = torch.isclose(
-        values, reference, rtol=rtol, atol=atol, equal_nan=equal_nan
-    )
+    is_close = np.isclose(values, reference, rtol=rtol, atol=atol, equal_nan=equal_nan)
 
     # flatten dimensions after first
-    result_vector = torch.all(is_close.view(is_close.size(0), -1), dim=1)
+    result_vector = np.all(is_close.reshape(is_close.shape[0], -1), axis=1)
 
-    return result_vector if return_vector else bool(torch.all(result_vector))
+    return result_vector if return_vector else bool(np.all(result_vector))
 
 
 def compute_stats(data: Sequence[number | Sequence]) -> dict[str, float]:
@@ -104,23 +103,23 @@ def compute_stats(data: Sequence[number | Sequence]) -> dict[str, float]:
     return {"mean": mean, "var": variance}
 
 
-def compute_discounted_returns(rewards: torch.Tensor, gamma: float) -> torch.Tensor:
+def compute_discounted_returns(rewards: np.ndarray, gamma: float) -> np.ndarray:
     """Compute discounted returns from a 1D vector of rewards.
 
-    Given a list or torch tensor of rewards and a discount factor, this function computes
+    Given a list or numpy array of rewards and a discount factor, this function computes
     the discounted return at each timestep. The discounted return at time t is defined as:
         G_t = rewards[t] + gamma * rewards[t+1] + gamma^2 * rewards[t+2] + ...
 
     Args:
-        rewards (list[float] or torch.Tensor): A 1D list or tensor of rewards.
+        rewards (list[float] or np.ndarray): A 1D list or array of rewards.
         gamma (float): The discount factor (should be between 0 and 1).
 
     Returns:
         list[float]: A list containing the discounted returns for each timestep.
     """
-    if not isinstance(rewards, torch.Tensor):
-        rewards = torch.tensor(rewards, dtype=torch.float)
-    discounted_returns = torch.empty_like(rewards)
+    if not isinstance(rewards, np.ndarray):
+        rewards = np.array(rewards, dtype=np.float32)  # Use float32 for numpy default
+    discounted_returns = np.empty_like(rewards)
 
     running_return = 0.0
     for t in reversed(range(len(rewards))):
@@ -132,7 +131,7 @@ def compute_discounted_returns(rewards: torch.Tensor, gamma: float) -> torch.Ten
 
 def compute_grpo_process_supervision_advantages(
     rewards: Sequence[Sequence[number]], gamma: float = None, std_tol: float = 1e-8
-) -> list[torch.Tensor]:
+) -> list[np.ndarray]:
     """
     Given a (possibly jagged) list of list of rewards, compute advantages for GRPO.
 
@@ -144,7 +143,7 @@ def compute_grpo_process_supervision_advantages(
         std_tol (float): The tolerance for the standard deviation.
 
     Returns:
-        A list of tensors of advantages.
+        A list of arrays of advantages.
 
     Raises:
         ValueError: If the standard deviation of the flattened rewards is smaller than the tolerance.
@@ -155,13 +154,11 @@ def compute_grpo_process_supervision_advantages(
     if std < std_tol:
         raise ValueError(f"`std` is smaller than tolerance of {std_tol}.")
 
-    normalized_rewards = [
-        (torch.tensor(trajectory) - mean) / std for trajectory in rewards
-    ]
+    normalized_rewards = [(np.array(trajectory) - mean) / std for trajectory in rewards]
 
     if gamma is None:
         advantages = [
-            trajectory.flip(dims=[0]).cumsum(dim=0).flip(dims=[0])
+            np.flip(np.cumsum(np.flip(trajectory, axis=0), axis=0), axis=0)
             for trajectory in normalized_rewards
         ]
     else:
diff --git a/atroposlib/utils/tokenize_for_trainer.py b/atroposlib/utils/tokenize_for_trainer.py
index 8d9ea3dc..b5d16095 100644
--- a/atroposlib/utils/tokenize_for_trainer.py
+++ b/atroposlib/utils/tokenize_for_trainer.py
@@ -1,4 +1,4 @@
-import torch
+import numpy as np
 from transformers import PreTrainedTokenizer
 
 from atroposlib.type_definitions import Message
@@ -39,7 +39,7 @@ def tokenize_for_trainer(
 
     # (e.g. current date). e.g. consider a system prompt that depends on the current date and a run that crosses
     # midnight from 3/9 to 3/10 under a tokenizer that tokenizes 3/9 and 3/10 with a different number of tokens.
-    masks = torch.ones(len(tokens), dtype=torch.long) * -100
+    masks = np.ones(len(tokens), dtype=np.int64) * -100
 
     for i, msg in enumerate(chat):
         if msg["role"] in UNMASKED_ROLES:
@@ -51,7 +51,7 @@ def tokenize_for_trainer(
             )
             start_idx = len(prefix_tokens)
             end_idx = len(unmasked_tokens)
-            masks[start_idx:end_idx] = torch.tensor(unmasked_tokens[start_idx:])
+            masks[start_idx:end_idx] = np.array(unmasked_tokens[start_idx:])
 
     masks = masks.tolist()
     if finish_reason == "length":
diff --git a/pyproject.toml b/pyproject.toml
index 3cd1f99b..1547aee7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,7 +24,6 @@ dependencies = [
     "polars",
     "aiofiles",
     "jsonlines",
-    "torch",
    "pydantic-cli",
     "hf_transfer",
 ]
@@ -41,6 +40,9 @@ atropos-dpo-gen = "atroposlib.cli.dpo:main"
 all = [
     "atroposlib[dev,examples]"
 ]
+rewardfns = [
+    "torch"
+]
 dev = [
     "pytest",
     "pytest-asyncio",
@@ -49,10 +51,11 @@ dev = [
     "flake8",
     "isort",
     "mypy",
-    'rich',
+    "rich",
 ]
 examples = [
-    "gradio"
+    "gradio",
+    "atroposlib[rewardfns]"
 ]
 
 [build-system]