diff --git a/environments/infinimath/infinimath_env.py b/environments/infinimath/infinimath_env.py index e946e7e7..5e89a56c 100644 --- a/environments/infinimath/infinimath_env.py +++ b/environments/infinimath/infinimath_env.py @@ -5,22 +5,21 @@ import random import re from typing import Any, Dict, List, Optional, Tuple, Union -from trajectoryhandler.envs.base import ( +from atroposlib.envs.base import ( BaseEnv, BaseEnvConfig, OpenaiConfig, ScoredDataGroup, ) -from trajectoryhandler.envs.reward_fns import registry -from trajectoryhandler.envs.reward_fns.combined_reward import CombinedReward -from trajectoryhandler.utils.tokenize_for_trainer import tokenize_for_trainer +from atroposlib.utils.tokenize_for_trainer import tokenize_for_trainer from .curriculum import MathCurriculum logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) -system_prompt = """You are an expert mathematician. You need to solve the given math problem step-by-step, showing your reasoning clearly. You should enclose your thoughts and internal monologue inside <think> </think> tags, and then provide your final answer in a LaTeX format using \\boxed{your answer here}. +system_prompt = """You are an expert mathematician that can use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. +You should enclose your thoughts and internal monologue inside <think> </think> tags, and then provide your final answer in a LaTeX format using \\boxed{your answer here}. The problems will be given in a LaTeX format, so be sure to follow the LaTeX syntax when writing your answer (although no $ delimiters are necessary). 
@@ -55,6 +54,8 @@ class InfiniteMathEnvConfig(BaseEnvConfig): max_attempts_per_problem: int = 3 correct_reward: float = 1.0 incorrect_reward: float = -1.0 + think_block_bonus: float = 0.2 # Bonus for a well-formed think block + boxed_answer_bonus: float = 0.2 # Bonus for a well-formed boxed answer # Length penalty parameters apply_length_penalty: bool = True @@ -66,16 +67,6 @@ class InfiniteMathEnvConfig(BaseEnvConfig): temperature: float = 0.7 top_p: float = 0.9 - # Reward functions - reward_functions: List[Union[str, Dict[str, Any]]] = ["accuracy", "format", "boxed"] - accuracy_reward_weight: float = 1.0 # Weight for the accuracy reward - format_reward_weight: float = ( - 0.2 # Weight for the format reward relative to correctness - ) - boxed_reward_weight: float = ( - 0.3 # Weight for the boxed answer reward relative to correctness - ) - class InfiniteMathEnv(BaseEnv): """Environment for procedurally generated math problems with curriculum advancement.""" @@ -103,70 +94,6 @@ class InfiniteMathEnv(BaseEnv): # Set the system prompt self.system_prompt = system_prompt - # Initialize reward function - self.reward_function = self._initialize_reward_function() - - def _initialize_reward_function(self): - """Initialize the combined reward function for scoring.""" - if hasattr(self.config, "reward_functions") and self.config.reward_functions: - # Configure parameters for specific reward functions - reward_configs = [] - - for reward_func in self.config.reward_functions: - if isinstance(reward_func, str): - # String name case - handle known rewards with custom params - if reward_func == "accuracy": - # Configure accuracy reward - accuracy_config = { - "type": "accuracy", - "weight": self.config.accuracy_reward_weight, - "params": { - "split_on_think_tag": True, # Only look at what's after - "tolerance": 1e-6, # Tolerance for number comparisons - }, - } - logger.info(f"Adding accuracy reward with config: {accuracy_config}") - reward_configs.append(accuracy_config) - elif 
reward_func == "format": - # Configure format reward with think tags and explicit weight - format_config = { - "type": "format", - "weight": self.config.format_reward_weight, - "params": { - "preferred_tags": ["think"], - }, - } - logger.info(f"Adding format reward with config: {format_config}") - reward_configs.append(format_config) - elif reward_func == "boxed": - # Configure boxed reward with proper parameters and explicit weight - boxed_config = { - "type": "boxed", - "weight": self.config.boxed_reward_weight, - "params": { - "require_outside_think": True, - }, - } - logger.info(f"Adding boxed reward with config: {boxed_config}") - reward_configs.append(boxed_config) - else: - # Pass through other reward functions as is - logger.info(f"Adding generic reward function: {reward_func}") - reward_configs.append(reward_func) - else: - # Dict case - pass through as is - logger.info(f"Adding reward config: {reward_func}") - reward_configs.append(reward_func) - - # Create the reward function(s) - if len(reward_configs) == 1: - logger.info(f"Creating single reward function: {reward_configs[0]}") - return registry.create(reward_configs[0]) - else: - logger.info(f"Creating combined reward function with {len(reward_configs)} rewards") - # Add explicit normalization to sum to 1.0 - return CombinedReward(rewards=reward_configs, normalization="none") - async def setup(self): """Initialize the environment and curriculum.""" logger.info("Setting up InfiniteMathEnv") @@ -340,21 +267,22 @@ class InfiniteMathEnv(BaseEnv): ) # Log reward function metrics - if hasattr(self, "reward_function") and self.wandb: - if hasattr(self.reward_function, "set_wandb_logger"): - self.reward_function.set_wandb_logger(self.wandb) + # REMOVED: Specific reward function config logging as it's not used anymore + # if hasattr(self, "reward_function") and self.wandb: + # if hasattr(self.reward_function, "set_wandb_logger"): + # self.reward_function.set_wandb_logger(self.wandb) - # Log the reward 
configurations - if isinstance(self.config.reward_functions, list) and self.config.reward_functions: - # Log the reward configuration - wandb_metrics["reward/format_reward_enabled"] = "format" in self.config.reward_functions - wandb_metrics["reward/boxed_reward_enabled"] = "boxed" in self.config.reward_functions + # # Log the reward configurations + # if isinstance(self.config.reward_functions, list) and self.config.reward_functions: + # # Log the reward configuration + # wandb_metrics["reward/format_reward_enabled"] = "format" in self.config.reward_functions + # wandb_metrics["reward/boxed_reward_enabled"] = "boxed" in self.config.reward_functions - if hasattr(self.config, "format_reward_weight"): - wandb_metrics["reward/format_reward_weight"] = self.config.format_reward_weight + # if hasattr(self.config, "format_reward_weight"): + # wandb_metrics["reward/format_reward_weight"] = self.config.format_reward_weight - if hasattr(self.config, "boxed_reward_weight"): - wandb_metrics["reward/boxed_reward_weight"] = self.config.boxed_reward_weight + # if hasattr(self.config, "boxed_reward_weight"): + # wandb_metrics["reward/boxed_reward_weight"] = self.config.boxed_reward_weight # Add eval metrics for item in self.eval_metrics: @@ -502,7 +430,7 @@ class InfiniteMathEnv(BaseEnv): ) # Extract the boxed answer if present - boxed_answer = self.extract_boxed_answer(after_think_part) + boxed_answer = self._extract_boxed_answer(after_think_part) if not boxed_answer: # Try to find the answer in the last line lines = after_think_part.strip().split("\n") @@ -510,15 +438,15 @@ class InfiniteMathEnv(BaseEnv): boxed_answer = lines[-1].strip() # Clean up answers for comparison (remove spaces, convert to lowercase) - model_clean = self.clean_for_comparison( + model_clean = self._clean_for_comparison( boxed_answer if boxed_answer else after_think_part ) - solution_clean = self.clean_for_comparison(solution) + solution_clean = self._clean_for_comparison(solution) # Check if they match 
return model_clean == solution_clean - def extract_boxed_answer(self, text: str) -> Optional[str]: + def _extract_boxed_answer(self, text: str) -> Optional[str]: """Extract answer from a LaTeX boxed expression.""" # Try to find boxed content boxed_match = re.search(r"\\boxed{([^}]*)}", text) @@ -526,7 +454,7 @@ class InfiniteMathEnv(BaseEnv): return boxed_match.group(1) return None - def clean_for_comparison(self, text: str) -> str: + def _clean_for_comparison(self, text: str) -> str: """Clean text for comparison.""" # Remove LaTeX commands, spaces, commas, and convert to lowercase cleaned = re.sub(r"\\[a-zA-Z]+", "", text) @@ -604,86 +532,54 @@ class InfiniteMathEnv(BaseEnv): scored_data["scores"] = [] scored_data["messages"] = [] - # Format completions for reward function evaluation - format_completions = [] - # Process each item in the rollout data - for messages, solution, generator_id, level in rollout_group_data: - # Extract the model's answer - model_answer = messages[-1]["content"] - - # Add to format completions list for reward function - format_completions.append([{"role": "assistant", "content": model_answer}]) - - # Record performance in curriculum based on the answer and solution - # This will be updated after the reward functions are applied - - # Apply all reward functions - reward_scores = [] - unweighted_scores = [] - if hasattr(self, "reward_function") and self.reward_function: - try: - # Apply the reward function (which may be a combined reward) - reward_scores = self.reward_function(format_completions, solution=solution) - logger.info(f"Reward scores: {reward_scores}") - - # Debug individual rewards if it's a combined reward - if hasattr(self.reward_function, "rewards"): - logger.info(f"Combined reward with {len(self.reward_function.rewards)} components") - for i, reward in enumerate(self.reward_function.rewards): - if hasattr(reward, "compute"): - # Get raw unweighted scores - raw_scores = reward.compute(format_completions, solution=solution) - 
if hasattr(reward, "weight"): - logger.info(f"Reward {i} ({type(reward).__name__}): raw={raw_scores}, weight={reward.weight}") - else: - logger.info(f"Reward {i} ({type(reward).__name__}): raw={raw_scores}") - else: - logger.info(f"Using single reward: {type(self.reward_function).__name__}") - - except Exception as e: - logger.error(f"Error applying reward functions: {e}") - logger.exception(e) - reward_scores = [0.0] * len(format_completions) - - # Now update curriculum based on accuracy reward results for i, (messages, solution, generator_id, level) in enumerate(rollout_group_data): - # Extract accuracy from the combined reward if available - is_correct = False - if reward_scores and hasattr(self.reward_function, "rewards"): - for reward in self.reward_function.rewards: - if type(reward).__name__ == "AccuracyReward": - # Get raw scores from accuracy reward - accuracy_scores = reward.compute(format_completions, solution=solution) - is_correct = accuracy_scores[i] > 0 - break + model_answer = messages[-1]["content"] + current_score = 0.0 + + # 1. Accuracy Check + is_correct = self.check_answer(model_answer, solution) + if is_correct: + current_score += self.config.correct_reward + else: + current_score += self.config.incorrect_reward - # Record answer correctness for tracking + # Record answer correctness for tracking and curriculum self.percent_correct_buffer.append(1 if is_correct else 0) if level is not None: self.level_correct_buffer[level].append(1 if is_correct else 0) - - # Record performance in curriculum self.curriculum.record_performance(generator_id, is_correct) - - # Combine scores and add to scored data - for i, (messages, _, _, _) in enumerate(rollout_group_data): - # Use the reward score directly (all weights are applied) - combined_score = reward_scores[i] if reward_scores else 0.0 - - logger.info(f"Final score for item {i}: {combined_score}") + + # 2. 
Thinking Block Check + think_match = re.search(r"<think>(.*?)</think>", model_answer, re.DOTALL) + if think_match: + think_content = think_match.group(1).strip() + if think_content: # Check if there's actual content + current_score += self.config.think_block_bonus + # else: penalty for empty think block, or neutral + # else: penalty for missing think block, or neutral + + # 3. Boxed Answer Check + # Extract the part after the thinking block for boxed answer validation + after_think_part = model_answer.split("</think>")[-1].strip() if "</think>" in model_answer else model_answer + boxed_answer_content = self._extract_boxed_answer(after_think_part) + if boxed_answer_content is not None: # Check if \boxed{} is present and has content + current_score += self.config.boxed_answer_bonus + # else: penalty for missing/malformed boxed answer, or neutral + logger.info(f"Item {i}: Correct: {is_correct}, Think Bonus: {self.config.think_block_bonus if think_match and think_match.group(1).strip() else 0}, Boxed Bonus: {self.config.boxed_answer_bonus if boxed_answer_content is not None else 0}, Final Score: {current_score}") + # Tokenize for the trainer tokens_dict = tokenize_for_trainer( self.tokenizer, - messages, - None, + messages, # These are the full messages including system, user, assistant + None, # Not used by this tokenizer function apparently ) # Add to scored data scored_data["tokens"].append(tokens_dict["tokens"]) scored_data["masks"].append(tokens_dict["masks"]) - scored_data["scores"].append(combined_score) + scored_data["scores"].append(current_score) scored_data["messages"].append(messages) # Advance difficulty if criteria met diff --git a/environments/infinimath/infinimath_server.py b/environments/infinimath/infinimath_server.py index 08cd0efa..de9b9e3c 100644 --- a/environments/infinimath/infinimath_server.py +++ b/environments/infinimath/infinimath_server.py @@ -86,10 +86,6 @@ async def main(): length_threshold_ratio=raw_config.get("infinimath", {}).get("length_threshold_ratio", 0.6), 
temperature=raw_config.get("infinimath", {}).get("temperature", 0.7), top_p=raw_config.get("infinimath", {}).get("top_p", 0.9), - reward_functions=raw_config.get("infinimath", {}).get("reward_functions", ["accuracy", "format", "boxed"]), - accuracy_reward_weight=raw_config.get("infinimath", {}).get("accuracy_reward_weight", 1.0), - format_reward_weight=raw_config.get("infinimath", {}).get("format_reward_weight", 0.2), - boxed_reward_weight=raw_config.get("infinimath", {}).get("boxed_reward_weight", 0.3), ) # Server configuration from config file or defaults