mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-19 12:57:58 +00:00
Updated test scripts
This commit is contained in:
parent
d6f9d58606
commit
85f462df5e
2 changed files with 7 additions and 6 deletions
|
|
@ -28,7 +28,7 @@ from atroposlib.envs.base import (
|
|||
ScoredDataGroup,
|
||||
)
|
||||
from atroposlib.utils.tokenize_for_trainer import tokenize_for_trainer
|
||||
from atroposlib.utils.message_history_utils import truncate_thinking
|
||||
from atroposlib.utils.message_history_utils import truncate_thinking, ensure_trajectory_token_limit
|
||||
from atroposlib.utils.tool_call_parser import parse_tool_call
|
||||
from atroposlib.utils.best_of_n_selection import select_best_index
|
||||
|
||||
|
|
@ -142,7 +142,6 @@ class BlackjackEnv(BaseEnv):
|
|||
env_reward: float,
|
||||
response_text: str,
|
||||
parsed_action: int,
|
||||
episode_seed: int,
|
||||
) -> float:
|
||||
"""
|
||||
Calculates a score for a single agent response based purely on environment reward
|
||||
|
|
@ -419,7 +418,7 @@ class BlackjackEnv(BaseEnv):
|
|||
alt_is_terminal.append(term_i or trunc_i)
|
||||
|
||||
combined_reward_i = self._score_response(
|
||||
raw_env_reward_i, full_agent_response, parsed_action, ep.seed
|
||||
raw_env_reward_i, full_agent_response, parsed_action
|
||||
)
|
||||
alt_combined_rewards.append(combined_reward_i)
|
||||
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ import random
|
|||
from dotenv import load_dotenv
|
||||
|
||||
from atroposlib.envs.base import EvalHandlingEnum, OpenaiConfig
|
||||
from environments.game_environments.gymnasium.blackjack_env import (
|
||||
from environments.game_environments.gymnasium.blackjack_env_thinking import (
|
||||
BlackjackEnv,
|
||||
BlackjackEnvConfig,
|
||||
)
|
||||
|
|
@ -76,9 +76,11 @@ async def main():
|
|||
|
||||
_ = env._get_or_create_episode(seed)
|
||||
|
||||
result_trajectory = await env.collect_trajectory(seed)
|
||||
result_trajectories_tuple = await env.collect_trajectories((seed, 0))
|
||||
result_trajectory = result_trajectories_tuple[0]
|
||||
|
||||
logger.info(
|
||||
f"Trajectory collection complete with {len(result_trajectory)} steps."
|
||||
f"Trajectory collection complete with {len(result_trajectory)} groups/steps."
|
||||
)
|
||||
|
||||
episode_summary = None
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue