mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-28 17:29:30 +00:00
merged latest
This commit is contained in:
parent
ba6ba173c1
commit
d768ad68aa
9 changed files with 28 additions and 3232 deletions
|
|
@ -5,7 +5,7 @@ from typing import Dict, List, Optional, Tuple
|
|||
|
||||
import gymnasium as gym
|
||||
|
||||
from atroposlib.envs.base import BaseEnv, BaseEnvConfig, OpenaiConfig, ScoredDataItem
|
||||
from atroposlib.envs.base import APIServerConfig, BaseEnv, BaseEnvConfig, ScoredDataItem
|
||||
from atroposlib.type_definitions import Item, Message
|
||||
from atroposlib.utils.tokenize_for_trainer import tokenize_for_trainer
|
||||
from atroposlib.utils.tool_call_parser import parse_tool_call
|
||||
|
|
@ -35,7 +35,7 @@ class BlackjackEnvNoThinking(BaseEnv):
|
|||
def __init__(
|
||||
self,
|
||||
config: BlackjackEnvNoThinkingConfig,
|
||||
server_configs: List[OpenaiConfig],
|
||||
server_configs: List[APIServerConfig],
|
||||
slurm: bool = True,
|
||||
testing: bool = False,
|
||||
):
|
||||
|
|
@ -74,7 +74,7 @@ class BlackjackEnvNoThinking(BaseEnv):
|
|||
)
|
||||
|
||||
@classmethod
|
||||
def config_init(cls) -> Tuple[BlackjackEnvNoThinkingConfig, List[OpenaiConfig]]:
|
||||
def config_init(cls) -> Tuple[BlackjackEnvNoThinkingConfig, List[APIServerConfig]]:
|
||||
env_config = BlackjackEnvNoThinkingConfig(
|
||||
tokenizer_name="NousResearch/DeepHermes-3-Llama-3-8B-Preview",
|
||||
group_size=16,
|
||||
|
|
@ -87,7 +87,7 @@ class BlackjackEnvNoThinking(BaseEnv):
|
|||
eval_episodes=100,
|
||||
)
|
||||
server_configs = [
|
||||
OpenaiConfig(
|
||||
APIServerConfig(
|
||||
model_name="NousResearch/DeepHermes-3-Llama-3-8B-Preview",
|
||||
base_url="http://localhost:9001/v1",
|
||||
api_key="x",
|
||||
|
|
|
|||
|
|
@ -21,10 +21,10 @@ import gymnasium
|
|||
from tqdm.asyncio import tqdm_asyncio
|
||||
|
||||
from atroposlib.envs.base import (
|
||||
APIServerConfig,
|
||||
BaseEnv,
|
||||
BaseEnvConfig,
|
||||
EvalHandlingEnum,
|
||||
OpenaiConfig,
|
||||
ScoredDataGroup,
|
||||
)
|
||||
from atroposlib.utils.best_of_n_selection import select_best_index
|
||||
|
|
@ -79,7 +79,7 @@ class BlackjackEnv(BaseEnv):
|
|||
def __init__(
|
||||
self,
|
||||
config: BlackjackEnvConfig,
|
||||
server_configs: List[OpenaiConfig],
|
||||
server_configs: List[APIServerConfig],
|
||||
slurm: bool = True,
|
||||
testing: bool = False,
|
||||
):
|
||||
|
|
@ -936,7 +936,7 @@ class BlackjackEnv(BaseEnv):
|
|||
await super().wandb_log(wandb_metrics)
|
||||
|
||||
@classmethod
|
||||
def config_init(cls) -> Tuple[BlackjackEnvConfig, List[OpenaiConfig]]:
|
||||
def config_init(cls) -> Tuple[BlackjackEnvConfig, List[APIServerConfig]]:
|
||||
env_config = BlackjackEnvConfig(
|
||||
tokenizer_name="NousResearch/DeepHermes-3-Llama-3-8B-Preview",
|
||||
group_size=16,
|
||||
|
|
@ -964,7 +964,7 @@ class BlackjackEnv(BaseEnv):
|
|||
tiebreak_token_factor=0.01,
|
||||
)
|
||||
server_configs = [
|
||||
OpenaiConfig(
|
||||
APIServerConfig(
|
||||
model_name="NousResearch/DeepHermes-3-Llama-3-8B-Preview",
|
||||
base_url="http://localhost:9004/v1",
|
||||
api_key="x",
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ import random
|
|||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from atroposlib.envs.base import EvalHandlingEnum, OpenaiConfig
|
||||
from atroposlib.envs.base import APIServerConfig, EvalHandlingEnum
|
||||
from environments.game_environments.gymnasium.blackjack.blackjack_env_thinking import (
|
||||
BlackjackEnv,
|
||||
BlackjackEnvConfig,
|
||||
|
|
@ -47,7 +47,7 @@ async def main():
|
|||
mc_samples=1,
|
||||
)
|
||||
server_configs = [
|
||||
OpenaiConfig(
|
||||
APIServerConfig(
|
||||
model_name="gpt-4.1-nano",
|
||||
base_url="https://api.openai.com/v1",
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ from typing import Optional
|
|||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from atroposlib.envs.base import EvalHandlingEnum, OpenaiConfig, ScoredDataItem
|
||||
from atroposlib.envs.base import APIServerConfig, EvalHandlingEnum, ScoredDataItem
|
||||
from environments.game_environments.gymnasium.blackjack.blackjack_env_no_thinking import (
|
||||
BlackjackEnvNoThinking,
|
||||
BlackjackEnvNoThinkingConfig,
|
||||
|
|
@ -41,7 +41,7 @@ async def main():
|
|||
eval_episodes=0,
|
||||
)
|
||||
server_configs = [
|
||||
OpenaiConfig(
|
||||
APIServerConfig(
|
||||
model_name="gpt-4.1-nano",
|
||||
base_url="https://api.openai.com/v1",
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue