mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-19 12:57:58 +00:00
Make evals also use the managed server
This commit is contained in:
parent
1ccf3b54e3
commit
73e8ee2475
1 changed file with 12 additions and 11 deletions
|
|
@ -122,18 +122,19 @@ class GSM8kEnv(BaseEnv):
|
|||
|
||||
async def rollout_and_score_eval(self, question: str, answer: str) -> dict:
|
||||
"""Rollout and score evaluation with detailed sample data collection."""
|
||||
completion = await self.server.chat_completion(
|
||||
messages=[
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": question},
|
||||
],
|
||||
n=1,
|
||||
max_tokens=self.config.max_token_length,
|
||||
temperature=0.0,
|
||||
split="eval",
|
||||
)
|
||||
|
||||
async with self.server.managed_server(tokenizer=self.tokenizer) as managed:
|
||||
completion = await managed.chat_completion(
|
||||
messages=[
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": question},
|
||||
],
|
||||
n=1,
|
||||
max_tokens=self.config.max_token_length,
|
||||
temperature=0.0,
|
||||
)
|
||||
|
||||
response_content = completion.choices[0].message.content
|
||||
response_content = completion.choices[0].message.content
|
||||
|
||||
# Parse gold answer
|
||||
gold_parsed = parse(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue