chore: Rename generate_humor_dataset.py to hack0/generate_humor_dataset.py

This commit is contained in:
Kirill Igumenshchev 2025-05-18 17:27:58 -07:00 committed by Kirill Igumenshchev (aider)
parent 03902ccd45
commit 7792b10e82

View file

@ -0,0 +1,54 @@
import os
import json
from dotenv import load_dotenv
from openai import OpenAI
import logging
# Module-level logger named after this module; main() reports progress on it.
logger = logging.getLogger(__name__)
# Configure the root logger at INFO level so those progress messages are not
# filtered out by the default WARNING threshold.
logging.basicConfig(level=logging.INFO)
def _build_question(comedian, fmt):
    """Return the prompt sent to the model for one comedian/format pair."""
    # The wording is reproduced verbatim because it is stored in the dataset.
    return (
        f"Whats the best local LLM model to generate {fmt} jokes "
        f"in the style of {comedian}? Please explain your reasoning step by step."
    )


def _extract_answer(response):
    """Return the stripped text of the first choice, or None if there is none.

    The chat completions API may return an empty ``choices`` list or a
    message whose ``content`` is ``None``; both are reported as ``None``
    here instead of raising.
    """
    if not response.choices:
        return None
    content = response.choices[0].message.content
    if content is None:
        return None
    return content.strip()


def main():
    """Generate the humor dataset and write it to ``humor_dataset.jsonl``.

    For every (comedian, format) combination one question is sent to the
    OpenAI chat completions endpoint, and the question together with the
    model's answer is written as one JSON object per line. The output file
    is opened in write mode, so an existing file is overwritten.

    Combinations for which the model returns no text are logged with a
    warning and skipped rather than aborting the run. Errors raised by the
    API client itself are not caught and propagate to the caller.
    """
    # load_dotenv() runs before the API key is read from the environment.
    load_dotenv()
    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

    comedians = [
        "Norm Macdonald",
        "John Mulaney",
        "Hasan Minhaj",
        "Dave Chappelle",
        "Ali Wong",
        "Chris Rock",
    ]
    formats = [
        "haiku",
        "one-liner",
        "q/a over sms",
    ]
    output_file = "humor_dataset.jsonl"
    model_name = "gpt-4o-mini"

    logger.info(
        "Generating humor dataset to %s using model %s", output_file, model_name
    )
    with open(output_file, "w", encoding="utf-8") as fout:
        for comedian in comedians:
            for fmt in formats:
                question = _build_question(comedian, fmt)
                response = client.chat.completions.create(
                    model=model_name,
                    messages=[{"role": "user", "content": question}],
                )
                answer = _extract_answer(response)
                if answer is None:
                    # Nothing usable came back; keep going with the rest.
                    logger.warning(
                        "No text returned for comedian=%s, format=%s; skipping",
                        comedian,
                        fmt,
                    )
                    continue
                record = {
                    "comedian": comedian,
                    "format": fmt,
                    "question": question,
                    "response": answer,
                }
                # ensure_ascii=False keeps non-ASCII characters readable in
                # the UTF-8 output file instead of \u-escaping them.
                fout.write(json.dumps(record, ensure_ascii=False) + "\n")
                logger.info(
                    "Wrote record: comedian=%s, format=%s", comedian, fmt
                )
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()