linting and moved to community

This commit is contained in:
Shannon Sands 2025-05-27 16:04:50 +10:00
parent 8df34efc56
commit a6ac7a3e42
46 changed files with 245 additions and 2314 deletions

View file

@ -1,57 +0,0 @@
import os
import json
from dotenv import load_dotenv
from openai import OpenAI
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def main():
load_dotenv()
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
comedians = [
"Norm Macdonald",
"John Mulaney",
"Hasan Minhaj",
"Dave Chappelle",
"Ali Wong",
"Chris Rock",
]
formats = [
"haiku",
"one-liner",
"q/a over sms",
]
output_file = "humor_dataset.jsonl"
model_name = "gpt-4o-mini"
logger.info(f"Generating humor dataset to {output_file} using model {model_name}")
with open(output_file, "w", encoding="utf-8") as fout:
for comedian in comedians:
for fmt in formats:
question = (
f"Whats the best local LLM model to generate {fmt} jokes "
f"in the style of {comedian}? Please explain your reasoning step by step, and generate 3 example jokes."
)
response = client.chat.completions.create(
model=model_name,
messages=[{"role": "user", "content": question}],
)
answer = response.choices[0].message.content.strip()
record = {
"comedian": comedian,
"format": fmt,
"question": question,
"response": answer,
}
fout.write(json.dumps(record, ensure_ascii=False) + "\n")
logger.info(f"Wrote record: comedian={comedian}, format={fmt}")
# Verify dataset count
count = sum(1 for _ in open(output_file, encoding="utf-8"))
logger.info(f"Dataset {output_file} contains {count} records")
if __name__ == "__main__":
main()