mirror of
https://github.com/lilakk/BLEUBERI.git
synced 2026-04-19 12:58:12 +00:00
220 lines
7.1 KiB
Python
220 lines
7.1 KiB
Python
import argparse
|
|
import code
|
|
import datetime
|
|
import json
|
|
import os
|
|
from pytz import timezone
|
|
import time
|
|
|
|
import pandas as pd # pandas>=2.0.3
|
|
import plotly.express as px
|
|
import plotly.graph_objects as go
|
|
from tqdm import tqdm
|
|
|
|
|
|
NUM_SERVERS = 14
|
|
LOG_ROOT_DIR = "~/fastchat_logs"
|
|
|
|
|
|
def get_log_files(max_num_files=None):
    """Collect conversation log paths from every server directory.

    Looks in ``server0`` .. ``server{NUM_SERVERS - 1}`` under
    ``LOG_ROOT_DIR`` (with ``~`` expanded) for files whose name ends in
    ``-conv.json`` and orders them by modification time, oldest first.

    Args:
        max_num_files: Keep only this many of the most recently modified
            files. ``None`` (or any other falsy value, e.g. ``0``) keeps
            every file found.

    Returns:
        A list of file path strings sorted by ascending mtime.
    """
    log_root = os.path.expanduser(LOG_ROOT_DIR)

    stamped_paths = []
    for server_id in range(NUM_SERVERS):
        server_dir = f"{log_root}/server{server_id}"
        for entry in os.listdir(server_dir):
            if not entry.endswith("-conv.json"):
                continue
            path = f"{server_dir}/{entry}"
            stamped_paths.append((path, os.path.getmtime(path)))

    # Oldest first, so the tail of the list holds the newest files.
    stamped_paths.sort(key=lambda pair: pair[1])
    ordered = [path for path, _mtime in stamped_paths]

    limit = max_num_files or len(ordered)
    return ordered[-limit:]
|
|
|
|
|
|
def load_log_files(filename):
    """Parse one JSON-lines log file into a list of slim records.

    Each non-blank line is decoded as a JSON object and reduced to the four
    fields used by the statistics code: ``type``, ``tstamp``, ``model``
    (default ``""``) and ``models`` (default ``["", ""]``).

    The file is opened up to five times, sleeping two seconds between
    attempts, to ride out a file that has been listed but is not yet
    readable.

    Args:
        filename: Path of the log file to read.

    Returns:
        A list of dicts with keys ``type``, ``tstamp``, ``model``, ``models``.

    Raises:
        FileNotFoundError: If the file is still missing on the last attempt.
        KeyError: If a record lacks ``type`` or ``tstamp``.
    """
    num_attempts = 5
    for attempt in range(num_attempts):
        try:
            # Context manager so the handle is closed deterministically.
            with open(filename) as fin:
                lines = fin.readlines()
            break
        except FileNotFoundError:
            # Out of retries: surface the real cause rather than falling
            # through to the parsing loop with `lines` unbound.
            if attempt == num_attempts - 1:
                raise
            time.sleep(2)

    data = []
    for line in lines:
        # Tolerate blank lines (e.g. a trailing newline) instead of letting
        # json.loads fail on them.
        if not line.strip():
            continue
        row = json.loads(line)
        data.append(
            {
                "type": row["type"],
                "tstamp": row["tstamp"],
                "model": row.get("model", ""),
                "models": row.get("models", ["", ""]),
            }
        )
    return data
|
|
|
|
|
|
def load_log_files_parallel(log_files, num_threads=16):
    """Load many log files through a worker pool and concatenate the records.

    Args:
        log_files: Sequence of log file paths; each is passed to
            ``load_log_files``.
        num_threads: Size of the ``multiprocessing.Pool`` (worker processes,
            despite the parameter name).

    Returns:
        One flat list holding the records of every file, in the order the
        files were given.
    """
    from multiprocessing import Pool

    with Pool(num_threads) as pool:
        # imap yields results in input order; list() drains it while the
        # pool is still alive and lets tqdm show progress.
        per_file = list(
            tqdm(pool.imap(load_log_files, log_files), total=len(log_files))
        )

    return [record for records in per_file for record in records]
|
|
|
|
|
|
def get_anony_vote_df(df):
    """Select vote rows whose first ``models`` entry is the empty string.

    A row is kept when its ``type`` is one of ``leftvote``, ``rightvote``,
    ``tievote`` or ``bothbad_vote`` and ``models[0] == ""`` (presumably how
    anonymous battles are logged; confirm against the log writer).

    Args:
        df: DataFrame with at least ``type`` and ``models`` columns.

    Returns:
        The filtered DataFrame, keeping the original index labels.
    """
    vote_types = ["leftvote", "rightvote", "tievote", "bothbad_vote"]
    is_vote = df["type"].isin(vote_types)
    votes = df[is_vote]

    # Evaluated on vote rows only, exactly as the two-step filter requires.
    first_model_blank = votes["models"].apply(lambda pair: pair[0] == "")
    return votes[first_model_blank]
|
|
|
|
|
|
def merge_counts(series, on, names):
    """Join several count series into one table with one column per series.

    The series are merged pairwise on ``on`` using ``pd.merge`` defaults
    (an inner join), so only keys present in every series survive.

    Args:
        series: List of at least two count series to combine.
        on: Name of the key to merge on.
        names: New names for the count columns, one per entry in ``series``.

    Returns:
        A DataFrame whose last ``len(series)`` columns are renamed to
        ``names``.
    """
    merged = pd.merge(series[0], series[1], on=on)
    for extra in series[2:]:
        merged = pd.merge(merged, extra, on=on)
    merged = merged.reset_index()

    # The count columns are the trailing ones, one per merged series.
    count_columns = list(merged.columns)[-len(series) :]
    return merged.rename(columns=dict(zip(count_columns, names)))
|
|
|
|
|
|
def _format_tstamp(tstamp, fmt, tz):
    """Render Unix timestamp ``tstamp`` in timezone ``tz`` using ``fmt``."""
    return datetime.datetime.fromtimestamp(tstamp, tz=tz).strftime(fmt)


def report_basic_stats(log_files):
    """Build usage statistics from a set of conversation log files.

    "Now" is taken to be the newest ``tstamp`` found in the logs, not the
    wall clock; the last-hour and last-day windows are measured back from it.
    All dates and times are rendered in the ``US/Pacific`` timezone.

    Args:
        log_files: Paths of the log files to load.

    Returns:
        A dict with these keys:
            ``chat_dates_bar``: plotly ``Figure`` with stacked per-day bars
                of vote rows selected by ``get_anony_vote_df`` and of chats.
            ``model_hist_md``: markdown table of chat counts per model
                (all / last day / last hour).
            ``action_hist_md``: markdown table of record counts per type
                (all / last day / last hour).
            ``anony_vote_hist_md``: markdown table of the selected vote
                counts per type (all / last day).
            ``num_chats_last_24_hours``: markdown table of chat counts in 24
                one-hour windows, latest window first.
            ``last_updated_datetime``: formatted time of the newest record.
    """
    # Build the tz object once; it is reused for every formatted timestamp.
    pacific = timezone("US/Pacific")
    day_fmt = "%Y-%m-%d"
    datetime_fmt = "%Y-%m-%d %H:%M:%S %Z"

    df_all = pd.DataFrame(load_log_files_parallel(log_files))
    now_t = df_all["tstamp"].max()
    df_1_hour = df_all[df_all["tstamp"] > (now_t - 3600)]
    df_1_day = df_all[df_all["tstamp"] > (now_t - 3600 * 24)]
    anony_vote_df_all = get_anony_vote_df(df_all)

    # Chat trends
    chat_dates = [
        _format_tstamp(x, day_fmt, pacific)
        for x in df_all[df_all["type"] == "chat"]["tstamp"]
    ]
    # Series.value_counts replaces the deprecated top-level pd.value_counts.
    chat_dates_counts = pd.Series(chat_dates).value_counts()
    vote_dates = [
        _format_tstamp(x, day_fmt, pacific) for x in anony_vote_df_all["tstamp"]
    ]
    vote_dates_counts = pd.Series(vote_dates).value_counts()
    chat_dates_bar = go.Figure(
        data=[
            go.Bar(
                name="Anony. Vote",
                x=vote_dates_counts.index,
                y=vote_dates_counts,
                text=[f"{val:.0f}" for val in vote_dates_counts],
                textposition="auto",
            ),
            go.Bar(
                name="Chat",
                x=chat_dates_counts.index,
                y=chat_dates_counts,
                text=[f"{val:.0f}" for val in chat_dates_counts],
                textposition="auto",
            ),
        ]
    )
    chat_dates_bar.update_layout(
        barmode="stack",
        xaxis_title="Dates",
        yaxis_title="Count",
        height=300,
        width=1200,
    )

    # Model call counts
    model_hist_all = df_all[df_all["type"] == "chat"]["model"].value_counts()
    model_hist_1_day = df_1_day[df_1_day["type"] == "chat"]["model"].value_counts()
    model_hist_1_hour = df_1_hour[df_1_hour["type"] == "chat"]["model"].value_counts()
    model_hist = merge_counts(
        [model_hist_all, model_hist_1_day, model_hist_1_hour],
        on="model",
        names=["All", "Last Day", "Last Hour"],
    )
    model_hist_md = model_hist.to_markdown(index=False, tablefmt="github")

    # Action counts
    action_hist_all = df_all["type"].value_counts()
    action_hist_1_day = df_1_day["type"].value_counts()
    action_hist_1_hour = df_1_hour["type"].value_counts()
    action_hist = merge_counts(
        [action_hist_all, action_hist_1_day, action_hist_1_hour],
        on="type",
        names=["All", "Last Day", "Last Hour"],
    )
    action_hist_md = action_hist.to_markdown(index=False, tablefmt="github")

    # Anony vote counts (only all-time and last-day columns are reported)
    anony_vote_hist_all = anony_vote_df_all["type"].value_counts()
    anony_vote_df_1_day = get_anony_vote_df(df_1_day)
    anony_vote_hist_1_day = anony_vote_df_1_day["type"].value_counts()
    anony_vote_hist = merge_counts(
        [anony_vote_hist_all, anony_vote_hist_1_day],
        on="type",
        names=["All", "Last Day"],
    )
    anony_vote_hist_md = anony_vote_hist.to_markdown(index=False, tablefmt="github")

    # Last 24 hours
    chat_1_day = df_1_day[df_1_day["type"] == "chat"]
    # Windows are anchored at the earliest record inside the last-day slice
    # and walked from the latest window back to the earliest.
    base = df_1_day["tstamp"].min()
    num_chats_last_24_hours = []
    for i in range(24, 0, -1):
        left = base + (i - 1) * 3600
        right = base + i * 3600
        num = ((chat_1_day["tstamp"] >= left) & (chat_1_day["tstamp"] < right)).sum()
        num_chats_last_24_hours.append(num)
    # Each row is labelled with the right-hand edge of its window.
    times = [
        _format_tstamp(base + i * 3600, datetime_fmt, pacific)
        for i in range(24, 0, -1)
    ]
    last_24_hours_df = pd.DataFrame({"time": times, "value": num_chats_last_24_hours})
    last_24_hours_md = last_24_hours_df.to_markdown(index=False, tablefmt="github")

    # Last update datetime
    last_updated_datetime = _format_tstamp(now_t, datetime_fmt, pacific)

    return {
        "chat_dates_bar": chat_dates_bar,
        "model_hist_md": model_hist_md,
        "action_hist_md": action_hist_md,
        "anony_vote_hist_md": anony_vote_hist_md,
        "num_chats_last_24_hours": last_24_hours_md,
        "last_updated_datetime": last_updated_datetime,
    }
|
|
|
|
|
|
if __name__ == "__main__":
    # CLI entry point: gather the log files (optionally only the newest N),
    # compute the statistics and print the four markdown tables.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--max-num-files", type=int)
    cli_args = arg_parser.parse_args()

    stats = report_basic_stats(get_log_files(cli_args.max_num_files))

    # Each table is followed by an extra newline to separate the sections.
    for section in (
        "action_hist_md",
        "model_hist_md",
        "anony_vote_hist_md",
        "num_chats_last_24_hours",
    ):
        print(stats[section] + "\n")
|