Files
t6_mem0/evaluation/generate_scores.py
2025-05-06 01:16:02 +05:30

42 lines
900 B
Python

import json
import pandas as pd
# Default input file, resolved relative to the current working directory.
DEFAULT_METRICS_PATH = 'evaluation_metrics.json'

# Score columns that are averaged, in the order they are reported.
METRIC_COLUMNS = ('bleu_score', 'f1_score', 'llm_score')


def load_items(path=DEFAULT_METRICS_PATH):
    """Load the metrics JSON file and flatten it into one list of items.

    The file is read as a JSON object; the value stored under every
    top-level key is concatenated, in key order, into a single list.
    Each value is assumed to be a list of per-question dicts -- TODO
    confirm against the producer of the file.

    Args:
        path: Location of the JSON file to read.

    Returns:
        A flat list containing the elements of every top-level value.
    """
    # Explicit encoding so decoding does not depend on the platform locale.
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    all_items = []
    for key in data:
        all_items.extend(data[key])
    return all_items


def summarize_scores(items):
    """Compute mean scores per category and over all items.

    Args:
        items: List of dicts, each providing a ``category`` value plus the
            columns named in ``METRIC_COLUMNS``.

    Returns:
        A ``(per_category, overall)`` tuple. ``per_category`` is a DataFrame
        indexed by numeric category holding the mean of each metric
        (rounded to 4 decimals) and a ``count`` column with the number of
        items in that category. ``overall`` is a Series with the mean of
        each metric over all items, rounded to 4 decimals.

    Raises:
        ValueError: If ``items`` is empty.
    """
    if not items:
        # An empty frame has no 'category' column; fail with a clear message
        # instead of an opaque KeyError.
        raise ValueError('no evaluation items to summarize')

    df = pd.DataFrame(items)

    # Categories may be stored as strings; make them numeric so that they
    # group and sort as numbers.
    df['category'] = pd.to_numeric(df['category'])

    mean_spec = {column: 'mean' for column in METRIC_COLUMNS}

    grouped = df.groupby('category')
    per_category = grouped.agg(mean_spec).round(4)
    # size() is indexed by category, so it aligns with the aggregated frame.
    per_category['count'] = grouped.size()

    overall = df.agg(mean_spec).round(4)
    return per_category, overall


def main(path=DEFAULT_METRICS_PATH):
    """Print per-category and overall mean scores for the metrics file."""
    per_category, overall = summarize_scores(load_items(path))

    print("Mean Scores Per Category:")
    print(per_category)
    print("\nOverall Mean Scores:")
    print(overall)


if __name__ == '__main__':
    main()