Docs Update (#2591)
41 evaluation/generate_scores.py Normal file
@@ -0,0 +1,41 @@
import pandas as pd
import json

# Load the evaluation metrics data
with open('evaluation_metrics.json', 'r') as f:
    data = json.load(f)

# Flatten the data into a list of question items
all_items = []
for key in data:
    all_items.extend(data[key])

# Convert to DataFrame
df = pd.DataFrame(all_items)

# Convert category to numeric type
df['category'] = pd.to_numeric(df['category'])

# Calculate mean scores by category
result = df.groupby('category').agg({
    'bleu_score': 'mean',
    'f1_score': 'mean',
    'llm_score': 'mean'
}).round(4)

# Add count of questions per category
result['count'] = df.groupby('category').size()

# Print the results
print("Mean Scores Per Category:")
print(result)

# Calculate overall means
overall_means = df.agg({
    'bleu_score': 'mean',
    'f1_score': 'mean',
    'llm_score': 'mean'
}).round(4)

print("\nOverall Mean Scores:")
print(overall_means)