Skip to content
Snippets Groups Projects
Commit 029e81d0 authored by Julien Breton's avatar Julien Breton
Browse files

annotation evaluator details

parent c001e398
No related branches found
No related tags found
No related merge requests found
import json
import importlib.util

import pandas as pd

# Dynamically load the shared metrics helper (recall_precision_f1_f2.py) from
# the current working directory, so the script runs without installing the
# project as a package.
spec = importlib.util.spec_from_file_location(
    "recall_precision_f1_f2", "./recall_precision_f1_f2.py"
)
# Alternative location when launching from a different directory:
# spec = importlib.util.spec_from_file_location("recall_precision_f1_f2", "../../recall_precision_f1_f2.py")
recall_precision_f1_f2 = importlib.util.module_from_spec(spec)
spec.loader.exec_module(recall_precision_f1_f2)
# Prefix of the model-output files for the run being evaluated.  Exactly one
# `path` line should be active at a time; the evaluator reads
# `<path>cleaned.json` and writes `<path>results.json` and
# `<path>full_results.csv`.
path = "../../results/LLM/GPT-4/GPT-4_zero_shot_"
#path = "../../results/LLM/GPT-4/GPT-4_zero_shot_v2_"
#path = "../../results/LLM/Mixtral-8x7b/MIXTRAL_zero_shot_"
#path = "../../results/LLM/Mistral-7b/MISTRAL_zero_shot_"
#path = "../../results/LLM/GPT-4/GPT-4_few_shot_"
#path = "../../results/LLM/Mixtral-8x7b/MIXTRAL_few_shot_"
#path = "../../results/LLM/Mistral-7b/MISTRAL_few_shot_"
#path = "../../results/LLM/Mistral-7b/MISTRAL_fine_tuned_"
#path = "../../results/LLM/Mixtral-8x7b/MIXTRAL_fine_tuned_"
#path = "../../results/LLM/Miqu-1-70b/MIQU_zero_shot_"
# Load the gold (human) annotations and normalise every annotated span:
# strip all spaces and lower-case, so comparisons with model output are
# insensitive to whitespace and letter case.
with open('../../data/evalQS.json', 'r') as fichier:
    eval_data_temp = json.load(fichier)

eval_data = {}
for sentence, classes in eval_data_temp.items():
    eval_data[sentence] = {
        tag: [value.replace(" ", "").lower() for value in values]
        for tag, values in classes.items()
    }
# Load the model ("LLM") annotations for the selected run and apply the same
# whitespace/case normalisation as the gold data, so both sides compare on
# identical terms.
with open(path + "cleaned.json", 'r') as fichier:
    llm_data_temp = json.load(fichier)

llm_data = {}
for sentence, classes in llm_data_temp.items():
    llm_data[sentence] = {
        tag: [value.replace(" ", "").lower() for value in values]
        for tag, values in classes.items()
    }

# The un-normalised dictionaries are no longer needed; release them.
del eval_data_temp, llm_data_temp
output = {'global': {}, 'break_down': {}} output = {'global': {}, 'break_down': {}}
...@@ -72,7 +68,9 @@ output['break_down'] = {element: { ...@@ -72,7 +68,9 @@ output['break_down'] = {element: {
} for element in } for element in
['action', 'actor', 'artifact', 'condition', 'location', 'modality', 'reference', 'time']} ['action', 'actor', 'artifact', 'condition', 'location', 'modality', 'reference', 'time']}
# Per-annotation audit table: one row per gold/model annotation comparison,
# recording its match status ('perfect_equals', 'subpart', 'out_of_scope', ...),
# the concept tag, both annotation strings, and the source sentence.
columns = ['status', 'concept', 'eval_annot', 'llm_annot', 'sentence']
df = pd.DataFrame(columns=columns)
for sentence, classes in eval_data.items(): for sentence, classes in eval_data.items():
for tag, values in classes.items(): for tag, values in classes.items():
...@@ -95,6 +93,13 @@ for sentence, classes in eval_data.items(): ...@@ -95,6 +93,13 @@ for sentence, classes in eval_data.items():
if value in llm_values: if value in llm_values:
actual_perfect_equals += 1 actual_perfect_equals += 1
output['break_down'][tag]['perfect_equals'] += 1 output['break_down'][tag]['perfect_equals'] += 1
df = df._append({
'status': 'perfect_equals',
'eval_annot': value,
'llm_annot': value,
'sentence': sentence,
'concept': tag
}, ignore_index=True)
llm_values.remove(value) llm_values.remove(value)
continue continue
...@@ -104,11 +109,26 @@ for sentence, classes in eval_data.items(): ...@@ -104,11 +109,26 @@ for sentence, classes in eval_data.items():
if value in eval_sentence or eval_sentence in value: if value in eval_sentence or eval_sentence in value:
actual_subpart += 1 actual_subpart += 1
output['break_down'][tag]['subpart'] += 1 output['break_down'][tag]['subpart'] += 1
df = df._append({
'status': 'subpart',
'eval_annot': eval_sentence,
'llm_annot': value,
'sentence': sentence,
'concept': tag
}, ignore_index=True)
llm_values.remove(eval_sentence) llm_values.remove(eval_sentence)
break break
# Out of scope # Out of scope
output['break_down'][tag]['out_of_scope'] += len(llm_values) output['break_down'][tag]['out_of_scope'] += len(llm_values)
for value in llm_values:
df = df._append({
'status': 'out_of_scope',
'eval_annot': '',
'llm_annot': value,
'sentence': sentence,
'concept': tag
}, ignore_index=True)
# Annotation not covered # Annotation not covered
output['break_down'][tag]['not_covered'] += (len(values) - (actual_perfect_equals + actual_subpart)) output['break_down'][tag]['not_covered'] += (len(values) - (actual_perfect_equals + actual_subpart))
...@@ -132,5 +152,7 @@ for tag, values in output['break_down'].items(): ...@@ -132,5 +152,7 @@ for tag, values in output['break_down'].items():
print(fn, tp, fp) print(fn, tp, fp)
output['global'] = recall_precision_f1_f2.recall_precision_f1_f2(fn, tp , fp) output['global'] = recall_precision_f1_f2.recall_precision_f1_f2(fn, tp , fp)
# Persist the aggregate metrics (JSON) and the per-annotation audit trail (CSV)
# alongside the cleaned model output, using the shared run prefix.
with open(path + "results.json", 'w', encoding='utf-8') as file:
    json.dump(output, file)
df.to_csv(path + 'full_results.csv', index=False)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment