diff --git a/modules/evaluators/llm_annotation_evaluator.py b/modules/evaluators/llm_annotation_evaluator.py
index 70fcbcb691734e0c00f01775dc45012e6b46a475..689d5d7996c6c26fb931aba756331381eccb3e45 100644
--- a/modules/evaluators/llm_annotation_evaluator.py
+++ b/modules/evaluators/llm_annotation_evaluator.py
@@ -1,60 +1,56 @@
 import json
 import importlib.util
+import pandas as pd
 
 spec = importlib.util.spec_from_file_location("recall_precision_f1_f2", "./recall_precision_f1_f2.py")
-#spec = importlib.util.spec_from_file_location("recall_precision_f1_f2", "../../recall_precision_f1_f2.py")
 recall_precision_f1_f2 = importlib.util.module_from_spec(spec)
 spec.loader.exec_module(recall_precision_f1_f2)
 
-#data_path = "../../results/LLM/GPT-4/GPT-4_zero_shot_cleaned.json"
-#result_path = "../../results/LLM/GPT-4/GPT-4_zero_shot_results.json"
+path = "../../results/LLM/GPT-4/GPT-4_zero_shot_"
 
-data_path = "../../results/LLM/GPT-4/GPT-4_zero_shot_v2_cleaned.json"
-result_path = "../../results/LLM/GPT-4/GPT-4_zero_shot_v2_results.json"
+#path = "../../results/LLM/GPT-4/GPT-4_zero_shot_v2_"
 
-#data_path = "../../results/LLM/Mixtral-8x7b/MIXTRAL_zero_shot_cleaned.json"
-#result_path="../../results/LLM/Mixtral-8x7b/MIXTRAL_zero_shot_results.json"
+#path="../../results/LLM/Mixtral-8x7b/MIXTRAL_zero_shot_"
 
-#data_path = "../../results/LLM/Mistral-7b/MISTRAL_zero_shot_cleaned.json"
-#result_path="../../results/LLM/Mistral-7b/MISTRAL_zero_shot_results.json"
+#path="../../results/LLM/Mistral-7b/MISTRAL_zero_shot_"
 
-#data_path = "../../results/LLM/GPT-4/GPT-4_few_shot_answers.json"
-#result_path = "../../results/LLM/GPT-4/GPT-4_few_shot_results.json"
+#path = "../../results/LLM/GPT-4/GPT-4_few_shot_"
 
-#data_path = "../../results/LLM/Mixtral-8x7b/MIXTRAL_few_shot_cleaned.json"
-#result_path="../../results/LLM/Mixtral-8x7b/MIXTRAL_few_shot_results.json"
+#path="../../results/LLM/Mixtral-8x7b/MIXTRAL_few_shot_"
 
-#data_path = "../../results/LLM/Mistral-7b/MISTRAL_few_shot_cleaned.json"
-#result_path="../../results/LLM/Mistral-7b/MISTRAL_few_shot_results.json"
+#path="../../results/LLM/Mistral-7b/MISTRAL_few_shot_"
 
-#data_path = "../../results/LLM/Mistral-7b/MISTRAL_fine_tuned_cleaned.json"
-#result_path="../../results/LLM/Mistral-7b/MISTRAL_fine_tuned_results.json"
+#path="../../results/LLM/Mistral-7b/MISTRAL_fine_tuned_"
 
-#data_path = "../../results/LLM/Mixtral-8x7b/MIXTRAL_fine_tuned_cleaned.json"
-#result_path="../../results/LLM/Mixtral-8x7b/MIXTRAL_fine_tuned_results.json"
+#path="../../results/LLM/Mixtral-8x7b/MIXTRAL_fine_tuned_"
 
-#data_path = "../../results/LLM/Miqu-1-70b/MIQU_zero_shot_cleaned.json"
-#result_path="../../results/LLM/Miqu-1-70b/MIQU_zero_shot_results.json"
+#path="../../results/LLM/Miqu-1-70b/MIQU_zero_shot_"
 
 with open('../../data/evalQS.json', 'r') as fichier:
-    eval_data = json.load(fichier)
+    eval_data_temp = json.load(fichier)
 
-for sentence, classes in eval_data.items():
+eval_data = {}
+for sentence, classes in eval_data_temp.items():
+    eval_data[sentence] = {}
     for tag, values in classes.items():
+        eval_data[sentence][tag] = []
         for value in values:
-            eval_data[sentence][tag].remove(value)
-            eval_data[sentence][tag].append(value.replace(" ", ""))
+            eval_data[sentence][tag].append(value.replace(" ", "").lower())
 
-with open(data_path, 'r') as fichier:
-    llm_data = json.load(fichier)
+with open(path+"cleaned.json", 'r') as fichier:
+    llm_data_temp = json.load(fichier)
 
-for sentence, classes in llm_data.items():
+llm_data = {}
+for sentence, classes in llm_data_temp.items():
+    llm_data[sentence] = {}
     for tag, values in classes.items():
+        llm_data[sentence][tag] = []
         for value in values:
-            llm_data[sentence][tag].remove(value)
-            llm_data[sentence][tag].append(value.replace(" ", ""))
+            llm_data[sentence][tag].append(value.replace(" ", "").lower())
+
+del eval_data_temp, llm_data_temp
 
 output = {'global': {}, 'break_down': {}}
 
@@ -72,7 +68,9 @@ output['break_down'] = {element: {
     } for element in ['action', 'actor', 'artifact', 'condition', 'location', 'modality', 'reference', 'time']}
 
-total = 0
+columns = ['status', 'concept', 'eval_annot', 'llm_annot', 'sentence']
+df = pd.DataFrame(columns=columns)
+
 
 
 for sentence, classes in eval_data.items():
     for tag, values in classes.items():
@@ -95,6 +93,13 @@ for sentence, classes in eval_data.items():
             if value in llm_values:
                 actual_perfect_equals += 1
                 output['break_down'][tag]['perfect_equals'] += 1
+                df = df._append({
+                    'status': 'perfect_equals',
+                    'eval_annot': value,
+                    'llm_annot': value,
+                    'sentence': sentence,
+                    'concept': tag
+                }, ignore_index=True)
                 llm_values.remove(value)
                 continue
 
@@ -104,11 +109,26 @@ for sentence, classes in eval_data.items():
                 if value in eval_sentence or eval_sentence in value:
                     actual_subpart += 1
                     output['break_down'][tag]['subpart'] += 1
+                    df = df._append({
+                        'status': 'subpart',
+                        'eval_annot': eval_sentence,
+                        'llm_annot': value,
+                        'sentence': sentence,
+                        'concept': tag
+                    }, ignore_index=True)
                     llm_values.remove(eval_sentence)
                     break
 
         # Out of scope
         output['break_down'][tag]['out_of_scope'] += len(llm_values)
+        for value in llm_values:
+            df = df._append({
+                'status': 'out_of_scope',
+                'eval_annot': '',
+                'llm_annot': value,
+                'sentence': sentence,
+                'concept': tag
+            }, ignore_index=True)
 
         # Annotation not covered
         output['break_down'][tag]['not_covered'] += (len(values) - (actual_perfect_equals + actual_subpart))
@@ -132,5 +152,7 @@ for tag, values in output['break_down'].items():
 print(fn, tp, fp)
 output['global'] = recall_precision_f1_f2.recall_precision_f1_f2(fn, tp , fp)
 
-with open(result_path, 'w', encoding='utf-8') as file:
+with open(path+"results.json", 'w', encoding='utf-8') as file:
     json.dump(output, file)
+
+df.to_csv(path+'full_results.csv', index=False)
\ No newline at end of file
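A note on the normalization hunks: the old code called `remove`/`append` on the very list it was iterating over, which skips elements; the new version builds fresh whitespace-stripped, lowercased dicts instead. A minimal standalone sketch of that step, assuming the `{sentence: {tag: [values]}}` shape used above (`normalize` is an illustrative name, not part of the commit):

```python
# Sketch only (not part of the commit): the normalization step as a
# reusable helper; `normalize` is an illustrative name.
def normalize(annotations):
    """Return a whitespace-stripped, lowercased copy of a
    {sentence: {tag: [values]}} annotation dict."""
    return {
        sentence: {
            tag: [value.replace(" ", "").lower() for value in values]
            for tag, values in classes.items()
        }
        for sentence, classes in annotations.items()
    }

# eval_data = normalize(eval_data_temp)
# llm_data = normalize(llm_data_temp)
```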
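A design note on the new per-annotation audit trail: `DataFrame._append` is a private pandas API (the public `DataFrame.append` was deprecated in pandas 1.4 and removed in 2.0), and re-creating the frame on every row is quadratic in the number of rows. A sketch of an equivalent pattern under the same column layout, collecting plain dicts and constructing the frame once at the end; `rows` and `record` are hypothetical names, not part of the commit:

```python
# Sketch only (not part of the commit): same audit trail, built by
# collecting plain dicts and constructing the DataFrame once.
# `rows` and `record` are hypothetical names for illustration.
import pandas as pd

rows = []

def record(status, concept, eval_annot, llm_annot, sentence):
    # One observation per matched/unmatched annotation; list.append is O(1),
    # whereas DataFrame._append copies the whole frame on every call.
    rows.append({
        'status': status,
        'concept': concept,
        'eval_annot': eval_annot,
        'llm_annot': llm_annot,
        'sentence': sentence,
    })

# Inside the evaluation loops this would replace the df._append calls, e.g.:
#   record('perfect_equals', tag, value, value, sentence)
#   record('subpart', tag, eval_sentence, value, sentence)
#   record('out_of_scope', tag, '', value, sentence)

# Build the frame once at the end, then write it out as before.
df = pd.DataFrame(rows, columns=['status', 'concept', 'eval_annot', 'llm_annot', 'sentence'])
df.to_csv('full_results.csv', index=False)
```

The CSV written at the end should be identical either way; the list-of-dicts route just avoids the per-row copy and the dependence on a private method.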