From 85322ff0cd1c0a1778c58e5808f924d20b153108 Mon Sep 17 00:00:00 2001
From: Julien Breton <julien.breton@moncitron.fr>
Date: Sun, 4 Feb 2024 14:46:28 +0900
Subject: [PATCH] change hyperparameters + update results

---
 modules/evaluators/llm_annotation_evaluator.py        | 11 +++++++----
 results/LLM/GPT-4/GPT-4_few_shot_results.json         |  2 +-
 results/LLM/Mistral-7b/MISTRAL_few_shot_results.json  |  2 +-
 .../LLM/Mistral-7b/MISTRAL_fine_tuned_results.json    |  2 +-
 results/LLM/Mistral-7b/MISTRAL_zero_shot_results.json |  2 +-
 .../LLM/Mixtral-8x7b/MIXTRAL_few_shot_results.json    |  2 +-
 .../LLM/Mixtral-8x7b/MIXTRAL_fine_tuned_results.json  |  2 +-
 .../LLM/Mixtral-8x7b/MIXTRAL_zero_shot_results.json   |  2 +-
 8 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/modules/evaluators/llm_annotation_evaluator.py b/modules/evaluators/llm_annotation_evaluator.py
index 4c4e733..da139e2 100644
--- a/modules/evaluators/llm_annotation_evaluator.py
+++ b/modules/evaluators/llm_annotation_evaluator.py
@@ -7,8 +7,8 @@ recall_precision_f1_f2 = importlib.util.module_from_spec(spec)
 spec.loader.exec_module(recall_precision_f1_f2)
 
 
-data_path = "../../results/LLM/GPT-4/GPT-4_zero_shot_answers.json"
-result_path = "../../results/LLM/GPT-4/GPT-4_zero_shot_results.json"
+#data_path = "../../results/LLM/GPT-4/GPT-4_zero_shot_answers.json"
+#result_path = "../../results/LLM/GPT-4/GPT-4_zero_shot_results.json"
 
 #data_path = "../../results/LLM/Mixtral-8x7b/MIXTRAL_zero_shot_cleaned.json"
 #result_path="../../results/LLM/Mixtral-8x7b/MIXTRAL_zero_shot_results.json"
@@ -28,8 +28,8 @@ result_path = "../../results/LLM/GPT-4/GPT-4_zero_shot_results.json"
 #data_path = "../../results/LLM/Mistral-7b/MISTRAL_fine_tuned_cleaned.json"
 #result_path="../../results/LLM/Mistral-7b/MISTRAL_fine_tuned_results.json"
 
-#data_path = "../../results/LLM/Mixtral-8x7b/MIXTRAL_fine_tuned_cleaned.json"
-#result_path="../../results/LLM/Mixtral-8x7b/MIXTRAL_fine_tuned_results.json"
+data_path = "../../results/LLM/Mixtral-8x7b/MIXTRAL_fine_tuned_cleaned.json"
+result_path="../../results/LLM/Mixtral-8x7b/MIXTRAL_fine_tuned_results.json"
 
 
 with open('../../data/evalQS.json', 'r') as fichier:
@@ -76,6 +76,9 @@ for sentence, classes in eval_data.items():
         if tag not in output['break_down']:
             continue
 
+        if sentence not in llm_data:
+            continue
+
         if tag in llm_data[sentence]:
             llm_values = llm_data[sentence][tag]
         else:
diff --git a/results/LLM/GPT-4/GPT-4_few_shot_results.json b/results/LLM/GPT-4/GPT-4_few_shot_results.json
index 55f9299..7cae302 100644
--- a/results/LLM/GPT-4/GPT-4_few_shot_results.json
+++ b/results/LLM/GPT-4/GPT-4_few_shot_results.json
@@ -1 +1 @@
-{"global": {"recall": 0.20118845500848898, "precision": 0.2453416149068323, "f1": 0.2210820895522388, "f2": 0.20870024656569217}, "break_down": {"action": {"perfect_equals": 2, "subpart": 279, "miss_classification": 0, "hallucination": 3}, "actor": {"perfect_equals": 69, "subpart": 70, "miss_classification": 3, "hallucination": 31}, "artifact": {"perfect_equals": 54, "subpart": 161, "miss_classification": 2, "hallucination": 35}, "condition": {"perfect_equals": 36, "subpart": 105, "miss_classification": 2, "hallucination": 13}, "definition": {"perfect_equals": 0, "subpart": 0, "miss_classification": 3, "hallucination": 22}, "location": {"perfect_equals": 4, "subpart": 17, "miss_classification": 0, "hallucination": 16}, "modality": {"perfect_equals": 46, "subpart": 20, "miss_classification": 4, "hallucination": 20}, "reference": {"perfect_equals": 12, "subpart": 43, "miss_classification": 0, "hallucination": 8}, "time": {"perfect_equals": 14, "subpart": 35, "miss_classification": 0, "hallucination": 4}}}
\ No newline at end of file
+{"global": {"recall": 0.5197095435684648, "precision": 0.5180972078593589, "f1": 0.518902123252201, "f2": 0.5193862741032553}, "break_down": {"action": {"perfect_equals": 1, "subpart": 98, "not_covered": 57, "out_of_scope": 182}, "actor": {"perfect_equals": 48, "subpart": 28, "not_covered": 57, "out_of_scope": 63}, "artifact": {"perfect_equals": 38, "subpart": 67, "not_covered": 136, "out_of_scope": 110}, "condition": {"perfect_equals": 29, "subpart": 42, "not_covered": 108, "out_of_scope": 70}, "definition": {"perfect_equals": 0, "subpart": 0, "not_covered": 0, "out_of_scope": 0}, "location": {"perfect_equals": 4, "subpart": 11, "not_covered": 19, "out_of_scope": 6}, "modality": {"perfect_equals": 44, "subpart": 18, "not_covered": 19, "out_of_scope": 4}, "reference": {"perfect_equals": 12, "subpart": 27, "not_covered": 38, "out_of_scope": 16}, "time": {"perfect_equals": 11, "subpart": 23, "not_covered": 29, "out_of_scope": 15}}}
\ No newline at end of file
diff --git a/results/LLM/Mistral-7b/MISTRAL_few_shot_results.json b/results/LLM/Mistral-7b/MISTRAL_few_shot_results.json
index b2641d5..f3fa84f 100644
--- a/results/LLM/Mistral-7b/MISTRAL_few_shot_results.json
+++ b/results/LLM/Mistral-7b/MISTRAL_few_shot_results.json
@@ -1 +1 @@
-{"global": {"recall": 0.011035653650254669, "precision": 0.1015625, "f1": 0.019908116385911178, "f2": 0.013429752066115703}, "break_down": {"action": {"perfect_equals": 0, "subpart": 46, "miss_classification": 0, "hallucination": 1}, "actor": {"perfect_equals": 5, "subpart": 11, "miss_classification": 1, "hallucination": 12}, "artifact": {"perfect_equals": 2, "subpart": 12, "miss_classification": 0, "hallucination": 4}, "condition": {"perfect_equals": 2, "subpart": 38, "miss_classification": 0, "hallucination": 23}, "definition": {"perfect_equals": 0, "subpart": 0, "miss_classification": 0, "hallucination": 1}, "location": {"perfect_equals": 1, "subpart": 2, "miss_classification": 0, "hallucination": 2}, "modality": {"perfect_equals": 1, "subpart": 4, "miss_classification": 0, "hallucination": 2}, "reference": {"perfect_equals": 1, "subpart": 2, "miss_classification": 0, "hallucination": 3}, "time": {"perfect_equals": 1, "subpart": 0, "miss_classification": 0, "hallucination": 10}}}
\ No newline at end of file
+{"global": {"recall": 0.07261410788381743, "precision": 0.546875, "f1": 0.12820512820512822, "f2": 0.08785140562248996}, "break_down": {"action": {"perfect_equals": 1, "subpart": 24, "not_covered": 131, "out_of_scope": 21}, "actor": {"perfect_equals": 7, "subpart": 5, "not_covered": 121, "out_of_scope": 4}, "artifact": {"perfect_equals": 1, "subpart": 6, "not_covered": 234, "out_of_scope": 7}, "condition": {"perfect_equals": 3, "subpart": 16, "not_covered": 160, "out_of_scope": 21}, "definition": {"perfect_equals": 0, "subpart": 0, "not_covered": 0, "out_of_scope": 0}, "location": {"perfect_equals": 1, "subpart": 1, "not_covered": 32, "out_of_scope": 1}, "modality": {"perfect_equals": 1, "subpart": 1, "not_covered": 79, "out_of_scope": 3}, "reference": {"perfect_equals": 1, "subpart": 1, "not_covered": 75, "out_of_scope": 1}, "time": {"perfect_equals": 1, "subpart": 0, "not_covered": 62, "out_of_scope": 0}}}
\ No newline at end of file
diff --git a/results/LLM/Mistral-7b/MISTRAL_fine_tuned_results.json b/results/LLM/Mistral-7b/MISTRAL_fine_tuned_results.json
index 703cced..fcd111c 100644
--- a/results/LLM/Mistral-7b/MISTRAL_fine_tuned_results.json
+++ b/results/LLM/Mistral-7b/MISTRAL_fine_tuned_results.json
@@ -1 +1 @@
-{"global": {"recall": 0.06281833616298811, "precision": 0.10834553440702782, "f1": 0.07952713594841483, "f2": 0.0685820203892493}, "break_down": {"action": {"perfect_equals": 0, "subpart": 138, "miss_classification": 0, "hallucination": 8}, "actor": {"perfect_equals": 24, "subpart": 56, "miss_classification": 4, "hallucination": 16}, "artifact": {"perfect_equals": 14, "subpart": 83, "miss_classification": 0, "hallucination": 8}, "condition": {"perfect_equals": 9, "subpart": 199, "miss_classification": 2, "hallucination": 41}, "definition": {"perfect_equals": 0, "subpart": 0, "miss_classification": 1, "hallucination": 3}, "location": {"perfect_equals": 0, "subpart": 8, "miss_classification": 0, "hallucination": 5}, "modality": {"perfect_equals": 13, "subpart": 38, "miss_classification": 2, "hallucination": 24}, "reference": {"perfect_equals": 2, "subpart": 44, "miss_classification": 4, "hallucination": 33}, "time": {"perfect_equals": 12, "subpart": 43, "miss_classification": 6, "hallucination": 41}}}
\ No newline at end of file
+{"global": {"recall": 0.28319502074688796, "precision": 0.39970717423133234, "f1": 0.33151183970856096, "f2": 0.3007270323859881}, "break_down": {"action": {"perfect_equals": 0, "subpart": 53, "not_covered": 103, "out_of_scope": 85}, "actor": {"perfect_equals": 27, "subpart": 13, "not_covered": 93, "out_of_scope": 40}, "artifact": {"perfect_equals": 8, "subpart": 26, "not_covered": 207, "out_of_scope": 63}, "condition": {"perfect_equals": 12, "subpart": 49, "not_covered": 118, "out_of_scope": 147}, "definition": {"perfect_equals": 0, "subpart": 0, "not_covered": 0, "out_of_scope": 0}, "location": {"perfect_equals": 0, "subpart": 2, "not_covered": 32, "out_of_scope": 6}, "modality": {"perfect_equals": 13, "subpart": 18, "not_covered": 50, "out_of_scope": 20}, "reference": {"perfect_equals": 2, "subpart": 21, "not_covered": 54, "out_of_scope": 23}, "time": {"perfect_equals": 14, "subpart": 15, "not_covered": 34, "out_of_scope": 26}}}
\ No newline at end of file
diff --git a/results/LLM/Mistral-7b/MISTRAL_zero_shot_results.json b/results/LLM/Mistral-7b/MISTRAL_zero_shot_results.json
index 0546bc6..bb25660 100644
--- a/results/LLM/Mistral-7b/MISTRAL_zero_shot_results.json
+++ b/results/LLM/Mistral-7b/MISTRAL_zero_shot_results.json
@@ -1 +1 @@
-{"global": {"recall": 0.054982817869415807, "precision": 0.07364787111622555, "f1": 0.06296114117068373, "f2": 0.057918552036199104}, "break_down": {"action": {"perfect_equals": 0, "subpart": 161, "miss_classification": 0, "hallucination": 2}, "actor": {"perfect_equals": 13, "subpart": 77, "miss_classification": 3, "hallucination": 28}, "artifact": {"perfect_equals": 18, "subpart": 150, "miss_classification": 0, "hallucination": 22}, "condition": {"perfect_equals": 5, "subpart": 260, "miss_classification": 2, "hallucination": 45}, "definition": {"perfect_equals": 0, "subpart": 0, "miss_classification": 5, "hallucination": 21}, "location": {"perfect_equals": 2, "subpart": 8, "miss_classification": 0, "hallucination": 5}, "modality": {"perfect_equals": 12, "subpart": 46, "miss_classification": 2, "hallucination": 28}, "reference": {"perfect_equals": 3, "subpart": 53, "miss_classification": 7, "hallucination": 24}, "time": {"perfect_equals": 11, "subpart": 51, "miss_classification": 2, "hallucination": 31}}}
\ No newline at end of file
+{"global": {"recall": 0.2780692549842602, "precision": 0.3045977011494253, "f1": 0.2907295666483818, "f2": 0.2829987184963691}, "break_down": {"action": {"perfect_equals": 0, "subpart": 43, "not_covered": 112, "out_of_scope": 118}, "actor": {"perfect_equals": 16, "subpart": 23, "not_covered": 93, "out_of_scope": 51}, "artifact": {"perfect_equals": 13, "subpart": 42, "not_covered": 182, "out_of_scope": 113}, "condition": {"perfect_equals": 3, "subpart": 45, "not_covered": 128, "out_of_scope": 217}, "definition": {"perfect_equals": 0, "subpart": 0, "not_covered": 0, "out_of_scope": 0}, "location": {"perfect_equals": 2, "subpart": 2, "not_covered": 29, "out_of_scope": 6}, "modality": {"perfect_equals": 12, "subpart": 14, "not_covered": 54, "out_of_scope": 32}, "reference": {"perfect_equals": 3, "subpart": 22, "not_covered": 52, "out_of_scope": 31}, "time": {"perfect_equals": 10, "subpart": 15, "not_covered": 38, "out_of_scope": 37}}}
\ No newline at end of file
diff --git a/results/LLM/Mixtral-8x7b/MIXTRAL_few_shot_results.json b/results/LLM/Mixtral-8x7b/MIXTRAL_few_shot_results.json
index 6276988..a65d01d 100644
--- a/results/LLM/Mixtral-8x7b/MIXTRAL_few_shot_results.json
+++ b/results/LLM/Mixtral-8x7b/MIXTRAL_few_shot_results.json
@@ -1 +1 @@
-{"global": {"recall": 0.01867572156196944, "precision": 0.10679611650485436, "f1": 0.031791907514450865, "f2": 0.022366815778771858}, "break_down": {"action": {"perfect_equals": 0, "subpart": 69, "miss_classification": 0, "hallucination": 7}, "actor": {"perfect_equals": 9, "subpart": 34, "miss_classification": 3, "hallucination": 26}, "artifact": {"perfect_equals": 0, "subpart": 14, "miss_classification": 0, "hallucination": 1}, "condition": {"perfect_equals": 2, "subpart": 49, "miss_classification": 2, "hallucination": 11}, "definition": {"perfect_equals": 0, "subpart": 0, "miss_classification": 0, "hallucination": 1}, "location": {"perfect_equals": 1, "subpart": 1, "miss_classification": 0, "hallucination": 2}, "modality": {"perfect_equals": 5, "subpart": 5, "miss_classification": 1, "hallucination": 1}, "reference": {"perfect_equals": 1, "subpart": 3, "miss_classification": 0, "hallucination": 0}, "time": {"perfect_equals": 4, "subpart": 9, "miss_classification": 3, "hallucination": 4}}}
\ No newline at end of file
+{"global": {"recall": 0.11307053941908714, "precision": 0.529126213592233, "f1": 0.1863247863247863, "f2": 0.13417035942885278}, "break_down": {"action": {"perfect_equals": 0, "subpart": 34, "not_covered": 122, "out_of_scope": 35}, "actor": {"perfect_equals": 11, "subpart": 9, "not_covered": 113, "out_of_scope": 23}, "artifact": {"perfect_equals": 0, "subpart": 5, "not_covered": 236, "out_of_scope": 9}, "condition": {"perfect_equals": 6, "subpart": 22, "not_covered": 151, "out_of_scope": 23}, "definition": {"perfect_equals": 0, "subpart": 0, "not_covered": 0, "out_of_scope": 0}, "location": {"perfect_equals": 1, "subpart": 1, "not_covered": 32, "out_of_scope": 0}, "modality": {"perfect_equals": 5, "subpart": 4, "not_covered": 72, "out_of_scope": 1}, "reference": {"perfect_equals": 1, "subpart": 2, "not_covered": 74, "out_of_scope": 1}, "time": {"perfect_equals": 3, "subpart": 5, "not_covered": 55, "out_of_scope": 5}}}
\ No newline at end of file
diff --git a/results/LLM/Mixtral-8x7b/MIXTRAL_fine_tuned_results.json b/results/LLM/Mixtral-8x7b/MIXTRAL_fine_tuned_results.json
index 2c7fe6b..350bae2 100644
--- a/results/LLM/Mixtral-8x7b/MIXTRAL_fine_tuned_results.json
+++ b/results/LLM/Mixtral-8x7b/MIXTRAL_fine_tuned_results.json
@@ -1 +1 @@
-{"global": {"recall": 0.08149405772495756, "precision": 0.1465648854961832, "f1": 0.10474631751227495, "f2": 0.08943543879262159}, "break_down": {"action": {"perfect_equals": 0, "subpart": 126, "not_covered": -9742, "out_of_scope": 67}, "actor": {"perfect_equals": 26, "subpart": 54, "not_covered": -3621, "out_of_scope": 38}, "artifact": {"perfect_equals": 15, "subpart": 46, "not_covered": -3448, "out_of_scope": 23}, "condition": {"perfect_equals": 13, "subpart": 75, "not_covered": -4972, "out_of_scope": 62}, "definition": {"perfect_equals": 0, "subpart": 0, "not_covered": 0, "out_of_scope": 0}, "location": {"perfect_equals": 1, "subpart": 6, "not_covered": -61, "out_of_scope": 3}, "modality": {"perfect_equals": 28, "subpart": 14, "not_covered": -1640, "out_of_scope": 4}, "reference": {"perfect_equals": 2, "subpart": 6, "not_covered": -217, "out_of_scope": 8}, "time": {"perfect_equals": 11, "subpart": 16, "not_covered": -634, "out_of_scope": 12}}}
\ No newline at end of file
+{"global": {"recall": 0.36099585062240663, "precision": 0.5304878048780488, "f1": 0.42962962962962964, "f2": 0.3856382978723404}, "break_down": {"action": {"perfect_equals": 1, "subpart": 95, "not_covered": 60, "out_of_scope": 97}, "actor": {"perfect_equals": 31, "subpart": 27, "not_covered": 75, "out_of_scope": 60}, "artifact": {"perfect_equals": 12, "subpart": 28, "not_covered": 201, "out_of_scope": 44}, "condition": {"perfect_equals": 22, "subpart": 49, "not_covered": 108, "out_of_scope": 79}, "definition": {"perfect_equals": 0, "subpart": 0, "not_covered": 0, "out_of_scope": 0}, "location": {"perfect_equals": 1, "subpart": 5, "not_covered": 28, "out_of_scope": 4}, "modality": {"perfect_equals": 27, "subpart": 13, "not_covered": 41, "out_of_scope": 6}, "reference": {"perfect_equals": 3, "subpart": 8, "not_covered": 66, "out_of_scope": 5}, "time": {"perfect_equals": 10, "subpart": 16, "not_covered": 37, "out_of_scope": 13}}}
\ No newline at end of file
diff --git a/results/LLM/Mixtral-8x7b/MIXTRAL_zero_shot_results.json b/results/LLM/Mixtral-8x7b/MIXTRAL_zero_shot_results.json
index 614b98a..7532be5 100644
--- a/results/LLM/Mixtral-8x7b/MIXTRAL_zero_shot_results.json
+++ b/results/LLM/Mixtral-8x7b/MIXTRAL_zero_shot_results.json
@@ -1 +1 @@
-{"global": {"recall": 0.08488964346349745, "precision": 0.10834236186348863, "f1": 0.09519276534983341, "f2": 0.08873114463176575}, "break_down": {"action": {"perfect_equals": 0, "subpart": 157, "not_covered": -12029, "out_of_scope": 108}, "actor": {"perfect_equals": 27, "subpart": 80, "not_covered": -4833, "out_of_scope": 45}, "artifact": {"perfect_equals": 23, "subpart": 137, "not_covered": -9301, "out_of_scope": 68}, "condition": {"perfect_equals": 13, "subpart": 69, "not_covered": -4834, "out_of_scope": 55}, "definition": {"perfect_equals": 0, "subpart": 0, "not_covered": 0, "out_of_scope": 0}, "location": {"perfect_equals": 2, "subpart": 8, "not_covered": -123, "out_of_scope": 3}, "modality": {"perfect_equals": 29, "subpart": 19, "not_covered": -1825, "out_of_scope": 9}, "reference": {"perfect_equals": 1, "subpart": 15, "not_covered": -439, "out_of_scope": 14}, "time": {"perfect_equals": 5, "subpart": 23, "not_covered": -728, "out_of_scope": 17}}}
\ No newline at end of file
+{"global": {"recall": 0.4346473029045643, "precision": 0.4519956850053937, "f1": 0.44315177154944474, "f2": 0.43800961739494043}, "break_down": {"action": {"perfect_equals": 1, "subpart": 107, "not_covered": 48, "out_of_scope": 157}, "actor": {"perfect_equals": 25, "subpart": 40, "not_covered": 68, "out_of_scope": 87}, "artifact": {"perfect_equals": 18, "subpart": 77, "not_covered": 146, "out_of_scope": 133}, "condition": {"perfect_equals": 12, "subpart": 50, "not_covered": 117, "out_of_scope": 75}, "definition": {"perfect_equals": 0, "subpart": 0, "not_covered": 0, "out_of_scope": 0}, "location": {"perfect_equals": 2, "subpart": 7, "not_covered": 25, "out_of_scope": 4}, "modality": {"perfect_equals": 28, "subpart": 16, "not_covered": 37, "out_of_scope": 13}, "reference": {"perfect_equals": 1, "subpart": 14, "not_covered": 62, "out_of_scope": 15}, "time": {"perfect_equals": 5, "subpart": 16, "not_covered": 42, "out_of_scope": 24}}}
\ No newline at end of file
-- 
GitLab