Add evaluation as a config option

6cb47426 · laura.riviere · 9544fd95 · 6cb47426 · 9544fd95 · 9544fd95
Commit 6cb47426 authored 2 years ago by laura.riviere
--- a/code/config_global_1.2.json
+++ b/code/config_global_1.2.json
 {
-    "usecase_description": "Config file for usecase_1 : from a raw text, get the same text but with EDU bracket.",
+    "usecase_description": "Config file for usecase_1 : from a tokenized text, get the same text but with EDU bracket.",
    "data_raw": {
-        "name": "edgar_poe_en",
+        "name": "edgar_poe_short",
-        "exte": ".txt",
+        "exte": ".conll",
        "language": "en",
        "existing_metadata": true
    },
    "steps":{
        "main": "annotation",
        "pre-processing": {
-            "to_do": true,
+            "to_do": false,
            "syntactic_tool": "stanza",
            "sentence_split": true,
            "tokenization": true,
-            "syntactic_parsing": false,
+            "syntactic_parsing": true,
            "create_metadata": {
                "to_do": true,
                "line": "paragraph",
@@ -30,16 +30,17 @@
                "validation_data_path": null
            }
        },
+        "evaluation": true,
        "gold_test_data_path": null
    },
    "output":{
        "conll_file":{
            "to_do": true,
            "metadata": true,
-            "with_gold_labels": false
+            "with_gold_labels": true
        },
        "txt_file":{
-            "to_do": false,
+            "to_do": true,
            "metadata": true
        }
    }

--- a/code/config_global_1.21.json
+++ b/code/config_global_1.21.json
-{
-    "usecase_description": "Config file for usecase_1 : from a tokenized text, get the same text but with EDU bracket.",
-    "data_raw": {
-        "name": "edgar_poe_short",
-        "exte": ".conll",
-        "language": "en",
-        "existing_metadata": true
-    },
-    "steps":{
-        "main": "annotation",
-        "pre-processing": {
-            "to_do": false,
-            "syntactic_tool": "stanza",
-            "sentence_split": true,
-            "tokenization": true,
-            "syntactic_parsing": true,
-            "create_metadata": {
-                "to_do": true,
-                "line": "paragraph",
-                "sent": "sent"
-            }
-        },
-        "discourse_segmenter": {
-            "model": "/home/lriviere/andiamo/discut/Results_conllu/results_eng.rst.gum-eng_bert/model.tar.gz",
-            "training": {
-                "toolkit": null,
-                "pre_trained_lm": null,
-                "config_file": null,
-                "train_data_path": null,
-                "validation_data_path": null
-            }
-        },
-        "gold_test_data_path": null
-    },
-    "output":{
-        "conll_file":{
-            "to_do": true,
-            "metadata": true,
-            "with_gold_labels": true
-        },
-        "txt_file":{
-            "to_do": true,
-            "metadata": true
-        }
-    }
-}
--- a/code/config_global_2.2.json
+++ b/code/config_global_2.2.json
-{
-    "usecase_description": "Config file for usecase_2 : from a gold text, make predictions with an existing model, compare gold and predictions with metrics.",
-    "data_raw": {
-        "name": "eng.pdtb.pdtb_dev",
-        "exte": ".conllu",
-        "language": "en",
-        "existing_metadata": true
-    },
-    "steps":{
-        "main": "test",
-        "pre-processing": {
-            "to_do": false,
-            "syntactic_tool": "stanza",
-            "sentence_split": true,
-            "tokenization": true,
-            "syntactic_parsing": true,
-            "create_metadata": {
-                "to_do": true,
-                "line": "paragraph",
-                "sent": "sent"
-            }
-        },
-        "discourse_segmenter": {
-            "model": "/home/lriviere/andiamo/morteza/discut/Results_conllu/results_eng.pdtb.pdtb_bert/model.tar.gz",
-            "training": {
-                "toolkit": null,
-                "pre_trained_lm": null,
-                "config_file": null,
-                "train_data_path": null,
-                "validation_data_path": null
-            }
-        },
-        "gold_test_data_path": null
-    },
-    "output":{
-        "conll_file":{
-            "to_do": true,
-            "metadata": true,
-            "with_gold_labels": true
-        },
-        "txt_file":{
-            "to_do": true,
-            "metadata": true
-        }
-    }
-}
--- a/code/config_global_3.json
+++ b/code/config_global_3.json
@@ -30,6 +30,7 @@
                "validation_data_path": "eng.rst.rstdt_dev"
            }
        },
+        "evaluation": true,
        "gold_test_data_path": "eng.rst.rstdt_dev"
    },
    "output":{

--- a/code/config_global_4.json
+++ b/code/config_global_4.json
@@ -30,6 +30,7 @@
                "validation_data_path": "/home/lriviere/andiamo/discut22/data/eng.sdrt.stac/eng.sdrt.stac_dev.conllu"
            }
        },
+        "evaluation": true,
        "gold_test_data_path": "eng.rst.rstdt_dev"
    },
    "output":{

--- a/code/discut22_2.py
+++ b/code/discut22_2.py
@@ -248,6 +248,7 @@ class Process:
        self.crea_meta = infos['pre-processing']['create_metadata']['to_do']
        self.meta_line = infos['pre-processing']['create_metadata']['line']
        self.meta_sent = infos['pre-processing']['create_metadata']['sent']
+        self.eval = infos['evaluation']
        if self.main == "train" or "fine_tune":
            self.set_train = infos['discourse_segmenter']['training']['train_data_path']
@@ -261,11 +262,6 @@ class Process:
        self.model = infos['discourse_segmenter']['model'] # ezpz for Tony 
        self.test_data = infos['gold_test_data_path']
-    def get_evaluation_status(self):
-        if self.main == "test" or self.main == "train" or self.main == "fine_tune":
-            self.eval = True
-        else:
-            self.eval = False # "annotation"
    def get_model(self):
        self.model_path = ""
@@ -397,7 +393,6 @@ if __name__ == '__main__':
        data.make_ner_format()
        steps.get_model()
        data.make_predictions(steps) # output allennlp JSON
-        steps.get_evaluation_status()
        if steps.eval == True:
            data.evaluation(steps, prod)
        else:
@@ -410,7 +405,6 @@ if __name__ == '__main__':
        steps.update_training_config()
        steps.training(data)
        data.make_predictions(steps, js_name=steps.set_test, fi_ner=steps.test_ner)
-        steps.get_evaluation_status()
        if steps.eval == True:
            data.evaluation(steps, prod, name=steps.test_data)
@@ -422,7 +416,6 @@ if __name__ == '__main__':
        steps.update_training_config()
        steps.fine_tuning(data)
        data.make_predictions(steps, js_name=steps.set_test, fi_ner=steps.test_ner, model=steps.model_ft_path)
-        steps.get_evaluation_status()
        if steps.eval == True:
            data.evaluation(steps, prod, name=steps.test_data, model=steps.model_ft_path)