diff --git a/code/config_global_1.2.json b/code/config_global_1.2.json index a6c9f8ccf5aba587ac066181ccaee169ccc38717..7a6b920522984459dccd26c878b43bf7821c9797 100644 --- a/code/config_global_1.2.json +++ b/code/config_global_1.2.json @@ -1,19 +1,19 @@ { - "usecase_description": "Config file for usecase_1 : from a raw text, get the same text but with EDU bracket.", + "usecase_description": "Config file for usecase_1 : from a tokenized text, get the same text but with EDU bracket.", "data_raw": { - "name": "edgar_poe_en", - "exte": ".txt", + "name": "edgar_poe_short", + "exte": ".conll", "language": "en", "existing_metadata": true }, "steps":{ "main": "annotation", "pre-processing": { - "to_do": true, + "to_do": false, "syntactic_tool": "stanza", "sentence_split": true, "tokenization": true, - "syntactic_parsing": false, + "syntactic_parsing": true, "create_metadata": { "to_do": true, "line": "paragraph", @@ -30,16 +30,17 @@ "validation_data_path": null } }, + "evaluation": true, "gold_test_data_path": null }, "output":{ "conll_file":{ "to_do": true, "metadata": true, - "with_gold_labels": false + "with_gold_labels": true }, "txt_file":{ - "to_do": false, + "to_do": true, "metadata": true } } diff --git a/code/config_global_1.21.json b/code/config_global_1.21.json deleted file mode 100644 index 21351a824b3bbaf6558f3f1b307911bd3af3c570..0000000000000000000000000000000000000000 --- a/code/config_global_1.21.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "usecase_description": "Config file for usecase_1 : from a tokenized text, get the same text but with EDU bracket.", - "data_raw": { - "name": "edgar_poe_short", - "exte": ".conll", - "language": "en", - "existing_metadata": true - }, - "steps":{ - "main": "annotation", - "pre-processing": { - "to_do": false, - "syntactic_tool": "stanza", - "sentence_split": true, - "tokenization": true, - "syntactic_parsing": true, - "create_metadata": { - "to_do": true, - "line": "paragraph", - "sent": "sent" - } - }, - "discourse_segmenter": { - "model": "/home/lriviere/andiamo/discut/Results_conllu/results_eng.rst.gum-eng_bert/model.tar.gz", - "training": { - "toolkit": null, - "pre_trained_lm": null, - "config_file": null, - "train_data_path": null, - "validation_data_path": null - } - }, - "gold_test_data_path": null - }, - "output":{ - "conll_file":{ - "to_do": true, - "metadata": true, - "with_gold_labels": true - }, - "txt_file":{ - "to_do": true, - "metadata": true - } - } -} - - - diff --git a/code/config_global_2.2.json b/code/config_global_2.2.json deleted file mode 100644 index 395e75891f2c6a204c2e194eab4c9ffb08ec3572..0000000000000000000000000000000000000000 --- a/code/config_global_2.2.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "usecase_description": "Config file for usecase_2 : from a gold text, make predictions with an existing model, compare gold and predictions with metrics.", - "data_raw": { - "name": "eng.pdtb.pdtb_dev", - "exte": ".conllu", - "language": "en", - "existing_metadata": true - }, - "steps":{ - "main": "test", - "pre-processing": { - "to_do": false, - "syntactic_tool": "stanza", - "sentence_split": true, - "tokenization": true, - "syntactic_parsing": true, - "create_metadata": { - "to_do": true, - "line": "paragraph", - "sent": "sent" - } - }, - "discourse_segmenter": { - "model": "/home/lriviere/andiamo/morteza/discut/Results_conllu/results_eng.pdtb.pdtb_bert/model.tar.gz", - "training": { - "toolkit": null, - "pre_trained_lm": null, - "config_file": null, - "train_data_path": null, - "validation_data_path": null - } - }, - "gold_test_data_path": null - }, - "output":{ - "conll_file":{ - "to_do": true, - "metadata": true, - "with_gold_labels": true - }, - "txt_file":{ - "to_do": true, - "metadata": true - } - } -} - - - diff --git a/code/config_global_3.json b/code/config_global_3.json index ad453a45e30169b6ce0d95ca9e63fcd0986c8a4c..c24b4ff27dd98227b58b2e886e40017582902503 100644 --- a/code/config_global_3.json +++ b/code/config_global_3.json @@ -30,6 +30,7 @@ "validation_data_path": "eng.rst.rstdt_dev" } }, + "evaluation": true, "gold_test_data_path": "eng.rst.rstdt_dev" }, "output":{ diff --git a/code/config_global_4.json b/code/config_global_4.json index 9e1a95cd1e65a47fc99e3de9fa454d2a70393386..52e75d35240168369a151ec37f75b5796626d0b7 100644 --- a/code/config_global_4.json +++ b/code/config_global_4.json @@ -30,6 +30,7 @@ "validation_data_path": "/home/lriviere/andiamo/discut22/data/eng.sdrt.stac/eng.sdrt.stac_dev.conllu" } }, + "evaluation": true, "gold_test_data_path": "eng.rst.rstdt_dev" }, "output":{ diff --git a/code/discut22_2.py b/code/discut22_2.py index abdc9e58cbb7b5d5d08265553f0bf14eac3539cd..4c5caaacfef951694f827e867fa9c1e3cd866301 100644 --- a/code/discut22_2.py +++ b/code/discut22_2.py @@ -248,6 +248,7 @@ class Process: self.crea_meta = infos['pre-processing']['create_metadata']['to_do'] self.meta_line = infos['pre-processing']['create_metadata']['line'] self.meta_sent = infos['pre-processing']['create_metadata']['sent'] + self.eval = infos['evaluation'] if self.main == "train" or "fine_tune": self.set_train = infos['discourse_segmenter']['training']['train_data_path'] @@ -261,11 +262,6 @@ class Process: self.model = infos['discourse_segmenter']['model'] # ezpz for Tony self.test_data = infos['gold_test_data_path'] - def get_evaluation_status(self): - if self.main == "test" or self.main == "train" or self.main == "fine_tune": - self.eval = True - else: - self.eval = False # "annotation" def get_model(self): self.model_path = "" @@ -397,7 +393,6 @@ if __name__ == '__main__': data.make_ner_format() steps.get_model() data.make_predictions(steps) # output allennlp JSON - steps.get_evaluation_status() if steps.eval == True: data.evaluation(steps, prod) else: @@ -410,7 +405,6 @@ if __name__ == '__main__': steps.update_training_config() steps.training(data) data.make_predictions(steps, js_name=steps.set_test, fi_ner=steps.test_ner) - steps.get_evaluation_status() if steps.eval == True: data.evaluation(steps, prod, name=steps.test_data) @@ -422,7 +416,6 @@ if __name__ == '__main__': steps.update_training_config() steps.fine_tuning(data) data.make_predictions(steps, js_name=steps.set_test, fi_ner=steps.test_ner, model=steps.model_ft_path) - steps.get_evaluation_status() if steps.eval == True: data.evaluation(steps, prod, name=steps.test_data, model=steps.model_ft_path)