diff --git a/README.md b/README.md index 0575190aa91d6135f50f82e01a23345665d6b8f7..ec7a9a9eb1fd83a24fe4138544f97db61e0e8c93 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ pip install -r requirements.txt ``` ## Configuration file: to chose or to complete -- `code/config_1.json` Config for usecase_1 : take a sentence splited text, apply ToNy, output same text but with EDU brackets. +- `code/config_global_X.json` See global_config_file_guideline.md. ## Run usecase 1 diff --git a/code/config_1.json b/code/config_1.json deleted file mode 100644 index d8a166e03700d76c5ac47d6c43d385d4616968d7..0000000000000000000000000000000000000000 --- a/code/config_1.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "usecase_description": "Config file for usecase_1 : from a text, get the same text but with EDU bracket using ToNy segmenter.", - "input": { - "name": "chaperontest", - "file": ".ss", - "folder_path": "../data/chaperontest", - "format": "raw_sentences", - "language": "fr", - "gold": false - }, - "output": { - "format": "bracket", - "framework": "sdrt" - }, - "steps":{ - "main": "annotation", - "pre-processing": { - "tokenization": false, - "sentence_split": false, - "sentence_split_splitor": false, - "syntactic_parsing": false, - "NER_format_initialisation": false - }, - "discourse_segmenter": { - "model": "tony" - }, - "post-processing": { - "json_to_tab": true, - "tab_to_bracket":false - }, - "evaluation": false - } -} - diff --git a/code/config_1_fanny.json b/code/config_1_fanny.json deleted file mode 100644 index eae1fa0c89a1987df331f74a0129da3140b02c57..0000000000000000000000000000000000000000 --- a/code/config_1_fanny.json +++ /dev/null @@ -1,43 +0,0 @@ -{ - "usecase_description": "Config file for usecase_1 : from a text, get the same text but with EDU using an existing model.", - "input": { - "name": "chaperontest", - "file": ".ss", - "folder_path": "../data/chaperontest", - "format": "truc", - "language": "fr", - "gold": true - }, - "output": { - "format": "ner_tok", - "framework": "rst" - }, - "steps":{ - "main": "annotation", - "pre-processing": { - "tokenization": true, - "sentence_split": false, - "sentence_split_splitor": "stanza", - "syntactic_parsing": false, - "NER_format_initialisation": true - }, - "discourse_segmenter": { - "model": "tony", - "training": { - "toolkit": null, - "pre_trained_lm": null, - "config_file": null, - "train_data_path": null, - "validation_data_path": null - } - }, - "post-processing": { - "json_to_tab": true, - "tab_to_bracket": true - }, - "evaluation": false, - "gold_test_data_path": null - } -} - - diff --git a/code/config_2.json b/code/config_2.json deleted file mode 100644 index a8334462ffa8e651457dd6ece221c3b13c135a22..0000000000000000000000000000000000000000 --- a/code/config_2.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "usecase_description": "Config file for usecase_2 : Take a EDU gold segmented text au format tok as input, use a loaded model to make predictions. Output scores of model predictions against gold, and output discrepancies. To start, we evaluate tony on annodis dev set.", - "input": { - "name": "fra.sdrt.annodis_dev", - "file": ".ttok", - "folder_path": "../data/fra.sdrt.annodis_dev", - "format": "truc", - "language": "fr", - "gold": true - }, - "output": { - "format": "ner_tok", - "framework": "sdrt" - }, - "steps":{ - "main": "test", - "pre-processing": { - "tokenization": false, - "sentence_split": true, - "sentence_split_splitor": "stanza", - "syntactic_parsing": false, - "NER_format_initialisation": true - }, - "discourse_segmenter": { - "model": "tony" - }, - "post-processing": { - "json_to_tab": true, - "tab_to_bracket":false - }, - "evaluation": true - } -} - - diff --git a/code/config_3.json b/code/config_3.json deleted file mode 100644 index 67b6262a132371d381d63d65d34dc95d5efe9f36..0000000000000000000000000000000000000000 --- a/code/config_3.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "usecase_description": "Config file for usecase_2.2 : Take a EDU gold segmented text au format conll as input, use a loaded model to make predictions. Output scores of model predictions against gold, and output discrepancies. To start, we evaluate tony on annodis dev set.", - "input": { - "name": "fra.sdrt.annodis_dev", - "file": ".conllu", - "file_options": [".conllu", ".tok"], - "folder_path": "../data/fra.sdrt.annodis_dev", - "format": "truc", - "language": "fr", - "gold": true - }, - "output": { - "format": "ner_tok", - "framework": "sdrt" - }, - "steps":{ - "main": "test", - "pre-processing": { - "tokenization": false, - "sentence_split": false, - "sentence_split_splitor": "stanza", - "syntactic_parsing": false, - "NER_format_initialisation": true - }, - "discourse_segmenter": { - "model": "tony" - }, - "post-processing": { - "json_to_tab": true, - "tab_to_bracket":false - }, - "evaluation": true - } -} - - diff --git a/code/config_4.json b/code/config_4.json deleted file mode 100644 index dc1d29fdac4bf41d52b1d4f2fe18670e25a230cc..0000000000000000000000000000000000000000 --- a/code/config_4.json +++ /dev/null @@ -1,44 +0,0 @@ -{ - "usecase_description": "Config file for usecase_3 : Take a EDU gold segmented set of train/dev/test of texts au format conll as input, train a model, output scores.", - "input": { - "name": "eng.rst.rstdt", - "file": ".conllu", - "file_options": [".conllu", ".tok"], - "folder_path": "../data/eng.rst.rstdt", - "format": "truc", - "language": "en", - "gold": true - }, - "output": { - "format": "ner_tok", - "framework": "rst" - }, - "steps":{ - "main": "train", - "pre-processing": { - "tokenization": false, - "sentence_split": false, - "sentence_split_splitor": "stanza", - "syntactic_parsing": false, - "NER_format_initialisation": true - }, - "discourse_segmenter": { - "model": null, - "training": { - "toolkit": "allennlp", - "pre_trained_lm": "bert", - "config_file": "../model/config_training.jsonnet", - "train_data_path": "../data/eng.rst.rstdt/eng.rst.rstdt_train.conllu", - "validation_data_path": "../data/eng.rst.rstdt/eng.rst.rstdt_dev.conllu" - } - }, - "post-processing": { - "json_to_tab": false, - "tab_to_bracket":false - }, - "evaluation": true, - "gold_test_data_path": "../data/eng.rst.rstdt/eng.rst.rstdt_test.conllu" - } -} - -