update readme

ecbd8b76 · laura.riviere · e12e3a19 · e12e3a19 · e12e3a19 · e12e3a19
Commit ecbd8b76 authored 2 years ago by laura.riviere
--- a/code/config_global_1.json
+++ b/code/config_global_1.json
-{
-    "usecase_description": "Config file for usecase_1: from a text, get the same text with EDU brackets added.",
-    "input": {
-        "name": "eng_annotation",
-        "file": ".conllu",
-        "language": "en"
-    },
-    "steps":{
-        "main": "annotation",
-        "pre-processing": {
-            "tokenization": false,
-            "tokenization_tool" : null,
-            "sentence_split": false,
-            "sentence_split_splitor": null,
-            "syntactic_parsing": false, 
-            "NER_format_initialisation": true
-        },
-        "discourse_segmenter": {
-            "model": "/home/lriviere/andiamo/discut22/data/eng.sdrt.stac/results_2022-11-21_15_42_42.923648/model.tar.gz",
-            "training": {
-                "toolkit": null,
-                "pre_trained_lm": null,
-                "config_file": null,
-                "train_data_path": null,
-                "validation_data_path": null
-            }
-        },
-        "post-processing": {
-            "json_to_tab": true,
-            "tab_to_bracket":true
-        },
-        "evaluation": false,
-        "gold_test_data_path": null
-    }
-}
--- a/code/config_global_2.json
+++ b/code/config_global_2.json
-{
-    "usecase_description": "Config file for usecase_2",
-    "input": {
-        "name": "fra.sdrt.annodis_dev",
-        "file": ".conllu",
-        "language": "fr"
-    },
-    "steps":{
-        "main": "test",
-        "pre-processing": {
-            "tokenization": false,
-            "tokenization_tool" : "spacy",
-            "sentence_split": false,
-            "sentence_split_splitor": "stanza",
-            "syntactic_parsing": false, 
-            "NER_format_initialisation": true
-        },
-        "discourse_segmenter": {
-            "model": "tony",
-            "training": {
-                "toolkit": null,
-                "pre_trained_lm": null,
-                "config_file": null,
-                "train_data_path": null,
-                "validation_data_path": null
-            }
-        },
-        "post-processing": {
-            "json_to_tab": true,
-            "metadata_conll": true,
-            "tab_to_bracket":true
-        },
-        "evaluation": true,
-        "gold_test_data_path": null
-    }
-}
--- a/code/config_global_3.json
+++ b/code/config_global_3.json
-{
-    "usecase_description": "Config file for usecase_3: take a gold EDU-segmented train/dev/test set of texts in CoNLL format as input, train a model, and output scores.",
-    "input": {
-        "name": "eng.sdrt.stac",
-        "file": ".conllu",
-        "language": "en"
-    },
-    "steps":{
-        "main": "train",
-        "pre-processing": {
-            "tokenization": false,
-            "tokenization_tool" : null,
-            "sentence_split": false,
-            "sentence_split_splitor": null,
-            "syntactic_parsing": false, 
-            "NER_format_initialisation": true
-        },
-        "discourse_segmenter": {
-            "model": null,
-            "training": {
-                "toolkit": "allennlp",
-                "pre_trained_lm": "bert",
-                "config_file": "../model/config_training_bert.jsonnet",
-                "train_data_path": "../data/eng.sdrt.stac/eng.sdrt.stac_train.conllu",
-                "validation_data_path": "../data/eng.sdrt.stac/eng.sdrt.stac_dev.conllu"
-            }
-        },
-        "post-processing": {
-            "json_to_tab": false,
-            "metadata_conll": false,
-            "tab_to_bracket":false
-        },
-        "evaluation": true,
-        "gold_test_data_path": "../data/eng.sdrt.stac/eng.sdrt.stac_test.conllu"
-    }
-}