Skip to content
Snippets Groups Projects
Commit 2ecf6186 authored by laura.riviere's avatar laura.riviere
Browse files

update Read.me and config files

parent 4c24c831
Branches
No related tags found
1 merge request!2Dev expes
...@@ -41,7 +41,7 @@ pip install -r requirements.txt ...@@ -41,7 +41,7 @@ pip install -r requirements.txt
``` ```
## Configuration file: to choose or to complete ## Configuration file: to choose or to complete
- `code/config_1.json` Config for usecase_1 : take a sentence-split text, apply ToNy, output same text but with EDU brackets. - `code/config_global_X.json` See global_config_file_guideline.md.
## Run usecase 1 ## Run usecase 1
......
{
  "usecase_description": "Config file for usecase_1 : from a text, get the same text but with EDU bracket using ToNy segmenter.",
  "input": {
    "name": "chaperontest",
    "file": ".ss",
    "folder_path": "../data/chaperontest",
    "format": "raw_sentences",
    "language": "fr",
    "gold": false
  },
  "output": {
    "format": "bracket",
    "framework": "sdrt"
  },
  "steps": {
    "main": "annotation",
    "pre-processing": {
      "tokenization": false,
      "sentence_split": false,
      "sentence_split_splitor": false,
      "syntactic_parsing": false,
      "NER_format_initialisation": false
    },
    "discourse_segmenter": {
      "model": "tony"
    },
    "post-processing": {
      "json_to_tab": true,
      "tab_to_bracket": false
    },
    "evaluation": false
  }
}
{
  "usecase_description": "Config file for usecase_1 : from a text, get the same text but with EDU using an existing model.",
  "input": {
    "name": "chaperontest",
    "file": ".ss",
    "folder_path": "../data/chaperontest",
    "format": "truc",
    "language": "fr",
    "gold": true
  },
  "output": {
    "format": "ner_tok",
    "framework": "rst"
  },
  "steps": {
    "main": "annotation",
    "pre-processing": {
      "tokenization": true,
      "sentence_split": false,
      "sentence_split_splitor": "stanza",
      "syntactic_parsing": false,
      "NER_format_initialisation": true
    },
    "discourse_segmenter": {
      "model": "tony",
      "training": {
        "toolkit": null,
        "pre_trained_lm": null,
        "config_file": null,
        "train_data_path": null,
        "validation_data_path": null
      }
    },
    "post-processing": {
      "json_to_tab": true,
      "tab_to_bracket": true
    },
    "evaluation": false,
    "gold_test_data_path": null
  }
}
{
  "usecase_description": "Config file for usecase_2 : Take a EDU gold segmented text au format tok as input, use a loaded model to make predictions. Output scores of model predictions against gold, and output discrepancies. To start, we evaluate tony on annodis dev set.",
  "input": {
    "name": "fra.sdrt.annodis_dev",
    "file": ".ttok",
    "folder_path": "../data/fra.sdrt.annodis_dev",
    "format": "truc",
    "language": "fr",
    "gold": true
  },
  "output": {
    "format": "ner_tok",
    "framework": "sdrt"
  },
  "steps": {
    "main": "test",
    "pre-processing": {
      "tokenization": false,
      "sentence_split": true,
      "sentence_split_splitor": "stanza",
      "syntactic_parsing": false,
      "NER_format_initialisation": true
    },
    "discourse_segmenter": {
      "model": "tony"
    },
    "post-processing": {
      "json_to_tab": true,
      "tab_to_bracket": false
    },
    "evaluation": true
  }
}
{
  "usecase_description": "Config file for usecase_2.2 : Take a EDU gold segmented text au format conll as input, use a loaded model to make predictions. Output scores of model predictions against gold, and output discrepancies. To start, we evaluate tony on annodis dev set.",
  "input": {
    "name": "fra.sdrt.annodis_dev",
    "file": ".conllu",
    "file_options": [".conllu", ".tok"],
    "folder_path": "../data/fra.sdrt.annodis_dev",
    "format": "truc",
    "language": "fr",
    "gold": true
  },
  "output": {
    "format": "ner_tok",
    "framework": "sdrt"
  },
  "steps": {
    "main": "test",
    "pre-processing": {
      "tokenization": false,
      "sentence_split": false,
      "sentence_split_splitor": "stanza",
      "syntactic_parsing": false,
      "NER_format_initialisation": true
    },
    "discourse_segmenter": {
      "model": "tony"
    },
    "post-processing": {
      "json_to_tab": true,
      "tab_to_bracket": false
    },
    "evaluation": true
  }
}
{
  "usecase_description": "Config file for usecase_3 : Take a EDU gold segmented set of train/dev/test of texts au format conll as input, train a model, output scores.",
  "input": {
    "name": "eng.rst.rstdt",
    "file": ".conllu",
    "file_options": [".conllu", ".tok"],
    "folder_path": "../data/eng.rst.rstdt",
    "format": "truc",
    "language": "en",
    "gold": true
  },
  "output": {
    "format": "ner_tok",
    "framework": "rst"
  },
  "steps": {
    "main": "train",
    "pre-processing": {
      "tokenization": false,
      "sentence_split": false,
      "sentence_split_splitor": "stanza",
      "syntactic_parsing": false,
      "NER_format_initialisation": true
    },
    "discourse_segmenter": {
      "model": null,
      "training": {
        "toolkit": "allennlp",
        "pre_trained_lm": "bert",
        "config_file": "../model/config_training.jsonnet",
        "train_data_path": "../data/eng.rst.rstdt/eng.rst.rstdt_train.conllu",
        "validation_data_path": "../data/eng.rst.rstdt/eng.rst.rstdt_dev.conllu"
      }
    },
    "post-processing": {
      "json_to_tab": false,
      "tab_to_bracket": false
    },
    "evaluation": true,
    "gold_test_data_path": "../data/eng.rst.rstdt/eng.rst.rstdt_test.conllu"
  }
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment