Skip to content
Snippets Groups Projects
Commit af2160d7 authored by laura.riviere's avatar laura.riviere
Browse files

clean new config

parent cfc00e33
Branches
No related tags found
1 merge request!3Refacto 1205
{
"usecase_description": "Config file for usecase_1 : from a text, get the same text but with EDU bracket.",
"usecase_description": "Config file for usecase_1 : from a raw text, get the same text but with EDU bracket.",
"data_raw": {
"name": "edgar_poe_en",
"exte": ".txt",
"language": "en",
"existing_metadata": false
"existing_metadata": true
},
"steps":{
"main": "annotation",
......@@ -35,7 +35,6 @@
},
"output":{
"file":{
"json_to_tab": true,
"tab_to_bracket": true,
"conllu":true,
"metadata": true
......
{
"usecase_description": "Config file for usecase_1 : from a tokenized text, get the same text but with EDU bracket.",
"data_raw": {
"name": "edgar_poe_short",
"exte": ".conll",
"language": "en",
"existing_metadata": true
},
"steps":{
"main": "annotation",
"pre-processing": {
"to_do": false,
"syntactic_tool": "stanza",
"sentence_split": true,
"tokenization": true,
"syntactic_parsing": true,
"create_metadata": {
"to_do": true,
"line": "paragraph",
"sent": "sent"
}
},
"discourse_segmenter": {
"model": "/home/lriviere/andiamo/discut/Results_conllu/results_eng.rst.gum-eng_bert/model.tar.gz",
"training": {
"toolkit": null,
"pre_trained_lm": null,
"config_file": null,
"train_data_path": null,
"validation_data_path": null
}
},
"evaluation": false,
"gold_test_data_path": null
},
"output":{
"file":{
"conllu":true,
"metadata": true,
"tab_to_bracket": false
},
"scores":false
}
}
......@@ -27,6 +27,7 @@
},
"post-processing": {
"json_to_tab": false,
"metadata_conll": false,
"tab_to_bracket":false
},
"evaluation": true,
......
......@@ -240,7 +240,7 @@ if __name__ == '__main__':
now = datetime.now()
#stamp = re.sub('[\s:]', '_', str(now))
stamp = "debug1205"
stamp = "_debug1214"
my_logs = {}
my_logs['stamp'] = stamp
......
......@@ -20,6 +20,7 @@ from classes_def_2 import Data, Process, Output
import utils_2.syntactic_parsing as synt_pars
import utils.conv2ner as conv_to_ner # TODO clean it
import utils.json2conll as json_to_connl # TODO clean it
import utils.training_allennlp as tr_allen
......@@ -92,7 +93,7 @@ def make_predictions(data_in, model_path):
model = model_path # add def get_model from v1
data_out = f"{data.resu}/{data.name}_pred.json"
#cmd = f"allennlp predict --use-dataset-reader --output-file {data_out} {model_path} {data_in} &> {steps.data.resu}/logs.txt"
cmd = f"allennlp predict --use-dataset-reader --output-file {data_out} {model_path} {data_in}" # &> {steps.data.resu}/logs.txt"
cmd = f"allennlp predict --use-dataset-reader --output-file {data_out} {model_path} {data_in} &> {steps.data.resu}/logs_predictions.txt"
os.system(cmd)
return data_out
......@@ -126,17 +127,24 @@ if __name__ == '__main__':
create_folders([data.conv, data.resu])
data_preprocessed = pre_processing(data, steps)
#data_preprocessed = "/home/lriviere/andiamo/discut22/data/edgar_poe_short/data_converted_vendredi/edgar_poe_short.conll"
#TEST data_preprocessed = "/home/lriviere/andiamo/discut22/data/edgar_poe_short/data_converted_vendredi/edgar_poe_short.conll"
data_ner = data_to_ner_format(data_preprocessed)
if steps.main == "annotation":
#data_pred_json = make_predictions(data_ner, steps.model)
data_pred_json = "/home/lriviere/andiamo/discut22/data/edgar_poe_short/results_vendredi/edgar_poe_short_pred.json"
if steps.main == "annotation" or steps.main == "test":
data_pred_json = make_predictions(data_ner, steps.model)
#data_pred_json = "/home/lriviere/andiamo/discut22/data/edgar_poe_short/results_vendredi/edgar_poe_short_pred.json"
if prod.metadata == True:
data_pred_and_meta_conll = pred_json_to_conll_with_metadata(data_pred_json, data_preprocessed)
else:
data_pred_toke = pred_json_to_toke(data_pred_json)
#elif steps.main == "train":
#scores = compare_pred_gold()
#print_logs()
\ No newline at end of file
......@@ -33,9 +33,18 @@ def main(steps):
#### train, has_per == False
# allennlp train -s Results_${CONFIG}/results_${OUTPUT} ${CODE}configs/${MODEL}.jsonnet --include-package allen_custom.custom_conll_reader --include-package allen_custom.custom_simple_tagger --include-package allen_custom.custom_disrpt_reader --include-package allen_custom.custom_bert_token_embedder
# allennlp train -s Resultts_conllu/results_eng.rst.rstdt_bert ../code/utils/configs/bert.jsonnet ....
# Discut - repo morteza
#allennlp train -s Results_${CONFIG}/results_${OUTPUT} ${CODE}configs/bert.jsonnet
cmd2 = f"allennlp train -s {steps.data.resu} {tr_config}"
# Discut-gitlab
cmd = f"allennlp train -s {steps.data.resu} {tr_config} --include-package allen_custom.custom_conll_reader --include-package allen_custom.custom_simple_tagger --include-package allen_custom.custom_disrpt_reader --include-package allen_custom.custom_bert_token_embedder"
print(cmd)
os.system(cmd)
print(cmd2)
os.system(cmd2)
# then...
# TODO:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment