Skip to content
Snippets Groups Projects
Commit af2160d7 authored by laura.riviere's avatar laura.riviere
Browse files

clean new config

parent cfc00e33
Branches
No related tags found
1 merge request!3Refacto 1205
{
"usecase_description": "Config file for usecase_1 : from a text, get the same text but with EDU bracket.",
"usecase_description": "Config file for usecase_1 : from a raw text, get the same text but with EDU bracket.",
"data_raw": {
"name": "edgar_poe_en",
"exte": ".txt",
"language": "en",
"existing_metadata": false
"existing_metadata": true
},
"steps":{
"main": "annotation",
......@@ -35,7 +35,6 @@
},
"output":{
"file":{
"json_to_tab": true,
"tab_to_bracket": true,
"conllu":true,
"metadata": true
......
{
"usecase_description": "Config file for usecase_1 : from a tokenized text, get the same text but with EDU bracket.",
"data_raw": {
"name": "edgar_poe_short",
"exte": ".conll",
"language": "en",
"existing_metadata": true
},
"steps":{
"main": "annotation",
"pre-processing": {
"to_do": false,
"syntactic_tool": "stanza",
"sentence_split": true,
"tokenization": true,
"syntactic_parsing": true,
"create_metadata": {
"to_do": true,
"line": "paragraph",
"sent": "sent"
}
},
"discourse_segmenter": {
"model": "/home/lriviere/andiamo/discut/Results_conllu/results_eng.rst.gum-eng_bert/model.tar.gz",
"training": {
"toolkit": null,
"pre_trained_lm": null,
"config_file": null,
"train_data_path": null,
"validation_data_path": null
}
},
"evaluation": false,
"gold_test_data_path": null
},
"output":{
"file":{
"conllu":true,
"metadata": true,
"tab_to_bracket": false
},
"scores":false
}
}
......@@ -27,6 +27,7 @@
},
"post-processing": {
"json_to_tab": false,
"metadata_conll": false,
"tab_to_bracket":false
},
"evaluation": true,
......
......@@ -240,7 +240,7 @@ if __name__ == '__main__':
now = datetime.now()
#stamp = re.sub('[\s:]', '_', str(now))
stamp = "debug1205"
stamp = "_debug1214"
my_logs = {}
my_logs['stamp'] = stamp
......
......@@ -20,6 +20,7 @@ from classes_def_2 import Data, Process, Output
import utils_2.syntactic_parsing as synt_pars
import utils.conv2ner as conv_to_ner # TODO clean it
import utils.json2conll as json_to_connl # TODO clean it
import utils.training_allennlp as tr_allen
......@@ -92,7 +93,7 @@ def make_predictions(data_in, model_path):
model = model_path # add def get_model from v1
data_out = f"{data.resu}/{data.name}_pred.json"
#cmd = f"allennlp predict --use-dataset-reader --output-file {data_out} {model_path} {data_in} &> {steps.data.resu}/logs.txt"
cmd = f"allennlp predict --use-dataset-reader --output-file {data_out} {model_path} {data_in}" # &> {steps.data.resu}/logs.txt"
cmd = f"allennlp predict --use-dataset-reader --output-file {data_out} {model_path} {data_in} &> {steps.data.resu}/logs_predictions.txt"
os.system(cmd)
return data_out
......@@ -126,17 +127,24 @@ if __name__ == '__main__':
create_folders([data.conv, data.resu])
data_preprocessed = pre_processing(data, steps)
#data_preprocessed = "/home/lriviere/andiamo/discut22/data/edgar_poe_short/data_converted_vendredi/edgar_poe_short.conll"
#TEST data_preprocessed = "/home/lriviere/andiamo/discut22/data/edgar_poe_short/data_converted_vendredi/edgar_poe_short.conll"
data_ner = data_to_ner_format(data_preprocessed)
if steps.main == "annotation":
#data_pred_json = make_predictions(data_ner, steps.model)
data_pred_json = "/home/lriviere/andiamo/discut22/data/edgar_poe_short/results_vendredi/edgar_poe_short_pred.json"
if steps.main == "annotation" or steps.main == "test":
data_pred_json = make_predictions(data_ner, steps.model)
#data_pred_json = "/home/lriviere/andiamo/discut22/data/edgar_poe_short/results_vendredi/edgar_poe_short_pred.json"
if prod.metadata == True:
data_pred_and_meta_conll = pred_json_to_conll_with_metadata(data_pred_json, data_preprocessed)
else:
data_pred_toke = pred_json_to_toke(data_pred_json)
#elif steps.main == "train":
#scores = compare_pred_gold()
#print_logs()
\ No newline at end of file
......@@ -33,9 +33,18 @@ def main(steps):
#### train, has_per == False
# allennlp train -s Results_${CONFIG}/results_${OUTPUT} ${CODE}configs/${MODEL}.jsonnet --include-package allen_custom.custom_conll_reader --include-package allen_custom.custom_simple_tagger --include-package allen_custom.custom_disrpt_reader --include-package allen_custom.custom_bert_token_embedder
# allennlp train -s Resultts_conllu/results_eng.rst.rstdt_bert ../code/utils/configs/bert.jsonnet ....
# Discut - repo morteza
#allennlp train -s Results_${CONFIG}/results_${OUTPUT} ${CODE}configs/bert.jsonnet
cmd2 = f"allennlp train -s {steps.data.resu} {tr_config}"
# Discut-gitlab
cmd = f"allennlp train -s {steps.data.resu} {tr_config} --include-package allen_custom.custom_conll_reader --include-package allen_custom.custom_simple_tagger --include-package allen_custom.custom_disrpt_reader --include-package allen_custom.custom_bert_token_embedder"
print(cmd)
os.system(cmd)
print(cmd2)
os.system(cmd2)
# then...
# TODO:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment