From 1021d27222901a2f58085d742954299819f87ae5 Mon Sep 17 00:00:00 2001
From: Morteza Ezzabady <morteza758@gmail.com>
Date: Thu, 5 Aug 2021 14:15:49 +0200
Subject: [PATCH] splitter in expes

---
 README.md                           |  3 ++-
 code/contextual_embeddings/expes.sh | 26 ++++++++++++++++++++++++--
 2 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 80b201e..50cab13 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,8 @@ Requirements:
 - pytorch: `pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 torchaudio===0.9.0 -f https://download.pytorch.org/whl/torch_stable.html`
 
 Usage:
-- train: `bash expes.sh eng.rst.rstdt conllu bert train`
+- train: `bash expes.sh eng.rst.rstdt conllu bert train [-s 200]`
+  (the optional `-s 200` makes the data conversion split sentences that are too long)
 - test: `bash expes.sh eng.rst.rstdt conllu bert test`
 - fine-tune with other model: `bash expes.sh eng.rst.rstdt conllu bert train eng`
 - test on other model: `bash expes.sh eng.rst.rstdt conllu bert test eng`
diff --git a/code/contextual_embeddings/expes.sh b/code/contextual_embeddings/expes.sh
index 7f25ba3..1db5286 100644
--- a/code/contextual_embeddings/expes.sh
+++ b/code/contextual_embeddings/expes.sh
@@ -18,11 +18,23 @@ export ACTION=${4}
 if [ -z "$5" ]
 then
     export HAS_PAR=false
+    export TOOLONG=false
+elif [ "${5}" = "-s" ]  # "-s N" right after ACTION: split long sentences, no parent model
+then
+    export HAS_PAR=false TOOLONG=true  # set HAS_PAR here too so it is defined on every path
+    export SPLIT=${6}
 else
     export HAS_PAR=true
+    export TOOLONG=false
     export PARENT=${5}
 fi
 
+if [ $# -eq 7 ] && [ "${6}" = "-s" ] # parent model in $5 followed by "-s N" in $6/$7
+then
+    export TOOLONG=true  # overrides the TOOLONG=false set when $5 was taken as the parent
+    export SPLIT=${7}
+fi
+
 if [ "$MODEL" = "xlm" ] ; 
 then 
     export BERT_VOCAB="xlm-roberta-base"
@@ -49,12 +61,22 @@ export GOLD=${GOLD_BASE}${DATASET}"/"${DATASET}"_"${EVAL}"."${CONFIG}
 # conversion of datasets to NER / BIO format by first testing the existence of files so as not to redo it each time
 if [ ! -f ${CONV}${DATASET}"_train.ner."${CONFIG} ]; then
     echo "converting to ner format -> in data_converted ..."
-    python conv2ner.py "../../data/"${DATASET}"/"${DATASET}"_train."${CONFIG}  ${CONV}/${DATASET}"_train.ner."${CONFIG} 
+    if [ "$TOOLONG" = true ]  # quoted so an empty or unset TOOLONG cannot break the test
+    then
+        python conv2ner.py "../../data/"${DATASET}"/"${DATASET}"_train."${CONFIG}  ${CONV}/${DATASET}"_train.ner."${CONFIG} --split-too-long True ${SPLIT}
+    else
+        python conv2ner.py "../../data/"${DATASET}"/"${DATASET}"_train."${CONFIG}  ${CONV}/${DATASET}"_train.ner."${CONFIG}
+    fi
 fi
 
 if [ ! -f ${CONV}/${DATASET}"_"${EVAL}".ner."${CONFIG} ]; then
     echo "converting to ner format -> in data_converted ..."
-    python conv2ner.py "../../data/"${DATASET}"/"${DATASET}"_"${EVAL}"."${CONFIG}  ${CONV}/${DATASET}"_"${EVAL}".ner."${CONFIG} 
+    if [ "$TOOLONG" = true ]  # quoted so an empty or unset TOOLONG cannot break the test
+    then
+        python conv2ner.py "../../data/"${DATASET}"/"${DATASET}"_"${EVAL}"."${CONFIG}  ${CONV}/${DATASET}"_"${EVAL}".ner."${CONFIG} --split-too-long True ${SPLIT}
+    else
+        python conv2ner.py "../../data/"${DATASET}"/"${DATASET}"_"${EVAL}"."${CONFIG}  ${CONV}/${DATASET}"_"${EVAL}".ner."${CONFIG}
+    fi
 fi
 
 if [ "$ACTION" = "train" ] 
-- 
GitLab