From 1021d27222901a2f58085d742954299819f87ae5 Mon Sep 17 00:00:00 2001 From: Morteza Ezzabady <morteza758@gmail.com> Date: Thu, 5 Aug 2021 14:15:49 +0200 Subject: [PATCH] spliter in expes --- README.md | 3 ++- code/contextual_embeddings/expes.sh | 26 ++++++++++++++++++++++++-- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 80b201e..50cab13 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,8 @@ Requirements: - pytorch: `pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 torchaudio===0.9.0 -f https://download.pytorch.org/whl/torch_stable.html` Usage: -- train: `bash expes.sh eng.rst.rstdt conllu bert train` +- train: `bash expes.sh eng.rst.rstdt conllu bert train [-s 200]` +#for splitting the long sentences - test: `bash expes.sh eng.rst.rstdt conllu bert test` - fine-tune with other model: `bash expes.sh eng.rst.rstdt conllu bert train eng` - test on other model: `bash expes.sh eng.rst.rstdt conllu bert test eng` diff --git a/code/contextual_embeddings/expes.sh b/code/contextual_embeddings/expes.sh index 7f25ba3..1db5286 100644 --- a/code/contextual_embeddings/expes.sh +++ b/code/contextual_embeddings/expes.sh @@ -18,11 +18,23 @@ export ACTION=${4} if [ -z "$5" ] then export HAS_PAR=false + export TOOLONG=false +elif [ "${5}" = "-s" ] +then + export TOOLONG=true + export SPLIT=${6} else export HAS_PAR=true + export TOOLONG=false export PARENT=${5} fi +if [ $# -eq 7 ] && [ "${6}" = "-s" ] +then + export TOOLONG=true + export SPLIT=${7} +fi + if [ "$MODEL" = "xlm" ] ; then export BERT_VOCAB="xlm-roberta-base" @@ -49,12 +61,22 @@ export GOLD=${GOLD_BASE}${DATASET}"/"${DATASET}"_"${EVAL}"."${CONFIG} # conversion of datasets to NER / BIO format by first testing the existence of files so as not to redo it each time if [ ! -f ${CONV}${DATASET}"_train.ner."${CONFIG} ]; then echo "converting to ner format -> in data_converted ..." 
- python conv2ner.py "../../data/"${DATASET}"/"${DATASET}"_train."${CONFIG} ${CONV}/${DATASET}"_train.ner."${CONFIG} + if [ $TOOLONG = true ] + then + python conv2ner.py "../../data/"${DATASET}"/"${DATASET}"_train."${CONFIG} ${CONV}/${DATASET}"_train.ner."${CONFIG} --split-too-long True ${SPLIT} + else + python conv2ner.py "../../data/"${DATASET}"/"${DATASET}"_train."${CONFIG} ${CONV}/${DATASET}"_train.ner."${CONFIG} + fi fi if [ ! -f ${CONV}/${DATASET}"_"${EVAL}".ner."${CONFIG} ]; then echo "converting to ner format -> in data_converted ..." - python conv2ner.py "../../data/"${DATASET}"/"${DATASET}"_"${EVAL}"."${CONFIG} ${CONV}/${DATASET}"_"${EVAL}".ner."${CONFIG} + if [ $TOOLONG = true ] + then + python conv2ner.py "../../data/"${DATASET}"/"${DATASET}"_"${EVAL}"."${CONFIG} ${CONV}/${DATASET}"_"${EVAL}".ner."${CONFIG} --split-too-long True ${SPLIT} + else + python conv2ner.py "../../data/"${DATASET}"/"${DATASET}"_"${EVAL}"."${CONFIG} ${CONV}/${DATASET}"_"${EVAL}".ner."${CONFIG} + fi fi if [ "$ACTION" = "train" ] -- GitLab