Skip to content
Snippets Groups Projects
Commit 1021d272 authored by ezzabady.morteza's avatar ezzabady.morteza
Browse files

splitter in expes

parent d4bebcce
No related branches found
No related tags found
No related merge requests found
...@@ -13,7 +13,8 @@ Requirements: ...@@ -13,7 +13,8 @@ Requirements:
- pytorch: `pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 torchaudio===0.9.0 -f https://download.pytorch.org/whl/torch_stable.html` - pytorch: `pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 torchaudio===0.9.0 -f https://download.pytorch.org/whl/torch_stable.html`
Usage: Usage:
- train: `bash expes.sh eng.rst.rstdt conllu bert train` - train: `bash expes.sh eng.rst.rstdt conllu bert train [-s 200]`
# the optional `-s 200` argument splits sentences that are too long
- test: `bash expes.sh eng.rst.rstdt conllu bert test` - test: `bash expes.sh eng.rst.rstdt conllu bert test`
- fine-tune with other model: `bash expes.sh eng.rst.rstdt conllu bert train eng` - fine-tune with other model: `bash expes.sh eng.rst.rstdt conllu bert train eng`
- test on other model: `bash expes.sh eng.rst.rstdt conllu bert test eng` - test on other model: `bash expes.sh eng.rst.rstdt conllu bert test eng`
......
...@@ -18,11 +18,23 @@ export ACTION=${4} ...@@ -18,11 +18,23 @@ export ACTION=${4}
if [ -z "$5" ] if [ -z "$5" ]
then then
export HAS_PAR=false export HAS_PAR=false
export TOOLONG=false
elif [ "${5}" = "-s" ]
then
export TOOLONG=true
export SPLIT=${6}
else else
export HAS_PAR=true export HAS_PAR=true
export TOOLONG=false
export PARENT=${5} export PARENT=${5}
fi fi
if [ $# -eq 7 ] && [ "${6}" = "-s" ]
then
export TOOLONG=true
export SPLIT=${7}
fi
if [ "$MODEL" = "xlm" ] ; if [ "$MODEL" = "xlm" ] ;
then then
export BERT_VOCAB="xlm-roberta-base" export BERT_VOCAB="xlm-roberta-base"
...@@ -49,12 +61,22 @@ export GOLD=${GOLD_BASE}${DATASET}"/"${DATASET}"_"${EVAL}"."${CONFIG} ...@@ -49,12 +61,22 @@ export GOLD=${GOLD_BASE}${DATASET}"/"${DATASET}"_"${EVAL}"."${CONFIG}
# conversion of datasets to NER / BIO format by first testing the existence of files so as not to redo it each time # conversion of datasets to NER / BIO format by first testing the existence of files so as not to redo it each time
if [ ! -f ${CONV}${DATASET}"_train.ner."${CONFIG} ]; then if [ ! -f ${CONV}${DATASET}"_train.ner."${CONFIG} ]; then
echo "converting to ner format -> in data_converted ..." echo "converting to ner format -> in data_converted ..."
python conv2ner.py "../../data/"${DATASET}"/"${DATASET}"_train."${CONFIG} ${CONV}/${DATASET}"_train.ner."${CONFIG} if [ $TOOLONG = true ]
then
python conv2ner.py "../../data/"${DATASET}"/"${DATASET}"_train."${CONFIG} ${CONV}/${DATASET}"_train.ner."${CONFIG} --split-too-long True ${SPLIT}
else
python conv2ner.py "../../data/"${DATASET}"/"${DATASET}"_train."${CONFIG} ${CONV}/${DATASET}"_train.ner."${CONFIG}
fi
fi fi
if [ ! -f ${CONV}/${DATASET}"_"${EVAL}".ner."${CONFIG} ]; then if [ ! -f ${CONV}/${DATASET}"_"${EVAL}".ner."${CONFIG} ]; then
echo "converting to ner format -> in data_converted ..." echo "converting to ner format -> in data_converted ..."
python conv2ner.py "../../data/"${DATASET}"/"${DATASET}"_"${EVAL}"."${CONFIG} ${CONV}/${DATASET}"_"${EVAL}".ner."${CONFIG} if [ $TOOLONG = true ]
then
python conv2ner.py "../../data/"${DATASET}"/"${DATASET}"_"${EVAL}"."${CONFIG} ${CONV}/${DATASET}"_"${EVAL}".ner."${CONFIG} --split-too-long True ${SPLIT}
else
python conv2ner.py "../../data/"${DATASET}"/"${DATASET}"_"${EVAL}"."${CONFIG} ${CONV}/${DATASET}"_"${EVAL}".ner."${CONFIG}
fi
fi fi
if [ "$ACTION" = "train" ] if [ "$ACTION" = "train" ]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment