diff --git a/README.md b/README.md index ec7a9a9eb1fd83a24fe4138544f97db61e0e8c93..0417a5a0ab0364cab050a9f3a85606db1e33023c 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,11 @@ Code: https://gitlab.inria.fr/andiamo/tony ``` pip install -r requirements.txt ``` +- Install PyTorch: +``` +pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 torchaudio===0.9.0 -f https://download.pytorch.org/whl/torch_stable.html +``` + ## Configuration file: to chose or to complete - `code/config_global_X.json` See global_config_file_guideline.md. diff --git a/global_config_file_guideline.md b/global_config_file_guideline.md index 580bd7d576dfa97043b48369e38324a56455c724..fb4d0ca8ec40c23e6b98376ea7bcb2a0aefca2e6 100644 --- a/global_config_file_guideline.md +++ b/global_config_file_guideline.md @@ -117,7 +117,7 @@ - `"toolkit":` [string] The toolkit to build your model (to be added : "jiant"). - OPTIONS : ["allennlp"] - `"pre_trained_lm":` **bert** (to be added : roberta..) - - `"config_file":` [string] The path to the config file for training. e.g. `"../model/config_training.jsonnet"` + - `"config_file":` [string] The path to the config file for training. e.g. `"../model/config_training.jsonnet"`. This file needs to be completed accordingly. - `"train_data_path":` [string] The path to your training dataset. e.g. `"../data/eng.rst.rstdt/eng.rst.rstdt_train.conllu"` *conflict with training_config ??* - `"validation_data_path":` [string] The path to your development dataset. e.g. 
`"../data/eng.rst.rstdt/eng.rst.rstdt_dev.conllu"` *idem* diff --git a/model/config_training_bert.jsonnet b/model/config_training_bert.jsonnet index a656ba4f8bb1104aaea62e5acd0fe6a5af66a46b..824dabd3acbbfe53621736fdd4178cef28fb15fd 100644 --- a/model/config_training_bert.jsonnet +++ b/model/config_training_bert.jsonnet @@ -64,8 +64,8 @@ } } }, - "train_data_path": "../data/eng.rst.rstdt/eng.rst.rstdt_train.ner.conllu", - "validation_data_path": "../data/eng.rst.rstdt/eng.rst.rstdt_dev.ner.conllu", + "train_data_path": "../data/eng.sdrt.stac/eng.sdrt.stac_train.ner.conllu", + "validation_data_path": "../data/eng.sdrt.stac/eng.sdrt.stac_dev.ner.conllu", "trainer": { "cuda_device": 1, "grad_norm": 5, diff --git a/requirements.txt b/requirements.txt index 8cb88a9c2e7574a11a92ca2604190a5b94361e78..a2a8ba53439b0d35b6b72cbf6949cdde0c7ce65b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,14 +1,11 @@ -_libgcc_mutex==0.1 -_openmp_mutex==5.1 alabaster==0.7.12 allennlp==0.9.0 atomicwrites==1.4.0 attrs==21.2.0 -babel==2.9.1 +Babel==2.9.1 blis==0.2.4 boto3==1.17.109 botocore==1.20.109 -ca-certificates==2022.07.19 cached-property==1.5.2 certifi==2021.5.30 cffi==1.14.6 @@ -20,64 +17,49 @@ cycler==0.10.0 cymem==2.0.5 docutils==0.17.1 editdistance==0.5.3 -en-core-web-sm==2.1.0 -filelock==3.8.0 +en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.1.0/en_core_web_sm-2.1.0.tar.gz flaky==3.7.0 -flask==2.0.1 -flask-cors==3.0.10 -fr-core-news-sm==2.1.0 +Flask==2.0.1 +Flask-Cors==3.0.10 ftfy==6.0.3 gevent==21.1.2 greenlet==1.1.0 h5py==3.3.0 -huggingface-hub==0.10.0 idna==2.10 imagesize==1.2.0 importlib-metadata==4.6.1 iniconfig==1.1.1 itsdangerous==2.0.1 -jinja2==3.0.1 +Jinja2==3.0.1 jmespath==0.10.0 joblib==1.0.1 -jsonnet==0.18.0 jsonnetbin==0.16.0 jsonpickle==2.0.0 kiwisolver==1.3.1 -ld_impl_linux-64==2.38 -libffi==3.3 -libgcc-ng==11.2.0 -libgomp==11.2.0 -libstdcxx-ng==11.2.0 -markupsafe==2.0.1 +MarkupSafe==2.0.1 matplotlib==3.4.2 
murmurhash==1.0.5 -ncurses==6.3 nltk==3.6.2 numpy==1.21.0 numpydoc==1.1.0 -openssl==1.1.1q overrides==3.1.0 packaging==21.0 pandas==1.3.1 parsimonious==0.8.1 -pillow==8.3.1 -pip==22.1.2 +Pillow==8.3.1 plac==0.9.6 pluggy==0.13.1 preshed==2.0.1 protobuf==3.17.3 py==1.10.0 pycparser==2.20 -pygments==2.9.0 +Pygments==2.9.0 pyparsing==2.4.7 pytest==6.2.4 -python==3.7.13 python-dateutil==2.8.1 pytorch-pretrained-bert==0.6.2 pytorch-transformers==1.1.0 pytz==2021.1 -pyyaml==6.0 -readline==8.1.2 regex==2021.7.6 requests==2.25.1 responses==0.13.3 @@ -85,44 +67,33 @@ s3transfer==0.4.2 scikit-learn==0.24.2 scipy==1.7.0 sentencepiece==0.1.96 -setuptools==63.4.1 six==1.16.0 snowballstemmer==2.1.0 spacy==2.1.9 -sphinx==4.0.3 +Sphinx==4.0.3 sphinxcontrib-applehelp==1.0.2 sphinxcontrib-devhelp==1.0.2 sphinxcontrib-htmlhelp==2.0.0 sphinxcontrib-jsmath==1.0.1 sphinxcontrib-qthelp==1.0.3 sphinxcontrib-serializinghtml==1.1.5 -sqlite==3.39.2 sqlparse==0.4.1 srsly==1.0.5 stanfordnlp==0.2.0 stanza==1.2.2 -tensorboardx==2.4 +tensorboardX==2.4 thinc==7.0.8 threadpoolctl==2.2.0 -tk==8.6.12 -tokenizers==0.12.1 toml==0.10.2 -torch==1.9.0+cu111 -torchaudio==0.9.0 -torchvision==0.10.0+cu111 tqdm==4.61.2 -transformers==4.22.2 typing-extensions==3.10.0.0 typing-utils==0.1.0 -unidecode==1.2.0 +Unidecode==1.2.0 urllib3==1.26.6 wasabi==0.8.2 wcwidth==0.2.5 -werkzeug==2.0.1 -wheel==0.37.1 +Werkzeug==2.0.1 word2number==1.1 -xz==5.2.5 zipp==3.5.0 -zlib==1.2.12 -zope-event==4.5.0 -zope-interface==5.4.0 \ No newline at end of file +zope.event==4.5.0 +zope.interface==5.4.0 \ No newline at end of file