diff --git a/.gitignore b/.gitignore
index 371503a80d900dde316cbdc92efdf8fa6ce1812f..f57c436fa6463b2f0a0f382938ddf148bd516eea 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,17 +1,11 @@
 .idea
-tests
 venv
 *.pyc
 .DS_Store
-.env
-./bash_GPU.sh
-push pull texte
-logs
-Output
 .data
 TensorBoard
 models
-*.pkl
-good_models/model_check.pt
+good_models
 main.py
 *.pt
+Datasets/Utils
diff --git a/Configuration/Configuration.py b/Configuration/Configuration.py
deleted file mode 100644
index 3d94c9b2e3aad20cebafe0dd5015caee1c5e50b3..0000000000000000000000000000000000000000
--- a/Configuration/Configuration.py
+++ /dev/null
@@ -1,17 +0,0 @@
-import os
-from configparser import ConfigParser
-
-# Read configuration file
-path_current_directory = os.path.dirname(__file__)
-path_config_file = os.path.join(path_current_directory, 'config.ini')
-config = ConfigParser()
-config.read(path_config_file)
-
-# region Get section
-
-version = config["VERSION"]
-
-modelDecoderConfig = config["MODEL_DECODER"]
-modelTrainingConfig = config["MODEL_TRAINING"]
-
-# endregion Get section
diff --git a/Configuration/config.ini b/Configuration/config.ini
deleted file mode 100644
index 3fb0157dd2a41afc67607cb94f996daa3236ae0b..0000000000000000000000000000000000000000
--- a/Configuration/config.ini
+++ /dev/null
@@ -1,18 +0,0 @@
-[VERSION]
-transformers = 4.16.2
-[MODEL_DECODER]
-dim_encoder = 768
-dim_decoder = 128
-num_rnn_layers=1
-dropout=0.1
-teacher_forcing=0.05
-[MODEL_TRAINING]
-batch_size=16
-epoch=10
-seed_val=42
-learning_rate=0.005
-use_checkpoint_SAVE=1
-output_path=Output
-use_checkpoint_LOAD=1
-input_path=models_save
-model_to_load=model_check.pt
\ No newline at end of file
diff --git a/Datasets/index_to_pos1.pkl b/Datasets/index_to_pos1.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..c212d7e65f99a80671dfe2dba3481bbd49c22c6e
Binary files /dev/null and b/Datasets/index_to_pos1.pkl differ
diff --git a/Datasets/index_to_super.pkl b/Datasets/index_to_super.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..606848f7652155e0048c281320e36f46886661e5
Binary files /dev/null and b/Datasets/index_to_super.pkl differ
diff --git a/Datasets/m2_dataset_V2.csv b/Datasets/m2_dataset.csv
similarity index 100%
rename from Datasets/m2_dataset_V2.csv
rename to Datasets/m2_dataset.csv
diff --git a/README.md b/README.md
index 5994f1455440e7055fec3c5dd2f7e9baaa7e0cd5..5ece8c34aedf24cfb542a22c40d3263305b501eb 100644
--- a/README.md
+++ b/README.md
@@ -1,15 +1,109 @@
 # DeepGrail
 
+This repository contains a Python implementation of BertForTokenClassification that uses TLGbank data to train
+part-of-speech taggers and supertaggers.
+
+This code was designed to work with the [DeepGrail Linker](https://gitlab.irit.fr/pnria/global-helper/deepgrail-linker)
+to provide a wide-coverage syntactic and semantic parser for French, but the tagger is independent and can be used with your own tags.
+
 ## Usage
 
+### Structure
+
+```
+.
+├── Datasets                        # TLGbank data
+├── SuperTagger                     # Implementation of BertForTokenClassification
+│   ├── SuperTagger.py              # Main class
+│   └── Utils
+│       ├── SentencesTokenizer.py
+│       ├── SymbolTokenizer.py
+│       ├── Tagging_bert_model.py   # Bert model
+│       └── utils.py
+├── predict.py                      # Example of prediction
+└── train.py                        # Example of training
+```
+
 ### Installation
+
 Python 3.9.10 **(Warning: do not use Python 3.10+)**
 
 Clone the project locally. In a clean python venv do `pip install -r requirements.txt`
 
+Download an already trained model, or prepare the data for **your own** training.
+
 ## How To use
 
-TODO ...
+**predict.py** and **train.py** show simple examples of how to use the model; feel free to look at them before using the
+SuperTagger.
+
+### Utils
+
+To load **m2_dataset.csv**, you can use `SuperTagger.Utils.utils.read_csv_pgbar(...)`. This function returns a pandas
+DataFrame.
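+
+A minimal sketch of loading the dataset and a tag dictionary (the 100-row limit is only an example):
+
+```
+from SuperTagger.Utils.utils import read_csv_pgbar, load_obj
+
+df = read_csv_pgbar('Datasets/m2_dataset.csv', 100)
+index_to_super = load_obj('Datasets/index_to_super')
+```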
+
+### Prediction
+
+To predict on your own data, you need to load a model (saved with this code).
+
+```
+df = read_csv_pgbar(file_path, 20)
+texts = df['X'].tolist()
+
+tagger = SuperTagger()
+
+tagger.load_weights("your/model/path")
+
+pred_without_argmax, pred_convert, bert_hidden_state = tagger.predict(texts[7])
+
+print(pred_convert)
+#['let', 'dr(0,s,s)', 'let', 'dr(0,dr(0,s,s),np)', 'dr(0,np,n)', 'dr(0,n,n)', 'let', 'n', 'let', 'dl(0,n,n)', 'dr(0,dl(0,dl(0,n,n),dl(0,n,n)),dl(0,n,n))', 'dl(0,n,n)', 'let', 'dr(0,np,np)', 'np', 'dr(0,dl(0,np,np),np)', 'np', 'dr(0,dl(0,np,np),np)', 'np', 'dr(0,dl(0,np,s),dl(0,np,s))', 'dr(0,dl(0,np,s),np)', 'dl(1,s,s)', 'np', 'dr(0,dl(0,np,np),n)', 'n', 'dl(0,s,txt)']
+```
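+
+`predict` returns the raw per-token scores, the tags decoded from them, and the last BERT hidden state.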
+
+### Training
+
+```
+df = read_csv_pgbar(file_path, 1000)
+texts = df['X'].tolist()
+tags = df['Z'].tolist()
+
+# Dict to convert IDs to tokens (the dict is saved with the model for prediction)
+index_to_super = load_obj('Datasets/index_to_super')
+
+tagger = SuperTagger()
+
+bert_name = 'camembert-base'
+
+tagger.create_new_model(len(index_to_super), bert_name, index_to_super)
+# You can also load an existing model to re-train it
+# tagger.load_weights("your/model/path")
+
+tagger.train(texts, tags, checkpoint=True)
+
+pred_without_argmax, pred_convert, bert_hidden_state = tagger.predict(texts[7])
+```
+
+During training, if you pass `checkpoint=True`, the model is automatically saved after each epoch in a folder named Training_XX-XX_XX-XX.
+Use `tensorboard=True` to also write logs in that folder (run `tensorboard --logdir=logs` to view them).
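+
+For reference, a sketch of a `train` call with every option spelled out (the defaults are `validation_rate=0.1`, `epochs=20`, `batch_size=32`; `tensorboard` and `checkpoint` default to False):
+
+```
+tagger.train(texts, tags, validation_rate=0.1, epochs=20, batch_size=32,
+             tensorboard=True, checkpoint=True)
+```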
 
-tensorboard --logdir=logs
+## Authors
 
+[Rabault Julien](https://www.linkedin.com/in/julienrabault), de Pourtales Caroline
\ No newline at end of file
diff --git a/SuperTagger/SuperTagger.py b/SuperTagger/SuperTagger.py
index 70c15924fe51a8d75b35490393e984175dfe18c1..95b3d59d8704efb3efc9ffed80382a8fc7146cfb 100644
--- a/SuperTagger/SuperTagger.py
+++ b/SuperTagger/SuperTagger.py
@@ -1,26 +1,45 @@
+import datetime
 import os
 import sys
-
 import time
-import datetime
-from tkinter import Variable
 
 import torch
 import transformers
-from torch import nn
+from torch import nn, Tensor
 from torch.optim import Adam
+from torch.utils.data import Dataset, TensorDataset, random_split, DataLoader
 from torch.utils.tensorboard import SummaryWriter
 from tqdm import tqdm
 from transformers import AutoTokenizer
-
-from torch.utils.data import Dataset, TensorDataset, random_split
+from transformers import logging
 
 from SuperTagger.Utils.SentencesTokenizer import SentencesTokenizer
 from SuperTagger.Utils.SymbolTokenizer import SymbolTokenizer
 from SuperTagger.Utils.Tagging_bert_model import Tagging_bert_model
 
+logging.set_verbosity(logging.ERROR)
+
+
+def output_create_dir():
+    """
+    Creates a dated output directory for checkpoints and TensorBoard logs.
+    @return: the training directory path and a SummaryWriter writing into it
+    """
+    from datetime import datetime
+    output_path = 'TensorBoard'
+    training_dir = os.path.join(output_path, 'Training_' + datetime.today().strftime('%d-%m_%H-%M'))
+    logs_dir = os.path.join(training_dir, 'logs')
+    writer = SummaryWriter(log_dir=logs_dir)
+    return training_dir, writer
+
 
-def categorical_accuracy(preds, truth):
+def categorical_accuracy(preds: list[list[int]], truth: list[list[int]]) -> float:
+    """
+    Computes token-level accuracy between predicted and gold label sequences.
+    @param preds: predicted label ids, one list per sentence
+    @param truth: gold label ids, one list per sentence
+    @return: the proportion of correctly predicted labels
+    """
     good_label = 0
     nb_label = 0
     for i in range(len(truth)):
@@ -48,7 +67,9 @@ def format_time(elapsed):
 class SuperTagger:
 
     def __init__(self):
+        """
 
+        """
         self.index_to_tags = None
         self.num_label = None
         self.bert_name = None
@@ -65,7 +86,11 @@ class SuperTagger:
         self.trainable = False
         self.model_load = False
 
-    def load_weights(self, model_file):
+    def load_weights(self, model_file: str):
+        """
+        Loads a model previously saved by this class.
+        @param model_file: path to the saved model file
+        """
         self.trainable = False
 
         print("#" * 15)
@@ -95,8 +120,13 @@ class SuperTagger:
         self.model_load = True
         self.trainable = True
 
-    def create_new_model(self, num_label, bert_name, index_to_tags: dict):
+    def create_new_model(self, num_label: int, bert_name: str, index_to_tags: dict):
+        """
 
+        @param num_label: number of distinct tags; must equal len(index_to_tags)
+        @param bert_name: name of the pretrained BERT model (e.g. 'camembert-base')
+        @param index_to_tags: dict mapping label ids to tag strings
+        """
         assert len(
             index_to_tags) == num_label, f"len(index_to_tags): {len(index_to_tags)} must be equal to num_label: {num_label}"
 
@@ -114,9 +144,15 @@ class SuperTagger:
         self.trainable = True
         self.model_load = True
 
-    def predict(self, sentences):
+    def predict(self, sentences: list[str]) -> (list[list[list[float]]], list[list[str]], Tensor):
+        """
 
+        @param sentences: a sentence or a list of sentences to tag
+        @return: raw model scores, the decoded tags, and the BERT hidden state
+        """
         assert self.trainable or self.model is None, "Please use the create_new_model(...) or load_weights(...) function before the predict, the model is not integrated"
+        sentences = [sentences] if isinstance(sentences, str) else sentences
+
         self.model.eval()
         with torch.no_grad():
             sents_tokenized_t, sents_mask_t = self.sent_tokenizer.fit_transform_tensors(sentences)
@@ -127,13 +163,23 @@ class SuperTagger:
 
             return preds, self.tags_tokenizer.convert_ids_to_tags(preds.detach()), hidden
 
-    def train(self, sentences, tags, validation_rate=0.1, epochs=20, batch_size=32, tensorboard=False,
+    def train(self, sentences: list[str], tags: list[list[str]], validation_rate=0.1, epochs=20, batch_size=32,
+              tensorboard=False,
               checkpoint=False):
-
+        """
+        Trains the model on the given sentences and tag sequences.
+        @param sentences: training sentences
+        @param tags: gold tag sequences, one list per sentence
+        @param validation_rate: fraction of the data held out for validation
+        @param epochs: number of training epochs
+        @param batch_size: batch size
+        @param tensorboard: if True, write TensorBoard logs
+        @param checkpoint: if True, save the model after each epoch
+        """
         assert self.trainable or self.model is None, "Please use the create_new_model(...) or load_weights(...) function before the train, the model is not integrated"
 
         if checkpoint or tensorboard:
-            checkpoint_dir, writer = self.__output_create()
+            checkpoint_dir, writer = output_create_dir()
 
         training_dataloader, validation_dataloader = self.__preprocess_data(batch_size, sentences, tags,
                                                                             1 - validation_rate)
@@ -171,8 +217,16 @@ class SuperTagger:
             if checkpoint:
                 self.__checkpoint_save(path=os.path.join(checkpoint_dir, 'model_check.pt'))
 
-    def __preprocess_data(self, batch_size, sentences, tags, validation_rate):
+    def __preprocess_data(self, batch_size: int, sentences: list[str], tags: list[list[str]],
+                          validation_rate: float) -> (DataLoader, DataLoader):
+        """
 
+        @param batch_size: batch size for both dataloaders
+        @param sentences: input sentences
+        @param tags: gold tag sequences
+        @param validation_rate: fraction of the data used for training (the caller passes 1 - validation_rate)
+        @return: the training and validation dataloaders
+        """
         validation_dataloader = None
 
         sents_tokenized_t, sents_mask_t = self.sent_tokenizer.fit_transform_tensors(sentences)
@@ -191,15 +245,12 @@ class SuperTagger:
         training_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
         return training_dataloader, validation_dataloader
 
-    def __output_create(self):
-        from datetime import datetime
-        outpout_path = 'TensorBoard'
-        training_dir = os.path.join(outpout_path, 'Tranning_' + datetime.today().strftime('%d-%m_%H-%M'))
-        logs_dir = os.path.join(training_dir, 'logs')
-        writer = SummaryWriter(log_dir=logs_dir)
-        return training_dir, writer
+    def __train_epoch(self, training_dataloader: DataLoader) -> (float, float, str):
+        """
 
-    def __train_epoch(self, training_dataloader):
+        @param training_dataloader: dataloader over the training batches
+        @return: epoch accuracy, epoch loss, and the formatted training time
+        """
         self.model.train()
         epoch_loss = 0
         epoch_acc = 0
@@ -217,8 +268,6 @@ class SuperTagger:
 
                 predictions = torch.argmax(logit, dim=2).detach().cpu().numpy()
                 label_ids = targets.cpu().numpy()
-                # torch.nn.functional.one_hot(targets).long()
-                # torch.argmax(logit)
 
                 acc = categorical_accuracy(predictions, label_ids)
 
@@ -238,11 +287,17 @@ class SuperTagger:
 
         return epoch_acc, epoch_loss, training_time
 
-    def foward(self,b_sents_tokenized, b_sents_mask):
+    def foward(self, b_sents_tokenized: Tensor, b_sents_mask: Tensor) -> (Tensor, Tensor):
+        """
+        Runs a forward pass through the underlying model.
+        @param b_sents_tokenized: batch of token ids
+        @param b_sents_mask: batch of attention masks
+        @return: the logits and the BERT hidden state
+        """
         _, logit, hidden = self.model((b_sents_tokenized, b_sents_mask))
         return logit, hidden
 
-    def __eval_epoch(self, validation_dataloader):
+    def __eval_epoch(self, validation_dataloader: DataLoader) -> (float, float, int):
         self.model.eval()
         eval_loss = 0
         eval_accuracy = 0
@@ -270,6 +325,10 @@ class SuperTagger:
         return eval_accuracy, eval_loss, nb_eval_steps
 
     def __checkpoint_save(self, path='/model_check.pt'):
+        """
+        Saves the model weights, tag dictionary and optimizer state to disk.
+        @param path: destination file for the checkpoint
+        """
         self.model.cpu()
         # print('save model parameters to [%s]' % path, file=sys.stderr)
 
@@ -279,5 +338,3 @@ class SuperTagger:
             'optimizer': self.optimizer,
         }, path)
         self.model.to(self.device)
-
-
diff --git a/SuperTagger/Utils/SentencesTokenizer.py b/SuperTagger/Utils/SentencesTokenizer.py
index f1fbea51286ffb4f86e8a0b4f199bd78eb292772..ee72006edd06bac408e2415af2202d10cf226954 100644
--- a/SuperTagger/Utils/SentencesTokenizer.py
+++ b/SuperTagger/Utils/SentencesTokenizer.py
@@ -1,6 +1,3 @@
-import numpy as np
-import torch
-
 
 class SentencesTokenizer():
 
@@ -12,28 +9,7 @@ class SentencesTokenizer():
         return self.tokenizer(sents, padding=True)
 
     def fit_transform_tensors(self, sents):
-        # , return_tensors = 'pt'
         temp = self.tokenizer(sents, padding=True, return_tensors = 'pt')
-        #
-        # len_sent_max = len(temp['attention_mask'][0])
-        #
-        # input_ids = np.ones((len(sents),len_sent_max))
-        # attention_mask = np.zeros((len(sents),len_sent_max))
-        #
-        # for i in range(len(temp['offset_mapping'])):
-        #     h = 1
-        #     input_ids[i][0] = self.tokenizer.cls_token_id
-        #     attention_mask[i][0] = 1
-        #     for j in range (1,len_sent_max-1):
-        #         if temp['offset_mapping'][i][j][1] != temp['offset_mapping'][i][j+1][0]:
-        #             input_ids[i][h] = temp['input_ids'][i][j]
-        #             attention_mask[i][h] = 1
-        #             h += 1
-        #     input_ids[i][h] = self.tokenizer.eos_token_id
-        #     attention_mask[i][h] = 1
-        #
-        # input_ids = torch.tensor(input_ids).long()
-        # attention_mask = torch.tensor(attention_mask)
 
         return temp["input_ids"], temp["attention_mask"]
 
diff --git a/SuperTagger/Utils/utils.py b/SuperTagger/Utils/utils.py
index 03aadfeebc90e85a8b15d912c62459efdc2c9cc1..4c22a68bbb5e2f77093792025439626b0011413b 100644
--- a/SuperTagger/Utils/utils.py
+++ b/SuperTagger/Utils/utils.py
@@ -1,7 +1,4 @@
-import datetime
-
 import pandas as pd
-import torch
 from tqdm import tqdm
 
 
@@ -16,7 +13,8 @@ def read_csv_pgbar(csv_path, nrows=float('inf'), chunksize=100):
         rows = nrows
 
     with tqdm(total=rows, desc='Rows read: ') as bar:
-        for chunk in pd.read_csv(csv_path, converters={'Y1': pd.eval,'Y2': pd.eval,'Z': pd.eval}, chunksize=chunksize, nrows=rows):
+        for chunk in pd.read_csv(csv_path, converters={'Y1': pd.eval, 'Y2': pd.eval, 'Z': pd.eval}, chunksize=chunksize,
+                                 nrows=rows):
             chunk_list.append(chunk)
             bar.update(len(chunk))
 
@@ -24,5 +22,10 @@ def read_csv_pgbar(csv_path, nrows=float('inf'), chunksize=100):
     print("#" * 20)
     return df
 
+def load_obj(name):
+    import pickle
+    with open(name + '.pkl', 'rb') as f:
+        return pickle.load(f)
+
 
 
diff --git a/bash_GPU.sh b/bash_GPU.sh
deleted file mode 100644
index 665f769d8046d6fd61167efbbbdd8e46d5495d94..0000000000000000000000000000000000000000
--- a/bash_GPU.sh
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/bin/sh
-#SBATCH --job-name=N-tensorboard
-#SBATCH --partition=RTX6000Node
-#SBATCH --gres=gpu:1
-#SBATCH --mem=32000
-#SBATCH --gres-flags=enforce-binding
-#SBATCH --error="error_rtx1.err"
-#SBATCH --output="out_rtx1.out"
-
-module purge
-module load singularity/3.0.3
-
-srun singularity exec /logiciels/containerCollections/CUDA11/pytorch-NGC-21-03-py3.sif python "train.py"
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index c611e9b46c45eac6d06ab14c47a951c0c641796c..41ce3b5b133a0d8ce94376ac0e32283b3c9f2417 100644
Binary files a/requirements.txt and b/requirements.txt differ
diff --git a/train.py b/train.py
index d5e66aaba2d1810492e8ec908eeed05c214fc7e3..ddcda4c8eb610e6377b9842357390fa017267105 100644
--- a/train.py
+++ b/train.py
@@ -1,14 +1,7 @@
 from SuperTagger.SuperTagger import SuperTagger
-from SuperTagger.Utils.utils import read_csv_pgbar
+from SuperTagger.Utils.utils import read_csv_pgbar, load_obj
 
-
-def load_obj(name):
-    with open(name + '.pkl', 'rb') as f:
-        import pickle
-        return pickle.load(f)
-
-
-file_path = 'Datasets/m2_dataset_V2.csv'
+file_path = 'Datasets/m2_dataset.csv'
 
 
 df = read_csv_pgbar(file_path,1000)
@@ -25,7 +18,6 @@ tags = tags[4:]
 
 
 index_to_super = load_obj('Datasets/index_to_pos1')
-super_to_index = {v: int(k) for k, v in index_to_super.items()}
 
 tagger = SuperTagger()