license

b3eaea5d · Caroline de Pourtalès · c0fc4ac2 · b3eaea5d · b3eaea5d · b3eaea5d
Commit b3eaea5d authored 2 years ago by Caroline de Pourtalès
--- a/Configuration/config.ini
+++ b/Configuration/config.ini
@@ -4,7 +4,7 @@ transformers = 4.16.2
 [DATASET_PARAMS]
 symbols_vocab_size = 26
 atom_vocab_size = 18
-max_len_sentence = 290
+max_len_sentence = 83
 max_atoms_in_sentence = 900
 max_atoms_in_one_type = 360

@@ -24,8 +24,7 @@ sinkhorn_iters = 5

 [MODEL_TRAINING]
 batch_size = 32
-pretrain_linker_epochs = 10
-epoch = 20
+pretrain_linker_epochs = 1
+epoch = 1
 seed_val = 42
-learning_rate = 2e-3
-
+learning_rate = 2e-3
\ No newline at end of file
--- a/NeuralProofNet/NeuralProofNet.py
+++ b/NeuralProofNet/NeuralProofNet.py
@@ -48,8 +48,6 @@ class NeuralProofNet(Module):
        super(NeuralProofNet, self).__init__()
        config = Configuration.read_config()
        datasetConfig = config["DATASET_PARAMS"]
-        modelEncoderConfig = config["MODEL_ENCODER"]
-        modelLinkerConfig = config["MODEL_LINKER"]
        modelTrainingConfig = config["MODEL_TRAINING"]

        # pretrain settings
@@ -65,6 +63,7 @@ class NeuralProofNet(Module):
        linker = Linker(supertagger_path_model)
        if linker_path_model is not None:
            linker.load_weights(linker_path_model)
+            self.pretrain_linker_epochs = 0
        self.linker = linker

        # Learning
@@ -90,7 +89,7 @@ class NeuralProofNet(Module):
            df_axiom_links pandas DataFrame
            validation_rate
        Returns:
-            the training dataloader and the validation dataloader. They contains the list of atoms, their polarities, the axiom links, the sentences tokenized, sentence mask
+            the training dataloader and the validation dataloader. They contain the list of atoms, their polarities, the axiom links, the sentences tokenized, sentence mask
        """
        print("Start preprocess Data")
        sentences_batch = df_axiom_links["X"].str.strip().tolist()

--- a/README.md
+++ b/README.md
@@ -12,6 +12,7 @@ In this version the tagger is not retrained with the linker.

 ### Installation
 Python 3.9.10 **(Warning: don't use Python 3.10+)**
+
 Clone the project locally.

 ### Libraries installation
@@ -20,6 +21,39 @@ Run the init.sh script and install the Tagger project under SuperTagger name and

 Upload the tagger.pt in models.  (You may need to modify 'model_tagger' in train.py.)

+You can also provide a pretrained linker model: pass its path (`linker_path_model`) to the `NeuralProofNet` initialization, and the linker pretraining step is skipped.
+
+### Structure
+
+The structure should look like this:
+```
+.
+.
+├── Configuration                    # Configuration
+│   ├── Configuration.py             # Contains the function to execute for config
+│   └── config.ini                   # contains parameters
+├── requirements.txt                 # libraries needed
+├── Datasets                         # TLGbank data with links
+├── SuperTagger                      # The Supertagger directory (that you need to install)
+│    ├── ...
+│    └── SuperTagger                 # Implementation of BertForTokenClassification
+│        ├── SuperTagger.py          # Main class
+│        └── Tagging_bert_model.py   # Bert model
+├── Linker                           # The Linker directory (that you need to install)
+│    ├── ...
+│    └── Linker.py                   # Linker class containing the neural network
+├── NeuralProofNet                   # The NeuralProofNet directory
+│    ├── ...
+│    └── NeuralProofNet.py           # NeuralProofNet class containing the linker and supertagger
+├── models                           
+│    ├── linker.pt                   # OPTIONAL : the pt file containing the pretrained linker (you need to install it)
+│    └── supertagger.pt              # the pt file containing the pretrained supertagger (you need to install it)
+├── Output                           # Directory where your linker models will be saved if checkpoint=True in train               
+├── TensorBoard                      # Directory where the stats will be saved if tensorboard=True in train
+└──  train.py                        # Example of train
+```
+
+
 ### Dataset format

 The sentences should be in a column "X", the links with '_x' postfix should be in a column "Y" and the categories in a column "Z".
@@ -38,7 +72,7 @@ For predict on your data you need to load a model (save with this code).

 ```
 linker = neuralproofnet.linker
-links = linker.predict_without_categories(["le chat est noir"])
+links = linker.predict_without_categories("le chat est noir")
 print(links)
 ```

@@ -46,6 +80,39 @@ The file ```postprocessing.py``` will allow you to draw the prediction. (limited

 You can also use the function ```predict_without_categories``` which only needs the sentence.

-## Authors

-[de Pourtales Caroline](https://www.linkedin.com/in/caroline-de-pourtales/), [Rabault Julien](https://www.linkedin.com/in/julienrabault)
\ No newline at end of file
+## LICENSE
+
+Copyright ou © ou Copr. CNRS, (18/07/2022)
+
+Contributeurs : 
+[de Pourtales Caroline](https://www.linkedin.com/in/caroline-de-pourtales/), [Rabault Julien](https://www.linkedin.com/in/julienrabault), Richard Moot
+
+Ce logiciel est un programme informatique servant à établir un Proof Net depuis une phrase française. 
+
+Ce logiciel est régi par la licence CeCILL-C soumise au droit français et
+respectant les principes de diffusion des logiciels libres. Vous pouvez
+utiliser, modifier et/ou redistribuer ce programme sous les conditions
+de la licence CeCILL-C telle que diffusée par le CEA, le CNRS et l'INRIA 
+sur le site "http://www.cecill.info".
+
+En contrepartie de l'accessibilité au code source et des droits de copie,
+de modification et de redistribution accordés par cette licence, il n'est
+offert aux utilisateurs qu'une garantie limitée.  Pour les mêmes raisons,
+seule une responsabilité restreinte pèse sur l'auteur du programme,  le
+titulaire des droits patrimoniaux et les concédants successifs.
+
+A cet égard  l'attention de l'utilisateur est attirée sur les risques
+associés au chargement,  à l'utilisation,  à la modification et/ou au
+développement et à la reproduction du logiciel par l'utilisateur étant 
+donné sa spécificité de logiciel libre, qui peut le rendre complexe à 
+manipuler et qui le réserve donc à des développeurs et des professionnels
+avertis possédant  des  connaissances  informatiques approfondies.  Les
+utilisateurs sont donc invités à charger  et  tester  l'adéquation  du
+logiciel à leurs besoins dans des conditions permettant d'assurer la
+sécurité de leurs systèmes et ou de leurs données et, plus généralement, 
+à l'utiliser et l'exploiter dans les mêmes conditions de sécurité. 
+
+Le fait que vous puissiez accéder à cet en-tête signifie que vous avez 
+pris connaissance de la licence CeCILL-C, et que vous en avez accepté les
+termes.
--- a/train.py
+++ b/train.py
@@ -6,7 +6,7 @@ from utils import read_csv_pgbar
 from Configuration import Configuration

 torch.cuda.empty_cache()
-nb_sentences = 100000000
+nb_sentences = 4*32
 file_path_axiom_links = 'Datasets/goldANDsilver_dataset_links.csv'
 model_tagger = "models/flaubert_super_98_V2_50e.pt"

@@ -28,5 +28,6 @@ print("#" * 20)
 neural_proof_net = NeuralProofNet(model_tagger)
 neural_proof_net.train_neuralproofnet(df_axiom_links, validation_rate=0.1, epochs=epochs, batch_size=batch_size,
                                      checkpoint=True, tensorboard=True)
+neural_proof_net.linker.predict_without_categories("le chat est noir")
 print("#" * 20)
 print("#" * 20)