Commit b0e85309 authored by Caroline DE POURTALES

update for osirim

parent e115a712
2 merge requests: !6 Linker with transformer, !5 Linker with transformer
File deleted
@@ -65,6 +65,8 @@ class Linker(Module):
num_warmup_steps=0,
num_training_steps=100)
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def __preprocess_data(self, batch_size, df_axiom_links, sentences_tokens, sentences_mask, validation_rate=0.0):
r"""
Args:
@@ -133,18 +135,18 @@ class Linker(Module):
atoms_batch_tokenized[s_idx][i] == self.atom_map[
atom_type] and
atoms_polarity_batch[s_idx][i])] + [
torch.zeros(self.dim_embedding_atoms)])
torch.zeros(self.dim_embedding_atoms, device=self.device)]).to(self.device)
for s_idx in range(len(atoms_polarity_batch))], padding_value=0,
max_len=self.max_atoms_in_one_type // 2)
max_len=self.max_atoms_in_one_type // 2).to(self.device)
neg_encoding = pad_sequence([torch.stack([x for i, x in enumerate(atoms_encoding[s_idx])
if (self.atom_map[atom_type] in atoms_batch_tokenized[s_idx] and
atoms_batch_tokenized[s_idx][i] == self.atom_map[
atom_type] and
not atoms_polarity_batch[s_idx][i])] + [
torch.zeros(self.dim_embedding_atoms)])
torch.zeros(self.dim_embedding_atoms, device=self.device)]).to(self.device)
for s_idx in range(len(atoms_polarity_batch))], padding_value=0,
max_len=self.max_atoms_in_one_type // 2)
max_len=self.max_atoms_in_one_type // 2).to(self.device)
pos_encoding = self.pos_transformation(pos_encoding)
neg_encoding = self.neg_transformation(neg_encoding)
@@ -175,6 +177,7 @@ class Linker(Module):
training_dataloader, validation_dataloader = self.__preprocess_data(batch_size, df_axiom_links,
sentences_tokens, sentences_mask,
validation_rate)
self.to(self.device)
for epoch_i in range(0, epochs):
epoch_acc, epoch_loss = self.train_epoch(training_dataloader, validation_dataloader, checkpoint, validate)
@@ -197,16 +200,16 @@ class Linker(Module):
# For each batch of training data...
for step, batch in enumerate(training_dataloader):
# Unpack this training batch from our dataloader
batch_atoms = batch[0].to("cuda" if torch.cuda.is_available() else "cpu")
batch_polarity = batch[1].to("cuda" if torch.cuda.is_available() else "cpu")
batch_true_links = batch[2].to("cuda" if torch.cuda.is_available() else "cpu")
batch_sentences_tokens = batch[3].to("cuda" if torch.cuda.is_available() else "cpu")
batch_sentences_mask = batch[4].to("cuda" if torch.cuda.is_available() else "cpu")
batch_atoms = batch[0].to(self.device)
batch_polarity = batch[1].to(self.device)
batch_true_links = batch[2].to(self.device)
batch_sentences_tokens = batch[3].to(self.device)
batch_sentences_mask = batch[4].to(self.device)
self.optimizer.zero_grad()
# get sentence embedding from BERT which is already trained
logits, sentences_embedding = self.Supertagger.foward(batch_sentences_tokens, batch_sentences_mask)
logits, sentences_embedding = self.Supertagger.forward(batch_sentences_tokens, batch_sentences_mask)
# Run the linker on the categories predictions
logits_predictions = self(batch_atoms, batch_polarity, sentences_embedding, batch_sentences_mask)
@@ -273,18 +276,18 @@ class Linker(Module):
atoms_tokenized[s_idx][i] == self.atom_map[
atom_type] and
polarities[s_idx][i])] + [
torch.zeros(self.dim_embedding_atoms)])
torch.zeros(self.dim_embedding_atoms, device=self.device)])
for s_idx in range(len(polarities))], padding_value=0,
max_len=self.max_atoms_in_one_type // 2)
max_len=self.max_atoms_in_one_type // 2).to(self.device)
neg_encoding = pad_sequence([torch.stack([x for i, x in enumerate(atoms_encoding[s_idx])
if (self.atom_map[atom_type] in atoms_tokenized[s_idx] and
atoms_tokenized[s_idx][i] == self.atom_map[
atom_type] and
not polarities[s_idx][i])] + [
torch.zeros(self.dim_embedding_atoms)])
torch.zeros(self.dim_embedding_atoms, device=self.device)])
for s_idx in range(len(polarities))], padding_value=0,
max_len=self.max_atoms_in_one_type // 2)
max_len=self.max_atoms_in_one_type // 2).to(self.device)
pos_encoding = self.pos_transformation(pos_encoding)
neg_encoding = self.neg_transformation(neg_encoding)
@@ -297,13 +300,13 @@ class Linker(Module):
return axiom_links
def eval_batch(self, batch, cross_entropy_loss):
batch_atoms = batch[0].to("cuda" if torch.cuda.is_available() else "cpu")
batch_polarity = batch[1].to("cuda" if torch.cuda.is_available() else "cpu")
batch_true_links = batch[2].to("cuda" if torch.cuda.is_available() else "cpu")
batch_sentences_tokens = batch[3].to("cuda" if torch.cuda.is_available() else "cpu")
batch_sentences_mask = batch[4].to("cuda" if torch.cuda.is_available() else "cpu")
batch_atoms = batch[0].to(self.device)
batch_polarity = batch[1].to(self.device)
batch_true_links = batch[2].to(self.device)
batch_sentences_tokens = batch[3].to(self.device)
batch_sentences_mask = batch[4].to(self.device)
logits, sentences_embedding = self.Supertagger.foward(batch_sentences_tokens, batch_sentences_mask)
logits, sentences_embedding = self.Supertagger.forward(batch_sentences_tokens, batch_sentences_mask)
logits_axiom_links_pred = self(batch_atoms, batch_polarity, sentences_embedding,
batch_sentences_mask)
axiom_links_pred = torch.argmax(logits_axiom_links_pred, dim=3)
@@ -363,4 +366,4 @@ class Linker(Module):
'neg_transformation': self.neg_transformation.state_dict(),
'optimizer': self.optimizer,
}, path)
#self.to(self.device)
self.to(self.device)
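The recurring change in this file is device handling: the device is resolved once in __init__, the module itself is moved with self.to(self.device), and every batch tensor and padded encoding is moved with .to(self.device) before the forward pass. A minimal sketch of the pattern, with illustrative module and tensor names (not the project's real ones):

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = torch.nn.Linear(8, 8).to(device)   # module parameters moved to the device once
batch = torch.randn(4, 8).to(device)       # each batch tensor moved before use
out = model(batch)                         # avoids CPU/GPU device-mismatch errors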
@@ -27,9 +27,8 @@ class AttentionDecoderLayer(Module):
attention and feedforward operations, respectively. Otherwise it's done after.
Default: ``False`` (after).
"""
__constants__ = ['batch_first', 'norm_first']
def __init__(self) -> None:
def __init__(self):
super(AttentionDecoderLayer, self).__init__()
# init params
@@ -42,18 +41,17 @@ class AttentionDecoderLayer(Module):
# layers
self.dropout = Dropout(dropout)
self.self_attn = MultiheadAttention(dim_decoder, nhead, dropout=dropout, batch_first=True,
self.self_attn = MultiheadAttention(dim_decoder, nhead, dropout=dropout,
kdim=dim_decoder, vdim=dim_decoder)
self.norm1 = LayerNorm(dim_decoder, eps=layer_norm_eps)
self.multihead_attn = MultiheadAttention(dim_decoder, nhead, dropout=dropout,
kdim=dim_encoder, vdim=dim_encoder,
batch_first=True)
kdim=dim_encoder, vdim=dim_encoder)
self.norm2 = LayerNorm(dim_decoder, eps=layer_norm_eps)
self.ffn = FFN(d_model=dim_decoder, d_ff=dim_feedforward, dropout=dropout)
self.norm3 = LayerNorm(dim_decoder, eps=layer_norm_eps)
def forward(self, atoms_embedding: Tensor, sents_embedding: Tensor, encoder_mask: Tensor,
decoder_mask: Tensor) -> Tensor:
def forward(self, atoms_embedding, sents_embedding, encoder_mask,
decoder_mask):
r"""Pass the inputs through the decoder layer.
Args:
@@ -62,24 +60,27 @@ class AttentionDecoderLayer(Module):
encoder_mask
decoder_mask
"""
atoms_embedding = atoms_embedding.permute(1, 0, 2)
sents_embedding = sents_embedding.permute(1, 0, 2)
x = atoms_embedding
x = self.norm1(x + self._mask_mha_block(atoms_embedding, decoder_mask))
x = self.norm2(x + self._mha_block(x, sents_embedding, encoder_mask))
x = self.norm3(x + self._ff_block(x))
return x
return x.permute(1, 0, 2)
# self-attention block
def _mask_mha_block(self, x: Tensor, decoder_mask: Tensor) -> Tensor:
def _mask_mha_block(self, x, decoder_mask):
x = self.self_attn(x, x, x, attn_mask=decoder_mask)[0]
return x
# multihead attention block
def _mha_block(self, x: Tensor, sents_embs: Tensor, encoder_mask: Tensor) -> Tensor:
def _mha_block(self, x, sents_embs, encoder_mask):
x = self.multihead_attn(x, sents_embs, sents_embs, attn_mask=encoder_mask)[0]
return x
# feed forward block
def _ff_block(self, x: Tensor) -> Tensor:
def _ff_block(self, x):
x = self.ffn.forward(x)
return x
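The permute(1, 0, 2) calls added above go together with dropping batch_first=True: without that flag, torch.nn.MultiheadAttention expects inputs shaped (seq_len, batch, embed_dim), so the decoder converts its batch-first tensors on the way in and converts the result back on the way out. A minimal sketch with illustrative dimensions (not the project's real ones):

import torch
from torch.nn import MultiheadAttention

mha = MultiheadAttention(embed_dim=16, num_heads=4)  # no batch_first: expects (seq, batch, dim)
x = torch.randn(2, 5, 16)                            # batch-first input: (batch, seq, dim)
x_sf = x.permute(1, 0, 2)                            # -> (seq, batch, dim)
out, _ = mha(x_sf, x_sf, x_sf)                       # self-attention in seq-first layout
out = out.permute(1, 0, 2)                           # back to (batch, seq, dim)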
#!/bin/sh
#SBATCH --job-name=Deepgrail_Linker
#SBATCH --partition=RTX6000Node
#SBATCH --gres=gpu:1
#SBATCH --mem=32000
#SBATCH --gres-flags=enforce-binding
#SBATCH --error="error_rtx1.err"
#SBATCH --output="out_rtx1.out"
module purge
module load singularity/3.0.3
srun singularity exec /logiciels/containerCollections/CUDA11/pytorch-NGC-21-03-py3.sif python "train.py"
\ No newline at end of file
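This SLURM job script targets the OSIRIM cluster referenced in the commit message: it requests one GPU on the RTX6000Node partition and runs train.py inside the site's PyTorch Singularity container. It is typically submitted with sbatch; the partition name, module version and container path are site-specific.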
numpy==1.22.2
transformers==4.16.2
torch==1.10.2
torch==1.9.0
huggingface-hub==0.4.0
pandas==1.4.1
sentencepiece
@@ -15,9 +15,12 @@ df_axiom_links = read_csv_pgbar(file_path_axiom_links, nb_sentences)
sentences_batch = df_axiom_links["Sentences"].tolist()
supertagger = SuperTagger()
supertagger.load_weights("models/model_supertagger.pt")
sents_tokenized, sents_mask = supertagger.sent_tokenizer.fit_transform_tensors(sentences_batch)
print("Linker")
linker = Linker(supertagger)
linker = linker.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
print("Linker Training")
linker.train_linker(df_axiom_links, sents_tokenized, sents_mask, validation_rate=0.1, epochs=epochs, batch_size=batch_size, checkpoint=True, validate=True)