From 400ce5a0205ea7ce11a3f94b84a66823e15196f8 Mon Sep 17 00:00:00 2001
From: Caroline DE POURTALES <cdepourt@montana.irit.fr>
Date: Thu, 19 May 2022 13:18:33 +0200
Subject: [PATCH] change supertagger

---
 Configuration/config.ini | 12 +++++-------
 Linker/Linker.py         |  4 ++--
 Linker/MHA.py            | 10 +++++-----
 Linker/utils_linker.py   |  4 ++--
 train.py                 |  2 --
 5 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/Configuration/config.ini b/Configuration/config.ini
index fd5b5a7..d05a21b 100644
--- a/Configuration/config.ini
+++ b/Configuration/config.ini
@@ -12,16 +12,14 @@ max_atoms_in_one_type=250
 dim_encoder = 768
 
 [MODEL_DECODER]
-dim_decoder = 32
+nhead=4
 dropout=0.1
-teacher_forcing=0.05
+dim_feedforward=512
+layer_norm_eps=1e-5
 
 [MODEL_LINKER]
-nhead=4
-dim_feedforward=246
-dim_embedding_atoms=32
-dim_polarity_transfo=128
-layer_norm_eps=1e-5
+dim_embedding_atoms=256
+dim_polarity_transfo=256
 dropout=0.1
 sinkhorn_iters=3
 
diff --git a/Linker/Linker.py b/Linker/Linker.py
index 3090e6f..c88e880 100644
--- a/Linker/Linker.py
+++ b/Linker/Linker.py
@@ -43,7 +43,7 @@ class Linker(Module):
         self.dim_polarity_transfo = int(Configuration.modelLinkerConfig['dim_polarity_transfo'])
         self.dim_embedding_atoms = int(Configuration.modelLinkerConfig['dim_embedding_atoms'])
         self.sinkhorn_iters = int(Configuration.modelLinkerConfig['sinkhorn_iters'])
-        self.nhead = int(Configuration.modelLinkerConfig['nhead'])
+        self.nhead = int(Configuration.modelDecoderConfig['nhead'])
         self.max_len_sentence = int(Configuration.datasetConfig['max_len_sentence'])
         self.max_atoms_in_sentence = int(Configuration.datasetConfig['max_atoms_in_sentence'])
         self.max_atoms_in_one_type = int(Configuration.datasetConfig['max_atoms_in_one_type'])
@@ -77,7 +77,7 @@ class Linker(Module):
                                lr=learning_rate)
         self.scheduler = get_cosine_schedule_with_warmup(self.optimizer,
                                                          num_warmup_steps=0,
-                                                         num_training_steps=100)
+                                                         num_training_steps=float(Configuration.modelTrainingConfig['epoch']))
 
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
diff --git a/Linker/MHA.py b/Linker/MHA.py
index 651487b..efc9e75 100644
--- a/Linker/MHA.py
+++ b/Linker/MHA.py
@@ -33,11 +33,11 @@ class AttentionDecoderLayer(Module):
 
         # init params
         dim_encoder = int(Configuration.modelEncoderConfig['dim_encoder'])
-        dim_decoder = int(Configuration.modelDecoderConfig['dim_decoder'])
-        nhead = int(Configuration.modelLinkerConfig['nhead'])
-        dropout = float(Configuration.modelLinkerConfig['dropout'])
-        dim_feedforward = int(Configuration.modelLinkerConfig['dim_feedforward'])
-        layer_norm_eps = float(Configuration.modelLinkerConfig['layer_norm_eps'])
+        dim_decoder = int(Configuration.modelLinkerConfig['dim_embedding_atoms'])
+        nhead = int(Configuration.modelDecoderConfig['nhead'])
+        dropout = float(Configuration.modelDecoderConfig['dropout'])
+        dim_feedforward = int(Configuration.modelDecoderConfig['dim_feedforward'])
+        layer_norm_eps = float(Configuration.modelDecoderConfig['layer_norm_eps'])
 
         # layers
         self.dropout = Dropout(dropout)
diff --git a/Linker/utils_linker.py b/Linker/utils_linker.py
index 0821f61..1eacad6 100644
--- a/Linker/utils_linker.py
+++ b/Linker/utils_linker.py
@@ -10,13 +10,13 @@ from utils import pad_sequence
 class FFN(Module):
     "Implements FFN equation."
 
-    def __init__(self, d_model, d_ff, dropout=0.1):
+    def __init__(self, d_model, d_ff, dropout=0.1, d_out=None):
         super(FFN, self).__init__()
         self.ffn = Sequential(
             Linear(d_model, d_ff, bias=False),
             GELU(),
             Dropout(dropout),
-            Linear(d_ff, d_model, bias=False)
+            Linear(d_ff, d_out if d_out is not None else d_model, bias=False)
         )
 
     def forward(self, x):
diff --git a/train.py b/train.py
index 8b8ff0d..e1d6389 100644
--- a/train.py
+++ b/train.py
@@ -13,8 +13,6 @@ epochs = int(Configuration.modelTrainingConfig['epoch'])
 file_path_axiom_links = 'Datasets/gold_dataset_links.csv'
 df_axiom_links = read_csv_pgbar(file_path_axiom_links, nb_sentences)
 
-sentences_batch = df_axiom_links["Sentences"].tolist()
-
 print("Linker")
 linker = Linker("models/model_supertagger.pt")
 print("Linker Training")
-- 
GitLab
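
For reference, below is a minimal sketch of the FFN class as it reads after this patch. The forward body is truncated in the diff, so the return statement is assumed to simply apply the sequential stack, and the dimensions in the usage lines are illustrative values only (they echo the new dim_embedding_atoms=256 and dim_feedforward=512 settings, while d_out=128 is hypothetical).

    import torch
    from torch.nn import Module, Sequential, Linear, GELU, Dropout

    class FFN(Module):
        "Implements FFN equation."

        def __init__(self, d_model, d_ff, dropout=0.1, d_out=None):
            super(FFN, self).__init__()
            # d_out lets the output dimension differ from d_model; omitting it
            # falls back to d_model, i.e. the pre-patch behaviour.
            self.ffn = Sequential(
                Linear(d_model, d_ff, bias=False),
                GELU(),
                Dropout(dropout),
                Linear(d_ff, d_out if d_out is not None else d_model, bias=False)
            )

        def forward(self, x):
            # Assumed from context: the diff cuts off after the signature.
            return self.ffn(x)

    # Hypothetical usage: project 256-dim atom embeddings to a 128-dim output.
    ffn = FFN(d_model=256, d_ff=512, dropout=0.1, d_out=128)
    print(ffn(torch.rand(2, 10, 256)).shape)  # torch.Size([2, 10, 128])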