diff --git a/Configuration/config.ini b/Configuration/config.ini
index fd5b5a7dbc452170386b60f8cb00732a21b9f14c..d05a21b070107c11f74e81f627e8accee93e4c22 100644
--- a/Configuration/config.ini
+++ b/Configuration/config.ini
@@ -12,16 +12,14 @@ max_atoms_in_one_type=250
 dim_encoder = 768
 
 [MODEL_DECODER]
-dim_decoder = 32
+nhead=4
 dropout=0.1
-teacher_forcing=0.05
+dim_feedforward=512
+layer_norm_eps=1e-5
 
 [MODEL_LINKER]
-nhead=4
-dim_feedforward=246
-dim_embedding_atoms=32
-dim_polarity_transfo=128
-layer_norm_eps=1e-5
+dim_embedding_atoms=256
+dim_polarity_transfo=256
 dropout=0.1
 sinkhorn_iters=3
 
diff --git a/Linker/Linker.py b/Linker/Linker.py
index 3090e6fc2ab8f13567f04ad73cd568ddd192d8ea..c88e880e1d1e17fa8e94423c7cc2cfb7ce42e0ea 100644
--- a/Linker/Linker.py
+++ b/Linker/Linker.py
@@ -43,7 +43,7 @@ class Linker(Module):
         self.dim_polarity_transfo = int(Configuration.modelLinkerConfig['dim_polarity_transfo'])
         self.dim_embedding_atoms = int(Configuration.modelLinkerConfig['dim_embedding_atoms'])
         self.sinkhorn_iters = int(Configuration.modelLinkerConfig['sinkhorn_iters'])
-        self.nhead = int(Configuration.modelLinkerConfig['nhead'])
+        self.nhead = int(Configuration.modelDecoderConfig['nhead'])
         self.max_len_sentence = int(Configuration.datasetConfig['max_len_sentence'])
         self.max_atoms_in_sentence = int(Configuration.datasetConfig['max_atoms_in_sentence'])
         self.max_atoms_in_one_type = int(Configuration.datasetConfig['max_atoms_in_one_type'])
@@ -77,7 +77,7 @@ class Linker(Module):
                                lr=learning_rate)
         self.scheduler = get_cosine_schedule_with_warmup(self.optimizer,
                                                          num_warmup_steps=0,
-                                                         num_training_steps=100)
+                                                         num_training_steps=float(Configuration.modelTrainingConfig['epoch']))
 
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
diff --git a/Linker/MHA.py b/Linker/MHA.py
index 651487b6e841398eed1f55e4fbc2bedcd6c6317b..efc9e75cada3fd926853ade18fbb4ea1c61419d7 100644
--- a/Linker/MHA.py
+++ b/Linker/MHA.py
@@ -33,11 +33,11 @@ class AttentionDecoderLayer(Module):
 
         # init params
         dim_encoder = int(Configuration.modelEncoderConfig['dim_encoder'])
-        dim_decoder = int(Configuration.modelDecoderConfig['dim_decoder'])
-        nhead = int(Configuration.modelLinkerConfig['nhead'])
-        dropout = float(Configuration.modelLinkerConfig['dropout'])
-        dim_feedforward = int(Configuration.modelLinkerConfig['dim_feedforward'])
-        layer_norm_eps = float(Configuration.modelLinkerConfig['layer_norm_eps'])
+        dim_decoder = int(Configuration.modelLinkerConfig['dim_embedding_atoms'])
+        nhead = int(Configuration.modelDecoderConfig['nhead'])
+        dropout = float(Configuration.modelDecoderConfig['dropout'])
+        dim_feedforward = int(Configuration.modelDecoderConfig['dim_feedforward'])
+        layer_norm_eps = float(Configuration.modelDecoderConfig['layer_norm_eps'])
 
         # layers
         self.dropout = Dropout(dropout)
diff --git a/Linker/utils_linker.py b/Linker/utils_linker.py
index 0821f6196c55a3c0961ecc89c23aaacde8f53140..1eacad663bd829d90b29ec7fd837ab7245a7ce2f 100644
--- a/Linker/utils_linker.py
+++ b/Linker/utils_linker.py
@@ -10,13 +10,13 @@ from utils import pad_sequence
 class FFN(Module):
     "Implements FFN equation."
 
-    def __init__(self, d_model, d_ff, dropout=0.1):
+    def __init__(self, d_model, d_ff, dropout=0.1, d_out=None):
         super(FFN, self).__init__()
         self.ffn = Sequential(
             Linear(d_model, d_ff, bias=False),
             GELU(),
             Dropout(dropout),
-            Linear(d_ff, d_model, bias=False)
+            Linear(d_ff, d_out if d_out is not None else d_model, bias=False)
         )
 
     def forward(self, x):
diff --git a/train.py b/train.py
index 8b8ff0d5eed0e9ed2601a048c020372df5530a52..e1d6389c4efdab689e8792bfefa44859820cb50b 100644
--- a/train.py
+++ b/train.py
@@ -13,8 +13,6 @@ epochs = int(Configuration.modelTrainingConfig['epoch'])
 file_path_axiom_links = 'Datasets/gold_dataset_links.csv'
 df_axiom_links = read_csv_pgbar(file_path_axiom_links, nb_sentences)
 
-sentences_batch = df_axiom_links["Sentences"].tolist()
-
 print("Linker")
 linker = Linker("models/model_supertagger.pt")
 print("Linker Training")
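For context, a minimal usage sketch of the widened `FFN` signature introduced above. The output width (`d_out=128`), input shape, and the assumption that `forward` simply applies `self.ffn` are illustrative and not taken from this patch:

```python
# Illustrative sketch only: the new optional d_out argument lets the final
# Linear project to a width other than d_model (values below are arbitrary).
import torch
from Linker.utils_linker import FFN

ffn = FFN(d_model=256, d_ff=512, d_out=128)  # d_out overrides d_model on the output Linear
x = torch.randn(2, 10, 256)                  # (batch, atoms, dim_embedding_atoms=256)
y = ffn(x)
print(y.shape)                               # torch.Size([2, 10, 128])
```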