Commit e8d49aae authored by Caroline DE POURTALES

update for kokos parameters config

parent 400ce5a0
2 merge requests: !6 Linker with transformer, !5 Linker with transformer
Configuration file:

@@ -12,13 +12,15 @@ max_atoms_in_one_type=250
 dim_encoder = 768
 
 [MODEL_DECODER]
-nhead=4
+nhead=8
+num_layers=1
 dropout=0.1
 dim_feedforward=512
 layer_norm_eps=1e-5
 
 [MODEL_LINKER]
 dim_embedding_atoms=256
+dim_pre_sinkhorn_transfo=32
 dim_polarity_transfo=256
 dropout=0.1
 sinkhorn_iters=3
@@ -26,6 +28,6 @@ sinkhorn_iters=3
 [MODEL_TRAINING]
 device=cpu
 batch_size=16
-epoch=20
+epoch=25
 seed_val=42
-learning_rate=2e-5
+learning_rate=2e-4
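
The new num_layers and dim_pre_sinkhorn_transfo keys only take effect if the Configuration module exposes these sections. A minimal sketch of such a loader, assuming Python's standard configparser; the file path and the DATASET section name are assumptions, since only the MODEL_* sections appear in this diff:

from configparser import ConfigParser

class Configuration:
    # Hypothetical loader matching the access pattern used in Linker,
    # e.g. Configuration.modelLinkerConfig['dim_pre_sinkhorn_transfo'].
    _config = ConfigParser()
    _config.read('Configuration/config.ini')  # assumed path

    datasetConfig = _config['DATASET']  # assumed section name
    modelDecoderConfig = _config['MODEL_DECODER']
    modelLinkerConfig = _config['MODEL_LINKER']
    modelTrainingConfig = _config['MODEL_TRAINING']

configparser returns every value as a string, which is why Linker wraps each lookup in int(...) or float(...).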
Linker module:

@@ -39,15 +39,14 @@ class Linker(Module):
     def __init__(self, supertagger_path_model):
         super(Linker, self).__init__()
 
-        self.dim_encoder = int(Configuration.modelEncoderConfig['dim_encoder'])
-        self.dim_polarity_transfo = int(Configuration.modelLinkerConfig['dim_polarity_transfo'])
         self.dim_embedding_atoms = int(Configuration.modelLinkerConfig['dim_embedding_atoms'])
-        self.sinkhorn_iters = int(Configuration.modelLinkerConfig['sinkhorn_iters'])
         self.nhead = int(Configuration.modelDecoderConfig['nhead'])
-        self.max_len_sentence = int(Configuration.datasetConfig['max_len_sentence'])
+        dim_pre_sinkhorn_transfo = int(Configuration.modelLinkerConfig['dim_pre_sinkhorn_transfo'])
+        dim_polarity_transfo = int(Configuration.modelLinkerConfig['dim_polarity_transfo'])
+        self.sinkhorn_iters = int(Configuration.modelLinkerConfig['sinkhorn_iters'])
         self.max_atoms_in_sentence = int(Configuration.datasetConfig['max_atoms_in_sentence'])
         self.max_atoms_in_one_type = int(Configuration.datasetConfig['max_atoms_in_one_type'])
-        self.atom_vocab_size = int(Configuration.datasetConfig['atom_vocab_size'])
+        atom_vocab_size = int(Configuration.datasetConfig['atom_vocab_size'])
         learning_rate = float(Configuration.modelTrainingConfig['learning_rate'])
         self.dropout = Dropout(0.1)
         self.device = "cpu"
@@ -59,25 +58,22 @@ class Linker(Module):
         self.atom_map = atom_map
         self.padding_id = self.atom_map['[PAD]']
         self.atoms_tokenizer = AtomTokenizer(atom_map, self.max_atoms_in_sentence)
-        self.atoms_embedding = AtomEmbedding(self.dim_embedding_atoms, self.atom_vocab_size, self.padding_id)
+        self.atoms_embedding = AtomEmbedding(self.dim_embedding_atoms, atom_vocab_size, self.padding_id)
 
         self.linker_encoder = AttentionDecoderLayer()
 
         self.pos_transformation = Sequential(
-            FFN(self.dim_embedding_atoms, self.dim_polarity_transfo, 0.1),
-            LayerNorm(self.dim_embedding_atoms, eps=1e-12)
+            FFN(self.dim_embedding_atoms, dim_polarity_transfo, 0.1, d_out=dim_pre_sinkhorn_transfo),
+            LayerNorm(dim_pre_sinkhorn_transfo, eps=1e-12)
         )
         self.neg_transformation = Sequential(
-            FFN(self.dim_embedding_atoms, self.dim_polarity_transfo, 0.1),
-            LayerNorm(self.dim_embedding_atoms, eps=1e-12)
+            FFN(self.dim_embedding_atoms, dim_polarity_transfo, 0.1, d_out=dim_pre_sinkhorn_transfo),
+            LayerNorm(dim_pre_sinkhorn_transfo, eps=1e-12)
         )
 
         self.cross_entropy_loss = SinkhornLoss()
 
         self.optimizer = AdamW(self.parameters(),
                                lr=learning_rate)
-        self.scheduler = get_cosine_schedule_with_warmup(self.optimizer,
-                                                         num_warmup_steps=0,
-                                                         num_training_steps=float(Configuration.modelTrainingConfig['epoch']))
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
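
FFN is now called with a d_out keyword, so it must support an output dimension different from its input. A minimal sketch of a compatible module, assuming the usual two-layer feed-forward design (the project's actual FFN is not part of this diff):

import torch.nn.functional as F
from torch.nn import Dropout, Linear, Module

class FFN(Module):
    # Two-layer feed-forward block. d_out lets the caller project down to
    # the smaller pre-Sinkhorn dimension instead of back to the input size.
    def __init__(self, d_model, d_ff, dropout_rate=0.1, d_out=None):
        super(FFN, self).__init__()
        self.linear_in = Linear(d_model, d_ff)
        self.linear_out = Linear(d_ff, d_out if d_out is not None else d_model)
        self.dropout = Dropout(dropout_rate)

    def forward(self, x):
        return self.linear_out(self.dropout(F.relu(self.linear_in(x))))

With dim_embedding_atoms=256, dim_polarity_transfo=256 and dim_pre_sinkhorn_transfo=32 from the config, each polarity transformation now maps 256 -> 256 -> 32, and the following LayerNorm is sized to match.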
@@ -160,7 +156,7 @@ class Linker(Module):
             neg_encoding = self.neg_transformation(neg_encoding)
 
             weights = torch.bmm(pos_encoding, neg_encoding.transpose(2, 1))
-            link_weights.append(sinkhorn(weights, iters=3))
+            link_weights.append(sinkhorn(weights, iters=self.sinkhorn_iters))
 
         total_link_weights = torch.stack(link_weights)
         link_weights_per_batch = total_link_weights.permute(1, 0, 2, 3)
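
The hard-coded iters=3 is replaced by the configured value. For reference, a common log-space Sinkhorn formulation that honours such an iteration count, shown here as a sketch (the project's own sinkhorn implementation is not part of this diff):

import torch

def norm(x, dim):
    # One normalisation pass in log space: subtract the log-sum-exp along
    # `dim` so that dimension sums to 1 after exponentiation.
    return x - torch.logsumexp(x, dim=dim, keepdim=True)

def sinkhorn(x, iters=3):
    # Alternately normalise rows and columns of a batch of square score
    # matrices (batch, n, n); the result converges towards the log of a
    # doubly stochastic matrix, i.e. a soft permutation linking positive
    # atoms to negative atoms.
    for _ in range(iters):
        x = norm(norm(x, dim=1), dim=2)
    return x

Since sinkhorn_iters=3 in the config, behaviour is unchanged for now, but the iteration count can be tuned without touching the code.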
@@ -260,7 +256,6 @@ class Linker(Module):
 
             # Update parameters and take a step using the computed gradient.
             self.optimizer.step()
-            self.scheduler.step()
 
             pred_axiom_links = torch.argmax(logits_predictions, dim=3)
             accuracy_train += mesure_accuracy(batch_true_links, pred_axiom_links)
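
Dropping self.scheduler.step() matches the removal of the scheduler in __init__. Note that the old schedule was built with num_training_steps set to the epoch count rather than to the total number of optimizer steps, so its cosine decay would have completed after a handful of batches, which presumably motivated falling back to a fixed learning rate (now 2e-4).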
Training script:
 import torch
 from Configuration import Configuration
 from Linker import *
-from deepgrail_Tagger.SuperTagger.SuperTagger import SuperTagger
 from utils import read_csv_pgbar
 
 torch.cuda.empty_cache()
 
 batch_size = int(Configuration.modelTrainingConfig['batch_size'])
-nb_sentences = batch_size * 400
+nb_sentences = batch_size * 40
 epochs = int(Configuration.modelTrainingConfig['epoch'])
 
 file_path_axiom_links = 'Datasets/gold_dataset_links.csv'
 df_axiom_links = read_csv_pgbar(file_path_axiom_links, nb_sentences)
 
 print("Linker")
 linker = Linker("models/model_supertagger.pt")
 
 print("Linker Training")
-linker.train_linker(df_axiom_links, validation_rate=0.1, epochs=epochs, batch_size=batch_size, checkpoint=True, tensorboard=True)
+linker.train_linker(df_axiom_links, validation_rate=0.1, epochs=epochs, batch_size=batch_size, checkpoint=False, tensorboard=True)
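
Taken together, the training-script changes scale the run down: nb_sentences now covers 40 batches' worth of data instead of 400, and checkpointing is switched off, which suggests a shorter experimental run for the new hyperparameters.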