Commit ba429805 authored by Julien Rabault

add encoding label

parent bc19cbf9
1 merge request: !1 Draft: Master
File added
File added
File added
import random

import torch
import torch.nn.functional as F
from torch.nn import (Dropout, Embedding, Module, ModuleList, Linear, LSTM, GRU)

from Configuration import Configuration


class RNNDecoderLayer(Module):
    def __init__(self, symbols_map):
        super(RNNDecoderLayer, self).__init__()

        # init params
        self.dim_encoder = int(Configuration.modelDecoderConfig['dim_encoder'])
        self.dim_decoder = int(Configuration.modelDecoderConfig['dim_decoder'])
        self.max_symbols_per_word = int(Configuration.modelDecoderConfig['max_symbols_per_word'])
        self.max_len_sentence = int(Configuration.modelDecoderConfig['max_len_sentence'])
        self.symbols_vocab_size = int(Configuration.modelDecoderConfig['symbols_vocab_size'])
        dropout = float(Configuration.modelDecoderConfig['dropout'])
        self.num_rnn_layers = int(Configuration.modelDecoderConfig['num_rnn_layers'])
        self.teacher_forcing = float(Configuration.modelDecoderConfig['teacher_forcing'])

        self.bidirectional = False
        self.use_attention = True
        self.symbols_map = symbols_map
        self.symbols_padding_id = self.symbols_map["[PAD]"]
        self.symbols_sep_id = self.symbols_map["[SEP]"]
        self.symbols_start_id = self.symbols_map["[START]"]
        self.symbols_sos_id = self.symbols_map["[SOS]"]

        # Different layers
        # Symbols embedding (assumed definition: the layer is used below as self.symbols_embedder
        # but was missing from the original __init__)
        self.symbols_embedder = Embedding(self.symbols_vocab_size, self.dim_decoder,
                                          padding_idx=self.symbols_padding_id)
        # For hidden_state
        self.dropout = Dropout(dropout)
        # rnn layer
        if self.use_attention:
            self.rnn = LSTM(input_size=self.dim_encoder, hidden_size=self.dim_encoder, num_layers=self.num_rnn_layers,
                            dropout=dropout,
                            bidirectional=self.bidirectional, batch_first=True)
        else:
            self.rnn = LSTM(input_size=self.dim_decoder, hidden_size=self.dim_encoder, num_layers=self.num_rnn_layers,
                            dropout=dropout,
                            bidirectional=self.bidirectional, batch_first=True)

        # Projection on vocab_size
        if self.bidirectional:
            self.proj = Linear(self.dim_encoder * 2, self.symbols_vocab_size)
        else:
            self.proj = Linear(self.dim_encoder, self.symbols_vocab_size)

        self.attn_combine = Linear(self.dim_decoder + self.dim_encoder, self.dim_encoder)

    def sos_mask(self, y):
        return torch.eq(y, self.symbols_sos_id)

    def forward(self, symbols_tokenized_batch, last_hidden_state, pooler_output):
        r"""Trains the translation from encoded sentences to symbols.

        Args:
            symbols_tokenized_batch: [batch_size, max_len_sentence, max_symbols_per_word] the true symbols for each word of each sentence.
            last_hidden_state: [batch_size, max_len_sentence, dim_encoder] sequence of hidden states at the output of the last layer of the encoder.
            pooler_output: [batch_size, dim_encoder] last-layer hidden state of the first token of the sequence (classification token) after further processing through the layers used for the auxiliary pretraining task.
        """
        batch_size, sequence_length, hidden_size = last_hidden_state.shape
        device = "cuda" if torch.cuda.is_available() else "cpu"

        # y_hat[batch_size, max_len_sentence, max_symbols_per_word, symbols_vocab_size], initialised with probability 1 on PAD
        y_hat = torch.zeros(batch_size, self.max_len_sentence, self.max_symbols_per_word, self.symbols_vocab_size,
                            dtype=torch.float, device=device)
        y_hat[:, :, :, self.symbols_padding_id] = 1

        decoded_ij = torch.ones(batch_size, 1, dtype=torch.long, device=device) * self.symbols_start_id
        sos_mask = torch.zeros(batch_size, dtype=torch.bool, device=device)

        # hidden_state is initialised from the pooler output, repeated for each RNN layer (and direction)
        hidden_state = pooler_output.unsqueeze(0).repeat(self.num_rnn_layers * (1 + self.bidirectional), 1, 1)
        c_state = torch.zeros(self.num_rnn_layers * (1 + self.bidirectional), batch_size, hidden_size,
                              dtype=torch.float, device=device)

        # teacher forcing is disabled here; self.teacher_forcing is read from the configuration but not used yet
        use_teacher_forcing = False

        # for each word, then for each symbol of that word
        for i in range(self.max_len_sentence):
            for j in range(self.max_symbols_per_word):
                symbols_embedding = self.symbols_embedder(decoded_ij)
                symbols_embedding = self.dropout(symbols_embedding)

                output = symbols_embedding
                if self.use_attention:
                    output = torch.cat((symbols_embedding, last_hidden_state[:, i, :].unsqueeze(1)), 2)
                    output = self.attn_combine(output)
                    output = F.relu(output)

                # rnn layer
                output, (hidden_state, c_state) = self.rnn(output, (hidden_state, c_state))

                # Projection of the rnn output, omitting the last two entries so we do not predict PAD
                proj = self.proj(output)[:, :, :-2]

                if use_teacher_forcing:
                    decoded_ij = symbols_tokenized_batch[:, i, j].unsqueeze(1)
                else:
                    decoded_ij = torch.argmax(F.softmax(proj, dim=2), dim=2)

                # Update the SOS mask and overwrite the PAD initialisation for still-active sentences
                sos_mask_ij = self.sos_mask(torch.argmax(F.softmax(proj, dim=2), dim=2)[:, -1])
                y_hat[~sos_mask, i, j, self.symbols_padding_id] = 0
                y_hat[~sos_mask, i, j, :-2] = proj[~sos_mask, -1, :]
                sos_mask = sos_mask_ij | sos_mask

            # Stop if every sentence has emitted SOS or if we are full
            if not torch.any(~sos_mask):
                break

        return y_hat

    def predict_rnn(self, last_hidden_state, pooler_output):
        r"""Predicts the symbols from the output of the encoder.

        Args:
            last_hidden_state: [batch_size, max_len_sentence, dim_encoder] the output of the encoder.
            pooler_output: [batch_size, dim_encoder] last-layer hidden state of the first token of the sequence (classification token) after further processing through the layers used for the auxiliary pretraining task.
        """
        batch_size, sequence_length, hidden_size = last_hidden_state.shape
        device = "cuda" if torch.cuda.is_available() else "cpu"

        # y_hat[batch_size, max_len_sentence, max_symbols_per_word, symbols_vocab_size], initialised with probability 1 on PAD
        y_hat = torch.zeros(batch_size, self.max_len_sentence, self.max_symbols_per_word, self.symbols_vocab_size,
                            dtype=torch.float, device=device)
        y_hat[:, :, :, self.symbols_padding_id] = 1

        decoded_ij = torch.ones(batch_size, 1, dtype=torch.long, device=device) * self.symbols_start_id
        sos_mask = torch.zeros(batch_size, dtype=torch.bool, device=device)

        # hidden_state is initialised from the pooler output, repeated for each RNN layer (and direction)
        hidden_state = pooler_output.unsqueeze(0).repeat(self.num_rnn_layers * (1 + self.bidirectional), 1, 1)
        c_state = torch.zeros(self.num_rnn_layers * (1 + self.bidirectional), batch_size, hidden_size,
                              dtype=torch.float, device=device)

        symbols_embedding = self.symbols_embedder(decoded_ij)
        symbols_embedding = self.dropout(symbols_embedding)

        # for each word, then for each symbol of that word
        for i in range(self.max_len_sentence):
            output = symbols_embedding
            if self.use_attention:
                output = torch.cat((symbols_embedding, last_hidden_state[:, i, :].unsqueeze(1)), 2)
                output = self.attn_combine(output)
                output = F.relu(output)

            for j in range(self.max_symbols_per_word):
                symbols_embedding = self.symbols_embedder(decoded_ij)
                symbols_embedding = self.dropout(symbols_embedding)

                # rnn layer
                output, (hidden_state, c_state) = self.rnn(output, (hidden_state, c_state))

                # Projection of the rnn output, omitting the last two entries so we do not predict PAD
                proj_softmax = F.softmax(self.proj(output)[:, :, :-2], dim=2)
                decoded_ij = torch.argmax(proj_softmax, dim=2)

                # Update the SOS mask and overwrite the PAD initialisation for still-active sentences
                sos_mask_ij = self.sos_mask(decoded_ij[:, -1])
                y_hat[~sos_mask, i, j, self.symbols_padding_id] = 0
                y_hat[~sos_mask, i, j, :-2] = proj_softmax[~sos_mask, -1, :]
                sos_mask = sos_mask_ij | sos_mask

            # Stop if every sentence has emitted SOS or if we are full
            if not torch.any(~sos_mask):
                break

        return y_hat
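
Below is a hypothetical usage sketch (not part of the commit) showing how RNNDecoderLayer.predict_rnn could be exercised in isolation. The Configuration values, symbol map and encoder outputs are stand-ins chosen only to make the shapes line up; the real project fills Configuration from its config file.

import torch
from Configuration import Configuration
from SuperTagger.Decoder.RNNDecoderLayer import RNNDecoderLayer

# Stub configuration (assumed values, normally read from the project config file)
Configuration.modelDecoderConfig = {
    'dim_encoder': '768', 'dim_decoder': '32', 'max_symbols_per_word': '4',
    'max_len_sentence': '16', 'symbols_vocab_size': '30', 'dropout': '0.1',
    'num_rnn_layers': '2', 'teacher_forcing': '0.5',
}

device = "cuda" if torch.cuda.is_available() else "cpu"
symbols_map = {"[PAD]": 0, "[SEP]": 1, "[START]": 2, "[SOS]": 3}
decoder = RNNDecoderLayer(symbols_map).to(device)

last_hidden_state = torch.rand(2, 16, 768, device=device)  # fake encoder sequence output
pooler_output = torch.rand(2, 768, device=device)          # fake encoder pooled output

with torch.no_grad():
    y_hat = decoder.predict_rnn(last_hidden_state, pooler_output)
print(y_hat.shape)  # torch.Size([2, 16, 4, 30])
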
import sys

import torch
from torch import nn

from Configuration import Configuration


class EncoderLayer(nn.Module):
    """Encoder class, input of the supertagger."""

    def __init__(self, model):
        super(EncoderLayer, self).__init__()
        self.name = "Encoder"
        self.bert = model
        self.hidden_size = self.bert.config.hidden_size

    def forward(self, batch):
        r"""
        Args:
            batch: (input_ids, attention_mask) tensors produced by the sentence tokenizer.
        Returns:
            last_hidden_state: [batch_size, max_len_sentence, dim_encoder] sequence of hidden states at the output of the last layer of the model.
            pooler_output: [batch_size, dim_encoder] last-layer hidden state of the first token of the sequence (classification token) after further processing through the layers used for the auxiliary pretraining task.
        """
        b_input_ids = batch[0]
        b_input_mask = batch[1]

        outputs = self.bert(
            input_ids=b_input_ids, attention_mask=b_input_mask)

        return outputs[0], outputs[1]

    @staticmethod
    def load(model_path: str):
        r"""Load the model from a file.

        Args:
            model_path (str): path to the model
        Returns:
            model (nn.Module): model with saved parameters
        """
        params = torch.load(
            model_path, map_location=lambda storage, loc: storage)
        args = params['args']
        model = EncoderLayer(**args)
        model.load_state_dict(params['state_dict'])
        return model

    def save(self, path: str):
        r"""Save the model to a file.

        Args:
            path (str): path to the model
        """
        print('save model parameters to [%s]' % path, file=sys.stderr)
        # NOTE: self.dropout_rate is not defined in __init__, and load() passes these args
        # back to EncoderLayer(model); save/load are not yet consistent with each other.
        params = {
            'args': dict(bert_config=self.bert.config, dropout_rate=self.dropout_rate),
            'state_dict': self.state_dict()
        }
        torch.save(params, path)

    def to_dict(self):
        return {}
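
A hypothetical usage sketch (not part of the commit): wrapping CamemBERT in EncoderLayer and running a small batch through it, using the (input_ids, attention_mask) layout that forward() expects. Sentences and shapes are illustrative only.

import torch
from transformers import AutoTokenizer, CamembertModel
from SuperTagger.Encoder.EncoderLayer import EncoderLayer

tokenizer = AutoTokenizer.from_pretrained("camembert-base")
encoder = EncoderLayer(CamembertModel.from_pretrained("camembert-base"))

# Tokenize two French sentences and pad them to the same length
enc = tokenizer(["Le chat dort.", "Il pleut beaucoup."], padding=True, return_tensors="pt")

with torch.no_grad():
    last_hidden_state, pooler_output = encoder((enc["input_ids"], enc["attention_mask"]))
print(last_hidden_state.shape, pooler_output.shape)  # [2, seq_len, 768] and [2, 768]
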
......@@ -4,9 +4,7 @@ from torch.nn import Dropout, LSTM
from torch.nn import Module
from Configuration import Configuration
from SuperTagger.Decoder.RNNDecoderLayer import RNNDecoderLayer
from torch.nn.utils.rnn import pack_padded_sequence
from SuperTagger.Encoder.EncoderLayer import EncoderLayer
from SuperTagger.eval import measure_supertagging_accuracy
......@@ -21,6 +19,8 @@ class EncoderDecoder(Module):
def __init__(self, BASE_MODEL, numPos1Classes, numPos2Classes, numSuperClasses):
super(EncoderDecoder, self).__init__()
self.bert = BASE_MODEL
self.dim_encoder = int(Configuration.modelDecoderConfig['dim_encoder'])
self.dim_decoder = int(Configuration.modelDecoderConfig['dim_decoder'])
self.num_rnn_layers = int(Configuration.modelDecoderConfig['num_rnn_layers'])
......@@ -28,18 +28,18 @@ class EncoderDecoder(Module):
dropout = float(Configuration.modelDecoderConfig['dropout'])
self.dropout = Dropout(dropout)
self.encoder = EncoderLayer(BASE_MODEL)
self.bert = BASE_MODEL
self.lstm_shared = LSTM(input_size=self.dim_encoder, hidden_size=self.dim_encoder, num_layers=self.num_rnn_layers,
dropout=dropout,
bidirectional=self.bidirectional, batch_first=True, )
#Pos1
self.pos1_1 = nn.Linear(self.dim_encoder*2,self.dim_decoder)
self.pos1_1 = nn.Linear(self.dim_encoder,self.dim_decoder)
self.pos1_2 = nn.Linear(self.dim_decoder, numPos1Classes)
#Pos2
self.pos2_1 = nn.Linear(self.dim_encoder*2, self.dim_decoder)
self.pos2_1 = nn.Linear(self.dim_encoder, self.dim_decoder)
self.pos2_2 = nn.Linear(self.dim_decoder, numPos2Classes)
#super
......@@ -47,24 +47,27 @@ class EncoderDecoder(Module):
num_layers=self.num_rnn_layers,
dropout=dropout,
bidirectional=self.bidirectional, batch_first=True, )
self.pos_super_1 = nn.Linear(self.dim_encoder*2,self.dim_decoder)
self.pos_super_1 = nn.Linear(self.dim_encoder,self.dim_decoder)
self.pos_super_2 = nn.Linear(self.dim_decoder, numSuperClasses)
def forward(self, batch):
encoded_layers, pooled_output = self.encoder(batch)
encoded_layers = self.dropout(encoded_layers)
# encoded_layers = encoded_layers.permute(1, 0, 2)
print("encoded_layers : ", encoded_layers.size())
b_input_ids = batch[0]
b_input_mask = batch[1]
encoded_layers, _ = self.bert(
input_ids=b_input_ids, attention_mask=b_input_mask, return_dict=False)
lstm_output, (h, c) = self.lstm_shared(encoded_layers) ## extract the 1st token's embeddings
print("last_hidden : ", lstm_output.size())
# output_shared = torch.cat((lstm_output[:, :, :self.dim_encoder], lstm_output[:, :, self.dim_encoder:]), dim=2)
lstm_output = self.dropout(encoded_layers)
print("encoded_layers : ", encoded_layers.size())
print("output_shared : ", lstm_output.size())
# lstm_output, _ = self.lstm_shared(encoded_layers) ## extract the 1st token's embeddings
# print("last_hidden : ", lstm_output.size())
#
# print("output_shared : ", lstm_output.size())
# Pos1
pos_1_output= self.pos1_1(lstm_output)
......@@ -77,9 +80,8 @@ class EncoderDecoder(Module):
pos_2_output = self.pos2_2(pos_2_output)
# super
enc_hiddens, (last_hidden_super, last_cell_super) = self.lstm_super(lstm_output)
print(enc_hiddens.size())
super_output = self.pos_super_1(enc_hiddens)
# enc_hiddens, _ = self.lstm_super(lstm_output)
super_output = self.pos_super_1(lstm_output)
super_output = self.dropout(super_output)
super_output = self.pos_super_2(super_output)
......
import torch
class EncoderInput():
class EncoderTokenizer():
def __init__(self, tokenizer):
"""@params tokenizer (PretrainedTokenizer): Tokenizer that tokenizes text """
......
import pickle

import numpy as np
import torch


def load_obj(name):
    with open(name + '.pkl', 'rb') as f:
        return pickle.load(f)


class SymbolTokenizer():

    def __init__(self):
        """Maps POS1, POS2 and supertag labels to integer ids (and back) using the index files in Datasets/."""
        self.index_to_super = load_obj('Datasets/index_to_super')
        self.index_to_pos1 = load_obj('Datasets/index_to_pos1')
        self.index_to_pos2 = load_obj('Datasets/index_to_pos2')
        self.super_to_index = {v: int(k) for k, v in self.index_to_super.items()}
        self.pos1_to_index = {v: int(k) for k, v in self.index_to_pos1.items()}
        self.pos2_to_index = {v: int(k) for k, v in self.index_to_pos2.items()}

    def lenPOS1(self):
        print(self.pos1_to_index)
        return len(self.index_to_pos1) + 1

    def lenPOS2(self):
        return len(self.index_to_pos2) + 1

    def lenSuper(self):
        return len(self.index_to_super) + 1

    def convert_batchs_to_ids(self, Y1, Y2, Super):
        max_len_Y1 = max(len(elem) for elem in Y1)
        max_len_Y2 = max(len(elem) for elem in Y2)
        max_len_S = max(len(elem) for elem in Super)
        Y1_tok = torch.as_tensor(pad_sequence([[self.pos1_to_index[str(symbol)] for symbol in sents] for sents in Y1]))
        Y2_tok = torch.as_tensor(pad_sequence(
            [[self.pos2_to_index[str(symbol)] for symbol in sents] for sents in Y2]))
        super_tok = torch.as_tensor(pad_sequence(
            [[self.super_to_index[str(symbol)] for symbol in sents] for sents in Super]))
        return Y1_tok, Y2_tok, super_tok

    # def convert_ids_to_symbols(self, ids):
    #     return [self.inverse_symbol_map[int(i)] for i in ids]


def pad_sequence(sequences, max_len=400):
    sequences_pad = []
    for s in sequences:
        padded = [0] * max_len
        padded[:len(s)] = s
        sequences_pad.append(padded)
    return sequences_pad
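
A quick, hypothetical illustration (not part of the commit) of the padding helper above: each label sequence is right-padded with 0 up to max_len, shortened here for readability.

from SuperTagger.SymbolTokenizer import pad_sequence

batch = [[3, 7, 2], [5, 1]]
print(pad_sequence(batch, max_len=5))  # [[3, 7, 2, 0, 0], [5, 1, 0, 0, 0]]
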
......@@ -33,12 +33,14 @@ class NormCrossEntropy(Module):
r"""Loss based on cross entropy; it takes the number of words into account and ignores the padding.
"""
def __init__(self, ignore_index, sep_id, weights=None):
def __init__(self, ignore_index, weights=None):
super(NormCrossEntropy, self).__init__()
self.ignore_index = ignore_index
self.sep_id = sep_id
self.weights = weights
def forward(self, predictions, truths):
return cross_entropy(predictions.flatten(0, -2), truths.flatten(), weight=self.weights,
reduction='sum', ignore_index=self.ignore_index) / count_sep(truths.flatten(), self.sep_id)
print()
print("predictions : ", predictions.size())
print("truths : ", truths.size())
return cross_entropy(predictions.flatten(0, -2), truths.flatten(), weight=torch.tensor(self.weights,device="cuda" if torch.cuda.is_available() else "cpu"),
reduction='sum', ignore_index=self.ignore_index)
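
A minimal, hypothetical sketch (not part of the commit) of how the reworked loss might be called, assuming the updated forward shown above and a uniform class weighting; all values are illustrative.

import torch
from SuperTagger.eval import NormCrossEntropy

device = "cuda" if torch.cuda.is_available() else "cpu"
num_classes, pad_id = 5, 0
loss_fn = NormCrossEntropy(ignore_index=pad_id, weights=[1.0] * num_classes)

predictions = torch.randn(2, 4, num_classes, device=device)   # [batch, seq_len, classes]
truths = torch.tensor([[1, 3, 2, pad_id], [4, 2, pad_id, pad_id]], device=device)
print(float(loss_fn(predictions, truths)))  # summed cross entropy, padding ignored
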
......@@ -5,7 +5,7 @@ import torch
from tqdm import tqdm
def read_csv_pgbar(csv_path, nrows=float('inf'), chunksize=500):
def read_csv_pgbar(csv_path, nrows=float('inf'), chunksize=100):
print("\n" + "#" * 20)
print("Loading csv...")
......
import itertools
import pickle
import re
import numpy as np
......@@ -115,17 +116,15 @@ def read_maxentdata(file):
Z = np.asarray(allsuper)
return X, Y1, Y2, Z, vocabulary, vnorm, partsofspeech1, partsofspeech2, superset, maxlen
def save_obj(obj, name):
with open(name + '.pkl', 'wb+') as f:
pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
# Txt_to_csv("m2.txt")
X, Y1, Y2, Z, vocabulary, vnorm, partsofspeech1, partsofspeech2, superset, maxlen = read_maxentdata("m2.txt")
def load_obj(name):
with open(name + '.pkl', 'rb') as f:
return pickle.load(f)
print("X[17] (", np.array(X[17]).shape ,") : ")
print(X[17])
print("Y1[17] (", np.array(Y1[17]).shape ,") : ")
print(Y1[17])
print("Y2[17] (", np.array(Y2[17]).shape ,") : ")
print(Y2[17])
# Txt_to_csv("m2.txt")
X, Y1, Y2, Z, vocabulary, vnorm, partsofspeech1, partsofspeech2, superset, maxlen = read_maxentdata("m2.txt")
......@@ -136,6 +135,13 @@ df['Y1'] = Y1
df['Y2'] = Y2
df['Z'] = Z
df.to_csv("../Datasets/m2_dataset_V2.csv", index=False)
t = np.unique(np.array(list(itertools.chain(*Z))))
print(t.size)
dict = { i : t[i] for i in range(0, len(t) ) }
df.to_csv("../Datasets/m2_dataset_V2.csv", index=False)
\ No newline at end of file
save_obj(dict,"../Datasets/index_to_super")
\ No newline at end of file
......@@ -6,16 +6,18 @@ import numpy as np
import torch
import torch.nn.functional as F
import transformers
from torch.optim import SGD, Adam
from torch.optim import Adam, RMSprop
from torch.utils.data import Dataset, TensorDataset, random_split
from transformers import (AutoTokenizer, get_cosine_schedule_with_warmup)
from transformers import (CamembertModel)
from Configuration import Configuration
from SuperTagger.Encoder.EncoderInput import EncoderInput
from SuperTagger.EncoderDecoder import EncoderDecoder
from SuperTagger.EncoderTokenizer import EncoderTokenizer
from SuperTagger.SymbolTokenizer import SymbolTokenizer
from SuperTagger.eval import NormCrossEntropy
from SuperTagger.utils import format_time, read_csv_pgbar, checkpoint_save, checkpoint_load
from SuperTagger.utils import format_time, read_csv_pgbar
from torch.utils.tensorboard import SummaryWriter
......@@ -24,9 +26,7 @@ torch.cuda.empty_cache()
# region ParamsModel
# max_symbols_per_word = int(Configuration.modelDecoderConfig['max_symbols_per_word'])
# max_len_sentence = int(Configuration.modelDecoderConfig['max_len_sentence'])
# symbol_vocab_size = int(Configuration.modelDecoderConfig['symbols_vocab_size'])
num_gru_layers = int(Configuration.modelDecoderConfig['num_rnn_layers'])
# endregion ParamsModel
......@@ -35,7 +35,7 @@ num_gru_layers = int(Configuration.modelDecoderConfig['num_rnn_layers'])
file_path = 'Datasets/m2_dataset_V2.csv'
batch_size = int(Configuration.modelTrainingConfig['batch_size'])
nb_sentences = batch_size * 300
nb_sentences = batch_size * 50
epochs = int(Configuration.modelTrainingConfig['epoch'])
seed_val = int(Configuration.modelTrainingConfig['seed_val'])
learning_rate = float(Configuration.modelTrainingConfig['learning_rate'])
......@@ -115,9 +115,8 @@ BASE_TOKENIZER = AutoTokenizer.from_pretrained(
'camembert-base',
do_lower_case=True)
BASE_MODEL = CamembertModel.from_pretrained("camembert-base")
sents_tokenizer = EncoderInput(BASE_TOKENIZER)
model = EncoderDecoder(BASE_MODEL, 20,20,20)
model = model.to("cuda" if torch.cuda.is_available() else "cpu")
sents_tokenizer = EncoderTokenizer(BASE_TOKENIZER)
symbol_tokenizer = SymbolTokenizer()
# endregion Model
......@@ -126,7 +125,9 @@ df = read_csv_pgbar(file_path, nb_sentences)
sents_tokenized, sents_mask = sents_tokenizer.fit_transform_tensors(df['X'].tolist())
dataset = TensorDataset(sents_tokenized, sents_mask)
y1, y2, super = symbol_tokenizer.convert_batchs_to_ids(df['Y1'].tolist(),df['Y2'].tolist(),df['Z'].tolist())
dataset = TensorDataset(sents_tokenized, sents_mask, y1, y2, super)
# , torch.tensor(df['Y1'].tolist()), torch.tensor(df['Y2'].tolist()), torch.tensor(df['Z'].tolist())
# Calculate the number of samples to include in each set.
......@@ -144,13 +145,14 @@ validation_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batc
# endregion Data loader
model = EncoderDecoder(BASE_MODEL, symbol_tokenizer.lenPOS1(),symbol_tokenizer.lenPOS2(),symbol_tokenizer.lenSuper())
model = model.to("cuda" if torch.cuda.is_available() else "cpu")
# region Fit tuning
# Optimizer
# optimizer_encoder = Adam(model.encoder.parameters(),
# lr=5e-5)
# optimizer_decoder = Adam(model.decoder.parameters(),
# lr=learning_rate)
optimizer = RMSprop(model.parameters())
# Total number of training steps is [number of batches] x [number of epochs].
# (Note that this is not the same as the number of training samples).
......@@ -165,15 +167,9 @@ total_steps = len(training_dataloader) * epochs
# num_training_steps=total_steps)
# # Loss
# if loss_scaled_by_freq:
# weights = torch.as_tensor(
# [6.9952, 1.0763, 1.0317, 43.274, 16.5276, 11.8821, 28.2416, 2.7548, 1.0728, 3.1847, 8.4521, 6.77, 11.1887,
# 6.6692, 23.1277, 11.8821, 4.4338, 1.2303, 5.0238, 8.4376, 1.0656, 4.6886, 1.028, 4.273, 4.273, 0],
# device=torch.device("cuda" if torch.cuda.is_available() else "cpu"))
# cross_entropy_loss = NormCrossEntropy(symbols_tokenizer.pad_token_id, symbols_tokenizer.sep_token_id,
# weights=weights)
# else:
# cross_entropy_loss = NormCrossEntropy(symbols_tokenizer.pad_token_id, symbols_tokenizer.sep_token_id)
cross_entropy_loss_Y1 = NormCrossEntropy(0,0.15)
cross_entropy_loss_Y2 = NormCrossEntropy(0,.35)
cross_entropy_loss_S = NormCrossEntropy(0,.5)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
......@@ -211,7 +207,9 @@ def run_epochs(epochs):
t0 = time.time()
# Reset the total loss for this epoch.
total_train_loss = 0
total_train_loss_Y1 =0
total_train_loss_Y2 =0
total_train_loss_S =0
model.train()
......@@ -221,8 +219,8 @@ def run_epochs(epochs):
# if epoch_i == 0 and step == 0:
# writer.add_graph(model, input_to_model=batch[0], verbose=False)
# Progress update every 40 batches.
if step % 40 == 0 and not step == 0:
# Progress update every 10 batches.
if step % 10 == 0 and not step == 0:
# Calculate elapsed time in minutes.
elapsed = format_time(time.time() - t0)
# Report progress.
......@@ -231,23 +229,36 @@ def run_epochs(epochs):
# Unpack this training batch from our dataloader.
b_sents_tokenized = batch[0].to("cuda" if torch.cuda.is_available() else "cpu")
b_sents_mask = batch[1].to("cuda" if torch.cuda.is_available() else "cpu")
b_symbols_tokenized = batch[2].to("cuda" if torch.cuda.is_available() else "cpu")
# optimizer_encoder.zero_grad()
# optimizer_decoder.zero_grad()
optimizer.zero_grad()
logits_predictions = model((b_sents_tokenized, b_sents_mask))
output_dim_Y1 = logits_predictions[0].shape[1]
print(output_dim_Y1)
# output_Y1 = logits_predictions[0][1:].view(-1, output_dim_Y1)
output_dim_Y2 = logits_predictions[1].shape[1]
# output_Y2 = logits_predictions[1][1:].view(-1, output_dim_Y2)
output_dim_S = logits_predictions[2].shape[1]
# output_S = logits_predictions[2][1:].view(-1, output_dim_S)
logits_predictions = model(b_sents_tokenized, b_sents_mask, b_symbols_tokenized)
loss_Y1 = cross_entropy_loss_Y1(logits_predictions[0], batch[2][:output_dim_Y1])
loss_Y2 = cross_entropy_loss_Y2(logits_predictions[1], batch[3][:output_dim_Y2])
loss_S = cross_entropy_loss_S(logits_predictions[2], batch[4][:output_dim_S])
# loss = cross_entropy_loss(logits_predictions, b_symbols_tokenized)
# total_train_loss += float(loss)
# loss.backward()
total_train_loss_Y1 += float(loss_Y1)
total_train_loss_Y2 += float(loss_Y2)
total_train_loss_S += float(loss_S)
loss_Y1.backward()
loss_Y2.backward()
loss_S.backward()
# This is to help prevent the "exploding gradients" problem.
#torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0, norm_type=2)
# Update parameters and take a step using the computed gradient.
# optimizer_encoder.step()
# optimizer_decoder.step()
optimizer.step()
#
# scheduler_encoder.step()
# scheduler_decoder.step()
......@@ -257,7 +268,9 @@ def run_epochs(epochs):
# if use_checkpoint_SAVE:
# checkpoint_save(model, optimizer_decoder, epoch_i, checkpoint_dir, loss)
avg_train_loss = total_train_loss / len(training_dataloader)
avg_train_loss_Y1 = total_train_loss_Y1 / len(training_dataloader)
avg_train_loss_Y2 = total_train_loss_Y2 / len(training_dataloader)
avg_train_loss_S = total_train_loss_S / len(training_dataloader)
# Measure how long this epoch took.
training_time = format_time(time.time() - t0)
......@@ -274,7 +287,9 @@ def run_epochs(epochs):
# writer.add_scalar('Accuracy/symbol', accuracy_symbol, epoch_i + 1)
print("")
print(" Average training loss: {0:.2f}".format(avg_train_loss))
print(" Average training loss Y1: {0:.2f}".format(avg_train_loss_Y1))
print(" Average training loss Y2: {0:.2f}".format(avg_train_loss_Y2))
print(" Average training loss super: {0:.2f}".format(avg_train_loss_S))
print(" Training epoch took: {:}".format(training_time))
# writer.add_scalar('Loss/train', total_train_loss, epoch_i+1)
......@@ -287,10 +302,13 @@ def run_epochs(epochs):
# run_epochs(epochs)
# endregion Train
b = next(iter(training_dataloader))
# , y1,y2,y3
a = model(b)
print(len(b))
print(a[0].size(),a[1].size(),a[2].size())
# b1, b2 , y1,y2,y3 = next(iter(training_dataloader))
# b =(b1, b2)
# # , y1,y2,y3
# a = model(b)
# print(len(b))
# print(a[0].size(),a[1].size(),a[2].size())
print(symbol_tokenizer.lenPOS1())