Skip to content
Snippets Groups Projects
Commit 7b101512 authored by Julien Rabault's avatar Julien Rabault
Browse files

Add hidden state

parent 50aa065d
No related branches found
No related tags found
No related merge requests found
......@@ -9,3 +9,4 @@ good_models
main.py
*.pt
Datasets/Utils
*.zip
......@@ -182,17 +182,17 @@ class SuperTagger:
self.model = self.model.cpu()
preds, hidden = self.model.predict((sents_tokenized_t, sents_mask_t))
output = self.model.predict((sents_tokenized_t, sents_mask_t))
return preds, self.tags_tokenizer.convert_ids_to_tags(torch.argmax(preds, dim=2).detach()), hidden
return output['logit'], self.tags_tokenizer.convert_ids_to_tags(torch.argmax(output['logit'], dim=2).detach())
def forward(self, b_sents_tokenized: Tensor, b_sents_mask: Tensor) -> (Tensor, Tensor):
"""
Function used for the linker (same of predict)
"""
with torch.no_grad():
logit, hidden = self.model.predict((b_sents_tokenized, b_sents_mask))
return logit, hidden
output = self.model.predict((b_sents_tokenized, b_sents_mask))
return output
def train(self, sentences: list[str], tags: list[list[str]], validation_rate=0.1, epochs=20, batch_size=16,
tensorboard=False,
......@@ -311,9 +311,10 @@ class SuperTagger:
targets = batch[2].to(self.device)
self.optimizer.zero_grad()
loss, logit, _ = self.model((b_sents_tokenized, b_sents_mask, targets))
output = self.model((b_sents_tokenized, b_sents_mask, targets))
loss = output['loss']
predictions = torch.argmax(logit, dim=2).detach().cpu().numpy()
predictions = torch.argmax(output['logit'], dim=2).detach().cpu().numpy()
label_ids = targets.cpu().numpy()
acc = categorical_accuracy(predictions, label_ids)
......@@ -353,9 +354,10 @@ class SuperTagger:
b_sents_mask = batch[1].to(self.device)
b_symbols_tokenized = batch[2].to(self.device)
loss, logits, _ = self.model((b_sents_tokenized, b_sents_mask, b_symbols_tokenized))
output = self.model((b_sents_tokenized, b_sents_mask, b_symbols_tokenized))
loss = output['loss']
predictions = torch.argmax(logits, dim=2).detach().cpu().numpy()
predictions = torch.argmax(output['logit'], dim=2).detach().cpu().numpy()
label_ids = b_symbols_tokenized.cpu().numpy()
accuracy = categorical_accuracy(predictions, label_ids)
......
......@@ -24,9 +24,10 @@ class Tagging_bert_model(Module):
output = self.bert(
input_ids=b_input_ids, attention_mask=b_input_mask, labels=labels)
loss, logits, hidden = output[:3]
return loss, logits, hidden[0]
result = {'loss': output[0],'logit': output[1], 'word_embeding': output[2][0], 'last_hidden_state': output[2][1]}
return result
def predict(self, batch):
b_input_ids = batch[0]
......@@ -35,4 +36,6 @@ class Tagging_bert_model(Module):
output = self.bert(
input_ids=b_input_ids, attention_mask=b_input_mask)
return output[0], output[1][0]
result = {'logit' : output[0], 'word_embeding': output[1][0], 'last_hidden_state':output[1][1]}
return result
......@@ -13,12 +13,12 @@ tags_s = [['let', 'dr(0,s,s)', 'let', 'dr(0,dr(0,s,s),np)', 'dr(0,np,n)', 'dr(0,
#### MODEL ####
tagger = SuperTagger()
model = "models/flaubert_super_98%_V2_50e.pt"
model = "models/flaubert_super_98%_V2_50e/flaubert_super_98%_V2_50e.pt"
tagger.load_weights(model)
#### TEST ####
_, pred_convert, _ = tagger.predict(a_s)
_, pred_convert = tagger.predict(a_s)
print("Model : ", model)
......
File suppressed by a .gitattributes entry or the file's encoding is unsupported.
......@@ -7,7 +7,7 @@ file_path = 'Datasets/m2_dataset.csv'
df = read_csv_pgbar(file_path,100)
texts = df['X'].tolist()
tags = df['Y1'].tolist()
tags = df['Z'].tolist()
test_s = texts[:4]
tags_s = tags[:4]
......@@ -15,7 +15,7 @@ tags_s = tags[:4]
texts = texts[4:]
tags = tags[4:]
index_to_super = load_obj('Datasets/index_to_pos1')
index_to_super = load_obj('Datasets/index_to_super')
#### MODEL ####
tagger = SuperTagger()
......
0% — Loading…
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment