Skip to content
Snippets Groups Projects
Commit df43ddab authored by Caroline DE POURTALES's avatar Caroline DE POURTALES
Browse files

correction on regex : confusion np/n

parent 9f49c6d7
Branches
No related tags found
2 merge requests!6Linker with transformer,!5Linker with transformer
No preview for this file type
No preview for this file type
......@@ -21,9 +21,9 @@ def get_axiom_links(max_atoms_in_one_type, atoms_polarity, batch_axiom_links):
for atom_type in list(atom_map.keys())[:-1]:
# filtrer sur atom_batch que ce type puis filtrer avec les indices sur atom polarity
l_polarity_plus = [[x for i, x in enumerate(atoms_batch[s_idx]) if atoms_polarity[s_idx, i]
and bool(re.search(atom_type, atoms_batch[s_idx][i]))] for s_idx in range(len(atoms_batch))]
and bool(re.search(atom_type+"_", atoms_batch[s_idx][i]))] for s_idx in range(len(atoms_batch))]
l_polarity_minus = [[x for i, x in enumerate(atoms_batch[s_idx]) if not atoms_polarity[s_idx, i]
and bool(re.search(atom_type, atoms_batch[s_idx][i]))] for s_idx in
and bool(re.search(atom_type+"_", atoms_batch[s_idx][i]))] for s_idx in
range(len(atoms_batch))]
linking_plus_to_minus = pad_sequence(
......
No preview for this file type
......@@ -43,6 +43,8 @@ learning_rate = float(Configuration.modelTrainingConfig['learning_rate'])
file_path_axiom_links = 'Datasets/aa1_links_dataset_links.csv'
df_axiom_links = read_csv_pgbar(file_path_axiom_links, nb_sentences)
sentences_batch = df_axiom_links["Sentences"]
atoms_batch = get_atoms_batch(df_axiom_links["sub_tree"])
atom_tokenizer = AtomTokenizer(atom_map, max_atoms_in_sentence)
atoms_batch_tokenized = atom_tokenizer.convert_batchs_to_ids(atoms_batch)
......@@ -51,14 +53,19 @@ print("atoms_tokens", atoms_batch_tokenized.shape)
atoms_polarity_batch = find_pos_neg_idexes(max_atoms_in_sentence, df_axiom_links["sub_tree"])
print("atoms_polarity_batch", atoms_polarity_batch.shape)
torch.set_printoptions(edgeitems=20)
truth_links_batch = get_axiom_links(max_atoms_in_one_type, atoms_polarity_batch, df_axiom_links["sub_tree"])
print("truth_links_batch", truth_links_batch.permute(1, 0, 2).shape)
print(" truth_links_batch example on first sentence class cl_r", truth_links_batch[0][0])
truth_links_batch = truth_links_batch.permute(1, 0, 2)
print("truth_links_batch", truth_links_batch.shape)
print("sentence", sentences_batch[14])
print("categories ", df_axiom_links["sub_tree"][14])
print("atoms_batch", atoms_batch[14])
print("atoms_polarity_batch", atoms_polarity_batch[14])
print(" truth_links_batch example on a sentence class n", truth_links_batch[14][2])
sentences_batch = df_axiom_links["Sentences"]
# Construction tensor dataset
dataset = TensorDataset(atoms_batch_tokenized, atoms_polarity_batch, truth_links_batch.permute(1, 0, 2))
dataset = TensorDataset(atoms_batch_tokenized, atoms_polarity_batch, truth_links_batch)
# Calculate the number of samples to include in each set.
train_size = int(0.9 * len(dataset))
......
0% Loading or loading failed.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment