Skip to content
Snippets Groups Projects
Commit df43ddab authored by Caroline DE POURTALES's avatar Caroline DE POURTALES
Browse files

correction on regex : confusion np/n

parent 9f49c6d7
Branches
No related tags found
2 merge requests!6Linker with transformer,!5Linker with transformer
No preview for this file type
No preview for this file type
......@@ -21,9 +21,9 @@ def get_axiom_links(max_atoms_in_one_type, atoms_polarity, batch_axiom_links):
for atom_type in list(atom_map.keys())[:-1]:
# filtrer sur atom_batch que ce type puis filtrer avec les indices sur atom polarity
l_polarity_plus = [[x for i, x in enumerate(atoms_batch[s_idx]) if atoms_polarity[s_idx, i]
and bool(re.search(atom_type, atoms_batch[s_idx][i]))] for s_idx in range(len(atoms_batch))]
and bool(re.search(atom_type+"_", atoms_batch[s_idx][i]))] for s_idx in range(len(atoms_batch))]
l_polarity_minus = [[x for i, x in enumerate(atoms_batch[s_idx]) if not atoms_polarity[s_idx, i]
and bool(re.search(atom_type, atoms_batch[s_idx][i]))] for s_idx in
and bool(re.search(atom_type+"_", atoms_batch[s_idx][i]))] for s_idx in
range(len(atoms_batch))]
linking_plus_to_minus = pad_sequence(
......
No preview for this file type
......@@ -43,6 +43,8 @@ learning_rate = float(Configuration.modelTrainingConfig['learning_rate'])
file_path_axiom_links = 'Datasets/aa1_links_dataset_links.csv'
df_axiom_links = read_csv_pgbar(file_path_axiom_links, nb_sentences)
sentences_batch = df_axiom_links["Sentences"]
atoms_batch = get_atoms_batch(df_axiom_links["sub_tree"])
atom_tokenizer = AtomTokenizer(atom_map, max_atoms_in_sentence)
atoms_batch_tokenized = atom_tokenizer.convert_batchs_to_ids(atoms_batch)
......@@ -51,14 +53,19 @@ print("atoms_tokens", atoms_batch_tokenized.shape)
atoms_polarity_batch = find_pos_neg_idexes(max_atoms_in_sentence, df_axiom_links["sub_tree"])
print("atoms_polarity_batch", atoms_polarity_batch.shape)
torch.set_printoptions(edgeitems=20)
truth_links_batch = get_axiom_links(max_atoms_in_one_type, atoms_polarity_batch, df_axiom_links["sub_tree"])
print("truth_links_batch", truth_links_batch.permute(1, 0, 2).shape)
print(" truth_links_batch example on first sentence class cl_r", truth_links_batch[0][0])
truth_links_batch = truth_links_batch.permute(1, 0, 2)
print("truth_links_batch", truth_links_batch.shape)
print("sentence", sentences_batch[14])
print("categories ", df_axiom_links["sub_tree"][14])
print("atoms_batch", atoms_batch[14])
print("atoms_polarity_batch", atoms_polarity_batch[14])
print(" truth_links_batch example on a sentence class n", truth_links_batch[14][2])
sentences_batch = df_axiom_links["Sentences"]
# Construction tensor dataset
dataset = TensorDataset(atoms_batch_tokenized, atoms_polarity_batch, truth_links_batch.permute(1, 0, 2))
dataset = TensorDataset(atoms_batch_tokenized, atoms_polarity_batch, truth_links_batch)
# Calculate the number of samples to include in each set.
train_size = int(0.9 * len(dataset))
......
0% Loading or loading failed.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment