diff --git a/SuperTagger/Linker/__pycache__/Linker.cpython-38.pyc b/SuperTagger/Linker/__pycache__/Linker.cpython-38.pyc index d796c7a4452db32c4eb9d9237b80fc4bab334428..e732b7dd6a2e215b497d136ba944e5e832aeacd6 100644 Binary files a/SuperTagger/Linker/__pycache__/Linker.cpython-38.pyc and b/SuperTagger/Linker/__pycache__/Linker.cpython-38.pyc differ diff --git a/SuperTagger/Linker/__pycache__/utils.cpython-38.pyc b/SuperTagger/Linker/__pycache__/utils.cpython-38.pyc index b13bf57006d2d3eb78625e40b86218f03e27798c..49d3c94ae3d2a9e58f9576ec4e25c73d5ea5aa19 100644 Binary files a/SuperTagger/Linker/__pycache__/utils.cpython-38.pyc and b/SuperTagger/Linker/__pycache__/utils.cpython-38.pyc differ diff --git a/SuperTagger/Linker/utils.py b/SuperTagger/Linker/utils.py index 3f8e892a7f693fc58dd6754b11a114091d364480..e5708c173f4d19668ddf7678d982fd04cf7167e4 100644 --- a/SuperTagger/Linker/utils.py +++ b/SuperTagger/Linker/utils.py @@ -21,9 +21,9 @@ def get_axiom_links(max_atoms_in_one_type, atoms_polarity, batch_axiom_links): for atom_type in list(atom_map.keys())[:-1]: # filtrer sur atom_batch que ce type puis filtrer avec les indices sur atom polarity l_polarity_plus = [[x for i, x in enumerate(atoms_batch[s_idx]) if atoms_polarity[s_idx, i] - and bool(re.search(atom_type, atoms_batch[s_idx][i]))] for s_idx in range(len(atoms_batch))] + and bool(re.search(atom_type+"_", atoms_batch[s_idx][i]))] for s_idx in range(len(atoms_batch))] l_polarity_minus = [[x for i, x in enumerate(atoms_batch[s_idx]) if not atoms_polarity[s_idx, i] - and bool(re.search(atom_type, atoms_batch[s_idx][i]))] for s_idx in + and bool(re.search(atom_type+"_", atoms_batch[s_idx][i]))] for s_idx in range(len(atoms_batch))] linking_plus_to_minus = pad_sequence( diff --git a/SuperTagger/__pycache__/eval.cpython-38.pyc b/SuperTagger/__pycache__/eval.cpython-38.pyc index ec90d972078edc0b20a9275119656a5973b657f9..fce253cccfad91c0b80dd6cd68cc5e05e3437104 100644 Binary files a/SuperTagger/__pycache__/eval.cpython-38.pyc and b/SuperTagger/__pycache__/eval.cpython-38.pyc differ diff --git a/train.py b/train.py index 05d223f6e3b9710d18cf2f6984e36f3e257209c7..d4e45335091925a0f6ce8d93d3c12300ab711bdb 100644 --- a/train.py +++ b/train.py @@ -43,6 +43,8 @@ learning_rate = float(Configuration.modelTrainingConfig['learning_rate']) file_path_axiom_links = 'Datasets/aa1_links_dataset_links.csv' df_axiom_links = read_csv_pgbar(file_path_axiom_links, nb_sentences) +sentences_batch = df_axiom_links["Sentences"] + atoms_batch = get_atoms_batch(df_axiom_links["sub_tree"]) atom_tokenizer = AtomTokenizer(atom_map, max_atoms_in_sentence) atoms_batch_tokenized = atom_tokenizer.convert_batchs_to_ids(atoms_batch) @@ -51,14 +53,19 @@ print("atoms_tokens", atoms_batch_tokenized.shape) atoms_polarity_batch = find_pos_neg_idexes(max_atoms_in_sentence, df_axiom_links["sub_tree"]) print("atoms_polarity_batch", atoms_polarity_batch.shape) +torch.set_printoptions(edgeitems=20) truth_links_batch = get_axiom_links(max_atoms_in_one_type, atoms_polarity_batch, df_axiom_links["sub_tree"]) -print("truth_links_batch", truth_links_batch.permute(1, 0, 2).shape) -print(" truth_links_batch example on first sentence class cl_r", truth_links_batch[0][0]) +truth_links_batch = truth_links_batch.permute(1, 0, 2) +print("truth_links_batch", truth_links_batch.shape) +print("sentence", sentences_batch[14]) +print("categories ", df_axiom_links["sub_tree"][14]) +print("atoms_batch", atoms_batch[14]) +print("atoms_polarity_batch", atoms_polarity_batch[14]) +print(" truth_links_batch example on a sentence class n", truth_links_batch[14][2]) -sentences_batch = df_axiom_links["Sentences"] # Construction tensor dataset -dataset = TensorDataset(atoms_batch_tokenized, atoms_polarity_batch, truth_links_batch.permute(1, 0, 2)) +dataset = TensorDataset(atoms_batch_tokenized, atoms_polarity_batch, truth_links_batch) # Calculate the number of samples to include in each set. train_size = int(0.9 * len(dataset))