# Caroline DE POURTALES authored
# utils_linker.py 11.11 KiB
import re
import regex
import torch
from torch.nn import Sequential, Linear, Dropout, GELU
from torch.nn import Module
from Linker.atom_map import atom_map
from utils import pad_sequence
class FFN(Module):
    """Feed-forward block: Linear -> GELU -> Dropout -> Linear, all without bias.

    Args:
        d_model : size of the input and output features
        d_ff : size of the hidden layer
        dropout : dropout probability applied after the activation
    """

    def __init__(self, d_model, d_ff, dropout=0.1):
        super().__init__()
        # The two projections are created in forward order so that parameter
        # initialisation consumes the random generator in the usual sequence.
        expand = Linear(d_model, d_ff, bias=False)
        contract = Linear(d_ff, d_model, bias=False)
        self.ffn = Sequential(expand, GELU(), Dropout(dropout), contract)

    def forward(self, x):
        """Apply the feed-forward stack to ``x`` and return the result."""
        out = self.ffn(x)
        return out
# Recursive pattern for a category written as ``name(index,arg[,arg])``, e.g. a
# ``dr``/``dl`` category. Each argument is either a nested category (matched by
# the whole-pattern recursion ``(?R)``) or a plain word. ``(?R)`` is not
# supported by the standard ``re`` module, so this pattern must be used with the
# third-party ``regex`` module.
# Capture groups: 1 = first argument when nested, 2 = first argument when a word,
# 3 = second argument when nested, 4 = second argument when a word.
regex_categories = r'\w+\(\d+,(?:((?R))|(\w+))*,?(?:((?R))|(\w+))*\)'
#########################################################################################
################################ List of atoms with the _i link index ###################
#########################################################################################
def get_axiom_links(max_atoms_in_one_type, atoms_polarity, batch_axiom_links):
    r"""
    Build, for every atom type, the links from positive atoms to negative atoms.

    For each sentence and each atom type, the positive and the negative atoms of
    that type are collected in sentence order. Every positive atom is then mapped
    to the position of the identically named atom (same ``_i`` index) in the
    negative list, or to -1 when no such atom exists.

    Args:
        max_atoms_in_one_type : configuration; each row is padded to
            ``max_atoms_in_one_type // 2`` entries
        atoms_polarity : (batch_size, max_atoms_in_sentence), truthy for positive atoms
        batch_axiom_links : (batch_size, len_sentence) categories with the _i which allows linking atoms
    Returns:
        The per-type results of ``pad_sequence`` stacked along a new first
        dimension, one entry per key of ``atom_map`` except the last one.
        NOTE(review): assuming ``pad_sequence`` returns a (batch_size, max_len)
        tensor, the shape is (n_atom_types - 1, batch_size,
        max_atoms_in_one_type // 2) - confirm against ``utils.pad_sequence``.
    """
    atoms_batch = get_atoms_links_batch(batch_axiom_links)
    linking_plus_to_minus_all_types = []
    # The last key of atom_map is skipped (presumably the padding entry - confirm).
    for atom_type in list(atom_map.keys())[:-1]:
        # An atom of this type is spelled "<atom_type>_<index>". Anchoring at the
        # start and requiring digits after the underscore keeps e.g. type "s"
        # from also selecting "s_inf_1", which an unanchored substring search
        # for "s_" would do. The type name is escaped so it is matched literally.
        atom_pattern = re.compile(re.escape(atom_type) + r"_\d+")

        links_per_sentence = []
        for s_idx, atoms in enumerate(atoms_batch):
            # Keep only the atoms of this type, split by polarity.
            positives, negatives = [], []
            for i, atom in enumerate(atoms):
                if not atom_pattern.match(atom):
                    continue
                if atoms_polarity[s_idx, i]:
                    positives.append(atom)
                else:
                    negatives.append(atom)

            # Position of the first occurrence of each negative atom
            # (same result as list.index, without the repeated scans).
            negative_position = {}
            for position, atom in enumerate(negatives):
                negative_position.setdefault(atom, position)

            links_per_sentence.append(
                torch.as_tensor([negative_position.get(atom, -1) for atom in positives],
                                dtype=torch.long))

        linking_plus_to_minus = pad_sequence(links_per_sentence,
                                             max_len=max_atoms_in_one_type // 2,
                                             padding_value=-1)
        linking_plus_to_minus_all_types.append(linking_plus_to_minus)

    return torch.stack(linking_plus_to_minus_all_types)
def category_to_atoms_axiom_links(category, categories_to_atoms):
    r"""
    Flatten a category into its atoms, keeping the ``_i`` link index on each atom.

    Args:
        category : str of kind AtomCat | CategoryCat(dr or dl)
        categories_to_atoms : recursive list; extended in place with the atoms
            found when ``category`` is a compound category
    Returns :
        List of atoms inside the category in prefix order. For a ``GOAL:``
        category or a single atom this is a new one-element list; otherwise it
        is ``categories_to_atoms`` itself.

    A category that is neither a goal, an atom, nor a match for
    ``regex_categories`` makes ``regex.match`` return None, so the ``.groups()``
    call fails with AttributeError.
    """
    if category.startswith("GOAL:"):
        # "GOAL:<atom>" -> keep the atom, index included.
        _, cat = category.split(':')
        return [cat]

    # An atom is "<atom_type>_<index>"; the type name is escaped so that it is
    # matched literally, and the pattern is a raw string so "\d" is a regex
    # digit class rather than an invalid string escape.
    if any(re.match(re.escape(atom_type) + r"_\d+", category) for atom_type in atom_map):
        return [category]

    # Compound category: recurse into each captured argument, left to right.
    category_cut = regex.match(regex_categories, category).groups()
    for cat in category_cut:
        if cat is not None:
            categories_to_atoms += category_to_atoms_axiom_links(cat, [])
    return categories_to_atoms
def get_atoms_links_batch(category_batch):
    r"""
    Flatten every sentence of a batch into its atoms, link indices kept.

    Args:
        category_batch : iterable of sentences, each an iterable of category strings
    Returns :
        One list per sentence holding the atoms of all its categories, in
        sentence order and prefix order within each category
    """
    return [
        [
            atom
            for category in sentence
            for atom in category_to_atoms_axiom_links(category, [])
        ]
        for sentence in category_batch
    ]
#########################################################################################
################################ List of atoms ##########################################
#########################################################################################
def category_to_atoms(category, categories_to_atoms):
    r"""
    Flatten a category into its atom types, dropping the ``_i`` link index.

    Args:
        category : str of kind AtomCat | CategoryCat(dr or dl)
        categories_to_atoms : recursive list; extended in place with the atoms
            found when ``category`` is a compound category
    Returns:
        List of atoms inside the category in prefix order. For a ``GOAL:``
        category or a single atom this is a new one-element list; otherwise it
        is ``categories_to_atoms`` itself.

    A category that is neither a goal, an atom, nor a match for
    ``regex_categories`` makes ``regex.match`` return None, so the ``.groups()``
    call fails with AttributeError.
    """
    if category.startswith("GOAL:"):
        # "GOAL:<atom>_<index>" -> keep only the atom name.
        _, cat = category.split(':')
        return [re.match(r'([a-zA-Z|_]+)_\d+', cat).group(1)]

    # An atom is "<atom_type>_<index>"; the type name is escaped so that it is
    # matched literally, and the pattern is a raw string so "\d" is a regex
    # digit class rather than an invalid string escape.
    if any(re.match(re.escape(atom_type) + r"_\d+", category) for atom_type in atom_map):
        return [re.match(r'([a-zA-Z|_]+)_\d+', category).group(1)]

    # Compound category: recurse into each captured argument, left to right.
    category_cut = regex.match(regex_categories, category).groups()
    for cat in category_cut:
        if cat is not None:
            categories_to_atoms += category_to_atoms(cat, [])
    return categories_to_atoms
def get_atoms_batch(category_batch):
    r"""
    Flatten every sentence of a batch into its atom types, link indices removed.

    Args:
        category_batch : iterable of sentences, each an iterable of category strings
    Returns:
        One list per sentence holding the atoms of all its categories, in
        sentence order and prefix order within each category
    """
    return [
        [
            atom
            for category in sentence
            for atom in category_to_atoms(category, [])
        ]
        for sentence in category_batch
    ]
#########################################################################################
################################ Polarity ###############################################
#########################################################################################
def _is_atomic_category(category):
    r"""Return True when ``category`` yields a single polarity entry: it starts
    with "dia" or "box", or it starts with ``<atom_type>_<digits>`` for some
    key of ``atom_map``."""
    if category.startswith(("dia", "box")):
        return True
    # The type name is escaped so that it is matched literally; the raw string
    # keeps "\d" a regex digit class rather than an invalid string escape.
    return any(re.match(re.escape(atom_type) + r"_\d+", category) for atom_type in atom_map)


def category_to_atoms_polarity(category, polarity):
    r"""
    Compute the polarity of every atom of a category, in prefix order.

    Args:
        category : str of kind AtomCat | CategoryCat(dr or dl)
        polarity : polarity according to recursivity; only a value equal to
            ``True`` is treated as positive
    Returns:
        List of booleans, one per atom, containing True for pos indexes and
        False for neg indexes. A ``GOAL:`` category gives ``[True]``; a
        category that is itself atomic gives ``[False]`` whatever ``polarity``
        is; a category that is neither atomic nor ``dr``/``dl`` gives ``[]``.
    """
    if category.startswith("GOAL:"):
        return [True]
    if _is_atomic_category(category):
        return [False]

    is_dr = category.startswith("dr")
    if not is_dr and not category.startswith("dl"):
        return []

    category_cut = regex.match(regex_categories, category).groups()
    category_cut = [cat for cat in category_cut if cat is not None]
    left_side, right_side = category_cut[0], category_cut[1]

    # Explicit comparison kept on purpose: only True (or a value equal to it)
    # selects the positive rules, exactly as before.
    positive = bool(polarity == True)  # noqa: E712

    if is_dr:
        # dr = / : the left side keeps the current polarity, the right side flips it.
        sides = ((left_side, positive), (right_side, not positive))
    else:
        # dl = \ : the left side flips the current polarity, the right side keeps it.
        sides = ((left_side, not positive), (right_side, positive))

    category_to_polarity = []
    for side, side_polarity in sides:
        if _is_atomic_category(side):
            # An atomic side gets the opposite of the polarity a compound
            # side would be explored with.
            category_to_polarity.append(not side_polarity)
        else:
            category_to_polarity += category_to_atoms_polarity(side, side_polarity)
    return category_to_polarity
def find_pos_neg_idexes(max_atoms_in_sentence, atoms_batch):
    r"""
    Compute the polarities of all atoms of every sentence in a batch.

    Args:
        max_atoms_in_sentence : configuration; passed to ``pad_sequence`` as ``max_len``
        atoms_batch : iterable of sentences, each an iterable of category strings
    Returns:
        The result of ``pad_sequence`` over one polarity tensor per sentence,
        padded with 0. Each tensor concatenates, in order, the polarities that
        ``category_to_atoms_polarity(category, True)`` gives for the sentence's
        categories.
    """
    polarity_tensors = [
        torch.as_tensor([
            flag
            for category in sentence
            for flag in category_to_atoms_polarity(category, True)
        ])
        for sentence in atoms_batch
    ]
    return pad_sequence(polarity_tensors, max_len=max_atoms_in_sentence, padding_value=0)