Skip to content
Snippets Groups Projects
Commit b291997d authored by Guilherme Henrique's avatar Guilherme Henrique
Browse files

improve code readability

parent 625022cf
Branches
No related tags found
No related merge requests found
......@@ -2,44 +2,36 @@ import nltk
# Download the POS-tagger model at import time so nltk.pos_tag calls below work.
# NOTE(review): runs on every import; network side effect — consider guarding it.
nltk.download('averaged_perceptron_tagger')
def get_core_concept(entity):
    """
    Get the core concept of an entity.

    The core concept is the first verb with length > 4, or otherwise the
    nouns and adjectives that appear before the first preposition.

    :param entity: tokenized entity label (sequence of words) passed to nltk.pos_tag
    :return: list of words forming the core concept
    """
    tags = nltk.pos_tag(entity)
    core_concept = []
    no_name = False
    for word, tag in tags:
        # A sufficiently long verb is taken as the entire core concept.
        if 'V' in tag and len(word) > 4:
            core_concept.append(word)
            break
        # Collect nouns/adjectives until the first preposition ('IN') is seen.
        # NOTE(review): parentheses added — the original condition parsed as
        # `'N' in tag or ('J' in tag and not no_name)`, so nouns were still
        # collected after a preposition, contradicting the flag's intent.
        if ('N' in tag or 'J' in tag) and not no_name:
            if 'IN' in tag:
                no_name = True
            else:
                core_concept.append(word)
    return core_concept
def get_core_tagged(e1):
    """
    Get the core concept of an entity as (word, tag) pairs.

    Same selection rule as get_core_concept — the first verb with length > 4,
    or otherwise the nouns and adjectives before the first preposition — but
    the POS tags are kept alongside the words.

    :param e1: tokenized entity label (sequence of words) passed to nltk.pos_tag
    :return: list of (word, tag) tuples forming the core concept
    """
    tagged = nltk.pos_tag(e1)
    core = []
    no_name = False
    for word, tag in tagged:
        # A sufficiently long verb is taken as the entire core concept.
        if 'V' in tag and len(word) > 4:
            core.append((word, tag))
            break
        # NOTE(review): parentheses added for the same precedence bug as in
        # get_core_concept — without them nouns were still collected after a
        # preposition set no_name.
        if ('N' in tag or 'J' in tag) and not no_name:
            if 'IN' in tag:
                no_name = True
            else:
                core.append((word, tag))
    return core
def filter_adjectives(words):
    """
    Keep only the nouns from a list of words.

    Despite the name, every word whose POS tag does not start with 'N'
    (adjectives included) is dropped.

    :param words: list of words passed to nltk.pos_tag
    :return: list containing only the noun words, in original order
    """
    tags = nltk.pos_tag(words)
    return [word for word, tag in tags if tag[0] == 'N']
This diff is collapsed.
......@@ -10,42 +10,81 @@ def metrics(correct, tries, total):
return precision, recall, fm
def get_name(entity, graph):
    """
    Resolve the display name of an entity in a graph.

    :param entity: RDFLib entity, or a plain string which is wrapped in Literal
    :param graph: RDFLib graph the name is looked up in (via get_n)
    :return: the entity's name; when the lookup yields a '//...' fragment, the
        last segment after the YAGO resource prefix is used instead
    """
    # isinstance is the idiomatic type check (replaces `type(entity) is str`).
    if isinstance(entity, str):
        entity = Literal(entity)
    name = get_n(entity, graph)
    # A name starting with '//' means the lookup returned a bare URL fragment;
    # fall back to the local part of the YAGO resource URI.
    if name.startswith('//'):
        name = entity.split('http://yago-knowledge.org/resource/')[-1]
    return name
def pad_encode(sentences, word_map):
    """
    Encode a list of sentences into a zero-padded tensor of integer ids.

    Example:
    >>> word_map = {'I': 1, 'love': 2, 'coding': 3, 'Python': 4,
    ...             'great': 5, 'fun': 6, 'is': 7}
    >>> pad_encode(["I love coding Python", "Python is great", "coding is fun"], word_map)
    tensor([[1, 2, 3, 4],
            [4, 7, 5, 0],
            [3, 7, 6, 0]])

    :param sentences: list of whitespace-separated sentences; every word must be
        a key of word_map (a missing word raises KeyError)
    :param word_map: dict mapping words to integer ids; 0 is reserved as the pad value
    :return: LongTensor of shape (len(sentences), length of the longest sentence),
        shorter sentences right-padded with 0
    """
    encoded = [[word_map[word] for word in sentence.split()] for sentence in sentences]
    # max of an empty input defaults to 0 so pad_encode([]) still returns a tensor.
    max_len = max((len(ids) for ids in encoded), default=0)
    padded = [ids + [0] * (max_len - len(ids)) for ids in encoded]
    return torch.LongTensor(padded)
def emb_average(sentence_ids, model):
    """
    Average the word embeddings of each sentence, counting only non-pad positions.

    :param sentence_ids: list of equal-length 1-D LongTensors of word ids,
        with 0 used as the padding id
    :param model: module mapping an id tensor to embeddings, e.g. torch.nn.Embedding
    :return: tensor of shape (num_sentences, embedding_dim) holding each
        sentence's embedding sum divided by its count of non-zero ids
    """
    # Stack the id tensors into one (num_sentences, seq_len) batch.
    batch = torch.stack(list(sentence_ids))
    summed = model(batch).sum(dim=1)
    # Count non-pad positions per sentence; clamp to 1 so an all-pad row
    # does not divide by zero.
    counts = (batch != 0).float().sum(dim=1, keepdim=True).clamp(min=1)
    # NOTE(review): pad positions still contribute to the sum — presumably the
    # model maps id 0 to the zero vector (padding_idx=0); confirm with callers.
    return summed / counts
def calc_acc(predicted, correct):
    """
    Calculate the fraction of predictions that match the correct labels.

    :param predicted: list of predicted integer labels
    :param correct: 1-D LongTensor of correct labels, same length as predicted
    :return: accuracy as a Python float
    """
    matches = (torch.LongTensor(predicted) == correct).float()
    # Divide by the number of labels (dim 0), matching the original contract.
    return (matches.sum() / correct.shape[0]).item()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment