Skip to content
Snippets Groups Projects
Commit e3ce129f authored by Guilherme Henrique's avatar Guilherme Henrique
Browse files

added remove prefix

parent 26812db9
No related branches found
No related tags found
No related merge requests found
......@@ -2,12 +2,17 @@ import xml.etree.ElementTree as ET
from tree_similarity import tree_sim, tree_size, post_order
import re
def remove_prefix(text):
    """Return ``text`` with every ``{...}`` group removed.

    A group is an opening brace, one or more characters other than ``}``,
    and a closing brace. All such groups are deleted wherever they occur,
    not only at the start of the string. An empty pair ``{}`` is left
    untouched because the pattern requires at least one character inside.

    The callers in this file pass element tags and attribute names, which
    ElementTree reports as ``{namespace-uri}local`` for namespaced XML, so
    the result there is the bare local name.

    Args:
        text: The string to clean.

    Returns:
        ``text`` without its brace-delimited groups.
    """
    return re.sub(r'\{[^}]+}', '', text)
def xml_to_tree(t):
    """Convert an XML element into a nested ``(label, children)`` tuple.

    The label is the element's tag followed by a space and its attributes
    rendered as ``name: value`` pairs joined by ``', '``. Tag and attribute
    names are passed through ``remove_prefix``. An element without
    attributes therefore gets a label ending in a single space.

    Args:
        t: An element exposing ``tag``, ``attrib`` and iteration over its
            children (presumably an ``xml.etree.ElementTree.Element``;
            confirm against callers).

    Returns:
        A ``(label, children)`` tuple where ``children`` is the list of
        converted child elements sorted by their label.
    """
    # Attributes are ordered by their original (still prefixed) names so the
    # label does not depend on the order they appear in the document.
    att_keys = sorted(t.attrib)
    att_pairs = ', '.join(
        f'{remove_prefix(k)}: {t.attrib[k]}' for k in att_keys
    )
    tag = f'{remove_prefix(t.tag)} {att_pairs}'
    # Sort on the label only; sorted() is stable, so children with equal
    # labels keep their document order.
    children = sorted((xml_to_tree(c) for c in t), key=lambda x: x[0])
    return tag, children
def load_maps(path):
......@@ -29,9 +34,9 @@ def load_maps(path):
t2 = ent2[0] if len(ent2) > 0 else ent2
maps.append((xml_to_tree(t1), xml_to_tree(t2)))
return maps
def maximize_assign(m):
preferences = {}
......@@ -139,4 +144,4 @@ def jaccard_sim(e1, e2):
post_order(e1, lambda x, y: s1.update(filter_entities(y[0])))
post_order(e2, lambda x, y: s2.update(filter_entities(y[0])))
return jaccard(s1, s2)
\ No newline at end of file
return jaccard(s1, s2)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment