import re

# Matches an ElementTree namespace qualifier such as "{http://example.org/ns}".
_NAMESPACE_RE = re.compile(r'\{[^}]+}')


def remove_prefix(text):
    """Return *text* with every ``{...}`` namespace qualifier removed.

    ElementTree reports namespaced tags and attribute names as
    ``{uri}local``; stripping the braces part leaves only ``local``.
    Text without a qualifier is returned unchanged.
    """
    return _NAMESPACE_RE.sub('', text)


def xml_to_tree(t):
    """Convert an element into a nested ``(label, children)`` tuple.

    Args:
        t: An element exposing ``tag``, ``attrib`` and iteration over its
            child elements (e.g. ``xml.etree.ElementTree.Element``).

    Returns:
        A ``(label, children)`` pair.  ``label`` is the namespace-free tag,
        a single space, and the attributes rendered as ``name: value``
        joined by ``', '`` (so an element without attributes yields a label
        ending in a space).  ``children`` is the list of converted child
        elements ordered by their labels.
    """
    # Order attributes by their namespace-free name so that the label does
    # not depend on which namespace URI a document happens to use; the
    # stringified value breaks ties between equal local names.
    attributes = sorted(
        (remove_prefix(key), str(value)) for key, value in t.attrib.items()
    )
    att_pairs = ', '.join(f'{name}: {value}' for name, value in attributes)
    tag = f'{remove_prefix(t.tag)} {att_pairs}'

    # Sort on the label only: the sort is stable, so children with equal
    # labels keep document order and their subtrees are never compared.
    children = sorted((xml_to_tree(child) for child in t), key=lambda node: node[0])

    return tag, children


# NOTE: the patch these definitions were recovered from also touches
# load_maps, maximize_assign and jaccard_sim, but only with blank-line and
# end-of-file-newline changes; those functions are only partially visible
# there and are intentionally not reproduced here.