Skip to content
Snippets Groups Projects
Commit dc23cbec authored by Guilherme Henrique's avatar Guilherme Henrique
Browse files

output in alignment api and sssom

parent c1ca88bf
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id: tags:
``` python
from sentence_transformers import SentenceTransformer
from models import Finbank
import random
import torch
import numpy as np
from property_matching import PropertyMatcher
from tqdm.auto import tqdm
from property_matching import most_common_pair
import matplotlib.pyplot as plt
# Seed the torch, stdlib `random` and NumPy random number generators with the
# same fixed value (0) so that repeated runs of the notebook are reproducible.
torch.manual_seed(0)
random.seed(0)
np.random.seed(0)
```
%% Cell type:markdown id: tags:
Download the embeddings from:
http://dl.turkunlp.org/finnish-embeddings/
%% Cell type:code id: tags:
``` python
# Build the two models the matcher is constructed from:
#  - `wm`: a Finbank model loaded from a local binary file
#    (NOTE(review): the path is machine-specific — adjust before running elsewhere);
#  - `model`: the 'all-MiniLM-L6-v2' sentence-transformer.
wm = Finbank('/home/guilherme/Documents/kg/fin.bin')
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
# The matcher receives both models; how each is used is defined in property_matching.
property_matcher = PropertyMatcher(wm, model)
```
%% Cell type:code id: tags:
``` python
# Run the matcher on the 'conference' folder against the 'reference' folder.
# th=0.65 is presumably the similarity threshold — TODO confirm against PropertyMatcher.match.
results = property_matcher.match('/home/guilherme/Documents/kg/conference', '/home/guilherme/Documents/kg/reference', th=0.65)
```
%% Output
----------------------------------------------------------------------------------------------------
Conference.owl iasted.owl
Loading o1
Loading o2
1
ontology iterations: 5248, (0, 0.0, 0.0), aligns: 1, po1: 64, po2: 41
----------------------------------------------------------------------------------------------------
ekaw.owl iasted.owl
Loading o1
Loading o2
0
ontology iterations: 1804, (0.0, 0, 0.0), aligns: 0, po1: 22, po2: 41
ontology iterations: 1804, (0, 0, 0.0), aligns: 0, po1: 22, po2: 41
----------------------------------------------------------------------------------------------------
cmt.owl Conference.owl
Loading o1
Loading o2
3
ontology iterations: 7552, (0.25, 0.3333333333333333, 0.28571428571428575), aligns: 3, po1: 59, po2: 64
ontology iterations: 7552, (0.3333333333333333, 0.3333333333333333, 0.3333333333333333), aligns: 3, po1: 59, po2: 64
----------------------------------------------------------------------------------------------------
cmt.owl iasted.owl
Loading o1
Loading o2
0
ontology iterations: 4838, (0, 0, 0.0), aligns: 0, po1: 59, po2: 41
----------------------------------------------------------------------------------------------------
confOf.owl ekaw.owl
Loading o1
Loading o2
0
ontology iterations: 1584, (0, 0, 0.0), aligns: 0, po1: 36, po2: 22
----------------------------------------------------------------------------------------------------
edas.owl ekaw.owl
Loading o1
Loading o2
4
ontology iterations: 2200, (1.0, 0.5, 0.6666666666666666), aligns: 4, po1: 50, po2: 22
----------------------------------------------------------------------------------------------------
confOf.owl iasted.owl
Loading o1
Loading o2
0
ontology iterations: 2952, (0, 0, 0.0), aligns: 0, po1: 36, po2: 41
----------------------------------------------------------------------------------------------------
Conference.owl ekaw.owl
Loading o1
Loading o2
2
ontology iterations: 2816, (0, 0.0, 0.0), aligns: 2, po1: 64, po2: 22
----------------------------------------------------------------------------------------------------
cmt.owl ekaw.owl
Loading o1
Loading o2
3
ontology iterations: 2596, (1.0, 1.0, 1.0), aligns: 3, po1: 59, po2: 22
----------------------------------------------------------------------------------------------------
edas.owl iasted.owl
Loading o1
Loading o2
0
ontology iterations: 4100, (0, 0, 0.0), aligns: 0, po1: 50, po2: 41
----------------------------------------------------------------------------------------------------
edas.owl sigkdd.owl
Loading o1
Loading o2
4
ontology iterations: 2600, (0.6666666666666666, 0.5, 0.5714285714285715), aligns: 4, po1: 50, po2: 26
----------------------------------------------------------------------------------------------------
cmt.owl confOf.owl
Loading o1
Loading o2
6
ontology iterations: 4248, (1.0, 0.6666666666666666, 0.8), aligns: 6, po1: 59, po2: 36
----------------------------------------------------------------------------------------------------
confOf.owl sigkdd.owl
Loading o1
Loading o2
1
ontology iterations: 1872, (0.5, 1.0, 0.6666666666666666), aligns: 1, po1: 36, po2: 26
----------------------------------------------------------------------------------------------------
ekaw.owl sigkdd.owl
Loading o1
Loading o2
0
ontology iterations: 1144, (0, 0, 0.0), aligns: 0, po1: 22, po2: 26
----------------------------------------------------------------------------------------------------
cmt.owl sigkdd.owl
Loading o1
Loading o2
2
ontology iterations: 3068, (0.6666666666666666, 1.0, 0.8), aligns: 2, po1: 59, po2: 26
ontology iterations: 3068, (1.0, 1.0, 1.0), aligns: 2, po1: 59, po2: 26
----------------------------------------------------------------------------------------------------
Conference.owl edas.owl
Loading o1
Loading o2
3
ontology iterations: 6400, (0.6666666666666666, 0.6666666666666666, 0.6666666666666666), aligns: 3, po1: 64, po2: 50
ontology iterations: 6400, (1.0, 0.6666666666666666, 0.8), aligns: 3, po1: 64, po2: 50
----------------------------------------------------------------------------------------------------
cmt.owl edas.owl
Loading o1
Loading o2
5
ontology iterations: 5900, (0.5, 0.2, 0.28571428571428575), aligns: 5, po1: 59, po2: 50
----------------------------------------------------------------------------------------------------
iasted.owl sigkdd.owl
Loading o1
Loading o2
0
ontology iterations: 2132, (0, 0, 0.0), aligns: 0, po1: 41, po2: 26
----------------------------------------------------------------------------------------------------
Conference.owl sigkdd.owl
Loading o1
Loading o2
3
ontology iterations: 3328, (0.5, 0.3333333333333333, 0.4), aligns: 3, po1: 64, po2: 26
ontology iterations: 3328, (1.0, 0.3333333333333333, 0.5), aligns: 3, po1: 64, po2: 26
----------------------------------------------------------------------------------------------------
confOf.owl edas.owl
Loading o1
Loading o2
5
ontology iterations: 3600, (1.0, 0.6, 0.7499999999999999), aligns: 5, po1: 36, po2: 50
----------------------------------------------------------------------------------------------------
Conference.owl confOf.owl
Loading o1
Loading o2
4
ontology iterations: 4608, (1.0, 0.5, 0.6666666666666666), aligns: 4, po1: 64, po2: 36
iterations: 74590, (0.6857142857142857, 0.5217391304347826, 0.5925925925925927)
iterations: 74590, (0.8275862068965517, 0.5217391304347826, 0.64)
%% Cell type:code id: tags:
``` python
# Split `results` into parallel precision / recall / f-measure sequences.
# NOTE(review): this assumes `results` is a sequence of (p, r, f) triples, one per
# value of `x`. The recorded output of this cell shows the unpacking failing with
# "TypeError: 'float' object is not iterable" — confirm what match() returns for
# the arguments used in the previous cell.
p, r, f = zip(*results)
# x-axis values: from 0.0 up to (but excluding) 1 in steps of 0.01.
x = np.arange(0.0, 1, 0.01)
plt.plot(x, p, label="precision")
plt.plot(x, r, label="recall")
plt.plot(x, f, label="f-measure")
# Draw a vertical line at the x position (threshold) that has the maximum f-measure.
plt.axvline(x[np.argmax(f)], color='black', linestyle='--', label="best threshold")
plt.legend()
plt.show()
```
%% Output
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[1], line 1
TypeError Traceback (most recent call last)
Cell In[4], line 1
----> 1 p, r, f = zip(*results)
3 x = np.arange(0.0, 1, 0.01)
5 plt.plot(x, p, label="precision")
NameError: name 'results' is not defined
TypeError: 'float' object is not iterable
%% Cell type:code id: tags:
``` python
# Run the matcher on the 'knowledge' folder against 'know-reference', using
# most_common_pair as the processing strategy.
# NOTE(review): the meaning of th, steps and disable_dr is defined by
# PropertyMatcher.match, which is not visible here — confirm before changing them.
results = property_matcher.match('/home/guilherme/Documents/kg/knowledge', '/home/guilherme/Documents/kg/know-reference',
th=0.969, process_strategy=most_common_pair, steps=1, disable_dr=True)
```
%% Output
----------------------------------------------------------------------------------------------------
starwars.xml swtor.xml
Loading o1
Loading o2
56
ontology iterations: 256166, (0.4909090909090909, 0.9642857142857143, 0.6506024096385543), aligns: 56, po1: 698, po2: 367
----------------------------------------------------------------------------------------------------
memoryalpha.xml stexpand.xml
Loading o1
Loading o2
40
ontology iterations: 63112, (0.5342465753424658, 0.975, 0.6902654867256637), aligns: 40, po1: 322, po2: 196
----------------------------------------------------------------------------------------------------
starwars.xml swg.xml
Loading o1
Loading o2
20
ontology iterations: 102606, (0.4444444444444444, 1.0, 0.6153846153846153), aligns: 20, po1: 698, po2: 147
----------------------------------------------------------------------------------------------------
mcu.xml marvel.xml
Loading o1
Loading o2
11
ontology iterations: 19865, (0.6470588235294118, 1.0, 0.7857142857142858), aligns: 11, po1: 145, po2: 137
----------------------------------------------------------------------------------------------------
memoryalpha.xml memorybeta.xml
Loading o1
Loading o2
53
ontology iterations: 133630, (0.5104166666666666, 0.9245283018867925, 0.6577181208053692), aligns: 53, po1: 322, po2: 415
iterations: 575379, (0.5073313782991202, 0.9611111111111111, 0.6641074856046065)
%% Cell type:code id: tags:
``` python
# Split `results` into parallel precision / recall / f-measure sequences.
# Assumes `results` is a sequence of (p, r, f) triples, one per value of `x` — TODO confirm.
p, r, f = zip(*results)
# x-axis values: from 0.1 up to (but excluding) 1 in steps of 0.01.
x = np.arange(0.1, 1, 0.01)
plt.plot(x, p, label="precision")
plt.plot(x, r, label="recall")
plt.plot(x, f, label="f-measure")
# Draw a vertical line at the x position (threshold) that has the maximum f-measure.
plt.axvline(x[np.argmax(f)], color='black', linestyle='--', label="best threshold")
# Also print that best x value.
print(x[np.argmax(f)])
plt.legend()
plt.show()
```
%% Output
0.9699999999999995
%% Cell type:code id: tags:
``` python
# Same folders and processing strategy as before, now with sim_weights=[0, 1] and tr=[0.969].
# NOTE(review): the semantics of sim_weights and tr are defined by
# PropertyMatcher.match, which is not visible here — confirm before changing them.
results = property_matcher.match('/home/guilherme/Documents/kg/knowledge', '/home/guilherme/Documents/kg/know-reference',
th=0.1, process_strategy=most_common_pair, steps=1, sim_weights=[0, 1], tr=[0.969])
```
%% Output
----------------------------------------------------------------------------------------------------
starwars.xml swtor.xml
Loading o1
Loading o2
56
ontology iterations: 256166, (0.4074074074074074, 0.19642857142857142, 0.2650602409638554), aligns: 56, po1: 698, po2: 367
----------------------------------------------------------------------------------------------------
memoryalpha.xml stexpand.xml
Loading o1
Loading o2
40
ontology iterations: 63112, (0.7307692307692307, 0.475, 0.5757575757575758), aligns: 40, po1: 322, po2: 196
----------------------------------------------------------------------------------------------------
starwars.xml swg.xml
Loading o1
Loading o2
20
ontology iterations: 102606, (0.5625, 0.45, 0.5), aligns: 20, po1: 698, po2: 147
----------------------------------------------------------------------------------------------------
mcu.xml marvel.xml
Loading o1
Loading o2
11
ontology iterations: 19865, (0.0, 0.0, 0.0), aligns: 11, po1: 145, po2: 137
----------------------------------------------------------------------------------------------------
memoryalpha.xml memorybeta.xml
Loading o1
Loading o2
53
ontology iterations: 133630, (0.6296296296296297, 0.32075471698113206, 0.425), aligns: 53, po1: 322, po2: 415
iterations: 575379, (0.13930348258706468, 0.4666666666666667, 0.21455938697318008)
%% Cell type:code id: tags:
``` python
# Split `results` into parallel precision / recall / f-measure sequences.
# Assumes `results` is a sequence of (p, r, f) triples, one per value of `x` — TODO confirm.
p, r, f = zip(*results)
# x-axis values: from 0.1 up to (but excluding) 1 in steps of 0.01.
x = np.arange(0.1, 1, 0.01)
plt.plot(x, p, label="precision")
plt.plot(x, r, label="recall")
plt.plot(x, f, label="f-measure")
# Draw a vertical line at the x position (threshold) that has the maximum f-measure.
plt.axvline(x[np.argmax(f)], color='black', linestyle='--', label="best threshold")
# Also print that best x value.
print(x[np.argmax(f)])
plt.legend()
plt.show()
```
%% Cell type:code id: tags:
``` python
# Same folders and processing strategy as before, now with th=0.979 and sim_weights=[1, 2].
# NOTE(review): the semantics of th and sim_weights are defined by
# PropertyMatcher.match, which is not visible here — confirm before changing them.
results = property_matcher.match('/home/guilherme/Documents/kg/knowledge', '/home/guilherme/Documents/kg/know-reference',
th=0.979, process_strategy=most_common_pair, steps=1, sim_weights=[1, 2])
```
%% Output
----------------------------------------------------------------------------------------------------
starwars.xml swtor.xml
Loading o1
Loading o2
56
ontology iterations: 256166, (0.4594594594594595, 0.30357142857142855, 0.3655913978494624), aligns: 56, po1: 698, po2: 367
----------------------------------------------------------------------------------------------------
memoryalpha.xml stexpand.xml
Loading o1
Loading o2
40
ontology iterations: 63112, (0.5, 0.25, 0.3333333333333333), aligns: 40, po1: 322, po2: 196
----------------------------------------------------------------------------------------------------
starwars.xml swg.xml
Loading o1
Loading o2
20
ontology iterations: 102606, (0.6470588235294118, 0.55, 0.5945945945945946), aligns: 20, po1: 698, po2: 147
----------------------------------------------------------------------------------------------------
mcu.xml marvel.xml
Loading o1
Loading o2
11
ontology iterations: 19865, (0.7142857142857143, 0.45454545454545453, 0.5555555555555556), aligns: 11, po1: 145, po2: 137
----------------------------------------------------------------------------------------------------
memoryalpha.xml memorybeta.xml
Loading o1
Loading o2
53
ontology iterations: 133630, (0.4, 0.22641509433962265, 0.2891566265060241), aligns: 53, po1: 322, po2: 415
iterations: 575379, (0.4954954954954955, 0.3055555555555556, 0.37800687285223367)
%% Cell type:code id: tags:
``` python
# Split `results` into parallel precision / recall / f-measure sequences.
# Assumes `results` is a sequence of (p, r, f) triples, one per value of `x` — TODO confirm.
p, r, f = zip(*results)
# x-axis values: from 0.1 up to (but excluding) 1 in steps of 0.01.
x = np.arange(0.1, 1, 0.01)
plt.plot(x, p, label="precision")
plt.plot(x, r, label="recall")
plt.plot(x, f, label="f-measure")
# Draw a vertical line at the x position (threshold) that has the maximum f-measure.
plt.axvline(x[np.argmax(f)], color='black', linestyle='--', label="best threshold")
# Also print that best x value.
print(x[np.argmax(f)])
plt.legend()
plt.show()
```
%% Output
0.9799999999999995
%% Cell type:code id: tags:
``` python
# Same folders and processing strategy as before, now with th=0.569 and sim_weights=[0, 1, 2].
# NOTE(review): the semantics of th and sim_weights are defined by
# PropertyMatcher.match, which is not visible here — confirm before changing them.
results = property_matcher.match('/home/guilherme/Documents/kg/knowledge', '/home/guilherme/Documents/kg/know-reference',
th=0.569, process_strategy=most_common_pair, steps=1, sim_weights=[0, 1, 2])
```
%% Output
----------------------------------------------------------------------------------------------------
starwars.xml swtor.xml
Loading o1
Loading o2
56
ontology iterations: 256166, (0.1834862385321101, 0.35714285714285715, 0.2424242424242424), aligns: 56, po1: 698, po2: 367
----------------------------------------------------------------------------------------------------
memoryalpha.xml stexpand.xml
Loading o1
Loading o2
40
ontology iterations: 63112, (0.15384615384615385, 0.25, 0.1904761904761905), aligns: 40, po1: 322, po2: 196
----------------------------------------------------------------------------------------------------
starwars.xml swg.xml
Loading o1
Loading o2
20
ontology iterations: 102606, (0.1276595744680851, 0.3, 0.17910447761194026), aligns: 20, po1: 698, po2: 147
----------------------------------------------------------------------------------------------------
mcu.xml marvel.xml
Loading o1
Loading o2
11
ontology iterations: 19865, (0.08333333333333333, 0.18181818181818182, 0.1142857142857143), aligns: 11, po1: 145, po2: 137
----------------------------------------------------------------------------------------------------
memoryalpha.xml memorybeta.xml
Loading o1
Loading o2
53
ontology iterations: 133630, (0.16666666666666666, 0.3584905660377358, 0.2275449101796407), aligns: 53, po1: 322, po2: 415
iterations: 575379, (0.15877437325905291, 0.31666666666666665, 0.21150278293135436)
%% Cell type:code id: tags:
``` python
# Split `results` into parallel precision / recall / f-measure sequences.
# Assumes `results` is a sequence of (p, r, f) triples, one per value of `x` — TODO confirm.
p, r, f = zip(*results)
# x-axis values: from 0.1 up to (but excluding) 1 in steps of 0.01.
x = np.arange(0.1, 1, 0.01)
plt.plot(x, p, label="precision")
plt.plot(x, r, label="recall")
plt.plot(x, f, label="f-measure")
# Draw a vertical line at the x position (threshold) that has the maximum f-measure.
plt.axvline(x[np.argmax(f)], color='black', linestyle='--', label="best threshold")
# Also print that best x value.
print(x[np.argmax(f)])
plt.legend()
plt.show()
```
%% Output
0.5699999999999997
%% Cell type:code id: tags:
``` python
```
......
main.py 0 → 100644
from sentence_transformers import SentenceTransformer
from models import Finbank
import random
import torch
import numpy as np
from property_matching import PropertyMatcher
from tqdm.auto import tqdm
from property_matching import most_common_pair
import matplotlib.pyplot as plt
import argparse
import rdflib
import tempfile
from urllib import parse, request
from om.ont import get_namespace
def parse_arguments():
    """Parse the command-line arguments of the script.

    Two positional arguments are required (``source`` and ``target``
    ontology paths). ``--output`` defaults to ``./output`` and ``--format``
    accepts ``align`` (default) or ``sssom``.

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``.
    """
    cli = argparse.ArgumentParser(description='LD similarity.')

    # Required positional arguments, registered in order.
    positionals = (
        ('source', 'Source ontology path.'),
        ('target', 'Target ontology path.'),
    )
    for arg_name, help_text in positionals:
        cli.add_argument(arg_name, help=help_text)

    # Optional arguments.
    cli.add_argument(
        '--output',
        dest='output',
        default='./output',
        help='Folder to save the results.',
    )
    cli.add_argument(
        '--format',
        dest='format',
        default='align',
        choices=['align', 'sssom'],
        help='Output format.',
    )

    namespace = cli.parse_args()
    return namespace
def toAlignFormat(aligns, onto1, onto2, location1, location2):
    """Serialize alignments as an Alignment API RDF/XML document.

    Every interpolated value is XML-escaped, so URIs or file locations that
    contain ``&``, ``<``, ``>`` or ``"`` still yield a well-formed document.

    Args:
        aligns: Mapping whose keys are ``(entity1, entity2)`` pairs and whose
            values are the confidence of each correspondence.
        onto1: Identifier written to ``rdf:about`` of the first ontology.
        onto2: Identifier written to ``rdf:about`` of the second ontology.
        location1: Location written for the first ontology.
        location2: Location written for the second ontology.

    Returns:
        The whole document as a single string.
    """
    from xml.sax.saxutils import escape

    def _xml(value):
        # Escape &, <, > plus the double quote, because values are placed
        # both in element text and inside double-quoted attributes.
        return escape(str(value), {'"': '&quot;'})

    data = ["""<?xml version='1.0' encoding='utf-8' standalone='no'?>
<rdf:RDF xmlns='http://knowledgeweb.semanticweb.org/heterogeneity/alignment#'
xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'
xmlns:xsd='http://www.w3.org/2001/XMLSchema#'
xmlns:align='http://knowledgeweb.semanticweb.org/heterogeneity/alignment#'>"""]

    # Alignment header describing the two ontologies.
    data.append(f""" <Alignment>
<xml>yes</xml>
<level>0</level>
<type>**</type>
<onto1>
<Ontology rdf:about="{_xml(onto1)}">
<location>{_xml(location1)}</location>
</Ontology>
</onto1>
<onto2>
<Ontology rdf:about="{_xml(onto2)}">
<location>{_xml(location2)}</location>
</Ontology>
</onto2>""")

    # One <map>/<Cell> per correspondence; the relation is always "=".
    for (entity1, entity2), confidence in aligns.items():
        data.append(f""" <map>
<Cell>
<entity1 rdf:resource="{_xml(entity1)}"/>
<entity2 rdf:resource="{_xml(entity2)}"/>
<relation>=</relation>
<measure rdf:datatype="http://www.w3.org/2001/XMLSchema#float">{_xml(confidence)}</measure>
</Cell>
</map>""")

    data.append(""" </Alignment>
</rdf:RDF>""")
    return '\n'.join(data)
def ssom(aligns):
    """Render alignments as a tab-separated table with SSSOM column names.

    Args:
        aligns: Mapping whose keys are ``(entity1, entity2)`` pairs and whose
            values are the confidence of each correspondence.

    Returns:
        A string with a header row followed by one row per correspondence,
        joined by newlines (no trailing newline).
    """
    header = 'subject_id\tpredicate_id\tobject_id\tmapping_justification\tconfidence'
    rows = [
        f"{subject}\tskos:exactMatch\t{target}\tsemapv:LexicalMatching\t{score}"
        for (subject, target), score in aligns.items()
    ]
    return "\n".join([header, *rows])
if __name__ == '__main__':
    # Script entry point: match the properties of two ontologies and write the
    # resulting alignment to a file, printing that file's URL on stdout.
    import os

    args = parse_arguments()

    # NOTE(review): the embedding path is machine-specific — adjust before
    # running elsewhere.
    wm = Finbank('/home/guilherme/Documents/kg/fin.bin')
    model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
    property_matcher = PropertyMatcher(wm, model)

    o1 = rdflib.Graph().parse(args.source)
    o2 = rdflib.Graph().parse(args.target)

    # `p` is passed to the serializers below; `it` is not used further.
    p, it = property_matcher.match_ontologies(o1, o2, 0.65)

    # Serialize in the requested output format.
    if args.format == 'sssom':
        result = ssom(p)
        suffix = '.tsv'
    else:
        result = toAlignFormat(p, get_namespace(o1), get_namespace(o2), args.source, args.target)
        suffix = '.rdf'

    # Honour --output: create the folder if needed and place the file there
    # (the option used to be parsed but ignored).
    out_dir = os.path.abspath(args.output)
    os.makedirs(out_dir, exist_ok=True)

    # Write explicitly as UTF-8, matching the encoding declared in the XML
    # header rather than relying on the platform default.
    with tempfile.NamedTemporaryFile('w', prefix='alignment_', suffix=suffix, delete=False,
                                     dir=out_dir, encoding='utf-8') as out_file:
        out_file.write(result)

    # Print the URL only after the file is flushed and closed, so a consumer
    # reading stdout never sees a partially written file.
    print(parse.urljoin("file:", request.pathname2url(out_file.name)))
......@@ -14,6 +14,7 @@ from collections import Counter
from tqdm.auto import tqdm
import math
def get_type_h(e, g, ml=1):
if type(e) is Literal:
return [e.datatype]
......@@ -283,9 +284,6 @@ def get_prop(e, g, p):
return s, objc
def build_tf_models(o1, o2):
a_entities = set(filter(lambda x: is_property(x, o1), o1.subjects()))
b_entities = set(filter(lambda x: is_property(x, o2), o2.subjects()))
......@@ -380,7 +378,6 @@ class PropertyMatcher:
sim = 0
label_confidence = sim
if sim_weights:
conf = []
if 0 in sim_weights:
......@@ -403,7 +400,6 @@ class PropertyMatcher:
if tr is not None:
trm = [[0, 0] for _ in tr]
for r, k1, k2 in tqdm(list(onts(base, ref))):
print('-' * 100)
......@@ -457,7 +453,8 @@ class PropertyMatcher:
trm[i][0] += len(pa.intersection(cp))
trm[i][1] += len(cp)
print(f'ontology iterations: {oi}, {metrics(len(pa.intersection(cp)), len(cp), current_total)}, aligns: {current_total}, po1: {len(a_entities)}, po2: {len(b_entities)}')
print(
f'ontology iterations: {oi}, {metrics(len(pa.intersection(cp)), len(cp), current_total)}, aligns: {current_total}, po1: {len(a_entities)}, po2: {len(b_entities)}')
# for a1, a2 in pa.intersection(p):
# print(colored('✓', 'green'), get_n(a1, o1), get_n(a2, o2))
......@@ -471,8 +468,9 @@ class PropertyMatcher:
# print(colored('X', 'red'), get_n(d1, o1), get_n(a1, o1), get_n(r1, o1), colored('<>', 'green'),
# get_n(d2, o2), get_n(a2, o2), get_n(r2, o2))
# print(
# f'ontology iterations: {oi}, {metrics(current_correct, current_pred, current_total)}, aligns: {current_total}, po1: {len(a_entities)}, po2: {len(b_entities)}')
print(
f'ontology iterations: {oi}, {metrics(current_correct, current_pred, current_total)}, aligns: {current_total}, po1: {len(a_entities)}, po2: {len(b_entities)}')
print(f'iterations: {iterations}, {metrics(correct, pred, total)}')
if tr is not None:
res = []
......@@ -540,6 +538,4 @@ class PropertyMatcher:
pm[iv1] = (iv2, sim)
pm[iv2] = (iv1, sim)
return p, iterations
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment