Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
P
PropMatch
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
MELODI
Ontology Matching
PropMatch
Commits
dc23cbec
Commit
dc23cbec
authored
1 year ago
by
Guilherme Henrique
Browse files
Options
Downloads
Patches
Plain Diff
output in alignment api and sssom
parent
c1ca88bf
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
main.ipynb
+25
-27
25 additions, 27 deletions
main.ipynb
main.py
+98
-0
98 additions, 0 deletions
main.py
property_matching.py
+6
-10
6 additions, 10 deletions
property_matching.py
with
129 additions
and
37 deletions
main.ipynb
+
25
−
27
View file @
dc23cbec
...
...
@@ -5,8 +5,8 @@
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2023-0
5-05T16
:5
8
:3
0.285996286
Z",
"start_time": "2023-0
5-05T16:58:27.520729058
Z"
"end_time": "2023-0
7-19T08
:5
7
:3
7.490880538
Z",
"start_time": "2023-0
7-19T08:57:35.005710556
Z"
}
},
"outputs": [],
...
...
@@ -38,8 +38,8 @@
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2023-0
5-05T16:58:33.64365648
6Z",
"start_time": "2023-0
5-05T16
:5
8
:3
0.289546804
Z"
"end_time": "2023-0
7-19T08:57:46.77726675
6Z",
"start_time": "2023-0
7-19T08
:5
7
:3
7.493551556
Z"
}
},
"outputs": [],
...
...
@@ -51,24 +51,22 @@
},
{
"cell_type": "code",
"execution_count":
20
,
"execution_count":
3
,
"metadata": {
"ExecuteTime": {
"end_time": "2023-0
5-04T23:11:23.647059226
Z",
"start_time": "2023-0
5-04T23:08:45.535290656
Z"
"end_time": "2023-0
7-19T09:00:31.330783432
Z",
"start_time": "2023-0
7-19T08:57:46.780973332
Z"
}
},
"outputs": [
{
"data": {
"text/plain": " 0%| | 0/21 [00:00<?, ?it/s]",
"application/vnd.jupyter.widget-view+json": {
"model_id": "dc8966ab2d924576890dd6f0598da481",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/21 [00:00<?, ?it/s]"
]
"version_minor": 0,
"model_id": "4d5d3d0f0d60477d840af6d66d63615a"
}
},
"metadata": {},
"output_type": "display_data"
...
...
@@ -88,13 +86,13 @@
"Loading o1\n",
"Loading o2\n",
"0\n",
"ontology iterations: 1804, (0
.0
, 0, 0.0), aligns: 0, po1: 22, po2: 41\n",
"ontology iterations: 1804, (0, 0, 0.0), aligns: 0, po1: 22, po2: 41\n",
"----------------------------------------------------------------------------------------------------\n",
"cmt.owl Conference.owl\n",
"Loading o1\n",
"Loading o2\n",
"3\n",
"ontology iterations: 7552, (0.
25
, 0.3333333333333333, 0.
28571428571428575
), aligns: 3, po1: 59, po2: 64\n",
"ontology iterations: 7552, (0.
3333333333333333
, 0.3333333333333333, 0.
3333333333333333
), aligns: 3, po1: 59, po2: 64\n",
"----------------------------------------------------------------------------------------------------\n",
"cmt.owl iasted.owl\n",
"Loading o1\n",
...
...
@@ -166,13 +164,13 @@
"Loading o1\n",
"Loading o2\n",
"2\n",
"ontology iterations: 3068, (
0.6666666666666666
, 1.0,
0.8
), aligns: 2, po1: 59, po2: 26\n",
"ontology iterations: 3068, (
1.0
, 1.0,
1.0
), aligns: 2, po1: 59, po2: 26\n",
"----------------------------------------------------------------------------------------------------\n",
"Conference.owl edas.owl\n",
"Loading o1\n",
"Loading o2\n",
"3\n",
"ontology iterations: 6400, (
0.6666666666666666
, 0.6666666666666666, 0.
6666666666666666
), aligns: 3, po1: 64, po2: 50\n",
"ontology iterations: 6400, (
1.0
, 0.6666666666666666, 0.
8
), aligns: 3, po1: 64, po2: 50\n",
"----------------------------------------------------------------------------------------------------\n",
"cmt.owl edas.owl\n",
"Loading o1\n",
...
...
@@ -190,7 +188,7 @@
"Loading o1\n",
"Loading o2\n",
"3\n",
"ontology iterations: 3328, (
0.5
, 0.3333333333333333, 0.
4
), aligns: 3, po1: 64, po2: 26\n",
"ontology iterations: 3328, (
1.0
, 0.3333333333333333, 0.
5
), aligns: 3, po1: 64, po2: 26\n",
"----------------------------------------------------------------------------------------------------\n",
"confOf.owl edas.owl\n",
"Loading o1\n",
...
...
@@ -203,7 +201,7 @@
"Loading o2\n",
"4\n",
"ontology iterations: 4608, (1.0, 0.5, 0.6666666666666666), aligns: 4, po1: 64, po2: 36\n",
"iterations: 74590, (0.
685714285714285
7, 0.5217391304347826, 0.
5925925925925927
)\n"
"iterations: 74590, (0.
827586206896551
7, 0.5217391304347826, 0.
64
)\n"
]
}
],
...
...
@@ -213,23 +211,23 @@
},
{
"cell_type": "code",
"execution_count":
1
,
"execution_count":
4
,
"metadata": {
"ExecuteTime": {
"end_time": "2023-0
5
-0
4T23:13:14.075532740
Z",
"start_time": "2023-0
5
-0
4T23:13:13.788604059
Z"
"end_time": "2023-0
6
-0
1T13:30:47.366350949
Z",
"start_time": "2023-0
6
-0
1T13:30:46.430444165
Z"
}
},
"outputs": [
{
"ename": "
Nam
eError",
"evalue": "
name 'results' is not defined
",
"ename": "
Typ
eError",
"evalue": "
'float' object is not iterable
",
"output_type": "error",
"traceback": [
"\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
"\u001B[0;31m
Nam
eError\u001B[0m Traceback (most recent call last)",
"Cell \u001B[0;32mIn[
1
], line 1\u001B[0m\n\u001B[0;32m----> 1\u001B[0m p, r, f \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mzip\u001B[39m
(
\u001B[38;5;241m*\u001B[39m\u001B[43mresults\u001B[49m
)
\n\u001B[1;32m 3\u001B[0m x \u001B[38;5;241m=\u001B[39m np\u001B[38;5;241m.\u001B[39marange(\u001B[38;5;241m0.0\u001B[39m, \u001B[38;5;241m1\u001B[39m, \u001B[38;5;241m0.01\u001B[39m)\n\u001B[1;32m 5\u001B[0m plt\u001B[38;5;241m.\u001B[39mplot(x, p, label\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mprecision\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n",
"\u001B[0;31m
Nam
eError\u001B[0m:
name 'results' is not defined
"
"\u001B[0;31m
Typ
eError\u001B[0m Traceback (most recent call last)",
"Cell \u001B[0;32mIn[
4
], line 1\u001B[0m\n\u001B[0;32m----> 1\u001B[0m p, r, f \u001B[38;5;241m=\u001B[39m \u001B[38;5;28
;43
mzip\u001B[39
;49m\u001B[43m(\u001B[49
m\u001B[38;5;241
;43
m*\u001B[39
;49
m\u001B[43mresults\u001B[49m
\u001B[43m)\u001B[49m
\n\u001B[1;32m 3\u001B[0m x \u001B[38;5;241m=\u001B[39m np\u001B[38;5;241m.\u001B[39marange(\u001B[38;5;241m0.0\u001B[39m, \u001B[38;5;241m1\u001B[39m, \u001B[38;5;241m0.01\u001B[39m)\n\u001B[1;32m 5\u001B[0m plt\u001B[38;5;241m.\u001B[39mplot(x, p, label\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mprecision\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n",
"\u001B[0;31m
Typ
eError\u001B[0m:
'float' object is not iterable
"
]
}
],
...
...
%% Cell type:code id: tags:
```
python
from
sentence_transformers
import
SentenceTransformer
from
models
import
Finbank
import
random
import
torch
import
numpy
as
np
from
property_matching
import
PropertyMatcher
from
tqdm.auto
import
tqdm
from
property_matching
import
most_common_pair
import
matplotlib.pyplot
as
plt
torch
.
manual_seed
(
0
)
random
.
seed
(
0
)
np
.
random
.
seed
(
0
)
```
%% Cell type:markdown id: tags:
Download the embeddings from:
http://dl.turkunlp.org/finnish-embeddings/
%% Cell type:code id: tags:
```
python
wm
=
Finbank
(
'
/home/guilherme/Documents/kg/fin.bin
'
)
model
=
SentenceTransformer
(
'
sentence-transformers/all-MiniLM-L6-v2
'
)
property_matcher
=
PropertyMatcher
(
wm
,
model
)
```
%% Cell type:code id: tags:
```
python
results
=
property_matcher
.
match
(
'
/home/guilherme/Documents/kg/conference
'
,
'
/home/guilherme/Documents/kg/reference
'
,
th
=
0.65
)
```
%% Output
----------------------------------------------------------------------------------------------------
Conference.owl iasted.owl
Loading o1
Loading o2
1
ontology iterations: 5248, (0, 0.0, 0.0), aligns: 1, po1: 64, po2: 41
----------------------------------------------------------------------------------------------------
ekaw.owl iasted.owl
Loading o1
Loading o2
0
ontology iterations: 1804, (0
.0
, 0, 0.0), aligns: 0, po1: 22, po2: 41
ontology iterations: 1804, (0, 0, 0.0), aligns: 0, po1: 22, po2: 41
----------------------------------------------------------------------------------------------------
cmt.owl Conference.owl
Loading o1
Loading o2
3
ontology iterations: 7552, (0.
25
, 0.3333333333333333, 0.
28571428571428575
), aligns: 3, po1: 59, po2: 64
ontology iterations: 7552, (0.
3333333333333333
, 0.3333333333333333, 0.
3333333333333333
), aligns: 3, po1: 59, po2: 64
----------------------------------------------------------------------------------------------------
cmt.owl iasted.owl
Loading o1
Loading o2
0
ontology iterations: 4838, (0, 0, 0.0), aligns: 0, po1: 59, po2: 41
----------------------------------------------------------------------------------------------------
confOf.owl ekaw.owl
Loading o1
Loading o2
0
ontology iterations: 1584, (0, 0, 0.0), aligns: 0, po1: 36, po2: 22
----------------------------------------------------------------------------------------------------
edas.owl ekaw.owl
Loading o1
Loading o2
4
ontology iterations: 2200, (1.0, 0.5, 0.6666666666666666), aligns: 4, po1: 50, po2: 22
----------------------------------------------------------------------------------------------------
confOf.owl iasted.owl
Loading o1
Loading o2
0
ontology iterations: 2952, (0, 0, 0.0), aligns: 0, po1: 36, po2: 41
----------------------------------------------------------------------------------------------------
Conference.owl ekaw.owl
Loading o1
Loading o2
2
ontology iterations: 2816, (0, 0.0, 0.0), aligns: 2, po1: 64, po2: 22
----------------------------------------------------------------------------------------------------
cmt.owl ekaw.owl
Loading o1
Loading o2
3
ontology iterations: 2596, (1.0, 1.0, 1.0), aligns: 3, po1: 59, po2: 22
----------------------------------------------------------------------------------------------------
edas.owl iasted.owl
Loading o1
Loading o2
0
ontology iterations: 4100, (0, 0, 0.0), aligns: 0, po1: 50, po2: 41
----------------------------------------------------------------------------------------------------
edas.owl sigkdd.owl
Loading o1
Loading o2
4
ontology iterations: 2600, (0.6666666666666666, 0.5, 0.5714285714285715), aligns: 4, po1: 50, po2: 26
----------------------------------------------------------------------------------------------------
cmt.owl confOf.owl
Loading o1
Loading o2
6
ontology iterations: 4248, (1.0, 0.6666666666666666, 0.8), aligns: 6, po1: 59, po2: 36
----------------------------------------------------------------------------------------------------
confOf.owl sigkdd.owl
Loading o1
Loading o2
1
ontology iterations: 1872, (0.5, 1.0, 0.6666666666666666), aligns: 1, po1: 36, po2: 26
----------------------------------------------------------------------------------------------------
ekaw.owl sigkdd.owl
Loading o1
Loading o2
0
ontology iterations: 1144, (0, 0, 0.0), aligns: 0, po1: 22, po2: 26
----------------------------------------------------------------------------------------------------
cmt.owl sigkdd.owl
Loading o1
Loading o2
2
ontology iterations: 3068, (
0.6666666666666666
, 1.0,
0.8
), aligns: 2, po1: 59, po2: 26
ontology iterations: 3068, (
1.0
, 1.0,
1.0
), aligns: 2, po1: 59, po2: 26
----------------------------------------------------------------------------------------------------
Conference.owl edas.owl
Loading o1
Loading o2
3
ontology iterations: 6400, (
0.6666666666666666
, 0.6666666666666666, 0.
6666666666666666
), aligns: 3, po1: 64, po2: 50
ontology iterations: 6400, (
1.0
, 0.6666666666666666, 0.
8
), aligns: 3, po1: 64, po2: 50
----------------------------------------------------------------------------------------------------
cmt.owl edas.owl
Loading o1
Loading o2
5
ontology iterations: 5900, (0.5, 0.2, 0.28571428571428575), aligns: 5, po1: 59, po2: 50
----------------------------------------------------------------------------------------------------
iasted.owl sigkdd.owl
Loading o1
Loading o2
0
ontology iterations: 2132, (0, 0, 0.0), aligns: 0, po1: 41, po2: 26
----------------------------------------------------------------------------------------------------
Conference.owl sigkdd.owl
Loading o1
Loading o2
3
ontology iterations: 3328, (
0.5
, 0.3333333333333333, 0.
4
), aligns: 3, po1: 64, po2: 26
ontology iterations: 3328, (
1.0
, 0.3333333333333333, 0.
5
), aligns: 3, po1: 64, po2: 26
----------------------------------------------------------------------------------------------------
confOf.owl edas.owl
Loading o1
Loading o2
5
ontology iterations: 3600, (1.0, 0.6, 0.7499999999999999), aligns: 5, po1: 36, po2: 50
----------------------------------------------------------------------------------------------------
Conference.owl confOf.owl
Loading o1
Loading o2
4
ontology iterations: 4608, (1.0, 0.5, 0.6666666666666666), aligns: 4, po1: 64, po2: 36
iterations: 74590, (0.
685714285714285
7, 0.5217391304347826, 0.
5925925925925927
)
iterations: 74590, (0.
827586206896551
7, 0.5217391304347826, 0.
64
)
%% Cell type:code id: tags:
```
python
p
,
r
,
f
=
zip
(
*
results
)
x
=
np
.
arange
(
0.0
,
1
,
0.01
)
plt
.
plot
(
x
,
p
,
label
=
"
precision
"
)
plt
.
plot
(
x
,
r
,
label
=
"
recall
"
)
plt
.
plot
(
x
,
f
,
label
=
"
f-measure
"
)
# draw vertical line in the x position containing the threshold that have the max f-measure
plt
.
axvline
(
x
[
np
.
argmax
(
f
)],
color
=
'
black
'
,
linestyle
=
'
--
'
,
label
=
"
best threshold
"
)
plt
.
legend
()
plt
.
show
()
```
%% Output
---------------------------------------------------------------------------
Nam
eError Traceback (most recent call last)
Cell In[
1
], line 1
Typ
eError Traceback (most recent call last)
Cell In[
4
], line 1
----> 1 p, r, f = zip(
*
results)
3 x = np.arange(0.0, 1, 0.01)
5 plt.plot(x, p, label="precision")
Nam
eError:
name 'results' is not defined
Typ
eError:
'float' object is not iterable
%% Cell type:code id: tags:
```
python
results
=
property_matcher
.
match
(
'
/home/guilherme/Documents/kg/knowledge
'
,
'
/home/guilherme/Documents/kg/know-reference
'
,
th
=
0.969
,
process_strategy
=
most_common_pair
,
steps
=
1
,
disable_dr
=
True
)
```
%% Output
----------------------------------------------------------------------------------------------------
starwars.xml swtor.xml
Loading o1
Loading o2
56
ontology iterations: 256166, (0.4909090909090909, 0.9642857142857143, 0.6506024096385543), aligns: 56, po1: 698, po2: 367
----------------------------------------------------------------------------------------------------
memoryalpha.xml stexpand.xml
Loading o1
Loading o2
40
ontology iterations: 63112, (0.5342465753424658, 0.975, 0.6902654867256637), aligns: 40, po1: 322, po2: 196
----------------------------------------------------------------------------------------------------
starwars.xml swg.xml
Loading o1
Loading o2
20
ontology iterations: 102606, (0.4444444444444444, 1.0, 0.6153846153846153), aligns: 20, po1: 698, po2: 147
----------------------------------------------------------------------------------------------------
mcu.xml marvel.xml
Loading o1
Loading o2
11
ontology iterations: 19865, (0.6470588235294118, 1.0, 0.7857142857142858), aligns: 11, po1: 145, po2: 137
----------------------------------------------------------------------------------------------------
memoryalpha.xml memorybeta.xml
Loading o1
Loading o2
53
ontology iterations: 133630, (0.5104166666666666, 0.9245283018867925, 0.6577181208053692), aligns: 53, po1: 322, po2: 415
iterations: 575379, (0.5073313782991202, 0.9611111111111111, 0.6641074856046065)
%% Cell type:code id: tags:
```
python
p
,
r
,
f
=
zip
(
*
results
)
x
=
np
.
arange
(
0.1
,
1
,
0.01
)
plt
.
plot
(
x
,
p
,
label
=
"
precision
"
)
plt
.
plot
(
x
,
r
,
label
=
"
recall
"
)
plt
.
plot
(
x
,
f
,
label
=
"
f-measure
"
)
# draw vertical line in the x position containing the threshold that have the max f-measure
plt
.
axvline
(
x
[
np
.
argmax
(
f
)],
color
=
'
black
'
,
linestyle
=
'
--
'
,
label
=
"
best threshold
"
)
print
(
x
[
np
.
argmax
(
f
)])
plt
.
legend
()
plt
.
show
()
```
%% Output
0.9699999999999995
%% Cell type:code id: tags:
```
python
results
=
property_matcher
.
match
(
'
/home/guilherme/Documents/kg/knowledge
'
,
'
/home/guilherme/Documents/kg/know-reference
'
,
th
=
0.1
,
process_strategy
=
most_common_pair
,
steps
=
1
,
sim_weights
=
[
0
,
1
],
tr
=
[
0.969
])
```
%% Output
----------------------------------------------------------------------------------------------------
starwars.xml swtor.xml
Loading o1
Loading o2
56
ontology iterations: 256166, (0.4074074074074074, 0.19642857142857142, 0.2650602409638554), aligns: 56, po1: 698, po2: 367
----------------------------------------------------------------------------------------------------
memoryalpha.xml stexpand.xml
Loading o1
Loading o2
40
ontology iterations: 63112, (0.7307692307692307, 0.475, 0.5757575757575758), aligns: 40, po1: 322, po2: 196
----------------------------------------------------------------------------------------------------
starwars.xml swg.xml
Loading o1
Loading o2
20
ontology iterations: 102606, (0.5625, 0.45, 0.5), aligns: 20, po1: 698, po2: 147
----------------------------------------------------------------------------------------------------
mcu.xml marvel.xml
Loading o1
Loading o2
11
ontology iterations: 19865, (0.0, 0.0, 0.0), aligns: 11, po1: 145, po2: 137
----------------------------------------------------------------------------------------------------
memoryalpha.xml memorybeta.xml
Loading o1
Loading o2
53
ontology iterations: 133630, (0.6296296296296297, 0.32075471698113206, 0.425), aligns: 53, po1: 322, po2: 415
iterations: 575379, (0.13930348258706468, 0.4666666666666667, 0.21455938697318008)
%% Cell type:code id: tags:
```
python
p
,
r
,
f
=
zip
(
*
results
)
x
=
np
.
arange
(
0.1
,
1
,
0.01
)
plt
.
plot
(
x
,
p
,
label
=
"
precision
"
)
plt
.
plot
(
x
,
r
,
label
=
"
recall
"
)
plt
.
plot
(
x
,
f
,
label
=
"
f-measure
"
)
# draw vertical line in the x position containing the threshold that have the max f-measure
plt
.
axvline
(
x
[
np
.
argmax
(
f
)],
color
=
'
black
'
,
linestyle
=
'
--
'
,
label
=
"
best threshold
"
)
print
(
x
[
np
.
argmax
(
f
)])
plt
.
legend
()
plt
.
show
()
```
%% Cell type:code id: tags:
```
python
results
=
property_matcher
.
match
(
'
/home/guilherme/Documents/kg/knowledge
'
,
'
/home/guilherme/Documents/kg/know-reference
'
,
th
=
0.979
,
process_strategy
=
most_common_pair
,
steps
=
1
,
sim_weights
=
[
1
,
2
])
```
%% Output
----------------------------------------------------------------------------------------------------
starwars.xml swtor.xml
Loading o1
Loading o2
56
ontology iterations: 256166, (0.4594594594594595, 0.30357142857142855, 0.3655913978494624), aligns: 56, po1: 698, po2: 367
----------------------------------------------------------------------------------------------------
memoryalpha.xml stexpand.xml
Loading o1
Loading o2
40
ontology iterations: 63112, (0.5, 0.25, 0.3333333333333333), aligns: 40, po1: 322, po2: 196
----------------------------------------------------------------------------------------------------
starwars.xml swg.xml
Loading o1
Loading o2
20
ontology iterations: 102606, (0.6470588235294118, 0.55, 0.5945945945945946), aligns: 20, po1: 698, po2: 147
----------------------------------------------------------------------------------------------------
mcu.xml marvel.xml
Loading o1
Loading o2
11
ontology iterations: 19865, (0.7142857142857143, 0.45454545454545453, 0.5555555555555556), aligns: 11, po1: 145, po2: 137
----------------------------------------------------------------------------------------------------
memoryalpha.xml memorybeta.xml
Loading o1
Loading o2
53
ontology iterations: 133630, (0.4, 0.22641509433962265, 0.2891566265060241), aligns: 53, po1: 322, po2: 415
iterations: 575379, (0.4954954954954955, 0.3055555555555556, 0.37800687285223367)
%% Cell type:code id: tags:
```
python
p
,
r
,
f
=
zip
(
*
results
)
x
=
np
.
arange
(
0.1
,
1
,
0.01
)
plt
.
plot
(
x
,
p
,
label
=
"
precision
"
)
plt
.
plot
(
x
,
r
,
label
=
"
recall
"
)
plt
.
plot
(
x
,
f
,
label
=
"
f-measure
"
)
# draw vertical line in the x position containing the threshold that have the max f-measure
plt
.
axvline
(
x
[
np
.
argmax
(
f
)],
color
=
'
black
'
,
linestyle
=
'
--
'
,
label
=
"
best threshold
"
)
print
(
x
[
np
.
argmax
(
f
)])
plt
.
legend
()
plt
.
show
()
```
%% Output
0.9799999999999995
%% Cell type:code id: tags:
```
python
results
=
property_matcher
.
match
(
'
/home/guilherme/Documents/kg/knowledge
'
,
'
/home/guilherme/Documents/kg/know-reference
'
,
th
=
0.569
,
process_strategy
=
most_common_pair
,
steps
=
1
,
sim_weights
=
[
0
,
1
,
2
])
```
%% Output
----------------------------------------------------------------------------------------------------
starwars.xml swtor.xml
Loading o1
Loading o2
56
ontology iterations: 256166, (0.1834862385321101, 0.35714285714285715, 0.2424242424242424), aligns: 56, po1: 698, po2: 367
----------------------------------------------------------------------------------------------------
memoryalpha.xml stexpand.xml
Loading o1
Loading o2
40
ontology iterations: 63112, (0.15384615384615385, 0.25, 0.1904761904761905), aligns: 40, po1: 322, po2: 196
----------------------------------------------------------------------------------------------------
starwars.xml swg.xml
Loading o1
Loading o2
20
ontology iterations: 102606, (0.1276595744680851, 0.3, 0.17910447761194026), aligns: 20, po1: 698, po2: 147
----------------------------------------------------------------------------------------------------
mcu.xml marvel.xml
Loading o1
Loading o2
11
ontology iterations: 19865, (0.08333333333333333, 0.18181818181818182, 0.1142857142857143), aligns: 11, po1: 145, po2: 137
----------------------------------------------------------------------------------------------------
memoryalpha.xml memorybeta.xml
Loading o1
Loading o2
53
ontology iterations: 133630, (0.16666666666666666, 0.3584905660377358, 0.2275449101796407), aligns: 53, po1: 322, po2: 415
iterations: 575379, (0.15877437325905291, 0.31666666666666665, 0.21150278293135436)
%% Cell type:code id: tags:
```
python
p
,
r
,
f
=
zip
(
*
results
)
x
=
np
.
arange
(
0.1
,
1
,
0.01
)
plt
.
plot
(
x
,
p
,
label
=
"
precision
"
)
plt
.
plot
(
x
,
r
,
label
=
"
recall
"
)
plt
.
plot
(
x
,
f
,
label
=
"
f-measure
"
)
# draw vertical line in the x position containing the threshold that have the max f-measure
plt
.
axvline
(
x
[
np
.
argmax
(
f
)],
color
=
'
black
'
,
linestyle
=
'
--
'
,
label
=
"
best threshold
"
)
print
(
x
[
np
.
argmax
(
f
)])
plt
.
legend
()
plt
.
show
()
```
%% Output
0.5699999999999997
%% Cell type:code id: tags:
```
python
```
...
...
This diff is collapsed.
Click to expand it.
main.py
0 → 100644
+
98
−
0
View file @
dc23cbec
from
sentence_transformers
import
SentenceTransformer
from
models
import
Finbank
import
random
import
torch
import
numpy
as
np
from
property_matching
import
PropertyMatcher
from
tqdm.auto
import
tqdm
from
property_matching
import
most_common_pair
import
matplotlib.pyplot
as
plt
import
argparse
import
rdflib
import
tempfile
from
urllib
import
parse
,
request
from
om.ont
import
get_namespace
def parse_arguments(argv=None):
    """Parse the command-line options of the matcher script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``, which is what
            the function did before this parameter existed.

    Returns:
        The ``argparse.Namespace`` holding ``source``, ``target``,
        ``output`` and ``format``.
    """
    arg_parser = argparse.ArgumentParser(description='LD similarity.')

    # Positional arguments: the two ontologies to be matched.
    arg_parser.add_argument('source', help='Source ontology path.')
    arg_parser.add_argument('target', help='Target ontology path.')

    arg_parser.add_argument(
        '--output',
        dest='output',
        default='./output',
        help='Folder to save the results.',
    )
    arg_parser.add_argument(
        '--format',
        dest='format',
        default='align',
        choices=['align', 'sssom'],
        help='Output format.',
    )

    return arg_parser.parse_args(argv)
def toAlignFormat(aligns, onto1, onto2, location1, location2):
    """Serialise an alignment as an Alignment API RDF/XML document.

    Every interpolated value is XML-escaped, so identifiers or paths that
    contain ``&``, ``<``, ``>`` or quotes still yield a well-formed document.

    Args:
        aligns: Mapping whose items are ``((entity1, entity2), confidence)``.
        onto1: Identifier written to ``rdf:about`` of the first ontology.
        onto2: Identifier written to ``rdf:about`` of the second ontology.
        location1: Text written to ``<location>`` of the first ontology.
        location2: Text written to ``<location>`` of the second ontology.

    Returns:
        The whole document as one string, its parts joined by newlines.
    """
    # Local import keeps the block self-contained (stdlib only).
    from xml.sax.saxutils import escape, quoteattr

    # The XML declaration has to be the very first thing in the document,
    # so the opening chunk starts with it and has no leading whitespace.
    data = [
        "<?xml version='1.0' encoding='utf-8' standalone='no'?>\n"
        "<rdf:RDF xmlns='http://knowledgeweb.semanticweb.org/heterogeneity/alignment#'\n"
        "         xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'\n"
        "         xmlns:xsd='http://www.w3.org/2001/XMLSchema#'\n"
        "         xmlns:align='http://knowledgeweb.semanticweb.org/heterogeneity/alignment#'>"
    ]

    # quoteattr() returns the value already wrapped in suitable quotes.
    data.append(f"""    <Alignment>
        <xml>yes</xml>
        <level>0</level>
        <type>**</type>
        <onto1>
            <Ontology rdf:about={quoteattr(str(onto1))}>
                <location>{escape(str(location1))}</location>
            </Ontology>
        </onto1>
        <onto2>
            <Ontology rdf:about={quoteattr(str(onto2))}>
                <location>{escape(str(location2))}</location>
            </Ontology>
        </onto2>""")

    # One <map><Cell> element per aligned pair.
    for (entity1, entity2), confidence in aligns.items():
        data.append(f"""        <map>
            <Cell>
                <entity1 rdf:resource={quoteattr(str(entity1))}/>
                <entity2 rdf:resource={quoteattr(str(entity2))}/>
                <relation>=</relation>
                <measure rdf:datatype="http://www.w3.org/2001/XMLSchema#float">{escape(str(confidence))}</measure>
            </Cell>
        </map>""")

    data.append("""    </Alignment>
</rdf:RDF>
""")

    return '\n'.join(data)
def ssom(aligns):
    """Serialise an alignment as tab-separated rows with SSSOM column names.

    Every pair is written with predicate ``skos:exactMatch`` and
    justification ``semapv:LexicalMatching``. Tab, carriage-return and
    newline characters inside a value are replaced by a single space so
    that one mapping always occupies exactly one five-column row.

    Args:
        aligns: Mapping whose items are ``((entity1, entity2), confidence)``.

    Returns:
        The header line followed by one line per mapping, joined by ``\\n``
        (no trailing newline).
    """
    def _cell(value):
        # Keep a stray separator inside a value from splitting the row.
        return str(value).replace('\t', ' ').replace('\r', ' ').replace('\n', ' ')

    lines = ['subject_id\tpredicate_id\tobject_id\tmapping_justification\tconfidence']
    lines.extend(
        f"{_cell(entity1)}\tskos:exactMatch\t{_cell(entity2)}\tsemapv:LexicalMatching\t{_cell(confidence)}"
        for (entity1, entity2), confidence in aligns.items()
    )
    return "\n".join(lines)
if __name__ == '__main__':
    # Script entry point: match the properties of two ontologies, write the
    # alignment to a temporary file and print that file's URL on stdout.
    args = parse_arguments()

    # NOTE(review): the embedding path is hard-coded to a developer machine,
    # and args.output is parsed but never used below — confirm intent.
    wm = Finbank('/home/guilherme/Documents/kg/fin.bin')
    model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
    property_matcher = PropertyMatcher(wm, model)

    o1 = rdflib.Graph().parse(args.source)
    o2 = rdflib.Graph().parse(args.target)

    # 0.65 is passed as the third positional argument of match_ontologies.
    p, it = property_matcher.match_ontologies(o1, o2, 0.65)

    # Pick the serialiser and the matching file extension.
    if args.format == 'sssom':
        result = ssom(p)
        suffix = '.tsv'
    else:
        result = toAlignFormat(p, get_namespace(o1), get_namespace(o2), args.source, args.target)
        suffix = '.rdf'

    # Write as UTF-8 explicitly: the Alignment XML declares encoding='utf-8',
    # and the platform default encoding is not guaranteed to match.
    with tempfile.NamedTemporaryFile('w', prefix='alignment_', suffix=suffix,
                                     delete=False, encoding='utf-8') as out_file:
        out_file.write(result)

    # Print the URL only after the file is closed, so its content is flushed
    # before any consumer of stdout reads it.
    print(parse.urljoin("file:", request.pathname2url(out_file.name)))
This diff is collapsed.
Click to expand it.
property_matching.py
+
6
−
10
View file @
dc23cbec
...
...
@@ -14,6 +14,7 @@ from collections import Counter
from
tqdm.auto
import
tqdm
import
math
def
get_type_h
(
e
,
g
,
ml
=
1
):
if
type
(
e
)
is
Literal
:
return
[
e
.
datatype
]
...
...
@@ -283,9 +284,6 @@ def get_prop(e, g, p):
return
s
,
objc
def
build_tf_models
(
o1
,
o2
):
a_entities
=
set
(
filter
(
lambda
x
:
is_property
(
x
,
o1
),
o1
.
subjects
()))
b_entities
=
set
(
filter
(
lambda
x
:
is_property
(
x
,
o2
),
o2
.
subjects
()))
...
...
@@ -380,7 +378,6 @@ class PropertyMatcher:
sim
=
0
label_confidence
=
sim
if
sim_weights
:
conf
=
[]
if
0
in
sim_weights
:
...
...
@@ -403,7 +400,6 @@ class PropertyMatcher:
if
tr
is
not
None
:
trm
=
[[
0
,
0
]
for
_
in
tr
]
for
r
,
k1
,
k2
in
tqdm
(
list
(
onts
(
base
,
ref
))):
print
(
'
-
'
*
100
)
...
...
@@ -457,7 +453,8 @@ class PropertyMatcher:
trm
[
i
][
0
]
+=
len
(
pa
.
intersection
(
cp
))
trm
[
i
][
1
]
+=
len
(
cp
)
print
(
f
'
ontology iterations:
{
oi
}
,
{
metrics
(
len
(
pa
.
intersection
(
cp
)),
len
(
cp
),
current_total
)
}
, aligns:
{
current_total
}
, po1:
{
len
(
a_entities
)
}
, po2:
{
len
(
b_entities
)
}
'
)
print
(
f
'
ontology iterations:
{
oi
}
,
{
metrics
(
len
(
pa
.
intersection
(
cp
)),
len
(
cp
),
current_total
)
}
, aligns:
{
current_total
}
, po1:
{
len
(
a_entities
)
}
, po2:
{
len
(
b_entities
)
}
'
)
# for a1, a2 in pa.intersection(p):
# print(colored('✓', 'green'), get_n(a1, o1), get_n(a2, o2))
...
...
@@ -471,8 +468,9 @@ class PropertyMatcher:
# print(colored('X', 'red'), get_n(d1, o1), get_n(a1, o1), get_n(r1, o1), colored('<>', 'green'),
# get_n(d2, o2), get_n(a2, o2), get_n(r2, o2))
# print(
# f'ontology iterations: {oi}, {metrics(current_correct, current_pred, current_total)}, aligns: {current_total}, po1: {len(a_entities)}, po2: {len(b_entities)}')
print
(
f
'
ontology iterations:
{
oi
}
,
{
metrics
(
current_correct
,
current_pred
,
current_total
)
}
, aligns:
{
current_total
}
, po1:
{
len
(
a_entities
)
}
, po2:
{
len
(
b_entities
)
}
'
)
print
(
f
'
iterations:
{
iterations
}
,
{
metrics
(
correct
,
pred
,
total
)
}
'
)
if
tr
is
not
None
:
res
=
[]
...
...
@@ -540,6 +538,4 @@ class PropertyMatcher:
pm
[
iv1
]
=
(
iv2
,
sim
)
pm
[
iv2
]
=
(
iv1
,
sim
)
return
p
,
iterations
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment