Commit 625022cf authored by Guilherme Henrique

added readme

parent dc23cbec
__pycache__
.git
.idea
Dockerfile
venv
fin_cache
.idea
__pycache__
.ipynb_checkpoints
fin.bin
propmatch.tar.gz
FROM ubuntu:latest

ARG DEBIAN_FRONTEND=noninteractive

# Python 3.8 is installed from the deadsnakes PPA; pip and git are needed to install the requirements
RUN apt-get update
RUN apt-get install -y software-properties-common
RUN add-apt-repository ppa:deadsnakes/ppa
RUN apt-get install -y python3.8 python3-pip python3.8-dev python3.8-distutils git

WORKDIR /app
COPY . .
RUN python3.8 -m pip install -r requirements.txt

EXPOSE 8080

# Serve the FastAPI app (main:app) with uvicorn on port 8080
CMD ["python3.8", "-m", "uvicorn", "--workers", "1", "--port", "8080", "--host", "0.0.0.0", "main:app"]
# PropMatch: Property Matcher
PropMatch is a Python 3.9-based ontology property matching system that finds alignments between the properties of different ontologies. It combines lexical matching methods and alignment extension with several kinds of embeddings to increase the number of correspondences found between properties.
## Download
A packaged version of PropMatch is available for download [here](https://drive.google.com/file/d/1UShYKSO8fle-VWC4o1YZ2xxsgVVyELZ4/view?usp=drive_link). It implements the MELT Web API protocol and ships as a Docker container.
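The container exposes the matcher on port 8080 through a `/match` endpoint (see `main.py` and the Dockerfile). A rough usage sketch, assuming the downloaded archive is named `propmatch.tar.gz` and the loaded image is tagged `propmatch`:

```bash
# Load the packaged image and start the service on port 8080
docker load -i propmatch.tar.gz        # note the image tag reported here
docker run -p 8080:8080 propmatch      # replace "propmatch" if the reported tag differs

# Send two ontologies to the MELT-style /match endpoint and save the resulting alignment
curl -X POST http://localhost:8080/match \
  -F "source=@source.owl;type=application/rdf+xml" \
  -F "target=@target.owl;type=application/rdf+xml" \
  -o alignment.rdf
```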
## Development
PropMatch was tested on Python 3.9. Running it locally also requires the Finnish word embeddings, available from
http://dl.turkunlp.org/finnish-embeddings/finnish_4B_parsebank_skgram.bin. Save the downloaded file as `fin.bin` in the working directory, since `main.py` loads it from `./fin.bin`.
To install the required dependencies, run:
```bash
pip install -r requirements.txt
```
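To run the service locally, fetch the embeddings into the working directory and start the FastAPI app with uvicorn, mirroring the container's command. A sketch of these steps:

```bash
# Download the Finnish embeddings to the location expected by main.py
wget -O fin.bin http://dl.turkunlp.org/finnish-embeddings/finnish_4B_parsebank_skgram.bin

# Serve the matcher API on port 8080 (same entry point as the Docker image)
python3 -m uvicorn --workers 1 --port 8080 --host 0.0.0.0 main:app
```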
## Contributing
Contributions to PropMatch are welcome! If you encounter issues or have suggestions for improvements, please feel free to open an issue or submit a pull request in the [PropMatch GitHub repository](https://github.com/guihcs/propalign).
## License
PropMatch is released under the [MIT License](https://opensource.org/licenses/MIT).
---
For inquiries and support, contact us at Guilherme.Santos-Sousa@irit.fr.
import sys
from sentence_transformers import SentenceTransformer
from models import Finbank
import random
import torch
import numpy as np
from property_matching import PropertyMatcher
from tqdm.auto import tqdm
from property_matching import most_common_pair
import matplotlib.pyplot as plt
import os
import requests
import argparse
import rdflib
import tempfile
from urllib import parse, request
from om.ont import get_namespace
import json
from typing import Union
import re
from fastapi.middleware.cors import CORSMiddleware
from fastapi import FastAPI, Form, Response, UploadFile, File
from fastapi.responses import PlainTextResponse
from typing_extensions import Annotated
def parse_arguments():
    arg_parser = argparse.ArgumentParser(description='LD similarity.')
    arg_parser.add_argument('source', help='Source ontology path.')
    arg_parser.add_argument('target', help='Target ontology path.')
    arg_parser.add_argument('--output', dest='output', default='./output', help='Folder to save the results.')
    arg_parser.add_argument('--format', dest='format', default='align', choices=['align', 'sssom'], help='Output format.')
    return arg_parser.parse_args()


app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins='*',
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
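# Shared module-level resources: the Finnish word embeddings, the sentence-transformer
# model, and the property matcher built from them.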
wm = Finbank('./fin.bin')
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
property_matcher = PropertyMatcher(wm, model)
def toAlignFormat(aligns, onto1, onto2, location1, location2):
    # ... (body elided in the diff view)
    return '\n'.join(data)
def ssom(aligns):
    # Serialise the alignments as SSSOM TSV, one row per matched property pair.
    lines = ['subject_id\tpredicate_id\tobject_id\tmapping_justification\tconfidence']
    for (entity1, entity2), confidence in aligns.items():
        ...  # row construction elided in the diff view
    return "\n".join(lines)
@app.post('/match')
async def match(source: Union[str, UploadFile] = Form(...),
                target: Union[str, UploadFile] = Form(...),
                inputAlignment: Annotated[Union[str, None], Form()] = None,
                parameters: Annotated[Union[str, None], Form()] = None):
    # If the ontologies arrive as uploaded files (rather than as URL/path strings),
    # the alignment is returned directly in the HTTP response body.
    outputFile = type(source) != str

    if type(source) == str:
        o1 = rdflib.Graph().parse(source)
        o2 = rdflib.Graph().parse(target)
    else:
        o1 = rdflib.Graph().parse(source.file, format=re.split(r'\W', source.content_type)[-1])
        o2 = rdflib.Graph().parse(target.file, format=re.split(r'\W', target.content_type)[-1])

    # Optional matcher parameters (e.g. similarity weights, output format)
    params = {}
    if parameters is not None:
        with open(parameters) as f:
            params = json.load(f)

    p, it = property_matcher.match_ontologies(o1, o2, 0.65,
                                              sim_weights=params['sim_weights'] if 'sim_weights' in params else None)

    if 'format' in params and params['format'] == 'sssom':
        result = ssom(p)
        suffix = '.tsv'
    else:
        if outputFile:
            source = source.filename
            target = target.filename
        result = toAlignFormat(p, get_namespace(o1), get_namespace(o2), source, target)
        suffix = '.rdf'

    if outputFile:
        return Response(result, media_type='application/rdf+xml')
    else:
        # Ontologies were passed by reference: write the alignment to a temporary
        # file and return its path.
        with tempfile.NamedTemporaryFile('w', prefix='alignment_', suffix=suffix, delete=False) as out_file:
            out_file.write(result)
        print(parse.urljoin("file:", request.pathname2url(out_file.name)))
        return PlainTextResponse(out_file.name)


if __name__ == '__main__':
    # Command-line entry point: match two ontologies given as paths or URLs.
    args = parse_arguments()
    o1 = rdflib.Graph().parse(args.source)
    o2 = rdflib.Graph().parse(args.target)
    p, it = property_matcher.match_ontologies(o1, o2, 0.65)
    if args.format == 'sssom':
        result = ssom(p)
        suffix = '.tsv'
    else:
        result = toAlignFormat(p, get_namespace(o1), get_namespace(o2), args.source, args.target)
        suffix = '.rdf'
    with tempfile.NamedTemporaryFile('w', prefix='alignment_', suffix=suffix, delete=False) as out_file:
        out_file.write(result)
# models.py (excerpt): Finbank builds a local cache for the Finnish embeddings on first use
        self.ep = ep
        if not os.path.exists('./fin_cache'):
            print('Embeddings cache not found. Building...')
            os.mkdir('./fin_cache')
            with open('./fin_cache/fbk.txt', 'w') as fbk:
                ...  # cache-building body elided in the diff view
import nltk

nltk.download('averaged_perceptron_tagger')


def get_core_concept(e1):
    # Part-of-speech tag the tokens of the entity label
    t1 = nltk.pos_tag(e1)
    ...  # rest of the function elided in the diff view
rdflib
termcolor
py-stringmatching
scikit-learn
jupyter
fastapi
python-multipart
uvicorn[standard]