Commit 625022cf authored by Guilherme Henrique

added readme

parent dc23cbec
__pycache__
.git
.idea
Dockerfile
venv
fin_cache
.idea
__pycache__
.ipynb_checkpoints
fin.bin
propmatch.tar.gz
FROM ubuntu:latest

ARG DEBIAN_FRONTEND=noninteractive

# Python 3.8 is installed from the deadsnakes PPA; pip and git are needed to install the requirements
RUN apt-get update
RUN apt-get install -y software-properties-common
RUN add-apt-repository ppa:deadsnakes/ppa
RUN apt-get install -y python3.8 python3-pip python3.8-dev python3.8-distutils git

WORKDIR /app
COPY . .
RUN python3.8 -m pip install -r requirements.txt

EXPOSE 8080

# Serve the FastAPI app (main:app) with uvicorn on port 8080
CMD ["python3.8", "-m", "uvicorn", "--workers", "1", "--port", "8080", "--host", "0.0.0.0", "main:app"]
# PropMatch: Property Matcher
PropMatch is a Python 3.9-based ontology property matching system that finds alignments between the properties of different ontologies. It combines lexical matching methods and alignment extension with several kinds of embeddings to increase the number of correspondences found between properties.
## Download
A packaged version of PropMatch is available for download [here](https://drive.google.com/file/d/1UShYKSO8fle-VWC4o1YZ2xxsgVVyELZ4/view?usp=drive_link). It implements the MELT Web API protocol and ships as a Docker container.
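The container exposes the matcher on port 8080 through a `/match` endpoint (see `main.py` and the Dockerfile). A rough usage sketch, assuming the downloaded archive is named `propmatch.tar.gz` and the loaded image is tagged `propmatch`:

```bash
# Load the packaged image and start the service on port 8080
docker load -i propmatch.tar.gz        # note the image tag reported here
docker run -p 8080:8080 propmatch      # replace "propmatch" if the reported tag differs

# Send two ontologies to the MELT-style /match endpoint and save the resulting alignment
curl -X POST http://localhost:8080/match \
  -F "source=@source.owl;type=application/rdf+xml" \
  -F "target=@target.owl;type=application/rdf+xml" \
  -o alignment.rdf
```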
## Development
PropMatch was tested on Python 3.9. Running it locally also requires the Finnish word embeddings, available from
http://dl.turkunlp.org/finnish-embeddings/finnish_4B_parsebank_skgram.bin. Save the downloaded file as `fin.bin` in the working directory, since `main.py` loads it from `./fin.bin`.
To install the required dependencies, run:
```bash
pip install -r requirements.txt
```
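To run the service locally, fetch the embeddings into the working directory and start the FastAPI app with uvicorn, mirroring the container's command. A sketch of these steps:

```bash
# Download the Finnish embeddings to the location expected by main.py
wget -O fin.bin http://dl.turkunlp.org/finnish-embeddings/finnish_4B_parsebank_skgram.bin

# Serve the matcher API on port 8080 (same entry point as the Docker image)
python3 -m uvicorn --workers 1 --port 8080 --host 0.0.0.0 main:app
```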
## Contributing
Contributions to PropMatch are welcome! If you encounter issues or have suggestions for improvements, please feel free to open an issue or submit a pull request in the [PropMatch GitHub repository](https://github.com/guihcs/propalign).
## License
PropMatch is released under the [MIT License](https://opensource.org/licenses/MIT).
---
For inquiries and support, contact us at Guilherme.Santos-Sousa@irit.fr.
import sys
from sentence_transformers import SentenceTransformer
from models import Finbank
import random
import torch
import numpy as np
from property_matching import PropertyMatcher
from tqdm.auto import tqdm
from property_matching import most_common_pair
import matplotlib.pyplot as plt
import os
import requests
import argparse
import rdflib
import tempfile
from urllib import parse, request
from om.ont import get_namespace
import json
from typing import Union
import re
from fastapi.middleware.cors import CORSMiddleware
from fastapi import FastAPI, Form, Response, UploadFile, File
from fastapi.responses import PlainTextResponse
from typing_extensions import Annotated
def parse_arguments():
    arg_parser = argparse.ArgumentParser(description='LD similarity.')
    arg_parser.add_argument('source', help='Source ontology path.')
    arg_parser.add_argument('target', help='Target ontology path.')
    arg_parser.add_argument('--output', dest='output', default='./output', help='Folder to save the results.')
    arg_parser.add_argument('--format', dest='format', default='align', choices=['align', 'sssom'], help='Output format.')
    return arg_parser.parse_args()


app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins='*',
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
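# Shared module-level resources: the Finnish word embeddings, the sentence-transformer
# model, and the property matcher built from them.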
wm = Finbank('./fin.bin')
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
property_matcher = PropertyMatcher(wm, model)
def toAlignFormat(aligns, onto1, onto2, location1, location2):
    # ... (body elided in the diff view)
    return '\n'.join(data)
def ssom(aligns):
    # Serialise the alignments as SSSOM TSV, one row per matched property pair.
    lines = ['subject_id\tpredicate_id\tobject_id\tmapping_justification\tconfidence']
    for (entity1, entity2), confidence in aligns.items():
        ...  # row construction elided in the diff view
    return "\n".join(lines)
@app.post('/match')
async def match(source: Union[str, UploadFile] = Form(...),
                target: Union[str, UploadFile] = Form(...),
                inputAlignment: Annotated[Union[str, None], Form()] = None,
                parameters: Annotated[Union[str, None], Form()] = None):
    # If the ontologies arrive as uploaded files (rather than as URL/path strings),
    # the alignment is returned directly in the HTTP response body.
    outputFile = type(source) != str

    if type(source) == str:
        o1 = rdflib.Graph().parse(source)
        o2 = rdflib.Graph().parse(target)
    else:
        o1 = rdflib.Graph().parse(source.file, format=re.split(r'\W', source.content_type)[-1])
        o2 = rdflib.Graph().parse(target.file, format=re.split(r'\W', target.content_type)[-1])

    # Optional matcher parameters (e.g. similarity weights, output format)
    params = {}
    if parameters is not None:
        with open(parameters) as f:
            params = json.load(f)

    p, it = property_matcher.match_ontologies(o1, o2, 0.65,
                                              sim_weights=params['sim_weights'] if 'sim_weights' in params else None)

    if 'format' in params and params['format'] == 'sssom':
        result = ssom(p)
        suffix = '.tsv'
    else:
        if outputFile:
            source = source.filename
            target = target.filename
        result = toAlignFormat(p, get_namespace(o1), get_namespace(o2), source, target)
        suffix = '.rdf'

    if outputFile:
        return Response(result, media_type='application/rdf+xml')
    else:
        # Ontologies were passed by reference: write the alignment to a temporary
        # file and return its path.
        with tempfile.NamedTemporaryFile('w', prefix='alignment_', suffix=suffix, delete=False) as out_file:
            out_file.write(result)
        print(parse.urljoin("file:", request.pathname2url(out_file.name)))
        return PlainTextResponse(out_file.name)


if __name__ == '__main__':
    # Command-line entry point: match two ontologies given as paths or URLs.
    args = parse_arguments()
    o1 = rdflib.Graph().parse(args.source)
    o2 = rdflib.Graph().parse(args.target)
    p, it = property_matcher.match_ontologies(o1, o2, 0.65)
    if args.format == 'sssom':
        result = ssom(p)
        suffix = '.tsv'
    else:
        result = toAlignFormat(p, get_namespace(o1), get_namespace(o2), args.source, args.target)
        suffix = '.rdf'
    with tempfile.NamedTemporaryFile('w', prefix='alignment_', suffix=suffix, delete=False) as out_file:
        out_file.write(result)
# models.py (excerpt): Finbank builds a local cache for the Finnish embeddings on first use
        self.ep = ep
        if not os.path.exists('./fin_cache'):
            print('Embeddings cache not found. Building...')
            os.mkdir('./fin_cache')
            with open('./fin_cache/fbk.txt', 'w') as fbk:
                ...  # cache-building body elided in the diff view
import nltk

nltk.download('averaged_perceptron_tagger')


def get_core_concept(e1):
    # Part-of-speech tag the tokens of the entity label
    t1 = nltk.pos_tag(e1)
    ...  # rest of the function elided in the diff view
rdflib
termcolor
py-stringmatching
scikit-learn
jupyter
fastapi
python-multipart
uvicorn[standard]