From 625022cf0312f4074fa27a4ce6cb47ba96f8ccba Mon Sep 17 00:00:00 2001
From: Guilherme Henrique <guihss.cs@gmail.com>
Date: Mon, 28 Aug 2023 11:51:02 +0200
Subject: [PATCH] Add README, Dockerfile, and FastAPI web service

---
 .dockerignore    |  4 +++
 .gitignore       |  4 ++-
 Dockerfile       | 25 +++++++++++++++
 README.md        | 30 ++++++++++++++++++
 main.py          | 79 +++++++++++++++++++++++++++++++-----------------
 models.py        |  1 -
 nlp.py           |  1 +
 requirements.txt |  5 ++-
 8 files changed, 119 insertions(+), 30 deletions(-)
 create mode 100644 .dockerignore
 create mode 100644 Dockerfile
 create mode 100644 README.md

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..8af27fb
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,4 @@
+__pycache__
+.git
+.idea
+Dockerfile
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 3a5c4f7..0e59832 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,6 @@ venv
 fin_cache
 .idea
 __pycache__
-.ipynb_checkpoints
\ No newline at end of file
+.ipynb_checkpoints
+fin.bin
+propmatch.tar.gz
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..b405f0e
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,25 @@
+FROM ubuntu:latest
+
+ARG DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update
+RUN apt install software-properties-common -y
+RUN add-apt-repository ppa:deadsnakes/ppa
+
+RUN apt-get install -y python3.8
+RUN apt-get install -y python3-pip
+RUN apt-get install -y python3.8-dev
+RUN apt-get install -y python3.8-distutils
+
+RUN apt-get install -y git
+
+WORKDIR /app
+
+COPY . .
+RUN python3.8 -m pip install -r requirements.txt
+
+CMD ["python3.8", "-m", "uvicorn", "--workers", "1", "--port", "8080", "--host", "0.0.0.0", "main:app"]
+
+EXPOSE 8080
+
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..b6a9220
--- /dev/null
+++ b/README.md
@@ -0,0 +1,30 @@
+# PropMatch: Property Matcher
+
+PropMatch is a Python 3.9-based ontology property matching system designed to find better alignment of properties across different ontologies. This system uses lexical matching methods and alignment extension combined with different embeddings to increase the number of correspondences found between properties.
+
+## Download
+
+A packaged version of PropMatch is available for download [here](https://drive.google.com/file/d/1UShYKSO8fle-VWC4o1YZ2xxsgVVyELZ4/view?usp=drive_link). It follows the MELT Web API protocol and is packaged in a Docker container.
+
+## Development
+
+PropMatch was tested on Python 3.9. To run PropMatch you also need to download the Finnish word embeddings from
+http://dl.turkunlp.org/finnish-embeddings/finnish_4B_parsebank_skgram.bin.
+
+To install the required dependencies, run:
+
+```bash
+pip install -r requirements.txt
+```
+
+## Contributing
+
+Contributions to PropMatch are welcome! If you encounter issues or have suggestions for improvements, please feel free to open an issue or submit a pull request in the [PropMatch GitHub repository](https://github.com/guihcs/propalign).
+
+## License
+
+PropMatch is released under the [MIT License](https://opensource.org/licenses/MIT).
+
+---
+
+For inquiries and support, contact us at Guilherme.Santos-Sousa@irit.fr.
\ No newline at end of file
diff --git a/main.py b/main.py
index 9c84b8e..8c48c88 100644
--- a/main.py
+++ b/main.py
@@ -1,28 +1,37 @@
+import sys
+
 from sentence_transformers import SentenceTransformer
 from models import Finbank
-import random
-import torch
-import numpy as np
 from property_matching import PropertyMatcher
-from tqdm.auto import tqdm
-from property_matching import most_common_pair
-import matplotlib.pyplot as plt
+import os
+import requests
 import argparse
 import rdflib
 import tempfile
 from urllib import parse, request
 from om.ont import get_namespace
+import json
+from typing import Union
+import re
 
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi import FastAPI, Form, Response, UploadFile, File
+from fastapi.responses import PlainTextResponse, Response
+from typing_extensions import Annotated
 
-def parse_arguments():
-    arg_parser = argparse.ArgumentParser(description='LD similarity.')
+app = FastAPI()
 
-    arg_parser.add_argument('source', help='Source ontology path.')
-    arg_parser.add_argument('target', help='Target ontology path.')
-    arg_parser.add_argument('--output', dest='output', default='./output', help='Folder to save the results.')
-    arg_parser.add_argument('--format', dest='format', default='align', choices=['align', 'sssom'], help='Output format.')
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins='*',
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
 
-    return arg_parser.parse_args()
+wm = Finbank('./fin.bin')
+model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
+property_matcher = PropertyMatcher(wm, model)
 
 
 def toAlignFormat(aligns, onto1, onto2, location1, location2):
@@ -62,6 +71,7 @@ def toAlignFormat(aligns, onto1, onto2, location1, location2):
 
     return '\n'.join(data)
 
+
 def ssom(aligns):
     lines = ['subject_id\tpredicate_id\tobject_id\tmapping_justification\tconfidence']
     for (entity1, entity2), confidence in aligns.items():
@@ -69,30 +79,45 @@ def ssom(aligns):
 
     return "\n".join(lines)
 
-if __name__ == '__main__':
-    args = parse_arguments()
-    wm = Finbank('/home/guilherme/Documents/kg/fin.bin')
-    model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
-    property_matcher = PropertyMatcher(wm, model)
 
-    o1 = rdflib.Graph().parse(args.source)
-    o2 = rdflib.Graph().parse(args.target)
+@app.post('/match')
+async def match(source: Union[str, UploadFile] = Form(...),
+                target: Union[str, UploadFile] = Form(...),
+                inputAlignment: Annotated[Union[str, None], Form()] = None,
+                parameters: Annotated[Union[str, None], Form()] = None):
+    outputFile = type(source) != str
 
-    p, it = property_matcher.match_ontologies(o1, o2, 0.65)
+    if type(source) == str:
+        o1 = rdflib.Graph().parse(source)
+        o2 = rdflib.Graph().parse(target)
+    else:
 
+        o1 = rdflib.Graph().parse(source.file, format=re.split(r'\W', source.content_type)[-1])
+        o2 = rdflib.Graph().parse(target.file, format=re.split(r'\W', target.content_type)[-1])
 
+    params = {}
 
-    # Parser
+    if parameters is not None:
+        with open(parameters) as f:
+            params = json.load(f)
 
+    p, it = property_matcher.match_ontologies(o1, o2, 0.65,
+                                              sim_weights=params['sim_weights'] if 'sim_weights' in params else None)
 
-    if args.format == 'sssom':
+    if 'format' in params and params['format'] == 'sssom':
         result = ssom(p)
         suffix = '.tsv'
     else:
-        result = toAlignFormat(p, get_namespace(o1), get_namespace(o2), args.source, args.target)
+        if outputFile:
+            source = source.filename
+            target = target.filename
+        result = toAlignFormat(p, get_namespace(o1), get_namespace(o2), source, target)
         suffix = '.rdf'
 
-    with tempfile.NamedTemporaryFile('w', prefix='alignment_', suffix=suffix, delete=False) as out_file:
-        out_file.write(result)
+    if outputFile:
+        return Response(result, media_type='application/rdf+xml')
+    else:
+        with tempfile.NamedTemporaryFile('w', prefix='alignment_', suffix=suffix, delete=False) as out_file:
+            out_file.write(result)
 
-        print(parse.urljoin("file:", request.pathname2url(out_file.name)))
+        return PlainTextResponse(out_file.name)
diff --git a/models.py b/models.py
index 7f0c72d..a44ca56 100644
--- a/models.py
+++ b/models.py
@@ -29,7 +29,6 @@ class Finbank:
         self.ep = ep
 
         if not os.path.exists('./fin_cache'):
-            print('Embeddings cache not found. Building...')
             os.mkdir('./fin_cache')
 
             with open('./fin_cache/fbk.txt', 'w') as fbk:
diff --git a/nlp.py b/nlp.py
index 50b95a7..e118e69 100644
--- a/nlp.py
+++ b/nlp.py
@@ -1,5 +1,6 @@
 import nltk
 
+nltk.download('averaged_perceptron_tagger')
 
 def get_core_concept(e1):
     t1 = nltk.pos_tag(e1)
diff --git a/requirements.txt b/requirements.txt
index a5a27de..c7bbb5d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,4 +11,7 @@ rdflib
 termcolor
 py-stringmatching
 scikit-learn
-jupyter
\ No newline at end of file
+jupyter
+fastapi
+python-multipart
+uvicorn[standard]
-- 
GitLab