Skip to content
Snippets Groups Projects
Commit 2427614c authored by Julien B.'s avatar Julien B.
Browse files

First commit

parent 1bdccceb
No related branches found
No related tags found
No related merge requests found
.env
db.json
\ No newline at end of file
db.json
.idea
\ No newline at end of file
from queue import Queue
from threading import Thread
from api.internal_services.logger import logger
from api.internal_services.spacy import spacy_process
# Queue of sentences waiting to be handled by the background worker.
sentence_queue = Queue()
# Current worker thread; process_queue() resets it to None when it finishes.
worker_thread = None
def process_queue():
    """Drain ``sentence_queue``, running ``spacy_process`` on each sentence.

    Runs as the target of the worker thread. The loop ends when the queue is
    empty or when a ``None`` item is received. Every item taken from the
    queue is acknowledged with ``task_done()``, including the ``None`` item
    and items whose processing raised, so ``sentence_queue.join()`` cannot
    hang on them. A failure on one sentence is logged and does not prevent
    the remaining sentences from being processed.
    """
    global worker_thread
    while sentence_queue.qsize() != 0:
        sentence = sentence_queue.get()
        try:
            logger.debug(f"Processing the sentence : {sentence}")
            if sentence is None:
                # A None item asks the worker to stop.
                break
            # all process
            spacy_process(sentence)
        except Exception:
            # Keep the worker alive: log the failure and move on.
            logger.exception(f"Failed to process the sentence : {sentence}")
        finally:
            sentence_queue.task_done()
    logger.debug("Closing the worker thread")
    worker_thread = None
def start_worker_thread():
    """Start a daemon thread running ``process_queue`` unless one is alive."""
    global worker_thread
    already_running = worker_thread is not None and worker_thread.is_alive()
    if already_running:
        return
    logger.debug("Starting the worker thread to process the queue")
    worker_thread = Thread(target=process_queue, daemon=True)
    worker_thread.start()
def add_sentence_to_queue(sentence):
    """Enqueue ``sentence`` and make sure a worker thread is running."""
    sentence_queue.put(item=sentence)
    start_worker_thread()
\ No newline at end of file
from tinydb import TinyDB, Query, where
# TinyDB database stored in the file 'db.json'.
db = TinyDB('db.json')
def getLastIndex():
    """Return the stored ``last_index`` value, creating it at 0 if absent."""
    matches = db.search(where('key') == 'last_index')
    if matches:
        return matches[0]['value']
    # No record yet: insert the initial counter.
    db.insert({'key': 'last_index', 'value': 0})
    return 0
def updateLastIndex(value):
    """Store ``value`` as the ``last_index`` counter and return it.

    The record is inserted when it does not exist yet, so the new value is
    never silently dropped if this is called before ``getLastIndex``.

    Args:
        value: New counter value to persist.

    Returns:
        The same ``value``, for convenient chaining by the caller.
    """
    db.upsert({'key': 'last_index', 'value': value}, where('key') == 'last_index')
    return value
\ No newline at end of file
# logger_config.py
import logging
# Shared application logger: the logger named 'uvicorn.error'
# (presumably so messages go through uvicorn's handlers — confirm).
logger = logging.getLogger('uvicorn.error')
# Emit DEBUG-level messages and above.
logger.setLevel(logging.DEBUG)
\ No newline at end of file
import os

from neo4j import GraphDatabase
# Neo4j connection settings. Each value can be overridden through an
# environment variable so credentials do not have to live in the source;
# the defaults are the previously hard-coded values.
uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")  # Change the URI to match your configuration
username = os.environ.get("NEO4J_USERNAME", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "password")
# Connect to the Neo4j database
driver = GraphDatabase.driver(uri, auth=(username, password))
def createWordNode(tx, id, text, lemma, pos, root):
    """Create a ``Word`` node with the given properties.

    Args:
        tx: Transaction object exposing ``run``.
        id: Identifier stored on the node.
        text: Value of the ``text`` property.
        lemma: Value of the ``lemma`` property.
        pos: Value of the ``pos`` property.
        root: Value of the ``root`` property.
    """
    query = '''
CREATE (
n:Word {
id: $id,
text: $text,
lemma: $lemma,
pos: $pos,
root: $root
}
)'''
    properties = {"id": id, "text": text, "lemma": lemma, "pos": pos, "root": root}
    tx.run(query, properties)
def createConstituentNode(tx, id, type):
    """Create a ``Constituent`` node with the given ``id`` and ``type``.

    Args:
        tx: Transaction object exposing ``run``.
        id: Identifier stored on the node.
        type: Value of the ``type`` property.
    """
    query = '''
CREATE (
n:Constituent {
id: $id,
type: $type
}
)'''
    tx.run(query, {"id": id, "type": type})
def createConceptNode(tx, concept, id):
    """Create a ``Concept`` node whose ``type`` is ``concept``.

    Args:
        tx: Transaction object exposing ``run``.
        concept: Value stored in the node's ``type`` property.
        id: Identifier stored on the node.
    """
    query = '''
CREATE (
n:Concept {
type: $concept,
id: $id
}
)'''
    tx.run(query, {"concept": concept, "id": id})
def createNextWordRelation(tx, idFrom, idTo):
    """Create a ``NEXT`` relation from one ``Word`` node to another.

    Args:
        tx: Transaction object exposing ``run``.
        idFrom: ``id`` of the source ``Word`` node.
        idTo: ``id`` of the target ``Word`` node.
    """
    query = '''
MATCH
(a:Word),
(b:Word)
WHERE a.id = $idFrom AND b.id = $idTo
CREATE (a)-[r:NEXT]->(b)
'''
    tx.run(query, {"idFrom": idFrom, "idTo": idTo})
def createDeprelRelation(tx, idFrom, idTo, type):
    """Create a typed ``DEPREL`` relation between two ``Word`` nodes.

    Args:
        tx: Transaction object exposing ``run``.
        idFrom: ``id`` of the source ``Word`` node.
        idTo: ``id`` of the target ``Word`` node.
        type: Value stored in the relation's ``type`` property.
    """
    query = '''
MATCH
(a:Word),
(b:Word)
WHERE a.id = $idFrom AND b.id = $idTo
CREATE (a)-[r:DEPREL {type: $type}]->(b)
'''
    tx.run(query, {"idFrom": idFrom, "idTo": idTo, "type": type})
def createConceptRelation(tx, idFrom, idTo):
    """Create a ``LINKED`` relation from a ``Concept`` node to a ``Word`` node.

    Args:
        tx: Transaction object exposing ``run``.
        idFrom: ``id`` of the source ``Concept`` node.
        idTo: ``id`` of the target ``Word`` node.
    """
    query = '''
MATCH
(a:Concept),
(b:Word)
WHERE a.id = $idFrom AND b.id = $idTo
CREATE (a)-[r:LINKED]->(b)
'''
    tx.run(query, {"idFrom": idFrom, "idTo": idTo})
def createConstituentRelation(tx, idFrom, idTo):
    """Create a ``CONSREL`` relation from a ``Constituent`` node to a child.

    The child may be either a ``Word`` or a ``Constituent`` node.

    Args:
        tx: Transaction object exposing ``run``.
        idFrom: ``id`` of the parent ``Constituent`` node.
        idTo: ``id`` of the child node.
    """
    query = '''
MATCH
(a:Constituent),
(b:Word|Constituent)
WHERE a.id = $idFrom AND b.id = $idTo
CREATE (a)-[r:CONSREL]->(b)
'''
    tx.run(query, {"idFrom": idFrom, "idTo": idTo})
def createRelation(tx, idFrom, idTo, id, type):
    """Create a ``RELATION`` relation between two ``Concept`` nodes.

    Args:
        tx: Transaction object exposing ``run``.
        idFrom: ``id`` of the source ``Concept`` node.
        idTo: ``id`` of the target ``Concept`` node.
        id: Value stored in the relation's ``id`` property.
        type: Value stored in the relation's ``type`` property.
    """
    query = '''
MATCH
(a:Concept),
(b:Concept)
WHERE a.id = $idFrom AND b.id = $idTo
CREATE (a)-[r:RELATION {id: $id, type: $type}]->(b)
'''
    tx.run(query, {"idFrom": idFrom, "idTo": idTo, "id": id, "type": type})
\ No newline at end of file
import benepar, spacy
import warnings
warnings.filterwarnings("ignore")
from api.internal_services.database import getLastIndex, updateLastIndex
from api.internal_services.logger import logger
from api.internal_services.neo4j import createConstituentNode, driver, createConstituentRelation, createWordNode, \
createNextWordRelation, createConceptRelation, createDeprelRelation
# Fetch the 'benepar_fr2' model; this call runs every time the module is imported.
benepar.download('benepar_fr2')
# Load the spaCy pipeline 'fr_dep_news_trf' ...
nlp = spacy.load('fr_dep_news_trf')
# ... and append the benepar component, configured with the model fetched above.
nlp.add_pipe('benepar', config={'model': 'benepar_fr2'})
def _span_id(prefix, span):
    """Return the node id of a constituent span: ``<prefix>.<start>-<end>``."""
    return f"{prefix}.{span.start}-{span.end}"


def _word_id(prefix, token_index):
    """Return the node id of a word: ``<prefix>.<token index>``."""
    return f"{prefix}.{token_index}"


def spacy_process(sentence):
    """Parse ``sentence`` and store its syntactic structure in Neo4j.

    The text is run through the ``nlp`` pipeline. For every sentence found,
    the persisted index is incremented and combined with the sentence's
    position in the text to prefix all node ids. Each constituent yielded by
    ``_.constituents`` becomes a ``Constituent`` node and/or a ``Word`` node,
    linked to its parent constituent when it has one. Tokens are then chained
    with NEXT relations and linked to their head with DEPREL relations.

    Args:
        sentence: Raw text to analyse; it may contain several sentences.
    """
    with driver.session() as session:
        doc = nlp(sentence)
        lastIndex = getLastIndex()
        for i, sent in enumerate(doc.sents):
            lastIndex = updateLastIndex(lastIndex + 1)
            prefix = f"{lastIndex}.{i}"
            logger.debug(sent._.parse_string)

            for constituent in sent._.constituents:
                labels = constituent._.labels
                parent = constituent._.parent
                constituentId = _span_id(prefix, constituent)
                # A span whose text differs from its root token's text is a
                # phrase; otherwise it is treated as a single word.
                is_phrase = constituent.root.text != constituent.text
                logger.debug(f"Processing constituent : {constituentId} - {labels}")
                logger.debug(f"{labels} and {is_phrase}")

                if labels and is_phrase:
                    # Create the constituent
                    session.execute_write(createConstituentNode, constituentId, labels[0])
                    if parent is not None:
                        # A parent exists, so link it to this constituent
                        session.execute_write(
                            createConstituentRelation,
                            _span_id(prefix, parent),
                            constituentId,
                        )
                    continue

                # Create the word, plus a single-word constituent if labelled
                wordId = _word_id(prefix, constituent.root.i)
                if labels:
                    session.execute_write(createConstituentNode, constituentId, labels[0])

                # Create the word as a Neo4j node
                session.execute_write(
                    createWordNode,
                    wordId,
                    constituent.text,
                    getattr(constituent, 'lemma_', None),
                    constituent.root.pos_,
                    # spaCy reports the root dependency as "ROOT"; compare
                    # case-insensitively so either spelling is recognised.
                    constituent.root.dep_.lower() == "root",
                )
                logger.debug(f"Creating word : {constituent.text}")

                if labels:
                    session.execute_write(createConstituentRelation, constituentId, wordId)
                    childId = constituentId
                else:
                    childId = wordId
                # The top-most constituent has no parent: nothing to link.
                if parent is not None:
                    session.execute_write(
                        createConstituentRelation,
                        _span_id(prefix, parent),
                        childId,
                    )

            for token in sent:
                tokenId = _word_id(prefix, token.i)
                # Succession link; the first token of the sentence has no
                # predecessor inside this sentence.
                if token.i != sent.start:
                    session.execute_write(
                        createNextWordRelation,
                        _word_id(prefix, token.i - 1),
                        tokenId,
                    )
                # Syntactic dependencies
                session.execute_write(
                    createDeprelRelation,
                    _word_id(prefix, token.head.i),
                    tokenId,
                    token.dep_,
                )
# > fastapi dev main.py
# > uvicorn api.main:app --reload
from fastapi import FastAPI
from .routers import pipeline_endpoint
# FastAPI application object served by uvicorn.
app = FastAPI()
# Register the routes declared in the pipeline_endpoint router.
app.include_router(pipeline_endpoint.router)
@app.get("/")
async def root():
    # Root endpoint: answers with a fixed JSON message.
    status = {"message": "ALA plateform is running !"}
    return status
\ No newline at end of file
from pydantic import BaseModel
from fastapi import APIRouter
from api.internal_services.background_worker import add_sentence_to_queue
from api.internal_services.logger import logger
# Router holding the endpoints declared in this module.
router = APIRouter()
# Request body accepted by POST /sentences.
class Sentence(BaseModel):
    # Text to hand over to the processing queue.
    sentence: str
@router.post("/sentences")
def add_sentence_to_process(sentence: Sentence):
    # Log the submitted text, enqueue it for the background worker, and
    # answer immediately with a confirmation message.
    text = sentence.sentence
    logger.debug(f"New sentence added to queue : {text}")
    add_sentence_to_queue(text)
    confirmation = {"message": "Sentence added to the queue for processing."}
    return confirmation
\ No newline at end of file
# Compose project declaring a single Neo4j service.
name: ala-plateform
services:
neo4j:
# Published ports: 7687 is the port used by the application's bolt:// URI;
# 7474 is presumably the Neo4j HTTP interface — confirm.
ports:
- 7474:7474
- 7687:7687
# Mount the named volume "neo4j" at /data.
volumes:
- neo4j:/data
image: neo4j
# Named volume declaration.
volumes:
neo4j:
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment