diff --git a/.gitignore b/.gitignore
index b92f26afd9b2c59224bbad08022c98a1f7b7b23e..1f70b935b80001f9dd78f4e87c69acf7af49a73e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,3 @@
-.env
+api/.env
 db.json
 .idea
\ No newline at end of file
diff --git a/api/internal_services/annotator.py b/api/internal_services/annotator.py
new file mode 100644
index 0000000000000000000000000000000000000000..ec9eaf4e0bbd446cdfd864112cfd901d34cb535f
--- /dev/null
+++ b/api/internal_services/annotator.py
@@ -0,0 +1,55 @@
+from api.internal_services import neo4j
+from api.internal_services.database import update_last_concept_index, get_last_concept_index
+from api.internal_services.gpt import gpt_process
+from api.internal_services.logger import logger
+
+is_using_GPT = True
+
+
+def annotation_process(job):
+    if is_using_GPT:
+        from api.internal_services.neo4j import driver, create_concept_node, create_concept_relation
+        with (driver.session() as session):
+            gpt_annotation = gpt_process(job.job_data['sentence'].text)
+            for concept, annotations in gpt_annotation.items():
+                filtered_annotation = set()
+                for annotation in annotations:
+                    filtered_annotation = filtered_annotation | neo4j.get_filtered_annotation(job.job_data['sentence_id'], concept, annotation)
+                if filtered_annotation:
+                    for interval in separate_intervals(filtered_annotation):
+                        session.execute_write(
+                            create_concept_node,
+                            concept,
+                            update_last_concept_index(get_last_concept_index() + 1),
+                            "GPT"
+                        )
+
+                        for word_id in interval:
+                            session.execute_write(
+                                create_concept_relation,
+                                get_last_concept_index(),
+                                word_id
+                            )
+
+    else:
+        gpt_annotation = {}
+
+def separate_intervals(data):
+    sorted_data = sorted(list(data), key=lambda x: int(x.split('.')[-1]))
+
+    separated_intervals = []
+    current_interval = [sorted_data[0]]
+
+    for i in range(1, len(sorted_data)):
+        current_value = int(sorted_data[i].split('.')[-1])
+        previous_value = int(current_interval[-1].split('.')[-1])
+
+        if current_value == previous_value + 1:
+            current_interval.append(sorted_data[i])
+        else:
+            separated_intervals.append(current_interval)
+            current_interval = [sorted_data[i]]
+
+    separated_intervals.append(current_interval)
+
+    return separated_intervals
\ No newline at end of file
diff --git a/api/internal_services/background_worker.py b/api/internal_services/background_worker.py
index 0f08b771e50b085f0fa3d82c1b2b2844976a7fe3..39e5e20c8b55a838cc7844e67fe7db967a6a53be 100644
--- a/api/internal_services/background_worker.py
+++ b/api/internal_services/background_worker.py
@@ -1,36 +1,42 @@
 from queue import Queue
 from threading import Thread
+
 from api.internal_services.logger import logger
-from api.internal_services.spacy import spacy_process
+from api.models.Job import JobType
 
-sentence_queue = Queue()
+job_queue = Queue()
 worker_thread = None
 
-
 def process_queue():
     global worker_thread
-    while sentence_queue.qsize() != 0:
-        sentence = sentence_queue.get()
-        logger.debug(f"Processing the sentence : {sentence}")
-        if sentence is None:
+    while job_queue.qsize() != 0:
+        job = job_queue.get()
+        logger.info(f"Processing the job {job.job_id}")
+        if job is None:
             break
 
-        # all process
-        spacy_process(sentence)
+        match job.job_type:
+            case JobType.SENTENCE_PARCING:
+                from api.internal_services.spacy import parsing_and_load_in_neo4j
+                parsing_and_load_in_neo4j(job)
+            case JobType.ANNOTATION:
+                from api.internal_services.annotator import annotation_process
+                annotation_process(job)
 
-        sentence_queue.task_done()
+        logger.info(f"Ending of the job {job.job_id}")
+        job_queue.task_done()
 
-    logger.debug("Closing the worker thread")
+    logger.info("Closing the worker thread")
     worker_thread = None
 
 
 def start_worker_thread():
     global worker_thread
     if worker_thread is None or not worker_thread.is_alive():
-        logger.debug("Starting the worker thread to process the queue")
+        logger.info("Starting the worker thread to process the queue")
         worker_thread = Thread(target=process_queue, daemon=True)
         worker_thread.start()
 
-def add_sentence_to_queue(sentence):
-    sentence_queue.put(sentence)
+def add_job_to_queue(job):
+    job_queue.put(job)
     start_worker_thread()
\ No newline at end of file
diff --git a/api/internal_services/database.py b/api/internal_services/database.py
index 9f65482a916959b6c0ced066c30f1288b25f2a76..7332a26fe50c7bcc2b55f6cf2d4004174b7124c9 100644
--- a/api/internal_services/database.py
+++ b/api/internal_services/database.py
@@ -2,15 +2,28 @@ from tinydb import TinyDB, Query, where
 
 db = TinyDB('db.json')
 
-def getLastIndex():
-    result = db.search(where('key') == 'last_index')
+def get_last_sentence_index():
+    result = db.search(where('key') == 'last_sentence_index')
     if not result:
-        created_object = {'key': 'last_index', 'value': 0}
+        created_object = {'key': 'last_sentence_index', 'value': 0}
         db.insert(created_object)
         return 0
     else:
         return result[0]['value']
 
-def updateLastIndex(value):
-    db.update({'value': value}, where('key') == 'last_index')
+def update_last_sentence_index(value):
+    db.update({'value': value}, where('key') == 'last_sentence_index')
+    return value
+
+def get_last_concept_index():
+    result = db.search(where('key') == 'last_concept_index')
+    if not result:
+        created_object = {'key': 'last_concept_index', 'value': 0}
+        db.insert(created_object)
+        return 0
+    else:
+        return result[0]['value']
+
+def update_last_concept_index(value):
+    db.update({'value': value}, where('key') == 'last_concept_index')
     return value
\ No newline at end of file
diff --git a/api/internal_services/gpt.py b/api/internal_services/gpt.py
new file mode 100644
index 0000000000000000000000000000000000000000..a457c387f99d4f788d344c34e8d0391228e1a7dd
--- /dev/null
+++ b/api/internal_services/gpt.py
@@ -0,0 +1,66 @@
+import json
+import os
+import traceback
+
+from openai import OpenAI
+from api.internal_services.logger import logger
+
+client = OpenAI()
+
+def gpt_process(sentence):
+    try:
+        completion = client.chat.completions.create(
+            model="gpt-4",
+            messages=[
+                {"role": "system", "content": get_pre_prompt()},
+                {"role": "user", "content": sentence}
+            ]
+        )
+        jsonOutput = json.loads(completion.choices[0].message.content)
+
+        key_mappings = {
+            "Action": "action",
+            "Acteur": "actor",
+            "Objet": "artifact",
+            "Condition": "condition",
+            "Lieu": "location",
+            "Modalité": "modality",
+            "Référence": "reference",
+            "Temps": "time"
+        }
+
+        jsonOutput = {key_mappings.get(k, k): v for k, v in jsonOutput.items()}
+        return jsonOutput
+
+    except Exception as e:
+        logger.error(f"Error during GPT process with the sentence : {sentence} | {e}")
+
+def get_pre_prompt():
+    return """
+    Tu es un expert en NLP spécialisé dans l'extraction d'entités dans des phrases.
+    Ces entités comprennent l'Action, l'Acteur, l'Objet, la Condition, le Lieu, la Modalité, la Référence et le Temps. Ces concepts prennent les définitions suivantes :
+    * Action : le fait de faire quelque chose
+    * Acteur : une entité qui a la capacité d'agir
+    * Objet : élément physique fabriqué par l'homme et impliqué dans une action
+    * Condition : une contrainte énonçant les propriétés qui doivent être respectées
+    * Lieu : endroit où une action est effectuée
+    * Modalité : un verbe indiquant la modalité de l'action (par exemple : peut, doit, etc)
+    * Référence : mention d'autres dispositions légales ou textes juridiques affectant la disposition actuelle
+    * Temps : le moment ou la durée associée à la réalisation d'une action
+    Un élément de la phrase initiale peut posséder plusieurs classifications, tu dois extraire toutes les classifications possibles.
+    Lors de l'analyse de textes, tes réponses doivent être formatées sous forme de JSON, listant les concepts identifiés sans élaboration ni justification.
+    Le JSON sera de la forme suivante :
+    {
+        "Action": [],
+        "Acteur": [],
+        "Objet": [],
+        "Condition": [],
+        "Lieu": [],
+        "Modalité": [],
+        "Référence": [],
+        "Temps": []
+    }
+    Par exemple, avec la phrase suivante : "Le propriétaire ou détenteur d ' un véhicule routier qui trouve mal fondée une décision relative à la réception ou l ' immatriculation de son véhicule peut déférer celle-ci au ministre qui , après avoir demandé la position de la SNCA , confirme ou réforme celle-ci dans les deux mois à compter de l ' introduction du recours accompagné de toutes les pièces et informations utiles ."
+    Vous devez obtenir : {"action": ["déférer celle-ci au ministre"], "actor": ["Le propriétaire ou détenteur d ' un véhicule routier", "ministre"], "condition": ["accompagné de toutes les pièces et informations utiles", "qui , après avoir demandé la position de la SNCA , confirme ou réforme celle-ci dans les deux mois à compter de l ' introduction du recours accompagné de toutes les pièces et informations utiles .", "qui trouve mal fondée une décision relative à la réception ou l ' immatriculation de son véhicule", "relative à la réception ou l ' immatriculation de son véhicule"], "modality": ["peut"], "time": ["après avoir demandé la position de la SNCA", "dans les deux mois à compter de l ' introduction du recours"]}
+    Vous ne donnerez que le JSON comme réponse, aucune justification ou explication n'est accepté. De plus, vous ne devez pas reformuler les éléments extraits, ils doivent être identiques au mot près !
+ """ diff --git a/api/internal_services/neo4j.py b/api/internal_services/neo4j.py index 0290491aadab65077fe5e439bccb57547809cf0f..b691be943d3b4497d72ebef59df33c430c869110 100644 --- a/api/internal_services/neo4j.py +++ b/api/internal_services/neo4j.py @@ -1,4 +1,6 @@ from neo4j import GraphDatabase +from api.internal_services.logger import logger + uri = "bolt://localhost:7687" # Modifier l'URI en fonction de votre configuration username = "neo4j" @@ -8,7 +10,7 @@ password = "password" driver = GraphDatabase.driver(uri, auth=(username, password)) -def createWordNode(tx, id, text, lemma, pos, root): +def create_word_node(tx, id, text, lemma, pos, root): tx.run(''' CREATE ( n:Word { @@ -22,7 +24,7 @@ def createWordNode(tx, id, text, lemma, pos, root): id=id, text=text, lemma=lemma, pos=pos, root=root) -def createConstituentNode(tx, id, type): +def create_constituent_node(tx, id, type): tx.run(''' CREATE ( n:Constituent { @@ -33,18 +35,19 @@ def createConstituentNode(tx, id, type): id=id, type=type) -def createConceptNode(tx, concept, id): +def create_concept_node(tx, concept, id, origin): tx.run(''' CREATE ( n:Concept { type: $concept, - id: $id + id: $id, + origin: $origin } )''', - concept=concept, id=id) + concept=concept, id=id, origin=origin) -def createNextWordRelation(tx, idFrom, idTo): +def create_next_word_relation(tx, idFrom, idTo): tx.run(''' MATCH (a:Word), @@ -56,7 +59,7 @@ def createNextWordRelation(tx, idFrom, idTo): ) -def createDeprelRelation(tx, idFrom, idTo, type): +def create_deprel_relation(tx, idFrom, idTo, type): tx.run(''' MATCH (a:Word), @@ -68,7 +71,7 @@ def createDeprelRelation(tx, idFrom, idTo, type): ) -def createConceptRelation(tx, idFrom, idTo): +def create_concept_relation(tx, idFrom, idTo): tx.run(''' MATCH (a:Concept), @@ -79,7 +82,8 @@ def createConceptRelation(tx, idFrom, idTo): idFrom=idFrom, idTo=idTo ) -def createConstituentRelation(tx, idFrom, idTo): + +def create_constituent_relation(tx, idFrom, idTo): tx.run(''' MATCH (a:Constituent), @@ -90,7 +94,8 @@ def createConstituentRelation(tx, idFrom, idTo): idFrom=idFrom, idTo=idTo ) -def createRelation(tx, idFrom, idTo, id, type): + +def create_relation(tx, idFrom, idTo, id, type): tx.run(''' MATCH (a:Concept), @@ -99,4 +104,226 @@ def createRelation(tx, idFrom, idTo, id, type): CREATE (a)-[r:RELATION {id: $id, type: $type}]->(b) ''', idFrom=idFrom, idTo=idTo, id=id, type=type - ) \ No newline at end of file + ) + + +def get_filtered_annotation(sentence_id, concept, annotation): + annotation = annotation.strip().lower() + + if " " in annotation: + from api.internal_services.spacy import simple_parsing + annotation = simple_parsing(annotation) + words_ids = get_id_multi_tokens(annotation, sentence_id) + else: + words_ids = get_id_single_tokens(annotation, sentence_id) + + if len(words_ids) == 0: + logger.warn(f"Cannot find the following annotation '{annotation}' in the sentence id {sentence_id}. 
+        return set()
+
+    filtered_annotation = set()
+    if concept == "action":
+        filtered_annotation = action(words_ids)
+    elif concept == "actor":
+        filtered_annotation = actor(words_ids)
+    elif concept == "artifact":
+        filtered_annotation = artifact(words_ids)
+    elif concept == "condition":
+        filtered_annotation = condition(words_ids, sentence_id)
+    elif concept == "location":
+        filtered_annotation = location(words_ids)
+    elif concept == "modality":
+        filtered_annotation = modality(words_ids)
+    elif concept == "time":
+        filtered_annotation = time(words_ids)
+    elif concept == "reference":
+        filtered_annotation = reference(words_ids)
+
+    return filtered_annotation
+
+
+def get_id_multi_tokens(annotation, sentence_id):
+    annotation = annotation.split(" ")
+    list_multi_token = driver.execute_query(
+        '''
+        WITH $array AS words
+        MATCH path = (start:Word)-[:NEXT*]->(end:Word)
+        where size(words) - 1 = size(relationships(path))
+        and start.id starts with $sentence_id
+        and all(
+            idx IN range(0, size(words)-2)
+            WHERE (toLower(words[idx]) = toLower((nodes(path)[idx]).text)
+            AND toLower(words[idx+1]) = toLower((nodes(path)[idx + 1]).text))
+        )
+        and toLower(start.text) = words[0]
+        and toLower(end.text) = words[size(words) - 1]
+        with nodes(path) as result
+        unwind result as results
+        return collect(results.id) as liste
+        ''',
+        array=annotation,
+        sentence_id=f"{sentence_id}."
+    )
+    return list_multi_token.records[0][0]
+
+
+def get_id_single_tokens(annotation, sentence_id):
+    list_single_token = driver.execute_query(
+        '''
+        match (w:Word)
+        where toLower(w.text) = $annotation
+        and w.id starts with $sentence_id
+        with distinct w as results
+        return collect(results.id) as liste
+        ''',
+        annotation=annotation,
+        sentence_id=f"{sentence_id}."
+    )
+    return list_single_token.records[0][0]
+
+
+def action(words_ids):
+    nodes = driver.execute_query(
+        '''
+        match (c:Constituent)-[:CONSREL]->(w:Word)
+        where c.type in ["VN", "VPinf", "VPpart"]
+        and w.id in $array
+        with c as constituent
+        match (constituent)-[:CONSREL*..]->(w:Word)
+        return distinct w.id
+        ''',
+        array=list(words_ids)
+    )
+    return set([record[0] for record in nodes.records])
+
+
+def actor(words_ids):
+    nodes = driver.execute_query(
+        '''
+        match (c:Constituent)-[:CONSREL]->(w:Word)
+        where c.type in ["NP"]
+        and w.id in $array
+        with c as constituent
+        match (constituent)-[:CONSREL*..]->(w:Word)
+        return distinct w.id
+        ''',
+        array=list(words_ids)
+    )
+    return set([record[0] for record in nodes.records])
+
+
+def artifact(words_ids):
+    nodes = driver.execute_query(
+        '''
+        match (c:Constituent)-[:CONSREL]->(w:Word)
+        where c.type in ["NP"]
+        and w.id in $array
+        with c as constituent
+        match (constituent)-[:CONSREL*..]->(w:Word)
+        return distinct w.id
+        ''',
+        array=list(words_ids)
+    )
+    return set([record[0] for record in nodes.records])
+
+
+def condition(words_ids, sentence_id):
+    sentence_id = str(sentence_id) + "."
+    nodes = driver.execute_query(
+        '''
+        match (c:Constituent)-[:CONSREL*..]->(w:Word)
+        where w.id in $array
+        and c.type in ["Srel", "PP"]
+        with c as constituent
+        match (constituent)-[:CONSREL*..]->(w:Word)
+        return distinct w.id
+
+        UNION
+
+        match (c:Constituent)-[:CONSREL]->(w:Word)
+        where w.id in $array
+        and c.type in ["Ssub"]
+        with c as constituent
+        match (constituent)-[:CONSREL]->(w:Word)
+        return distinct w.id
+        ''',
+        array=list(words_ids),
+        sentence_id=sentence_id
+    )
+    return set([record[0] for record in nodes.records])
+
+
+def location(words_ids):
+    nodes = driver.execute_query(
+        '''
+        match (c:Constituent)-[:CONSREL]->(w:Word)
+        where c.type in ["NP"]
+        and w.id in $array
+        with c as constituent
+        match (constituent)-[:CONSREL*..]->(w:Word)
+        return distinct w.id
+        ''',
+        array=list(words_ids)
+    )
+    return set([record[0] for record in nodes.records])
+
+
+def modality(words_ids):
+    nodes = driver.execute_query(
+        '''
+        match (c:Constituent)-[:CONSREL]->(w:Word)
+        where c.type in ["VN"]
+        and w.id in $array
+        with c as constituent
+        match (constituent)-[:CONSREL*..]->(w:Word)
+        return distinct w.id
+
+        UNION
+
+        match (c:Constituent)-[:CONSREL]->(w:Word)
+        where c.type = "SENT"
+        and w.id in $array
+        return distinct w.id
+        ''',
+        array=list(words_ids)
+    )
+    return set([record[0] for record in nodes.records])
+
+
+def time(words_ids):
+    nodes = driver.execute_query(
+        '''
+        match (c:Constituent)-[:CONSREL]->(w:Word)
+        where c.type = "NP"
+        and w.id in $array
+        with c as constituent
+        match (constituent)-[:CONSREL*..]->(w:Word)
+        return distinct w.id
+
+        UNION
+
+        match (c1:Constituent {type: "PP"})-[:CONSREL]->(c2:Constituent {type: "P+"})-[:CONSREL]->(w:Word)
+        match (c2)<-[:CONSREL]-(:Constituent)-[:CONSREL]->(:Constituent {type: "NP"})
+        where w.id in $array
+        with c1 as c1
+        match (c1)-[:CONSREL*..]->(w:Word)
+        return distinct w.id
+        ''',
+        array=list(words_ids)
+    )
+    return set([record[0] for record in nodes.records])
+
+
+def reference(words_ids):
+    nodes = driver.execute_query(
+        '''
+        match (c:Constituent)-[:CONSREL]->(w:Word)
+        where c.type in ["NP", "PP"]
+        and w.id in $array
+        with c as constituent
+        match (constituent)-[:CONSREL*..]->(w:Word)
+        return distinct w.id
+        ''',
+        array=list(words_ids)
+    )
+    return set([record[0] for record in nodes.records])
diff --git a/api/internal_services/spacy.py b/api/internal_services/spacy.py
index 5e3da85f7e6a5dab20b1d207bf88cdc19ed6a165..43f9b649be3cf9e5f32cf2d78699ea0f7350105d 100644
--- a/api/internal_services/spacy.py
+++ b/api/internal_services/spacy.py
@@ -1,46 +1,46 @@
 import benepar, spacy
 import warnings
+
+from api.internal_services.background_worker import add_job_to_queue
+from api.models.Job import Job, JobType
+
 warnings.filterwarnings("ignore")
-from api.internal_services.database import getLastIndex, updateLastIndex
-from api.internal_services.logger import logger
-from api.internal_services.neo4j import createConstituentNode, driver, createConstituentRelation, createWordNode, \
-    createNextWordRelation, createConceptRelation, createDeprelRelation
+from api.internal_services.database import get_last_sentence_index, update_last_sentence_index
+from api.internal_services.neo4j import create_constituent_node, driver, create_constituent_relation, create_word_node, \
+    create_next_word_relation, create_deprel_relation
 
 
 benepar.download('benepar_fr2')
 nlp = spacy.load('fr_dep_news_trf')
 nlp.add_pipe('benepar', config={'model': 'benepar_fr2'})
 
 
-def spacy_process(sentence):
+def parsing_and_load_in_neo4j(job):
+    sentence = job.job_data['sentence']
+    last_index = get_last_sentence_index()
+    last_index = update_last_sentence_index(last_index + 1)
+
     with (driver.session() as session):
         doc = nlp(sentence)
-        lastIndex = getLastIndex()
         for i, sentence in enumerate(doc.sents):
-            lastIndex = updateLastIndex(lastIndex + 1)
-            #constituentDone = set()
-            logger.debug(sentence._.parse_string)
             for constituent in sentence._.constituents:
-                constituentId = f"{lastIndex}.{i}.{constituent.start}-{constituent.end}"
-                logger.debug(f"Processing constituent : {constituentId} - {constituent._.labels}")
-
-                logger.debug(f"{constituent._.labels} and {constituent.root.text != constituent.text}")
+                constituent_id = f"{last_index}.{i}.{constituent.start}-{constituent.end}"
                 if constituent._.labels and constituent.root.text != constituent.text:
                     # Créer le consituant
                     session.execute_write(
-                        createConstituentNode,
-                        f"{lastIndex}.{i}.{constituent.start}-{constituent.end}",
+                        create_constituent_node,
+                        f"{last_index}.{i}.{constituent.start}-{constituent.end}",
                         constituent._.labels[0]
                     )
 
                     if constituent._.parent is not None:
                         # parent existe alors on crée le lien
                         session.execute_write(
-                            createConstituentRelation,
-                            f"{lastIndex}.{i}.{constituent._.parent.start}-{constituent._.parent.end}",
-                            constituentId
+                            create_constituent_relation,
+                            f"{last_index}.{i}.{constituent._.parent.start}-{constituent._.parent.end}",
+                            constituent_id
                         )
 
                 else:
@@ -48,61 +48,74 @@ def spacy_process(sentence):
                     if constituent._.labels:
                         # Créer le consituant
                         session.execute_write(
-                            createConstituentNode,
-                            f"{lastIndex}.{i}.{constituent.start}-{constituent.end}",
+                            create_constituent_node,
+                            f"{last_index}.{i}.{constituent.start}-{constituent.end}",
                             constituent._.labels[0]
                         )
 
                         #Création du mot en noeud neo4j
                         session.execute_write(
-                            createWordNode,
-                            '.'.join(map(str, [lastIndex, i, constituent.root.i])),
+                            create_word_node,
+                            '.'.join(map(str, [last_index, i, constituent.root.i])),
                             constituent.text,
                             None if not hasattr(constituent, 'lemma_') else constituent.lemma_,
                             constituent.root.pos_,
                             True if constituent.root.dep_ == "root" else False
                         )
-                        logger.debug(f"Creating word : {constituent.text}")
 
                         session.execute_write(
-                            createConstituentRelation,
-                            f"{lastIndex}.{i}.{constituent.start}-{constituent.end}",
-                            '.'.join(map(str, [lastIndex, i, constituent.root.i])),
+                            create_constituent_relation,
+                            f"{last_index}.{i}.{constituent.start}-{constituent.end}",
+                            '.'.join(map(str, [last_index, i, constituent.root.i])),
                         )
 
                         session.execute_write(
-                            createConstituentRelation,
-                            f"{lastIndex}.{i}.{constituent._.parent.start}-{constituent._.parent.end}",
-                            f"{lastIndex}.{i}.{constituent.start}-{constituent.end}",
+                            create_constituent_relation,
+                            f"{last_index}.{i}.{constituent._.parent.start}-{constituent._.parent.end}",
+                            f"{last_index}.{i}.{constituent.start}-{constituent.end}",
                        )
 
                    else:
                         #Création du mot en noeud neo4j
                         session.execute_write(
-                            createWordNode,
-                            '.'.join(map(str, [lastIndex, i, constituent.root.i])),
+                            create_word_node,
+                            '.'.join(map(str, [last_index, i, constituent.root.i])),
                             constituent.text,
                             None if not hasattr(constituent, 'lemma_') else constituent.lemma_,
                             constituent.root.pos_,
                             True if constituent.root.dep_ == "root" else False
                         )
-                        logger.debug(f"Creating word : {constituent.text}")
 
                         # parent existe alors on crée le lien
                         session.execute_write(
-                            createConstituentRelation,
-                            f"{lastIndex}.{i}.{constituent._.parent.start}-{constituent._.parent.end}",
-                            '.'.join(map(str, [lastIndex, i, constituent.root.i])),
+                            create_constituent_relation,
f"{last_index}.{i}.{constituent._.parent.start}-{constituent._.parent.end}", + '.'.join(map(str, [last_index, i, constituent.root.i])), ) for token in sentence: #Création d'un lien de succession if token.i != 0: - idFrom = '.'.join(map(str, [lastIndex, i, token.i - 1])) - idTo = '.'.join(map(str, [lastIndex, i, token.i])) - session.execute_write(createNextWordRelation, idFrom, idTo) + idFrom = '.'.join(map(str, [last_index, i, token.i - 1])) + idTo = '.'.join(map(str, [last_index, i, token.i])) + session.execute_write(create_next_word_relation, idFrom, idTo) #dépendances syntaxiques - idFrom = '.'.join(map(str, [lastIndex, i, token.head.i])) - idTo = '.'.join(map(str, [lastIndex, i, token.i])) - session.execute_write(createDeprelRelation, idFrom, idTo, token.dep_) + idFrom = '.'.join(map(str, [last_index, i, token.head.i])) + idTo = '.'.join(map(str, [last_index, i, token.i])) + session.execute_write(create_deprel_relation, idFrom, idTo, token.dep_) + + new_job = Job() + new_job.job_id = job.job_id + new_job.job_type = JobType.ANNOTATION + new_job.job_data = {'sentence': sentence, 'sentence_id': last_index} + + add_job_to_queue(new_job) + +def simple_parsing(sentence): + doc = nlp(sentence) + output = [] + for i, sentence in enumerate(doc.sents): + for token in sentence: + output.append(token.text) + return ' '.join(output) diff --git a/api/main.py b/api/main.py index 000325852eafc04f740445b23e7003dcf90acb98..f3addaa72289fdfc82b8107c42bef5b3b46fb96c 100644 --- a/api/main.py +++ b/api/main.py @@ -1,5 +1,6 @@ # > fastapi dev main.py # > uvicorn api.main:app --reload +import uvicorn from fastapi import FastAPI from .routers import pipeline_endpoint @@ -9,4 +10,7 @@ app.include_router(pipeline_endpoint.router) @app.get("/") async def root(): - return {"message": "ALA plateform is running !"} \ No newline at end of file + return {"message": "ALA plateform is running !"} + +if __name__ == "__main__": + uvicorn.run(app, host="0.0.0.0", port=8000) \ No newline at end of file diff --git a/api/models/Job.py b/api/models/Job.py new file mode 100644 index 0000000000000000000000000000000000000000..48c477c08d03d34b83fffe15321014de55c65edd --- /dev/null +++ b/api/models/Job.py @@ -0,0 +1,11 @@ +from enum import Enum + + +class JobType(Enum): + SENTENCE_PARCING = 1 + ANNOTATION = 2 + +class Job(): + job_id: str + job_type: JobType + job_data: {} \ No newline at end of file diff --git a/api/routers/pipeline_endpoint.py b/api/routers/pipeline_endpoint.py index c2310e4777551a7c3d06ee7f012b545bc91616bf..4ec99dc8b40554710cec106f3e2520ca7e4f7872 100644 --- a/api/routers/pipeline_endpoint.py +++ b/api/routers/pipeline_endpoint.py @@ -1,7 +1,10 @@ from pydantic import BaseModel from fastapi import APIRouter -from api.internal_services.background_worker import add_sentence_to_queue +from api.internal_services.background_worker import add_job_to_queue from api.internal_services.logger import logger +import uuid + +from api.models.Job import Job, JobType router = APIRouter() @@ -11,6 +14,14 @@ class Sentence(BaseModel): @router.post("/sentences") def add_sentence_to_process(sentence: Sentence): - logger.debug(f"New sentence added to queue : {sentence.sentence}") - add_sentence_to_queue(sentence.sentence) - return {"message": "Sentence added to the queue for processing."} \ No newline at end of file + logger.info(f"New sentence added to queue : {sentence.sentence}") + new_job = Job() + new_job.job_id = uuid.uuid4() + new_job.job_type = JobType.SENTENCE_PARCING + new_job.job_data = {'sentence': sentence.sentence} + 
+    add_job_to_queue(new_job)
+    return {"message": "Job added to the queue for processing.", "job_id": new_job.job_id}
+
+@router.post("/actions/training")
+def trigger_training():
+    logger.info("Training process triggered")
\ No newline at end of file
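
A minimal way to exercise the new job pipeline once this patch is applied (an illustrative sketch, not part of the change set): assuming the API is started locally with uvicorn on port 8000 as configured in api/main.py, and that the requests package is available on the client side, a sentence can be posted to the /sentences route and the returned job_id kept for reference.

# Illustrative client call, not part of the diff. Assumes the server from api/main.py
# is running on http://localhost:8000 and that `requests` is installed in the client environment.
import requests

payload = {"sentence": "Le ministre peut réformer la décision dans les deux mois."}
response = requests.post("http://localhost:8000/sentences", json=payload)
response.raise_for_status()

body = response.json()
# The endpoint replies with a confirmation message and the UUID of the queued job;
# the background worker first runs it as a SENTENCE_PARCING job, then re-queues it as ANNOTATION.
print(body["message"], body["job_id"])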