From b60892d3b35b47993ffbccf7cfe9fd027c4d1fe7 Mon Sep 17 00:00:00 2001 From: "Julien B." <xm9q8f80@jlnbrtn.me> Date: Wed, 21 Aug 2024 08:37:58 +0200 Subject: [PATCH] feat(internal_services): add gpt annotator and spacy parser --- .gitignore | 2 +- api/internal_services/annotator.py | 55 +++++ api/internal_services/background_worker.py | 34 +-- api/internal_services/database.py | 23 +- api/internal_services/gpt.py | 66 ++++++ api/internal_services/neo4j.py | 249 ++++++++++++++++++++- api/internal_services/spacy.py | 95 ++++---- api/main.py | 6 +- api/models/Job.py | 11 + api/routers/pipeline_endpoint.py | 19 +- 10 files changed, 483 insertions(+), 77 deletions(-) create mode 100644 api/internal_services/annotator.py create mode 100644 api/internal_services/gpt.py create mode 100644 api/models/Job.py diff --git a/.gitignore b/.gitignore index b92f26a..1f70b93 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ -.env +api/.env db.json .idea \ No newline at end of file diff --git a/api/internal_services/annotator.py b/api/internal_services/annotator.py new file mode 100644 index 0000000..ec9eaf4 --- /dev/null +++ b/api/internal_services/annotator.py @@ -0,0 +1,55 @@ +from api.internal_services import neo4j +from api.internal_services.database import update_last_concept_index, get_last_concept_index +from api.internal_services.gpt import gpt_process +from api.internal_services.logger import logger + +is_using_GPT = True + + +def annotation_process(job): + if is_using_GPT: + from api.internal_services.neo4j import driver, create_concept_node, create_concept_relation + with (driver.session() as session): + gpt_annotation = gpt_process(job.job_data['sentence'].text) + for concept, annotations in gpt_annotation.items(): + filtered_annotation = set() + for annotation in annotations: + filtered_annotation = filtered_annotation | neo4j.get_filtered_annotation(job.job_data['sentence_id'], concept, annotation) + if filtered_annotation: + for interval in separate_intervals(filtered_annotation): + session.execute_write( + create_concept_node, + concept, + update_last_concept_index(get_last_concept_index() + 1), + "GPT" + ) + + for word_id in interval: + session.execute_write( + create_concept_relation, + get_last_concept_index(), + word_id + ) + + else: + gpt_annotation = {} + +def separate_intervals(data): + sorted_data = sorted(list(data), key=lambda x: int(x.split('.')[-1])) + + separated_intervals = [] + current_interval = [sorted_data[0]] + + for i in range(1, len(sorted_data)): + current_value = int(sorted_data[i].split('.')[-1]) + previous_value = int(current_interval[-1].split('.')[-1]) + + if current_value == previous_value + 1: + current_interval.append(sorted_data[i]) + else: + separated_intervals.append(current_interval) + current_interval = [sorted_data[i]] + + separated_intervals.append(current_interval) + + return separated_intervals \ No newline at end of file diff --git a/api/internal_services/background_worker.py b/api/internal_services/background_worker.py index 0f08b77..39e5e20 100644 --- a/api/internal_services/background_worker.py +++ b/api/internal_services/background_worker.py @@ -1,36 +1,42 @@ from queue import Queue from threading import Thread + from api.internal_services.logger import logger -from api.internal_services.spacy import spacy_process +from api.models.Job import JobType -sentence_queue = Queue() +job_queue = Queue() worker_thread = None - def process_queue(): global worker_thread - while sentence_queue.qsize() != 0: - sentence = sentence_queue.get() - 
logger.debug(f"Processing the sentence : {sentence}") - if sentence is None: + while job_queue.qsize() != 0: + job = job_queue.get() + logger.info(f"Processing the job {job.job_id}") + if job is None: break - # all process - spacy_process(sentence) + match job.job_type: + case JobType.SENTENCE_PARCING: + from api.internal_services.spacy import parsing_and_load_in_neo4j + parsing_and_load_in_neo4j(job) + case JobType.ANNOTATION: + from api.internal_services.annotator import annotation_process + annotation_process(job) - sentence_queue.task_done() + logger.info(f"Ending of the job {job.job_id}") + job_queue.task_done() - logger.debug("Closing the worker thread") + logger.info("Closing the worker thread") worker_thread = None def start_worker_thread(): global worker_thread if worker_thread is None or not worker_thread.is_alive(): - logger.debug("Starting the worker thread to process the queue") + logger.info("Starting the worker thread to process the queue") worker_thread = Thread(target=process_queue, daemon=True) worker_thread.start() -def add_sentence_to_queue(sentence): - sentence_queue.put(sentence) +def add_job_to_queue(job): + job_queue.put(job) start_worker_thread() \ No newline at end of file diff --git a/api/internal_services/database.py b/api/internal_services/database.py index 9f65482..7332a26 100644 --- a/api/internal_services/database.py +++ b/api/internal_services/database.py @@ -2,15 +2,28 @@ from tinydb import TinyDB, Query, where db = TinyDB('db.json') -def getLastIndex(): - result = db.search(where('key') == 'last_index') +def get_last_sentence_index(): + result = db.search(where('key') == 'last_sentence_index') if not result: - created_object = {'key': 'last_index', 'value': 0} + created_object = {'key': 'last_sentence_index', 'value': 0} db.insert(created_object) return 0 else: return result[0]['value'] -def updateLastIndex(value): - db.update({'value': value}, where('key') == 'last_index') +def update_last_sentence_index(value): + db.update({'value': value}, where('key') == 'last_sentence_index') + return value + +def get_last_concept_index(): + result = db.search(where('key') == 'last_concept_index') + if not result: + created_object = {'key': 'last_concept_index', 'value': 0} + db.insert(created_object) + return 0 + else: + return result[0]['value'] + +def update_last_concept_index(value): + db.update({'value': value}, where('key') == 'last_concept_index') return value \ No newline at end of file diff --git a/api/internal_services/gpt.py b/api/internal_services/gpt.py new file mode 100644 index 0000000..a457c38 --- /dev/null +++ b/api/internal_services/gpt.py @@ -0,0 +1,66 @@ +import json +import os +import traceback + +from openai import OpenAI +from api.internal_services.logger import logger + +client = OpenAI() + +def gpt_process(sentence): + try: + completion = client.chat.completions.create( + model="gpt-4", + messages=[ + {"role": "system", "content": get_pre_prompt()}, + {"role": "user", "content": sentence} + ] + ) + jsonOutput = json.loads(completion.choices[0].message.content) + + key_mappings = { + "Action": "action", + "Acteur": "actor", + "Objet": "artifact", + "Condition": "condition", + "Lieu": "location", + "Modalité": "modality", + "Référence": "reference", + "Temps": "time" + } + + jsonOutput = {key_mappings.get(k, k): v for k, v in jsonOutput.items()} + return jsonOutput + + except Exception as e: + logger.error(f"Error during GPT process with the sentence : {sentence} | {e}") + +def get_pre_prompt(): + return """ + Tu es un expert en NLP spécialisé 
dans l'extraction d'entités dans des phrases. + Ces entités comprennent l'Action, l'Acteur, l'Objet, la Condition, le Lieu, la Modalité, la Référence et le Temps. Ces concepts prennent les définitions suivantes : + * Action : le fait de faire quelque chose + * Acteur : une entité qui a la capacité d'agir + * Objet : élément physique fabriqué par l'homme et impliqué dans une action + * Condition : une contrainte énonçant les propriétés qui doivent être respectées + * Lieu : endroit où une action est effectuée + * Modalité : un verbe indiquant la modalité de l'action (par exemple : peut, doit, etc) + * Référence : mention d'autres dispositions légales ou textes juridiques affectant la disposition actuelle + * Temps : le moment ou la durée associée à la réalisation d'une action + Un élément de la phrase initiale peut posséder plusieurs classifications, tu dois extraire toutes les classifications possibles. + Lors de l'analyse de textes, tes réponses doivent être formatées sous forme de JSON, listant les concepts identifiés sans élaboration ni justification. + Le JSON sera de la forme suivante : + { + "Action": [], + "Acteur": [], + "Objet": [], + "Condition": [], + "Lieu": [], + "Modalité": [], + "Référence": [], + "Temps": [] + } + Par exemple, avec la phrase suivante : "Le propriétaire ou détenteur d ' un véhicule routier qui trouve mal fondée une décision relative à la réception ou l ' immatriculation de son véhicule peut déférer celle-ci au ministre qui , après avoir demandé la position de la SNCA , confirme ou réforme celle-ci dans les deux mois à compter de l ' introduction du recours accompagné de toutes les pièces et informations utiles ." + Vous devez obtenir : {"action": ["déférer celle-ci au ministre"], "actor": ["Le propriétaire ou détenteur d ' un véhicule routier", "ministre"], "condition": ["accompagné de toutes les pièces et informations utiles", "qui , après avoir demandé la position de la SNCA , confirme ou réforme celle-ci dans les deux mois à compter de l ' introduction du recours accompagné de toutes les pièces et informations utiles .", "qui trouve mal fondée une décision relative à la réception ou l ' immatriculation de son véhicule", "relative à la réception ou l ' immatriculation de son véhicule"], "modality": ["peut"], "time": ["après avoir demandé la position de la SNCA", "dans les deux mois à compter de l ' introduction du recours"]} + Vous ne donnerez que le JSON comme réponse, aucune justification ou explication n'est accepté. De plus, vous ne devez pas reformuler les éléments extraits, ils doivent être identiques au mot près ! 
+ """ diff --git a/api/internal_services/neo4j.py b/api/internal_services/neo4j.py index 0290491..b691be9 100644 --- a/api/internal_services/neo4j.py +++ b/api/internal_services/neo4j.py @@ -1,4 +1,6 @@ from neo4j import GraphDatabase +from api.internal_services.logger import logger + uri = "bolt://localhost:7687" # Modifier l'URI en fonction de votre configuration username = "neo4j" @@ -8,7 +10,7 @@ password = "password" driver = GraphDatabase.driver(uri, auth=(username, password)) -def createWordNode(tx, id, text, lemma, pos, root): +def create_word_node(tx, id, text, lemma, pos, root): tx.run(''' CREATE ( n:Word { @@ -22,7 +24,7 @@ def createWordNode(tx, id, text, lemma, pos, root): id=id, text=text, lemma=lemma, pos=pos, root=root) -def createConstituentNode(tx, id, type): +def create_constituent_node(tx, id, type): tx.run(''' CREATE ( n:Constituent { @@ -33,18 +35,19 @@ def createConstituentNode(tx, id, type): id=id, type=type) -def createConceptNode(tx, concept, id): +def create_concept_node(tx, concept, id, origin): tx.run(''' CREATE ( n:Concept { type: $concept, - id: $id + id: $id, + origin: $origin } )''', - concept=concept, id=id) + concept=concept, id=id, origin=origin) -def createNextWordRelation(tx, idFrom, idTo): +def create_next_word_relation(tx, idFrom, idTo): tx.run(''' MATCH (a:Word), @@ -56,7 +59,7 @@ def createNextWordRelation(tx, idFrom, idTo): ) -def createDeprelRelation(tx, idFrom, idTo, type): +def create_deprel_relation(tx, idFrom, idTo, type): tx.run(''' MATCH (a:Word), @@ -68,7 +71,7 @@ def createDeprelRelation(tx, idFrom, idTo, type): ) -def createConceptRelation(tx, idFrom, idTo): +def create_concept_relation(tx, idFrom, idTo): tx.run(''' MATCH (a:Concept), @@ -79,7 +82,8 @@ def createConceptRelation(tx, idFrom, idTo): idFrom=idFrom, idTo=idTo ) -def createConstituentRelation(tx, idFrom, idTo): + +def create_constituent_relation(tx, idFrom, idTo): tx.run(''' MATCH (a:Constituent), @@ -90,7 +94,8 @@ def createConstituentRelation(tx, idFrom, idTo): idFrom=idFrom, idTo=idTo ) -def createRelation(tx, idFrom, idTo, id, type): + +def create_relation(tx, idFrom, idTo, id, type): tx.run(''' MATCH (a:Concept), @@ -99,4 +104,226 @@ def createRelation(tx, idFrom, idTo, id, type): CREATE (a)-[r:RELATION {id: $id, type: $type}]->(b) ''', idFrom=idFrom, idTo=idTo, id=id, type=type - ) \ No newline at end of file + ) + + +def get_filtered_annotation(sentence_id, concept, annotation): + annotation = annotation.strip().lower() + + if " " in annotation: + from api.internal_services.spacy import simple_parsing + annotation = simple_parsing(annotation) + words_ids = get_id_multi_tokens(annotation, sentence_id) + else: + words_ids = get_id_single_tokens(annotation, sentence_id) + + if len(words_ids) == 0: + logger.warn(f"Cannot find the following annotation '{annotation}' in the sentence id {sentence_id}. 
This error is a hallucination of large language models.") + return set() + + filtered_annotation = set() + if concept == "action": + filtered_annotation = action(words_ids) + elif concept == "actor": + filtered_annotation = actor(words_ids) + elif concept == "artifact": + filtered_annotation = artifact(words_ids) + elif concept == "condition": + filtered_annotation = condition(words_ids, sentence_id) + elif concept == "location": + filtered_annotation = location(words_ids) + elif concept == "modality": + filtered_annotation = modality(words_ids) + elif concept == "time": + filtered_annotation = time(words_ids) + elif concept == "reference": + filtered_annotation = reference(words_ids) + + return filtered_annotation + + +def get_id_multi_tokens(annotation, sentence_id): + annotation = annotation.split(" ") + list_multi_token = driver.execute_query( + ''' + WITH $array AS words + MATCH path = (start:Word)-[:NEXT*]->(end:Word) + where size(words) - 1 = size(relationships(path)) + and start.id starts with $sentence_id + and all( + idx IN range(0, size(words)-2) + WHERE (toLower(words[idx]) = toLower((nodes(path)[idx]).text) + AND toLower(words[idx+1]) = toLower((nodes(path)[idx + 1]).text)) + ) + and toLower(start.text) = words[0] + and toLower(end.text) = words[size(words) - 1] + with nodes(path) as result + unwind result as results + return collect(results.id) as liste + ''', + array=annotation, + sentence_id=f"{sentence_id}." + ) + return list_multi_token.records[0][0] + + +def get_id_single_tokens(annotation, sentence_id): + list_single_token = driver.execute_query( + ''' + match (w:Word) + where toLower(w.text) = $annotation + and w.id starts with $sentence_id + with distinct w as results + return collect(results.id) as liste + ''', + annotation=annotation, + sentence_id=f"{sentence_id}." + ) + return list_single_token.records[0][0] + + +def action(words_ids): + nodes = driver.execute_query( + ''' + match (c:Constituent)-[:CONSREL]->(w:Word) + where c.type in ["VN", "VPinf", "VPpart"] + and w.id in $array + with c as constituent + match (constituent)-[:CONSREL*..]->(w:Word) + return distinct w.id + ''', + array=list(words_ids) + ) + return set([record[0] for record in nodes.records]) + + +def actor(words_ids): + nodes = driver.execute_query( + ''' + match (c:Constituent)-[:CONSREL]->(w:Word) + where c.type in ["NP"] + and w.id in $array + with c as constituent + match (constituent)-[:CONSREL*..]->(w:Word) + return distinct w.id + ''', + array=list(words_ids) + ) + return set([record[0] for record in nodes.records]) + + +def artifact(words_ids): + nodes = driver.execute_query( + ''' + match (c:Constituent)-[:CONSREL]->(w:Word) + where c.type in ["NP"] + and w.id in $array + with c as constituent + match (constituent)-[:CONSREL*..]->(w:Word) + return distinct w.id + ''', + array=list(words_ids) + ) + return set([record[0] for record in nodes.records]) + + +def condition(words_ids, sentence_id): + sentence_id = str(sentence_id) + "." 
+ nodes = driver.execute_query( + ''' + match (c:Constituent)-[:CONSREL*..]->(w:Word) + where w.id in $array + and c.type in ["Srel", "PP"] + with c as constituent + match (constituent)-[:CONSREL*..]->(w:Word) + return distinct w.id + + UNION + + match (c:Constituent)-[:CONSREL]->(w:Word) + where w.id in $array + and c.type in ["Ssub"] + with c as constituent + match (constituent)-[:CONSREL]->(w:Word) + return distinct w.id + ''', + array=list(words_ids), + sentence_id=sentence_id + ) + return set([record[0] for record in nodes.records]) + + +def location(words_ids): + nodes = driver.execute_query( + ''' + match (c:Constituent)-[:CONSREL]->(w:Word) + where c.type in ["NP"] + and w.id in $array + with c as constituent + match (constituent)-[:CONSREL*..]->(w:Word) + return distinct w.id + ''', + array=list(words_ids) + ) + return set([record[0] for record in nodes.records]) + + +def modality(words_ids): + nodes = driver.execute_query( + ''' + match (c:Constituent)-[:CONSREL]->(w:Word) + where c.type in ["VN"] + and w.id in $array + with c as constituent + match (constituent)-[:CONSREL*..]->(w:Word) + return distinct w.id + + UNION + + match (c:Constituent)-[:CONSREL]->(w:Word) + where c.type = "SENT" + and w.id in $array + return distinct w.id + ''', + array=list(words_ids) + ) + return set([record[0] for record in nodes.records]) + + +def time(words_ids): + nodes = driver.execute_query( + ''' + match (c:Constituent)-[:CONSREL]->(w:Word) + where c.type = "NP" + and w.id in $array + with c as constituent + match (constituent)-[:CONSREL*..]->(w:Word) + return distinct w.id + + UNION + + match (c1:Constituent {type: "PP"})-[:CONSREL]->(c2:Constituent {type: "P+"})-[:CONSREL]->(w:Word) + match (c2)<-[:CONSREL]-(:Constituent)-[:CONSREL]->(:Constituent {type: "NP"}) + where w.id in $array + with c1 as c1 + match (c1)-[:CONSREL*..]->(w:Word) + return distinct w.id + ''', + array=list(words_ids) + ) + return set([record[0] for record in nodes.records]) + + +def reference(words_ids): + nodes = driver.execute_query( + ''' + match (c:Constituent)-[:CONSREL]->(w:Word) + where c.type in ["NP", "PP"] + and w.id in $array + with c as constituent + match (constituent)-[:CONSREL*..]->(w:Word) + return distinct w.id + ''', + array=list(words_ids) + ) + return set([record[0] for record in nodes.records]) diff --git a/api/internal_services/spacy.py b/api/internal_services/spacy.py index 5e3da85..43f9b64 100644 --- a/api/internal_services/spacy.py +++ b/api/internal_services/spacy.py @@ -1,46 +1,46 @@ import benepar, spacy import warnings + +from api.internal_services.background_worker import add_job_to_queue +from api.models.Job import Job, JobType + warnings.filterwarnings("ignore") -from api.internal_services.database import getLastIndex, updateLastIndex -from api.internal_services.logger import logger -from api.internal_services.neo4j import createConstituentNode, driver, createConstituentRelation, createWordNode, \ - createNextWordRelation, createConceptRelation, createDeprelRelation +from api.internal_services.database import get_last_sentence_index, update_last_sentence_index +from api.internal_services.neo4j import create_constituent_node, driver, create_constituent_relation, create_word_node, \ + create_next_word_relation, create_deprel_relation benepar.download('benepar_fr2') nlp = spacy.load('fr_dep_news_trf') nlp.add_pipe('benepar', config={'model': 'benepar_fr2'}) -def spacy_process(sentence): +def parsing_and_load_in_neo4j(job): + sentence = job.job_data['sentence'] + last_index = 
get_last_sentence_index() + last_index = update_last_sentence_index(last_index + 1) + with (driver.session() as session): doc = nlp(sentence) - lastIndex = getLastIndex() for i, sentence in enumerate(doc.sents): - lastIndex = updateLastIndex(lastIndex + 1) - #constituentDone = set() - logger.debug(sentence._.parse_string) for constituent in sentence._.constituents: - constituentId = f"{lastIndex}.{i}.{constituent.start}-{constituent.end}" - logger.debug(f"Processing constituent : {constituentId} - {constituent._.labels}") - - logger.debug(f"{constituent._.labels} and {constituent.root.text != constituent.text}") + constituent_id = f"{last_index}.{i}.{constituent.start}-{constituent.end}" if constituent._.labels and constituent.root.text != constituent.text: # Créer le consituant session.execute_write( - createConstituentNode, - f"{lastIndex}.{i}.{constituent.start}-{constituent.end}", + create_constituent_node, + f"{last_index}.{i}.{constituent.start}-{constituent.end}", constituent._.labels[0] ) if constituent._.parent is not None: # parent existe alors on crée le lien session.execute_write( - createConstituentRelation, - f"{lastIndex}.{i}.{constituent._.parent.start}-{constituent._.parent.end}", - constituentId + create_constituent_relation, + f"{last_index}.{i}.{constituent._.parent.start}-{constituent._.parent.end}", + constituent_id ) else: @@ -48,61 +48,74 @@ def spacy_process(sentence): if constituent._.labels: # Créer le consituant session.execute_write( - createConstituentNode, - f"{lastIndex}.{i}.{constituent.start}-{constituent.end}", + create_constituent_node, + f"{last_index}.{i}.{constituent.start}-{constituent.end}", constituent._.labels[0] ) #Création du mot en noeud neo4j session.execute_write( - createWordNode, - '.'.join(map(str, [lastIndex, i, constituent.root.i])), + create_word_node, + '.'.join(map(str, [last_index, i, constituent.root.i])), constituent.text, None if not hasattr(constituent, 'lemma_') else constituent.lemma_, constituent.root.pos_, True if constituent.root.dep_ == "root" else False ) - logger.debug(f"Creating word : {constituent.text}") session.execute_write( - createConstituentRelation, - f"{lastIndex}.{i}.{constituent.start}-{constituent.end}", - '.'.join(map(str, [lastIndex, i, constituent.root.i])), + create_constituent_relation, + f"{last_index}.{i}.{constituent.start}-{constituent.end}", + '.'.join(map(str, [last_index, i, constituent.root.i])), ) session.execute_write( - createConstituentRelation, - f"{lastIndex}.{i}.{constituent._.parent.start}-{constituent._.parent.end}", - f"{lastIndex}.{i}.{constituent.start}-{constituent.end}", + create_constituent_relation, + f"{last_index}.{i}.{constituent._.parent.start}-{constituent._.parent.end}", + f"{last_index}.{i}.{constituent.start}-{constituent.end}", ) else: #Création du mot en noeud neo4j session.execute_write( - createWordNode, - '.'.join(map(str, [lastIndex, i, constituent.root.i])), + create_word_node, + '.'.join(map(str, [last_index, i, constituent.root.i])), constituent.text, None if not hasattr(constituent, 'lemma_') else constituent.lemma_, constituent.root.pos_, True if constituent.root.dep_ == "root" else False ) - logger.debug(f"Creating word : {constituent.text}") # parent existe alors on crée le lien session.execute_write( - createConstituentRelation, - f"{lastIndex}.{i}.{constituent._.parent.start}-{constituent._.parent.end}", - '.'.join(map(str, [lastIndex, i, constituent.root.i])), + create_constituent_relation, + 
f"{last_index}.{i}.{constituent._.parent.start}-{constituent._.parent.end}", + '.'.join(map(str, [last_index, i, constituent.root.i])), ) for token in sentence: #Création d'un lien de succession if token.i != 0: - idFrom = '.'.join(map(str, [lastIndex, i, token.i - 1])) - idTo = '.'.join(map(str, [lastIndex, i, token.i])) - session.execute_write(createNextWordRelation, idFrom, idTo) + idFrom = '.'.join(map(str, [last_index, i, token.i - 1])) + idTo = '.'.join(map(str, [last_index, i, token.i])) + session.execute_write(create_next_word_relation, idFrom, idTo) #dépendances syntaxiques - idFrom = '.'.join(map(str, [lastIndex, i, token.head.i])) - idTo = '.'.join(map(str, [lastIndex, i, token.i])) - session.execute_write(createDeprelRelation, idFrom, idTo, token.dep_) + idFrom = '.'.join(map(str, [last_index, i, token.head.i])) + idTo = '.'.join(map(str, [last_index, i, token.i])) + session.execute_write(create_deprel_relation, idFrom, idTo, token.dep_) + + new_job = Job() + new_job.job_id = job.job_id + new_job.job_type = JobType.ANNOTATION + new_job.job_data = {'sentence': sentence, 'sentence_id': last_index} + + add_job_to_queue(new_job) + +def simple_parsing(sentence): + doc = nlp(sentence) + output = [] + for i, sentence in enumerate(doc.sents): + for token in sentence: + output.append(token.text) + return ' '.join(output) diff --git a/api/main.py b/api/main.py index 0003258..f3addaa 100644 --- a/api/main.py +++ b/api/main.py @@ -1,5 +1,6 @@ # > fastapi dev main.py # > uvicorn api.main:app --reload +import uvicorn from fastapi import FastAPI from .routers import pipeline_endpoint @@ -9,4 +10,7 @@ app.include_router(pipeline_endpoint.router) @app.get("/") async def root(): - return {"message": "ALA plateform is running !"} \ No newline at end of file + return {"message": "ALA plateform is running !"} + +if __name__ == "__main__": + uvicorn.run(app, host="0.0.0.0", port=8000) \ No newline at end of file diff --git a/api/models/Job.py b/api/models/Job.py new file mode 100644 index 0000000..48c477c --- /dev/null +++ b/api/models/Job.py @@ -0,0 +1,11 @@ +from enum import Enum + + +class JobType(Enum): + SENTENCE_PARCING = 1 + ANNOTATION = 2 + +class Job(): + job_id: str + job_type: JobType + job_data: {} \ No newline at end of file diff --git a/api/routers/pipeline_endpoint.py b/api/routers/pipeline_endpoint.py index c2310e4..4ec99dc 100644 --- a/api/routers/pipeline_endpoint.py +++ b/api/routers/pipeline_endpoint.py @@ -1,7 +1,10 @@ from pydantic import BaseModel from fastapi import APIRouter -from api.internal_services.background_worker import add_sentence_to_queue +from api.internal_services.background_worker import add_job_to_queue from api.internal_services.logger import logger +import uuid + +from api.models.Job import Job, JobType router = APIRouter() @@ -11,6 +14,14 @@ class Sentence(BaseModel): @router.post("/sentences") def add_sentence_to_process(sentence: Sentence): - logger.debug(f"New sentence added to queue : {sentence.sentence}") - add_sentence_to_queue(sentence.sentence) - return {"message": "Sentence added to the queue for processing."} \ No newline at end of file + logger.info(f"New sentence added to queue : {sentence.sentence}") + new_job = Job() + new_job.job_id = uuid.uuid4() + new_job.job_type = JobType.SENTENCE_PARCING + new_job.job_data = {'sentence': sentence.sentence} + add_job_to_queue(new_job) + return {"message": "Job added to the queue for processing.", "job_id": new_job.job_id} + +@router.post("/actions/training") +def add_sentence_to_process(): + 
logger.info(f"Training process triggered") \ No newline at end of file -- GitLab