From b60892d3b35b47993ffbccf7cfe9fd027c4d1fe7 Mon Sep 17 00:00:00 2001 From: "Julien B." <xm9q8f80@jlnbrtn.me> Date: Wed, 21 Aug 2024 08:37:58 +0200 Subject: [PATCH] feat(internal_services): add gpt annotator and spacy parser --- .gitignore | 2 +- api/internal_services/annotator.py | 55 +++++ api/internal_services/background_worker.py | 34 +-- api/internal_services/database.py | 23 +- api/internal_services/gpt.py | 66 ++++++ api/internal_services/neo4j.py | 249 ++++++++++++++++++++- api/internal_services/spacy.py | 95 ++++---- api/main.py | 6 +- api/models/Job.py | 11 + api/routers/pipeline_endpoint.py | 19 +- 10 files changed, 483 insertions(+), 77 deletions(-) create mode 100644 api/internal_services/annotator.py create mode 100644 api/internal_services/gpt.py create mode 100644 api/models/Job.py diff --git a/.gitignore b/.gitignore index b92f26a..1f70b93 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ -.env +api/.env db.json .idea \ No newline at end of file diff --git a/api/internal_services/annotator.py b/api/internal_services/annotator.py new file mode 100644 index 0000000..ec9eaf4 --- /dev/null +++ b/api/internal_services/annotator.py @@ -0,0 +1,55 @@ +from api.internal_services import neo4j +from api.internal_services.database import update_last_concept_index, get_last_concept_index +from api.internal_services.gpt import gpt_process +from api.internal_services.logger import logger + +is_using_GPT = True + + +def annotation_process(job): + if is_using_GPT: + from api.internal_services.neo4j import driver, create_concept_node, create_concept_relation + with (driver.session() as session): + gpt_annotation = gpt_process(job.job_data['sentence'].text) + for concept, annotations in gpt_annotation.items(): + filtered_annotation = set() + for annotation in annotations: + filtered_annotation = filtered_annotation | neo4j.get_filtered_annotation(job.job_data['sentence_id'], concept, annotation) + if filtered_annotation: + for interval in separate_intervals(filtered_annotation): + session.execute_write( + create_concept_node, + concept, + update_last_concept_index(get_last_concept_index() + 1), + "GPT" + ) + + for word_id in interval: + session.execute_write( + create_concept_relation, + get_last_concept_index(), + word_id + ) + + else: + gpt_annotation = {} + +def separate_intervals(data): + sorted_data = sorted(list(data), key=lambda x: int(x.split('.')[-1])) + + separated_intervals = [] + current_interval = [sorted_data[0]] + + for i in range(1, len(sorted_data)): + current_value = int(sorted_data[i].split('.')[-1]) + previous_value = int(current_interval[-1].split('.')[-1]) + + if current_value == previous_value + 1: + current_interval.append(sorted_data[i]) + else: + separated_intervals.append(current_interval) + current_interval = [sorted_data[i]] + + separated_intervals.append(current_interval) + + return separated_intervals \ No newline at end of file diff --git a/api/internal_services/background_worker.py b/api/internal_services/background_worker.py index 0f08b77..39e5e20 100644 --- a/api/internal_services/background_worker.py +++ b/api/internal_services/background_worker.py @@ -1,36 +1,42 @@ from queue import Queue from threading import Thread + from api.internal_services.logger import logger -from api.internal_services.spacy import spacy_process +from api.models.Job import JobType -sentence_queue = Queue() +job_queue = Queue() worker_thread = None - def process_queue(): global worker_thread - while sentence_queue.qsize() != 0: - sentence = sentence_queue.get() - 
logger.debug(f"Processing the sentence : {sentence}") - if sentence is None: + while job_queue.qsize() != 0: + job = job_queue.get() + logger.info(f"Processing the job {job.job_id}") + if job is None: break - # all process - spacy_process(sentence) + match job.job_type: + case JobType.SENTENCE_PARCING: + from api.internal_services.spacy import parsing_and_load_in_neo4j + parsing_and_load_in_neo4j(job) + case JobType.ANNOTATION: + from api.internal_services.annotator import annotation_process + annotation_process(job) - sentence_queue.task_done() + logger.info(f"Ending of the job {job.job_id}") + job_queue.task_done() - logger.debug("Closing the worker thread") + logger.info("Closing the worker thread") worker_thread = None def start_worker_thread(): global worker_thread if worker_thread is None or not worker_thread.is_alive(): - logger.debug("Starting the worker thread to process the queue") + logger.info("Starting the worker thread to process the queue") worker_thread = Thread(target=process_queue, daemon=True) worker_thread.start() -def add_sentence_to_queue(sentence): - sentence_queue.put(sentence) +def add_job_to_queue(job): + job_queue.put(job) start_worker_thread() \ No newline at end of file diff --git a/api/internal_services/database.py b/api/internal_services/database.py index 9f65482..7332a26 100644 --- a/api/internal_services/database.py +++ b/api/internal_services/database.py @@ -2,15 +2,28 @@ from tinydb import TinyDB, Query, where db = TinyDB('db.json') -def getLastIndex(): - result = db.search(where('key') == 'last_index') +def get_last_sentence_index(): + result = db.search(where('key') == 'last_sentence_index') if not result: - created_object = {'key': 'last_index', 'value': 0} + created_object = {'key': 'last_sentence_index', 'value': 0} db.insert(created_object) return 0 else: return result[0]['value'] -def updateLastIndex(value): - db.update({'value': value}, where('key') == 'last_index') +def update_last_sentence_index(value): + db.update({'value': value}, where('key') == 'last_sentence_index') + return value + +def get_last_concept_index(): + result = db.search(where('key') == 'last_concept_index') + if not result: + created_object = {'key': 'last_concept_index', 'value': 0} + db.insert(created_object) + return 0 + else: + return result[0]['value'] + +def update_last_concept_index(value): + db.update({'value': value}, where('key') == 'last_concept_index') return value \ No newline at end of file diff --git a/api/internal_services/gpt.py b/api/internal_services/gpt.py new file mode 100644 index 0000000..a457c38 --- /dev/null +++ b/api/internal_services/gpt.py @@ -0,0 +1,66 @@ +import json +import os +import traceback + +from openai import OpenAI +from api.internal_services.logger import logger + +client = OpenAI() + +def gpt_process(sentence): + try: + completion = client.chat.completions.create( + model="gpt-4", + messages=[ + {"role": "system", "content": get_pre_prompt()}, + {"role": "user", "content": sentence} + ] + ) + jsonOutput = json.loads(completion.choices[0].message.content) + + key_mappings = { + "Action": "action", + "Acteur": "actor", + "Objet": "artifact", + "Condition": "condition", + "Lieu": "location", + "Modalité": "modality", + "Référence": "reference", + "Temps": "time" + } + + jsonOutput = {key_mappings.get(k, k): v for k, v in jsonOutput.items()} + return jsonOutput + + except Exception as e: + logger.error(f"Error during GPT process with the sentence : {sentence} | {e}") + +def get_pre_prompt(): + return """ + Tu es un expert en NLP spécialisé 
dans l'extraction d'entités dans des phrases. + Ces entités comprennent l'Action, l'Acteur, l'Objet, la Condition, le Lieu, la Modalité, la Référence et le Temps. Ces concepts prennent les définitions suivantes : + * Action : le fait de faire quelque chose + * Acteur : une entité qui a la capacité d'agir + * Objet : élément physique fabriqué par l'homme et impliqué dans une action + * Condition : une contrainte énonçant les propriétés qui doivent être respectées + * Lieu : endroit où une action est effectuée + * Modalité : un verbe indiquant la modalité de l'action (par exemple : peut, doit, etc) + * Référence : mention d'autres dispositions légales ou textes juridiques affectant la disposition actuelle + * Temps : le moment ou la durée associée à la réalisation d'une action + Un élément de la phrase initiale peut posséder plusieurs classifications, tu dois extraire toutes les classifications possibles. + Lors de l'analyse de textes, tes réponses doivent être formatées sous forme de JSON, listant les concepts identifiés sans élaboration ni justification. + Le JSON sera de la forme suivante : + { + "Action": [], + "Acteur": [], + "Objet": [], + "Condition": [], + "Lieu": [], + "Modalité": [], + "Référence": [], + "Temps": [] + } + Par exemple, avec la phrase suivante : "Le propriétaire ou détenteur d ' un véhicule routier qui trouve mal fondée une décision relative à la réception ou l ' immatriculation de son véhicule peut déférer celle-ci au ministre qui , après avoir demandé la position de la SNCA , confirme ou réforme celle-ci dans les deux mois à compter de l ' introduction du recours accompagné de toutes les pièces et informations utiles ." + Vous devez obtenir : {"action": ["déférer celle-ci au ministre"], "actor": ["Le propriétaire ou détenteur d ' un véhicule routier", "ministre"], "condition": ["accompagné de toutes les pièces et informations utiles", "qui , après avoir demandé la position de la SNCA , confirme ou réforme celle-ci dans les deux mois à compter de l ' introduction du recours accompagné de toutes les pièces et informations utiles .", "qui trouve mal fondée une décision relative à la réception ou l ' immatriculation de son véhicule", "relative à la réception ou l ' immatriculation de son véhicule"], "modality": ["peut"], "time": ["après avoir demandé la position de la SNCA", "dans les deux mois à compter de l ' introduction du recours"]} + Vous ne donnerez que le JSON comme réponse, aucune justification ou explication n'est accepté. De plus, vous ne devez pas reformuler les éléments extraits, ils doivent être identiques au mot près ! 
+ """ diff --git a/api/internal_services/neo4j.py b/api/internal_services/neo4j.py index 0290491..b691be9 100644 --- a/api/internal_services/neo4j.py +++ b/api/internal_services/neo4j.py @@ -1,4 +1,6 @@ from neo4j import GraphDatabase +from api.internal_services.logger import logger + uri = "bolt://localhost:7687" # Modifier l'URI en fonction de votre configuration username = "neo4j" @@ -8,7 +10,7 @@ password = "password" driver = GraphDatabase.driver(uri, auth=(username, password)) -def createWordNode(tx, id, text, lemma, pos, root): +def create_word_node(tx, id, text, lemma, pos, root): tx.run(''' CREATE ( n:Word { @@ -22,7 +24,7 @@ def createWordNode(tx, id, text, lemma, pos, root): id=id, text=text, lemma=lemma, pos=pos, root=root) -def createConstituentNode(tx, id, type): +def create_constituent_node(tx, id, type): tx.run(''' CREATE ( n:Constituent { @@ -33,18 +35,19 @@ def createConstituentNode(tx, id, type): id=id, type=type) -def createConceptNode(tx, concept, id): +def create_concept_node(tx, concept, id, origin): tx.run(''' CREATE ( n:Concept { type: $concept, - id: $id + id: $id, + origin: $origin } )''', - concept=concept, id=id) + concept=concept, id=id, origin=origin) -def createNextWordRelation(tx, idFrom, idTo): +def create_next_word_relation(tx, idFrom, idTo): tx.run(''' MATCH (a:Word), @@ -56,7 +59,7 @@ def createNextWordRelation(tx, idFrom, idTo): ) -def createDeprelRelation(tx, idFrom, idTo, type): +def create_deprel_relation(tx, idFrom, idTo, type): tx.run(''' MATCH (a:Word), @@ -68,7 +71,7 @@ def createDeprelRelation(tx, idFrom, idTo, type): ) -def createConceptRelation(tx, idFrom, idTo): +def create_concept_relation(tx, idFrom, idTo): tx.run(''' MATCH (a:Concept), @@ -79,7 +82,8 @@ def createConceptRelation(tx, idFrom, idTo): idFrom=idFrom, idTo=idTo ) -def createConstituentRelation(tx, idFrom, idTo): + +def create_constituent_relation(tx, idFrom, idTo): tx.run(''' MATCH (a:Constituent), @@ -90,7 +94,8 @@ def createConstituentRelation(tx, idFrom, idTo): idFrom=idFrom, idTo=idTo ) -def createRelation(tx, idFrom, idTo, id, type): + +def create_relation(tx, idFrom, idTo, id, type): tx.run(''' MATCH (a:Concept), @@ -99,4 +104,226 @@ def createRelation(tx, idFrom, idTo, id, type): CREATE (a)-[r:RELATION {id: $id, type: $type}]->(b) ''', idFrom=idFrom, idTo=idTo, id=id, type=type - ) \ No newline at end of file + ) + + +def get_filtered_annotation(sentence_id, concept, annotation): + annotation = annotation.strip().lower() + + if " " in annotation: + from api.internal_services.spacy import simple_parsing + annotation = simple_parsing(annotation) + words_ids = get_id_multi_tokens(annotation, sentence_id) + else: + words_ids = get_id_single_tokens(annotation, sentence_id) + + if len(words_ids) == 0: + logger.warn(f"Cannot find the following annotation '{annotation}' in the sentence id {sentence_id}. 
This error is a hallucination of large language models.") + return set() + + filtered_annotation = set() + if concept == "action": + filtered_annotation = action(words_ids) + elif concept == "actor": + filtered_annotation = actor(words_ids) + elif concept == "artifact": + filtered_annotation = artifact(words_ids) + elif concept == "condition": + filtered_annotation = condition(words_ids, sentence_id) + elif concept == "location": + filtered_annotation = location(words_ids) + elif concept == "modality": + filtered_annotation = modality(words_ids) + elif concept == "time": + filtered_annotation = time(words_ids) + elif concept == "reference": + filtered_annotation = reference(words_ids) + + return filtered_annotation + + +def get_id_multi_tokens(annotation, sentence_id): + annotation = annotation.split(" ") + list_multi_token = driver.execute_query( + ''' + WITH $array AS words + MATCH path = (start:Word)-[:NEXT*]->(end:Word) + where size(words) - 1 = size(relationships(path)) + and start.id starts with $sentence_id + and all( + idx IN range(0, size(words)-2) + WHERE (toLower(words[idx]) = toLower((nodes(path)[idx]).text) + AND toLower(words[idx+1]) = toLower((nodes(path)[idx + 1]).text)) + ) + and toLower(start.text) = words[0] + and toLower(end.text) = words[size(words) - 1] + with nodes(path) as result + unwind result as results + return collect(results.id) as liste + ''', + array=annotation, + sentence_id=f"{sentence_id}." + ) + return list_multi_token.records[0][0] + + +def get_id_single_tokens(annotation, sentence_id): + list_single_token = driver.execute_query( + ''' + match (w:Word) + where toLower(w.text) = $annotation + and w.id starts with $sentence_id + with distinct w as results + return collect(results.id) as liste + ''', + annotation=annotation, + sentence_id=f"{sentence_id}." + ) + return list_single_token.records[0][0] + + +def action(words_ids): + nodes = driver.execute_query( + ''' + match (c:Constituent)-[:CONSREL]->(w:Word) + where c.type in ["VN", "VPinf", "VPpart"] + and w.id in $array + with c as constituent + match (constituent)-[:CONSREL*..]->(w:Word) + return distinct w.id + ''', + array=list(words_ids) + ) + return set([record[0] for record in nodes.records]) + + +def actor(words_ids): + nodes = driver.execute_query( + ''' + match (c:Constituent)-[:CONSREL]->(w:Word) + where c.type in ["NP"] + and w.id in $array + with c as constituent + match (constituent)-[:CONSREL*..]->(w:Word) + return distinct w.id + ''', + array=list(words_ids) + ) + return set([record[0] for record in nodes.records]) + + +def artifact(words_ids): + nodes = driver.execute_query( + ''' + match (c:Constituent)-[:CONSREL]->(w:Word) + where c.type in ["NP"] + and w.id in $array + with c as constituent + match (constituent)-[:CONSREL*..]->(w:Word) + return distinct w.id + ''', + array=list(words_ids) + ) + return set([record[0] for record in nodes.records]) + + +def condition(words_ids, sentence_id): + sentence_id = str(sentence_id) + "." 
+ nodes = driver.execute_query( + ''' + match (c:Constituent)-[:CONSREL*..]->(w:Word) + where w.id in $array + and c.type in ["Srel", "PP"] + with c as constituent + match (constituent)-[:CONSREL*..]->(w:Word) + return distinct w.id + + UNION + + match (c:Constituent)-[:CONSREL]->(w:Word) + where w.id in $array + and c.type in ["Ssub"] + with c as constituent + match (constituent)-[:CONSREL]->(w:Word) + return distinct w.id + ''', + array=list(words_ids), + sentence_id=sentence_id + ) + return set([record[0] for record in nodes.records]) + + +def location(words_ids): + nodes = driver.execute_query( + ''' + match (c:Constituent)-[:CONSREL]->(w:Word) + where c.type in ["NP"] + and w.id in $array + with c as constituent + match (constituent)-[:CONSREL*..]->(w:Word) + return distinct w.id + ''', + array=list(words_ids) + ) + return set([record[0] for record in nodes.records]) + + +def modality(words_ids): + nodes = driver.execute_query( + ''' + match (c:Constituent)-[:CONSREL]->(w:Word) + where c.type in ["VN"] + and w.id in $array + with c as constituent + match (constituent)-[:CONSREL*..]->(w:Word) + return distinct w.id + + UNION + + match (c:Constituent)-[:CONSREL]->(w:Word) + where c.type = "SENT" + and w.id in $array + return distinct w.id + ''', + array=list(words_ids) + ) + return set([record[0] for record in nodes.records]) + + +def time(words_ids): + nodes = driver.execute_query( + ''' + match (c:Constituent)-[:CONSREL]->(w:Word) + where c.type = "NP" + and w.id in $array + with c as constituent + match (constituent)-[:CONSREL*..]->(w:Word) + return distinct w.id + + UNION + + match (c1:Constituent {type: "PP"})-[:CONSREL]->(c2:Constituent {type: "P+"})-[:CONSREL]->(w:Word) + match (c2)<-[:CONSREL]-(:Constituent)-[:CONSREL]->(:Constituent {type: "NP"}) + where w.id in $array + with c1 as c1 + match (c1)-[:CONSREL*..]->(w:Word) + return distinct w.id + ''', + array=list(words_ids) + ) + return set([record[0] for record in nodes.records]) + + +def reference(words_ids): + nodes = driver.execute_query( + ''' + match (c:Constituent)-[:CONSREL]->(w:Word) + where c.type in ["NP", "PP"] + and w.id in $array + with c as constituent + match (constituent)-[:CONSREL*..]->(w:Word) + return distinct w.id + ''', + array=list(words_ids) + ) + return set([record[0] for record in nodes.records]) diff --git a/api/internal_services/spacy.py b/api/internal_services/spacy.py index 5e3da85..43f9b64 100644 --- a/api/internal_services/spacy.py +++ b/api/internal_services/spacy.py @@ -1,46 +1,46 @@ import benepar, spacy import warnings + +from api.internal_services.background_worker import add_job_to_queue +from api.models.Job import Job, JobType + warnings.filterwarnings("ignore") -from api.internal_services.database import getLastIndex, updateLastIndex -from api.internal_services.logger import logger -from api.internal_services.neo4j import createConstituentNode, driver, createConstituentRelation, createWordNode, \ - createNextWordRelation, createConceptRelation, createDeprelRelation +from api.internal_services.database import get_last_sentence_index, update_last_sentence_index +from api.internal_services.neo4j import create_constituent_node, driver, create_constituent_relation, create_word_node, \ + create_next_word_relation, create_deprel_relation benepar.download('benepar_fr2') nlp = spacy.load('fr_dep_news_trf') nlp.add_pipe('benepar', config={'model': 'benepar_fr2'}) -def spacy_process(sentence): +def parsing_and_load_in_neo4j(job): + sentence = job.job_data['sentence'] + last_index = 
get_last_sentence_index() + last_index = update_last_sentence_index(last_index + 1) + with (driver.session() as session): doc = nlp(sentence) - lastIndex = getLastIndex() for i, sentence in enumerate(doc.sents): - lastIndex = updateLastIndex(lastIndex + 1) - #constituentDone = set() - logger.debug(sentence._.parse_string) for constituent in sentence._.constituents: - constituentId = f"{lastIndex}.{i}.{constituent.start}-{constituent.end}" - logger.debug(f"Processing constituent : {constituentId} - {constituent._.labels}") - - logger.debug(f"{constituent._.labels} and {constituent.root.text != constituent.text}") + constituent_id = f"{last_index}.{i}.{constituent.start}-{constituent.end}" if constituent._.labels and constituent.root.text != constituent.text: # Créer le consituant session.execute_write( - createConstituentNode, - f"{lastIndex}.{i}.{constituent.start}-{constituent.end}", + create_constituent_node, + f"{last_index}.{i}.{constituent.start}-{constituent.end}", constituent._.labels[0] ) if constituent._.parent is not None: # parent existe alors on crée le lien session.execute_write( - createConstituentRelation, - f"{lastIndex}.{i}.{constituent._.parent.start}-{constituent._.parent.end}", - constituentId + create_constituent_relation, + f"{last_index}.{i}.{constituent._.parent.start}-{constituent._.parent.end}", + constituent_id ) else: @@ -48,61 +48,74 @@ def spacy_process(sentence): if constituent._.labels: # Créer le consituant session.execute_write( - createConstituentNode, - f"{lastIndex}.{i}.{constituent.start}-{constituent.end}", + create_constituent_node, + f"{last_index}.{i}.{constituent.start}-{constituent.end}", constituent._.labels[0] ) #Création du mot en noeud neo4j session.execute_write( - createWordNode, - '.'.join(map(str, [lastIndex, i, constituent.root.i])), + create_word_node, + '.'.join(map(str, [last_index, i, constituent.root.i])), constituent.text, None if not hasattr(constituent, 'lemma_') else constituent.lemma_, constituent.root.pos_, True if constituent.root.dep_ == "root" else False ) - logger.debug(f"Creating word : {constituent.text}") session.execute_write( - createConstituentRelation, - f"{lastIndex}.{i}.{constituent.start}-{constituent.end}", - '.'.join(map(str, [lastIndex, i, constituent.root.i])), + create_constituent_relation, + f"{last_index}.{i}.{constituent.start}-{constituent.end}", + '.'.join(map(str, [last_index, i, constituent.root.i])), ) session.execute_write( - createConstituentRelation, - f"{lastIndex}.{i}.{constituent._.parent.start}-{constituent._.parent.end}", - f"{lastIndex}.{i}.{constituent.start}-{constituent.end}", + create_constituent_relation, + f"{last_index}.{i}.{constituent._.parent.start}-{constituent._.parent.end}", + f"{last_index}.{i}.{constituent.start}-{constituent.end}", ) else: #Création du mot en noeud neo4j session.execute_write( - createWordNode, - '.'.join(map(str, [lastIndex, i, constituent.root.i])), + create_word_node, + '.'.join(map(str, [last_index, i, constituent.root.i])), constituent.text, None if not hasattr(constituent, 'lemma_') else constituent.lemma_, constituent.root.pos_, True if constituent.root.dep_ == "root" else False ) - logger.debug(f"Creating word : {constituent.text}") # parent existe alors on crée le lien session.execute_write( - createConstituentRelation, - f"{lastIndex}.{i}.{constituent._.parent.start}-{constituent._.parent.end}", - '.'.join(map(str, [lastIndex, i, constituent.root.i])), + create_constituent_relation, + 
f"{last_index}.{i}.{constituent._.parent.start}-{constituent._.parent.end}", + '.'.join(map(str, [last_index, i, constituent.root.i])), ) for token in sentence: #Création d'un lien de succession if token.i != 0: - idFrom = '.'.join(map(str, [lastIndex, i, token.i - 1])) - idTo = '.'.join(map(str, [lastIndex, i, token.i])) - session.execute_write(createNextWordRelation, idFrom, idTo) + idFrom = '.'.join(map(str, [last_index, i, token.i - 1])) + idTo = '.'.join(map(str, [last_index, i, token.i])) + session.execute_write(create_next_word_relation, idFrom, idTo) #dépendances syntaxiques - idFrom = '.'.join(map(str, [lastIndex, i, token.head.i])) - idTo = '.'.join(map(str, [lastIndex, i, token.i])) - session.execute_write(createDeprelRelation, idFrom, idTo, token.dep_) + idFrom = '.'.join(map(str, [last_index, i, token.head.i])) + idTo = '.'.join(map(str, [last_index, i, token.i])) + session.execute_write(create_deprel_relation, idFrom, idTo, token.dep_) + + new_job = Job() + new_job.job_id = job.job_id + new_job.job_type = JobType.ANNOTATION + new_job.job_data = {'sentence': sentence, 'sentence_id': last_index} + + add_job_to_queue(new_job) + +def simple_parsing(sentence): + doc = nlp(sentence) + output = [] + for i, sentence in enumerate(doc.sents): + for token in sentence: + output.append(token.text) + return ' '.join(output) diff --git a/api/main.py b/api/main.py index 0003258..f3addaa 100644 --- a/api/main.py +++ b/api/main.py @@ -1,5 +1,6 @@ # > fastapi dev main.py # > uvicorn api.main:app --reload +import uvicorn from fastapi import FastAPI from .routers import pipeline_endpoint @@ -9,4 +10,7 @@ app.include_router(pipeline_endpoint.router) @app.get("/") async def root(): - return {"message": "ALA plateform is running !"} \ No newline at end of file + return {"message": "ALA plateform is running !"} + +if __name__ == "__main__": + uvicorn.run(app, host="0.0.0.0", port=8000) \ No newline at end of file diff --git a/api/models/Job.py b/api/models/Job.py new file mode 100644 index 0000000..48c477c --- /dev/null +++ b/api/models/Job.py @@ -0,0 +1,11 @@ +from enum import Enum + + +class JobType(Enum): + SENTENCE_PARCING = 1 + ANNOTATION = 2 + +class Job(): + job_id: str + job_type: JobType + job_data: {} \ No newline at end of file diff --git a/api/routers/pipeline_endpoint.py b/api/routers/pipeline_endpoint.py index c2310e4..4ec99dc 100644 --- a/api/routers/pipeline_endpoint.py +++ b/api/routers/pipeline_endpoint.py @@ -1,7 +1,10 @@ from pydantic import BaseModel from fastapi import APIRouter -from api.internal_services.background_worker import add_sentence_to_queue +from api.internal_services.background_worker import add_job_to_queue from api.internal_services.logger import logger +import uuid + +from api.models.Job import Job, JobType router = APIRouter() @@ -11,6 +14,14 @@ class Sentence(BaseModel): @router.post("/sentences") def add_sentence_to_process(sentence: Sentence): - logger.debug(f"New sentence added to queue : {sentence.sentence}") - add_sentence_to_queue(sentence.sentence) - return {"message": "Sentence added to the queue for processing."} \ No newline at end of file + logger.info(f"New sentence added to queue : {sentence.sentence}") + new_job = Job() + new_job.job_id = uuid.uuid4() + new_job.job_type = JobType.SENTENCE_PARCING + new_job.job_data = {'sentence': sentence.sentence} + add_job_to_queue(new_job) + return {"message": "Job added to the queue for processing.", "job_id": new_job.job_id} + +@router.post("/actions/training") +def add_sentence_to_process(): + 
logger.info(f"Training process triggered") \ No newline at end of file -- GitLab