Commit 968d3534 authored by William CHARLES

Add documentation

parent 39f5eef2
Showing with 109 additions and 52 deletions
TestData/
\ No newline at end of file
@@ -6,11 +6,16 @@ import jpype
import jpype.imports
from threading import Thread
# The repository access to send queries.
sparqlQuery="http://c2200024:7200/repositories/dhfc"
# The repository access to send update queries.
sparqlUpdate="http://localhost:7200/repositories/dhfc/statements"
# A prefix that will be used to create IRIs for entities
baseNameSpace="https://cluedo4kg.irit.fr/repositories/sparqluedo"
# The prefix of DHFC.
dhfc="https://w3id.org/DHFC#"
# This section sets up the connection with the Java part of the code (Pellet).
classpath = []
for jar in os.listdir("lib"):
classpath.append(os.path.join("lib", jar))
@@ -28,6 +33,10 @@ from org.semanticweb.owlapi.reasoner import InferenceType
from com.clarkparsia.owlapi.explanation import PelletExplanation
from org.semanticweb.owlapi.util import InferredPropertyAssertionGenerator, InferredClassAssertionAxiomGenerator, InferredOntologyGenerator
# Used to send a SPARQL update query (query) to a SPARQL endpoint (endpoint).
# The step parameter is used to split the query into subqueries whenever it is too long to be handled by the repository.
# By default step is 0; it is incremented every time the query is split in two. When it reaches 5, the query is abandoned.
def updateSPARQL(endpoint, query, step=0) :
print(query)
sparql = SPARQLWrapper(endpoint)
@@ -56,7 +65,7 @@ def updateSPARQL(endpoint, query, step=0) :
print("Step: "+str(step))
raise
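# Illustrative sketch (not part of the original commit): one way the
# split-and-retry strategy described above can be realized for an INSERT DATA
# update, halving the payload on failure and abandoning once step reaches 5.
# The real updateSPARQL body is partly elided above; here triples is assumed
# to be a list of Turtle statement strings.
from SPARQLWrapper import SPARQLWrapper, POST

def updateSPARQLSketch(endpoint, triples, step=0):
    if step >= 5:
        raise RuntimeError("Update abandoned after 5 splits")
    sparql = SPARQLWrapper(endpoint)
    sparql.setMethod(POST)
    sparql.setQuery("INSERT DATA { " + " ".join(triples) + " }")
    try:
        sparql.query()
    except Exception:
        mid = len(triples) // 2
        if mid == 0:
            raise
        # Retry each half with an incremented step counter.
        updateSPARQLSketch(endpoint, triples[:mid], step + 1)
        updateSPARQLSketch(endpoint, triples[mid:], step + 1)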
# A class to get results from threads
class ReturnableThread(Thread):
# This class is a subclass of Thread that allows the thread to return a value.
def __init__(self, target):
@@ -67,7 +76,7 @@ class ReturnableThread(Thread):
def run(self) -> None:
self.result = self.target()
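# Illustrative usage (not part of the original commit): run a callable in a
# thread and read its return value after join(), via the result attribute set in run().
def _returnableThreadExample():
    t = ReturnableThread(target=lambda: 21 * 2)
    t.start()
    t.join()
    return t.result  # 42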
# Used to send a SPARQL query (query) to a SPARQL endpoint (endpoint).
def querySPARQL(endpoint, query) :
print(endpoint)
print(query)
@@ -78,6 +87,8 @@ def querySPARQL(endpoint, query) :
return ret["results"]["bindings"]
# A utility function to convert the result of a query into a list to facilitate iteration.
# Each entry is represented as a list whose nth element is the value of the nth variable of the query.
def toList(dic) :
l=[]
for e in dic :
@@ -87,6 +98,16 @@ def toList(dic) :
l.append(f)
return l
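# Illustrative example (not part of the original commit), assuming the
# standard SPARQL JSON results layout returned by querySPARQL:
def _toListExample():
    bindings = [{"s": {"type": "uri", "value": "http://example.org/a"},
                 "o": {"type": "literal", "value": "foo"}}]
    return toList(bindings)  # expected: [["http://example.org/a", "foo"]]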
# A utility function that takes a list of lists and returns the concatenation of its elements.
def flatten(l) :
res=[]
for e in l :
res.extend(e)
return res
# A function that queries the repository and returns the number of interpretation universes that currently exist for a source (sourceName).
# sourceName is the non-prefixed name of the source, and not the full IRI.
def interpretationCount(sourceName) :
query="PREFIX dhfc: <"+dhfc+"> SELECT (COUNT(?i) AS ?count) {<"+baseNameSpace+"/"+sourceName+"> dhfc:hasInterpretation ?i} "
ret=querySPARQL(sparqlQuery, query)
@@ -94,6 +115,8 @@ def interpretationCount(sourceName) :
for el in e.values() :
return int(el["value"])
# A function that queries the repository and returns the number of assertions by a specific content producer (author) that currently exist for a source (sourceName).
# sourceName is the non-prefixed name of the source, and not the full IRI. The same holds for author.
def assertionCount(sourceName, author) :
query="PREFIX dhfc: <"+dhfc+"> SELECT (COUNT(?o) AS ?count) {?o dhfc:assertedFrom <"+baseNameSpace+"/"+sourceName+">. <"+baseNameSpace+"/"+author+"> dhfc:assertsThrough ?a. ?a dhfc:assertsOver ?o} "
ret=querySPARQL(sparqlQuery, query)
@@ -101,12 +124,14 @@ def assertionCount(sourceName, author) :
for el in e.values() :
return int(el["value"])
# Redirects stdout to os.devnull to silence printed output.
def blockPrint():
sys.stdout = open(os.devnull, 'w')
# Sends a query to add the typing triple for a new source.
# sourceName is the non-prefixed name of the source, and not the full IRI.
def newSource(sourceName) :
query="""PREFIX dhfc: <"""+dhfc+""">INSERT DATA {<"""+baseNameSpace+"/"+sourceName+"""> a dhfc:Source} """
updateSPARQL(sparqlUpdate, query)
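# For illustration (hypothetical source name), newSource("letter12") sends:
#   PREFIX dhfc: <https://w3id.org/DHFC#>
#   INSERT DATA { <https://cluedo4kg.irit.fr/repositories/sparqluedo/letter12> a dhfc:Source }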
# Checks whether an entity (eName) is referenced within the repository as the subject of a triple.
# eName is the non-prefixed name of the entity, and not the full IRI.
def exists(eName) :
sparql = SPARQLWrapper(sparqlQuery)
sparql.setQuery("""
@@ -119,6 +144,7 @@ def exists(eName) :
results = sparql.query().convert()
return results["boolean"]
# Sends a query that is used to merge blank nodes that reify the same triple.
def normalize() :
query="""PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX dhfc: <https://w3id.org/DHFC#>
@@ -149,7 +175,8 @@ WHERE {
}"""
updateSPARQL(sparqlUpdate, query)
# Sends an update to add a new interpretation to a source (sourceName)
# sourceName is the non-prefixed name of the source, and not the full IRI.
def newInterpretation(sourceName) :
if not exists(sourceName) :
newSource(sourceName)
@@ -158,6 +185,13 @@ def newInterpretation(sourceName) :
updateSPARQL(sparqlUpdate, query)
return baseNameSpace+"/"+sourceName+"#interpretation"+str(number)
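# For illustration (hypothetical source name), newInterpretation("letter12")
# returns an IRI of the form
# https://cluedo4kg.irit.fr/repositories/sparqluedo/letter12#interpretation<n>,
# where <n> is derived from the source's current interpretation count.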
# The core function, which handles adding or editing an assertion.
# It handles the addition or editing of a given assertion (assertion), notably updating the interpretation universes with the new assertion, and also adds the inferences.
# sourceName is the non-prefixed name of the source, and not the full IRI.
# assertion is the IRI of the assertion to add/edit.
# triplesSerialization is the turtle serialization of the content of the assertion.
# new is a boolean parameter that specifies whether the assertion is new, or an existing one that is being edited.
# author is an optional parameter and is the non-prefixed name of the author, and not the full IRI.
def assertionProtocol(sourceName, assertion, triplesSerialization, new, author=None) :
print(triplesSerialization)
query="""PREFIX dhfc: <"""+dhfc+"""> INSERT DATA {
@@ -187,7 +221,12 @@ def assertionProtocol(sourceName, assertion, triplesSerialization, new, author=N
for t in l :
t.join()
# Sends updates to add/remove an assertion to/from an interpretation universe.
# pattern is a dictionary whose keys are the interpretations and whose values are booleans indicating whether the assertion is consistent with that interpretation universe.
# interpretation is the IRI of the interpretation universe.
# sourceName is the non-prefixed name of the source, and not the full IRI.
# assertion is the IRI of the assertion.
# new is a boolean parameter that specifies whether the assertion is new, or an existing one that is being edited.
def editGraph(pattern, interpretation, sourceName, assertion, new) :
if pattern[interpretation] and new :
query="""PREFIX dhfc: <"""+dhfc+""">INSERT DATA {
@@ -204,6 +243,10 @@ def editGraph(pattern, interpretation, sourceName, assertion, new) :
updateSPARQL(sparqlUpdate, query)
fragmentInterpretation(sourceName,interpretation, assertion)
# The function that should be called to add a new assertion. The addition includes handling the interpretation universes and adding inferences.
# sourceName is the non-prefixed name of the source, and not the full IRI.
# author is the non-prefixed name of the author, and not the full IRI.
# triplesSerialization is the turtle serialization of the content of the assertion.
def newAssertionSet(sourceName, author, triplesSerialization) :
if reasonerConsistency(triplesSerialization) :
if interpretationCount(sourceName)==0:
@@ -213,11 +256,18 @@ def newAssertionSet(sourceName, author, triplesSerialization) :
assertionProtocol(sourceName, assertion, triplesSerialization, True, author=author)
else :
print("Inconsistent triples not added !")
# The function that should be called to edit an assertion. The edit includes handling the interpretation universes and adding inferences.
# sourceName is the non-prefixed name of the source, and not the full IRI.
# assertion is the IRI of the assertion.
# triplesSerialization is the turtle serialization of the new content of the assertion.
def editAssertionSet(sourceName, assertion, triplesSerialization) :
if reasonerConsistency(triplesSerialization) :
assertionProtocol(sourceName,assertion,triplesSerialization, False)
# This function returns a dictionary indicating, for each interpretation universe of a source, whether it is consistent, in particular after the addition of some assertions.
# sourceName is the non-prefixed name of the source, and not the full IRI.
# toAdd is the list of assertions whose content are added to that of the interpretation universe when checking consistency.
def checkInterpretations(sourceName, toAdd=[]) :
query="PREFIX dhfc: <"+dhfc+"> SELECT ?i {<"+baseNameSpace+"/"+sourceName+"> dhfc:hasInterpretation ?i} "
ret=toList(querySPARQL(sparqlQuery, query))
@@ -232,24 +282,8 @@ def checkInterpretations(sourceName, toAdd=[]) :
dic[e]=threads[e].result
return dic
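# Illustrative sketch (not the original code, whose body is partly elided
# above): the thread-per-interpretation pattern with ReturnableThread.
def checkInterpretationsSketch(interpretations, toAdd=[]):
    threads = {}
    for i in interpretations:
        # Bind i per thread via a default argument to avoid late binding.
        t = ReturnableThread(target=lambda i=i: checkInterpretationConsistency(
            getListAssertion(i), toAdd))
        t.start()
        threads[i] = t
    dic = {}
    for i, t in threads.items():
        t.join()
        dic[i] = t.result
    return dic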
# Builds and returns a CONSTRUCT query for the core content of a named graph (graphName), excluding reification and DHFC bookkeeping triples.
def getCoreNamedGraph(graphName) :
query="""PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX dhfc: <https://w3id.org/DHFC#>
CONSTRUCT {
?s ?p ?o
}
WHERE {
GRAPH <"""+graphName+"""> {?s ?p ?o.
FILTER(?p != rdf:subject && ?p != rdf:predicate && ?p != rdf:object && ?p != dhfc:hasRDFPart && ?p != dhfc:inNamedGraph)}
FILTER(?o != rdf:Statement && ?p != dhfc:owlAxiom)}
"""
return query
# Queries the repository and returns a serialization of the content of the dhfc:DomainGraph
def ontologyContent():
sparql = SPARQLWrapper(sparqlQuery)
sparql.setQuery("""
@@ -264,17 +298,23 @@ def ontologyContent():
""")
return sparql.queryAndConvert().serialize()
# Returns the list of assertions that are part of an interpretation.
# interpretation is the IRI of the interpretation
def getListAssertion(interpretation) :
query="""PREFIX dhfc: <"""+dhfc+"""> SELECT ?a {<"""+interpretation+"""> dhfc:hasAssertion ?a}
"""
return flatten(toList(querySPARQL(sparqlQuery, query)))
# Returns the list of interpretations an assertion is part of.
# assertion is the IRI of the assertion
def getListInterpretation(assertion) :
query="""PREFIX dhfc: <"""+dhfc+"""> SELECT ?a {?a dhfc:hasAssertion <"""+assertion+"""> }
"""
return flatten(toList(querySPARQL(sparqlQuery, query)))
# This function returns a boolean indicating whether a list of assertions remains consistent when another list of assertions is added to it.
# assertions is the list of IRIs of the initial set of assertions.
# toAdd is the list of assertions whose content are added to that of the interpretation universe when checking consistency.
def checkInterpretationConsistency(assertions, toAdd=[]) :
if assertions is not None :
ret=assertions+toAdd
@@ -295,9 +335,12 @@ def checkInterpretationConsistency( assertions, toAdd=[]) :
else :
return True
# A utility function used to create an IRI from a triple. It is used to generate IRIs for reifications so that they can be referenced outside their named graph.
def hashEncoding(triple):
return hex(hash(triple[0]))+hex(hash(triple[1]))+hex(hash(triple[2]))
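# Note (added for illustration): Python salts str hashes per process unless
# PYTHONHASHSEED is fixed, so these IRIs are only stable within a single run,
# and hex() renders negative hashes with a leading "-".
# Example: hashEncoding(("http://ex/s", "http://ex/p", "http://ex/o"))
# might yield "0x1f...-0x3a...0x7c..." in one run and differ in the next.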
# A utility function that is used to generate the turtle serialization of a reified triple or OWL axiom.
# It returns a pair (serialization, IRI of the reified resource).
def reification(t, graphLocation=None) :
if len(t)==1 :
triple=reifiedVersion(t[0], graphLocation)
@@ -306,7 +349,9 @@ def reification(t, graphLocation=None) :
if graphLocation!=None :
triple=("GRAPH <"+graphLocation+"> {"+triple[0]+"}", triple[1])
return triple
# A utility function that is used to generate the turtle serialization of a reified OWL axiom.
# It returns a pair (serialization, IRI of the reified resource).
def reifiedOWLAxiom(triples, graphLocation=None) :
axiomName=baseNameSpace
for triple in triples :
@@ -318,6 +363,8 @@ def reifiedOWLAxiom(triples, graphLocation=None) :
sparql+=res[0]+""" <"""+axiomName+"""> dhfc:hasRDFPart <"""+res[1]+">. "
return (sparql, axiomName)
# A utility function that is used to generate the turtle serialization of a reified triple.
# It returns a pair (serialization, IRI of the reified resource).
def reifiedVersion(triple, graphLocation=None):
blankNodeName=baseNameSpace+hashEncoding(triple)
sparql="""<"""+blankNodeName+"""> a rdf:Statement. <"""+blankNodeName+"""> rdf:subject <"""+triple[0]+""">.
@@ -327,6 +374,7 @@ def reifiedVersion(triple, graphLocation=None):
sparql+= """<"""+blankNodeName+"""> dhfc:inNamedGraph <"""+graphLocation+""">. """
return (sparql,blankNodeName)
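# Illustrative usage (hypothetical IRIs): reify a triple inside a named graph.
def _reifiedVersionExample():
    serialization, iri = reifiedVersion(
        ("http://example.org/s", "http://example.org/p", "http://example.org/o"),
        graphLocation="http://example.org/graph1")
    # serialization holds the rdf:Statement/subject/predicate/object triples
    # plus the dhfc:inNamedGraph link; iri is baseNameSpace+hashEncoding(triple).
    return serialization, iri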
# Updates the repository to clear the inference graph of an interpretation universe (interpretation), including all supports targeting it.
def clearInferenceGraph(interpretation) :
print("Suppression")
query="""PREFIX dhfc: <"""+dhfc+"""> SELECT ?g WHERE {<"""+interpretation+"""> dhfc:entails ?g."""
@@ -338,6 +386,7 @@ def clearInferenceGraph(interpretation) :
except :
print("No Inference Graph Found for : "+interpretation)
# Updates the repository to add a graph containing the inferences that an OWL reasoner can draw from the content of an interpretation universe (interpretation). It also adds the supports that explain the inferences (one explanation for each inference).
def addInferenceResults(interpretation) :
# Clear previous inference
clearInferenceGraph(interpretation)
@@ -401,6 +450,7 @@ def addInferenceResults(interpretation) :
updateSPARQL(sparqlUpdate, query)
#normalize()
# Uses the Pellet reasoner and returns a boolean indicating whether some OWL-based RDF content (turtleContent) is consistent.
def reasonerConsistency(turtleContent: str) :
input_stream = ByteArrayInputStream(turtleContent.encode())
manager = OWLManager.createOWLOntologyManager()
@@ -416,6 +466,7 @@ def reasonerConsistency(turtleContent: str) :
#TODO for later: add a print of this
return l
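# Illustrative sketch of the consistency check above (whose body is partly
# elided): load the Turtle content through OWLAPI and ask Pellet whether the
# ontology is consistent. The factory import path is an assumption and depends
# on the Pellet jars shipped in lib/.
def reasonerConsistencySketch(turtleContent: str) -> bool:
    from com.clarkparsia.pellet.owlapiv3 import PelletReasonerFactory  # assumed path
    manager = OWLManager.createOWLOntologyManager()
    ontology = manager.loadOntologyFromOntologyDocument(
        ByteArrayInputStream(turtleContent.encode()))
    reasoner = PelletReasonerFactory.getInstance().createReasoner(ontology)
    # OWLReasoner.isConsistent() is the standard OWLAPI entry point.
    return bool(reasoner.isConsistent())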
# Returns the RDF serialization of an OWL axiom
def RDFserialization(axiom) :
input_stream = ByteArrayInputStream("".encode())
manager = OWLManager.createOWLOntologyManager()
@@ -432,6 +483,7 @@ def RDFserialization(axiom) :
print(err)
return l
# Takes an RDF serialization as input and returns a list of its triples.
def splitTurtle(l) :
g=Graph()
g.parse(data=l)
@@ -439,9 +491,10 @@ def splitTurtle(l) :
for s, p, o in g :
l.append((str(s), str(p), str(o)))
return l
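# Illustrative usage (not part of the original commit): split a small Turtle
# document into plain string triples.
def _splitTurtleExample():
    data = "@prefix ex: <http://example.org/> . ex:a ex:p ex:b ."
    # expected: [("http://example.org/a", "http://example.org/p", "http://example.org/b")]
    return splitTurtle(data)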
# Uses the Pellet reasoner to compute all the inferences of an RDF serialization (turtleContent), and an explanation for each.
# Returns a dictionary with the following structure, e.g.:
# {<Inferred triple> : [[<Explanation1p1>, <Explanation1p2>], [<Explanation2p1>, <Explanation2p2>, <Explanation2p3>]]}
def getInferedAxiomWithExplanation(turtleContent) :
PelletExplanation.setup()
input_stream = ByteArrayInputStream(turtleContent.encode())
@@ -484,11 +537,16 @@ def getInferedAxiomWithExplanation(turtleContent) :
None
return explanations
# Takes an interpretation universe and a new assertion, looks for subsets of the universe that might be consistent with this assertion, then updates the graph with the new interpretation universes.
# sourceName is the non-prefixed name of the source, and not the full IRI.
# interpretation is the IRI of the interpretation universe
# newAssertion is the IRI of the new assertion
def fragmentInterpretation(sourceName,interpretation, newAssertion) :
toFragment=getListAssertion(interpretation)
l=[]
fragmentInterpretation_rec(sourceName,toFragment, newAssertion, l)
# An auxiliary recursive function used by fragmentInterpretation
def fragmentInterpretation_rec(sourceName, assertions, newAssertion, l) :
print(assertions)
if len(assertions)>0 :
@@ -499,11 +557,9 @@ def fragmentInterpretation_rec(sourceName, assertions, newAssertion, l) :
t.start()
thrs.append(t)
for t in thrs :
t.join()
# A function used by fragmentInterpretation_rec to parallelize a section
def fragment(sourceName, e, newAssertion, l) :
if e not in l :
l.append(e)
@@ -91,7 +91,7 @@ f.close()
counter=log["number"]
ent=log["nEntities"]
for nEntities in range(100, 1000, 100):
for n in range (100) :
if nEntities>=ent and n>counter :
c=generate(nEntities*4, nEntities)
file=open("TestData\\test_"+str(nEntities)+"_"+str(n)+"_1", "w")