Skip to content
Snippets Groups Projects
Commit 07840057 authored by Guilherme Henrique's avatar Guilherme Henrique
Browse files

changed to gradle

parent febae162
No related branches found
No related tags found
No related merge requests found
......@@ -2,4 +2,6 @@
lib
out
/output/
target
\ No newline at end of file
target
.gradle
build
\ No newline at end of file
/*
* This file was generated by the Gradle 'init' task.
*
* This project uses @Incubating APIs which are subject to change.
*/
plugins {
id 'java'
id 'maven-publish'
}
repositories {
mavenLocal()
maven {
url = uri('https://repo.maven.apache.org/maven2/')
}
}
dependencies {
implementation 'org.deeplearning4j:deeplearning4j-core:1.0.0-M1.1'
implementation 'org.nd4j:nd4j-native-platform:1.0.0-M1.1'
implementation 'com.fasterxml.jackson.core:jackson-core:2.12.1'
implementation 'com.fasterxml.jackson.core:jackson-annotations:2.12.1'
implementation 'com.fasterxml.jackson.core:jackson-databind:2.12.1'
implementation 'commons-codec:commons-codec:1.15'
implementation 'org.slf4j:slf4j-nop:1.7.36'
implementation 'org.apache.jena:apache-jena-libs:4.4.0'
implementation 'org.apache.commons:commons-text:1.9'
implementation 'net.sourceforge.argparse4j:argparse4j:0.9.0'
//https://gitlab.inria.fr/moex/alignapi/-/releases
implementation files('lib/align.jar')
implementation files('lib/alignsvc.jar')
implementation files('lib/ontowrap.jar')
implementation files('lib/procalign.jar')
}
group = 'org.example'
version = '1.0-SNAPSHOT'
description = 'CanardE'
java.sourceCompatibility = JavaVersion.VERSION_18
publishing {
publications {
maven(MavenPublication) {
from(components.java)
}
}
}
File added
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-7.5.1-bin.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
gradlew 0 → 100755
#!/bin/sh
#
# Copyright © 2015-2021 the original authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
##############################################################################
#
# Gradle start up script for POSIX generated by Gradle.
#
# Important for running:
#
# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
# noncompliant, but you have some other compliant shell such as ksh or
# bash, then to run this script, type that shell name before the whole
# command line, like:
#
# ksh gradlew
#
# Busybox and similar reduced shells will NOT work, because this script
# requires all of these POSIX shell features:
# * functions;
# * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
# «${var#prefix}», «${var%suffix}», and «$( cmd )»;
# * compound commands having a testable exit status, especially «case»;
# * various built-in commands including «command», «set», and «ulimit».
#
# Important for patching:
#
# (2) This script targets any POSIX shell, so it avoids extensions provided
# by Bash, Ksh, etc; in particular arrays are avoided.
#
# The "traditional" practice of packing multiple parameters into a
# space-separated string is a well documented source of bugs and security
# problems, so this is (mostly) avoided, by progressively accumulating
# options in "$@", and eventually passing that to Java.
#
# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
# see the in-line comments for details.
#
# There are tweaks for specific operating systems such as AIX, CygWin,
# Darwin, MinGW, and NonStop.
#
# (3) This script is generated from the Groovy template
# https://github.com/gradle/gradle/blob/master/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
# within the Gradle project.
#
# You can find Gradle at https://github.com/gradle/gradle/.
#
##############################################################################
# Attempt to set APP_HOME
# Resolve links: $0 may be a link
app_path=$0
# Need this for daisy-chained symlinks.
while
APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path
[ -h "$app_path" ]
do
ls=$( ls -ld "$app_path" )
link=${ls#*' -> '}
case $link in #(
/*) app_path=$link ;; #(
*) app_path=$APP_HOME$link ;;
esac
done
APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit
APP_NAME="Gradle"
APP_BASE_NAME=${0##*/}
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD=maximum
warn () {
echo "$*"
} >&2
die () {
echo
echo "$*"
echo
exit 1
} >&2
# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "$( uname )" in #(
CYGWIN* ) cygwin=true ;; #(
Darwin* ) darwin=true ;; #(
MSYS* | MINGW* ) msys=true ;; #(
NONSTOP* ) nonstop=true ;;
esac
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
# IBM's JDK on AIX uses strange locations for the executables
JAVACMD=$JAVA_HOME/jre/sh/java
else
JAVACMD=$JAVA_HOME/bin/java
fi
if [ ! -x "$JAVACMD" ] ; then
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
else
JAVACMD=java
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
# Increase the maximum file descriptors if we can.
if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
case $MAX_FD in #(
max*)
MAX_FD=$( ulimit -H -n ) ||
warn "Could not query maximum file descriptor limit"
esac
case $MAX_FD in #(
'' | soft) :;; #(
*)
ulimit -n "$MAX_FD" ||
warn "Could not set maximum file descriptor limit to $MAX_FD"
esac
fi
# Collect all arguments for the java command, stacking in reverse order:
# * args from the command line
# * the main class name
# * -classpath
# * -D...appname settings
# * --module-path (only if needed)
# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.
# For Cygwin or MSYS, switch paths to Windows format before running java
if "$cygwin" || "$msys" ; then
APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )
JAVACMD=$( cygpath --unix "$JAVACMD" )
# Now convert the arguments - kludge to limit ourselves to /bin/sh
for arg do
if
case $arg in #(
-*) false ;; # don't mess with options #(
/?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath
[ -e "$t" ] ;; #(
*) false ;;
esac
then
arg=$( cygpath --path --ignore --mixed "$arg" )
fi
# Roll the args list around exactly as many times as the number of
# args, so each arg winds up back in the position where it started, but
# possibly modified.
#
# NB: a `for` loop captures its iteration list before it begins, so
# changing the positional parameters here affects neither the number of
# iterations, nor the values presented in `arg`.
shift # remove old arg
set -- "$@" "$arg" # push replacement arg
done
fi
# Collect all arguments for the java command;
# * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of
# shell script including quotes and variable substitutions, so put them in
# double quotes to make sure that they get re-expanded; and
# * put everything else in single quotes, so that it's not re-expanded.
set -- \
"-Dorg.gradle.appname=$APP_BASE_NAME" \
-classpath "$CLASSPATH" \
org.gradle.wrapper.GradleWrapperMain \
"$@"
# Stop when "xargs" is not available.
if ! command -v xargs >/dev/null 2>&1
then
die "xargs is not available"
fi
# Use "xargs" to parse quoted args.
#
# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
#
# In Bash we could simply go:
#
# readarray ARGS < <( xargs -n1 <<<"$var" ) &&
# set -- "${ARGS[@]}" "$@"
#
# but POSIX shell has neither arrays nor process substitution, so instead we
# post-process each arg (as a line of input to sed) to backslash-escape any
# character that might be a shell metacharacter, then use eval to reverse
# that process (while maintaining the separation between arguments), and wrap
# the whole thing up as a single "set" statement.
#
# This will of course break if any of these variables contains a newline or
# an unmatched quote.
#
eval "set -- $(
printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
xargs -n1 |
sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
tr '\n' ' '
)" '"$@"'
exec "$JAVACMD" "$@"
@rem
@rem Copyright 2015 the original author or authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem
@if "%DEBUG%"=="" @echo off
@rem ##########################################################################
@rem
@rem Gradle startup script for Windows
@rem
@rem ##########################################################################
@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal
set DIRNAME=%~dp0
if "%DIRNAME%"=="" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%
@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome
set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if %ERRORLEVEL% equ 0 goto execute
echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
if exist "%JAVA_EXE%" goto execute
echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:execute
@rem Setup the command line
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
:end
@rem End local scope for the variables with windows NT shell
if %ERRORLEVEL% equ 0 goto mainEnd
:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
set EXIT_CODE=%ERRORLEVEL%
if %EXIT_CODE% equ 0 set EXIT_CODE=1
if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE%
exit /b %EXIT_CODE%
:mainEnd
if "%OS%"=="Windows_NT" endlocal
:omega
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.example</groupId>
<artifactId>CanardE</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
<maven.compiler.source>18</maven.compiler.source>
<maven.compiler.target>18</maven.compiler.target>
</properties>
<dependencies>
<dependency>
<groupId>org.deeplearning4j</groupId>
<artifactId>deeplearning4j-core</artifactId>
<version>1.0.0-M1.1</version>
</dependency>
<dependency>
<groupId>org.nd4j</groupId>
<artifactId>nd4j-native-platform</artifactId>
<version>1.0.0-M1.1</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
<version>2.12.1</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
<version>2.12.1</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>2.12.1</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>fr.inrialpes.exmo</groupId>
<artifactId>procalign</artifactId>
<version>4.9</version>
<exclusions>
<exclusion>
<artifactId>log4j-core</artifactId>
<groupId>org.apache.logging.log4j</groupId>
</exclusion>
</exclusions>
<scope>system</scope>
<systemPath>${project.basedir}/lib/procalign.jar</systemPath>
</dependency>
<dependency>
<groupId>fr.inrialpes.exmo.</groupId>
<artifactId>ontowrap</artifactId>
<version>4.9</version>
<scope>system</scope>
<systemPath>${project.basedir}/lib/ontowrap.jar</systemPath>
</dependency>
<dependency>
<groupId>org.semanticweb.owl.align</groupId>
<artifactId>align</artifactId>
<version>4.9</version>
<scope>system</scope>
<systemPath>${project.basedir}/lib/align.jar</systemPath>
</dependency>
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<version>1.15</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-nop -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-nop</artifactId>
<version>1.7.36</version>
</dependency>
<dependency>
<groupId>org.apache.jena</groupId>
<artifactId>apache-jena-libs</artifactId>
<type>pom</type>
<version>4.4.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-text -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-text</artifactId>
<version>1.9</version>
</dependency>
</dependencies>
</project>
\ No newline at end of file
/*
* This file was generated by the Gradle 'init' task.
*
* This project uses @Incubating APIs which are subject to change.
*/
rootProject.name = 'CanardE'
......@@ -8,16 +8,19 @@ import irit.dataset.DatasetManager;
import irit.output.OutputManager;
import irit.resource.IRI;
import irit.resource.Resource;
import irit.similarity.EmbeddingManager;
import irit.sparql.exceptions.IncompleteSubstitutionException;
import irit.sparql.SparqlProxy;
import irit.sparql.query.exception.SparqlEndpointUnreachableException;
import irit.sparql.query.exception.SparqlQueryMalFormedException;
import irit.sparql.query.select.SparqlSelect;
import net.sourceforge.argparse4j.ArgumentParsers;
import net.sourceforge.argparse4j.impl.Arguments;
import net.sourceforge.argparse4j.inf.ArgumentParser;
import net.sourceforge.argparse4j.inf.ArgumentParserException;
import net.sourceforge.argparse4j.inf.Namespace;
import org.apache.jena.rdf.model.RDFNode;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.*;
import java.util.concurrent.*;
......@@ -26,140 +29,120 @@ import java.util.concurrent.*;
public class ComplexAlignmentGeneration {
public static void main(String[] args) throws SparqlEndpointUnreachableException, SparqlQueryMalFormedException, ExecutionException, InterruptedException, IncompleteSubstitutionException {
public static void main(String[] args) throws SparqlEndpointUnreachableException, SparqlQueryMalFormedException, ExecutionException, InterruptedException, IncompleteSubstitutionException, IOException {
System.out.println("===============================================================================");
System.out.println("CanardE");
System.out.println("===============================================================================");
String datasets = args[0];
String needs = args[1];
String embeddings = args[2];
String source = args[3];
String target = args[4];
String range = args[5];
ArgumentParser parser = buildArgumentParser();
Set<String> stringSet = Set.of(source, target);
Map<String, String> ds = new HashMap<>();
try {
Files.walk(Paths.get(datasets), 1).forEach(path -> {
if (!path.toString().endsWith(".ttl") && !stringSet.contains(path.getFileName().toString())) return;
ds.put(path.getFileName().toString().split("_")[0], path.toString());
});
} catch (IOException e) {
throw new RuntimeException(e);
}
Namespace res = parser.parseArgs(args);
String source = res.get("source");
String target = res.get("target");
String cqa = res.get("cqa");
String range = res.get("range");
String output = res.get("output");
boolean silent = res.get("silent");
int maxMatches = res.get("maxMatches");
System.out.println("Found " + ds.size() + " datasets.");
String sourceName = getFileName(source);
String targetName = getFileName(target);
Map<String, String> nd = new HashMap<>();
Map<String, List<SparqlSelect>> cqas = new HashMap<>();
try {
Files.walk(Paths.get(needs), 1).forEach(path -> {
String ont = path.getFileName().toString();
if (!ds.containsKey(ont)) return;
nd.put(ont, path.toString());
try {
Files.walk(path, 1).forEach(path1 -> {
if (Files.isDirectory(path1)) return;
Scanner squery = null;
try {
squery = new Scanner(path1);
} catch (IOException e) {
throw new RuntimeException(e);
}
String query = squery.useDelimiter("\\Z").next();
SparqlSelect sq = new SparqlSelect(query);
cqas.computeIfAbsent(ont, s -> new ArrayList<>()).add(sq);
squery.close();
});
} catch (IOException e) {
throw new RuntimeException(e);
}
List<SparqlSelect> sparqlSelects = SparqlSelect.load(cqa);
List<Float> rangeList = parseRange(range);
});
} catch (IOException e) {
throw new RuntimeException(e);
}
DatasetManager.getInstance().load(sourceName, source);
DatasetManager.getInstance().load(targetName, target);
for (String s : ds.keySet()) {
if (nd.containsKey(s)) continue;
System.out.println("⚠️ Not found CQAs for " + s + ".");
run(sourceName, targetName, sparqlSelects, rangeList, maxMatches, false, output);
} catch (ArgumentParserException e) {
parser.handleError(e);
}
System.out.println("Needs loaded.");
}
System.out.println("Loading embeddings.");
Map<String, String[]> embs = new HashMap<>();
try {
Files.walk(Paths.get(embeddings), 1).forEach(path -> {
if (Files.isDirectory(path)) return;
String f = path.getFileName().toString();
String[] split = f.split("[_.]");
public static ArgumentParser buildArgumentParser() {
ArgumentParser parser = ArgumentParsers.newFor("Canard").build()
.description("Complex alignment generator.");
if (split[1].equals("n")) embs.computeIfAbsent(split[0], s -> new String[2])[0] = path.toString();
else if (split[1].equals("e")) embs.computeIfAbsent(split[0], s -> new String[2])[1] = path.toString();
parser.addArgument("source")
.type(String.class)
.required(true)
.help("Source ontology.");
});
} catch (IOException e) {
throw new RuntimeException(e);
}
parser.addArgument("target")
.type(String.class)
.required(true)
.help("Target ontology.");
parser.addArgument("cqa")
.type(String.class)
.required(true)
.help("CQA folder.");
embs.forEach((name, paths) -> {
try {
EmbeddingManager.load(paths[0], paths[1]);
} catch (IOException e) {
throw new RuntimeException(e);
}
});
parser.addArgument("--range")
.type(String.class)
.setDefault("0.8")
.help("Threshold range.");
parser.addArgument("--output")
.type(String.class)
.setDefault("output")
.help("Output folder.");
ds.forEach((name, path) -> {
DatasetManager.getInstance().load(name, path);
});
parser.addArgument("--embedding")
.type(String.class)
.help("Path to embeddings.");
parser.addArgument("--silent")
.type(Boolean.class)
.action(Arguments.storeConst())
.setConst(true)
.setDefault(false)
.help("Disable console output.");
List<String[]> datasetArgs = new ArrayList<>();
parser.addArgument("--maxMatches")
.type(Integer.class)
.setDefault(10)
.help("Max Matches.");
ds.forEach((s, s2) -> {
if (source != null && !source.startsWith(s)) return;
ds.forEach((s1, s21) -> {
if (s.equals(s1)) return;
if (target != null && !target.startsWith(s1)) return;
datasetArgs.add(new String[]{s, s1});
});
});
return parser;
}
/**
 * Returns the last element of {@code path} truncated at its first dot,
 * e.g. {@code "data/source.v2.ttl"} yields {@code "source"}.
 *
 * <p>A name without any dot is returned unchanged. A name that starts with a
 * dot (including {@code "."} and {@code ".."}) yields the empty string; the
 * previous {@code split("\\.")[0]} implementation threw
 * {@link ArrayIndexOutOfBoundsException} for names made only of dots because
 * {@code String.split} discards trailing empty strings.
 *
 * @param path file system path whose last element is inspected
 * @return the part of the last path element that precedes its first dot
 * @throws IllegalArgumentException if {@code path} has no file name element
 *                                  (for example a file system root)
 */
public static String getFileName(String path) {
    var lastElement = Paths.get(path).getFileName();
    if (lastElement == null) {
        throw new IllegalArgumentException("Path has no file name component: " + path);
    }

    String name = lastElement.toString();
    int firstDot = name.indexOf('.');
    return firstDot < 0 ? name : name.substring(0, firstDot);
}
String[] split = range.split(":");
List<Float> ths = new ArrayList<>();
public static List<Float> parseRange(String range) {
List<Float> ranges = new ArrayList<>();
String[] split = range.split(":");
for (float th = Float.parseFloat(split[0]); th <= Float.parseFloat(split[1]); th += Float.parseFloat(split[2])) {
ths.add(th);
}
float start = Float.parseFloat(split[0]);
float end = start;
float step = 0.1f;
String output = "output";
if (split.length > 1) end = Float.parseFloat(split[1]);
if (split.length > 2) start = Float.parseFloat(split[2]);
for (String[] datasetArg : datasetArgs) {
run(datasetArg[0], datasetArg[1], cqas.get(datasetArg[0]), ths, 10, false, output);
for (; start < end; start += step) {
ranges.add(start);
}
return ranges;
}
public static void run(String sourceEndpoint, String targetEndpoint, List<SparqlSelect> queries, List<Float> th, int maxMatches, boolean reassess, String outputPath) throws SparqlEndpointUnreachableException, SparqlQueryMalFormedException, ExecutionException, InterruptedException, IncompleteSubstitutionException {
OutputManager outputManager = new OutputManager();
outputManager.initOutputEdoal(sourceEndpoint, targetEndpoint, th, outputPath);
......@@ -293,10 +276,8 @@ public class ComplexAlignmentGeneration {
}
}
// System.out.println("Number of correspondences found (" + threshold + "): " + output.size());
if (reassess) {
System.out.println("Reassessing similarity");
for (SubgraphForOutput s : output) {
s.reassessSimilarityWithCounterExamples(sourceEndpoint, targetEndpoint, sq);
}
......
......@@ -56,15 +56,6 @@ public class SingleAnswer extends Answer {
HashSet<String> queryLabels = query.getLabels();
INDArray zeros = Nd4j.zeros(DataType.DOUBLE, EmbeddingManager.embshape);
for (String queryLabel : queryLabels) {
zeros = zeros.add(EmbeddingManager.get(queryLabel));
}
zeros = zeros.div(queryLabels.size());
double maxSim = -1;
Triple bestTriple = new Triple();
......@@ -81,10 +72,9 @@ public class SingleAnswer extends Answer {
for (Triple t : iri.getTriples()) {
double similarity = 0;
// t.retrieveIRILabels(targetEndpoint);
// t.retrieveTypes(targetEndpoint);
// similarity += t.compareLabel(queryLabels, similarityThreshold, targetEndpoint);
similarity += t.compareSim(zeros, similarityThreshold);
t.retrieveIRILabels(targetEndpoint);
t.retrieveTypes(targetEndpoint);
similarity += t.compareLabel(queryLabels, similarityThreshold, targetEndpoint);
if (similarity > maxSim) {
maxSim = similarity;
......
package irit.similarity;
import org.apache.commons.text.similarity.LevenshteinDistance;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
......@@ -32,18 +33,7 @@ public class EmbeddingManager {
/**
 * Returns a string similarity in {@code [0, 1]} derived from the Levenshtein
 * edit distance: {@code 1 - distance / max(length)}.
 *
 * <p>Identical strings score {@code 1.0}; strings that share nothing score
 * {@code 0.0}. The raw normalised distance is inverted here because this
 * method reports a similarity (higher means closer), and two empty strings
 * are treated as identical instead of producing {@code NaN} from {@code 0 / 0}.
 *
 * <p>NOTE(review): callers are not visible from this hunk; confirm they
 * expect "higher is more similar", as the method name indicates.
 *
 * @param s1 first string, must not be {@code null}
 * @param s2 second string, must not be {@code null}
 * @return similarity between {@code 0.0} and {@code 1.0}
 */
public static double getSim(String s1, String s2) {
    int longest = Math.max(s1.length(), s2.length());
    if (longest == 0) {
        return 1.0;
    }

    int distance = LevenshteinDistance.getDefaultInstance().apply(s1, s2);
    return 1.0 - (double) distance / longest;
}
private static Map<String, INDArray> loadEmbs(String n1, String e1) throws IOException {
......
......@@ -3,12 +3,13 @@ package irit.sparql.query.select;
import irit.resource.IRI;
import irit.sparql.query.SparqlQuery;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Map;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
public class SparqlSelect extends SparqlQuery {
......@@ -20,12 +21,12 @@ public class SparqlSelect extends SparqlQuery {
mainQuery = mainQuery.trim().replaceAll("SELECT", "select").replaceAll("WHERE", "where").replaceAll("\n", " ");
selectFocus = new ArrayList<>();
Pattern pattern = Pattern.compile("""
select[ \t
distncDISTNC]+(\\?[A-Za-z\\d_-]+)[ \t
]+(\\?*[A-Za-z\\d_-]*[ \t
]*)where[ \t
]*\\{(.+)}[ \t
]*$""");
select[ \t
distncDISTNC]+(\\?[A-Za-z\\d_-]+)[ \t
]+(\\?*[A-Za-z\\d_-]*[ \t
]*)where[ \t
]*\\{(.+)}[ \t
]*$""");
Matcher matcher = pattern.matcher(mainQuery);
while (matcher.find()) {
selectFocus.add(matcher.group(1).trim());
......@@ -34,12 +35,12 @@ public class SparqlSelect extends SparqlQuery {
}
where = matcher.group(3).trim();
setAggregate();
}
}
Pattern pattern2 = Pattern.compile("""
select([ \t
distncDISTNC]+\\?[A-Za-z\\d_-]+[ \t
]+\\?*[A-Za-z\\d_-]*[ \t
]*)where""");
select([ \t
distncDISTNC]+\\?[A-Za-z\\d_-]+[ \t
]+\\?*[A-Za-z\\d_-]*[ \t
]*)where""");
Matcher matcher2 = pattern2.matcher(mainQuery);
if (matcher2.find()) {
select = matcher2.group(1);
......@@ -48,6 +49,26 @@ public class SparqlSelect extends SparqlQuery {
}
/**
 * Loads every non-directory entry found directly under {@code path}
 * (walk depth 1) and parses each file's content as a {@link SparqlSelect}.
 *
 * <p>I/O failures raised inside the stream pipeline, whether while reading a
 * file or while lazily traversing the directory, are unwrapped and rethrown
 * as the declared {@link IOException} instead of escaping as an unchecked
 * exception.
 *
 * @param path folder containing one query per file
 * @return an unmodifiable list with one query per file, in traversal order
 * @throws IOException if the folder cannot be walked or a file cannot be read
 */
public static List<SparqlSelect> load(String path) throws IOException {
    try (var walk = Files.walk(Paths.get(path), 1)) {
        return walk
                .filter(entry -> !Files.isDirectory(entry))
                .map(SparqlSelect::readQueryFile)
                .map(SparqlSelect::new)
                .toList();
    } catch (java.io.UncheckedIOException e) {
        // Surface the original checked exception promised by the signature.
        throw e.getCause();
    }
}

/** Reads one query file, tunnelling {@link IOException} through the stream pipeline. */
private static String readQueryFile(java.nio.file.Path file) {
    try {
        return Files.readString(file);
    } catch (IOException e) {
        throw new java.io.UncheckedIOException(e);
    }
}
public String getSelect() {
return select;
}
......@@ -93,11 +114,10 @@ public class SparqlSelect extends SparqlQuery {
}
public HashSet<String> getLabels(){
public HashSet<String> getLabels() {
HashSet<String> queryLabels = new HashSet<>();
for (Map.Entry<String, IRI> iri : getIRIList().entrySet()){
for (Map.Entry<String, IRI> iri : getIRIList().entrySet()) {
queryLabels.addAll(iri.getValue().getLabels());
}
return queryLabels;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment