diff --git a/.gitignore b/.gitignore index 7e511a4f71a14e36b90af9746b481a88cde92475..7d13785b01f085c0a5135689f6dc8ee6643986c1 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,6 @@ lib out /output/ -target \ No newline at end of file +target +.gradle +build \ No newline at end of file diff --git a/build.gradle b/build.gradle new file mode 100644 index 0000000000000000000000000000000000000000..24b9d60c164e93e9971794c574e221f9bf119ed6 --- /dev/null +++ b/build.gradle @@ -0,0 +1,49 @@ +/* + * This file was generated by the Gradle 'init' task. + * + * This project uses @Incubating APIs which are subject to change. + */ + +plugins { + id 'java' + id 'maven-publish' +} + +repositories { + mavenLocal() + maven { + url = uri('https://repo.maven.apache.org/maven2/') + } +} + +dependencies { + implementation 'org.deeplearning4j:deeplearning4j-core:1.0.0-M1.1' + implementation 'org.nd4j:nd4j-native-platform:1.0.0-M1.1' + implementation 'com.fasterxml.jackson.core:jackson-core:2.12.1' + implementation 'com.fasterxml.jackson.core:jackson-annotations:2.12.1' + implementation 'com.fasterxml.jackson.core:jackson-databind:2.12.1' + implementation 'commons-codec:commons-codec:1.15' + implementation 'org.slf4j:slf4j-nop:1.7.36' + implementation 'org.apache.jena:apache-jena-libs:4.4.0' + implementation 'org.apache.commons:commons-text:1.9' + implementation 'net.sourceforge.argparse4j:argparse4j:0.9.0' + + //https://gitlab.inria.fr/moex/alignapi/-/releases + implementation files('lib/align.jar') + implementation files('lib/alignsvc.jar') + implementation files('lib/ontowrap.jar') + implementation files('lib/procalign.jar') +} + +group = 'org.example' +version = '1.0-SNAPSHOT' +description = 'CanardE' +java.sourceCompatibility = JavaVersion.VERSION_18 + +publishing { + publications { + maven(MavenPublication) { + from(components.java) + } + } +} diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 0000000000000000000000000000000000000000..249e5832f090a2944b7473328c07c9755baa3196 Binary files /dev/null and b/gradle/wrapper/gradle-wrapper.jar differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 0000000000000000000000000000000000000000..ae04661ee733431762e7ccf8ab9b7409ed44960c --- /dev/null +++ b/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,5 @@ +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-7.5.1-bin.zip +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists diff --git a/gradlew b/gradlew new file mode 100755 index 0000000000000000000000000000000000000000..a69d9cb6c20655813e44515156e7253a2a239138 --- /dev/null +++ b/gradlew @@ -0,0 +1,240 @@ +#!/bin/sh + +# +# Copyright © 2015-2021 the original authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +############################################################################## +# +# Gradle start up script for POSIX generated by Gradle. +# +# Important for running: +# +# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is +# noncompliant, but you have some other compliant shell such as ksh or +# bash, then to run this script, type that shell name before the whole +# command line, like: +# +# ksh Gradle +# +# Busybox and similar reduced shells will NOT work, because this script +# requires all of these POSIX shell features: +# * functions; +# * expansions «$var», «${var}», «${var:-default}», «${var+SET}», +# «${var#prefix}», «${var%suffix}», and «$( cmd )»; +# * compound commands having a testable exit status, especially «case»; +# * various built-in commands including «command», «set», and «ulimit». +# +# Important for patching: +# +# (2) This script targets any POSIX shell, so it avoids extensions provided +# by Bash, Ksh, etc; in particular arrays are avoided. +# +# The "traditional" practice of packing multiple parameters into a +# space-separated string is a well documented source of bugs and security +# problems, so this is (mostly) avoided, by progressively accumulating +# options in "$@", and eventually passing that to Java. +# +# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, +# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; +# see the in-line comments for details. +# +# There are tweaks for specific operating systems such as AIX, CygWin, +# Darwin, MinGW, and NonStop. +# +# (3) This script is generated from the Groovy template +# https://github.com/gradle/gradle/blob/master/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt +# within the Gradle project. +# +# You can find Gradle at https://github.com/gradle/gradle/. +# +############################################################################## + +# Attempt to set APP_HOME + +# Resolve links: $0 may be a link +app_path=$0 + +# Need this for daisy-chained symlinks. +while + APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path + [ -h "$app_path" ] +do + ls=$( ls -ld "$app_path" ) + link=${ls#*' -> '} + case $link in #( + /*) app_path=$link ;; #( + *) app_path=$APP_HOME$link ;; + esac +done + +APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit + +APP_NAME="Gradle" +APP_BASE_NAME=${0##*/} + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD=maximum + +warn () { + echo "$*" +} >&2 + +die () { + echo + echo "$*" + echo + exit 1 +} >&2 + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "$( uname )" in #( + CYGWIN* ) cygwin=true ;; #( + Darwin* ) darwin=true ;; #( + MSYS* | MINGW* ) msys=true ;; #( + NONSTOP* ) nonstop=true ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD=$JAVA_HOME/jre/sh/java + else + JAVACMD=$JAVA_HOME/bin/java + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD=java + which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." +fi + +# Increase the maximum file descriptors if we can. +if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then + case $MAX_FD in #( + max*) + MAX_FD=$( ulimit -H -n ) || + warn "Could not query maximum file descriptor limit" + esac + case $MAX_FD in #( + '' | soft) :;; #( + *) + ulimit -n "$MAX_FD" || + warn "Could not set maximum file descriptor limit to $MAX_FD" + esac +fi + +# Collect all arguments for the java command, stacking in reverse order: +# * args from the command line +# * the main class name +# * -classpath +# * -D...appname settings +# * --module-path (only if needed) +# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. + +# For Cygwin or MSYS, switch paths to Windows format before running java +if "$cygwin" || "$msys" ; then + APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) + CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) + + JAVACMD=$( cygpath --unix "$JAVACMD" ) + + # Now convert the arguments - kludge to limit ourselves to /bin/sh + for arg do + if + case $arg in #( + -*) false ;; # don't mess with options #( + /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath + [ -e "$t" ] ;; #( + *) false ;; + esac + then + arg=$( cygpath --path --ignore --mixed "$arg" ) + fi + # Roll the args list around exactly as many times as the number of + # args, so each arg winds up back in the position where it started, but + # possibly modified. + # + # NB: a `for` loop captures its iteration list before it begins, so + # changing the positional parameters here affects neither the number of + # iterations, nor the values presented in `arg`. + shift # remove old arg + set -- "$@" "$arg" # push replacement arg + done +fi + +# Collect all arguments for the java command; +# * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of +# shell script including quotes and variable substitutions, so put them in +# double quotes to make sure that they get re-expanded; and +# * put everything else in single quotes, so that it's not re-expanded. + +set -- \ + "-Dorg.gradle.appname=$APP_BASE_NAME" \ + -classpath "$CLASSPATH" \ + org.gradle.wrapper.GradleWrapperMain \ + "$@" + +# Stop when "xargs" is not available. +if ! command -v xargs >/dev/null 2>&1 +then + die "xargs is not available" +fi + +# Use "xargs" to parse quoted args. +# +# With -n1 it outputs one arg per line, with the quotes and backslashes removed. +# +# In Bash we could simply go: +# +# readarray ARGS < <( xargs -n1 <<<"$var" ) && +# set -- "${ARGS[@]}" "$@" +# +# but POSIX shell has neither arrays nor command substitution, so instead we +# post-process each arg (as a line of input to sed) to backslash-escape any +# character that might be a shell metacharacter, then use eval to reverse +# that process (while maintaining the separation between arguments), and wrap +# the whole thing up as a single "set" statement. +# +# This will of course break if any of these variables contains a newline or +# an unmatched quote. +# + +eval "set -- $( + printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | + xargs -n1 | + sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | + tr '\n' ' ' + )" '"$@"' + +exec "$JAVACMD" "$@" diff --git a/gradlew.bat b/gradlew.bat new file mode 100644 index 0000000000000000000000000000000000000000..53a6b238d414d91c30c5644c82393d27416fbbe6 --- /dev/null +++ b/gradlew.bat @@ -0,0 +1,91 @@ +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. +@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem + +@if "%DEBUG%"=="" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%"=="" set DIRNAME=. +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Resolve any "." and ".." in APP_HOME to make it shorter. +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if %ERRORLEVEL% equ 0 goto execute + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto execute + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* + +:end +@rem End local scope for the variables with windows NT shell +if %ERRORLEVEL% equ 0 goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! +set EXIT_CODE=%ERRORLEVEL% +if %EXIT_CODE% equ 0 set EXIT_CODE=1 +if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% +exit /b %EXIT_CODE% + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/pom.xml b/pom.xml deleted file mode 100644 index 4349fcd61e9617101af90e526d344bab2aebe391..0000000000000000000000000000000000000000 --- a/pom.xml +++ /dev/null @@ -1,107 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<project xmlns="http://maven.apache.org/POM/4.0.0" - xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" - xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> - <modelVersion>4.0.0</modelVersion> - - <groupId>org.example</groupId> - <artifactId>CanardE</artifactId> - <version>1.0-SNAPSHOT</version> - - <properties> - <maven.compiler.source>18</maven.compiler.source> - <maven.compiler.target>18</maven.compiler.target> - </properties> - - <dependencies> - <dependency> - <groupId>org.deeplearning4j</groupId> - <artifactId>deeplearning4j-core</artifactId> - <version>1.0.0-M1.1</version> - </dependency> - <dependency> - <groupId>org.nd4j</groupId> - <artifactId>nd4j-native-platform</artifactId> - <version>1.0.0-M1.1</version> - </dependency> - <dependency> - <groupId>com.fasterxml.jackson.core</groupId> - <artifactId>jackson-core</artifactId> - <version>2.12.1</version> - <scope>compile</scope> - </dependency> - <dependency> - <groupId>com.fasterxml.jackson.core</groupId> - <artifactId>jackson-annotations</artifactId> - <version>2.12.1</version> - <scope>compile</scope> - </dependency> - <dependency> - <groupId>com.fasterxml.jackson.core</groupId> - <artifactId>jackson-databind</artifactId> - <version>2.12.1</version> - <scope>compile</scope> - </dependency> - <dependency> - <groupId>fr.inrialpes.exmo</groupId> - <artifactId>procalign</artifactId> - <version>4.9</version> - <exclusions> - <exclusion> - <artifactId>log4j-core</artifactId> - <groupId>org.apache.logging.log4j</groupId> - </exclusion> - </exclusions> - <scope>system</scope> - <systemPath>${project.basedir}/lib/procalign.jar</systemPath> - </dependency> - <dependency> - <groupId>fr.inrialpes.exmo.</groupId> - <artifactId>ontowrap</artifactId> - <version>4.9</version> - <scope>system</scope> - <systemPath>${project.basedir}/lib/ontowrap.jar</systemPath> - </dependency> - <dependency> - <groupId>org.semanticweb.owl.align</groupId> - <artifactId>align</artifactId> - <version>4.9</version> - <scope>system</scope> - <systemPath>${project.basedir}/lib/align.jar</systemPath> - </dependency> - - - <dependency> - <groupId>commons-codec</groupId> - <artifactId>commons-codec</artifactId> - <version>1.15</version> - </dependency> - - - <!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-nop --> - <dependency> - <groupId>org.slf4j</groupId> - <artifactId>slf4j-nop</artifactId> - <version>1.7.36</version> - </dependency> - - - <dependency> - <groupId>org.apache.jena</groupId> - <artifactId>apache-jena-libs</artifactId> - <type>pom</type> - <version>4.4.0</version> - </dependency> - - - <!-- https://mvnrepository.com/artifact/org.apache.commons/commons-text --> - <dependency> - <groupId>org.apache.commons</groupId> - <artifactId>commons-text</artifactId> - <version>1.9</version> - </dependency> - - - </dependencies> - -</project> \ No newline at end of file diff --git a/settings.gradle b/settings.gradle new file mode 100644 index 0000000000000000000000000000000000000000..0542fd49fefc736ab0d0cc2f97541a7c77fdd168 --- /dev/null +++ b/settings.gradle @@ -0,0 +1,7 @@ +/* + * This file was generated by the Gradle 'init' task. + * + * This project uses @Incubating APIs which are subject to change. + */ + +rootProject.name = 'CanardE' diff --git a/src/main/java/irit/complex/ComplexAlignmentGeneration.java b/src/main/java/irit/complex/ComplexAlignmentGeneration.java index a748ed09e0560f71f624ee96ecce57dd6d5f23e0..8c7acc0fbf7eeb523f34f1441233fa5368397d90 100755 --- a/src/main/java/irit/complex/ComplexAlignmentGeneration.java +++ b/src/main/java/irit/complex/ComplexAlignmentGeneration.java @@ -8,16 +8,19 @@ import irit.dataset.DatasetManager; import irit.output.OutputManager; import irit.resource.IRI; import irit.resource.Resource; -import irit.similarity.EmbeddingManager; import irit.sparql.exceptions.IncompleteSubstitutionException; import irit.sparql.SparqlProxy; import irit.sparql.query.exception.SparqlEndpointUnreachableException; import irit.sparql.query.exception.SparqlQueryMalFormedException; import irit.sparql.query.select.SparqlSelect; +import net.sourceforge.argparse4j.ArgumentParsers; +import net.sourceforge.argparse4j.impl.Arguments; +import net.sourceforge.argparse4j.inf.ArgumentParser; +import net.sourceforge.argparse4j.inf.ArgumentParserException; +import net.sourceforge.argparse4j.inf.Namespace; import org.apache.jena.rdf.model.RDFNode; import java.io.IOException; -import java.nio.file.Files; import java.nio.file.Paths; import java.util.*; import java.util.concurrent.*; @@ -26,140 +29,120 @@ import java.util.concurrent.*; public class ComplexAlignmentGeneration { - public static void main(String[] args) throws SparqlEndpointUnreachableException, SparqlQueryMalFormedException, ExecutionException, InterruptedException, IncompleteSubstitutionException { + public static void main(String[] args) throws SparqlEndpointUnreachableException, SparqlQueryMalFormedException, ExecutionException, InterruptedException, IncompleteSubstitutionException, IOException { - System.out.println("==============================================================================="); - System.out.println("CanardE"); - System.out.println("==============================================================================="); - String datasets = args[0]; - String needs = args[1]; - String embeddings = args[2]; - String source = args[3]; - String target = args[4]; - String range = args[5]; + ArgumentParser parser = buildArgumentParser(); - Set<String> stringSet = Set.of(source, target); - - Map<String, String> ds = new HashMap<>(); try { - Files.walk(Paths.get(datasets), 1).forEach(path -> { - if (!path.toString().endsWith(".ttl") && !stringSet.contains(path.getFileName().toString())) return; - ds.put(path.getFileName().toString().split("_")[0], path.toString()); - }); - } catch (IOException e) { - throw new RuntimeException(e); - } + Namespace res = parser.parseArgs(args); + String source = res.get("source"); + String target = res.get("target"); + String cqa = res.get("cqa"); + String range = res.get("range"); + String output = res.get("output"); + boolean silent = res.get("silent"); + int maxMatches = res.get("maxMatches"); - System.out.println("Found " + ds.size() + " datasets."); + String sourceName = getFileName(source); + String targetName = getFileName(target); - Map<String, String> nd = new HashMap<>(); - Map<String, List<SparqlSelect>> cqas = new HashMap<>(); - try { - Files.walk(Paths.get(needs), 1).forEach(path -> { - String ont = path.getFileName().toString(); - if (!ds.containsKey(ont)) return; - nd.put(ont, path.toString()); - - - try { - Files.walk(path, 1).forEach(path1 -> { - if (Files.isDirectory(path1)) return; - Scanner squery = null; - try { - squery = new Scanner(path1); - } catch (IOException e) { - throw new RuntimeException(e); - } - - String query = squery.useDelimiter("\\Z").next(); - SparqlSelect sq = new SparqlSelect(query); - cqas.computeIfAbsent(ont, s -> new ArrayList<>()).add(sq); - squery.close(); - }); - } catch (IOException e) { - throw new RuntimeException(e); - } + List<SparqlSelect> sparqlSelects = SparqlSelect.load(cqa); + List<Float> rangeList = parseRange(range); - }); - } catch (IOException e) { - throw new RuntimeException(e); - } + DatasetManager.getInstance().load(sourceName, source); + DatasetManager.getInstance().load(targetName, target); - for (String s : ds.keySet()) { - if (nd.containsKey(s)) continue; - System.out.println("⚠️ Not found CQAs for " + s + "."); + + run(sourceName, targetName, sparqlSelects, rangeList, maxMatches, false, output); + + + } catch (ArgumentParserException e) { + parser.handleError(e); } - System.out.println("Needs loaded."); + } - System.out.println("Loading embeddings."); - Map<String, String[]> embs = new HashMap<>(); - try { - Files.walk(Paths.get(embeddings), 1).forEach(path -> { - if (Files.isDirectory(path)) return; - String f = path.getFileName().toString(); - String[] split = f.split("[_.]"); + public static ArgumentParser buildArgumentParser() { + ArgumentParser parser = ArgumentParsers.newFor("Canard").build() + .description("Complex alignment generator."); - if (split[1].equals("n")) embs.computeIfAbsent(split[0], s -> new String[2])[0] = path.toString(); - else if (split[1].equals("e")) embs.computeIfAbsent(split[0], s -> new String[2])[1] = path.toString(); + parser.addArgument("source") + .type(String.class) + .required(true) + .help("Source ontology."); - }); - } catch (IOException e) { - throw new RuntimeException(e); - } + parser.addArgument("target") + .type(String.class) + .required(true) + .help("Target ontology."); + parser.addArgument("cqa") + .type(String.class) + .required(true) + .help("CQA folder."); - embs.forEach((name, paths) -> { - try { - EmbeddingManager.load(paths[0], paths[1]); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); + parser.addArgument("--range") + .type(String.class) + .setDefault("0.8") + .help("Threshold range."); + parser.addArgument("--output") + .type(String.class) + .setDefault("output") + .help("Output folder."); - ds.forEach((name, path) -> { - DatasetManager.getInstance().load(name, path); - }); + parser.addArgument("--embedding") + .type(String.class) + .help("Path to embeddings."); + parser.addArgument("--silent") + .type(Boolean.class) + .action(Arguments.storeConst()) + .setConst(true) + .setDefault(false) + .help("Disable console output."); - List<String[]> datasetArgs = new ArrayList<>(); + parser.addArgument("--maxMatches") + .type(Integer.class) + .setDefault(10) + .help("Max Matches."); - ds.forEach((s, s2) -> { - if (source != null && !source.startsWith(s)) return; - ds.forEach((s1, s21) -> { - if (s.equals(s1)) return; - if (target != null && !target.startsWith(s1)) return; - datasetArgs.add(new String[]{s, s1}); - }); - }); + return parser; + } + public static String getFileName(String path) { + String[] split = Paths.get(path).getFileName().toString().split("\\."); + return split[0]; + } - String[] split = range.split(":"); - List<Float> ths = new ArrayList<>(); + public static List<Float> parseRange(String range) { + List<Float> ranges = new ArrayList<>(); + String[] split = range.split(":"); - for (float th = Float.parseFloat(split[0]); th <= Float.parseFloat(split[1]); th += Float.parseFloat(split[2])) { - ths.add(th); - } + float start = Float.parseFloat(split[0]); + float end = start; + float step = 0.1f; - String output = "output"; + if (split.length > 1) end = Float.parseFloat(split[1]); + if (split.length > 2) start = Float.parseFloat(split[2]); - for (String[] datasetArg : datasetArgs) { - run(datasetArg[0], datasetArg[1], cqas.get(datasetArg[0]), ths, 10, false, output); + for (; start < end; start += step) { + ranges.add(start); } - + return ranges; } public static void run(String sourceEndpoint, String targetEndpoint, List<SparqlSelect> queries, List<Float> th, int maxMatches, boolean reassess, String outputPath) throws SparqlEndpointUnreachableException, SparqlQueryMalFormedException, ExecutionException, InterruptedException, IncompleteSubstitutionException { + OutputManager outputManager = new OutputManager(); outputManager.initOutputEdoal(sourceEndpoint, targetEndpoint, th, outputPath); @@ -293,10 +276,8 @@ public class ComplexAlignmentGeneration { } } -// System.out.println("Number of correspondences found (" + threshold + "): " + output.size()); if (reassess) { - System.out.println("Reassessing similarity"); for (SubgraphForOutput s : output) { s.reassessSimilarityWithCounterExamples(sourceEndpoint, targetEndpoint, sq); } diff --git a/src/main/java/irit/complex/answer/SingleAnswer.java b/src/main/java/irit/complex/answer/SingleAnswer.java index e1ad15174033685d9e034b99e13ad3a90f4a3c05..b37e5dc79198a2117d5e74fcdc41412bb6247524 100755 --- a/src/main/java/irit/complex/answer/SingleAnswer.java +++ b/src/main/java/irit/complex/answer/SingleAnswer.java @@ -56,15 +56,6 @@ public class SingleAnswer extends Answer { HashSet<String> queryLabels = query.getLabels(); - INDArray zeros = Nd4j.zeros(DataType.DOUBLE, EmbeddingManager.embshape); - - for (String queryLabel : queryLabels) { - zeros = zeros.add(EmbeddingManager.get(queryLabel)); - } - - zeros = zeros.div(queryLabels.size()); - - double maxSim = -1; Triple bestTriple = new Triple(); @@ -81,10 +72,9 @@ public class SingleAnswer extends Answer { for (Triple t : iri.getTriples()) { double similarity = 0; -// t.retrieveIRILabels(targetEndpoint); -// t.retrieveTypes(targetEndpoint); -// similarity += t.compareLabel(queryLabels, similarityThreshold, targetEndpoint); - similarity += t.compareSim(zeros, similarityThreshold); + t.retrieveIRILabels(targetEndpoint); + t.retrieveTypes(targetEndpoint); + similarity += t.compareLabel(queryLabels, similarityThreshold, targetEndpoint); if (similarity > maxSim) { maxSim = similarity; diff --git a/src/main/java/irit/similarity/EmbeddingManager.java b/src/main/java/irit/similarity/EmbeddingManager.java index 9fd0e8417c760693c332653bc5da24df6627af8c..aa627efc5872ce2168070e64742c0e5c178b935c 100644 --- a/src/main/java/irit/similarity/EmbeddingManager.java +++ b/src/main/java/irit/similarity/EmbeddingManager.java @@ -1,5 +1,6 @@ package irit.similarity; +import org.apache.commons.text.similarity.LevenshteinDistance; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; @@ -32,18 +33,7 @@ public class EmbeddingManager { public static double getSim(String s1, String s2){ - INDArray n1 = embs1.get(s1); - INDArray n2 = embs1.get(s2); - - if (n1 == null){ - n1 = Nd4j.zeros(DataType.DOUBLE, embshape); - } - - if (n2 == null){ - n2 = Nd4j.zeros(DataType.DOUBLE, embshape); - } - - return Transforms.cosineSim(n1, n2); + return LevenshteinDistance.getDefaultInstance().apply(s1, s2) / (float) Math.max(s1.length(), s2.length()); } private static Map<String, INDArray> loadEmbs(String n1, String e1) throws IOException { diff --git a/src/main/java/irit/sparql/query/select/SparqlSelect.java b/src/main/java/irit/sparql/query/select/SparqlSelect.java index 74e6c2fe7d19b3425b5c792ebc095ee96c53f154..6d3a2530a7dca5a3683cae754898f9b2fc8dcd51 100755 --- a/src/main/java/irit/sparql/query/select/SparqlSelect.java +++ b/src/main/java/irit/sparql/query/select/SparqlSelect.java @@ -3,12 +3,13 @@ package irit.sparql.query.select; import irit.resource.IRI; import irit.sparql.query.SparqlQuery; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.Map; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; - +import java.util.stream.Collectors; public class SparqlSelect extends SparqlQuery { @@ -20,12 +21,12 @@ public class SparqlSelect extends SparqlQuery { mainQuery = mainQuery.trim().replaceAll("SELECT", "select").replaceAll("WHERE", "where").replaceAll("\n", " "); selectFocus = new ArrayList<>(); Pattern pattern = Pattern.compile(""" - select[ \t - distncDISTNC]+(\\?[A-Za-z\\d_-]+)[ \t - ]+(\\?*[A-Za-z\\d_-]*[ \t - ]*)where[ \t - ]*\\{(.+)}[ \t - ]*$"""); + select[ \t + distncDISTNC]+(\\?[A-Za-z\\d_-]+)[ \t + ]+(\\?*[A-Za-z\\d_-]*[ \t + ]*)where[ \t + ]*\\{(.+)}[ \t + ]*$"""); Matcher matcher = pattern.matcher(mainQuery); while (matcher.find()) { selectFocus.add(matcher.group(1).trim()); @@ -34,12 +35,12 @@ public class SparqlSelect extends SparqlQuery { } where = matcher.group(3).trim(); setAggregate(); - } + } Pattern pattern2 = Pattern.compile(""" - select([ \t - distncDISTNC]+\\?[A-Za-z\\d_-]+[ \t - ]+\\?*[A-Za-z\\d_-]*[ \t - ]*)where"""); + select([ \t + distncDISTNC]+\\?[A-Za-z\\d_-]+[ \t + ]+\\?*[A-Za-z\\d_-]*[ \t + ]*)where"""); Matcher matcher2 = pattern2.matcher(mainQuery); if (matcher2.find()) { select = matcher2.group(1); @@ -48,6 +49,26 @@ public class SparqlSelect extends SparqlQuery { } + + public static List<SparqlSelect> load(String path) throws IOException { + List<SparqlSelect> sparqlSelects; + try (var walk = Files.walk(Paths.get(path), 1)) { + sparqlSelects = walk + .filter(path1 -> !Files.isDirectory(path1)) + .map(path1 -> { + try { + return Files.readString(path1); + } catch (IOException e) { + throw new RuntimeException(e); + } + }) + .map(SparqlSelect::new).toList(); + + } + return sparqlSelects; + + } + public String getSelect() { return select; } @@ -93,11 +114,10 @@ public class SparqlSelect extends SparqlQuery { } - - public HashSet<String> getLabels(){ + public HashSet<String> getLabels() { HashSet<String> queryLabels = new HashSet<>(); - for (Map.Entry<String, IRI> iri : getIRIList().entrySet()){ + for (Map.Entry<String, IRI> iri : getIRIList().entrySet()) { queryLabels.addAll(iri.getValue().getLabels()); } return queryLabels;