diff --git a/src/main/java/irit/complex/ComplexAlignmentGeneration.java b/src/main/java/irit/complex/ComplexAlignmentGeneration.java index a9933464b5245264fbc0d4b15ed895e254f39e0f..89ca501f6aaad429dfe629a8ed8c1d4ad9d37d43 100755 --- a/src/main/java/irit/complex/ComplexAlignmentGeneration.java +++ b/src/main/java/irit/complex/ComplexAlignmentGeneration.java @@ -23,6 +23,7 @@ import org.apache.jena.rdf.model.RDFNode; import java.io.IOException; import java.nio.file.Paths; +import java.time.Instant; import java.util.*; import java.util.concurrent.*; @@ -32,7 +33,6 @@ public class ComplexAlignmentGeneration { public static void main(String[] args) throws SparqlEndpointUnreachableException, SparqlQueryMalFormedException, ExecutionException, InterruptedException, IncompleteSubstitutionException, IOException { - ArgumentParser parser = buildArgumentParser(); @@ -67,6 +67,7 @@ public class ComplexAlignmentGeneration { parser.handleError(e); } + } @@ -162,7 +163,6 @@ public class ComplexAlignmentGeneration { public static void align(SparqlSelect sq, String sourceEndpoint, String targetEndpoint, int maxMatches, boolean reassess, List<Float> th, OutputManager outputManager) throws SparqlEndpointUnreachableException, SparqlQueryMalFormedException, ExecutionException, InterruptedException, IncompleteSubstitutionException { Set<Answer> matchedAnswers = getMatchedAnswers(sq, sourceEndpoint, targetEndpoint, maxMatches); - for (float threshold : th) { List<SubgraphForOutput> subgraphForOutputs = buildSingleOutput(matchedAnswers, sq, sourceEndpoint, targetEndpoint, threshold, reassess); @@ -252,7 +252,6 @@ public class ComplexAlignmentGeneration { private static List<SubgraphForOutput> buildSingleOutput(Set<Answer> matchedAnswers, SparqlSelect sq, String sourceEndpoint, String targetEndpoint, float threshold, boolean reassess) throws SparqlEndpointUnreachableException, SparqlQueryMalFormedException { HashSet<InstantiatedSubgraph> goodSubgraphs = new HashSet<>(); for (Answer ans : matchedAnswers) { - HashSet<InstantiatedSubgraph> localSubgraphs = ans.findCorrespondingSubGraph(sq, targetEndpoint, threshold); goodSubgraphs.addAll(localSubgraphs); } @@ -279,13 +278,13 @@ public class ComplexAlignmentGeneration { } } - if (reassess) { for (SubgraphForOutput s : output) { s.reassessSimilarityWithCounterExamples(sourceEndpoint, targetEndpoint, sq); } } + Collections.sort(output); ArrayList<SubgraphForOutput> singleOutput = new ArrayList<>(); if (output.size() > 0 && output.get(output.size() - 1).getSimilarity() < 0.6 && output.get(output.size() - 1).getSimilarity() > 0.01) { @@ -297,6 +296,7 @@ public class ComplexAlignmentGeneration { if (output.get(i).getSimilarity() == sim) { singleOutput.add(output.get(i)); + } else { moreCorrespondences = false; } diff --git a/src/main/java/irit/complex/answer/SingleAnswer.java b/src/main/java/irit/complex/answer/SingleAnswer.java index b37e5dc79198a2117d5e74fcdc41412bb6247524..4be44baea4d00c12004b2daae66dbb535b412699 100755 --- a/src/main/java/irit/complex/answer/SingleAnswer.java +++ b/src/main/java/irit/complex/answer/SingleAnswer.java @@ -62,6 +62,7 @@ public class SingleAnswer extends Answer { HashSet<InstantiatedSubgraph> goodTriples = new HashSet<>(); int count = 0; + for (IRI iri : res.getSimilarIRIs()) { if (count < numberMaxOfExploredAnswers) { @@ -150,7 +151,7 @@ public class SingleAnswer extends Answer { iri.getValue() + " ?predicate ?object." + "MINUS{ " + iri.getValue() + " <http://www.w3.org/2002/07/owl#sameAs> ?object.}" - + "}LIMIT 500"; + + "} LIMIT 500"; List<Map<String, RDFNode>> result = SparqlProxy.query(targetEndpoint, query); diff --git a/src/main/java/irit/resource/IRI.java b/src/main/java/irit/resource/IRI.java index 170e8ed0384eae2a6a4a7eb28ac186443d282c10..6eadbc5da59acc48b9344b723ed2b8e54e05be67 100755 --- a/src/main/java/irit/resource/IRI.java +++ b/src/main/java/irit/resource/IRI.java @@ -124,7 +124,6 @@ public class IRI extends Resource { } } - /*Check if a match is in the target dataset*/ for (IRI match : allMatches) { if ( DatasetManager.getInstance().labelMaps.get(targetEndpoint).exists(match.toString())) { similarIRIs.add(match); diff --git a/src/main/java/irit/similarity/EmbeddingManager.java b/src/main/java/irit/similarity/EmbeddingManager.java index aa627efc5872ce2168070e64742c0e5c178b935c..468595f1f000576225bca138c01801bcc0b07983 100644 --- a/src/main/java/irit/similarity/EmbeddingManager.java +++ b/src/main/java/irit/similarity/EmbeddingManager.java @@ -13,12 +13,15 @@ import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.regex.Matcher; +import java.util.regex.Pattern; public class EmbeddingManager { private static Map<String, INDArray> embs1 = new HashMap<>(); public static long[] embshape; + private static final Pattern pattern = Pattern.compile("([^>]+)[#/]([A-Za-z0-9_-]+)"); public static void load(String n1, String e1) throws IOException { @@ -31,9 +34,20 @@ public class EmbeddingManager { } - public static double getSim(String s1, String s2){ + public static double getSim(String s1, String s2) { + s1 = getSuffix(s1).toLowerCase(); + s2 = getSuffix(s2).toLowerCase(); + return 1 - LevenshteinDistance.getDefaultInstance().apply(s1, s2) / (float) Math.max(s1.length(), s2.length()); + } + + private static String getSuffix(String value) { - return LevenshteinDistance.getDefaultInstance().apply(s1, s2) / (float) Math.max(s1.length(), s2.length()); + Matcher matcher = pattern.matcher(value); + if (matcher.find()) { + return matcher.group(2); + } else { + return value; + } } private static Map<String, INDArray> loadEmbs(String n1, String e1) throws IOException { @@ -54,17 +68,17 @@ public class EmbeddingManager { return embsMap; } - public static INDArray get(String e1){ + public static INDArray get(String e1) { if (!embs1.containsKey(e1)) return Nd4j.zeros(DataType.DOUBLE, embshape); return embs1.get(e1); } - private static String processLabel(String line){ + private static String processLabel(String line) { line = line.replaceAll("\\\\n", "\\n").trim(); - if (line.startsWith("http://") && line.contains("#")){ + if (line.startsWith("http://") && line.contains("#")) { String[] split = line.split("#"); - if (split.length > 1){ + if (split.length > 1) { line = split[1]; } else { line = split[0];