From ef2ba04496cab1c6efab9debe4dc0d40c9826478 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lukas=20Bl=C3=BCbaum?= Date: Tue, 17 Sep 2019 09:59:23 +0200 Subject: [PATCH] Merging SPARQL functionality in qa.commons + 2 unit tests in qa.ml --- .travis.yml | 9 +- .../org/aksw/qa/commons/sparql/SPARQL.java | 88 +++++++++- .../aksw/qa/commons/utils/SPARQLExecutor.java | 157 +++++++----------- .../measure/SPARQLBasedEvaluationTest.java | 10 +- .../qa/commons/utils/SPARQLExecutorTest.java | 3 +- .../dependencies/DependenciesTest.java | 28 ++++ .../PartOfSpeechTagsTest.java | 28 ++++ .../QuestionTypeAnalyzerTest.java | 3 +- .../main/java/org/aksw/qa/systems/OKBQA.java | 1 - .../main/java/org/aksw/qa/systems/QUINT.java | 5 - .../java/org/aksw/qa/systems/SorokinQA.java | 4 +- 11 files changed, 212 insertions(+), 124 deletions(-) create mode 100644 qa.ml/src/test/java/org/aksw/mlqa/analyzer/dependencies/DependenciesTest.java create mode 100644 qa.ml/src/test/java/org/aksw/mlqa/analyzer/partofspeechtags/PartOfSpeechTagsTest.java diff --git a/.travis.yml b/.travis.yml index a1803e72..b3098421 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,11 +1,12 @@ language: java sudo: false +dist: trusty -jdk: +jdk: - oraclejdk8 - -script: - - mvn test -B + +script: + - mvn test -B after_success: - bash <(curl -s https://codecov.io/bash) diff --git a/qa.commons/src/main/java/org/aksw/qa/commons/sparql/SPARQL.java b/qa.commons/src/main/java/org/aksw/qa/commons/sparql/SPARQL.java index 2b8850f7..115c5a9d 100644 --- a/qa.commons/src/main/java/org/aksw/qa/commons/sparql/SPARQL.java +++ b/qa.commons/src/main/java/org/aksw/qa/commons/sparql/SPARQL.java @@ -1,5 +1,6 @@ package org.aksw.qa.commons.sparql; +import java.util.ArrayList; import java.util.Set; import java.util.concurrent.ExecutionException; @@ -7,9 +8,12 @@ import org.aksw.jena_sparql_api.cache.h2.CacheUtilsH2; import org.aksw.jena_sparql_api.core.FluentQueryExecutionFactory; import org.aksw.jena_sparql_api.core.QueryExecutionFactory; +import org.aksw.jena_sparql_api.http.QueryExecutionFactoryHttp; import org.aksw.qa.commons.qald.QALD4_EvaluationUtils; +import org.aksw.qa.commons.utils.Results; import org.apache.jena.query.QueryExecution; import org.apache.jena.query.QueryFactory; +import org.apache.jena.query.QuerySolution; import org.apache.jena.query.ResultSet; import org.apache.jena.rdf.model.Literal; import org.apache.jena.rdf.model.RDFNode; @@ -97,7 +101,7 @@ public Set sparql(final String query) throws ExecutionException { } /** - * For use with {@link #sparql(String)} Extracts answer strings. Can be directly set as golden answers in IQuesion. + * For use with {@link #sparql(String)} Extracts answer strings. Can be directly set as golden answers in an IQuestion. * * @param answers * @return @@ -122,7 +126,60 @@ public static Set extractAnswerStrings(final Set answers) { } return set; } + + /** + * Executes a select query for the given endpoint and query. Returns the answer as an {@link Results} object. + * @param query + * @param endpoint + * @return + */ + public static Results executeSelect(final String query, final String endpoint) { + QueryExecutionFactory qef = new QueryExecutionFactoryHttp(endpoint); + QueryExecution qe = qef.createQueryExecution(query); + + ResultSet rs = qe.execSelect(); + + Results res = new Results(); + res.header.addAll(rs.getResultVars()); + + while(rs.hasNext()) { + QuerySolution sol = rs.nextSolution(); + res.table.add(new ArrayList()); + for(String head: res.header) { + String answer = ""; + + if(sol.get(head).isResource()) { + answer = sol.getResource(head).toString(); + } else { + String temp = sol.get(head).toString(); + if(temp.contains("@")) { + answer = "\"" + temp.substring(0, temp.indexOf("@")) + "\"" + temp.substring(temp.indexOf("@")); + } else if (temp.contains("^^")){ + answer = "\"" + temp.substring(0, temp.indexOf("^")) + "\"^^<" + temp.substring(temp.indexOf("^")+2) + ">"; + } else { + answer = temp; + } + } + res.table.get(res.table.size()-1).add(answer); + } + } + closeExecFactory(qef); + return res; + } + /** + * Executes an ask query for the given endpoint and query. + * @param query + * @param endpoint + * @return + */ + public static Boolean executeAsk(final String query, final String endpoint) { + QueryExecutionFactory qef = new QueryExecutionFactoryHttp(endpoint); + QueryExecution qe = qef.createQueryExecution(query); + closeExecFactory(qef); + return qe.execAsk(); + } + /** * @return - The time to live of frontendCache */ @@ -152,4 +209,33 @@ public static boolean isValidSparqlQuery(final String sparql) { } return true; } + + /** + * Checks if the given endpoint is alive. If fails, returns false. + * @param endpoint + * @return + */ + public static boolean isEndpointAlive(final String endpoint) { + QueryExecutionFactory qef = new QueryExecutionFactoryHttp(endpoint); + try { + QueryExecution qe = qef.createQueryExecution("PREFIX foaf: ASK { ?x foaf:name \"Alice\" }"); + qe.execAsk(); + return true; + } catch (Exception e) { + + } finally { + closeExecFactory(qef); + } + return false; + } + + private static void closeExecFactory(QueryExecutionFactory qef) { + if(qef != null) { + try { + qef.close(); + } catch (Exception e) { + e.printStackTrace(); + } + } + } } diff --git a/qa.commons/src/main/java/org/aksw/qa/commons/utils/SPARQLExecutor.java b/qa.commons/src/main/java/org/aksw/qa/commons/utils/SPARQLExecutor.java index 756e25c7..9a29bc6d 100644 --- a/qa.commons/src/main/java/org/aksw/qa/commons/utils/SPARQLExecutor.java +++ b/qa.commons/src/main/java/org/aksw/qa/commons/utils/SPARQLExecutor.java @@ -1,135 +1,91 @@ package org.aksw.qa.commons.utils; -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.StringReader; -import java.net.URL; -import java.net.URLEncoder; -import java.nio.charset.Charset; -import java.util.LinkedList; -import java.util.List; +import java.util.ArrayList; import java.util.Set; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.ParserConfigurationException; - import org.aksw.qa.commons.qald.QALD4_EvaluationUtils; import org.aksw.qa.commons.sparql.SPARQL; import org.aksw.qa.commons.sparql.ThreadedSPARQL; import org.apache.jena.query.QueryExecution; import org.apache.jena.query.QueryExecutionFactory; +import org.apache.jena.query.QuerySolution; import org.apache.jena.query.ResultSet; import org.apache.jena.rdf.model.RDFNode; import org.apache.jena.rdf.model.impl.ResourceImpl; -import org.w3c.dom.Document; -import org.w3c.dom.NodeList; -import org.xml.sax.InputSource; -import org.xml.sax.SAXException; import com.google.common.collect.Sets; -//@Deprecated /** - * In qa.commons, there are 2 differnet ways to fire queries to an endpoint, {@link SPARQL} and this class. This becomes tedious to keep clean, especially when both share the same (hardcopied) code. - * Please consider merging the functionality in favor of {@link SPARQL} or {@link ThreadedSPARQL} - * - * @param service - * @param query - * @return + * Please consider using {@link SPARQL} or {@link ThreadedSPARQL} */ +@Deprecated public class SPARQLExecutor { - - public static boolean isEndpointAlive(final String endpoint) { + + /** + * An exact copy of this code is {@link SPARQL#isEndpointAlive(String)}. + * @param endpoint + * @return + */ + @Deprecated + public static boolean isEndpointAlive(final String endpoint) { try { - BufferedReader reader = getReader(endpoint); - reader.close(); + QueryExecution qeExe = QueryExecutionFactory.sparqlService(endpoint, "PREFIX foaf: ASK { ?x foaf:name \"Alice\" }"); + qeExe.execAsk(); return true; - } catch (IOException e) { + } catch (Exception e) { + } return false; - } - - //TODO change that to use proper JENA library + + /** + * An exact copy of this code is {@link SPARQL#executeSelect(String)}. + * @param query + * @param endpoint + * @return + */ + @Deprecated public static Results executeSelect(final String query, final String endpoint) { - BufferedReader reader; - try { - reader = getReader(endpoint + "?query=" + URLEncoder.encode(query, "UTF-8")); - String results = readAll(reader); - reader.close(); - Results ret = processSelectResults(results); - return ret; - } catch (IOException | ParserConfigurationException | SAXException e) { - e.printStackTrace(); - } - return null; - } - - public static Boolean executeAsk(final String query, final String endpoint) { - BufferedReader reader; - try { - reader = getReader(endpoint + "?query=" + URLEncoder.encode(query, "UTF-8")); - String results = readAll(reader); - reader.close(); - return Boolean.valueOf(results); - } catch (IOException e) { - e.printStackTrace(); - } - return null; - } - - private static Results processSelectResults(final String results) throws ParserConfigurationException, SAXException, IOException { - // Set ret = CollectionUtils.newHashSet(); - InputSource is = new InputSource(new StringReader(results)); - DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); - Document doc = builder.parse(is); - NodeList nodes = doc.getFirstChild().getChildNodes(); + QueryExecution qeExe = QueryExecutionFactory.sparqlService(endpoint, query); + ResultSet rs = qeExe.execSelect(); + Results res = new Results(); - for (int i = 0; i < nodes.getLength(); i++) { - NodeList childs = nodes.item(i).getChildNodes(); - - List row = new LinkedList<>(); - for (int j = 0; j < childs.getLength(); j++) { - if (childs.item(j).getNodeName().equals("th")) { - res.header.add(childs.item(j).getTextContent()); - continue; - } - - String add = ""; - if (childs.item(j).hasChildNodes()) { - add = childs.item(j).getFirstChild().getTextContent().trim(); + res.header.addAll(rs.getResultVars()); + + while(rs.hasNext()) { + QuerySolution sol = rs.nextSolution(); + res.table.add(new ArrayList()); + for(String head: res.header) { + String answer = ""; + + if(sol.get(head).isResource()) { + answer = sol.getResource(head).toString(); } else { - add = childs.item(j).getTextContent().trim(); - } - if (!add.isEmpty()) { - row.add(add); + String temp = sol.get(head).toString(); + if(temp.contains("@")) { + answer = "\"" + temp.substring(0, temp.indexOf("@")) + "\"" + temp.substring(temp.indexOf("@")); + } else if (temp.contains("^^")){ + answer = "\"" + temp.substring(0, temp.indexOf("^")) + "\"^^<" + temp.substring(temp.indexOf("^")+2) + ">"; + } else { + answer = temp; + } } - } - if (!row.isEmpty()) { - res.table.add(row); + res.table.get(res.table.size()-1).add(answer); } } - return res; } - private static String readAll(final BufferedReader reader) throws IOException { - StringBuilder sb = new StringBuilder(); - int cp; - while ((cp = reader.read()) != -1) { - sb.append((char) cp); - } - return sb.toString(); - } - - private static BufferedReader getReader(final String endpoint) throws IOException { - URL url = new URL(endpoint); - InputStream stream = url.openStream(); - BufferedReader reader = new BufferedReader(new InputStreamReader(stream, Charset.forName("UTF-8"))); - return reader; + /** + * An exact copy of this code is {@link SPARQL#executeAsk(String)}. + * @param query + * @param endpoint + * @return + */ + @Deprecated + public static Boolean executeAsk(final String query, final String endpoint) { + QueryExecution qeExe = QueryExecutionFactory.sparqlService(endpoint, query); + return qeExe.execAsk(); } /** @@ -165,5 +121,4 @@ public static Set sparql(final String service, final String query) { } return set; } - } diff --git a/qa.commons/src/test/java/org/aksw/qa/commons/measure/SPARQLBasedEvaluationTest.java b/qa.commons/src/test/java/org/aksw/qa/commons/measure/SPARQLBasedEvaluationTest.java index 3e2eeef5..24f82eda 100644 --- a/qa.commons/src/test/java/org/aksw/qa/commons/measure/SPARQLBasedEvaluationTest.java +++ b/qa.commons/src/test/java/org/aksw/qa/commons/measure/SPARQLBasedEvaluationTest.java @@ -4,7 +4,7 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; -import org.aksw.qa.commons.utils.SPARQLExecutor; +import org.aksw.qa.commons.sparql.SPARQL; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -15,8 +15,8 @@ public class SPARQLBasedEvaluationTest { @Test public void testEndpointAvailibility() { - assertTrue(SPARQLExecutor.isEndpointAlive("http://dbpedia.org/sparql")); - assertFalse(SPARQLExecutor.isEndpointAlive("http://dbpedia2.org/sparql")); + assertTrue(SPARQL.isEndpointAlive("http://dbpedia.org/sparql")); + assertFalse(SPARQL.isEndpointAlive("http://dbpedia2.org/sparql")); } @Test @@ -43,9 +43,9 @@ public void testTooGenericQuery() { public void testTooSpecificQuery() { // SELECT COUNT(?x)... String sparqlQuery = "PREFIX dbo: " + "PREFIX res: " + "PREFIX rdf: " - + "SELECT COUNT(DISTINCT ?uri) WHERE { " + "?uri rdf:type dbo:Film ." + "?uri dbo:starring res:Julia_Roberts .}"; + + "SELECT (COUNT(DISTINCT ?uri) as ?u) WHERE { " + "?uri rdf:type dbo:Film ." + "?uri dbo:starring res:Julia_Roberts .}"; String targetSPARQLQuery = "PREFIX dbo: " + "PREFIX res: " + "PREFIX rdf: " - + "SELECT COUNT(DISTINCT ?uri) WHERE { " + "?uri rdf:type dbo:Film ." + "?uri dbo:starring res:Julia_Roberts ." + "?uri dbo:director res:Garry_Marshall .}"; + + "SELECT (COUNT(DISTINCT ?uri) as ?u) WHERE { " + "?uri rdf:type dbo:Film ." + "?uri dbo:starring res:Julia_Roberts ." + "?uri dbo:director res:Garry_Marshall .}"; double precision = SPARQLBasedEvaluation.precision(sparqlQuery, targetSPARQLQuery, endpoint); double recall = SPARQLBasedEvaluation.recall(sparqlQuery, targetSPARQLQuery, endpoint); double fMeasure = SPARQLBasedEvaluation.fMeasure(sparqlQuery, targetSPARQLQuery, endpoint); diff --git a/qa.commons/src/test/java/org/aksw/qa/commons/utils/SPARQLExecutorTest.java b/qa.commons/src/test/java/org/aksw/qa/commons/utils/SPARQLExecutorTest.java index 0300ad73..9b087577 100644 --- a/qa.commons/src/test/java/org/aksw/qa/commons/utils/SPARQLExecutorTest.java +++ b/qa.commons/src/test/java/org/aksw/qa/commons/utils/SPARQLExecutorTest.java @@ -58,7 +58,6 @@ public void testResults() { assertTrue(res.header.contains("s")); assertTrue(res.header.contains("o")); List row = res.table.get(0); - assertTrue(row.get(res.header.indexOf("o")).equals("http://www.w3.org/2002/07/owl#FunctionalProperty")); assertTrue(row.get(res.header.indexOf("s")).equals("http://dbpedia.org/ontology/deathDate")); row = res.table.get(1); @@ -78,7 +77,7 @@ private Object[] getGoldenArray1() { private Object[] getGoldenArray2() { String[] ret = new String[3]; ret[0] = "\"1785-3-7\"^^"; - ret[1] = "1785-03-07"; + ret[1] = "\"1785-03-07\"^^"; ret[2] = "\"1950-1-9\"^^"; Arrays.sort(ret); return ret; diff --git a/qa.ml/src/test/java/org/aksw/mlqa/analyzer/dependencies/DependenciesTest.java b/qa.ml/src/test/java/org/aksw/mlqa/analyzer/dependencies/DependenciesTest.java new file mode 100644 index 00000000..45af4ed3 --- /dev/null +++ b/qa.ml/src/test/java/org/aksw/mlqa/analyzer/dependencies/DependenciesTest.java @@ -0,0 +1,28 @@ +package org.aksw.mlqa.analyzer.dependencies; + +import static org.junit.Assert.assertArrayEquals; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import org.junit.Test; + +public class DependenciesTest { + + @SuppressWarnings("unchecked") + @Test + public void dependenciesTest() { + Dependencies dep = new Dependencies(); + Map dependencies = (Map) dep.analyze("What is the capital of Germany?"); + + int[] actuals = new int[dependencies.keySet().size()]; + List keyList = new ArrayList(dependencies.keySet()); + for(int i = 0; i pos = (Map) dep.analyze("What is the capital of Germany?"); + + int[] actuals = new int[pos.keySet().size()]; + List keyList = new ArrayList(pos.keySet()); + for(int i = 0; i questions = LoaderController.load(Dataset.QALD9_Test_Multilingual); @@ -47,7 +46,7 @@ public void resultTypeBoolean() { for (IQuestion q : questions) { // Classify query type - boolean classification = questionTypeAnalyzer.isASKQuestion(q.getLanguageToQuestion().get("en")); + boolean classification = QuestionTypeAnalyzer.isASKQuestion(q.getLanguageToQuestion().get("en")); counter++; if (classification) { diff --git a/qa.systems/src/main/java/org/aksw/qa/systems/OKBQA.java b/qa.systems/src/main/java/org/aksw/qa/systems/OKBQA.java index 2a023606..3100ecdb 100644 --- a/qa.systems/src/main/java/org/aksw/qa/systems/OKBQA.java +++ b/qa.systems/src/main/java/org/aksw/qa/systems/OKBQA.java @@ -55,7 +55,6 @@ public void processResponse(String response, IQuestion question) { String answerString = result.getString("answer"); resultSet.add(answerString); - } //Get Query from log diff --git a/qa.systems/src/main/java/org/aksw/qa/systems/QUINT.java b/qa.systems/src/main/java/org/aksw/qa/systems/QUINT.java index 2137776d..c7abb80d 100644 --- a/qa.systems/src/main/java/org/aksw/qa/systems/QUINT.java +++ b/qa.systems/src/main/java/org/aksw/qa/systems/QUINT.java @@ -64,9 +64,4 @@ public void processQALDResp(HttpResponse response, IQuestion question) throws Js } question.setGoldenAnswers(resultSet); } - - public static void main(String[] args) throws Exception { - ASystem a = new SorokinQA(); - System.out.println(a.search("How many children did Benjamin Franklin have?", "en")); - } } diff --git a/qa.systems/src/main/java/org/aksw/qa/systems/SorokinQA.java b/qa.systems/src/main/java/org/aksw/qa/systems/SorokinQA.java index a881da10..14d423ab 100644 --- a/qa.systems/src/main/java/org/aksw/qa/systems/SorokinQA.java +++ b/qa.systems/src/main/java/org/aksw/qa/systems/SorokinQA.java @@ -10,7 +10,6 @@ public class SorokinQA extends Gen_HTTP_QA_Sys_JSON { - //private static final String URL = "http://semanticparsing.ukp.informatik.tu-darmstadt.de:5000/question-answering/answerforqald/"; private static final String URL_UG = "http://semanticparsing.ukp.informatik.tu-darmstadt.de:5000/question-answering/ungroundedgraph/"; private static final String URL_GG = "http://semanticparsing.ukp.informatik.tu-darmstadt.de:5000/question-answering/groundedgraphs/"; private static final String URL_EG = "http://semanticparsing.ukp.informatik.tu-darmstadt.de:5000/question-answering/evaluategraphs/"; @@ -20,8 +19,7 @@ public SorokinQA() { } /** - * Overriding original search method to implement SorokinQA's three step requests for - * QA + * Overriding original search method to implement SorokinQA's three step requests for QA */ @SuppressWarnings("unchecked") @Override