Skip to content

Commit

Permalink
Merge pull request #446 from the-qa-company/master
Browse files Browse the repository at this point in the history
Sync master into dev
  • Loading branch information
ate47 authored Mar 7, 2024
2 parents bb43285 + 32b7a92 commit b4a4935
Show file tree
Hide file tree
Showing 21 changed files with 225 additions and 29 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ The QA Company over the social networks
## About

The qEndpoint is a highly scalable triple store with full-text and [GeoSPARQL](https://www.ogc.org/standards/geosparql) support. It can be used as a standalone SPARQL endpoint, or as a dependency.
The qEndpoint is for example used in [Kohesio](https://kohesio.ec.europa.eu/) where each interaction with the UI corresponds to an underlying SPARQL query on the qEndpoint.
The qEndpoint is for example used in [Kohesio](https://kohesio.ec.europa.eu/) where each interaction with the UI corresponds to an underlying SPARQL query on the qEndpoint. The qEndpoint is also part of [QAnswer](https://qanswer.ai), enabling question answering over RDF graphs.

### Built With

Expand Down
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

<groupId>com.the-qa-company</groupId>
<artifactId>qendpoint-parent</artifactId>
<version>1.15.0</version>
<version>1.15.6</version>

<packaging>pom</packaging>

Expand Down
4 changes: 2 additions & 2 deletions qendpoint-backend/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>qendpoint-backend</artifactId>
<version>1.15.0</version>
<version>1.15.6</version>

<packaging>jar</packaging>

Expand All @@ -15,7 +15,7 @@
<parent>
<groupId>com.the-qa-company</groupId>
<artifactId>qendpoint-parent</artifactId>
<version>1.15.0</version>
<version>1.15.6</version>
</parent>

<licenses>
Expand Down
4 changes: 2 additions & 2 deletions qendpoint-cli/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>qendpoint-cli</artifactId>
<version>1.15.0</version>
<version>1.15.6</version>

<name>qendpoint package</name>
<description>Package of the qendpoint.</description>
Expand All @@ -11,7 +11,7 @@
<parent>
<groupId>com.the-qa-company</groupId>
<artifactId>qendpoint-parent</artifactId>
<version>1.15.0</version>
<version>1.15.6</version>
</parent>

<dependencies>
Expand Down
4 changes: 2 additions & 2 deletions qendpoint-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>qendpoint-core</artifactId>
<version>1.15.0</version>
<version>1.15.6</version>

<packaging>jar</packaging>

Expand All @@ -27,7 +27,7 @@
<parent>
<groupId>com.the-qa-company</groupId>
<artifactId>qendpoint-parent</artifactId>
<version>1.15.0</version>
<version>1.15.6</version>
</parent>

<licenses>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,14 @@ default boolean supportsLanguageOfId() {
return false;
}

/**
* Tells whether this dictionary is a multi-section dictionary (MSD).
* <p>
* The default implementation returns {@code false}; multi-section
* implementations are expected to override it.
*
* @return {@code true} if the dictionary is an MSD. If so,
* {@link #getObjects()} can't be used and the {@link #getAllObjects()}
* method should be used instead.
*/
default boolean isMultiSectionDictionary() {
return false;
}

/**
* Returns whether the dictionary supports graphs
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,11 @@ public void reset() {
}
}

/**
* @return always {@code true}: this implementation reports itself as a
* multi-section dictionary
*/
@Override
public boolean isMultiSectionDictionary() {
return true;
}

@Override
public OptimizedExtractor createOptimizedMapExtractor() {
return new MultDictionaryPFCOptimizedExtractor(this);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -528,6 +528,11 @@ public OptimizedExtractor createOptimizedMapExtractor() {
return new MultipleSectionDictionaryLangPFCOptimizedExtractor(this);
}

/**
* @return always {@code true}: this implementation reports itself as a
* multi-section dictionary
*/
@Override
public boolean isMultiSectionDictionary() {
return true;
}

/**
* NOTE(review): presumably one entry of {@code objectIdLocationsSec} exists
* per objects section; confirm where the field is populated.
*
* @return the length of the {@code objectIdLocationsSec} array
*/
public int getObjectsSectionCount() {
return objectIdLocationsSec.length;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package com.the_qa_company.qendpoint.core.exceptions;

import java.io.IOException;

/**
 * {@link IOException} dedicated to signature mismatches, so that callers can
 * catch this failure separately from other I/O errors, for example when a
 * stored index signature does not match the one computed from the triples.
 */
public class SignatureIOException extends IOException {

	/**
	 * Create the exception from its cause only.
	 *
	 * @param cause the underlying failure
	 */
	public SignatureIOException(Throwable cause) {
		super(cause);
	}

	/**
	 * Create the exception with a message and the failure that caused it.
	 *
	 * @param message description of the mismatch
	 * @param cause   the underlying failure
	 */
	public SignatureIOException(String message, Throwable cause) {
		super(message, cause);
	}

	/**
	 * Create the exception with a message.
	 *
	 * @param message description of the mismatch
	 */
	public SignatureIOException(String message) {
		super(message);
	}

	/**
	 * Create the exception without message nor cause.
	 */
	public SignatureIOException() {
		super();
	}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import com.beust.jcommander.Parameter;
import com.beust.jcommander.internal.Lists;
import com.the_qa_company.qendpoint.core.dictionary.DictionarySection;
import com.the_qa_company.qendpoint.core.dictionary.impl.MultipleBaseDictionary;
import com.the_qa_company.qendpoint.core.exceptions.NotFoundException;
import com.the_qa_company.qendpoint.core.hdt.HDT;
import com.the_qa_company.qendpoint.core.hdt.HDTManager;
Expand Down Expand Up @@ -237,7 +236,7 @@ public void exec() throws Throwable {
try (HDT hdt = hdtl) {
boolean error;
long count = 0;
if (hdt.getDictionary() instanceof MultipleBaseDictionary) {
if (hdt.getDictionary().isMultiSectionDictionary()) {
colorTool.log("Checking subject entries");
error = checkDictionarySectionOrder(binary, unicode, colorTool, "subject",
hdt.getDictionary().getSubjects(), console);
Expand Down Expand Up @@ -279,6 +278,12 @@ public void exec() throws Throwable {
hdt.getDictionary().getShared(), console);
count += hdt.getDictionary().getShared().getNumberOfElements();
}
if (hdt.getDictionary().supportGraphs()) {
colorTool.log("Checking graph entries");
error |= checkDictionarySectionOrder(binary, unicode, colorTool, "graph",
hdt.getDictionary().getGraphs(), console);
count += hdt.getDictionary().getGraphs().getNumberOfElements();
}

if (error) {
colorTool.error("This HDT isn't valid", true);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import com.the_qa_company.qendpoint.core.dictionary.Dictionary;
import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder;
import com.the_qa_company.qendpoint.core.exceptions.IllegalFormatException;
import com.the_qa_company.qendpoint.core.exceptions.SignatureIOException;
import com.the_qa_company.qendpoint.core.hdt.HDTVocabulary;
import com.the_qa_company.qendpoint.core.hdt.impl.HDTDiskImporter;
import com.the_qa_company.qendpoint.core.hdt.impl.diskindex.DiskIndexSort;
Expand Down Expand Up @@ -1347,7 +1348,7 @@ public void syncOtherIndexes(Path fileLocation, HDTOptions spec, ProgressListene
idx.getOrder());
}
IOUtil.closeQuietly(old);
} catch (NoSuchFileException ignore) {
} catch (NoSuchFileException | SignatureIOException ignore) {
// no index with this name
if (!askedOrders.contains(order)) {
continue; // not asked by the user, we can ignore
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import com.the_qa_company.qendpoint.core.compact.sequence.SequenceLog64BigDisk;
import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder;
import com.the_qa_company.qendpoint.core.exceptions.IllegalFormatException;
import com.the_qa_company.qendpoint.core.exceptions.SignatureIOException;
import com.the_qa_company.qendpoint.core.iterator.utils.AsyncIteratorFetcher;
import com.the_qa_company.qendpoint.core.iterator.utils.ExceptionIterator;
import com.the_qa_company.qendpoint.core.iterator.utils.MapIterator;
Expand Down Expand Up @@ -96,7 +97,8 @@ public static BitmapTriplesIndex map(Path file, FileChannel channel, BitmapTripl

long currentSignature = signature(triples);
if (signature != currentSignature) {
throw new IOException(format("Wrong signature for file 0x%x != 0x%x", signature, currentSignature));
throw new SignatureIOException(
format("Wrong signature for file 0x%x != 0x%x", signature, currentSignature));
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
package com.the_qa_company.qendpoint.core.triples.impl;

import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder;
import com.the_qa_company.qendpoint.core.exceptions.ParserException;
import com.the_qa_company.qendpoint.core.hdt.HDTManager;
import com.the_qa_company.qendpoint.core.hdt.HDTVersion;
import com.the_qa_company.qendpoint.core.listener.ProgressListener;
import com.the_qa_company.qendpoint.core.options.HDTOptions;
import com.the_qa_company.qendpoint.core.options.HDTOptionsKeys;
import com.the_qa_company.qendpoint.core.util.LargeFakeDataSetStreamSupplier;
import com.the_qa_company.qendpoint.core.util.crc.CRC32;
import org.apache.commons.io.file.PathUtils;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

import static org.junit.Assert.*;

/**
 * Checks the index files created next to an HDT when it is mapped, and that
 * mapping still succeeds once the HDT file has been replaced by a new one.
 */
public class BitmapTriplesIndexFileTest {

	@Rule
	public TemporaryFolder tempDir = TemporaryFolder.builder().assureDeletion().build();

	/**
	 * Compute the CRC32 checksum of a whole byte array.
	 *
	 * @param data the bytes to hash
	 * @return the CRC32 value of {@code data}
	 */
	public long crc32(byte[] data) {
		CRC32 checksum = new CRC32();
		checksum.update(data, 0, data.length);
		return checksum.getValue();
	}

	/**
	 * Generate an HDT, map it with its indexes, replace the HDT file with a
	 * newly generated one and map it again.
	 */
	@Test
	public void genTest() throws IOException, ParserException {
		Path workDir = tempDir.newFolder().toPath();

		// ask for the spo and ops indexes, without the foq index
		HDTOptions spec = HDTOptions.of(
				HDTOptionsKeys.BITMAPTRIPLES_INDEX_OTHERS, "spo,ops",
				HDTOptionsKeys.BITMAPTRIPLES_INDEX_NO_FOQ, true
		);
		try {
			Path hdtFile = workDir.resolve("temp.hdt");

			LargeFakeDataSetStreamSupplier supplier = LargeFakeDataSetStreamSupplier
					.createSupplierWithMaxTriples(1000, 10)
					.withMaxLiteralSize(50)
					.withMaxElementSplit(20);

			supplier.createAndSaveFakeHDT(spec, hdtFile);

			// first mapping: should load
			HDTManager.mapIndexedHDT(hdtFile, spec, ProgressListener.ignore()).close();

			Path opsIndex = BitmapTriplesIndexFile.getIndexPath(hdtFile, TripleComponentOrder.OPS);
			assertTrue("ops index doesn't exist", Files.exists(opsIndex));

			Path foqIndex = hdtFile.resolveSibling(hdtFile.getFileName() + HDTVersion.get_index_suffix("-"));
			assertFalse("foq index exists", Files.exists(foqIndex));

			byte[] firstContent = Files.readAllBytes(hdtFile);
			long checksumBefore = crc32(firstContent);

			// move the first HDT away and generate a new one at the same
			// location
			Path movedHdt = workDir.resolve("temp2.hdt");
			Files.move(hdtFile, movedHdt);

			supplier.createAndSaveFakeHDT(spec, hdtFile);
			// should erase the previous index and generate another one
			HDTManager.mapIndexedHDT(hdtFile, spec, ProgressListener.ignore()).close();

			byte[] secondContent = Files.readAllBytes(hdtFile);
			long checksumAfter = crc32(secondContent);

			assertNotEquals("files are the same", checksumBefore, checksumAfter);
		} finally {
			PathUtils.deleteDirectory(workDir);
		}
	}
}
4 changes: 2 additions & 2 deletions qendpoint-store/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>qendpoint</artifactId>
<version>1.15.0</version>
<version>1.15.6</version>

<packaging>jar</packaging>

Expand All @@ -13,7 +13,7 @@
<parent>
<groupId>com.the-qa-company</groupId>
<artifactId>qendpoint-parent</artifactId>
<version>1.15.0</version>
<version>1.15.6</version>
</parent>

<licenses>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,10 @@
import com.the_qa_company.qendpoint.utils.sail.SourceSailConnectionWrapper;
import jakarta.json.Json;
import jakarta.json.stream.JsonGenerator;
import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.Namespace;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.model.ValueFactory;
import org.eclipse.rdf4j.model.base.CoreDatatype;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.eclipse.rdf4j.model.util.Values;
import org.eclipse.rdf4j.query.*;
import org.eclipse.rdf4j.query.algebra.Var;
import org.eclipse.rdf4j.query.algebra.helpers.AbstractQueryModelVisitor;
import org.eclipse.rdf4j.query.explanation.Explanation;
import org.eclipse.rdf4j.query.explanation.GenericPlanNode;
import org.eclipse.rdf4j.query.parser.*;
Expand Down Expand Up @@ -104,9 +98,6 @@ public CompiledSailOptions getOptions() {

/**
* reindex all the lucene sails of this repository
*
* @throws Exception any exception returned by
* {@link org.eclipse.rdf4j.sail.lucene.LuceneSail#reindex()}
*/
public void reindexLuceneSails() {
compiledSail.reindexLuceneSails();
Expand Down Expand Up @@ -759,10 +750,36 @@ private ClosableResult<?> execute0(RepositoryConnection customConnection, String
* @param out the output stream, can be null
*/
public void executeUpdate(String sparqlQuery, int timeout, OutputStream out) {
// delegate to the overload taking a connection, passing null as the
// custom connection
executeUpdate(sparqlQuery, timeout, out, null);

}

/**
* execute a sparql update query
*
* @param sparqlQuery the query
* @param timeout query timeout
* @param out the output stream, can be null
* @param customConnection custom connection to use
*/
public void executeUpdate(String sparqlQuery, int timeout, OutputStream out,
RepositoryConnection customConnection) {
// logger.info("Running update query:"+sparqlQuery);
sparqlQuery = applyPrefixes(sparqlQuery);
sparqlQuery = Pattern.compile("MINUS \\{(?s).*?}\\n {2}}").matcher(sparqlQuery).replaceAll("");
try (SailRepositoryConnection connection = repository.getConnection()) {

RepositoryConnection connectionCloseable;
RepositoryConnection connection;

if (customConnection == null) {
connection = repository.getConnection();
connectionCloseable = connection;
} else {
connectionCloseable = null;
connection = customConnection;
}

try (connectionCloseable) {
connection.setParserConfig(new ParserConfig().set(BasicParserSettings.VERIFY_URI_SYNTAX, false));

Update preparedUpdate = connection.prepareUpdate(QueryLanguage.SPARQL, sparqlQuery);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,10 @@ public class EndpointStore extends AbstractNotifyingSail {
* enable the merge join, default true
*/
public static final String OPTION_QENDPOINT_MERGE_JOIN = "qendpoint.mergejoin";
/**
* disable delete bitmaps, default false
*/
public static final String OPTION_QENDPOINT_DELETE_DISABLE = "qendpoint.delete.disable";
private static final AtomicLong ENDPOINT_DEBUG_ID_GEN = new AtomicLong();
private static final Logger logger = LoggerFactory.getLogger(EndpointStore.class);
private final long debugId;
Expand Down Expand Up @@ -113,6 +117,7 @@ public class EndpointStore extends AbstractNotifyingSail {
// setting to put the delete map only in memory, i.e don't write to disk
private final boolean inMemDeletes;
private final boolean loadIntoMemory;
private final boolean deleteDisabled;

// bitmaps used to mark if the subject, predicate, object elements in HDT
// are used in the rdf4j delta store
Expand Down Expand Up @@ -175,6 +180,7 @@ public EndpointStore(EndpointFiles files, HDTOptions spec, boolean inMemDeletes,
throws IOException {
// load HDT file
this.spec = (spec = HDTOptions.ofNullable(spec));
deleteDisabled = spec.getBoolean(OPTION_QENDPOINT_DELETE_DISABLE, false);
validOrders = getHDTSpec().getEnumSet(HDTOptionsKeys.BITMAPTRIPLES_INDEX_OTHERS, TripleComponentOrder.class);
validOrders.add(TripleComponentOrder.SPO); // we need at least SPO

Expand Down Expand Up @@ -1215,6 +1221,10 @@ public long getGraphsCount(HDT hdt) {
return hdt.getDictionary().supportGraphs() ? hdt.getDictionary().getNgraphs() : 1;
}

/**
* @return the value of the {@link #OPTION_QENDPOINT_DELETE_DISABLE} option
* read from the spec when this store was constructed, false being
* passed as the default
*/
public boolean isDeleteDisabled() {
return deleteDisabled;
}

/**
* @return the result of {@link #getGraphsCount(HDT)} applied to the
* {@code hdt} field of this store
*/
public long getGraphsCount() {
return getGraphsCount(this.hdt);
}
Expand Down
Loading

0 comments on commit b4a4935

Please sign in to comment.