Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sync master into dev #446

Merged
merged 17 commits into from
Mar 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ The QA Company over the social networks
## About

The qEndpoint is a highly scalable triple store with full-text and [GeoSPARQL](https://www.ogc.org/standards/geosparql) support. It can be used as a standalone SPARQL endpoint, or as a dependency.
The qEndpoint is for example used in [Kohesio](https://kohesio.ec.europa.eu/) where each interaction with the UI corresponds to an underlying SPARQL query on the qEndpoint.
The qEndpoint is for example used in [Kohesio](https://kohesio.ec.europa.eu/) where each interaction with the UI corresponds to an underlying SPARQL query on the qEndpoint. qEndpoint is also part of [QAnswer](https://qanswer.ai), enabling question answering over RDF graphs.

### Built With

Expand Down
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

<groupId>com.the-qa-company</groupId>
<artifactId>qendpoint-parent</artifactId>
<version>1.15.0</version>
<version>1.15.6</version>

<packaging>pom</packaging>

Expand Down
4 changes: 2 additions & 2 deletions qendpoint-backend/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>qendpoint-backend</artifactId>
<version>1.15.0</version>
<version>1.15.6</version>

<packaging>jar</packaging>

Expand All @@ -15,7 +15,7 @@
<parent>
<groupId>com.the-qa-company</groupId>
<artifactId>qendpoint-parent</artifactId>
<version>1.15.0</version>
<version>1.15.6</version>
</parent>

<licenses>
Expand Down
4 changes: 2 additions & 2 deletions qendpoint-cli/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>qendpoint-cli</artifactId>
<version>1.15.0</version>
<version>1.15.6</version>

<name>qendpoint package</name>
<description>Package of the qendpoint.</description>
Expand All @@ -11,7 +11,7 @@
<parent>
<groupId>com.the-qa-company</groupId>
<artifactId>qendpoint-parent</artifactId>
<version>1.15.0</version>
<version>1.15.6</version>
</parent>

<dependencies>
Expand Down
4 changes: 2 additions & 2 deletions qendpoint-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>qendpoint-core</artifactId>
<version>1.15.0</version>
<version>1.15.6</version>

<packaging>jar</packaging>

Expand All @@ -27,7 +27,7 @@
<parent>
<groupId>com.the-qa-company</groupId>
<artifactId>qendpoint-parent</artifactId>
<version>1.15.0</version>
<version>1.15.6</version>
</parent>

<licenses>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,14 @@ default boolean supportsLanguageOfId() {
return false;
}

/**
 * Checks whether this dictionary is a multi-section dictionary (MSD). If
 * so, {@link #getObjects()} can't be used and the {@link #getAllObjects()}
 * method should be used instead.
 *
 * @return {@code true} if this dictionary is a multi-section dictionary,
 *         {@code false} otherwise (the default)
 */
default boolean isMultiSectionDictionary() {
return false;
}

/**
* Returns whether the dictionary supports graphs
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,11 @@ public void reset() {
}
}

/**
 * {@inheritDoc}
 * <p>
 * This implementation always returns {@code true}: callers should use
 * {@code getAllObjects()} rather than {@code getObjects()}.
 */
@Override
public boolean isMultiSectionDictionary() {
return true;
}

@Override
public OptimizedExtractor createOptimizedMapExtractor() {
return new MultDictionaryPFCOptimizedExtractor(this);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -528,6 +528,11 @@ public OptimizedExtractor createOptimizedMapExtractor() {
return new MultipleSectionDictionaryLangPFCOptimizedExtractor(this);
}

/**
 * {@inheritDoc}
 * <p>
 * This implementation always returns {@code true}: callers should use
 * {@code getAllObjects()} rather than {@code getObjects()}.
 */
@Override
public boolean isMultiSectionDictionary() {
return true;
}

public int getObjectsSectionCount() {
return objectIdLocationsSec.length;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package com.the_qa_company.qendpoint.core.exceptions;

import java.io.IOException;

public class SignatureIOException extends IOException {
public SignatureIOException() {
}

public SignatureIOException(String message) {
super(message);
}

public SignatureIOException(String message, Throwable cause) {
super(message, cause);
}

public SignatureIOException(Throwable cause) {
super(cause);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import com.beust.jcommander.Parameter;
import com.beust.jcommander.internal.Lists;
import com.the_qa_company.qendpoint.core.dictionary.DictionarySection;
import com.the_qa_company.qendpoint.core.dictionary.impl.MultipleBaseDictionary;
import com.the_qa_company.qendpoint.core.exceptions.NotFoundException;
import com.the_qa_company.qendpoint.core.hdt.HDT;
import com.the_qa_company.qendpoint.core.hdt.HDTManager;
Expand Down Expand Up @@ -237,7 +236,7 @@ public void exec() throws Throwable {
try (HDT hdt = hdtl) {
boolean error;
long count = 0;
if (hdt.getDictionary() instanceof MultipleBaseDictionary) {
if (hdt.getDictionary().isMultiSectionDictionary()) {
colorTool.log("Checking subject entries");
error = checkDictionarySectionOrder(binary, unicode, colorTool, "subject",
hdt.getDictionary().getSubjects(), console);
Expand Down Expand Up @@ -279,6 +278,12 @@ public void exec() throws Throwable {
hdt.getDictionary().getShared(), console);
count += hdt.getDictionary().getShared().getNumberOfElements();
}
if (hdt.getDictionary().supportGraphs()) {
colorTool.log("Checking graph entries");
error |= checkDictionarySectionOrder(binary, unicode, colorTool, "graph",
hdt.getDictionary().getGraphs(), console);
count += hdt.getDictionary().getGraphs().getNumberOfElements();
}

if (error) {
colorTool.error("This HDT isn't valid", true);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import com.the_qa_company.qendpoint.core.dictionary.Dictionary;
import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder;
import com.the_qa_company.qendpoint.core.exceptions.IllegalFormatException;
import com.the_qa_company.qendpoint.core.exceptions.SignatureIOException;
import com.the_qa_company.qendpoint.core.hdt.HDTVocabulary;
import com.the_qa_company.qendpoint.core.hdt.impl.HDTDiskImporter;
import com.the_qa_company.qendpoint.core.hdt.impl.diskindex.DiskIndexSort;
Expand Down Expand Up @@ -1347,7 +1348,7 @@ public void syncOtherIndexes(Path fileLocation, HDTOptions spec, ProgressListene
idx.getOrder());
}
IOUtil.closeQuietly(old);
} catch (NoSuchFileException ignore) {
} catch (NoSuchFileException | SignatureIOException ignore) {
// no index with this name
if (!askedOrders.contains(order)) {
continue; // not asked by the user, we can ignore
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import com.the_qa_company.qendpoint.core.compact.sequence.SequenceLog64BigDisk;
import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder;
import com.the_qa_company.qendpoint.core.exceptions.IllegalFormatException;
import com.the_qa_company.qendpoint.core.exceptions.SignatureIOException;
import com.the_qa_company.qendpoint.core.iterator.utils.AsyncIteratorFetcher;
import com.the_qa_company.qendpoint.core.iterator.utils.ExceptionIterator;
import com.the_qa_company.qendpoint.core.iterator.utils.MapIterator;
Expand Down Expand Up @@ -96,7 +97,8 @@ public static BitmapTriplesIndex map(Path file, FileChannel channel, BitmapTripl

long currentSignature = signature(triples);
if (signature != currentSignature) {
throw new IOException(format("Wrong signature for file 0x%x != 0x%x", signature, currentSignature));
throw new SignatureIOException(
format("Wrong signature for file 0x%x != 0x%x", signature, currentSignature));
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
package com.the_qa_company.qendpoint.core.triples.impl;

import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder;
import com.the_qa_company.qendpoint.core.exceptions.ParserException;
import com.the_qa_company.qendpoint.core.hdt.HDTManager;
import com.the_qa_company.qendpoint.core.hdt.HDTVersion;
import com.the_qa_company.qendpoint.core.listener.ProgressListener;
import com.the_qa_company.qendpoint.core.options.HDTOptions;
import com.the_qa_company.qendpoint.core.options.HDTOptionsKeys;
import com.the_qa_company.qendpoint.core.util.LargeFakeDataSetStreamSupplier;
import com.the_qa_company.qendpoint.core.util.crc.CRC32;
import org.apache.commons.io.file.PathUtils;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

import static org.junit.Assert.*;

/**
 * Tests for the on-disk index files produced alongside an HDT file: checks
 * that the requested extra order index is generated, that the legacy FOQ
 * index is not, and that regenerating a dataset at the same path produces
 * different file content.
 */
public class BitmapTriplesIndexFileTest {

// per-test temporary directory, deleted (with assertion) after each test
@Rule
public TemporaryFolder tempDir = TemporaryFolder.builder().assureDeletion().build();

/**
 * Computes the CRC32 checksum of the given bytes using the project's CRC32
 * implementation.
 *
 * @param data bytes to checksum
 * @return CRC32 value of {@code data}
 */
public long crc32(byte[] data) {
CRC32 crc = new CRC32();
crc.update(data, 0, data.length);
return crc.getValue();
}

@Test
public void genTest() throws IOException, ParserException {
Path root = tempDir.newFolder().toPath();

// ask for extra SPO/OPS order indexes and disable the FOQ index
HDTOptions spec = HDTOptions.of(
HDTOptionsKeys.BITMAPTRIPLES_INDEX_OTHERS, "spo,ops",
HDTOptionsKeys.BITMAPTRIPLES_INDEX_NO_FOQ, true
);
try {
Path hdtPath = root.resolve("temp.hdt");

// synthetic dataset: up to 1000 triples, bounded literal/element sizes
LargeFakeDataSetStreamSupplier supplier = LargeFakeDataSetStreamSupplier
.createSupplierWithMaxTriples(1000, 10)
.withMaxLiteralSize(50)
.withMaxElementSplit(20);

supplier.createAndSaveFakeHDT(spec, hdtPath);

// should load
HDTManager.mapIndexedHDT(hdtPath, spec, ProgressListener.ignore()).close();
// mapping with the spec must create the OPS index file...
assertTrue("ops index doesn't exist", Files.exists(BitmapTriplesIndexFile.getIndexPath(hdtPath, TripleComponentOrder.OPS)));
// ...but not the legacy "-index" (FOQ) file, since it was disabled
assertFalse("foq index exists", Files.exists(hdtPath.resolveSibling(hdtPath.getFileName() + HDTVersion.get_index_suffix("-"))));

long crcold = crc32(Files.readAllBytes(hdtPath));

Path hdtPath2 = root.resolve("temp2.hdt");

// move the first HDT aside, then regenerate a new one at the same path
Files.move(hdtPath, hdtPath2);

// NOTE(review): relies on the reused supplier producing different data
// on the second call — verify against LargeFakeDataSetStreamSupplier
supplier.createAndSaveFakeHDT(spec, hdtPath);
// should erase the previous index and generate another one
HDTManager.mapIndexedHDT(hdtPath, spec, ProgressListener.ignore()).close();

long crcnew = crc32(Files.readAllBytes(hdtPath));

// the regenerated HDT must differ from the original one
assertNotEquals("files are the same", crcold, crcnew);
} finally {
PathUtils.deleteDirectory(root);
}
}
}
4 changes: 2 additions & 2 deletions qendpoint-store/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>qendpoint</artifactId>
<version>1.15.0</version>
<version>1.15.6</version>

<packaging>jar</packaging>

Expand All @@ -13,7 +13,7 @@
<parent>
<groupId>com.the-qa-company</groupId>
<artifactId>qendpoint-parent</artifactId>
<version>1.15.0</version>
<version>1.15.6</version>
</parent>

<licenses>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,10 @@
import com.the_qa_company.qendpoint.utils.sail.SourceSailConnectionWrapper;
import jakarta.json.Json;
import jakarta.json.stream.JsonGenerator;
import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.Namespace;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.model.ValueFactory;
import org.eclipse.rdf4j.model.base.CoreDatatype;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.eclipse.rdf4j.model.util.Values;
import org.eclipse.rdf4j.query.*;
import org.eclipse.rdf4j.query.algebra.Var;
import org.eclipse.rdf4j.query.algebra.helpers.AbstractQueryModelVisitor;
import org.eclipse.rdf4j.query.explanation.Explanation;
import org.eclipse.rdf4j.query.explanation.GenericPlanNode;
import org.eclipse.rdf4j.query.parser.*;
Expand Down Expand Up @@ -104,9 +98,6 @@ public CompiledSailOptions getOptions() {

/**
* reindex all the lucene sails of this repository
*
* @throws Exception any exception returned by
* {@link org.eclipse.rdf4j.sail.lucene.LuceneSail#reindex()}
*/
public void reindexLuceneSails() {
compiledSail.reindexLuceneSails();
Expand Down Expand Up @@ -759,10 +750,36 @@ private ClosableResult<?> execute0(RepositoryConnection customConnection, String
* @param out the output stream, can be null
*/
public void executeUpdate(String sparqlQuery, int timeout, OutputStream out) {
// delegate to the connection-aware overload; null means "open and close
// a connection from the internal repository"
executeUpdate(sparqlQuery, timeout, out, null);

}

/**
* execute a sparql update query
*
* @param sparqlQuery the query
* @param timeout query timeout
* @param out the output stream, can be null
* @param customConnection custom connection to use
*/
public void executeUpdate(String sparqlQuery, int timeout, OutputStream out,
RepositoryConnection customConnection) {
// logger.info("Running update query:"+sparqlQuery);
sparqlQuery = applyPrefixes(sparqlQuery);
sparqlQuery = Pattern.compile("MINUS \\{(?s).*?}\\n {2}}").matcher(sparqlQuery).replaceAll("");
try (SailRepositoryConnection connection = repository.getConnection()) {

RepositoryConnection connectionCloseable;
RepositoryConnection connection;

if (customConnection == null) {
connection = repository.getConnection();
connectionCloseable = connection;
} else {
connectionCloseable = null;
connection = customConnection;
}

try (connectionCloseable) {
connection.setParserConfig(new ParserConfig().set(BasicParserSettings.VERIFY_URI_SYNTAX, false));

Update preparedUpdate = connection.prepareUpdate(QueryLanguage.SPARQL, sparqlQuery);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,10 @@ public class EndpointStore extends AbstractNotifyingSail {
* enable the merge join, default true
*/
public static final String OPTION_QENDPOINT_MERGE_JOIN = "qendpoint.mergejoin";
/**
* disable delete bitmaps, default false
*/
public static final String OPTION_QENDPOINT_DELETE_DISABLE = "qendpoint.delete.disable";
private static final AtomicLong ENDPOINT_DEBUG_ID_GEN = new AtomicLong();
private static final Logger logger = LoggerFactory.getLogger(EndpointStore.class);
private final long debugId;
Expand Down Expand Up @@ -113,6 +117,7 @@ public class EndpointStore extends AbstractNotifyingSail {
// setting to put the delete map only in memory, i.e don't write to disk
private final boolean inMemDeletes;
private final boolean loadIntoMemory;
private final boolean deleteDisabled;

// bitmaps used to mark if the subject, predicate, object elements in HDT
// are used in the rdf4j delta store
Expand Down Expand Up @@ -175,6 +180,7 @@ public EndpointStore(EndpointFiles files, HDTOptions spec, boolean inMemDeletes,
throws IOException {
// load HDT file
this.spec = (spec = HDTOptions.ofNullable(spec));
deleteDisabled = spec.getBoolean(OPTION_QENDPOINT_DELETE_DISABLE, false);
validOrders = getHDTSpec().getEnumSet(HDTOptionsKeys.BITMAPTRIPLES_INDEX_OTHERS, TripleComponentOrder.class);
validOrders.add(TripleComponentOrder.SPO); // we need at least SPO

Expand Down Expand Up @@ -1215,6 +1221,10 @@ public long getGraphsCount(HDT hdt) {
return hdt.getDictionary().supportGraphs() ? hdt.getDictionary().getNgraphs() : 1;
}

public boolean isDeleteDisabled() {
return deleteDisabled;
}

public long getGraphsCount() {
return getGraphsCount(this.hdt);
}
Expand Down
Loading
Loading