Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-497 wip merge join #536

Draft
wants to merge 8 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion qendpoint-backend/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
<rdf4j.version>5.0.2</rdf4j.version>
<spring.version>3.4.0</spring.version>
<logback.version>1.5.6</logback.version>

<gson.version>2.11.0</gson.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
</properties>
Expand Down Expand Up @@ -112,6 +112,11 @@
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>${gson.version}</version>
</dependency>
<!-- v1.15 is needed for Jena 3.14+ - copied from org.apache.jena:jena:pom -->
<dependency>
<groupId>commons-codec</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ public record FormatReturn(String query) {}
@Autowired
Sparql sparql;

@RequestMapping(value = "/sparql")
@RequestMapping(value = "/sparql", method = { RequestMethod.GET, RequestMethod.POST })
public void sparqlEndpoint(@RequestParam(value = "query", required = false) final String query,
@RequestParam(value = "update", required = false) final String updateQuery,
@RequestParam(value = "format", defaultValue = "json") final String format,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,12 @@
import com.the_qa_company.qendpoint.store.EndpointFiles;
import com.the_qa_company.qendpoint.store.EndpointStore;
import com.the_qa_company.qendpoint.store.EndpointStoreUtils;
import com.the_qa_company.qendpoint.store.HDTProps;
import com.the_qa_company.qendpoint.utils.FileUtils;
import com.the_qa_company.qendpoint.utils.RDFStreamUtils;
import jakarta.annotation.PostConstruct;
import jakarta.annotation.PreDestroy;
import org.apache.lucene.index.IndexReader;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.model.util.Values;
import org.eclipse.rdf4j.repository.RepositoryConnection;
Expand All @@ -28,14 +32,14 @@
import com.the_qa_company.qendpoint.core.util.io.CloseSuppressPath;
import com.the_qa_company.qendpoint.core.util.io.IOUtil;
import org.eclipse.rdf4j.sail.lucene.LuceneSail;
import org.eclipse.rdf4j.sail.lucene.SearchIndex;
import org.eclipse.rdf4j.sail.lucene.impl.LuceneIndex;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import org.springframework.web.server.ServerWebInputException;

import javax.annotation.PostConstruct;
import javax.annotation.PreDestroy;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
Expand All @@ -53,6 +57,7 @@
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.function.Consumer;
import java.util.stream.Collectors;
Expand Down Expand Up @@ -342,6 +347,46 @@ void initializeEndpointStore(boolean finishLoading) throws IOException {
CompiledSailOptions opt = sparqlRepository.getOptions();
port = opt.getPort();
}

if (endpoint != null) {
HDTProps props = endpoint.getHdtProps();
long bnCount = props.getEndBlankObjects() - props.getStartBlankObjects() // obj
+ props.getEndBlankShared() - props.getStartBlankShared() // shared
+ props.getEndBlankSubjects() - props.getStartBlankSubjects(); // subj
long literals = props.getEndLiteral() - props.getStartLiteral();
logger.info("Index props: Lit:{} bn:{}", literals, bnCount);
}
Set<LuceneSail> lcs = sparqlRepository.getLuceneSails();
if (!lcs.isEmpty()) {
logger.info("Lucene sails ({})", lcs.size());

final int maxCount = 5;
Iterator<LuceneSail> it = lcs.iterator();
for (int i = 0; i < Math.min(maxCount, lcs.size()); i++) {
if (!it.hasNext())
break;
LuceneSail lc = it.next();

String id = lc.getParameter(LuceneSail.INDEX_ID);
if (id == null || id.isEmpty())
id = "<unk>";
SearchIndex lcIdx = lc.getLuceneIndex();
String infoStr = lcIdx.getClass().getSimpleName();
if (lcIdx instanceof LuceneIndex li) {
IndexReader reader = li.getIndexReader();
int numDocs = reader.numDocs();
infoStr += " numDocs*Fields:" + numDocs + "*" + li.getIndexWriter().getFieldNames().size();
} else {
infoStr += " no data"; // add ES/Solr??
}
logger.info("{} {}", id, infoStr);
}
if (lcs.size() > maxCount) {
logger.info("...");
}

}

}
if (finishLoading) {
completeLoading();
Expand Down
4 changes: 2 additions & 2 deletions qendpoint-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
<logback.version>1.5.6</logback.version>
<roaringbitmap.version>0.9.44</roaringbitmap.version>

<jena.version>4.3.2</jena.version>
<jena.version>4.9.0</jena.version>
<slf4j.version>1.7.30</slf4j.version>

<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
Expand All @@ -75,7 +75,7 @@
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
<version>1.21</version>
<version>1.26.0</version>
</dependency>
<dependency>
<groupId>org.apache.jena</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ public long countItemsY(long x) {
return last(x) - find(x) + 1;
}

public long search(long element, long begin, long end) throws NotFoundException {
public long search(long element, long begin, long end) {
if (end - begin > 10) {
return binSearch(element, begin, end);
} else {
Expand All @@ -134,15 +134,49 @@ public long binSearch(long element, long begin, long end) {
return -1;
}

public long linSearch(long element, long begin, long end) throws NotFoundException {
public long linSearch(long element, long begin, long end) {
while (begin <= end) {
long read = array.get(begin);
if (read == element) {
return begin;
}
begin++;
}
throw new NotFoundException();
return -1;
}

public long searchLoc(long element, long begin, long end) {
if (end - begin > 10) {
return binSearchLoc(element, begin, end);
} else {
return linSearchLoc(element, begin, end);
}
}

public long binSearchLoc(long element, long begin, long end) {
while (begin <= end) {
long mid = (begin + end) / 2;
long read = array.get(mid);
if (element > read) {
begin = mid + 1;
} else if (element < read) {
end = mid - 1;
} else {
return mid;
}
}
return -(1 + begin);
}

public long linSearchLoc(long element, long begin, long end) {
while (begin <= end) {
long read = array.get(begin);
if (read == element) {
return begin;
}
begin++;
}
return -(1 + begin);
}

public final long get(long pos) {
Expand Down Expand Up @@ -237,4 +271,11 @@ public void dump() {
System.out.println();
}

public Sequence getArray() {
return array;
}

public Bitmap getBitmap() {
return bitmap;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import com.the_qa_company.qendpoint.core.enums.RDFNodeType;
import com.the_qa_company.qendpoint.core.enums.TripleComponentRole;
import com.the_qa_company.qendpoint.core.header.Header;
import com.the_qa_company.qendpoint.core.quad.QuadString;
import com.the_qa_company.qendpoint.core.triples.TripleID;
import com.the_qa_company.qendpoint.core.triples.TripleString;

Expand Down Expand Up @@ -265,4 +266,16 @@ default TripleID toTripleId(TripleString tsstr) {
}
return tid;
}

default TripleString toTripleString(TripleID tssid) {
if (tssid.isQuad()) {
return new QuadString(idToString(tssid.getSubject(), TripleComponentRole.SUBJECT),
idToString(tssid.getPredicate(), TripleComponentRole.PREDICATE),
idToString(tssid.getObject(), TripleComponentRole.OBJECT),
idToString(tssid.getGraph(), TripleComponentRole.GRAPH));
}
return new TripleString(idToString(tssid.getSubject(), TripleComponentRole.SUBJECT),
idToString(tssid.getPredicate(), TripleComponentRole.PREDICATE),
idToString(tssid.getObject(), TripleComponentRole.OBJECT));
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.the_qa_company.qendpoint.core.hdt;

import com.the_qa_company.qendpoint.core.compact.integer.VByte;
import com.the_qa_company.qendpoint.core.enums.CompressionType;
import com.the_qa_company.qendpoint.core.enums.RDFNotation;
import com.the_qa_company.qendpoint.core.exceptions.NotFoundException;
Expand Down Expand Up @@ -35,11 +36,17 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.file.Files;
import java.nio.file.NoSuchFileException;
import java.nio.file.Path;
Expand All @@ -49,6 +56,7 @@

public class HDTManagerImpl extends HDTManager {
private static final Logger logger = LoggerFactory.getLogger(HDTManagerImpl.class);
private static final long HDT_DL_INFO_MAGIC = 0x4f464e4c44544448L;

@Override
public HDTOptions doReadOptions(String file) throws IOException {
Expand Down Expand Up @@ -177,6 +185,15 @@ private HDTResult generateChecksumAfter(long checksum, Path checksumPath, HDTOpt
public HDTResult doGenerateHDT(String rdfFileName, String baseURI, RDFNotation rdfNotation, HDTOptions spec,
ProgressListener listener) throws IOException, ParserException {
// choose the importer
long waitTimeStart = spec.getInt(HDTOptionsKeys.LOADER_WAIT_START, 0);
if (waitTimeStart > 0) {
logger.info("Waiting {}ms before start...", waitTimeStart);
try {
Thread.sleep(waitTimeStart);
} catch (InterruptedException ignore) {
}
logger.info("Done waiting");
}
String loaderType = spec.get(HDTOptionsKeys.LOADER_TYPE_KEY);
TempHDTImporter loader;
boolean isQuad = rdfNotation == RDFNotation.NQUAD;
Expand Down Expand Up @@ -224,6 +241,9 @@ public HDTResult doGenerateHDT(String rdfFileName, String baseURI, RDFNotation r
} else {
try {
preSize = Files.size(preDownload);
if (preSize == trueSize) {
break;
}
} catch (IOException ignore) {
preSize = 0;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,52 +97,6 @@ public TripleID next() {
return returnTriple;
}

/*
* (non-Javadoc)
* @see hdt.iterator.IteratorTripleID#hasPrevious()
*/
@Override
public boolean hasPrevious() {
return hasPreviousTriples;
}

private void doFetchPrevious() {
hasPreviousTriples = false;

while (iterator.hasPrevious()) {
TripleID previous = iterator.previous();

if (previous.match(pattern)) {
hasPreviousTriples = true;
hasMoreTriples = true;
previousTriple.assign(previous);
previousPosition = iterator.getLastTriplePositionSupplier();
break;
}
}
}

/*
* (non-Javadoc)
* @see hdt.iterator.IteratorTripleID#previous()
*/
@Override
public TripleID previous() {
if (goingUp) {
goingUp = false;
if (hasMoreTriples) {
doFetchPrevious();
}
doFetchPrevious();
}
returnTriple.assign(previousTriple);
lastPosition = previousPosition;

doFetchPrevious();

return returnTriple;
}

/*
* (non-Javadoc)
* @see hdt.iterator.IteratorTripleID#goToStart()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,6 @@ public GraphFilteringTripleId(IteratorTripleID iterator, long[] graphIds) {
this.graphIds = graphIds;
}

@Override
public boolean hasPrevious() {
throw new NotImplementedException();
}

@Override
public TripleID previous() {
throw new NotImplementedException();
}

@Override
public void goToStart() {
throw new NotImplementedException();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,26 +42,6 @@ public TripleID next() {
return triplesList.get(pos++);
}

/*
* (non-Javadoc)
* @see hdt.iterator.IteratorTripleID#hasPrevious()
*/
@Override
public boolean hasPrevious() {
return pos > 0;
}

/*
* (non-Javadoc)
* @see hdt.iterator.IteratorTripleID#previous()
*/
@Override
public TripleID previous() {
TripleID tripleID = triplesList.get(--pos);
lastPosition = pos;
return tripleID;
}

/*
* (non-Javadoc)
* @see hdt.iterator.IteratorTripleID#goToStart()
Expand Down
Loading