From 926a4e89c666980af78f421c10275a655d0f981d Mon Sep 17 00:00:00 2001 From: qaate47 Date: Tue, 12 Nov 2024 11:20:58 +0100 Subject: [PATCH 1/8] wip [skip ci] --- .../qendpoint/core/dictionary/Dictionary.java | 17 ++ .../qendpoint/core/hdt/HDTManagerImpl.java | 8 + .../core/merge/HDTMergeJoinIterator.java | 150 ++++++++++++++++++ .../core/merge/HDTMergeJoinPreparer.java | 82 ++++++++++ .../qendpoint/core/triples/TripleID.java | 39 +++++ .../triples/impl/BitmapTriplesIterator.java | 1 + .../qendpoint/core/util/CommonUtils.java | 37 +++++ .../core/merge/HDTMergeJoinIteratorTest.java | 122 ++++++++++++++ .../src/test/resources/merge_ds.ttl | 27 ++++ .../qendpoint/store/EndpointStore.java | 1 + .../store/EndpointStoreConnection.java | 4 + .../store/EndpointStoreQueryPreparer.java | 2 + .../qendpoint/store/EndpointTripleSource.java | 4 + .../qendpoint/utils/MergeJoinOptimizer.java | 94 +++++++++++ 14 files changed, 588 insertions(+) create mode 100644 qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/merge/HDTMergeJoinIterator.java create mode 100644 qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/merge/HDTMergeJoinPreparer.java create mode 100644 qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/CommonUtils.java create mode 100644 qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/merge/HDTMergeJoinIteratorTest.java create mode 100644 qendpoint-core/src/test/resources/merge_ds.ttl create mode 100644 qendpoint-store/src/main/java/com/the_qa_company/qendpoint/utils/MergeJoinOptimizer.java diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/Dictionary.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/Dictionary.java index 1d4ce246c..36df85624 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/Dictionary.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/Dictionary.java @@ -21,6 
+21,7 @@ import com.the_qa_company.qendpoint.core.enums.RDFNodeType; import com.the_qa_company.qendpoint.core.enums.TripleComponentRole; import com.the_qa_company.qendpoint.core.header.Header; +import com.the_qa_company.qendpoint.core.quad.QuadString; import com.the_qa_company.qendpoint.core.triples.TripleID; import com.the_qa_company.qendpoint.core.triples.TripleString; @@ -265,4 +266,20 @@ default TripleID toTripleId(TripleString tsstr) { } return tid; } + + default TripleString toTripleString(TripleID tssid) { + if (tssid.isQuad()) { + return new QuadString( + idToString(tssid.getSubject(), TripleComponentRole.SUBJECT), + idToString(tssid.getPredicate(), TripleComponentRole.PREDICATE), + idToString(tssid.getObject(), TripleComponentRole.OBJECT), + idToString(tssid.getGraph(), TripleComponentRole.GRAPH) + ); + } + return new TripleString( + idToString(tssid.getSubject(), TripleComponentRole.SUBJECT), + idToString(tssid.getPredicate(), TripleComponentRole.PREDICATE), + idToString(tssid.getObject(), TripleComponentRole.OBJECT) + ); + } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerImpl.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerImpl.java index 492421d91..ce9aa87a0 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerImpl.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerImpl.java @@ -1,5 +1,6 @@ package com.the_qa_company.qendpoint.core.hdt; +import com.the_qa_company.qendpoint.core.compact.integer.VByte; import com.the_qa_company.qendpoint.core.enums.CompressionType; import com.the_qa_company.qendpoint.core.enums.RDFNotation; import com.the_qa_company.qendpoint.core.exceptions.NotFoundException; @@ -35,11 +36,17 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.File; import java.io.IOException; import 
java.io.InputStream; import java.io.OutputStream; +import java.net.HttpURLConnection; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.net.URLConnection; import java.nio.file.Files; import java.nio.file.NoSuchFileException; import java.nio.file.Path; @@ -49,6 +56,7 @@ public class HDTManagerImpl extends HDTManager { private static final Logger logger = LoggerFactory.getLogger(HDTManagerImpl.class); + private static final long HDT_DL_INFO_MAGIC = 0x4f464e4c44544448L; @Override public HDTOptions doReadOptions(String file) throws IOException { diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/merge/HDTMergeJoinIterator.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/merge/HDTMergeJoinIterator.java new file mode 100644 index 000000000..06e2ef27b --- /dev/null +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/merge/HDTMergeJoinIterator.java @@ -0,0 +1,150 @@ +package com.the_qa_company.qendpoint.core.merge; + +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; +import com.the_qa_company.qendpoint.core.enums.TripleComponentRole; +import com.the_qa_company.qendpoint.core.iterator.utils.FetcherIterator; +import com.the_qa_company.qendpoint.core.triples.IteratorTripleID; +import com.the_qa_company.qendpoint.core.triples.TripleID; + +import java.util.List; + +public class HDTMergeJoinIterator extends FetcherIterator> { + public static final class MergeIteratorData { + private final IteratorTripleID it; + private final TripleComponentRole role; + private TripleID last; + private boolean loaded; + + public MergeIteratorData(IteratorTripleID it, TripleComponentRole role) { + this.it = it; + this.role = role; + } + + public long getSeekLayer(TripleID id) { + return switch (role) { + case OBJECT -> id.getObject(); + case PREDICATE -> id.getPredicate(); + case SUBJECT -> id.getSubject(); + case GRAPH -> id.getGraph(); + }; + } + + /** + * goto a layer + 
* @param id layer + * @return if we reach the end + */ + public boolean gotoLayer(long id) { + while (hasNext()) { + if (getSeekLayer(last) >= id) { + return false; // good layer or after + } + next(); // force next + } + return true; + } + + public boolean hasNext() { + if (loaded) { + return true; + } + if (!it.hasNext()) { + return false; + } + + last = it.next(); + loaded = true; + return true; + } + + public TripleID peek() { + if (hasNext()) return last; + return null; + } + public TripleID next() { + if (hasNext()) { + loaded = false; + return last; + } + return null; + } + } + + private final List iterators; + private boolean loaded; + + public HDTMergeJoinIterator(List iterators) { + this.iterators = iterators; + } + + private void moveNext() { + if (!loaded) { + loaded = true; + return; // start + } + + int minIdx = 0; + if (!iterators.get(minIdx).hasNext()) { + return; + } + TripleID minVal = iterators.get(minIdx).peek(); + TripleComponentOrder minOrder = iterators.get(minIdx).it.getOrder(); + + for (int i = 1; i < iterators.size(); i++) { + MergeIteratorData d = iterators.get(i); + if (!d.hasNext()) { + return; + } + TripleID peek = d.peek(); + + if (peek == null) { + return; // end + } + + TripleComponentOrder ord = d.it.getOrder(); + if (peek.compareTo(minVal, ord, minOrder) < 0) { + minVal = peek; + minOrder = ord; + minIdx = i; + } + } + + // move to next using this iterator + iterators.get(minIdx).next(); + } + + private boolean seekAll() { + MergeIteratorData it1 = iterators.get(0); + if (!it1.hasNext()) { + return false; // no data + } + long seek = it1.getSeekLayer(it1.peek()); + for (int i = 1; i < iterators.size(); i++) { + MergeIteratorData d = iterators.get(i); + + if (d.gotoLayer(seek)) { + return false; // too far + } + + long seekNext = d.getSeekLayer(d.peek()); + + if (seekNext != seek) { + seek = seekNext; + i = -1; // to compensate i++ + } + } + + return true; + } + + @Override + protected List getNext() { + moveNext(); + if 
(!seekAll()) return null; + + // all the iterators are peeked with the same layer, we can read + return iterators; + } + + +} diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/merge/HDTMergeJoinPreparer.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/merge/HDTMergeJoinPreparer.java new file mode 100644 index 000000000..bf097d177 --- /dev/null +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/merge/HDTMergeJoinPreparer.java @@ -0,0 +1,82 @@ +package com.the_qa_company.qendpoint.core.merge; + +import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException; +import com.the_qa_company.qendpoint.core.hdt.HDT; +import com.the_qa_company.qendpoint.core.triples.TripleID; +import com.the_qa_company.qendpoint.core.util.CommonUtils; +import com.the_qa_company.qendpoint.core.util.string.ByteString; + +import java.util.ArrayList; +import java.util.List; +import java.util.stream.IntStream; + +public class HDTMergeJoinPreparer { + private final HDT hdt; + private final List patterns = new ArrayList<>(); + private int keyIds; + + public HDTMergeJoinPreparer(HDT hdt) { + this.hdt = hdt; + } + + public long createVar() { + return -(++keyIds); + } + + public void addPattern(TripleID tid) { + patterns.add(tid); + } + + public void addPattern(long s, long p, long o) { + addPattern(new TripleID(s, p, o)); + } + + public List buildIteration() { + List lst = new ArrayList<>(); + + if (keyIds == 0) { + // no var + throw new NotImplementedException("No variable");// TODO: + } + int[] occSH = new int[keyIds]; + int[] occP = new int[keyIds]; + + for (TripleID patt : patterns) { + long pp = patt.getPredicate(); + if (pp < 0) { + occP[1 - (int)pp]++; + } + + long ss = patt.getSubject(); + long oo = patt.getObject(); + + if (ss < 0) { + occSH[1 - (int)ss]++; + } + if (oo < 0) { + if (ss != oo) { // avoid double var + occSH[1 - (int)oo]++; + } + } + } + + int maxShIdx = CommonUtils.maxArg(occSH); + int maxPrIdx = 
CommonUtils.maxArg(occP); + + if (maxShIdx == 0 && maxPrIdx == 0) { + // no var + throw new NotImplementedException("No variable");// TODO: + } + + // fixme: we should also check if all the sub graphs are connected + + if (maxShIdx > maxPrIdx) { + // load shared var + } else { + // load + + } + + return lst; + } +} diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleID.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleID.java index 4e3f2de68..bf2c093d0 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleID.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleID.java @@ -19,6 +19,8 @@ package com.the_qa_company.qendpoint.core.triples; +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; +import com.the_qa_company.qendpoint.core.enums.TripleComponentRole; import com.the_qa_company.qendpoint.core.util.LongCompare; import java.io.Serial; @@ -238,6 +240,43 @@ public int compareTo(TripleID other) { } } + /** + * get a component value from its role + * @param role role + * @return component value + */ + public long get(TripleComponentRole role) { + return switch (role) { + case SUBJECT -> getSubject(); + case PREDICATE -> getPredicate(); + case OBJECT -> getObject(); + case GRAPH -> getGraph(); + }; + } + + /** + * compare this triple id with another triple id using order remap + * @param other other triple id + * @param orderThis order of this triple id + * @param orderOther order of the other triple id + * @return compare result + */ + public int compareTo(TripleID other, TripleComponentOrder orderThis, TripleComponentOrder orderOther) { + int result = LongCompare.compare(get(orderThis.getSubjectMapping()), other.get(orderOther.getSubjectMapping())); + + if (result != 0) { + return result; + } + + result = LongCompare.compare(get(orderThis.getPredicateMapping()), other.get(orderOther.getPredicateMapping())); + 
+ if (result != 0) { + return result; + } + + return LongCompare.compare(get(orderThis.getObjectMapping()), other.get(orderOther.getObjectMapping())); + } + /** * Check whether this triple matches a pattern of TripleID. 0 acts as a * wildcard diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIterator.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIterator.java index 248a09234..fa81599da 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIterator.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIterator.java @@ -21,6 +21,7 @@ import com.the_qa_company.qendpoint.core.enums.ResultEstimationType; import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; +import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException; import com.the_qa_company.qendpoint.core.iterator.SuppliableIteratorTripleID; import com.the_qa_company.qendpoint.core.triples.TripleID; import com.the_qa_company.qendpoint.core.compact.bitmap.AdjacencyList; diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/CommonUtils.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/CommonUtils.java new file mode 100644 index 000000000..655746c96 --- /dev/null +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/CommonUtils.java @@ -0,0 +1,37 @@ +package com.the_qa_company.qendpoint.core.util; + +public class CommonUtils { + public static int minArg(int[] array) { + if (array.length < 2) { + return 0; + } + int minIdx = 0; + int minVal = array[0]; + for (int i = 1; i < array.length; i++) { + if (array[i] < minVal) { + minVal = array[i]; + minIdx = i; + } + } + + return minIdx; + } + public static int maxArg(int[] array) { + if (array.length < 2) { + return 0; + } + int maxIdx = 0; + int maxVal = array[0]; + for (int i = 1; 
i < array.length; i++) { + if (array[i] > maxVal) { + maxVal = array[i]; + maxIdx = i; + } + } + + return maxIdx; + } + private CommonUtils() { + throw new RuntimeException(); + }; +} diff --git a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/merge/HDTMergeJoinIteratorTest.java b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/merge/HDTMergeJoinIteratorTest.java new file mode 100644 index 000000000..43c71f90d --- /dev/null +++ b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/merge/HDTMergeJoinIteratorTest.java @@ -0,0 +1,122 @@ +package com.the_qa_company.qendpoint.core.merge; + +import com.the_qa_company.qendpoint.core.dictionary.Dictionary; +import com.the_qa_company.qendpoint.core.enums.RDFNotation; +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; +import com.the_qa_company.qendpoint.core.enums.TripleComponentRole; +import com.the_qa_company.qendpoint.core.exceptions.NotFoundException; +import com.the_qa_company.qendpoint.core.exceptions.ParserException; +import com.the_qa_company.qendpoint.core.hdt.HDT; +import com.the_qa_company.qendpoint.core.hdt.HDTManager; +import com.the_qa_company.qendpoint.core.iterator.SuppliableIteratorTripleID; +import com.the_qa_company.qendpoint.core.listener.ProgressListener; +import com.the_qa_company.qendpoint.core.options.HDTOptions; +import com.the_qa_company.qendpoint.core.options.HDTOptionsKeys; +import com.the_qa_company.qendpoint.core.triples.TripleID; +import com.the_qa_company.qendpoint.core.triples.impl.BitmapTriplesIndexFile; +import org.junit.Assert; +import org.junit.Ignore; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; +import java.util.stream.Collectors; + +import static org.junit.Assert.assertFalse; +import static 
org.junit.Assert.assertSame; +import static org.junit.Assert.assertTrue; + +public class HDTMergeJoinIteratorTest { + + @Rule + public TemporaryFolder tempDir = TemporaryFolder.builder().assureDeletion().build(); + + private InputStream getStream(String filename) { + InputStream is = getClass().getResourceAsStream(filename); + Assert.assertNotNull("can't find file " + filename, is); + return is; + } + + @Test + @Ignore("wip") + public void itTest() throws IOException, ParserException, NotFoundException { + Path root = tempDir.newFolder().toPath(); + + Path hdtPath = root.resolve("test.hdt"); + HDTOptions spec = HDTOptions.of( + HDTOptionsKeys.LOADER_TYPE_KEY, HDTOptionsKeys.LOADER_TYPE_VALUE_DISK, + HDTOptionsKeys.LOADER_DISK_FUTURE_HDT_LOCATION_KEY, hdtPath, + HDTOptionsKeys.LOADER_DISK_LOCATION_KEY, root.resolve("gd"), + HDTOptionsKeys.DICTIONARY_TYPE_KEY, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS_LANG, + HDTOptionsKeys.BITMAPTRIPLES_INDEX_METHOD_KEY, HDTOptionsKeys.BITMAPTRIPLES_INDEX_METHOD_VALUE_DISK, + HDTOptionsKeys.BITMAPTRIPLES_INDEX_NO_FOQ, true, + // all indexes + HDTOptionsKeys.BITMAPTRIPLES_INDEX_OTHERS, Arrays.stream(TripleComponentOrder.values()).map(TripleComponentOrder::name).collect(Collectors.joining(",")) + ); + ProgressListener listener = ProgressListener.ignore(); + String ns = "http://example.org/#"; + try ( + InputStream is = getStream("/merge_ds.ttl"); + HDT hdt = HDTManager.generateHDT(is, ns, RDFNotation.TURTLE, spec, listener) + ) { + hdt.saveToHDT(hdtPath); + } + + try (HDT hdt = HDTManager.mapIndexedHDT(hdtPath, spec, listener)) { + // test index creation + assertTrue(Files.exists(BitmapTriplesIndexFile.getIndexPath(hdtPath, TripleComponentOrder.OPS))); + assertTrue(Files.exists(BitmapTriplesIndexFile.getIndexPath(hdtPath, TripleComponentOrder.POS))); + assertTrue(Files.exists(BitmapTriplesIndexFile.getIndexPath(hdtPath, TripleComponentOrder.PSO))); + + + /* + The query is ~that: + SELECT * { + ?s ex:relative ?o + ?o rdfs:name 
?n + ?o ex:id ?id + } + + */ + + Dictionary dict = hdt.getDictionary(); + long exRelative = dict.stringToId(ns + "relative", TripleComponentRole.PREDICATE); + long rdfsName = dict.stringToId("http://www.w3.org/2000/01/rdf-schema#name", TripleComponentRole.PREDICATE); + long exId = dict.stringToId(ns + "id", TripleComponentRole.PREDICATE); + + + TripleID p1 = new TripleID(0, exRelative, 0); + TripleID p2 = new TripleID(0, rdfsName, 0); + TripleID p3 = new TripleID(0, exId, 0); + + assertFalse(p1 + " empty", p1.isEmpty()); + assertFalse(p2 + " empty", p2.isEmpty()); + assertFalse(p3 + " empty", p3.isEmpty()); + + SuppliableIteratorTripleID it1 = hdt.getTriples().search(p1, TripleComponentOrder.POS.mask); + SuppliableIteratorTripleID it2 = hdt.getTriples().search(p2, TripleComponentOrder.PSO.mask); + SuppliableIteratorTripleID it3 = hdt.getTriples().search(p3, TripleComponentOrder.PSO.mask); + + assertSame("invalid order ", TripleComponentOrder.POS, it1.getOrder()); + assertSame("invalid order ", TripleComponentOrder.PSO, it2.getOrder()); + assertSame("invalid order ", TripleComponentOrder.PSO, it3.getOrder()); + + HDTMergeJoinIterator it = new HDTMergeJoinIterator(List.of( + new HDTMergeJoinIterator.MergeIteratorData(it1, TripleComponentRole.OBJECT), + new HDTMergeJoinIterator.MergeIteratorData(it2, TripleComponentRole.SUBJECT), + new HDTMergeJoinIterator.MergeIteratorData(it3, TripleComponentRole.SUBJECT) + )); + + System.out.println(it.hasNext()); + it.forEachRemaining(lst -> System.out.println(lst.stream().map(d -> dict.toTripleString(Objects.requireNonNull(d.peek())).toString()).collect(Collectors.joining(" - ")))); + } + + } +} \ No newline at end of file diff --git a/qendpoint-core/src/test/resources/merge_ds.ttl b/qendpoint-core/src/test/resources/merge_ds.ttl new file mode 100644 index 000000000..3d11f1f92 --- /dev/null +++ b/qendpoint-core/src/test/resources/merge_ds.ttl @@ -0,0 +1,27 @@ + +@prefix ex: . +@prefix rdfs: . 
+ +ex:s1 rdfs:name "test" ; + ex:relative ex:s2, ex:s3 . + + +ex:s2 rdfs:name "test 2" . +ex:s3 rdfs:name "test 3" ; + ex:relative ex:s4 ; + ex:id "id3". + +ex:s4 rdfs:name "test 4" ; + ex:relative ex:s3 ; + ex:id "id42", "id43" . + + +ex:s5 rdfs:name "test 5" ; + ex:relative ex:s5 ; + ex:id "id51", "id52", "id53". + +ex:s6 rdfs:name "test 6" ; + ex:relative ex:s5 ; + ex:relative ex:s6 ; + ex:id "id51". + diff --git a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointStore.java b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointStore.java index 62a7a488d..14341ee5b 100644 --- a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointStore.java +++ b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointStore.java @@ -77,6 +77,7 @@ public class EndpointStore extends AbstractNotifyingSail { * disable the optimizer */ public static final String QUERY_CONFIG_NO_OPTIMIZER = "no_optimizer"; + public static final String QUERY_CONFIG_NO_OPTIMIZER_MERGE = "no_optimizer_merge"; /** * get the query plan */ diff --git a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointStoreConnection.java b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointStoreConnection.java index 62b8fff4b..c9ac865d5 100644 --- a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointStoreConnection.java +++ b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointStoreConnection.java @@ -825,6 +825,10 @@ public void run() { } } + public EndpointTripleSource getTripleSource() { + return tripleSource; + } + private class EndpointStoreConnectionListener implements SailConnectionListener { private boolean shouldHandle() { return !endpoint.isMerging() || !endpoint.isNotificationsFreeze(); diff --git a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointStoreQueryPreparer.java 
b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointStoreQueryPreparer.java index 1ddf27800..74d1e0e1a 100644 --- a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointStoreQueryPreparer.java +++ b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointStoreQueryPreparer.java @@ -2,6 +2,7 @@ import com.the_qa_company.qendpoint.federation.SPARQLServiceWikibaseLabelResolver; import com.the_qa_company.qendpoint.federation.ServiceClauseOptimizer; +import com.the_qa_company.qendpoint.utils.MergeJoinOptimizer; import com.the_qa_company.qendpoint.utils.VariableToIdSubstitution; import org.eclipse.rdf4j.common.iteration.CloseableIteration; import org.eclipse.rdf4j.query.BindingSet; @@ -132,6 +133,7 @@ protected CloseableIteration evaluate(TupleExpr tupleExpr, new IterativeEvaluationOptimizer().optimize(tupleExpr, dataset, bindings); new FilterOptimizer().optimize(tupleExpr, dataset, bindings); new OrderLimitOptimizer().optimize(tupleExpr, dataset, bindings); + new MergeJoinOptimizer(conn).optimize(tupleExpr, dataset, bindings); } new ServiceClauseOptimizer().optimize(tupleExpr, dataset, bindings); diff --git a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointTripleSource.java b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointTripleSource.java index fb3f40f78..3a8689f77 100644 --- a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointTripleSource.java +++ b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointTripleSource.java @@ -52,6 +52,10 @@ public EndpointTripleSource(EndpointStoreConnection endpointStoreConnection, End this.enableMergeJoin = endpoint.getHDTSpec().getBoolean(EndpointStore.OPTION_QENDPOINT_MERGE_JOIN, false); } + public boolean hasEnableMergeJoin() { + return enableMergeJoin; + } + private void initHDTIndex() { this.numberOfCurrentTriples = this.endpoint.getHdt().getTriples().getNumberOfElements(); } 
diff --git a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/utils/MergeJoinOptimizer.java b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/utils/MergeJoinOptimizer.java new file mode 100644 index 000000000..b7776bad0 --- /dev/null +++ b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/utils/MergeJoinOptimizer.java @@ -0,0 +1,94 @@ +package com.the_qa_company.qendpoint.utils; + +import com.the_qa_company.qendpoint.store.EndpointStore; +import com.the_qa_company.qendpoint.store.EndpointStoreConnection; +import org.eclipse.rdf4j.query.BindingSet; +import org.eclipse.rdf4j.query.Dataset; +import org.eclipse.rdf4j.query.algebra.Join; +import org.eclipse.rdf4j.query.algebra.LeftJoin; +import org.eclipse.rdf4j.query.algebra.StatementPattern; +import org.eclipse.rdf4j.query.algebra.TupleExpr; +import org.eclipse.rdf4j.query.algebra.evaluation.QueryOptimizer; +import org.eclipse.rdf4j.query.algebra.helpers.AbstractQueryModelVisitor; + +import java.util.ArrayList; +import java.util.List; + +public class MergeJoinOptimizer implements QueryOptimizer { + private final EndpointStoreConnection conn; + + public MergeJoinOptimizer(EndpointStoreConnection conn) { + this.conn = conn; + } + + @Override + public void optimize(TupleExpr tupleExpr, Dataset dataset, BindingSet bindingSet) { + if (!conn.getTripleSource().hasEnableMergeJoin() + || conn.hasConfig(EndpointStore.QUERY_CONFIG_NO_OPTIMIZER_MERGE)) { + return; // merge join disabled, ignore + } + + ModelVisitor visitor = new ModelVisitor(); + tupleExpr.visit(visitor); + + } + + protected static class ModelVisitor extends AbstractQueryModelVisitor { + + private boolean getJoinPatterns(Join node, List patterns) { + TupleExpr la = node.getLeftArg(); + if (la instanceof Join laj && getJoinPatterns(laj, patterns)) { + return true; + } + if (!(la instanceof StatementPattern stmt)) { + return true; + } + patterns.add(stmt); + + TupleExpr ra = node.getRightArg(); + if (ra instanceof Join raj && 
getJoinPatterns(raj, patterns)) { + return true; + } + if (!(ra instanceof StatementPattern stmt2)) { + return true; + } + patterns.add(stmt2); + return false; + } + + private List getJoinPatterns(Join node) { + List patterns = new ArrayList<>(); + if (getJoinPatterns(node, patterns)) { + return List.of(); + } + return patterns; + } + + @Override + public void meet(Join node) { + // stack the triple patterns + TupleExpr la = node.getLeftArg(); + TupleExpr ra = node.getRightArg(); + + List patterns = getJoinPatterns(node); + + if (patterns.isEmpty()) { + super.meet(node); + return; + } + + + for (StatementPattern p : patterns) { + // TODO: we can replace the patterns + p.getObjectVar().hasValue(); + } + + } + + @Override + public void meet(LeftJoin node) { + super.meet(node); + } + + } +} From 60762208ec16ae73a139adc9d1f023569d6b2724 Mon Sep 17 00:00:00 2001 From: qaate47 Date: Tue, 26 Nov 2024 12:02:01 +0100 Subject: [PATCH 2/8] wip merge joins [skip ci] --- .../controller/EndpointController.java | 2 +- .../qendpoint/core/dictionary/Dictionary.java | 12 +- .../qendpoint/core/hdt/HDTManagerImpl.java | 3 + .../core/merge/HDTMergeJoinIterator.java | 17 +-- .../core/merge/HDTMergeJoinPreparer.java | 107 +++++++++++++++++- .../qendpoint/core/triples/TripleID.java | 14 ++- .../core/triples/impl/BitmapTriples.java | 2 +- .../qendpoint/core/util/CommonUtils.java | 2 + .../core/merge/HDTMergeJoinIteratorTest.java | 48 ++++---- .../qendpoint/utils/MergeJoinOptimizer.java | 1 - 10 files changed, 149 insertions(+), 59 deletions(-) diff --git a/qendpoint-backend/src/main/java/com/the_qa_company/qendpoint/controller/EndpointController.java b/qendpoint-backend/src/main/java/com/the_qa_company/qendpoint/controller/EndpointController.java index e36497c18..09a2d6677 100644 --- a/qendpoint-backend/src/main/java/com/the_qa_company/qendpoint/controller/EndpointController.java +++ b/qendpoint-backend/src/main/java/com/the_qa_company/qendpoint/controller/EndpointController.java @@ -41,7 
+41,7 @@ public record FormatReturn(String query) {} @Autowired Sparql sparql; - @RequestMapping(value = "/sparql") + @RequestMapping(value = "/sparql", method = { RequestMethod.GET, RequestMethod.POST }) public void sparqlEndpoint(@RequestParam(value = "query", required = false) final String query, @RequestParam(value = "update", required = false) final String updateQuery, @RequestParam(value = "format", defaultValue = "json") final String format, diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/Dictionary.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/Dictionary.java index 36df85624..ef183d57f 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/Dictionary.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/Dictionary.java @@ -269,17 +269,13 @@ default TripleID toTripleId(TripleString tsstr) { default TripleString toTripleString(TripleID tssid) { if (tssid.isQuad()) { - return new QuadString( - idToString(tssid.getSubject(), TripleComponentRole.SUBJECT), + return new QuadString(idToString(tssid.getSubject(), TripleComponentRole.SUBJECT), idToString(tssid.getPredicate(), TripleComponentRole.PREDICATE), idToString(tssid.getObject(), TripleComponentRole.OBJECT), - idToString(tssid.getGraph(), TripleComponentRole.GRAPH) - ); + idToString(tssid.getGraph(), TripleComponentRole.GRAPH)); } - return new TripleString( - idToString(tssid.getSubject(), TripleComponentRole.SUBJECT), + return new TripleString(idToString(tssid.getSubject(), TripleComponentRole.SUBJECT), idToString(tssid.getPredicate(), TripleComponentRole.PREDICATE), - idToString(tssid.getObject(), TripleComponentRole.OBJECT) - ); + idToString(tssid.getObject(), TripleComponentRole.OBJECT)); } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerImpl.java 
b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerImpl.java index ce9aa87a0..c1e2b78eb 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerImpl.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerImpl.java @@ -232,6 +232,9 @@ public HDTResult doGenerateHDT(String rdfFileName, String baseURI, RDFNotation r } else { try { preSize = Files.size(preDownload); + if (preSize == trueSize) { + break; + } } catch (IOException ignore) { preSize = 0; } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/merge/HDTMergeJoinIterator.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/merge/HDTMergeJoinIterator.java index 06e2ef27b..f19c34dee 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/merge/HDTMergeJoinIterator.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/merge/HDTMergeJoinIterator.java @@ -22,15 +22,16 @@ public MergeIteratorData(IteratorTripleID it, TripleComponentRole role) { public long getSeekLayer(TripleID id) { return switch (role) { - case OBJECT -> id.getObject(); - case PREDICATE -> id.getPredicate(); - case SUBJECT -> id.getSubject(); - case GRAPH -> id.getGraph(); + case OBJECT -> id.getObject(); + case PREDICATE -> id.getPredicate(); + case SUBJECT -> id.getSubject(); + case GRAPH -> id.getGraph(); }; } /** * goto a layer + * * @param id layer * @return if we reach the end */ @@ -58,9 +59,11 @@ public boolean hasNext() { } public TripleID peek() { - if (hasNext()) return last; + if (hasNext()) + return last; return null; } + public TripleID next() { if (hasNext()) { loaded = false; @@ -140,11 +143,11 @@ private boolean seekAll() { @Override protected List getNext() { moveNext(); - if (!seekAll()) return null; + if (!seekAll()) + return null; // all the iterators are peeked with the same layer, we can read return iterators; } - } diff --git 
a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/merge/HDTMergeJoinPreparer.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/merge/HDTMergeJoinPreparer.java index bf097d177..7fc6352bd 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/merge/HDTMergeJoinPreparer.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/merge/HDTMergeJoinPreparer.java @@ -1,16 +1,30 @@ package com.the_qa_company.qendpoint.core.merge; +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; +import com.the_qa_company.qendpoint.core.enums.TripleComponentRole; import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException; +import com.the_qa_company.qendpoint.core.exceptions.ParserException; import com.the_qa_company.qendpoint.core.hdt.HDT; +import com.the_qa_company.qendpoint.core.hdt.HDTManager; +import com.the_qa_company.qendpoint.core.listener.ProgressListener; +import com.the_qa_company.qendpoint.core.options.HDTOptions; +import com.the_qa_company.qendpoint.core.quad.QuadString; import com.the_qa_company.qendpoint.core.triples.TripleID; +import com.the_qa_company.qendpoint.core.triples.TripleString; import com.the_qa_company.qendpoint.core.util.CommonUtils; -import com.the_qa_company.qendpoint.core.util.string.ByteString; - +import com.the_qa_company.qendpoint.core.util.StopWatch; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedReader; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; import java.util.ArrayList; import java.util.List; -import java.util.stream.IntStream; public class HDTMergeJoinPreparer { + private static final Logger logger = LoggerFactory.getLogger(HDTMergeJoinPreparer.class); private final HDT hdt; private final List patterns = new ArrayList<>(); private int keyIds; @@ -44,18 +58,18 @@ public List buildIteration() { for (TripleID patt : patterns) { long pp = patt.getPredicate(); if (pp < 
0) { - occP[1 - (int)pp]++; + occP[1 - (int) pp]++; } long ss = patt.getSubject(); long oo = patt.getObject(); if (ss < 0) { - occSH[1 - (int)ss]++; + occSH[1 - (int) ss]++; } if (oo < 0) { if (ss != oo) { // avoid double var - occSH[1 - (int)oo]++; + occSH[1 - (int) oo]++; } } } @@ -79,4 +93,85 @@ public List buildIteration() { return lst; } + + public static void main(String[] args) throws IOException, ParserException { + if (args.length < 2) { + logger.error("Missing param: Usage [hdt] [desc]"); + return; + } + logger.info("Test preparer"); + String hdtPath = args[0]; + + record TPData(TripleString ts, TripleComponentRole role, TripleComponentOrder order) {} + + List data = new ArrayList<>(); + + try (BufferedReader r = Files.newBufferedReader(Path.of(args[1]))) { + String line; + + while ((line = r.readLine()) != null) { + if (line.isEmpty() || line.charAt(0) == '#') + continue; // comment + + QuadString ts = new QuadString(); + ts.read(line, true); + + if (ts.getSubject().equals("var")) + ts.setSubject(null); + if (ts.getPredicate().equals("var")) + ts.setPredicate(null); + if (ts.getObject().equals("var")) + ts.setObject(null); + + logger.info("read {}", ts); + + String orderCfg = ts.getGraph().toString(); + + if (orderCfg.isEmpty()) { + logger.error("Invalid role cfg: empty"); + return; + } + String[] cfg = orderCfg.split(":"); + TripleComponentRole role = TripleComponentRole.valueOf(cfg[0]); + TripleComponentOrder order = TripleComponentOrder.valueOf(cfg[1]); + ts.setGraph(null); + + data.add(new TPData(new TripleString(ts), role, order)); + } + } + + logger.info("Config loaded"); + data.forEach(c -> logger.info("- {}", c)); + logger.info("Loading HDT for query"); + HDTOptions spec = HDTOptions.of("bitmaptriples.index.allowOldOthers", true); + try (HDT hdt = HDTManager.mapIndexedHDT(hdtPath, spec, ProgressListener.sout())) { + List mergeData = new ArrayList<>(); + for (TPData tpd : data) { + TripleID tid = hdt.getDictionary().toTripleId(tpd.ts()); + + if 
(tid.isNoMatch()) { + logger.error("Triple {} is invalid", tpd.ts()); + return; + } + + mergeData.add(new HDTMergeJoinIterator.MergeIteratorData(hdt.getTriples().search(tid, tpd.order().mask), + tpd.role())); + } + + HDTMergeJoinIterator it = new HDTMergeJoinIterator(mergeData); + + logger.info("results:"); + StopWatch sw = new StopWatch(); + sw.reset(); + long ret = 0; + while (it.hasNext()) { + it.next(); + ret++; + // logger.info("- {}", it.next()); + } + logger.info("Done in {} {}", sw.stopAndShow(), ret); + } + logger.info("Unmapped"); + } + } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleID.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleID.java index bf2c093d0..923697f85 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleID.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleID.java @@ -242,22 +242,24 @@ public int compareTo(TripleID other) { /** * get a component value from its role + * * @param role role * @return component value */ public long get(TripleComponentRole role) { return switch (role) { - case SUBJECT -> getSubject(); - case PREDICATE -> getPredicate(); - case OBJECT -> getObject(); - case GRAPH -> getGraph(); + case SUBJECT -> getSubject(); + case PREDICATE -> getPredicate(); + case OBJECT -> getObject(); + case GRAPH -> getGraph(); }; } /** * compare this triple id with another triple id using order remap - * @param other other triple id - * @param orderThis order of this triple id + * + * @param other other triple id + * @param orderThis order of this triple id * @param orderOther order of the other triple id * @return compare result */ diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriples.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriples.java index 16dd2b141..81bed8188 100644 --- 
a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriples.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriples.java @@ -114,7 +114,7 @@ public class BitmapTriples implements TriplesPrivate, BitmapTriplesIndex { protected boolean isClosed; public BitmapTriples() throws IOException { - this(new HDTSpecification()); + this(HDTOptions.empty()); } public BitmapTriples(HDTOptions spec) throws IOException { diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/CommonUtils.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/CommonUtils.java index 655746c96..5824cbf97 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/CommonUtils.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/CommonUtils.java @@ -16,6 +16,7 @@ public static int minArg(int[] array) { return minIdx; } + public static int maxArg(int[] array) { if (array.length < 2) { return 0; @@ -31,6 +32,7 @@ public static int maxArg(int[] array) { return maxIdx; } + private CommonUtils() { throw new RuntimeException(); }; diff --git a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/merge/HDTMergeJoinIteratorTest.java b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/merge/HDTMergeJoinIteratorTest.java index 43c71f90d..e6e0b43da 100644 --- a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/merge/HDTMergeJoinIteratorTest.java +++ b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/merge/HDTMergeJoinIteratorTest.java @@ -50,22 +50,18 @@ public void itTest() throws IOException, ParserException, NotFoundException { Path root = tempDir.newFolder().toPath(); Path hdtPath = root.resolve("test.hdt"); - HDTOptions spec = HDTOptions.of( - HDTOptionsKeys.LOADER_TYPE_KEY, HDTOptionsKeys.LOADER_TYPE_VALUE_DISK, - HDTOptionsKeys.LOADER_DISK_FUTURE_HDT_LOCATION_KEY, hdtPath, - 
HDTOptionsKeys.LOADER_DISK_LOCATION_KEY, root.resolve("gd"), - HDTOptionsKeys.DICTIONARY_TYPE_KEY, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS_LANG, - HDTOptionsKeys.BITMAPTRIPLES_INDEX_METHOD_KEY, HDTOptionsKeys.BITMAPTRIPLES_INDEX_METHOD_VALUE_DISK, - HDTOptionsKeys.BITMAPTRIPLES_INDEX_NO_FOQ, true, + HDTOptions spec = HDTOptions.of(HDTOptionsKeys.LOADER_TYPE_KEY, HDTOptionsKeys.LOADER_TYPE_VALUE_DISK, + HDTOptionsKeys.LOADER_DISK_FUTURE_HDT_LOCATION_KEY, hdtPath, HDTOptionsKeys.LOADER_DISK_LOCATION_KEY, + root.resolve("gd"), HDTOptionsKeys.DICTIONARY_TYPE_KEY, + HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS_LANG, HDTOptionsKeys.BITMAPTRIPLES_INDEX_METHOD_KEY, + HDTOptionsKeys.BITMAPTRIPLES_INDEX_METHOD_VALUE_DISK, HDTOptionsKeys.BITMAPTRIPLES_INDEX_NO_FOQ, true, // all indexes - HDTOptionsKeys.BITMAPTRIPLES_INDEX_OTHERS, Arrays.stream(TripleComponentOrder.values()).map(TripleComponentOrder::name).collect(Collectors.joining(",")) - ); + HDTOptionsKeys.BITMAPTRIPLES_INDEX_OTHERS, Arrays.stream(TripleComponentOrder.values()) + .map(TripleComponentOrder::name).collect(Collectors.joining(","))); ProgressListener listener = ProgressListener.ignore(); String ns = "http://example.org/#"; - try ( - InputStream is = getStream("/merge_ds.ttl"); - HDT hdt = HDTManager.generateHDT(is, ns, RDFNotation.TURTLE, spec, listener) - ) { + try (InputStream is = getStream("/merge_ds.ttl"); + HDT hdt = HDTManager.generateHDT(is, ns, RDFNotation.TURTLE, spec, listener)) { hdt.saveToHDT(hdtPath); } @@ -75,15 +71,9 @@ public void itTest() throws IOException, ParserException, NotFoundException { assertTrue(Files.exists(BitmapTriplesIndexFile.getIndexPath(hdtPath, TripleComponentOrder.POS))); assertTrue(Files.exists(BitmapTriplesIndexFile.getIndexPath(hdtPath, TripleComponentOrder.PSO))); - /* - The query is ~that: - SELECT * { - ?s ex:relative ?o - ?o rdfs:name ?n - ?o ex:id ?id - } - + * The query is ~that: SELECT * { ?s ex:relative ?o ?o rdfs:name ?n + * ?o ex:id ?id } */ 
Dictionary dict = hdt.getDictionary(); @@ -91,7 +81,6 @@ public void itTest() throws IOException, ParserException, NotFoundException { long rdfsName = dict.stringToId("http://www.w3.org/2000/01/rdf-schema#name", TripleComponentRole.PREDICATE); long exId = dict.stringToId(ns + "id", TripleComponentRole.PREDICATE); - TripleID p1 = new TripleID(0, exRelative, 0); TripleID p2 = new TripleID(0, rdfsName, 0); TripleID p3 = new TripleID(0, exId, 0); @@ -108,15 +97,16 @@ public void itTest() throws IOException, ParserException, NotFoundException { assertSame("invalid order ", TripleComponentOrder.PSO, it2.getOrder()); assertSame("invalid order ", TripleComponentOrder.PSO, it3.getOrder()); - HDTMergeJoinIterator it = new HDTMergeJoinIterator(List.of( - new HDTMergeJoinIterator.MergeIteratorData(it1, TripleComponentRole.OBJECT), - new HDTMergeJoinIterator.MergeIteratorData(it2, TripleComponentRole.SUBJECT), - new HDTMergeJoinIterator.MergeIteratorData(it3, TripleComponentRole.SUBJECT) - )); + HDTMergeJoinIterator it = new HDTMergeJoinIterator( + List.of(new HDTMergeJoinIterator.MergeIteratorData(it1, TripleComponentRole.OBJECT), + new HDTMergeJoinIterator.MergeIteratorData(it2, TripleComponentRole.SUBJECT), + new HDTMergeJoinIterator.MergeIteratorData(it3, TripleComponentRole.SUBJECT))); System.out.println(it.hasNext()); - it.forEachRemaining(lst -> System.out.println(lst.stream().map(d -> dict.toTripleString(Objects.requireNonNull(d.peek())).toString()).collect(Collectors.joining(" - ")))); + it.forEachRemaining(lst -> System.out + .println(lst.stream().map(d -> dict.toTripleString(Objects.requireNonNull(d.peek())).toString()) + .collect(Collectors.joining(" - ")))); } } -} \ No newline at end of file +} diff --git a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/utils/MergeJoinOptimizer.java b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/utils/MergeJoinOptimizer.java index b7776bad0..a6eb27e7c 100644 --- 
a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/utils/MergeJoinOptimizer.java +++ b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/utils/MergeJoinOptimizer.java @@ -77,7 +77,6 @@ public void meet(Join node) { return; } - for (StatementPattern p : patterns) { // TODO: we can replace the patterns p.getObjectVar().hasValue(); From fb85a5ef6ddcf7a45cd4949614d0d18cf57bca37 Mon Sep 17 00:00:00 2001 From: qaate47 Date: Wed, 27 Nov 2024 15:31:28 +0100 Subject: [PATCH 3/8] remove previous from the triple id iterators [skip ci] --- .../SequentialSearchIteratorTripleID.java | 46 ----------- .../utils/GraphFilteringTripleId.java | 9 --- .../iterator/utils/ListTripleIDIterator.java | 20 ----- .../core/merge/HDTMergeJoinIterator.java | 3 +- .../quad/impl/BitmapTriplesIteratorGraph.java | 10 --- .../impl/BitmapTriplesIteratorGraphG.java | 10 --- .../core/triples/IteratorTripleID.java | 65 +++++++++++---- .../triples/impl/BitmapTriplesIterator.java | 80 ++++++++++++------- .../impl/BitmapTriplesIteratorCat.java | 10 --- .../impl/BitmapTriplesIteratorMapDiff.java | 10 --- .../triples/impl/BitmapTriplesIteratorY.java | 31 ------- .../impl/BitmapTriplesIteratorYFOQ.java | 35 -------- .../triples/impl/BitmapTriplesIteratorZ.java | 18 ----- .../impl/BitmapTriplesIteratorZFOQ.java | 27 ------- .../triples/impl/EmptyTriplesIterator.java | 10 --- .../core/triples/impl/OneReadTempTriples.java | 10 --- .../core/triples/impl/TriplesList.java | 18 ----- .../core/triples/impl/TriplesListLong.java | 19 ----- .../compress/NoDuplicateTripleIDIterator.java | 10 --- .../triples/impl/BitmapQuadTriplesTest.java | 9 --- 20 files changed, 101 insertions(+), 349 deletions(-) diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/SequentialSearchIteratorTripleID.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/SequentialSearchIteratorTripleID.java index feb76d276..46de10431 100644 --- 
a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/SequentialSearchIteratorTripleID.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/SequentialSearchIteratorTripleID.java @@ -97,52 +97,6 @@ public TripleID next() { return returnTriple; } - /* - * (non-Javadoc) - * @see hdt.iterator.IteratorTripleID#hasPrevious() - */ - @Override - public boolean hasPrevious() { - return hasPreviousTriples; - } - - private void doFetchPrevious() { - hasPreviousTriples = false; - - while (iterator.hasPrevious()) { - TripleID previous = iterator.previous(); - - if (previous.match(pattern)) { - hasPreviousTriples = true; - hasMoreTriples = true; - previousTriple.assign(previous); - previousPosition = iterator.getLastTriplePositionSupplier(); - break; - } - } - } - - /* - * (non-Javadoc) - * @see hdt.iterator.IteratorTripleID#previous() - */ - @Override - public TripleID previous() { - if (goingUp) { - goingUp = false; - if (hasMoreTriples) { - doFetchPrevious(); - } - doFetchPrevious(); - } - returnTriple.assign(previousTriple); - lastPosition = previousPosition; - - doFetchPrevious(); - - return returnTriple; - } - /* * (non-Javadoc) * @see hdt.iterator.IteratorTripleID#goToStart() diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/utils/GraphFilteringTripleId.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/utils/GraphFilteringTripleId.java index 04185bac8..a1b597c0e 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/utils/GraphFilteringTripleId.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/utils/GraphFilteringTripleId.java @@ -16,15 +16,6 @@ public GraphFilteringTripleId(IteratorTripleID iterator, long[] graphIds) { this.graphIds = graphIds; } - @Override - public boolean hasPrevious() { - throw new NotImplementedException(); - } - - @Override - public TripleID previous() { - throw new 
NotImplementedException(); - } @Override public void goToStart() { diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/utils/ListTripleIDIterator.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/utils/ListTripleIDIterator.java index b07b9c972..926b223e6 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/utils/ListTripleIDIterator.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/utils/ListTripleIDIterator.java @@ -42,26 +42,6 @@ public TripleID next() { return triplesList.get(pos++); } - /* - * (non-Javadoc) - * @see hdt.iterator.IteratorTripleID#hasPrevious() - */ - @Override - public boolean hasPrevious() { - return pos > 0; - } - - /* - * (non-Javadoc) - * @see hdt.iterator.IteratorTripleID#previous() - */ - @Override - public TripleID previous() { - TripleID tripleID = triplesList.get(--pos); - lastPosition = pos; - return tripleID; - } - /* * (non-Javadoc) * @see hdt.iterator.IteratorTripleID#goToStart() diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/merge/HDTMergeJoinIterator.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/merge/HDTMergeJoinIterator.java index f19c34dee..e5affa3e4 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/merge/HDTMergeJoinIterator.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/merge/HDTMergeJoinIterator.java @@ -59,8 +59,9 @@ public boolean hasNext() { } public TripleID peek() { - if (hasNext()) + if (hasNext()) { return last; + } return null; } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapTriplesIteratorGraph.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapTriplesIteratorGraph.java index aa296be65..8fbcd1efb 100644 --- 
a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapTriplesIteratorGraph.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapTriplesIteratorGraph.java @@ -64,16 +64,6 @@ protected TripleID getNext() { } } - @Override - public boolean hasPrevious() { - throw new NotImplementedException(); - } - - @Override - public TripleID previous() { - throw new NotImplementedException(); - } - @Override public void goToStart() { tidIt.goToStart(); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapTriplesIteratorGraphG.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapTriplesIteratorGraphG.java index b6f73594c..0ef4a97fd 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapTriplesIteratorGraphG.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapTriplesIteratorGraphG.java @@ -49,16 +49,6 @@ protected TripleID getNext() { return tripleID; } - @Override - public boolean hasPrevious() { - throw new NotImplementedException(); - } - - @Override - public TripleID previous() { - throw new NotImplementedException(); - } - @Override public void goToStart() { posZ = -1; diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/IteratorTripleID.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/IteratorTripleID.java index 828f58c78..0c57074dd 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/IteratorTripleID.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/IteratorTripleID.java @@ -21,6 +21,7 @@ import com.the_qa_company.qendpoint.core.enums.ResultEstimationType; import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; +import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException; import java.util.Iterator; @@ -29,21 +30,6 @@ */ 
public interface IteratorTripleID extends Iterator { - /** - * Whether the iterator has previous elements. - * - * @return boolean - */ - boolean hasPrevious(); - - /** - * Get the previous element. Call only if hasPrevious() returns true. It - * moves the cursor of the Iterator to the previous entry. - * - * @return TripleID - */ - TripleID previous(); - /** * Point the cursor to the first element of the data structure. */ @@ -103,4 +89,53 @@ public interface IteratorTripleID extends Iterator { default boolean isLastTriplePositionBoundToOrder() { return false; } + + /** + * goto the next subject >= id + * @param id id + * @return true if the next subject == id + * @see #canGoToSubject() if can goto returns false, this function is not available + */ + default boolean gotoSubject(long id) { + return false; + } + + /** + * goto the next predicate >= id + * @param id id + * @return true if the next predicate == id + * @see #canGoToPredicate() if can goto returns false, this function is not available + */ + default boolean gotoPredicate(long id) { + return false; + } + + /** + * goto the next object >= id + * @param id id + * @return true if the next object == id + * @see #canGoToObject() if can goto returns false, this function is not available + */ + default boolean gotoObject(long id) { + return false; + } + + /** + * @return true if {@link #gotoSubject(long)} can be used, false otherwise + */ + default boolean canGoToSubject() { + return false; + } + /** + * @return true if {@link #gotoPredicate(long)} can be used, false otherwise + */ + default boolean canGoToPredicate() { + return false; + } + /** + * @return true if {@link #gotoObject(long)} can be used, false otherwise + */ + default boolean canGoToObject() { + return false; + } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIterator.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIterator.java index 
fa81599da..c36c82fe5 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIterator.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIterator.java @@ -21,6 +21,7 @@ import com.the_qa_company.qendpoint.core.enums.ResultEstimationType; import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; +import com.the_qa_company.qendpoint.core.enums.TripleComponentRole; import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException; import com.the_qa_company.qendpoint.core.iterator.SuppliableIteratorTripleID; import com.the_qa_company.qendpoint.core.triples.TripleID; @@ -169,37 +170,6 @@ public TripleID next() { return returnTriple; } - /* - * (non-Javadoc) - * @see hdt.iterator.IteratorTripleID#hasPrevious() - */ - @Override - public boolean hasPrevious() { - return posZ > minZ; - } - - /* - * (non-Javadoc) - * @see hdt.iterator.IteratorTripleID#previous() - */ - @Override - public TripleID previous() { - posZ--; - - posY = adjZ.findListIndex(posZ); - - z = adjZ.get(posZ); - y = adjY.get(posY); - x = adjY.findListIndex(posY) + 1; - - nextY = adjY.last(x - 1) + 1; - nextZ = adjZ.last(posY) + 1; - - updateOutput(); - - return returnTriple; - } - /* * (non-Javadoc) * @see hdt.iterator.IteratorTripleID#goToStart() @@ -299,4 +269,52 @@ public long getLastTriplePosition() { public boolean isLastTriplePositionBoundToOrder() { return true; } + + + private boolean gotoOrder(long id, TripleComponentRole role) { + switch (role) { + case SUBJECT -> { + + return false; + } + case PREDICATE -> { + + return false; + } + case OBJECT -> { + + return false; + } + default -> throw new NotImplementedException("goto " + role); + } + } + + @Override + public boolean gotoSubject(long id) { + return gotoOrder(id, idx.getOrder().getSubjectMapping()); + } + + + @Override + public boolean gotoPredicate(long id) { + return gotoOrder(id, idx.getOrder().getPredicateMapping()); + } + 
@Override + public boolean gotoObject(long id) { + return gotoOrder(id, idx.getOrder().getObjectMapping()); + } + + @Override + public boolean canGoToSubject() { + return true; + } + @Override + public boolean canGoToPredicate() { + return true; + } + @Override + public boolean canGoToObject() { + return true; + } + } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorCat.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorCat.java index 28a84759c..0ae1e60b4 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorCat.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorCat.java @@ -55,16 +55,6 @@ public BitmapTriplesIteratorCat(Triples hdt1, Triples hdt2, DictionaryCat dictio } - @Override - public boolean hasPrevious() { - return false; - } - - @Override - public TripleID previous() { - return null; - } - @Override public void goToStart() { diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorMapDiff.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorMapDiff.java index a85ee0610..b7d602804 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorMapDiff.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorMapDiff.java @@ -45,16 +45,6 @@ public BitmapTriplesIteratorMapDiff(HDT hdtOriginal, Bitmap deleteBitmap, Dictio count++; } - @Override - public boolean hasPrevious() { - return false; - } - - @Override - public TripleID previous() { - return null; - } - @Override public void goToStart() { diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorY.java 
b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorY.java index 71e7fe5ce..c7ce2bf98 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorY.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorY.java @@ -111,37 +111,6 @@ public TripleID next() { * (non-Javadoc) * @see hdt.iterator.IteratorTripleID#hasPrevious() */ - @Override - public boolean hasPrevious() { - return prevY != -1 || posZ >= prevZ; - } - - /* - * (non-Javadoc) - * @see hdt.iterator.IteratorTripleID#previous() - */ - @Override - public TripleID previous() { - if (posZ <= prevZ) { - nextY = posY; - posY = prevY; - prevY = adjY.findPreviousAppearance(prevY - 1, patY); - - posZ = prevZ = adjZ.find(posY); - nextZ = adjZ.last(posY); - - x = adjY.findListIndex(posY) + 1; - y = adjY.get(posY); - z = adjZ.get(posZ); - } else { - posZ--; - z = adjZ.get(posZ); - } - - updateOutput(); - - return returnTriple; - } /* * (non-Javadoc) diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorYFOQ.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorYFOQ.java index 726389578..dbbfea499 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorYFOQ.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorYFOQ.java @@ -112,41 +112,6 @@ public TripleID next() { return returnTriple; } - /* - * (non-Javadoc) - * @see hdt.iterator.IteratorTripleID#hasPrevious() - */ - @Override - public boolean hasPrevious() { - return numOccurrence > 1 || posZ >= prevZ; - } - - /* - * (non-Javadoc) - * @see hdt.iterator.IteratorTripleID#previous() - */ - @Override - public TripleID previous() { - if (posZ <= prevZ) { - numOccurrence--; - posY = 
triples.predicateIndex.getOccurrence(predBase, numOccurrence); - - prevZ = adjZ.find(posY); - posZ = nextZ = adjZ.last(posY); - - x = adjY.findListIndex(posY) + 1; - y = adjY.get(posY); - z = adjZ.get(posZ); - } else { - z = adjZ.get(posZ); - posZ--; - } - - updateOutput(); - - return returnTriple; - } - /* * (non-Javadoc) * @see hdt.iterator.IteratorTripleID#goToStart() diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorZ.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorZ.java index 91be651aa..670a59210 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorZ.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorZ.java @@ -94,24 +94,6 @@ public TripleID next() { return returnTriple; } - /* - * (non-Javadoc) - * @see hdt.iterator.IteratorTripleID#hasPrevious() - */ - @Override - public boolean hasPrevious() { - throw new NotImplementedException(); - } - - /* - * (non-Javadoc) - * @see hdt.iterator.IteratorTripleID#previous() - */ - @Override - public TripleID previous() { - throw new NotImplementedException(); - } - /* * (non-Javadoc) * @see hdt.iterator.IteratorTripleID#goToStart() diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorZFOQ.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorZFOQ.java index 51d74f537..b1ae4ae10 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorZFOQ.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorZFOQ.java @@ -159,33 +159,6 @@ public TripleID next() { return returnTriple; } - /* - * (non-Javadoc) - * @see hdt.iterator.IteratorTripleID#hasPrevious() - */ - @Override - public boolean 
hasPrevious() { - return posIndex > minIndex; - } - - /* - * (non-Javadoc) - * @see hdt.iterator.IteratorTripleID#previous() - */ - @Override - public TripleID previous() { - posIndex--; - - long posY = adjIndex.get(posIndex); - - z = patZ != 0 ? patZ : adjIndex.findListIndex(posIndex) + 1; - y = patY != 0 ? patY : adjY.get(posY); - x = adjY.findListIndex(posY) + 1; - - updateOutput(); - return returnTriple; - } - /* * (non-Javadoc) * @see hdt.iterator.IteratorTripleID#goToStart() diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/EmptyTriplesIterator.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/EmptyTriplesIterator.java index fc71a1bd5..6eef2b30d 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/EmptyTriplesIterator.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/EmptyTriplesIterator.java @@ -24,16 +24,6 @@ public TripleID next() { throw new NoSuchElementException(); } - @Override - public boolean hasPrevious() { - return false; - } - - @Override - public TripleID previous() { - throw new NoSuchElementException(); - } - @Override public void goToStart() { // Do nothing diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/OneReadTempTriples.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/OneReadTempTriples.java index 96c8f7381..029a65cd5 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/OneReadTempTriples.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/OneReadTempTriples.java @@ -233,16 +233,6 @@ public SimpleIteratorTripleID(Iterator it, TripleComponentOrder order, this.tripleCount = tripleCount; } - @Override - public boolean hasPrevious() { - throw new NotImplementedException(); - } - - @Override - public TripleID previous() { - throw new NotImplementedException(); 
- } - @Override public void goToStart() { throw new NotImplementedException(); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesList.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesList.java index 9f77fb77f..bc0c7f2e2 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesList.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesList.java @@ -448,24 +448,6 @@ public TripleID next() { return triplesList.arrayOfTriples.get(pos++).asTripleID(); } - /* - * (non-Javadoc) - * @see hdt.iterator.IteratorTripleID#hasPrevious() - */ - @Override - public boolean hasPrevious() { - return pos > 0; - } - - /* - * (non-Javadoc) - * @see hdt.iterator.IteratorTripleID#previous() - */ - @Override - public TripleID previous() { - lastPosition = --pos; - return triplesList.arrayOfTriples.get(pos).asTripleID(); - } /* * (non-Javadoc) diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesListLong.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesListLong.java index 3897a9722..c9157b277 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesListLong.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesListLong.java @@ -440,25 +440,6 @@ public TripleID next() { return triplesList.arrayOfTriples.get(pos++); } - /* - * (non-Javadoc) - * @see hdt.iterator.IteratorTripleID#hasPrevious() - */ - @Override - public boolean hasPrevious() { - return pos > 0; - } - - /* - * (non-Javadoc) - * @see hdt.iterator.IteratorTripleID#previous() - */ - @Override - public TripleID previous() { - lastPosition = --pos; - return triplesList.arrayOfTriples.get(pos); - } - /* * (non-Javadoc) * @see hdt.iterator.IteratorTripleID#goToStart() diff --git 
a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/NoDuplicateTripleIDIterator.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/NoDuplicateTripleIDIterator.java index 50c7806f4..b6c75e976 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/NoDuplicateTripleIDIterator.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/NoDuplicateTripleIDIterator.java @@ -49,16 +49,6 @@ public TripleID next() { return next; } - @Override - public boolean hasPrevious() { - throw new NotImplementedException(); - } - - @Override - public TripleID previous() { - throw new NotImplementedException(); - } - @Override public void goToStart() { throw new NotImplementedException(); diff --git a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapQuadTriplesTest.java b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapQuadTriplesTest.java index aeeb40c51..09344fdfd 100644 --- a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapQuadTriplesTest.java +++ b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapQuadTriplesTest.java @@ -34,15 +34,6 @@ private static IteratorTripleID fromList(List lst) { private int current; private int lastLoc; - @Override - public boolean hasPrevious() { - return false; - } - - @Override - public TripleID previous() { - return null; - } @Override public void goToStart() { From c8b45813e0ccce6138f0c4903f28c4ed8410141b Mon Sep 17 00:00:00 2001 From: qaate47 Date: Wed, 27 Nov 2024 17:45:28 +0100 Subject: [PATCH 4/8] wip jump triple id iterators [skip ci] --- .../core/compact/bitmap/AdjacencyList.java | 47 +++++++- .../triples/impl/BitmapTriplesIterator.java | 113 +++++++++++++++++- .../impl/BitmapTriplesIteratorTest.java | 68 +++++++++++ 3 files changed, 220 insertions(+), 8 deletions(-) diff --git 
a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/AdjacencyList.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/AdjacencyList.java index c60885b40..19bf26e5e 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/AdjacencyList.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/AdjacencyList.java @@ -111,7 +111,7 @@ public long countItemsY(long x) { return last(x) - find(x) + 1; } - public long search(long element, long begin, long end) throws NotFoundException { + public long search(long element, long begin, long end) { if (end - begin > 10) { return binSearch(element, begin, end); } else { @@ -134,7 +134,7 @@ public long binSearch(long element, long begin, long end) { return -1; } - public long linSearch(long element, long begin, long end) throws NotFoundException { + public long linSearch(long element, long begin, long end) { while (begin <= end) { long read = array.get(begin); if (read == element) { @@ -142,7 +142,41 @@ public long linSearch(long element, long begin, long end) throws NotFoundExcepti } begin++; } - throw new NotFoundException(); + return -1; + } + + public long searchLoc(long element, long begin, long end) { + if (end - begin > 10) { + return binSearchLoc(element, begin, end); + } else { + return linSearchLoc(element, begin, end); + } + } + + public long binSearchLoc(long element, long begin, long end) { + while (begin <= end) { + long mid = (begin + end) / 2; + long read = array.get(mid); + if (element > read) { + begin = mid + 1; + } else if (element < read) { + end = mid - 1; + } else { + return mid; + } + } + return -(1 + begin); + } + + public long linSearchLoc(long element, long begin, long end) { + while (begin <= end) { + long read = array.get(begin); + if (read == element) { + return begin; + } + begin++; + } + return -(1 + begin); } public final long get(long pos) { @@ -237,4 +271,11 @@ public void 
dump() { System.out.println(); } + public Sequence getArray() { + return array; + } + + public Bitmap getBitmap() { + return bitmap; + } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIterator.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIterator.java index c36c82fe5..e62df1323 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIterator.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIterator.java @@ -270,20 +270,124 @@ public boolean isLastTriplePositionBoundToOrder() { return true; } - private boolean gotoOrder(long id, TripleComponentRole role) { switch (role) { case SUBJECT -> { + if (patX != 0) { + return id == patX; // can't jump or already on the right element + } + + patX = id; + findRange(); + patX = 0; - return false; + return true; // we know x exists because we are using } case PREDICATE -> { + if (patY != 0) { + return id == patY; // can't jump or already on the right element + } + + if (posY == nextY) { + return false; // no next element + } + + long curr = this.adjY.get(posY); + + if (curr >= id) { + return curr == id; + } + if (posY + 1 == nextY) { + return false; // no next element + } + + long last = this.adjY.get(nextY - 1); + + + boolean res; + + if (last > id) { + // binary search between curr <-> last id + long loc = this.adjY.searchLoc(id, posY + 1, nextY - 2); + + if (loc > 0) { + res = true; + posY = loc; + y = id; + } else { + res = false; + posY = -loc - 1; + y = adjY.get(posY); + } + } else if (last != id) { + // last < id - GOTO end + 1 + posY = nextY; + res = false; + } else { + // last == id - GOTO last + posY = nextY - 1; + y = adjY.get(posY); + res = true; + } + + nextY = adjY.findNext(posY) + 1; - return false; + // down to z/posZ/nextZ? 
+ posZ = adjZ.find(posY, patZ); + nextZ = adjZ.findNext(posZ) + 1; + + return res; } case OBJECT -> { + if (patZ != 0) { + return id == patZ; // can't jump or already on the right element + } + + if (posZ == nextZ) { + return false; // no next element + } + + long curr = this.adjZ.get(posZ); + + if (curr >= id) { + return curr == id; + } + if (posZ + 1 == nextZ) { + return false; // no next element + } + + long last = this.adjZ.get(nextZ - 1); + - return false; + boolean res; + + if (last > id) { + // binary search between curr <-> last id + long loc = this.adjZ.searchLoc(id, posZ + 1, nextZ - 2); + + if (loc >= 0) { //match + res = true; + posZ = loc; + //z = id; // no need to compute the z, it is only used in next() + } else { + res = false; + posZ = -loc - 1; + //z = adjZ.get(posZ); + } + } else if (last != id) { + // last < id - GOTO end + posZ = nextZ; + res = false; + } else { + // last == id - GOTO last + posZ = nextZ - 1; + //z = adjZ.get(posZ); + res = true; + } + + nextZ = adjZ.findNext(posZ) + 1; + + return res; } default -> throw new NotImplementedException("goto " + role); } @@ -294,7 +398,6 @@ public boolean gotoSubject(long id) { return gotoOrder(id, idx.getOrder().getSubjectMapping()); } - @Override public boolean gotoPredicate(long id) { return gotoOrder(id, idx.getOrder().getPredicateMapping()); diff --git a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorTest.java b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorTest.java index 706376573..4842866cf 100644 --- a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorTest.java +++ b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorTest.java @@ -1,12 +1,33 @@ package com.the_qa_company.qendpoint.core.triples.impl; import java.io.IOException; +import java.nio.file.Path; +import 
com.the_qa_company.qendpoint.core.exceptions.NotFoundException; +import com.the_qa_company.qendpoint.core.exceptions.ParserException; +import com.the_qa_company.qendpoint.core.hdt.HDT; +import com.the_qa_company.qendpoint.core.hdt.HDTManager; +import com.the_qa_company.qendpoint.core.options.HDTOptions; +import com.the_qa_company.qendpoint.core.triples.IteratorTripleID; +import com.the_qa_company.qendpoint.core.triples.IteratorTripleString; +import com.the_qa_company.qendpoint.core.triples.TripleID; +import com.the_qa_company.qendpoint.core.util.LargeFakeDataSetStreamSupplier; +import org.apache.commons.io.file.PathUtils; +import org.junit.Assert; import org.junit.Before; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertTrue; public class BitmapTriplesIteratorTest { + @Rule + public TemporaryFolder tempDir = TemporaryFolder.builder().assureDeletion().build(); + @Before public void setUp() throws Exception { } @@ -23,4 +44,51 @@ public void test() throws IOException { // } } + @Test + public void jumpTest() throws IOException, ParserException { + Path root = tempDir.newFolder().toPath(); + + try { + LargeFakeDataSetStreamSupplier sup = LargeFakeDataSetStreamSupplier.createSupplierWithMaxTriples(1000, 32); + Path hdtPath = root.resolve("test.hdt"); + sup.createAndSaveFakeHDT(HDTOptions.empty(), hdtPath); + + try (HDT hdt = HDTManager.mapIndexedHDT(hdtPath)) { + IteratorTripleID it = hdt.getTriples().searchAll(); + + assertTrue("bad class: " + it.getClass(), it instanceof BitmapTriplesIterator); + + TripleID start = it.next().clone(); + + + for (int i = 0; i < 458; i++) { + assertTrue(it.hasNext()); + it.next(); + } + + + TripleID lastTest = it.next().clone(); + + assertNotEquals(start, lastTest); + + long posLast = it.getLastTriplePosition(); + + it.goToStart(); + assertEquals(start, it.next()); 
+ + assertEquals(0, it.getLastTriplePosition()); + + it.goTo(posLast); + + assertEquals(lastTest, it.next()); + + assertEquals(posLast, it.getLastTriplePosition()); + } + } finally { + PathUtils.deleteDirectory(root); + } + + + } + } From 232814a4ad333b48b60e98482f86e71d8bfc2ef0 Mon Sep 17 00:00:00 2001 From: qaate47 Date: Mon, 2 Dec 2024 17:04:36 +0100 Subject: [PATCH 5/8] fix latest orders and patch pom.xml --- qendpoint-backend/pom.xml | 7 +- .../qendpoint/controller/Sparql.java | 4 +- qendpoint-core/pom.xml | 4 +- .../utils/GraphFilteringTripleId.java | 1 - .../core/triples/IteratorTripleID.java | 14 +- .../triples/impl/BitmapTriplesIterator.java | 176 +++++----- .../core/triples/impl/TriplesList.java | 1 - .../triples/impl/BitmapQuadTriplesTest.java | 1 - .../impl/BitmapTriplesIteratorTest.java | 300 ++++++++++++++++-- 9 files changed, 397 insertions(+), 111 deletions(-) diff --git a/qendpoint-backend/pom.xml b/qendpoint-backend/pom.xml index 6b75e3315..2686d2fbe 100644 --- a/qendpoint-backend/pom.xml +++ b/qendpoint-backend/pom.xml @@ -46,7 +46,7 @@ 5.0.2 3.4.0 1.5.6 - + 2.11.0 UTF-8 UTF-8 @@ -112,6 +112,11 @@ + + com.google.code.gson + gson + ${gson.version} + commons-codec diff --git a/qendpoint-backend/src/main/java/com/the_qa_company/qendpoint/controller/Sparql.java b/qendpoint-backend/src/main/java/com/the_qa_company/qendpoint/controller/Sparql.java index 7b25ce0ca..ed9223eef 100644 --- a/qendpoint-backend/src/main/java/com/the_qa_company/qendpoint/controller/Sparql.java +++ b/qendpoint-backend/src/main/java/com/the_qa_company/qendpoint/controller/Sparql.java @@ -12,6 +12,8 @@ import com.the_qa_company.qendpoint.store.EndpointStoreUtils; import com.the_qa_company.qendpoint.utils.FileUtils; import com.the_qa_company.qendpoint.utils.RDFStreamUtils; +import jakarta.annotation.PostConstruct; +import jakarta.annotation.PreDestroy; import org.eclipse.rdf4j.model.Statement; import org.eclipse.rdf4j.model.util.Values; import 
org.eclipse.rdf4j.repository.RepositoryConnection; @@ -34,8 +36,6 @@ import org.springframework.stereotype.Component; import org.springframework.web.server.ServerWebInputException; -import javax.annotation.PostConstruct; -import javax.annotation.PreDestroy; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; diff --git a/qendpoint-core/pom.xml b/qendpoint-core/pom.xml index d0522ca2a..46dde9507 100644 --- a/qendpoint-core/pom.xml +++ b/qendpoint-core/pom.xml @@ -48,7 +48,7 @@ 1.5.6 0.9.44 - 4.3.2 + 4.9.0 1.7.30 UTF-8 @@ -75,7 +75,7 @@ org.apache.commons commons-compress - 1.21 + 1.26.0 org.apache.jena diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/utils/GraphFilteringTripleId.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/utils/GraphFilteringTripleId.java index a1b597c0e..3894787a9 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/utils/GraphFilteringTripleId.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/utils/GraphFilteringTripleId.java @@ -16,7 +16,6 @@ public GraphFilteringTripleId(IteratorTripleID iterator, long[] graphIds) { this.graphIds = graphIds; } - @Override public void goToStart() { throw new NotImplementedException(); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/IteratorTripleID.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/IteratorTripleID.java index 0c57074dd..0fe04b126 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/IteratorTripleID.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/IteratorTripleID.java @@ -92,9 +92,11 @@ default boolean isLastTriplePositionBoundToOrder() { /** * goto the next subject >= id + * * @param id id * @return true if the next subject == id - * @see #canGoToSubject() if can goto returns false, this function is not 
available + * @see #canGoToSubject() if can goto returns false, this function is not + * available */ default boolean gotoSubject(long id) { return false; @@ -102,9 +104,11 @@ default boolean gotoSubject(long id) { /** * goto the next predicate >= id + * * @param id id * @return true if the next predicate == id - * @see #canGoToPredicate() if can goto returns false, this function is not available + * @see #canGoToPredicate() if can goto returns false, this function is not + * available */ default boolean gotoPredicate(long id) { return false; @@ -112,9 +116,11 @@ default boolean gotoPredicate(long id) { /** * goto the next object >= id + * * @param id id * @return true if the next object == id - * @see #canGoToObject() if can goto returns false, this function is not available + * @see #canGoToObject() if can goto returns false, this function is not + * available */ default boolean gotoObject(long id) { return false; @@ -126,12 +132,14 @@ default boolean gotoObject(long id) { default boolean canGoToSubject() { return false; } + /** * @return true if {@link #gotoPredicate(long)} can be used, false otherwise */ default boolean canGoToPredicate() { return false; } + /** * @return true if {@link #gotoObject(long)} can be used, false otherwise */ diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIterator.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIterator.java index e62df1323..d5b465d18 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIterator.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIterator.java @@ -272,39 +272,50 @@ public boolean isLastTriplePositionBoundToOrder() { private boolean gotoOrder(long id, TripleComponentRole role) { switch (role) { - case SUBJECT -> { - if (patX != 0) { - return id == patX; // can't jump or already on the right element - } - 
- patX = id; - findRange(); - patX = 0; + case SUBJECT -> { + if (patX != 0) { + return id == patX; // can't jump or already on the right element + } - return true; // we know x exists because we are using + if (x >= id) { + return id == x; } - case PREDICATE -> { - if (patY != 0) { - return id == patY; // can't jump or already on the right element - } - if (posY == nextY) { - return false; // no next element - } + x = id; + posY = adjY.find(x - 1); + posZ = adjZ.find(posY); + y = adjY.get(posY); + nextY = adjY.last(x - 1) + 1; + nextZ = adjZ.find(posY + 1); - long curr = this.adjY.get(posY); + return true; // we know x exists + } + case PREDICATE -> { + if (patY != 0) { + return id == patY; // can't jump or already on the right element + } - if (curr >= id) { - return curr == id; - } - if (posY + 1 == nextY) { - return false; // no next element - } + if (posY == nextY) { + return false; // no next element + } - long last = this.adjY.get(nextY - 1); + long curr = this.adjY.get(posY); + if (curr >= id) { + return curr == id; + } - boolean res; + boolean res; + if (posY + 1 == nextY) { + // no next element, go next X + x++; + posY = nextY; + nextY = adjY.findNext(posY) + 1; + y = adjY.get(posY); + + res = false; + } else { + long last = this.adjY.get(nextY - 1); if (last > id) { // binary search between curr <-> last id @@ -319,77 +330,79 @@ private boolean gotoOrder(long id, TripleComponentRole role) { posY = -loc - 1; y = adjY.get(posY); } - } else if (last != id) { - // last < id - GOTO end + 1 - posY = nextY; - res = false; } else { - // last == id - GOTO last - posY = nextY - 1; - y = adjY.get(posY); - res = true; + if (last != id) { + // last < id - GOTO end + 1 + posY = nextY; + res = false; + } else { + // last == id - GOTO last + posY = nextY - 1; + y = adjY.get(posY); + res = true; + } + nextY = adjY.findNext(posY) + 1; } + } - nextY = adjY.findNext(posY) + 1; - - // down to z/posZ/nextZ? 
- posZ = adjZ.find(posY, patZ); - nextZ = adjZ.findNext(posZ) + 1; + // down to z/posZ/nextZ? + posZ = adjZ.find(posY); // assert patZ != 0 + nextZ = adjZ.findNext(posZ) + 1; - return res; + return res; + } + case OBJECT -> { + if (patZ != 0) { + return id == patZ; // can't jump or already on the right element } - case OBJECT -> { - if (patZ != 0) { - return id == patZ; // can't jump or already on the right element - } - if (posZ == nextZ) { - return false; // no next element - } - - long curr = this.adjZ.get(posZ); + if (posZ == nextZ) { + return false; // no next element + } - if (curr >= id) { - return curr == id; - } - if (posZ + 1 == nextZ) { - return false; // no next element - } + long curr = this.adjZ.get(posZ); - long last = this.adjZ.get(nextZ - 1); + if (curr >= id) { + return curr == id; + } + if (posZ + 1 == nextZ) { + return false; // no next element + } + long last = this.adjZ.get(nextZ - 1); - boolean res; + boolean res; - if (last > id) { - // binary search between curr <-> last id - long loc = this.adjZ.searchLoc(id, posZ + 1, nextZ - 2); + if (last > id) { + // binary search between curr <-> last id + long loc = this.adjZ.searchLoc(id, posZ + 1, nextZ - 2); - if (loc >= 0) { //match - res = true; - posZ = loc; - //z = id; // no need to compute the z, it is only used in next() - } else { - res = false; - posZ = -loc - 1; - //z = adjZ.get(posZ); - } - } else if (last != id) { - // last < id - GOTO end - posZ = nextZ; - res = false; - } else { - // last == id - GOTO last - posZ = nextZ - 1; - //z = adjZ.get(posZ); + if (loc >= 0) { // match res = true; + posZ = loc; + // z = id; // no need to compute the z, it is only used in + // next() + } else { + res = false; + posZ = -loc - 1; + // z = adjZ.get(posZ); } + } else if (last != id) { + // last < id - GOTO end + posZ = nextZ; + res = false; + } else { + // last == id - GOTO last + posZ = nextZ - 1; + // z = adjZ.get(posZ); + res = true; + } - nextZ = adjZ.findNext(posZ) + 1; + nextZ = 
adjZ.findNext(posZ) + 1; - return res; - } - default -> throw new NotImplementedException("goto " + role); + return res; + } + default -> throw new NotImplementedException("goto " + role); } } @@ -402,6 +415,7 @@ public boolean gotoSubject(long id) { public boolean gotoPredicate(long id) { return gotoOrder(id, idx.getOrder().getPredicateMapping()); } + @Override public boolean gotoObject(long id) { return gotoOrder(id, idx.getOrder().getObjectMapping()); @@ -411,10 +425,12 @@ public boolean gotoObject(long id) { public boolean canGoToSubject() { return true; } + @Override public boolean canGoToPredicate() { return true; } + @Override public boolean canGoToObject() { return true; diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesList.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesList.java index bc0c7f2e2..4b6f522e2 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesList.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesList.java @@ -448,7 +448,6 @@ public TripleID next() { return triplesList.arrayOfTriples.get(pos++).asTripleID(); } - /* * (non-Javadoc) * @see hdt.iterator.IteratorTripleID#goToStart() diff --git a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapQuadTriplesTest.java b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapQuadTriplesTest.java index 09344fdfd..6d18a20a1 100644 --- a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapQuadTriplesTest.java +++ b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapQuadTriplesTest.java @@ -34,7 +34,6 @@ private static IteratorTripleID fromList(List lst) { private int current; private int lastLoc; - @Override public void goToStart() { current = 0; diff --git 
a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorTest.java b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorTest.java index 4842866cf..53c412b35 100644 --- a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorTest.java +++ b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorTest.java @@ -1,27 +1,34 @@ package com.the_qa_company.qendpoint.core.triples.impl; -import java.io.IOException; -import java.nio.file.Path; - -import com.the_qa_company.qendpoint.core.exceptions.NotFoundException; +import com.the_qa_company.qendpoint.core.enums.RDFNotation; +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; import com.the_qa_company.qendpoint.core.exceptions.ParserException; import com.the_qa_company.qendpoint.core.hdt.HDT; import com.the_qa_company.qendpoint.core.hdt.HDTManager; +import com.the_qa_company.qendpoint.core.listener.ProgressListener; import com.the_qa_company.qendpoint.core.options.HDTOptions; +import com.the_qa_company.qendpoint.core.options.HDTOptionsKeys; import com.the_qa_company.qendpoint.core.triples.IteratorTripleID; -import com.the_qa_company.qendpoint.core.triples.IteratorTripleString; import com.the_qa_company.qendpoint.core.triples.TripleID; import com.the_qa_company.qendpoint.core.util.LargeFakeDataSetStreamSupplier; import org.apache.commons.io.file.PathUtils; -import org.junit.Assert; import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.util.Random; + import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertSame; import 
static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; public class BitmapTriplesIteratorTest { @@ -32,18 +39,6 @@ public class BitmapTriplesIteratorTest { public void setUp() throws Exception { } - @Test - public void test() throws IOException { -// HDT hdt = HDTManager.mapHDT("/Users/mck/hdt/swdf.hdt", null); -// -// int t = (int) hdt.getTriples().getNumberOfElements(); -// BitmapTriplesIterator it = new BitmapTriplesIterator((BitmapTriples) hdt.getTriples(), t-10, t); -// -// while(it.hasNext()) { -// System.out.println(it.next()); -// } - } - @Test public void jumpTest() throws IOException, ParserException { Path root = tempDir.newFolder().toPath(); @@ -60,13 +55,11 @@ public void jumpTest() throws IOException, ParserException { TripleID start = it.next().clone(); - for (int i = 0; i < 458; i++) { assertTrue(it.hasNext()); it.next(); } - TripleID lastTest = it.next().clone(); assertNotEquals(start, lastTest); @@ -88,7 +81,274 @@ public void jumpTest() throws IOException, ParserException { PathUtils.deleteDirectory(root); } + } + + private static final String JUMP_XYZ_DATASET = """ + @prefix ex: . + + ex:s1 ex:p1 ex:o0000, ex:o0001, ex:o0002, ex:o0003, ex:o0004, ex:o0005 ; + ex:p2 ex:o0000, ex:o0002, ex:o0003, ex:o0004, ex:o0005 ; + ex:p3 ex:o0000, ex:o0001, ex:o0002, ex:o0003, ex:o0004, ex:o0005 ; + ex:p4 ex:o0000, ex:o0001, ex:o0002, ex:o0004, ex:o0005 ; + ex:p5 ex:o0000, ex:o0001, ex:o0002, ex:o0003, ex:o0004, ex:o0005 . + + + ex:s2 ex:p1 ex:o0006, ex:o0007, ex:o0008, ex:o0009, ex:o0010, ex:o0011 ; + ex:p2 ex:o0008, ex:o0009, ex:o0010, ex:o0011 ; + ex:p3 ex:o0007, ex:o0008, ex:o0009, ex:o0010, ex:o0011 ; + ex:p4 ex:o0006, ex:o0007, ex:o0008, ex:o0009, ex:o0010, ex:o0011 . 
+ + + ex:s3 ex:p1 ex:o0003, ex:o0005, ex:o0007, ex:o0009, ex:o0011, ex:o0015 ; + ex:p2 ex:o0003, ex:o0005, ex:o0007, ex:o0009, ex:o0011, ex:o0015 ; + ex:p3 ex:o0003, ex:o0005, ex:o0007, ex:o0009, ex:o0011, ex:o0015 ; + ex:p4 ex:o0003, ex:o0005, ex:o0007 ; + ex:p5 ex:o0003, ex:o0005, ex:o0007, ex:o0009, ex:o0011, ex:o0015 ; + ex:p6 ex:o0003, ex:o0007, ex:o0009, ex:o0011, ex:o0015 ; + ex:p7 ex:o0003, ex:o0005, ex:o0009, ex:o0011, ex:o0015 . + + + ex:s4 ex:p1 ex:o0003, ex:o0005, ex:o0007, ex:o0009, ex:o0011, ex:o0015 ; + ex:p2 ex:o0005, ex:o0007, ex:o0009, ex:o0011, ex:o0015 ; + ex:p3 ex:o0003, ex:o0005, ex:o0009, ex:o0011, ex:o0015 ; + ex:p4 ex:o0003, ex:o0005, ex:o0007, ex:o0009, ex:o0011, ex:o0015 ; + ex:p5 ex:o0003, ex:o0005, ex:o0007, ex:o0009 . + + """; + private static final long JUMP_XYZ_DATASET_X = 4; + private static final long JUMP_XYZ_DATASET_Y = 4; + + @Test + public void jumpXTest() throws IOException, ParserException { + Path root = tempDir.newFolder().toPath(); + + try { + + Path hdtPath = root.resolve("test.hdt"); + + HDTOptions spec = HDTOptions.of(HDTOptionsKeys.BITMAPTRIPLES_INDEX_OTHERS, "spo,sop,pos,pso,ops,osp", + HDTOptionsKeys.BITMAPTRIPLES_INDEX_NO_FOQ, true); + + try (HDT hdt = HDTManager.generateHDT( + new ByteArrayInputStream(JUMP_XYZ_DATASET.getBytes(StandardCharsets.UTF_8)), + LargeFakeDataSetStreamSupplier.BASE_URI, RDFNotation.TURTLE, spec, ProgressListener.ignore())) { + hdt.saveToHDT(hdtPath); + } + + try (HDT hdt = HDTManager.mapIndexedHDT(hdtPath, spec, ProgressListener.ignore())) { + + IteratorTripleID ittt = hdt.getTriples().searchAll(); + assertTrue("bad class: " + ittt.getClass(), ittt instanceof BitmapTriplesIterator); + + for (int sid = 1; sid <= JUMP_XYZ_DATASET_X; sid++) { + IteratorTripleID it = hdt.getTriples().searchAll(); + IteratorTripleID itex = hdt.getTriples().searchAll(); + + assertTrue(it.gotoSubject(sid)); + + assertEquals(sid, it.next().getSubject()); + + long s; + do { + assertTrue(itex.hasNext()); + s = 
itex.next().getSubject(); + } while (s < sid); + assertEquals(sid, s); + + assertTrue(it.hasNext()); + do { + TripleID ac = it.next(); + assertTrue(itex.hasNext()); + TripleID ex = itex.next(); + assertEquals(itex.getLastTriplePosition(), it.getLastTriplePosition()); + + assertEquals(ex, ac); + } while (it.hasNext()); + + assertFalse(itex.hasNext()); + } + } + } finally { + PathUtils.deleteDirectory(root); + } + } + + @Test + public void jumpYTest() throws IOException, ParserException { + Path root = tempDir.newFolder().toPath(); + + try { + + Path hdtPath = root.resolve("test.hdt"); + + HDTOptions spec = HDTOptions.of(HDTOptionsKeys.BITMAPTRIPLES_INDEX_OTHERS, "spo,sop,pos,pso,ops,osp", + HDTOptionsKeys.BITMAPTRIPLES_INDEX_NO_FOQ, true); + + try (HDT hdt = HDTManager.generateHDT( + new ByteArrayInputStream(JUMP_XYZ_DATASET.getBytes(StandardCharsets.UTF_8)), + LargeFakeDataSetStreamSupplier.BASE_URI, RDFNotation.TURTLE, spec, ProgressListener.ignore())) { + hdt.saveToHDT(hdtPath); + } + + try (HDT hdt = HDTManager.mapIndexedHDT(hdtPath, spec, ProgressListener.ignore())) { + + IteratorTripleID ittt = hdt.getTriples().searchAll(); + assertTrue("bad class: " + ittt.getClass(), ittt instanceof BitmapTriplesIterator); + + String lastPosData; + + for (int sid = 1; sid <= JUMP_XYZ_DATASET_X; sid++) { + for (int pid = 1; pid <= JUMP_XYZ_DATASET_Y; pid++) { + IteratorTripleID it = hdt.getTriples().searchAll(); + IteratorTripleID itex = hdt.getTriples().searchAll(); + + assertTrue(it.gotoSubject(sid)); + assertTrue(it.gotoPredicate(pid)); + + TripleID next = it.next(); + lastPosData = "[sid:" + sid + "/pid:" + pid + "][ac:" + it.getLastTriplePosition() + "/ex" + + itex.getLastTriplePosition() + "]" + next; + assertEquals("invalid pos: " + lastPosData, sid, next.getSubject()); + assertEquals("invalid pos: " + lastPosData, pid, next.getPredicate()); + + long s; + long p; + do { + assertTrue(itex.hasNext()); + TripleID next1 = itex.next(); + s = next1.getSubject(); + p = 
next1.getPredicate(); + lastPosData = "[sid:" + sid + "/pid:" + pid + "][ac:" + it.getLastTriplePosition() + "/ex" + + itex.getLastTriplePosition() + "]" + next1; + } while (s < sid || p < pid); + assertEquals(lastPosData, sid, s); + assertEquals(lastPosData, pid, p); + + assertTrue(it.hasNext()); + do { + TripleID ac = it.next(); + assertTrue(itex.hasNext()); + TripleID ex = itex.next(); + lastPosData = "[sid:" + sid + "/pid:" + pid + "][ac:" + it.getLastTriplePosition() + "/ex" + + itex.getLastTriplePosition() + "]" + ac + "/" + ex; + assertEquals(lastPosData, itex.getLastTriplePosition(), it.getLastTriplePosition()); + + assertEquals(lastPosData, ex, ac); + } while (it.hasNext()); + + assertFalse(itex.hasNext()); + } + } + } + } finally { + PathUtils.deleteDirectory(root); + } } + @Test + public void jumpXYZTest() throws IOException, ParserException { + Path root = tempDir.newFolder().toPath(); + + try { + Path hdtPath = root.resolve("test.hdt"); + + HDTOptions spec = HDTOptions.of(HDTOptionsKeys.BITMAPTRIPLES_INDEX_OTHERS, "spo,sop,pos,pso,ops,osp", + HDTOptionsKeys.BITMAPTRIPLES_INDEX_NO_FOQ, true); + final int count = 100_000; + LargeFakeDataSetStreamSupplier supplier = LargeFakeDataSetStreamSupplier + .createSupplierWithMaxTriples(count, 567890987).withMaxElementSplit(50).withMaxLiteralSize(20); + + supplier.createAndSaveFakeHDT(spec, hdtPath); + + Random rnd = new Random(34567); + + try (HDT hdt = HDTManager.mapIndexedHDT(hdtPath, spec, ProgressListener.ignore())) { + int elements = (int) hdt.getTriples().getNumberOfElements(); + + for (int i = 0; i < count; i++) { + int idx = rnd.nextInt(elements); + + IteratorTripleID it = hdt.getTriples().searchAll(); + + assertTrue(it.canGoTo()); + + it.goTo(idx); + + TripleID current = it.next().clone(); + assertEquals(idx, it.getLastTriplePosition()); + + for (int member = 0; member < 3; member++) { + IteratorTripleID itac = hdt.getTriples().searchAll(TripleComponentOrder.SPO.mask); + assertSame("invalid order (" + 
member + "/" + i + ")", itac.getOrder(), + TripleComponentOrder.SPO); + + // test subject + assertTrue("Can't jump to subject " + current + " (" + member + "/" + i + ")", + itac.canGoToSubject() && itac.gotoSubject(current.getSubject())); + + if (member >= 1) { + // test predicate + assertTrue("Can't jump to predicate " + current + " (" + member + "/" + i + ")", + itac.canGoToPredicate() && itac.gotoPredicate(current.getPredicate())); + + if (member >= 2) { + // test object + assertTrue("Can't jump to object " + current + " (" + member + "/" + i + ")", + itac.canGoToObject() && itac.gotoObject(current.getObject())); + } + } + + assertTrue("for " + current + " (" + member + "/" + i + ")", itac.hasNext()); + TripleID next = itac.next(); + String err = "invalid next " + next + " != " + current + " (" + member + "/" + i + ")"; + switch (member) { + case 2: // object + assertEquals("object err " + err, current.getObject(), next.getObject()); + case 1: // predicate + assertEquals("predicate err " + err, current.getPredicate(), next.getPredicate()); + case 0: // subject only + assertEquals("subject err " + err, current.getSubject(), next.getSubject()); + break; + default: + fail("bad member: " + member); + break; + } + if (member == 2) { + assertEquals("idx err " + err, idx, itac.getLastTriplePosition()); + TripleID newCurrent = itac.next(); + assertTrue("idx err " + err, idx < itac.getLastTriplePosition()); + + if (current.getSubject() == newCurrent.getSubject()) { + // no jump on X, we should have the sam + assertTrue("Can't jump to subject " + current + " (" + member + "/" + i + ")", + itac.gotoSubject(current.getSubject())); + + if (current.getPredicate() == newCurrent.getPredicate()) { + // no jump on Y, we should have the same + assertTrue("Can't jump to subject " + current + " (" + member + "/" + i + ")", + itac.gotoPredicate(current.getPredicate())); + + assertFalse("Can't jump to subject " + current + " (" + member + "/" + i + ")", + 
itac.gotoObject(current.getObject())); + } else { + assertFalse("Can't jump to subject " + current + " (" + member + "/" + i + ")", + itac.gotoPredicate(current.getPredicate())); + } + + } else { + assertFalse("Can't jump to subject " + current + " (" + member + "/" + i + ")", + itac.gotoSubject(current.getSubject())); + } + + } else { + assertTrue("idx err " + err, idx >= itac.getLastTriplePosition()); + } + } + } + } + } finally { + PathUtils.deleteDirectory(root); + } + } } From aea8d1e01d5c81063a4e28103d9db458ce4f4086 Mon Sep 17 00:00:00 2001 From: qaate47 Date: Mon, 2 Dec 2024 18:22:42 +0100 Subject: [PATCH 6/8] one more test and apply format --- .../impl/BitmapTriplesIteratorTest.java | 181 +++++++++++++++--- 1 file changed, 150 insertions(+), 31 deletions(-) diff --git a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorTest.java b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorTest.java index 53c412b35..2a13014fd 100644 --- a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorTest.java +++ b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorTest.java @@ -21,7 +21,6 @@ import java.io.IOException; import java.nio.charset.StandardCharsets; import java.nio.file.Path; -import java.util.Random; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; @@ -261,13 +260,10 @@ public void jumpXYZTest() throws IOException, ParserException { supplier.createAndSaveFakeHDT(spec, hdtPath); - Random rnd = new Random(34567); - try (HDT hdt = HDTManager.mapIndexedHDT(hdtPath, spec, ProgressListener.ignore())) { int elements = (int) hdt.getTriples().getNumberOfElements(); - for (int i = 0; i < count; i++) { - int idx = rnd.nextInt(elements); + for (int idx = 0; idx < elements; idx++) { IteratorTripleID it = hdt.getTriples().searchAll(); @@ -280,28 +276,28 @@ 
public void jumpXYZTest() throws IOException, ParserException { for (int member = 0; member < 3; member++) { IteratorTripleID itac = hdt.getTriples().searchAll(TripleComponentOrder.SPO.mask); - assertSame("invalid order (" + member + "/" + i + ")", itac.getOrder(), + assertSame("invalid order (" + member + "/" + idx + ")", itac.getOrder(), TripleComponentOrder.SPO); // test subject - assertTrue("Can't jump to subject " + current + " (" + member + "/" + i + ")", + assertTrue("Can't jump to subject " + current + " (" + member + "/" + idx + ")", itac.canGoToSubject() && itac.gotoSubject(current.getSubject())); if (member >= 1) { // test predicate - assertTrue("Can't jump to predicate " + current + " (" + member + "/" + i + ")", + assertTrue("Can't jump to predicate " + current + " (" + member + "/" + idx + ")", itac.canGoToPredicate() && itac.gotoPredicate(current.getPredicate())); if (member >= 2) { // test object - assertTrue("Can't jump to object " + current + " (" + member + "/" + i + ")", + assertTrue("Can't jump to object " + current + " (" + member + "/" + idx + ")", itac.canGoToObject() && itac.gotoObject(current.getObject())); } } - assertTrue("for " + current + " (" + member + "/" + i + ")", itac.hasNext()); + assertTrue("for " + current + " (" + member + "/" + idx + ")", itac.hasNext()); TripleID next = itac.next(); - String err = "invalid next " + next + " != " + current + " (" + member + "/" + i + ")"; + String err = "invalid next " + next + " != " + current + " (" + member + "/" + idx + ")"; switch (member) { case 2: // object assertEquals("object err " + err, current.getObject(), next.getObject()); @@ -316,29 +312,33 @@ public void jumpXYZTest() throws IOException, ParserException { } if (member == 2) { assertEquals("idx err " + err, idx, itac.getLastTriplePosition()); - TripleID newCurrent = itac.next(); - assertTrue("idx err " + err, idx < itac.getLastTriplePosition()); - - if (current.getSubject() == newCurrent.getSubject()) { - // no jump on X, we 
should have the sam - assertTrue("Can't jump to subject " + current + " (" + member + "/" + i + ")", - itac.gotoSubject(current.getSubject())); + if (itac.hasNext()) { + TripleID newCurrent = itac.next(); + assertTrue("idx err " + err, idx < itac.getLastTriplePosition()); + + if (current.getSubject() == newCurrent.getSubject()) { + // no jump on X, we should have the sam + assertTrue("Can't jump to subject " + current + " (" + member + "/" + idx + ")", + itac.gotoSubject(current.getSubject())); + + if (current.getPredicate() == newCurrent.getPredicate()) { + // no jump on Y, we should have the same + assertTrue("Can't jump to subject " + current + " (" + member + "/" + idx + ")", + itac.gotoPredicate(current.getPredicate())); + + assertFalse( + "Can't jump to subject " + current + " (" + member + "/" + idx + ")", + itac.gotoObject(current.getObject())); + } else { + assertFalse( + "Can't jump to subject " + current + " (" + member + "/" + idx + ")", + itac.gotoPredicate(current.getPredicate())); + } - if (current.getPredicate() == newCurrent.getPredicate()) { - // no jump on Y, we should have the same - assertTrue("Can't jump to subject " + current + " (" + member + "/" + i + ")", - itac.gotoPredicate(current.getPredicate())); - - assertFalse("Can't jump to subject " + current + " (" + member + "/" + i + ")", - itac.gotoObject(current.getObject())); } else { - assertFalse("Can't jump to subject " + current + " (" + member + "/" + i + ")", - itac.gotoPredicate(current.getPredicate())); + assertFalse("Can't jump to subject " + current + " (" + member + "/" + idx + ")", + itac.gotoSubject(current.getSubject())); } - - } else { - assertFalse("Can't jump to subject " + current + " (" + member + "/" + i + ")", - itac.gotoSubject(current.getSubject())); } } else { @@ -351,4 +351,123 @@ public void jumpXYZTest() throws IOException, ParserException { PathUtils.deleteDirectory(root); } } + + @Test + public void jumpXYZNextTest() throws IOException, ParserException { + Path 
root = tempDir.newFolder().toPath(); + + try { + Path hdtPath = root.resolve("test.hdt"); + + HDTOptions spec = HDTOptions.of(HDTOptionsKeys.BITMAPTRIPLES_INDEX_OTHERS, "spo,sop,pos,pso,ops,osp", + HDTOptionsKeys.BITMAPTRIPLES_INDEX_NO_FOQ, true); + final int count = 10_000; + LargeFakeDataSetStreamSupplier supplier = LargeFakeDataSetStreamSupplier + .createSupplierWithMaxTriples(count, 567890987).withMaxElementSplit(50).withMaxLiteralSize(20); + + supplier.createAndSaveFakeHDT(spec, hdtPath); + + try (HDT hdt = HDTManager.mapIndexedHDT(hdtPath, spec, ProgressListener.ignore())) { + int elements = (int) hdt.getTriples().getNumberOfElements(); + for (int idx = 0; idx < elements; idx++) { + + IteratorTripleID it = hdt.getTriples().searchAll(); + + assertTrue(it.canGoTo()); + + it.goTo(idx); + + TripleID current = it.next().clone(); + assertEquals(idx, it.getLastTriplePosition()); + + nextCountLoop: + for (int nextCount = 0; nextCount < 10; nextCount++) { + for (int member = 1; member < 3; member++) { + String memberInfo = " (" + member + "/" + idx + "/" + nextCount + ")"; + IteratorTripleID itac = hdt.getTriples().searchAll(TripleComponentOrder.SPO.mask); + assertSame("invalid order" + memberInfo, itac.getOrder(), TripleComponentOrder.SPO); + + // test subject + assertTrue("Can't jump to subject " + current + memberInfo, + itac.canGoToSubject() && itac.gotoSubject(current.getSubject())); + + for (int j = 0; j < nextCount; j++) { + assertTrue(itac.hasNext()); + TripleID pvid = itac.next(); + + if (itac.getLastTriplePosition() == idx) { + assertEquals(pvid, current); + break nextCountLoop; // we consumed the one + // we were searching + // for, it can't be + // used + } + } + + // test predicate + assertTrue("Can't jump to predicate " + current + memberInfo, + itac.canGoToPredicate() && itac.gotoPredicate(current.getPredicate())); + + if (member >= 2) { + // test object + assertTrue("Can't jump to object " + current + memberInfo, + itac.canGoToObject() && 
itac.gotoObject(current.getObject())); + } + + assertTrue("for " + current + memberInfo, itac.hasNext()); + TripleID next = itac.next(); + String err = "invalid next " + next + " != " + current + memberInfo; + switch (member) { + case 2: // object + assertEquals("object err " + err, current.getObject(), next.getObject()); + case 1: // predicate + assertEquals("predicate err " + err, current.getPredicate(), next.getPredicate()); + case 0: // subject only + assertEquals("subject err " + err, current.getSubject(), next.getSubject()); + break; + default: + fail("bad member: " + member); + break; + } + if (member == 2) { + assertEquals("idx err " + err, idx, itac.getLastTriplePosition()); + if (itac.hasNext()) { + TripleID newCurrent = itac.next(); + assertTrue("idx err " + err, idx < itac.getLastTriplePosition()); + + if (current.getSubject() == newCurrent.getSubject()) { + // no jump on X, we should have the sam + assertTrue("Can't jump to subject " + current + memberInfo + newCurrent, + itac.gotoSubject(current.getSubject())); + + if (current.getPredicate() == newCurrent.getPredicate()) { + // no jump on Y, we should have the + // same + assertTrue("Can't jump to subject " + current + memberInfo + newCurrent, + itac.gotoPredicate(current.getPredicate())); + + assertFalse("Can't jump to subject " + current + memberInfo + newCurrent, + itac.gotoObject(current.getObject())); + } else { + assertFalse("Can't jump to subject " + current + memberInfo + newCurrent, + itac.gotoPredicate(current.getPredicate())); + } + } else { + assertFalse("Can't jump to subject " + current + memberInfo + newCurrent, + itac.gotoSubject(current.getSubject())); + } + } + + } else { + assertTrue("idx err " + err, idx >= itac.getLastTriplePosition()); + } + } + } + + } + } + } finally { + PathUtils.deleteDirectory(root); + } + } } From 2c228ce37ab6684fae89adc3bd3c557e80876497 Mon Sep 17 00:00:00 2001 From: qaate47 Date: Wed, 4 Dec 2024 18:12:52 +0100 Subject: [PATCH 7/8] add time when reindexing 
lucenes --- .../qendpoint/core/util/StopWatch.java | 4 ++ .../qendpoint/compiler/CompiledSail.java | 50 +++++++++---------- 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/StopWatch.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/StopWatch.java index 46fc58a77..41d0843e1 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/StopWatch.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/StopWatch.java @@ -35,6 +35,10 @@ public long getMeasure() { return end - ini; } + public long getMeasureMillis() { + return (end - ini) / 1_000_000; + } + public long stopAndGet() { stop(); return getMeasure(); diff --git a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/compiler/CompiledSail.java b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/compiler/CompiledSail.java index 215ea43e7..96d889e90 100644 --- a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/compiler/CompiledSail.java +++ b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/compiler/CompiledSail.java @@ -6,6 +6,7 @@ import com.the_qa_company.qendpoint.core.hdt.HDTManager; import com.the_qa_company.qendpoint.core.options.HDTOptions; import com.the_qa_company.qendpoint.core.triples.TripleString; +import com.the_qa_company.qendpoint.core.util.StopWatch; import com.the_qa_company.qendpoint.store.EndpointFiles; import com.the_qa_company.qendpoint.store.EndpointStore; import com.the_qa_company.qendpoint.store.exception.EndpointStoreException; @@ -216,6 +217,26 @@ public NotifyingSail getSource() { return source; } + private void reindexSail(LuceneSail sail) { + // bypass filtering system to use the source + NotifyingSail oldSail = sail.getBaseSail(); + try { + sail.setBaseSail(source); + String indexId = sail.getParameter(LuceneSail.INDEX_ID); + if (indexId == null || indexId.isEmpty()) { + indexId = ""; + } + StopWatch 
sw = new StopWatch(); + sw.reset(); + logger.info("Reindexing sail {}", indexId); + sail.reindex(); + sw.stop(); + logger.info("Sail {} reindexed in {} ({}ms)", indexId, sw, sw.getMeasureMillis()); + } finally { + sail.setBaseSail(oldSail); + } + } + /** * reindex all the compiled lucene sails * @@ -225,19 +246,7 @@ public NotifyingSail getSource() { public void reindexLuceneSails() throws SailException { for (LuceneSail sail : luceneSails) { // bypass filtering system to use the source - NotifyingSail oldSail = sail.getBaseSail(); - try { - sail.setBaseSail(source); - String indexId = sail.getParameter(LuceneSail.INDEX_ID); - if (indexId == null || indexId.isEmpty()) { - indexId = "no id"; - } - logger.info("Reindexing sail: {}", indexId); - sail.reindex(); - } finally { - sail.setBaseSail(oldSail); - } - + reindexSail(sail); } } @@ -254,20 +263,7 @@ public void reindexLuceneSail(String index) throws SailException { if (!index.equals(sail.getParameter(LuceneSail.INDEX_ID))) { continue; // ignore } - // bypass filtering system to use the source - NotifyingSail oldSail = sail.getBaseSail(); - try { - sail.setBaseSail(source); - String indexId = sail.getParameter(LuceneSail.INDEX_ID); - if (indexId == null || indexId.isEmpty()) { - indexId = "no id"; - } - logger.info("Reindexing sail: {}", indexId); - sail.reindex(); - } finally { - sail.setBaseSail(oldSail); - } - + reindexSail(sail); } } From dda0cc13503216155ded940e6043be881a86e6ba Mon Sep 17 00:00:00 2001 From: qaate47 Date: Mon, 9 Dec 2024 11:12:41 +0100 Subject: [PATCH 8/8] add profiling data [skip ci] --- .../qendpoint/controller/Sparql.java | 45 +++++++++++++++++++ .../qendpoint/core/hdt/HDTManagerImpl.java | 9 ++++ .../core/options/HDTOptionsKeys.java | 6 +++ 3 files changed, 60 insertions(+) diff --git a/qendpoint-backend/src/main/java/com/the_qa_company/qendpoint/controller/Sparql.java b/qendpoint-backend/src/main/java/com/the_qa_company/qendpoint/controller/Sparql.java index ed9223eef..fd572ac06 100644 
--- a/qendpoint-backend/src/main/java/com/the_qa_company/qendpoint/controller/Sparql.java +++ b/qendpoint-backend/src/main/java/com/the_qa_company/qendpoint/controller/Sparql.java @@ -10,10 +10,12 @@ import com.the_qa_company.qendpoint.store.EndpointFiles; import com.the_qa_company.qendpoint.store.EndpointStore; import com.the_qa_company.qendpoint.store.EndpointStoreUtils; +import com.the_qa_company.qendpoint.store.HDTProps; import com.the_qa_company.qendpoint.utils.FileUtils; import com.the_qa_company.qendpoint.utils.RDFStreamUtils; import jakarta.annotation.PostConstruct; import jakarta.annotation.PreDestroy; +import org.apache.lucene.index.IndexReader; import org.eclipse.rdf4j.model.Statement; import org.eclipse.rdf4j.model.util.Values; import org.eclipse.rdf4j.repository.RepositoryConnection; @@ -30,6 +32,8 @@ import com.the_qa_company.qendpoint.core.util.io.CloseSuppressPath; import com.the_qa_company.qendpoint.core.util.io.IOUtil; import org.eclipse.rdf4j.sail.lucene.LuceneSail; +import org.eclipse.rdf4j.sail.lucene.SearchIndex; +import org.eclipse.rdf4j.sail.lucene.impl.LuceneIndex; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Value; @@ -53,6 +57,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.TreeMap; import java.util.function.Consumer; import java.util.stream.Collectors; @@ -342,6 +347,46 @@ void initializeEndpointStore(boolean finishLoading) throws IOException { CompiledSailOptions opt = sparqlRepository.getOptions(); port = opt.getPort(); } + + if (endpoint != null) { + HDTProps props = endpoint.getHdtProps(); + long bnCount = props.getEndBlankObjects() - props.getStartBlankObjects() // obj + + props.getEndBlankShared() - props.getStartBlankShared() // shared + + props.getEndBlankSubjects() - props.getStartBlankSubjects(); // subj + long literals = props.getEndLiteral() - props.getStartLiteral(); + logger.info("Index props: 
Lit:{} bn:{}", literals, bnCount); + } + Set<LuceneSail> lcs = sparqlRepository.getLuceneSails(); + if (!lcs.isEmpty()) { + logger.info("Lucene sails ({})", lcs.size()); + + final int maxCount = 5; + Iterator<LuceneSail> it = lcs.iterator(); + for (int i = 0; i < Math.min(maxCount, lcs.size()); i++) { + if (!it.hasNext()) + break; + LuceneSail lc = it.next(); + + String id = lc.getParameter(LuceneSail.INDEX_ID); + if (id == null || id.isEmpty()) + id = ""; + SearchIndex lcIdx = lc.getLuceneIndex(); + String infoStr = lcIdx.getClass().getSimpleName(); + if (lcIdx instanceof LuceneIndex li) { + IndexReader reader = li.getIndexReader(); + int numDocs = reader.numDocs(); + infoStr += " numDocs*Fields:" + numDocs + "*" + li.getIndexWriter().getFieldNames().size(); + } else { + infoStr += " no data"; // add ES/Solr?? + } + logger.info("{} {}", id, infoStr); + } + if (lcs.size() > maxCount) { + logger.info("..."); + } + + } + } if (finishLoading) { completeLoading(); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerImpl.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerImpl.java index c1e2b78eb..cdb3a156a 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerImpl.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerImpl.java @@ -185,6 +185,17 @@ private HDTResult generateChecksumAfter(long checksum, Path checksumPath, HDTOpt public HDTResult doGenerateHDT(String rdfFileName, String baseURI, RDFNotation rdfNotation, HDTOptions spec, ProgressListener listener) throws IOException, ParserException { // choose the importer + long waitTimeStart = spec.getInt(HDTOptionsKeys.LOADER_WAIT_START, 0); + if (waitTimeStart > 0) { + logger.info("Waiting {}ms before start...", waitTimeStart); + try { + Thread.sleep(waitTimeStart); + } catch (InterruptedException e) { + // restore the interrupt flag so callers can still observe the interruption + Thread.currentThread().interrupt(); + } + logger.info("Done waiting"); + } String loaderType =
spec.get(HDTOptionsKeys.LOADER_TYPE_KEY); TempHDTImporter loader; boolean isQuad = rdfNotation == RDFNotation.NQUAD; diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/options/HDTOptionsKeys.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/options/HDTOptionsKeys.java index 6b61e551b..73a2c799b 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/options/HDTOptionsKeys.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/options/HDTOptionsKeys.java @@ -303,6 +303,12 @@ public class HDTOptionsKeys { */ @Key(type = Key.Type.ENUM, desc = "loading type for HDTCat / HDTDiff") public static final String LOAD_HDT_TYPE_KEY = "loader.hdt.type"; + + /** + * Add time before starting the indexing, in ms, default 0 + */ + @Key(type = Key.Type.NUMBER, desc = "Add time before starting the indexing, in ms") + public static final String LOADER_WAIT_START = "loader.waitStart"; /** * load the HDT file into memory */