From f5bb5cecb700a8e550aedc22d37a514eba85887a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Thu, 5 Dec 2024 11:12:25 +0100 Subject: [PATCH 1/6] we need to specify the version of jackson --- qendpoint-backend/pom.xml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/qendpoint-backend/pom.xml b/qendpoint-backend/pom.xml index 6b75e331..156c3185 100644 --- a/qendpoint-backend/pom.xml +++ b/qendpoint-backend/pom.xml @@ -46,12 +46,23 @@ 5.0.2 3.4.0 1.5.6 + 2.18.1 UTF-8 UTF-8 + + com.fasterxml.jackson.core + jackson-databind + ${jackson.version} + + + com.fasterxml.jackson.core + jackson-core + ${jackson.version} + ch.qos.logback logback-core From 5a459122d143da88fb0d7ff5de62c3cff6881604 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Wed, 27 Nov 2024 12:30:19 +0100 Subject: [PATCH 2/6] GH-528 improve binary search in super blocks by keeping track of the estimated location of values in the underlying long array. This assumes that the values are ordered. --- .../core/compact/bitmap/Bitmap375Big.java | 70 +++++++++++++- .../core/util/disk/AbstractLongArray.java | 94 +++++++++++++++++++ .../core/util/disk/LargeLongArray.java | 5 +- .../qendpoint/core/util/disk/LongArray.java | 86 +++++++++++++++++ .../core/util/disk/LongArrayDisk.java | 4 +- .../core/util/disk/SimpleLongArray.java | 5 +- .../core/util/disk/SimpleSplitLongArray.java | 5 +- .../core/util/disk/SyncLongArray.java | 46 +++++++++ .../io/compress/WriteLongArrayBuffer.java | 5 +- 9 files changed, 306 insertions(+), 14 deletions(-) create mode 100644 qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/AbstractLongArray.java diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/Bitmap375Big.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/Bitmap375Big.java index 113501c2..01cb9c21 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/Bitmap375Big.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/Bitmap375Big.java @@ -23,6 +23,8 @@ import com.the_qa_company.qendpoint.core.util.io.CloseSuppressPath; import com.the_qa_company.qendpoint.core.util.io.Closer; import com.the_qa_company.qendpoint.core.util.io.IOUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.Closeable; import java.io.IOException; @@ -39,6 +41,24 @@ * @author mario.arias */ public class Bitmap375Big extends Bitmap64Big { + + private static final Logger logger = LoggerFactory.getLogger(Bitmap375Big.class); + + private static final boolean oldBinarySearch; + + static { + // check if the system property "useOldBinarySeearch" is set to true + String useOldBinarySearch = System.getProperty("useOldBinarySearch"); + if (useOldBinarySearch != null && useOldBinarySearch.equalsIgnoreCase("true")) { + oldBinarySearch = true; + logger.debug("Using old binary search"); + } else { + logger.debug("Using new binary search"); + oldBinarySearch = false; + } + + } + /** * create disk version bitmap with in memory super index * @@ -46,7 +66,6 @@ public class Bitmap375Big extends Bitmap64Big { * @param nbits number of bits * @return bitmap */ - public static Bitmap375Big disk(Path location, long nbits) { return disk(location, nbits, false); } @@ -181,6 +200,7 @@ public void updateIndex() { } pop = countSuperBlock + countBlock; indexUpToDate = true; + superBlocks.recalculateEstimatedValueLocation(); } /* @@ -189,8 +209,9 @@ public void updateIndex() { */ @Override public boolean access(long bitIndex) { - if (bitIndex < 0) + if (bitIndex < 0) { throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex); + } long wordIndex = wordIndex(bitIndex); if (wordIndex >= words.length()) { @@ -324,7 +345,7 @@ public long select1(long x) { return 0; } // Search superblock (binary Search) - long superBlockIndex = binarySearch(superBlocks, x); + long superBlockIndex = oldBinarySearch ? binarySearch(superBlocks, x) : binarySearchNew(superBlocks, x); // If there is a run of many zeros, two correlative superblocks may have // the same value, @@ -332,7 +353,6 @@ public long select1(long x) { while (superBlockIndex > 0 && (superBlocks.get(superBlockIndex) >= x)) { superBlockIndex--; - } long countdown = x - superBlocks.get(superBlockIndex); @@ -444,6 +464,7 @@ public static long binarySearch0(LongArray arr, long fromIndex, long toIndex, lo * @param val val * @return index */ + public static long binarySearch(LongArray arr, long val) { long min = 0, max = arr.length(), mid; @@ -460,6 +481,42 @@ public static long binarySearch(LongArray arr, long val) { return min; } + public static long binarySearchNew(LongArray arr, long val) { + + long min = arr.getEstimatedLocationLowerBound(val); + long max = arr.getEstimatedLocationUpperBound(val); + long mid = arr.getEstimatedLocation(val, min, max); + + int i = 0; + while (min + 1 < max) { + // After the first iteration, the value that we are looking for is + // typically very close to the min value. Using linear search for + // the next two iterations improves the chances that we find the + // value faster than with binary search. + if (i == 1 || i == 2) { + long v = arr.get(min + 1); + if (v >= val) { + max = min + 1; + } else { + min = min + 1; + } + } else { + long v = arr.get(mid); + if (v >= val) { + max = mid; + } else { + min = mid; + } + } + mid = (min + max) / 2; + i++; + } + + arr.updateEstimatedValueLocation(val, min); + + return min; + } + public CloseSuppressPath getBlocksPath() { return blocksPath; } @@ -467,4 +524,9 @@ public CloseSuppressPath getBlocksPath() { public CloseSuppressPath getSuperBlocksPath() { return superBlocksPath; } + + @Override + public String toString() { + return "Bitmap375Big{}"; + } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/AbstractLongArray.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/AbstractLongArray.java new file mode 100644 index 00000000..e6ef7a44 --- /dev/null +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/AbstractLongArray.java @@ -0,0 +1,94 @@ +package com.the_qa_company.qendpoint.core.util.disk; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public abstract class AbstractLongArray implements LongArray { + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + private final int ESTIMATED_LOCATION_ARRAY_SIZE = 1024 * 128; + + // should take about 1MB per array when PREV_FOUND_SIZE is 1024 * 128 + private final long[] estimatedLocationMax = new long[ESTIMATED_LOCATION_ARRAY_SIZE]; + private final long[] estimatedLocationMin = new long[ESTIMATED_LOCATION_ARRAY_SIZE]; + private final long[] estimatedLocation = new long[ESTIMATED_LOCATION_ARRAY_SIZE]; + + private int estimatedLocationBucketSize; + + long maxValue = 1; + + @Override + public int getEstimatedLocationArrayBucketSize() { + return estimatedLocationBucketSize; + } + + private void updateEstimatedLocationArrayBucketSize() { + int minBucketSize = (int) (maxValue / ESTIMATED_LOCATION_ARRAY_SIZE); + // we want to have the next power of 2 + int next = 1; + while (next < minBucketSize) { + next <<= 1; + } + this.estimatedLocationBucketSize = next; + } + + @Override + public long[] getEstimatedLocationArray() { + return estimatedLocation; + } + + @Override + public long[] getEstimatedLocationArrayMin() { + return estimatedLocationMin; + } + + @Override + public long[] getEstimatedLocationArrayMax() { + return estimatedLocationMax; + } + + @Override + public void recalculateEstimatedValueLocation() { + updateEstimatedLocationArrayBucketSize(); + int estimatedLocationBucketSize = getEstimatedLocationArrayBucketSize(); + long len = length(); + boolean shouldLog = len > 1024 * 1024 * 2; + if (shouldLog) { + logger.info("Recalculating estimated location array 0%"); + } + + for (int i = 0; i < len; i++) { + long val = get(i); + if (val == 0) { + continue; + } + + int index = (int) (val / estimatedLocationBucketSize + 1); + estimatedLocationMax[index] = Math.max(estimatedLocationMax[index], i); + if (estimatedLocationMin[index] == 0) { + estimatedLocationMin[index] = i; + } else { + estimatedLocationMin[index] = Math.min(estimatedLocationMin[index], i); + } + estimatedLocation[index] = (estimatedLocationMax[index] + estimatedLocationMin[index]) / 2; + + if (shouldLog && i % (1024 * 1024) == 0) { + logger.info("Recalculating estimated location array {}%", (int) Math.floor(100.0 / len * i)); + } + } + + if (shouldLog) { + logger.info("Recalculating estimated location array 100%"); + } + } + + @Override + public final void set(long index, long value) { + maxValue = Math.max(maxValue, value); + innerSet(index, value); + } + + abstract protected void innerSet(long index, long value); + +} diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/LargeLongArray.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/LargeLongArray.java index f0b0e874..b655f1dd 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/LargeLongArray.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/LargeLongArray.java @@ -10,7 +10,7 @@ * * @author Antoine Willerval */ -public class LargeLongArray implements LongArray { +public class LargeLongArray extends AbstractLongArray { private UnsafeLongArray array; /** @@ -26,7 +26,7 @@ public long get(long index) { } @Override - public void set(long index, long value) { + protected void innerSet(long index, long value) { array.set(index, value); } @@ -55,4 +55,5 @@ public void resize(long newSize) throws IOException { public void clear() { array.clear(); } + } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/LongArray.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/LongArray.java index 77b0add5..aa9ca68c 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/LongArray.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/LongArray.java @@ -1,6 +1,8 @@ package com.the_qa_company.qendpoint.core.util.disk; import com.the_qa_company.qendpoint.core.util.io.IOUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.IOException; import java.util.Iterator; @@ -10,6 +12,10 @@ * Describe a large array of longs */ public interface LongArray extends Iterable { + + Logger logger = LoggerFactory.getLogger(LongArray.class); + long[] EMPTY_ARRAY = new long[0]; + /** * create an in memory long array * @@ -208,4 +214,84 @@ public Long next() { } }; } + + /** + * @return the estimated location array that contains the highest location + * for a given value + */ + default long[] getEstimatedLocationArrayMax() { + return getEstimatedLocationArray(); + } + + /** + * @return the estimated location array that contains the lowest location + * for a given value + */ + default long[] getEstimatedLocationArrayMin() { + return getEstimatedLocationArray(); + } + + /** + * @return the estimated location array + */ + default long[] getEstimatedLocationArray() { + return EMPTY_ARRAY; + } + + default int getEstimatedLocationArrayBucketSize() { + return 65536; + } + + default long getEstimatedLocationLowerBound(long val) { + int index = (int) (val / getEstimatedLocationArrayBucketSize() + 1); + if (index - 1 >= 0) { + long t = getEstimatedLocationArrayMax()[index - 1]; + if (t > 0) { + return t; + } + } + return 0; + } + + default long getEstimatedLocationUpperBound(long val) { + int index = (int) (val / getEstimatedLocationArrayBucketSize() + 1); + long[] estimatedLocationMin = getEstimatedLocationArrayMin(); + if (index + 1 < estimatedLocationMin.length) { + long t = estimatedLocationMin[index + 1]; + if (t > 0) { + return Math.min(length(), t); + } + } + + return length(); + } + + default long getEstimatedLocation(long val, long min, long max) { + int index = (int) (val / getEstimatedLocationArrayBucketSize() + 1); + var estimatedLocation = getEstimatedLocationArray(); + + if (index >= estimatedLocation.length) { + return (min + max) / 2; + } + long t = estimatedLocation[index]; + if (t > min && t < max) { + return t; + } else { + return (min + max) / 2; + } + } + + default void recalculateEstimatedValueLocation() { + logger.info("Class {} does not support recalculateEstimatedValueLocation()", + this.getClass().getCanonicalName()); + } + + default void updateEstimatedValueLocation(long val, long min) { + int index = (int) (val / getEstimatedLocationArrayBucketSize() + 1); + long[] estimatedLocation = getEstimatedLocationArray(); + if (index >= estimatedLocation.length) { + return; + } + estimatedLocation[index] = min; + } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/LongArrayDisk.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/LongArrayDisk.java index 1b10eaed..e4bf98b0 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/LongArrayDisk.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/LongArrayDisk.java @@ -33,7 +33,7 @@ //Implementing an array of longs that is backed up on disk. Following this: http://vanillajava.blogspot.fr/2011/12/using-memory-mapped-file-for-huge.html -public class LongArrayDisk implements Closeable, LongArray { +public class LongArrayDisk extends AbstractLongArray implements Closeable { private static final long MAPPING_SIZE = 1 << 30; private final boolean closeChannel; private final FileChannel channel; @@ -137,7 +137,7 @@ public long get(long x) { } @Override - public void set(long index, long value) { + protected void innerSet(long index, long value) { if (index >= size || index < 0) { throw new IndexOutOfBoundsException(); } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/SimpleLongArray.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/SimpleLongArray.java index b818cc9a..a86951d0 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/SimpleLongArray.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/SimpleLongArray.java @@ -3,7 +3,7 @@ import java.io.IOException; import java.util.Arrays; -public class SimpleLongArray implements LongArray { +public class SimpleLongArray extends AbstractLongArray { public static LongArray of(int size) { return wrapper(new long[size]); @@ -28,7 +28,7 @@ public long get(long index) { } @Override - public void set(long index, long value) { + protected void innerSet(long index, long value) { if (index < 0 || index >= array.length) { throw new IndexOutOfBoundsException(index + " < 0 || " + index + " > " + array.length); } @@ -60,4 +60,5 @@ public void resize(long newSize) throws IOException { public void clear() { Arrays.fill(array, 0); } + } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/SimpleSplitLongArray.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/SimpleSplitLongArray.java index b9e2ab8f..982485f6 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/SimpleSplitLongArray.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/SimpleSplitLongArray.java @@ -10,7 +10,7 @@ /** * Implementation of LongArray for simple int64 splits */ -public class SimpleSplitLongArray implements LongArray, Closeable { +public class SimpleSplitLongArray extends AbstractLongArray implements Closeable { final LongArray array; private final int shift; private final long max; @@ -30,6 +30,7 @@ private SimpleSplitLongArray(LongArray array, int numbits, long size) { max = (~0L) >>> (64 - numbits); indexMask = (1 << shift) - 1; this.numbits = numbits; + } public static SimpleSplitLongArray int8Array(long size) { @@ -80,7 +81,7 @@ public long get(long index) { } @Override - public void set(long index, long value) { + public void innerSet(long index, long value) { long rindex = index >>> shift; int sindex = (int) (index & indexMask) << (6 - shift); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/SyncLongArray.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/SyncLongArray.java index 9d2ccf53..80f7dc1e 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/SyncLongArray.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/SyncLongArray.java @@ -8,6 +8,7 @@ * @author Antoine Willerval */ public class SyncLongArray implements LongArray { + /** * Sync a long array * @@ -57,4 +58,49 @@ public synchronized void resize(long newSize) throws IOException { public synchronized void clear() { array.clear(); } + + @Override + public void updateEstimatedValueLocation(long val, long min) { + array.updateEstimatedValueLocation(val, min); + } + + @Override + public void recalculateEstimatedValueLocation() { + array.recalculateEstimatedValueLocation(); + } + + @Override + public long getEstimatedLocation(long val, long min, long max) { + return array.getEstimatedLocation(val, min, max); + } + + @Override + public long getEstimatedLocationUpperBound(long val) { + return array.getEstimatedLocationUpperBound(val); + } + + @Override + public long getEstimatedLocationLowerBound(long val) { + return array.getEstimatedLocationLowerBound(val); + } + + @Override + public int getEstimatedLocationArrayBucketSize() { + return array.getEstimatedLocationArrayBucketSize(); + } + + @Override + public long[] getEstimatedLocationArray() { + return array.getEstimatedLocationArray(); + } + + @Override + public long[] getEstimatedLocationArrayMin() { + return array.getEstimatedLocationArrayMin(); + } + + @Override + public long[] getEstimatedLocationArrayMax() { + return array.getEstimatedLocationArrayMax(); + } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/WriteLongArrayBuffer.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/WriteLongArrayBuffer.java index 6502676f..09e436b1 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/WriteLongArrayBuffer.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/WriteLongArrayBuffer.java @@ -1,6 +1,7 @@ package com.the_qa_company.qendpoint.core.util.io.compress; import com.the_qa_company.qendpoint.core.util.BitUtil; +import com.the_qa_company.qendpoint.core.util.disk.AbstractLongArray; import com.the_qa_company.qendpoint.core.util.disk.LongArray; import java.io.Closeable; @@ -13,7 +14,7 @@ * * @author Antoine Willerval */ -public class WriteLongArrayBuffer implements LongArray, Closeable { +public class WriteLongArrayBuffer extends AbstractLongArray implements Closeable { // debug field private static final boolean DISABLE_BUFFER = true; private final LongArray array; @@ -132,7 +133,7 @@ public long get(long index) { * @param value the value to set */ @Override - public void set(long index, long value) { + protected void innerSet(long index, long value) { if (DISABLE_BUFFER) { array.set(index, value); return; From 9b7e6e27ed933b9b215972c8d50dbe526dc61f70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Thu, 5 Dec 2024 12:39:39 +0100 Subject: [PATCH 3/6] improved bit shifting in select1 by using Long.numberOfTrailingZeroes --- .../com/the_qa_company/qendpoint/core/util/BitUtil.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/BitUtil.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/BitUtil.java index c2e55eac..535a42cd 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/BitUtil.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/BitUtil.java @@ -64,9 +64,10 @@ public static void writeLowerBitsByteAligned(long value, long numbits, OutputStr public static int select1(long value, int rank) { int bitpos = 0; while (rank > 0 && value != 0) { - rank -= value & 1; - bitpos++; - value >>>= 1; + int trailingZeros = Long.numberOfTrailingZeros(value); + bitpos += trailingZeros + 1; + value >>>= trailingZeros + 1; + rank--; } return bitpos; } From 4d56b27510b34f1022ba1f1ea29fafddf3be1346 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Thu, 5 Dec 2024 13:23:12 +0100 Subject: [PATCH 4/6] only create the new Values if we are going to query the native store --- .../qendpoint/store/EndpointTripleSource.java | 114 ++++++++++-------- 1 file changed, 64 insertions(+), 50 deletions(-) diff --git a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointTripleSource.java b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointTripleSource.java index fb3f40f7..131a687f 100644 --- a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointTripleSource.java +++ b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointTripleSource.java @@ -37,6 +37,7 @@ public class EndpointTripleSource implements TripleSource { private static final Logger logger = LoggerFactory.getLogger(EndpointTripleSource.class); + public static final EmptyIteration EMPTY_ITERATION = new EmptyIteration<>(); private final EndpointStore endpoint; private long numberOfCurrentTriples; // count the number of times rdf4j is called within a triple pattern.. @@ -95,68 +96,22 @@ public CloseableIteration getStatements(StatementOrder stat boolean graph = endpoint.getHdt().getDictionary().supportGraphs(); // convert uris into ids if needed - Resource newSubj; - IRI newPred; - Value newObj; - Resource[] newContextes; + long subjectID = this.endpoint.getHdtConverter().subjectToID(subj); long predicateID = this.endpoint.getHdtConverter().predicateToID(pred); long objectID = this.endpoint.getHdtConverter().objectToID(obj); long[] graphID; - if (subjectID == 0 || subjectID == -1) { - newSubj = subj; - } else { - newSubj = this.endpoint.getHdtConverter().subjectIdToIRI(subjectID); - } - if (predicateID == 0 || predicateID == -1) { - newPred = pred; - } else { - newPred = this.endpoint.getHdtConverter().predicateIdToIRI(predicateID); - } - if (objectID == 0 || objectID == -1) { - newObj = obj; - } else { - newObj = this.endpoint.getHdtConverter().objectIdToIRI(objectID); - } - if (graph) { graphID = new long[contexts.length]; - newContextes = this.endpoint.getHdtConverter().graphIdToIRI(contexts, graphID); } else { graphID = null; - newContextes = contexts; } // logger.debug("SEARCH {} {} {}", newSubj, newPred, newObj); - // check if we need to search over the delta and if yes, search - CloseableIteration repositoryResult; - if (shouldSearchOverNativeStore(subjectID, predicateID, objectID)) { - if (statementOrder != null) { - throw new UnsupportedOperationException( - "Statement ordering is not supported when searching over the native store"); - } - logger.debug("Searching over native store"); - count++; - if (endpoint.isMergeTriggered) { - // query both native stores - logger.debug("Query both RDF4j stores!"); - CloseableIteration repositoryResult1 = this.endpointStoreConnection.getConnA_read() - .getStatements(newSubj, newPred, newObj, false, newContextes); - CloseableIteration repositoryResult2 = this.endpointStoreConnection.getConnB_read() - .getStatements(newSubj, newPred, newObj, false, newContextes); - repositoryResult = new CombinedNativeStoreResult(repositoryResult1, repositoryResult2); - - } else { - logger.debug("Query only one RDF4j stores!"); - repositoryResult = this.endpointStoreConnection.getCurrentConnectionRead().getStatements(newSubj, - newPred, newObj, false, newContextes); - } - } else { - logger.debug("Not searching over native store"); - repositoryResult = new EmptyIteration<>(); - } + var nativeStoreRepoResults = getNativeStoreIterator(statementOrder, subj, pred, obj, contexts, subjectID, + predicateID, objectID, graph, graphID); // iterate over the HDT file IteratorTripleID iterator; @@ -203,7 +158,66 @@ public CloseableIteration getStatements(StatementOrder stat // iterate over hdt result, delete the triples marked as deleted and add // the triples from the delta - return new EndpointStoreTripleIterator(endpointStoreConnection, this, iterator, repositoryResult); + return new EndpointStoreTripleIterator(endpointStoreConnection, this, iterator, nativeStoreRepoResults); + } + + private CloseableIteration getNativeStoreIterator(StatementOrder statementOrder, Resource subj, + IRI pred, Value obj, Resource[] contexts, long subjectID, long predicateID, long objectID, boolean graph, + long[] graphID) { + // check if we need to search over the delta and if yes, search + CloseableIteration repositoryResult; + if (shouldSearchOverNativeStore(subjectID, predicateID, objectID)) { + Resource newSubj; + IRI newPred; + Value newObj; + Resource[] newContextes; + if (subjectID == 0 || subjectID == -1) { + newSubj = subj; + } else { + newSubj = this.endpoint.getHdtConverter().subjectIdToIRI(subjectID); + } + if (predicateID == 0 || predicateID == -1) { + newPred = pred; + } else { + newPred = this.endpoint.getHdtConverter().predicateIdToIRI(predicateID); + } + if (objectID == 0 || objectID == -1) { + newObj = obj; + } else { + newObj = this.endpoint.getHdtConverter().objectIdToIRI(objectID); + } + + if (graph) { + newContextes = this.endpoint.getHdtConverter().graphIdToIRI(contexts, graphID); + } else { + newContextes = contexts; + } + + if (statementOrder != null) { + throw new UnsupportedOperationException( + "Statement ordering is not supported when searching over the native store"); + } + logger.debug("Searching over native store"); + count++; + if (endpoint.isMergeTriggered) { + // query both native stores + logger.debug("Query both RDF4j stores!"); + CloseableIteration repositoryResult1 = this.endpointStoreConnection.getConnA_read() + .getStatements(newSubj, newPred, newObj, false, newContextes); + CloseableIteration repositoryResult2 = this.endpointStoreConnection.getConnB_read() + .getStatements(newSubj, newPred, newObj, false, newContextes); + repositoryResult = new CombinedNativeStoreResult(repositoryResult1, repositoryResult2); + + } else { + logger.debug("Query only one RDF4j stores!"); + repositoryResult = this.endpointStoreConnection.getCurrentConnectionRead().getStatements(newSubj, + newPred, newObj, false, newContextes); + } + } else { + logger.debug("Not searching over native store"); + repositoryResult = EMPTY_ITERATION; + } + return repositoryResult; } // this function determines if a triple pattern should be searched over the From 342e3e864a33a22c44276c80c18baa9f189af215 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Wed, 11 Dec 2024 11:35:08 +0100 Subject: [PATCH 5/6] reduce memory pressure --- .../core/util/disk/AbstractLongArray.java | 19 +++++++++++++++++-- .../core/util/disk/LargeLongArray.java | 4 ++-- .../core/util/disk/LongArrayDisk.java | 4 ++-- .../core/util/disk/SimpleLongArray.java | 5 ++--- 4 files changed, 23 insertions(+), 9 deletions(-) diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/AbstractLongArray.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/AbstractLongArray.java index e6ef7a44..f22b60b4 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/AbstractLongArray.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/AbstractLongArray.java @@ -7,9 +7,24 @@ public abstract class AbstractLongArray implements LongArray { private final Logger logger = LoggerFactory.getLogger(getClass()); - private final int ESTIMATED_LOCATION_ARRAY_SIZE = 1024 * 128; + private static final int ESTIMATED_LOCATION_ARRAY_SIZE; + + static { + // get total amount of memory that this java program is allowed to use + long maxMemory = Runtime.getRuntime().maxMemory(); + + if (maxMemory >= 1024 * 1024 * 512) { + ESTIMATED_LOCATION_ARRAY_SIZE = 1024 * 128; + } else if (maxMemory >= 1024 * 1024 * 256) { + ESTIMATED_LOCATION_ARRAY_SIZE = 1024 * 64; + } else if (maxMemory >= 1024 * 1024 * 128) { + ESTIMATED_LOCATION_ARRAY_SIZE = 1024 * 32; + } else { + ESTIMATED_LOCATION_ARRAY_SIZE = 1024 * 16; + } + + } - // should take about 1MB per array when PREV_FOUND_SIZE is 1024 * 128 private final long[] estimatedLocationMax = new long[ESTIMATED_LOCATION_ARRAY_SIZE]; private final long[] estimatedLocationMin = new long[ESTIMATED_LOCATION_ARRAY_SIZE]; private final long[] estimatedLocation = new long[ESTIMATED_LOCATION_ARRAY_SIZE]; diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/LargeLongArray.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/LargeLongArray.java index b655f1dd..0593bdce 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/LargeLongArray.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/LargeLongArray.java @@ -10,7 +10,7 @@ * * @author Antoine Willerval */ -public class LargeLongArray extends AbstractLongArray { +public class LargeLongArray implements LongArray { private UnsafeLongArray array; /** @@ -26,7 +26,7 @@ public long get(long index) { } @Override - protected void innerSet(long index, long value) { + public void set(long index, long value) { array.set(index, value); } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/LongArrayDisk.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/LongArrayDisk.java index e4bf98b0..1b10eaed 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/LongArrayDisk.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/LongArrayDisk.java @@ -33,7 +33,7 @@ //Implementing an array of longs that is backed up on disk. Following this: http://vanillajava.blogspot.fr/2011/12/using-memory-mapped-file-for-huge.html -public class LongArrayDisk extends AbstractLongArray implements Closeable { +public class LongArrayDisk implements Closeable, LongArray { private static final long MAPPING_SIZE = 1 << 30; private final boolean closeChannel; private final FileChannel channel; @@ -137,7 +137,7 @@ public long get(long x) { } @Override - protected void innerSet(long index, long value) { + public void set(long index, long value) { if (index >= size || index < 0) { throw new IndexOutOfBoundsException(); } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/SimpleLongArray.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/SimpleLongArray.java index a86951d0..b818cc9a 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/SimpleLongArray.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/SimpleLongArray.java @@ -3,7 +3,7 @@ import java.io.IOException; import java.util.Arrays; -public class SimpleLongArray extends AbstractLongArray { +public class SimpleLongArray implements LongArray { public static LongArray of(int size) { return wrapper(new long[size]); @@ -28,7 +28,7 @@ public long get(long index) { } @Override - protected void innerSet(long index, long value) { + public void set(long index, long value) { if (index < 0 || index >= array.length) { throw new IndexOutOfBoundsException(index + " < 0 || " + index + " > " + array.length); } @@ -60,5 +60,4 @@ public void resize(long newSize) throws IOException { public void clear() { Arrays.fill(array, 0); } - } From c97d4f1dd11b59387b9f1f94cfa42dfb0128a14e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Wed, 11 Dec 2024 14:34:27 +0100 Subject: [PATCH 6/6] revert some changes based on review --- .../core/util/io/compress/WriteLongArrayBuffer.java | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/WriteLongArrayBuffer.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/WriteLongArrayBuffer.java index 09e436b1..7b10ac3c 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/WriteLongArrayBuffer.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/WriteLongArrayBuffer.java @@ -1,7 +1,6 @@ package com.the_qa_company.qendpoint.core.util.io.compress; import com.the_qa_company.qendpoint.core.util.BitUtil; -import com.the_qa_company.qendpoint.core.util.disk.AbstractLongArray; import com.the_qa_company.qendpoint.core.util.disk.LongArray; import java.io.Closeable; @@ -14,7 +13,7 @@ * * @author Antoine Willerval */ -public class WriteLongArrayBuffer extends AbstractLongArray implements Closeable { +public class WriteLongArrayBuffer implements LongArray, Closeable { // debug field private static final boolean DISABLE_BUFFER = true; private final LongArray array; @@ -34,9 +33,9 @@ public WriteLongArrayBuffer(LongArray array, long maxValue, int maxElement) { this.array = array; if (!DISABLE_BUFFER) { int bits = BitUtil.log2(maxValue + 2) + CompressUtil.INDEX_SHIFT; // + - // 1 - // for - // shared + // 1 + // for + // shared if (bits > 31) { bufferLong = new ArrayElementLong[maxElement / 3]; @@ -133,7 +132,7 @@ public long get(long index) { * @param value the value to set */ @Override - protected void innerSet(long index, long value) { + public void set(long index, long value) { if (DISABLE_BUFFER) { array.set(index, value); return;