diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/enums/TripleComponentOrder.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/enums/TripleComponentOrder.java index 92608d1e..fb648429 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/enums/TripleComponentOrder.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/enums/TripleComponentOrder.java @@ -19,6 +19,8 @@ package com.the_qa_company.qendpoint.core.enums; +import java.util.Map; + /** * Indicates the order of the triples */ @@ -26,41 +28,88 @@ public enum TripleComponentOrder { /** * Subject, predicate, object */ - Unknown(null, null, null), + Unknown(null, null, null, 0), /** * Subject, predicate, object */ - SPO(TripleComponentRole.SUBJECT, TripleComponentRole.PREDICATE, TripleComponentRole.OBJECT), + SPO(TripleComponentRole.SUBJECT, TripleComponentRole.PREDICATE, TripleComponentRole.OBJECT, 1), /** * Subject, object, predicate */ - SOP(TripleComponentRole.SUBJECT, TripleComponentRole.OBJECT, TripleComponentRole.PREDICATE), + SOP(TripleComponentRole.SUBJECT, TripleComponentRole.OBJECT, TripleComponentRole.PREDICATE, 1 << 1), /** * Predicate, subject, object */ - PSO(TripleComponentRole.PREDICATE, TripleComponentRole.SUBJECT, TripleComponentRole.OBJECT), + PSO(TripleComponentRole.PREDICATE, TripleComponentRole.SUBJECT, TripleComponentRole.OBJECT, 1 << 2), /** * Predicate, object, subject */ - POS(TripleComponentRole.PREDICATE, TripleComponentRole.OBJECT, TripleComponentRole.SUBJECT), + POS(TripleComponentRole.PREDICATE, TripleComponentRole.OBJECT, TripleComponentRole.SUBJECT, 1 << 3), /** * Object, subject, predicate */ - OSP(TripleComponentRole.OBJECT, TripleComponentRole.SUBJECT, TripleComponentRole.PREDICATE), + OSP(TripleComponentRole.OBJECT, TripleComponentRole.SUBJECT, TripleComponentRole.PREDICATE, 1 << 4), /** * Object, predicate, subject */ - OPS(TripleComponentRole.OBJECT, TripleComponentRole.PREDICATE, TripleComponentRole.SUBJECT); + OPS(TripleComponentRole.OBJECT, TripleComponentRole.PREDICATE, TripleComponentRole.SUBJECT, 1 << 5); + + public static final int ALL_MASK; + + static { + int allMask = 0; + // add all the mask to the var + for (TripleComponentOrder order : values()) { + allMask |= order.mask; + } + ALL_MASK = allMask; + } private final TripleComponentRole subjectMapping; private final TripleComponentRole predicateMapping; private final TripleComponentRole objectMapping; + public final int mask; TripleComponentOrder(TripleComponentRole subjectMapping, TripleComponentRole predicateMapping, - TripleComponentRole objectMapping) { + TripleComponentRole objectMapping, int mask) { this.subjectMapping = subjectMapping; this.predicateMapping = predicateMapping; this.objectMapping = objectMapping; + this.mask = mask; + } + + /** + * Search for an acceptable value in a map of orders + * + * @param flags flags to search the value + * @param map map + * @param value type + * @return find value, null for no matching value + */ + public static T fetchBestForCfg(int flags, Map map) { + for (Map.Entry e : map.entrySet()) { + if ((e.getKey().mask & flags) != 0) { + return e.getValue(); + } + } + return null; + } + + /** + * get an acceptable order for a order mask + * + * @param flags order mask + * @return order, {@link #Unknown} if nothing was found + */ + public static TripleComponentOrder getAcceptableOrder(int flags) { + if (flags != 0) { + for (TripleComponentOrder v : values()) { + if ((v.mask & flags) == 0) { + return v; + } + } + } + return Unknown; } public TripleComponentRole getSubjectMapping() { diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/HDTImpl.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/HDTImpl.java index f5bf99d3..11e22692 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/HDTImpl.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/HDTImpl.java @@ -33,6 +33,7 @@ import com.the_qa_company.qendpoint.core.dictionary.impl.MultipleSectionDictionaryBig; import com.the_qa_company.qendpoint.core.dictionary.impl.MultipleSectionDictionaryCat; import com.the_qa_company.qendpoint.core.enums.ResultEstimationType; +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; import com.the_qa_company.qendpoint.core.enums.TripleComponentRole; import com.the_qa_company.qendpoint.core.exceptions.IllegalFormatException; import com.the_qa_company.qendpoint.core.exceptions.NotFoundException; @@ -83,6 +84,7 @@ import java.io.InputStream; import java.io.OutputStream; import java.nio.file.Files; +import java.nio.file.Path; import java.nio.file.Paths; import java.util.Map; import java.util.zip.GZIPInputStream; @@ -265,6 +267,18 @@ public void saveToHDT(String fileName, ProgressListener listener) throws IOExcep @Override public IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object) throws NotFoundException { + return search(subject, predicate, object, TripleComponentOrder.ALL_MASK); + } + + @Override + public IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object, + CharSequence graph) throws NotFoundException { + return search(subject, predicate, object, graph, TripleComponentOrder.ALL_MASK); + } + + @Override + public IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object, + int searchOrderMask) throws NotFoundException { if (isClosed) { throw new IllegalStateException("Cannot search an already closed HDT"); @@ -314,6 +328,11 @@ public long estimatedNumResults() { public long getLastTriplePosition() { throw new NotImplementedException(); } + + @Override + public TripleComponentOrder getOrder() { + return TripleComponentOrder.getAcceptableOrder(searchOrderMask); + } }; } @@ -321,22 +340,23 @@ public long getLastTriplePosition() { if (isMapped) { try { - return new DictionaryTranslateIteratorBuffer(triples.search(triple), dictionary, subject, predicate, - object, g); + return new DictionaryTranslateIteratorBuffer(triples.search(triple, searchOrderMask), dictionary, + subject, predicate, object, g); } catch (NullPointerException e) { e.printStackTrace(); // FIXME: find why this can happen - return new DictionaryTranslateIterator(triples.search(triple), dictionary, subject, predicate, object, - g); + return new DictionaryTranslateIterator(triples.search(triple, searchOrderMask), dictionary, subject, + predicate, object, g); } } else { - return new DictionaryTranslateIterator(triples.search(triple), dictionary, subject, predicate, object, g); + return new DictionaryTranslateIterator(triples.search(triple, searchOrderMask), dictionary, subject, + predicate, object, g); } } @Override public IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object, - CharSequence graph) throws NotFoundException { + CharSequence graph, int searchOrderMask) throws NotFoundException { if (isClosed) { throw new IllegalStateException("Cannot search an already closed HDT"); } @@ -386,22 +406,27 @@ public long estimatedNumResults() { public long getLastTriplePosition() { throw new NotImplementedException(); } + + @Override + public TripleComponentOrder getOrder() { + return TripleComponentOrder.getAcceptableOrder(searchOrderMask); + } }; } if (isMapped) { try { - return new DictionaryTranslateIteratorBuffer(triples.search(triple), dictionary, subject, predicate, - object, graph); + return new DictionaryTranslateIteratorBuffer(triples.search(triple, searchOrderMask), dictionary, + subject, predicate, object, graph); } catch (NullPointerException e) { e.printStackTrace(); // FIXME: find why this can happen - return new DictionaryTranslateIterator(triples.search(triple), dictionary, subject, predicate, object, - graph); + return new DictionaryTranslateIterator(triples.search(triple, searchOrderMask), dictionary, subject, + predicate, object, graph); } } else { - return new DictionaryTranslateIterator(triples.search(triple), dictionary, subject, predicate, object, - graph); + return new DictionaryTranslateIterator(triples.search(triple, searchOrderMask), dictionary, subject, + predicate, object, graph); } } @@ -454,6 +479,13 @@ public void loadOrCreateIndex(ProgressListener listener, HDTOptions spec) throws // We need no index. return; } + triples.mapGenOtherIndexes(Path.of(String.valueOf(hdtFileName)), spec, listener); + + // disable the FOQ generation if asked + if (spec.getBoolean(HDTOptionsKeys.BITMAPTRIPLES_INDEX_NO_FOQ, false)) { + return; + } + ControlInfo ci = new ControlInformation(); String indexName = hdtFileName + HDTVersion.get_index_suffix("-"); indexName = indexName.replaceAll("\\.hdt\\.gz", "hdt"); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/WriteHDTImpl.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/WriteHDTImpl.java index 96a6c729..e9f6cf28 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/WriteHDTImpl.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/WriteHDTImpl.java @@ -2,6 +2,7 @@ import com.the_qa_company.qendpoint.core.dictionary.DictionaryFactory; import com.the_qa_company.qendpoint.core.dictionary.DictionaryPrivate; +import com.the_qa_company.qendpoint.core.exceptions.NotFoundException; import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException; import com.the_qa_company.qendpoint.core.header.HeaderFactory; import com.the_qa_company.qendpoint.core.header.HeaderPrivate; @@ -122,4 +123,11 @@ public void close() throws IOException { public IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object) { throw new NotImplementedException(); } + + @Override + public IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object, + int searchOrderMask) throws NotFoundException { + throw new NotImplementedException(); + } + } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/MapOnCallHDT.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/MapOnCallHDT.java index 3bbec86e..fa1a97b4 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/MapOnCallHDT.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/MapOnCallHDT.java @@ -9,6 +9,7 @@ import com.the_qa_company.qendpoint.core.listener.ProgressListener; import com.the_qa_company.qendpoint.core.options.HDTOptions; import com.the_qa_company.qendpoint.core.triples.IteratorTripleString; +import com.the_qa_company.qendpoint.core.triples.TripleString; import com.the_qa_company.qendpoint.core.triples.Triples; import com.the_qa_company.qendpoint.core.util.io.CloseSuppressPath; import com.the_qa_company.qendpoint.core.util.io.IOUtil; @@ -20,6 +21,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardCopyOption; +import java.util.Iterator; /** * HDT implementation delaying the map method to avoid mapping into memory a @@ -107,6 +109,43 @@ public IteratorTripleString search(CharSequence subject, CharSequence predicate, return mapOrGetHDT().search(subject, predicate, object, graph); } + @Override + public IteratorTripleString search(TripleString triple) throws NotFoundException { + return mapOrGetHDT().search(triple); + } + + @Override + public IteratorTripleString searchAll() throws NotFoundException { + return mapOrGetHDT().searchAll(); + } + + @Override + public IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object, + int searchOrderMask) throws NotFoundException { + return mapOrGetHDT().search(subject, predicate, object, searchOrderMask); + } + + @Override + public IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object, + CharSequence graph, int searchOrderMask) throws NotFoundException { + return mapOrGetHDT().search(subject, predicate, object, graph, searchOrderMask); + } + + @Override + public IteratorTripleString search(TripleString triple, int searchOrderMask) throws NotFoundException { + return mapOrGetHDT().search(triple, searchOrderMask); + } + + @Override + public IteratorTripleString searchAll(int searchOrderMask) throws NotFoundException { + return mapOrGetHDT().searchAll(searchOrderMask); + } + + @Override + public Iterator iterator() { + return mapOrGetHDT().iterator(); + } + @Override public void loadFromHDT(InputStream input, ProgressListener listener) throws IOException { ((HDTPrivate) mapOrGetHDT()).loadFromHDT(input, listener); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/header/PlainHeader.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/header/PlainHeader.java index 0df71236..0c37539b 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/header/PlainHeader.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/header/PlainHeader.java @@ -27,6 +27,7 @@ import java.util.List; import com.the_qa_company.qendpoint.core.exceptions.IllegalFormatException; +import com.the_qa_company.qendpoint.core.exceptions.NotFoundException; import com.the_qa_company.qendpoint.core.exceptions.ParserException; import com.the_qa_company.qendpoint.core.listener.ProgressListener; import com.the_qa_company.qendpoint.core.options.ControlInfo; @@ -188,6 +189,18 @@ public IteratorTripleString search(CharSequence subject, CharSequence predicate, return new PlainHeaderIterator(this, pattern); } + @Override + public IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object, + int searchOrderMask) throws NotFoundException { + return search(subject, predicate, object); + } + + @Override + public IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object, + CharSequence graph, int searchOrderMask) throws NotFoundException { + return search(subject, predicate, object, graph); + } + @Override public void processTriple(TripleString triple, long pos) { triples.add(new TripleString(triple)); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/header/PlainHeaderIterator.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/header/PlainHeaderIterator.java index c0f04c4f..5c0a6b35 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/header/PlainHeaderIterator.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/header/PlainHeaderIterator.java @@ -19,6 +19,7 @@ package com.the_qa_company.qendpoint.core.header; +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; import com.the_qa_company.qendpoint.core.triples.IteratorTripleString; import com.the_qa_company.qendpoint.core.triples.TripleString; import com.the_qa_company.qendpoint.core.enums.ResultEstimationType; @@ -111,4 +112,8 @@ public long getLastTriplePosition() { throw new UnsupportedOperationException(); } + @Override + public TripleComponentOrder getOrder() { + return TripleComponentOrder.Unknown; + } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/DictionaryTranslateIterator.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/DictionaryTranslateIterator.java index ad5e2328..b26015b5 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/DictionaryTranslateIterator.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/DictionaryTranslateIterator.java @@ -21,6 +21,7 @@ import com.the_qa_company.qendpoint.core.dictionary.Dictionary; import com.the_qa_company.qendpoint.core.enums.ResultEstimationType; +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; import com.the_qa_company.qendpoint.core.enums.TripleComponentRole; import com.the_qa_company.qendpoint.core.quad.QuadString; import com.the_qa_company.qendpoint.core.triples.IteratorTripleID; @@ -161,4 +162,9 @@ public long getLastTriplePosition() { return iterator.getLastTriplePosition(); } + @Override + public TripleComponentOrder getOrder() { + return iterator.getOrder(); + } + } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/DictionaryTranslateIteratorBuffer.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/DictionaryTranslateIteratorBuffer.java index cf46d3f7..84553d64 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/DictionaryTranslateIteratorBuffer.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/DictionaryTranslateIteratorBuffer.java @@ -21,6 +21,7 @@ import com.the_qa_company.qendpoint.core.dictionary.DictionaryPrivate; import com.the_qa_company.qendpoint.core.dictionary.impl.OptimizedExtractor; import com.the_qa_company.qendpoint.core.enums.ResultEstimationType; +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; import com.the_qa_company.qendpoint.core.enums.TripleComponentRole; import com.the_qa_company.qendpoint.core.quad.QuadString; import com.the_qa_company.qendpoint.core.triples.IteratorTripleString; @@ -278,6 +279,11 @@ public long getLastTriplePosition() { return lastPosition.compute(); } + @Override + public TripleComponentOrder getOrder() { + return iterator.getOrder(); + } + public static void setBlockSize(int size) { DEFAULT_BLOCK_SIZE = size; } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/options/HDTOptionsKeys.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/options/HDTOptionsKeys.java index 3e85f61a..1d0eed3b 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/options/HDTOptionsKeys.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/options/HDTOptionsKeys.java @@ -508,6 +508,12 @@ public class HDTOptionsKeys { @Key(type = Key.Type.BOOLEAN, desc = "Dump binary offsets, default false") public static final String DUMP_BINARY_OFFSETS = "bump.binary.offsets"; + @Key(type = Key.Type.STRING, desc = "Create other indexes in bitmaptriples pattern values (spo, ops, etc.), default none") + public static final String BITMAPTRIPLES_INDEX_OTHERS = "bitmaptriples.index.others"; + + @Key(type = Key.Type.BOOLEAN, desc = "No FoQ index generation default false") + public static final String BITMAPTRIPLES_INDEX_NO_FOQ = "bitmaptriples.index.noFoQ"; + // use tree-map to have a better order private static final Map OPTION_MAP = new TreeMap<>(); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/RDFAccess.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/RDFAccess.java index 6b753100..3866cad3 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/RDFAccess.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/RDFAccess.java @@ -71,6 +71,51 @@ default IteratorTripleString searchAll() throws NotFoundException { return search("", "", "", ""); } + /** + * Iterate over the triples of an RDF Set that match the specified pattern. + * null and empty strings act as a wildcard. (e.g. search(null, null, null) + * iterates over all elements) + * + * @param subject The subject to search + * @param predicate The predicate to search + * @param object The object to search + * @param searchOrderMask The search order mask, can be get using + * {@link com.the_qa_company.qendpoint.core.enums.TripleComponentOrder#mask} + * @return Iterator of TripleStrings + * @throws NotFoundException when the triple cannot be found + */ + IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object, int searchOrderMask) + throws NotFoundException; + + /** + * Iterate over the triples of an RDF Set that match the specified pattern. + * null and empty strings act as a wildcard. Default implementation ignore + * the graph (e.g. search(null, null, null, null) iterates over all + * elements) + * + * @param subject The subject to search + * @param predicate The predicate to search + * @param object The object to search + * @param graph The graph to search + * @param searchOrderMask The search order mask, can be get using + * {@link com.the_qa_company.qendpoint.core.enums.TripleComponentOrder#mask} + * @return Iterator of TripleStrings + * @throws NotFoundException when the triple cannot be found + */ + default IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object, + CharSequence graph, int searchOrderMask) throws NotFoundException { + return search(subject, predicate, object, searchOrderMask); + } + + default IteratorTripleString search(TripleString triple, int searchOrderMask) throws NotFoundException { + return search(triple.getSubject(), triple.getPredicate(), triple.getObject(), triple.getGraph(), + searchOrderMask); + } + + default IteratorTripleString searchAll(int searchOrderMask) throws NotFoundException { + return search("", "", "", "", searchOrderMask); + } + @Override default Iterator iterator() { try { diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/parsers/JenaModelIterator.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/parsers/JenaModelIterator.java index 2c524e93..1ca94353 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/parsers/JenaModelIterator.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/parsers/JenaModelIterator.java @@ -1,5 +1,6 @@ package com.the_qa_company.qendpoint.core.rdf.parsers; +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; import org.apache.jena.rdf.model.Model; import org.apache.jena.rdf.model.Statement; import org.apache.jena.rdf.model.StmtIterator; @@ -54,4 +55,9 @@ public long getLastTriplePosition() { throw new UnsupportedOperationException(); } + @Override + public TripleComponentOrder getOrder() { + return TripleComponentOrder.Unknown; + } + } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/IteratorTripleString.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/IteratorTripleString.java index 3a943655..f8798b96 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/IteratorTripleString.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/IteratorTripleString.java @@ -22,6 +22,7 @@ import java.util.Iterator; import com.the_qa_company.qendpoint.core.enums.ResultEstimationType; +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; /** * Iterator of TripleStrings based on IteratorTripleID @@ -57,4 +58,9 @@ public interface IteratorTripleString extends Iterator { * @see Triples#findTriple(long) */ long getLastTriplePosition(); + + /** + * @return order of the components from the iterator + */ + TripleComponentOrder getOrder(); } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleID.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleID.java index 62f2ba4a..4e3f2de6 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleID.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleID.java @@ -19,10 +19,18 @@ package com.the_qa_company.qendpoint.core.triples; +import com.the_qa_company.qendpoint.core.util.LongCompare; + import java.io.Serial; import java.io.Serializable; -import com.the_qa_company.qendpoint.core.util.LongCompare; +import static com.the_qa_company.qendpoint.core.enums.TripleComponentOrder.ALL_MASK; +import static com.the_qa_company.qendpoint.core.enums.TripleComponentOrder.OPS; +import static com.the_qa_company.qendpoint.core.enums.TripleComponentOrder.OSP; +import static com.the_qa_company.qendpoint.core.enums.TripleComponentOrder.POS; +import static com.the_qa_company.qendpoint.core.enums.TripleComponentOrder.PSO; +import static com.the_qa_company.qendpoint.core.enums.TripleComponentOrder.SOP; +import static com.the_qa_company.qendpoint.core.enums.TripleComponentOrder.SPO; /** * TripleID holds a triple using Long IDs @@ -336,4 +344,34 @@ public int hashCode() { public TripleID copyNoGraph() { return new TripleID(subject, predicate, object); } + + /** + * @return the pattern order flags for this triple id + */ + public int getPatternOrderFlags() { + if (subject == 0) { + if (predicate == 0) { + if (object == 0) { + return ALL_MASK; // ??? + } + return OPS.mask | OSP.mask; // ??o + } + if (object == 0) { + return POS.mask | PSO.mask; // ?p? + } else { + return OPS.mask | POS.mask; // ?po + } + } + if (predicate == 0) { + if (object == 0) { + return SPO.mask | SOP.mask; // s?? + } + return SOP.mask | OSP.mask; // s?o + } + if (object == 0) { + return SPO.mask | PSO.mask; // sp? + } else { + return ALL_MASK; // spo + } + } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/Triples.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/Triples.java index 4a815085..339defa8 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/Triples.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/Triples.java @@ -20,13 +20,16 @@ package com.the_qa_company.qendpoint.core.triples; import java.io.Closeable; +import java.util.Iterator; +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; import com.the_qa_company.qendpoint.core.header.Header; +import com.the_qa_company.qendpoint.core.iterator.SuppliableIteratorTripleID; /** * Interface for Triples implementation. */ -public interface Triples extends Closeable { +public interface Triples extends Closeable, Iterable { /** * Iterates over all triples. Equivalent to this.search(new TripleID()); * @@ -34,6 +37,15 @@ public interface Triples extends Closeable { */ IteratorTripleID searchAll(); + /** + * Iterates over all triples. Equivalent to this.search(new TripleID()); + * + * @param searchMask search index mark, done by combining + * {@link TripleComponentOrder#mask} + * @return IteratorTripleID + */ + IteratorTripleID searchAll(int searchMask); + /** * Iterates over all triples that match the pattern. * @@ -42,6 +54,16 @@ public interface Triples extends Closeable { */ IteratorTripleID search(TripleID pattern); + /** + * Iterates over all triples that match the pattern. + * + * @param pattern The pattern to match against + * @param searchMask search index mark, done by combining + * {@link TripleComponentOrder#mask} + * @return IteratorTripleID + */ + SuppliableIteratorTripleID search(TripleID pattern, int searchMask); + /** * Returns the total number of triples * @@ -94,4 +116,9 @@ default TripleID findTriple(long position) { * @see IteratorTripleString#getLastTriplePosition() */ TripleID findTriple(long position, TripleID buffer); + + @Override + default Iterator iterator() { + return searchAll(); + } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TriplesPrivate.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TriplesPrivate.java index 0c7076a6..d986d9b8 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TriplesPrivate.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TriplesPrivate.java @@ -1,18 +1,18 @@ package com.the_qa_company.qendpoint.core.triples; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; - import com.the_qa_company.qendpoint.core.dictionary.Dictionary; -import com.the_qa_company.qendpoint.core.iterator.SuppliableIteratorTripleID; -import com.the_qa_company.qendpoint.core.listener.ProgressListener; import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; +import com.the_qa_company.qendpoint.core.listener.ProgressListener; import com.the_qa_company.qendpoint.core.options.ControlInfo; import com.the_qa_company.qendpoint.core.options.HDTOptions; import com.the_qa_company.qendpoint.core.util.io.CountInputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.file.Path; + public interface TriplesPrivate extends Triples { /** * Serializes the triples to an OutputStream @@ -21,14 +21,6 @@ public interface TriplesPrivate extends Triples { */ void save(OutputStream output, ControlInfo ci, ProgressListener listener) throws IOException; - /** - * Iterates over all triples that match the pattern. - * - * @param pattern The pattern to match against - * @return IteratorTripleID - */ - SuppliableIteratorTripleID search(TripleID pattern); - /** * Loads the structure from an InputStream * @@ -62,6 +54,16 @@ public interface TriplesPrivate extends Triples { */ void mapIndex(CountInputStream input, File f, ControlInfo ci, ProgressListener listener) throws IOException; + /** + * Sync or create the asked other index + * + * @param file hdt file + * @param spec spec + * @param listener listener + * @throws IOException io + */ + void mapGenOtherIndexes(Path file, HDTOptions spec, ProgressListener listener) throws IOException; + /** * Saves the associated Index to an OutputStream * diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapQuadTriples.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapQuadTriples.java index 381f7a94..42aeeb6e 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapQuadTriples.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapQuadTriples.java @@ -239,6 +239,28 @@ public SuppliableIteratorTripleID search(TripleID pattern) { pattern.isQuad() ? pattern.getGraph() : 0); } + @Override + public SuppliableIteratorTripleID search(TripleID pattern, int searchMask) { + if (isClosed) { + throw new IllegalStateException("Cannot search on BitmapTriples if it's already closed"); + } + + if (getNumberOfElements() == 0 || pattern.isNoMatch()) { + return new EmptyTriplesIterator(order); + } + + TripleID reorderedPat = new TripleID(pattern); + TripleOrderConvert.swapComponentOrder(reorderedPat, TripleComponentOrder.SPO, order); + String patternString = reorderedPat.getPatternString(); + + if (hasFOQIndex() && patternString.equals("???G")) { + return new BitmapTriplesIteratorGraphG(this, pattern); + } + + return new BitmapTriplesIteratorGraph(this, super.search(pattern.copyNoGraph(), searchMask), + pattern.isQuad() ? pattern.getGraph() : 0); + } + @Override public void mapFromFile(CountInputStream input, File f, ProgressListener listener) throws IOException { ControlInformation ci = new ControlInformation(); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriples.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriples.java index 8f417a71..3e7fd912 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriples.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriples.java @@ -72,20 +72,32 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.nio.channels.FileChannel; import java.nio.file.Files; +import java.nio.file.NoSuchFileException; import java.nio.file.Path; +import java.nio.file.StandardOpenOption; import java.util.ArrayList; +import java.util.Arrays; import java.util.Comparator; +import java.util.HashMap; import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; /** * @author mario.arias */ -public class BitmapTriples implements TriplesPrivate { +public class BitmapTriples implements TriplesPrivate, BitmapTriplesIndex { private static final Logger log = LoggerFactory.getLogger(BitmapTriples.class); protected TripleComponentOrder order; + protected final Map indexes = new HashMap<>(); + protected int indexesMask = 0; + protected Sequence seqY, seqZ, indexZ, predicateCount; protected Bitmap bitmapY, bitmapZ, bitmapIndexZ; @@ -285,12 +297,13 @@ public void load(TempTriples triples, ProgressListener listener) { this.load(it, listener); } - /* - * (non-Javadoc) - * @see hdt.triples.Triples#search(hdt.triples.TripleID) - */ @Override public SuppliableIteratorTripleID search(TripleID pattern) { + return search(pattern, TripleComponentOrder.ALL_MASK); + } + + @Override + public SuppliableIteratorTripleID search(TripleID pattern, int searchMask) { if (isClosed) { throw new IllegalStateException("Cannot search on BitmapTriples if it's already closed"); } @@ -301,6 +314,32 @@ public SuppliableIteratorTripleID search(TripleID pattern) { TripleID reorderedPat = new TripleID(pattern); TripleOrderConvert.swapComponentOrder(reorderedPat, TripleComponentOrder.SPO, order); + int flags = reorderedPat.getPatternOrderFlags(); + + if ((flags & searchMask & this.order.mask) != 0) { + // we can use the default order, so we use it + return new BitmapTriplesIterator(this, pattern); + } + + if ((indexesMask & flags) != 0) { + BitmapTriplesIndex idx; + + int bestOrders = flags & searchMask; + + if ((indexesMask & bestOrders) != 0) { + // we can use the asked order + idx = TripleComponentOrder.fetchBestForCfg(bestOrders, indexes); + } else { + // no asked order found, we can still use the best index + idx = TripleComponentOrder.fetchBestForCfg(flags, indexes); + } + + assert idx != null : String.format("the tid flags were describing an unknown pattern: %x &= %x", flags, + indexesMask & flags); + + return new BitmapTriplesIterator(idx, pattern); + } + String patternString = reorderedPat.getPatternString(); if (patternString.equals("?P?")) { @@ -342,7 +381,12 @@ public SuppliableIteratorTripleID search(TripleID pattern) { */ @Override public IteratorTripleID searchAll() { - return this.search(new TripleID()); + return searchAll(TripleComponentOrder.ALL_MASK); + } + + @Override + public IteratorTripleID searchAll(int searchMask) { + return this.search(new TripleID(), searchMask); } /* @@ -1030,7 +1074,7 @@ public TripleID findTriple(long position, TripleID tripleID) { // -1 so we don't count end of tree long posX = bitmapY.rank1(posY - 1); long x = posX + 1; // the subject ID is the position + 1, IDs start from - // 1 not zero + // 1 not zero tripleID.setAll(x, y, z); return tripleID; @@ -1207,11 +1251,17 @@ public void mapIndex(CountInputStream input, File f, ControlInfo ci, ProgressLis this.adjIndex = new AdjacencyList(this.indexZ, this.bitmapIndexZ); } + @Override + public void mapGenOtherIndexes(Path file, HDTOptions spec, ProgressListener listener) throws IOException { + syncOtherIndexes(file, spec, listener); + } + @Override public void close() throws IOException { isClosed = true; try { - Closer.closeAll(seqY, seqZ, indexZ, predicateCount, predicateIndex, bitmapIndexZ, diskSequenceLocation); + Closer.closeAll(seqY, seqZ, indexZ, predicateCount, predicateIndex, bitmapIndexZ, diskSequenceLocation, + indexes); } finally { diskSequenceLocation = null; seqY = null; @@ -1220,6 +1270,8 @@ public void close() throws IOException { predicateCount = null; predicateIndex = null; bitmapIndexZ = null; + indexes.clear(); + indexesMask = 0; } } @@ -1227,6 +1279,72 @@ public boolean hasFOQIndex() { return indexZ != null && bitmapIndexZ != null; } + public void syncOtherIndexes(Path fileLocation, HDTOptions spec, ProgressListener listener) throws IOException { + Closer.closeAll(indexes); + indexes.clear(); + indexesMask = 0; + + if (fileLocation == null) { + return; + } + + String otherIdxs = spec.get(HDTOptionsKeys.BITMAPTRIPLES_INDEX_OTHERS, ""); + + Set askedOrders = Arrays.stream(otherIdxs.toUpperCase().split(",")).map(e -> { + if (e.isEmpty() || e.equalsIgnoreCase(TripleComponentOrder.Unknown.name())) { + return null; + } + try { + return TripleComponentOrder.valueOf(e); + } catch (IllegalArgumentException ex) { + log.warn("Trying to use a bad order name {}", e, ex); + return null; + } + }).filter(Objects::nonNull).collect(Collectors.toSet()); + + MultiThreadListener mListener = MultiThreadListener.ofSingle(listener); + for (TripleComponentOrder order : TripleComponentOrder.values()) { + if (order == TripleComponentOrder.Unknown || order == this.order) { + continue; + } + + Path subIndexPath = BitmapTriplesIndexFile.getIndexPath(fileLocation, order); + + try (FileChannel channel = FileChannel.open(subIndexPath, StandardOpenOption.READ)) { + // load from the path... + + BitmapTriplesIndex idx = BitmapTriplesIndexFile.map(subIndexPath, channel); + BitmapTriplesIndex old = indexes.put(order, idx); + indexesMask |= idx.getOrder().mask; + if (old != null) { + log.warn("an index is using a bad order old:{} cur:{} new:{}", old.getOrder(), order, + idx.getOrder()); + } + IOUtil.closeQuietly(old); + } catch (NoSuchFileException ignore) { + // no index with this name + if (!askedOrders.contains(order)) { + continue; // not asked by the user, we can ignore + } + // generate the file + BitmapTriplesIndexFile.generateIndex(this, subIndexPath, order, spec, mListener); + try (FileChannel channel = FileChannel.open(subIndexPath, StandardOpenOption.READ)) { + // load from the path... + BitmapTriplesIndex idx = BitmapTriplesIndexFile.map(subIndexPath, channel); + BitmapTriplesIndex old = indexes.put(order, idx); + indexesMask |= order.mask; + if (old != null) { + log.warn("an index is using a bad order old:{} cur:{} new:{}", old.getOrder(), order, + idx.getOrder()); + } + IOUtil.closeQuietly(old); // should be null? + } catch (NoSuchFileException ex2) { + throw new IOException("index not generated", ex2); + } + } + } + } + @Override public TripleComponentOrder getOrder() { return this.order; @@ -1236,18 +1354,22 @@ public Sequence getIndexZ() { return indexZ; } + @Override public Sequence getSeqY() { return seqY; } + @Override public Sequence getSeqZ() { return seqZ; } + @Override public AdjacencyList getAdjacencyListY() { return adjY; } + @Override public AdjacencyList getAdjacencyListZ() { return adjZ; } @@ -1256,10 +1378,12 @@ public AdjacencyList getAdjacencyListIndex() { return adjIndex; } + @Override public Bitmap getBitmapY() { return bitmapY; } + @Override public Bitmap getBitmapZ() { return bitmapZ; } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIndex.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIndex.java new file mode 100644 index 00000000..213dc280 --- /dev/null +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIndex.java @@ -0,0 +1,22 @@ +package com.the_qa_company.qendpoint.core.triples.impl; + +import com.the_qa_company.qendpoint.core.compact.bitmap.AdjacencyList; +import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap; +import com.the_qa_company.qendpoint.core.compact.sequence.Sequence; +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; + +public interface BitmapTriplesIndex { + Bitmap getBitmapY(); + + Bitmap getBitmapZ(); + + Sequence getSeqY(); + + Sequence getSeqZ(); + + AdjacencyList getAdjacencyListY(); + + AdjacencyList getAdjacencyListZ(); + + TripleComponentOrder getOrder(); +} diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIndexFile.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIndexFile.java new file mode 100644 index 00000000..49896901 --- /dev/null +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIndexFile.java @@ -0,0 +1,357 @@ +package com.the_qa_company.qendpoint.core.triples.impl; + +import com.the_qa_company.qendpoint.core.compact.bitmap.AdjacencyList; +import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap; +import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap64Big; +import com.the_qa_company.qendpoint.core.compact.bitmap.BitmapFactory; +import com.the_qa_company.qendpoint.core.compact.bitmap.ModifiableBitmap; +import com.the_qa_company.qendpoint.core.compact.sequence.DynamicSequence; +import com.the_qa_company.qendpoint.core.compact.sequence.Sequence; +import com.the_qa_company.qendpoint.core.compact.sequence.SequenceFactory; +import com.the_qa_company.qendpoint.core.compact.sequence.SequenceLog64BigDisk; +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; +import com.the_qa_company.qendpoint.core.exceptions.IllegalFormatException; +import com.the_qa_company.qendpoint.core.iterator.utils.AsyncIteratorFetcher; +import com.the_qa_company.qendpoint.core.iterator.utils.ExceptionIterator; +import com.the_qa_company.qendpoint.core.iterator.utils.MapIterator; +import com.the_qa_company.qendpoint.core.listener.MultiThreadListener; +import com.the_qa_company.qendpoint.core.listener.ProgressListener; +import com.the_qa_company.qendpoint.core.options.HDTOptions; +import com.the_qa_company.qendpoint.core.options.HDTOptionsKeys; +import com.the_qa_company.qendpoint.core.triples.TripleID; +import com.the_qa_company.qendpoint.core.util.BitUtil; +import com.the_qa_company.qendpoint.core.util.concurrent.KWayMerger; +import com.the_qa_company.qendpoint.core.util.io.CloseMappedByteBuffer; +import com.the_qa_company.qendpoint.core.util.io.CloseSuppressPath; +import com.the_qa_company.qendpoint.core.util.io.Closer; +import com.the_qa_company.qendpoint.core.util.io.CountInputStream; +import com.the_qa_company.qendpoint.core.util.io.IOUtil; +import com.the_qa_company.qendpoint.core.util.listener.ListenerUtil; + +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.Closeable; +import java.io.IOException; +import java.io.InterruptedIOException; +import java.nio.channels.Channels; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Arrays; + +import static java.lang.String.format; + +/** + * File containing a BitmapTriples index + * + * @author Antoine Willerval + */ +public class BitmapTriplesIndexFile implements BitmapTriplesIndex, Closeable { + /** + * Get the path for an order for a hdt file + * + * @param hdt hdt path + * @param order order + * @return index path + */ + public static Path getIndexPath(Path hdt, TripleComponentOrder order) { + return hdt.resolveSibling(hdt.getFileName() + "." + order.name().toLowerCase() + ".idx"); + } + + public static final byte[] MAGIC = "$HDTIDX0".getBytes(StandardCharsets.US_ASCII); + + /** + * Map a file from a file + * + * @param file file + * @param channel channel + * @return index + * @throws IOException io + */ + public static BitmapTriplesIndex map(Path file, FileChannel channel) throws IOException { + try (CloseMappedByteBuffer header = IOUtil.mapChannel(file, channel, FileChannel.MapMode.READ_ONLY, 0, + MAGIC.length)) { + byte[] magicRead = new byte[MAGIC.length]; + + header.get(magicRead); + + if (!Arrays.equals(magicRead, MAGIC)) { + throw new IOException(format("Can't read %s magic", file)); + } + } + + CountInputStream stream = new CountInputStream(new BufferedInputStream(Channels.newInputStream(channel))); + stream.skipNBytes(MAGIC.length); + + String orderCfg = IOUtil.readSizedString(stream, ProgressListener.ignore()); + + TripleComponentOrder order = TripleComponentOrder.valueOf(orderCfg); + + Sequence seqY = SequenceFactory.createStream(stream, file.toFile()); + Bitmap bitY = BitmapFactory.createBitmap(stream); + bitY.load(stream, ProgressListener.ignore()); + + Sequence seqZ = SequenceFactory.createStream(stream, file.toFile()); + Bitmap bitZ = BitmapFactory.createBitmap(stream); + bitZ.load(stream, ProgressListener.ignore()); + + return new BitmapTriplesIndexFile(seqY, seqZ, bitY, bitZ, order); + } + + /** + * Generate an index in a particular destination + * + * @param triples triples to convert + * @param destination destination path + * @param order order to build + * @param spec ixd spec + * @param mtlistener listener + * @throws IOException ioe + */ + public static void generateIndex(BitmapTriples triples, Path destination, TripleComponentOrder order, + HDTOptions spec, MultiThreadListener mtlistener) throws IOException { + MultiThreadListener listener = MultiThreadListener.ofNullable(mtlistener); + Path diskLocation; + if (triples.diskSequence) { + diskLocation = triples.diskSequenceLocation.createOrGetPath(); + } else { + diskLocation = Files.createTempDirectory("bitmapTriples"); + } + int workers = (int) spec.getInt(HDTOptionsKeys.BITMAPTRIPLES_DISK_WORKER_KEY, + Runtime.getRuntime()::availableProcessors); + // check and set default values if required + if (workers <= 0) { + throw new IllegalArgumentException("Number of workers should be positive!"); + } + long chunkSize = spec.getInt(HDTOptionsKeys.BITMAPTRIPLES_DISK_CHUNK_SIZE_KEY, + () -> BitmapTriples.getMaxChunkSizeDiskIndex(workers)); + if (chunkSize < 0) { + throw new IllegalArgumentException("Negative chunk size!"); + } + long maxFileOpenedLong = spec.getInt(HDTOptionsKeys.BITMAPTRIPLES_DISK_MAX_FILE_OPEN_KEY, 1024); + int maxFileOpened; + if (maxFileOpenedLong < 0 || maxFileOpenedLong > Integer.MAX_VALUE) { + throw new IllegalArgumentException("maxFileOpened should be positive!"); + } else { + maxFileOpened = (int) maxFileOpenedLong; + } + long kwayLong = spec.getInt(HDTOptionsKeys.BITMAPTRIPLES_DISK_KWAY_KEY, + () -> Math.max(1, BitUtil.log2(maxFileOpened / workers))); + int k; + if (kwayLong <= 0 || kwayLong > Integer.MAX_VALUE) { + throw new IllegalArgumentException("kway can't be negative!"); + } else { + k = 1 << ((int) kwayLong); + } + long bufferSizeLong = spec.getInt(HDTOptionsKeys.BITMAPTRIPLES_DISK_BUFFER_SIZE_KEY, + CloseSuppressPath.BUFFER_SIZE); + int bufferSize; + if (bufferSizeLong > Integer.MAX_VALUE - 5L || bufferSizeLong <= 0) { + throw new IllegalArgumentException("Buffer size can't be negative or bigger than the size of an array!"); + } else { + bufferSize = (int) bufferSizeLong; + } + + try (CloseSuppressPath workDir = CloseSuppressPath + .of(diskLocation.resolve("triplesort-" + order.name().toLowerCase()))) { + workDir.mkdirs(); + workDir.closeWithDeleteRecurse(); + + ExceptionIterator sortedIds = null; + ModifiableBitmap bitY = null; + ModifiableBitmap bitZ = null; + DynamicSequence seqY = null; + DynamicSequence seqZ = null; + try { + sortedIds = new DiskTriplesReorderSorter(workDir, + new AsyncIteratorFetcher<>( + new MapIterator<>(triples.searchAll(triples.getOrder().mask), TripleID::clone)), + listener, bufferSize, chunkSize, k, triples.getOrder(), order).sort(workers); + + int ss = BitUtil.log2(triples.getBitmapY().countOnes()); + int ps = triples.getSeqY().sizeOf(); + int os = triples.getSeqZ().sizeOf(); + + TripleID logTriple = new TripleID(ss, ps, os); + + // we swap the order to find the new allocation numbits + TripleComponentOrder oldOrder = triples.getOrder(); + TripleOrderConvert.swapComponentOrder(logTriple, oldOrder, order); + + int ySize = (int) logTriple.getPredicate(); + int zSize = (int) logTriple.getObject(); + + long count = triples.getNumberOfElements(); + workDir.mkdirs(); + workDir.closeWithDeleteRecurse(); + bitY = Bitmap64Big.disk(workDir.resolve("bity"), count); + bitZ = Bitmap64Big.disk(workDir.resolve("bitZ"), count); + + triples.getSeqY().sizeOf(); + + seqY = new SequenceLog64BigDisk(workDir.resolve("seqy"), ySize, count, false, true); + seqZ = new SequenceLog64BigDisk(workDir.resolve("seqz"), zSize, count, false, true); + + long lastX = 0; + long lastY = 0; + long lastZ = 0; + + // filling index + + long x, y, z; + long numTriples = 0; + while (sortedIds.hasNext()) { + TripleID tid = sortedIds.next(); + + x = tid.getSubject(); + y = tid.getPredicate(); + z = tid.getObject(); + + if (x == 0 || y == 0 || z == 0) { + throw new IllegalFormatException("None of the components of a triple can be null"); + } + + if (numTriples == 0) { + seqY.append(y); + seqZ.append(z); + } else if (lastX != x) { + if (x != lastX + 1) { + throw new RuntimeException("Upper level must be increasing and correlative"); + } + + // X changed + bitY.append(true); + seqY.append(y); + + bitZ.append(true); + seqZ.append(z); + } else if (y != lastY) { + if (y < lastY) { + throw new IllegalFormatException("Middle level must be increasing for each parent."); + } + + // Y changed + bitY.append(false); + seqY.append(y); + + bitZ.append(true); + seqZ.append(z); + } else { + if (z < lastZ) { + throw new IllegalFormatException("Lower level must be increasing for each parent."); + } + + // Z changed + bitZ.append(false); + seqZ.append(z); + } + + lastX = x; + lastY = y; + lastZ = z; + + ListenerUtil.notifyCond(listener, "Converting to BitmapTriples", numTriples, numTriples, count); + numTriples++; + } + + if (numTriples > 0) { + bitY.append(true); + bitZ.append(true); + } + + assert numTriples == triples.getNumberOfElements(); + + seqY.aggressiveTrimToSize(); + seqZ.trimToSize(); + + // saving the index + try (BufferedOutputStream output = new BufferedOutputStream(Files.newOutputStream(destination))) { + output.write(MAGIC); + + IOUtil.writeSizedString(output, order.name(), listener); + + seqY.save(output, listener); + bitY.save(output, listener); + + seqZ.save(output, listener); + bitZ.save(output, listener); + + // no need for CRC I guess? + } + } catch (Throwable t) { + try { + Closer.closeAll(sortedIds, bitY, bitZ, seqY, seqZ); + } catch (Exception ex) { + t.addSuppressed(ex); + } catch (Throwable t2) { + t2.addSuppressed(t); + throw t2; + } + throw t; + } + Closer.closeAll(sortedIds, bitY, bitZ, seqY, seqZ); + + } catch (InterruptedException e) { + throw new InterruptedIOException(e.getMessage()); + } catch (KWayMerger.KWayMergerException e) { + throw new IOException(e); + } + } + + private final Sequence seqY, seqZ; + private final Bitmap bitY, bitZ; + private final AdjacencyList adjY, adjZ; + private final TripleComponentOrder order; + + private BitmapTriplesIndexFile(Sequence seqY, Sequence seqZ, Bitmap bitY, Bitmap bitZ, TripleComponentOrder order) { + this.seqY = seqY; + this.seqZ = seqZ; + this.bitY = bitY; + this.bitZ = bitZ; + this.order = order; + + this.adjY = new AdjacencyList(seqY, bitY); + this.adjZ = new AdjacencyList(seqZ, bitZ); + } + + @Override + public Bitmap getBitmapY() { + return bitY; + } + + @Override + public Bitmap getBitmapZ() { + return bitZ; + } + + @Override + public Sequence getSeqY() { + return seqY; + } + + @Override + public Sequence getSeqZ() { + return seqZ; + } + + @Override + public AdjacencyList getAdjacencyListY() { + return adjY; + } + + @Override + public AdjacencyList getAdjacencyListZ() { + return adjZ; + } + + @Override + public TripleComponentOrder getOrder() { + return order; + } + + @Override + public void close() throws IOException { + Closer.closeAll(bitY, bitZ, seqY, seqZ); + } +} diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIterator.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIterator.java index 57ba0a4a..3118c149 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIterator.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIterator.java @@ -30,7 +30,7 @@ */ public class BitmapTriplesIterator implements SuppliableIteratorTripleID { - protected final BitmapTriples triples; + protected final BitmapTriplesIndex idx; protected final TripleID pattern, returnTriple; protected long lastPosition; protected long patX, patY, patZ; @@ -40,8 +40,8 @@ public class BitmapTriplesIterator implements SuppliableIteratorTripleID { protected long nextY, nextZ; protected long x, y, z; - protected BitmapTriplesIterator(BitmapTriples triples, TripleID pattern, boolean search) { - this.triples = triples; + protected BitmapTriplesIterator(BitmapTriplesIndex idx, TripleID pattern, boolean search) { + this.idx = idx; this.returnTriple = new TripleID(); this.pattern = new TripleID(); if (search) { @@ -49,16 +49,16 @@ protected BitmapTriplesIterator(BitmapTriples triples, TripleID pattern, boolean } } - public BitmapTriplesIterator(BitmapTriples triples, TripleID pattern) { - this(triples, pattern, true); + public BitmapTriplesIterator(BitmapTriplesIndex idx, TripleID pattern) { + this(idx, pattern, true); } - public BitmapTriplesIterator(BitmapTriples triples, long minZ, long maxZ) { - this.triples = triples; + public BitmapTriplesIterator(BitmapTriplesIndex idx, long minZ, long maxZ) { + this.idx = idx; this.returnTriple = new TripleID(); this.pattern = new TripleID(); - adjY = triples.adjY; - adjZ = triples.adjZ; + adjY = idx.getAdjacencyListY(); + adjZ = idx.getAdjacencyListZ(); this.minZ = minZ; this.maxZ = maxZ; @@ -70,13 +70,13 @@ public BitmapTriplesIterator(BitmapTriples triples, long minZ, long maxZ) { public void newSearch(TripleID pattern) { this.pattern.assign(pattern); - TripleOrderConvert.swapComponentOrder(this.pattern, TripleComponentOrder.SPO, triples.order); + TripleOrderConvert.swapComponentOrder(this.pattern, TripleComponentOrder.SPO, idx.getOrder()); patX = this.pattern.getSubject(); patY = this.pattern.getPredicate(); patZ = this.pattern.getObject(); - adjY = triples.adjY; - adjZ = triples.adjZ; + adjY = idx.getAdjacencyListY(); + adjZ = idx.getAdjacencyListZ(); // ((BitSequence375)triples.bitmapZ).dump(); @@ -87,7 +87,7 @@ public void newSearch(TripleID pattern) { protected void updateOutput() { lastPosition = posZ; returnTriple.setAll(x, y, z); - TripleOrderConvert.swapComponentOrder(returnTriple, triples.order, TripleComponentOrder.SPO); + TripleOrderConvert.swapComponentOrder(returnTriple, idx.getOrder(), TripleComponentOrder.SPO); } private void findRange() { @@ -277,7 +277,7 @@ public void goTo(long pos) { */ @Override public TripleComponentOrder getOrder() { - return triples.order; + return idx.getOrder(); } /* diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorYFOQ.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorYFOQ.java index b762ab86..72638957 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorYFOQ.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorYFOQ.java @@ -208,7 +208,7 @@ public void goTo(long pos) { */ @Override public TripleComponentOrder getOrder() { - return triples.order; + return TripleComponentOrder.Unknown; // triples.order; } /* diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorZFOQ.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorZFOQ.java index 1d18bcfe..51d74f53 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorZFOQ.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorZFOQ.java @@ -240,7 +240,7 @@ public void goTo(long pos) { */ @Override public TripleComponentOrder getOrder() { - return triples.order; + return TripleComponentOrder.Unknown;// triples.order; } /* diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/DiskTriplesReorderSorter.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/DiskTriplesReorderSorter.java new file mode 100644 index 00000000..14136b5a --- /dev/null +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/DiskTriplesReorderSorter.java @@ -0,0 +1,156 @@ +package com.the_qa_company.qendpoint.core.triples.impl; + +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; +import com.the_qa_company.qendpoint.core.iterator.utils.AsyncIteratorFetcher; +import com.the_qa_company.qendpoint.core.iterator.utils.ExceptionIterator; +import com.the_qa_company.qendpoint.core.iterator.utils.SizeFetcher; +import com.the_qa_company.qendpoint.core.listener.MultiThreadListener; +import com.the_qa_company.qendpoint.core.triples.TripleID; +import com.the_qa_company.qendpoint.core.util.ParallelSortableArrayList; +import com.the_qa_company.qendpoint.core.util.concurrent.KWayMerger; +import com.the_qa_company.qendpoint.core.util.io.CloseSuppressPath; +import com.the_qa_company.qendpoint.core.util.io.IOUtil; +import com.the_qa_company.qendpoint.core.util.io.compress.CompressTripleMergeIterator; +import com.the_qa_company.qendpoint.core.util.io.compress.CompressTripleReader; +import com.the_qa_company.qendpoint.core.util.io.compress.CompressTripleWriter; +import com.the_qa_company.qendpoint.core.util.listener.IntermediateListener; + +import java.io.IOException; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.atomic.AtomicLong; +import java.util.function.Supplier; + +public class DiskTriplesReorderSorter implements KWayMerger.KWayMergerImpl> { + private final CloseSuppressPath baseFileName; + private final AsyncIteratorFetcher source; + private final MultiThreadListener listener; + private final int bufferSize; + private final long chunkSize; + private final int k; + private final TripleComponentOrder oldOrder; + private final TripleComponentOrder newOrder; + private final AtomicLong read = new AtomicLong(); + + public DiskTriplesReorderSorter(CloseSuppressPath baseFileName, AsyncIteratorFetcher source, + MultiThreadListener listener, int bufferSize, long chunkSize, int k, TripleComponentOrder oldOrder, + TripleComponentOrder newOrder) { + this.source = source; + this.listener = MultiThreadListener.ofNullable(listener); + this.baseFileName = baseFileName; + this.bufferSize = bufferSize; + this.chunkSize = chunkSize; + this.k = k; + this.oldOrder = oldOrder; + this.newOrder = newOrder; + } + + @Override + public void createChunk(SizeFetcher flux, CloseSuppressPath output) + throws KWayMerger.KWayMergerException { + ParallelSortableArrayList pairs = new ParallelSortableArrayList<>(TripleID[].class); + + TripleID tid; + // loading the pairs + listener.notifyProgress(10, "reading triple part 0"); + while ((tid = flux.get()) != null) { + TripleOrderConvert.swapComponentOrder(tid, oldOrder, newOrder); + pairs.add(tid); + long r = read.incrementAndGet(); + if (r % 1_000_000 == 0) { + listener.notifyProgress(10, "reading triple part " + r); + } + } + + // sort the pairs + pairs.parallelSort(TripleID::compareTo); + + // write the result on disk + int count = 0; + int block = pairs.size() < 10 ? 1 : pairs.size() / 10; + IntermediateListener il = new IntermediateListener(listener); + il.setRange(70, 100); + il.notifyProgress(0, "creating file"); + try (CompressTripleWriter w = new CompressTripleWriter(output.openOutputStream(bufferSize), false)) { + // encode the size of the chunk + for (int i = 0; i < pairs.size(); i++) { + w.appendTriple(pairs.get(i)); + if (i % block == 0) { + il.notifyProgress(i / (block / 10f), "writing triples " + count + "/" + pairs.size()); + } + } + listener.notifyProgress(100, "writing completed " + pairs.size() + " " + output.getFileName()); + } catch (IOException e) { + throw new KWayMerger.KWayMergerException("Can't write chunk", e); + } + } + + @Override + public void mergeChunks(List inputs, CloseSuppressPath output) + throws KWayMerger.KWayMergerException { + try { + listener.notifyProgress(0, "merging triples " + output.getFileName()); + CompressTripleReader[] readers = new CompressTripleReader[inputs.size()]; + long count = 0; + try { + for (int i = 0; i < inputs.size(); i++) { + readers[i] = new CompressTripleReader(inputs.get(i).openInputStream(bufferSize)); + } + + // use spo because we are writing xyz + ExceptionIterator it = CompressTripleMergeIterator.buildOfTree(readers, + TripleComponentOrder.SPO); + // at least one + long rSize = it.getSize(); + long size = Math.max(rSize, 1); + long block = size < 10 ? 1 : size / 10; + try (CompressTripleWriter w = new CompressTripleWriter(output.openOutputStream(bufferSize), false)) { + while (it.hasNext()) { + w.appendTriple(it.next()); + if (count % block == 0) { + listener.notifyProgress(count / (block / 10f), "merging triples " + count + "/" + size); + } + count++; + } + } + } finally { + IOUtil.closeAll(readers); + } + listener.notifyProgress(100, "triples merged " + output.getFileName() + " " + count); + // delete old pairs + IOUtil.closeAll(inputs); + } catch (IOException e) { + throw new KWayMerger.KWayMergerException(e); + } + } + + @Override + public SizeFetcher newStopFlux(Supplier flux) { + return SizeFetcher.of(flux, p -> 3 * Long.BYTES, chunkSize); + } + + public ExceptionIterator sort(int workers) + throws InterruptedException, IOException, KWayMerger.KWayMergerException { + listener.notifyProgress(0, "Triple sort asked in " + baseFileName.toAbsolutePath()); + // force to create the first file + KWayMerger> merger = new KWayMerger<>(baseFileName, source, this, + Math.max(1, workers - 1), k); + merger.start(); + // wait for the workers to merge the sections and create the triples + Optional sections = merger.waitResult(); + if (sections.isEmpty()) { + return ExceptionIterator.empty(); + } + CloseSuppressPath path = sections.get(); + return new CompressTripleReader(path.openInputStream(bufferSize)) { + @Override + public void close() throws IOException { + try { + super.close(); + } finally { + IOUtil.closeObject(path); + } + } + }; + } +} diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/OneReadTempTriples.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/OneReadTempTriples.java index 99000ed3..327f9e6a 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/OneReadTempTriples.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/OneReadTempTriples.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.nio.file.Path; import java.util.Iterator; /** @@ -114,6 +115,11 @@ public SuppliableIteratorTripleID search(TripleID pattern) { throw new NotImplementedException(); } + @Override + public SuppliableIteratorTripleID search(TripleID pattern, int searchMask) { + throw new NotImplementedException(); + } + @Override public void load(InputStream input, ControlInfo ci, ProgressListener listener) throws IOException { throw new NotImplementedException(); @@ -139,6 +145,11 @@ public void mapIndex(CountInputStream input, File f, ControlInfo ci, ProgressLis throw new NotImplementedException(); } + @Override + public void mapGenOtherIndexes(Path file, HDTOptions spec, ProgressListener listener) { + throw new NotImplementedException(); + } + @Override public void saveIndex(OutputStream output, ControlInfo ci, ProgressListener listener) { throw new NotImplementedException(); @@ -165,6 +176,11 @@ public IteratorTripleID searchAll() { return new NoDuplicateTripleIDIterator(iterator); } + @Override + public IteratorTripleID searchAll(int searchMask) { + return new NoDuplicateTripleIDIterator(iterator); + } + @Override public long getNumberOfElements() { return iterator.estimatedNumResults(); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesList.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesList.java index e3aafdc5..9f77fb77 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesList.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesList.java @@ -47,6 +47,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.nio.file.Path; import java.util.ArrayList; /** @@ -118,12 +119,22 @@ public SuppliableIteratorTripleID search(TripleID pattern) { } } + @Override + public SuppliableIteratorTripleID search(TripleID pattern, int searchMask) { + return search(pattern); + } + /* * (non-Javadoc) * @see hdt.triples.Triples#searchAll() */ @Override public IteratorTripleID searchAll() { + return searchAll(TripleComponentOrder.ALL_MASK); + } + + @Override + public IteratorTripleID searchAll(int searchMask) { TripleID all = new TripleID(0, 0, 0); return this.search(all); } @@ -530,6 +541,11 @@ public void mapFromFile(CountInputStream in, File f, ProgressListener listener) public void mapIndex(CountInputStream input, File f, ControlInfo ci, ProgressListener listener) { } + @Override + public void mapGenOtherIndexes(Path file, HDTOptions spec, ProgressListener listener) { + throw new NotImplementedException(); + } + @Override public void replaceAllIds(DictionaryIDMapping mapSubj, DictionaryIDMapping mapPred, DictionaryIDMapping mapObj, DictionaryIDMapping mapGraph) { diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesListLong.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesListLong.java index f2fbce58..3897a972 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesListLong.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesListLong.java @@ -24,6 +24,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.nio.file.Path; import java.util.ArrayList; import com.the_qa_company.qendpoint.core.dictionary.Dictionary; @@ -114,12 +115,22 @@ public SuppliableIteratorTripleID search(TripleID pattern) { } } + @Override + public SuppliableIteratorTripleID search(TripleID pattern, int searchMask) { + return search(pattern); + } + /* * (non-Javadoc) * @see hdt.triples.Triples#searchAll() */ @Override public IteratorTripleID searchAll() { + return searchAll(TripleComponentOrder.ALL_MASK); + } + + @Override + public IteratorTripleID searchAll(int searchMask) { TripleID all = new TripleID(0, 0, 0); return this.search(all); } @@ -522,6 +533,10 @@ public void mapFromFile(CountInputStream in, File f, ProgressListener listener) public void mapIndex(CountInputStream input, File f, ControlInfo ci, ProgressListener listener) { } + @Override + public void mapGenOtherIndexes(Path file, HDTOptions spec, ProgressListener listener) { + } + @Override public void replaceAllIds(DictionaryIDMapping mapSubj, DictionaryIDMapping mapPred, DictionaryIDMapping mapObj) { sorted = false; diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/WriteBitmapTriples.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/WriteBitmapTriples.java index 0594b703..03f31473 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/WriteBitmapTriples.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/WriteBitmapTriples.java @@ -32,6 +32,7 @@ import java.io.InputStream; import java.io.OutputStream; import java.nio.file.Files; +import java.nio.file.Path; /** * Appendable write {@link BitmapTriples} version @@ -100,11 +101,21 @@ public IteratorTripleID searchAll() { throw new NotImplementedException(); } + @Override + public IteratorTripleID searchAll(int searchMask) { + throw new NotImplementedException(); + } + @Override public SuppliableIteratorTripleID search(TripleID pattern) { throw new NotImplementedException(); } + @Override + public SuppliableIteratorTripleID search(TripleID pattern, int searchMask) { + throw new NotImplementedException(); + } + @Override public long getNumberOfElements() { return numTriples; @@ -171,6 +182,11 @@ public void mapIndex(CountInputStream input, File f, ControlInfo ci, ProgressLis throw new NotImplementedException(); } + @Override + public void mapGenOtherIndexes(Path file, HDTOptions spec, ProgressListener listener) { + throw new NotImplementedException(); + } + @Override public void saveIndex(OutputStream output, ControlInfo ci, ProgressListener listener) { throw new NotImplementedException(); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/IOUtil.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/IOUtil.java index 497e69ff..29112c49 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/IOUtil.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/IOUtil.java @@ -544,6 +544,28 @@ public static long readLong(InputStream input) throws IOException { + ((readBuffer[0] & 255)); } + /** + * Read long, big endian. + * + * @param input is + * @throws IOException io exception + */ + public static long readLongBigEndian(InputStream input) throws IOException { + int n = 0; + byte[] readBuffer = new byte[8]; + while (n < 8) { + int count = input.read(readBuffer, n, 8 - n); + if (count < 0) + throw new EOFException(); + n += count; + } + + return ((long) readBuffer[0] << 56) + ((long) (readBuffer[1] & 255) << 48) + + ((long) (readBuffer[2] & 255) << 40) + ((long) (readBuffer[3] & 255) << 32) + + ((long) (readBuffer[4] & 255) << 24) + ((readBuffer[5] & 255) << 16) + ((readBuffer[6] & 255) << 8) + + ((readBuffer[7] & 255)); + } + public static long readLong(long location, FileChannel channel) throws IOException { try (CloseMappedByteBuffer buffer = new CloseMappedByteBuffer("readLong", channel.map(FileChannel.MapMode.READ_ONLY, location, 8), false)) { diff --git a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesOrderTest.java b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesOrderTest.java new file mode 100644 index 00000000..821dba3c --- /dev/null +++ b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesOrderTest.java @@ -0,0 +1,99 @@ +package com.the_qa_company.qendpoint.core.triples.impl; + +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; +import com.the_qa_company.qendpoint.core.exceptions.ParserException; +import com.the_qa_company.qendpoint.core.hdt.HDT; +import com.the_qa_company.qendpoint.core.hdt.HDTManager; +import com.the_qa_company.qendpoint.core.listener.ProgressListener; +import com.the_qa_company.qendpoint.core.options.HDTOptions; +import com.the_qa_company.qendpoint.core.options.HDTOptionsKeys; +import com.the_qa_company.qendpoint.core.triples.IteratorTripleID; +import com.the_qa_company.qendpoint.core.triples.TripleID; +import com.the_qa_company.qendpoint.core.util.LargeFakeDataSetStreamSupplier; +import org.apache.commons.io.file.PathUtils; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; +import java.util.stream.Collectors; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +public class BitmapTriplesOrderTest { + @Rule + public TemporaryFolder tempDir = TemporaryFolder.builder().assureDeletion().build(); + + @Test + public void orderTest() throws IOException, ParserException { + Path root = tempDir.newFolder().toPath(); + + try { + LargeFakeDataSetStreamSupplier supplier = LargeFakeDataSetStreamSupplier + .createSupplierWithMaxTriples(10_000, 52).withMaxElementSplit(50).withMaxLiteralSize(20); + + HDTOptions spec = HDTOptions.of(HDTOptionsKeys.BITMAPTRIPLES_INDEX_NO_FOQ, true, + HDTOptionsKeys.BITMAPTRIPLES_INDEX_OTHERS, Arrays.stream(TripleComponentOrder.values()) + .map(Object::toString).collect(Collectors.joining(","))); + + Path hdtFile = root.resolve("file.hdt"); + supplier.createAndSaveFakeHDT(spec, hdtFile); + + try (HDT hdt = HDTManager.mapIndexedHDT(hdtFile, spec, ProgressListener.ignore())) { + + // check index creations + for (TripleComponentOrder order : TripleComponentOrder.values()) { + if (order == TripleComponentOrder.Unknown || order == TripleComponentOrder.SPO) { + // default or unknown + continue; + } + + Path path = BitmapTriplesIndexFile.getIndexPath(hdtFile, order); + assertTrue(path + " doesn't exist! order " + order, Files.exists(path)); + } + + // all triples available? + Set dso = new HashSet<>(); + + IteratorTripleID it = hdt.getTriples().searchAll(TripleComponentOrder.SPO.mask); + + assertEquals(TripleComponentOrder.SPO, it.getOrder()); + while (it.hasNext()) { + TripleID tid = it.next().clone(); + if (!dso.add(tid)) { + fail("tid " + tid + " was read twice, dso: " + dso); + } + } + + assertEquals(hdt.getTriples().getNumberOfElements(), dso.size()); + for (TripleComponentOrder order : TripleComponentOrder.values()) { + if (order == TripleComponentOrder.Unknown) { + continue; + } + Set ds = new HashSet<>(dso); + + IteratorTripleID it2 = hdt.getTriples().searchAll(order.mask); + assertEquals(order, it2.getOrder()); + while (it2.hasNext()) { + TripleID tid = it2.next().clone(); + if (!ds.remove(tid)) { + fail("tid " + tid + " can't be find, previously here: " + dso.contains(tid)); + } + } + + assertTrue("ds not empty, " + ds.size() + " elem remaining", ds.isEmpty()); + } + } + + } finally { + PathUtils.deleteDirectory(root); + } + } +} diff --git a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesTest.java b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesTest.java index ccecea79..b8438f36 100644 --- a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesTest.java +++ b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesTest.java @@ -24,9 +24,12 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.nio.channels.FileChannel; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.StandardOpenOption; import java.util.Collection; +import java.util.UUID; import java.util.stream.Stream; import static org.junit.Assert.*; @@ -298,6 +301,16 @@ public void memBitmapLoadIndexedTest() throws IOException, ParserException { @Ignore("Hand tests") public static class HandTest extends AbstractTest { + @Test + public void readBadChannelTest() throws IOException { + // java.nio.file.NoSuchFileException + try (FileChannel channel = FileChannel.open(Path.of(UUID.randomUUID().toString()), + StandardOpenOption.READ)) { + System.out.println(channel.isOpen()); + } + + } + @Test public void largeTest() throws IOException { /* diff --git a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/HDTValue.java b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/HDTValue.java index a469f578..7f11507f 100644 --- a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/HDTValue.java +++ b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/HDTValue.java @@ -6,6 +6,16 @@ * @author Antoine Willerval */ public interface HDTValue { + static int compare(HDTValue v1, HDTValue v2) { + int c = Integer.compare(v1.getHDTPosition(), v2.getHDTPosition()); + + if (c != 0) { + return c; + } + + return Long.compare(v1.getHDTPosition(), v2.getHDTPosition()); + } + /** * @return is a delegate value, should return the exact hashcode when asked * if true @@ -19,4 +29,21 @@ public interface HDTValue { * @param delegate boolean */ void setDelegate(boolean delegate); + + /** + * @return the id inside the hdt section, 0 or negative for invalid ids + */ + long getHDTId(); + + /** + * @return the section id of the hdt value + */ + int getHDTPosition(); + + /** + * @return if the HDT id is valid + */ + default boolean isValidHDTId() { + return getHDTId() > 0; + } } diff --git a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/SimpleBNodeHDT.java b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/SimpleBNodeHDT.java index 3b5d2e3f..35bd2b74 100644 --- a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/SimpleBNodeHDT.java +++ b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/SimpleBNodeHDT.java @@ -46,8 +46,8 @@ public boolean equals(Object o) { return false; if (this == o) { return true; - } else if (o instanceof SimpleBNodeHDT && this.id != -1 && ((SimpleBNodeHDT) o).getHdtId() != -1) { - return this.id == (((SimpleBNodeHDT) o).getHdtId()); + } else if (o instanceof HDTValue hv && this.id != -1 && hv.getHDTId() != -1) { + return this.id == hv.getHDTId(); } else { // could not compare IDs, we have to compare to string if (!(o instanceof BNode)) { return false; @@ -80,10 +80,16 @@ else if (this.position == SimpleIRIHDT.OBJECT_POS) } } - public long getHdtId() { + @Override + public long getHDTId() { return id; } + @Override + public int getHDTPosition() { + return position; + } + @Override public void setDelegate(boolean delegate) { this.delegate = delegate; diff --git a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/SimpleIRIHDT.java b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/SimpleIRIHDT.java index f5d1a95f..4be71659 100644 --- a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/SimpleIRIHDT.java +++ b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/SimpleIRIHDT.java @@ -57,6 +57,16 @@ public SimpleIRIHDT(HDT hdt, String iriString) { this.localNameIdx = -1; } + @Override + public long getHDTId() { + return id; + } + + @Override + public int getHDTPosition() { + return postion; + } + public long getId() { return id; } diff --git a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/SimpleLiteralHDT.java b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/SimpleLiteralHDT.java index 241ec7c7..54056c06 100644 --- a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/SimpleLiteralHDT.java +++ b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/SimpleLiteralHDT.java @@ -232,8 +232,8 @@ public boolean equals(Object o) { return true; } - if (o instanceof SimpleLiteralHDT) { - return ((SimpleLiteralHDT) o).getHdtID() == getHdtID(); + if (o instanceof HDTValue hv) { + return hv.getHDTId() == getHDTId(); } else if (o instanceof Literal other) { // Compare datatypes if (!getDatatype().equals(other.getDatatype())) { @@ -281,10 +281,16 @@ public String toString() { }); } - public long getHdtID() { + @Override + public long getHDTId() { return hdtID; } + @Override + public int getHDTPosition() { + return SimpleIRIHDT.OBJECT_POS; // a literal is only an object + } + @Override public String stringValue() { return getLabel(); diff --git a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointStore.java b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointStore.java index 400e2aee..7316d4c5 100644 --- a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointStore.java +++ b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointStore.java @@ -14,8 +14,7 @@ import com.the_qa_company.qendpoint.core.util.StopWatch; import com.the_qa_company.qendpoint.core.util.io.Closer; import com.the_qa_company.qendpoint.model.EndpointStoreValueFactory; -import com.the_qa_company.qendpoint.model.SimpleBNodeHDT; -import com.the_qa_company.qendpoint.model.SimpleIRIHDT; +import com.the_qa_company.qendpoint.model.HDTValue; import com.the_qa_company.qendpoint.utils.BitArrayDisk; import com.the_qa_company.qendpoint.utils.CloseSafeHDT; import com.the_qa_company.qendpoint.utils.OverrideHDTOptions; @@ -803,24 +802,20 @@ public void modifyBitmaps(Resource subject, IRI predicate, Value object) { // mark in HDT the store the subject, predicate, objects that are used // in rdf4j long subjectID; - if (subject instanceof SimpleIRIHDT iriHDT) { - subjectID = iriHDT.getId(); - } else if (subject instanceof SimpleBNodeHDT bNodeHDT) { - subjectID = bNodeHDT.getHdtId(); + if (subject instanceof HDTValue hv) { + subjectID = hv.getHDTId(); } else { subjectID = -1; } long predicateID; - if (predicate instanceof SimpleIRIHDT iriHDT) { - predicateID = iriHDT.getId(); + if (predicate instanceof HDTValue hv) { + predicateID = hv.getHDTId(); } else { predicateID = -1; } long objectID; - if (object instanceof SimpleIRIHDT iriHDT) { - objectID = iriHDT.getId(); - } else if (object instanceof SimpleBNodeHDT bNodeHDT) { - objectID = bNodeHDT.getHdtId(); + if (object instanceof HDTValue hv) { + objectID = hv.getHDTId(); } else { objectID = -1; } diff --git a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/HDTConverter.java b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/HDTConverter.java index 537efe30..45596a81 100644 --- a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/HDTConverter.java +++ b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/HDTConverter.java @@ -112,7 +112,7 @@ public long objectToID(Value obj) { } return hdt.getDictionary().stringToId(translate, TripleComponentRole.OBJECT); } else if (obj instanceof SimpleLiteralHDT hdtObj) { - return hdtObj.getHdtID(); + return hdtObj.getHDTId(); } else { if (QueryEvaluationUtil.isSimpleLiteral(obj)) { return this.hdt.getDictionary().stringToId('"' + obj.stringValue() + '"', diff --git a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/tools/QEPSearch.java b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/tools/QEPSearch.java index 1b2e84be..f1018664 100644 --- a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/tools/QEPSearch.java +++ b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/tools/QEPSearch.java @@ -877,11 +877,11 @@ private String prettyComponent(Object object) { } if (object instanceof SimpleLiteralHDT hdtLit) { - return out + colorTool.yellow() + " (" + hdtLit.getHdtID() + ")"; + return out + colorTool.yellow() + " (" + hdtLit.getHDTId() + ")"; } else if (object instanceof SimpleIRIHDT hdtIri) { return out + colorTool.yellow() + " (" + hdtIri.getId() + ")"; } else if (object instanceof SimpleBNodeHDT hdtBN) { - return out + colorTool.yellow() + " (" + hdtBN.getHdtId() + ")"; + return out + colorTool.yellow() + " (" + hdtBN.getHDTId() + ")"; } else { return out + colorTool.yellow(); } diff --git a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/utils/BitArrayDisk.java b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/utils/BitArrayDisk.java index 292b0045..e17e9352 100644 --- a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/utils/BitArrayDisk.java +++ b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/utils/BitArrayDisk.java @@ -6,11 +6,14 @@ import com.the_qa_company.qendpoint.core.listener.ProgressListener; import com.the_qa_company.qendpoint.core.util.io.IOUtil; +import java.io.BufferedInputStream; import java.io.Closeable; +import java.io.EOFException; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.nio.file.Files; import java.util.Arrays; /** @@ -106,6 +109,7 @@ public void changeToInDisk(File file) throws IOException { inMemory = false; this.output = new NioFile(file); writeBits(); + for (int offset = 0; offset < words.length; offset++) { output.writeLong(words[offset], 8L * (offset + 1)); } @@ -119,8 +123,7 @@ public void changeToInDisk(File file) throws IOException { */ private void writeBits() throws IOException { // write the length of the array in the beginning - int nwords = (int) numWords(allBits); - this.output.writeLong(nwords, 0); + this.output.writeLong((int) numWords(allBits), 0); } private void initWordsArray(long nbits) throws IOException { @@ -137,12 +140,18 @@ private void initWordsArray(long nbits) throws IOException { int lastNonZero = -1; // read previous values - for (int i = 0; i < this.words.length; i++) { - long v = this.output.readLong((i + 1) * 8L); - if (v != 0) { - this.words[i] = v; - lastNonZero = i; + try (BufferedInputStream is = new BufferedInputStream( + Files.newInputStream(this.output.getFile().toPath()))) { + // skip header + is.skipNBytes(8); + for (int i = 0; i < this.words.length; i++) { + long v = IOUtil.readLongBigEndian(is); + if (v != 0) { + this.words[i] = v; + lastNonZero = i; + } } + } catch (EOFException ignore) { } // recompute numbits if we have at least one bit if (lastNonZero != -1) diff --git a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/utils/CloseSafeHDT.java b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/utils/CloseSafeHDT.java index 46565f1f..9a16c33e 100644 --- a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/utils/CloseSafeHDT.java +++ b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/utils/CloseSafeHDT.java @@ -89,4 +89,22 @@ public IteratorTripleString search(CharSequence subject, CharSequence predicate, throws NotFoundException { return hdt.search(subject, predicate, object); } + + @Override + public IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object, + CharSequence graph) throws NotFoundException { + return hdt.search(subject, predicate, object, graph); + } + + @Override + public IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object, + int searchOrderMask) throws NotFoundException { + return hdt.search(subject, predicate, object, searchOrderMask); + } + + @Override + public IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object, + CharSequence graph, int searchOrderMask) throws NotFoundException { + return hdt.search(subject, predicate, object, graph, searchOrderMask); + } } diff --git a/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/store/EndpointStoreTest.java b/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/store/EndpointStoreTest.java index 941f1481..c7de1f46 100644 --- a/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/store/EndpointStoreTest.java +++ b/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/store/EndpointStoreTest.java @@ -795,7 +795,7 @@ public void bnodeTest() throws ParserException, IOException { Resource bnode = converter.idToSubjectHDTResource(1L); Assert.assertTrue(bnode instanceof BNode); Assert.assertTrue(bnode instanceof SimpleBNodeHDT); - Assert.assertEquals(1L, ((SimpleBNodeHDT) bnode).getHdtId()); + Assert.assertEquals(1L, ((SimpleBNodeHDT) bnode).getHDTId()); Assert.assertEquals("aaaa", ((BNode) bnode).getID()); Assert.assertEquals("_:aaaa", bnode.toString()); try (SailRepositoryConnection connection = repo.getConnection()) {