diff --git a/qendpoint-backend/src/main/java/com/the_qa_company/qendpoint/controller/Sparql.java b/qendpoint-backend/src/main/java/com/the_qa_company/qendpoint/controller/Sparql.java
index 4bf3817b4..7b25ce0ca 100644
--- a/qendpoint-backend/src/main/java/com/the_qa_company/qendpoint/controller/Sparql.java
+++ b/qendpoint-backend/src/main/java/com/the_qa_company/qendpoint/controller/Sparql.java
@@ -19,7 +19,6 @@
 import org.eclipse.rdf4j.rio.Rio;
 import org.eclipse.rdf4j.sail.NotifyingSail;
 import com.the_qa_company.qendpoint.core.enums.CompressionType;
-import com.the_qa_company.qendpoint.core.enums.RDFNotation;
 import com.the_qa_company.qendpoint.core.exceptions.ParserException;
 import com.the_qa_company.qendpoint.core.hdt.HDT;
 import com.the_qa_company.qendpoint.core.hdt.HDTManager;
@@ -43,7 +42,6 @@
 import java.io.InputStream;
 import java.io.OutputStream;
 import java.io.PrintStream;
-import java.io.PrintWriter;
 import java.net.URI;
 import java.nio.file.Files;
 import java.nio.file.Path;
@@ -463,8 +461,9 @@ public LoadFileResult loadFile(InputStream input, String filename) throws IOExce
 
 		if (sparqlRepository.getOptions().getStorageMode().equals(SailCompilerSchema.ENDPOINTSTORE_STORAGE)) {
 			shutdown();
-			RDFFormat format = Rio.getParserFormatForFileName(filename)
-					.orElseThrow(() -> new ServerWebInputException("file format not supported " + filename));
+			RDFFormat format = filename.toLowerCase().endsWith(".hdt") ? RDFFormat.HDT
+					: Rio.getParserFormatForFileName(filename).orElseThrow(
+							() -> new ServerWebInputException("file format not supported " + filename));
 
 			EndpointStore endpoint = (EndpointStore) compiledSail.getSource();
 			EndpointFiles files = endpoint.getEndpointFiles();
@@ -524,7 +523,7 @@ public LoadFileResult loadFile(InputStream input, String filename) throws IOExce
 		} else {
 			shutdown();
 			initializeEndpointStore(false);
-			sendUpdates(input, baseURI, filename);
+			sendUpdates(input, filename);
 		}
 		try {
 			sparqlRepository.reindexLuceneSails();
@@ -575,7 +574,7 @@ public Map getPrefixes() {
 		return prefixes;
 	}
 
-	private void sendUpdates(InputStream inputStream, String baseURI, String filename) throws IOException {
+	private void sendUpdates(InputStream inputStream, String filename) throws IOException {
 		StopWatch timeWatch = new StopWatch();
 
 		// uncompress the file if required
@@ -613,43 +612,6 @@ private void sendUpdates(InputStream inputStream, String filenam
 		logger.info("NT file loaded in {}", timeWatch.stopAndShow());
 	}
 
-	private void generateHDT(Iterator<TripleString> it, String baseURI, HDTOptions spec, String hdtOutput)
-			throws IOException {
-		if (sparqlRepository.getOptions().getPassMode().equals(SailCompilerSchema.HDT_TWO_PASS_MODE)) {
-			// dump the file to the disk to allow 2 passes
-			Path tempNTFile = Paths.get(hdtOutput + "-tmp.nt");
-			logger.info("Create TEMP NT file '{}'", tempNTFile);
-			try {
-				try (PrintWriter stream = new PrintWriter(tempNTFile.toFile())) {
-					while (it.hasNext()) {
-						TripleString ts = it.next();
-						ts.dumpNtriple(stream);
-					}
-				}
-				logger.info("NT file created, generating HDT...");
-				try {
-					HDT hdtDump = HDTManager.generateHDT(tempNTFile.toFile().getAbsolutePath(), baseURI,
-							RDFNotation.NTRIPLES, spec, null);
-					hdtDump.saveToHDT(hdtOutput, null);
-					hdtDump.close();
-				} catch (ParserException e) {
-					throw new IOException("Can't generate HDT", e);
-				}
-			} finally {
-				Files.deleteIfExists(tempNTFile);
-			}
-		} else {
-			// directly use the TripleString stream to generate the HDT
-			try {
-				HDT hdtDump = HDTManager.generateHDT(it, baseURI, spec, null);
-				hdtDump.saveToHDT(hdtOutput, null);
-				hdtDump.close();
-			} catch (ParserException e) {
-				throw new IOException("Can't generate HDT", e);
-			}
-		}
-	}
-
 	public int getPort() {
 		return port;
 	}
diff --git a/qendpoint-backend/src/test/java/com/the_qa_company/qendpoint/controller/FileUploadTest.java b/qendpoint-backend/src/test/java/com/the_qa_company/qendpoint/controller/FileUploadTest.java
index 4946e4abe..4f381f56d 100644
--- a/qendpoint-backend/src/test/java/com/the_qa_company/qendpoint/controller/FileUploadTest.java
+++ b/qendpoint-backend/src/test/java/com/the_qa_company/qendpoint/controller/FileUploadTest.java
@@ -1,6 +1,12 @@
 package com.the_qa_company.qendpoint.controller;
 
 import com.the_qa_company.qendpoint.Application;
+import com.the_qa_company.qendpoint.core.enums.RDFNotation;
+import com.the_qa_company.qendpoint.core.exceptions.ParserException;
+import com.the_qa_company.qendpoint.core.hdt.HDT;
+import com.the_qa_company.qendpoint.core.hdt.HDTManager;
+import com.the_qa_company.qendpoint.core.listener.ProgressListener;
+import com.the_qa_company.qendpoint.core.options.HDTOptions;
 import com.the_qa_company.qendpoint.store.EndpointStore;
 import com.the_qa_company.qendpoint.utils.LargeFakeDataSetStreamSupplier;
 import com.the_qa_company.qendpoint.utils.RDFStreamUtils;
@@ -60,7 +66,9 @@ public class FileUploadTest {
 
 	@Parameterized.Parameters(name = "{0}")
 	public static Collection<RDFFormat> params() {
-		return new ArrayList<>(RDFParserRegistry.getInstance().getKeys());
+		ArrayList<RDFFormat> list = new ArrayList<>(RDFParserRegistry.getInstance().getKeys());
+		list.add(RDFFormat.HDT);
+		return list;
 	}
 
 	@Autowired
@@ -69,7 +77,7 @@ public static Collection params() {
 	private final String fileName;
 	private final RDFFormat format;
 
-	public FileUploadTest(RDFFormat format) throws IOException {
+	public FileUploadTest(RDFFormat format) throws IOException, ParserException {
 		this.format = format;
 
 		RDFFormat originalFormat = Rio.getParserFormatForFileName(COKTAILS_NT).orElseThrow();
@@ -79,9 +87,16 @@ public FileUploadTest(RDFFormat format) throws IOException {
 		Path RDFFile = testDir.resolve(COKTAILS_NT + "." + format.getDefaultFileExtension());
 		if (!Files.exists(RDFFile)) {
 			try (OutputStream os = new FileOutputStream(RDFFile.toFile()); InputStream is = stream(COKTAILS_NT)) {
-				RDFWriter writer = Rio.createWriter(format, os);
-				parser.setRDFHandler(noBNode(writer));
-				parser.parse(is);
+				if (format == RDFFormat.HDT) {
+					try (HDT hdt = HDTManager.generateHDT(is, "http://example.org/#", RDFNotation.TURTLE,
+							HDTOptions.empty(), ProgressListener.ignore())) {
+						hdt.saveToHDT(os);
+					}
+				} else {
+					RDFWriter writer = Rio.createWriter(format, os);
+					parser.setRDFHandler(noBNode(writer));
+					parser.parse(is);
+				}
 			}
 		}
 
@@ -127,18 +142,6 @@ private InputStream streamOut(String file) throws FileNotFoundException {
 		return new FileInputStream(file);
 	}
 
-	private long fileSize(String file) throws IOException {
-		InputStream testNt = streamOut(file);
-		byte[] buff = new byte[1024];
-
-		long r;
-		long size = 0;
-		while ((r = testNt.read(buff)) != -1) {
-			size += r;
-		}
-		return size;
-	}
-
 	private String clearSpaces(String text) {
 		return text.matches("(\\s|[\\n\\r])*") ? "" : text;
 	}
@@ -222,6 +225,8 @@ public void loadTest() throws IOException {
 	@Test
 	@Ignore("large test")
 	public void loadLargeTest() throws IOException {
+		if (format == RDFFormat.HDT)
+			return;
 		long size = Sparql.getMaxChunkSize() * 10;
 		LargeFakeDataSetStreamSupplier supplier = new LargeFakeDataSetStreamSupplier(size, 42);
 		sparql.loadFile(supplier.createRDFStream(format), "fake." + format.getDefaultFileExtension());
diff --git a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/utils/RDFStreamUtils.java b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/utils/RDFStreamUtils.java
index 663c8e168..c27b72a18 100644
--- a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/utils/RDFStreamUtils.java
+++ b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/utils/RDFStreamUtils.java
@@ -1,5 +1,7 @@
 package com.the_qa_company.qendpoint.utils;
 
+import com.the_qa_company.qendpoint.core.hdt.HDT;
+import com.the_qa_company.qendpoint.core.hdt.HDTManager;
 import com.the_qa_company.qendpoint.core.triples.TripleString;
 import com.the_qa_company.qendpoint.core.util.LiteralsUtils;
 import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
@@ -9,6 +11,7 @@
 import org.eclipse.rdf4j.model.Statement;
 import org.eclipse.rdf4j.model.Value;
 import org.eclipse.rdf4j.model.ValueFactory;
+import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
 import org.eclipse.rdf4j.rio.RDFFormat;
 import org.eclipse.rdf4j.rio.RDFHandler;
 import org.eclipse.rdf4j.rio.RDFHandlerException;
@@ -17,6 +20,9 @@
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
 import java.util.Iterator;
 import java.util.function.Consumer;
 import java.util.zip.GZIPInputStream;
@@ -61,6 +67,22 @@ public static InputStream uncompressedStream(InputStream stream, String filename
 	 */
 	public static void readRDFStream(InputStream stream, RDFFormat format, boolean keepBNode,
 			Consumer<Statement> statementConsumer) throws IOException {
+		if (format == RDFFormat.HDT) {
+			// write HDT into a temp file, map it and iterate over it
+			Path path = Files.createTempFile(RDFStreamUtils.class.getName(), ".hdt");
+			try {
+				Files.copy(stream, path, StandardCopyOption.REPLACE_EXISTING);
+				try (HDT hdt = HDTManager.mapHDT(path)) {
+					for (TripleString ts : hdt) {
+						SimpleValueFactory vf = SimpleValueFactory.getInstance();
+						statementConsumer.accept(convertStatement(vf, ts));
+					}
+				}
+			} finally {
+				Files.deleteIfExists(path);
+			}
+			return;
+		}
 		RDFParser parser = Rio.createParser(format);
 		parser.setPreserveBNodeIDs(keepBNode);
 		parser.setRDFHandler(new RDFHandler() {