Skip to content

Commit

Permalink
GH-499 Reload DL
Browse files Browse the repository at this point in the history
  • Loading branch information
ate47 committed Nov 12, 2024
1 parent 1259d23 commit b7914f6
Show file tree
Hide file tree
Showing 4 changed files with 245 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import com.the_qa_company.qendpoint.core.header.HeaderUtil;
import com.the_qa_company.qendpoint.core.listener.ProgressListener;
import com.the_qa_company.qendpoint.core.options.HDTOptions;
import com.the_qa_company.qendpoint.core.options.HDTOptionsFile;
import com.the_qa_company.qendpoint.core.options.HDTOptionsKeys;
import com.the_qa_company.qendpoint.core.options.HDTSpecification;
import com.the_qa_company.qendpoint.core.rdf.RDFFluxStop;
Expand Down Expand Up @@ -192,6 +193,10 @@ public HDTResult doGenerateHDT(String rdfFileName, String baseURI, RDFNotation r
long checksum = 0;
try {
if (spec.getBoolean(HDTOptionsKeys.LOADER_PREDOWNLOAD_URL) && IOUtil.isRemoteURL(rdfFileName)) {
spec = spec.pushTop();
spec.set(HDTOptionsKeys.LOADER_PREDOWNLOAD_URL, false); // disable
// for
// cattree
long retry = spec.getInt(HDTOptionsKeys.LOADER_PREDOWNLOAD_URL_RETRY, 1);

final String rdfFileName2 = rdfFileName;
Expand All @@ -202,14 +207,42 @@ public HDTResult doGenerateHDT(String rdfFileName, String baseURI, RDFNotation r
throw new RuntimeException(e);
}
});
Path dlFile = preDownload.resolveSibling(preDownload.getFileName() + ".download");
HDTOptionsFile opFile = new HDTOptionsFile(dlFile);

long tryCount = 1;
checksumPath = spec.getPath(HDTOptionsKeys.LOADER_PREDOWNLOAD_CHECKSUM_PATH);
while (true) {
listener.notifyProgress(0,
"predownload " + rdfFileName + " into " + preDownload + " try #" + tryCount);
InputStream readIs = IOUtil.getFileInputStream(rdfFileName, false);
try (InputStream is = checksumPath != null ? new CRCInputStream(readIs, new CRC32()) : readIs;
opFile.sync();
long trueSize = IOUtil.getContentLengthLong(rdfFileName);
long lastLength = opFile.getOptions().getInt("last-length", 0);
long preSize;

if (lastLength != trueSize) {
preSize = 0;
} else {
try {
preSize = Files.size(preDownload);
} catch (IOException ignore) {
preSize = 0;
}
}
opFile.getOptions().set("last-length", trueSize);
opFile.save();

listener.notifyProgress(0, "predownload " + rdfFileName + " into " + preDownload + " try #"
+ tryCount + " / preSize " + preSize);

IOUtil.HTTPData readIs = IOUtil.getFileInputStreamData(rdfFileName, false, preSize);
long trueCurrSize = readIs.conn().getContentLengthLong();
if (trueCurrSize > 0 && trueSize != trueCurrSize) {
logger.error("Find bad file sizes compared to the previous calls, {} != {}", trueCurrSize,
trueSize);
continue; // try again, can it create an infinite loop
// is the server is bad?
}
InputStream stream = readIs.is();
try (InputStream is = checksumPath != null ? new CRCInputStream(stream, new CRC32()) : stream;
OutputStream os = new BufferedOutputStream(Files.newOutputStream(preDownload))) {
IOUtil.copy(is, os, listener, 10_000_000);
if (is instanceof CRCInputStream crcIs) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package com.the_qa_company.qendpoint.core.options;

import com.the_qa_company.qendpoint.core.compact.integer.VByte;
import com.the_qa_company.qendpoint.core.listener.ProgressListener;
import com.the_qa_company.qendpoint.core.util.crc.CRC32;
import com.the_qa_company.qendpoint.core.util.crc.CRCInputStream;
import com.the_qa_company.qendpoint.core.util.crc.CRCOutputStream;
import com.the_qa_company.qendpoint.core.util.io.IOUtil;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Objects;
import java.util.Set;

public class HDTOptionsFile {
public static final long MAGIC = 0x4f464e4c44544448L;
private HDTOptions options = HDTOptions.of();

private final Path location;

public HDTOptionsFile(Path location) {
this.location = location;
}

public HDTOptions getOptions() {
return options;
}

public void sync() throws IOException {
HDTOptions options = HDTOptions.of();

if (!Files.exists(location)) {
this.options = options;
return;
}
ProgressListener l = ProgressListener.ignore();
try (CRCInputStream is = new CRCInputStream(new BufferedInputStream(Files.newInputStream(location)),
new CRC32())) {
if (IOUtil.readLong(is) != MAGIC)
throw new IOException("Can't read HDTOptions file: Bad magic");

long count = VByte.decode(is);
for (long i = 0; i < count; i++) {
String key = IOUtil.readSizedString(is, l);
String val = IOUtil.readSizedString(is, l);
if (!val.isEmpty()) {
options.set(key, val);
}
}
if (!is.readCRCAndCheck()) {
throw new IOException("Can't read HDTOptions file: Bad CRC");
}
}
this.options = options;
}

public void save() throws IOException {
ProgressListener l = ProgressListener.ignore();
try (CRCOutputStream os = new CRCOutputStream(new BufferedOutputStream(Files.newOutputStream(location)),
new CRC32())) {
IOUtil.writeLong(os, MAGIC);
Set<?> keys = options.getKeys();
VByte.encode(os, keys.size());
for (Object k : keys) {
String key = String.valueOf(k);
String val = Objects.requireNonNull(options.get(key), "");
IOUtil.writeSizedString(os, key, l);
IOUtil.writeSizedString(os, val, l);
}
os.writeCRC();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
Expand Down Expand Up @@ -370,8 +371,14 @@ public static InputStream getFileInputStream(String fileName) throws IOException
}

public static InputStream getFileInputStream(String fileName, boolean uncompress) throws IOException {
return getFileInputStream(fileName, uncompress, -1);
}

public static InputStream getFileInputStream(String fileName, boolean uncompress, long startLen)
throws IOException {
InputStream input;
String name = fileName.toLowerCase();
boolean skipHandled = startLen <= 0;
if (isRemoteURL(fileName)) {
URL url;
try {
Expand All @@ -380,13 +387,20 @@ public static InputStream getFileInputStream(String fileName, boolean uncompress
throw new IOException("Invalid URI", e);
}
URLConnection con = url.openConnection();
if (startLen > 0 && con instanceof HttpURLConnection http) {
http.setRequestProperty("Range", "bytes=" + startLen + "-");
skipHandled = true;
}
con.connect();
input = con.getInputStream();
} else if (name.equals("-")) {
input = new BufferedInputStream(System.in);
} else {
input = new BufferedInputStream(new FileInputStream(fileName));
}
if (!skipHandled) {
input.skipNBytes(startLen);
}

if (uncompress) {
if (name.endsWith(".gz") || name.endsWith(".tgz")) {
Expand All @@ -400,6 +414,71 @@ public static InputStream getFileInputStream(String fileName, boolean uncompress
return input;
}

public record HTTPData(InputStream is, HttpURLConnection conn) implements Closeable {
@Override
public void close() throws IOException {
is.close();
}
}

public static HTTPData getFileInputStreamData(String fileName, boolean uncompress, long startLen)
throws IOException {
InputStream input;
String name = fileName.toLowerCase();
if (!isRemoteURL(fileName)) {
throw new IOException("Not a http connection");
}
URL url;
try {
url = new URI(fileName).toURL();
} catch (URISyntaxException e) {
throw new IOException("Invalid URI", e);
}

if (!url.getProtocol().equalsIgnoreCase("http") && !url.getProtocol().equalsIgnoreCase("https")) {
throw new IOException("Not a http connection");
}
HttpURLConnection con = (HttpURLConnection) url.openConnection();
if (startLen > 0) {
con.setRequestProperty("Range", "bytes=" + startLen + "-");
}
con.connect();
input = con.getInputStream();

if (uncompress) {
if (name.endsWith(".gz") || name.endsWith(".tgz")) {
input = new GZIPInputStream(input);
} else if (name.endsWith("bz2") || name.endsWith("bz")) {
input = new BZip2CompressorInputStream(input, true);
} else if (name.endsWith("xz")) {
input = new XZCompressorInputStream(input, true);
}
}
return new HTTPData(input, con);
}

public static long getContentLengthLong(String filename) throws IOException {
if (!isRemoteURL(filename)) {
return Files.size(Path.of(filename));
}
if (filename.equals("-")) {
return 0;
}
URL url;
try {
url = new URI(filename).toURL();
} catch (URISyntaxException e) {
throw new IOException("Invalid URI", e);
}
if (!url.getProtocol().equalsIgnoreCase("https") && !url.getProtocol().equalsIgnoreCase("http")) {
return 0;
}
HttpURLConnection con = (HttpURLConnection) url.openConnection();
con.setRequestMethod("HEAD");
con.connect();
return con.getContentLengthLong();
}

public static BufferedReader getFileReader(String fileName) throws IOException {
return new BufferedReader(new InputStreamReader(getFileInputStream(fileName)));
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,22 @@
package com.the_qa_company.qendpoint.core.util.io;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;

import org.junit.Assert;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
Expand Down Expand Up @@ -66,16 +71,16 @@ public void testWriteInt() {

ByteArrayInputStream bin = new ByteArrayInputStream(bout.toByteArray());

long a = IOUtil.readInt(bin);
int a = IOUtil.readInt(bin);
assertEquals(a, 3);

long b = IOUtil.readInt(bin);
int b = IOUtil.readInt(bin);
assertEquals(b, 4);

long c = IOUtil.readInt(bin);
int c = IOUtil.readInt(bin);
assertEquals(c, 0xFF0000AA);

long d = IOUtil.readInt(bin);
int d = IOUtil.readInt(bin);
assertEquals(d, 0xAABBCCDD);

} catch (IOException e) {
Expand Down Expand Up @@ -137,4 +142,48 @@ public void closeablePathTest() throws IOException {
Assert.assertFalse(Files.exists(p2));

}

@Ignore("Hand test")
@Test
public void urlTest() throws IOException {
final String url = "https://dumps.wikimedia.org/wikidatawiki/entities/latest-all.ttl.bz2";

long len = IOUtil.getContentLengthLong(url);

assertTrue("bad len: " + len, len > 0);

System.out.println(len);

byte[] read;
final int toRead = 0x1000;
try (InputStream is = IOUtil.getFileInputStream(url, false)) {
read = is.readNBytes(toRead);
}

assertEquals(toRead, read.length);

byte[] read2;
try (InputStream is = IOUtil.getFileInputStream(url, false)) {
read2 = is.readNBytes(toRead);
}

assertArrayEquals(read, read2);

byte[] read3;
int midRead = 0x500;
try (InputStream is = IOUtil.getFileInputStream(url, false)) {
is.skipNBytes(midRead);
read3 = is.readNBytes(midRead);
}
byte[] read4;
try (InputStream is = IOUtil.getFileInputStream(url, false, midRead)) {
read4 = is.readNBytes(midRead);
}

assertArrayEquals(read3, read4);

byte[] read5 = Arrays.copyOfRange(read, midRead, midRead + midRead);

assertArrayEquals(read3, read5);
}
}

0 comments on commit b7914f6

Please sign in to comment.