Skip to content

Commit

Permalink
Test index retention through GC events.
Browse files Browse the repository at this point in the history
  • Loading branch information
rcaudy authored Jan 7, 2025
2 parents dcc13e3 + 0e9c778 commit 189b24f
Show file tree
Hide file tree
Showing 2 changed files with 136 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -964,6 +964,43 @@ public void testStringContainsFilter() {
}
}

public void testIndexRetentionThroughGC() {
    final Table child;

    // The nested liveness scope is not needed for liveness management here; it exists solely to
    // opt out of the enclosing scope's enforceStrongReachability.
    try (final SafeCloseable ignored = LivenessScopeStack.open()) {
        final Map<String, Object> strongRefs = new HashMap<>();
        final Random rng = new Random(0);
        final int tableSize = 500;
        final QueryTable parent = getTable(false, tableSize, rng,
                initColumnInfos(new String[] {"S1", "S2"},
                        new SetGenerator<>("aa", "bb", "cc", "dd", "AA", "BB", "CC", "DD"),
                        new SetGenerator<>("aaa", "bbb", "ccc", "ddd", "AAA", "BBB", "CCC", "DDD")));

        // Pin the data indexes with strong references so they cannot be evicted by a GC cycle.
        strongRefs.put("di1", DataIndexer.getOrCreateDataIndex(parent, "S1"));
        strongRefs.put("di2", DataIndexer.getOrCreateDataIndex(parent, "S2"));
        child = parent.update("isEven = ii % 2 == 0");

        // A GC must not evict the indexes while the strong references are held.
        System.gc();

        // Both the parent and the derived child should still see the indexes.
        Assert.assertTrue(DataIndexer.hasDataIndex(parent, "S1"));
        Assert.assertTrue(DataIndexer.hasDataIndex(parent, "S2"));
        Assert.assertTrue(DataIndexer.hasDataIndex(child, "S1"));
        Assert.assertTrue(DataIndexer.hasDataIndex(child, "S2"));

        // Drop the strong references before leaving the scope.
        strongRefs.clear();
    }
    // With the references released and the parent out of scope, a GC should leave the child
    // without the indexes.
    System.gc();
    Assert.assertFalse(DataIndexer.hasDataIndex(child, "S1"));
    Assert.assertFalse(DataIndexer.hasDataIndex(child, "S2"));
}

public void testStringMatchFilterIndexed() {
// MatchFilters (currently) only use indexes on initial creation but this incremental test will recreate
// index-enabled match filtered tables and compare them against incremental non-indexed filtered tables.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import io.deephaven.base.FileUtils;
import io.deephaven.base.verify.Assert;
import io.deephaven.engine.context.ExecutionContext;
import io.deephaven.engine.liveness.LivenessScopeStack;
import io.deephaven.engine.primitive.function.ByteConsumer;
import io.deephaven.engine.primitive.function.CharConsumer;
import io.deephaven.engine.primitive.function.FloatConsumer;
Expand Down Expand Up @@ -58,6 +59,7 @@
import io.deephaven.test.types.OutOfBandTest;
import io.deephaven.time.DateTimeUtils;
import io.deephaven.util.QueryConstants;
import io.deephaven.util.SafeCloseable;
import io.deephaven.util.codec.SimpleByteArrayCodec;
import io.deephaven.util.compare.DoubleComparisons;
import io.deephaven.util.compare.FloatComparisons;
Expand Down Expand Up @@ -88,6 +90,7 @@
import java.math.BigInteger;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.time.Instant;
import java.time.LocalDate;
Expand Down Expand Up @@ -337,6 +340,102 @@ public void vectorParquetFormat() {
groupedTable("largeAggParquet", LARGE_TABLE_SIZE, false);
}

@Test
public void indexRetentionThroughGC() {
    final String destPath = Path.of(rootFile.getPath(), "ParquetTest_indexRetention_test").toString();
    final int tableSize = 10_000;
    final Table testTable = TableTools.emptyTable(tableSize).update(
            "symbol = randomInt(0,4)",
            "price = randomInt(0,10000) * 0.01",
            "str_id = `str_` + String.format(`%08d`, randomInt(0,1_000_000))",
            "indexed_val = ii % 10_000");
    final ParquetInstructions writeInstructions = ParquetInstructions.builder()
            .setGenerateMetadataFiles(true)
            .addIndexColumns("indexed_val")
            .build();
    final PartitionedTable partitionedTable = testTable.partitionBy("symbol");
    ParquetTools.writeKeyValuePartitionedTable(partitionedTable, destPath, writeInstructions);
    final Table child;

    // The nested liveness scope is not needed for liveness management here; it exists solely to
    // opt out of the enclosing scope's enforceStrongReachability.
    try (final SafeCloseable ignored = LivenessScopeStack.open()) {
        // Read the table back from disk and validate index visibility across GC cycles.
        Table source = ParquetTools.readTable(destPath);

        // Build a chain of derived tables; each update yields a fresh intermediate parent.
        Table derived = source.update("new_val = indexed_val + 1");
        for (int step = 0; step < 3; ++step) {
            derived = derived.update("new_val = new_val + 1");
        }
        child = derived;

        // The indexes survive this GC because the source table still holds strong references.
        System.gc();

        // The source table should expose the indexes.
        Assert.eqTrue(DataIndexer.hasDataIndex(source, "symbol"), "hasDataIndex -> symbol");
        Assert.eqTrue(DataIndexer.hasDataIndex(source, "indexed_val"), "hasDataIndex -> indexed_val");

        // The child should also expose them while the source is retained.
        Assert.eqTrue(DataIndexer.hasDataIndex(child, "symbol"), "hasDataIndex -> symbol");
        Assert.eqTrue(DataIndexer.hasDataIndex(child, "indexed_val"), "hasDataIndex -> indexed_val");

        // Null out the source reference so GC is free to collect it.
        source = null;
    }

    // Even after a GC with the source collected, the child retains access to the indexes.
    System.gc();
    Assert.eqTrue(DataIndexer.hasDataIndex(child, "symbol"), "hasDataIndex -> symbol");
    Assert.eqTrue(DataIndexer.hasDataIndex(child, "indexed_val"), "hasDataIndex -> indexed_val");
}

@Test
public void remappedIndexRetentionThroughGC() {
    final String destPath =
            Path.of(rootFile.getPath(), "ParquetTest_remappedIndexRetention_test.parquet").toString();
    final int tableSize = 10_000;
    final Table testTable = TableTools.emptyTable(tableSize).update(
            "symbol = randomInt(0,4)",
            "price = randomInt(0,10000) * 0.01",
            "str_id = `str_` + String.format(`%08d`, randomInt(0,1_000_000))",
            "indexed_val = ii % 10_000");
    final ParquetInstructions writeInstructions = ParquetInstructions.builder()
            .setGenerateMetadataFiles(true)
            .addIndexColumns("symbol")
            .addIndexColumns("indexed_val")
            .build();
    ParquetTools.writeTable(testTable, destPath, writeInstructions);
    final Table child;

    // The nested liveness scope is not needed for liveness management here; it exists solely to
    // opt out of the enclosing scope's enforceStrongReachability.
    try (final SafeCloseable ignored = LivenessScopeStack.open()) {
        // Read the table back from disk and validate index visibility across GC cycles.
        Table source = ParquetTools.readTable(destPath);

        // select() materializes in-memory column sources, which forces the indexes to be remapped.
        child = source.select();

        // The indexes survive this GC because the source table still holds strong references.
        System.gc();

        // The source table should expose the indexes.
        Assert.eqTrue(DataIndexer.hasDataIndex(source, "symbol"), "hasDataIndex -> symbol");
        Assert.eqTrue(DataIndexer.hasDataIndex(source, "indexed_val"), "hasDataIndex -> indexed_val");

        // The child should also expose them while the source is retained.
        Assert.eqTrue(DataIndexer.hasDataIndex(child, "symbol"), "hasDataIndex -> symbol");
        Assert.eqTrue(DataIndexer.hasDataIndex(child, "indexed_val"), "hasDataIndex -> indexed_val");

        // Null out the source reference so GC is free to collect it.
        source = null;
    }

    // Even after a GC with the source collected, the child retains access to the indexes.
    System.gc();
    Assert.eqTrue(DataIndexer.hasDataIndex(child, "symbol"), "hasDataIndex -> symbol");
    Assert.eqTrue(DataIndexer.hasDataIndex(child, "indexed_val"), "hasDataIndex -> indexed_val");
}

@Test
public void indexByLongKey() {
final TableDefinition definition = TableDefinition.of(
Expand Down

0 comments on commit 189b24f

Please sign in to comment.