diff --git a/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableTest.java b/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableTest.java
index 5aa0832ebd1..be2b9f54678 100644
--- a/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableTest.java
+++ b/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableTest.java
@@ -964,6 +964,43 @@ public void testStringContainsFilter() {
         }
     }
 
+    public void testIndexRetentionThroughGC() {
+        final Table childTable;
+
+        // We don't need this liveness scope for liveness management, but rather to opt out of the enclosing scope's
+        // enforceStrongReachability
+        try (final SafeCloseable ignored = LivenessScopeStack.open()) {
+            // Parameterized (was a raw Map): the values are only held to keep the indexes strongly reachable,
+            // so Object is a sufficient value type.
+            final Map<String, Object> retained = new HashMap<>();
+            final Random random = new Random(0);
+            final int size = 500;
+            final QueryTable parentTable = getTable(false, size, random,
+                    initColumnInfos(new String[] {"S1", "S2"},
+                            new SetGenerator<>("aa", "bb", "cc", "dd", "AA", "BB", "CC", "DD"),
+                            new SetGenerator<>("aaa", "bbb", "ccc", "ddd", "AAA", "BBB", "CCC", "DDD")));
+
+            // Explicitly retain the index references.
+            retained.put("di1", DataIndexer.getOrCreateDataIndex(parentTable, "S1"));
+            retained.put("di2", DataIndexer.getOrCreateDataIndex(parentTable, "S2"));
+            childTable = parentTable.update("isEven = ii % 2 == 0");
+
+            // While retained, the indexes will survive GC
+            System.gc();
+
+            // While the references are held, the parent and child tables should have the indexes.
+            Assert.assertTrue(DataIndexer.hasDataIndex(parentTable, "S1"));
+            Assert.assertTrue(DataIndexer.hasDataIndex(parentTable, "S2"));
+            Assert.assertTrue(DataIndexer.hasDataIndex(childTable, "S1"));
+            Assert.assertTrue(DataIndexer.hasDataIndex(childTable, "S2"));
+
+            // Explicitly release the references.
+            retained.clear();
+        }
+        // After a GC, the child table should not have the indexes.
+        System.gc();
+        Assert.assertFalse(DataIndexer.hasDataIndex(childTable, "S1"));
+        Assert.assertFalse(DataIndexer.hasDataIndex(childTable, "S2"));
+    }
+
     public void testStringMatchFilterIndexed() {
         // MatchFilters (currently) only use indexes on initial creation but this incremental test will recreate
         // index-enabled match filtered tables and compare them against incremental non-indexed filtered tables.
diff --git a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java
index e7faf5be88c..530be1c5d6b 100644
--- a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java
+++ b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java
@@ -10,6 +10,7 @@
 import io.deephaven.base.FileUtils;
 import io.deephaven.base.verify.Assert;
 import io.deephaven.engine.context.ExecutionContext;
+import io.deephaven.engine.liveness.LivenessScopeStack;
 import io.deephaven.engine.primitive.function.ByteConsumer;
 import io.deephaven.engine.primitive.function.CharConsumer;
 import io.deephaven.engine.primitive.function.FloatConsumer;
@@ -58,6 +59,7 @@
 import io.deephaven.test.types.OutOfBandTest;
 import io.deephaven.time.DateTimeUtils;
 import io.deephaven.util.QueryConstants;
+import io.deephaven.util.SafeCloseable;
 import io.deephaven.util.codec.SimpleByteArrayCodec;
 import io.deephaven.util.compare.DoubleComparisons;
 import io.deephaven.util.compare.FloatComparisons;
@@ -88,6 +90,7 @@
 import java.math.BigInteger;
 import java.net.URI;
 import java.nio.file.Files;
+import java.nio.file.Path;
 import java.nio.file.StandardCopyOption;
 import java.time.Instant;
 import java.time.LocalDate;
@@ -337,6 +340,103 @@ public void vectorParquetFormat() {
         groupedTable("largeAggParquet", LARGE_TABLE_SIZE, false);
     }
 
+    @Test
+    public void indexRetentionThroughGC() {
+        final String destPath =
+                Path.of(rootFile.getPath(), "ParquetTest_indexRetention_test").toString();
+        final int tableSize = 10_000;
+        final Table testTable = TableTools.emptyTable(tableSize).update(
+                "symbol = randomInt(0,4)",
+                "price = randomInt(0,10000) * 0.01",
+                "str_id = `str_` + String.format(`%08d`, randomInt(0,1_000_000))",
+                "indexed_val = ii % 10_000");
+        final ParquetInstructions writeInstructions = ParquetInstructions.builder()
+                .setGenerateMetadataFiles(true)
+                .addIndexColumns("indexed_val")
+                .build();
+        final PartitionedTable partitionedTable = testTable.partitionBy("symbol");
+        ParquetTools.writeKeyValuePartitionedTable(partitionedTable, destPath, writeInstructions);
+        final Table child;
+
+        // We don't need this liveness scope for liveness management, but rather to opt out of the enclosing scope's
+        // enforceStrongReachability
+        try (final SafeCloseable ignored = LivenessScopeStack.open()) {
+            // Read from disk and validate the indexes through GC.
+            Table parent = ParquetTools.readTable(destPath);
+            child = parent.update("new_val = indexed_val + 1")
+                    .update("new_val = new_val + 1")
+                    .update("new_val = new_val + 1")
+                    .update("new_val = new_val + 1");
+
+            // These indexes will survive GC because the parent table is holding strong references.
+            System.gc();
+
+            // The parent table should have the indexes.
+            Assert.eqTrue(DataIndexer.hasDataIndex(parent, "symbol"), "hasDataIndex -> symbol");
+            Assert.eqTrue(DataIndexer.hasDataIndex(parent, "indexed_val"), "hasDataIndex -> indexed_val");
+
+            // The child table should have the indexes while the parent is retained.
+            Assert.eqTrue(DataIndexer.hasDataIndex(child, "symbol"), "hasDataIndex -> symbol");
+            Assert.eqTrue(DataIndexer.hasDataIndex(child, "indexed_val"), "hasDataIndex -> indexed_val");
+
+            // Force the parent to null to allow GC to collect it.
+            parent = null;
+        }
+
+        // After a GC, the child table should still have access to the indexes.
+        System.gc();
+        Assert.eqTrue(DataIndexer.hasDataIndex(child, "symbol"), "hasDataIndex -> symbol");
+        Assert.eqTrue(DataIndexer.hasDataIndex(child, "indexed_val"), "hasDataIndex -> indexed_val");
+    }
+
+    @Test
+    public void remappedIndexRetentionThroughGC() {
+        final String destPath =
+                Path.of(rootFile.getPath(), "ParquetTest_remappedIndexRetention_test.parquet").toString();
+        final int tableSize = 10_000;
+        final Table testTable = TableTools.emptyTable(tableSize).update(
+                "symbol = randomInt(0,4)",
+                "price = randomInt(0,10000) * 0.01",
+                "str_id = `str_` + String.format(`%08d`, randomInt(0,1_000_000))",
+                "indexed_val = ii % 10_000");
+        final ParquetInstructions writeInstructions = ParquetInstructions.builder()
+                .setGenerateMetadataFiles(true)
+                .addIndexColumns("symbol")
+                .addIndexColumns("indexed_val")
+                .build();
+        ParquetTools.writeTable(testTable, destPath, writeInstructions);
+        final Table child;
+
+        // We don't need this liveness scope for liveness management, but rather to opt out of the enclosing scope's
+        // enforceStrongReachability
+        try (final SafeCloseable ignored = LivenessScopeStack.open()) {
+            // Read from disk and validate the indexes through GC.
+            Table parent = ParquetTools.readTable(destPath);
+
+            // select() produces in-memory column sources, triggering the remapping of the indexes.
+            child = parent.select();
+
+            // These indexes will survive GC because the parent table is holding strong references.
+            System.gc();
+
+            // The parent table should have the indexes.
+            Assert.eqTrue(DataIndexer.hasDataIndex(parent, "symbol"), "hasDataIndex -> symbol");
+            Assert.eqTrue(DataIndexer.hasDataIndex(parent, "indexed_val"), "hasDataIndex -> indexed_val");
+
+            // The child table should have the indexes while the parent is retained.
+            Assert.eqTrue(DataIndexer.hasDataIndex(child, "symbol"), "hasDataIndex -> symbol");
+            Assert.eqTrue(DataIndexer.hasDataIndex(child, "indexed_val"), "hasDataIndex -> indexed_val");
+
+            // Force the parent to null to allow GC to collect it.
+            parent = null;
+        }
+
+        // After a GC, the child table should still have access to the indexes.
+        System.gc();
+        Assert.eqTrue(DataIndexer.hasDataIndex(child, "symbol"), "hasDataIndex -> symbol");
+        Assert.eqTrue(DataIndexer.hasDataIndex(child, "indexed_val"), "hasDataIndex -> indexed_val");
+    }
+
     @Test
     public void indexByLongKey() {
         final TableDefinition definition = TableDefinition.of(