From c74e57000221f0c79aba4833599da13402b04b65 Mon Sep 17 00:00:00 2001 From: Shivam Malhotra Date: Thu, 15 Aug 2024 16:04:55 -0500 Subject: [PATCH] feat: Added support to upcast numeric types when reading parquet (#5923) --- ...er.java => BooleanAsByteMaterializer.java} | 10 +- .../DoubleFromFloatMaterializer.java | 45 +++ .../materializers/DoubleMaterializer.java | 28 +- .../materializers/DoubleMaterializerBase.java | 39 +++ .../IntFromBooleanMaterializer.java | 45 +++ .../IntFromUnsignedByteMaterializer.java | 45 +++ .../IntFromUnsignedShortMaterializer.java | 45 +++ .../base/materializers/IntMaterializer.java | 28 +- .../materializers/IntMaterializerBase.java | 39 +++ .../LongFromBooleanMaterializer.java | 45 +++ .../LongFromIntMaterializer.java | 45 +++ .../LongFromUnsignedByteMaterializer.java | 45 +++ .../LongFromUnsignedIntMaterializer.java | 6 +- .../LongFromUnsignedShortMaterializer.java | 41 +++ .../ShortFromBooleanMaterializer.java | 45 +++ .../ShortFromUnsignedByteMaterializer.java | 41 +++ .../base/materializers/ShortMaterializer.java | 28 +- .../materializers/ShortMaterializerBase.java | 39 +++ .../table/location/ParquetColumnLocation.java | 108 +++++- .../pagestore/topage/ToBooleanAsBytePage.java | 4 +- .../table/pagestore/topage/ToBytePage.java | 35 +- .../table/pagestore/topage/ToCharPage.java | 4 - .../table/pagestore/topage/ToDoublePage.java | 35 +- .../table/pagestore/topage/ToFloatPage.java | 2 +- .../table/pagestore/topage/ToIntPage.java | 49 ++- .../table/pagestore/topage/ToLongPage.java | 36 ++ .../table/pagestore/topage/ToShortPage.java | 44 ++- .../table/ParquetTableReadWriteTest.java | 311 ++++++++++++++++++ py/server/tests/test_parquet.py | 28 +- .../ReplicatePageMaterializers.java | 133 +++++++- .../replicators/ReplicateToPage.java | 8 +- 31 files changed, 1297 insertions(+), 159 deletions(-) rename extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/{BoolMaterializer.java => BooleanAsByteMaterializer.java} (78%) create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/DoubleFromFloatMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/DoubleMaterializerBase.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntFromBooleanMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntFromUnsignedByteMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntFromUnsignedShortMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntMaterializerBase.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromBooleanMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromIntMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromUnsignedByteMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromUnsignedShortMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortFromBooleanMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortFromUnsignedByteMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortMaterializerBase.java diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BoolMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BooleanAsByteMaterializer.java similarity index 78% rename from extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BoolMaterializer.java rename to extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BooleanAsByteMaterializer.java index 6e5b7f56994..e3475a5a6d8 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BoolMaterializer.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BooleanAsByteMaterializer.java @@ -9,17 +9,17 @@ import java.util.Arrays; -public class BoolMaterializer implements PageMaterializer { +public class BooleanAsByteMaterializer implements PageMaterializer { public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { @Override public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { - return new BoolMaterializer(dataReader, (byte) nullValue, numValues); + return new BooleanAsByteMaterializer(dataReader, (byte) nullValue, numValues); } @Override public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { - return new BoolMaterializer(dataReader, numValues); + return new BooleanAsByteMaterializer(dataReader, numValues); } }; @@ -28,11 +28,11 @@ public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int num private final byte nullValue; private final byte[] data; - private BoolMaterializer(ValuesReader dataReader, int numValues) { + private BooleanAsByteMaterializer(ValuesReader dataReader, int numValues) { this(dataReader, (byte) 0, numValues); } - private BoolMaterializer(ValuesReader dataReader, byte nullValue, int numValues) { + private BooleanAsByteMaterializer(ValuesReader dataReader, byte nullValue, int numValues) { this.dataReader = dataReader; this.nullValue = nullValue; this.data = new byte[numValues]; diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/DoubleFromFloatMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/DoubleFromFloatMaterializer.java new file mode 100644 index 00000000000..4d97c220c7a --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/DoubleFromFloatMaterializer.java @@ -0,0 +1,45 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit DoubleMaterializer and run "./gradlew replicatePageMaterializers" to regenerate +// +// @formatter:off +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import org.apache.parquet.column.values.ValuesReader; + +public class DoubleFromFloatMaterializer extends DoubleMaterializerBase implements PageMaterializer { + + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new DoubleFromFloatMaterializer(dataReader, (double) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new DoubleFromFloatMaterializer(dataReader, numValues); + } + }; + + private final ValuesReader dataReader; + + private DoubleFromFloatMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, 0, numValues); + } + + private DoubleFromFloatMaterializer(ValuesReader dataReader, double nullValue, int numValues) { + super(nullValue, numValues); + this.dataReader = dataReader; + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = dataReader.readFloat(); + } + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/DoubleMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/DoubleMaterializer.java index f5143679f1f..c8345eff522 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/DoubleMaterializer.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/DoubleMaterializer.java @@ -2,7 +2,7 @@ // Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending // // ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY -// ****** Edit FloatMaterializer and run "./gradlew replicatePageMaterializers" to regenerate +// ****** Edit LongMaterializer and run "./gradlew replicatePageMaterializers" to regenerate // // @formatter:off package io.deephaven.parquet.base.materializers; @@ -11,9 +11,7 @@ import io.deephaven.parquet.base.PageMaterializerFactory; import org.apache.parquet.column.values.ValuesReader; -import java.util.Arrays; - -public class DoubleMaterializer implements PageMaterializer { +public class DoubleMaterializer extends DoubleMaterializerBase implements PageMaterializer { public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { @Override @@ -29,22 +27,13 @@ public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int num private final ValuesReader dataReader; - private final double nullValue; - private final double[] data; - private DoubleMaterializer(ValuesReader dataReader, int numValues) { this(dataReader, 0, numValues); } private DoubleMaterializer(ValuesReader dataReader, double nullValue, int numValues) { + super(nullValue, numValues); this.dataReader = dataReader; - this.nullValue = nullValue; - this.data = new double[numValues]; - } - - @Override - public void fillNulls(int startIndex, int endIndex) { - Arrays.fill(data, startIndex, endIndex, nullValue); } @Override @@ -53,15 +42,4 @@ public void fillValues(int startIndex, int endIndex) { data[ii] = dataReader.readDouble(); } } - - @Override - public Object fillAll() { - fillValues(0, data.length); - return data; - } - - @Override - public Object data() { - return data; - } } diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/DoubleMaterializerBase.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/DoubleMaterializerBase.java new file mode 100644 index 00000000000..22e1c430f33 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/DoubleMaterializerBase.java @@ -0,0 +1,39 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit LongMaterializerBase and run "./gradlew replicatePageMaterializers" to regenerate +// +// @formatter:off +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; + +import java.util.Arrays; + +abstract class DoubleMaterializerBase implements PageMaterializer { + + private final double nullValue; + final double[] data; + + DoubleMaterializerBase(double nullValue, int numValues) { + this.nullValue = nullValue; + this.data = new double[numValues]; + } + + @Override + public void fillNulls(int startIndex, int endIndex) { + Arrays.fill(data, startIndex, endIndex, nullValue); + } + + @Override + public Object fillAll() { + fillValues(0, data.length); + return data; + } + + @Override + public Object data() { + return data; + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntFromBooleanMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntFromBooleanMaterializer.java new file mode 100644 index 00000000000..6255c5d6bde --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntFromBooleanMaterializer.java @@ -0,0 +1,45 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit IntMaterializer and run "./gradlew replicatePageMaterializers" to regenerate +// +// @formatter:off +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import org.apache.parquet.column.values.ValuesReader; + +public class IntFromBooleanMaterializer extends IntMaterializerBase implements PageMaterializer { + + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new IntFromBooleanMaterializer(dataReader, (int) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new IntFromBooleanMaterializer(dataReader, numValues); + } + }; + + private final ValuesReader dataReader; + + private IntFromBooleanMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, 0, numValues); + } + + private IntFromBooleanMaterializer(ValuesReader dataReader, int nullValue, int numValues) { + super(nullValue, numValues); + this.dataReader = dataReader; + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = dataReader.readBoolean() ? 1 : 0; + } + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntFromUnsignedByteMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntFromUnsignedByteMaterializer.java new file mode 100644 index 00000000000..30b0ae5a86a --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntFromUnsignedByteMaterializer.java @@ -0,0 +1,45 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit LongFromUnsignedShortMaterializer and run "./gradlew replicatePageMaterializers" to regenerate +// +// @formatter:off +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import org.apache.parquet.column.values.ValuesReader; + +public class IntFromUnsignedByteMaterializer extends IntMaterializerBase implements PageMaterializer { + + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new IntFromUnsignedByteMaterializer(dataReader, (int) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new IntFromUnsignedByteMaterializer(dataReader, numValues); + } + }; + + private final ValuesReader dataReader; + + private IntFromUnsignedByteMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, 0, numValues); + } + + private IntFromUnsignedByteMaterializer(ValuesReader dataReader, int nullValue, int numValues) { + super(nullValue, numValues); + this.dataReader = dataReader; + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = Byte.toUnsignedInt((byte) dataReader.readInteger()); + } + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntFromUnsignedShortMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntFromUnsignedShortMaterializer.java new file mode 100644 index 00000000000..369a83d56db --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntFromUnsignedShortMaterializer.java @@ -0,0 +1,45 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit LongFromUnsignedShortMaterializer and run "./gradlew replicatePageMaterializers" to regenerate +// +// @formatter:off +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import org.apache.parquet.column.values.ValuesReader; + +public class IntFromUnsignedShortMaterializer extends IntMaterializerBase implements PageMaterializer { + + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new IntFromUnsignedShortMaterializer(dataReader, (int) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new IntFromUnsignedShortMaterializer(dataReader, numValues); + } + }; + + private final ValuesReader dataReader; + + private IntFromUnsignedShortMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, 0, numValues); + } + + private IntFromUnsignedShortMaterializer(ValuesReader dataReader, int nullValue, int numValues) { + super(nullValue, numValues); + this.dataReader = dataReader; + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = Short.toUnsignedInt((short) dataReader.readInteger()); + } + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntMaterializer.java index 97f47e23ebe..9b6f423c08a 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntMaterializer.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntMaterializer.java @@ -2,7 +2,7 @@ // Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending // // ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY -// ****** Edit FloatMaterializer and run "./gradlew replicatePageMaterializers" to regenerate +// ****** Edit LongMaterializer and run "./gradlew replicatePageMaterializers" to regenerate // // @formatter:off package io.deephaven.parquet.base.materializers; @@ -11,9 +11,7 @@ import io.deephaven.parquet.base.PageMaterializerFactory; import org.apache.parquet.column.values.ValuesReader; -import java.util.Arrays; - -public class IntMaterializer implements PageMaterializer { +public class IntMaterializer extends IntMaterializerBase implements PageMaterializer { public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { @Override @@ -29,22 +27,13 @@ public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int num private final ValuesReader dataReader; - private final int nullValue; - private final int[] data; - private IntMaterializer(ValuesReader dataReader, int numValues) { this(dataReader, 0, numValues); } private IntMaterializer(ValuesReader dataReader, int nullValue, int numValues) { + super(nullValue, numValues); this.dataReader = dataReader; - this.nullValue = nullValue; - this.data = new int[numValues]; - } - - @Override - public void fillNulls(int startIndex, int endIndex) { - Arrays.fill(data, startIndex, endIndex, nullValue); } @Override @@ -53,15 +42,4 @@ public void fillValues(int startIndex, int endIndex) { data[ii] = dataReader.readInteger(); } } - - @Override - public Object fillAll() { - fillValues(0, data.length); - return data; - } - - @Override - public Object data() { - return data; - } } diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntMaterializerBase.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntMaterializerBase.java new file mode 100644 index 00000000000..a307e0c36d7 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntMaterializerBase.java @@ -0,0 +1,39 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit LongMaterializerBase and run "./gradlew replicatePageMaterializers" to regenerate +// +// @formatter:off +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; + +import java.util.Arrays; + +abstract class IntMaterializerBase implements PageMaterializer { + + private final int nullValue; + final int[] data; + + IntMaterializerBase(int nullValue, int numValues) { + this.nullValue = nullValue; + this.data = new int[numValues]; + } + + @Override + public void fillNulls(int startIndex, int endIndex) { + Arrays.fill(data, startIndex, endIndex, nullValue); + } + + @Override + public Object fillAll() { + fillValues(0, data.length); + return data; + } + + @Override + public Object data() { + return data; + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromBooleanMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromBooleanMaterializer.java new file mode 100644 index 00000000000..b3ff8fea210 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromBooleanMaterializer.java @@ -0,0 +1,45 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit LongMaterializer and run "./gradlew replicatePageMaterializers" to regenerate +// +// @formatter:off +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import org.apache.parquet.column.values.ValuesReader; + +public class LongFromBooleanMaterializer extends LongMaterializerBase implements PageMaterializer { + + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new LongFromBooleanMaterializer(dataReader, (long) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new LongFromBooleanMaterializer(dataReader, numValues); + } + }; + + private final ValuesReader dataReader; + + private LongFromBooleanMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, 0, numValues); + } + + private LongFromBooleanMaterializer(ValuesReader dataReader, long nullValue, int numValues) { + super(nullValue, numValues); + this.dataReader = dataReader; + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = dataReader.readBoolean() ? 1 : 0; + } + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromIntMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromIntMaterializer.java new file mode 100644 index 00000000000..a2359344597 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromIntMaterializer.java @@ -0,0 +1,45 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit LongMaterializer and run "./gradlew replicatePageMaterializers" to regenerate +// +// @formatter:off +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import org.apache.parquet.column.values.ValuesReader; + +public class LongFromIntMaterializer extends LongMaterializerBase implements PageMaterializer { + + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new LongFromIntMaterializer(dataReader, (long) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new LongFromIntMaterializer(dataReader, numValues); + } + }; + + private final ValuesReader dataReader; + + private LongFromIntMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, 0, numValues); + } + + private LongFromIntMaterializer(ValuesReader dataReader, long nullValue, int numValues) { + super(nullValue, numValues); + this.dataReader = dataReader; + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = dataReader.readInteger(); + } + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromUnsignedByteMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromUnsignedByteMaterializer.java new file mode 100644 index 00000000000..014d6f2d487 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromUnsignedByteMaterializer.java @@ -0,0 +1,45 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit LongFromUnsignedShortMaterializer and run "./gradlew replicatePageMaterializers" to regenerate +// +// @formatter:off +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import org.apache.parquet.column.values.ValuesReader; + +public class LongFromUnsignedByteMaterializer extends LongMaterializerBase implements PageMaterializer { + + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new LongFromUnsignedByteMaterializer(dataReader, (long) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new LongFromUnsignedByteMaterializer(dataReader, numValues); + } + }; + + private final ValuesReader dataReader; + + private LongFromUnsignedByteMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, 0, numValues); + } + + private LongFromUnsignedByteMaterializer(ValuesReader dataReader, long nullValue, int numValues) { + super(nullValue, numValues); + this.dataReader = dataReader; + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = Byte.toUnsignedLong((byte) dataReader.readInteger()); + } + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromUnsignedIntMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromUnsignedIntMaterializer.java index 297d7ae6cbb..cd37ad39c6a 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromUnsignedIntMaterializer.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromUnsignedIntMaterializer.java @@ -1,6 +1,10 @@ // // Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending // +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit LongFromUnsignedShortMaterializer and run "./gradlew replicatePageMaterializers" to regenerate +// +// @formatter:off package io.deephaven.parquet.base.materializers; import io.deephaven.parquet.base.PageMaterializer; @@ -35,7 +39,7 @@ private LongFromUnsignedIntMaterializer(ValuesReader dataReader, long nullValue, @Override public void fillValues(int startIndex, int endIndex) { for (int ii = startIndex; ii < endIndex; ii++) { - data[ii] = Integer.toUnsignedLong(dataReader.readInteger()); + data[ii] = Integer.toUnsignedLong((int) dataReader.readInteger()); } } } diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromUnsignedShortMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromUnsignedShortMaterializer.java new file mode 100644 index 00000000000..cf6f10049b8 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromUnsignedShortMaterializer.java @@ -0,0 +1,41 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import org.apache.parquet.column.values.ValuesReader; + +public class LongFromUnsignedShortMaterializer extends LongMaterializerBase implements PageMaterializer { + + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new LongFromUnsignedShortMaterializer(dataReader, (long) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new LongFromUnsignedShortMaterializer(dataReader, numValues); + } + }; + + private final ValuesReader dataReader; + + private LongFromUnsignedShortMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, 0, numValues); + } + + private LongFromUnsignedShortMaterializer(ValuesReader dataReader, long nullValue, int numValues) { + super(nullValue, numValues); + this.dataReader = dataReader; + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = Short.toUnsignedLong((short) dataReader.readInteger()); + } + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortFromBooleanMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortFromBooleanMaterializer.java new file mode 100644 index 00000000000..df09abb1b8f --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortFromBooleanMaterializer.java @@ -0,0 +1,45 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit ShortMaterializer and run "./gradlew replicatePageMaterializers" to regenerate +// +// @formatter:off +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import org.apache.parquet.column.values.ValuesReader; + +public class ShortFromBooleanMaterializer extends ShortMaterializerBase implements PageMaterializer { + + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new ShortFromBooleanMaterializer(dataReader, (short) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new ShortFromBooleanMaterializer(dataReader, numValues); + } + }; + + private final ValuesReader dataReader; + + private ShortFromBooleanMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, (short) 0, numValues); + } + + private ShortFromBooleanMaterializer(ValuesReader dataReader, short nullValue, int numValues) { + super(nullValue, numValues); + this.dataReader = dataReader; + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = (short) (dataReader.readBoolean() ? 1 : 0); + } + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortFromUnsignedByteMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortFromUnsignedByteMaterializer.java new file mode 100644 index 00000000000..833724a06cc --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortFromUnsignedByteMaterializer.java @@ -0,0 +1,41 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import org.apache.parquet.column.values.ValuesReader; + +public class ShortFromUnsignedByteMaterializer extends ShortMaterializerBase implements PageMaterializer { + + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new ShortFromUnsignedByteMaterializer(dataReader, (short) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new ShortFromUnsignedByteMaterializer(dataReader, numValues); + } + }; + + private final ValuesReader dataReader; + + private ShortFromUnsignedByteMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, (short) 0, numValues); + } + + private ShortFromUnsignedByteMaterializer(ValuesReader dataReader, short nullValue, int numValues) { + super(nullValue, numValues); + this.dataReader = dataReader; + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = (short) Byte.toUnsignedInt((byte) dataReader.readInteger()); + } + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortMaterializer.java index b3c5eae6d9a..cfc389081f0 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortMaterializer.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortMaterializer.java @@ -2,7 +2,7 @@ // Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending // // ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY -// ****** Edit CharMaterializer and run "./gradlew replicatePageMaterializers" to regenerate +// ****** Edit LongMaterializer and run "./gradlew replicatePageMaterializers" to regenerate // // @formatter:off package io.deephaven.parquet.base.materializers; @@ -11,9 +11,7 @@ import io.deephaven.parquet.base.PageMaterializerFactory; import org.apache.parquet.column.values.ValuesReader; -import java.util.Arrays; - -public class ShortMaterializer implements PageMaterializer { +public class ShortMaterializer extends ShortMaterializerBase implements PageMaterializer { public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { @Override @@ -29,22 +27,13 @@ public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int num private final ValuesReader dataReader; - private final short nullValue; - private final short[] data; - private ShortMaterializer(ValuesReader dataReader, int numValues) { this(dataReader, (short) 0, numValues); } private ShortMaterializer(ValuesReader dataReader, short nullValue, int numValues) { + super(nullValue, numValues); this.dataReader = dataReader; - this.nullValue = nullValue; - this.data = new short[numValues]; - } - - @Override - public void fillNulls(int startIndex, int endIndex) { - Arrays.fill(data, startIndex, endIndex, nullValue); } @Override @@ -53,15 +42,4 @@ public void fillValues(int startIndex, int endIndex) { data[ii] = (short) dataReader.readInteger(); } } - - @Override - public Object fillAll() { - fillValues(0, data.length); - return data; - } - - @Override - public Object data() { - return data; - } } diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortMaterializerBase.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortMaterializerBase.java new file mode 100644 index 00000000000..ff9265748e5 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortMaterializerBase.java @@ -0,0 +1,39 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit LongMaterializerBase and run "./gradlew replicatePageMaterializers" to regenerate +// +// @formatter:off +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; + +import java.util.Arrays; + +abstract class ShortMaterializerBase implements PageMaterializer { + + private final short nullValue; + final short[] data; + + ShortMaterializerBase(short nullValue, int numValues) { + this.nullValue = nullValue; + this.data = new short[numValues]; + } + + @Override + public void fillNulls(int startIndex, int endIndex) { + Arrays.fill(data, startIndex, endIndex, nullValue); + } + + @Override + public Object fillAll() { + fillValues(0, data.length); + return data; + } + + @Override + public Object data() { + return data; + } +} diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/location/ParquetColumnLocation.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/location/ParquetColumnLocation.java index 4c6e53c4b7b..4ddebb33685 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/location/ParquetColumnLocation.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/location/ParquetColumnLocation.java @@ -37,6 +37,7 @@ import java.io.IOException; import java.math.BigDecimal; import java.math.BigInteger; +import java.time.Instant; import java.util.Arrays; import java.util.Optional; import java.util.function.Function; @@ -370,22 +371,59 @@ private static ToPage makeToPage( final PrimitiveType.PrimitiveTypeName typeName = type.getPrimitiveTypeName(); switch (typeName) { case BOOLEAN: - toPage = ToBooleanAsBytePage.create(pageType); + if (pageType == Boolean.class) { + toPage = ToBooleanAsBytePage.create(pageType); + } else if (pageType == byte.class) { + toPage = ToBytePage.createFromBoolean(pageType); + } else if (pageType == short.class) { + toPage = ToShortPage.createFromBoolean(pageType); + } else if (pageType == int.class) { + toPage = ToIntPage.createFromBoolean(pageType); + } else if (pageType == long.class) { + toPage = ToLongPage.createFromBoolean(pageType); + } else { + throw new IllegalArgumentException( + "Cannot convert parquet BOOLEAN primitive column to " + pageType); + } break; case INT32: - toPage = ToIntPage.create(pageType); + if (pageType == int.class) { + toPage = ToIntPage.create(pageType); + } else if (pageType == long.class) { + toPage = ToLongPage.createFromInt(pageType); + } else { + throw new IllegalArgumentException("Cannot convert parquet INT32 column to " + pageType); + } break; case INT64: - toPage = ToLongPage.create(pageType); + if (pageType == long.class) { + toPage = ToLongPage.create(pageType); + } else { + throw new IllegalArgumentException("Cannot convert parquet INT64 column to " + pageType); + } break; case INT96: - toPage = ToInstantPage.createFromInt96(pageType); + if (pageType == Instant.class) { + toPage = ToInstantPage.createFromInt96(pageType); + } else { + throw new IllegalArgumentException("Cannot convert parquet INT96 column to " + pageType); + } break; case DOUBLE: - toPage = ToDoublePage.create(pageType); + if (pageType == double.class) { + toPage = ToDoublePage.create(pageType); + } else { + throw new IllegalArgumentException("Cannot convert parquet DOUBLE column to " + pageType); + } break; case FLOAT: - toPage = ToFloatPage.create(pageType); + if (pageType == float.class) { + toPage = ToFloatPage.create(pageType); + } else if (pageType == double.class) { + toPage = ToDoublePage.createFromFloat(pageType); + } else { + throw new IllegalArgumentException("Cannot convert parquet FLOAT column to " + pageType); + } break; case BINARY: case FIXED_LEN_BYTE_ARRAY: @@ -413,9 +451,9 @@ private static ToPage makeToPage( } if (toPage == null) { - throw new TableDataException( + throw new IllegalArgumentException( "Unsupported parquet column type " + type.getPrimitiveTypeName() + - " with logical type " + logicalTypeAnnotation); + " with logical type " + logicalTypeAnnotation + " and page type " + pageType); } if (specialTypeName == ColumnTypeInfo.SpecialType.StringSet) { @@ -433,7 +471,7 @@ private static ToPage makeToPage( // noinspection unchecked return (ToPage) toPage; - } catch (RuntimeException except) { + } catch (final RuntimeException except) { throw new TableDataException( "Unexpected exception accessing column " + parquetColumnName, except); } @@ -494,19 +532,59 @@ private static class LogicalTypeVisitor if (intLogicalType.isSigned()) { switch (intLogicalType.getBitWidth()) { case 8: - return Optional.of(ToBytePage.create(pageType)); + if (pageType == byte.class) { + return Optional.of(ToBytePage.create(pageType)); + } else if (pageType == short.class) { + return Optional.of(ToShortPage.create(pageType)); + } else if (pageType == int.class) { + return Optional.of(ToIntPage.create(pageType)); + } else if (pageType == long.class) { + return Optional.of(ToLongPage.createFromInt(pageType)); + } + throw new IllegalArgumentException("Cannot convert parquet byte column to " + pageType); case 16: - return Optional.of(ToShortPage.create(pageType)); + if (pageType == short.class) { + return Optional.of(ToShortPage.create(pageType)); + } else if (pageType == int.class) { + return Optional.of(ToIntPage.create(pageType)); + } else if (pageType == long.class) { + return Optional.of(ToLongPage.createFromInt(pageType)); + } + throw new IllegalArgumentException("Cannot convert parquet short column to " + pageType); case 32: - return Optional.of(ToIntPage.create(pageType)); + if (pageType == int.class) { + return Optional.of(ToIntPage.create(pageType)); + } else if (pageType == long.class) { + return Optional.of(ToLongPage.createFromInt(pageType)); + } + throw new IllegalArgumentException("Cannot convert parquet int column to " + pageType); case 64: return Optional.of(ToLongPage.create(pageType)); } } else { switch (intLogicalType.getBitWidth()) { case 8: + if (pageType == char.class) { + return Optional.of(ToCharPage.create(pageType)); + } else if (pageType == short.class) { + return Optional.of(ToShortPage.createFromUnsignedByte(pageType)); + } else if (pageType == int.class) { + return Optional.of(ToIntPage.createFromUnsignedByte(pageType)); + } else if (pageType == long.class) { + return Optional.of(ToLongPage.createFromUnsignedByte(pageType)); + } + throw new IllegalArgumentException( + "Cannot convert parquet unsigned byte column to " + pageType); case 16: - return Optional.of(ToCharPage.create(pageType)); + if (pageType == char.class) { + return Optional.of(ToCharPage.create(pageType)); + } else if (pageType == int.class) { + return Optional.of(ToIntPage.createFromUnsignedShort(pageType)); + } else if (pageType == long.class) { + return Optional.of(ToLongPage.createFromUnsignedShort(pageType)); + } + throw new IllegalArgumentException( + "Cannot convert parquet unsigned short column to " + pageType); case 32: return Optional.of(ToLongPage.createFromUnsignedInt(pageType)); } @@ -547,7 +625,7 @@ private static class LogicalTypeVisitor case FIXED_LEN_BYTE_ARRAY: // fall through case BINARY: final int encodedSizeInBytes = typeName == BINARY ? -1 : type.getTypeLength(); - if (BigDecimal.class.equals(pageType)) { + if (pageType == BigDecimal.class) { final int precision = decimalLogicalType.getPrecision(); final int scale = decimalLogicalType.getScale(); try { @@ -560,7 +638,7 @@ private static class LogicalTypeVisitor pageType, new BigDecimalParquetBytesCodec(precision, scale, encodedSizeInBytes), columnChunkReader.getDictionarySupplier())); - } else if (BigInteger.class.equals(pageType)) { + } else if (pageType == BigInteger.class) { return Optional.of(ToBigIntegerPage.create( pageType, new BigIntegerParquetBytesCodec(encodedSizeInBytes), diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBooleanAsBytePage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBooleanAsBytePage.java index 8be732df7a2..529bba27066 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBooleanAsBytePage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBooleanAsBytePage.java @@ -5,7 +5,7 @@ import io.deephaven.chunk.attributes.Any; import io.deephaven.parquet.base.PageMaterializerFactory; -import io.deephaven.parquet.base.materializers.BoolMaterializer; +import io.deephaven.parquet.base.materializers.BooleanAsByteMaterializer; import io.deephaven.vector.ObjectVector; import io.deephaven.vector.ObjectVectorDirect; import io.deephaven.util.BooleanUtils; @@ -56,7 +56,7 @@ public final Object nullValue() { @Override public final PageMaterializerFactory getPageMaterializerFactory() { - return BoolMaterializer.FACTORY; + return BooleanAsByteMaterializer.FACTORY; } @Override diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBytePage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBytePage.java index f63d9f2ebcb..1f953292d20 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBytePage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBytePage.java @@ -1,15 +1,12 @@ // // Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending // -// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY -// ****** Edit ToIntPage and run "./gradlew replicateToPage" to regenerate -// -// @formatter:off package io.deephaven.parquet.table.pagestore.topage; import io.deephaven.chunk.ChunkType; import io.deephaven.chunk.attributes.Any; import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.parquet.base.materializers.BooleanAsByteMaterializer; import io.deephaven.parquet.base.materializers.ByteMaterializer; import org.jetbrains.annotations.NotNull; @@ -17,19 +14,35 @@ public class ToBytePage implements ToPage { + public static ToBytePage create(Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_BYTE; + } + + public static ToBytePage createFromBoolean(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_BOOLEAN; + } + @SuppressWarnings("rawtypes") - private static final ToBytePage INSTANCE = new ToBytePage<>(); + private static final ToBytePage FROM_BYTE = new ToBytePage<>(ByteMaterializer.FACTORY); + @SuppressWarnings("rawtypes") + private static final ToBytePage FROM_BOOLEAN = new ToBytePage<>(BooleanAsByteMaterializer.FACTORY); - public static ToBytePage create(Class nativeType) { + private static void verifyNativeType(final Class nativeType) { if (nativeType == null || byte.class.equals(nativeType)) { - // noinspection unchecked - return INSTANCE; + return; } - throw new IllegalArgumentException("The native type for a Byte column is " + nativeType.getCanonicalName()); } - private ToBytePage() {} + private final PageMaterializerFactory pageMaterializerFactory; + + private ToBytePage(@NotNull final PageMaterializerFactory pageMaterializerFactory) { + this.pageMaterializerFactory = pageMaterializerFactory; + } @Override @NotNull @@ -52,6 +65,6 @@ public final Object nullValue() { @Override @NotNull public final PageMaterializerFactory getPageMaterializerFactory() { - return ByteMaterializer.FACTORY; + return pageMaterializerFactory; } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToCharPage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToCharPage.java index 3fbccc9eb95..2248cede91a 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToCharPage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToCharPage.java @@ -1,10 +1,6 @@ // // Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending // -// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY -// ****** Edit ToIntPage and run "./gradlew replicateToPage" to regenerate -// -// @formatter:off package io.deephaven.parquet.table.pagestore.topage; import io.deephaven.chunk.ChunkType; diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToDoublePage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToDoublePage.java index f5f450f4753..474b4d3106f 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToDoublePage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToDoublePage.java @@ -1,15 +1,12 @@ // // Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending // -// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY -// ****** Edit ToIntPage and run "./gradlew replicateToPage" to regenerate -// -// @formatter:off package io.deephaven.parquet.table.pagestore.topage; import io.deephaven.chunk.ChunkType; import io.deephaven.chunk.attributes.Any; import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.parquet.base.materializers.DoubleFromFloatMaterializer; import io.deephaven.parquet.base.materializers.DoubleMaterializer; import org.jetbrains.annotations.NotNull; @@ -17,19 +14,35 @@ public class ToDoublePage implements ToPage { + public static ToDoublePage create(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_DOUBLE; + } + + public static ToDoublePage createFromFloat(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_FLOAT; + } + @SuppressWarnings("rawtypes") - private static final ToDoublePage INSTANCE = new ToDoublePage<>(); + private static final ToDoublePage FROM_DOUBLE = new ToDoublePage<>(DoubleMaterializer.FACTORY); + @SuppressWarnings("rawtypes") + private static final ToDoublePage FROM_FLOAT = new ToDoublePage<>(DoubleFromFloatMaterializer.FACTORY); - public static ToDoublePage create(Class nativeType) { + private static void verifyNativeType(final Class nativeType) { if (nativeType == null || double.class.equals(nativeType)) { - // noinspection unchecked - return INSTANCE; + return; } - throw new IllegalArgumentException("The native type for a Double column is " + nativeType.getCanonicalName()); } - private ToDoublePage() {} + private final PageMaterializerFactory pageMaterializerFactory; + + private ToDoublePage(@NotNull final PageMaterializerFactory pageMaterializerFactory) { + this.pageMaterializerFactory = pageMaterializerFactory; + } @Override @NotNull @@ -52,6 +65,6 @@ public final Object nullValue() { @Override @NotNull public final PageMaterializerFactory getPageMaterializerFactory() { - return DoubleMaterializer.FACTORY; + return pageMaterializerFactory; } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToFloatPage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToFloatPage.java index 64ef1bd83ed..63bb095afec 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToFloatPage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToFloatPage.java @@ -2,7 +2,7 @@ // Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending // // ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY -// ****** Edit ToIntPage and run "./gradlew replicateToPage" to regenerate +// ****** Edit ToCharPage and run "./gradlew replicateToPage" to regenerate // // @formatter:off package io.deephaven.parquet.table.pagestore.topage; diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToIntPage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToIntPage.java index 3217d95c0f5..4b0ef392fd3 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToIntPage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToIntPage.java @@ -6,6 +6,9 @@ import io.deephaven.chunk.ChunkType; import io.deephaven.chunk.attributes.Any; import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.parquet.base.materializers.IntFromBooleanMaterializer; +import io.deephaven.parquet.base.materializers.IntFromUnsignedByteMaterializer; +import io.deephaven.parquet.base.materializers.IntFromUnsignedShortMaterializer; import io.deephaven.parquet.base.materializers.IntMaterializer; import org.jetbrains.annotations.NotNull; @@ -13,19 +16,51 @@ public class ToIntPage implements ToPage { + public static ToIntPage create(Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_INT; + } + + public static ToIntPage createFromUnsignedShort(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_UNSIGNED_SHORT; + } + + public static ToIntPage createFromUnsignedByte(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_UNSIGNED_BYTE; + } + + public static ToIntPage createFromBoolean(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_BOOLEAN; + } + + @SuppressWarnings("rawtypes") + private static final ToIntPage FROM_INT = new ToIntPage<>(IntMaterializer.FACTORY); + @SuppressWarnings("rawtypes") + private static final ToIntPage FROM_UNSIGNED_SHORT = new ToIntPage<>(IntFromUnsignedShortMaterializer.FACTORY); + @SuppressWarnings("rawtypes") + private static final ToIntPage FROM_UNSIGNED_BYTE = new ToIntPage<>(IntFromUnsignedByteMaterializer.FACTORY); @SuppressWarnings("rawtypes") - private static final ToIntPage INSTANCE = new ToIntPage<>(); + private static final ToIntPage FROM_BOOLEAN = new ToIntPage<>(IntFromBooleanMaterializer.FACTORY); - public static ToIntPage create(Class nativeType) { + private static void verifyNativeType(final Class nativeType) { if (nativeType == null || int.class.equals(nativeType)) { - // noinspection unchecked - return INSTANCE; + return; } - throw new IllegalArgumentException("The native type for a Int column is " + nativeType.getCanonicalName()); } - private ToIntPage() {} + private final PageMaterializerFactory pageMaterializerFactory; + + private ToIntPage(@NotNull final PageMaterializerFactory pageMaterializerFactory) { + this.pageMaterializerFactory = pageMaterializerFactory; + } @Override @NotNull @@ -48,6 +83,6 @@ public final Object nullValue() { @Override @NotNull public final PageMaterializerFactory getPageMaterializerFactory() { - return IntMaterializer.FACTORY; + return pageMaterializerFactory; } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLongPage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLongPage.java index ca86ced2540..befd67571a1 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLongPage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLongPage.java @@ -6,7 +6,11 @@ import io.deephaven.chunk.ChunkType; import io.deephaven.chunk.attributes.Any; import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.parquet.base.materializers.LongFromBooleanMaterializer; +import io.deephaven.parquet.base.materializers.LongFromIntMaterializer; +import io.deephaven.parquet.base.materializers.LongFromUnsignedByteMaterializer; import io.deephaven.parquet.base.materializers.LongFromUnsignedIntMaterializer; +import io.deephaven.parquet.base.materializers.LongFromUnsignedShortMaterializer; import io.deephaven.parquet.base.materializers.LongMaterializer; import org.jetbrains.annotations.NotNull; @@ -26,10 +30,42 @@ public static ToLongPage createFromUnsignedInt(final Cl return FROM_UNSIGNED_INT; } + public static ToLongPage createFromInt(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_INT; + } + + public static ToLongPage createFromUnsignedShort(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_UNSIGNED_SHORT; + } + + public static ToLongPage createFromUnsignedByte(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_UNSIGNED_BYTE; + } + + public static ToLongPage createFromBoolean(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_BOOLEAN; + } + @SuppressWarnings("rawtypes") private static final ToLongPage FROM_LONG = new ToLongPage<>(LongMaterializer.FACTORY); @SuppressWarnings("rawtypes") private static final ToLongPage FROM_UNSIGNED_INT = new ToLongPage<>(LongFromUnsignedIntMaterializer.FACTORY); + @SuppressWarnings("rawtypes") + private static final ToLongPage FROM_INT = new ToLongPage<>(LongFromIntMaterializer.FACTORY); + @SuppressWarnings("rawtypes") + private static final ToLongPage FROM_UNSIGNED_SHORT = new ToLongPage<>(LongFromUnsignedShortMaterializer.FACTORY); + @SuppressWarnings("rawtypes") + private static final ToLongPage FROM_UNSIGNED_BYTE = new ToLongPage<>(LongFromUnsignedByteMaterializer.FACTORY); + @SuppressWarnings("rawtypes") + private static final ToLongPage FROM_BOOLEAN = new ToLongPage<>(LongFromBooleanMaterializer.FACTORY); private static void verifyNativeType(final Class nativeType) { if (nativeType == null || long.class.equals(nativeType)) { diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToShortPage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToShortPage.java index ce749f217a4..706617025b9 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToShortPage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToShortPage.java @@ -1,15 +1,13 @@ // // Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending // -// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY -// ****** Edit ToIntPage and run "./gradlew replicateToPage" to regenerate -// -// @formatter:off package io.deephaven.parquet.table.pagestore.topage; import io.deephaven.chunk.ChunkType; import io.deephaven.chunk.attributes.Any; import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.parquet.base.materializers.ShortFromBooleanMaterializer; +import io.deephaven.parquet.base.materializers.ShortFromUnsignedByteMaterializer; import io.deephaven.parquet.base.materializers.ShortMaterializer; import org.jetbrains.annotations.NotNull; @@ -17,19 +15,43 @@ public class ToShortPage implements ToPage { + public static ToShortPage create(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_SHORT; + } + + public static ToShortPage createFromUnsignedByte(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_UNSIGNED_BYTE; + } + + public static ToShortPage createFromBoolean(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_BOOLEAN; + } + + @SuppressWarnings("rawtypes") + private static final ToShortPage FROM_SHORT = new ToShortPage<>(ShortMaterializer.FACTORY); + @SuppressWarnings("rawtypes") + private static final ToShortPage FROM_UNSIGNED_BYTE = new ToShortPage<>(ShortFromUnsignedByteMaterializer.FACTORY); @SuppressWarnings("rawtypes") - private static final ToShortPage INSTANCE = new ToShortPage<>(); + private static final ToShortPage FROM_BOOLEAN = new ToShortPage<>(ShortFromBooleanMaterializer.FACTORY); - public static ToShortPage create(Class nativeType) { + private static void verifyNativeType(final Class nativeType) { if (nativeType == null || short.class.equals(nativeType)) { - // noinspection unchecked - return INSTANCE; + return; } - throw new IllegalArgumentException("The native type for a Short column is " + nativeType.getCanonicalName()); } - private ToShortPage() {} + private final PageMaterializerFactory pageMaterializerFactory; + + private ToShortPage(@NotNull final PageMaterializerFactory pageMaterializerFactory) { + this.pageMaterializerFactory = pageMaterializerFactory; + } @Override @NotNull @@ -52,6 +74,6 @@ public final Object nullValue() { @Override @NotNull public final PageMaterializerFactory getPageMaterializerFactory() { - return ShortMaterializer.FACTORY; + return pageMaterializerFactory; } } diff --git a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java index 8a2df002b0e..74eca341e24 100644 --- a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java +++ b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java @@ -637,6 +637,317 @@ public void basicParquetWithMetadataTest() { assertTableEquals(table, fromDiskWithCommonMetadata); } + @Test + public void testOverrideBooleanColumnType() { + final Table table = TableTools.emptyTable(5).update("A = i % 3 == 0 ? true : i % 3 == 1 ? false : null"); + final File dest = new File(rootFile, "testOverrideBooleanColumnType.parquet"); + ParquetTools.writeTable(table, dest.getPath()); + assertTableEquals(table, ParquetTools.readTable(dest.getPath())); + + final Table arrayTable = + TableTools.emptyTable(5).update("A = new Boolean[] {i % 3 == 0 ? true : i % 3 == 1 ? false : null}"); + final File arrayTableDest = new File(rootFile, "testOverrideBooleanArrayType.parquet"); + ParquetTools.writeTable(arrayTable, arrayTableDest.getPath()); + assertTableEquals(arrayTable, ParquetTools.readTable(arrayTableDest.getPath())); + // Boolean -> byte + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofByte("A"))) + .build(); + final Table byteTable = + TableTools.emptyTable(5).update("A = i % 3 == 0 ? (byte)1 : i % 3 == 1 ? (byte)0 : null"); + assertTableEquals(byteTable, ParquetTools.readTable(dest.getPath(), readInstructions)); + + final Table byteArrayTable = + TableTools.emptyTable(5) + .update("A = new byte[] {i % 3 == 0 ? (byte)1 : i % 3 == 1 ? (byte)0 : (byte)null}"); + assertTableEquals(byteArrayTable, ParquetTools.readTable(arrayTableDest.getPath(), + readInstructions.withTableDefinition(byteArrayTable.getDefinition())).select()); + } + + // Boolean -> short + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofShort("A"))) + .build(); + final Table shortTable = + TableTools.emptyTable(5).update("A = i % 3 == 0 ? (short)1 : i % 3 == 1 ? (short)0 : null"); + assertTableEquals(shortTable, ParquetTools.readTable(dest.getPath(), readInstructions)); + + final Table shortArrayTable = + TableTools.emptyTable(5) + .update("A = new short[] {i % 3 == 0 ? (short)1 : i % 3 == (short)1 ? 0 : (short)null}"); + assertTableEquals(shortArrayTable, ParquetTools.readTable(arrayTableDest.getPath(), + readInstructions.withTableDefinition(shortArrayTable.getDefinition())).select()); + } + + // Boolean -> int + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofInt("A"))) + .build(); + final Table intTable = TableTools.emptyTable(5).update("A = i % 3 == 0 ? 1 : i % 3 == 1 ? 0 : null"); + assertTableEquals(intTable, ParquetTools.readTable(dest.getPath(), readInstructions)); + + final Table intArrayTable = + TableTools.emptyTable(5).update("A = new int[] {i % 3 == 0 ? 1 : i % 3 == 1 ? 0 : null}"); + assertTableEquals(intArrayTable, ParquetTools.readTable(arrayTableDest.getPath(), + readInstructions.withTableDefinition(intArrayTable.getDefinition())).select()); + } + // Boolean -> long + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofLong("A"))) + .build(); + final Table longTable = TableTools.emptyTable(5).update("A = i % 3 == 0 ? 1L : i % 3 == 1 ? 0L : null"); + assertTableEquals(longTable, ParquetTools.readTable(dest.getPath(), readInstructions)); + + final Table longArrayTable = + TableTools.emptyTable(5).update("A = new long[] {i % 3 == 0 ? 1L : i % 3 == 1 ? 0L : null}"); + assertTableEquals(longArrayTable, ParquetTools.readTable(arrayTableDest.getPath(), + readInstructions.withTableDefinition(longArrayTable.getDefinition())).select()); + } + } + + @Test + public void testOverrideByteColumnType() { + final Table table = TableTools.emptyTable(5).update("A=(byte)(i-2)"); + final File dest = new File(rootFile, "testOverrideByteColumnType.parquet"); + ParquetTools.writeTable(table, dest.getPath()); + assertTableEquals(table, ParquetTools.readTable(dest.getPath())); + + final Table arrayTable = TableTools.emptyTable(5).update("A = new byte[] {i == 0 ? null : (byte)(i-2)}"); + final File arrayTableDest = new File(rootFile, "testOverrideByteArrayColumnType.parquet"); + ParquetTools.writeTable(arrayTable, arrayTableDest.getPath()); + assertTableEquals(arrayTable, ParquetTools.readTable(arrayTableDest.getPath())); + + // byte -> short + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofShort("A"))) + .build(); + assertTableEquals(table.updateView("A=(short)A"), + ParquetTools.readTable(dest.getPath(), readInstructions)); + + final Table shortArrayTable = + TableTools.emptyTable(5).update("A = new short[] {i == 0 ? null : (short)(i-2)}"); + assertTableEquals(shortArrayTable, ParquetTools.readTable(arrayTableDest.getPath(), + readInstructions.withTableDefinition(shortArrayTable.getDefinition())).select()); + } + // byte -> int + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofInt("A"))) + .build(); + assertTableEquals(table.updateView("A=(int)A"), + ParquetTools.readTable(dest.getPath(), readInstructions)); + + final Table intArrayTable = TableTools.emptyTable(5).update("A = new int[] {i == 0 ? null : (int)(i-2)}"); + assertTableEquals(intArrayTable, ParquetTools.readTable(arrayTableDest.getPath(), + readInstructions.withTableDefinition(intArrayTable.getDefinition())).select()); + } + // byte -> long + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofLong("A"))) + .build(); + assertTableEquals(table.updateView("A=(long)A"), + ParquetTools.readTable(dest.getPath(), readInstructions)); + + final Table longArrayTable = + TableTools.emptyTable(5).update("A = new long[] {i == 0 ? null : (long)(i-2)}"); + assertTableEquals(longArrayTable, ParquetTools.readTable(arrayTableDest.getPath(), + readInstructions.withTableDefinition(longArrayTable.getDefinition())).select()); + } + // byte -> char + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofChar("A"))) + .build(); + try { + ParquetTools.readTable(dest.getPath(), readInstructions).select(); + fail("Expected an exception because cannot convert byte to char"); + } catch (final RuntimeException ignored) { + } + } + } + + @Test + public void testOverrideShortColumnType() { + final Table table = TableTools.emptyTable(5).update("A=(short)(i-2)"); + final File dest = new File(rootFile, "testOverrideShortColumnType.parquet"); + ParquetTools.writeTable(table, dest.getPath()); + assertTableEquals(table, ParquetTools.readTable(dest.getPath())); + + final Table arrayTable = TableTools.emptyTable(5).update("A = new short[] {i == 0 ? null : (short)(i-2)}"); + final File arrayTableDest = new File(rootFile, "testOverrideShortArrayColumnType.parquet"); + ParquetTools.writeTable(arrayTable, arrayTableDest.getPath()); + assertTableEquals(arrayTable, ParquetTools.readTable(arrayTableDest.getPath())); + + // short -> int + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofInt("A"))) + .build(); + assertTableEquals(table.updateView("A=(int)A"), + ParquetTools.readTable(dest.getPath(), readInstructions)); + + final Table intArrayTable = TableTools.emptyTable(5).update("A = new int[] {i == 0 ? null : (int)(i-2)}"); + assertTableEquals(intArrayTable, ParquetTools.readTable(arrayTableDest.getPath(), + readInstructions.withTableDefinition(intArrayTable.getDefinition())).select()); + } + // short -> long + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofLong("A"))) + .build(); + assertTableEquals(table.updateView("A=(long)A"), + ParquetTools.readTable(dest.getPath(), readInstructions)); + + final Table longArrayTable = + TableTools.emptyTable(5).update("A = new long[] {i == 0 ? null : (long)(i-2)}"); + assertTableEquals(longArrayTable, ParquetTools.readTable(arrayTableDest.getPath(), + readInstructions.withTableDefinition(longArrayTable.getDefinition())).select()); + } + // short -> byte + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofByte("A"))) + .build(); + try { + ParquetTools.readTable(dest.getPath(), readInstructions).select(); + fail("Expected an exception because cannot convert short to byte"); + } catch (final RuntimeException ignored) { + } + } + } + + @Test + public void testOverrideCharColumnType() { + final Table table = TableTools.emptyTable(5).update("A=(char)i"); + final File dest = new File(rootFile, "testOverrideCharColumnType.parquet"); + ParquetTools.writeTable(table, dest.getPath()); + assertTableEquals(table, ParquetTools.readTable(dest.getPath())); + + final Table arrayTable = TableTools.emptyTable(5).update("A = new char[] {i == 0 ? null : (char)i}"); + final File arrayTableDest = new File(rootFile, "testOverrideCharArrayColumnType.parquet"); + ParquetTools.writeTable(arrayTable, arrayTableDest.getPath()); + assertTableEquals(arrayTable, ParquetTools.readTable(arrayTableDest.getPath())); + + // char -> int + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofInt("A"))) + .build(); + assertTableEquals(table.updateView("A=(int)A"), + ParquetTools.readTable(dest.getPath(), readInstructions)); + + final Table intArrayTable = TableTools.emptyTable(5).update("A = new int[] {i == 0 ? null : (int)i}"); + assertTableEquals(intArrayTable, ParquetTools.readTable(arrayTableDest.getPath(), + readInstructions.withTableDefinition(intArrayTable.getDefinition())).select()); + } + // char -> long + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofLong("A"))) + .build(); + assertTableEquals(table.updateView("A=(long)A"), + ParquetTools.readTable(dest.getPath(), readInstructions)); + + final Table longArrayTable = TableTools.emptyTable(5).update("A = new long[] {i == 0 ? null : (long)i}"); + assertTableEquals(longArrayTable, ParquetTools.readTable(arrayTableDest.getPath(), + readInstructions.withTableDefinition(longArrayTable.getDefinition())).select()); + } + // char -> short + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofShort("A"))) + .build(); + try { + ParquetTools.readTable(dest.getPath(), readInstructions).select(); + fail("Expected an exception because cannot convert char to short"); + } catch (final RuntimeException ignored) { + } + } + } + + @Test + public void testOverrideIntColumnType() { + final Table table = TableTools.emptyTable(5).update("A=(int)(i-2)"); + final File dest = new File(rootFile, "testOverrideIntColumnType.parquet"); + ParquetTools.writeTable(table, dest.getPath()); + assertTableEquals(table, ParquetTools.readTable(dest.getPath())); + + final Table arrayTable = TableTools.emptyTable(5).update("A = new int[] {i == 0 ? null : (int)(i-2)}"); + final File arrayTableDest = new File(rootFile, "testOverrideIntArrayColumnType.parquet"); + ParquetTools.writeTable(arrayTable, arrayTableDest.getPath()); + assertTableEquals(arrayTable, ParquetTools.readTable(arrayTableDest.getPath())); + + // int -> long + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofLong("A"))) + .build(); + assertTableEquals(table.updateView("A=(long)A"), + ParquetTools.readTable(dest.getPath(), readInstructions)); + final Table longArrayTable = + TableTools.emptyTable(5).update("A = new long[] {i == 0 ? null : (long)(i-2)}"); + assertTableEquals(longArrayTable, ParquetTools.readTable(arrayTableDest.getPath(), + readInstructions.withTableDefinition(longArrayTable.getDefinition())).select()); + } + + // int -> short + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofShort("A"))) + .build(); + try { + ParquetTools.readTable(dest.getPath(), readInstructions).select(); + fail("Expected an exception because cannot convert int to short"); + } catch (final RuntimeException ignored) { + } + } + } + + @Test + public void testOverrideFloatColumnType() { + final Table table = TableTools.emptyTable(5).update("A=(float)(i-2)"); + final File dest = new File(rootFile, "testOverrideFloatColumnType.parquet"); + ParquetTools.writeTable(table, dest.getPath()); + assertTableEquals(table, ParquetTools.readTable(dest.getPath())); + + final Table arrayTable = TableTools.emptyTable(5).update("A = new float[] {i == 0 ? null : (float)(i-2)}"); + final File arrayTableDest = new File(rootFile, "testOverrideFloatArrayColumnType.parquet"); + ParquetTools.writeTable(arrayTable, arrayTableDest.getPath()); + assertTableEquals(arrayTable, ParquetTools.readTable(arrayTableDest.getPath())); + + // float -> double + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofDouble("A"))) + .build(); + assertTableEquals(table.updateView("A=(double)A"), + ParquetTools.readTable(dest.getPath(), readInstructions)); + final Table doubleArrayTable = + TableTools.emptyTable(5).update("A = new double[] {i == 0 ? null : (double)(i-2)}"); + assertTableEquals(doubleArrayTable, ParquetTools.readTable(arrayTableDest.getPath(), + readInstructions.withTableDefinition(doubleArrayTable.getDefinition())).select()); + } + + // float -> short + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofShort("A"))) + .build(); + try { + ParquetTools.readTable(dest.getPath(), readInstructions).select(); + fail("Expected an exception because cannot convert float to short"); + } catch (final RuntimeException ignored) { + } + } + } + + @Test public void parquetIndexingBuilderTest() { final Table source = TableTools.emptyTable(1_000_000).updateView( diff --git a/py/server/tests/test_parquet.py b/py/server/tests/test_parquet.py index 2d49f7c82cd..1a5d3b3e31c 100644 --- a/py/server/tests/test_parquet.py +++ b/py/server/tests/test_parquet.py @@ -14,7 +14,7 @@ from deephaven import DHError, empty_table, dtypes, new_table from deephaven import arrow as dharrow -from deephaven.column import InputColumn, Column, ColumnType, string_col, int_col, char_col, long_col +from deephaven.column import InputColumn, Column, ColumnType, string_col, int_col, char_col, long_col, short_col from deephaven.pandas import to_pandas, to_table from deephaven.parquet import (write, batch_write, read, delete, ColumnInstruction, ParquetFileLayout, write_partitioned) @@ -782,6 +782,32 @@ def test_unsigned_ints(self): ]) self.assert_table_equals(table_from_disk, expected) + def test_unsigned_byte_cast(self): + data = {'uint8Col': [255, 2, 0]} + df = pandas.DataFrame(data) + df['uint8Col'] = df['uint8Col'].astype(np.uint8) + pyarrow.parquet.write_table(pyarrow.Table.from_pandas(df), 'data_from_pyarrow.parquet') + + # UByte -> Char + table_from_disk = read("data_from_pyarrow.parquet", table_definition={"uint8Col": dtypes.char}) + expected = new_table([char_col("uint8Col", [255, 2, 0])]) + self.assert_table_equals(table_from_disk, expected) + + # UByte -> Short + table_from_disk = read("data_from_pyarrow.parquet", table_definition={"uint8Col": dtypes.short}) + expected = new_table([short_col("uint8Col", [255, 2, 0])]) + self.assert_table_equals(table_from_disk, expected) + + # UByte -> Int + table_from_disk = read("data_from_pyarrow.parquet", table_definition={"uint8Col": dtypes.int32}) + expected = new_table([int_col("uint8Col", [255, 2, 0])]) + self.assert_table_equals(table_from_disk, expected) + + # UByte -> Long + table_from_disk = read("data_from_pyarrow.parquet", table_definition={"uint8Col": dtypes.long}) + expected = new_table([long_col("uint8Col", [255, 2, 0])]) + self.assert_table_equals(table_from_disk, expected) + def test_v2_pages(self): def test_v2_pages_helper(dh_table): write(dh_table, "data_from_dh.parquet") diff --git a/replication/static/src/main/java/io/deephaven/replicators/ReplicatePageMaterializers.java b/replication/static/src/main/java/io/deephaven/replicators/ReplicatePageMaterializers.java index 13909dd32b3..e4e96e356d7 100644 --- a/replication/static/src/main/java/io/deephaven/replicators/ReplicatePageMaterializers.java +++ b/replication/static/src/main/java/io/deephaven/replicators/ReplicatePageMaterializers.java @@ -5,8 +5,7 @@ import java.io.IOException; -import static io.deephaven.replication.ReplicatePrimitiveCode.charToShortAndByte; -import static io.deephaven.replication.ReplicatePrimitiveCode.floatToAllFloatingPoints; +import static io.deephaven.replication.ReplicatePrimitiveCode.charToByte; import static io.deephaven.replication.ReplicatePrimitiveCode.replaceAll; /** @@ -20,7 +19,14 @@ public class ReplicatePageMaterializers { "extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/"; private static final String CHAR_MATERIALIZER_PATH = MATERIALIZER_DIR + "CharMaterializer.java"; - private static final String FLOAT_MATERIALIZER_PATH = MATERIALIZER_DIR + "FloatMaterializer.java"; + private static final String SHORT_MATERIALIZER_PATH = MATERIALIZER_DIR + "ShortMaterializer.java"; + private static final String INT_MATERIALIZER_PATH = MATERIALIZER_DIR + "IntMaterializer.java"; + private static final String LONG_MATERIALIZER_BASE_PATH = MATERIALIZER_DIR + "LongMaterializerBase.java"; + private static final String LONG_MATERIALIZER_PATH = MATERIALIZER_DIR + "LongMaterializer.java"; + private static final String LONG_FROM_INT_MATERIALIZER_PATH = MATERIALIZER_DIR + "LongFromIntMaterializer.java"; + private static final String LONG_FROM_UNSIGNED_SHORT_MATERIALIZER_PATH = + MATERIALIZER_DIR + "LongFromUnsignedShortMaterializer.java"; + private static final String DOUBLE_MATERIALIZER_PATH = MATERIALIZER_DIR + "DoubleMaterializer.java"; private static final String LOCAL_TIME_FROM_MICROS_MATERIALIZER_PATH = MATERIALIZER_DIR + "LocalTimeFromMicrosMaterializer.java"; private static final String LOCAL_DATE_TIME_FROM_MILLIS_MATERIALIZER_PATH = @@ -34,18 +40,121 @@ public class ReplicatePageMaterializers { private static final String BIG_INTEGER_MATERIALIZER_PATH = MATERIALIZER_DIR + "BigIntegerMaterializer.java"; public static void main(String... args) throws IOException { - charToShortAndByte(TASK, CHAR_MATERIALIZER_PATH, NO_EXCEPTIONS); + charToByte(TASK, CHAR_MATERIALIZER_PATH, NO_EXCEPTIONS); - // Float -> Double - floatToAllFloatingPoints(TASK, FLOAT_MATERIALIZER_PATH, NO_EXCEPTIONS); - - // Float -> Int + // LongBase -> IntBase String[][] pairs = new String[][] { - {"readFloat", "readInteger"}, - {"Float", "Int"}, - {"float", "int"} + {"Long", "Int"}, + {"long", "int"}, + }; + replaceAll(TASK, LONG_MATERIALIZER_BASE_PATH, null, NO_EXCEPTIONS, pairs); + + // Long -> Int + pairs = new String[][] { + {"readLong", "readInteger"}, + {"Long", "Int"}, + {"long", "int"} + }; + replaceAll(TASK, LONG_MATERIALIZER_PATH, null, NO_EXCEPTIONS, pairs); + + // Int -> IntFromBoolean + pairs = new String[][] { + {"IntMaterializer", "IntFromBooleanMaterializer"}, + {"readInteger\\(\\)", "readBoolean() ? 1 : 0"} + }; + replaceAll(TASK, INT_MATERIALIZER_PATH, null, new String[] {"IntMaterializerBase"}, pairs); + + // LongBase -> ShortBase + pairs = new String[][] { + {"Long", "Short"}, + {"long", "short"}, + }; + replaceAll(TASK, LONG_MATERIALIZER_BASE_PATH, null, NO_EXCEPTIONS, pairs); + + // Long -> Short + pairs = new String[][] { + {"dataReader.readLong", "(short) dataReader.readInteger"}, + {"dataReader, 0, numValues", "dataReader, (short) 0, numValues"}, + {"Long", "Short"}, + {"long", "short"} + }; + replaceAll(TASK, LONG_MATERIALIZER_PATH, null, NO_EXCEPTIONS, pairs); + + // Long -> LongFromInt + pairs = new String[][] { + {"LongMaterializer", "LongFromIntMaterializer"}, + {"readLong", "readInteger"}, + }; + replaceAll(TASK, LONG_MATERIALIZER_PATH, LONG_FROM_INT_MATERIALIZER_PATH, null, + new String[] {"LongMaterializerBase"}, pairs); + + // Long -> LongFromBoolean + pairs = new String[][] { + {"LongMaterializer", "LongFromBooleanMaterializer"}, + {"readLong\\(\\)", "readBoolean() ? 1 : 0"}, + }; + replaceAll(TASK, LONG_MATERIALIZER_PATH, null, new String[] {"LongMaterializerBase"}, pairs); + + // LongFromUnsignedShort -> LongFromUnsignedByte + pairs = new String[][] { + {"Short", "Byte"}, + {"short", "byte"} + }; + replaceAll(TASK, LONG_FROM_UNSIGNED_SHORT_MATERIALIZER_PATH, null, NO_EXCEPTIONS, pairs); + + // LongFromUnsignedShort -> LongFromUnsignedInt + pairs = new String[][] { + {"Short.toUnsignedLong", "Integer.toUnsignedLong"}, + {"Short", "Int"}, + {"short", "int"} + }; + replaceAll(TASK, LONG_FROM_UNSIGNED_SHORT_MATERIALIZER_PATH, null, NO_EXCEPTIONS, pairs); + + // LongFromUnsignedShort -> IntFromUnsignedShort + pairs = new String[][] { + {"LongFromUnsignedShort", "IntFromUnsignedShort"}, + {"Long", "Int"}, + {"long", "int"} + }; + replaceAll(TASK, LONG_FROM_UNSIGNED_SHORT_MATERIALIZER_PATH, null, NO_EXCEPTIONS, pairs); + + // LongFromUnsignedShort -> IntFromUnsignedByte + pairs = new String[][] { + {"Short", "Byte"}, + {"short", "byte"}, + {"Long", "Int"}, + {"long", "int"} + }; + replaceAll(TASK, LONG_FROM_UNSIGNED_SHORT_MATERIALIZER_PATH, null, NO_EXCEPTIONS, pairs); + + // LongBase -> DoubleBase + pairs = new String[][] { + {"Long", "Double"}, + {"long", "double"}, + }; + replaceAll(TASK, LONG_MATERIALIZER_BASE_PATH, null, NO_EXCEPTIONS, pairs); + + // Long -> Double + pairs = new String[][] { + {"dataReader.readLong", "dataReader.readDouble"}, + {"Long", "Double"}, + {"long", "double"} + }; + replaceAll(TASK, LONG_MATERIALIZER_PATH, null, NO_EXCEPTIONS, pairs); + + // Double -> DoubleFromFloat + pairs = new String[][] { + {"DoubleMaterializer", "DoubleFromFloatMaterializer"}, + {"Double", "Float"} + }; + replaceAll(TASK, DOUBLE_MATERIALIZER_PATH, null, new String[] {"DoubleMaterializerBase"}, pairs); + + // Short -> ShortFromBoolean + pairs = new String[][] { + {"ShortMaterializer", "ShortFromBooleanMaterializer"}, + {"dataReader.readInteger\\(\\)", "(dataReader.readBoolean() ? 1 : 0)"} }; - replaceAll(TASK, FLOAT_MATERIALIZER_PATH, null, NO_EXCEPTIONS, pairs); + replaceAll(TASK, SHORT_MATERIALIZER_PATH, null, new String[] {"ShortMaterializerBase"}, pairs); // LocalTimeFromMicros -> LocalTimeFromMillis // We change from Micros to Millis and not the other way since converting from Long to Integer has fewer diff --git a/replication/static/src/main/java/io/deephaven/replicators/ReplicateToPage.java b/replication/static/src/main/java/io/deephaven/replicators/ReplicateToPage.java index bd76f91cf1a..42e4cf0f6cd 100644 --- a/replication/static/src/main/java/io/deephaven/replicators/ReplicateToPage.java +++ b/replication/static/src/main/java/io/deephaven/replicators/ReplicateToPage.java @@ -4,8 +4,10 @@ package io.deephaven.replicators; import java.io.IOException; +import java.util.Map; -import static io.deephaven.replication.ReplicatePrimitiveCode.intToAllButBooleanAndLong; +import static io.deephaven.replication.ReplicatePrimitiveCode.charToByte; +import static io.deephaven.replication.ReplicatePrimitiveCode.charToFloat; import static io.deephaven.replication.ReplicatePrimitiveCode.replaceAll; /** @@ -18,12 +20,12 @@ public class ReplicateToPage { private static final String TO_PAGE_DIR = "extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/"; - private static final String TO_INT_PAGE_PATH = TO_PAGE_DIR + "ToIntPage.java"; + private static final String TO_CHAR_PAGE_PATH = TO_PAGE_DIR + "ToCharPage.java"; private static final String TO_LOCAL_DATE_TIME_PAGE_PATH = TO_PAGE_DIR + "ToLocalDateTimePage.java"; private static final String TO_BIG_INTEGER_PAGE_PATH = TO_PAGE_DIR + "ToBigIntegerPage.java"; public static void main(String... args) throws IOException { - intToAllButBooleanAndLong(TASK, TO_INT_PAGE_PATH, "interface"); + charToFloat(TASK, TO_CHAR_PAGE_PATH, null, "interface"); // LocalDateTime -> LocalTime String[][] pairs = new String[][] {