Skip to content

Commit

Permalink
feat: Added support to upcast numeric types when reading parquet (#5923)
Browse files Browse the repository at this point in the history
  • Loading branch information
malhotrashivam authored Aug 15, 2024
1 parent 00625b2 commit c74e570
Show file tree
Hide file tree
Showing 31 changed files with 1,297 additions and 159 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,17 @@

import java.util.Arrays;

public class BoolMaterializer implements PageMaterializer {
public class BooleanAsByteMaterializer implements PageMaterializer {

public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() {
@Override
public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) {
return new BoolMaterializer(dataReader, (byte) nullValue, numValues);
return new BooleanAsByteMaterializer(dataReader, (byte) nullValue, numValues);
}

@Override
public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) {
return new BoolMaterializer(dataReader, numValues);
return new BooleanAsByteMaterializer(dataReader, numValues);
}
};

Expand All @@ -28,11 +28,11 @@ public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int num
private final byte nullValue;
private final byte[] data;

private BoolMaterializer(ValuesReader dataReader, int numValues) {
private BooleanAsByteMaterializer(ValuesReader dataReader, int numValues) {
this(dataReader, (byte) 0, numValues);
}

private BoolMaterializer(ValuesReader dataReader, byte nullValue, int numValues) {
private BooleanAsByteMaterializer(ValuesReader dataReader, byte nullValue, int numValues) {
this.dataReader = dataReader;
this.nullValue = nullValue;
this.data = new byte[numValues];
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
//
// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending
//
// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY
// ****** Edit DoubleMaterializer and run "./gradlew replicatePageMaterializers" to regenerate
//
// @formatter:off
package io.deephaven.parquet.base.materializers;

import io.deephaven.parquet.base.PageMaterializer;
import io.deephaven.parquet.base.PageMaterializerFactory;
import org.apache.parquet.column.values.ValuesReader;

/**
 * {@link PageMaterializer} that pulls values from a {@link ValuesReader} with
 * {@link ValuesReader#readFloat()} and stores each one, widened to {@code double}, in the
 * {@code data} array inherited from {@link DoubleMaterializerBase}.
 */
public class DoubleFromFloatMaterializer extends DoubleMaterializerBase implements PageMaterializer {

    /** Factory creating instances of this materializer, with or without a caller-supplied null value. */
    public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() {
        @Override
        public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) {
            // Same cast as before: the supplied null value object is cast to a primitive double.
            final double typedNullValue = (double) nullValue;
            return new DoubleFromFloatMaterializer(dataReader, typedNullValue, numValues);
        }

        @Override
        public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) {
            return new DoubleFromFloatMaterializer(dataReader, numValues);
        }
    };

    private final ValuesReader dataReader;

    /**
     * Full constructor.
     *
     * @param dataReader source of the float values
     * @param nullValue value handed to the base class for use by {@code fillNulls}
     * @param numValues length of the backing array allocated by the base class
     */
    private DoubleFromFloatMaterializer(ValuesReader dataReader, double nullValue, int numValues) {
        super(nullValue, numValues);
        this.dataReader = dataReader;
    }

    /** Convenience constructor that delegates with a null value of zero. */
    private DoubleFromFloatMaterializer(ValuesReader dataReader, int numValues) {
        this(dataReader, 0, numValues);
    }

    /**
     * Reads one float per slot in {@code [startIndex, endIndex)} and stores it as a double.
     *
     * @param startIndex first slot to fill (inclusive)
     * @param endIndex end of the range to fill (exclusive)
     */
    @Override
    public void fillValues(int startIndex, int endIndex) {
        for (int slot = startIndex; slot < endIndex; slot++) {
            final float value = dataReader.readFloat();
            data[slot] = value; // implicit float -> double widening
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending
//
// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY
// ****** Edit FloatMaterializer and run "./gradlew replicatePageMaterializers" to regenerate
// ****** Edit LongMaterializer and run "./gradlew replicatePageMaterializers" to regenerate
//
// @formatter:off
package io.deephaven.parquet.base.materializers;
Expand All @@ -11,9 +11,7 @@
import io.deephaven.parquet.base.PageMaterializerFactory;
import org.apache.parquet.column.values.ValuesReader;

import java.util.Arrays;

public class DoubleMaterializer implements PageMaterializer {
public class DoubleMaterializer extends DoubleMaterializerBase implements PageMaterializer {

public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() {
@Override
Expand All @@ -29,22 +27,13 @@ public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int num

private final ValuesReader dataReader;

private final double nullValue;
private final double[] data;

private DoubleMaterializer(ValuesReader dataReader, int numValues) {
this(dataReader, 0, numValues);
}

private DoubleMaterializer(ValuesReader dataReader, double nullValue, int numValues) {
super(nullValue, numValues);
this.dataReader = dataReader;
this.nullValue = nullValue;
this.data = new double[numValues];
}

@Override
public void fillNulls(int startIndex, int endIndex) {
Arrays.fill(data, startIndex, endIndex, nullValue);
}

@Override
Expand All @@ -53,15 +42,4 @@ public void fillValues(int startIndex, int endIndex) {
data[ii] = dataReader.readDouble();
}
}

@Override
public Object fillAll() {
fillValues(0, data.length);
return data;
}

@Override
public Object data() {
return data;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
//
// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending
//
// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY
// ****** Edit LongMaterializerBase and run "./gradlew replicatePageMaterializers" to regenerate
//
// @formatter:off
package io.deephaven.parquet.base.materializers;

import io.deephaven.parquet.base.PageMaterializer;

import java.util.Arrays;

/**
 * Package-private base class for materializers that accumulate their output in a {@code double[]}.
 * It owns the backing array and the null value and implements {@code fillNulls}, {@code fillAll}
 * and {@code data}; this class does not implement {@code fillValues}.
 */
abstract class DoubleMaterializerBase implements PageMaterializer {

    /** Backing array, directly accessible to classes in this package. */
    final double[] data;
    private final double nullValue;

    /**
     * @param nullValue value written by {@link #fillNulls(int, int)}
     * @param numValues length of the backing array
     */
    DoubleMaterializerBase(double nullValue, int numValues) {
        this.data = new double[numValues];
        this.nullValue = nullValue;
    }

    /** Returns the backing array itself (not a copy). */
    @Override
    public Object data() {
        return this.data;
    }

    /** Fills the entire backing array through {@code fillValues} and returns that array. */
    @Override
    public Object fillAll() {
        final double[] buffer = this.data;
        fillValues(0, buffer.length);
        return buffer;
    }

    /** Writes the null value into every slot of {@code [startIndex, endIndex)}. */
    @Override
    public void fillNulls(int startIndex, int endIndex) {
        Arrays.fill(this.data, startIndex, endIndex, this.nullValue);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
//
// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending
//
// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY
// ****** Edit IntMaterializer and run "./gradlew replicatePageMaterializers" to regenerate
//
// @formatter:off
package io.deephaven.parquet.base.materializers;

import io.deephaven.parquet.base.PageMaterializer;
import io.deephaven.parquet.base.PageMaterializerFactory;
import org.apache.parquet.column.values.ValuesReader;

/**
 * {@link PageMaterializer} that pulls values from a {@link ValuesReader} with
 * {@link ValuesReader#readBoolean()} and stores {@code 1} for {@code true} and {@code 0} for
 * {@code false} in the {@code data} array inherited from {@link IntMaterializerBase}.
 */
public class IntFromBooleanMaterializer extends IntMaterializerBase implements PageMaterializer {

    /** Factory creating instances of this materializer, with or without a caller-supplied null value. */
    public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() {
        @Override
        public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) {
            // Same cast as before: the supplied null value object is cast to a primitive int.
            final int typedNullValue = (int) nullValue;
            return new IntFromBooleanMaterializer(dataReader, typedNullValue, numValues);
        }

        @Override
        public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) {
            return new IntFromBooleanMaterializer(dataReader, numValues);
        }
    };

    private final ValuesReader dataReader;

    /**
     * Full constructor.
     *
     * @param dataReader source of the boolean values
     * @param nullValue value handed to the base class constructor
     * @param numValues value count handed to the base class constructor
     */
    private IntFromBooleanMaterializer(ValuesReader dataReader, int nullValue, int numValues) {
        super(nullValue, numValues);
        this.dataReader = dataReader;
    }

    /** Convenience constructor that delegates with a null value of zero. */
    private IntFromBooleanMaterializer(ValuesReader dataReader, int numValues) {
        this(dataReader, 0, numValues);
    }

    /**
     * Reads one boolean per slot in {@code [startIndex, endIndex)} and stores it as 0 or 1.
     *
     * @param startIndex first slot to fill (inclusive)
     * @param endIndex end of the range to fill (exclusive)
     */
    @Override
    public void fillValues(int startIndex, int endIndex) {
        for (int slot = startIndex; slot < endIndex; slot++) {
            if (dataReader.readBoolean()) {
                data[slot] = 1;
            } else {
                data[slot] = 0;
            }
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
//
// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending
//
// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY
// ****** Edit LongFromUnsignedShortMaterializer and run "./gradlew replicatePageMaterializers" to regenerate
//
// @formatter:off
package io.deephaven.parquet.base.materializers;

import io.deephaven.parquet.base.PageMaterializer;
import io.deephaven.parquet.base.PageMaterializerFactory;
import org.apache.parquet.column.values.ValuesReader;

/**
 * {@link PageMaterializer} that pulls values from a {@link ValuesReader} with
 * {@link ValuesReader#readInteger()}, narrows each to a {@code byte}, and stores the unsigned
 * interpretation of that byte in the {@code data} array inherited from {@link IntMaterializerBase}.
 */
public class IntFromUnsignedByteMaterializer extends IntMaterializerBase implements PageMaterializer {

    /** Factory creating instances of this materializer, with or without a caller-supplied null value. */
    public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() {
        @Override
        public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) {
            // Same cast as before: the supplied null value object is cast to a primitive int.
            final int typedNullValue = (int) nullValue;
            return new IntFromUnsignedByteMaterializer(dataReader, typedNullValue, numValues);
        }

        @Override
        public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) {
            return new IntFromUnsignedByteMaterializer(dataReader, numValues);
        }
    };

    private final ValuesReader dataReader;

    /**
     * Full constructor.
     *
     * @param dataReader source of the integer values
     * @param nullValue value handed to the base class constructor
     * @param numValues value count handed to the base class constructor
     */
    private IntFromUnsignedByteMaterializer(ValuesReader dataReader, int nullValue, int numValues) {
        super(nullValue, numValues);
        this.dataReader = dataReader;
    }

    /** Convenience constructor that delegates with a null value of zero. */
    private IntFromUnsignedByteMaterializer(ValuesReader dataReader, int numValues) {
        this(dataReader, 0, numValues);
    }

    /**
     * Fills slots {@code [startIndex, endIndex)} with values in the range 0..255.
     *
     * @param startIndex first slot to fill (inclusive)
     * @param endIndex end of the range to fill (exclusive)
     */
    @Override
    public void fillValues(int startIndex, int endIndex) {
        for (int slot = startIndex; slot < endIndex; slot++) {
            // Keep only the low 8 bits, then reinterpret them as an unsigned quantity.
            final byte narrowed = (byte) dataReader.readInteger();
            data[slot] = Byte.toUnsignedInt(narrowed);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
//
// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending
//
// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY
// ****** Edit LongFromUnsignedShortMaterializer and run "./gradlew replicatePageMaterializers" to regenerate
//
// @formatter:off
package io.deephaven.parquet.base.materializers;

import io.deephaven.parquet.base.PageMaterializer;
import io.deephaven.parquet.base.PageMaterializerFactory;
import org.apache.parquet.column.values.ValuesReader;

/**
 * {@link PageMaterializer} that pulls values from a {@link ValuesReader} with
 * {@link ValuesReader#readInteger()}, narrows each to a {@code short}, and stores the unsigned
 * interpretation of that short in the {@code data} array inherited from {@link IntMaterializerBase}.
 */
public class IntFromUnsignedShortMaterializer extends IntMaterializerBase implements PageMaterializer {

    /** Factory creating instances of this materializer, with or without a caller-supplied null value. */
    public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() {
        @Override
        public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) {
            // Same cast as before: the supplied null value object is cast to a primitive int.
            final int typedNullValue = (int) nullValue;
            return new IntFromUnsignedShortMaterializer(dataReader, typedNullValue, numValues);
        }

        @Override
        public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) {
            return new IntFromUnsignedShortMaterializer(dataReader, numValues);
        }
    };

    private final ValuesReader dataReader;

    /**
     * Full constructor.
     *
     * @param dataReader source of the integer values
     * @param nullValue value handed to the base class constructor
     * @param numValues value count handed to the base class constructor
     */
    private IntFromUnsignedShortMaterializer(ValuesReader dataReader, int nullValue, int numValues) {
        super(nullValue, numValues);
        this.dataReader = dataReader;
    }

    /** Convenience constructor that delegates with a null value of zero. */
    private IntFromUnsignedShortMaterializer(ValuesReader dataReader, int numValues) {
        this(dataReader, 0, numValues);
    }

    /**
     * Fills slots {@code [startIndex, endIndex)} with values in the range 0..65535.
     *
     * @param startIndex first slot to fill (inclusive)
     * @param endIndex end of the range to fill (exclusive)
     */
    @Override
    public void fillValues(int startIndex, int endIndex) {
        for (int slot = startIndex; slot < endIndex; slot++) {
            // Keep only the low 16 bits, then reinterpret them as an unsigned quantity.
            final short narrowed = (short) dataReader.readInteger();
            data[slot] = Short.toUnsignedInt(narrowed);
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending
//
// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY
// ****** Edit FloatMaterializer and run "./gradlew replicatePageMaterializers" to regenerate
// ****** Edit LongMaterializer and run "./gradlew replicatePageMaterializers" to regenerate
//
// @formatter:off
package io.deephaven.parquet.base.materializers;
Expand All @@ -11,9 +11,7 @@
import io.deephaven.parquet.base.PageMaterializerFactory;
import org.apache.parquet.column.values.ValuesReader;

import java.util.Arrays;

public class IntMaterializer implements PageMaterializer {
public class IntMaterializer extends IntMaterializerBase implements PageMaterializer {

public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() {
@Override
Expand All @@ -29,22 +27,13 @@ public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int num

private final ValuesReader dataReader;

private final int nullValue;
private final int[] data;

private IntMaterializer(ValuesReader dataReader, int numValues) {
this(dataReader, 0, numValues);
}

private IntMaterializer(ValuesReader dataReader, int nullValue, int numValues) {
super(nullValue, numValues);
this.dataReader = dataReader;
this.nullValue = nullValue;
this.data = new int[numValues];
}

@Override
public void fillNulls(int startIndex, int endIndex) {
Arrays.fill(data, startIndex, endIndex, nullValue);
}

@Override
Expand All @@ -53,15 +42,4 @@ public void fillValues(int startIndex, int endIndex) {
data[ii] = dataReader.readInteger();
}
}

@Override
public Object fillAll() {
fillValues(0, data.length);
return data;
}

@Override
public Object data() {
return data;
}
}
Loading

0 comments on commit c74e570

Please sign in to comment.