Skip to content

Commit

Permalink
refactor ParquetRewriter slightly to address PR comments
Browse files Browse the repository at this point in the history
  • Loading branch information
maxim_konstantinov committed Sep 8, 2024
1 parent bb42979 commit 5b97a4c
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,7 @@ public ParquetRewriter(RewriteOptions options) throws IOException {
.collect(Collectors.toList()),
out);

this.outSchema = getSchema();
this.outSchema = pruneColumnsInSchema(outSchema, options.getPruneColumns());
this.outSchema = pruneColumnsInSchema(getSchema(), options.getPruneColumns());
this.extraMetaData = getExtraMetadata(options);

if (options.getMaskColumns() != null) {
Expand Down Expand Up @@ -208,7 +207,6 @@ private MessageType getSchema() {
.getFields()
.forEach(x -> {
if (!fieldNames.containsKey(x.getName())) {
LOG.info("Column {} is added to the output from inputFilesToJoin side", x.getName());
fieldNames.put(x.getName(), x);
} else if (overwriteInputWithJoinColumns) {
LOG.info("Column {} in inputFiles is overwritten by inputFilesToJoin side", x.getName());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@
*/
package org.apache.parquet.hadoop.rewrite;

import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.*;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.DOUBLE;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FLOAT;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64;
import static org.apache.parquet.schema.Type.Repetition.OPTIONAL;
import static org.apache.parquet.schema.Type.Repetition.REPEATED;
import static org.apache.parquet.schema.Type.Repetition.REQUIRED;
Expand All @@ -36,7 +39,14 @@
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import java.io.IOException;
import java.util.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.stream.Collectors;
Expand Down

0 comments on commit 5b97a4c

Please sign in to comment.