Merge pull request #57 from noodlesbad/master
refactored naming and updated to the latest versions of the lz4 and zstd libraries
carlomedas authored Sep 5, 2022
2 parents 7ba3738 + c0c5aba commit 766f508
Showing 205 changed files with 26,342 additions and 8,990 deletions.
5 changes: 5 additions & 0 deletions .gitignore
@@ -4,3 +4,8 @@ java/*/target
# Ignore native binaries
native/4mc
native/libhadoop-4mc.so.*
/.idea/
/java/examples/examples.iml
/java/.idea/
/java/hadoop-4mc/hadoop-4mc.iml
/java/parent.iml
9 changes: 5 additions & 4 deletions Readme.md
@@ -52,6 +52,7 @@ you are not needing long-term storage.
## Releases and change history
Releases with artifacts are available at https://github.com/fingltd/4mc/releases - attached artifacts contain a jar with the embedded native library for Windows/Linux/MacOS. You can still compile the JNI bindings for your own platform and override the embedded ones.
4mc CLI tool for all platforms is now available at https://github.com/fingltd/4mc/tree/master/tool
* **4mc 3.0.0** - Updated native libraries: LZ4 1.9.4 and ZSTD 1.5.2, package rename
* **4mc 2.2.0** - Updated native libraries: LZ4 1.9.2 and ZSTD 1.4.4
* **4mc 2.1.0** - Compatibility with newer Hadoop (2.7.x) and Spark (2.4.3)
* **4mc 2.0.0** - 4mz to support ZSTD (zstandard https://github.com/facebook/zstd)
@@ -80,10 +81,10 @@ Enabling codecs has no difference from usual, i.e. by adding them to configuration
org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.BZip2Codec,
com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec,
<!-- 4mc codecs -->
com.hadoop.compression.fourmc.Lz4Codec,com.hadoop.compression.fourmc.Lz4MediumCodec,com.hadoop.compression.fourmc.Lz4HighCodec,com.hadoop.compression.fourmc.Lz4UltraCodec,
com.hadoop.compression.fourmc.FourMcCodec,com.hadoop.compression.fourmc.FourMcMediumCodec,com.hadoop.compression.fourmc.FourMcHighCodec,com.hadoop.compression.fourmc.FourMcUltraCodec,
com.fing.compression.fourmc.Lz4Codec,com.fing.compression.fourmc.Lz4MediumCodec,com.fing.compression.fourmc.Lz4HighCodec,com.fing.compression.fourmc.Lz4UltraCodec,
com.fing.compression.fourmc.FourMcCodec,com.fing.compression.fourmc.FourMcMediumCodec,com.fing.compression.fourmc.FourMcHighCodec,com.fing.compression.fourmc.FourMcUltraCodec,
<!-- 4mz codecs -->
com.hadoop.compression.fourmc.FourMzCodec,com.hadoop.compression.fourmc.FourMzMediumCodec,com.hadoop.compression.fourmc.FourMzHighCodec,com.hadoop.compression.fourmc.FourMzUltraCodec
com.fing.compression.fourmc.FourMzCodec,com.fing.compression.fourmc.FourMzMediumCodec,com.fing.compression.fourmc.FourMzHighCodec,com.fing.compression.fourmc.FourMzUltraCodec
</value>
</property>
```
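For completeness, the same codec list can also be set programmatically on a Hadoop `Configuration`. The sketch below is not part of this commit; it only assumes the standard Hadoop `io.compression.codecs` key plus the renamed `com.fing.compression.fourmc` classes shown above:

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

public class FourMcCodecSetup {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Register the renamed 4mc (LZ4) and 4mz (ZSTD) codecs next to the Hadoop defaults.
        conf.set("io.compression.codecs",
                "org.apache.hadoop.io.compress.GzipCodec,"
              + "org.apache.hadoop.io.compress.DefaultCodec,"
              + "com.fing.compression.fourmc.FourMcCodec,"
              + "com.fing.compression.fourmc.FourMzCodec");

        // Resolve a codec by file extension to check that registration worked.
        CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(new Path("data/foo.4mc"));
        System.out.println(codec == null ? "no codec registered for .4mc" : codec.getClass().getName());
    }
}
```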
@@ -116,7 +117,7 @@ filepath = 'gs://data/foo.4mc'
# This will read the file and partition it as it loads
data = sc.newAPIHadoopFile(
filepath
, 'com.hadoop.mapreduce.FourMcTextInputFormat'
, 'com.fing.mapreduce.FourMcTextInputFormat'
, 'org.apache.hadoop.io.LongWritable'
, 'org.apache.hadoop.io.Text'
)
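For Java users, the equivalent read with the renamed input format would look roughly like the sketch below. It is not from this repository; it assumes Spark's `JavaSparkContext.newAPIHadoopFile` and the `com.fing.mapreduce.FourMcTextInputFormat` class on the classpath:

```java
import com.fing.mapreduce.FourMcTextInputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class FourMcSparkRead {
    public static void main(String[] args) {
        JavaSparkContext sc = new JavaSparkContext(new SparkConf().setAppName("4mc-read"));
        // Read the .4mc file; the input format lets Spark split it while loading.
        JavaPairRDD<LongWritable, Text> data = sc.newAPIHadoopFile(
                "gs://data/foo.4mc",
                FourMcTextInputFormat.class,
                LongWritable.class,
                Text.class,
                new Configuration());
        System.out.println("partitions: " + data.getNumPartitions());
        sc.stop();
    }
}
```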
6 changes: 3 additions & 3 deletions java/examples/pom.xml
@@ -3,7 +3,7 @@
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">

<parent>
<groupId>com.hadoop.fourmc</groupId>
<groupId>com.fing.fourmc</groupId>
<artifactId>parent</artifactId>
<version>1.0</version>
</parent>
@@ -21,9 +21,9 @@
<!-- 4mc -->

<dependency>
<groupId>com.hadoop.fourmc</groupId>
<groupId>com.fing.fourmc</groupId>
<artifactId>hadoop-4mc</artifactId>
<version>2.0.0</version>
<version>3.0.0</version>
</dependency>

<!-- hadoop 2.6 / Elephant-Bird / spark 2.0.0 / protobuf deps -->
@@ -1,7 +1,7 @@
package com.hadoop.fourmc.elephantbird.adapter;
package com.fing.fourmc.elephantbird.adapter;

import com.google.protobuf.Message;
import com.hadoop.mapreduce.FourMcInputFormat;
import com.fing.mapreduce.FourMcInputFormat;
import com.twitter.elephantbird.mapreduce.input.LzoProtobufBlockRecordReader;
import com.twitter.elephantbird.mapreduce.io.BinaryWritable;
import com.twitter.elephantbird.util.HadoopCompat;
@@ -1,7 +1,7 @@
package com.hadoop.fourmc.elephantbird.adapter;
package com.fing.fourmc.elephantbird.adapter;

import com.google.protobuf.Message;
import com.hadoop.compression.fourmc.Lz4Codec;
import com.fing.compression.fourmc.Lz4Codec;
import com.twitter.elephantbird.mapreduce.io.ProtobufBlockWriter;
import com.twitter.elephantbird.mapreduce.io.ProtobufWritable;
import com.twitter.elephantbird.mapreduce.output.LzoBinaryBlockRecordWriter;
@@ -1,7 +1,7 @@
package com.hadoop.fourmc.elephantbird.adapter;
package com.fing.fourmc.elephantbird.adapter;

import com.google.protobuf.Message;
import com.hadoop.mapreduce.FourMzInputFormat;
import com.fing.mapreduce.FourMzInputFormat;
import com.twitter.elephantbird.mapreduce.input.LzoProtobufBlockRecordReader;
import com.twitter.elephantbird.mapreduce.io.BinaryWritable;
import com.twitter.elephantbird.util.HadoopUtils;
@@ -1,7 +1,7 @@
package com.hadoop.fourmc.elephantbird.adapter;
package com.fing.fourmc.elephantbird.adapter;

import com.google.protobuf.Message;
import com.hadoop.compression.fourmc.ZstdCodec;
import com.fing.compression.fourmc.ZstdCodec;
import com.twitter.elephantbird.mapreduce.io.ProtobufBlockWriter;
import com.twitter.elephantbird.mapreduce.io.ProtobufWritable;
import com.twitter.elephantbird.mapreduce.output.LzoBinaryBlockRecordWriter;
@@ -1,12 +1,12 @@
package com.hadoop.fourmc.mapreduce.protobuf.lz4;

import com.hadoop.compression.fourmc.FourMcCodec;
import com.hadoop.compression.fourmc.FourMcHighCodec;
import com.hadoop.compression.fourmc.FourMcMediumCodec;
import com.hadoop.compression.fourmc.FourMcUltraCodec;
import com.hadoop.fourmc.elephantbird.adapter.FourMcEbProtoInputFormat;
import com.hadoop.fourmc.elephantbird.adapter.FourMcEbProtoOutputFormat;
import com.hadoop.fourmc.protobuf.USER;
package com.fing.fourmc.mapreduce.protobuf.lz4;

import com.fing.compression.fourmc.FourMcCodec;
import com.fing.compression.fourmc.FourMcHighCodec;
import com.fing.compression.fourmc.FourMcMediumCodec;
import com.fing.compression.fourmc.FourMcUltraCodec;
import com.fing.fourmc.elephantbird.adapter.FourMcEbProtoInputFormat;
import com.fing.fourmc.elephantbird.adapter.FourMcEbProtoOutputFormat;
import com.fing.fourmc.protobuf.USER;
import com.twitter.elephantbird.mapreduce.io.ProtobufWritable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
@@ -1,12 +1,12 @@
package com.hadoop.fourmc.mapreduce.protobuf.zstd;

import com.hadoop.compression.fourmc.FourMzCodec;
import com.hadoop.compression.fourmc.FourMzHighCodec;
import com.hadoop.compression.fourmc.FourMzMediumCodec;
import com.hadoop.compression.fourmc.FourMzUltraCodec;
import com.hadoop.fourmc.elephantbird.adapter.FourMzEbProtoInputFormat;
import com.hadoop.fourmc.elephantbird.adapter.FourMzEbProtoOutputFormat;
import com.hadoop.fourmc.protobuf.USER;
package com.fing.fourmc.mapreduce.protobuf.zstd;

import com.fing.compression.fourmc.FourMzCodec;
import com.fing.compression.fourmc.FourMzHighCodec;
import com.fing.compression.fourmc.FourMzMediumCodec;
import com.fing.compression.fourmc.FourMzUltraCodec;
import com.fing.fourmc.elephantbird.adapter.FourMzEbProtoInputFormat;
import com.fing.fourmc.elephantbird.adapter.FourMzEbProtoOutputFormat;
import com.fing.fourmc.protobuf.USER;
import com.twitter.elephantbird.mapreduce.io.ProtobufWritable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
@@ -1,10 +1,10 @@
package com.hadoop.fourmc.mapreduce.text.lz4;
package com.fing.fourmc.mapreduce.text.lz4;

import com.hadoop.compression.fourmc.FourMcCodec;
import com.hadoop.compression.fourmc.FourMcHighCodec;
import com.hadoop.compression.fourmc.FourMcMediumCodec;
import com.hadoop.compression.fourmc.FourMcUltraCodec;
import com.hadoop.mapreduce.FourMcTextInputFormat;
import com.fing.compression.fourmc.FourMcCodec;
import com.fing.compression.fourmc.FourMcHighCodec;
import com.fing.compression.fourmc.FourMcMediumCodec;
import com.fing.compression.fourmc.FourMcUltraCodec;
import com.fing.mapreduce.FourMcTextInputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
@@ -1,10 +1,10 @@
package com.hadoop.fourmc.mapreduce.text.zstd;
package com.fing.fourmc.mapreduce.text.zstd;

import com.hadoop.compression.fourmc.FourMzCodec;
import com.hadoop.compression.fourmc.FourMzHighCodec;
import com.hadoop.compression.fourmc.FourMzMediumCodec;
import com.hadoop.compression.fourmc.FourMzUltraCodec;
import com.hadoop.mapreduce.FourMzTextInputFormat;
import com.fing.compression.fourmc.FourMzCodec;
import com.fing.compression.fourmc.FourMzHighCodec;
import com.fing.compression.fourmc.FourMzMediumCodec;
import com.fing.compression.fourmc.FourMzUltraCodec;
import com.fing.mapreduce.FourMzTextInputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
@@ -46,7 +46,7 @@ public int runTest(String[] args, Configuration conf) throws Exception {
job.setJobName("4mz.TestTextInput");

job.setJarByClass(getClass());
job.setMapperClass(com.hadoop.fourmc.mapreduce.text.zstd.TestTextInput.TestMapper.class);
job.setMapperClass(com.fing.fourmc.mapreduce.text.zstd.TestTextInput.TestMapper.class);
job.setNumReduceTasks(0);

job.setInputFormatClass(FourMzTextInputFormat.class);
@@ -86,7 +86,7 @@ public int runTest(String[] args, Configuration conf) throws Exception {
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
//args = new GenericOptionsParser(conf, args).getRemainingArgs();
com.hadoop.fourmc.mapreduce.text.zstd.TestTextInput runner = new com.hadoop.fourmc.mapreduce.text.zstd.TestTextInput();
com.fing.fourmc.mapreduce.text.zstd.TestTextInput runner = new com.fing.fourmc.mapreduce.text.zstd.TestTextInput();

if (args.length < 3) {
System.out.println(

Some generated files are not rendered by default.

@@ -1,9 +1,9 @@
package com.hadoop.fourmc.spark.protobuf;
package com.fing.fourmc.spark.protobuf;

import com.hadoop.compression.fourmc.FourMzHighCodec;
import com.hadoop.fourmc.elephantbird.adapter.FourMzEbProtoInputFormat;
import com.hadoop.fourmc.elephantbird.adapter.FourMzEbProtoOutputFormat;
import com.hadoop.fourmc.protobuf.USER;
import com.fing.compression.fourmc.FourMzHighCodec;
import com.fing.fourmc.elephantbird.adapter.FourMzEbProtoInputFormat;
import com.fing.fourmc.elephantbird.adapter.FourMzEbProtoOutputFormat;
import com.fing.fourmc.protobuf.USER;
import com.twitter.elephantbird.mapreduce.io.ProtobufWritable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
4 changes: 2 additions & 2 deletions java/examples/src/main/resources/user.proto
@@ -1,6 +1,6 @@
package com.hadoop.fourmc.protobuf;
package com.fing.fourmc.protobuf;

option java_package = "com.hadoop.fourmc.protobuf";
option java_package = "com.fing.fourmc.protobuf";
option java_outer_classname = "USER";
option optimize_for = SPEED;

10 changes: 7 additions & 3 deletions java/hadoop-4mc/pom.xml
@@ -2,7 +2,7 @@
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">

<parent>
<groupId>com.hadoop.fourmc</groupId>
<groupId>com.fing.fourmc</groupId>
<artifactId>parent</artifactId>
<version>1.0</version>
</parent>
@@ -18,9 +18,9 @@
<javac.target.version>1.7</javac.target.version-->
</properties>

<groupId>com.hadoop.fourmc</groupId>
<groupId>com.fing.fourmc</groupId>
<artifactId>hadoop-4mc</artifactId>
<version>2.2.0</version>
<version>3.0.0</version>
<packaging>jar</packaging>

<name>4mc</name>
@@ -55,6 +55,10 @@
<id>BuzzL</id>
<name>Tommaso Latini</name>
</developer>
<developer>
<id>Scip88</id>
<name>Francesco Scipioni</name>
</developer>
</developers>


@@ -32,7 +32,7 @@
- LZ4 source repository : http://code.google.com/p/lz4/
**/

package com.hadoop.compression.fourmc;
package com.fing.compression.fourmc;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -31,7 +31,7 @@
You can contact LZ4 lib author at :
- LZ4 source repository : http://code.google.com/p/lz4/
**/
package com.hadoop.compression.fourmc;
package com.fing.compression.fourmc;

import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;
@@ -31,7 +31,7 @@
You can contact LZ4 lib author at :
- LZ4 source repository : http://code.google.com/p/lz4/
**/
package com.hadoop.compression.fourmc;
package com.fing.compression.fourmc;

import org.apache.hadoop.io.compress.Compressor;

@@ -31,7 +31,7 @@
You can contact LZ4 lib author at :
- LZ4 source repository : http://code.google.com/p/lz4/
**/
package com.hadoop.compression.fourmc;
package com.fing.compression.fourmc;


import org.apache.hadoop.conf.Configuration;
@@ -31,7 +31,7 @@
You can contact LZ4 lib author at :
- LZ4 source repository : http://code.google.com/p/lz4/
**/
package com.hadoop.compression.fourmc;
package com.fing.compression.fourmc;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -31,7 +31,7 @@
You can contact LZ4 lib author at :
- LZ4 source repository : http://code.google.com/p/lz4/
**/
package com.hadoop.compression.fourmc;
package com.fing.compression.fourmc;

import org.apache.hadoop.io.compress.Compressor;

@@ -31,7 +31,7 @@
You can contact LZ4 lib author at :
- LZ4 source repository : http://code.google.com/p/lz4/
**/
package com.hadoop.compression.fourmc;
package com.fing.compression.fourmc;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -50,7 +50,7 @@ public class FourMcNativeCodeLoader {
* thus ignoring the embedded libraries inside jar.
*/
public static final String USE_BINARIES_ON_LIB_PATH =
"com.hadoop.compression.fourmc.use.libpath";
"com.fing.compression.fourmc.use.libpath";

private enum OS {
WINDOWS("win32", "dll"), LINUX("linux", "so"), MAC("darwin", "dylib"), SOLARIS("solaris", "so");
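The renamed system property keeps the behaviour described in the comment above: when it is set, the loader picks native binaries from `java.library.path` instead of the ones embedded in the jar. A minimal, hypothetical usage sketch, assuming the loader reads the property as a boolean flag before the codecs are first touched:

```java
public class NativeLoaderFlagExample {
    public static void main(String[] args) {
        // Prefer native 4mc libraries found on java.library.path over the embedded ones
        // (assumption: the loader parses this property as a boolean at class-load time).
        System.setProperty("com.fing.compression.fourmc.use.libpath", "true");

        // Equivalent command-line form (hypothetical invocation):
        //   java -Dcom.fing.compression.fourmc.use.libpath=true -Djava.library.path=/opt/4mc/native ...
    }
}
```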