diff --git a/sqoop/README.md b/sqoop/README.md new file mode 100644 index 0000000..5745091 --- /dev/null +++ b/sqoop/README.md @@ -0,0 +1,21 @@ +# Sqoop + +This code accompanies [this article which walks through Sqoop][blog-post]. + +## Compile + +``` +mvn compile +``` + +## Build +``` +mvn assembly:single +``` + +## Run + +``` +java -jar /path/to/assembled.jar + +[blog-post]: \ No newline at end of file diff --git a/sqoop/pom.xml b/sqoop/pom.xml new file mode 100644 index 0000000..26e244b --- /dev/null +++ b/sqoop/pom.xml @@ -0,0 +1,121 @@ + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.8 + + + maven-assembly-plugin + + + + com.matthewrathbone.example.DataImporter + + + + jar-with-dependencies + + + + + + + + 4.0.0 + com.matthewrathbone.example + sqoop-1 + jar + 1.0-SNAPSHOT + sqoop-1 + http://maven.apache.org + + + org.apache.sqoop + sqoop + 1.4.2 + hadoop20 + + + mysql + mysql-connector-java + 5.1.34 + + + commons-cli + commons-cli + 1.2 + + + commons-logging + commons-logging + 1.0.4 + + + commons-io + commons-io + 1.4 + + + org.apache.avro + avro + 1.5.3 + + + org.slf4j + slf4j-api + + + org.mortbay.jetty + jetty + + + org.jboss.netty + netty + + + org.apache.velocity + velocity + + + + + org.apache.avro + avro-mapred + 1.5.3 + + + org.slf4j + slf4j-api + + + org.mortbay.jetty + jetty + + + org.jboss.netty + netty + + + org.apache.velocity + velocity + + + + + org.apache.hadoop + hadoop-core + 0.20.2 + + + + + my-internal-site + https://repo1.maven.org/maven2/org/apache/sqoop/sqoop/1.4.2/ + + + diff --git a/sqoop/src/main/java/com/matthewrathbone/example/DataImporter.java b/sqoop/src/main/java/com/matthewrathbone/example/DataImporter.java new file mode 100644 index 0000000..adb2e2f --- /dev/null +++ b/sqoop/src/main/java/com/matthewrathbone/example/DataImporter.java @@ -0,0 +1,40 @@ +package com.matthewrathbone.example; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.sqoop.tool.ImportTool; +import com.cloudera.sqoop.SqoopOptions; + +public class DataImporter { + + public static void main(String[] args){ + System.out.println(export()); + } + + public static int export(){ + SqoopOptions options = new SqoopOptions(); + options.setConnectString("jdbc:mysql://localhost/test"); + + //options.setTableName("TABLE_NAME"); + //options.setWhereClause("id>10"); // this where clause works when importing whole table, ie when setTableName() is used + + options.setUsername("root"); + options.setPassword(""); + options.setNumMappers(2); + options.setSqlQuery("select id, location from m_users WHERE $CONDITIONS"); + options.setSplitByCol("id"); + + Configuration config = new Configuration(); + config.addResource(new Path("/Users/elena/apache/hadoop-0.20.2/conf/core-site.xml")); + config.addResource(new Path("/Users/elena/apache/hadoop-0.20.2/conf/hdfs-site.xml")); + + options.setConf(config); + options.setTargetDir("users_java_10"); + options.setHadoopHome("/Users/elena/apache/hadoop-0.20.2"); + options.setExplicitDelims(true); + options.setFieldsTerminatedBy('\t'); + options.setLinesTerminatedBy('\n'); + int ret = new ImportTool().run(options); + return ret; + } +} diff --git a/sqoop2/README.md b/sqoop2/README.md new file mode 100644 index 0000000..3e34b90 --- /dev/null +++ b/sqoop2/README.md @@ -0,0 +1,22 @@ +# Sqoop + +This code accompanies [this article which walks through Sqoop][blog-post]. + +## Compile + +``` +mvn compile +``` + +## Build +``` +mvn assembly:single +``` + +## Run + +``` +java -jar /path/to/assembled.jar + +[blog-post]: +``` \ No newline at end of file diff --git a/sqoop2/pom.xml b/sqoop2/pom.xml new file mode 100644 index 0000000..fc666e6 --- /dev/null +++ b/sqoop2/pom.xml @@ -0,0 +1,202 @@ + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.8 + + + maven-assembly-plugin + + + + com.matthewrathbone.example.DataImporter + + + + jar-with-dependencies + + + + + + + + + UTF-8 + 1.6 + 1.6 + 1.4 + 2.5 + 2.4 + 10.8.2.2 + 1.0.3 + 2.0.2-alpha + 11.0.2 + 1.1 + 4.11 + 1.9.5 + 1.2.16 + 2.5 + 1.3.2 + 6 + 0.36 + ${tomcat.major.version}.${tomcat.minor.version} + 5.1.23 + 9.1-901.jdbc4 + 11.2.0.3 + 4.0 + 14.00.00.21 + 6.0 + + + 4.0.0 + com.matthewrathbone.example + sqoop-2 + jar + 1.99.3 + sqoop-2 + http://maven.apache.org + + + org.apache.sqoop + sqoop-client + ${project.version} + + + org.apache.sqoop + sqoop-shell + ${project.version} + + + org.apache.sqoop + sqoop-common + ${project.version} + + + org.apache.sqoop + sqoop-core + ${project.version} + + + org.apache.sqoop + sqoop-core + test-jar + ${project.version} + + + org.apache.sqoop + sqoop-server + war + ${project.version} + + + org.apache.sqoop + sqoop-spi + ${project.version} + + + org.apache.sqoop.repository + sqoop-repository-derby + ${project.version} + + + org.apache.sqoop + connector-sdk + ${project.version} + + + org.apache.sqoop.connector + sqoop-connector-generic-jdbc + ${project.version} + + + org.apache.sqoop.connector + sqoop-connector-generic-jdbc + test-jar + ${project.version} + + + mysql + mysql-connector-java + 5.1.34 + + + com.googlecode.json-simple + json-simple + ${json-simple.version} + + + org.apache.sqoop.submission + sqoop-submission-mapreduce + ${project.version} + hadoop200 + + + org.apache.sqoop.execution + sqoop-execution-mapreduce + ${project.version} + hadoop200 + + + commons-dbcp + commons-dbcp + ${commons-dbcp.version} + + + commons-lang + commons-lang + ${commons-lang.version} + + + javax.servlet + servlet-api + ${servlet.version} + + + junit + junit + ${junit.version} + + + log4j + log4j + ${log4j.version} + + + org.apache.derby + derby + ${derby.version} + + + org.apache.derby + derbynet + ${derby.version} + + + org.apache.derby + derbyclient + ${derby.version} + + + org.codehaus.cargo + cargo-core-container-tomcat + ${cargo.version} + + + mysql + mysql-connector-java + ${jdbc.mysql.version} + + + org.mockito + mockito-all + ${mockito.version} + test + + + diff --git a/sqoop2/src/main/java/com/matthewrathbone/example/DataImporter.java b/sqoop2/src/main/java/com/matthewrathbone/example/DataImporter.java new file mode 100644 index 0000000..b4ce91b --- /dev/null +++ b/sqoop2/src/main/java/com/matthewrathbone/example/DataImporter.java @@ -0,0 +1,83 @@ +package com.matthewrathbone.example; + +import org.apache.sqoop.client.SqoopClient; +import org.apache.sqoop.model.MConnection; +import org.apache.sqoop.model.MConnectionForms; +import org.apache.sqoop.model.MJob; +import org.apache.sqoop.model.MJobForms; +import org.apache.sqoop.model.MSubmission; +import org.apache.sqoop.validation.Status; + +public class DataImporter { + public static void main(String[] args) { + String connectionString = "jdbc:mysql://localhost/test"; + String username = "root"; + String password = ""; + String schemaName = ""; + String tableName = "m_users"; + String columns = "id,email,language,location"; + String partitionColumn = "id"; + String outputDirectory = "/user/elena/javasqoop23"; + String url = "http://localhost:12000/sqoop/"; + + SqoopClient client = new SqoopClient(url); + MConnection newCon = client.newConnection(1); + + //Get connection and framework forms. Set name for connection + MConnectionForms conForms = newCon.getConnectorPart(); + MConnectionForms frameworkForms = newCon.getFrameworkPart(); + newCon.setName("MyConnection"); + + //Set connection forms values + conForms.getStringInput("connection.connectionString").setValue(connectionString); + conForms.getStringInput("connection.jdbcDriver").setValue("com.mysql.jdbc.Driver"); + conForms.getStringInput("connection.username").setValue(username); + conForms.getStringInput("connection.password").setValue(password); + + frameworkForms.getIntegerInput("security.maxConnections").setValue(Integer.valueOf(0)); + + Status status = client.createConnection(newCon); + if(status.canProceed()) { + System.out.println("Created. New Connection ID : " +newCon.getPersistenceId()); + } else { + System.out.println("Check for status and forms error "); + } + + //Creating job object + MJob newjob = client.newJob(newCon.getPersistenceId(), org.apache.sqoop.model.MJob.Type.IMPORT); + MJobForms connectorForm = newjob.getConnectorPart(); + MJobForms frameworkForm = newjob.getFrameworkPart(); + + newjob.setName("ImportJob"); + //Database configuration + connectorForm.getStringInput("table.schemaName").setValue(schemaName); + //Input either table name or sql + connectorForm.getStringInput("table.tableName").setValue(tableName); + //connectorForm.getStringInput("table.sql").setValue("select id,name from table where ${CONDITIONS}"); + //connectorForm.getStringInput("table.columns").setValue(columns); + + connectorForm.getStringInput("table.partitionColumn").setValue(partitionColumn); + + //Set boundary value only if required + //connectorForm.getStringInput("table.boundaryQuery").setValue(""); + + //Output configurations + frameworkForm.getEnumInput("output.storageType").setValue("HDFS"); + frameworkForm.getEnumInput("output.outputFormat").setValue("TEXT_FILE"); + frameworkForm.getStringInput("output.outputDirectory").setValue(outputDirectory); + //Job resources + //frameworkForm.getIntegerInput("throttling.extractors").setValue(Integer.valueOf(1)); + //frameworkForm.getIntegerInput("throttling.loaders").setValue(Integer.valueOf(1)); + + status = client.createJob(newjob); + if(status.canProceed()) { + System.out.println("New Job ID: "+ newjob.getPersistenceId()); + } else { + System.out.println("Check for status and forms error "); + } + //Now Submit the Job + MSubmission submission = client.startSubmission(newjob.getPersistenceId()); + System.out.println("Status : " + submission.getStatus()); + + } +}