From 47bcf8a96389f9011db3db1c2718bd0d279227fc Mon Sep 17 00:00:00 2001
From: Noah Prince
Date: Wed, 14 Feb 2024 08:52:04 -0800
Subject: [PATCH] bump rust

---
 protobuf-delta-lake-sink/Delta Lake.ipynb | 65 +++++------------------
 protobuf-delta-lake-sink/Dockerfile       |  2 +-
 2 files changed, 15 insertions(+), 52 deletions(-)

diff --git a/protobuf-delta-lake-sink/Delta Lake.ipynb b/protobuf-delta-lake-sink/Delta Lake.ipynb
index 8e956c7..a2f29f1 100644
--- a/protobuf-delta-lake-sink/Delta Lake.ipynb
+++ b/protobuf-delta-lake-sink/Delta Lake.ipynb
@@ -2,55 +2,21 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 2,
    "id": "2b2839a1",
    "metadata": {
     "scrolled": true
    },
    "outputs": [
     {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      ":: loading settings :: url = jar:file:/usr/local/lib/python3.11/site-packages/pyspark/jars/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Ivy Default Cache set to: /Users/nprince/.ivy2/cache\n",
-      "The jars for the packages stored in: /Users/nprince/.ivy2/jars\n",
-      "io.delta#delta-core_2.12 added as a dependency\n",
-      "org.apache.hadoop#hadoop-aws added as a dependency\n",
-      ":: resolving dependencies :: org.apache.spark#spark-submit-parent-4ee87e66-b861-47ca-82ac-141d30f89d04;1.0\n",
-      "\tconfs: [default]\n",
-      "\tfound io.delta#delta-core_2.12;2.4.0 in central\n",
-      "\tfound io.delta#delta-storage;2.4.0 in central\n",
-      "\tfound org.antlr#antlr4-runtime;4.9.3 in central\n",
-      "\tfound org.apache.hadoop#hadoop-aws;3.3.1 in central\n",
-      "\tfound com.amazonaws#aws-java-sdk-bundle;1.11.901 in central\n",
-      "\tfound org.wildfly.openssl#wildfly-openssl;1.0.7.Final in central\n",
-      ":: resolution report :: resolve 264ms :: artifacts dl 7ms\n",
-      "\t:: modules in use:\n",
-      "\tcom.amazonaws#aws-java-sdk-bundle;1.11.901 from central in [default]\n",
-      "\tio.delta#delta-core_2.12;2.4.0 from central in [default]\n",
-      "\tio.delta#delta-storage;2.4.0 from central in [default]\n",
-      "\torg.antlr#antlr4-runtime;4.9.3 from central in [default]\n",
-      "\torg.apache.hadoop#hadoop-aws;3.3.1 from central in [default]\n",
-      "\torg.wildfly.openssl#wildfly-openssl;1.0.7.Final from central in [default]\n",
-      "\t---------------------------------------------------------------------\n",
-      "\t| | modules || artifacts |\n",
-      "\t| conf | number| search|dwnlded|evicted|| number|dwnlded|\n",
-      "\t---------------------------------------------------------------------\n",
-      "\t| default | 6 | 0 | 0 | 0 || 6 | 0 |\n",
-      "\t---------------------------------------------------------------------\n",
-      ":: retrieving :: org.apache.spark#spark-submit-parent-4ee87e66-b861-47ca-82ac-141d30f89d04\n",
-      "\tconfs: [default]\n",
-      "\t0 artifacts copied, 6 already retrieved (0kB/5ms)\n",
-      "23/07/12 17:47:52 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n",
-      "Setting default log level to \"WARN\".\n",
-      "To adjust logging level use sc.setLogLevel(newLevel).\nFor SparkR, use setLogLevel(newLevel).\n"
+     "ename": "ModuleNotFoundError",
+     "evalue": "No module named 'pyspark'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[2], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpyspark\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdelta\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpip_utils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m configure_spark_with_delta_pip\n\u001b[1;32m 4\u001b[0m builder \u001b[38;5;241m=\u001b[39m pyspark\u001b[38;5;241m.\u001b[39msql\u001b[38;5;241m.\u001b[39mSparkSession\u001b[38;5;241m.\u001b[39mbuilder\u001b[38;5;241m.\u001b[39mappName(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMyApp\u001b[39m\u001b[38;5;124m\"\u001b[39m) \\\n\u001b[1;32m 5\u001b[0m \u001b[38;5;241m.\u001b[39mconfig(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mspark.sql.extensions\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mio.delta.sql.DeltaSparkSessionExtension\u001b[39m\u001b[38;5;124m\"\u001b[39m) \\\n\u001b[1;32m 6\u001b[0m \u001b[38;5;241m.\u001b[39mconfig(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mspark.sql.catalog.spark_catalog\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124morg.apache.spark.sql.delta.catalog.DeltaCatalog\u001b[39m\u001b[38;5;124m\"\u001b[39m) \\\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[38;5;241m.\u001b[39mconfig(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mspark.hadoop.fs.s3a.impl\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124morg.apache.hadoop.fs.s3a.S3AFileSystem\u001b[39m\u001b[38;5;124m\"\u001b[39m) \\\n\u001b[1;32m 13\u001b[0m \u001b[38;5;241m.\u001b[39mconfig(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mspark.hadoop.fs.s3a.connection.ssl.enabled\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfalse\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
+      "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'pyspark'"
      ]
     }
    ],
@@ -143,17 +109,14 @@
     }
    ],
    "source": [
-    "spark.read.format(\"delta\").load(\"s3a://delta/silver/test11\").createOrReplaceTempView(\"test\")\n",
-    "spark.read.format(\"delta\").load(\"s3a://delta/bronze/iot_reward_share\").createOrReplaceTempView(\"iot_reward_share\")\n"
+    "spark.read.format(\"delta\").load(\"s3a://data/mobile-reward-share\").createOrReplaceTempView(\"mobile_reward_share\")\n"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 5,
    "id": "3d9caa10",
-   "metadata": {
-    "scrolled": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -177,8 +140,8 @@
    ],
    "source": [
     "spark.sql(\"\"\"\\\n",
-    "SELECT gateway_reward.hotspot_key\n",
-    "FROM iot_reward_share\n",
+    "SELECT *\n",
+    "FROM mobile_reward_share\n",
     "limit 5\n",
     "\"\"\").show(truncate)"
    ]
  },
@@ -208,7 +171,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.4"
+   "version": "3.12.2"
   }
  },
  "nbformat": 4,
diff --git a/protobuf-delta-lake-sink/Dockerfile b/protobuf-delta-lake-sink/Dockerfile
index ab7a63f..7816e4c 100644
--- a/protobuf-delta-lake-sink/Dockerfile
+++ b/protobuf-delta-lake-sink/Dockerfile
@@ -1,5 +1,5 @@
 # Specify the base image
-FROM rust:1.70.0 as builder
+FROM rust:1.75.0 as builder
 RUN apt-get update && apt-get install -y protobuf-compiler