From 4a93f9900b1a07960c45f42fc5404c2f7dc54c18 Mon Sep 17 00:00:00 2001 From: Aykut Bozkurt Date: Sun, 27 Oct 2024 13:52:49 +0300 Subject: [PATCH] Adds Support for COPY TO/FROM Google Cloud Storage Supports following Google Cloud Storage uri forms: - gs:// \<bucket\> / \<path\> **Configuration** The simplest way to configure object storage is by creating a json config file like [`/tmp/gcs.json`]: ```bash $ cat /tmp/gcs.json { "gcs_base_url": "http://localhost:4443", "disable_oauth": true, "client_email": "", "private_key_id": "", "private_key": "" } ``` Alternatively, you can use the following environment variables when starting postgres to configure the Google Cloud Storage client: - `GOOGLE_SERVICE_ACCOUNT_KEY`: json serialized service account key - `GOOGLE_SERVICE_ACCOUNT_PATH`: an alternative location for the config file --- .devcontainer/.env | 22 ++++++++ .devcontainer/Dockerfile | 54 ++++++++---------- .devcontainer/create-test-buckets.sh | 7 +++ .devcontainer/devcontainer.json | 18 +++--- .devcontainer/docker-compose.yml | 60 ++++++++++++++++++++ .devcontainer/scripts/setup_azurite.sh | 7 --- .devcontainer/scripts/setup_minio.sh | 9 --- .devcontainer/scripts/setup_test_envs.sh | 19 ------- .github/workflows/ci.yml | 70 ++++++++++++++---------- .gitignore | 1 - .vscode/settings.json | 3 + Cargo.lock | 1 + Cargo.toml | 2 +- README.md | 22 ++++++++ src/arrow_parquet/uri_utils.rs | 52 +++++++++++++++++- src/lib.rs | 41 +++++++++++++- 16 files changed, 274 insertions(+), 114 deletions(-) create mode 100644 .devcontainer/.env create mode 100644 .devcontainer/create-test-buckets.sh create mode 100644 .devcontainer/docker-compose.yml delete mode 100644 .devcontainer/scripts/setup_azurite.sh delete mode 100644 .devcontainer/scripts/setup_minio.sh delete mode 100644 .devcontainer/scripts/setup_test_envs.sh diff --git a/.devcontainer/.env b/.devcontainer/.env new file mode 100644 index 0000000..ebc69c1 --- /dev/null +++ b/.devcontainer/.env @@ -0,0 +1,22 @@ +# S3 tests 
+AWS_ACCESS_KEY_ID=minioadmin +AWS_SECRET_ACCESS_KEY=minioadmin +AWS_REGION=us-east-1 +AWS_S3_TEST_BUCKET=testbucket +MINIO_ROOT_USER=minioadmin +MINIO_ROOT_PASSWORD=minioadmin + +# Azure Blob tests +AZURE_STORAGE_ACCOUNT=devstoreaccount1 +AZURE_STORAGE_KEY="Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==" +AZURE_STORAGE_CONNECTION_STRING="DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://localhost:10000/devstoreaccount1;" +AZURE_TEST_CONTAINER_NAME=testcontainer +AZURE_TEST_READ_ONLY_SAS="se=2100-05-05&sp=r&sv=2022-11-02&sr=c&sig=YMPFnAHKe9y0o3hFegncbwQTXtAyvsJEgPB2Ne1b9CQ%3D" +AZURE_TEST_READ_WRITE_SAS="se=2100-05-05&sp=rcw&sv=2022-11-02&sr=c&sig=TPz2jEz0t9L651t6rTCQr%2BOjmJHkM76tnCGdcyttnlA%3D" + +# GCS tests +GOOGLE_TEST_BUCKET=testbucket + +# Others +RUST_TEST_THREADS=1 +PG_PARQUET_TEST=true diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 9589833..f61437d 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -12,6 +12,11 @@ RUN apt-get update && apt-get -y install build-essential libreadline-dev zlib1g- curl lsb-release ca-certificates gnupg sudo git \ nano net-tools awscli +# install azure-cli +RUN curl -sL https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor | tee /etc/apt/trusted.gpg.d/microsoft.gpg > /dev/null +RUN echo "deb [arch=`dpkg --print-architecture` signed-by=/etc/apt/trusted.gpg.d/microsoft.gpg] https://packages.microsoft.com/repos/azure-cli/ `lsb_release -cs` main" | tee /etc/apt/sources.list.d/azure-cli.list +RUN apt-get update && apt-get install -y azure-cli + # install Postgres RUN sh -c 'echo "deb https://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list' RUN wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add - @@ -20,42 +25,20 @@ RUN apt-get 
update && apt-get -y install postgresql-${PG_MAJOR}-postgis-3 \ postgresql-client-${PG_MAJOR} \ libpq-dev -# install azure-cli and azurite -RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - -RUN apt-get update && apt-get install -y nodejs -RUN curl -sL https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor | tee /etc/apt/trusted.gpg.d/microsoft.gpg > /dev/null -RUN echo "deb [arch=`dpkg --print-architecture` signed-by=/etc/apt/trusted.gpg.d/microsoft.gpg] https://packages.microsoft.com/repos/azure-cli/ `lsb_release -cs` main" | tee /etc/apt/sources.list.d/azure-cli.list -RUN apt-get update && apt-get install -y azure-cli -RUN npm install -g azurite +# set up permissions so that rust user can create extensions +RUN chmod a+rwx `pg_config --pkglibdir` \ + `pg_config --sharedir`/extension \ + /var/run/postgresql/ -# download and install MinIO server and client -RUN wget https://dl.min.io/server/minio/release/linux-amd64/minio -RUN chmod +x minio -RUN mv minio /usr/local/bin/minio - -# download and install MinIO admin -RUN wget https://dl.min.io/client/mc/release/linux-amd64/mc -RUN chmod +x mc -RUN mv mc /usr/local/bin/mc - -# set up pgrx with non-sudo user +# initdb requires non-root user. This will also be the user that runs the container. 
ARG USERNAME=rust -ARG USER_UID=501 -ARG USER_GID=$USER_UID -RUN groupadd --gid $USER_GID $USERNAME \ - && useradd --uid $USER_UID --gid $USER_GID -s /bin/bash -m $USERNAME - -RUN mkdir /workspaces && chown -R $USER_UID:$USER_GID /workspaces +ARG USER_UID=1000 +ARG USER_GID=1000 +RUN groupadd --gid $USER_GID $USERNAME +RUN useradd --uid $USER_UID --gid $USER_GID -s /bin/bash -m $USERNAME -# set up permissions so that the user below can create extensions -RUN chmod a+rwx `pg_config --pkglibdir` \ - `pg_config --sharedir`/extension \ - /var/run/postgresql/ +RUN echo "$USERNAME ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/$USERNAME -# add it to sudoers -RUN echo "$USERNAME ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/$USERNAME - -# now it is time to switch to user USER $USERNAME # install Rust environment @@ -67,3 +50,10 @@ ARG PGRX_VERSION=0.12.6 RUN cargo install --locked cargo-pgrx@${PGRX_VERSION} RUN cargo pgrx init --pg${PG_MAJOR} $(which pg_config) RUN echo "shared_preload_libraries = 'pg_parquet'" >> $HOME/.pgrx/data-${PG_MAJOR}/postgresql.conf + +# required for pgrx to work +ENV USER=$USERNAME + +# git completion +RUN curl -o ~/.git-completion.bash https://raw.githubusercontent.com/git/git/master/contrib/completion/git-completion.bash +RUN echo "source ~/.git-completion.bash" >> ~/.bashrc diff --git a/.devcontainer/create-test-buckets.sh b/.devcontainer/create-test-buckets.sh new file mode 100644 index 0000000..4c45e61 --- /dev/null +++ b/.devcontainer/create-test-buckets.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +aws --endpoint-url http://localhost:9000 s3 mb s3://$AWS_S3_TEST_BUCKET + +az storage container create -n $AZURE_TEST_CONTAINER_NAME --connection-string $AZURE_STORAGE_CONNECTION_STRING + +curl -v -X POST --data-binary "{\"name\":\"$GOOGLE_TEST_BUCKET\"}" -H "Content-Type: application/json" "http://localhost:4443/storage/v1/b" diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index a81ca9e..e2c90a8 100644 --- 
a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,7 +1,10 @@ { - "build": { - "dockerfile": "Dockerfile" - }, + "name": "pg_parquet Dev Environment", + "dockerComposeFile": "docker-compose.yml", + "service": "app", + "workspaceFolder": "/workspace", + "postStartCommand": "bash .devcontainer/create-test-buckets.sh", + "postAttachCommand": "sudo chown -R rust /workspace", "customizations": { "vscode": { "extensions": [ @@ -14,12 +17,5 @@ "henriiik.docker-linter" ] } - }, - "postStartCommand": "bash .devcontainer/scripts/setup_minio.sh && bash .devcontainer/scripts/setup_azurite.sh", - "forwardPorts": [ - 5432 - ], - "capAdd": [ - "SYS_PTRACE" - ] + } } diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml new file mode 100644 index 0000000..805c3eb --- /dev/null +++ b/.devcontainer/docker-compose.yml @@ -0,0 +1,60 @@ +services: + app: + build: + context: . + dockerfile: Dockerfile + command: sleep infinity + network_mode: host + volumes: + - ..:/workspace + - ${USERPROFILE}${HOME}/.ssh:/home/rust/.ssh:ro + - ${USERPROFILE}${HOME}/.ssh/known_hosts:/home/rust/.ssh/known_hosts:rw + - ${USERPROFILE}${HOME}/.gitconfig:/home/rust/.gitconfig:ro + - ${USERPROFILE}${HOME}/.aws:/home/rust/.aws:ro + - ${USERPROFILE}${HOME}/.azure:/home/rust/.azure:ro + env_file: + - .env + cap_add: + - SYS_PTRACE + depends_on: + - minio + - azurite + - fake-gcs-server + + minio: + image: minio/minio + env_file: + - .env + network_mode: host + command: server /data + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "http://localhost:9000"] + interval: 6s + timeout: 2s + retries: 3 + + azurite: + image: mcr.microsoft.com/azure-storage/azurite + env_file: + - .env + network_mode: host + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "http://localhost:10000"] + interval: 6s + timeout: 2s + retries: 3 + + fake-gcs-server: + image: tustvold/fake-gcs-server + env_file: + - .env + network_mode: host + command: -scheme 
http -public-host localhost:4443 + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "http://localhost:4443"] + interval: 6s + timeout: 2s + retries: 3 diff --git a/.devcontainer/scripts/setup_azurite.sh b/.devcontainer/scripts/setup_azurite.sh deleted file mode 100644 index cea6712..0000000 --- a/.devcontainer/scripts/setup_azurite.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -source setup_test_envs.sh - -nohup azurite --location /tmp/azurite-storage > /dev/null 2>&1 & - -az storage container create --name "${AZURE_TEST_CONTAINER_NAME}" --public off --connection-string "$AZURE_STORAGE_CONNECTION_STRING" diff --git a/.devcontainer/scripts/setup_minio.sh b/.devcontainer/scripts/setup_minio.sh deleted file mode 100644 index 627e9c0..0000000 --- a/.devcontainer/scripts/setup_minio.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - -source setup_test_envs.sh - -nohup minio server /tmp/minio-storage > /dev/null 2>&1 & - -mc alias set local http://localhost:9000 $MINIO_ROOT_USER $MINIO_ROOT_PASSWORD - -aws --endpoint-url http://localhost:9000 s3 mb s3://testbucket diff --git a/.devcontainer/scripts/setup_test_envs.sh b/.devcontainer/scripts/setup_test_envs.sh deleted file mode 100644 index a47e856..0000000 --- a/.devcontainer/scripts/setup_test_envs.sh +++ /dev/null @@ -1,19 +0,0 @@ -# S3 tests -export AWS_ACCESS_KEY_ID=admin -export AWS_SECRET_ACCESS_KEY=admin123 -export AWS_REGION=us-east-1 -export AWS_S3_TEST_BUCKET=testbucket -export MINIO_ROOT_USER=admin -export MINIO_ROOT_PASSWORD=admin123 - -# Azure Blob tests -export AZURE_STORAGE_ACCOUNT=devstoreaccount1 -export AZURE_STORAGE_KEY="Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==" -export AZURE_STORAGE_CONNECTION_STRING="DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://localhost:10000/devstoreaccount1;" -export 
AZURE_TEST_CONTAINER_NAME=testcontainer -export AZURE_TEST_READ_ONLY_SAS="se=2100-05-05&sp=r&sv=2022-11-02&sr=c&sig=YMPFnAHKe9y0o3hFegncbwQTXtAyvsJEgPB2Ne1b9CQ%3D" -export AZURE_TEST_READ_WRITE_SAS="se=2100-05-05&sp=rcw&sv=2022-11-02&sr=c&sig=TPz2jEz0t9L651t6rTCQr%2BOjmJHkM76tnCGdcyttnlA%3D" - -# Other -export PG_PARQUET_TEST=true -export RUST_TEST_THREADS=1 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d3f2f21..0ab088e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -65,6 +65,12 @@ jobs: path: ${{ env.SCCACHE_DIR }} key: pg_parquet-sccache-cache-${{ runner.os }}-${{ hashFiles('Cargo.lock', '.github/workflows/ci.yml') }} + - name: Export environment variables from .env file + uses: falti/dotenv-action@v1 + with: + path: .devcontainer/.env + export_variables: true + - name: Install PostgreSQL run: | sudo sh -c 'echo "deb https://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list' @@ -78,26 +84,11 @@ jobs: postgresql-client-${{ env.PG_MAJOR }} \ libpq-dev - - name: Install Azurite + - name: Install azure-cli run: | - curl -fsSL https://deb.nodesource.com/setup_20.x | sudo bash - - sudo apt-get update && sudo apt-get install -y nodejs curl -sL https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor | sudo tee /etc/apt/trusted.gpg.d/microsoft.gpg > /dev/null echo "deb [arch=`dpkg --print-architecture` signed-by=/etc/apt/trusted.gpg.d/microsoft.gpg] https://packages.microsoft.com/repos/azure-cli/ `lsb_release -cs` main" | sudo tee /etc/apt/sources.list.d/azure-cli.list sudo apt-get update && sudo apt-get install -y azure-cli - npm install -g azurite - - - name: Install MinIO - run: | - # Download and install MinIO server and client - wget https://dl.min.io/server/minio/release/linux-amd64/minio - chmod +x minio - mv minio /usr/local/bin/minio - - # Download and install MinIO admin - wget https://dl.min.io/client/mc/release/linux-amd64/mc - chmod +x mc - mv mc 
/usr/local/bin/mc - name: Install and configure pgrx run: | @@ -112,22 +103,47 @@ jobs: cargo fmt --all -- --check cargo clippy --all-targets --features "pg${{ env.PG_MAJOR }}, pg_test" --no-default-features -- -D warnings - - name: Run tests + - name: Set up permissions for PostgreSQL run: | - # Set up permissions so that the current user below can create extensions sudo chmod a+rwx $(pg_config --pkglibdir) \ $(pg_config --sharedir)/extension \ /var/run/postgresql/ - # Set up test environments - source .devcontainer/scripts/setup_test_envs.sh + - name: Start Minio for s3 emulator tests + run: | + docker run -d -p 9000:9000 minio/minio server /data + + while ! nc -z localhost 9000; do + echo "Waiting for localhost:9000..." + sleep 1 + done - # Start MinIO server - bash .devcontainer/scripts/setup_minio.sh + aws --endpoint-url http://localhost:9000 s3 mb s3://$AWS_S3_TEST_BUCKET + + - name: Start Azurite for Azure Blob Storage emulator tests + run: | + docker run -d -p 10000:10000 mcr.microsoft.com/azure-storage/azurite - # Start Azurite server - bash .devcontainer/scripts/setup_azurite.sh + while ! nc -z localhost 10000; do + echo "Waiting for localhost:10000..." + sleep 1 + done + az storage container create -n $AZURE_TEST_CONTAINER_NAME --connection-string $AZURE_STORAGE_CONNECTION_STRING + + - name: Start fake-gcs-server for Google Cloud Storage emulator tests + run: | + docker run -d -p 4443:4443 tustvold/fake-gcs-server -scheme http -filesystem-root /tmp/gcs -public-host localhost:4443 + + while ! nc -z localhost 4443; do + echo "Waiting for localhost:4443..." 
+ sleep 1 + done + + curl -v -X POST --data-binary "{\"name\":\"$GOOGLE_TEST_BUCKET\"}" -H "Content-Type: application/json" "http://localhost:4443/storage/v1/b" + + - name: Run tests + run: | # Run tests with coverage tool source <(cargo llvm-cov show-env --export-prefix) cargo llvm-cov clean @@ -135,12 +151,6 @@ jobs: cargo pgrx test pg${{ env.PG_MAJOR }} --no-default-features cargo llvm-cov report --lcov > lcov.info - # Stop MinIO server - pkill -9 minio - - # Stop Azurite server - pkill -9 node - - name: Upload coverage report to Codecov if: ${{ env.PG_MAJOR }} == 17 uses: codecov/codecov-action@v4 diff --git a/.gitignore b/.gitignore index d15537c..a5aa1d9 100644 --- a/.gitignore +++ b/.gitignore @@ -12,5 +12,4 @@ *.lcov *.xml lcov.info -.env playground.rs diff --git a/.vscode/settings.json b/.vscode/settings.json index be4b716..762d130 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -4,4 +4,7 @@ "rust-analyzer.check.command": "clippy", "rust-analyzer.checkOnSave": true, "editor.inlayHints.enabled": "offUnlessPressed", + "files.watcherExclude": { + "**/target/**": true + } } \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index e263060..fd3414c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1935,6 +1935,7 @@ dependencies = [ "rand", "reqwest", "ring", + "rustls-pemfile 2.2.0", "serde", "serde_json", "snafu", diff --git a/Cargo.toml b/Cargo.toml index 12370d3..7bbb60c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,7 +26,7 @@ aws-config = { version = "1.5", default-features = false, features = ["rustls"]} aws-credential-types = {version = "1.2", default-features = false} futures = "0.3" home = "0.5" -object_store = {version = "0.11", default-features = false, features = ["aws", "azure"]} +object_store = {version = "0.11", default-features = false, features = ["aws", "azure", "gcp"]} once_cell = "1" parquet = {version = "53", default-features = false, features = [ "arrow", diff --git a/README.md b/README.md index e74f7fc..9c8ffac 
100644 --- a/README.md +++ b/README.md @@ -212,6 +212,28 @@ Supported Azure Blob Storage uri formats are shown below: - azure:// \<container\> / \<path\> - https:// \<account\>.blob.core.windows.net / \<container\> / \<path\> +#### Google Cloud Storage + +The simplest way to configure object storage is by creating a json config file like [`/tmp/gcs.json`]: + +```bash +$ cat /tmp/gcs.json +{ + "gcs_base_url": "http://localhost:4443", + "disable_oauth": true, + "client_email": "", + "private_key_id": "", + "private_key": "" +} +``` + +Alternatively, you can use the following environment variables when starting postgres to configure the Google Cloud Storage client: +- `GOOGLE_SERVICE_ACCOUNT_KEY`: json serialized service account key +- `GOOGLE_SERVICE_ACCOUNT_PATH`: an alternative location for the config file + +Supported Google Cloud Storage uri formats are shown below: +- gs:// \<bucket\> / \<path\> + ## Copy Options `pg_parquet` supports the following options in the `COPY TO` command: - `format parquet`: you need to specify this option to read or write Parquet files which does not end with `.parquet[.<compression>]` extension. 
(This is the only option that `COPY FROM` command supports.), diff --git a/src/arrow_parquet/uri_utils.rs b/src/arrow_parquet/uri_utils.rs index 534caa1..32a8311 100644 --- a/src/arrow_parquet/uri_utils.rs +++ b/src/arrow_parquet/uri_utils.rs @@ -15,6 +15,7 @@ use ini::Ini; use object_store::{ aws::{AmazonS3, AmazonS3Builder}, azure::{AzureConfigKey, MicrosoftAzure, MicrosoftAzureBuilder}, + gcp::{GoogleCloudStorage, GoogleCloudStorageBuilder}, local::LocalFileSystem, path::Path, ObjectStore, ObjectStoreScheme, @@ -96,6 +97,17 @@ fn parse_s3_bucket(uri: &Url) -> Option<String> { None } +fn parse_gcs_bucket(uri: &Url) -> Option<String> { + let host = uri.host_str()?; + + // gs://{bucket}/key + if uri.scheme() == "gs" { + return Some(host.to_string()); + } + + None +} + fn object_store_with_location(uri: &Url, copy_from: bool) -> (Arc<dyn ObjectStore>, Path) { let (scheme, path) = ObjectStoreScheme::parse(uri).unwrap_or_else(|_| panic!("unsupported uri {}", uri)); @@ -121,6 +133,16 @@ fn object_store_with_location(uri: &Url, copy_from: bool) -> (Arc<dyn ObjectStor (storage_container, path) } + ObjectStoreScheme::GoogleCloudStorage => { + let bucket_name = parse_gcs_bucket(uri).unwrap_or_else(|| { + panic!("failed to parse bucket name from uri: {}", uri); + }); + + let storage_container = PG_BACKEND_TOKIO_RUNTIME + .block_on(async { Arc::new(get_gcs_object_store(&bucket_name).await) }); + + (storage_container, path) + } ObjectStoreScheme::Local => { let uri = uri_as_string(uri); @@ -262,6 +284,25 @@ async fn get_azure_object_store(container_name: &str) -> MicrosoftAzure { azure_builder.build().unwrap_or_else(|e| panic!("{}", e)) } +async fn get_gcs_object_store(bucket_name: &str) -> GoogleCloudStorage { + let mut gcs_builder = GoogleCloudStorageBuilder::from_env().with_bucket_name(bucket_name); + + if is_testing() { + // use fake-gcs-server for testing + gcs_builder = gcs_builder.with_service_account_key( + "{ + \"gcs_base_url\": \"http://localhost:4443\", + \"disable_oauth\": true, + \"client_email\": \"\", + \"private_key_id\": \"\", + \"private_key\": \"\" + }", + ); + } + 
gcs_builder.build().unwrap_or_else(|e| panic!("{}", e)) +} + fn is_testing() -> bool { std::env::var("PG_PARQUET_TEST").is_ok() } @@ -284,13 +325,20 @@ pub(crate) fn parse_uri(uri: &str) -> Url { } else if scheme == ObjectStoreScheme::MicrosoftAzure { parse_azure_blob_container(&uri).unwrap_or_else(|| { panic!( - "failed to parse container name from azure blob storage uri {}", + "failed to parse container name from Azure Blob Storage uri {}", + uri + ) + }); + } else if scheme == ObjectStoreScheme::GoogleCloudStorage { + parse_gcs_bucket(&uri).unwrap_or_else(|| { + panic!( + "failed to parse bucket name from Google Cloud Storage uri {}", uri ) }); } else { panic!( - "unsupported uri {}. Only Azure and S3 uris are supported.", + "unsupported uri {}. Only Azure Blob Storage, S3 and Google Cloud Storage uris are supported.", uri ); }; diff --git a/src/lib.rs b/src/lib.rs index 3239914..8055085 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1895,10 +1895,47 @@ mod tests { } #[pg_test] - #[should_panic(expected = "unsupported uri gs://testbucket")] + fn test_gcs_from_env() { + let test_bucket_name: String = + std::env::var("GOOGLE_TEST_BUCKET").expect("GOOGLE_TEST_BUCKET not found"); + + let gcs_uri = format!("gs://{}/pg_parquet_test.parquet", test_bucket_name); + + let test_table = TestTable::<i32>::new("int4".into()).with_uri(gcs_uri); + + test_table.insert("INSERT INTO test_expected (a) VALUES (1), (2), (null);"); + test_helper(test_table); + } + + #[pg_test] + #[should_panic(expected = "404 Not Found")] + fn test_gcs_write_wrong_bucket() { + let gcs_uri = "gs://randombucketwhichdoesnotexist/pg_parquet_test.parquet"; + + let copy_to_command = format!( + "COPY (SELECT i FROM generate_series(1,10) i) TO '{}';", + gcs_uri + ); + Spi::run(copy_to_command.as_str()).unwrap(); + } + + #[pg_test] + #[should_panic(expected = "404 Not Found")] + fn test_gcs_read_wrong_bucket() { + let gcs_uri = "gs://randombucketwhichdoesnotexist/pg_parquet_test.parquet"; + + let create_table_command = 
"CREATE TABLE test_table (a int);"; + Spi::run(create_table_command).unwrap(); + + let copy_from_command = format!("COPY test_table FROM '{}';", gcs_uri); + Spi::run(copy_from_command.as_str()).unwrap(); + } + + #[pg_test] + #[should_panic(expected = "unsupported uri http://testbucket")] fn test_unsupported_uri() { let test_table = - TestTable::<i32>::new("int4".into()).with_uri("gs://testbucket".to_string()); + TestTable::<i32>::new("int4".into()).with_uri("http://testbucket".to_string()); test_table.insert("INSERT INTO test_expected (a) VALUES (1), (2), (null);"); test_helper(test_table); }