diff --git a/examples/kfp-pipelines/superworkflows/ray/kfp_v2/Makefile b/examples/kfp-pipelines/superworkflows/ray/kfp_v2/Makefile index e3014cdc6..db8974494 100644 --- a/examples/kfp-pipelines/superworkflows/ray/kfp_v2/Makefile +++ b/examples/kfp-pipelines/superworkflows/ray/kfp_v2/Makefile @@ -1,4 +1,4 @@ -REPOROOT=${CURDIR}/../../../.. +REPOROOT=${CURDIR}/../../../../.. WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate include $(REPOROOT)/transforms/.make.workflows diff --git a/examples/kfp-pipelines/superworkflows/ray/kfp_v2/README.md b/examples/kfp-pipelines/superworkflows/ray/kfp_v2/README.md index 73aeda89f..f68c1aaf7 100644 --- a/examples/kfp-pipelines/superworkflows/ray/kfp_v2/README.md +++ b/examples/kfp-pipelines/superworkflows/ray/kfp_v2/README.md @@ -1,8 +1,8 @@ # Chaining transforms using KFP V2 -As in [super pipelines of KFP v1](../../../doc/multi_transform_pipeline.md), we want to offer an option of running a series of transforms one after the other on the data. But, in KFP v2 we can make it easier to chain transforms using the [nested pipelines](https://www.kubeflow.org/docs/components/pipelines/user-guides/components/compose-components-into-pipelines/#pipelines-as-components) that KFP v2 offers. +As in [super pipelines of KFP v1](../../../../../kfp/doc/multi_transform_pipeline.md), we want to offer an option of running a series of transforms one after the other on the data. But, in KFP v2 we can make it easier to chain transforms using the [nested pipelines](https://www.kubeflow.org/docs/components/pipelines/user-guides/components/compose-components-into-pipelines/#pipelines-as-components) that KFP v2 offers. -One example of chaining `noop` and `document id` transforms can be found [here](superpipeline_noop_docId_v2.py). When running this pipeline it appears as hierarchical graph with two nested pipelines, one for each transform as shown in the following screenshots. +One example of chaining `noop` and `document id` transforms can be found [here](superpipeline_noop_docId_v2_wf.py). When running this pipeline it appears as hierarchical graph with two nested pipelines, one for each transform as shown in the following screenshots. `root` Layer ![nested_pipeline](nested_pipeline.png) @@ -27,6 +27,6 @@ Another useful feature of the KFP v2 is the `Json` editor for the `dict` type in cd examples/kfp/superworkflows/ray/kfp_v2/ make clean export KFPv2=1 -export PYTHONPATH=../../../../transforms +export PYTHONPATH=../../../../../transforms make workflow-build ``` diff --git a/kfp/kfp_ray_components/Dockerfile b/kfp/kfp_ray_components/Dockerfile index f7861122b..6f6bf323d 100644 --- a/kfp/kfp_ray_components/Dockerfile +++ b/kfp/kfp_ray_components/Dockerfile @@ -2,6 +2,11 @@ ARG BASE_IMAGE=docker.io/rayproject/ray:2.36.1-py312 FROM ${BASE_IMAGE} +# see https://docs.openshift.com/container-platform/4.17/openshift_images/create-images.html#use-uid_create-images +USER root +RUN chown ray:root /home/ray && chmod 775 /home/ray +USER ray + # install libraries COPY requirements.txt requirements.txt RUN pip install --no-cache-dir -r requirements.txt @@ -10,13 +15,13 @@ ARG DPK_WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-dist data-processing-dist +COPY --chmod=775 --chown=ray:root data-processing-dist data-processing-dist RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray] -COPY --chown=ray:users shared_workflow_support_lib shared_workflow_support_lib/ +COPY --chmod=775 --chown=ray:root shared_workflow_support_lib shared_workflow_support_lib/ RUN cd shared_workflow_support_lib && pip install --no-cache-dir -e . -COPY --chown=ray:users workflow_support_lib workflow_support_lib/ +COPY --chmod=775 --chown=ray:root workflow_support_lib workflow_support_lib/ RUN cd workflow_support_lib && pip install --no-cache-dir -e . # overwriting the installation of old versions of pydantic @@ -30,9 +35,6 @@ COPY ./src /pipelines/component/src # Set environment ENV KFP_v2=$KFP_v2 -# Grant non-root users the necessary permissions to the ray directory -RUN chmod 755 /home/ray - # Put these at the end since they seem to upset the docker cache. ARG BUILD_DATE ARG GIT_COMMIT diff --git a/tools/ingest2parquet/Dockerfile b/tools/ingest2parquet/Dockerfile index 00b88eae2..a4319c105 100644 --- a/tools/ingest2parquet/Dockerfile +++ b/tools/ingest2parquet/Dockerfile @@ -2,13 +2,18 @@ ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310 FROM ${BASE_IMAGE} +# see https://docs.openshift.com/container-platform/4.17/openshift_images/create-images.html#use-uid_create-images +USER root +RUN chown ray:root /home/ray && chmod 775 /home/ray +USER ray + # install pytest RUN pip install --no-cache-dir pytest ARG DPK_WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-dist data-processing-dist +COPY --chmod=775 --chown=ray:root data-processing-dist data-processing-dist RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray] COPY requirements.txt requirements.txt @@ -16,9 +21,9 @@ RUN pip install --no-cache-dir -r requirements.txt RUN rm requirements.txt # copy source -COPY --chown=ray:users ./src . +COPY --chmod=775 --chown=ray:root ./src . # copy test -COPY --chown=ray:users test/ test/ -COPY --chown=ray:users test-data/ test-data/ +COPY --chmod=775 --chown=ray:root test/ test/ +COPY --chmod=775 --chown=ray:root test-data/ test-data/ # Set environment ENV PYTHONPATH /home/ray diff --git a/transforms/Dockerfile.ray.template b/transforms/Dockerfile.ray.template index 944d04dd8..b8e52425b 100644 --- a/transforms/Dockerfile.ray.template +++ b/transforms/Dockerfile.ray.template @@ -1,6 +1,11 @@ ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310 FROM ${BASE_IMAGE} +# see https://docs.openshift.com/container-platform/4.17/openshift_images/create-images.html#use-uid_create-images +USER root +RUN chown ray:root /home/ray && chmod 775 /home/ray +USER ray + RUN pip install --upgrade --no-cache-dir pip # install pytest @@ -10,17 +15,14 @@ ARG TRANSFORM_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-dist data-processing-dist +COPY --chmod=775 --chown=ray:root data-processing-dist data-processing-dist RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray] -COPY --chown=ray:users dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/ -COPY --chown=ray:users requirements.txt requirements.txt +COPY --chmod=775 --chown=ray:root dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/ +COPY --chmod=775 --chown=ray:root requirements.txt requirements.txt RUN pip install --no-cache-dir -r requirements.txt -# Grant non-root users the necessary permissions to the ray directory -RUN chmod 755 /home/ray - # Set environment ENV PYTHONPATH /home/ray diff --git a/transforms/code/code2parquet/ray/Dockerfile b/transforms/code/code2parquet/ray/Dockerfile index 8464e22a9..1309416ea 100644 --- a/transforms/code/code2parquet/ray/Dockerfile +++ b/transforms/code/code2parquet/ray/Dockerfile @@ -2,6 +2,11 @@ ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310 FROM ${BASE_IMAGE} +# see https://docs.openshift.com/container-platform/4.17/openshift_images/create-images.html#use-uid_create-images +USER root +RUN chown ray:root /home/ray && chmod 775 /home/ray +USER ray + RUN pip install --upgrade --no-cache-dir pip # install pytest @@ -11,15 +16,15 @@ ARG DPK_WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-dist data-processing-dist +COPY --chmod=775 --chown=ray:root data-processing-dist data-processing-dist RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray] -COPY --chown=ray:users python-transform/ python-transform/ +COPY --chmod=775 --chown=ray:root python-transform/ python-transform/ RUN cd python-transform && pip install --no-cache-dir -e . # Install ray project source -COPY --chown=ray:users src/ src/ -COPY --chown=ray:users pyproject.toml pyproject.toml +COPY --chmod=775 --chown=ray:root src/ src/ +COPY --chmod=775 --chown=ray:root pyproject.toml pyproject.toml RUN pip install --no-cache-dir -e . # copy the main() entry point to the image @@ -32,9 +37,6 @@ COPY src/code2parquet_local_ray.py local/ COPY test/ test/ COPY test-data/ test-data/ -# Grant non-root users the necessary permissions to the ray directory -RUN chmod 755 /home/ray - # Set environment ENV PYTHONPATH /home/ray diff --git a/transforms/code/code_profiler/Dockerfile.ray b/transforms/code/code_profiler/Dockerfile.ray index 6b2e4695e..c308c284c 100644 --- a/transforms/code/code_profiler/Dockerfile.ray +++ b/transforms/code/code_profiler/Dockerfile.ray @@ -2,6 +2,11 @@ ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310 FROM ${BASE_IMAGE} +# see https://docs.openshift.com/container-platform/4.17/openshift_images/create-images.html#use-uid_create-images +USER root +RUN chown ray:root /home/ray && chmod 775 /home/ray +USER ray + RUN pip install --upgrade --no-cache-dir pip # install pytest @@ -10,17 +15,14 @@ ARG DPK_WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-dist data-processing-dist +COPY --chmod=775 --chown=ray:root data-processing-dist data-processing-dist RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray] ## Copy the python version of the tansform -COPY --chown=ray:users dpk_code_profiler/ dpk_code_profiler/ -COPY --chown=ray:users requirements.txt requirements.txt +COPY --chmod=775 --chown=ray:root dpk_code_profiler/ dpk_code_profiler/ +COPY --chmod=775 --chown=ray:root requirements.txt requirements.txt RUN pip install -r requirements.txt -# Grant non-root users the necessary permissions to the ray directory -RUN chmod 755 /home/ray - # Set environment ENV PYTHONPATH /home/ray diff --git a/transforms/code/code_quality/ray/Dockerfile b/transforms/code/code_quality/ray/Dockerfile index ca3aa2e39..54630e9d9 100644 --- a/transforms/code/code_quality/ray/Dockerfile +++ b/transforms/code/code_quality/ray/Dockerfile @@ -2,6 +2,11 @@ ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310 FROM ${BASE_IMAGE} +# see https://docs.openshift.com/container-platform/4.17/openshift_images/create-images.html#use-uid_create-images +USER root +RUN chown ray:root /home/ray && chmod 775 /home/ray +USER ray + RUN pip install --upgrade --no-cache-dir pip # install pytest @@ -14,17 +19,17 @@ ARG DPK_WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-dist data-processing-dist +COPY --chmod=775 --chown=ray:root data-processing-dist data-processing-dist RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray] -COPY --chown=ray:users python-transform/ python-transform/ +COPY --chmod=775 --chown=ray:root python-transform/ python-transform/ RUN cd python-transform && pip install --no-cache-dir -e . #COPY requirements.txt requirements.txt #RUN pip install --no-cache-dir -r requirements.txt -COPY --chown=ray:users src/ src/ -COPY --chown=ray:users pyproject.toml pyproject.toml +COPY --chmod=775 --chown=ray:root src/ src/ +COPY --chmod=775 --chown=ray:root pyproject.toml pyproject.toml RUN pip install --no-cache-dir -e . # copy the main() entry point to the image @@ -37,9 +42,6 @@ COPY ./src/code_quality_local_ray.py local/ COPY test/ test/ COPY test-data/ test-data/ -# Grant non-root users the necessary permissions to the ray directory -RUN chmod 755 /home/ray - # Set environment ENV PYTHONPATH /home/ray diff --git a/transforms/code/header_cleanser/ray/Dockerfile b/transforms/code/header_cleanser/ray/Dockerfile index 7b6ad6712..1b21cef43 100644 --- a/transforms/code/header_cleanser/ray/Dockerfile +++ b/transforms/code/header_cleanser/ray/Dockerfile @@ -1,5 +1,10 @@ FROM docker.io/rayproject/ray:2.24.0-py310 +# see https://docs.openshift.com/container-platform/4.17/openshift_images/create-images.html#use-uid_create-images +USER root +RUN chown ray:root /home/ray && chmod 775 /home/ray +USER ray + # install pytest RUN pip install --no-cache-dir pytest @@ -7,14 +12,14 @@ ARG DPK_WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-dist data-processing-dist +COPY --chmod=775 --chown=ray:root data-processing-dist data-processing-dist RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray] -COPY --chown=ray:users python-transform/ python-transform +COPY --chmod=775 --chown=ray:root python-transform/ python-transform RUN cd python-transform && pip install --no-cache-dir -e . -COPY --chown=ray:users src/ src/ -COPY --chown=ray:users pyproject.toml pyproject.toml +COPY --chmod=775 --chown=ray:root src/ src/ +COPY --chmod=775 --chown=ray:root pyproject.toml pyproject.toml RUN pip install --no-cache-dir -e . # Install system dependencies, including libgomp1 @@ -32,9 +37,6 @@ COPY src/header_cleanser_local_ray.py local/ COPY test/ test/ COPY test-data/ test-data/ -# Grant non-root users the necessary permissions to the ray directory -RUN chmod 755 /home/ray - # Set environment ENV PYTHONPATH /home/ray diff --git a/transforms/code/license_select/ray/Dockerfile b/transforms/code/license_select/ray/Dockerfile index f7288459d..6c8301c85 100644 --- a/transforms/code/license_select/ray/Dockerfile +++ b/transforms/code/license_select/ray/Dockerfile @@ -2,6 +2,11 @@ ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310 FROM ${BASE_IMAGE} +# see https://docs.openshift.com/container-platform/4.17/openshift_images/create-images.html#use-uid_create-images +USER root +RUN chown ray:root /home/ray && chmod 775 /home/ray +USER ray + RUN pip install --upgrade --no-cache-dir pip # install pytest @@ -10,15 +15,15 @@ ARG DPK_WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-dist data-processing-dist +COPY --chmod=775 --chown=ray:root data-processing-dist data-processing-dist RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray] -COPY --chown=ray:users python-transform/ python-transform/ +COPY --chmod=775 --chown=ray:root python-transform/ python-transform/ RUN cd python-transform && pip install --no-cache-dir -e . -COPY --chown=ray:users src/ src/ -COPY --chown=ray:users pyproject.toml pyproject.toml -COPY --chown=ray:users README.md README.md +COPY --chmod=775 --chown=ray:root src/ src/ +COPY --chmod=775 --chown=ray:root pyproject.toml pyproject.toml +COPY --chmod=775 --chown=ray:root README.md README.md RUN pip install --no-cache-dir -e . # copy source data @@ -29,9 +34,6 @@ COPY src/license_select_local_ray.py local/ COPY test/ test/ COPY test-data/ test-data/ -# Grant non-root users the necessary permissions to the ray directory -RUN chmod 755 /home/ray - # Put these at the end since they seem to upset the docker cache. ARG BUILD_DATE ARG GIT_COMMIT diff --git a/transforms/code/malware/ray/Dockerfile b/transforms/code/malware/ray/Dockerfile index fb05ce445..24f43d053 100644 --- a/transforms/code/malware/ray/Dockerfile +++ b/transforms/code/malware/ray/Dockerfile @@ -2,6 +2,11 @@ ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310 FROM ${BASE_IMAGE} AS base +# see https://docs.openshift.com/container-platform/4.17/openshift_images/create-images.html#use-uid_create-images +USER root +RUN chown ray:root /home/ray && chmod 775 /home/ray +USER ray + RUN pip install --upgrade --no-cache-dir pip RUN pip install --no-cache-dir pytest @@ -40,14 +45,14 @@ ARG DPK_WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-dist data-processing-dist +COPY --chmod=775 --chown=ray:root data-processing-dist data-processing-dist RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray] -COPY --chown=ray:users python-transform/ python-transform/ +COPY --chmod=775 --chown=ray:root python-transform/ python-transform/ RUN cd python-transform && pip install --no-cache-dir -e . -COPY --chown=ray:users src/ src/ -COPY --chown=ray:users pyproject.toml pyproject.toml +COPY --chmod=775 --chown=ray:root src/ src/ +COPY --chmod=775 --chown=ray:root pyproject.toml pyproject.toml RUN pip install --no-cache-dir -e . # copy the main() entry point to the image @@ -59,9 +64,6 @@ COPY src/malware_local_ray.py local/ COPY test/ test/ COPY test-data/ test-data/ -# Grant non-root users the necessary permissions to the ray directory -RUN chmod 755 /home/ray - ENV PYTHONPATH /home/ray USER root diff --git a/transforms/code/proglang_select/ray/Dockerfile b/transforms/code/proglang_select/ray/Dockerfile index f1b4591f1..f13ea3444 100644 --- a/transforms/code/proglang_select/ray/Dockerfile +++ b/transforms/code/proglang_select/ray/Dockerfile @@ -2,6 +2,11 @@ ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310 FROM ${BASE_IMAGE} +# see https://docs.openshift.com/container-platform/4.17/openshift_images/create-images.html#use-uid_create-images +USER root +RUN chown ray:root /home/ray && chmod 775 /home/ray +USER ray + RUN pip install --upgrade --no-cache-dir pip # install pytest @@ -10,17 +15,17 @@ ARG DPK_WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-dist data-processing-dist +COPY --chmod=775 --chown=ray:root data-processing-dist data-processing-dist RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray] -COPY --chown=ray:users python-transform/ python-transform/ +COPY --chmod=775 --chown=ray:root python-transform/ python-transform/ RUN cd python-transform && pip install --no-cache-dir -e . #COPY requirements.txt requirements.txt #RUN pip install --no-cache-dir -r requirements.txt -COPY --chown=ray:users src/ src/ -COPY --chown=ray:users pyproject.toml pyproject.toml +COPY --chmod=775 --chown=ray:root src/ src/ +COPY --chmod=775 --chown=ray:root pyproject.toml pyproject.toml RUN pip install --no-cache-dir -e . # copy the main() entry point to the image @@ -33,9 +38,6 @@ COPY ./src/proglang_select_local_ray.py local/ COPY test/ test/ COPY test-data/ test-data/ -# Grant non-root users the necessary permissions to the ray directory -RUN chmod 755 /home/ray - # Set environment ENV PYTHONPATH /home/ray diff --git a/transforms/code/repo_level_ordering/ray/Dockerfile b/transforms/code/repo_level_ordering/ray/Dockerfile index 018e61b20..79806dd73 100644 --- a/transforms/code/repo_level_ordering/ray/Dockerfile +++ b/transforms/code/repo_level_ordering/ray/Dockerfile @@ -2,6 +2,11 @@ ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310 FROM ${BASE_IMAGE} +# see https://docs.openshift.com/container-platform/4.17/openshift_images/create-images.html#use-uid_create-images +USER root +RUN chown ray:root /home/ray && chmod 775 /home/ray +USER ray + RUN pip install --upgrade --no-cache-dir pip # install pytest @@ -10,12 +15,12 @@ ARG DPK_WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-dist data-processing-dist +COPY --chmod=775 --chown=ray:root data-processing-dist data-processing-dist RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray] -COPY --chown=ray:users src/ src/ -COPY --chown=ray:users pyproject.toml pyproject.toml -COPY --chown=ray:users README.md README.md +COPY --chmod=775 --chown=ray:root src/ src/ +COPY --chmod=775 --chown=ray:root pyproject.toml pyproject.toml +COPY --chmod=775 --chown=ray:root README.md README.md RUN pip install --no-cache-dir -e . # copy source data @@ -27,9 +32,6 @@ COPY ./src/repo_level_order_s3_ray.py local/ COPY test/ test/ COPY test-data/ test-data/ -# Grant non-root users the necessary permissions to the ray directory -RUN chmod 755 /home/ray - # Set environment ENV PYTHONPATH /home/ray:/home/ray/src diff --git a/transforms/language/doc_chunk/Dockerfile.ray b/transforms/language/doc_chunk/Dockerfile.ray index 14aa9aca0..63f2981c3 100644 --- a/transforms/language/doc_chunk/Dockerfile.ray +++ b/transforms/language/doc_chunk/Dockerfile.ray @@ -1,6 +1,11 @@ ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310 FROM ${BASE_IMAGE} +# see https://docs.openshift.com/container-platform/4.17/openshift_images/create-images.html#use-uid_create-images +USER root +RUN chown ray:root /home/ray && chmod 775 /home/ray +USER ray + # install pytest RUN pip install --no-cache-dir pytest @@ -9,17 +14,14 @@ ARG DPK_WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-dist data-processing-dist +COPY --chmod=775 --chown=ray:root data-processing-dist data-processing-dist RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray] -COPY --chown=ray:users dpk_doc_chunk/ dpk_doc_chunk/ -COPY --chown=ray:users requirements.txt requirements.txt +COPY --chmod=775 --chown=ray:root dpk_doc_chunk/ dpk_doc_chunk/ +COPY --chmod=775 --chown=ray:root requirements.txt requirements.txt RUN pip install ${PIP_INSTALL_EXTRA_ARGS} --no-cache-dir -r requirements.txt -# Grant non-root users the necessary permissions to the ray directory -RUN chmod 755 /home/ray - # Set environment ENV PYTHONPATH /home/ray diff --git a/transforms/language/doc_quality/Dockerfile.ray b/transforms/language/doc_quality/Dockerfile.ray index af3db8de8..6b4ce18a1 100644 --- a/transforms/language/doc_quality/Dockerfile.ray +++ b/transforms/language/doc_quality/Dockerfile.ray @@ -2,6 +2,11 @@ ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310 FROM ${BASE_IMAGE} +# see https://docs.openshift.com/container-platform/4.17/openshift_images/create-images.html#use-uid_create-images +USER root +RUN chown ray:root /home/ray && chmod 775 /home/ray +USER ray + RUN pip install --upgrade --no-cache-dir pip # install pytest @@ -10,17 +15,14 @@ ARG DPK_WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-dist data-processing-dist +COPY --chmod=775 --chown=ray:root data-processing-dist data-processing-dist RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray] ## Copy the python version of the tansform -COPY --chown=ray:users dpk_doc_quality/ dpk_doc_quality/ -COPY --chown=ray:users requirements.txt requirements.txt +COPY --chmod=775 --chown=ray:root dpk_doc_quality/ dpk_doc_quality/ +COPY --chmod=775 --chown=ray:root requirements.txt requirements.txt RUN pip install -r requirements.txt -# Grant non-root users the necessary permissions to the ray directory -RUN chmod 755 /home/ray - # Set environment ENV PYTHONPATH /home/ray diff --git a/transforms/language/html2parquet/Dockerfile.ray b/transforms/language/html2parquet/Dockerfile.ray index 77b6632b3..f246116f4 100644 --- a/transforms/language/html2parquet/Dockerfile.ray +++ b/transforms/language/html2parquet/Dockerfile.ray @@ -2,6 +2,11 @@ ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310 FROM ${BASE_IMAGE} +# see https://docs.openshift.com/container-platform/4.17/openshift_images/create-images.html#use-uid_create-images +USER root +RUN chown ray:root /home/ray && chmod 775 /home/ray +USER ray + RUN pip install --upgrade --no-cache-dir pip # install pytest @@ -10,17 +15,14 @@ ARG DPK_WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-dist data-processing-dist +COPY --chmod=775 --chown=ray:root data-processing-dist data-processing-dist RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray] ## Copy the python version of the tansform -COPY --chown=ray:users dpk_html2parquet/ dpk_html2parquet/ -COPY --chown=ray:users requirements.txt requirements.txt +COPY --chmod=775 --chown=ray:root dpk_html2parquet/ dpk_html2parquet/ +COPY --chmod=775 --chown=ray:root requirements.txt requirements.txt RUN pip install -r requirements.txt -# Grant non-root users the necessary permissions to the ray directory -RUN chmod 755 /home/ray - # Set environment ENV PYTHONPATH /home/ray diff --git a/transforms/language/lang_id/Dockerfile.ray b/transforms/language/lang_id/Dockerfile.ray index 20d2517f5..ce81c320f 100644 --- a/transforms/language/lang_id/Dockerfile.ray +++ b/transforms/language/lang_id/Dockerfile.ray @@ -2,6 +2,11 @@ ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310 FROM ${BASE_IMAGE} +# see https://docs.openshift.com/container-platform/4.17/openshift_images/create-images.html#use-uid_create-images +USER root +RUN chown ray:root /home/ray && chmod 775 /home/ray +USER ray + RUN pip install --upgrade --no-cache-dir pip # install pytest @@ -16,12 +21,12 @@ USER ray # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-dist data-processing-dist +COPY --chmod=775 --chown=ray:root data-processing-dist data-processing-dist RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray] -COPY --chown=ray:users dpk_lang_id/ dpk_lang_id/ -COPY --chown=ray:users requirements.txt requirements.txt +COPY --chmod=775 --chown=ray:root dpk_lang_id/ dpk_lang_id/ +COPY --chmod=775 --chown=ray:root requirements.txt requirements.txt RUN pip install --no-cache-dir -r requirements.txt # clean up apt @@ -31,9 +36,6 @@ RUN sudo apt remove gcc g++ -y \ && sudo rm -rf /var/cache/apt/archives/* /var/lib/apt/lists/* USER ray -# Grant non-root users the necessary permissions to the ray directory -RUN chmod 755 /home/ray - # Set environment ENV PYTHONPATH /home/ray diff --git a/transforms/language/pdf2parquet/Dockerfile.ray b/transforms/language/pdf2parquet/Dockerfile.ray index 1ae13dbad..e295a9e7f 100644 --- a/transforms/language/pdf2parquet/Dockerfile.ray +++ b/transforms/language/pdf2parquet/Dockerfile.ray @@ -2,6 +2,11 @@ ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310 FROM ${BASE_IMAGE} +# see https://docs.openshift.com/container-platform/4.17/openshift_images/create-images.html#use-uid_create-images +USER root +RUN chown ray:root /home/ray && chmod 775 /home/ray +USER ray + RUN pip install --upgrade --no-cache-dir pip # install pytest @@ -18,13 +23,13 @@ RUN \ # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-dist data-processing-dist +COPY --chmod=775 --chown=ray:root data-processing-dist data-processing-dist RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray] ## Copy the python version of the tansform -COPY --chown=ray:users dpk_pdf2parquet/ dpk_pdf2parquet/ -COPY --chown=ray:users requirements.txt requirements.txt +COPY --chmod=775 --chown=ray:root dpk_pdf2parquet/ dpk_pdf2parquet/ +COPY --chmod=775 --chown=ray:root requirements.txt requirements.txt RUN pip install ${PIP_INSTALL_EXTRA_ARGS} -r requirements.txt @@ -34,10 +39,6 @@ RUN python -c 'from deepsearch_glm.utils.load_pretrained_models import load_pret # RUN python -c 'from docling.document_converter import DocumentConverter; from pathlib import Path; DocumentConverter.download_models_hf(local_dir=Path("./artifacts/"));' RUN python -c 'from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline; s=StandardPdfPipeline.download_models_hf(); print(f"Models cached in {s}")' - -# Grant non-root users the necessary permissions to the ray directory -RUN chmod 755 /home/ray - # Set environment ENV PYTHONPATH /home/ray diff --git a/transforms/language/pii_redactor/ray/Dockerfile b/transforms/language/pii_redactor/ray/Dockerfile index 426b8c542..a8ce793cf 100644 --- a/transforms/language/pii_redactor/ray/Dockerfile +++ b/transforms/language/pii_redactor/ray/Dockerfile @@ -1,6 +1,11 @@ ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310 FROM ${BASE_IMAGE} +# see https://docs.openshift.com/container-platform/4.17/openshift_images/create-images.html#use-uid_create-images +USER root +RUN chown ray:root /home/ray && chmod 775 /home/ray +USER ray + RUN pip install --upgrade --no-cache-dir pip # install pytest @@ -9,18 +14,18 @@ ARG PIP_INSTALL_EXTRA_ARGS # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-dist data-processing-dist +COPY --chmod=775 --chown=ray:root data-processing-dist data-processing-dist RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray] ## Copy the python version of the tansform -COPY --chown=ray:users python-transform/ python-transform/ +COPY --chmod=775 --chown=ray:root python-transform/ python-transform/ RUN cd python-transform && pip install --no-cache-dir -e . #COPY requirements.txt requirements.txt #RUN pip install --no-cache-dir -r requirements.txt -COPY --chown=ray:users src/ src/ -COPY --chown=ray:users pyproject.toml pyproject.toml +COPY --chmod=775 --chown=ray:root src/ src/ +COPY --chmod=775 --chown=ray:root pyproject.toml pyproject.toml RUN pip install --no-cache-dir -e . # copy the main() entry point to the image @@ -33,9 +38,6 @@ COPY ./src/pii_redactor_local_ray.py local/ COPY test/ test/ COPY test-data/ test-data/ -# Grant non-root users the necessary permissions to the ray directory -RUN chmod 755 /home/ray - # Set environment ENV PYTHONPATH /home/ray diff --git a/transforms/language/text_encoder/Dockerfile.ray b/transforms/language/text_encoder/Dockerfile.ray index e31ddea60..4b6bee791 100644 --- a/transforms/language/text_encoder/Dockerfile.ray +++ b/transforms/language/text_encoder/Dockerfile.ray @@ -1,6 +1,11 @@ ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310 FROM ${BASE_IMAGE} +# see https://docs.openshift.com/container-platform/4.17/openshift_images/create-images.html#use-uid_create-images +USER root +RUN chown ray:root /home/ray && chmod 775 /home/ray +USER ray + # install pytest RUN pip install --no-cache-dir pytest @@ -8,16 +13,13 @@ ARG PIP_INSTALL_EXTRA_ARGS ARG DPK_WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-dist data-processing-dist +COPY --chmod=775 --chown=ray:root data-processing-dist data-processing-dist RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray] -COPY --chown=ray:users dpk_text_encoder/ dpk_text_encoder/ -COPY --chown=ray:users requirements.txt requirements.txt +COPY --chmod=775 --chown=ray:root dpk_text_encoder/ dpk_text_encoder/ +COPY --chmod=775 --chown=ray:root requirements.txt requirements.txt RUN pip install ${PIP_INSTALL_EXTRA_ARGS} --no-cache-dir -r requirements.txt -# Grant non-root users the necessary permissions to the ray directory -RUN chmod 755 /home/ray - # Set environment ENV PYTHONPATH /home/ray diff --git a/transforms/universal/doc_id/Dockerfile.ray b/transforms/universal/doc_id/Dockerfile.ray index 944d04dd8..b8e52425b 100644 --- a/transforms/universal/doc_id/Dockerfile.ray +++ b/transforms/universal/doc_id/Dockerfile.ray @@ -1,6 +1,11 @@ ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310 FROM ${BASE_IMAGE} +# see https://docs.openshift.com/container-platform/4.17/openshift_images/create-images.html#use-uid_create-images +USER root +RUN chown ray:root /home/ray && chmod 775 /home/ray +USER ray + RUN pip install --upgrade --no-cache-dir pip # install pytest @@ -10,17 +15,14 @@ ARG TRANSFORM_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-dist data-processing-dist +COPY --chmod=775 --chown=ray:root data-processing-dist data-processing-dist RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray] -COPY --chown=ray:users dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/ -COPY --chown=ray:users requirements.txt requirements.txt +COPY --chmod=775 --chown=ray:root dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/ +COPY --chmod=775 --chown=ray:root requirements.txt requirements.txt RUN pip install --no-cache-dir -r requirements.txt -# Grant non-root users the necessary permissions to the ray directory -RUN chmod 755 /home/ray - # Set environment ENV PYTHONPATH /home/ray diff --git a/transforms/universal/ededup/Dockerfile.ray b/transforms/universal/ededup/Dockerfile.ray index 378cb7227..2584f7979 100644 --- a/transforms/universal/ededup/Dockerfile.ray +++ b/transforms/universal/ededup/Dockerfile.ray @@ -2,6 +2,11 @@ ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310 FROM ${BASE_IMAGE} +# see https://docs.openshift.com/container-platform/4.17/openshift_images/create-images.html#use-uid_create-images +USER root +RUN chown ray:root /home/ray && chmod 775 /home/ray +USER ray + RUN pip install --upgrade --no-cache-dir pip # install pytest @@ -10,19 +15,16 @@ ARG DPK_WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-dist data-processing-dist +COPY --chmod=775 --chown=ray:root data-processing-dist data-processing-dist RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray] # Install ray project source -COPY --chown=ray:users dpk_ededup/ dpk_ededup/ -COPY --chown=ray:users requirements.txt requirements.txt -COPY --chown=ray:users README.md README.md +COPY --chmod=775 --chown=ray:root dpk_ededup/ dpk_ededup/ +COPY --chmod=775 --chown=ray:root requirements.txt requirements.txt +COPY --chmod=775 --chown=ray:root README.md README.md RUN pip install --no-cache-dir -r requirements.txt -# Grant non-root users the necessary permissions to the ray directory -RUN chmod 755 /home/ray - # Set environment ENV PYTHONPATH /home/ray diff --git a/transforms/universal/fdedup/Dockerfile.python b/transforms/universal/fdedup/Dockerfile.python index 1a53451d5..cecfef17e 100644 --- a/transforms/universal/fdedup/Dockerfile.python +++ b/transforms/universal/fdedup/Dockerfile.python @@ -17,7 +17,6 @@ COPY --chown=dpk:root dpk_fdedup/ dpk_fdedup/ COPY --chown=dpk:root requirements.txt requirements.txt RUN pip install --no-cache-dir -r requirements.txt - # Set environment ENV PYTHONPATH /home/dpk diff --git a/transforms/universal/fdedup/Dockerfile.ray b/transforms/universal/fdedup/Dockerfile.ray index 379e45bad..da1c668f1 100644 --- a/transforms/universal/fdedup/Dockerfile.ray +++ b/transforms/universal/fdedup/Dockerfile.ray @@ -2,6 +2,11 @@ ARG BASE_IMAGE=docker.io/rayproject/ray:2.36.1-py310 FROM ${BASE_IMAGE} +# see https://docs.openshift.com/container-platform/4.17/openshift_images/create-images.html#use-uid_create-images +USER root +RUN chown ray:root /home/ray && chmod 775 /home/ray +USER ray + RUN pip install --upgrade --no-cache-dir pip # install pytest @@ -10,17 +15,14 @@ ARG DPK_WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-dist data-processing-dist +COPY --chmod=775 --chown=ray:root data-processing-dist data-processing-dist RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray] ## Copy the python version of the tansform -COPY --chown=ray:users dpk_fdedup/ dpk_fdedup/ -COPY --chown=ray:users requirements.txt requirements.txt +COPY --chmod=775 --chown=ray:root dpk_fdedup/ dpk_fdedup/ +COPY --chmod=775 --chown=ray:root requirements.txt requirements.txt RUN pip install -r requirements.txt -# Grant non-root users the necessary permissions to the ray directory -RUN chmod 755 /home/ray - # Set environment ENV PYTHONPATH /home/ray diff --git a/transforms/universal/filter/Dockerfile.ray b/transforms/universal/filter/Dockerfile.ray index 944d04dd8..b8e52425b 100644 --- a/transforms/universal/filter/Dockerfile.ray +++ b/transforms/universal/filter/Dockerfile.ray @@ -1,6 +1,11 @@ ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310 FROM ${BASE_IMAGE} +# see https://docs.openshift.com/container-platform/4.17/openshift_images/create-images.html#use-uid_create-images +USER root +RUN chown ray:root /home/ray && chmod 775 /home/ray +USER ray + RUN pip install --upgrade --no-cache-dir pip # install pytest @@ -10,17 +15,14 @@ ARG TRANSFORM_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-dist data-processing-dist +COPY --chmod=775 --chown=ray:root data-processing-dist data-processing-dist RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray] -COPY --chown=ray:users dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/ -COPY --chown=ray:users requirements.txt requirements.txt +COPY --chmod=775 --chown=ray:root dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/ +COPY --chmod=775 --chown=ray:root requirements.txt requirements.txt RUN pip install --no-cache-dir -r requirements.txt -# Grant non-root users the necessary permissions to the ray directory -RUN chmod 755 /home/ray - # Set environment ENV PYTHONPATH /home/ray diff --git a/transforms/universal/hap/Dockerfile.ray b/transforms/universal/hap/Dockerfile.ray index 944d04dd8..b8e52425b 100644 --- a/transforms/universal/hap/Dockerfile.ray +++ b/transforms/universal/hap/Dockerfile.ray @@ -1,6 +1,11 @@ ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310 FROM ${BASE_IMAGE} +# see https://docs.openshift.com/container-platform/4.17/openshift_images/create-images.html#use-uid_create-images +USER root +RUN chown ray:root /home/ray && chmod 775 /home/ray +USER ray + RUN pip install --upgrade --no-cache-dir pip # install pytest @@ -10,17 +15,14 @@ ARG TRANSFORM_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-dist data-processing-dist +COPY --chmod=775 --chown=ray:root data-processing-dist data-processing-dist RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray] -COPY --chown=ray:users dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/ -COPY --chown=ray:users requirements.txt requirements.txt +COPY --chmod=775 --chown=ray:root dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/ +COPY --chmod=775 --chown=ray:root requirements.txt requirements.txt RUN pip install --no-cache-dir -r requirements.txt -# Grant non-root users the necessary permissions to the ray directory -RUN chmod 755 /home/ray - # Set environment ENV PYTHONPATH /home/ray diff --git a/transforms/universal/noop/ray/Dockerfile b/transforms/universal/noop/ray/Dockerfile index 2cf8b5705..796a9559f 100644 --- a/transforms/universal/noop/ray/Dockerfile +++ b/transforms/universal/noop/ray/Dockerfile @@ -1,6 +1,11 @@ ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310 FROM ${BASE_IMAGE} +# see https://docs.openshift.com/container-platform/4.17/openshift_images/create-images.html#use-uid_create-images +USER root +RUN chown ray:root /home/ray && chmod 775 /home/ray +USER ray + RUN pip install --upgrade --no-cache-dir pip # install pytest @@ -9,18 +14,18 @@ ARG DPK_WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-dist data-processing-dist +COPY --chmod=775 --chown=ray:root data-processing-dist data-processing-dist RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray] ## Copy the python version of the tansform -COPY --chown=ray:users python-transform/ python-transform/ +COPY --chmod=775 --chown=ray:root python-transform/ python-transform/ RUN cd python-transform && pip install --no-cache-dir -e . #COPY requirements.txt requirements.txt #RUN pip install --no-cache-dir -r requirements.txt -COPY --chown=ray:users src/ src/ -COPY --chown=ray:users pyproject.toml pyproject.toml +COPY --chmod=775 --chown=ray:root src/ src/ +COPY --chmod=775 --chown=ray:root pyproject.toml pyproject.toml RUN pip install --no-cache-dir -e . # copy the main() entry point to the image @@ -33,9 +38,6 @@ COPY ./src/noop_local_ray.py local/ COPY test/ test/ COPY test-data/ test-data/ -# Grant non-root users the necessary permissions to the ray directory -RUN chmod 755 /home/ray - # Set environment ENV PYTHONPATH /home/ray diff --git a/transforms/universal/profiler/ray/Dockerfile b/transforms/universal/profiler/ray/Dockerfile index 06ce60a32..131229d1f 100644 --- a/transforms/universal/profiler/ray/Dockerfile +++ b/transforms/universal/profiler/ray/Dockerfile @@ -2,6 +2,11 @@ ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310 FROM ${BASE_IMAGE} +# see https://docs.openshift.com/container-platform/4.17/openshift_images/create-images.html#use-uid_create-images +USER root +RUN chown ray:root /home/ray && chmod 775 /home/ray +USER ray + RUN pip install --upgrade --no-cache-dir pip # install pytest @@ -11,17 +16,17 @@ ARG DPK_WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-dist data-processing-dist +COPY --chmod=775 --chown=ray:root data-processing-dist data-processing-dist RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray] ## Copy the python version of the tansform -COPY --chown=ray:users python-transform/ python-transform/ +COPY --chmod=775 --chown=ray:root python-transform/ python-transform/ RUN cd python-transform && pip install --no-cache-dir -e . # Install ray project source -COPY --chown=ray:users src/ src/ -COPY --chown=ray:users pyproject.toml pyproject.toml -COPY --chown=ray:users README.md README.md +COPY --chmod=775 --chown=ray:root src/ src/ +COPY --chmod=775 --chown=ray:root pyproject.toml pyproject.toml +COPY --chmod=775 --chown=ray:root README.md README.md RUN pip install --no-cache-dir -e . # copy the main() entry point to the image @@ -34,9 +39,6 @@ COPY src/profiler_local_ray.py local/ COPY test/ test/ COPY test-data/ test-data/ -# Grant non-root users the necessary permissions to the ray directory -RUN chmod 755 /home/ray - # Set environment ENV PYTHONPATH /home/ray diff --git a/transforms/universal/resize/ray/Dockerfile b/transforms/universal/resize/ray/Dockerfile index 577840d74..f65430e7c 100644 --- a/transforms/universal/resize/ray/Dockerfile +++ b/transforms/universal/resize/ray/Dockerfile @@ -2,6 +2,11 @@ ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310 FROM ${BASE_IMAGE} +# see https://docs.openshift.com/container-platform/4.17/openshift_images/create-images.html#use-uid_create-images +USER root +RUN chown ray:root /home/ray && chmod 775 /home/ray +USER ray + RUN pip install --upgrade --no-cache-dir pip # Install pytest so we can test the image later @@ -10,15 +15,15 @@ ARG DPK_WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-dist data-processing-dist +COPY --chmod=775 --chown=ray:root data-processing-dist data-processing-dist RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray] ## Copy the python version of the tansform -COPY --chown=ray:users python-transform/ python-transform/ +COPY --chmod=775 --chown=ray:root python-transform/ python-transform/ RUN cd python-transform && pip install --no-cache-dir -e . -COPY --chown=ray:users src/ src/ -COPY --chown=ray:users pyproject.toml pyproject.toml +COPY --chmod=775 --chown=ray:root src/ src/ +COPY --chmod=775 --chown=ray:root pyproject.toml pyproject.toml RUN pip install --no-cache-dir -e . # copy the main() entry point to the image @@ -30,9 +35,6 @@ COPY ./src/resize_local_ray.py local/ COPY test/ test/ COPY test-data/ test-data/ -# Grant non-root users the necessary permissions to the ray directory -RUN chmod 755 /home/ray - USER root RUN chown -R ray /home/ray/test RUN chown -R ray /home/ray/test-data diff --git a/transforms/universal/tokenization/Dockerfile.ray b/transforms/universal/tokenization/Dockerfile.ray index 5462e48e8..50e6ff7a3 100644 --- a/transforms/universal/tokenization/Dockerfile.ray +++ b/transforms/universal/tokenization/Dockerfile.ray @@ -2,6 +2,11 @@ ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310 FROM ${BASE_IMAGE} +# see https://docs.openshift.com/container-platform/4.17/openshift_images/create-images.html#use-uid_create-images +USER root +RUN chown ray:root /home/ray && chmod 775 /home/ray +USER ray + RUN pip install --upgrade --no-cache-dir pip # Install pytest so we can test the image later @@ -10,15 +15,14 @@ ARG DPK_WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-dist data-processing-dist +COPY --chmod=775 --chown=ray:root data-processing-dist data-processing-dist RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray] -COPY --chown=ray:users dpk_tokenization/ dpk_tokenization/ -COPY --chown=ray:users requirements.txt requirements.txt +COPY --chmod=775 --chown=ray:root dpk_tokenization/ dpk_tokenization/ +COPY --chmod=775 --chown=ray:root requirements.txt requirements.txt RUN pip install --no-cache-dir -r requirements.txt - # Set environment ENV PYTHONPATH /home/ray