diff --git a/.make.defaults b/.make.defaults
index 80df91c8e..1f0ffa7cb 100644
--- a/.make.defaults
+++ b/.make.defaults
@@ -226,6 +226,7 @@ __check_defined = \
 	--platform $(DOCKER_PLATFORM) \
 	--build-arg EXTRA_INDEX_URL=$(EXTRA_INDEX_URL) \
 	--build-arg BASE_IMAGE=$(BASE_IMAGE) \
+	--build-arg TRANSFORM_NAME=$(TRANSFORM_NAME) \
 	--build-arg DPK_WHEEL_FILE_NAME=$(DPK_WHEEL_FILE_NAME) \
 	--build-arg BUILD_DATE=$(shell date -u +'%Y-%m-%dT%H:%M:%SZ') \
 	--build-arg GIT_COMMIT=$(shell git log -1 --format=%h) .
diff --git a/transforms/Dockerfile.python.template b/transforms/Dockerfile.python.template
new file mode 100644
index 000000000..9f38097b7
--- /dev/null
+++ b/transforms/Dockerfile.python.template
@@ -0,0 +1,33 @@
+FROM docker.io/python:3.10.14-slim-bullseye
+
+RUN pip install --upgrade --no-cache-dir pip
+
+# install pytest
+RUN pip install --no-cache-dir pytest
+
+# Create a user and use it to run the transform
+RUN useradd -ms /bin/bash dpk
+USER dpk
+WORKDIR /home/dpk
+ARG DPK_WHEEL_FILE_NAME
+ARG TRANSFORM_NAME
+
+# Copy and install data processing libraries
+# These are expected to be placed in the docker context before this is run (see the make image).
+COPY --chown=dpk:users data-processing-dist data-processing-dist
+RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}
+
+# END OF STEPS destined for a data-prep-kit base image
+
+COPY --chown=dpk:users dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/
+COPY --chown=dpk:users requirements.txt requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Set environment
+ENV PYTHONPATH /home/dpk
+
+# Put these at the end since they seem to upset the docker cache.
+ARG BUILD_DATE
+ARG GIT_COMMIT
+LABEL build-date=$BUILD_DATE
+LABEL git-commit=$GIT_COMMIT
diff --git a/transforms/Dockerfile.ray.template b/transforms/Dockerfile.ray.template
new file mode 100644
index 000000000..944d04dd8
--- /dev/null
+++ b/transforms/Dockerfile.ray.template
@@ -0,0 +1,31 @@
+ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310
+FROM ${BASE_IMAGE}
+
+RUN pip install --upgrade --no-cache-dir pip
+
+# install pytest
+RUN pip install --no-cache-dir pytest
+ARG DPK_WHEEL_FILE_NAME
+ARG TRANSFORM_NAME
+
+# Copy and install data processing libraries
+# These are expected to be placed in the docker context before this is run (see the make image).
+COPY --chown=ray:users data-processing-dist data-processing-dist
+RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray]
+
+
+COPY --chown=ray:users dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/
+COPY --chown=ray:users requirements.txt requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Grant non-root users the necessary permissions to the ray directory
+RUN chmod 755 /home/ray
+
+# Set environment
+ENV PYTHONPATH /home/ray
+
+# Put these at the end since they seem to upset the docker cache.
+ARG BUILD_DATE
+ARG GIT_COMMIT
+LABEL build-date=$BUILD_DATE
+LABEL git-commit=$GIT_COMMIT
diff --git a/transforms/Dockerfile.spark.template b/transforms/Dockerfile.spark.template
new file mode 100644
index 000000000..1af783438
--- /dev/null
+++ b/transforms/Dockerfile.spark.template
@@ -0,0 +1,34 @@
+FROM quay.io/dataprep1/data-prep-kit/data-prep-kit-spark-3.5.2:latest
+
+USER root
+# install pytest
+RUN pip install --no-cache-dir pytest
+
+WORKDIR ${SPARK_HOME}/work-dir
+ARG DPK_WHEEL_FILE_NAME
+ARG TRANSFORM_NAME
+
+# Copy and install data processing libraries
+# These are expected to be placed in the docker context before this is run (see the make image).
+COPY --chown=spark:users data-processing-dist data-processing-dist
+RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[spark]
+
+
+# Install project source
+
+## Copy the python version of the tansform
+COPY --chown=spark:users dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/
+COPY --chown=spark:users requirements.txt requirements.txt
+RUN pip install -r requirements.txt
+
+
+USER spark
+
+# Set environment
+ENV PYTHONPATH=${SPARK_HOME}/work-dir/:${PYTHONPATH}
+
+# Put these at the end since they seem to upset the docker cache.
+ARG BUILD_DATE
+ARG GIT_COMMIT
+LABEL build-date=$BUILD_DATE
+LABEL git-commit=$GIT_COMMIT
diff --git a/transforms/universal/doc_id/Dockerfile.python b/transforms/universal/doc_id/Dockerfile.python
index fc634a043..9f38097b7 100644
--- a/transforms/universal/doc_id/Dockerfile.python
+++ b/transforms/universal/doc_id/Dockerfile.python
@@ -2,21 +2,26 @@ FROM docker.io/python:3.10.14-slim-bullseye
 
 RUN pip install --upgrade --no-cache-dir pip
 
+# install pytest
+RUN pip install --no-cache-dir pytest
+
 # Create a user and use it to run the transform
 RUN useradd -ms /bin/bash dpk
 USER dpk
 WORKDIR /home/dpk
 ARG DPK_WHEEL_FILE_NAME
+ARG TRANSFORM_NAME
 
 # Copy and install data processing libraries
 # These are expected to be placed in the docker context before this is run (see the make image).
-COPY --chown=dpk:root data-processing-dist data-processing-dist
+COPY --chown=dpk:users data-processing-dist data-processing-dist
 RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}
 
-COPY --chown=dpk:root dpk_doc_id/ dpk_doc_id/
-COPY --chown=dpk:root requirements.txt requirements.txt
-RUN pip install --no-cache-dir -r requirements.txt
+# END OF STEPS destined for a data-prep-kit base image
 
+COPY --chown=dpk:users dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/
+COPY --chown=dpk:users requirements.txt requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
 
 # Set environment
 ENV PYTHONPATH /home/dpk
diff --git a/transforms/universal/doc_id/Dockerfile.ray b/transforms/universal/doc_id/Dockerfile.ray
index d8eadc23b..944d04dd8 100644
--- a/transforms/universal/doc_id/Dockerfile.ray
+++ b/transforms/universal/doc_id/Dockerfile.ray
@@ -1,5 +1,4 @@
 ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310
-
 FROM ${BASE_IMAGE}
 
 RUN pip install --upgrade --no-cache-dir pip
@@ -7,16 +6,17 @@ RUN pip install --upgrade --no-cache-dir pip
 # install pytest
 RUN pip install --no-cache-dir pytest
 ARG DPK_WHEEL_FILE_NAME
+ARG TRANSFORM_NAME
 
 # Copy and install data processing libraries
 # These are expected to be placed in the docker context before this is run (see the make image).
 COPY --chown=ray:users data-processing-dist data-processing-dist
 RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray]
 
-## Copy the python version of the tansform
-COPY --chown=ray:users dpk_doc_id/ dpk_doc_id/
-COPY --chown=ray:users requirements.txt requirements.txt
-RUN pip install -r requirements.txt
+
+COPY --chown=ray:users dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/
+COPY --chown=ray:users requirements.txt requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
 
 # Grant non-root users the necessary permissions to the ray directory
 RUN chmod 755 /home/ray
@@ -28,4 +28,4 @@ ENV PYTHONPATH /home/ray
 ARG BUILD_DATE
 ARG GIT_COMMIT
 LABEL build-date=$BUILD_DATE
-LABEL git-commit=$GIT_COMMIT
\ No newline at end of file
+LABEL git-commit=$GIT_COMMIT
diff --git a/transforms/universal/doc_id/Dockerfile.spark b/transforms/universal/doc_id/Dockerfile.spark
index 70c626a87..1af783438 100644
--- a/transforms/universal/doc_id/Dockerfile.spark
+++ b/transforms/universal/doc_id/Dockerfile.spark
@@ -6,18 +6,19 @@ RUN pip install --no-cache-dir pytest
 
 WORKDIR ${SPARK_HOME}/work-dir
 ARG DPK_WHEEL_FILE_NAME
+ARG TRANSFORM_NAME
 
 # Copy and install data processing libraries
 # These are expected to be placed in the docker context before this is run (see the make image).
-COPY --chown=spark:root data-processing-dist data-processing-dist
+COPY --chown=spark:users data-processing-dist data-processing-dist
 RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[spark]
 
 
 # Install project source
 
 ## Copy the python version of the tansform
-COPY --chown=spark:root dpk_doc_id/ dpk_doc_id/
-COPY --chown=spark:root requirements.txt requirements.txt
+COPY --chown=spark:users dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/
+COPY --chown=spark:users requirements.txt requirements.txt
 RUN pip install -r requirements.txt
diff --git a/transforms/universal/filter/Dockerfile.python b/transforms/universal/filter/Dockerfile.python
index 68319778b..9f38097b7 100644
--- a/transforms/universal/filter/Dockerfile.python
+++ b/transforms/universal/filter/Dockerfile.python
@@ -2,21 +2,26 @@ FROM docker.io/python:3.10.14-slim-bullseye
 
 RUN pip install --upgrade --no-cache-dir pip
 
+# install pytest
+RUN pip install --no-cache-dir pytest
+
 # Create a user and use it to run the transform
 RUN useradd -ms /bin/bash dpk
 USER dpk
 WORKDIR /home/dpk
 ARG DPK_WHEEL_FILE_NAME
+ARG TRANSFORM_NAME
 
 # Copy and install data processing libraries
 # These are expected to be placed in the docker context before this is run (see the make image).
-COPY --chown=dpk:root data-processing-dist data-processing-dist
+COPY --chown=dpk:users data-processing-dist data-processing-dist
 RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}
 
-COPY --chown=dpk:root dpk_filter/ dpk_filter/
-COPY --chown=dpk:root requirements.txt requirements.txt
-RUN pip install --no-cache-dir -r requirements.txt
+# END OF STEPS destined for a data-prep-kit base image
 
+COPY --chown=dpk:users dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/
+COPY --chown=dpk:users requirements.txt requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
 
 # Set environment
 ENV PYTHONPATH /home/dpk
diff --git a/transforms/universal/filter/Dockerfile.ray b/transforms/universal/filter/Dockerfile.ray
index fb2db3bd2..944d04dd8 100644
--- a/transforms/universal/filter/Dockerfile.ray
+++ b/transforms/universal/filter/Dockerfile.ray
@@ -1,5 +1,4 @@
 ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310
-
 FROM ${BASE_IMAGE}
 
 RUN pip install --upgrade --no-cache-dir pip
@@ -7,16 +6,17 @@ RUN pip install --upgrade --no-cache-dir pip
 # install pytest
 RUN pip install --no-cache-dir pytest
 ARG DPK_WHEEL_FILE_NAME
+ARG TRANSFORM_NAME
 
 # Copy and install data processing libraries
 # These are expected to be placed in the docker context before this is run (see the make image).
 COPY --chown=ray:users data-processing-dist data-processing-dist
 RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray]
 
-## Copy the python version of the tansform
-COPY --chown=ray:users dpk_filter/ dpk_filter/
-COPY --chown=ray:users requirements.txt requirements.txt
-RUN pip install -r requirements.txt
+
+COPY --chown=ray:users dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/
+COPY --chown=ray:users requirements.txt requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
 
 # Grant non-root users the necessary permissions to the ray directory
 RUN chmod 755 /home/ray
@@ -28,4 +28,4 @@ ENV PYTHONPATH /home/ray
 ARG BUILD_DATE
 ARG GIT_COMMIT
 LABEL build-date=$BUILD_DATE
-LABEL git-commit=$GIT_COMMIT
\ No newline at end of file
+LABEL git-commit=$GIT_COMMIT
diff --git a/transforms/universal/filter/Dockerfile.spark b/transforms/universal/filter/Dockerfile.spark
index aa7a9ab6d..1af783438 100644
--- a/transforms/universal/filter/Dockerfile.spark
+++ b/transforms/universal/filter/Dockerfile.spark
@@ -6,18 +6,19 @@ RUN pip install --no-cache-dir pytest
 
 WORKDIR ${SPARK_HOME}/work-dir
 ARG DPK_WHEEL_FILE_NAME
+ARG TRANSFORM_NAME
 
 # Copy and install data processing libraries
 # These are expected to be placed in the docker context before this is run (see the make image).
-COPY --chown=spark:root data-processing-dist data-processing-dist
+COPY --chown=spark:users data-processing-dist data-processing-dist
 RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[spark]
 
 
 # Install project source
 
 ## Copy the python version of the tansform
-COPY --chown=spark:root dpk_filter/ dpk_filter/
-COPY --chown=spark:root requirements.txt requirements.txt
+COPY --chown=spark:users dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/
+COPY --chown=spark:users requirements.txt requirements.txt
 RUN pip install -r requirements.txt
diff --git a/transforms/universal/hap/Dockerfile.python b/transforms/universal/hap/Dockerfile.python
index e31639398..9f38097b7 100644
--- a/transforms/universal/hap/Dockerfile.python
+++ b/transforms/universal/hap/Dockerfile.python
@@ -10,16 +10,17 @@ RUN useradd -ms /bin/bash dpk
 USER dpk
 WORKDIR /home/dpk
 ARG DPK_WHEEL_FILE_NAME
+ARG TRANSFORM_NAME
 
 # Copy and install data processing libraries
 # These are expected to be placed in the docker context before this is run (see the make image).
-COPY --chown=dpk:root data-processing-dist data-processing-dist
+COPY --chown=dpk:users data-processing-dist data-processing-dist
 RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}
 
 # END OF STEPS destined for a data-prep-kit base image
 
-COPY --chown=dpk:root dpk_hap/ dpk_hap/
-COPY --chown=dpk:root requirements.txt requirements.txt
+COPY --chown=dpk:users dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/
+COPY --chown=dpk:users requirements.txt requirements.txt
 RUN pip install --no-cache-dir -r requirements.txt
 
 # Set environment
diff --git a/transforms/universal/hap/Dockerfile.ray b/transforms/universal/hap/Dockerfile.ray
index c11c63c42..944d04dd8 100644
--- a/transforms/universal/hap/Dockerfile.ray
+++ b/transforms/universal/hap/Dockerfile.ray
@@ -6,6 +6,7 @@ RUN pip install --upgrade --no-cache-dir pip
 # install pytest
 RUN pip install --no-cache-dir pytest
 ARG DPK_WHEEL_FILE_NAME
+ARG TRANSFORM_NAME
 
 # Copy and install data processing libraries
 # These are expected to be placed in the docker context before this is run (see the make image).
@@ -13,10 +14,7 @@ COPY --chown=ray:users data-processing-dist data-processing-dist
 RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray]
 
 
-#COPY requirements.txt requirements.txt
-#RUN pip install --no-cache-dir -r requirements.txt
-
-COPY --chown=ray:users dpk_hap/ dpk_hap/
+COPY --chown=ray:users dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/
 COPY --chown=ray:users requirements.txt requirements.txt
 RUN pip install --no-cache-dir -r requirements.txt
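
Taken together, the .make.defaults change and the new templates let a single parameterized Dockerfile build any of these transform images: the make-driven docker build now passes TRANSFORM_NAME, and the templates use it to locate the dpk_${TRANSFORM_NAME}/ source tree. A minimal sketch of an equivalent manual build for the doc_id transform follows; the image tag, the wheel file name, and building directly against the shared template with -f are illustrative assumptions, since the actual wiring lives in the repository's make rules rather than in this diff.

    # Assumed to run from transforms/universal/doc_id, with the data-processing wheel
    # already copied into ./data-processing-dist, as the templates' comments expect.
    docker build . \
        -f ../../Dockerfile.python.template \
        -t doc_id-python:example \
        --build-arg TRANSFORM_NAME=doc_id \
        --build-arg DPK_WHEEL_FILE_NAME=data_prep_toolkit-example-py3-none-any.whl \
        --build-arg BUILD_DATE=$(date -u +'%Y-%m-%dT%H:%M:%SZ') \
        --build-arg GIT_COMMIT=$(git log -1 --format=%h)

    # With TRANSFORM_NAME=doc_id, the templated copy in the Dockerfile resolves to:
    #   COPY --chown=dpk:users dpk_doc_id/ dpk_doc_id/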