Skip to content

Commit

Permalink
Added TRANSFORM_NAME to docker build arg allowing simple copy paste o…
Browse files Browse the repository at this point in the history
…f docker file template

Signed-off-by: Maroun Touma <[email protected]>
  • Loading branch information
touma-I committed Jan 8, 2025
1 parent 9773a03 commit f52a875
Show file tree
Hide file tree
Showing 12 changed files with 135 additions and 25 deletions.
1 change: 1 addition & 0 deletions .make.defaults
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ __check_defined = \
--platform $(DOCKER_PLATFORM) \
--build-arg EXTRA_INDEX_URL=$(EXTRA_INDEX_URL) \
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
--build-arg TRANSFORM_NAME=$(TRANSFORM_NAME) \
--build-arg DPK_WHEEL_FILE_NAME=$(DPK_WHEEL_FILE_NAME) \
--build-arg BUILD_DATE=$(shell date -u +'%Y-%m-%dT%H:%M:%SZ') \
--build-arg GIT_COMMIT=$(shell git log -1 --format=%h) .
Expand Down
33 changes: 33 additions & 0 deletions transforms/Dockerfile.python.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Template Dockerfile for the pure-python runtime image of a transform.
#
# Build arguments (passed with --build-arg by the docker build invocation):
#   DPK_WHEEL_FILE_NAME  file name of the data processing wheel inside data-processing-dist/
#   TRANSFORM_NAME       transform name; the sources are copied from dpk_<TRANSFORM_NAME>/
#   BUILD_DATE           recorded in the build-date label
#   GIT_COMMIT           recorded in the git-commit label
FROM docker.io/python:3.10.14-slim-bullseye

RUN pip install --upgrade --no-cache-dir pip

# install pytest
RUN pip install --no-cache-dir pytest

# Create a user and use it to run the transform
RUN useradd -ms /bin/bash dpk
USER dpk
WORKDIR /home/dpk
ARG DPK_WHEEL_FILE_NAME
ARG TRANSFORM_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=dpk:root data-processing-dist data-processing-dist
# --no-cache-dir keeps pip's cache out of the layer, like the other installs in this file.
# The path is quoted so the shell passes it to pip as a single, unmodified word.
RUN pip install --no-cache-dir "data-processing-dist/${DPK_WHEEL_FILE_NAME}"

# END OF STEPS destined for a data-prep-kit base image

# Copy the transform package selected by TRANSFORM_NAME, then install its requirements.
COPY --chown=dpk:root dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/
COPY --chown=dpk:root requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Set environment
# key=value form; the space-separated "ENV key value" form is deprecated.
ENV PYTHONPATH=/home/dpk

# Put these at the end since they seem to upset the docker cache.
ARG BUILD_DATE
ARG GIT_COMMIT
LABEL build-date=$BUILD_DATE
LABEL git-commit=$GIT_COMMIT
31 changes: 31 additions & 0 deletions transforms/Dockerfile.ray.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Template Dockerfile for the ray runtime image of a transform.
#
# Build arguments (passed with --build-arg by the docker build invocation):
#   BASE_IMAGE           ray base image; defaults to the value declared below
#   DPK_WHEEL_FILE_NAME  file name of the data processing wheel inside data-processing-dist/
#   TRANSFORM_NAME       transform name; the sources are copied from dpk_<TRANSFORM_NAME>/
#   BUILD_DATE           recorded in the build-date label
#   GIT_COMMIT           recorded in the git-commit label
ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310
FROM ${BASE_IMAGE}

RUN pip install --upgrade --no-cache-dir pip

# install pytest
RUN pip install --no-cache-dir pytest
ARG DPK_WHEEL_FILE_NAME
ARG TRANSFORM_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=ray:users data-processing-dist data-processing-dist
# Quoted so the shell never treats the [ray] extra as a glob character class;
# --no-cache-dir keeps pip's cache out of the layer, like the other installs in this file.
RUN pip install --no-cache-dir "data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray]"


# Copy the transform package selected by TRANSFORM_NAME, then install its requirements.
COPY --chown=ray:users dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/
COPY --chown=ray:users requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Grant non-root users the necessary permissions to the ray directory
RUN chmod 755 /home/ray

# Set environment
# key=value form; the space-separated "ENV key value" form is deprecated.
ENV PYTHONPATH=/home/ray

# Put these at the end since they seem to upset the docker cache.
ARG BUILD_DATE
ARG GIT_COMMIT
LABEL build-date=$BUILD_DATE
LABEL git-commit=$GIT_COMMIT
34 changes: 34 additions & 0 deletions transforms/Dockerfile.spark.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Template Dockerfile for the spark runtime image of a transform.
#
# Build arguments (passed with --build-arg by the docker build invocation):
#   DPK_WHEEL_FILE_NAME  file name of the data processing wheel inside data-processing-dist/
#   TRANSFORM_NAME       transform name; the sources are copied from dpk_<TRANSFORM_NAME>/
#   BUILD_DATE           recorded in the build-date label
#   GIT_COMMIT           recorded in the git-commit label
#
# NOTE(review): the base image is referenced by the mutable :latest tag, so rebuilds
# are not reproducible; consider pinning a specific tag or digest.
FROM quay.io/dataprep1/data-prep-kit/data-prep-kit-spark-3.5.2:latest

# Installation steps below run as root; the image switches back to the spark user afterwards.
USER root
# install pytest
RUN pip install --no-cache-dir pytest

WORKDIR ${SPARK_HOME}/work-dir
ARG DPK_WHEEL_FILE_NAME
ARG TRANSFORM_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=spark:root data-processing-dist data-processing-dist
# Quoted so the shell never treats the [spark] extra as a glob character class;
# --no-cache-dir keeps pip's cache out of the layer.
RUN pip install --no-cache-dir "data-processing-dist/${DPK_WHEEL_FILE_NAME}[spark]"


# Install project source

## Copy the python version of the transform
COPY --chown=spark:root dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/
COPY --chown=spark:root requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt


USER spark

# Set environment
# Prepend the work directory to whatever PYTHONPATH the base image already defines.
ENV PYTHONPATH=${SPARK_HOME}/work-dir/:${PYTHONPATH}

# Put these at the end since they seem to upset the docker cache.
ARG BUILD_DATE
ARG GIT_COMMIT
LABEL build-date=$BUILD_DATE
LABEL git-commit=$GIT_COMMIT
11 changes: 8 additions & 3 deletions transforms/universal/doc_id/Dockerfile.python
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,26 @@ FROM docker.io/python:3.10.14-slim-bullseye

RUN pip install --upgrade --no-cache-dir pip

# install pytest
RUN pip install --no-cache-dir pytest

# Create a user and use it to run the transform
RUN useradd -ms /bin/bash dpk
USER dpk
WORKDIR /home/dpk
ARG DPK_WHEEL_FILE_NAME
ARG TRANSFORM_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=dpk:root data-processing-dist data-processing-dist
RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}

COPY --chown=dpk:root dpk_doc_id/ dpk_doc_id/
COPY --chown=dpk:root requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt
# END OF STEPS destined for a data-prep-kit base image

COPY --chown=dpk:root dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/
COPY --chown=dpk:root requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Set environment
ENV PYTHONPATH /home/dpk
Expand Down
12 changes: 6 additions & 6 deletions transforms/universal/doc_id/Dockerfile.ray
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310

FROM ${BASE_IMAGE}

RUN pip install --upgrade --no-cache-dir pip

# install pytest
RUN pip install --no-cache-dir pytest
ARG DPK_WHEEL_FILE_NAME
ARG TRANSFORM_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=ray:users data-processing-dist data-processing-dist
RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray]

## Copy the python version of the tansform
COPY --chown=ray:users dpk_doc_id/ dpk_doc_id/
COPY --chown=ray:users requirements.txt requirements.txt
RUN pip install -r requirements.txt

COPY --chown=ray:users dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/
COPY --chown=ray:users requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Grant non-root users the necessary permissions to the ray directory
RUN chmod 755 /home/ray
Expand All @@ -28,4 +28,4 @@ ENV PYTHONPATH /home/ray
ARG BUILD_DATE
ARG GIT_COMMIT
LABEL build-date=$BUILD_DATE
LABEL git-commit=$GIT_COMMIT
LABEL git-commit=$GIT_COMMIT
3 changes: 2 additions & 1 deletion transforms/universal/doc_id/Dockerfile.spark
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ RUN pip install --no-cache-dir pytest

WORKDIR ${SPARK_HOME}/work-dir
ARG DPK_WHEEL_FILE_NAME
ARG TRANSFORM_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
Expand All @@ -16,7 +17,7 @@ RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[spark]
# Install project source

## Copy the python version of the transform
COPY --chown=spark:root dpk_doc_id/ dpk_doc_id/
COPY --chown=spark:root dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/
COPY --chown=spark:root requirements.txt requirements.txt
RUN pip install -r requirements.txt

Expand Down
11 changes: 8 additions & 3 deletions transforms/universal/filter/Dockerfile.python
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,26 @@ FROM docker.io/python:3.10.14-slim-bullseye

RUN pip install --upgrade --no-cache-dir pip

# install pytest
RUN pip install --no-cache-dir pytest

# Create a user and use it to run the transform
RUN useradd -ms /bin/bash dpk
USER dpk
WORKDIR /home/dpk
ARG DPK_WHEEL_FILE_NAME
ARG TRANSFORM_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=dpk:root data-processing-dist data-processing-dist
RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}

COPY --chown=dpk:root dpk_filter/ dpk_filter/
COPY --chown=dpk:root requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt
# END OF STEPS destined for a data-prep-kit base image

COPY --chown=dpk:root dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/
COPY --chown=dpk:root requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Set environment
ENV PYTHONPATH /home/dpk
Expand Down
12 changes: 6 additions & 6 deletions transforms/universal/filter/Dockerfile.ray
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310

FROM ${BASE_IMAGE}

RUN pip install --upgrade --no-cache-dir pip

# install pytest
RUN pip install --no-cache-dir pytest
ARG DPK_WHEEL_FILE_NAME
ARG TRANSFORM_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=ray:users data-processing-dist data-processing-dist
RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray]

## Copy the python version of the tansform
COPY --chown=ray:users dpk_filter/ dpk_filter/
COPY --chown=ray:users requirements.txt requirements.txt
RUN pip install -r requirements.txt

COPY --chown=ray:users dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/
COPY --chown=ray:users requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Grant non-root users the necessary permissions to the ray directory
RUN chmod 755 /home/ray
Expand All @@ -28,4 +28,4 @@ ENV PYTHONPATH /home/ray
ARG BUILD_DATE
ARG GIT_COMMIT
LABEL build-date=$BUILD_DATE
LABEL git-commit=$GIT_COMMIT
LABEL git-commit=$GIT_COMMIT
3 changes: 2 additions & 1 deletion transforms/universal/filter/Dockerfile.spark
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ RUN pip install --no-cache-dir pytest

WORKDIR ${SPARK_HOME}/work-dir
ARG DPK_WHEEL_FILE_NAME
ARG TRANSFORM_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
Expand All @@ -16,7 +17,7 @@ RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[spark]
# Install project source

## Copy the python version of the transform
COPY --chown=spark:root dpk_filter/ dpk_filter/
COPY --chown=spark:root dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/
COPY --chown=spark:root requirements.txt requirements.txt
RUN pip install -r requirements.txt

Expand Down
3 changes: 2 additions & 1 deletion transforms/universal/hap/Dockerfile.python
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ RUN useradd -ms /bin/bash dpk
USER dpk
WORKDIR /home/dpk
ARG DPK_WHEEL_FILE_NAME
ARG TRANSFORM_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
Expand All @@ -18,7 +19,7 @@ RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}

# END OF STEPS destined for a data-prep-kit base image

COPY --chown=dpk:root dpk_hap/ dpk_hap/
COPY --chown=dpk:root dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/
COPY --chown=dpk:root requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

Expand Down
6 changes: 2 additions & 4 deletions transforms/universal/hap/Dockerfile.ray
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,15 @@ RUN pip install --upgrade --no-cache-dir pip
# install pytest
RUN pip install --no-cache-dir pytest
ARG DPK_WHEEL_FILE_NAME
ARG TRANSFORM_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=ray:users data-processing-dist data-processing-dist
RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray]


#COPY requirements.txt requirements.txt
#RUN pip install --no-cache-dir -r requirements.txt

COPY --chown=ray:users dpk_hap/ dpk_hap/
COPY --chown=ray:users dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/
COPY --chown=ray:users requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

Expand Down

0 comments on commit f52a875

Please sign in to comment.