Skip to content

Commit

Permalink
Merge pull request #929 from touma-I/Dockerfile-template
Browse files Browse the repository at this point in the history
Added TRANSFORM_NAME to docker build arg
  • Loading branch information
touma-I authored Jan 9, 2025
2 parents 9db837b + cef1dea commit 9eab8b8
Show file tree
Hide file tree
Showing 12 changed files with 143 additions and 33 deletions.
1 change: 1 addition & 0 deletions .make.defaults
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ __check_defined = \
--platform $(DOCKER_PLATFORM) \
--build-arg EXTRA_INDEX_URL=$(EXTRA_INDEX_URL) \
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
--build-arg TRANSFORM_NAME=$(TRANSFORM_NAME) \
--build-arg DPK_WHEEL_FILE_NAME=$(DPK_WHEEL_FILE_NAME) \
--build-arg BUILD_DATE=$(shell date -u +'%Y-%m-%dT%H:%M:%SZ') \
--build-arg GIT_COMMIT=$(shell git log -1 --format=%h) .
Expand Down
33 changes: 33 additions & 0 deletions transforms/Dockerfile.python.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Template Dockerfile for a transform image built on the plain Python runtime.
#
# Build arguments (passed with --build-arg):
#   DPK_WHEEL_FILE_NAME  file name of the wheel inside data-processing-dist/ to install
#   TRANSFORM_NAME       selects the dpk_<TRANSFORM_NAME>/ source directory to copy
#   BUILD_DATE           recorded in the build-date label
#   GIT_COMMIT           recorded in the git-commit label
FROM docker.io/python:3.10.14-slim-bullseye

RUN pip install --upgrade --no-cache-dir pip

# install pytest
RUN pip install --no-cache-dir pytest

# Create a user and use it to run the transform
RUN useradd -ms /bin/bash dpk
USER dpk
WORKDIR /home/dpk
ARG DPK_WHEEL_FILE_NAME
ARG TRANSFORM_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=dpk:users data-processing-dist data-processing-dist
# --no-cache-dir keeps pip's download cache out of the image layer,
# matching the other pip invocations in this file.
RUN pip install --no-cache-dir data-processing-dist/${DPK_WHEEL_FILE_NAME}

# END OF STEPS destined for a data-prep-kit base image

# Copy the transform package named by TRANSFORM_NAME and install its requirements.
COPY --chown=dpk:users dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/
COPY --chown=dpk:users requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Set environment
# key=value form; the space-separated ENV form is deprecated.
ENV PYTHONPATH=/home/dpk

# Put these at the end since they seem to upset the docker cache.
ARG BUILD_DATE
ARG GIT_COMMIT
LABEL build-date=$BUILD_DATE
LABEL git-commit=$GIT_COMMIT
31 changes: 31 additions & 0 deletions transforms/Dockerfile.ray.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Template Dockerfile for a transform image built on a Ray base image.
#
# Build arguments (passed with --build-arg):
#   BASE_IMAGE           image used in FROM; defaults to the Ray image below
#   DPK_WHEEL_FILE_NAME  file name of the wheel inside data-processing-dist/ to install
#   TRANSFORM_NAME       selects the dpk_<TRANSFORM_NAME>/ source directory to copy
#   BUILD_DATE           recorded in the build-date label
#   GIT_COMMIT           recorded in the git-commit label
ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310
FROM ${BASE_IMAGE}

RUN pip install --upgrade --no-cache-dir pip

# install pytest
RUN pip install --no-cache-dir pytest
ARG DPK_WHEEL_FILE_NAME
ARG TRANSFORM_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=ray:users data-processing-dist data-processing-dist
# The requirement is quoted so the shell cannot interpret the [ray] extras
# suffix as a glob character class; --no-cache-dir keeps pip's cache out of the layer.
RUN pip install --no-cache-dir "data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray]"


# Copy the transform package named by TRANSFORM_NAME and install its requirements.
COPY --chown=ray:users dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/
COPY --chown=ray:users requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Grant non-root users the necessary permissions to the ray directory
RUN chmod 755 /home/ray

# Set environment
# key=value form; the space-separated ENV form is deprecated.
ENV PYTHONPATH=/home/ray

# Put these at the end since they seem to upset the docker cache.
ARG BUILD_DATE
ARG GIT_COMMIT
LABEL build-date=$BUILD_DATE
LABEL git-commit=$GIT_COMMIT
34 changes: 34 additions & 0 deletions transforms/Dockerfile.spark.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Template Dockerfile for a transform image built on the data-prep-kit Spark base image.
#
# Build arguments (passed with --build-arg):
#   DPK_WHEEL_FILE_NAME  file name of the wheel inside data-processing-dist/ to install
#   TRANSFORM_NAME       selects the dpk_<TRANSFORM_NAME>/ source directory to copy
#   BUILD_DATE           recorded in the build-date label
#   GIT_COMMIT           recorded in the git-commit label
#
# NOTE(review): the base image is referenced by the floating :latest tag, so
# rebuilds are not reproducible; consider pinning a version tag or digest.
FROM quay.io/dataprep1/data-prep-kit/data-prep-kit-spark-3.5.2:latest

# Installation steps below run as root; the image switches to the spark user afterwards.
USER root
# install pytest
RUN pip install --no-cache-dir pytest

WORKDIR ${SPARK_HOME}/work-dir
ARG DPK_WHEEL_FILE_NAME
ARG TRANSFORM_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=spark:users data-processing-dist data-processing-dist
# The requirement is quoted so the shell cannot interpret the [spark] extras
# suffix as a glob character class; --no-cache-dir keeps pip's cache out of the layer.
RUN pip install --no-cache-dir "data-processing-dist/${DPK_WHEEL_FILE_NAME}[spark]"


# Install project source

## Copy the python version of the transform
COPY --chown=spark:users dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/
COPY --chown=spark:users requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt


USER spark

# Set environment
# Prepend the work directory to any PYTHONPATH inherited from the base image.
ENV PYTHONPATH=${SPARK_HOME}/work-dir/:${PYTHONPATH}

# Put these at the end since they seem to upset the docker cache.
ARG BUILD_DATE
ARG GIT_COMMIT
LABEL build-date=$BUILD_DATE
LABEL git-commit=$GIT_COMMIT
13 changes: 9 additions & 4 deletions transforms/universal/doc_id/Dockerfile.python
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,26 @@ FROM docker.io/python:3.10.14-slim-bullseye

RUN pip install --upgrade --no-cache-dir pip

# install pytest
RUN pip install --no-cache-dir pytest

# Create a user and use it to run the transform
RUN useradd -ms /bin/bash dpk
USER dpk
WORKDIR /home/dpk
ARG DPK_WHEEL_FILE_NAME
ARG TRANSFORM_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=dpk:root data-processing-dist data-processing-dist
COPY --chown=dpk:users data-processing-dist data-processing-dist
RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}

COPY --chown=dpk:root dpk_doc_id/ dpk_doc_id/
COPY --chown=dpk:root requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt
# END OF STEPS destined for a data-prep-kit base image

COPY --chown=dpk:users dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/
COPY --chown=dpk:users requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Set environment
ENV PYTHONPATH /home/dpk
Expand Down
12 changes: 6 additions & 6 deletions transforms/universal/doc_id/Dockerfile.ray
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310

FROM ${BASE_IMAGE}

RUN pip install --upgrade --no-cache-dir pip

# install pytest
RUN pip install --no-cache-dir pytest
ARG DPK_WHEEL_FILE_NAME
ARG TRANSFORM_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=ray:users data-processing-dist data-processing-dist
RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray]

## Copy the python version of the tansform
COPY --chown=ray:users dpk_doc_id/ dpk_doc_id/
COPY --chown=ray:users requirements.txt requirements.txt
RUN pip install -r requirements.txt

COPY --chown=ray:users dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/
COPY --chown=ray:users requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Grant non-root users the necessary permissions to the ray directory
RUN chmod 755 /home/ray
Expand All @@ -28,4 +28,4 @@ ENV PYTHONPATH /home/ray
ARG BUILD_DATE
ARG GIT_COMMIT
LABEL build-date=$BUILD_DATE
LABEL git-commit=$GIT_COMMIT
LABEL git-commit=$GIT_COMMIT
7 changes: 4 additions & 3 deletions transforms/universal/doc_id/Dockerfile.spark
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,19 @@ RUN pip install --no-cache-dir pytest

WORKDIR ${SPARK_HOME}/work-dir
ARG DPK_WHEEL_FILE_NAME
ARG TRANSFORM_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=spark:root data-processing-dist data-processing-dist
COPY --chown=spark:users data-processing-dist data-processing-dist
RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[spark]


# Install project source

## Copy the python version of the transform
COPY --chown=spark:root dpk_doc_id/ dpk_doc_id/
COPY --chown=spark:root requirements.txt requirements.txt
COPY --chown=spark:users dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/
COPY --chown=spark:users requirements.txt requirements.txt
RUN pip install -r requirements.txt


Expand Down
13 changes: 9 additions & 4 deletions transforms/universal/filter/Dockerfile.python
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,26 @@ FROM docker.io/python:3.10.14-slim-bullseye

RUN pip install --upgrade --no-cache-dir pip

# install pytest
RUN pip install --no-cache-dir pytest

# Create a user and use it to run the transform
RUN useradd -ms /bin/bash dpk
USER dpk
WORKDIR /home/dpk
ARG DPK_WHEEL_FILE_NAME
ARG TRANSFORM_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=dpk:root data-processing-dist data-processing-dist
COPY --chown=dpk:users data-processing-dist data-processing-dist
RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}

COPY --chown=dpk:root dpk_filter/ dpk_filter/
COPY --chown=dpk:root requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt
# END OF STEPS destined for a data-prep-kit base image

COPY --chown=dpk:users dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/
COPY --chown=dpk:users requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Set environment
ENV PYTHONPATH /home/dpk
Expand Down
12 changes: 6 additions & 6 deletions transforms/universal/filter/Dockerfile.ray
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310

FROM ${BASE_IMAGE}

RUN pip install --upgrade --no-cache-dir pip

# install pytest
RUN pip install --no-cache-dir pytest
ARG DPK_WHEEL_FILE_NAME
ARG TRANSFORM_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=ray:users data-processing-dist data-processing-dist
RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray]

## Copy the python version of the tansform
COPY --chown=ray:users dpk_filter/ dpk_filter/
COPY --chown=ray:users requirements.txt requirements.txt
RUN pip install -r requirements.txt

COPY --chown=ray:users dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/
COPY --chown=ray:users requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Grant non-root users the necessary permissions to the ray directory
RUN chmod 755 /home/ray
Expand All @@ -28,4 +28,4 @@ ENV PYTHONPATH /home/ray
ARG BUILD_DATE
ARG GIT_COMMIT
LABEL build-date=$BUILD_DATE
LABEL git-commit=$GIT_COMMIT
LABEL git-commit=$GIT_COMMIT
7 changes: 4 additions & 3 deletions transforms/universal/filter/Dockerfile.spark
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,19 @@ RUN pip install --no-cache-dir pytest

WORKDIR ${SPARK_HOME}/work-dir
ARG DPK_WHEEL_FILE_NAME
ARG TRANSFORM_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=spark:root data-processing-dist data-processing-dist
COPY --chown=spark:users data-processing-dist data-processing-dist
RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[spark]


# Install project source

## Copy the python version of the transform
COPY --chown=spark:root dpk_filter/ dpk_filter/
COPY --chown=spark:root requirements.txt requirements.txt
COPY --chown=spark:users dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/
COPY --chown=spark:users requirements.txt requirements.txt
RUN pip install -r requirements.txt


Expand Down
7 changes: 4 additions & 3 deletions transforms/universal/hap/Dockerfile.python
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,17 @@ RUN useradd -ms /bin/bash dpk
USER dpk
WORKDIR /home/dpk
ARG DPK_WHEEL_FILE_NAME
ARG TRANSFORM_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=dpk:root data-processing-dist data-processing-dist
COPY --chown=dpk:users data-processing-dist data-processing-dist
RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}

# END OF STEPS destined for a data-prep-kit base image

COPY --chown=dpk:root dpk_hap/ dpk_hap/
COPY --chown=dpk:root requirements.txt requirements.txt
COPY --chown=dpk:users dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/
COPY --chown=dpk:users requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Set environment
Expand Down
6 changes: 2 additions & 4 deletions transforms/universal/hap/Dockerfile.ray
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,15 @@ RUN pip install --upgrade --no-cache-dir pip
# install pytest
RUN pip install --no-cache-dir pytest
ARG DPK_WHEEL_FILE_NAME
ARG TRANSFORM_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=ray:users data-processing-dist data-processing-dist
RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray]


#COPY requirements.txt requirements.txt
#RUN pip install --no-cache-dir -r requirements.txt

COPY --chown=ray:users dpk_hap/ dpk_hap/
COPY --chown=ray:users dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/
COPY --chown=ray:users requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

Expand Down

0 comments on commit 9eab8b8

Please sign in to comment.