Skip to content

Commit

Permalink
Update dockerfiles and dependencies for all existing transforms
Browse files Browse the repository at this point in the history
Signed-off-by: Maroun Touma <[email protected]>
  • Loading branch information
touma-I committed Oct 23, 2024
1 parent 8551b38 commit db1c389
Show file tree
Hide file tree
Showing 67 changed files with 205 additions and 170 deletions.
5 changes: 3 additions & 2 deletions transforms/code/code2parquet/python/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@ RUN pip install --no-cache-dir pytest
RUN useradd -ms /bin/bash dpk
USER dpk
WORKDIR /home/dpk
ARG WHEEL_FILE_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=dpk:root data-processing-lib-python/ data-processing-lib-python/
RUN cd data-processing-lib-python && pip install --no-cache-dir -e .
# Chown to dpk:root to match the dpk user this image runs as
# (ray:users is the owner used by the Ray-based images).
COPY --chown=dpk:root data-processing-dist data-processing-dist
# Skip pip's download cache so it is not baked into the layer.
RUN pip install --no-cache-dir "data-processing-dist/${WHEEL_FILE_NAME}"

# END OF STEPS destined for a data-prep-kit base image

Expand Down
15 changes: 7 additions & 8 deletions transforms/code/code2parquet/ray/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,13 @@ RUN pip install --upgrade --no-cache-dir pip
# install pytest
RUN pip install --no-cache-dir pytest

# Copy in the data processing framework source/project and install it
# This is expected to be placed in the docker context before this is run (see the make image).
COPY --chown=ray:users data-processing-lib-python/ data-processing-lib-python/
RUN cd data-processing-lib-python && pip install --no-cache-dir -e .
COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/
RUN cd data-processing-lib-ray && pip install --no-cache-dir -e .
COPY --chown=ray:users python-transform/ python-transform/
RUN cd python-transform && pip install --no-cache-dir -e .
ARG WHEEL_FILE_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=ray:users data-processing-dist data-processing-dist
# Quote the requirement so the shell never treats "[ray]" as a glob pattern,
# and skip pip's download cache so it is not baked into the layer.
RUN pip install --no-cache-dir "data-processing-dist/${WHEEL_FILE_NAME}[ray]"

COPY --chown=ray:users python-transform/ python-transform/
RUN cd python-transform && pip install --no-cache-dir -e .

Expand Down
2 changes: 1 addition & 1 deletion transforms/code/code2parquet/ray/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ authors = [
{ name = "Boris Lublinsky", email = "[email protected]" },
]
dependencies = [
"data-prep-toolkit-ray==0.2.2.dev1",
"data-prep-toolkit[ray]==0.2.2.dev1",
"dpk-code2parquet-transform-python==0.2.2.dev1",
"parameterized",
"pandas",
Expand Down
5 changes: 3 additions & 2 deletions transforms/code/code_profiler/python/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@ RUN pip install --no-cache-dir pytest
RUN useradd -ms /bin/bash dpk
USER dpk
WORKDIR /home/dpk
ARG WHEEL_FILE_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=dpk:root data-processing-lib-python/ data-processing-lib-python/
RUN cd data-processing-lib-python && pip install --no-cache-dir -e .
# Chown to dpk:root to match the dpk user this image runs as
# (ray:users is the owner used by the Ray-based images).
COPY --chown=dpk:root data-processing-dist data-processing-dist
# Skip pip's download cache so it is not baked into the layer.
RUN pip install --no-cache-dir "data-processing-dist/${WHEEL_FILE_NAME}"

COPY --chown=dpk:root src/ src/
COPY --chown=dpk:root pyproject.toml pyproject.toml
Expand Down
9 changes: 5 additions & 4 deletions transforms/code/code_profiler/ray/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@ RUN pip install --upgrade --no-cache-dir pip
# install pytest
RUN pip install --no-cache-dir pytest

ARG WHEEL_FILE_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=ray:users data-processing-lib-python/ data-processing-lib-python/
RUN cd data-processing-lib-python && pip install --no-cache-dir -e .
COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/
RUN cd data-processing-lib-ray && pip install --no-cache-dir -e .
COPY --chown=ray:users data-processing-dist data-processing-dist
# Quote the requirement so the shell never treats "[ray]" as a glob pattern,
# and skip pip's download cache so it is not baked into the layer.
RUN pip install --no-cache-dir "data-processing-dist/${WHEEL_FILE_NAME}[ray]"

COPY --chown=ray:users python-transform/ python-transform/
RUN cd python-transform && pip install --no-cache-dir -e .

Expand Down
3 changes: 1 addition & 2 deletions transforms/code/code_profiler/ray/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@ authors = [
]
dependencies = [
"dpk-code-profiler-transform-python==0.2.2.dev0",
"data-prep-toolkit-ray==0.2.2.dev0",
"data-prep-toolkit==0.2.2.dev0",
"data-prep-toolkit[ray]==0.2.2.dev1",
]

[build-system]
Expand Down
5 changes: 3 additions & 2 deletions transforms/code/code_quality/python/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@ RUN pip install --no-cache-dir pytest
RUN useradd -ms /bin/bash dpk
USER dpk
WORKDIR /home/dpk
ARG WHEEL_FILE_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=dpk:root data-processing-lib-python/ data-processing-lib-python/
RUN cd data-processing-lib-python && pip install --no-cache-dir -e .
# Chown to dpk:root to match the dpk user this image runs as
# (ray:users is the owner used by the Ray-based images).
COPY --chown=dpk:root data-processing-dist data-processing-dist
# Skip pip's download cache so it is not baked into the layer.
RUN pip install --no-cache-dir "data-processing-dist/${WHEEL_FILE_NAME}"

# END OF STEPS destined for a data-prep-kit base image

Expand Down
8 changes: 4 additions & 4 deletions transforms/code/code_quality/ray/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@ RUN pip install --no-cache-dir pytest
USER root
RUN mkdir -p /home/ray/.cache && chmod -R 777 /home/ray/.cache
USER ray
ARG WHEEL_FILE_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=ray:users data-processing-lib-python/ data-processing-lib-python/
RUN cd data-processing-lib-python && pip install --no-cache-dir -e .
COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/
RUN cd data-processing-lib-ray && pip install --no-cache-dir -e .
COPY --chown=ray:users data-processing-dist data-processing-dist
# Quote the requirement so the shell never treats "[ray]" as a glob pattern,
# and skip pip's download cache so it is not baked into the layer.
RUN pip install --no-cache-dir "data-processing-dist/${WHEEL_FILE_NAME}[ray]"

COPY --chown=ray:users python-transform/ python-transform/
RUN cd python-transform && pip install --no-cache-dir -e .

Expand Down
2 changes: 1 addition & 1 deletion transforms/code/code_quality/ray/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ authors = [
]
dependencies = [
"dpk-code-quality-transform-python==0.2.2.dev1",
"data-prep-toolkit-ray==0.2.2.dev1",
"data-prep-toolkit[ray]==0.2.2.dev1",
]

[build-system]
Expand Down
5 changes: 3 additions & 2 deletions transforms/code/header_cleanser/python/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,12 @@ WORKDIR /home/dpk

# Create directories to mount volumes for processing data outside of the image.
# RUN mkdir input && mkdir output
ARG WHEEL_FILE_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=dpk:root data-processing-lib-python/ data-processing-lib-python/
RUN cd data-processing-lib-python && pip install --no-cache-dir -e .
# Chown to dpk:root to match the dpk home directory this image works in
# (ray:users is the owner used by the Ray-based images).
COPY --chown=dpk:root data-processing-dist data-processing-dist
# Skip pip's download cache so it is not baked into the layer.
RUN pip install --no-cache-dir "data-processing-dist/${WHEEL_FILE_NAME}"

# END OF STEPS destined for a data-prep-kit base image

Expand Down
9 changes: 5 additions & 4 deletions transforms/code/header_cleanser/ray/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@ FROM docker.io/rayproject/ray:2.24.0-py310
# install pytest
RUN pip install --no-cache-dir pytest

ARG WHEEL_FILE_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=ray:users data-processing-lib-python/ data-processing-lib-python/
RUN cd data-processing-lib-python && pip install --no-cache-dir -e .
COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/
RUN cd data-processing-lib-ray && pip install --no-cache-dir -e .
COPY --chown=ray:users data-processing-dist data-processing-dist
# Quote the requirement so the shell never treats "[ray]" as a glob pattern,
# and skip pip's download cache so it is not baked into the layer.
RUN pip install --no-cache-dir "data-processing-dist/${WHEEL_FILE_NAME}[ray]"

COPY --chown=ray:users python-transform/ python-transform
RUN cd python-transform && pip install --no-cache-dir -e .

Expand Down
2 changes: 1 addition & 1 deletion transforms/code/header_cleanser/ray/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ authors = [
]
dependencies = [
"dpk-header-cleanser-transform-python==0.2.2.dev1",
"data-prep-toolkit-ray==0.2.2.dev1",
"data-prep-toolkit[ray]==0.2.2.dev1",
"scancode-toolkit==32.1.0",
]

Expand Down
5 changes: 3 additions & 2 deletions transforms/code/license_select/python/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@ RUN pip install --no-cache-dir pytest
RUN useradd -ms /bin/bash dpk
USER dpk
WORKDIR /home/dpk
ARG WHEEL_FILE_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=dpk:root data-processing-lib-python/ data-processing-lib-python/
RUN cd data-processing-lib-python && pip install --no-cache-dir -e .
# Chown to dpk:root to match the dpk user this image runs as
# (ray:users is the owner used by the Ray-based images).
COPY --chown=dpk:root data-processing-dist data-processing-dist
# Skip pip's download cache so it is not baked into the layer.
RUN pip install --no-cache-dir "data-processing-dist/${WHEEL_FILE_NAME}"

# END OF STEPS destined for a data-prep-kit base image

Expand Down
12 changes: 6 additions & 6 deletions transforms/code/license_select/ray/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ RUN pip install --upgrade --no-cache-dir pip

# install pytest
RUN pip install --no-cache-dir pytest
ARG WHEEL_FILE_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=ray:users data-processing-dist data-processing-dist
# Quote the requirement so the shell never treats "[ray]" as a glob pattern,
# and skip pip's download cache so it is not baked into the layer.
RUN pip install --no-cache-dir "data-processing-dist/${WHEEL_FILE_NAME}[ray]"

# Copy in the data processing framework source/project and install it
# This is expected to be placed in the docker context before this is run (see the make image).
COPY --chown=ray:users data-processing-lib-python/ data-processing-lib-python/
RUN cd data-processing-lib-python && pip install --no-cache-dir -e .
COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/
RUN cd data-processing-lib-ray && pip install --no-cache-dir -e .
COPY --chown=ray:users python-transform/ python-transform/
RUN cd python-transform && pip install --no-cache-dir -e .

Expand Down
2 changes: 1 addition & 1 deletion transforms/code/license_select/ray/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ authors = [
]
dependencies = [
"dpk-license-select-transform-python==0.2.2.dev1",
"data-prep-toolkit-ray==0.2.2.dev1",
"data-prep-toolkit[ray]==0.2.2.dev1",
]

[build-system]
Expand Down
5 changes: 3 additions & 2 deletions transforms/code/malware/python/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,12 @@ COPY --chown=dpk:root --from=clamav-local /var/lib/clamav/ /var/lib/clamav/
COPY --chown=dpk:root --from=clamav-local /etc/clamav/clamd.conf /etc/clamav/clamd.conf
COPY --chown=dpk:root --from=clamav-local /var/log/clamav/clamav.log /var/log/clamav/clamav.log
COPY --chown=dpk:root --from=clamav-local /var/run/clamav /var/run/clamav
ARG WHEEL_FILE_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=dpk:root data-processing-lib-python/ data-processing-lib-python/
RUN cd data-processing-lib-python && pip install --no-cache-dir -e .
# Chown to dpk:root like every other COPY step in this image
# (ray:users is the owner used by the Ray-based images).
COPY --chown=dpk:root data-processing-dist data-processing-dist
# Skip pip's download cache so it is not baked into the layer.
RUN pip install --no-cache-dir "data-processing-dist/${WHEEL_FILE_NAME}"

COPY --chown=dpk:root src/ src/
COPY --chown=dpk:root pyproject.toml pyproject.toml
Expand Down
12 changes: 6 additions & 6 deletions transforms/code/malware/ray/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,13 @@ COPY --from=clamav-local --chown=ray:0 /etc/clamav/clamd.conf /etc/clamav/clamd.
COPY --from=clamav-local --chown=ray:0 /var/log/clamav /var/log/clamav
COPY --from=clamav-local --chown=ray:0 /var/run/clamav /var/run/clamav

ARG WHEEL_FILE_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=ray:users data-processing-dist data-processing-dist
# Quote the requirement so the shell never treats "[ray]" as a glob pattern,
# and skip pip's download cache so it is not baked into the layer.
RUN pip install --no-cache-dir "data-processing-dist/${WHEEL_FILE_NAME}[ray]"

# Copy in the data processing framework source/project and install it
# This is expected to be placed in the docker context before this is run (see the make image).
COPY --chown=ray:users data-processing-lib-python/ data-processing-lib-python/
RUN cd data-processing-lib-python && pip install --no-cache-dir -e .
COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/
RUN cd data-processing-lib-ray && pip install --no-cache-dir -e .
COPY --chown=ray:users python-transform/ python-transform/
RUN cd python-transform && pip install --no-cache-dir -e .

Expand Down
2 changes: 1 addition & 1 deletion transforms/code/malware/ray/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ authors = [
]
dependencies = [
"dpk-malware-transform-python==0.2.2.dev1",
"data-prep-toolkit-ray==0.2.2.dev1",
"data-prep-toolkit[ray]==0.2.2.dev1",
]

[build-system]
Expand Down
5 changes: 3 additions & 2 deletions transforms/code/proglang_select/python/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@ RUN pip install --no-cache-dir pytest
RUN useradd -ms /bin/bash dpk
USER dpk
WORKDIR /home/dpk
ARG WHEEL_FILE_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=dpk:root data-processing-lib-python/ data-processing-lib-python/
RUN cd data-processing-lib-python && pip install --no-cache-dir -e .
# Chown to dpk:root to match the dpk user this image runs as
# (ray:users is the owner used by the Ray-based images).
COPY --chown=dpk:root data-processing-dist data-processing-dist
# Skip pip's download cache so it is not baked into the layer.
RUN pip install --no-cache-dir "data-processing-dist/${WHEEL_FILE_NAME}"

# END OF STEPS destined for a data-prep-kit base image

Expand Down
8 changes: 4 additions & 4 deletions transforms/code/proglang_select/ray/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ RUN pip install --upgrade --no-cache-dir pip

# install pytest
RUN pip install --no-cache-dir pytest
ARG WHEEL_FILE_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=ray:users data-processing-lib-python/ data-processing-lib-python/
RUN cd data-processing-lib-python && pip install --no-cache-dir -e .
COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/
RUN cd data-processing-lib-ray && pip install --no-cache-dir -e .
COPY --chown=ray:users data-processing-dist data-processing-dist
# Quote the requirement so the shell never treats "[ray]" as a glob pattern,
# and skip pip's download cache so it is not baked into the layer.
RUN pip install --no-cache-dir "data-processing-dist/${WHEEL_FILE_NAME}[ray]"

COPY --chown=ray:users python-transform/ python-transform/
RUN cd python-transform && pip install --no-cache-dir -e .

Expand Down
2 changes: 1 addition & 1 deletion transforms/code/proglang_select/ray/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ authors = [
]
dependencies = [
"dpk-proglang-select-transform-python==0.2.2.dev1",
"data-prep-toolkit-ray==0.2.2.dev1",
"data-prep-toolkit[ray]==0.2.2.dev1",
]

[build-system]
Expand Down
7 changes: 3 additions & 4 deletions transforms/code/repo_level_ordering/ray/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,12 @@ RUN pip install --upgrade --no-cache-dir pip

# install pytest
RUN pip install --no-cache-dir pytest
ARG WHEEL_FILE_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=ray:users data-processing-lib-python/ data-processing-lib-python/
RUN cd data-processing-lib-python && pip install --no-cache-dir -e .
COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/
RUN cd data-processing-lib-ray && pip install --no-cache-dir -e .
COPY --chown=ray:users data-processing-dist data-processing-dist
# Quote the requirement so the shell never treats "[ray]" as a glob pattern,
# and skip pip's download cache so it is not baked into the layer.
RUN pip install --no-cache-dir "data-processing-dist/${WHEEL_FILE_NAME}[ray]"

COPY --chown=ray:users src/ src/
COPY --chown=ray:users pyproject.toml pyproject.toml
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/repo_level_ordering/ray/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ authors = [
{ name = "Shanmukha Guttula", email = "[email protected]" },
]
dependencies = [
"data-prep-toolkit-ray==0.2.2.dev1",
"data-prep-toolkit[ray]==0.2.2.dev1",
"networkx==3.3",
"colorlog==6.8.2",
"func-timeout==4.3.5",
Expand Down
6 changes: 4 additions & 2 deletions transforms/language/doc_quality/python/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,12 @@ RUN useradd -ms /bin/bash dpk
USER dpk
WORKDIR /home/dpk

ARG WHEEL_FILE_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=dpk:root data-processing-lib-python/ data-processing-lib-python/
RUN cd data-processing-lib-python && pip install --no-cache-dir -e .
COPY --chown=dpk:root data-processing-dist/ data-processing-dist/
# Skip pip's download cache so it is not baked into the layer.
RUN pip install --no-cache-dir "data-processing-dist/${WHEEL_FILE_NAME}"

# END OF STEPS destined for a data-prep-kit base image

Expand Down
9 changes: 5 additions & 4 deletions transforms/language/doc_quality/ray/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@ RUN pip install --upgrade --no-cache-dir pip

# install pytest
RUN pip install --no-cache-dir pytest
ARG WHEEL_FILE_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=ray:users data-processing-lib-python/ data-processing-lib-python/
RUN cd data-processing-lib-python && pip install --no-cache-dir -e .
COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/
RUN cd data-processing-lib-ray && pip install --no-cache-dir -e .
COPY --chown=ray:users data-processing-dist data-processing-dist
# Quote the requirement so the shell never treats "[ray]" as a glob pattern,
# and skip pip's download cache so it is not baked into the layer.
RUN pip install --no-cache-dir "data-processing-dist/${WHEEL_FILE_NAME}[ray]"

## Copy the python version of the transform
COPY --chown=ray:users python-transform/ python-transform/
RUN cd python-transform && pip install --no-cache-dir -e .

Expand Down
2 changes: 1 addition & 1 deletion transforms/language/doc_quality/ray/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ authors = [
]
dependencies = [
"dpk-doc_quality-transform-python==0.2.2.dev1",
"data-prep-toolkit-ray==0.2.2.dev1"
"data-prep-toolkit[ray]==0.2.2.dev1",
]

[build-system]
Expand Down
6 changes: 4 additions & 2 deletions transforms/language/html2parquet/python/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,12 @@ RUN useradd -ms /bin/bash dpk
USER dpk
WORKDIR /home/dpk

ARG WHEEL_FILE_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=dpk:root data-processing-lib-python/ data-processing-lib-python/
RUN cd data-processing-lib-python && pip install --no-cache-dir -e .
COPY --chown=dpk:root data-processing-dist/ data-processing-dist/
# Skip pip's download cache so it is not baked into the layer.
RUN pip install --no-cache-dir "data-processing-dist/${WHEEL_FILE_NAME}"

# END OF STEPS destined for a data-prep-kit base image

Expand Down
Loading

0 comments on commit db1c389

Please sign in to comment.