# Reviewer notes. Free text ahead of the first "diff --git" header is skipped
# by git-apply(1) and patch(1).
#
# What this patch does, as visible in the hunks below:
#   * .make.defaults
#       - .defaults.image builds with -f $(DOCKER_FILE) and tags the result as
#         $(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION), then adds a second tag
#         prefixed with $(DOCKER_REGISTRY_ENDPOINT)/.
#       - Adds .defaults.lib-whl-image, which picks up the wheel found in
#         data-processing-dist/ and delegates to .defaults.image.
#       - Drops the commented-out *-lib-src-image rules and stops passing
#         DOCKER_IMAGE= to the recursive .defaults.image invocations.
#   * transforms/.make.cicd.targets
#       - image / test-image / publish act once per Dockerfile.python,
#         Dockerfile.ray and Dockerfile.spark that exists in the transform dir.
#       - BUILD_SPECIFIC_RUNTIME, when non-empty, restricts "image" to one runtime.
#       - The workflow-* targets now live here and delegate to kfp_ray/ if present.
#   * transforms/language/html2parquet
#       - Dockerfile is renamed to Dockerfile.ray; the per-transform overrides
#         are removed from the Makefile; kfp_ray/Makefile sets
#         BUILD_SPECIFIC_RUNTIME=ray and points SRC_DIR one level up.
#
# Changes made to added ('+') lines during review:
#   1. publish: invokes .defaults.publish-image (was .default.publish-image).
#      The rule removed by this same patch (publish-image-default) depended on
#      the plural spelling. NOTE(review): confirm the name in .make.defaults.
#   2. image:: compares with '=' instead of '==' inside [ ]. '==' is not POSIX
#      and fails under shells such as dash, which would make every runtime
#      comparison evaluate false.
#   3. ".PHONY: workflow-vent" corrected to ".PHONY: workflow-venv".
#   4. Added ".PHONY: publish image-python image-ray image-spark". This is one
#      extra added line, so that hunk's new-side count is 112 instead of 111.
#   5. Comment wording: "Build an image", "the wheel file", and the
#      .defaults.lib-whl-image note now names DOCKER_IMAGE_NAME, the variable
#      .defaults.image actually checks.
#
# Caveats:
#   * This patch was recovered from whitespace-collapsed text. Hunk line counts
#     are honoured, but the position of blank context lines and the exact
#     tabs/spaces on context and removed lines are best-effort. Apply with a
#     whitespace-tolerant option or regenerate the diff from the working tree.
#   * The "index" lines are the original blob ids and do not describe the
#     reviewed content.
diff --git a/.make.defaults b/.make.defaults
index 51eb984ee..9eb858530 100644
--- a/.make.defaults
+++ b/.make.defaults
@@ -209,7 +209,7 @@ __check_defined = \
 # We create both local and remote tags. Local seems to be needed when using our spark
 # base image. Remote seems to be needed by kfp.
 .PHONY: .defaults.image
-.defaults.image:: # Must be called with a DOCKER_IMAGE= settings.
+.defaults.image:: # Must be called with a DOCKER_IMAGE_NAME= setting.
 	@# Help: Create the docker image $(DOCKER_LOCAL_IMAGE) and a tag for $(DOCKER_REMOTE_IMAGE)
 	$(call check_defined, DOCKER_IMAGE_NAME)
 	# The following touch seems to be needed to work around a docker build problem in which
@@ -222,14 +222,15 @@ __check_defined = \
 	if [ -e pyproject.toml ]; then \
 		touch pyproject.toml; \
 	fi
-	$(DOCKER) build -t $(DOCKER_LOCAL_IMAGE) $(DOCKER_BUILD_EXTRA_ARGS) \
+	$(DOCKER) build -f $(DOCKER_FILE) -t $(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) $(DOCKER_BUILD_EXTRA_ARGS) \
 		--platform $(DOCKER_PLATFORM) \
 		--build-arg EXTRA_INDEX_URL=$(EXTRA_INDEX_URL) \
 		--build-arg BASE_IMAGE=$(BASE_IMAGE) \
 		--build-arg DPK_WHEEL_FILE_NAME=$(DPK_WHEEL_FILE_NAME) \
 		--build-arg BUILD_DATE=$(shell date -u +'%Y-%m-%dT%H:%M:%SZ') \
 		--build-arg GIT_COMMIT=$(shell git log -1 --format=%h) .
-	$(DOCKER) tag $(DOCKER_LOCAL_IMAGE) $(DOCKER_REMOTE_IMAGE)
+	$(DOCKER) tag $(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) $(DOCKER_REGISTRY_ENDPOINT)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION)
+
 
 # Copy a source tree in LIB_PATH, including src, pyproject.toml to LIB_NAME
 # Generally used to copy source from within the repo into a local directory for use by a Dockerfile
@@ -244,17 +245,6 @@ __check_defined = \
 		cp -p ${LIB_PATH}/requirements.txt ${LIB_NAME}; \
 	fi
 
-
-# Build and image using the local Dockerfile and make the data-processing-lib/python
-# available in the current directory for use by the Dockerfile (i.e. to install the library).
-#.PHONY: .defaults.python-lib-src-image
-#.defaults.python-lib-src-image:: # Must be called with a DOCKER_LOCAL_IMAGE= settings.
-# @# Help: Build the Python $(DOCKER_LOCAL_IMAGE) using the $(DOCKER_FILE), requirements.txt and data-processing-lib/python source
-#ifeq ($(USE_REPO_LIB_SRC), 1)
-# $(MAKE) LIB_PATH=$(DPK_PYTHON_LIB_DIR) LIB_NAME=data-processing-lib-python .defaults.copy-lib
-#endif
-# $(MAKE) DOCKER_IMAGE=$(DOCKER_LOCAL_IMAGE) .defaults.image
-# -rm -rf data-processing-lib-python
 
 .PHONY: .default.build-lib-wheel
 .default.build-lib-wheel:
@@ -262,6 +252,18 @@ __check_defined = \
 	rm -rf data-processing-dist && mkdir data-processing-dist
 	cp $(REPOROOT)/data-processing-lib/dist/*.whl data-processing-dist
 
+
+# Build an image using the local Dockerfile
+# Assumes wheel has already been created
+.PHONY: .defaults.lib-whl-image
+.defaults.lib-whl-image::
+	# Must be called with a DOCKER_IMAGE_NAME= setting.
+	@# Help: Build the Python $(DOCKER_LOCAL_IMAGE) using the wheel file for the library
+	@$(eval LIB_WHEEL_FILE := $(shell find data-processing-dist/*.whl))
+	$(eval LIB_WHEEL_FILE := $(shell basename $(LIB_WHEEL_FILE)))
+	$(MAKE) DPK_WHEEL_FILE_NAME=$(LIB_WHEEL_FILE) .defaults.image
+
+
 # Build and image using the local Dockerfile and make the wheel for data-processing-lib
 # available in the current directory for use by the Dockerfile (i.e. to install the library).
 .PHONY: .defaults.python-lib-whl-image
@@ -270,28 +272,9 @@ __check_defined = \
 	@# Help: Build the Python $(DOCKER_LOCAL_IMAGE) using the the wheel file for the library
 	@$(eval LIB_WHEEL_FILE := $(shell find data-processing-dist/*.whl))
 	$(eval LIB_WHEEL_FILE := $(shell basename $(LIB_WHEEL_FILE)))
-	$(MAKE) DOCKER_IMAGE=$(DOCKER_LOCAL_IMAGE) DPK_WHEEL_FILE_NAME=$(LIB_WHEEL_FILE) .defaults.image
+	$(MAKE) DPK_WHEEL_FILE_NAME=$(LIB_WHEEL_FILE) .defaults.image
 	-rm -rf data-processing-dist
 
-# Build an image using the local Dockerfile and make the data-processing-lib/ray
-# available in the current directory for use by the Dockerfile (i.e. to install the library).
-# Note that this looks for the ../python directory, which is currently only used in the transform projects,
-# but we add it here as a convenience to avoid duplicating a lot of this in transforms/.make.transforms.
-#.PHONY: .defaults.ray-lib-src-image
-#.defaults.ray-lib-src-image:: # Must be called with a DOCKER_LOCAL_IMAGE= settings.
-# @# Help: Build the Ray $(DOCKER_LOCAL_IMAGE) using the $(DOCKER_FILE), requirements.txt and data-processing-libs source
-#ifeq ($(USE_REPO_LIB_SRC), 1)
-# $(MAKE) LIB_PATH=$(DPK_PYTHON_LIB_DIR) LIB_NAME=data-processing-lib-python .defaults.copy-lib
-# $(MAKE) LIB_PATH=$(DPK_RAY_LIB_DIR) LIB_NAME=data-processing-lib-ray .defaults.copy-lib
-#endif
-# if [ -e ../python ]; then \
-# $(MAKE) LIB_PATH=../python LIB_NAME=python-transform .defaults.copy-lib; \
-# fi
-# $(MAKE) DOCKER_IMAGE=$(DOCKER_LOCAL_IMAGE) .defaults.image
-# -rm -rf data-processing-lib-python
-# -rm -rf data-processing-lib-ray
-# -rm -rf python-transform
-
 
 # Build an image using the local Dockerfile and make the data-processing wheel
 # available in the current directory for use by the Dockerfile (i.e. to install the library).
@@ -306,7 +289,7 @@ __check_defined = \
 	if [ -e ../python ]; then \
 		$(MAKE) LIB_PATH=../python LIB_NAME=python-transform .defaults.copy-lib; \
 	fi
-	$(MAKE) DOCKER_IMAGE=$(DOCKER_LOCAL_IMAGE) DPK_WHEEL_FILE_NAME=$(LIB_WHEEL_FILE) .defaults.image
+	$(MAKE) DPK_WHEEL_FILE_NAME=$(LIB_WHEEL_FILE) .defaults.image
 	-rm -rf python-transform
 	-rm -rf data-processing-dist
 
@@ -316,24 +299,6 @@ __check_defined = \
 .defaults.spark-lib-base-image:
 	$(MAKE) -C $(DPK_SPARK_LIB_DIR) image
 
-# Note that this looks for the ../python directory, which is currently only used in the transform projects,
-# but we add it here as a convenience to avoid duplicating a lot of this in transforms/.make.transforms.
-# Must be called with a DOCKER_LOCAL_IMAGE= settings.
-#.PHONY: .defaults.spark-lib-src-image
-#.defaults.spark-lib-src-image:: .defaults.spark-lib-base-image
-# @# Help: Build the Spark $(DOCKER_LOCAL_IMAGE) using the $(DOCKER_FILE), requirements.txt and data-processing-libs source
-# $(MAKE) IMAGE_NAME_TO_VERIFY=$(DOCKER_SPARK_BASE_IMAGE_NAME) .defaults.verify-image-availability
-#ifeq ($(USE_REPO_LIB_SRC), 1)
-# $(MAKE) LIB_PATH=$(DPK_PYTHON_LIB_DIR) LIB_NAME=data-processing-lib-python .defaults.copy-lib
-# $(MAKE) LIB_PATH=$(DPK_SPARK_LIB_DIR) LIB_NAME=data-processing-lib-spark .defaults.copy-lib
-#endif
-# if [ -e ../python ]; then \
-# $(MAKE) LIB_PATH=../python LIB_NAME=python-transform .defaults.copy-lib; \
-# fi
-# $(MAKE) DOCKER_IMAGE=$(DOCKER_LOCAL_IMAGE) BASE_IMAGE=$(DOCKER_SPARK_BASE_IMAGE) .defaults.image
-# -rm -rf data-processing-lib-python
-# -rm -rf data-processing-lib-spark
-# -rm -rf python-transform
 
 .PHONY: .defaults.spark-lib-whl-image
 .defaults.spark-lib-whl-image:: .default.build-lib-wheel .defaults.spark-lib-base-image
@@ -345,7 +310,7 @@ __check_defined = \
 	if [ -e ../python ]; then \
 		$(MAKE) LIB_PATH=../python LIB_NAME=python-transform .defaults.copy-lib; \
 	fi
-	$(MAKE) DOCKER_IMAGE=$(DOCKER_LOCAL_IMAGE) BASE_IMAGE=$(DOCKER_SPARK_BASE_IMAGE) DPK_WHEEL_FILE_NAME=$(LIB_WHEEL_FILE) .defaults.image
+	$(MAKE) BASE_IMAGE=$(DOCKER_SPARK_BASE_IMAGE) DPK_WHEEL_FILE_NAME=$(LIB_WHEEL_FILE) .defaults.image
 	-rm -rf python-transform
 	-rm -rf data-processing-dist
 
diff --git a/transforms/.make.cicd.targets b/transforms/.make.cicd.targets
index 69a5f54fd..dc276425e 100644
--- a/transforms/.make.cicd.targets
+++ b/transforms/.make.cicd.targets
@@ -7,10 +7,15 @@
 include $(REPOROOT)/transforms/.make.transforms
 
 ######################################################################
-## Default setting for TRANSFORM_RUNTIME uses folder name-- Old layout
-TRANSFORM_PYTHON_RUNTIME_SRC_FILE=-m dpk_$(TRANSFORM_NAME).transform
-TRANSFORM_RAY_RUNTIME_SRC_FILE=-m dpk_$(TRANSFORM_NAME).ray.transform
-TRANSFORM_PYTHON_RUNTIME_SRC_FILE=-m dpk_$(TRANSFORM_NAME).spark.transform
+## Default setting for TRANSFORM_RUNTIME entry point:
+# python -m dpk_html2parquet.ray.transform --help
+# or
+# python -m dpk_html2parquet.transform_python --help
+#
+TRANSFORM_PYTHON_SRC?="-m dpk_$(TRANSFORM_NAME).transform_python"
+TRANSFORM_RAY_SRC?="-m dpk_$(TRANSFORM_NAME).ray.transform"
+TRANSFORM_SPARK_SRC?="-m dpk_$(TRANSFORM_NAME).spark.transform"
+
 
 venv:: .defaults.create-venv
 	source venv/bin/activate && $(PIP) install -e $(REPOROOT)/data-processing-lib[ray,spark]
@@ -19,7 +24,6 @@ venv:: .defaults.create-venv
 		source venv/bin/activate && $(PIP) install -r requirements.txt; \
 	fi;
 
-
 test:: .transforms.test-src test-image
 
 clean:: .transforms.clean
@@ -28,62 +32,112 @@ clean:: .transforms.clean
 set-versions::
 
 ## We need to think how we want to do this going forward
-build::
-image::
-	@if [ -e Dockerfile ]; then \
-		$(MAKE) image-default ; \
-	else \
-		echo "Skipping image for $(shell pwd) since no Dockerfile is present"; \
+build:: image
+
+.PHONY: publish image-python image-ray image-spark
+publish:
+	@if [ -e Dockerfile.python ]; then \
+		$(MAKE) DOCKER_REMOTE_IMAGE=$(DOCKER_REGISTRY_ENDPOINT)/$(TRANSFORM_NAME)-python:$(DOCKER_IMAGE_VERSION) \
+			.defaults.publish-image ; \
+	fi
+	@if [ -e Dockerfile.ray ]; then \
+		$(MAKE) DOCKER_REMOTE_IMAGE=$(DOCKER_REGISTRY_ENDPOINT)/$(TRANSFORM_NAME)-ray:$(DOCKER_IMAGE_VERSION) \
+			.defaults.publish-image ; \
+	fi
+	@if [ -e Dockerfile.spark ]; then \
+		$(MAKE) DOCKER_REMOTE_IMAGE=$(DOCKER_REGISTRY_ENDPOINT)/$(TRANSFORM_NAME)-spark:$(DOCKER_IMAGE_VERSION) \
+			.defaults.publish-image ; \
+	fi
+
+test-image-sequence:: .defaults.lib-whl-image .transforms.test-image-help .transforms.clean
+
+test-image:: .default.build-lib-wheel
+	@if [ -e Dockerfile.python ]; then \
+		$(MAKE) DOCKER_FILE=Dockerfile.python \
+			TRANSFORM_RUNTIME_SRC_FILE=$(TRANSFORM_PYTHON_SRC) \
+			DOCKER_IMAGE_NAME=$(TRANSFORM_NAME)-python \
+			test-image-sequence ; \
+	fi
+	@if [ -e Dockerfile.ray ]; then \
+		$(MAKE) DOCKER_FILE=Dockerfile.ray \
+			TRANSFORM_RUNTIME_SRC_FILE=$(TRANSFORM_RAY_SRC) \
+			DOCKER_IMAGE_NAME=$(TRANSFORM_NAME)-ray \
+			BASE_IMAGE=$(RAY_BASE_IMAGE) \
+			test-image-sequence ; \
+	fi
+	@if [ -e Dockerfile.spark ]; then \
+		$(MAKE) DOCKER_FILE=Dockerfile.spark \
+			TRANSFORM_RUNTIME_SRC_FILE=$(TRANSFORM_SPARK_SRC) \
+			DOCKER_IMAGE_NAME=$(TRANSFORM_NAME)-spark \
+			BASE_IMAGE=$(SPARK_BASE_IMAGE) \
+			test-image-sequence ; \
+	fi
+	-rm -rf data-processing-dist
+
+
+image-python:
+	@if [ -e Dockerfile.python ]; then \
+		$(MAKE) DOCKER_FILE=Dockerfile.python \
+			DOCKER_IMAGE_NAME=$(TRANSFORM_NAME)-python \
+			.defaults.lib-whl-image ; \
 	fi
 
 
-publish::
-	@if [ -e Dockerfile ]; then \
-		$(MAKE) publish-default ; \
-	else \
-		echo "Skipping publish for $(shell pwd) since no Dockerfile is present"; \
+image-ray:
+	@if [ -e Dockerfile.ray ]; then \
+		$(MAKE) DOCKER_FILE=Dockerfile.ray \
+			DOCKER_IMAGE_NAME=$(TRANSFORM_NAME)-ray \
+			BASE_IMAGE=$(RAY_BASE_IMAGE) \
+			.defaults.lib-whl-image ; \
 	fi
 
 
-publish-image::
-	@if [ -e Dockerfile ]; then \
-		$(MAKE) publish-image-default ; \
-	else \
-		echo "Skipping publish-image for $(shell pwd) since no Dockerfile is present"; \
+image-spark:
+	@if [ -e Dockerfile.spark ]; then \
+		$(MAKE) DOCKER_FILE=Dockerfile.spark \
+			DOCKER_IMAGE_NAME=$(TRANSFORM_NAME)-spark \
+			BASE_IMAGE=$(SPARK_BASE_IMAGE) \
+			.defaults.lib-whl-image ; \
 	fi
 
 
-test-image::
-	@if [ -e Dockerfile ]; then \
-		$(MAKE) test-image-default ; \
-	else \
-		echo "Skipping test-image for $(shell pwd) since no Dockerfile is present"; \
+image:: .default.build-lib-wheel
+	## Build all possible images unless a specific runtime is specified
+	@if [ -z "$(BUILD_SPECIFIC_RUNTIME)" ] || [ "$(BUILD_SPECIFIC_RUNTIME)" = "python" ]; then \
+		$(MAKE) image-python ; \
+	fi
+	@if [ -z "$(BUILD_SPECIFIC_RUNTIME)" ] || [ "$(BUILD_SPECIFIC_RUNTIME)" = "ray" ]; then \
+		$(MAKE) image-ray ; \
 	fi
+	@if [ -z "$(BUILD_SPECIFIC_RUNTIME)" ] || [ "$(BUILD_SPECIFIC_RUNTIME)" = "spark" ]; then \
+		$(MAKE) image-spark ; \
+	fi
+	-rm -rf data-processing-dist
 
 test-src:: .transforms.test-src
 
 setup:: .transforms.setup
 
-publish-default:: publish-image
-
-publish-image-default:: .defaults.publish-image
-
-test-image-default:: image .transforms.test-image-help .defaults.test-image-pytest .transforms.clean
-
-build-lib-wheel:
-	make -C $(REPOROOT)/data-processing-lib build-pkg-dist
-
-image-default:: build-lib-wheel
-	@$(eval LIB_WHEEL_FILE := $(shell find $(REPOROOT)/data-processing-lib/dist/*.whl))
-	rm -fr dist && mv $(REPOROOT)/data-processing-lib/dist .
-	$(eval WHEEL_FILE_NAME := $(shell basename $(LIB_WHEEL_FILE)))
-	$(DOCKER) build -t $(DOCKER_IMAGE_NAME) $(DOCKER_BUILD_EXTRA_ARGS) \
-		--platform $(DOCKER_PLATFORM) \
-		--build-arg EXTRA_INDEX_URL=$(EXTRA_INDEX_URL) \
-		--build-arg BASE_IMAGE=$(RAY_BASE_IMAGE) \
-		--build-arg BUILD_DATE=$(shell date -u +'%Y-%m-%dT%H:%M:%SZ') \
-		--build-arg WHEEL_FILE_NAME=$(WHEEL_FILE_NAME) \
-		--build-arg TRANSFORM_NAME=$(TRANSFORM_NAME) \
-		--build-arg GIT_COMMIT=$(shell git log -1 --format=%h) .
-	$(DOCKER) tag $(DOCKER_LOCAL_IMAGE) $(DOCKER_REMOTE_IMAGE)
-	rm -fr dist
+.PHONY: workflow-venv
+workflow-venv:
+	if [ -e kfp_ray ]; then \
+		$(MAKE) -C kfp_ray workflow-venv; \
+	fi
+
+.PHONY: workflow-test
+workflow-test:
+	if [ -e kfp_ray ]; then \
+		$(MAKE) -C kfp_ray workflow-test; \
+	fi
+
+.PHONY: workflow-upload
+workflow-upload:
+	if [ -e kfp_ray ]; then \
+		$(MAKE) -C kfp_ray workflow-upload; \
+	fi
+
+.PHONY: workflow-build
+workflow-build:
+	if [ -e kfp_ray ]; then \
+		$(MAKE) -C kfp_ray workflow-build; \
+	fi
diff --git a/transforms/language/html2parquet/Dockerfile b/transforms/language/html2parquet/Dockerfile.ray
similarity index 100%
rename from transforms/language/html2parquet/Dockerfile
rename to transforms/language/html2parquet/Dockerfile.ray
diff --git a/transforms/language/html2parquet/Makefile b/transforms/language/html2parquet/Makefile
index b591d66f1..5ec956c23 100644
--- a/transforms/language/html2parquet/Makefile
+++ b/transforms/language/html2parquet/Makefile
@@ -11,12 +11,6 @@ include $(REPOROOT)/transforms/.make.cicd.targets
 # expected files and is used to define the transform's image name.
 TRANSFORM_NAME=$(shell basename `pwd`)
 
-# Used as entry point:
-# python -m dpk_html2parquet.ray.transform --help
-# or
-# python -m dpk_html2parquet.transform_python --help
-TRANSFORM_RUNTIME_SRC_FILE=-m dpk_$(TRANSFORM_NAME).ray.transform
-
 ################################################################################
 # This defines the transforms' version number as would be used
 # when publishing the wheel. In general, only the micro version
@@ -26,31 +20,5 @@ TRANSFORM_RUNTIME_SRC_FILE=-m dpk_$(TRANSFORM_NAME).ray.transform
 # update version numbers across the transform (e.g., pyproject.toml).
 #TRANSFORM_VERSION=$(DPK_VERSION)
 
-test-image-default:: image .transforms.test-image-help .transforms.clean
-
-
-.PHONY: workflow-vent
-workflow-venv:
-# if [ -e kfp_ray ]; then \
-# $(MAKE) -C kfp_ray workflow-venv; \
-# fi
-
-.PHONY: workflow-test
-workflow-test:
-# if [ -e kfp_ray ]; then \
-# $(MAKE) -C kfp_ray workflow-test; \
-# fi
-
-.PHONY: workflow-upload
-workflow-upload:
-# if [ -e kfp_ray ]; then \
-# $(MAKE) -C kfp_ray workflow-upload; \
-# fi
-
-.PHONY: workflow-build
-workflow-build:
-# if [ -e kfp_ray ]; then \
-# $(MAKE) -C kfp_ray workflow-build; \
-# fi
 
 
diff --git a/transforms/language/html2parquet/kfp_ray/Makefile b/transforms/language/html2parquet/kfp_ray/Makefile
index f4cb64903..17613dbf2 100644
--- a/transforms/language/html2parquet/kfp_ray/Makefile
+++ b/transforms/language/html2parquet/kfp_ray/Makefile
@@ -3,9 +3,10 @@ WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
 include $(REPOROOT)/transforms/.make.workflows
 
 # Include the common configuration for this transform
-include ../transform.config
+#include ../transform.config
 
-SRC_DIR=${CURDIR}/../ray/
+SRC_DIR=${CURDIR}/../
+BUILD_SPECIFIC_RUNTIME=ray
 
 PYTHON_WF := $(shell find ./ -name '*_wf.py')
 YAML_WF := $(patsubst %.py, %.yaml, ${PYTHON_WF})