From 89312985da80119473a87abcb14aa9e6da58be52 Mon Sep 17 00:00:00 2001 From: matouma <94904133+matouma@users.noreply.github.com> Date: Sat, 14 Dec 2024 17:38:26 +0100 Subject: [PATCH 1/2] create 0.2.3 release Signed-off-by: matouma <94904133+matouma@users.noreply.github.com> --- .make.versions | 4 ++-- README.md | 4 ++-- data-processing-lib/pyproject.toml | 2 +- kfp/kfp_ray_components/createRayClusterComponent.yaml | 2 +- kfp/kfp_ray_components/deleteRayClusterComponent.yaml | 2 +- kfp/kfp_ray_components/executeRayJobComponent.yaml | 2 +- kfp/kfp_ray_components/executeRayJobComponent_multi_s3.yaml | 2 +- kfp/kfp_ray_components/executeSubWorkflowComponent.yaml | 2 +- kfp/kfp_support_lib/kfp_v1_workflow_support/pyproject.toml | 4 ++-- kfp/kfp_support_lib/kfp_v2_workflow_support/pyproject.toml | 4 ++-- kfp/kfp_support_lib/shared_workflow_support/pyproject.toml | 4 ++-- transforms/code/code2parquet/kfp_ray/code2parquet_wf.py | 2 +- transforms/code/code2parquet/python/pyproject.toml | 2 +- transforms/code/code2parquet/python/requirements.txt | 2 +- transforms/code/code2parquet/ray/pyproject.toml | 6 +++--- transforms/code/code_profiler/python/pyproject.toml | 2 +- transforms/code/code_profiler/python/requirements.txt | 2 +- transforms/code/code_profiler/ray/pyproject.toml | 6 +++--- transforms/code/code_quality/kfp_ray/code_quality_wf.py | 2 +- transforms/code/code_quality/python/pyproject.toml | 2 +- transforms/code/code_quality/python/requirements.txt | 2 +- transforms/code/code_quality/ray/pyproject.toml | 6 +++--- .../code/header_cleanser/kfp_ray/header_cleanser_wf.py | 2 +- transforms/code/header_cleanser/python/pyproject.toml | 2 +- transforms/code/header_cleanser/python/requirements.txt | 2 +- transforms/code/header_cleanser/ray/pyproject.toml | 6 +++--- transforms/code/license_select/kfp_ray/license_select_wf.py | 2 +- transforms/code/license_select/python/pyproject.toml | 2 +- transforms/code/license_select/python/requirements.txt | 2 +- 
transforms/code/license_select/ray/pyproject.toml | 6 +++--- transforms/code/malware/kfp_ray/malware_wf.py | 2 +- transforms/code/malware/python/pyproject.toml | 4 ++-- transforms/code/malware/ray/pyproject.toml | 6 +++--- .../code/proglang_select/kfp_ray/proglang_select_wf.py | 2 +- transforms/code/proglang_select/python/pyproject.toml | 2 +- transforms/code/proglang_select/python/requirements.txt | 2 +- transforms/code/proglang_select/ray/pyproject.toml | 6 +++--- .../code/repo_level_ordering/kfp_ray/repo_level_order_wf.py | 2 +- transforms/code/repo_level_ordering/ray/pyproject.toml | 4 ++-- .../language/doc_chunk/kfp_ray/doc_chunk_multiple_wf.py | 2 +- transforms/language/doc_chunk/kfp_ray/doc_chunk_wf.py | 2 +- transforms/language/doc_chunk/python/requirements.txt | 2 +- transforms/language/doc_chunk/ray/pyproject.toml | 2 +- .../language/doc_quality/kfp_ray/doc_quality_multiple_wf.py | 2 +- transforms/language/doc_quality/kfp_ray/doc_quality_wf.py | 2 +- transforms/language/doc_quality/python/pyproject.toml | 2 +- transforms/language/doc_quality/python/requirements.txt | 2 +- transforms/language/doc_quality/ray/pyproject.toml | 6 +++--- transforms/language/html2parquet/kfp_ray/html2parquet_wf.py | 2 +- transforms/language/html2parquet/python/pyproject.toml | 2 +- transforms/language/html2parquet/python/requirements.txt | 2 +- transforms/language/html2parquet/ray/pyproject.toml | 2 +- transforms/language/html2parquet/ray/requirements.txt | 4 ++-- transforms/language/lang_id/kfp_ray/lang_id_multiple_wf.py | 2 +- transforms/language/lang_id/kfp_ray/lang_id_wf.py | 2 +- transforms/language/lang_id/python/pyproject.toml | 2 +- transforms/language/lang_id/python/requirements.txt | 2 +- transforms/language/lang_id/ray/pyproject.toml | 6 +++--- .../language/pdf2parquet/kfp_ray/pdf2parquet_multiple_wf.py | 2 +- transforms/language/pdf2parquet/kfp_ray/pdf2parquet_wf.py | 2 +- transforms/language/pdf2parquet/python/requirements.txt | 2 +- 
transforms/language/pdf2parquet/ray/requirements.txt | 2 +- transforms/language/pii_redactor/python/pyproject.toml | 2 +- transforms/language/pii_redactor/python/requirements.txt | 2 +- transforms/language/pii_redactor/ray/pyproject.toml | 6 +++--- .../text_encoder/kfp_ray/text_encoder_multiple_wf.py | 2 +- transforms/language/text_encoder/kfp_ray/text_encoder_wf.py | 2 +- transforms/language/text_encoder/python/pyproject.toml | 2 +- transforms/language/text_encoder/python/requirements.txt | 2 +- transforms/language/text_encoder/ray/pyproject.toml | 6 +++--- transforms/pyproject.toml | 2 +- transforms/requirements-ray.txt | 2 +- transforms/requirements.txt | 2 +- transforms/universal/doc_id/kfp_ray/doc_id_wf.py | 2 +- transforms/universal/doc_id/python/pyproject.toml | 2 +- transforms/universal/doc_id/python/requirements.txt | 2 +- transforms/universal/doc_id/ray/pyproject.toml | 6 +++--- transforms/universal/doc_id/spark/pyproject.toml | 4 ++-- transforms/universal/ededup/kfp_ray/ededup_wf.py | 2 +- transforms/universal/ededup/python/pyproject.toml | 2 +- transforms/universal/ededup/python/requirements.txt | 2 +- transforms/universal/ededup/ray/pyproject.toml | 6 +++--- transforms/universal/fdedup/kfp_ray/fdedup_wf.py | 2 +- transforms/universal/fdedup/python/pyproject.toml | 2 +- transforms/universal/fdedup/python/requirements.txt | 2 +- transforms/universal/fdedup/ray/pyproject.toml | 2 +- transforms/universal/fdedup/ray/requirements.txt | 4 ++-- transforms/universal/fdedup/spark/pyproject.toml | 2 +- transforms/universal/fdedup/spark/requirements.txt | 4 ++-- transforms/universal/filter/kfp_ray/filter_wf.py | 2 +- transforms/universal/filter/python/pyproject.toml | 2 +- transforms/universal/filter/python/requirements.txt | 2 +- transforms/universal/filter/ray/pyproject.toml | 6 +++--- transforms/universal/filter/spark/pyproject.toml | 4 ++-- transforms/universal/hap/kfp_ray.disable/hap_wf.py | 2 +- transforms/universal/hap/python/pyproject.toml | 2 +- 
transforms/universal/hap/python/requirements.txt | 2 +- transforms/universal/hap/ray/pyproject.toml | 2 +- transforms/universal/hap/ray/requirements.txt | 4 ++-- transforms/universal/noop/kfp_ray/noop_multiple_wf.py | 2 +- transforms/universal/noop/kfp_ray/noop_wf.py | 2 +- transforms/universal/noop/python/pyproject.toml | 4 ++-- transforms/universal/noop/ray/pyproject.toml | 6 +++--- transforms/universal/noop/spark/pyproject.toml | 6 +++--- transforms/universal/profiler/kfp_ray/profiler_wf.py | 2 +- transforms/universal/profiler/python/pyproject.toml | 2 +- transforms/universal/profiler/python/requirements.txt | 2 +- transforms/universal/profiler/ray/pyproject.toml | 6 +++--- transforms/universal/profiler/spark/pyproject.toml | 6 +++--- transforms/universal/resize/kfp_ray/resize_wf.py | 2 +- transforms/universal/resize/python/pyproject.toml | 2 +- transforms/universal/resize/python/requirements.txt | 2 +- transforms/universal/resize/ray/pyproject.toml | 6 +++--- transforms/universal/resize/spark/pyproject.toml | 6 +++--- .../universal/tokenization/kfp_ray/tokenization_wf.py | 2 +- transforms/universal/tokenization/python/pyproject.toml | 2 +- transforms/universal/tokenization/python/requirements.txt | 2 +- transforms/universal/tokenization/ray/pyproject.toml | 6 +++--- transforms/universal/web2parquet/requirements.txt | 2 +- 119 files changed, 175 insertions(+), 175 deletions(-) diff --git a/.make.versions b/.make.versions index 073328356..4362f9157 100644 --- a/.make.versions +++ b/.make.versions @@ -19,7 +19,7 @@ DPK_MINOR_VERSION=2 DPK_MICRO_VERSION=3 # The suffix is generally always set in the main/development branch and only nulled out when creating release branches. # It can be manually incremented, for example, to allow publishing a new intermediate version wheel to pypi. 
-DPK_VERSION_SUFFIX=.dev2 +DPK_VERSION_SUFFIX= DPK_VERSION=$(DPK_MAJOR_VERSION).$(DPK_MINOR_VERSION).$(DPK_MICRO_VERSION)$(DPK_VERSION_SUFFIX) @@ -66,4 +66,4 @@ endif # # If you change the versions numbers, be sure to run "make set-versions" to # update version numbers across the transform (e.g., pyproject.toml). -TRANSFORMS_PKG_VERSION=0.2.3.dev3 +TRANSFORMS_PKG_VERSION=0.2.3 diff --git a/README.md b/README.md index 1fa739fdd..038ceb85f 100644 --- a/README.md +++ b/README.md @@ -79,8 +79,8 @@ conda install gxx_linux-64 Next, install the data prep toolkit library. This library installs both the python and ray versions of the transforms. For better management of dependencies, it is recommended to install the same tagged version of both the library and the transform. ```bash -pip3 install 'data-prep-toolkit[ray]==0.2.3.dev0' -pip3 install 'data-prep-toolkit-transforms[ray,all]==0.2.3.dev1' +pip3 install 'data-prep-toolkit[ray]==0.2.3' +pip3 install 'data-prep-toolkit-transforms[all]==0.2.3' pip3 install jupyterlab ipykernel ipywidgets ## install custom kernel diff --git a/data-processing-lib/pyproject.toml b/data-processing-lib/pyproject.toml index 37e04d68a..582eb037d 100644 --- a/data-processing-lib/pyproject.toml +++ b/data-processing-lib/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "data_prep_toolkit" -version = "0.2.3.dev2" +version = "0.2.3" keywords = ["data", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ] requires-python = ">=3.10,<3.13" description = "Data Preparation Toolkit Library for Ray and Python" diff --git a/kfp/kfp_ray_components/createRayClusterComponent.yaml b/kfp/kfp_ray_components/createRayClusterComponent.yaml index 30b0b66d8..429fcdb3e 100644 --- a/kfp/kfp_ray_components/createRayClusterComponent.yaml +++ b/kfp/kfp_ray_components/createRayClusterComponent.yaml @@ -11,7 +11,7 @@ inputs: implementation: container: - image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" + image: 
"quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.3" # command is a list of strings (command-line arguments). # The YAML language has two syntaxes for lists and you can use either of them. # Here we use the "flow syntax" - comma-separated strings inside square brackets. diff --git a/kfp/kfp_ray_components/deleteRayClusterComponent.yaml b/kfp/kfp_ray_components/deleteRayClusterComponent.yaml index 44e199c47..18db8e9ce 100644 --- a/kfp/kfp_ray_components/deleteRayClusterComponent.yaml +++ b/kfp/kfp_ray_components/deleteRayClusterComponent.yaml @@ -9,7 +9,7 @@ inputs: implementation: container: - image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" + image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.3" # command is a list of strings (command-line arguments). # The YAML language has two syntaxes for lists and you can use either of them. # Here we use the "flow syntax" - comma-separated strings inside square brackets. diff --git a/kfp/kfp_ray_components/executeRayJobComponent.yaml b/kfp/kfp_ray_components/executeRayJobComponent.yaml index 7ab517bff..6d4725105 100644 --- a/kfp/kfp_ray_components/executeRayJobComponent.yaml +++ b/kfp/kfp_ray_components/executeRayJobComponent.yaml @@ -12,7 +12,7 @@ inputs: implementation: container: - image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" + image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.3" # command is a list of strings (command-line arguments). # The YAML language has two syntaxes for lists and you can use either of them. # Here we use the "flow syntax" - comma-separated strings inside square brackets. 
diff --git a/kfp/kfp_ray_components/executeRayJobComponent_multi_s3.yaml b/kfp/kfp_ray_components/executeRayJobComponent_multi_s3.yaml index 9b98912f0..1e1f3772a 100644 --- a/kfp/kfp_ray_components/executeRayJobComponent_multi_s3.yaml +++ b/kfp/kfp_ray_components/executeRayJobComponent_multi_s3.yaml @@ -13,7 +13,7 @@ inputs: implementation: container: - image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" + image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.3" # command is a list of strings (command-line arguments). # The YAML language has two syntaxes for lists and you can use either of them. # Here we use the "flow syntax" - comma-separated strings inside square brackets. diff --git a/kfp/kfp_ray_components/executeSubWorkflowComponent.yaml b/kfp/kfp_ray_components/executeSubWorkflowComponent.yaml index 6b261a003..ff4f164bf 100644 --- a/kfp/kfp_ray_components/executeSubWorkflowComponent.yaml +++ b/kfp/kfp_ray_components/executeSubWorkflowComponent.yaml @@ -27,7 +27,7 @@ outputs: implementation: container: - image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" + image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.3" # command is a list of strings (command-line arguments). # The YAML language has two syntaxes for lists, and you can use either of them. # Here we use the "flow syntax" - comma-separated strings inside square brackets. diff --git a/kfp/kfp_support_lib/kfp_v1_workflow_support/pyproject.toml b/kfp/kfp_support_lib/kfp_v1_workflow_support/pyproject.toml index 8aea5197e..561884fe4 100644 --- a/kfp/kfp_support_lib/kfp_v1_workflow_support/pyproject.toml +++ b/kfp/kfp_support_lib/kfp_v1_workflow_support/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "data_prep_toolkit_kfp_v1" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "Data Preparation Kit Library. 
KFP support" license = {text = "Apache-2.0"} @@ -13,7 +13,7 @@ authors = [ ] dependencies = [ "kfp==1.8.22", - "data-prep-toolkit-kfp-shared==0.2.3.dev2", + "data-prep-toolkit-kfp-shared==0.2.3", ] [build-system] diff --git a/kfp/kfp_support_lib/kfp_v2_workflow_support/pyproject.toml b/kfp/kfp_support_lib/kfp_v2_workflow_support/pyproject.toml index 10abfdcd3..a112f7980 100644 --- a/kfp/kfp_support_lib/kfp_v2_workflow_support/pyproject.toml +++ b/kfp/kfp_support_lib/kfp_v2_workflow_support/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "data_prep_toolkit_kfp_v2" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "Data Preparation Kit Library. KFP support" license = {text = "Apache-2.0"} @@ -14,7 +14,7 @@ authors = [ dependencies = [ "kfp==2.8.0", "kfp-kubernetes==1.2.0", - "data-prep-toolkit-kfp-shared==0.2.3.dev2", + "data-prep-toolkit-kfp-shared==0.2.3", ] [build-system] diff --git a/kfp/kfp_support_lib/shared_workflow_support/pyproject.toml b/kfp/kfp_support_lib/shared_workflow_support/pyproject.toml index 0ef8404fd..0feb90e29 100644 --- a/kfp/kfp_support_lib/shared_workflow_support/pyproject.toml +++ b/kfp/kfp_support_lib/shared_workflow_support/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "data_prep_toolkit_kfp_shared" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "Data Preparation Kit Library. 
KFP support" license = {text = "Apache-2.0"} @@ -14,7 +14,7 @@ authors = [ dependencies = [ "requests", "kubernetes", - "data-prep-toolkit[ray]>=0.2.3.dev2", + "data-prep-toolkit[ray]>=0.2.3", ] [build-system] diff --git a/transforms/code/code2parquet/kfp_ray/code2parquet_wf.py b/transforms/code/code2parquet/kfp_ray/code2parquet_wf.py index f3f491e4b..312fd6580 100644 --- a/transforms/code/code2parquet/kfp_ray/code2parquet_wf.py +++ b/transforms/code/code2parquet/kfp_ray/code2parquet_wf.py @@ -25,7 +25,7 @@ # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.3" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/code/code2parquet/python/pyproject.toml b/transforms/code/code2parquet/python/pyproject.toml index 9fe54599f..ea1b23313 100644 --- a/transforms/code/code2parquet/python/pyproject.toml +++ b/transforms/code/code2parquet/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_code2parquet_transform_python" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "code2parquet Python Transform" license = {text = "Apache-2.0"} diff --git a/transforms/code/code2parquet/python/requirements.txt b/transforms/code/code2parquet/python/requirements.txt index 9949ec266..d871e3142 100644 --- a/transforms/code/code2parquet/python/requirements.txt +++ b/transforms/code/code2parquet/python/requirements.txt @@ -1,3 +1,3 @@ -data-prep-toolkit>=0.2.3.dev2 +data-prep-toolkit>=0.2.3 parameterized pandas diff --git a/transforms/code/code2parquet/ray/pyproject.toml b/transforms/code/code2parquet/ray/pyproject.toml index 1b1edad2c..eb736d2dd 100644 --- a/transforms/code/code2parquet/ray/pyproject.toml +++ b/transforms/code/code2parquet/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_code2parquet_transform_ray" -version = "0.2.3.dev2" +version = 
"0.2.3" requires-python = ">=3.10,<3.13" description = "code2parquet Ray Transform" license = {text = "Apache-2.0"} @@ -10,8 +10,8 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - "data-prep-toolkit[ray]>=0.2.3.dev2", - "dpk-code2parquet-transform-python==0.2.3.dev2", + "data-prep-toolkit[ray]>=0.2.3", + "dpk-code2parquet-transform-python==0.2.3", "parameterized", "pandas", ] diff --git a/transforms/code/code_profiler/python/pyproject.toml b/transforms/code/code_profiler/python/pyproject.toml index 6759dc09e..908160121 100644 --- a/transforms/code/code_profiler/python/pyproject.toml +++ b/transforms/code/code_profiler/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_code_profiler_transform_python" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "Code Profiler Python Transform" license = {text = "Apache-2.0"} diff --git a/transforms/code/code_profiler/python/requirements.txt b/transforms/code/code_profiler/python/requirements.txt index d328f5d67..ee5c4e7f2 100644 --- a/transforms/code/code_profiler/python/requirements.txt +++ b/transforms/code/code_profiler/python/requirements.txt @@ -1,4 +1,4 @@ -data-prep-toolkit>=0.2.3.dev2 +data-prep-toolkit>=0.2.3 parameterized pandas aiolimiter==1.1.0 diff --git a/transforms/code/code_profiler/ray/pyproject.toml b/transforms/code/code_profiler/ray/pyproject.toml index 26fc0acc1..dce39b153 100644 --- a/transforms/code/code_profiler/ray/pyproject.toml +++ b/transforms/code/code_profiler/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_code_profiler_transform_ray" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "Code Profiler Ray Transform" license = {text = "Apache-2.0"} @@ -9,8 +9,8 @@ authors = [ { name = "Pankaj Thorat", email = "pankaj.thorat@ibm.com" }, ] dependencies = [ - "dpk-code-profiler-transform-python==0.2.3.dev2", - "data-prep-toolkit[ray]>=0.2.3.dev2", + 
"dpk-code-profiler-transform-python==0.2.3", + "data-prep-toolkit[ray]>=0.2.3", ] [build-system] diff --git a/transforms/code/code_quality/kfp_ray/code_quality_wf.py b/transforms/code/code_quality/kfp_ray/code_quality_wf.py index 6a4ccec1b..ce5a5706b 100644 --- a/transforms/code/code_quality/kfp_ray/code_quality_wf.py +++ b/transforms/code/code_quality/kfp_ray/code_quality_wf.py @@ -24,7 +24,7 @@ task_image = "quay.io/dataprep1/data-prep-kit/code_quality-ray:latest" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.3" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/code/code_quality/python/pyproject.toml b/transforms/code/code_quality/python/pyproject.toml index be9ed6ac0..23ae9c635 100644 --- a/transforms/code/code_quality/python/pyproject.toml +++ b/transforms/code/code_quality/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_code_quality_transform_python" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "Code Quality Python Transform" license = {text = "Apache-2.0"} diff --git a/transforms/code/code_quality/python/requirements.txt b/transforms/code/code_quality/python/requirements.txt index b0d954a55..4226758bd 100644 --- a/transforms/code/code_quality/python/requirements.txt +++ b/transforms/code/code_quality/python/requirements.txt @@ -1,3 +1,3 @@ -data-prep-toolkit>=0.2.3.dev2 +data-prep-toolkit>=0.2.3 bs4==0.0.2 transformers==4.38.2 diff --git a/transforms/code/code_quality/ray/pyproject.toml b/transforms/code/code_quality/ray/pyproject.toml index 35e327e33..299014424 100644 --- a/transforms/code/code_quality/ray/pyproject.toml +++ b/transforms/code/code_quality/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_code_quality_transform_ray" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = 
">=3.10,<3.13" description = "Code Quality Ray Transform" license = {text = "Apache-2.0"} @@ -9,8 +9,8 @@ authors = [ { name = "Shivdeep Singh", email = "shivdeep.singh@ibm.com" }, ] dependencies = [ - "dpk-code-quality-transform-python==0.2.3.dev2", - "data-prep-toolkit[ray]>=0.2.3.dev2", + "dpk-code-quality-transform-python==0.2.3", + "data-prep-toolkit[ray]>=0.2.3", ] [build-system] diff --git a/transforms/code/header_cleanser/kfp_ray/header_cleanser_wf.py b/transforms/code/header_cleanser/kfp_ray/header_cleanser_wf.py index 9bb315569..06479781c 100644 --- a/transforms/code/header_cleanser/kfp_ray/header_cleanser_wf.py +++ b/transforms/code/header_cleanser/kfp_ray/header_cleanser_wf.py @@ -24,7 +24,7 @@ task_image = "quay.io/dataprep1/data-prep-kit/header_cleanser-ray:latest" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.3" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/code/header_cleanser/python/pyproject.toml b/transforms/code/header_cleanser/python/pyproject.toml index 5f2d5d3cd..cb3bdb9e4 100644 --- a/transforms/code/header_cleanser/python/pyproject.toml +++ b/transforms/code/header_cleanser/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_header_cleanser_transform_python" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "License and Copyright Removal Transform for Python" license = {text = "Apache-2.0"} diff --git a/transforms/code/header_cleanser/python/requirements.txt b/transforms/code/header_cleanser/python/requirements.txt index c64e01db4..da9d63534 100644 --- a/transforms/code/header_cleanser/python/requirements.txt +++ b/transforms/code/header_cleanser/python/requirements.txt @@ -1,3 +1,3 @@ -data-prep-toolkit>=0.2.3.dev2 +data-prep-toolkit>=0.2.3 scancode-toolkit==32.1.0 ; platform_system != 
'Darwin' diff --git a/transforms/code/header_cleanser/ray/pyproject.toml b/transforms/code/header_cleanser/ray/pyproject.toml index 8d2b10d44..150e91876 100644 --- a/transforms/code/header_cleanser/ray/pyproject.toml +++ b/transforms/code/header_cleanser/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_header_cleanser_transform_ray" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "License and copyright removal Transform for Ray" license = {text = "Apache-2.0"} @@ -9,8 +9,8 @@ authors = [ { name = "Yash kalathiya", email = "yashkalathiya164@gmail.com" }, ] dependencies = [ - "dpk-header-cleanser-transform-python==0.2.3.dev2", - "data-prep-toolkit[ray]>=0.2.3.dev2", + "dpk-header-cleanser-transform-python==0.2.3", + "data-prep-toolkit[ray]>=0.2.3", "scancode-toolkit==32.1.0", ] diff --git a/transforms/code/license_select/kfp_ray/license_select_wf.py b/transforms/code/license_select/kfp_ray/license_select_wf.py index 7dba0d9d1..ab58fe8bb 100644 --- a/transforms/code/license_select/kfp_ray/license_select_wf.py +++ b/transforms/code/license_select/kfp_ray/license_select_wf.py @@ -25,7 +25,7 @@ # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.3" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/code/license_select/python/pyproject.toml b/transforms/code/license_select/python/pyproject.toml index 23e157817..17bac9393 100644 --- a/transforms/code/license_select/python/pyproject.toml +++ b/transforms/code/license_select/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_license_select_transform_python" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "License Select Python Transform" license = {text = "Apache-2.0"} diff --git a/transforms/code/license_select/python/requirements.txt 
b/transforms/code/license_select/python/requirements.txt index 0611369d3..013ce9011 100644 --- a/transforms/code/license_select/python/requirements.txt +++ b/transforms/code/license_select/python/requirements.txt @@ -1 +1 @@ -data-prep-toolkit>=0.2.3.dev2 \ No newline at end of file +data-prep-toolkit>=0.2.3 \ No newline at end of file diff --git a/transforms/code/license_select/ray/pyproject.toml b/transforms/code/license_select/ray/pyproject.toml index fb7ac5b28..cb5faa4d4 100644 --- a/transforms/code/license_select/ray/pyproject.toml +++ b/transforms/code/license_select/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_license_select_transform_ray" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "License Select Transform" license = {text = "Apache-2.0"} @@ -10,8 +10,8 @@ authors = [ { name = "Mark Lewis", email = "mark_lewis@uk.ibm.com" }, ] dependencies = [ - "dpk-license-select-transform-python==0.2.3.dev2", - "data-prep-toolkit[ray]>=0.2.3.dev2", + "dpk-license-select-transform-python==0.2.3", + "data-prep-toolkit[ray]>=0.2.3", ] [build-system] diff --git a/transforms/code/malware/kfp_ray/malware_wf.py b/transforms/code/malware/kfp_ray/malware_wf.py index bede80b88..f1b95c341 100644 --- a/transforms/code/malware/kfp_ray/malware_wf.py +++ b/transforms/code/malware/kfp_ray/malware_wf.py @@ -24,7 +24,7 @@ task_image = "quay.io/dataprep1/data-prep-kit/malware-ray:latest" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.3" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/code/malware/python/pyproject.toml b/transforms/code/malware/python/pyproject.toml index af955d09f..a1b1f6b81 100644 --- a/transforms/code/malware/python/pyproject.toml +++ b/transforms/code/malware/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = 
"dpk_malware_transform_python" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "Malware Python Transform" license = {text = "Apache-2.0"} @@ -9,7 +9,7 @@ authors = [ { name = "Takuya Goto", email = "tkyg@jp.ibm.com" }, ] dependencies = [ - "data-prep-toolkit>=0.2.3.dev2", + "data-prep-toolkit>=0.2.3", "clamd==1.0.2", ] diff --git a/transforms/code/malware/ray/pyproject.toml b/transforms/code/malware/ray/pyproject.toml index cac960a5c..754b75aa2 100644 --- a/transforms/code/malware/ray/pyproject.toml +++ b/transforms/code/malware/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_malware_transform_ray" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "Malware Ray Transform" license = {text = "Apache-2.0"} @@ -9,8 +9,8 @@ authors = [ { name = "Takuya Goto", email = "tkyg@jp.ibm.com" }, ] dependencies = [ - "dpk-malware-transform-python==0.2.3.dev2", - "data-prep-toolkit[ray]>=0.2.3.dev2", + "dpk-malware-transform-python==0.2.3", + "data-prep-toolkit[ray]>=0.2.3", ] [build-system] diff --git a/transforms/code/proglang_select/kfp_ray/proglang_select_wf.py b/transforms/code/proglang_select/kfp_ray/proglang_select_wf.py index 11f001bfa..e90fe7f11 100644 --- a/transforms/code/proglang_select/kfp_ray/proglang_select_wf.py +++ b/transforms/code/proglang_select/kfp_ray/proglang_select_wf.py @@ -24,7 +24,7 @@ task_image = "quay.io/dataprep1/data-prep-kit/proglang_select-ray:latest" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.3" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/code/proglang_select/python/pyproject.toml b/transforms/code/proglang_select/python/pyproject.toml index 036cdd9ce..73790caf4 100644 --- a/transforms/code/proglang_select/python/pyproject.toml +++ 
b/transforms/code/proglang_select/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_proglang_select_transform_python" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "Programming Language Selection Python Transform" license = {text = "Apache-2.0"} diff --git a/transforms/code/proglang_select/python/requirements.txt b/transforms/code/proglang_select/python/requirements.txt index 0611369d3..013ce9011 100644 --- a/transforms/code/proglang_select/python/requirements.txt +++ b/transforms/code/proglang_select/python/requirements.txt @@ -1 +1 @@ -data-prep-toolkit>=0.2.3.dev2 \ No newline at end of file +data-prep-toolkit>=0.2.3 \ No newline at end of file diff --git a/transforms/code/proglang_select/ray/pyproject.toml b/transforms/code/proglang_select/ray/pyproject.toml index f6f469784..799aa644b 100644 --- a/transforms/code/proglang_select/ray/pyproject.toml +++ b/transforms/code/proglang_select/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_proglang_select_transform_ray" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "Programming Language Selection Ray Transform" license = {text = "Apache-2.0"} @@ -9,8 +9,8 @@ authors = [ { name = "Shivdeep Singh", email = "shivdeep.singh@ibm.com" }, ] dependencies = [ - "dpk-proglang-select-transform-python==0.2.3.dev2", - "data-prep-toolkit[ray]>=0.2.3.dev2", + "dpk-proglang-select-transform-python==0.2.3", + "data-prep-toolkit[ray]>=0.2.3", ] [build-system] diff --git a/transforms/code/repo_level_ordering/kfp_ray/repo_level_order_wf.py b/transforms/code/repo_level_ordering/kfp_ray/repo_level_order_wf.py index 38a829fab..599d9859a 100644 --- a/transforms/code/repo_level_ordering/kfp_ray/repo_level_order_wf.py +++ b/transforms/code/repo_level_ordering/kfp_ray/repo_level_order_wf.py @@ -24,7 +24,7 @@ EXEC_SCRIPT_NAME: str = "repo_level_order_transform_ray.py" # components -base_kfp_image = 
"quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.3" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/code/repo_level_ordering/ray/pyproject.toml b/transforms/code/repo_level_ordering/ray/pyproject.toml index a4283bdda..5e1989809 100644 --- a/transforms/code/repo_level_ordering/ray/pyproject.toml +++ b/transforms/code/repo_level_ordering/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_repo_level_order_transform_ray" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "repo_level_order Ray Transform" license = {text = "Apache-2.0"} @@ -11,7 +11,7 @@ authors = [ { name = "Shanmukha Guttula", email = "shagutt1@in.ibm.com" }, ] dependencies = [ - "data-prep-toolkit[ray]>=0.2.3.dev2", + "data-prep-toolkit[ray]>=0.2.3", "networkx==3.3", "colorlog==6.8.2", "func-timeout==4.3.5", diff --git a/transforms/language/doc_chunk/kfp_ray/doc_chunk_multiple_wf.py b/transforms/language/doc_chunk/kfp_ray/doc_chunk_multiple_wf.py index 7e30ee8b8..24a655d75 100644 --- a/transforms/language/doc_chunk/kfp_ray/doc_chunk_multiple_wf.py +++ b/transforms/language/doc_chunk/kfp_ray/doc_chunk_multiple_wf.py @@ -23,7 +23,7 @@ EXEC_SCRIPT_NAME: str = "doc_chunk_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.3" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/language/doc_chunk/kfp_ray/doc_chunk_wf.py b/transforms/language/doc_chunk/kfp_ray/doc_chunk_wf.py index 387c3bda7..9db94d5f8 100644 --- a/transforms/language/doc_chunk/kfp_ray/doc_chunk_wf.py +++ b/transforms/language/doc_chunk/kfp_ray/doc_chunk_wf.py @@ -23,7 +23,7 @@ EXEC_SCRIPT_NAME: str = 
"doc_chunk_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.3" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/language/doc_chunk/python/requirements.txt b/transforms/language/doc_chunk/python/requirements.txt index 09595b728..4497d92f9 100644 --- a/transforms/language/doc_chunk/python/requirements.txt +++ b/transforms/language/doc_chunk/python/requirements.txt @@ -1,4 +1,4 @@ -data-prep-toolkit>=0.2.3.dev2 +data-prep-toolkit>=0.2.3 docling-core==2.3.0 pydantic>=2.0.0,<2.10.0 llama-index-core>=0.11.22,<0.12.0 diff --git a/transforms/language/doc_chunk/ray/pyproject.toml b/transforms/language/doc_chunk/ray/pyproject.toml index 14681e0de..3b8f9eaad 100644 --- a/transforms/language/doc_chunk/ray/pyproject.toml +++ b/transforms/language/doc_chunk/ray/pyproject.toml @@ -12,7 +12,7 @@ authors = [ ] dependencies = [ "dpk-doc-chunk-transform-python==0.3.0", - "data-prep-toolkit[ray]>=0.2.3.dev2", + "data-prep-toolkit[ray]>=0.2.3", ] [build-system] diff --git a/transforms/language/doc_quality/kfp_ray/doc_quality_multiple_wf.py b/transforms/language/doc_quality/kfp_ray/doc_quality_multiple_wf.py index 436d93ff3..ccfa639ec 100644 --- a/transforms/language/doc_quality/kfp_ray/doc_quality_multiple_wf.py +++ b/transforms/language/doc_quality/kfp_ray/doc_quality_multiple_wf.py @@ -23,7 +23,7 @@ EXEC_SCRIPT_NAME: str = "doc_quality_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.3" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/language/doc_quality/kfp_ray/doc_quality_wf.py b/transforms/language/doc_quality/kfp_ray/doc_quality_wf.py index 
f39fd7e39..0f8026da2 100644 --- a/transforms/language/doc_quality/kfp_ray/doc_quality_wf.py +++ b/transforms/language/doc_quality/kfp_ray/doc_quality_wf.py @@ -23,7 +23,7 @@ EXEC_SCRIPT_NAME: str = "doc_quality_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.3" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/language/doc_quality/python/pyproject.toml b/transforms/language/doc_quality/python/pyproject.toml index 784dc6845..1ed744efe 100644 --- a/transforms/language/doc_quality/python/pyproject.toml +++ b/transforms/language/doc_quality/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_doc_quality_transform_python" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "Document Quality Python Transform" license = {text = "Apache-2.0"} diff --git a/transforms/language/doc_quality/python/requirements.txt b/transforms/language/doc_quality/python/requirements.txt index 818bd120e..b02a19d46 100644 --- a/transforms/language/doc_quality/python/requirements.txt +++ b/transforms/language/doc_quality/python/requirements.txt @@ -1,2 +1,2 @@ -data-prep-toolkit>=0.2.3.dev2 +data-prep-toolkit>=0.2.3 diff --git a/transforms/language/doc_quality/ray/pyproject.toml b/transforms/language/doc_quality/ray/pyproject.toml index 998f0ba77..c8df4729b 100644 --- a/transforms/language/doc_quality/ray/pyproject.toml +++ b/transforms/language/doc_quality/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_doc_quality_transform_ray" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "Document Quality Ray Transform" license = {text = "Apache-2.0"} @@ -9,8 +9,8 @@ authors = [ { name = "Daiki Tsuzuku", email = "dtsuzuku@jp.ibm.com" } ] dependencies = [ - 
"dpk-doc_quality-transform-python==0.2.3.dev2", - "data-prep-toolkit[ray]>=0.2.3.dev2", + "dpk-doc_quality-transform-python==0.2.3", + "data-prep-toolkit[ray]>=0.2.3", ] [build-system] diff --git a/transforms/language/html2parquet/kfp_ray/html2parquet_wf.py b/transforms/language/html2parquet/kfp_ray/html2parquet_wf.py index 4eb8b9de1..cbff06191 100644 --- a/transforms/language/html2parquet/kfp_ray/html2parquet_wf.py +++ b/transforms/language/html2parquet/kfp_ray/html2parquet_wf.py @@ -23,7 +23,7 @@ EXEC_SCRIPT_NAME: str = "html2parquet_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.3" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/language/html2parquet/python/pyproject.toml b/transforms/language/html2parquet/python/pyproject.toml index 5d290ba5d..14d097fb4 100644 --- a/transforms/language/html2parquet/python/pyproject.toml +++ b/transforms/language/html2parquet/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_html2parquet_transform_python" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "HTML2PARQUET Python Transform" license = {text = "Apache-2.0"} diff --git a/transforms/language/html2parquet/python/requirements.txt b/transforms/language/html2parquet/python/requirements.txt index 90c996aa3..e5dafa64b 100644 --- a/transforms/language/html2parquet/python/requirements.txt +++ b/transforms/language/html2parquet/python/requirements.txt @@ -1,2 +1,2 @@ -data-prep-toolkit>=0.2.3.dev2 +data-prep-toolkit>=0.2.3 trafilatura==1.12.0 diff --git a/transforms/language/html2parquet/ray/pyproject.toml b/transforms/language/html2parquet/ray/pyproject.toml index a80e0f29d..9a1849105 100644 --- a/transforms/language/html2parquet/ray/pyproject.toml +++ b/transforms/language/html2parquet/ray/pyproject.toml @@ -1,6 
+1,6 @@ [project] name = "dpk_html2parquet_transform_ray" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "HTML2PARQUET Python Transform" license = {text = "Apache-2.0"} diff --git a/transforms/language/html2parquet/ray/requirements.txt b/transforms/language/html2parquet/ray/requirements.txt index a8745547a..c222f3734 100644 --- a/transforms/language/html2parquet/ray/requirements.txt +++ b/transforms/language/html2parquet/ray/requirements.txt @@ -1,3 +1,3 @@ -dpk-html2parquet-transform-python==0.2.3.dev2 -data-prep-toolkit[ray]>=0.2.3.dev2 +dpk-html2parquet-transform-python==0.2.3 +data-prep-toolkit[ray]>=0.2.3 trafilatura==1.12.0 \ No newline at end of file diff --git a/transforms/language/lang_id/kfp_ray/lang_id_multiple_wf.py b/transforms/language/lang_id/kfp_ray/lang_id_multiple_wf.py index a89c54ab3..4dc9c52a6 100644 --- a/transforms/language/lang_id/kfp_ray/lang_id_multiple_wf.py +++ b/transforms/language/lang_id/kfp_ray/lang_id_multiple_wf.py @@ -23,7 +23,7 @@ EXEC_SCRIPT_NAME: str = "lang_id_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.3" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/language/lang_id/kfp_ray/lang_id_wf.py b/transforms/language/lang_id/kfp_ray/lang_id_wf.py index 2ac84645d..22b4ac4ec 100644 --- a/transforms/language/lang_id/kfp_ray/lang_id_wf.py +++ b/transforms/language/lang_id/kfp_ray/lang_id_wf.py @@ -23,7 +23,7 @@ EXEC_SCRIPT_NAME: str = "lang_id_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.3" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git 
a/transforms/language/lang_id/python/pyproject.toml b/transforms/language/lang_id/python/pyproject.toml index 4c9f6c0a9..341e4ab13 100644 --- a/transforms/language/lang_id/python/pyproject.toml +++ b/transforms/language/lang_id/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_lang_id_transform_python" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "Language Identification Python Transform" license = {text = "Apache-2.0"} diff --git a/transforms/language/lang_id/python/requirements.txt b/transforms/language/lang_id/python/requirements.txt index 28d06a1a3..ea00fe33e 100644 --- a/transforms/language/lang_id/python/requirements.txt +++ b/transforms/language/lang_id/python/requirements.txt @@ -1,4 +1,4 @@ -data-prep-toolkit>=0.2.3.dev2 +data-prep-toolkit>=0.2.3 fasttext==0.9.2 langcodes==3.3.0 huggingface-hub >= 0.21.4, <1.0.0 diff --git a/transforms/language/lang_id/ray/pyproject.toml b/transforms/language/lang_id/ray/pyproject.toml index 5c91f124b..7291671a9 100644 --- a/transforms/language/lang_id/ray/pyproject.toml +++ b/transforms/language/lang_id/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_lang_id_transform_ray" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "Language Identification Ray Transform" license = {text = "Apache-2.0"} @@ -9,8 +9,8 @@ authors = [ { name = "Daiki Tsuzuku", email = "dtsuzuku@jp.ibm.com" } ] dependencies = [ - "dpk-lang_id-transform-python==0.2.3.dev2", - "data-prep-toolkit[ray]>=0.2.3.dev2", + "dpk-lang_id-transform-python==0.2.3", + "data-prep-toolkit[ray]>=0.2.3", ] [build-system] diff --git a/transforms/language/pdf2parquet/kfp_ray/pdf2parquet_multiple_wf.py b/transforms/language/pdf2parquet/kfp_ray/pdf2parquet_multiple_wf.py index 8992f1145..e442335fc 100644 --- a/transforms/language/pdf2parquet/kfp_ray/pdf2parquet_multiple_wf.py +++ b/transforms/language/pdf2parquet/kfp_ray/pdf2parquet_multiple_wf.py @@ -23,7 +23,7 @@ 
EXEC_SCRIPT_NAME: str = "pdf2parquet_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.3" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/language/pdf2parquet/kfp_ray/pdf2parquet_wf.py b/transforms/language/pdf2parquet/kfp_ray/pdf2parquet_wf.py index c9cdbf652..fd722197e 100644 --- a/transforms/language/pdf2parquet/kfp_ray/pdf2parquet_wf.py +++ b/transforms/language/pdf2parquet/kfp_ray/pdf2parquet_wf.py @@ -23,7 +23,7 @@ EXEC_SCRIPT_NAME: str = "pdf2parquet_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.3" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/language/pdf2parquet/python/requirements.txt b/transforms/language/pdf2parquet/python/requirements.txt index 1b8a9b686..8cc5afc03 100644 --- a/transforms/language/pdf2parquet/python/requirements.txt +++ b/transforms/language/pdf2parquet/python/requirements.txt @@ -1,4 +1,4 @@ -data-prep-toolkit>=0.2.3.dev2 +data-prep-toolkit>=0.2.3 docling-core==2.3.0 docling-ibm-models==2.0.3 deepsearch-glm==0.26.1 diff --git a/transforms/language/pdf2parquet/ray/requirements.txt b/transforms/language/pdf2parquet/ray/requirements.txt index 704ae22d6..84ff463e1 100644 --- a/transforms/language/pdf2parquet/ray/requirements.txt +++ b/transforms/language/pdf2parquet/ray/requirements.txt @@ -1,5 +1,5 @@ dpk-pdf2parquet-transform-python==0.3.0 -data-prep-toolkit[ray]>=0.2.3.dev2 +data-prep-toolkit[ray]>=0.2.3 # docling-core==1.7.2 # docling-ibm-models==2.0.0 # deepsearch-glm==0.22.0 diff --git a/transforms/language/pii_redactor/python/pyproject.toml b/transforms/language/pii_redactor/python/pyproject.toml index 
0308d2293..51ee27d95 100644 --- a/transforms/language/pii_redactor/python/pyproject.toml +++ b/transforms/language/pii_redactor/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_pii_redactor_transform_python" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "PII redactor Transform for Python" license = {text = "Apache-2.0"} diff --git a/transforms/language/pii_redactor/python/requirements.txt b/transforms/language/pii_redactor/python/requirements.txt index fa08b97d6..e33a8c166 100644 --- a/transforms/language/pii_redactor/python/requirements.txt +++ b/transforms/language/pii_redactor/python/requirements.txt @@ -1,4 +1,4 @@ -data-prep-toolkit>=0.2.3.dev2 +data-prep-toolkit>=0.2.3 presidio-analyzer>=2.2.355 presidio-anonymizer>=2.2.355 flair>=0.14.0 diff --git a/transforms/language/pii_redactor/ray/pyproject.toml b/transforms/language/pii_redactor/ray/pyproject.toml index 00b5ac21a..dd8ffa316 100644 --- a/transforms/language/pii_redactor/ray/pyproject.toml +++ b/transforms/language/pii_redactor/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_pii_redactor_transform_ray" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "PII Redactor Ray Transform" license = {text = "Apache-2.0"} @@ -10,8 +10,8 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - "dpk_pii_redactor_transform_python==0.2.3.dev2", - "data-prep-toolkit[ray]>=0.2.3.dev2", + "dpk_pii_redactor_transform_python==0.2.3", + "data-prep-toolkit[ray]>=0.2.3", "presidio-analyzer>=2.2.355", "presidio-anonymizer>=2.2.355", "flair>=0.14.0", diff --git a/transforms/language/text_encoder/kfp_ray/text_encoder_multiple_wf.py b/transforms/language/text_encoder/kfp_ray/text_encoder_multiple_wf.py index e522737a1..fbff1c4f8 100644 --- a/transforms/language/text_encoder/kfp_ray/text_encoder_multiple_wf.py +++ b/transforms/language/text_encoder/kfp_ray/text_encoder_multiple_wf.py @@ 
-23,7 +23,7 @@ EXEC_SCRIPT_NAME: str = "text_encoder_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.3" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/language/text_encoder/kfp_ray/text_encoder_wf.py b/transforms/language/text_encoder/kfp_ray/text_encoder_wf.py index f88fe9eef..ef9727d5f 100644 --- a/transforms/language/text_encoder/kfp_ray/text_encoder_wf.py +++ b/transforms/language/text_encoder/kfp_ray/text_encoder_wf.py @@ -23,7 +23,7 @@ EXEC_SCRIPT_NAME: str = "text_encoder_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.3" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/language/text_encoder/python/pyproject.toml b/transforms/language/text_encoder/python/pyproject.toml index b4cb8dd22..ae2e0ff42 100644 --- a/transforms/language/text_encoder/python/pyproject.toml +++ b/transforms/language/text_encoder/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_text_encoder_transform_python" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "Text Encoder Python Transform" license = {text = "Apache-2.0"} diff --git a/transforms/language/text_encoder/python/requirements.txt b/transforms/language/text_encoder/python/requirements.txt index e469a501c..eb3813354 100644 --- a/transforms/language/text_encoder/python/requirements.txt +++ b/transforms/language/text_encoder/python/requirements.txt @@ -1,2 +1,2 @@ -data-prep-toolkit>=0.2.3.dev2 +data-prep-toolkit>=0.2.3 sentence-transformers==3.0.1 diff --git a/transforms/language/text_encoder/ray/pyproject.toml 
b/transforms/language/text_encoder/ray/pyproject.toml index 9eb2ae07b..087cbbf9e 100644 --- a/transforms/language/text_encoder/ray/pyproject.toml +++ b/transforms/language/text_encoder/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_text_encoder_transform_ray" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "Text Encoder Ray Transform" license = {text = "Apache-2.0"} @@ -11,8 +11,8 @@ authors = [ { name = "Peter Staar", email = "taa@zurich.ibm.com" }, ] dependencies = [ - "dpk-text_encoder-transform-python==0.2.3.dev2", - "data-prep-toolkit[ray]>=0.2.3.dev2", + "dpk-text_encoder-transform-python==0.2.3", + "data-prep-toolkit[ray]>=0.2.3", ] [build-system] diff --git a/transforms/pyproject.toml b/transforms/pyproject.toml index 46f5134e1..b90216f96 100644 --- a/transforms/pyproject.toml +++ b/transforms/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "data_prep_toolkit_transforms" -version = "0.2.3.dev3" +version = "0.2.3" requires-python = ">=3.10,<3.13" keywords = ["transforms", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ] description = "Data Preparation Toolkit Transforms using Ray" diff --git a/transforms/requirements-ray.txt b/transforms/requirements-ray.txt index f7778def5..517d039d4 100644 --- a/transforms/requirements-ray.txt +++ b/transforms/requirements-ray.txt @@ -1,4 +1,4 @@ -data-prep-toolkit[ray]>=0.2.3.dev2 +data-prep-toolkit[ray]>=0.2.3 networkx==3.3 colorlog==6.8.2 func-timeout==4.3.5 diff --git a/transforms/requirements.txt b/transforms/requirements.txt index d42f58b88..fc37b9fc0 100644 --- a/transforms/requirements.txt +++ b/transforms/requirements.txt @@ -1 +1 @@ -data-prep-toolkit>=0.2.3.dev2 +data-prep-toolkit>=0.2.3 diff --git a/transforms/universal/doc_id/kfp_ray/doc_id_wf.py b/transforms/universal/doc_id/kfp_ray/doc_id_wf.py index f41231159..83e7947c4 100644 --- a/transforms/universal/doc_id/kfp_ray/doc_id_wf.py +++ 
b/transforms/universal/doc_id/kfp_ray/doc_id_wf.py @@ -22,7 +22,7 @@ # the name of the job script EXEC_SCRIPT_NAME: str = "doc_id_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.3" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/universal/doc_id/python/pyproject.toml b/transforms/universal/doc_id/python/pyproject.toml index 338289932..3dcb794b6 100644 --- a/transforms/universal/doc_id/python/pyproject.toml +++ b/transforms/universal/doc_id/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_doc_id_transform_python" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "ededup Python Transform" license = {text = "Apache-2.0"} diff --git a/transforms/universal/doc_id/python/requirements.txt b/transforms/universal/doc_id/python/requirements.txt index 0611369d3..013ce9011 100644 --- a/transforms/universal/doc_id/python/requirements.txt +++ b/transforms/universal/doc_id/python/requirements.txt @@ -1 +1 @@ -data-prep-toolkit>=0.2.3.dev2 \ No newline at end of file +data-prep-toolkit>=0.2.3 \ No newline at end of file diff --git a/transforms/universal/doc_id/ray/pyproject.toml b/transforms/universal/doc_id/ray/pyproject.toml index 8e886b6bd..e0705b6ba 100644 --- a/transforms/universal/doc_id/ray/pyproject.toml +++ b/transforms/universal/doc_id/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_doc_id_transform_ray" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "docid Ray Transform" license = {text = "Apache-2.0"} @@ -10,8 +10,8 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsk@ibm.com" }, ] dependencies = [ - "dpk_doc_id_transform_python==0.2.3.dev2", - "data-prep-toolkit[ray]>=0.2.3.dev2", + "dpk_doc_id_transform_python==0.2.3", + 
"data-prep-toolkit[ray]>=0.2.3", ] [build-system] diff --git a/transforms/universal/doc_id/spark/pyproject.toml b/transforms/universal/doc_id/spark/pyproject.toml index 43dc0eb13..7d7b12d46 100644 --- a/transforms/universal/doc_id/spark/pyproject.toml +++ b/transforms/universal/doc_id/spark/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_doc_id_transform_spark" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "Doc ID Spark Transform" license = {text = "Apache-2.0"} @@ -10,7 +10,7 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsk@ibm.com" }, ] dependencies = [ - "data-prep-toolkit[spark]>=0.2.3.dev2", + "data-prep-toolkit[spark]>=0.2.3", ] [build-system] diff --git a/transforms/universal/ededup/kfp_ray/ededup_wf.py b/transforms/universal/ededup/kfp_ray/ededup_wf.py index ab46daadb..288bbda69 100644 --- a/transforms/universal/ededup/kfp_ray/ededup_wf.py +++ b/transforms/universal/ededup/kfp_ray/ededup_wf.py @@ -24,7 +24,7 @@ EXEC_SCRIPT_NAME: str = "ededup_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.3" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/universal/ededup/python/pyproject.toml b/transforms/universal/ededup/python/pyproject.toml index f3791d583..4345723db 100644 --- a/transforms/universal/ededup/python/pyproject.toml +++ b/transforms/universal/ededup/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_ededup_transform_python" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "ededup Python Transform" license = {text = "Apache-2.0"} diff --git a/transforms/universal/ededup/python/requirements.txt b/transforms/universal/ededup/python/requirements.txt index 4d3289259..99fe74aad 100644 --- 
a/transforms/universal/ededup/python/requirements.txt +++ b/transforms/universal/ededup/python/requirements.txt @@ -1,3 +1,3 @@ -data-prep-toolkit>=0.2.3.dev2 +data-prep-toolkit>=0.2.3 mmh3>=4.1.0 xxhash==3.4.1 diff --git a/transforms/universal/ededup/ray/pyproject.toml b/transforms/universal/ededup/ray/pyproject.toml index 320e3dd32..a176c9db6 100644 --- a/transforms/universal/ededup/ray/pyproject.toml +++ b/transforms/universal/ededup/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_ededup_transform_ray" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "ededup Ray Transform" license = {text = "Apache-2.0"} @@ -10,8 +10,8 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - "data-prep-toolkit[ray]>=0.2.3.dev2", - "dpk_ededup_transform_python==0.2.3.dev2", + "data-prep-toolkit[ray]>=0.2.3", + "dpk_ededup_transform_python==0.2.3", "tqdm==4.66.3", ] diff --git a/transforms/universal/fdedup/kfp_ray/fdedup_wf.py b/transforms/universal/fdedup/kfp_ray/fdedup_wf.py index ffc6f79bc..f0b2901c7 100644 --- a/transforms/universal/fdedup/kfp_ray/fdedup_wf.py +++ b/transforms/universal/fdedup/kfp_ray/fdedup_wf.py @@ -34,7 +34,7 @@ DATA_CLEANING_EXEC_SCRIPT_NAME: str = "data_cleaning_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.3" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/universal/fdedup/python/pyproject.toml b/transforms/universal/fdedup/python/pyproject.toml index b290e92a1..59831a9fb 100644 --- a/transforms/universal/fdedup/python/pyproject.toml +++ b/transforms/universal/fdedup/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_fdedup_transform_python" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "Fuzzy Dedup 
Transform for Python" license = {text = "Apache-2.0"} diff --git a/transforms/universal/fdedup/python/requirements.txt b/transforms/universal/fdedup/python/requirements.txt index d8488c831..3d91e5ba4 100644 --- a/transforms/universal/fdedup/python/requirements.txt +++ b/transforms/universal/fdedup/python/requirements.txt @@ -1,4 +1,4 @@ -data-prep-toolkit>=0.2.3.dev2 +data-prep-toolkit>=0.2.3 pyyaml>=6.0.2 boto3>=1.34.69 kubernetes>=30.1.0 diff --git a/transforms/universal/fdedup/ray/pyproject.toml b/transforms/universal/fdedup/ray/pyproject.toml index b114a4ac9..279ccd9eb 100644 --- a/transforms/universal/fdedup/ray/pyproject.toml +++ b/transforms/universal/fdedup/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_fdedup_transform_ray" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "fdedup Ray Transform" license = {text = "Apache-2.0"} diff --git a/transforms/universal/fdedup/ray/requirements.txt b/transforms/universal/fdedup/ray/requirements.txt index cd9ba25d3..0c45d9dab 100644 --- a/transforms/universal/fdedup/ray/requirements.txt +++ b/transforms/universal/fdedup/ray/requirements.txt @@ -1,5 +1,5 @@ -data-prep-toolkit[ray]>=0.2.3.dev2 -dpk_fdedup_transform_python==0.2.3.dev2 +data-prep-toolkit[ray]>=0.2.3 +dpk_fdedup_transform_python==0.2.3 mmh3>=4.1.0 xxhash==3.4.1 tqdm==4.66.3 diff --git a/transforms/universal/fdedup/spark/pyproject.toml b/transforms/universal/fdedup/spark/pyproject.toml index 478fd40db..288b5a896 100644 --- a/transforms/universal/fdedup/spark/pyproject.toml +++ b/transforms/universal/fdedup/spark/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_fdedup_transform_spark" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "Fuzzy Dedup Spark Transform" license = {text = "Apache-2.0"} diff --git a/transforms/universal/fdedup/spark/requirements.txt b/transforms/universal/fdedup/spark/requirements.txt index e631c9536..d29315043 100644 --- 
a/transforms/universal/fdedup/spark/requirements.txt +++ b/transforms/universal/fdedup/spark/requirements.txt @@ -1,5 +1,5 @@ -dpk_fdedup_transform_python==0.2.3.dev2 -data-prep-toolkit[spark]>=0.2.3.dev2 +dpk_fdedup_transform_python==0.2.3 +data-prep-toolkit[spark]>=0.2.3 pyyaml>=6.0.2 boto3>=1.34.69 kubernetes>=30.1.0 diff --git a/transforms/universal/filter/kfp_ray/filter_wf.py b/transforms/universal/filter/kfp_ray/filter_wf.py index b856b1007..926045ba7 100644 --- a/transforms/universal/filter/kfp_ray/filter_wf.py +++ b/transforms/universal/filter/kfp_ray/filter_wf.py @@ -24,7 +24,7 @@ task_image = "quay.io/dataprep1/data-prep-kit/filter-ray:latest" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.3" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/universal/filter/python/pyproject.toml b/transforms/universal/filter/python/pyproject.toml index 371f7775b..4851c8092 100644 --- a/transforms/universal/filter/python/pyproject.toml +++ b/transforms/universal/filter/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_filter_transform_python" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "Filter Transform for Python" license = {text = "Apache-2.0"} diff --git a/transforms/universal/filter/python/requirements.txt b/transforms/universal/filter/python/requirements.txt index c83f90bd4..c298deafd 100644 --- a/transforms/universal/filter/python/requirements.txt +++ b/transforms/universal/filter/python/requirements.txt @@ -1,3 +1,3 @@ -data-prep-toolkit>=0.2.3.dev2 +data-prep-toolkit>=0.2.3 duckdb>=0.10.1 diff --git a/transforms/universal/filter/ray/pyproject.toml b/transforms/universal/filter/ray/pyproject.toml index 5a908e792..aa7883daa 100644 --- a/transforms/universal/filter/ray/pyproject.toml +++ 
b/transforms/universal/filter/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_filter_transform_ray" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "Filter Transform for Ray" license = {text = "Apache-2.0"} @@ -9,8 +9,8 @@ authors = [ { name = "Constantin Adam", email = "cmadam@us.ibm.com" }, ] dependencies = [ - "dpk-filter-transform-python==0.2.3.dev2", - "data-prep-toolkit[ray]>=0.2.3.dev2", + "dpk-filter-transform-python==0.2.3", + "data-prep-toolkit[ray]>=0.2.3", ] [build-system] diff --git a/transforms/universal/filter/spark/pyproject.toml b/transforms/universal/filter/spark/pyproject.toml index cb2b3d929..f669da495 100644 --- a/transforms/universal/filter/spark/pyproject.toml +++ b/transforms/universal/filter/spark/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_filter_transform_spark" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "Filter Spark Transform" license = {text = "Apache-2.0"} @@ -9,7 +9,7 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsk@ibm.com" }, ] dependencies = [ - "data-prep-toolkit[spark]>=0.2.3.dev2", + "data-prep-toolkit[spark]>=0.2.3", ] [project.optional-dependencies] diff --git a/transforms/universal/hap/kfp_ray.disable/hap_wf.py b/transforms/universal/hap/kfp_ray.disable/hap_wf.py index 786011d4d..5b1cde94f 100644 --- a/transforms/universal/hap/kfp_ray.disable/hap_wf.py +++ b/transforms/universal/hap/kfp_ray.disable/hap_wf.py @@ -23,7 +23,7 @@ EXEC_SCRIPT_NAME: str = "hap_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.3" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/universal/hap/python/pyproject.toml b/transforms/universal/hap/python/pyproject.toml index 7a6104ecb..bdc857e8e 100644 --- 
a/transforms/universal/hap/python/pyproject.toml +++ b/transforms/universal/hap/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_hap_transform_python" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "HAP Python Transform" license = {text = "Apache-2.0"} diff --git a/transforms/universal/hap/python/requirements.txt b/transforms/universal/hap/python/requirements.txt index f79ddd1f4..b40c1d94e 100644 --- a/transforms/universal/hap/python/requirements.txt +++ b/transforms/universal/hap/python/requirements.txt @@ -1,4 +1,4 @@ -data-prep-toolkit>=0.2.3.dev2 +data-prep-toolkit>=0.2.3 nltk==3.9.1 transformers==4.38.2 torch>=2.2.2,<=2.4.1 diff --git a/transforms/universal/hap/ray/pyproject.toml b/transforms/universal/hap/ray/pyproject.toml index f023a6c9d..07b3b416e 100644 --- a/transforms/universal/hap/ray/pyproject.toml +++ b/transforms/universal/hap/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_hap_transform_ray" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "HAP Ray Transform" license = {text = "Apache-2.0"} diff --git a/transforms/universal/hap/ray/requirements.txt b/transforms/universal/hap/ray/requirements.txt index 3b1aab085..8c7963713 100644 --- a/transforms/universal/hap/ray/requirements.txt +++ b/transforms/universal/hap/ray/requirements.txt @@ -1,5 +1,5 @@ -data-prep-toolkit[ray]>=0.2.3.dev2 -dpk-hap-transform-python==0.2.3.dev2 +data-prep-toolkit[ray]>=0.2.3 +dpk-hap-transform-python==0.2.3 nltk==3.9.1 transformers==4.38.2 torch>=2.2.2,<=2.4.1 diff --git a/transforms/universal/noop/kfp_ray/noop_multiple_wf.py b/transforms/universal/noop/kfp_ray/noop_multiple_wf.py index 3b102d205..473b44bca 100644 --- a/transforms/universal/noop/kfp_ray/noop_multiple_wf.py +++ b/transforms/universal/noop/kfp_ray/noop_multiple_wf.py @@ -23,7 +23,7 @@ EXEC_SCRIPT_NAME: str = "noop_transform_ray.py" # components -base_kfp_image = 
"quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.3" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/universal/noop/kfp_ray/noop_wf.py b/transforms/universal/noop/kfp_ray/noop_wf.py index e8125328b..69d815217 100644 --- a/transforms/universal/noop/kfp_ray/noop_wf.py +++ b/transforms/universal/noop/kfp_ray/noop_wf.py @@ -24,7 +24,7 @@ EXEC_SCRIPT_NAME: str = "noop_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.3" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/universal/noop/python/pyproject.toml b/transforms/universal/noop/python/pyproject.toml index 69f770157..79fc36f3f 100644 --- a/transforms/universal/noop/python/pyproject.toml +++ b/transforms/universal/noop/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_noop_transform_python" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "NOOP Python Transform" license = {text = "Apache-2.0"} @@ -10,7 +10,7 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - "data-prep-toolkit>=0.2.3.dev2", + "data-prep-toolkit>=0.2.3", ] [build-system] diff --git a/transforms/universal/noop/ray/pyproject.toml b/transforms/universal/noop/ray/pyproject.toml index 161f5dbed..c061a9c9d 100644 --- a/transforms/universal/noop/ray/pyproject.toml +++ b/transforms/universal/noop/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_noop_transform_ray" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "NOOP Ray Transform" license = {text = "Apache-2.0"} @@ -10,8 +10,8 @@ authors = [ { name = "Boris Lublinsky", email = 
"blublinsky@ibm.com" }, ] dependencies = [ - "dpk-noop-transform-python==0.2.3.dev2", - "data-prep-toolkit[ray]>=0.2.3.dev2", + "dpk-noop-transform-python==0.2.3", + "data-prep-toolkit[ray]>=0.2.3", ] [build-system] diff --git a/transforms/universal/noop/spark/pyproject.toml b/transforms/universal/noop/spark/pyproject.toml index 28d689696..ea3b5b7ea 100644 --- a/transforms/universal/noop/spark/pyproject.toml +++ b/transforms/universal/noop/spark/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_noop_transform_spark" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "NOOP Spark Transform" license = {text = "Apache-2.0"} @@ -10,8 +10,8 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsk@ibm.com" }, ] dependencies = [ - "dpk-noop-transform-python==0.2.3.dev2", - "data-prep-toolkit[spark]>=0.2.3.dev2", + "dpk-noop-transform-python==0.2.3", + "data-prep-toolkit[spark]>=0.2.3", ] [build-system] diff --git a/transforms/universal/profiler/kfp_ray/profiler_wf.py b/transforms/universal/profiler/kfp_ray/profiler_wf.py index 914637895..5e4b52cee 100644 --- a/transforms/universal/profiler/kfp_ray/profiler_wf.py +++ b/transforms/universal/profiler/kfp_ray/profiler_wf.py @@ -24,7 +24,7 @@ EXEC_SCRIPT_NAME: str = "profiler_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.3" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/universal/profiler/python/pyproject.toml b/transforms/universal/profiler/python/pyproject.toml index c1cdd6bba..531cc8507 100644 --- a/transforms/universal/profiler/python/pyproject.toml +++ b/transforms/universal/profiler/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_profiler_transform_python" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = 
"profiler Python Transform" license = {text = "Apache-2.0"} diff --git a/transforms/universal/profiler/python/requirements.txt b/transforms/universal/profiler/python/requirements.txt index f6ee9ad5c..c9c874ffe 100644 --- a/transforms/universal/profiler/python/requirements.txt +++ b/transforms/universal/profiler/python/requirements.txt @@ -1,5 +1,5 @@ -data-prep-toolkit>=0.2.3.dev2 +data-prep-toolkit>=0.2.3 mmh3==4.1.0 xxhash==3.4.1 diff --git a/transforms/universal/profiler/ray/pyproject.toml b/transforms/universal/profiler/ray/pyproject.toml index f33ebebcd..19b86624b 100644 --- a/transforms/universal/profiler/ray/pyproject.toml +++ b/transforms/universal/profiler/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_profiler_transform_ray" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "profiler Ray Transform" license = {text = "Apache-2.0"} @@ -9,8 +9,8 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - "data-prep-toolkit[ray]>=0.2.3.dev2", - "dpk_profiler_transform_python==0.2.3.dev2", + "data-prep-toolkit[ray]>=0.2.3", + "dpk_profiler_transform_python==0.2.3", "tqdm==4.66.3", ] diff --git a/transforms/universal/profiler/spark/pyproject.toml b/transforms/universal/profiler/spark/pyproject.toml index f96405d25..5f7e674b9 100644 --- a/transforms/universal/profiler/spark/pyproject.toml +++ b/transforms/universal/profiler/spark/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_profiler_transform_spark" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "Profiler Spark Transform" license = {text = "Apache-2.0"} @@ -9,8 +9,8 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsk@ibm.com" }, ] dependencies = [ - "dpk-profiler-transform-python==0.2.3.dev2", - "data-prep-toolkit[spark]>=0.2.3.dev2", + "dpk-profiler-transform-python==0.2.3", + "data-prep-toolkit[spark]>=0.2.3", ] [build-system] diff --git 
a/transforms/universal/resize/kfp_ray/resize_wf.py b/transforms/universal/resize/kfp_ray/resize_wf.py index 0724ed731..583e0605c 100644 --- a/transforms/universal/resize/kfp_ray/resize_wf.py +++ b/transforms/universal/resize/kfp_ray/resize_wf.py @@ -22,7 +22,7 @@ # the name of the job script EXEC_SCRIPT_NAME: str = "resize_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.3" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/universal/resize/python/pyproject.toml b/transforms/universal/resize/python/pyproject.toml index c0da29b30..de53c77ec 100644 --- a/transforms/universal/resize/python/pyproject.toml +++ b/transforms/universal/resize/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_resize_transform_python" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "resize Python Transform" license = {text = "Apache-2.0"} diff --git a/transforms/universal/resize/python/requirements.txt b/transforms/universal/resize/python/requirements.txt index 0611369d3..013ce9011 100644 --- a/transforms/universal/resize/python/requirements.txt +++ b/transforms/universal/resize/python/requirements.txt @@ -1 +1 @@ -data-prep-toolkit>=0.2.3.dev2 \ No newline at end of file +data-prep-toolkit>=0.2.3 \ No newline at end of file diff --git a/transforms/universal/resize/ray/pyproject.toml b/transforms/universal/resize/ray/pyproject.toml index c8ae29162..f987151e3 100644 --- a/transforms/universal/resize/ray/pyproject.toml +++ b/transforms/universal/resize/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_resize_transform_ray" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "Resize Ray Transform" license = {text = "Apache-2.0"} @@ -10,8 +10,8 @@ authors = [ { name = "Boris Lublinsky", 
email = "blublinsky@ibm.com" }, ] dependencies = [ - "dpk-resize-transform-python==0.2.3.dev2", - "data-prep-toolkit[ray]>=0.2.3.dev2", + "dpk-resize-transform-python==0.2.3", + "data-prep-toolkit[ray]>=0.2.3", ] [build-system] diff --git a/transforms/universal/resize/spark/pyproject.toml b/transforms/universal/resize/spark/pyproject.toml index 121739e7b..f8598f164 100644 --- a/transforms/universal/resize/spark/pyproject.toml +++ b/transforms/universal/resize/spark/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_resize_transform_spark" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "Resize Spark Transform" license = {text = "Apache-2.0"} @@ -10,8 +10,8 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsk@ibm.com" }, ] dependencies = [ - "dpk-resize-transform-python==0.2.3.dev2", - "data-prep-toolkit[spark]>=0.2.3.dev2", + "dpk-resize-transform-python==0.2.3", + "data-prep-toolkit[spark]>=0.2.3", ] [build-system] diff --git a/transforms/universal/tokenization/kfp_ray/tokenization_wf.py b/transforms/universal/tokenization/kfp_ray/tokenization_wf.py index c131d11ea..5663a7ad3 100644 --- a/transforms/universal/tokenization/kfp_ray/tokenization_wf.py +++ b/transforms/universal/tokenization/kfp_ray/tokenization_wf.py @@ -23,7 +23,7 @@ task_image = "quay.io/dataprep1/data-prep-kit/tokenization-ray:latest" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.3" # path to kfp component specifications files # path to kfp component specifications files diff --git a/transforms/universal/tokenization/python/pyproject.toml b/transforms/universal/tokenization/python/pyproject.toml index fb9ee0c48..b3cdda1f0 100644 --- a/transforms/universal/tokenization/python/pyproject.toml +++ b/transforms/universal/tokenization/python/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_tokenization_transform_python" keywords 
= ["tokenizer", "data", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ] -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "Tokenization Transform for Python" license = {text = "Apache-2.0"} diff --git a/transforms/universal/tokenization/python/requirements.txt b/transforms/universal/tokenization/python/requirements.txt index 57ac9b547..56e81f87c 100644 --- a/transforms/universal/tokenization/python/requirements.txt +++ b/transforms/universal/tokenization/python/requirements.txt @@ -1,2 +1,2 @@ -data-prep-toolkit>=0.2.3.dev2 +data-prep-toolkit>=0.2.3 transformers==4.38.2 diff --git a/transforms/universal/tokenization/ray/pyproject.toml b/transforms/universal/tokenization/ray/pyproject.toml index 0829e002c..e7c2d4fd5 100644 --- a/transforms/universal/tokenization/ray/pyproject.toml +++ b/transforms/universal/tokenization/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_tokenization_transform_ray" -version = "0.2.3.dev2" +version = "0.2.3" requires-python = ">=3.10,<3.13" description = "Tokenization Transform for Ray" license = {text = "Apache-2.0"} @@ -9,8 +9,8 @@ authors = [ { name = "Xuan-Hong Dang", email = "xuan-hong.dang@ibm.com"}, ] dependencies = [ - "dpk-tokenization-transform-python==0.2.3.dev2", - "data-prep-toolkit[ray]>=0.2.3.dev2", + "dpk-tokenization-transform-python==0.2.3", + "data-prep-toolkit[ray]>=0.2.3", ] [build-system] diff --git a/transforms/universal/web2parquet/requirements.txt b/transforms/universal/web2parquet/requirements.txt index 586d0be24..be7eaacca 100644 --- a/transforms/universal/web2parquet/requirements.txt +++ b/transforms/universal/web2parquet/requirements.txt @@ -1,2 +1,2 @@ -data-prep-toolkit>=0.2.3.dev2 +data-prep-toolkit>=0.2.3 data_prep_connector>=0.2.3 \ No newline at end of file From 4fb2f8a8683bdc762585d92f3c2e0eda6937d76b Mon Sep 17 00:00:00 2001 From: matouma <94904133+matouma@users.noreply.github.com> Date: Sat, 14 Dec 2024 
18:19:03 +0100 Subject: [PATCH 2/2] updated release notes Signed-off-by: matouma <94904133+matouma@users.noreply.github.com> --- release-notes.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/release-notes.md b/release-notes.md index 4b7b8d553..097bfbaa5 100644 --- a/release-notes.md +++ b/release-notes.md @@ -1,5 +1,21 @@ # Data Prep Kit Release notes +## Release 0.2.3 - 12/15/2024 + +### General + +1. New algorithm for Fuzzy dedup transform +1. Sample notebooks for some of the language transforms +1. Integrate Semantic profiler and report generation for code profiler transform + +### data-prep-toolkit libraries (python, ray, spark) + +1. Increase ray agent limit to 10,000 (default was 100) + +### Transforms + +1. Fuzzy dedup new algorithm for Python, Ray and Spark + ## Release 0.2.2 - 11/25/2024 ### General