Skip to content

Commit

Permalink
Merge pull request #366 from revit13/resizefix
Browse files Browse the repository at this point in the history
Fix kfp-data-processing tag and resize workflow.
  • Loading branch information
revit13 authored Jul 1, 2024
2 parents 31d073b + 5dee556 commit 0436111
Show file tree
Hide file tree
Showing 6 changed files with 13 additions and 13 deletions.
2 changes: 1 addition & 1 deletion kfp/kfp_ray_components/createRayClusterComponent.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ inputs:

implementation:
container:
image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.0.dev6"
image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1.dev0"
# command is a list of strings (command-line arguments).
# The YAML language has two syntaxes for lists and you can use either of them.
# Here we use the "flow syntax" - comma-separated strings inside square brackets.
Expand Down
2 changes: 1 addition & 1 deletion kfp/kfp_ray_components/deleteRayClusterComponent.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ inputs:

implementation:
container:
image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.0.dev6"
image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1.dev0"
# command is a list of strings (command-line arguments).
# The YAML language has two syntaxes for lists and you can use either of them.
# Here we use the "flow syntax" - comma-separated strings inside square brackets.
Expand Down
2 changes: 1 addition & 1 deletion kfp/kfp_ray_components/executeRayJobComponent.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ inputs:

implementation:
container:
image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.0.dev6"
image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1.dev0"
# command is a list of strings (command-line arguments).
# The YAML language has two syntaxes for lists and you can use either of them.
# Here we use the "flow syntax" - comma-separated strings inside square brackets.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ inputs:

implementation:
container:
image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.0.dev6"
image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1.dev0"
# command is a list of strings (command-line arguments).
# The YAML language has two syntaxes for lists and you can use either of them.
# Here we use the "flow syntax" - comma-separated strings inside square brackets.
Expand Down
2 changes: 1 addition & 1 deletion kfp/kfp_ray_components/executeSubWorkflowComponent.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ outputs:

implementation:
container:
image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.0.dev6"
image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1.dev0"
# command is a list of strings (command-line arguments).
# The YAML language has two syntaxes for lists, and you can use either of them.
# Here we use the "flow syntax" - comma-separated strings inside square brackets.
Expand Down
16 changes: 8 additions & 8 deletions transforms/universal/resize/kfp_ray/resize_wf.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
task_image = "quay.io/dataprep1/data-prep-kit/resize-ray:0.2.1.dev0"

# the name of the job script
EXEC_SCRIPT_NAME: str = "doc_id_transform_ray.py"
EXEC_SCRIPT_NAME: str = "resize_transform_ray.py"
# components
base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1.dev0"

Expand Down Expand Up @@ -95,23 +95,23 @@ def compute_exec_params_func(
# clean up Ray
cleanup_ray_op = comp.load_component_from_file(component_spec_path + "deleteRayClusterComponent.yaml")
# Task name is part of the pipeline name, the ray cluster name and the job name in DMF.
TASK_NAME: str = "doc_id"
TASK_NAME: str = "resize"


@dsl.pipeline(
name=TASK_NAME + "-ray-pipeline",
description="Pipeline for doc_id",
description="Pipeline for resize",
)
def doc_id(
def resize(
# Ray cluster
ray_name: str = "doc_id-kfp-ray", # name of Ray cluster
ray_name: str = "resize-kfp-ray", # name of Ray cluster
# Add image_pull_secret and image_pull_policy to ray workers if needed
ray_head_options: str = '{"cpu": 1, "memory": 4, "image": "' + task_image + '" }',
ray_worker_options: str = '{"replicas": 2, "max_replicas": 2, "min_replicas": 2, "cpu": 2, "memory": 4, '
'"image": "' + task_image + '"}',
server_url: str = "http://kuberay-apiserver-service.kuberay.svc.cluster.local:8888",
# data access
data_s3_config: str = "{'input_folder': 'test/doc_id/input/', 'output_folder': 'test/doc_id/output/'}",
data_s3_config: str = "{'input_folder': 'test/resize/input/', 'output_folder': 'test/resize/output/'}",
data_s3_access_secret: str = "s3-secret",
data_max_files: int = -1,
data_num_samples: int = -1,
Expand All @@ -123,7 +123,7 @@ def doc_id(
runtime_pipeline_id: str = "pipeline_id",
runtime_code_location: str = "{'github': 'github', 'commit_hash': '12345', 'path': 'path'}",
# resize parameters
resize_max_rows_per_table: int = -1,
resize_max_rows_per_table: int = 20,
resize_max_mbytes_per_table: int = -1,
resize_size_type: str = "disk",
# additional parameters
Expand Down Expand Up @@ -216,4 +216,4 @@ def doc_id(

if __name__ == "__main__":
# Compiling the pipeline
compiler.Compiler().compile(doc_id, __file__.replace(".py", ".yaml"))
compiler.Compiler().compile(resize, __file__.replace(".py", ".yaml"))

0 comments on commit 0436111

Please sign in to comment.