From c9fa99b557919e5ea5e34e01e07c2e2db5c7e5ea Mon Sep 17 00:00:00 2001 From: Revital Sur Date: Sun, 13 Oct 2024 19:13:15 +0300 Subject: [PATCH] Add data_checkpointing. Signed-off-by: Revital Sur --- kfp/pipeline_generator/single-pipeline/README.md | 2 +- .../single-pipeline/templates/simple_pipeline.py | 3 +++ transforms/language/html2parquet/kfp_ray/html2parquet_wf.py | 3 +++ .../language/html2parquet/kfp_ray/pipeline_definitions.yaml | 1 + 4 files changed, 8 insertions(+), 1 deletion(-) diff --git a/kfp/pipeline_generator/single-pipeline/README.md b/kfp/pipeline_generator/single-pipeline/README.md index b9dbf363e..03a2f1e57 100644 --- a/kfp/pipeline_generator/single-pipeline/README.md +++ b/kfp/pipeline_generator/single-pipeline/README.md @@ -1,6 +1,6 @@ ## Steps to generate a new pipeline - create a `pipeline_definitions.yaml` file for the required task (similar to the example [pipeline_definitions.yaml for the noop task](../../../transforms/universal/noop/kfp_ray/pipeline_definitions.yaml)). - execute `make -C ../../../transforms workflow-venv` from this directory -- execute `source ../../../transforms/venv/bin/activate` +- execute `source ../../../transforms/venv/bin/activate && pip install pre_commit` - execute `./run.sh --config_file <pipeline_definitions_file_path> --output_dir_file <destination directory>`. When `pipeline_definitions_file_path` is the path of the `pipeline_definitions.yaml` file that defines the pipeline and `destination directory` is a directory where new pipeline file will be generated. 
diff --git a/kfp/pipeline_generator/single-pipeline/templates/simple_pipeline.py b/kfp/pipeline_generator/single-pipeline/templates/simple_pipeline.py index e78ce3aa1..101330a71 100644 --- a/kfp/pipeline_generator/single-pipeline/templates/simple_pipeline.py +++ b/kfp/pipeline_generator/single-pipeline/templates/simple_pipeline.py @@ -37,6 +37,7 @@ def compute_exec_params_func( data_s3_config: str, data_max_files: int, data_num_samples: int, + data_checkpointing: bool, runtime_pipeline_id: str, runtime_job_id: str, runtime_code_location: dict, @@ -50,6 +51,7 @@ def compute_exec_params_func( "data_s3_config": data_s3_config, "data_max_files": data_max_files, "data_num_samples": data_num_samples, + "data_checkpointing": data_checkpointing, "runtime_num_workers": KFPUtils.default_compute_execution_params(str(worker_options), str(actor_options)), "runtime_worker_options": str(actor_options), "runtime_pipeline_id": runtime_pipeline_id, @@ -177,6 +179,7 @@ def {{ pipeline_name }}( data_s3_config=data_s3_config, data_max_files=data_max_files, data_num_samples=data_num_samples, + data_checkpointing=data_checkpointing, runtime_pipeline_id=runtime_pipeline_id, runtime_job_id=run_id, runtime_code_location=runtime_code_location, diff --git a/transforms/language/html2parquet/kfp_ray/html2parquet_wf.py b/transforms/language/html2parquet/kfp_ray/html2parquet_wf.py index 57f7c9e3c..4eb8b9de1 100644 --- a/transforms/language/html2parquet/kfp_ray/html2parquet_wf.py +++ b/transforms/language/html2parquet/kfp_ray/html2parquet_wf.py @@ -36,6 +36,7 @@ def compute_exec_params_func( data_s3_config: str, data_max_files: int, data_num_samples: int, + data_checkpointing: bool, runtime_pipeline_id: str, runtime_job_id: str, runtime_code_location: dict, @@ -48,6 +49,7 @@ def compute_exec_params_func( "data_s3_config": data_s3_config, "data_max_files": data_max_files, "data_num_samples": data_num_samples, + "data_checkpointing": data_checkpointing, "runtime_num_workers": 
KFPUtils.default_compute_execution_params(str(worker_options), str(actor_options)), "runtime_worker_options": str(actor_options), "runtime_pipeline_id": runtime_pipeline_id, @@ -177,6 +179,7 @@ def html2parquet( data_s3_config=data_s3_config, data_max_files=data_max_files, data_num_samples=data_num_samples, + data_checkpointing=data_checkpointing, runtime_pipeline_id=runtime_pipeline_id, runtime_job_id=run_id, runtime_code_location=runtime_code_location, diff --git a/transforms/language/html2parquet/kfp_ray/pipeline_definitions.yaml b/transforms/language/html2parquet/kfp_ray/pipeline_definitions.yaml index 538a24f71..f78c81219 100644 --- a/transforms/language/html2parquet/kfp_ray/pipeline_definitions.yaml +++ b/transforms/language/html2parquet/kfp_ray/pipeline_definitions.yaml @@ -12,6 +12,7 @@ pipeline_common_input_parameters_values: kfp_base_image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" transform_image: "quay.io/dataprep1/data-prep-kit/html2parquet-ray:latest" s3_access_secret: "s3-secret" + image_pull_secret: "" input_folder: "test/html2parquet/input/" output_folder: "test/html2parquet/output/"