diff --git a/transforms/universal/hap/python/test-data/input/expected/metadata.json b/transforms/universal/hap/python/test-data/input/expected/metadata.json new file mode 100644 index 000000000..1e5f710db --- /dev/null +++ b/transforms/universal/hap/python/test-data/input/expected/metadata.json @@ -0,0 +1,49 @@ +{ + "pipeline": "pipeline_id", + "job details": { + "job category": "preprocessing", + "job name": "hap", + "job type": "pure python", + "job id": "job_id", + "start_time": "2024-09-26 20:56:49", + "end_time": "2024-09-26 20:56:56", + "status": "success" + }, + "code": { + "github": "github", + "commit_hash": "12345", + "path": "path" + }, + "job_input_params": { + "model_name_or_path": "ibm-granite/granite-guardian-hap-38m", + "annotation_column": "hap_score", + "doc_text_column": "doc_text", + "inference_engine": "CPU", + "max_length": 512, + "batch_size": 128, + "checkpointing": false, + "max_files": -1, + "random_samples": -1, + "files_to_use": [ + ".parquet" + ], + "num_processors": 0 + }, + "job_output_stats": { + "source_files": 1, + "source_size": 109303, + "result_files": 1, + "result_size": 79822, + "processing_time": 6.501, + "source_doc_count": 50, + "result_doc_count": 50 + }, + "source": { + "name": "/Users/ian/Desktop/data-prep-kit/transforms/universal/hap/python/test-data/input", + "type": "path" + }, + "target": { + "name": "/Users/ian/Desktop/data-prep-kit/transforms/universal/hap/python/output", + "type": "path" + } +} \ No newline at end of file diff --git a/transforms/universal/hap/python/test-data/input/expected/test1.parquet b/transforms/universal/hap/python/test-data/input/expected/test1.parquet new file mode 100644 index 000000000..8ac5be443 Binary files /dev/null and b/transforms/universal/hap/python/test-data/input/expected/test1.parquet differ