From abd12bab045e225f13ad00608bde996e42d5091a Mon Sep 17 00:00:00 2001 From: bshifaw Date: Fri, 14 Apr 2023 10:45:19 -0400 Subject: [PATCH] Add list-outputs (#248) * added list-outputs command * Added option to get workflow level outputs or task level outputs * Added option to print json summary and text * add integration test_list_outputs.py draft * Added options to utility_test_functions.py run cromshell function * Added function to confirm results from cromwell outputs endpoint contain outputs else throws an error. * Added variable to hold workflow id in cromshellconfig.py * add check of outputs for detailed list-outputs option --------- Co-authored-by: bshifaw --- src/cromshell/__main__.py | 2 + src/cromshell/list_outputs/__init__.py | 0 src/cromshell/list_outputs/command.py | 220 ++++++++++++++++ .../utilities/command_setup_utils.py | 12 + src/cromshell/utilities/cromshellconfig.py | 1 + tests/integration/test_list_outputs.py | 99 +++++++ tests/integration/utility_test_functions.py | 13 +- .../succeeded_helloworld.metadata.json | 245 ++++++++++++++++++ .../cromwell_output_api_example.json | 11 + .../helloworld_dict_of_outputs.json | 8 + .../helloworld_task_level_outputs.txt | 4 + .../helloworld_workflow_level_outputs.txt | 8 + .../print_file_like_value_in_dict_example.txt | 3 + ...e_like_value_in_dict_no_indent_example.txt | 3 + ...succeeded_helloworld.outputs.metadata.json | 9 + tests/unit/test_list_outputs.py | 205 +++++++++++++++ tests/workflows/helloWorld.wdl | 2 + 17 files changed, 843 insertions(+), 2 deletions(-) create mode 100644 src/cromshell/list_outputs/__init__.py create mode 100644 src/cromshell/list_outputs/command.py create mode 100644 tests/integration/test_list_outputs.py create mode 100644 tests/metadata/succeeded_helloworld.metadata.json create mode 100644 tests/unit/mock_data/list_outputs/cromwell_output_api_example.json create mode 100644 tests/unit/mock_data/list_outputs/helloworld_dict_of_outputs.json create mode 100644 tests/unit/mock_data/list_outputs/helloworld_task_level_outputs.txt create mode 100644 tests/unit/mock_data/list_outputs/helloworld_workflow_level_outputs.txt create mode 100644 tests/unit/mock_data/list_outputs/print_file_like_value_in_dict_example.txt create mode 100644 tests/unit/mock_data/list_outputs/print_file_like_value_in_dict_no_indent_example.txt create mode 100644 tests/unit/mock_data/list_outputs/succeeded_helloworld.outputs.metadata.json create mode 100644 tests/unit/test_list_outputs.py diff --git a/src/cromshell/__main__.py b/src/cromshell/__main__.py index 7faef693..30f98584 100644 --- a/src/cromshell/__main__.py +++ b/src/cromshell/__main__.py @@ -11,6 +11,7 @@ from .cost import command as cost from .counts import command as counts from .list import command as list +from .list_outputs import command as list_outputs from .logs import command as logs from .metadata import command as metadata from .slim_metadata import command as slim_metadata @@ -168,6 +169,7 @@ def version(): main_entry.add_command(update_server.main) main_entry.add_command(timing.main) main_entry.add_command(list.main) +main_entry.add_command(list_outputs.main) if __name__ == "__main__": diff --git a/src/cromshell/list_outputs/__init__.py b/src/cromshell/list_outputs/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/cromshell/list_outputs/command.py b/src/cromshell/list_outputs/command.py new file mode 100644 index 00000000..06a23a4a --- /dev/null +++ b/src/cromshell/list_outputs/command.py @@ -0,0 +1,220 @@ +import logging + +import click +import requests + +import cromshell.utilities.http_utils as http_utils +import cromshell.utilities.io_utils as io_utils +from cromshell.metadata import command as metadata_command +from cromshell.utilities import command_setup_utils + +LOGGER = logging.getLogger(__name__) + + +@click.command(name="list-outputs") +@click.argument("workflow_ids", required=True, nargs=-1) +@click.option( + "-d", + "--detailed", + is_flag=True, + default=False, + help="Get the output for a workflow at the task level", +) +@click.option( + "-j", + "--json-summary", + is_flag=True, + default=False, + help="Print a json summary of outputs, including non-file types.", +) +@click.pass_obj +def main(config, workflow_ids, detailed, json_summary): + """List all output files produced by a workflow.""" + + LOGGER.info("list-outputs") + + return_code = 0 + + for workflow_id in workflow_ids: + command_setup_utils.resolve_workflow_id_and_server( + workflow_id=workflow_id, cromshell_config=config + ) + + if not detailed: + workflow_outputs = get_workflow_level_outputs(config).get("outputs") + + if json_summary: + io_utils.pretty_print_json(format_json=workflow_outputs) + else: + print_file_like_value_in_dict( + outputs_metadata=workflow_outputs, + indent=False, + ) + else: + task_outputs = get_task_level_outputs(config) + + if json_summary: + io_utils.pretty_print_json(format_json=task_outputs) + else: + print_task_level_outputs(task_outputs) + + return return_code + + +def get_workflow_level_outputs(config) -> dict: + """Get the workflow level outputs from the workflow outputs + + Args: + config (dict): The cromshell config object + """ + + requests_out = requests.get( + f"{config.cromwell_api_workflow_id}/outputs", + timeout=config.requests_connect_timeout, + verify=config.requests_verify_certs, + headers=http_utils.generate_headers(config), + ) + + if requests_out.ok: + check_for_empty_output(requests_out.json().get("outputs"), config.workflow_id) + return requests_out.json() + else: + http_utils.check_http_request_status_code( + short_error_message="Failed to retrieve outputs for " + f"workflow: {config.workflow_id}", + response=requests_out, + # Raising exception is set false to allow + # command to retrieve outputs of remaining workflows. + raise_exception=False, + ) + + +def get_task_level_outputs(config) -> dict: + """Get the task level outputs from the workflow metadata + + Args: + config (dict): The cromshell config object + """ + # Get metadata + formatted_metadata_parameter = metadata_command.format_metadata_params( + list_of_keys=config.METADATA_KEYS_TO_OMIT, + exclude_keys=True, + expand_subworkflows=True, + ) + + workflow_metadata = metadata_command.get_workflow_metadata( + meta_params=formatted_metadata_parameter, + api_workflow_id=config.cromwell_api_workflow_id, + timeout=config.requests_connect_timeout, + verify_certs=config.requests_verify_certs, + headers=http_utils.generate_headers(config), + ) + + return filter_outputs_from_workflow_metadata(workflow_metadata) + + +def filter_outputs_from_workflow_metadata(workflow_metadata: dict) -> dict: + """Get the outputs from the workflow metadata + + Args: + workflow_metadata (dict): The workflow metadata + """ + calls_metadata = workflow_metadata["calls"] + output_metadata = {} + extract_task_key = "outputs" + + for call, index_list in calls_metadata.items(): + if "subWorkflowMetadata" in calls_metadata[call][0]: + output_metadata[call] = [] + for scatter in calls_metadata[call]: + output_metadata[call].append( + filter_outputs_from_workflow_metadata( + scatter["subWorkflowMetadata"] + ) + ) + else: + output_metadata[call] = [] + for index in index_list: + output_metadata[call].append(index.get(extract_task_key)) + + check_for_empty_output(output_metadata, workflow_metadata["id"]) + + return output_metadata + + +def print_task_level_outputs(output_metadata: dict) -> None: + """Print the outputs from the workflow metadata + output_metadata: {call_name:[index1{output_name: outputvalue}, index2{...}, ...], call_name:[], ...} + + Args: + output_metadata (dict): The output metadata from the workflow + """ + for call, index_list in output_metadata.items(): + print(call) + for call_index in index_list: + if call_index is not None: + print_file_like_value_in_dict(outputs_metadata=call_index, indent=True) + + +def print_file_like_value_in_dict(outputs_metadata: dict, indent: bool) -> None: + """Print the file like values in the output metadata dictionary + + Args: + outputs_metadata (dict): The output metadata + indent (bool): Whether to indent the output + """ + + for output_name, output_value in outputs_metadata.items(): + if isinstance(output_value, str): + print_output_name_and_file(output_name, output_value, indent=indent) + elif isinstance(output_value, list): + for output_value_item in output_value: + print_output_name_and_file( + output_name, output_value_item, indent=indent + ) + + +def print_output_name_and_file( + output_name: str, output_value: str, indent: bool = True +) -> None: + """Print the task name and the file name + + Args: + output_name (str): The task output name + output_value (str): The task output value + indent (bool): Whether to indent the output""" + + i = "\t" if indent else "" + + if isinstance(output_value, str): + if is_path_or_url_like(output_value): + print(f"{i}{output_name}: {output_value}") + + +def is_path_or_url_like(in_string: str) -> bool: + """Check if the string is a path or url + + Args: + in_string (str): The string to check for path or url like-ness + """ + if ( + in_string.startswith("gs://") + or in_string.startswith("/") + or in_string.startswith("http://") + or in_string.startswith("https://") + ): + return True + else: + return False + + +def check_for_empty_output(workflow_outputs: dict, workflow_id: str) -> None: + """Check if the workflow outputs are empty + + Args: + cromwell_outputs (dict): Dictionary of workflow outputs + :param workflow_id: The workflow id + """ + if not workflow_outputs: + LOGGER.error(f"No outputs found for workflow: {workflow_id}") + raise Exception(f"No outputs found for workflow: {workflow_id}") diff --git a/src/cromshell/utilities/command_setup_utils.py b/src/cromshell/utilities/command_setup_utils.py index 48221674..12bc90b2 100644 --- a/src/cromshell/utilities/command_setup_utils.py +++ b/src/cromshell/utilities/command_setup_utils.py @@ -17,5 +17,17 @@ def resolve_workflow_id_and_server(workflow_id: str, cromshell_config) -> str: http_utils.set_and_check_cromwell_server( config=cromshell_config, workflow_id=resolved_workflow_id ) + set_workflow_id(workflow_id=resolved_workflow_id, cromshell_config=cromshell_config) return resolved_workflow_id + + +def set_workflow_id(workflow_id: str, cromshell_config) -> None: + """ + Sets the workflow id in the config object + + :param workflow_id: workflow UUID + :param cromshell_config: + :return: None + """ + cromshell_config.workflow_id = workflow_id diff --git a/src/cromshell/utilities/cromshellconfig.py b/src/cromshell/utilities/cromshellconfig.py index a5b37108..163f884f 100644 --- a/src/cromshell/utilities/cromshellconfig.py +++ b/src/cromshell/utilities/cromshellconfig.py @@ -27,6 +27,7 @@ ] CROMWELL_API_STRING = "/api/workflows/v1" WOMTOOL_API_STRING = "/api/womtool/v1" +workflow_id = None # Concatenate the cromwell url, api string, and workflow ID. Set in subcommand. cromwell_api_workflow_id = None # Defaults for variables will be set after functions have been defined diff --git a/tests/integration/test_list_outputs.py b/tests/integration/test_list_outputs.py new file mode 100644 index 00000000..27ab8351 --- /dev/null +++ b/tests/integration/test_list_outputs.py @@ -0,0 +1,99 @@ +from pathlib import Path + +import pytest + +from tests.integration import utility_test_functions + +workflows_path = Path(__file__).parents[1].joinpath("workflows/") + + +class TestListOutputs: + @pytest.mark.parametrize( + "wdl, json_file, options, output_template", + [ + ( + "tests/workflows/helloWorld.wdl", + "tests/workflows/helloWorld.json", + None, + [ + "HelloWorld.output_file: /cromwell-executions/HelloWorld//call-HelloWorldTask/execution/stdout", + "", + ], + ), + ( + "tests/workflows/helloWorld.wdl", + "tests/workflows/helloWorld.json", + ["-d"], + [ + "HelloWorld.HelloWorldTask", + "\toutput_file: /cromwell-executions/HelloWorld//call-HelloWorldTask/execution/stdout", + "", + ], + ), + ( + "tests/workflows/helloWorld.wdl", + "tests/workflows/helloWorld.json", + ["-j"], + [ + "{", + ' "HelloWorld.output_file": "/cromwell-executions/HelloWorld//call-HelloWorldTask/execution/stdout"', + "}", + "", + ], + ), + ( + "tests/workflows/helloWorld.wdl", + "tests/workflows/helloWorld.json", + ["-j", "-d"], + [ + "{", + ' "HelloWorld.HelloWorldTask": [', + " {", + ' "output_file": "/cromwell-executions/HelloWorld//call-HelloWorldTask/execution/stdout"', + " }", + " ]", + "}", + "", + ], + ), + ], + ) + def test_list_outputs( + self, + local_cromwell_url: str, + wdl: str, + json_file: str, + options: list, + output_template: list, + ansi_escape, + ): + # submit workflow + test_workflow_id = utility_test_functions.submit_workflow( + local_cromwell_url=local_cromwell_url, + wdl=wdl, + json_file=json_file, + exit_code=0, + ) + + utility_test_functions.wait_for_workflow_completion( + test_workflow_id=test_workflow_id + ) + + # run list-outputs + status_result = utility_test_functions.run_cromshell_command( + command=["list-outputs", test_workflow_id], + exit_code=0, + subcommand_options=options, + ) + + status_result_per_line = status_result.stdout.split("\n") + + workflow_outputs = [ + sub.replace("", test_workflow_id) for sub in output_template + ] + + print("Print workflow list-outputs results:") + for line in status_result_per_line: + print(line) + + assert status_result_per_line == workflow_outputs diff --git a/tests/integration/utility_test_functions.py b/tests/integration/utility_test_functions.py index 525b05f5..c628f0fe 100644 --- a/tests/integration/utility_test_functions.py +++ b/tests/integration/utility_test_functions.py @@ -10,10 +10,13 @@ from cromshell.utilities import cromshellconfig -def run_cromshell_command(command: list, exit_code: int): +def run_cromshell_command( + command: list, exit_code: int, subcommand_options: list = None +): """ Run cromshell alias using CliRunner and assert job is successful + :param subcommand_options: The options to pass to the subcommand :param command: The subcommand, options, and arguments in list form e.g. [ "alias", @@ -25,12 +28,18 @@ def run_cromshell_command(command: list, exit_code: int): :return: results from execution """ + if subcommand_options: + command_with_options = command[:1] + subcommand_options + command[1:] + else: + command_with_options = command + runner = CliRunner(mix_stderr=False) # The absolute path will be passed to the invoke command because # the test is being run in temp directory created by CliRunner. with runner.isolated_filesystem(): - result = runner.invoke(cromshell, command) + result = runner.invoke(cromshell, command_with_options) assert result.exit_code == exit_code, ( + f"\nCOMMAND:\n{command_with_options}" f"\nSTDOUT:\n{result.stdout}" f"\nSTDERR:\n{result.stderr}" f"\nExceptions:\n{result.exception}" diff --git a/tests/metadata/succeeded_helloworld.metadata.json b/tests/metadata/succeeded_helloworld.metadata.json new file mode 100644 index 00000000..278d049c --- /dev/null +++ b/tests/metadata/succeeded_helloworld.metadata.json @@ -0,0 +1,245 @@ +{ + "actualWorkflowLanguage": "WDL", + "actualWorkflowLanguageVersion": "draft-2", + "calls": { + "HelloWorld.HelloWorldTask": [ + { + "attempt": 1, + "backend": "PAPIv2", + "backendLabels": { + "cromwell-workflow-id": "cromwell-9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b", + "wdl-task-name": "helloworldtask" + }, + "backendLogs": { + "log": "gs://broad-dsp-lrma-cromwell-central/HelloWorld/9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b/call-HelloWorldTask/HelloWorldTask.log" + }, + "backendStatus": "Success", + "callCaching": { + "allowResultReuse": true, + "effectiveCallCachingMode": "ReadAndWriteCache", + "hashes": { + "backend name": "85D6F63859525E464173387636E20324", + "command template": "265419AF4D334CAD1CC95E22CABFC8E5", + "input": { + "Boolean use_ssd": "68934A3E9455FA72420237EB05902327", + "Int command_mem": "051928341BE67DCBA03F0E04104D9047", + "Int default_boot_disk_size_gb": "9BF31C7FF062936A96D3C8BD1F8F2FF3", + "Int default_disk_space_gb": "F899139DF5E1059396431415E770C6DD", + "Int default_ram_mb": "BE767243CA8F574C740FB4C26CC6DCEB", + "Int machine_mem": "BE767243CA8F574C740FB4C26CC6DCEB", + "String docker": "1013E15CCD5A51F2B5A9F9DB9A13A756" + }, + "input count": "8F14E45FCEEA167A5A36DEDD4BEA2543", + "output count": "CFCD208495D565EF66E7DFF9F98764DA", + "runtime attribute": { + "continueOnReturnCode": "CFCD208495D565EF66E7DFF9F98764DA", + "docker": "D1FA3055EB3A898E0F62F288A334EB0A", + "failOnStderr": "68934A3E9455FA72420237EB05902327" + } + }, + "hit": false, + "result": "Cache Miss" + }, + "callRoot": "gs://broad-dsp-lrma-cromwell-central/HelloWorld/9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b/call-HelloWorldTask", + "commandLine": " set -e\necho 'Hello World!'", + "compressedDockerSize": 4991278, + "dockerImageUsed": "frolvlad/alpine-bash@sha256:edac5ae03440fe8dcf3ff3410373c8dce56cca2915c74b5bf39afecff1693b28", + "end": "2022-10-21T19:12:53.198Z", + "executionEvents": [ + { + "description": "Worker \"google-pipelines-worker-6debdda87d7e05f880737046b3f87c2a\" assigned in \"us-east1-b\" on a \"custom-1-3072\" machine", + "endTime": "2022-10-21T19:10:54.271Z", + "startTime": "2022-10-21T19:10:15.348Z" + }, + { + "description": "Worker released", + "endTime": "2022-10-21T19:12:08.760Z", + "startTime": "2022-10-21T19:12:08.760Z" + }, + { + "description": "UpdatingJobStore", + "endTime": "2022-10-21T19:12:53.198Z", + "startTime": "2022-10-21T19:12:52.199Z" + }, + { + "description": "UserAction", + "endTime": "2022-10-21T19:11:54.971Z", + "startTime": "2022-10-21T19:11:50.783Z" + }, + { + "description": "CallCacheReading", + "endTime": "2022-10-21T19:09:48.639Z", + "startTime": "2022-10-21T19:09:48.634Z" + }, + { + "description": "Complete in GCE / Cromwell Poll Interval", + "endTime": "2022-10-21T19:12:52.016Z", + "startTime": "2022-10-21T19:12:08.760Z" + }, + { + "description": "Pending", + "endTime": "2022-10-21T19:09:48.049Z", + "startTime": "2022-10-21T19:09:48.048Z" + }, + { + "description": "WaitingForValueStore", + "endTime": "2022-10-21T19:09:48.266Z", + "startTime": "2022-10-21T19:09:48.266Z" + }, + { + "description": "RunningJob", + "endTime": "2022-10-21T19:10:03.560Z", + "startTime": "2022-10-21T19:09:48.639Z" + }, + { + "description": "ContainerSetup", + "endTime": "2022-10-21T19:11:30.236Z", + "startTime": "2022-10-21T19:11:26.183Z" + }, + { + "description": "Localization", + "endTime": "2022-10-21T19:11:50.783Z", + "startTime": "2022-10-21T19:11:31.132Z" + }, + { + "description": "Pulling \"frolvlad/alpine-bash@sha256:edac5ae03440fe8dcf3ff3410373c8dce56cca2915c74b5bf39afecff1693b28\"", + "endTime": "2022-10-21T19:11:26.183Z", + "startTime": "2022-10-21T19:11:23.952Z" + }, + { + "description": "waiting for quota", + "endTime": "2022-10-21T19:10:15.348Z", + "startTime": "2022-10-21T19:10:03.560Z" + }, + { + "description": "Pulling \"gcr.io/google.com/cloudsdktool/cloud-sdk:276.0.0-slim\"", + "endTime": "2022-10-21T19:11:23.952Z", + "startTime": "2022-10-21T19:10:54.271Z" + }, + { + "description": "PreparingJob", + "endTime": "2022-10-21T19:09:48.634Z", + "startTime": "2022-10-21T19:09:48.266Z" + }, + { + "description": "Background", + "endTime": "2022-10-21T19:11:30.760Z", + "startTime": "2022-10-21T19:11:30.427Z" + }, + { + "description": "Delocalization", + "endTime": "2022-10-21T19:12:08.760Z", + "startTime": "2022-10-21T19:11:54.971Z" + }, + { + "description": "RequestingExecutionToken", + "endTime": "2022-10-21T19:09:48.266Z", + "startTime": "2022-10-21T19:09:48.049Z" + }, + { + "description": "UpdatingCallCache", + "endTime": "2022-10-21T19:12:52.199Z", + "startTime": "2022-10-21T19:12:52.016Z" + } + ], + "executionStatus": "Done", + "inputs": { + "boot_disk_size_gb": null, + "command_mem": 2048, + "cpu": null, + "default_boot_disk_size_gb": 15, + "default_disk_space_gb": 100, + "default_ram_mb": 3072, + "disk_space_gb": null, + "docker": "frolvlad/alpine-bash", + "machine_mem": 3072, + "mem": null, + "preemptible_attempts": null, + "use_ssd": false + }, + "jes": { + "endpointUrl": "https://lifesciences.googleapis.com/", + "executionBucket": "gs://broad-dsp-lrma-cromwell-central", + "googleProject": "broad-dsp-lrma", + "instanceName": "google-pipelines-worker-6debdda87d7e05f880737046b3f87c2a", + "machineType": "custom-1-3072", + "zone": "us-east1-b" + }, + "jobId": "projects/602335226495/locations/us-central1/operations/7523289379686914908", + "labels": { + "cromwell-workflow-id": "cromwell-9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b", + "wdl-task-name": "HelloWorldTask" + }, + "outputs": { + "bam_subset_file_index": "gs://broad-dsp-lrma-cromwell-central/HelloWorld/9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b/call-HelloWorldTask/shard-1/m64020_190210_035026.subreads.ccs.uncorrected.aligned.merged.chr2.bai", + "bam_subset_file": "gs://broad-dsp-lrma-cromwell-central/HelloWorld/9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b/call-HelloWorldTask/shard-1/m64020_190210_035026.subreads.ccs.uncorrected.aligned.merged.chr2.bam", + "timing_info": "gs://broad-dsp-lrma-cromwell-central/HelloWorld/9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b/call-HelloWorldTask/shard-1/m64020_190210_035026.subreads.ccs.uncorrected.aligned.merged.timingInformation.txt" + }, + "preemptible": false, + "returnCode": 0, + "runtimeAttributes": { + "bootDiskSizeGb": "15", + "continueOnReturnCode": "0", + "cpu": "1", + "cpuMin": "1", + "disks": "local-disk 100 HDD", + "docker": "frolvlad/alpine-bash", + "failOnStderr": "false", + "maxRetries": "0", + "memory": "3 GB", + "memoryMin": "2 GB", + "noAddress": "false", + "preemptible": "0", + "zones": "us-east1-b,us-east1-c,us-east1-d" + }, + "shardIndex": -1, + "start": "2022-10-21T19:09:48.048Z", + "stderr": "gs://broad-dsp-lrma-cromwell-central/HelloWorld/9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b/call-HelloWorldTask/stderr", + "stdout": "gs://broad-dsp-lrma-cromwell-central/HelloWorld/9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b/call-HelloWorldTask/stdout" + } + ] + }, + "end": "2022-10-21T19:12:53.940Z", + "id": "9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b", + "inputs": { + "HelloWorld.HelloWorldTask.default_boot_disk_size_gb": 15, + "HelloWorld.HelloWorldTask.default_disk_space_gb": 100, + "HelloWorld.HelloWorldTask.default_ram_mb": 3072, + "HelloWorld.HelloWorldTask.use_ssd": false, + "HelloWorld.boot_disk_size_gb": null, + "HelloWorld.cpu": null, + "HelloWorld.disk_space_gb": null, + "HelloWorld.docker": "frolvlad/alpine-bash", + "HelloWorld.mem": null, + "HelloWorld.preemptible_attempts": null + }, + "labels": { + "cromwell-workflow-id": "cromwell-9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b" + }, + "outputs": { + "bam_subset_file_index": "gs://broad-dsp-lrma-cromwell-central/HelloWorld/9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b/call-HelloWorldTask/shard-1/m64020_190210_035026.subreads.ccs.uncorrected.aligned.merged.chr2.bai", + "bam_subset_file": "gs://broad-dsp-lrma-cromwell-central/HelloWorld/9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b/call-HelloWorldTask/shard-1/m64020_190210_035026.subreads.ccs.uncorrected.aligned.merged.chr2.bam", + "timing_info": "gs://broad-dsp-lrma-cromwell-central/HelloWorld/9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b/call-HelloWorldTask/shard-1/m64020_190210_035026.subreads.ccs.uncorrected.aligned.merged.timingInformation.txt" + + }, + "start": "2022-10-21T19:09:45.889Z", + "status": "Succeeded", + "submission": "2022-10-21T19:09:27.744Z", + "workflowName": "HelloWorld", + "workflowProcessingEvents": [ + { + "cromwellId": "cromid-dea8727", + "cromwellVersion": "63", + "description": "Finished", + "timestamp": "2022-10-21T19:12:53.940Z" + }, + { + "cromwellId": "cromid-dea8727", + "cromwellVersion": "63", + "description": "PickedUp", + "timestamp": "2022-10-21T19:09:45.888Z" + } + ], + "workflowRoot": "gs://broad-dsp-lrma-cromwell-central/HelloWorld/9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b/" +} + diff --git a/tests/unit/mock_data/list_outputs/cromwell_output_api_example.json b/tests/unit/mock_data/list_outputs/cromwell_output_api_example.json new file mode 100644 index 00000000..96425b83 --- /dev/null +++ b/tests/unit/mock_data/list_outputs/cromwell_output_api_example.json @@ -0,0 +1,11 @@ +{ + "id": "9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b", + "output": { + "HelloWorld.analysis_ready_bam_size": 0, + "HelloWorld.analysis_ready_bam_size_in_gb": 132, + "HelloWorld.analysis_ready_bam_name": "NA12878.hg38.bam", + "HelloWorld.HelloWorldTask.bam_subset_file_index": "gs://broad-dsp-lrma-cromwell-central/HelloWorld/9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b/call-HelloWorldTask/shard-1/m64020_190210_035026.subreads.ccs.uncorrected.aligned.merged.chr2.bai", + "HelloWorld.HelloWorldTask.bam_subset_file": "gs://broad-dsp-lrma-cromwell-central/HelloWorld/9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b/call-HelloWorldTask/shard-1/m64020_190210_035026.subreads.ccs.uncorrected.aligned.merged.chr2.bam", + "HelloWorld.HelloWorldTask.timing_info": "gs://broad-dsp-lrma-cromwell-central/HelloWorld/9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b/call-HelloWorldTask/shard-1/m64020_190210_035026.subreads.ccs.uncorrected.aligned.merged.timingInformation.txt" + } +} diff --git a/tests/unit/mock_data/list_outputs/helloworld_dict_of_outputs.json b/tests/unit/mock_data/list_outputs/helloworld_dict_of_outputs.json new file mode 100644 index 00000000..25843e38 --- /dev/null +++ b/tests/unit/mock_data/list_outputs/helloworld_dict_of_outputs.json @@ -0,0 +1,8 @@ +{ + "HelloWorld.analysis_ready_bam_size": 0, + "HelloWorld.analysis_ready_bam_size_in_gb": 132, + "HelloWorld.analysis_ready_bam_name": "NA12878.hg38.bam", + "HelloWorld.HelloWorldTask.bam_subset_file_index": "gs://broad-dsp-lrma-cromwell-central/HelloWorld/9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b/call-HelloWorldTask/shard-1/m64020_190210_035026.subreads.ccs.uncorrected.aligned.merged.chr2.bai", + "HelloWorld.HelloWorldTask.bam_subset_file": "gs://broad-dsp-lrma-cromwell-central/HelloWorld/9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b/call-HelloWorldTask/shard-1/m64020_190210_035026.subreads.ccs.uncorrected.aligned.merged.chr2.bam", + "HelloWorld.HelloWorldTask.timing_info": "gs://broad-dsp-lrma-cromwell-central/HelloWorld/9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b/call-HelloWorldTask/shard-1/m64020_190210_035026.subreads.ccs.uncorrected.aligned.merged.timingInformation.txt" +} diff --git a/tests/unit/mock_data/list_outputs/helloworld_task_level_outputs.txt b/tests/unit/mock_data/list_outputs/helloworld_task_level_outputs.txt new file mode 100644 index 00000000..0b3a4568 --- /dev/null +++ b/tests/unit/mock_data/list_outputs/helloworld_task_level_outputs.txt @@ -0,0 +1,4 @@ +HelloWorld.HelloWorldTask + bam_subset_file_index: gs://broad-dsp-lrma-cromwell-central/HelloWorld/9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b/call-HelloWorldTask/shard-1/m64020_190210_035026.subreads.ccs.uncorrected.aligned.merged.chr2.bai + bam_subset_file: gs://broad-dsp-lrma-cromwell-central/HelloWorld/9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b/call-HelloWorldTask/shard-1/m64020_190210_035026.subreads.ccs.uncorrected.aligned.merged.chr2.bam + timing_info: gs://broad-dsp-lrma-cromwell-central/HelloWorld/9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b/call-HelloWorldTask/shard-1/m64020_190210_035026.subreads.ccs.uncorrected.aligned.merged.timingInformation.txt diff --git a/tests/unit/mock_data/list_outputs/helloworld_workflow_level_outputs.txt b/tests/unit/mock_data/list_outputs/helloworld_workflow_level_outputs.txt new file mode 100644 index 00000000..25843e38 --- /dev/null +++ b/tests/unit/mock_data/list_outputs/helloworld_workflow_level_outputs.txt @@ -0,0 +1,8 @@ +{ + "HelloWorld.analysis_ready_bam_size": 0, + "HelloWorld.analysis_ready_bam_size_in_gb": 132, + "HelloWorld.analysis_ready_bam_name": "NA12878.hg38.bam", + "HelloWorld.HelloWorldTask.bam_subset_file_index": "gs://broad-dsp-lrma-cromwell-central/HelloWorld/9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b/call-HelloWorldTask/shard-1/m64020_190210_035026.subreads.ccs.uncorrected.aligned.merged.chr2.bai", + "HelloWorld.HelloWorldTask.bam_subset_file": "gs://broad-dsp-lrma-cromwell-central/HelloWorld/9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b/call-HelloWorldTask/shard-1/m64020_190210_035026.subreads.ccs.uncorrected.aligned.merged.chr2.bam", + "HelloWorld.HelloWorldTask.timing_info": "gs://broad-dsp-lrma-cromwell-central/HelloWorld/9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b/call-HelloWorldTask/shard-1/m64020_190210_035026.subreads.ccs.uncorrected.aligned.merged.timingInformation.txt" +} diff --git a/tests/unit/mock_data/list_outputs/print_file_like_value_in_dict_example.txt b/tests/unit/mock_data/list_outputs/print_file_like_value_in_dict_example.txt new file mode 100644 index 00000000..fddd77d2 --- /dev/null +++ b/tests/unit/mock_data/list_outputs/print_file_like_value_in_dict_example.txt @@ -0,0 +1,3 @@ + HelloWorld.HelloWorldTask.bam_subset_file_index: gs://broad-dsp-lrma-cromwell-central/HelloWorld/9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b/call-HelloWorldTask/shard-1/m64020_190210_035026.subreads.ccs.uncorrected.aligned.merged.chr2.bai + HelloWorld.HelloWorldTask.bam_subset_file: gs://broad-dsp-lrma-cromwell-central/HelloWorld/9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b/call-HelloWorldTask/shard-1/m64020_190210_035026.subreads.ccs.uncorrected.aligned.merged.chr2.bam + HelloWorld.HelloWorldTask.timing_info: gs://broad-dsp-lrma-cromwell-central/HelloWorld/9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b/call-HelloWorldTask/shard-1/m64020_190210_035026.subreads.ccs.uncorrected.aligned.merged.timingInformation.txt diff --git a/tests/unit/mock_data/list_outputs/print_file_like_value_in_dict_no_indent_example.txt b/tests/unit/mock_data/list_outputs/print_file_like_value_in_dict_no_indent_example.txt new file mode 100644 index 00000000..86247ddf --- /dev/null +++ b/tests/unit/mock_data/list_outputs/print_file_like_value_in_dict_no_indent_example.txt @@ -0,0 +1,3 @@ +HelloWorld.HelloWorldTask.bam_subset_file_index: gs://broad-dsp-lrma-cromwell-central/HelloWorld/9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b/call-HelloWorldTask/shard-1/m64020_190210_035026.subreads.ccs.uncorrected.aligned.merged.chr2.bai +HelloWorld.HelloWorldTask.bam_subset_file: gs://broad-dsp-lrma-cromwell-central/HelloWorld/9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b/call-HelloWorldTask/shard-1/m64020_190210_035026.subreads.ccs.uncorrected.aligned.merged.chr2.bam +HelloWorld.HelloWorldTask.timing_info: gs://broad-dsp-lrma-cromwell-central/HelloWorld/9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b/call-HelloWorldTask/shard-1/m64020_190210_035026.subreads.ccs.uncorrected.aligned.merged.timingInformation.txt diff --git a/tests/unit/mock_data/list_outputs/succeeded_helloworld.outputs.metadata.json b/tests/unit/mock_data/list_outputs/succeeded_helloworld.outputs.metadata.json new file mode 100644 index 00000000..0bb3b079 --- /dev/null +++ b/tests/unit/mock_data/list_outputs/succeeded_helloworld.outputs.metadata.json @@ -0,0 +1,9 @@ +{ + "HelloWorld.HelloWorldTask": [ + { + "bam_subset_file_index": "gs://broad-dsp-lrma-cromwell-central/HelloWorld/9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b/call-HelloWorldTask/shard-1/m64020_190210_035026.subreads.ccs.uncorrected.aligned.merged.chr2.bai", + "bam_subset_file": "gs://broad-dsp-lrma-cromwell-central/HelloWorld/9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b/call-HelloWorldTask/shard-1/m64020_190210_035026.subreads.ccs.uncorrected.aligned.merged.chr2.bam", + "timing_info": "gs://broad-dsp-lrma-cromwell-central/HelloWorld/9ee4aa2e-7ac5-4c61-88b2-88a4d10f168b/call-HelloWorldTask/shard-1/m64020_190210_035026.subreads.ccs.uncorrected.aligned.merged.timingInformation.txt" + } + ] +} diff --git a/tests/unit/test_list_outputs.py b/tests/unit/test_list_outputs.py new file mode 100644 index 00000000..e84856d3 --- /dev/null +++ b/tests/unit/test_list_outputs.py @@ -0,0 +1,205 @@ +import json + +import pytest + +from cromshell.list_outputs import command as list_outputs_command + + +class TestListOutputs: + """Test the execution list-outputs command functions""" + + @pytest.mark.parametrize( + "workflow_metadata_file, outputs_metadata_file_path", + [ + [ + "succeeded_helloworld.metadata.json", + "list_outputs/succeeded_helloworld.outputs.metadata.json", + ], + ], + ) + def test_filter_outputs_from_workflow_metadata( + self, + mock_data_path, + tests_metadata_path, + workflow_metadata_file, + outputs_metadata_file_path, + ): + with open(tests_metadata_path.joinpath(workflow_metadata_file), "r") as f: + workflow_metadata = json.load(f) + + with open(mock_data_path.joinpath(outputs_metadata_file_path), "r") as f: + outputs_metadata = json.load(f) + + assert ( + list_outputs_command.filter_outputs_from_workflow_metadata( + workflow_metadata + ) + == outputs_metadata + ) + + @pytest.mark.parametrize( + "outputs_metadata_file_path, expected_task_level_outputs_file_path", + [ + [ + "list_outputs/succeeded_helloworld.outputs.metadata.json", + "list_outputs/helloworld_task_level_outputs.txt", + ], + ], + ) + def test_print_task_level_outputs( + self, + outputs_metadata_file_path: dict, + mock_data_path, + expected_task_level_outputs_file_path, + capsys, + ) -> None: + """Test the print_task_level_outputs function""" + + with open(mock_data_path.joinpath(outputs_metadata_file_path), "r") as f: + outputs_metadata = json.load(f) + with open( + mock_data_path.joinpath(expected_task_level_outputs_file_path), "r" + ) as f: + expected_task_level_outputs = f.read() + + list_outputs_command.print_task_level_outputs(outputs_metadata) + + captured = capsys.readouterr() + assert captured.out == expected_task_level_outputs + + @pytest.mark.parametrize( + "outputs_api_example_file, expected_workflow_level_outputs_file_path, indent", + [ + [ + "list_outputs/helloworld_dict_of_outputs.json", + "list_outputs/print_file_like_value_in_dict_example.txt", + True, + ], + [ + "list_outputs/helloworld_dict_of_outputs.json", + "list_outputs/print_file_like_value_in_dict_no_indent_example.txt", + False, + ], + ], + ) + def test_print_output_metadata( + self, + outputs_api_example_file, + tests_metadata_path, + mock_data_path, + expected_workflow_level_outputs_file_path, + indent, + capsys, + ) -> None: + with open(mock_data_path.joinpath(outputs_api_example_file), "r") as f: + outputs_metadata = json.load(f) + with open( + mock_data_path.joinpath(expected_workflow_level_outputs_file_path), "r" + ) as f: + expected_workflow_level_outputs = f.read() + + list_outputs_command.print_file_like_value_in_dict( + outputs_metadata, indent=indent + ) + + captured = capsys.readouterr() + assert captured.out == expected_workflow_level_outputs + + @pytest.mark.parametrize( + "output_name, output_value, indent, expected_function_print", + [ + [ + "task_name", + "/taskoutputfile", + True, + "\ttask_name: /taskoutputfile\n", + ], + [ + "task_name", + "taskoutputfile", + True, + "", + ], + [ + "task_name", + "gs://taskoutputfile", + True, + "\ttask_name: gs://taskoutputfile\n", + ], + ], + ) + def test_print_output_name_and_file( + self, + output_name, + output_value, + indent, + expected_function_print, + capsys, + ) -> None: + list_outputs_command.print_output_name_and_file( + output_name=output_name, + output_value=output_value, + indent=indent, + ) + + captured = capsys.readouterr() + assert captured.out == expected_function_print + + @pytest.mark.parametrize( + "value, expected_bool", + [ + [ + "task_value", + False, + ], + [ + "/task_value", + True, + ], + [ + "gs://task_value", + True, + ], + [ + "task_value/", + False, + ], + [ + "http://task_value", + True, + ], + ], + ) + def test_is_path_or_url_like(self, value, expected_bool): + assert list_outputs_command.is_path_or_url_like(value) == expected_bool + + @pytest.mark.parametrize( + "example_output_results, workflow_id", + [ + [ + {}, + "04b65be4-896f-439c-8a01-5e4dc6c116dd'", + ], + [ + {"outputs": {"one": 2}, "id": "04b65be4-896f-439c-8a01-5e4dc6c116dd"}, + "04b65be4-896f-439c-8a01-5e4dc6c116dd'", + ], + ], + ) + def test_check_for_empty_output( + self, example_output_results: dict, workflow_id: str + ): + """Test the check_for_empty_output function""" + + if example_output_results == {}: + with pytest.raises(Exception): + list_outputs_command.check_for_empty_output( + example_output_results, workflow_id + ) + else: + assert ( + list_outputs_command.check_for_empty_output( + example_output_results, workflow_id + ) + is None + ) diff --git a/tests/workflows/helloWorld.wdl b/tests/workflows/helloWorld.wdl index 6ea59c64..a0eb63f1 100644 --- a/tests/workflows/helloWorld.wdl +++ b/tests/workflows/helloWorld.wdl @@ -32,6 +32,7 @@ workflow HelloWorld { } output { + File output_file = HelloWorldTask.output_file } } @@ -89,6 +90,7 @@ task HelloWorldTask { # ------------------------------------------------ # Outputs: output { + File output_file = stdout() } }