Add list-outputs (#248)

* added list-outputs command * Added option to get workflow level outputs or task level outputs * Added option to print json summary and text * add integration test_list_outputs.py draft * Added options to utility_test_functions.py run cromshell function * Added function to confirm results from cromwell outputs endpoint contain outputs else throws an error. * Added variable to hold workflow id in cromshellconfig.py * add check of outputs for detailed list-outputs option --------- Co-authored-by: bshifaw <[email protected]>
broadinstitute · Apr 14, 2023 · abd12ba · abd12ba
1 parent 3a323e4
commit abd12ba
Show file tree

Hide file tree

Showing 17 changed files with 843 additions and 2 deletions.
diff --git a/src/cromshell/__main__.py b/src/cromshell/__main__.py
@@ -11,6 +11,7 @@
 from .cost import command as cost
 from .counts import command as counts
 from .list import command as list
+from .list_outputs import command as list_outputs
 from .logs import command as logs
 from .metadata import command as metadata
 from .slim_metadata import command as slim_metadata
@@ -168,6 +169,7 @@ def version():
 main_entry.add_command(update_server.main)
 main_entry.add_command(timing.main)
 main_entry.add_command(list.main)
+main_entry.add_command(list_outputs.main)
 
 
 if __name__ == "__main__":

diff --git a/src/cromshell/list_outputs/__init__.py b/src/cromshell/list_outputs/__init__.py
diff --git a/src/cromshell/list_outputs/command.py b/src/cromshell/list_outputs/command.py
@@ -0,0 +1,220 @@
+import logging
+
+import click
+import requests
+
+import cromshell.utilities.http_utils as http_utils
+import cromshell.utilities.io_utils as io_utils
+from cromshell.metadata import command as metadata_command
+from cromshell.utilities import command_setup_utils
+
+LOGGER = logging.getLogger(__name__)
+
+
+@click.command(name="list-outputs")
+@click.argument("workflow_ids", required=True, nargs=-1)
+@click.option(
+    "-d",
+    "--detailed",
+    is_flag=True,
+    default=False,
+    help="Get the output for a workflow at the task level",
+)
+@click.option(
+    "-j",
+    "--json-summary",
+    is_flag=True,
+    default=False,
+    help="Print a json summary of outputs, including non-file types.",
+)
+@click.pass_obj
+def main(config, workflow_ids, detailed, json_summary):
+    """List all output files produced by a workflow."""
+
+    LOGGER.info("list-outputs")
+
+    return_code = 0
+
+    for workflow_id in workflow_ids:
+        command_setup_utils.resolve_workflow_id_and_server(
+            workflow_id=workflow_id, cromshell_config=config
+        )
+
+        if not detailed:
+            workflow_outputs = get_workflow_level_outputs(config).get("outputs")
+
+            if json_summary:
+                io_utils.pretty_print_json(format_json=workflow_outputs)
+            else:
+                print_file_like_value_in_dict(
+                    outputs_metadata=workflow_outputs,
+                    indent=False,
+                )
+        else:
+            task_outputs = get_task_level_outputs(config)
+
+            if json_summary:
+                io_utils.pretty_print_json(format_json=task_outputs)
+            else:
+                print_task_level_outputs(task_outputs)
+
+    return return_code
+
+
+def get_workflow_level_outputs(config) -> dict:
+    """Get the workflow level outputs from the workflow outputs
+
+    Args:
+        config (dict): The cromshell config object
+    """
+
+    requests_out = requests.get(
+        f"{config.cromwell_api_workflow_id}/outputs",
+        timeout=config.requests_connect_timeout,
+        verify=config.requests_verify_certs,
+        headers=http_utils.generate_headers(config),
+    )
+
+    if requests_out.ok:
+        check_for_empty_output(requests_out.json().get("outputs"), config.workflow_id)
+        return requests_out.json()
+    else:
+        http_utils.check_http_request_status_code(
+            short_error_message="Failed to retrieve outputs for "
+            f"workflow: {config.workflow_id}",
+            response=requests_out,
+            # Raising exception is set false to allow
+            # command to retrieve outputs of remaining workflows.
+            raise_exception=False,
+        )
+
+
+def get_task_level_outputs(config) -> dict:
+    """Get the task level outputs from the workflow metadata
+
+    Args:
+        config (dict): The cromshell config object
+    """
+    # Get metadata
+    formatted_metadata_parameter = metadata_command.format_metadata_params(
+        list_of_keys=config.METADATA_KEYS_TO_OMIT,
+        exclude_keys=True,
+        expand_subworkflows=True,
+    )
+
+    workflow_metadata = metadata_command.get_workflow_metadata(
+        meta_params=formatted_metadata_parameter,
+        api_workflow_id=config.cromwell_api_workflow_id,
+        timeout=config.requests_connect_timeout,
+        verify_certs=config.requests_verify_certs,
+        headers=http_utils.generate_headers(config),
+    )
+
+    return filter_outputs_from_workflow_metadata(workflow_metadata)
+
+
+def filter_outputs_from_workflow_metadata(workflow_metadata: dict) -> dict:
+    """Get the outputs from the workflow metadata
+
+    Args:
+        workflow_metadata (dict): The workflow metadata
+    """
+    calls_metadata = workflow_metadata["calls"]
+    output_metadata = {}
+    extract_task_key = "outputs"
+
+    for call, index_list in calls_metadata.items():
+        if "subWorkflowMetadata" in calls_metadata[call][0]:
+            output_metadata[call] = []
+            for scatter in calls_metadata[call]:
+                output_metadata[call].append(
+                    filter_outputs_from_workflow_metadata(
+                        scatter["subWorkflowMetadata"]
+                    )
+                )
+        else:
+            output_metadata[call] = []
+            for index in index_list:
+                output_metadata[call].append(index.get(extract_task_key))
+
+    check_for_empty_output(output_metadata, workflow_metadata["id"])
+
+    return output_metadata
+
+
+def print_task_level_outputs(output_metadata: dict) -> None:
+    """Print the outputs from the workflow metadata
+    output_metadata: {call_name:[index1{output_name: outputvalue}, index2{...}, ...], call_name:[], ...}
+
+    Args:
+        output_metadata (dict): The output metadata from the workflow
+    """
+    for call, index_list in output_metadata.items():
+        print(call)
+        for call_index in index_list:
+            if call_index is not None:
+                print_file_like_value_in_dict(outputs_metadata=call_index, indent=True)
+
+
+def print_file_like_value_in_dict(outputs_metadata: dict, indent: bool) -> None:
+    """Print the file like values in the output metadata dictionary
+
+    Args:
+        outputs_metadata (dict): The output metadata
+        indent (bool): Whether to indent the output
+    """
+
+    for output_name, output_value in outputs_metadata.items():
+        if isinstance(output_value, str):
+            print_output_name_and_file(output_name, output_value, indent=indent)
+        elif isinstance(output_value, list):
+            for output_value_item in output_value:
+                print_output_name_and_file(
+                    output_name, output_value_item, indent=indent
+                )
+
+
+def print_output_name_and_file(
+    output_name: str, output_value: str, indent: bool = True
+) -> None:
+    """Print the task name and the file name
+
+    Args:
+        output_name (str): The task output name
+        output_value (str): The task output value
+        indent (bool): Whether to indent the output"""
+
+    i = "\t" if indent else ""
+
+    if isinstance(output_value, str):
+        if is_path_or_url_like(output_value):
+            print(f"{i}{output_name}: {output_value}")
+
+
+def is_path_or_url_like(in_string: str) -> bool:
+    """Check if the string is a path or url
+
+    Args:
+        in_string (str): The string to check for path or url like-ness
+    """
+    if (
+        in_string.startswith("gs://")
+        or in_string.startswith("/")
+        or in_string.startswith("http://")
+        or in_string.startswith("https://")
+    ):
+        return True
+    else:
+        return False
+
+
+def check_for_empty_output(workflow_outputs: dict, workflow_id: str) -> None:
+    """Check if the workflow outputs are empty
+
+    Args:
+        cromwell_outputs (dict): Dictionary of workflow outputs
+        :param workflow_id: The workflow id
+    """
+    if not workflow_outputs:
+        LOGGER.error(f"No outputs found for workflow: {workflow_id}")
+        raise Exception(f"No outputs found for workflow: {workflow_id}")
diff --git a/src/cromshell/utilities/command_setup_utils.py b/src/cromshell/utilities/command_setup_utils.py
@@ -17,5 +17,17 @@ def resolve_workflow_id_and_server(workflow_id: str, cromshell_config) -> str:
     http_utils.set_and_check_cromwell_server(
         config=cromshell_config, workflow_id=resolved_workflow_id
     )
+    set_workflow_id(workflow_id=resolved_workflow_id, cromshell_config=cromshell_config)
 
     return resolved_workflow_id
+
+
+def set_workflow_id(workflow_id: str, cromshell_config) -> None:
+    """
+    Sets the workflow id in the config object
+
+    :param workflow_id: workflow UUID
+    :param cromshell_config:
+    :return: None
+    """
+    cromshell_config.workflow_id = workflow_id
diff --git a/src/cromshell/utilities/cromshellconfig.py b/src/cromshell/utilities/cromshellconfig.py
@@ -27,6 +27,7 @@
 ]
 CROMWELL_API_STRING = "/api/workflows/v1"
 WOMTOOL_API_STRING = "/api/womtool/v1"
+workflow_id = None
 # Concatenate the cromwell url, api string, and workflow ID. Set in subcommand.
 cromwell_api_workflow_id = None
 # Defaults for variables will be set after functions have been defined

diff --git a/tests/integration/test_list_outputs.py b/tests/integration/test_list_outputs.py
@@ -0,0 +1,99 @@
+from pathlib import Path
+
+import pytest
+
+from tests.integration import utility_test_functions
+
+workflows_path = Path(__file__).parents[1].joinpath("workflows/")
+
+
+class TestListOutputs:
+    @pytest.mark.parametrize(
+        "wdl, json_file, options, output_template",
+        [
+            (
+                "tests/workflows/helloWorld.wdl",
+                "tests/workflows/helloWorld.json",
+                None,
+                [
+                    "HelloWorld.output_file: /cromwell-executions/HelloWorld/<workflow-id>/call-HelloWorldTask/execution/stdout",
+                    "",
+                ],
+            ),
+            (
+                "tests/workflows/helloWorld.wdl",
+                "tests/workflows/helloWorld.json",
+                ["-d"],
+                [
+                    "HelloWorld.HelloWorldTask",
+                    "\toutput_file: /cromwell-executions/HelloWorld/<workflow-id>/call-HelloWorldTask/execution/stdout",
+                    "",
+                ],
+            ),
+            (
+                "tests/workflows/helloWorld.wdl",
+                "tests/workflows/helloWorld.json",
+                ["-j"],
+                [
+                    "{",
+                    '    "HelloWorld.output_file": "/cromwell-executions/HelloWorld/<workflow-id>/call-HelloWorldTask/execution/stdout"',
+                    "}",
+                    "",
+                ],
+            ),
+            (
+                "tests/workflows/helloWorld.wdl",
+                "tests/workflows/helloWorld.json",
+                ["-j", "-d"],
+                [
+                    "{",
+                    '    "HelloWorld.HelloWorldTask": [',
+                    "        {",
+                    '            "output_file": "/cromwell-executions/HelloWorld/<workflow-id>/call-HelloWorldTask/execution/stdout"',
+                    "        }",
+                    "    ]",
+                    "}",
+                    "",
+                ],
+            ),
+        ],
+    )
+    def test_list_outputs(
+        self,
+        local_cromwell_url: str,
+        wdl: str,
+        json_file: str,
+        options: list,
+        output_template: list,
+        ansi_escape,
+    ):
+        # submit workflow
+        test_workflow_id = utility_test_functions.submit_workflow(
+            local_cromwell_url=local_cromwell_url,
+            wdl=wdl,
+            json_file=json_file,
+            exit_code=0,
+        )
+
+        utility_test_functions.wait_for_workflow_completion(
+            test_workflow_id=test_workflow_id
+        )
+
+        # run list-outputs
+        status_result = utility_test_functions.run_cromshell_command(
+            command=["list-outputs", test_workflow_id],
+            exit_code=0,
+            subcommand_options=options,
+        )
+
+        status_result_per_line = status_result.stdout.split("\n")
+
+        workflow_outputs = [
+            sub.replace("<workflow-id>", test_workflow_id) for sub in output_template
+        ]
+
+        print("Print workflow list-outputs results:")
+        for line in status_result_per_line:
+            print(line)
+
+        assert status_result_per_line == workflow_outputs
diff --git a/tests/integration/utility_test_functions.py b/tests/integration/utility_test_functions.py
@@ -10,10 +10,13 @@
 from cromshell.utilities import cromshellconfig
 
 
-def run_cromshell_command(command: list, exit_code: int):
+def run_cromshell_command(
+    command: list, exit_code: int, subcommand_options: list = None
+):
     """
     Run cromshell alias using CliRunner and assert job is successful
 
+    :param subcommand_options: The options to pass to the subcommand
     :param command: The subcommand, options, and arguments in list form e.g.
     [
         "alias",
@@ -25,12 +28,18 @@ def run_cromshell_command(command: list, exit_code: int):
     :return: results from execution
     """
 
+    if subcommand_options:
+        command_with_options = command[:1] + subcommand_options + command[1:]
+    else:
+        command_with_options = command
+
     runner = CliRunner(mix_stderr=False)
     # The absolute path will be passed to the invoke command because
     # the test is being run in temp directory created by CliRunner.
     with runner.isolated_filesystem():
-        result = runner.invoke(cromshell, command)
+        result = runner.invoke(cromshell, command_with_options)
         assert result.exit_code == exit_code, (
+            f"\nCOMMAND:\n{command_with_options}"
             f"\nSTDOUT:\n{result.stdout}"
             f"\nSTDERR:\n{result.stderr}"
             f"\nExceptions:\n{result.exception}"