Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Detect and flatten nested WDL directories #268

Merged
merged 7 commits into from
Sep 27, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/cromshell/submit/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
import logging
from datetime import datetime
from pathlib import Path, PurePath
import tempfile

import os

import click
import requests
Expand Down Expand Up @@ -65,6 +68,11 @@ def main(config, wdl, wdl_json, options_json, dependencies_zip, no_validation):

http_utils.assert_can_communicate_with_server(config=config)

if io_utils.has_nested_dependencies(wdl):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be a good idea to have the option to disable this feature.
Also Logger.debug message saying this is taking place within the if statement

tempdir = tempfile.TemporaryDirectory(prefix='cromshell_')
wdl = io_utils.flatten_nested_dependencies(tempdir, wdl)
dependencies_zip = tempdir.name

if no_validation:
LOGGER.info("Skipping WDL validation")
else:
Expand Down
57 changes: 56 additions & 1 deletion src/cromshell/utilities/io_utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import json
import logging
import re
import os
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If possible use avoid using os package and use pathlib.Path library for file/directly handling. The pathlib module is a newer module in Python that provides a more object-oriented way of handling file paths. This makes it more intuitive and easier to use, and it also provides a number of features that are not available in the os module.

import shutil
from contextlib import nullcontext
from io import BytesIO
from pathlib import Path
from typing import BinaryIO, List, Union
import tempfile
from typing import BinaryIO, List, Union, Tuple
from zipfile import ZIP_DEFLATED, ZipFile

from pygments import formatters, highlight, lexers
Expand Down Expand Up @@ -101,6 +103,59 @@ def assert_path_is_not_empty(path: Union[str, Path], description: str) -> None:
raise EOFError(f"ERROR: {description} is empty: {path}.")


def has_nested_dependencies(wdl_path: str or Path) -> bool:
"""Determine if a WDL has any nested imports."""

with open(wdl_path, 'r') as rf:
for l in rf:
if l.startswith('import'):
m = re.match(r'import "(.+)"', l)

if "../" in m.group(1):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

An edge case that this function would miss would be nested wdl that uses absolute paths for their imports. Highly unlikely though as I haven't seen wdls like that often or at all.

return True

return False


def get_flattened_filename(tempdir: tempfile.TemporaryDirectory, wdl_path: str or Path):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add the output type being returned by the function

"""Generate the filename to use for flattened WDL files."""
kvg marked this conversation as resolved.
Show resolved Hide resolved

return os.path.join(
tempdir.name,
re.sub("^-", "", re.sub(os.path.sep, "-", os.path.dirname(wdl_path))) + '-' + os.path.basename(wdl_path)
)


def flatten_nested_dependencies(tempdir: tempfile.TemporaryDirectory, wdl_path: str or Path) -> str:
"""Flatten a WDL directory structure and rewrite imports accordingly.

Return string representing the filesystem location of the rewritten WDL.
"""

wdl_dir = os.path.dirname(wdl_path)

new_wdl_path = get_flattened_filename(tempdir, wdl_path)

with open(wdl_path, 'r') as rf, open(new_wdl_path, 'w') as wf:
for l in rf:
if l.startswith('import'):
m = re.match(r'import "(.+)"', l)

kvg marked this conversation as resolved.
Show resolved Hide resolved
imported_wdl_path = os.path.abspath(os.path.join(wdl_dir, m.group(1)))
import_line = re.sub(m.group(1), os.path.basename(get_flattened_filename(tempdir, imported_wdl_path)), l)

if ' as ' in l:
wf.write(import_line)
else:
wf.write(f'{import_line.strip()} as {re.sub(".wdl", "", os.path.basename(imported_wdl_path))}\n')

flatten_nested_dependencies(tempdir, imported_wdl_path)
else:
wf.write(l)

return new_wdl_path


def open_or_zip(path: Union[str, Path, None]) -> Union[nullcontext, BytesIO, BinaryIO]:
"""Return a context that may be used for reading the contents from the path.

Expand Down