-
Notifications
You must be signed in to change notification settings - Fork 15
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Detect and flatten nested WDL directories #268
Changes from 2 commits
314b181
9e52991
fc02db4
c4260c6
61a6d71
7c27e80
d224473
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,13 @@ | ||
import json | ||
import logging | ||
import re | ||
import os | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If possible, avoid using the os package and use the pathlib.Path library for file/directory handling. The pathlib module is a newer module in Python that provides a more object-oriented way of handling file paths. This makes it more intuitive and easier to use, and it also provides a number of features that are not available in the os module. |
||
import shutil | ||
from contextlib import nullcontext | ||
from io import BytesIO | ||
from pathlib import Path | ||
from typing import BinaryIO, List, Union | ||
import tempfile | ||
from typing import BinaryIO, List, Union, Tuple | ||
from zipfile import ZIP_DEFLATED, ZipFile | ||
|
||
from pygments import formatters, highlight, lexers | ||
|
@@ -101,6 +103,59 @@ def assert_path_is_not_empty(path: Union[str, Path], description: str) -> None: | |
raise EOFError(f"ERROR: {description} is empty: {path}.") | ||
|
||
|
||
def has_nested_dependencies(wdl_path: Union[str, Path]) -> bool:
    """Determine if a WDL has any nested imports.

    An import counts as nested when its double-quoted target contains a
    ``../`` segment or is an absolute filesystem path.

    Args:
        wdl_path: Location of the WDL file to scan.

    Returns:
        True if at least one import line is nested, False otherwise
        (including when the file contains no imports at all).
    """
    # Anchored at the start of the line (re.match) and stopping at the first
    # closing quote, so trailing text such as `as "x"` can never be captured.
    import_pattern = re.compile(r'import\s+"([^"]+)"')

    with open(wdl_path, 'r') as wdl_file:
        for line in wdl_file:
            match = import_pattern.match(line)
            if match is None:
                # Not an import, or an import form this scanner does not
                # understand; skip it instead of failing on a missing match.
                continue

            target = match.group(1)
            if "../" in target or os.path.isabs(target):
                return True

    return False
|
||
|
||
def get_flattened_filename(tempdir: tempfile.TemporaryDirectory, wdl_path: Union[str, Path]) -> str:
    """Generate the filename to use for flattened WDL files.

    The directory part of ``wdl_path`` is folded into the file name: every
    path separator becomes ``-``, a single leading ``-`` is dropped, and the
    result is joined to the original base name with another ``-``.

    Args:
        tempdir: Temporary directory whose ``name`` receives the flattened file.
        wdl_path: Path of the WDL file being flattened.

    Returns:
        The path, inside ``tempdir.name``, to use for the flattened copy.
    """
    directory = os.path.dirname(wdl_path)
    file_name = os.path.basename(wdl_path)

    # Plain string replacement: the separator must not be interpreted as a
    # regular expression (a backslash separator is not a valid pattern).
    prefix = directory.replace(os.path.sep, "-")
    if prefix.startswith("-"):
        prefix = prefix[1:]

    return os.path.join(tempdir.name, f"{prefix}-{file_name}")
|
||
|
||
def flatten_nested_dependencies(
    tempdir: tempfile.TemporaryDirectory,
    wdl_path: Union[str, Path],
    _visited: Union[set, None] = None,
) -> str:
    """Flatten a WDL directory structure and rewrite imports accordingly.

    The WDL at ``wdl_path`` is copied into ``tempdir`` under its flattened
    name. Each ``import "..."`` line is rewritten to point at the flattened
    name of the imported file, and every imported file is flattened in turn.
    When an import has no ``as`` alias directly after the quoted path, one is
    added using the imported file's original base name (minus ``.wdl``).

    Args:
        tempdir: Temporary directory that receives the flattened files.
        wdl_path: Path of the WDL file to flatten.
        _visited: Internal bookkeeping for the recursion (flattened output
            paths already produced). Callers should leave it unset.

    Returns:
        String representing the filesystem location of the rewritten WDL.
    """
    if _visited is None:
        _visited = set()

    wdl_dir = os.path.dirname(wdl_path)
    new_wdl_path = get_flattened_filename(tempdir, wdl_path)

    # Record the output before descending so circular imports terminate and
    # a file imported from several places is only written once.
    _visited.add(new_wdl_path)

    # Anchored at line start; the capture stops at the first closing quote.
    import_pattern = re.compile(r'import\s+"([^"]+)"')
    pending = []

    with open(wdl_path, 'r') as rf, open(new_wdl_path, 'w') as wf:
        for line in rf:
            match = import_pattern.match(line)
            if match is None:
                # Ordinary line, or an import form that is not understood:
                # copy it through untouched rather than failing.
                wf.write(line)
                continue

            imported_wdl_path = os.path.abspath(os.path.join(wdl_dir, match.group(1)))
            flattened_target = get_flattened_filename(tempdir, imported_wdl_path)

            # Splice by position instead of re.sub so the path is never
            # treated as a regex nor the new name as a replacement template.
            statement = line[:match.start(1)] + os.path.basename(flattened_target) + '"'
            remainder = line[match.end():]

            if re.match(r'\s+as\s', remainder) is None:
                # No explicit alias: add one from the original file name so
                # renaming the file does not change the name it is used by.
                alias = os.path.basename(imported_wdl_path)
                if alias.endswith('.wdl'):
                    alias = alias[:-len('.wdl')]
                statement = f'{statement} as {alias}'

            wf.write(statement + remainder)
            pending.append((imported_wdl_path, flattened_target))

    # Recurse only after both files are closed, keeping open handles bounded.
    for imported_wdl_path, flattened_target in pending:
        if flattened_target not in _visited:
            flatten_nested_dependencies(tempdir, imported_wdl_path, _visited)

    return new_wdl_path
|
||
|
||
def open_or_zip(path: Union[str, Path, None]) -> Union[nullcontext, BytesIO, BinaryIO]: | ||
"""Return a context that may be used for reading the contents from the path. | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be a good idea to have the option to disable this feature.
Also, add a Logger.debug message inside the if statement saying that this is taking place.