diff --git a/docs/_static/css/typealiases.css b/docs/_static/css/typealiases.css index d679b2a4..a888a383 100644 --- a/docs/_static/css/typealiases.css +++ b/docs/_static/css/typealiases.css @@ -1,3 +1,4 @@ +#snakebids\.paths\.specs\.BidsPathSpec, #snakebids\.types\.InputsConfig, #snakebids\.types\.ZipList, #snakebids\.types\.ZipListLike { diff --git a/docs/api/main.rst b/docs/api/main.rst index 2bc64c2c..8cc62ac9 100644 --- a/docs/api/main.rst +++ b/docs/api/main.rst @@ -1,63 +1,65 @@ .. _main api: + ================ API ================ +.. py:currentmodule:: snakebids -snakebids ---------- - -.. autoclass:: snakebids.BidsComponent - :members: - :exclude-members: input_wildcards, input_lists, input_name, input_path, input_zip_lists - :inherited-members: - -.. dropdown:: Legacy ``BidsComponents`` properties - :icon: info - :class-title: sd-outline-info - - The following properties are historical aliases of ``BidsComponents`` properties. There are no current plans to deprecate them, but new code should avoid them. +.. toctree:: + :hidden: - .. autoproperty:: snakebids.BidsComponent.input_zip_lists + paths + creation + manipulation + structures - .. autoproperty:: snakebids.BidsComponent.input_wildcards - .. autoproperty:: snakebids.BidsComponent.input_name +Path Creation +------------- - .. autoproperty:: snakebids.BidsComponent.input_path +.. Need to manually create this table because bids does not have a proper docstring - .. autoproperty:: snakebids.BidsComponent.input_lists +.. =================================== ================================ +.. :func:`bids ` Generate bids or bids-like paths +.. :func:`bids_factory ` Create new :func:`bids` functions according to a spec +.. =================================== ================================ +.. autosummary:: + :nosignatures: -.. autoclass:: snakebids.BidsPartialComponent + bids + bids_factory -.. autoclass:: snakebids.BidsComponentRow - :members: - :exclude-members: zip_lists +Dataset Creation +---------------- -.. autoclass:: snakebids.BidsDataset - :members: - :exclude-members: input_wildcards, input_lists, input_path, input_zip_lists +.. autosummary:: -.. dropdown:: Legacy ``BidsDataset`` properties - :icon: info - :class-title: sd-outline-info + generate_inputs - The following properties are historical aliases of :class:`~snakebids.BidsDataset` properties. There are no current plans to deprecate them, but new code should avoid them. +Dataset Manipulation +-------------------- - .. autoproperty:: snakebids.BidsDataset.input_zip_lists +.. autosummary:: + :nosignatures: - .. autoproperty:: snakebids.BidsDataset.input_wildcards + filter_list + get_filtered_ziplist_index - .. autoproperty:: snakebids.BidsDataset.input_path +Data Structures +--------------- - .. autoproperty:: snakebids.BidsDataset.input_lists +.. autosummary:: + :nosignatures: + BidsComponent + BidsPartialComponent + BidsComponentRow + BidsDataset + BidsDatasetDict -.. automodule:: snakebids - :exclude-members: from_bids_lists, BidsComponent, BidsPartialComponent, BidsComponentRow, BidsDataset - :members: app diff --git a/docs/bids_app/overview.md b/docs/bids_app/overview.md index 11a6fb0f..6e20392e 100644 --- a/docs/bids_app/overview.md +++ b/docs/bids_app/overview.md @@ -1,4 +1,12 @@ -# Overview +# Bids Apps + +```{toctree} +:hidden: + +config +workflow +plugins +``` Snakebids apps rely on a configuration file (`snakebids.yml`). This file specifies which files from a BIDS dataset should be used as input. The apps also utilize workflow definitions, which are written in one or more Snakefile(s) and determine how the input files are processed. diff --git a/docs/bids_function/overview.rst b/docs/bids_function/overview.rst index 6c593159..84ff55af 100644 --- a/docs/bids_function/overview.rst +++ b/docs/bids_function/overview.rst @@ -1,5 +1,5 @@ -Overview -======== +Bids Function +============= The ``bids`` function generates a BIDS-like filepath corresponding to its keyword arguments. The generated filepath has the form:: diff --git a/docs/conf.py b/docs/conf.py index 8ee6d312..380f1239 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -36,6 +36,7 @@ "sphinxarg.ext", "sphinx.ext.intersphinx", "sphinx.ext.napoleon", + "sphinx.ext.autosummary", "sphinx.ext.autodoc", "sphinxcontrib.asciinema", "myst_parser", @@ -69,7 +70,8 @@ "InputsConfig": "snakebids.types.InputsConfig", } autodoc_typehints_format = "short" -autosummary_imported_members = True +# autosummary_imported_members = True +autosummary_generate = True # Add any paths that contain templates here, relative to this directory. diff --git a/docs/index.md b/docs/index.md index be777ec9..1d2ae2fb 100644 --- a/docs/index.md +++ b/docs/index.md @@ -7,67 +7,30 @@ Snakebids is migrating to a more robust, extensible API! If you're coming from p ```{toctree} -:caption: General -:name: general +:caption: User Guide +:name: guide :hidden: -:maxdepth: 2 +:maxdepth: 1 general/why_snakebids -``` - -```{toctree} -:caption: Tutorial -:name: tutorial -:hidden: -:maxdepth: 2 - tutorial/tutorial -``` - -```{toctree} -:caption: Using the bids function -:name: bids_function -:hidden: -:maxdepth: 2 - bids_function/overview -``` - -```{toctree} -:caption: Creating a bids app -:name: bids_app -:hidden: -:maxdepth: 2 bids_app/overview -bids_app/config -bids_app/workflow -bids_app/plugins -``` - -```{toctree} -:caption: Running a snakebids app -:name: running_snakebids -:hidden: -:maxdepth: 2 running_snakebids/overview + +migration/index ``` -```{toctree} -:caption: Migration -:name: migration -:hidden: -:maxdepth: 2 -migration/0.5_to_0.8 -migration/0.7_to_0.8 -``` + + ```{toctree} :caption: Reference :hidden: -:maxdepth: 1 +:maxdepth: 2 api/main api/internals diff --git a/docs/running_snakebids/overview.md b/docs/running_snakebids/overview.md index 95fa5132..27bec2a0 100644 --- a/docs/running_snakebids/overview.md +++ b/docs/running_snakebids/overview.md @@ -1,5 +1,4 @@ -Overview -======== +# Running Snakebids Once you've specified a snakebids app with a config file and one or more workflow files, you're ready to invoke your snakebids app with the standard BIDS app CLI. @@ -12,15 +11,14 @@ Note that if any rules in the Snakebids workflow use Singularity containers, spe 1. Inputs are copied into a working subdirectory of the output directory before any processing that requires a Singularity container is performed, or: 2. The `SINGULARITY_BINDPATH` environment variable binds the location of the input dataset. -Indexing of large datasets can be a time-consuming process. Leveraging the functionality of `PyBIDS`, Snakebids offers a convenient solution by allowing you to create or utilize an existing database. With this approach, the indexing of datasets is only performed when explictly requested, typically when there are changes to the dataset. To create or use an existing database, you can invoke the following CLI arguments: +Indexing of large datasets can be a time-consuming process. Leveraging the functionality of `PyBIDS`, Snakebids offers a convenient solution by allowing you to create or utilize an existing database. With this approach, the indexing of datasets is only performed when explictly requested, typically when there are changes to the dataset. To create or use an existing database, you can invoke the following CLI arguments: 1. `--pybidsdb-dir {dir}`: specify the path to the database directory 1. `--pybidsdb-reset`: indicate that an existing database should be updated The boilerplate app starts with the validator plugin enabled - without it, validation is not performed. By default, this feature uses the command-line (node.js) version of the [validator](https://www.npmjs.com/package/bids-validator). If this is not found to be installed on the system, the `pybids` version of validation will be performed instead. To opt-out of validation, invoke the `--skip-bids-validation` flag. Details related to using and creating plugins can be found on the [plugins](/bids_app/plugins) page. -Workflow mode -============= +## Workflow mode Snakebids apps use a BIDS app CLI, giving great flexibility when switching datasets. However, when developing a Snakebids app or when running the app repeatedly on the same dataset, it can be more convenient to directly call the Snakemake CLI. Snakebids facilitates this using workflow mode. diff --git a/docs/tutorial/tutorial.md b/docs/tutorial/tutorial.md index 5bca6bef..76a34693 100644 --- a/docs/tutorial/tutorial.md +++ b/docs/tutorial/tutorial.md @@ -1,8 +1,10 @@ # Tutorial + % links [expand_func]: inv:snakemake:std:label#snakefiles_expand +(tutorial_getting_started)= ## Getting started In this example we will make a workflow to smooth ``bold`` scans from a bids dataset. @@ -62,11 +64,11 @@ $ cp -r snakebids/docs/tutorial/bids ./data It's also perfectly possible (and probably better!) to try the tutorial on your own dataset. Just adjust any paths below so that they match your data! -# Part I: Snakemake +(snakemake_tutorial)= +## Part I: Snakemake (step_0)= - -## Step 0: a basic non-generic workflow +### Step 0: a basic non-generic workflow In this rule, we start by creating a rule that is effectively hard-coding the paths for input and output to re-create the command as above. @@ -89,8 +91,7 @@ When we invoke ``snakemake``, it uses the first rule in the snakefile as the ``t So far, we just have a fancy way of specifying the exact same command we started with, so there is no added benefit (yet). But we will soon add to this rule to make it more generalizable. (step_1)= - -## Step 1: adding wildcards +### Step 1: adding wildcards First step to make the workflow generalizeable is to replace the hard-coded identifiers (e.g. the subject, task and run) with wildcards. @@ -127,8 +128,7 @@ Now, try changing the output smoothing value, e.g. ``fwhm-10mm``, and see what h As expected the command still uses a smoothing value of 2.12, since that has been hard-coded, but we will see how to rectify this in the next step. (step_2)= - -## Step 2: adding a params function +### Step 2: adding a params function As we noted, the sigma parameter needs to be computed from the FWHM. We can use a function to do this. Functions can be used for any ``input`` or ``params``, and must take ``wildcards`` as an input argument, which provides a mechanism to pass the wildcards (determined from the output file) to the function. @@ -169,8 +169,7 @@ Now try running the workflow again, with `fwhm-5` as well as `fwhm-10`. ``` (step_3)= - -## Step 3: adding a target rule +### Step 3: adding a target rule Now we have a generic rule, but it is pretty tedious to have to type out the filename of each target from the command-line in order to use it. @@ -206,8 +205,7 @@ The entire Snakefile for reference is: ``` (step_4)= - -## Step 4: adding a config file +### Step 4: adding a config file We have a functional workflow, but suppose you need to configure or run it on another bids dataset with different subjects, tasks, runs, or you want to run it for different smoothing values. You have to actually modify your workflow in order to do this. @@ -247,13 +245,14 @@ After these changes, the workflow should still run just like the last step, but ``` -# Part II: Snakebids + +(snakebids_tutorial)= +## Part II: Snakebids Now that we have a fully functioning and generic Snakemake workflow, let's see what Snakebids can add. (step_5)= - -## Step 5: the bids() function +### Step 5: the bids() function The first thing we can make use of is the {func}`~snakebids.bids` function. This provides an easy way to generate bids filenames. This is especially useful when defining output files in your workflow and you have many bids entities. @@ -295,7 +294,8 @@ The Snakefile with the output filename replaced (in both rules) is below: :caption: Snakefile ``` -## Step 6: parsing the BIDS dataset + +### Step 6: parsing the BIDS dataset So far, we have had to manually enter the path to input bold file in the config file, and also specify what subjects, tasks, and runs we want processed. Can't we use the fact that we have a BIDS dataset to automate this a bit more? @@ -381,7 +381,8 @@ Notice that `inputs['bold'].path`{l=python} is the same as the path we wrote und :emphasize-lines: 3 ``` -## Step 7: using input wildcards + +### Step 7: using input wildcards {attr}`BidsComponent.path ` already grants us a lot of flexibility, but we can still do more! In addition to the three main attributes of {class}`BidsComponents ` already described, the class offers a number of special properties we can use in our workflows. First, we'll look at {attr}`BidsComponent.wildcards `. This is a dict that maps each entity to the brace-wrapped `{wildcards}` we specified in `pybids_config`. If you printed this value in our test workflow, it would look like this: @@ -449,7 +450,7 @@ For reference, here is the updated config file and Snakefile after these changes :caption: Snakefile ``` -## Step 8: creating a command-line executable +### Step 8: creating a command-line executable Now that we have pybids parsing to dynamically configure our workflow inputs based on our BIDS dataset, we are ready to turn our workflow into a [BIDS App](http://bids-apps.neuroimaging.io/). BIDS Apps are command-line apps with a standardized interface (e.g. three required positional arguments: ``bids_directory``, ``output_directory``, and ``analysis_level``). diff --git a/poetry.lock b/poetry.lock index 16392c7d..b7eb6021 100644 --- a/poetry.lock +++ b/poetry.lock @@ -485,6 +485,17 @@ files = [ {file = "docopt-0.6.2.tar.gz", hash = "sha256:49b3a825280bd66b3aa83585ef59c4a8c82f2c8a522dbe754a8bc8d08c85c491"}, ] +[[package]] +name = "docstring-parser" +version = "0.15" +description = "Parse Python docstrings in reST, Google and Numpydoc format" +optional = false +python-versions = ">=3.6,<4.0" +files = [ + {file = "docstring_parser-0.15-py3-none-any.whl", hash = "sha256:d1679b86250d269d06a99670924d6bce45adc00b08069dae8c47d98e89b667a9"}, + {file = "docstring_parser-0.15.tar.gz", hash = "sha256:48ddc093e8b1865899956fcc03b03e66bb7240c310fac5af81814580c55bf682"}, +] + [[package]] name = "docutils" version = "0.20.1" @@ -3485,4 +3496,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = ">=3.8,<4.0" -content-hash = "d76eccae2cf6ba64eede6899750efa2e1ce4ce59c693d897a8143ef26d291592" +content-hash = "917b4aeea83990e31463b42e8d835a165b5af522e32b6dc9bc749b62fc7f59d8" diff --git a/pyproject.toml b/pyproject.toml index 8613866e..f4e4a9c9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -94,6 +94,7 @@ pytest-split = "^0.8.1" tomli = "^2.0.1" requests-mock = "^1.11.0" pytest-cov = "^4.1.0" +docstring-parser = "^0.15" [tool.poetry.group.docs.dependencies] @@ -129,6 +130,7 @@ ruff check --fix --select I001 snakebids """ benchmark = "pytest --benchmark-only --benchmark-autosave" docs = "sphinx-autobuild docs build/docs --watch snakebids -W" +update_bids.script = "scripts.update_bids:main" [tool.poe.tasks._get_version] imports = ["platform"] diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 00000000..8a1dee64 --- /dev/null +++ b/scripts/__init__.py @@ -0,0 +1,2 @@ +# type: ignore +__submodules__ = [] diff --git a/scripts/update_bids.py b/scripts/update_bids.py new file mode 100644 index 00000000..30c0867c --- /dev/null +++ b/scripts/update_bids.py @@ -0,0 +1,149 @@ +"""Recompile the bids function stub file based on latest specs""" +from __future__ import annotations + +import inspect +import itertools as it +import re +from pathlib import Path +from types import ModuleType +from typing import Iterable + +import black + +from snakebids.paths import presets, specs +from snakebids.paths._templates import bids_func, spec_func +from snakebids.paths.utils import BidsPathSpecFile, get_specs + + +def generate_stub(mod: ModuleType, imports: list[str], funcs: Iterable[str]): + prelude = [ + "# This stub file is automatically generated", + "# It can be updated using::", + "#", + "# poetry run poe update_bids", + "", + ] + assert mod.__file__ + # pyi = "\n".join(it.chain(prelude, imports, funcs)) + # print(pyi) + # print(black.format_str( + # pyi, + # mode=black.Mode(is_pyi=True), + # )) + with Path(mod.__file__).with_suffix(".pyi").open("w") as f_: + f_.write( + black.format_str( + "\n".join(it.chain(prelude, imports, funcs)), + mode=black.Mode(is_pyi=True), + ) + ) + + +SOURCE_TEMPLATE = """ +# +# The code between these tags is automatically generated. Do not +# manually edit +# To update, run:: +# +# poetry run poe update_bids +# + +if not TYPE_CHECKING: + __all__ = [ # noqa:F822 + {all} + ] + + def __dir__(): + return __all__ + +{statements} +# +""" + + +def update_source( + mod: ModuleType, all_items: Iterable[str], statements: list[str] | None = None +): + source = inspect.getsource(mod) + all_formatted = ",".join(f'"{item}"' for item in all_items) + compiled = re.compile( + r"#\s?(?:.*\n)+#\s?<\/AUTOUPDATE>", flags=re.MULTILINE + ) + if not compiled.search(source): + err = f"Could not find an existing block in " f"{mod.__name__}" + raise ValueError(err) + replaced = black.format_str( + compiled.sub( + SOURCE_TEMPLATE.strip().format( + all=all_formatted, statements="\n".join(statements or []) + ), + source, + ), + mode=black.Mode(), + ) + with open(inspect.getfile(mod), "w") as f_: + f_.write(replaced) + + +def presets_stub(versions: Iterable[str], latest: str): + for member in versions: + yield bids_func.format_pyi( + spec=f"_{member.replace('.', '_')}", spec_label=member + ) + + yield bids_func.format_pyi( + spec="", + spec_label="latest", + spec_clarify=f" (currently pointing to '{latest}')", + ) + + +def spec_stub(versions: Iterable[BidsPathSpecFile], latest: BidsPathSpecFile): + for version in versions: + yield spec_func.format_pyi(version) + + yield spec_func.format_pyi(latest) + + +def get_latest(versions: Iterable[BidsPathSpecFile]) -> tuple[str, BidsPathSpecFile]: + all_versions: dict[str, BidsPathSpecFile] = {} + + for version in versions: + all_versions[version["version"]] = version + + version = sorted(all_versions, key=lambda v: tuple(v.split(".")))[-1] + return version.replace(".", "_"), { + **all_versions[version], + "version": "latest", + } + + +def main(): + all_specs = list(get_specs()) + latest_version, latest_spec = get_latest(all_specs) + generate_stub( + specs, + ["from .utils import BidsPathSpec", "LATEST: str"], + spec_stub(all_specs, latest_spec), + ) + generate_stub( + presets, + ["from pathlib import Path"], + presets_stub((spec["version"] for spec in all_specs), latest_version), + ) + + versions = [spec["version"].replace(".", "_") for spec in all_specs] + update_source( + presets, it.chain((f"bids_{version}" for version in versions), ("bids",)) + ) + + spec_list = ",".join(f'"{v}"' for v in versions) + update_source( + specs, + it.chain((version for version in versions), ("latest", "LATEST")), + [f"_SPECS = [{spec_list}]", f'LATEST = "{latest_version}"'], + ) + + +if __name__ == "__main__": + main() diff --git a/snakebids/__init__.py b/snakebids/__init__.py index 937d00a2..d317d71b 100644 --- a/snakebids/__init__.py +++ b/snakebids/__init__.py @@ -16,6 +16,7 @@ "BidsDatasetDict", "BidsPartialComponent", "bids", + "bids_v0_0_0", "filter_list", "generate_inputs", "get_filtered_ziplist_index", diff --git a/snakebids/__init__.pyi b/snakebids/__init__.pyi index 37aa4831..e8439b5b 100644 --- a/snakebids/__init__.pyi +++ b/snakebids/__init__.pyi @@ -12,6 +12,7 @@ from .core import ( ) from .paths import ( bids, + bids_v0_0_0, ) __all__ = [ @@ -21,6 +22,7 @@ __all__ = [ "BidsDatasetDict", "BidsPartialComponent", "bids", + "bids_v0_0_0", "filter_list", "generate_inputs", "get_filtered_ziplist_index", diff --git a/snakebids/paths/__init__.py b/snakebids/paths/__init__.py index 23885b26..1df7a772 100644 --- a/snakebids/paths/__init__.py +++ b/snakebids/paths/__init__.py @@ -1,11 +1,10 @@ # type: ignore __submodules__ = ["presets"] - # import lazy_loader __getattr__, __dir__, __all__ = lazy_loader.attach_stub(__name__, __file__) -__all__ = ["bids"] +__all__ = ["bids", "bids_v0_0_0"] # diff --git a/snakebids/paths/__init__.pyi b/snakebids/paths/__init__.pyi index d0a42281..39ae2474 100644 --- a/snakebids/paths/__init__.pyi +++ b/snakebids/paths/__init__.pyi @@ -1,5 +1,8 @@ +# from .presets import ( bids, + bids_v0_0_0, ) -__all__ = ["bids"] +__all__ = ["bids", "bids_v0_0_0"] +# diff --git a/snakebids/paths/_templates/__init__.py b/snakebids/paths/_templates/__init__.py new file mode 100644 index 00000000..07cad78f --- /dev/null +++ b/snakebids/paths/_templates/__init__.py @@ -0,0 +1,10 @@ +# type: ignore +__submodules__ = [] + +# +import lazy_loader + +__getattr__, __dir__, __all__ = lazy_loader.attach_stub(__name__, __file__) + +__all__ = [] +# diff --git a/snakebids/paths/_templates/__init__.pyi b/snakebids/paths/_templates/__init__.pyi new file mode 100644 index 00000000..a9a2c5b3 --- /dev/null +++ b/snakebids/paths/_templates/__init__.pyi @@ -0,0 +1 @@ +__all__ = [] diff --git a/snakebids/paths/_templates/bids_func.py b/snakebids/paths/_templates/bids_func.py new file mode 100644 index 00000000..b676030d --- /dev/null +++ b/snakebids/paths/_templates/bids_func.py @@ -0,0 +1,65 @@ +import textwrap + +import docstring_parser as docstr + +TEMPLATE = ''' +def bids{spec}( + root: str | Path | None = None, + *, + datatype: str | None = None, + prefix: str | None = None, + suffix: str | None = None, + extension: str | None = None, + **entities: str | bool, +) -> str: + """{docstring} + """ + ... +''' +DOCSTRING = """Generate bids or bids-like paths + +Path is compiled based on the '{spec_label}' spec{spec_clarify}, with the +general form:: + + [root]/[sub-{{subject}}]/[ses-{{session}}]/ + [prefix]_[sub-{{subject}}]_[ses-{{session}}]_[{{key}}-{{val}}_ ... ]_[suffix] + +If no arguments are specified, an empty string will be returned. + + +Parameters +---------- + +root + Root folder to include in the path (e.g. ``results``) +datatype + Folder to include after sub-/ses- (e.g. ``anat``, ``dwi`` ) +prefix + String to prepend to the file name. Useful for injecting custom entities at + the front of the filename, e.g. ``tpl-{{tpl}}`` +suffix + Suffix plus, optionally, the extension (e.g. ``T1w.nii.gz``) +extension + bids extension, beginning with ``.`` (e.g. ``.nii.gz``). Typically + shouldn't be specified manually: extensions should be listed along with the + suffix. +entities + bids entities as keyword arguments paired with values (e.g. ``space="T1w"`` + for ``space-T1w``) + +""" + + +def format_pyi(spec: str, spec_label: str, spec_clarify: str = ""): + doc = docstr.parse( + DOCSTRING.format(spec_label=spec_label, spec_clarify=spec_clarify) + ) + if doc.long_description: + doc.long_description = "\n\n".join( + "\n".join(textwrap.wrap(para, 84)) if not para.startswith(" ") else para + for para in doc.long_description.split("\n\n") + ) + return TEMPLATE.format( + spec=spec, + docstring=docstr.compose(doc), + ) diff --git a/snakebids/paths/_templates/spec_func.py b/snakebids/paths/_templates/spec_func.py new file mode 100644 index 00000000..3b4013bf --- /dev/null +++ b/snakebids/paths/_templates/spec_func.py @@ -0,0 +1,100 @@ +from __future__ import annotations + +import textwrap +from typing import TYPE_CHECKING + +from snakebids.utils.utils import entity_to_wildcard + +if TYPE_CHECKING: + from snakebids.paths.utils import BidsPathSpec, BidsPathSpecFile + +TEMPLATE = ''' +def {spec}(subject_dir: bool = True, session_dir: bool = True) -> BidsPathSpec: + """{docstring} + """ + ... +''' +DOCSTRING = """{description} + +Parameters +---------- +subject_dir + If False, downstream path generator will not include the subject dir + `sub-{{subject}}/*` +session_dir : bool, optional + If False, downstream path generator will not include the session dir + `*/ses-{{session}}/*` + +""" + +DEFAULT_DESCRIPTION = """Bids Spec v{version} + +Supply this to snakebids.bids_factory to construct a corresponding bids function +""" + + +def _wrap_template(template: str, length: int): + def recurse(lines: list[str]) -> list[str]: + line = lines[-1] + if len(line) <= length: + return lines + i = line[length - 1 :: -1].index("_") + return recurse(lines[:-1] + [line[: length - i], " " + line[length - i :]]) + + return "\n".join(recurse([template])) + + +def compile_example(spec: BidsPathSpec): + # import within function to avoid circular import + from snakebids.paths.factory import bids_factory + + entities = [listing["entity"] for listing in spec] + standard_entities = ("prefix", "datatype", "suffix", "extension") + try: + wild = entities.index("*") + except ValueError: + wild = -2 + template = bids_factory(spec)( + **entity_to_wildcard(standard_entities), + **entity_to_wildcard(e for e in entities if e != "*"), + ) + search = f"{spec[wild+1].get('tag', entities[wild+1])}-{{{entities[wild+1]}}}_" + i = template.index(search) + if wild < 0: + i = len(search) + i + return _wrap_template(template[:i] + "..._" + template[i:], 80) + + +def format_doc(spec: BidsPathSpecFile): + try: + import docstring_parser as docstr + + if (description := spec.get("description")) is None: + description = DEFAULT_DESCRIPTION.format(version=spec["version"]) + + doc = docstr.parse(DOCSTRING.format(description=description.strip())) + if doc.long_description: + doc.long_description = ( + "\n\n".join( + "\n".join(textwrap.wrap(para, 84)) + if not para.startswith(" ") + else para + for para in doc.long_description.split("\n\n") + ) + + "\n\nFormatted as::\n\n " + + compile_example(spec["spec"]) + ) + + return docstr.compose(doc) + except ImportError: + return DOCSTRING.format( + description=DEFAULT_DESCRIPTION.format(version=spec["version"]).strip() + ) + ... + + +def format_pyi(spec: BidsPathSpecFile): + return TEMPLATE.format( + spec=spec["version"].replace(".", "_"), + docstring=format_doc(spec), + ) diff --git a/snakebids/paths/factory.py b/snakebids/paths/factory.py new file mode 100644 index 00000000..61d9730d --- /dev/null +++ b/snakebids/paths/factory.py @@ -0,0 +1,265 @@ +from __future__ import annotations + +import itertools as it +import os +import sys +from pathlib import Path +from typing import Protocol + +import more_itertools as itx + +from snakebids.paths.utils import BidsPathSpec, find_entity + + +class BidsFunction(Protocol): + """Signature for functions returned by ``bids_factory``. + + See :func:`bids` for more details + """ + + def __call__( # noqa: D102 + self, + root: str | Path | None = None, + *, + datatype: str | None = None, + prefix: str | None = None, + suffix: str | None = None, + extension: str | None = None, + **entities: str | bool, + ) -> str: + ... + + +def _get_entity_parser(aliases: dict[str, str]): + def parse_entities(entities: dict[str, str | bool]) -> dict[str, str]: + result: dict[str, str] = {} + for entity, val in entities.items(): + # strip underscores from keys (needed so that users can use reserved + # keywords by appending a _) + stripped = entity.rstrip("_") + unaliased = aliases.get(stripped, stripped) + if unaliased in result: + aliased = itx.nth(aliases, list(aliases.values()).index(unaliased)) + err = ( + "Long and short names of an entity cannot be used in the same " + f"call to bids(): got '{aliased}' and '{unaliased}'" + ) + raise ValueError(err) + result[unaliased] = str(val) + return result + + return parse_entities + + +def bids_factory(spec: BidsPathSpec, *, _v0_0_0: bool = False) -> BidsFunction: + """Generate bids functions according to the supplied spec. + + Parameters + ---------- + spec + Valid Bids Spec object + _v0_0_0 + Provides backward compatibility for the bids_v0_0_0 signature. Should not + otherwise be used + """ + order: list[str] = [] + dirs: set[str] = set() + aliases: dict[str, str] = {} + + if _v0_0_0: + subject_dir_default = find_entity(spec, "subject").get("dir", False) + session_dir_default = find_entity(spec, "session").get("dir", False) + else: + subject_dir_default = True + session_dir_default = True + for entry in spec: + tag = entry.get("tag", entry["entity"]) + order.append(tag) + aliases[entry["entity"]] = tag + if entry.get("dir"): + dirs.add(tag) + + parse_entities = _get_entity_parser(aliases) + + def bids( + root: str | Path | None = None, + *, + datatype: str | None = None, + prefix: str | None = None, + suffix: str | None = None, + extension: str | None = None, + **entities: str | bool, + ) -> str: + """Generate bids or bids-like paths. + + File path is of the form:: + + [root]/[sub-{subject}]/[ses-{session]/ + [prefix]_[sub-{subject}]_[ses-{session}]_[{key}-{val}_ ... ]_[suffix] + + If no arguments are specified, an empty string will be returned. + + Datatype and prefix may not be used in isolation, but must be given with + another entity. + + Bids functions are versioned for long-term stability. The latest version is + ````. Information on its spec can be found at + :func:`~snakebids.paths.specs.`. + + .. warning:: + + The plain function ``bids``, as in:: + + from snakebids import bids + + always points to the latest version. This is unsafe for production + environments, as the function may break without warning when snakebids is + updated, even on patch upgrades. Production code should always use a + versioned bids, such as:: + + from snakebids import bids_ as bids + + Parameters + ---------- + root + Root folder to include in the path (e.g. ``results``) + datatype + Folder to include after sub-/ses- (e.g. ``anat``, ``dwi`` ) + prefix + String to prepend to the file name. Useful for injecting custom entities at + the front of the filename, e.g. ``tpl-{tpl}`` + suffix + Suffix plus, optionally, the extension (e.g. ``T1w.nii.gz``) + extension + bids extension, beginning with ``.`` (e.g. ``.nii.gz``). Typically + shouldn't be specified manually: extensions should be listed along with the + suffix. + entities + bids entities as keyword arguments paired with values (e.g. ``space="T1w"`` + for ``space-T1w``) + + + Examples + -------- + Below is a rule using bids naming for input and output:: + + rule proc_img: + input: 'sub-{subject}_T1w.nii.gz' output: + 'sub-{subject}_space-snsx32_desc-preproc_T1w.nii.gz' + + With bids() you can instead use:: + + rule proc_img: input: bids(subject='{subject}',suffix='T1w.nii.gz') + output: bids( + subject='{subject}', space='snsx32', desc='preproc', + suffix='T1w.nii.gz' + ) + + Note that here we are not actually using "functions as inputs" in snakemake, + which would require a function definition with wildcards as the argument, and + restrict to input/params, but bids() is being used simply to return a string. + + Also note that space, desc and suffix are NOT wildcards here, only {subject} is. + This makes it easy to combine wildcards and non-wildcards with bids-like naming. + + However, you can still use bids() in a lambda function. This is especially + useful if your wildcards are named the same as bids entities (e.g. {subject}, + {session}, {task} etc..):: + + rule proc_img: + input: lambda wildcards: bids(**wildcards,suffix='T1w.nii.gz') output: + bids( + subject='{subject}', space='snsx32', desc='preproc', + suffix='T1w.nii.gz' + ) + + Or another example where you may have many bids-like wildcards used in your + workflow:: + + rule denoise_func: + input: lambda wildcards: bids(**wildcards, suffix='bold.nii.gz') output: + bids( + subject='{subject}', session='{session}', task='{task}', + acq='{acq}', desc='denoise', suffix='bold.nii.gz' + ) + + In this example, all the wildcards will be determined from the output and passed + on to bids() for inputs. The output filename will have a 'desc-denoise' flag + added to it. + + Also note that even if you supply entities in a different order, the entities + will be ordered based on the OrderedDict defined here. If entities not known are + provided, they will be just be placed at the end (before the suffix), in the + order you provide them in. + """ + if _v0_0_0: + from snakebids.paths.specs import v0_0_0 + + include_subject_dir = bool(entities.pop("include_subject_dir", True)) + include_session_dir = bool(entities.pop("include_session_dir", True)) + if ( + include_session_dir ^ session_dir_default + or include_subject_dir ^ subject_dir_default + ): + return bids_factory(v0_0_0(include_subject_dir, include_session_dir))( + root, + datatype=datatype, + prefix=prefix, + suffix=suffix, + extension=extension, + **entities, + ) + + if not any([entities, suffix, extension]) and any([datatype, prefix]): + raise ValueError( + "At least one of suffix, extension, or an entity must be " + "supplied.\n\tGot only: " + + " and ".join( + filter( + None, + ( + f"datatype='{datatype}'" if datatype else None, + f"prefix='{prefix}'" if prefix else None, + ), + ) + ) + ) + + parsed = parse_entities(entities) + + spec_parts: list[str] = [] + custom_parts: list[str] = [] + split: int = sys.maxsize + 1 + path_parts: list[str] = [] + + if root: + path_parts.append(str(root)) + if prefix: + spec_parts.append(prefix) + for entity in order: + # Check for `*` first so that if user specifies an entity called `*` we + # don't skip setting the split + if entity == "*": + split = len(path_parts) + elif value := parsed.pop(entity, None): + spec_parts.append(f"{entity}-{value}") + if entity in dirs: + path_parts.append(f"{entity}-{value}") + for key, value in parsed.items(): + custom_parts.append(f"{key}-{value}") + + if datatype: + path_parts.append(datatype) + path_parts.append( + "_".join(it.chain(spec_parts[:split], custom_parts, spec_parts[split:])) + ) + + tail = f"_{suffix}{extension or ''}" if suffix else extension or "" + + return os.path.join(*path_parts) + tail + + from snakebids.paths.specs import LATEST + + assert bids.__doc__ + bids.__doc__ = bids.__doc__.replace("", LATEST) + return bids diff --git a/snakebids/paths/presets.py b/snakebids/paths/presets.py index 19fc2e86..f68d1cd1 100644 --- a/snakebids/paths/presets.py +++ b/snakebids/paths/presets.py @@ -1,216 +1,37 @@ -"""Utilities for converting Snakemake apps to BIDS apps.""" from __future__ import annotations import functools as ft -import itertools as it -import os -import sys -from pathlib import Path +from typing import TYPE_CHECKING -import more_itertools as itx +from snakebids.paths import specs +from snakebids.paths.factory import bids_factory +from snakebids.paths.specs import latest -from snakebids.paths.specs import v0_0_0 +# +# The code between these tags is automatically generated. Do not +# manually edit +# To update, run:: +# +# poetry run poe update_bids +# +if not TYPE_CHECKING: + __all__ = ["bids_v0_0_0", "bids"] # noqa:F822 -@ft.lru_cache -def _parse_spec(include_subject_dir: bool, include_session_dir: bool): - spec = v0_0_0(subject_dir=include_subject_dir, session_dir=include_session_dir) - order: list[str] = [] - dirs: set[str] = set() - aliases: dict[str, str] = {} - - for entry in spec: - tag = entry.get("tag", entry["entity"]) - order.append(tag) - aliases[entry["entity"]] = tag - if entry.get("dir"): - dirs.add(tag) - - def parse_entities(entities: dict[str, str | bool]) -> dict[str, str]: - result: dict[str, str] = {} - for entity, val in entities.items(): - # strip underscores from keys (needed so that users can use reserved - # keywords by appending a _) - stripped = entity.rstrip("_") - unaliased = aliases.get(stripped, stripped) - if unaliased in result: - aliased = itx.nth(aliases, list(aliases.values()).index(unaliased)) - err = ( - "Long and short names of an entity cannot be used in the same " - f"call to bids(): got '{aliased}' and '{unaliased}'" - ) - raise ValueError(err) - result[unaliased] = str(val) - return result - - return order, dirs, parse_entities - - -def bids( - root: str | Path | None = None, - datatype: str | None = None, - prefix: str | None = None, - suffix: str | None = None, - extension: str | None = None, - **entities: str | bool, -) -> str: - """Generate bids paths based on provided entities according to standardized schema. - - File path is of the form:: - - [root]/[sub-{subject}]/[ses-{session]/ - [prefix]_[sub-{subject}]_[ses-{session}]_[{key}-{val}_ ... ]_[suffix] - - Parameters - ---------- - root : str or Path, default=None - root folder to include in the path (e.g. 'results') - datatype : str, default=None - folder to include after sub-/ses- (e.g. anat, dwi ) - prefix : str, default=None - string to prepend to the file name (typically not defined, unless you - want tpl-{tpl}, or a datatype) - suffix : str, default=None - bids suffix including extension (e.g. 'T1w.nii.gz') - subject : str, default=None - subject to use, for folder and filename - session : str, default=None - session to use, for folder and filename - include_subject_dir : bool, default=True - whether to include the sub-{subject} folder if subject defined - (default: True) - include_session_dir : bool, default=True - whether to include the ses-{session} folder if session defined - (default: True) - **entities : dict, optional - dictionary of bids entities (e.g. space=T1w for space-T1w) - - Returns - ------- - str - bids-like file path - - Examples - -------- - Below is a rule using bids naming for input and output:: - - rule proc_img: - input: 'sub-{subject}_T1w.nii.gz' - output: 'sub-{subject}_space-snsx32_desc-preproc_T1w.nii.gz' - - With bids() you can instead use:: - - rule proc_img: - input: bids(subject='{subject}',suffix='T1w.nii.gz') - output: bids( - subject='{subject}', - space='snsx32', - desc='preproc', - suffix='T1w.nii.gz' - ) + def __dir__(): + return __all__ - Note that here we are not actually using "functions as inputs" in snakemake, which - would require a function definition with wildcards as the argument, and restrict to - input/params, but bids() is being used simply to return a string. - Also note that space, desc and suffix are NOT wildcards here, only {subject} is. - This makes it easy to combine wildcards and non-wildcards with bids-like naming. +# - However, you can still use bids() in a lambda function. This is especially useful if - your wildcards are named the same as bids entities (e.g. {subject}, {session}, - {task} etc..):: - rule proc_img: - input: lambda wildcards: bids(**wildcards,suffix='T1w.nii.gz') - output: bids( - subject='{subject}', - space='snsx32', - desc='preproc', - suffix='T1w.nii.gz' - ) - - Or another example where you may have many bids-like wildcards used in your - workflow:: - - rule denoise_func: - input: lambda wildcards: bids(**wildcards, suffix='bold.nii.gz') - output: bids( - subject='{subject}', - session='{session}', - task='{task}', - acq='{acq}', - desc='denoise', - suffix='bold.nii.gz' - ) - - In this example, all the wildcards will be determined from the output and passed on - to bids() for inputs. The output filename will have a 'desc-denoise' flag added to - it. - - Also note that even if you supply entities in a different order, the entities will - be ordered based on the OrderedDict defined here. If entities not known are - provided, they will be just be placed at the end (before the suffix), in the order - you provide them in. - - Notes - ----- - * For maximum flexibility all arguments are optional (if none are specified, will - return empty string). Note that datatype and prefix may not be used in isolation, - but must be given with another entity. - - * Some code adapted from mne-bids, specifically - https://mne.tools/mne-bids/stable/_modules/mne_bids/utils.html - """ - if not any([entities, suffix, extension]) and any([datatype, prefix]): - raise ValueError( - "At least one of suffix, extension, or an entity must be " - "supplied.\n\tGot only: " - + " and ".join( - filter( - None, - ( - f"datatype='{datatype}'" if datatype else None, - f"prefix='{prefix}'" if prefix else None, - ), - ) - ) - ) - - include_subject_dir = bool(entities.pop("include_subject_dir", True)) - include_session_dir = bool(entities.pop("include_session_dir", True)) - - order, dirs, parse_entities = _parse_spec( - include_subject_dir=include_subject_dir, include_session_dir=include_session_dir - ) - parsed = parse_entities(entities) - - spec_parts: list[str] = [] - custom_parts: list[str] = [] - split: int = sys.maxsize + 1 - path_parts: list[str] = [] - - if root: - path_parts.append(str(root)) - if prefix: - spec_parts.append(prefix) - for entity in order: - # Check for `*` first so that if user specifies an entity called `*` we don't - # skip setting the split - if entity == "*": - split = len(path_parts) - elif value := parsed.pop(entity, None): - spec_parts.append(f"{entity}-{value}") - if entity in dirs: - path_parts.append(f"{entity}-{value}") - for key, value in parsed.items(): - custom_parts.append(f"{key}-{value}") - - if datatype: - path_parts.append(datatype) - path_parts.append( - "_".join(it.chain(spec_parts[:split], custom_parts, spec_parts[split:])) - ) - - tail = f"_{suffix}{extension or ''}" if suffix else extension or "" - - return os.path.join(*path_parts) + tail +@ft.lru_cache +def __getattr__(name: str): + if name == "bids": + return bids_factory(latest()) + prefix = name[:5] + version = name[5:] + if prefix != "bids_" or (spec := getattr(specs, version, None)) is None: + err = f"module '{__name__}' has no attribute '{name}'" + raise AttributeError(err) + return bids_factory(spec()) diff --git a/snakebids/paths/presets.pyi b/snakebids/paths/presets.pyi index 082af31a..e981512e 100644 --- a/snakebids/paths/presets.pyi +++ b/snakebids/paths/presets.pyi @@ -16,8 +16,7 @@ def bids_v0_0_0( ) -> str: """Generate bids or bids-like paths - File path is compiled based on the 'v0_0_0' spec, with the - general form:: + Path is compiled based on the 'v0.0.0' spec, with the general form:: [root]/[sub-{subject}]/[ses-{session}]/ [prefix]_[sub-{subject}]_[ses-{session}]_[{key}-{val}_ ... ]_[suffix] @@ -43,7 +42,6 @@ def bids_v0_0_0( entities bids entities as keyword arguments paired with values (e.g. ``space="T1w"`` for ``space-T1w``) - """ ... @@ -58,8 +56,8 @@ def bids( ) -> str: """Generate bids or bids-like paths - File path is compiled based on the 'latest' spec, with the - general form:: + Path is compiled based on the 'latest' spec (currently pointing to 'v0_0_0'), with + the general form:: [root]/[sub-{subject}]/[ses-{session}]/ [prefix]_[sub-{subject}]_[ses-{session}]_[{key}-{val}_ ... ]_[suffix] @@ -85,6 +83,5 @@ def bids( entities bids entities as keyword arguments paired with values (e.g. ``space="T1w"`` for ``space-T1w``) - """ ... diff --git a/snakebids/paths/resources/spec.0.0.0.yaml b/snakebids/paths/resources/spec.0.0.0.yaml index b3f762c4..418c9426 100644 --- a/snakebids/paths/resources/spec.0.0.0.yaml +++ b/snakebids/paths/resources/spec.0.0.0.yaml @@ -1,3 +1,14 @@ +version: "v0.0.0" +description: | + The legacy spec used since the beginning of snakebids. + + This spec alone equips :func:`~snakebids.bids` with 2 extra arguments: + ``include_subject_dir`` and ``include_session_dir``. These default to + ``True``, but if set ``False``, remove the subject and session dirs + respectively from the output path. For future specs, this behaviour should be + achieved by modifying the spec and generating a new :func:`~snakebids.bids` + function +spec: - entity: "subject" tag: "sub" dir: true diff --git a/snakebids/paths/specs.py b/snakebids/paths/specs.py index 6b7b104b..25463d08 100644 --- a/snakebids/paths/specs.py +++ b/snakebids/paths/specs.py @@ -1,69 +1,53 @@ from __future__ import annotations -from typing import List +from typing import TYPE_CHECKING -import importlib_resources as impr -import more_itertools as itx -from typing_extensions import NotRequired, TypeAlias, TypedDict +from snakebids.paths._templates import spec_func +from snakebids.paths.utils import BidsPathSpec, find_entity, get_spec_path, load_spec -from snakebids.io.yaml import get_yaml_io -from snakebids.paths import resources +# +# The code between these tags is automatically generated. Do not +# manually edit +# To update, run:: +# +# poetry run poe update_bids +# +if not TYPE_CHECKING: + __all__ = ["v0_0_0", "latest", "LATEST"] # noqa:F822 -class BidsPathEntitySpec(TypedDict): - """Interface for BIDS path specification.""" + def __dir__(): + return __all__ - entity: str - """Entity full name""" - tag: NotRequired[str] - """Short entity name, as appears in the path""" +_SPECS = ["v0_0_0"] +LATEST = "v0_0_0" +# - dir: NotRequired[bool] - """If true, a directory with the entity-value pair is created""" +def __getattr__(name: str): + """Allow dynamic retrieval of latest spec.""" + if name == "latest": + name = LATEST -def _find_entity(spec: BidsPathSpec, entity: str): - return itx.one(item for item in spec if item["entity"] == entity) + if name not in _SPECS: + msg = f"module '{__name__}' has no attribute '{name}'" + raise AttributeError(msg) + spec_config = load_spec(get_spec_path(name)) -BidsPathSpec: TypeAlias = List[BidsPathEntitySpec] -"""List of :class:`BidsPathEntitySpec`, defining the order of entities in a bids path""" + spec = spec_config["spec"] + def get_spec(subject_dir: bool = True, session_dir: bool = True) -> BidsPathSpec: + if not subject_dir: + find_entity(spec, "subject")["dir"] = False -def v0_0_0(subject_dir: bool = True, session_dir: bool = True) -> BidsPathSpec: - r"""Get the v0.0.0 BidsPathSpec. + if not session_dir: + find_entity(spec, "session")["dir"] = False - This spec alone equips :func:`~snakebids.bids` with 2 extra arguments: - ``include_subject_dir`` and ``include_session_dir``. These default to ``True``, but - if set ``False``, remove the subject and session dirs respectively from the output - path. For future specs, this behaviour should be achieved by modifying the spec and - generating a new :func:`~snakebids.bids` function + return spec - Formatted as:: + get_spec.__doc__ = spec_func.format_doc(spec_config) + get_spec.__name__ = name - sub-{sub}/ses-{ses}/{datatype}/\ - sub-{sub}_ses-{ses}_task-{task}_acq-{acq}_\ - ce-{ce}_rec-{rec}_dir-{dir}_run-{run}_mod-{mod}_\ - echo-{echo}_hemi-{hemi}_space-{space}_res-{res}_\ - den-{den}_label-{label}_desc-{desc}_..._{suffix}{.ext} - - Parameters - ---------- - subject_dir - If False, downstream path generator will not include the subject dir - `sub-{subject}/*` - session_dir : bool, optional - If False, downstream path generator will not include the session dir - `*/ses-{session}/*` - """ - spec = get_yaml_io().load( - impr.files(resources).joinpath("spec.0.0.0.yaml").read_text() - ) - if not subject_dir: - _find_entity(spec, "subject")["dir"] = False - - if not session_dir: - _find_entity(spec, "session")["dir"] = False - - return spec + return get_spec diff --git a/snakebids/paths/specs.pyi b/snakebids/paths/specs.pyi new file mode 100644 index 00000000..b080a08b --- /dev/null +++ b/snakebids/paths/specs.pyi @@ -0,0 +1,64 @@ +# This stub file is automatically generated +# It can be updated using:: +# +# poetry run poe update_bids + +from .utils import BidsPathSpec + +LATEST: str + +def v0_0_0(subject_dir: bool = True, session_dir: bool = True) -> BidsPathSpec: + """The legacy spec used since the beginning of snakebids. + + This spec alone equips :func:`~snakebids.bids` with 2 extra arguments: + ``include_subject_dir`` and ``include_session_dir``. These default to ``True``, but + if set ``False``, remove the subject and session dirs respectively from the output + path. For future specs, this behaviour should be achieved by modifying the spec and + generating a new :func:`~snakebids.bids` function + + Formatted as:: + + sub-{subject}/ses-{session}/{datatype}/{prefix}_sub-{subject}_ses-{session}_ + task-{task}_acq-{acq}_ce-{ce}_rec-{rec}_dir-{dir}_run-{run}_mod-{mod}_ + echo-{echo}_hemi-{hemi}_space-{space}_res-{res}_den-{den}_label-{label}_ + desc-{desc}_..._{suffix}{extension} + + + Parameters + ---------- + subject_dir + If False, downstream path generator will not include the subject dir + `sub-{subject}/*` + session_dir : bool, optional + If False, downstream path generator will not include the session dir + `*/ses-{session}/*` + """ + ... + +def latest(subject_dir: bool = True, session_dir: bool = True) -> BidsPathSpec: + """The legacy spec used since the beginning of snakebids. + + This spec alone equips :func:`~snakebids.bids` with 2 extra arguments: + ``include_subject_dir`` and ``include_session_dir``. These default to ``True``, but + if set ``False``, remove the subject and session dirs respectively from the output + path. For future specs, this behaviour should be achieved by modifying the spec and + generating a new :func:`~snakebids.bids` function + + Formatted as:: + + sub-{subject}/ses-{session}/{datatype}/{prefix}_sub-{subject}_ses-{session}_ + task-{task}_acq-{acq}_ce-{ce}_rec-{rec}_dir-{dir}_run-{run}_mod-{mod}_ + echo-{echo}_hemi-{hemi}_space-{space}_res-{res}_den-{den}_label-{label}_ + desc-{desc}_..._{suffix}{extension} + + + Parameters + ---------- + subject_dir + If False, downstream path generator will not include the subject dir + `sub-{subject}/*` + session_dir : bool, optional + If False, downstream path generator will not include the session dir + `*/ses-{session}/*` + """ + ... diff --git a/snakebids/paths/utils.py b/snakebids/paths/utils.py new file mode 100644 index 00000000..e2a68583 --- /dev/null +++ b/snakebids/paths/utils.py @@ -0,0 +1,73 @@ +from __future__ import annotations + +from typing import Iterator, List + +import importlib_resources as impr +import more_itertools as itx +from typing_extensions import NotRequired, TypeAlias, TypedDict + +from snakebids.io.yaml import get_yaml_io +from snakebids.paths import resources + + +def find_entity(spec: BidsPathSpec, entity: str) -> BidsPathEntitySpec: + """Return configuration for specified entity out of BidsPathSpec.""" + return itx.one(item for item in spec if item["entity"] == entity) + + +class BidsPathEntitySpec(TypedDict): + """Defines an entity in a bids path.""" + + entity: str + """Entity full name""" + + tag: NotRequired[str] + """Short entity name, as appears in the path""" + + dir: NotRequired[bool] + """If true, a directory with the entity-value pair is created""" + + +BidsPathSpec: TypeAlias = List[BidsPathEntitySpec] +"""List of :class:`BidsPathEntitySpec`, defining the order of entities in a bids path""" + + +class BidsPathSpecFile(TypedDict): + """Defines the valid structure for a BidsSpec definition file.""" + + version: str + """Version of the spec in semver""" + + description: NotRequired[str] + """Optional description to be used in the spec function docstring""" + + spec: BidsPathSpec + """Definition of the spec""" + + +def load_spec(path: impr.abc.Traversable) -> BidsPathSpecFile: + """Return the spec saved in the provided yaml file.""" + return get_yaml_io().load(path.read_text()) + + +def get_specs() -> Iterator[BidsPathSpecFile]: + """Yield all defined bids spec file objects.""" + for path in impr.files(resources).iterdir(): + if ( + path.is_file() + and path.name.startswith("spec.") + and path.name.endswith(".yaml") + ): + yield load_spec(path) + + +def get_spec_path(version: str) -> impr.abc.Traversable: + """Return path corresponding to provided spec version attribute. + + Parameters + ---------- + version + version attribute formatted as vx_x_x where x_x_x is spec semver + """ + dotted = version[1:].replace("_", ".") + return impr.files(resources).joinpath(f"spec.{dotted}.yaml") diff --git a/snakebids/tests/test_paths/test_bids.py b/snakebids/tests/test_paths/test_bids.py index c19168c4..accd7b0c 100644 --- a/snakebids/tests/test_paths/test_bids.py +++ b/snakebids/tests/test_paths/test_bids.py @@ -11,20 +11,13 @@ from hypothesis import strategies as st from pathvalidate import Platform, is_valid_filename, is_valid_filepath +from snakebids.paths import specs from snakebids.paths.presets import bids -from snakebids.paths.specs import v0_0_0 +from snakebids.paths.utils import BidsPathSpec from snakebids.tests import strategies as sb_st from snakebids.tests.helpers import Benchmark, is_strictly_increasing from snakebids.utils.utils import BidsEntity -V0_0_0 = v0_0_0() -STD_ENTITIES = {e["entity"] for e in V0_0_0} -HAS_TAG = {e["entity"] for e in V0_0_0 if e.get("tag")} -HAS_DIR = {e["entity"] for e in V0_0_0 if e.get("dir")} - - -# module specific entities - def _get_entity_tags(entities: Iterable[str]): return [BidsEntity.normalize(entity).tag for entity in entities] @@ -40,250 +33,273 @@ def _roots(): return st.text().filter(lambda s: is_valid_filepath(s, Platform.LINUX)) -def _bids_args( - entities: set[str] | None = STD_ENTITIES, - nonstandard: bool = True, - custom: bool = True, -): - return ( - st.dictionaries( - keys=st.one_of( - # standard - sb_st.bids_entity(whitelist_entities=entities) - if entities is not None - else sb_st.nothing(), - # custom entities - _values() - .map(BidsEntity) - .filter( - lambda s: str(s) - not in {"datatype", "suffix", "extension", "prefix"} - ) - if custom - else sb_st.nothing(), - # nonstandard entities - sb_st.bids_entity( - whitelist_entities=["datatype", "suffix", "extension"] - ) - if nonstandard - else sb_st.nothing(), - ), - # The boolean here is to decide whether to use the entity or the tag in the - # BidsEntity generated above - values=st.tuples( - st.booleans(), - _values(), - ), - min_size=1, - ) - .map( - # we only want to use the full entity name if the spec in use understands - # the full entity name - lambda d: { - key.entity if val[0] and str(key) in HAS_TAG else key.tag: val[1] - for key, val in d.items() - } - ) - .filter(lambda s: set(s) - {"datatype", "extension"}) - ) - - -@given(_bids_args(nonstandard=False)) -def test_number_of_underscore_corresponds_to_number_entities(entities: dict[str, str]): - assert bids(**entities).count("_") == len(entities) - 1 - - -@given(_bids_args(nonstandard=False)) -def test_number_of_dashes_corresponds_to_number_entities(entities: dict[str, str]): - assert Path(bids(**entities)).name.count("-") == len(entities) - - -@given(entities=_bids_args(), prefix=_values()) -def test_beginning_of_name_always_prefix(entities: dict[str, str], prefix: str): - assert Path(bids(prefix=prefix, **entities)).name.startswith(prefix) - - -@given(entities=_bids_args(nonstandard=False), suffix=_values(), extension=_values()) -def test_end_of_path_always_suffix_extension( - entities: dict[str, str], suffix: str, extension: str -): - assert bids(suffix=suffix, extension=extension, **entities).endswith( - suffix + extension - ) - - -@given(entities=_bids_args(nonstandard=False), suffix=_values()) -def test_underscore_precedes_suffix(entities: dict[str, str], suffix: str): - assert bids(suffix=suffix, **entities)[len(suffix) * -1 - 1] == "_" - +def make_bids_testsuite(spec: BidsPathSpec): + std_entities = {e["entity"] for e in spec} + has_tag = {e["entity"] for e in spec if e.get("tag")} + has_dir = {e["entity"] for e in spec if e.get("dir")} -@given( - entities=_bids_args(nonstandard=False), - suffix=_values() | st.none(), - extension=_values(), -) -def test_underscore_does_not_precede_extension( - entities: dict[str, str], suffix: str | None, extension: str -): - assert ( - bids(suffix=suffix, extension=extension, **entities)[len(extension) * -1 - 1] - != "_" - ) - - -@given(entities=_bids_args(nonstandard=False)) -def test_no_underscore_at_end_if_no_suffix(entities: dict[str, str]): - assert bids(**entities)[-1] != "_" - - -@given(entities=_bids_args(), root=_roots()) -def test_beginning_of_path_always_root(entities: dict[str, str], root: str): - path = bids(root=root, **entities) - assert path.startswith(root) - length = len(root) - 1 if root[-1] == os.path.sep else len(root) - assert path[length] == os.path.sep + def _bids_args( + entities: set[str] | None = std_entities, + nonstandard: bool = True, + custom: bool = True, + ): + return ( + st.dictionaries( + keys=st.one_of( + # standard + sb_st.bids_entity(whitelist_entities=entities) + if entities is not None + else sb_st.nothing(), + # custom entities + _values() + .map(BidsEntity) + .filter( + lambda s: str(s) + not in {"datatype", "suffix", "extension", "prefix"} + ) + if custom + else sb_st.nothing(), + # nonstandard entities + sb_st.bids_entity( + whitelist_entities=["datatype", "suffix", "extension"] + ) + if nonstandard + else sb_st.nothing(), + ), + # The boolean here is to decide whether to use the entity or the tag in + # the BidsEntity generated above + values=st.tuples( + st.booleans(), + _values(), + ), + min_size=1, + ) + .map( + # we only want to use the full entity name if the spec in use + # understands the full entity name + lambda d: { + key.entity if val[0] and str(key) in has_tag else key.tag: val[1] + for key, val in d.items() + } + ) + .filter(lambda s: set(s) - {"datatype", "extension"}) + ) + class BidsTests: + @given(_bids_args(nonstandard=False)) + def test_number_of_underscore_corresponds_to_number_entities( + self, entities: dict[str, str] + ): + assert bids(**entities).count("_") == len(entities) - 1 + + @given(_bids_args(nonstandard=False)) + def test_number_of_dashes_corresponds_to_number_entities( + self, entities: dict[str, str] + ): + assert Path(bids(**entities)).name.count("-") == len(entities) + + @given(entities=_bids_args(), prefix=_values()) + def test_beginning_of_name_always_prefix( + self, entities: dict[str, str], prefix: str + ): + assert Path(bids(prefix=prefix, **entities)).name.startswith(prefix) + + @given( + entities=_bids_args(nonstandard=False), + suffix=_values(), + extension=_values(), + ) + def test_end_of_path_always_suffix_extension( + self, entities: dict[str, str], suffix: str, extension: str + ): + assert bids(suffix=suffix, extension=extension, **entities).endswith( + suffix + extension + ) + + @given(entities=_bids_args(nonstandard=False), suffix=_values()) + def test_underscore_precedes_suffix( + self, entities: dict[str, str], suffix: str + ): + assert bids(suffix=suffix, **entities)[len(suffix) * -1 - 1] == "_" + + @given( + entities=_bids_args(nonstandard=False), + suffix=_values() | st.none(), + extension=_values(), + ) + def test_underscore_does_not_precede_extension( + self, entities: dict[str, str], suffix: str | None, extension: str + ): + assert ( + bids(suffix=suffix, extension=extension, **entities)[ + len(extension) * -1 - 1 + ] + != "_" + ) + + @given(entities=_bids_args(nonstandard=False)) + def test_no_underscore_at_end_if_no_suffix(self, entities: dict[str, str]): + assert bids(**entities)[-1] != "_" + + @given(entities=_bids_args(), root=_roots()) + def test_beginning_of_path_always_root( + self, entities: dict[str, str], root: str + ): + path = bids(root=root, **entities) + assert path.startswith(root) + length = len(root) - 1 if root[-1] == os.path.sep else len(root) + assert path[length] == os.path.sep + + @example(entities={"sub": "0"}, datatype=".", root="0") + @given( + entities=_bids_args(nonstandard=False), datatype=_values(), root=_roots() + ) + def test_bottom_directory_always_datatype( + self, entities: dict[str, str], datatype: str, root: str + ): + # use os.path functions so that datatype=="." is treated safely + assert datatype == os.path.basename( + os.path.dirname(bids(root=root, datatype=datatype, **entities)) + ) + + @given(entities=_bids_args(nonstandard=False), datatype=_values()) + def test_datatype_not_in_path_name( + self, entities: dict[str, str], datatype: str + ): + assume(datatype not in "".join(it.chain.from_iterable(entities.items()))) + assert datatype not in Path(bids(datatype=datatype, **entities)).name + + @given(entities=_bids_args(nonstandard=False)) + def test_entities_all_in_path_as_tags(self, entities: dict[str, str]): + tags = _get_entity_tags(entities) + path = "_" + Path(bids(**entities)).name + for tag in tags: + assert f"_{tag}-" in path + + @given(entities=_bids_args(entities=has_tag, nonstandard=False, custom=False)) + def test_full_entity_names_not_in_path(self, entities: dict[str, str]): + non_tags = [ + normed.entity + for entity in entities + if entity != (normed := BidsEntity.normalize(entity)).tag + ] + path = bids(**entities) + for entity in non_tags: + assert f"{entity}-" not in path + + @given(entities=_bids_args(entities=has_tag, nonstandard=False, custom=False)) + def test_long_and_short_names_cannot_be_used_simultaneously( + self, entities: dict[str, str] + ): + entities = {BidsEntity.normalize(e).entity: v for e, v in entities.items()} + tags = {BidsEntity.normalize(e).tag: v for e, v in entities.items()} + with pytest.raises( + ValueError, + match="Long and short names of an entity cannot be used in the same", + ) as err: + bids(**entities, **tags) + assert itx.first(entities) in err.value.args[0] + assert itx.first(tags) in err.value.args[0] + + @given( + entities=_bids_args(nonstandard=False, custom=False), + custom=_bids_args(entities=None, nonstandard=False), + ) + def test_entities_found_in_name_in_correct_order( + self, entities: dict[str, str], custom: dict[str, str] + ): + # Make sure custom tags don't overlap with main tags + assume(not set(entities) & set(custom)) + tags = _get_entity_tags(entities) + order: list[str] = [] + for entity in spec: + if (tag := entity.get("tag", entity["entity"])) in tags: + order.append(tag) + # Custom tags come after defined tags + order.extend(custom) + path = "_" + Path(bids(**custom, **entities)).name + assert set(tags) | set(custom) == set(order) + assert is_strictly_increasing(path.index(f"_{e}-") for e in order) + + @given( + entities=_bids_args(entities=std_entities - has_dir, nonstandard=False), + root=_roots(), + ) + def test_nondir_entities_dont_have_dirs( + self, entities: dict[str, str], root: str + ): + assert Path(bids(root=root, **entities)).parent == Path(root) + + @given(entities=_bids_args(entities=has_dir, nonstandard=False, custom=False)) + def test_dir_entities_each_own_dir(self, entities: dict[str, str]): + for par in itx.islice_extended(Path(bids(**entities)).parents, 0, -1): + count = 0 + for e in list(entities): + tag = BidsEntity.normalize(e).tag + if ( + par.name[: len(tag)] == tag + # if found, appears nowhere else + and f"{tag}-" not in str(par.parent) + ): + del entities[e] + count += 1 + assert count == 1 + # Check that all entities have been removed (ie found) + assert not entities + + @given( + entities=_bids_args(entities=has_dir, nonstandard=False, custom=False), + root=_roots().filter(lambda s: s != "."), + ) + def test_directories_in_correct_order( + self, entities: dict[str, str], root: str + ): + tags = _get_entity_tags(entities) + order: list[str] = [] + for entity in spec: + if (tag := entity.get("tag", entity["entity"])) in tags: + order.append(tag) + path = str(Path(bids(root=root, **entities)).parent) + assert set(tags) == set(order) + assert is_strictly_increasing( + path.index(f"{os.path.sep}{e}-") for e in order + ) + + @given(entities=_bids_args(nonstandard=False), root=_roots()) + def test_values_paired_with_entities(self, entities: dict[str, str], root: str): + path = Path(bids(root=root, **entities)) + name = "_" + path.name + parent = os.path.sep + str(path.parent) + + def assert_follows(string: str, first: str, second: str): + start = string.index(first) + len(first) + assert string[start : start + len(second)] == second + + for entity, value in entities.items(): + tag = BidsEntity.normalize(entity).tag + assert_follows(name, f"_{tag}-", value) + if entity in has_dir: + assert_follows(parent, f"{os.path.sep}{tag}-", value) + + def test_bids_with_no_args_gives_empty_path(self): + assert not bids() + + @given( + args=st.dictionaries( + st.sampled_from(["datatype", "prefix"]), _values(), min_size=1 + ) + ) + def test_missing_essential_entities_gives_error(self, args: dict[str, str]): + with pytest.raises( + ValueError, + match="At least one of suffix, extension, or an entity must be", + ): + bids(**args) -@example(entities={"sub": "0"}, datatype=".", root="0") -@given(entities=_bids_args(nonstandard=False), datatype=_values(), root=_roots()) -def test_bottom_directory_always_datatype( - entities: dict[str, str], datatype: str, root: str -): - # use os.path functions so that datatype=="." is treated safely - assert datatype == os.path.basename( - os.path.dirname(bids(root=root, datatype=datatype, **entities)) - ) + return BidsTests -@given(entities=_bids_args(nonstandard=False), datatype=_values()) -def test_datatype_not_in_path_name(entities: dict[str, str], datatype: str): - assume(datatype not in "".join(it.chain.from_iterable(entities.items()))) - assert datatype not in Path(bids(datatype=datatype, **entities)).name - - -@given(entities=_bids_args(nonstandard=False)) -def test_entities_all_in_path_as_tags(entities: dict[str, str]): - tags = _get_entity_tags(entities) - path = "_" + Path(bids(**entities)).name - for tag in tags: - assert f"_{tag}-" in path - - -@given(entities=_bids_args(entities=HAS_TAG, nonstandard=False, custom=False)) -def test_full_entity_names_not_in_path(entities: dict[str, str]): - non_tags = [ - normed.entity - for entity in entities - if entity != (normed := BidsEntity.normalize(entity)).tag - ] - path = bids(**entities) - for entity in non_tags: - assert f"{entity}-" not in path - - -@given(entities=_bids_args(entities=HAS_TAG, nonstandard=False, custom=False)) -def test_long_and_short_names_cannot_be_used_simultaneously(entities: dict[str, str]): - entities = {BidsEntity.normalize(e).entity: v for e, v in entities.items()} - tags = {BidsEntity.normalize(e).tag: v for e, v in entities.items()} - with pytest.raises( - ValueError, match="Long and short names of an entity cannot be used in the same" - ) as err: - bids(**entities, **tags) - assert itx.first(entities) in err.value.args[0] - assert itx.first(tags) in err.value.args[0] - - -@given( - entities=_bids_args(nonstandard=False, custom=False), - custom=_bids_args(entities=None, nonstandard=False), -) -def test_entities_found_in_name_in_correct_order( - entities: dict[str, str], custom: dict[str, str] -): - # Make sure custom tags don't overlap with main tags - assume(not set(entities) & set(custom)) - tags = _get_entity_tags(entities) - order: list[str] = [] - for entity in V0_0_0: - if (tag := entity.get("tag", entity["entity"])) in tags: - order.append(tag) - # Custom tags come after defined tags - order.extend(custom) - path = "_" + Path(bids(**custom, **entities)).name - assert set(tags) | set(custom) == set(order) - assert is_strictly_increasing(path.index(f"_{e}-") for e in order) - - -@given( - entities=_bids_args(entities=STD_ENTITIES - HAS_DIR, nonstandard=False), - root=_roots(), -) -def test_nondir_entities_dont_have_dirs(entities: dict[str, str], root: str): - assert Path(bids(root=root, **entities)).parent == Path(root) - - -@given(entities=_bids_args(entities=HAS_DIR, nonstandard=False, custom=False)) -def test_dir_entities_each_own_dir(entities: dict[str, str]): - for par in itx.islice_extended(Path(bids(**entities)).parents, 0, -1): - count = 0 - for e in list(entities): - tag = BidsEntity.normalize(e).tag - if ( - par.name[: len(tag)] == tag - # if found, appears nowhere else - and f"{tag}-" not in str(par.parent) - ): - del entities[e] - count += 1 - assert count == 1 - # Check that all entities have been removed (ie found) - assert not entities - - -@given( - entities=_bids_args(entities=HAS_DIR, nonstandard=False, custom=False), - root=_roots().filter(lambda s: s != "."), -) -def test_directories_in_correct_order(entities: dict[str, str], root: str): - tags = _get_entity_tags(entities) - order: list[str] = [] - for entity in V0_0_0: - if (tag := entity.get("tag", entity["entity"])) in tags: - order.append(tag) - path = str(Path(bids(root=root, **entities)).parent) - assert set(tags) == set(order) - assert is_strictly_increasing(path.index(f"{os.path.sep}{e}-") for e in order) - - -@given(entities=_bids_args(nonstandard=False), root=_roots()) -def test_values_paired_with_entities(entities: dict[str, str], root: str): - path = Path(bids(root=root, **entities)) - name = "_" + path.name - parent = os.path.sep + str(path.parent) - - def assert_follows(string: str, first: str, second: str): - start = string.index(first) + len(first) - assert string[start : start + len(second)] == second - - for entity, value in entities.items(): - tag = BidsEntity.normalize(entity).tag - assert_follows(name, f"_{tag}-", value) - if entity in HAS_DIR: - assert_follows(parent, f"{os.path.sep}{tag}-", value) - - -def test_bids_with_no_args_gives_empty_path(): - assert not bids() - - -@given( - args=st.dictionaries(st.sampled_from(["datatype", "prefix"]), _values(), min_size=1) -) -def test_missing_essential_entities_gives_error(args: dict[str, str]): - with pytest.raises( - ValueError, match="At least one of suffix, extension, or an entity must be" - ): - bids(**args) +TestV0_0_0 = make_bids_testsuite(specs.v0_0_0()) def test_benchmark_bids(benchmark: Benchmark): @@ -291,6 +307,7 @@ def test_benchmark_bids(benchmark: Benchmark): benchmark( bids, root="foo/bar", + subject_dir=False, subject="001", session="32", run="stop", diff --git a/snakebids/tests/test_paths/test_specs.py b/snakebids/tests/test_paths/test_specs.py index 825035d7..bcda2d63 100644 --- a/snakebids/tests/test_paths/test_specs.py +++ b/snakebids/tests/test_paths/test_specs.py @@ -1,4 +1,5 @@ -from snakebids.paths.specs import _find_entity, v0_0_0 +from snakebids.paths.specs import v0_0_0 +from snakebids.paths.utils import find_entity def test_all_entries_define_entity(): @@ -9,11 +10,11 @@ def test_all_entries_define_entity(): def test_subject_dir_can_be_excluded(): spec = v0_0_0(subject_dir=False) - subject = _find_entity(spec, "subject") + subject = find_entity(spec, "subject") assert subject.get("dir") is False def test_session_dir_can_be_excluded(): spec = v0_0_0(session_dir=False) - session = _find_entity(spec, "session") + session = find_entity(spec, "session") assert session.get("dir") is False