From 03afc3772b6e97f09956efed1d7650b8ed912207 Mon Sep 17 00:00:00 2001 From: Karl5766 Date: Thu, 25 Jul 2024 10:40:12 -0400 Subject: [PATCH] use double slash to test local relative instead --- snakebids/core/input_generation.py | 27 +++++++++---------------- snakebids/tests/strategies.py | 17 ++++++++-------- snakebids/tests/test_generate_inputs.py | 22 +++++++++----------- 3 files changed, 27 insertions(+), 39 deletions(-) diff --git a/snakebids/core/input_generation.py b/snakebids/core/input_generation.py index 4b917374..aef19aaa 100644 --- a/snakebids/core/input_generation.py +++ b/snakebids/core/input_generation.py @@ -15,7 +15,6 @@ Literal, overload, ) -from urllib.parse import urlparse import more_itertools as itx from bids import BIDSLayout, BIDSLayoutIndexer @@ -393,26 +392,22 @@ def _all_custom_paths(config: InputsConfig): return all(comp.get("custom_path") for comp in config.values()) -def _is_network_fs_path(path: Path | str): - """Test if a path location is using a network file system e.g. "gs://" or "s3://". - - Paths like "file:///..." will not be recognized as network even if they may be - network locations mapped to a local location. Windows ("C://") and linux - ("/path/to/file") paths and relative path ("path/to/file") are not network file - system paths. +def _is_local_relative(path: Path | str): + """Test if a path location is local path relative to the current working directory. Parameter --------- - A UPath, Path, or str object to be checked + path + A UPath, Path, or str object to be checked Returns ------- is_url : bool - True if the path belongs to a network file system + True if the path is relative """ - # StackOverflow reference: (by Kukanani) - # https://stackoverflow.com/questions/8357098/how-can-i-check-if-a-url-is-absolute-using-python - return bool(urlparse(str(path)).netloc) # netloc="" for non-local paths + path_str = str(path) + is_doubleslash_schemed = "://" in path_str + return not is_doubleslash_schemed and not os.path.isabs(path_str) def _gen_bids_layout( @@ -746,11 +741,7 @@ def _parse_bids_path(path: str, entities: Iterable[str]) -> tuple[str, dict[str, # If path is relative, we need to get a slash in front of it to ensure parsing works # correctly. So prepend "./" or ".\" and run function again, then strip before # returning - if ( - not _is_network_fs_path(path) - and not os.path.isabs(path) - and get_first_dir(path) != "." - ): + if _is_local_relative(path) and get_first_dir(path) != ".": path_, wildcard_values = _parse_bids_path(os.path.join(".", path), entities) return str(Path(path_)), wildcard_values diff --git a/snakebids/tests/strategies.py b/snakebids/tests/strategies.py index 000b92e0..1c1f0975 100644 --- a/snakebids/tests/strategies.py +++ b/snakebids/tests/strategies.py @@ -37,6 +37,11 @@ valid_entities: tuple[str, ...] = tuple(BidsConfig.load("bids").entities.keys()) path_characters = st.characters(blacklist_characters=["/", "\x00"], codec="UTF-8") +# StackOverflow answer by Gumbo +# https://stackoverflow.com/questions/1547899/which-characters-make-a-url-invalid#1547940 +scheme_characters = st.sampled_from( + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~?#[]@!$&'()*+,;=" +) def nothing() -> Any: @@ -45,15 +50,9 @@ def nothing() -> Any: def schemes() -> st.SearchStrategy[str]: # Generate the prefix part of a url. - # - # we only sample from a fixed set of schemes. If schemes are randomly generated - # then something like file:// could be a problem because it may be either an - # abs or relative path and will fail the os.path.isabs() code - # - # Note for its current status we don't guarantee the following schemes to work - # but only requiring them to be retained correctly i.e. not from 'gs://' to - # 'gs:/' when fed into generate_inputs() and path expansion - return st.sampled_from(["C://", "c:\\", "gs://", "s3://"]) + random_st = st.text(scheme_characters, min_size=1).map(lambda s: f"{s}://") + fixed_st = st.sampled_from(["C://", "c:\\", "gs://", "s3://"]) + return random_st | fixed_st def paths( diff --git a/snakebids/tests/test_generate_inputs.py b/snakebids/tests/test_generate_inputs.py index e516b68f..64175f9b 100644 --- a/snakebids/tests/test_generate_inputs.py +++ b/snakebids/tests/test_generate_inputs.py @@ -30,7 +30,7 @@ _all_custom_paths, _gen_bids_layout, _get_components, - _is_network_fs_path, + _is_local_relative, _normalize_database_args, _parse_bids_path, _parse_custom_path, @@ -1611,23 +1611,21 @@ class TestRecogPathSchemes: ("./hello/world", "RELATIVE"), ("hello/world", "RELATIVE"), ("/hello/world", "ABSOLUTE"), - (r"C:\some\file", "ABSOLUTE"), - (r"C:\\some\\file", "ABSOLUTE"), - ("C:/some/file", "ABSOLUTE"), ("gs://some/google/cloud/bucket", "NETWORK"), ("s3://some/s3/bucket", "NETWORK"), ) @pytest.mark.parametrize(("path", "path_type"), PATH_AND_TYPES) - def test_is_network_fs_path(self, path, path_type): + def test_is_local_relative(self, path, path_type): isnet = path_type == "NETWORK" + is_local_relative = path_type == "RELATIVE" # test the path itself, and the corresponding Path(path) assert ( - _is_network_fs_path(path) == isnet - ), f"Path {path} should have isnet={isnet} but got {_is_network_fs_path(path)}" + _is_local_relative(path) == is_local_relative + ), f"Path {path} fails is local relative path test." if not isnet: - assert not _is_network_fs_path(Path(path)) + assert _is_local_relative(Path(path)) == is_local_relative @pytest.mark.skipif( sys.version_info < (3, 12), reason="Path class has no with_segments()" @@ -1645,7 +1643,7 @@ def __init__(self, *pathsegments): def __str__(self): # __fspath__ calls __str__ by default return f"gs://{super().__str__()}" - assert _is_network_fs_path(MockGCSPath(path)) + assert not _is_local_relative(MockGCSPath(path)) @example_if( @@ -1914,7 +1912,7 @@ def test_when_all_custom_paths_no_layout_indexed( class TestParseBidsPath: @given( component=sb_st.bids_components(max_values=1, restrict_patterns=True), - scheme=sb_st.schemes(), + scheme=sb_st.schemes() | st.none(), ) def test_splits_wildcards_from_path( self, component: BidsComponent, scheme: str | None @@ -1928,7 +1926,7 @@ def test_splits_wildcards_from_path( @given( component=sb_st.bids_components(max_values=1, restrict_patterns=True), - scheme=sb_st.schemes(), + scheme=sb_st.schemes() | st.none(), ) def test_one_match_found_for_each_entity( self, component: BidsComponent, scheme: str | None @@ -1946,7 +1944,7 @@ def test_one_match_found_for_each_entity( component=sb_st.bids_components( max_values=1, restrict_patterns=True, extra_entities=False ), - scheme=sb_st.schemes(), + scheme=sb_st.schemes() | st.none(), entity=sb_st.bids_entity(), ) def test_missing_match_leads_to_error(