-
Notifications
You must be signed in to change notification settings - Fork 7
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add routines for L1 and L2 processing #220
base: main
Are you sure you want to change the base?
Changes from 25 commits
6841233
d537d98
01994b8
1929dd8
844592e
1d9fb73
38cf58f
b9851c1
20e690f
72d444b
00ff7ee
c20e7e1
7f802eb
de09ad9
6298811
deb0972
055384c
7874379
6f760e4
68ca587
62e96ed
51c8df4
7fd47bc
5494f92
0ed1694
9ac2b65
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,11 +24,11 @@ | |
|
||
from disdrodb.api.info import infer_disdrodb_tree_path_components | ||
from disdrodb.api.path import ( | ||
define_data_dir, | ||
define_issue_dir, | ||
define_issue_filepath, | ||
define_metadata_dir, | ||
define_metadata_filepath, | ||
define_station_dir, | ||
) | ||
from disdrodb.utils.directories import ( | ||
ensure_string_path, | ||
|
@@ -70,10 +70,7 @@ def check_url(url: str) -> bool: | |
``True`` if url well formatted, ``False`` if not well formatted. | ||
""" | ||
regex = r"^(https?:\/\/)?(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)$" # noqa: E501 | ||
|
||
if re.match(regex, url): | ||
return True | ||
return False | ||
return re.match(regex, url) | ||
|
||
|
||
def check_path_is_a_directory(dir_path, path_name=""): | ||
|
@@ -95,6 +92,7 @@ def check_directories_inside(dir_path): | |
def check_base_dir(base_dir: str): | ||
"""Raise an error if the path does not end with ``DISDRODB``.""" | ||
base_dir = str(base_dir) # convert Pathlib to string | ||
base_dir = os.path.normpath(base_dir) | ||
if not base_dir.endswith("DISDRODB"): | ||
raise ValueError(f"The path {base_dir} does not end with DISDRODB. Please check the path.") | ||
return base_dir | ||
|
@@ -150,53 +148,76 @@ def check_product(product): | |
"""Check DISDRODB product.""" | ||
if not isinstance(product, str): | ||
raise TypeError("`product` must be a string.") | ||
valid_products = ["RAW", "L0A", "L0B"] | ||
valid_products = ["RAW", "L0A", "L0B", "L0C", "L1", "L2E", "L2M", "L2S"] | ||
if product.upper() not in valid_products: | ||
msg = f"Valid `products` are {valid_products}." | ||
logger.error(msg) | ||
raise ValueError(msg) | ||
return product | ||
|
||
|
||
def check_station_dir(product, data_source, campaign_name, station_name, base_dir=None): | ||
"""Check existence of the station data directory. If does not exists, raise an error.""" | ||
station_dir = define_station_dir( | ||
ghiggi marked this conversation as resolved.
Show resolved
Hide resolved
|
||
def has_available_data( | ||
data_source, | ||
campaign_name, | ||
station_name, | ||
product, | ||
base_dir=None, | ||
# Option for L2E | ||
sample_interval=None, | ||
rolling=None, | ||
# Option for L2M | ||
model_name=None, | ||
): | ||
"""Return ``True`` if data are available for the given product and station.""" | ||
# Define product directory | ||
data_dir = define_data_dir( | ||
Comment on lines
+159
to
+173
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ℹ New issue: Excess Number of Function Arguments |
||
product=product, | ||
base_dir=base_dir, | ||
data_source=data_source, | ||
campaign_name=campaign_name, | ||
station_name=station_name, | ||
# Option for L2E | ||
sample_interval=sample_interval, | ||
rolling=rolling, | ||
# Option for L2M | ||
model_name=model_name, | ||
# Directory options | ||
check_exists=False, | ||
) | ||
if not os.path.exists(station_dir) and os.path.isdir(station_dir): | ||
msg = f"The station {station_name} data directory does not exist at {station_dir}." | ||
logger.error(msg) | ||
raise ValueError(msg) | ||
return station_dir | ||
|
||
# If the product directory does not exists, return False | ||
if not os.path.isdir(data_dir): | ||
return False | ||
|
||
def has_available_station_files(product, data_source, campaign_name, station_name, base_dir=None): | ||
"""Return ``True`` if data are available for the given product and station.""" | ||
station_dir = check_station_dir( | ||
product=product, | ||
base_dir=base_dir, | ||
data_source=data_source, | ||
campaign_name=campaign_name, | ||
station_name=station_name, | ||
) | ||
filepaths = list_files(station_dir, glob_pattern="*", recursive=True) | ||
ghiggi marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# If no files, return False | ||
filepaths = list_files(data_dir, glob_pattern="*", recursive=True) | ||
nfiles = len(filepaths) | ||
return nfiles >= 1 | ||
|
||
|
||
def check_station_has_data(product, data_source, campaign_name, station_name, base_dir=None): | ||
"""Check the station data directory has data inside. If not, raise an error.""" | ||
if not has_available_station_files( | ||
ghiggi marked this conversation as resolved.
Show resolved
Hide resolved
|
||
def check_data_availability( | ||
product, | ||
data_source, | ||
campaign_name, | ||
station_name, | ||
base_dir=None, | ||
# Option for L2E | ||
sample_interval=None, | ||
rolling=None, | ||
# Option for L2M | ||
model_name=None, | ||
): | ||
"""Check the station product data directory has files inside. If not, raise an error.""" | ||
if not has_available_data( | ||
Comment on lines
+197
to
+210
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ℹ New issue: Excess Number of Function Arguments |
||
product=product, | ||
base_dir=base_dir, | ||
data_source=data_source, | ||
campaign_name=campaign_name, | ||
station_name=station_name, | ||
# Option for L2E | ||
sample_interval=sample_interval, | ||
rolling=rolling, | ||
# Option for L2M | ||
model_name=model_name, | ||
): | ||
msg = f"The {product} station data directory of {data_source} {campaign_name} {station_name} is empty !" | ||
logger.error(msg) | ||
|
@@ -271,6 +292,7 @@ def check_issue_dir(data_source, campaign_name, base_dir=None): | |
def check_issue_file(data_source, campaign_name, station_name, base_dir=None): | ||
"""Check existence of a valid issue YAML file. If does not exists, raise an error.""" | ||
from disdrodb.issue.checks import check_issue_compliance | ||
from disdrodb.issue.writer import create_station_issue | ||
|
||
_ = check_issue_dir( | ||
base_dir=base_dir, | ||
|
@@ -286,9 +308,12 @@ def check_issue_file(data_source, campaign_name, station_name, base_dir=None): | |
) | ||
# Check existence | ||
if not os.path.exists(issue_filepath): | ||
msg = f"The issue YAML file of {data_source} {campaign_name} {station_name} does not exist at {issue_filepath}." | ||
logger.error(msg) | ||
raise ValueError(msg) | ||
create_station_issue( | ||
base_dir=base_dir, | ||
data_source=data_source, | ||
campaign_name=campaign_name, | ||
station_name=station_name, | ||
) | ||
|
||
# Check validity | ||
check_issue_compliance( | ||
|
@@ -398,7 +423,7 @@ def check_raw_dir(raw_dir: str, station_name: str) -> None: | |
check_directories_inside(raw_dir) | ||
|
||
# Check there is data in the station directory | ||
check_station_has_data( | ||
check_data_availability( | ||
product="RAW", | ||
base_dir=base_dir, | ||
data_source=data_source, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
✅ No longer an issue: Code Duplication
The module no longer contains too many functions with similar structure