diff --git a/disdrodb/l0/check_configs.py b/disdrodb/l0/check_configs.py index e6b08564..29ab5737 100644 --- a/disdrodb/l0/check_configs.py +++ b/disdrodb/l0/check_configs.py @@ -342,7 +342,7 @@ def check_raw_array(sensor_name: str) -> None: raise ValueError(f"Wrong chunksizes for {key} in l0b_encodings.yml for sensor {sensor_name}.") # Get chunksizes in l0b_encoding.yml and check that if len > 1, has dimension_order key in raw_data_format - list_attributes_L0B_encodings = [ + list_attributes_l0b_encodings = [ i for i in l0b_encodings.keys() if isinstance(l0b_encodings.get(i).get("chunksizes"), list) and len(l0b_encodings.get(i).get("chunksizes")) > 1 @@ -351,7 +351,7 @@ def check_raw_array(sensor_name: str) -> None: i for i in raw_data_format.keys() if raw_data_format.get(i).get("dimension_order") is not None ] - if not sorted(list_attributes_L0B_encodings) == sorted(list_attributes_from_raw_data_format): + if not sorted(list_attributes_l0b_encodings) == sorted(list_attributes_from_raw_data_format): raise ValueError(f"Chunksizes in l0b_encodings and raw_data_format for sensor {sensor_name} does not match.") diff --git a/disdrodb/l0/io.py b/disdrodb/l0/io.py index 650bf5f9..34e714ac 100644 --- a/disdrodb/l0/io.py +++ b/disdrodb/l0/io.py @@ -206,7 +206,7 @@ def get_dataframe_min_max_time(df: pd.DataFrame): return (starting_time, ending_time) -def get_L0A_dir(processed_dir: str, station_name: str) -> str: +def get_l0a_dir(processed_dir: str, station_name: str) -> str: """Define L0A directory. Parameters @@ -225,7 +225,7 @@ def get_L0A_dir(processed_dir: str, station_name: str) -> str: return dir_path -def get_L0B_dir(processed_dir: str, station_name: str) -> str: +def get_l0b_dir(processed_dir: str, station_name: str) -> str: """Define L0B directory. 
Parameters @@ -244,7 +244,7 @@ def get_L0B_dir(processed_dir: str, station_name: str) -> str: return dir_path -def get_L0A_fname(df, processed_dir, station_name: str) -> str: +def get_l0a_fname(df, processed_dir, station_name: str) -> str: """Define L0A file name. Parameters @@ -274,7 +274,7 @@ def get_L0A_fname(df, processed_dir, station_name: str) -> str: return fname -def get_L0B_fname(ds, processed_dir, station_name: str) -> str: +def get_l0b_fname(ds, processed_dir, station_name: str) -> str: """Define L0B file name. Parameters @@ -304,7 +304,7 @@ def get_L0B_fname(ds, processed_dir, station_name: str) -> str: return fname -def get_L0A_fpath(df: pd.DataFrame, processed_dir: str, station_name: str) -> str: +def get_l0a_fpath(df: pd.DataFrame, processed_dir: str, station_name: str) -> str: """Define L0A file path. Parameters @@ -321,13 +321,13 @@ def get_L0A_fpath(df: pd.DataFrame, processed_dir: str, station_name: str) -> st str L0A file path. """ - fname = get_L0A_fname(df=df, processed_dir=processed_dir, station_name=station_name) - dir_path = get_L0A_dir(processed_dir=processed_dir, station_name=station_name) + fname = get_l0a_fname(df=df, processed_dir=processed_dir, station_name=station_name) + dir_path = get_l0a_dir(processed_dir=processed_dir, station_name=station_name) fpath = os.path.join(dir_path, fname) return fpath -def get_L0B_fpath(ds: xr.Dataset, processed_dir: str, station_name: str, l0b_concat=False) -> str: +def get_l0b_fpath(ds: xr.Dataset, processed_dir: str, station_name: str, l0b_concat=False) -> str: """Define L0B file path. Parameters @@ -347,10 +347,10 @@ def get_L0B_fpath(ds: xr.Dataset, processed_dir: str, station_name: str, l0b_con str L0B file path. 
""" - dir_path = get_L0B_dir(processed_dir, station_name) + dir_path = get_l0b_dir(processed_dir, station_name) if l0b_concat: dir_path = os.path.dirname(dir_path) - fname = get_L0B_fname(ds, processed_dir, station_name) + fname = get_l0b_fname(ds, processed_dir, station_name) fpath = os.path.join(dir_path, fname) return fpath @@ -489,8 +489,8 @@ def get_l0a_file_list(processed_dir, station_name, debugging_mode): List of L0A file paths. """ - L0A_dir_path = get_L0A_dir(processed_dir, station_name) - filepaths = glob.glob(os.path.join(L0A_dir_path, "*.parquet")) + l0a_dir_path = get_l0a_dir(processed_dir, station_name) + filepaths = glob.glob(os.path.join(l0a_dir_path, "*.parquet")) n_files = len(filepaths) @@ -501,7 +501,7 @@ def get_l0a_file_list(processed_dir, station_name, debugging_mode): # If no file available, raise error if n_files == 0: - msg = f"No L0A Apache Parquet file is available in {L0A_dir_path}. Run L0A processing first." + msg = f"No L0A Apache Parquet file is available in {l0a_dir_path}. Run L0A processing first." raise ValueError(msg) return filepaths @@ -1036,7 +1036,7 @@ def create_directory_structure(processed_dir, product_level, station_name, force ####--------------------------------------------------------------------------. #### DISDRODB L0A Readers -def _read_L0A(fpath: str, verbose: bool = False, debugging_mode: bool = False) -> pd.DataFrame: +def _read_l0a(fpath: str, verbose: bool = False, debugging_mode: bool = False) -> pd.DataFrame: # Log msg = f" - Reading L0 Apache Parquet file at {fpath} started." 
log_info(logger, msg, verbose) @@ -1050,7 +1050,7 @@ def _read_L0A(fpath: str, verbose: bool = False, debugging_mode: bool = False) - return df -def read_L0A_dataframe( +def read_l0a_dataframe( fpaths: Union[str, list], verbose: bool = False, debugging_mode: bool = False, @@ -1093,7 +1093,7 @@ def read_L0A_dataframe( fpaths = fpaths[0:3] # select first 3 fpaths # - Define the list of dataframe - list_df = [_read_L0A(fpath, verbose=verbose, debugging_mode=debugging_mode) for fpath in fpaths] + list_df = [_read_l0a(fpath, verbose=verbose, debugging_mode=debugging_mode) for fpath in fpaths] # - Concatenate dataframe df = concatenate_dataframe(list_df, verbose=verbose) # --------------------------------------------------- diff --git a/disdrodb/l0/l0_processing.py b/disdrodb/l0/l0_processing.py index 7fd29155..e87fee14 100644 --- a/disdrodb/l0/l0_processing.py +++ b/disdrodb/l0/l0_processing.py @@ -95,7 +95,7 @@ def _generate_l0a( ): """Generate L0A file from raw file.""" - from disdrodb.l0.io import get_L0A_fpath + from disdrodb.l0.io import get_l0a_fpath from disdrodb.l0.l0a_processing import ( process_raw_file, write_l0a, @@ -145,7 +145,7 @@ def _generate_l0a( ##--------------------------------------------------------------------. #### - Write to Parquet - fpath = get_L0A_fpath(df=df, processed_dir=processed_dir, station_name=station_name) + fpath = get_l0a_fpath(df=df, processed_dir=processed_dir, station_name=station_name) write_l0a(df=df, fpath=fpath, force=force, verbose=verbose) ##--------------------------------------------------------------------. @@ -178,7 +178,7 @@ def _generate_l0b( debugging_mode, parallel, ): - from disdrodb.l0.io import get_L0B_fpath, read_L0A_dataframe + from disdrodb.l0.io import get_l0b_fpath, read_l0a_dataframe from disdrodb.l0.l0b_processing import ( create_l0b_from_l0a, write_l0b, @@ -215,14 +215,14 @@ def _generate_l0b( ##------------------------------------------------------------------------. 
try: # Read L0A Apache Parquet file - df = read_L0A_dataframe(filepath, verbose=verbose, debugging_mode=debugging_mode) + df = read_l0a_dataframe(filepath, verbose=verbose, debugging_mode=debugging_mode) # -----------------------------------------------------------------. # Create xarray Dataset ds = create_l0b_from_l0a(df=df, attrs=attrs, verbose=verbose) # -----------------------------------------------------------------. # Write L0B netCDF4 dataset - fpath = get_L0B_fpath(ds, processed_dir, station_name) + fpath = get_l0b_fpath(ds, processed_dir, station_name) write_l0b(ds, fpath=fpath, force=force) ##--------------------------------------------------------------------. @@ -256,7 +256,7 @@ def _generate_l0b_from_nc( verbose, parallel, ): - from disdrodb.l0.io import get_L0B_fpath + from disdrodb.l0.io import get_l0b_fpath from disdrodb.l0.l0b_nc_processing import process_raw_nc from disdrodb.l0.l0b_processing import write_l0b @@ -298,7 +298,7 @@ def _generate_l0b_from_nc( ) # -----------------------------------------------------------------. # Write L0B netCDF4 dataset - fpath = get_L0B_fpath(ds, processed_dir, station_name) + fpath = get_l0b_fpath(ds, processed_dir, station_name) write_l0b(ds, fpath=fpath, force=force) ##--------------------------------------------------------------------. diff --git a/disdrodb/l0/l0b_nc_concat.py b/disdrodb/l0/l0b_nc_concat.py index 122da187..46a28687 100644 --- a/disdrodb/l0/l0b_nc_concat.py +++ b/disdrodb/l0/l0b_nc_concat.py @@ -19,7 +19,7 @@ import logging import os -from disdrodb.l0.io import get_L0B_dir, get_L0B_fpath +from disdrodb.l0.io import get_l0b_dir, get_l0b_fpath from disdrodb.utils.logger import ( close_logger, create_file_logger, @@ -53,19 +53,19 @@ def _concatenate_netcdf_files(processed_dir, station_name, remove=False, verbose # -------------------------------------------------------------------------. 
# Retrieve L0B files - L0B_dir_path = get_L0B_dir(processed_dir, station_name) - file_list = sorted(glob.glob(os.path.join(L0B_dir_path, "*.nc"))) + l0b_dir_path = get_l0b_dir(processed_dir, station_name) + file_list = sorted(glob.glob(os.path.join(l0b_dir_path, "*.nc"))) # -------------------------------------------------------------------------. # Check there are at least two files n_files = len(file_list) if n_files == 0: - msg = f"No L0B file is available for concatenation in {L0B_dir_path}." + msg = f"No L0B file is available for concatenation in {l0b_dir_path}." log_error(logger=logger, msg=msg, verbose=False) raise ValueError(msg) if n_files == 1: - msg = f"Only a single file is available for concatenation in {L0B_dir_path}." + msg = f"Only a single file is available for concatenation in {l0b_dir_path}." log_warning(logger=logger, msg=msg, verbose=verbose) raise ValueError(msg) @@ -75,7 +75,7 @@ def _concatenate_netcdf_files(processed_dir, station_name, remove=False, verbose # -------------------------------------------------------------------------. 
# Define the filepath of the concatenated L0B netCDF - single_nc_fpath = get_L0B_fpath(ds, processed_dir, station_name, l0b_concat=True) + single_nc_fpath = get_l0b_fpath(ds, processed_dir, station_name, l0b_concat=True) force = True # TODO add as argument write_l0b(ds, fpath=single_nc_fpath, force=force) diff --git a/disdrodb/l0/l0b_processing.py b/disdrodb/l0/l0b_processing.py index 7bab09e1..9a982d71 100644 --- a/disdrodb/l0/l0b_processing.py +++ b/disdrodb/l0/l0b_processing.py @@ -148,7 +148,7 @@ def format_string_array(string: str, n_values: int) -> np.array: # Replace "-9.999" with 0 values = np.char.replace(values, "-9.999", "0") # Cast values to float type - # --> Note: the disk encoding is specified in the L0B_encodings.yml + # --> Note: the disk encoding is specified in the l0b_encodings.yml values = values.astype(float) return values diff --git a/disdrodb/l0/standards.py b/disdrodb/l0/standards.py index 73e49fcf..a6f04162 100644 --- a/disdrodb/l0/standards.py +++ b/disdrodb/l0/standards.py @@ -798,7 +798,7 @@ def get_l0a_encodings_dict(sensor_name: str) -> dict: L0A encodings """ - # - L0A_encodings currently specify only the dtype. This could be expanded in the future. + # - l0a_encodings.yml currently specify only the dtype. This could be expanded in the future. 
d = read_config_yml(sensor_name=sensor_name, filename="l0a_encodings.yml") return d diff --git a/disdrodb/tests/test_l0/test_config_files.py b/disdrodb/tests/test_l0/test_config_files.py index bdcc1bb6..70272ceb 100644 --- a/disdrodb/tests/test_l0/test_config_files.py +++ b/disdrodb/tests/test_l0/test_config_files.py @@ -25,7 +25,7 @@ def check_list_length(cls, value): return value -class L0B_encodings_2n_level(BaseModel): +class l0b_encodings_2n_level(BaseModel): dtype: str zlib: bool complevel: int @@ -306,13 +306,13 @@ def test_yaml_format_basic_config_files(yaml_file_path: str) -> None: assert is_string_list(list_of_fisrt_level_values) -# Test the fotmat and content of the L0B_encodings.yml file +# Test the format and content of the l0b_encodings.yml file list_of_yaml_file_paths = list_files(CONFIG_FOLDER, "l0b_encodings.yml") @pytest.mark.parametrize("yaml_file_path", list_of_yaml_file_paths) -def test_L0B_encodings_format(yaml_file_path: str) -> None: - """test the L0B_encodings.yml file format +def test_l0b_encodings_format(yaml_file_path: str) -> None: + """test the l0b_encodings.yml file format Parameters ---------- @@ -330,4 +330,4 @@ def test_L0B_encodings_format(yaml_file_path: str) -> None: # check that the second level of the dictionary match the schema for value in data.values(): - assert validate_schema_pytest(value, L0B_encodings_2n_level) + assert validate_schema_pytest(value, l0b_encodings_2n_level) diff --git a/disdrodb/tests/test_l0/test_io.py b/disdrodb/tests/test_l0/test_io.py index 978a6ace..8bdf8f19 100644 --- a/disdrodb/tests/test_l0/test_io.py +++ b/disdrodb/tests/test_l0/test_io.py @@ -245,9 +245,9 @@ def test_get_dataset_min_max_time(): @pytest.mark.parametrize("path_process_dir", [PATH_PROCESS_DIR_WINDOWS, PATH_PROCESS_DIR_LINUX]) -def test_get_L0A_dir(path_process_dir): +def test_get_l0a_dir(path_process_dir): res = ( - io.get_L0A_dir(path_process_dir, "STATION_NAME") + io.get_l0a_dir(path_process_dir, "STATION_NAME")
.replace(path_process_dir, "") .replace("\\", "") .replace("/", "") @@ -256,9 +256,9 @@ def test_get_L0A_dir(path_process_dir): @pytest.mark.parametrize("path_process_dir", [PATH_PROCESS_DIR_WINDOWS, PATH_PROCESS_DIR_LINUX]) -def test_get_L0B_dir(path_process_dir): +def test_get_l0b_dir(path_process_dir): res = ( - io.get_L0B_dir(path_process_dir, "STATION_NAME") + io.get_l0b_dir(path_process_dir, "STATION_NAME") .replace(path_process_dir, "") .replace("\\", "") .replace("/", "") @@ -266,7 +266,7 @@ def test_get_L0B_dir(path_process_dir): assert res == "L0BSTATION_NAME" -def test_get_L0A_fpath(): +def test_get_l0a_fpath(): """ Test the naming and the path of the L0A file Note that this test needs "/pytest_files/test_folders_files_structure/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/ @@ -297,7 +297,7 @@ def test_get_L0A_fpath(): df = pd.DataFrame({"time": pd.date_range(start=start_date, end=end_date)}) # Test the function - res = io.get_L0A_fpath(df, path_campaign_name, station_name) + res = io.get_l0a_fpath(df, path_campaign_name, station_name) # Define expected results expected_name = ( @@ -307,7 +307,7 @@ def test_get_L0A_fpath(): assert res == expected_path -def test_get_L0B_fpath(): +def test_get_l0b_fpath(): """ Test the naming and the path of the L0B file Note that this test needs "/pytest_files/test_folders_files_structure/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/ @@ -344,7 +344,7 @@ def test_get_L0B_fpath(): ) # Test the function - res = io.get_L0B_fpath(ds, path_campaign_name, station_name) + res = io.get_l0b_fpath(ds, path_campaign_name, station_name) # Define expected results expected_name = f"L0B.{campaign_name.upper()}.{station_name}.s{start_date_str}.e{end_date_str}.{PRODUCT_VERSION}.nc" @@ -610,7 +610,7 @@ def test_copy_station_metadata(): ####--------------------------------------------------------------------------. 
-def test__read_L0A(): +def test__read_l0a(): # create dummy dataframe data = [{"a": "1", "b": "2"}, {"a": "2", "b": "2", "c": "3"}] df = pd.DataFrame(data) @@ -624,12 +624,12 @@ def test__read_L0A(): df.to_parquet(path_parquet_file, compression="gzip") # read written parquet file - df_written = io._read_L0A(path_parquet_file, False) + df_written = io._read_l0a(path_parquet_file, False) assert df.equals(df_written) -def test_read_L0A_dataframe(): +def test_read_l0a_dataframe(): list_of_parquet_file_paths = list() for i in [0, 1]: @@ -659,7 +659,7 @@ def test_read_L0A_dataframe(): df_concatenate = df_concatenate.sort_values(by="time") # read written parquet files - df_written = io.read_L0A_dataframe(list_of_parquet_file_paths, False) + df_written = io.read_l0a_dataframe(list_of_parquet_file_paths, False) # Create lists df_concatenate_list = df_concatenate.values.tolist() diff --git a/disdrodb/tests/test_l0/test_l0a_processing.py b/disdrodb/tests/test_l0/test_l0a_processing.py index 4ebaf79c..06536b5e 100644 --- a/disdrodb/tests/test_l0/test_l0a_processing.py +++ b/disdrodb/tests/test_l0/test_l0a_processing.py @@ -561,7 +561,7 @@ def test_write_l0a(): l0a_processing.write_l0a(df, path_parquet_file, True, False) # Read parquet file - df_written = io.read_L0A_dataframe([path_parquet_file], False) + df_written = io.read_l0a_dataframe([path_parquet_file], False) # Check if parquet file are similar is_equal = df.equals(df_written) diff --git a/tutorials/reader_preparation.ipynb b/tutorials/reader_preparation.ipynb index 8573e472..c7f13914 100644 --- a/tutorials/reader_preparation.ipynb +++ b/tutorials/reader_preparation.ipynb @@ -1357,7 +1357,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The function `infer_df_str_column_names()` tries to guess the column name based on string patterns according to `L0A_encodings.yml` and the type of sensor." 
+ "The function `infer_df_str_column_names()` tries to guess the column name based on string patterns according to `l0a_encodings.yml` and the type of sensor." ] }, { @@ -1409,7 +1409,7 @@ "source": [ "This can help us to define later the `column_names` list.\n", "\n", - "As reference, here is the list of valid columns name (taken from `L0A_encodings.yml`):" + "As reference, here is the list of valid columns name (taken from `l0a_encodings.yml`):" ] }, {