diff --git a/CrocoDash/data_access.py b/CrocoDash/data_access.py
new file mode 100644
index 0000000..24984ea
--- /dev/null
+++ b/CrocoDash/data_access.py
@@ -0,0 +1,59 @@
+"""
+Data Access Module -> Query Data Sources like GLORYS & GEBCO
+"""
+import xarray as xr
+import glob
+import os
+import copernicusmarine
+from .rm6.regional_mom6 import regional_mom6 as rm6
+
+
+def get_glorys_data_from_rda(dates: list, lat_min, lat_max, lon_min, lon_max) -> xr.Dataset:
+    """
+    Gather GLORYS data on Derecho computers from the campaign storage and
+    return the dataset sliced to the llc and urc coordinates at the specific
+    dates. 2005 Only.
+    """
+    # Variables not needed downstream are dropped before slicing.
+    drop_var_lst = ['mlotst', 'bottomT', 'sithick', 'siconc', 'usi', 'vsi']
+    ds_in_path = '/glade/campaign/cgd/oce/projects/CROCODILE/glorys012/GLOBAL/'
+    ds_in_files = []
+    date_strings = [date.strftime('%Y%m%d') for date in dates]
+    for date in date_strings:
+        pattern = os.path.join(ds_in_path, "**", f'*{date}*.nc')
+        ds_in_files.extend(glob.glob(pattern, recursive=True))
+    ds_in_files = sorted(ds_in_files)
+    dataset = (
+        xr.open_mfdataset(ds_in_files, decode_times=False)
+        .drop_vars(drop_var_lst)
+        .sel(latitude=slice(lat_min, lat_max), longitude=slice(lon_min, lon_max))
+    )
+    return dataset
+
+
+def get_glorys_data_from_cds_api(dates: tuple, lat_min, lat_max, lon_min, lon_max) -> xr.Dataset:
+    """
+    Using the copernicusmarine api, query GLORYS data (any dates)
+    """
+    ds = copernicusmarine.open_dataset(
+        dataset_id='cmems_mod_glo_phy_my_0.083deg_P1D-m',
+        minimum_longitude=lon_min,
+        maximum_longitude=lon_max,
+        minimum_latitude=lat_min,
+        maximum_latitude=lat_max,
+        start_datetime=dates[0],
+        end_datetime=dates[1],
+        variables=["uo", "vo", "thetao", "so", "zos"],
+    )
+    return ds
+
+
+def get_glorys_data_script_for_cli(dates: tuple, lat_min, lat_max, lon_min, lon_max, filename, download_path) -> None:
+    """
+    Script to run the GLORYS data query for the CLI
+    """
+    return rm6.get_glorys_data([lon_min, lon_max],
+                               [lat_min, lat_max],
+                               [dates[0], dates[-1]],
+                               filename,
+                               download_path)
diff --git a/CrocoDash/rm6 b/CrocoDash/rm6
index e832983..022dd01 160000
--- a/CrocoDash/rm6
+++ b/CrocoDash/rm6
@@ -1 +1 @@
-Subproject commit e83298369939822d518843364449c1cc9cbac104
+Subproject commit 022dd01ab4f34eb4c2faa5cf654ce94409df59ea
diff --git a/CrocoDash/visualCaseGen b/CrocoDash/visualCaseGen
index daf3ade..9a9b906 160000
--- a/CrocoDash/visualCaseGen
+++ b/CrocoDash/visualCaseGen
@@ -1 +1 @@
-Subproject commit daf3ade1d8eca909bbba4b67fa3d4f570b28286e
+Subproject commit 9a9b9063bbb1b1545aebe98f47553e65cec33776
diff --git a/tests/test_data_access.py b/tests/test_data_access.py
new file mode 100644
index 0000000..923dd41
--- /dev/null
+++ b/tests/test_data_access.py
@@ -0,0 +1,26 @@
+from CrocoDash import data_access as da
+import pandas as pd
+
+
+def test_get_glorys_data_from_rda():
+    dates = ['2005-01-01', '2005-02-01']
+    lat_min = 30
+    lat_max = 31
+    lon_min = -71
+    lon_max = -70
+    dataset = da.get_glorys_data_from_rda(
+        pd.date_range(start=dates[0], end=dates[1]).to_pydatetime().tolist(),
+        lat_min, lat_max, lon_min, lon_max)
+    # Assert instead of print so the test can actually fail.
+    assert dataset is not None
+
+
+def test_get_glorys_data_from_api():
+    dates = ['2000-01-01', '2020-12-31']
+    lat_min = 3
+    lat_max = 61
+    lon_min = -101
+    lon_max = -34
+    dataset = da.get_glorys_data_from_cds_api(dates, lat_min, lat_max, lon_min, lon_max)
+    # to_netcdf("") raised on the empty path (and would write a huge file); check the handle instead.
+    assert dataset is not None