Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Fix various pp issues related to running seaice_suite #721

Merged
merged 6 commits into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion data/fieldlist_GFDL.jsonc
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,13 @@
"realm": "atmos",
"units": "1",
"ndim": 3
},
},
"siconc": {
"standard_name": "sea_ice_area_fraction",
"realm": "seaIce",
"units": "0-1",
"ndim": 3
},
"IWP": {
"standard_name": "atmosphere_mass_content_of_cloud_ice",
"long_name": "Ice water path",
Expand Down
2 changes: 1 addition & 1 deletion diagnostics/seaice_suite/seaice_suite_sic_mean_sigma.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def readindata(file, varname='siconc', firstyr='1979', lastyr='2014'):


# 1) Loading model data files:
input_file = "{DATADIR}/mon/{CASENAME}.{siconc_var}.mon.nc".format(**os.environ)
input_file = os.environ['SICONC_FILE']
obsoutput_dir = "{WORK_DIR}/obs/".format(**os.environ)
modoutput_dir = "{WORK_DIR}/model/".format(**os.environ)
figures_dir = "{WORK_DIR}/model/".format(**os.environ)
Expand Down
47 changes: 45 additions & 2 deletions src/data_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,36 @@

def set_date_range(self, startdate: str, enddate: str):
self.date_range = util.DateRange(start=startdate, end=enddate)

def set_query(self, var: varlist_util.VarlistEntry, path_regex: str):
    """Populate ``self.query`` with the catalog search criteria for *var*.

    Sets the ``frequency``, ``path``, ``realm``, ``standard_name``, and
    ``variable_id`` entries of the query dictionary, preferring the
    convention-translated variable attributes when a translation exists.

    :param var: the varlist entry to build a query for
    :param path_regex: regex constraining the catalog ``path`` column
    """
    realm_regex = var.realm + '*'
    var_id = var.name
    standard_name = var.standard_name
    if var.translation.convention is not None:
        # prefer the convention-translated name/standard_name
        var_id = var.translation.name
        standard_name = var.translation.standard_name
        if any(var.translation.alternate_standard_names):
            # query on the full list of acceptable standard_names
            standard_name = [var.translation.standard_name] + var.translation.alternate_standard_names
    if var.is_static:
        # static (time-independent) variables use the 'fx' frequency label
        freq = "fx"
    else:
        freq = var.T.frequency
        if not isinstance(freq, str):
            freq = freq.format_local()
        if freq == 'hr':
            # normalize hourly frequency to the catalog's '1hr' label
            freq = '1hr'

    # define initial query dictionary with variable settings requirements that do not change if
    # the variable is translated
    self.query['frequency'] = freq
    self.query['path'] = path_regex
    self.query['realm'] = realm_regex
    self.query['standard_name'] = standard_name
    self.query['variable_id'] = var_id

def translate_varlist(self,
var: varlist_util.VarlistEntry,
Expand Down Expand Up @@ -94,7 +124,10 @@
# col_spec = sampleLocalFileDataSource_col_spec
# varlist = diagnostic.varlist
convention: str = "CMIP"


def set_query(self, var: varlist_util.VarlistEntry, path_regex: str):
    """Build the catalog query for *var*; no convention-specific adjustments here."""
    super().set_query(var, path_regex)

@data_source.maker
class CESMDataSource(DataSourceBase):
Expand All @@ -105,7 +138,10 @@
# col_spec = sampleLocalFileDataSource_col_spec
# varlist = diagnostic.varlist
convention: str = "CESM"


def set_query(self, var: varlist_util.VarlistEntry, path_regex: str):
    """Build the catalog query for *var* using the base-class logic unchanged."""
    super().set_query(var, path_regex)

@data_source.maker
class GFDLDataSource(DataSourceBase):
Expand All @@ -116,3 +152,10 @@
# col_spec = sampleLocalFileDataSource_col_spec
# varlist = diagnostic.varlist
convention: str = "GFDL"

def set_query(self, var: varlist_util.VarlistEntry, path_regex: str):
    """Build the catalog query for *var*, then apply a GFDL-specific realm fixup."""
    super().set_query(var, path_regex)
    # HACK: query 'ice' instead of the 'seaIce*' regex so the framework
    # does not grab data from ice_1x1deg
    if self.query['realm'] == 'seaIce*':
        self.query['realm'] = 'ice'
64 changes: 36 additions & 28 deletions src/preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,36 @@ def execute(self, var: varlist_util.VarlistEntry,
pass


class PercentConversionFunction(PreprocessorFunctionBase):
    """A PreprocessorFunction which converts the dependent variable's units and
    values for the specific case of percentages. ``0-1`` units are not defined
    in the UDUNITS-2 library, so this function handles the conversions between
    ``0-1`` (fraction) and ``%`` (percent) directly.
    """

    # (fraction, percent) unit strings this function converts between
    _std_name_tuple = ('0-1', '%')

    def execute(self, var, ds, **kwargs):
        """Convert the translated variable in *ds* between '0-1' and '%' units.

        :param var: varlist entry whose ``units`` give the requested units
        :param ds: dataset containing the translated variable to convert
        :return: *ds*, converted in place when a fraction<->percent
            conversion applies; otherwise unchanged
        """
        var_unit = getattr(var, "units", "")
        tv = var.translation  # abbreviate
        tv_unit = getattr(tv, "units", "")
        frac, pct = self._std_name_tuple
        # 0-1 to %
        if str(tv_unit) == frac and str(var_unit) == pct:
            ds[tv.name].attrs['units'] = '%'
            ds[tv.name].values = ds[tv.name].values * 100
            return ds
        # % to 0-1
        if str(tv_unit) == pct and str(var_unit) == frac:
            ds[tv.name].attrs['units'] = '0-1'
            # sometimes data labeled '%' is already in [0, 1]: sample the data
            # before rescaling.
            # NOTE(review): this sample assumes a >=3-D array with size >= 4
            # along the third axis -- confirm against the expected inputs
            if ds[tv.name].values[:, :, 3].max() < 1.5:
                return ds
            ds[tv.name].values = ds[tv.name].values / 100
            return ds
        return ds

class PrecipRateToFluxFunction(PreprocessorFunctionBase):
"""A PreprocessorFunction which converts the dependent variable's units, for
the specific case of precipitation. Flux and precip rate differ by a factor
Expand Down Expand Up @@ -694,7 +724,7 @@ def _functions(self):
"""
# normal operation: run all functions
return [
AssociatedVariablesFunction,
AssociatedVariablesFunction, PercentConversionFunction,
PrecipRateToFluxFunction, ConvertUnitsFunction,
ExtractLevelFunction, RenameVariablesFunction
]
Expand Down Expand Up @@ -1012,33 +1042,11 @@ def query_catalog(self,
path_regex = [re.compile(r'({})'.format(case_name))]

for var in case_d.varlist.iter_vars():
realm_regex = var.realm + '*'
date_range = var.T.range
var_id = var.name
standard_name = var.standard_name
if var.translation.convention is not None:
var_id = var.translation.name
standard_name = var.translation.standard_name
if any(var.translation.alternate_standard_names):
standard_name = [var.translation.standard_name] + var.translation.alternate_standard_names
date_range = var.translation.T.range
if var.is_static:
date_range = None
freq = "fx"
else:
freq = var.T.frequency
if not isinstance(freq, str):
freq = freq.format_local()
if freq == 'hr':
freq = '1hr'


# define initial query dictionary with variable settings requirements that do not change if
# the variable is translated
case_d.query['frequency'] = freq
case_d.query['path'] = path_regex
case_d.query['realm'] = realm_regex
case_d.query['standard_name'] = standard_name
case_d.query['variable_id'] = var_id
case_d.set_query(var, path_regex)

# change realm key name if necessary
if cat.df.get('modeling_realm', None) is not None:
Expand All @@ -1047,7 +1055,7 @@ def query_catalog(self,
# search catalog for convention specific query object
var.log.info("Querying %s for variable %s for case %s.",
data_catalog,
var_id,
case_d.query['variable_id'],
case_name)
cat_subset = cat.search(**case_d.query)
if cat_subset.df.empty:
Expand Down Expand Up @@ -1086,7 +1094,7 @@ def query_catalog(self,
f"configuration file.")
else:
raise util.DataRequestError(
f"Unable to find match or alternate for {var_id}"
f"Unable to find match or alternate for {case_d.query['variable_id']}"
f" for case {case_name} in {data_catalog}")

# Get files in specified date range
Expand Down Expand Up @@ -1162,7 +1170,7 @@ def query_catalog(self,
# check that the trimmed variable data in the merged dataset matches the desired date range
if not var.is_static:
try:
self.check_time_bounds(cat_dict[case_name], var.translation, freq)
self.check_time_bounds(cat_dict[case_name], var.translation, var.T.frequency)
except LookupError:
var.log.error(f'Time bounds in trimmed dataset for {var_id} in case {case_name} do not match'
f'requested date_range.')
Expand Down
2 changes: 2 additions & 0 deletions src/units.py
Original file line number Diff line number Diff line change
def conversion_factor(source_unit, dest_unit):
    """Return the numeric factor that converts *source_unit* to *dest_unit*.

    *source_unit*, *dest_unit* are coerced to :class:`Units` objects via
    :func:`to_cfunits`.
    """
    if str(source_unit) == str(dest_unit):
        # bypass function if the units have the same string, allowing units
        # like '0-1' (not defined in UDUNITS-2) to be used
        return 1.0
    source_unit, dest_unit = to_equivalent_units(source_unit, dest_unit)
    return Units.conform(1.0, source_unit, dest_unit)

Expand Down
19 changes: 10 additions & 9 deletions src/xr_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ def _old_axes_dict(self, var_name=None):
if len(v) > 1 and var_name is not None:
ax = [c for c in v if c in itertools.chain.from_iterable(axes_obj.cf.coordinates.values())]
del_ax = [d for d in v if d not in itertools.chain.from_iterable(axes_obj.cf.coordinates.values())]
if del_ax is not None: # remove the entries that are not in the cf.coordinates.values dict
if del_ax is not None and len(del_ax) > 0: # remove the entries that are not in the cf.coordinates.values dict
# append entries that are in the cf.coordinates.values dict if they are missing in coords_list
# and dims_list
if del_ax[0] in coords_list:
Expand All @@ -208,14 +208,15 @@ def _old_axes_dict(self, var_name=None):

if ax is not None:
vardict[k] = ax
if ax[0] not in coords_list:
_log.warning(("cf_xarray fix: %s axis %s not in dimensions "
"for %s; dropping."), k, ax[0], var_name)
delete_keys.append(k)
else:
coords_list.remove(ax[0])
if ax[0] in dims_list:
dims_list.remove(ax[0])
for a in ax:
if a not in coords_list:
_log.warning(("cf_xarray fix: %s axis %s not in dimensions "
"for %s; dropping."), k, a, var_name)
delete_keys.append(k)
else:
coords_list.remove(a)
if a in dims_list:
dims_list.remove(a)
elif len(v) == 1:
if v[0] not in coords_list:
_log.warning(("cf_xarray fix: %s axis %s not in dimensions "
Expand Down
Loading