Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Fix various pp issues related to running seaice_suite #721

Merged
merged 6 commits into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion data/fieldlist_GFDL.jsonc
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,13 @@
"realm": "atmos",
"units": "1",
"ndim": 3
},
},
"siconc": {
"standard_name": "sea_ice_area_fraction",
"realm": "seaIce",
"units": "0-1",
"ndim": 3
},
"IWP": {
"standard_name": "atmosphere_mass_content_of_cloud_ice",
"long_name": "Ice water path",
Expand Down
2 changes: 1 addition & 1 deletion diagnostics/seaice_suite/seaice_suite_sic_mean_sigma.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def readindata(file, varname='siconc', firstyr='1979', lastyr='2014'):


# 1) Loading model data files:
input_file = "{DATADIR}/mon/{CASENAME}.{siconc_var}.mon.nc".format(**os.environ)
input_file = os.environ['SICONC_FILE']
obsoutput_dir = "{WORK_DIR}/obs/".format(**os.environ)
modoutput_dir = "{WORK_DIR}/model/".format(**os.environ)
figures_dir = "{WORK_DIR}/model/".format(**os.environ)
Expand Down
47 changes: 45 additions & 2 deletions src/data_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,36 @@

def set_date_range(self, startdate: str, enddate: str):
self.date_range = util.DateRange(start=startdate, end=enddate)

def set_query(self, var: varlist_util.VarlistEntry, path_regex: str):
    """Populate ``self.query`` with the catalog search criteria for *var*.

    Sets the ``frequency``, ``path``, ``realm``, ``standard_name``, and
    ``variable_id`` entries of the query dictionary, preferring the
    convention-translated variable attributes when a translation exists.

    :param var: the varlist entry to build a query for
    :param path_regex: regex constraining the catalog ``path`` column
    """
    realm_regex = var.realm + '*'
    var_id = var.name
    standard_name = var.standard_name
    if var.translation.convention is not None:
        # prefer the convention-translated name/standard_name
        var_id = var.translation.name
        standard_name = var.translation.standard_name
        if any(var.translation.alternate_standard_names):
            # query on the full list of acceptable standard_names
            standard_name = [var.translation.standard_name] + var.translation.alternate_standard_names
    if var.is_static:
        # static (time-independent) variables use the 'fx' frequency label
        freq = "fx"
    else:
        freq = var.T.frequency
        if not isinstance(freq, str):
            freq = freq.format_local()
        if freq == 'hr':
            # normalize hourly frequency to the catalog's '1hr' label
            freq = '1hr'

    # define initial query dictionary with variable settings requirements that do not change if
    # the variable is translated
    self.query['frequency'] = freq
    self.query['path'] = path_regex
    self.query['realm'] = realm_regex
    self.query['standard_name'] = standard_name
    self.query['variable_id'] = var_id

def translate_varlist(self,
var: varlist_util.VarlistEntry,
Expand Down Expand Up @@ -94,7 +124,10 @@
# col_spec = sampleLocalFileDataSource_col_spec
# varlist = diagnostic.varlist
convention: str = "CMIP"


def set_query(self, var: varlist_util.VarlistEntry, path_regex: str):
    """Build the catalog query for *var*; no convention-specific adjustments here."""
    super().set_query(var, path_regex)

@data_source.maker
class CESMDataSource(DataSourceBase):
Expand All @@ -105,7 +138,10 @@
# col_spec = sampleLocalFileDataSource_col_spec
# varlist = diagnostic.varlist
convention: str = "CESM"


def set_query(self, var: varlist_util.VarlistEntry, path_regex: str):
    """Build the catalog query for *var* using the base-class logic unchanged."""
    super().set_query(var, path_regex)

@data_source.maker
class GFDLDataSource(DataSourceBase):
Expand All @@ -116,3 +152,10 @@
# col_spec = sampleLocalFileDataSource_col_spec
# varlist = diagnostic.varlist
convention: str = "GFDL"

def set_query(self, var: varlist_util.VarlistEntry, path_regex: str):
    """Build the catalog query for *var*, then apply a GFDL-specific realm fixup."""
    super().set_query(var, path_regex)
    # HACK: query 'ice' instead of the 'seaIce*' regex so the framework
    # does not grab data from ice_1x1deg
    if self.query['realm'] == 'seaIce*':
        self.query['realm'] = 'ice'
64 changes: 36 additions & 28 deletions src/preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,36 @@ def execute(self, var: varlist_util.VarlistEntry,
pass


class PercentConversionFunction(PreprocessorFunctionBase):
    """A PreprocessorFunction which converts the dependent variable's units and
    values for the specific case of percentages. ``0-1`` units are not defined
    in the UDUNITS-2 library, so this function handles the conversions between
    ``0-1`` (fraction) and ``%`` (percent) directly.
    """

    # (fraction, percent) unit strings this function converts between
    _std_name_tuple = ('0-1', '%')

    def execute(self, var, ds, **kwargs):
        """Convert the translated variable in *ds* between '0-1' and '%' units.

        :param var: varlist entry whose ``units`` give the requested units
        :param ds: dataset containing the translated variable to convert
        :return: *ds*, converted in place when a fraction<->percent
            conversion applies; otherwise unchanged
        """
        var_unit = getattr(var, "units", "")
        tv = var.translation  # abbreviate
        tv_unit = getattr(tv, "units", "")
        frac, pct = self._std_name_tuple
        # 0-1 to %
        if str(tv_unit) == frac and str(var_unit) == pct:
            ds[tv.name].attrs['units'] = '%'
            ds[tv.name].values = ds[tv.name].values * 100
            return ds
        # % to 0-1
        if str(tv_unit) == pct and str(var_unit) == frac:
            ds[tv.name].attrs['units'] = '0-1'
            # sometimes data labeled '%' is already in [0, 1]: sample the data
            # before rescaling.
            # NOTE(review): this sample assumes a >=3-D array with size >= 4
            # along the third axis -- confirm against the expected inputs
            if ds[tv.name].values[:, :, 3].max() < 1.5:
                return ds
            ds[tv.name].values = ds[tv.name].values / 100
            return ds
        return ds

class PrecipRateToFluxFunction(PreprocessorFunctionBase):
"""A PreprocessorFunction which converts the dependent variable's units, for
the specific case of precipitation. Flux and precip rate differ by a factor
Expand Down Expand Up @@ -694,7 +724,7 @@ def _functions(self):
"""
# normal operation: run all functions
return [
AssociatedVariablesFunction,
AssociatedVariablesFunction, PercentConversionFunction,
PrecipRateToFluxFunction, ConvertUnitsFunction,
ExtractLevelFunction, RenameVariablesFunction
]
Expand Down Expand Up @@ -1012,33 +1042,11 @@ def query_catalog(self,
path_regex = [re.compile(r'({})'.format(case_name))]

for var in case_d.varlist.iter_vars():
realm_regex = var.realm + '*'
date_range = var.T.range
var_id = var.name
standard_name = var.standard_name
if var.translation.convention is not None:
var_id = var.translation.name
standard_name = var.translation.standard_name
if any(var.translation.alternate_standard_names):
standard_name = [var.translation.standard_name] + var.translation.alternate_standard_names
date_range = var.translation.T.range
if var.is_static:
date_range = None
freq = "fx"
else:
freq = var.T.frequency
if not isinstance(freq, str):
freq = freq.format_local()
if freq == 'hr':
freq = '1hr'


# define initial query dictionary with variable settings requirements that do not change if
# the variable is translated
case_d.query['frequency'] = freq
case_d.query['path'] = path_regex
case_d.query['realm'] = realm_regex
case_d.query['standard_name'] = standard_name
case_d.query['variable_id'] = var_id
case_d.set_query(var, path_regex)

# change realm key name if necessary
if cat.df.get('modeling_realm', None) is not None:
Expand All @@ -1047,7 +1055,7 @@ def query_catalog(self,
# search catalog for convention specific query object
var.log.info("Querying %s for variable %s for case %s.",
data_catalog,
var_id,
case_d.query['variable_id'],
case_name)
cat_subset = cat.search(**case_d.query)
if cat_subset.df.empty:
Expand Down Expand Up @@ -1086,7 +1094,7 @@ def query_catalog(self,
f"configuration file.")
else:
raise util.DataRequestError(
f"Unable to find match or alternate for {var_id}"
f"Unable to find match or alternate for {case_d.query['variable_id']}"
f" for case {case_name} in {data_catalog}")

# Get files in specified date range
Expand Down Expand Up @@ -1162,7 +1170,7 @@ def query_catalog(self,
# check that the trimmed variable data in the merged dataset matches the desired date range
if not var.is_static:
try:
self.check_time_bounds(cat_dict[case_name], var.translation, freq)
self.check_time_bounds(cat_dict[case_name], var.translation, var.T.frequency)
except LookupError:
var.log.error(f'Time bounds in trimmed dataset for {var_id} in case {case_name} do not match'
f'requested date_range.')
Expand Down
2 changes: 2 additions & 0 deletions src/units.py
Original file line number Diff line number Diff line change
def conversion_factor(source_unit, dest_unit):
    """Return the numeric factor that converts *source_unit* to *dest_unit*.

    *source_unit*, *dest_unit* are coerced to :class:`Units` objects via
    :func:`to_cfunits`.
    """
    if str(source_unit) == str(dest_unit):
        # bypass function if the units have the same string, allowing units
        # like '0-1' (not defined in UDUNITS-2) to be used
        return 1.0
    source_unit, dest_unit = to_equivalent_units(source_unit, dest_unit)
    return Units.conform(1.0, source_unit, dest_unit)

Expand Down
19 changes: 10 additions & 9 deletions src/xr_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ def _old_axes_dict(self, var_name=None):
if len(v) > 1 and var_name is not None:
ax = [c for c in v if c in itertools.chain.from_iterable(axes_obj.cf.coordinates.values())]
del_ax = [d for d in v if d not in itertools.chain.from_iterable(axes_obj.cf.coordinates.values())]
if del_ax is not None: # remove the entries that are not in the cf.coordinates.values dict
if del_ax is not None and len(del_ax) > 0: # remove the entries that are not in the cf.coordinates.values dict
# append entries that are in the cf.coordinates.values dict if they are missing in coords_list
# and dims_list
if del_ax[0] in coords_list:
Expand All @@ -208,14 +208,15 @@ def _old_axes_dict(self, var_name=None):

if ax is not None:
vardict[k] = ax
if ax[0] not in coords_list:
_log.warning(("cf_xarray fix: %s axis %s not in dimensions "
"for %s; dropping."), k, ax[0], var_name)
delete_keys.append(k)
else:
coords_list.remove(ax[0])
if ax[0] in dims_list:
dims_list.remove(ax[0])
for a in ax:
if a not in coords_list:
_log.warning(("cf_xarray fix: %s axis %s not in dimensions "
"for %s; dropping."), k, a, var_name)
delete_keys.append(k)
else:
coords_list.remove(a)
if a in dims_list:
dims_list.remove(a)
elif len(v) == 1:
if v[0] not in coords_list:
_log.warning(("cf_xarray fix: %s axis %s not in dimensions "
Expand Down
Loading