From cb687dca8c0513be7d1024f497c3a0f8935e1f00 Mon Sep 17 00:00:00 2001 From: wrongkindofdoctor <20195932+wrongkindofdoctor@users.noreply.github.com> Date: Mon, 25 Nov 2024 18:31:15 -0500 Subject: [PATCH 1/9] define missing entries in dummy translation object returned by NoTranslationFieldlist.translate add logic to determine alternate_standard_names attribute to NoTranslationFieldlist.translate --- src/translation.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/src/translation.py b/src/translation.py index ce090372f..692d35d22 100644 --- a/src/translation.py +++ b/src/translation.py @@ -435,7 +435,7 @@ def translate(self, var, from_convention: str): ) -class NoTranslationFieldlist(metaclass=util.Singleton): +class NoTranslationFieldlist: """Class which partially implements the :class:`Fieldlist` interface but does no variable translation. :class:`~diagnostic.VarlistEntry` objects from the POD are passed through to create :class:`TranslatedVarlistEntry` objects. @@ -476,7 +476,7 @@ def translate_coord(self, coord, log=_log) -> TranslatedVarlistEntry: # should never get here - not called externally raise NotImplementedError - def translate(self, var, from_convention: str): + def translate(self, var, data_convention: str): """Returns :class:`TranslatedVarlistEntry` instance, populated with contents of input :class:`~diagnostic.VarlistEntry` instance. @@ -486,20 +486,37 @@ def translate(self, var, from_convention: str): based on the file's actual contents. 
""" coords_copy = copy.deepcopy(var.dims) + copy.deepcopy(var.scalar_coords) - # TODO: coerce_to_dataclass runs into recursion limit on var; fix that + fieldlist_obj = VariableTranslator().get_convention(data_convention) + fieldlist_entry = dict() + for variable_id_dict in fieldlist_obj.lut.values(): + if variable_id_dict.get('standard_name', None) == var.standard_name \ + or var.standard_name in variable_id_dict.get('alternate_standard_names'): + if variable_id_dict.get('realm', None) == var.realm \ + and variable_id_dict.get('units', None) == var.units.units: + fieldlist_entry = variable_id_dict + break + if len(fieldlist_entry.keys()) < 1: + var.log.error(f'No {data_convention} fieldlist entry found for variable {var.name}') + return None + alt_standard_names = fieldlist_entry.get('alternate_standard_names') return TranslatedVarlistEntry( name=var.name, standard_name=var.standard_name, units=var.units, - convention=_NO_TRANSLATION_CONVENTION, + convention=var.convention, coords=coords_copy, modifier=var.modifier, + alternate_standard_names=alt_standard_names, + realm=var.realm, log=var.log ) class VariableTranslator(metaclass=util.Singleton): - """:class:`~util.Singleton` containing information for different variable + """The use of :class:`~util.Singleton` as the metaclass means that the VariableTranslator is not a + base class. Instead, it is a singleton that needs to be created only once (done + in the mdtf_framework.py driver script) to hold all the information from the fieldlist + tables that is later shared. The SUBCLASSES of the VariableTranslator hold customized information for different variable naming conventions. These are defined in the ``data/fieldlist_*.jsonc`` files. 
""" From 8f91d2d483df6a0a8c54b822a7bb902196d0267d Mon Sep 17 00:00:00 2001 From: wrongkindofdoctor <20195932+wrongkindofdoctor@users.noreply.github.com> Date: Mon, 25 Nov 2024 18:47:12 -0500 Subject: [PATCH 2/9] set translate_data to false for testing --- tests/github_actions_test_ubuntu_1a.jsonc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/github_actions_test_ubuntu_1a.jsonc b/tests/github_actions_test_ubuntu_1a.jsonc index acb962a90..ee3ca3660 100644 --- a/tests/github_actions_test_ubuntu_1a.jsonc +++ b/tests/github_actions_test_ubuntu_1a.jsonc @@ -53,7 +53,7 @@ "run_pp": true, // Set to true to perform data translation; default false: - "translate_data": true, + "translate_data": false, // Set to true to have PODs save postscript figures in addition to bitmaps. "save_ps": false, From b4637e7b5712cd76b3faa18916c23dc888b17719 Mon Sep 17 00:00:00 2001 From: wrongkindofdoctor <20195932+wrongkindofdoctor@users.noreply.github.com> Date: Mon, 2 Dec 2024 09:56:05 -0500 Subject: [PATCH 3/9] edit logging message for no translation setting in pod_setup --- src/pod_setup.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/pod_setup.py b/src/pod_setup.py index 8b04577d8..5cd188984 100644 --- a/src/pod_setup.py +++ b/src/pod_setup.py @@ -299,13 +299,12 @@ def setup_pod(self, runtime_config: util.NameSpace, for case_name, case_dict in runtime_config.case_list.items(): cases[case_name].read_varlist(self, append_vars=append_vars) - # Translate the varlistEntries from the POD convention to the data convention if desired and the pod - # convention does not match the case convention + # Translate the varlistEntries from the POD convention to the data convention for the query if desired data_convention = case_dict.convention.lower() if not runtime_config.translate_data: - data_convention = 'no_translation' - self.log.info(f'Runtime option translate_data is set to .false.' 
+ self.log.info(f'Runtime option translate_data is set to .false. ' f'No data translation will be performed for case {case_name}.') + data_convention = 'no_translation' else: if pod_convention != data_convention: self.log.info(f'Translating POD variables from {pod_convention} to {data_convention}') From 37e19ad0cd21e50c87967d5ed1fb94068f110e99 Mon Sep 17 00:00:00 2001 From: wrongkindofdoctor <20195932+wrongkindofdoctor@users.noreply.github.com> Date: Mon, 2 Dec 2024 09:57:17 -0500 Subject: [PATCH 4/9] add todo to translation translate_coord and cleanup comments --- src/translation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/translation.py b/src/translation.py index 692d35d22..73a22ddef 100644 --- a/src/translation.py +++ b/src/translation.py @@ -341,7 +341,7 @@ def translate_coord(self, coord, class_dict=None, log=_log) -> dict: coord_name = new_coord['name'] elif hasattr(new_coord, 'out_name'): coord_name = new_coord['out_name'] - else: + else: # TODO add more robust check for key name == 'plev' (or whatever the coordinate name in the lut should be based on fieldlist) coord_name = [k for k in lut1.keys()][0] coord_copy = copy.deepcopy(new_coord) @@ -480,7 +480,7 @@ def translate(self, var, data_convention: str): """Returns :class:`TranslatedVarlistEntry` instance, populated with contents of input :class:`~diagnostic.VarlistEntry` instance. - .. note:: + note:: We return a copy of the :class:`~diagnostic.VarlistEntry` because logic in :class:`~xr_parser.DefaultDatasetParser` alters the translation based on the file's actual contents. 
From 7dc364a78062197ccffb7014d25375d28e5be9f3 Mon Sep 17 00:00:00 2001 From: wrongkindofdoctor <20195932+wrongkindofdoctor@users.noreply.github.com> Date: Mon, 2 Dec 2024 09:57:46 -0500 Subject: [PATCH 5/9] remove checks for no_translation from preprocessor --- src/preprocessor.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/preprocessor.py b/src/preprocessor.py index 69b4ee617..53dff151d 100644 --- a/src/preprocessor.py +++ b/src/preprocessor.py @@ -1093,8 +1093,6 @@ def query_catalog(self, # check that start and end times include runtime startdate and enddate if not var.is_static: var_obj = var.translation - if var.translation.convention == 'no_translation': - var_obj = var try: self.check_time_bounds(cat_dict[case_name], var_obj, freq) except LookupError: @@ -1133,8 +1131,6 @@ def execute_pp_functions(self, v: varlist_util.VarlistEntry, return xarray_ds - return xarray_ds - def setup(self, pod): """Method to do additional configuration immediately before :meth:`process` is called on each variable for *pod*. 
Implements metadata cleaning via @@ -1441,8 +1437,6 @@ def write_pp_catalog(self, ds_match = input_catalog_ds[case_name] for var in case_dict.varlist.iter_vars(): var_name = var.translation.name - if var.translation.convention == 'no_translation': - var_name = var.name ds_var = ds_match.data_vars.get(var_name, None) if ds_var is None: log.error(f'No var {var_name}') From f6e0992d06a0b3c77350416c7cba321ac9f1d97f Mon Sep 17 00:00:00 2001 From: wrongkindofdoctor <20195932+wrongkindofdoctor@users.noreply.github.com> Date: Mon, 2 Dec 2024 10:47:03 -0500 Subject: [PATCH 6/9] define TranslatedVarlistEntry name attribute using data convention field table variable id --- src/translation.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/translation.py b/src/translation.py index 73a22ddef..09395ab56 100644 --- a/src/translation.py +++ b/src/translation.py @@ -488,19 +488,21 @@ def translate(self, var, data_convention: str): coords_copy = copy.deepcopy(var.dims) + copy.deepcopy(var.scalar_coords) fieldlist_obj = VariableTranslator().get_convention(data_convention) fieldlist_entry = dict() - for variable_id_dict in fieldlist_obj.lut.values(): + variable_id = "" + for variable_id, variable_id_dict in fieldlist_obj.lut.items(): if variable_id_dict.get('standard_name', None) == var.standard_name \ or var.standard_name in variable_id_dict.get('alternate_standard_names'): if variable_id_dict.get('realm', None) == var.realm \ and variable_id_dict.get('units', None) == var.units.units: fieldlist_entry = variable_id_dict + var_id = variable_id break if len(fieldlist_entry.keys()) < 1: var.log.error(f'No {data_convention} fieldlist entry found for variable {var.name}') return None alt_standard_names = fieldlist_entry.get('alternate_standard_names') return TranslatedVarlistEntry( - name=var.name, + name=variable_id, standard_name=var.standard_name, units=var.units, convention=var.convention, From ed16872dd2820f45ff0df0f40dd1d7dcdbd05ce1 Mon Sep 17 00:00:00 
2001 From: wrongkindofdoctor <20195932+wrongkindofdoctor@users.noreply.github.com> Date: Mon, 2 Dec 2024 14:28:26 -0500 Subject: [PATCH 7/9] revert debugging changes from test config file --- tests/github_actions_test_ubuntu_1a.jsonc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/github_actions_test_ubuntu_1a.jsonc b/tests/github_actions_test_ubuntu_1a.jsonc index ee3ca3660..acb962a90 100644 --- a/tests/github_actions_test_ubuntu_1a.jsonc +++ b/tests/github_actions_test_ubuntu_1a.jsonc @@ -53,7 +53,7 @@ "run_pp": true, // Set to true to perform data translation; default false: - "translate_data": false, + "translate_data": true, // Set to true to have PODs save postscript figures in addition to bitmaps. "save_ps": false, From 27f3ff2293a3e00f46cfbb23497956b77e5db153 Mon Sep 17 00:00:00 2001 From: wrongkindofdoctor <20195932+wrongkindofdoctor@users.noreply.github.com> Date: Mon, 2 Dec 2024 14:42:40 -0500 Subject: [PATCH 8/9] update docs for translate_data flag in the runtime config file --- doc/sphinx/start_config.rst | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/sphinx/start_config.rst b/doc/sphinx/start_config.rst index 21b98a403..e402e96a1 100644 --- a/doc/sphinx/start_config.rst +++ b/doc/sphinx/start_config.rst @@ -147,7 +147,11 @@ Options for workflow control * **run_pp**: (boolean) Set to *true* to run the preprocessor; default *true* -* **translate_data**: (boolean) Set to *true* to perform data translation; default *true* +* **translate_data**: (boolean) Set to *true* to perform data translation. If *false*, the preprocessor query + automatically uses the convention for each case in the input dataset for the query, and skips translating the + variable names and attributes to the POD convention. Note that this means that the precipRateToFluxConversion is not + applied. 
This option is best if you know that the input dataset has variable attributes that exactly match the + POD variable attributes; default *true* * **save_ps**: (boolean) Set to *true* to have PODs save postscript figures in addition to bitmaps; default *false* From b2b8c1c9ffc7736e70e776097e1cb5e79dc5442a Mon Sep 17 00:00:00 2001 From: wrongkindofdoctor <20195932+wrongkindofdoctor@users.noreply.github.com> Date: Mon, 2 Dec 2024 14:45:00 -0500 Subject: [PATCH 9/9] fix variable_id and var_id refs in dummy translate method --- src/translation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/translation.py b/src/translation.py index 09395ab56..9590763b0 100644 --- a/src/translation.py +++ b/src/translation.py @@ -488,7 +488,7 @@ def translate(self, var, data_convention: str): coords_copy = copy.deepcopy(var.dims) + copy.deepcopy(var.scalar_coords) fieldlist_obj = VariableTranslator().get_convention(data_convention) fieldlist_entry = dict() - variable_id = "" + var_id = "" for variable_id, variable_id_dict in fieldlist_obj.lut.items(): if variable_id_dict.get('standard_name', None) == var.standard_name \ or var.standard_name in variable_id_dict.get('alternate_standard_names'): @@ -502,7 +502,7 @@ def translate(self, var, data_convention: str): return None alt_standard_names = fieldlist_entry.get('alternate_standard_names') return TranslatedVarlistEntry( - name=variable_id, + name=var_id, standard_name=var.standard_name, units=var.units, convention=var.convention,