IMPRO-511 implement metadata cube saving (#538)
* Initial incorporation of metadata cube saving, based on IMPRO-466/464 (a round-trip sketch of the new behaviour follows this change list). Some of these changes will have to be removed to avoid updating the form_of_difference attribute from improver-gradient

* Revert changes to spatial.py and test_DifferenceBetweenAdjacentGridSquares.py pending IMPRO-507 / updating the form_of_difference attribute. Incremented the length of the cubes list in test_save because Iris 2 now saves metadata as a separate cube rather than as attributes on the other cubes in the file

* Remove some print statements only used during testing

* Remove superfluous comment

* Correct comment typo and remove spp__form_of_difference attribute from test_save.py as per reviewers' comments

* Add unit test to check that the metadata prefixes cube is discarded during load

* Add unit tests for append_metadata_cube functionality

* initial fix to keep global attributes in the netCDF file; get the required global attributes from the data cubes and add them to the prefix cube

* change iteritems() to items() for Python 3 compatibility

* Use a list of global keys as suggested by cgsandford

* Pass in global_keys as an argument to append_metadata_cube so as to avoid redefining this list

* Changes as per cgsandford's suggestions; tests confirmed the key value cannot be left as an empty string

* amend comments

* add unit test to check that global attributes are added to the prefix cube; revert change made to 02_basic_mean.bats used for capturing output

* only use one cube in the input list; compare global keys on the input cube to the prefix cube rather than keys from global_keys_ref

* shorten line 223, as the Travis build complains about lines exceeding the 80-character limit

* add check that key values are also the same in prefix and data cube

* Removed redundant conversion of cubelist to a list.
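
For reviewers, a minimal round-trip sketch of the behaviour this commit introduces, assuming the set_up_cube test helper is importable; the output path is illustrative only:

import numpy as np

from improver.tests.ensemble_calibration.ensemble_calibration.\
    helper_functions import set_up_cube
from improver.utilities.load import load_cube
from improver.utilities.save import save_netcdf

# Build a small temperature cube (mirrors the new set_up_test_cube helper).
data = np.linspace(-45.0, 45.0, 9).reshape(1, 1, 3, 3) + 273.15
cube = set_up_cube(data, 'air_temperature', 'K', realizations=[0])

# Saving appends a 'prefixes' metadata cube and tags every cube with
# bald__isPrefixedBy = 'prefix_list' before writing.
save_netcdf(cube, 'temp.nc')  # illustrative path

# Loading discards the prefix cube and strips the bald__isPrefixedBy
# attribute, so callers only ever see the data cube.
result = load_cube('temp.nc')
assert 'bald__isPrefixedBy' not in result.attributes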
markysparks authored and gavinevans committed May 22, 2018
1 parent fc48215 commit 93e01f0
Showing 4 changed files with 168 additions and 11 deletions.
12 changes: 12 additions & 0 deletions lib/improver/tests/utilities/test_load.py
@@ -38,6 +38,7 @@
import iris
from iris.coords import DimCoord
from iris.tests import IrisTest
from iris.exceptions import ConstraintMismatchError
import numpy as np

from improver.utilities.load import load_cube, load_cubelist
@@ -225,6 +226,17 @@ def test_ordering_for_realization_threshold_percentile_over_coordinate(
self.assertArrayAlmostEqual(result.coord_dims("latitude")[0], 4)
self.assertArrayAlmostEqual(result.coord_dims("longitude")[0], 5)

def test_attributes(self):
"""Test that metadata attributes are successfully stripped out."""
result = load_cube(self.filepath)
self.assertNotIn('bald__isPrefixedBy', result.attributes.keys())

def test_prefix_cube_removed(self):
"""Test metadata prefix cube is discarded during load"""
msg = "no cubes found"
with self.assertRaisesRegexp(ConstraintMismatchError, msg):
load_cube(self.filepath, 'prefixes')

def test_no_lazy_load(self):
"""Test that the cube returned upon loading does not contain
lazy data."""
97 changes: 91 additions & 6 deletions lib/improver/tests/utilities/test_save.py
@@ -42,27 +42,36 @@

from improver.utilities.load import load_cube
from improver.utilities.save import save_netcdf
from improver.tests.ensemble_calibration.ensemble_calibration.\
from improver.utilities.save import append_metadata_cube
from improver.tests.ensemble_calibration.ensemble_calibration. \
helper_functions import set_up_cube


def set_up_test_cube():
""" Set up a temperature cube with additional global attributes. """
data = (np.linspace(-45.0, 45.0, 9).reshape(1, 1, 3, 3) + 273.15)
cube = set_up_cube(data, 'air_temperature', 'K', realizations=([0]))
cube.attributes['Conventions'] = 'CF-1.5'
cube.attributes['source_realizations'] = np.arange(12)
# Desired attributes that will be global in netCDF file
cube.attributes['title'] = 'Operational MOGREPS-UK Forecast Model'
cube.attributes['um_version'] = '10.4'
cube.attributes['grid_id'] = 'enukx_standard_v1'
cube.attributes['source'] = 'Met Office Unified Model'
cube.attributes['Conventions'] = 'CF-1.5'
cube.attributes['institution'] = 'Met Office'
cube.attributes['history'] = ''

return cube


class Test_save_netcdf(IrisTest):

""" Test function to save iris cubes as NetCDF files. """

def setUp(self):
""" Set up cube to write, read and check """
self.global_keys_ref = ['title', 'um_version', 'grid_id', 'source',
'Conventions', 'institution', 'history']
'Conventions', 'institution', 'history',
'bald__isPrefixedBy']
self.directory = mkdtemp()
self.filepath = os.path.join(self.directory, "temp.nc")
self.cube = set_up_test_cube()
@@ -92,7 +101,10 @@ def test_basic_cube_list(self):
save_netcdf(cube_list, self.filepath)
read_cubes = iris.load(self.filepath)
self.assertIsInstance(read_cubes, iris.cube.CubeList)
self.assertEqual(len(read_cubes), 2)
# Length of read_cubes is now 3, as Iris 2 saves the metadata as a
# separate cube rather than as attributes on the other cubes in the
# file (the Iris 1.13 behaviour)
self.assertEqual(len(read_cubes), 3)

def test_cube_data(self):
""" Test valid cube can be read from saved file """
@@ -144,10 +156,83 @@ def test_cf_shared_attributes_list(self):
cube_list = ([self.cube, self.cube])
save_netcdf(cube_list, self.filepath)
global_keys = Dataset(self.filepath, mode='r').ncattrs()
self.assertEqual(len(global_keys), 1)
self.assertEqual(len(global_keys), 8)
self.assertTrue(all(key in self.global_keys_ref
for key in global_keys))


class Test_append_metadata_cube(IrisTest):
"""Test that appropriate metadata cube and attributes have been appended
to the cubes in the cube list"""

def setUp(self):
""" Set up cube to write, read and check """
self.global_keys_ref = ['title', 'um_version', 'grid_id', 'source',
'Conventions', 'institution', 'history',
'bald__isPrefixedBy']
self.directory = mkdtemp()
self.filepath = os.path.join(self.directory, "temp.nc")
self.cube = set_up_test_cube()

def tearDown(self):
""" Remove temporary directories created for testing. """
call(['rm', '-f', self.filepath])
call(['rmdir', self.directory])

def test_bald_attribute_added(self):
"""Test that the bald__isPrefixedBy attribute is added to each cube
and points to prefix_list"""
cube_list = ([self.cube, self.cube])
metadata_cubelist = append_metadata_cube(
cube_list, self.global_keys_ref)
for cube in metadata_cubelist:
self.assertEqual(
cube.attributes['bald__isPrefixedBy'],
'prefix_list')

def test_prefix_cube_attributes(self):
"""Test that metadata prefix cube contains the correct attributes"""
prefix_dict = {
'spp__': 'http://reference.metoffice.gov.uk/statistical-process'
'/properties/',
'bald__isPrefixedBy': 'prefix_list',
'bald__': 'http://binary-array-ld.net/latest/',
'spv__': 'http://reference.metoffice.gov.uk/statistical-process'
'/values/',
'rdf__': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
'spd__': 'http://reference.metoffice.gov.uk/statistical-process'
'/def/'}
metadata_cubelist = append_metadata_cube([], self.global_keys_ref)
self.assertDictEqual(metadata_cubelist[0].attributes, prefix_dict)

def test_global_attributes_present(self):
"""Test that desired global attributes are added to the prefix cube
so that Iris 2 keeps these attributes global in any resultant
netCDF file saved using these cubes"""

cube_list = ([self.cube])
metadata_cubelist = append_metadata_cube(
cube_list, self.global_keys_ref)

keys_in_prefix_cube = metadata_cubelist[1].attributes

# Get the global keys from both prefix and data cubes
prefix_global_keys = [
k for k in keys_in_prefix_cube.keys()
if k in self.global_keys_ref]
data_cube_global_keys = [
k for k in self.cube.attributes.keys()
if k in self.global_keys_ref]

# Check the keys are the same for prefix and data cube
self.assertListEqual(
sorted(prefix_global_keys), sorted(data_cube_global_keys))

# Check the key values are the same for prefix and data cube.
for key in prefix_global_keys:
self.assertEqual(metadata_cubelist[-1].attributes[key],
self.cube.attributes[key])


if __name__ == '__main__':
unittest.main()
16 changes: 12 additions & 4 deletions lib/improver/utilities/load.py
@@ -59,7 +59,15 @@ def load_cube(filepath, constraints=None, no_lazy_load=False):
Cube that has been loaded from the input filepath given the
constraints provided.
"""
# Remove metadata prefix cube if present
constraints = iris.Constraint(
cube_func=lambda cube: cube.long_name != 'prefixes') & constraints
cube = iris.load_cube(filepath, constraint=constraints)

# Remove metadata prefix cube attributes
if 'bald__isPrefixedBy' in cube.attributes.keys():
cube.attributes.pop('bald__isPrefixedBy')

# Ensure the probabilistic coordinates are the first coordinates within a
# cube and are in the specified order.
cube = enforce_coordinate_ordering(
@@ -75,12 +83,12 @@


def load_cubelist(filepath, constraints=None, no_lazy_load=False):
"""Load the filepath(s) provided using Iris into a cubelist. Loads
exactly one data cube per file.
"""Load one cube from each of the filepath(s) provided using Iris into
a cubelist.
Args:
filepath (str or list):
Filepath(s) that will be loaded.
Filepath(s) that will be loaded, each containing a single cube.
constraints (iris.Constraint, str or None):
Constraint to be applied when loading from the input filepath.
This can be in the form of an iris.Constraint or could be a string
@@ -103,7 +111,7 @@ def load_cubelist(filepath, constraints=None, no_lazy_load=False):
else:
filepaths = filepath

# Constuct a cubelist using the load_cube function.
# Construct a cubelist using the load_cube function.
cubelist = iris.cube.CubeList([])
for filepath in filepaths:
try:
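A note on the load_cube changes above: the prefix cube is excluded by ANDing a cube_func constraint onto whatever constraint the caller supplied. A minimal sketch of the same pattern in isolation (filename illustrative):

import iris

# Exclude the metadata prefix cube, identified by its long_name.
not_prefixes = iris.Constraint(
    cube_func=lambda cube: cube.long_name != 'prefixes')

# Combining with & means both constraints must be satisfied, so any
# caller-supplied constraint is still honoured.
combined = iris.Constraint('air_temperature') & not_prefixes

cubes = iris.load('temp.nc', constraints=combined)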
54 changes: 53 additions & 1 deletion lib/improver/utilities/save.py
@@ -33,6 +33,56 @@
import iris


def append_metadata_cube(cubelist, global_keys):
""" Create a metadata cube associated with statistical
post-processing attributes of the input cube list.
Args:
cubelist (iris.cube.CubeList):
List of cubes to be saved
global_keys (list):
List of attributes to be treated as global across cubes and within
any netCDF files produced using these cubes.
Returns:
iris.cube.CubeList with the appended metadata cube
"""
keys_for_global_attr = set()

# Collect the keys from each cube's attributes that match global_keys
for cube in cubelist:
keys_for_global_attr.update(
k for k in cube.attributes.keys() if k in global_keys)

# Set up a basic prefix cube
prefix_cube = iris.cube.Cube(0, long_name='prefixes',
var_name='prefix_list')

# Attributes have to appear on all cubes in a cubelist for Iris 2 to save
# these attributes as global in a resulting netCDF file, so add all of the
# global attributes to the prefix cube (otherwise they will be made
# variables in the netCDF file).
for key in keys_for_global_attr:
for cube in cubelist:
if key in cube.attributes:
prefix_cube.attributes[key] = cube.attributes[key]

# Add metadata prefix attributes to the prefix cube
prefix_cube.attributes['spp__'] = \
'http://reference.metoffice.gov.uk/statistical-process/properties/'
prefix_cube.attributes['spv__'] = \
'http://reference.metoffice.gov.uk/statistical-process/values/'
prefix_cube.attributes['spd__'] = \
'http://reference.metoffice.gov.uk/statistical-process/def/'
prefix_cube.attributes['rdf__'] = \
'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
prefix_cube.attributes['bald__'] = 'http://binary-array-ld.net/latest/'

cubelist.append(prefix_cube)
# bald__isPrefixedBy should be an attribute on all the cubes
for cube in cubelist:
cube.attributes['bald__isPrefixedBy'] = 'prefix_list'

return cubelist


def save_netcdf(cubelist, filename):
"""Save the input Cube or CubeList as a NetCDF file.
@@ -50,9 +100,11 @@ def save_netcdf(cubelist, filename):
cubelist = [cubelist]

global_keys = ['title', 'um_version', 'grid_id', 'source', 'Conventions',
'institution', 'history']
'institution', 'history', 'bald__isPrefixedBy']
local_keys = {key for cube in cubelist
for key in cube.attributes.keys()
if key not in global_keys}

cubelist = append_metadata_cube(cubelist, global_keys)

iris.fileformats.netcdf.save(cubelist, filename, local_keys=local_keys)
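
The resulting split between global and local attributes can be checked directly on a written file, much as test_cf_shared_attributes_list does; a small sketch using netCDF4, with an illustrative path:

from netCDF4 import Dataset

# Keys present on every cube (including the appended prefix cube) are
# written as netCDF global attributes; all other attributes stay local
# to their data variables via local_keys.
dataset = Dataset('temp.nc', mode='r')
print(dataset.ncattrs())  # expected to include 'bald__isPrefixedBy'
dataset.close()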
