Skip to content

Commit 8dea269

Browse files
Allow masked data in coords (#6468)
* Allow masked data to be passed into a Cell, and by extensions a Coord. * Handled case where integer array with masked values could not be created. * Handle case where reading of scalar masked value from netCDF file looses the original dtype. * Removed some comments * Removed fill_value keyword to masked_array creation - wasn't fixing the issue with numpy warnings about NaNs. * Single method for handling points arrays with masked data * Refactored to add small inline helper function to add AuxCoord * Refactored the masked array handling and now filling masked arrays with default fill type. * Added test for Cell hashing * test_Cell.py: Converted unittest to pytest * Added tests for merging of scalar aux coords with mising data. * Removed unnecessary __main__ entry point from test_Cell.py * Missing hash test for bounded points * More rigorous test on masked data of merged AuxCoord * Migrated test_merge.py to pytest. * Moved tests/test_merge.py to tests/unit/merge/test_merge.py * What's new entry. * Fixed some typos in comment * Split out integration test from unit/merge/test_merge.py to integration/merge/test_merge.py
1 parent 9017fd2 commit 8dea269

File tree

8 files changed

+1326
-1202
lines changed

8 files changed

+1326
-1202
lines changed

docs/src/whatsnew/latest.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ This document explains the changes made to Iris for this release
4444
so it also converts the values of the attributes ``"actual_range"``,
4545
``"valid_max"``, ``"valid_min"``, and ``"valid_range"``. (:pull:`6416`)
4646

47+
#. `@ukmo-ccbunney`_ fixed loading and merging of masked data in scalar ``AuxCoords``.
48+
(:issue:`3584`, :pull:`6468`)
49+
4750
💣 Incompatible Changes
4851
=======================
4952

lib/iris/_merge.py

Lines changed: 67 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1406,10 +1406,37 @@ def axis_and_name(name):
14061406
# TODO: Consider appropriate sort order (ascending,
14071407
# descending) i.e. use CF positive attribute.
14081408
cells = sorted(indexes[name])
1409-
points = np.array(
1410-
[cell.point for cell in cells],
1411-
dtype=metadata[name].points_dtype,
1412-
)
1409+
points = [cell.point for cell in cells]
1410+
1411+
# If any points are masked then create a masked array type,
1412+
# otherwise create a standard ndarray.
1413+
if np.ma.masked in points:
1414+
dtype = metadata[name].points_dtype
1415+
1416+
# Create a pre-filled array with all elements set to `fill_value` for dtype
1417+
# This avoids the following problems when trying to do `np.ma.masked_array(points, dtype...)`:
1418+
# - Underlying data of masked elements is arbitrary
1419+
# - Can't convert a np.ma.masked to an integer type
1420+
# - For floating point arrays, numpy raises a warning about "converting masked elements to NaN"
1421+
fill_value = np.trunc(
1422+
np.ma.default_fill_value(dtype), dtype=dtype
1423+
) # truncation needed to deal with silly default fill values in Numpy
1424+
1425+
# create array of fill values; ensures we have consistent data under mask
1426+
arr_points = np.ma.repeat(dtype.type(fill_value), len(points))
1427+
1428+
# get mask index and filtered data then store in new array:
1429+
mask = np.array([p is np.ma.masked for p in points])
1430+
arr_points.mask = mask
1431+
1432+
# Need another list comprehension to avoid numpy warning "converting masked elements to NaN":
1433+
arr_points[~mask] = np.array(
1434+
[p for p in points if p is not np.ma.masked]
1435+
)
1436+
points = arr_points
1437+
else:
1438+
points = np.array(points, dtype=metadata[name].points_dtype)
1439+
14131440
if cells[0].bound is not None:
14141441
bounds = np.array(
14151442
[cell.bound for cell in cells],
@@ -1594,13 +1621,19 @@ def _build_coordinates(self):
15941621
# the bounds are not monotonic, so try building the coordinate,
15951622
# and if it fails make the coordinate into an auxiliary coordinate.
15961623
# This will ultimately make an anonymous dimension.
1597-
try:
1598-
coord = iris.coords.DimCoord(
1599-
template.points, bounds=template.bounds, **template.kwargs
1600-
)
1601-
dim_coords_and_dims.append(_CoordAndDims(coord, template.dims))
1602-
except ValueError:
1624+
1625+
# If the points contain masked values, if definitely cannot be built
1626+
# as a dim coord, so add it to the _aux_templates immediately
1627+
if np.ma.is_masked(template.points):
16031628
self._aux_templates.append(template)
1629+
else:
1630+
try:
1631+
coord = iris.coords.DimCoord(
1632+
template.points, bounds=template.bounds, **template.kwargs
1633+
)
1634+
dim_coords_and_dims.append(_CoordAndDims(coord, template.dims))
1635+
except ValueError:
1636+
self._aux_templates.append(template)
16041637

16051638
# There is the potential that there are still anonymous dimensions.
16061639
# Get a list of the dimensions which are not anonymous at this stage.
@@ -1609,26 +1642,34 @@ def _build_coordinates(self):
16091642
]
16101643

16111644
# Build the auxiliary coordinates.
1645+
def _build_aux_coord_from_template(template):
1646+
# kwarg not applicable to AuxCoord.
1647+
template.kwargs.pop("circular", None)
1648+
coord = iris.coords.AuxCoord(
1649+
template.points, bounds=template.bounds, **template.kwargs
1650+
)
1651+
aux_coords_and_dims.append(_CoordAndDims(coord, template.dims))
1652+
16121653
for template in self._aux_templates:
16131654
# Attempt to build a DimCoord and add it to the cube. If this
16141655
# fails e.g it's non-monontic or multi-dimensional or non-numeric,
16151656
# then build an AuxCoord.
1616-
try:
1617-
coord = iris.coords.DimCoord(
1618-
template.points, bounds=template.bounds, **template.kwargs
1619-
)
1620-
if len(template.dims) == 1 and template.dims[0] not in covered_dims:
1621-
dim_coords_and_dims.append(_CoordAndDims(coord, template.dims))
1622-
covered_dims.append(template.dims[0])
1623-
else:
1624-
aux_coords_and_dims.append(_CoordAndDims(coord, template.dims))
1625-
except ValueError:
1626-
# kwarg not applicable to AuxCoord.
1627-
template.kwargs.pop("circular", None)
1628-
coord = iris.coords.AuxCoord(
1629-
template.points, bounds=template.bounds, **template.kwargs
1630-
)
1631-
aux_coords_and_dims.append(_CoordAndDims(coord, template.dims))
1657+
1658+
# Check here whether points are masked? If so then it has to be an AuxCoord
1659+
if np.ma.is_masked(template.points):
1660+
_build_aux_coord_from_template(template)
1661+
else:
1662+
try:
1663+
coord = iris.coords.DimCoord(
1664+
template.points, bounds=template.bounds, **template.kwargs
1665+
)
1666+
if len(template.dims) == 1 and template.dims[0] not in covered_dims:
1667+
dim_coords_and_dims.append(_CoordAndDims(coord, template.dims))
1668+
covered_dims.append(template.dims[0])
1669+
else:
1670+
aux_coords_and_dims.append(_CoordAndDims(coord, template.dims))
1671+
except ValueError:
1672+
_build_aux_coord_from_template(template)
16321673

16331674
# Mix in the vector coordinates.
16341675
for item, dims in zip(

lib/iris/coords.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1237,6 +1237,9 @@ class Cell(namedtuple("Cell", ["point", "bound"])):
12371237
# Make this class's comparison operators override those of numpy
12381238
__array_priority__ = 100
12391239

1240+
# pre-computed hash for un-hashable `np.ma.masked` value
1241+
_MASKED_VALUE_HASH = hash("<<##MASKED_VALUE##>>")
1242+
12401243
def __new__(cls, point=None, bound=None):
12411244
"""Construct a Cell from point or point-and-bound information."""
12421245
if point is None:
@@ -1277,13 +1280,17 @@ def __add__(self, mod):
12771280

12781281
def __hash__(self):
12791282
# See __eq__ for the definition of when two cells are equal.
1283+
point = self.point
1284+
if np.ma.is_masked(point):
1285+
# `np.ma.masked` is unhashable
1286+
point = Cell._MASKED_VALUE_HASH
12801287
if self.bound is None:
1281-
return hash(self.point)
1288+
return hash(point)
12821289
bound = self.bound
12831290
rbound = bound[::-1]
12841291
if rbound < bound:
12851292
bound = rbound
1286-
return hash((self.point, bound))
1293+
return hash((point, bound))
12871294

12881295
def __eq__(self, other):
12891296
"""Compare Cell equality depending on the type of the object to be compared."""
@@ -2086,7 +2093,8 @@ def cell(self, index):
20862093
"""
20872094
index = iris.util._build_full_slice_given_keys(index, self.ndim)
20882095

2089-
point = tuple(np.array(self.core_points()[index], ndmin=1).flatten())
2096+
# Use `np.asanyaray` to preserve any masked values:
2097+
point = tuple(np.asanyarray(self.core_points()[index]).flatten())
20902098
if len(point) != 1:
20912099
raise IndexError(
20922100
"The index %s did not uniquely identify a single "
@@ -2809,6 +2817,9 @@ def _values(self, points):
28092817
# Check validity requirements for dimension-coordinate points.
28102818
self._new_points_requirements(points)
28112819
# Cast to a numpy array for masked arrays with no mask.
2820+
2821+
# NOTE: This is the point where any mask is lost on a coordinate if none of the
2822+
# values are actually masked. What if we wanted this to be an AuxCoord with a mask?
28122823
points = np.array(points)
28132824

28142825
super(DimCoord, self.__class__)._values.fset(self, points)

lib/iris/fileformats/netcdf/loader.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,14 @@ def _get_cf_var_data(cf_var, filename):
253253
if total_bytes < _LAZYVAR_MIN_BYTES:
254254
# Don't make a lazy array, as it will cost more memory AND more time to access.
255255
result = cf_var[:]
256+
257+
# Special handling of masked scalar value; this will be returned as
258+
# an `np.ma.masked` instance which will lose the original dtype.
259+
# Workaround for this it return a 1-element masked array of the
260+
# correct dtype. Note: this is not an issue for masked arrays,
261+
# only masked scalar values.
262+
if result is np.ma.masked:
263+
result = np.ma.masked_all(1, dtype=cf_var.datatype)
256264
else:
257265
# Get lazy chunked data out of a cf variable.
258266
# Creates Dask wrappers around data arrays for any cube components which

0 commit comments

Comments
 (0)