Skip to content

Commit

Permalink
Speed up climatology checks
Browse files Browse the repository at this point in the history
  • Loading branch information
kwilcox committed Nov 5, 2019
1 parent 5a9d9a4 commit 3d8efba
Show file tree
Hide file tree
Showing 2 changed files with 124 additions and 18 deletions.
73 changes: 56 additions & 17 deletions ioos_qc/qartod.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,61 @@ def add(self,
)
)

def check(self, tinp, inp, zinp):
    """Flag every value in ``inp`` against the configured climatology members.

    Each member is applied to the whole input at once using boolean
    indexes rather than looking values up one data point at a time.

    Args:
        tinp: Time coordinate for each value. When a member defines a
            ``period``, the attribute of that name is read from ``tinp``
            and converted with ``.to_series()`` before comparison.
        inp: Masked array of values to check (its ``mask`` and ``size``
            are used).
        zinp: Masked array of depths aligned with ``inp``. Masked entries
            are treated as "no depth".

    Returns:
        A masked ``uint8`` array holding one flag per element of ``inp``.
        Values start as UNKNOWN, masked values become MISSING, and values
        matched by a member become SUSPECT or GOOD.
    """
    # Start with everything as UNKNOWN; members overwrite what they cover.
    flag_arr = np.ma.empty(inp.size, dtype='uint8')
    flag_arr.fill(QartodFlags.UNKNOWN)

    # If the value is masked set the flag to MISSING
    flag_arr[inp.mask] = QartodFlags.MISSING

    # Whether any depth input exists does not depend on the member, so work
    # it out once. Count the unmasked entries instead of testing their
    # truthiness: `zinp.any()` is False when every depth is 0, which would
    # wrongly treat surface-only input as having no depths at all.
    has_z = zinp.count() > 0

    # Iterate over each member and apply its spans on the input data.
    # Member spans are applied in order and any data points that fall into
    # more than one member are flagged by each one.
    for m in self._members:

        if m.period is not None:
            # If a period is defined, extract the attribute from the
            # pd.DatetimeIndex object before comparison. The min and max
            # values are in this period unit already.
            tinp_copy = getattr(tinp, m.period).to_series()
        else:
            # If a period isn't defined, compare against the times as given.
            # The name 'tinp_copy' is kept to align with the branch above.
            tinp_copy = tinp

        # If a zspan is defined but we don't have z input (zinp), skip this member
        if not isnan(m.zspan) and not has_z:
            continue

        # Indexes that align with the T (lower bound exclusive, upper inclusive)
        t_idx = (tinp_copy > m.tspan.minv) & (tinp_copy <= m.tspan.maxv)

        # Indexes that align with the Z
        if not isnan(m.zspan):
            # Only test non-masked values between the min and max
            z_idx = (~zinp.mask) & (zinp > m.zspan.minv) & (zinp <= m.zspan.maxv)
        else:
            # Only test the values with masked Z, ie values with no Z
            z_idx = zinp.mask

        # Combine the T and Z indexes
        values_idx = (t_idx & z_idx)

        # Suspect data for this value span. Combined with the values_idx it
        # represents the subset of data that should be suspect for this member.
        # We split it into two indexes so we can also set all values outside of the
        # suspect range to GOOD by taking the inverse of the suspect_idx
        suspect_idx = (inp < m.vspan.minv) | (inp > m.vspan.maxv)

        # Silence "invalid value" floating-point warnings while assigning.
        with np.errstate(invalid='ignore'):
            flag_arr[(values_idx & suspect_idx)] = QartodFlags.SUSPECT
            flag_arr[(values_idx & ~suspect_idx)] = QartodFlags.GOOD

    return flag_arr

@staticmethod
def convert(config):
# Create a ClimatologyConfig object if one was not passed in
Expand Down Expand Up @@ -364,23 +419,7 @@ def climatology_test(config : Union[ClimatologyConfig, Sequence[Dict[str, Tuple]
inp = inp.flatten()
zinp = zinp.flatten()

# Start with everything as passing (1)
flag_arr = np.ma.ones(inp.size, dtype='uint8')

# If the value is masked set the flag to MISSING
flag_arr[inp.mask] = QartodFlags.MISSING

for i, (tind, ind, zind) in enumerate(zip(tinp, inp, zinp)):
minv, maxv = config.values(tind, zind)
if minv is None or maxv is None:
# Data point is outside the time/depth
flag_arr[i] = QartodFlags.UNKNOWN
else:
# Flag suspect outside of climatology span
with np.errstate(invalid='ignore'):
if ind < minv or ind > maxv:
flag_arr[i] = QartodFlags.SUSPECT

flag_arr = config.check(tinp, inp, zinp)
return flag_arr.reshape(original_shape)


Expand Down
69 changes: 68 additions & 1 deletion tests/test_qartod.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,73 @@ def test_weekofyear_periods(self):
self._run_test(cc)


class QartodClimatologyDepthTest(unittest.TestCase):
    """Climatology tests for a config in which every member defines a depth span."""

    def setUp(self):
        """Build a config with two members sharing a time span but not a depth span."""
        self.cc = qartod.ClimatologyConfig()
        # with depths
        self.cc.add(
            tspan=(np.datetime64('2012-01'), np.datetime64('2013-01')),
            vspan=(50, 60),
            zspan=(0, 10)
        )
        # same as above, but different depths
        self.cc.add(
            tspan=(np.datetime64('2012-01'), np.datetime64('2013-01')),
            vspan=(70, 80),
            zspan=(10, 100)
        )

    def _run_test(self, test_inputs, expected_result):
        """Run the climatology test and compare its flags to ``expected_result``.

        Args:
            test_inputs: Sequence of ``(time, value, depth)`` tuples.
            expected_result: Expected flag for each tuple, in order.

        The values are submitted three ways: as the plain tuple, as a float
        numpy array, and as that array passed through the ``dask_arr`` helper.
        """
        times, values, depths = zip(*test_inputs)
        # Use a concrete dtype: passing the abstract ``np.floating`` as a
        # dtype is deprecated by NumPy (it resolved to float64).
        float_values = np.asarray(values, dtype=np.float64)
        inputs = [
            values,
            float_values,
            dask_arr(np.asarray(values, dtype=np.float64))
        ]

        for candidate in inputs:
            results = qartod.climatology_test(
                config=self.cc,
                tinp=times,
                inp=candidate,
                zinp=depths
            )
            npt.assert_array_equal(
                results,
                np.ma.array(expected_result)
            )

    def test_climatology_test_all_unknown(self):
        """No depths are supplied, so every value is expected to keep flag 2."""
        # Every member of the config defines a depth span, so no member is
        # applied when no depths are passed in for any of the values
        test_inputs = [
            (
                np.datetime64('2011-01-02'),
                9,
                None
            ),
            (
                np.datetime64('2011-01-02'),
                11,
                None
            ),
            (
                np.datetime64('2011-01-02'),
                21,
                None
            ),
            # not run, outside given time ranges
            (
                np.datetime64('2015-01-02'),
                21,
                None
            ),
        ]
        expected_result = [2, 2, 2, 2]
        self._run_test(test_inputs, expected_result)


class QartodClimatologyTest(unittest.TestCase):

def setUp(self):
Expand Down Expand Up @@ -507,7 +574,7 @@ def test_climatology_test(self):
def test_climatology_test_seconds_since_epoch(self):
test_inputs = [
(
1293926400,
1293926400, # Sunday, January 2, 2011 12:00:00 AM UTC
11,
None
)
Expand Down

0 comments on commit 3d8efba

Please sign in to comment.