feat: split detect_nonwear into find_nonwear_segments and flag_nonwear

- Extract the main nonwear-finding logic into a new function `find_nonwear_segments`, and rename the existing function `detect_nonwear` to `flag_nonwear`.
- Both `find_nonwear_segments` and `flag_nonwear` are made available via the processing module.
OxWearables · Oct 23, 2024 · e0fdd60 · e0fdd60
1 parent 791a08f
commit e0fdd60
Show file tree

Hide file tree

Showing 4 changed files with 57 additions and 35 deletions.
diff --git a/memory_profiler.py b/memory_profiler.py
@@ -53,7 +53,7 @@ def main():
     if args.detect_nonwear:
         timer.start('Detecting nonwear...')
         with memray.Tracker(f'{memray_dir}/detect_nonwear.bin'):
-            data, info_nonwear = P.detect_nonwear(data, patience='1m')
+            data, info_nonwear = P.flag_nonwear(data, patience='1m')
         timer.stop()
         info.update(info_nonwear)
 

diff --git a/src/actipy/processing.py b/src/actipy/processing.py
@@ -7,7 +7,7 @@
 import warnings
 
 
-__all__ = ['lowpass', 'calibrate_gravity', 'detect_nonwear', 'resample', 'get_stationary_indicator']
+__all__ = ['lowpass', 'calibrate_gravity', 'flag_nonwear', 'resample', 'get_stationary_indicator', 'find_nonwear_segments']
 
 
 def resample(data, sample_rate, dropna=False, chunksize=1_000_000):
@@ -165,9 +165,9 @@ def lowpass(data, data_sample_rate, cutoff_rate=20, chunksize=1_000_000):
     return data, info
 
 
-def detect_nonwear(data, patience='90m', window='10s', stdtol=15 / 1000):
+def flag_nonwear(data, patience='90m', window='10s', stdtol=15 / 1000):
     """
-    Detect nonwear episodes based on long periods of no movement.
+    Flag nonwear episodes in the data by setting them to NA. Non-wear episodes are inferred from long periods of no movement.
 
     :param pandas.DataFrame data: A pandas.DataFrame of acceleration time-series. The index must be a DateTimeIndex.
     :type data: pandas.DataFrame.
@@ -183,34 +183,10 @@ def detect_nonwear(data, patience='90m', window='10s', stdtol=15 / 1000):
 
     info = {}
 
-    stationary_indicator = (  # this is more memory friendly than data[['x', 'y', 'z']].std()
-        data['x'].resample(window, origin='start').std().lt(stdtol)
-        & data['y'].resample(window, origin='start').std().lt(stdtol)
-        & data['z'].resample(window, origin='start').std().lt(stdtol)
-    )
+    nonwear_segments = find_nonwear_segments(data, patience=patience, window=window, stdtol=stdtol)
 
-    segment_edges = (stationary_indicator != stationary_indicator.shift(1))
-    segment_edges.iloc[0] = True  # first edge is always True
-    segment_ids = segment_edges.cumsum()
-    stationary_segment_ids = segment_ids[stationary_indicator]
-    stationary_segment_lengths = (
-        stationary_segment_ids
-        .groupby(stationary_segment_ids)
-        .agg(
-            start_time=lambda x: x.index[0],
-            length=lambda x: x.index[-1] - x.index[0]
-        )
-        .set_index('start_time')
-        .squeeze(axis=1)
-        # dtype defaults to int64 when series is empty, so
-        # astype('timedelta64[ns]') makes sure it's always a timedelta,
-        # otherwise comparison with Timedelta(patience) below will fail
-        .astype('timedelta64[ns]')
-    )
-    nonwear_segment_lengths = stationary_segment_lengths[stationary_segment_lengths > pd.Timedelta(patience)]
-
-    count_nonwear = len(nonwear_segment_lengths)
-    total_nonwear = nonwear_segment_lengths.sum().total_seconds()
+    count_nonwear = len(nonwear_segments)
+    total_nonwear = nonwear_segments.sum().total_seconds()
     total_wear = (
         data.index.to_series().diff()
         .pipe(lambda x: x[x < pd.Timedelta('1s')].sum())
@@ -223,7 +199,7 @@ def detect_nonwear(data, patience='90m', window='10s', stdtol=15 / 1000):
 
     # Flag nonwear segments
     data = data.copy(deep=True)  # copy to avoid modifying original data
-    for start_time, length in nonwear_segment_lengths.items():
+    for start_time, length in nonwear_segments.items():
         data.loc[start_time:start_time + length] = np.nan
 
     return data, info
@@ -485,6 +461,52 @@ def fn(data):
     return stationary_indicator
 
 
+def find_nonwear_segments(data, patience='90m', window='10s', stdtol=15 / 1000):
+    """
+    Find nonwear episodes based on long periods of no movement.
+
+    :param pandas.DataFrame data: A pandas.DataFrame of acceleration time-series. The index must be a DateTimeIndex.
+    :type data: pandas.DataFrame.
+    :param patience: Minimum length of the stationary period to be flagged as non-wear. Defaults to 90 minutes ("90m").
+    :type patience: str, optional
+    :param window: Rolling window to use to check for stationary periods. Defaults to 10 seconds ("10s").
+    :type window: str, optional
+    :param stdtol: Standard deviation under which the window is considered stationary. Defaults to 15 milligravity (0.015).
+    :type stdtol: float, optional
+    :return: A Series where the DatetimeIndex indicates the start times of each non-wear segment and the values are the length
+        of each segment, in timedelta64[ns].
+    :rtype: pandas.Series
+    """
+
+    stationary_indicator = (  # this is more memory friendly than data[['x', 'y', 'z']].std()
+        data['x'].resample(window, origin='start').std().lt(stdtol)
+        & data['y'].resample(window, origin='start').std().lt(stdtol)
+        & data['z'].resample(window, origin='start').std().lt(stdtol)
+    )
+
+    segment_edges = (stationary_indicator != stationary_indicator.shift(1))
+    segment_edges.iloc[0] = True  # first edge is always True
+    segment_ids = segment_edges.cumsum()
+    stationary_segment_ids = segment_ids[stationary_indicator]
+    stationary_segment_lengths = (
+        stationary_segment_ids
+        .groupby(stationary_segment_ids)
+        .agg(
+            start_time=lambda x: x.index[0],
+            length=lambda x: x.index[-1] - x.index[0]
+        )
+        .set_index('start_time')
+        .squeeze(axis=1)
+        # dtype defaults to int64 when series is empty, so
+        # astype('timedelta64[ns]') makes sure it's always a timedelta,
+        # otherwise comparison with Timedelta(patience) below will fail
+        .astype('timedelta64[ns]')
+    )
+    nonwear_segment_lengths = stationary_segment_lengths[stationary_segment_lengths > pd.Timedelta(patience)]
+
+    return nonwear_segment_lengths
+
+
 def get_wear_time(t, tol=0.1):
     """ Return wear time in seconds and number of interrupts. """
     tdiff = t.diff()

diff --git a/src/actipy/reader.py b/src/actipy/reader.py
@@ -110,7 +110,7 @@ def read_device(input_file,
 
     if detect_nonwear:
         timer.start("Nonwear detection...")
-        data, info_nonwear = P.detect_nonwear(data)
+        data, info_nonwear = P.flag_nonwear(data)
         info.update(info_nonwear)
         timer.stop()
 
@@ -176,7 +176,7 @@ def process(data, sample_rate,
 
     if detect_nonwear:
         timer.start("Nonwear detection...")
-        data, info_nonwear = P.detect_nonwear(data)
+        data, info_nonwear = P.flag_nonwear(data)
         info.update(info_nonwear)
         timer.stop()
 

diff --git a/tests/test_all.py b/tests/test_all.py
@@ -101,7 +101,7 @@ def test_detect_nonwear():
 
     data, info = read_device()
     # Use a bad patience to force nonwear detection
-    data, info_nonwear = P.detect_nonwear(data, patience='1m')
+    data, info_nonwear = P.flag_nonwear(data, patience='1m')
 
     info_nonwear_ref = {
         'WearTime(days)': 0.1203330787037037,