OxWearables · chanshing · Nov 6, 2024 · Nov 5, 2024 · Nov 6, 2024 · Nov 6, 2024
diff --git a/src/stepcount/stepcount.py b/src/stepcount/stepcount.py
@@ -1,5 +1,6 @@
 import warnings
 import os
+import sys
 import pathlib
 import urllib
 import shutil
@@ -82,28 +83,38 @@ def main():
     data, info_read = utils.read(
         args.filepath, 
         usecols=args.txyz, 
-        resample_hz=30 if args.model_type == 'ssl' else None,
+        start_time=args.start,
+        end_time=args.end,
         sample_rate=args.sample_rate, 
+        resample_hz=30 if args.model_type == 'ssl' else None,
         verbose=verbose
     )
     info.update(info_read)
 
-    # Set start/end times, if given
-    if args.start is not None:
-        data = data.loc[args.start:]
-    if args.end is not None:
-        data = data.loc[:args.end]
-
     # Exclusion: first/last days
     if args.exclude_first_last is not None:
-        data = utils.exclude_first_last_days(data, args.exclude_first_last)
+        data = utils.drop_first_last_days(data, args.exclude_first_last)
 
     # Exclusion: days with wear time below threshold
     if args.exclude_wear_below is not None:
-        data = utils.exclude_wear_below_days(data, args.exclude_wear_below)
-
-    # Summarize wear time
-    info.update(summarize_wear_time(data))
+        data = utils.flag_wear_below_days(data, args.exclude_wear_below)
+
+    # Update wear time stats after exclusions
+    info.update(utils.calculate_wear_stats(data))
+
+    # If no data, save Info.json and exit
+    if len(data) == 0 or data.isna().any(axis=1).all():  # TODO: check na only on x,y,z cols?
+        # Save Info.json
+        with open(f"{outdir}/{basename}-Info.json", 'w') as f:
+            json.dump(info, f, indent=4, cls=utils.NpEncoder)
+        # Print
+        print("\nSummary\n-------")
+        print(json.dumps(
+            {k: v for k, v in info.items() if not re.search(r'_Weekend|_Weekday|_Hour\d{2}', k)},
+            indent=4, cls=utils.NpEncoder
+        ))
+        print("No data to process. Exiting early...")
+        sys.exit(0)
 
     # Run model
     if verbose:
@@ -428,48 +439,6 @@ def load_model(
     return joblib.load(pth)
 
 
-def summarize_wear_time(
-    data: pd.DataFrame,
-):
-    """
-    Summarize wear time information from raw accelerometer data.
-
-    Parameters:
-    - data (pd.DataFrame): A pandas DataFrame of raw accelerometer data with columns 'x', 'y', 'z'.
-
-    Returns:
-    - dict: A dictionary containing various wear time statistics.
-
-    Example:
-        summary = summarize_wear_time(data)
-    """
-
-    dt = utils.infer_freq(data.index).total_seconds()
-    na = data.isna().any(axis=1)
-
-    if len(data) == 0 or na.all():
-        wear_start = None
-        wear_end = None
-        nonwear_time = len(data) * dt
-        wear_time = 0.0
-        covers24hok = 0
-    else:
-        wear_start = data.first_valid_index().strftime("%Y-%m-%d %H:%M:%S")
-        wear_end = data.last_valid_index().strftime("%Y-%m-%d %H:%M:%S")
-        nonwear_time = na.sum() * dt / (60 * 60 * 24)
-        wear_time = len(data) * dt / (60 * 60 * 24) - nonwear_time 
-        coverage = (~na).groupby(na.index.hour).mean()
-        covers24hok = int(len(coverage) == 24 and coverage.min() >= 0.01)
-
-    return {
-        'WearStartTime': wear_start,
-        'WearEndTime': wear_end,
-        'WearTime(days)': wear_time,
-        'NonwearTime(days)': nonwear_time,
-        'Covers24hOK': covers24hok
-    }
-
-
 def summarize_enmo(
     data: pd.DataFrame,
     adjust_estimates: bool = False

diff --git a/src/stepcount/utils.py b/src/stepcount/utils.py
@@ -12,8 +12,10 @@
 def read(
     filepath: str,
     usecols: str = 'time,x,y,z',
-    resample_hz: str = 'uniform',
+    start_time: str = None,
+    end_time: str = None,
     sample_rate: float = None,
+    resample_hz: str = 'uniform',
     verbose: bool = True
 ):
     """
@@ -91,8 +93,6 @@ def read(
             "Device": ftype,
             "Filesize(MB)": fsize,
             "SampleRate": sample_rate,
-            "StartTime": data.index[0].strftime('%Y-%m-%d %H:%M:%S'),
-            "EndTime": data.index[-1].strftime('%Y-%m-%d %H:%M:%S')
         })
 
     elif ftype in (".cwa", ".gt3x", ".bin"):
@@ -112,10 +112,73 @@ def read(
     if 'ResampleRate' not in info:
         info['ResampleRate'] = info['SampleRate']
 
+    # Trim the data if start/end times are specified
+    if start_time is not None:
+        data = data.loc[start_time:]
+    if end_time is not None:
+        data = data.loc[:end_time]
+
+    # Update wear stats
+    info.update(calculate_wear_stats(data))
+
     return data, info
 
 
-def exclude_wear_below_days(
+def calculate_wear_stats(data: pd.DataFrame):
+    """
+    Calculate wear time and related information from raw accelerometer data.
+
+    Parameters:
+    - data (pd.DataFrame): A pandas DataFrame of raw accelerometer data with columns 'x', 'y', 'z' and a DatetimeIndex. A row with NaN in any column is treated as non-wear.
+
+    Returns:
+    - dict: 'StartTime', 'EndTime', 'WearStartTime', 'WearEndTime' (formatted strings, or None when unavailable), 'WearTime(days)', 'NonwearTime(days)' and 'Covers24hOK' (1 if all 24 hours of the day are present and each is at least 1% worn, else 0).
+
+    Example:
+        info = calculate_wear_stats(data)
+    """
+    TIME_FORMAT = "%Y-%m-%d %H:%M:%S"
+    SECONDS_PER_DAY = 60 * 60 * 24
+
+    n_data = len(data)
+
+    if n_data == 0:
+        start_time = None
+        end_time = None
+        wear_start_time = None
+        wear_end_time = None
+        nonwear_duration = 0.0
+        wear_duration = 0.0
+        covers24hok = 0
+
+    else:
+        na = data.isna().any(axis=1)  # TODO: check na only on x,y,z cols?
+        dt = infer_freq(data.index).total_seconds()
+        start_time = data.index[0].strftime(TIME_FORMAT)
+        end_time = data.index[-1].strftime(TIME_FORMAT)
+        # Take the wear bounds from the same mask as the durations: first/last_valid_index()
+        # would accept a row with any non-NaN column, which `na` counts as non-wear.
+        worn = data.index[~na.to_numpy()]
+        wear_start_time = worn[0].strftime(TIME_FORMAT) if len(worn) > 0 else None
+        wear_end_time = worn[-1].strftime(TIME_FORMAT) if len(worn) > 0 else None
+        nonwear_duration = na.sum() * dt / SECONDS_PER_DAY
+        wear_duration = n_data * dt / SECONDS_PER_DAY - nonwear_duration
+        # Fraction of worn rows per hour of day; all 24 hours must be present
+        coverage = (~na).groupby(na.index.hour).mean()
+        covers24hok = int(len(coverage) == 24 and coverage.min() >= 0.01)
+
+    return {
+        'StartTime': start_time,
+        'EndTime': end_time,
+        'WearStartTime': wear_start_time,
+        'WearEndTime': wear_end_time,
+        'WearTime(days)': wear_duration,
+        'NonwearTime(days)': nonwear_duration,
+        'Covers24hOK': covers24hok
+    }
+
+
+def flag_wear_below_days(
     x: Union[pd.Series, pd.DataFrame],
     min_wear: str = '12H'
 ):
@@ -154,34 +217,34 @@ def exclude_wear_below_days(
     return x
 
 
-def exclude_first_last_days(
+def drop_first_last_days(
     x: Union[pd.Series, pd.DataFrame],
     first_or_last='both'
 ):
     """
-    Set the values of the first day, last day, or both to NaN in a time series.
+    Drop the rows belonging to the first calendar day, the last calendar day, or both from a time series.
 
     Parameters:
     - x (pd.Series or pd.DataFrame): A pandas Series or DataFrame with a DatetimeIndex representing time series data.
-    - first_or_last (str, optional): A string indicating which days to exclude. Options are 'first', 'last', or 'both'. Default is 'both'.
+    - first_or_last (str, optional): A string indicating which days to drop. Options are 'first', 'last', or 'both'. Default is 'both'. Any other value leaves the data unchanged.
 
     Returns:
-    - pd.Series or pd.DataFrame: A pandas Series or DataFrame with the values of the specified days set to NaN.
+    - pd.Series or pd.DataFrame: A pandas Series or DataFrame with the rows of the specified days removed (the result is a filtered selection; the input is not modified in place).
 
     Example:
-        # Exclude the first day from the series
-        series = exclude_first_last_days(series, first_or_last='first')
+        # Drop the first day from the series
+        series = drop_first_last_days(series, first_or_last='first')
     """
     if len(x) == 0:
-        print("No data to exclude")
+        print("No data to drop")
         return x
 
     if first_or_last == 'first':
-        x[x.index.date == x.index.date[0]] = np.nan
+        x = x[x.index.date != x.index.date[0]]
     elif first_or_last == 'last':
-        x[x.index.date == x.index.date[-1]] = np.nan
+        x = x[x.index.date != x.index.date[-1]]
     elif first_or_last == 'both':
-        x[(x.index.date == x.index.date[0]) | (x.index.date == x.index.date[-1])] = np.nan
+        x = x[(x.index.date != x.index.date[0]) & (x.index.date != x.index.date[-1])]
     return x