diff --git a/src/stepcount/stepcount.py b/src/stepcount/stepcount.py
index fc8aefd..6e0426e 100644
--- a/src/stepcount/stepcount.py
+++ b/src/stepcount/stepcount.py
@@ -81,14 +81,15 @@ def main():
     Y, W, T_steps = model.predict_from_frame(data)
 
     # Save step counts
-    Y.to_csv(f"{outdir}/{basename}-Steps.csv")
+    Y.to_csv(f"{outdir}/{basename}-Steps.csv.gz")
     # Save timestamps of each step
-    T_steps.to_csv(f"{outdir}/{basename}-StepTimes.csv", index=False)
+    T_steps.to_csv(f"{outdir}/{basename}-StepTimes.csv.gz", index=False)
 
-    # Summary
-    summary = summarize(Y, model.steptol)
-    summary['hourly'].to_csv(f"{outdir}/{basename}-HourlySteps.csv")
-    summary['daily_stats'].to_csv(f"{outdir}/{basename}-DailySteps.csv")
+    # Steps summary
+    summary = summarize_steps(Y, model.steptol)
+    summary['minutely'].to_csv(f"{outdir}/{basename}-MinutelySteps.csv.gz")
+    summary['hourly'].to_csv(f"{outdir}/{basename}-HourlySteps.csv.gz")
+    summary['daily'].to_csv(f"{outdir}/{basename}-DailySteps.csv.gz")
     info['TotalSteps'] = summary['total']
     info['StepsDayAvg'] = summary['daily_avg']
     info['StepsDayMed'] = summary['daily_med']
@@ -113,10 +114,11 @@ def main():
     info['Steps75thDayMedAt'] = summary['daily_ptile_at_med']['p75_at']
     info['Steps95thDayMedAt'] = summary['daily_ptile_at_med']['p95_at']
 
-    # Impute missing periods & recalculate summary
-    summary_adj = summarize(Y, model.steptol, adjust_estimates=True)
-    summary_adj['hourly'].to_csv(f"{outdir}/{basename}-HourlyStepsAdjusted.csv")
-    summary_adj['daily_stats'].to_csv(f"{outdir}/{basename}-DailyStepsAdjusted.csv")
+    # Steps summary, adjusted
+    summary_adj = summarize_steps(Y, model.steptol, adjust_estimates=True)
+    summary_adj['minutely'].to_csv(f"{outdir}/{basename}-MinutelyStepsAdjusted.csv.gz")
+    summary_adj['hourly'].to_csv(f"{outdir}/{basename}-HourlyStepsAdjusted.csv.gz")
+    summary_adj['daily'].to_csv(f"{outdir}/{basename}-DailyStepsAdjusted.csv.gz")
     info['TotalStepsAdjusted'] = summary_adj['total']
     info['StepsDayAvgAdjusted'] = summary_adj['daily_avg']
     info['StepsDayMedAdjusted'] = summary_adj['daily_med']
@@ -149,15 +151,16 @@ def main():
     print("\nSummary\n-------")
     print(json.dumps(info, indent=4, cls=NpEncoder))
     print("\nEstimated Daily Stats\n---------------------")
-    print(summary['daily_stats'])
+    print(summary['daily'])
     print("\nEstimated Daily Stats (Adjusted)\n---------------------")
-    print(summary_adj['daily_stats'])
+    print(summary_adj['daily'])
 
     after = time.time()
     print(f"Done! ({round(after - before,2)}s)")
 
 
-def summarize(Y, steptol=3, adjust_estimates=False):
+def summarize_steps(Y, steptol=3, adjust_estimates=False):
+    """ Summarize step count data """
 
     if adjust_estimates:
         Y = impute_missing(Y)
@@ -207,9 +210,9 @@ def _tdelta_to_str(tdelta):
 
     # steps
     total = np.round(Y.agg(_sum))  # total steps
-    hourly = Y.resample('H').agg(_sum).round().rename('Steps')  # steps, hourly
-    daily = Y.resample('D').agg(_sum).round().rename('Steps')  # steps, daily
-    minutely = Y.resample('T').agg(_sum).round().rename('Steps')  # steps, minutely
+    hourly = Y.resample('H').agg(_sum).rename('Steps')  # steps, hourly
+    daily = Y.resample('D').agg(_sum).rename('Steps')  # steps, daily
+    minutely = Y.resample('T').agg(_sum).rename('Steps')  # steps, minutely
 
     # steps, daily stats
     if not adjust_estimates:
@@ -220,19 +223,19 @@ def _tdelta_to_str(tdelta):
         daily_min = np.round(daily.min())
         daily_max = np.round(daily.max())
     else:
-        weekdaily = daily.groupby(daily.index.weekday).mean()
-        daily_avg = np.round(weekdaily.mean())
+        day_of_week = daily.groupby(daily.index.weekday).mean()
+        daily_avg = np.round(day_of_week.mean())
         with warnings.catch_warnings():
             warnings.filterwarnings('ignore', message='Mean of empty slice')
-            daily_med = np.round(weekdaily.median())
-            daily_min = np.round(weekdaily.min())
-            daily_max = np.round(weekdaily.max())
+            daily_med = np.round(day_of_week.median())
+            daily_min = np.round(day_of_week.min())
+            daily_max = np.round(day_of_week.max())
 
     # walking
     dt = pd.Timedelta(infer_freq(Y.index)).seconds
     W = Y.mask(~Y.isna(), Y >= steptol)
     total_walk = np.round(W.agg(_sum) * dt / 60)
-    daily_walk = (W.resample('D').agg(_sum) * dt / 60).round().rename('Walk(mins)')
+    daily_walk = (W.resample('D').agg(_sum) * dt / 60).rename('Walk(mins)')
 
     # walking, daily stats
     if not adjust_estimates:
@@ -243,38 +246,41 @@ def _tdelta_to_str(tdelta):
         daily_walk_min = np.round(daily_walk.min())
         daily_walk_max = np.round(daily_walk.max())
     else:
-        weekdaily_walk = daily_walk.groupby(daily_walk.index.weekday).mean()
-        daily_walk_avg = np.round(weekdaily_walk.mean())
+        day_of_week_walk = daily_walk.groupby(daily_walk.index.weekday).mean()
+        daily_walk_avg = np.round(day_of_week_walk.mean())
         with warnings.catch_warnings():
             warnings.filterwarnings('ignore', message='Mean of empty slice')
-            daily_walk_med = np.round(weekdaily_walk.median())
-            daily_walk_min = np.round(weekdaily_walk.min())
-            daily_walk_max = np.round(weekdaily_walk.max())
+            daily_walk_med = np.round(day_of_week_walk.median())
+            daily_walk_min = np.round(day_of_week_walk.min())
+            daily_walk_max = np.round(day_of_week_walk.max())
 
     # cadence https://jamanetwork.com/journals/jama/fullarticle/2763292
-    daily_cadence_peak1 = minutely.resample('D').agg(_max, n=1)
-    daily_cadence_peak30 = minutely.resample('D').agg(_max, n=30)
-    daily_cadence_p95 = minutely.resample('D').agg(_p95, steptol=steptol * 60 / dt)  # scale steptol to steps/min
+    daily_cadence_peak1 = minutely.resample('D').agg(_max, n=1).rename('CadencePeak1')
+    daily_cadence_peak30 = minutely.resample('D').agg(_max, n=30).rename('CadencePeak30')
+    daily_cadence_p95 = minutely.resample('D').agg(_p95, steptol=steptol * 60 / dt).rename('Cadence95th')  # scale steptol to steps/min
     if not adjust_estimates:
         cadence_peak1 = np.round(daily_cadence_peak1.mean())
         cadence_peak30 = np.round(daily_cadence_peak30.mean())
         cadence_p95 = np.round(daily_cadence_p95.mean())
     else:
-        weekdaily_cadence_peak1 = daily_cadence_peak1.groupby(daily_cadence_peak1.index.weekday).mean()
-        weekdaily_cadence_peak30 = daily_cadence_peak30.groupby(daily_cadence_peak30.index.weekday).mean()
-        weekdaily_cadence_p95 = daily_cadence_p95.groupby(daily_cadence_p95.index.weekday).mean()
-        cadence_peak1 = np.round(weekdaily_cadence_peak1.mean())
-        cadence_peak30 = np.round(weekdaily_cadence_peak30.mean())
-        cadence_p95 = np.round(weekdaily_cadence_p95.mean())
+        day_of_week_cadence_peak1 = daily_cadence_peak1.groupby(daily_cadence_peak1.index.weekday).mean()
+        day_of_week_cadence_peak30 = daily_cadence_peak30.groupby(daily_cadence_peak30.index.weekday).mean()
+        day_of_week_cadence_p95 = daily_cadence_p95.groupby(daily_cadence_p95.index.weekday).mean()
+        cadence_peak1 = np.round(day_of_week_cadence_peak1.mean())
+        cadence_peak30 = np.round(day_of_week_cadence_peak30.mean())
+        cadence_p95 = np.round(day_of_week_cadence_p95.mean())
 
     daily_ptile_at = Y.groupby(pd.Grouper(freq='D')).apply(_percentile_at).unstack(1)
     daily_ptile_at_avg = daily_ptile_at.mean()
     daily_ptile_at_med = daily_ptile_at.median()
 
     # daily stats
-    daily_stats = pd.concat([
-        daily_walk,
-        daily,
+    daily = pd.concat([
+        pd.to_numeric(daily_walk.round(), downcast='integer'),
+        pd.to_numeric(daily.round(), downcast='integer'),
+        pd.to_numeric(daily_cadence_peak1.round(), downcast='integer'),
+        pd.to_numeric(daily_cadence_peak30.round(), downcast='integer'),
+        pd.to_numeric(daily_cadence_p95.round(), downcast='integer'),
         daily_ptile_at.rename(columns={
             'p05_at': 'Steps5thAt',
             'p25_at': 'Steps25thAt',
@@ -286,14 +292,13 @@ def _tdelta_to_str(tdelta):
 
     # convert units
     total = nanint(total)
-    hourly = pd.to_numeric(hourly, downcast='integer')
-    daily = pd.to_numeric(daily, downcast='integer')
+    minutely = pd.to_numeric(minutely.round(), downcast='integer')
+    hourly = pd.to_numeric(hourly.round(), downcast='integer')
    daily_avg = nanint(daily_avg)
     daily_med = nanint(daily_med)
     daily_min = nanint(daily_min)
     daily_max = nanint(daily_max)
     total_walk = nanint(total_walk)
-    daily_walk = pd.to_numeric(daily_walk, downcast='integer')
     daily_walk_avg = nanint(daily_walk_avg)
     daily_walk_med = nanint(daily_walk_med)
     daily_walk_min = nanint(daily_walk_min)
@@ -306,8 +311,9 @@ def _tdelta_to_str(tdelta):
 
     return {
         'total': total,
+        'minutely': minutely,
        'hourly': hourly,
-        'daily_stats': daily_stats,
+        'daily': daily,
         'daily_avg': daily_avg,
         'daily_med': daily_med,
         'daily_min': daily_min,
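For orientation, the snippet below is a minimal, self-contained sketch of the aggregation pattern that the summarize_steps hunks above operate on: resampling an epoch-level step series to minutely/hourly/daily totals, masking epochs against steptol to get walking minutes, taking peak-minute cadences, and computing the "adjusted" daily average by averaging each weekday first. It is an illustration only, not the package's implementation: the synthetic index, epoch length, and variable names are assumptions for the example, and the real function additionally handles missing data (via _sum, nanint, and impute_missing) and the percentile-time columns.

import numpy as np
import pandas as pd

# Synthetic 10-second epoch step counts spanning two days (assumed data;
# in the real pipeline Y comes from model.predict_from_frame).
rng = np.random.default_rng(42)
index = pd.date_range("2024-01-01", periods=2 * 24 * 360, freq="10s")
Y = pd.Series(rng.poisson(1.0, size=len(index)).astype(float), index=index, name="Steps")

steptol = 3   # minimum steps per epoch for the epoch to count as walking
dt = 10       # epoch length in seconds (inferred from the index in the real code)

# Steps at several granularities (mirrors the minutely/hourly/daily series)
minutely = Y.resample("T").sum().rename("Steps")
hourly = Y.resample("H").sum().rename("Steps")
daily = Y.resample("D").sum().rename("Steps")

# Walking time: epochs at or above steptol, converted to minutes per day
daily_walk = ((Y >= steptol).resample("D").sum() * dt / 60).rename("Walk(mins)")

# Cadence: best single minute, and mean of the best 30 minutes, per day
daily_cadence_peak1 = minutely.resample("D").max().rename("CadencePeak1")
daily_cadence_peak30 = (
    minutely.resample("D").apply(lambda m: m.nlargest(30).mean()).rename("CadencePeak30")
)

# "Adjusted" daily average: average each weekday first, then average the seven
# weekday means, so over- or under-represented weekdays carry equal weight
day_of_week = daily.groupby(daily.index.weekday).mean()
daily_avg_adjusted = round(day_of_week.mean())

# Daily table analogous to summary['daily']
daily_table = pd.concat(
    [daily_walk, daily, daily_cadence_peak1, daily_cadence_peak30], axis=1
)
print(daily_table.round().astype(int).head())
print("Adjusted daily average steps:", daily_avg_adjusted)

On the output side, the patch switches every per-granularity CSV to a gzip-compressed file (.csv.gz); pandas decompresses these transparently by extension, e.g. pd.read_csv("sample-DailySteps.csv.gz", index_col=0, parse_dates=True) (file name here is only an example).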