Skip to content

Commit

Permalink
feat: minute-level data; refactor
Browse files Browse the repository at this point in the history
- Added new minute-level output data.
- Refactor:
  - Gzipped output CSVs.
  - Added cadence metrics to daily stats output.
  - Renamed `daily_stats` to `daily`.
  - Renamed `summarize()` to `summarize_steps()`.
  - Changed when `.round()` is applied — moved it to the very end, after all aggregation.
  • Loading branch information
chanshing committed Apr 14, 2024
1 parent 82b7777 commit 97febc8
Showing 1 changed file with 49 additions and 43 deletions.
92 changes: 49 additions & 43 deletions src/stepcount/stepcount.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,14 +81,15 @@ def main():
Y, W, T_steps = model.predict_from_frame(data)

# Save step counts
Y.to_csv(f"{outdir}/{basename}-Steps.csv")
Y.to_csv(f"{outdir}/{basename}-Steps.csv.gz")
# Save timestamps of each step
T_steps.to_csv(f"{outdir}/{basename}-StepTimes.csv", index=False)
T_steps.to_csv(f"{outdir}/{basename}-StepTimes.csv.gz", index=False)

# Summary
summary = summarize(Y, model.steptol)
summary['hourly'].to_csv(f"{outdir}/{basename}-HourlySteps.csv")
summary['daily_stats'].to_csv(f"{outdir}/{basename}-DailySteps.csv")
# Steps summary
summary = summarize_steps(Y, model.steptol)
summary['minutely'].to_csv(f"{outdir}/{basename}-MinutelySteps.csv.gz")
summary['hourly'].to_csv(f"{outdir}/{basename}-HourlySteps.csv.gz")
summary['daily'].to_csv(f"{outdir}/{basename}-DailySteps.csv.gz")
info['TotalSteps'] = summary['total']
info['StepsDayAvg'] = summary['daily_avg']
info['StepsDayMed'] = summary['daily_med']
Expand All @@ -113,10 +114,11 @@ def main():
info['Steps75thDayMedAt'] = summary['daily_ptile_at_med']['p75_at']
info['Steps95thDayMedAt'] = summary['daily_ptile_at_med']['p95_at']

# Impute missing periods & recalculate summary
summary_adj = summarize(Y, model.steptol, adjust_estimates=True)
summary_adj['hourly'].to_csv(f"{outdir}/{basename}-HourlyStepsAdjusted.csv")
summary_adj['daily_stats'].to_csv(f"{outdir}/{basename}-DailyStepsAdjusted.csv")
# Steps summary, adjusted
summary_adj = summarize_steps(Y, model.steptol, adjust_estimates=True)
summary_adj['minutely'].to_csv(f"{outdir}/{basename}-MinutelyStepsAdjusted.csv.gz")
summary_adj['hourly'].to_csv(f"{outdir}/{basename}-HourlyStepsAdjusted.csv.gz")
summary_adj['daily'].to_csv(f"{outdir}/{basename}-DailyStepsAdjusted.csv.gz")
info['TotalStepsAdjusted'] = summary_adj['total']
info['StepsDayAvgAdjusted'] = summary_adj['daily_avg']
info['StepsDayMedAdjusted'] = summary_adj['daily_med']
Expand Down Expand Up @@ -149,15 +151,16 @@ def main():
print("\nSummary\n-------")
print(json.dumps(info, indent=4, cls=NpEncoder))
print("\nEstimated Daily Stats\n---------------------")
print(summary['daily_stats'])
print(summary['daily'])
print("\nEstimated Daily Stats (Adjusted)\n---------------------")
print(summary_adj['daily_stats'])
print(summary_adj['daily'])

after = time.time()
print(f"Done! ({round(after - before,2)}s)")


def summarize(Y, steptol=3, adjust_estimates=False):
def summarize_steps(Y, steptol=3, adjust_estimates=False):
""" Summarize step count data """

if adjust_estimates:
Y = impute_missing(Y)
Expand Down Expand Up @@ -207,9 +210,9 @@ def _tdelta_to_str(tdelta):

# steps
total = np.round(Y.agg(_sum)) # total steps
hourly = Y.resample('H').agg(_sum).round().rename('Steps') # steps, hourly
daily = Y.resample('D').agg(_sum).round().rename('Steps') # steps, daily
minutely = Y.resample('T').agg(_sum).round().rename('Steps') # steps, minutely
hourly = Y.resample('H').agg(_sum).rename('Steps') # steps, hourly
daily = Y.resample('D').agg(_sum).rename('Steps') # steps, daily
minutely = Y.resample('T').agg(_sum).rename('Steps') # steps, minutely

# steps, daily stats
if not adjust_estimates:
Expand All @@ -220,19 +223,19 @@ def _tdelta_to_str(tdelta):
daily_min = np.round(daily.min())
daily_max = np.round(daily.max())
else:
weekdaily = daily.groupby(daily.index.weekday).mean()
daily_avg = np.round(weekdaily.mean())
day_of_week = daily.groupby(daily.index.weekday).mean()
daily_avg = np.round(day_of_week.mean())
with warnings.catch_warnings():
warnings.filterwarnings('ignore', message='Mean of empty slice')
daily_med = np.round(weekdaily.median())
daily_min = np.round(weekdaily.min())
daily_max = np.round(weekdaily.max())
daily_med = np.round(day_of_week.median())
daily_min = np.round(day_of_week.min())
daily_max = np.round(day_of_week.max())

# walking
dt = pd.Timedelta(infer_freq(Y.index)).seconds
W = Y.mask(~Y.isna(), Y >= steptol)
total_walk = np.round(W.agg(_sum) * dt / 60)
daily_walk = (W.resample('D').agg(_sum) * dt / 60).round().rename('Walk(mins)')
daily_walk = (W.resample('D').agg(_sum) * dt / 60).rename('Walk(mins)')

# walking, daily stats
if not adjust_estimates:
Expand All @@ -243,38 +246,41 @@ def _tdelta_to_str(tdelta):
daily_walk_min = np.round(daily_walk.min())
daily_walk_max = np.round(daily_walk.max())
else:
weekdaily_walk = daily_walk.groupby(daily_walk.index.weekday).mean()
daily_walk_avg = np.round(weekdaily_walk.mean())
day_of_week_walk = daily_walk.groupby(daily_walk.index.weekday).mean()
daily_walk_avg = np.round(day_of_week_walk.mean())
with warnings.catch_warnings():
warnings.filterwarnings('ignore', message='Mean of empty slice')
daily_walk_med = np.round(weekdaily_walk.median())
daily_walk_min = np.round(weekdaily_walk.min())
daily_walk_max = np.round(weekdaily_walk.max())
daily_walk_med = np.round(day_of_week_walk.median())
daily_walk_min = np.round(day_of_week_walk.min())
daily_walk_max = np.round(day_of_week_walk.max())

# cadence https://jamanetwork.com/journals/jama/fullarticle/2763292
daily_cadence_peak1 = minutely.resample('D').agg(_max, n=1)
daily_cadence_peak30 = minutely.resample('D').agg(_max, n=30)
daily_cadence_p95 = minutely.resample('D').agg(_p95, steptol=steptol * 60 / dt) # scale steptol to steps/min
daily_cadence_peak1 = minutely.resample('D').agg(_max, n=1).rename('CadencePeak1')
daily_cadence_peak30 = minutely.resample('D').agg(_max, n=30).rename('CadencePeak30')
daily_cadence_p95 = minutely.resample('D').agg(_p95, steptol=steptol * 60 / dt).rename('Cadence95th') # scale steptol to steps/min
if not adjust_estimates:
cadence_peak1 = np.round(daily_cadence_peak1.mean())
cadence_peak30 = np.round(daily_cadence_peak30.mean())
cadence_p95 = np.round(daily_cadence_p95.mean())
else:
weekdaily_cadence_peak1 = daily_cadence_peak1.groupby(daily_cadence_peak1.index.weekday).mean()
weekdaily_cadence_peak30 = daily_cadence_peak30.groupby(daily_cadence_peak30.index.weekday).mean()
weekdaily_cadence_p95 = daily_cadence_p95.groupby(daily_cadence_p95.index.weekday).mean()
cadence_peak1 = np.round(weekdaily_cadence_peak1.mean())
cadence_peak30 = np.round(weekdaily_cadence_peak30.mean())
cadence_p95 = np.round(weekdaily_cadence_p95.mean())
day_of_week_cadence_peak1 = daily_cadence_peak1.groupby(daily_cadence_peak1.index.weekday).mean()
day_of_week_cadence_peak30 = daily_cadence_peak30.groupby(daily_cadence_peak30.index.weekday).mean()
day_of_week_cadence_p95 = daily_cadence_p95.groupby(daily_cadence_p95.index.weekday).mean()
cadence_peak1 = np.round(day_of_week_cadence_peak1.mean())
cadence_peak30 = np.round(day_of_week_cadence_peak30.mean())
cadence_p95 = np.round(day_of_week_cadence_p95.mean())

daily_ptile_at = Y.groupby(pd.Grouper(freq='D')).apply(_percentile_at).unstack(1)
daily_ptile_at_avg = daily_ptile_at.mean()
daily_ptile_at_med = daily_ptile_at.median()

# daily stats
daily_stats = pd.concat([
daily_walk,
daily,
daily = pd.concat([
pd.to_numeric(daily_walk.round(), downcast='integer'),
pd.to_numeric(daily.round(), downcast='integer'),
pd.to_numeric(daily_cadence_peak1.round(), downcast='integer'),
pd.to_numeric(daily_cadence_peak30.round(), downcast='integer'),
pd.to_numeric(daily_cadence_p95.round(), downcast='integer'),
daily_ptile_at.rename(columns={
'p05_at': 'Steps5thAt',
'p25_at': 'Steps25thAt',
Expand All @@ -286,14 +292,13 @@ def _tdelta_to_str(tdelta):

# convert units
total = nanint(total)
hourly = pd.to_numeric(hourly, downcast='integer')
daily = pd.to_numeric(daily, downcast='integer')
minutely = pd.to_numeric(minutely.round(), downcast='integer')
hourly = pd.to_numeric(hourly.round(), downcast='integer')
daily_avg = nanint(daily_avg)
daily_med = nanint(daily_med)
daily_min = nanint(daily_min)
daily_max = nanint(daily_max)
total_walk = nanint(total_walk)
daily_walk = pd.to_numeric(daily_walk, downcast='integer')
daily_walk_avg = nanint(daily_walk_avg)
daily_walk_med = nanint(daily_walk_med)
daily_walk_min = nanint(daily_walk_min)
Expand All @@ -306,8 +311,9 @@ def _tdelta_to_str(tdelta):

return {
'total': total,
'minutely': minutely,
'hourly': hourly,
'daily_stats': daily_stats,
'daily': daily,
'daily_avg': daily_avg,
'daily_med': daily_med,
'daily_min': daily_min,
Expand Down

0 comments on commit 97febc8

Please sign in to comment.