Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed up and improve sleeplog handling #1244

Merged
merged 11 commits into from
Jan 10, 2025
8 changes: 8 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
# CHANGES IN GGIR VERSION 3.1-10

- Part 4:

- Speed up the loading of advanced format sleeplog

- Improve automated recognition of date format in sleeplog

# CHANGES IN GGIR VERSION 3.1-9

- Part 3:
Expand Down
87 changes: 66 additions & 21 deletions R/g.loadlog.R
Original file line number Diff line number Diff line change
Expand Up @@ -204,35 +204,48 @@ g.loadlog = function(loglocation = c(), coln1 = c(), colid = c(),
}
napcnt = nwcnt = iccnt = 1
IDcouldNotBeMatched = TRUE
dateformat_found = FALSE
for (i in 1:nrow(S)) { # loop through rows in sleeplog
ID = S[i,colid]
if (ID %in% startdates$ID == TRUE) { # matching ID in acc data, if not ignore ID
IDcouldNotBeMatched = FALSE
startdate_acc = as.Date(startdates$startdate[which(startdates$ID == ID)], tz = desiredtz)
startdate_sleeplog = S[i, datecols[1]]
startdate_sleeplog = as.character(S[i, datecols[1:pmin(length(datecols), 5)]])
Sdates_correct = c()
dateformats_to_consider = c("%Y-%m-%d", "%d-%m-%Y", "%m-%d-%Y", "%Y-%d-%m",
"%y-%m-%d", "%d-%m-%y", "%m-%d-%y", "%y-%d-%m",
"%Y/%m/%d", "%d/%m/%Y", "%m/%d/%Y", "%Y/%d/%m",
"%y/%m/%d", "%d/%m/%y", "%m/%d/%y", "%y/%d/%m")
if (dateformat_found == TRUE && dateformats_to_consider[1] != dateformat_correct) {
# If found then first try that before trying anything else
dateformats_to_consider = unique(c(dateformat_correct, dateformats_to_consider))
}
# Detect date format in sleeplog:
for (dateformat in c("%Y-%m-%d", "%d-%m-%Y", "%m-%d-%Y", "%Y-%d-%m",
"%y-%m-%d", "%d-%m-%y", "%m-%d-%y", "%y-%d-%m",
"%Y/%m/%d", "%d/%m/%Y", "%m/%d/%Y", "%Y/%d/%m",
"%y/%m/%d", "%d/%m/%y", "%m/%d/%y", "%y/%d/%m")) {
for (dateformat in dateformats_to_consider) {
startdate_sleeplog_tmp = as.Date(startdate_sleeplog, format = dateformat, tz = desiredtz)
Sdates = as.Date(as.character(S[i,datecols]), format = dateformat, tz = desiredtz)
if (length(which(diff(which(is.na(Sdates))) > 1)) > 0) {
stop(paste0("\nSleeplog for ID: ", ID, " has missing date(s)"), call. = FALSE)
}
if (is.na(startdate_sleeplog_tmp) == FALSE) {
if (all(is.na(startdate_sleeplog_tmp) == FALSE)) {
deltadate = as.numeric(startdate_sleeplog_tmp - startdate_acc)
if (is.na(deltadate) == FALSE) {
if (abs(deltadate) < 30) {
startdate_sleeplog = startdate_sleeplog_tmp
if (all(is.na(deltadate) == FALSE)) {
if (all(abs(deltadate) < 30)) {
startdate_sleeplog = startdate_sleeplog_tmp[1]
Sdates_correct = Sdates
dateformat_correct = dateformat
deltadate = deltadate[1]
dateformat_found = TRUE
break
}
}
}
}
if (deltadate > 300) {
warning(paste0("For ID ", ID, " the sleeplog start date is more than 300 days separated ",
"from the dates in the accelerometer recording, this may indicate a ",
"problem with date formats or their recognition, please check."), call. = FALSE)
}
if (startdates$startAtMidnight[which(startdates$ID == ID)] == TRUE) {
# If the first day in the advanced sleeplog is 28/11
# and the recording starts at midnight 27/11 00:00:00
Expand All @@ -251,6 +264,10 @@ g.loadlog = function(loglocation = c(), coln1 = c(), colid = c(),
} else {
# handle missing dates
ndates = as.numeric(diff(range(Sdates_correct[!is.na(Sdates_correct)]))) + 1
if (ndates > 300) warning(paste0("For ID ", ID, " the sleeplog has has ",
"more than 300 missing dates, this may ",
"indicate a problem with date format ",
"recognition. Please check."), call. = FALSE)
if (ndates > nnights) {
extraColumns = matrix("", nrow(newsleeplog), max(c((ndates - nnights)*2, 100)) + 1)
newsleeplog = cbind(newsleeplog, extraColumns)
Expand All @@ -260,6 +277,12 @@ g.loadlog = function(loglocation = c(), coln1 = c(), colid = c(),
nonwearlog = cbind(nonwearlog, extraColumns)
nnights = ndates
}
if (startdate_sleeplog - deltadate > startdate_sleeplog + nnights) {
warning(paste0("Accelerometer recording for ID ",
ID, " does not overlap with sleeplog date",
" range"), call. = FALSE)
next
}
# only attempt to use sleeplog if start date could be recognised
# Add row to newsleeplog if somehow there are not enough rows
if (count > nrow(newsleeplog)) {
Expand All @@ -270,7 +293,8 @@ g.loadlog = function(loglocation = c(), coln1 = c(), colid = c(),
}
newsleeplog[count ,1] = ID
newbedlog[count ,1] = ID
newsleeplog_times = newbedlog_times = c()
newsleeplog_times = newbedlog_times = rep("time", 100)
newCounter = 1
expected_dates = seq(startdate_sleeplog - deltadate, startdate_sleeplog + nnights, by = 1)
# loop over expected dates, given the start date of the sleeplog
for (ni in 1:(length(expected_dates) - 1)) {
Expand All @@ -283,7 +307,11 @@ g.loadlog = function(loglocation = c(), coln1 = c(), colid = c(),
}
curdatecol = datecols[ind]
nextdatecol = datecols[which(datecols > curdatecol)[1]]
if (is.na(nextdatecol)) nextdatecol = ncol(S) + 1
lastday = FALSE
if (is.na(nextdatecol)) {
nextdatecol = ncol(S) + 1
lastday = TRUE
}
# Handle mixed reporting of time in bed and SPT
if (length(bedendcols) == 0 & length(bedstartcols) != 0 &
length(onsetcols) == 0 & length(wakecols) != 0) {
Expand All @@ -301,19 +329,29 @@ g.loadlog = function(loglocation = c(), coln1 = c(), colid = c(),
}
# Sleeplog:
onseti = onsetcols[which(onsetcols > curdatecol & onsetcols < nextdatecol)]
wakeupi = wakecols[which(wakecols > nextdatecol)[1]]
if (lastday == FALSE) {
wakeupi = wakecols[which(wakecols > nextdatecol)[1]]
wakeuptime = S[i,wakeupi]
} else {
wakeuptime = ""
}
if (length(onseti) == 1 & length(wakeupi) == 1) {
newsleeplog_times = c(newsleeplog_times, S[i,onseti], S[i,wakeupi])
newsleeplog_times[newCounter:(newCounter + 1)] = c(S[i,onseti], wakeuptime)
} else {
newsleeplog_times = c(newsleeplog_times, "", "")
newsleeplog_times[newCounter:(newCounter + 1)] = c("", "")
}
# time in bed
bedstarti = bedstartcols[which(bedstartcols > curdatecol & bedstartcols < nextdatecol)]
bedendi = bedendcols[which(bedendcols > nextdatecol)[1]]
if (lastday == FALSE) {
bedendi = bedendcols[which(bedendcols > nextdatecol)[1]]
bedendtime = S[i,bedendi]
} else {
bedendtime = ""
}
if (length(bedstarti) == 1 & length(bedendi) == 1) {
newbedlog_times = c(newbedlog_times, S[i,bedstarti], S[i,bedendi])
newbedlog_times[newCounter:(newCounter + 1)] = c(S[i,bedstarti], bedendtime)
} else {
newbedlog_times = c(newbedlog_times, "", "")
newbedlog_times[newCounter:(newCounter + 1)] = c("", "")
}
# Also grab nap, non-wear, and imputation code info and put those in separate matrices:
naps = napcols[which(napcols > curdatecol & napcols < nextdatecol)]
Expand All @@ -338,10 +376,17 @@ g.loadlog = function(loglocation = c(), coln1 = c(), colid = c(),
iccnt = iccnt + 1
}
} else {
newsleeplog_times = c(newsleeplog_times, "", "")
newbedlog_times = c(newbedlog_times, "", "")
newsleeplog_times[newCounter:(newCounter + 1)] = c("", "")
newbedlog_times[newCounter:(newCounter + 1)] = c("", "")
}
newCounter = newCounter + 2
if (newCounter > length(newbedlog_times) - 5) {
newbedlog_times = c(newbedlog_times, rep("time", 100))
newsleeplog_times = c(newsleeplog_times, rep("time", 100))
}
}
newsleeplog_times = newsleeplog_times[which(newsleeplog_times != "time")]
newbedlog_times = newbedlog_times[which(newbedlog_times != "time")]
# add columns to sleeplog
extracols = (length(newsleeplog_times) + 2) - ncol(newsleeplog)
if (extracols > 0) {
Expand Down Expand Up @@ -411,12 +456,12 @@ g.loadlog = function(loglocation = c(), coln1 = c(), colid = c(),
}
nnights = nnights + deltadate + 1 # to account for the possibility of extra night at the beginning of recording
# # From here we continue with original code focused on sleeplog only
if (exists("S") && ncol(S) > 0) {
if (exists("S") && ncol(S) > 0 && nnights > 0) {
sleeplog = adjustLogFormat(S, nnights, mode = "sleeplog")
} else {
sleeplog = NULL
}
if (exists("B")) {
if (exists("B") && nnights > 0) {
bedlog = adjustLogFormat(B, nnights, mode = "bedlog")
} else {
bedlog = NULL
Expand Down
24 changes: 20 additions & 4 deletions R/g.part4.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,31 @@ g.part4 = function(datadir = c(), metadatadir = c(), f0 = f0, f1 = f1,
dolog = FALSE
}
if (dolog == TRUE) {
logs_diaries = g.loadlog(params_sleep[["loglocation"]], coln1 = params_sleep[["coln1"]], colid = params_sleep[["colid"]],
meta.sleep.folder = meta.sleep.folder,
desiredtz = params_general[["desiredtz"]])
sleeplogRDataFile = paste0(metadatadir,"/meta/sleeplog.RData")
# only re-process sleeplog if sleeplog.RData does not exist or if sleeplog
# is from a date equal to or after sleeplog.RData
if (!file.exists(sleeplogRDataFile) ||
as.Date(file.info(params_sleep[["loglocation"]])$ctime) >= as.Date(file.info(sleeplogRDataFile)$ctime)) {
logs_diaries = g.loadlog(params_sleep[["loglocation"]],
coln1 = params_sleep[["coln1"]],
colid = params_sleep[["colid"]],
meta.sleep.folder = meta.sleep.folder,
desiredtz = params_general[["desiredtz"]])
save(logs_diaries, file = sleeplogRDataFile)
} else {
load(file = sleeplogRDataFile)
}
if (params_sleep[["sleepwindowType"]] == "TimeInBed" && length(logs_diaries$bedlog) > 0) {
sleeplog = logs_diaries$bedlog
} else {
sleeplog = logs_diaries$sleeplog
}
save(logs_diaries, file = paste0(metadatadir,"/meta/sleeplog.RData"))
sleeplog$night = as.numeric(sleeplog$night)
sleeplog$duration = as.numeric(sleeplog$duration)
if (is.null(logs_diaries$sleeplog) && is.null(logs_diaries$bedlog)) {
dolog = FALSE
rm(sleeplog)
}
}
#------------------------------------------------
# get list of accelerometer milestone data files from sleep (produced by g.part3)
Expand Down
6 changes: 3 additions & 3 deletions man/g.loadlog.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
before storing the output in a dataframe
}
\usage{
g.loadlog(loglocation=c(),coln1=c(),colid=c(),
sleeplogsep=",", meta.sleep.folder = c(),
desiredtz="")
g.loadlog(loglocation = c(), coln1 = c(), colid = c(),
sleeplogsep = ",", meta.sleep.folder = c(),
desiredtz = "")
}
\arguments{
\item{loglocation}{
Expand Down
Loading