From 114f08c9e861ae8a0945345adacc485c2e30580b Mon Sep 17 00:00:00 2001 From: Vincent van Hees Date: Fri, 3 Jan 2025 19:56:49 +0100 Subject: [PATCH 01/11] do not reload sleeplog when no changes were made #1243 --- R/g.part4.R | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/R/g.part4.R b/R/g.part4.R index 8abf6fe42..b1d452ae0 100644 --- a/R/g.part4.R +++ b/R/g.part4.R @@ -32,15 +32,26 @@ g.part4 = function(datadir = c(), metadatadir = c(), f0 = f0, f1 = f1, dolog = FALSE } if (dolog == TRUE) { - logs_diaries = g.loadlog(params_sleep[["loglocation"]], coln1 = params_sleep[["coln1"]], colid = params_sleep[["colid"]], - meta.sleep.folder = meta.sleep.folder, - desiredtz = params_general[["desiredtz"]]) + sleeplogRDataFile = paste0(metadatadir,"/meta/sleeplog.RData") + # only re-process sleeplog if RData file does not exist or is from a date equal to or before the sleeplog + if (!file.exists(sleeplogRDataFile) || + as.Date(file.info(params_sleep[["loglocation"]])$ctime) >= as.Date(file.info(sleeplogRDataFile)$ctime)) { + logs_diaries = g.loadlog(params_sleep[["loglocation"]], + coln1 = params_sleep[["coln1"]], + colid = params_sleep[["colid"]], + meta.sleep.folder = meta.sleep.folder, + desiredtz = params_general[["desiredtz"]]) + save(logs_diaries, file = sleeplogRDataFile) + } else { + load(file = sleeplogRDataFile) + } if (params_sleep[["sleepwindowType"]] == "TimeInBed" && length(logs_diaries$bedlog) > 0) { sleeplog = logs_diaries$bedlog } else { sleeplog = logs_diaries$sleeplog } - save(logs_diaries, file = paste0(metadatadir,"/meta/sleeplog.RData")) + sleeplog$night = as.numeric(sleeplog$night) + sleeplog$duration = as.numeric(sleeplog$duration) } #------------------------------------------------ # get list of accelerometer milestone data files from sleep (produced by g.part3) From 354c54ed56b0db58d606ed915559230a28ef0710 Mon Sep 17 00:00:00 2001 From: Vincent van Hees Date: Fri, 3 Jan 2025 20:10:29 +0100 Subject: [PATCH 
02/11] make date format detection more robust #1243 --- R/g.loadlog.R | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/R/g.loadlog.R b/R/g.loadlog.R index ec3bcdc07..f498ac682 100644 --- a/R/g.loadlog.R +++ b/R/g.loadlog.R @@ -204,30 +204,38 @@ g.loadlog = function(loglocation = c(), coln1 = c(), colid = c(), } napcnt = nwcnt = iccnt = 1 IDcouldNotBeMatched = TRUE + dateformat_found = FALSE for (i in 1:nrow(S)) { # loop through rows in sleeplog ID = S[i,colid] if (ID %in% startdates$ID == TRUE) { # matching ID in acc data, if not ignore ID IDcouldNotBeMatched = FALSE startdate_acc = as.Date(startdates$startdate[which(startdates$ID == ID)], tz = desiredtz) - startdate_sleeplog = S[i, datecols[1]] + startdate_sleeplog = as.character(S[i, datecols[1:pmin(length(datecols), 5)]]) Sdates_correct = c() + dateformats_to_consider = c("%Y-%m-%d", "%d-%m-%Y", "%m-%d-%Y", "%Y-%d-%m", + "%y-%m-%d", "%d-%m-%y", "%m-%d-%y", "%y-%d-%m", + "%Y/%m/%d", "%d/%m/%Y", "%m/%d/%Y", "%Y/%d/%m", + "%y/%m/%d", "%d/%m/%y", "%m/%d/%y", "%y/%d/%m") + if (dateformat_found == TRUE && dateformats_to_consider[1] != dateformat_correct) { + # If found then first try that before trying anything else + dateformats_to_consider = c(dateformat_correct, dateformats_to_consider) + } # Detect data format in sleeplog: - for (dateformat in c("%Y-%m-%d", "%d-%m-%Y", "%m-%d-%Y", "%Y-%d-%m", - "%y-%m-%d", "%d-%m-%y", "%m-%d-%y", "%y-%d-%m", - "%Y/%m/%d", "%d/%m/%Y", "%m/%d/%Y", "%Y/%d/%m", - "%y/%m/%d", "%d/%m/%y", "%m/%d/%y", "%y/%d/%m")) { + for (dateformat in dateformats_to_consider) { startdate_sleeplog_tmp = as.Date(startdate_sleeplog, format = dateformat, tz = desiredtz) Sdates = as.Date(as.character(S[i,datecols]), format = dateformat, tz = desiredtz) if (length(which(diff(which(is.na(Sdates))) > 1)) > 0) { stop(paste0("\nSleeplog for ID: ", ID, " has missing date(s)"), call. 
= FALSE) } - if (is.na(startdate_sleeplog_tmp) == FALSE) { + if (all(is.na(startdate_sleeplog_tmp) == FALSE)) { deltadate = as.numeric(startdate_sleeplog_tmp - startdate_acc) - if (is.na(deltadate) == FALSE) { - if (abs(deltadate) < 30) { - startdate_sleeplog = startdate_sleeplog_tmp + if (all(is.na(deltadate) == FALSE)) { + if (all(abs(deltadate) < 30)) { + startdate_sleeplog = startdate_sleeplog_tmp[1] Sdates_correct = Sdates dateformat_correct = dateformat + deltadate = deltadate[1] + dateformat_found = TRUE break } } From 18f40f75763d39a19a8667393d8bf5ce781524d6 Mon Sep 17 00:00:00 2001 From: Vincent van Hees Date: Fri, 3 Jan 2025 20:59:44 +0100 Subject: [PATCH 03/11] speed up by avoiding repeated vector expansion #1243 --- NEWS.md | 8 ++++++++ R/g.loadlog.R | 42 ++++++++++++++++++++++++++++++++---------- 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/NEWS.md b/NEWS.md index 8c10b795f..e28c36e56 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,11 @@ +# CHANGES IN GGIR VERSION 3.1-10 + +- Part 4: + + - Speed up the loading of advanced format sleeplog + + - Improve automated recognition of data format in sleeplog + # CHANGES IN GGIR VERSION 3.1-9 - Part 3: diff --git a/R/g.loadlog.R b/R/g.loadlog.R index f498ac682..0cb6f02c1 100644 --- a/R/g.loadlog.R +++ b/R/g.loadlog.R @@ -278,7 +278,8 @@ g.loadlog = function(loglocation = c(), coln1 = c(), colid = c(), } newsleeplog[count ,1] = ID newbedlog[count ,1] = ID - newsleeplog_times = newbedlog_times = c() + newsleeplog_times = newbedlog_times = rep("time", 100) + newCounter = 1 expected_dates = seq(startdate_sleeplog - deltadate, startdate_sleeplog + nnights, by = 1) # loop over expect dates giving start date of sleeplog for (ni in 1:(length(expected_dates) - 1)) { @@ -291,7 +292,11 @@ g.loadlog = function(loglocation = c(), coln1 = c(), colid = c(), } curdatecol = datecols[ind] nextdatecol = datecols[which(datecols > curdatecol)[1]] - if (is.na(nextdatecol)) nextdatecol = ncol(S) + 1 + lastday = FALSE 
+ if (is.na(nextdatecol)) { + nextdatecol = ncol(S) + 1 + lastday = TRUE + } # Handle mixed reporting of time in bed and SPT" if (length(bedendcols) == 0 & length(bedstartcols) != 0 & length(onsetcols) == 0 & length(wakecols) != 0) { @@ -309,19 +314,29 @@ g.loadlog = function(loglocation = c(), coln1 = c(), colid = c(), } # Sleeplog: onseti = onsetcols[which(onsetcols > curdatecol & onsetcols < nextdatecol)] - wakeupi = wakecols[which(wakecols > nextdatecol)[1]] + if (lastday == FALSE) { + wakeupi = wakecols[which(wakecols > nextdatecol)[1]] + wakeuptime = S[i,wakeupi] + } else { + wakeuptime = "" + } if (length(onseti) == 1 & length(wakeupi) == 1) { - newsleeplog_times = c(newsleeplog_times, S[i,onseti], S[i,wakeupi]) + newsleeplog_times[newCounter:(newCounter + 1)] = c(S[i,onseti], wakeuptime) } else { - newsleeplog_times = c(newsleeplog_times, "", "") + newsleeplog_times[newCounter:(newCounter + 1)] = c("", "") } # time in bed bedstarti = bedstartcols[which(bedstartcols > curdatecol & bedstartcols < nextdatecol)] - bedendi = bedendcols[which(bedendcols > nextdatecol)[1]] + if (lastday == FALSE) { + bedendi = bedendcols[which(bedendcols > nextdatecol)[1]] + bedendtime = S[i,bedendi] + } else { + bedendtime = "" + } if (length(bedstarti) == 1 & length(bedendi) == 1) { - newbedlog_times = c(newbedlog_times, S[i,bedstarti], S[i,bedendi]) + newbedlog_times[newCounter:(newCounter + 1)] = c(S[i,bedstarti], bedendtime) } else { - newbedlog_times = c(newbedlog_times, "", "") + newbedlog_times[newCounter:(newCounter + 1)] = c("", "") } # Also grap nap, non-wear, and imputation code info and put those in separate matrices: naps = napcols[which(napcols > curdatecol & napcols < nextdatecol)] @@ -346,10 +361,17 @@ g.loadlog = function(loglocation = c(), coln1 = c(), colid = c(), iccnt = iccnt + 1 } } else { - newsleeplog_times = c(newsleeplog_times, "", "") - newbedlog_times = c(newbedlog_times, "", "") + newsleeplog_times[newCounter:(newCounter + 1)] = c("", "") + 
newbedlog_times[newCounter:(newCounter + 1)] = c("", "") + } + newCounter = newCounter + 2 + if (newCounter > length(newbedlog_times) - 5) { + newbedlog_times = c(newbedlog_times, rep("time", 100)) + newsleeplog_times = c(newsleeplog_times, rep("time", 100)) } } + newsleeplog_times = newsleeplog_times[which(newsleeplog_times != "time")] + newbedlog_times = newbedlog_times[which(newbedlog_times != "time")] # add columns to sleeplog extracols = (length(newsleeplog_times) + 2) - ncol(newsleeplog) if (extracols > 0) { From b234e120b50dcf290bd3df1cfb47ff37ed88bc5a Mon Sep 17 00:00:00 2001 From: Vincent van Hees Date: Sat, 4 Jan 2025 11:33:01 +0100 Subject: [PATCH 04/11] add warnings when > 300 missing dates or delta date is more than 300 days #1243 --- R/g.loadlog.R | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/R/g.loadlog.R b/R/g.loadlog.R index 0cb6f02c1..58ce1e5f0 100644 --- a/R/g.loadlog.R +++ b/R/g.loadlog.R @@ -241,6 +241,11 @@ g.loadlog = function(loglocation = c(), coln1 = c(), colid = c(), } } } + if (deltadate > 300) { + warning(paste0("For ID ", ID, " the sleeplog start date is more than 300 days separated ", + "from the dates in the accelerometer recording, this may indicate a ", + "problem with date formats or their recognition, please check."), call. = FALSE) + } if (startdates$startAtMidnight[which(startdates$ID == ID)] == TRUE) { # If the first day in the advanced sleeplog is 28/11 # and the recording starts at midnight 27/11 00:00:00 @@ -259,6 +264,10 @@ g.loadlog = function(loglocation = c(), coln1 = c(), colid = c(), } else { # handle missing dates ndates = as.numeric(diff(range(Sdates_correct[!is.na(Sdates_correct)]))) + 1 + if (ndates > 300) warning(paste0("For ID ", ID, " the sleeplog has has ", + "more than 300 missing dates, this may ", + "indicate a problem with date format ", + "recognition. Please check."), call. 
= FALSE) if (ndates > nnights) { extraColumns = matrix("", nrow(newsleeplog), max(c((ndates - nnights)*2, 100)) + 1) newsleeplog = cbind(newsleeplog, extraColumns) From b09d35b1a33a9a30044fab8bb9f8a61c64f3abd9 Mon Sep 17 00:00:00 2001 From: Vincent van Hees Date: Sat, 4 Jan 2025 11:40:46 +0100 Subject: [PATCH 05/11] avoid testing same dateformat twice #1243 --- R/g.loadlog.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/g.loadlog.R b/R/g.loadlog.R index 58ce1e5f0..ecaef058c 100644 --- a/R/g.loadlog.R +++ b/R/g.loadlog.R @@ -218,7 +218,7 @@ g.loadlog = function(loglocation = c(), coln1 = c(), colid = c(), "%y/%m/%d", "%d/%m/%y", "%m/%d/%y", "%y/%d/%m") if (dateformat_found == TRUE && dateformats_to_consider[1] != dateformat_correct) { # If found then first try that before trying anything else - dateformats_to_consider = c(dateformat_correct, dateformats_to_consider) + dateformats_to_consider = unique(c(dateformat_correct, dateformats_to_consider)) } # Detect data format in sleeplog: for (dateformat in dateformats_to_consider) { From 4ccdbd6684ac378f085bffbd053b99cd979dd528 Mon Sep 17 00:00:00 2001 From: Vincent van Hees Date: Sat, 4 Jan 2025 11:48:49 +0100 Subject: [PATCH 06/11] make code comment consistent with code --- R/g.part4.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/g.part4.R b/R/g.part4.R index b1d452ae0..b72da8e05 100644 --- a/R/g.part4.R +++ b/R/g.part4.R @@ -33,7 +33,8 @@ g.part4 = function(datadir = c(), metadatadir = c(), f0 = f0, f1 = f1, } if (dolog == TRUE) { sleeplogRDataFile = paste0(metadatadir,"/meta/sleeplog.RData") - # only re-process sleeplog if RData file does not exist or is from a date equal to or before the sleeplog + # only re-process sleeplog if sleeplog.RData does not exist or if sleeplog + # is from a date equal to or after sleeplog.RData if (!file.exists(sleeplogRDataFile) || as.Date(file.info(params_sleep[["loglocation"]])$ctime) >= as.Date(file.info(sleeplogRDataFile)$ctime)) { 
logs_diaries = g.loadlog(params_sleep[["loglocation"]], From 8587ae97ccad78227dcef027d9c7d82f7d4376c4 Mon Sep 17 00:00:00 2001 From: Vincent van Hees Date: Sat, 4 Jan 2025 12:30:56 +0100 Subject: [PATCH 07/11] typo --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index e28c36e56..046d5f3eb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,7 +4,7 @@ - Speed up the loading of advanced format sleeplog - - Improve automated recognition of data format in sleeplog + - Improve automated recognition of date format in sleeplog # CHANGES IN GGIR VERSION 3.1-9 From eae6352af69aee7b14077a4c97bded0be914f06e Mon Sep 17 00:00:00 2001 From: Vincent van Hees Date: Fri, 10 Jan 2025 17:14:17 +0100 Subject: [PATCH 08/11] Handle scenario where sleeplog is available but does not overlap with any participant #1243 --- R/g.loadlog.R | 10 ++++++++-- R/g.part4.R | 3 +++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/R/g.loadlog.R b/R/g.loadlog.R index ecaef058c..42c631d22 100644 --- a/R/g.loadlog.R +++ b/R/g.loadlog.R @@ -277,6 +277,12 @@ g.loadlog = function(loglocation = c(), coln1 = c(), colid = c(), nonwearlog = cbind(nonwearlog, extraColumns) nnights = ndates } + if (startdate_sleeplog - deltadate > startdate_sleeplog + nnights) { + warning(paste0("Accelerometer recording for ID ", + ID, " does not overlap with sleeplog date", + " range"), call. 
= FALSE) + next + } # only attempt to use sleeplog if start date could be recognised # Add row to newsleeplog if somehow there are not enough rows if (count > nrow(newsleeplog)) { @@ -450,12 +456,12 @@ g.loadlog = function(loglocation = c(), coln1 = c(), colid = c(), } nnights = nnights + deltadate + 1 # to account for the possibility of extra night at the beginning of recording # # From here we continue with original code focused on sleeplog only - if (exists("S") && ncol(S) > 0) { + if (exists("S") && ncol(S) > 0 && nnights > 0) { sleeplog = adjustLogFormat(S, nnights, mode = "sleeplog") } else { sleeplog = NULL } - if (exists("B")) { + if (exists("B") && nnights > 0) { bedlog = adjustLogFormat(B, nnights, mode = "bedlog") } else { bedlog = NULL diff --git a/R/g.part4.R b/R/g.part4.R index b72da8e05..115ba8aa5 100644 --- a/R/g.part4.R +++ b/R/g.part4.R @@ -54,6 +54,9 @@ g.part4 = function(datadir = c(), metadatadir = c(), f0 = f0, f1 = f1, sleeplog$night = as.numeric(sleeplog$night) sleeplog$duration = as.numeric(sleeplog$duration) } + if (is.null(logs_diaries$sleeplog) && is.null(logs_diaries$bedlog)) { + dolog = FALSE + } #------------------------------------------------ # get list of accelerometer milestone data files from sleep (produced by g.part3) fnames = dir(meta.sleep.folder) From e046ff9c3dec4660eee877ab3e7eb414e1e4cec0 Mon Sep 17 00:00:00 2001 From: Vincent van Hees Date: Fri, 10 Jan 2025 17:14:29 +0100 Subject: [PATCH 09/11] tidy up syntax --- man/g.loadlog.Rd | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/man/g.loadlog.Rd b/man/g.loadlog.Rd index 6a4cf07b9..a2d1e09c8 100644 --- a/man/g.loadlog.Rd +++ b/man/g.loadlog.Rd @@ -8,9 +8,9 @@ before storing the output in a dataframe } \usage{ - g.loadlog(loglocation=c(),coln1=c(),colid=c(), - sleeplogsep=",", meta.sleep.folder = c(), - desiredtz="") + g.loadlog(loglocation = c(), coln1 = c(), colid = c(), + sleeplogsep = ",", meta.sleep.folder = c(), + desiredtz = "") } \arguments{ 
\item{loglocation}{ From 505582b5ad4aece25977c9368f384ac48bed9688 Mon Sep 17 00:00:00 2001 From: Vincent van Hees Date: Fri, 10 Jan 2025 17:34:51 +0100 Subject: [PATCH 10/11] also remove object sleeplog when sleeplog is not to be used --- R/g.part4.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/g.part4.R b/R/g.part4.R index 115ba8aa5..ec9c3053b 100644 --- a/R/g.part4.R +++ b/R/g.part4.R @@ -56,6 +56,7 @@ g.part4 = function(datadir = c(), metadatadir = c(), f0 = f0, f1 = f1, } if (is.null(logs_diaries$sleeplog) && is.null(logs_diaries$bedlog)) { dolog = FALSE + rm(sleeplog) } #------------------------------------------------ # get list of accelerometer milestone data files from sleep (produced by g.part3) From ef9bf6153243aa5aae261a5842d35428b7cf9a7c Mon Sep 17 00:00:00 2001 From: Vincent van Hees Date: Fri, 10 Jan 2025 17:59:51 +0100 Subject: [PATCH 11/11] move lines from commit eae6352 up to be within if statement #1243 --- R/g.part4.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/g.part4.R b/R/g.part4.R index ec9c3053b..32542b19b 100644 --- a/R/g.part4.R +++ b/R/g.part4.R @@ -53,10 +53,10 @@ g.part4 = function(datadir = c(), metadatadir = c(), f0 = f0, f1 = f1, } sleeplog$night = as.numeric(sleeplog$night) sleeplog$duration = as.numeric(sleeplog$duration) - } - if (is.null(logs_diaries$sleeplog) && is.null(logs_diaries$bedlog)) { - dolog = FALSE - rm(sleeplog) + if (is.null(logs_diaries$sleeplog) && is.null(logs_diaries$bedlog)) { + dolog = FALSE + rm(sleeplog) + } } #------------------------------------------------ # get list of accelerometer milestone data files from sleep (produced by g.part3)