diff --git a/NEWS.md b/NEWS.md index 8c10b795f..046d5f3eb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,11 @@ +# CHANGES IN GGIR VERSION 3.1-10 + +- Part 4: + + - Speed up the loading of advanced format sleeplog + + - Improve automated recognition of date format in sleeplog + # CHANGES IN GGIR VERSION 3.1-9 - Part 3: diff --git a/R/g.loadlog.R b/R/g.loadlog.R index ec3bcdc07..42c631d22 100644 --- a/R/g.loadlog.R +++ b/R/g.loadlog.R @@ -204,35 +204,48 @@ g.loadlog = function(loglocation = c(), coln1 = c(), colid = c(), } napcnt = nwcnt = iccnt = 1 IDcouldNotBeMatched = TRUE + dateformat_found = FALSE for (i in 1:nrow(S)) { # loop through rows in sleeplog ID = S[i,colid] if (ID %in% startdates$ID == TRUE) { # matching ID in acc data, if not ignore ID IDcouldNotBeMatched = FALSE startdate_acc = as.Date(startdates$startdate[which(startdates$ID == ID)], tz = desiredtz) - startdate_sleeplog = S[i, datecols[1]] + startdate_sleeplog = as.character(S[i, datecols[1:pmin(length(datecols), 5)]]) Sdates_correct = c() + dateformats_to_consider = c("%Y-%m-%d", "%d-%m-%Y", "%m-%d-%Y", "%Y-%d-%m", + "%y-%m-%d", "%d-%m-%y", "%m-%d-%y", "%y-%d-%m", + "%Y/%m/%d", "%d/%m/%Y", "%m/%d/%Y", "%Y/%d/%m", + "%y/%m/%d", "%d/%m/%y", "%m/%d/%y", "%y/%d/%m") + if (dateformat_found == TRUE && dateformats_to_consider[1] != dateformat_correct) { + # If found then first try that before trying anything else + dateformats_to_consider = unique(c(dateformat_correct, dateformats_to_consider)) + } # Detect data format in sleeplog: - for (dateformat in c("%Y-%m-%d", "%d-%m-%Y", "%m-%d-%Y", "%Y-%d-%m", - "%y-%m-%d", "%d-%m-%y", "%m-%d-%y", "%y-%d-%m", - "%Y/%m/%d", "%d/%m/%Y", "%m/%d/%Y", "%Y/%d/%m", - "%y/%m/%d", "%d/%m/%y", "%m/%d/%y", "%y/%d/%m")) { + for (dateformat in dateformats_to_consider) { startdate_sleeplog_tmp = as.Date(startdate_sleeplog, format = dateformat, tz = desiredtz) Sdates = as.Date(as.character(S[i,datecols]), format = dateformat, tz = desiredtz) if (length(which(diff(which(is.na(Sdates))) > 1)) > 0) { stop(paste0("\nSleeplog for ID: ", ID, " has missing date(s)"), call. = FALSE) } - if (is.na(startdate_sleeplog_tmp) == FALSE) { + if (all(is.na(startdate_sleeplog_tmp) == FALSE)) { deltadate = as.numeric(startdate_sleeplog_tmp - startdate_acc) - if (is.na(deltadate) == FALSE) { - if (abs(deltadate) < 30) { - startdate_sleeplog = startdate_sleeplog_tmp + if (all(is.na(deltadate) == FALSE)) { + if (all(abs(deltadate) < 30)) { + startdate_sleeplog = startdate_sleeplog_tmp[1] Sdates_correct = Sdates dateformat_correct = dateformat + deltadate = deltadate[1] + dateformat_found = TRUE break } } } } + if (deltadate > 300) { + warning(paste0("For ID ", ID, " the sleeplog start date is more than 300 days separated ", + "from the dates in the accelerometer recording, this may indicate a ", + "problem with date formats or their recognition, please check."), call. = FALSE) + } if (startdates$startAtMidnight[which(startdates$ID == ID)] == TRUE) { # If the first day in the advanced sleeplog is 28/11 # and the recording starts at midnight 27/11 00:00:00 @@ -251,6 +264,10 @@ g.loadlog = function(loglocation = c(), coln1 = c(), colid = c(), } else { # handle missing dates ndates = as.numeric(diff(range(Sdates_correct[!is.na(Sdates_correct)]))) + 1 + if (ndates > 300) warning(paste0("For ID ", ID, " the sleeplog has has ", + "more than 300 missing dates, this may ", + "indicate a problem with date format ", + "recognition. Please check."), call. = FALSE) if (ndates > nnights) { extraColumns = matrix("", nrow(newsleeplog), max(c((ndates - nnights)*2, 100)) + 1) newsleeplog = cbind(newsleeplog, extraColumns) @@ -260,6 +277,12 @@ g.loadlog = function(loglocation = c(), coln1 = c(), colid = c(), nonwearlog = cbind(nonwearlog, extraColumns) nnights = ndates } + if (startdate_sleeplog - deltadate > startdate_sleeplog + nnights) { + warning(paste0("Accelerometer recording for ID ", + ID, " does not overlap with sleeplog date", + " range"), call. = FALSE) + next + } # only attempt to use sleeplog if start date could be recognised # Add row to newsleeplog if somehow there are not enough rows if (count > nrow(newsleeplog)) { @@ -270,7 +293,8 @@ g.loadlog = function(loglocation = c(), coln1 = c(), colid = c(), } newsleeplog[count ,1] = ID newbedlog[count ,1] = ID - newsleeplog_times = newbedlog_times = c() + newsleeplog_times = newbedlog_times = rep("time", 100) + newCounter = 1 expected_dates = seq(startdate_sleeplog - deltadate, startdate_sleeplog + nnights, by = 1) # loop over expect dates giving start date of sleeplog for (ni in 1:(length(expected_dates) - 1)) { @@ -283,7 +307,11 @@ g.loadlog = function(loglocation = c(), coln1 = c(), colid = c(), } curdatecol = datecols[ind] nextdatecol = datecols[which(datecols > curdatecol)[1]] - if (is.na(nextdatecol)) nextdatecol = ncol(S) + 1 + lastday = FALSE + if (is.na(nextdatecol)) { + nextdatecol = ncol(S) + 1 + lastday = TRUE + } # Handle mixed reporting of time in bed and SPT" if (length(bedendcols) == 0 & length(bedstartcols) != 0 & length(onsetcols) == 0 & length(wakecols) != 0) { @@ -301,19 +329,29 @@ g.loadlog = function(loglocation = c(), coln1 = c(), colid = c(), } # Sleeplog: onseti = onsetcols[which(onsetcols > curdatecol & onsetcols < nextdatecol)] - wakeupi = wakecols[which(wakecols > nextdatecol)[1]] + if (lastday == FALSE) { + wakeupi = wakecols[which(wakecols > nextdatecol)[1]] + wakeuptime = S[i,wakeupi] + } else { + wakeuptime = "" + } if (length(onseti) == 1 & length(wakeupi) == 1) { - newsleeplog_times = c(newsleeplog_times, S[i,onseti], S[i,wakeupi]) + newsleeplog_times[newCounter:(newCounter + 1)] = c(S[i,onseti], wakeuptime) } else { - newsleeplog_times = c(newsleeplog_times, "", "") + newsleeplog_times[newCounter:(newCounter + 1)] = c("", "") } # time in bed bedstarti = bedstartcols[which(bedstartcols > curdatecol & bedstartcols < nextdatecol)] - bedendi = bedendcols[which(bedendcols > nextdatecol)[1]] + if (lastday == FALSE) { + bedendi = bedendcols[which(bedendcols > nextdatecol)[1]] + bedendtime = S[i,bedendi] + } else { + bedendtime = "" + } if (length(bedstarti) == 1 & length(bedendi) == 1) { - newbedlog_times = c(newbedlog_times, S[i,bedstarti], S[i,bedendi]) + newbedlog_times[newCounter:(newCounter + 1)] = c(S[i,bedstarti], bedendtime) } else { - newbedlog_times = c(newbedlog_times, "", "") + newbedlog_times[newCounter:(newCounter + 1)] = c("", "") } # Also grap nap, non-wear, and imputation code info and put those in separate matrices: naps = napcols[which(napcols > curdatecol & napcols < nextdatecol)] @@ -338,10 +376,17 @@ g.loadlog = function(loglocation = c(), coln1 = c(), colid = c(), iccnt = iccnt + 1 } } else { - newsleeplog_times = c(newsleeplog_times, "", "") - newbedlog_times = c(newbedlog_times, "", "") + newsleeplog_times[newCounter:(newCounter + 1)] = c("", "") + newbedlog_times[newCounter:(newCounter + 1)] = c("", "") + } + newCounter = newCounter + 2 + if (newCounter > length(newbedlog_times) - 5) { + newbedlog_times = c(newbedlog_times, rep("time", 100)) + newsleeplog_times = c(newsleeplog_times, rep("time", 100)) } } + newsleeplog_times = newsleeplog_times[which(newsleeplog_times != "time")] + newbedlog_times = newbedlog_times[which(newbedlog_times != "time")] # add columns to sleeplog extracols = (length(newsleeplog_times) + 2) - ncol(newsleeplog) if (extracols > 0) { @@ -411,12 +456,12 @@ g.loadlog = function(loglocation = c(), coln1 = c(), colid = c(), } nnights = nnights + deltadate + 1 # to account for the possibility of extra night at the beginning of recording # # From here we continue with original code focused on sleeplog only - if (exists("S") && ncol(S) > 0) { + if (exists("S") && ncol(S) > 0 && nnights > 0) { sleeplog = adjustLogFormat(S, nnights, mode = "sleeplog") } else { sleeplog = NULL } - if (exists("B")) { + if (exists("B") && nnights > 0) { bedlog = adjustLogFormat(B, nnights, mode = "bedlog") } else { bedlog = NULL diff --git a/R/g.part4.R b/R/g.part4.R index 8abf6fe42..32542b19b 100644 --- a/R/g.part4.R +++ b/R/g.part4.R @@ -32,15 +32,31 @@ g.part4 = function(datadir = c(), metadatadir = c(), f0 = f0, f1 = f1, dolog = FALSE } if (dolog == TRUE) { - logs_diaries = g.loadlog(params_sleep[["loglocation"]], coln1 = params_sleep[["coln1"]], colid = params_sleep[["colid"]], - meta.sleep.folder = meta.sleep.folder, - desiredtz = params_general[["desiredtz"]]) + sleeplogRDataFile = paste0(metadatadir,"/meta/sleeplog.RData") + # only re-process sleeplog if sleeplog.RData does not exist or if sleeplog + # is from a date equal to or after sleeplog.RData + if (!file.exists(sleeplogRDataFile) || + as.Date(file.info(params_sleep[["loglocation"]])$ctime) >= as.Date(file.info(sleeplogRDataFile)$ctime)) { + logs_diaries = g.loadlog(params_sleep[["loglocation"]], + coln1 = params_sleep[["coln1"]], + colid = params_sleep[["colid"]], + meta.sleep.folder = meta.sleep.folder, + desiredtz = params_general[["desiredtz"]]) + save(logs_diaries, file = sleeplogRDataFile) + } else { + load(file = sleeplogRDataFile) + } if (params_sleep[["sleepwindowType"]] == "TimeInBed" && length(logs_diaries$bedlog) > 0) { sleeplog = logs_diaries$bedlog } else { sleeplog = logs_diaries$sleeplog } - save(logs_diaries, file = paste0(metadatadir,"/meta/sleeplog.RData")) + sleeplog$night = as.numeric(sleeplog$night) + sleeplog$duration = as.numeric(sleeplog$duration) + if (is.null(logs_diaries$sleeplog) && is.null(logs_diaries$bedlog)) { + dolog = FALSE + rm(sleeplog) + } } #------------------------------------------------ # get list of accelerometer milestone data files from sleep (produced by g.part3) diff --git a/man/g.loadlog.Rd b/man/g.loadlog.Rd index 6a4cf07b9..a2d1e09c8 100644 --- a/man/g.loadlog.Rd +++ b/man/g.loadlog.Rd @@ -8,9 +8,9 @@ before storing the output in a dataframe } \usage{ - g.loadlog(loglocation=c(),coln1=c(),colid=c(), - sleeplogsep=",", meta.sleep.folder = c(), - desiredtz="") + g.loadlog(loglocation = c(), coln1 = c(), colid = c(), + sleeplogsep = ",", meta.sleep.folder = c(), + desiredtz = "") } \arguments{ \item{loglocation}{