From 72cd7536c1d9353206a9d8129e234c4322ed9d46 Mon Sep 17 00:00:00 2001 From: Jairo H Migueles Date: Mon, 23 Oct 2023 13:08:01 +0200 Subject: [PATCH 01/13] log timegaps found in actigraph files in M --- R/g.getmeta.R | 2 ++ R/g.imputeTimegaps.R | 13 ++++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/R/g.getmeta.R b/R/g.getmeta.R index 2944838cc..7326f079a 100644 --- a/R/g.getmeta.R +++ b/R/g.getmeta.R @@ -320,6 +320,8 @@ g.getmeta = function(datafile, params_metrics = c(), params_rawdata = c(), PreviousLastValue = PreviousLastValue, PreviousLastTime = PreviousLastTime, epochsize = c(ws3, ws2)) + QClog = rbind(QClog, P$QClog) + P = P$P PreviousLastValue = as.numeric(P[nrow(P), c("X", "Y", "Z")]) PreviousLastTime = as.POSIXct(P[nrow(P), "time"]) } diff --git a/R/g.imputeTimegaps.R b/R/g.imputeTimegaps.R index 9c33f4a56..fba1780a9 100644 --- a/R/g.imputeTimegaps.R +++ b/R/g.imputeTimegaps.R @@ -7,6 +7,8 @@ g.imputeTimegaps = function(x, xyzCol, timeCol = c(), sf, k=0.25, impute = TRUE, } # dummy variables to control the process remove_time_at_end = dummyTime = FirstRowZeros = imputelast = FALSE + # initialize numberofgaps and GapsLength + NumberOfGaps = GapsLength = NULL # add temporary timecolumn to enable timegap imputation where there are zeros if (length(timeCol) == 1) { if (!(timeCol %in% colnames(x))) dummyTime = TRUE @@ -102,6 +104,7 @@ g.imputeTimegaps = function(x, xyzCol, timeCol = c(), sf, k=0.25, impute = TRUE, if (NumberOfGaps > 0) { x$gap = 1 x$gap[gapsi] = round(deltatime[gapsi] * sf) # as.integer was problematic many decimals close to wholenumbers (but not whole numbers) resulting in 1 row less than expected + GapsLength = sum(x$gap[gapsi]) # normalisation to 1 G normalise = which(x$gap > 1) for (i_normalise in normalise) { @@ -183,8 +186,16 @@ g.imputeTimegaps = function(x, xyzCol, timeCol = c(), sf, k=0.25, impute = TRUE, if (remove_time_at_end == TRUE) { x = x[, grep(pattern = "time", x = colnames(x), invert = TRUE)] } + # 
keep only timestamp column if (all(c("time", "timestamp") %in% colnames(x))) { x = x[, grep(pattern = "timestamp", x = colnames(x), invert = TRUE)] } - return(x) + # QClog + start = as.numeric(as.POSIXct(x[1,1])) + end = as.numeric(as.POSIXct(x[nrow(x),1])) + QClog = data.frame(start = start, end = end, + blockLengthSeconds = (end - start) / sf, + timegaps_n = NumberOfGaps, timegaps_length = GapsLength) + # return data and QClog + return(list(P = x, QClog = QClog)) } \ No newline at end of file From 5e498f1b8d6998ec8dc7935fd5fb30dec4d01edf Mon Sep 17 00:00:00 2001 From: Jairo H Migueles Date: Mon, 30 Oct 2023 10:32:54 +0100 Subject: [PATCH 02/13] log QClog with idle sleep mode info --- R/g.getmeta.R | 2 ++ R/g.imputeTimegaps.R | 1 + 2 files changed, 3 insertions(+) diff --git a/R/g.getmeta.R b/R/g.getmeta.R index 7326f079a..adca4aeb9 100644 --- a/R/g.getmeta.R +++ b/R/g.getmeta.R @@ -284,6 +284,8 @@ g.getmeta = function(datafile, params_metrics = c(), params_rawdata = c(), PreviousLastValue = PreviousLastValue, PreviousLastTime = PreviousLastTime, epochsize = c(ws3, ws2)) + QClog = rbind(QClog, P$QClog) + P = P$P PreviousLastValue = as.numeric(P[nrow(P), xyzCol]) if (is.null(timeCol)) PreviousLastTime = NULL else PreviousLastTime = as.POSIXct(P[nrow(P), timeCol]) } diff --git a/R/g.imputeTimegaps.R b/R/g.imputeTimegaps.R index fba1780a9..995cc1c21 100644 --- a/R/g.imputeTimegaps.R +++ b/R/g.imputeTimegaps.R @@ -193,6 +193,7 @@ g.imputeTimegaps = function(x, xyzCol, timeCol = c(), sf, k=0.25, impute = TRUE, # QClog start = as.numeric(as.POSIXct(x[1,1])) end = as.numeric(as.POSIXct(x[nrow(x),1])) + if (is.null(GapsLength)) GapsLength = 0 QClog = data.frame(start = start, end = end, blockLengthSeconds = (end - start) / sf, timegaps_n = NumberOfGaps, timegaps_length = GapsLength) From 0ab2e75c2f722d47a2c641043c4794dc27f97749 Mon Sep 17 00:00:00 2001 From: Jairo H Migueles Date: Mon, 30 Oct 2023 11:30:19 +0100 Subject: [PATCH 03/13] fixed issue with duplicated second 
when appending gt3x data --- R/g.readaccfile.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/g.readaccfile.R b/R/g.readaccfile.R index 253680114..5e642df13 100644 --- a/R/g.readaccfile.R +++ b/R/g.readaccfile.R @@ -38,8 +38,8 @@ g.readaccfile = function(filename, blocksize, blocknumber, filequality, # endpage and the blocksize. if (blocknumber != 1 & length(PreviousEndPage) != 0) { # if ((mon == MONITOR$GENEACTIV && dformat == FORMAT$BIN) || dformat == FORMAT$CSV) { # change this line as the csv data do not need to skip one more row (the skip argument in read.csv does not include this row of the dataset) - if (mon == MONITOR$GENEACTIV && dformat == FORMAT$BIN) { - # only in GENEActiv binary data and for csv format data + if ((mon == MONITOR$GENEACTIV && dformat == FORMAT$BIN) | dformat == FORMAT$GT3X) { + # only in GENEActiv binary data and for gt3x format data # page selection is defined from start to end (including end) startpage = PreviousEndPage + 1 } else { From 981a5a3430e0d8f027338a0b927b60c892314bea Mon Sep 17 00:00:00 2001 From: Jairo H Migueles Date: Mon, 30 Oct 2023 11:37:12 +0100 Subject: [PATCH 04/13] update NEWS --- NEWS.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/NEWS.md b/NEWS.md index ff95b6f83..784786daf 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,9 @@ - Part 2: Bug fix in csv report generation when appending part 2 milestone data with differing number of columns #951 +- Part 1: Bug fix in gt3x data reading by avoiding duplication of seconds when appending +chunks of data #952 + # CHANGES IN GGIR VERSION 3.0-0 - Part 1 and 2: Change default value for nonwear_approach to "2023" which labels the entire window as nonwear if conditions are met. This instead of only the middle 15 minutes as is the case for "2013" setting. Further, setting "2023" now uses a 5 Hertz version of the signals for non-wear detection, for clipping detection the code uses the original signal. 
From d7ba079740d140094aa544e84d5a3753b20b322d Mon Sep 17 00:00:00 2001 From: Jairo H Migueles Date: Mon, 30 Oct 2023 13:06:12 +0100 Subject: [PATCH 05/13] make changes in g.imputetimegaps compatible with csv readings and extend tests --- R/g.calibrate.R | 2 +- R/g.imputeTimegaps.R | 7 ++++--- R/read.myacc.csv.R | 1 + tests/testthat/test_imputeTimegaps.R | 19 +++++++++++++++++++ 4 files changed, 25 insertions(+), 4 deletions(-) diff --git a/R/g.calibrate.R b/R/g.calibrate.R index f096ffded..e5366df74 100644 --- a/R/g.calibrate.R +++ b/R/g.calibrate.R @@ -160,7 +160,7 @@ g.calibrate = function(datafile, params_rawdata = c(), zeros = which(data[,1] == 0 & data[,2] == 0 & data[,3] == 0) if ((mon == MONITOR$ACTIGRAPH && dformat == FORMAT$CSV) || length(zeros) > 0) { data = g.imputeTimegaps(x = as.data.frame(data), xyzCol = 1:3, timeCol = c(), sf = sf, impute = FALSE) - data = as.matrix(data) + data = as.matrix(data$P) } LD = nrow(data) #store data that could not be used for this block, but will be added to next block diff --git a/R/g.imputeTimegaps.R b/R/g.imputeTimegaps.R index 995cc1c21..2ac123f03 100644 --- a/R/g.imputeTimegaps.R +++ b/R/g.imputeTimegaps.R @@ -104,7 +104,7 @@ g.imputeTimegaps = function(x, xyzCol, timeCol = c(), sf, k=0.25, impute = TRUE, if (NumberOfGaps > 0) { x$gap = 1 x$gap[gapsi] = round(deltatime[gapsi] * sf) # as.integer was problematic many decimals close to wholenumbers (but not whole numbers) resulting in 1 row less than expected - GapsLength = sum(x$gap[gapsi]) + GapsLength = sum(x$gap[gapsi]) - NumberOfGaps # - numberOfGaps because x$gap == 1 means no gap # normalisation to 1 G normalise = which(x$gap > 1) for (i_normalise in normalise) { @@ -192,11 +192,12 @@ g.imputeTimegaps = function(x, xyzCol, timeCol = c(), sf, k=0.25, impute = TRUE, } # QClog start = as.numeric(as.POSIXct(x[1,1])) - end = as.numeric(as.POSIXct(x[nrow(x),1])) + end = start + nrow(x) if (is.null(GapsLength)) GapsLength = 0 + if (is.null(NumberOfGaps)) NumberOfGaps = 
0 QClog = data.frame(start = start, end = end, blockLengthSeconds = (end - start) / sf, - timegaps_n = NumberOfGaps, timegaps_length = GapsLength) + timegaps_n = NumberOfGaps, timegaps_s = GapsLength/sf) # return data and QClog return(list(P = x, QClog = QClog)) } \ No newline at end of file diff --git a/R/read.myacc.csv.R b/R/read.myacc.csv.R index 9663645b6..30748092e 100644 --- a/R/read.myacc.csv.R +++ b/R/read.myacc.csv.R @@ -311,6 +311,7 @@ read.myacc.csv = function(rmc.file=c(), rmc.nrow=Inf, rmc.skip=c(), rmc.dec=".", P = g.imputeTimegaps(P, xyzCol = c("accx", "accy", "accz"), timeCol = "timestamp", sf = sf, k = 0.25, PreviousLastValue = PreviousLastValue, PreviousLastTime = PreviousLastTime, epochsize = NULL) + P = P$P PreviousLastValue = as.numeric(P[nrow(P), c("accx", "accy", "accz")]) PreviousLastTime = as.POSIXct(P[nrow(P), "timestamp"]) } diff --git a/tests/testthat/test_imputeTimegaps.R b/tests/testthat/test_imputeTimegaps.R index c0612b74c..54a6e1b6b 100644 --- a/tests/testthat/test_imputeTimegaps.R +++ b/tests/testthat/test_imputeTimegaps.R @@ -17,19 +17,25 @@ test_that("timegaps are correctly imputed", { # Format 1: with timestamp & with timegaps (no zeroes, incomplete dataset) x1 = x[-zeros,] x1_imputed = g.imputeTimegaps(x1, xyzCol, timeCol = "time", sf = sf, k = 2/sf, impute = TRUE, PreviousLastValue = c(0,0,1)) + x1_imputed_QClog = x1_imputed$QClog; x1_imputed = x1_imputed$P x1_removed = g.imputeTimegaps(x1, xyzCol, timeCol = "time", sf = sf, k = 2/sf, impute = FALSE, PreviousLastValue = c(0,0,1)) + x1_removed_QClog = x1_removed$QClog; x1_removed = x1_removed$P # Format 2: with timestamp & with zeros (complete dataset) x2 = x x2[zeros, xyzCol] = 0 x2_imputed = g.imputeTimegaps(x2, xyzCol, timeCol = "time", sf = sf, k = 2/sf, impute = TRUE, PreviousLastValue = c(0,0,1)) + x2_imputed_QClog = x2_imputed$QClog; x2_imputed = x2_imputed$P x2_removed = g.imputeTimegaps(x2, xyzCol, timeCol = "time", sf = sf, k = 2/sf, impute = FALSE, PreviousLastValue 
= c(0,0,1)) + x2_removed_QClog = x2_removed$QClog; x2_removed = x2_removed$P # Format 3: without timestamp & with zeros (complete dataset) x3 = x_without_time x3[zeros, xyzCol] = 0 x3_imputed = g.imputeTimegaps(x3, xyzCol, timeCol = "time", sf = sf, k = 2/sf, impute = TRUE, PreviousLastValue = c(0,0,1)) + x3_imputed_QClog = x3_imputed$QClog; x3_imputed = x3_imputed$P x3_removed = g.imputeTimegaps(x3, xyzCol, timeCol = "time", sf = sf, k = 2/sf, impute = FALSE, PreviousLastValue = c(0,0,1)) + x3_removed_QClog = x3_removed$QClog; x3_removed = x3_removed$P # tests number of rows expect_equal(nrow(x1_imputed), N) @@ -47,6 +53,15 @@ test_that("timegaps are correctly imputed", { expect_equal(x1_removed$X, x2_removed$X) expect_equal(x1_removed$X, x3_removed$X) + # test QClog + expect_equal(x1_imputed_QClog$timegaps_n, 4) + expect_equal(x2_imputed_QClog$timegaps_n, 4) + expect_equal(x3_imputed_QClog$timegaps_n, 4) + + expect_equal(x1_imputed_QClog$timegaps_s, length(zeros)/sf) + expect_equal(x2_imputed_QClog$timegaps_s, length(zeros)/sf) + expect_equal(x3_imputed_QClog$timegaps_s, length(zeros)/sf) + # TEST IMPUTATION WHEN FIRST ROW IS NOT CONSECUTIVE TO PREVIOUS CHUNK ---- # Format 4: with timestamp & with timegaps (no zeroes, incomplete dataset) x4 = x[-zeros,] @@ -54,8 +69,10 @@ test_that("timegaps are correctly imputed", { suppressWarnings({ # warning arising from made up PreviousLastTime x4_imputed = g.imputeTimegaps(x4, xyzCol, timeCol = "time", sf = sf, k = 2/sf, impute = TRUE, PreviousLastValue = c(0,0,1), PreviousLastTime = PreviousLastTime) + x4_imputed_QClog = x4_imputed$QClog; x4_imputed = x4_imputed$P x4_removed = g.imputeTimegaps(x4, xyzCol, timeCol = "time", sf = sf, k = 2/sf, impute = FALSE, PreviousLastValue = c(0,0,1), PreviousLastTime = PreviousLastTime) + x4_removed_QClog = x4_removed$QClog; x4_removed = x4_removed$P }) expect_equal(nrow(x4_imputed), N + sf*30) @@ -67,7 +84,9 @@ test_that("timegaps are correctly imputed", { x5 = x x5[zeros, xyzCol] = 0 
x5_imputed = g.imputeTimegaps(x5, xyzCol, timeCol = "time", sf = sf, k = 2/sf, impute = TRUE, PreviousLastValue = c(0,0,1)) + x5_imputed_QClog = x5_imputed$QClog; x5_imputed = x5_imputed$P x5_removed = g.imputeTimegaps(x5, xyzCol, timeCol = "time", sf = sf, k = 2/sf, impute = FALSE, PreviousLastValue = c(0,0,1)) + x5_removed_QClog = x5_removed$QClog; x5_removed = x5_removed$P expect_equal(nrow(x5_imputed), N) expect_equal(nrow(x5_removed), N - length(zeros)) From 89efeeed9b32d962a9475c9f55b82fe725c17d5a Mon Sep 17 00:00:00 2001 From: Jairo H Migueles Date: Mon, 30 Oct 2023 17:01:05 +0100 Subject: [PATCH 06/13] log imputed time and blocks in QC when time gaps are found in data --- R/g.analyse.R | 79 +++++++++++++++++----------- R/g.analyse.perfile.R | 40 ++++++++------ R/g.imputeTimegaps.R | 6 ++- tests/testthat/test_imputeTimegaps.R | 6 +-- 4 files changed, 79 insertions(+), 52 deletions(-) diff --git a/R/g.analyse.R b/R/g.analyse.R index 93423039b..5de78dbf0 100644 --- a/R/g.analyse.R +++ b/R/g.analyse.R @@ -292,7 +292,7 @@ g.analyse = function(I, C, M, IMP, params_247 = c(), params_phyact = c(), startt = startt) if (!is.null(M$QClog)) { - # Summarise the QC log (currently only expected from cwa Axivity files) + # Summarise the QC log (currently only expected from cwa Axivity, actigraph, and csv files) QCsummarise = function(QClog, wx) { x = ifelse(test = length(wx) > 0, yes = sum(QClog$end[wx] - QClog$start[wx]) / 60, @@ -300,44 +300,59 @@ g.analyse = function(I, C, M, IMP, params_247 = c(), params_phyact = c(), return(x) } # total imputation - impdone = which(M$QClog$imputed == TRUE) - file_summary$Dur_imputed = QCsummarise(M$QClog, impdone) - file_summary$Nblocks_imputed = length(impdone) + if ("imputed" %in% colnames(M$QClog)) { + impdone = which(M$QClog$imputed == TRUE) + if (any(colnames(M$QClog) == "timegaps_min")) { + file_summary$Dur_imputed = sum(M$QClog$timegaps_min) + file_summary$Nblocks_imputed = sum(M$QClog$timegaps_n) + } else { + 
file_summary$Dur_imputed = QCsummarise(M$QClog, impdone) + file_summary$Nblocks_imputed = length(impdone) + } + } # checksum - chsum_failed = which(M$QClog$checksum_pass == FALSE) - file_summary$Dur_chsum_failed = QCsummarise(M$QClog, chsum_failed) - file_summary$Nblocks_chsum_failed = length(chsum_failed) + if ("checksums_pass" %in% colnames(M$QClog)) { + chsum_failed = which(M$QClog$checksum_pass == FALSE) + file_summary$Dur_chsum_failed = QCsummarise(M$QClog, chsum_failed) + file_summary$Nblocks_chsum_failed = length(chsum_failed) + + } # nonincremental block ID - nonincremental = which(M$QClog$blockID_current - M$QClog$blockID_next != 1) - file_summary$Dur_nonincremental = QCsummarise(M$QClog, nonincremental) - file_summary$Nblocks_nonincremental = length(nonincremental) + if ("blockID_current" %in% colnames(M$QClog)) { + nonincremental = which(M$QClog$blockID_current - M$QClog$blockID_next != 1) + file_summary$Dur_nonincremental = QCsummarise(M$QClog, nonincremental) + file_summary$Nblocks_nonincremental = length(nonincremental) + } # sampling frequency issues - freqBlockHead = M$QClog$frequency_blockheader - frequency_bias = abs(M$QClog$frequency_observed - freqBlockHead) / freqBlockHead - - freqissue = which(frequency_bias >= 0.05 & frequency_bias < 0.1) - file_summary$Dur_freqissue_5_10 = QCsummarise(M$QClog, freqissue) - file_summary$Nblock_freqissue_5_10 = length(freqissue) - - freqissue = which(frequency_bias >= 0.1 & frequency_bias < 0.2) - file_summary$Dur_freqissue_10_20 = QCsummarise(M$QClog, freqissue) - file_summary$Nblock_freqissue_10_20 = length(freqissue) - - freqissue = which(frequency_bias >= 0.2 & frequency_bias < 0.3) - file_summary$Dur_freqissue_20_30 = QCsummarise(M$QClog, freqissue) - file_summary$Nblock_freqissue_20_30 = length(freqissue) - - freqissue = which(frequency_bias >= 0.3) - file_summary$Dur_freqissue_30 = QCsummarise(M$QClog, freqissue) - file_summary$Nblock_freqissue_30 = length(freqissue) + if ("frequency_blockheader" %in% 
colnames(M$QClog)) { + freqBlockHead = M$QClog$frequency_blockheader + frequency_bias = abs(M$QClog$frequency_observed - freqBlockHead) / freqBlockHead + } + if ("frequency_bias" %in% colnames(M$QClog)) { + freqissue = which(frequency_bias >= 0.05 & frequency_bias < 0.1) + file_summary$Dur_freqissue_5_10 = QCsummarise(M$QClog, freqissue) + file_summary$Nblock_freqissue_5_10 = length(freqissue) + + freqissue = which(frequency_bias >= 0.1 & frequency_bias < 0.2) + file_summary$Dur_freqissue_10_20 = QCsummarise(M$QClog, freqissue) + file_summary$Nblock_freqissue_10_20 = length(freqissue) + + freqissue = which(frequency_bias >= 0.2 & frequency_bias < 0.3) + file_summary$Dur_freqissue_20_30 = QCsummarise(M$QClog, freqissue) + file_summary$Nblock_freqissue_20_30 = length(freqissue) + + freqissue = which(frequency_bias >= 0.3) + file_summary$Dur_freqissue_30 = QCsummarise(M$QClog, freqissue) + file_summary$Nblock_freqissue_30 = length(freqissue) + } } - + metrics_nav = list(lookat = lookat, - colnames_to_lookat = colnames_to_lookat, - longitudinal_axis_id = longitudinal_axis_id) + colnames_to_lookat = colnames_to_lookat, + longitudinal_axis_id = longitudinal_axis_id) output_perfile = g.analyse.perfile(I, C, metrics_nav, AveAccAve24hr, doquan, doiglevels, tooshort, @@ -348,7 +363,7 @@ g.analyse = function(I, C, M, IMP, params_247 = c(), params_phyact = c(), output_perday = output_perday, dataqual_summary = dataqual_summary, file_summary = file_summary) - + filesummary = output_perfile$filesummary daysummary = output_perfile$daysummary diff --git a/R/g.analyse.perfile.R b/R/g.analyse.perfile.R index 23c51118a..9240034f2 100644 --- a/R/g.analyse.perfile.R +++ b/R/g.analyse.perfile.R @@ -64,8 +64,9 @@ g.analyse.perfile = function(I, C, metrics_nav, s_names[vi:(vi + q0)] = c("calib_err", "calib_status", colnames_to_lookat) vi = vi + q0 + 2 - # readAxivity QClog summary - if ("Dur_imputed" %in% names(file_summary)) { + # QClog summary + if ("Dur_chsum_failed" %in% 
names(file_summary)) { + # readAxivity QClog # These are summaries of the file health check by the GGIRread::readAxivity # the function handles data blocks (1-3 seconds) with faulty data by imputing # them and logging the information. @@ -101,6 +102,15 @@ g.analyse.perfile = function(I, C, metrics_nav, "filehealth_fbias2030_N", "filehealth_fbias30_N") vi = vi + 7 + } else if ("Dur_imputed" %in% names(file_summary)) { + # ActiGraph QClog + # This also logs time gaps in ActiGraph files, which correspond with periods + # in which the idle sleep mode has been activated + filesummary[vi:(vi + 1)] = c(file_summary$Dur_imputed, # total imputed + file_summary$Nblocks_imputed) + s_names[vi:(vi + 1)] = c("filehealth_totimp_min", + "filehealth_totimp_N") + vi = vi + 2 } #quantile, ML5, and intensity gradient variables @@ -166,20 +176,20 @@ g.analyse.perfile = function(I, C, metrics_nav, "cosinor_acrotime", "cosinor_ndays", "cosinor_R2") vi = vi + 6 try(expr = {filesummary[vi:(vi + 10)] = c(cosinor_coef$coefext$params$minimum, - cosinor_coef$coefext$params$amp, - cosinor_coef$coefext$params$alpha, - cosinor_coef$coefext$params$beta, - cosinor_coef$coefext$params$acrotime, - cosinor_coef$coefext$params$UpMesor, - cosinor_coef$coefext$params$DownMesor, - cosinor_coef$coefext$params$MESOR, - cosinor_coef$coefext$params$ndays, - cosinor_coef$coefext$params$F_pseudo, - cosinor_coef$coefext$params$R2)}, silent = TRUE) + cosinor_coef$coefext$params$amp, + cosinor_coef$coefext$params$alpha, + cosinor_coef$coefext$params$beta, + cosinor_coef$coefext$params$acrotime, + cosinor_coef$coefext$params$UpMesor, + cosinor_coef$coefext$params$DownMesor, + cosinor_coef$coefext$params$MESOR, + cosinor_coef$coefext$params$ndays, + cosinor_coef$coefext$params$F_pseudo, + cosinor_coef$coefext$params$R2)}, silent = TRUE) s_names[vi:(vi + 10)] = c("cosinorExt_minimum", "cosinorExt_amp", "cosinorExt_alpha", - "cosinorExt_beta", "cosinorExt_acrotime", "cosinorExt_UpMesor", - "cosinorExt_DownMesor", 
"cosinorExt_MESOR", - "cosinorExt_ndays", "cosinorExt_F_pseudo", "cosinorExt_R2") + "cosinorExt_beta", "cosinorExt_acrotime", "cosinorExt_UpMesor", + "cosinorExt_DownMesor", "cosinorExt_MESOR", + "cosinorExt_ndays", "cosinorExt_F_pseudo", "cosinorExt_R2") vi = vi + 11 filesummary[vi:(vi + 1)] = c(cosinor_coef$IVIS$InterdailyStability, cosinor_coef$IVIS$IntradailyVariability) diff --git a/R/g.imputeTimegaps.R b/R/g.imputeTimegaps.R index 2ac123f03..f8e036b8b 100644 --- a/R/g.imputeTimegaps.R +++ b/R/g.imputeTimegaps.R @@ -195,9 +195,11 @@ g.imputeTimegaps = function(x, xyzCol, timeCol = c(), sf, k=0.25, impute = TRUE, end = start + nrow(x) if (is.null(GapsLength)) GapsLength = 0 if (is.null(NumberOfGaps)) NumberOfGaps = 0 - QClog = data.frame(start = start, end = end, + imputed = NumberOfGaps > 0 + QClog = data.frame(imputed = imputed, + start = start, end = end, blockLengthSeconds = (end - start) / sf, - timegaps_n = NumberOfGaps, timegaps_s = GapsLength/sf) + timegaps_n = NumberOfGaps, timegaps_min = GapsLength/sf/60) # return data and QClog return(list(P = x, QClog = QClog)) } \ No newline at end of file diff --git a/tests/testthat/test_imputeTimegaps.R b/tests/testthat/test_imputeTimegaps.R index 54a6e1b6b..7b2300b8e 100644 --- a/tests/testthat/test_imputeTimegaps.R +++ b/tests/testthat/test_imputeTimegaps.R @@ -58,9 +58,9 @@ test_that("timegaps are correctly imputed", { expect_equal(x2_imputed_QClog$timegaps_n, 4) expect_equal(x3_imputed_QClog$timegaps_n, 4) - expect_equal(x1_imputed_QClog$timegaps_s, length(zeros)/sf) - expect_equal(x2_imputed_QClog$timegaps_s, length(zeros)/sf) - expect_equal(x3_imputed_QClog$timegaps_s, length(zeros)/sf) + expect_equal(x1_imputed_QClog$timegaps_min, length(zeros)/sf/60) + expect_equal(x2_imputed_QClog$timegaps_min, length(zeros)/sf/60) + expect_equal(x3_imputed_QClog$timegaps_min, length(zeros)/sf/60) # TEST IMPUTATION WHEN FIRST ROW IS NOT CONSECUTIVE TO PREVIOUS CHUNK ---- # Format 4: with timestamp & with timegaps (no 
zeroes, incomplete dataset) From 38e49faf6ef66825a66ee7aed791c8abe9c37eaa Mon Sep 17 00:00:00 2001 From: Jairo H Migueles Date: Mon, 30 Oct 2023 17:18:47 +0100 Subject: [PATCH 07/13] update documentation --- man/g.imputeTimegaps.Rd | 5 ++++- vignettes/GGIR.Rmd | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/man/g.imputeTimegaps.Rd b/man/g.imputeTimegaps.Rd index 3ee787536..aa7d4efee 100644 --- a/man/g.imputeTimegaps.Rd +++ b/man/g.imputeTimegaps.Rd @@ -43,8 +43,11 @@ } } \value{ - Data.frame based on input x with timegaps imputed (as default) or with + List including: + - P, data.frame based on input x with timegaps imputed (as default) or with recordings with 0 values in the three axes removed (if impute = FALSE) + - QClog, data.frame with information on the number of time gaps found and the + total time imputed in minutes } \author{ Vincent T van Hees diff --git a/vignettes/GGIR.Rmd b/vignettes/GGIR.Rmd index edc05be18..55e8f2624 100644 --- a/vignettes/GGIR.Rmd +++ b/vignettes/GGIR.Rmd @@ -788,14 +788,14 @@ The data_quality_report.csv is stored in subfolder folder results/QC. 
| mean.temp | Mean temperature in sphere data | | device.serial.number | Device serial number | | NFilePagesSkipped | (Only for Axivity .cwa format) Number of data blocks skipped | -| filehealth_totimp_min | (Only for Axivity .cwa format) Total number of minutes of data imputed | +| filehealth_totimp_min | Total number of minutes of data imputed | | filehealth_checksumfail_min | (Only for Axivity .cwa format) Total number of minutes of data where the checksum failed | | filehealth_niblockid_min | (Only for Axivity .cwa format) Total number of minutes of data with non-incremental block ids | | filehealth_fbias0510_min | (Only for Axivity .cwa format) Total number of minutes with a sampling frequency bias between 5 and 10% | | filehealth_fbias1020_min | (Only for Axivity .cwa format) Total number of minutes with a sampling frequency bias between 10 and 20% | | filehealth_fbias2030_min | (Only for Axivity .cwa format) Total number of minutes with a sampling frequency bias between 20 and 30% | | filehealth_fbias30_min | (Only for Axivity .cwa format) Total number of minutes with a sampling frequency bias higher than 30% | -| filehealth_totimp_N | (Only for Axivity .cwa format) Total number of data blocks that were imputed | +| filehealth_totimp_N | Total number of data blocks that were imputed | | filehealth_checksumfail_N | (Only for Axivity .cwa format) Total number of blocks where the checksum failed | | filehealth_niblockid_N | (Only for Axivity .cwa format) Total number of data blocks with non-incremental block ids | | filehealth_fbias0510_N | (Only for Axivity .cwa format) Total number of data blocks with a sampling frequency bias between 5 and 10% | From a202276a1cfb1c0a58a15cd3bc5d3aa591842233 Mon Sep 17 00:00:00 2001 From: Jairo H Migueles Date: Mon, 30 Oct 2023 17:20:48 +0100 Subject: [PATCH 08/13] update NEWS --- NEWS.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index 784786daf..eb448c6c1 100644 --- a/NEWS.md 
+++ b/NEWS.md @@ -2,8 +2,9 @@ - Part 2: Bug fix in csv report generation when appending part 2 milestone data with differing number of columns #951 -- Part 1: Bug fix in gt3x data reading by avoiding duplication of seconds when appending -chunks of data #952 +- Part 1: Bug fix in gt3x data reading by avoiding duplication of seconds when appending chunks of data #952 + +- Part 1 + 2: Log information regarding number of time gaps and total time imputed with function g.imputeTimeGaps #571 # CHANGES IN GGIR VERSION 3.0-0 From eae9c8ac46160210ca1dfc8ab82228027f5c6e8b Mon Sep 17 00:00:00 2001 From: Jairo H Migueles Date: Mon, 30 Oct 2023 17:53:56 +0100 Subject: [PATCH 09/13] attempt to fix bug in ubuntu-latest - set origin in as.POSIXct --- R/g.imputeTimegaps.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/g.imputeTimegaps.R b/R/g.imputeTimegaps.R index f8e036b8b..3a66f57ff 100644 --- a/R/g.imputeTimegaps.R +++ b/R/g.imputeTimegaps.R @@ -191,7 +191,7 @@ g.imputeTimegaps = function(x, xyzCol, timeCol = c(), sf, k=0.25, impute = TRUE, x = x[, grep(pattern = "timestamp", x = colnames(x), invert = TRUE)] } # QClog - start = as.numeric(as.POSIXct(x[1,1])) + start = as.numeric(as.POSIXct(x[1,1], origin = "1970-1-1")) end = start + nrow(x) if (is.null(GapsLength)) GapsLength = 0 if (is.null(NumberOfGaps)) NumberOfGaps = 0 From bc591f23db5389853d748f1db36a31cd4480d3fc Mon Sep 17 00:00:00 2001 From: Vincent van Hees Date: Sun, 5 Nov 2023 13:41:59 +0100 Subject: [PATCH 10/13] rename P to x as P is already used in g.readaccfile and having the same name would be confusing --- R/g.calibrate.R | 2 +- R/g.getmeta.R | 4 ++-- R/g.imputeTimegaps.R | 2 +- R/read.myacc.csv.R | 2 +- tests/testthat/test_imputeTimegaps.R | 20 ++++++++++---------- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/R/g.calibrate.R b/R/g.calibrate.R index e5366df74..96353164a 100644 --- a/R/g.calibrate.R +++ b/R/g.calibrate.R @@ -160,7 +160,7 @@ g.calibrate = function(datafile, 
params_rawdata = c(), zeros = which(data[,1] == 0 & data[,2] == 0 & data[,3] == 0) if ((mon == MONITOR$ACTIGRAPH && dformat == FORMAT$CSV) || length(zeros) > 0) { data = g.imputeTimegaps(x = as.data.frame(data), xyzCol = 1:3, timeCol = c(), sf = sf, impute = FALSE) - data = as.matrix(data$P) + data = as.matrix(data$x) } LD = nrow(data) #store data that could not be used for this block, but will be added to next block diff --git a/R/g.getmeta.R b/R/g.getmeta.R index adca4aeb9..8a4649387 100644 --- a/R/g.getmeta.R +++ b/R/g.getmeta.R @@ -285,7 +285,7 @@ g.getmeta = function(datafile, params_metrics = c(), params_rawdata = c(), PreviousLastTime = PreviousLastTime, epochsize = c(ws3, ws2)) QClog = rbind(QClog, P$QClog) - P = P$P + P = P$x PreviousLastValue = as.numeric(P[nrow(P), xyzCol]) if (is.null(timeCol)) PreviousLastTime = NULL else PreviousLastTime = as.POSIXct(P[nrow(P), timeCol]) } @@ -323,7 +323,7 @@ g.getmeta = function(datafile, params_metrics = c(), params_rawdata = c(), PreviousLastTime = PreviousLastTime, epochsize = c(ws3, ws2)) QClog = rbind(QClog, P$QClog) - P = P$P + P = P$x PreviousLastValue = as.numeric(P[nrow(P), c("X", "Y", "Z")]) PreviousLastTime = as.POSIXct(P[nrow(P), "time"]) } diff --git a/R/g.imputeTimegaps.R b/R/g.imputeTimegaps.R index 3a66f57ff..17627b25c 100644 --- a/R/g.imputeTimegaps.R +++ b/R/g.imputeTimegaps.R @@ -201,5 +201,5 @@ g.imputeTimegaps = function(x, xyzCol, timeCol = c(), sf, k=0.25, impute = TRUE, blockLengthSeconds = (end - start) / sf, timegaps_n = NumberOfGaps, timegaps_min = GapsLength/sf/60) # return data and QClog - return(list(P = x, QClog = QClog)) + return(list(x = x, QClog = QClog)) } \ No newline at end of file diff --git a/R/read.myacc.csv.R b/R/read.myacc.csv.R index 30748092e..acca94ab8 100644 --- a/R/read.myacc.csv.R +++ b/R/read.myacc.csv.R @@ -311,7 +311,7 @@ read.myacc.csv = function(rmc.file=c(), rmc.nrow=Inf, rmc.skip=c(), rmc.dec=".", P = g.imputeTimegaps(P, xyzCol = c("accx", "accy", "accz"), 
timeCol = "timestamp", sf = sf, k = 0.25, PreviousLastValue = PreviousLastValue, PreviousLastTime = PreviousLastTime, epochsize = NULL) - P = P$P + P = P$x PreviousLastValue = as.numeric(P[nrow(P), c("accx", "accy", "accz")]) PreviousLastTime = as.POSIXct(P[nrow(P), "timestamp"]) } diff --git a/tests/testthat/test_imputeTimegaps.R b/tests/testthat/test_imputeTimegaps.R index 7b2300b8e..86b44f269 100644 --- a/tests/testthat/test_imputeTimegaps.R +++ b/tests/testthat/test_imputeTimegaps.R @@ -17,25 +17,25 @@ test_that("timegaps are correctly imputed", { # Format 1: with timestamp & with timegaps (no zeroes, incomplete dataset) x1 = x[-zeros,] x1_imputed = g.imputeTimegaps(x1, xyzCol, timeCol = "time", sf = sf, k = 2/sf, impute = TRUE, PreviousLastValue = c(0,0,1)) - x1_imputed_QClog = x1_imputed$QClog; x1_imputed = x1_imputed$P + x1_imputed_QClog = x1_imputed$QClog; x1_imputed = x1_imputed$x x1_removed = g.imputeTimegaps(x1, xyzCol, timeCol = "time", sf = sf, k = 2/sf, impute = FALSE, PreviousLastValue = c(0,0,1)) - x1_removed_QClog = x1_removed$QClog; x1_removed = x1_removed$P + x1_removed_QClog = x1_removed$QClog; x1_removed = x1_removed$x # Format 2: with timestamp & with zeros (complete dataset) x2 = x x2[zeros, xyzCol] = 0 x2_imputed = g.imputeTimegaps(x2, xyzCol, timeCol = "time", sf = sf, k = 2/sf, impute = TRUE, PreviousLastValue = c(0,0,1)) - x2_imputed_QClog = x2_imputed$QClog; x2_imputed = x2_imputed$P + x2_imputed_QClog = x2_imputed$QClog; x2_imputed = x2_imputed$x x2_removed = g.imputeTimegaps(x2, xyzCol, timeCol = "time", sf = sf, k = 2/sf, impute = FALSE, PreviousLastValue = c(0,0,1)) - x2_removed_QClog = x2_removed$QClog; x2_removed = x2_removed$P + x2_removed_QClog = x2_removed$QClog; x2_removed = x2_removed$x # Format 3: without timestamp & with zeros (complete dataset) x3 = x_without_time x3[zeros, xyzCol] = 0 x3_imputed = g.imputeTimegaps(x3, xyzCol, timeCol = "time", sf = sf, k = 2/sf, impute = TRUE, PreviousLastValue = c(0,0,1)) - 
x3_imputed_QClog = x3_imputed$QClog; x3_imputed = x3_imputed$P + x3_imputed_QClog = x3_imputed$QClog; x3_imputed = x3_imputed$x x3_removed = g.imputeTimegaps(x3, xyzCol, timeCol = "time", sf = sf, k = 2/sf, impute = FALSE, PreviousLastValue = c(0,0,1)) - x3_removed_QClog = x3_removed$QClog; x3_removed = x3_removed$P + x3_removed_QClog = x3_removed$QClog; x3_removed = x3_removed$x # tests number of rows expect_equal(nrow(x1_imputed), N) @@ -69,10 +69,10 @@ test_that("timegaps are correctly imputed", { suppressWarnings({ # warning arising from made up PreviousLastTime x4_imputed = g.imputeTimegaps(x4, xyzCol, timeCol = "time", sf = sf, k = 2/sf, impute = TRUE, PreviousLastValue = c(0,0,1), PreviousLastTime = PreviousLastTime) - x4_imputed_QClog = x4_imputed$QClog; x4_imputed = x4_imputed$P + x4_imputed_QClog = x4_imputed$QClog; x4_imputed = x4_imputed$x x4_removed = g.imputeTimegaps(x4, xyzCol, timeCol = "time", sf = sf, k = 2/sf, impute = FALSE, PreviousLastValue = c(0,0,1), PreviousLastTime = PreviousLastTime) - x4_removed_QClog = x4_removed$QClog; x4_removed = x4_removed$P + x4_removed_QClog = x4_removed$QClog; x4_removed = x4_removed$x }) expect_equal(nrow(x4_imputed), N + sf*30) @@ -84,9 +84,9 @@ test_that("timegaps are correctly imputed", { x5 = x x5[zeros, xyzCol] = 0 x5_imputed = g.imputeTimegaps(x5, xyzCol, timeCol = "time", sf = sf, k = 2/sf, impute = TRUE, PreviousLastValue = c(0,0,1)) - x5_imputed_QClog = x5_imputed$QClog; x5_imputed = x5_imputed$P + x5_imputed_QClog = x5_imputed$QClog; x5_imputed = x5_imputed$x x5_removed = g.imputeTimegaps(x5, xyzCol, timeCol = "time", sf = sf, k = 2/sf, impute = FALSE, PreviousLastValue = c(0,0,1)) - x5_removed_QClog = x5_removed$QClog; x5_removed = x5_removed$P + x5_removed_QClog = x5_removed$QClog; x5_removed = x5_removed$x expect_equal(nrow(x5_imputed), N) expect_equal(nrow(x5_removed), N - length(zeros)) From 33456d00bd62a5a0d5d98867ef434831c8bc8711 Mon Sep 17 00:00:00 2001 From: Vincent van Hees Date: Sun, 5 Nov 
2023 13:44:21 +0100 Subject: [PATCH 11/13] Clarify that filehealth quality refer to raw data blocks and clarify that filehealth_totimp_min only applies to Axivity ActiGraph gt3x and ad-hoc csv format --- vignettes/GGIR.Rmd | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/vignettes/GGIR.Rmd b/vignettes/GGIR.Rmd index 16b4a2c6e..fdf60e58d 100644 --- a/vignettes/GGIR.Rmd +++ b/vignettes/GGIR.Rmd @@ -792,21 +792,21 @@ The data_quality_report.csv is stored in subfolder folder results/QC. | QCmessage | Character QC message at the end of the auto-calibration | | mean.temp | Mean temperature in sphere data | | device.serial.number | Device serial number | -| NFilePagesSkipped | (Only for Axivity .cwa format) Number of data blocks skipped | -| filehealth_totimp_min | Total number of minutes of data imputed | -| filehealth_checksumfail_min | (Only for Axivity .cwa format) Total number of minutes of data where the checksum failed | -| filehealth_niblockid_min | (Only for Axivity .cwa format) Total number of minutes of data with non-incremental block ids | +| NFilePagesSkipped | (Only for Axivity .cwa format) Number of raw data blocks skipped | +| filehealth_totimp_min | (Only for Axivity .cwa, ActiGraph gt3x, and ad-hoc csv format) Total number of minutes of raw data imputed | +| filehealth_checksumfail_min | (Only for Axivity .cwa format) Total number of minutes of raw data where the checksum failed | +| filehealth_niblockid_min | (Only for Axivity .cwa format) Total number of minutes of raw data with non-incremental block ids | | filehealth_fbias0510_min | (Only for Axivity .cwa format) Total number of minutes with a sampling frequency bias between 5 and 10% | | filehealth_fbias1020_min | (Only for Axivity .cwa format) Total number of minutes with a sampling frequency bias between 10 and 20% | | filehealth_fbias2030_min | (Only for Axivity .cwa format) Total number of minutes with a sampling frequency bias between 20 and 30% | | 
filehealth_fbias30_min | (Only for Axivity .cwa format) Total number of minutes with a sampling frequency bias higher than 30% | -| filehealth_totimp_N | Total number of data blocks that were imputed | +| filehealth_totimp_N | (Only for Axivity .cwa, ActiGraph gt3x, and ad-hoc csv format) Total number of data blocks that were imputed | | filehealth_checksumfail_N | (Only for Axivity .cwa format) Total number of blocks where the checksum failed | -| filehealth_niblockid_N | (Only for Axivity .cwa format) Total number of data blocks with non-incremental block ids | -| filehealth_fbias0510_N | (Only for Axivity .cwa format) Total number of data blocks with a sampling frequency bias between 5 and 10% | -| filehealth_fbias1020_N | (Only for Axivity .cwa format) Total number of data blocks with a sampling frequency bias between 10 and 20%| -| filehealth_fbias2030_N | (Only for Axivity .cwa format) Total number of data blocks with a sampling frequency bias between 20 and 30% | -| filehealth_fbias30_N | (Only for Axivity .cwa format) Total number of data blocks with a sampling frequency bias higher than 30% | +| filehealth_niblockid_N | (Only for Axivity .cwa format) Total number of raw data blocks with non-incremental block ids | +| filehealth_fbias0510_N | (Only for Axivity .cwa format) Total number of raw data blocks with a sampling frequency bias between 5 and 10% | +| filehealth_fbias1020_N | (Only for Axivity .cwa format) Total number of raw data blocks with a sampling frequency bias between 10 and 20%| +| filehealth_fbias2030_N | (Only for Axivity .cwa format) Total number of raw data blocks with a sampling frequency bias between 20 and 30% | +| filehealth_fbias30_N | (Only for Axivity .cwa format) Total number of raw data blocks with a sampling frequency bias higher than 30% | @@ -974,11 +974,7 @@ vigorous activity: | `ACC_` | (average) acceleration according to default metric specific by acc.metric| | `_spt_wake_`| Wakefulness within the Sleep period time window.| 
| `_spt_sleep_` | Sleep within the Sleep period time window. | -| `_IN_` | Inactivity. Note that we use the term inactivity instead - of sedentary behaviour for the lowest intensity level of behaviour. The reason - for this is that GGIR does not attempt to classifying the activity type - sitting at the moment, by which we feel that using the term sedentary - behaviour would fail to communicate that.| +| `_IN_` | Inactivity. Note that we use the term inactivity instead of sedentary behaviour for the lowest intensity level of behaviour. The reason for this is that GGIR does not attempt to classify the activity type sitting at the moment, by which we feel that using the term sedentary behaviour would fail to communicate that.| | `_LIG_` | Light activity | | `_MOD_` | Moderate activity | | `_VIG_` | Vigorous activity | From 0e295034ade204443fabf0bcab553f5ea7cc7a93 Mon Sep 17 00:00:00 2001 From: Vincent van Hees Date: Sun, 5 Nov 2023 13:54:22 +0100 Subject: [PATCH 12/13] Update g.imputeTimegaps.Rd --- man/g.imputeTimegaps.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/g.imputeTimegaps.Rd b/man/g.imputeTimegaps.Rd index aa7d4efee..2f42c4b6c 100644 --- a/man/g.imputeTimegaps.Rd +++ b/man/g.imputeTimegaps.Rd @@ -44,7 +44,7 @@ } \value{ List including: - - P, data.frame based on input x with timegaps imputed (as default) or with + - x, data.frame based on input x with timegaps imputed (as default) or with recordings with 0 values in the three axes removed (if impute = FALSE) - QClog, data.frame with information on the number of time gaps found and the total time imputed in minutes From 38bf6c72dfb01897326ebb8ba1d54f440cdfb3cf Mon Sep 17 00:00:00 2001 From: Vincent van Hees Date: Sun, 5 Nov 2023 14:11:06 +0100 Subject: [PATCH 13/13] fix typo in PR #958 g.analyse causing file health check not to be performed for cwa --- R/g.analyse.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/g.analyse.R b/R/g.analyse.R index
35b70d8ff..24ba307d1 100644 --- a/R/g.analyse.R +++ b/R/g.analyse.R @@ -316,7 +316,7 @@ g.analyse = function(I, C, M, IMP, params_247 = c(), params_phyact = c(), } # checksum - if ("checksums_pass" %in% colnames(M$QClog)) { + if ("checksum_pass" %in% colnames(M$QClog)) { chsum_failed = which(M$QClog$checksum_pass == FALSE) file_summary$Dur_chsum_failed = QCsummarise(M$QClog, chsum_failed) file_summary$Nblocks_chsum_failed = length(chsum_failed)