Skip to content

Commit

Permalink
Merge pull request #942 from wadpac/issue938_dictionaryPart5
Browse files Browse the repository at this point in the history
Thanks, looks good now.
  • Loading branch information
vincentvanhees authored Nov 3, 2023
2 parents 9f18bfb + 53c4a47 commit 85a8655
Show file tree
Hide file tree
Showing 9 changed files with 397 additions and 3 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ RELEASE_CYCLE.md
^docs$
^pkgdown$
^vignettes/chapter*.Rmd
^data-raw$
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ export(g.analyse, g.calibrate,
correctOlderMilestoneData,
convertEpochData, appendRecords, extractID,
g.part5_analyseSegment, g.part5_initialise_ts,
g.part5.analyseRest)
g.part5.analyseRest, g.report.part5_dictionary)

importFrom("grDevices", "colors", "dev.off", "pdf","rainbow","rgb")
importFrom("graphics", "abline", "axis", "par", "plot", "plot.new",
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# CHANGES IN GGIR VERSION 3.0-1

- Part 5: New function to provide definition of the variable names in part 5 reports #938

- Part 2: Bug fix in csv report generation when appending part 2 milestone data with differing number of columns #951

# CHANGES IN GGIR VERSION 3.0-0
Expand Down
3 changes: 2 additions & 1 deletion R/GGIR.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ GGIR = function(mode = 1:5, datadir = c(), outputdir = c(),

stop("\nDirectory specified by argument datadir does not exist")
}
if (datadir == outputdir || grepl(paste(datadir, '/', sep =''), outputdir)) {
if (datadir == outputdir || grepl(paste(datadir, '/', sep = ''), outputdir)) {
stop(paste0('\nError: The file path specified by argument outputdir should ',
'NOT equal or be a subdirectory of the path specified by argument datadir'))
}
Expand Down Expand Up @@ -408,6 +408,7 @@ GGIR = function(mode = 1:5, datadir = c(), outputdir = c(),
week_weekend_aggregate.part5 = params_output[["week_weekend_aggregate.part5"]],
LUX_day_segments = params_247[["LUX_day_segments"]],
verbose = verbose, sep_reports = params_output[["sep_reports"]])
g.report.part5_dictionary(metadatadir = metadatadir, sep_reports = params_output[["sep_reports"]])
} else {
if (verbose == TRUE) cat("\nSkipped because no milestone data available")
}
Expand Down
4 changes: 3 additions & 1 deletion R/g.report.part5.R
Original file line number Diff line number Diff line change
Expand Up @@ -599,7 +599,9 @@ g.report.part5 = function(metadatadir = c(), f0 = c(), f1 = c(), loglocation = c
| nom == "acc_available"
| nom == "guider" | nom == "L5TIME" | nom == "M5TIME"
| nom == "L10TIME" | nom == "M10TIME"
| nom == "acc_available" | nom == "daytype")
| nom == "acc_available" | nom == "daytype"
| nom %in% paste0("sleeplog_used_", c("pla", "wei", "WD", "WE"))
| nom %in% paste0("window_number_", c("pla", "wei", "WD", "WE")))
names(OF4)[which(names(OF4) == "weekday")] = "startday"
OF4 = OF4[,-cut]
for (col4 in 1:ncol(OF4)) {
Expand Down
278 changes: 278 additions & 0 deletions R/g.report.part5_dictionary.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,278 @@
g.report.part5_dictionary = function(metadatadir, sep_reports = ",") {
# internal function to get class ------------------------------------------
getclass = function(x) {
if (length(x) == 0) return(NULL)
class = NULL
if (grepl("2", x)) x = unlist(strsplit(x, "2"))
for (i in 1:length(x)) {
if (x[i] == "IN") class[i] = "inactivity"
if (x[i] == "LIG" | x[i] == "LIPA") class[i] = "light physical activity"
if (x[i] == "MOD") class[i] = "moderate physical activity"
if (x[i] == "VIG") class[i] = "vigorous physical activity"
if (x[i] == "MVPA") class[i] = "moderate-to-vigorous physical activity"
if (x[i] == "PA") class[i] = "physical activity"
if (x[i] == "sleep") class[i] = "sleep"
if (x[i] == "wake") class[i] = "awake"
if (x[i] == "nonwear") class[i] = "non-wear time"
if (x[i] == "mostactive60min") class = "the most active 60 minutes of the day are accumulated"
if (x[i] == "mostactive30min") class = "the most active 30 minutes of the day are accumulated"
}
if (length(class) == 2) class = paste(class[1], "to", class[2])
return(class)
}
# -------------------------------------------------------------------------
# main script -------------------------------------------------------------
# -------------------------------------------------------------------------
# identify individual reports
reports = dir(file.path(metadatadir, "results"), full.names = TRUE,
pattern = "^part5.*\\.csv$")
# Select one daysummary, personsummary, and Segment report as variables are the same across configurations
ds = grep("^part5_daysummary", basename(reports))[1]
ps = grep("^part5_personsummary", basename(reports))[1]
reports = reports[c(ds, ps)]
# read col names of each report and derive definitions
for (ri in 1:length(reports)) {
# read report
R = data.table::fread(reports[ri], verbose = FALSE, nrows = 2)
cnames = colnames(R)
# initialise dictionary
dictionary = data.frame(Variable = cnames,
Definition = NA)
# get definitions
for (coli in 1:length(cnames)) {
# definitions will be a combination of what, window, when (only for LUX segments), class, unit
what = window = class = unit = NULL
# get variable names from baseDictionary
nam = gsub("_pla|_wei|_WD|_WE", "", cnames[coli])
if (grepl("Nvalid", cnames[coli])) nam = cnames[coli]
if (nam %in% names(baseDictionary)) {
what = baseDictionary[[nam]]
}
# if the variable is not in baseDictionary, extract definition:
if (is.null(what)) {
# split up variable name
elements = unlist(strsplit(cnames[coli], "[.]|_"))
# WHAT -------------------------------------------------------------
if ("dur" %in% elements | "nonwear" %in% elements) {
what = "Time accumulated"
} else if ("ACC" %in% elements) {
what = "Mean acceleration"
} else if ("Nbouts" %in% elements) {
what = "Number of bouts"
} else if ("Nblocks" %in% elements) {
what = "Number of blocks (defined as consecutive series of epochs with the same behavioural class)"
} else if ("quantile" %in% elements) {
what = "Acceleration above which (percentile)"
} else if ("LUX" %in% elements) {
if ("min" %in% elements | "above1000" %in% elements) {
# time in lux ranges
what = "Time accumulated with LUX"
if ("above1000" %in% elements) {
numbers = 1000
} else {
numbers = suppressWarnings(as.numeric(elements))
numbers = numbers[!is.na(numbers)]
}
if (any(numbers == Inf) | length(numbers) == 1) {
thresholds = paste("above", numbers[1])
} else if (length(numbers) == 2) {
thresholds = paste("between", numbers[1], "and", numbers[2])
} else {
thresholds = ""
}
what = paste(what, thresholds, "luxes")
unit = "(minutes)"
} else if ("mean" %in% elements) {
what = "Mean LUX value"
if ("mvpa" %in% elements) what = paste(what, "in moderate-to-vigorous physical activity time")
unit = "(luxes)"
} else if ("max" %in% elements) {
what = "Maximum LUX value"
unit = "(luxes)"
} else if ("timeawake" %in% elements) {
what = "Time classified as awake"
unit = "(minutes)"
} else if ("imputed" %in% elements) {
what = "Time in which the LUX has been imputed"
unit = "(minutes)"
} else if ("ignored" %in% elements) {
what = "Time in which the LUX has been ignored"
unit = "(minutes)"
}
# lux segments
if (any(grepl("hr", elements))) {
t1 = elements[grep("hr", elements)]; t1 = gsub("hr", ":00", t1)
t0 = elements[grep("hr", elements) - 1]
if (nchar(t0) == 1) t0 = paste0(t0, ":00")
what = paste(what, "in the segment from", t0, "to", t1)
}
}
# fragmentation metrics
if ("FRAG" %in% elements) {
what_bu = tolower(what)
what = "Fragmentation analysis:"
if ("TP" %in% elements) {
what = paste(what, "transition probability (%)")
} else if ("Nfrag" %in% elements) {
what = paste(what, "number of fragments")
} else if ("NFragPM" %in% elements) {
what = paste(what, "number of fragments per minute")
} else if ("mean" %in% elements) {
what = paste(what, "mean duration in the")
} else if ("Gini" %in% elements) {
what = paste(what, "Gini inequality index as calculated in the ineq R package")
} else if ("CoV" %in% elements) {
what = paste(what, "Coefficient of variance as proposed in https://shorturl.at/nsDU9")
} else if ("alpha" %in% elements) {
what = paste(what, "Alpha power law exponent metric as proposed in https://shorturl.at/gwzB8")
} else if ("x0" %in% elements) {
what = paste(what, "x0.5 power law exponent metric as proposed in https://shorturl.at/gwzB8")
} else if ("W0" %in% elements) {
what = paste(what, "W0.5 power law exponent metric as proposed in https://shorturl.at/gwzB8")
} else if ("SD" %in% elements) {
what = paste(what, "standard deviation in the")
}
what = paste(what, what_bu)
}
# WINDOW -----------------------------------------------------------
if ("day" %in% elements & "spt" %in% elements) {
window = "during the waking time and sleep period time (i.e., full window)"
} else if ("day" %in% elements) {
window = "during the waking time"
} else if ("spt" %in% elements) {
window = "during the sleep period time"
}
# CLASS ------------------------------------------------------------
classes = c("IN", "LIG", "LIPA", "MOD", "VIG", "MVPA", "PA",
"sleep", "nonwear", "mostactive60min", "mostactive30min",
"IN2PA", "PA2IN", "IN2LIPA", "IN2MVPA", "sleep2wake", "wake2sleep")
x = elements[which(elements %in% classes)]
class = getclass(x)
# sleep efficiency (overwrite prev classification as sleep)
if ("sleep" %in% elements & "efficiency" %in% elements) {
class = "Sleep efficiency"
unit = "(%)"
}
# wakefulness after sleep onset (overwrite prev classification of intensity)
if ("wake" %in% elements) class = paste("awake", class)
# bouts
if ("bts" %in% elements & !("Nbouts" %in% elements)) {
numbers = suppressWarnings(as.numeric(elements))
boutdur = numbers[!is.na(numbers)]
class = paste("bouts of", paste(boutdur, collapse = "-"), "min", class)
}
# unbouted time
if ("unbt" %in% elements & !("Nbouts" %in% elements)) {
intensity = elements[which(elements %in% c("IN", "LIG", "MOD", "VIG"))]
if (intensity %in% c("MOD", "VIG")) intensity = "MVPA"
look4 = paste("dur_day", intensity, "bts", sep = "_")
boutVars = grep(look4, cnames, value = TRUE)
boutdurs = c()
for (i in 1:length(boutVars)) {
x = unlist(strsplit(boutVars[i], split = "_"))
numbers = suppressWarnings(as.numeric(x))
boutdurs = c(boutdurs, numbers[!is.na(numbers)])
}
minboutdur = min(boutdurs)
class = paste("unbouted", class, paste0("(",0,"-",minboutdur, " min)"))
}
# intensity gradient
if ("ig" %in% elements) {
if ("gradient" %in% elements) class = "Intensity gradient"
if ("rsquared" %in% elements) class = paste("R-squared from the log-log time to intensity regression to calculate the intensity gradient")
if ("intercept" %in% elements) class = paste("Intercept from the log-log time to intensity regression to calculate the intensity gradient")
}
# connector (in)
if (!is.null(class)) {
if (class != "Sleep efficiency"
& substr(class, 1, 8) != "the most"
& !("ig" %in% elements)) class = paste("in", class)
}
# UNITS -------------------------------------------------------------------
if ("min" %in% elements) {
unit = "(minutes)"
} else if ("mg" %in% elements) {
unit = "(mili-gravity units)"
} else if ("perc" %in% elements) {
unit = "(%)"
}
# ------------------------------------------------------------------
# Rest of column names...
if (is.null(what)) {
if ("sleeponset" %in% elements | "wakeup" %in% elements) {
if ("sleeponset" %in% elements) what = "Sleep onset time"
if ("wakeup" %in% elements) what = "Sleep onset time"
if ("ts" %in% elements) {
unit = "(hh:mm:ss)"
} else {
unit = "(hours from previous midnight)"
}
} else if ((substr(elements[1], 1, 1) == "L" | substr(elements[1], 1, 1) == "M")
& elements[1] != "LUX") {
# LX and MX metrics
# what
if (grepl("TIME", elements[1])) {
what = "Starting time"
unit = "(timestamp)"
if ("num" %in% elements) {
unit = "(timestamp)"
}
} else if (grepl("VALUE", elements[1])) {
what = "Mean acceleration"
unit = "(mili-gravity units)"
} else if ("peakLUX" %in% elements) {
if ("mean" %in% elements) {
what = "Mean peak Lux"
} else if ("max" %in% elements) {
what = "Max peak Lux"
}
}
# class
X = as.numeric(gsub("\\D", "", elements))[1]
if (substr(elements[1], 1, 1) == "L") {
class = paste("during the", X, "consecutive hours with the lowest acceleration")
} else if (substr(elements[1], 1, 1) == "M") {
class = paste("during the", X, "consecutive hours with the highest acceleration")
}
} else if ("daytype" %in% elements) {
what = "WD = weekday; WE = weekend day"
} else {
what = ""
}
}
}
# build up definition
def = paste(what, class, window, unit)
# only for personsummary - aggregation method
if (grepl("personsummary", reports[ri])) {
agg = NULL
if (!grepl("Nvalid", cnames[coli])) {
if ("pla" %in% elements) agg = "- plain average"
if ("wei" %in% elements) agg = "- weighted average"
if ("WD" %in% elements) agg = "- weekdays average"
if ("WE" %in% elements) agg = "- weekend days average"
} else {
# adapt definition of valid days to part 5 reports
def = gsub("part 2, 4, and 5", "part 5", def)
}
def = paste(def, agg)
}
def = gsub("\\s+", " ", def) # this removes extra spaces
def = gsub("^ ", "", def) # this removes leading spaces

# if window and class have not been identified, then return blank definition
if (is.null(what) & is.null(class)) def = NA

# store definition
dictionary[coli, "Definition"] = def
}
# write csv reports with data dictionaries
directory = file.path(metadatadir, "results", "variableDictionary/")
if (!dir.exists(directory)) dir.create(directory)
fn = gsub("part5_", "part5_dictionary_", basename(reports[ri]))
fn = unlist(strsplit(fn, "_MM|_WW|_OO"))[1]
fn = paste0(fn, ".csv")
data.table::fwrite(dictionary, file = file.path(directory, fn),
row.names = FALSE, na = "", sep = sep_reports)
}
}
Binary file added R/sysdata.rda
Binary file not shown.
Loading

0 comments on commit 85a8655

Please sign in to comment.