Skip to content

Commit

Permalink
finish independent random effects
Browse files Browse the repository at this point in the history
  • Loading branch information
FBartos committed Jan 25, 2025
1 parent 032154f commit 4f9c06b
Show file tree
Hide file tree
Showing 6 changed files with 350 additions and 45 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: BayesTools
Title: Tools for Bayesian Analyses
Version: 0.2.18
Version: 0.2.19
Description: Provides tools for conducting Bayesian analyses and Bayesian model averaging
(Kass and Raftery, 1995, <doi:10.1080/01621459.1995.10476572>,
Hoeting et al., 1999, <doi:10.1214/ss/1009212519>). The package contains
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# version 0.2.19
### Features
- extending prior functions to accept `expression()` instead of a parameter, such objects can be use to create prior distributions that depend on other parameters in JAGS
- extending the formula interface of `JAGS_fit()` function to accept expressions that are appended as literal text to the generated JAGS formula
- extending the formula interface of `JAGS_fit()` function to handle uncorrelated random effects via `(x||y)` (lme4-like) notation

# version 0.2.18
### Features
- adding `prior_mixture()` function for creating a mixture of prior distributions
Expand Down
242 changes: 222 additions & 20 deletions R/JAGS-formula.R
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ JAGS_formula <- function(formula, parameter, data, prior_list){

# separate prior lists: extract random effects priors
prior_list_random_effects <- list()
if(length(random_effects)){
if(length(random_effects) > 0){
# store the random effects specific priors in the corresponding entry
for(i in seq_along(random_effects)){
prior_list_random_effects[[i]] <- prior_list[which(.get_grouping_factor(names(prior_list)) == attr(random_effects[[i]], "grouping_factor"))]
Expand Down Expand Up @@ -143,11 +143,13 @@ JAGS_formula <- function(formula, parameter, data, prior_list){
# check whether intercept is unique parameter
if(sum(grepl("intercept", names(prior_list))) > 1)
stop("only the intercept parameter can contain 'intercept' in its name.")
# check whether the interaction replacement is in usage
if(any(grepl("__xXx__", names(prior_list))))
stop("'__xXx__' string is internally used by the BayesTools package and can't be used for naming variables.")
if(any(grepl("__xREx__", names(prior_list))))
stop("'__xREx__' string is internally used by the BayesTools package and can't be used for naming variables.")
# check whether any reserved term is in usage
reserved_terms <- c("__xXx__", "__xREx__", "xRE_PRECx", "xRE_CORx", "xRE_Zx", "xRE_STDx", "xRE_COEFx", "xRE_MAPx", "xRE_COEFx", "xRE_DATAx")
for(reserved_term in reserved_terms){
if(any(grepl(reserved_term, names(prior_list))))
stop(paste0("'", reserved_term, "' string is internally used by the BayesTools package and can't be used for naming variables or prior distributions."))
}


# replace interaction signs (due to JAGS incompatibility)
colnames(model_matrix) <- gsub(":", "__xXx__", colnames(model_matrix))
Expand All @@ -157,6 +159,7 @@ JAGS_formula <- function(formula, parameter, data, prior_list){

# prepare syntax & data based on the formula
formula_syntax <- NULL
random_syntax <- NULL
JAGS_data <- list()
JAGS_data[[paste0("N_", parameter)]] <- nrow(data)

Expand Down Expand Up @@ -259,17 +262,23 @@ JAGS_formula <- function(formula, parameter, data, prior_list){

# add random effects back to the formula
for(i in seq_along(random_effects)){
# TODO
temp_random <- .JAGS_random_effect_formula(random_effects[[i]], parameter, data, prior_list_random_effects[[i]])

formula_syntax <- c(formula_syntax, temp_random$formula_syntax)
for(i in seq_along(temp_random[["data"]])){
JAGS_data[[names(temp_random[["data"]])[i]]] <- temp_random[["data"]][[i]]
}

random_syntax <- c(random_syntax, temp_random[["random_syntax"]])
formula_syntax <- c(formula_syntax, temp_random[["formula_term"]])
prior_list <- c(prior_list, temp_random[["prior_list"]])
}

# finish the syntax
formula_syntax <- paste0(
"for(i in 1:N_", parameter, "){\n",
" ", parameter, "[i] = ", paste0(formula_syntax, collapse = " + "), "\n",
"}\n")
formula_syntax <- paste0(formula_syntax, paste0(random_syntax, collapse = "\n"), collapse = "\n")

# add the parameter name as a prefix and attribute to each prior in the list
names(prior_list) <- paste0(parameter, "_", names(prior_list))
Expand All @@ -291,6 +300,7 @@ JAGS_formula <- function(formula, parameter, data, prior_list){
grouping_factor <- attr(formula, "grouping_factor")
grouping_independent <- attr(formula, "independent")
grouping_factor_levels <- levels(as.factor(data[[grouping_factor]]))
grouping_mapping <- as.numeric(as.factor(data[[grouping_factor]]))

# TODO: expand to factor random effects
# needs to implement LKJ correlation matrix
Expand Down Expand Up @@ -329,35 +339,208 @@ JAGS_formula <- function(formula, parameter, data, prior_list){

# check that all priors have a lower bound on 0 or their range is > 0, if not, throw a warning and correct
for(i in seq_along(prior_list)){
if(range(prior_list[[i]])[1] < 0){
warning(paste0("The lower bound of the '", names(prior_list)[i], "' prior distribution is below 0. Correcting to 0."), immediate. = TRUE)
prior_list[[i]] <- prior_list[[i]]$truncation$lower <- 0
if(is.prior.spike_and_slab(prior_list[[i]])){
if(range(prior_list[[i]][["variable"]])[1] < 0){
warning(paste0("The lower bound of the '", names(prior_list)[i], "' prior distribution is below 0. Correcting to 0."), immediate. = TRUE)
prior_list[[i]][["variable"]]$truncation$lower <- 0
}
}else if(is.prior.mixture(prior_list[[i]])){
for(j in seq_along(prior_list[[i]])){
if(range(prior_list[[i]][[j]])[1] < 0){
warning(paste0("The lower bound of the ", j, "-component in '", names(prior_list)[i], "' prior distribution is below 0. Correcting to 0."), immediate. = TRUE)
prior_list[[i]][[j]]$truncation$lower <- 0
}
}
}else{
if(range(prior_list[[i]])[1] < 0){
warning(paste0("The lower bound of the '", names(prior_list)[i], "' prior distribution is below 0. Correcting to 0."), immediate. = TRUE)
prior_list[[i]]$truncation$lower <- 0
}
}
}

# drop the grouping factor from the prior names
names(prior_list) <- .remove_grouping_factor(names(prior_list))
# check that all terms have a prior
# check that all terms have a prior distribution
check_list(prior_list, "prior_list", check_names = model_terms, allow_other = TRUE, all_objects = TRUE)

# all factor random effects need to be changed to independent contrasts
# - any other factor projection bounds random effects variances equal across some of the levels
# - this allows independent by-factor level variances
for(i in seq_along(model_terms[model_terms_type == "factor"])){
stats::contrasts(data[[model_terms[model_terms_type == "factor"][i]]]) <- "contr.independent"
}

# get the default design matrix
model_frame <- stats::model.frame(formula, data = data)
model_matrix <- stats::model.matrix(model_frame, formula = formula, data = data)

# check whether intercept is unique parameter
if(sum(grepl("intercept", names(prior_list))) > 1)
stop("only the intercept parameter can contain 'intercept' in its name.")
# check whether any reserved term is in usage
reserved_terms <- c("__xXx__", "__xREx__", "xRE_PRECx", "xRE_CORx", "xRE_Zx", "xRE_STDx", "xRE_COEFx", "xRE_MAPx", "xRE_COEFx", "xRE_DATAx")
for(reserved_term in reserved_terms){
if(any(grepl(reserved_term, names(prior_list))))
stop(paste0("'", reserved_term, "' string is internally used by the BayesTools package and can't be used for naming variables or prior distributions."))
}

# replace interaction signs (due to JAGS incompatibility)
colnames(model_matrix) <- gsub(":", "__xXx__", colnames(model_matrix))
names(prior_list) <- gsub(":", "__xXx__", names(prior_list))
names(model_terms_type) <- gsub(":", "__xXx__", names(model_terms_type))
model_terms <- gsub(":", "__xXx__", model_terms)


# prepare syntax & data based on the formula
parameter <- paste0(parameter, "__xREx__", grouping_factor)
formula_syntax <- NULL
JAGS_data <- list()
new_prior_list <- list()
JAGS_data[[paste0("N_", parameter)]] <- length(grouping_factor_levels)
parameter_suffix <- paste0("_xREx__", grouping_factor) # priors should not be named with parameter name (done on exit from formula)
parameter <- paste0(parameter, "_", parameter_suffix) # variables should be named already here
random_syntax <- NULL
JAGS_data <- list()
new_prior_list <- list()

### in essence, the following prepares constructors that:
# 1) samples standardized random effects xRE_Zx[ids, predictors] from a multivariate normal distribution
# 2) create a vector of by-parameter standard deviation of the random effects xRE_STDx[predictors]
# 3) multiplies the standardized random effects by parameter-specific standard deviations to create xRE_COEFx[ids, predictors] matrix
# 4) computes the per observation formula output based on indexing the by-id COEF and selecting the observation variables
# 5) appends the per-observation output to the higher order formula (done in the formula call itself)

n_id <- length(grouping_factor_levels)
n_par <- ncol(model_matrix)

# step 1:
# TODO: get the identity matrix to sample the standardized random effects (update once correlated random effects are available with cholesky sampled correlation matrix)
random_syntax <- c(random_syntax, .add_JAGS_matrix(name = paste0(parameter, "_xRE_PRECx"), diag(1, n_par)))
random_syntax <- c(random_syntax, paste0(
" for(i in 1:",n_id,"){\n",
" ",paste0(parameter, "_xRE_Zx"),"[i,1:", n_par ,"] ~ dmnorm(rep(0, ", n_par,"), ", paste0(parameter, "_xRE_PRECx"), ")\n",
" }\n"
))

# TODO: HERE - add expressions and dummy random_effect priros (maybe define new class?)
# step 2
if(has_intercept){
terms_indexes <- attr(model_matrix, "assign") + 1
terms_indexes[1] <- 0

formula_syntax <- c(formula_syntax, paste0(parameter, "_intercept"))
new_prior_list[[paste0(parameter_suffix, "_intercept")]] <- prior_list[["intercept"]]
attr(new_prior_list[[paste0(parameter_suffix, "_intercept")]], "random_factor") <- grouping_factor
random_syntax <- c(random_syntax, paste0(
parameter, "_xRE_STDx[1] = ", parameter, "_", "intercept"
))
}else{
terms_indexes <- attr(model_matrix, "assign")
}

# add remaining terms (omitting the intercept indexed as NA)
for(i in unique(terms_indexes[terms_indexes > 0])){

# extract the corresponding prior distribution for a given coefficient
this_prior <- prior_list[[model_terms[i]]]

# check whether the term is an interaction or not and save the corresponding attributes
attr(this_prior, "interaction") <- grepl("__xXx__", model_terms[i])
if(.is_prior_interaction(this_prior)){
attr(this_prior, "interaction_terms") <- strsplit(model_terms[i], "__xXx__")[[1]]
}

if(!is.null(attr(this_prior, "multiply_by")))
stop("'multiply_by' attribute is inadmissible for random effects")

if(model_terms_type[i] == "continuous"){

random_syntax <- c(random_syntax, paste0(
parameter, "_xRE_STDx[", i, "] = ", parameter, "_", model_terms[i]
))

}else if(model_terms_type[i] == "factor"){

attr(this_prior, "levels") <- sum(terms_indexes == i)
if(.is_prior_interaction(this_prior)){
level_names <- list()
for(sub_term in strsplit(model_terms[i], "__xXx__")[[1]]){
if(model_terms_type[sub_term] == "factor"){
level_names[[sub_term]] <- levels(data[,sub_term])
}
}
attr(this_prior, "level_names") <- level_names
}else{
attr(this_prior, "level_names") <- levels(data[,model_terms[i]])
}

# distribute the individual coefficients to the STD
for(j in 1:sum(terms_indexes == i)){
random_syntax <- c(random_syntax, paste0(
parameter, "_xRE_STDx[", i + j - 1, "] = ", parameter, "_", model_terms[i], "[", j, "]"
))
}

# transform the simple prior into independent factor prior (for each of the coefficients)
if(is.prior.simple(this_prior)){
class(this_prior) <- c(class(this_prior), "prior.factor", "prior.independent")
}else if(is.prior.spike_and_slab(this_prior)){
class(this_prior[["variable"]]) <- c(class(this_prior[["variable"]]), "prior.factor", "prior.independent")
class(this_prior) <- c(class(this_prior)[!class(this_prior) %in% c("prior.simple_spike_and_slab")], "prior.factor_spike_and_slab", "prior.independent")
}else if(is.prior.mixture(this_prior)){
for(p in seq_along(this_prior)){
class(this_prior[[p]]) <- c(class(this_prior[[p]]), "prior.factor", "prior.independent")
}
class(this_prior) <- c(class(this_prior)[!class(this_prior) %in% c("prior.simple_mixture")], "prior.factor_mixture", "prior.independent")
}

}else{
stop("Unrecognized model term.")
}

# update the corresponding prior distribution back into the prior list
# (and forward attributes to lower level components in the case of spike and slab and mixture priors)
attr(this_prior, "random_sd") <- TRUE
attr(this_prior, "random_factor") <- grouping_factor
if(is.prior.spike_and_slab(this_prior)){
attr(this_prior, "levels") -> attr(this_prior[["variable"]], "levels")
attr(this_prior, "level_names") -> attr(this_prior[["variable"]], "level_names")
attr(this_prior, "interaction") -> attr(this_prior[["variable"]], "interaction")
attr(this_prior, "interaction_terms") -> attr(this_prior[["variable"]], "interaction_terms")
this_prior -> new_prior_list[[paste0(parameter_suffix, "_", model_terms[i])]]
}else if(is.prior.mixture(this_prior)){
for(p in seq_along(this_prior)){
attr(this_prior, "levels") -> attr(this_prior[[p]], "levels")
attr(this_prior, "level_names") -> attr(this_prior[[p]], "level_names")
attr(this_prior, "interaction") -> attr(this_prior[[p]], "interaction")
attr(this_prior, "interaction_terms") -> attr(this_prior[[p]], "interaction_terms")
}
this_prior -> new_prior_list[[paste0(parameter_suffix, "_", model_terms[i])]]
}else{
this_prior -> new_prior_list[[paste0(parameter_suffix, "_", model_terms[i])]]
}

}

# step 3
random_syntax <- c(random_syntax, paste0(
" for(i in 1:",n_par,"){\n",
" ",paste0(parameter, "_xRE_COEFx"),"[1:",n_id,",i] = ",paste0(parameter, "_xRE_Zx"),"[1:",n_id,",i] * ",paste0(parameter, "_xRE_STDx"),"[i]\n",
" }\n"
))

# step 4
random_syntax <- c(random_syntax, paste0(
" for(i in 1:",nrow(model_matrix),"){\n",
" ",parameter,"[i] = inprod(", paste0(parameter, "_xRE_COEFx[", paste0(parameter, "_xRE_MAPx[i]"),", 1:",n_par,"]"), ", ", paste0(parameter, "_xRE_DATAx[i,1:", n_par,"]"),")\n",
" }\n"
))

# create the JAGS data list
JAGS_data[[paste0(parameter, "_xRE_DATAx")]] <- model_matrix
JAGS_data[[paste0(parameter, "_xRE_MAPx")]] <- grouping_mapping

return(list(
random_syntax = random_syntax,
formula_term = paste0(parameter,"[i]"),
data = JAGS_data,
prior_list = new_prior_list,
formula = formula
))
}

# formula helper functions
Expand Down Expand Up @@ -963,6 +1146,7 @@ contr.independent <- function(n, contrasts = TRUE){
#' @param parameters a vector of parameter names
#' @param formula_parameter a formula parameter prefix name
#' @param formula_parameters a vector of formula parameter prefix names
#' @param formula_random a vector of random effects grouping factors
#' @param formula_prefix whether the \code{formula_parameters} names should be
#' kept. Defaults to \code{TRUE}.
#'
Expand All @@ -978,9 +1162,10 @@ contr.independent <- function(n, contrasts = TRUE){
NULL

#' @rdname parameter_names
format_parameter_names <- function(parameters, formula_parameters = NULL, formula_prefix = TRUE){
format_parameter_names <- function(parameters, formula_parameters = NULL, formula_random = NULL, formula_prefix = TRUE){

check_char(parameters, "parameters", check_length = FALSE)
check_char(formula_random, "formula_random", check_length = FALSE, allow_NULL = TRUE)
check_char(formula_parameters, "formula_parameters", check_length = FALSE, allow_NULL = TRUE)
check_bool(formula_prefix, "formula_prefix")

Expand All @@ -990,6 +1175,23 @@ format_parameter_names <- function(parameters, formula_parameters = NULL, formul
if(formula_prefix) paste0("(", formula_parameters[i], ") ") else "",
parameters[grep(paste0(formula_parameters[i], "_"), parameters)])
}

for(i in seq_along(formula_random)){
temp_which <- grepl(paste0("_xREx__", formula_random[i], "_"), parameters)
temp_incl <- grepl("(inclusion)", parameters)
parameters[temp_which] <- gsub(
paste0("_xREx__", formula_random[i], "_"),
"",
parameters[temp_which]
)
if(any(temp_which & temp_incl)){
parameters[temp_which & temp_incl] <- paste0(gsub("(inclusion)", "", parameters[temp_which & temp_incl], fixed = TRUE), "|", formula_random[i], " (inclusion)")
}
if(any(temp_which & !temp_incl)){
parameters[temp_which & !temp_incl] <- paste0("sd(", parameters[temp_which & !temp_incl], "|", formula_random[i], ")")
}
}

parameters[grep("__xXx__", parameters)] <- gsub("__xXx__", ":", parameters[grep("__xXx__", parameters)])

return(parameters)
Expand Down
2 changes: 2 additions & 0 deletions R/summary-tables.R
Original file line number Diff line number Diff line change
Expand Up @@ -1132,6 +1132,7 @@ runjags_estimates_table <- function(fit, transformations = NULL, title = NULL,
colnames(model_samples) <- format_parameter_names(
parameters = colnames(model_samples),
formula_parameters = unique(unlist(lapply(prior_list, attr, which = "parameter"))),
formula_random = unique(unlist(lapply(prior_list, attr, which = "random_factor"))),
formula_prefix = formula_prefix)
}

Expand Down Expand Up @@ -1252,6 +1253,7 @@ runjags_inference_table <- function(fit, title = NULL, footnotes = NULL, warnin
rownames(runjags_summary) <- format_parameter_names(
parameters = rownames(runjags_summary),
formula_parameters = unique(unlist(lapply(prior_list, attr, which = "parameter"))),
formula_random = unique(unlist(lapply(prior_list, attr, which = "random_factor"))),
formula_prefix = formula_prefix)
}

Expand Down
1 change: 1 addition & 0 deletions man/parameter_names.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 4f9c06b

Please sign in to comment.