diff --git a/DESCRIPTION b/DESCRIPTION index 158160b..a92b61d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: jsdmstan Title: Fitting jSDMs in Stan -Version: 0.2.0 +Version: 0.3.0 Authors@R: person("Fiona", "Seaton", , "fseaton@ceh.ac.uk", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-2022-7451")) diff --git a/R/jsdm_stancode.R b/R/jsdm_stancode.R index 85fe886..0bb766b 100644 --- a/R/jsdm_stancode.R +++ b/R/jsdm_stancode.R @@ -36,7 +36,8 @@ jsdm_stancode <- function(method, family, prior = jsdm_prior(), log_lik = TRUE, site_intercept = "none", beta_param = "cor") { # checks - family <- match.arg(family, c("gaussian", "bernoulli", "poisson", "neg_binomial")) + family <- match.arg(family, c("gaussian", "bernoulli", "poisson", + "neg_binomial","binomial")) method <- match.arg(method, c("gllvm", "mglmm")) beta_param <- match.arg(beta_param, c("cor","unstruct")) site_intercept <- match.arg(site_intercept, c("none","grouped","ungrouped")) @@ -62,23 +63,29 @@ jsdm_stancode <- function(method, family, prior = jsdm_prior(), data <- paste( " int N; // Number of sites int S; // Number of species - ", ifelse(method == "gllvm", - "int D; // Number of latent dimensions", "" +", +ifelse(method == "gllvm", + " int D; // Number of latent dimensions", "" ), " int K; // Number of predictor variables matrix[N, K] X; // Predictor matrix -", ifelse(site_intercept == "grouped", +", +ifelse(site_intercept == "grouped", " int ngrp; // Number of groups in site intercept int grps[N]; // Vector matching sites to groups - ",""), + ",""), switch(family, "gaussian" = "real", "bernoulli" = "int", "neg_binomial" = "int", - "poisson" = "int" - ), "Y[N,S]; //Species matrix" + "poisson" = "int", + "binomial" = "int" + ), "Y[N,S]; //Species matrix", + ifelse(family == "binomial", + " + int Ntrials[N]; // Number of trials","") ) transformed_data <- ifelse(method == "gllvm", " // Ensures identifiability of the model - no rotation of factors @@ -255,7 +262,8 @@ jsdm_stancode <- function(method, family, prior = jsdm_prior(), kappa ~ ", prior[["kappa"]], "; "), "bern" = "", - "poisson" = "" + "poisson" = "", + "binomial" = "" ) ) model_pt2 <- paste( @@ -265,7 +273,8 @@ jsdm_stancode <- function(method, family, prior = jsdm_prior(), "gaussian" = "normal(mu[i,], sigma);", "bernoulli" = "bernoulli_logit(mu[i,]);", "neg_binomial" = "neg_binomial_2_log(mu[i,], kappa);", - "poisson" = "poisson_log(mu[i,]);" + "poisson" = "poisson_log(mu[i,]);", + "binomial" = "binomial_logit(Ntrials[i], mu[i,]);" ) ) @@ -316,18 +325,12 @@ jsdm_stancode <- function(method, family, prior = jsdm_prior(), for(j in 1:S) { log_lik[i, j] = ", switch(family, - "gaussian" = "normal_lpdf", - "bernoulli" = "bernoulli_logit_lpmf", - "neg_binomial" = "neg_binomial_2_log_lpmf", - "poisson" = "poisson_log_lpmf" - ), - "(Y[i, j] | linpred[i, j]", - switch(family, - "gaussian" = ", sigma)", - "bernoulli" = ")", - "neg_binomial" = ", kappa)", - "poisson" = ")" - ), "; + "gaussian" = "normal_lpdf(Y[i, j] | linpred[i, j], sigma);", + "bernoulli" = "bernoulli_logit_lpmf(Y[i, j] | linpred[i, j]);", + "neg_binomial" = "neg_binomial_2_log_lpmf(Y[i, j] | linpred[i, j], kappa);", + "poisson" = "poisson_log_lpmf(Y[i, j] | linpred[i, j]);", + "binomial" = "binomial_logit_lpmf(Y[i, j] | Ntrials[i], linpred[i, j]);" + )," } } } diff --git a/R/posterior_predict.R b/R/posterior_predict.R index 9291c1f..29056c2 100644 --- a/R/posterior_predict.R +++ b/R/posterior_predict.R @@ -161,7 +161,8 @@ posterior_linpred.jsdmStanFit <- function(object, transform = FALSE, "gaussian" = x, "bernoulli" = inv_logit(x), "poisson" = exp(x), - "neg_binomial" = exp(x) + "neg_binomial" = exp(x), + "binomial" = inv_logit(x) ) }) } @@ -184,6 +185,10 @@ posterior_linpred.jsdmStanFit <- function(object, transform = FALSE, #' #' @inheritParams posterior_linpred.jsdmStanFit #' +#' @param Ntrials For the binomial distribution the number of trials, given as +#' either a single integer which is assumed to be constant across sites or as +#' a site-length vector of integers. +#' #' @return A list of linear predictors. If list_index is \code{"draws"} (the default) #' the list will have length equal to the number of draws with each element of #' the list being a site x species matrix. If the list_index is \code{"species"} the @@ -200,7 +205,8 @@ posterior_linpred.jsdmStanFit <- function(object, transform = FALSE, posterior_predict.jsdmStanFit <- function(object, newdata = NULL, newdata_type = "X", ndraws = NULL, draw_ids = NULL, - list_index = "draws", ...) { + list_index = "draws", + Ntrials = NULL, ...) { transform <- ifelse(object$family == "gaussian", FALSE, TRUE) post_linpred <- posterior_linpred(object, newdata = newdata, ndraws = ndraws, @@ -214,20 +220,36 @@ posterior_predict.jsdmStanFit <- function(object, newdata = NULL, if (object$family == "neg_binomial") { mod_kappa <- rstan::extract(object$fit, pars = "kappa", permuted = FALSE) } + if(object$family == "binomial"){ + if(is.null(newdata)) { + Ntrials <- object$data_list$Ntrials + } else { + Ntrials <- ntrials_check(Ntrials, nrow(newdata)) + } + } n_sites <- length(object$sites) n_species <- length(object$species) - post_pred <- lapply(post_linpred, function(x, family = object$family) { - x2 <- x - x2 <- apply(x2, 1:2, function(x) { - switch(object$family, - "gaussian" = stats::rnorm(1, x, mod_sigma), - "bernoulli" = stats::rbinom(1, 1, x), - "poisson" = stats::rpois(1, x), - "neg_binomial" = rgampois(1, x, mod_kappa) - ) - }) + post_pred <- lapply(seq_along(post_linpred), + function(x, family = object$family) { + x2 <- post_linpred[[x]] + if(family == "binomial"){ + for(i in 1:nrow(x2)){ + for(j in 1:ncol(x2)){ + x2[i,j] <- stats::rbinom(1, Ntrials[i], x2[i,j]) + } + } + } else { + x2 <- apply(x2, 1:2, function(x) { + switch(object$family, + "gaussian" = stats::rnorm(1, x, mod_sigma), + "bernoulli" = stats::rbinom(1, 1, x), + "poisson" = stats::rpois(1, x), + "neg_binomial" = rgampois(1, x, mod_kappa) + ) + }) + } x2 }) diff --git a/R/sim_data_funs.R b/R/sim_data_funs.R index c18ecdf..0d4d9d1 100644 --- a/R/sim_data_funs.R +++ b/R/sim_data_funs.R @@ -1,29 +1,29 @@ #' Generate simulated data within a variety of jSDM methodologies #' -#' The \code{jsdm_sim_data} function can simulate data with either a multivariate -#' generalised mixed model (MGLMM) or a generalised linear latent variable model -#' (GLLVM). The \code{gllvm_sim_data} and \code{mglmm_sim_data} are aliases for -#' \code{jsdm_sim_data} that set \code{method} to \code{"gllvm"} and \code{"mglmm"} -#' respectively. +#' The \code{jsdm_sim_data} function can simulate data with either a +#' multivariate generalised mixed model (MGLMM) or a generalised linear latent +#' variable model (GLLVM). The \code{gllvm_sim_data} and \code{mglmm_sim_data} +#' are aliases for \code{jsdm_sim_data} that set \code{method} to \code{"gllvm"} +#' and \code{"mglmm"} respectively. #' #' @details This simulates data based on a joint species distribution model with -#' either a generalised linear latent variable model approach or a multivariate -#' generalised linear mixed model approach. +#' either a generalised linear latent variable model approach or a +#' multivariate generalised linear mixed model approach. #' #' Models can be fit with or without "measured predictors", and if measured #' predictors are included then the species have species-specific parameter #' estimates. These can either be simulated completely independently, or have -#' information pooled across species. If information is pooled this can be modelled -#' as either a random draw from some mean and standard deviation or species -#' covariance can be modelled together (this will be the covariance used in the -#' overall model if the method used has covariance). +#' information pooled across species. If information is pooled this can be +#' modelled as either a random draw from some mean and standard deviation or +#' species covariance can be modelled together (this will be the covariance +#' used in the overall model if the method used has covariance). #' -#' Environmental covariate effects (\code{"betas"}) can be parameterised in two -#' ways. With the \code{"cor"} parameterisation all covariate effects are assumed -#' to be constrained by a correlation matrix between the covariates. With the -#' \code{"unstruct"} parameterisation all covariate effects are assumed to draw -#' from a simple distribution with no correlation structure. Both parameterisations -#' can be modified using the prior object. +#' Environmental covariate effects (\code{"betas"}) can be parameterised in +#' two ways. With the \code{"cor"} parameterisation all covariate effects are +#' assumed to be constrained by a correlation matrix between the covariates. +#' With the \code{"unstruct"} parameterisation all covariate effects are +#' assumed to draw from a simple distribution with no correlation structure. +#' Both parameterisations can be modified using the prior object. #' #' @export #' @@ -36,30 +36,36 @@ #' @param K is number of covariates, by default \code{0} #' #' @param family is the response family, must be one of \code{"gaussian"}, -#' \code{"neg_binomial"}, \code{"poisson"} or \code{"bernoulli"}. Regular -#' expression matching is supported. +#' \code{"neg_binomial"}, \code{"poisson"}, \code{"binomial"}, +#' or \code{"bernoulli"}. Regular expression matching is supported. #' #' @param method is the jSDM method to use, currently either \code{"gllvm"} or #' \code{"mglmm"} - see details for more information. #' -#' @param species_intercept Whether to include an intercept in the predictors, must -#' be \code{TRUE} if \code{K} is \code{0}. Defaults to \code{TRUE}. +#' @param species_intercept Whether to include an intercept in the predictors, +#' must be \code{TRUE} if \code{K} is \code{0}. Defaults to \code{TRUE}. +#' +#' @param Ntrials For the binomial distribution the number of trials, given as +#' either a single integer which is assumed to be constant across sites or as +#' a site-length vector of integers. #' #' @param site_intercept Whether a site intercept should be included, potential -#' values \code{"none"} (no site intercept) or \code{"ungrouped"} (site intercept -#' with no grouping). Defaults to no site intercept, grouped is not supported -#' currently. +#' values \code{"none"} (no site intercept) or \code{"ungrouped"} (site +#' intercept with no grouping). Defaults to no site intercept, grouped is not +#' supported currently. #' -#' @param beta_param The parameterisation of the environmental covariate effects, by -#' default \code{"unstruct"}. See details for further information. +#' @param beta_param The parameterisation of the environmental covariate +#' effects, by default \code{"unstruct"}. See details for further information. #' #' @param prior Set of prior specifications from call to [jsdm_prior()] jsdm_sim_data <- function(N, S, D = NULL, K = 0L, family, method = c("gllvm", "mglmm"), species_intercept = TRUE, + Ntrials = NULL, site_intercept = "none", beta_param = "unstruct", prior = jsdm_prior()) { - response <- match.arg(family, c("gaussian", "neg_binomial", "poisson", "bernoulli")) + response <- match.arg(family, c("gaussian", "neg_binomial", "poisson", + "bernoulli", "binomial")) site_intercept <- match.arg(site_intercept, c("none","ungrouped","grouped")) beta_param <- match.arg(beta_param, c("cor", "unstruct")) if(site_intercept == "grouped"){ @@ -89,6 +95,10 @@ jsdm_sim_data <- function(N, S, D = NULL, K = 0L, family, method = c("gllvm", "m stop("prior object must be of class jsdmprior, produced by jsdm_prior()") } + if(response == "binomial"){ + Ntrials <- ntrials_check(Ntrials = Ntrials, N = N) + } + # prior object breakdown prior_split <- lapply(prior, strsplit, split = "\\(|\\)|,") if (!all(sapply(prior_split, function(x) { @@ -305,7 +315,8 @@ jsdm_sim_data <- function(N, S, D = NULL, K = 0L, family, method = c("gllvm", "m ), "gaussian" = stats::rnorm(1, mu_ij, sigma), "poisson" = stats::rpois(1, exp(mu_ij)), - "bernoulli" = stats::rbinom(1, 1, inv_logit(mu_ij)) + "bernoulli" = stats::rbinom(1, 1, inv_logit(mu_ij)), + "binomial" = stats::rbinom(1, Ntrials[i], inv_logit(mu_ij)) ) } } @@ -319,6 +330,9 @@ jsdm_sim_data <- function(N, S, D = NULL, K = 0L, family, method = c("gllvm", "m if(beta_param == "cor"){ pars$sigmas_preds <- sigmas_preds pars$z_preds <- z_preds + if(K != 0){ + pars$cor_preds <- cor_preds + } } if (site_intercept == "ungrouped") { @@ -352,6 +366,9 @@ jsdm_sim_data <- function(N, S, D = NULL, K = 0L, family, method = c("gllvm", "m output <- list( Y = Y, pars = pars, N = N, S = S, D = D, K = J, X = x ) + if(response == "binomial"){ + output$Ntrials <- Ntrials + } return(output) } @@ -431,7 +448,7 @@ rgampois <- function(n, mu, scale) { } inv_logit <- function(x) { - 1 / (1 + exp(x)) + 1 / (1 + exp(-x)) } @@ -506,3 +523,19 @@ rinvgamma <- function(n, shape, scale) { rstudentt <- function(n, df, mu, sigma) { mu + sigma * stats::rt(n, df = df) } + +ntrials_check <- function(Ntrials, N){ + if(is.null(Ntrials)){ + stop("Number of trials must be specified for the binomial distribution") + } + if(!is.double(Ntrials) & !is.integer(Ntrials)){ + stop("Ntrials must be a positive integer") + } + if(!(length(Ntrials) %in% c(1, N))){ + stop("Ntrials must be of length 1 or N") + } + if(length(Ntrials) == 1L){ + Ntrials <- rep(Ntrials, N) + } + Ntrials +} diff --git a/R/stan_jsdm.R b/R/stan_jsdm.R index 26177f2..c8c91e9 100644 --- a/R/stan_jsdm.R +++ b/R/stan_jsdm.R @@ -27,7 +27,8 @@ #' @param D The number of latent variables within a GLLVM model #' #' @param family The response family for the model, required to be one of -#' \code{"gaussian"}, \code{"bernoulli"}, \code{"poisson"} or \code{"neg_binomial"} +#' \code{"gaussian"}, \code{"bernoulli"}, \code{"poisson"}, \code{"binomial"} +#' or \code{"neg_binomial"} #' #' @param species_intercept Whether the model should be fit with an intercept by #' species, by default \code{TRUE} @@ -44,6 +45,10 @@ #' @param site_groups If the site intercept is grouped, a vector of group identities #' per site #' +#' @param Ntrials For the binomial distribution the number of trials, given as +#' either a single integer which is assumed to be constant across sites or as +#' a site-length vector of integers. +#' #' @param prior Set of prior specifications from call to [jsdm_prior()] #' #' @param save_data If the data used to fit the model should be saved in the model @@ -97,9 +102,10 @@ stan_jsdm <- function(X, ...) UseMethod("stan_jsdm") stan_jsdm.default <- function(X = NULL, Y = NULL, species_intercept = TRUE, method, dat_list = NULL, family, site_intercept = "none", D = NULL, prior = jsdm_prior(), site_groups = NULL, - beta_param = "unstruct", + beta_param = "unstruct", Ntrials = NULL, save_data = TRUE, iter = 4000, log_lik = TRUE, ...) { - family <- match.arg(family, c("gaussian", "bernoulli", "poisson", "neg_binomial")) + family <- match.arg(family, c("gaussian", "bernoulli", "poisson", + "neg_binomial","binomial")) beta_param <- match.arg(beta_param, c("cor", "unstruct")) stopifnot( @@ -113,9 +119,8 @@ stan_jsdm.default <- function(X = NULL, Y = NULL, species_intercept = TRUE, meth data_list <- validate_data( Y = Y, X = X, species_intercept = species_intercept, D = D, site_intercept = site_intercept, site_groups = site_groups, - dat_list = dat_list, phylo = FALSE, - family = family, method = method, nu05 = "1", - delta = 1e-5 + dat_list = dat_list, + family = family, method = method, Ntrials = Ntrials ) # Create stancode @@ -220,8 +225,8 @@ stan_gllvm.formula <- function(formula, data = list(), ...) { # Internal ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ validate_data <- function(Y, D, X, species_intercept, - dat_list, family, site_intercept, phylo, - method, nu05, delta, site_groups) { + dat_list, family, site_intercept, + method, site_groups, Ntrials) { method <- match.arg(method, c("gllvm", "mglmm")) # do things if data not given as list: @@ -246,6 +251,10 @@ validate_data <- function(Y, D, X, species_intercept, X <- matrix(1, nrow = N, ncol = 1) colnames(X) <- "(Intercept)" } else { + if(is.null(colnames(X))){ + message("No column names specified for X, assigning names") + colnames(X) <- paste0("V",seq_len(ncol(X))) + } K <- ncol(X) + 1 * species_intercept if(is.data.frame(X)){ X <- as.matrix(X) @@ -278,10 +287,8 @@ validate_data <- function(Y, D, X, species_intercept, data_list$ngrp <- ngrp data_list$grps <- grps } - if (!isFALSE(phylo)) { - data_list$Dmat <- phylo - data_list$nu05 <- nu05 - data_list$delta <- delta + if(family == "binomial"){ + data_list$Ntrials <- Ntrials } } else { if (!all(c("Y", "K", "S", "N", "X") %in% names(dat_list))) { @@ -292,9 +299,9 @@ validate_data <- function(Y, D, X, species_intercept, stop("If supplying data as a list must have a D entry") } - if (!isFALSE(phylo)) { - if (!all(c("Dmat", "nu05", "delta") %in% names(dat_list))) { - stop("Phylo models require Dmat, nu05 and delta in dat_list") + if (identical(family, "binomial")) { + if (!all(c("Ntrials") %in% names(dat_list))) { + stop("Binomial models require Ntrials in dat_list") } } @@ -334,12 +341,17 @@ validate_data <- function(Y, D, X, species_intercept, ))) { stop("Y matrix is not binary") } - } else if (family %in% c("poisson", "neg_binomial")) { + } else if (family %in% c("poisson", "neg_binomial", "binomial")) { if (!any(apply(data_list$Y, 1:2, is.wholenumber))) { stop("Y matrix is not composed of integers") } } + # Check if Ntrials is appropriate given + if(identical(family, "binomial")) { + data_list$Ntrials <- ntrials_check(data_list$Ntrials, data_list$N) + } + return(data_list) } diff --git a/R/update.R b/R/update.R index 6b25168..e735119 100644 --- a/R/update.R +++ b/R/update.R @@ -10,6 +10,8 @@ #' @param newY New Y data, by default \code{NULL} #' @param newX New X data, by default \code{NULL} #' @param newD New number of latent variables, by default \code{NULL} +#' @param newNtrials New number of trials (binomial model only), by default +#' \code{NULL} #' @param save_data Whether to save the data in the jsdmStanFit object, by default #' \code{TRUE} #' @param ... Arguments passed to [rstan::sampling()] @@ -49,6 +51,7 @@ #' gllvm_fit2 #' } update.jsdmStanFit <- function(object, newY = NULL, newX = NULL, newD = NULL, + newNtrials = NULL, save_data = TRUE, ...) { if (length(object$data_list) == 0) { stop("Update requires the original data to be saved in the model object") @@ -76,6 +79,14 @@ update.jsdmStanFit <- function(object, newY = NULL, newX = NULL, newD = NULL, } else{ D <- object$data_list$D } + if(family == "binomial") { + if(!is.null(newNtrials)){ + Ntrials <- ntrials_check(newNtrials, nrow(Y)) + } else{ + Ntrials <- object$data_list$Ntrials + } + } + species_intercept <- "(Intercept)" %in% colnames(object$data_list$X) site_intercept <- ifelse("ngrp" %in% names(object$data_list), "grouped", @@ -83,22 +94,13 @@ update.jsdmStanFit <- function(object, newY = NULL, newX = NULL, newD = NULL, "none")) site_groups <- if(site_intercept == "grouped"){ object$data_list$grps} else{NULL} - phylo <- object$data_list$phylo - if (!isFALSE(phylo)) { - nu05 <- object$data_list$nu05 - delta <- object$data_list$delta - } else { - nu05 <- 0L - delta <- 1e-5 - } # validate data data_list <- validate_data( Y = Y, X = X, species_intercept = species_intercept, D = D, site_intercept = site_intercept, site_groups = site_groups, - dat_list = NULL, phylo = phylo, - family = family, method = method, nu05 = nu05, - delta = delta + dat_list = NULL, + family = family, method = method, Ntrials = Ntrials ) # get original stan model diff --git a/man/jsdm_sim_data.Rd b/man/jsdm_sim_data.Rd index 20dc14f..30fa5df 100644 --- a/man/jsdm_sim_data.Rd +++ b/man/jsdm_sim_data.Rd @@ -14,6 +14,7 @@ jsdm_sim_data( family, method = c("gllvm", "mglmm"), species_intercept = TRUE, + Ntrials = NULL, site_intercept = "none", beta_param = "unstruct", prior = jsdm_prior() @@ -33,53 +34,57 @@ mglmm_sim_data(...) \item{K}{is number of covariates, by default \code{0}} \item{family}{is the response family, must be one of \code{"gaussian"}, -\code{"neg_binomial"}, \code{"poisson"} or \code{"bernoulli"}. Regular -expression matching is supported.} +\code{"neg_binomial"}, \code{"poisson"}, \code{"binomial"}, +or \code{"bernoulli"}. Regular expression matching is supported.} \item{method}{is the jSDM method to use, currently either \code{"gllvm"} or \code{"mglmm"} - see details for more information.} -\item{species_intercept}{Whether to include an intercept in the predictors, must -be \code{TRUE} if \code{K} is \code{0}. Defaults to \code{TRUE}.} +\item{species_intercept}{Whether to include an intercept in the predictors, +must be \code{TRUE} if \code{K} is \code{0}. Defaults to \code{TRUE}.} + +\item{Ntrials}{For the binomial distribution the number of trials, given as +either a single integer which is assumed to be constant across sites or as +a site-length vector of integers.} \item{site_intercept}{Whether a site intercept should be included, potential -values \code{"none"} (no site intercept) or \code{"ungrouped"} (site intercept -with no grouping). Defaults to no site intercept, grouped is not supported -currently.} +values \code{"none"} (no site intercept) or \code{"ungrouped"} (site +intercept with no grouping). Defaults to no site intercept, grouped is not +supported currently.} -\item{beta_param}{The parameterisation of the environmental covariate effects, by -default \code{"unstruct"}. See details for further information.} +\item{beta_param}{The parameterisation of the environmental covariate +effects, by default \code{"unstruct"}. See details for further information.} \item{prior}{Set of prior specifications from call to \code{\link[=jsdm_prior]{jsdm_prior()}}} \item{...}{Arguments passed to jsdm_sim_data} } \description{ -The \code{jsdm_sim_data} function can simulate data with either a multivariate -generalised mixed model (MGLMM) or a generalised linear latent variable model -(GLLVM). The \code{gllvm_sim_data} and \code{mglmm_sim_data} are aliases for -\code{jsdm_sim_data} that set \code{method} to \code{"gllvm"} and \code{"mglmm"} -respectively. +The \code{jsdm_sim_data} function can simulate data with either a +multivariate generalised mixed model (MGLMM) or a generalised linear latent +variable model (GLLVM). The \code{gllvm_sim_data} and \code{mglmm_sim_data} +are aliases for \code{jsdm_sim_data} that set \code{method} to \code{"gllvm"} +and \code{"mglmm"} respectively. } \details{ This simulates data based on a joint species distribution model with -either a generalised linear latent variable model approach or a multivariate -generalised linear mixed model approach. +either a generalised linear latent variable model approach or a +multivariate generalised linear mixed model approach. Models can be fit with or without "measured predictors", and if measured predictors are included then the species have species-specific parameter estimates. These can either be simulated completely independently, or have -information pooled across species. If information is pooled this can be modelled -as either a random draw from some mean and standard deviation or species -covariance can be modelled together (this will be the covariance used in the -overall model if the method used has covariance). - -Environmental covariate effects (\code{"betas"}) can be parameterised in two -ways. With the \code{"cor"} parameterisation all covariate effects are assumed -to be constrained by a correlation matrix between the covariates. With the -\code{"unstruct"} parameterisation all covariate effects are assumed to draw -from a simple distribution with no correlation structure. Both parameterisations -can be modified using the prior object. +information pooled across species. If information is pooled this can be +modelled as either a random draw from some mean and standard deviation or +species covariance can be modelled together (this will be the covariance +used in the overall model if the method used has covariance). + +Environmental covariate effects (\code{"betas"}) can be parameterised in +two ways. With the \code{"cor"} parameterisation all covariate effects are +assumed to be constrained by a correlation matrix between the covariates. +With the \code{"unstruct"} parameterisation all covariate effects are +assumed to draw from a simple distribution with no correlation structure. +Both parameterisations can be modified using the prior object. } \section{Functions}{ \itemize{ diff --git a/man/posterior_predict.jsdmStanFit.Rd b/man/posterior_predict.jsdmStanFit.Rd index fbc39e8..ca501f3 100644 --- a/man/posterior_predict.jsdmStanFit.Rd +++ b/man/posterior_predict.jsdmStanFit.Rd @@ -12,6 +12,7 @@ ndraws = NULL, draw_ids = NULL, list_index = "draws", + Ntrials = NULL, ... ) } @@ -32,6 +33,10 @@ number of samples.} \item{list_index}{Whether to return the output list indexed by the number of draws (default), species, or site.} +\item{Ntrials}{For the binomial distribution the number of trials, given as +either a single integer which is assumed to be constant across sites or as +a site-length vector of integers.} + \item{...}{Currently unused} } \value{ diff --git a/man/stan_gllvm.Rd b/man/stan_gllvm.Rd index 1adea80..37ede1f 100644 --- a/man/stan_gllvm.Rd +++ b/man/stan_gllvm.Rd @@ -43,7 +43,8 @@ Y, X, N, S, K, and site_intercept. See output of \code{\link[=jsdm_sim_data]{jsd example of how this can be formatted.} \item{family}{The response family for the model, required to be one of -\code{"gaussian"}, \code{"bernoulli"}, \code{"poisson"} or \code{"neg_binomial"}} +\code{"gaussian"}, \code{"bernoulli"}, \code{"poisson"}, \code{"binomial"} +or \code{"neg_binomial"}} \item{site_intercept}{Whether a site intercept should be included, potential values \code{"none"} (no site intercept), \code{"grouped"} (a site intercept diff --git a/man/stan_jsdm.Rd b/man/stan_jsdm.Rd index 10ee733..478f0f1 100644 --- a/man/stan_jsdm.Rd +++ b/man/stan_jsdm.Rd @@ -20,6 +20,7 @@ stan_jsdm(X, ...) prior = jsdm_prior(), site_groups = NULL, beta_param = "unstruct", + Ntrials = NULL, save_data = TRUE, iter = 4000, log_lik = TRUE, @@ -47,7 +48,8 @@ Y, X, N, S, K, and site_intercept. See output of \code{\link[=jsdm_sim_data]{jsd example of how this can be formatted.} \item{family}{The response family for the model, required to be one of -\code{"gaussian"}, \code{"bernoulli"}, \code{"poisson"} or \code{"neg_binomial"}} +\code{"gaussian"}, \code{"bernoulli"}, \code{"poisson"}, \code{"binomial"} +or \code{"neg_binomial"}} \item{site_intercept}{Whether a site intercept should be included, potential values \code{"none"} (no site intercept), \code{"grouped"} (a site intercept @@ -64,6 +66,10 @@ per site} \item{beta_param}{The parameterisation of the environmental covariate effects, by default \code{"unstruct"}. See details for further information.} +\item{Ntrials}{For the binomial distribution the number of trials, given as +either a single integer which is assumed to be constant across sites or as +a site-length vector of integers.} + \item{save_data}{If the data used to fit the model should be saved in the model object, by default TRUE.} diff --git a/man/stan_mglmm.Rd b/man/stan_mglmm.Rd index 42d4fc9..a25bb1a 100644 --- a/man/stan_mglmm.Rd +++ b/man/stan_mglmm.Rd @@ -40,7 +40,8 @@ Y, X, N, S, K, and site_intercept. See output of \code{\link[=jsdm_sim_data]{jsd example of how this can be formatted.} \item{family}{The response family for the model, required to be one of -\code{"gaussian"}, \code{"bernoulli"}, \code{"poisson"} or \code{"neg_binomial"}} +\code{"gaussian"}, \code{"bernoulli"}, \code{"poisson"}, \code{"binomial"} +or \code{"neg_binomial"}} \item{site_intercept}{Whether a site intercept should be included, potential values \code{"none"} (no site intercept), \code{"grouped"} (a site intercept diff --git a/man/update.jsdmStanFit.Rd b/man/update.jsdmStanFit.Rd index 168f547..c167b5f 100644 --- a/man/update.jsdmStanFit.Rd +++ b/man/update.jsdmStanFit.Rd @@ -4,7 +4,15 @@ \alias{update.jsdmStanFit} \title{Update a jsdmStanFit model object with new data or Stan arguments} \usage{ -\method{update}{jsdmStanFit}(object, newY = NULL, newX = NULL, newD = NULL, save_data = TRUE, ...) +\method{update}{jsdmStanFit}( + object, + newY = NULL, + newX = NULL, + newD = NULL, + newNtrials = NULL, + save_data = TRUE, + ... +) } \arguments{ \item{object}{The jsdmStanFit model object} @@ -15,6 +23,9 @@ \item{newD}{New number of latent variables, by default \code{NULL}} +\item{newNtrials}{New number of trials (binomial model only), by default +\code{NULL}} + \item{save_data}{Whether to save the data in the jsdmStanFit object, by default \code{TRUE}} diff --git a/tests/testthat/test-posterior_predict.R b/tests/testthat/test-posterior_predict.R index 4cc895e..6fd3697 100644 --- a/tests/testthat/test-posterior_predict.R +++ b/tests/testthat/test-posterior_predict.R @@ -122,3 +122,31 @@ test_that("posterior_(lin)pred works with mglmm and negbin", { expect_false(any(sapply(negb_pred2, anyNA))) expect_false(any(sapply(negb_pred2, function(x) x < 0))) }) + +bino_sim_data <- gllvm_sim_data(N = 100, S = 9, K = 2, family = "binomial", + site_intercept = "ungrouped", D = 2, + Ntrials = 20) +bino_pred_data <- matrix(rnorm(100 * 2), nrow = 100) +colnames(bino_pred_data) <- c("V1", "V2") +suppressWarnings(bino_fit <- stan_mglmm( + dat_list = bino_sim_data, family = "binomial", + refresh = 0, chains = 2, iter = 500 +)) +test_that("posterior_(lin)pred works with gllvm and bino", { + bino_pred <- posterior_predict(bino_fit, ndraws = 100) + + expect_length(bino_pred, 100) + expect_false(any(sapply(bino_pred, anyNA))) + expect_false(any(sapply(bino_pred, function(x) x < 0))) + expect_false(any(sapply(bino_pred, function(x) x > 20))) + + bino_pred2 <- posterior_predict(bino_fit, + newdata = bino_pred_data, Ntrials = 16, + ndraws = 50, list_index = "species" + ) + + expect_length(bino_pred2, 9) + expect_false(any(sapply(bino_pred2, anyNA))) + expect_false(any(sapply(bino_pred2, function(x) x < 0))) + expect_false(any(sapply(bino_pred2, function(x) x > 16))) +}) diff --git a/tests/testthat/test-sim_data_funs.R b/tests/testthat/test-sim_data_funs.R index 887ebd4..10793ac 100644 --- a/tests/testthat/test-sim_data_funs.R +++ b/tests/testthat/test-sim_data_funs.R @@ -18,6 +18,11 @@ test_that("gllvm_sim_data errors with bad inputs", { "Grouped site intercept not supported" ) + expect_error( + gllvm_sim_data(N = 200, S = 8, D = 2, family = "binomial", Ntrials = "1"), + "Ntrials must be a positive integer" + ) + }) test_that("gllvm_sim_data returns a list of correct length", { @@ -51,6 +56,16 @@ test_that("mglmm_sim_data errors with bad inputs", { site_intercept = "grouped"), "Grouped site intercept not supported" ) + + expect_error( + mglmm_sim_data(N = 100, S = 5, family = "binomial"), + "Number of trials must be specified" + ) + + expect_error( + mglmm_sim_data(N = 50, S = 8, family = "binomial", Ntrials = c(1,3)), + "Ntrials must be of length" + ) }) @@ -66,13 +81,19 @@ test_that("mglmm_sim_data returns a list of correct length", { expect_named(mglmm_sim, c( "Y", "pars", "N", "S", "D", "K", "X" )) + gllvm_sim <- jsdm_sim_data(100,12,D=2,family = "binomial", method = "gllvm", + Ntrials = 19) + expect_named(gllvm_sim, c( + "Y", "pars", "N", "S", "D", "K", "X", "Ntrials" + )) + expect_length(gllvm_sim$Ntrials, 100) }) test_that("jsdm_sim_data returns all appropriate pars", { mglmm_sim <- jsdm_sim_data(100,12,family = "gaussian", method = "mglmm", - beta_param = "cor") + beta_param = "cor", K = 3) expect_named(mglmm_sim$pars, c( - "betas","sigmas_preds","z_preds","sigmas_species", + "betas","sigmas_preds","z_preds","cor_preds","sigmas_species", "cor_species","z_species","sigma" )) gllvm_sim <- jsdm_sim_data(100,12,D=2,family = "neg_bin", method = "gllvm", @@ -83,6 +104,7 @@ test_that("jsdm_sim_data returns all appropriate pars", { )) + }) test_that("prior specification works", { diff --git a/tests/testthat/test-stan_jsdm.R b/tests/testthat/test-stan_jsdm.R index 0741dee..6dfab1f 100644 --- a/tests/testthat/test-stan_jsdm.R +++ b/tests/testthat/test-stan_jsdm.R @@ -43,27 +43,6 @@ test_that("summary works", { expect_match(rownames(mglmm_summ), "beta", all = TRUE) }) -test_that("update works", { - mglmm_data <- mglmm_sim_data(N = 20, S = 5, family = "gaussian", K = 2) - suppressWarnings(mglmm_fit2 <- update(mglmm_fit, - newY = mglmm_data$Y, - newX = mglmm_data$X, - refresh = 0, - chains = 2, iter = 200 - )) - suppressWarnings(mglmm_fit3 <- update(mglmm_fit, - refresh = 0, iter = 100 - )) - - expect_s3_class(mglmm_fit2, "jsdmStanFit") - expect_s3_class(mglmm_fit3, "jsdmStanFit") - - jsdm_empty <- jsdmStanFit_empty() - expect_error( - update(jsdm_empty), - "Update requires the original data to be saved in the model object" - ) -}) test_that("nuts_params works", { expect_named(nuts_params(mglmm_fit), c("Chain", "Iteration", "Parameter", "Value")) @@ -126,6 +105,23 @@ test_that("stan_gllvm fails with wrong inputs", { family = "bern", D = -1), "Must have at least one latent variable" ) + + expect_error( + suppressMessages(stan_gllvm(Y = matrix(sample.int(10,100, + replace = TRUE), + nrow = 20), + X = matrix(rnorm(60), nrow = 20), + family = "binomial", D = 2, Ntrials = "a")), + "Ntrials must be a positive integer" + ) + + expect_error( + suppressMessages(stan_gllvm(Y = matrix(sample.int(10,100, + replace = TRUE), nrow = 20), + X = matrix(rnorm(60), nrow = 20), + family = "binomial", D = 2, Ntrials = c(1,3))), + "Ntrials must be of length" + ) }) test_that("stan_gllvm returns right type of object", { @@ -165,6 +161,17 @@ test_that("stan_gllvm returns right type of object", { )) expect_s3_class(gllvm_fit, "jsdmStanFit") + + # binomial + gllvm_data <- gllvm_sim_data(N = 20, S = 8, D = 2, K = 2, family = "binomial", + Ntrials = 20) + suppressWarnings(gllvm_fit <- stan_gllvm( + Y = as.data.frame(gllvm_data$Y), X = as.data.frame(gllvm_data$X), + D = gllvm_data$D, refresh = 0, chains = 2, iter = 200, + family = "binomial", Ntrials = 20 + )) + + expect_s3_class(gllvm_fit, "jsdmStanFit") }) # MGLMM tests ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -187,6 +194,15 @@ test_that("stan_mglmm fails with wrong inputs", { ), "Y matrix is not composed of integers" ) + expect_error( + stan_mglmm( + Y = matrix(sample.int(100,500,replace=TRUE), nrow = 100), + X = data.frame(V1 = rnorm(100), + V2 = rnorm(100)), + family = "binomial" + ), + "Number of trials must be specified" + ) }) test_that("stan_mglmm returns right type of object", { @@ -216,6 +232,10 @@ test_that("stan_mglmm returns right type of object", { chains = 2 )) + expect_error(stan_jsdm(dat_list = mglmm_data, family = "binomial", + method = "mglmm"), + "Binomial models require Ntrials") + expect_s3_class(mglmm_fit, "jsdmStanFit") # neg bin @@ -227,6 +247,20 @@ test_that("stan_mglmm returns right type of object", { )) expect_s3_class(mglmm_fit, "jsdmStanFit") + + # binomial + mglmm_data <- mglmm_sim_data(N = 51, S = 6, K = 2, family = "binomial", + Ntrials=sample.int(20,51,replace = TRUE)) + suppressWarnings(mglmm_fit <- stan_mglmm( + dat_list = mglmm_data, + family = "binomial", + refresh = 0, chains = 2, iter = 200 + )) + + expect_s3_class(mglmm_fit, "jsdmStanFit") + + # binomial + }) # stan_jsdm site_intercept tests ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -235,11 +269,6 @@ mglmm_data <- mglmm_sim_data(N = 100, S = 8, family = "gaussian", K = 3, df <- as.data.frame(mglmm_data$X) grps <- rep(1:20, each = 5) -gllvm_data <- gllvm_sim_data(N = 100, S = 8, family = "bern", D = 3, - site_intercept = "ungrouped") -gllvm_data$grps <- rep(1:20, each = 5) -gllvm_data$ngrp <- 20 - test_that("site intercept errors correctly", { expect_error(stan_mglmm(~ V1 + V2, data = df, Y = mglmm_data$Y, site_intercept = "fauh",family = "gaussian")) @@ -270,17 +299,3 @@ test_that("site intercept models run", { expect_s3_class(mglmm_fit, "jsdmStanFit") }) - -test_that("site_intercept models update", { - suppressWarnings(gllvm_fit <- stan_gllvm(X = NULL, dat_list = gllvm_data, - site_intercept = "grouped", - family = "bern", - refresh = 0, chains = 1, iter = 200 - )) - expect_s3_class(gllvm_fit, "jsdmStanFit") - - suppressWarnings(gllvm_fit2 <- update(gllvm_fit, newD = 2, - refresh = 0, chains = 1, iter = 200 - )) - expect_s3_class(gllvm_fit2, "jsdmStanFit") -}) diff --git a/tests/testthat/test-update.R b/tests/testthat/test-update.R new file mode 100644 index 0000000..1bee5e1 --- /dev/null +++ b/tests/testthat/test-update.R @@ -0,0 +1,64 @@ +mglmm_data <- mglmm_sim_data(N = 30, S = 8, family = "gaussian", K = 3) +df <- as.data.frame(mglmm_data$X) + +suppressWarnings(mglmm_fit <- stan_jsdm(~ V1 + V2 + V3, + data = df, Y = mglmm_data$Y, + family = "gaussian", method = "mglmm", + refresh = 0, chains = 2, iter = 200 +)) +test_that("update works", { + mglmm_data <- mglmm_sim_data(N = 20, S = 5, family = "gaussian", K = 2) + suppressWarnings(mglmm_fit2 <- update(mglmm_fit, + newY = mglmm_data$Y, + newX = mglmm_data$X, + refresh = 0, + chains = 2, iter = 200 + )) + suppressWarnings(mglmm_fit3 <- update(mglmm_fit, + refresh = 0, iter = 100 + )) + + expect_s3_class(mglmm_fit2, "jsdmStanFit") + expect_s3_class(mglmm_fit3, "jsdmStanFit") + + jsdm_empty <- jsdmStanFit_empty() + expect_error( + update(jsdm_empty), + "Update requires the original data to be saved in the model object" + ) +}) + +gllvm_data <- gllvm_sim_data(N = 100, S = 8, family = "bern", D = 3, + site_intercept = "ungrouped") +gllvm_data$grps <- rep(1:20, each = 5) +gllvm_data$ngrp <- 20 + +test_that("site_intercept models update", { + suppressWarnings(gllvm_fit <- stan_gllvm(X = NULL, dat_list = gllvm_data, + site_intercept = "grouped", + family = "bern", + refresh = 0, chains = 1, iter = 200 + )) + expect_s3_class(gllvm_fit, "jsdmStanFit") + + suppressWarnings(gllvm_fit2 <- update(gllvm_fit, newD = 2, + refresh = 0, chains = 1, iter = 200 + )) + expect_s3_class(gllvm_fit2, "jsdmStanFit") +}) + +gllvm_data <- gllvm_sim_data(N = 100, S = 8, family = "binomial", D = 3, + site_intercept = "ungrouped", Ntrials = 20) + +test_that("binomial models update", { + suppressWarnings(gllvm_fit <- stan_gllvm(dat_list = gllvm_data, + family = "binomial", + refresh = 0, chains = 1, iter = 200 + )) + expect_s3_class(gllvm_fit, "jsdmStanFit") + + suppressWarnings(gllvm_fit2 <- update(gllvm_fit, newD = 2, newNtrials = 30, + refresh = 0, chains = 1, iter = 200 + )) + expect_s3_class(gllvm_fit2, "jsdmStanFit") +})