Skip to content

Commit

Permalink
Merge pull request #318 from stan-dev/discrete_PIT_in_ppc_pit_ecdf
Browse files Browse the repository at this point in the history
Limit maximum number of evaluation points in ppc_pit_ecdf functions by default to 1000.
  • Loading branch information
jgabry authored Feb 7, 2024
2 parents c97cd58 + 5bdbccc commit 680b7ae
Show file tree
Hide file tree
Showing 9 changed files with 118 additions and 111 deletions.
2 changes: 1 addition & 1 deletion R/helpers-ppc.R
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ adjust_gamma <- function(N,
abort("Value of 'prob' must be in (0,1).")
}
if (is.null(interpolate_adj)) {
if (K <= 200) {
if (K <= 200 || N < 100) {
interpolate_adj <- FALSE
} else {
interpolate_adj <- TRUE
Expand Down
57 changes: 30 additions & 27 deletions R/ppc-distributions.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,11 @@
#' both, depending on the `y_draw` argument.
#' }
#' \item{`ppc_pit_ecdf()`, `ppc_pit_ecdf_grouped()`}{
#' The ECDF of the empirical PIT values of `y` computed with respect to the
#' corresponding `yrep` values. `100 * prob`% central simultaneous confidence
#' intervals are provided to assess if `y` and `yrep` originate from the same
#' distribution. The PIT values can also be provided directly as `pit`.
#' The PIT-ECDF of the empirical PIT values of `y` computed with respect to
#' the corresponding `yrep` values. `100 * prob`% central simultaneous
#' confidence intervals are provided to assess if `y` and `yrep` originate
#' from the same distribution. The PIT values can also be provided directly
#' as `pit`.
#' See Säilynoja et al. (2021) for more details.}
#' }
#'
Expand All @@ -73,8 +74,8 @@
#' # ppc_ecdf_overlay with continuous data (set discrete=TRUE if discrete data)
#' ppc_ecdf_overlay(y, yrep[sample(nrow(yrep), 25), ])
#'
#' # ECDF and ECDF difference plot of the PIT values of y compared to yrep
#' # with 99% simultaneous confidence bands.
#' # PIT-ECDF and PIT-ECDF difference plot of the PIT values of y compared to
#' # yrep with 99% simultaneous confidence bands.
#' ppc_pit_ecdf(y, yrep, prob = 0.99, plot_diff = FALSE)
#' ppc_pit_ecdf(y, yrep, prob = 0.99, plot_diff = TRUE)
#' }
Expand Down Expand Up @@ -107,9 +108,9 @@
#' ppc_ecdf_overlay_grouped(y, yrep[1:25, ], group = group)
#'
#' \donttest{
#' # ECDF difference plots of the PIT values by group
#' # PIT-ECDF plots of the PIT values by group
#' # with 99% simultaneous confidence bands.
#' ppc_pit_ecdf_grouped(y, yrep, group=group, prob=0.99, plot_diff = TRUE)
#' ppc_pit_ecdf_grouped(y, yrep, group=group, prob=0.99)
#' }
#'
#' \donttest{
Expand Down Expand Up @@ -612,7 +613,7 @@ ppc_pit_ecdf <- function(y,
) %>%
unlist()
if (is.null(K)) {
K <- nrow(yrep) + 1
K <- min(nrow(yrep) + 1, 1000)
}
} else {
inform("'pit' specified so ignoring 'y', and 'yrep' if specified.")
Expand All @@ -631,7 +632,7 @@ ppc_pit_ecdf <- function(y,
lims <- ecdf_intervals(gamma = gamma, N = N, K = K)
ggplot() +
aes(
x = 1:K / K,
x = seq(0,1,length.out = K),
y = ecdf(pit)(seq(0, 1, length.out = K)) -
(plot_diff == TRUE) * seq(0, 1, length.out = K),
color = "y"
Expand Down Expand Up @@ -679,7 +680,7 @@ ppc_pit_ecdf_grouped <-
) %>%
unlist()
if (is.null(K)) {
K <- nrow(yrep) + 1
K <- min(nrow(yrep) + 1, 1000)
}
} else {
inform("'pit' specified so ignoring 'y' and 'yrep' if specified.")
Expand All @@ -691,7 +692,7 @@ ppc_pit_ecdf_grouped <-
N_g <- sum(group == g)
adjust_gamma(
N = N_g,
K = min(N_g, K),
K = ifelse(is.null(K), N_g, K),
prob = prob,
interpolate_adj = interpolate_adj
)
Expand All @@ -700,21 +701,23 @@ ppc_pit_ecdf_grouped <-

data <- data.frame(pit = pit, group = group) %>%
group_by(group) %>%
dplyr::group_map(~ data.frame(
ecdf_value = ecdf(.x$pit)(seq(0, 1, length.out = min(nrow(.x), K))),
group = .y[1],
lims_upper = ecdf_intervals(
gamma = gammas[[unlist(.y[1])]],
N = nrow(.x),
K = min(nrow(.x), K)
)$upper[-1] / nrow(.x),
lims_lower = ecdf_intervals(
gamma = gammas[[unlist(.y[1])]],
N = nrow(.x),
K = min(nrow(.x), K)
)$lower[-1] / nrow(.x),
x = seq(0, 1, length.out = min(nrow(.x), K))
)) %>%
dplyr::group_map(
~ data.frame(
ecdf_value = ecdf(.x$pit)(seq(0, 1, length.out = ifelse(is.null(K), nrow(.x), K))),
group = .y[1],
lims_upper = ecdf_intervals(
gamma = gammas[[unlist(.y[1])]],
N = nrow(.x),
K = ifelse(is.null(K), nrow(.x), K)
)$upper[-1] / nrow(.x),
lims_lower = ecdf_intervals(
gamma = gammas[[unlist(.y[1])]],
N = nrow(.x),
K = ifelse(is.null(K), nrow(.x), K)
)$lower[-1] / nrow(.x),
x = seq(0, 1, length.out = ifelse(is.null(K), nrow(.x), K))
)
) %>%
dplyr::bind_rows()

ggplot(data) +
Expand Down
19 changes: 10 additions & 9 deletions man-roxygen/args-pit-ecdf.R
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
#' @param K An optional integer defining the number of equally spaced evaluation
#' points for the ECDF. Reducing K when using `interpolate_adj = FALSE` makes
#' computing the confidence bands faster. For `ppc_pit_ecdf` and
#' `ppc_pit_ecdf_grouped`, defaults to `ncol(yrep) + 1`, or `length(pit)` if PIT
#' values are supplied. For `mcmc_rank_ecdf` defaults to the number of
#' iterations per chain in `x`.
#' points for the PIT-ECDF. Reducing K when using `interpolate_adj = FALSE`
#' makes computing the confidence bands faster. For `ppc_pit_ecdf` and
#' `ppc_pit_ecdf_grouped`, if PIT values are supplied, defaults to
#' `length(pit)`, otherwise `yrep` determines the maximum accuracy of the
#' estimated PIT values and `K` is set to `min(nrow(yrep) + 1, 1000)`. For
#' `mcmc_rank_ecdf`, defaults to the number of iterations per chain in `x`.
#' @param prob The desired simultaneous coverage level of the bands around the
#' ECDF. A value in (0,1).
#' @param plot_diff A boolean defining whether to plot the difference between
#' the observed ECDF and the theoretical expectation for uniform PIT values
#' rather than plotting the regular ECDF. The default is `FALSE`, but for
#' large samples we recommend setting `plot_diff=TRUE` as the difference plot
#' will visually show a more dynamic range.
#' the observed PIT-ECDF and the theoretical expectation for uniform PIT
#' values rather than plotting the regular ECDF. The default is `FALSE`, but
#' for large samples we recommend setting `plot_diff=TRUE` as the difference
#' plot will visually show a more dynamic range.
#' @param interpolate_adj A boolean defining if the simultaneous confidence
#' bands should be interpolated based on precomputed values rather than
#' computed exactly. Computing the bands may be computationally intensive and
Expand Down
11 changes: 6 additions & 5 deletions man/MCMC-traces.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

36 changes: 19 additions & 17 deletions man/PPC-distributions.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 8 additions & 8 deletions tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-default.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
16 changes: 8 additions & 8 deletions tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-diff.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit 680b7ae

Please sign in to comment.