From 414755546305ea2ef06f70145c78712a024e8963 Mon Sep 17 00:00:00 2001 From: jgabry Date: Fri, 13 Dec 2024 14:26:30 -0700 Subject: [PATCH 1/2] Ensure rank overlay plot starts at 0 even if not all bins present fixes #331 --- R/mcmc-traces.R | 13 +++- .../mcmc-rank-overlay-not-all-bins.svg | 63 +++++++++++++++++++ tests/testthat/data-for-mcmc-tests.R | 7 +++ tests/testthat/test-mcmc-traces.R | 6 ++ 4 files changed, 86 insertions(+), 3 deletions(-) create mode 100644 tests/testthat/_snaps/mcmc-traces/mcmc-rank-overlay-not-all-bins.svg diff --git a/R/mcmc-traces.R b/R/mcmc-traces.R index c01bc416..01345b62 100644 --- a/R/mcmc-traces.R +++ b/R/mcmc-traces.R @@ -304,17 +304,24 @@ mcmc_rank_overlay <- function(x, mutate(cut = cut(.data$value_rank, n_bins)) %>% group_by(.data$cut) %>% mutate(bin_start = min(.data$value_rank)) %>% - ungroup() %>% - select(-c("cut")) + ungroup() + # Count how many values fall into each bin per chain & parameter d_bin_counts <- data %>% left_join(histobins, by = "value_rank") %>% count(.data$parameter, .data$chain, .data$bin_start) + # Now ensure that all combinations of parameter, chain, and bin_start exist + # even if no counts are present (https://github.com/stan-dev/bayesplot/issues/331) + all_params_chains <- dplyr::distinct(data, .data$parameter, .data$chain) + all_bins <- dplyr::distinct(histobins, .data$bin_start, .data$cut) + combos <- dplyr::cross_join(all_params_chains, all_bins) + d_bin_counts <- full_join(combos, d_bin_counts, by = c("parameter", "chain", "bin_start")) %>% + mutate(n = dplyr::coalesce(n, 0L)) + # Duplicate the final bin, setting the left edge to the greatest x value, so # that the entire x-axis is used, right_edge <- max(data$value_rank) - d_bin_counts <- d_bin_counts %>% dplyr::filter(.data$bin_start == max(.data$bin_start)) %>% mutate(bin_start = right_edge) %>% diff --git a/tests/testthat/_snaps/mcmc-traces/mcmc-rank-overlay-not-all-bins.svg b/tests/testthat/_snaps/mcmc-traces/mcmc-rank-overlay-not-all-bins.svg new file mode 100644 index 00000000..53f0d82a --- /dev/null +++ b/tests/testthat/_snaps/mcmc-traces/mcmc-rank-overlay-not-all-bins.svg @@ -0,0 +1,63 @@ + + + + + + + + + + + + + + + + + + + + + + + + + +0 +25 +50 +75 +100 + + + + + + + + + + + +0 +500 +1000 +1500 +2000 +Rank +Chain + + +1 +2 +mcmc_rank_overlay (not all bins) + + diff --git a/tests/testthat/data-for-mcmc-tests.R b/tests/testthat/data-for-mcmc-tests.R index 0c7298c3..1136cefe 100644 --- a/tests/testthat/data-for-mcmc-tests.R +++ b/tests/testthat/data-for-mcmc-tests.R @@ -73,4 +73,11 @@ vdiff_dframe_chains_lp <- vdiff_dframe_chains_divergences vdiff_dframe_chains_lp$Parameter <- NULL vdiff_dframe_chains_lp$Value <- runif(2000, -100, -50) +vdiff_dframe_rank_overlay_bins_test <- posterior::as_draws_df( + list( + list(theta = -2 + 0.003 * 1:1000 + stats::arima.sim(list(ar = 0.7), n = 1000, sd = 0.5)), + list(theta = 1 + -0.01 * 1:1000 + stats::arima.sim(list(ar = 0.7), n = 1000, sd = 0.5)) + ) +) + set.seed(seed = NULL) diff --git a/tests/testthat/test-mcmc-traces.R b/tests/testthat/test-mcmc-traces.R index d7ba2b39..62d46c88 100644 --- a/tests/testthat/test-mcmc-traces.R +++ b/tests/testthat/test-mcmc-traces.R @@ -154,6 +154,9 @@ test_that("mcmc_rank_overlay renders correctly", { n_bins = 4 ) + # https://github.com/stan-dev/bayesplot/issues/331 + p_not_all_bins_exist <- mcmc_rank_overlay(vdiff_dframe_rank_overlay_bins_test) + vdiffr::expect_doppelganger("mcmc_rank_overlay (default)", p_base) vdiffr::expect_doppelganger( "mcmc_rank_overlay (reference line)", @@ -164,6 +167,9 @@ test_that("mcmc_rank_overlay renders correctly", { "mcmc_rank_overlay (wide bins)", p_one_param_wide_bins ) + + # https://github.com/stan-dev/bayesplot/issues/331 + vdiffr::expect_doppelganger("mcmc_rank_overlay (not all bins)", p_not_all_bins_exist) }) test_that("mcmc_rank_hist renders correctly", { From 2227e7d19bd6eb511a7a909a0db1694459b0243b Mon Sep 17 00:00:00 2001 From: jgabry Date: Sat, 14 Dec 2024 09:29:06 -0700 Subject: [PATCH 2/2] Use expand.grid + left_join Co-Authored-By: Maximilian Scholz <6530123+sims1253@users.noreply.github.com> --- R/mcmc-traces.R | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/R/mcmc-traces.R b/R/mcmc-traces.R index 01345b62..571a6ce2 100644 --- a/R/mcmc-traces.R +++ b/R/mcmc-traces.R @@ -313,11 +313,15 @@ mcmc_rank_overlay <- function(x, # Now ensure that all combinations of parameter, chain, and bin_start exist # even if no counts are present (https://github.com/stan-dev/bayesplot/issues/331) - all_params_chains <- dplyr::distinct(data, .data$parameter, .data$chain) - all_bins <- dplyr::distinct(histobins, .data$bin_start, .data$cut) - combos <- dplyr::cross_join(all_params_chains, all_bins) - d_bin_counts <- full_join(combos, d_bin_counts, by = c("parameter", "chain", "bin_start")) %>% - mutate(n = dplyr::coalesce(n, 0L)) + all_combos <- dplyr::as_tibble(expand.grid( + parameter = unique(data$parameter), + chain = unique(data$chain), + bin_start = unique(histobins$bin_start), + stringsAsFactors = FALSE + )) + d_bin_counts <- all_combos %>% + left_join(d_bin_counts, by = c("parameter", "chain", "bin_start")) %>% + mutate(n = dplyr::if_else(is.na(n), 0L, n)) # Duplicate the final bin, setting the left edge to the greatest x value, so # that the entire x-axis is used,