From 72da5d10e9f64c1f41e99b3358736fa666fe704a Mon Sep 17 00:00:00 2001 From: Joe Brown Date: Thu, 26 Oct 2023 13:41:40 -0400 Subject: [PATCH 01/17] testing out code correction --- R/iterate_model.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/iterate_model.R b/R/iterate_model.R index 5972b8f..0c9768c 100644 --- a/R/iterate_model.R +++ b/R/iterate_model.R @@ -154,9 +154,9 @@ iterate_model <- function(core, params, save_years = NULL, save_vars = NULL) { # Create a placeholder dataframe for the run if there's no data collected if (length(result_list) < i) { dat <- data.frame( - scenario = core$name, + scenario = rep(core$name, each = length(save_years)), year = save_years, - variable = save_vars, + variable = rep(save_vars, each = length(save_years)), value = NA, units = NA, run_number = i From 37ef7e7701ce0eff06fce05b89d486dc866f0baa Mon Sep 17 00:00:00 2001 From: Joe Brown Date: Tue, 28 Nov 2023 09:04:08 -0500 Subject: [PATCH 02/17] Update iterate_model.R --- R/iterate_model.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/iterate_model.R b/R/iterate_model.R index 0c9768c..8bb56bb 100644 --- a/R/iterate_model.R +++ b/R/iterate_model.R @@ -147,7 +147,7 @@ iterate_model <- function(core, params, save_years = NULL, save_vars = NULL) { }, # if Hector crashes because of parameter combinations, send error message error = function(e) { - message("An error occurred") + message("An error occurred", i) } ) From c9c128e851febb94554f388c65dcd9379404ddb3 Mon Sep 17 00:00:00 2001 From: Joe Brown Date: Tue, 6 Feb 2024 11:21:05 -0500 Subject: [PATCH 03/17] Updating score_bayesian to accept sensitivity argument The point of this argument is to provide a multiplier to sd(obs_data) to compute how sensitive the likelihood reuslt is to increasing rmse values. --- R/score_bayesian.R | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/R/score_bayesian.R b/R/score_bayesian.R index bcca51d..b4814c6 100644 --- a/R/score_bayesian.R +++ b/R/score_bayesian.R @@ -32,7 +32,7 @@ #' #' # scoring with a decay rate of 2 #' score_bayesian(mat, sigma = 2) -score_bayesian <- function(m, sigma = NULL) { +score_bayesian <- function(m, sigma = NULL, sensitivity = NULL) { # initialize vector to store RMSE values from loop rmse_vector <- numeric() @@ -62,26 +62,31 @@ score_bayesian <- function(m, sigma = NULL) { rmse_vector[i] <- rmse_vals } - # Compute sigma if not provided by the user - if (is.null(sigma)) { - sigma <- sd(obs_data) # Calculate sigma as the standard deviation of RMSE values + # Compute likelihood sensitivity using multiplier provided by the user + if (is.null(sensitivity)) { + sensitivity_value <- sd(obs_data) # Calculate sensitivity as the standard deviation of the observed data + } else { + sensitivity_value <- sensitivity * sd(obs_data) + if (length(sigma) != 1) + stop( "Sensitivity must be a single value.") } + # Check if sigma is negative, if so throw error - if (!is.na(sigma) && sigma < 0) warning("sigma value cannot be negative.") + if (any(!is.na(sensitivity) & (sensitivity < 0))) stop("Sensitivity cannot be negative.") # Compute likelihood using normal distribution likelihood function. # This is the probability of observing the modeled data given the # observed data. # Remove first value when calling rmse_vector (first values should be NA because # it represented obs_data) - likelihood <- exp(-0.5 * ((rmse_vector[-1]) / sigma)^2) + likelihood <- exp(-0.5 * ((rmse_vector[-1]) / sensitivity_value)^2) # Computing unnormalized posterior scores # Currently only computing posterior scores using uniform prior. # uniform prior is calculated as 1/length(likelihood) which is # the same as 1 / # of runs. - posterior <- likelihood * (1 / length(likelihood)) + posterior <- likelihood * (1 / length(likelihood)) ### DONT THINK THIS IS A NECESSARY STEP MAY JUST WANT TO RETURN "LIKELIHOOD" ### # Create data frame of results - get run_numbers from the list where RMSE values # are computed (names of the split_list components) From 5b58e4f81bf7de3bdee78e69760b650e98610431 Mon Sep 17 00:00:00 2001 From: Joe Brown Date: Tue, 6 Feb 2024 13:03:10 -0500 Subject: [PATCH 04/17] Update to RMSE_calc function adding sigma argument to apply error to calculation Adding sigma argument to the function. Sigma will communicate how to incorporate error terms to the RMSE calculation - a constant error term will be applied if a single value is provided. A time-varying error term will be applied if a vector is provided (vector mu = length of observed data). --- R/RMSE_calc.R | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/R/RMSE_calc.R b/R/RMSE_calc.R index 82c34f5..996dcc9 100644 --- a/R/RMSE_calc.R +++ b/R/RMSE_calc.R @@ -5,6 +5,9 @@ #' #' @param x A vector of modeled data values #' @param y A vector of observed data values +#' @param sigma A single value or vector of error terms the same length as y. +#' A single value will apply a constant error term. User can provide a vector +#' of error terms to incorporate time-varying error into the RMSE calculation. #' #' @return Returns a vector of RMSE values #' @export @@ -13,15 +16,26 @@ #' x <- c(1:5) #' y <- c(5:9) #' +#' # constant error #' RMSE_calc(x, y) -RMSE_calc <- function(x, y) { +RMSE_calc <- function(x, y, sigma) { # Check if all values are NA if (all(is.na(x)) || all(is.na(y))) { return(NA) } + # set sigma to default if not provided by the user + if( is.null(sigma)) { + sigma = sd(y) # if sigma is NULL compute as sd of observed data (y) + } else { + # if user provides sigma values it should be length = 1 or same length as the obs data. + required_length <- length(y) + if(length(sigma) != 1 && length(sigma) != required_length) + stop(paste("Length of sigma must be a single value or a vector matching the length of ", required_length, ".", sep = "")) + } + # compute RMSE - rmse_vals <- sqrt(mean((x - y)^2)) + rmse_vals <- sqrt(mean((x - y)^2) / sigma) # return a vector of RMSE values return(rmse_vals) From 9a199a6be31e929de4ee8abeed76bb7d4d0bf48f Mon Sep 17 00:00:00 2001 From: Joe Brown Date: Tue, 6 Feb 2024 13:17:13 -0500 Subject: [PATCH 05/17] Update to score_bayesian - adding sigma to RMSE_calc within score_bayesian Adding code to pass the sigma identified in score_bayesian to RMSE_calc. --- R/score_bayesian.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/score_bayesian.R b/R/score_bayesian.R index b4814c6..2c7cc3b 100644 --- a/R/score_bayesian.R +++ b/R/score_bayesian.R @@ -55,7 +55,7 @@ score_bayesian <- function(m, sigma = NULL, sensitivity = NULL) { if (all(is.na(model_data))) { rmse_vals <- NA # Set RMSE to NA for this column } else { - rmse_vals <- RMSE_calc(obs_data, model_data) + rmse_vals <- RMSE_calc(obs_data, model_data, sigma = sigma) } # vector of RMSE value for each model iteration From bcd6723c2784727884c8ed8635eb1ca38e7fda84 Mon Sep 17 00:00:00 2001 From: Joe Brown Date: Tue, 6 Feb 2024 13:31:19 -0500 Subject: [PATCH 06/17] changing 'sigma' to 'sensitivity' for example in documentation --- R/score_bayesian.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/score_bayesian.R b/R/score_bayesian.R index 2c7cc3b..3cb1c42 100644 --- a/R/score_bayesian.R +++ b/R/score_bayesian.R @@ -31,7 +31,7 @@ #' mat <- matrix(data = 1:15, nrow = 5, ncol = 3) #' #' # scoring with a decay rate of 2 -#' score_bayesian(mat, sigma = 2) +#' score_bayesian(mat, sensitivity = 2) score_bayesian <- function(m, sigma = NULL, sensitivity = NULL) { # initialize vector to store RMSE values from loop rmse_vector <- numeric() From 4a0fc7943e90e9c9259a89c861b4e34ed71d64bb Mon Sep 17 00:00:00 2001 From: Joe Brown Date: Tue, 6 Feb 2024 16:56:53 -0500 Subject: [PATCH 07/17] Update to RMSE_calc if no sigma provided calcualte RMSE without error (previous method) If the user does not provide sigma value or vector, calculate RMSE how it was calculated in the previous version: rmse_vals <- sqrt(mean((x - y)^2)) --- R/RMSE_calc.R | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/R/RMSE_calc.R b/R/RMSE_calc.R index 996dcc9..a242f4e 100644 --- a/R/RMSE_calc.R +++ b/R/RMSE_calc.R @@ -18,7 +18,7 @@ #' #' # constant error #' RMSE_calc(x, y) -RMSE_calc <- function(x, y, sigma) { +RMSE_calc <- function(x, y, sigma = NULL) { # Check if all values are NA if (all(is.na(x)) || all(is.na(y))) { return(NA) @@ -26,16 +26,17 @@ RMSE_calc <- function(x, y, sigma) { # set sigma to default if not provided by the user if( is.null(sigma)) { - sigma = sd(y) # if sigma is NULL compute as sd of observed data (y) + # if sigma = NULL compute RMSE without error term + rmse_vals <- sqrt(mean((x - y)^2)) } else { - # if user provides sigma values it should be length = 1 or same length as the obs data. + # if user provides sigma values compute RMSE with error. + # sigma should be length = 1 or same length as the obs data. required_length <- length(y) if(length(sigma) != 1 && length(sigma) != required_length) stop(paste("Length of sigma must be a single value or a vector matching the length of ", required_length, ".", sep = "")) - } - # compute RMSE - rmse_vals <- sqrt(mean((x - y)^2) / sigma) + rmse_vals <- sqrt(mean((x - y)^2) / sigma) + } # return a vector of RMSE values return(rmse_vals) From ba020caf16266e655d83390b01f683f19ffd900a Mon Sep 17 00:00:00 2001 From: Joe Brown Date: Wed, 7 Feb 2024 12:36:15 -0500 Subject: [PATCH 08/17] changes to RMSE_calc and score_bayesian to produce the time-varying error in an accurate way Okay in this change: 1. fixing the way that the time-varying is calculated. 2. if the user does not provide sigma -> assume homoscedasticity and compute sigma as sd(obs_data). 3. if use provides sigma use it. A vector should be used to if heteroscedasticity is assumed. 4. changes the order of arguments in function to make computation consistent between RMSE_calc and score_bayesian. --- R/RMSE_calc.R | 11 ++++++----- R/score_bayesian.R | 16 ++++++++-------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/R/RMSE_calc.R b/R/RMSE_calc.R index a242f4e..5ea5987 100644 --- a/R/RMSE_calc.R +++ b/R/RMSE_calc.R @@ -26,16 +26,17 @@ RMSE_calc <- function(x, y, sigma = NULL) { # set sigma to default if not provided by the user if( is.null(sigma)) { - # if sigma = NULL compute RMSE without error term - rmse_vals <- sqrt(mean((x - y)^2)) + # if sigma = NULL compute RMSE with sd(y) + sigma = sd(y) + rmse_vals <- sqrt(mean(((x - y) / sigma)^2)) } else { # if user provides sigma values compute RMSE with error. # sigma should be length = 1 or same length as the obs data. required_length <- length(y) - if(length(sigma) != 1 && length(sigma) != required_length) - stop(paste("Length of sigma must be a single value or a vector matching the length of ", required_length, ".", sep = "")) + if( length(sigma) != 1 && length(sigma) != required_length) + stop( paste("Length of sigma must be a single value or a vector matching the length of ", required_length, ".", sep = "")) - rmse_vals <- sqrt(mean((x - y)^2) / sigma) + rmse_vals <- sqrt(mean(((x - y) / sigma)^2)) } # return a vector of RMSE values diff --git a/R/score_bayesian.R b/R/score_bayesian.R index 3cb1c42..983c0c1 100644 --- a/R/score_bayesian.R +++ b/R/score_bayesian.R @@ -55,24 +55,24 @@ score_bayesian <- function(m, sigma = NULL, sensitivity = NULL) { if (all(is.na(model_data))) { rmse_vals <- NA # Set RMSE to NA for this column } else { - rmse_vals <- RMSE_calc(obs_data, model_data, sigma = sigma) + rmse_vals <- RMSE_calc(model_data, obs_data, sigma = sigma) } # vector of RMSE value for each model iteration - rmse_vector[i] <- rmse_vals + rmse_vector[i - 1] <- rmse_vals } # Compute likelihood sensitivity using multiplier provided by the user if (is.null(sensitivity)) { - sensitivity_value <- sd(obs_data) # Calculate sensitivity as the standard deviation of the observed data + sensitivity_value <- sd(rmse_vector) # Calculate sensitivity as the standard deviation of the RMSE results } else { - sensitivity_value <- sensitivity * sd(obs_data) - if (length(sigma) != 1) - stop( "Sensitivity must be a single value.") + if (length(sensitivity) != 1) + stop( "Sensitivity must be a single value.") + sensitivity_value <- sensitivity * sd(rmse_vector) } - # Check if sigma is negative, if so throw error + # Check if sensitivity is negative, if so throw error if (any(!is.na(sensitivity) & (sensitivity < 0))) stop("Sensitivity cannot be negative.") # Compute likelihood using normal distribution likelihood function. @@ -80,7 +80,7 @@ score_bayesian <- function(m, sigma = NULL, sensitivity = NULL) { # observed data. # Remove first value when calling rmse_vector (first values should be NA because # it represented obs_data) - likelihood <- exp(-0.5 * ((rmse_vector[-1]) / sensitivity_value)^2) + likelihood <- exp(-0.5 * (rmse_vector / sensitivity_value)^2) # Computing unnormalized posterior scores # Currently only computing posterior scores using uniform prior. From 6e6836b16f98786ae88e47ca56909b664357296f Mon Sep 17 00:00:00 2001 From: Joe Brown Date: Wed, 7 Feb 2024 15:31:00 -0500 Subject: [PATCH 09/17] changing the structure of score_bayesian to be more efficent with storing rmse values. --- R/score_bayesian.R | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/R/score_bayesian.R b/R/score_bayesian.R index 983c0c1..471d2e2 100644 --- a/R/score_bayesian.R +++ b/R/score_bayesian.R @@ -34,7 +34,7 @@ #' score_bayesian(mat, sensitivity = 2) score_bayesian <- function(m, sigma = NULL, sensitivity = NULL) { # initialize vector to store RMSE values from loop - rmse_vector <- numeric() + rmse_vector <- rep(NA_real_, ncol(m)) # Stop execution if number of columns in the matrix is less than 2 # indicates that there is only one model result stored in matrix @@ -59,16 +59,16 @@ score_bayesian <- function(m, sigma = NULL, sensitivity = NULL) { } # vector of RMSE value for each model iteration - rmse_vector[i - 1] <- rmse_vals + rmse_vector[i] <- rmse_vals } # Compute likelihood sensitivity using multiplier provided by the user if (is.null(sensitivity)) { - sensitivity_value <- sd(rmse_vector) # Calculate sensitivity as the standard deviation of the RMSE results + sensitivity_value <- sd(rmse_vector, na.rm = TRUE) # Calculate sensitivity as the standard deviation of the RMSE results } else { if (length(sensitivity) != 1) stop( "Sensitivity must be a single value.") - sensitivity_value <- sensitivity * sd(rmse_vector) + sensitivity_value <- sensitivity * sd(rmse_vector, na.rm = TRUE) } @@ -80,6 +80,7 @@ score_bayesian <- function(m, sigma = NULL, sensitivity = NULL) { # observed data. # Remove first value when calling rmse_vector (first values should be NA because # it represented obs_data) + rmse_vector <- rmse_vector[-1] likelihood <- exp(-0.5 * (rmse_vector / sensitivity_value)^2) # Computing unnormalized posterior scores From 5c057333d87fe8b3e862cf358a6974e5ee268dc6 Mon Sep 17 00:00:00 2001 From: Joe Brown Date: Wed, 7 Feb 2024 15:42:56 -0500 Subject: [PATCH 10/17] Changing default sigma from NULL to sd(observed data) --- R/RMSE_calc.R | 27 +++++++++++++-------------- R/score_bayesian.R | 21 ++++++++++++++------- 2 files changed, 27 insertions(+), 21 deletions(-) diff --git a/R/RMSE_calc.R b/R/RMSE_calc.R index 5ea5987..6d08d41 100644 --- a/R/RMSE_calc.R +++ b/R/RMSE_calc.R @@ -18,26 +18,25 @@ #' #' # constant error #' RMSE_calc(x, y) -RMSE_calc <- function(x, y, sigma = NULL) { +RMSE_calc <- function(x, y, sigma = sd(y)) { # Check if all values are NA if (all(is.na(x)) || all(is.na(y))) { return(NA) } - # set sigma to default if not provided by the user - if( is.null(sigma)) { - # if sigma = NULL compute RMSE with sd(y) - sigma = sd(y) - rmse_vals <- sqrt(mean(((x - y) / sigma)^2)) - } else { - # if user provides sigma values compute RMSE with error. - # sigma should be length = 1 or same length as the obs data. - required_length <- length(y) - if( length(sigma) != 1 && length(sigma) != required_length) - stop( paste("Length of sigma must be a single value or a vector matching the length of ", required_length, ".", sep = "")) + # if user provides sigma values compute RMSE with error. + # sigma should be length = 1 or same length as the obs data. + if (length(sigma) != 1 && length(sigma) != length(y)) + stop( + paste( + "Length of sigma must be a single value or a vector matching the length of observed data = ", + length(y), + ".", + sep = "" + ) + ) - rmse_vals <- sqrt(mean(((x - y) / sigma)^2)) - } + rmse_vals <- sqrt(mean(((x - y) / sigma) ^ 2)) # return a vector of RMSE values return(rmse_vals) diff --git a/R/score_bayesian.R b/R/score_bayesian.R index 471d2e2..bff6156 100644 --- a/R/score_bayesian.R +++ b/R/score_bayesian.R @@ -32,7 +32,9 @@ #' #' # scoring with a decay rate of 2 #' score_bayesian(mat, sensitivity = 2) -score_bayesian <- function(m, sigma = NULL, sensitivity = NULL) { +score_bayesian <- function(m, + sigma = NULL, + sensitivity = NULL) { # initialize vector to store RMSE values from loop rmse_vector <- rep(NA_real_, ncol(m)) @@ -44,7 +46,8 @@ score_bayesian <- function(m, sigma = NULL, sensitivity = NULL) { obs_data <- m[, 1] # error if the observed data has no non-NA values - if (all(is.na(obs_data))) stop("No non-NA values present in observed data.") + if (all(is.na(obs_data))) + stop("No non-NA values present in observed data.") # loop across columns of the matrix. For each column (i) after col 2 for (i in 2:ncol(m)) { @@ -64,16 +67,19 @@ score_bayesian <- function(m, sigma = NULL, sensitivity = NULL) { # Compute likelihood sensitivity using multiplier provided by the user if (is.null(sensitivity)) { - sensitivity_value <- sd(rmse_vector, na.rm = TRUE) # Calculate sensitivity as the standard deviation of the RMSE results + sensitivity_value <- + sd(rmse_vector, na.rm = TRUE) # Calculate sensitivity as the standard deviation of the RMSE results } else { if (length(sensitivity) != 1) - stop( "Sensitivity must be a single value.") + stop("Sensitivity must be a single value.") sensitivity_value <- sensitivity * sd(rmse_vector, na.rm = TRUE) } # Check if sensitivity is negative, if so throw error - if (any(!is.na(sensitivity) & (sensitivity < 0))) stop("Sensitivity cannot be negative.") + if (any(!is.na(sensitivity) & + (sensitivity < 0))) + stop("Sensitivity cannot be negative.") # Compute likelihood using normal distribution likelihood function. # This is the probability of observing the modeled data given the @@ -81,13 +87,14 @@ score_bayesian <- function(m, sigma = NULL, sensitivity = NULL) { # Remove first value when calling rmse_vector (first values should be NA because # it represented obs_data) rmse_vector <- rmse_vector[-1] - likelihood <- exp(-0.5 * (rmse_vector / sensitivity_value)^2) + likelihood <- exp(-0.5 * (rmse_vector / sensitivity_value) ^ 2) # Computing unnormalized posterior scores # Currently only computing posterior scores using uniform prior. # uniform prior is calculated as 1/length(likelihood) which is # the same as 1 / # of runs. - posterior <- likelihood * (1 / length(likelihood)) ### DONT THINK THIS IS A NECESSARY STEP MAY JUST WANT TO RETURN "LIKELIHOOD" ### + posterior <- + likelihood * (1 / length(likelihood)) ### DONT THINK THIS IS A NECESSARY STEP MAY JUST WANT TO RETURN "LIKELIHOOD" ### # Create data frame of results - get run_numbers from the list where RMSE values # are computed (names of the split_list components) From 55169290c4f5c59d9b0a73a996ec956cdc0566b9 Mon Sep 17 00:00:00 2001 From: Joe Brown Date: Fri, 9 Feb 2024 15:52:06 -0500 Subject: [PATCH 11/17] updating score_bayesian to return likelihood instead of "unnormalized posterior" posterior probabilities is what prob_cal returns-- so I don't want to return that here -- I want to return likelihoods, which are normalized weights. I think but I will check this again. --- R/score_bayesian.R | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/R/score_bayesian.R b/R/score_bayesian.R index bff6156..64bbfa9 100644 --- a/R/score_bayesian.R +++ b/R/score_bayesian.R @@ -49,6 +49,11 @@ score_bayesian <- function(m, if (all(is.na(obs_data))) stop("No non-NA values present in observed data.") + # If sigma is not provided, use sd(y) as default + if (is.null(sigma)) { + sigma <- sd(obs_data) + } + # loop across columns of the matrix. For each column (i) after col 2 for (i in 2:ncol(m)) { # indicate modeled data are in subsequent columns @@ -93,10 +98,10 @@ score_bayesian <- function(m, # Currently only computing posterior scores using uniform prior. # uniform prior is calculated as 1/length(likelihood) which is # the same as 1 / # of runs. - posterior <- - likelihood * (1 / length(likelihood)) ### DONT THINK THIS IS A NECESSARY STEP MAY JUST WANT TO RETURN "LIKELIHOOD" ### + # posterior <- + # likelihood * (1 / length(likelihood)) ### DONT THINK THIS IS A NECESSARY STEP MAY JUST WANT TO RETURN "LIKELIHOOD" ### # Create data frame of results - get run_numbers from the list where RMSE values # are computed (names of the split_list components) - return(posterior) + return(likelihood) } From 08e7386dfc63c009fcf9f8067c1505e69a93a648 Mon Sep 17 00:00:00 2001 From: Joe Brown Date: Mon, 12 Feb 2024 17:05:21 -0500 Subject: [PATCH 12/17] fixing comment --- R/score_bayesian.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/score_bayesian.R b/R/score_bayesian.R index 64bbfa9..2cd040b 100644 --- a/R/score_bayesian.R +++ b/R/score_bayesian.R @@ -98,7 +98,7 @@ score_bayesian <- function(m, # Currently only computing posterior scores using uniform prior. # uniform prior is calculated as 1/length(likelihood) which is # the same as 1 / # of runs. - # posterior <- + # posterior <- # likelihood * (1 / length(likelihood)) ### DONT THINK THIS IS A NECESSARY STEP MAY JUST WANT TO RETURN "LIKELIHOOD" ### # Create data frame of results - get run_numbers from the list where RMSE values From eb11ceb95a4a15a8c082f8d6c43f96a08f9cf704 Mon Sep 17 00:00:00 2001 From: Joe Brown Date: Mon, 12 Feb 2024 17:12:31 -0500 Subject: [PATCH 13/17] Editing test code for RMSE_calc --- tests/testthat/test-RMSE_calc.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-RMSE_calc.R b/tests/testthat/test-RMSE_calc.R index 28ab593..33a3b89 100644 --- a/tests/testthat/test-RMSE_calc.R +++ b/tests/testthat/test-RMSE_calc.R @@ -7,7 +7,7 @@ test_that("NAs passed to function are treated correctly", { # test RMSE functionality test_that("RMSE is computed accurately", { - expect_equal(RMSE_calc(1, 5), sqrt(mean(1 - 5)^2)) + expect_equal(RMSE_calc(1, 5), sqrt((mean(1-5)/sd(5)^2))) }) # test return is vector From 87e4fad00e41fe8db444bcc0cffaac7b57bb5c26 Mon Sep 17 00:00:00 2001 From: Joe Brown Date: Mon, 12 Feb 2024 17:55:51 -0500 Subject: [PATCH 14/17] Adding additional tests for RMSE_calc --- tests/testthat/test-RMSE_calc.R | 10 ++++++++++ tests/testthat/test-score_bayesian.R | 10 +++++++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-RMSE_calc.R b/tests/testthat/test-RMSE_calc.R index 33a3b89..2f3ea0a 100644 --- a/tests/testthat/test-RMSE_calc.R +++ b/tests/testthat/test-RMSE_calc.R @@ -15,3 +15,13 @@ test_that("RMSE is computed accurately", { test_that("RMSE return is a vector", { expect_vector(RMSE_calc(x = 1:3, y = 3:5)) }) + +# tests for sigma +test_that("sigma is a single value", { + expect_equal(RMSE_calc(1, 5, sigma = 1), sqrt(((mean(1-5)/1)^2))) +}) + +test_that("error if sigma is a vector of a different length than y", { + expect_error(RMSE_calc(1, 5, sigma = c(1, 2, 3)), + regexp = "Length of sigma must be a single value or a vector matching the length of observed data = 1") +}) diff --git a/tests/testthat/test-score_bayesian.R b/tests/testthat/test-score_bayesian.R index cd46df0..cd78898 100644 --- a/tests/testthat/test-score_bayesian.R +++ b/tests/testthat/test-score_bayesian.R @@ -18,10 +18,14 @@ test_that("function stops and produces error messages", { regexp = "No non-NA values present in observed data" ) - # warning when user supplies negative sigma values - expect_warning(score_bayesian(m, -1), - regexp = "sigma value cannot be negative." + # error when user supplies negative sensitivity value + expect_error(score_bayesian(m, sensitivity = -1), + regexp = "Sensitivity cannot be negative." ) + + # error when user supplies more than one sensitivity value + expect_error(score_bayesian(m, sensitivity = c(1, 2)), + regexp = "Sensitivity must be a single value.") }) # Testing output accuracy From f523ce1228cc4e5983b24e6b97ece29a038e0951 Mon Sep 17 00:00:00 2001 From: Joe Brown Date: Mon, 12 Feb 2024 20:55:46 -0500 Subject: [PATCH 15/17] increasing number of results needed to perform score_bayesian Need to make this an issue. The default for sensitivity is sd(rmse_vector) which does not work for column < 2 vectors of results. --- R/score_bayesian.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/score_bayesian.R b/R/score_bayesian.R index 2cd040b..164f348 100644 --- a/R/score_bayesian.R +++ b/R/score_bayesian.R @@ -40,7 +40,7 @@ score_bayesian <- function(m, # Stop execution if number of columns in the matrix is less than 2 # indicates that there is only one model result stored in matrix - stopifnot("More than 2 columns must be included in input matrix" = ncol(m) > 2) + stopifnot("More than 2 columns must be included in input matrix" = ncol(m) > 3) # indicate that observed data are in the first column of the matrix obs_data <- m[, 1] @@ -54,7 +54,7 @@ score_bayesian <- function(m, sigma <- sd(obs_data) } - # loop across columns of the matrix. For each column (i) after col 2 + # loop across columns of the matrix. For each column (i) starting with col 2 for (i in 2:ncol(m)) { # indicate modeled data are in subsequent columns model_data <- m[, i] From 6787eea212857ec37270cc5dbf6130a916aaa10a Mon Sep 17 00:00:00 2001 From: Joe Brown Date: Mon, 12 Feb 2024 23:02:38 -0500 Subject: [PATCH 16/17] editing tests for score_bayesian Need to revisit because testing that the score_bayesian output is equal to the likelihood function is a little complicated. --- R/score_bayesian.R | 19 ++++++++------ tests/testthat/test-score_bayesian.R | 38 +++++++++++----------------- 2 files changed, 26 insertions(+), 31 deletions(-) diff --git a/R/score_bayesian.R b/R/score_bayesian.R index 164f348..9695d27 100644 --- a/R/score_bayesian.R +++ b/R/score_bayesian.R @@ -28,7 +28,7 @@ #' #' @examples #' # creating sample matrix -#' mat <- matrix(data = 1:15, nrow = 5, ncol = 3) +#' mat <- matrix(data = 1:20, nrow = 5, ncol = 4) #' #' # scoring with a decay rate of 2 #' score_bayesian(mat, sensitivity = 2) @@ -40,7 +40,7 @@ score_bayesian <- function(m, # Stop execution if number of columns in the matrix is less than 2 # indicates that there is only one model result stored in matrix - stopifnot("More than 2 columns must be included in input matrix" = ncol(m) > 3) + stopifnot("More than 3 columns must be included in input matrix" = ncol(m) > 3) # indicate that observed data are in the first column of the matrix obs_data <- m[, 1] @@ -70,6 +70,11 @@ score_bayesian <- function(m, rmse_vector[i] <- rmse_vals } + # Check if sensitivity is negative, if so throw error + if (any(!is.na(sensitivity) & + (sensitivity < 0))) + stop("Sensitivity cannot be negative.") + # Compute likelihood sensitivity using multiplier provided by the user if (is.null(sensitivity)) { sensitivity_value <- @@ -79,12 +84,10 @@ score_bayesian <- function(m, stop("Sensitivity must be a single value.") sensitivity_value <- sensitivity * sd(rmse_vector, na.rm = TRUE) } - - - # Check if sensitivity is negative, if so throw error - if (any(!is.na(sensitivity) & - (sensitivity < 0))) - stop("Sensitivity cannot be negative.") + if (!is.na(sensitivity_value) && sensitivity_value == 0) { + likelihood <- rep(1, length(rmse_vector[-1])) + return(likelihood) + } # Compute likelihood using normal distribution likelihood function. # This is the probability of observing the modeled data given the diff --git a/tests/testthat/test-score_bayesian.R b/tests/testthat/test-score_bayesian.R index cd78898..6cb9e7a 100644 --- a/tests/testthat/test-score_bayesian.R +++ b/tests/testthat/test-score_bayesian.R @@ -4,11 +4,11 @@ m <- matrix(c(rep(1, 6), rep(2, 3), rep(3, 3)), nrow = 3, ncol = 4) # Testing error/warning cases test_that("function stops and produces error messages", { - # error when matrix has less than two columns + # error when matrix has less than three columns # m with only 2 cols - m_2row <- matrix(data = c(1:2), nrow = 2, ncol = 2) - expect_error(score_bayesian(m_2row), - regexp = "More than 2 columns must be included in input matrix" + m_2col <- matrix(data = c(1:6), nrow = 3, ncol = 2) + expect_error(score_bayesian(m_2col), + regexp = "More than 3 columns must be included in input matrix" ) # error when entire m is NA @@ -32,24 +32,16 @@ test_that("function stops and produces error messages", { test_that("scores assessed correctly", { # If entire column of modeled data is NA, set RMSE value to NA - m_NA <- matrix(data = c(rep(1, 3), rep(1, 3), rep(NA, 3)), nrow = 3, ncol = 3) - expect_equal(score_bayesian(m_NA, 2), c(0.5, NA)) + m_NA <- matrix(data = c(rep(1, 3), rep(1, 3), rep(2, 3), rep(NA, 3)), nrow = 3, ncol = 4) + test_result_1 <- score_bayesian(m_NA, sigma = 1) + expect_equal(test_result_1 [3], as.numeric(NA)) # scores equal when calculated RMSE = 0 - m2 <- matrix(data = c(1, 1, 1), nrow = 1, ncol = 3) - expect_equal(score_bayesian(m2, 2), c(0.5, 0.5)) - - # when sigma = 0 all scores are identical - m3 <- matrix(data = c(1:3), nrow = 1, ncol = 3) - result <- score_bayesian(m3, 0) - expect_identical(result, c(rep(result[1], length(result)))) - - # expected output of function matches gaussian likelihood - m4 <- matrix(data = c(1, 2, 2), nrow = 1, ncol = 3) - sigma4 <- 2 - expected_likelihood <- exp(-0.5 * (m4[1]^2) / sigma4^2) / m4[1, -1] - expect_equal( - score_bayesian(m4, sigma4), - expected_likelihood - ) -}) + m2 <- matrix(data = c(rep(1,3), rep(1, 3), rep(1, 3), rep(1, 3)), nrow = 3, ncol = 4) + expect_equal(score_bayesian(m2, sigma = 1), c(1, 1, 1)) + + # when sensitivity = 0 all scores are identical + m3 <- matrix(data = c(1:4), nrow = 1, ncol = 4) + expect_equal(score_bayesian(m3, sigma = 1, sensitivity = 0), c(1, 1, 1)) + + }) From 4238827a7485ddbd3e736b3bb67f2422d6f135e2 Mon Sep 17 00:00:00 2001 From: Joe Brown Date: Mon, 12 Feb 2024 23:32:50 -0500 Subject: [PATCH 17/17] editing documentation before pull reequest --- R/RMSE_calc.R | 5 +++-- R/score_bayesian.R | 16 +++++++++++----- man/RMSE_calc.Rd | 8 +++++++- man/score_bayesian.Rd | 23 +++++++++++++++-------- 4 files changed, 36 insertions(+), 16 deletions(-) diff --git a/R/RMSE_calc.R b/R/RMSE_calc.R index 6d08d41..45eaee5 100644 --- a/R/RMSE_calc.R +++ b/R/RMSE_calc.R @@ -8,7 +8,9 @@ #' @param sigma A single value or vector of error terms the same length as y. #' A single value will apply a constant error term. User can provide a vector #' of error terms to incorporate time-varying error into the RMSE calculation. -#' +#' `sigma` default assume homoscedasticity of residuals and applies a constant +#' error term equal to the standard deviation of observed data (scoring criterion). + #' @return Returns a vector of RMSE values #' @export #' @@ -16,7 +18,6 @@ #' x <- c(1:5) #' y <- c(5:9) #' -#' # constant error #' RMSE_calc(x, y) RMSE_calc <- function(x, y, sigma = sd(y)) { # Check if all values are NA diff --git a/R/score_bayesian.R b/R/score_bayesian.R index 9695d27..a1daca2 100644 --- a/R/score_bayesian.R +++ b/R/score_bayesian.R @@ -10,11 +10,17 @@ #' @param m A Matrix of values. The first column of the matrix should be #' a vector of observed data for a give variable. Subsequent vectors should be #' representative of modeled values for a given variable. -#' @param sigma Numeric value (optional). The standard deviation parameter for -#' the normal distribution used in the Bayesian analysis. If not provided, the -#' function will automatically compute it as the standard deviation of the -#' Root Mean Square Error (RMSE). A smaller value of `sigma` will make the -#' Bayesian analysis give more weight to models with lower RMSE values. +#' @param sigma Numeric value (optional). A single value or vector of error terms +#' the same length as y. A single value will apply a constant error term. User +#' can provide a vector of error terms to incorporate time-varying error +#' to the RMSE calculation. `sigma` default assume homoscedasticity of residuals +#' and applies a constant error term equal to the standard deviation of observed +#' data (scoring criterion). +#' @param sensitivity A multiplier that adjusts the sensitivity of the likelihood +#' values to increasing RMSE. If not provided, the function will automatically +#' calculate the sensitivity as one unit of standard deviation of the RMSE results. +#' A smaller sensitivity value will make the Bayesian analysis give more weight +#' to models with lower RMSE values. #' #' @note Note: In Bayesian statistics, the choice of `sigma` can significantly #' impact the results and conclusions of the analysis. Users are encouraged to diff --git a/man/RMSE_calc.Rd b/man/RMSE_calc.Rd index 859ef05..52f4d15 100644 --- a/man/RMSE_calc.Rd +++ b/man/RMSE_calc.Rd @@ -4,12 +4,18 @@ \alias{RMSE_calc} \title{Calculating Root Mean Square Error (RMSE)} \usage{ -RMSE_calc(x, y) +RMSE_calc(x, y, sigma = sd(y)) } \arguments{ \item{x}{A vector of modeled data values} \item{y}{A vector of observed data values} + +\item{sigma}{A single value or vector of error terms the same length as y. +A single value will apply a constant error term. User can provide a vector +of error terms to incorporate time-varying error into the RMSE calculation. +`sigma` default assume homoscedasticity of residuals and applies a constant +error term equal to the standard deviation of observed data (scoring criterion).} } \value{ Returns a vector of RMSE values diff --git a/man/score_bayesian.Rd b/man/score_bayesian.Rd index 74adc04..8b3f864 100644 --- a/man/score_bayesian.Rd +++ b/man/score_bayesian.Rd @@ -4,18 +4,25 @@ \alias{score_bayesian} \title{Computing Model Scores as Posterior Probabilities using Bayesian Inference} \usage{ -score_bayesian(m, sigma = NULL) +score_bayesian(m, sigma = NULL, sensitivity = NULL) } \arguments{ \item{m}{A Matrix of values. The first column of the matrix should be a vector of observed data for a give variable. Subsequent vectors should be representative of modeled values for a given variable.} -\item{sigma}{Numeric value (optional). The standard deviation parameter for -the normal distribution used in the Bayesian analysis. If not provided, the -function will automatically compute it as the standard deviation of the -Root Mean Square Error (RMSE). A smaller value of `sigma` will make the -Bayesian analysis give more weight to models with lower RMSE values.} +\item{sigma}{Numeric value (optional). A single value or vector of error terms +the same length as y. A single value will apply a constant error term. User +can provide a vector of error terms to incorporate time-varying error +to the RMSE calculation. `sigma` default assume homoscedasticity of residuals +and applies a constant error term equal to the standard deviation of observed +data (scoring criterion).} + +\item{sensitivity}{A multiplier that adjusts the sensitivity of the likelihood +values to increasing RMSE. If not provided, the function will automatically +calculate the sensitivity as one unit of standard deviation of the RMSE results. +A smaller sensitivity value will make the Bayesian analysis give more weight +to models with lower RMSE values.} } \value{ Returns a vector of scores with a length equal to the number of @@ -38,8 +45,8 @@ specific use case. } \examples{ # creating sample matrix -mat <- matrix(data = 1:15, nrow = 5, ncol = 3) +mat <- matrix(data = 1:20, nrow = 5, ncol = 4) # scoring with a decay rate of 2 -score_bayesian(mat, sigma = 2) +score_bayesian(mat, sensitivity = 2) }