Skip to content

Commit

Permalink
Merge branch 'enhancement/time-varying-error'
Browse files Browse the repository at this point in the history
  • Loading branch information
jk-brown committed Feb 13, 2024
2 parents 048e4bd + 4238827 commit 4766d9e
Show file tree
Hide file tree
Showing 7 changed files with 127 additions and 65 deletions.
24 changes: 20 additions & 4 deletions R/RMSE_calc.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@
#'
#' @param x A vector of modeled data values
#' @param y A vector of observed data values
#'
#' @param sigma A single value or vector of error terms the same length as y.
#' A single value will apply a constant error term. User can provide a vector
#' of error terms to incorporate time-varying error into the RMSE calculation.
#' `sigma` default assume homoscedasticity of residuals and applies a constant
#' error term equal to the standard deviation of observed data (scoring criterion).

#' @return Returns a vector of RMSE values
#' @export
#'
Expand All @@ -14,14 +19,25 @@
#' y <- c(5:9)
#'
#' RMSE_calc(x, y)
RMSE_calc <- function(x, y) {
RMSE_calc <- function(x, y, sigma = sd(y)) {
# Check if all values are NA
if (all(is.na(x)) || all(is.na(y))) {
return(NA)
}

# compute RMSE
rmse_vals <- sqrt(mean((x - y)^2))
# if user provides sigma values compute RMSE with error.
# sigma should be length = 1 or same length as the obs data.
if (length(sigma) != 1 && length(sigma) != length(y))
stop(
paste(
"Length of sigma must be a single value or a vector matching the length of observed data = ",
length(y),
".",
sep = ""
)
)

rmse_vals <- sqrt(mean(((x - y) / sigma) ^ 2))

# return a vector of RMSE values
return(rmse_vals)
Expand Down
6 changes: 3 additions & 3 deletions R/iterate_model.R
Original file line number Diff line number Diff line change
Expand Up @@ -147,16 +147,16 @@ iterate_model <- function(core, params, save_years = NULL, save_vars = NULL) {
},
# if Hector crashes because of parameter combinations, send error message
error = function(e) {
message("An error occurred")
message("An error occurred", i)
}
)

# Create a placeholder dataframe for the run if there's no data collected
if (length(result_list) < i) {
dat <- data.frame(
scenario = core$name,
scenario = rep(core$name, each = length(save_years)),
year = save_years,
variable = save_vars,
variable = rep(save_vars, each = length(save_years)),
value = NA,
units = NA,
run_number = i
Expand Down
71 changes: 49 additions & 22 deletions R/score_bayesian.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,17 @@
#' @param m A Matrix of values. The first column of the matrix should be
#' a vector of observed data for a give variable. Subsequent vectors should be
#' representative of modeled values for a given variable.
#' @param sigma Numeric value (optional). The standard deviation parameter for
#' the normal distribution used in the Bayesian analysis. If not provided, the
#' function will automatically compute it as the standard deviation of the
#' Root Mean Square Error (RMSE). A smaller value of `sigma` will make the
#' Bayesian analysis give more weight to models with lower RMSE values.
#' @param sigma Numeric value (optional). A single value or vector of error terms
#' the same length as y. A single value will apply a constant error term. User
#' can provide a vector of error terms to incorporate time-varying error
#' to the RMSE calculation. `sigma` default assume homoscedasticity of residuals
#' and applies a constant error term equal to the standard deviation of observed
#' data (scoring criterion).
#' @param sensitivity A multiplier that adjusts the sensitivity of the likelihood
#' values to increasing RMSE. If not provided, the function will automatically
#' calculate the sensitivity as one unit of standard deviation of the RMSE results.
#' A smaller sensitivity value will make the Bayesian analysis give more weight
#' to models with lower RMSE values.
#'
#' @note Note: In Bayesian statistics, the choice of `sigma` can significantly
#' impact the results and conclusions of the analysis. Users are encouraged to
Expand All @@ -28,25 +34,33 @@
#'
#' @examples
#' # creating sample matrix
#' mat <- matrix(data = 1:15, nrow = 5, ncol = 3)
#' mat <- matrix(data = 1:20, nrow = 5, ncol = 4)
#'
#' # scoring with a decay rate of 2
#' score_bayesian(mat, sigma = 2)
score_bayesian <- function(m, sigma = NULL) {
#' score_bayesian(mat, sensitivity = 2)
score_bayesian <- function(m,
sigma = NULL,
sensitivity = NULL) {
# initialize vector to store RMSE values from loop
rmse_vector <- numeric()
rmse_vector <- rep(NA_real_, ncol(m))

# Stop execution if number of columns in the matrix is less than 2
# indicates that there is only one model result stored in matrix
stopifnot("More than 2 columns must be included in input matrix" = ncol(m) > 2)
stopifnot("More than 3 columns must be included in input matrix" = ncol(m) > 3)

# indicate that observed data are in the first column of the matrix
obs_data <- m[, 1]

# error if the observed data has no non-NA values
if (all(is.na(obs_data))) stop("No non-NA values present in observed data.")
if (all(is.na(obs_data)))
stop("No non-NA values present in observed data.")

# loop across columns of the matrix. For each column (i) after col 2
# If sigma is not provided, use sd(y) as default
if (is.null(sigma)) {
sigma <- sd(obs_data)
}

# loop across columns of the matrix. For each column (i) starting with col 2
for (i in 2:ncol(m)) {
# indicate modeled data are in subsequent columns
model_data <- m[, i]
Expand All @@ -55,35 +69,48 @@ score_bayesian <- function(m, sigma = NULL) {
if (all(is.na(model_data))) {
rmse_vals <- NA # Set RMSE to NA for this column
} else {
rmse_vals <- RMSE_calc(obs_data, model_data)
rmse_vals <- RMSE_calc(model_data, obs_data, sigma = sigma)
}

# vector of RMSE value for each model iteration
rmse_vector[i] <- rmse_vals
}

# Compute sigma if not provided by the user
if (is.null(sigma)) {
sigma <- sd(obs_data) # Calculate sigma as the standard deviation of RMSE values
}
# Check if sensitivity is negative, if so throw error
if (any(!is.na(sensitivity) &
(sensitivity < 0)))
stop("Sensitivity cannot be negative.")

# Check if sigma is negative, if so throw error
if (!is.na(sigma) && sigma < 0) warning("sigma value cannot be negative.")
# Compute likelihood sensitivity using multiplier provided by the user
if (is.null(sensitivity)) {
sensitivity_value <-
sd(rmse_vector, na.rm = TRUE) # Calculate sensitivity as the standard deviation of the RMSE results
} else {
if (length(sensitivity) != 1)
stop("Sensitivity must be a single value.")
sensitivity_value <- sensitivity * sd(rmse_vector, na.rm = TRUE)
}
if (!is.na(sensitivity_value) && sensitivity_value == 0) {
likelihood <- rep(1, length(rmse_vector[-1]))
return(likelihood)
}

# Compute likelihood using normal distribution likelihood function.
# This is the probability of observing the modeled data given the
# observed data.
# Remove first value when calling rmse_vector (first values should be NA because
# it represented obs_data)
likelihood <- exp(-0.5 * ((rmse_vector[-1]) / sigma)^2)
rmse_vector <- rmse_vector[-1]
likelihood <- exp(-0.5 * (rmse_vector / sensitivity_value) ^ 2)

# Computing unnormalized posterior scores
# Currently only computing posterior scores using uniform prior.
# uniform prior is calculated as 1/length(likelihood) which is
# the same as 1 / # of runs.
posterior <- likelihood * (1 / length(likelihood))
# posterior <-
# likelihood * (1 / length(likelihood)) ### DONT THINK THIS IS A NECESSARY STEP MAY JUST WANT TO RETURN "LIKELIHOOD" ###

# Create data frame of results - get run_numbers from the list where RMSE values
# are computed (names of the split_list components)
return(posterior)
return(likelihood)
}
8 changes: 7 additions & 1 deletion man/RMSE_calc.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 15 additions & 8 deletions man/score_bayesian.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 11 additions & 1 deletion tests/testthat/test-RMSE_calc.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,21 @@ test_that("NAs passed to function are treated correctly", {
# test RMSE functionality

test_that("RMSE is computed accurately", {
expect_equal(RMSE_calc(1, 5), sqrt(mean(1 - 5)^2))
expect_equal(RMSE_calc(1, 5), sqrt((mean(1-5)/sd(5)^2)))
})

# test return is vector

test_that("RMSE return is a vector", {
expect_vector(RMSE_calc(x = 1:3, y = 3:5))
})

# tests for sigma
test_that("sigma is a single value", {
expect_equal(RMSE_calc(1, 5, sigma = 1), sqrt(((mean(1-5)/1)^2)))
})

test_that("error if sigma is a vector of a different length than y", {
expect_error(RMSE_calc(1, 5, sigma = c(1, 2, 3)),
regexp = "Length of sigma must be a single value or a vector matching the length of observed data = 1")
})
48 changes: 22 additions & 26 deletions tests/testthat/test-score_bayesian.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ m <- matrix(c(rep(1, 6), rep(2, 3), rep(3, 3)), nrow = 3, ncol = 4)
# Testing error/warning cases

test_that("function stops and produces error messages", {
# error when matrix has less than two columns
# error when matrix has less than three columns
# m with only 2 cols
m_2row <- matrix(data = c(1:2), nrow = 2, ncol = 2)
expect_error(score_bayesian(m_2row),
regexp = "More than 2 columns must be included in input matrix"
m_2col <- matrix(data = c(1:6), nrow = 3, ncol = 2)
expect_error(score_bayesian(m_2col),
regexp = "More than 3 columns must be included in input matrix"
)

# error when entire m is NA
Expand All @@ -18,34 +18,30 @@ test_that("function stops and produces error messages", {
regexp = "No non-NA values present in observed data"
)

# warning when user supplies negative sigma values
expect_warning(score_bayesian(m, -1),
regexp = "sigma value cannot be negative."
# error when user supplies negative sensitivity value
expect_error(score_bayesian(m, sensitivity = -1),
regexp = "Sensitivity cannot be negative."
)

# error when user supplies more than one sensitivity value
expect_error(score_bayesian(m, sensitivity = c(1, 2)),
regexp = "Sensitivity must be a single value.")
})

# Testing output accuracy

test_that("scores assessed correctly", {
# If entire column of modeled data is NA, set RMSE value to NA
m_NA <- matrix(data = c(rep(1, 3), rep(1, 3), rep(NA, 3)), nrow = 3, ncol = 3)
expect_equal(score_bayesian(m_NA, 2), c(0.5, NA))
m_NA <- matrix(data = c(rep(1, 3), rep(1, 3), rep(2, 3), rep(NA, 3)), nrow = 3, ncol = 4)
test_result_1 <- score_bayesian(m_NA, sigma = 1)
expect_equal(test_result_1 [3], as.numeric(NA))

# scores equal when calculated RMSE = 0
m2 <- matrix(data = c(1, 1, 1), nrow = 1, ncol = 3)
expect_equal(score_bayesian(m2, 2), c(0.5, 0.5))

# when sigma = 0 all scores are identical
m3 <- matrix(data = c(1:3), nrow = 1, ncol = 3)
result <- score_bayesian(m3, 0)
expect_identical(result, c(rep(result[1], length(result))))

# expected output of function matches gaussian likelihood
m4 <- matrix(data = c(1, 2, 2), nrow = 1, ncol = 3)
sigma4 <- 2
expected_likelihood <- exp(-0.5 * (m4[1]^2) / sigma4^2) / m4[1, -1]
expect_equal(
score_bayesian(m4, sigma4),
expected_likelihood
)
})
m2 <- matrix(data = c(rep(1,3), rep(1, 3), rep(1, 3), rep(1, 3)), nrow = 3, ncol = 4)
expect_equal(score_bayesian(m2, sigma = 1), c(1, 1, 1))

# when sensitivity = 0 all scores are identical
m3 <- matrix(data = c(1:4), nrow = 1, ncol = 4)
expect_equal(score_bayesian(m3, sigma = 1, sensitivity = 0), c(1, 1, 1))

})

0 comments on commit 4766d9e

Please sign in to comment.