Skip to content

Commit

Permalink
Merge branch 'develop' into Issue_392
Browse files Browse the repository at this point in the history
  • Loading branch information
jspijker authored Oct 28, 2024
2 parents 78b0385 + f4918eb commit 859f571
Show file tree
Hide file tree
Showing 22 changed files with 1,328 additions and 403 deletions.
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ RUN install2.r --error --skipinstalled --ncpus -1 \
datawizard \
shinybusy \
filelock \
colorBlindness \
&& rm -rf /tmp/downloaded_packages

# install some more R pkgs (in a new layer)
Expand Down
29 changes: 28 additions & 1 deletion funs/data_to_tool_fun.R
Original file line number Diff line number Diff line change
Expand Up @@ -187,4 +187,31 @@ add_uncertainty_bias_sensor <- function(data_measurements){

return(data_measurements)

}
}

#' Filter data on parameter
#'
#' The station measure several paramaters. This function filters the
#' given parameter_input for the air quality stations. And keeps the
#' KNMI ws and wd observations
#'
#' @param data_all df with at least columns (parameter, station)
#' @param parameter_input string with parameter as in column parameter
#' @param knmi default T can be add if different approach
#'
#' @return df as data_all but without redundant parameters
#' @export
#'
filter_parameter <- function(data_all, parameter_input, knmi = T){
data_all <- data_all %>%
dplyr::mutate(
keep = case_when(parameter == parameter_input & !grepl("^KNMI", station) ~ T,
grepl("^KNMI", station) & parameter %in% c("wd", "ws") ~ T,
T ~ F
)
) %>%
dplyr::filter(keep) %>%
dplyr::select(-c(keep))

return(data_all)
}
5 changes: 5 additions & 0 deletions funs/get_data_caching_funs.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#'
#' @param measurements_con tbl, to indicate the database table
#' @param stations_name character, with the stations names of interest
#' @param parameter_input chacracter whit whic parameter of interest
#' @param start_time date, to indicatidate start of the period
#' @param end_time date, to indicate end of the period
#'
Expand All @@ -15,6 +16,7 @@
#'
get_measurements_cleaned <- function(measurements_con,
stations_name,
parameter_input,
start_time,
end_time){
# get the measurements from the caching dbs
Expand All @@ -40,6 +42,9 @@ get_measurements_cleaned <- function(measurements_con,
# Add bias to the uncertainty sensors raw data
data_all <- add_uncertainty_bias_sensor(data_all)

# Select only the measurements of the given parameter
data_all <- filter_parameter(data_all, parameter_input)

return(data_all)

}
Expand Down
19 changes: 11 additions & 8 deletions funs/queue_fun.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ create_data_request <- function(kits, time_start, time_end, conn, max_requests =
# conn: database connection object
# max_request: max jobs on queue for each request
#
# Return value: a list with data requests
# Return value: a list with data requests and last item is the job_id

add_req <- function(x,y) {
dl_req <- data.frame()
Expand All @@ -38,24 +38,27 @@ create_data_request <- function(kits, time_start, time_end, conn, max_requests =


job_id <- sprintf("id%010.0f", round(runif(1, 1, 2^32), digits = 0))
kits_req <- tibble(station = kits, time_start = time_start, time_end = time_end) %>%
kits_req <- tibble(station = kits, time_start = time_start, time_end = time_end) %>%
rowid_to_column("id") %>%
mutate(set = ceiling(id / max_requests)) %>%
mutate(set = ceiling(id / max_requests)) %>%
group_by(set)

res <- kits_req %>% group_map(add_req)

job_id <- sprintf("id%010.0f", round(runif(1, 1, 2^32), digits = 0))
for(i in 1:length(res)) {
job_id_seq <- sprintf("%s_%04i", job_id, i)

if(!doc_exists(type = "data_req", ref = job_id_seq, conn = pool)) {
log_trace("create_data_request: data request {job_id_seq} stored")
add_doc(type = "data_req", ref = job_id_seq,
doc = res[[i]], conn = pool,
overwrite = TRUE)
}
}

# Add job_id to return
res <- c(res, job_id)
invisible(res)

}
Expand Down Expand Up @@ -172,7 +175,7 @@ task_q <- R6::R6Class(
private$tasks$worker[[i]]$call(private$tasks$fun[[i]],
private$tasks$args[[i]])
}

})

duplicates <- which(private$tasks$state == "duplicate")
Expand All @@ -190,7 +193,7 @@ task_q <- R6::R6Class(
private$tasks$id[i] <- id
private$tasks$state[i] <- "waiting"
}

}
}

Expand Down
185 changes: 185 additions & 0 deletions funs/select_filter_functions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
# Functions for selecting and filtering the data

#' Get selected stations ----
#'
#' Get the names of the stations which are selected
#'
#' @param data_stations df
#'
#' @return string with names of stations
#' @export
#'
get_selected_station <- function(data_stations){
# Check if there is data
shiny::validate(need(!is.null(data_stations), "No data_stations"))

# Get the names of selected stations
selected_station <- data_stations %>%
dplyr::filter(selected == T) %>%
dplyr::select(station) %>%
pull()

return(selected_station)
}


#' Filter data measurements
#'
#' Filter the data on selected stations, time and parameter
#'
#' @param start_time date, start of the selected period
#' @param end_time date, end of the selected period
#' @param cut_off numeric, values above this value will not be selected
#' @param parameter string, with the parameter of interest "pm25_kal"
#' @param data_stations df, with at least the columns c(date, stations,
#' value, group_name, linetype, station_type, col, size, label)
#' @param data_all df, with at least the columns c(station, parameter,
#' value, date, sd)
#'
#' @return df with the measurements and info of the stations selected,
#' columns : c(station, date, parameter, value, sd, label, group_name,
#' col, size, station_type, linetype)
#' @export
filter_data_measurements_fun <- function(start_time,
end_time,
cut_off,
selected_parameter,
data_stations,
data_all){

# Get selected stations
selected_stations <- get_selected_station(data_stations)

# Check if everything is available for the selection
shiny::validate(need(!is.null(start_time) &
!is.null(end_time) &
!is.null(selected_parameter) &
!is.null(data_all) &
!purrr::is_empty(selected_stations),
"Not yet data selected" ) )

# Get the info for each selected station
station_info <- data_stations %>%
dplyr::filter(selected == T)

# Filter the measurements
measurements_filt_stns <- data_all %>%
dplyr::filter(date > start_time & date < end_time &
station %in% selected_stations &
parameter == selected_parameter &
value < cut_off)

# Combine station_info with the measurements and keep relevant
# columns
measurements_filt_stns <-
dplyr::left_join(measurements_filt_stns,
station_info, by = "station") %>%
dplyr::select(station, date, parameter, value, sd, label,
group_name, col, size, station_type, linetype) %>%
# Keep for this dataset the label the same as the station. No changes for grouping yet.
dplyr::mutate(label = station)

# log_trace("fun filter measurements:: number of selected stations {length(selected_stations)}")
# log_trace("fun filter measurements:: names of selected stations {paste(selected_stations, sep = ' ', collapse = ' ')}")
log_trace("fun filter measurements: filtered measurements {nrow(measurements_filt_stns)}")
return(measurements_filt_stns)
}


#' Calculate group mean
#'
#' Calculate the mean values per time step for each group
#' an minimum values is applied
#'
#' @param measurements df with the data measurements including c(station, date, parameter, value, sd, label, group_name,
#' col, size, station_type, linetype)
#' @param uc_min_pm10 minimum value for a pm10 measurement
#' @param uc_min_pm25 minimum value for a pm2.5 measurement
#'
#' @return df with for each goup 1 measurement per time step
#' @export
calc_group_mean_fun <- function(measurements,
uc_min_pm10,
uc_min_pm25){

# check if stations are selected
shiny::validate(need(!is_empty(measurements) &
!dim(measurements)[1] == 0,
"No data_stations"))

# Calculate group mean and sd
data_mean <- measurements %>%
# Set label to groupname
dplyr::mutate(label = dplyr::case_when(station_type ==
"sensor" ~ group_name,
T ~ station)) %>%
# Keep also the parameters for the plotting
dplyr::group_by(group_name, date, parameter, label, col,
size, station_type, linetype) %>%
dplyr::summarise(value = mean(value, na.rm = T),
number = n(),
sd = mean(sd, na.rm = T)/sqrt(n())) %>%
dplyr::ungroup()

# Set sd of a sensor to a minimal value, different for pm10 and pm25
data_mean <- data_mean %>%
# Check for minimal sd for sensors
dplyr::mutate(
sd = dplyr::case_when(station_type == "sensor" &
grepl("pm25", parameter, fixed = T) &
sd < uc_min_pm25 ~ uc_min_pm25,
station_type == "sensor" &
grepl("pm10", parameter, fixed = T) &
sd < uc_min_pm10 ~ uc_min_pm10,
T ~ sd))

return(data_mean)

}


# Get knmi measurements ----
# the knmi measurements are excluded
# by the selected parameter in the measurements_filt_stns
#' Get selected knmi measurements
#'
#' Get the data of the selecetd knmi stations
#'
#' @param start_time date, start of the period
#' @param end_time date, end of the period
#' @param data_all df, with the measurements
#' @param data_stations df, with station info at least columns c(station, selected)
#'
#' @return df, part of data_all with the selected stations
#' @export
#'
#' @examples
get_knmi_measurements_fun <- function(start_time,
end_time,
data_all,
data_stations
){

# Get selected stations
all_selected_stations <- get_selected_station(data_stations)
selected_stations <- all_selected_stations[grep("KNMI", all_selected_stations)]

# Check if everything is available for the selection
shiny::validate(need(!is.null(start_time) &
!is.null(end_time) &
!is.null(data_all) &
!purrr::is_empty(selected_stations),
"Not yet data selected" ) )

# Filter the measurements
measurements_filt_knmi <- data_all %>%
dplyr::filter(date > start_time & date < end_time &
station %in% selected_stations
)

# log_trace("fun filter knmi measurements: number of selected stations {length(selected_stations)}")
# log_trace("fun filter knmi measurements: names of selected stations {paste(selected_stations, sep = ' ', collapse = ' ')}")
log_trace("fun filter knmi measurements: filtered measurements KNMI {nrow(measurements_filt_knmi)}")

return(measurements_filt_knmi)
}
9 changes: 9 additions & 0 deletions funs/ui_tab_grouping.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,15 @@ tpGrouping <- function(){
info_button_output("text_step1" )
),
),
fluidRow(
style = "margin-left: 20px;",
select_all_button_output("select_all")
),
br(),
fluidRow(
style = "margin-left: 20px;",
deselect_all_button_output("deselect_all")
),
fluidRow(
style = "margin-left: 20px;",
p(single_text_output("text_selected_sensors")),
Expand Down
57 changes: 57 additions & 0 deletions funs/ui_tab_info.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
## ------------------------
## Create content information about the tool
## -----------------------

tpInfo <- function(){

tabPanel( # tabpanel "INFORMATION" ----
value = "Information",
title = i18n$t("title_infotool"),
h4(i18n$t("word_ATTool")),
p(i18n$t("tool_intro_expl")),br(),
h4(i18n$t("word_data")),
p(i18n$t("tool_intro_data_expl")),br(),
h4(i18n$t("word_cal_values")),
p(i18n$t("tool_intro_cal_values_expl")),br(),
h4(i18n$t("word_confident_interval")),
p(i18n$t("tool_confident_interval_1_expl")),
p(i18n$t("tool_confident_interval_2_expl")),
p(i18n$t("tool_confident_interval_3_expl")),
p(i18n$t("tool_confident_interval_4_expl")),br(),
h4(i18n$t("word_maximum_value")),
p(i18n$t("tool_maximum_value_1_expl")),
br(),
h4(i18n$t("word_variation_whisker")),
p(i18n$t("tool_variation_whisker_1_expl")),
p(i18n$t("tool_variation_whisker_2_expl")),
p(i18n$t("tool_variation_whisker_3_expl")),br(),
h4(i18n$t("word_opensource")),
p(i18n$t("tool_intro_opensource_expl")), br(),
h4(i18n$t("word_links")),
p(i18n$t("expl_link_to_samenmeten"),
a("samenmeten.rivm.nl", href ='https://samenmeten.rivm.nl/dataportaal/', target = 'blank'),
br(), i18n$t("expl_link_to_samenmeten_info"),
a("link", href ='https://samenmeten.nl/dataportaal/samen-analyseren-tool', target = 'blank'),
br(), i18n$t("expl_link_github"),
a("github", href ='https://github.com/rivm-syso/Analyse-Together', target = 'blank'),
br(),i18n$t("expl_link_to_LML"),
a("luchtmeetnet.nl", href ='https://www.luchtmeetnet.nl/', target = 'blank'),
br(),i18n$t("expl_link_to_KNMI"),
a("knmi.nl", href ='https://www.knmi.nl/', target = 'blank'),
br(),i18n$t("expl_link_to_openair"),
a("openair", href ='https://davidcarslaw.github.io/openair/', target = 'blank'),
br(),
i18n$t("expl_link_to_projecten"),
a("samenmeten.nl/initiatieven", href ='https://www.samenmeten.nl/initiatieven', target = 'blank'),
br(),
i18n$t("expl_link_to_benb_artikel"),
a("link", href ='https://www.mdpi.com/1424-8220/22/20/8053', target = 'blank'),
br(),
i18n$t("expl_link_to_kalibration"),
a("link", href ='https://samenmeten.nl/dataportaal/kalibratie-van-fijnstofsensoren', target = 'blank'),
br(),
"Contact: ",
a("link", href ='https://samenmeten.nl/contact', target = 'blank'))

) # end of tabpanel "INFORMATION"
}
Loading

0 comments on commit 859f571

Please sign in to comment.