Merge branch 'develop' into Issue_392

rivm-syso · Oct 28, 2024 · 859f571 · 859f571
2 parents 78b0385 + f4918eb
commit 859f571
Show file tree

Hide file tree

Showing 22 changed files with 1,328 additions and 403 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -48,6 +48,7 @@ RUN install2.r --error --skipinstalled --ncpus -1 \
      datawizard \
      shinybusy \
      filelock \
+     colorBlindness \
      && rm -rf /tmp/downloaded_packages
 
 # install some more R pkgs (in a new layer)

diff --git a/funs/data_to_tool_fun.R b/funs/data_to_tool_fun.R
@@ -187,4 +187,31 @@ add_uncertainty_bias_sensor <- function(data_measurements){
 
   return(data_measurements)
 
-  }
+}
+
+#' Filter data on parameter
+#'
+#' The station measure several paramaters. This function filters the
+#' given parameter_input for the air quality stations. And keeps the
+#' KNMI ws and wd observations
+#'
+#' @param data_all df with at least columns (parameter, station)
+#' @param parameter_input string with parameter as in column parameter
+#' @param knmi default T can be add if different approach
+#'
+#' @return df as data_all but without redundant parameters
+#' @export
+#'
+filter_parameter <- function(data_all, parameter_input, knmi = T){
+  data_all <- data_all %>%
+    dplyr::mutate(
+      keep = case_when(parameter == parameter_input & !grepl("^KNMI", station) ~ T,
+                       grepl("^KNMI", station) & parameter %in% c("wd", "ws") ~ T,
+                       T ~ F
+      )
+    ) %>%
+    dplyr::filter(keep) %>%
+    dplyr::select(-c(keep))
+
+  return(data_all)
+}
diff --git a/funs/get_data_caching_funs.R b/funs/get_data_caching_funs.R
@@ -7,6 +7,7 @@
 #'
 #' @param measurements_con tbl, to indicate the database table
 #' @param stations_name character, with the stations names of interest
+#' @param parameter_input chacracter whit whic parameter of interest
 #' @param start_time date, to indicatidate start of the period
 #' @param end_time date, to indicate end of the period
 #'
@@ -15,6 +16,7 @@
 #'
 get_measurements_cleaned <- function(measurements_con,
                                      stations_name,
+                                     parameter_input,
                                      start_time,
                                      end_time){
   # get the measurements from the caching dbs
@@ -40,6 +42,9 @@ get_measurements_cleaned <- function(measurements_con,
   # Add bias to the uncertainty sensors raw data
   data_all <- add_uncertainty_bias_sensor(data_all)
 
+  # Select only the measurements of the given parameter
+  data_all <- filter_parameter(data_all, parameter_input)
+
   return(data_all)
 
 }

diff --git a/funs/queue_fun.R b/funs/queue_fun.R
@@ -23,7 +23,7 @@ create_data_request <- function(kits, time_start, time_end, conn, max_requests =
     # conn: database connection object
     # max_request: max jobs on queue for each request
     #
-    # Return value: a list with data requests
+    # Return value: a list with data requests and last item is the job_id
 
     add_req <- function(x,y) {
         dl_req <- data.frame()
@@ -38,24 +38,27 @@ create_data_request <- function(kits, time_start, time_end, conn, max_requests =
 
 
     job_id <- sprintf("id%010.0f", round(runif(1, 1, 2^32), digits = 0))
-    kits_req <- tibble(station = kits, time_start = time_start, time_end = time_end) %>% 
+    kits_req <- tibble(station = kits, time_start = time_start, time_end = time_end) %>%
         rowid_to_column("id") %>%
-        mutate(set = ceiling(id / max_requests)) %>% 
+        mutate(set = ceiling(id / max_requests)) %>%
         group_by(set)
-           
+
     res <- kits_req  %>% group_map(add_req)
-        
+
     job_id <- sprintf("id%010.0f", round(runif(1, 1, 2^32), digits = 0))
     for(i in 1:length(res)) {
         job_id_seq <- sprintf("%s_%04i", job_id, i)
-           
+
         if(!doc_exists(type = "data_req", ref = job_id_seq, conn = pool)) {
             log_trace("create_data_request: data request {job_id_seq} stored")
             add_doc(type = "data_req", ref = job_id_seq,
                     doc = res[[i]], conn = pool,
                     overwrite = TRUE)
         }
     }
+
+    # Add job_id to return
+    res <- c(res, job_id)
     invisible(res)
 
 }
@@ -172,7 +175,7 @@ task_q <- R6::R6Class(
                                                         private$tasks$worker[[i]]$call(private$tasks$fun[[i]],
                                                                                        private$tasks$args[[i]])
                                                     }
-                                                          
+
                                         })
 
                                          duplicates <- which(private$tasks$state == "duplicate")
@@ -190,7 +193,7 @@ task_q <- R6::R6Class(
                                                      private$tasks$id[i] <- id
                                                      private$tasks$state[i] <- "waiting"
                                                  }
-                                                 
+
                                              }
                                          }
 

diff --git a/funs/select_filter_functions.R b/funs/select_filter_functions.R
@@ -0,0 +1,185 @@
+# Functions for selecting and filtering the data
+
+#' Get selected stations ----
+#'
+#' Get the names of the stations which are selected
+#'
+#' @param data_stations df
+#'
+#' @return string with names of stations
+#' @export
+#'
+get_selected_station <- function(data_stations){
+  # Check if there is data
+  shiny::validate(need(!is.null(data_stations), "No data_stations"))
+
+  # Get the names of selected stations
+  selected_station <- data_stations %>%
+    dplyr::filter(selected == T) %>%
+    dplyr::select(station) %>%
+    pull()
+
+  return(selected_station)
+}
+
+
+#' Filter data measurements
+#'
+#' Filter the data on selected stations, time and parameter
+#'
+#' @param start_time date, start of the selected period
+#' @param end_time date, end of the selected period
+#' @param cut_off numeric, values above this value will not be selected
+#' @param parameter string, with the parameter of interest "pm25_kal"
+#' @param data_stations df, with at least the columns c(date, stations,
+#'  value, group_name, linetype, station_type, col, size, label)
+#' @param data_all df, with at least the columns c(station, parameter,
+#'  value, date, sd)
+#'
+#' @return df with the measurements and info of the stations selected,
+#' columns : c(station, date, parameter, value, sd, label, group_name,
+#' col, size, station_type, linetype)
+#' @export
+filter_data_measurements_fun <- function(start_time,
+                                     end_time,
+                                     cut_off,
+                                     selected_parameter,
+                                     data_stations,
+                                     data_all){
+
+  # Get selected stations
+  selected_stations <- get_selected_station(data_stations)
+
+  # Check if everything is available for the selection
+  shiny::validate(need(!is.null(start_time) &
+                         !is.null(end_time) &
+                         !is.null(selected_parameter) &
+                         !is.null(data_all) &
+                         !purrr::is_empty(selected_stations),
+                       "Not yet data selected" ) )
+
+  # Get the info for each selected station
+  station_info <- data_stations %>%
+    dplyr::filter(selected == T)
+
+  # Filter the measurements
+  measurements_filt_stns <- data_all %>%
+    dplyr::filter(date > start_time & date < end_time &
+                    station %in% selected_stations &
+                    parameter == selected_parameter &
+                    value < cut_off)
+
+  # Combine station_info with the measurements and keep relevant
+  # columns
+  measurements_filt_stns <-
+    dplyr::left_join(measurements_filt_stns,
+                     station_info, by = "station") %>%
+    dplyr::select(station, date, parameter, value, sd, label,
+                  group_name, col, size, station_type, linetype) %>%
+    # Keep for this dataset the label the same as the station. No changes for grouping yet.
+    dplyr::mutate(label = station)
+
+  # log_trace("fun filter measurements:: number of selected stations {length(selected_stations)}")
+  # log_trace("fun filter measurements:: names of selected stations {paste(selected_stations, sep = ' ', collapse = ' ')}")
+  log_trace("fun filter measurements: filtered measurements {nrow(measurements_filt_stns)}")
+  return(measurements_filt_stns)
+}
+
+
+#' Calculate group mean
+#'
+#' Calculate the mean values per time step for each group
+#'  an minimum values is applied
+#'
+#' @param measurements df with the data measurements including  c(station, date, parameter, value, sd, label, group_name,
+#' col, size, station_type, linetype)
+#' @param uc_min_pm10 minimum value for a pm10 measurement
+#' @param uc_min_pm25 minimum value for a pm2.5 measurement
+#'
+#' @return df with for each goup 1 measurement per time step
+#' @export
+calc_group_mean_fun <- function(measurements,
+                            uc_min_pm10,
+                            uc_min_pm25){
+
+  # check if stations are selected
+  shiny::validate(need(!is_empty(measurements) &
+                         !dim(measurements)[1] == 0,
+                       "No data_stations"))
+
+  # Calculate group mean and sd
+  data_mean <- measurements %>%
+    # Set label to groupname
+    dplyr::mutate(label = dplyr::case_when(station_type ==
+                                             "sensor" ~ group_name,
+                                           T ~ station)) %>%
+    # Keep also the parameters for the plotting
+    dplyr::group_by(group_name, date, parameter, label, col,
+                    size, station_type, linetype) %>%
+    dplyr::summarise(value = mean(value, na.rm = T),
+                     number = n(),
+                     sd = mean(sd, na.rm = T)/sqrt(n())) %>%
+    dplyr::ungroup()
+
+  # Set sd of a sensor to a minimal value, different for pm10 and pm25
+  data_mean <- data_mean %>%
+    # Check for minimal sd for sensors
+    dplyr::mutate(
+      sd = dplyr::case_when(station_type == "sensor" &
+                              grepl("pm25", parameter, fixed = T) &
+                              sd < uc_min_pm25 ~ uc_min_pm25,
+                            station_type == "sensor" &
+                              grepl("pm10", parameter, fixed = T) &
+                              sd < uc_min_pm10 ~ uc_min_pm10,
+                            T ~ sd))
+
+  return(data_mean)
+
+}
+
+
+# Get knmi measurements ----
+# the knmi measurements are excluded
+# by the selected parameter in the measurements_filt_stns
+#' Get selected knmi measurements
+#'
+#' Get the data of the selecetd knmi stations
+#'
+#' @param start_time date, start of the period
+#' @param end_time date, end of the period
+#' @param data_all df, with the measurements
+#' @param data_stations df, with station info at least columns c(station, selected)
+#'
+#' @return df, part of data_all with the selected stations
+#' @export
+#'
+#' @examples
+get_knmi_measurements_fun <- function(start_time,
+                                      end_time,
+                                      data_all,
+                                      data_stations
+                                      ){
+
+  # Get selected stations
+  all_selected_stations <- get_selected_station(data_stations)
+  selected_stations <- all_selected_stations[grep("KNMI", all_selected_stations)]
+
+  # Check if everything is available for the selection
+  shiny::validate(need(!is.null(start_time) &
+                         !is.null(end_time) &
+                         !is.null(data_all) &
+                         !purrr::is_empty(selected_stations),
+                       "Not yet data selected" ) )
+
+  # Filter the measurements
+  measurements_filt_knmi <- data_all %>%
+    dplyr::filter(date > start_time & date < end_time &
+                    station %in% selected_stations
+    )
+
+  # log_trace("fun filter knmi measurements: number of selected stations {length(selected_stations)}")
+  # log_trace("fun filter knmi measurements: names of selected stations {paste(selected_stations, sep = ' ', collapse = ' ')}")
+  log_trace("fun filter knmi measurements: filtered measurements KNMI {nrow(measurements_filt_knmi)}")
+
+  return(measurements_filt_knmi)
+}
diff --git a/funs/ui_tab_grouping.R b/funs/ui_tab_grouping.R
@@ -15,6 +15,15 @@ tpGrouping <- function(){
           info_button_output("text_step1" )
         ),
       ),
+      fluidRow(
+        style = "margin-left: 20px;",
+        select_all_button_output("select_all")
+      ),
+      br(),
+      fluidRow(
+        style = "margin-left: 20px;",
+        deselect_all_button_output("deselect_all")
+      ),
       fluidRow(
         style = "margin-left: 20px;",
         p(single_text_output("text_selected_sensors")),

diff --git a/funs/ui_tab_info.R b/funs/ui_tab_info.R
@@ -0,0 +1,57 @@
+## ------------------------
+## Create content information about the tool
+## -----------------------
+
+tpInfo <- function(){
+
+  tabPanel( # tabpanel "INFORMATION" ----
+            value = "Information",
+            title = i18n$t("title_infotool"),
+    h4(i18n$t("word_ATTool")),
+    p(i18n$t("tool_intro_expl")),br(),
+    h4(i18n$t("word_data")),
+    p(i18n$t("tool_intro_data_expl")),br(),
+    h4(i18n$t("word_cal_values")),
+    p(i18n$t("tool_intro_cal_values_expl")),br(),
+    h4(i18n$t("word_confident_interval")),
+    p(i18n$t("tool_confident_interval_1_expl")),
+    p(i18n$t("tool_confident_interval_2_expl")),
+    p(i18n$t("tool_confident_interval_3_expl")),
+    p(i18n$t("tool_confident_interval_4_expl")),br(),
+    h4(i18n$t("word_maximum_value")),
+    p(i18n$t("tool_maximum_value_1_expl")),
+    br(),
+    h4(i18n$t("word_variation_whisker")),
+    p(i18n$t("tool_variation_whisker_1_expl")),
+    p(i18n$t("tool_variation_whisker_2_expl")),
+    p(i18n$t("tool_variation_whisker_3_expl")),br(),
+    h4(i18n$t("word_opensource")),
+    p(i18n$t("tool_intro_opensource_expl")), br(),
+    h4(i18n$t("word_links")),
+    p(i18n$t("expl_link_to_samenmeten"),
+      a("samenmeten.rivm.nl", href ='https://samenmeten.rivm.nl/dataportaal/', target = 'blank'),
+      br(), i18n$t("expl_link_to_samenmeten_info"),
+      a("link", href ='https://samenmeten.nl/dataportaal/samen-analyseren-tool', target = 'blank'),
+      br(), i18n$t("expl_link_github"),
+      a("github", href ='https://github.com/rivm-syso/Analyse-Together', target = 'blank'),
+      br(),i18n$t("expl_link_to_LML"),
+      a("luchtmeetnet.nl", href ='https://www.luchtmeetnet.nl/', target = 'blank'),
+      br(),i18n$t("expl_link_to_KNMI"),
+      a("knmi.nl", href ='https://www.knmi.nl/', target = 'blank'),
+      br(),i18n$t("expl_link_to_openair"),
+      a("openair", href ='https://davidcarslaw.github.io/openair/', target = 'blank'),
+      br(),
+      i18n$t("expl_link_to_projecten"),
+      a("samenmeten.nl/initiatieven", href ='https://www.samenmeten.nl/initiatieven', target = 'blank'),
+      br(),
+      i18n$t("expl_link_to_benb_artikel"),
+      a("link", href ='https://www.mdpi.com/1424-8220/22/20/8053', target = 'blank'),
+      br(),
+      i18n$t("expl_link_to_kalibration"),
+      a("link", href ='https://samenmeten.nl/dataportaal/kalibratie-van-fijnstofsensoren', target = 'blank'),
+      br(),
+      "Contact: ",
+      a("link", href ='https://samenmeten.nl/contact', target = 'blank'))
+
+  ) # end of tabpanel "INFORMATION"
+}