CRAN changes

NeotomaDB · May 24, 2024 · e9255dc · e9255dc
1 parent 5187607
commit e9255dc
Show file tree

Hide file tree

Showing 32 changed files with 309 additions and 244 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -20,7 +20,7 @@ Description: Access and manipulation of data using the Neotoma Paleoecology Data
 License: MIT + file LICENSE
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.1
 Suggests:
     testthat,
     knitr,

diff --git a/NAMESPACE b/NAMESPACE
@@ -6,6 +6,7 @@ S3method(get_contacts,default)
 S3method(get_contacts,numeric)
 S3method(get_datasets,default)
 S3method(get_datasets,numeric)
+S3method(get_datasets,site)
 S3method(get_datasets,sites)
 S3method(get_downloads,character)
 S3method(get_downloads,numeric)
@@ -145,12 +146,10 @@ importFrom(httr,stop_for_status)
 importFrom(jsonlite,fromJSON)
 importFrom(jsonlite,toJSON)
 importFrom(leaflet,addCircleMarkers)
-importFrom(leaflet,addProviderTiles)
 importFrom(leaflet,addTiles)
 importFrom(leaflet,leaflet)
 importFrom(lubridate,as_date)
 importFrom(magrittr,"%>%")
-importFrom(mapview,mapshot)
 importFrom(methods,"slot<-")
 importFrom(methods,is)
 importFrom(methods,new)

diff --git a/R/02_genericDefinitions.R b/R/02_genericDefinitions.R
@@ -22,7 +22,7 @@ setGeneric("coordinates", function(obj, ...) {
 })
 
 #' @title plotLeaflet
-#' @author Socorro Dominguez \email{sedv8808@@gmail.com}
+#' @author Socorro Dominguez \email{s.dominguez@ht-data.com}
 #' @description Plot sites on a leaflet map
 #' @param object Sites object to plot
 #' @returns leaflet map with site markers

diff --git a/R/build_sample.R b/R/build_sample.R
@@ -1,5 +1,5 @@
 #' @title Build a samples `data.frame` from Neotoma API JSON
-#' @author Socorro Dominguez \email{sedv8808@@gmail.com}
+#' @author Socorro Dominguez \email{s.dominguez@ht-data.com}
 #' @import gtools
 #' @import lubridate
 #' @import dplyr

diff --git a/R/collunits-methods.R b/R/collunits-methods.R
@@ -128,6 +128,7 @@ setMethod(f = "show",
 #' @title Get or remove sites by numeric index
 #' @param x The collunits object
 #' @param i The numeric index
+#' @description Retrieve sites by numeric index
 #' @return null used for side effects. Printing a data.frame
 setMethod(f = "[",
           signature = signature(x = "collunits", i = "numeric"),

diff --git a/R/filter.R b/R/filter.R
@@ -5,43 +5,40 @@ utils::globalVariables(c("elev", "notes"))
 #' is a nested object (it contains collection units, datasets, samples, etc.)
 #' the degree to which filtering occurs depends on the amount of data contained
 #' within the sites object. Filtering parameters include:
-#' \itemize{
-#'  \item{"siteid"}{A numeric site identifier from the Neotoma Database}
-#'  \item{"sitename"}{The character string sitename.}
-#'  \item{"lat"}{A numeric latitude value.}
-#'  \item{"long"}{A numeric longitude value.}
-#'  \item{"altitude"}{The elevation of the site. Note that some sites do not
+#'  * `siteid` A numeric site identifier from the Neotoma Database.
+#'  * `sitename` The character string sitename.
+#'  * `lat` A numeric latitude value.
+#'  * `long` A numeric longitude value.
+#'  * `altitude` The elevation of the site. Note that some sites do not
 #'   include elevation information. For these an NA value appears, and they
-#'   would be removed when using an elevation filter.}
-#'  \item{"datasetid"}{A numeric datasetid from Neotoma.}
-#'  \item{"database"}{A character string naming the constituent database
-#'   from which the dataset is drawn.}
-#'  \item{"datasettype"}{A character string representing one of the many
-#'   dataset types within Neotoma.}
-#'  \item{"age_range_old"}{A dataset-level parameter indicating the oldest
-#'   date covered by the dataset chronology.}
-#'  \item{"age_range_young"}{A dataset-level parameter indicating the youngest
-#'   date covered by the dataset chronology.}
-#'  \item{"notes"}{Free-form dataset notes provided by the dataset PI(s),
-#'   analysts or data stewards.}
-#'  \item{"collectionunitid"}{A numeric collection unit identifier from
-#'   Neotoma.}
-#'  \item{"handle"}{A character string identifying the collection unit. These
-#'   are often shorter form names (originally a default 8 character length).}
-#'  \item{"collectionunitname"}{A character string identifying the collection
-#'   unit name.}
-#'  \item{"colldate"}{The date on which the collection unit was sampled. Many
-#'   of these are empty.}
-#'  \item{"location"}{A free-form character string indicating the location of
-#'   the collection unit within the site.}
-#'  \item{"waterdepth"}{A numeric depth at which the core was obtained.}
-#'  \item{"collunittype"}{A character string for the collection unit type.}
-#'  \item{"collectiondevice"}{A fixed vocabulary term for the collection
-#'   device.}
-#'  \item{"depositionalenvironment"}{A fixed vocabulary name for the
-#'   depositional environment.}
-#' }
-#'
+#'   would be removed when using an elevation filter.
+#'  * `datasetid` A numeric datasetid from Neotoma.
+#'  * `database` A character string naming the constituent database
+#'   from which the dataset is drawn.
+#'  * `datasettype` A character string representing one of the many
+#'   dataset types within Neotoma.
+#'  * `age_range_old` A dataset-level parameter indicating the oldest
+#'   date covered by the dataset chronology.
+#'  * `age_range_young` A dataset-level parameter indicating the youngest
+#'   date covered by the dataset chronology.
+#'  * `notes` Free-form dataset notes provided by the dataset PI(s),
+#'   analysts or data stewards.
+#'  * `collectionunitid` A numeric collection unit identifier from
+#'   Neotoma.
+#'  * `handle` A character string identifying the collection unit. These
+#'   are often shorter form names (originally a default 8 character length).
+#'  * `collectionunitname` A character string identifying the collection
+#'   unit name.
+#'  * `colldate` The date on which the collection unit was sampled. Many
+#'   of these are empty.
+#'  * `location` A free-form character string indicating the location of
+#'   the collection unit within the site.
+#'  * `waterdepth` A numeric depth at which the core was obtained.
+#'  * `collunittype` A character string for the collection unit type.
+#'  * `collectiondevice` A fixed vocabulary term for the collection
+#'   device.
+#'  * `depositionalenvironment` A fixed vocabulary name for the
+#'   depositional environment.
 #' @import sf
 #' @import dplyr
 #' @importFrom purrr map
@@ -50,7 +47,6 @@ utils::globalVariables(c("elev", "notes"))
 #' @param ... arguments to filter by.
 #' @returns filtered `sites` object 
 #' @export
-
 filter <- function(x, ...) {
   UseMethod("filter", x)
 }
@@ -61,42 +57,40 @@ filter <- function(x, ...) {
 #' is a nested object (it contains collection units, datasets, samples, etc.)
 #' the degree to which filtering occurs depends on the amount of data contained
 #' within the sites object. Filtering parameters include:
-#' \itemize{
-#'  \item{"siteid"}{A numeric site identifier from the Neotoma Database}
-#'  \item{"sitename"}{The character string sitename.}
-#'  \item{"lat"}{A numeric latitude value.}
-#'  \item{"long"}{A numeric longitude value.}
-#'  \item{"altitude"}{The elevation of the site. Note that some sites do not
+#'  * `siteid` A numeric site identifier from the Neotoma Database.
+#'  * `sitename` The character string sitename.
+#'  * `lat` A numeric latitude value.
+#'  * `long` A numeric longitude value.
+#'  * `altitude` The elevation of the site. Note that some sites do not
 #'   include elevation information. For these an NA value appears, and they
-#'   would be removed when using an elevation filter.}
-#'  \item{"datasetid"}{A numeric datasetid from Neotoma.}
-#'  \item{"database"}{A character string naming the constituent database
-#'   from which the dataset is drawn.}
-#'  \item{"datasettype"}{A character string representing one of the many
-#'   dataset types within Neotoma.}
-#'  \item{"age_range_old"}{A dataset-level parameter indicating the oldest
-#'   date covered by the dataset chronology.}
-#'  \item{"age_range_young"}{A dataset-level parameter indicating the youngest
-#'   date covered by the dataset chronology.}
-#'  \item{"notes"}{Free-form dataset notes provided by the dataset PI(s),
-#'   analysts or data stewards.}
-#'  \item{"collectionunitid"}{A numeric collection unit identifier from
-#'   Neotoma.}
-#'  \item{"handle"}{A character string identifying the collection unit. These
-#'   are often shorter form names (originally a default 8 character length).}
-#'  \item{"collectionunitname"}{A character string identifying the collection
-#'   unit name.}
-#'  \item{"colldate"}{The date on which the collection unit was sampled. Many
-#'   of these are empty.}
-#'  \item{"location"}{A free-form character string indicating the location of
-#'   the collection unit within the site.}
-#'  \item{"waterdepth"}{A numeric depth at which the core was obtained.}
-#'  \item{"collunittype"}{A character string for the collection unit type.}
-#'  \item{"collectiondevice"}{A fixed vocabulary term for the collection
-#'   device.}
-#'  \item{"depositionalenvironment"}{A fixed vocabulary name for the
-#'   depositional environment.}
-#' }
+#'   would be removed when using an elevation filter.
+#'  * `datasetid` A numeric datasetid from Neotoma.
+#'  * `database` A character string naming the constituent database
+#'   from which the dataset is drawn.
+#'  * `datasettype` A character string representing one of the many
+#'   dataset types within Neotoma.
+#'  * `age_range_old` A dataset-level parameter indicating the oldest
+#'   date covered by the dataset chronology.
+#'  * `age_range_young` A dataset-level parameter indicating the youngest
+#'   date covered by the dataset chronology.
+#'  * `notes` Free-form dataset notes provided by the dataset PI(s),
+#'   analysts or data stewards.
+#'  * `collectionunitid` A numeric collection unit identifier from
+#'   Neotoma.
+#'  * `handle` A character string identifying the collection unit. These
+#'   are often shorter form names (originally a default 8 character length).
+#'  * `collectionunitname` A character string identifying the collection
+#'   unit name.
+#'  * `colldate` The date on which the collection unit was sampled. Many
+#'   of these are empty.
+#'  * `location` A free-form character string indicating the location of
+#'   the collection unit within the site.
+#'  * `waterdepth` A numeric depth at which the core was obtained.
+#'  * `collunittype` A character string for the collection unit type.
+#'  * `collectiondevice` A fixed vocabulary term for the collection
+#'   device.
+#'  * `depositionalenvironment` A fixed vocabulary name for the
+#'   depositional environment.
 #' @import sf
 #' @import dplyr
 #' @importFrom purrr map
@@ -116,67 +110,73 @@ filter <- function(x, ...) {
 #' }
 #' @export
 filter.sites <- function(x, ...) {  # nolint
-
+  
   # It is time consuming to do all the joining.  So here we
   # do a thing to try to speed stuff up by only joining the stuff we need:
   ellipsis <- as.list(substitute(list(...), environment()))[-1L][[1]] %>%
     as.character()
-
+  
   sitecols <- c("sitename", "lat", "long", "altitude") %>%
     map(function(x) any(stringr::str_detect(ellipsis, x))) %>%
     unlist() %>%
     any()
-
+  
   datasetcols <- c("datasetid", "database", "datasettype", "age_range_old",
                    "age_range_young", "notes") %>%
     map(function(x) any(stringr::str_detect(ellipsis, x))) %>%
     unlist() %>%
     any()
-
+  
   collunitcols <- c("collectionunitid", "handle", "colldate",
                     "location", "waterdepth", "collunittype",
                     "collectiondevice", "defaultchronology",
                     "collectionunitname", "depositionalenvironment") %>%
     map(function(x) any(stringr::str_detect(ellipsis, x))) %>%
     unlist() %>%
     any()
-
+  
   ids <- getids(x)
-
+  ids <- ids %>% mutate(
+    collunitid = as.numeric(collunitid),
+    datasetid = as.numeric(datasetid)
+  )
+
   if (sitecols == TRUE) {
     ids <- ids %>%
       inner_join(as.data.frame(x), by = "siteid") %>%
       rename(altitude = elev,
              sitenotes = notes)
   }
-
+  
   if (collunitcols == TRUE) {
     ids <- ids %>%
-      inner_join(as.data.frame(collunits(x)), 
+      inner_join(
+        as.data.frame(collunits(x)), 
         by = c("collunitid" = "collectionunitid"))
   }
-
+  
   if (datasetcols == TRUE) {
     ids <- ids %>%
-      inner_join(as.data.frame(datasets(x)), by = "datasetid") %>%
-      rename(datasetnotes = notes)
+      inner_join(
+        mutate(as.data.frame(datasets(x)), datasetid = as.numeric(datasetid)),
+        by = "datasetid")
   }
-
+  
   cleanids <- ids %>%
     dplyr::filter(...)
-
+  
   if (nrow(cleanids) == 0) {
     return(new("sites"))
   }
-
+  
   siteids <- as.data.frame(x)$siteid
-
+  
   pared_sites <- x[which(siteids %in% cleanids$siteid)]
-
+  
   # Sites are cleared.  Now need to clear datasets:
   good_dsid <- unique(cleanids$datasetid)
   good_cuid <- unique(cleanids$collunitid)
-
+  
   pared_ds <- purrr::map(pared_sites@sites, function(x) {
     ycu <- collunits(x)
     ycu <- ycu[which(as.data.frame(ycu)$collectionunitid %in% good_cuid)]
@@ -187,9 +187,9 @@ filter.sites <- function(x, ...) {  # nolint
       return(y)
     })
     x@collunits@collunits <- xcu
-
+    
     return(x)
   })
-
+  
   return(new("sites", sites = pared_ds))
 }
diff --git a/R/get_datasets.R b/R/get_datasets.R
@@ -1,5 +1,5 @@
 #' @title get_datasets
-#' @author Socorro Dominguez \email{sedv8808@@gmail.com}
+#' @author Socorro Dominguez \email{s.dominguez@ht-data.com}
 #' @import gtools
 #' @import lubridate
 #' @import geojsonsf
@@ -333,3 +333,56 @@ get_datasets.sites <- function(x, ...) {
 
   return(output)
 }
+
+#' @title Get Dataset from a \code{site} object.
+#' @param x An object of class \code{site}.
+#' @param ... additional arguments accepted by \code{get_datasets()}
+#' @returns `sites` object with full metadata up to the `dataset` level
+#' @examples \donttest{
+#' random_sites <- get_sites(1)
+#' allds <- get_datasets(random_sites, limit=3)
+#' }
+#' @export
+get_datasets.site <- function(x, ...) {
+  # List of datasets ids
+  ids1 <- getids(x)
+  ids <- ids1 %>% dplyr::filter(!is.na(suppressWarnings(as.numeric(siteid))),
+                                !is.na(suppressWarnings(as.numeric(datasetid))))
+
+  ids2 <- getids(x) %>% dplyr::filter(is.na(suppressWarnings(as.numeric(siteid))) |
+                                        is.na(suppressWarnings(as.numeric(datasetid))))
+
+  if(nrow(ids2)!=0){
+    warnsite <- sprintf("SiteID %s or DatasetID %s does not exist in the Neotoma DB yet or it has been removed. 
+                        It will be removed from your search.",  paste0(ids2$siteid,collapse = ", "), paste0(ids2$datasetid,collapse = ", "))
+    warning(warnsite)
+  }
+
+  dataset_list <- ids$datasetid
+  dataset_list <- as.numeric(unlist(dataset_list))
+
+  ## Fixing all data
+  cl <- as.list(match.call())
+  cl[[1]] <- NULL
+
+  if('all_data' %in% names(cl)){
+    all_data = cl$all_data
+  }else{
+    cl[['all_data']] = TRUE
+  }
+
+  if('limit' %in% names(cl)){
+    cl[['all_data']] = FALSE
+  }
+
+  if('offset' %in% names(cl)){
+    cl[['all_data']] = FALSE
+  }
+  ## Fixing all data line
+
+  cl[['x']] <- dataset_list
+
+  output <- do.call(get_datasets, cl)
+
+  return(output)
+}
diff --git a/R/get_downloads.R b/R/get_downloads.R
@@ -1,5 +1,5 @@
 #' @title get_downloads
-#' @author Socorro Dominguez \email{sedv8808@@gmail.com}
+#' @author Socorro Dominguez \email{s.dominguez@ht-data.com}
 #' @import gtools
 #' @import lubridate
 #' @import dplyr