Skip to content

Commit

Permalink
CRAN changes
Browse files Browse the repository at this point in the history
  • Loading branch information
sedv8808 committed May 24, 2024
1 parent 5187607 commit e9255dc
Show file tree
Hide file tree
Showing 32 changed files with 309 additions and 244 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ Description: Access and manipulation of data using the Neotoma Paleoecology Data
License: MIT + file LICENSE
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
Suggests:
testthat,
knitr,
Expand Down
3 changes: 1 addition & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ S3method(get_contacts,default)
S3method(get_contacts,numeric)
S3method(get_datasets,default)
S3method(get_datasets,numeric)
S3method(get_datasets,site)
S3method(get_datasets,sites)
S3method(get_downloads,character)
S3method(get_downloads,numeric)
Expand Down Expand Up @@ -145,12 +146,10 @@ importFrom(httr,stop_for_status)
importFrom(jsonlite,fromJSON)
importFrom(jsonlite,toJSON)
importFrom(leaflet,addCircleMarkers)
importFrom(leaflet,addProviderTiles)
importFrom(leaflet,addTiles)
importFrom(leaflet,leaflet)
importFrom(lubridate,as_date)
importFrom(magrittr,"%>%")
importFrom(mapview,mapshot)
importFrom(methods,"slot<-")
importFrom(methods,is)
importFrom(methods,new)
Expand Down
2 changes: 1 addition & 1 deletion R/02_genericDefinitions.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ setGeneric("coordinates", function(obj, ...) {
})

#' @title plotLeaflet
#' @author Socorro Dominguez \email{sedv8808@@gmail.com}
#' @author Socorro Dominguez \email{s.dominguez@ht-data.com}
#' @description Plot sites on a leaflet map
#' @param object Sites object to plot
#' @returns leaflet map with site markers
Expand Down
2 changes: 1 addition & 1 deletion R/build_sample.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#' @title Build a samples `data.frame` from Neotoma API JSON
#' @author Socorro Dominguez \email{sedv8808@@gmail.com}
#' @author Socorro Dominguez \email{s.dominguez@ht-data.com}
#' @import gtools
#' @import lubridate
#' @import dplyr
Expand Down
1 change: 1 addition & 0 deletions R/collunits-methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ setMethod(f = "show",
#' @title Get or remove sites by numeric index
#' @param x The collunits object
#' @param i The numeric index
#' @description Retrieve sites by numeric index
#' @return null used for side effects. Printing a data.frame
setMethod(f = "[",
signature = signature(x = "collunits", i = "numeric"),
Expand Down
182 changes: 91 additions & 91 deletions R/filter.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,43 +5,40 @@ utils::globalVariables(c("elev", "notes"))
#' is a nested object (it contains collection units, datasets, samples, etc.)
#' the degree to which filtering occurs depends on the amount of data contained
#' within the sites object. Filtering parameters include:
#' \itemize{
#' \item{"siteid"}{A numeric site identifier from the Neotoma Database}
#' \item{"sitename"}{The character string sitename.}
#' \item{"lat"}{A numeric latitude value.}
#' \item{"long"}{A numeric longitude value.}
#' \item{"altitude"}{The elevation of the site. Note that some sites do not
#' * `siteid` A numeric site identifier from the Neotoma Database.
#' * `sitename` The character string sitename.
#' * `lat` A numeric latitude value.
#' * `long` A numeric longitude value.
#' * `altitude` The elevation of the site. Note that some sites do not
#' include elevation information. For these an NA value appears, and they
#' would be removed when using an elevation filter.}
#' \item{"datasetid"}{A numeric datasetid from Neotoma.}
#' \item{"database"}{A character string naming the constituent database
#' from which the dataset is drawn.}
#' \item{"datasettype"}{A character string representing one of the many
#' dataset types within Neotoma.}
#' \item{"age_range_old"}{A dataset-level parameter indicating the oldest
#' date covered by the dataset chronology.}
#' \item{"age_range_young"}{A dataset-level parameter indicating the youngest
#' date covered by the dataset chronology.}
#' \item{"notes"}{Free-form dataset notes provided by the dataset PI(s),
#' analysts or data stewards.}
#' \item{"collectionunitid"}{A numeric collection unit identifier from
#' Neotoma.}
#' \item{"handle"}{A character string identifying the collection unit. These
#' are often shorter form names (originally a default 8 character length).}
#' \item{"collectionunitname"}{A character string identifying the collection
#' unit name.}
#' \item{"colldate"}{The date on which the collection unit was sampled. Many
#' of these are empty.}
#' \item{"location"}{A free-form character string indicating the location of
#' the collection unit within the site.}
#' \item{"waterdepth"}{A numeric depth at which the core was obtained.}
#' \item{"collunittype"}{A character string for the collection unit type.}
#' \item{"collectiondevice"}{A fixed vocabulary term for the collection
#' device.}
#' \item{"depositionalenvironment"}{A fixed vocabulary name for the
#' depositional environment.}
#' }
#'
#' would be removed when using an elevation filter.
#' * `datasetid` A numeric datasetid from Neotoma.
#' * `database` A character string naming the constituent database
#' from which the dataset is drawn.
#' * `datasettype` A character string representing one of the many
#' dataset types within Neotoma.
#' * `age_range_old` A dataset-level parameter indicating the oldest
#' date covered by the dataset chronology.
#' * `age_range_young` A dataset-level parameter indicating the youngest
#' date covered by the dataset chronology.
#' * `notes` Free-form dataset notes provided by the dataset PI(s),
#' analysts or data stewards.
#' * `collectionunitid` A numeric collection unit identifier from
#' Neotoma.
#' * `handle` A character string identifying the collection unit. These
#' are often shorter form names (originally a default 8 character length).
#' * `collectionunitname` A character string identifying the collection
#' unit name.
#' * `colldate` The date on which the collection unit was sampled. Many
#' of these are empty.
#' * `location` A free-form character string indicating the location of
#' the collection unit within the site.
#' * `waterdepth` A numeric depth at which the core was obtained.
#' * `collunittype` A character string for the collection unit type.
#' * `collectiondevice` A fixed vocabulary term for the collection
#' device.
#' * `depositionalenvironment` A fixed vocabulary name for the
#' depositional environment.
#' @import sf
#' @import dplyr
#' @importFrom purrr map
Expand All @@ -50,7 +47,6 @@ utils::globalVariables(c("elev", "notes"))
#' @param ... arguments to filter by.
#' @returns filtered `sites` object
#' @export

filter <- function(x, ...) {
UseMethod("filter", x)
}
Expand All @@ -61,42 +57,40 @@ filter <- function(x, ...) {
#' is a nested object (it contains collection units, datasets, samples, etc.)
#' the degree to which filtering occurs depends on the amount of data contained
#' within the sites object. Filtering parameters include:
#' \itemize{
#' \item{"siteid"}{A numeric site identifier from the Neotoma Database}
#' \item{"sitename"}{The character string sitename.}
#' \item{"lat"}{A numeric latitude value.}
#' \item{"long"}{A numeric longitude value.}
#' \item{"altitude"}{The elevation of the site. Note that some sites do not
#' * `siteid` A numeric site identifier from the Neotoma Database.
#' * `sitename` The character string sitename.
#' * `lat` A numeric latitude value.
#' * `long` A numeric longitude value.
#' * `altitude` The elevation of the site. Note that some sites do not
#' include elevation information. For these an NA value appears, and they
#' would be removed when using an elevation filter.}
#' \item{"datasetid"}{A numeric datasetid from Neotoma.}
#' \item{"database"}{A character string naming the constituent database
#' from which the dataset is drawn.}
#' \item{"datasettype"}{A character string representing one of the many
#' dataset types within Neotoma.}
#' \item{"age_range_old"}{A dataset-level parameter indicating the oldest
#' date covered by the dataset chronology.}
#' \item{"age_range_young"}{A dataset-level parameter indicating the youngest
#' date covered by the dataset chronology.}
#' \item{"notes"}{Free-form dataset notes provided by the dataset PI(s),
#' analysts or data stewards.}
#' \item{"collectionunitid"}{A numeric collection unit identifier from
#' Neotoma.}
#' \item{"handle"}{A character string identifying the collection unit. These
#' are often shorter form names (originally a default 8 character length).}
#' \item{"collectionunitname"}{A character string identifying the collection
#' unit name.}
#' \item{"colldate"}{The date on which the collection unit was sampled. Many
#' of these are empty.}
#' \item{"location"}{A free-form character string indicating the location of
#' the collection unit within the site.}
#' \item{"waterdepth"}{A numeric depth at which the core was obtained.}
#' \item{"collunittype"}{A character string for the collection unit type.}
#' \item{"collectiondevice"}{A fixed vocabulary term for the collection
#' device.}
#' \item{"depositionalenvironment"}{A fixed vocabulary name for the
#' depositional environment.}
#' }
#' would be removed when using an elevation filter.
#' * `datasetid` A numeric datasetid from Neotoma.
#' * `database` A character string naming the constituent database
#' from which the dataset is drawn.
#' * `datasettype` A character string representing one of the many
#' dataset types within Neotoma.
#' * `age_range_old` A dataset-level parameter indicating the oldest
#' date covered by the dataset chronology.
#' * `age_range_young` A dataset-level parameter indicating the youngest
#' date covered by the dataset chronology.
#' * `notes` Free-form dataset notes provided by the dataset PI(s),
#' analysts or data stewards.
#' * `collectionunitid` A numeric collection unit identifier from
#' Neotoma.
#' * `handle` A character string identifying the collection unit. These
#' are often shorter form names (originally a default 8 character length).
#' * `collectionunitname` A character string identifying the collection
#' unit name.
#' * `colldate` The date on which the collection unit was sampled. Many
#' of these are empty.
#' * `location` A free-form character string indicating the location of
#' the collection unit within the site.
#' * `waterdepth` A numeric depth at which the core was obtained.
#' * `collunittype` A character string for the collection unit type.
#' * `collectiondevice` A fixed vocabulary term for the collection
#' device.
#' * `depositionalenvironment` A fixed vocabulary name for the
#' depositional environment.
#' @import sf
#' @import dplyr
#' @importFrom purrr map
Expand All @@ -116,67 +110,73 @@ filter <- function(x, ...) {
#' }
#' @export
filter.sites <- function(x, ...) { # nolint

# It is time consuming to do all the joining. So here we
# do a thing to try to speed stuff up by only joining the stuff we need:
ellipsis <- as.list(substitute(list(...), environment()))[-1L][[1]] %>%
as.character()

sitecols <- c("sitename", "lat", "long", "altitude") %>%
map(function(x) any(stringr::str_detect(ellipsis, x))) %>%
unlist() %>%
any()

datasetcols <- c("datasetid", "database", "datasettype", "age_range_old",
"age_range_young", "notes") %>%
map(function(x) any(stringr::str_detect(ellipsis, x))) %>%
unlist() %>%
any()

collunitcols <- c("collectionunitid", "handle", "colldate",
"location", "waterdepth", "collunittype",
"collectiondevice", "defaultchronology",
"collectionunitname", "depositionalenvironment") %>%
map(function(x) any(stringr::str_detect(ellipsis, x))) %>%
unlist() %>%
any()

ids <- getids(x)

ids <- ids %>% mutate(
collunitid = as.numeric(collunitid),
datasetid = as.numeric(datasetid)
)

if (sitecols == TRUE) {
ids <- ids %>%
inner_join(as.data.frame(x), by = "siteid") %>%
rename(altitude = elev,
sitenotes = notes)
}

if (collunitcols == TRUE) {
ids <- ids %>%
inner_join(as.data.frame(collunits(x)),
inner_join(
as.data.frame(collunits(x)),
by = c("collunitid" = "collectionunitid"))
}

if (datasetcols == TRUE) {
ids <- ids %>%
inner_join(as.data.frame(datasets(x)), by = "datasetid") %>%
rename(datasetnotes = notes)
inner_join(
mutate(as.data.frame(datasets(x)), datasetid = as.numeric(datasetid)),
by = "datasetid")
}

cleanids <- ids %>%
dplyr::filter(...)

if (nrow(cleanids) == 0) {
return(new("sites"))
}

siteids <- as.data.frame(x)$siteid

pared_sites <- x[which(siteids %in% cleanids$siteid)]

# Sites are cleared. Now need to clear datasets:
good_dsid <- unique(cleanids$datasetid)
good_cuid <- unique(cleanids$collunitid)

pared_ds <- purrr::map(pared_sites@sites, function(x) {
ycu <- collunits(x)
ycu <- ycu[which(as.data.frame(ycu)$collectionunitid %in% good_cuid)]
Expand All @@ -187,9 +187,9 @@ filter.sites <- function(x, ...) { # nolint
return(y)
})
x@collunits@collunits <- xcu

return(x)
})

return(new("sites", sites = pared_ds))
}
55 changes: 54 additions & 1 deletion R/get_datasets.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#' @title get_datasets
#' @author Socorro Dominguez \email{sedv8808@@gmail.com}
#' @author Socorro Dominguez \email{s.dominguez@ht-data.com}
#' @import gtools
#' @import lubridate
#' @import geojsonsf
Expand Down Expand Up @@ -333,3 +333,56 @@ get_datasets.sites <- function(x, ...) {

return(output)
}

#' @title Get Dataset from a \code{site} object.
#' @param x An object of class \code{site}.
#' @param ... additional arguments accepted by \code{get_datasets()}
#' @returns `sites` object with full metadata up to the `dataset` level
#' @examples \donttest{
#' random_sites <- get_sites(1)
#' allds <- get_datasets(random_sites, limit=3)
#' }
#' @export
get_datasets.site <- function(x, ...) {
# List of datasets ids
ids1 <- getids(x)
ids <- ids1 %>% dplyr::filter(!is.na(suppressWarnings(as.numeric(siteid))),
!is.na(suppressWarnings(as.numeric(datasetid))))

ids2 <- getids(x) %>% dplyr::filter(is.na(suppressWarnings(as.numeric(siteid))) |
is.na(suppressWarnings(as.numeric(datasetid))))

if(nrow(ids2)!=0){
warnsite <- sprintf("SiteID %s or DatasetID %s does not exist in the Neotoma DB yet or it has been removed.
It will be removed from your search.", paste0(ids2$siteid,collapse = ", "), paste0(ids2$datasetid,collapse = ", "))
warning(warnsite)
}

dataset_list <- ids$datasetid
dataset_list <- as.numeric(unlist(dataset_list))

## Fixing all data
cl <- as.list(match.call())
cl[[1]] <- NULL

if('all_data' %in% names(cl)){
all_data = cl$all_data
}else{
cl[['all_data']] = TRUE
}

if('limit' %in% names(cl)){
cl[['all_data']] = FALSE
}

if('offset' %in% names(cl)){
cl[['all_data']] = FALSE
}
## Fixing all data line

cl[['x']] <- dataset_list

output <- do.call(get_datasets, cl)

return(output)
}
2 changes: 1 addition & 1 deletion R/get_downloads.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#' @title get_downloads
#' @author Socorro Dominguez \email{sedv8808@@gmail.com}
#' @author Socorro Dominguez \email{s.dominguez@ht-data.com}
#' @import gtools
#' @import lubridate
#' @import dplyr
Expand Down
Loading

0 comments on commit e9255dc

Please sign in to comment.