Skip to content

Commit

Permalink
Merge branch 'devel' of github.com:PavlidisLab/gemma.R into devel
Browse files Browse the repository at this point in the history
  • Loading branch information
oganm committed Feb 14, 2024
2 parents 50fc3db + cb14520 commit ea3210e
Show file tree
Hide file tree
Showing 14 changed files with 191 additions and 75 deletions.
2 changes: 2 additions & 0 deletions R/allEndpoints.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#' @keywords internal
#'
#' @examples
#' gemma.R:::.getResultSets(523099)
.getResultSets <- function(resultSet = NA_character_, raw = getOption(
"gemma.raw",
FALSE
Expand Down Expand Up @@ -106,6 +107,7 @@ mem.getResultSets <- function(resultSet = NA_character_, raw = getOption(
#' @keywords internal
#'
#' @examples
#' gemma.R:::.getResultSetFactors(523099)
.getResultSetFactors <- function(resultSet = NA_character_, raw = getOption(
"gemma.raw",
FALSE
Expand Down
133 changes: 94 additions & 39 deletions R/convenience.R
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,86 @@ make_design <- function(samples,metaType = "text"){
}


#' Get a subset of an array of factorValues
#' @param factorValue unimplemented
#' @param differential_expressions
#' @keywords internal
#' @return a boolean vector, samples representing the resultSet and/or the contrast
#' are set to TRUE
subset_factorValues <- function(factorValues,
factorValue = NULL,
differential_expressions = NULL,
resultSet = NULL,
contrast = NULL){

out <- rep(TRUE,length(factorValues))


if(!is.null(factorValue)){
# unimplemented
}

if(!is.null(differential_expressions)){

# this should never trigger but just in case...
assertthat::assert_that(
factorValues %>% do.call(rbind,.) %>% dplyr::select(ID,factor.ID) %>% unique %>% .$ID %>% table %>% {all(.==1)},
msg = "ID's cannot be repeated across factors")



subset <- differential_expressions %>% dplyr::filter(result.ID == resultSet) %>% .$subsetFactor %>% unique
# result set should have the same subset for all contrasts
assertthat::assert_that(length(subset)==1)

if(nrow(subset[[1]])!=0){

subset_ids <- subset %>% purrr::map('ID') %>% unlist

in_subset <- factorValues %>% purrr::map_lgl(function(x){
any(x$ID %in% subset_ids)
})


# subset_ids <- subset[[1]] %>%
# dplyr::mutate(comb = paste0(ID,'.',factor.ID)) %>% {.$comb}
#
# in_subset <- factorValues %>% purrr::map_lgl(function(x){
# x %>% dplyr::mutate(comb = paste0(ID,'.',factor.ID)) %>% {.$comb} %>%
# {all(subset_ids %in% .)}
# })

out <- out & in_subset
}


if(!is.null(contrast)){
cn <- differential_expressions %>% dplyr::filter(result.ID == resultSet & contrast.ID == contrast)
baseline_id <- cn$baseline.factors %>% purrr::map('ID') %>% unlist%>% unique
baseline_factor_id <- cn$baseline.factors %>% purrr::map('factor.ID') %>% unlist%>% unique

contrast_id <- cn$experimental.factors %>% purrr::map('ID') %>% unlist %>% unique
contrast_factor_id <- cn$experimental.factors %>% purrr::map('factor.ID') %>% unlist %>% unique

contrast_id<- contrast_id[match(baseline_factor_id,contrast_factor_id)]

in_contrast <- factorValues %>% purrr::map_lgl(function(x){
all(contrast_id %in% x$ID) |
all(baseline_id %in% x$ID) |
all(c(contrast_id[1],baseline_id[2]) %in% x$ID) |
all(c(contrast_id[2],baseline_id[1]) %in% x$ID)
})

out <- out & in_contrast

}
}
return(out)
}




#' Compile gene expression data and metadata
#'
#' Return an annotated Bioconductor-compatible
Expand All @@ -236,7 +316,9 @@ make_design <- function(samples,metaType = "text"){
#' SummarizedExperiments which are more recent. See the Summarized experiment
#' \href{https://bioconductor.org/packages/release/bioc/vignettes/SummarizedExperiment/inst/doc/SummarizedExperiment.html}{vignette}
#' or the ExpressionSet \href{https://bioconductor.org/packages/release/bioc/vignettes/Biobase/inst/doc/ExpressionSetIntroduction.pdf}{vignette}
#' for more details.
#' for more details. "tidy" for a long form data frame compatible with tidyverse functions.
#' 'list' to return a list containing individual data frames containing expression values,
#' design and the experiment.
#' @inheritParams memoise
#' @inheritParams get_dataset_expression_for_genes
#' @param metaType How should the metadata information should be included. Can be "text", "uri" or "both". "text" and "uri" options
Expand Down Expand Up @@ -364,45 +446,19 @@ get_dataset_object <- function(datasets,
data.table::setcolorder(packed_info$exp,c(gene_info,rownames(packed_info$design)))

if(!is.null(resultSets)){


diff <- get_dataset_differential_expression_analyses(dataset,memoised = memoised)
subset <- diff %>%
dplyr::filter(result.ID == resultSets[i]) %>%
.$subsetFactor %>% unique

assertthat::assert_that(length(subset)==1)

# passing the original samples is fine since expression data is
# reordered not design file
relevant <- subset_factorValues(packed_info$design$factorValues,
differential_expressions = diff,
resultSet = resultSets[i],
contrast = contrasts[i])

if(nrow(subset[[1]])!=0){
subset_ids <- subset %>% purrr::map('ID') %>% unlist

in_subset <- packed_info$design$factorValues %>% purrr::map_lgl(function(x){
any(x$ID %in% subset_ids)
})
} else{
in_subset <- TRUE
}

if(!is.null(contrasts)){
contrast <- diff %>% dplyr::filter(result.ID == resultSets[i] & contrast.ID == contrasts[i])
baseline_id <- contrast$baseline.factors %>% purrr::map('ID') %>% unlist%>% unique
baseline_factor_id <- contrast$baseline.factors %>% purrr::map('factor.ID') %>% unlist%>% unique
contrast_id <- contrast$experimental.factors %>% purrr::map('ID') %>% unlist %>% unique
contrast_factor_id <- contrast$experimental.factors %>% purrr::map('factor.ID') %>% unlist %>% unique

contrast_id<- contrast_id[match(baseline_factor_id,contrast_factor_id)]


in_contrast <- packed_info$design$factorValues %>% purrr::map_lgl(function(x){
all(contrast_id %in% x$ID) |
all(baseline_id %in% x$ID) |
all(c(contrast_id[1],baseline_id[2]) %in% x$ID) |
all(c(contrast_id[2],baseline_id[1]) %in% x$ID)
})
} else{
in_contrast <- TRUE
}
packed_info$exp <- packed_info$exp[,.SD,.SDcols = c(gene_info, rownames(packed_info$design)[in_subset & in_contrast])]
packed_info$design <- packed_info$design[in_subset & in_contrast,]
packed_info$exp <- packed_info$exp[,.SD,.SDcols = c(gene_info, rownames(packed_info$design)[relevant])]
packed_info$design <- packed_info$design[relevant,]
}

return(packed_info)
Expand Down Expand Up @@ -704,8 +760,7 @@ gemma_call <- function(call,...,json = TRUE){
#' @param directory Directory to save the output from the individual calls to. If provided, each page
#' is saved to separate files.
#' @param file The name of a file to save the results to, or \code{NULL} to not write
#' results to a file. If \code{raw == TRUE}, the output will be the raw endpoint from the
#' API, likely a JSON or a gzip file. Otherwise, it will be a RDS file.
#' results to a file. This function always saves the output as an RDS file. Otherwise, it will be a RDS file.
#' @param overwrite Whether or not to overwrite if a file exists at the specified
#' filename.
#' @return A data.table or a list containing data from all pages.
Expand Down
20 changes: 12 additions & 8 deletions R/processors.R
Original file line number Diff line number Diff line change
Expand Up @@ -557,17 +557,18 @@ processElements <- function(d) {
#' The fields of the output data.table are:
#'
#' \itemize{
#' \item \code{gene.Symbol}: Symbol for the gene
#' \item \code{gene.Ensembl}: Ensembl ID for the gene
#' \item \code{gene.symbol}: Symbol for the gene
#' \item \code{gene.ensembl}: Ensembl ID for the gene
#' \item \code{gene.NCBI}: NCBI id for the gene
#' \item \code{gene.Name}: Name of the gene
#' \item \code{gene.MFX.Rank}: Multifunctionality rank for the gene
#' \item \code{taxon.Name}: Name of the species
#' \item \code{taxon.Scientific}: Scientific name for the taxon
#' \item \code{gene.name}: Name of the gene
#' \item \code{gene.aliases}: Gene aliases. Each row includes a vector
#' \item \code{gene.MFX.rank}: Multifunctionality rank for the gene
#' \item \code{taxon.name}: Name of the species
#' \item \code{taxon.scientific}: Scientific name for the taxon
#' \item \code{taxon.ID}: Internal identifier given to the species by Gemma
#' \item \code{taxon.NCBI}: NCBI ID of the taxon
#' \item \code{taxon.Database.Name}: Underlying database used in Gemma for the taxon
#' \item \code{taxon.Database.ID}: ID of the underyling database used in Gemma for the taxon
#' \item \code{taxon.database.name}: Underlying database used in Gemma for the taxon
#' \item \code{taxon.database.ID}: ID of the underlying database used in Gemma for the taxon
#' }
#'
#' @keywords internal
Expand All @@ -577,6 +578,9 @@ processGenes <- function(d) {
gene.ensembl = accessField(d,'ensemblId',NA_character_),
gene.NCBI = accessField(d,"ncbiId",NA_integer_),
gene.name = accessField(d, "officialName",NA_character_),
gene.aliases = d %>% purrr::map(function(x){
x$aliases %>% unlist
}),
# gene.Aliases = d[["aliases"]],
# gene.GO = d[["numGoTerms"]],
# gene.Homologues = d[["homologues"]],
Expand Down
Binary file modified inst/script/openapi.json
Binary file not shown.
13 changes: 13 additions & 0 deletions inst/script/overrides.R
Original file line number Diff line number Diff line change
Expand Up @@ -315,3 +315,16 @@ NULL
#' @examples
#' get_dataset_expression_for_genes('GSE2018',genes=c(10225,2841))
NULL

#' .getResultSets
#'
#' @examples
#' gemma.R:::.getResultSets(523099)
NULL


#' .getResultSetFactors
#'
#' @examples
#' gemma.R:::.getResultSetFactors(523099)
NULL
7 changes: 6 additions & 1 deletion inst/script/registry.R
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,10 @@ registerEndpoint(

# /resultSets, get_result_sets -----
# unimplemented
# this is not useful in gemma.R and can be replaced by get_dataset_differential_expression_analyses
# this endpoint can be potentially useful if we can use
# make better use of its filter argument. talk to guillaume
# to see if there is a good way to do it
# otherwise it can be replaced by get_dataset_differential_expression_analyses
# the implementation below is also missing arguments

# registerEndpoint(
Expand Down Expand Up @@ -506,6 +509,8 @@ registerEndpoint("platforms/{platform}/elements/{probe}/genes?offset={offset}&li

# platforms -----
# merged with platforms/{platform}
# this endpoint has no unique parameters of its own unlike get_datasets
# which is why it's not separated

# platforms/{platform}, get_platforms_by_ids ----
registerEndpoint("platforms/{platforms}?&offset={offset}&limit={limit}&sort={sort}&filter={filter}",
Expand Down
3 changes: 3 additions & 0 deletions man/dot-getResultSetFactors.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions man/dot-getResultSets.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions man/get_all_pages.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/get_dataset_object.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 9 additions & 8 deletions man/get_genes.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 9 additions & 8 deletions man/get_platform_element_genes.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 9 additions & 8 deletions man/processGenes.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit ea3210e

Please sign in to comment.