Skip to content

Commit

Permalink
Merge pull request #375 from broadinstitute/development
Browse files: browse the repository at this point in the history
Release 1.38.0
  • Loading branch information
jlchang authored Dec 10, 2024
2 parents 6ed571b + 704fa3f commit 887729d
Show file tree
Hide file tree
Showing 13 changed files with 456 additions and 251 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/minify_ontologies.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ name: Minify ontologies
on:
pull_request:
types: [opened] # Only trigger on PR "opened" event
# push: # Uncomment, update branches to develop / debug
# branches:
# jlc_show_gene_name
# push: # Uncomment, update branches to develop / debug
# branches:
# jlc_show_de_pairwise

jobs:
build:
Expand Down
3 changes: 2 additions & 1 deletion ingest/anndata_.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import scipy
from scipy.io.mmio import MMFile


# scipy.io.mmwrite uses scientific notation by default
# https://stackoverflow.com/questions/64748513
class MMFileFixedFormat(MMFile):
Expand Down Expand Up @@ -72,7 +73,7 @@ def create_cell_data_arrays(self):
linear_data_id=self.study_file_id,
cluster_name=raw_filename,
study_file_id=self.study_file_id,
study_id=self.study_id
study_id=self.study_id,
):
data_arrays.append(data_array)

Expand Down
10 changes: 7 additions & 3 deletions ingest/cell_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
PREREQUISITES
Must have python 3.6 or higher.
"""

import collections
import ntpath
from collections import defaultdict, OrderedDict
Expand Down Expand Up @@ -115,6 +116,7 @@ def validate_header_for_coordinate_values(self):
"error", msg, "format:cap:metadata-no-coordinates"
)
return False

@staticmethod
def make_multiindex_name(modality):
"""From modality, generate column name in multi-index format"""
Expand All @@ -127,7 +129,9 @@ def create_boolean_modality_metadatum(df, modality):
"""Translate presence of single modality to boolean for BigQuery"""
# check for empty cells (aka. nan) or empty strings
modality_multiindex = CellMetadata.make_multiindex_name(modality)
no_modality_info = df[modality_multiindex].isna() | df[modality_multiindex].str.len().eq(0)
no_modality_info = df[modality_multiindex].isna() | df[
modality_multiindex
].str.len().eq(0)
bool_name = modality + "_bool"
bool_multiindex = CellMetadata.make_multiindex_name(bool_name)
# store inverse of no_modality_info (ie. True = has modality info)
Expand All @@ -145,12 +149,12 @@ def hide_modality_metadatum(self):
m_to_rename[bool_name] = has_m
self.modality_urls = self.file.filter(m_to_hide, axis=1)
self.file.drop(m_to_hide, axis=1, inplace=True)
self.file.rename(columns= m_to_rename, inplace=True)
self.file.rename(columns=m_to_rename, inplace=True)
return

def booleanize_modality_metadata(self):
"""Translate presence of modality data to boolean for BigQuery
If no modality data, self.files is unchanged
If no modality data, self.files is unchanged
"""
if self.modalities is not None:
df = copy.deepcopy(self.file)
Expand Down
38 changes: 27 additions & 11 deletions ingest/cli_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,13 @@ def create_parser():
help="Indicates that differential expression analysis should be invoked",
)

parser_differential_expression.add_argument(
"--de-type",
default="rest",
choices=['rest', 'pairwise'],
help="Accepted values: 'pairwise' or 'rest' (default)",
)

parser_differential_expression.add_argument(
"--study-accession",
required=True,
Expand Down Expand Up @@ -336,6 +343,17 @@ def create_parser():
"--gene-file", help="Path to .genes.tsv file"
)

# For pairwise analyses
parser_differential_expression.add_argument(
"--group1",
help="1st annotation label to use for pairwise DE analysis",
)

parser_differential_expression.add_argument(
"--group2",
help="2nd annotation label to use for pairwise DE analysis",
)

parser_ingest_differential_expression = subparsers.add_parser(
"ingest_differential_expression",
help="Indicates author differential expression analysis processing",
Expand Down Expand Up @@ -377,40 +395,38 @@ def create_parser():
parser_ingest_differential_expression.add_argument(
"--differential-expression-file",
required=True,
help="Path to DE file uploaded by author."
help="Path to DE file uploaded by author.",
)

parser_ingest_differential_expression.add_argument(
"--gene-header",
required=True,
help="Header used for gene names / symbols in DE file"
help="Header used for gene names / symbols in DE file",
)

parser_ingest_differential_expression.add_argument(
"--group-header",
required=True,
help="Header used for group in DE file"
"--group-header", required=True, help="Header used for group in DE file"
)

parser_ingest_differential_expression.add_argument(
"--comparison-group-header",
required=False,
help=(
"Header used for comparison group in DE file. " +
"For pairwise comparisons. Can omit if DE file is in one-vs-rest-only format."
)
"Header used for comparison group in DE file. "
+ "For pairwise comparisons. Can omit if DE file is in one-vs-rest-only format."
),
)

parser_ingest_differential_expression.add_argument(
"--size-metric",
required=True,
help='Header used as size metric in DE file, e.g. "logfoldchanges", "avg_log2FC", etc.'
help='Header used as size metric in DE file, e.g. "logfoldchanges", "avg_log2FC", etc.',
)

parser_ingest_differential_expression.add_argument(
"--significance-metric",
required=True,
help='Header used as significance metric in DE file, e.g. "pvals_adj", "p_val_adj", etc.'
help='Header used as significance metric in DE file, e.g. "pvals_adj", "p_val_adj", etc.',
)

# AnnData subparsers
Expand Down Expand Up @@ -493,7 +509,7 @@ def create_parser():
parser_rank_genes.add_argument(
'--publication',
help="URL of the study's publicly-accessible research article, or GS URL or local path to publication text file",
required=True
required=True,
)

return parser
Expand Down
16 changes: 10 additions & 6 deletions ingest/clusters.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,11 @@ def transform(self):
{
"name": annot_name,
"type": annot_type,
"values": self.file[annot_headers].unique().tolist()
if annot_type == "group"
else [],
"values": (
self.file[annot_headers].unique().tolist()
if annot_type == "group"
else []
),
}
)
Annotations.dev_logger.info(f"Creating model for {self.study_id}")
Expand All @@ -148,9 +150,11 @@ def transform(self):
cell_annotations=cell_annotations,
study_file_id=self.study_file_id,
study_id=self.study_id,
domain_ranges=DomainRanges(**self.domain_ranges)
if self.domain_ranges is not None
else None,
domain_ranges=(
DomainRanges(**self.domain_ranges)
if self.domain_ranges is not None
else None
),
)

def get_data_array_annot(self, linear_data_id):
Expand Down
Loading

0 comments on commit 887729d

Please sign in to comment.