Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Release 1.38.0 #375

Merged
merged 12 commits on
Dec 10, 2024
6 changes: 3 additions & 3 deletions .github/workflows/minify_ontologies.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ name: Minify ontologies
on:
pull_request:
types: [opened] # Only trigger on PR "opened" event
# push: # Uncomment, update branches to develop / debug
# branches:
# jlc_show_gene_name
# push: # Uncomment, update branches to develop / debug
# branches:
# jlc_show_de_pairwise

jobs:
build:
Expand Down
3 changes: 2 additions & 1 deletion ingest/anndata_.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import scipy
from scipy.io.mmio import MMFile


# scipy.io.mmwrite uses scientific notation by default
# https://stackoverflow.com/questions/64748513
class MMFileFixedFormat(MMFile):
Expand Down Expand Up @@ -72,7 +73,7 @@ def create_cell_data_arrays(self):
linear_data_id=self.study_file_id,
cluster_name=raw_filename,
study_file_id=self.study_file_id,
study_id=self.study_id
study_id=self.study_id,
):
data_arrays.append(data_array)

Expand Down
10 changes: 7 additions & 3 deletions ingest/cell_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
PREREQUISITES
Must have python 3.6 or higher.
"""

import collections
import ntpath
from collections import defaultdict, OrderedDict
Expand Down Expand Up @@ -115,6 +116,7 @@ def validate_header_for_coordinate_values(self):
"error", msg, "format:cap:metadata-no-coordinates"
)
return False

@staticmethod
def make_multiindex_name(modality):
"""From modality, generate column name in multi-index format"""
Expand All @@ -127,7 +129,9 @@ def create_boolean_modality_metadatum(df, modality):
"""Translate presence of single modality to boolean for BigQuery"""
# check for empty cells (aka. nan) or empty strings
modality_multiindex = CellMetadata.make_multiindex_name(modality)
no_modality_info = df[modality_multiindex].isna() | df[modality_multiindex].str.len().eq(0)
no_modality_info = df[modality_multiindex].isna() | df[
modality_multiindex
].str.len().eq(0)
bool_name = modality + "_bool"
bool_multiindex = CellMetadata.make_multiindex_name(bool_name)
# store inverse of no_modality_info (ie. True = has modality info)
Expand All @@ -145,12 +149,12 @@ def hide_modality_metadatum(self):
m_to_rename[bool_name] = has_m
self.modality_urls = self.file.filter(m_to_hide, axis=1)
self.file.drop(m_to_hide, axis=1, inplace=True)
self.file.rename(columns= m_to_rename, inplace=True)
self.file.rename(columns=m_to_rename, inplace=True)
return

def booleanize_modality_metadata(self):
"""Translate presence of modality data to boolean for BigQuery
If no modality data, self.files is unchanged
If no modality data, self.files is unchanged
"""
if self.modalities is not None:
df = copy.deepcopy(self.file)
Expand Down
38 changes: 27 additions & 11 deletions ingest/cli_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,13 @@ def create_parser():
help="Indicates that differential expression analysis should be invoked",
)

parser_differential_expression.add_argument(
"--de-type",
default="rest",
choices=['rest', 'pairwise'],
help="Accepted values: 'pairwise' or 'rest' (default)",
)

parser_differential_expression.add_argument(
"--study-accession",
required=True,
Expand Down Expand Up @@ -336,6 +343,17 @@ def create_parser():
"--gene-file", help="Path to .genes.tsv file"
)

# For pairwise analyses
parser_differential_expression.add_argument(
"--group1",
help="1st annotation label to use for pairwise DE analysis",
)

parser_differential_expression.add_argument(
"--group2",
help="2nd annotation label to use for pairwise DE analysis",
)

parser_ingest_differential_expression = subparsers.add_parser(
"ingest_differential_expression",
help="Indicates author differential expression analysis processing",
Expand Down Expand Up @@ -377,40 +395,38 @@ def create_parser():
parser_ingest_differential_expression.add_argument(
"--differential-expression-file",
required=True,
help="Path to DE file uploaded by author."
help="Path to DE file uploaded by author.",
)

parser_ingest_differential_expression.add_argument(
"--gene-header",
required=True,
help="Header used for gene names / symbols in DE file"
help="Header used for gene names / symbols in DE file",
)

parser_ingest_differential_expression.add_argument(
"--group-header",
required=True,
help="Header used for group in DE file"
"--group-header", required=True, help="Header used for group in DE file"
)

parser_ingest_differential_expression.add_argument(
"--comparison-group-header",
required=False,
help=(
"Header used for comparison group in DE file. " +
"For pairwise comparisons. Can omit if DE file is in one-vs-rest-only format."
)
"Header used for comparison group in DE file. "
+ "For pairwise comparisons. Can omit if DE file is in one-vs-rest-only format."
),
)

parser_ingest_differential_expression.add_argument(
"--size-metric",
required=True,
help='Header used as size metric in DE file, e.g. "logfoldchanges", "avg_log2FC", etc.'
help='Header used as size metric in DE file, e.g. "logfoldchanges", "avg_log2FC", etc.',
)

parser_ingest_differential_expression.add_argument(
"--significance-metric",
required=True,
help='Header used as significance metric in DE file, e.g. "pvals_adj", "p_val_adj", etc.'
help='Header used as significance metric in DE file, e.g. "pvals_adj", "p_val_adj", etc.',
)

# AnnData subparsers
Expand Down Expand Up @@ -493,7 +509,7 @@ def create_parser():
parser_rank_genes.add_argument(
'--publication',
help="URL of the study's publicly-accessible research article, or GS URL or local path to publication text file",
required=True
required=True,
)

return parser
Expand Down
16 changes: 10 additions & 6 deletions ingest/clusters.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,11 @@ def transform(self):
{
"name": annot_name,
"type": annot_type,
"values": self.file[annot_headers].unique().tolist()
if annot_type == "group"
else [],
"values": (
self.file[annot_headers].unique().tolist()
if annot_type == "group"
else []
),
}
)
Annotations.dev_logger.info(f"Creating model for {self.study_id}")
Expand All @@ -148,9 +150,11 @@ def transform(self):
cell_annotations=cell_annotations,
study_file_id=self.study_file_id,
study_id=self.study_id,
domain_ranges=DomainRanges(**self.domain_ranges)
if self.domain_ranges is not None
else None,
domain_ranges=(
DomainRanges(**self.domain_ranges)
if self.domain_ranges is not None
else None
),
)

def get_data_array_annot(self, linear_data_id):
Expand Down
Loading
Loading