Skip to content

Commit

Permalink
Added type aliases to complex datastructures
Browse files Browse the repository at this point in the history
  • Loading branch information
CodingBash committed Jun 13, 2024
1 parent 71a1f6c commit 6ccbea1
Show file tree
Hide file tree
Showing 11 changed files with 170 additions and 79 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ ENV PATH="${VENV}/bin:$PATH"

# Install from PyPI
RUN pip install --upgrade pip
RUN pip install crispr-ambiguous-mapping==0.0.148
RUN pip install crispr-ambiguous-mapping==0.0.149
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,22 @@
from ..processing import crispr_guide_counting
from ..processing import crispr_sequence_encoding
from ..parsing import reporter_umitools_fastq_parsing
from ..models.mapping_models import WhitelistReporterCountsResult


@typechecked
def get_whitelist_reporter_counts_from_umitools_output(whitelist_guide_reporter_df: pd.DataFrame, fastq_r1_fn: str, fastq_r2_fn: str, barcode_pattern_regex: Optional[str] = None, umi_pattern_regex: Optional[str] = None, revcomp_protospacer: bool = False, revcomp_surrogate: bool = True, revcomp_barcode: bool = True, surrogate_hamming_threshold_strict: Optional[int] = 10, barcode_hamming_threshold_strict: Optional[int] = 2, protospacer_hamming_threshold_strict: Optional[int] = 7, cores: int=1):
def get_whitelist_reporter_counts_from_umitools_output(whitelist_guide_reporter_df: pd.DataFrame,
fastq_r1_fn: str,
fastq_r2_fn: str,
barcode_pattern_regex: Optional[str] = None,
umi_pattern_regex: Optional[str] = None,
revcomp_protospacer: bool = False,
revcomp_surrogate: bool = True,
revcomp_barcode: bool = True,
surrogate_hamming_threshold_strict: Optional[int] = 10,
barcode_hamming_threshold_strict: Optional[int] = 2,
protospacer_hamming_threshold_strict: Optional[int] = 7,
cores: int=1) -> WhitelistReporterCountsResult:
#
# Get counts of observed FASTQ sequences
#
Expand Down
Original file line number Diff line number Diff line change
@@ -1,36 +1,37 @@
from dataclasses import dataclass
from typing import Counter as CounterType
from typing import Tuple, Optional, DefaultDict, Dict
from .types import *
import pandas as pd

@dataclass
class MatchSetWhitelistReporterObservedSequenceCounterSeriesResults:

# Storing as a dictionary
ambiguous_ignored_umi_noncollapsed_alleleseries_dict : Optional[DefaultDict[Tuple[str, Optional[str], Optional[str]], pd.Series]] = None
ambiguous_ignored_umi_collapsed_alleleseries_dict : Optional[DefaultDict[Tuple[str, Optional[str], Optional[str]], pd.Series]] = None
ambiguous_ignored_alleleseries_dict : Optional[DefaultDict[Tuple[str, Optional[str], Optional[str]], pd.Series]] = None
ambiguous_ignored_umi_noncollapsed_alleleseries_dict : Optional[GeneralAlleleCountSeriesDict] = None
ambiguous_ignored_umi_collapsed_alleleseries_dict : Optional[GeneralAlleleCountSeriesDict] = None
ambiguous_ignored_alleleseries_dict : Optional[GeneralAlleleCountSeriesDict] = None

ambiguous_accepted_umi_noncollapsed_alleleseries_dict : Optional[DefaultDict[Tuple[str, Optional[str], Optional[str]], pd.Series]] = None
ambiguous_accepted_umi_collapsed_alleleseries_dict : Optional[DefaultDict[Tuple[str, Optional[str], Optional[str]], pd.Series]] = None
ambiguous_accepted_alleleseries_dict : Optional[DefaultDict[Tuple[str, Optional[str], Optional[str]], pd.Series]] = None
ambiguous_accepted_umi_noncollapsed_alleleseries_dict : Optional[GeneralAlleleCountSeriesDict] = None
ambiguous_accepted_umi_collapsed_alleleseries_dict : Optional[GeneralAlleleCountSeriesDict] = None
ambiguous_accepted_alleleseries_dict : Optional[GeneralAlleleCountSeriesDict] = None

ambiguous_spread_umi_noncollapsed_alleleseries_dict : Optional[DefaultDict[Tuple[str, Optional[str], Optional[str]], pd.Series]] = None
ambiguous_spread_umi_collapsed_alleleseries_dict : Optional[DefaultDict[Tuple[str, Optional[str], Optional[str]], pd.Series]] = None
ambiguous_spread_alleleseries_dict : Optional[DefaultDict[Tuple[str, Optional[str], Optional[str]], pd.Series]] = None
ambiguous_spread_umi_noncollapsed_alleleseries_dict : Optional[GeneralAlleleCountSeriesDict] = None
ambiguous_spread_umi_collapsed_alleleseries_dict : Optional[GeneralAlleleCountSeriesDict] = None
ambiguous_spread_alleleseries_dict : Optional[GeneralAlleleCountSeriesDict] = None

# Storing as a dataframe
ambiguous_ignored_umi_noncollapsed_allele_df : pd.DataFrame = None
ambiguous_ignored_umi_collapsed_allele_df : pd.DataFrame = None
ambiguous_ignored_allele_df : pd.DataFrame = None
ambiguous_ignored_umi_noncollapsed_allele_df : Optional[pd.DataFrame] = None
ambiguous_ignored_umi_collapsed_allele_df : Optional[pd.DataFrame] = None
ambiguous_ignored_allele_df : Optional[pd.DataFrame] = None

ambiguous_accepted_umi_noncollapsed_allele_df : pd.DataFrame = None
ambiguous_accepted_umi_collapsed_allele_df : pd.DataFrame = None
ambiguous_accepted_allele_df : pd.DataFrame = None
ambiguous_accepted_umi_noncollapsed_allele_df : Optional[pd.DataFrame] = None
ambiguous_accepted_umi_collapsed_allele_df : Optional[pd.DataFrame] = None
ambiguous_accepted_allele_df : Optional[pd.DataFrame] = None

ambiguous_spread_umi_noncollapsed_allele_df : pd.DataFrame = None
ambiguous_spread_umi_collapsed_allele_df : pd.DataFrame = None
ambiguous_spread_allele_df : pd.DataFrame = None
ambiguous_spread_umi_noncollapsed_allele_df : Optional[pd.DataFrame] = None
ambiguous_spread_umi_collapsed_allele_df : Optional[pd.DataFrame] = None
ambiguous_spread_allele_df : Optional[pd.DataFrame] = None

# This ensures that any empty series are kept at None
def __setattr__(self, name, value):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from .error_models import GuideCountError
from .quality_control_models import QualityControlResult

from .types import *

@dataclass
class SingleInferenceMatchResultValue:
Expand Down Expand Up @@ -145,6 +145,6 @@ class CountInput:
@dataclass
class WhitelistReporterCountsResult:
all_match_set_whitelist_reporter_counter_series_results: AllMatchSetWhitelistReporterCounterSeriesResults
observed_guide_reporter_umi_counts_inferred: DefaultDict[Tuple[str,Optional[str],Optional[str]], dict]
observed_guide_reporter_umi_counts_inferred: GeneralMappingInferenceDict
quality_control_result: QualityControlResult
count_input: CountInput
74 changes: 72 additions & 2 deletions crispr-ambiguous-mapping/crispr_ambiguous_mapping/models/types.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
from typing import Union, List, Mapping, Tuple, Optional, Any, DefaultDict
from typing import Union, List, Mapping, Tuple, Optional, Any, DefaultDict, Dict
from typing import Counter as CounterType
from .mapping_models import InferenceResult
import pandas as pd


# Sequence Count Result Objects
ProtospacerCounter = CounterType[str]
ProtospacerSurrogateCounter = CounterType[Tuple[str, str]]
ProtospacerBarcodeCounter = CounterType[Tuple[str, str]]
Expand All @@ -18,4 +22,70 @@
ProtospacerDictUMICounter,
ProtospacerSurrogateDictUMICounter,
ProtospacerBarcodeDictUMICounter,
ProtospacerSurrogateBarcodeDictUMICounter]
ProtospacerSurrogateBarcodeDictUMICounter]





# Inference Result Object
# Each alias is a DefaultDict whose values are plain dicts of InferenceResult.
# typing.Dict requires both a key and a value parameter; the one-parameter
# form Dict[InferenceResult] raises TypeError as soon as this module is
# imported, so the key parameter is spelled out here.
# NOTE(review): the inner dict's key type is not visible from this module, so
# it is left as Any -- confirm against the code that builds these dicts and
# tighten it.
# NOTE(review): InferenceResult comes from .mapping_models, which appears to
# star-import this module -- verify that this does not form an import cycle.
ProtospacerSurrogateBarcodeMappingInferenceDict = DefaultDict[Tuple[str, str, str], Dict[Any, InferenceResult]]
ProtospacerSurrogateMappingInferenceDict = DefaultDict[Tuple[str, str], Dict[Any, InferenceResult]]
ProtospacerBarcodeMappingInferenceDict = DefaultDict[Tuple[str, str], Dict[Any, InferenceResult]]
ProtospacerMappingInferenceDict = DefaultDict[str, Dict[Any, InferenceResult]]

# Union over the four key layouts defined above.
GeneralMappingInferenceDict = Union[ProtospacerSurrogateBarcodeMappingInferenceDict,
ProtospacerSurrogateMappingInferenceDict,
ProtospacerBarcodeMappingInferenceDict,
ProtospacerMappingInferenceDict]


# Mapping Count Dict Object
# Every match-count alias shares the same value type (an int or a float);
# only the key layout differs between them.
_MatchCountValue = Union[int, float]

ProtospacerSurrogateBarcodeMatchCountDict = DefaultDict[Tuple[str, str, str], _MatchCountValue]
ProtospacerSurrogateMatchCountDict = DefaultDict[Tuple[str, str], _MatchCountValue]
ProtospacerBarcodeMatchCountDict = DefaultDict[Tuple[str, str], _MatchCountValue]
ProtospacerMatchCountDict = DefaultDict[str, _MatchCountValue]

# Union over the four key layouts defined above.
GeneralMatchCountDict = Union[
    ProtospacerSurrogateBarcodeMatchCountDict,
    ProtospacerSurrogateMatchCountDict,
    ProtospacerBarcodeMatchCountDict,
    ProtospacerMatchCountDict,
]

# Mismatch-count aliases: each key is a pair of same-shaped sequence keys and
# each value is an int or a float.
_MismatchTripleKey = Tuple[str, str, str]
_MismatchPairKey = Tuple[str, str]
_MismatchCountValue = Union[int, float]

ProtospacerSurrogateBarcodeMismatchCountDict = DefaultDict[Tuple[_MismatchTripleKey, _MismatchTripleKey], _MismatchCountValue]
ProtospacerSurrogateMismatchCountDict = DefaultDict[Tuple[_MismatchPairKey, _MismatchPairKey], _MismatchCountValue]
ProtospacerBarcodeMismatchCountDict = DefaultDict[Tuple[_MismatchPairKey, _MismatchPairKey], _MismatchCountValue]
ProtospacerMismatchCountDict = DefaultDict[Tuple[str, str], _MismatchCountValue]

# Union over the four key layouts defined above.
GeneralMismatchCountDict = Union[
    ProtospacerSurrogateBarcodeMismatchCountDict,
    ProtospacerSurrogateMismatchCountDict,
    ProtospacerBarcodeMismatchCountDict,
    ProtospacerMismatchCountDict,
]

# Allele nested dict Object
# Outer key: inferred sequence. Inner key: observed sequence. Inner value: count.
# Outer and inner keys always share the same layout within one alias.
_AlleleCountValue = Union[int, float]
_AlleleTripleKey = Tuple[str, str, str]
_AllelePairKey = Tuple[str, str]

ProtospacerSurrogateBarcodeAlleleDict = DefaultDict[_AlleleTripleKey, DefaultDict[_AlleleTripleKey, _AlleleCountValue]]
ProtospacerSurrogateAlleleDict = DefaultDict[_AllelePairKey, DefaultDict[_AllelePairKey, _AlleleCountValue]]
ProtospacerBarcodeAlleleDict = DefaultDict[_AllelePairKey, DefaultDict[_AllelePairKey, _AlleleCountValue]]
ProtospacerAlleleDict = DefaultDict[str, DefaultDict[str, _AlleleCountValue]]

# Union over the four key layouts defined above.
GeneralAlleleDict = Union[
    ProtospacerSurrogateBarcodeAlleleDict,
    ProtospacerSurrogateAlleleDict,
    ProtospacerBarcodeAlleleDict,
    ProtospacerAlleleDict,
]




# Allele Count Series Dict
# Same key layouts as the other alias families; every value is a pandas Series.
_SeriesTripleKey = Tuple[str, str, str]
_SeriesPairKey = Tuple[str, str]

ProtospacerSurrogateBarcodeAlleleCountSeriesDict = DefaultDict[_SeriesTripleKey, pd.Series]
ProtospacerSurrogateAlleleCountSeriesDict = DefaultDict[_SeriesPairKey, pd.Series]
ProtospacerBarcodeAlleleCountSeriesDict = DefaultDict[_SeriesPairKey, pd.Series]
ProtospacerAlleleCountSeriesDict = DefaultDict[str, pd.Series]

# Union over the four key layouts defined above.
GeneralAlleleCountSeriesDict = Union[
    ProtospacerSurrogateBarcodeAlleleCountSeriesDict,
    ProtospacerSurrogateAlleleCountSeriesDict,
    ProtospacerBarcodeAlleleCountSeriesDict,
    ProtospacerAlleleCountSeriesDict,
]




Loading

0 comments on commit 6ccbea1

Please sign in to comment.