From 5d063fffaae27ab565af9161754a49ba88447933 Mon Sep 17 00:00:00 2001
From: Robert Petit
Date: Sun, 25 Aug 2024 11:52:59 -0600
Subject: [PATCH] move params to the YAML files

Store the default BLAST thresholds in the `engine.params` section of
data/sccmec-targets.yaml and data/sccmec-regions.yaml.

bin/sccmec:
- applies `min_pident` / `min_coverage` from each YAML's `engine.params`
  to the matching `--min-targets-*` / `--min-regions-*` value, unless
  that option is present on the command line
- lowers the click defaults to 90/80 (targets) and 85/83 (regions)
- reads option defaults from SCCMEC_TARGETS_YAML, SCCMEC_REGIONS_YAML,
  SCCMEC_TARGETS_FASTA and SCCMEC_REGIONS_FASTA instead of the shared
  CAML_YAML / CAML_TARGETS environment variables

bin/sccmec-bioconda is a new wrapper that points those four variables
at ../share/sccmec (relative to the script) and runs `sccmec-main`.
---
Review amendments relative to the original revision of this patch:
- bin/sccmec: YAML thresholds were assigned to `min_pident` /
  `min_coverage`, which are not the names click derives from the
  `--min-targets-*` / `--min-regions-*` options, and the regions values
  overwrote the targets values. They are now assigned to
  `min_targets_pident`, `min_targets_coverage`, `min_regions_pident`
  and `min_regions_coverage`.
- bin/sccmec: `--opt=VALUE` is now recognised as "set on the command
  line", and a YAML without `engine.params` no longer raises KeyError.
- CHANGELOG: dropped the hunk that appended `@click.option(...)` Python
  code to the end of the changelog.
- bin/sccmec-bioconda: quote "$0" so install paths with spaces work.
The commit id above and the `index` blob ids below predate these
amendments.

 CHANGELOG                | 10 ++++++++++
 bin/sccmec               | 31 +++++++++++++++++++++++--------
 bin/sccmec-bioconda      |  9 +++++++++
 data/sccmec-regions.yaml |  3 +++
 data/sccmec-targets.yaml |  3 +++
 5 files changed, 48 insertions(+), 8 deletions(-)
 create mode 100644 bin/sccmec-bioconda

diff --git a/CHANGELOG b/CHANGELOG
index 73940c3..6f2ffc2 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,5 +1,15 @@
 # Changelog
 
+## v1.2.0 rpetit3/sccmec "" 2024/08/25
+
+- Utilize both targets and full cassettes for classification
+- Update default thresholds based on `camlhmp-blast-thresholds`
+    - `--min-targets-pident` 90
+    - `--min-targets-coverage` 80
+    - `--min-regions-pident` 85
+    - `--min-regions-coverage` 83
+- Default values are set in YAMLs
+
 ## v1.1.0 rpetit3/sccmec "USA300" 2024/08/15
 
 - Update for latest camlhmp changes
diff --git a/bin/sccmec b/bin/sccmec
index f91e456..6cdcb30 100755
--- a/bin/sccmec
+++ b/bin/sccmec
@@ -72,7 +72,7 @@ click.rich_click.OPTION_GROUPS = {
     "--yaml-targets",
     "-yt",
     required=True,
-    default=os.environ.get("CAML_YAML", None),
+    default=os.environ.get("SCCMEC_TARGETS_YAML", None),
     show_default=True,
     help="YAML file documenting the targets and types",
 )
@@ -80,7 +80,7 @@ click.rich_click.OPTION_GROUPS = {
     "--yaml-regions",
     "-yr",
     required=True,
-    default=os.environ.get("CAML_YAML", None),
+    default=os.environ.get("SCCMEC_REGIONS_YAML", None),
     show_default=True,
     help="YAML file documenting the regions and types",
 )
@@ -88,7 +88,7 @@ click.rich_click.OPTION_GROUPS = {
     "--targets",
     "-t",
     required=False if "--version" in sys.argv else True,
-    default=os.environ.get("CAML_TARGETS", None),
+    default=os.environ.get("SCCMEC_TARGETS_FASTA", None),
     show_default=True,
     help="Query targets in FASTA format",
 )
@@ -96,7 +96,7 @@ click.rich_click.OPTION_GROUPS = {
     "--regions",
     "-r",
     required=False if "--version" in sys.argv else True,
-    default=os.environ.get("CAML_TARGETS", None),
+    default=os.environ.get("SCCMEC_REGIONS_FASTA", None),
     show_default=True,
     help="Query regions in FASTA format",
 )
@@ -118,25 +118,25 @@ click.rich_click.OPTION_GROUPS = {
 )
 @click.option(
     "--min-targets-pident",
-    default=95,
+    default=90,
     show_default=True,
     help="Minimum percent identity of targets to count a hit",
 )
 @click.option(
     "--min-targets-coverage",
-    default=95,
+    default=80,
     show_default=True,
     help="Minimum percent coverage of targets to count a hit",
 )
 @click.option(
     "--min-regions-pident",
-    default=95,
+    default=85,
     show_default=True,
     help="Minimum percent identity of regions to count a hit",
 )
 @click.option(
     "--min-regions-coverage",
-    default=95,
+    default=83,
     show_default=True,
     help="Minimum percent coverage of regions to count a hit",
 )
@@ -219,6 +219,21 @@ def sccmec(
     file_exists_error(regions_blast_tsv, force)
     file_exists_error(regions_details_tsv, force)
 
+    # Thresholds in each YAML's engine params replace the click defaults, unless the
+    # option was given on the command line (as `--opt VALUE` or `--opt=VALUE`)
+    cli_opts = {arg.split("=", 1)[0] for arg in sys.argv[1:]}
+    targets_params = targets_framework["engine"].get("params") or {}
+    if "--min-targets-pident" not in cli_opts:
+        min_targets_pident = targets_params.get("min_pident", min_targets_pident)
+    if "--min-targets-coverage" not in cli_opts:
+        min_targets_coverage = targets_params.get("min_coverage", min_targets_coverage)
+
+    regions_params = regions_framework["engine"].get("params") or {}
+    if "--min-regions-pident" not in cli_opts:
+        min_regions_pident = regions_params.get("min_pident", min_regions_pident)
+    if "--min-regions-coverage" not in cli_opts:
+        min_regions_coverage = regions_params.get("min_coverage", min_regions_coverage)
+
     # Describe the command line arguments
     console = rich.console.Console(stderr=True)
     print(
diff --git a/bin/sccmec-bioconda b/bin/sccmec-bioconda
new file mode 100644
index 0000000..b7adc1a
--- /dev/null
+++ b/bin/sccmec-bioconda
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+sccmec_dir=$(dirname "$0")
+
+SCCMEC_TARGETS_YAML="${sccmec_dir}/../share/sccmec/sccmec-targets.yaml" \
+SCCMEC_TARGETS_FASTA="${sccmec_dir}/../share/sccmec/sccmec-targets.fasta" \
+SCCMEC_REGIONS_YAML="${sccmec_dir}/../share/sccmec/sccmec-regions.yaml" \
+SCCMEC_REGIONS_FASTA="${sccmec_dir}/../share/sccmec/sccmec-regions.fasta" \
+    sccmec-main \
+        "${@:1}"
diff --git a/data/sccmec-regions.yaml b/data/sccmec-regions.yaml
index d51e340..b7c3812 100644
--- a/data/sccmec-regions.yaml
+++ b/data/sccmec-regions.yaml
@@ -13,6 +13,9 @@ metadata:
 engine:
   type: "blast" # The type of engine to use
   tool: blastn # The tool used to generate the data
+  params:
+    min_pident: 85 # The minimum percent identity for a hit
+    min_coverage: 83 # The minimum coverage for a hit
 
 # targets provides a list of sequence targets (primers, genes, proteins, etc...)
 targets:
diff --git a/data/sccmec-targets.yaml b/data/sccmec-targets.yaml
index d294b5d..e8da48f 100644
--- a/data/sccmec-targets.yaml
+++ b/data/sccmec-targets.yaml
@@ -13,6 +13,9 @@ metadata:
 engine:
   type: "blast" # The type of engine to use
   tool: blastn # The tool used to generate the data
+  params:
+    min_pident: 90 # The minimum percent identity for a hit
+    min_coverage: 80 # The minimum coverage for a hit
 
 # targets provides a list of sequence targets (primers, genes, proteins, etc...)
 targets: