From 523ba1b6ae4b751fb6644e4503eddf1d8ea6c8bb Mon Sep 17 00:00:00 2001 From: Robert A Petit III Date: Thu, 24 May 2018 16:34:36 -0400 Subject: [PATCH] added functions for storing agr primer based typing --- sample/tools.py | 3 + .../commands/insert_variant_reference.py | 6 +- variant/management/commands/update_snps.py | 73 +++++++++++++++++++ virulence/models.py | 29 ++++++++ virulence/tools.py | 11 ++- 5 files changed, 118 insertions(+), 4 deletions(-) create mode 100644 variant/management/commands/update_snps.py diff --git a/sample/tools.py b/sample/tools.py index 1aaaa2e..c5643bb 100644 --- a/sample/tools.py +++ b/sample/tools.py @@ -374,6 +374,7 @@ def get_file_list(is_paired): 'virulence_summary': '{0}/virulence/summary.csv', 'virulence_assembled_seqs': '{0}/virulence/assembled_seqs.fa.gz', 'virulence_debug_report': '{0}/virulence/debug.report.tsv', + 'virulence_agr_primers': '{0}/agr/primers.json', 'timeline': '{0}/staphopia-timeline.html', 'version': '{0}/staphopia-version.txt' @@ -429,6 +430,8 @@ def get_file_list(is_paired): 'variants': '{0}/variants/{1}.variants.vcf.gz', + 'virulence_agr_primers': '{0}/agr/primers.json', + 'timeline': '{0}/staphopia-timeline.html', 'version': '{0}/staphopia-version.txt' } diff --git a/variant/management/commands/insert_variant_reference.py b/variant/management/commands/insert_variant_reference.py index ff01627..736e9ec 100644 --- a/variant/management/commands/insert_variant_reference.py +++ b/variant/management/commands/insert_variant_reference.py @@ -198,11 +198,11 @@ def create_snp(self, record, reference, annotation, feature): reference_codon=( '.' if record.INFO['RefCodon'][0] is None - else record.INFO['RefCodon'][0] + else record.INFO['RefCodon'] ), alternate_codon=( '.' if record.INFO['AltCodon'][0] is None - else record.INFO['AltCodon'][0] + else record.INFO['AltCodon'] ), reference_amino_acid=( record.INFO['RefAminoAcid'] if record.INFO['RefAminoAcid'] @@ -222,7 +222,7 @@ def create_snp(self, record, reference, annotation, feature): ), amino_acid_change=( '.' if record.INFO['AminoAcidChange'][0] is None - else record.INFO['AminoAcidChange'][0] + else record.INFO['AminoAcidChange'] ), is_synonymous=record.INFO['IsSynonymous'], is_transition=record.INFO['IsTransition'], diff --git a/variant/management/commands/update_snps.py b/variant/management/commands/update_snps.py new file mode 100644 index 0000000..c5ffd73 --- /dev/null +++ b/variant/management/commands/update_snps.py @@ -0,0 +1,73 @@ +"""Update SNPs in the database.""" +import sys +from cyvcf2 import VCF +import time + +from django.db import connection, transaction +from django.db.utils import IntegrityError +from django.core.management.base import BaseCommand, CommandError + +from staphopia.utils import timeit +from variant.models import Reference, SNP + + +def get_reference_instance(reference_name): + """Get reference instance.""" + try: + return Reference.objects.get(name=reference_name) + except IntegrityError: + raise CommandError('Error getting/saving reference information') + + +class Command(BaseCommand): + """Update SNPs in the database.""" + + help = 'Update SNPs in the database.' + + def add_arguments(self, parser): + """Command line arguements.""" + parser.add_argument('input', metavar='INPUT_VCF', + help=('Annotated VCF formated file to ' + 'be inserted')) + parser.add_argument('--compressed', action='store_true', + help='Input VCF is gzipped.') + + def handle(self, *args, **opts): + """Insert results to database.""" + # Open VCF for reading + try: + vcf_reader = VCF(opts['input']) + except IOError: + raise CommandError('{0} does not exist'.format(input)) + + # Get reference info + reference_obj = get_reference_instance(vcf_reader.seqnames[0]) + + # Read through VCF + start_time = time.time() + count = 0 + with connection.cursor() as cursor: + for record in vcf_reader: + if record.is_snp: + sql = """UPDATE variant_snp + SET reference_codon='{0}', alternate_codon='{1}', + amino_acid_change='{2}' + WHERE reference_id={3} AND reference_position={4} AND + alternate_base='{5}';""".format( + '.' if record.INFO['RefCodon'][0] is None else record.INFO['RefCodon'], + '.' if record.INFO['AltCodon'][0] is None else record.INFO['AltCodon'], + '.' if record.INFO['AminoAcidChange'][0] is None else record.INFO['AminoAcidChange'], + reference_obj.pk, + record.POS, + record.ALT[0] + ) + cursor.execute(sql) + count += 1 + if count % 100000 == 0: + total_time = f'{time.time() - start_time:.2f}' + rate = f'{100000 / float(total_time):.2f}' + print(''.join([ + f'Processed 100k, Total {count} SNPs ', + f'(took {total_time}s, {rate} snp/s)' + ])) + start_time = time.time() diff --git a/virulence/models.py b/virulence/models.py index 8c4b19e..0702e7f 100644 --- a/virulence/models.py +++ b/virulence/models.py @@ -8,6 +8,7 @@ from django.contrib.postgres.fields import JSONField from sample.models import Sample +from staphopia.models import BlastQuery from version.models import Version @@ -40,3 +41,31 @@ class AribaSequence(models.Model): class Meta: unique_together = ('sample', 'version') + + +class AgrPrimers(models.Model): + """BLAST hits against SCCmec primers.""" + sample = models.ForeignKey(Sample, on_delete=models.CASCADE) + version = models.ForeignKey(Version, on_delete=models.CASCADE, + related_name='agr_primers_version') + query = models.ForeignKey(BlastQuery, on_delete=models.CASCADE) + contig = models.PositiveIntegerField() + + bitscore = models.PositiveSmallIntegerField() + evalue = models.DecimalField(max_digits=7, decimal_places=2) + identity = models.PositiveSmallIntegerField() + mismatch = models.PositiveSmallIntegerField() + gaps = models.PositiveSmallIntegerField() + hamming_distance = models.PositiveSmallIntegerField() + query_from = models.PositiveSmallIntegerField() + query_to = models.PositiveSmallIntegerField() + hit_from = models.PositiveIntegerField() + hit_to = models.PositiveIntegerField() + align_len = models.PositiveSmallIntegerField() + + qseq = models.TextField() + hseq = models.TextField() + midline = models.TextField() + + class Meta: + unique_together = ('sample', 'version', 'query') diff --git a/virulence/tools.py b/virulence/tools.py index 9e36a65..6c2608e 100644 --- a/virulence/tools.py +++ b/virulence/tools.py @@ -10,8 +10,10 @@ from django.db.utils import IntegrityError from django.core.management.base import CommandError +from assembly.tools import get_contigs +from sccmec.tools import insert_blast from staphopia.utils import read_fasta, timeit -from virulence.models import Ariba, AribaSequence, Cluster +from virulence.models import Ariba, AribaSequence, Cluster, AgrPrimers @timeit @@ -23,6 +25,13 @@ def insert_virulence(sample, version, files, force=False): results, sequences = read_report(files) + contigs = {} + for contig in get_contigs(sample, version): + contigs[contig.spades] = int(contig.spades.split('_')[1]) + + insert_blast(sample, version, files['virulence_agr_primers'], AgrPrimers, + contigs) + try: Ariba.objects.create(sample=sample, version=version, results=results) except IntegrityError as e: