From 48aa3503a059d67e191a96f99e9fb82dd42d8851 Mon Sep 17 00:00:00 2001 From: Jonatan Date: Tue, 3 Oct 2023 22:01:33 +0200 Subject: [PATCH] Removed unnecessary flags. Added check for translation beyond utr. Moved proximal variants to flags column. --- bin/epitopes.py | 4 +--- bin/variant_effect.py | 12 ++++++++---- bin/variants.py | 3 ++- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/bin/epitopes.py b/bin/epitopes.py index 22e6caf..73b731c 100755 --- a/bin/epitopes.py +++ b/bin/epitopes.py @@ -19,11 +19,10 @@ def create_epitope_varcode(chrm, start, ref, alt, db, mut_dna, mut_aa, transcrip errors += ' Could not infer the effect.' else: # Retrieve effect type - protein_mut = effect.short_description + protein_mut = effect.short_description if protein_mut is None: errors += ' Could not retrieve AA mutation.' elif not protein_mut.startswith('p.'): - errors += ' Computed with dictionary method.' errors += ' Invalid mutation {}.'.format(protein_mut) aa_pos = int(re.findall(r'\d+', mut_aa)[0]) if mut_aa != '' else 0 cDNA_pos = int(re.findall(r'\d+', mut_dna)[0]) if mut_dna != '' else 0 @@ -63,7 +62,6 @@ def create_epitope_varcode(chrm, start, ref, alt, db, mut_dna, mut_aa, transcrip if effect.mutant_protein_sequence is None or effect.original_protein_sequence is None: errors += ' Could not retrieve protein sequence.' else: - errors += ' Computed with varcode method.' # Type of effect effect_type = type(effect).__name__ if 'StopLoss' in effect_type: diff --git a/bin/variant_effect.py b/bin/variant_effect.py index 9493352..95a438a 100644 --- a/bin/variant_effect.py +++ b/bin/variant_effect.py @@ -11,7 +11,6 @@ def translate_dna(seq): return translate(seq, to_stop=True) - def missense_variant(starts, ends, wt_mer, mut_mer, errors, mut_dna, mut_aa, transcript, cDNA_pos, aa_pos, cDNA_dict, AA_dict): if 'delins' in mut_dna: return errors, wt_mer, mut_mer @@ -70,9 +69,9 @@ def frameshift_variant(ref, starts, ends, wt_mer, mut_mer, errors, mut_dna, mut_ wt_mer = [protein_seq[x:y] for x, y in zip(start, end)] if 'del' in mut_dna: fs = len(ref) - mut_cDNA = cDNA_seq[:cDNA_pos - 1] + cDNA_seq[cDNA_pos + fs - 1:] - mut_fasta = str(translate_dna(mut_cDNA.replace(' ', ''))) - mut_mer = [mut_fasta[x:] for x in start] + mut_fasta = cDNA_seq[:cDNA_pos - 1] + cDNA_seq[cDNA_pos + fs - 1:] + mut_protein = str(translate_dna(mut_fasta.replace(' ', ''))) + mut_mer = [mut_protein[x:] for x in start] elif 'dup' in mut_dna: dup_pos = [None, None] dup_pos = list(map(int, re.findall(r'\d+', mut_dna))) if mut_dna != '' else 0 @@ -84,6 +83,11 @@ def frameshift_variant(ref, starts, ends, wt_mer, mut_mer, errors, mut_dna, mut_ mut_fasta = cDNA_seq[:cDNA_pos] + ins_seq + cDNA_seq[cDNA_pos:] mut_protein = translate_dna(mut_fasta) mut_mer = [mut_protein[x:y] for x, y in zip(start, end)] + + cds_utr_protein = translate(mut_fasta) + if "*" in cds_utr_protein and cds_utr_protein[-1] != "*": + errors += " Translation goes beyond the 3'-UTR." + return errors, wt_mer, mut_mer diff --git a/bin/variants.py b/bin/variants.py index 8287b37..8c100d8 100755 --- a/bin/variants.py +++ b/bin/variants.py @@ -51,7 +51,7 @@ def proximal_variants(vcf, chromosome, start, end, alt, flanking_bases): if entry.start == start and entry.stop == end and entry.alts[0] == alt: continue else: - proximal_variants += f"{entry.chrom}:{entry.pos}-{entry.ref}>{entry.alts[0]}. " + proximal_variants += f" {entry.chrom}:{entry.pos}-{entry.ref}>{entry.alts[0]}. " return proximal_variants @@ -372,6 +372,7 @@ def filter_variants_dna(file, normal_coverage, tumor_coverage, tumor_var_depth, variant.num_callers = len(filtered) variant.status = pass_snp >= num_callers or pass_indel >= num_callers_indel variant.epitopes = variant_epitopes + variant.epitopes.flags = proximal_vars variant.dbsnp = avsnp150 variant.gnomad = gnomad_AF variant.cosmic = cosmic70