-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #27 from haessar/feature-input-validation
Validate compatibility of GFF and BAM input files
- Loading branch information
Showing 7 changed files with 103 additions and 16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
import logging | ||
|
||
import pysam | ||
|
||
from .utils import connect_db, index_bam_file | ||
|
||
|
||
def matching_chr(db_path, args):
    """
    Check seqids in BAM and GFF input files to ensure at least one matches.

    Every distinct seqid found in the annotation database is looked up in the
    (indexed) BAM file. Seqids the BAM file rejects are reported with a
    warning; the rest are collected as the set of shared chromosomes.

    Parameters:
        db_path: path handed to ``connect_db`` to open the annotation database.
        args: parsed command-line arguments; ``args.BAM_IN`` and
            ``args.processors`` are read.

    Returns:
        bool: True if at least one seqid is present in both inputs.
    """
    db = connect_db(db_path)
    gff_chrs = {f.seqid for f in db.all_features()}
    bam_chrs = set()

    # require_index=True below needs an index, so make sure one exists first.
    index_bam_file(args.BAM_IN, args.processors)
    samfile = pysam.AlignmentFile(args.BAM_IN, "rb", require_index=True)

    # try/finally rather than ``with`` so that the object we fetch from is
    # exactly the one returned by pysam.AlignmentFile, and it is always closed.
    try:
        for seqid in gff_chrs:
            try:
                # The result is discarded: only whether the lookup is
                # accepted matters here.
                samfile.fetch(seqid)
            except ValueError:
                logging.warning("Chromosome {} from GFF_IN not found in BAM_IN.".format(seqid))
            else:
                bam_chrs.add(str(seqid))
    finally:
        samfile.close()

    if len(bam_chrs) > 1:
        # Sorted so the message is stable between runs (set order is arbitrary).
        logging.warning(
            """
            Chromosomes {} are present in both GFF_IN and BAM_IN.
            Consider reducing both to a single chromosome to improve performance.
            """.format(', '.join(sorted(bam_chrs)))
        )
    return bool(bam_chrs)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
[metadata] | ||
name = peaks2utr | ||
-version = 1.2.3 | ||
+version = 1.2.4 | ||
author = William Haese-Hill | ||
author_email = [email protected] | ||
description = A robust, parallelized Python CLI for annotating three_prime_UTR | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
import os | ||
import os.path | ||
import unittest | ||
from unittest.mock import MagicMock, patch | ||
|
||
import gffutils | ||
|
||
from peaks2utr import prepare_argparser | ||
from peaks2utr.validation import matching_chr | ||
|
||
TEST_DIR = os.path.dirname(__file__) | ||
|
||
|
||
class TestValidation(unittest.TestCase):
    """Tests for ``matching_chr`` using a mocked BAM file."""

    def setUp(self):
        """Parse minimal CLI arguments and build a fresh annotation database."""
        parser = prepare_argparser()
        self.args = parser.parse_args(["Chr1.gtf", ""])
        self.db_path = os.path.join(TEST_DIR, "Chr1.db")
        gff_path = os.path.join(TEST_DIR, self.args.GFF_IN)
        gffutils.create_db(gff_path, self.db_path, force=True)

    def tearDown(self):
        """Delete the database file created in ``setUp``."""
        os.remove(self.db_path)

    def test_matching_chr(self):
        """``matching_chr`` is True when fetch succeeds, False when it raises."""
        alignment_file = MagicMock()
        alignment_file.fetch.return_value = object
        with patch("peaks2utr.validation.index_bam_file") as index_mock, \
                patch("pysam.AlignmentFile", return_value=alignment_file):
            # Every fetch succeeds, so at least one seqid matches.
            self.assertTrue(matching_chr(self.db_path, self.args))
            # Every fetch now raises, so nothing matches.
            alignment_file.fetch.side_effect = ValueError()
            self.assertFalse(matching_chr(self.db_path, self.args))
            # The BAM indexing helper is invoked once per matching_chr call.
            self.assertEqual(index_mock.call_count, 2)
|
||
|
||
# Run the test suite when this module is executed directly.
if __name__ == "__main__":
    unittest.main()