-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_ZCRs_single_cov.py
103 lines (77 loc) · 2.32 KB
/
get_ZCRs_single_cov.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
__usage__ = """
python get_ZCRs_single_cov.py
--cov <FULL_PATH_TO_REFERENCE_COVERAGE_FILE>
--out <FULL_PATH_TO_OUTPUT_FILE>
"""
import sys
# --- end of imports --- #
def load_coverage( cov_file ):
"""! @brief load coverage from given file """
coverage = {}
with open( cov_file, "r" ) as f:
line = f.readline()
prev_chr = line.split('\t')[0]
cov = []
while line:
parts = line.strip().split('\t')
if parts[0] != prev_chr:
coverage.update( { prev_chr: cov } )
prev_chr = parts[0]
cov = []
cov.append( float( parts[2] ) )
line = f.readline()
coverage.update( { prev_chr: cov } )
return coverage
def find_ZCRs( cov, window_size=100 ):
"""! @brief find zero coverage regions """
ZCRs = []
for key in cov.keys():
r_cov = cov[ key ]
r_avg = sum( r_cov ) / float( len( r_cov ) )
print r_avg
start = 0
end = start + window_size
status = True
while status:
if end > len( r_cov ):
status = False
end = len( r_cov )
if start == end:
status = False
if sum( r_cov[ start:end ] ) / window_size < 0.05*r_avg:
in_status = True
zcr_start = 0 + start
zcr_end = 0 + start
while in_status:
start += window_size
end += window_size
zcr_end += window_size
if end > len( r_cov ):
end = len( r_cov )
zcr_end = len( r_cov )
in_status = False
if sum( r_cov[ start:end ] ) / window_size < 0.05*r_avg:
in_status = True
if end >= len( r_cov ):
in_status = False
else:
in_status = False
ZCRs.append( { 'chr': key, 'start': zcr_start, 'end': zcr_end, 'ref': sum( r_cov[ zcr_start:zcr_end ] ) / ( zcr_end - zcr_start ) } )
start += window_size
end += window_size
return ZCRs
def main( arguments ):
"""! @brief run everything """
cov_file = arguments[ arguments.index( '--cov' )+1 ]
output_file = arguments[ arguments.index( '--out' )+1 ]
cov = load_coverage( cov_file )
ZCRs = find_ZCRs( cov )
print "number of detected ZCRs: " + str( len( ZCRs ) )
with open( output_file, "w" ) as out:
out.write("Chr\tStart\tEnd\tRefCov\n")
for ZCR in ZCRs:
out.write( "\t".join( map( str, [ ZCR['chr'], ZCR['start'], ZCR['end'], ZCR['ref'] ] ) ) + '\n' )
if '--cov' in sys.argv and '--out' in sys.argv:
main( sys.argv )
else:
sys.exit( __usage__ )