-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathfiltering.py
79 lines (43 loc) · 1.09 KB
/
filtering.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import re # for Regular expressions
import sys
from Bio import AlignIO
# Sort FASTA alignments into "good", "ambiguous" and "asterisks" lists.
#
# Reads a list file (one alignment path per line), inspects every alignment
# longer than MIN_ALIGNMENT_LENGTH columns and writes the path of each one to
# exactly one of three text files in the working directory.  Alignments that
# are not longer than MIN_ALIGNMENT_LENGTH are not written anywhere.

# List file used when no path is given on the command line.
DEFAULT_LIST = 'all.prunned.list'

# Only alignments strictly longer than this many columns are classified.
MIN_ALIGNMENT_LENGTH = 350
# More than this many matches of `ambiguous` marks a file as ambiguous.
MAX_AMBIGUOUS = 2
# More than this many matches of `missing` marks a file as "asterisks".
MAX_MISSING = 1

missing = '[(+*)]'
ambiguous = '[N]'

# Compiled once so the per-file loop does not rebuild the patterns.
AMBIGUOUS_RE = re.compile(ambiguous, re.I)
MISSING_RE = re.compile(missing, re.I)

# Progress marker printed for each classified alignment.
MESSAGES = {'ambiguous': ')-B', 'asterisks': ')-;', 'good': 'lol'}


def classify_alignment(raw_text, alignment_length):
    """Classify one alignment from its raw file text and column count.

    raw_text is the complete text of the alignment file and
    alignment_length is the number of alignment columns.

    Returns None when alignment_length is not greater than
    MIN_ALIGNMENT_LENGTH; otherwise 'ambiguous' when raw_text holds more than
    MAX_AMBIGUOUS case-insensitive 'N' characters, else 'asterisks' when it
    holds more than MAX_MISSING of the characters ( + * ), else 'good'.
    """
    if alignment_length <= MIN_ALIGNMENT_LENGTH:
        return None
    # NOTE(review): matches are counted over the whole file text, so 'n'
    # characters in FASTA header lines count as well -- confirm this is the
    # intended behaviour before restricting the count to sequence lines.
    if len(AMBIGUOUS_RE.findall(raw_text)) > MAX_AMBIGUOUS:
        return 'ambiguous'
    if len(MISSING_RE.findall(raw_text)) > MAX_MISSING:
        return 'asterisks'
    return 'good'


def read_path_list(list_path):
    """Return the non-blank lines of list_path, one alignment path each."""
    with open(list_path) as handle:
        # Blank lines are skipped so they are never passed to open().
        return [line for line in handle.read().splitlines() if line.strip()]


def write_path_list(out_path, paths):
    """Write every entry of paths to out_path, one per line."""
    with open(out_path, 'w') as handle:
        for item in paths:
            handle.write("%s\n" % item)


def main(argv=None):
    """Classify every alignment in the list file and write the three reports.

    argv is the list of command-line arguments without the program name
    (defaults to sys.argv[1:]).  Its first element, when present, is used as
    the list file instead of DEFAULT_LIST.
    """
    args = sys.argv[1:] if argv is None else argv
    infile = args[0] if args else DEFAULT_LIST

    buckets = {'good': [], 'ambiguous': [], 'asterisks': []}
    for path in read_path_list(infile):
        # The context manager closes the file even if parsing fails below.
        with open(path, 'r') as handle:
            raw_text = handle.read()
        alignment = AlignIO.read(path, "fasta")
        verdict = classify_alignment(raw_text,
                                     alignment.get_alignment_length())
        if verdict is None:
            # Not longer than MIN_ALIGNMENT_LENGTH: left out of every report.
            continue
        print(MESSAGES[verdict])
        buckets[verdict].append(path)

    write_path_list('goodalignments.txt', buckets['good'])
    write_path_list('ambiguous.txt', buckets['ambiguous'])
    write_path_list('asterisks.txt', buckets['asterisks'])


if __name__ == '__main__':
    main()
######### Run this script from the directory that contains 'all.prunned.list': python filtering.py