-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmissing_freq.py
42 lines (40 loc) · 1.3 KB
/
missing_freq.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#### missing genotypes are 'imputed' as mean genotype value for the rest of the population #####
#### missing genotypes are frequency for which depth = count = 0 ####
#module load system/Python-3.6.3
#python
import csv
import sys
import os
from functools import reduce
import glob
import numpy as np
import pandas as pd
import statistics
w=open(sys.argv[2],'w')
#w=open('work/GWAS/BeeStrongHAV3_1/freq_imputed.txt','w')
#for f in open('work/GWAS/BeeStrongHAV3_1/freq.txt'):
for f in open(sys.argv[1]):
freq=f.strip('\n').split(' ')
if ('BS' in freq[6]):
head=str(freq).strip('[').strip(']').replace(',','').replace("'",'')
w.write(str(head)+'\n')
else:
Freq=np.array(freq)
Freq=Freq[4:len(Freq)]
p_miss=(Freq=='nan').sum()/len(Freq)
if (p_miss<0.05):
f_cor=[x for x in Freq if x != 'nan' ]
f_cor=[float(item) for item in f_cor]
m_f_cor=sum(f_cor)/len(f_cor)
Freq2=[m_f_cor if x == 'nan' else x for x in Freq]
freq_uniq=set(Freq2)
# if (len(freq_uniq)==1):
# print(freq[0]+' '+freq[1])
# elif (max(Freq2)>1):
Freq2=[float(item) for item in Freq2]
if (max(Freq2)>1):
print(freq[0]+' '+freq[1]+' still freq error')
else:
s=freq[0]+' '+freq[1]+' '+freq[2]+' '+freq[3]+' '+str(Freq2).strip('[').strip(']').replace(',','').replace("'",'')
w.write(str(s)+'\n')
w.close()