-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathHENe_052315.R
132 lines (111 loc) · 4.38 KB
/
HENe_052315.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
###### METHOD 2: Heterozygote excess Ne ######
# missing: removing samples with allele frequency < X
# missing: confidence intervals / bootstrapping
#' @title getSampleSize_byLocus
#' @description Takes a genind object and returns the number of samples occurring at each locus
#'
#' @author George Shirreff <georgeshirreff@@gmail.com>
#'
#' @examples library(adegenet)
#' @examples data(nancycats)
#' @examples getSampleSize_byLocus(nancycats)
#'
getSampleSize_byLocus <- function(genind_obj)
{
K<-length([email protected])
ends<-cumsum([email protected])
starts<-c(0,ends[-length(ends)])+1
sample_sizes<-numeric(K)
#i=4
for (i in 1:K){
sample_sizes[i]<-sum(na.rm=T,genind_obj@tab[,starts[i]:ends[i]])
}
return(sample_sizes)
}
#' @title Removing alleles below critical frequency
#' @description Takes a genind object, removes alleles below a certain allele frequency, and returns a list of genind objects, one object per locus
#'
#' @author Christine Ewers-Saucedo <ewers.christine@@gmail.com>
#'
#' @param genObj genind object
#' @param crit the lowest allele frequency allowed to occur. Alleles at a lower frequency are excluded.
#'
#' @examples library(adegenet)
#' @examples data(nancycats)
#' @examples makeCrit(nancycats)
#'
makeCrit <- function(genObj, crit) {
# testing the function
#genObj <- nancycats
#j <- 1
#crit <- 0.05
genind.by.locus <- seploc(genObj) # separates the genind object by locus
p <- makeP(genind.by.locus) # makes list with each list element containing the allele frequencies for one locus
K <- length(genind.by.locus) # number of loci
pp <- 0
for (j in 1:K) {
pp <- p[[j]] >= crit # boolean vector (TRUE FALSE TRUE etc)
genCrit <- genind.by.locus
genCrit[[j]]@tab <- genind.by.locus[[j]]@tab[,pp] # subsets each locus to include only alleles above critical allele frequency
rs <- rowSums(genCrit[[j]]@tab, na.rm = T)
for (k in 1:nrow(genCrit[[j]]@tab)) {
if ((rs[k]) < 2) {
genCrit[[j]]@tab[k,] <- NA
}
}
genCrit[[j]]@loc.nall <- ncol(genCrit[[j]]@tab)
genCrit[[j]]@loc.fac <- genind.by.locus[[j]]@loc.fac[pp]
genCrit[[j]]@all.names[[1]] <- genind.by.locus[[j]]@all.names[[1]][pp]
}
return(genCrit)
}
#' @title HENe
#' @description Calculates population size from a genind object by heterozygote excess
#'
#' @author Christine Ewers-Saucedo <ewers.christine@@gmail.com>, George Shirreff <georgeshirreff@@gmail.com>
#'
#' @examples library(adegenet)
#' @examples data(nancycats)
#' @examples HENe(nancycats)
#'
library(adegenet)
data(nancycats)
genind_obj <- nancycats
HENe <- function(genind_obj, crit) {
genind.by.locus <- makeCrit(genind_obj, crit)
K <- length(genind.by.locus) # number of loci
sample_sizes <- numeric(K)
for (i in 1:K) {
sample_sizes[i] <- sum(genind.by.locus[[i]]@tab, na.rm=T) # alleles sampled per locus
}
# generating empty files
num.alleles.per.locus <- numeric(K)
w <- numeric(K)
d.per.locus <- numeric(K)
dw <- numeric(K)
p <- vector("list", length=K)
num.allele <- vector("list", length=K)
Hexp <- vector("list", length=K)
no.het.per.allele <- vector("list", length=K)
Hobs <- vector("list", length=K)
d <- vector("list", length=K)
for (j in 1:K) {
L <- ncol(genind.by.locus[[j]]@tab) # number of alleles per locus
num.alleles.per.locus[j] <- sum(genind.by.locus[[j]]@tab, na.rm=T) # total number of alleles for locus 1
num.allele[[j]]<-p[[j]]<-Hexp[[j]]<-Hobs[[j]]<-no.het.per.allele[[j]]<-d[[j]]<-numeric(L)
for (i in 1:L) {
num.allele[[j]][i] <- sum(genind.by.locus[[j]]@tab[,i], na.rm=T) # number of occurrences of allele i of locus j
p[[j]][i] <- num.allele[[j]][i] / num.alleles.per.locus[j] # allele frequency
Hexp[[j]][i] <- 2*p[[j]][i]*(1-p[[j]][i])*sample_sizes[j] / (sample_sizes[j]-1) # not totally sure why this is normalized by sample size -> weighing?
no.het.per.allele[[j]][i] <- sum(genind.by.locus[[j]]@tab[,i] == 1, na.rm=T)
Hobs[[j]][i] <- no.het.per.allele[[j]][i] / sample_sizes[j]
d[[j]][i] <- (Hobs[[j]][i]-Hexp[[j]][i]) / Hexp[[j]][i]
}
w[j] <- sqrt(sample_sizes[j]/2)*([email protected][j]-1) / [email protected][j] # weights are the number of alleles per locus
d.per.locus[j] <- sample_sizes[j]/2*sum(d[[j]])
dw[j] <- d.per.locus[j]*w[j]
}
D <- sum(dw)/sum(w)
Nb <- 1/2*D + 1/(2*(D+1))
return(Nb)
}