-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtemporalNe.R
242 lines (205 loc) · 8.51 KB
/
temporalNe.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
### METHOD 3: Temporal method ###
require(adegenet)
#' @title varNe_point
#' @description Calculates population size from a genind object by calculating the variance of allele frequencies between generations.
#' @description The variance F is inversely proportional to the effective population size Ne.
#' @description This function calculates F based on Jorde and Ryman 2007. They state hat low frequency alleles are not problematic / do not bias Ne estimates, thus they are not removed here.
#'
#' @param genObj A genind object with years as populations.
#'
#' @return a vector with
#'
#' @author Christine Ewers-Saucedo <ewers.christine@@gmail.com>
#'
#' @examples library(adegenet)
#' @examples data(nancycats)
#' @examples varNe(nancycats)
#'
genObj <- simG3
t <- c(0,1,2) # provided by the user, the generations at which the samples were taken
varNe_point <- function(genObj, t = seq(1:length([email protected]))) {
G <- length([email protected]) # number of generations sampled
no.com <- G*(G-1)/2 # number of possible comparisons between two generations
genPopObj <- genind2genpop(genObj)
f <- makefreq(genPopObj) # columns are populations/generations. Allele freq per locus per pop are calculated
K <- [email protected] # number of alleles per loci overall
sum.K <- sum(K)
L <- length(K) # number of loci
genind.by.loc <- seploc(genObj) # one genind object per locus as list elements
loc.pop <- vector("list", length=L)
loc.pop.s <- matrix(0, nrow=G, ncol=L)
for (i in 1:L) {
loc.pop[[i]] <- genind2genpop(genind.by.loc[[i]]) # one list element per locus
loc.pop.s[,i] <- rowSums(loc.pop[[i]]@tab)/2 # number of ind. sampled per generation. Matrix with loci in columns, generations in rows
}
dif <- ff <- z <- matrix(0, nrow=as.numeric(paste(G,G-1,sep="")), ncol=ncol(f))
s <- matrix(0, nrow=as.numeric(paste(G,G-1,sep="")), ncol=ncol(loc.pop.s))
counter <- counter.j <- counter2 <- time.gen <- 0
for (i in 1:(G-1)) {
j <- i
repeat {
j <- j+1
dif[as.numeric(paste(i,j,sep="")),] <- sqrt((f[i,]-f[j,])^2) # allele frequency differences between generations
z[as.numeric(paste(i,j,sep="")),] <- (f[i,]+f[j,])/2 * (1-(f[i,]+f[j,])/2) # average allele frequency?
#ff[as.numeric(paste(i,j,sep="")),] <- (f[i,] != 0) & f[j,] != 0
s[as.numeric(paste(i,j,sep="")),] <- 1/(0.5*(1/loc.pop.s[i,]+1/loc.pop.s[j,]))
time.gen[as.numeric(paste(i,j,sep=""))] <- t[j]-t[i]
counter[paste(i,j,sep="")] <- paste(i,j,sep="")
counter2[paste(i,j,sep="")] <- paste(i,j,sep="-")
counter.j[paste(i,j,sep="")] <- j
if (j == G) break}
}
# remove empty columns which do not contain comparisons
counter <- as.numeric(counter[-1])
counter2 <- counter2[-1]
counter.j <- counter.j[-1]
dif <- dif[counter,]
#ff <- ff[counter,]
z <- z[counter,]
s <- s[counter,] # harmonic mean sample sizes: columns are loci, rows are comparisons
time.gen <- time.gen[counter]
# there are some NA in z. I do not know where they come from (likely missing data), but they need to be replaced
z[is.na(z)] <- 0
if (no.com == 1) {
for (j in 1:length(dif)) {
if (z[j] == 0) (dif[j] <- NA)
}
numerator <- sum(dif, na.rm=T) # sum allele frequency differences per comparison between generations (rows)
denominator <- sum(z, na.rm=T)
Fs <- numerator/denominator # single Fs value
S <- 1/(1/sum.K*sum(K/s, na.rm=T)) # harmonic mean sample size per comparison between two generations
s.per.gen <- 0
for (i in 1:G) {
s.per.gen[i] <- 1 / (1/sum.K * sum(K/loc.pop.s[i,], na.rm=T)) # mean of sample size per generation, weigthed by number of alleles per locus
}
}
if (no.com > 1 & L > 1) {
for (i in 1:nrow(dif)) {
for (j in 1:ncol(dif)) {
if (z[i,j] == 0) (dif[i,j] <- NA)
}
}
numerator <- rowSums(dif, na.rm=T) # sum allele frequency differences per comparison between generations (rows)
denominator <- rowSums(z, na.rm=T)
Fs <- numerator/denominator # one Fs value per generation comparison. Normally 2-10 values.
# generate weighted harmonic means of sample sizes
S <- s.per.gen <- 0
for (i in 1:no.com) {
S[i] <- 1/(1/sum.K*sum(K/s[i,], na.rm=T)) # harmonic mean sample size per comparison between two generations
}
for (i in 1:G) {
s.per.gen[i] <- 1/(1/sum.K * sum(K/loc.pop.s[i,], na.rm=T)) # mean of sample size per generation, weigthed by number of alleles per locus
}
}
if (no.com > 1 & L == 1) {
for (i in 1:nrow(dif)) {
for (j in 1:ncol(dif)) {
if (z[i,j] == 0) (dif[i,j] <- NA)
}
}
numerator <- rowSums(dif, na.rm=T) # sum allele frequency differences per comparison between generations (rows)
denominator <- rowSums(z, na.rm=T)
Fs <- numerator/denominator # one Fs value per generation comparison. Normally 2-10 values.
# generate weighted harmonic means of sample sizes
S <- s.per.gen <- 0
for (i in 1:no.com) {
S[i] <- 1/(1/sum.K*sum(K/s[i], na.rm=T)) # harmonic mean sample size per comparison between two generations
}
for (i in 1:G) {
s.per.gen[i] <- 1/(1/sum.K * sum(K/loc.pop.s[i,], na.rm=T)) # mean of sample size per generation, weigthed by number of alleles per locus
}
}
# calculate F "slash", one value per comparison between generations
Fslash <- 0
for (i in 1:no.com) {
j <- as.numeric(counter.j[i])
Fslash[i] <- (1/((1+Fs[i]/4)*(1-1/(2*s.per.gen[j])))) * (Fs[i]*(1-(1/(4*S[i])))-1/S[i])
}
#Fslash
Ne <- data.frame(between.generations=counter2, Ne=(time.gen/(2*Fslash)))
#Ne
return(Ne)
}
# jackknifing function
#' @title jackknifing samples
#' @description jackknifes samples
#'
#' @param bs jackknifed populations (list of genind objects)
#' @param Ne_point output of function varNe_point. Matrix containing point estimates of Ne, one estimate per comparison
#' @param statistic function to be employed, here varNe_point
#'
#' @return a list. Each element is a jackknifed genind object
#'
#' @author Christine Ewers-Saucedo <ewers.christine@@gmail.com>
#'
sum_jack2 <- function (Ne_point, bs, statistic) {
nreps <- length(bs)
stats <- lapply(bs, statistic)
ncom <- nrow(stats[[1]])
s <- vector("list", length=ncom)
for (i in 1:ncom) {
for (j in 1:nreps) {
s[[i]][j] <- stats[[j]][i,2]
}
}
o <- matrix(0, nrow=ncom, ncol=4)
for (i in 1:ncom) {
o[i,] <- c(median(s[[i]]), quantile(s[[i]], c(0.025, 0.975), na.rm = TRUE), var(s[[i]]))
}
oo <- data.frame(Ne_point, o)
colnames(oo) <- c("between.generations","point.estimate", "median", "0.025", "0.975", "variance")
rownames(oo) <- rownames(stats[[1]])
return(oo)
}
# UNDER DEVELOPMENT ###
jacknife_populations <- function(genObj) {
nsam <- as.vector(table(genObj@pop))
for (i in 1:length([email protected])) {
sub.pop <- genObj[genObj@pop == [email protected][i]]
x <- vector("list", length=nsam)
for (i in 1:nsam) {
x[[i]] <- sub.pop[-i]
}
}
return(x)
}
####
#' @title jackknifing samples
#' @description jackknifes samples
#'
#' @param genObj A genind object with years as populations.
#'
#' @return a list. Each element is a jackknifed genind object
#'
#' @author Christine Ewers-Saucedo <ewers.christine@@gmail.com>
#'
jack_samples <- function(genObj) {
nsam <- nrow(genObj@tab)
x <- vector("list", length=nsam)
for (i in 1:nsam) {
x[[i]] <- genObj[-i]
}
return(x)
}
# combine point estimate and jackknifing summaries
#' @title varNe
#' @description Calculates effective population size from a genind object by calculating the variance of allele frequencies between generations, and jackknifes the samples.
#' @description The variance F is inversely proportional to the effective population size Ne.
#' @description This function calculates F based on Jorde and Ryman 2007. They state hat low frequency alleles are not problematic / do not bias Ne estimates, thus they are not removed here.
#'
#' @param genObj A genind object with years as populations.
#'
#' @return a data.frame. Each column reports the result for a comparison of two generations,
#' @return and the rows correspond to the generations compared, the point estimate of variance Ne, and the summary statistics of the jackknifing (median, 0.025 quantile, 0.975 quantile, variance)
#'
#' @author Christine Ewers-Saucedo <ewers.christine@@gmail.com>
#'
#' @examples data(simG3)
#' @examples varNe(simG3)
#'
varNe <- function(genObj) {
Ne_point <- varNe_point(genObj)
jn <- jack_samples(genObj)
Ne_jk <- sum_jack2(Ne_point, jn, varNe_point)
return(Ne_jk)
}