-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfits_xyz_only.R
executable file
·143 lines (120 loc) · 4.89 KB
/
fits_xyz_only.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#!/usr/bin/Rscript
require(Matrix)
require(dplyr)
require(xyz)
## Uncomment to use modified xyz
#library(Rcpp)
#source('./xyz/R/regression.R')
#source('./xyz/R/search.R')
#source('./xyz/R/xyz.R')
#sourceCpp('./xyz/src/core.cpp')
verbose <- TRUE
lethal_coef <- -1000
#args <- commandArgs(trailingOnly = TRUE)
#print(args)
#f <- args[1]
#L <- args[2] %>% as.numeric
#write_out <- args[3] == 'write'
f = "./simulated_data_large/n10000_p1000_SNR5_nbi500_nbij500_viol0_49127.rds"
L = 1000
write_out = FALSE
regression_alpha <- 1.0
n <- regmatches(x = f, m = regexpr(f, pattern = "(?<=n)\\d+(?=_)", perl = TRUE)) %>% as.numeric
p <- regmatches(x = f, m = regexpr(f, pattern = "(?<=_p)\\d+(?=_)", perl = TRUE)) %>% as.numeric
SNR <- regmatches(x = f, m = regexpr(f, pattern = "(?<=_SNR)\\d+(?=_)", perl = TRUE)) %>% as.numeric
num_bi <- regmatches(x = f, m = regexpr(f, pattern = "(?<=_nbi)\\d+(?=_)", perl = TRUE)) %>% as.numeric
num_bij <- regmatches(x = f, m = regexpr(f, pattern = "(?<=_nbij)\\d+(?=_)", perl = TRUE)) %>% as.numeric
num_lethals <- regmatches(x = f, m = regexpr(f, pattern = "(?<=_nlethals)\\d+(?=_)", perl = TRUE)) %>% as.numeric
perc_viol <- regmatches(x = f, m = regexpr(f, pattern = "(?<=_viol)\\d+(?=_)", perl = TRUE)) %>% as.numeric
ID <- regmatches(x = f, m = regexpr(f, pattern = "(?<=_)\\d[0-9a-z_]+(?=\\.rds)", perl = TRUE))
## not really necessary, but X is neater than data$X
#data <- readRDS(paste("simulated_lethal_data/", f, sep=''))
data <- readRDS(f)
X <- data$X
Y <- data$Y
obs <- data$obs
bi_ind <- data$bi_ind
bij_ind <- data$bij_ind
lethal_ind <- data$lethal_ind
if (L == -1) {
if(verbose) cat("using L= sqrt(p)\n")
L <- ceiling(sqrt(p))
}
data <- NULL
gc()
## Fit model, wip: use xyz
if (verbose) cat("Fitting model\n")
X[X == 0] <- -1
time <- system.time(regression_results <- xyz_regression(X, Y %>% as.numeric, standardize=TRUE, standardize_response=TRUE, alpha=regression_alpha, L=L, n_lambda=20))
#time <- system.time(regression_results <- xyz_search(X, Y %>% as.numeric, L=L, N=2000))
if (verbose) cat("Collecting stats\n")
# Collect coefficients
fx_main <- data.frame(gene_i = regression_results[[1]][[10]]) %>% # for xyz_regression
#fx_main <- data.frame(gene_i = regression_results[[1]][regression_results[[1]][1,]==regression_results[[1]][2,]]) %>% # for xyz_search
arrange(gene_i) %>%
mutate(type = "main", gene_j = NA, TP = (gene_i %in% bi_ind[["gene_i"]] || gene_i %in% lethal_ind[["gene_i"]])) %>%
mutate(lethal=gene_i %in% lethal_ind[["gene_i"]]) %>%
select(gene_i, gene_j, type, TP, lethal) %>%
arrange(desc(TP)) %>%
arrange(desc(lethal)) %>%
tbl_df
fx_main
# remove reflexive results
first = unlist(split(regression_results[[3]][[10]], 1:2)[1]) # for xyz_regression
second = unlist(split(regression_results[[3]][[10]], 1:2)[2]) # for xyz_regression
#first = unlist(split(regression_results[[1]], 1:2)[1])
#second = unlist(split(regression_results[[1]], 1:2)[2])
nfirst = first[first != second]
nsecond = second[first != second]
fx_int <- data.frame (gene_i = nfirst,
gene_j = nsecond) %>%
arrange(gene_i) %>%
left_join(., obs, by = c("gene_i", "gene_j")) %>%
mutate(type = "interaction") %>%
rowwise %>%
left_join(., merge(bij_ind, lethal_ind, all=T), by = c("gene_i", "gene_j")) %>%
ungroup %>%
mutate(TP = !is.na(coef)) %>%
mutate(lethal = (coef == lethal_coef)) %>%
select(gene_i, gene_j, type, TP, lethal) %>%
arrange(desc(TP)) %>%
arrange(desc(lethal)) %>%
tbl_df
fx_int %>% data.frame
## Statistical test if b_i and b_ij are sig. > 0
Z <- cbind(X[,fx_main[["gene_i"]]])
for (i in 1:nrow(fx_int)) {
Z <- cbind(Z, X[,fx_int[i,][["gene_i"]], drop = FALSE] * X[,fx_int[i,][["gene_j"]], drop = FALSE])
}
Z <- as.matrix(Z)
colnames(Z) <- rownames(Z) <- NULL
Ynum <- as.numeric(Y)
fit_red <- lm(Ynum ~ Z)
pvals <- data.frame(id = 1:ncol(Z), coef = coef(fit_red)[-1]) %>%
filter(!is.na(coef)) %>%
data.frame(., pval = summary(fit_red)$coef[-1,4]) %>%
tbl_df
smry <- left_join(rbind(fx_main, fx_int) %>% data.frame(id = 1:nrow(.), .), pvals, by = "id") %>%
mutate(pval = ifelse(is.na(pval), 1, pval)) %>%
rename(coef.est = coef) %>%
left_join(., obs, by = c("gene_i", "gene_j"))
# Print time taken to actuall run xyz
if (verbose)
time
## Write out
if (write_out) {
if (verbose) cat(sprintf("saving to './fits_proper/n%d_p%d_SNR%d_nbi%d_nbij%d_nlethals%d_viol%d_L%d_%s.rds'\n", n, p, SNR, num_bi, num_bij, num_lethals, perc_viol, L, ID))
saveRDS(list(fit = regression_results,
bij = bij_ind,
bi = bi_ind,
obs = obs,
fx_int = fx_int,
fx_main = fx_main,
fit_red = fit_red,
smry = smry,
time = time),
file = sprintf("./fits_proper/n%d_p%d_SNR%d_nbi%d_nbij%d_nlethals%d_viol%d_L%d_%s.rds",
n, p, SNR, num_bi, num_bij, num_lethals, perc_viol, L, ID))
} else {
cat("Not saving\n")
}