-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfits_glinternet_only.R
executable file
·131 lines (112 loc) · 4.42 KB
/
fits_glinternet_only.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/usr/bin/env Rscript
require(Matrix)
require(dplyr)
require(glinternet)
## Uncomment to use modified xyz
#library(Rcpp)
#source('./xyz/R/regression.R')
#source('./xyz/R/search.R')
#source('./xyz/R/xyz.R')
#sourceCpp('./xyz/src/core.cpp')
verbose <- TRUE
lethal_coef <- -1000
lambda_min_ratio = 5e-2
args <- commandArgs(trailingOnly = TRUE)
print(args)
#f <- args[1]
f <- "simulated_data/n1000_p100_SNR2_nbi0_nbij5_nlethals0_viol0_6493.rds"
#L <- args[2] %>% as.numeric
write_out <- args[2] == 'write'
regression_alpha <- 0.9
n <- regmatches(x = f, m = regexpr(f, pattern = "(?<=n)\\d+(?=_)", perl = TRUE)) %>% as.numeric
p <- regmatches(x = f, m = regexpr(f, pattern = "(?<=_p)\\d+(?=_)", perl = TRUE)) %>% as.numeric
SNR <- regmatches(x = f, m = regexpr(f, pattern = "(?<=_SNR)\\d+(?=_)", perl = TRUE)) %>% as.numeric
num_bi <- regmatches(x = f, m = regexpr(f, pattern = "(?<=_nbi)\\d+(?=_)", perl = TRUE)) %>% as.numeric
num_bij <- regmatches(x = f, m = regexpr(f, pattern = "(?<=_nbij)\\d+(?=_)", perl = TRUE)) %>% as.numeric
num_lethals <- regmatches(x = f, m = regexpr(f, pattern = "(?<=_nlethals)\\d+(?=_)", perl = TRUE)) %>% as.numeric
perc_viol <- regmatches(x = f, m = regexpr(f, pattern = "(?<=_viol)\\d+(?=_)", perl = TRUE)) %>% as.numeric
ID <- regmatches(x = f, m = regexpr(f, pattern = "(?<=_)\\d[0-9a-z_]+(?=\\.rds)", perl = TRUE))
## not really necessary, but X is neater than data$X
#data <- readRDS(paste("simulated_lethal_data/", f, sep=''))
data <- readRDS(f)
X <- data$X
Y <- data$Y
obs <- data$obs
bi_ind <- data$bi_ind
bij_ind <- data$bij_ind
lethal_ind <- data$lethal_ind
data <- NULL
gc()
## Fit model using glinternet
if (verbose) cat("Fitting model\n")
if (verbose) cat("Fitting model\n")
time <- system.time(fit <- glinternet.cv(X = X %>% as.matrix,
Y = Y %>% as.numeric,
numLevels = rep(1,p),
family = "gaussian",
nLambda = 5, numCores=10, lambdaMinRatio = 0.4, verbose = FALSE))
if (verbose) cat("Collecting stats\n")
cf <- coef(fit, lambdaType = "lambdaHat") #lambdaIndex = 50)#
## Collect coefficients
fx_main <- data.frame(gene_i = cf$mainEffects$cont,
effect = cf$mainEffectsCoef$cont %>% lapply(., function(x) x[[1]]) %>% unlist) %>%
arrange(gene_i) %>%
mutate(type = "main", gene_j = NA, TP = (gene_i %in% bi_ind[["gene_i"]] || gene_i %in% lethal_ind[["gene_i"]])) %>%
mutate(lethal=gene_i %in% lethal_ind[["gene_i"]]) %>%
select(gene_i, gene_j, type, TP, lethal) %>%
arrange(desc(TP)) %>%
arrange(desc(lethal)) %>%
tbl_df
fx_main
fx_int <- data.frame(gene_i = cf$interactions$contcont[,1], gene_j = cf$interactions$contcont[,2],
effect = cf$interactionsCoef$contcont %>% unlist) %>%
arrange(gene_i) %>%
left_join(., obs, by = c("gene_i", "gene_j")) %>%
mutate(type = "interaction") %>%
rowwise %>%
left_join(., merge(bij_ind, lethal_ind, all=T), by = c("gene_i", "gene_j")) %>%
ungroup %>%
mutate(TP = !is.na(coef)) %>%
mutate(lethal = (coef == lethal_coef)) %>%
select(gene_i, gene_j, type, TP, lethal) %>%
arrange(desc(TP)) %>%
arrange(desc(lethal)) %>%
tbl_df
fx_int %>% data.frame
## Statistical test if b_i and b_ij are sig. > 0
Z <- cbind(X[,fx_main[["gene_i"]]])
for (i in 1:nrow(fx_int)) {
Z <- cbind(Z, X[,fx_int[i,][["gene_i"]], drop = FALSE] * X[,fx_int[i,][["gene_j"]], drop = FALSE])
}
Z <- as.matrix(Z)
colnames(Z) <- rownames(Z) <- NULL
Ynum <- as.numeric(Y)
fit_red <- lm(Ynum ~ Z)
pvals <- data.frame(id = 1:ncol(Z), coef = coef(fit_red)[-1]) %>%
filter(!is.na(coef)) %>%
data.frame(., pval = summary(fit_red)$coef[-1,4]) %>%
tbl_df
smry <- left_join(rbind(fx_main, fx_int) %>% data.frame(id = 1:nrow(.), .), pvals, by = "id") %>%
mutate(pval = ifelse(is.na(pval), 1, pval)) %>%
rename(coef.est = coef) %>%
left_join(., obs, by = c("gene_i", "gene_j"))
# Print time taken to actuall run xyz
if (verbose)
time
## Write out
if (write_out) {
if (verbose) cat("Saving\n")
saveRDS(list(fit = fit,
bij = bij_ind,
bi = bi_ind,
obs = obs,
fx_int = fx_int,
fx_main = fx_main,
fit_red = fit_red,
time = time,
smry = smry),
file = sprintf("./fits_glinternet/n%d_p%d_SNR%d_nbi%d_nbij%d_nlethals%d_viol%d_%s.rds",
n, p, SNR, num_bi, num_bij, num_lethals, perc_viol, ID))
} else {
cat("Not saving\n")
}