-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path09_asset_parallel.R
145 lines (107 loc) · 4.48 KB
/
09_asset_parallel.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#############################################################################
# Title: ASSET parallel
# Function: ASSET analysis
# Author: Murat Guler ([email protected], [email protected])
# Date: Jan 23th 2022
# Note: Please let me know if you have any trouble
#############################################################################
# Free R memory and remove prior environment
rm(list=ls())
gc()
#############################################################################
# Packages
# Install required packages, if not installed
install_if_missing <- function(package) {
if (!require(package, character.only = TRUE)) {
install.packages(package, dependencies = TRUE)
library(package, character.only = TRUE)
}
}
# List of packages to install and load
packages <- c("data.table", "ASSET", "dplyr", "purr", "parallel")
# Install and load each package
for (package in packages) {
install_if_missing(package)
}
# Load required packages
library(ASSET)
library(data.table)
library(dplyr)
library(parallel)
library(purrr)
# setwd
setwd("/your/sumstats/dir")
# Define phenotypes (your sumstats are in the seperate dirs names as phenotpye names)
phenotypes <- c("CLL", "DLBCL", "FL", "HL", "LPL_WM", "MGUS", "MM", "MZL")
# Get data dir
main_dir <- "/your/sumstats/dir"
# Initialize an empty list to store paths
paths_list <- list()
# Loop through phenotypes and store paths
for (pheno in phenotypes) {
path <- paste0(main_dir, "/", pheno)
paths_list[[pheno]] <- path
}
# Read GWAS sumstat, extract BETA and SE
gwas_list <- list()
for (i in 1:8) {
pheno <- phenotypes[i]
sumstats_path <- paste0(paths_list[i], "/", phenotypes[i], "_out_A1FREQ0p001_info0p9.txt")
sumstats <- fread(sumstats_path, header = TRUE, na.strings="NA", select= c(19,14,15), sep = " ")
names(sumstats) <- c("SNP", paste0(phenotypes[i], ".BETA"), paste0(phenotypes[i], ".SE"))
gwas_list[[pheno]] <- sumstats}
# Merge sumstats
gwas_merge <- reduce(gwas_list, inner_join, by = "SNP")
# Define the input arguments to h.traits
traits.lab <- c("CLL","DLBCL", "FL","HL","LPL_WM", "MGUS", "MM", "MZL")
N11 <- paste0(main_dir, "/", "asset", "/", "N11.txt")
N10 <- paste0(main_dir, "/", "asset", "/", "N10.txt")
N00 <- paste0(main_dir, "/", "asset", "/", "N00.txt")
N11 <- as.matrix(fread(N11, drop = 1))
N10 <- as.matrix(fread(N10, drop = 1))
N00 <- as.matrix(fread(N00, drop = 1))
rownames(N11) <- traits.lab
rownames(N10) <- traits.lab
rownames(N00) <- traits.lab
cor <- list(N11=N11, N00=N00, N10=N10)
ncase <- diag(N11)
ncntl <- diag(N00)
snps <- as.vector(gwas_merge[, "SNP"])
# Extract column names that end with ".BETA"
beta_columns <- grep("\\.BETA$", names(gwas_merge), value = TRUE)
# Extract the corresponding columns and convert to matrix
beta.hat <- as.matrix(gwas_merge[, beta_columns, with = FALSE])
rownames(beta.hat) <- seq_len(nrow(gwas_merge))
# Extract column names that end with ".BETA"
sigma_columns <- grep("\\.SE$", names(gwas_merge), value = TRUE)
# Extract the corresponding columns and convert to matrix
sigma.hat <- as.matrix(gwas_merge[, sigma_columns, with = FALSE])
rownames(sigma.hat) <- seq_len(nrow(gwas_merge))
# Number of cores to use
num_cores <- 64
# Initialize a cluster
cl <- makeCluster(num_cores)
# Parallel loop using mclapply
summary_list <- mclapply(1:nrow(snps), function(i) {
res <- h.traits(snps[i], traits.lab, beta.hat[i, , drop = FALSE], sigma.hat[i, , drop = FALSE], ncase, ncntl, cor = cor, cor.numr = FALSE, side = 2, meta = TRUE, zmax.args = NULL, meth.pval = "DLM")
# Get individual data frames
meta_df <- h.summary(res)[["Meta"]]
colnames(meta_df)[2:5] <- paste0(colnames(meta_df)[2:5], "_meta")
subset_1sided_df <- h.summary(res)[["Subset.1sided"]]
colnames(subset_1sided_df)[2:6] <- paste0(colnames(subset_1sided_df)[2:6], "_1sided")
subset_2sided_df <- h.summary(res)[["Subset.2sided"]]
colnames(subset_2sided_df)[2:12] <- paste0(colnames(subset_2sided_df)[2:12], "_2sided")
# Merge the three data frames by the "SNP" column
merged_df <- merge(meta_df, subset_1sided_df, by = "SNP")
merged_df <- merge(merged_df, subset_2sided_df, by = "SNP")
# Return the merged data frame
return(merged_df)
}, mc.cores = num_cores)
# Stop the cluster
stopCluster(cl)
# Combine the data frames within the summary_list into a single data frame
combined_df <- bind_rows(summary_list)
#Save
setwd("/your/output/dir")
# Write combined_df to a tab-delimited text file without row names
fwrite(combined_df, "asset_sumstat.txt", sep = "\t", quote = FALSE, row.names = FALSE)