forked from kpatel427/YouTubeTutorials
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsingleCell_pseudoBulk.R
135 lines (87 loc) · 2.93 KB
/
singleCell_pseudoBulk.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# script to perform pseudo-bulk DGA
# setwd("~/Desktop/demo/single_cell_DEG")
library(ExperimentHub)
library(Seurat)
library(DESeq2)
library(tidyverse)
# get data
eh <- ExperimentHub()
query(eh, "Kang")
sce <- eh[["EH2259"]]
seu.obj <- as.Seurat(sce, data = NULL)
View([email protected])
# QC and filtering
# explore QC
# get mito percent
seu.obj$mitoPercent <- PercentageFeatureSet(seu.obj, pattern = '^MT-')
View([email protected])
# filter
seu.filtered <- subset(seu.obj, subset = nFeature_originalexp > 200 & nFeature_originalexp < 2500 &
nCount_originalexp > 800 &
mitoPercent < 5 &
multiplets == 'singlet')
seu.obj
seu.filtered
# run Seurat's standard workflow steps
seu.filtered <- NormalizeData(seu.filtered)
seu.filtered <- FindVariableFeatures(seu.filtered)
seu.filtered <- ScaleData(seu.filtered)
seu.filtered <- RunPCA(seu.filtered)
ElbowPlot(seu.filtered)
seu.filtered <- RunUMAP(seu.filtered, dims = 1:20)
# visualize
cell_plot <- DimPlot(seu.filtered, reduction = 'umap', group.by = 'cell', label = TRUE)
cond_plot <- DimPlot(seu.filtered, reduction = 'umap', group.by = 'stim')
cell_plot|cond_plot
# pseudo-bulk workflow -----------------
# Acquiring necessary metrics for aggregation across cells in a sample
# 1. counts matrix - sample level
# counts aggregate to sample level
View([email protected])
seu.filtered$samples <- paste0(seu.filtered$stim, seu.filtered$ind)
DefaultAssay(seu.filtered)
cts <- AggregateExpression(seu.filtered,
group.by = c("cell", "samples"),
assays = 'originalexp',
slot = "counts",
return.seurat = FALSE)
cts <- cts$originalexp
# transpose
cts.t <- t(cts)
# convert to data.frame
cts.t <- as.data.frame(cts.t)
# get values where to split
splitRows <- gsub('_.*', '', rownames(cts.t))
# split data.frame
cts.split <- split.data.frame(cts.t,
f = factor(splitRows))
# fix colnames and transpose
cts.split.modified <- lapply(cts.split, function(x){
rownames(x) <- gsub('.*_(.*)', '\\1', rownames(x))
t(x)
})
#gsub('.*_(.*)', '\\1', 'B cells_ctrl101')
# Let's run DE analysis with B cells
# 1. Get counts matrix
counts_bcell <- cts.split.modified$`B cells`
# 2. generate sample level metadata
colData <- data.frame(samples = colnames(counts_bcell))
colData <- colData %>%
mutate(condition = ifelse(grepl('stim', samples), 'Stimulated', 'Control')) %>%
column_to_rownames(var = 'samples')
# get more information from metadata
# perform DESeq2 --------
# Create DESeq2 object
dds <- DESeqDataSetFromMatrix(countData = counts_bcell,
colData = colData,
design = ~ condition)
# filter
keep <- rowSums(counts(dds)) >=10
dds <- dds[keep,]
# run DESeq2
dds <- DESeq(dds)
# Check the coefficients for the comparison
resultsNames(dds)
# Generate results object
res <- results(dds, name = "condition_Stimulated_vs_Control")
res