Software for sequencing data for manuscript.
Use Anaconda to create an environment
mamba env create --file=mibrain.yml
Then install
pip install .
make install
library(miBrain)
seq.dir="~/data/seq" ## Directory with batchName/sampleName/featureCounts inside
if (!dir.exists("miBrain")) { dir.create("miBrain") } ## output directory
se = bulk_aggregate_counts(system.file("extdata", "miBrain_SampleSheet.csv", package="miBrain"), seq.dir)
sel = bulk_aggregate_comparison(se, system.file("extdata", "miBrain_comparison.csv", package="miBrain"), logTMM=FALSE)
lapply(sel, function(obj) {
if (obj@metadata$comparison %in% c("Pericyte", "BMEC")) {
### lower depth, use LRT+lower CPM cutoff to allow for more hypotheses
deg_dump(obj, "miBrain", cpm.cutoff=10, method="LRT")
} else {
deg_dump(obj, "miBrain")
}
})
For the same structure as before, with XLSX sheets in miBrain
directory:
library(miBrain)
if (!dir.exists("miBrain/cpdb")) {
dir.create("miBrain/cpdb", recursive=TRUE)
}
dl = lapply(list.files("miBrain", pattern="xlsx$", full.names=TRUE), deg_load)
names(dl) = sapply(dl, function(obj) { obj@metadata$comparison })
degl = dplyr::bind_rows(lapply(dl, function(obj) { obj@metadata$deg }))
colnames(degl) = gsub("^case$", "cluster", colnames(degl))
del = do.call(SummarizedExperiment::cbind, dl)
write.table(as.data.frame(SummarizedExperiment::assays(del)$counts), "miBrain/cpdb/counts.tsv", sep="\t", quote=FALSE)
write.table(as.data.frame(SummarizedExperiment::assays(del)$TMM), "miBrain/cpdb/tmm.tsv", sep="\t", quote=FALSE)
write.table(data.frame(Cell=colnames(del), cell_type=del$group), "miBrain/cpdb/meta.tsv", sep="\t", quote=FALSE, row.names=FALSE)
write.table(degl[(degl$FDR < 0.05) & (degl$log2FC > 0), c("cluster", "gene")], "miBrain/cpdb/deg_FDR005_log2FC0.tsv", sep="\t", quote=FALSE, row.names=FALSE)
write.table(degl[(degl$FDR < 0.1) & (degl$log2FC > 0), c("cluster", "gene")], "miBrain/cpdb/deg_FDR010_log2FC0.tsv", sep="\t", quote=FALSE, row.names=FALSE)
write.table(degl[(degl$FDR < 0.05) & (degl$log2FC > 0.5), c("cluster", "gene")], "miBrain/cpdb/deg_FDR005_log2FC05.tsv", sep="\t", quote=FALSE, row.names=FALSE)
write.table(degl[(degl$FDR < 0.1) & (degl$log2FC > 0.5), c("cluster", "gene")], "miBrain/cpdb/deg_FDR010_log2FC05.tsv", sep="\t", quote=FALSE, row.names=FALSE)
Then, to run cellphoneDB:
import blah
library(miBrain)
set_ht_opt(2)
dl = lapply(list.files("miBrain", pattern="^[A-Za-z]+[.]xlsx$", full.names=TRUE), deg_load)
sapply(dl, function(se) {
ct = S4Vectors::metadata(se)$comparison
se = se_tmm(se, log=TRUE)
cd = as.data.frame(SummarizedExperiment::colData(se))
mono = setdiff(1:nrow(cd), grep("miBrain", cd$group))
cd$group = gsub("^mono ", "", cd$group)
cd$group[mono] = paste0("monoculture ", cd$group[-grep("miBrain", cd$group)])
se$group = cd$group
se = se[,order(se$group)]
g = PlotPCA(se)
saveGGplot(g, paste0("miBrain/pca/", ct), w=3, h=3)
return(g)
})
library(miBrain)
set_ht_opt(2)
dl = lapply(list.files("miBrain", pattern="^[A-Za-z]+[.]xlsx$", full.names=TRUE), deg_load)
sapply(dl, function(se) {
ct = S4Vectors::metadata(se)$comparison
column_title = paste0("Top DEGs in ", ct)
se = se_tmm(se, log=TRUE)
cd = as.data.frame(SummarizedExperiment::colData(se))
mono = setdiff(1:nrow(cd), grep("miBrain", cd$group))
cd$group = gsub("^mono ", "", cd$group)
cd$group[mono] = paste0("monoculture ", cd$group[-grep("miBrain", cd$group)])
se$group = cd$group
se = se[,order(se$group)]
H = gene_heatmap(se, column_title=gsub("[.][0-9]+$", "", column_title))
saveHeatmap(H, paste0("miBrain/heatmaps_deg/", ct))
g = PlotPCA(se)
saveGGplot(g, paste0("miBrain/pca/", ct), w=3, h=3)
return(g)
})
library(miBrain)
set_ht_opt(2)
df = xl_parse_gene_list(system.file("extdata", "heatmap_genes.xlsx", package="miBrain")) ## load some gene lists
sapply(split(df, df$go_name), function(gf) {
ct = unique(gf$celltype)
column_title=head(gf$go_name, 1)
se = se_tmm(dl[[ct]], log=TRUE)
cd = as.data.frame(SummarizedExperiment::colData(se))
mono = setdiff(1:nrow(cd), grep("miBrain", cd$group))
cd$group = gsub("^mono ", "", cd$group)
cd$group[mono] = paste0("monoculture ", cd$group[-grep("miBrain", cd$group)])
se$group = cd$group
se = se[,order(se$group)]
H = gene_heatmap(se, gene=gf$gene, column_title=gsub("[.][0-9]+$", "", column_title))
saveHeatmap(H, paste0("miBrain/heatmaps_go/", column_title))
})