SuperCell-MEGENA pipeline
SuperCell 로 pseudobulk만들고 MEGENA돌리는 pipeline
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
library(Seurat)
library(BPCells)
library(ggplot2)
library(SuperCell)
library(MEGENA)
# needs to be set for large dataset analysis
options(future.globals.maxSize = 1e9)
setwd('/sc/arion/projects/zhangb03a/shared/U54_Infection/COVID-19/02.Analysis/SuperCellMEGENA/Orig')
# GEX <- readRDS("/sc/arion/projects/zhangb03a/shared/U54_Infection/COVID-19/02.Analysis/sketch_integ_finish.rds")
# cellType_list = c("B","CD4","CD8","DC","Epi","Macro","Mast","Mega","Mono","Neu","NK","Plasma")
# for(ct in cellType_list){
# sub <- subset(GEX, subset=majorType==ct)
# saveRDS(sub,paste0("sub_",ct,".rds"))
# }
# After macrophages, there might be instances where there are no cells corresponding to the specified cell types in the dataset.
#This causes the JoinLayer to not function properly.
DefaultAssay(GEX) <- "RNA"
cellType_list = c("Macro","Mast","Mega","Mono","Neu","NK","Plasma")
sub <- subset(GEX, subset=majorType%in%cellType_list)
sub <- JoinLayers(sub)
for(ct in cellType_list){
subsub <- subset(sub, subset=majorType==ct)
saveRDS(subsub,paste0("sub_",ct,".rds"))
}
###########################################################################
anyNA <- function(x) any(is.na(x))
source("/sc/arion/projects/scMultiscale/chos14/Project01_sysFlu/Trial02/04_MEGENA/recap_source.R")
source("/sc/arion/projects/scMultiscale/chos14/Project01_sysFlu/Trial02/04_MEGENA/check_matrix.R")
args=commandArgs(trailingOnly=TRUE)
cellType_list = c("B","CD4","CD8","DC","Epi","Macro","Mast","Mega","Mono","Neu","NK","Plasma")
cellType = cellType_list[as.numeric(args[1])]
print(cellType)
# SuperCell
sub <- readRDS(paste0("sub_",cellType,".rds"))
DefaultAssay(sub) <- "RNA"
sub <- JoinLayers(sub, assay="RNA")
wkdir <- paste0("MEGENA_",cellType,"_wSC_wo0");
dir.create(wkdir)
set.seed(1234)
setwd(wkdir)
print(wkdir)
sub = CreateSeuratObject(counts = LayerData(sub, assay = "RNA", layer = "counts"), project = "T", meta.data = sub@meta.data)
sub <- NormalizeData(sub)
sub <- FindVariableFeatures(sub,
selection.method = "disp", # "vst" is default
nfeatures = 1000)
hvg <- VariableFeatures(sub)
gamma <- 20 # Graining level
# Compute metacells using SuperCell package
MC <- SCimplify(
X = LayerData(sub, assay="RNA", layers="Data"), # single-cell log-normalized gene expression data
genes.use = hvg,
gamma = gamma,
n.pc = 10)
# Compute gene expression of metacells by simply averaging gene expression within each metacell
MC.ge <- supercell_GE(ge = LayerData(sub, assay="RNA", layers="Data"),
groups = MC$membership)
saveRDS(MC,"MC.rds")
saveRDS(MC.ge,"MC.ge.rds")
###########################################################################
# MEGENA
## QC params
min.read = 1E-320
min.cv = 0.01
min.sprop = 0.5
## MEGENA param
FDR.cutoff <- 1
n.cores <- 10
# input parameters
doPar <- TRUE;
method = "pearson"
#FDR.cutoff = 0.05
module.pval = 0.05
hub.pval = 0.05
####### Preprocess
data.mat = as.matrix(MC.ge)
# remove genes with low expression rates
res <- filter_low_quality_genes(data.mat,min.read = min.read,min.cv = min.cv,min.sprop = min.sprop)
datExpr <- res$expr.matrix
dim(datExpr)
# run correlation
# without 0
rho.out = calculate.rho.signed(datExpr,n.perm = 10,FDR.cutoff = 0.2,estimator = "pearson",
use.obs = "pairwise.complete.obs",direction = "absolute",
rho.thresh = NULL,sort.el = TRUE)
# with 0
rho.out = calculate.rho.signed(datExpr,n.perm = 10,FDR.cutoff = 0.2,estimator = "pearson",
use.obs = "na.or.complete",direction = "absolute",
rho.thresh = NULL,sort.el = TRUE)
# do minimal thresholding with |rho| > 0.2
ijw = subset(rho.out$signif.ijw, rho > 0.2) # 원래 코드
write.table(ijw,paste0("ijw.txt"),sep = "\t",row.names = F,col.names = T,quote = F)
rm(rho.out)
gc()
### Run PFN
message("Calculate PFN")
if (doPar & getDoParWorkers() == 1){
cl <- parallel::makeCluster(n.cores)
registerDoParallel(cl)
# check how many workers are there
cat(paste("number of cores to use:",getDoParWorkers(),"\n",sep = ""))
}
# calculate PFN (no)
if (doPar){
el <- calculate.PFN(ijw[,1:3],doPar = doPar,num.cores = n.cores)
saveRDS(el,file = "PFN.RDS")
write.table(el,file = paste0("MEGENA_Network.txt"),sep = "\t",row.names = F,col.names = T,quote = F)
}else{
el =readRDS("PFN.RDS")
}
# do clustering
if (doPar){
g <- graph.data.frame(el,directed = F)
MEGENA.output <- do.MEGENA(g,
mod.pval = module.pval,hub.pval = hub.pval,remove.unsig = TRUE,
min.size = 10,max.size = vcount(g)/2,
doPar = TRUE,num.cores = n.cores,n.perm = 100,
save.output = TRUE)
saveRDS(MEGENA.output,file = "MEGENA.output.RDS")
}else{
MEGENA.output = readRDS(file = "MEGENA.output.RDS")
}
m <- MEGENA.ModuleSummary(MEGENA.output, max.size=50000)
saveRDS(m,file = "MEGENA.summary.RDS")
write.table(m$module.table,file = "MEGENA_ModuleSummary.txt",sep ="\t",row.names = F,col.names = T,quote = F)
output.geneSet.file(m$modules,"MEGENA_ModuleSummary.modules.txt")
This post is licensed under CC BY 4.0 by the author.