SuperCell-MEGENA pipeline

Posted Aug 7, 2024
By Subin Cho 3 min read
SuperCell 로 pseudobulk만들고 MEGENA돌리는 pipeline

  
library(Seurat)
library(BPCells)
library(ggplot2)
library(SuperCell)
library(MEGENA)
# needs to be set for large dataset analysis
options(future.globals.maxSize = 1e9)

setwd('/sc/arion/projects/zhangb03a/shared/U54_Infection/COVID-19/02.Analysis/SuperCellMEGENA/Orig')

# GEX <- readRDS("/sc/arion/projects/zhangb03a/shared/U54_Infection/COVID-19/02.Analysis/sketch_integ_finish.rds")
# cellType_list = c("B","CD4","CD8","DC","Epi","Macro","Mast","Mega","Mono","Neu","NK","Plasma")
# for(ct in cellType_list){
#   sub <- subset(GEX, subset=majorType==ct)
#   saveRDS(sub,paste0("sub_",ct,".rds"))
# }

# After macrophages, there might be instances where there are no cells corresponding to the specified cell types in the dataset. 
#This causes the JoinLayer to not function properly.
DefaultAssay(GEX) <- "RNA"
cellType_list = c("Macro","Mast","Mega","Mono","Neu","NK","Plasma")
sub <- subset(GEX, subset=majorType%in%cellType_list)
sub <- JoinLayers(sub)
for(ct in cellType_list){
  subsub <- subset(sub, subset=majorType==ct)
  saveRDS(subsub,paste0("sub_",ct,".rds"))
}

###########################################################################

anyNA <- function(x) any(is.na(x))
source("/sc/arion/projects/scMultiscale/chos14/Project01_sysFlu/Trial02/04_MEGENA/recap_source.R")
source("/sc/arion/projects/scMultiscale/chos14/Project01_sysFlu/Trial02/04_MEGENA/check_matrix.R")

args=commandArgs(trailingOnly=TRUE)

cellType_list = c("B","CD4","CD8","DC","Epi","Macro","Mast","Mega","Mono","Neu","NK","Plasma")
cellType = cellType_list[as.numeric(args[1])]
print(cellType)

# SuperCell
sub <- readRDS(paste0("sub_",cellType,".rds"))
DefaultAssay(sub) <- "RNA"
sub <- JoinLayers(sub, assay="RNA")

wkdir <- paste0("MEGENA_",cellType,"_wSC_wo0");
dir.create(wkdir)
set.seed(1234)
setwd(wkdir)
print(wkdir)

sub = CreateSeuratObject(counts = LayerData(sub, assay = "RNA", layer = "counts"), project = "T", meta.data = sub@meta.data)
sub <- NormalizeData(sub)
sub <- FindVariableFeatures(sub, 
  selection.method = "disp", # "vst" is default
  nfeatures = 1000)
hvg <- VariableFeatures(sub)

gamma <- 20 # Graining level

# Compute metacells using SuperCell package
MC <- SCimplify(
  X = LayerData(sub, assay="RNA", layers="Data"), # single-cell log-normalized gene expression data
  genes.use = hvg, 
  gamma = gamma,
  n.pc = 10)

# Compute gene expression of metacells by simply averaging gene expression within each metacell
MC.ge <- supercell_GE(ge = LayerData(sub, assay="RNA", layers="Data"),
  groups = MC$membership)

saveRDS(MC,"MC.rds")
saveRDS(MC.ge,"MC.ge.rds")

###########################################################################
# MEGENA

## QC params
min.read = 1E-320
min.cv = 0.01
min.sprop = 0.5

## MEGENA param
FDR.cutoff <- 1
n.cores <- 10

# input parameters  
doPar <- TRUE;
method = "pearson"
#FDR.cutoff = 0.05
module.pval = 0.05
hub.pval = 0.05

####### Preprocess
data.mat = as.matrix(MC.ge)

# remove genes with low expression rates
res <- filter_low_quality_genes(data.mat,min.read = min.read,min.cv = min.cv,min.sprop = min.sprop)
datExpr <- res$expr.matrix
dim(datExpr)

# run correlation
# without 0
rho.out = calculate.rho.signed(datExpr,n.perm = 10,FDR.cutoff = 0.2,estimator = "pearson",
                               use.obs = "pairwise.complete.obs",direction = "absolute",
                               rho.thresh = NULL,sort.el = TRUE)
# with 0
rho.out = calculate.rho.signed(datExpr,n.perm = 10,FDR.cutoff = 0.2,estimator = "pearson",
                               use.obs = "na.or.complete",direction = "absolute",
                               rho.thresh = NULL,sort.el = TRUE)
# do minimal thresholding with |rho| > 0.2
ijw = subset(rho.out$signif.ijw, rho > 0.2) # 원래 코드

write.table(ijw,paste0("ijw.txt"),sep = "\t",row.names = F,col.names = T,quote = F)
rm(rho.out)
gc()

### Run PFN
message("Calculate PFN")
if (doPar & getDoParWorkers() == 1){
  cl <- parallel::makeCluster(n.cores)
  registerDoParallel(cl)
  
  # check how many workers are there
  cat(paste("number of cores to use:",getDoParWorkers(),"\n",sep = ""))
}

# calculate PFN (no)
if (doPar){
  el <- calculate.PFN(ijw[,1:3],doPar = doPar,num.cores = n.cores)
  saveRDS(el,file = "PFN.RDS")
  write.table(el,file = paste0("MEGENA_Network.txt"),sep = "\t",row.names = F,col.names = T,quote = F)
}else{
  el =readRDS("PFN.RDS")
}

# do clustering
if (doPar){
  g <- graph.data.frame(el,directed = F)
  
  MEGENA.output <- do.MEGENA(g,
                             mod.pval = module.pval,hub.pval = hub.pval,remove.unsig = TRUE,
                             min.size = 10,max.size = vcount(g)/2,
                             doPar = TRUE,num.cores = n.cores,n.perm = 100,
                             save.output = TRUE)
  saveRDS(MEGENA.output,file = "MEGENA.output.RDS")
  
}else{
  MEGENA.output = readRDS(file = "MEGENA.output.RDS")
}

m <- MEGENA.ModuleSummary(MEGENA.output, max.size=50000)
saveRDS(m,file = "MEGENA.summary.RDS")
write.table(m$module.table,file = "MEGENA_ModuleSummary.txt",sep ="\t",row.names = F,col.names = T,quote = F)
output.geneSet.file(m$modules,"MEGENA_ModuleSummary.modules.txt")
Study, Single-Cell
This post is licensed under CC BY 4.0 by the author.
SuperCell 로 pseudobulk만들고 MEGENA돌리는 pipeline

Trending Tags