RENIN
Regulatory Network Inference with single cell multiomic data RENIN generates parametric gene regulatory networks, predicting both cis-regulatory elements and transcription factors.
You can install RENIN with devtools:
library(devtools)
remotes::install_github('satijalab/seurat-wrappers')
devtools::install_github("nledru/RENIN")
renv::install("bioc::harmony")
19a: Load additional libraries
library(ArchR)
library(Seurat)
library(Signac)
library(SeuratWrappers)
library(RENIN)
library(harmony)
library(stringr)
library(EnsDb.Mmusculus.v79) # Ensembl database to convert to mouse mm10.
library(SingleCellExperiment)
library(chromVARmotifs)
library(universalmotif)
library(dplyr)
library(tidyr)
library(data.table)
19b. Grab the integrated ArchR object but in seurat object form
Install ArchRtoSignac
renv::install("bioc::biovizBase")
devtools::install_github("swaruplabUCI/ArchRtoSignac")
library(ArchRtoSignac)
19c. Generate a combined fragments file folder
cd $PROJECT_DIR
mkdir cell_ranger_fragments
cd cell_ranger_fragments/
cp ${PROJECT_DIR}/E12_scATAC-seq/outs/fragments.tsv.gz E12_fragments.tsv.gz
cp ${PROJECT_DIR}/E12_scATAC-seq/outs/fragments.tsv.gz.tbi E12_fragments.tsv.gz.tbi
cp ${PROJECT_DIR}/E18_scATAC-seq/outs/fragments.tsv.gz E18_fragments.tsv.gz
cp ${PROJECT_DIR}/E18_scATAC-seq/outs/fragments.tsv.gz.tbi E18_fragments.tsv.gz.tbi
cp ${PROJECT_DIR}/P5_scATAC-seq/outs/fragments.tsv.gz P5_fragments.tsv.gz
cp ${PROJECT_DIR}/P5_scATAC-seq/outs/fragments.tsv.gz.tbi P5_fragments.tsv.gz.tbi
cp ${PROJECT_DIR}/P30_scATAC-seq/outs/fragments.tsv.gz P30_fragments.tsv.gz
cp ${PROJECT_DIR}/P30_scATAC-seq/outs/fragments.tsv.gz.tbi P30_fragments.tsv.gz.tbi
19d. Create updated Seurat object from fragment files
pkm <- getPeakMatrix(proj8)
annotations <- getAnnotation(reference = EnsDb.Mmusculus.v79, refversion = "mm10") # "UCSC" is the default style to change to but can be changed with argument seqStyle
fragments_dir <- "${PROJECT_DIR}/cell_ranger_fragments/"
seurat_atac <- ArchR2Signac(
ArchRProject = proj8,
refversion = "mm10",
fragments_dir = fragments_dir,
pm = pkm, # peak matrix from getPeakMatrix()
fragments_fromcellranger = "N",
fragments_file_extension = "_fragments.tsv.gz", # File_Extension for fragments files
annotation = annotations # annotation from getAnnotation()
)
Need to custom modify this RENIN's functions to match the data
getGeneScoreMatrix <- function(
ArchRProject,
SeuratObject
){
print("In Progress:")
print("Get Gene Score Matrix From ArchRProject")
GeneScore_matrix <- ArchR::getMatrixFromProject(ArchRProject, useMatrix='GeneScoreMatrix_no_ribosomal')
gsm <- assays(GeneScore_matrix)$GeneScoreMatrix # peaks sparse martix
print("get Gene Features From ArchRProject")
GeneFeatures <-getFeatures(
ArchRProj = ArchRProject,
useMatrix = "GeneScoreMatrix_no_ribosomal",
select = NULL,
ignoreCase = TRUE
)
# set the column names for gsm
colnames(gsm) <- gsub("#", "_", colnames(gsm))
ix <- match(colnames(SeuratObject), colnames(gsm))
gsm <- gsm[,ix]
print("Saving Gene Features From ArchRProject into Gene Score Matrix")
rownames(gsm) <- GeneFeatures
print("Return Gene Score Matrix")
gsm
}
gsm <- getGeneScoreMatrix(ArchRProject = proj8, SeuratObject = seurat_atac)
seurat_atac[['RNA']] <- CreateAssayObject(counts = gsm)
Need to customize RENIN's addDimRed() function as well to match the data
addDimRed <- function(
ArchRProject,
SeuratObject
){
print("In Progress:")
print("add UMAP From ArchRProject to SeuratObject")
umap_df <- ArchRProject@embeddings$UMAP_LSI_Harmony_subset$df %>% as.matrix
rownames(umap_df) <- colnames(SeuratObject) # make the rowname the same format as seurat
colnames(umap_df) <- c('UMAP_1', 'UMAP_2')
SeuratObject@reductions$umap <- Seurat::CreateDimReducObject(
embeddings=umap_df,
assay="peaks"
)
print("In Progress:")
print("add reduction From ArchRProject to SeuratObject")
harmony_matrix <- ArchRProject@reducedDims$LSI_Harmony_subset$matDR
rownames(harmony_matrix) <- colnames(SeuratObject)
colnames(harmony_matrix) <- paste0('LSI_', 1:ncol(harmony_matrix))
SeuratObject@reductions$harmony <- Seurat::CreateDimReducObject(
embeddings=harmony_matrix,
assay="peaks"
)
print("Return SeuratObject")
SeuratObject
}
seurat_atac <- addDimRed(
ArchRProject = proj8,
SeuratObject = seurat_atac
)
19e. Identify the Ren1 expressing cells
gim <- getMatrixFromProject(proj8, "GeneIntegrationMatrix_LSI_Harmony")
gim_impute <- imputeMatrix(assay(gim), getImputeWeights(proj8))
gsm2 <- getMatrixFromProject(proj8, "GeneScoreMatrix_no_ribosomal")
gsm_impute <- imputeMatrix(assay(gsm2), getImputeWeights(proj8))
# Plot gene expression
ren1_gim <- gim_impute[grep('Ren1', rowData(gim)$name),]
ren1_gim_dt <- as.data.table(ren1_gim)
cellnames <- names(ren1_gim)
ren1_gim_dt[, cellname := cellnames]
ren1_gim_dt[, c("timepoint", "cellID") := tstrsplit(cellname, "#", fixed=TRUE)]
ren1_gim_dt <- merge(ren1_gim_dt, all_cells, by=c("cellname", "cellID", "timepoint"))
ren1_gim_dt$timepoint <- factor(ren1_gim_dt$timepoint,
levels = c("E12", "E18", "P5", "P30"))
# Identify clusters with *Ren1* expression greater than and less than the median
ren1_gim_cluster <- ren1_gim_dt[ ,list(mean=mean(ren1_gim)), by=cluster]
ren1_expressing_cells <- ren1_gim_cluster[ren1_gim_cluster$mean > quantile(ren1_gim_dt$ren1_gim, 0.5)[[1]],]$cluster
non_ren1_expressing_cells <- ren1_gim_cluster[ren1_gim_cluster$mean <= quantile(ren1_gim_dt$ren1_gim, 0.5)[[1]],]$cluster
p <- ggplot(ren1_gim_dt, aes(x=cluster, y=ren1_gim, col=cluster)) +
geom_boxplot() +
scale_x_discrete("cluster", drop=FALSE) +
labs(
x = "Cell Population",
y = "Ren1 Log2 (NormCounts + 1)"
) +
custom_theme_ArchR() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
svglite(paste0("Ren1_GIM_expression-by-cluster_boxplot_", Sys.Date(), ".svg"))
print(p)
dev.off()
# Plot gene activity score
ren1_gsm <- gsm_impute[grep('Ren1', rowData(gsm2)$name),]
ren1_gsm_dt <- as.data.table(ren1_gsm)
cellnames <- names(ren1_gsm)
ren1_gsm_dt[, cellname := cellnames]
ren1_gsm_dt[, c("timepoint", "cellID") := tstrsplit(cellname, "#", fixed=TRUE)]
ren1_gsm_dt <- merge(ren1_gsm_dt, all_cells, by=c("cellname", "cellID", "timepoint"))
ren1_gsm_dt$timepoint <- factor(ren1_gsm_dt$timepoint,
levels = c("E12", "E18", "P5", "P30"))
# Identify clusters with *Ren1* gene activity greater than and less than the median
ren1_gsm_cluster <- ren1_gsm_dt[ ,list(mean=mean(ren1_gsm)), by=cluster]
ren1_activity_cells <- ren1_gsm_cluster[ren1_gsm_cluster$mean > quantile(ren1_gsm_dt$ren1_gsm, 0.5)[[1]],]$cluster
non_ren1_activity_cells <- ren1_gsm_cluster[ren1_gsm_cluster$mean <= quantile(ren1_gsm_dt$ren1_gsm, 0.5)[[1]],]$cluster
p2 <- ggplot(ren1_gsm_dt, aes(x=cluster, y=ren1_gsm, col=cluster)) +
geom_boxplot() +
scale_x_discrete("cluster", drop=FALSE) +
labs(
x = "Mouse kidney development day",
y = "Ren1 NormCounts"
) +
custom_theme_ArchR() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
svglite(paste0("Ren1_GSM_expression-by-cluster_boxplot_", Sys.Date(), ".svg"))
print(p2)
dev.off()
# Match cells present in both gene expression and activity approaches
toMatch <- intersect(ren1_expressing_cells, ren1_activity_cells)
renin_positions <- grep(paste(toMatch,collapse="|"),
seurat_atac@meta.data$ManualClustersJG)
non_renin_positions <- which(!grepl(paste(toMatch,collapse="|"),
seurat_atac@meta.data$ManualClustersJG))
# Define datasets with the variable "RENIN".
seurat_atac@meta.data$renin <- "RENIN"
seurat_atac@meta.data$renin[non_renin_positions] <- "NON"
# Assay.use must be SCT, not RNA
merged_SCT <- SCTransform(object = seurat_atac, verbose = FALSE,
vars.to.regress = c("nCount_RNA"))
merged_SCT <- RunPCA(merged_SCT, pc.genes = merged_SCT@var.genes, npcs = 20,
verbose = FALSE)
seuratObj <- RunHarmony(
merged_SCT, "renin", assay.use="SCT", reduction.save = "harmony")
# Need to runHarmony on the ATAC data too
seuratObj <- SCTransform(object = seuratObj, verbose = FALSE,
vars.to.regress = c("nCount_peaks"))
seuratObj <- RunPCA(seuratObj, pc.genes = seuratObj@var.peaks, npcs = 20,
verbose = FALSE)
seuratObj <- RunHarmony(
seuratObj, "renin", assay.use="SCT", reduction.save = "harmony_peaks")
# Must re-run FindNeighbors before RunUMAP
# Generate Harmony derived peaks
seuratObj <- seuratObj %>%
RunUMAP(reduction = "harmony", dims = 1:20) %>%
FindNeighbors(reduction = "harmony", dims = 1:20) %>%
FindClusters(resolution = 0.5) %>%
identity()
seuratObj <- FindVariableFeatures(
seuratObj, selection.method = "vst", nfeatures = 3000)
19f. Perform RENIN analysis
seuratObj <- FindNeighbors(seuratObj, reduction = "harmony", dims = 1:20)
seuratObj <- FindClusters(seuratObj, resolution = 0.5)
seuratObj <- FindMultiModalNeighbors(seuratObj,
reduction.list = list("harmony",
"harmony_peaks"),
k.nn = 5, dims.list = list(1:20, 1:20))
seuratObj <- FindClusters(seuratObj, resolution = 0.5)
# Must runUMAP on both RNA and ATAC data
seuratObj <- RunUMAP(
seuratObj, reduction = "harmony", dims = 1:20, reduction.name = "UMAP")
seuratObj <- RunUMAP(
seuratObj, reduction = "harmony_peaks", dims = 1:20,
reduction.name = "UMAP_peaks")
# Change naming to match `monocle3` expectations
seuratObj[["HARMONY"]] <- seuratObj[["harmony"]]
seuratObj[["HARMONY_PEAKS"]] <- seuratObj[["harmony_peaks"]]
# This is essentially the first step of the `prepare_pseudocell_matrix` fcn
seuratObj <- FindMultiModalNeighbors(seuratObj,
reduction.list = list("harmony",
"harmony_peaks"),
k.nn = 5, dims.list = list(1:20, 1:20))
# MUST name this WNN.UMAP for `prepare_pseudocell_matrix` fcn
seuratObj <- RunUMAP(
seuratObj, reduction = "harmony", dims = 1:20, reduction.name = "WNN.UMAP")
seuratObj <- RunUMAP(
seuratObj, reduction = "harmony_peaks", dims = 1:20,
reduction.name = "WNN.UMAP_peaks")
# This is the second step of the `prepare_pseudocell_matrix` fcn (confirming the naming above works)
seurat.cds <- as.cell_data_set(seuratObj, assay = "SCT")
# Need to manually include this cluster_cells step before running `prepare_pseudocell_matrix` fcn
seurat.cds <- monocle3::cluster_cells(seurat.cds)
res <- reducedDim(seurat.cds, type = "WNN.UMAP", withDimnames = TRUE)
19g. Must overwrite default prepare_pseudocell_matrix fcn to use dplyr::select
Need to use dplyr::select for proper functionality
prepare_pseudocell_matrix <- function(seurat,
assay,
slot = "data",
cells_per_partition = 100,
find_neighbors = FALSE,
reduction1 = "harmony",
reduction2 = "harmony_peaks",
dim_list = list(1:50, 1:50),
k.nn = 5,
seed = 489284) {
require(Seurat)
require(Signac)
require(tidyverse)
require(Matrix)
if (!is.null(seed)) set.seed(seed)
pools <- apply_multi_micro_clustering(seurat,
cells_per_partition,
find_neighbors,
reduction1,
reduction2,
dim_list,
k.nn)
pool_column <- vector("numeric", dim(seurat)[2])
names(pool_column) <- colnames(seurat)
for (i in 1:length(pools)) {
pool_column[pools[[i]]] <- rep(i, length(pools[[i]]))
}
seurat <- AddMetaData(seurat, metadata = as.data.frame(pool_column))
pseudocell_mats <- vector("list", length = length(assay))
names(pseudocell_mats) <- assay
for (a in assay) {
mat <- GetAssayData(seurat, assay = a, slot = slot)
pseudocell_mat <- as_tibble(t(sapply(pools,
function(x) {
if (length(x) > 1) {
Matrix::rowSums(mat[, x]) / length(x)
} else {
mat[, x]
}
},
simplify = TRUE)),
rownames = "pseudocell_IDs")
pseudocell_mat <- pseudocell_mat %>% dplyr::select(-pseudocell_IDs) %>% as.data.frame
pseudocell_mats[[a]] <- pseudocell_mat
}
if (length(assay) == 1) return(pseudocell_mat)
return(pseudocell_mats)
}
mats <- prepare_pseudocell_matrix(seuratObj,
assay = c("peaks", "SCT"),
reduction1 = "harmony",
reduction2 = "harmony_peaks",
cells_per_partition = 50,
dim_list = list(1:20, 1:20))
expr_mat <- mats[["SCT"]]
peak_mat <- mats[["peaks"]]
Model the genes that are differentially expressed. - predict CREs using the full dataset containing all cell types - For the adaptive elastic net step, lambda2 = 0.5 and num_bootstraps = 100 are good defaults to achieve good model performance and relatively short runtime.
gene_list <- rownames(prepare_degs(seuratObj,
ident.1 = "RENIN", ident.2 = "NON",
group.by="renin"))
gene_list_top250 <- sort(gene_list[1:250])
Run run_peak_aen step by step to override errors in base code
DefaultAssay(seuratObj) <- "peaks" # function requires this call! It's in `prepare_degs` but NOT in `run_peak_aen`
# Now run the function after setting DefaultAssay
peak_results <- run_peak_aen(seuratObj, expr_mat, peak_mat,
gene_list_top250, lambda2 = 0.5,
max_distance = 5e+05, num_bootstraps = 100)
aen_lists <- make_aen_lists(peak_results)
19h. Look for motif enrichment in the predicted CREs.
# de.genes object == `prepare_degs` output
de.genes <- prepare_degs(seuratObj,
ident.1 = "RENIN", ident.2 = "NON",
group.by="renin")
non_genes <- rownames(de.genes)[which(de.genes$avg_log2FC < 0)]
all_cres <- unique(unlist(aen_lists))
cre_scores <- lapply(peak_results,
function(x) x[[4]][union(1, which(x[[4]][, "coef_if_kept"] != 0)),
"coef_if_kept"] * ifelse(x[[1]] %in% non_genes, -1, 1))
cre_scores <- cre_scores[which(unlist(lapply(cre_scores, length)) > 1)]
cre_total_scores <- bind_rows(cre_scores)
cre_total_scores[is.na(cre_total_scores)] <- 0
cre_total_scores <- cre_total_scores[,-1]
cre_total_scores_sums <- colSums(cre_total_scores)
cre_num_genes <- apply(cre_total_scores, 2, function(x) length(which(x != 0))) # how many genes each CRE targets
non_cres <- names(cre_total_scores_sums)[which(cre_total_scores_sums < 0)]
renin_cres <- names(cre_total_scores_sums)[which(cre_total_scores_sums > 0)]
## GC.percent not present in meta.features. Run RegionStats to compute GC.percent for each feature
seuratObj[["peaks"]] <- Signac::RegionStats(
object = seuratObj[["peaks"]],
genome = BSgenome.Mmusculus.UCSC.mm10
)
# Use same cisbp PWM as I use in ArchR
## Generate the PWM used from chromVAR
data("mouse_pwms_v1")
sout <- sapply(strsplit(names(mouse_pwms_v1), split = "_"), function(s) c(s[3]))
mouse_pwms_v2 <- mouse_pwms_v1[match(unique(sout), sout)]
#### See https://rdrr.io/github/timoast/signac/man/AddMotifs.html
seuratObj[["peaks"]] <- Signac::AddMotifs(
object = seuratObj[["peaks"]],
genome = BSgenome.Mmusculus.UCSC.mm10,
pfm = mouse_pwms_v2,
verbose = TRUE
)
svglite(paste0("RENIN_motif_enrichment_", Sys.Date(), ".svg"))
plot_motif_enrichment(seuratObj, non_cres, num_top_label = 5)
dev.off()
19i. Identify TFs with motifs within the linked CREs
run_tf_aen also needs the dplyer::select call added to the peaks_to_tfs fcn
peaks_to_tfs <- function(peak_list, key_tibble) {
require(tidyverse)
if (length(peak_list) == 1 && peak_list == "") {
return("")
}
subset_tibble <- key_tibble[, c("rowname", peak_list)]
subset <- subset_tibble %>% dplyr::select(-rowname) %>% rowSums
subset <- which(subset > 0)
return(subset_tibble$rowname[subset])
}
Run step by step through run_tf_aen to avoid errors
require(Seurat)
require(Signac)
require(SeuratWrappers)
require(BiocGenerics)
require(tidyverse)
require(gcdnet)
require(future)
require(future.apply)
seurat <- seuratObj
pseudocell_mat <- expr_mat
aen_results_peaks <- peak_results
gene_list <- gene_list_top250
lambda2 <- 0.5
gamma <- 1
ci_cutoff <- 1.96
pval_cutoff <- NULL
set_nonsig_to_zero <- TRUE
promoter_only <- FALSE
promoter_threshold <- 2000
train_fraction <- 0.8
num_bootstraps <- 100
bootstrap <- TRUE
num_threads <- 16
globals_maxsize <- NULL
verbose <- TRUE
bs_seed <- 943126
peak_assay <- "peaks"
multi_seed <- 6283408
modeled_genes <- names(aen_results_peaks)
if (!is.null(gene_list) && promoter_only) {
if (class(gene_list) == "list") {
gene_list <- unlist(gene_list)
}
modeled_genes <- gene_list
}
peak_tf_key <- seurat@assays$peaks@motifs@data
colnames(peak_tf_key) <- unlist(seurat@assays$peaks@motifs@motif.names)
regulator_tf_names <- colnames(peak_tf_key)
regulator_tf_names <- regulator_tf_names[which(regulator_tf_names %in% colnames(pseudocell_mat))]
peak_tf_key <- peak_tf_key[, regulator_tf_names]
peak_tf_key_tibble <- as_tibble(BiocGenerics::t(peak_tf_key), rownames = "rowname")
# prepare initial TF-gene lists
gene_coords <- Signac:::CollapseToLongestTranscript(Annotation(seurat))
all_peaks <- rownames(GetAssayData(seurat, assay = peak_assay))
prom_matrix <- RENIN:::UpstreamDistanceToTSS(peaks = StringToGRanges(all_peaks),
genes = gene_coords[which(gene_coords$gene_name %in% modeled_genes)],
distance = promoter_threshold)
prom_input_list <- lapply(modeled_genes, function(x) {
peak_col <- prom_matrix[, x]
return(names(peak_col)[which(peak_col > 0)])
})
names(prom_input_list) <- modeled_genes
aen_lists <- lapply(aen_results_peaks, function(x) {
l <- x[[4]]
l <- as.data.frame(l)[which(l[, "coef_if_kept"] != 0), ]
l <- rownames(l)[which(rownames(l) != "(Intercept)")]
})
names(aen_lists) <- modeled_genes
linked_peak_list <- lapply(modeled_genes, function(x) {
union(aen_lists[[x]], prom_input_list[[x]])
})
names(linked_peak_list) <- modeled_genes
tf_aggregates <- lapply(linked_peak_list, peaks_to_tfs, key_tibble = peak_tf_key_tibble)
names(tf_aggregates) <- modeled_genes
tf_aggregates <- tf_aggregates[which(unlist(lapply(tf_aggregates, length)) > 1)]
genes_without_peaks <- modeled_genes[which(!(modeled_genes %in% names(tf_aggregates)))]
if (verbose) print(paste0("Genes without any candidate TFs--most likely no linked peaks or promoter region peaks: ", genes_without_peaks))
# bootstrap setup
num_bootstraps = num_bootstraps + 1 # last seed added for cross validation step
if (!bootstrap) num_bootstraps = 2
if (!(is.null(bs_seed))) set.seed(bs_seed)
bootstrap_sequence_input_lists <- make_bootstrap_sequences(num_bootstraps, gene_list)
# run aen
tf_aggregates <- tf_aggregates[which(unlist(lapply(tf_aggregates, length)) > 1)]
input_list <- lapply(names(tf_aggregates),
function(x) list(x, tf_aggregates[[x]],
bootstrap_sequence_input_lists[[x]]))
names(input_list) <- names(tf_aggregates)
if (num_threads > 1) {
plan(multisession, workers = num_threads)
if (!is.null(globals_maxsize)) options(future.globals.maxSize = globals_maxsize)
}
start <- Sys.time()
if (num_threads == 1) {
aen_results <- lapply(input_list, run_aen_tfs_for_gene,
regulator_names = regulator_tf_names,
pseudocell_matrix = pseudocell_mat,
lambda2 = lambda2,
train_fraction = train_fraction,
gamma = gamma,
ci_cutoff = ci_cutoff,
pval_cutoff = pval_cutoff,
set_nonsig_to_zero = set_nonsig_to_zero)
} else if (!is.null(multi_seed)) {
aen_results <- future_lapply(input_list, run_aen_tfs_for_gene,
regulator_names = regulator_tf_names,
pseudocell_matrix = pseudocell_mat,
lambda2 = lambda2,
train_fraction = train_fraction,
gamma = gamma,
ci_cutoff = ci_cutoff,
pval_cutoff = pval_cutoff,
set_nonsig_to_zero = set_nonsig_to_zero,
future.seed = multi_seed)
} else {
aen_results <- future_lapply(input_list, run_aen_tfs_for_gene,
regulator_names = regulator_tf_names,
pseudocell_matrix = pseudocell_mat,
lambda2 = lambda2,
train_fraction = train_fraction,
gamma = gamma,
ci_cutoff = ci_cutoff,
pval_cutoff = pval_cutoff,
set_nonsig_to_zero = set_nonsig_to_zero)
}
names(aen_results) <- names(input_list)
end <- Sys.time()
if (verbose) print(paste0("AEN completed in ", end - start))
plan(sequential)
tf_results <- aen_results
tf_results <- run_tf_aen(seuratObj, expr_mat, peak_results,
gene_list_top250, lambda2 = 0.5)
regulator_tf_names <- unlist(seuratObj@assays$peaks@motifs@motif.names)
regulator_tf_names <- regulator_tf_names[which(regulator_tf_names %in% rownames(GetAssayData(seuratObj, assay = "SCT")))]
19j. Rank TFs by predicted regulatory score
TF regulatory score: sum the estimated regulatory coefficients across all modeled genes, then weight by expression.
tf_df <- rank_tfs(tf_results, seuratObj, non_genes, regulator_tf_names, num_cores = 1)
svglite(paste0("RENIN_plot_tf_rankings_", Sys.Date(),".svg"))
plot_tf_rankings(tf_df)
dev.off()
centrality_rankings <- rank_tfs_by_centrality(tf_results, seuratObj)
fwrite(data.table(TF=names(head(centrality_rankings[[1]], n = 10)),
Betweeness=head(centrality_rankings[[1]], n = 10)),
paste0("RENIN_RENIN-v-NON_Betweenness.csv"), sep=",")
fwrite(data.table(TF=names(head(centrality_rankings[[2]], n = 10)),
Betweeness=head(centrality_rankings[[2]], n = 10)),
paste0("RENIN_RENIN-v-NON_PageRank.csv"), sep=",")
19k. Analyse individual cell population comparisons
Create custom function to generate plots of cell-vs-cell comparisons.
plotRENIN <- function(seurat,
expr_mat=NULL,
peak_mat=NULL,
ident.1=NULL,
ident.1.name=NULL,
ident.2=NULL,
ident.2.name=NULL,
group.by=NULL,
max.genes=250,
lambda2 = 0.5,
max.distance = 5e+05,
num.bootstraps = 250) {
de.genes <- prepare_degs(
seurat,
ident.1 = ident.1,
ident.2 = ident.2,
group.by = group.by
)
gene_list <- rownames(de.genes)
if (length(gene_list) >= max.genes) {
gene_list <- sort(gene_list[1:max.genes])
} else {
gene_list <- sort(gene_list)
}
# function requires this call! It's in `prepare_degs` but NOT in `run_peak_aen`
DefaultAssay(seurat) <- "peaks"
# Now run the function after setting DefaultAssay
peak_results <- run_peak_aen(seurat,
expr_mat,
peak_mat,
gene_list,
lambda2 = lambda2,
max_distance = max.distance,
num_bootstraps = num.bootstraps)
aen_lists <- make_aen_lists(peak_results)
non_genes <- rownames(de.genes)[which(de.genes$avg_log2FC < 0)]
all_cres <- unique(unlist(aen_lists))
cre_scores <- lapply(peak_results, function(x) x[[4]][union(1, which(x[[4]][, "coef_if_kept"] != 0)), "coef_if_kept"] * ifelse(x[[1]] %in% non_genes, -1, 1))
cre_scores <- cre_scores[which(unlist(lapply(cre_scores, length)) > 1)]
cre_total_scores <- bind_rows(cre_scores)
cre_total_scores[is.na(cre_total_scores)] <- 0
cre_total_scores <- cre_total_scores[,-1]
cre_total_scores_sums <- colSums(cre_total_scores)
cre_num_genes <- apply(cre_total_scores, 2, function(x) length(which(x != 0))) # how many genes each CRE targets
non_cres <- names(cre_total_scores_sums)[which(cre_total_scores_sums < 0)]
renin_cres <- names(cre_total_scores_sums)[which(cre_total_scores_sums > 0)]
## GC.percent not present in meta.features. Run RegionStats to compute GC.percent for each feature
seurat[["peaks"]] <- Signac::RegionStats(
object = seurat[["peaks"]],
genome = BSgenome.Mmusculus.UCSC.mm10
)
## no motifs slot in this object...
### Use same cisbp PWM as I use in ArchR
#### See https://rdrr.io/github/timoast/signac/man/AddMotifs.html
seurat[["peaks"]] <- Signac::AddMotifs(
object = seurat[["peaks"]],
genome = BSgenome.Mmusculus.UCSC.mm10,
pfm = mouse_pwms_v2,
verbose = TRUE
)
svglite(paste0("RENIN_", ident.1.name, "-v-", ident.2.name, "_motif_enrichment.svg"))
print(plot_motif_enrichment(seurat, non_cres, num_top_label = 5))
dev.off()
tf_results <- run_tf_aen(seurat, expr_mat, peak_results,
gene_list, lambda2 = 0.5)
regulator_tf_names <- unlist(seurat@assays$peaks@motifs@motif.names)
regulator_tf_names <- regulator_tf_names[which(regulator_tf_names %in% rownames(GetAssayData(seurat, assay = "SCT")))]
tf_df <- rank_tfs(tf_results, seurat, non_genes, regulator_tf_names, num_cores = 1)
svglite(paste0("RENIN_", ident.1.name, "-v-", ident.2.name, "_plot_tf_rankings.svg"))
print(plot_tf_rankings(tf_df))
dev.off()
centrality_rankings <- rank_tfs_by_centrality(tf_results, seurat)
fwrite(data.table(TF=names(head(centrality_rankings[[1]], n = 10)),
Betweeness=head(centrality_rankings[[1]], n = 10)),
paste0("RENIN_", ident.1.name, "-v-", ident.2.name, "_Betweenness.csv"), sep=",")
fwrite(data.table(TF=names(head(centrality_rankings[[2]], n = 10)),
Betweeness=head(centrality_rankings[[2]], n = 10)),
paste0("RENIN_", ident.1.name, "-v-", ident.2.name, "_PageRank.csv"), sep=",")
}
seurat_subset <- subset(seuratObj, ManualClustersJG %in% c("early_JGs", "late_JGs", "early_VSMCs", "late_VSMCs"))
plotRENIN(seurat=seurat_subset,
expr_mat=expr_mat,
peak_mat=peak_mat,
ident.1 = c("early_JGs", "late_JGs"),
ident.1.name="JGs",
ident.2 = c("early_VSMCs", "late_VSMCs"),
ident.2.name="VSMCs",
group.by="ManualClustersJG")
seurat_subset <- subset(seuratObj, ManualClustersJG %in% c("late_JGs", "late_VSMCs"))
plotRENIN(seurat=seurat_subset,
expr_mat=expr_mat,
peak_mat=peak_mat,
ident.1 = "late_JGs",
ident.1.name="late_JGs",
ident.2 = "late_VSMCs",
ident.2.name="late_VSMCs",
group.by="ManualClustersJG")
seurat_subset <- subset(seuratObj, ManualClustersJG %in% c("early_JGs", "early_VSMCs"))
plotRENIN(seurat=seurat_subset,
expr_mat=expr_mat,
peak_mat=peak_mat,
ident.1 = "early_JGs",
ident.1.name="early_JGs",
ident.2 = "early_VSMCs",
ident.2.name="early_VSMCs",
group.by="ManualClustersJG")
for (i in 1:(length(trajectory2)-1)) {
fg_group <- trajectory2[i]
bg_group <- trajectory2[i+1]
if(!file.exists(paste0("RENIN_", fg_group, "-v-", bg_group, "_PageRank.csv"))) {
plotRENIN(seurat=seurat_subset,
expr_mat=expr_mat,
peak_mat=peak_mat,
ident.1 = fg_group,
ident.1.name=fg_group,
ident.2 = bg_group,
ident.2.name=bg_group,
group.by="ManualClustersJG")
}
}
save.image(paste0("RENIN_analysis_", Sys.Date(), ".RData"))