RENIN

Regulatory Network Inference with single cell multiomic data RENIN generates parametric gene regulatory networks, predicting both cis-regulatory elements and transcription factors.

You can install RENIN with devtools:

library(devtools)
remotes::install_github('satijalab/seurat-wrappers')
devtools::install_github("nledru/RENIN")
renv::install("bioc::harmony")

19a: Load additional libraries

library(ArchR)
library(Seurat)
library(Signac)
library(SeuratWrappers)
library(RENIN)
library(harmony)
library(stringr)
library(EnsDb.Mmusculus.v79) # Ensembl database to convert to mouse mm10. 
library(SingleCellExperiment)
library(chromVARmotifs)
library(universalmotif)
library(dplyr)
library(tidyr)
library(data.table)

19b. Grab the integrated ArchR object but in seurat object form

Install ArchRtoSignac

renv::install("bioc::biovizBase")
devtools::install_github("swaruplabUCI/ArchRtoSignac")
library(ArchRtoSignac)

19c. Generate a combined fragments file folder

cd $PROJECT_DIR
mkdir cell_ranger_fragments
cd cell_ranger_fragments/

cp ${PROJECT_DIR}/E12_scATAC-seq/outs/fragments.tsv.gz E12_fragments.tsv.gz
cp ${PROJECT_DIR}/E12_scATAC-seq/outs/fragments.tsv.gz.tbi E12_fragments.tsv.gz.tbi
cp ${PROJECT_DIR}/E18_scATAC-seq/outs/fragments.tsv.gz E18_fragments.tsv.gz
cp ${PROJECT_DIR}/E18_scATAC-seq/outs/fragments.tsv.gz.tbi E18_fragments.tsv.gz.tbi
cp ${PROJECT_DIR}/P5_scATAC-seq/outs/fragments.tsv.gz P5_fragments.tsv.gz
cp ${PROJECT_DIR}/P5_scATAC-seq/outs/fragments.tsv.gz.tbi P5_fragments.tsv.gz.tbi
cp ${PROJECT_DIR}/P30_scATAC-seq/outs/fragments.tsv.gz P30_fragments.tsv.gz
cp ${PROJECT_DIR}/P30_scATAC-seq/outs/fragments.tsv.gz.tbi P30_fragments.tsv.gz.tbi

19d. Create updated Seurat object from fragment files

pkm <- getPeakMatrix(proj8) 
annotations <- getAnnotation(reference = EnsDb.Mmusculus.v79, refversion = "mm10") # "UCSC" is the default style to change to but can be changed with argument seqStyle

fragments_dir <- "${PROJECT_DIR}/cell_ranger_fragments/"

seurat_atac <- ArchR2Signac(
  ArchRProject = proj8,
  refversion = "mm10",
  fragments_dir = fragments_dir,
  pm = pkm, # peak matrix from getPeakMatrix()
  fragments_fromcellranger = "N",
  fragments_file_extension = "_fragments.tsv.gz", # File_Extension for fragments files
  annotation = annotations # annotation from getAnnotation()
)

Need to custom modify this RENIN's functions to match the data

getGeneScoreMatrix <- function(
  ArchRProject,
  SeuratObject
){
  print("In Progress:")
  print("Get Gene Score Matrix From ArchRProject")
  GeneScore_matrix <- ArchR::getMatrixFromProject(ArchRProject, useMatrix='GeneScoreMatrix_no_ribosomal')
  gsm <- assays(GeneScore_matrix)$GeneScoreMatrix # peaks sparse martix
  print("get Gene Features From ArchRProject")
  GeneFeatures <-getFeatures(
    ArchRProj = ArchRProject,
    useMatrix = "GeneScoreMatrix_no_ribosomal",
    select = NULL,
    ignoreCase = TRUE
  )
  # set the column names for gsm
  colnames(gsm) <- gsub("#", "_", colnames(gsm))
  ix <- match(colnames(SeuratObject), colnames(gsm))
  gsm <- gsm[,ix]

  print("Saving Gene Features From ArchRProject into Gene Score Matrix")
  rownames(gsm) <- GeneFeatures

  print("Return Gene Score Matrix")
  gsm
}

gsm <- getGeneScoreMatrix(ArchRProject = proj8, SeuratObject = seurat_atac)
seurat_atac[['RNA']] <- CreateAssayObject(counts = gsm)

Need to customize RENIN's addDimRed() function as well to match the data

addDimRed <- function(
  ArchRProject,
  SeuratObject
){
  print("In Progress:")
  print("add UMAP From ArchRProject to SeuratObject")
  umap_df <- ArchRProject@embeddings$UMAP_LSI_Harmony_subset$df %>% as.matrix
  rownames(umap_df) <- colnames(SeuratObject) # make the rowname the same format as seurat
  colnames(umap_df) <- c('UMAP_1', 'UMAP_2')

  SeuratObject@reductions$umap <- Seurat::CreateDimReducObject(
      embeddings=umap_df,
      assay="peaks"
  )

  print("In Progress:")
  print("add reduction From ArchRProject to SeuratObject")

  harmony_matrix <- ArchRProject@reducedDims$LSI_Harmony_subset$matDR
  rownames(harmony_matrix) <- colnames(SeuratObject)
  colnames(harmony_matrix) <- paste0('LSI_', 1:ncol(harmony_matrix))

  SeuratObject@reductions$harmony <- Seurat::CreateDimReducObject(
      embeddings=harmony_matrix,
      assay="peaks"
  )

  print("Return SeuratObject")
  SeuratObject

}

seurat_atac <- addDimRed(
  ArchRProject = proj8,
  SeuratObject = seurat_atac
)

19e. Identify the Ren1 expressing cells

gim <- getMatrixFromProject(proj8, "GeneIntegrationMatrix_LSI_Harmony")
gim_impute <- imputeMatrix(assay(gim), getImputeWeights(proj8))

gsm2 <- getMatrixFromProject(proj8, "GeneScoreMatrix_no_ribosomal")
gsm_impute <- imputeMatrix(assay(gsm2), getImputeWeights(proj8))

# Plot gene expression
ren1_gim <- gim_impute[grep('Ren1', rowData(gim)$name),]
ren1_gim_dt <- as.data.table(ren1_gim)
cellnames <- names(ren1_gim)
ren1_gim_dt[, cellname := cellnames]
ren1_gim_dt[, c("timepoint", "cellID") := tstrsplit(cellname, "#", fixed=TRUE)]
ren1_gim_dt <- merge(ren1_gim_dt, all_cells, by=c("cellname", "cellID", "timepoint"))
ren1_gim_dt$timepoint <- factor(ren1_gim_dt$timepoint,
                                levels = c("E12", "E18", "P5", "P30"))

# Identify clusters with *Ren1* expression greater than and less than the median
ren1_gim_cluster <- ren1_gim_dt[ ,list(mean=mean(ren1_gim)), by=cluster]
ren1_expressing_cells <- ren1_gim_cluster[ren1_gim_cluster$mean > quantile(ren1_gim_dt$ren1_gim, 0.5)[[1]],]$cluster
non_ren1_expressing_cells <- ren1_gim_cluster[ren1_gim_cluster$mean <= quantile(ren1_gim_dt$ren1_gim, 0.5)[[1]],]$cluster

p <- ggplot(ren1_gim_dt, aes(x=cluster, y=ren1_gim, col=cluster)) +
    geom_boxplot() +
    scale_x_discrete("cluster", drop=FALSE) +
    labs(
        x = "Cell Population",
        y = "Ren1 Log2 (NormCounts + 1)"
      ) +
    custom_theme_ArchR() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
svglite(paste0("Ren1_GIM_expression-by-cluster_boxplot_", Sys.Date(), ".svg"))
print(p)
dev.off()

# Plot gene activity score
ren1_gsm <- gsm_impute[grep('Ren1', rowData(gsm2)$name),]
ren1_gsm_dt <- as.data.table(ren1_gsm)
cellnames <- names(ren1_gsm)
ren1_gsm_dt[, cellname := cellnames]
ren1_gsm_dt[, c("timepoint", "cellID") := tstrsplit(cellname, "#", fixed=TRUE)]
ren1_gsm_dt <- merge(ren1_gsm_dt, all_cells, by=c("cellname", "cellID", "timepoint"))
ren1_gsm_dt$timepoint <- factor(ren1_gsm_dt$timepoint,
                                levels = c("E12", "E18", "P5", "P30"))

# Identify clusters with *Ren1* gene activity greater than and less than the median
ren1_gsm_cluster <- ren1_gsm_dt[ ,list(mean=mean(ren1_gsm)), by=cluster]
ren1_activity_cells <- ren1_gsm_cluster[ren1_gsm_cluster$mean > quantile(ren1_gsm_dt$ren1_gsm, 0.5)[[1]],]$cluster
non_ren1_activity_cells <- ren1_gsm_cluster[ren1_gsm_cluster$mean <= quantile(ren1_gsm_dt$ren1_gsm, 0.5)[[1]],]$cluster

p2 <- ggplot(ren1_gsm_dt, aes(x=cluster, y=ren1_gsm, col=cluster)) +
    geom_boxplot() +
    scale_x_discrete("cluster", drop=FALSE) +
    labs(
        x = "Mouse kidney development day",
        y = "Ren1 NormCounts"
      ) +
    custom_theme_ArchR() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
svglite(paste0("Ren1_GSM_expression-by-cluster_boxplot_", Sys.Date(), ".svg"))
print(p2)
dev.off()


# Match cells present in both gene expression and activity approaches
toMatch <- intersect(ren1_expressing_cells, ren1_activity_cells)

renin_positions     <- grep(paste(toMatch,collapse="|"),
    seurat_atac@meta.data$ManualClustersJG)
non_renin_positions <- which(!grepl(paste(toMatch,collapse="|"),
    seurat_atac@meta.data$ManualClustersJG))

# Define datasets with the variable "RENIN".
seurat_atac@meta.data$renin <- "RENIN"
seurat_atac@meta.data$renin[non_renin_positions] <- "NON"

# Assay.use must be SCT, not RNA
merged_SCT <- SCTransform(object = seurat_atac, verbose = FALSE,
                          vars.to.regress = c("nCount_RNA"))
merged_SCT <- RunPCA(merged_SCT, pc.genes = merged_SCT@var.genes, npcs = 20,
                     verbose = FALSE)

seuratObj <- RunHarmony(
    merged_SCT, "renin", assay.use="SCT", reduction.save = "harmony")
# Need to runHarmony on the ATAC data too
seuratObj <- SCTransform(object = seuratObj, verbose = FALSE,
                          vars.to.regress = c("nCount_peaks"))
seuratObj <- RunPCA(seuratObj, pc.genes = seuratObj@var.peaks, npcs = 20,
                     verbose = FALSE)                          
seuratObj <- RunHarmony(
    seuratObj, "renin", assay.use="SCT", reduction.save = "harmony_peaks")

# Must re-run FindNeighbors before RunUMAP
# Generate Harmony derived peaks
seuratObj <- seuratObj %>% 
    RunUMAP(reduction = "harmony", dims = 1:20) %>% 
    FindNeighbors(reduction = "harmony", dims = 1:20) %>% 
    FindClusters(resolution = 0.5) %>% 
    identity()
seuratObj <- FindVariableFeatures(
    seuratObj, selection.method = "vst", nfeatures = 3000)

19f. Perform RENIN analysis

seuratObj <- FindNeighbors(seuratObj, reduction = "harmony", dims = 1:20)
seuratObj <- FindClusters(seuratObj, resolution = 0.5)
seuratObj <- FindMultiModalNeighbors(seuratObj,
                                     reduction.list = list("harmony",
                                                           "harmony_peaks"),
                                     k.nn = 5, dims.list = list(1:20, 1:20))
seuratObj <- FindClusters(seuratObj, resolution = 0.5)
# Must runUMAP on both RNA and ATAC data
seuratObj <- RunUMAP(
    seuratObj, reduction = "harmony", dims = 1:20, reduction.name = "UMAP")
seuratObj <- RunUMAP(
    seuratObj, reduction = "harmony_peaks", dims = 1:20,
    reduction.name = "UMAP_peaks")

# Change naming to match `monocle3` expectations
seuratObj[["HARMONY"]] <- seuratObj[["harmony"]]
seuratObj[["HARMONY_PEAKS"]] <- seuratObj[["harmony_peaks"]]

# This is essentially the first step of the `prepare_pseudocell_matrix` fcn
seuratObj <- FindMultiModalNeighbors(seuratObj,
                                     reduction.list = list("harmony",
                                                           "harmony_peaks"),
                                     k.nn = 5, dims.list = list(1:20, 1:20))
# MUST name this WNN.UMAP for `prepare_pseudocell_matrix` fcn
seuratObj <- RunUMAP(
    seuratObj, reduction = "harmony", dims = 1:20, reduction.name = "WNN.UMAP")
seuratObj <- RunUMAP(
    seuratObj, reduction = "harmony_peaks", dims = 1:20,
    reduction.name = "WNN.UMAP_peaks")

# This is the second step of the `prepare_pseudocell_matrix` fcn (confirming the naming above works)
seurat.cds <- as.cell_data_set(seuratObj, assay = "SCT")
# Need to manually include this cluster_cells step before running `prepare_pseudocell_matrix` fcn
seurat.cds <- monocle3::cluster_cells(seurat.cds)
res        <- reducedDim(seurat.cds, type = "WNN.UMAP", withDimnames = TRUE)

19g. Must overwrite default prepare_pseudocell_matrix fcn to use dplyr::select

Need to use dplyr::select for proper functionality

prepare_pseudocell_matrix <- function(seurat,
                                      assay,
                                      slot = "data",
                                      cells_per_partition = 100,
                                      find_neighbors = FALSE,
                                      reduction1 = "harmony",
                                      reduction2 = "harmony_peaks",
                                      dim_list = list(1:50, 1:50),
                                      k.nn = 5,
                                      seed = 489284) {
    require(Seurat)
    require(Signac)
    require(tidyverse)
    require(Matrix)

    if (!is.null(seed)) set.seed(seed)
    pools <- apply_multi_micro_clustering(seurat,
                                          cells_per_partition,
                                          find_neighbors,
                                          reduction1,
                                          reduction2,
                                          dim_list,
                                          k.nn)
    pool_column <- vector("numeric", dim(seurat)[2])
    names(pool_column) <- colnames(seurat)
    for (i in 1:length(pools)) {
        pool_column[pools[[i]]] <- rep(i, length(pools[[i]]))
    }
    seurat <- AddMetaData(seurat, metadata = as.data.frame(pool_column))

    pseudocell_mats <- vector("list", length = length(assay))
    names(pseudocell_mats) <- assay
    for (a in assay) {
        mat <- GetAssayData(seurat, assay = a, slot = slot)
        pseudocell_mat <- as_tibble(t(sapply(pools, 
                                             function(x) {
                                                    if (length(x) > 1) {
                                                        Matrix::rowSums(mat[, x]) / length(x)
                                                    } else {
                                                        mat[, x]
                                                    }
                                                },
                                             simplify = TRUE)),
                                    rownames = "pseudocell_IDs")
        pseudocell_mat <- pseudocell_mat %>% dplyr::select(-pseudocell_IDs) %>% as.data.frame
        pseudocell_mats[[a]] <- pseudocell_mat
    }

    if (length(assay) == 1) return(pseudocell_mat)

    return(pseudocell_mats)
}

mats <- prepare_pseudocell_matrix(seuratObj,
                                  assay = c("peaks", "SCT"),
                                  reduction1 = "harmony",
                                  reduction2 = "harmony_peaks",
                                  cells_per_partition = 50,
                                  dim_list = list(1:20, 1:20))

expr_mat <- mats[["SCT"]]
peak_mat <- mats[["peaks"]]

Model the genes that are differentially expressed. - predict CREs using the full dataset containing all cell types - For the adaptive elastic net step, lambda2 = 0.5 and num_bootstraps = 100 are good defaults to achieve good model performance and relatively short runtime.

gene_list <- rownames(prepare_degs(seuratObj,
                                    ident.1 = "RENIN", ident.2 = "NON",
                                    group.by="renin"))
gene_list_top250 <- sort(gene_list[1:250])

Run run_peak_aen step by step to override errors in base code

DefaultAssay(seuratObj) <- "peaks" # function requires this call! It's in `prepare_degs` but NOT in `run_peak_aen`
# Now run the function after setting DefaultAssay
peak_results <- run_peak_aen(seuratObj, expr_mat, peak_mat,
                             gene_list_top250, lambda2 = 0.5,
                             max_distance = 5e+05, num_bootstraps = 100)
aen_lists <- make_aen_lists(peak_results)

19h. Look for motif enrichment in the predicted CREs.

# de.genes object == `prepare_degs` output
de.genes <- prepare_degs(seuratObj,
                         ident.1 = "RENIN", ident.2 = "NON",
                         group.by="renin")

non_genes  <- rownames(de.genes)[which(de.genes$avg_log2FC < 0)]
all_cres   <- unique(unlist(aen_lists))
cre_scores <- lapply(peak_results,
    function(x) x[[4]][union(1, which(x[[4]][, "coef_if_kept"] != 0)),
    "coef_if_kept"] * ifelse(x[[1]] %in% non_genes, -1, 1))

cre_scores       <- cre_scores[which(unlist(lapply(cre_scores, length)) > 1)]
cre_total_scores <- bind_rows(cre_scores)
cre_total_scores[is.na(cre_total_scores)] <- 0
cre_total_scores      <- cre_total_scores[,-1]
cre_total_scores_sums <- colSums(cre_total_scores)
cre_num_genes <- apply(cre_total_scores, 2, function(x) length(which(x != 0))) # how many genes each CRE targets
non_cres   <- names(cre_total_scores_sums)[which(cre_total_scores_sums < 0)]
renin_cres <- names(cre_total_scores_sums)[which(cre_total_scores_sums > 0)]

## GC.percent not present in meta.features. Run RegionStats to compute GC.percent for each feature
seuratObj[["peaks"]] <- Signac::RegionStats(
  object = seuratObj[["peaks"]],
  genome = BSgenome.Mmusculus.UCSC.mm10
)

# Use same cisbp PWM as I use in ArchR
## Generate the PWM used from chromVAR
data("mouse_pwms_v1")
sout <- sapply(strsplit(names(mouse_pwms_v1), split = "_"), function(s) c(s[3]))
mouse_pwms_v2 <- mouse_pwms_v1[match(unique(sout), sout)]

#### See https://rdrr.io/github/timoast/signac/man/AddMotifs.html
seuratObj[["peaks"]] <- Signac::AddMotifs(
    object = seuratObj[["peaks"]],
    genome = BSgenome.Mmusculus.UCSC.mm10,
    pfm = mouse_pwms_v2,
    verbose = TRUE
)
svglite(paste0("RENIN_motif_enrichment_", Sys.Date(), ".svg"))
plot_motif_enrichment(seuratObj, non_cres, num_top_label = 5)
dev.off()

19i. Identify TFs with motifs within the linked CREs

run_tf_aen also needs the dplyer::select call added to the peaks_to_tfs fcn

peaks_to_tfs <- function(peak_list, key_tibble) {
    require(tidyverse)

    if (length(peak_list) == 1 && peak_list == "") {
        return("")
    }
    subset_tibble <- key_tibble[, c("rowname", peak_list)]
    subset <- subset_tibble %>% dplyr::select(-rowname) %>% rowSums
    subset <- which(subset > 0)

    return(subset_tibble$rowname[subset])
}

Run step by step through run_tf_aen to avoid errors

require(Seurat)
require(Signac)
require(SeuratWrappers)
require(BiocGenerics)
require(tidyverse)
require(gcdnet)
require(future)
require(future.apply)

seurat            <- seuratObj
pseudocell_mat    <- expr_mat
aen_results_peaks <- peak_results
gene_list         <- gene_list_top250
lambda2 <- 0.5
gamma <- 1
ci_cutoff <- 1.96
pval_cutoff <- NULL
set_nonsig_to_zero <- TRUE
promoter_only <- FALSE
promoter_threshold <- 2000
train_fraction <- 0.8
num_bootstraps <- 100
bootstrap <- TRUE
num_threads <- 16
globals_maxsize <- NULL
verbose <- TRUE
bs_seed <- 943126
peak_assay <- "peaks"
multi_seed <- 6283408

modeled_genes <- names(aen_results_peaks)
if (!is.null(gene_list) && promoter_only) {
    if (class(gene_list) == "list") {
        gene_list <- unlist(gene_list)
    }       
    modeled_genes <- gene_list
}

peak_tf_key <- seurat@assays$peaks@motifs@data
colnames(peak_tf_key) <- unlist(seurat@assays$peaks@motifs@motif.names)

regulator_tf_names <- colnames(peak_tf_key)
regulator_tf_names <- regulator_tf_names[which(regulator_tf_names %in% colnames(pseudocell_mat))]

peak_tf_key <- peak_tf_key[, regulator_tf_names]
peak_tf_key_tibble <- as_tibble(BiocGenerics::t(peak_tf_key), rownames = "rowname")

# prepare initial TF-gene lists
gene_coords <- Signac:::CollapseToLongestTranscript(Annotation(seurat))
all_peaks <- rownames(GetAssayData(seurat, assay = peak_assay))
prom_matrix <- RENIN:::UpstreamDistanceToTSS(peaks = StringToGRanges(all_peaks),
                                      genes = gene_coords[which(gene_coords$gene_name %in% modeled_genes)],
                                      distance = promoter_threshold)

prom_input_list <- lapply(modeled_genes, function(x) {
                        peak_col <- prom_matrix[, x]
                        return(names(peak_col)[which(peak_col > 0)])
                    })
names(prom_input_list) <- modeled_genes

aen_lists <- lapply(aen_results_peaks, function(x) {
        l <- x[[4]]
        l <- as.data.frame(l)[which(l[, "coef_if_kept"] != 0), ]
        l <- rownames(l)[which(rownames(l) != "(Intercept)")]
    })
names(aen_lists) <- modeled_genes
linked_peak_list <- lapply(modeled_genes, function(x) {
                        union(aen_lists[[x]], prom_input_list[[x]])
                    })
names(linked_peak_list) <- modeled_genes

tf_aggregates <- lapply(linked_peak_list, peaks_to_tfs, key_tibble = peak_tf_key_tibble)
names(tf_aggregates) <- modeled_genes
tf_aggregates <- tf_aggregates[which(unlist(lapply(tf_aggregates, length)) > 1)]
genes_without_peaks <- modeled_genes[which(!(modeled_genes %in% names(tf_aggregates)))]
if (verbose) print(paste0("Genes without any candidate TFs--most likely no linked peaks or promoter region peaks: ", genes_without_peaks))

# bootstrap setup
num_bootstraps = num_bootstraps + 1 # last seed added for cross validation step
if (!bootstrap) num_bootstraps = 2
if (!(is.null(bs_seed))) set.seed(bs_seed)
bootstrap_sequence_input_lists <- make_bootstrap_sequences(num_bootstraps, gene_list)

# run aen
tf_aggregates <- tf_aggregates[which(unlist(lapply(tf_aggregates, length)) > 1)] 
input_list    <- lapply(names(tf_aggregates),
    function(x) list(x, tf_aggregates[[x]],
                     bootstrap_sequence_input_lists[[x]]))
names(input_list) <- names(tf_aggregates)

if (num_threads > 1) {
    plan(multisession, workers = num_threads)
    if (!is.null(globals_maxsize)) options(future.globals.maxSize = globals_maxsize)
}
start <- Sys.time()
if (num_threads == 1) {
    aen_results <- lapply(input_list, run_aen_tfs_for_gene,
                                 regulator_names = regulator_tf_names,
                                 pseudocell_matrix = pseudocell_mat, 
                                 lambda2 = lambda2,
                                 train_fraction = train_fraction,
                                 gamma = gamma,
                                 ci_cutoff = ci_cutoff,
                                 pval_cutoff = pval_cutoff,
                                 set_nonsig_to_zero = set_nonsig_to_zero)
} else if (!is.null(multi_seed)) {
    aen_results <- future_lapply(input_list, run_aen_tfs_for_gene,
                                 regulator_names = regulator_tf_names,
                                 pseudocell_matrix = pseudocell_mat, 
                                 lambda2 = lambda2,
                                 train_fraction = train_fraction,
                                 gamma = gamma,
                                 ci_cutoff = ci_cutoff,
                                 pval_cutoff = pval_cutoff,
                                 set_nonsig_to_zero = set_nonsig_to_zero,
                                 future.seed = multi_seed)
} else {
    aen_results <- future_lapply(input_list, run_aen_tfs_for_gene,
                                 regulator_names = regulator_tf_names,
                                 pseudocell_matrix = pseudocell_mat, 
                                 lambda2 = lambda2,
                                 train_fraction = train_fraction,
                                 gamma = gamma,
                                 ci_cutoff = ci_cutoff,
                                 pval_cutoff = pval_cutoff,
                                 set_nonsig_to_zero = set_nonsig_to_zero)
}
names(aen_results) <- names(input_list)
end <- Sys.time()
if (verbose) print(paste0("AEN completed in ", end - start))
plan(sequential)
tf_results <- aen_results

tf_results <- run_tf_aen(seuratObj, expr_mat, peak_results,
                         gene_list_top250, lambda2 = 0.5)

regulator_tf_names <- unlist(seuratObj@assays$peaks@motifs@motif.names)
regulator_tf_names <- regulator_tf_names[which(regulator_tf_names %in% rownames(GetAssayData(seuratObj, assay = "SCT")))]

19j. Rank TFs by predicted regulatory score

TF regulatory score: sum the estimated regulatory coefficients across all modeled genes, then weight by expression.

tf_df <- rank_tfs(tf_results, seuratObj, non_genes, regulator_tf_names, num_cores = 1)
svglite(paste0("RENIN_plot_tf_rankings_", Sys.Date(),".svg"))
plot_tf_rankings(tf_df)
dev.off()

centrality_rankings <- rank_tfs_by_centrality(tf_results, seuratObj)
fwrite(data.table(TF=names(head(centrality_rankings[[1]], n = 10)),
                      Betweeness=head(centrality_rankings[[1]], n = 10)),
           paste0("RENIN_RENIN-v-NON_Betweenness.csv"), sep=",")
fwrite(data.table(TF=names(head(centrality_rankings[[2]], n = 10)),
                  Betweeness=head(centrality_rankings[[2]], n = 10)),
       paste0("RENIN_RENIN-v-NON_PageRank.csv"), sep=",")

19k. Analyse individual cell population comparisons

Create custom function to generate plots of cell-vs-cell comparisons.

plotRENIN <- function(seurat,
                      expr_mat=NULL,
                      peak_mat=NULL,
                      ident.1=NULL,
                      ident.1.name=NULL,
                      ident.2=NULL,
                      ident.2.name=NULL,
                      group.by=NULL,
                      max.genes=250,
                      lambda2 = 0.5,
                      max.distance = 5e+05,
                      num.bootstraps = 250) {

    de.genes <- prepare_degs(
        seurat,
        ident.1 = ident.1,
        ident.2 = ident.2,
        group.by = group.by
    )

    gene_list <- rownames(de.genes)
    if (length(gene_list) >= max.genes) {
        gene_list <- sort(gene_list[1:max.genes])
    } else {
        gene_list <- sort(gene_list)
    }
    # function requires this call! It's in `prepare_degs` but NOT in `run_peak_aen`
    DefaultAssay(seurat) <- "peaks" 
    # Now run the function after setting DefaultAssay
    peak_results <- run_peak_aen(seurat,
                                 expr_mat,
                                 peak_mat,
                                 gene_list,
                                 lambda2 = lambda2,
                                 max_distance = max.distance,
                                 num_bootstraps = num.bootstraps)
    aen_lists <- make_aen_lists(peak_results)

    non_genes <- rownames(de.genes)[which(de.genes$avg_log2FC < 0)]
    all_cres <- unique(unlist(aen_lists))
    cre_scores <- lapply(peak_results, function(x) x[[4]][union(1, which(x[[4]][, "coef_if_kept"] != 0)), "coef_if_kept"] * ifelse(x[[1]] %in% non_genes, -1, 1))
    cre_scores <- cre_scores[which(unlist(lapply(cre_scores, length)) > 1)]
    cre_total_scores <- bind_rows(cre_scores)
    cre_total_scores[is.na(cre_total_scores)] <- 0
    cre_total_scores <- cre_total_scores[,-1]
    cre_total_scores_sums <- colSums(cre_total_scores)
    cre_num_genes <- apply(cre_total_scores, 2, function(x) length(which(x != 0))) # how many genes each CRE targets
    non_cres <- names(cre_total_scores_sums)[which(cre_total_scores_sums < 0)]
    renin_cres <- names(cre_total_scores_sums)[which(cre_total_scores_sums > 0)]

    ## GC.percent not present in meta.features. Run RegionStats to compute GC.percent for each feature
    seurat[["peaks"]] <- Signac::RegionStats(
      object = seurat[["peaks"]],
      genome = BSgenome.Mmusculus.UCSC.mm10
    )

    ## no motifs slot in this object...
    ### Use same cisbp PWM as I use in ArchR
    #### See https://rdrr.io/github/timoast/signac/man/AddMotifs.html
    seurat[["peaks"]] <- Signac::AddMotifs(
        object = seurat[["peaks"]],
        genome = BSgenome.Mmusculus.UCSC.mm10,
        pfm = mouse_pwms_v2,
        verbose = TRUE
    )
    svglite(paste0("RENIN_", ident.1.name, "-v-", ident.2.name, "_motif_enrichment.svg"))
    print(plot_motif_enrichment(seurat, non_cres, num_top_label = 5))
    dev.off()

    tf_results <- run_tf_aen(seurat, expr_mat, peak_results,
                                     gene_list, lambda2 = 0.5)
    regulator_tf_names <- unlist(seurat@assays$peaks@motifs@motif.names)
    regulator_tf_names <- regulator_tf_names[which(regulator_tf_names %in% rownames(GetAssayData(seurat, assay = "SCT")))]

    tf_df <- rank_tfs(tf_results, seurat, non_genes, regulator_tf_names, num_cores = 1)
    svglite(paste0("RENIN_", ident.1.name, "-v-", ident.2.name, "_plot_tf_rankings.svg"))
    print(plot_tf_rankings(tf_df))
    dev.off()

    centrality_rankings <- rank_tfs_by_centrality(tf_results, seurat)
    fwrite(data.table(TF=names(head(centrality_rankings[[1]], n = 10)),
                      Betweeness=head(centrality_rankings[[1]], n = 10)),
           paste0("RENIN_", ident.1.name, "-v-", ident.2.name, "_Betweenness.csv"), sep=",")
    fwrite(data.table(TF=names(head(centrality_rankings[[2]], n = 10)),
                      Betweeness=head(centrality_rankings[[2]], n = 10)),
           paste0("RENIN_", ident.1.name, "-v-", ident.2.name, "_PageRank.csv"), sep=",")
}

seurat_subset <- subset(seuratObj, ManualClustersJG %in% c("early_JGs", "late_JGs", "early_VSMCs", "late_VSMCs"))

plotRENIN(seurat=seurat_subset,
          expr_mat=expr_mat,
          peak_mat=peak_mat,
          ident.1 = c("early_JGs", "late_JGs"),
          ident.1.name="JGs",
          ident.2 = c("early_VSMCs", "late_VSMCs"),
          ident.2.name="VSMCs",
          group.by="ManualClustersJG")


seurat_subset <- subset(seuratObj, ManualClustersJG %in% c("late_JGs", "late_VSMCs"))
plotRENIN(seurat=seurat_subset,
          expr_mat=expr_mat,
          peak_mat=peak_mat,
          ident.1 = "late_JGs",
          ident.1.name="late_JGs",
          ident.2 = "late_VSMCs",
          ident.2.name="late_VSMCs",
          group.by="ManualClustersJG")

seurat_subset <- subset(seuratObj, ManualClustersJG %in% c("early_JGs", "early_VSMCs"))
plotRENIN(seurat=seurat_subset,
          expr_mat=expr_mat,
          peak_mat=peak_mat,
          ident.1 = "early_JGs",
          ident.1.name="early_JGs",
          ident.2 = "early_VSMCs",
          ident.2.name="early_VSMCs",
          group.by="ManualClustersJG")

for (i in 1:(length(trajectory2)-1)) {
    fg_group <- trajectory2[i]
    bg_group <- trajectory2[i+1]
    if(!file.exists(paste0("RENIN_", fg_group, "-v-", bg_group, "_PageRank.csv"))) {
        plotRENIN(seurat=seurat_subset,
              expr_mat=expr_mat,
              peak_mat=peak_mat,
              ident.1 = fg_group,
              ident.1.name=fg_group,
              ident.2 = bg_group,
              ident.2.name=bg_group,
              group.by="ManualClustersJG")
    }
}

save.image(paste0("RENIN_analysis_", Sys.Date(), ".RData"))