14a. Identify positive TF-regulators

14a-1. first identify deviant TFs

seGroupMotif <- getGroupSE(ArchRProj = proj6,
                           useMatrix = "cisbp_Motif_JGMatrix",
                           groupBy = "ManualClustersJG")

Subset to just z-scores

seZ <- seGroupMotif[rowData(seGroupMotif)$seqnames=="z",]

Identify the maximum delta in z-score between all clusters, thereby stratifying motifs based on the degree of variation observed across clusters.

rowData(seZ)$maxDelta <- lapply(seq_len(ncol(seZ)), function(x){
  rowMaxs(assay(seZ) - assay(seZ)[,x])
}) %>% Reduce("cbind", .) %>% rowMaxs

14a-2. Identify Correlated TF Motifs and TF Gene Score/Expression

# GeneScoreMatrix
corGSM_MM <- correlateMatrices(
    ArchRProj = proj6,
    useMatrix1 = "GeneScoreMatrix_no_ribosomal",
    useMatrix2 = "cisbp_Motif_JGMatrix",
    reducedDims = "LSI_Harmony"
)

# GeneIntegrationMatrix
corGIM_MM <- correlateMatrices(
    ArchRProj = proj6,
    useMatrix1 = "GeneIntegrationMatrix_LSI_Harmony",
    useMatrix2 = "cisbp_Motif_JGMatrix",
    reducedDims = "LSI_Harmony"
)

14a-3. Add Maximum Delta Deviation to the Correlation Data Frame

corGSM_MM$maxDelta <- rowData(seZ)[match(corGSM_MM$cisbp_Motif_JGMatrix_name,
                                   rowData(seZ)$name), "maxDelta"]
corGIM_MM$maxDelta <- rowData(seZ)[match(corGIM_MM$cisbp_Motif_JGMatrix_name, 
                                   rowData(seZ)$name), "maxDelta"]

14a-4. Identify Positive TF Regulators

From the inferred gene expression (ATACseq).

corGSM_MM <- corGSM_MM[order(abs(corGSM_MM$cor), decreasing = TRUE), ]
corGSM_MM <- corGSM_MM[which(!duplicated(gsub("\\-.*","",
    corGSM_MM[,"cisbp_Motif_JGMatrix_matchName"]))), ]
corGSM_MM$TFRegulator <- "NO"
corGSM_MM$TFRegulator[which(corGSM_MM$cor > 0.5 & corGSM_MM$padj < 0.01 &
    corGSM_MM$maxDelta > quantile(corGSM_MM$maxDelta, 0.75))] <- "YES"
sort(corGSM_MM[corGSM_MM$TFRegulator=="YES",1])

# identified positive TF regulators from gene scores and motif deviation
# z-scores, highlight them in a dot plot.
p <- ggplot(data.frame(corGSM_MM), aes(cor, maxDelta, color = TFRegulator)) +
  geom_point() + 
  ggrepel::geom_label_repel(
        data = data.frame(
            head(corGSM_MM[order(corGSM_MM$TFRegulator, decreasing=T),], 30)),
            aes(x = cor,
                y = maxDelta,
                label = cisbp_Motif_JGMatrix_matchName), 
        size = 1.5,
        nudge_x = 3,
        max.overlaps = 30,
        color = "black"
  ) +
  theme_ArchR() +
  geom_vline(xintercept = 0, lty = "dashed") + 
  scale_color_manual(values = c("NO"="darkgrey", "YES"="firebrick3")) +
  xlab("Correlation To Gene Score") +
  ylab("Max TF Motif Delta") +
  scale_y_continuous(
    expand = c(0,0), 
    limits = c(0, max(corGSM_MM$maxDelta)*1.05)
  )

svglite(paste0(sample_name,
           "_positive-TF-regulators_ATAC_",
           Sys.Date(), ".svg"))
print(p)
dev.off()

# same analysis for the correlations derived from GeneIntegrationMatrix_LSI_Harmony (RNAseq)
corGIM_MM <- corGIM_MM[order(abs(corGIM_MM$cor), decreasing = TRUE), ]
corGIM_MM <- corGIM_MM[which(!duplicated(gsub("\\-.*","",corGIM_MM[,"cisbp_Motif_JGMatrix_name"]))), ]
corGIM_MM$TFRegulator <- "NO"
corGIM_MM$TFRegulator[which(corGIM_MM$cor > 0.5 & corGIM_MM$padj < 0.01 & corGIM_MM$maxDelta > quantile(corGIM_MM$maxDelta, 0.75))] <- "YES"
sort(corGIM_MM[corGIM_MM$TFRegulator=="YES",1])

p <- ggplot(data.frame(corGIM_MM), aes(cor, maxDelta, color = TFRegulator)) +
  geom_point() + 
  ggrepel::geom_label_repel(
        data = data.frame(
            head(corGIM_MM[order(corGIM_MM$TFRegulator, decreasing=T),], 30)),
            aes(x = cor,
                y = maxDelta,
                label = cisbp_Motif_JGMatrix_matchName), 
        size = 1.5,
        nudge_x = 3,
        max.overlaps = 30,
        color = "black"
  ) +
  theme_ArchR() +
  geom_vline(xintercept = 0, lty = "dashed") + 
  scale_color_manual(values = c("NO"="darkgrey", "YES"="firebrick3")) +
  xlab("Correlation To Gene Expression") +
  ylab("Max TF Motif Delta") +
  scale_y_continuous(
    expand = c(0,0), 
    limits = c(0, max(corGIM_MM$maxDelta)*1.05)
  )
svglite(paste0(sample_name,
           "_positive-TF-regulators_RNA_",
           Sys.Date(), ".svg"))
print(p)
dev.off()

save.image(paste0(sample_name, "_Correlation-Matrices_", Sys.Date(), ".RData"))

Correlated gene expression (or score) with changes in TF accessibility suggest a central role of these top factors in early kidney development. So in a general sense, these are the TFs that matter.

14b. Plot genes of interest on the motif matrix

motifs_of_interest <- c("FOXD1", "MEF2A", "MEF2B", "MEF2C", "MEF2D", "MYH11",
                        "CNN1", "SMARCC1", "SNAI2", "NFIX", "NOTCH", "RBPJ",
                        "SP1", "HOXB7", "CITED1", "SIX2", "SMTN", "SMAD1",
                        "E2F1", "NFIC", "SMARCC1")                      

motifs_of_interest2 <- getFeatures(proj6,
    select = paste(motifs_of_interest, collapse="|"),
    useMatrix = "cisbp_Motif_JGMatrix")
motifs_of_interest2

# Get just z scores
motifs_of_interest2 <- grep("z:", motifs_of_interest2, value = TRUE)
motifs_of_interest2 <- motifs_of_interest2[!grepl("z:Mesp1_58", motifs_of_interest2)]
motifs_of_interest2
for (motif in motifs_of_interest2) {
    if (!file.exists(paste0(sample_name,
                            "_MotifMatrix_", motif, "_", Sys.Date(), ".svg"))) {
        p1 <- plotEmbedding(
            ArchRProj = proj6, 
            colorBy = "cisbp_Motif_JGMatrix", 
            name = motif, 
            continuousSet = "horizonExtra",
            embedding = "UMAP_LSI_Harmony",
            imputeWeights = getImputeWeights(proj6)
        )

        svg(paste0(sample_name,
                   "_MotifMatrix_", motif, "_", Sys.Date(), ".svg"))
        print(p1)
        dev.off()
    }
}

14c. Plot footprints of TFs significant to the trajectory

motifPositions <- getPositions(proj6)

motifPositions <- getPositions(ArchRProj = proj6, name = "cisbp_Motif_JG")
motifPositions
motifs <- c("Foxd1", "Notch", "Rbpj", "Sp1", "Sp3", "Cited1", "Six2", "Tagln",
            "Smtn", "Smad1", "E2f1", "Hoxb7", "Nfi", "Nfix", "Nfic", "Myh11",
            "Mef2a", "Mef2b", "Mef2c", "Mef2d", "Smarcc1", "Pknox1", "Nfkb1",
            "Nfya", "Nfib", "Bach1", "Srf", "Ebf1", "Erfl")

markerMotifs2 <- unlist(lapply(motifs,
    function(x) grep(x, names(motifPositions), value = TRUE)))
markerMotifs2 <- markerMotifs2[markerMotifs2 %ni% "Sp100_714"]
markerMotifs2 <- markerMotifs2[markerMotifs2 %ni% "Sp140_717"]
markerMotifs2 <- markerMotifs2[markerMotifs2 %ni% "Sp110_718"]
markerMotifs2

seFoot_trajectory <- getFootprints(
  ArchRProj = proj6, 
  positions = motifPositions[markerMotifs2], 
  groupBy = "ManualClustersJG",
  useGroups = trajectory,
  minCells = 15,
  threads=1
)

plotFootprints(
  seFoot = seFoot_trajectory,
  ArchRProj = proj6, 
  normMethod = "Subtract",
  plotName = paste0(sample_name, "_JG_footprints-subtract-bias_", Sys.Date()),
  addDOC = FALSE,
  smoothWindow = 5,
  width=8
)

plotFootprints(
  seFoot = seFoot_trajectory,
  ArchRProj = proj6, 
  normMethod = "Divide",
  plotName = paste0(sample_name, "_JG_footprints-divide-bias_", Sys.Date()),
  addDOC = FALSE,
  smoothWindow = 5,
  width=8
)

seTSS <- getFootprints(
  ArchRProj = proj6, 
  positions = GRangesList(TSS = getTSS(proj6)), 
  groupBy = "ManualClustersJG",
  flank = 2000,
  threads=8
)
plotFootprints(
  seFoot = seTSS,
  ArchRProj = proj6, 
  normMethod = "None",
  plotName = paste0(sample_name, "_JG_TSS-No-Normalization_", Sys.Date()),
  addDOC = FALSE,
  flank = 2000,
  flankNorm = 100,
  width=8
)

# Trajectory
seTSS_Trajectory <- getFootprints(
  ArchRProj = proj6, 
  positions = GRangesList(TSS = getTSS(proj6)), 
  groupBy = "ManualClustersJG",
  flank = 2000,
  useGroups = trajectory,
  minCells = 15,
  threads=8
)
plotFootprints(
  seFoot = seTSS_Trajectory,
  ArchRProj = proj6, 
  normMethod = "None",
  plotName = paste0(sample_name, "_JG_GSM_GIM_TSS-No-Normalization_", Sys.Date()),
  addDOC = FALSE,
  flank = 2000,
  flankNorm = 100,
  width=8
)

save.image(paste0("proj6_identify_TFs_Footprints", Sys.Date(), ".RData"))