14a. Identify positive TF-regulators
14a-1. first identify deviant TFs
seGroupMotif <- getGroupSE(ArchRProj = proj6,
useMatrix = "cisbp_Motif_JGMatrix",
groupBy = "ManualClustersJG")
Subset to just z-scores
seZ <- seGroupMotif[rowData(seGroupMotif)$seqnames=="z",]
Identify the maximum delta in z-score between all clusters, thereby stratifying motifs based on the degree of variation observed across clusters.
rowData(seZ)$maxDelta <- lapply(seq_len(ncol(seZ)), function(x){
rowMaxs(assay(seZ) - assay(seZ)[,x])
}) %>% Reduce("cbind", .) %>% rowMaxs
14a-2. Identify Correlated TF Motifs and TF Gene Score/Expression
# GeneScoreMatrix
corGSM_MM <- correlateMatrices(
ArchRProj = proj6,
useMatrix1 = "GeneScoreMatrix_no_ribosomal",
useMatrix2 = "cisbp_Motif_JGMatrix",
reducedDims = "LSI_Harmony"
)
# GeneIntegrationMatrix
corGIM_MM <- correlateMatrices(
ArchRProj = proj6,
useMatrix1 = "GeneIntegrationMatrix_LSI_Harmony",
useMatrix2 = "cisbp_Motif_JGMatrix",
reducedDims = "LSI_Harmony"
)
14a-3. Add Maximum Delta Deviation to the Correlation Data Frame
corGSM_MM$maxDelta <- rowData(seZ)[match(corGSM_MM$cisbp_Motif_JGMatrix_name,
rowData(seZ)$name), "maxDelta"]
corGIM_MM$maxDelta <- rowData(seZ)[match(corGIM_MM$cisbp_Motif_JGMatrix_name,
rowData(seZ)$name), "maxDelta"]
14a-4. Identify Positive TF Regulators
From the inferred gene expression (ATACseq).
corGSM_MM <- corGSM_MM[order(abs(corGSM_MM$cor), decreasing = TRUE), ]
corGSM_MM <- corGSM_MM[which(!duplicated(gsub("\\-.*","",
corGSM_MM[,"cisbp_Motif_JGMatrix_matchName"]))), ]
corGSM_MM$TFRegulator <- "NO"
corGSM_MM$TFRegulator[which(corGSM_MM$cor > 0.5 & corGSM_MM$padj < 0.01 &
corGSM_MM$maxDelta > quantile(corGSM_MM$maxDelta, 0.75))] <- "YES"
sort(corGSM_MM[corGSM_MM$TFRegulator=="YES",1])
# identified positive TF regulators from gene scores and motif deviation
# z-scores, highlight them in a dot plot.
p <- ggplot(data.frame(corGSM_MM), aes(cor, maxDelta, color = TFRegulator)) +
geom_point() +
ggrepel::geom_label_repel(
data = data.frame(
head(corGSM_MM[order(corGSM_MM$TFRegulator, decreasing=T),], 30)),
aes(x = cor,
y = maxDelta,
label = cisbp_Motif_JGMatrix_matchName),
size = 1.5,
nudge_x = 3,
max.overlaps = 30,
color = "black"
) +
theme_ArchR() +
geom_vline(xintercept = 0, lty = "dashed") +
scale_color_manual(values = c("NO"="darkgrey", "YES"="firebrick3")) +
xlab("Correlation To Gene Score") +
ylab("Max TF Motif Delta") +
scale_y_continuous(
expand = c(0,0),
limits = c(0, max(corGSM_MM$maxDelta)*1.05)
)
svglite(paste0(sample_name,
"_positive-TF-regulators_ATAC_",
Sys.Date(), ".svg"))
print(p)
dev.off()
# same analysis for the correlations derived from GeneIntegrationMatrix_LSI_Harmony (RNAseq)
corGIM_MM <- corGIM_MM[order(abs(corGIM_MM$cor), decreasing = TRUE), ]
corGIM_MM <- corGIM_MM[which(!duplicated(gsub("\\-.*","",corGIM_MM[,"cisbp_Motif_JGMatrix_name"]))), ]
corGIM_MM$TFRegulator <- "NO"
corGIM_MM$TFRegulator[which(corGIM_MM$cor > 0.5 & corGIM_MM$padj < 0.01 & corGIM_MM$maxDelta > quantile(corGIM_MM$maxDelta, 0.75))] <- "YES"
sort(corGIM_MM[corGIM_MM$TFRegulator=="YES",1])
p <- ggplot(data.frame(corGIM_MM), aes(cor, maxDelta, color = TFRegulator)) +
geom_point() +
ggrepel::geom_label_repel(
data = data.frame(
head(corGIM_MM[order(corGIM_MM$TFRegulator, decreasing=T),], 30)),
aes(x = cor,
y = maxDelta,
label = cisbp_Motif_JGMatrix_matchName),
size = 1.5,
nudge_x = 3,
max.overlaps = 30,
color = "black"
) +
theme_ArchR() +
geom_vline(xintercept = 0, lty = "dashed") +
scale_color_manual(values = c("NO"="darkgrey", "YES"="firebrick3")) +
xlab("Correlation To Gene Expression") +
ylab("Max TF Motif Delta") +
scale_y_continuous(
expand = c(0,0),
limits = c(0, max(corGIM_MM$maxDelta)*1.05)
)
svglite(paste0(sample_name,
"_positive-TF-regulators_RNA_",
Sys.Date(), ".svg"))
print(p)
dev.off()
save.image(paste0(sample_name, "_Correlation-Matrices_", Sys.Date(), ".RData"))
Correlated gene expression (or score) with changes in TF accessibility suggest a central role of these top factors in early kidney development. So in a general sense, these are the TFs that matter.
14b. Plot genes of interest on the motif matrix
motifs_of_interest <- c("FOXD1", "MEF2A", "MEF2B", "MEF2C", "MEF2D", "MYH11",
"CNN1", "SMARCC1", "SNAI2", "NFIX", "NOTCH", "RBPJ",
"SP1", "HOXB7", "CITED1", "SIX2", "SMTN", "SMAD1",
"E2F1", "NFIC", "SMARCC1")
motifs_of_interest2 <- getFeatures(proj6,
select = paste(motifs_of_interest, collapse="|"),
useMatrix = "cisbp_Motif_JGMatrix")
motifs_of_interest2
# Get just z scores
motifs_of_interest2 <- grep("z:", motifs_of_interest2, value = TRUE)
motifs_of_interest2 <- motifs_of_interest2[!grepl("z:Mesp1_58", motifs_of_interest2)]
motifs_of_interest2
for (motif in motifs_of_interest2) {
if (!file.exists(paste0(sample_name,
"_MotifMatrix_", motif, "_", Sys.Date(), ".svg"))) {
p1 <- plotEmbedding(
ArchRProj = proj6,
colorBy = "cisbp_Motif_JGMatrix",
name = motif,
continuousSet = "horizonExtra",
embedding = "UMAP_LSI_Harmony",
imputeWeights = getImputeWeights(proj6)
)
svg(paste0(sample_name,
"_MotifMatrix_", motif, "_", Sys.Date(), ".svg"))
print(p1)
dev.off()
}
}
14c. Plot footprints of TFs significant to the trajectory
motifPositions <- getPositions(proj6)
motifPositions <- getPositions(ArchRProj = proj6, name = "cisbp_Motif_JG")
motifPositions
motifs <- c("Foxd1", "Notch", "Rbpj", "Sp1", "Sp3", "Cited1", "Six2", "Tagln",
"Smtn", "Smad1", "E2f1", "Hoxb7", "Nfi", "Nfix", "Nfic", "Myh11",
"Mef2a", "Mef2b", "Mef2c", "Mef2d", "Smarcc1", "Pknox1", "Nfkb1",
"Nfya", "Nfib", "Bach1", "Srf", "Ebf1", "Erfl")
markerMotifs2 <- unlist(lapply(motifs,
function(x) grep(x, names(motifPositions), value = TRUE)))
markerMotifs2 <- markerMotifs2[markerMotifs2 %ni% "Sp100_714"]
markerMotifs2 <- markerMotifs2[markerMotifs2 %ni% "Sp140_717"]
markerMotifs2 <- markerMotifs2[markerMotifs2 %ni% "Sp110_718"]
markerMotifs2
seFoot_trajectory <- getFootprints(
ArchRProj = proj6,
positions = motifPositions[markerMotifs2],
groupBy = "ManualClustersJG",
useGroups = trajectory,
minCells = 15,
threads=1
)
plotFootprints(
seFoot = seFoot_trajectory,
ArchRProj = proj6,
normMethod = "Subtract",
plotName = paste0(sample_name, "_JG_footprints-subtract-bias_", Sys.Date()),
addDOC = FALSE,
smoothWindow = 5,
width=8
)
plotFootprints(
seFoot = seFoot_trajectory,
ArchRProj = proj6,
normMethod = "Divide",
plotName = paste0(sample_name, "_JG_footprints-divide-bias_", Sys.Date()),
addDOC = FALSE,
smoothWindow = 5,
width=8
)
seTSS <- getFootprints(
ArchRProj = proj6,
positions = GRangesList(TSS = getTSS(proj6)),
groupBy = "ManualClustersJG",
flank = 2000,
threads=8
)
plotFootprints(
seFoot = seTSS,
ArchRProj = proj6,
normMethod = "None",
plotName = paste0(sample_name, "_JG_TSS-No-Normalization_", Sys.Date()),
addDOC = FALSE,
flank = 2000,
flankNorm = 100,
width=8
)
# Trajectory
seTSS_Trajectory <- getFootprints(
ArchRProj = proj6,
positions = GRangesList(TSS = getTSS(proj6)),
groupBy = "ManualClustersJG",
flank = 2000,
useGroups = trajectory,
minCells = 15,
threads=8
)
plotFootprints(
seFoot = seTSS_Trajectory,
ArchRProj = proj6,
normMethod = "None",
plotName = paste0(sample_name, "_JG_GSM_GIM_TSS-No-Normalization_", Sys.Date()),
addDOC = FALSE,
flank = 2000,
flankNorm = 100,
width=8
)
save.image(paste0("proj6_identify_TFs_Footprints", Sys.Date(), ".RData"))