Determine cell trajectories
- Identify Foxd1 high clusters
- Identify high Ren1 clusters
13a. Identify progenitor population and progression based on Foxd1
progenitor_markers <- c("Foxd1")
rm(GIM_progenitors_names)
for (gene in progenitor_markers) {
progenitor_gim <- gim_impute[grepl(paste0("^", gene, "$"),
rowData(gim)$name),]
progenitor_upper_quartile <- names(progenitor_gim[progenitor_gim >
quantile(gim_impute[grep(paste0("^", gene, "$"),
rowData(gim)$name),], 0.75)[[1]]])
if (exists("GIM_progenitors_names")) {
GIM_progenitors_names <- Reduce(intersect,
list(GIM_progenitors_names,
progenitor_upper_quartile))
} else {
GIM_progenitors_names <- progenitor_upper_quartile
}
}
rm(GSM_progenitors_names)
for (gene in progenitor_markers) {
progenitor_gsm <- gsm_impute[grepl(paste0("^", gene, "$"),
rowData(gsm)$name),]
progenitor_upper_quartile <- names(progenitor_gsm[progenitor_gsm >
quantile(gsm_impute[grep(paste0("^", gene, "$"),
rowData(gsm)$name),], 0.75)[[1]]])
if (exists("GSM_progenitors_names")) {
GSM_progenitors_names <- Reduce(intersect,
list(GSM_progenitors_names,
progenitor_upper_quartile))
} else {
GSM_progenitors_names <- progenitor_upper_quartile
}
}
Identify clusters by timepoint that are in the upper quartile of Foxd1 expression
cluster_name <- "ManualClustersJG"
cluster_name <- "ManualClustersJG"
progenitors <- intersect(GIM_progenitors_names, GSM_progenitors_names)
if(length(grep("^E12", progenitors))==0) {
E12_Q1_GIM_foxd1_idx <- 0
} else {
E12_Q1_GIM_foxd1_idx <- grep("^E12", progenitors)
}
if(length(grep("^E18", progenitors))==0) {
E18_Q1_GIM_foxd1_idx <- 0
} else {
E18_Q1_GIM_foxd1_idx <- grep("^E18", progenitors)
}
if(length(grep("^P5", progenitors))==0) {
P5_Q1_GIM_foxd1_idx <- 0
} else {
P5_Q1_GIM_foxd1_idx <- grep("^P5", progenitors)
}
if(length(grep("^P30", progenitors))==0) {
P30_Q1_GIM_foxd1_idx <- 0
} else {
P30_Q1_GIM_foxd1_idx <- grep("^P30", progenitors)
}
E12_Q1_GIM_foxd1_names <- progenitors[E12_Q1_GIM_foxd1_idx]
E18_Q1_GIM_foxd1_names <- progenitors[E18_Q1_GIM_foxd1_idx]
P5_Q1_GIM_foxd1_names <- progenitors[P5_Q1_GIM_foxd1_idx]
P30_Q1_GIM_foxd1_names <- progenitors[P30_Q1_GIM_foxd1_idx]
if (length(E12_Q1_GIM_foxd1_names)!=0) {
E12_progenitor_cell_index <- grep(paste(E12_Q1_GIM_foxd1_names,
collapse="|"),
getCellNames(proj5))
E12_foxd1_clusters <- table(proj5$ManualClustersJG[E12_progenitor_cell_index])
} else {E12_foxd1_clusters <- NA}
if (length(E18_Q1_GIM_foxd1_names)!=0) {
E18_progenitor_cell_index <- grep(paste(E18_Q1_GIM_foxd1_names,
collapse="|"),
getCellNames(proj5))
E18_foxd1_clusters <- table(proj5$ManualClustersJG[E18_progenitor_cell_index])
} else {E18_foxd1_clusters <- NA}
if (length(P5_Q1_GIM_foxd1_names)!=0) {
P5_progenitor_cell_index <- grep(paste(P5_Q1_GIM_foxd1_names,
collapse="|"),
getCellNames(proj5))
P5_foxd1_clusters <- table(proj5$ManualClustersJG[P5_progenitor_cell_index])
} else {P5_foxd1_clusters <- NA}
if (length(P30_Q1_GIM_foxd1_names)!=0) {
P30_progenitor_cell_index <- grep(paste(P30_Q1_GIM_foxd1_names,
collapse="|"),
getCellNames(proj5))
P30_foxd1_clusters <- table(proj5$ManualClustersJG[P30_progenitor_cell_index])
} else {P30_foxd1_clusters <- NA}
# Confirm there are values at each timepoint
for (timepoint in c("E12","E18","P5","P30")) {
if(any(is.na(get(paste0(timepoint, "_foxd1_clusters"))))) {
warning(paste0("Drop ", timepoint))
}
}
progenitor_order <- rbind(
cbind("E12", top_n(as.data.table(E12_foxd1_clusters), 3)),
cbind("E18", top_n(as.data.table(E18_foxd1_clusters), 3)),
cbind("P5", top_n(as.data.table(P5_foxd1_clusters), 3)),
cbind("P30", top_n(as.data.table(P30_foxd1_clusters), 3)))
colnames(progenitor_order) <- c("timepoint", "cluster", "no_cells")
progenitor_order[,marker:="Foxd1"]
13b. Identify endpoint (renin cell) population and progression based on Ren1
renin_markers <- c("Ren1")
rm(GIM_renin_names)
for (gene in renin_markers) {
renin_gim <- gim_impute[grepl(paste0("^", gene, "$"),
rowData(gim)$name),]
renin_upper_quartile <- names(renin_gim[renin_gim >
quantile(gim_impute[grep(paste0("^", gene, "$"),
rowData(gim)$name),], 0.75)[[1]]])
if (exists("GIM_renin_names")) {
GIM_renin_names <- Reduce(intersect,
list(GIM_renins_names,
renin_upper_quartile))
} else {
GIM_renin_names <- renin_upper_quartile
}
}
rm(GSM_renin_names)
for (gene in renin_markers) {
renin_gsm <- gsm_impute[grepl(paste0("^", gene, "$"),
rowData(gsm)$name),]
renin_upper_quartile <- names(renin_gsm[renin_gsm >
quantile(gsm_impute[grep(paste0("^", gene, "$"),
rowData(gsm)$name),], 0.75)[[1]]])
if (exists("GSM_renin_names")) {
GSM_renin_names <- Reduce(intersect,
list(GSM_renins_names,
renin_upper_quartile))
} else {
GSM_renin_names <- renin_upper_quartile
}
}
Identify clusters by timepoint that are in the upper quartile of Ren1 expression AND gene score
renin <- intersect(GIM_renin_names, GSM_renin_names)
if(length(grep("^E12", renin))==0) {
E12_Q1_GIM_ren1_idx <- 0
} else {
E12_Q1_GIM_ren1_idx <- grep("^E12", renin)
}
if(length(grep("^E18", renin))==0) {
E18_Q1_GIM_ren1_idx <- 0
} else {
E18_Q1_GIM_ren1_idx <- grep("^E18", renin)
}
if(length(grep("^P5", renin))==0) {
P5_Q1_GIM_ren1_idx <- 0
} else {
P5_Q1_GIM_ren1_idx <- grep("^P5", renin)
}
if(length(grep("^P30", renin))==0) {
P30_Q1_GIM_ren1_idx <- 0
} else {
P30_Q1_GIM_ren1_idx <- grep("^P30", renin)
}
E12_Q1_GIM_ren1_names <- renin[E12_Q1_GIM_ren1_idx]
E18_Q1_GIM_ren1_names <- renin[E18_Q1_GIM_ren1_idx]
P5_Q1_GIM_ren1_names <- renin[P5_Q1_GIM_ren1_idx]
P30_Q1_GIM_ren1_names <- renin[P30_Q1_GIM_ren1_idx]
if (length(E12_Q1_GIM_ren1_names)!=0) {
E12_renin_cell_index <- grep(paste(E12_Q1_GIM_ren1_names, collapse="|"),
getCellNames(proj5))
E12_ren1_clusters <- table(proj5$ManualClustersJG[E12_renin_cell_index])
} else {E12_ren1_clusters <- NA}
if (length(E18_Q1_GIM_ren1_names)!=0) {
E18_renin_cell_index <- grep(paste(E18_Q1_GIM_ren1_names, collapse="|"),
getCellNames(proj5))
E18_ren1_clusters <- table(proj5$ManualClustersJG[E18_renin_cell_index])
} else {E18_ren1_clusters <- NA}
if (length(P5_Q1_GIM_ren1_names)!=0) {
P5_renin_cell_index <- grep(paste(P5_Q1_GIM_ren1_names, collapse="|"),
getCellNames(proj5))
P5_ren1_clusters <- table(proj5$ManualClustersJG[P5_renin_cell_index])
} else {P5_ren1_clusters <- NA}
if (length(P30_Q1_GIM_ren1_names)!=0) {
P30_renin_cell_index <- grep(paste(P30_Q1_GIM_ren1_names, collapse="|"),
getCellNames(proj5))
P30_ren1_clusters <- table(proj5$ManualClustersJG[P30_renin_cell_index])
} else {P30_ren1_clusters <- NA}
for (timepoint in c("E12","E18","P5","P30")) {
if(any(is.na(get(paste0(timepoint, "_ren1_clusters"))))) {
warning(paste0("Drop ", timepoint))
}
}
renin_order <- rbind(
cbind("E18", top_n(as.data.table(E18_ren1_clusters), 3)),
cbind("P5", top_n(as.data.table(P5_ren1_clusters), 3)),
cbind("P30", top_n(as.data.table(P30_ren1_clusters), 3)))
colnames(renin_order) <- c("timepoint", "cluster", "no_cells")
renin_order[,marker:="Ren1"]
13c. Combine orders of Foxd1 and Ren1 clusters
Look at the order of cells with high expression and gene score of either Foxd1 or Ren1 ordered by the timepoint. Sort based on knowledge of when Foxd1 and Ren1 are typically expressed and of terminally differentiated cell populations that arise from early Foxd1 progenitors.
trajectory_order <- rbind(progenitor_order, renin_order)
trajectory_order$timepoint <- ordered(trajectory_order$timepoint,
levels = c("E12", "E18", "P5", "P30"))
trajectory_order$marker <- ordered(trajectory_order$marker,
levels = c("Foxd1", "Ren1"))
trajectory_order <- trajectory_order[order(trajectory_order$timepoint,
trajectory_order$marker),]
trajectory_order
fwrite(trajectory_order, 'trajectory_order.csv', sep=',')
trajectory <- c("proliferating_stroma_and_PODs", #E12
"proliferating_cortical_stroma_and_EPIs", #E18
"proliferating_MED_stroma_and_FBs", #E18
"proliferating_mesenchyme_and_PODs", #E18
"MCs_and_PCs", #E18
"PCs", #E18-P5
"early_JGs", #E18-P5
"early_VSMCs", #P5
"late_JGs", #P30
"late_VSMCs", #P30
"MCs") #P30
proj5 <- addTrajectory(
ArchRProj = proj5,
name = "JG",
groupBy = "ManualClustersJG,
trajectory = trajectory,
embedding = "UMAP_LSI_Harmony", force=T
)
13d. Plot the trajectory
p <- plotTrajectory(proj5, embedding = "UMAP_LSI_Harmony",
trajectory = "JG", colorBy = "cellColData", name = "JG")
plotPDF(p,
name = paste0(sample_name, "_JG_trajectory_UMAP_", Sys.Date(), ".pdf"),
ArchRProj = proj5, addDOC = FALSE, width = 24, height = 10)
svglite(paste0(sample_name, "_",
cluster_name, "_trajectory_UMAP_", Sys.Date(), ".svg"))
print(p[[1]])
dev.off()
13e. Plot marker genes along the trajectory
gene_list <- c(jg_markers, smc_markers)
for (gene in gene_list) {
if (!file.exists(paste0(sample_name, "_proj5_JG-Trajectory-DotPlot_",
gene, "_", Sys.Date(), ".svg"))) {
p_JG1 <- plotTrajectory(proj5, trajectory = "JG",
embedding = "UMAP_LSI_Harmony",
colorBy = "GeneScoreMatrix_no_ribosomal",
name = gene, continuousSet = "horizonExtra")
p2_JG1 <- plotTrajectory(proj5,
embedding = "UMAP_LSI_Harmony",
trajectory = "JG",
colorBy = "GeneIntegrationMatrix_LSI_Harmony",
name = gene, continuousSet = "blueYellow")
svglite(paste0(sample_name, "_proj5_JG-Trajectory_",
gene, "_", Sys.Date(), ".svg"))
ggAlignPlots(p_JG1[[1]], p2_JG1[[1]], type = "h")
dev.off()
svglite(paste0(sample_name, "_proj5_JG-Trajectory-DotPlot_",
gene, "_", Sys.Date(), ".svg"))
ggAlignPlots(p_JG1[[2]], p2_JG1[[2]], type = "h")
dev.off()
}
}
save.image(paste0(sample_name,
"_ManualClustersJG_Trajectory_",
Sys.Date(),
".RData"))
13f. Calculate updated peak set based on manual cluster annotations
proj6 <- proj5
addArchRThreads(threads = 4)
proj6 <- addGroupCoverages(ArchRProj = proj6,
groupBy = "ManualClustersJG",
minReplicates = 10,
maxReplicates = 25,
minCells = 25,
maxFragments = 100 * 10^6,
threads = 1,
force = TRUE)
save.image(paste0(sample_name,
"_ManualClustersJG_AddGroupCoverages_",
Sys.Date(),
".RData"))
# Adding peaks again
proj6 <- addReproduciblePeakSet(
ArchRProj = proj6,
minCells = 25,
groupBy = "ManualClustersJG",
threads=1,
force=T
)
proj6 <- addPeakMatrix(proj6)
proj6 <- addImputeWeights(proj6, reducedDims = "LSI_Harmony")
save.image(paste0(sample_name,
"_ManualClustersJG_PeakUpdate_",
Sys.Date(),
".RData"))
# Add motif annotations
proj6 <- addMotifAnnotations(ArchRProj = proj6,
motifSet = "cisbp",
name = "Motif")
proj6 <- addDeviationsMatrix(
ArchRProj = proj6,
peakAnnotation = "cisbp_Motif_JG",
force = TRUE
)
proj6 <- addMotifAnnotations(ArchRProj = proj6,
motifSet = "JASPAR2020",
name = "JASPAR2020_Motif_JG")
proj6 <- addDeviationsMatrix(
ArchRProj = proj6,
peakAnnotation = "JASPAR2020_Motif_JG",
force = TRUE
)
save.image(paste0(sample_name,
"_ManualClustersJG_AddMotif_",
Sys.Date(),
".RData"))
13g. Plot pseudotime heatmaps
- We can plot the MotifMatrix on the pseudo-time trajectory
- We can perform the same steps to obtain a pseudo-time heatmap of gene scores by setting useMatrix = "GeneScoreMatrix".
- Similarly, we can obtain a pseudo-time heatmap of gene expression by setting useMatrix = "GeneIntegrationMatrix_LSI_Harmony".
- Lastly, we can obtain a pseudo-time heatmap of peak accessibility by setting useMatrix = "PeakMatrix".
Integrative pseudo-time analyses
trajMM <- getTrajectory(ArchRProj = proj6,
name = "JG",
useMatrix = "cisbp_Motif_JGMatrix",
scaleTo=NULL,
log2Norm = FALSE,
threads = 1)
p1 <- plotTrajectoryHeatmap(trajMM,
labelRows=TRUE,
scaleRows=TRUE,
labelTop=60,
pal = paletteContinuous(set = "solarExtra"))
trajGSM <- getTrajectory(ArchRProj = proj6,
name = "JG",
useMatrix = "GeneScoreMatrix_no_ribosomal",
log2Norm = TRUE,
threads = 1)
p2 <- plotTrajectoryHeatmap(trajGSM,
labelRows=TRUE,
labelTop=60,
pal = paletteContinuous(set = "horizonExtra"))
trajGIM <- getTrajectory(ArchRProj = proj6,
name = "JG",
useMatrix = "GeneIntegrationMatrix_LSI_Harmony",
log2Norm = FALSE,
threads = 1)
p3 <- plotTrajectoryHeatmap(trajGIM,
labelTop=60,
pal = paletteContinuous(set = "blueYellow"))
trajPM <- getTrajectory(ArchRProj = proj6,
name = "JG",
useMatrix = "PeakMatrix",
log2Norm = TRUE,
threads = 1)
p4 <- plotTrajectoryHeatmap(trajPM,
labelTop=60,
pal = paletteContinuous(set = "solarExtra"))
# Plot all together:
plotPDF(p1, p2, p3, p4, name = paste0(sample_name,
"_JG",
"_Trajectory_Heatmaps_",
Sys.Date(), ".pdf"),
ArchRProj = proj6, addDOC = FALSE, width = 6, height = 8)
# GeneScoreMatrix with MotifMatrix
corGSM_MM <- correlateTrajectories(trajGSM, trajMM,
log2Norm1 = TRUE,
log2Norm2 = FALSE)
trajGSM_2 <- trajGSM[corGSM_MM[[1]]$name1, ]
trajMM_2 <- trajMM[corGSM_MM[[1]]$name2, ]
trajCombined <- trajGSM_2
assay(trajCombined, withDimnames=FALSE) <- (
t(apply(assay(trajGSM_2), 1, scale)) +
t(apply(assay(trajMM_2), 1, scale)))
combinedMat <- plotTrajectoryHeatmap(trajCombined,
returnMat = TRUE, varCutOff = 0)
rowOrder <- match(rownames(combinedMat), rownames(trajGSM_2))
ht1 <- plotTrajectoryHeatmap(trajGSM_2,
pal = paletteContinuous(set = "horizonExtra"),
varCutOff = 0,
rowOrder = rowOrder,
labelTop=60)
ht2 <- plotTrajectoryHeatmap(trajMM_2,
pal = paletteContinuous(set = "solarExtra"),
varCutOff = 0,
rowOrder = rowOrder,
labelTop=60)
svglite(paste0(sample_name, "_JG", "_",
"GeneScoreMatrix-v-MotifMatrix_IntegrativePseudotimeHeatmaps_",
Sys.Date(), ".svg"))
print(ht1 + ht2)
dev.off()
## GeneIntegrationMatrix_LSI_Harmony with MotifMatrix
corGIM_MM <- correlateTrajectories(trajGIM, trajMM,
log2Norm1 = FALSE,
log2Norm2 = FALSE)
trajGIM_2 <- trajGIM[corGIM_MM[[1]]$name1, ]
trajMM_2 <- trajMM[corGIM_MM[[1]]$name2, ]
trajCombined <- trajGIM_2
assay(trajCombined, withDimnames = FALSE) <- (
t(apply(assay(trajGIM_2), 1, scale)) +
t(apply(assay(trajMM_2), 1, scale)))
combinedMat <- plotTrajectoryHeatmap(trajCombined,
returnMat = TRUE, varCutOff = 0)
rowOrder <- match(rownames(combinedMat), rownames(trajGIM_2))
ht1 <- plotTrajectoryHeatmap(trajGIM_2,
pal = paletteContinuous(set = "blueYellow"),
varCutOff = 0,
rowOrder = rowOrder)
ht2 <- plotTrajectoryHeatmap(trajMM_2,
pal = paletteContinuous(set = "solarExtra"),
varCutOff = 0,
rowOrder = rowOrder)
svglite(paste0(sample_name, "_JG", "_",
"GeneIntegrationMatrix-v-MotifMatrix_IntegrativePseudotimeHeatmaps_",
Sys.Date(), ".svg"))
print(ht1 + ht2)
dev.off()
Save progress...
save.image(paste0(sample_name, "_TrajectoryCorrelation_", Sys.Date(), ".RData"))