Determine cell trajectories

  1. Identify Foxd1 high clusters
  2. Identify high Ren1 clusters

13a. Identify progenitor population and progression based on Foxd1

progenitor_markers <- c("Foxd1")

rm(GIM_progenitors_names)
for (gene in progenitor_markers) {
    progenitor_gim <- gim_impute[grepl(paste0("^", gene, "$"),
                                 rowData(gim)$name),]
    progenitor_upper_quartile <- names(progenitor_gim[progenitor_gim >
        quantile(gim_impute[grep(paste0("^", gene, "$"),
        rowData(gim)$name),], 0.75)[[1]]])
    if (exists("GIM_progenitors_names")) {
        GIM_progenitors_names <- Reduce(intersect,
                                        list(GIM_progenitors_names,
                                        progenitor_upper_quartile))
    } else {
        GIM_progenitors_names <- progenitor_upper_quartile
    }
}

rm(GSM_progenitors_names)
for (gene in progenitor_markers) {
    progenitor_gsm <- gsm_impute[grepl(paste0("^", gene, "$"),
                                 rowData(gsm)$name),]
    progenitor_upper_quartile <- names(progenitor_gsm[progenitor_gsm >
        quantile(gsm_impute[grep(paste0("^", gene, "$"),
        rowData(gsm)$name),], 0.75)[[1]]])
    if (exists("GSM_progenitors_names")) {
        GSM_progenitors_names <- Reduce(intersect,
                                        list(GSM_progenitors_names,
                                        progenitor_upper_quartile))
    } else {
        GSM_progenitors_names <- progenitor_upper_quartile
    }
}

Identify clusters by timepoint that are in the upper quartile of Foxd1 expression

cluster_name <- "ManualClustersJG"
cluster_name <- "ManualClustersJG"

progenitors <- intersect(GIM_progenitors_names, GSM_progenitors_names)

if(length(grep("^E12", progenitors))==0) {
    E12_Q1_GIM_foxd1_idx <- 0
} else {
    E12_Q1_GIM_foxd1_idx <- grep("^E12", progenitors)
}
if(length(grep("^E18", progenitors))==0) {
    E18_Q1_GIM_foxd1_idx <- 0
} else {
    E18_Q1_GIM_foxd1_idx <- grep("^E18", progenitors)
}
if(length(grep("^P5", progenitors))==0) {
    P5_Q1_GIM_foxd1_idx <- 0
} else {
    P5_Q1_GIM_foxd1_idx <- grep("^P5", progenitors)
}
if(length(grep("^P30", progenitors))==0) {
    P30_Q1_GIM_foxd1_idx <- 0
} else {
    P30_Q1_GIM_foxd1_idx <- grep("^P30", progenitors)
}

E12_Q1_GIM_foxd1_names <- progenitors[E12_Q1_GIM_foxd1_idx]
E18_Q1_GIM_foxd1_names <- progenitors[E18_Q1_GIM_foxd1_idx]
P5_Q1_GIM_foxd1_names <- progenitors[P5_Q1_GIM_foxd1_idx]
P30_Q1_GIM_foxd1_names <- progenitors[P30_Q1_GIM_foxd1_idx]

if (length(E12_Q1_GIM_foxd1_names)!=0) {
    E12_progenitor_cell_index <- grep(paste(E12_Q1_GIM_foxd1_names,
                                            collapse="|"),
                                 getCellNames(proj5))
    E12_foxd1_clusters <- table(proj5$ManualClustersJG[E12_progenitor_cell_index])
} else {E12_foxd1_clusters <- NA}
if (length(E18_Q1_GIM_foxd1_names)!=0) {
    E18_progenitor_cell_index <- grep(paste(E18_Q1_GIM_foxd1_names,
                                            collapse="|"),
                                 getCellNames(proj5))
    E18_foxd1_clusters <- table(proj5$ManualClustersJG[E18_progenitor_cell_index])
} else {E18_foxd1_clusters <- NA}
if (length(P5_Q1_GIM_foxd1_names)!=0) {
    P5_progenitor_cell_index <- grep(paste(P5_Q1_GIM_foxd1_names,
                                            collapse="|"),
                                 getCellNames(proj5))
    P5_foxd1_clusters <- table(proj5$ManualClustersJG[P5_progenitor_cell_index])
} else {P5_foxd1_clusters <- NA}
if (length(P30_Q1_GIM_foxd1_names)!=0) {
    P30_progenitor_cell_index <- grep(paste(P30_Q1_GIM_foxd1_names,
                                            collapse="|"),
                                 getCellNames(proj5))
    P30_foxd1_clusters <- table(proj5$ManualClustersJG[P30_progenitor_cell_index])
} else {P30_foxd1_clusters <- NA}

# Confirm there are values at each timepoint
for (timepoint in c("E12","E18","P5","P30")) {
    if(any(is.na(get(paste0(timepoint, "_foxd1_clusters"))))) {
        warning(paste0("Drop ", timepoint))
    }
}

progenitor_order <- rbind(
    cbind("E12", top_n(as.data.table(E12_foxd1_clusters), 3)),
    cbind("E18", top_n(as.data.table(E18_foxd1_clusters), 3)),
    cbind("P5", top_n(as.data.table(P5_foxd1_clusters), 3)),
    cbind("P30", top_n(as.data.table(P30_foxd1_clusters), 3)))
colnames(progenitor_order) <- c("timepoint", "cluster", "no_cells")
progenitor_order[,marker:="Foxd1"]

13b. Identify endpoint (renin cell) population and progression based on Ren1

renin_markers <- c("Ren1")

rm(GIM_renin_names)
for (gene in renin_markers) {
    renin_gim <- gim_impute[grepl(paste0("^", gene, "$"),
                                 rowData(gim)$name),]
    renin_upper_quartile <- names(renin_gim[renin_gim >
        quantile(gim_impute[grep(paste0("^", gene, "$"),
        rowData(gim)$name),], 0.75)[[1]]])
    if (exists("GIM_renin_names")) {
        GIM_renin_names <- Reduce(intersect,
                                        list(GIM_renins_names,
                                        renin_upper_quartile))
    } else {
        GIM_renin_names <- renin_upper_quartile
    }
}

rm(GSM_renin_names)
for (gene in renin_markers) {
    renin_gsm <- gsm_impute[grepl(paste0("^", gene, "$"),
                                 rowData(gsm)$name),]
    renin_upper_quartile <- names(renin_gsm[renin_gsm >
        quantile(gsm_impute[grep(paste0("^", gene, "$"),
        rowData(gsm)$name),], 0.75)[[1]]])
    if (exists("GSM_renin_names")) {
        GSM_renin_names <- Reduce(intersect,
                                        list(GSM_renins_names,
                                        renin_upper_quartile))
    } else {
        GSM_renin_names <- renin_upper_quartile
    }
}

Identify clusters by timepoint that are in the upper quartile of Ren1 expression AND gene score

renin <- intersect(GIM_renin_names, GSM_renin_names)
if(length(grep("^E12", renin))==0) {
    E12_Q1_GIM_ren1_idx <- 0
} else {
    E12_Q1_GIM_ren1_idx <- grep("^E12", renin)
}
if(length(grep("^E18", renin))==0) {
    E18_Q1_GIM_ren1_idx <- 0
} else {
    E18_Q1_GIM_ren1_idx <- grep("^E18", renin)
}
if(length(grep("^P5", renin))==0) {
    P5_Q1_GIM_ren1_idx <- 0
} else {
    P5_Q1_GIM_ren1_idx <- grep("^P5", renin)
}
if(length(grep("^P30", renin))==0) {
    P30_Q1_GIM_ren1_idx <- 0
} else {
    P30_Q1_GIM_ren1_idx <- grep("^P30", renin)
}

E12_Q1_GIM_ren1_names <- renin[E12_Q1_GIM_ren1_idx]
E18_Q1_GIM_ren1_names <- renin[E18_Q1_GIM_ren1_idx]
P5_Q1_GIM_ren1_names  <- renin[P5_Q1_GIM_ren1_idx]
P30_Q1_GIM_ren1_names <- renin[P30_Q1_GIM_ren1_idx]

if (length(E12_Q1_GIM_ren1_names)!=0) {
    E12_renin_cell_index <- grep(paste(E12_Q1_GIM_ren1_names, collapse="|"),
                                 getCellNames(proj5))
    E12_ren1_clusters <- table(proj5$ManualClustersJG[E12_renin_cell_index])
} else {E12_ren1_clusters <- NA}

if (length(E18_Q1_GIM_ren1_names)!=0) {
    E18_renin_cell_index <- grep(paste(E18_Q1_GIM_ren1_names, collapse="|"),
                                 getCellNames(proj5))
    E18_ren1_clusters <- table(proj5$ManualClustersJG[E18_renin_cell_index])
} else {E18_ren1_clusters <- NA}

if (length(P5_Q1_GIM_ren1_names)!=0) {
    P5_renin_cell_index <- grep(paste(P5_Q1_GIM_ren1_names, collapse="|"),
                                getCellNames(proj5))
    P5_ren1_clusters <- table(proj5$ManualClustersJG[P5_renin_cell_index])
} else {P5_ren1_clusters <- NA}

if (length(P30_Q1_GIM_ren1_names)!=0) {
    P30_renin_cell_index <- grep(paste(P30_Q1_GIM_ren1_names, collapse="|"),
                                 getCellNames(proj5))
    P30_ren1_clusters <- table(proj5$ManualClustersJG[P30_renin_cell_index])
} else {P30_ren1_clusters <- NA}

for (timepoint in c("E12","E18","P5","P30")) {
    if(any(is.na(get(paste0(timepoint, "_ren1_clusters"))))) {
        warning(paste0("Drop ", timepoint))
    }
}

renin_order <- rbind(
    cbind("E18", top_n(as.data.table(E18_ren1_clusters), 3)),
    cbind("P5", top_n(as.data.table(P5_ren1_clusters), 3)),
    cbind("P30", top_n(as.data.table(P30_ren1_clusters), 3)))
colnames(renin_order) <- c("timepoint", "cluster", "no_cells")
renin_order[,marker:="Ren1"]

13c. Combine orders of Foxd1 and Ren1 clusters

Look at the order of cells with high expression and gene score of either Foxd1 or Ren1 ordered by the timepoint. Sort based on knowledge of when Foxd1 and Ren1 are typically expressed and of terminally differentiated cell populations that arise from early Foxd1 progenitors.

trajectory_order <- rbind(progenitor_order, renin_order)
trajectory_order$timepoint <- ordered(trajectory_order$timepoint,
                                      levels = c("E12", "E18", "P5", "P30"))
trajectory_order$marker <- ordered(trajectory_order$marker,
                                      levels = c("Foxd1", "Ren1"))
trajectory_order <- trajectory_order[order(trajectory_order$timepoint,
                                           trajectory_order$marker),]
trajectory_order
fwrite(trajectory_order, 'trajectory_order.csv', sep=',')

trajectory <- c("proliferating_stroma_and_PODs",          #E12
                "proliferating_cortical_stroma_and_EPIs", #E18
                "proliferating_MED_stroma_and_FBs",       #E18
                "proliferating_mesenchyme_and_PODs",      #E18
                "MCs_and_PCs",                            #E18
                "PCs",                                    #E18-P5
                "early_JGs",                              #E18-P5
                "early_VSMCs",                            #P5
                "late_JGs",                               #P30
                "late_VSMCs",                             #P30
                "MCs")                                    #P30
proj5 <- addTrajectory(
    ArchRProj = proj5, 
    name = "JG", 
    groupBy = "ManualClustersJG,
    trajectory = trajectory, 
    embedding = "UMAP_LSI_Harmony", force=T
)

13d. Plot the trajectory

p <- plotTrajectory(proj5, embedding = "UMAP_LSI_Harmony", 
                    trajectory = "JG", colorBy = "cellColData", name = "JG")
plotPDF(p,
        name = paste0(sample_name, "_JG_trajectory_UMAP_", Sys.Date(), ".pdf"),
        ArchRProj = proj5, addDOC = FALSE, width = 24, height = 10)
svglite(paste0(sample_name, "_",
               cluster_name, "_trajectory_UMAP_", Sys.Date(), ".svg"))
print(p[[1]])
dev.off()

13e. Plot marker genes along the trajectory

gene_list <- c(jg_markers, smc_markers)

for (gene in gene_list) {
    if (!file.exists(paste0(sample_name, "_proj5_JG-Trajectory-DotPlot_",
                            gene, "_", Sys.Date(), ".svg"))) {
        p_JG1 <- plotTrajectory(proj5, trajectory = "JG",
                                embedding = "UMAP_LSI_Harmony",
                                colorBy = "GeneScoreMatrix_no_ribosomal",
                                name = gene, continuousSet = "horizonExtra")
        p2_JG1 <- plotTrajectory(proj5,
                                 embedding = "UMAP_LSI_Harmony",
                                 trajectory = "JG",
                                 colorBy = "GeneIntegrationMatrix_LSI_Harmony",
                                 name = gene, continuousSet = "blueYellow")
        svglite(paste0(sample_name, "_proj5_JG-Trajectory_",
                       gene, "_", Sys.Date(), ".svg"))
        ggAlignPlots(p_JG1[[1]], p2_JG1[[1]], type = "h")
        dev.off()

        svglite(paste0(sample_name, "_proj5_JG-Trajectory-DotPlot_",
                   gene, "_", Sys.Date(), ".svg"))
        ggAlignPlots(p_JG1[[2]], p2_JG1[[2]], type = "h")
        dev.off()
    } 
}

save.image(paste0(sample_name,
                  "_ManualClustersJG_Trajectory_",
                  Sys.Date(),
                  ".RData"))

13f. Calculate updated peak set based on manual cluster annotations

proj6 <- proj5

addArchRThreads(threads = 4)
proj6 <- addGroupCoverages(ArchRProj = proj6,
                            groupBy = "ManualClustersJG",
                            minReplicates = 10,
                            maxReplicates = 25,
                            minCells = 25,                            
                            maxFragments = 100 * 10^6,
                            threads = 1,
                            force = TRUE)

save.image(paste0(sample_name,
                  "_ManualClustersJG_AddGroupCoverages_",
                  Sys.Date(),
                  ".RData"))

# Adding peaks again
proj6 <- addReproduciblePeakSet(
    ArchRProj = proj6, 
    minCells = 25, 
    groupBy = "ManualClustersJG",
    threads=1,
    force=T
)
proj6 <- addPeakMatrix(proj6)
proj6 <- addImputeWeights(proj6, reducedDims = "LSI_Harmony")

save.image(paste0(sample_name,
                  "_ManualClustersJG_PeakUpdate_",
                  Sys.Date(),
                  ".RData"))

# Add motif annotations
proj6 <- addMotifAnnotations(ArchRProj = proj6,
                             motifSet = "cisbp",
                             name = "Motif")
proj6 <- addDeviationsMatrix(
  ArchRProj = proj6, 
  peakAnnotation = "cisbp_Motif_JG",
  force = TRUE
)
proj6 <- addMotifAnnotations(ArchRProj = proj6,
                              motifSet = "JASPAR2020",
                              name = "JASPAR2020_Motif_JG") 
proj6 <- addDeviationsMatrix(
  ArchRProj = proj6, 
  peakAnnotation = "JASPAR2020_Motif_JG",
  force = TRUE
) 

save.image(paste0(sample_name,
                  "_ManualClustersJG_AddMotif_",
                  Sys.Date(),
                  ".RData"))

13g. Plot pseudotime heatmaps

  1. We can plot the MotifMatrix on the pseudo-time trajectory
  2. We can perform the same steps to obtain a pseudo-time heatmap of gene scores by setting useMatrix = "GeneScoreMatrix".
  3. Similarly, we can obtain a pseudo-time heatmap of gene expression by setting useMatrix = "GeneIntegrationMatrix_LSI_Harmony".
  4. Lastly, we can obtain a pseudo-time heatmap of peak accessibility by setting useMatrix = "PeakMatrix".

Integrative pseudo-time analyses

trajMM  <- getTrajectory(ArchRProj = proj6,
                         name = "JG",
                         useMatrix = "cisbp_Motif_JGMatrix",
                         scaleTo=NULL,
                         log2Norm = FALSE,
                         threads = 1)
p1 <- plotTrajectoryHeatmap(trajMM,
                            labelRows=TRUE,
                            scaleRows=TRUE,
                            labelTop=60,
                            pal = paletteContinuous(set = "solarExtra"))

trajGSM <- getTrajectory(ArchRProj = proj6,
                         name = "JG",
                         useMatrix = "GeneScoreMatrix_no_ribosomal",
                         log2Norm = TRUE,
                         threads = 1)
p2 <- plotTrajectoryHeatmap(trajGSM,
                            labelRows=TRUE,
                            labelTop=60,
                            pal = paletteContinuous(set = "horizonExtra"))

trajGIM <- getTrajectory(ArchRProj = proj6, 
                         name = "JG",
                         useMatrix = "GeneIntegrationMatrix_LSI_Harmony",
                         log2Norm = FALSE,
                         threads = 1)
p3 <- plotTrajectoryHeatmap(trajGIM,
                            labelTop=60,
                            pal = paletteContinuous(set = "blueYellow"))

trajPM  <- getTrajectory(ArchRProj = proj6,
                         name = "JG",
                         useMatrix = "PeakMatrix",
                         log2Norm = TRUE,
                         threads = 1)
p4 <- plotTrajectoryHeatmap(trajPM,
                            labelTop=60,
                            pal = paletteContinuous(set = "solarExtra"))
# Plot all together:
plotPDF(p1, p2, p3, p4, name = paste0(sample_name,
                                      "_JG",
                                      "_Trajectory_Heatmaps_",
                                      Sys.Date(), ".pdf"),
        ArchRProj = proj6, addDOC = FALSE, width = 6, height = 8)

# GeneScoreMatrix with MotifMatrix
corGSM_MM <- correlateTrajectories(trajGSM, trajMM,
                                   log2Norm1 = TRUE,
                                   log2Norm2 = FALSE)
trajGSM_2 <- trajGSM[corGSM_MM[[1]]$name1, ]
trajMM_2  <- trajMM[corGSM_MM[[1]]$name2, ]
trajCombined <- trajGSM_2
assay(trajCombined, withDimnames=FALSE) <- (
    t(apply(assay(trajGSM_2), 1, scale)) +
    t(apply(assay(trajMM_2), 1, scale)))
combinedMat <- plotTrajectoryHeatmap(trajCombined,
                                     returnMat = TRUE, varCutOff = 0)
rowOrder <- match(rownames(combinedMat), rownames(trajGSM_2))
ht1 <- plotTrajectoryHeatmap(trajGSM_2,
                             pal = paletteContinuous(set = "horizonExtra"),
                             varCutOff = 0,
                             rowOrder = rowOrder,
                             labelTop=60)
ht2 <- plotTrajectoryHeatmap(trajMM_2,
                             pal = paletteContinuous(set = "solarExtra"),
                             varCutOff = 0,
                             rowOrder = rowOrder,
                             labelTop=60)
svglite(paste0(sample_name, "_JG", "_",
           "GeneScoreMatrix-v-MotifMatrix_IntegrativePseudotimeHeatmaps_",
           Sys.Date(), ".svg"))
print(ht1 + ht2)
dev.off()

## GeneIntegrationMatrix_LSI_Harmony with MotifMatrix
corGIM_MM <- correlateTrajectories(trajGIM, trajMM,
                                   log2Norm1 = FALSE,
                                   log2Norm2 = FALSE)
trajGIM_2 <- trajGIM[corGIM_MM[[1]]$name1, ]
trajMM_2  <- trajMM[corGIM_MM[[1]]$name2, ]

trajCombined <- trajGIM_2
assay(trajCombined, withDimnames = FALSE) <- (
    t(apply(assay(trajGIM_2), 1, scale)) + 
    t(apply(assay(trajMM_2), 1, scale)))

combinedMat <- plotTrajectoryHeatmap(trajCombined,
                                     returnMat = TRUE, varCutOff = 0) 
rowOrder <- match(rownames(combinedMat), rownames(trajGIM_2))
ht1 <- plotTrajectoryHeatmap(trajGIM_2,
                             pal = paletteContinuous(set = "blueYellow"),
                             varCutOff = 0,
                             rowOrder = rowOrder)
ht2 <- plotTrajectoryHeatmap(trajMM_2,
                             pal = paletteContinuous(set = "solarExtra"),
                             varCutOff = 0,
                             rowOrder = rowOrder)
svglite(paste0(sample_name, "_JG", "_", 
           "GeneIntegrationMatrix-v-MotifMatrix_IntegrativePseudotimeHeatmaps_",
           Sys.Date(), ".svg"))
print(ht1 + ht2)
dev.off()

Save progress...

save.image(paste0(sample_name, "_TrajectoryCorrelation_", Sys.Date(), ".RData"))