Plot bubble plots (dot plots)

Generate side-by-side expression and accessibility plots of genes of interest, including markers of relevant pathways in the kidney.

18a. Generate helper functions

logNormalize <- function(
  data,
  scale.factor = 1e4,
  margin = 2L,
  verbose = TRUE,
  ...
) {
    data <- as(object = data, Class = "dgCMatrix")
    norm.data <- Seurat:::LogNorm(data, scale_factor = scale.factor, display_progress = verbose)
    colnames(x = norm.data) <- colnames(x = data)
    rownames(x = norm.data) <- rownames(x = data)
    return(norm.data)
}

calcBubble <- function(meta_data=NULL,
                       mat=NULL,
                       row_data=NULL,
                       gene_list=NULL,
                       group_by=NULL,
                       col.min=-2.5,
                       col.max=2.5,
                       normalize=TRUE,
                       scale_factor = 10000,
                       ...) {
    if(normalize) {
        #exprs_matrix <- apply(X = mat, MARGIN = 2, FUN = logNormalize, factor = scale_factor)
        exprs_matrix <- Seurat::NormalizeData(mat, scale.factor = scale_factor, ...)
    } else {
        exprs_matrix <- mat
    }
    exp_dt <- as.data.table(exprs_matrix)
    gene_names      <- row_data
    geneXexp  <- cbind(gene_names, exp_dt)
    t_exp <- as.data.frame(t(as.data.frame(geneXexp[,2:length(colnames(geneXexp))])))
    colnames(t_exp) <- gene_names$name
    rownames(t_exp) <- colnames(geneXexp)[2:length(colnames(geneXexp))]
    for(i in rownames(t_exp)){
      t_exp[as.character(i),"Clusters"] <- meta_data$clusters[which(meta_data$cellname==as.character(i))] # or meta_data$named_cluster or meta_data$group
      t_exp[as.character(i),"Samples"]  <- meta_data$cellname[which(meta_data$cellname==as.character(i))]
    }

    summary_dt <- data.table(gene_name = unique(gene_list),
                             idents = rep(unique(t_exp$Clusters), each=length(unique(gene_list))),
                             avg_exp = numeric(length(rep(unique(t_exp$Clusters), each=length(unique(gene_list))))),
                             pct_exp = numeric(length(rep(unique(t_exp$Clusters), each=length(unique(gene_list))))),
                             z_score = numeric(length(rep(unique(t_exp$Clusters), each=length(unique(gene_list))))))
    for (pop in unique(t_exp$Clusters)) {
        pop_mat <- t_exp[t_exp$Clusters == pop, unique(gene_list)]
        avg_exp <- apply(
            X = pop_mat,
            MARGIN = 2,
            FUN = function(x) {
              return(mean(x = expm1(x = x)))
            }
          )
        pct_exp <- apply(
            X = pop_mat,
            MARGIN = 2,
            FUN = function(x, cutoff) {
                return ((sum(x > cutoff, na.rm = T) / length(x))*100)
            },
            cutoff = 0
          )
        summary_dt[idents == pop,]$avg_exp <- avg_exp
        summary_dt[idents == pop,]$pct_exp <- pct_exp
    }
    for (gene in unique(summary_dt$gene_name)) {
        gene_exp <- summary_dt[gene_name == gene, avg_exp]
        z_score <- scale(log1p(gene_exp))
        summary_dt[gene_name == gene,]$z_score <- z_score
    }

    summary_dt$avg_exp[is.nan(summary_dt$avg_exp)] <- 0
    summary_dt$avg_exp[is.na(summary_dt$avg_exp)] <- 0
    summary_dt$z_score[is.nan(summary_dt$z_score)] <- 0
    summary_dt$z_score[is.na(summary_dt$z_score)] <- 0

    # Subset by group
    if(!is.null(group_by)) {
        summary_dt <- summary_dt[summary_dt$idents %in% group_by,]
        summary_dt$idents  <- ordered(summary_dt$idents, levels = group_by)
    }
    # Order by provided gene_list
    summary_dt$gene_name <- ordered(summary_dt$gene_name, levels = gene_list)

    summary_dt[z_score > col.max,]$z_score <- col.max
    summary_dt[z_score < col.min,]$z_score <- col.min

    return(summary_dt)
}

plotBubble <- function(info=NULL,
                       calc_limit=TRUE,
                       lower_limit=-2.5,
                       upper_limit=2.5,
                       color_low = "#005AB5",
                       color_mid = "#ffffff",
                       color_high = "#DC3220") {
    color_title <- "Expression"
    size_title  <- "Expressed"

    if ("z_score" %in% names(info)) {
        color_column = "z_score"
        color_title = paste0(color_title, " Z-score")
    } else if ("avg_exp" %in% names(info)) {
        color_column = "avg_exp"
        color_title = paste0('Average ', color_title)
    } else {
        stop("Could not determine the column containing expression or accessibility values")
    }
    # Determine color limits
    if (calc_limit) {
        lower_limit <- min(floor(range(info[[color_column]])))
        upper_limit <- max(ceiling(range(info[[color_column]])))
    } else {
        lower_limit <- lower_limit
        upper_limit <- upper_limit
    }
    color_range <- c(lower_limit, upper_limit)
    g <- ggplot(data = info, aes(x = .data[["gene_name"]], y = .data[["idents"]])) +
          geom_point(aes(size = .data[["pct_exp"]], color = .data[[color_column]]),
                     position = position_nudge(x = 0, y = 0)) +
          scale_colour_gradient2(name=color_title,
                                 low=color_low,  # previous #352A86
                                 mid=color_mid,  # previous #2DB7A3
                                 high=color_high, # previous #F8FA0D
                                 midpoint = (color_range[-length(color_range)] + color_range[-1L])/2,
                                 limits=c(lower_limit, upper_limit)) +
          theme(axis.title.x = element_blank(), axis.title.y = element_blank()) +
          guides(size = guide_legend(title = paste0('% ', size_title))) +
          scale_radius(limits=c(0,100),
                       breaks=c(0,25,50,75,100)) +
          labs(
            x = 'gene_name',
            y = 'idents'
          ) +
          theme(panel.background = element_rect(fill='transparent'),
                plot.background = element_rect(fill='transparent', color=NA),
                panel.grid.major = element_blank(),
                panel.grid.minor = element_blank(),
                legend.background = element_rect(fill='transparent'),
                legend.box.background = element_rect(fill='transparent'),
                axis.line = element_line(colour = "black")) +
          theme(axis.text.x=element_text(angle = 90, vjust = 0.5, hjust = 0))

    return(g)
}

calcPlotBubble <- function(plotname, markers) {
    expr_info <- calcBubble(ArchRProj=proj6,
                            mat=gim_p6_impute,
                            row_data=rowData(gim_p6),
                            gene_list=unique(markers),
                            group_by=trajectory,
                            col.max=2,
                            col.min=-2,
                            z_transform=TRUE)
    acc_info <- calcBubble(ArchRProj=proj6,
                           mat=gsm_p6_impute,
                           row_data=rowData(gsm_p6),
                           gene_list=unique(markers),   
                           group_by=trajectory,
                           col.max=2,
                           col.min=-2,
                           z_transform=TRUE)
    p <- plotBubble(expr_info=expr_info,
                    acc_info=acc_info,
                    name=plotname)
    if (!file.exists(paste0("sc_kidney_BubblePlot_trajectory_", plotname,
                            "_", Sys.Date(), ".svg"))) {
        svglite(paste0("sc_kidney_BubblePlot_trajectory_", plotname,
                   "_", Sys.Date(), ".svg"),
                width=30, height=8,
                system_fonts = list(sans = "Arial"))
        print(p)
        dev.off()           
    }
}

18b. Extract gene expression and gene activity matrices

gim_p6 <- getMatrixFromProject(proj6, "GeneIntegrationMatrix_LSI_Harmony")
gim_p6_impute <- imputeMatrix(assay(gim_p6), getImputeWeights(proj6))

gsm_p6 <- getMatrixFromProject(proj6, "GeneScoreMatrix_no_ribosomal")
gsm_p6_impute <- imputeMatrix(assay(gsm_p6), getImputeWeights(proj6))

18c. Plot JG and SMC marker genes on bubble plot

jg_vsmc_markers <- c("Foxd1", "Ren1", "Akr1b7",
                     "Acta2", "Myh11", "Cnn1", "Smtn", "Tagln")
expr_info <- calcBubble(ArchRProj=proj6,
                        mat=gim_p6_impute,
                        row_data=rowData(gim_p6),
                        gene_list=unique(jg_vsmc_markers),
                        group_by=trajectory,
                        col.max=2,
                        col.min=-2,
                        z_transform=TRUE)
acc_info <- calcBubble(ArchRProj=proj6,
                       mat=gsm_p6_impute,
                       row_data=rowData(gsm_p6),
                       gene_list=unique(jg_vsmc_markers),   
                       group_by=trajectory,
                       col.max=2,
                       col.min=-2,
                       z_transform=TRUE)
plotname <- "Impute_Z-Score_JG-SMC_Markers"
p <- plotBubble(expr_info=expr_info,
                acc_info=acc_info,
                name=plotname)

svglite(paste0("sc_kidney_BubblePlot_trajectory_", plotname, ".svg"),
            width=30, height=8,
            system_fonts = list(sans = "Arial"))
print(p)
dev.off()

18d. Plot markers of kidney cell populations

Create plot of each cell marker grouping

plotnames <- c("Collecting_Duct_Principal_Cells",
               "Intercalated_A_Cells",
               "Intercalated_B_Cells",
               "Inner_Medullary_Collecting_Duct",
               "Connecting_Tubule",
               "Distal_Convoluted_Tubule",
               "Endothelial_Cell",
               "Fenestrated_EC",
               "Lymphatic_EC",
               "Fibroblast",
               "Granular_Cell",
               "Cortical_Interstitial_Cell",
               "Short_Loop_Descending_Limb",
               "Long_Descending_Limb_Outer_Medulla",
               "Long_Descending_Limb_Inner_Medulla",
               "Thin_Ascending_Limb",
               "Thick_Ascending_Limb",
               "Macula_Densa",
               "Mast_Cell",
               "Mesangial_Cell",
               "Pericyte",
               "Podocyte",
               "Proximal_General",
               "Proximal_S1",
               "Proximal_S2",
               "Proximal_S3",
               "Smooth_Muscle_Cell",
               "Transitional_Epithelium")
collecting_duct_principal_cells <- c("Acer2", "Aqp2", "Aqp3", "Aqp4", "Avpr2",
                                     "Fxyd4", "Fzd1", "Gata3", "Lgals3",
                                     "Ptges", "Stc1", "Tmem45b", "Tspan1")
intercalated_A_cells <- c("Aqp6", "Dmrt2", "P2ry14", "Slc4a1")
intercalated_B_cells <- c("Ap1s3", "Insrr", "Slc26a4")
inner_medullary_collecting_duct <- c("S100a4", "Akr1b8", "Gpha2", "Plekhb1",
                                     "Ccl2", "Aldh1a3", "Ccl7", "Slfn2",
                                     "Tm6sf1", "Fam20a", "Slc14a2", "Elf5")
connecting_tubule <- c("Calb1", "Ccl20", "Defb9", "Fam89a", "Stap1", "Trpv5")
distal_convoluted_tubule <- c("Cdc42ep3", "Clec4a3", "Fetub", "Gfra1", "Kcng1",
                              "Mlana", "Pcolce", "Plekho1", "Sgca", "Slc12a3")
endothelial_cell <- c("Adgrl4", "Emcn", "Eng", "Pecam1", "Plac8", "Plvap",
                      "Tek", "Tspan8", "Kdr", "Ets1")
fenestrated_ec <- c("Plvap", "Pecam1", "Cd34")
lymphatic_ec <- c("Cd34", "Pdpn", "Pecam1", "Prox1", "Vwf")                              
fibroblast <- c("Col1A1", "Nt5e", "Pdgfrb", "S100a4")
granular_cell <- c("Adrb1", "Ren1", "Agtr1a")
cortical_interstitial_cell <- c("Foxd1", "Nt5e", "Epo")
short_loop_descending_limb <- c("Rgs5", "Cox4i2", "Cyp4b1", "Parvb", "Sh3rf2",
                                "Hey2")
long_descending_limb_outer_medulla <- c("Nkain4", "Adamts5")
long_descending_limb_inner_medulla <- c("Cxcl1", "Gem", "Fga", "Pla2g7",
                                        "Thbs1", "Enc1", "Spsb1", "Arid5a",
                                        "Cd40", "Slc14a2")
thin_ascending_limb <- c("Nrgn", "Rnf182", "Pcsk9", "Dram1", "Mir24-2", "Arc",
                         "Nek6", "Cdkn1c", "Calcrl", "Clcnka")
thick_ascending_limb <- c("Umod", "Slc12a1", "Smarcd3", "Kcnt1", "Ptgs2",
                          "Irx2", "Ppp1r1b", "Lipg", "Tnni1", "Lrrc66", "Igf1")
macula_densa <- c("Ptgs2", "Nos1", "Oxtr")
mast_cell <- c("Fcer1a", "Mcemp1", "Milr1")
mesangial_cell <- c("Arid5b", "Ccr7", "Fn1", "Gata3", "Itga8", "Pawr",
                    "Pdgfrb", "Ptn", "Serpine2", "Tbx18", "Tek", "Thy1")
pericyte <- c("Acta2", "Alpl", "Angpt2", "Cspg4", "Mcam", "Mfge8", "Nt5e",
              "Tbx18", "Tek")
podocyte <- c("Nphs1", "Nphs2", "Wt1", "Cd2ap", "Podxl")
proximal_general <- c("Gsta2", "Agxt2", "Cyp2e1", "Cryl1", "Glyat", "Sord",
                      "Pdzk1", "Upb1", "Sod3", "Hnf4a")
proximal_s1 <- c("Nme4", "Apoe", "Slc5a2")
proximal_s2 <- c("Slc5a1", "Slco1a6", "Hgd", "Slc17a3", "Osgin1", "Hsd11b1",
                 "Serpinf2", "Kap", "Haao", "Slc7a13", "Ppic")
proximal_s3 <- c("Tff3", "Gng13", "Slc23a3", "Gc", "Cacng5", "Tgfbi",
                 "Slc22a13", "Rbp4")
smooth_muscle_cell <- c("Acta2", "Cnn1", "Myh11", "Smtn", "Tagln")
transitional_epithelium <- c("Upk1a", "Upk1b", "Upk2", "Upk3a", "Upk3b")

marker_list <- list(collecting_duct_principal_cells,
                    intercalated_A_cells,
                    intercalated_B_cells,
                    inner_medullary_collecting_duct,
                    connecting_tubule,
                    distal_convoluted_tubule,
                    endothelial_cell,
                    fenestrated_ec,
                    lymphatic_ec,
                    fibroblast,
                    granular_cell,
                    cortical_interstitial_cell,
                    short_loop_descending_limb,
                    long_descending_limb_outer_medulla,
                    long_descending_limb_inner_medulla,
                    thin_ascending_limb,
                    thick_ascending_limb,
                    macula_densa,
                    mast_cell,
                    mesangial_cell,
                    pericyte,
                    podocyte,
                    proximal_general,
                    proximal_s1,
                    proximal_s2,
                    proximal_s3,
                    smooth_muscle_cell,
                    transitional_epithelium)

names(marker_list) <- plotnames

for (i in 1:length(names(marker_list))) {
    calcPlotBubble(names(marker_list[i]), marker_list[[i]])
}

18e. Plot endocrine versus contractile markers along the trajectory

Endocrine markers

  • https://www.sciencedirect.com/science/article/pii/S0021925820339090
  • genes directly related to the endocrine phenotype (Ren1, Akr1b7, Sdf2l1, Spink8, Atp6v1g3)
Endocrine markers
Hopx
Fgf21
Fgf23
Kl
Cyp24a1
Mettl1
Mettl21b
Pth

Contractile markers

  • cells exposed to high perfusion pressure maintained a contractile phenotype (Vtn, Ckmt1)
Contractile markers
Actn3
Cald1
Myh9
endocrine   <- c("Creb1", "Rbpj", "Ets1", "Hoxd8", "Nr2f6", "Hoxb8", "Vdr",  
                 "Fgf21", "Fgf23", "Kl", "Cyp27b1", "Cyp24a1", "Pth",
                 "Mettl21b", "Mettl1", "Thra", "Thrb")
contractile <- c("Nkx2-4", "Nkx2-5", "Nkx2-3", "Myod1", "Nfatc4", "Klf6",
                 "Klf2", "Mef2c", "Nkx3-1", "Myocd", "Klf15", "Srf", "Klf4",
                 "Actn3", "Acta2", "Cald1", "Myh9", "Myh11", "Cnn1", "Hopx")

expr_info <- calcBubble(ArchRProj=proj6,
                        mat=gim_p6_impute,
                        row_data=rowData(gim_p6),
                        gene_list=unique(c(endocrine, contractile)),
                        group_by=trajectory,
                        col.max=2,
                        col.min=-2,
                        z_transform=TRUE)
acc_info <- calcBubble(ArchRProj=proj6,
                       mat=gsm_p6_impute,
                       row_data=rowData(gsm_p6),
                       gene_list=unique(c(endocrine, contractile)),   
                       group_by=trajectory,
                       col.max=2,
                       col.min=-2,
                       z_transform=TRUE)
plotname <- "Endocrine-v-Contractile"
p <- plotBubble(expr_info=expr_info,
                acc_info=acc_info,
                name=plotname)

svglite(paste0("sc_kidney_BubblePlot_trajectory_", plotname,
               "_", Sys.Date(), ".svg"),
            width=36, height=8,
            system_fonts = list(sans = "Arial"))
print(p)
dev.off()

18f. Plot genes identified in earlier study

renin_enriched <- c("Rgs5", "Crip1", "Atp1b2", "Syne2", "Plac9", "Myh11",
                    "Jph2", "Myo18", "Mgp", "Ren1", "Akr1b7")
renin_transcriptional_control <- c("Tbp", "Hoxd10", "Tcfap2", "Arid5b", "Cdx1",
                                   "Creb", "Esrrg", "Gata3", "Gfi1b", "Glis2",
                                   "Cbfa2t3", "Hand1", "Klf2", "Hoxb8",
                                   "Hoxd8", "Myod1", "Nr2f6", "Rbpj", "Cbf1",
                                   "Vdr", "Rfx2", "Nr4a1")
smc_transcriptional_control <- c("Tcf3", "Ikzf3", "Myocd", "Nfatc4", "Nkx2.3",
                                 "Nkx2.4", "Nkx2.5", "Nkx3.1", "Srf")
calcium_control <- c("Kcna3", "Jph2", "Pln", "Trpc6", "Trdn", "Plm", "Hrc",
                     "Mga", "Ptp4a3", "Sfrp2")

calcPlotBubble("Confer_JG", unique(renin_enriched))
calcPlotBubble("Confer_JG_trascriptional_ctrl", unique(renin_transcriptional_control))
calcPlotBubble("Confer_SMC_trascriptional_ctrl", unique(smc_transcriptional_control))
calcPlotBubble("Confer_Ca2+_ctrl", unique(calcium_control))

18g. Plot Kallikrein-Kinin system on bubble plot

kk_genes <- c("Bdkrb1", "Bdkrb2", "Kng1", "Klk1", "Klk2")
kk_markers_expr_info <- calcBubble(ArchRProj=proj6,
                                   mat=gim_p6_impute,
                                   row_data=rowData(gim_p6),
                                   gene_list=unique(kk_genes),
                                   group_by=trajectory,
                                   col.max=2,
                                   col.min=-2,
                                   z_transform=TRUE)
kk_markers_acc_info <- calcBubble(ArchRProj=proj6,
                                  mat=gsm_p6_impute,
                                  row_data=rowData(gsm_p6),
                                  gene_list=unique(kk_genes),   
                                  group_by=trajectory,
                                  col.max=2,
                                  col.min=-2,
                                  z_transform=TRUE)
plotname <- "Kallikrein-Kinin_System"
p <- plotBubble(expr_info=kk_markers_expr_info,
                acc_info=kk_markers_acc_info,
                name=plotname)

svglite(paste0("sc_kidney_BubblePlot_trajectory_", plotname,
               "_", Sys.Date(), ".svg"),
            width=30, height=8,
            system_fonts = list(sans = "Arial"))
print(p)
dev.off()

18h. Plot prostaglandin pathway bubble plot

prostaglandin_receptors <- c("Ptgds", "Ptges2", "Ptgfr", "Pgi2", "Tbxa2r",
                             "Ptgs2", "Ptges3", "Ptges3l", "Ptgis", "Ptgs1",
                             "Ptgs2os", "Ptgs2os2")
prostaglandin_expr_info <- calcBubble(ArchRProj=proj6,
                                      mat=gim_p6_impute,
                                      row_data=rowData(gim_p6),
                                      gene_list=unique(prostaglandin_receptors),
                                      group_by=trajectory,
                                      col.max=2,
                                      col.min=-2,
                                      z_transform=TRUE)
prostaglandin_acc_info <- calcBubble(ArchRProj=proj6,
                                     mat=gsm_p6_impute,
                                     row_data=rowData(gsm_p6),
                                     gene_list=unique(prostaglandin_receptors),   
                                     group_by=trajectory,                                   
                                     col.max=2,
                                     col.min=-2,
                                     z_transform=TRUE)
plotname <- "ProstaglandinGenes"
p <- plotBubble(expr_info=prostaglandin_expr_info,
                acc_info=prostaglandin_acc_info,
                name=plotname)

svglite(paste0("sc_kidney_BubblePlot_trajectory_", plotname, ".svg"),
            width=16, height=8,
            system_fonts = list(sans = "Arial"))
print(p)
dev.off()

18i. Plot endothelin pathway bubble plot

https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3005421/

endothelin_markers <- c("Edn1", "Edn2", "Vezf1", "Foxo1", "Prkca", "Prkcb",
                        "Prkcd", "Prkcg", "Tgfb1", "Tgfb2", "Tgfb3", "Hif1a",
                        "Gata2", "Edn3", "Ednra", "Ednrb")
endothelin_expr_info <- calcBubble(ArchRProj=proj6,
                                   mat=gim_p6_impute,
                                   row_data=rowData(gim_p6),
                                   gene_list=unique(endothelin_markers),
                                   group_by=trajectory,
                                   col.max=2,
                                   col.min=-2,
                                   z_transform=TRUE)
endothelin_acc_info <- calcBubble(ArchRProj=proj6,
                                  mat=gsm_p6_impute,
                                  row_data=rowData(gsm_p6),
                                  gene_list=unique(endothelin_markers),   
                                  group_by=trajectory,                                   
                                  col.max=2,
                                  col.min=-2,
                                  z_transform=TRUE)
plotname <- "EndothelinGenes"
p <- plotBubble(expr_info=endothelin_expr_info,
                acc_info=endothelin_acc_info,
                name=plotname)

svglite(paste0("sc_kidney_BubblePlot_trajectory_", plotname, ".svg"),
            width=22, height=8,
            system_fonts = list(sans = "Arial"))
print(p)
dev.off()

18j. Plot markers of each trajectory population on bubble plot

trajectory_markers <- c("Ephx3",         # MCs
                        "Atcayos",       # late_VSMCs
                        "Sgca",          # late_JGs
                        "Glp1r",         # early_VSMCs
                        "Lrrc15",        # early_JGs
                        "8030451O07Rik", # PCs
                        "Gnas",          # MCs_and_PCs
                        "Retn",          # proliferating_mesenchyme_and_PODs
                        "Ifitm1",        # proliferating_MED_stroma_and_FBs
                        "Neil3",         # proliferating_cortical_stroma_and_EPIs
                        "Bcl11a")        # proliferating_stroma_and_PODs
expr_info <- calcBubble(ArchRProj=proj6,
                        mat=gim_p6_impute,
                        row_data=rowData(gim_p6),
                        gene_list=unique(trajectory_markers),
                        group_by=trajectory,
                        col.max=5,
                        col.min=-5,
                        z_transform=TRUE)

expr_info$gene_name <- ordered(expr_info$gene_name,
                               levels = unique(trajectory_markers))

acc_info <- calcBubble(ArchRProj=proj6,
                       mat=gsm_p6_impute,
                       row_data=rowData(gsm_p6),
                       gene_list=unique(trajectory_markers),   
                       group_by=trajectory,                                   
                       col.max=5,
                       col.min=-5,
                       z_transform=TRUE)
acc_info$gene_name <- ordered(acc_info$gene_name,
                              levels = unique(trajectory_markers))
plotname <- "Impute_Z-Score_Trajectory"
p <- plotBubble(expr_info=expr_info,
                acc_info=acc_info,
                name=plotname)
svglite(paste0("sc_kidney_BubblePlot_trajectory_markers.svg"),
            width=12, height=8,
            system_fonts = list(sans = "Arial"))
print(p)
dev.off()