8a. Write scRNA-seq cluster marker genes to files

Yields 21 clusters after hemoglobin containing cells removed.

for (i in 0:21) {
    name <- paste0(sample_name,
                   "_scRNA-seq_Cluster-", i, "_Markers_", Sys.Date(), ".csv")
    cluster_markers <- as.data.table(
        FindMarkers(seurat, ident.1 = i,
                    min.pct = 0.25), keep.rownames=TRUE)
    fwrite(cluster_markers, name)
}

8b. Plot scRNA-seq PCA plot

svglite(paste0(sample_name, "_scRNA-seq_DimPlot-PCA_", Sys.Date(), ".svg"))
DimPlot(seurat, reduction = "pca")
dev.off()

# Raw counts
VlnPlot(seurat, features = c("Ren1", "Foxd1"), slot = "counts", log=TRUE)

svglite(paste0(sample_name,
           "_scRNA-seq_ViolinPlot-Ren1-Akr1b7-Foxd1_",
           Sys.Date(),
           ".svg"))
VlnPlot(seurat, features = c("Ren1", "Foxd1", "Akr1b7"))
dev.off()

svglite(paste0(sample_name,
           "_scRNA-seq_FeaturePlot-Ren1-Akr1b7-Foxd1_",
           Sys.Date(),
           ".svg"))
FeaturePlot(seurat, features = c("Ren1", "Foxd1", "Akr1b7"))
dev.off()

8c. Generate scRNA-seq marker gene lists

# Only positive markers
all_pos_markers <- FindAllMarkers(seurat, only.pos = TRUE,
                                  min.pct = 0.25, logfc.threshold = 0.25)

top10_pos <- data.table(all_pos_markers %>% group_by(cluster) %>%
    top_n(n = 10, wt = avg_log2FC))
top25_pos <- data.table(all_pos_markers %>% group_by(cluster) %>%
    top_n(n = 25, wt = avg_log2FC))
top50_pos <- data.table(all_pos_markers %>% group_by(cluster) %>%
    top_n(n = 50, wt = avg_log2FC))

fwrite(top10_pos,
    paste0(sample_name, "_scRNA-seq_Top10PositiveMarkers_", Sys.Date(), ".csv"),
    sep=",")
fwrite(top25_pos,
    paste0(sample_name, "_scRNA-seq_Top25PositiveMarkers_", Sys.Date(), ".csv"),
    sep=",")
fwrite(top50_pos,
    paste0(sample_name, "_scRNA-seq_Top50PositiveMarkers_", Sys.Date(), ".csv"),
    sep=",")

# Feature expression heatmap
svglite(paste0(sample_name,
           "_scRNA-seq_DoHeatmap_Top10Positive_",
           Sys.Date(), ".svg"),
    height=20, width=25)
DoHeatmap(seurat, features = top10_pos$gene, size=8, draw.lines=T) + NoLegend()
dev.off()

all_pos_markers_ROC <- FindAllMarkers(seurat, only.pos = TRUE, test.use = "roc",
                                  min.pct = 0.25, logfc.threshold = 0.25)

top10_ROC_pos <- data.table(all_pos_markers_ROC %>% group_by(cluster) %>%
    top_n(n = 10, wt = avg_log2FC))
top25_ROC_pos <- data.table(all_pos_markers_ROC %>% group_by(cluster) %>%
    top_n(n = 25, wt = avg_log2FC))
top50_ROC_pos <- data.table(all_pos_markers_ROC %>% group_by(cluster) %>%
    top_n(n = 50, wt = avg_log2FC))

fwrite(top10_ROC_pos,
    paste0(sample_name, "_scRNA-seq_Top10ROCPositiveMarkers_", Sys.Date(), ".csv"),
    sep=",")
fwrite(top25_ROC_pos,
    paste0(sample_name, "_scRNA-seq_Top25ROCPositiveMarkers_", Sys.Date(), ".csv"),
    sep=",")
fwrite(top50_ROC_pos,
    paste0(sample_name, "_scRNA-seq_Top50ROCPositiveMarkers_", Sys.Date(), ".csv"),
    sep=",")

# Feature expression heatmap
svglite(paste0(sample_name,
           "_scRNA-seq_DoHeatmap_Top10ROCPositive_",
           Sys.Date(), ".svg"),
    height=20, width=25)
DoHeatmap(seurat, features = top10_ROC_pos$gene, size=8, draw.lines=T) + NoLegend()
dev.off()

8d. Compare cluster marker gene lists with previously reported markers

Load package and create function for processing marker gene lists.

library("readxl")

splitDataTable <- function(DT, split_factor) {
    if (is.numeric(split_factor)) {
        split_factor = colnames(DT)[split_factor]
        message("Integer split_factor, changed to: ", split_factor)
    }
    lapply( split(1:nrow(DT), DT[, get(split_factor)]), function(x) DT[x])
}

Use cellkb.com

Manually input the Top25 gene markers of each cluster and obtain the top 5 ranked cells and overall ranked cell category to assign cluster identities.

Compare to Gomez' kidney marker genes list to identify cell types

cell_markers <- "WholeKidneyMarkers.xlsx"
kidneyXL <- read_excel(cell_markers)
excel_sheets(cell_markers)
markers <- read_excel(cell_markers, sheet = excel_sheets(cell_markers)[3])
markers <- as.data.table(markers)

cell_list <- splitDataTable(markers, "Cell Type")
rm(marker_tbl)
for (cell in names(cell_list)) {
    cell_type <- cell_list[[cell]]
    merged    <- merge(all_pos_markers_ROC,
                       cell_type,
                       by.x="gene",
                       by.y="Marker Gene Symbol")
    #print(merged)
    if (!is.null(merged$cluster)) {
        tbl       <- data.table(table(merged$cluster))
        col_pos   <- ncol(tbl)
        colnames(tbl)[col_pos] <- cell
        if (exists("marker_tbl")) {
            marker_tbl <- merge(marker_tbl, tbl, by="V1")
        } else {
            marker_tbl <- tbl
        }
    }
}
colnames(marker_tbl)[1] <- "cluster"
fwrite(marker_tbl,
    paste0(sample_name, "_WholeKidneyMarkers_", Sys.Date(), ".csv"), sep=",")

Compare to Cao et al. 2018

cell_markers <- "aau0730_tabless1_s13.xlsx"
kidneyXL <- read_excel(cell_markers)
excel_sheets(cell_markers)
tmp <- read_excel(cell_markers, sheet = excel_sheets(cell_markers)[8])
header <- as.character(tmp[1,])
markers <- as.data.table(tmp[-1,])
colnames(markers) <- header
markers <- as.data.table(markers)

cell_list <- splitDataTable(markers, "max.tissue")
rm(marker_tbl)
for (cell in names(cell_list)) {
    cell_type <- cell_list[[cell]]
    merged    <- merge(all_pos_markers_ROC,
                       cell_type,
                       by.x="gene",
                       by.y="gene_short_name")
    #print(merged)
    if (!is.null(merged$cluster)) {
        tbl       <- data.table(table(merged$cluster))
        col_pos   <- ncol(tbl)
        colnames(tbl)[col_pos] <- cell
        if (exists("marker_tbl")) {
            marker_tbl <- merge(marker_tbl, tbl, by="V1")
        } else {
            marker_tbl <- tbl
        }
    }
}
colnames(marker_tbl)[1] <- "cluster"
fwrite(marker_tbl,
    paste0(sample_name, "_Cao2018_", Sys.Date(), ".csv"), sep=",")

Compare to Marker genes from Coombes et al. (Development 2019) & Park et al. (Science 2018)

I manually rearranged Sheet2 into Sheet3 for ease of import

cell_markers <- "41467_2021_22266_MOESM5_ESM.xlsx"
kidneyXL <- read_excel(cell_markers)
excel_sheets(cell_markers)
markers <- read_excel(cell_markers, sheet = excel_sheets(cell_markers)[3])
markers <- as.data.table(markers)

cell_list <- splitDataTable(markers, "cell")
rm(marker_tbl)
for (cell in names(cell_list)) {
    cell_type <- cell_list[[cell]]
    merged    <- merge(all_pos_markers_ROC, cell_type, by.x="gene", by.y="gene")
    if (!is.null(merged$cluster)) {
        tbl       <- data.table(table(merged$cluster))
        col_pos   <- ncol(tbl)
        colnames(tbl)[col_pos] <- cell
        if (exists("marker_tbl")) {
            marker_tbl <- merge(marker_tbl, tbl, by="V1")
        } else {
            marker_tbl <- tbl
        }
    }
}
colnames(marker_tbl)[1] <- "cluster"
fwrite(marker_tbl,
    paste0(sample_name, "_CoombesPark_", Sys.Date(), ".csv"), sep=",")

8e. Assign cluster annotations based on the combined results from above

new.cluster.ids <- c("cortical_stroma_and_EPIs",
                     "MED_stroma_and_FBs",
                     "proliferating_stroma",
                     "MED_and_cortical_stroma_and_FBs",
                     "mesenchymal_progenitors",
                     "PCs_VSMCs_and_JGs",
                     "proliferating_stroma_and_PODs",
                     "proliferating_CD",
                     "ITSs_and_FBs",
                     "MCs_VSMCs_and_JGs",
                     "proliferating_stroma_and_tubule_cells",
                     "ureteric_epithelium",
                     "CD_stroma_and_FBs",
                     "PCTs",
                     "JGs_MCs_and_PCs",
                     "POD_1",
                     "proliferating_stroma_and_ENDOs",
                     "glomerular_ENDOs",
                     "POD_2",
                     "proliferating_myeloid_and_stromal_cells",
                     "immune_cells",
                     "ureteric_stroma_and_FBs")

8f. UMAP visualization of scRNA-seq clusters with annotations

names(new.cluster.ids) <- levels(seurat)
seurat_named <- RenameIdents(seurat, new.cluster.ids)
svglite(paste0(sample_name, "_scRNA-seq_UMAP_named_", Sys.Date(), ".svg"),
    height=10, width=10)
DimPlot(seurat_named, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend()
dev.off()

# View ren1 expression
svglite(paste0(sample_name, "_scRNA-seq_VinPlot_Ren1_", Sys.Date(), ".svg"))
VlnPlot(seurat_named, features = c("Ren1"))
dev.off()
svglite(paste0(sample_name, "_scRNA-seq_VinPlot_Akr1b7_", Sys.Date(), ".svg"))
VlnPlot(seurat_named, features = c("Akr1b7"))
dev.off()
svglite(paste0(sample_name, "_scRNA-seq_VinPlot_Foxd1_", Sys.Date(), ".svg"))
VlnPlot(seurat_named, features = c("Foxd1"))
dev.off()

# View Ren1 on UMAP
svglite(paste0(sample_name, "_scRNA-seq_UMAP_Ren1_", Sys.Date(), ".svg"))
FeaturePlot(seurat_named, features = c("Ren1"))
dev.off()
svglite(paste0(sample_name, "_scRNA-seq_UMAP_Akr1b7_", Sys.Date(), ".svg"))
FeaturePlot(seurat_named, features = c("Akr1b7"))
dev.off()
svglite(paste0(sample_name, "_scRNA-seq_UMAP_Foxd1_", Sys.Date(), ".svg"))
FeaturePlot(seurat_named, features = c("Foxd1"))
dev.off()

# Heatmap of top10 log2FC genes by cluster
svglite(paste0(sample_name, "_scRNA-seq_Top10Heatmap_", Sys.Date(), ".svg"),
    height=25, width=25)
DoHeatmap(seurat_named, features = top10_ROC_pos$gene) + NoLegend()
dev.off()

save.image(paste0(sample_name, "_scRNA-seq_", Sys.Date(), ".RData"))