8a. Write scRNA-seq cluster marker genes to files
Yields 21 clusters after hemoglobin containing cells removed.
for (i in 0:21) {
name <- paste0(sample_name,
"_scRNA-seq_Cluster-", i, "_Markers_", Sys.Date(), ".csv")
cluster_markers <- as.data.table(
FindMarkers(seurat, ident.1 = i,
min.pct = 0.25), keep.rownames=TRUE)
fwrite(cluster_markers, name)
}
8b. Plot scRNA-seq PCA plot
svglite(paste0(sample_name, "_scRNA-seq_DimPlot-PCA_", Sys.Date(), ".svg"))
DimPlot(seurat, reduction = "pca")
dev.off()
# Raw counts
VlnPlot(seurat, features = c("Ren1", "Foxd1"), slot = "counts", log=TRUE)
svglite(paste0(sample_name,
"_scRNA-seq_ViolinPlot-Ren1-Akr1b7-Foxd1_",
Sys.Date(),
".svg"))
VlnPlot(seurat, features = c("Ren1", "Foxd1", "Akr1b7"))
dev.off()
svglite(paste0(sample_name,
"_scRNA-seq_FeaturePlot-Ren1-Akr1b7-Foxd1_",
Sys.Date(),
".svg"))
FeaturePlot(seurat, features = c("Ren1", "Foxd1", "Akr1b7"))
dev.off()
8c. Generate scRNA-seq marker gene lists
# Only positive markers
all_pos_markers <- FindAllMarkers(seurat, only.pos = TRUE,
min.pct = 0.25, logfc.threshold = 0.25)
top10_pos <- data.table(all_pos_markers %>% group_by(cluster) %>%
top_n(n = 10, wt = avg_log2FC))
top25_pos <- data.table(all_pos_markers %>% group_by(cluster) %>%
top_n(n = 25, wt = avg_log2FC))
top50_pos <- data.table(all_pos_markers %>% group_by(cluster) %>%
top_n(n = 50, wt = avg_log2FC))
fwrite(top10_pos,
paste0(sample_name, "_scRNA-seq_Top10PositiveMarkers_", Sys.Date(), ".csv"),
sep=",")
fwrite(top25_pos,
paste0(sample_name, "_scRNA-seq_Top25PositiveMarkers_", Sys.Date(), ".csv"),
sep=",")
fwrite(top50_pos,
paste0(sample_name, "_scRNA-seq_Top50PositiveMarkers_", Sys.Date(), ".csv"),
sep=",")
# Feature expression heatmap
svglite(paste0(sample_name,
"_scRNA-seq_DoHeatmap_Top10Positive_",
Sys.Date(), ".svg"),
height=20, width=25)
DoHeatmap(seurat, features = top10_pos$gene, size=8, draw.lines=T) + NoLegend()
dev.off()
all_pos_markers_ROC <- FindAllMarkers(seurat, only.pos = TRUE, test.use = "roc",
min.pct = 0.25, logfc.threshold = 0.25)
top10_ROC_pos <- data.table(all_pos_markers_ROC %>% group_by(cluster) %>%
top_n(n = 10, wt = avg_log2FC))
top25_ROC_pos <- data.table(all_pos_markers_ROC %>% group_by(cluster) %>%
top_n(n = 25, wt = avg_log2FC))
top50_ROC_pos <- data.table(all_pos_markers_ROC %>% group_by(cluster) %>%
top_n(n = 50, wt = avg_log2FC))
fwrite(top10_ROC_pos,
paste0(sample_name, "_scRNA-seq_Top10ROCPositiveMarkers_", Sys.Date(), ".csv"),
sep=",")
fwrite(top25_ROC_pos,
paste0(sample_name, "_scRNA-seq_Top25ROCPositiveMarkers_", Sys.Date(), ".csv"),
sep=",")
fwrite(top50_ROC_pos,
paste0(sample_name, "_scRNA-seq_Top50ROCPositiveMarkers_", Sys.Date(), ".csv"),
sep=",")
# Feature expression heatmap
svglite(paste0(sample_name,
"_scRNA-seq_DoHeatmap_Top10ROCPositive_",
Sys.Date(), ".svg"),
height=20, width=25)
DoHeatmap(seurat, features = top10_ROC_pos$gene, size=8, draw.lines=T) + NoLegend()
dev.off()
8d. Compare cluster marker gene lists with previously reported markers
Load package and create function for processing marker gene lists.
library("readxl")
splitDataTable <- function(DT, split_factor) {
if (is.numeric(split_factor)) {
split_factor = colnames(DT)[split_factor]
message("Integer split_factor, changed to: ", split_factor)
}
lapply( split(1:nrow(DT), DT[, get(split_factor)]), function(x) DT[x])
}
Use cellkb.com
Manually input the Top25 gene markers of each cluster and obtain the top 5 ranked cells and overall ranked cell category to assign cluster identities.
Compare to Gomez' kidney marker genes list to identify cell types
cell_markers <- "WholeKidneyMarkers.xlsx"
kidneyXL <- read_excel(cell_markers)
excel_sheets(cell_markers)
markers <- read_excel(cell_markers, sheet = excel_sheets(cell_markers)[3])
markers <- as.data.table(markers)
cell_list <- splitDataTable(markers, "Cell Type")
rm(marker_tbl)
for (cell in names(cell_list)) {
cell_type <- cell_list[[cell]]
merged <- merge(all_pos_markers_ROC,
cell_type,
by.x="gene",
by.y="Marker Gene Symbol")
#print(merged)
if (!is.null(merged$cluster)) {
tbl <- data.table(table(merged$cluster))
col_pos <- ncol(tbl)
colnames(tbl)[col_pos] <- cell
if (exists("marker_tbl")) {
marker_tbl <- merge(marker_tbl, tbl, by="V1")
} else {
marker_tbl <- tbl
}
}
}
colnames(marker_tbl)[1] <- "cluster"
fwrite(marker_tbl,
paste0(sample_name, "_WholeKidneyMarkers_", Sys.Date(), ".csv"), sep=",")
Compare to Cao et al. 2018
cell_markers <- "aau0730_tabless1_s13.xlsx"
kidneyXL <- read_excel(cell_markers)
excel_sheets(cell_markers)
tmp <- read_excel(cell_markers, sheet = excel_sheets(cell_markers)[8])
header <- as.character(tmp[1,])
markers <- as.data.table(tmp[-1,])
colnames(markers) <- header
markers <- as.data.table(markers)
cell_list <- splitDataTable(markers, "max.tissue")
rm(marker_tbl)
for (cell in names(cell_list)) {
cell_type <- cell_list[[cell]]
merged <- merge(all_pos_markers_ROC,
cell_type,
by.x="gene",
by.y="gene_short_name")
#print(merged)
if (!is.null(merged$cluster)) {
tbl <- data.table(table(merged$cluster))
col_pos <- ncol(tbl)
colnames(tbl)[col_pos] <- cell
if (exists("marker_tbl")) {
marker_tbl <- merge(marker_tbl, tbl, by="V1")
} else {
marker_tbl <- tbl
}
}
}
colnames(marker_tbl)[1] <- "cluster"
fwrite(marker_tbl,
paste0(sample_name, "_Cao2018_", Sys.Date(), ".csv"), sep=",")
Compare to Marker genes from Coombes et al. (Development 2019) & Park et al. (Science 2018)
I manually rearranged Sheet2 into Sheet3 for ease of import
cell_markers <- "41467_2021_22266_MOESM5_ESM.xlsx"
kidneyXL <- read_excel(cell_markers)
excel_sheets(cell_markers)
markers <- read_excel(cell_markers, sheet = excel_sheets(cell_markers)[3])
markers <- as.data.table(markers)
cell_list <- splitDataTable(markers, "cell")
rm(marker_tbl)
for (cell in names(cell_list)) {
cell_type <- cell_list[[cell]]
merged <- merge(all_pos_markers_ROC, cell_type, by.x="gene", by.y="gene")
if (!is.null(merged$cluster)) {
tbl <- data.table(table(merged$cluster))
col_pos <- ncol(tbl)
colnames(tbl)[col_pos] <- cell
if (exists("marker_tbl")) {
marker_tbl <- merge(marker_tbl, tbl, by="V1")
} else {
marker_tbl <- tbl
}
}
}
colnames(marker_tbl)[1] <- "cluster"
fwrite(marker_tbl,
paste0(sample_name, "_CoombesPark_", Sys.Date(), ".csv"), sep=",")
8e. Assign cluster annotations based on the combined results from above
new.cluster.ids <- c("cortical_stroma_and_EPIs",
"MED_stroma_and_FBs",
"proliferating_stroma",
"MED_and_cortical_stroma_and_FBs",
"mesenchymal_progenitors",
"PCs_VSMCs_and_JGs",
"proliferating_stroma_and_PODs",
"proliferating_CD",
"ITSs_and_FBs",
"MCs_VSMCs_and_JGs",
"proliferating_stroma_and_tubule_cells",
"ureteric_epithelium",
"CD_stroma_and_FBs",
"PCTs",
"JGs_MCs_and_PCs",
"POD_1",
"proliferating_stroma_and_ENDOs",
"glomerular_ENDOs",
"POD_2",
"proliferating_myeloid_and_stromal_cells",
"immune_cells",
"ureteric_stroma_and_FBs")
8f. UMAP visualization of scRNA-seq clusters with annotations
names(new.cluster.ids) <- levels(seurat)
seurat_named <- RenameIdents(seurat, new.cluster.ids)
svglite(paste0(sample_name, "_scRNA-seq_UMAP_named_", Sys.Date(), ".svg"),
height=10, width=10)
DimPlot(seurat_named, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend()
dev.off()
# View ren1 expression
svglite(paste0(sample_name, "_scRNA-seq_VinPlot_Ren1_", Sys.Date(), ".svg"))
VlnPlot(seurat_named, features = c("Ren1"))
dev.off()
svglite(paste0(sample_name, "_scRNA-seq_VinPlot_Akr1b7_", Sys.Date(), ".svg"))
VlnPlot(seurat_named, features = c("Akr1b7"))
dev.off()
svglite(paste0(sample_name, "_scRNA-seq_VinPlot_Foxd1_", Sys.Date(), ".svg"))
VlnPlot(seurat_named, features = c("Foxd1"))
dev.off()
# View Ren1 on UMAP
svglite(paste0(sample_name, "_scRNA-seq_UMAP_Ren1_", Sys.Date(), ".svg"))
FeaturePlot(seurat_named, features = c("Ren1"))
dev.off()
svglite(paste0(sample_name, "_scRNA-seq_UMAP_Akr1b7_", Sys.Date(), ".svg"))
FeaturePlot(seurat_named, features = c("Akr1b7"))
dev.off()
svglite(paste0(sample_name, "_scRNA-seq_UMAP_Foxd1_", Sys.Date(), ".svg"))
FeaturePlot(seurat_named, features = c("Foxd1"))
dev.off()
# Heatmap of top10 log2FC genes by cluster
svglite(paste0(sample_name, "_scRNA-seq_Top10Heatmap_", Sys.Date(), ".svg"),
height=25, width=25)
DoHeatmap(seurat_named, features = top10_ROC_pos$gene) + NoLegend()
dev.off()
save.image(paste0(sample_name, "_scRNA-seq_", Sys.Date(), ".RData"))