7a. Add scRNA-seq data with Seurat
Load Seurat and set paths to necessary files
library(dplyr)
library(Seurat)
PROJECT_DIR <- "${HOME}"
E12_matrix_path <- file.path(PROJECT_DIR,
"../RNAseq/scRNA_E12/outs/filtered_feature_bc_matrix/")
E18_matrix_path <- file.path(PROJECT_DIR,
"../RNAseq/scRNA_E18/outs/filtered_feature_bc_matrix/")
P5_matrix_path <- file.path(PROJECT_DIR,
"../RNAseq/scRNA_P5/outs/filtered_feature_bc_matrix/")
P30_matrix_path <- file.path(PROJECT_DIR,
"../RNAseq/scRNA_P30/outs/filtered_feature_bc_matrix/")
7b. E12 - initialize and QC
E12_RNA <- Read10X(data.dir = E12_matrix_path)
# Initialize the Seurat object with the raw (non-normalized data).
E12_seurat <- CreateSeuratObject(counts = E12_RNA, project = "E12",
min.cells = 3, min.features = 200)
# The [[ operator can add columns to object metadata.
# This is a great place to stash QC stats
E12_seurat[["percent.mt"]] <- PercentageFeatureSet(E12_seurat, pattern = "^mt-")
# get rid of red blood cells, which have hemoglobin RNAs
E12_seurat[["percent.hemo"]] <- PercentageFeatureSet(object = E12_seurat,
pattern = "^Hb[^(p)]")
# FeatureScatter is typically used to visualize feature-feature relationships,
# but can be used for anything calculated by the object,
# i.e. columns in object metadata, PC scores etc.
plot1 <- FeatureScatter(E12_seurat,
feature1 = "nCount_RNA",
feature2 = "percent.mt")
svglite(paste0("E12", "_scRNA-seq_nCount-MT_", Sys.Date(), ".svg"))
plot1
dev.off()
plot2 <- FeatureScatter(E12_seurat,
feature1 = "nCount_RNA",
feature2 = "nFeature_RNA")
svglite(paste0("E12", "_scRNA-seq_nCount-nFeature_", Sys.Date(), ".svg"))
plot2
dev.off()
plot3 <- FeatureScatter(E12_seurat,
feature1 = "nCount_RNA",
feature2 = "percent.hemo")
svglite(paste0("E12", "_scRNA-seq_nCount-hemoglobin_", Sys.Date(), ".svg"))
plot3
dev.off()
# There is a high percentage of mtDNA in these samples. Setting threshold to 25%
E12_seurat <- subset(E12_seurat, subset = nFeature_RNA > 200 &
nFeature_RNA < 9000 & percent.mt < 20 &
percent.hemo < 5 & percent.hemo > -5)
7c. E18 - initialize and QC
E18_RNA <- Read10X(data.dir = E18_matrix_path)
# Initialize the Seurat object with the raw (non-normalized data).
E18_seurat <- CreateSeuratObject(counts = E18_RNA, project = "E18",
min.cells = 3, min.features = 200)
E18_seurat[["percent.mt"]] <- PercentageFeatureSet(E18_seurat, pattern = "^mt-")
# get rid of red blood cells, which have hemoglobin RNAs
E18_seurat[["percent.hemo"]] <- PercentageFeatureSet(object = E18_seurat,
pattern = "^Hb[^(p)]")
plot1 <- FeatureScatter(E18_seurat,
feature1 = "nCount_RNA",
feature2 = "percent.mt")
svglite(paste0("E18", "_scRNA-seq_nCount-MT_", Sys.Date(), ".svg"))
plot1
dev.off()
plot2 <- FeatureScatter(E18_seurat,
feature1 = "nCount_RNA",
feature2 = "nFeature_RNA")
svglite(paste0("E18", "_scRNA-seq_nCount-nFeature_", Sys.Date(), ".svg"))
plot2
dev.off()
plot3 <- FeatureScatter(E18_seurat,
feature1 = "nCount_RNA",
feature2 = "percent.hemo")
svglite(paste0("E18", "_scRNA-seq_nCount-hemoglobin_", Sys.Date(), ".svg"))
plot3
dev.off()
# There is a high percentage of mtDNA in these samples. Setting threshold to 25%
E18_seurat <- subset(E18_seurat, subset = nFeature_RNA > 200 &
nFeature_RNA < 7000 & percent.mt < 25 &
percent.hemo < 1 & percent.hemo > -1)
7d. P5 - initialize and QC
P5_RNA <- Read10X(data.dir = P5_matrix_path)
P5_seurat <- CreateSeuratObject(counts = P5_RNA, project = "P5",
min.cells = 3, min.features = 200)
P5_seurat[["percent.mt"]] <- PercentageFeatureSet(P5_seurat, pattern = "^mt-")
# get rid of red blood cells, which have hemoglobin RNAs
P5_seurat[["percent.hemo"]] <- PercentageFeatureSet(object = P5_seurat,
pattern = "^Hb[^(p)]")
plot1 <- FeatureScatter(P5_seurat,
feature1 = "nCount_RNA",
feature2 = "percent.mt")
svglite(paste0("P5", "_scRNA-seq_nCount-MT_", Sys.Date(), ".svg"))
plot1
dev.off()
plot2 <- FeatureScatter(P5_seurat,
feature1 = "nCount_RNA",
feature2 = "nFeature_RNA")
svglite(paste0("P5", "_scRNA-seq_nCount-nFeature_", Sys.Date(), ".svg"))
plot2
dev.off()
plot3 <- FeatureScatter(P5_seurat,
feature1 = "nCount_RNA",
feature2 = "percent.hemo")
svglite(paste0("P5", "_scRNA-seq_nCount-hemoglobin_", Sys.Date(), ".svg"))
plot3
dev.off()
# There is a high percentage of mtDNA in these samples. Setting threshold to 25%
P5_seurat <- subset(P5_seurat, subset = nFeature_RNA > 200 &
nFeature_RNA < 9000 & percent.mt < 25 &
percent.hemo < 2)
7e. P30 - initialize and QC
P30_RNA <- Read10X(data.dir = P30_matrix_path)
P30_seurat <- CreateSeuratObject(counts = P30_RNA, project = "P30",
min.cells = 3, min.features = 200)
P30_seurat[["percent.mt"]] <- PercentageFeatureSet(P30_seurat, pattern = "^mt-")
# get rid of red blood cells, which have hemoglobin RNAs
P30_seurat[["percent.hemo"]] <- PercentageFeatureSet(object = P30_seurat,
pattern = "^Hb[^(p)]")
plot1 <- FeatureScatter(P30_seurat,
feature1 = "nCount_RNA",
feature2 = "percent.mt")
svglite(paste0("P30", "_scRNA-seq_nCount-MT_", Sys.Date(), ".svg"))
plot1
dev.off()
plot2 <- FeatureScatter(P30_seurat,
feature1 = "nCount_RNA",
feature2 = "nFeature_RNA")
svglite(paste0("P30", "_scRNA-seq_nCount-nFeature_", Sys.Date(), ".svg"))
plot2
dev.off()
plot3 <- FeatureScatter(P30_seurat,
feature1 = "nCount_RNA",
feature2 = "percent.hemo")
svglite(paste0("P30", "_scRNA-seq_nCount-hemoglobin_", Sys.Date(), ".svg"))
plot3
dev.off()
# There is a high percentage of mtDNA in these samples. Setting threshold to 25%
P30_seurat <- subset(P30_seurat, subset = nFeature_RNA > 200 &
nFeature_RNA < 4500 & percent.mt < 25 &
percent.hemo < 6 & percent.hemo > -6)
7f. Combine and normalize across timepoints
# Normalize the data
E12_seurat <- NormalizeData(E12_seurat)
E18_seurat <- NormalizeData(E18_seurat)
P5_seurat <- NormalizeData(P5_seurat)
P30_seurat <- NormalizeData(P30_seurat)
renin_seurat_normalized <- merge(E12_seurat,
c(E18_seurat, P5_seurat, P30_seurat),
add.cell.ids = c("E12", "E18", "P5", "P30"),
project = "renin", merge.data = TRUE)
7g. Find variable features and visualize
seurat <- FindVariableFeatures(renin_seurat_normalized,
selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(seurat)
no_ribo.genes <- all.genes[!grepl("^Rp[sl][[:digit:]]|^Rplp[[:digit:]]|^Rpsa", all.genes)]
seurat <- ScaleData(seurat, features = no_ribo.genes)
seurat <- RunPCA(seurat, features = VariableFeatures(object = seurat))
svglite(paste0(sample_name, "_scRNA-seq_VizDimLoadings_", Sys.Date(), ".svg"))
VizDimLoadings(seurat, dims = 1:2, reduction = "pca")
dev.off()
svglite(paste0(sample_name, "_scRNA-seq_DimHeatmap_", Sys.Date(), ".svg"))
DimHeatmap(seurat, dims = 1, cells = 500, balanced = TRUE)
dev.off()
# Identify the 10 most highly variable genes
top10 <- head(VariableFeatures(seurat), 10)
# plot variable features with and without labels
plot1 <- VariableFeaturePlot(seurat)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
svglite(paste0(sample_name, "_scRNA-seq_VariableFeaturePlot_", Sys.Date(), ".svg"))
plot2
dev.off()
seurat <- JackStraw(seurat, num.replicate = 100)
seurat <- ScoreJackStraw(seurat, dims = 1:20)
JackStrawPlot(seurat, dims = 1:20)
ElbowPlot(seurat)
# Elbow around PC15
seurat <- FindNeighbors(seurat, dims = 1:15)
seurat <- FindClusters(seurat, resolution = 0.5)
seurat <- RunUMAP(seurat, dims = 1:15)
svglite(paste0(sample_name, "_scRNA-seq_UMAP_unlabelled_", Sys.Date(), ".svg"))
DimPlot(seurat, reduction = "umap")
dev.off()
saveRDS(seurat, file =
paste0(sample_name, "__scRNA-seq_seurat-object_", Sys.Date(), ".rds"))
save.image(paste0(sample_name, "_scRNA-seq_", Sys.Date(), ".RData"))