7a. Add scRNA-seq data with Seurat

Load Seurat and set paths to necessary files

library(dplyr)
library(Seurat)

PROJECT_DIR <- "${HOME}"

E12_matrix_path <- file.path(PROJECT_DIR,
    "../RNAseq/scRNA_E12/outs/filtered_feature_bc_matrix/")
E18_matrix_path <- file.path(PROJECT_DIR,
    "../RNAseq/scRNA_E18/outs/filtered_feature_bc_matrix/")
P5_matrix_path  <- file.path(PROJECT_DIR,
    "../RNAseq/scRNA_P5/outs/filtered_feature_bc_matrix/")
P30_matrix_path <- file.path(PROJECT_DIR,
    "../RNAseq/scRNA_P30/outs/filtered_feature_bc_matrix/")

7b. E12 - initialize and QC

E12_RNA <- Read10X(data.dir = E12_matrix_path)
# Initialize the Seurat object with the raw (non-normalized data).
E12_seurat <- CreateSeuratObject(counts = E12_RNA, project = "E12",
                                 min.cells = 3, min.features = 200)
# The [[ operator can add columns to object metadata. 
# This is a great place to stash QC stats
E12_seurat[["percent.mt"]] <- PercentageFeatureSet(E12_seurat, pattern = "^mt-")

# get rid of red blood cells, which have hemoglobin RNAs 
E12_seurat[["percent.hemo"]] <- PercentageFeatureSet(object = E12_seurat,
                                                     pattern = "^Hb[^(p)]")

# FeatureScatter is typically used to visualize feature-feature relationships, 
# but can be used for anything calculated by the object, 
# i.e. columns in object metadata, PC scores etc.
plot1 <- FeatureScatter(E12_seurat,
                        feature1 = "nCount_RNA",
                        feature2 = "percent.mt")
svglite(paste0("E12", "_scRNA-seq_nCount-MT_", Sys.Date(), ".svg"))
plot1
dev.off()

plot2 <- FeatureScatter(E12_seurat,
                        feature1 = "nCount_RNA",
                        feature2 = "nFeature_RNA")
svglite(paste0("E12", "_scRNA-seq_nCount-nFeature_", Sys.Date(), ".svg"))
plot2
dev.off()

plot3 <- FeatureScatter(E12_seurat,
                        feature1 = "nCount_RNA",
                        feature2 = "percent.hemo")
svglite(paste0("E12", "_scRNA-seq_nCount-hemoglobin_", Sys.Date(), ".svg"))
plot3
dev.off()

# There is a high percentage of mtDNA in these samples. Setting threshold to 25%
E12_seurat <- subset(E12_seurat, subset = nFeature_RNA > 200 &
                     nFeature_RNA < 9000 & percent.mt < 20 &
                     percent.hemo < 5 & percent.hemo > -5)

7c. E18 - initialize and QC

E18_RNA <- Read10X(data.dir = E18_matrix_path)
# Initialize the Seurat object with the raw (non-normalized data).
E18_seurat <- CreateSeuratObject(counts = E18_RNA, project = "E18",
                                 min.cells = 3, min.features = 200)

E18_seurat[["percent.mt"]] <- PercentageFeatureSet(E18_seurat, pattern = "^mt-")

# get rid of red blood cells, which have hemoglobin RNAs 
E18_seurat[["percent.hemo"]] <- PercentageFeatureSet(object = E18_seurat,
                                                     pattern = "^Hb[^(p)]")

plot1 <- FeatureScatter(E18_seurat,
                        feature1 = "nCount_RNA",
                        feature2 = "percent.mt")
svglite(paste0("E18", "_scRNA-seq_nCount-MT_", Sys.Date(), ".svg"))
plot1
dev.off()

plot2 <- FeatureScatter(E18_seurat,
                        feature1 = "nCount_RNA",
                        feature2 = "nFeature_RNA")
svglite(paste0("E18", "_scRNA-seq_nCount-nFeature_", Sys.Date(), ".svg"))
plot2
dev.off()

plot3 <- FeatureScatter(E18_seurat,
                        feature1 = "nCount_RNA",
                        feature2 = "percent.hemo")
svglite(paste0("E18", "_scRNA-seq_nCount-hemoglobin_", Sys.Date(), ".svg"))
plot3
dev.off()

# There is a high percentage of mtDNA in these samples. Setting threshold to 25%
E18_seurat <- subset(E18_seurat, subset = nFeature_RNA > 200 &
                     nFeature_RNA < 7000 & percent.mt < 25 &
                     percent.hemo < 1 & percent.hemo > -1)

7d. P5 - initialize and QC

P5_RNA    <- Read10X(data.dir = P5_matrix_path)
P5_seurat <- CreateSeuratObject(counts = P5_RNA, project = "P5",
                                min.cells = 3, min.features = 200)

P5_seurat[["percent.mt"]] <- PercentageFeatureSet(P5_seurat, pattern = "^mt-")

# get rid of red blood cells, which have hemoglobin RNAs 
P5_seurat[["percent.hemo"]] <- PercentageFeatureSet(object = P5_seurat,
                                                    pattern = "^Hb[^(p)]")

plot1 <- FeatureScatter(P5_seurat,
                        feature1 = "nCount_RNA",
                        feature2 = "percent.mt")
svglite(paste0("P5", "_scRNA-seq_nCount-MT_", Sys.Date(), ".svg"))
plot1
dev.off()

plot2 <- FeatureScatter(P5_seurat,
                        feature1 = "nCount_RNA",
                        feature2 = "nFeature_RNA")
svglite(paste0("P5", "_scRNA-seq_nCount-nFeature_", Sys.Date(), ".svg"))
plot2
dev.off()

plot3 <- FeatureScatter(P5_seurat,
                        feature1 = "nCount_RNA",
                        feature2 = "percent.hemo")
svglite(paste0("P5", "_scRNA-seq_nCount-hemoglobin_", Sys.Date(), ".svg"))
plot3
dev.off()

# There is a high percentage of mtDNA in these samples. Setting threshold to 25%
P5_seurat <- subset(P5_seurat, subset = nFeature_RNA > 200 &
                    nFeature_RNA < 9000 & percent.mt < 25 &
                    percent.hemo < 2)

7e. P30 - initialize and QC

P30_RNA    <- Read10X(data.dir = P30_matrix_path)
P30_seurat <- CreateSeuratObject(counts = P30_RNA, project = "P30",
                                 min.cells = 3, min.features = 200)

P30_seurat[["percent.mt"]] <- PercentageFeatureSet(P30_seurat, pattern = "^mt-")

# get rid of red blood cells, which have hemoglobin RNAs 
P30_seurat[["percent.hemo"]] <- PercentageFeatureSet(object = P30_seurat,
                                                     pattern = "^Hb[^(p)]")

plot1 <- FeatureScatter(P30_seurat,
                        feature1 = "nCount_RNA",
                        feature2 = "percent.mt")
svglite(paste0("P30", "_scRNA-seq_nCount-MT_", Sys.Date(), ".svg"))
plot1
dev.off()

plot2 <- FeatureScatter(P30_seurat,
                        feature1 = "nCount_RNA",
                        feature2 = "nFeature_RNA")
svglite(paste0("P30", "_scRNA-seq_nCount-nFeature_", Sys.Date(), ".svg"))
plot2
dev.off()

plot3 <- FeatureScatter(P30_seurat,
                        feature1 = "nCount_RNA",
                        feature2 = "percent.hemo")
svglite(paste0("P30", "_scRNA-seq_nCount-hemoglobin_", Sys.Date(), ".svg"))
plot3
dev.off()

# There is a high percentage of mtDNA in these samples. Setting threshold to 25%
P30_seurat <- subset(P30_seurat, subset = nFeature_RNA > 200 &
                     nFeature_RNA < 4500 & percent.mt < 25 &
                     percent.hemo < 6 & percent.hemo > -6)

7f. Combine and normalize across timepoints

# Normalize the data
E12_seurat <- NormalizeData(E12_seurat)
E18_seurat <- NormalizeData(E18_seurat)
P5_seurat  <- NormalizeData(P5_seurat)
P30_seurat <- NormalizeData(P30_seurat)
renin_seurat_normalized <- merge(E12_seurat,
                                 c(E18_seurat, P5_seurat, P30_seurat),
                                 add.cell.ids = c("E12", "E18", "P5", "P30"),
                                 project = "renin", merge.data = TRUE)

7g. Find variable features and visualize

seurat <- FindVariableFeatures(renin_seurat_normalized,
                               selection.method = "vst", nfeatures = 2000)
all.genes  <- rownames(seurat)
no_ribo.genes <- all.genes[!grepl("^Rp[sl][[:digit:]]|^Rplp[[:digit:]]|^Rpsa", all.genes)]
seurat <- ScaleData(seurat, features = no_ribo.genes)
seurat <- RunPCA(seurat, features = VariableFeatures(object = seurat))

svglite(paste0(sample_name, "_scRNA-seq_VizDimLoadings_", Sys.Date(), ".svg"))
VizDimLoadings(seurat, dims = 1:2, reduction = "pca")
dev.off()

svglite(paste0(sample_name, "_scRNA-seq_DimHeatmap_", Sys.Date(), ".svg"))
DimHeatmap(seurat, dims = 1, cells = 500, balanced = TRUE)
dev.off()

# Identify the 10 most highly variable genes
top10 <- head(VariableFeatures(seurat), 10)

# plot variable features with and without labels
plot1 <- VariableFeaturePlot(seurat)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
svglite(paste0(sample_name, "_scRNA-seq_VariableFeaturePlot_", Sys.Date(), ".svg"))
plot2
dev.off()

seurat <- JackStraw(seurat, num.replicate = 100)
seurat <- ScoreJackStraw(seurat, dims = 1:20)

JackStrawPlot(seurat, dims = 1:20)
ElbowPlot(seurat)
# Elbow around PC15

seurat <- FindNeighbors(seurat, dims = 1:15)
seurat <- FindClusters(seurat, resolution = 0.5)
seurat <- RunUMAP(seurat, dims = 1:15)

svglite(paste0(sample_name, "_scRNA-seq_UMAP_unlabelled_", Sys.Date(), ".svg"))
DimPlot(seurat, reduction = "umap")
dev.off()

saveRDS(seurat, file = 
        paste0(sample_name, "__scRNA-seq_seurat-object_", Sys.Date(), ".rds"))
save.image(paste0(sample_name, "_scRNA-seq_", Sys.Date(), ".RData"))