Visualize enriched TFBS
16a. Create helper functions
Function to read GFF files as GRanges object
library('data.table')
library('tidyr')
library('GenomicRanges')
library('stringr')
gffRead <- function(file, expand = 25) {
dt <- suppressWarnings(fread(file))
dt <- separate(data = dt, col = '##gff-version 3',
into = c("chr", "positions"), sep = "\\:")
dt <- separate(data = dt, col = 'positions',
into = c("wide_start", "wide_end"), sep = "\\-")
dt$start <- ((as.numeric(dt$wide_start) - as.numeric(expand)) +
(as.numeric(dt$V4)))
dt$end <- ((as.numeric(dt$wide_start)) + (as.numeric(dt$V5) +
as.numeric(expand)))
colnames(dt) <- c("chr", "wide_start", "wide_end", "source", "type",
"motif_start", "motif_end", "score", "strand", "NA",
"info", "start", "end")
return(makeGRangesFromDataFrame(dt, keep.extra.columns=T))
}
convertGFF <- function(gff_file, region=NULL) {
motifs <- gffRead(gff_file)
if (!is.null(region)) {
motifs <- sort(subsetByOverlaps(motifs, region))
}
motifs_dt <- as.data.table(motifs)
# Drop unknown or random chromosomes
motifs_dt <- motifs_dt[!grep('chrUn_*', motifs_dt$seqnames),]
motifs_dt <- motifs_dt[!grep('random', motifs_dt$seqnames),]
motifs_dt[, c("name", "alias", "id", "pval", "qval", "sequence") := tstrsplit(info, ";", fixed=TRUE)]
motifs_dt[, c("name", "coordinates") := tstrsplit(name, "_", fixed=TRUE)]
motifs_dt$name <- gsub("Name=", "", motifs_dt$name)
motifs_dt$alias <- gsub("Alias=", "", motifs_dt$alias)
motifs_GR <- makeGRangesFromDataFrame(motifs_dt, keep.extra.columns=TRUE)
motifs_GRL <- split(motifs_GR, as.factor(motifs_GR$name))
return(motifs_GRL)
}
16b. Generate signal tracks along trajectory at Ren1 locus with enriched TFBS
Generate features object including consensus peaks, REN1 super enhancer, marker peaks of the "early_JGs" and "late_JGs" cell populations, and the motif occurrences of all enriched motifs in the consensus peaks around REN1.
Helper GenomicRanges
# Create GRanges object representing the known super-enhancer
ren1_SE <- data.table(chr="chr1",
start="133345631",
end="133350823",
name="REN1 SE",
strand="*",
score="-")
ren1_SE <- makeGRangesFromDataFrame(ren1_SE, keep.extra.columns=T)
# Ren1 TSS - chr1:133350673
# core promoter: -300bp to -50bp upstream of TSS
ren1_promoter <- data.table(chr="chr1",
start="133350373",
end="133350623",
name="REN1 PROMOTER",
strand="*",
score="-")
ren1_promoter <- makeGRangesFromDataFrame(ren1_promoter, keep.extra.columns=T)
# Wide view around Ren1 locus
ren1_wide <- data.table(chr="chr1",
start="133300000",
end="133400000",
name="REN1 wide",
strand="*",
score="-")
ren1_wide <- makeGRangesFromDataFrame(ren1_wide, keep.extra.columns=T)
akr1b7_wide <- data.table(chr="chr6",
start="34339269",
end="34450077",
name="Akr1b7 wide",
strand="*",
score="-")
akr1b7_wide <- makeGRangesFromDataFrame(akr1b7_wide, keep.extra.columns=T)
Plot signal around Ren1 with FIMO TFBS.
# TFBS occurrences identified through FIMO
cisbp_GRL <- convertGFF('FIMO/chromVAR_PWM_cisbp_JG_peaks/fimo.gff')
# Grab peakSet
peakSet <- getPeakSet(proj6)
# Subset peaks that are present around Ren1
ren1_overlapping_peaks <- sort(subsetByOverlaps(peakSet, ren1_wide))
# Identify peaks around Ren1 that are marker peaks for JG cells.
ren1_JG_peaks <- ren1_overlapping_peaks[
names(ren1_overlapping_peaks)=="early_JGs" |
names(ren1_overlapping_peaks)=="late_JGs"]
# Generate genomic features GRangesList object
features = c(GRangesList(Peaks = peakSet,
SE = ren1_SE,
Ren1_Promoter = ren1_promoter,
JG_peaks = ren1_JG_peaks),
cisbp_GRL)
p <- plotBrowserTrack(
ArchRProj = proj6,
groupBy = "ManualClustersJG",
geneSymbol = "Ren1",
upstream = 50000,
downstream = 50000,
features = features,
useGroups = trajectory,
loops = getPeak2GeneLinks(proj6, varCutOffATAC = 0.15),
sizes = c(8, 17, 3, 3)
)
plotPDF(plotList = p,
name = paste0(sample_name, "_JG_Peak2Gene_Ren1_motifs_",
Sys.Date(), ".pdf"),
ArchRProj = proj6,
addDOC = FALSE, width = 17.5, height = 10)
16c. Generate signal tracks along trajectory at Ren1 locus with literature reported TFBS
Plot signal around Ren1 with FIMO TFBS for literature reported promoter binding TFs.
# TFBS occurrences identified through FIMO
cisbp_GRL <- convertGFF('FIMO/chromVAR_PWM_cisbp_promoter/fimo.gff')
# Generate genomic features GRangesList object
features = c(GRangesList(Peaks = peakSet,
SE = ren1_SE,
Ren1_Promoter = ren1_promoter,
JG_peaks = ren1_JG_peaks),
cisbp_GRL)
p <- plotBrowserTrack(
ArchRProj = proj6,
groupBy = "ManualClustersJG",
geneSymbol = "Ren1",
upstream = 50000,
downstream = 50000,
features = features,
useGroups = trajectory,
loops = getPeak2GeneLinks(proj6, varCutOffATAC = 0.15),
sizes = c(8, 17, 3, 3)
)
plotPDF(plotList = p,
name = paste0(sample_name, "_JG_Peak2Gene_Ren1_promoters_motifs_",
Sys.Date(), ".pdf"),
ArchRProj = proj6,
addDOC = FALSE, width = 17.5, height = 10)
Plot signal around Ren1 with FIMO TFBS for literature reported enhancer binding TFs.
# TFBS occurrences identified through FIMO
cisbp_GRL <- convertGFF('FIMO/chromVAR_PWM_cisbp_enhancer/fimo.gff')
# Generate genomic features GRangesList object
features = c(GRangesList(Peaks = peakSet,
SE = ren1_SE,
Ren1_Promoter = ren1_promoter,
JG_peaks = ren1_JG_peaks),
cisbp_GRL)
p <- plotBrowserTrack(
ArchRProj = proj6,
groupBy = "ManualClustersJG",
geneSymbol = "Ren1",
upstream = 50000,
downstream = 50000,
features = features,
useGroups = trajectory,
loops = getPeak2GeneLinks(proj6, varCutOffATAC = 0.15),
sizes = c(8, 17, 3, 3)
)
plotPDF(plotList = p,
name = paste0(sample_name, "_JG_Peak2Gene_Ren1_enhancers_motifs_",
Sys.Date(), ".pdf"),
ArchRProj = proj6,
addDOC = FALSE, width = 17.5, height = 10)
16d. Generate signal tracks along trajectory at Akr1b7 locus
# TFBS occurrences for JG marker peaks identified through FIMO
cisbp_GRL <- convertGFF('FIMO/chromVAR_PWM_cisbp_akr1b7/fimo.gff')
# Generate genomic features GRangesList object
features = c(GRangesList(Peaks = peakSet,
JG_peaks = JG_Akr1b7_peakSet),
cisbp_GRL)
p <- plotBrowserTrack(
ArchRProj = proj6,
groupBy = "ManualClustersJG",
geneSymbol = "Akr1b7",
upstream = 50000,
downstream = 50000,
features = features,
useGroups = trajectory,
loops = getPeak2GeneLinks(proj6, varCutOffATAC = 0.15),
sizes = c(8, 17, 3, 3)
)
plotPDF(plotList = p,
name = paste0(sample_name, "_JG_Peak2Gene_Akr1b7_JGpeaks_motifs_",
Sys.Date(), ".pdf"),
ArchRProj = proj6,
addDOC = FALSE, width = 17.5, height = 10)
# TFBS occurrences for all peaks identified through FIMO
cisbp_GRL <- convertGFF(
'FIMO/chromVAR_PWM_cisbp_akr1b7_all_marker_peaks/fimo.gff')
# Generate genomic features GRangesList object
features = c(GRangesList(Peaks = peakSet,
JG_peaks = JG_Akr1b7_peakSet),
cisbp_GRL)
p <- plotBrowserTrack(
ArchRProj = proj6,
groupBy = "ManualClustersJG",
geneSymbol = "Akr1b7",
upstream = 50000,
downstream = 50000,
features = features,
useGroups = trajectory,
loops = getPeak2GeneLinks(proj6, varCutOffATAC = 0.15),
sizes = c(8, 17, 3, 3)
)
plotPDF(plotList = p,
name = paste0(sample_name, "_JG_Peak2Gene_Akr1b7_allpeaks_motifs_",
Sys.Date(), ".pdf"),
ArchRProj = proj6,
addDOC = FALSE, width = 17.5, height = 10)
16e. Plot chromVar deviation plots and Peak2Gene links plots
Look at the literature reported TFs binding to Ren1 promoter.
Plot chromVar deviations for each factor.
promoterMotifs <- c("CBF1", "NIC", "LXR", "NFIX", "SP1", "SP3", "HOXA9",
"HOXA10", "PBX1", "PKNOX1")
markerPromMotifs <- getFeatures(proj6,
select = paste(promoterMotifs, collapse="|"),
useMatrix = "cisbp_Motif_JGMatrix")
markerPromMotifs
# Get just z scores and remove inaccurate grep values
markerPromMotifs <- grep("z:", markerPromMotifs, value = TRUE)
markerPromMotifs <- markerPromMotifs[markerPromMotifs %ni% "z:Mesp1_58"]
markerPromMotifs <- markerPromMotifs[markerPromMotifs %ni% "z:Sp100_714"]
markerPromMotifs <- markerPromMotifs[markerPromMotifs %ni% "z:Sp140_717"]
markerPromMotifs <- markerPromMotifs[markerPromMotifs %ni% "z:Sp110_718"]
markerPromMotifs
p <- plotGroups(ArchRProj = proj6,
groupBy = "ManualClustersJG",
colorBy = "cisbp_Motif_JGMatrix",
name = markerPromMotifs,
imputeWeights = getImputeWeights(proj6)
)
plotPDF(plotList = p,
name = paste0(sample_name,
"_JG_chromVarDeviations-Trajectory_markerPromMotifs_",
Sys.Date(), ".pdf"),
ArchRProj = proj6,
addDOC = FALSE, width = 20, height = 6)
Plot signal tracks at each factor's locus.
markerPromRNA <- c("Sp3", "Nfix", "Pbx1", "Hoxa9", "Pknox1", "Hoxa10", "Sp1")
p <- plotBrowserTrack(
ArchRProj = proj6,
groupBy = "ManualClustersJG",
useGroups = trajectory,
geneSymbol = sort(markerPromRNA),
upstream = 50000,
downstream = 50000,
loops = getPeak2GeneLinks(proj6)
)
plotPDF(plotList = p,
name = paste0(sample_name, "_JG_Peak2Genes-Trajectory_markerPromRNA_",
Sys.Date(), ".pdf"),
ArchRProj = proj6,
addDOC = TRUE, width = 20, height = 6)
Look at the literature reported TFs binding to Ren1 enhancer.
Plot chromVar deviations for each factor.
enhancerMotifs <- c("NFIX", "SP1", "SP3", "CREB", "CREM", "USF1",
"USF2", "RAR", "RXR", "EAR2", "NFYA")
markerEnhMotifs <- getFeatures(proj6,
select = paste(enhancerMotifs, collapse="|"),
useMatrix = "cisbp_Motif_JGMatrix")
markerEnhMotifs
# Get just z scores and remove inaccurate grep values
markerEnhMotifs <- grep("z:", markerEnhMotifs, value = TRUE)
markerEnhMotifs <- markerEnhMotifs[markerEnhMotifs %ni% "z:Mesp1_58"]
markerEnhMotifs <- markerEnhMotifs[markerEnhMotifs %ni% "z:Sp100_714"]
markerEnhMotifs <- markerEnhMotifs[markerEnhMotifs %ni% "z:Sp140_717"]
markerEnhMotifs <- markerEnhMotifs[markerEnhMotifs %ni% "z:Sp110_718"]
markerEnhMotifs <- markerEnhMotifs[markerEnhMotifs %ni% "z:Creb3l3_116"]
markerEnhMotifs <- markerEnhMotifs[markerEnhMotifs %ni% "z:Rarb_663"]
markerEnhMotifs <- markerEnhMotifs[markerEnhMotifs %ni% "z:Rarg_654"]
markerEnhMotifs <- markerEnhMotifs[markerEnhMotifs %ni% "z:Creb3_870"]
markerEnhMotifs <- markerEnhMotifs[markerEnhMotifs %ni% "z:Crebzf_122"]
markerEnhMotifs <- markerEnhMotifs[markerEnhMotifs %ni% "z:Creb3l2_118"]
markerEnhMotifs
p <- plotGroups(ArchRProj = proj6,
groupBy = "ManualClustersJG",
colorBy = "cisbp_Motif_JGMatrix",
name = markerEnhMotifs,
imputeWeights = getImputeWeights(proj6)
)
plotPDF(plotList = p,
name = paste0(sample_name,
"_JG_chromVarDeviations-Trajectory_markerEnhMotifs_",
Sys.Date(), ".pdf"),
ArchRProj = proj6,
addDOC = FALSE, width = 20, height = 6)
Plot signal tracks at each factor's locus.
markerEnhRNA <- c("Sp3", "Nfix", "Rxrb", "Rara", "Rxra", "Rxrg", "Nfya", "Sp1",
"Crem", "Creb1", "Usf2", "Usf1")
p <- plotBrowserTrack(
ArchRProj = proj6,
groupBy = "ManualClustersJG",
useGroups = trajectory,
geneSymbol = sort(markerEnhRNA),
upstream = 50000,
downstream = 50000,
loops = getPeak2GeneLinks(proj6)
)
plotPDF(plotList = p,
name = paste0(sample_name, "_JG_Peak2Genes-Trajectory1_markerEnhRNA_",
Sys.Date(), ".pdf"),
ArchRProj = proj6,
addDOC = TRUE, width = 20, height = 6)
p <- plotBrowserTrack(
ArchRProj = proj6,
groupBy = "ManualClustersJG",
useGroups = trajectory,
geneSymbol = "REN1",
upstream = 50000,
downstream = 50000,
loops = getPeak2GeneLinks(proj6)
)
plotPDF(plotList = p,
name = paste0(sample_name, "_JG_Peak2Genes-Trajectory1_Ren1_", Sys.Date(), ".pdf"),
ArchRProj = proj6,
addDOC = FALSE, width = 20, height = 6)
16f. Plot enriched motif heatmaps
enrichMotifs_LSI_Harmony <- peakAnnoEnrichment(
seMarker = markersPeaks,
ArchRProj = proj6,
peakAnnotation = "cisbp_Motif_JG",
cutOff = "FDR <= 0.1 & Log2FC >= 0.25"
)
enrichMotifs_LSI_Harmony
heatmapEM <- plotEnrichHeatmap(enrichMotifs_LSI_Harmony,
n = 5, transpose = TRUE, rastr=FALSE)
svg(paste0(sample_name,
"_scATAC-seq_ManualClustersJG_enrichMotif_heatmap_",
Sys.Date(), ".svg"))
ComplexHeatmap::draw(heatmapEM, heatmap_legend_side = "bot", annotation_legend_side = "bot")
dev.off()
Save progress...
save.image(paste0("sc_kidney_SignalTracks_",Sys.Date(),".RData"))