Visualize enriched TFBS

16a. Create helper functions

Function to read GFF files as GRanges object

library('data.table')
library('tidyr')
library('GenomicRanges')
library('stringr')

gffRead <- function(file, expand = 25) {
    dt <- suppressWarnings(fread(file))
    dt <- separate(data = dt, col = '##gff-version 3',
                   into = c("chr", "positions"), sep = "\\:")
    dt <- separate(data = dt, col = 'positions',
                   into = c("wide_start", "wide_end"), sep = "\\-")
    dt$start <- ((as.numeric(dt$wide_start) - as.numeric(expand)) +
                 (as.numeric(dt$V4)))
    dt$end   <- ((as.numeric(dt$wide_start)) + (as.numeric(dt$V5) +
                 as.numeric(expand)))
    colnames(dt) <- c("chr", "wide_start", "wide_end", "source", "type",
                         "motif_start", "motif_end", "score", "strand", "NA",
                         "info", "start", "end")
    return(makeGRangesFromDataFrame(dt, keep.extra.columns=T))
}

convertGFF <- function(gff_file, region=NULL) {
    motifs <- gffRead(gff_file)
    if (!is.null(region)) {
        motifs <- sort(subsetByOverlaps(motifs, region))
    }
    motifs_dt <- as.data.table(motifs)
    # Drop unknown or random chromosomes
    motifs_dt <- motifs_dt[!grep('chrUn_*', motifs_dt$seqnames),]
    motifs_dt <- motifs_dt[!grep('random', motifs_dt$seqnames),]
    motifs_dt[, c("name", "alias", "id", "pval", "qval", "sequence") := tstrsplit(info, ";", fixed=TRUE)]
    motifs_dt[, c("name", "coordinates") := tstrsplit(name, "_", fixed=TRUE)]
    motifs_dt$name  <- gsub("Name=", "", motifs_dt$name)
    motifs_dt$alias <- gsub("Alias=", "", motifs_dt$alias)
    motifs_GR  <- makeGRangesFromDataFrame(motifs_dt, keep.extra.columns=TRUE)
    motifs_GRL <- split(motifs_GR, as.factor(motifs_GR$name))
    return(motifs_GRL)
}

16b. Generate signal tracks along trajectory at Ren1 locus with enriched TFBS

Generate features object including consensus peaks, REN1 super enhancer, marker peaks of the "early_JGs" and "late_JGs" cell populations, and the motif occurrences of all enriched motifs in the consensus peaks around REN1.

Helper GenomicRanges

# Create GRanges object representing the known super-enhancer
ren1_SE <- data.table(chr="chr1",
                      start="133345631",
                      end="133350823",
                      name="REN1 SE",
                      strand="*",
                      score="-")
ren1_SE <- makeGRangesFromDataFrame(ren1_SE, keep.extra.columns=T)

# Ren1 TSS - chr1:133350673
# core promoter: -300bp to -50bp upstream of TSS
ren1_promoter <- data.table(chr="chr1",
                            start="133350373",
                            end="133350623",
                            name="REN1 PROMOTER",
                            strand="*",
                            score="-")
ren1_promoter <- makeGRangesFromDataFrame(ren1_promoter, keep.extra.columns=T)

# Wide view around Ren1 locus
ren1_wide <- data.table(chr="chr1",
                        start="133300000",
                        end="133400000",
                        name="REN1 wide",
                        strand="*",
                        score="-")
ren1_wide <- makeGRangesFromDataFrame(ren1_wide, keep.extra.columns=T)

akr1b7_wide <- data.table(chr="chr6",
                          start="34339269",
                          end="34450077",
                          name="Akr1b7 wide",
                          strand="*",
                          score="-")
akr1b7_wide <- makeGRangesFromDataFrame(akr1b7_wide, keep.extra.columns=T)

Plot signal around Ren1 with FIMO TFBS.

# TFBS occurrences identified through FIMO
cisbp_GRL  <- convertGFF('FIMO/chromVAR_PWM_cisbp_JG_peaks/fimo.gff')

# Grab peakSet
peakSet <- getPeakSet(proj6)

# Subset peaks that are present around Ren1
ren1_overlapping_peaks <- sort(subsetByOverlaps(peakSet, ren1_wide))

# Identify peaks around Ren1 that are marker peaks for JG cells.
ren1_JG_peaks <- ren1_overlapping_peaks[
    names(ren1_overlapping_peaks)=="early_JGs" |
    names(ren1_overlapping_peaks)=="late_JGs"]

# Generate genomic features GRangesList object
features = c(GRangesList(Peaks = peakSet,
                         SE = ren1_SE,
                         Ren1_Promoter = ren1_promoter,
                         JG_peaks = ren1_JG_peaks),
             cisbp_GRL)

p <- plotBrowserTrack(
    ArchRProj = proj6, 
    groupBy = "ManualClustersJG", 
    geneSymbol = "Ren1", 
    upstream = 50000,
    downstream = 50000,
    features = features,
    useGroups = trajectory,
    loops = getPeak2GeneLinks(proj6, varCutOffATAC = 0.15),
    sizes = c(8, 17, 3, 3)
)
plotPDF(plotList = p, 
    name = paste0(sample_name, "_JG_Peak2Gene_Ren1_motifs_",
                  Sys.Date(), ".pdf"), 
    ArchRProj = proj6, 
    addDOC = FALSE, width = 17.5, height = 10)

16c. Generate signal tracks along trajectory at Ren1 locus with literature reported TFBS

Plot signal around Ren1 with FIMO TFBS for literature reported promoter binding TFs.

# TFBS occurrences identified through FIMO
cisbp_GRL  <- convertGFF('FIMO/chromVAR_PWM_cisbp_promoter/fimo.gff')

# Generate genomic features GRangesList object
features = c(GRangesList(Peaks = peakSet,
                         SE = ren1_SE,
                         Ren1_Promoter = ren1_promoter,
                         JG_peaks = ren1_JG_peaks),
             cisbp_GRL)

p <- plotBrowserTrack(
    ArchRProj = proj6, 
    groupBy = "ManualClustersJG", 
    geneSymbol = "Ren1", 
    upstream = 50000,
    downstream = 50000,
    features = features,
    useGroups = trajectory,
    loops = getPeak2GeneLinks(proj6, varCutOffATAC = 0.15),
    sizes = c(8, 17, 3, 3)
)
plotPDF(plotList = p, 
    name = paste0(sample_name, "_JG_Peak2Gene_Ren1_promoters_motifs_",
                  Sys.Date(), ".pdf"), 
    ArchRProj = proj6, 
    addDOC = FALSE, width = 17.5, height = 10)

Plot signal around Ren1 with FIMO TFBS for literature reported enhancer binding TFs.

# TFBS occurrences identified through FIMO
cisbp_GRL  <- convertGFF('FIMO/chromVAR_PWM_cisbp_enhancer/fimo.gff')

# Generate genomic features GRangesList object
features = c(GRangesList(Peaks = peakSet,
                         SE = ren1_SE,
                         Ren1_Promoter = ren1_promoter,
                         JG_peaks = ren1_JG_peaks),
             cisbp_GRL)

p <- plotBrowserTrack(
    ArchRProj = proj6, 
    groupBy = "ManualClustersJG", 
    geneSymbol = "Ren1", 
    upstream = 50000,
    downstream = 50000,
    features = features,
    useGroups = trajectory,
    loops = getPeak2GeneLinks(proj6, varCutOffATAC = 0.15),
    sizes = c(8, 17, 3, 3)
)
plotPDF(plotList = p, 
    name = paste0(sample_name, "_JG_Peak2Gene_Ren1_enhancers_motifs_",
                  Sys.Date(), ".pdf"), 
    ArchRProj = proj6, 
    addDOC = FALSE, width = 17.5, height = 10)

16d. Generate signal tracks along trajectory at Akr1b7 locus

# TFBS occurrences for JG marker peaks identified through FIMO
cisbp_GRL  <- convertGFF('FIMO/chromVAR_PWM_cisbp_akr1b7/fimo.gff')

# Generate genomic features GRangesList object
features = c(GRangesList(Peaks = peakSet,
                         JG_peaks = JG_Akr1b7_peakSet),
             cisbp_GRL)

p <- plotBrowserTrack(
    ArchRProj = proj6, 
    groupBy = "ManualClustersJG", 
    geneSymbol = "Akr1b7", 
    upstream = 50000,
    downstream = 50000,
    features = features,
    useGroups = trajectory,
    loops = getPeak2GeneLinks(proj6, varCutOffATAC = 0.15),
    sizes = c(8, 17, 3, 3)
)
plotPDF(plotList = p, 
    name = paste0(sample_name, "_JG_Peak2Gene_Akr1b7_JGpeaks_motifs_",
                  Sys.Date(), ".pdf"), 
    ArchRProj = proj6, 
    addDOC = FALSE, width = 17.5, height = 10)

# TFBS occurrences for all peaks identified through FIMO
cisbp_GRL  <- convertGFF(
    'FIMO/chromVAR_PWM_cisbp_akr1b7_all_marker_peaks/fimo.gff')

# Generate genomic features GRangesList object
features = c(GRangesList(Peaks = peakSet,
                         JG_peaks = JG_Akr1b7_peakSet),
             cisbp_GRL)

p <- plotBrowserTrack(
    ArchRProj = proj6, 
    groupBy = "ManualClustersJG", 
    geneSymbol = "Akr1b7", 
    upstream = 50000,
    downstream = 50000,
    features = features,
    useGroups = trajectory,
    loops = getPeak2GeneLinks(proj6, varCutOffATAC = 0.15),
    sizes = c(8, 17, 3, 3)
)
plotPDF(plotList = p, 
    name = paste0(sample_name, "_JG_Peak2Gene_Akr1b7_allpeaks_motifs_",
                  Sys.Date(), ".pdf"), 
    ArchRProj = proj6, 
    addDOC = FALSE, width = 17.5, height = 10)

16e. Plot chromVar deviation plots and Peak2Gene links plots

Look at the literature reported TFs binding to Ren1 promoter.

Plot chromVar deviations for each factor.

promoterMotifs <- c("CBF1", "NIC", "LXR", "NFIX", "SP1", "SP3", "HOXA9",
                    "HOXA10", "PBX1", "PKNOX1")

markerPromMotifs <- getFeatures(proj6,
                                select = paste(promoterMotifs, collapse="|"),
                                useMatrix = "cisbp_Motif_JGMatrix")
markerPromMotifs

# Get just z scores and remove inaccurate grep values
markerPromMotifs <- grep("z:", markerPromMotifs, value = TRUE)
markerPromMotifs <- markerPromMotifs[markerPromMotifs %ni% "z:Mesp1_58"]
markerPromMotifs <- markerPromMotifs[markerPromMotifs %ni% "z:Sp100_714"]
markerPromMotifs <- markerPromMotifs[markerPromMotifs %ni% "z:Sp140_717"]
markerPromMotifs <- markerPromMotifs[markerPromMotifs %ni% "z:Sp110_718"]
markerPromMotifs

p <- plotGroups(ArchRProj = proj6, 
  groupBy = "ManualClustersJG",
  colorBy = "cisbp_Motif_JGMatrix", 
  name = markerPromMotifs,
  imputeWeights = getImputeWeights(proj6)
)

plotPDF(plotList = p, 
    name = paste0(sample_name,
                  "_JG_chromVarDeviations-Trajectory_markerPromMotifs_",
                  Sys.Date(), ".pdf"), 
    ArchRProj = proj6, 
    addDOC = FALSE, width = 20, height = 6)

Plot signal tracks at each factor's locus.

markerPromRNA <- c("Sp3", "Nfix", "Pbx1", "Hoxa9", "Pknox1", "Hoxa10", "Sp1")

p <- plotBrowserTrack(
    ArchRProj = proj6, 
    groupBy = "ManualClustersJG", 
    useGroups = trajectory,
    geneSymbol = sort(markerPromRNA), 
    upstream = 50000,
    downstream = 50000,
    loops = getPeak2GeneLinks(proj6)
)

plotPDF(plotList = p, 
    name = paste0(sample_name, "_JG_Peak2Genes-Trajectory_markerPromRNA_",
                  Sys.Date(), ".pdf"), 
    ArchRProj = proj6, 
    addDOC = TRUE, width = 20, height = 6)

Look at the literature reported TFs binding to Ren1 enhancer.

Plot chromVar deviations for each factor.

enhancerMotifs <- c("NFIX", "SP1", "SP3", "CREB", "CREM", "USF1",
                    "USF2", "RAR", "RXR", "EAR2", "NFYA")
markerEnhMotifs <- getFeatures(proj6,
                               select = paste(enhancerMotifs, collapse="|"),
                               useMatrix = "cisbp_Motif_JGMatrix")
markerEnhMotifs

# Get just z scores and remove inaccurate grep values
markerEnhMotifs <- grep("z:", markerEnhMotifs, value = TRUE)
markerEnhMotifs <- markerEnhMotifs[markerEnhMotifs %ni% "z:Mesp1_58"]
markerEnhMotifs <- markerEnhMotifs[markerEnhMotifs %ni% "z:Sp100_714"]
markerEnhMotifs <- markerEnhMotifs[markerEnhMotifs %ni% "z:Sp140_717"]
markerEnhMotifs <- markerEnhMotifs[markerEnhMotifs %ni% "z:Sp110_718"]
markerEnhMotifs <- markerEnhMotifs[markerEnhMotifs %ni% "z:Creb3l3_116"]
markerEnhMotifs <- markerEnhMotifs[markerEnhMotifs %ni% "z:Rarb_663"]
markerEnhMotifs <- markerEnhMotifs[markerEnhMotifs %ni% "z:Rarg_654"]
markerEnhMotifs <- markerEnhMotifs[markerEnhMotifs %ni% "z:Creb3_870"]
markerEnhMotifs <- markerEnhMotifs[markerEnhMotifs %ni% "z:Crebzf_122"]
markerEnhMotifs <- markerEnhMotifs[markerEnhMotifs %ni% "z:Creb3l2_118"]
markerEnhMotifs

p <- plotGroups(ArchRProj = proj6, 
  groupBy = "ManualClustersJG",
  colorBy = "cisbp_Motif_JGMatrix", 
  name = markerEnhMotifs,
  imputeWeights = getImputeWeights(proj6)
)

plotPDF(plotList = p, 
    name = paste0(sample_name,
                  "_JG_chromVarDeviations-Trajectory_markerEnhMotifs_",
                  Sys.Date(), ".pdf"), 
    ArchRProj = proj6, 
    addDOC = FALSE, width = 20, height = 6)

Plot signal tracks at each factor's locus.

markerEnhRNA <- c("Sp3", "Nfix", "Rxrb", "Rara", "Rxra", "Rxrg", "Nfya", "Sp1",
                  "Crem", "Creb1", "Usf2", "Usf1")

p <- plotBrowserTrack(
    ArchRProj = proj6, 
    groupBy = "ManualClustersJG", 
    useGroups = trajectory,
    geneSymbol = sort(markerEnhRNA), 
    upstream = 50000,
    downstream = 50000,
    loops = getPeak2GeneLinks(proj6)
)

plotPDF(plotList = p, 
    name = paste0(sample_name, "_JG_Peak2Genes-Trajectory1_markerEnhRNA_",
                  Sys.Date(), ".pdf"), 
    ArchRProj = proj6, 
    addDOC = TRUE, width = 20, height = 6)

p <- plotBrowserTrack(
    ArchRProj = proj6, 
    groupBy = "ManualClustersJG", 
    useGroups = trajectory,
    geneSymbol = "REN1", 
    upstream = 50000,
    downstream = 50000,
    loops = getPeak2GeneLinks(proj6)
)

plotPDF(plotList = p, 
    name = paste0(sample_name, "_JG_Peak2Genes-Trajectory1_Ren1_", Sys.Date(), ".pdf"), 
    ArchRProj = proj6, 
    addDOC = FALSE, width = 20, height = 6)

16f. Plot enriched motif heatmaps

enrichMotifs_LSI_Harmony <- peakAnnoEnrichment(
    seMarker = markersPeaks,
    ArchRProj = proj6,
    peakAnnotation = "cisbp_Motif_JG",
    cutOff = "FDR <= 0.1 & Log2FC >= 0.25"
  )
enrichMotifs_LSI_Harmony
heatmapEM <- plotEnrichHeatmap(enrichMotifs_LSI_Harmony,
                               n = 5, transpose = TRUE, rastr=FALSE)
svg(paste0(sample_name,
    "_scATAC-seq_ManualClustersJG_enrichMotif_heatmap_",
    Sys.Date(), ".svg"))
ComplexHeatmap::draw(heatmapEM, heatmap_legend_side = "bot", annotation_legend_side = "bot")
dev.off()

Save progress...

save.image(paste0("sc_kidney_SignalTracks_",Sys.Date(),".RData"))