Simultaneously profile chromatin accessibility (ATAC-seq) and gene expression (RNA-seq) in the same single cell. Covers Cell Ranger ARC output, Signac + Seurat WNN integration, peak calling, motif enrichment, and gene regulatory network inference linking peaks to genes.
Single-cell ATAC-seq alone tells you which genomic regions are open (accessible) in each cell — a proxy for regulatory activity. RNA-seq alone tells you gene expression. Combining them in the same cell with 10x Multiome (or SHARE-seq, PAIRED-seq) lets you:
# In R:
install.packages(c("Seurat", "Signac", "ggplot2", "dplyr", "patchwork"))
BiocManager::install(c("GenomicRanges", "BSgenome.Hsapiens.UCSC.hg38",
"EnsDb.Hsapiens.v86", "chromVAR", "motifmatchr",
"TFBSTools", "JASPAR2020"))
# Verify
library(Seurat)
library(Signac)
packageVersion("Signac") # should be >= 1.12
# MACS2 for peak calling (Python) pip install macs2 macs2 --version
library(Seurat)
library(Signac)
library(EnsDb.Hsapiens.v86)
library(GenomicRanges)
# Cell Ranger ARC output directory structure:
# outs/
# ├── filtered_feature_bc_matrix/ (RNA counts)
# ├── atac_fragments.tsv.gz (ATAC fragments)
# ├── atac_peaks.bed (called peaks)
# └── per_barcode_metrics.csv (QC per cell)
# Read RNA counts
rna_counts <- Read10X_h5("outs/filtered_feature_bc_matrix.h5")
rna_mat <- rna_counts$`Gene Expression`
# Read ATAC peak counts
atac_counts <- rna_counts$Peaks
# Get genome annotations for hg38
annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86)
seqlevelsStyle(annotations) <- "UCSC"
genome(annotations) <- "hg38"
# Create ChromatinAssay for ATAC data
chrom_assay <- CreateChromatinAssay(
counts = atac_counts,
sep = c(":", "-"),
genome = "hg38",
fragments = "outs/atac_fragments.tsv.gz",
min.cells = 10,
annotation = annotations
)
# Create Seurat object with both modalities
pbmc <- CreateSeuratObject(
counts = rna_mat,
assay = "RNA",
min.cells = 10
)
pbmc[["ATAC"]] <- chrom_assay
cat("Cells:", ncol(pbmc), "\n")
cat("RNA features:", nrow(pbmc[["RNA"]]), "\n")
cat("ATAC peaks:", nrow(pbmc[["ATAC"]]), "\n")
library(dplyr)
# QC metrics
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
# Filter cells
pbmc <- subset(
pbmc,
subset = nFeature_RNA > 200 &
nFeature_RNA < 5000 &
percent.mt < 20
)
# Normalise and find variable features
DefaultAssay(pbmc) <- "RNA"
pbmc <- NormalizeData(pbmc)
pbmc <- FindVariableFeatures(pbmc, nfeatures = 3000)
pbmc <- ScaleData(pbmc)
pbmc <- RunPCA(pbmc, npcs = 50)
cat("After QC:", ncol(pbmc), "cells\n")
DefaultAssay(pbmc) <- "ATAC"
# ATAC-specific QC metrics
pbmc <- NucleosomeSignal(pbmc)
pbmc <- TSSEnrichment(pbmc, fast = FALSE)
# Visualise ATAC QC
VlnPlot(
pbmc,
features = c("nCount_ATAC", "TSS.enrichment", "nucleosome_signal"),
ncol = 3,
pt.size = 0
)
# Filter cells based on ATAC QC
pbmc <- subset(
pbmc,
subset = nCount_ATAC > 1000 &
nCount_ATAC < 100000 &
nucleosome_signal < 2 &
TSS.enrichment > 1
)
# Term-frequency inverse-document-frequency (TF-IDF) normalisation
pbmc <- RunTFIDF(pbmc)
# Feature selection — top accessible peaks
pbmc <- FindTopFeatures(pbmc, min.cutoff = "q5")
# SVD (LSI) dimensionality reduction
pbmc <- RunSVD(pbmc)
# Check if first LSI component correlates with depth (should be excluded)
DepthCor(pbmc)
# If component 1 correlates strongly with depth, exclude it in downstream steps
Weighted Nearest Neighbours (WNN) combines RNA PCA and ATAC LSI embeddings, weighting each modality adaptively per cell.
library(patchwork)
# Build WNN graph using both modalities
pbmc <- FindMultiModalNeighbors(
pbmc,
reduction.list = list("pca", "lsi"),
dims.list = list(1:50, 2:50), # exclude LSI dim 1 (depth correlated)
modality.weight.name = "RNA.weight"
)
# UMAP from WNN
pbmc <- RunUMAP(
pbmc,
nn.name = "weighted.nn",
reduction.name = "wnn.umap",
reduction.key = "wnnUMAP_"
)
# Also make RNA-only and ATAC-only UMAPs for comparison
pbmc <- RunUMAP(pbmc, reduction="pca", dims=1:50,
reduction.name="rna.umap", reduction.key="rnaUMAP_")
pbmc <- RunUMAP(pbmc, reduction="lsi", dims=2:50,
reduction.name="atac.umap", reduction.key="atacUMAP_")
# Cluster on WNN graph
pbmc <- FindClusters(pbmc, graph.name = "wsnn",
algorithm = 3, resolution = 0.8)
# Compare UMAPs side by side
p1 <- DimPlot(pbmc, reduction="rna.umap", group.by="seurat_clusters",
label=TRUE) + ggtitle("RNA only") + NoLegend()
p2 <- DimPlot(pbmc, reduction="atac.umap", group.by="seurat_clusters",
label=TRUE) + ggtitle("ATAC only") + NoLegend()
p3 <- DimPlot(pbmc, reduction="wnn.umap", group.by="seurat_clusters",
label=TRUE) + ggtitle("WNN (joint)") + NoLegend()
p1 | p2 | p3
ggsave("wnn_comparison_umap.pdf", width=18, height=6)
Cell Ranger ARC calls peaks globally. Re-calling peaks per cell type gives higher-resolution, cluster-specific accessibility profiles.
# Assign cell type labels first (from RNA markers)
pbmc <- RenameIdents(
pbmc,
"0" = "CD14 Mono",
"1" = "CD4 T",
"2" = "CD8 T",
"3" = "B cell",
"4" = "NK",
"5" = "CD16 Mono",
"6" = "DC",
"7" = "Platelet"
)
pbmc$cell_type <- Idents(pbmc)
# Call peaks per cell type using MACS2
peaks <- CallPeaks(
pbmc,
group.by = "cell_type",
macs2.path = "/path/to/macs2"
)
# Keep only standard chromosomes
peaks <- keepStandardChromosomes(peaks, pruning.mode = "coarse")
# Quantify fragments in new peaks
macs2_counts <- FeatureMatrix(
fragments = Fragments(pbmc),
features = peaks,
cells = colnames(pbmc)
)
# Create new ATAC assay with MACS2 peaks
pbmc[["MACS2"]] <- CreateChromatinAssay(
counts = macs2_counts,
fragments = Fragments(pbmc),
annotation = Annotation(pbmc)
)
cat("MACS2 peaks called:", nrow(peaks), "\n")
cat("Original peaks:", nrow(pbmc[["ATAC"]]), "\n")
library(JASPAR2020)
library(TFBSTools)
library(chromVAR)
library(motifmatchr)
library(BSgenome.Hsapiens.UCSC.hg38)
DefaultAssay(pbmc) <- "MACS2"
# Get JASPAR 2020 motif database (vertebrates)
pfm <- getMatrixSet(
JASPAR2020,
opts = list(collection = "CORE", tax_group = "vertebrates",
all_versions = FALSE)
)
# Scan peaks for TF motif matches
pbmc <- AddMotifs(pbmc, genome = BSgenome.Hsapiens.UCSC.hg38, pfm = pfm)
# Run chromVAR to compute per-cell TF activity scores
pbmc <- RunChromVAR(
object = pbmc,
genome = BSgenome.Hsapiens.UCSC.hg38
)
# Differential TF activity between cell types
DefaultAssay(pbmc) <- "chromvar"
da_tf <- FindMarkers(
pbmc,
ident.1 = "CD8 T",
ident.2 = "CD4 T",
only.pos = TRUE,
mean.fxn = rowMeans,
fc.name = "avg_diff"
)
# Map motif IDs to TF names
motif_names <- GetMotifData(pbmc, slot = "motif.names")
da_tf$tf_name <- motif_names[rownames(da_tf)]
cat("Top TFs enriched in CD8 T vs CD4 T:\n")
print(head(da_tf[, c("tf_name","avg_diff","p_val_adj")], 15))
# Visualise TF activity
DefaultAssay(pbmc) <- "chromvar"
FeaturePlot(
pbmc,
features = c("MA0080.5", "MA0105.4"), # IRF4, RUNX1
reduction = "wnn.umap",
min.cutoff = "q10",
max.cutoff = "q90",
ncol = 2
) &
scale_color_gradient2(low="blue", mid="lightgrey", high="red") &
ggtitle(c("IRF4 motif activity", "RUNX1 motif activity"))
ggsave("tf_motif_activity.pdf", width=12, height=5)
Linking open chromatin peaks to the genes they regulate is the core multiome analysis — enabled by having ATAC and RNA from the same cell.
DefaultAssay(pbmc) <- "MACS2"
# Link peaks to genes using co-variation across cells
# (peaks that open/close together with gene expression → likely regulatory)
pbmc <- LinkPeaks(
object = pbmc,
peak.assay = "MACS2",
expression.assay = "RNA",
genes.use = VariableFeatures(pbmc)
)
# View links
links <- Links(pbmc)
cat("Total peak-gene links:", length(links), "\n")
# Filter to strong, significant links
strong_links <- links[links$score > 0.1 & links$pvalue < 0.05]
cat("Strong links (score>0.1, p<0.05):", length(strong_links), "\n")
# Visualise coverage plot for a specific gene
CoveragePlot(
object = pbmc,
region = "CD8A",
features = "CD8A",
expression.assay = "RNA",
idents = c("CD8 T", "CD4 T", "CD14 Mono"),
extend.upstream = 5000,
extend.downstream = 5000
)
ggsave("CD8A_coverage_plot.pdf", width=10, height=8)
DefaultAssay(pbmc) <- "MACS2"
# Find differentially accessible peaks between cell types
da_peaks <- FindMarkers(
pbmc,
ident.1 = "CD8 T",
ident.2 = "CD4 T",
only.pos = TRUE,
test.use = "LR", # logistic regression (recommended for ATAC)
latent.vars = "nCount_ATAC" # control for sequencing depth
)
# Annotate peaks (nearest gene, peak type: promoter/enhancer/intron)
da_peaks_annotated <- ClosestFeature(pbmc, regions = rownames(da_peaks))
da_peaks <- cbind(da_peaks, da_peaks_annotated)
cat("Differentially accessible peaks (CD8 > CD4):", nrow(da_peaks), "\n")
# Promoter vs enhancer breakdown
table(da_peaks$type)
# Volcano plot
library(ggplot2)
library(ggrepel)
da_peaks$neg_log10_padj <- -log10(da_peaks$p_val_adj + 1e-300)
da_peaks$label <- ifelse(da_peaks$p_val_adj < 1e-10 & abs(da_peaks$avg_log2FC) > 1,
da_peaks$gene_name, "")
ggplot(da_peaks, aes(x=avg_log2FC, y=neg_log10_padj)) +
geom_point(aes(colour=type), alpha=0.6, size=1.2) +
geom_text_repel(aes(label=label), size=3, max.overlaps=20) +
geom_vline(xintercept=c(-1,1), linetype="dashed", colour="grey50") +
geom_hline(yintercept=-log10(0.05), linetype="dashed", colour="grey50") +
scale_colour_manual(values=c("Promoter"="#4CAF50","Distal Intergenic"="#F44336","Intron"="#FF9800")) +
labs(x="Average log2 fold change", y="-log10(adjusted p-value)",
title="Differentially accessible peaks: CD8 T vs CD4 T",
colour="Peak type") +
theme_classic(base_size=13)
ggsave("da_peaks_volcano.pdf", width=10, height=7, dpi=300)