A unified Python framework for storing, analysing, and visualising spatial omics data from any platform — Visium, Xenium, MERFISH, CODEX, Slide-seq — in a single interoperable format.
SpatialData (Marconato et al., Nature Methods 2024) solves a critical problem in spatial omics: every platform (10x Visium, 10x Xenium, MERFISH, Slide-seq, CODEX, IMC) produces data in a different format, making cross-platform analysis and multi-modal integration painful.
SpatialData provides a single, interoperable Zarr-based on-disk format that stores:
SpatialData
├── images # Raster images (H&E, IF, multiplexed)
│ └── "HE_image" → xarray DataArray (C, Y, X)
├── labels # Integer segmentation masks
│ └── "cell_labels" → xarray DataArray (Y, X)
├── points # Single-molecule coordinates
│ └── "transcripts" → DaskDataFrame (x, y, gene)
├── shapes # Geometric shapes
│ └── "visium_spots" → GeoDataFrame (geometry, spot_id)
└── tables # Gene expression per cell/spot
└── "table" → AnnData (obs=cells, var=genes)
All elements share a common coordinate system, so you can overlay them precisely regardless of source platform.
# Create an isolated environment and install the SpatialData stack.
# FIX: these commands were collapsed onto one line and would not run;
# each must be a separate shell command.
conda create -n spatialdata python=3.10 -y
conda activate spatialdata
pip install spatialdata spatialdata-io spatialdata-plot

# Verify the installation
python -c "import spatialdata; print(spatialdata.__version__)"
# Download example Visium dataset (mouse brain coronal section)
# from the 10x Genomics website.
# FIX: comments and commands were fused onto one line; split into
# separate runnable commands.
wget -O visium_mouse_brain.tar.gz \
    "https://cf.10xgenomics.com/samples/spatial-exp/2.0.0/CytAssist_11mm_FFPE_Mouse_Brain/CytAssist_11mm_FFPE_Mouse_Brain_spatial.tar.gz"
tar xzf visium_mouse_brain.tar.gz
import spatialdata as sd
import spatialdata_io as sdio

# Parse Space Ranger output into the unified SpatialData container.
sdata = sdio.visium(
    path="visium_mouse_brain/",
    dataset_id="mouse_brain",
    imread_kwargs={"level": 0},  # level 0 = full-resolution H&E image
)
print(sdata)
# Expected layout:
# SpatialData object with:
# ├── Images
# │   └── 'mouse_brain_hires_image': SpatialImage[c,y,x] (3, 600, 565)
# ├── Shapes
# │   └── 'mouse_brain_spots': GeoDataFrame shape (2695, 3)
# └── Tables
#     └── 'table': AnnData (2695, 32285)

# Persist as Zarr so later sessions can load it efficiently
sdata.write("mouse_brain.zarr")

# Round-trip: read the store back
sdata = sd.read_zarr("mouse_brain.zarr")
Xenium is a single-molecule FISH platform with subcellular resolution. SpatialData handles the large transcript point clouds efficiently.
# Parse a 10x Xenium Onboard Analysis output folder; transcript loading
# is parallelised across 8 workers.
sdata_xenium = sdio.xenium(path="xenium_output/", n_jobs=8)
print(sdata_xenium)
# Expected layout:
# SpatialData object with:
# ├── Images
# │   ├── 'morphology_focus': SpatialImage[c,y,x]
# │   └── 'morphology_mip': SpatialImage[c,y,x]
# ├── Labels
# │   └── 'cell_labels': SpatialImage[y,x]
# ├── Points
# │   └── 'transcripts': DaskDataFrame (x, y, z, gene, qv_score)
# └── Tables
#     └── 'table': AnnData (cells × genes)

# Summarise the transcript point cloud and gene panel size
print(f"Total transcripts: {len(sdata_xenium.points['transcripts']):,}")
print(f"Genes detected: {sdata_xenium.tables['table'].n_vars}")
import spatialdata_plot  # extends SpatialData with .pl accessor

# NOTE: .pl.show() produces a static matplotlib figure; for interactive
# viewing use the napari-spatialdata plugin instead.
sdata.pl.render_images("mouse_brain_hires_image").pl.show()

# Static matplotlib plot — H&E with spots coloured by gene expression
import scanpy as sc
import matplotlib.pyplot as plt

# Basic clustering on the table: normalise → log → HVG → PCA → kNN graph
sc.pp.normalize_total(sdata.tables["table"])
sc.pp.log1p(sdata.tables["table"])
sc.pp.highly_variable_genes(sdata.tables["table"], n_top_genes=3000)
sc.pp.pca(sdata.tables["table"])
sc.pp.neighbors(sdata.tables["table"])
sc.tl.leiden(sdata.tables["table"])
sc.tl.umap(sdata.tables["table"])

# Map cluster labels back to spatial coordinates.
# BUG FIX: the join result was previously computed and discarded —
# keep it in a variable so it can actually be used.
spots_with_clusters = sdata.tables["table"].obs["leiden"].to_frame().join(
    sdata.shapes["mouse_brain_spots"].set_index("spot_id")
)

# Plot spatial clusters overlaid on the tissue image
fig = (
    sdata
    .pl.render_images("mouse_brain_hires_image", alpha=0.5)
    .pl.render_shapes(
        "mouse_brain_spots",
        color="leiden",
        size=1.5,
    )
    .pl.show(return_ax=True)
)
plt.savefig("visium_clusters.pdf", dpi=300, bbox_inches="tight")
plt.show()
One of SpatialData's key features is aligning multiple tissue sections or modalities into a shared coordinate system.
from spatialdata.transformations import Affine, set_transformation

# Load a second serial section
sdata2 = sdio.visium("visium_section2/", dataset_id="brain_s2")

# Affine transformation mapping section 2 onto section 1's frame
# (estimated from landmark points or image registration)
transform = Affine(
    [[1.02, 0.01, -15],
     [-0.01, 0.99, 22],
     [0, 0, 1]],
    input_axes=("x", "y"),
    output_axes=("x", "y"),
)

# BUG FIX: set_transformation operates on individual spatial elements,
# not on a whole SpatialData object — register the transform element
# by element across all element groups of section 2.
for element_group in (sdata2.images, sdata2.labels, sdata2.points, sdata2.shapes):
    for element in element_group.values():
        set_transformation(element, transform, to_coordinate_system="section1")

# Merge into a single SpatialData object.
# NOTE(review): verify sd.concatenate's keyword names against the
# installed spatialdata version; region handling has varied across releases.
sdata_merged = sd.concatenate(
    [sdata, sdata2],
    region_key="section",
    region=["section1", "section2"],
)

# Both sections now live in the shared "section1" coordinate space
sdata_merged.pl.render_images().pl.render_shapes(color="section").pl.show()
import numpy as np
import squidpy as sq

adata = sdata.tables["table"]

# Pull per-spot centroids out of the shapes element and attach them under
# the conventional "spatial" key.
# NOTE(review): assumes shapes rows are ordered like adata.obs — verify
# the alignment against the reader output.
spot_geoms = sdata.shapes["mouse_brain_spots"][["geometry"]].copy()
xy = np.column_stack([
    spot_geoms.geometry.x.values,
    spot_geoms.geometry.y.values,
])
adata.obsm["spatial"] = xy

# k-nearest-neighbour spatial graph (6 neighbours per spot)
sq.gr.spatial_neighbors(adata, coord_type="generic", n_neighs=6)

# Moran's I spatial autocorrelation — ranks spatially variable genes
sq.gr.spatial_autocorr(adata, mode="moran", n_perms=100, n_jobs=4)

# Rank genes by Moran's I statistic
svg = adata.uns["moranI"].sort_values("I", ascending=False)
print("Top 10 spatially variable genes:")
print(svg.head(10)[["I", "pval_norm"]].to_string())

# Show the single most spatially variable gene on the tissue image
gene = svg.index[0]
sq.pl.spatial_scatter(
    adata,
    color=gene,
    size=1.5,
    img_alpha=0.5,
    title=f"Spatial expression: {gene}",
)
plt.savefig(f"svg_{gene}.pdf", dpi=300, bbox_inches="tight")

# Neighbourhood enrichment — which clusters co-localise in space?
sq.gr.nhood_enrichment(adata, cluster_key="leiden")
sq.pl.nhood_enrichment(
    adata,
    cluster_key="leiden",
    title="Neighbourhood enrichment",
    figsize=(8, 8),
)
plt.savefig("neighbourhood_enrichment.pdf", dpi=300, bbox_inches="tight")
Squidpy is the spatial analysis companion to SpatialData, providing spatial statistics, ligand-receptor interaction analysis, and image feature extraction.
import squidpy as sq

# Ligand-receptor interaction analysis (permutation test per cluster pair)
sq.gr.ligrec(
    adata,
    n_perms=1000,
    cluster_key="leiden",
    copy=False,   # store results in adata.uns rather than returning them
    use_raw=False,
    transmitter_params={"categories": "ligand"},
    receiver_params={"categories": "receptor"},
)

# Plot top interactions between clusters
sq.pl.ligrec(
    adata,
    cluster_key="leiden",
    source_groups=["0", "1"],  # sender cell types
    target_groups=["2", "3"],  # receiver cell types
    means_range=(0.3, np.inf),
    alpha=1e-4,                # significance threshold for display
    swap_axes=True,
)
plt.savefig("ligrec_interactions.pdf", dpi=300, bbox_inches="tight")

# Extract image features (texture, summary stats) from the H&E image.
# BUG FIX: calculate_image_features expects a squidpy ImageContainer,
# not a raw SpatialData image element — wrap the image first.
img = sq.im.ImageContainer(sdata.images["mouse_brain_hires_image"])
sq.im.calculate_image_features(
    adata,
    img,
    features=["summary", "texture", "histogram"],
    key_added="img_features",
)
print("Image features shape:", adata.obsm["img_features"].shape)
In summary: load Visium data with sdio.visium() and Xenium data with sdio.xenium(), visualise everything through spatialdata-plot's .pl accessor, and analyse cell-cell communication with sq.gr.ligrec().