Markers Tutorial

[1]:
import warnings
warnings.filterwarnings("ignore")

import scanpy as sc
import sceleto as scl
import matplotlib.pyplot as plt

sc.settings.set_figure_params(facecolor='white', frameon=False)
[2]:
adata = sc.read("/nfs3/SCMGL/smc03_sceleto/PANGEA_T&NK_v03_final.h5ad")  # Load T&NK AnnData
[3]:
adata.X
[3]:
<Compressed Sparse Row sparse matrix of dtype 'float32'
        with 133978055 stored elements and shape (93531, 36601)>
[4]:
print(adata.X.min(), adata.X.max())
0.0 8.099076

Marker Discovery

[5]:
scl.us(adata, 'Level2')
_images/markers_tutorial_6_0.png

Run graph-based marker detection

[6]:
mk = scl.markers.marker(adata, "Level2")
  Auto thres_fc: 1.94

Top marker genes per group as a dictionary

[7]:
mk.markers.keys()
[7]:
dict_keys(['ILC', 'NK_CD16', 'NK_CD56', 'NK_CXCR6', 'T&NK_HSP', 'T&NK_ISG', 'T_CD4_CXCL13', 'T_CD4_EM_Tfh', 'T_CD4_EM_Th1', 'T_CD4_EM_Th2', 'T_CD4_EM_Th17', 'T_CD4_EM_Th22', 'T_CD4_EM_Treg', 'T_CD4_Eff_IFNG', 'T_CD4_Eff_IL13', 'T_CD4_Eff_IL17', 'T_CD4_N&CM', 'T_CD4_RM_Th1', 'T_CD4_RM_Th17', 'T_CD4_RM_Treg', 'T_CD4_activated', 'T_CD8_EM', 'T_CD8_EMRA', 'T_CD8_KLRC2', 'T_CD8_MHCII', 'T_CD8_N&CM', 'T_CD8_RM', 'T_CD8_activated', 'T_CD8_exhausted', 'T_MAIT', 'T_gdT_TRDV1', 'T_gdT_TRDV2'])

Top marker genes per group as a dictionary

[8]:
mk.markers['ILC'][:10]
[8]:
['IL1R1',
 'KIT',
 'TNFSF11',
 'SPINK2',
 'AFF3',
 'IL23R',
 'KRT86',
 'KRT81',
 'LST1',
 'RAMP1']

Dotplot of top 5 markers per cell type

[9]:
mk.plot(n_top=5)
_images/markers_tutorial_14_0.png

Dotplot

Pass a flat gene list to visualize any genes of interest

[10]:
celltypist_tnk = 'CD3D,CD3E,CD3G,CD4,CD40LG,CD8A,CD8B,SELL,CCR7,NR4A1,CCL5,CXCR3,TBX21,IFNG,GATA3,IL4,IL5,RORC,CCR6,ZBTB16,IL17A,IL17F,IL21,CXCR5,PDCD1,ICOS'.split(',')
celltypist_tnk += 'FOXP3,CTLA4,SLC4A10,TRAV1-2,CX3CR1,GZMK,GZMB,GZMH,GNLY,PRF1,CRTAM,CCR9,ITGAE,ITGA1,SPRY1,SPRY2,TRDC'.split(',')
celltypist_tnk += 'ITGAD,KIR2DL4,IKZF2,MKI67,TYROBP,NCAM1,FCGR3A,CD160,PCDH9,KIT,LST1'.split(',')

scl.dotplot(adata, celltypist_tnk, groupby='Level2')
_images/markers_tutorial_17_0.png

Pass a dict to group genes by functional category on the axis

[11]:
gene_dict = {
    "Cytotoxic": ["GZMA", "GZMB", "PRF1", "NKG7"],
    "Helper":    ["IL7R", "CCR7", "TCF7", "LEF1"],
    "Treg":      ["FOXP3", "IL2RA", "CTLA4"],
}

scl.dotplot(adata, gene_dict, groupby="Level2", swap_axes=True)
_images/markers_tutorial_19_0.png

PAGA edge fold-changes for a single gene

[12]:
mk.plot_gene_edges_fc("CD3D", figsize=(7, 5))
plt.show()
_images/markers_tutorial_21_0.png

Hierarchy

Run markers at three Leiden resolutions

[13]:
levels = ["leiden_1.0", "leiden_2.0", "leiden_4.0"]
marker_runs = {lv: scl.markers.marker(adata, lv) for lv in levels}
  Auto thres_fc: 2.85
  Auto thres_fc: 1.00
  Auto thres_fc: 2.49

Build cross-resolution hierarchy

[14]:
hr = scl.markers.hierarchy(adata, marker_runs.values())

UMAP colored by hierarchical cluster assignment

[15]:
scl.us(adata, 'icls')
_images/markers_tutorial_28_0.png

UMAP colored by lineage path

[16]:
scl.us(adata, 'path')
_images/markers_tutorial_30_0.png

Export interactive HTML viewer

[17]:
hr.interactive_viewer(adata, save="viewer_TNK_hierarchy.html")
Interactive viewer saved → viewer_TNK_hierarchy.html

Markers with batch testing

Load fibroblast dataset; ‘Sample’ column holds batch labels

[18]:
fib = sc.read_h5ad("/nfs/SCMGL/srk01_PancCa/16_FIGURE/data/PCI01.v43.fibrawt_final_02.h5ad")
scl.us(fib, 'anno_final')
_images/markers_tutorial_35_0.png
[19]:
print(fib.X.min(), fib.X.max())
-5.547311 10.0
[20]:
print(fib.raw.to_adata().X.min(), fib.raw.to_adata().X.max())
0.0 10.360901

Markers without batch consideration

[21]:
mk_no_batch = scl.markers.marker(fib, "anno_final")
  Auto thres_fc: 1.00

Markers with per-sample t-test filter

[22]:
mk_batch = scl.markers.marker(fib, "anno_final", batch_key="Sample")
  Auto thres_fc: 1.00
[23]:
print("Without batch correction")
mk_no_batch.plot(n_top=3)  # Batch-sensitive genes may appear
Without batch correction
_images/markers_tutorial_42_1.png
[24]:
print("With batch correction")
mk_batch.plot(n_top=3)  # Only genes consistent across samples are retained
With batch correction
_images/markers_tutorial_43_1.png

Build batch-corrected hierarchy across three resolutions

[25]:
sc.tl.leiden(fib, 0.1, key_added='leiden_0.1')
sc.tl.leiden(fib, 0.5, key_added='leiden_0.5')
sc.tl.leiden(fib, 1.0, key_added='leiden_1.0')
[26]:
fib_levels = ["leiden_0.1", "leiden_0.5", "leiden_1.0"]
fib_runs = {lv: scl.markers.marker(fib, lv, batch_key="Sample") for lv in fib_levels}
hr_fib = scl.markers.hierarchy(fib, fib_runs.values(), batch_key="Sample")
  Auto thres_fc: 1.00
  Auto thres_fc: 1.00
  Auto thres_fc: 1.00

Hierarchical clusters in fibroblast UMAP

[27]:
scl.us(fib, 'icls')
_images/markers_tutorial_48_0.png

Lineage paths in fibroblast UMAP

[28]:
scl.us(fib, 'path')
_images/markers_tutorial_50_0.png

Export batch-aware interactive viewer

[29]:
hr_fib.interactive_viewer(fib, save="viewer_fib_batch.html")
Batch interactive viewer saved → viewer_fib_batch.html