Source code for ncountr.plotting.qc_plots

"""QC summary plots."""

from __future__ import annotations

import re
from pathlib import Path
from typing import Union

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats as sp_stats

from ncountr.experiment import NanostringExperiment


def _pos_scale_hk(experiment: NanostringExperiment) -> np.ndarray:
    """Compute pos-normalized HK totals for each sample."""
    samples = experiment.samples
    pos_df = experiment.pos_counts
    hk_df = experiment.hk_counts

    # Compute positive control geometric-mean scaling factors
    geomeans = {}
    for sid in samples:
        vals = pos_df[sid].values.astype(float)
        vals = vals[vals > 0]
        geomeans[sid] = sp_stats.gmean(vals) if len(vals) > 0 else np.nan

    valid = [v for v in geomeans.values() if np.isfinite(v)]
    if not valid:
        return hk_df[samples].sum(axis=0).values.astype(float)

    grand = sp_stats.gmean(valid)
    pos_scale = {
        sid: grand / geomeans[sid] if np.isfinite(geomeans[sid]) else 1.0
        for sid in samples
    }

    # Apply pos scaling to HK counts and sum
    hk_pos_norm = np.array([
        (hk_df[sid].values.astype(float) * pos_scale[sid]).sum()
        for sid in samples
    ])
    return hk_pos_norm


[docs] def plot_qc( experiment: NanostringExperiment, *, output: Union[str, Path, None] = None, fov_threshold: float = 0.75, ) -> plt.Figure: """Generate a 4-panel QC summary figure. Panels: A) FOV ratio, B) Positive control linearity, C) Negative background, D) Housekeeping gene totals (raw + pos-normalized). Parameters ---------- experiment : NanostringExperiment output : str or Path, optional Save figure to this path. fov_threshold : float FOV ratio threshold line. Returns ------- matplotlib.figure.Figure """ samples = experiment.samples qc = experiment.qc_results pos_df = experiment.pos_counts neg_df = experiment.neg_counts hk_df = experiment.hk_counts fig, axes = plt.subplots(2, 2, figsize=(12, 10)) # A: FOV ratios ax = axes[0, 0] if qc is not None and "FovRatio" in qc.columns: fov_ratios = qc.loc[samples, "FovRatio"].values.astype(float) colors = ["#2ca02c" if r > fov_threshold else "#d62728" for r in fov_ratios] ax.bar(range(len(samples)), fov_ratios, color=colors) ax.axhline(fov_threshold, color="red", linestyle="--", alpha=0.7, label=f"Threshold ({fov_threshold})") ax.set_ylim(min(0.7, np.nanmin(fov_ratios) - 0.02), 1.02) ax.set_xticks(range(len(samples))) ax.set_xticklabels(samples, rotation=45, ha="right") ax.set_ylabel("FOV Counted / FOV Count") ax.set_title("A. Field of View Ratio") ax.legend(fontsize=8) # B: Positive control linearity ax = axes[0, 1] conc_map: dict[str, float] = {} for name in pos_df.index: match = re.search(r"\(([\d.]+)\)", name) if match: conc_map[name] = float(match.group(1)) names_with_conc = [n for n in pos_df.index if n in conc_map] concs = np.array([conc_map[n] for n in names_with_conc]) for sid in samples: counts = np.array([pos_df.loc[n, sid] for n in names_with_conc]) mask = (concs > 0) & (counts > 0) ax.scatter(np.log10(concs[mask]), np.log10(counts[mask]), s=20, alpha=0.6, label=sid) ax.set_xlabel("log10(Expected Concentration)") ax.set_ylabel("log10(Observed Count)") ax.set_title("B. Positive Control Linearity") ax.legend(fontsize=6, ncol=3, loc="lower right") # C: Negative control background ax = axes[1, 0] if qc is not None and "NegBackground" in qc.columns: bg_vals = qc.loc[samples, "NegBackground"].values.astype(float) else: bg_vals = [neg_df[s].values.astype(float).mean() + 2 * neg_df[s].values.astype(float).std() for s in samples] ax.bar(range(len(samples)), bg_vals, color="steelblue") ax.set_xticks(range(len(samples))) ax.set_xticklabels(samples, rotation=45, ha="right") ax.set_ylabel("Background (mean + 2*SD)") ax.set_title("C. Negative Control Background") # D: Housekeeping gene totals (raw + pos-normalized) ax = axes[1, 1] hk_totals_raw = hk_df[samples].sum(axis=0).values.astype(float) x = np.arange(len(samples)) bar_width = 0.35 has_pos = len(pos_df) > 0 and pos_df.shape[0] > 0 if has_pos: hk_totals_norm = _pos_scale_hk(experiment) ax.bar(x - bar_width / 2, hk_totals_raw, bar_width, color="lightsteelblue", label="Raw") ax.bar(x + bar_width / 2, hk_totals_norm, bar_width, color="steelblue", label="Pos-normalized") ax.legend(fontsize=8) else: ax.bar(x, hk_totals_raw, color="steelblue") ax.set_xticks(x) ax.set_xticklabels(samples, rotation=45, ha="right") ax.set_ylabel("Total HK Gene Counts") ax.set_title("D. Housekeeping Gene Totals") fig.suptitle("Nanostring QC Summary", fontsize=14, fontweight="bold") plt.tight_layout() if output is not None: fig.savefig(output) return fig