Source code for ncountr.core.qc

"""Quality control checks for Nanostring nCounter data."""

from __future__ import annotations

import re

import numpy as np
import pandas as pd
from scipy import stats

from ncountr.experiment import NanostringExperiment


def _fov_ratio(lane_info: pd.DataFrame) -> pd.Series:
    """Return the per-sample fraction of fields of view that were counted.

    Parameters
    ----------
    lane_info : pd.DataFrame
        Table providing the ``FovCounted`` and ``FovCount`` columns.

    Returns
    -------
    pd.Series
        ``FovCounted / FovCount``, carrying the index of ``lane_info``.
        Rows whose ``FovCount`` is zero yield NaN.
    """
    counted = lane_info["FovCounted"]
    # Swap zero totals for NaN before dividing so those rows come out as NaN.
    total = lane_info["FovCount"].replace(0, np.nan)
    return counted / total


def _pos_ctrl_r2(pos_counts: pd.DataFrame) -> pd.Series:
    """Return the per-sample R-squared of the positive-control dose response.

    The expected concentration of each control is the number written in
    parentheses in its row label (``POS_A(128)`` → 128); rows without such
    a number are left out of the fit. For every column, the squared Pearson
    correlation between log10(expected concentration) and log10(observed
    count) is computed over the controls where both values are positive.

    Parameters
    ----------
    pos_counts : pd.DataFrame
        Positive-control counts; rows are control names, columns are samples.

    Returns
    -------
    pd.Series
        Series named ``PosCtrl_R2`` keyed by column label. A sample with
        fewer than three usable controls gets NaN.
    """
    # Pair each labelled control with the concentration parsed from its name.
    labelled = []
    for ctrl in pos_counts.index:
        found = re.search(r"\(([\d.]+)\)", ctrl)
        if found is not None:
            labelled.append((ctrl, float(found.group(1))))

    ctrl_names = [ctrl for ctrl, _conc in labelled]
    expected = np.array([conc for _ctrl, conc in labelled])

    def _sample_r2(sample) -> float:
        """Squared log-log Pearson correlation for one sample column."""
        observed = np.array([pos_counts.loc[ctrl, sample] for ctrl in ctrl_names])
        # log10 is only defined for strictly positive values.
        usable = (expected > 0) & (observed > 0)
        if usable.sum() < 3:
            return np.nan
        corr, _pvalue = stats.pearsonr(
            np.log10(expected[usable]), np.log10(observed[usable])
        )
        return corr ** 2

    return pd.Series(
        {sample: _sample_r2(sample) for sample in pos_counts.columns},
        name="PosCtrl_R2",
    )


def _neg_background(neg_counts: pd.DataFrame, n_sd: float = 2.0) -> pd.Series:
    """Return the per-sample negative-control background threshold.

    The threshold for each column is ``mean + n_sd * SD`` of that column's
    values after casting them to float.

    Parameters
    ----------
    neg_counts : pd.DataFrame
        Negative-control counts; one column per sample.
    n_sd : float
        Multiplier applied to the standard deviation.

    Returns
    -------
    pd.Series
        Series named ``NegBackground`` keyed by column label.
    """

    def _threshold(column: pd.Series) -> float:
        """Mean plus ``n_sd`` standard deviations of one sample's controls."""
        observed = column.values.astype(float)
        # ndarray.std() uses NumPy's default ddof=0, i.e. the population SD.
        spread = observed.std()
        return observed.mean() + n_sd * spread

    return pd.Series(
        {sample: _threshold(neg_counts[sample]) for sample in neg_counts.columns},
        name="NegBackground",
    )


def qc(
    experiment: NanostringExperiment,
    *,
    fov_ratio_threshold: float = 0.75,
    pos_r2_threshold: float = 0.95,
    neg_sd: float = 2.0,
) -> pd.DataFrame:
    """Run QC checks and store results on the experiment.

    The result table is also assigned to ``experiment.qc_results``.

    Parameters
    ----------
    experiment : NanostringExperiment
        Experiment providing ``samples``, ``lane_info``, ``pos_counts``
        and ``neg_counts``.
    fov_ratio_threshold : float
        FOV ratio a sample must exceed (strict ``>``) to pass.
    pos_r2_threshold : float
        Positive-control R-squared a sample must exceed (strict ``>``)
        to pass.
    neg_sd : float
        Number of standard deviations above mean for negative background.

    Returns
    -------
    pd.DataFrame
        QC results indexed by sample, with columns ``FovRatio``,
        ``FovPass``, ``PosCtrl_R2``, ``PosCtrlPass`` and ``NegBackground``,
        plus ``BindingDensity`` and ``CartridgeID`` when ``lane_info``
        has those columns.

    Notes
    -----
    A NaN ratio or R-squared compares as False, so such samples are
    marked as failing. When ``lane_info`` is empty or lacks the
    ``FovCounted``/``FovCount`` columns, ``FovRatio`` is NaN and
    ``FovPass`` is True for every sample.
    """
    samples = experiment.samples
    lane_info = experiment.lane_info

    qc_df = pd.DataFrame(index=samples)
    qc_df.index.name = "sample"

    # FOV ratio: require both source columns, mirroring the column checks
    # used for BindingDensity and CartridgeID below, so lane info without
    # FOV fields takes the "no FOV data" path instead of raising KeyError.
    has_fov = not lane_info.empty and {"FovCounted", "FovCount"}.issubset(
        lane_info.columns
    )
    if has_fov:
        qc_df["FovRatio"] = _fov_ratio(lane_info).reindex(samples)
        qc_df["FovPass"] = qc_df["FovRatio"] > fov_ratio_threshold
    else:
        qc_df["FovRatio"] = np.nan
        qc_df["FovPass"] = True

    # Binding density
    if "BindingDensity" in lane_info.columns:
        qc_df["BindingDensity"] = lane_info["BindingDensity"].reindex(samples)

    # Cartridge
    if "CartridgeID" in lane_info.columns:
        qc_df["CartridgeID"] = lane_info["CartridgeID"].reindex(samples)

    # Positive control R2
    r2 = _pos_ctrl_r2(experiment.pos_counts)
    qc_df["PosCtrl_R2"] = r2.reindex(samples)
    qc_df["PosCtrlPass"] = qc_df["PosCtrl_R2"] > pos_r2_threshold

    # Negative background
    bg = _neg_background(experiment.neg_counts, n_sd=neg_sd)
    qc_df["NegBackground"] = bg.reindex(samples)

    experiment.qc_results = qc_df
    return qc_df