# Source code for deepretro.utils.metrics

"""Threshold optimization utilities for binary classification."""

from typing import Sequence

import numpy as np



# [docs]
def find_optimal_threshold(
    y_true: Sequence[float],
    probabilities: Sequence[float],
) -> tuple[float, float]:
    """
    Find the classification threshold that maximises F1-score.

    Sweeps the precision-recall curve and picks the threshold where
    the harmonic mean of precision and recall is highest.

    Parameters
    ----------
    y_true : array-like, shape (n_samples,)
        True binary labels (0 or 1).
    probabilities : array-like, shape (n_samples,)
        Predicted probabilities for the positive class.

    Returns
    -------
    threshold : float
        Optimal classification threshold. Always one of the candidate
        thresholds returned by ``precision_recall_curve``.
    f1 : float
        F1-score at the optimal threshold.

    Raises
    ------
    Exception
        Input validation is delegated to
        ``sklearn.metrics.precision_recall_curve``; whatever it raises
        for malformed input propagates unchanged.

    Notes
    -----
    * ``precision_recall_curve`` evaluates each candidate threshold ``t``
      with the rule ``probability >= t``, so apply the returned threshold
      with ``>=`` to reproduce the reported F1-score.
    * When several thresholds share the best F1-score, the first one in
      the order returned by ``precision_recall_curve`` is chosen.
    * A small constant (``1e-10``) is added to the denominator so the
      score stays defined when precision and recall are both zero. The
      reported F1-score is therefore marginally below the exact value
      (by roughly ``1e-10``); compare it with a tolerance, not ``==``.

    Examples
    --------
    >>> import numpy as np
    >>> from deepretro.utils.metrics import find_optimal_threshold
    >>> y = np.array([0, 0, 1, 1])
    >>> proba = np.array([0.1, 0.4, 0.6, 0.9])
    >>> thr, f1 = find_optimal_threshold(y, proba)
    >>> 0.0 < thr < 1.0
    True
    >>> f1 > 0.0
    True
    """
    # Import lazily so docs and lightweight tooling can import this module
    # without pulling in sklearn's full scipy stack at module import time.
    from sklearn.metrics import precision_recall_curve

    precision, recall, thresholds = precision_recall_curve(y_true, probabilities)

    # precision/recall carry one more entry than thresholds: the final
    # (precision=1, recall=0) point closes the curve and has no threshold.
    # Drop it so every F1 value lines up with exactly one threshold and the
    # argmax below can never index past the end of `thresholds`.
    precision = precision[:-1]
    recall = recall[:-1]

    # Epsilon guards against 0/0 when precision and recall are both zero.
    f1_scores = 2 * (precision * recall) / (precision + recall + 1e-10)
    best_idx = int(np.argmax(f1_scores))
    return float(thresholds[best_idx]), float(f1_scores[best_idx])