Source code for deepretro.utils.metrics

"""Threshold optimization utilities for binary classification."""

from typing import Sequence

import numpy as np


def find_optimal_threshold(
    y_true: Sequence[float],
    probabilities: Sequence[float],
) -> tuple[float, float]:
    """
    Find the classification threshold that maximises F1-score.

    Sweeps the precision-recall curve and picks the threshold where the
    harmonic mean of precision and recall is highest.

    Parameters
    ----------
    y_true : array-like, shape (n_samples,)
        True binary labels (0 or 1).
    probabilities : array-like, shape (n_samples,)
        Predicted probabilities for the positive class.

    Returns
    -------
    threshold : float
        Optimal classification threshold.
    f1 : float
        F1-score at the optimal threshold.

    Notes
    -----
    ``precision_recall_curve`` returns ``precision`` and ``recall`` arrays
    that are one element longer than ``thresholds``: the final
    (precision=1, recall=0) point has no associated threshold. Only the
    entries that do have a threshold are considered when searching for the
    maximum, so the selected index is always valid for ``thresholds``.

    A small constant (``1e-10``) is added to the denominator to avoid a
    division by zero when precision and recall are both zero, so the
    reported F1 is marginally below the exact value.

    When several thresholds share the best F1, the first one in the order
    returned by ``precision_recall_curve`` is selected.

    Input validation is delegated to ``precision_recall_curve``.

    Examples
    --------
    >>> import numpy as np
    >>> from deepretro.utils.metrics import find_optimal_threshold
    >>> y = np.array([0, 0, 1, 1])
    >>> proba = np.array([0.1, 0.4, 0.6, 0.9])
    >>> thr, f1 = find_optimal_threshold(y, proba)
    >>> 0.0 < thr < 1.0
    True
    >>> f1 > 0.0
    True
    """
    # Import lazily so docs and lightweight tooling can import this module
    # without pulling in sklearn's full scipy stack at module import time.
    from sklearn.metrics import precision_recall_curve

    precision, recall, thresholds = precision_recall_curve(y_true, probabilities)

    # The epsilon keeps the division defined when precision + recall == 0.
    f1_scores = 2 * (precision * recall) / (precision + recall + 1e-10)

    # Drop the trailing curve point that has no matching threshold so the
    # argmax result can never index past the end of ``thresholds``.
    candidate_f1 = f1_scores[: len(thresholds)]
    best_idx = int(np.argmax(candidate_f1))

    return float(thresholds[best_idx]), float(candidate_f1[best_idx])