Source code for mlpaper.perf_curves

# Ryan Turner (turnerry@iro.umontreal.ca)
from __future__ import absolute_import, print_function

import numpy as np

EPSILON = 1e-10  # Size of pseudo-point to add to true/false positive count.

# Interpolation kinds used here
LINEAR = "linear"
PREV = "previous"

# ============================================================================
# Create general binary count curves
# ============================================================================


def _add_pseudo_points(fps, tps):
    """Add pseudo-points that make ROC and PR analysis give sensible results in
    corner case there are no true positive or no false positives.

    Parameters
    ----------
    fps : ndarray, shape (n_boot, n_thresholds)
        A count of false positives, at index i being the number of negative
        samples assigned a ``score >= thresholds[i]``. The total number of
        negative samples is equal to ``fps[-1]`` (thus true negatives are given
        by ``fps[-1] - fps``).
    tps : ndarray, shape (n_boot, n_thresholds)
        An increasing count of true positives, at index i being the number
        of positive samples assigned a ``score >= thresholds[i]``. The total
        number of positive samples is equal to ``tps[-1]`` (thus false
        negatives are given by ``tps[-1] - tps``).

    Returns
    -------
    fps : ndarray, shape (n_boot, n_thresholds)
        If in corner case, `fps` after adding pseudo-points
    tps : ndarray, shape (n_boot, n_thresholds)
        If in corner case, `fps` after adding pseudo-points
    """
    assert fps.shape == tps.shape
    assert fps.size > 0  # Otherwise -1 index doesn't work

    fps_fix = fps[:, -1] == 0
    tps_fix = tps[:, -1] == 0

    if np.any(fps_fix) or np.any(tps_fix):
        fps, tps = fps.astype(float), tps.astype(float)
        fps[fps_fix, :] = EPSILON * tps[fps_fix, :]
        tps[tps_fix, :] = EPSILON * fps[tps_fix, :]
    return fps, tps


def _binary_clf_curve(y_true, y_score, sample_weight=None):
    """Calculate true and false positives per binary classification threshold.

    Based on `sklearn.metrics.ranking.binary_clf_curve` except that it supports
    a matrix a different sample weights `sample_weight`. It computes
    `binary_clf_curve` indenpedently for each column of `sample_weight` in a
    vectorized way. This is useful when doing a fast boot strap analysis. It is
    also more robust to corner cases such as when only a single class is
    present in `y_true`.

    Parameters
    ----------
    y_true : ndarray of type bool, shape (n_samples,)
        True targets of binary classification. Cannot be empty.
    y_score : ndarray, shape (n_samples,)
        Estimated probabilities or decision function. Must be finite.
    sample_weight : None or ndarray of shape (n_boot, n_samples)
        Sample weights. If `None`, all weights are one.

    Returns
    -------
    fps : ndarray, shape (n_boot, n_thresholds)
        A count of false positives, at index i being the number of negative
        samples assigned a ``score >= thresholds[i]``. The total number of
        negative samples is equal to ``fps[-1]`` (thus true negatives are given
        by ``fps[-1] - fps``).
    tps : ndarray, shape (n_boot, n_thresholds)
        An increasing count of true positives, at index i being the number
        of positive samples assigned a ``score >= thresholds[i]``. The total
        number of positive samples is equal to ``tps[-1]`` (thus false
        negatives are given by ``tps[-1] - tps``).
    thresholds : ndarray, shape (n_thresholds,)
        Decreasing score values.
    """
    assert y_true.ndim == 1 and y_true.dtype.kind == "b"
    assert y_score.shape == y_true.shape and np.all(np.isfinite(y_score))
    assert y_true.size >= 1, "y_true.size {}".format(y_true.size)

    # sort scores and corresponding truth values
    desc_score_indices = np.argsort(y_score, kind="mergesort")[::-1]
    y_score, y_true = y_score[desc_score_indices], y_true[desc_score_indices]

    # y_score typically has many tied values. Here we extract
    # the indices associated with the distinct values. We also
    # concatenate a value for the end of the curve.
    distinct_value_indices = np.where(np.diff(y_score))[0]
    threshold_idxs = np.r_[distinct_value_indices, y_true.size - 1]

    if sample_weight is None:
        tps = np.cumsum(y_true)[threshold_idxs]
        fps = 1 + threshold_idxs - tps
        assert fps[-1] == np.sum(~y_true) and tps[-1] == np.sum(y_true)
        tps, fps = tps[None, :], fps[None, :]  # Make output 2D in either case
    else:
        assert sample_weight.ndim == 2
        assert sample_weight.shape[1] == y_true.size
        assert sample_weight.shape[0] >= 1  # Might work at 0 anyway
        assert np.all(np.isfinite(sample_weight))
        # Negative weight makes no sense, 0 can violate assumps. of other funcs
        assert np.all(sample_weight > 0)

        weight = sample_weight[:, desc_score_indices]
        tps = np.cumsum(y_true[None, :] * weight, axis=1)[:, threshold_idxs]
        fps = np.cumsum(weight, axis=1)[:, threshold_idxs] - tps
        assert np.allclose(fps[:, -1], np.sum(weight[:, ~y_true], axis=1))
        assert np.allclose(tps[:, -1], np.sum(weight[:, y_true], axis=1))

    # Now put in the (0, 0) coord (y_score >= np.inf)
    zero_vec = np.zeros((fps.shape[0], 1), dtype=fps.dtype)
    fps, tps = np.c_[zero_vec, fps], np.c_[zero_vec, tps]
    thresholds = np.r_[np.inf, y_score[threshold_idxs]]
    assert thresholds.ndim == 1 and thresholds.size >= 2

    # Clean up corner case
    fps, tps = _add_pseudo_points(fps, tps)
    assert np.all(fps[:, -1] > 0) and np.all(tps[:, -1] > 0)
    assert fps.dtype == tps.dtype

    # Remove any decreases due to numerics
    fps = np.maximum.accumulate(fps, axis=1)
    assert np.all((np.diff(fps, axis=1) >= 0.0) & (np.diff(tps, axis=1) >= 0.0))

    return fps, tps, thresholds


# ============================================================================
# Convert general binary count curves to ROC, PR, PRG
# ============================================================================


[docs]def roc_curve(y_true, y_score, sample_weight=None):
    """Compute ROC curve with optional sample weight matrix.

    Based on `sklearn.metrics.ranking.roc_curve` except that it supports a
    matrix a different sample weights `sample_weight`. It computes
    the results indenpedently for each column of `sample_weight` in a
    vectorized way. This is useful when doing a fast boot strap analysis. It is
    also more robust to corner cases such as when only a single class is
    present in `y_true`.

    Parameters
    ----------
    y_true : ndarray of type bool, shape (n_samples,)
        True targets of binary classification. Cannot be empty.
    y_score : ndarray, shape (n_samples,)
        Estimated probabilities or decision function. Must be finite.
    sample_weight : None or ndarray of shape (n_samples, n_boot)
        Sample weights. If `None`, all weights are one.

    Returns
    -------
    fpr : ndarray, shape (n_boot, n_thresholds)
        The false positive rates. Each column is computed indepently by each
        column in `sample_weight`.
    tpr : ndarray, shape (n_boot, n_thresholds)
        The false positive rates. Each column is computed indepently by each
        column in `sample_weight`.
    thresholds : ndarray, shape (n_thresholds,)
        Decreasing score values.
    """
    fps, tps, thresholds = _binary_clf_curve(y_true, y_score, sample_weight=sample_weight)
    fpr = np.true_divide(fps, fps[:, -1:])
    tpr = np.true_divide(tps, tps[:, -1:])
    return (fpr, tpr, LINEAR), thresholds


[docs]def recall_precision_curve(y_true, y_score, sample_weight=None):
    """Compute recall precision curve with optional sample weight matrix. This
    has intentionally been named recall-precision rather than the traditional
    precision-recall.

    Based on `sklearn.metrics.ranking.precision_recall_curve` except that it
    supports a matrix a different sample weights `sample_weight`. The name
    order has been switched to `recall_precision_curve` to be consistent with
    `roc_curve` because recall is typically placed on the x-axis. It computes
    the results indenpedently for each column of `sample_weight` in a
    vectorized way. This is useful when doing a fast boot strap analysis. It is
    also more robust to corner cases such as when only a single class is
    present in `y_true`.

    Parameters
    ----------
    y_true : ndarray of type bool, shape (n_samples,)
        True targets of binary classification. Cannot be empty.
    y_score : ndarray, shape (n_samples,)
        Estimated probabilities or decision function. Must be finite.
    sample_weight : None or ndarray of shape (n_samples, n_boot)
        Sample weights. If `None`, all weights are one.

    Returns
    -------
    recall : ndarray, shape (n_boot, n_thresholds)
        The recall. Each column is computed indepently by each column in
        `sample_weight`.
    precision : ndarray, shape (n_boot, n_thresholds)
        The precision. Each column is computed indepently by each column in
        `sample_weight`.
    thresholds : ndarray, shape (n_thresholds,)
        Decreasing score values.
    """
    fps, tps, thresholds = _binary_clf_curve(y_true, y_score, sample_weight=sample_weight)
    recall = np.true_divide(tps, tps[:, -1:])
    with np.errstate(divide="ignore", invalid="ignore"):
        precision = np.true_divide(tps, tps + fps)
    precision[:, 0] = precision[:, 1]
    assert np.all(0.0 <= precision) and np.all(precision <= 1.0)
    return (recall, precision, PREV), thresholds


[docs]def prg_curve(y_true, y_score, sample_weight=None):
    """Compute precision recall gain curve with optional sample weight matrix.
    Similar to `recall_precision_curve`.

    Parameters
    ----------
    y_true : ndarray of type bool, shape (n_samples,)
        True targets of binary classification. Cannot be empty.
    y_score : ndarray, shape (n_samples,)
        Estimated probabilities or decision function. Must be finite.
    sample_weight : None or ndarray of shape (n_samples, n_boot)
        Sample weights. If `None`, all weights are one.

    Returns
    -------
    recall_gain : ndarray, shape (n_boot, n_thresholds)
        The recall_gain. Each column is computed indepently by each column in
        `sample_weight`.
    prec_gain : ndarray, shape (n_boot, n_thresholds)
        The precision gain. Each column is computed indepently by each column
        in `sample_weight`.
    thresholds : ndarray, shape (n_thresholds,)
        Decreasing score values.
    """
    fps, tps, thresholds = _binary_clf_curve(y_true, y_score, sample_weight=sample_weight)
    n_neg, n_pos = fps[:, -1:], tps[:, -1:]
    fns = n_pos - tps

    den = n_neg * tps
    with np.errstate(divide="ignore", invalid="ignore"):
        rec_gain = 1.0 - np.true_divide(n_pos * fns, den)
        prec_gain = 1.0 - np.true_divide(n_pos * fps, den)
    # interpolate backward just like in PR curve
    prec_gain[:, 0] = prec_gain[:, 1]

    # Bring forward most recent negative point as point at 0
    with np.errstate(invalid="ignore"):
        assert not np.any(np.diff(rec_gain, axis=1) < 0.0)
    rec_gain = np.maximum(0.0, rec_gain)
    assert np.all(rec_gain <= 1.0)
    assert np.all((rec_gain == 0.0) | (prec_gain <= 1.0))
    return (rec_gain, prec_gain, PREV), thresholds