Shortcuts

Source code for slideflow.biscuit.delong

import numpy as np
import scipy.stats

# AUC comparison adapted from
# https://github.com/Netflix/vmaf/
def compute_midrank(x):
    """Computes midranks.

    Args:
       x - a 1D numpy array

    Returns:
       array of midranks

    """
    J = np.argsort(x)
    Z = x[J]
    N = len(x)
    T = np.zeros(N, dtype=np.float32)
    i = 0
    while i < N:
        j = i
        while j < N and Z[j] == Z[i]:
            j += 1
        T[i:j] = 0.5*(i + j - 1)
        i = j
    T2 = np.empty(N, dtype=np.float32)
    # Note(kazeevn) +1 is due to Python using 0-based indexing
    # instead of 1-based in the AUC formula in the paper
    T2[J] = T + 1
    return T2


[docs]def fastDeLong(predictions_sorted_transposed, label_1_count): """The fast version of DeLong's method for computing the covariance of unadjusted AUC. Args: predictions_sorted_transposed: a 2D numpy.array[n_classifiers, n_examples] sorted such as the examples with label "1" are first Returns: (AUC value, DeLong covariance) """ # Short variables are named as they are in the paper m = label_1_count n = predictions_sorted_transposed.shape[1] - m positive_examples = predictions_sorted_transposed[:, :m] negative_examples = predictions_sorted_transposed[:, m:] k = predictions_sorted_transposed.shape[0] tx = np.empty([k, m], dtype=np.float32) ty = np.empty([k, n], dtype=np.float32) tz = np.empty([k, m + n], dtype=np.float32) for r in range(k): tx[r, :] = compute_midrank(positive_examples[r, :]) ty[r, :] = compute_midrank(negative_examples[r, :]) tz[r, :] = compute_midrank(predictions_sorted_transposed[r, :]) aucs = tz[:, :m].sum(axis=1) / m / n - float(m + 1.0) / 2.0 / n v01 = (tz[:, :m] - tx[:, :]) / n v10 = 1.0 - (tz[:, m:] - ty[:, :]) / m sx = np.cov(v01) sy = np.cov(v10) delongcov = sx / m + sy / n return aucs, delongcov
def calc_pvalue(aucs, sigma): """Computes log(10) of p-values. Args: aucs: 1D array of AUCs sigma: AUC DeLong covariances Returns: log10(pvalue) """ l = np.array([[1, -1]]) z = np.abs(np.diff(aucs)) / np.sqrt(np.dot(np.dot(l, sigma), l.T)) return np.log10(2) + scipy.stats.norm.logsf(z, loc=0, scale=1) / np.log(10) def compute_ground_truth_statistics(ground_truth): assert np.array_equal(np.unique(ground_truth), [0, 1]) order = (-ground_truth).argsort() label_1_count = int(ground_truth.sum()) return order, label_1_count
[docs]def delong_roc_variance(ground_truth, predictions): """Computes ROC AUC variance for a single set of predictions Args: ground_truth: np.array of 0 and 1 predictions: np.array of floats of the probability of being class 1 """ order, label_1_count = compute_ground_truth_statistics(ground_truth) predictions_sorted_transposed = predictions[np.newaxis, order] aucs, delongcov = fastDeLong(predictions_sorted_transposed, label_1_count) assert len(aucs) == 1, "There is a bug in the code, please forward this to the developers" return aucs[0], delongcov
[docs]def delong_roc_test(ground_truth, predictions_one, predictions_two): """Computes log(p-value) for hypothesis that two ROC AUCs are different Args: ground_truth: np.array of 0 and 1 predictions_one: predictions of the first model, np.array of floats of the probability of being class 1 predictions_two: predictions of the second model, np.array of floats of the probability of being class 1 """ order, label_1_count = compute_ground_truth_statistics(ground_truth) predictions_sorted_transposed = np.vstack((predictions_one, predictions_two))[:, order] aucs, delongcov = fastDeLong(predictions_sorted_transposed, label_1_count) return calc_pvalue(aucs, delongcov)