Source code for cobra.evaluation.evaluator


import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import seaborn as sns

from numpy import sqrt
from scipy.stats import norm

# classification
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_curve
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score
from sklearn.metrics import matthews_corrcoef
from sklearn.exceptions import NotFittedError

# regression
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score



class ClassificationEvaluator:
    """Evaluator class encapsulating classification model metrics
    and plotting functionality.

    Attributes
    ----------
    y_true : np.ndarray
        True binary target data labels.
    y_pred : np.ndarray
        Target scores of the model.
    confusion_matrix : np.ndarray
        Confusion matrix computed for a particular cut-off.
    cumulative_gains : tuple
        Data for plotting the cumulative gains curve.
    scalar_metrics : pd.Series
        Series containing various scalar evaluation metrics
        (accuracy, AUC, precision, recall, F1, Matthews correlation
        coefficient, lift at a given percentage).
    lift_at : float
        Top-level percentage at which the lift of the model
        should be computed.
    lift_curve : tuple
        Data for plotting the lift curve(s).
    probability_cutoff : float
        Probability cut-off to convert probability scores
        to binary labels.
    roc_curve : dict
        Map containing the false positive rates, true positive rates
        and the thresholds at which they were computed.
    n_bins : int, optional
        Number of bins used to compute the lift curve
        (by default 10, i.e. deciles).
    """

    def __init__(self,
                 probability_cutoff: float=None,
                 lift_at: float=0.05,
                 n_bins: int=10):

        self.y_true = None
        self.y_pred = None

        self.lift_at = lift_at
        self.probability_cutoff = probability_cutoff
        self.n_bins = n_bins

        # Placeholders to store fitted output
        self.scalar_metrics = None
        self.roc_curve = None
        self.confusion_matrix = None
        self.lift_curve = None
        self.cumulative_gains = None

    def fit(self, y_true: np.ndarray, y_pred: np.ndarray):
        """Fit the evaluator by computing the relevant evaluation
        metrics on the inputs.

        Parameters
        ----------
        y_true : np.ndarray
            True labels.
        y_pred : np.ndarray
            Model scores (as probabilities).
        """
        fpr, tpr, thresholds = roc_curve(y_true=y_true, y_score=y_pred)

        # if probability_cutoff is not set, take the optimal cut-off
        if not self.probability_cutoff:
            self.probability_cutoff = (ClassificationEvaluator
                                       ._compute_optimal_cutoff(fpr, tpr,
                                                                thresholds))

        # Transform probabilities to a binary array using the cut-off
        y_pred_b = np.array([0 if pred <= self.probability_cutoff else 1
                             for pred in y_pred])

        # Compute the various evaluation metrics
        self.scalar_metrics = ClassificationEvaluator._compute_scalar_metrics(
            y_true, y_pred, y_pred_b, self.lift_at
        )

        self.y_true = y_true
        self.y_pred = y_pred

        self.roc_curve = {"fpr": fpr, "tpr": tpr, "thresholds": thresholds}
        self.confusion_matrix = confusion_matrix(y_true, y_pred_b)
        self.lift_curve = ClassificationEvaluator._compute_lift_per_bin(
            y_true, y_pred, self.n_bins)
        self.cumulative_gains = ClassificationEvaluator._compute_cumulative_gains(
            y_true, y_pred)

    @staticmethod
    def _compute_scalar_metrics(y_true: np.ndarray,
                                y_pred: np.ndarray,
                                y_pred_b: np.ndarray,
                                lift_at: float) -> pd.Series:
        """Convenience function to compute various scalar performance
        measures and return them in a pd.Series.

        Parameters
        ----------
        y_true : np.ndarray
            True binary target data labels.
        y_pred : np.ndarray
            Target scores of the model.
        y_pred_b : np.ndarray
            Predicted target data labels (binary).
        lift_at : float
            At what top-level percentage the lift should be computed.

        Returns
        -------
        pd.Series
            Contains various performance measures of the model:
            accuracy, AUC, precision, recall, F1, Matthews correlation
            coefficient and lift at the given percentage.
        """
        return pd.Series({
            "accuracy": accuracy_score(y_true, y_pred_b),
            "AUC": roc_auc_score(y_true, y_pred),
            "precision": precision_score(y_true, y_pred_b),
            "recall": recall_score(y_true, y_pred_b),
            # F1 score of the positive class
            "F1": f1_score(y_true, y_pred_b, average=None)[1],
            "matthews_corrcoef": matthews_corrcoef(y_true, y_pred_b),
            "lift at {}".format(lift_at): np.round(
                ClassificationEvaluator._compute_lift(y_true=y_true,
                                                      y_pred=y_pred,
                                                      lift_at=lift_at), 2)
        })

    def plot_roc_curve(self, path: str=None, dim: tuple=(12, 8)):
        """Plot the ROC curve of the model.

        Parameters
        ----------
        path : str, optional
            Path to store the figure.
        dim : tuple, optional
            Tuple with the width and height of the plot.
        """
        if self.roc_curve is None:
            msg = ("This {} instance is not fitted yet. Call 'fit' with "
                   "appropriate arguments before using this method.")
            raise NotFittedError(msg.format(self.__class__.__name__))

        auc = float(self.scalar_metrics.loc["AUC"])

        with plt.style.context("seaborn-whitegrid"):
            fig, ax = plt.subplots(figsize=dim)

            ax.plot(self.roc_curve["fpr"],
                    self.roc_curve["tpr"],
                    color="cornflowerblue", linewidth=3,
                    label="ROC curve (area = {s:.3})".format(s=auc))

            ax.plot([0, 1], [0, 1], color="darkorange", linewidth=3,
                    linestyle="--")

            ax.set_xlabel("False Positive Rate", fontsize=15)
            ax.set_ylabel("True Positive Rate", fontsize=15)
            ax.legend(loc="lower right")
            ax.set_title("ROC curve", fontsize=20)

            if path:
                plt.savefig(path, format="png", dpi=300, bbox_inches="tight")

            plt.show()

    def plot_confusion_matrix(self, path: str=None, dim: tuple=(12, 8),
                              labels: list=["0", "1"]):
        """Plot the confusion matrix.

        Parameters
        ----------
        path : str, optional
            Path to store the figure.
        dim : tuple, optional
            Tuple with the width and height of the plot.
        labels : list, optional
            Optional list of labels, default "0" and "1".
        """
        if self.confusion_matrix is None:
            msg = ("This {} instance is not fitted yet. Call 'fit' with "
                   "appropriate arguments before using this method.")
            raise NotFittedError(msg.format(self.__class__.__name__))

        fig, ax = plt.subplots(figsize=dim)
        ax = sns.heatmap(self.confusion_matrix,
                         annot=self.confusion_matrix.astype(str),
                         fmt="s", cmap="Blues",
                         xticklabels=labels, yticklabels=labels)
        ax.set_title("Confusion matrix", fontsize=20)

        if path:
            plt.savefig(path, format="png", dpi=300, bbox_inches="tight")

        plt.show()

    def plot_cumulative_response_curve(self, path: str=None,
                                       dim: tuple=(12, 8)):
        """Plot the cumulative response curve.

        Parameters
        ----------
        path : str, optional
            Path to store the figure.
        dim : tuple, optional
            Tuple with the width and height of the plot.
        """
        if self.lift_curve is None:
            msg = ("This {} instance is not fitted yet. Call 'fit' with "
                   "appropriate arguments before using this method.")
            raise NotFittedError(msg.format(self.__class__.__name__))

        x_labels, lifts, inc_rate = self.lift_curve

        lifts = np.array(lifts)*inc_rate*100

        with plt.style.context("seaborn-ticks"):
            fig, ax = plt.subplots(figsize=dim)

            plt.bar(x_labels[::-1], lifts, align="center",
                    color="cornflowerblue")
            plt.ylabel("response (%)", fontsize=16)
            plt.xlabel("decile", fontsize=16)
            ax.set_xticks(x_labels)
            ax.set_xticklabels(x_labels)

            plt.axhline(y=inc_rate*100, color="darkorange", linestyle="--",
                        xmin=0.05, xmax=0.95, linewidth=3, label="Incidence")

            # Legend
            ax.legend(loc="upper right")

            # Set axes - make them pretty
            sns.despine(ax=ax, right=True, left=True)

            # Remove the grid lines
            ax.grid(False)

            # Description
            ax.set_title("Cumulative Response curve", fontsize=20)

            if path is not None:
                plt.savefig(path, format="png", dpi=300, bbox_inches="tight")

            plt.show()

    def plot_lift_curve(self, path: str=None, dim: tuple=(12, 8)):
        """Plot the lift per decile.

        Parameters
        ----------
        path : str, optional
            Path to store the figure.
        dim : tuple, optional
            Tuple with the width and height of the plot.
        """
        if self.lift_curve is None:
            msg = ("This {} instance is not fitted yet. Call 'fit' with "
                   "appropriate arguments before using this method.")
            raise NotFittedError(msg.format(self.__class__.__name__))

        x_labels, lifts, _ = self.lift_curve

        with plt.style.context("seaborn-ticks"):
            fig, ax = plt.subplots(figsize=dim)

            plt.bar(x_labels[::-1], lifts, align="center",
                    color="cornflowerblue")
            plt.ylabel("lift", fontsize=16)
            plt.xlabel("decile", fontsize=16)
            ax.set_xticks(x_labels)
            ax.set_xticklabels(x_labels)

            plt.axhline(y=1, color="darkorange", linestyle="--",
                        xmin=0.05, xmax=0.95, linewidth=3, label="Baseline")

            # Legend
            ax.legend(loc="upper right")

            # Set axes - make them pretty
            sns.despine(ax=ax, right=True, left=True)

            # Remove the grid lines
            ax.grid(False)

            # Description
            ax.set_title("Cumulative Lift curve", fontsize=20)

            if path is not None:
                plt.savefig(path, format="png", dpi=300, bbox_inches="tight")

            plt.show()

    def plot_cumulative_gains(self, path: str=None, dim: tuple=(12, 8)):
        """Plot the cumulative gains per decile.

        Parameters
        ----------
        path : str, optional
            Path to store the figure.
        dim : tuple, optional
            Tuple with the width and height of the plot.
        """
        if self.cumulative_gains is None:
            msg = ("This {} instance is not fitted yet. Call 'fit' with "
                   "appropriate arguments before using this method.")
            raise NotFittedError(msg.format(self.__class__.__name__))

        with plt.style.context("seaborn-whitegrid"):
            fig, ax = plt.subplots(figsize=dim)

            ax.plot(self.cumulative_gains[0]*100,
                    self.cumulative_gains[1]*100,
                    color="cornflowerblue", linewidth=3,
                    label="cumulative gains")
            ax.plot(ax.get_xlim(), ax.get_ylim(), linewidth=3,
                    ls="--", color="darkorange", label="random selection")

            ax.set_title("Cumulative Gains curve", fontsize=20)

            # Format axes
            ax.set_xlim([0, 100])
            ax.set_ylim([0, 105])

            # Format ticks
            ticks_loc_y = ax.get_yticks().tolist()
            ax.yaxis.set_major_locator(mticker.FixedLocator(ticks_loc_y))
            ax.set_yticklabels(["{:3.0f}%".format(x) for x in ticks_loc_y])

            ticks_loc_x = ax.get_xticks().tolist()
            ax.xaxis.set_major_locator(mticker.FixedLocator(ticks_loc_x))
            ax.set_xticklabels(["{:3.0f}%".format(x) for x in ticks_loc_x])

            # Legend
            ax.legend(loc="lower right")

            if path is not None:
                plt.savefig(path, format="png", dpi=300, bbox_inches="tight")

            plt.show()

    @staticmethod
    def _find_optimal_cutoff(y_true: np.ndarray,
                             y_pred: np.ndarray) -> float:
        """Find the optimal probability cut-off point for a
        classification model. Wrapper around _compute_optimal_cutoff.

        Parameters
        ----------
        y_true : np.ndarray
            True binary target data labels.
        y_pred : np.ndarray
            Target scores of the model.

        Returns
        -------
        float
            Optimal cut-off probability for the model.
        """
        return ClassificationEvaluator._compute_optimal_cutoff(
            *roc_curve(y_true=y_true, y_score=y_pred))

    @staticmethod
    def _compute_optimal_cutoff(fpr: np.ndarray, tpr: np.ndarray,
                                thresholds: np.ndarray) -> float:
        """Find the optimal probability cut-off point for a
        classification model.

        The optimal cut-off is where TPR is high and FPR is low, hence
        TPR - (1-FPR) should be zero or close to zero at that point.

        Parameters
        ----------
        fpr : np.ndarray
            False positive rate for various thresholds.
        tpr : np.ndarray
            True positive rate for various thresholds.
        thresholds : np.ndarray
            List of thresholds for which fpr and tpr were computed.

        Returns
        -------
        float
            Optimal probability cut-off point.
        """
        temp = np.absolute(tpr - (1-fpr))

        # index of the optimal value is the one for which temp is minimal
        optimal_index = np.where(temp == min(temp))[0]

        return thresholds[optimal_index][0]

    @staticmethod
    def _compute_cumulative_gains(y_true: np.ndarray,
                                  y_pred: np.ndarray) -> tuple:
        """Compute the cumulative gains of the model: the percentages
        of the population targeted and the corresponding cumulative
        gains, used to plot the cumulative gains curve.

        Code adapted from
        https://github.com/reiinakano/scikit-plot/blob/2dd3e6a76df77edcbd724c4db25575f70abb57cb/scikitplot/helpers.py#L157

        Parameters
        ----------
        y_true : np.ndarray
            True binary target data labels.
        y_pred : np.ndarray
            Target scores of the model.

        Returns
        -------
        tuple
            With the percentages and the gains.
        """
        # make y_true a boolean vector
        y_true = (y_true == 1)

        sorted_indices = np.argsort(y_pred)[::-1]
        y_true = y_true[sorted_indices]
        gains = np.cumsum(y_true)

        percentages = np.arange(start=1, stop=len(y_true) + 1)

        gains = gains / float(np.sum(y_true))
        percentages = percentages / float(len(y_true))

        gains = np.insert(gains, 0, [0])
        percentages = np.insert(percentages, 0, [0])

        return percentages, gains

    @staticmethod
    def _compute_lift_per_bin(y_true: np.ndarray,
                              y_pred: np.ndarray,
                              n_bins: int=10) -> tuple:
        """Compute the lift of the model for a given number of bins.
        Returns the x-labels, the lifts per bin and the target
        incidence needed to create cumulative response curves.

        Parameters
        ----------
        y_true : np.ndarray
            True binary target data labels.
        y_pred : np.ndarray
            Target scores of the model.
        n_bins : int, optional
            Number of bins used to compute the lift curve
            (by default 10, i.e. deciles).

        Returns
        -------
        tuple
            Includes x-labels, lifts per bin and the target incidence.
        """
        lifts = [ClassificationEvaluator._compute_lift(y_true=y_true,
                                                       y_pred=y_pred,
                                                       lift_at=perc_lift)
                 for perc_lift in np.linspace(1/n_bins, 1, num=n_bins,
                                              endpoint=True)]

        x_labels = [len(lifts)-x for x in np.arange(0, len(lifts), 1)]

        return x_labels, lifts, y_true.mean()

    @staticmethod
    def _compute_lift(y_true: np.ndarray, y_pred: np.ndarray,
                      lift_at: float=0.05) -> float:
        """Calculate the lift of the model at a given top-level
        percentage.

        Parameters
        ----------
        y_true : np.ndarray
            True binary target data labels.
        y_pred : np.ndarray
            Target scores of the model.
        lift_at : float, optional
            At what top-level percentage the lift should be computed.

        Returns
        -------
        float
            Lift of the model.
        """
        # Make sure the inputs are numpy arrays
        y_true_ = np.array(y_true)
        y_pred_ = np.array(y_pred)

        # Make sure they have the correct shape
        y_true_ = y_true_.reshape(len(y_true_), 1)
        y_pred_ = y_pred_.reshape(len(y_pred_), 1)

        # Merge the data together
        y_data = np.hstack([y_true_, y_pred_])

        # Calculate the necessary variables
        nrows = len(y_data)
        stop = int(np.floor(nrows*lift_at))
        avg_incidence = np.einsum("ij->j", y_true_)/float(len(y_true_))

        # Sort and filter the data
        data_sorted = (y_data[y_data[:, 1].argsort()[::-1]][:stop, 0]
                       .reshape(stop, 1))

        # Calculate the lift (einsum is a very fast way of summing,
        # but needs a specific shape)
        inc_in_top_n = np.einsum("ij->j", data_sorted)/float(len(data_sorted))
        lift = np.round(inc_in_top_n/avg_incidence, 2)[0]

        return lift


class RegressionEvaluator:
    """Evaluator class encapsulating regression model metrics
    and plotting functionality.

    Attributes
    ----------
    y_true : np.ndarray
        True target values.
    y_pred : np.ndarray
        Predictions of the model.
    scalar_metrics : pd.Series
        Series containing various scalar evaluation metrics
        (R-squared, MAE, MSE, RMSE).
    qq : pd.Series
        Theoretical quantiles and associated actual residuals.
    """

    def __init__(self):
        self.y_true = None
        self.y_pred = None

        # Placeholders to store fitted output
        self.scalar_metrics = None
        self.qq = None

    def fit(self, y_true: np.ndarray, y_pred: np.ndarray):
        """Fit the evaluator by computing the relevant evaluation
        metrics on the inputs.

        Parameters
        ----------
        y_true : np.ndarray
            True labels.
        y_pred : np.ndarray
            Model scores.
        """
        # Compute the various evaluation metrics
        self.scalar_metrics = RegressionEvaluator._compute_scalar_metrics(
            y_true, y_pred)

        self.y_true = y_true
        self.y_pred = y_pred

        # Compute qq info
        self.qq = RegressionEvaluator._compute_qq_residuals(y_true, y_pred)

    @staticmethod
    def _compute_scalar_metrics(y_true: np.ndarray,
                                y_pred: np.ndarray) -> pd.Series:
        """Convenience function to compute various scalar performance
        measures and return them in a pd.Series.

        Parameters
        ----------
        y_true : np.ndarray
            True target values.
        y_pred : np.ndarray
            Predictions of the model.

        Returns
        -------
        pd.Series
            Contains various performance measures of the model:
            R-squared (coefficient of determination),
            mean absolute error (expected value of the absolute error loss),
            mean squared error (expected value of the quadratic error) and
            root mean squared error (square root of the mean squared error).
        """
        return pd.Series({
            "R2": r2_score(y_true, y_pred),
            "MAE": mean_absolute_error(y_true, y_pred),
            "MSE": mean_squared_error(y_true, y_pred),
            "RMSE": sqrt(mean_squared_error(y_true, y_pred))
        })

    @staticmethod
    def _compute_qq_residuals(y_true: np.ndarray,
                              y_pred: np.ndarray) -> pd.Series:
        """Convenience function to compute the theoretical quantiles
        and the standardized residuals of the predictions and return
        them in a pd.Series.

        Parameters
        ----------
        y_true : np.ndarray
            True target values.
        y_pred : np.ndarray
            Predictions of the model.

        Returns
        -------
        pd.Series
            Theoretical quantiles and associated actual residuals.
        """
        # also possible directly via statsmodels.api.qqplot()
        n = len(y_true)

        df = pd.DataFrame({"res": sorted(y_true - y_pred)})  # ascending order

        m, s = df["res"].mean(), df["res"].std()
        df["z_res"] = df["res"].apply(lambda x: (x-m)/s)

        df["rank"] = df.index + 1
        df["percentile"] = df["rank"].apply(lambda x: x/(n+1))  # divide by n+1 to avoid inf
        df["q_theoretical"] = norm.ppf(df["percentile"])

        return pd.Series({
            "quantiles": df["q_theoretical"].values,
            "residuals": df["z_res"].values,
        })

    def plot_predictions(self, path: str=None, dim: tuple=(12, 8)):
        """Plot the predictions of the model against the actual values.

        Parameters
        ----------
        path : str, optional
            Path to store the figure.
        dim : tuple, optional
            Tuple with the width and height of the plot.
        """
        if self.y_true is None or self.y_pred is None:
            msg = ("This {} instance is not fitted yet. Call 'fit' with "
                   "appropriate arguments before using this method.")
            raise NotFittedError(msg.format(self.__class__.__name__))

        y_true = self.y_true
        y_pred = self.y_pred

        with plt.style.context("seaborn-whitegrid"):
            fig, ax = plt.subplots(figsize=dim)

            x = np.arange(1, len(y_true)+1)

            ax.plot(x, y_true, ls="--", label="actuals",
                    color="darkorange", linewidth=3)
            ax.plot(x, y_pred, label="predictions",
                    color="cornflowerblue", linewidth=3)

            ax.set_xlabel("Index", fontsize=15)
            ax.set_ylabel("Value", fontsize=15)
            ax.legend(loc="best")
            ax.set_title("Predictions vs. Actuals", fontsize=20)

            if path:
                plt.savefig(path, format="png", dpi=300, bbox_inches="tight")

            plt.show()

    def plot_qq(self, path: str=None, dim: tuple=(12, 8)):
        """Display a Q-Q plot of the standardized prediction residuals.

        Parameters
        ----------
        path : str, optional
            Path to store the figure.
        dim : tuple, optional
            Tuple with the width and height of the plot.
        """
        if self.qq is None:
            msg = ("This {} instance is not fitted yet. Call 'fit' with "
                   "appropriate arguments before using this method.")
            raise NotFittedError(msg.format(self.__class__.__name__))

        with plt.style.context("seaborn-whitegrid"):
            fig, ax = plt.subplots(figsize=dim)

            x = self.qq["quantiles"]
            y = self.qq["residuals"]

            ax.plot(x, x, ls="--", label="perfect model",
                    color="darkorange", linewidth=3)
            ax.plot(x, y, label="current model",
                    color="cornflowerblue", linewidth=3)

            ax.set_xlabel("Theoretical quantiles", fontsize=15)
            ax.set_xticks(range(int(np.floor(min(x))),
                                int(np.ceil(max(x[x < float("inf")])))+1,
                                1))
            ax.set_ylabel("Standardized residuals", fontsize=15)
            ax.set_yticks(range(int(np.floor(min(y))),
                                int(np.ceil(max(y[x < float("inf")])))+1,
                                1))
            ax.legend(loc="best")
            ax.set_title("Q-Q plot", fontsize=20)

            if path:
                plt.savefig(path, format="png", dpi=300, bbox_inches="tight")

            plt.show()
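

# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original module): a minimal,
# self-contained example of fitting a RegressionEvaluator on synthetic
# data. The helper name and the synthetic data below are assumptions made
# purely for illustration, not the library's documented API.
def _example_regression_evaluation():  # hypothetical helper
    rng = np.random.default_rng(42)
    y_true = rng.normal(loc=100, scale=20, size=500)  # synthetic targets
    y_pred = y_true + rng.normal(scale=10, size=500)  # predictions with noise

    evaluator = RegressionEvaluator()
    evaluator.fit(y_true, y_pred)

    # scalar metrics: R2, MAE, MSE, RMSE
    print(evaluator.scalar_metrics)

    # Q-Q plot of standardized residuals vs. theoretical quantiles;
    # requires an interactive matplotlib backend (or pass `path=`)
    evaluator.plot_qq()
# ---------------------------------------------------------------------------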