Source code for cobra.evaluation.evaluator


import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import seaborn as sns

from numpy import sqrt
from scipy.stats import norm

# classification
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_curve
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score
from sklearn.metrics import matthews_corrcoef
from sklearn.exceptions import NotFittedError

# regression
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score



class ClassificationEvaluator:
    """Evaluator class encapsulating classification model metrics
    and plotting functionality.

    Attributes
    ----------
    y_true : np.ndarray
        True binary target data labels.
    y_pred : np.ndarray
        Target scores of the model.
    confusion_matrix : np.ndarray
        Confusion matrix computed for a particular cut-off.
    cumulative_gains : tuple
        Data for plotting the cumulative gains curve.
    scalar_metrics : pd.Series
        Series containing various scalar evaluation metrics
        (accuracy, AUC, precision, recall, F1, Matthews correlation
        coefficient, lift at a given percentage).
    lift_at : float
        Top-level percentage at which the lift of the model
        should be computed.
    lift_curve : tuple
        Data for plotting the lift curve(s).
    probability_cutoff : float
        Probability cut-off to convert probability scores
        to binary labels.
    roc_curve : dict
        Map containing the false positive rates, true positive rates
        and the thresholds at which they were computed.
    n_bins : int, optional
        Number of bins used to compute the lift curve
        (by default 10, i.e. deciles).
    """

    def __init__(self,
                 probability_cutoff: float=None,
                 lift_at: float=0.05,
                 n_bins: int=10):

        self.y_true = None
        self.y_pred = None

        self.lift_at = lift_at
        self.probability_cutoff = probability_cutoff
        self.n_bins = n_bins

        # Placeholders to store fitted output
        self.scalar_metrics = None
        self.roc_curve = None
        self.confusion_matrix = None
        self.lift_curve = None
        self.cumulative_gains = None

    def fit(self, y_true: np.ndarray, y_pred: np.ndarray):
        """Fit the evaluator by computing the relevant evaluation
        metrics on the inputs.

        Parameters
        ----------
        y_true : np.ndarray
            True labels.
        y_pred : np.ndarray
            Model scores (as probabilities).
        """
        fpr, tpr, thresholds = roc_curve(y_true=y_true, y_score=y_pred)

        # if probability_cutoff is not set, take the optimal cut-off
        if not self.probability_cutoff:
            self.probability_cutoff = (ClassificationEvaluator
                                       ._compute_optimal_cutoff(fpr, tpr,
                                                                thresholds))

        # Transform probabilities to a binary array using the cut-off
        y_pred_b = np.array([0 if pred <= self.probability_cutoff else 1
                             for pred in y_pred])

        # Compute the various evaluation metrics
        self.scalar_metrics = ClassificationEvaluator._compute_scalar_metrics(
            y_true, y_pred, y_pred_b, self.lift_at
        )

        self.y_true = y_true
        self.y_pred = y_pred

        self.roc_curve = {"fpr": fpr, "tpr": tpr, "thresholds": thresholds}
        self.confusion_matrix = confusion_matrix(y_true, y_pred_b)
        self.lift_curve = ClassificationEvaluator._compute_lift_per_bin(
            y_true, y_pred, self.n_bins)
        self.cumulative_gains = ClassificationEvaluator._compute_cumulative_gains(
            y_true, y_pred)

    @staticmethod
    def _compute_scalar_metrics(y_true: np.ndarray,
                                y_pred: np.ndarray,
                                y_pred_b: np.ndarray,
                                lift_at: float) -> pd.Series:
        """Convenience function to compute various scalar performance
        measures and return them in a pd.Series.

        Parameters
        ----------
        y_true : np.ndarray
            True binary target data labels.
        y_pred : np.ndarray
            Target scores of the model.
        y_pred_b : np.ndarray
            Predicted target data labels (binary).
        lift_at : float
            At what top-level percentage the lift should be computed.

        Returns
        -------
        pd.Series
            Contains various performance measures of the model:
            accuracy, AUC, precision, recall, F1, Matthews correlation
            coefficient and lift at the given percentage.
        """
        return pd.Series({
            "accuracy": accuracy_score(y_true, y_pred_b),
            "AUC": roc_auc_score(y_true, y_pred),
            "precision": precision_score(y_true, y_pred_b),
            "recall": recall_score(y_true, y_pred_b),
            # F1 score of the positive class
            "F1": f1_score(y_true, y_pred_b, average=None)[1],
            "matthews_corrcoef": matthews_corrcoef(y_true, y_pred_b),
            "lift at {}".format(lift_at): np.round(
                ClassificationEvaluator._compute_lift(y_true=y_true,
                                                      y_pred=y_pred,
                                                      lift_at=lift_at), 2)
        })

    def plot_roc_curve(self, path: str=None, dim: tuple=(12, 8)):
        """Plot the ROC curve of the model.

        Parameters
        ----------
        path : str, optional
            Path to store the figure.
        dim : tuple, optional
            Tuple with the width and height of the plot.
        """
        if self.roc_curve is None:
            msg = ("This {} instance is not fitted yet. Call 'fit' with "
                   "appropriate arguments before using this method.")
            raise NotFittedError(msg.format(self.__class__.__name__))

        auc = float(self.scalar_metrics.loc["AUC"])

        with plt.style.context("seaborn-whitegrid"):
            fig, ax = plt.subplots(figsize=dim)

            ax.plot(self.roc_curve["fpr"],
                    self.roc_curve["tpr"],
                    color="cornflowerblue", linewidth=3,
                    label="ROC curve (area = {s:.3})".format(s=auc))

            ax.plot([0, 1], [0, 1], color="darkorange", linewidth=3,
                    linestyle="--")

            ax.set_xlabel("False Positive Rate", fontsize=15)
            ax.set_ylabel("True Positive Rate", fontsize=15)
            ax.legend(loc="lower right")
            ax.set_title("ROC curve", fontsize=20)

            if path:
                plt.savefig(path, format="png", dpi=300, bbox_inches="tight")

            plt.show()

    def plot_confusion_matrix(self, path: str=None, dim: tuple=(12, 8),
                              labels: list=["0", "1"]):
        """Plot the confusion matrix.

        Parameters
        ----------
        path : str, optional
            Path to store the figure.
        dim : tuple, optional
            Tuple with the width and height of the plot.
        labels : list, optional
            Optional list of labels, default "0" and "1".
        """
        if self.confusion_matrix is None:
            msg = ("This {} instance is not fitted yet. Call 'fit' with "
                   "appropriate arguments before using this method.")
            raise NotFittedError(msg.format(self.__class__.__name__))

        fig, ax = plt.subplots(figsize=dim)
        ax = sns.heatmap(self.confusion_matrix,
                         annot=self.confusion_matrix.astype(str),
                         fmt="s", cmap="Blues",
                         xticklabels=labels, yticklabels=labels)
        ax.set_title("Confusion matrix", fontsize=20)

        if path:
            plt.savefig(path, format="png", dpi=300, bbox_inches="tight")

        plt.show()

    def plot_cumulative_response_curve(self, path: str=None,
                                       dim: tuple=(12, 8)):
        """Plot the cumulative response curve.

        Parameters
        ----------
        path : str, optional
            Path to store the figure.
        dim : tuple, optional
            Tuple with the width and height of the plot.
        """
        if self.lift_curve is None:
            msg = ("This {} instance is not fitted yet. Call 'fit' with "
                   "appropriate arguments before using this method.")
            raise NotFittedError(msg.format(self.__class__.__name__))

        x_labels, lifts, inc_rate = self.lift_curve

        lifts = np.array(lifts)*inc_rate*100

        with plt.style.context("seaborn-ticks"):
            fig, ax = plt.subplots(figsize=dim)

            plt.bar(x_labels[::-1], lifts, align="center",
                    color="cornflowerblue")
            plt.ylabel("response (%)", fontsize=16)
            plt.xlabel("decile", fontsize=16)
            ax.set_xticks(x_labels)
            ax.set_xticklabels(x_labels)

            plt.axhline(y=inc_rate*100, color="darkorange", linestyle="--",
                        xmin=0.05, xmax=0.95, linewidth=3, label="Incidence")

            # Legend
            ax.legend(loc="upper right")

            # Set axes - make them pretty
            sns.despine(ax=ax, right=True, left=True)

            # Remove the grid lines
            ax.grid(False)

            # Description
            ax.set_title("Cumulative Response curve", fontsize=20)

            if path is not None:
                plt.savefig(path, format="png", dpi=300, bbox_inches="tight")

            plt.show()

    def plot_lift_curve(self, path: str=None, dim: tuple=(12, 8)):
        """Plot the lift per decile.

        Parameters
        ----------
        path : str, optional
            Path to store the figure.
        dim : tuple, optional
            Tuple with the width and height of the plot.
        """
        if self.lift_curve is None:
            msg = ("This {} instance is not fitted yet. Call 'fit' with "
                   "appropriate arguments before using this method.")
            raise NotFittedError(msg.format(self.__class__.__name__))

        x_labels, lifts, _ = self.lift_curve

        with plt.style.context("seaborn-ticks"):
            fig, ax = plt.subplots(figsize=dim)

            plt.bar(x_labels[::-1], lifts, align="center",
                    color="cornflowerblue")
            plt.ylabel("lift", fontsize=16)
            plt.xlabel("decile", fontsize=16)
            ax.set_xticks(x_labels)
            ax.set_xticklabels(x_labels)

            plt.axhline(y=1, color="darkorange", linestyle="--",
                        xmin=0.05, xmax=0.95, linewidth=3, label="Baseline")

            # Legend
            ax.legend(loc="upper right")

            # Set axes - make them pretty
            sns.despine(ax=ax, right=True, left=True)

            # Remove the grid lines
            ax.grid(False)

            # Description
            ax.set_title("Cumulative Lift curve", fontsize=20)

            if path is not None:
                plt.savefig(path, format="png", dpi=300, bbox_inches="tight")

            plt.show()

    def plot_cumulative_gains(self, path: str=None, dim: tuple=(12, 8)):
        """Plot the cumulative gains per decile.

        Parameters
        ----------
        path : str, optional
            Path to store the figure.
        dim : tuple, optional
            Tuple with the width and height of the plot.
        """
        if self.cumulative_gains is None:
            msg = ("This {} instance is not fitted yet. Call 'fit' with "
                   "appropriate arguments before using this method.")
            raise NotFittedError(msg.format(self.__class__.__name__))

        with plt.style.context("seaborn-whitegrid"):
            fig, ax = plt.subplots(figsize=dim)

            ax.plot(self.cumulative_gains[0]*100,
                    self.cumulative_gains[1]*100,
                    color="cornflowerblue", linewidth=3,
                    label="cumulative gains")
            ax.plot(ax.get_xlim(), ax.get_ylim(), linewidth=3,
                    ls="--", color="darkorange", label="random selection")

            ax.set_title("Cumulative Gains curve", fontsize=20)

            # Format axes
            ax.set_xlim([0, 100])
            ax.set_ylim([0, 105])

            # Format ticks
            ticks_loc_y = ax.get_yticks().tolist()
            ax.yaxis.set_major_locator(mticker.FixedLocator(ticks_loc_y))
            ax.set_yticklabels(["{:3.0f}%".format(x) for x in ticks_loc_y])

            ticks_loc_x = ax.get_xticks().tolist()
            ax.xaxis.set_major_locator(mticker.FixedLocator(ticks_loc_x))
            ax.set_xticklabels(["{:3.0f}%".format(x) for x in ticks_loc_x])

            # Legend
            ax.legend(loc="lower right")

            if path is not None:
                plt.savefig(path, format="png", dpi=300, bbox_inches="tight")

            plt.show()

    @staticmethod
    def _find_optimal_cutoff(y_true: np.ndarray,
                             y_pred: np.ndarray) -> float:
        """Find the optimal probability cut-off point for a
        classification model. Wrapper around _compute_optimal_cutoff.

        Parameters
        ----------
        y_true : np.ndarray
            True binary target data labels.
        y_pred : np.ndarray
            Target scores of the model.

        Returns
        -------
        float
            Optimal cut-off probability for the model.
        """
        return ClassificationEvaluator._compute_optimal_cutoff(
            *roc_curve(y_true=y_true, y_score=y_pred))

    @staticmethod
    def _compute_optimal_cutoff(fpr: np.ndarray, tpr: np.ndarray,
                                thresholds: np.ndarray) -> float:
        """Find the optimal probability cut-off point for a
        classification model.

        The optimal cut-off is where TPR is high and FPR is low, hence
        TPR - (1-FPR) should be zero or close to zero at that point.

        Parameters
        ----------
        fpr : np.ndarray
            False positive rate for various thresholds.
        tpr : np.ndarray
            True positive rate for various thresholds.
        thresholds : np.ndarray
            List of thresholds for which fpr and tpr were computed.

        Returns
        -------
        float
            Optimal probability cut-off point.
        """
        temp = np.absolute(tpr - (1-fpr))

        # index of the optimal value is the one for which temp is minimal
        optimal_index = np.where(temp == min(temp))[0]

        return thresholds[optimal_index][0]

    @staticmethod
    def _compute_cumulative_gains(y_true: np.ndarray,
                                  y_pred: np.ndarray) -> tuple:
        """Compute the cumulative gains of the model: the percentages
        of the population targeted and the corresponding cumulative
        gains, used to plot the cumulative gains curve.

        Code adapted from
        https://github.com/reiinakano/scikit-plot/blob/2dd3e6a76df77edcbd724c4db25575f70abb57cb/scikitplot/helpers.py#L157

        Parameters
        ----------
        y_true : np.ndarray
            True binary target data labels.
        y_pred : np.ndarray
            Target scores of the model.

        Returns
        -------
        tuple
            With the percentages and the gains.
        """
        # make y_true a boolean vector
        y_true = (y_true == 1)

        sorted_indices = np.argsort(y_pred)[::-1]
        y_true = y_true[sorted_indices]
        gains = np.cumsum(y_true)

        percentages = np.arange(start=1, stop=len(y_true) + 1)

        gains = gains / float(np.sum(y_true))
        percentages = percentages / float(len(y_true))

        gains = np.insert(gains, 0, [0])
        percentages = np.insert(percentages, 0, [0])

        return percentages, gains

    @staticmethod
    def _compute_lift_per_bin(y_true: np.ndarray,
                              y_pred: np.ndarray,
                              n_bins: int=10) -> tuple:
        """Compute the lift of the model for a given number of bins.
        Returns the x-labels, the lifts per bin and the target
        incidence needed to create cumulative response curves.

        Parameters
        ----------
        y_true : np.ndarray
            True binary target data labels.
        y_pred : np.ndarray
            Target scores of the model.
        n_bins : int, optional
            Number of bins used to compute the lift curve
            (by default 10, i.e. deciles).

        Returns
        -------
        tuple
            Includes x-labels, lifts per bin and the target incidence.
        """
        lifts = [ClassificationEvaluator._compute_lift(y_true=y_true,
                                                       y_pred=y_pred,
                                                       lift_at=perc_lift)
                 for perc_lift in np.linspace(1/n_bins, 1, num=n_bins,
                                              endpoint=True)]

        x_labels = [len(lifts)-x for x in np.arange(0, len(lifts), 1)]

        return x_labels, lifts, y_true.mean()

    @staticmethod
    def _compute_lift(y_true: np.ndarray, y_pred: np.ndarray,
                      lift_at: float=0.05) -> float:
        """Calculate the lift of the model at a given top-level
        percentage.

        Parameters
        ----------
        y_true : np.ndarray
            True binary target data labels.
        y_pred : np.ndarray
            Target scores of the model.
        lift_at : float, optional
            At what top-level percentage the lift should be computed.

        Returns
        -------
        float
            Lift of the model.
        """
        # Make sure the inputs are numpy arrays
        y_true_ = np.array(y_true)
        y_pred_ = np.array(y_pred)

        # Make sure they have the correct shape
        y_true_ = y_true_.reshape(len(y_true_), 1)
        y_pred_ = y_pred_.reshape(len(y_pred_), 1)

        # Merge the data together
        y_data = np.hstack([y_true_, y_pred_])

        # Calculate the necessary variables
        nrows = len(y_data)
        stop = int(np.floor(nrows*lift_at))
        avg_incidence = np.einsum("ij->j", y_true_)/float(len(y_true_))

        # Sort and filter the data
        data_sorted = (y_data[y_data[:, 1].argsort()[::-1]][:stop, 0]
                       .reshape(stop, 1))

        # Calculate the lift (einsum is a very fast way of summing,
        # but needs a specific shape)
        inc_in_top_n = np.einsum("ij->j", data_sorted)/float(len(data_sorted))
        lift = np.round(inc_in_top_n/avg_incidence, 2)[0]

        return lift


class RegressionEvaluator:
    """Evaluator class encapsulating regression model metrics
    and plotting functionality.

    Attributes
    ----------
    y_true : np.ndarray
        True target values.
    y_pred : np.ndarray
        Predictions of the model.
    scalar_metrics : pd.Series
        Series containing various scalar evaluation metrics
        (R-squared, MAE, MSE, RMSE).
    qq : pd.Series
        Theoretical quantiles and associated actual residuals.
    """

    def __init__(self):
        self.y_true = None
        self.y_pred = None

        # Placeholders to store fitted output
        self.scalar_metrics = None
        self.qq = None

    def fit(self, y_true: np.ndarray, y_pred: np.ndarray):
        """Fit the evaluator by computing the relevant evaluation
        metrics on the inputs.

        Parameters
        ----------
        y_true : np.ndarray
            True labels.
        y_pred : np.ndarray
            Model scores.
        """
        # Compute the various evaluation metrics
        self.scalar_metrics = RegressionEvaluator._compute_scalar_metrics(
            y_true, y_pred)

        self.y_true = y_true
        self.y_pred = y_pred

        # Compute qq info
        self.qq = RegressionEvaluator._compute_qq_residuals(y_true, y_pred)

    @staticmethod
    def _compute_scalar_metrics(y_true: np.ndarray,
                                y_pred: np.ndarray) -> pd.Series:
        """Convenience function to compute various scalar performance
        measures and return them in a pd.Series.

        Parameters
        ----------
        y_true : np.ndarray
            True target values.
        y_pred : np.ndarray
            Predictions of the model.

        Returns
        -------
        pd.Series
            Contains various performance measures of the model:
            R-squared (coefficient of determination),
            mean absolute error (expected value of the absolute error loss),
            mean squared error (expected value of the quadratic error) and
            root mean squared error (square root of the mean squared error).
        """
        return pd.Series({
            "R2": r2_score(y_true, y_pred),
            "MAE": mean_absolute_error(y_true, y_pred),
            "MSE": mean_squared_error(y_true, y_pred),
            "RMSE": sqrt(mean_squared_error(y_true, y_pred))
        })

    @staticmethod
    def _compute_qq_residuals(y_true: np.ndarray,
                              y_pred: np.ndarray) -> pd.Series:
        """Convenience function to compute the theoretical quantiles
        and the standardized residuals of the predictions and return
        them in a pd.Series.

        Parameters
        ----------
        y_true : np.ndarray
            True target values.
        y_pred : np.ndarray
            Predictions of the model.

        Returns
        -------
        pd.Series
            Theoretical quantiles and associated actual residuals.
        """
        # also possible directly via statsmodels.api.qqplot()
        n = len(y_true)

        df = pd.DataFrame({"res": sorted(y_true - y_pred)})  # ascending order

        m, s = df["res"].mean(), df["res"].std()
        df["z_res"] = df["res"].apply(lambda x: (x-m)/s)

        df["rank"] = df.index + 1
        df["percentile"] = df["rank"].apply(lambda x: x/(n+1))  # divide by n+1 to avoid inf
        df["q_theoretical"] = norm.ppf(df["percentile"])

        return pd.Series({
            "quantiles": df["q_theoretical"].values,
            "residuals": df["z_res"].values,
        })

    def plot_predictions(self, path: str=None, dim: tuple=(12, 8)):
        """Plot the predictions of the model against the actual values.

        Parameters
        ----------
        path : str, optional
            Path to store the figure.
        dim : tuple, optional
            Tuple with the width and height of the plot.
        """
        if self.y_true is None or self.y_pred is None:
            msg = ("This {} instance is not fitted yet. Call 'fit' with "
                   "appropriate arguments before using this method.")
            raise NotFittedError(msg.format(self.__class__.__name__))

        y_true = self.y_true
        y_pred = self.y_pred

        with plt.style.context("seaborn-whitegrid"):
            fig, ax = plt.subplots(figsize=dim)

            x = np.arange(1, len(y_true)+1)

            ax.plot(x, y_true, ls="--", label="actuals",
                    color="darkorange", linewidth=3)
            ax.plot(x, y_pred, label="predictions",
                    color="cornflowerblue", linewidth=3)

            ax.set_xlabel("Index", fontsize=15)
            ax.set_ylabel("Value", fontsize=15)
            ax.legend(loc="best")
            ax.set_title("Predictions vs. Actuals", fontsize=20)

            if path:
                plt.savefig(path, format="png", dpi=300, bbox_inches="tight")

            plt.show()

    def plot_qq(self, path: str=None, dim: tuple=(12, 8)):
        """Display a Q-Q plot of the standardized prediction residuals.

        Parameters
        ----------
        path : str, optional
            Path to store the figure.
        dim : tuple, optional
            Tuple with the width and height of the plot.
        """
        if self.qq is None:
            msg = ("This {} instance is not fitted yet. Call 'fit' with "
                   "appropriate arguments before using this method.")
            raise NotFittedError(msg.format(self.__class__.__name__))

        with plt.style.context("seaborn-whitegrid"):
            fig, ax = plt.subplots(figsize=dim)

            x = self.qq["quantiles"]
            y = self.qq["residuals"]

            ax.plot(x, x, ls="--", label="perfect model",
                    color="darkorange", linewidth=3)
            ax.plot(x, y, label="current model",
                    color="cornflowerblue", linewidth=3)

            ax.set_xlabel("Theoretical quantiles", fontsize=15)
            ax.set_xticks(range(int(np.floor(min(x))),
                                int(np.ceil(max(x[x < float("inf")])))+1,
                                1))
            ax.set_ylabel("Standardized residuals", fontsize=15)
            ax.set_yticks(range(int(np.floor(min(y))),
                                int(np.ceil(max(y[x < float("inf")])))+1,
                                1))
            ax.legend(loc="best")
            ax.set_title("Q-Q plot", fontsize=20)

            if path:
                plt.savefig(path, format="png", dpi=300, bbox_inches="tight")

            plt.show()
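

# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original module): a minimal,
# self-contained example of fitting a RegressionEvaluator on synthetic
# data. The helper name and the synthetic data below are assumptions made
# purely for illustration, not the library's documented API.
def _example_regression_evaluation():  # hypothetical helper
    rng = np.random.default_rng(42)
    y_true = rng.normal(loc=100, scale=20, size=500)  # synthetic targets
    y_pred = y_true + rng.normal(scale=10, size=500)  # predictions with noise

    evaluator = RegressionEvaluator()
    evaluator.fit(y_true, y_pred)

    # scalar metrics: R2, MAE, MSE, RMSE
    print(evaluator.scalar_metrics)

    # Q-Q plot of standardized residuals vs. theoretical quantiles;
    # requires an interactive matplotlib backend (or pass `path=`)
    evaluator.plot_qq()
# ---------------------------------------------------------------------------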