Source code for rrgp.evaluator

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import tabulate
import os
import logging
import numpy as np
import matplotlib.pyplot as plt
from .database import transform_to_text_labels
from sklearn.utils.multiclass import unique_labels
from sklearn.metrics import (
    precision_score,
    recall_score,
    f1_score,
    accuracy_score,
    confusion_matrix,
    classification_report,
)
from sklearn.metrics import plot_confusion_matrix
from sklearn.model_selection import GridSearchCV


def get_metrics_table(predictions, test_labels):
    """
    Generate a metrics table to evaluate predictions

    Parameters
    ----------
    predictions : array
        Predictions of a model
    test_labels : array
        Corresponding ground-truth

    Returns
    -------
    table : string
        Nicely formatted plain-text table with the computed metrics
    """
    # Compute metrics
    precision = precision_score(test_labels, predictions, average="micro")
    recall = recall_score(test_labels, predictions, average="micro")
    f1 = f1_score(test_labels, predictions, average="micro")
    accuracy = accuracy_score(test_labels, predictions)

    # Create table
    headers = ["Precision (avg)", "Recall (avg)", "F1 score (avg)", "Accuracy"]
    table = [[precision, recall, f1, accuracy]]

    return tabulate.tabulate(table, headers, tablefmt="rst", floatfmt=".3f")
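
# A minimal usage sketch for get_metrics_table, assuming integer class labels;
# the values below are illustrative only:
#
#     >>> import numpy as np
#     >>> predictions = np.array([0, 1, 1, 2, 2])
#     >>> test_labels = np.array([0, 1, 2, 2, 2])
#     >>> print(get_metrics_table(predictions, test_labels))
#
# The call prints a one-row reStructuredText table with the micro-averaged
# precision, recall and F1 score plus the plain accuracy.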


def get_table_header(model_name, model):
    """
    Generate a header for the metrics table

    Parameters
    ----------
    model_name : str
        Type of model (svm or rf)
    model : object
        Trained model from which to get the parameters

    Returns
    -------
    header : string
        Nicely formatted text header
    """
    header = f"Model used: {model_name}\n" + "Parameters:\n"

    # If the model comes from a grid search, report the best estimator found
    if isinstance(model, GridSearchCV):
        model = model.best_estimator_

    if model_name == "svm":
        header += (
            f"-kernel: {model.kernel}\n"
            + f"-gamma: {model.gamma}\n"
            + f"-C: {model.C}\n"
        )
    elif model_name == "rf":
        header += (
            f"-n_estimators: {model.n_estimators}\n"
            + f"-max_depth: {model.max_depth}\n"
            + f"-min_samples_split: {model.min_samples_split}\n"
            + f"-min_samples_leaf: {model.min_samples_leaf}\n"
            + f"-bootstrap: {model.bootstrap}\n"
        )

    return header
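
# A minimal usage sketch for get_table_header, assuming an untrained
# scikit-learn SVC (kernel, gamma and C are plain constructor attributes):
#
#     >>> from sklearn.svm import SVC
#     >>> print(get_table_header("svm", SVC(kernel="rbf", gamma="scale", C=10)))
#     Model used: svm
#     Parameters:
#     -kernel: rbf
#     -gamma: scale
#     -C: 10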


def evaluate(predictions, test_data, test_labels, output_dir, model_name, model):
    """
    Evaluate the predictions given the ground-truth. Save a table with the
    metrics and a png file with the confusion matrix.

    Parameters
    ----------
    predictions : array
        Predictions of a model
    test_data : array
        Test samples used to plot the confusion matrix
    test_labels : array
        Corresponding ground-truth
    output_dir : str
        Folder name in which to save table and figure
    model_name : str
        Model type (svm or rf)
    model : object
        Trained model
    """
    logging.info("Starting evaluation...")

    # Get the metrics table
    table = get_metrics_table(predictions, test_labels)

    # Prepend the header to the table
    table = get_table_header(model_name, model) + table

    # Save the metrics table
    output_table = os.path.join(os.getcwd(), output_dir, "table.rst")
    logging.info(f"Saving table at {output_table}")
    os.makedirs(os.path.dirname(output_table), exist_ok=True)
    with open(output_table, "wt") as f:
        f.write(table)

    # Numerical labels to text for the plot, sorted so they line up with the
    # confusion matrix rows and columns
    classes_txt = transform_to_text_labels(np.unique(test_labels))

    fig, ax = plt.subplots(figsize=(17, 15))

    # Plot the normalized confusion matrix
    disp = plot_confusion_matrix(
        model,
        test_data,
        test_labels,
        normalize="true",
        cmap=plt.cm.Blues,
        display_labels=classes_txt,
        xticks_rotation="vertical",
        ax=ax,
    )
    disp.ax_.set(ylabel="True label", xlabel="Predicted label")
    disp.ax_.set_title(f"Normalized confusion matrix\nModel: {model_name}")

    # Save the confusion matrix
    output_figure = os.path.join(os.getcwd(), output_dir, "confusion_matrix.png")
    logging.info(f"Saving confusion matrix at {output_figure}")
    plt.savefig(output_figure)
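
# A hedged end-to-end sketch of how evaluate might be called; train_data,
# train_labels, test_data and test_labels are hypothetical arrays prepared
# elsewhere, and the numeric labels are assumed to match the encoding that
# transform_to_text_labels expects:
#
#     >>> from sklearn.svm import SVC
#     >>> clf = SVC(kernel="rbf", gamma="scale", C=1.0).fit(train_data, train_labels)
#     >>> predictions = clf.predict(test_data)
#     >>> evaluate(predictions, test_data, test_labels, "results", "svm", clf)
#
# This writes results/table.rst and results/confusion_matrix.png under the
# current working directory.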