# evaluation.py

from utils.profile import profile
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import json
from tqdm import tqdm
from decimal import Decimal
import numpy as np
from sklearn.utils.multiclass import unique_labels
from sklearn.metrics import fbeta_score, confusion_matrix, precision_score, recall_score, multilabel_confusion_matrix
from utils.utilitary_mtl import fmeasure
from utils.f_scores import F2Score
from utils.opportunity import opportunity_select_channels_tf
from dataio.opportunity.opportunity_adapter import opportunity_reader
import tensorflow as tf
import os
import sys
import math
import argparse


def construct_parser():
    """Build the command line parser of the evaluation utility.

    The parser takes one positional ``labels`` argument (a comma separated
    list of integers) followed by a mandatory operation:

    * ``model``      -- evaluate a saved tf.keras model,
    * ``eval``       -- plot from an existing evaluation JSON file,
    * ``mastereval`` -- merge an evaluation JSON into a master CSV.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """

    def int_list(s):
        # The converter keeps this name because argparse uses the callable's
        # name when it reports an invalid value.
        return list(map(int, s.strip().split(',')))

    root = argparse.ArgumentParser()
    root.add_argument(
        'labels', type=int_list, help='the labels to evaluate against')

    operations = root.add_subparsers(title="Operation", dest="op")
    operations.required = True

    # "model": evaluate a stored network on a dataset directory.
    model_cmd = operations.add_parser(
        'model', help='Evaluate a tf.keras model')
    model_cmd.add_argument('hdf5', type=str, help='input model file')
    model_cmd.add_argument(
        '--from-config',
        action='store_true',
        help='If passed, load config and weights seperately. Must pass the name of the hdf5 file with extensions .hdf5 or .h5, and json file must have same name with extension .json if passed.')
    model_cmd.add_argument(
        '--data', type=str, help='input dataset dir',
        default='/data/opportunity/window_labels-mtl-tf/all_sensors/64')
    model_cmd.add_argument(
        '--out', type=str, help='output directory',
        default='/models/OPPORTUNITY/MTL-HPS/evaluation')
    model_cmd.add_argument('--batch-size', type=int, default=500)

    # "eval": plot an evaluation file that was written earlier.
    eval_cmd = operations.add_parser(
        'eval', help='Plot from a given evaluation file generated by this utility or the included callback')
    eval_cmd.add_argument('evaljson', help='Path to the evaluation file')
    eval_cmd.add_argument('tag', help='A tag to prefix to the output files')
    eval_cmd.add_argument('outdir', help='where to save the output files')

    # "mastereval": collect several models in one comparison CSV.
    master_cmd = operations.add_parser(
        'mastereval', help='Compare multiple models with each other, by generating a CSV file with comparisons that will be plotted to a facetted lmplot')
    master_cmd.add_argument(
        'masterevalfile', type=str,
        help='the filename and path of the master evaluation csv and png file')
    master_cmd.add_argument(
        'evaljson', type=str,
        help='evaluation json to read into the master evaluation csv')
    master_cmd.add_argument(
        'modelname', type=str, help='the model name to be used in graphs')
    master_cmd.add_argument('--plotonly', action='store_true')
    master_cmd.add_argument('--dpi', default=180, type=int)

    return root


if __name__ == "__main__":
    # Parse the command line up front; ``args`` is read again by the second
    # ``__main__`` section at the end of the file.
    parser = construct_parser()
    args = parser.parse_args()

    # Echo every parsed option, one per line.
    print("Command line arguments:")
    for name, value in vars(args).items():
        print(f'  {name} : {value}')
    print("\n")


class EvaluationCallback(tf.keras.callbacks.Callback):
    """Keras callback that runs an ``Evaluator`` at the end of every epoch.

    After each epoch the attached model is evaluated on ``dataset`` and the
    accumulated evaluation history is written to ``base_out_dir`` as
    ``ep<N>_eval.json`` (``N`` being the 1-based epoch number).
    """

    def __init__(self, dataset, label_names, num_classes, base_out_dir):
        """Store the evaluation inputs and create the evaluator.

        Args:
            dataset: iterable handed to ``Evaluator.evaluate_model``.
            label_names: names of the label channels to evaluate.
            num_classes: number of classes per label channel.
            base_out_dir: directory the evaluation files are written to.
        """
        # Let the Keras base class initialise its own attributes first.
        super().__init__()
        self.evaluator = Evaluator(label_names, num_classes)
        self.dataset = dataset
        self.label_names = label_names
        self.num_classes = num_classes
        self.base_out_dir = base_out_dir

    @profile
    def do_eval(self, epoch):
        """Evaluate ``self.model`` and save the evaluation history.

        Args:
            epoch: zero-based epoch index; the output prefix uses epoch + 1.
        """
        outdir = self.base_out_dir
        os.makedirs(outdir, exist_ok=True)

        prefix = f"ep{epoch + 1}"
        print()
        print('Commencing evaluation')

        self.evaluator.evaluate_model(
            self.model, self.dataset,
            self.label_names, self.num_classes,
            outdir,
            prefix=prefix
        )

        tf.keras.backend.clear_session()
        print("tf.keras session cleared")

        self.evaluator.save_evaluation(outdir, prefix=prefix)

    def on_epoch_end(self, epoch, logs=None):
        """Keras hook: run the evaluation once the epoch has finished."""
        self.do_eval(epoch)


class Evaluator:

    def __init__(self, label_names=None, num_classes=None):
        """Set the averaging modes and F-beta values used for evaluation.

        ``initialize`` is only called when both ``label_names`` and
        ``num_classes`` are provided; otherwise it has to be called later
        before any evaluation is recorded.
        """
        self.modes = ['micro', 'macro', 'weighted', None]
        self.betas = [1]

        if label_names is None or num_classes is None:
            return
        self.initialize(label_names, num_classes)

    def initialize(self, label_names, num_classes):
        """Remember the label channels and reset the evaluation history.

        Args:
            label_names: names of the label channels.
            num_classes: number of classes for each label channel.
        """
        # Every (re-)initialisation starts with an empty history.
        self.evaluation = []
        self.label_names, self.num_classes = label_names, num_classes

    def load_evaluation(self, args):
        """Replace the evaluation history with the JSON in ``args.evaljson``.

        Args:
            args: namespace providing ``evaljson``, the path of the file.
        """
        with open(args.evaljson, "r") as json_file:
            loaded = json.loads(json_file.read())
        self.evaluation = loaded

    def master_evaluate(self, args):
        if not os.path.isfile(args.masterevalfile) and args.plotonly:
            raise FileNotFoundError(
                f"The file {args.masteronly} could not be found, so plotting is not possible")
        if args.plotonly:
            self.mastereval = pd.read_csv(
                filepath_or_buffer=args.masterevalfile)
            print(f"Read {args.masterevalfile}")
        evaldf = self.make_evaluation_dataframe(args.modelname)
        if os.path.isfile(args.masterevalfile) and not args.plotonly:
            self.mastereval = pd.read_csv(
                filepath_or_buffer=args.masterevalfile)
            print(f"Read {args.masterevalfile}")
            self.mastereval = self.mastereval.append(evaldf)
            self.mastereval.to_csv(path_or_buf=args.masterevalfile,
                                   index=False)
            print(f"Appended {args.masterevalfile}")
        if not os.path.isfile(args.masterevalfile) and not args.plotonly:
            self.mastereval = evaldf
            self.mastereval.to_csv(
                path_or_buf=args.masterevalfile,
                index=False)
            print(f'Wrote {args.masterevalfile}')

        #p = self.plot_metrics_plot_mastereval(args)
        #pname = os.path.join(os.path.dirname(
        #    args.masterevalfile), f"{os.path.basename(args.masterevalfile)}.png")
        #p.savefig(pname, dpi=args.dpi)
        #print(f"Wrote to {pname}")

    def plot_metrics_plot_mastereval(self, args):
        """Draw the faceted model comparison from ``self.mastereval``.

        One facet is drawn per label channel and one colour per model name.
        ``args`` is accepted but not used.

        Returns:
            The object returned by ``sns.lmplot``.
        """
        lmplot_options = dict(
            x="Epoch",
            y="Metric value",
            col="Label channel",
            hue="Model name",
            col_wrap=3,
            truncate=True,
            lowess=True,
            markers='.',
            sharex=False,
            sharey=False,
        )
        grid = sns.lmplot(data=self.mastereval, **lmplot_options)
        # Fixed y-range applied to every facet.
        grid.set(ylim=(0.7, 0.93))
        return grid

    def save_evaluation(self, outdir, prefix=None):
        """Write the complete evaluation history to a JSON file.

        Args:
            outdir: directory the file is written to.
            prefix: optional file name prefix; the file is called
                ``<prefix>_eval.json``, or ``eval.json`` without a prefix.

        Raises:
            TypeError: if the history contains an object that is neither
                JSON serialisable nor a NumPy array/scalar.
        """
        def _convert(o):
            # Fallback used by json.dump for objects it cannot serialise.
            if isinstance(o, np.ndarray):
                # float64 arrays are stored with float32 precision, as before.
                # NOTE(review): the reason for the down-cast is not visible
                # here -- confirm it is still wanted.
                if o.dtype == np.dtype('float64'):
                    return o.astype('float32').tolist()
                return o.tolist()
            # Accept every NumPy scalar width, not only int64.
            if isinstance(o, np.integer):
                return int(o)
            if isinstance(o, np.floating):
                return float(o)
            if isinstance(o, np.bool_):
                return bool(o)
            raise TypeError(
                f"Object of type {type(o).__name__} is not JSON serializable")

        file_name = 'eval.json' if prefix is None else f"{prefix}_eval.json"
        dest_name = os.path.join(outdir, file_name)
        with open(dest_name, 'w') as f:
            json.dump(self.evaluation, f, indent=2, default=_convert)
        print(f'Wrote evaluation data to {dest_name}')

    def make_evaluation_dataframe(self, modelname=None):
        """Flatten the averaged F-beta scores into a long-format dataframe.

        Each row holds one averaged F-beta value (micro, macro or weighted)
        for one label channel in one evaluation; the per-class scores are
        left out. Evaluations are numbered from 1 in the ``Epoch`` column.
        The dataframe is printed before it is returned.

        Args:
            modelname: when given, a constant ``Model name`` column is added.

        Returns:
            A ``pandas.DataFrame`` with the columns ``Label channel``,
            ``Epoch``, ``Metric``, ``Metric value`` and, optionally,
            ``Model name``.
        """
        wanted_metrics = [
            f"f{int(beta)}-{average}"
            for beta in self.betas
            for average in ("micro", "macro", "weighted")
        ]

        # One (channel, epoch, metric, value) tuple per dataframe row.
        rows = []
        for epoch_number, epoch_entry in enumerate(self.evaluation, start=1):
            scores_by_channel = epoch_entry["fbeta"]
            for channel in self.label_names:
                for metric, value in scores_by_channel[channel].items():
                    if metric not in wanted_metrics:
                        continue
                    rows.append((channel, epoch_number, metric, value))

        columns = {
            "Label channel": [row[0] for row in rows],
            "Epoch": [row[1] for row in rows],
            "Metric": [row[2] for row in rows],
            "Metric value": [row[3] for row in rows],
        }
        if modelname is not None:
            columns["Model name"] = [modelname] * len(rows)

        frame = pd.DataFrame(columns)
        print(frame)

        return frame

    def plot_metrics_plot_single_eval(self):
        """Plot the averaged F-beta scores of the loaded evaluation.

        A fourth-order regression line is drawn per label channel over the
        epoch number, and the x-axis is labelled with whole epochs.

        Returns:
            A ``(plot, data)`` tuple: the object returned by ``sns.lmplot``
            and the dataframe that was plotted.
        """
        data = self.make_evaluation_dataframe()

        # The hue column must match the name used by
        # make_evaluation_dataframe exactly ("Label channel", lower-case c).
        plot = sns.lmplot(x="Epoch", y="Metric value", data=data,
                          hue="Label channel", order=4,
                          height=10, aspect=1.5,
                          truncate=True,
                          ci=95, scatter=True)

        # One tick per epoch.
        epochs = data["Epoch"]
        plt.xticks(range(min(epochs), math.ceil(max(epochs)) + 1))

        return plot, data

    def save_metrics_plot(self, outdir, prefix=None):
        """Save the metrics plot as PNG and its data as CSV in ``outdir``.

        Args:
            outdir: directory both files are written to.
            prefix: optional prefix; when given, the files are called
                ``<prefix>_metrics.png`` and ``<prefix>_metrics-sklearn.csv``.
        """
        stem = "" if prefix is None else f"{prefix}_"
        dest_name = os.path.join(outdir, f"{stem}metrics.png")
        csvname = os.path.join(outdir, f"{stem}metrics-sklearn.csv")

        plot, plotted_data = self.plot_metrics_plot_single_eval()
        plot.savefig(dest_name, dpi=320)
        plotted_data.to_csv(path_or_buf=csvname, index=False)
        print(f'Wrote metrics plot to {dest_name}')

    def load_test_data(self, args):
        """Load the test split from ``args.data`` and initialise the evaluator.

        Test files are the ``.tfrecords`` files in ``args.data`` whose name
        contains ``ADL4`` or ``ADL5``.

        Args:
            args: namespace providing ``labels``, ``data`` and ``batch_size``.

        Returns:
            A ``(test_dataset, label_names, num_classes)`` tuple.

        Raises:
            FileNotFoundError: if ``args.data`` contains no test file.
        """
        label_names, num_classes = opportunity_select_channels_tf(args.labels)
        # presumably 7 is the total number of label channels -- TODO confirm
        all_label_names, _ = opportunity_select_channels_tf(list(range(7)))
        print(f"Loading dataset from {args.data}")

        test_file_criteria = ("ADL4", "ADL5")
        # Substring tests instead of str.find(): find() returns -1 (truthy)
        # when nothing matches and 0 (falsy) for a match at the start, which
        # made the previous filter unreliable. Sorting makes the selection
        # below independent of the directory listing order.
        test_files = [
            os.path.join(args.data, fn)
            for fn in sorted(os.listdir(args.data))
            if ".tfrecords" in fn
            and any(criterion in fn for criterion in test_file_criteria)
        ]
        if not test_files:
            raise FileNotFoundError(
                f"No test .tfrecords files matching {test_file_criteria} "
                f"found in {args.data}")

        # NOTE(review): only the first matching file is evaluated -- confirm
        # that this restriction is intended.
        test_dataset = opportunity_reader(
            test_files[0:1],
            all_label_names=all_label_names,
            selected_label_names=label_names,
            number_classes=num_classes,
            validation=True)

        test_dataset = test_dataset.batch(args.batch_size, drop_remainder=True)

        self.initialize(label_names, num_classes)

        return test_dataset, label_names, num_classes

    def load_model(self, args):
        """Load the model named by ``args.hdf5`` and print its summary.

        Without ``args.from_config`` the file is loaded as a complete model.
        With it, the architecture is read from a JSON file whose name is
        derived from the weights file name, and the weights are loaded
        afterwards.

        Raises:
            AttributeError: if ``from_config`` is set and ``args.hdf5``
                contains neither ``hdf5`` nor ``h5``.

        Returns:
            The loaded model.
        """
        if not args.from_config:
            print(f'Loading model weights and config from {args.hdf5}')
            model = tf.keras.models.load_model(args.hdf5)
        else:
            weights_path = args.hdf5
            if 'hdf5' not in weights_path and 'h5' not in weights_path:
                raise AttributeError(
                    "Please pass the name of the HDF5 file with extension '.hdf5' or '.h5', not the '.json' file, when loading from config")

            # Derive the config file name; the suffixes are tried in this
            # order so that "_weights.h5" is handled before plain ".h5".
            config_path = weights_path
            for suffix in ('_weights.h5', '.hdf5', '.h5'):
                config_path = config_path.replace(suffix, '.json')

            print(f"Loading model config from {config_path}")
            with open(config_path, 'r') as config_file:
                model = tf.keras.models.model_from_json(config_file.read())
            print(f'Loading model weights from {weights_path}')
            model.load_weights(weights_path)

        print()
        model.summary()

        return model

    def save_confusion_matrix(self, y_true, y_pred, num_classes, label_name, outdir, prefix=None):
        """Plot the normalised confusion matrix of one label channel to a PNG.

        Args:
            y_true: ground-truth class indices.
            y_pred: predicted class indices.
            num_classes: number of classes of the label channel.
            label_name: channel name, used as plot title and in the file name.
            outdir: directory the image is written to.
            prefix: optional file name prefix.
        """
        class_ids = list(range(num_classes))
        # Pin the label set so the matrix always has num_classes rows and
        # columns, even when a class is missing from y_true and y_pred;
        # otherwise the matrix and the tick labels can disagree.
        cm = confusion_matrix(y_true, y_pred, labels=class_ids)
        ax = self.plot_confusion_matrix(
            cm,
            np.array(class_ids),
            normalize=True, title=label_name)

        if prefix is not None:
            file_name = f'{prefix}_confusion_{label_name}.png'
        else:
            file_name = f'confusion_{label_name}.png'
        dest_path = os.path.join(outdir, file_name)

        try:
            ax.figure.savefig(dest_path)
        finally:
            # Release the figure; pyplot otherwise keeps every figure alive.
            plt.close(ax.figure)
        print(f'Wrote confusion matrix {dest_path}')

    def plot_confusion_matrix(self,
                              cm,
                              classes,
                              normalize=False,
                              title=None,
                              verbose=False,
                              cmap=plt.cm.Blues):
        """Plot a confusion matrix on a new figure.

        Args:
            cm: square confusion matrix; rows are labelled "True label" and
                columns "Predicted label".
            classes: tick labels for both axes.
            normalize: if true, every row is divided by its sum; rows that
                sum to zero are shown as all zeros.
            title: plot title; a default is chosen when empty.
            verbose: if true, the plotted matrix is also printed.
            cmap: matplotlib colour map of the image.

        Returns:
            The matplotlib axes the matrix was drawn on.
        """
        if not title:
            if normalize:
                title = 'Normalized confusion matrix'
            else:
                title = 'Confusion matrix, without normalization'

        if normalize:
            row_totals = cm.sum(axis=1)[:, np.newaxis]
            # An explicit zero-filled ``out`` is required: with ``where`` and
            # no ``out``, entries of rows whose total is zero would be left
            # uninitialised.
            cm = np.divide(cm.astype('float'), row_totals,
                           out=np.zeros(cm.shape, dtype=float),
                           where=row_totals != 0)

        if verbose and normalize:
            print("Normalized confusion matrix")
        elif verbose and not normalize:
            print('Confusion matrix, without normalization')

        if verbose:
            print(cm)

        fig = plt.figure(figsize=(10, 10), dpi=160)
        ax = plt.gca()
        im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
        ax.figure.colorbar(im, ax=ax)
        # We want to show all ticks...
        ax.set(xticks=np.arange(cm.shape[1]),
               yticks=np.arange(cm.shape[0]),
               # ... and label them with the respective list entries
               xticklabels=classes, yticklabels=classes,
               title=title,
               ylabel='True label',
               xlabel='Predicted label')

        # Rotate the tick labels and set their alignment.
        plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
                 rotation_mode="anchor")

        # Annotate every cell; switch to white text on dark cells.
        fmt = '.2f' if normalize else 'd'
        thresh = cm.max() / 2.
        for i in range(cm.shape[0]):
            for j in range(cm.shape[1]):
                ax.text(j, i, format(cm[i, j], fmt),
                        ha="center", va="center",
                        color="white" if cm[i, j] > thresh else "black")
        fig.tight_layout()
        return ax

    def evaluate_model(self, model, dataset, label_names, num_classes, outdir, prefix=None, beta=1.0, write_confusion=False):
        """Evaluate ``model`` on ``dataset`` and record the metrics.

        A confusion matrix is accumulated per label channel over all batches.
        From it, precision, recall and F-beta are derived for every averaging
        mode in ``self.modes`` and every beta in ``self.betas``. The result
        is appended to ``self.evaluation``.

        Args:
            model: object with a ``predict`` method. A list result is read
                as one prediction per label channel; any other result is
                used for every channel.
            dataset: iterable of ``(x, y_true_all)`` batches, where
                ``y_true_all`` is indexed by label channel position.
                presumably each entry holds per-class scores or one-hot
                rows, since argmax over axis 1 is taken -- TODO confirm.
            label_names: names of the label channels.
            num_classes: number of classes per label channel.
            outdir: accepted for interface compatibility; not used here.
            prefix: optional tag stored under the ``prefix`` key.
            beta: accepted for interface compatibility; not used, the beta
                values are taken from ``self.betas``.
            write_confusion: if true, the confusion matrices are stored in
                the result as well.

        Returns:
            The evaluation dictionary appended to ``self.evaluation``.
        """
        confusion_matrices = {
            ln: np.zeros((nc, nc), dtype=int)
            for ln, nc in zip(label_names, num_classes)}

        for x, y_true_all in tqdm(iter(dataset), file=sys.stdout):
            y_pred_all = model.predict(x)
            for li, ln in enumerate(label_names):
                if isinstance(y_pred_all, list):
                    y_pred = y_pred_all[li]
                else:
                    y_pred = y_pred_all

                y_true_a = tf.math.argmax(y_true_all[li], axis=1)
                y_pred_a = np.argmax(y_pred, axis=1)

                # Pin the label set so every batch yields a matrix of the
                # same shape as the accumulator.
                confusion_matrices[ln] += confusion_matrix(
                    y_true_a, y_pred_a, labels=list(range(num_classes[li])))

        # The arguments (not the instance attributes) are used consistently
        # from here on, so that they always match the keys created above.
        multilabel_confusion_matrices = {
            ln: self.cmat_to_mlcmat(confusion_matrices[ln])
            for ln in label_names}

        precisions = {ln: {} for ln in label_names}
        recalls = {ln: {} for ln in label_names}
        fbetas = {ln: {} for ln in label_names}

        for li, ln in enumerate(label_names):
            labels = list(range(num_classes[li]))
            mcm = multilabel_confusion_matrices[ln]
            for mode in self.modes:
                prec, rec, _, _ = self.precision_recall_fscore_support(
                    mcm, labels=labels, average=mode)
                mode_text = "none" if mode is None else mode
                precisions[ln][mode_text] = prec
                recalls[ln][mode_text] = rec
                for beta_value in self.betas:
                    _, _, fb, _ = self.precision_recall_fscore_support(
                        mcm, labels=labels, beta=beta_value, average=mode)
                    fbetas[ln][f"f{int(beta_value)}-{mode_text}"] = fb

        evaluation_entry = {
            'precision': precisions,
            'recall': recalls,
            'fbeta': fbetas
        }
        if prefix is not None:
            evaluation_entry['prefix'] = prefix
        if write_confusion:
            evaluation_entry['confusion'] = confusion_matrices
            evaluation_entry['confusion-ml'] = multilabel_confusion_matrices

        self.evaluation.append(evaluation_entry)

        return evaluation_entry

    def cmat_to_mlcmat(self, cmat):
        """Convert a multi-class confusion matrix to one-vs-rest 2x2 matrices.

        ``cmat[i, j]`` is taken to count samples of true class ``i`` that
        were predicted as class ``j`` (rows true, columns predicted). For
        every class a 2x2 matrix is built with the layout read by
        ``precision_recall_fscore_support``::

            [[tn, fp],
             [fn, tp]]

        Args:
            cmat: square confusion matrix.

        Returns:
            Integer array of shape ``(num_classes, 2, 2)``.
        """
        cmat = np.asarray(cmat)
        num_classes = cmat.shape[1]
        diagonal = np.arange(num_classes)

        tp = cmat[diagonal, diagonal]
        # Row total = samples that truly belong to the class; whatever is not
        # on the diagonal was missed (false negatives).
        fn = cmat.sum(axis=1) - tp
        # Column total = samples predicted as the class; whatever is not on
        # the diagonal was predicted wrongly (false positives).
        fp = cmat.sum(axis=0) - tp
        # Everything outside the class' row and column.
        tn = cmat.sum() - tp - fn - fp

        mlc = np.zeros((num_classes, 2, 2), dtype=int)
        mlc[:, 0, 0] = tn
        mlc[:, 0, 1] = fp
        mlc[:, 1, 0] = fn
        mlc[:, 1, 1] = tp
        return mlc

    def precision_recall_fscore_support(self,
                                        MCM, beta=1.0, labels=None,
                                        pos_label=1, average=None,
                                        warn_for=('precision', 'recall',
                                                  'f-score'),
                                        sample_weight=None):
        """Compute precision, recall, F-beta and support from 2x2 matrices.

        Adapted from SciKit Learn, Source: https://github.com/scikit-learn/scikit-learn/blob/1495f6924/sklearn/metrics/classification.py#L1263

        Unlike the scikit-learn function, this variant takes pre-computed
        multi-label confusion matrices instead of predictions and ground
        truths.

        Args:
            MCM: array of shape ``(n_labels, 2, 2)``; ``[:, 1, 1]`` holds the
                true positives, ``[:, 0, 1]`` is added to them to obtain the
                predicted count and ``[:, 1, 0]`` to obtain the true count.
            beta: weight of recall in the F-score.
            labels: accepted for interface compatibility; not used.
            pos_label: accepted for interface compatibility; not used.
            average: ``None`` for per-label scores, or ``'micro'``,
                ``'macro'``, ``'weighted'`` or ``'samples'``.
            warn_for: forwarded to ``_prf_divide``.
            sample_weight: averaging weights for ``average='samples'``.

        Returns:
            ``(precision, recall, f_score, support)``. ``support`` is the
            per-label true count when ``average`` is ``None`` and ``None``
            otherwise. With ``average='weighted'`` and no true samples at
            all, ``(0, 0, 0, None)`` is returned.
        """
        true_positives = MCM[:, 1, 1]
        predicted_totals = true_positives + MCM[:, 0, 1]
        actual_totals = true_positives + MCM[:, 1, 0]

        if average == 'micro':
            # Pool the counts of all labels into one-element arrays.
            true_positives, predicted_totals, actual_totals = (
                np.array([counts.sum()])
                for counts in (true_positives, predicted_totals, actual_totals))

        precision = self._prf_divide(true_positives, predicted_totals,
                                     'precision', 'predicted', average, warn_for)
        recall = self._prf_divide(true_positives, actual_totals,
                                  'recall', 'true', average, warn_for)

        # F is zero wherever both precision and recall are zero; the
        # denominator is patched there to avoid dividing by zero.
        beta_squared = beta ** 2
        denominator = beta_squared * precision + recall
        denominator[denominator == 0.] = 1
        f_score = (1 + beta_squared) * precision * recall / denominator

        if average is None:
            return precision, recall, f_score, actual_totals

        weights = None
        if average == 'weighted':
            if actual_totals.sum() == 0:
                return 0, 0, 0, None
            weights = actual_totals
        elif average == 'samples':
            weights = sample_weight

        # Averaged results carry no support.
        return (np.average(precision, weights=weights),
                np.average(recall, weights=weights),
                np.average(f_score, weights=weights),
                None)

    def _prf_divide(self, numerator, denominator, metric, modifier, average, warn_for):
        """Divide element-wise, guarding against zero denominators.

        Zero denominators are replaced by one before dividing, so the
        affected results equal the numerator. For the counts passed by
        ``precision_recall_fscore_support`` the numerator is zero there,
        i.e. the score is 0.0. A ``UserWarning`` is issued when that happens
        and ``warn_for`` asks for it.

        Args:
            numerator: array of numerators.
            denominator: array of denominators; not modified.
            metric: ``'precision'`` or ``'recall'``, used in the warning.
            modifier: ``'predicted'`` or ``'true'``, used in the warning.
            average: averaging mode, used in the warning.
            warn_for: metrics to warn about; pass an empty tuple to silence
                the warning.

        Returns:
            The element-wise quotient.
        """
        import warnings

        mask = denominator == 0.0
        denominator = denominator.copy()
        denominator[mask] = 1  # avoid infs/nans
        result = numerator / denominator
        if not np.any(mask):
            return result

        # build appropriate warning
        # E.g. "Precision and F-score are ill-defined and being set to 0.0 in
        # labels with no predicted samples"
        axis0, axis1 = 'sample', 'label'
        if average == 'samples':
            axis0, axis1 = axis1, axis0

        if metric in warn_for and 'f-score' in warn_for:
            msg_start = '{0} and F-score are'.format(metric.title())
        elif metric in warn_for:
            msg_start = '{0} is'.format(metric.title())
        elif 'f-score' in warn_for:
            msg_start = 'F-score is'
        else:
            return result

        msg = ('{0} ill-defined and being set to 0.0 {{0}} '
               'no {1} {2}s.'.format(msg_start, modifier, axis0))
        if len(mask) == 1:
            msg = msg.format('due to')
        else:
            msg = msg.format('in {0}s with'.format(axis1))
        # The message used to be built and then dropped; emit it as the
        # docstring promises.
        warnings.warn(msg, UserWarning, stacklevel=2)
        return result


if __name__ == "__main__":
    # Script entry point. ``args`` was parsed by the first ``__main__``
    # section near the top of the file.

    sns.set()
    sns.set_style("whitegrid")
    sns.set_context("paper")

    evaluator = Evaluator()

    if args.op == 'model':
        dataset, label_names, num_classes = evaluator.load_test_data(args)
        model = evaluator.load_model(args)
        # Honour --out; fall back to the model's directory only when no
        # output directory is set (the test used to be inverted, so --out
        # was always ignored).
        if args.out is None:
            outdir = os.path.dirname(args.hdf5)
        else:
            outdir = args.out
        # An empty string means the current directory, which needs no
        # creation.
        if outdir:
            os.makedirs(outdir, exist_ok=True)
        evaluator.evaluate_model(
            model, dataset, label_names, num_classes, outdir)
        evaluator.save_evaluation(outdir, "model_evaluation")
        eval_name = 'model_evaluation'
    elif args.op == 'eval':
        label_names, num_classes = opportunity_select_channels_tf(args.labels)
        evaluator.initialize(label_names, num_classes)
        evaluator.load_evaluation(args)
        outdir = args.outdir
        if not os.path.isdir(outdir):
            os.makedirs(outdir)
            print(f"Created directory {outdir}")
        eval_name = args.tag
    elif args.op == 'mastereval':
        label_names, num_classes = opportunity_select_channels_tf(args.labels)
        evaluator.initialize(label_names, num_classes)
        evaluator.load_evaluation(args)
        evaluator.master_evaluate(args)

    # 'mastereval' only maintains the CSV; the other operations plot.
    if args.op != 'mastereval':
        evaluator.save_metrics_plot(outdir, eval_name)