Source code for scorer.scorer.scorer

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import numpy as np
import warnings

from scorer.lib.scorer import _scorer

__author__  = ['Nico Curti']
__email__   = ['nico.curti2@unibo.it']
__all__ = ['Scorer']


[docs]class Scorer (dict): ''' Multi-class score computation. This class represents an optimized and extended version of the PyCM_ library. The full list of scores are evaluated using C++ functions wrapped into a single score object. The evaluation of the score functions can be performed into a parallel environment using OMP multhithreading. The C++ code is in fact auto-generated using the scripts provided into the utils_ directory and the optimal dependency graph is computed to allow the work distribution among the available threads. Example ------- >>> from scorer import Scorer >>> >>> y_true = ['a', 'b', 'a', 'a', 'b', 'c', 'c', 'a', 'a', 'b', 'c', 'a'] >>> y_pred = ['b', 'b', 'a', 'c', 'b', 'a', 'c', 'b', 'a', 'b', 'a', 'a'] >>> >>> scorer = Scorer() >>> scorer.evaluate(y_true, y_pred) References ---------- - Haghighi, S., Jasemi, M., Hessabi, S. and Zolanvari, A. (2018). PyCM: Multiclass confusion matrix library in Python. Journal of Open Source Software, 3(25), p.729. .. _PyCM : https://github.com/sepandhaghighi/pycm .. _utils : https://github.com/Nico-Curti/scorer/blob/master/utils/ '''
[docs] def __init__ (self): ''' Default constructor ''' self._obj = _scorer() super(Scorer, self).__init__({})
[docs] def _check_params (self, true, pred): ''' Check input dimension shapes Parameters ---------- true : array-like True label array pred : array-like Predicted label array Notes ----- .. note:: The array of true labels and predicted ones mush have the same length. If the given arrays have different shapes a ValueError is raised. ''' if len(true) != len(pred): class_name = self.__class__.__name__ raise ValueError('{0}: wrong shapes found. ' 'Found {1} true labels and {2} predicted labels. ' 'Input arrays must have the same length'.format( class_name, len(true), len(pred)))
[docs] def _label2numbers (self, arr): ''' Convert labels to numerical values Parameters ---------- arr : array_like The array of labels Returns ------- numeric_labels : np.ndarray Array of numerical labels obtained by the LabelEncoder transform Notes ----- .. note:: The C++ function allows only numerical (integer) values as labels in input. For more general support refers to the C++ example. Examples -------- >>> from scorer import scorer >>> y = ('A', 'A', 'B', 'B') >>> num_y = scorer()._label2numbers(y) >>> print(num_y) [0, 0, 1, 1] ''' unique, numeric_labels = np.unique(arr, return_inverse=True) if unique.size <= 1: raise ValueError('The number of classes must be greater than 1') return numeric_labels.astype('int32'), unique
[docs] def evaluate (self, lbl_true, lbl_pred): ''' Evaluate scores of prediction labels vs true labels Parameters ---------- lbl_true : array-like List of true labels lbl_pred : array-like List of predicted labels Returns ------- self Examples -------- >>> from scorer import scorer >>> size = 10 >>> y_true = np.random.choice([0., 1.], p=[.5, .5], size=(size, )) >>> y_pred = np.random.choice([0., 1.], p=[.5, .5], size=(size, )) >>> >>> scorer = Scorer() >>> scorer.evaluate(y_true, y_pred) >>> >>> # Or using simple lists >>> >>> y_true = y_true.tolist() >>> y_pred = y_pred.tolist() >>> >>> scorer.evaluate(y_true, y_pred) Notes ----- .. note:: The score evaluation is possible only with integer labels. The input labels are encoded in integers using the C++ version of the label encoder (_label2numbers). ''' self._check_params(lbl_true, lbl_pred) y_true, true_names = self._label2numbers(lbl_true) y_pred, true_names = self._label2numbers(lbl_pred) # set contiguous order memory for c++ compatibility y_true = np.ascontiguousarray(y_true) y_pred = np.ascontiguousarray(y_pred) score = self._obj.evaluate(y_true, y_pred, len(y_true)) self.update(score) with warnings.catch_warnings(): warnings.simplefilter("ignore") self['Confusion Matrix'] = np.reshape(self['Confusion Matrix'], newshape=(len(self['Classes']), len(self['Classes']))) self['Classes'] = true_names return self
@property def score (self): ''' Return the score list as dictionary. ''' return self @property def num_classes (self): ''' Return the number of classes identified. If the scores are not yet evaluated the return value is 0. ''' return len(self['Classes']) if 'Classes' in self else 0 @property def _get_available_metrics (self): ''' Get the dictionary of available metrics in a more manageable format. ''' return self._obj.get_available_metrics
[docs] def __getattr__ (self, stat): ''' Access to score stats as attribute Parameters ---------- stat: name Name of the score Examples -------- >>> from scorer import scorer >>> size = 10 >>> y_true = np.random.choice([0., 1.], p=[.5, .5], size=(size, )) >>> y_pred = np.random.choice([0., 1.], p=[.5, .5], size=(size, )) >>> >>> scorer = Scorer() >>> scorer.evaluate(y_true, y_pred) >>> print(scorer.ACC, scorer.TP, scorer.FP) Notes ----- .. note:: In many cases the string related to the score is very long and it includes information about the mathematical meaning of that score. To facilitate the usage of the class the search of the attributes is performed using a "regex" search. In this way it is possible to access member values as in the following example .. code-block:: python np.testing.assert_allclose(scorer['ACC(Accuracy)'], scorer.ACC) np.testing.assert_allclose(scorer['FP(False positive/type 1 error/false alarm)'], scorer.FP) np.testing.assert_allclose(scorer['TOP(Test outcome positive)'], scorer.TOP) np.testing.assert_allclose(scorer['FDR(False discovery rate)'], scorer.FDR) If the attribute is not found an AttributeError is raised. ''' if stat in self._get_available_metrics: stat = self._get_available_metrics[stat] return self[stat] elif stat in self: return self[stat] else: for x in sorted(self.keys()): y = x.split('(')[0] y = y.replace('%', '') y = y.replace(' ', '_') y = y.replace('-', '_') if y == stat: return super(Scorer, self).__getitem__(x) else: raise AttributeError('Attribute {} not found'.format(stat))
[docs] def __getitem__ (self, stat): ''' Get the value of the required score Parameters ---------- stat: str Name of the score Examples -------- >>> from scorer import scorer >>> size = 10 >>> y_true = np.random.choice([0., 1.], p=[.5, .5], size=(size, )) >>> y_pred = np.random.choice([0., 1.], p=[.5, .5], size=(size, )) >>> >>> scorer = Scorer() >>> scorer.evaluate(y_true, y_pred) >>> >>> print(scorer['accuracy_score']) Notes ----- .. note:: The search of the score name is performed using the key name of the dictionary. This function is different from __getattr__. ''' if not len(self): class_name = self.__class__.__name__ raise ValueError('{0}: score not computed yet. ' 'Please use the "evaluate" method before'.format( class_name)) try: return super(Scorer, self).__getitem__(stat) except KeyError: stat = self._get_available_metrics[stat] return self[stat] except KeyError: class_name = self.__class__.__name__ raise KeyError('{0}: statistic not found. ' 'Available statistics are {1}'.format( class_name, ','.join(self.keys())))
[docs] def __setitem__ (self, stat, values): ''' Set a score variable. Parameters ---------- stat: str Key as name of the new score values: float or list Value(s) of the new score Examples -------- >>> from scorer import scorer >>> size = 10 >>> y_true = np.random.choice([0., 1.], p=[.5, .5], size=(size, )) >>> y_pred = np.random.choice([0., 1.], p=[.5, .5], size=(size, )) >>> >>> scorer = Scorer() >>> scorer.evaluate(y_true, y_pred) >>> >>> scorer['dummy'] = 'dummy' UserWarning: Setting new statistics does not enable the computation of the dependencies ''' warnings.warn(UserWarning('Setting new statistics does not enable' 'the computation of the dependencies')) super(Scorer, self).__setitem__(stat, values)
def __repr__ (self): ''' Object representation ''' return str(self._obj)
[docs] def __str__ (self): ''' Print the object as table of scores ''' fmt = '' fmt += 'Classes: {}\n'.format(', '.join(['{}'.format(c) for c in self['Classes']])) fmt += 'Confusion Matrix:\n' fmt += '\n'.join([''.join(['{:4}'.format(item) for item in row]) for row in self['Confusion Matrix']]) fmt += '\n\nClass Statistics:\n\n' numeric_fmt = ' '.join(['{:>20.3f}' for _ in range(len(self['Classes']))]) array_fmt = ' '.join(['{:>20}' for _ in range(len(self['Classes']))]) for k, v in self.items(): if isinstance(v, list) and k not in ['Classes', 'Confusion Matrix']: try: fmt += '{name:<80} {value}\n'.format(**{'name' : k, 'value' : numeric_fmt.format(*v)}) except ValueError: fmt += '{name:<80} {value}\n'.format(**{'name' : k, 'value' : array_fmt.format(*v)}) fmt += '\nOverall Statistics:\n\n' for k, v in self.items(): if not isinstance(v, list) and k not in ['Classes', 'Confusion Matrix']: try: fmt += '{name:<80} {value:.3f}\n'.format(**{'name' : k, 'value' : v}) except (ValueError, TypeError): fmt += '{name:<80} {value}\n'.format(**{'name' : k, 'value' : v}) return fmt
if __name__ == '__main__': y_true = [2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2] # y_true = np.array([2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2], dtype=np.int32) y_pred = [0, 0, 2, 1, 0, 2, 1, 0, 2, 0, 2, 2] # y_pred = np.array([0, 0, 2, 1, 0, 2, 1, 0, 2, 0, 2, 2], dtype=np.int32) scorer = Scorer() scorer.evaluate(y_true, y_pred) print(scorer)