Source code for abydos.stats._confusion_table

# -*- coding: utf-8 -*-

# Copyright 2014-2018 by Christopher C. Little.
# This file is part of Abydos.
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.


r"""abydos.stats._confusion_table.

This includes the ConfusionTable object, which includes members capable of
calculating the following data based on a confusion table:

    - population counts
    - precision, recall, specificity, negative predictive value, fall-out,
      false discovery rate, accuracy, balanced accuracy, informedness,
      and markedness
    - various means of the precision & recall, including: arithmetic,
      geometric, harmonic, quadratic, logarithmic, contraharmonic,
      identric (exponential), & Hölder (power/generalized) means
    - :math:`F_{\beta}`-scores, :math:`E`-scores, :math:`G`-measures, along
      with special functions for :math:`F_{1}`, :math:`F_{0.5}`, &
      :math:`F_{2}` scores
    - significance & Matthews correlation coefficient calculation
"""

from __future__ import (

import math

from ._mean import (

__all__ = ['ConfusionTable']

class ConfusionTable(object):
    """ConfusionTable object.

    This object is initialized by passing either four integers (or a tuple of
    four integers) representing the squares of a confusion table:
    true positives, true negatives, false positives, and false negatives.

    The object possesses methods for the calculation of various statistics
    based on the confusion table. Statistics whose defining ratio has a zero
    denominator are reported as NaN rather than raising an exception.
    """

    _tp, _tn, _fp, _fn = 0, 0, 0, 0

    def __init__(self, tp=0, tn=0, fp=0, fn=0):
        """Initialize ConfusionTable.

        Parameters
        ----------
        tp : int or a tuple, list, or dict
            True positives; If a tuple or list is supplied, it must include 4
            values in the order [tp, tn, fp, fn]. If a dict is supplied, it
            must have 4 keys, namely 'tp', 'tn', 'fp', & 'fn'.
        tn : int
            True negatives
        fp : int
            False positives
        fn : int
            False negatives

        Raises
        ------
        AttributeError
            ConfusionTable requires a 4-tuple when being created from a tuple.

        Examples
        --------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct == ConfusionTable((120, 60, 20, 30))
        True
        >>> ct == ConfusionTable([120, 60, 20, 30])
        True
        >>> ct == ConfusionTable({'tp': 120, 'tn': 60, 'fp': 20, 'fn': 30})
        True

        """
        if isinstance(tp, (tuple, list)):
            if len(tp) != 4:
                raise AttributeError(
                    'ConfusionTable requires a 4-tuple when being created '
                    + 'from a tuple.'
                )
            self._tp, self._tn, self._fp, self._fn = tp
        elif isinstance(tp, dict):
            # Keys absent from the dict keep the class-level default of 0.
            for key in ('tp', 'tn', 'fp', 'fn'):
                if key in tp:
                    setattr(self, '_' + key, tp[key])
        else:
            self._tp, self._tn, self._fp, self._fn = tp, tn, fp, fn

    def __eq__(self, other):
        """Perform equality (==) comparison.

        Compares a ConfusionTable to another ConfusionTable or its equivalent
        in the form of a tuple, list, or dict.

        Parameters
        ----------
        other : ConfusionTable, tuple, list, or dict
            Another ConfusionTable object (or equivalent) to compare to

        Returns
        -------
        bool
            True if two ConfusionTables are the same object or all four of
            their attributes are equal. A tuple or list that does not hold
            exactly four values, a dict lacking any of the keys 'tp', 'tn',
            'fp', & 'fn', and any other type all compare unequal.

        Examples
        --------
        >>> ct1 = ConfusionTable(120, 60, 20, 30)
        >>> ct2 = ConfusionTable(120, 60, 20, 30)
        >>> ct3 = ConfusionTable(60, 30, 10, 15)

        >>> ct1 == ct2
        True
        >>> ct1 == ct3
        False

        >>> ct1 != ct2
        False
        >>> ct1 != ct3
        True

        >>> ct1 == (120, 60)
        False

        """
        if isinstance(other, ConfusionTable):
            if self is other:
                return True
            theirs = (
                other.true_pos(),
                other.true_neg(),
                other.false_pos(),
                other.false_neg(),
            )
        elif isinstance(other, (tuple, list)):
            # Mirror __init__: only a 4-sequence is a valid equivalent.
            if len(other) != 4:
                return False
            theirs = tuple(other)
        elif isinstance(other, dict):
            keys = ('tp', 'tn', 'fp', 'fn')
            # An incomplete dict cannot describe the same table; report it
            # as unequal instead of leaking a KeyError out of ``==``.
            if any(key not in other for key in keys):
                return False
            theirs = tuple(other[key] for key in keys)
        else:
            return False
        return all(
            mine == their for mine, their in zip(self.to_tuple(), theirs)
        )

    def __str__(self):
        """Cast to str.

        Returns
        -------
        str
            A human-readable version of the confusion table

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> str(ct)
        'tp:120, tn:60, fp:20, fn:30'

        """
        return 'tp:{}, tn:{}, fp:{}, fn:{}'.format(
            self._tp, self._tn, self._fp, self._fn
        )

    @staticmethod
    def _ratio(numerator, denominator):
        """Return numerator / denominator, or NaN for a zero denominator."""
        if denominator == 0:
            return float('NaN')
        return numerator / denominator

    def _marginal_product(self):
        """Return (tp + fp)(tp + fn)(tn + fp)(tn + fn)."""
        return (
            (self._tp + self._fp)
            * (self._tp + self._fn)
            * (self._tn + self._fp)
            * (self._tn + self._fn)
        )

    def to_tuple(self):
        """Cast to tuple.

        Returns
        -------
        tuple
            The confusion table as a 4-tuple (tp, tn, fp, fn)

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.to_tuple()
        (120, 60, 20, 30)

        """
        return self._tp, self._tn, self._fp, self._fn

    def to_dict(self):
        """Cast to dict.

        Returns
        -------
        dict
            The confusion table as a dict

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> import pprint
        >>> pprint.pprint(ct.to_dict())
        {'fn': 30, 'fp': 20, 'tn': 60, 'tp': 120}

        """
        return {'tp': self._tp, 'tn': self._tn, 'fp': self._fp, 'fn': self._fn}

    def true_pos(self):
        """Return true positives.

        Returns
        -------
        int
            The true positives of the confusion table

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.true_pos()
        120

        """
        return self._tp

    def true_neg(self):
        """Return true negatives.

        Returns
        -------
        int
            The true negatives of the confusion table

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.true_neg()
        60

        """
        return self._tn

    def false_pos(self):
        """Return false positives.

        Returns
        -------
        int
            The false positives of the confusion table

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.false_pos()
        20

        """
        return self._fp

    def false_neg(self):
        """Return false negatives.

        Returns
        -------
        int
            The false negatives of the confusion table

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.false_neg()
        30

        """
        return self._fn

    def correct_pop(self):
        """Return correct population.

        Returns
        -------
        int
            The correct population (tp + tn) of the confusion table

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.correct_pop()
        180

        """
        return self._tp + self._tn

    def error_pop(self):
        """Return error population.

        Returns
        -------
        int
            The error population (fp + fn) of the confusion table

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.error_pop()
        50

        """
        return self._fp + self._fn

    def test_pos_pop(self):
        """Return test positive population.

        Returns
        -------
        int
            The test positive population (tp + fp) of the confusion table

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.test_pos_pop()
        140

        """
        return self._tp + self._fp

    def test_neg_pop(self):
        """Return test negative population.

        Returns
        -------
        int
            The test negative population (tn + fn) of the confusion table

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.test_neg_pop()
        90

        """
        return self._tn + self._fn

    def cond_pos_pop(self):
        """Return condition positive population.

        Returns
        -------
        int
            The condition positive population (tp + fn) of the confusion
            table

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.cond_pos_pop()
        150

        """
        return self._tp + self._fn

    def cond_neg_pop(self):
        """Return condition negative population.

        Returns
        -------
        int
            The condition negative population (fp + tn) of the confusion
            table

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.cond_neg_pop()
        80

        """
        return self._fp + self._tn

    def population(self):
        """Return population, N.

        Returns
        -------
        int
            The population (N) of the confusion table

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.population()
        230

        """
        return self._tp + self._tn + self._fp + self._fn

    def precision(self):
        r"""Return precision.

        Precision is defined as :math:`\frac{tp}{tp + fp}`

        AKA positive predictive value (PPV)

        Returns
        -------
        float
            The precision of the confusion table, or NaN if there are no
            test positives

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.precision()
        0.8571428571428571

        """
        return self._ratio(self._tp, self._tp + self._fp)

    def precision_gain(self):
        r"""Return gain in precision.

        The gain in precision is defined as:
        :math:`G(precision) = \frac{precision}{random~ precision}`

        Returns
        -------
        float
            The gain in precision of the confusion table, or NaN if the
            population is empty or contains no condition positives (random
            precision is then 0 and the gain is undefined)

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.precision_gain()
        1.3142857142857143

        """
        if self.population() == 0:
            return float('NaN')
        random_precision = self.cond_pos_pop() / self.population()
        # No condition positives: dividing by a zero random precision would
        # raise, so report the undefined gain as NaN like the other ratios.
        if random_precision == 0:
            return float('NaN')
        return self.precision() / random_precision

    def recall(self):
        r"""Return recall.

        Recall is defined as :math:`\frac{tp}{tp + fn}`

        AKA sensitivity

        AKA true positive rate (TPR)

        Returns
        -------
        float
            The recall of the confusion table, or NaN if there are no
            condition positives

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.recall()
        0.8

        """
        return self._ratio(self._tp, self._tp + self._fn)

    def specificity(self):
        r"""Return specificity.

        Specificity is defined as :math:`\frac{tn}{tn + fp}`

        AKA true negative rate (TNR)

        Returns
        -------
        float
            The specificity of the confusion table, or NaN if there are no
            condition negatives

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.specificity()
        0.75

        """
        return self._ratio(self._tn, self._tn + self._fp)

    def npv(self):
        r"""Return negative predictive value (NPV).

        NPV is defined as :math:`\frac{tn}{tn + fn}`

        Returns
        -------
        float
            The negative predictive value of the confusion table, or NaN if
            there are no test negatives

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.npv()
        0.6666666666666666

        """
        return self._ratio(self._tn, self._tn + self._fn)

    def fallout(self):
        r"""Return fall-out.

        Fall-out is defined as :math:`\frac{fp}{fp + tn}`

        AKA false positive rate (FPR)

        Returns
        -------
        float
            The fall-out of the confusion table, or NaN if there are no
            condition negatives

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.fallout()
        0.25

        """
        return self._ratio(self._fp, self._fp + self._tn)

    def fdr(self):
        r"""Return false discovery rate (FDR).

        False discovery rate is defined as :math:`\frac{fp}{fp + tp}`

        Returns
        -------
        float
            The false discovery rate of the confusion table, or NaN if there
            are no test positives

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.fdr()
        0.14285714285714285

        """
        return self._ratio(self._fp, self._fp + self._tp)

    def accuracy(self):
        r"""Return accuracy.

        Accuracy is defined as :math:`\frac{tp + tn}{population}`

        Returns
        -------
        float
            The accuracy of the confusion table, or NaN if the population is
            empty

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.accuracy()
        0.782608695652174

        """
        return self._ratio(self._tp + self._tn, self.population())

    def accuracy_gain(self):
        r"""Return gain in accuracy.

        The gain in accuracy is defined as:
        :math:`G(accuracy) = \frac{accuracy}{random~ accuracy}`

        Returns
        -------
        float
            The gain in accuracy of the confusion table, or NaN if the
            population is empty

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.accuracy_gain()
        1.4325259515570934

        """
        if self.population() == 0:
            return float('NaN')
        random_accuracy = (self.cond_pos_pop() / self.population()) ** 2 + (
            self.cond_neg_pop() / self.population()
        ) ** 2
        return self.accuracy() / random_accuracy

    def balanced_accuracy(self):
        r"""Return balanced accuracy.

        Balanced accuracy is defined as
        :math:`\frac{sensitivity + specificity}{2}`

        Returns
        -------
        float
            The balanced accuracy of the confusion table

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.balanced_accuracy()
        0.775

        """
        return 0.5 * (self.recall() + self.specificity())

    def informedness(self):
        """Return informedness.

        Informedness is defined as :math:`sensitivity + specificity - 1`.

        AKA Youden's J statistic (:cite:`Youden:1950`)

        AKA DeltaP'

        Returns
        -------
        float
            The informedness of the confusion table

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.informedness()
        0.55

        """
        return self.recall() + self.specificity() - 1

    def markedness(self):
        """Return markedness.

        Markedness is defined as :math:`precision + npv - 1`

        Returns
        -------
        float
            The markedness of the confusion table

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.markedness()
        0.5238095238095237

        """
        return self.precision() + self.npv() - 1

    def pr_amean(self):
        r"""Return arithmetic mean of precision & recall.

        The arithmetic mean of precision and recall is defined as:
        :math:`\frac{precision + recall}{2}`

        Returns
        -------
        float
            The arithmetic mean of the confusion table's precision & recall

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_amean()
        0.8285714285714285

        """
        return amean((self.precision(), self.recall()))

    def pr_gmean(self):
        r"""Return geometric mean of precision & recall.

        The geometric mean of precision and recall is defined as:
        :math:`\sqrt{precision \cdot recall}`

        Returns
        -------
        float
            The geometric mean of the confusion table's precision & recall

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_gmean()
        0.828078671210825

        """
        return gmean((self.precision(), self.recall()))

    def pr_hmean(self):
        r"""Return harmonic mean of precision & recall.

        The harmonic mean of precision and recall is defined as:
        :math:`\frac{2 \cdot precision \cdot recall}{precision + recall}`

        Returns
        -------
        float
            The harmonic mean of the confusion table's precision & recall

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_hmean()
        0.8275862068965516

        """
        return hmean((self.precision(), self.recall()))

    def pr_qmean(self):
        r"""Return quadratic mean of precision & recall.

        The quadratic mean of precision and recall is defined as:
        :math:`\sqrt{\frac{precision^{2} + recall^{2}}{2}}`

        Returns
        -------
        float
            The quadratic mean of the confusion table's precision & recall

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_qmean()
        0.8290638930598233

        """
        return qmean((self.precision(), self.recall()))

    def pr_cmean(self):
        r"""Return contraharmonic mean of precision & recall.

        The contraharmonic mean is:
        :math:`\frac{precision^{2} + recall^{2}}{precision + recall}`

        Returns
        -------
        float
            The contraharmonic mean of the confusion table's precision &
            recall

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_cmean()
        0.8295566502463055

        """
        return cmean((self.precision(), self.recall()))

    def pr_lmean(self):
        r"""Return logarithmic mean of precision & recall.

        The logarithmic mean is:
        0 if either precision or recall is 0,
        the precision if they are equal,
        otherwise :math:`\frac{precision - recall}
        {ln(precision) - ln(recall)}`

        Returns
        -------
        float
            The logarithmic mean of the confusion table's precision & recall

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_lmean()
        0.8282429171492667

        """
        precision = self.precision()
        recall = self.recall()
        # log(0) is undefined, so a zero operand short-circuits to 0.
        if not precision or not recall:
            return 0.0
        elif precision == recall:
            return precision
        return (precision - recall) / (math.log(precision) - math.log(recall))

    def pr_imean(self):
        r"""Return identric (exponential) mean of precision & recall.

        The identric mean is:
        precision if precision = recall,
        otherwise :math:`\frac{1}{e} \cdot
        \sqrt[precision - recall]{\frac{precision^{precision}}
        {recall^{recall}}}`

        Returns
        -------
        float
            The identric mean of the confusion table's precision & recall

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_imean()
        0.8284071826325543

        """
        return imean((self.precision(), self.recall()))

    def pr_seiffert_mean(self):
        r"""Return Seiffert's mean of precision & recall.

        Seiffert's mean of precision and recall is:
        :math:`\frac{precision - recall}{4 \cdot arctan
        \sqrt{\frac{precision}{recall}} - \pi}`

        It is defined in :cite:`Seiffert:1993`.

        Returns
        -------
        float
            Seiffert's mean of the confusion table's precision & recall

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_seiffert_mean()
        0.8284071696048312

        """
        return seiffert_mean((self.precision(), self.recall()))

    def pr_lehmer_mean(self, exp=2.0):
        r"""Return Lehmer mean of precision & recall.

        The Lehmer mean is:
        :math:`\frac{precision^{exp} + recall^{exp}}
        {precision^{exp-1} + recall^{exp-1}}`

        Parameters
        ----------
        exp : float
            The exponent of the Lehmer mean

        Returns
        -------
        float
            The Lehmer mean for the given exponent of the confusion table's
            precision & recall

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_lehmer_mean()
        0.8295566502463055

        """
        return lehmer_mean((self.precision(), self.recall()), exp)

    def pr_heronian_mean(self):
        r"""Return Heronian mean of precision & recall.

        The Heronian mean of precision and recall is defined as:
        :math:`\frac{precision + \sqrt{precision \cdot recall} + recall}{3}`

        Returns
        -------
        float
            The Heronian mean of the confusion table's precision & recall

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_heronian_mean()
        0.8284071761178939

        """
        return heronian_mean((self.precision(), self.recall()))

    def pr_hoelder_mean(self, exp=2):
        r"""Return Hölder (power/generalized) mean of precision & recall.

        The power mean of precision and recall is defined as:
        :math:`\sqrt[exp]{\frac{precision^{exp} + recall^{exp}}{2}}`
        for :math:`exp \ne 0`, and the geometric mean for :math:`exp = 0`

        Parameters
        ----------
        exp : float
            The exponent of the Hölder mean

        Returns
        -------
        float
            The Hölder mean for the given exponent of the confusion table's
            precision & recall

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_hoelder_mean()
        0.8290638930598233

        """
        return hoelder_mean((self.precision(), self.recall()), exp)

    def pr_agmean(self):
        """Return arithmetic-geometric mean of precision & recall.

        Iterates between arithmetic & geometric means until they converge to
        a single value (rounded to 12 digits)

        Returns
        -------
        float
            The arithmetic-geometric mean of the confusion table's precision
            & recall

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_agmean()
        0.8283250315702829

        """
        return agmean((self.precision(), self.recall()))

    def pr_ghmean(self):
        """Return geometric-harmonic mean of precision & recall.

        Iterates between geometric & harmonic means until they converge to
        a single value (rounded to 12 digits)

        Returns
        -------
        float
            The geometric-harmonic mean of the confusion table's precision &
            recall

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_ghmean()
        0.8278323841238441

        """
        return ghmean((self.precision(), self.recall()))

    def pr_aghmean(self):
        """Return arithmetic-geometric-harmonic mean of precision & recall.

        Iterates over arithmetic, geometric, & harmonic means until they
        converge to a single value (rounded to 12 digits), following the
        method described in :cite:`Raissouli:2009`.

        Returns
        -------
        float
            The arithmetic-geometric-harmonic mean of the confusion table's
            precision & recall

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_aghmean()
        0.8280786712108288

        """
        return aghmean((self.precision(), self.recall()))

    def fbeta_score(self, beta=1.0):
        r"""Return :math:`F_{\beta}` score.

        :math:`F_{\beta}` for a positive real value :math:`\beta` "measures
        the effectiveness of retrieval with respect to a user who
        attaches :math:`\beta` times as much importance to recall as
        precision" (van Rijsbergen 1979)

        :math:`F_{\beta}` score is defined as:
        :math:`(1 + \beta^2) \cdot \frac{precision \cdot recall}
        {((\beta^2 \cdot precision) + recall)}`

        Parameters
        ----------
        beta : float
            The :math:`\beta` parameter in the above formula

        Returns
        -------
        float
            The :math:`F_{\beta}` of the confusion table; 0.0 when precision
            and recall are both 0, and NaN when either of them is NaN

        Raises
        ------
        AttributeError
            Beta must be a positive real value

        Examples
        --------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.fbeta_score()
        0.8275862068965518
        >>> ct.fbeta_score(beta=0.1)
        0.8565371024734982

        """
        if beta <= 0:
            raise AttributeError('Beta must be a positive real value.')
        precision = self.precision()
        recall = self.recall()
        # With tp == 0 but fp > 0 and fn > 0 the formula degenerates to 0/0,
        # yet the equivalent count form
        # (1 + b^2) tp / ((1 + b^2) tp + b^2 fn + fp) is exactly 0.
        if precision == 0 and recall == 0:
            return 0.0
        return (
            (1 + beta ** 2)
            * precision
            * recall
            / ((beta ** 2 * precision) + recall)
        )

    def f2_score(self):
        """Return :math:`F_{2}`.

        The :math:`F_{2}` score emphasizes recall over precision in comparison
        to the :math:`F_{1}` score

        Returns
        -------
        float
            The :math:`F_{2}` of the confusion table

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.f2_score()
        0.8108108108108109

        """
        return self.fbeta_score(2.0)

    def fhalf_score(self):
        """Return :math:`F_{0.5}` score.

        The :math:`F_{0.5}` score emphasizes precision over recall in
        comparison to the :math:`F_{1}` score

        Returns
        -------
        float
            The :math:`F_{0.5}` score of the confusion table

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.fhalf_score()
        0.8450704225352114

        """
        return self.fbeta_score(0.5)

    def e_score(self, beta=1):
        r"""Return :math:`E`-score.

        This is Van Rijsbergen's effectiveness measure:
        :math:`E=1-F_{\beta}`.

        Parameters
        ----------
        beta : float
            The :math:`\beta` parameter in the above formula

        Returns
        -------
        float
            The :math:`E`-score of the confusion table

        Raises
        ------
        AttributeError
            Beta must be a positive real value

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.e_score()
        0.17241379310344818

        """
        return 1 - self.fbeta_score(beta)

    def f1_score(self):
        r"""Return :math:`F_{1}` score.

        :math:`F_{1}` score is the harmonic mean of precision and recall:
        :math:`2 \cdot \frac{precision \cdot recall}{precision + recall}`

        Returns
        -------
        float
            The :math:`F_{1}` of the confusion table

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.f1_score()
        0.8275862068965516

        """
        return self.pr_hmean()

    def f_measure(self):
        r"""Return :math:`F`-measure.

        :math:`F`-measure is the harmonic mean of precision and recall:
        :math:`2 \cdot \frac{precision \cdot recall}{precision + recall}`

        Returns
        -------
        float
            The :math:`F`-measure of the confusion table

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.f_measure()
        0.8275862068965516

        """
        return self.pr_hmean()

    def g_measure(self):
        r"""Return G-measure.

        :math:`G`-measure is the geometric mean of precision and recall:
        :math:`\sqrt{precision \cdot recall}`

        This is identical to the Fowlkes–Mallows (FM) index for two
        clusters.

        Returns
        -------
        float
            The :math:`G`-measure of the confusion table

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.g_measure()
        0.828078671210825

        """
        return self.pr_gmean()

    def mcc(self):
        r"""Return Matthews correlation coefficient (MCC).

        The Matthews correlation coefficient is defined in
        :cite:`Matthews:1975` as:
        :math:`\frac{(tp \cdot tn) - (fp \cdot fn)}
        {\sqrt{(tp + fp)(tp + fn)(tn + fp)(tn + fn)}}`

        This is equivalent to the geometric mean of informedness and
        markedness, defined above.

        Returns
        -------
        float
            The Matthews correlation coefficient of the confusion table, or
            NaN if any marginal total is 0

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.mcc()
        0.5367450401216932

        """
        marginals = self._marginal_product()
        if marginals == 0:
            return float('NaN')
        return ((self._tp * self._tn) - (self._fp * self._fn)) / math.sqrt(
            marginals
        )

    def significance(self):
        r"""Return the significance, :math:`\chi^{2}`.

        Significance is defined as:
        :math:`\chi^{2} =
        \frac{(tp \cdot tn - fp \cdot fn)^{2} (tp + tn + fp + fn)}
        {(tp + fp)(tp + fn)(tn + fp)(tn + fn)}`

        Also: :math:`\chi^{2} = MCC^{2} \cdot n`

        Returns
        -------
        float
            The significance of the confusion table, or NaN if any marginal
            total is 0

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.significance()
        66.26190476190476

        """
        marginals = self._marginal_product()
        if marginals == 0:
            return float('NaN')
        return (
            (self._tp * self._tn - self._fp * self._fn) ** 2
            * (self._tp + self._tn + self._fp + self._fn)
        ) / marginals

    def kappa_statistic(self):
        r"""Return κ statistic.

        The κ statistic is defined as:
        :math:`\kappa = \frac{accuracy - random~ accuracy}
        {1 - random~ accuracy}`

        The κ statistic compares the performance of the classifier relative to
        the performance of a random classifier. :math:`\kappa` = 0 indicates
        performance identical to random. :math:`\kappa` = 1 indicates perfect
        predictive success. :math:`\kappa` = -1 indicates perfect predictive
        failure.

        Returns
        -------
        float
            The κ statistic of the confusion table, or NaN if the population
            is empty or the random accuracy is 1 (e.g. every observation is
            a true positive), in which case κ is undefined

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.kappa_statistic()
        0.5344129554655871

        """
        if self.population() == 0:
            return float('NaN')
        random_accuracy = (
            (self._tn + self._fp) * (self._tn + self._fn)
            + (self._fn + self._tp) * (self._fp + self._tp)
        ) / self.population() ** 2
        # A degenerate table (all mass in one cell) gives random accuracy 1;
        # the ratio is then 0/0, so report NaN instead of raising.
        if random_accuracy == 1:
            return float('NaN')
        return (self.accuracy() - random_accuracy) / (1 - random_accuracy)
if __name__ == '__main__':
    # When executed as a script, run the doctest examples embedded in this
    # module's docstrings.
    import doctest

    doctest.testmod()