Source code for abydos.fingerprint._consonant

# Copyright 2019-2020 by Christopher C. Little.
# This file is part of Abydos.
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.


"""Taft's consonant coding."""

from itertools import groupby

from ._fingerprint import _Fingerprint

__all__ = ['Consonant']

class Consonant(_Fingerprint):
    """Consonant Coding Fingerprint.

    Based on the consonant coding from :cite:`Taft:1970`, variants 1, 2, 3,
    1-D, 2-D, and 3-D.

    .. versionadded:: 0.4.1
    """

    def __init__(self, variant=1, doubles=True, vowels=None):
        """Initialize Consonant instance.

        Parameters
        ----------
        variant : int
            Selects between Taft's 3 variants, which assign to the vowel set
            one of:

                1. A, E, I, O, & U
                2. A, E, I, O, U, W, & Y
                3. A, E, I, O, U, W, H, & Y

            Ignored when ``vowels`` is not None.
        doubles : bool
            If set to False, each run of identical adjacent letters is
            collapsed to a single letter before vowels are removed (the
            "-D" variants).
        vowels : list, set, or str
            Setting vowels to a non-None value overrides the variant setting
            and defines the set of letters to be removed from the input.
            Each element is uppercased before being stored.


        .. versionadded:: 0.4.1

        """
        # Name the current class so that the lookup starts at the direct
        # parent; naming _Fingerprint here would skip its initializer.
        super(Consonant, self).__init__()

        self._doubles = doubles

        if vowels is None:
            vowel_set = set('AEIOU')
            if variant > 1:
                vowel_set.update('WY')
            if variant > 2:
                vowel_set.add('H')
        else:
            # Input is uppercased in fingerprint(), so store vowels the
            # same way to make membership tests case-insensitive.
            vowel_set = {letter.upper() for letter in vowels}
        self._vowels = vowel_set

    def fingerprint(self, word):
        """Return the consonant coding.

        The word is uppercased, repeated adjacent letters are optionally
        collapsed, and every vowel after the first character is removed.
        The first character is always kept, even if it is a vowel.

        Parameters
        ----------
        word : str
            The word to fingerprint

        Returns
        -------
        str
            The consonant coding

        Examples
        --------
        >>> cf = Consonant()
        >>> cf.fingerprint('hat')
        'HT'
        >>> cf.fingerprint('niall')
        'NLL'
        >>> cf.fingerprint('colin')
        'CLN'
        >>> cf.fingerprint('atcg')
        'ATCG'
        >>> cf.fingerprint('entreatment')
        'ENTRTMNT'


        .. versionadded:: 0.4.1

        """
        # uppercase
        word = word.upper()

        # remove repeats if in -D variant
        if not self._doubles:
            word = ''.join(letter for letter, _ in groupby(word))

        # remove vowels, but never the leading character; slicing (rather
        # than indexing) keeps the empty string safe
        head, tail = word[:1], word[1:]
        return head + ''.join(
            letter for letter in tail if letter not in self._vowels
        )
if __name__ == '__main__':
    # Run the doctest examples embedded in this module's docstrings when
    # the file is executed directly as a script.
    from doctest import testmod

    testmod()