# Source code for abydos.phonetic._phonic

# Copyright 2019-2020 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic._phonic.

Phonic
"""

from ._phonetic import _Phonetic

__all__ = ['PHONIC']


class PHONIC(_Phonetic):
    """PHONIC code.

    PHONIC is a Soundex-like algorithm defined in :cite:`Taft:1970`.

    .. versionadded:: 0.4.1
    """

    # Two-letter sequences; these are tried before the single-letter table
    # and consume both letters when they match.
    _trans2 = {
        'CH': '6',
        'SH': '6',
        'PH': '8',
        'CE': '0',
        'CI': '0',
        'CY': '0',
    }

    # Single-letter codes. Any character found in neither table is encoded
    # as a '.' placeholder, which separates repeats and is later dropped.
    _trans1 = {
        'D': '1',
        'T': '1',
        'N': '2',
        'M': '3',
        'R': '4',
        'L': '5',
        'J': '6',
        'C': '7',
        'K': '7',
        'G': '7',
        'Q': '7',
        'X': '7',
        'F': '8',
        'V': '8',
        'B': '9',
        'P': '9',
        'S': '0',
        'Z': '0',
    }

    # str.translate table mapping each code digit to a representative letter
    _alphabetic = dict(zip((ord(_) for _ in '0123456789'), 'STNMRLJKFP'))

    def __init__(self, max_length=5, zero_pad=True, extended=False):
        """Initialize PHONIC instance.

        Parameters
        ----------
        max_length : int
            The length of the code returned (defaults to 5). Values are
            clamped to the range 5..64; -1 selects the maximum of 64.
        zero_pad : bool
            Pad the end of the return value with 0s to achieve a max_length
            string
        extended : bool
            If True, this uses Taft's 'Extended PHONIC coding' mode, which
            simply omits the first character of the code.


        .. versionadded:: 0.4.1

        """
        # Require a max_length of at least 5 and not more than 64
        if max_length != -1:
            self._max_length = min(max(5, max_length), 64)
        else:
            self._max_length = 64
        self._zero_pad = zero_pad
        self._extended = extended

    def encode_alpha(self, word):
        """Return the alphabetic PHONIC code for a word.

        The word is encoded in extended mode without zero padding, and the
        resulting digits are then mapped to letters.

        Parameters
        ----------
        word : str
            The word to transform

        Returns
        -------
        str
            The alphabetic PHONIC value

        Examples
        --------
        >>> pe = PHONIC()
        >>> pe.encode_alpha('Christopher')
        'JRSTF'
        >>> pe.encode_alpha('Niall')
        'NL'
        >>> pe.encode_alpha('Smith')
        'SMT'
        >>> pe.encode_alpha('Schmidt')
        'SJMT'


        .. versionadded:: 0.4.1

        """
        save_pad = self._zero_pad
        save_ext = self._extended
        self._zero_pad = False
        self._extended = True
        try:
            code = self.encode(word)
        finally:
            # Restore the caller's configuration even if encode() raises, so
            # a failed call cannot leave the instance in extended mode.
            self._zero_pad = save_pad
            self._extended = save_ext
        return code.translate(self._alphabetic)

    def encode(self, word):
        """Return the PHONIC code for a word.

        Parameters
        ----------
        word : str
            The word to transform

        Returns
        -------
        str
            The PHONIC code

        Examples
        --------
        >>> pe = PHONIC()
        >>> pe.encode('Christopher')
        'C6401'
        >>> pe.encode('Niall')
        'N2500'
        >>> pe.encode('Smith')
        'S0310'
        >>> pe.encode('Schmidt')
        'S0631'


        .. versionadded:: 0.4.1

        """
        # uppercase
        word = word.upper()

        code = []
        pos = 0
        while pos < len(word):
            pair = word[pos : pos + 2]
            if pair in self._trans2:
                code.append(self._trans2[pair])
                # A digraph consumes two letters: one here, one below
                pos += 1
            elif word[pos] in self._trans1:
                code.append(self._trans1[word[pos]])
            else:
                # Uncoded character: placeholder that separates repeats
                code.append('.')
            pos += 1

        code = ''.join(code)
        # Collapse runs of identical symbols first, then drop placeholders,
        # so equal codes separated by an uncoded character both survive.
        code = self._delete_consecutive_repeats(code)
        code = code.replace('.', '')

        if self._zero_pad:
            # The pad width leaves one position for the leading letter
            code += '0' * (self._max_length - 1 - len(code))

        if not self._extended:
            # Standard mode prefixes the (uppercased) first character
            code = word[:1] + code

        return code[: self._max_length]
if __name__ == '__main__':
    # When executed as a script, run the doctests embedded in the docstrings.
    from doctest import testmod

    testmod()