# Source code for abydos.phonetic._koelner

# Copyright 2014-2020 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic._koelner.

Kölner Phonetik
"""

from unicodedata import normalize as unicode_normalize

from deprecation import deprecated

from ._phonetic import _Phonetic
from .. import __version__

# Public names exported by this module: the Koelner encoder class and the
# three module-level wrapper functions defined further down in this file.
__all__ = [
    'Koelner',
    'koelner_phonetik',
    'koelner_phonetik_alpha',
    'koelner_phonetik_num_to_alpha',
]


class Koelner(_Phonetic):
    """Kölner Phonetik.

    Based on the algorithm defined by :cite:`Postel:1969`.

    .. versionadded:: 0.3.6
    """

    # Letters that are encoded as '0' (the vowels plus J and Y)
    _uc_v_set = set('AEIOUJY')

    # str.translate table mapping each code digit to a representative letter
    _num_trans = dict(zip((ord(_) for _ in '012345678'), 'APTFKLNRS'))
    # Digits that may occur in a numeric code; _to_alpha drops anything else
    _num_set = set('012345678')

    def encode(self, word):
        """Return the Kölner Phonetik (numeric output) code for a word.

        While the output code is numeric, it is still a str because 0s can
        lead the code.

        Parameters
        ----------
        word : str
            The word to transform

        Returns
        -------
        str
            The Kölner Phonetik value as a numeric string (the empty string
            if no encodable letters remain after filtering)

        Examples
        --------
        >>> pe = Koelner()
        >>> pe.encode('Christopher')
        '478237'
        >>> pe.encode('Niall')
        '65'
        >>> pe.encode('Smith')
        '862'
        >>> pe.encode('Schmidt')
        '862'
        >>> pe.encode('Müller')
        '657'
        >>> pe.encode('Zimmermann')
        '86766'


        .. versionadded:: 0.1.0
        .. versionchanged:: 0.3.6
            Encapsulated in class

        """

        def _after(word, pos, letters):
            """Return True if word[pos] follows one of the supplied letters.

            Parameters
            ----------
            word : str
                The word to check
            pos : int
                Position within word to check
            letters : set
                Letters to confirm precede word[pos]

            Returns
            -------
            bool
                True if word[pos] follows a value in letters

            .. versionadded:: 0.1.0

            """
            return pos > 0 and word[pos - 1] in letters

        def _before(word, pos, letters):
            """Return True if word[pos] precedes one of the supplied letters.

            Parameters
            ----------
            word : str
                The word to check
            pos : int
                Position within word to check
            letters : set
                Letters to confirm follow word[pos]

            Returns
            -------
            bool
                True if word[pos] precedes a value in letters

            .. versionadded:: 0.1.0

            """
            return pos + 1 < len(word) and word[pos + 1] in letters

        word = unicode_normalize('NFKD', word.upper())
        # NOTE(review): NFKD has already split precomposed umlauts into a
        # base letter plus a combining mark at this point, so these three
        # replacements look like no-ops -- confirm. The resulting code is
        # the same either way because every vowel is encoded as '0'.
        word = word.replace('Ä', 'AE')
        word = word.replace('Ö', 'OE')
        word = word.replace('Ü', 'UE')
        # Keep only characters in self._uc_set (inherited; presumably the
        # uppercase Latin letters -- verify against _Phonetic)
        word = ''.join(c for c in word if c in self._uc_set)

        # Nothing to convert, return base case
        if not word:
            return ''

        # Collect one code fragment per letter and join once at the end.
        # Letters matching no branch (e.g. H) contribute nothing.
        codes = []
        for i, char in enumerate(word):
            if char in self._uc_v_set:
                codes.append('0')
            elif char == 'B':
                codes.append('1')
            elif char == 'P':
                # PH is coded like F
                codes.append('3' if _before(word, i, {'H'}) else '1')
            elif char in {'D', 'T'}:
                if _before(word, i, {'C', 'S', 'Z'}):
                    codes.append('8')
                else:
                    codes.append('2')
            elif char in {'F', 'V', 'W'}:
                codes.append('3')
            elif char in {'G', 'K', 'Q'}:
                codes.append('4')
            elif char == 'C':
                if _after(word, i, {'S', 'Z'}):
                    codes.append('8')
                elif i == 0:
                    # Word-initial C additionally counts L and R as
                    # followers that select code '4'
                    if _before(
                        word, i, {'A', 'H', 'K', 'L', 'O', 'Q', 'R', 'U', 'X'}
                    ):
                        codes.append('4')
                    else:
                        codes.append('8')
                elif _before(word, i, {'A', 'H', 'K', 'O', 'Q', 'U', 'X'}):
                    codes.append('4')
                else:
                    codes.append('8')
            elif char == 'X':
                # X contributes two digits unless it follows C, K or Q
                if _after(word, i, {'C', 'K', 'Q'}):
                    codes.append('8')
                else:
                    codes.append('48')
            elif char == 'L':
                codes.append('5')
            elif char in {'M', 'N'}:
                codes.append('6')
            elif char == 'R':
                codes.append('7')
            elif char in {'S', 'Z'}:
                codes.append('8')

        # Collapse runs of identical digits first (inherited helper), ...
        sdx = self._delete_consecutive_repeats(''.join(codes))

        # ... then drop every '0' except one in the leading position
        if sdx:
            sdx = sdx[:1] + sdx[1:].replace('0', '')

        return sdx

    def _to_alpha(self, num):
        """Convert a Kölner Phonetik code from numeric to alphabetic.

        Parameters
        ----------
        num : str or int
            A numeric Kölner Phonetik representation. An int is converted
            with ``str()`` first, so it cannot carry leading zeros.

        Returns
        -------
        str
            An alphabetic representation of the same word

        Examples
        --------
        >>> pe = Koelner()
        >>> pe._to_alpha('862')
        'SNT'
        >>> pe._to_alpha('657')
        'NLR'
        >>> pe._to_alpha('86766')
        'SNRNN'
        >>> pe._to_alpha(862)
        'SNT'


        .. versionadded:: 0.1.0
        .. versionchanged:: 0.3.6
            Encapsulated in class

        """
        # str() makes the documented int input work; iterating an int
        # directly would raise TypeError. Characters outside 0-8 are dropped.
        num = ''.join(c for c in str(num) if c in self._num_set)
        return num.translate(self._num_trans)

    def encode_alpha(self, word):
        """Return the Kölner Phonetik (alphabetic output) code for a word.

        Parameters
        ----------
        word : str
            The word to transform

        Returns
        -------
        str
            The Kölner Phonetik value as an alphabetic string

        Examples
        --------
        >>> pe = Koelner()
        >>> pe.encode_alpha('Smith')
        'SNT'
        >>> pe.encode_alpha('Schmidt')
        'SNT'
        >>> pe.encode_alpha('Müller')
        'NLR'
        >>> pe.encode_alpha('Zimmermann')
        'SNRNN'


        .. versionadded:: 0.1.0
        .. versionchanged:: 0.3.6
            Encapsulated in class

        """
        # Use this instance's own methods rather than the deprecated
        # module-level wrappers, which would emit DeprecationWarnings and
        # bypass any subclass overrides.
        return self._to_alpha(self.encode(word))
@deprecated(
    deprecated_in='0.4.0',
    removed_in='0.6.0',
    current_version=__version__,
    details='Use the Koelner.encode method instead.',
)
def koelner_phonetik(word):
    """Return the Kölner Phonetik (numeric output) code for a word.

    Deprecated convenience wrapper: builds a fresh :py:class:`Koelner`
    instance and delegates to :py:meth:`Koelner.encode`.

    Parameters
    ----------
    word : str
        The word to transform

    Returns
    -------
    str
        The Kölner Phonetik value as a numeric string

    Examples
    --------
    >>> koelner_phonetik('Christopher')
    '478237'
    >>> koelner_phonetik('Niall')
    '65'
    >>> koelner_phonetik('Smith')
    '862'
    >>> koelner_phonetik('Schmidt')
    '862'
    >>> koelner_phonetik('Müller')
    '657'
    >>> koelner_phonetik('Zimmermann')
    '86766'

    .. versionadded:: 0.1.0

    """
    encoder = Koelner()
    return encoder.encode(word)
@deprecated(
    deprecated_in='0.4.0',
    removed_in='0.6.0',
    current_version=__version__,
    details='Use the Koelner._to_alpha method instead.',
)
def koelner_phonetik_num_to_alpha(num):
    """Convert a Kölner Phonetik code from numeric to alphabetic.

    Deprecated convenience wrapper: builds a fresh :py:class:`Koelner`
    instance and delegates to :py:meth:`Koelner._to_alpha`.

    Parameters
    ----------
    num : str or int
        A numeric Kölner Phonetik representation

    Returns
    -------
    str
        An alphabetic representation of the same word

    Examples
    --------
    >>> koelner_phonetik_num_to_alpha('862')
    'SNT'
    >>> koelner_phonetik_num_to_alpha('657')
    'NLR'
    >>> koelner_phonetik_num_to_alpha('86766')
    'SNRNN'

    .. versionadded:: 0.1.0

    """
    encoder = Koelner()
    return encoder._to_alpha(num)
@deprecated(
    deprecated_in='0.4.0',
    removed_in='0.6.0',
    current_version=__version__,
    details='Use the Koelner.encode_alpha method instead.',
)
def koelner_phonetik_alpha(word):
    """Return the Kölner Phonetik (alphabetic output) code for a word.

    Deprecated convenience wrapper: builds a fresh :py:class:`Koelner`
    instance and delegates to :py:meth:`Koelner.encode_alpha`.

    Parameters
    ----------
    word : str
        The word to transform

    Returns
    -------
    str
        The Kölner Phonetik value as an alphabetic string

    Examples
    --------
    >>> koelner_phonetik_alpha('Smith')
    'SNT'
    >>> koelner_phonetik_alpha('Schmidt')
    'SNT'
    >>> koelner_phonetik_alpha('Müller')
    'NLR'
    >>> koelner_phonetik_alpha('Zimmermann')
    'SNRNN'

    .. versionadded:: 0.1.0

    """
    encoder = Koelner()
    return encoder.encode_alpha(word)
if __name__ == '__main__':
    # When executed as a script, run the examples embedded in the docstrings.
    from doctest import testmod

    testmod()