# Source code for abydos.phonetic._norphone

# Copyright 2014-2020 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic._norphone.

Norphone
"""

from deprecation import deprecated

from ._phonetic import _Phonetic
from .. import __version__

__all__ = ['Norphone', 'norphone']


class Norphone(_Phonetic):
    """Norphone phonetic encoder.

    Lars Marius Garshol's reference implementation is available in
    :cite:`Garshol:2015`.

    Norphone was designed for Norwegian, but this implementation has been
    extended to support Swedish vowels as well.

    The "not implemented" rules from the rule set in the above file are
    incorporated here too.

    .. versionadded:: 0.3.6
    """

    # Upper-case letters treated as vowels (includes Å, Æ, Ø, Ä and Ö).
    _uc_v_set = {'A', 'E', 'I', 'O', 'U', 'Y', 'Å', 'Æ', 'Ø', 'Ä', 'Ö'}

    # Substitution tables, keyed by the length of the substring they match.
    # Each inner dict maps an upper-case substring to its output code.
    _replacements = {
        4: {'SKEI': 'X'},
        3: {'SKJ': 'X', 'KEI': 'X'},
        2: {
            'CH': 'K',
            'CK': 'K',
            'GJ': 'J',
            'GH': 'K',
            'HG': 'K',
            'HJ': 'J',
            'HL': 'L',
            'HR': 'R',
            'KJ': 'X',
            'KI': 'X',
            'LD': 'L',
            'ND': 'N',
            'PH': 'F',
            'TH': 'T',
            'SJ': 'X',
        },
        1: {'W': 'V', 'X': 'KS', 'Z': 'S', 'D': 'T', 'G': 'K'},
    }

    def encode(self, word):
        """Encode a word as its Norphone code.

        The word is upper-cased, a word-initial rule (if any) is applied,
        the word-final ``DT`` / vowel + ``D`` rule is applied, the remaining
        characters are rewritten using the longest matching entry of
        ``_replacements`` (non-initial vowels are dropped), and finally
        consecutive repeated characters are collapsed.

        Parameters
        ----------
        word : str
            The word to transform

        Returns
        -------
        str
            The Norphone code

        Examples
        --------
        >>> pe = Norphone()
        >>> pe.encode('Hansen')
        'HNSN'
        >>> pe.encode('Larsen')
        'LRSN'
        >>> pe.encode('Aagaard')
        'ÅKRT'
        >>> pe.encode('Braaten')
        'BRTN'
        >>> pe.encode('Sandvik')
        'SNVK'


        .. versionadded:: 0.3.0
        .. versionchanged:: 0.3.6
            Encapsulated in class

        """
        word = word.upper()

        # Word-initial rules, tried in order: (prefix, emitted code). The
        # matched prefix is consumed, i.e. scanning resumes right after it.
        initial_rules = (
            ('AA', 'Å'),
            ('GI', 'J'),
            ('SKY', 'X'),
            ('EI', 'Æ'),
            ('KY', 'X'),
            ('C', 'K'),
            ('Ä', 'Æ'),
            ('Ö', 'Ø'),
        )
        pieces = []
        start = 0
        for prefix, initial_code in initial_rules:
            if word.startswith(prefix):
                pieces.append(initial_code)
                start = len(prefix)
                break

        # Word-final rules: 'DT' collapses to 'T'; otherwise a final
        # vowel + 'D' pair is removed from the word.
        # Though the rules indicate this rule applies in all positions, the
        # reference implementation indicates it applies only in final position.
        if word.endswith('DT'):
            word = word[:-2] + 'T'
        elif word[-1:] == 'D' and word[-2:-1] in self._uc_v_set:
            word = word[:-2]

        tables = self._replacements
        # Longest substrings are tried first at every position.
        lengths = sorted(tables, reverse=True)
        vowels = self._uc_v_set

        idx = start
        end = len(word)
        while idx < end:
            for length in lengths:
                chunk = word[idx : idx + length]
                if chunk in tables[length]:
                    pieces.append(tables[length][chunk])
                    idx += length
                    break
            else:
                # No substitution applies here: keep the character unless
                # it is a vowel in a non-initial position.
                letter = word[idx]
                if idx == 0 or letter not in vowels:
                    pieces.append(letter)
                idx += 1

        return self._delete_consecutive_repeats(''.join(pieces))
@deprecated(
    deprecated_in='0.4.0',
    removed_in='0.6.0',
    current_version=__version__,
    details='Use the Norphone.encode method instead.',
)
def norphone(word):
    """Return the Norphone code of a word.

    Deprecated convenience wrapper that builds a :py:class:`Norphone`
    instance and delegates to :py:meth:`Norphone.encode`.

    Parameters
    ----------
    word : str
        The word to transform

    Returns
    -------
    str
        The Norphone code

    Examples
    --------
    >>> norphone('Hansen')
    'HNSN'
    >>> norphone('Larsen')
    'LRSN'
    >>> norphone('Aagaard')
    'ÅKRT'
    >>> norphone('Braaten')
    'BRTN'
    >>> norphone('Sandvik')
    'SNVK'

    .. versionadded:: 0.3.0

    """
    encoder = Norphone()
    return encoder.encode(word)
if __name__ == '__main__':
    # When executed directly, run the examples embedded in the docstrings.
    import doctest

    doctest.testmod()