Source code for abydos.phonetic._meta_soundex

# Copyright 2018-2020 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic._meta_soundex.

MetaSoundex
"""

from deprecation import deprecated

from ._metaphone import Metaphone
from ._phonetic import _Phonetic
from ._phonetic_spanish import PhoneticSpanish
from ._soundex import Soundex
from ._spanish_metaphone import SpanishMetaphone
from .. import __version__

__all__ = ['MetaSoundex', 'metasoundex']


class MetaSoundex(_Phonetic):
    """MetaSoundex phonetic encoder.

    Implements the algorithm of :cite:`Koneru:2017` by passing a word
    through a Metaphone-style encoder and then through a Soundex-style
    encoder. As in the original, only English ('en') and Spanish ('es')
    are supported.

    .. versionadded:: 0.3.6
    """

    # Digit assigned to each uppercase ASCII letter (keyed by code point).
    # encode() applies this table to the leading character of English codes.
    _trans = {
        ord(letter): digit
        for letter, digit in zip(
            'ABCDEFGHIJKLMNOPQRSTUVWXYZ', '07430755015866075943077514'
        )
    }

    def __init__(self, lang='en'):
        """Set up the pair of encoders used for the chosen language.

        Parameters
        ----------
        lang : str
            Either ``en`` for English or ``es`` for Spanish. Any value
            other than ``en`` selects the Spanish encoders.


        .. versionadded:: 0.4.0

        """
        self._lang = lang
        english = lang == 'en'
        # English chains Metaphone into Soundex; everything else chains
        # SpanishMetaphone into PhoneticSpanish.
        self._sdx = Soundex() if english else PhoneticSpanish()
        self._meta = Metaphone() if english else SpanishMetaphone()

    def encode_alpha(self, word):
        """Return the alphabetic MetaSoundex code for a word.

        The word is encoded with the metaphone-stage ``encode_alpha`` and
        the result is then encoded with the soundex-stage ``encode_alpha``.

        Parameters
        ----------
        word : str
            The word to transform

        Returns
        -------
        str
            The MetaSoundex code

        Examples
        --------
        >>> pe = MetaSoundex()
        >>> pe.encode_alpha('Smith')
        'SN'
        >>> pe.encode_alpha('Waters')
        'WTRK'
        >>> pe.encode_alpha('James')
        'JNK'
        >>> pe.encode_alpha('Schmidt')
        'SNT'
        >>> pe.encode_alpha('Ashcroft')
        'AKRP'

        >>> pe = MetaSoundex(lang='es')
        >>> pe.encode_alpha('Perez')
        'PRS'
        >>> pe.encode_alpha('Martinez')
        'NRTNS'
        >>> pe.encode_alpha('Gutierrez')
        'GTRRS'
        >>> pe.encode_alpha('Santiago')
        'SNTG'
        >>> pe.encode_alpha('Nicolás')
        'NKLS'


        .. versionadded:: 0.4.0

        """
        metaphone_alpha = self._meta.encode_alpha(word)
        return self._sdx.encode_alpha(metaphone_alpha)

    def encode(self, word):
        """Return the numeric MetaSoundex code for a word.

        The word is encoded by the metaphone stage and that result by the
        soundex stage. For English, the first character of the resulting
        code is additionally mapped through the class's letter-to-digit
        table.

        Parameters
        ----------
        word : str
            The word to transform

        Returns
        -------
        str
            The MetaSoundex code

        Examples
        --------
        >>> pe = MetaSoundex()
        >>> pe.encode('Smith')
        '4500'
        >>> pe.encode('Waters')
        '7362'
        >>> pe.encode('James')
        '1520'
        >>> pe.encode('Schmidt')
        '4530'
        >>> pe.encode('Ashcroft')
        '0261'

        >>> pe = MetaSoundex(lang='es')
        >>> pe.encode('Perez')
        '094'
        >>> pe.encode('Martinez')
        '69364'
        >>> pe.encode('Gutierrez')
        '83994'
        >>> pe.encode('Santiago')
        '4638'
        >>> pe.encode('Nicolás')
        '6754'


        .. versionadded:: 0.3.0
        .. versionchanged:: 0.3.6
            Encapsulated in class

        """
        code = self._sdx.encode(self._meta.encode(word))
        if self._lang != 'en':
            return code
        # English only: swap the leading character for its digit.
        return code[0].translate(self._trans) + code[1:]
@deprecated(
    deprecated_in='0.4.0',
    removed_in='0.6.0',
    current_version=__version__,
    details='Use the MetaSoundex.encode method instead.',
)
def metasoundex(word, lang='en'):
    """Return the MetaSoundex code for a word.

    Deprecated convenience wrapper: builds a :py:class:`MetaSoundex` for
    ``lang`` and delegates to :py:meth:`MetaSoundex.encode`.

    Parameters
    ----------
    word : str
        The word to transform
    lang : str
        Either ``en`` for English or ``es`` for Spanish

    Returns
    -------
    str
        The MetaSoundex code

    Examples
    --------
    >>> metasoundex('Smith')
    '4500'
    >>> metasoundex('Waters')
    '7362'
    >>> metasoundex('James')
    '1520'
    >>> metasoundex('Schmidt')
    '4530'
    >>> metasoundex('Ashcroft')
    '0261'

    >>> metasoundex('Perez', lang='es')
    '094'
    >>> metasoundex('Martinez', lang='es')
    '69364'
    >>> metasoundex('Gutierrez', lang='es')
    '83994'
    >>> metasoundex('Santiago', lang='es')
    '4638'
    >>> metasoundex('Nicolás', lang='es')
    '6754'

    .. versionadded:: 0.3.0

    """
    encoder = MetaSoundex(lang)
    return encoder.encode(word)
# When executed directly as a script, run the examples embedded in this
# module's docstrings.
if __name__ == '__main__':
    import doctest

    doctest.testmod()