Source code for abydos.phonetic._onca

# Copyright 2018-2020 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic._onca.

Oxford Name Compression Algorithm (ONCA)
"""

from deprecation import deprecated

from ._nysiis import NYSIIS
from ._phonetic import _Phonetic
from ._soundex import Soundex
from .. import __version__

__all__ = ['ONCA', 'onca']


class ONCA(_Phonetic):
    """Oxford Name Compression Algorithm (ONCA).

    An implementation of the Oxford Name Compression Algorithm, following
    :cite:`Gill:1997`.

    The first step of the algorithm is described as an "anglicised version
    of the NYSIIS method", for which no complete description could be found.
    The standard NYSIIS algorithm is used in its place, so this is likely
    not a precisely correct implementation.

    .. versionadded:: 0.3.6
    """

    def __init__(self, max_length=4, zero_pad=True):
        """Initialize ONCA instance.

        Parameters
        ----------
        max_length : int
            The maximum length (default 4) of the code to return
        zero_pad : bool
            Pad the end of the return value with 0s to achieve a max_length
            string (this option is forwarded to the Soundex stage)


        .. versionadded:: 0.4.0

        """
        # In the most extreme case, 3 characters of NYSIIS input can be
        # compressed to one character of output, so the NYSIIS stage is
        # given triple the max_length.
        nysiis_max_length = max_length * 3
        self._nysiis = NYSIIS(max_length=nysiis_max_length)
        self._soundex = Soundex(max_length=max_length, zero_pad=zero_pad)

    def encode_alpha(self, word):
        """Return the alphabetic ONCA code for a word.

        The word is passed through the NYSIIS encoder's ``encode_alpha`` and
        the result is then passed through the Soundex encoder's
        ``encode_alpha``.

        Parameters
        ----------
        word : str
            The word to transform

        Returns
        -------
        str
            The alphabetic ONCA code

        Examples
        --------
        >>> pe = ONCA()
        >>> pe.encode_alpha('Christopher')
        'CRKT'
        >>> pe.encode_alpha('Niall')
        'NL'
        >>> pe.encode_alpha('Smith')
        'SNT'
        >>> pe.encode_alpha('Schmidt')
        'SNT'


        .. versionadded:: 0.4.0

        """
        nysiis_alpha = self._nysiis.encode_alpha(word)
        return self._soundex.encode_alpha(nysiis_alpha)

    def encode(self, word):
        """Return the Oxford Name Compression Algorithm (ONCA) code for a word.

        The word is first encoded with NYSIIS, and that intermediate code is
        then encoded with Soundex.

        Parameters
        ----------
        word : str
            The word to transform

        Returns
        -------
        str
            The ONCA code

        Examples
        --------
        >>> pe = ONCA()
        >>> pe.encode('Christopher')
        'C623'
        >>> pe.encode('Niall')
        'N400'
        >>> pe.encode('Smith')
        'S530'
        >>> pe.encode('Schmidt')
        'S530'


        .. versionadded:: 0.3.0
        .. versionchanged:: 0.3.6
            Encapsulated in class

        """
        nysiis_code = self._nysiis.encode(word)
        return self._soundex.encode(nysiis_code)
@deprecated(
    deprecated_in='0.4.0',
    removed_in='0.6.0',
    current_version=__version__,
    details='Use the ONCA.encode method instead.',
)
def onca(word, max_length=4, zero_pad=True):
    """Return the Oxford Name Compression Algorithm (ONCA) code for a word.

    This is a deprecated convenience wrapper: it builds an :py:class:`ONCA`
    instance from the given options and calls :py:meth:`ONCA.encode` on it.

    Parameters
    ----------
    word : str
        The word to transform
    max_length : int
        The maximum length (default 4) of the code to return
    zero_pad : bool
        Pad the end of the return value with 0s to achieve a max_length string

    Returns
    -------
    str
        The ONCA code

    Examples
    --------
    >>> onca('Christopher')
    'C623'
    >>> onca('Niall')
    'N400'
    >>> onca('Smith')
    'S530'
    >>> onca('Schmidt')
    'S530'

    .. versionadded:: 0.3.0

    """
    encoder = ONCA(max_length=max_length, zero_pad=zero_pad)
    return encoder.encode(word)
if __name__ == '__main__':
    # When executed as a script, run the doctest examples embedded in this
    # module's docstrings.
    from doctest import testmod

    testmod()