Source code for abydos.fingerprint._extract_position_frequency

# Copyright 2019-2020 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.fingerprint._extract_position_frequence.

Taft's extract - position & frequency coding
"""

from ._fingerprint import _Fingerprint

__all__ = ['ExtractPositionFrequency']


[docs]class ExtractPositionFrequency(_Fingerprint): """Extract - Position & Frequency fingerprint. Based on the extract - position & frequency coding from :cite:`Taft:1970`. .. versionadded:: 0.4.1 """ _frequency = { x: y for x, y in zip( 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', ( 5, 1, 5, 0, 7, 1, 2, 5, 6, 0, 1, 5, 1, 3, 4, 3, 0, 4, 5, 3, 4, 1, 1, 0, 2, 1, ), ) } _position = (0, 1, 2, 3, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7)
[docs] def fingerprint(self, word): """Return the extract - position & frequency coding. Parameters ---------- word : str The word to fingerprint Returns ------- int The extract - position & frequency coding Examples -------- >>> fp = ExtractPositionFrequency() >>> fp.fingerprint('hat') 'HAT' >>> fp.fingerprint('niall') 'NILL' >>> fp.fingerprint('colin') 'COLN' >>> fp.fingerprint('atcg') 'ATCG' >>> fp.fingerprint('entreatment') 'NMNT' .. versionadded:: 0.4.1 """ # uppercase & reverse word = [_ for _ in word.upper() if _ in self._frequency] scores = [[] for _ in range(len(word))] pos = 0 for i in range(len(word)): scores[pos].append(self._frequency[word[pos]]) scores[pos][0] += self._position[min(i, 15)] scores[pos].append(len(word) + pos if pos < 0 else pos) pos = -(pos if pos < 0 else pos + 1) positions = sorted(pos[1] for pos in sorted(scores, reverse=True)[-4:]) return ''.join(word[_] for _ in positions)
if __name__ == '__main__': import doctest doctest.testmod()