# Source code for abydos.distance._lcprefix

# Copyright 2018-2020 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.distance._lcprefix.

Longest common prefix
"""

from os.path import commonprefix

from ._distance import _Distance

__all__ = ['LCPrefix']


class LCPrefix(_Distance):
    """Longest common prefix.

    Provides the longest common prefix of a collection of strings, its
    length, and a similarity score normalized by the longest input string.

    .. versionadded:: 0.4.0
    """

    def lcprefix(self, strings):
        """Return the longest common prefix of a list of strings.

        Longest common prefix (LCPrefix).

        This delegates to :func:`os.path.commonprefix`, which compares the
        inputs element by element.

        Parameters
        ----------
        strings : list of strings
            Strings for comparison

        Returns
        -------
        str
            The longest common prefix

        Examples
        --------
        >>> pfx = LCPrefix()
        >>> pfx.lcprefix(['cat', 'hat'])
        ''
        >>> pfx.lcprefix(['Niall', 'Neil'])
        'N'
        >>> pfx.lcprefix(['aluminum', 'Catalan'])
        ''
        >>> pfx.lcprefix(['ATCG', 'TAGC'])
        ''

        .. versionadded:: 0.4.0

        """
        return commonprefix(strings)

    def dist_abs(self, src, tar, *args):
        """Return the length of the longest common prefix of the strings.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison
        *args : strs
            Additional strings for comparison

        Raises
        ------
        TypeError
            All arguments must be of type str

        Returns
        -------
        int
            The length of the longest common prefix

        Examples
        --------
        >>> pfx = LCPrefix()
        >>> pfx.dist_abs('cat', 'hat')
        0
        >>> pfx.dist_abs('Niall', 'Neil')
        1
        >>> pfx.dist_abs('aluminum', 'Catalan')
        0
        >>> pfx.dist_abs('ATCG', 'TAGC')
        0

        .. versionadded:: 0.4.0

        """
        # Only the additional positional arguments are type-checked here;
        # src and tar are passed through to lcprefix unchanged.
        if not all(isinstance(arg, str) for arg in args):
            raise TypeError('All arguments must be of type str')

        return len(self.lcprefix([src, tar, *args]))

    def sim(self, src, tar, *args):
        r"""Return the longest common prefix similarity of two or more strings.

        Longest common prefix similarity (:math:`sim_{LCPrefix}`).

        This employs the LCPrefix function to derive a similarity metric:
        :math:`sim_{LCPrefix}(s,t) = \frac{|LCPrefix(s,t)|}{max(|s|, |t|)}`

        When additional strings are supplied, the prefix is taken over all
        of the strings and the denominator is the length of the longest one.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison
        *args : strs
            Additional strings for comparison

        Raises
        ------
        TypeError
            All arguments must be of type str

        Returns
        -------
        float
            LCPrefix similarity

        Examples
        --------
        >>> pfx = LCPrefix()
        >>> pfx.sim('cat', 'hat')
        0.0
        >>> pfx.sim('Niall', 'Neil')
        0.2
        >>> pfx.sim('aluminum', 'Catalan')
        0.0
        >>> pfx.sim('ATCG', 'TAGC')
        0.0
        >>> pfx.sim('Niall', 'Niall', 'Neil')
        0.2

        .. versionadded:: 0.4.0

        """
        # Identical inputs are maximally similar, but only if *every* string
        # matches: checking src == tar alone would report 1.0 for
        # sim('abc', 'abc', 'xyz') even though the common prefix is empty.
        if src == tar and all(arg == src for arg in args):
            return 1.0
        # An empty src or tar forces an empty common prefix; returning here
        # also keeps the division below away from a zero denominator.
        if not src or not tar:
            return 0.0

        prefix_len = self.dist_abs(src, tar, *args)
        longest = max(len(src), len(tar), *(len(arg) for arg in args))
        return prefix_len / longest
# When executed directly as a script, run the doctest examples embedded in
# this module's docstrings.
if __name__ == '__main__':
    import doctest

    doctest.testmod()