Source code for abydos.compression._rle

# Copyright 2014-2020 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.compression._rle.

Run-Length Encoding encoder/decoder
"""

from itertools import groupby

from deprecation import deprecated

from ._bwt import BWT
from .. import __version__

__all__ = ['RLE', 'rle_decode', 'rle_encode']


[docs]class RLE(object):
    """Run-Length Encoding.

    Cf. :cite:`Robinson:1967`.

    Based on http://rosettacode.org/wiki/Run-length_encoding#Python
    :cite:`rosettacode:2018`. This is licensed GFDL 1.2.

    Digits 0-9 cannot be in text.

    .. versionadded:: 0.3.6
    """

[docs]    def encode(self, text):
        r"""Perform encoding of run-length-encoding (RLE).

        Parameters
        ----------
        text : str
            A text string to encode

        Returns
        -------
        str
            Word decoded by RLE

        Examples
        --------
        >>> rle = RLE()
        >>> bwt = BWT()
        >>> rle.encode(bwt.encode('align'))
        'n\x00ilag'
        >>> rle.encode('align')
        'align'

        >>> rle.encode(bwt.encode('banana'))
        'annb\x00aa'
        >>> rle.encode('banana')
        'banana'

        >>> rle.encode(bwt.encode('aaabaabababa'))
        'ab\x00abbab5a'
        >>> rle.encode('aaabaabababa')
        '3abaabababa'

        .. versionadded:: 0.1.0
        .. versionchanged:: 0.3.6
            Encapsulated in class

        """
        if text:
            text = ((len(list(g)), k) for k, g in groupby(text))
            text = (
                (str(n) + k if n > 2 else (k if n == 1 else 2 * k))
                for n, k in text
            )
        return ''.join(text)

[docs]    def decode(self, text):
        r"""Perform decoding of run-length-encoding (RLE).

        Parameters
        ----------
        text : str
            A text string to decode

        Returns
        -------
        str
            Word decoded by RLE

        Examples
        --------
        >>> rle = RLE()
        >>> bwt = BWT()
        >>> bwt.decode(rle.decode('n\x00ilag'))
        'align'
        >>> rle.decode('align')
        'align'

        >>> bwt.decode(rle.decode('annb\x00aa'))
        'banana'
        >>> rle.decode('banana')
        'banana'

        >>> bwt.decode(rle.decode('ab\x00abbab5a'))
        'aaabaabababa'
        >>> rle.decode('3abaabababa')
        'aaabaabababa'

        .. versionadded:: 0.1.0
        .. versionchanged:: 0.3.6
            Encapsulated in class

        """
        mult = ''
        decoded = []
        for letter in list(text):
            if not letter.isdigit():
                if mult:
                    decoded.append(int(mult) * letter)
                    mult = ''
                else:
                    decoded.append(letter)
            else:
                mult += letter

        text = ''.join(decoded)
        return text


[docs]@deprecated(
    deprecated_in='0.4.0',
    removed_in='0.6.0',
    current_version=__version__,
    details='Use the RLE.encode method instead.',
)
def rle_encode(text, use_bwt=True):
    r"""Perform encoding of run-length-encoding (RLE).

    This is a wrapper for :py:meth:`RLE.encode`.

    Parameters
    ----------
    text : str
        A text string to encode
    use_bwt : bool
        Indicates whether to perform BWT encoding before RLE encoding

    Returns
    -------
    str
        Word decoded by RLE

    Examples
    --------
    >>> rle_encode('align')
    'n\x00ilag'
    >>> rle_encode('align', use_bwt=False)
    'align'

    >>> rle_encode('banana')
    'annb\x00aa'
    >>> rle_encode('banana', use_bwt=False)
    'banana'

    >>> rle_encode('aaabaabababa')
    'ab\x00abbab5a'
    >>> rle_encode('aaabaabababa', False)
    '3abaabababa'

    .. versionadded:: 0.1.0

    """
    if use_bwt:
        text = BWT().encode(text)
    return RLE().encode(text)


[docs]@deprecated(
    deprecated_in='0.4.0',
    removed_in='0.6.0',
    current_version=__version__,
    details='Use the RLE.decode method instead.',
)
def rle_decode(text, use_bwt=True):
    r"""Perform decoding of run-length-encoding (RLE).

    This is a wrapper for :py:meth:`RLE.decode`.

    Parameters
    ----------
    text : str
        A text string to decode
    use_bwt : bool
        Indicates whether to perform BWT decoding after RLE decoding

    Returns
    -------
    str
        Word decoded by RLE

    Examples
    --------
    >>> rle_decode('n\x00ilag')
    'align'
    >>> rle_decode('align', use_bwt=False)
    'align'

    >>> rle_decode('annb\x00aa')
    'banana'
    >>> rle_decode('banana', use_bwt=False)
    'banana'

    >>> rle_decode('ab\x00abbab5a')
    'aaabaabababa'
    >>> rle_decode('3abaabababa', False)
    'aaabaabababa'

    .. versionadded:: 0.1.0

    """
    text = RLE().decode(text)
    if use_bwt:
        text = BWT().decode(text)
    return text


if __name__ == '__main__':
    import doctest

    doctest.testmod()