# Source code for ittools.core

# src/ittools/core.py
"""A collection of iterable utilities.

Used to grow on the fly. Aims to provide general purpose abstract
functionalities.

.. autosummary::
   :nosignatures:

   depth
   nestify
   itrify
   is_empty
   Stringcrementor
   enum_to_2dix
   Index2D
   zip_split
   group
"""

import collections
import logging
import math
from itertools import zip_longest

from pandas import Series

logger = logging.getLogger(__name__)


def depth(arg, exclude=None):  # noqa: C901
    r"""Determine the nesting depth of an iterable.

    Credit goes to:
    https://stackoverflow.com/a/35698158

    Parameters
    ----------
    arg : ~collections.abc.Iterable
        Iterable of which nested depth is to be determined

    exclude : ~collections.abc.Iterable, default=None
        Iterable of iterable types that should be ignored, i.e. treated as
        depth ``0``. If None, str are excluded.

    Returns
    -------
    int
        Depth of :paramref:`depth.arg`. Non-iterables and excluded types
        have a depth of ``0``, empty iterables a depth of ``1``.

    Note
    ----
    Objects providing a ``values()`` method (i.e. mappings) are measured by
    their values. Passing a one-shot iterator (i.e. a generator) consumes it,
    and its first item is not taken into account.

    Example
    -------
    >>> depth([[2, 2], [2, [3, 3]], 1])
    3

    Using exclude to ignore tuples:

    >>> depth([[2, 2], [2, (3, 3)], 1], exclude=(tuple,))
    2

    Empty iterables are one level deep:

    >>> depth([])
    1
    >>> depth([[], [1]])
    2
    """
    if exclude is None:
        exclude = (str,)

    if isinstance(arg, tuple(exclude)):
        return 0

    try:
        first = next(iter(arg))
    except TypeError:
        # not iterable at all
        return 0
    except StopIteration:
        # Empty iterable: one level with nothing nested inside. Catching this
        # here is essential, a StopIteration escaping into the recursive scan
        # below would silently end it and under-report the depth.
        return 1

    if first is arg:  # avoid infinite loops (i.e. single character strings)
        return 1

    try:
        # mapping-likes are measured by their values
        children = arg.values()
    except AttributeError:
        children = arg

    try:
        children = iter(children)
    except TypeError:  # pragma: no cover
        # could not be provoked so far; kept as a safety net
        return 0

    return 1 + max((depth(child, exclude) for child in children), default=0)


def nestify(obj, target_depth, container=list):
    """Wrap obj into containers until it reaches the target depth.

    Parameters
    ----------
    obj
        Object which is to be put in a container

    target_depth : ~numbers.Number
        Keep nesting the object until its nesting depth >= target_depth

    container : ~typing.Container
        Container (list, tuple, ...) the :paramref:`nestify.obj` is nested
        with.

    Returns
    -------
    ~typing.Container
        Nested container object with the depth of
        :paramref:`nestify.target_depth`. If :paramref:`nestify.obj` is
        already deep enough it is returned unchanged.

    Examples
    --------
    Standard use case:

    >>> nestify([1, 2, 3], 3)
    [[[1, 2, 3]]]


    Specifying the container to nest with:

    >>> nestify([1, 2, 3], 3, tuple)
    (([1, 2, 3],),)


    Not all containers work with all objects, especially when working with
    sets. Since lists and sets are not hashable they can not be put into a
    set:

    >>> nestify([1, 2, 3], 3, set)
    Traceback (most recent call last):
        ...
    TypeError: unhashable type: 'list'


    Frozensets however can be nested:

    >>> nestify(frozenset([1, 2, 2]), 3, frozenset)
    frozenset({frozenset({frozenset({1, 2})})})

    """
    # strings and pandas Series count as atomic values when measuring depth
    atomic_types = (str, Series)

    nested = obj
    while depth(nested, exclude=atomic_types) < target_depth:
        # each pass adds exactly one more surrounding container
        nested = container([nested])
    return nested


def itrify(obj, container=list):
    """Turn object into an iterable container if not already.

    Strings will be itrified without splitting!

    Only objects that are of type ``str`` or **not** of
    :class:`collections.abc.Sequence` will be itrified.

    Parameters
    ----------
    obj
        Anything not a :class:`~collections.abc.Sequence` (except for str) will
        be containered as iterable. Non-iterable objects (numbers, ``None``,
        ...) are wrapped as single item.

    container : ~typing.Container, default=list
        Iterable container designed to house :paramref:`itrify.obj`.


    Returns
    -------
    ~collections.abc.Container
        containered :paramref:`~itrify.obj`. ( i.e. ``list(obj)``), or
        :paramref:`~itrify.obj` itself if it already is a (non-str)
        :class:`~collections.abc.Sequence`.

    Examples
    --------
    Pretty much the same as ``list(('String',))``:

    >>> itrify('String')
    ['String']


    A list is already iterable so this is futile:

    >>> itrify([1, 2, 3], tuple)
    [1, 2, 3]


    Strings although iterable will be itrified as whole:

    >>> itrify('String', tuple)
    ('String',)


    The :paramref:`itrify.container` of course, can be any callable
    container type:

    >>> itrify('String', set)
    {'String'}


    Objects that are not iterable at all are wrapped as single item:

    >>> itrify(42)
    [42]


    Pandas is awesome they support out of the box data type transformation:

    >>> import pandas as pd
    >>> itrify(pd.Series([1,2,3]), set)
    {1, 2, 3}

    """
    # strings are iterable, but are wrapped as a whole instead of being split
    if isinstance(obj, str):
        return container([obj])

    # any other sequence already is what the caller asked for
    if isinstance(obj, collections.abc.Sequence):
        return obj

    try:
        iter(obj)
    except TypeError:
        # not iterable: ``container(obj)`` would raise, so wrap it instead
        return container([obj])

    # iterable non-sequence (set, dict, generator, Series, ...): convert it
    return container(obj)


def is_empty(lst):
    """Check if a (possibly nested) list is empty.

    ``True`` if :paramref:`~is_empty.lst` is a :class:`list` that contains
    nothing but (arbitrarily nested) empty lists. ``False`` otherwise.
    Works recursively, based on ``all([]) == True``.

    Parameters
    ----------
    lst: list
        List to be checked for emptiness.

    Returns
    -------
    bool
        ``True`` if :paramref:`~is_empty.lst` is an empty :class:`~typing.List`
        or a list of empty lists. ``False`` otherwise.

    Examples
    --------
    >>> is_empty([])
    True

    >>> is_empty([[], [1,2,3]])
    False

    >>> is_empty([[[[]]]])
    True

    Only lists qualify, a tuple is not a list:

    >>> is_empty(([], []))
    False
    """
    # Anything that is not a list is "not empty" by definition; a list is
    # empty if every item it holds is an empty list itself.
    return isinstance(lst, list) and all(is_empty(item) for item in lst)


class Stringcrementor:
    """
    Endless iterator yielding a label followed by a running number.

    Each call of next() on the Stringcrementor object returns the label
    concatenated with the current number and increments that number by one.

    Parameters
    ----------
    string: str
        String/tag/label of what you want to be incremented i.e "Category".
        Default: ``"Stringcrementor "``
    start: ~numbers.Number
        Starting number which is to be incremented. Default: 0

    Returns
    -------
    str
        string + integer of which the integer is incremented.

    Example
    -------
    >>> strementor = Stringcrementor('The Answer is: ')

    >>> for i in range(42):
    ...     pass # just kidding

    >>> for i in range(3):
    ...     print(next(strementor))
    The Answer is: 0
    The Answer is: 1
    The Answer is: 2
    """

    def __init__(self, string="Stringcrementor ", start=0):
        self.string = string
        # number appended to the label on the upcoming next() call
        self.value = start

    def __iter__(self):
        """Return the instance itself, since it is its own iterator."""
        return self

    def __next__(self):
        """Return label + current number, then advance the number by one."""
        current, self.value = self.value, self.value + 1
        return self.string + str(current)


def enum_to_2dix(number, shape):
    """Map a 1d range to a 2d index.

    Parameters
    ----------
    number : int
        Number to be mapped to a 2D index. Usually used within some form of
        iteration.

    shape : tuple
        2 dimensional tuple defining an arrays 2d shape as in ``(rows, columns)``.

    Returns
    -------
    tuple
        the 1d enumerate position mapped to a (row, column) 2d tuple

    Raises
    ------
    ZeroDivisionError
        If the number of columns in :paramref:`~enum_to_2dix.shape` is 0.

    Note
    ----
    Only the number of columns is actually used. Since this is designed to
    be used with 2D-Matrices however, it is left as 2D-shape for convenience.

    This implies however, that you can actually use infinite
    :paramref:`~enum_to_2dix.number` arguments although your
    :paramref:`~enum_to_2dix.shape` might imply only 3 rows.

    Examples
    --------
    Mapping ``range(6)`` to a 3,2 dimension array:

    >>> for i in range(6):
    ...     print(i, '->', enum_to_2dix(i, (3,2)))
    0 -> (0, 0)
    1 -> (0, 1)
    2 -> (1, 0)
    3 -> (1, 1)
    4 -> (2, 0)
    5 -> (2, 1)

    Mapping ``range(12)`` to a 3,4 dimension array:

    >>> for i in range(12):
    ...     print(i, '->', enum_to_2dix(i, (3,4)))
    0 -> (0, 0)
    1 -> (0, 1)
    2 -> (0, 2)
    3 -> (0, 3)
    4 -> (1, 0)
    5 -> (1, 1)
    6 -> (1, 2)
    7 -> (1, 3)
    8 -> (2, 0)
    9 -> (2, 1)
    10 -> (2, 2)
    11 -> (2, 3)
    """
    columns = shape[1]
    # divmod keeps integer arithmetic exact; a float division would lose
    # precision for numbers beyond 2**53 and overflow for very large ones
    row, column = divmod(number, columns)
    # math.floor keeps the row an int, even if number was passed as float
    return (math.floor(row), column)


class Index2D:
    """Callable mapping a running number to a (row, column) index.

    Parameters
    ----------
    shape : 2-tuple
        tuple defining an array's 2d shape as in ``(rows, columns)``

    Returns
    -------
    tuple
        the 1d enumerate position mapped to a (row, column) 2d tuple

    Examples
    --------
    >>> idx2d = Index2D((3, 2))
    >>> for i in range(6):
    ...     print(i, '->', idx2d(i))
    ...
    0 -> (0, 0)
    1 -> (0, 1)
    2 -> (1, 0)
    3 -> (1, 1)
    4 -> (2, 0)
    5 -> (2, 1)
    """

    def __init__(self, shape):
        # kept private; exposed read-only through the ``shape`` property
        self._shape = shape

    @property
    def shape(self):
        """Shape tuple the Index2D object was created with."""
        return self._shape

    def __call__(self, number):
        """Map number to a 2d index, using the shape of this object.

        Parameters
        ----------
        number : ~numbers.Number
            The 1 d index/number to be mapped to a 2d index.

        Returns
        -------
        tuple
            the 1d enumerate position mapped to a (row, column) 2d tuple

        """
        layout = self.shape
        return enum_to_2dix(number, layout)


def zip_split(sequence, chunks):
    r"""Deal the items of a sequence out to chunks, one item at a time.

    The first chunk gets items ``0, chunks, 2 * chunks, ...``, the second one
    items ``1, chunks + 1, ...`` and so on, resembling a zipped-like order.

    The last :math:`n` chunks will be one item short of the rest, if the
    number of items in :paramref:`~zip_split.sequence` is not an integer
    multiple of :paramref:`~zip_split.chunks`. With :math:`n` being:

    :math:`n = \text{chunks} -
    \left[\text{len}\left(\text{sequence}\right) \% \text{chunks}\right]`.

    Note
    ----
    Credit to https://www.garyrobinson.net/2008/04/splitting-a-pyt.html
    (Garry Robinson)

    Parameters
    ----------
    sequence: ~collections.abc.Sequence
        The sequence to split into chunks. Needs to support extended slicing.
    chunks: int
        The number of split sequences created.

    Yields
    ------
    :class:`~collections.abc.Sequence`
        One chunk of items per step, each beeing a slice of
        :paramref:`~zip_split.sequence`.

    Examples
    --------
    Simple demonstration:

    >>> import ittools
    >>> hi10 = 10 * ['hi']
    >>> print(list(ittools.zip_split(hi10, 3)))
    [['hi', 'hi', 'hi', 'hi'], ['hi', 'hi', 'hi'], ['hi', 'hi', 'hi']]

    Use case for turning a (supposedly) long iterable into a
    :class:`pandas.DataFrame` of 3 rows:

    >>> import ittools
    >>> import pandas as pd
    >>> print(pd.DataFrame(list(ittools.zip_split(hi10, 3))).to_string(
    ...     index=False, header=False))
    hi hi hi   hi
    hi hi hi None
    hi hi hi None
    """
    # every chunk starts one item later and takes each chunks-th item
    yield from (sequence[offset::chunks] for offset in range(chunks))


def group(iterable, chunks, fillvalue=None):
    """Split iterable into chunks.

    If the number of items in :paramref:`~group.iterable` is not an integer
    multiple of :paramref:`~group.chunks`, the
    last chunk is filled using :paramref:`~group.fillvalue`.

    Parameters
    ----------
    iterable: ~collections.abc.Iterable
        The iterable to split into groups. Iterables of unknown length
        (i.e. generators) are consumed completely up front.

    chunks: int
        The number of groups created

    fillvalue: ~numbers.Number, None, default = None
        The last chunk is filled with this in case the number of items in
        :paramref:`~group.iterable` is not an integer multiple of
        :paramref:`~group.chunks`

    Returns
    -------
    :class:`~collections.abc.Iterator`
        An iterator (:func:`itertools.zip_longest`) yielding the groups as
        tuples.

    Raises
    ------
    ZeroDivisionError
        If :paramref:`~group.chunks` is 0.

    Note
    ----
    Credit to https://stackoverflow.com/a/434411
    (Boris)

    All groups are of size ``ceil(len(iterable) / chunks)``. Groups that would
    consist of nothing but :paramref:`~group.fillvalue` are not created, so
    fewer than :paramref:`~group.chunks` groups can be returned (i.e. 5 items
    split into 4 chunks result in 3 groups of 2).

    Examples
    --------
    Simple example:

    >>> import ittools
    >>> print(list(ittools.group(range(10), chunks=3)))
    [(0, 1, 2, 3), (4, 5, 6, 7), (8, 9, None, None)]

    Use case for turning a(supposedly) long iterable into a
    :class:`pandas.DataFrame` of 3 columns:

    >>> import ittools
    >>> import pandas as pd
    >>> print(pd.DataFrame(list(zip(*ittools.group(range(10), 3)))).to_string(
    ...     index=False, header=False))
    0 4 8.0
    1 5 9.0
    2 6 NaN
    3 7 NaN
    """
    try:
        size = len(iterable)
    except TypeError:
        # generators and other unsized iterables: materialize to learn the size
        iterable = tuple(iterable)
        size = len(iterable)

    length = math.ceil(size / chunks)
    # the very same iterator repeated ``length`` times, so zip_longest pulls
    # ``length`` consecutive items into each group
    args = [iter(iterable)] * length
    return zip_longest(*args, fillvalue=fillvalue)