"""
Finding string patterns in a text

Copyright 2020, University of Freiburg

Philipp Schneider <philipp.schneider@cs.uni-freiburg.de>
"""


def compute_matches(t, p, b, mod):
    '''
        returns a list of indices where the pattern p can be found
        in the text t.

        Parameters:
            t - integer array representing the text
            p - integer array representing the search pattern
            b - base to which the integers in t and p should be considered
            mod - modulus that implicitely defines a hash function

        Returns:
            an array containing all positions where the
            pattern p begins in t

        Unit-Tests:
            >>> text = 'wrgaskjdbfkwbrgtkasjbdfkajserksadjgfh'
            >>> pattern = 'tkasjb'
            >>> t = [ord(text[i]) for i in range(len(text))]
            >>> p = [ord(pattern[i]) for i in range(len(pattern))]
            >>> compute_matches(t, p, 257, 2**20)
            [15]

            >>> text = 'abcdefg'
            >>> pattern = 'cba'
            >>> t = [ord(text[i]) for i in range(len(text))]
            >>> p = [ord(pattern[i]) for i in range(len(pattern))]
            >>> compute_matches(t, p, 257, 2**20)
            []
    '''

    # text shorter than pattern -> no matches possible
    if len(t) < len(p):
        return []

    # define some constants
    n = len(t)
    m = len(p)
    h = (b ** (m-1)) % mod
    matches = []

    # initalize hash values
    p_hash = 0
    t_hash = 0
    for i in range(m):
        p_hash = (p_hash * b + p[i]) % mod
        t_hash = (t_hash * b + t[i]) % mod

    # compare and update hash value of t
    for s in range(n-m):
        if p_hash == t_hash:
            # if the hash values match, check for actual match
            if test_position(t, p, s):
                matches.append(s)  # add s to output
        # move 'window' of hashed value one position to the right
        t_hash = ((t_hash - t[s] * h) * b + t[s+m]) % mod

    # we have to check for a match for final value s = n-m
    s += 1
    if p_hash == t_hash:
        if test_position(t, p, s):
            matches.append(s)

    return matches


def test_position(t, p, s):
    '''
        checks whether text t starting from position s matches pattern p.

        Parameters:
            t - integer array representing the text
            p - integer array representing the search pattern

        Returns:
            True if and only if there is a match

        Unit-Tests:
            >>> t = [1,2,3,4,5,6,7]
            >>> p = [2,3,4,5,6]
            >>> test_position(t,p,1)
            True
            >>> test_position(t,p,0)
            False
    '''
    assert len(p)+s <= len(t)

    # Check for missmatches
    for i in range(len(p)):
        if t[s+i] != p[i]:
            return False

    # No missmatch -> return true
    return True


def read_input(filename):
    '''
        reads a string from a file and interprets it as two integer
        arrays representing a pattern and a text

        Parameter:
            filename - name of file

        Returns:
            pair (p, t) of array of integers representing the
            the pattern and the text respectively
    '''
    lines = ''
    with open(filename) as input:
        lines = input.read().splitlines()
    # first line contains pattern, second contains text
    chars_pattern = list(lines[0])
    chars_text = list(lines[1])
    # translate to unicode
    ints_pattern = [ord(chars_pattern[i]) for i in range(len(chars_pattern))]
    ints_text = [ord(chars_text[i]) for i in range(len(chars_text))]
    return (ints_pattern, ints_text)
