#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import re
import tokenize
import token
import uuid


class IndexCreatorException(Exception):
    """Base error raised while translating a simple index definition.

    Attributes:
        ex: the error message.
        line: number of the offending line in the definition, or ``None``
            when no position is known.
    """

    def __init__(self, ex, line=None):
        # Forward the arguments to ``Exception`` so that ``args`` is
        # populated; copying and unpickling rebuild the instance by calling
        # ``cls(*self.args)``, which fails when ``args`` is empty.
        super(IndexCreatorException, self).__init__(ex, line)
        self.ex = ex
        self.line = line

    def __str__(self):
        # The message is intentionally returned in its ``repr`` form
        # (quoted), with the line number appended when one is set.
        if self.line:
            return repr("%s(in line: %d)" % (self.ex, self.line))
        return repr(self.ex)


class IndexCreatorFunctionException(IndexCreatorException):
    """Error in the ``make_key_value`` / ``make_key`` function sections."""


class IndexCreatorValueException(IndexCreatorException):
    """Error in a property or in the values/expressions of a definition."""


class Parser(object):
    def __init__(self):
        """Create an empty parser; no state is set up at construction time."""

    def parse(self, data, name=None):
        """Translate a simple index definition into index class source code.

        ``data`` is split into three sections: the property assignments
        (everything before the first function marker), the body following
        ``make_key_value:`` and the body following ``make_key:``. The two
        function sections may appear in either order; when ``make_key:`` is
        missing its body defaults to ``key``. Property values that are neither
        numeric expressions nor already quoted get wrapped in single quotes
        (``type`` and ``name`` lines are left alone). Every non-empty line is
        stripped, checked with :meth:`check_enclosures` and stored in
        ``self.data`` before :meth:`parse_ex` generates the code.

        :param data: text of the simple index definition
        :param name: name of the generated class; a random ``_<uuid4 hex>``
            name is used when it is empty or ``None``
        :returns: the result of :meth:`parse_ex`
        :raises IndexCreatorFunctionException: when ``make_key_value:`` is
            missing or a function body is empty
        :raises IndexCreatorValueException: when no properties precede the
            functions or a line has unbalanced enclosures
        """
        if not name:
            self.name = "_" + uuid.uuid4().hex
        else:
            self.name = name

        self.ind = 0
        self.stage = 0
        self.logic = ['and', 'or', 'in']
        self.logic2 = ['&', '|']
        self.allowed_props = {'TreeBasedIndex': ['type', 'name', 'key_format', 'node_capacity', 'pointer_format', 'meta_format'],
                              'HashIndex': ['type', 'name', 'key_format', 'hash_lim', 'entry_line_format'],
                              'MultiHashIndex': ['type', 'name', 'key_format', 'hash_lim', 'entry_line_format'],
                              'MultiTreeBasedIndex': ['type', 'name', 'key_format', 'node_capacity', 'pointer_format', 'meta_format']
                              }
        # name used in the definition -> (tokens emitted before the call,
        # tokens emitted after the closing bracket of the call)
        self.funcs = {'md5': (['md5'], ['.digest()']),
                      'len': (['len'], []),
                      'str': (['str'], []),
                      'fix_r': (['self.fix_r'], []),
                      'prefix': (['self.prefix'], []),
                      'infix': (['self.infix'], []),
                      'suffix': (['self.suffix'], [])
                      }
        self.handle_int_imports = {'infix': "from itertools import izip\n"}

        # helper name -> (method source added to the generated class,
        # flag set to True once the helper is actually used)
        self.funcs_with_body = {'fix_r':
                                ("""    def fix_r(self,s,l):
        e = len(s)
        if e == l:
            return s
        elif e > l:
            return s[:l]
        else:
            return s.rjust(l,'_')\n""", False),
                                'prefix':
                                ("""    def prefix(self,s,m,l,f):
        t = len(s)
        if m < 1:
            m = 1
        o = set()
        if t > l:
            s = s[:l]
            t = l
        while m <= t:
            o.add(s.rjust(f,'_'))
            s = s[:-1]
            t -= 1
        return o\n""", False),
                                'suffix':
                                ("""    def suffix(self,s,m,l,f):
        t = len(s)
        if m < 1:
            m = 1
        o = set()
        if t > l:
            s = s[t-l:]
            t = len(s)
        while m <= t:
            o.add(s.rjust(f,'_'))
            s = s[1:]
            t -= 1
        return o\n""", False),
                                'infix':
                                ("""    def infix(self,s,m,l,f):
        t = len(s)
        o = set()
        for x in xrange(m - 1, l):
            t = (s, )
            for y in xrange(0, x):
                t += (s[y + 1:],)
            o.update(set(''.join(x).rjust(f, '_').lower() for x in izip(*t)))
        return o\n""", False)}
        self.none = ['None', 'none', 'null']
        self.props_assign = ['=', ':']
        # operator -> {kind of the left neighbour: allowed kinds on the right}
        self.all_adj_num_comp = {token.NUMBER: (
            token.NUMBER, token.NAME, '-', '('),
            token.NAME: (token.NUMBER, token.NAME, '-', '('),
            ')': (token.NUMBER, token.NAME, '-', '(')
        }

        self.all_adj_num_op = {token.NUMBER: (token.NUMBER, token.NAME, '('),
                               token.NAME: (token.NUMBER, token.NAME, '('),
                               ')': (token.NUMBER, token.NAME, '(')
                               }
        self.allowed_adjacent = {
            "<=": self.all_adj_num_comp,
            ">=": self.all_adj_num_comp,
            ">": self.all_adj_num_comp,
            "<": self.all_adj_num_comp,

            "==": {token.NUMBER: (token.NUMBER, token.NAME, '('),
                   token.NAME: (token.NUMBER, token.NAME, token.STRING, '('),
                   token.STRING: (token.NAME, token.STRING, '('),
                   ')': (token.NUMBER, token.NAME, token.STRING, '('),
                   ']': (token.NUMBER, token.NAME, token.STRING, '(')
                   },

            "+": {token.NUMBER: (token.NUMBER, token.NAME, '('),
                  token.NAME: (token.NUMBER, token.NAME, token.STRING, '('),
                  token.STRING: (token.NAME, token.STRING, '('),
                  ')': (token.NUMBER, token.NAME, token.STRING, '('),
                  ']': (token.NUMBER, token.NAME, token.STRING, '(')
                  },

            "-": {token.NUMBER: (token.NUMBER, token.NAME, '('),
                  token.NAME: (token.NUMBER, token.NAME, '('),
                  ')': (token.NUMBER, token.NAME, '('),
                  '<': (token.NUMBER, token.NAME, '('),
                  '>': (token.NUMBER, token.NAME, '('),
                  '<=': (token.NUMBER, token.NAME, '('),
                  '>=': (token.NUMBER, token.NAME, '('),
                  '==': (token.NUMBER, token.NAME, '('),
                  ']': (token.NUMBER, token.NAME, '(')
                  },
            "*": self.all_adj_num_op,
            "/": self.all_adj_num_op,
            "%": self.all_adj_num_op,
            ",": {token.NUMBER: (token.NUMBER, token.NAME, token.STRING, '{', '[', '('),
                  token.NAME: (token.NUMBER, token.NAME, token.STRING, '(', '{', '['),
                  token.STRING: (token.NAME, token.STRING, token.NUMBER, '(', '{', '['),
                  ')': (token.NUMBER, token.NAME, token.STRING, '(', '{', '['),
                  ']': (token.NUMBER, token.NAME, token.STRING, '(', '{', '['),
                  '}': (token.NUMBER, token.NAME, token.STRING, '(', '{', '[')
                  }
        }

        # NOTE: all patterns below are raw strings; the non-raw spelling
        # relied on invalid escape sequences such as '\s' and '\:'.
        def is_num(s):
            # True when ``s`` holds only digits, arithmetic signs,
            # parentheses and whitespace.
            return not re.search(r'[^0-9*()+\-\s/]+', s)

        def is_string(s):
            # Match object when ``s`` contains a quoted string.
            return re.search(r'\s*(?P<a>[\'"]+).*?(?P=a)\s*', s)

        def is_blank(s):
            # True when ``s`` is empty or consists solely of whitespace.
            return s == re.search(r'\s*', s, re.S | re.M).group(0)

        data = re.split(r'make_key_value\:', data)

        if len(data) < 2:
            raise IndexCreatorFunctionException(
                "Couldn't find a definition of make_key_value function!\n")

        spl1 = re.split(r'make_key\:', data[0])
        spl2 = re.split(r'make_key\:', data[1])

        # funcs_rev is True when make_key is defined BEFORE make_key_value
        self.funcs_rev = False

        if len(spl1) > 1:
            data = [spl1[0]] + [data[1]] + [spl1[1]]
            self.funcs_rev = True
        elif len(spl2) > 1:
            data = [data[0]] + spl2
        else:
            data.append("key")

        if is_blank(data[1]):
            raise IndexCreatorFunctionException("Empty function body ",
                                                len(data[0].split('\n')) + (len(data[2].split('\n')) if self.funcs_rev else 1) - 1)
        if is_blank(data[2]):
            raise IndexCreatorFunctionException("Empty function body ",
                                                len(data[0].split('\n')) + (1 if self.funcs_rev else len(data[1].split('\n'))) - 1)
        if is_blank(data[0]):
            raise IndexCreatorValueException("You didn't set any properity or you set them not at the begining of the code\n")

        data = [data[0].split('\n'), data[1].split('\n'), data[2].split('\n')]
        self.cnt_lines = (len(data[0]), len(data[1]), len(data[2]))
        self.predata = data
        self.data = [[], [], []]

        # Quote bare (non-numeric, unquoted, single-word) property values so
        # the tokenizer sees them as strings, e.g. ``key_format: 16s``.
        props = self.predata[0]
        for i, prop_line in enumerate(props):
            head = prop_line.strip()[:4]
            for k, char in enumerate(prop_line):
                if char not in self.props_assign:
                    continue
                value = prop_line[k + 1:]
                if is_num(value) or head == 'type' or head == 'name':
                    continue
                if not is_string(value) and not re.search(r'\s+', value.strip()):
                    value = "'" + value.strip() + "'"
                props[i] = prop_line[:k + 1] + value
                break

        # Keep only the non-empty, stripped lines of every section.
        for stage, section in enumerate(self.predata):
            for raw_line in section:
                stripped = raw_line.strip()
                if stripped:
                    self.data[stage].append(stripped)
                    self.check_enclosures(stripped, stage)

        return self.parse_ex()

    def readline(self, stage):
        """Build a ``readline``-like callable over ``self.data[stage]``.

        Each call of the returned function hands out the next stored line and
        advances ``self.ind``. Once the lines are exhausted it resets
        ``self.ind`` to 0 and returns an empty string.
        """
        def next_line():
            if self.ind >= len(self.data[stage]):
                self.ind = 0
                return ""
            position = self.ind
            self.ind = position + 1
            return self.data[stage][position]
        return next_line

    def add(self, l, i):
        """Build a collector that groups its arguments by the current line.

        The returned function stores its positional arguments (as one tuple)
        in ``l[i][self.ind - 1]``, opening a new sub-list first when
        ``self.ind`` has moved past the sub-lists collected so far.
        """
        def collect(*token_info):
            per_line = l[i]
            if self.ind > len(per_line):
                per_line.append([])
            per_line[self.ind - 1].append(token_info)
        return collect

    def parse_ex(self):
        """Tokenize the prepared sections and assemble the generated source.

        Works on ``self.data`` (three lists of lines: properties,
        ``make_key_value`` body, ``make_key`` body). Each section is
        tokenized, validated, and then turned into tokens of the generated
        class by ``handle_prop_line`` / ``handle_make_value`` /
        ``handle_make_key``.

        Returns a 2-tuple: the joined custom header lines, and the complete
        generated source (header tokens concatenated, followed by the body
        tokens joined with single spaces).

        Raises IndexCreatorValueException when a ``make_key_value`` line lacks
        a second return value, when the index type or name is missing, or
        when a property is not allowed for the chosen index type.
        """
        # reset the whole generator state
        self.index_name = ""
        self.index_type = ""
        self.curLine = -1
        self.con = -1
        self.brackets = -1
        self.curFunc = None
        self.colons = 0
        self.line_cons = ([], [], [])
        self.pre_tokens = ([], [], [])
        self.known_dicts_in_mkv = []
        self.prop_name = True
        self.prop_assign = False
        self.is_one_arg_enough = False
        self.funcs_stack = []
        self.last_line = [-1, -1, -1]
        self.props_set = []
        self.custom_header = set()

        self.tokens = []
        # pieces of the class header; the base class name is inserted at
        # index 2 by handle_prop_line, helper bodies/custom header at index 4
        self.tokens_head = ['# %s\n' % self.name, 'class %s(' % self.name, '):\n', '    def __init__(self, *args, **kwargs):        ']

        for i in range(3):
            # NOTE(review): the two-argument form (readline, tokeneater) is
            # the Python 2 tokenize API -- confirm the target interpreter.
            tokenize.tokenize(self.readline(i), self.add(self.pre_tokens, i))
            # tokenize treats some keyword not in the right way, thats why we
            # have to change some of them
            # (names listed in self.logic are re-tagged as operators)
            for nk, k in enumerate(self.pre_tokens[i]):
                for na, a in enumerate(k):
                    if a[0] == token.NAME and a[1] in self.logic:
                        self.pre_tokens[i][nk][
                            na] = (token.OP, a[1], a[2], a[3], a[4])

        # validate every make_key_value line
        for i in self.pre_tokens[1]:
            self.line_cons[1].append(self.check_colons(i, 1))
            self.check_adjacents(i, 1)
            if self.check_for_2nd_arg(i) == -1 and not self.is_one_arg_enough:
                raise IndexCreatorValueException("No 2nd value to return (did u forget about ',None'?", self.cnt_line_nr(i[0][4], 1))
            self.is_one_arg_enough = False

        # validate every make_key line
        for i in self.pre_tokens[2]:
            self.line_cons[2].append(self.check_colons(i, 2))
            self.check_adjacents(i, 2)

        # properties become kwargs assignments inside __init__
        for i in self.pre_tokens[0]:
            self.handle_prop_line(i)

        self.cur_brackets = 0
        self.tokens += ['\n        super(%s, self).__init__(*args, **kwargs)\n    def make_key_value(self, data):        ' % self.name]

        for i in self.pre_tokens[1]:
            for k in i:
                self.handle_make_value(*k)

        self.curLine = -1
        self.con = -1
        self.cur_brackets = 0
        self.tokens += ['\n    def make_key(self, key):']

        for i in self.pre_tokens[2]:
            for k in i:
                self.handle_make_key(*k)

        if self.index_type == "":
            raise IndexCreatorValueException("Missing index type definition\n")
        if self.index_name == "":
            raise IndexCreatorValueException("Missing index name\n")

        # prepend a comment line carrying the index name
        self.tokens_head[0] = "# " + self.index_name + "\n" + \
            self.tokens_head[0]

        # add the source of every helper method that was actually used
        for i in self.funcs_with_body:
            if self.funcs_with_body[i][1]:
                self.tokens_head.insert(4, self.funcs_with_body[i][0])

        # generate_func adds None for functions that need no import
        if None in self.custom_header:
            self.custom_header.remove(None)
        if self.custom_header:
            s = '    custom_header = """'
            for i in self.custom_header:
                s += i
            s += '"""\n'
            self.tokens_head.insert(4, s)

        # properties are only validated for the index types listed in
        # self.allowed_props
        if self.index_type in self.allowed_props:
            for i in self.props_set:
                if i not in self.allowed_props[self.index_type]:
                    raise IndexCreatorValueException("Properity %s is not allowed for index type: %s" % (i, self.index_type))

        return "".join(self.custom_header), "".join(self.tokens_head) + (" ".join(self.tokens))

    # has to be run BEFORE tokenize
    def check_enclosures(self, d, st):
        encs = []
        contr = {'(': ')', '{': '}', '[': ']', "'": "'", '"': '"'}
        ends = [')', '}', ']', "'", '"']
        for i in d:
            if len(encs) > 0 and encs[-1] in ['"', "'"]:
                if encs[-1] == i:
                    del encs[-1]
            elif i in contr:
                encs += [i]
            elif i in ends:
                if len(encs) < 1 or contr[encs[-1]] != i:
                    raise IndexCreatorValueException("Missing opening enclosure for \'%s\'" % i, self.cnt_line_nr(d, st))
                del encs[-1]

        if len(encs) > 0:
            raise IndexCreatorValueException("Missing closing enclosure for \'%s\'" % encs[0], self.cnt_line_nr(d, st))

    def check_adjacents(self, d, st):
        """Check the neighbours of every operator in the tokenized line ``d``.

        Two consecutive NAME/STRING tokens are rejected outright. For each
        operator listed in ``self.allowed_adjacent`` the token on its left
        must be a known key of that operator's table and the token on its
        right must be listed for that left neighbour. Operators are looked up
        by their text, other tokens by their token type; a missing neighbour
        is represented by ``-1``.

        :param d: list of 5-element token tuples of a single line
        :param st: section index, used only for the reported line number
        :raises IndexCreatorValueException: when a rule is violated
        """
        def kind(tok):
            return tok[1] if tok[0] == token.OP else tok[0]

        textual = (token.NAME, token.STRING)
        total = len(d)

        for pos, (tok_type, text, _, _, line) in enumerate(d):
            if tok_type in textual:
                if pos + 1 < total and d[pos + 1][0] in textual:
                    raise IndexCreatorValueException("Did you forget about an operator in between?", self.cnt_line_nr(line, st))
                continue
            if text not in self.allowed_adjacent:
                continue

            left = kind(d[pos - 1]) if pos != 0 else -1
            current = kind(d[pos])
            # there always is an endmarker at the end, but this is a precaution
            right = kind(d[pos + 1]) if pos + 2 <= total else -1

            rules = self.allowed_adjacent[current]
            if left not in rules:
                raise IndexCreatorValueException("Wrong left value of the %s" % current, self.cnt_line_nr(line, st))
            # the data is assumed to always end with the 0 (end) marker, so
            # the last real token still has a right neighbour to inspect
            if right not in rules[left]:
                raise IndexCreatorValueException("Wrong right value of the %s" % current, self.cnt_line_nr(line, st))

    def check_colons(self, d, st):
        """Validate colons and commas in one tokenized line.

        For a property value (``st == 0``) neither a comma nor a colon may
        appear outside brackets. For function lines the first colon found
        outside curly braces separates the condition from the returned
        values; the part after it (or the whole line when there is no such
        colon) may hold at most one top-level comma in ``make_key_value``
        (``st == 1``) and none in ``make_key`` (``st == 2``).

        :param d: list of 5-element token tuples of a single line
        :param st: section index (0 properties, 1 make_key_value, 2 make_key)
        :returns: ``None`` for ``st == 0``; ``-1`` when the line has no
            condition colon; otherwise the number of colons found inside
            curly braces before the condition colon
        :raises IndexCreatorValueException: for a misplaced comma/colon in a
            property value or an empty return value after the colon
        :raises IndexCreatorFunctionException: for too many returned values
            or a second top-level colon
        """
        cnt = 0
        br = 0

        def check_ret_args_nr(a, s):
            c_b_cnt = 0  # curly brackets depth '{}'
            s_b_cnt = 0  # square brackets depth '[]'
            n_b_cnt = 0  # normal brackets depth '()'
            comas_cnt = 0
            for _, i, _, _, line in a:

                # commas and colons only matter outside every bracket
                if c_b_cnt == n_b_cnt == s_b_cnt == 0:
                    if i == ',':
                        comas_cnt += 1
                        if (s == 1 and comas_cnt > 1) or (s == 2 and comas_cnt > 0):
                            raise IndexCreatorFunctionException("Too much arguments to return", self.cnt_line_nr(line, st))
                        if s == 0 and comas_cnt > 0:
                            raise IndexCreatorValueException("A coma here doesn't make any sense", self.cnt_line_nr(line, st))

                    elif i == ':':
                        if s == 0:
                            raise IndexCreatorValueException("A colon here doesn't make any sense", self.cnt_line_nr(line, st))
                        raise IndexCreatorFunctionException("Two colons don't make any sense", self.cnt_line_nr(line, st))

                if i == '{':
                    c_b_cnt += 1
                elif i == '}':
                    c_b_cnt -= 1
                elif i == '(':
                    n_b_cnt += 1
                elif i == ')':
                    n_b_cnt -= 1
                elif i == '[':
                    s_b_cnt += 1
                elif i == ']':
                    s_b_cnt -= 1

        def check_if_empty(a):
            # ``a`` holds whole token tuples, so the token TYPE (first item)
            # has to be inspected; comparing the tuple itself never matched
            # and let a trailing "cond:" on the last line slip through.
            blank_types = (token.NEWLINE, token.INDENT, token.ENDMARKER)
            return all(tok[0] in blank_types for tok in a)

        if st == 0:
            check_ret_args_nr(d, st)
            return

        for n, i in enumerate(d):
            if i[1] == ':':
                if br == 0:
                    rest = d[n + 1:]
                    if check_if_empty(rest):
                        raise IndexCreatorValueException(
                            "Empty return value", self.cnt_line_nr(i[4], st))
                    check_ret_args_nr(rest, st)
                    return cnt
                else:
                    cnt += 1
            elif i[1] == '{':
                br += 1
            elif i[1] == '}':
                br -= 1
        check_ret_args_nr(d, st)
        return -1

    def check_for_2nd_arg(self, d):
        """Look for the second value returned by a ``make_key_value`` line.

        Scans the tokens for the first comma outside all brackets and
        inspects what follows it. While scanning, ``self.is_one_arg_enough``
        is switched on when a top-level name that is not a known function, or
        any opening curly brace, is seen. A name found after the comma is
        recorded (with its start position) in ``self.known_dicts_in_mkv``.

        :param d: list of 5-element token tuples of a single line
        :returns: ``-1`` when there is no top-level comma; ``0`` when the
            comma is followed by ``{`` or a name; ``None`` when nothing
            conclusive follows the comma
        :raises IndexCreatorValueException: when a string or number literal
            follows the comma
        """
        depth = {'{': 0, '[': 0, '(': 0}
        opener_of = {'}': '{', ']': '[', ')': '('}

        def inspect_second(rest):
            for tok_type, text, (row, col), _, line in rest:
                if text == '{' or text is None:
                    return 0
                if tok_type == token.NAME:
                    self.known_dicts_in_mkv.append((text, (row, col)))
                    return 0
                if tok_type == token.STRING or tok_type == token.NUMBER:
                    raise IndexCreatorValueException("Second return value of make_key_value function has to be a dictionary!", self.cnt_line_nr(line, 1))

        for pos, (tok_type, text, _, _, _) in enumerate(d):
            if not any(depth.values()):
                # outside of every bracket
                if text == ',':
                    return inspect_second(d[pos:])
                if (tok_type == token.NAME and text not in self.funcs) or text == '{':
                    self.is_one_arg_enough = True

            if text in depth:
                depth[text] += 1
                if text == '{':
                    self.is_one_arg_enough = True
            elif text in opener_of:
                depth[opener_of[text]] -= 1
        return -1

    def cnt_line_nr(self, l, stage):
        """Work out the line number of ``l`` within the whole definition.

        ``l`` is compared with the stripped raw lines of section ``stage``;
        when several lines match, the last one wins. The position inside the
        section is then shifted by the sizes of the sections that precede it,
        taking into account whether ``make_key`` was defined first
        (``self.funcs_rev``).

        :returns: the computed line number, or ``-1`` when the line is not
            found or ``stage`` is not 0, 1 or 2
        """
        matches = [pos for pos, raw in enumerate(self.predata[stage])
                   if raw.strip() == l]
        if not matches:
            return -1
        nr = matches[-1]

        if stage == 0:
            return nr + 1
        if stage == 1:
            shift = self.cnt_lines[2] - 1 if self.funcs_rev else 0
            return nr + self.cnt_lines[0] + shift
        if stage == 2:
            shift = 0 if self.funcs_rev else self.cnt_lines[1] - 1
            return nr + self.cnt_lines[0] + shift

        return -1

    def handle_prop_line(self, d):
        """Process one tokenized property line (``<name> <=|:> <value>``).

        ``type`` and ``name`` set ``self.index_type`` / ``self.index_name``
        (quotes around the value are removed); ``type`` additionally inserts
        the base class name into ``self.tokens_head`` and registers the import
        line needed for it in ``self.custom_header``. Every other property is
        emitted as a ``kwargs["<name>"] = <value tokens>`` assignment after
        its value passed ``check_adjacents`` and ``check_colons``.

        :param d: list of 5-element token tuples of a single property line
        :raises IndexCreatorValueException: for a malformed assignment, a
            property set twice, or a wrong ``type``/``name`` value
        """
        # import line required by each non-default index base class
        type_imports = {
            "TreeBasedIndex": "from CodernityDB3.tree_index import TreeBasedIndex\n",
            "MultiTreeBasedIndex": "from CodernityDB3.tree_index import MultiTreeBasedIndex\n",
            "MultiHashIndex": "from CodernityDB3.hash_index import MultiHashIndex\n",
        }

        d_len = len(d)
        if d[d_len - 1][0] == token.ENDMARKER:
            d_len -= 1

        if d_len < 3:
            raise IndexCreatorValueException("Can't handle properity assingment ", self.cnt_line_nr(d[0][4], 0))

        if not d[1][1] in self.props_assign:
            raise IndexCreatorValueException(
                "Did you forget : or =?", self.cnt_line_nr(d[0][4], 0))

        prop_type, prop = d[0][0], d[0][1]
        if prop_type != token.NAME and prop_type != token.STRING:
            raise IndexCreatorValueException("Can't handle properity assingment ", self.cnt_line_nr(d[0][4], 0))

        if prop in self.props_set:
            raise IndexCreatorValueException("Properity %s is set more than once" % prop, self.cnt_line_nr(d[0][4], 0))
        self.props_set += [prop]

        if prop == "type" or prop == "name":
            t, tk, _, _, line = d[2]

            # exactly one value token is allowed here
            if d_len > 3:
                raise IndexCreatorValueException(
                    "Wrong value to assign", self.cnt_line_nr(line, 0))

            if t == token.STRING:
                # strip the surrounding quotes
                m = re.search(r'\s*(?P<a>[\'"]+)(.*?)(?P=a)\s*', tk)
                if m:
                    tk = m.groups()[1]
            elif t != token.NAME:
                raise IndexCreatorValueException(
                    "Wrong value to assign", self.cnt_line_nr(line, 0))

            if prop == "type":
                # Look the import up by the UNQUOTED value so that a quoted
                # type (type: "TreeBasedIndex") gets its import line too.
                header = type_imports.get(tk)
                if header is not None:
                    self.custom_header.add(header)
                self.tokens_head.insert(2, tk)
                self.index_type = tk
            else:
                self.index_name = tk
            return

        self.tokens += ['\n        kwargs["' + prop + '"]']
        self.tokens += ['=']

        self.check_adjacents(d[2:], 0)
        self.check_colons(d[2:], 0)

        for i in d[2:]:
            self.tokens += [i[1]]

    def generate_func(self, t, tk, pos_start, pos_end, line, hdata, stage):
        """Append the generated-code tokens for one token of a function body.

        The first token of a line opens either a ``return`` (plain line) or
        an ``if`` (line with a condition, per ``self.line_cons``), and the
        condition colon becomes ``: return``. Doubled ``&`` / ``|`` turn into
        ``and`` / ``or``, the spellings in ``self.none`` into ``None``, known
        function names into their generated counterparts, and any other name
        (except ``hdata`` itself and the logic keywords) into a lookup in the
        ``hdata`` dictionary. Everything else is copied as is.

        :param t: token type
        :param tk: token text
        :param pos_start: ``(row, column)`` where the token starts
        :param pos_end: ``(row, column)`` where the token ends (unused)
        :param line: text of the line the token comes from
        :param hdata: name of the function argument (``data`` or ``key``)
        :param stage: section index (1 make_key_value, 2 make_key)
        :raises IndexCreatorFunctionException: for a line placed after an
            unconditional return
        :raises IndexCreatorValueException: when a name already used as the
            second returned value is passed to ``md5``
        """
        if self.last_line[stage] != -1 and pos_start[0] > self.last_line[stage] and line != '':
            raise IndexCreatorFunctionException("This line will never be executed!", self.cnt_line_nr(line, stage))
        if t == 0:
            # end marker: nothing to generate
            return

        row, col = pos_start[0], pos_start[1]
        out = self.tokens
        conds = self.line_cons[stage]

        if col == 0:
            if conds[row - 1] == -1:
                out.append('\n        return')
                self.last_line[stage] = row
            else:
                out.append('\n        if')
        elif tk == ':' and conds[row - 1] > -1:
            if conds[row - 1] == 0:
                out.append(':\n            return')
                return
            conds[row - 1] -= 1

        if tk in self.logic2:
            # the second character of '&&' / '||' emits nothing
            if line[col - 1] != tk:
                if line[col + 1] == tk:
                    out.append('and' if tk == '&' else 'or')
                else:
                    out.append(tk)
            return

        if self.brackets != 0 and t == token.NAME and self.funcs_stack and self.funcs_stack[-1][0] == 'md5':
            used_as_dict = False
            for known, (known_row, known_col) in self.known_dicts_in_mkv:
                if known == tk and known_col > col and known_row == row and hdata == 'data':
                    used_as_dict = True
                    break
            if used_as_dict:
                raise IndexCreatorValueException("Second value returned by make_key_value for sure isn't a dictionary ", self.cnt_line_nr(line, 1))

        if tk == ')':
            self.cur_brackets -= 1
            if self.funcs_stack and self.cur_brackets == self.funcs_stack[-1][1]:
                # closing bracket of a known function call: add its suffix
                out.append(tk)
                out.extend(self.funcs[self.funcs_stack[-1][0]][1])
                self.funcs_stack.pop()
                return
        elif tk == '(':
            self.cur_brackets += 1

        if tk in self.none:
            out.append('None')
            return

        is_plain_name = t == token.NAME and tk not in self.logic and tk != hdata
        if not is_plain_name:
            out.append(tk)
            return

        if tk not in self.funcs:
            out.append(hdata + '["' + tk + '"]')
            return

        out.extend(self.funcs[tk][0])
        if tk in self.funcs_with_body:
            # remember that the helper's source has to be emitted
            self.funcs_with_body[tk] = (self.funcs_with_body[tk][0], True)
        self.custom_header.add(self.handle_int_imports.get(tk))
        self.funcs_stack.append((tk, self.cur_brackets))

    def handle_make_value(self, t, tk, pos_start, pos_end, line):
        """Generate code for one ``make_key_value`` token (argument ``data``, stage 1)."""
        argument_name, stage = 'data', 1
        self.generate_func(t, tk, pos_start, pos_end, line, argument_name, stage)

    def handle_make_key(self, t, tk, pos_start, pos_end, line):
        """Generate code for one ``make_key`` token (argument ``key``, stage 2)."""
        argument_name, stage = 'key', 2
        self.generate_func(t, tk, pos_start, pos_end, line, argument_name, stage)