from xml.dom import minidom

class GourmetXML:
    """Load recipes from a Gourmet XML export into parallel per-field lists.

    Every field list holds ``[recipe_id, value]`` pairs.  Recipe ids are
    1-based and are advanced each time a ``<title>`` element is seen, so the
    parser assumes that ``<title>`` comes before the other fields of its
    recipe.
    """

    # Separator appended after each ingredient part, keyed by element name.
    _INGREDIENT_SUFFIXES = {'amount': '||', 'unit': '||', 'item': '\n'}

    def initiation(self, filename):
        """Parse *filename* and return the cleaned-up recipe field lists.

        *filename* is handed to ``minidom.parse`` unchanged.

        Returns a list of nine lists, in this order: titles, categories,
        preparation times, cooking times, servings, ingredients,
        instructions, notes, pictures.  Every list except the ingredient
        list is stripped of tabs/newlines, padded with ``[id, '']`` for
        recipes that lack the field, and sorted by recipe id.  The
        ingredient list is returned exactly as ``xml_load`` built it.
        """
        xml_document = minidom.parse(filename)

        title_list, category_list, preptime_list, cooktime_list, \
            servings_list, ing_list, instructions_list, \
            notes_list, pic_list = self.xml_load(xml_document)

        recipes_info = [title_list, category_list, preptime_list,
                        cooktime_list, servings_list, ing_list,
                        instructions_list, notes_list, pic_list]

        n_items = len(title_list)
        for lista in recipes_info:
            # Compare by identity: two different fields may hold equal
            # content (typically both empty), and an equality test would
            # then confuse them with each other.
            if lista is ing_list:
                continue
            is_text = lista is instructions_list or lista is notes_list
            self.do_treatment_to_lists(lista, n_items, is_text, False)

        return recipes_info

    def xml_load(self, xml_document):
        """Walk a parsed Gourmet document and collect the recipe fields.

        Returns a 9-tuple of lists ``(title_list, category_list,
        preptime_list, cooktime_list, servings_list, ing_list,
        instructions_list, notes_list, pic_list)``.  Each list contains
        ``[recipe_id, value]`` pairs; elements without content are skipped,
        except for an empty title, which is stored as ``''`` so that the
        number of titles still matches the number of recipes seen.
        """
        title_list = []
        category_list = []
        preptime_list = []
        cooktime_list = []
        servings_list = []
        ing_list = []
        instructions_list = []
        notes_list = []
        pic_list = []

        # Elements whose first child value is stored as-is in a target list.
        simple_fields = {
            'category': category_list,
            'preptime': preptime_list,
            'cooktime': cooktime_list,
            'servings': servings_list,
            'instructions': instructions_list,
            'modifications': notes_list,
        }

        idd = 0
        for node in xml_document.documentElement.childNodes:
            if node.nodeName != 'recipe':
                continue
            for item_node in node.childNodes:
                name = item_node.nodeName
                if name == 'title':
                    idd += 1
                    title = self._first_child_value(item_node)
                    title_list.append([idd, '' if title is None else title])
                elif name in simple_fields:
                    value = self._first_child_value(item_node)
                    if value is not None:
                        simple_fields[name].append([idd, value])
                elif name == 'image':
                    for each_node in item_node.childNodes:
                        if each_node.nodeName == '#cdata-section':
                            pic_list.append([idd, each_node.nodeValue])
                elif name == 'ingredient-list':
                    ing_list.append(
                        [idd, self._ingredient_list_to_text(item_node)])

        return title_list, category_list, preptime_list, cooktime_list, \
            servings_list, ing_list, instructions_list, notes_list, pic_list

    def _first_child_value(self, node):
        """Return the value of *node*'s first child, or None if it has none."""
        child = node.firstChild
        if child is None:
            return None
        return child.nodeValue

    def _ingredient_list_to_text(self, list_node):
        """Serialise every ingredient under *list_node*, grouped or not."""
        lines = []
        for each_node in list_node.childNodes:
            if each_node.nodeName == 'ingredient':
                lines.append(self._ingredient_to_text(each_node))
            elif each_node.nodeName == 'inggroup':
                # Grouped ingredients are flattened into the same list.
                for group_node in each_node.childNodes:
                    if group_node.nodeName == 'ingredient':
                        lines.append(self._ingredient_to_text(group_node))
        return ''.join(lines)

    def _ingredient_to_text(self, ingredient_node):
        """Serialise one <ingredient> element as ``amount||unit||item\\n``."""
        parts = []
        for child in ingredient_node.childNodes:
            suffix = self._INGREDIENT_SUFFIXES.get(child.nodeName)
            if suffix is None:
                continue
            value = self._first_child_value(child)
            if value is None:
                continue
            parts.append(value.replace('\t', '').replace('\n', '') + suffix)
        return self.tratar_one_ing(''.join(parts))

    def tratar_one_ing(self, one_item):
        """Normalise one ingredient line so it carries four ``|`` characters.

        * four pipes: returned unchanged;
        * two pipes: every ``||`` is replaced by ``||||``;
        * no pipes: ``||||`` is added at the beginning;
        * any other count: returned unchanged.
        """
        pipes = one_item.count('|')
        if pipes == 2:
            return one_item.replace('||', '||||')
        if pipes == 0:
            return '||||' + one_item
        return one_item

    def do_treatment_to_lists(self, lista, n_items, text_list, inglist):
        """Clean, pad and sort one ``[recipe_id, value]`` list in place.

        lista     -- list of ``[recipe_id, value]`` pairs; modified in place.
        n_items   -- number of recipes; ids ``1..n_items`` missing from
                     *lista* are added with an empty value.
        text_list -- if true, keep inner newlines and only drop tabs and a
                     single leading newline; otherwise drop all tabs and
                     newlines.
        inglist   -- if true, *lista* is returned untouched.

        Returns *lista* itself.
        """
        if inglist:
            return lista

        for entry in lista:
            value = entry[1].replace('\t', '')
            if text_list:
                if value[:1] == '\n':
                    value = value[1:]
            else:
                value = value.replace('\n', '')
            entry[1] = value

        present_ids = set(entry[0] for entry in lista)
        for recipe_id in range(1, n_items + 1):
            if recipe_id not in present_ids:
                lista.append([recipe_id, ''])

        # list.sort() sorts in place and returns None, so its result must
        # not be assigned back to the list.
        lista.sort()
        return lista
