#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
===============================================================
     Utility classes for processing LiveJournal comment XML and HTML forms
===============================================================
"""
 
__author__ = "Sannikov Alexander <sannikov@cs.karelia.ru>"
__version__ = "$Revision: 1 $"
__date__ = "$Date: 2010/08/12 $"
__copyright__ = ""
__license__ = "GPLv2"

import xml.dom.minidom
from HTMLParser import HTMLParser
import string

class CommentProcessor:

    """
    Utility class for processing XML with comments from LiveJournal.

    Metadata chunks are fed through feedmeta() and comment bodies through
    feeddata(); the parsed state is accumulated on the instance:

    umap      -- dict mapping user id (int) to user name; id 0 is
                 pre-filled with "Anonymous"
    maxid     -- value of the <maxid> element, 0 until one has been read
    current   -- highest comment id processed so far, -1 initially
    changeset -- dict mapping comment id (int) to a dict with the keys
                 'posterid', 'state', 'parentid', 'jitemid' and, once
                 feeddata() has seen an active comment, 'body', 'date'
                 and 'subject'

    @author Sannikov Alexander <sannikov@cs.karelia.ru>
    @version 0.1
    @date 2010/08/30
    """

    def __init__(self):
        """
        Constructor for class.

        @return None
        """
        self.umap = {0: "Anonymous"}
        self.maxid = 0
        self.current = -1
        self.changeset = {}

    @staticmethod
    def _int_attr(node, name, default=0):
        """
        Read an integer attribute from a DOM element.

        @param node DOM element to read from
        @param name (string) attribute name
        @param default value returned when the attribute is missing or
               is not a valid integer
        @return attribute value as int, or default
        """
        try:
            # getAttribute() yields "" for a missing attribute, which
            # int() rejects with ValueError.
            return int(node.getAttribute(name))
        except (TypeError, ValueError):
            return default

    @staticmethod
    def _child_text(node, tag):
        """
        Return the data of the first child node of the first <tag>
        element below node.

        @param node DOM element to search in
        @param tag (string) element name to look for
        @return the text, or None if the element is absent, empty or its
                first child carries no character data
        """
        try:
            return node.getElementsByTagName(tag)[0].childNodes[0].data
        except (IndexError, AttributeError):
            return None

    def _new_record(self, node):
        """
        Build a changeset entry from the attributes of a <comment> element.

        Missing or malformed numeric attributes become 0 and a missing
        state becomes 'A'.

        @param node <comment> DOM element
        @return dict with 'posterid', 'state', 'parentid' and 'jitemid'
        """
        return {
            'posterid': self._int_attr(node, 'posterid'),
            'state': node.getAttribute('state') or 'A',
            'parentid': self._int_attr(node, 'parentid'),
            'jitemid': self._int_attr(node, 'jitemid'),
        }

    def feedmeta(self, rawxml):
        """
        Process new part of XML with meta-data received from livejournal.

        Reads <maxid> (only while self.maxid is still 0), adds every
        <usermap> entry to self.umap and stores one changeset record per
        <comment> element, raising self.current to the highest id seen.
        The raw XML and every stored record are printed to stdout.

        @param rawxml (string) part of XML
        @return None
        """
        print(rawxml)
        tdom = xml.dom.minidom.parseString(rawxml)

        if self.maxid == 0:
            try:
                maxid_node = tdom.getElementsByTagName("maxid")[0]
                self.maxid = int(maxid_node.childNodes[0].data)
            except (IndexError, AttributeError, ValueError) as err:
                # A chunk without a usable <maxid> is reported, not fatal.
                print(err)

        for user in tdom.getElementsByTagName("usermap"):
            try:
                uid = int(user.getAttribute('id'))
            except ValueError:
                # Skip only the malformed entry; keep the remaining ones.
                continue
            self.umap[uid] = user.getAttribute('user')

        for change in tdom.getElementsByTagName("comment"):
            # A comment without a numeric id cannot be stored, so the
            # ValueError from int() is allowed to propagate.
            cid = int(change.getAttribute('id'))
            record = self._new_record(change)
            self.changeset[cid] = record
            if cid > self.current:
                self.current = cid
            print(record)

    def feeddata(self, rawxml):
        """
        Process new part of XML with comment bodies received from livejournal.

        For every <comment> element the matching changeset record gets its
        'parentid' and 'jitemid' refreshed when those attributes parse as
        integers. If the comment was not announced through feedmeta(), a
        record is created from the element's own attributes. Records whose
        state is 'A' additionally receive 'body', 'date' and 'subject';
        each is None when the corresponding element is absent or empty.
        self.current is raised to the highest id seen.

        @param rawxml (string) part of XML
        @return None
        """
        tdom = xml.dom.minidom.parseString(rawxml)

        for comment in tdom.getElementsByTagName("comment"):
            cid = int(comment.getAttribute('id'))

            record = self.changeset.get(cid)
            if record is None:
                record = self.changeset[cid] = self._new_record(comment)
            else:
                for key in ('parentid', 'jitemid'):
                    value = self._int_attr(comment, key, None)
                    # Keep the value from the meta-data when the body
                    # chunk does not carry a usable one.
                    if value is not None:
                        record[key] = value

            if record['state'] == 'A':
                record['body'] = self._child_text(comment, "body")
                record['date'] = self._child_text(comment, "date")
                record['subject'] = self._child_text(comment, "subject")

            if cid > self.current:
                self.current = cid
	    
class CHTMLParser(HTMLParser):

    """
    Parser that extracts form submission data from an HTML page.

    After feed() has been called:

    request -- BASE_URL, or the action of the most recently opened <form>
               resolved against BASE_URL
    params  -- dict mapping the name of every named <input> found inside
               a <form> to its value ('' when it has no value attribute)

    @author Sannikov Alexander <sannikov@cs.karelia.ru>
    @version 0.1
    @date 2010/08/30
    """

    # Base against which form actions are resolved.
    BASE_URL = 'http://livejournal.com/'

    def __init__(self):
        """
        Constructor for class.

        @return None
        """
        HTMLParser.__init__(self)
        self.form_in_process = False
        self.request = self.BASE_URL
        self.params = {}

    def handle_starttag(self, tag, attrs):
        """
        Record form actions and the inputs found inside a form.

        @param tag (string) tag name
        @param attrs list of (name, value) attribute pairs
        @return None
        """
        if tag == 'form':
            # Imported here so the class works whether the urljoin
            # function lives in urlparse (Python 2) or urllib.parse.
            try:
                from urlparse import urljoin
            except ImportError:
                from urllib.parse import urljoin

            self.form_in_process = True
            for name, value in attrs:
                if name == 'action':
                    # Resolve against the fixed base rather than appending
                    # to the previous result, so a second form or an
                    # absolute/rooted action still yields a valid URL.
                    # A bare "action" attribute has value None.
                    self.request = urljoin(self.BASE_URL, value or '')
        elif tag == 'input' and self.form_in_process:
            fields = dict(attrs)
            field_name = fields.get('name')
            # Inputs without a name are not part of a form submission.
            if field_name:
                self.params[field_name] = fields.get('value', '')

    def handle_startendtag(self, tag, attrs):
        """
        Treat a self-closing tag as a start tag followed by an end tag.

        @param tag (string) tag name
        @param attrs list of (name, value) attribute pairs
        @return None
        """
        self.handle_starttag(tag, attrs)
        self.handle_endtag(tag)

    def handle_endtag(self, tag):
        """
        Stop collecting inputs when a form is closed.

        @param tag (string) tag name
        @return None
        """
        if tag == 'form':
            self.form_in_process = False
            

           
