# program/class for turning a chemical formula into a molecular mass.
# Copyright Patrick Thomson. Version pre-release.
# patrick.thomson@REMOVETHISgmail.com
# bugs, suggestions welcome but please mention molmass.py in the subject line or you will be filtered.
#
# I use the GPL pragmatically; to aid interoperability. If you are interested
# in making money from this software, so am I. However, it'd be nice to see how you change it.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA

import warnings
import listas

# Module-level lookup table: element symbol -> atomic mass as a float.
# Built from the parallel sequences listas.sim (symbols) and listas.pa
# (masses, converted with float()).
# NOTE(review): index 0 of both sequences is skipped - presumably a
# placeholder entry so that the index lines up with the atomic number;
# confirm against the listas module.
masslist={}
for i in range(1, len(listas.sim)):
	masslist[listas.sim[i]]=float(listas.pa[i])

# Shorthand group abbreviations and the formula fragment each stands for.
# MassCalc.compute replaces every occurrence of a key in the input with its
# expansion wrapped in brackets, before any element parsing happens.
# Because that replacement is a plain substring substitution, a key that
# coincides with an element symbol (e.g. "Pr", "Ac") always gets the macro
# meaning and can never be parsed as that element.
macrolist = {"Me": "CH3",
"Et": "CH3CH2",
"Pr": "CH3CH2CH2",
"Bu": "CH3CH2CH2CH2",
"Ph": "C6H5",
"Ac": "CH3CO",
"Ts": "CH3(C6H4)SO3",
}


class SanityError(Exception):
    """Raised when a formula cannot be processed.

    Signals that the input is malformed (bad characters, unbalanced
    brackets, stray dots, ...) or contains something this module does not
    know how to handle. The exception message states the reason.
    """

def sanitycheck(compound):
    """Reject formula strings that the rest of the module cannot parse.

    Returns None when the string is acceptable.

    Raises SanityError when:
      * the string contains a character other than an ASCII letter, an
        ASCII digit, "(", ")" or "." ("bad symbols present");
      * the number of "(" differs from the number of ")"
        ("mismatched bracket numbers");
      * there is more than one "." ("more than one dot").

    Only bracket *counts* are compared here; ordering problems such as
    ")(" are detected later by splitbrackets.
    """
    # What characters the compound is allowed to have in it.
    allowed = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789()."

    # Every character is checked against the whitelist. An isalnum()
    # shortcut is deliberately not used: on unicode strings it also accepts
    # non-ASCII letters and digits (e.g. a superscript two), which would slip
    # past the whitelist and fail later in int().
    for char in compound:
        if char not in allowed:
            raise SanityError("bad symbols present")

    # Unequal counts mean a bracket was never closed (or never opened).
    # Closing it implicitly may lead to unintuitive behaviour, so refuse.
    if compound.count("(") != compound.count(")"):
        raise SanityError("mismatched bracket numbers")

    if compound.count(".") > 1:
        raise SanityError("more than one dot")

def splink(str):
    """Split a leading run of digits off the front of a string.

    Returns a tuple (number, remainder):
      * if the string starts with one or more digits, number is that run
        converted to an int and remainder is everything after it
        ("12abc" -> (12, "abc"), "7" -> (7, ""));
      * otherwise number is 1 and remainder is the string unchanged
        ("abc" -> (1, "abc")).

    An empty string yields (1, "").
    """
    # Guard the empty string explicitly; indexing str[0] would raise
    # IndexError on it.
    if not str or not str[0].isdigit():
        return (1, str)
    for index, char in enumerate(str):
        if not char.isdigit():
            # First non-digit found: everything before it is the number.
            return (int(str[:index]), str[index:])
    # No non-digit was found, so the whole string is the number.
    return (int(str), "")

def splitbrackets(string):
    """Split a string around its first bracketed group.

    Returns a three-element list [before, inside, after]:
      * before - everything ahead of the first "(";
      * inside - everything between that "(" and its matching ")",
                 nested brackets included;
      * after  - everything following the matching ")".
    E.g. 'abc(123(456))la(789)' -> ['abc', '123(456)', 'la(789)'].

    If the string contains no "(" at all it is returned as the middle
    field: ['', string, ''].

    Raises SanityError if sanitycheck rejects the string, or if a ")"
    appears before the first "(", or if nothing closes the first "(".
    """
    # Re-validate on every call. This is paranoia (callers have normally
    # checked already) but it guarantees the bracket counts match below.
    sanitycheck(string)
    if "(" not in string:
        return ['', string, '']

    # start is everything before the first bracket, rest is everything after it.
    start, rest = string.split("(", 1)
    if ")" in start:
        raise SanityError("too many brackets close before opening!")
    if ")" not in rest:
        raise SanityError("too many brackets open before closing!")

    # blevel is the current "bracket level" - it starts at 1 because the
    # first "(" has already been consumed by the split above.
    blevel = 1
    for pointer, char in enumerate(rest):
        if char == "(":
            blevel += 1
        elif char == ")":
            blevel -= 1
            if blevel == 0:
                # pointer now indexes the ")" matching the first "(".
                break
    else:
        # No matching ")" found: treat the whole remainder as the middle.
        pointer = len(rest)

    # middle is everything before the closing bracket and end is everything
    # after it; the +1 omits the bracket itself.
    middle = rest[:pointer]
    end = rest[pointer + 1:]

    return [start, middle, end]

class MassCalc:
    """Molecular mass calculator for chemical formula strings.

    Interface:

    compute(string):        Either returns a number corresponding to the
                            mass of the compound represented by string,
                            or raises a SanityError with a reason why it
                            failed.
    answerarray:            list of machine-readable (element, count)
                            tuples, sorted by element symbol. Set by
                            compute.
    formula:                string containing the human-readable, collated,
                            alphabetised formula corresponding to the input
                            string. Set by compute.
    setaccurate(bool):      chooses between accurate masses (the default,
                            True) and approximate masses (rounded to the
                            nearest 1, apart from Cl and Cu).
    """

    def __init__(self):
        self.__accurate = True

    def __capoff(self, stack, countstack, count):
        """Finish one element token and record it in self.answerarray.

        stack holds the characters of the element symbol, countstack the
        digit characters that followed it (empty means a count of 1), and
        count is the multiplier of the fragment the element came from.
        """
        element = ''.join(stack)
        amount = int(''.join(countstack) or 1)
        self.answerarray.append((element, amount * count))

    def setaccurate(self, boolean):
        """Select accurate (True) or approximate (False) masses for compute."""
        self.__accurate = boolean

    def __setapproximate(self):
        """Make self.masslist a rounded copy of the module-level masslist.

        Every mass is rounded to the nearest whole number, except Cu and Cl
        which are fixed at 63.5 and 35.5.
        """
        self.masslist = {}
        for element, mass in masslist.items():
            self.masslist[element] = round(mass)
        self.masslist["Cu"] = 63.5
        self.masslist["Cl"] = 35.5

    def compute(self, compound):
        """Return the mass of the compound described by the formula string.

        Square brackets are treated as round ones, the abbreviations in
        macrolist are expanded, a single ".nXyz" suffix is counted n times,
        and bracketed groups may be nested and followed by a multiplier.

        Side effects: sets self.masslist, self.answerarray and self.formula.

        Returns an int when the total is a whole number, otherwise a float.
        Raises SanityError if the formula is malformed or names an element
        that is not in the mass table.
        """
        if self.__accurate:
            self.masslist = masslist  # Use the accurate mass list.
        else:
            self.__setapproximate()

        # Erase the previous result; also defines the list on the first run.
        self.answerarray = []

        # For the scope of this program, square brackets (as used for
        # transition-metal complexes) give an identical mass calculation
        # if they are considered to be round ones.
        compound = compound.replace("[", "(").replace("]", ")")
        sanitycheck(compound)

        # TODO: consider warning when "H20" appears - twenty hydrogens is
        # probably a mistype of water (H2O).

        # Before we do anything else, expand the macros. The expansion is
        # bracketed so that a following multiplier applies to the whole group.
        for shorthand, longhand in macrolist.items():
            if shorthand in compound:
                compound = compound.replace(shorthand, "(" + longhand + ")")

        # Sort out the dotted part first. sanitycheck has already
        # guaranteed that there is at most one dot.
        if "." in compound:
            compound, dotted = compound.split(".", 1)
            if not dotted:
                raise SanityError("nothing after dot!")
            amount, subcompound = splink(dotted)
            if not subcompound:
                raise SanityError("dotted section contains only digits!")
            workingarray = [(compound, 1), (subcompound, amount)]
        else:
            workingarray = [(compound, 1)]

        # Main loop: expand bracket pairs, one level per pass, until a
        # pass finds no brackets left.
        while True:
            newarray = []
            expanded = False
            for workingstring, count in workingarray:
                if "(" not in workingstring:
                    # The splitting below may produce empty fragments;
                    # discarding them here is simpler. Anything else is
                    # carried over unaltered.
                    if workingstring:
                        newarray.append((workingstring, count))
                    continue

                # We have to process a bracket pair.
                expanded = True
                first, middle, last = splitbrackets(workingstring)

                # The bit before the bracket is left as-is.
                newarray.append((first, count))
                if not last:
                    # Nothing after the closing bracket, so no multiplier.
                    newarray.append((middle, count))
                    continue

                # A number directly after the closing bracket multiplies
                # the bracketed group; the rest keeps the outer count.
                multiplier, remainder = splink(last)
                newarray.append((middle, count * multiplier))
                newarray.append((remainder, count))

            workingarray = newarray
            if not expanded:
                break

        # At this point workingarray consists solely of
        # letter-number-letter-number sequences, each with a multiplier.
        for workingstring, count in workingarray:
            stack = []       # characters of the element symbol being read
            countstack = []  # digit characters following that symbol
            for i in workingstring:
                if not stack:  # We are starting a new fragment.
                    if i.isdigit():
                        raise SanityError("number at start of compound fragment")
                    # NOTE (translated from the original Spanish comment):
                    # this messes up the dialog; it will have to be changed.
                    if i.islower():
                        raise SanityError("lowercase letter at start of compound fragment")
                    stack.append(i)
                else:
                    if i.islower():
                        if countstack:
                            raise SanityError("lowercase letter after number")
                        stack.append(i)
                    elif i.isupper():
                        # An uppercase character ends the current element.
                        self.__capoff(stack, countstack, count)
                        stack = [i]  # start a new stack with the new element
                        countstack = []
                    elif i.isdigit():
                        countstack.append(i)
            if stack:
                self.__capoff(stack, countstack, count)

        # answerarray now holds (element, count) tuples, possibly with
        # duplicates. Collate to one entry per element and sort by symbol.
        answerdict = {}
        for element, amount in self.answerarray:
            answerdict[element] = answerdict.get(element, 0) + amount
        self.answerarray = sorted(answerdict.items())

        # Produce a human-readable collated formula.
        formula = []
        for element, amount in self.answerarray:
            if amount == 1:
                amount = ""  # CH3, not C1H3
            formula.append(str(element) + str(amount))
        self.formula = ''.join(formula)

        # Sum the masses. answerarray and formula stay available to the
        # caller after the result has been returned.
        mass = 0.0
        for element, amount in self.answerarray:
            try:
                mass += self.masslist[element] * amount
            except KeyError:
                raise SanityError("element \"%s\" not recognised!" % element)

        # Hand back a plain int when the total is a whole number.
        if float(int(mass)) == mass:
            mass = int(mass)
        return mass
