#!/usr/bin/python
# -*- coding: utf-8 -*-
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published
## by the Free Software Foundation; version 2 and higher.
##
## Martin Grimme (martin.grimme # gmail.com) 2010

# Language codes. Presumably these correspond to the low nibble of a data
# coding scheme whose high nibble is 0x0 (see decode) -- verify against the
# callers, since nothing in this module compares against them yet.
LANG_DE = 0x0
LANG_EN = 0x1
LANG_IT = 0x2
LANG_FR = 0x3
LANG_ES = 0x4
LANG_NL = 0x5
LANG_SE = 0x6
LANG_DA = 0x7
LANG_PO = 0x8
LANG_FI = 0x9
LANG_NO = 0xa
LANG_GR = 0xb
LANG_TR = 0xc
LANG_UNSPECIFIED = 0xf


# Lookup table for the GSM 7 bit default alphabet: the character belonging to
# code N is stored at index N. The table is spelled out as one string literal
# per 16 codes and split into a list of single characters.
# Note that index 0x1b holds a plain space (presumably as a stand-in for the
# escape code, which has no printable representation).
GSM_DEFAULT_ALPHABET = list(
    # 0x00 - 0x0f
    u"@\u00a3$\u00a5\u00e8\u00e9\u00f9\u00ec\u00f2\u00c7\n\u00d8\u00f8\r\u00c5\u00e5"
    # 0x10 - 0x1f
    u"\u0394_\u03a6\u0393\u039b\u03a9\u03a0\u03a8\u03a3\u0398\u039e \u00c6\u00e6\u00df\u00c9"
    # 0x20 - 0x2f
    u" !\"#\u00a4%&'()*+,-./"
    # 0x30 - 0x3f
    u"0123456789:;<=>?"
    # 0x40 - 0x4f
    u"\u00a1ABCDEFGHIJKLMNO"
    # 0x50 - 0x5f
    u"PQRSTUVWXYZ\u00c4\u00d6\u00d1\u00dc\u00a7"
    # 0x60 - 0x6f
    u"\u00bfabcdefghijklmno"
    # 0x70 - 0x7f
    u"pqrstuvwxyz\u00e4\u00f6\u00f1\u00fc\u00e0"
)


def decode(s, n):
    """
    Decodes a cell broadcast payload according to its data coding scheme.

    The upper nibble of the scheme selects the coding group and the lower
    nibble is handed to the decoder of that group. Payloads of all other
    groups are returned exactly as they were passed in.

    @param s: string to decode
    @param n: GSM cell broadcast data coding scheme
    @return: decoded string, or C{s} itself for groups that are not decoded
    """

    coding_group = (n >> 4) & 0x0f
    group_detail = n & 0x0f

    # group 0x0: the low nibble names a language
    if (coding_group == 0x0):
        return _decode_language(s, group_detail)

    # groups 0x4 - 0x7: general data coding indication
    if (coding_group in (0x4, 0x5, 0x6, 0x7)):
        return _decode_general_data_coding(s, coding_group, group_detail)

    # what remains are the reserved languages (0x1 - 0x3), the reserved
    # coding groups (0x8 - 0xe) and data coding / message handling (0xf);
    # none of these are interpreted
    return s


def _decode_language(s, lang):
    """
    Decodes a payload that belongs to the language coding group.

    @param s: string to decode
    @param lang: language code from the coding scheme; currently not evaluated
    @return: whatever _decode_default_alphabet yields for C{s}
    """

    # every language is treated alike: decode with the default alphabet
    decoded = _decode_default_alphabet(s)
    return decoded


def _decode_default_alphabet(s):
    """
    Translates a string through the GSM default alphabet table.

    The code of every character in C{s} is used as an index into
    GSM_DEFAULT_ALPHABET. Codes beyond the end of the table cannot be
    translated and are replaced by U+FFFD (REPLACEMENT CHARACTER) instead of
    aborting the whole message with an IndexError.

    @param s: string holding one alphabet code per character
    @return: UTF-8 encoded string
    """

    # TODO: we really might have to do 7 bit character unpacking here

    table_size = len(GSM_DEFAULT_ALPHABET)

    # ought to be all in the 7 bit GSM character map; anything else is
    # substituted rather than allowed to index past the table
    chars = [ GSM_DEFAULT_ALPHABET[code] if code < table_size else u"\ufffd"
              for code in map(ord, s) ]

    # join on a unicode separator so the result is unicode even when empty
    u_str = u"".join(chars)
    return u_str.encode("utf-8")


def _decode_hex(s):

    return s.decode("hex")


def _decode_usc2(s):

    return s.decode("hex").decode("utf-16-be").encode("utf-8")


def _decode_general_data_coding(s, h, l):

    is_compressed = (h & 0x2)
    
    alphabet = (l & 0xc) >> 2

    if (alphabet == 0x0):
        # default alphabet
        return _decode_defaul_alphabet(s)
        
    elif (alphabet == 0x1):
        # 8 bit
        # actually, encoding is user-defined, but let's assume hex'd ASCII
        # for now
        return _decode_hex(s)
        
    elif (alphabet == 0x2):
        # USC2 (16 bit, BE)
        return _decode_usc2(s)
    elif (alphabet == 0x3):
        # reserved
        return s

