/**
 * @file common.c common routines for Liferea
 * 
 * Copyright (C) 2003, 2004 Lars Lindner <lars.lindner@gmx.net>
 * Copyright (C) 2004       Karl Soderstrom <ks@xanadunet.net>
 *
 * parts of the RFC822 timezone decoding were taken from the gmime 
 * source written by 
 *
 * Authors: Michael Zucchi <notzed@helixcode.com>
 *          Jeffrey Stedfast <fejj@helixcode.com>
 *
 * Copyright 2000 Helix Code, Inc. (www.helixcode.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#define _XOPEN_SOURCE           /* glibc2 needs this (man strptime) */
#define _GNU_SOURCE
#include <config.h>

#include <libxml/xmlerror.h>
#include <libxml/uri.h>
#include <libxml/parser.h>
#include <libxml/entities.h>
#include <libxml/HTMLparser.h>
#include <hildon/hildon-helper.h>
#include <glib.h>
#include <sys/stat.h>
#include <locale.h>
#include <time.h>
#include <stdlib.h>
#include <ctype.h>
#include <langinfo.h>
#include "support.h"
#include "common.h"
#include "conf.h"
#include "support.h"
#include "feed.h"
#include "debug.h"
#include <string.h>

#include <osso-log.h>

#include <osso-rss-feed-reader/cache_handling.h>

/* Hand-written prototypes for setenv()/unsetenv().
 * NOTE(review): presumably needed because the feature-test macros defined
 * at the top of this file hide the <stdlib.h> declarations — confirm. */
extern int setenv(__const char *__name, __const char *__value, int __replace)
    __THROW;

  extern int unsetenv(__const char *__name) __THROW;

  /* Source encoding that convertCharSet() assumes when its caller passes
   * NULL as from_encoding. */
  static gchar *standard_encoding = { "UTF-8" };



/* Forward declarations of non-static helpers.
 * NOTE(review): feed_check_if_allowed() and common_process_entities() are
 * only declared here; their definitions are not in the reviewed section —
 * confirm they still exist. */

/* Character set conversion; takes ownership of `string`. */
gchar *convertCharSet(gchar * from_encoding, gchar * to_encoding,
                      gchar * string);
gboolean feed_check_if_allowed(gchar * start_string);
xmlEntityPtr common_process_entities(void *ctxt, const xmlChar * name);
/* libxml2 input conversion callback registered for "windows-1252". */
int
windowsToUTF8(unsigned char *out, int *outlen,
              const unsigned char *in_old, int *inlen);
/* Replaces every occurrence of old_string in `in`; frees `in`. */
unsigned char *replace_string(unsigned char *in,
                              const unsigned char *old_string,
                              const unsigned char *replaced_string);
/* SAX callbacks used by unhtmlize() and unxmlize(). */
void unhtmlizeHandleCharacters(void *user_data, const xmlChar * string,
                               int length);
void unhtmlizeHandleSAXStartElement(void *user_data, const xmlChar * name,
                                    const xmlChar ** atts);

/************************************************************************/
/*                        PRIVATE FUNCTIONS                             */
/************************************************************************/

/*
 * RFC822 timezone names and their offsets, taken from gmime-utils.c of the
 * gmime API.  Each offset is a signed decimal number in "hhmm" notation
 * (e.g. -500 is five hours), which common_parse_rfc822_tz() converts to
 * seconds.
 */
static struct {
    char *name;
    int offset;
} tz_offsets[] = {
    { "UT",       0 },
    { "GMT",      0 },
    { "EST",   -500 },          /* these are all US timezones.  bloody yanks */
    { "EDT",   -400 },
    { "CST",   -600 },
    { "CDT",   -500 },
    { "MST",   -700 },
    { "MDT",   -600 },
    { "PST",   -800 },
    { "PDT",   -700 },
    /* single-letter (military) zones */
    { "Z",        0 },
    { "A",     -100 },
    { "M",    -1200 },
    { "N",      100 },
    { "Y",     1200 }
};

/**
 * Removes leading whitespace (tab, space, CR, LF) from a string in place.
 *
 * A string consisting only of whitespace becomes the empty string.
 *
 * @param string	NUL-terminated string to modify, may be NULL
 *
 * @return the pointer that was passed in (NULL for NULL input)
 */
static gchar *trim_left(gchar * string)
{
    gchar *first = string;

    if (string == NULL)
        return NULL;

    ULOG_DEBUG("trim_left: before = \"%s\"", string);

    /* The NUL terminator is not whitespace, so this loop stops at the end. */
    while ((*first == '\t') || (*first == ' ') ||
           (*first == '\r') || (*first == '\n')) {
        first++;
    }

    /* Shift the remainder (including the terminator) to the front. */
    if (first != string)
        memmove(string, first, strlen(first) + 1);

    ULOG_DEBUG("trim_left: after = \"%s\"", string);
    return string;
}

/**
 * Removes trailing whitespace (tab, space, CR, LF) from a string in place.
 *
 * Works on the string length instead of a pointer to the last character,
 * so an empty string never yields a pointer in front of the buffer, and a
 * string made up of whitespace only is trimmed down to the empty string.
 *
 * @param string	NUL-terminated string to modify, may be NULL
 *
 * @return the pointer that was passed in (NULL for NULL input)
 */
static gchar *trim_right(gchar * string)
{
    size_t len = 0;

    if (string == NULL)
        return NULL;

    ULOG_DEBUG("trim_right: before = \"%s\"", string);

    len = strlen(string);
    while ((len > 0) &&
           ((string[len - 1] == '\t') || (string[len - 1] == ' ') ||
            (string[len - 1] == '\r') || (string[len - 1] == '\n'))) {
        string[--len] = 0;
    }

    ULOG_DEBUG("trim_right: after = \"%s\"", string);

    return string;
}

/**
 * libxml2 generic error callback (see xmlSetGenericErrorFunc()).
 *
 * Every reported message is formatted, forced to valid UTF-8, markup
 * escaped and appended as a <pre> element to the HTML buffer of the
 * error context.  Only the first MAX_PARSE_ERROR_LINES messages are
 * stored; when that limit is reached a truncation notice is appended.
 *
 * @param	ctxt	error context (errorCtxtPtr), must not be NULL
 * @param	msg	printf like format string
 */
static void bufferParseError(void *ctxt, const gchar * msg, ...)
{
    errorCtxtPtr errors = (errorCtxtPtr) ctxt;

    g_assert(NULL != errors);

    /* the counter is incremented for every call, stored or not */
    if (errors->errorCount++ < MAX_PARSE_ERROR_LINES) {
        va_list params;
        gchar *formatted = NULL;
        gchar *escaped = NULL;

        va_start(params, msg);
        formatted = g_strdup_vprintf(msg, params);
        va_end(params);

        g_assert(NULL != formatted);
        formatted = utf8_fix(formatted);
        escaped = g_markup_escape_text(formatted, -1);
        g_free(formatted);

        addToHTMLBufferFast(&errors->buffer, "<pre>");
        addToHTMLBufferFast(&errors->buffer, escaped);
        addToHTMLBufferFast(&errors->buffer, "</pre>");

        g_free(escaped);
    }

    /* true exactly once: on the call that stored the last allowed line */
    if (errors->errorCount == MAX_PARSE_ERROR_LINES) {
        gchar *with_notice =
            g_strdup_printf("%s<br/>%s", errors->buffer,
                            "[Parser error output was truncated]");

        g_free(errors->buffer);
        errors->buffer = with_notice;
    }
}

/**
 * Parses the timezone part of an RFC822 date.
 *
 * Accepts either a numeric zone ("+hhmm" / "-hhmm") or one of the names
 * listed in tz_offsets, optionally preceded by '('.  A name only matches
 * as a complete token: the search stops at the first hit and the
 * character following the name must not be alphanumeric.  Without this,
 * "MST" was overridden by the later single-letter entry "M", and words
 * that merely start with a zone letter were accepted.
 *
 * @param token	start of the timezone token, may be NULL
 *
 * @returns timezone offset in seconds, 0 for unknown zones
 */
static time_t common_parse_rfc822_tz(unsigned char *token)
{
    const char *inptr = (const char *) token;
    int offset = 0;

    if (inptr == NULL)
        return 0;

    if (*inptr == '+' || *inptr == '-') {
        long numeric = strtol(inptr, NULL, 10);

        /* a numeric zone has at most four digits; ignore garbage */
        if (numeric > -10000 && numeric < 10000)
            offset = (int) numeric;
    } else {
        size_t t;

        if (*inptr == '(')
            inptr++;

        for (t = 0; t < sizeof(tz_offsets) / sizeof(tz_offsets[0]); t++) {
            size_t name_len = strlen(tz_offsets[t].name);

            if (strncmp(inptr, tz_offsets[t].name, name_len) == 0 &&
                !isalnum((unsigned char) inptr[name_len])) {
                offset = tz_offsets[t].offset;
                break;
            }
        }
    }

    /* offset is in hhmm notation: convert hours and minutes to seconds */
    return 60 * ((offset / 100) * 60 + (offset % 100));
}

/**
 * Formats one byte as a URI escape sequence: a percent sign followed by
 * the high and the low nibble as lower-case hex digits.
 *
 * @param nr	byte to encode
 *
 * @return newly allocated string, to be released with g_free()
 */
static gchar *byte_to_hex(unsigned char nr)
{
    const int high_nibble = nr / 0x10;
    const int low_nibble = nr % 0x10;

    return g_strdup_printf("%%%x%x", high_nibble, low_nibble);
}

/************************************************************************/
/*                        PUBLIC FUNCTIONS                              */
/************************************************************************/

/**
 * Normalizes whitespace in place.
 *
 * Runs of spaces/tabs are reduced to their first character and each
 * CR or LF is turned into a single space, unless the previous character
 * already was whitespace or the line break is the very first character
 * of the string (in which case it is dropped).
 *
 * @param string	string to modify, may be NULL
 *
 * @return the pointer that was passed in
 */
gchar *remove_newlines_and_extra_spaces(gchar * string)
{
    gchar *src = NULL, *dst = NULL;
    gboolean at_start = TRUE;
    gboolean prev_blank = FALSE;

    if (string == NULL)
        return NULL;

    /* dst never overtakes src, so compacting in place is safe */
    for (src = dst = string; *src != 0; src++) {
        const gchar ch = *src;
        const gboolean is_blank = (ch == ' ') || (ch == '\t');

        if (ch == '\x0A' || ch == '\x0D') {
            /* line break: emit one separating space at most */
            if (!prev_blank && !at_start) {
                *dst++ = ' ';
                prev_blank = TRUE;
            }
        } else {
            if (!is_blank || !prev_blank)
                *dst++ = ch;
            prev_blank = is_blank;
        }
        at_start = FALSE;
    }
    *dst = 0;

    return string;
}

/**
 * Removes every '\n' character from a string in place.
 *
 * @param string	string to modify, may be NULL
 *
 * @return the pointer that was passed in (NULL for NULL input, matching
 *         remove_newlines_and_extra_spaces())
 */
gchar *remove_newlines(gchar * string)
{
    gchar *p = string, *q = string;

    if (string == NULL)
        return NULL;

    /* q trails p, so kept characters are compacted towards the front */
    while (*p != 0) {
        gchar c = *p++;
        if (c != '\n')
            *q++ = c;
    }
    *q = 0;
    return string;
}

/**
 * Strips whitespace from both ends of a string in place: the right side
 * is trimmed first, then the left side.
 *
 * @param string	string to modify
 *
 * @return the pointer returned by trim_left()
 */
gchar *trim_whitespaces(gchar * string)
{
    gchar *right_trimmed = trim_right(string);

    return trim_left(right_trimmed);
}

/**
 * Appends a string to a heap allocated buffer, growing the allocation in
 * steps of 512 bytes so that repeated small appends rarely have to move
 * the buffer.
 *
 * Lengths are kept as size_t so that long buffers are not truncated when
 * converted from strlen().
 *
 * @param buffer	address of the buffer pointer; *buffer may be NULL, in
 *			which case a copy of string becomes the new buffer
 * @param string	text to append, NULL is ignored
 */
void addToHTMLBufferFast(gchar ** buffer, const gchar * string)
{
    size_t oldlength = 0, newlength = 0, allocsize = 0;

    if (NULL == buffer || NULL == string)
        return;

    if (NULL == *buffer) {
        *buffer = g_strdup(string);
        return;
    }

    oldlength = strlen(*buffer);
    newlength = strlen(string);
    /* round the required size up to the next multiple of 512 bytes */
    allocsize = (((oldlength + newlength + 1) / 512) + 1) * 512;
    *buffer = g_realloc(*buffer, allocsize);
    /* copy including the terminating NUL */
    memcpy(*buffer + oldlength, string, newlength + 1);
}

/**
 * Appends a string to a heap allocated buffer, resizing the buffer to
 * exactly the size needed.
 *
 * @param buffer	address of the buffer pointer; if *buffer is NULL a
 *			copy of string becomes the new buffer
 * @param string	text to append, NULL is ignored
 */
void addToHTMLBuffer(gchar ** buffer, const gchar * string)
{
    int have = 0, extra = 0;

    if (NULL == string)
        return;

    if (NULL == *buffer) {
        *buffer = g_strdup(string);
        return;
    }

    have = strlen(*buffer);
    extra = strlen(string);
    *buffer = g_realloc(*buffer, have + extra + 1);
    /* the terminating NUL of string is copied as well */
    g_memmove(*buffer + have, string, extra + 1);
}

/**
 * Converts a string from one character set to another.
 *
 * Ownership: on success the original string is freed and the converted
 * copy is returned.  If the conversion fails a warning is logged and the
 * original string is returned unchanged.
 *
 * The byte counters of g_convert() are not needed and are therefore not
 * requested; they are gsize values and must never be received in gint
 * variables.
 *
 * @param from_encoding	source encoding, NULL selects standard_encoding
 * @param to_encoding	target encoding
 * @param string	text to convert, may be NULL
 *
 * @return the converted (or original) string; a newly allocated empty
 *         string if string was NULL
 */
gchar *convertCharSet(gchar * from_encoding, gchar * to_encoding,
                      gchar * string)
{
    gchar *converted = NULL;
    GError *err = NULL;

    if (NULL == string)
        return g_strdup("");

    if (NULL == from_encoding)
        from_encoding = standard_encoding;

    converted =
        g_convert(string, -1, to_encoding, from_encoding, NULL, NULL, &err);
    if (err != NULL) {
        g_warning("error converting character set: %s\n", err->message);
        g_error_free(err);
    }

    if (NULL == converted)
        return string;          /* keep the input on failure */

    g_free(string);
    return converted;
}

/**
 * Placeholder conversion: hands the given string back untouched.
 *
 * @param string	input string
 *
 * @return the very same pointer
 */
gchar *convertToHTML(gchar * string)
{
    gchar *unchanged = string;

    return unchanged;
}

/**
 * Ensures that a string read from XML is valid UTF-8.  This should be
 * applied to all read XML strings because libxml2 is used in recovery
 * mode, which can produce invalid UTF-8.
 *
 * Invalid bytes are removed one at a time, directly inside the given
 * buffer, until g_utf8_validate() accepts the string.
 *
 * @param string	string to check, modified in place; may be NULL
 *
 * @return the same pointer (NULL for NULL input)
 */
gchar *utf8_fix(xmlChar * string)
{
    const gchar *invalid_offset = NULL;

    if (NULL == string)
        return NULL;

    if (g_utf8_validate(string, -1, &invalid_offset))
        return string;          /* nothing to repair */

    debug0(DEBUG_PARSING, "parser delivered invalid UTF-8!");
    debug1(DEBUG_PARSING, "	>>>%s<<<\n", string);
    debug1(DEBUG_PARSING, "first invalid char is: >>>%s<<<\n",
           invalid_offset);
    debug0(DEBUG_PARSING, "removing invalid bytes");

    do {
        /* drop the offending byte by moving the tail (with NUL) forward */
        const gchar *tail = invalid_offset + 1;

        memmove((void *) invalid_offset, tail, strlen(tail) + 1);
    } while (!g_utf8_validate(string, -1, &invalid_offset));

    debug0(DEBUG_PARSING, "result is:\n");
    debug1(DEBUG_PARSING, "	>>>%s<<<\n", string);

    return string;
}

/**
 * Serializes an XML node to a string.
 *
 * @param cur		node to dump
 * @param children	TRUE: dump only the child nodes of cur, one after
 *			another; FALSE: dump cur itself
 *
 * @return copy of the dump made with xmlCharStrdup(), or NULL if the
 *         dump is empty
 */
gchar *extractHTMLNode(xmlNodePtr cur, gboolean children)
{
    gchar *markup = NULL;
    xmlBufferPtr dump = xmlBufferCreate();

    if (!children) {
        xmlNodeDump(dump, cur->doc, cur, 0, 0);
    } else {
        xmlNodePtr child = NULL;

        for (child = cur->xmlChildrenNode; child != NULL;
             child = child->next) {
            xmlNodeDump(dump, child->doc, child, 0, 0);
        }
    }

    if (xmlBufferLength(dump) > 0)
        markup = xmlCharStrdup(xmlBufferContent(dump));

    xmlBufferFree(dump);

    return markup;
}

void unhtmlizeHandleSAXStartElement(void *user_data, const xmlChar * name,
                                    const xmlChar ** atts)
{
    result_buffer *buffer = (result_buffer *) user_data;
    gint old_length = 0;

    if ((strcmp(name, "br") == 0) || (strcmp(name, "BR") == 0) ||
        (strcmp(name, "p") == 0) || (strcmp(name, "P") == 0)
        ) {
        old_length = buffer->length;
        buffer->length++;
        buffer->data = g_renew(gchar, buffer->data, buffer->length + 1);
        strncpy(buffer->data + old_length, "\n", 1);
        buffer->data[buffer->length] = 0;
    }
}


void unhtmlizeHandleCharacters(void *user_data, const xmlChar * string,
                               int length)
{
    result_buffer *buffer = (result_buffer *) user_data;
    gint old_length = 0;

    old_length = buffer->length;
    buffer->length += length;
    buffer->data = g_renew(gchar, buffer->data, buffer->length + 1);
    strncpy(buffer->data + old_length, (gchar *) string, length);
    buffer->data[buffer->length] = 0;

}

/**
 * Strips all HTML from a UTF-8 string: the result contains the text
 * nodes of the input, with entities resolved and a newline for every
 * <br> and <p> element.
 *
 * Ownership: if a non-empty result was produced the original string is
 * freed and the new string is returned; otherwise the (UTF-8 corrected)
 * original is returned.
 *
 * NOTE TO SELF: this is used from various different places.
 * Check what needs to be done to have external links processed
 * in this and converted to GtkEventBoxes in GtkTextBuffer.
 *
 * @param string	HTML text, may be NULL
 *
 * @return plain text string, or NULL for NULL input
 */
gchar *unhtmlize(gchar * string)
{
    htmlSAXHandlerPtr handler = NULL;
    htmlParserCtxtPtr parser = NULL;
    result_buffer *collected = NULL;
    gchar *plain = NULL;

    if (NULL == string)
        return NULL;

    string = utf8_fix(string);

    /* nothing to strip when there are neither entities nor tags */
    if (NULL == strpbrk(string, "&<>"))
        return string;

    collected = g_new0(result_buffer, 1);
    handler = g_new0(htmlSAXHandler, 1);
    handler->characters = unhtmlizeHandleCharacters;
    handler->startElement = unhtmlizeHandleSAXStartElement;

    /* A push parser is used here; older versions used htmlSAXParseDoc,
     * which caused strange crashes when freeing the parser context. */
    parser =
        htmlCreatePushParserCtxt(handler, collected, string, strlen(string),
                                 "", XML_CHAR_ENCODING_UTF8);
    /* all data was handed over above, just terminate the parse */
    htmlParseChunk(parser, string, 0, 1);
    htmlFreeParserCtxt(parser);

    plain = collected->data;
    g_free(collected);
    g_free(handler);

    if (plain != NULL && g_utf8_strlen(plain, -1)) {
        g_free(string);
        return plain;
    }

    /* Something went wrong in the parsing.
     * Use original string instead */
    g_free(plain);
    return string;
}

/**
 * Counterpart of unhtmlize() that uses the XML push parser instead of
 * the HTML one (no encoding argument is passed to the parser).
 *
 * Ownership: if a non-empty result was produced the original string is
 * freed and the new string is returned; otherwise the (UTF-8 corrected)
 * original is returned.
 *
 * @param string	XML text, may be NULL
 *
 * @return plain text string, or NULL for NULL input
 */
gchar *unxmlize(gchar * string)
{
    xmlSAXHandlerPtr handler = NULL;
    xmlParserCtxtPtr parser = NULL;
    result_buffer *collected = NULL;
    gchar *plain = NULL;

    if (NULL == string)
        return NULL;

    string = utf8_fix(string);

    /* nothing to strip when there are neither entities nor tags */
    if (NULL == strpbrk(string, "&<>"))
        return string;

    collected = g_new0(result_buffer, 1);
    handler = g_new0(xmlSAXHandler, 1);
    handler->characters = unhtmlizeHandleCharacters;
    handler->startElement = unhtmlizeHandleSAXStartElement;

    /* the complete input is passed when creating the push parser ... */
    parser =
        xmlCreatePushParserCtxt(handler, collected, string, strlen(string),
                                "");
    /* ... so this call only signals the end of the document */
    xmlParseChunk(parser, string, 0, 1);
    xmlFreeParserCtxt(parser);

    plain = collected->data;
    g_free(collected);
    g_free(handler);

    if (plain != NULL && g_utf8_strlen(plain, -1)) {
        g_free(string);
        return plain;
    }

    /* Something went wrong in the parsing.
     * Use original string instead */
    g_free(plain);
    return string;
}


/**
 * Replaces every occurrence of old_string by replaced_string.
 *
 * Ownership: the input string is freed and a newly allocated string is
 * returned (except when splitting yields NULL, in which case the input
 * itself is returned).
 *
 * @param in			string to process, may be NULL
 * @param old_string		text to search for
 * @param replaced_string	text to insert instead
 *
 * @return the resulting string, NULL if in was NULL
 */
unsigned char *replace_string(unsigned char *in,
                              const unsigned char *old_string,
                              const unsigned char *replaced_string)
{
    gchar **pieces = NULL;
    gchar *joined = NULL;

    if (NULL == in)
        return NULL;

    /* split at the old text and glue the pieces with the new text */
    pieces = g_strsplit(in, old_string, -1);
    if (NULL == pieces)
        return in;

    joined = g_strjoinv(replaced_string, pieces);

    g_strfreev(pieces);
    g_free(in);

    return joined;
}

/**
 * Input conversion callback registered for the "windows-1252" encoding
 * (see register_encoding_handlers()).
 *
 * Works on a private copy of the input: nine escaped numeric character
 * references ("&amp;#128", "&amp;#130", ...) are first rewritten to other
 * code points (8364, 8218, ...), then the copy is transferred to out.
 * Bytes >= 0xA0 are expanded to a two byte UTF-8 sequence, bytes below
 * 0xA0 are copied unchanged.
 *
 * NOTE(review): the incoming value of *inlen is never read; the input
 * length comes from strlen() on the copy, so in_old has to be
 * NUL-terminated — confirm that libxml2 guarantees this.
 * NOTE(review): *inlen is computed on the copy after the replacements,
 * which may be longer than the original input — confirm callers cope.
 * NOTE(review): the function always returns 0 — check this against the
 * return value libxml2 expects from an input conversion function.
 *
 * @param out		output buffer
 * @param outlen	in: size of out; out: number of bytes written
 * @param in_old	input text
 * @param inlen		out: number of bytes consumed from the working copy
 *
 * @return always 0
 */
int
windowsToUTF8(unsigned char *out, int *outlen,
              const unsigned char *in_old, int *inlen)
{
    unsigned char *in = NULL;
    unsigned char *outstart = NULL;
    unsigned char *base = NULL;
    unsigned char *outend = NULL;
    unsigned char *inend = NULL;
    unsigned char *instop = NULL;

    /* private copy; replace_string() frees and reallocates it */
    in = g_strdup(in_old);

    in = replace_string(in, "&amp;#128", "&amp;#8364");
    in = replace_string(in, "&amp;#130", "&amp;#8218");
    in = replace_string(in, "&amp;#145", "&amp;#8216");
    in = replace_string(in, "&amp;#146", "&amp;#8217");
    in = replace_string(in, "&amp;#147", "&amp;#8220");
    in = replace_string(in, "&amp;#148", "&amp;#8221");
    in = replace_string(in, "&amp;#150", "&amp;#8211");
    in = replace_string(in, "&amp;#151", "&amp;#8212");
    in = replace_string(in, "&amp;#153", "&amp;#8482");

    outstart = out;
    base = in;                  /* remembered for g_free() and *inlen */
    outend = out + *outlen;
    inend = in + strlen(in);
    instop = inend;

    /* "outend - 1" keeps room for the two byte sequence written below */
    while (in < inend && out < outend - 1) {
        if (*in >= 0xA0) {
            /* encode as 110xxxxx 10xxxxxx */
            *out++ = (((*in) >> 6) & 0x1F) | 0xC0;
            *out++ = ((*in) & 0x3F) | 0x80;
            ++in;
        }

        /* never copy more bytes than still fit into out */
        if (instop - in > outend - out)
            instop = in + (outend - out);
        /*Fixes for a crash in rendering the feed. */
        //while (in < instop && *in < 0x80) {
        while (in < instop && *in < 0xA0) {
            *out++ = *in++;
        }
    }
    /* one last byte may still fit if it needs no expansion */
    if (in < inend && out < outend && *in < 0x80) {
        *out++ = *in++;
    }

    *outlen = out - outstart;
    *inlen = in - base;
    g_free(base);

    return (0);
}

/**
 * Copies up to size bytes from data to mem, stopping early at the first
 * NUL byte (which is not copied).
 *
 * The end of input is detected by comparing against NUL explicitly.
 * Testing "> 0" on a plain char treated every byte >= 0x80 as the end of
 * the data on platforms where char is signed, which cut documents off at
 * the first non-ASCII character.
 *
 * @param data	source bytes
 * @param mem	destination, must have room for size bytes
 * @param size	maximum number of bytes to copy
 *
 * @return number of bytes copied
 */
static int common_read_packet(char *data, char *mem, int size)
{
    int res = 0;

    while (res < size && data[res] != '\0') {
        mem[res] = data[res];
        res++;
    }
    return (res);
}

/**
 * Builds an XML document tree from a memory buffer using the libxml2
 * push parser.  The buffer is fed to the parser in packets of at most
 * four bytes obtained through common_read_packet().
 *
 * @param data	XML source
 * @param size	number of bytes available in data
 *
 * @return the document built by the parser (ctxt->myDoc), or NULL if no
 *         input could be read or no parser context could be created
 */
static xmlDocPtr common_create_xmldoc(gchar * data, gint size)
{
    xmlParserCtxtPtr parser = NULL;
    xmlDocPtr tree = NULL;      /* the resulting document tree */
    gchar packet[4];
    gint left = size;
    gint got = 0;

    /* The first packet is given to the parser on creation so that it can
     * detect the encoding of the input. */
    got = common_read_packet(data, packet, left >= 4 ? 4 : left);
    data += got;
    left -= got;
    if (got <= 0) {
        g_message("Failed to parse 1\n");
        return NULL;
    }

    /* No SAX handler and no user data: we want a tree to be built. */
    parser = xmlCreatePushParserCtxt(NULL, NULL, packet, got, NULL);
    if (parser == NULL) {
        return NULL;
    }

    /* Push the remaining input packet by packet. */
    for (;;) {
        got = common_read_packet(data, packet, left >= 4 ? 4 : left);
        if (got <= 0)
            break;

        xmlParseChunk(parser, packet, got, 0);
        data += got;
        left -= got;
        if (left < 0)
            break;
    }

    /* there is no more input, indicate the parsing is finished. */
    xmlParseChunk(parser, packet, 0, 1);

    /* take the document out of the context before destroying it */
    tree = parser->myDoc;
    xmlFreeParserCtxt(parser);

    return tree;
}


/**
 * Common function to create a XML DOM object from a given XML buffer.
 *
 * While parsing, bufferParseError() is installed as libxml2 generic
 * error function so that parser messages are collected as HTML; the
 * default error function is restored before returning.
 *
 * On return *errormsg holds the collected messages (NULL if there were
 * none).  If no document could be built, *errormsg is a "Could not parse
 * document" report that includes the collected messages.
 *
 * @param data		XML source, must not be NULL
 * @param dataLength	the length of the data string in bytes
 * @param errormsg	receives the error buffer
 *
 * @return XML document, or NULL if the document could not be read
 */
xmlDocPtr parseBuffer(gchar * data, size_t dataLength, gchar ** errormsg)
{
    errorCtxtPtr collector = NULL;
    xmlDocPtr parsed = NULL;

    g_assert(NULL != data);

    /* xmlCreateMemoryParserCtxt() doesn't like no data */
    if (dataLength == 0) {
        g_warning("parseBuffer(): Empty input!\n");
        *errormsg = g_strdup("parseBuffer(): Empty input!\n");
        return NULL;
    }

    collector = g_new0(struct errorCtxt, 1);
    /* tvh: TODO, even for Liferea...
     * This generic error function only handles the "entity not defined"
     * kind of error.  For an entity that is an invalid xmlChar, such as
     * "&#22;", parsing stops and the rest of the description text is
     * truncated. */
    xmlSetGenericErrorFunc(collector, (xmlGenericErrorFunc) bufferParseError);
    parsed = common_create_xmldoc(data, dataLength);

    if (NULL == parsed) {
        const gchar *details =
            (collector->buffer != NULL) ? collector->buffer : "";
        const gchar *separator = (collector->buffer != NULL) ? "\n" : "";

        g_warning("xmlReadMemory: Could not parse document!\n");
        *errormsg =
            g_strdup_printf
            ("\nxmlReadMemory(): Could not parse document: \n %s%s",
             details, separator);
        ULOG_DEBUG("Error message: %s", *errormsg);
        /* the report replaces the raw collected messages */
        g_free(collector->buffer);
        collector->buffer = *errormsg;
    }

    /* This seems to reset the errorfunc to its default, so that the
     * GtkHTML2 module is not unhappy because it also tries to call the
     * errorfunc on occasion. */
    xmlSetGenericErrorFunc(NULL, NULL);

    /* hand the buffer over to the caller, only the context is freed */
    *errormsg = collector->buffer;
    g_free(collector);

    return parsed;
}

/**
 * Registers windowsToUTF8() with libxml2 as input conversion function
 * for the encoding name "windows-1252".  No output conversion function
 * is registered (NULL).
 */
void register_encoding_handlers()
{
    xmlNewCharEncodingHandler("windows-1252", windowsToUTF8, NULL);
}

/**
 * Converts an ISO 8601 time string to a time_t value.
 *
 * At least something like "2003-08-07T15:28:19" is expected; seconds,
 * second fractions and timezone information are optional.  The most
 * specific accepted format is YYYY-MM-DDThh:mm:ss.sTZD.  A plain date
 * ("YYYY-MM-DD") is accepted as well.
 *
 * Fixes over the previous version: the units digit of a two digit
 * seconds value is taken into account, second fractions are skipped so
 * that a timezone following them is still parsed, and NULL input is
 * reported as an invalid date instead of crashing.
 *
 * @param date	date string, may be NULL
 *
 * @return the parsed time, 0 if the string could not be parsed
 */
time_t parseISO8601Date(gchar * date)
{
    struct tm tm;
    time_t t, t2, offset = 0;
    gboolean success = FALSE;
    gchar *pos = NULL;

    memset(&tm, 0, sizeof(struct tm));

    if (NULL == date) {
        /* nothing to parse: reported as invalid format below */
    } else if (NULL !=
               (pos =
                strptime((const char *) date, "%t%Y-%m-%dT%H:%M%t", &tm))) {
        /* full specified variant: parse the optional seconds */
        if (*pos == ':')
            pos++;
        if (g_ascii_isdigit(pos[0]) && !g_ascii_isdigit(pos[1])) {
            tm.tm_sec = pos[0] - '0';
            pos++;
        } else if (g_ascii_isdigit(pos[0]) && g_ascii_isdigit(pos[1])) {
            tm.tm_sec = 10 * (pos[0] - '0') + (pos[1] - '0');
            pos += 2;
        }

        /* skip second fractions, time_t cannot represent them */
        if (*pos == '.' || *pos == ',') {
            pos++;
            while (g_ascii_isdigit(*pos))
                pos++;
        }

        /* Parse timezone: "Z", "+hh", "+hhmm" or "+hh:mm".  The checks
         * short-circuit at the terminating NUL, so no byte behind the
         * end of the string is read. */
        if (*pos == 'Z') {
            offset = 0;
        } else if ((*pos == '+' || *pos == '-')
                   && g_ascii_isdigit(pos[1]) && g_ascii_isdigit(pos[2])) {
            offset = (10 * (pos[1] - '0') + (pos[2] - '0')) * 60 * 60;

            if (pos[3] == ':' && g_ascii_isdigit(pos[4])
                && g_ascii_isdigit(pos[5]))
                offset += (10 * (pos[4] - '0') + (pos[5] - '0')) * 60;
            else if (g_ascii_isdigit(pos[3]) && g_ascii_isdigit(pos[4]))
                offset += (10 * (pos[3] - '0') + (pos[4] - '0')) * 60;

            offset *= (pos[0] == '+') ? 1 : -1;
        }
        success = TRUE;
    } else if (NULL != strptime((const char *) date, "%t%Y-%m-%d", &tm)) {
        /* only date */
        success = TRUE;
    }
    /* there were others combinations too... */

    if (TRUE == success) {
        if ((time_t) (-1) != (t = mktime(&tm))) {
            /* Remove the zone offset given in the string, then correct
             * for the local timezone that mktime() applied. */
            t = t - offset;
            t2 = mktime(gmtime(&t));
            t = t - (t2 - t);

            return t;
        } else {
            g_message
                ("internal error! time conversion error! mktime failed!\n");
        }
    } else {
        g_message
            ("Invalid ISO8601 date format! Ignoring <dc:date> information!\n");
    }

    return 0;
}

/* English day names, indexed by tm_wday (0 = Sunday). */
gchar *dayofweek[] = {
    "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
};

/* English month names, indexed by tm_mon (0 = January). */
gchar *months[] = {
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};

gchar *createRFC822Date(const time_t * time)
{
    struct tm *tm = NULL;

    tm = gmtime(time);          /* No need to free because it is statically allocated */
    return g_strdup_printf("%s, %2d %s %4d %02d:%02d:%02d GMT",
                           dayofweek[tm->tm_wday], tm->tm_mday,
                           months[tm->tm_mon], 1900 + tm->tm_year,
                           tm->tm_hour, tm->tm_min, tm->tm_sec);
}

/**
 * Converts a RFC822 time string to a time_t value.
 *
 * At least something like "03 Dec 12 01:38:34" is expected; the day of
 * week and the timezone are optional.  The most specific expected
 * format is "Fri, 03 Dec 12 01:38:34 CET".  Two and four digit years are
 * accepted, each with or without seconds.
 *
 * Dates that cannot be parsed are ignored silently.
 *
 * @param date	date string
 *
 * @return the parsed time, 0 if the string could not be parsed
 */
time_t parseRFC822Date(gchar * date)
{
    /* tried in this order; the first format that matches wins */
    static const char *const formats[] = {
        "%d %b %y %T",          /* standard format with 2 digit year */
        "%d %b %Y %T",          /* non-standard format with 4 digit year */
        "%d %b %y %R",          /* 2 digit year, no seconds */
        "%d %b %Y %R"           /* 4 digit year, no seconds */
    };
    struct tm tm;
    time_t t, t2;
    char *saved_locale = NULL;
    char *pos = NULL;
    gboolean success = FALSE;
    unsigned int i = 0;

    memset(&tm, 0, sizeof(struct tm));

    /* skip day of week */
    if (NULL != (pos = g_utf8_strchr(date, -1, ',')))
        date = ++pos;

    /* we expect English month names, so we set the locale */
    saved_locale = g_strdup(setlocale(LC_TIME, NULL));
    setlocale(LC_TIME, "C");

    for (i = 0; !success && i < sizeof(formats) / sizeof(formats[0]); i++) {
        pos = strptime((const char *) date, formats[i], &tm);
        success = (NULL != pos);
    }

    /* skip whitespaces before timezone */
    while (pos != NULL && *pos != '\0' && isspace((int) *pos))
        pos++;

    if (NULL != saved_locale) {
        setlocale(LC_TIME, saved_locale);       /* and reset it again */
        g_free(saved_locale);
    }

    if (TRUE != success)
        return 0;

    t = mktime(&tm);
    if ((time_t) (-1) == t) {
        g_warning("internal error! time conversion error! mktime failed!\n");
        return 0;
    }

    /* GMT time, with no daylight savings time
     * correction. (Usually, there is no daylight savings
     * time since the input is GMT.) */
    t = t - common_parse_rfc822_tz(pos);
    t2 = mktime(gmtime(&t));
    t = t - (t2 - t);
    return t;
}



/**
 * Percent-encodes a UTF-8 string for use in a HTTP URI.
 *
 * The string is processed bytewise: ASCII letters, digits and the
 * characters -_.!~*'() are copied, every other byte is written as a
 * percent sign plus two hex digits (a space becomes "%20").  Multi-byte
 * UTF-8 sequences are checked for completeness; if the string ends in
 * the middle of a sequence a warning is logged and the incomplete rest
 * is dropped.  Bytes that cannot start a sequence are logged and encoded
 * on their own.
 *
 * Fixes over the previous version: the result built so far is no longer
 * used as printf format string, an invalid lead byte no longer causes a
 * double free or a NULL result, the 6 byte sequence range (252..253) is
 * recognized, and the completeness check is no longer off by one.
 *
 * Ownership: the input string is freed.
 *
 * @param string	string to encode, may be NULL
 *
 * @return newly allocated encoded string, NULL if string was NULL
 */
gchar *encode_uri_string(gchar * string)
{
    GString *encoded = NULL;
    gsize len = 0, i = 0;

    if (NULL == string)
        return NULL;

    encoded = g_string_new("");
    len = strlen(string);

    for (i = 0; i < len; i++) {
        const unsigned char lead = (unsigned char) string[i];
        gsize bytes = 1, j = 0;

        if (g_ascii_isalnum(lead) || strchr("-_.!~*'()", lead)) {
            g_string_append_c(encoded, (gchar) lead);
            continue;
        }

        /* length of the UTF-8 sequence introduced by this byte */
        if (lead >= 192 && lead <= 223)
            bytes = 2;
        else if (lead >= 224 && lead <= 239)
            bytes = 3;
        else if (lead >= 240 && lead <= 247)
            bytes = 4;
        else if (lead >= 248 && lead <= 251)
            bytes = 5;
        else if (lead >= 252 && lead <= 253)
            bytes = 6;
        else if (lead >= 128) {
            /* continuation byte without lead byte, or 0xFE/0xFF */
            ULOG_ERR
                ("Internal error while converting UTF-8 chars to HTTP URI!");
        }

        if (i + bytes > len) {
            g_warning
                ("Unexpected end of character sequence or corrupt UTF-8 "
                 "encoding! Some characters were dropped!");
            break;
        }

        for (j = 0; j < bytes; j++) {
            gchar *hex = byte_to_hex((unsigned char) string[i + j]);

            g_string_append(encoded, hex);
            g_free(hex);
        }
        /* the loop increment accounts for the first byte */
        i += bytes - 1;
    }
    g_free(string);

    /* FALSE: keep the character data, free only the GString wrapper */
    return g_string_free(encoded, FALSE);
}

/**
 * Escapes an URL and, if a base URL is given, resolves it against the
 * (escaped) base URL.  The passed strings are not touched.
 *
 * @param url		URL to process
 * @param baseURL	base URL, may be NULL
 *
 * @return newly allocated URL: the result of xmlBuildURI() if a base URL
 *         was given, otherwise the escaped URL
 */
xmlChar *common_build_url(const gchar * url, const gchar * baseURL)
{
    xmlChar *escapedURL = xmlURIEscape(url);
    xmlChar *escapedBase = NULL;
    xmlChar *resolved = NULL;

    if (NULL == baseURL)
        return escapedURL;

    escapedBase = xmlURIEscape(baseURL);
    resolved = xmlBuildURI(escapedURL, escapedBase);
    /* both escaped copies were only needed to build the result */
    xmlFree(escapedURL);
    xmlFree(escapedBase);

    return resolved;
}

/**
 * Removes leading and trailing whitespace from a title in place.
 *
 * @param title	title to clean up
 *
 * @return the pointer that was passed in
 */
gchar *filter_title(gchar * title)
{
    /* same as g_strstrip(): strip the front first, then the end */
    return g_strchomp(g_strchug(title));
}

#ifndef HAVE_STRSEP
/**
 * Fallback implementation of strsep() (logic taken from
 * glibc-2.2.1/sysdeps/generic/strsep.c).
 *
 * Cuts the next token off *stringp: the first delimiter character found
 * is overwritten with NUL and *stringp is moved behind it.  If no
 * delimiter is left, *stringp becomes NULL.
 *
 * @param stringp	address of the string position, updated by the call
 * @param delim		set of delimiter characters
 *
 * @return start of the token, NULL if *stringp was NULL
 */
char *strsep(char **stringp, const char *delim)
{
    char *token = *stringp;
    char *stop = NULL;

    if (token == NULL)
        return NULL;

    if (delim[0] != '\0' && delim[1] != '\0') {
        /* several delimiters: find the end of the token. */
        stop = strpbrk(token, delim);
    } else if (delim[0] == '\0') {
        /* no delimiter at all: the rest is one token */
        stop = NULL;
    } else if (*token == delim[0]) {
        /* frequent single delimiter case, handled without strpbrk() */
        stop = token;
    } else if (*token == '\0') {
        stop = NULL;
    } else {
        stop = strchr(token + 1, delim[0]);
    }

    if (stop != NULL) {
        /* Terminate the token and set *STRINGP past NUL character.  */
        *stop = '\0';
        *stringp = stop + 1;
    } else {
        /* No more delimiters; this is the last token.  */
        *stringp = NULL;
    }
    return token;
}
#endif                          /*HAVE_STRSEP */

/* Taken from gaim 24 June 2004, copyrighted by the gaim developers
 * under the GPL, etc....
 *
 * Returns a newly allocated copy of string in which every occurrence
 * of delimiter is replaced by replacement.  Returns NULL if any of the
 * three arguments is NULL. */
gchar *strreplace(const char *string, const char *delimiter,
                  const char *replacement)
{
    gchar **pieces = NULL;
    gchar *joined = NULL;

    if (string == NULL || delimiter == NULL || replacement == NULL)
        return NULL;

    /* Cut at every delimiter, then glue the pieces back together
     * with the replacement in between. */
    pieces = g_strsplit(string, delimiter, 0);
    joined = g_strjoinv(replacement, pieces);
    g_strfreev(pieces);

    return joined;
}

/* Looks up the Hildon logical colour named by logical and formats its
 * normal-state foreground value as "#RRGGBB".
 *
 * The result lives in a static buffer: it must not be freed and is
 * overwritten by the next call. */
gchar *get_logical_color(gchar * logical)
{
    static gchar color[8];
    GtkWidget *widget = NULL;
    const GdkColor *fg = NULL;

    g_assert(NULL != logical);

    widget = gtk_label_new("");

    g_assert(NULL != widget);

    /* The label is only a scratch object used to resolve the colour.
     * Take ownership of its floating reference so that it can be
     * released again below instead of leaking on every call. */
    g_object_ref_sink(widget);

    hildon_helper_set_logical_color(widget, GTK_RC_FG,
                                    GTK_STATE_NORMAL, logical);

    fg = &widget->style->fg[GTK_STATE_NORMAL];

    /* GdkColor channels are 16 bit wide (0..65535); keep the high
     * byte so that every channel fits the two hex digits. */
    g_snprintf(color, sizeof color, "#%02X%02X%02X",
               (guint) (fg->red >> 8),
               (guint) (fg->green >> 8), (guint) (fg->blue >> 8));

    gtk_widget_destroy(widget);
    g_object_unref(widget);

    return color;
}

gchar *get_localized_time(time_t time)
{
    struct tm *tm = NULL;

    gchar *time_string = NULL;
    gchar *time_format = g_strdup(nl_langinfo(T_FMT));
    gchar ts_with_secs[TIMESTRLEN + 1] = { "" };
    gchar ts_without_secs[TIMESTRLEN + 1] = { "" };

    int i = 0, j = 0;
    int nr_colon = 0;
    gboolean ignore = FALSE;
    gboolean add_space = FALSE;

    tm = localtime(&time);

    strftime(ts_with_secs, TIMESTRLEN, time_format, tm);

    for (i = 0; i < TIMESTRLEN + 1; i++) {
        if (ts_with_secs[i] == ':')
            nr_colon++;

        if (nr_colon == 2) {
            ignore = TRUE;
            nr_colon = 0;
        }

        if (ignore && ts_with_secs[i] != ':'
            && (ts_with_secs[i] == ' ' || ts_with_secs[i] == '\0'
                || ts_with_secs[i] > 57)) {
            if (ts_with_secs[i] > 57)
                add_space = TRUE;
            ignore = FALSE;
        }

        if (!ignore) {
            if (add_space) {
                ts_without_secs[j++] = ' ';
                add_space = FALSE;
            }
            ts_without_secs[j++] = ts_with_secs[i];
        }

        if (ts_with_secs[i] == '\0')
            break;
    }

    g_free(time_format);

    time_string = g_strdup(ts_without_secs);
    time_string = g_strstrip(time_string);

    return time_string;
}

/* Formats time with the locale's D_FMT.
 *
 * With years == TRUE the complete formatted date is returned.
 * Otherwise the formatted date is split at ' ', '/', '-' and '.' and
 * only the first two fields are returned, joined by the first of those
 * separator characters (tested in that order) that occurs in the date.
 * NOTE(review): this assumes the year is not among the first two
 * fields of the locale's date format - confirm for year-first locales.
 *
 * Returns a newly allocated string (release with g_free()); it is
 * empty when the date cannot be converted, formatted or split. */
gchar *get_localized_date(time_t time, gboolean years)
{
    static const gchar separators[] = " /-.";
    gchar date_string[TIMESTRLEN + 1] = { "" };
    gchar separator[2] = { '\0', '\0' };
    gchar *date_format = NULL;
    gchar *df_no_years = NULL;
    gchar **df_split = NULL;
    const gchar *sep = NULL;
    struct tm *tm = NULL;
    size_t written = 0;

    tm = localtime(&time);
    if (tm == NULL)
        return g_strdup("");

    /* nl_langinfo() hands out storage it may reuse; work on a copy. */
    date_format = g_strdup(nl_langinfo(D_FMT));
    written = strftime(date_string, TIMESTRLEN, date_format, tm);
    g_free(date_format);

    /* On failure strftime() leaves the buffer contents unspecified. */
    if (written == 0)
        return g_strdup("");

    if (years)
        return g_strdup(date_string);

    df_split = g_strsplit_set(date_string, separators, -1);

    if (g_strv_length(df_split) >= 2) {
        /* Pick the separator by fixed priority, not by position. */
        for (sep = separators; *sep != '\0'; sep++) {
            if (strchr(date_string, *sep) != NULL) {
                separator[0] = *sep;
                break;
            }
        }
    }

    if (separator[0] != '\0')
        df_no_years = g_strconcat(df_split[0], separator, df_split[1], NULL);
    else
        df_no_years = g_strdup("");

    g_strfreev(df_split);

    return df_no_years;
}

/* Compacts text in place: after every newline that is kept, all
 * directly following spaces, tabs and newlines are removed.
 * Returns text itself, or NULL when text is NULL. */
gchar *paragraphize(gchar * text)
{
    gchar *src = NULL, *dst = NULL;
    gboolean after_newline = FALSE;

    if (text == NULL)
        return NULL;

    for (src = dst = text; *src != '\0'; src++) {
        const gboolean blank = (*src == ' ' || *src == '\t'
                                || *src == '\n');

        /* Swallow whitespace that trails a newline. */
        if (after_newline && blank)
            continue;

        after_newline = (*src == '\n');
        *dst++ = *src;
    }
    *dst = '\0';

    return text;
}

/* Returns a newly allocated copy of text in which every "<pre>" and
 * "</pre>" is replaced by a newline.  The replacement only takes place
 * when both tags occur; otherwise an unmodified copy is returned.
 * Returns NULL when text is NULL. */
gchar *remove_pre_tags(gchar * text)
{
    gchar **pieces = NULL;
    gchar *without_open = NULL;
    gchar *result = NULL;

    if (text == NULL)
        return NULL;

    /* First pass: opening tags. */
    pieces = g_strsplit(text, "<pre>", 0);
    if (g_strv_length(pieces) <= 1) {
        g_strfreev(pieces);
        return g_strdup(text);
    }
    without_open = g_strjoinv("\n", pieces);
    g_strfreev(pieces);

    /* Second pass: closing tags, working on the first pass' result. */
    pieces = g_strsplit(without_open, "</pre>", 0);
    g_free(without_open);

    if (g_strv_length(pieces) > 1)
        result = g_strjoinv("\n", pieces);
    else
        result = g_strdup(text);
    g_strfreev(pieces);

    return result;
}

/* This function is not really that 'common': it is specific to RSS
 * item descriptions.
 *
 * Returns a newly allocated copy of text (release with g_free()) with
 * one leading "<p>" removed and every trailing "</p>", "<br>" and
 * "<br clear=all>" cut off, repeatedly, until none of them ends the
 * string any more.  Returns NULL when text is NULL.
 *
 * TODO: tvh: IMPORTANT: needs to get all the images here and store locally
 * somewhere in an array of files for example??
 * TODO: make a better string parser to remove the starting <p> or <br>
 * and the ending <p> or <br>
 */
gchar *prepare_description(gchar * text)
{
    static const gchar *const trailing_tags[] = {
        "</p>", "<br>", "<br clear=all>"
    };
    gchar *tmp = NULL;
    gboolean stripped = TRUE;
    guint k = 0;

    if (text == NULL)
        return NULL;

    tmp = g_strdup(text);

    if (g_str_has_prefix(tmp, "<p>")) {
        /* Shift the remainder (including its NUL) over the tag so the
         * buffer stays the one allocated by g_strdup(). */
        memmove(tmp, tmp + 3, strlen(tmp + 3) + 1);
        ULOG_DEBUG("Removing starting <p>");
    }

    /* Keep making passes until one of them removes nothing. */
    while (stripped) {
        stripped = FALSE;
        for (k = 0; k < G_N_ELEMENTS(trailing_tags); k++) {
            if (g_str_has_suffix(tmp, trailing_tags[k])) {
                tmp[strlen(tmp) - strlen(trailing_tags[k])] = '\0';
                stripped = TRUE;
            }
        }
    }

    return tmp;
}

/* Characters that are removed from file names. */
const char *invalid_fn_chars = "/\\?*&$@`!'.:,()<>|";

/* Passes fn through trim_whitespaces() and utf8_fix(), then removes
 * every character listed in invalid_fn_chars from the result in place.
 * Returns the string produced by those helpers, or NULL if they
 * produced none. */
gchar *strip_invalid_char_from_fn(gchar * fn)
{
    gchar *src = NULL, *dst = NULL;

    fn = utf8_fix(trim_whitespaces(fn));
    if (fn == NULL)
        return NULL;

    /* Single compaction pass: copy only the characters that are kept.
     * This never reads beyond the terminating NUL. */
    for (src = dst = fn; *src != '\0'; src++) {
        if (strchr(invalid_fn_chars, *src) == NULL)
            *dst++ = *src;
    }
    *dst = '\0';

    return fn;
}

/* Checks the candidate entity at the beginning of start_string.
 *
 * At most the first 10 bytes are scanned for a terminating ';'.
 * Returns TRUE when no ';' is found there, i.e. the scan hits a space,
 * the end of the string or the 10 byte limit first.  Otherwise returns
 * whether the text up to and including the ';' occurs in the list of
 * allowed entities below. */
gboolean feed_check_if_allowed(gchar * start_string)
{
    static const gchar allowed_string[] =
        "&nbsp; &iexcl; &cent; &pound; &curren; &yen; &brvbar; "
        "&sect; &uml; &copy; &ordf; &laquo; &not; &shy; &reg; &macr; &deg; &plusmn; "
        "&sup2; &sup3; &acute; &micro; &para; &middot; &cedil; &sup1; &ordm; &raquo; "
        "&frac14; &frac12; &frac34; &iquest; &Agrave; &Aacute; &Acirc; &Atilde; &Auml; "
        "&Aring; &AElig; &Ccedil; &Egrave; &Eacute; &Ecirc; &Euml; &Igrave; &Iacute; "
        "&Icirc; &Iuml; &ETH; &Ntilde; &Ograve; &Oacute; &Ocirc; &Otilde; &Ouml; &times; "
        "&Oslash; &Ugrave; &Uacute; &Ucirc; &Uuml; &Yacute; &THORN; &szlig; &agrave; "
        "&aacute; &acirc; &atilde; &auml; &aring; &aelig; &ccedil; &egrave; &eacute; "
        "&ecirc; &euml; &igrave; &iacute; &icirc; &iuml; &eth; &ntilde; &ograve; "
        "&oacute; &ocirc; &otilde; &ouml; &divide; &oslash; &ugrave; &uacute; &ucirc; "
        "&uuml; &yacute; &thorn; &yuml; &amp;";
    int i = 0;
    gchar *part = NULL;
    gboolean allowed = FALSE;

    /* Stop at the terminator as well, so that short strings are never
     * read beyond their end. */
    while (i < 10 && start_string[i] != '\0'
           && start_string[i] != ' ' && start_string[i] != ';') {
        i++;
    }

    /* No ';' within reach: nothing to look up. */
    if (i >= 10 || start_string[i] != ';')
        return TRUE;

    part = g_strndup(start_string, i + 1);
    ULOG_DEBUG("ALLOWED: %s\n", allowed_string);
    allowed = (strstr(allowed_string, part) != NULL);
    g_free(part);

    return allowed;
}

#ifdef feed_eliminate_end_sign_used
/*
 * that define is not currently used. It emphasizes the point that
 * feed_eliminate_end_sign() has turned into dead code after
 * addition of "return" as first stmt. Also the only calling point in feed.c
 * has been modified not to call here (was leaking memory by doing so)
 */
gchar *feed_eliminate_end_sign(gchar * text)
{
    return text;
    gchar *search_string = text, *found_end_sign = NULL, *temp =
        NULL, *start_text = text, *temp2 = NULL;
    int current_pos = 0;
    //ULOG_DEBUG("%s : %s\n",__FUNCTION__,text);
    while (search_string
           && (found_end_sign =
               g_strstr_len(search_string, strlen(search_string), "&lt;"))) {
        //if (found_end_sign[1]!=' ')
        {
            //if (!feed_check_if_allowed(found_end_sign))
            {
                current_pos = found_end_sign - start_text;
                temp = g_strndup(start_text, current_pos);
                temp2 = g_strconcat(temp, "&amp;", found_end_sign + 1, NULL);
                g_free(temp);
                g_free(start_text);
                start_text = temp2;
                search_string = start_text + current_pos + 1;

            }
        }
        search_string++;
    }

    return start_text;

}
#endif                          /* feed_eliminate_end_sign_used */

/* Processes all pending GTK events.  Does nothing as long as
 * lifereaStarted is not set. */
void ui_update(void)
{
    if (lifereaStarted) {
        while (gtk_events_pending())
            gtk_main_iteration();
    }
}
