// -*- C++ -*-
/* Copyright (C) 2001 Fumitoshi UKAI <ukai@debian.or.jp>

This file is part of groff.

groff is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

groff is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

#include "encoding.h"
#include "device.h"

#include <string.h>
#ifdef ENABLE_MULTIBYTE
#include <locale.h>
#include <wchar.h>
#ifdef HAVE_LANGINFO_CODESET
#include <langinfo.h>
#else
#include <stdlib.h>
#endif
#endif

class ascii8_handler : public encoding_handler {
  // Single-byte handler: every input byte is one character and no byte
  // is ever treated as part of a multibyte sequence.
public:
  ascii8_handler() {}
  ~ascii8_handler() {}

  // Name reported for this encoding.
  // NOTE(review): the original author marked "C" as uncertain (???).
  const char *name() { return "C"; }

  // No byte starts a multibyte character in this encoding.
  inline int is_wchar_byte(unsigned char c) { return 0; }

  // The character is simply the byte value; nothing is read from eis.
  inline wchar make_wchar(unsigned char c0, encoding_istream& eis) {
    const wchar single = wchar(c0);
    return single;
  }

  // Write the character code, truncated to one byte; always reports 1.
  inline int put_wchar(wchar wc, encoding_ostream& eos) {
    const unsigned char low = (unsigned char)wchar_code(wc);
    eos.putbyte(low);
    return 1;
  }

  // A character never occupies more than one byte.
  inline int max_wchar_len() { return 1; }

};

#ifdef ENABLE_MULTIBYTE
class utf8_handler: public encoding_handler {
  // Encoding handler for UTF-8, including the historical 5- and 6-byte
  // forms.  Bytes below 0x80 are returned as themselves; characters
  // decoded from a multibyte sequence are returned as the negated code
  // point, which is what is_wchar_code() recognises.
public:
  utf8_handler() { }
  ~utf8_handler() { }

  const char *name() { return "UTF-8"; }

  // Any byte with the top bit set belongs to a multibyte sequence.
  inline int is_wchar_byte(unsigned char c) {
    return (c >= 0x80);
  }

  /*
   0000 0000-0000 007F   0xxxxxxx
   0000 0080-0000 07FF   110xxxxx 10xxxxxx
   0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx

   0001 0000-001F FFFF   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
   0020 0000-03FF FFFF   111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
   0400 0000-7FFF FFFF   1111110x 10xxxxxx ... 10xxxxxx
  */

  // Decode one character whose first byte c0 has already been read;
  // continuation bytes are pulled from eis.
  // Returns c0 itself for a byte below 0x80, the negated code point for
  // a well-formed sequence, and '?' for a malformed one (a lead byte
  // that is a continuation byte or 0xFE/0xFF, or a following byte that
  // is not of the form 10xxxxxx, which also covers a negative
  // end-of-input value from getbyte()).
  inline wchar make_wchar(unsigned char c0, encoding_istream& eis) {
    if (! is_wchar_byte(c0)) {
      return c0;
    }
    wchar wc;
    int count;	// number of continuation bytes still expected
    if ((c0 & 0xE0) == 0xC0) {
      wc = c0 & 0x1F;
      count = 1;
    } else if ((c0 & 0xF0) == 0xE0) {
      wc = c0 & 0x0F;
      count = 2;
    } else if ((c0 & 0xF8) == 0xF0) {
      wc = c0 & 0x07;
      count = 3;
    } else if ((c0 & 0xFC) == 0xF8) {
      wc = c0 & 0x03;
      count = 4;
    } else if ((c0 & 0xFE) == 0xFC) {
      wc = c0 & 0x01;
      count = 5;
    } else {
      /* 10xxxxxx, 0xFE or 0xFF cannot start a sequence */
      return '?';
    }
    for (; count > 0; count--) {
      int c1 = eis.getbyte();
      if ((c1 & 0xC0) != 0x80) {
	/* illegal utf8 sequence: not a continuation byte */
	return '?';
      }
      wc = (wc << 6) | (c1 & 0x3F);
    }
    return -wc;	// XXX: negative
  }

  // Write wc to eos as UTF-8 and return the number of bytes written,
  // matching what the other handlers in this file report.
  // A value that is not a wide-character code is written as one raw
  // byte.  Codes below 0x80 are written as a single byte rather than
  // as an overlong two-byte sequence.
  inline int put_wchar(wchar wc, encoding_ostream& eos) {
    if (! is_wchar_code(wc)) {
      eos.putbyte(wc);
      return 1;
    }
    wc = -wc;	// XXX: negative -> character code

    if (wc < 0x80) {
      eos.putbyte(wc);
      return 1;
    }

    int count;	// continuation bytes that follow the lead byte
    if (wc < 0x800) {
      count = 1; eos.putbyte((wc >> 6) | 0xC0);
    } else if (wc < 0x10000) {
      count = 2; eos.putbyte((wc >> 12) | 0xE0);
    } else if (wc < 0x200000) {
      count = 3; eos.putbyte((wc >> 18) | 0xF0);
    } else if (wc < 0x4000000) {
      count = 4; eos.putbyte((wc >> 24) | 0xf8);
    } else if (wc <= 0x7fffffff) {
      count = 5; eos.putbyte((wc >> 30) | 0xFC);
    } else {
      /* not representable: nothing is written */
      return 0;
    }
    for (int i = 0; i < count; i++) {
      eos.putbyte(((wc >> 6 * (count - i - 1)) & 0x3f) | 0x80);
    }
    return count + 1;	// lead byte plus continuation bytes
  }

  inline int max_wchar_len() { return 6; } /* XXX 3?*/

};

#define USE_ICONV
#ifdef USE_ICONV
#include <iconv.h>
#include <errno.h>

class iconv_handler : public encoding_handler {
  // Encoding handler that delegates conversion to iconv(3).
  // Bytes below 0x80 pass through unchanged; anything else is converted
  // to or from UCS-2BE and carried internally as the negated 16-bit
  // code, which is what is_wchar_code() recognises.
private:
  iconv_t icd, ocd;		// external -> UCS-2BE, UCS-2BE -> external
  const char *encoding_name;	// heap copy owned by this handler
  static iconv_handler *ih_header;	// head of the list of handlers

  // The handler owns iconv descriptors and a heap string, so copying
  // is disabled (declared but never defined).
  iconv_handler(const iconv_handler&);
  iconv_handler& operator=(const iconv_handler&);

  // Duplicate s.  The caller's string may live in storage that later
  // setlocale()/nl_langinfo() calls overwrite, so it must not be kept
  // by pointer.
  static const char *copy_name(const char *s) {
    if (s == NULL)
      s = "";
    char *p = new char[strlen(s) + 1];
    strcpy(p, s);
    return p;
  }

public:
  iconv_handler *next;	// next handler in the ih_header list

  // Return the handler for ename, creating and registering it on first
  // use so that each encoding name has a single handler.
  static iconv_handler *new_iconv_handler(const char *ename) {
    iconv_handler *ip;
    // if already created, use it.
    for (ip = ih_header; ip != NULL; ip = ip->next) {
      if (strcmp(ip->name(), ename) == 0) {
	return ip;
      }
    }
    // not found, create new one.
    ip = new iconv_handler(ename);
    ip->next = ih_header;
    ih_header = ip;
    return ip;
  }

  // Open both conversion directions for ename.  If iconv_open() fails
  // the descriptor is left as (iconv_t)-1 and the conversion methods
  // fall back instead of calling iconv() on it.
  iconv_handler(const char *ename)
    : encoding_name(copy_name(ename)), next(NULL) {
    // internal code is UCS-2BE
    icd = iconv_open("UCS-2BE", encoding_name);
    ocd = iconv_open(encoding_name, "UCS-2BE");
  }

  ~iconv_handler() {
    if (icd != (iconv_t)-1)
      iconv_close(icd);
    if (ocd != (iconv_t)-1)
      iconv_close(ocd);
    delete[] encoding_name;
  }

  const char *name() { return encoding_name; }

  inline int is_wchar_byte(unsigned char c) {
    return (c >= 0x80); /* ??? */
  }

  // Decode one character whose first byte c0 has already been read;
  // further bytes are pulled from eis while iconv reports an
  // incomplete sequence.
  // Returns c0 for a byte below 0x80 (or when no input converter could
  // be opened), the negated UCS-2 code on success, and '?' when the
  // sequence is illegal, does not complete within the local buffer, or
  // iconv fails for any other reason.
  inline wchar make_wchar(unsigned char c0, encoding_istream& eis) {
    if (! is_wchar_byte(c0)) {
      return c0;
    }
    if (icd == (iconv_t)-1) {
      /* no converter available: hand the byte through */
      return c0;
    }

    char inbuf[8], outbuf[8];
    size_t len = 0;
    inbuf[len++] = (char)c0;
    for (;;) {
      char *inp = inbuf;
      char *outp = outbuf;
      size_t inbytesleft = len;
      size_t outbytesleft = sizeof(outbuf);
      size_t r = iconv(icd, &inp, &inbytesleft, &outp, &outbytesleft);
      if (r != (size_t)-1) {
	if (outp - outbuf < 2) {
	  /* input consumed but no complete UCS-2 unit produced */
	  return '?';
	}
	/* UCS-2 is 2 bytes */
	wchar wc = ((outbuf[0] & 0x0ff) << 8) | (outbuf[1] & 0x0ff);
	return -wc;	// XXX: negative
      }
      if (errno != EINVAL) {
	/* illegal sequence (EILSEQ), no room (E2BIG) or other failure */
	break;
      }
      /* incomplete sequence: fetch one more byte while there is room */
      if (len >= sizeof(inbuf)) {
	break;
      }
      inbuf[len++] = (char)eis.getbyte();
    }
    // put the descriptor back into its initial state after a failure
    iconv(icd, NULL, NULL, NULL, NULL);
    return '?';
  }


  // Write wc to eos in the external encoding and return the number of
  // bytes written.  A value that is not a wide-character code is
  // written as one raw byte; a character that cannot be converted is
  // written as '?'.
  inline int put_wchar(wchar wc, encoding_ostream& eos) {
    if (!is_wchar_code(wc)) {
      eos.putbyte(wc & 0x0ff);
      return 1;
    }
    wc = -wc;	// XXX: negative -> character code

    if (ocd == (iconv_t)-1) {
      /* no converter available */
      eos.putbyte('?');
      return 1;
    }

    char inbuf[2], outbuf[4];
    inbuf[0] = (wc >> 8) & 0x0ff;
    inbuf[1] = (wc >> 0) & 0x0ff;
    char *inp = inbuf;
    char *outp = outbuf;
    size_t inbytesleft = sizeof(inbuf);
    size_t outbytesleft = sizeof(outbuf);
    size_t r = iconv(ocd, &inp, &inbytesleft, &outp, &outbytesleft);
    if (r == (size_t)-1) {
      /* EILSEQ, EINVAL and E2BIG are all reported the same way */
      iconv(ocd, NULL, NULL, NULL, NULL);
      eos.putbyte('?');
      return 1;
    }
    for (char *op = outbuf; op < outp; op++) {
      eos.putbyte(*op & 0x0ff);
    }
    return (int)(outp - outbuf);
  }

  inline int max_wchar_len() { return 6; } /* XXX */

};
#else
class euc_handler : public encoding_handler {
  // Two-byte EUC-JP handler.  This class sits in the #else branch of
  // USE_ICONV, so it is compiled only when iconv support is not used.
  static const int WCTABLE_OFFSET = 0xa1;
  static const int WCTABLE_SIZE = 94;
  static const int EUCMASK = 0x8080;

public:
  euc_handler() {}
  ~euc_handler() {}

  const char *name() { return "EUC-JP"; }

  // Bytes 0xa1..0xfe take part in a two-byte character.
  inline int is_wchar_byte(unsigned char c) {
    return (0xa1 <= c && c <= 0xfe);
  }

  // Combine c0 with the next byte of eis into one two-byte character.
  // Returns c0 alone when either byte is outside 0xa1..0xfe, a plain
  // space for the code 0xa1a1, and the negated 16-bit code otherwise.
  inline wchar make_wchar(unsigned char c0, encoding_istream& eis) {
    if (is_wchar_byte(c0)) {
      int c1 = eis.peekbyte();
      if (is_wchar_byte(c1)) {
	c1 = eis.getbyte();
	wchar wc = (c0 & 0xff) << 8;
	wc |= (c1 & 0xff);
	if (wc == 0xa1a1)
	  return ' ';
	return -wc;
      }
      // NOTE(review): the byte was only peeked, yet it is pushed back
      // here; confirm against encoding_istream that this does not
      // duplicate it.
      eis.ungetbyte(c1);
    }
    return c0;
  }

  // Write wc as two bytes (high, low) when it is a wide-character code,
  // otherwise as one raw byte.  Returns the number of bytes written.
  inline int put_wchar(wchar wc, encoding_ostream& eos) {
    if (!is_wchar_code(wc)) {
      eos.putbyte(wc & 0x0ff);
      return 1;
    }
    wc = -wc;
    eos.putbyte((wc >> 8) & 0x0ff);
    eos.putbyte((wc >> 0) & 0x0ff);
    return 2;
  }

  inline int max_wchar_len() { return 2; } /* XXX */
};
#endif /* USE_ICONV */
#endif

// Shared single-byte handler; it is the default for both directions.
static ascii8_handler ascii8;
#ifdef ENABLE_MULTIBYTE
// Shared UTF-8 handler.
static utf8_handler utf8;
#ifdef USE_ICONV
// Head of the list of iconv handlers created so far (initially empty).
iconv_handler *iconv_handler::ih_header = NULL;
#else
// Shared EUC-JP handler, present only when iconv support is not used.
static euc_handler eucjp;
#endif
#endif

// Handlers currently in effect; both start out as the single-byte handler.
encoding_handler *input_encoding = &ascii8;
encoding_handler *output_encoding = &ascii8;

// Store in *eptr the handler that corresponds to encoding_name.
// A null name, or a name that no branch below recognises, selects the
// single-byte handler.  With multibyte support "UTF-8" selects the UTF-8
// handler; beyond that, every name except "C" gets an iconv handler when
// USE_ICONV is defined, and only "EUC-JP" is recognised when it is not.
static void
new_encoding_handler(encoding_handler **eptr, const char *encoding_name)
{
  // Start from the default and replace it if the name is recognised.
  encoding_handler *selected = &ascii8;

  if (encoding_name != NULL) {
#ifdef ENABLE_MULTIBYTE
    if (strcmp(encoding_name, "UTF-8") == 0)
      selected = &utf8;
#ifdef USE_ICONV
    else if (strcmp(encoding_name, "C") != 0)
      selected = iconv_handler::new_iconv_handler(encoding_name);
#else
    else if (strcmp(encoding_name, "EUC-JP") == 0)
      selected = &eucjp;
#endif
#endif
  }

  *eptr = selected;
}

// Make the handler for encoding_name the global input encoding and
// return it.
encoding_handler *
select_input_encoding_handler(const char *encoding_name)
{
  encoding_handler *chosen = input_encoding;
  new_encoding_handler(&chosen, encoding_name);
  input_encoding = chosen;
  return chosen;
}

// Make the handler for encoding_name the global output encoding and
// return it.
encoding_handler *
select_output_encoding_handler(const char *encoding_name)
{
  encoding_handler *chosen = output_encoding;
  new_encoding_handler(&chosen, encoding_name);
  output_encoding = chosen;
  return chosen;
}

// Choose the initial input and output encodings from the process locale.
//
// Without ENABLE_MULTIBYTE this does nothing.  Otherwise the defaults
// are "ISO-8859-1" for input and "C" for output, and they are kept when
// the locale cannot be set or is C/POSIX.  For ja and zh locales, and
// for ko locales whose charset is UTF-8, both directions use the
// locale's charset.  For any other locale both directions fall back to
// the single-byte handler when no device is set or the device is
// "ascii8"; with any other device the defaults stay in place.
void
init_encoding_handler()
{
#ifdef ENABLE_MULTIBYTE
  // groff 1 defines ISO-8859-1 as the input encoding, so this is required
  // for compatibility. groff 2 will define UTF-8 (or possibly officially
  // allow it to be switchable?)
  select_input_encoding_handler("ISO-8859-1");
  select_output_encoding_handler("C");

  const char *locale = setlocale(LC_ALL, "");
  if (locale == NULL ||
      strcmp(locale, "C") == 0 || strcmp(locale, "POSIX") == 0) {
    return;
  }
  /* check LC_CTYPE is C or POSIX */
  locale = setlocale(LC_CTYPE, NULL);
  if (locale == NULL ||
      strcmp(locale, "C") == 0 || strcmp(locale, "POSIX") == 0) {
    return;
  }
  /* otherwise */
  const char *charset;
  // Same test as the one guarding the <langinfo.h> include, so that
  // nl_langinfo() is used exactly when its header was pulled in.
#ifdef HAVE_LANGINFO_CODESET
  charset = nl_langinfo(CODESET);
#else
  // Take whatever follows the first '.' in the locale name.
  charset = strchr(locale, '.');
  if (charset)
    ++charset;
  else
    charset = "";
#endif
  if (strncmp(locale, "ja", 2) == 0 || strncmp(locale, "zh", 2) == 0 ||
      (strncmp(locale, "ko", 2) == 0 && strcmp(charset, "UTF-8") == 0)) {
    select_input_encoding_handler(charset);
    select_output_encoding_handler(charset);
  } else if (!device || strcmp(device, "ascii8") == 0) {
    select_input_encoding_handler(NULL);
    select_output_encoding_handler(NULL);
  }
#endif
}

// Return 1 when wc is a wide-character code, i.e. a negative value,
// and 0 otherwise.
int
is_wchar_code(wchar wc)
{
  return wc < 0 ? 1 : 0;
}

// Return 1 when wc is a wide-character code whose magnitude fits in one
// byte (wc in -255..-1), and 0 otherwise.
int
is_wchar_singlebyte(wchar wc)
{
    if (wc >= 0)
	return 0;
    return wc > -256 ? 1 : 0;
}

// Return the magnitude of wc truncated to a single byte.
unsigned char
wchar_singlebyte(wchar wc)
{
    return (unsigned char)(wc >= 0 ? wc : -wc);
}

// Return the character code carried by wc: wc itself when it is
// non-negative, its negation when it is a wide-character code.
int
wchar_code(wchar wc)
{
    return wc >= 0 ? wc : -wc;
}

// Turn the character code w into its wide-character representation,
// which is the negated code.
int
make_wchar(int w)
{
    const int negated = -w;
    return negated;
}
