#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <unichar/unichar.h>
#include <unichar/ctype.h>
#include <unichar/string.h>

/* Size, in bytes, of the buffer _unichar_load_uct() reads each line into. */
#define MAXLINESIZE 256

/* Split a value into four 4-bit table indices, most significant first.
 * B, C and D are masked; A is only shifted, so it stays below 0x10 solely
 * for values that fit in 16 bits. */
#define A(X) ((X) >> 12)
#define B(X) (((X) & 0x0f00) >> 8)
#define C(X) (((X) & 0x00f0) >> 4)
#define D(X) ((X) & 0x000f)

/* Root of the character-info table: four levels of 0x10-entry pointer
 * arrays (indexed by A, B, C, D) ending in individual unichar_info_t
 * records.  Every level is allocated on demand by
 * _unichar_info_table_entry(). */
unichar_info_t *****___unichar_info_table = 0;
/* "Loaded" bitmap: bit (c & 7) of byte (c >> 3) is set once the record for
 * C has been parsed.  0x2000 bytes give one bit for each of 0x10000 values. */
unsigned char ___unichar_info_table_index[0x2000];

/*
 * Return the info record slot for C, creating it (and any intermediate
 * table level) on first use.
 *
 * The table is four levels of 0x10-entry pointer arrays indexed by the
 * four nibbles of C.  Freshly created records are zero-filled by calloc().
 *
 * Returns 0 when C does not fit the table (top index above 0x0f) or when
 * an allocation fails; levels allocated before a failure stay in place and
 * are reused by later calls.
 */
unichar_info_t *
_unichar_info_table_entry(unichar_t c)
{
  unichar_info_t *i;
  unsigned int w, x, y, z;

  /* These are the indices of C in the giant table. */
  w = A(c); x = B(c); y = C(c); z = D(c);

  /* A() is not masked, so guard the only index that can run past 0x0f. */
  if (w > 0x0f)
    return 0;

  /* Create what space we need to, one level at a time. */
  if (___unichar_info_table == 0)
  {
    ___unichar_info_table = calloc(0x10, sizeof(unichar_info_t ****));
    if (___unichar_info_table == 0)
      return 0;
  }
  if (___unichar_info_table[w] == 0)
  {
    ___unichar_info_table[w] = calloc(0x10, sizeof(unichar_info_t ***));
    if (___unichar_info_table[w] == 0)
      return 0;
  }
  if (___unichar_info_table[w][x] == 0)
  {
    ___unichar_info_table[w][x] = calloc(0x10, sizeof(unichar_info_t **));
    if (___unichar_info_table[w][x] == 0)
      return 0;
  }
  if (___unichar_info_table[w][x][y] == 0)
  {
    ___unichar_info_table[w][x][y] = calloc(0x10, sizeof(unichar_info_t *));
    if (___unichar_info_table[w][x][y] == 0)
      return 0;
  }

  i = ___unichar_info_table[w][x][y][z];
  if (i == 0)
  {
    /* May itself be 0 on allocation failure; that is what we report. */
    i = calloc(0x01, sizeof(unichar_info_t));
    ___unichar_info_table[w][x][y][z] = i;
  }

  return i;
}

/*
 * Give the English label for a character type.
 *
 * The returned pointer refers to a string literal and must not be freed
 * or modified.  Values without a case of their own, as well as
 * unichar_type_unknown, yield "unknown".
 */
const char *
_unichar_type_to_charp(unichar_type_t type)
{
  const char *label;

  switch (type)
    {
    /* case unichar_type_invalid: */
    case unichar_type_invalid_character:   label = "invalid character";    break;

    case unichar_type_letter:              label = "letter";               break;
    case unichar_type_uppercase_letter:    label = "uppercase letter";     break;
    case unichar_type_lowercase_letter:    label = "lowercase letter";     break;
    case unichar_type_titlecase_letter:    label = "titlecase letter";     break;
    case unichar_type_modifier_letter:     label = "modifier letter";      break;
    case unichar_type_other_letter:        label = "other letter";         break;

    case unichar_type_mark:                label = "mark";                 break;
    case unichar_type_non_spacing_mark:    label = "non-spacing mark";     break;
    case unichar_type_combining_mark:      label = "combining mark";       break;

    case unichar_type_number:              label = "number";               break;
    case unichar_type_decimal_digit_number: label = "decimal digit number"; break;
    case unichar_type_other_number:        label = "other number";         break;

    /* case unichar_type_other: */
    case unichar_type_other_character:     label = "other character";      break;

    case unichar_type_punctuation:         label = "punctuation";          break;
    case unichar_type_dash_punctuation:    label = "dash punctuation";     break;
    case unichar_type_open_punctuation:    label = "open punctuation";     break;
    case unichar_type_close_punctuation:   label = "close punctuation";    break;
    case unichar_type_other_punctuation:   label = "other punctuation";    break;

    case unichar_type_symbol:              label = "symbol";               break;
    case unichar_type_math_symbol:         label = "math symbol";          break;
    case unichar_type_currency_symbol:     label = "currency symbol";      break;
    case unichar_type_other_symbol:        label = "other symbol";         break;

    case unichar_type_separator:           label = "separator";            break;
    case unichar_type_space_separator:     label = "space separator";      break;
    case unichar_type_line_separator:      label = "line separator";       break;
    case unichar_type_paragraph_separator: label = "paragraph separator";  break;

    /* case unichar_type_control: */
    case unichar_type_control_character:   label = "control";              break;

    case unichar_type_unknown:
    default:
      label = "unknown";
      break;
    }

  return label;
}

/*
 * Translate a two-letter type abbreviation (e.g. "Lu", "Nd") into a
 * unichar_type_t.
 *
 * Only the first two characters of T are examined, case-insensitively;
 * the second character is read only after the first one matched a known
 * group.  A null pointer or an unrecognised abbreviation yields
 * unichar_type_unknown.
 */
unichar_type_t
_unichar_charp_to_type(const char *t)
{
  if (t == 0)
    return unichar_type_unknown;

  switch (t[0])
    {
    case 'L': case 'l':  /* Letters */
      switch (t[1])
        {
        case 'o': case 'O': return unichar_type_other_letter;      /* "Lo" -- Letter, Other */
        case 'u': case 'U': return unichar_type_uppercase_letter;  /* "Lu" -- Letter, Uppercase */
        case 'l': case 'L': return unichar_type_lowercase_letter;  /* "Ll" -- Letter, Lowercase */
        case 't': case 'T': return unichar_type_titlecase_letter;  /* "Lt" -- Letter, Titlecase */
        case 'm': case 'M': return unichar_type_modifier_letter;   /* "Lm" -- Letter, Modifier */
        default: break;
        }
      break;

    case 'P': case 'p':  /* Punctuation */
      switch (t[1])
        {
        case 'o': case 'O': return unichar_type_other_punctuation;  /* "Po" -- Punctuation, Other */
        case 's': case 'S': return unichar_type_open_punctuation;   /* "Ps" -- Punctuation, Open */
        case 'e': case 'E': return unichar_type_close_punctuation;  /* "Pe" -- Punctuation, Close */
        case 'd': case 'D': return unichar_type_dash_punctuation;   /* "Pd" -- Punctuation, Dash */
        default: break;
        }
      break;

    case 'S': case 's':  /* Symbols */
      switch (t[1])
        {
        case 'o': case 'O': return unichar_type_other_symbol;     /* "So" -- Symbol, Other */
        case 'm': case 'M': return unichar_type_math_symbol;      /* "Sm" -- Symbol, Math */
        case 'c': case 'C': return unichar_type_currency_symbol;  /* "Sc" -- Symbol, Currency */
        default: break;
        }
      break;

    case 'M': case 'm':  /* Marks */
      switch (t[1])
        {
        case 'n': case 'N': return unichar_type_non_spacing_mark;  /* "Mn" -- Mark, Non-Spacing */
        case 'c': case 'C': return unichar_type_combining_mark;    /* "Mc" -- Mark, Combining */
        default: break;
        }
      break;

    case 'N': case 'n':  /* Numbers */
      switch (t[1])
        {
        case 'd': case 'D': return unichar_type_decimal_digit_number;  /* "Nd" -- Number, Decimal Digit */
        case 'o': case 'O': return unichar_type_other_number;          /* "No" -- Number, Other */
        default: break;
        }
      break;

    case 'Z': case 'z':  /* Separators */
      switch (t[1])
        {
        case 's': case 'S': return unichar_type_space_separator;      /* "Zs" -- Separator, Space */
        case 'l': case 'L': return unichar_type_line_separator;       /* "Zl" -- Separator, Line */
        case 'p': case 'P': return unichar_type_paragraph_separator;  /* "Zp" -- Separator, Paragraph */
        default: break;
        }
      break;

    case 'C': case 'c':  /* Other */
      switch (t[1])
        {
        case 'c': case 'C': return unichar_type_control_character;  /* "Cc" -- Other, Control or Format */
        case 'o': case 'O': return unichar_type_other_character;    /* "Co" -- Other, Private Use */
        case 'n': case 'N': return unichar_type_invalid_character;  /* "Cn" -- Other, Not Assigned */
        default: break;
        }
      break;

    default:
      break;
    }

  return unichar_type_unknown;
}

/*
 * Give the English label for a bidirectional type.
 *
 * The result points at a string literal.  Values without a case of their
 * own, as well as unichar_bidirectional_type_unknown, yield "unknown".
 */
const char *
_unichar_bidirectional_type_to_charp(unichar_bidirectional_type_t type)
{
  const char *label;

  switch (type)
    {
    case unichar_bidirectional_type_left_right:
      label = "left to right";
      break;
    case unichar_bidirectional_type_right_left:
      label = "right to left";
      break;
    case unichar_bidirectional_type_european_number:
      label = "european number";
      break;
    case unichar_bidirectional_type_european_number_separator:
      label = "european number separator";
      break;
    case unichar_bidirectional_type_european_number_terminator:
      label = "european number terminator";
      break;
    case unichar_bidirectional_type_arabic_number:
      label = "arabic number";
      break;
    case unichar_bidirectional_type_common_number_separator:
      label = "common number separator";
      break;
    case unichar_bidirectional_type_block_separator:
      label = "block separator";
      break;
    case unichar_bidirectional_type_segment_separator:
      label = "segment separator";
      break;
    case unichar_bidirectional_type_whitespace:
      label = "whitespace";
      break;
    case unichar_bidirectional_type_other_neutral:
      label = "other neutral";
      break;
    case unichar_bidirectional_type_unknown:
    default:
      label = "unknown";
      break;
    }

  return label;
}

/*
 * Translate a bidirectional-category abbreviation into a
 * unichar_bidirectional_type_t.
 *
 * Matching is case-insensitive and deliberately loose: every category
 * except the "E?" family is decided by the first character alone, so any
 * string starting with, say, 'A' is reported as an arabic number.  A null
 * pointer or an unrecognised abbreviation yields
 * unichar_bidirectional_type_unknown.
 */
unichar_bidirectional_type_t
_unichar_charp_to_bidirectional_type(const char *t)
{
  if (t == 0)
    return unichar_bidirectional_type_unknown;

  switch (t[0])
    {
    case 'L': case 'l':  /* "L" -- Left-Right */
      return unichar_bidirectional_type_left_right;

    case 'R': case 'r':  /* "R" -- Right-Left */
      return unichar_bidirectional_type_right_left;

    case 'E': case 'e':  /* The European number family needs a second letter. */
      switch (t[1])
        {
        case 'N': case 'n':  /* "EN" -- European Number */
          return unichar_bidirectional_type_european_number;
        case 'S': case 's':  /* "ES" -- European Number Separator */
          return unichar_bidirectional_type_european_number_separator;
        case 'T': case 't':  /* "ET" -- European Number Terminator */
          return unichar_bidirectional_type_european_number_terminator;
        default:
          break;
        }
      break;

    case 'A': case 'a':  /* "AN" -- Arabic Number */
      return unichar_bidirectional_type_arabic_number;

    case 'C': case 'c':  /* "CS" -- Common Number Separator */
      return unichar_bidirectional_type_common_number_separator;

    case 'B': case 'b':  /* "B" -- Block Separator */
      return unichar_bidirectional_type_block_separator;

    case 'S': case 's':  /* "S" -- Segment Separator */
      return unichar_bidirectional_type_segment_separator;

    case 'W': case 'w':  /* "WS" -- Whitespace */
      return unichar_bidirectional_type_whitespace;

    case 'O': case 'o':  /* "ON" -- Other Neutral */
      return unichar_bidirectional_type_other_neutral;

    default:
      break;
    }

  return unichar_bidirectional_type_unknown;
}

/*
 * Give the textual tag for a decomposition modifier.
 *
 * Apart from "none" and "unknown", the tags are the same spellings that
 * _unichar_charp_to_decomposition_modifier() accepts.  The result points
 * at a string literal; values without a case of their own yield "unknown".
 */
const char *
_unichar_decomposition_modifier_to_charp(unichar_decomposition_modifier_t mod)
{
  const char *tag;

  switch (mod)
    {
    case unichar_decomposition_modifier_none:          tag = "none";     break;
    case unichar_decomposition_modifier_font:          tag = "font";     break;
    case unichar_decomposition_modifier_fraction:      tag = "fraction"; break;
    case unichar_decomposition_modifier_no_break:      tag = "nobreak";  break;
    case unichar_decomposition_modifier_initial:       tag = "initial";  break;
    case unichar_decomposition_modifier_medial:        tag = "medial";   break;
    case unichar_decomposition_modifier_final:         tag = "final";    break;
    case unichar_decomposition_modifier_isolated:      tag = "isolated"; break;
    case unichar_decomposition_modifier_circle:        tag = "circle";   break;
    case unichar_decomposition_modifier_superscript:   tag = "super";    break;
    case unichar_decomposition_modifier_subscript:     tag = "sub";      break;
    case unichar_decomposition_modifier_vertical:      tag = "vertical"; break;
    case unichar_decomposition_modifier_wide:          tag = "wide";     break;
    case unichar_decomposition_modifier_narrow:        tag = "narrow";   break;
    case unichar_decomposition_modifier_small:         tag = "small";    break;
    case unichar_decomposition_modifier_square:        tag = "square";   break;
    case unichar_decomposition_modifier_compatibility: tag = "compat";   break;
    case unichar_decomposition_modifier_unknown:
    default:
      tag = "unknown";
      break;
    }

  return tag;
}

/*
 * Translate a decomposition tag (the text between '<' and '>') into a
 * unichar_decomposition_modifier_t.
 *
 * The comparison is case-insensitive and must match the whole string.
 * The empty string means "no modifier".  A null pointer is tolerated, as
 * in the other _unichar_charp_to_*() parsers, and yields
 * unichar_decomposition_modifier_unknown, as does any unrecognised tag.
 */
unichar_decomposition_modifier_t
_unichar_charp_to_decomposition_modifier(const char *t)
{
  static const struct
  {
    const char *tag;
    unichar_decomposition_modifier_t modifier;
  } known[] =
  {
    { "",         unichar_decomposition_modifier_none },
    { "font",     unichar_decomposition_modifier_font },
    { "fraction", unichar_decomposition_modifier_fraction },
    { "nobreak",  unichar_decomposition_modifier_no_break },
    { "initial",  unichar_decomposition_modifier_initial },
    { "medial",   unichar_decomposition_modifier_medial },
    { "final",    unichar_decomposition_modifier_final },
    { "isolated", unichar_decomposition_modifier_isolated },
    { "circle",   unichar_decomposition_modifier_circle },
    { "super",    unichar_decomposition_modifier_superscript },
    { "sub",      unichar_decomposition_modifier_subscript },
    { "vertical", unichar_decomposition_modifier_vertical },
    { "wide",     unichar_decomposition_modifier_wide },
    { "narrow",   unichar_decomposition_modifier_narrow },
    { "small",    unichar_decomposition_modifier_small },
    { "square",   unichar_decomposition_modifier_square },
    { "compat",   unichar_decomposition_modifier_compatibility }
  };
  size_t n;

  /* strcasecmp() must never see a null pointer. */
  if (t == 0)
    return unichar_decomposition_modifier_unknown;

  for (n = 0; n < sizeof known / sizeof known[0]; ++n)
  {
    if (strcasecmp(t, known[n].tag) == 0)
      return known[n].modifier;
  }

  return unichar_decomposition_modifier_unknown;
}

/*
 * Test the "loaded" bit of C in ___unichar_info_table_index.
 *
 * Returns non-zero (the masked bit itself, not necessarily 1) when the bit
 * is set, and 0 otherwise.  No range check is made on C.
 */
int
_unichar_info_is_loaded(unichar_t c)
{
  /* Byte c / 8 holds the flag, at bit position c % 8. */
  const unsigned char bits = ___unichar_info_table_index[c >> 3];
  const int mask = 1 << (c & 0x07);

  return bits & mask;
}

/*
 * Set (FLAG non-zero) or clear (FLAG zero) the "loaded" bit of C in
 * ___unichar_info_table_index.  No range check is made on C.
 */
void
_unichar_mark_info_index(unichar_t c, int flag)
{
  /* Byte c / 8 holds the flag, at bit position c % 8. */
  unsigned char *slot = &___unichar_info_table_index[c >> 3];
  const int mask = 1 << (c & 0x07);

  if (!flag)
    *slot &= ~mask;
  else
    *slot |= mask;
}

int
_unichar_parse_info_line(char *s)
{
  char *t, *u;
  unichar_info_t *i;
  unichar_t c, dc[32];
  unsigned int k;

  /* FIELD 1: CHARACTER */
  t = strsep(&s, ";");
  c = strtol(t, 0, 16);

  /* Get the entry slot for C. */
  i = _unichar_info_table_entry(c);

  /* Put the character in I. */
  i->character = c;

  /* FIELD 2: NAME */
  t = strsep(&s, ";");
  i->name = strdup(t);

  /* FIELD 3: TYPE */
  t = strsep(&s, ";");
  i->type = _unichar_charp_to_type(t);

  /* FIELD 4: COMBINING CLASS */
  t = strsep(&s, ";");
  i->combining_class = strtol(t, 0, 10);

  /* FIELD 5: BIDIRECTIONAL TYPE */
  t = strsep(&s, ";");
  i->bidirectional_type = _unichar_charp_to_bidirectional_type(t);

  /* FIELD 6: (one level of the) DECOMPOSITION (if any) */
  t = strsep(&s, ";");
  if ((u = strchr(t, '<')) != 0)
  {
    ++u;
    t = strchr(u, '>');
    *t = 0;
    ++t;
    i->decomposition_modifier = _unichar_charp_to_decomposition_modifier(u);
  }
  else
  {
    i->decomposition_modifier = unichar_decomposition_modifier_none;
  }
  for(k = 0; *t != 0;)
  {
    if (*t == ' ' || *t == '\t')  /* Skip over any whitespace. */
      ++t;
    else  /* Otherwise, everything should be unichar code points. */
      dc[k++] = (unichar_t) strtol(t, &t, 16);
  }
  dc[k] = 0x0000;
  if (dc[0] != 0x0000)
    i->decomposition = unichar_strdup(dc);
  else
    i->decomposition = 0;

  /* FIELD 7: DECIMAL DIGIT VALUE */
  t = strsep(&s, ";");
  i->decimal_digit_value = strtol(t, 0, 10);

  /* FIELD 8: DIGIT VALUE */
  t = strsep(&s, ";");
  i->digit_value = strtol(t, 0, 10);

  /* FIELD 9: NUMERIC VALUE */
  /* FIXME: We don't support this yet. */
  t = strsep(&s, ";");
  /* i->numeric_value = ???; */

  /* FIELD 10: IS MIRRORED */
  t = strsep(&s, ";");
  if (t[0] == 'Y' || t[0] == 'y')
    i->is_mirrored = 1;
  else /* if (t[0] == 'N' || t[0] == 'n') */
    i->is_mirrored = 0;

  /* FIELD 11: OLD NAME */
  t = strsep(&s, ";");
  i->old_name = strdup(t);

  /* FIELD 12: ISO 10646 COMMENT (if any) */
  t = strsep(&s, ";");
  i->iso_10646_comment = strdup(t);

  /* FIELD 13: UPPERCASE EQUIVALENT */
  t = strsep(&s, ";");
  i->to_uppercase = strtol(t, 0, 16) ?: c;

  /* FIELD 14: LOWERCASE EQUIVALENT */
  t = strsep(&s, ";");
  i->to_lowercase = strtol(t, 0, 16) ?: c;

  /* FIELD 15: TITLECASE EQUIVALENT */
  t = strsep(&s, ";");
  i->to_titlecase = strtol(t, 0, 16) ?: c;

  /* Make sure and mark off that we've loaded the character. */
  _unichar_mark_info_index(c, 1);

  return 1;  
}

/*
 * Return the info record for C, filling it with placeholder values when
 * no real data has been parsed for that character yet.
 *
 * The placeholder is NOT marked as loaded, so it is rewritten on every
 * call until a real record is parsed.  Its string fields point at string
 * literals and must not be freed.
 *
 * Returns the record, or whatever _unichar_info_table_entry() returned if
 * it could not provide one (a null pointer).
 */
unichar_info_t *
_unichar_load_info(unichar_t c)
{
  /* Look the slot up unconditionally so that the return value is defined
   * for already-loaded characters as well. */
  unichar_info_t *i = _unichar_info_table_entry(c);

  if (i != 0 && !(_unichar_info_is_loaded(c)))
  {
    /* Create a dummy entry.  (But don't mark it as loaded?) */
    i->character = c;
    i->name = "<unknown>";
    i->type = unichar_type_unknown;
    i->combining_class = 0;
    i->bidirectional_type = unichar_bidirectional_type_unknown;
    i->decomposition_modifier = unichar_decomposition_modifier_unknown;
    i->decomposition = 0;
    i->decimal_digit_value = 0;
    i->digit_value = 0;
    i->numeric_value = 0.0;
    i->is_mirrored = 0;
    i->old_name = "";
    i->iso_10646_comment = "";
    i->to_uppercase = c;
    i->to_lowercase = c;
    i->to_titlecase = c;
  }

  return i;
}

/*
 * Unload the info record for C.
 *
 * Not implemented: nothing is released and the "loaded" bit is left
 * untouched.  Always returns 0 so that callers get a defined value
 * instead of the result of falling off the end of a non-void function.
 */
int
_unichar_unload_info(unichar_t c)
{
  (void) c;

  return 0;
}

/*
 * Read the character table file UCT_FILENAME and parse every line with
 * _unichar_parse_info_line().
 *
 * Lines that do not fit in a MAXLINESIZE buffer are skipped entirely
 * rather than being parsed in pieces.  The result of parsing an
 * individual line is ignored, so one bad record does not stop the load.
 *
 * Returns 1 when the file was opened and read to the end, 0 when
 * UCT_FILENAME is null, the file cannot be opened, or a read error occurs.
 */
int
_unichar_load_uct(const char *uct_filename)
{
  FILE *uctf;
  char s[MAXLINESIZE];
  int ch, ok;

  if (uct_filename == 0)
    return 0;

  /* Open the file. */
  uctf = fopen(uct_filename, "r");
  if (uctf == 0)
    return 0;

  /* Until we get to the end of the file... */
  while (fgets(s, sizeof s, uctf) != 0)
  {
    if (strchr(s, '\n') == 0)
    {
      /* No newline in the buffer: either the line filled it exactly (the
       * next character is the newline or EOF) or the line is too long. */
      ch = fgetc(uctf);
      if (ch != EOF && ch != '\n')
      {
        /* Too long: discard the remainder of the line and move on. */
        while (ch != EOF && ch != '\n')
          ch = fgetc(uctf);
        continue;
      }
    }

    /* Add the info contained in the line. */
    /* WARNING: This call may fudge with the contents of S,
     * so don't expect S to be usable after this. */
    (void) _unichar_parse_info_line(s);
  }

  ok = !ferror(uctf);

  /* Close the file. */
  fclose(uctf);

  return ok;
}

/*
 * Return, by value, a copy of the info record for C.
 *
 * When C has not been loaded, _unichar_load_info() is called first, which
 * fills the record with placeholder values.
 */
unichar_info_t
unichar_info(unichar_t c)
{
  unichar_info_t *entry;

  if (_unichar_info_is_loaded(c) == 0)
    (void) _unichar_load_info(c);

  entry = _unichar_info_table_entry(c);

  return *entry;
}

/*
 * Write a multi-line, human-readable dump of INFO to FILE.
 *
 * Code points are printed with "%#06x".  The decomposition is printed as
 * a parenthesised list of code points, each followed by a space; a null
 * or empty decomposition prints as "()".
 */
void
_unichar_fprint_info(FILE *file, unichar_info_t info)
{
  const unichar_t *cursor = info.decomposition;
  const char *mirrored = info.is_mirrored ? "yes" : "no";

  /* Identity and names. */
  fprintf(file, "character: %#06x\n", info.character);
  fprintf(file, "  name: \"%s\"\n", info.name);
  fprintf(file, "  old name: \"%s\"\n", info.old_name);
  fprintf(file, "  iso-10646 comment: \"%s\"\n", info.iso_10646_comment);

  /* Case mappings. */
  fprintf(file, "  to uppercase: %#06x\n", info.to_uppercase);
  fprintf(file, "  to lowercase: %#06x\n", info.to_lowercase);
  fprintf(file, "  to titlecase: %#06x\n", info.to_titlecase);

  /* Classification. */
  fprintf(file, "  type: %s\n", _unichar_type_to_charp(info.type));
  fprintf(file, "  bidirectional type: %s\n",
          _unichar_bidirectional_type_to_charp(info.bidirectional_type));
  fprintf(file, "  combining class: %d\n", info.combining_class);
  fprintf(file, "  ordering priority: %d\n", info.ordering_priority);
  fprintf(file, "  is mirrored: %s\n", mirrored);

  /* Decomposition. */
  fprintf(file, "  decomposition modifier: %s\n",
          _unichar_decomposition_modifier_to_charp(info.decomposition_modifier));
  fprintf(file, "  decomposition: (");
  while (cursor && *cursor)
  {
    fprintf(file, "%#06x ", *cursor);
    cursor++;
  }
  fprintf(file, ")\n");

  /* Numeric properties. */
  fprintf(file, "  decimal digit value: %d\n", info.decimal_digit_value);
  fprintf(file, "  digit value: %d\n", info.digit_value);
  fprintf(file, "  numeric value: %f\n", info.numeric_value);
}
