/*************************************************************************************************
 * Implementation of CJK uni-gram analyzing features
 *                                                      Copyright (C) 2003-2004 Mikio Hirabayashi
 * This file is part of Estraier, a personal full-text search system.
 * Estraier is free software; you can redistribute it and/or modify it under the terms of the GNU
 * General Public License as published by the Free Software Foundation; either version 2 of the
 * License, or any later version.
 * Estraier is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License along with Estraier;
 * if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 * MA 02111-1307 USA.
 *************************************************************************************************/

/* Note that the encoding of this file should be EUC-JP. */


#include "estcommon.h"



#if defined(MYCJKUNI)
/*************************************************************************************************
 * implementations
 *************************************************************************************************/


/* private function prototype: the analyzer is defined after the pointer that refers to it */
static void estdocaddtextcjkuni_impl(ODDOC *doc, const char *text, int size, int mode);


/* flag set to true in this build, where the uni-gram analyzer is implemented */
int estiscjkuni = TRUE;
/* pointer bound to the analyzer of this file; the arguments are the document handle, the text,
   the size of the text, and the registration mode */
void (*estdocaddtextcjkuni)(ODDOC *, const char *, int, int) = estdocaddtextcjkuni_impl;


/* Break a CJK text into words and register them to a document handle, by uni-gram method.
   `doc' specifies the document handle which the words are added to.
   `text' specifies the pointer to the region of the text.  It is handed to the converter as
   UTF-16BE, so it is read as a sequence of two-byte units.
   `size' specifies the size of the region in bytes.  A trailing odd byte cannot form a unit and
   is ignored.  If no complete unit is available, nothing is registered.
   `mode' specifies how each word is passed to `oddocaddword': `ESTDOCBOTH' passes it as both
   word arguments, `ESTDOCNONLY' as the first one only, and `ESTDOCAONLY' as the second one only
   (presumably the normalized form and the as-is form; confirm against the Odeum API).  With any
   other value, nothing is registered.
   NOTE(review): a surrogate pair is split into two separate units here; confirm whether
   characters outside the BMP have to be kept together. */
static void estdocaddtextcjkuni_impl(ODDOC *doc, const char *text, int size, int mode){
  CBLIST *words;
  const char *word;
  char *nbuf, *utf;
  int i, nsiz, wnum;
  /* use complete two-byte units only, so that neither the read of `text[i+1]' nor the four
     bytes written per unit can run past the end of the buffers */
  size -= size % 2;
  if(size < 2) return;
  /* every unit of two bytes grows into four bytes: the unit itself and a line feed, which
     puts each character on a line of its own before the text is broken into words */
  nbuf = cbmalloc(size * 2 + 1);
  nsiz = 0;
  for(i = 0; i < size; i += 2){
    nbuf[nsiz++] = text[i];
    nbuf[nsiz++] = text[i+1];
    nbuf[nsiz++] = 0x0;
    nbuf[nsiz++] = 0x0a;
  }
  utf = cbiconv(nbuf, nsiz, "UTF-16BE", "UTF-8", NULL, NULL);
  free(nbuf);
  /* the converter reports a failure by NULL; there is nothing to register in that case */
  if(!utf) return;
  words = odbreaktext(utf);
  wnum = cblistnum(words);
  for(i = 0; i < wnum; i++){
    word = cblistval(words, i, NULL);
    switch(mode){
    case ESTDOCBOTH:
      oddocaddword(doc, word, word);
      break;
    case ESTDOCNONLY:
      oddocaddword(doc, word, "");
      break;
    case ESTDOCAONLY:
      oddocaddword(doc, "", word);
      break;
    default:
      /* unknown modes register nothing, as before */
      break;
    }
  }
  cblistclose(words);
  free(utf);
}



#else
/*************************************************************************************************
 * dummy interfaces
 *************************************************************************************************/


/* The uni-gram analyzer is not compiled in: the flag is false and no function is bound.  The
   pointer has the same type as in the implementation build (document handle, text, size of the
   text, registration mode) so that both configurations define the symbol compatibly. */
int estiscjkuni = FALSE;
void (*estdocaddtextcjkuni)(ODDOC *, const char *, int, int) = NULL;



#endif



/* END OF FILE */
