# dictformat-wordlist.rb: Converter module for word lists that do not include a reading for each word
# $Id: dictformat-wordlist.rb,v 1.2 2005/03/07 07:51:33 komatsu Exp $
#
# Copyright (C) 2003 Hiroyuki Komatsu <komatsu@taiyaki.org>
#     All rights reserved.
#     This is free software with ABSOLUTELY NO WARRANTY.
#
# You can redistribute it and/or modify it under the terms of 
# the GNU General Public License version 2.

require 'prime/makedict/dictformat'
require 'MeCab'
# Set the interpreter-wide character code to EUC ('e').
# NOTE(review): $KCODE is only honored by Ruby 1.8; confirm the target
# Ruby version before relying on this setting.
$KCODE = 'e'

# Dictionary format converter for a plain word list: each input line holds
# only a literal, and the reading is derived by running the literal
# through MeCab.
class DictFormatWordlist < DictFormat
  # Sets the default part of speech, frequency and attribute stored with
  # every generated entry, and creates the MeCab tagger used by get_data.
  #
  # is_interactive:: passed through unchanged to DictFormat#initialize.
  def initialize(is_interactive = true)
    super(is_interactive)
    @default_pos  = nil
    @default_freq = 0
    @default_attr = nil

    mecab_arguments = ['mecab'] # Dummy data
    @mecab = MeCab::Tagger.new(mecab_arguments)
  end

  # Converts one line of the word list into dictionary entries.
  #
  # line:: a line of input; it is not modified.
  #
  # Returns nil when the line is a comment (optional spaces followed by
  # ';').  Otherwise returns a one-element array holding
  # [reading, @default_pos, literal, @default_freq, @default_attr].
  # The part of speech reported by get_data is not used; @default_pos is
  # stored instead.
  def parse(line)
    return nil if line =~ /^ *;/

    # Use the non-destructive chomp so the caller's string is left intact
    # (chomp! would also raise on a frozen string).
    literal = line.chomp
    pron = get_data(literal).first
    [[pron, @default_pos, literal, @default_freq, @default_attr]]
  end

  # Analyzes +line+ with MeCab and accumulates a reading for it.
  #
  # Returns [reading, pos], where reading is the concatenation of the
  # text taken from each node and pos is the first feature field of the
  # last node that contributed text ('' when no node contributed).
  def get_data(line)
    node     = @mecab.parseToNode(line)
    sum_pos  = ''
    readings = []

    # hasNode() is compared against 0 explicitly because 0 is truthy in Ruby.
    while node.hasNode() != 0
      # Feature fields, in order:
      #   pos, pos1, pos2, pos3, form, type, base, reading, pron
      fields  = node.getFeature().split(',')
      pos     = fields[0]
      reading = fields[7]

      # Prefer the reading from the feature string.  Fall back to the
      # surface when the node has POS id 0, or when the feature string is
      # too short to carry a reading (appending nil would raise TypeError).
      if node.getPOSID() != 0 && reading
        text = reading
      else
        text = node.getSurface()
      end

      if text
        readings << text
        sum_pos = pos
      end
      node = node.next()
    end
    [readings.join, sum_pos]
  end
end
