=begin
= WebUnit::Parser
  Copyright(C) 2001 yuichi TAKAHASHI, Narushima Hironori.
  $Id: parser.rb,v 1.5 2004/04/29 12:34:04 yuichi Exp $
  Part of WebUnit::Parser is variant of html-parser
=end

require 'htmlrepair'
require 'sgml-parser'
require 'html-parser'
require 'formatter'

module WebUnit

  class Parser < SGMLParser

    include Utils

    # Tag names whose handlers every new Parser stubs out with no-ops
    # (#initialize passes this list to #ignore). Replaced by Parser.ignore.
    @@ignoretags = []
    # Tags that get a generated do_<tag> handler further down in this class and
    # whose unmatched end tags #endtag tolerates instead of raising.
    SingleTags = %w( meta br hr base link area ) # input, frame, img

    # Sets up a fresh parser: hands a NullFormatter to the SGMLParser
    # constructor, starts the element stack with a single root HtmlElem
    # (nil tag, nil attributes), starts with empty form and table stacks, and
    # stubs out the handlers of the class-wide ignored tags on this instance.
    def initialize
      super(NullFormatter.new)
      @form_stack = []
      @table_stack = []
      @elem_stack = [HtmlElem.new(nil, nil)]
      ignore(@@ignoretags)
    end

    # Parses the body of +response+ and returns the first child of the root
    # element.
    #
    # The body is passed through HTMLSplit#repair before it is handed to the
    # superclass parser. Any StandardError raised on the way is re-raised as
    # BadHtmlTags; the message is the original message followed by the
    # inspected element stack, and the original backtrace is preserved so the
    # real failure site stays visible.
    #
    # Note: the root element is popped off @elem_stack before returning.
    def feed( response )
      @response = response
      begin
        # Repair once and reuse the result for the debug dump and the parse.
        html = HTMLSplit.new( @response.body ).repair.to_s
        $stderr.puts html if $DEBUG
        super( html )
      rescue => e
        raise BadHtmlTags, e.message + @elem_stack.inspect, e.backtrace
      end
      @elem_stack.pop.children[0]
    end

    # Appends character data to the element on top of the stack after
    # collapsing runs of repeated spaces and repeated newlines and trimming
    # surrounding whitespace.
    def handle_data(data)
      collapsed = data.squeeze(" \n")
      @elem_stack.last.append(collapsed.strip)
    end

    # Opens +elem+: it is appended to the element currently on top of the
    # stack and then becomes the new top of the stack.
    def starttag(elem)
      parent = @elem_stack.last
      parent.append(elem)
      @elem_stack.push(elem)
    end

    # Closes the element on top of the stack and returns it.
    #
    # If +tag+ does not match the popped element's tag:
    # * for a tag listed in SingleTags the popped element is pushed back,
    #   i.e. the stray end tag is ignored;
    # * otherwise BadHtmlTags is raised (the element stays popped), and the
    #   same message is printed first when $DEBUG is set.
    def endtag(tag)
      elem = @elem_stack.pop
      return elem if tag == elem.tag

      unless SingleTags.include?(tag)
        puts "'#{tag}'(wait for '#{elem.tag.to_s}')" if $DEBUG
        raise BadHtmlTags, "'#{tag}'(wait for '#{elem.tag.to_s}')"
      end

      @elem_stack.push(elem)
      elem
    end

    # Handler for start tags without a dedicated start_<tag> method: wraps the
    # tag and its attribute hash in a plain HtmlElem and opens it.
    def unknown_starttag(tag, attrs)
      starttag(HtmlElem.new(tag, attrs_to_hash(attrs)))
    end

    # Handler for end tags without a dedicated end_<tag> method; simply closes
    # the current element via #endtag.
    def unknown_endtag(tag)
      endtag tag
    end

    # special start and end ( a, form, table, ... )

    # Opens an <a> element as a Link and registers it on the response.
    #
    # Unless the href starts with '#', it is completed against the response
    # URL via complete_url and every "&amp;" in the result is replaced by "&"
    # (in place, on the string complete_url returned).
    def start_a(attrs)
      ah = attrs_to_hash(attrs)
      href = ah["href"]
      unless href =~ /^#/
        href = complete_url(href, @response.url)
        ah["href"] = href
        href.gsub!("&amp;", "&")
      end
      link = Link.new(ah)
      @response.add_link(link)
      starttag(link)
    end

    # Opens a <form> element as a Form, registers it on the response and makes
    # it the current form for the parameter-bearing tags that follow.
    #
    # The action is completed against the response URL via complete_url and
    # every "&amp;" in the result is replaced by "&" (in place).
    def start_form(attrs)
      ah = attrs_to_hash(attrs)
      action = complete_url(ah["action"], @response.url)
      ah["action"] = action
      action.gsub!("&amp;", "&")
      form = Form.new(ah)
      @response.add_form(form)
      @form_stack.push(form)
      starttag(form)
    end
    # Closes the current <form> element, then drops it from the form stack.
    # Returns the form that was popped.
    def end_form
      endtag "form"
      @form_stack.pop
    end

    # Opens a <select> element as a Select.
    #
    # When a form is currently open the select is registered as one of its
    # parameters. A <select> outside any form is still added to the element
    # tree but is not registered anywhere, matching how do_input treats
    # form-less inputs (previously this case raised NoMethodError on nil).
    def start_select( attrs )
      elem = Select::new( attrs_to_hash( attrs ) )
      form = @form_stack.last
      form.add_param( elem ) if form
      starttag( elem )
    end
    # Closes the current <select> element and, when a form is open, calls
    # end_option on that form's most recently added parameter.
    #
    # Outside any form there is nothing to finalise, so only the element is
    # closed and nil is returned (previously this raised NoMethodError on nil).
    def end_select
      endtag( 'select' )
      form = @form_stack.last
      form.parameters.last.end_option if form
    end

    # Opens an <option> element as a SelectOption.
    #
    # When a form is open the option is added to that form's most recently
    # added parameter via add_option. Outside any form the option is only
    # added to the element tree (previously this raised NoMethodError on nil).
    def start_option( attrs )
      elem = SelectOption::new( attrs_to_hash( attrs ) )
      form = @form_stack.last
      form.parameters.last.add_option( elem ) if form
      starttag( elem )
    end

    # Opens a <textarea> element as a Textarea.
    #
    # When a form is open the textarea is registered as one of its parameters.
    # Outside any form it is only added to the element tree, matching how
    # do_input treats form-less inputs (previously this raised NoMethodError
    # on nil).
    def start_textarea( attrs )
      elem = Textarea::new( attrs_to_hash( attrs ) )
      form = @form_stack.last
      form.add_param( elem ) if form
      starttag( elem )
    end

    # Opens a <table> element as a Table, registers it on the response and
    # makes it the current table for the row and cell tags that follow.
    def start_table(attrs)
      table = Table.new(attrs_to_hash(attrs))
      @response.add_table(table)
      @table_stack.push(table)
      starttag(table)
    end
    # Closes the current <table> element, then drops it from the table stack.
    # Returns the table that was popped.
    def end_table
      endtag "table"
      @table_stack.pop
    end

    # Opens a <tr> element as a TableRow in the element tree, then tells the
    # current table to add a row, passing it a separate attribute hash built
    # from the same attributes.
    def start_tr(attrs)
      row = TableRow.new(attrs_to_hash(attrs))
      starttag(row)
      current_table = @table_stack.last
      current_table.add_row(attrs_to_hash(attrs))
    end

    # Opens a <th> element as a TableCell tagged 'th' and adds it as a cell of
    # the current table.
    def start_th(attrs)
      cell = TableCell.new('th', attrs_to_hash(attrs))
      current_table = @table_stack.last
      current_table.add_cell(cell)
      starttag(cell)
    end

    # Opens a <td> element as a TableCell tagged 'td' and adds it as a cell of
    # the current table.
    def start_td(attrs)
      cell = TableCell.new('td', attrs_to_hash(attrs))
      current_table = @table_stack.last
      current_table.add_cell(cell)
      starttag(cell)
    end

    # do ( start only )
    # Handles a start-only tag: builds a plain HtmlElem and pushes it straight
    # onto the children of the current element, without opening it on the
    # element stack.
    def dotag(tag, attrs)
      node = HtmlElem.new(tag, attrs_to_hash(attrs))
      siblings = @elem_stack.last.children
      siblings.push(node)
    end

    # Generate a do_<tag>( attrs ) handler for every entry of SingleTags; each
    # handler forwards to dotag with its own tag name. define_method is used
    # instead of evaluating source strings.
    SingleTags.each do |t|
      # dup so each element gets its own tag string rather than sharing the
      # constant's entry.
      define_method( "do_#{t}" ) { |attrs| dotag( t.dup, attrs ) }
    end

    # special do ( input, frame, ... )

    # Handles an <input> tag.
    #
    # Picks the element class from the type attribute (submit, reset,
    # checkbox, radio, image, file; anything else, including a missing type,
    # becomes a plain Input). The type is matched case-insensitively, so
    # type="SUBMIT" is recognised too; the attribute value stored on the
    # element is left exactly as written.
    #
    # The element is registered as a parameter of the current form when one is
    # open, and is always pushed onto the children of the current element.
    def do_input( attrs )
      ah = attrs_to_hash attrs
      klass =
        case ah['type'].to_s.downcase
        when 'submit'   then InputSubmit
        when 'reset'    then InputReset
        when 'checkbox' then InputCheckbox
        when 'radio'    then InputRadio
        when 'image'    then InputImage
        when 'file'     then InputFile
        else                 Input # text and everything unrecognised
        end
      elem = klass.new( ah )

      form = @form_stack.last
      form.add_param( elem ) if form
      @elem_stack.last.children.push elem
    end

    # Handles a <frame> tag: completes its src against the response URL and
    # registers the resulting Frame on the response.
    #
    # Note that, unlike do_img and do_input, the frame is not pushed onto the
    # children of the current element.
    def do_frame(attrs)
      ah = attrs_to_hash(attrs)
      ah['src'] = complete_url(ah['src'], @response.url)
      @response.add_frame(Frame.new(ah))
    end

    # Handles an <img> tag: registers the resulting Image on the response and
    # pushes it onto the children of the current element.
    def do_img(attrs)
      image = Image.new(attrs_to_hash(attrs))
      @response.add_image(image)
      siblings = @elem_stack.last.children
      siblings.push(image)
    end

    # ignore tag
    # Makes this parser instance ignore every tag named in +arr+ by defining
    # no-op start_<tag>( a ), end_<tag> and do_<tag>( a ) singleton methods on
    # it. Returns +arr+.
    #
    # The methods are created with define_method rather than by evaluating
    # source text, so tag names that are not valid Ruby identifiers
    # (for example "o:p") no longer raise SyntaxError.
    def ignore( arr )
      singleton = class << self; self; end
      arr.each do |t|
        singleton.send( :define_method, "start_#{t}" ) { |a| nil }
        singleton.send( :define_method, "end_#{t}" ) { nil }
        singleton.send( :define_method, "do_#{t}" ) { |a| nil }
      end
    end

    # Class-level setting: replaces the list of tag names that #initialize
    # passes to the instance-level #ignore. Returns +arr+.
    def self.ignore(arr)
      @@ignoretags = arr
    end

    # utils

    # Converts a list of [name, raw_value] pairs into a Hash of
    # name => value.
    #
    # The value kept for each attribute is the text between the first pair of
    # double quotes in the raw value. Raw values without such text (empty
    # quotes, no double quotes at all) map to ''. A nil raw value also maps
    # to '' instead of raising NoMethodError.
    def attrs_to_hash( attrs )
      h = {}
      attrs.each do |name, raw|
        quoted = raw.to_s.split( '"' )[1]
        h[name] = quoted || ''
      end
      h
    end

  end

end
