package io

import (
	"encoding/csv"
	"errors"
	"fmt"
	"io"

	"git.sr.ht/~charles/rq/util"
	"github.com/spkg/bom"
)

func init() {
	registerInputHandler("csv", func() InputHandler { return &CSVInputHandler{} })

	// aliases
	registerInputHandler("tsv", func() InputHandler {
		return &CSVInputHandler{
			comma:       '\t',
			comment:     0,
			skipLines:   0,
			headers:     false,
			infer:       true,
			initialized: true,
		}
	})
}

// Compile-time assertion that *CSVInputHandler satisfies InputHandler.
var _ InputHandler = &CSVInputHandler{}

// CSVInputHandler implements a CSV input InputHandler.
//
// The following options are supported:
//
// csv.comma (rune) the comma-character to use (default: ,)
//
// csv.comment (rune) the comment leader character to use (default: none)
//
// csv.skip_lines (int) number of leading lines to skip, ignored if negative
// (default: 0)
//
// csv.headers (bool) if true, the first row is treated as headers and used to
// generate object keys for the remaining rows (default: false)
//
// csv.infer (bool) if true, attempt to convert boolean, integer, or floating
// point values in the CSV to those types before serialization, otherwise leave
// all values as strings (default: true)
type CSVInputHandler struct {
	// comma is the field separator handed to the CSV reader (csv.comma).
	comma       rune
	// comment is the comment leader handed to the CSV reader
	// (csv.comment); zero leaves comment handling off.
	comment     rune
	// skipLines is how many leading records Parse reads from the CSV
	// reader and discards before any header or data row (csv.skip_lines).
	skipLines   int
	// headers selects whether the first remaining record supplies the
	// object keys for the records after it (csv.headers).
	headers     bool
	// infer selects whether field values are passed through
	// util.StringToValue rather than kept as strings (csv.infer).
	infer       bool
	// initialized records that the fields above already hold meaningful
	// values, so that init() does not overwrite them with the defaults.
	initialized bool
}

// init lazily applies the default option values. It does nothing once the
// handler has been marked as initialized, so options that were already set
// (or pre-configured at construction time) are never overwritten.
func (c *CSVInputHandler) init() {
	if !c.initialized {
		*c = CSVInputHandler{
			comma:       ',',
			comment:     0,
			skipLines:   0,
			headers:     false,
			infer:       true,
			initialized: true,
		}
	}
}

// Name implements InputHandler.Name(). It always reports "csv".
func (c *CSVInputHandler) Name() string {
	const handlerName = "csv"
	return handlerName
}

// Parse implements InputHandler.Parse().
//
// The shape of the returned value depends on the handler's options:
//
//   - headers enabled: a []map[string]interface{} holding one map per
//     record, keyed by the header row. A field with no matching header is
//     keyed "columnN", where N is the field's zero-based index.
//   - headers disabled, infer enabled: a []interface{} whose elements are
//     []interface{} records converted with util.StringToValue.
//   - headers and infer both disabled: a [][]string of the raw records.
//
// Reaching end of input while reading data records ends parsing normally.
// Every other error from the CSV reader is returned unchanged, including
// io.EOF hit while skipping leading records or reading the header row.
func (c *CSVInputHandler) Parse(reader io.Reader) (interface{}, error) {
	c.init()

	// Go's encoding/csv does not cope with a leading byte order marker by
	// itself (https://github.com/golang/go/issues/33887), so the input is
	// routed through a BOM-aware reader first.
	records := csv.NewReader(bom.NewReader(reader))
	records.Comma = c.comma
	records.Comment = c.comment
	records.FieldsPerRecord = -1

	// With headers or inference enabled every output row is built by
	// hand, so the CSV reader may recycle its record slice. Otherwise the
	// slices it returns go straight into the output and must each be
	// freshly allocated.
	records.ReuseRecord = c.headers || c.infer

	for remaining := c.skipLines; remaining > 0; remaining-- {
		if _, err := records.Read(); err != nil {
			return nil, err
		}
	}

	var keys []string
	if c.headers {
		header, err := records.Read()
		if err != nil {
			return nil, err
		}

		// The reader recycles its record slice in this mode, so the
		// header row has to be copied before the next Read.
		keys = make([]string, len(header))
		copy(keys, header)
	}

	switch {
	case c.headers:
		// One object per record, with or without value inference.
		objects := []map[string]interface{}{}
		for {
			fields, err := records.Read()
			if err != nil {
				if errors.Is(err, io.EOF) {
					return objects, nil
				}
				return nil, err
			}

			object := map[string]interface{}{}
			for col, field := range fields {
				// Fields past the end of the header row get a
				// synthesized key. Sprintf may prove too slow
				// here and is a candidate for later tuning.
				var key string
				if col >= len(keys) {
					key = fmt.Sprintf("column%d", col)
				} else {
					key = keys[col]
				}

				var value interface{} = field
				if c.infer {
					value = util.StringToValue(field)
				}
				object[key] = value
			}

			objects = append(objects, object)
		}

	case c.infer:
		// No headers: each record becomes a list of inferred values.
		rows := []interface{}{}
		for {
			fields, err := records.Read()
			if err != nil {
				if errors.Is(err, io.EOF) {
					return rows, nil
				}
				return nil, err
			}

			converted := make([]interface{}, 0, len(fields))
			for _, field := range fields {
				converted = append(converted, util.StringToValue(field))
			}

			rows = append(rows, converted)
		}

	default:
		// Fast path: neither headers nor inference, so the reader's
		// records are stored directly. ReuseRecord is off in this
		// mode, which makes retaining them safe.
		raw := [][]string{}
		for {
			fields, err := records.Read()
			if err != nil {
				if errors.Is(err, io.EOF) {
					return raw, nil
				}
				return nil, err
			}

			raw = append(raw, fields)
		}
	}
}

// SetOption implements InputHandler.SetOption().
//
// The recognized option names are csv.comma, csv.comment, csv.skip_lines,
// csv.headers, and csv.infer. Any other name is ignored and nil is returned.
//
// For csv.comma and csv.comment only the first rune of the value is used. A
// value longer than one byte is first passed through util.Unescape, and the
// unescaped form replaces it when that form is exactly one byte long.
//
// An error is returned, and the handler is left unchanged, when csv.comma or
// csv.comment is given an empty value, when util.StringToValue does not
// produce an int for csv.skip_lines, or when it does not produce a bool for
// csv.headers or csv.infer.
func (c *CSVInputHandler) SetOption(name string, value string) error {
	c.init()

	switch name {
	case "csv.comma", "csv.comment":
		if len(value) > 1 {
			// Gives multi-byte spellings of a single character a
			// chance to collapse to that character (presumably
			// backslash escapes such as \t; see util.Unescape).
			if unescaped, err := util.Unescape(value); (err == nil) && (len(unescaped) == 1) {
				value = unescaped
			}
		}

		// Indexing an empty rune slice would panic, so reject an empty
		// value explicitly.
		runes := []rune(value)
		if len(runes) == 0 {
			return fmt.Errorf("option %s requires a non-empty value", name)
		}

		if name == "csv.comma" {
			c.comma = runes[0]
		} else {
			c.comment = runes[0]
		}

	case "csv.skip_lines":
		// Checked assertion: a value that does not convert to an int
		// is reported as an error instead of panicking.
		n, ok := util.StringToValue(value).(int)
		if !ok {
			return fmt.Errorf("option %s requires an integer value, got %q", name, value)
		}
		c.skipLines = n

	case "csv.headers":
		enabled, ok := util.StringToValue(value).(bool)
		if !ok {
			return fmt.Errorf("option %s requires a boolean value, got %q", name, value)
		}
		c.headers = enabled

	case "csv.infer":
		enabled, ok := util.StringToValue(value).(bool)
		if !ok {
			return fmt.Errorf("option %s requires a boolean value, got %q", name, value)
		}
		c.infer = enabled
	}

	return nil
}
