/*
 * Decompiled with CFR 0.152.
 */
package opennlp.tools.formats.nkjp;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.LinkedHashMap;
import java.util.Map;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import opennlp.tools.util.Span;
import opennlp.tools.util.XmlUtil;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/**
 * In-memory view of an NKJP segmentation annotation file.
 *
 * <p>The document maps each sentence id to an insertion-ordered map of
 * segment id to {@link Pointer}, where a pointer is the parsed form of the
 * segment's {@code corresp} attribute.
 */
public class NKJPSegmentationDocument {
    /** Sentence id -> (segment id -> pointer), both in document order. */
    Map<String, Map<String, Pointer>> segments = new LinkedHashMap<>();

    /**
     * @return the sentence-to-segments map backing this document (not a copy).
     */
    public Map<String, Map<String, Pointer>> getSegments() {
        return this.segments;
    }

    NKJPSegmentationDocument() {
    }

    NKJPSegmentationDocument(Map<String, Map<String, Pointer>> segments) {
        this.segments = segments;
    }

    /**
     * Parses a segmentation document from XML.
     *
     * <p>Sentences are the nodes matched by {@code /teiCorpus/TEI/text/body/p/s}.
     * For every sentence its direct {@code seg} children are recorded, and for
     * {@code choice} children the non-rejected alternatives are recorded.
     * A sentence without an {@code xml:id} attribute is stored under the
     * {@code null} key.
     *
     * @param is the stream to read the XML from; it is not closed here
     * @return the parsed document
     * @throws IOException if reading or XML parsing fails, or if a segment
     *         lacks a usable {@code xml:id} or {@code corresp} attribute
     */
    public static NKJPSegmentationDocument parse(InputStream is) throws IOException {
        Map<String, Map<String, Pointer>> sentences = new LinkedHashMap<>();
        try {
            DocumentBuilder docBuilder = XmlUtil.createDocumentBuilder();
            Document doc = docBuilder.parse(is);
            XPath xpath = XPathFactory.newInstance().newXPath();
            XPathExpression sentNodes = xpath.compile("/teiCorpus/TEI/text/body/p/s");
            XPathExpression segNodes = xpath.compile("./seg|./choice");
            XPathExpression segNodesOnly = xpath.compile("./seg");

            NodeList sentenceList = (NodeList) sentNodes.evaluate(doc, XPathConstants.NODESET);
            for (int i = 0; i < sentenceList.getLength(); ++i) {
                Node sentnode = sentenceList.item(i);
                String sentid = attributeValue(sentnode, "xml:id");

                Map<String, Pointer> sentenceSegments = new LinkedHashMap<>();
                NodeList segnl = (NodeList) segNodes.evaluate(sentnode, XPathConstants.NODESET);
                for (int j = 0; j < segnl.getLength(); ++j) {
                    Node n = segnl.item(j);
                    String nodeName = n.getNodeName();
                    if (nodeName.equals("seg")) {
                        sentenceSegments.put(xmlID(n), fromSeg(n));
                    } else if (nodeName.equals("choice")) {
                        addChoiceSegments(n, segNodesOnly, sentenceSegments);
                    }
                }
                sentences.put(sentid, sentenceSegments);
            }
        } catch (IOException | XPathExpressionException | SAXException e) {
            throw new IOException("Failed to parse NKJP document", e);
        }
        return new NKJPSegmentationDocument(sentences);
    }

    /**
     * Adds the accepted alternatives of one {@code choice} element to {@code target}.
     *
     * <p>An {@code nkjp:paren} child contributes all of its direct {@code seg}
     * children unless {@link #checkRejectedParen(Node)} reports it as rejected;
     * a direct {@code seg} child is added unless {@link #checkRejected(Node)}
     * is true for it. Other children (e.g. text nodes) are ignored.
     */
    private static void addChoiceSegments(Node choice, XPathExpression segNodesOnly,
            Map<String, Pointer> target) throws IOException, XPathExpressionException {
        NodeList alternatives = choice.getChildNodes();
        for (int k = 0; k < alternatives.getLength(); ++k) {
            Node alternative = alternatives.item(k);
            String nodeName = alternative.getNodeName();
            if (nodeName.equals("nkjp:paren")) {
                if (checkRejectedParen(alternative)) {
                    continue;
                }
                NodeList parenSegs = (NodeList) segNodesOnly.evaluate(alternative, XPathConstants.NODESET);
                for (int l = 0; l < parenSegs.getLength(); ++l) {
                    Node seg = parenSegs.item(l);
                    target.put(xmlID(seg), fromSeg(seg));
                }
            } else if (nodeName.equals("seg") && !checkRejected(alternative)) {
                target.put(xmlID(alternative), fromSeg(alternative));
            }
        }
    }

    /**
     * Returns the text content of attribute {@code name} on {@code n}, or
     * {@code null} when the node has no attributes or no such attribute.
     */
    private static String attributeValue(Node n, String name) {
        if (n.getAttributes() == null) {
            return null;
        }
        Node attribute = n.getAttributes().getNamedItem(name);
        return attribute == null ? null : attribute.getTextContent();
    }

    /**
     * @return {@code true} only if {@code n} carries {@code nkjp:rejected="true"}.
     */
    static boolean checkRejected(Node n) {
        return "true".equals(attributeValue(n, "nkjp:rejected"));
    }

    /**
     * Decides whether a parenthesised group of segments is rejected.
     *
     * @return {@code false} if {@code n} has no children or has at least one
     *         child {@code seg} that is not rejected; {@code true} otherwise
     */
    static boolean checkRejectedParen(Node n) {
        NodeList children = n.getChildNodes();
        if (children.getLength() == 0) {
            return false;
        }
        for (int i = 0; i < children.getLength(); ++i) {
            Node child = children.item(i);
            if (child.getNodeName().equals("seg") && !checkRejected(child)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Reads the mandatory {@code xml:id} attribute of a node.
     *
     * @throws IOException if the node has no attributes or no {@code xml:id}
     */
    static String xmlID(Node n) throws IOException {
        if (n.getAttributes() == null || n.getAttributes().getLength() < 1) {
            throw new IOException("Missing required attributes");
        }
        String id = attributeValue(n, "xml:id");
        if (id == null) {
            throw new IOException("Missing xml:id attribute");
        }
        return id;
    }

    /**
     * Builds a {@link Pointer} from a {@code seg} node.
     *
     * <p>The {@code corresp} attribute must look like
     * {@code document#name(id,offset,length)}. A four-part argument list
     * {@code (id,a,b,length)} is also accepted, in which case the offset is
     * {@code a * 1000 + b}.
     *
     * @param n the {@code seg} node; must have at least two attributes
     * @return the parsed pointer
     * @throws IOException if attributes are missing or {@code corresp} is
     *         malformed, including non-numeric offset/length parts
     */
    static Pointer fromSeg(Node n) throws IOException {
        if (n.getAttributes() == null || n.getAttributes().getLength() < 2) {
            throw new IOException("Missing required attributes");
        }
        String ptr = attributeValue(n, "corresp");
        String spacing = attributeValue(n, "nkjp:nps");
        if (spacing == null) {
            spacing = "";
        }
        if (ptr == null) {
            throw new IOException("Missing required attribute");
        }

        // The flag comes from the nkjp:nps value. It used to be compared against
        // the corresp string, which can never equal "yes" once it has passed the
        // format check below, so the flag was always false.
        // NOTE(review): confirm which literal the corpus uses for nkjp:nps
        // ("yes" is kept from the original comparison).
        boolean spaceAfter = spacing.equals("yes");

        if (!ptr.contains("#") || !ptr.contains("(") || ptr.charAt(ptr.length() - 1) != ')') {
            throw new IOException("String " + ptr + " does not appear to be a valid NKJP corresp attribute");
        }
        String document = ptr.substring(0, ptr.indexOf('#'));
        int argsStart = ptr.indexOf('(') + 1;
        String[] pieces = ptr.substring(argsStart, ptr.length() - 1).split(",");
        if (pieces.length < 3 || pieces.length > 4) {
            throw new IOException("String " + ptr + " does not appear to be a valid NKJP corresp attribute");
        }
        String docid = pieces[0];
        try {
            int offset;
            int length;
            if (pieces.length == 3) {
                offset = Integer.parseInt(pieces[1]);
                length = Integer.parseInt(pieces[2]);
            } else {
                int os1 = Integer.parseInt(pieces[1]);
                int os2 = Integer.parseInt(pieces[2]);
                offset = os1 * 1000 + os2;
                length = Integer.parseInt(pieces[3]);
            }
            return new Pointer(document, docid, offset, length, spaceAfter);
        } catch (NumberFormatException e) {
            // Report bad numbers the same way as every other malformed pointer
            // instead of leaking an unchecked exception to callers of parse().
            throw new IOException("String " + ptr + " does not appear to be a valid NKJP corresp attribute", e);
        }
    }

    /**
     * Parses a segmentation document from a file, closing the file afterwards.
     *
     * @throws IOException if the file cannot be read or parsed
     */
    static NKJPSegmentationDocument parse(File file) throws IOException {
        try (InputStream in = new BufferedInputStream(new FileInputStream(file))) {
            return parse(in);
        }
    }

    /**
     * Parsed form of a segment's {@code corresp} attribute: the part before
     * {@code #}, the first argument, and a character offset and length.
     */
    public static class Pointer {
        String doc;
        String id;
        int offset;
        int length;
        boolean space_after;

        public Pointer(String doc, String id, int offset, int length, boolean space_after) {
            this.doc = doc;
            this.id = id;
            this.offset = offset;
            this.length = length;
            this.space_after = space_after;
        }

        /**
         * @return a span covering {@code [offset, offset + length)}
         */
        public Span toSpan() {
            return new Span(this.offset, this.offset + this.length);
        }

        /**
         * @return the pointer rendered as {@code doc#string-range(id,offset,length)}
         */
        @Override
        public String toString() {
            return this.doc + "#string-range(" + this.id + "," + this.offset + "," + this.length + ")";
        }
    }
}

