package blanco.html.normalizer.util;

import blanco.commons.util.BlancoStringUtil;
import blanco.html.parser.BlancoHtmlContentSerializer;
import blanco.html.parser.BlancoHtmlParser;
import blanco.html.parser.BlancoHtmlParserFactory;
import blanco.html.parser.helper.BlancoHtmlNullContentHandler;
import blanco.xml.bind.valueobject.BlancoXmlDtd;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import org.ccil.cowan.tagsoup.HTMLSchema;
import org.ccil.cowan.tagsoup.Parser;
import org.ccil.cowan.tagsoup.XMLWriter;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/* loaded from: input_file:lib/blancohtmlnormalizer-0.1.5.jar:blanco/html/normalizer/util/BlancoHtmlNormalizerUtil.class */
/**
 * Utility that normalizes an HTML document supplied as raw bytes.
 *
 * <p>Normalization runs in three steps:
 * <ol>
 *   <li>the input is parsed once with {@link BlancoHtmlParser} to discover its encoding and
 *       the DOCTYPE declaration (if any);</li>
 *   <li>the decoded text is re-serialized through TagSoup ({@link Parser} + {@link XMLWriter});</li>
 *   <li>the TagSoup output is passed through {@link BlancoHtmlContentSerializer}.</li>
 * </ol>
 */
public class BlancoHtmlNormalizerUtil {
    /** DOCTYPE public identifier emitted when the input document does not declare one. */
    private static final String DEFAULT_DOCTYPE_PUBLIC = "-//W3C//DTD HTML 4.01 Transitional//EN";

    /** Value given to TagSoup's {@link XMLWriter#ENCODING} output property. */
    private static final String TAGSOUP_OUTPUT_ENCODING = "Windows-31J";

    /**
     * Normalizes the given HTML document.
     *
     * @param bArr the HTML document as raw bytes
     * @return the normalized document as bytes, encoded with the encoding reported by the parser
     * @throws IOException if reading fails, or if a {@link SAXException} is raised while parsing
     *     (the {@code SAXException} is attached as the cause)
     */
    public static byte[] normalize(byte[] bArr) throws IOException {
        try {
            BlancoHtmlParser htmlParser = BlancoHtmlParserFactory.getInstance();

            // First pass: only the DOCTYPE declaration is captured; everything else is ignored.
            final BlancoXmlDtd dtd = new BlancoXmlDtd();
            htmlParser.setLexicalHandler(new BlancoHtmlNullContentHandler() {
                @Override
                public void startDTD(String name, String publicId, String systemId) throws SAXException {
                    dtd.setName(name);
                    dtd.setPublicId(publicId);
                    dtd.setSystemId(systemId);
                }
            });
            htmlParser.parse(bArr);

            // Encoding as reported by the parser after the first pass.
            String encoding = htmlParser.getEncoding();

            String tagSoupOutput = normalizeByTagSoup(new String(bArr, encoding), dtd);
            String serialized = normalizeNumericCharacterReference(tagSoupOutput.getBytes(encoding), encoding);

            // NOTE(review): the encoding is deliberately re-read here rather than reusing the
            // local above. The second pass obtains its parser from the same factory; whether
            // that is a shared instance is not visible from this file, so the original
            // read-after-second-pass order is preserved.
            return serialized.getBytes(htmlParser.getEncoding());
        } catch (SAXException e) {
            // Keep the original message text, but attach the SAXException as the cause so the
            // underlying stack trace is not lost.
            throw new IOException("SAXException例外発生: " + e.toString(), e);
        }
    }

    /**
     * Re-serializes HTML text through TagSoup.
     *
     * <p>Only the public identifier of {@code blancoXmlDtd} is consulted. If it is null or empty,
     * the HTML 4.01 Transitional public identifier is emitted and the output method is
     * {@code html}. Otherwise the given identifier is emitted, and the output method is
     * {@code xhtml} when the identifier contains "XHTML" (case-insensitive), {@code html} if not.
     * TagSoup's namespaces feature is switched off for non-XHTML output.
     *
     * @param str the HTML document text
     * @param blancoXmlDtd DOCTYPE information captured from the input document
     * @return the document as written by TagSoup's {@link XMLWriter}, without an XML declaration
     * @throws IOException if TagSoup fails while reading the input
     * @throws SAXException if TagSoup rejects a feature/property or fails while parsing
     */
    private static String normalizeByTagSoup(String str, BlancoXmlDtd blancoXmlDtd) throws IOException, SAXException {
        Parser tagSoup = new Parser();
        tagSoup.setProperty(Parser.schemaProperty, new HTMLSchema());

        StringWriter out = new StringWriter();
        XMLWriter writer = new XMLWriter(out);
        tagSoup.setContentHandler(writer);

        String publicId = blancoXmlDtd.getPublicId();
        boolean xhtml = false;
        if (BlancoStringUtil.null2Blank(publicId).length() == 0) {
            writer.setOutputProperty(XMLWriter.DOCTYPE_PUBLIC, DEFAULT_DOCTYPE_PUBLIC);
        } else {
            writer.setOutputProperty(XMLWriter.DOCTYPE_PUBLIC, publicId);
            xhtml = publicId.toUpperCase().indexOf("XHTML") >= 0;
        }

        if (xhtml) {
            writer.setOutputProperty(XMLWriter.METHOD, "xhtml");
        } else {
            tagSoup.setFeature(Parser.namespacesFeature, false);
            writer.setOutputProperty(XMLWriter.METHOD, "html");
        }

        writer.setOutputProperty(XMLWriter.OMIT_XML_DECLARATION, "yes");
        tagSoup.setFeature(Parser.defaultAttributesFeature, false);
        // NOTE(review): the output encoding is fixed regardless of the input encoding; the
        // result is collected as a String here, so this does not transcode anything itself.
        writer.setOutputProperty(XMLWriter.ENCODING, TAGSOUP_OUTPUT_ENCODING);
        tagSoup.setFeature(Parser.ignoreBogonsFeature, false);
        // The writer also receives lexical events (e.g. the DOCTYPE) from TagSoup.
        tagSoup.setProperty(Parser.lexicalHandlerProperty, writer);

        InputSource input = new InputSource();
        input.setCharacterStream(new StringReader(str));
        tagSoup.parse(input);
        return out.toString();
    }

    /**
     * Parses the given bytes with {@link BlancoHtmlParser} using the given encoding and returns
     * whatever {@link BlancoHtmlContentSerializer} writes for the resulting events.
     *
     * <p>Presumably this is where numeric character references are normalized, as the method
     * name suggests; that logic lives in the serializer and is not visible in this file.
     *
     * @param bArr the HTML document as raw bytes
     * @param str the name of the encoding to parse {@code bArr} with
     * @return the text produced by the serializer
     * @throws IOException if reading or writing fails
     * @throws SAXException if parsing fails
     */
    private static String normalizeNumericCharacterReference(byte[] bArr, String str) throws IOException, SAXException {
        StringWriter out = new StringWriter();
        BlancoHtmlContentSerializer serializer = new BlancoHtmlContentSerializer();
        // NOTE(review): the BufferedWriter is never flushed here; this relies on the serializer
        // flushing (or closing) its writer by the end of the document — TODO confirm.
        serializer.setWriter(new BufferedWriter(out));

        BlancoHtmlParser htmlParser = BlancoHtmlParserFactory.getInstance();
        htmlParser.setHandler(serializer);
        htmlParser.setLexicalHandler(serializer);
        htmlParser.setDTDHandler(serializer);
        htmlParser.setEncoding(str);
        htmlParser.parse(bArr);
        return out.toString();
    }
}
