package org.htmlparser.tests.lexerTests;

import java.io.IOException;
import java.net.URL;
import java.util.HashSet;
import org.htmlparser.Node;
import org.htmlparser.Parser;
import org.htmlparser.Remark;
import org.htmlparser.Tag;
import org.htmlparser.Text;
import org.htmlparser.lexer.Lexer;
import org.htmlparser.tags.ScriptTag;
import org.htmlparser.tags.StyleTag;
import org.htmlparser.tests.ParserTestCase;
import org.htmlparser.util.EncodingChangeException;
import org.htmlparser.util.NodeIterator;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;

/* loaded from: classes.dex */
/**
 * Tests for {@link Lexer}, plus a few {@link Parser}-level checks that
 * exercise the lexer indirectly (script/style CDATA, self-closing tags,
 * text extraction).
 *
 * <p>{@link #testFidelity()} and {@link #testJIS()} open live HTTP
 * connections and therefore need network access to run.
 */
public class LexerTests extends ParserTestCase {
    /** Tag names that {@link #checkTagNames(Node)} accepts without failing. */
    static final HashSet<String> mAcceptable;

    static {
        // NOTE(review): the purpose of this system property is not evident
        // from this file; it is kept exactly as it was.
        System.setProperty("org.htmlparser.tests.lexerTests.LexerTests", "LexerTests");

        String[] acceptableNames = {
            "A", "BODY", "BR", "CENTER", "FONT", "HEAD", "HR", "HTML", "IMG",
            "P", "TABLE", "TD", "TITLE", "TR", "META", "STRONG", "FORM",
            "INPUT", "!DOCTYPE", "TBODY", "B", "DIV", "SCRIPT", "NOSCRIPT",
            "STYLE", "SPAN", "UL", "LI", "IFRAME", "LINK", "H1", "H3",
            "OBJECT", "PARAM", "EMBED",
        };
        mAcceptable = new HashSet<String>();
        for (String name : acceptableNames) {
            mAcceptable.add(name);
        }
    }

    /**
     * Creates the test case.
     *
     * @param str the name passed on to the {@link ParserTestCase} constructor
     */
    public LexerTests(String str) {
        super(str);
    }

    /**
     * Fails the test if {@code node}, or any tag nested below it, is a
     * {@link Tag} whose name is not in {@link #mAcceptable}. Nodes that are
     * not tags are ignored.
     *
     * @param node the node to check (its children are checked recursively)
     */
    public void checkTagNames(Node node) {
        if (!(node instanceof Tag)) {
            return;
        }
        Tag tag = (Tag) node;
        String tagName = tag.getTagName();
        if (!mAcceptable.contains(tagName)) {
            fail("unrecognized tag name \"" + tagName + "\"");
        }
        NodeList children = tag.getChildren();
        if (children == null) {
            return;
        }
        for (int i = 0; i < children.size(); i++) {
            checkTagNames(children.elementAt(i));
        }
    }

    /**
     * Runs {@link #checkTagNames(Node)} over every node the parser yields.
     */
    private void checkAllTagNames(Parser source) throws ParserException {
        for (NodeIterator nodes = source.elements(); nodes.hasMoreNodes();) {
            checkTagNames(nodes.nextNode());
        }
    }

    /**
     * Lexes {@code html} and returns the text of its first node, which must
     * be a {@link Text} node (a {@code ClassCastException} results otherwise).
     */
    private String firstTextOf(String html) throws ParserException {
        return ((Text) new Lexer(html).nextNode()).getText();
    }

    /**
     * Parses {@code html} with the fixture parser and returns the
     * concatenated plain text of all top-level nodes.
     */
    private String plainTextOf(String html) throws ParserException {
        createParser(html);
        StringBuilder text = new StringBuilder();
        for (NodeIterator nodes = this.parser.elements(); nodes.hasMoreNodes();) {
            text.append(nodes.nextNode().toPlainTextString());
        }
        return text.toString();
    }

    /**
     * Parses {@code html} with the fixture parser and asserts that every
     * top-level node renders back to exactly {@code html}.
     */
    private void assertRoundTrip(String html) throws ParserException {
        createParser(html);
        for (NodeIterator nodes = this.parser.elements(); nodes.hasMoreNodes();) {
            assertStringEquals("no overflow", html, nodes.nextNode().toHtml());
        }
    }

    /**
     * Lexes {@code html} and asserts that its first node is a {@link Remark}
     * rendering as {@code expected}.
     */
    private void assertRemark(String expected, String html) throws ParserException {
        assertEquals("Tag contents wrong", expected, ((Remark) new Lexer(html).nextNode()).toHtml());
    }

    /** A tag with quoted, unquoted and multi-line attributes round-trips unchanged. */
    public void testAttributedTag() throws ParserException {
        String html = "<head lang='en_US' dir=ltr\nprofile=\"http://htmlparser.sourceforge.org/dictionary.html\">";
        assertEquals("Tag contents wrong", html, new Lexer(html).nextNode().toHtml());
    }

    /** An HTML comment inside a script element is kept as script code. */
    public void testCommentInScript() throws ParserException {
        String html = "<script><!--document.write(\"en\");// --></script>";
        Parser scriptParser = new Parser();
        scriptParser.setInputHTML(html);
        NodeIterator nodes = scriptParser.elements();
        Node first = nodes.nextNode();
        assertNotNull("too few nodes", first);
        assertStringEquals("bad parse", html, first.toHtml());
        assertTrue(first instanceof ScriptTag);
        assertStringEquals("bad cdata", "<!--document.write(\"en\");// -->", ((ScriptTag) first).getScriptCode());
        assertNull("too many nodes", nodes.nextNode());
    }

    /**
     * Plain-text extraction yields the same result wherever the single
     * newline sits relative to the title and body tags.
     */
    public void testConjoined() throws ParserException {
        String expected = "The Title\nThis is the body.";
        assertStringEquals("conjoined text", expected,
            plainTextOf("<html><title>The Title\n</title><body>This is <a href=\"foo.html\">the body</a>.</body></html>"));
        assertStringEquals("conjoined text", expected,
            plainTextOf("<html><title>The Title</title>\n<body>This is <a href=\"foo.html\">the body</a>.</body></html>"));
        assertStringEquals("conjoined text", expected,
            plainTextOf("<html><title>The Title</title><body>\nThis is <a href=\"foo.html\">the body</a>.</body></html>"));
    }

    /** CR LF and bare CR inside text are preserved verbatim. */
    public void testDosEOL() throws ParserException {
        assertEquals("Text contents wrong", "Hello\r\nworld", firstTextOf("Hello\r\nworld"));
        assertEquals("Text contents wrong", "Hello\rworld", firstTextOf("Hello\rworld"));
    }

    /** A CR LF directly after {@code <!} yields a tag named {@code "!"}. */
    public void testDosLineEndingInName() throws ParserException {
        this.parser = new Parser();
        this.parser.setInputHTML("<!\r\nMSIE->");
        Node first = this.parser.elements().nextNode();
        assertNotNull("too few nodes", first);
        assertTrue(first instanceof Tag);
        Tag tag = (Tag) first;
        assertNotNull("null name", tag.getTagName());
        assertStringEquals("bad parse", "!", tag.getTagName());
    }

    /** A line ending that is the last thing in the input stays in the text. */
    public void testEOF_EOL() throws ParserException {
        assertEquals("Text contents wrong", "Hello world\n", firstTextOf("Hello world\n"));
        assertEquals("Text contents wrong", "Hello world\r", firstTextOf("Hello world\r"));
        assertEquals("Text contents wrong", "Hello world\r\n", firstTextOf("Hello world\r\n"));
    }

    /**
     * With {@code nextNode(true)}, a backslash-escaped quote inside a quoted
     * string does not end the text before the closing script tag.
     */
    public void testEscapedQuote() throws ParserException {
        Lexer lexer = new Lexer("\na='\\'';\n</script>");
        Node first = lexer.nextNode(true);
        assertNotNull("too few nodes", first);
        assertStringEquals("bad string", "\na='\\'';\n", first.toHtml());
        assertNotNull("too few nodes", lexer.nextNode(true));
        assertNull("too many nodes", lexer.nextNode(true));
    }

    /**
     * Lexes the page at http://sourceforge.net and verifies that the nodes
     * are contiguous and that their concatenated HTML reproduces the page
     * text character for character. Requires network access.
     */
    public void testFidelity() throws ParserException, IOException {
        Lexer lexer = new Lexer(new URL("http://sourceforge.net").openConnection());
        StringBuilder rebuilt = new StringBuilder(80000);
        int expectedStart = 0;
        for (Node node = lexer.nextNode(); node != null; node = lexer.nextNode()) {
            String html = node.toHtml();
            // Each node must begin exactly where the previous one ended.
            if (expectedStart != node.getStartPosition()) {
                fail("non-contiguous" + html);
            }
            rebuilt.append(html);
            expectedStart = node.getEndPosition();
            // The rendered length must track the node's reported end position.
            if (rebuilt.length() != expectedStart) {
                fail("text length differed after encountering node " + html);
            }
        }
        String pageText = lexer.getPage().getText();
        assertEquals("different amounts of text", pageText.length(), rebuilt.length());
        for (int pos = 0; pos < pageText.length(); pos++) {
            char expected = pageText.charAt(pos);
            char actual = rebuilt.charAt(pos);
            if (expected != actual) {
                fail("character differs at position " + pos + ", expected <" + expected + "> but was <" + actual + ">");
            }
        }
    }

    /**
     * Parses http://www.009.com/ and checks that only acceptable tag names
     * appear. Requires network access.
     * NOTE(review): the test name suggests the page is JIS-encoded; that is
     * not verifiable from this file.
     */
    public void testJIS() throws ParserException {
        Parser remoteParser = new Parser("http://www.009.com/");
        try {
            checkAllTagNames(remoteParser);
        } catch (EncodingChangeException encodingChanged) {
            // The encoding changed mid-parse: reset and run the whole check again.
            remoteParser.reset();
            checkAllTagNames(remoteParser);
        }
    }

    /** A JSP block containing quotes and markup is lexed as a single node. */
    public void testJsp() throws ParserException {
        String html = "<% out.urlEncode('abc') + \"<br>\" + out.urlEncode('xyz') %>";
        Lexer lexer = new Lexer(html);
        Node first = lexer.nextNode();
        assertNotNull("too few nodes", first);
        assertStringEquals("bad html", html, first.toHtml());
        assertNull("too many nodes", lexer.nextNode());
    }

    /** A processing instruction containing markup is lexed as a single node. */
    public void testPI() throws ParserException {
        String html = "<?php print(\"<p>Hello World!</p>\"); ?>";
        Lexer lexer = new Lexer(html);
        Node first = lexer.nextNode();
        assertNotNull("too few nodes", first);
        assertStringEquals("bad html", html, first.toHtml());
        assertNull("too many nodes", lexer.nextNode());
    }

    /** Attribute-less tags round-trip, alone and back to back. */
    public void testPureTag() throws ParserException {
        assertEquals("Tag contents wrong", "<head>", new Lexer("<head>").nextNode().toHtml());
        Lexer lexer = new Lexer("<head><body>");
        assertEquals("Tag contents wrong", "<head>", lexer.nextNode().toHtml());
        assertEquals("Tag contents wrong", "<body>", lexer.nextNode().toHtml());
    }

    /** Input without any markup is returned as one text node. */
    public void testPureText() throws ParserException {
        assertEquals("Text contents wrong", "Hello world", firstTextOf("Hello world"));
    }

    /**
     * Comments round-trip unchanged: single-line, with whitespace before the
     * closing {@code >}, and multi-line, each both at end of input and
     * followed by a tag.
     */
    public void testRemark() throws ParserException {
        assertRemark("<!-- This is a comment -->", "<!-- This is a comment -->");
        assertRemark("<!-- This is a comment --  >", "<!-- This is a comment --  >");
        assertRemark("<!-- This is a\nmultiline comment -->", "<!-- This is a\nmultiline comment -->");
        assertRemark("<!-- This is a comment -->", "<!-- This is a comment --><head>");
        assertRemark("<!-- This is a comment --  >", "<!-- This is a comment --  ><head>");
        assertRemark("<!-- This is a\nmultiline comment -->", "<!-- This is a\nmultiline comment --><head>");
    }

    /**
     * Self-closing anchor tags with various spacing parse and render back
     * unchanged.
     * NOTE(review): per the test name this presumably guards against a
     * stack overflow regression; confirm against the project history.
     */
    public void testStackOverflow() throws ParserException {
        assertRoundTrip("<a href = \"http://test.com\" />");
        assertRoundTrip("<a href=\"http://test.com\"/>");
        assertRoundTrip("<a href = \"http://test.com\"/>");
    }

    /**
     * Text stops at the start of whatever follows it (open tag, end tag, JSP
     * expression, processing instruction, comment) regardless of the line
     * ending that closes the text.
     */
    public void testTagStops() throws ParserException {
        String[] texts = {"Hello world", "Hello world\n", "Hello world\r\n", "Hello world\r"};
        String[] terminators = {"<head>", "</head>", "<%=head%>", "<?php ?>", "<!--head-->"};
        // Iterate over the arrays themselves so a newly added case is never skipped.
        for (String text : texts) {
            for (String terminator : terminators) {
                assertEquals("Text contents wrong", text, firstTextOf(text + terminator));
            }
        }
    }

    /** A bare LF inside text is preserved verbatim. */
    public void testUnixEOL() throws ParserException {
        assertEquals("Text contents wrong", "Hello\nworld", firstTextOf("Hello\nworld"));
    }

    /** A URL inside a style element is kept as style code. */
    public void testUrlInStyle() throws ParserException {
        String html = "<style>.eSDot {background-image:url(http://di.image.eshop.msn.com/img/sys/dot.gif)}</style>";
        Parser styleParser = new Parser();
        styleParser.setInputHTML(html);
        NodeIterator nodes = styleParser.elements();
        Node first = nodes.nextNode();
        assertNotNull("too few nodes", first);
        assertStringEquals("bad parse", html, first.toHtml());
        assertTrue(first instanceof StyleTag);
        assertStringEquals("bad cdata", ".eSDot {background-image:url(http://di.image.eshop.msn.com/img/sys/dot.gif)}", ((StyleTag) first).getStyleCode());
        assertNull("too many nodes", nodes.nextNode());
    }
}
