package com.shanyu.voicewikilib.htmlparse;

import com.shanyu.voicewikilib.utils.MyLogger;
import java.util.ArrayList;
import java.util.Locale;

/* loaded from: classes.dex */
public class HTMLExtractor {
    /** Value 0; the same value {@link Element#getHeaderLevel()} returns for non-header elements. */
    public static final int INVAL_HEADER_LEVEL = 0;
    /** Maximum number of characters kept per element; longer buffers are truncated when flushed. */
    private static final int MAX_ELEMENT_SIZE = 3000;
    /** Default value of {@link #mMaxNum}, the cap on the number of emitted elements. */
    private static final int MAX_NUM_ELEMENT = 1000;
    /** Tag code used for text collected inside {@code <blockquote>}. */
    public static final int TAG_BLOCKQUOTE = 3;
    /** Tag code used for text collected inside {@code <dl>}. */
    public static final int TAG_DL = 4;
    /** Tag code for {@code <h2>}: header bit (256) plus level 2. */
    public static final int TAG_H2 = 258;
    /** Tag code for {@code <h3>}: header bit (256) plus level 3. */
    public static final int TAG_H3 = 259;
    /** Tag code for {@code <h4>}: header bit (256) plus level 4. */
    public static final int TAG_H4 = 260;
    /** Mask (255) that extracts the header level from a header tag code. */
    public static final int TAG_H_LEVEL_MASK = 255;
    /** Bit (256) that marks a tag code as a header; tested by {@code isHeader(int)}. */
    public static final int TAG_H_MASK = 256;
    /** Tag code 0: no element is currently being collected. */
    public static final int TAG_IGNORE = 0;
    /** Tag code used for text collected inside {@code <ul>} / {@code <ol>}. */
    public static final int TAG_LIST = 2;
    /** Tag code used for text collected inside {@code <p>}. */
    public static final int TAG_P = 1;
    /** Lower-cased name of the tag that opened the region currently being skipped. */
    protected String mSSkipTagName;
    /** When true, list elements are numbered and the first link of every item is recorded. */
    protected boolean mIsExtractHref = false;
    /** Number of elements emitted so far. */
    protected int mNum = 0;
    /** Once {@link #mNum} reaches this value no further elements are emitted. */
    protected int mMaxNum = MAX_NUM_ELEMENT;
    /** Top-level scanner state: 0 = reading text, 1 = inside a tag ({@code <...>}). */
    protected int mS = 0;
    /** Text sub-state: 0 = plain, 1 = inside {@code &...;}, 2 = inside {@code [...]}, 3 = inside {@code (...)}. */
    protected int mS00 = 0;
    /** Nesting depth of additional brackets/parentheses opened while in text sub-state 2 or 3. */
    protected int mS01 = 0;
    /** Tag sub-state: 0 = reading the tag name, 1 = reading the attribute text. */
    protected int mS1 = 0;
    /** Nesting depth of the skipped tag; text is dropped while this is greater than zero. */
    protected int mSNumTagSkip = 0;
    /** Tag code of the element currently being collected (one of the {@code TAG_*} values). */
    protected int mSCurTag = 0;
    /** Characters of the tag name being read (starts with '/' for end tags). */
    protected StringBuilder mTagBuf = new StringBuilder(8);
    /** Text collected for the element currently being built. */
    protected StringBuilder mCurBuf = new StringBuilder(MAX_ELEMENT_SIZE);
    /**
     * Raw attribute text of the most recent tag that had attributes. It is only cleared when a
     * new tag with attributes starts, so it may belong to an earlier tag.
     */
    protected StringBuilder mCurAttr = new StringBuilder(256);
    /** {@code href} value of the link currently being recorded. */
    protected String mCurHref = null;
    /** {@code id} of the most recently opened {@code <div>}; set to "" when a div closes. */
    protected String mCurDivId = null;
    /** Offset in {@link #mCurBuf} at which the text of the link being recorded starts. */
    protected int mCurAStartPos = 0;
    /** Links recorded for the list element currently being built. */
    protected ArrayList<HReference> mCurHrefs = new ArrayList<>();
    /** Number of links recorded for the current list item. */
    protected int mNumHrefInLi = 0;
    /** Number of items seen in the current list; used for the "(n) " prefix. */
    protected int mNumLiInUL = 0;
    /** When true, plain text characters are not appended to {@link #mCurBuf}. */
    protected boolean mSkipPlain = false;
    /** When true, text inside parentheses is dropped. */
    public boolean mSkipParentheses = true;
    /** When true, {@code <table>} content is skipped. */
    public boolean mSkipTables = true;
    /** Only zeroed by {@code reset()} in this class; not otherwise used here. */
    protected int mH2 = 0;
    /** Only zeroed by {@code reset()} in this class; not otherwise used here. */
    protected int mH3 = 0;
    /** Only zeroed by {@code reset()} in this class; not otherwise used here. */
    protected int mH4 = 0;
    /** Caller-supplied skip filters registered through {@code addSkipTag}. */
    protected ArrayList<HtmlTagFilter> mSkipTags = new ArrayList<>();
    /** True between a {@code <blockquote>}/{@code <dl>} start tag and its end tag; start tags are ignored meanwhile. */
    protected boolean mInBlockTag = false;

    /* loaded from: classes.dex */
    /**
     * One extracted piece of text together with the tag code it came from, the links recorded
     * for it and, for headers, the section id.
     */
    public static class Element {
        /** Links attached to this element; never {@code null}. */
        private ArrayList<HReference> mHrefs = new ArrayList<>();
        /** Section id (only supplied for header elements), may be {@code null}. */
        private String mSecId;
        /** Tag code of the element. */
        private int mTag;
        /** Extracted text. */
        private String mText;

        /** Creates an empty element: no text, tag code 0 and no links. */
        public Element() {
        }

        /**
         * Creates an element with tag code 1 (paragraph) and no links.
         *
         * @param text extracted text
         */
        public Element(String text) {
            init(text, 1, new ArrayList<HReference>(), null);
        }

        /**
         * Creates an element without links.
         *
         * @param text extracted text
         * @param tag  tag code
         */
        public Element(String text, int tag) {
            init(text, tag, new ArrayList<HReference>(), null);
        }

        /**
         * Creates an element with a section id and no links.
         *
         * @param text      extracted text
         * @param tag       tag code
         * @param sectionId section id
         */
        public Element(String text, int tag, String sectionId) {
            init(text, tag, new ArrayList<HReference>(), sectionId);
        }

        /**
         * Creates an element that carries links.
         *
         * @param text  extracted text
         * @param tag   tag code
         * @param hrefs links for this element; {@code null} is treated as "no links"
         */
        public Element(String text, int tag, ArrayList<HReference> hrefs) {
            init(text, tag, hrefs, null);
        }

        /**
         * @return the low 8 bits of the tag code when this element is a header, otherwise 0
         */
        public int getHeaderLevel() {
            if (isHeader()) {
                // 255 == TAG_H_LEVEL_MASK
                return this.mTag & 255;
            }
            // 0 == INVAL_HEADER_LEVEL
            return 0;
        }

        /**
         * @param i index of the link
         * @return target of the i-th link
         * @throws IndexOutOfBoundsException if {@code i} is out of range
         */
        public String getHref(int i) {
            return this.mHrefs.get(i).getHref();
        }

        /**
         * @param i index of the link
         * @return text of the i-th link
         * @throws IndexOutOfBoundsException if {@code i} is out of range
         */
        public String getHrefText(int i) {
            return this.mHrefs.get(i).getText();
        }

        /** @return number of links attached to this element */
        public int getNumHref() {
            return this.mHrefs.size();
        }

        /** @return the section id, or {@code null} when none was supplied */
        public String getSectionId() {
            return this.mSecId;
        }

        /** @return the extracted text */
        public String getText() {
            return this.mText;
        }

        /**
         * (Re)initialises every field of this element.
         *
         * @param text      extracted text
         * @param tag       tag code
         * @param hrefs     links; {@code null} is replaced by an empty list so the link
         *                  accessors never fail with a NullPointerException
         * @param sectionId section id, may be {@code null}
         */
        public void init(String text, int tag, ArrayList<HReference> hrefs, String sectionId) {
            this.mText = text;
            this.mTag = tag;
            this.mHrefs = hrefs != null ? hrefs : new ArrayList<HReference>();
            this.mSecId = sectionId;
        }

        /** @return true when the tag code has the header bit set */
        public boolean isHeader() {
            return HTMLExtractor.isHeader(this.mTag);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: classes.dex */
    /** A link: its target ({@code href}) and the text that was displayed for it. */
    public static class HReference {
        /** Link target. */
        public String href;
        /** Link text. */
        public String text;

        /**
         * @param target   link target, stored in {@link #href}
         * @param linkText link text, stored in {@link #text}
         */
        public HReference(String target, String linkText) {
            text = linkText;
            href = target;
        }

        /** @return the link target */
        public String getHref() {
            return href;
        }

        /** @return the link text */
        public String getText() {
            return text;
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: classes.dex */
    /**
     * Describes a tag whose content should be skipped: the tag name, the attribute to inspect
     * and the attribute value to look for.
     */
    public static class HtmlTagFilter {
        /** Attribute name to inspect. */
        public String prop;
        /** Tag name the filter applies to. */
        public String tag;
        /** Attribute value to look for. */
        public String value;

        /**
         * @param tagName   stored in {@link #tag}
         * @param attrName  stored in {@link #prop}
         * @param attrValue stored in {@link #value}
         */
        public HtmlTagFilter(String tagName, String attrName, String attrValue) {
            value = attrValue;
            prop = attrName;
            tag = tagName;
        }
    }

    /**
     * Flushes the text buffer into {@code arrayList} as a new {@link Element} and clears the
     * current tag code.
     *
     * <p>Nothing is emitted when the buffer is empty, when the element cap ({@code mMaxNum}) has
     * been reached, or when {@link #tweakText()} yields no text. When the cap has been reached
     * the method returns immediately and leaves both the buffer and the current tag code
     * untouched. When {@code tweakText()} yields no text the buffer is also left untouched.
     *
     * @param arrayList result list that receives the new element
     */
    private void addToResult(ArrayList<Element> arrayList) {
        if (this.mCurBuf.length() != 0) {
            if (this.mNum >= this.mMaxNum) {
                return;
            }
            if (this.mCurBuf.length() > MAX_ELEMENT_SIZE) {
                this.mCurBuf.setLength(MAX_ELEMENT_SIZE);
            }
            String text = tweakText();
            // Compare by content: an overriding tweakText() may return an empty string that is
            // not the interned "" literal, which a reference comparison would let through.
            if (text != null && !text.isEmpty()) {
                if (isHeader(this.mSCurTag)) {
                    // The id is read from the attribute text of the most recent tag that had attributes.
                    arrayList.add(new Element(text, this.mSCurTag, getCurAttribute("id")));
                } else if (this.mIsExtractHref) {
                    // Hand the collected links over to the element and start a fresh list.
                    arrayList.add(new Element(text, this.mSCurTag, this.mCurHrefs));
                    this.mCurHrefs = new ArrayList<>();
                } else {
                    arrayList.add(new Element(text, this.mSCurTag));
                }
                this.mCurBuf.setLength(0);
                this.mNum++;
            }
        }
        this.mSCurTag = 0;
    }

    /**
     * Decides whether the start tag {@code str} opens (or deepens) a region whose content must
     * be skipped, and updates {@code mSNumTagSkip} / {@code mSSkipTagName} accordingly.
     *
     * <p>While already skipping, only a nested tag with the same name as the skipped one
     * increases the depth. Otherwise skipping starts for {@code table} (when
     * {@code mSkipTables} is set), for {@code button}, for a {@code div}/{@code span} styled
     * {@code display: none;} seen before the first element was emitted, and for any tag matching
     * one of the registered {@code mSkipTags} filters.
     *
     * @param str lower-cased tag name
     */
    private void checkSkipTagBegin(String str) {
        if (this.mSNumTagSkip > 0) {
            if (str.equals(this.mSSkipTagName)) {
                this.mSNumTagSkip++;
            }
            return;
        }

        boolean hiddenContainer = this.mNum == 0
                && (str.equals("div") || str.equals("span"))
                // >= 0: a match at the very start of the attribute text counts as well.
                && this.mCurAttr.indexOf("display: none;") >= 0;
        if ((this.mSkipTables && str.equals("table")) || str.equals("button") || hiddenContainer) {
            this.mSNumTagSkip = 1;
            this.mSSkipTagName = str;
            return;
        }

        for (int i = 0; i < this.mSkipTags.size(); i++) {
            HtmlTagFilter filter = this.mSkipTags.get(i);
            if (!filter.tag.equals(str)) {
                continue;
            }
            boolean matches = filter.prop.equals("*");
            if (!matches) {
                // Look the attribute up once instead of once per comparison.
                String attrValue = getCurAttribute(filter.prop);
                // The value must be the first token (prefix match), a middle token or the last token.
                matches = attrValue.startsWith(filter.value)
                        || attrValue.contains(" " + filter.value + " ")
                        || attrValue.endsWith(" " + filter.value);
            }
            if (matches) {
                this.mSNumTagSkip = 1;
                this.mSSkipTagName = str;
                MyLogger.vv("skipped div: ", filter.value);
                return;
            }
        }
    }

    /**
     * Dispatches the tag whose name has just been read into {@code mTagBuf}: names starting
     * with '/' go to {@link #handleTagEnd}, everything else to {@link #handleTagBegin}.
     *
     * <p>The name is lower-cased with {@link Locale#ROOT} so that tag matching does not depend
     * on the device locale (the default-locale conversion maps 'I' to a dotless 'ı' under
     * Turkish locales, which would stop {@code <LI>} or {@code <DIV>} from being recognised).
     *
     * @param arrayList result list passed through to the tag handlers
     */
    private void handleTag(ArrayList<Element> arrayList) {
        String rawName = this.mTagBuf.toString();
        if (rawName.startsWith("/")) {
            handleTagEnd(rawName.substring(1).toLowerCase(Locale.ROOT), arrayList);
        } else {
            handleTagBegin(rawName.toLowerCase(Locale.ROOT), arrayList);
        }
    }

    /**
     * Handles a start tag.
     *
     * <p>The skip bookkeeping is always updated first. If the tag lies in a skipped region, or a
     * {@code blockquote}/{@code dl} block is open, nothing else happens. Otherwise the tag either
     * starts a new element ({@code p}, {@code ul}/{@code ol}, {@code h2}-{@code h4},
     * {@code blockquote}, {@code dl}), updates list/link bookkeeping ({@code li}, {@code a}) or
     * records the current div id ({@code div}). Unknown tags are ignored.
     *
     * @param str       lower-cased tag name
     * @param arrayList result list; an open list element is flushed into it when a new list starts
     */
    private void handleTagBegin(String str, ArrayList<Element> arrayList) {
        checkSkipTagBegin(str);
        if (this.mSNumTagSkip > 0 || this.mInBlockTag) {
            return;
        }

        if (str.equals("p")) {
            startTagClearBuf(1); // TAG_P
        } else if (str.equals("ul") || str.equals("ol")) {
            if (this.mSCurTag == 2) {
                // A list is already being collected: emit it before starting the new one.
                this.mS01 = 0;
                this.mS00 = 0;
                addToResult(arrayList);
            }
            this.mNumLiInUL = 0;
            startTagClearBuf(2); // TAG_LIST
        } else if (str.startsWith("h")) {
            // Any tag starting with 'h' ends here; only h2/h3/h4 start an element.
            // The length guard keeps a bare "<h>" from throwing.
            if (str.length() > 1) {
                switch (str.charAt(1)) {
                    case '2':
                        startTagClearBuf(TAG_H2);
                        break;
                    case '3':
                        startTagClearBuf(TAG_H3);
                        break;
                    case '4':
                        startTagClearBuf(TAG_H4);
                        break;
                    default:
                        break;
                }
            }
        } else if (str.equals("li")) {
            if (this.mIsExtractHref && this.mSCurTag == 2) {
                this.mNumHrefInLi = 0;
                this.mNumLiInUL++;
                // Prefix every list item with its running number, e.g. "(3) ".
                this.mCurBuf.append('(').append(this.mNumLiInUL).append(") ");
            }
            notifyLiBegin();
        } else if (str.equals("a")) {
            if (this.mIsExtractHref && this.mSCurTag == 2 && this.mNumHrefInLi == 0) {
                // Remember where the link text starts and where the link points to.
                this.mCurAStartPos = this.mCurBuf.length();
                this.mCurHref = getCurAttribute("href");
            }
            notifyABegin();
        } else if (str.equals("div")) {
            this.mCurDivId = getCurAttribute("id");
            notifyDivBegin();
        } else if (str.equals("blockquote")) {
            this.mInBlockTag = true;
            startTagClearBuf(3); // TAG_BLOCKQUOTE
        } else if (str.equals("dl")) {
            this.mInBlockTag = true;
            startTagClearBuf(4); // TAG_DL
        }
    }

    /**
     * Handles an end tag.
     *
     * <p>Inside a skipped region only the matching end tag is honoured (it decreases the skip
     * depth). Inside a {@code blockquote}/{@code dl} block only {@code </blockquote>} or
     * {@code </dl>} is honoured: it emits the collected text and closes the block. Otherwise
     * paragraph, list and header end tags (and {@code </html>}) flush the buffer, {@code </li>}
     * terminates a list item, {@code </a>} records the first link of a list item and
     * {@code </div>} clears the current div id.
     *
     * @param str       lower-cased tag name without the leading '/'
     * @param arrayList result list that receives flushed elements
     */
    private void handleTagEnd(String str, ArrayList<Element> arrayList) {
        if (this.mSNumTagSkip > 0) {
            if (str.equals(this.mSSkipTagName)) {
                this.mSNumTagSkip--;
            }
            return;
        }

        if (this.mInBlockTag) {
            if (str.equals("blockquote") || str.equals("dl")) {
                addToResult(arrayList);
                this.mInBlockTag = false;
            }
            // Neither of the two block names is handled any further below.
            return;
        }

        switch (str) {
            case "div":
                this.mCurDivId = "";
                break;
            case "p":
            case "ul":
            case "ol":
            case "h2":
            case "h3":
            case "h4":
                this.mS01 = 0;
                this.mS00 = 0;
                addToResult(arrayList);
                break;
            case "li":
                if (this.mSCurTag == 2 && this.mCurBuf.length() != 0) {
                    this.mCurBuf.append("\n\n");
                    if (this.mNumHrefInLi == 0) {
                        // Keep one link entry per list item even when the item had no link.
                        MyLogger.vv("no href in <li>");
                        this.mCurHrefs.add(new HReference("", ""));
                    }
                }
                this.mS01 = 0;
                this.mS00 = 0;
                notifyLiEnd();
                break;
            case "html":
                addToResult(arrayList);
                break;
            case "a":
                if (this.mIsExtractHref && this.mSCurTag == 2 && this.mNumHrefInLi == 0) {
                    // The buffer may have been flushed since <a> was seen (e.g. by a nested
                    // list); clamp the start so substring() cannot throw.
                    int linkStart = Math.min(this.mCurAStartPos, this.mCurBuf.length());
                    this.mCurHrefs.add(new HReference(this.mCurHref, this.mCurBuf.substring(linkStart)));
                    this.mNumHrefInLi++;
                }
                notifyAEnd();
                break;
            default:
                break;
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Tells whether a tag code denotes a header.
     *
     * @param i tag code
     * @return true when bit 256 (the header bit) is set in {@code i}
     */
    public static boolean isHeader(int i) {
        final int headerBit = i & 256;
        return headerBit == 256;
    }

    /**
     * Feeds one character into the scanner.
     *
     * <p>In text state (0) a {@code '<'} opens a tag and every other character goes through the
     * text sub-state machine. In tag state (1) characters are collected as tag name or attribute
     * text until {@code '>'} closes the tag.
     *
     * @param c         next input character
     * @param arrayList result list handed to the tag handlers
     */
    private void processOneChar(char c, ArrayList<Element> arrayList) {
        switch (this.mS) {
            case 0:
                if (c == '<') {
                    enterTag();
                } else {
                    consumeTextChar(c);
                }
                return;
            case 1:
                consumeTagChar(c, arrayList);
                return;
            default:
                MyLogger.e("fatal error: mS out of range");
                return;
        }
    }

    /** Switches to tag state and separates the text before and after the tag with one space. */
    private void enterTag() {
        this.mS = 1;
        this.mS1 = 0;
        this.mTagBuf.setLength(0);
        int textLen = this.mCurBuf.length();
        if (textLen > 0 && this.mCurBuf.charAt(textLen - 1) != ' ') {
            this.mCurBuf.append(' ');
        }
    }

    /** Handles one character of plain text according to the text sub-state {@code mS00}. */
    private void consumeTextChar(char c) {
        switch (this.mS00) {
            case 0:
                if (c == '&') {
                    // Character references are dropped entirely.
                    this.mS00 = 1;
                } else if (c == '[') {
                    this.mS00 = 2;
                    this.mS01 = 0;
                } else if (c == '(' && this.mSkipParentheses) {
                    this.mS00 = 3;
                    this.mS01 = 0;
                } else if (this.mSNumTagSkip == 0 && !this.mSkipPlain) {
                    this.mCurBuf.append(c);
                }
                return;
            case 1:
                if (c == ';') {
                    this.mS00 = 0;
                }
                return;
            case 2:
                skipBracketed(c, '[', ']');
                return;
            case 3:
                skipBracketed(c, '(', ')');
                return;
            default:
                MyLogger.e("fatal error: mS00 out of range");
                return;
        }
    }

    /** Tracks nesting inside a dropped bracketed span; returns to plain text on the outermost close. */
    private void skipBracketed(char c, char open, char close) {
        if (c == open) {
            this.mS01++;
        } else if (c == close) {
            if (this.mS01 == 0) {
                this.mS00 = 0;
            } else {
                this.mS01--;
            }
        }
    }

    /** Handles one character inside {@code <...>}: tag name first, attribute text after the first whitespace. */
    private void consumeTagChar(char c, ArrayList<Element> arrayList) {
        if (c == '>') {
            this.mS = 0;
            handleTag(arrayList);
        } else if (this.mS1 == 1) {
            this.mCurAttr.append(c);
        } else if (c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f') {
            // Any HTML whitespace ends the tag name, not only a space; otherwise a tag such as
            // "<a\nhref=...>" would never be recognised and skip depths would get out of balance.
            this.mS1 = 1;
            this.mCurAttr.setLength(0);
        } else {
            this.mTagBuf.append(c);
        }
    }

    /**
     * Starts collecting a new element: discards any buffered text and records the tag code.
     *
     * @param i tag code of the element that starts here
     */
    private void startTagClearBuf(int i) {
        mCurBuf.setLength(0);
        mSCurTag = i;
    }

    /**
     * Registers a filter for tags whose content should be skipped.
     *
     * @param tagName   tag name the filter applies to
     * @param attrName  attribute to inspect; "*" matches the tag regardless of its attributes
     * @param attrValue attribute value to look for
     */
    public void addSkipTag(String tagName, String attrName, String attrValue) {
        final HtmlTagFilter filter = new HtmlTagFilter(tagName, attrName, attrValue);
        mSkipTags.add(filter);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Returns the value of attribute {@code str} from the current attribute text
     * ({@code mCurAttr}).
     *
     * <p>Only the form {@code name="value"} or {@code name='value'} (no spaces around
     * {@code '='}) is recognised. The name must not be the tail of a longer attribute name, so
     * looking up {@code id} is not fooled by {@code data-id} or by the letters "id" inside
     * another word. Matching is case-sensitive.
     *
     * @param str attribute name
     * @return the attribute value, or "" when the attribute is missing, unquoted, unterminated
     *         or {@code str} is {@code null}/empty
     */
    protected String getCurAttribute(String str) {
        if (str == null || str.isEmpty()) {
            return "";
        }
        final int attrLen = this.mCurAttr.length();
        int searchFrom = 0;
        while (true) {
            int nameStart = this.mCurAttr.indexOf(str, searchFrom);
            if (nameStart < 0) {
                return "";
            }
            searchFrom = nameStart + 1;

            int equalsPos = nameStart + str.length();
            int quotePos = equalsPos + 1;
            if (quotePos >= attrLen) {
                // No room left for ="..." here, and later occurrences have even less.
                return "";
            }
            if (nameStart > 0 && isAttrNameChar(this.mCurAttr.charAt(nameStart - 1))) {
                continue; // tail of a longer name or word
            }
            if (this.mCurAttr.charAt(equalsPos) != '=') {
                continue;
            }
            char quote = this.mCurAttr.charAt(quotePos);
            if (quote != '\'' && quote != '"') {
                continue;
            }
            int valueStart = quotePos + 1;
            int valueEnd = this.mCurAttr.indexOf(String.valueOf(quote), valueStart);
            return valueEnd < 0 ? "" : this.mCurAttr.substring(valueStart, valueEnd);
        }
    }

    /** Tells whether {@code ch} can be part of an attribute name (used for the boundary check). */
    private static boolean isAttrNameChar(char ch) {
        return Character.isLetterOrDigit(ch) || ch == '-' || ch == '_' || ch == ':' || ch == '.';
    }

    /**
     * Hook called by {@code handleTagBegin} after an {@code <a>} start tag has been processed
     * (not called while skipping or inside a blockquote/dl block). The default implementation
     * does nothing.
     */
    protected void notifyABegin() {
    }

    /**
     * Hook called by {@code handleTagEnd} after an {@code </a>} end tag has been processed
     * (not called while skipping or inside a blockquote/dl block). The default implementation
     * does nothing.
     */
    protected void notifyAEnd() {
    }

    /**
     * Hook called by {@code handleTagBegin} for a {@code <div>} start tag, right after
     * {@code mCurDivId} has been set from the tag's {@code id} attribute. The default
     * implementation does nothing.
     */
    protected void notifyDivBegin() {
    }

    /**
     * Hook called by {@code handleTagBegin} after a {@code <li>} start tag has been processed
     * (not called while skipping or inside a blockquote/dl block). The default implementation
     * does nothing.
     */
    protected void notifyLiBegin() {
    }

    /**
     * Hook called by {@code handleTagEnd} after an {@code </li>} end tag has been processed and
     * the text sub-state has been reset. The default implementation does nothing.
     */
    protected void notifyLiEnd() {
    }

    /**
     * Runs the scanner over {@code str} and returns the elements completed while doing so.
     *
     * <p>Scanner state is kept in the instance and is not cleared here; call {@link #reset()}
     * to start from a clean state.
     *
     * @param str HTML text to scan
     * @return the elements emitted while scanning {@code str}, in document order
     */
    public ArrayList<Element> processHTML(String str) {
        final ArrayList<Element> elements = new ArrayList<>();
        MyLogger.vv(str);
        int pos = 0;
        while (pos < str.length()) {
            processOneChar(str.charAt(pos), elements);
            pos++;
        }
        return elements;
    }

    /**
     * Returns the scanner to its initial state so that a new document can be processed.
     *
     * <p>Clears all scanner and bookkeeping state, restores the element cap and sets
     * {@code mSkipParentheses} and {@code mSkipTables} back to {@code true}. The registered skip
     * filters, {@code mIsExtractHref} and {@code mSkipPlain} are left as they are.
     */
    public void reset() {
        MyLogger.vv("in HTMLExtractor.reset");
        // Scanner state.
        this.mSNumTagSkip = 0;
        this.mSSkipTagName = null;
        this.mS1 = 0;
        this.mS01 = 0;
        this.mS00 = 0;
        this.mS = 0;
        // Without this, a document that ended inside <blockquote>/<dl> would make the next one
        // ignore every start tag until a matching end tag shows up.
        this.mInBlockTag = false;
        this.mTagBuf.setLength(0);
        // Stale attribute text must not leak into the next document's attribute lookups.
        this.mCurAttr.setLength(0);

        this.mH4 = 0;
        this.mH3 = 0;
        this.mH2 = 0;

        // Output bookkeeping.
        this.mNum = 0;
        this.mMaxNum = MAX_NUM_ELEMENT;
        this.mSCurTag = 0;
        this.mCurBuf.setLength(0);

        // Options restored to their defaults.
        this.mSkipParentheses = true;
        this.mSkipTables = true;

        // List / link bookkeeping.
        this.mCurHrefs = new ArrayList<>();
        this.mCurHref = null;
        this.mCurAStartPos = 0;
        this.mNumHrefInLi = 0;
        this.mNumLiInUL = 0;
        this.mCurDivId = "";
    }

    /**
     * Produces the text of the element about to be emitted. This implementation returns the
     * buffered text unchanged; {@code addToResult} emits nothing when the result is empty.
     *
     * @return the current content of {@code mCurBuf}
     */
    protected String tweakText() {
        final String bufferedText = mCurBuf.toString();
        return bufferedText;
    }
}
