package com.shanyu.voicewikilib.htmlparse;

import com.shanyu.voicewikilib.utils.MyLogger;
import java.util.ArrayList;
import java.util.Iterator;

/* loaded from: classes.dex */
/**
 * {@link HTMLExtractor} specialisation for mobile Wikipedia pages.
 *
 * <p>On top of the base extraction it:
 * <ul>
 *   <li>registers a fixed set of tag/attribute combinations to skip,</li>
 *   <li>collects the language links found inside the div whose id is
 *       {@code content_language},</li>
 *   <li>records the current element count when a footer div is reached, and</li>
 *   <li>prefixes section headings with a running {@code 1}, {@code 1.2},
 *       {@code 1.2.3} style number.</li>
 * </ul>
 */
public class WikiHTMLExtractor extends HTMLExtractor {
    /** When {@code false}, {@link #tweakText()} returns the buffered text untouched. */
    private static final boolean FEATURE_COUNT_SECTIONS = true;

    /** Attribute-text prefixes that identify a footer div. */
    private static final String DIV_ATTR_FOOTER_1 = "id=\"footer\"";
    private static final String DIV_ATTR_FOOTER_2 = "class=\"section\" id=\"mw-mf-language-section\"";

    /** Id of the div whose anchors are treated as language links. */
    private static final String LANG_SEL_DIV_ID = "content_language";

    /** Substring an href must contain to be accepted as a language link. */
    private static final String MOBILE_WIKI_HOST = ".m.wikipedia.org";

    // Tag codes compared against mSCurTag. The numeric values are taken verbatim from the
    // original code; the names reflect which counter each one advances.
    // NOTE(review): presumably these mirror h2/h3/h4 tag constants declared in
    // HTMLExtractor - confirm and reference those constants directly if they exist.
    private static final int TAG_CODE_H2 = 258;
    private static final int TAG_CODE_H3 = 259;
    private static final int TAG_CODE_H4 = 260;

    /** Language links collected so far, in the order they were encountered. */
    private final ArrayList<LanguageHolder> mLangUrls = new ArrayList<>();

    /**
     * Creates an extractor with href extraction enabled and the Wikipedia-specific
     * skip rules registered.
     */
    public WikiHTMLExtractor() {
        this.mIsExtractHref = true;
        // Explicit super. calls are kept from the original so that registration does not
        // depend on how a further subclass might override addSkipTag.
        super.addSkipTag("div", "id", "mw-mf-page-left");
        super.addSkipTag("div", "class", "mw-specialpage-summary");
        super.addSkipTag("div", "id", "results");
        super.addSkipTag("ul", "class", "hlist");
        super.addSkipTag("a", "class", "edit-page");
        super.addSkipTag("div", "class", "search-types");
        super.addSkipTag("p", "class", "mw-search-pager-bottom");
        super.addSkipTag("script", "*", "*");
    }

    /**
     * Returns the {@code code} of every collected language link.
     *
     * @return a new array, in collection order; empty when no links were collected
     */
    public String[] getLangCodeArray() {
        String[] codes = new String[this.mLangUrls.size()];
        int i = 0;
        for (LanguageHolder holder : this.mLangUrls) {
            codes[i++] = holder.code;
        }
        return codes;
    }

    /**
     * Returns the {@code display} text of every collected language link.
     *
     * @return a new array, in collection order; empty when no links were collected
     */
    public String[] getLangStrArray() {
        String[] labels = new String[this.mLangUrls.size()];
        int i = 0;
        for (LanguageHolder holder : this.mLangUrls) {
            labels[i++] = holder.display;
        }
        return labels;
    }

    /**
     * Returns the {@code href} of every collected language link.
     *
     * @return a new array, in collection order; empty when no links were collected
     */
    public String[] getLangUrlArray() {
        String[] urls = new String[this.mLangUrls.size()];
        int i = 0;
        for (LanguageHolder holder : this.mLangUrls) {
            urls[i++] = holder.href;
        }
        return urls;
    }

    /**
     * Tells whether at least one language link has been collected.
     *
     * @return {@code true} if the language link list is non-empty
     */
    public boolean hasLangUrls() {
        return !this.mLangUrls.isEmpty();
    }

    /** No action is needed when an anchor starts. */
    @Override // com.shanyu.voicewikilib.htmlparse.HTMLExtractor
    protected void notifyABegin() {
    }

    /**
     * Records the anchor that just ended as a language link when the current div id is
     * {@code content_language} and the anchor looks like a mobile Wikipedia link.
     *
     * <p>An anchor is accepted only if its {@code href} starts with {@code '/'} and contains
     * {@code ".m.wikipedia.org"}, its {@code lang} attribute is longer than one character,
     * and the text buffered since the anchor start is non-empty.
     */
    @Override // com.shanyu.voicewikilib.htmlparse.HTMLExtractor
    protected void notifyAEnd() {
        // Constant-first equals: a null div id is simply "not the language div" instead of
        // a NullPointerException. Whether mCurDivId can be null is not visible from here.
        if (!LANG_SEL_DIV_ID.equals(this.mCurDivId)) {
            return;
        }
        String href = getCurAttribute("href");
        String lang = getCurAttribute("lang");
        String label = this.mCurBuf.substring(this.mCurAStartPos);

        // startsWith("/") also covers the empty-href case.
        boolean mobileWikiHref = href.startsWith("/") && href.contains(MOBILE_WIKI_HOST);
        if (mobileWikiHref && lang.length() > 1 && !label.isEmpty()) {
            this.mLangUrls.add(new LanguageHolder(lang, href, label));
        }
    }

    /**
     * When the current div's attribute text starts with one of the footer markers, logs the
     * fact and copies {@code mNum} into {@code mMaxNum}.
     */
    @Override // com.shanyu.voicewikilib.htmlparse.HTMLExtractor
    protected void notifyDivBegin() {
        String attrText = this.mCurAttr.toString();
        if (attrText.startsWith(DIV_ATTR_FOOTER_1) || attrText.startsWith(DIV_ATTR_FOOTER_2)) {
            MyLogger.d("reached div DIV_ATTR_FOOTER");
            // NOTE(review): presumably this marks the current position as the end of the
            // useful content - confirm against how HTMLExtractor uses mMaxNum.
            this.mMaxNum = this.mNum;
        }
    }

    /** Turns parentheses skipping off for the duration of a list item. */
    @Override // com.shanyu.voicewikilib.htmlparse.HTMLExtractor
    protected void notifyLiBegin() {
        this.mSkipParentheses = false;
    }

    /** Clears the plain-text skip flag and turns parentheses skipping back on. */
    @Override // com.shanyu.voicewikilib.htmlparse.HTMLExtractor
    protected void notifyLiEnd() {
        this.mSkipPlain = false;
        this.mSkipParentheses = true;
    }

    /** Resets the base extractor state and forgets all collected language links. */
    @Override // com.shanyu.voicewikilib.htmlparse.HTMLExtractor
    public void reset() {
        super.reset();
        this.mLangUrls.clear();
    }

    /**
     * Returns the buffered text, prefixed with a section number when the current tag code is
     * one of the three heading codes.
     *
     * <p>The first heading level increments {@code mH2} and zeroes {@code mH3}/{@code mH4};
     * the second increments {@code mH3} and zeroes {@code mH4}; the third increments
     * {@code mH4}. The produced prefixes are {@code "2 "}, {@code "2.1 "} and
     * {@code "2.1.3 "} respectively.
     *
     * @return the numbered heading text, or the buffered text unchanged for any other tag
     *         code or when section counting is disabled
     */
    @Override // com.shanyu.voicewikilib.htmlparse.HTMLExtractor
    protected String tweakText() {
        String text = this.mCurBuf.toString();
        if (!FEATURE_COUNT_SECTIONS) {
            return text;
        }
        if (this.mSCurTag == TAG_CODE_H2) {
            this.mH2++;
            this.mH3 = 0;
            this.mH4 = 0;
            // NOTE(review): unlike the two deeper levels, the text is not trimmed here.
            // Kept as in the original; confirm whether that asymmetry is intended.
            return this.mH2 + " " + text;
        }
        if (this.mSCurTag == TAG_CODE_H3) {
            this.mH3++;
            this.mH4 = 0;
            return this.mH2 + "." + this.mH3 + " " + text.trim();
        }
        if (this.mSCurTag == TAG_CODE_H4) {
            this.mH4++;
            return this.mH2 + "." + this.mH3 + "." + this.mH4 + " " + text.trim();
        }
        return text;
    }
}
