package org.apache.tika.parser.html.charsetdetector;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.AbstractMap;
import java.util.BitSet;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* JADX INFO: Access modifiers changed from: package-private */
/* loaded from: classes2.dex */
/**
 * Byte-level scanner that looks for a character-encoding declaration near the
 * start of an HTML document, before the document has been decoded.
 *
 * <p>The scanner walks the wrapped stream one construct at a time (comments,
 * {@code <meta>} tags, ordinary tags, {@code <!}/{@code </}/{@code <?}
 * constructs, or a single arbitrary byte). Attributes of {@code <meta>} tags are
 * handed to a {@code MetaProcessor}, which is responsible for updating the
 * detection result; scanning stops as soon as that result reports a charset.
 *
 * <p>NOTE(review): the structure looks like the HTML specification's
 * "prescan a byte stream to determine its encoding" algorithm; confirm against
 * the spec before relying on that.
 *
 * <p>End of input is signalled internally by an {@link IOException} thrown from
 * {@link #read()}; the public entry points catch it and report "nothing found".
 * Instances hold a mutable stream position and are not safe for concurrent use.
 */
public class PreScanner {
    private static final Pattern CHARSET_PATTERN = Pattern.compile("charset\\s*=\\s*([\"']?)([^\"'\\s;]+)\\1");

    private static final byte TAG_START = 60; // '<'
    private static final byte TAG_END = 62; // '>'
    private static final byte SLASH = 47; // '/'
    private static final byte EQUAL = 61; // '='
    private static final byte LOWER_A = 97; // 'a'
    private static final byte LOWER_Z = 122; // 'z'
    private static final byte UPPER_A = 65; // 'A'
    private static final byte UPPER_Z = 90; // 'Z'

    private static final byte[] COMMENT_START = {TAG_START, 33, 45, 45}; // "<!--"
    private static final byte[] COMMENT_END = {45, 45, TAG_END}; // "-->"
    private static final byte[] META_TAG_START = {TAG_START, 109, 101, 116, LOWER_A}; // "<meta"

    private static final byte[] UTF8_BOM = {-17, -69, -65}; // EF BB BF
    private static final byte[] UTF16_BE_BOM = {-2, -1}; // FE FF
    private static final byte[] UTF16_LE_BOM = {-1, -2}; // FF FE

    // Declaration order matters: the derived sets below clone WHITESPACE.
    private static final BitSet QUOTE = bitSet(34, 39); // '"' and '\''
    private static final BitSet WHITESPACE = bitSet(9, 10, 12, 13, 32); // TAB, LF, FF, CR, SPACE
    private static final BitSet SPACE_OR_TAG_END = bitSet(WHITESPACE, TAG_END);
    private static final BitSet SPACE_OR_SLASH = bitSet(WHITESPACE, SLASH);
    private static final BitSet SPECIAL_TAGS = bitSet(33, SLASH, 63); // '!', '/', '?'

    private final CharsetDetectionResult detectedCharset = CharsetDetectionResult.notFound();
    private final BufferedInputStream stream;

    /**
     * Creates a scanner over the given stream.
     *
     * @param inputStream the raw bytes to inspect; it is wrapped in a
     *                    {@link BufferedInputStream} so that mark/reset is available
     */
    public PreScanner(InputStream inputStream) {
        this.stream = new BufferedInputStream(inputStream);
    }

    /**
     * Returns a copy of {@code base} with the additional bits set.
     */
    private static BitSet bitSet(BitSet base, int... extraBits) {
        BitSet copy = (BitSet) base.clone();
        for (int bit : extraBits) {
            copy.set(bit);
        }
        return copy;
    }

    /**
     * Builds a set of byte values, indexed by their unsigned value.
     */
    private static BitSet bitSet(int... bits) {
        BitSet set = new BitSet(256);
        for (int bit : bits) {
            set.set(bit);
        }
        return set;
    }

    /**
     * Tests membership of a byte, treating it as unsigned (0-255).
     */
    private static boolean contains(BitSet set, byte value) {
        return set.get(value & 0xFF);
    }

    /**
     * Extracts the value of a {@code charset=...} declaration from a string.
     *
     * @param str the text to search (for example a meta {@code content} value);
     *            must not be {@code null}
     * @return the declared encoding label with any surrounding quotes removed,
     *         or {@code null} if the string contains no charset declaration
     */
    public static String getEncodingFromMeta(String str) {
        Matcher matcher = CHARSET_PATTERN.matcher(str);
        return matcher.find() ? matcher.group(2) : null;
    }

    /**
     * Checks whether the stream starts, at its current position, with a UTF-8 or
     * UTF-16 byte order mark. A matching BOM is consumed; a mismatch leaves the
     * position unchanged.
     *
     * @return the charset indicated by the BOM, or {@code null} if none matched
     *         or the stream ended or failed while checking
     */
    public Charset detectBOM() {
        try {
            if (expect(UTF8_BOM)) {
                return StandardCharsets.UTF_8;
            }
            if (expect(UTF16_BE_BOM)) {
                return StandardCharsets.UTF_16BE;
            }
            if (expect(UTF16_LE_BOM)) {
                return StandardCharsets.UTF_16LE;
            }
            return null;
        } catch (IOException ignored) {
            // Best effort: a stream too short to hold a BOM simply has none.
            return null;
        }
    }

    /**
     * Scans forward until a charset is detected or the input is exhausted.
     *
     * @return the detected charset, or {@code null} if the scan reached the end
     *         of the stream (or an I/O error) without finding one
     */
    public Charset scan() {
        while (processAtLeastOneByte()) {
            if (detectedCharset.isFound()) {
                return detectedCharset.getCharset();
            }
        }
        return null;
    }

    /**
     * Consumes the next construct, falling back to a single byte.
     *
     * @return {@code false} once nothing more can be read
     */
    private boolean processAtLeastOneByte() {
        try {
            return processComment()
                    || processMeta()
                    || processTag()
                    || processSpecialTag()
                    || processAny();
        } catch (IOException ignored) {
            // read() reports end of stream as an IOException; either way the scan is over.
            return false;
        }
    }

    /**
     * Consumes one arbitrary byte.
     *
     * @return {@code false} at end of stream
     */
    private boolean processAny() throws IOException {
        return stream.read() != -1;
    }

    /**
     * Consumes an HTML comment if one starts at the current position.
     *
     * @return {@code true} if a comment opener was found and skipped
     */
    private boolean processComment() throws IOException {
        if (!expect(COMMENT_START)) {
            return false;
        }
        // "<!-->" is treated as a complete (empty) comment.
        if (!expect(TAG_END)) {
            skipUntil(COMMENT_END);
        }
        return true;
    }

    /**
     * Consumes a {@code <meta} tag if one starts at the current position and feeds
     * each of its attributes to a fresh {@code MetaProcessor}, which then updates
     * the detection result.
     *
     * @return {@code true} if a meta tag was consumed
     */
    private boolean processMeta() throws IOException {
        // "<meta" plus the one delimiter byte that must follow it.
        stream.mark(META_TAG_START.length + 1);
        if (!readCaseInsensitive(META_TAG_START) || !contains(SPACE_OR_SLASH, read())) {
            stream.reset();
            return false;
        }
        MetaProcessor metaProcessor = new MetaProcessor();
        Map.Entry<String, String> attribute;
        while ((attribute = getAttribute()) != null) {
            metaProcessor.processAttribute(attribute);
        }
        metaProcessor.updateDetectedCharset(detectedCharset);
        return true;
    }

    /**
     * Consumes an ordinary start or end tag ({@code <x...} or {@code </x...} where
     * {@code x} is an ASCII letter), including all of its attributes.
     *
     * @return {@code true} if a tag was consumed
     */
    private boolean processTag() throws IOException {
        stream.mark(3);
        if (read() == TAG_START) {
            int next = stream.read();
            if (next == SLASH) {
                next = stream.read();
            }
            boolean isAsciiLetter = (LOWER_A <= next && next <= LOWER_Z) || (UPPER_A <= next && next <= UPPER_Z);
            if (isAsciiLetter) {
                // Skip the rest of the tag name, leaving the delimiter unread.
                do {
                    stream.mark(1);
                } while (!contains(SPACE_OR_TAG_END, read()));
                stream.reset();
                // Attributes of non-meta tags are parsed only to find the end of the tag.
                while (getAttribute() != null) {
                    // intentionally empty
                }
                return true;
            }
        }
        stream.reset();
        return false;
    }

    /**
     * Consumes a {@code <!...>}, {@code </...>} or {@code <?...>} construct up to
     * and including the next {@code '>'}.
     *
     * @return {@code true} if such a construct was consumed
     */
    private boolean processSpecialTag() throws IOException {
        stream.mark(2);
        if (read() == TAG_START && contains(SPECIAL_TAGS, read())) {
            skipUntil(TAG_END);
            return true;
        }
        stream.reset();
        return false;
    }

    /**
     * Reads the next attribute of the tag currently being parsed.
     *
     * @return the lower-cased name mapped to its lower-cased value (empty when the
     *         attribute has no {@code =value} part), or {@code null} once the
     *         closing {@code '>'} has been consumed
     */
    private Map.Entry<String, String> getAttribute() throws IOException {
        String name = getAttributeName();
        if (name == null) {
            return null;
        }
        if (!expect(EQUAL)) {
            return new AbstractMap.SimpleEntry<>(name, "");
        }
        skipAll(WHITESPACE);
        return new AbstractMap.SimpleEntry<>(name, getAttributeValue());
    }

    /**
     * Reads an attribute name, lower-casing ASCII letters.
     *
     * <p>The name ends at {@code '='} (unless it is the very first character),
     * at {@code '>'} or {@code '/'}, or at a run of whitespace, which is consumed.
     *
     * @return the name (possibly empty), or {@code null} if the tag's closing
     *         {@code '>'} was found, and consumed, instead of an attribute
     */
    private String getAttributeName() throws IOException {
        skipAll(SPACE_OR_SLASH);
        if (expect(TAG_END)) {
            return null;
        }
        StringBuilder name = new StringBuilder();
        while (true) {
            byte next = peek();
            if (next == EQUAL && name.length() > 0) {
                break;
            }
            if (next == TAG_END || next == SLASH) {
                break;
            }
            // Trailing whitespace terminates the name and is swallowed here.
            if (skipAll(WHITESPACE)) {
                break;
            }
            name.append((char) getLowerCaseChar());
        }
        return name.toString();
    }

    /**
     * Reads an attribute value, lower-casing ASCII letters.
     *
     * <p>A value opening with a single or double quote runs to the matching quote,
     * which is consumed. An unquoted value runs up to, but does not consume, the
     * next whitespace byte or {@code '>'}.
     */
    private String getAttributeValue() throws IOException {
        StringBuilder value = new StringBuilder();
        stream.mark(1);
        byte first = read();
        if (contains(QUOTE, first)) {
            for (byte b = getLowerCaseChar(); b != first; b = getLowerCaseChar()) {
                value.append((char) b);
            }
        } else {
            // Not a quote: push the byte back and re-read it as part of the value.
            stream.reset();
            for (byte b = getLowerCaseChar(); !contains(SPACE_OR_TAG_END, b); b = getLowerCaseChar()) {
                value.append((char) b);
                stream.mark(1);
            }
            // Un-read the delimiter so the caller sees it.
            stream.reset();
        }
        return value.toString();
    }

    /**
     * Tries to consume exactly the given bytes.
     *
     * @return {@code true} if they all matched (and were consumed); {@code false}
     *         on the first mismatch, with the position restored
     * @throws IOException if the stream ends before a mismatch is found; the
     *                     position is not restored in that case
     */
    private boolean expect(byte... expected) throws IOException {
        stream.mark(expected.length);
        for (byte b : expected) {
            if (read() != b) {
                stream.reset();
                return false;
            }
        }
        return true;
    }

    /**
     * Like {@link #expect(byte...)} but folds ASCII upper case to lower case
     * before comparing and does not restore the position on mismatch; the caller
     * is expected to have set a mark.
     */
    private boolean readCaseInsensitive(byte... expectedLowerCase) throws IOException {
        for (byte b : expectedLowerCase) {
            if (getLowerCaseChar() != b) {
                return false;
            }
        }
        return true;
    }

    /**
     * Consumes a run of bytes belonging to {@code set}.
     *
     * @return {@code true} if at least one byte was skipped
     */
    private boolean skipAll(BitSet set) throws IOException {
        boolean skippedAny = false;
        stream.mark(1);
        while (contains(set, read())) {
            skippedAny = true;
            stream.mark(1);
        }
        // Un-read the first byte that is not in the set.
        stream.reset();
        return skippedAny;
    }

    /**
     * Advances one byte at a time until {@code terminator} has been matched and
     * consumed, or the underlying stream reports end of input.
     */
    private void skipUntil(byte... terminator) throws IOException {
        while (!expect(terminator)) {
            if (stream.read() == -1) {
                return;
            }
        }
    }

    /**
     * Returns the next byte without consuming it.
     */
    private byte peek() throws IOException {
        stream.mark(1);
        byte next = read();
        stream.reset();
        return next;
    }

    /**
     * Reads one byte, mapping ASCII {@code 'A'}-{@code 'Z'} to lower case.
     */
    private byte getLowerCaseChar() throws IOException {
        byte b = read();
        if (b >= UPPER_A && b <= UPPER_Z) {
            return (byte) (b + (LOWER_A - UPPER_A));
        }
        return b;
    }

    /**
     * Reads one byte.
     *
     * @throws IOException if the end of the stream has been reached; the rest of
     *                     the class relies on this to terminate its loops
     */
    private byte read() throws IOException {
        int b = stream.read();
        if (b == -1) {
            throw new IOException("Unexpected end of stream");
        }
        return (byte) b;
    }
}
