package org.apache.tika.sax;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map;
import java.util.NavigableMap;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.sax.StandardReference;
import thinlet.ThinletConstants;

/* loaded from: input_file:WEB-INF/lib/lucee.jar:bundles/org.apache.tika.core-1.28.3.jar:org/apache/tika/sax/StandardsText.class */
/**
 * Extracts candidate references to standards from plain text using regular expressions.
 *
 * <p>Extraction proceeds in three steps:
 * <ol>
 *   <li>numbered, upper-case section headers are located and indexed by their start offset;</li>
 *   <li>every span matching the generic "organization + identifier" pattern is taken as a
 *       candidate reference;</li>
 *   <li>each candidate receives a score that starts at 0.25 and is raised by 0.25 for each of
 *       the following that holds: the matched text matches the organization-based pattern
 *       built from {@code StandardOrganizations.getOrganzationsRegex()}, the matched text
 *       contains the word "Publication" or "Standard", and the closest preceding header
 *       matches the "applicable documents" pattern.</li>
 * </ol>
 * Only candidates whose score reaches the caller-supplied threshold are returned.
 */
public class StandardsText {
    // Numbered section header, e.g. "1.2 SOME UPPERCASE TITLE".
    private static final String REGEX_HEADER = "(\\d{1,10}\\.(\\d{1,10}\\.?){0,10})\\p{Blank}+([A-Z]{1,256}(\\s[A-Z]+){0,256}){5,}";
    // Header titles regarded as "applicable documents" style sections.
    // NOTE(review): '|' has the lowest precedence, so the leading ".*" belongs only to the first
    // alternative and the trailing ".*" only to the last one. With full-string matching a header
    // such as "2. REFERENCES" is therefore rejected. Confirm whether ".*(A|B|...).*" was intended
    // before changing it, since that would alter scores.
    private static final String REGEX_APPLICABLE_DOCUMENTS = "(?i:.*APPLICABLE\\sDOCUMENTS|REFERENCE|STANDARD|REQUIREMENT|GUIDELINE|COMPLIANCE.*)";
    // Identifier part of a reference (kept for documentation; it is embedded in REGEX_FALLBACK).
    private static final String REGEX_IDENTIFIER = "(?<identifier>([0-9]{3,20}|([A-Z]+(-|_|\\.)?[0-9]{2,20}))((-|_|\\.)?[A-Z0-9]+){0,10})";
    // The words "Publication" or "Standard" preceded by whitespace.
    private static final String REGEX_STANDARD_TYPE = "(\\s(?i:Publication|Standard))";
    // Generic candidate pattern: organization [/ second organization] [type] identifier.
    private static final String REGEX_FALLBACK = "\\(?(?<mainOrganization>[A-Z]\\w{1,100})\\)?((\\s?(?<separator>\\/)\\s?)(\\w{1,100}\\s)*\\(?(?<secondOrganization>[A-Z]\\w{1,100})\\)?)?(\\s(?i:Publication|Standard))?(-|\\s)?(?<identifier>([0-9]{3,20}|([A-Z]+(-|_|\\.)?[0-9]{2,20}))((-|_|\\.)?[A-Z0-9]+){0,10})";
    private static final String REGEX_ORGANIZATION = StandardOrganizations.getOrganzationsRegex();
    private static final String REGEX_STANDARD = ".*" + REGEX_ORGANIZATION + ".+" + REGEX_ORGANIZATION + "?.*";

    // Names of the capturing groups declared in REGEX_FALLBACK.
    private static final String GROUP_MAIN_ORGANIZATION = "mainOrganization";
    private static final String GROUP_SEPARATOR = "separator";
    private static final String GROUP_SECOND_ORGANIZATION = "secondOrganization";
    private static final String GROUP_IDENTIFIER = "identifier";

    // Every candidate starts at this score and gains the same amount per satisfied criterion.
    private static final double SCORE_STEP = 0.25d;

    // Patterns are immutable and thread-safe, so they are compiled once instead of per call.
    // They must be declared after the REGEX_* strings they are built from.
    private static final Pattern HEADER_PATTERN = Pattern.compile(REGEX_HEADER);
    private static final Pattern FALLBACK_PATTERN = Pattern.compile(REGEX_FALLBACK);
    private static final Pattern STANDARD_PATTERN = Pattern.compile(REGEX_STANDARD);
    private static final Pattern STANDARD_TYPE_PATTERN = Pattern.compile(".*" + REGEX_STANDARD_TYPE + ".*");
    private static final Pattern APPLICABLE_DOCUMENTS_PATTERN = Pattern.compile(REGEX_APPLICABLE_DOCUMENTS);

    /**
     * Extracts the standard references found in the given text.
     *
     * @param str the text to scan
     * @param d   the minimum score (inclusive) a candidate must reach to be returned
     * @return the references whose score is at least {@code d}, in order of appearance
     */
    public static ArrayList<StandardReference> extractStandardReferences(String str, double d) {
        return findStandards(str, findHeaders(str), d);
    }

    /**
     * Locates all section headers in the text.
     *
     * @param text the text to scan
     * @return the header texts keyed by their start offset, sorted by offset
     */
    private static NavigableMap<Integer, String> findHeaders(String text) {
        NavigableMap<Integer, String> headers = new TreeMap<>();
        Matcher matcher = HEADER_PATTERN.matcher(text);
        while (matcher.find()) {
            headers.put(matcher.start(), matcher.group());
        }
        return headers;
    }

    /**
     * Finds and scores candidate references, keeping those that reach the threshold.
     *
     * @param text      the text to scan
     * @param headers   the section headers of {@code text}, keyed by start offset
     * @param threshold the minimum score (inclusive) for a candidate to be kept
     * @return the accepted references, in order of appearance
     */
    private static ArrayList<StandardReference> findStandards(String text, NavigableMap<Integer, String> headers, double threshold) {
        ArrayList<StandardReference> references = new ArrayList<>();
        Matcher matcher = FALLBACK_PATTERN.matcher(text);
        while (matcher.find()) {
            StandardReference.StandardReferenceBuilder builder =
                    new StandardReference.StandardReferenceBuilder(
                            matcher.group(GROUP_MAIN_ORGANIZATION), matcher.group(GROUP_IDENTIFIER))
                            .setSecondOrganization(
                                    matcher.group(GROUP_SEPARATOR), matcher.group(GROUP_SECOND_ORGANIZATION));
            double score = score(matcher.group(), matcher.start(), headers);
            builder.setScore(score);
            if (score >= threshold) {
                references.add(builder.build());
            }
        }
        return references;
    }

    /**
     * Computes the score of one candidate reference.
     *
     * @param candidate the matched text of the candidate
     * @param offset    the start offset of the candidate in the scanned text
     * @param headers   the section headers of the scanned text, keyed by start offset
     * @return a score between 0.25 and 1.0 in steps of 0.25
     */
    private static double score(String candidate, int offset, NavigableMap<Integer, String> headers) {
        double score = SCORE_STEP;
        if (STANDARD_PATTERN.matcher(candidate).matches()) {
            score += SCORE_STEP;
        }
        if (STANDARD_TYPE_PATTERN.matcher(candidate).matches()) {
            score += SCORE_STEP;
        }
        // The section a candidate belongs to is the last header starting at or before it.
        // floorEntry also handles candidates located after the final header, which a linear
        // scan that only stops on "first header past the candidate" attributes to the
        // second-to-last header instead.
        Map.Entry<Integer, String> enclosingHeader = headers.floorEntry(offset);
        if (enclosingHeader != null
                && APPLICABLE_DOCUMENTS_PATTERN.matcher(enclosingHeader.getValue()).matches()) {
            score += SCORE_STEP;
        }
        return score;
    }
}
