package org.daisy.pipeline.nlp.ruledcategorizers;

import java.io.IOException;
import org.daisy.pipeline.nlp.RuleBasedTextCategorizer;
import org.daisy.pipeline.nlp.TextCategorizer;
import org.daisy.pipeline.nlp.impl.matchrules.NumberRangeMatchRule;
import org.daisy.pipeline.nlp.impl.matchrules.RegexMatchRule;

/* loaded from: input_file:org/daisy/pipeline/nlp/ruledcategorizers/RuledMultilangCategorizer.class */
/**
 * Text categorizer that registers a fixed set of regex-based rules (plus one
 * number-range rule) for dates, times, dimensions, currencies, numbering
 * items, quantities, identifiers, whitespace, quotes, web links, e-mail
 * addresses, acronyms, abbreviations, common words and punctuation.
 *
 * <p>The {@code *_PRIORITY} fields are the priorities handed to each rule.
 * How priorities are compared is decided by {@link RuleBasedTextCategorizer}
 * (presumably higher values win -- confirm there). The fields are kept
 * non-final so that existing code reading or assigning them keeps compiling.
 */
public class RuledMultilangCategorizer extends RuleBasedTextCategorizer {
    public static int LOWEST_PRIORITY = 0;
    public static int COMMON_WORD_MAX_PRIORITY = 50;
    public static int SPACE_MAX_PRIORITY = 100;
    public static int QUOTE_MAX_PRIORITY = 125;
    public static int NUMBER_MAX_PRIORITY = 150;
    public static int ACRONYM_MAX_PRIORITY = 160;
    public static int ABBR_MAX_PRIORITY = 220;
    public static int WEBLINK_MAX_PRIORITY = 300;
    public static int SPACE_COMPOSED_MAX_PRIORITY = 500;
    public static int NUMBER_COMPOSED_MAX_PRIORITY = 600;
    public static int DICTIONARY_MAX_PRIORITY = 700;

    /** Regex for a common word: a letter or '@' followed by letters, digits, '-', '_' or '@'. */
    protected static String CommonWordPattern = "[@\\p{L}][-_@\\p{L}\\p{Nd}]*";

    /**
     * Contents of a regex character class (without the surrounding brackets)
     * listing the characters treated as whitespace. It is meant to be embedded
     * as {@code "[" + Space + "]"} or {@code "[^" + Space + "]"}.
     */
    protected static String Space = buildSpaceClass();

    /**
     * Builds the value of {@link #Space}: ten individual code points followed
     * by the two ranges TAB..CR and U+2000..U+200A.
     */
    private static String buildSpaceClass() {
        // Numeric code points are used so the source file stays pure ASCII.
        char[] singles = {' ', 0x0085, 0x00A0, 0x1680, 0x180E, 0x2028, 0x2029, 0x202F, 0x205F, 0x3000};
        StringBuilder chars = new StringBuilder();
        chars.append(singles);
        // The '-' characters below are range operators inside the character class.
        chars.append('\t').append('-').append('\r');
        chars.append((char) 0x2000).append('-').append((char) 0x200A);
        return chars.toString();
    }

    /**
     * Initializes the parent categorizer and then registers every rule.
     * Rules are added in a fixed order, which is kept stable in case the
     * parent class uses insertion order to break priority ties.
     *
     * @param matchMode match mode forwarded to the parent's {@code init}
     * @throws IOException if the parent initialization or a rule initialization fails
     */
    @Override
    public void init(TextCategorizer.MatchMode matchMode) throws IOException {
        super.init(matchMode);

        // Shared regex fragments.
        final String year = "([1-9][0-9]{1,3}|[0-9]{2})";
        final String month = "(1[0-2]|0?[1-9])";
        final String day = "(3[01]|[12]0|[0-2]?[1-9])";
        final String notDashOrAlnum = "(?![-\\p{L}\\p{Nd}])";
        final String notSlashOrAlnum = "(?![/\\p{L}\\p{Nd}])";
        final String notAlnum = "(?![\\p{L}\\p{Nd}])";
        // The leading group of a thousands-separated number may contain zeros
        // ("10,000", "100 000"), so it is [1-9][0-9]{0,2} rather than [1-9]{1,3}.
        final String number = "(([1-9][0-9]{0,2}([,' ][0-9]{3})+|[1-9][0-9]*)(\\.[0-9]+)?)";
        final String currency = "([\\$€£₤¥]|usd|euro[s]?)";

        // Dates written with dashes (year first) and with slashes (day first).
        registerRegex(TextCategorizer.Category.DATE, NUMBER_COMPOSED_MAX_PRIORITY, true,
                year + "-" + month + "-" + day + notDashOrAlnum);
        registerRegex(TextCategorizer.Category.DATE, NUMBER_COMPOSED_MAX_PRIORITY, true,
                month + "-" + day + notDashOrAlnum);
        registerRegex(TextCategorizer.Category.DATE, NUMBER_COMPOSED_MAX_PRIORITY, true,
                day + "/" + month + "/" + year + notSlashOrAlnum);
        registerRegex(TextCategorizer.Category.DATE, NUMBER_COMPOSED_MAX_PRIORITY, true,
                day + "/" + month + notSlashOrAlnum);

        // The range rule is registered before the two-part year/month dates.
        addRule(new NumberRangeMatchRule(TextCategorizer.Category.RANGE, NUMBER_COMPOSED_MAX_PRIORITY, this.mMatchMode));

        registerRegex(TextCategorizer.Category.DATE, NUMBER_COMPOSED_MAX_PRIORITY, true,
                year + "-" + month + notDashOrAlnum);
        registerRegex(TextCategorizer.Category.DATE, NUMBER_COMPOSED_MAX_PRIORITY, true,
                month + "/" + year + notSlashOrAlnum);

        // NOTE(review): the minutes part accepts 60-69; kept as-is, confirm whether intended.
        registerRegex(TextCategorizer.Category.TIME, NUMBER_COMPOSED_MAX_PRIORITY, true,
                "(2[0-4]|[01][0-9]):[0-6][0-9](?![0-9])");

        registerRegex(TextCategorizer.Category.DIMENSIONS, NUMBER_COMPOSED_MAX_PRIORITY, true,
                number + "(x|[ ]x[ ])" + number + notAlnum);

        // Currency marker after or before the amount.
        registerRegex(TextCategorizer.Category.CURRENCY, NUMBER_COMPOSED_MAX_PRIORITY, false,
                number + currency + notAlnum);
        registerRegex(TextCategorizer.Category.CURRENCY, NUMBER_COMPOSED_MAX_PRIORITY, false,
                currency + number + notAlnum);

        registerRegex(TextCategorizer.Category.NUMBERING_ITEM, NUMBER_COMPOSED_MAX_PRIORITY, false,
                "[0-9]+([-.][0-9]+)*\\." + notAlnum);

        registerRegex(TextCategorizer.Category.QUANTITY, NUMBER_MAX_PRIORITY, true, number);
        registerRegex(TextCategorizer.Category.IDENTIFIER, NUMBER_MAX_PRIORITY - 1, true,
                "[0-9]+([-_:][0-9]+)*" + notAlnum);

        registerRegex(TextCategorizer.Category.SPACE, SPACE_MAX_PRIORITY, true, "[" + Space + "]+");
        registerRegex(TextCategorizer.Category.QUOTE, QUOTE_MAX_PRIORITY, true, "[\\p{Pf}\\p{Pi}\"']");

        registerRegex(TextCategorizer.Category.WEB_LINK, WEBLINK_MAX_PRIORITY, true,
                "[a-z]+://[^" + Space + "]*");
        registerRegex(TextCategorizer.Category.WEB_LINK, WEBLINK_MAX_PRIORITY, true,
                "www\\.[^" + Space + "]+");
        registerRegex(TextCategorizer.Category.EMAIL_ADDR, WEBLINK_MAX_PRIORITY, true,
                "[\\p{L}][-_.\\p{L}\\p{Nd}]*(@|\\(at\\))[\\p{L}][-_.\\p{L}\\p{Nd}]*");

        // Dotted acronyms: followed by whitespace and a lowercase letter,
        // ending with a letter or digit, or a single uppercase letter with a dot.
        registerRegex(TextCategorizer.Category.ACRONYM, ACRONYM_MAX_PRIORITY, true,
                "[\\p{L}]\\.([-]?[\\p{L}\\p{Nd}]\\.)+(?=[" + Space + "]+[\\p{Ll}])");
        registerRegex(TextCategorizer.Category.ACRONYM, ACRONYM_MAX_PRIORITY, true,
                "[\\p{L}]\\.([-]?[\\p{L}\\p{Nd}]\\.)+[\\p{L}\\p{Nd}]");
        registerRegex(TextCategorizer.Category.ACRONYM, ACRONYM_MAX_PRIORITY, true,
                "[\\p{Lu}]\\.(?=[" + Space + "])");

        registerRegex(TextCategorizer.Category.ABBREVIATION, ABBR_MAX_PRIORITY, true,
                "[\\p{Ll}]\\.(?=[" + Space + "])");

        registerRegex(TextCategorizer.Category.COMMON, COMMON_WORD_MAX_PRIORITY, true, CommonWordPattern);

        // Catch-all: any single character not claimed by a rule above.
        registerRegex(TextCategorizer.Category.PUNCTUATION, LOWEST_PRIORITY, true, ".");
    }

    /**
     * Creates a {@link RegexMatchRule}, initializes it with {@code pattern}
     * and adds it to this categorizer.
     *
     * @param category  category assigned to text matched by the rule
     * @param priority  priority passed to the rule
     * @param regexFlag third constructor argument of {@code RegexMatchRule}
     *                  (presumably case sensitivity -- confirm against that class)
     * @param pattern   regular expression passed to the rule's {@code init}
     * @throws IOException if initializing the rule fails
     */
    private void registerRegex(TextCategorizer.Category category, int priority, boolean regexFlag, String pattern)
            throws IOException {
        RegexMatchRule rule = new RegexMatchRule(category, priority, regexFlag, this.mMatchMode);
        rule.init(pattern);
        addRule(rule);
    }
}
