/*
 * Decompiled with CFR 0.152.
 */
package org.daisy.pipeline.nlp.ruledcategorizers;

import java.io.IOException;
import org.daisy.pipeline.nlp.RuleBasedTextCategorizer;
import org.daisy.pipeline.nlp.TextCategorizer;
import org.daisy.pipeline.nlp.impl.matchrules.NumberRangeMatchRule;
import org.daisy.pipeline.nlp.impl.matchrules.RegexMatchRule;

/**
 * Rule-based text categorizer that registers a fixed set of regular-expression
 * rules (dates, times, dimensions, currencies, numbers, identifiers, spaces,
 * quotes, web links, e-mail addresses, acronyms, abbreviations, common words
 * and a catch-all punctuation rule) plus one {@link NumberRangeMatchRule}.
 *
 * <p>None of the patterns built here refers to a specific language; they only
 * rely on digits, Unicode letter/number classes and a few symbols.
 */
public class RuledMultilangCategorizer
extends RuleBasedTextCategorizer {
    // Priorities handed to the rules registered in init(). They are kept
    // non-final because they are part of the class's public, assignable
    // interface. NOTE(review): presumably a higher value wins when several
    // rules match -- confirm against RuleBasedTextCategorizer.
    public static int LOWEST_PRIORITY = 0;
    public static int COMMON_WORD_MAX_PRIORITY = 50;
    public static int SPACE_MAX_PRIORITY = 100;
    public static int QUOTE_MAX_PRIORITY = 125;
    public static int NUMBER_MAX_PRIORITY = 150;
    public static int ACRONYM_MAX_PRIORITY = 160;
    public static int ABBR_MAX_PRIORITY = 220;
    public static int WEBLINK_MAX_PRIORITY = 300;
    public static int SPACE_COMPOSED_MAX_PRIORITY = 500;
    public static int NUMBER_COMPOSED_MAX_PRIORITY = 600;
    public static int DICTIONARY_MAX_PRIORITY = 700;

    /** Regex for a common word: a letter or '@' followed by letters, digits, '-', '_' or '@'. */
    protected static String CommonWordPattern = "[@\\p{L}][-_@\\p{L}\\p{Nd}]*";

    /**
     * Content of a regex character class (without the surrounding brackets)
     * listing the characters treated as white space by the rules below.
     */
    protected static String Space = buildSpaceCharClass();

    /**
     * Initializes the parent categorizer and then registers every rule of this
     * categorizer. The registration order is the same for every call.
     *
     * @param matchMode match mode forwarded to the parent class
     * @throws IOException if the parent initialization or a rule initialization fails
     */
    @Override
    public void init(TextCategorizer.MatchMode matchMode) throws IOException {
        super.init(matchMode);

        // Negative lookaheads that stop a match from ending in the middle of a
        // longer token.
        String notBeforeDashOrWord = "(?![-\\p{L}\\p{Nd}])";
        String notBeforeSlashOrWord = "(?![/\\p{L}\\p{Nd}])";
        String notBeforeWord = "(?![\\p{L}\\p{Nd}])";

        // ---- dates ----
        String year = "([1-9][0-9]{1,3}|[0-9]{2})";
        String month = "(1[0-2]|0?[1-9])";
        String days = "(3[01]|[12]0|[0-2]?[1-9])";
        this.addRegexRule(TextCategorizer.Category.DATE, NUMBER_COMPOSED_MAX_PRIORITY, true,
                year + "-" + month + "-" + days + notBeforeDashOrWord);
        this.addRegexRule(TextCategorizer.Category.DATE, NUMBER_COMPOSED_MAX_PRIORITY, true,
                month + "-" + days + notBeforeDashOrWord);
        this.addRegexRule(TextCategorizer.Category.DATE, NUMBER_COMPOSED_MAX_PRIORITY, true,
                days + "/" + month + "/" + year + notBeforeSlashOrWord);
        this.addRegexRule(TextCategorizer.Category.DATE, NUMBER_COMPOSED_MAX_PRIORITY, true,
                days + "/" + month + notBeforeSlashOrWord);
        // The range rule is deliberately kept at this position, between the
        // day-based and the year/month date rules, as in the original ordering.
        this.addRule(new NumberRangeMatchRule(TextCategorizer.Category.RANGE, NUMBER_COMPOSED_MAX_PRIORITY, this.mMatchMode));
        this.addRegexRule(TextCategorizer.Category.DATE, NUMBER_COMPOSED_MAX_PRIORITY, true,
                year + "-" + month + notBeforeDashOrWord);
        this.addRegexRule(TextCategorizer.Category.DATE, NUMBER_COMPOSED_MAX_PRIORITY, true,
                month + "/" + year + notBeforeSlashOrWord);

        // ---- time ----
        // Minutes are limited to 00-59 (the tens digit used to accept 6 as well).
        this.addRegexRule(TextCategorizer.Category.TIME, NUMBER_COMPOSED_MAX_PRIORITY, true,
                "(2[0-4]|[01][0-9]):[0-5][0-9](?![0-9])");

        // ---- numbers ----
        // The leading group of a thousands-separated number is one to three
        // digits that may contain zeros after the first digit, so that
        // "10,000" and "100 000" are matched as a whole.
        String integer = "([1-9][0-9]{0,2}([,' ][0-9]{3})+|[1-9][0-9]*)";
        String real = "(" + integer + "(\\.[0-9]+)?)";
        String currency = "([\\$\u20ac\u00a3\u20a4\u00a5]|usd|euro[s]?)";
        this.addRegexRule(TextCategorizer.Category.DIMENSIONS, NUMBER_COMPOSED_MAX_PRIORITY, true,
                real + "(x|[ ]x[ ])" + real + notBeforeWord);
        this.addRegexRule(TextCategorizer.Category.CURRENCY, NUMBER_COMPOSED_MAX_PRIORITY, false,
                real + currency + notBeforeWord);
        this.addRegexRule(TextCategorizer.Category.CURRENCY, NUMBER_COMPOSED_MAX_PRIORITY, false,
                currency + real + notBeforeWord);
        this.addRegexRule(TextCategorizer.Category.NUMBERING_ITEM, NUMBER_COMPOSED_MAX_PRIORITY, false,
                "[0-9]+([-.][0-9]+)*\\." + notBeforeWord);
        this.addRegexRule(TextCategorizer.Category.QUANTITY, NUMBER_MAX_PRIORITY, true, real);
        // Identifiers sit one step below plain quantities.
        this.addRegexRule(TextCategorizer.Category.IDENTIFIER, NUMBER_MAX_PRIORITY - 1, true,
                "[0-9]+([-_:][0-9]+)*" + notBeforeWord);

        // ---- spaces and quotes ----
        this.addRegexRule(TextCategorizer.Category.SPACE, SPACE_MAX_PRIORITY, true,
                "[" + Space + "]+");
        this.addRegexRule(TextCategorizer.Category.QUOTE, QUOTE_MAX_PRIORITY, true,
                "[\\p{Pf}\\p{Pi}\"']");

        // ---- links and e-mail addresses ----
        this.addRegexRule(TextCategorizer.Category.WEB_LINK, WEBLINK_MAX_PRIORITY, true,
                "[a-z]+://[^" + Space + "]*");
        this.addRegexRule(TextCategorizer.Category.WEB_LINK, WEBLINK_MAX_PRIORITY, true,
                "www\\.[^" + Space + "]+");
        this.addRegexRule(TextCategorizer.Category.EMAIL_ADDR, WEBLINK_MAX_PRIORITY, true,
                "[\\p{L}][-_.\\p{L}\\p{Nd}]*(@|\\(at\\))[\\p{L}][-_.\\p{L}\\p{Nd}]*");

        // ---- acronyms and abbreviations ----
        String acronymPrefix = "[\\p{L}]\\.([-]?[\\p{L}\\p{Nd}]\\.)+";
        // Dotted sequence followed by white space and a lowercase letter.
        this.addRegexRule(TextCategorizer.Category.ACRONYM, ACRONYM_MAX_PRIORITY, true,
                acronymPrefix + "(?=[" + Space + "]+[\\p{Ll}])");
        // Dotted sequence directly followed by one more letter or digit.
        this.addRegexRule(TextCategorizer.Category.ACRONYM, ACRONYM_MAX_PRIORITY, true,
                acronymPrefix + "[\\p{L}\\p{Nd}]");
        // Single uppercase letter and a dot, followed by white space.
        this.addRegexRule(TextCategorizer.Category.ACRONYM, ACRONYM_MAX_PRIORITY, true,
                "[\\p{Lu}]\\.(?=[" + Space + "])");
        // Single lowercase letter and a dot, followed by white space.
        this.addRegexRule(TextCategorizer.Category.ABBREVIATION, ABBR_MAX_PRIORITY, true,
                "[\\p{Ll}]\\.(?=[" + Space + "])");

        // ---- fallbacks ----
        this.addRegexRule(TextCategorizer.Category.COMMON, COMMON_WORD_MAX_PRIORITY, true,
                CommonWordPattern);
        // Any single character not claimed by another rule.
        this.addRegexRule(TextCategorizer.Category.PUNCTUATION, LOWEST_PRIORITY, true, ".");
    }

    /**
     * Creates a {@link RegexMatchRule} bound to this categorizer's match mode,
     * initializes it with {@code regex} and registers it.
     *
     * @param category category assigned to the rule
     * @param priority priority assigned to the rule
     * @param flag     third constructor argument of {@link RegexMatchRule};
     *                 NOTE(review): presumably the case-sensitivity switch --
     *                 confirm against RegexMatchRule
     * @param regex    pattern handed to the rule's {@code init}
     * @throws IOException if the rule initialization fails
     */
    private void addRegexRule(TextCategorizer.Category category, int priority, boolean flag, String regex) throws IOException {
        RegexMatchRule rule = new RegexMatchRule(category, priority, flag, this.mMatchMode);
        rule.init(regex);
        this.addRule(rule);
    }

    /**
     * Builds the value of {@link #Space}: ten individual space-like characters
     * followed by the two ranges TAB..CR and U+2000..U+200A.
     *
     * @return the character-class content, without enclosing brackets
     */
    private static String buildSpaceCharClass() {
        char[] singles = new char[]{' ', '\u0085', '\u00a0', '\u1680', '\u180e', '\u2028', '\u2029', '\u202f', '\u205f', '\u3000'};
        StringBuilder chars = new StringBuilder();
        for (char single : singles) {
            chars.append(single);
        }
        // Ranges are written "first-last" so they expand inside a character class.
        chars.append('\t').append('-').append('\r');
        chars.append('\u2000').append('-').append('\u200a');
        return chars.toString();
    }
}

