/*
 * Decompiled with CFR 0.152.
 */
package it.jrc.lt.core.component.flatpatternmatcher;

import it.jrc.lt.core.component.flatpatternmatcher.TokenTagger;
import java.util.HashMap;
import java.util.HashSet;
import piskorski.util.arraylist.StringArrayList;
import piskorski.util.strings.StringFunctions;

/**
 * Token tagger that assigns coarse, surface-level tags to a single token:
 * punctuation/separator classes for one-character separator tokens, and
 * number / capitalisation / letter-digit-compound classes for everything else.
 *
 * <p>All lookup tables are built once in the static initializer and are only
 * read afterwards. The tag constants are kept as non-final public static
 * fields because code outside this file may depend on them being assignable;
 * the lookup tables capture the values the constants have at class
 * initialization time.
 */
public class BasicTokenTagger
extends TokenTagger {
    public static String NUMBER = "[NUM]";
    public static String NUMBER_RANGE = "[NUM_RANGE]";
    public static String SINGLECAP = "[SINGCAP]";
    public static String ALLCAPS = "[ALLCAP]";
    public static String NUMBER_LETTER_COMPOUND = "[NUM_LET]";
    public static String LETTER_NUMBERCOMPOUND = "[LET_NUM]";
    public static String UPPERCASE_WORD = "[UPP_W]";
    public static String LOWERCASE_WORD = "[LOW_W]";
    public static String OPEN_BRACKET = "[OP_BR]";
    public static String CLOSE_BRACKET = "[CL_BR]";
    public static String DOT = "[DOT]";
    public static String COMMA = "[COMMA]";
    public static String COLON = "[COL]";
    public static String SEM_COLON = "[SEM_COL]";
    public static String QUOTATION = "[QUOT]";
    public static String EXCLAMATION = "[EXCL]";
    public static String QUESTION = "[QUEST]";
    public static String DASH = "[DASH]";
    public static String ASTERISK = "[ASTR]";
    public static String PLUS = "[PLUS]";
    public static String AT = "[AT]";
    public static String HASH = "[HASH]";
    public static String PERCENTAGE = "[PERC]";
    public static String AMPERSAND = "[AMP]";
    public static String SLASH = "[SLASH]";
    public static String BACKSLASH = "[B_SLASH]";
    /** Tag for a separator character that has no more specific tag. */
    public static String SEPARATOR = "[SEP]";
    /** Fallback tag used when no other rule in {@link #tagToken(String)} matches. */
    public static String OTHER = "[OTH]";
    public static String WHITESPACE = "[]";

    /** Every character that is treated as a one-character separator token. */
    private static final HashSet<Character> separatorChars;
    /** Specific tag for those separator characters that have one. */
    private static final HashMap<Character, String> sepType;
    /** Not referenced anywhere in this class; retained from the original source. */
    private static final String[] usedRomanNumerals;
    /** The complete tag set returned by {@link #getTokenTagSet()}. */
    private static final HashSet<String> myTags;
    private static final String[] availableTags;
    private static final String defaultTokenSeparators;

    /**
     * No configuration is needed for this tagger.
     *
     * @param configurationFile ignored
     * @return always {@code true}
     */
    @Override
    public boolean initialize(String configurationFile) {
        return true;
    }

    /**
     * Computes the tag(s) for a single token.
     *
     * <p>One-character tokens are checked against the separator table first,
     * then classified as digit, single capital or lowercase letter.
     * Longer tokens are tested against the {@code StringFunctions} predicates
     * in a fixed order and the first match wins; an all-uppercase token
     * receives both {@link #ALLCAPS} and {@link #UPPERCASE_WORD}. If nothing
     * matches, the single tag {@link #OTHER} is returned.
     *
     * @param s the token text; must not be {@code null}
     * @return a non-empty list of tags for the token
     * @throws NullPointerException if {@code s} is {@code null}
     */
    @Override
    public StringArrayList tagToken(String s) {
        StringArrayList results = new StringArrayList(3);
        if (s.length() == 1) {
            char c = s.charAt(0);
            if (separatorChars.contains(Character.valueOf(c))) {
                // Separators without a dedicated tag fall back to the generic one.
                String type = sepType.get(Character.valueOf(c));
                results.add(type != null ? type : SEPARATOR);
            } else if (StringFunctions.onlyLatinDigits(s)) {
                results.add(NUMBER);
            } else if (StringFunctions.onlyUppercaseLetters(s)) {
                results.add(SINGLECAP);
            } else if (Character.isLowerCase(c)) {
                results.add(LOWERCASE_WORD);
            }
        } else if (StringFunctions.lettersFollowedByDigits(s)) {
            results.add(LETTER_NUMBERCOMPOUND);
        } else if (StringFunctions.digitsFollowedByLetters(s)) {
            results.add(NUMBER_LETTER_COMPOUND);
        } else if (StringFunctions.isNumberRange(s)) {
            results.add(NUMBER_RANGE);
        } else if (StringFunctions.onlyLatinDigits(s)) {
            results.add(NUMBER);
        } else if (StringFunctions.onlyUppercaseLetters(s)) {
            results.add(ALLCAPS);
            results.add(UPPERCASE_WORD);
        } else if (s.length() > 0) {
            // This branch is also reached for the empty string; the length
            // guard keeps charAt(0) from throwing so "" ends up tagged OTHER.
            char first = s.charAt(0);
            if (Character.isUpperCase(first)) {
                results.add(UPPERCASE_WORD);
            } else if (Character.isLowerCase(first)) {
                results.add(LOWERCASE_WORD);
            }
        }
        if (results.size() == 0) {
            results.add(OTHER);
        }
        return results;
    }

    /**
     * Returns the set of all tags this tagger can produce.
     *
     * @return the shared tag set (the same instance on every call; callers
     *         should not modify it)
     */
    @Override
    public HashSet<String> getTokenTagSet() {
        return myTags;
    }

    /**
     * Returns the tag used for whitespace tokens.
     *
     * @return {@link #WHITESPACE}
     */
    @Override
    public String tagForWhiteSpaceToken() {
        return WHITESPACE;
    }

    static {
        usedRomanNumerals = new String[]{"I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X", "XI", "XII", "XIII", "XIV", "XV", "XVI", "XVII", "XVIII", "XIX", "XX", "XXI", "XXII", "XXIII", "XXIV", "XXV", "XXVI", "XXVII", "XXVIII", "XXIX", "XXX", "XXXI", "XXXII", "XXXIII", "XXXIV", "XXXV", "XXXVI", "XXXVII", "XXXVIII", "XXXIX"};
        // Built from the constants (already initialized above, in textual order)
        // so the tag set cannot drift from the constant values.
        availableTags = new String[]{NUMBER, NUMBER_RANGE, ALLCAPS, SINGLECAP, NUMBER_LETTER_COMPOUND, LETTER_NUMBERCOMPOUND, UPPERCASE_WORD, LOWERCASE_WORD, OPEN_BRACKET, CLOSE_BRACKET, DOT, COMMA, COLON, SEM_COLON, QUOTATION, EXCLAMATION, QUESTION, DASH, ASTERISK, PLUS, AT, HASH, PERCENTAGE, AMPERSAND, SLASH, BACKSLASH, SEPARATOR, OTHER, WHITESPACE};
        defaultTokenSeparators = "\"\\,`.:;{}[]()~*!$<+@#>?/%^&-|\u201d\u201c\u2019\u2014\u2018\u00a3\u00a5\u00a2\u20ac\u00a9\u00b4\u02b9\u02bb\u02bc\u02bd\u02ca\u02cb\u02ba\u02dc\u02dd\u00ab\u00bb";

        separatorChars = new HashSet<Character>();
        int len = defaultTokenSeparators.length();
        for (int i = 0; i < len; ++i) {
            separatorChars.add(Character.valueOf(defaultTokenSeparators.charAt(i)));
        }

        // Separator characters absent from this map are tagged SEPARATOR.
        sepType = new HashMap<Character, String>();
        sepType.put(Character.valueOf('.'), DOT);
        sepType.put(Character.valueOf(','), COMMA);
        sepType.put(Character.valueOf(':'), COLON);
        sepType.put(Character.valueOf(';'), SEM_COLON);
        sepType.put(Character.valueOf('!'), EXCLAMATION);
        sepType.put(Character.valueOf('?'), QUESTION);
        sepType.put(Character.valueOf('*'), ASTERISK);
        sepType.put(Character.valueOf('+'), PLUS);
        sepType.put(Character.valueOf('@'), AT);
        sepType.put(Character.valueOf('#'), HASH);
        sepType.put(Character.valueOf('%'), PERCENTAGE);
        sepType.put(Character.valueOf('&'), AMPERSAND);
        // Fixed: these two tags were previously swapped ('\\' -> SLASH, '/' -> BACKSLASH).
        sepType.put(Character.valueOf('/'), SLASH);
        sepType.put(Character.valueOf('\\'), BACKSLASH);
        sepType.put(Character.valueOf('-'), DASH);
        sepType.put(Character.valueOf('\u2014'), DASH);
        sepType.put(Character.valueOf('\"'), QUOTATION);
        sepType.put(Character.valueOf('\u201d'), QUOTATION);
        sepType.put(Character.valueOf('\u201c'), QUOTATION);
        sepType.put(Character.valueOf('`'), QUOTATION);
        sepType.put(Character.valueOf('\u2019'), QUOTATION);
        sepType.put(Character.valueOf('\u2018'), QUOTATION);
        sepType.put(Character.valueOf('{'), OPEN_BRACKET);
        sepType.put(Character.valueOf('['), OPEN_BRACKET);
        sepType.put(Character.valueOf('('), OPEN_BRACKET);
        sepType.put(Character.valueOf('<'), OPEN_BRACKET);
        sepType.put(Character.valueOf(')'), CLOSE_BRACKET);
        sepType.put(Character.valueOf(']'), CLOSE_BRACKET);
        sepType.put(Character.valueOf('>'), CLOSE_BRACKET);
        sepType.put(Character.valueOf('}'), CLOSE_BRACKET);

        myTags = new HashSet<String>();
        for (String tag : availableTags) {
            myTags.add(tag);
        }
    }
}

