/*
 * Decompiled with CFR 0.152.
 */
package it.jrc.lt.core.component.tokenizer;

import it.jrc.lt.core.component.Component;
import it.jrc.lt.core.component.Configuration;
import it.jrc.lt.core.component.ConfigurationFeature;
import it.jrc.lt.core.component.tokenizer.AbstractTokenItem;
import it.jrc.lt.core.component.tokenizer.AbstractTokenizer;
import it.jrc.lt.core.component.tokenizer.TokenizerException;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import piskorski.fs.letterfs.fsa.DictionaryLetterFSA;
import piskorski.fs.letterfs.fsa.DictionaryLetterFSATraversing;
import piskorski.fs.letterfs.fsa.DynamicLetterMarkedTravFSA_Impl;
import piskorski.util.functions.DataStream;
import piskorski.util.functions.Files;

/**
 * Tokenizer that cuts the input at whitespace and assigns every resulting token a
 * type id by running it through a letter automaton.
 *
 * <p>Processing of one whitespace-delimited chunk happens in up to three stages:
 * <ol>
 *   <li>the whole chunk is classified by the automaton;</li>
 *   <li>if that yields {@link #OTHER} and border separator trimming is active, one
 *       leading and/or one trailing token separator is split off and the remainder is
 *       classified again;</li>
 *   <li>if that also yields {@link #OTHER}, the chunk is cut at every token separator
 *       and each piece (and each separator) is classified on its own.</li>
 * </ol>
 *
 * <p>Type ids are stored in a {@code byte}; ids 128..255 therefore appear as negative
 * byte values and must be masked with {@code 0xFF} before being used as an index.
 */
final class ClassifyingTokenizer
extends AbstractTokenizer {
    /** Prefix put in front of every log and exception message produced by this class. */
    private static final String CLASS_NAME = "[" + ClassifyingTokenizer.class.getSimpleName() + "] ";
    /** Whitespace characters used when the configuration does not provide any. */
    private static final String DEFAULT_WHITESPACES = "\t\f\n\r ";
    /** Token separators used when the configuration does not provide any (duplicates are harmless). */
    private static final String DEFAULT_TOKEN_SEPARATORS = "\u2018\"\\',`.:;{}[]()~*!$<+@#>?/%^&-|\u201d\u201c\u2019\u2014\u2018\u00a3\u00a5\u00a2\u20ac\u00a9\u00b4\u02b9\u02bb\u02bc\u02bd\u02ca\u02cb\u02ba\u02dc\u02dd\u00ab\u00bb\u00a1\u00bf-\u2010\u2011\u2012\u2013\u2014\u2015";
    /** Label of the artificial transition that leads from a recognising state to its type-emitting state. */
    private static final char FINAL_EMISSION = '\uffff';
    /** Number of distinct {@code char} values; size of the per-character lookup tables. */
    private static final int MAX_CHAR = 65536;
    /** Number of token type ids that fit into one (unsigned) byte. */
    private static final int MAX_TOKEN_TYPES = 256;
    /** Name reported for type ids that the token names file does not define. */
    private static final String UNDEFINED_TOKEN_NAME = "not_defined";
    /** Implementation key passed to {@code DictionaryLetterFSA.getInstance}. */
    private static final String AUTOMATON_KIND = "TRANS_MATRIX_COMPRESSED";
    /**
     * Type id given to whitespace tokens and to tokens the automaton does not classify.
     * NOTE(review): kept non-final because it is public and code outside this file might
     * assign it; it should be treated as a constant.
     */
    public static byte OTHER = 0;

    /** {@code isWhiteSpace[c]} is true when character {@code c} is a whitespace. */
    private boolean[] isWhiteSpace = null;
    /** {@code isTokenSeparator[c]} is true when character {@code c} is a token separator. */
    private boolean[] isTokenSeparator = null;
    /** Automaton used to classify tokens. */
    private DictionaryLetterFSA classifier = null;
    /** Token type names indexed by unsigned type id. */
    String[] tokenNames = null;
    /** Whether stage two (border separator trimming) is attempted; not written by {@link #writeToStream}. */
    private transient boolean borderSeparatorTrimming = true;

    /**
     * Creates an uninitialised tokenizer; state is supplied later by
     * {@link #initialize(Configuration)} or {@link #readFromStream(DataInputStream)}.
     */
    protected ClassifyingTokenizer() {
    }

    /**
     * Writes the automaton, the token names, both character tables and the name, in that order.
     *
     * @param d stream to write to
     * @throws IOException if writing fails
     */
    @Override
    public void writeToStream(DataOutputStream d) throws IOException {
        this.classifier.writeToDataOutputStream(d);
        DataStream.writeStringArray(d, this.tokenNames);
        DataStream.writeBooleanArray(d, this.isTokenSeparator);
        DataStream.writeBooleanArray(d, this.isWhiteSpace);
        DataStream.writeString(d, this.getName());
    }

    /**
     * Restores the state written by {@link #writeToStream(DataOutputStream)}; the read
     * order mirrors the write order exactly.
     *
     * @param d stream to read from
     * @throws IOException if reading fails
     */
    @Override
    public void readFromStream(DataInputStream d) throws IOException {
        this.classifier = DictionaryLetterFSA.getInstance(AUTOMATON_KIND);
        this.classifier.readFromDataInputStream(d);
        this.tokenNames = DataStream.readStringArray(d);
        this.isTokenSeparator = DataStream.readBooleanArray(d);
        this.isWhiteSpace = DataStream.readBooleanArray(d);
        this.setName(DataStream.readString(d));
    }

    /**
     * @return {@code true} when both character tables, the automaton, the token names
     *         and the name are all set
     */
    @Override
    public boolean hasBeenInitialized() {
        return this.isTokenSeparator != null
            && this.isWhiteSpace != null
            && this.classifier != null
            && this.tokenNames != null
            && this.getName() != null;
    }

    /**
     * Returns the token type names keyed by type id.
     *
     * <p>Keys are produced by narrowing the table index to {@code byte}, so indices
     * 128..255 appear as negative keys.
     *
     * @return an unmodifiable map, or {@code null} when the tokenizer is not initialised
     */
    @Override
    public Map<Byte, String> getTypeNames() {
        if (!this.hasBeenInitialized()) {
            return null;
        }
        Map<Byte, String> typeNames = new HashMap<Byte, String>();
        for (int id = 0; id < this.tokenNames.length; ++id) {
            typeNames.put(Byte.valueOf((byte)id), this.tokenNames[id]);
        }
        return Collections.unmodifiableMap(typeNames);
    }

    /** @return an unmodifiable list of the features read by {@link #initialize(Configuration)} */
    @Override
    protected List<ConfigurationFeature> getCompilationFeatures() {
        return Collections.unmodifiableList(Arrays.asList(CompilationFeatures.FEATURES));
    }

    /** @return an unmodifiable list of the deployment-time features */
    @Override
    protected List<ConfigurationFeature> getDeploymentFeatures() {
        return Collections.unmodifiableList(Arrays.asList(DeploymentFeatures.FEATURES));
    }

    /**
     * Applies the deployment settings: whitespace tokens are switched on only for the
     * value {@code "true"}, border separator trimming is switched off only for the value
     * {@code "false"}. Any other (or missing) value leaves the current setting untouched.
     *
     * @param configuration deployment configuration
     */
    @Override
    protected void applySpecificSettings(Configuration configuration) {
        // Constant-first equals() keeps this safe when a feature is absent (null).
        if ("true".equals(configuration.getFeature(DeploymentFeatures.WHITESPACE_TOKENS.getName()))) {
            this.produceWhitespaceTokens();
        }
        if ("false".equals(configuration.getFeature(DeploymentFeatures.BORDER_SEPARATOR_TRIMMING.getName()))) {
            this.switchOffBorderSeparatorTrimming();
        }
    }

    /**
     * Reads a character-list feature, falling back to {@code fallback} (and logging that
     * fact) when the configured value is missing or empty.
     */
    private static String characterFeature(Configuration configuration, ConfigurationFeature feature, String fallback) {
        String value = configuration.getFeature(feature.getName(), "");
        if (value == null || value.length() == 0) {
            Component.loggerMessage(CLASS_NAME + "Missing or invalid value of the feature: " + feature.getName() + ". Default settings will be used.");
            return fallback;
        }
        return value;
    }

    /** Builds a lookup table that is true exactly at the characters contained in {@code characters}. */
    private static boolean[] characterTable(String characters) {
        // A freshly allocated boolean[] is already all-false.
        boolean[] table = new boolean[MAX_CHAR];
        for (int i = 0; i < characters.length(); ++i) {
            table[characters.charAt(i)] = true;
        }
        return table;
    }

    /** Initialises the whitespace table from the configuration or the defaults. */
    private void setWhitespaces(Configuration configuration) {
        this.isWhiteSpace = characterTable(characterFeature(configuration, CompilationFeatures.WHITESPACES, DEFAULT_WHITESPACES));
    }

    /** Initialises the token separator table from the configuration or the defaults. */
    private void setTokenSeparators(Configuration configuration) {
        this.isTokenSeparator = characterTable(characterFeature(configuration, CompilationFeatures.TOKEN_SEPARATORS, DEFAULT_TOKEN_SEPARATORS));
    }

    /** Creates the exception reported for any structural problem in the automaton file. */
    private static TokenizerException invalidAutomatonFile(String fileName) {
        return new TokenizerException(CLASS_NAME + "Invalid format of the file with the classifying automaton: " + fileName);
    }

    /**
     * Loads the automaton description named by the configuration and compiles it into
     * {@link #classifier}.
     *
     * @param configuration compilation configuration (character set and automaton file)
     * @throws TokenizerException if the file cannot be read or is malformed
     */
    private void createAutomaton(Configuration configuration) throws TokenizerException {
        String encoding = configuration.getFeature(CompilationFeatures.CHARACTERSET.getName());
        String fileName = configuration.getFeature(CompilationFeatures.AUTOMATON_FILE.getName());
        String[] lines;
        try {
            lines = Files.FileToStringArray(fileName, encoding);
        }
        catch (Exception e) {
            // Only the message-only constructor is known here, so the cause is folded into the text.
            throw new TokenizerException(CLASS_NAME + "Problems encountered while reading classifying automaton file: " + fileName + " (" + e + ")");
        }
        DynamicLetterMarkedTravFSA_Impl tempAutomaton;
        try {
            tempAutomaton = parseAutomaton(lines, fileName);
        }
        catch (NumberFormatException e) {
            throw new TokenizerException(CLASS_NAME + "Invalid format of the file with the classifying automaton: " + fileName + " (" + e.getMessage() + ")");
        }
        this.classifier = DictionaryLetterFSA.getInstance(AUTOMATON_KIND);
        this.classifier.initializeFrom(tempAutomaton);
    }

    /**
     * Builds the temporary automaton from the lines of the automaton file.
     *
     * <p>Layout as read by this method: line 0 holds the number of states; the next
     * {@code numStates} lines hold {@code "<state> <1 if final, else other>"}; every
     * remaining line holds {@code "<source> <target> <sign>"}. A sign starting with a
     * backslash is a character transition whose code is the hexadecimal number found
     * after the first two characters. Any other sign is an emission: the decimal number
     * after its first character becomes the label that {@link #classifyToken} later
     * returns as the token type. All emissions with the same number share one
     * intermediate state, reached through a {@link #FINAL_EMISSION} transition.
     *
     * @throws TokenizerException    on structural errors
     * @throws NumberFormatException when a numeric field cannot be parsed
     */
    private static DynamicLetterMarkedTravFSA_Impl parseAutomaton(String[] lines, String fileName) throws TokenizerException {
        if (lines.length < 3) {
            throw invalidAutomatonFile(fileName);
        }
        int numStates = Integer.parseInt(lines[0]);
        // Validate the declared size before allocating an automaton of that size.
        if (lines.length - 2 < numStates) {
            throw invalidAutomatonFile(fileName);
        }
        DynamicLetterMarkedTravFSA_Impl automaton = new DynamicLetterMarkedTravFSA_Impl(numStates, numStates * 10);
        // Starts at 1: presumably the automaton is created with its first state already present - TODO confirm.
        for (int i = 1; i < numStates; ++i) {
            automaton.addState();
        }

        int current = 1;
        for (int i = 0; i < numStates; ++i) {
            StringTokenizer fields = new StringTokenizer(lines[current++], " ");
            if (fields.countTokens() != 2) {
                throw invalidAutomatonFile(fileName);
            }
            int stateId = Integer.parseInt(fields.nextToken());
            boolean isFinal = Integer.parseInt(fields.nextToken()) == 1;
            if (isFinal) {
                automaton.setFinalState(stateId);
            }
        }

        // Emission number -> intermediate state that carries the emitting transition.
        Map<Integer, Integer> emissionStates = new HashMap<Integer, Integer>();
        while (current < lines.length) {
            StringTokenizer fields = new StringTokenizer(lines[current++], " ");
            if (fields.countTokens() != 3) {
                throw invalidAutomatonFile(fileName);
            }
            int sourceState = Integer.parseInt(fields.nextToken());
            int targetState = Integer.parseInt(fields.nextToken());
            String sign = fields.nextToken();
            // Both sign forms need a payload after their prefix.
            if (sign.length() < 2) {
                throw invalidAutomatonFile(fileName);
            }
            if (sign.charAt(0) == '\\') {
                char symbol = (char)Long.parseLong(sign.substring(2), 16);
                automaton.addTransition(sourceState, targetState, symbol);
                continue;
            }
            Integer which = Integer.valueOf(sign.substring(1));
            Integer emissionState = emissionStates.get(which);
            if (emissionState == null) {
                emissionState = Integer.valueOf(automaton.addState());
                emissionStates.put(which, emissionState);
            }
            automaton.addTransition(sourceState, emissionState.intValue(), FINAL_EMISSION);
            automaton.addTransition(emissionState.intValue(), targetState, (char)which.intValue());
        }
        return automaton;
    }

    /**
     * Loads the token type names. Each line of the names file must consist of a type id
     * (0..255) and a name, separated by blanks or tabs; ids that are not listed keep the
     * name {@code "not_defined"}.
     *
     * @param configuration compilation configuration (character set and names file)
     * @throws TokenizerException if the file cannot be read or is malformed
     */
    private void setTokenTypeNames(Configuration configuration) throws TokenizerException {
        String encoding = configuration.getFeature(CompilationFeatures.CHARACTERSET.getName());
        String fileName = configuration.getFeature(CompilationFeatures.TOKEN_NAMES.getName());
        String[] data;
        try {
            data = Files.FileToStringArray(fileName, encoding);
        }
        catch (Exception e) {
            throw new TokenizerException(CLASS_NAME + "Problems encountered while reading the file with token type names: " + fileName + " (" + e + ")");
        }
        String[] names = new String[MAX_TOKEN_TYPES];
        Arrays.fill(names, UNDEFINED_TOKEN_NAME);
        for (int i = 0; i < data.length; ++i) {
            StringTokenizer fields = new StringTokenizer(data[i], " \t");
            if (fields.countTokens() != 2) {
                throw new TokenizerException(CLASS_NAME + "Invalid format of the file with the token type names: " + fileName);
            }
            int index;
            try {
                index = Integer.parseInt(fields.nextToken());
            }
            catch (NumberFormatException e) {
                throw new TokenizerException(CLASS_NAME + "Invalid format of the file with the token type names: " + fileName + " (" + e.getMessage() + ")");
            }
            if (index < 0 || index >= MAX_TOKEN_TYPES) {
                throw new TokenizerException(CLASS_NAME + "Invalid token ID: " + index + " in the file with the token type names: " + fileName);
            }
            names[index] = fields.nextToken();
        }
        // Publish only a completely parsed table.
        this.tokenNames = names;
    }

    /** Resets the deployment switches: trimming on, whitespace tokens off. */
    private void init() {
        this.switchOnBorderSeparatorTrimming();
        this.ignoreWhitespaceTokens();
    }

    /**
     * Builds the tokenizer from its compilation configuration.
     *
     * @param configuration compilation configuration
     * @return {@code true} on success; {@code false} (after logging the reason) on any failure
     */
    @Override
    protected boolean initialize(Configuration configuration) {
        try {
            this.setWhitespaces(configuration);
            this.setTokenSeparators(configuration);
            this.createAutomaton(configuration);
            this.setTokenTypeNames(configuration);
            this.setName(configuration.getFeature(CompilationFeatures.NAME.getName()));
            this.init();
        }
        catch (Exception e) {
            // Some exceptions carry no message; fall back to toString() so the log never says just "null".
            String detail = e.getMessage() != null ? e.getMessage() : e.toString();
            Component.loggerMessage(CLASS_NAME + detail);
            return false;
        }
        return true;
    }

    /**
     * Classifies {@code inputText[start..end]} (both bounds inclusive).
     *
     * @return the label of the first transition of the state reached through the
     *         {@link #FINAL_EMISSION} transition, narrowed to {@code byte}; {@link #OTHER}
     *         when the automaton does not accept the text or has no emission for it
     */
    private byte classifyToken(char[] inputText, int start, int end) {
        DictionaryLetterFSA.State reached = DictionaryLetterFSATraversing.delta(this.classifier, inputText, start, end);
        if (reached == null) {
            return OTHER;
        }
        DictionaryLetterFSA.Transition emission = reached.getTransitionLabelledWith(FINAL_EMISSION);
        if (emission == null) {
            return OTHER;
        }
        DictionaryLetterFSA.State emitting = emission.getTargetState();
        if (emitting == null) {
            return OTHER;
        }
        return (byte)emitting.getFirstTransition().getLabel();
    }

    /**
     * Splits {@code inputText} into tokens.
     *
     * @param inputText text to tokenize
     * @return the tokens in text order; whitespace runs are included only when
     *         whitespace tokens are switched on
     */
    @Override
    public ArrayList<AbstractTokenItem> tokenize(char[] inputText) {
        int len = inputText.length;
        ArrayList<AbstractTokenItem> tokens = new ArrayList<AbstractTokenItem>(len / 10 + 1);
        boolean returnWhitespaces = this.producesWhitespaceTokens();
        boolean trimmingOn = this.isBorderSeparatorTrimmingActive();
        int current = 0;
        int start = 0;
        boolean inToken = false;
        while (current < len) {
            if (!this.isWhiteSpace[inputText[current]]) {
                if (!inToken) {
                    start = current;
                    inToken = true;
                }
                ++current;
                continue;
            }
            // A whitespace ends the pending token, if any.
            if (inToken) {
                inToken = false;
                this.emitToken(tokens, inputText, start, current, trimmingOn);
            }
            // Consume the whole whitespace run as one (optional) token.
            start = current;
            while (current < len && this.isWhiteSpace[inputText[current]]) {
                ++current;
            }
            if (returnWhitespaces) {
                tokens.add(new MyTokenItem(start, current - 1, true, OTHER));
            }
        }
        if (inToken) {
            this.emitToken(tokens, inputText, start, current, trimmingOn);
        }
        return tokens;
    }

    /**
     * Emits the token(s) for the chunk {@code inputText[start..current-1]}: as a whole if
     * the automaton classifies it, otherwise via border trimming (when enabled), otherwise
     * decomposed at token separators.
     */
    private void emitToken(ArrayList<AbstractTokenItem> tokens, char[] inputText, int start, int current, boolean trimmingOn) {
        byte type = this.classifyToken(inputText, start, current - 1);
        if (type != OTHER) {
            tokens.add(new MyTokenItem(start, current - 1, false, type));
            return;
        }
        if (trimmingOn && this.classifyWithBorderseparatorTrimming(tokens, inputText, current, start) != OTHER) {
            return;
        }
        this.decomposeUnclassifiedToken(tokens, inputText, current, start);
    }

    /** Classifies {@code inputText[from..to]} (inclusive) and appends it as a non-whitespace token. */
    private void addClassified(ArrayList<AbstractTokenItem> tokens, char[] inputText, int from, int to) {
        tokens.add(new MyTokenItem(from, to, false, this.classifyToken(inputText, from, to)));
    }

    /**
     * Cuts {@code inputText[start..current-1]} at every token separator. Each separator
     * becomes a one-character token and each run between separators becomes a token; all
     * of them are classified individually.
     */
    private void decomposeUnclassifiedToken(ArrayList<AbstractTokenItem> tokens, char[] inputText, int current, int start) {
        boolean inSegment = false;
        int segmentStart = start;
        for (int pos = start; pos < current; ++pos) {
            if (this.isTokenSeparator[inputText[pos]]) {
                if (inSegment) {
                    this.addClassified(tokens, inputText, segmentStart, pos - 1);
                    inSegment = false;
                }
                this.addClassified(tokens, inputText, pos, pos);
            } else if (!inSegment) {
                segmentStart = pos;
                inSegment = true;
            }
        }
        if (inSegment) {
            this.addClassified(tokens, inputText, segmentStart, current - 1);
        }
    }

    /**
     * Strips at most one leading and one trailing token separator from
     * {@code inputText[start..current-1]} and classifies what remains. Tokens are appended
     * only when the remainder is classified as something other than {@link #OTHER}.
     *
     * @return the type of the trimmed remainder, or {@link #OTHER} when nothing was emitted
     */
    private byte classifyWithBorderseparatorTrimming(ArrayList<AbstractTokenItem> tokens, char[] inputText, int current, int start) {
        int last = current - 1;
        boolean initialSeparator = this.isTokenSeparator[inputText[start]];
        boolean finalSeparator = this.isTokenSeparator[inputText[last]];
        int newStart = initialSeparator ? start + 1 : start;
        int newEnd = finalSeparator ? last - 1 : last;
        // Nothing is left when the chunk consists of separators only.
        if (newEnd < newStart) {
            return OTHER;
        }
        byte type = this.classifyToken(inputText, newStart, newEnd);
        if (type == OTHER) {
            return type;
        }
        if (initialSeparator) {
            this.addClassified(tokens, inputText, start, start);
        }
        tokens.add(new MyTokenItem(newStart, newEnd, false, type));
        if (finalSeparator) {
            this.addClassified(tokens, inputText, last, last);
        }
        return type;
    }

    /** Enables border separator trimming. */
    public void switchOnBorderSeparatorTrimming() {
        this.borderSeparatorTrimming = true;
    }

    /** Disables border separator trimming. */
    public void switchOffBorderSeparatorTrimming() {
        this.borderSeparatorTrimming = false;
    }

    /** @return whether border separator trimming is currently enabled */
    public boolean isBorderSeparatorTrimmingActive() {
        return this.borderSeparatorTrimming;
    }

    /**
     * Token produced by this tokenizer. It is an inner (non-static) class because type
     * names are resolved through the enclosing tokenizer's name table.
     */
    private final class MyTokenItem
    extends AbstractTokenItem {
        // Deliberately has no initialiser: presumably the superclass constructor stores the
        // type through setType(), and an initialiser here would run afterwards and reset it
        // - TODO confirm against AbstractTokenItem.
        private byte type;

        /** @return the type id of this token */
        @Override
        public byte getType() {
            return this.type;
        }

        /**
         * @param type type id, possibly negative for ids 128..255
         * @return the name registered for {@code type}
         */
        @Override
        protected String getType(byte type) {
            // Mask to the unsigned value; a raw negative byte would index out of bounds.
            return ClassifyingTokenizer.this.tokenNames[type & 0xFF];
        }

        /** @param type new type id of this token */
        @Override
        protected void setType(byte type) {
            this.type = type;
        }

        /**
         * @param start        index of the first character
         * @param end          index of the last character (inclusive)
         * @param isWhitespace whether the token is a whitespace run
         * @param type         type id
         */
        MyTokenItem(int start, int end, boolean isWhitespace, byte type) {
            super(start, end, isWhitespace, type);
        }

        /** @return {@code [start,end,typeName,isWhitespace]} */
        @Override
        public String toString() {
            return "[" + this.getStart() + "," + this.getEnd() + "," + this.getTypeAsString() + "," + this.isWhiteSpace() + "]";
        }
    }

    /**
     * Features that can be set when the tokenizer is deployed. The boolean passed to
     * {@code createFeature} presumably marks a feature as mandatory - TODO confirm.
     */
    private static final class DeploymentFeatures {
        static final ConfigurationFeature CHARACTER_SET = ConfigurationFeature.createFeature("CharacterSet", false, "UTF-8");
        static final ConfigurationFeature WHITESPACE_TOKENS = ConfigurationFeature.createFeature("WhitespaceTokens", false, "false");
        static final ConfigurationFeature BORDER_SEPARATOR_TRIMMING = ConfigurationFeature.createFeature("UseBorderSeparatorTrimming", false, "true");
        static final ConfigurationFeature[] FEATURES = new ConfigurationFeature[]{CHARACTER_SET, WHITESPACE_TOKENS, BORDER_SEPARATOR_TRIMMING};

        /** Constant holder; not instantiable. */
        private DeploymentFeatures() {
        }
    }

    /**
     * Features read while the tokenizer is compiled from its resource files. The boolean
     * passed to {@code createFeature} presumably marks a feature as mandatory - TODO confirm.
     */
    private static final class CompilationFeatures {
        static final ConfigurationFeature WHITESPACES = ConfigurationFeature.createFeature("Whitespaces", false, "");
        static final ConfigurationFeature TOKEN_SEPARATORS = ConfigurationFeature.createFeature("TokenSeparators", false, "");
        static final ConfigurationFeature TOKEN_NAMES = ConfigurationFeature.createFeature("TokenNames", true, "");
        static final ConfigurationFeature AUTOMATON_FILE = ConfigurationFeature.createFeature("AutomatonFile", true, "");
        static final ConfigurationFeature CHARACTERSET = ConfigurationFeature.createFeature("CharacterSet", false, "UTF-8");
        static final ConfigurationFeature NAME = ConfigurationFeature.createFeature("Name", true, "");
        static final ConfigurationFeature[] FEATURES = new ConfigurationFeature[]{WHITESPACES, TOKEN_SEPARATORS, TOKEN_NAMES, AUTOMATON_FILE, CHARACTERSET, NAME};

        /** Constant holder; not instantiable. */
        private CompilationFeatures() {
        }
    }
}

