/*
 * Decompiled with CFR 0.152.
 */
package it.jrc.lt.core.component.morphology;

import it.jrc.lt.core.component.Component;
import it.jrc.lt.core.component.Configuration;
import it.jrc.lt.core.component.ConfigurationFeature;
import it.jrc.lt.core.component.morphology.AbstractDisjunctionOfMorphologyItems;
import it.jrc.lt.core.component.morphology.AbstractMorphology;
import it.jrc.lt.core.component.morphology.AbstractMorphologyItem;
import it.jrc.lt.core.component.morphology.Language;
import it.jrc.lt.core.component.morphology.LanguageSpecificTreatment;
import it.jrc.lt.core.component.morphology.MorphologyException;
import it.jrc.lt.core.component.morphology.MultextMorphologyItem;
import it.jrc.lt.core.component.morphology.MultextPartOfSpeech;
import it.jrc.lt.core.component.tokenizer.AbstractTokenItem;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.StringTokenizer;
import piskorski.fs.letterfs.fsa.DictionaryLetterFSA;
import piskorski.fs.letterfs.fsa.DictionaryLetterFSATraversing;
import piskorski.fs.letterfs.fsa.DynamicLetterMarkedTravFSAInterface;
import piskorski.fs.letterfs.fsa.FiniteStateAlgorithms;
import piskorski.util.arraylist.StringArrayList;
import piskorski.util.functions.Files;
import piskorski.util.strings.StringFunctions;

/**
 * Morphology component that looks word forms up in a dictionary automaton
 * compiled from tab-separated {@code form / lemma / attribute-list} lines.
 *
 * <p>Each line is re-encoded as {@code form:lemma:attributes} before it is added
 * to the automaton. A lookup appends {@link #BOUNDARY_MARKER} to the word form,
 * walks the automaton with that key and then parses every remaining path label
 * ({@code lemma:attributes}) into a reading.
 */
final class MultextMorphology extends AbstractMorphology {
    /** Separator between form, lemma and attribute list inside an encoded entry. */
    private static final char BOUNDARY_MARKER = ':';

    /** Number of lines that were read from the entry file (or restored from a stream). */
    private int numberOfEntries;
    /** Language of the dictionary; {@code null} until initialised or read from a stream. */
    private Language language = null;
    /** Automaton holding the encoded entries; {@code null} until initialised or read from a stream. */
    private DictionaryLetterFSA automaton = null;
    /** Language-specific fallback consulted by {@link #findMatch} when a token is not in the dictionary. */
    private transient LanguageSpecificTreatment specialComponent;

    /** Creates an empty, uninitialised instance. */
    public MultextMorphology() {
        this.name = null;
        this.numberOfEntries = 0;
        this.specialComponent = null;
    }

    /**
     * @return {@code true} once both the language and the automaton have been set
     */
    @Override
    public boolean hasBeenInitialized() {
        return this.language != null && this.automaton != null;
    }

    /**
     * @return the language of this dictionary, or {@code null} if not initialised
     */
    @Override
    public Language getLanguage() {
        return this.language;
    }

    /**
     * @return the number of entries recorded at compilation time or read from the stream
     */
    @Override
    public int getNumberEntries() {
        return this.numberOfEntries;
    }

    /**
     * Rewrites the raw dictionary lines in place into the {@code form:lemma:attributes}
     * encoding that is fed to the automaton.
     *
     * <p>Empty lines are left untouched. A lemma of {@code "="} stands for "same as the
     * form". When {@code StringFunctions.FormingPattern} yields a non-null pattern for
     * the (form, lemma) pair, that pattern is stored in place of the lemma.
     *
     * <p>A line with fewer than three tab-separated fields cannot be converted: it is
     * logged and {@code null} is returned so that the caller can abort. Lines with more
     * than three fields, or with an unknown part-of-speech code, are logged but still
     * converted from their first three fields.
     *
     * @param multextEntries raw lines of the entry file; modified in place
     * @return the same array with converted entries, or {@code null} if a line has too
     *         few fields
     * @throws IOException never thrown by this implementation; kept for signature compatibility
     */
    private String[] convertMultextResources(String[] multextEntries) throws IOException {
        // Lookup table indexed by char value; a freshly allocated boolean[] is all false.
        boolean[] validPOS = new boolean[Character.MAX_VALUE + 1];
        for (char code : MultextPartOfSpeech.getPosCodes()) {
            validPOS[code] = true;
        }
        for (int i = 0; i < multextEntries.length; ++i) {
            String line = multextEntries[i];
            if (line.isEmpty()) {
                continue;
            }
            StringTokenizer fields = new StringTokenizer(line, "\t", false);
            int fieldCount = fields.countTokens();
            if (fieldCount != 3) {
                Component.loggerMessageAndReturn("The line: " + line + " is not in MULTEXT format");
                if (fieldCount < 3) {
                    // nextToken() below would fail with NoSuchElementException;
                    // report the invalid input to the caller instead.
                    return null;
                }
            }
            String form = fields.nextToken();
            String lemma = fields.nextToken();
            if ("=".equals(lemma)) {
                lemma = form;
            }
            String pattern = StringFunctions.FormingPattern(form, lemma);
            if (pattern != null) {
                lemma = pattern;
            }
            String attrList = fields.nextToken();
            if (!validPOS[attrList.charAt(0)]) {
                Component.loggerMessageAndReturn("The input file: " + line + " is not in MULTEXT format (POS code invalid)");
            }
            multextEntries[i] = form + BOUNDARY_MARKER + lemma + BOUNDARY_MARKER + attrList;
        }
        return multextEntries;
    }

    /**
     * Builds the automaton key for {@code word[start..end]}: the characters of that
     * range followed by {@link #BOUNDARY_MARKER}.
     *
     * @param word  buffer containing the word form
     * @param start index of the first character (inclusive)
     * @param end   index of the last character (inclusive)
     * @return a new array of length {@code end - start + 2}
     */
    private char[] prepareInputForSearch(char[] word, int start, int end) {
        int wordLength = end - start + 1;
        char[] key = new char[wordLength + 1];
        // Copy from 'start', not from 0, so that sub-ranges of a larger buffer work.
        System.arraycopy(word, start, key, 0, wordLength);
        key[wordLength] = BOUNDARY_MARKER;
        return key;
    }

    /**
     * Builds the automaton key for the whole of {@code word}.
     *
     * @param word the word form
     * @return {@code word} followed by {@link #BOUNDARY_MARKER}
     */
    private char[] prepareInputForSearch(char[] word) {
        return this.prepareInputForSearch(word, 0, word.length - 1);
    }

    /**
     * @param word the word form to test
     * @return {@code true} if the automaton accepts the key built from {@code word}
     */
    @Override
    public boolean contains(char[] word) {
        return this.applyAutomaton(this.prepareInputForSearch(word)) != null;
    }

    /**
     * Walks the automaton with the given key. If the exact walk fails and the
     * component is not case sensitive, a second walk is attempted through
     * {@code deltaDowncased}.
     *
     * @param temp key as produced by {@link #prepareInputForSearch(char[])}
     * @return the state reached, or {@code null} if neither walk succeeds
     */
    private DictionaryLetterFSA.State applyAutomaton(char[] temp) {
        DictionaryLetterFSA.State reached = DictionaryLetterFSATraversing.delta(this.automaton, temp);
        if (reached == null && !this.isCaseSensitive()) {
            reached = DictionaryLetterFSATraversing.deltaDowncased(this.automaton, temp);
        }
        return reached;
    }

    /**
     * Looks up {@code word[start..end]} and returns the path labels reachable from
     * the state the lookup ends in.
     *
     * @return the labels, or {@code null} if the word form is not in the automaton
     */
    private StringArrayList getLabels(char[] word, int start, int end) {
        DictionaryLetterFSA.State reached = this.applyAutomaton(this.prepareInputForSearch(word, start, end));
        if (reached == null) {
            return null;
        }
        return DictionaryLetterFSATraversing.getPathLabels(reached);
    }

    /**
     * Looks up the whole of {@code word}.
     *
     * @return the labels, or {@code null} if the word form is not in the automaton
     */
    private StringArrayList getLabels(char[] word) {
        return this.getLabels(word, 0, word.length - 1);
    }

    /**
     * Returns all readings of {@code word}, positioned at {@code [0, word.length - 1]}.
     *
     * @param word the word form
     * @return the readings, or {@code null} if the word is unknown or has no reading
     */
    @Override
    public AbstractDisjunctionOfMorphologyItems search(char[] word) {
        StringArrayList labels = this.getLabels(word);
        if (labels == null) {
            return null;
        }
        return this.getMultextInterpretations(labels, 0, word.length - 1, word);
    }

    /**
     * Returns the distinct lemmas of {@code word}. Stored lemmas that start with
     * {@code '+'} are expanded through {@code StringFunctions.reconstructWord}
     * using the word form as given.
     *
     * @param word the word form
     * @return the distinct lemmas in no particular order, or {@code null} if the word is unknown
     */
    @Override
    public String[] getLemma(char[] word) {
        StringArrayList labels = this.getLabels(word);
        if (labels == null) {
            return null;
        }
        String surface = new String(word);
        HashSet<String> lemmas = new HashSet<String>();
        int size = labels.size();
        for (int i = 0; i < size; ++i) {
            String label = labels.get(i);
            String lemma = label.substring(0, label.indexOf(BOUNDARY_MARKER));
            if (lemma.charAt(0) == '+') {
                lemma = StringFunctions.reconstructWord(lemma, surface);
            }
            lemmas.add(lemma);
        }
        return lemmas.toArray(new String[lemmas.size()]);
    }

    /**
     * Returns the distinct part-of-speech codes of {@code word}, one character per
     * code, concatenated into a string.
     *
     * @param word the word form
     * @return the distinct codes in no particular order, or {@code null} if the word is unknown
     */
    @Override
    public String getPartOfSpeech(char[] word) {
        StringArrayList labels = this.getLabels(word);
        if (labels == null) {
            return null;
        }
        HashSet<Character> codes = new HashSet<Character>();
        int size = labels.size();
        for (int i = 0; i < size; ++i) {
            String label = labels.get(i);
            // The POS code is the first character after the lemma/attribute boundary.
            codes.add(Character.valueOf(label.charAt(label.indexOf(BOUNDARY_MARKER) + 1)));
        }
        char[] result = new char[codes.size()];
        int count = 0;
        for (Character code : codes) {
            result[count++] = code.charValue();
        }
        return new String(result);
    }

    /**
     * @return an unmodifiable view of the features accepted at compilation time
     */
    @Override
    protected List<ConfigurationFeature> getCompilationFeatures() {
        return Collections.unmodifiableList(Arrays.asList(CompilationFeatures.FEATURES));
    }

    /**
     * @return an unmodifiable view of the features accepted at deployment time
     */
    @Override
    protected List<ConfigurationFeature> getDeploymentFeatures() {
        return Collections.unmodifiableList(Arrays.asList(DeploymentFeatures.FEATURES));
    }

    /**
     * Applies the deployment settings: switches on case-sensitive mode when
     * {@code CaseSensitive} is {@code "true"} and disables output for unknown words
     * when {@code OutputForUnknownWords} is {@code "false"}.
     *
     * <p>The comparisons are written constant-first so that a feature value of
     * {@code null} simply leaves the corresponding setting unchanged.
     *
     * @param configuration the deployment configuration
     */
    @Override
    protected void applySpecificSettings(Configuration configuration) {
        if ("true".equals(configuration.getFeature(DeploymentFeatures.CASE_SENSITIVE.getName()))) {
            this.switchOnCaseSensitiveMode();
        }
        if ("false".equals(configuration.getFeature(DeploymentFeatures.OUTPUT_FOR_UNKNOWN_WORDS.getName()))) {
            this.doNotReturnOutputForUnknownWords();
        }
    }

    /**
     * Turns automaton labels of the form {@code lemma:Pattrs} (where {@code P} is the
     * one-character part-of-speech code) into morphology items spanning
     * {@code [start, end]}.
     *
     * <p>Lemmas starting with {@code '+'} are expanded through
     * {@code StringFunctions.reconstructWord}; in case-insensitive mode the
     * lower-cased word form is used for that expansion.
     *
     * @param labels labels returned by the automaton for the word form
     * @param start  start offset stored in every created item
     * @param end    end offset stored in every created item
     * @param word   the word form that was looked up
     * @return the readings, or {@code null} if {@code labels} is empty
     */
    private AbstractDisjunctionOfMorphologyItems getMultextInterpretations(StringArrayList labels, int start, int end, char[] word) {
        ArrayList<AbstractMorphologyItem> readings = new ArrayList<AbstractMorphologyItem>();
        String surface = this.isCaseSensitive() ? new String(word) : new String(word).toLowerCase();
        int size = labels.size();
        for (int i = 0; i < size; ++i) {
            String label = labels.get(i);
            int boundary = label.indexOf(BOUNDARY_MARKER);
            String lemma = label.substring(0, boundary);
            if (lemma.charAt(0) == '+') {
                lemma = StringFunctions.reconstructWord(lemma, surface);
            }
            char pos = label.charAt(boundary + 1);
            String attrValList = label.substring(boundary + 2);
            readings.add(new MultextMorphologyItem(start, end, pos, lemma, attrValList));
        }
        return readings.size() > 0 ? new AbstractDisjunctionOfMorphologyItems(readings) : null;
    }

    /**
     * Restores the component from a stream written by {@link #writeToStream}:
     * entry count, automaton, then language code. The language-specific component
     * is re-created from the restored language.
     *
     * @param d stream to read from
     * @throws IOException if reading fails
     */
    @Override
    public void readFromStream(DataInputStream d) throws IOException {
        this.numberOfEntries = d.readInt();
        this.automaton = DictionaryLetterFSA.getInstance("BASIC");
        this.automaton.readFromDataInputStream(d);
        this.language = Language.getLanguage(d.readInt());
        this.specialComponent = LanguageSpecificTreatment.createInstance(this.language.getName(), this);
    }

    /**
     * Writes the entry count, the automaton and the language code, in that order.
     *
     * @param d stream to write to
     * @throws IOException if writing fails
     */
    @Override
    public void writeToStream(DataOutputStream d) throws IOException {
        d.writeInt(this.numberOfEntries);
        this.automaton.writeToDataOutputStream(d);
        d.writeInt(this.language.getCode());
    }

    /**
     * Compiles the dictionary: reads the entry file, converts the entries, builds
     * the automaton, and sets the language and the language-specific component.
     *
     * @param configuration compilation configuration (entry file, character set, language code)
     * @return {@code true} on success; {@code false} if any step throws; the result of
     *         {@code Component.loggerMessageAndReturn} if the entries are not valid
     */
    @Override
    protected boolean initialize(Configuration configuration) {
        Component.loggerMessage("Read Multext resources from file");
        try {
            String entryFile = configuration.getFeature(CompilationFeatures.ENTRY_FILE.getName());
            Component.loggerMessage("Reading entries from: " + entryFile);
            String[] entries = Files.FileToStringArray(entryFile, configuration.getFeature(CompilationFeatures.CHARACTERSET.getName()));
            Component.loggerMessage("Compressing entries");
            entries = this.convertMultextResources(entries);
            if (entries == null) {
                return Component.loggerMessageAndReturn("Not valid Multext format");
            }
            this.numberOfEntries = entries.length;
            Component.loggerMessage("Number of Entries: " + entries.length);
            Component.loggerMessage("Creating an automaton for encoding the data");
            this.automaton = this.createAutomaton(entries);
            // The language must be set before the specific component, which reads it.
            this.language = this.setLanguage(configuration);
            Component.loggerMessage("Reading language specic component resources");
            this.specialComponent = this.setSpecificComponent(configuration);
        }
        catch (Exception e) {
            Component.loggerMessage("className" + e.toString() + e.getMessage());
            return false;
        }
        return true;
    }

    /**
     * Creates the language-specific component for the current {@link #language}.
     * The {@code configuration} argument is not used.
     *
     * @throws MorphologyException if the component cannot be created
     */
    private LanguageSpecificTreatment setSpecificComponent(Configuration configuration) throws MorphologyException {
        return LanguageSpecificTreatment.createInstance(this.language.getName(), this);
    }

    /**
     * Resolves the language from the {@code LanguageCode} feature. Despite its name
     * this method only returns the language; the caller assigns it.
     *
     * @param configuration compilation configuration
     * @return the language for the configured numeric code
     * @throws MorphologyException if the code is missing, not a number, or rejected by
     *                             {@code Language.getLanguage}
     */
    private Language setLanguage(Configuration configuration) throws MorphologyException {
        try {
            String languageCode = configuration.getFeature(CompilationFeatures.LANGUAGE_CODE.getName());
            return Language.getLanguage(Integer.valueOf(languageCode));
        }
        catch (Exception e) {
            throw new MorphologyException("Invalid language code: " + CompilationFeatures.LANGUAGE_CODE.getName());
        }
    }

    /**
     * Builds an automaton from the converted entries and copies it into a
     * {@code "BASIC"} {@link DictionaryLetterFSA}.
     *
     * @param entries converted entries as returned by {@link #convertMultextResources}
     * @return the dictionary automaton
     */
    private DictionaryLetterFSA createAutomaton(String[] entries) {
        // NOTE(review): the meaning of the literal arguments (false, 10, 3, false) is
        // defined by FiniteStateAlgorithms and not visible here - confirm before changing.
        DynamicLetterMarkedTravFSAInterface tempAutomaton = FiniteStateAlgorithms.MinDetAutomatonFromListOfWordsX(entries, false, 10, 3, false);
        Component.loggerMessage("Automaton compression");
        DictionaryLetterFSA finalAutomaton = DictionaryLetterFSA.getInstance("BASIC");
        finalAutomaton.initializeFrom(tempAutomaton);
        return finalAutomaton;
    }

    /**
     * Looks up a single token and positions its readings at the token's offsets.
     *
     * @param inputText the text the token refers to
     * @param token     the token to look up
     * @return the readings, or {@code null} if the token's form is unknown
     */
    private AbstractDisjunctionOfMorphologyItems searchToken(char[] inputText, AbstractTokenItem token) {
        char[] form = token.toCharArray(inputText);
        StringArrayList labels = this.getLabels(form);
        if (labels == null) {
            return null;
        }
        return this.getMultextInterpretations(labels, token.getStart(), token.getEnd(), form);
    }

    /**
     * Analyses every token in turn. A token missing from the dictionary is handed to
     * the language-specific component, which may add readings and may move the
     * current token index. If there is still no result and output for unknown words
     * is enabled, an "unknown" item is produced for the token at the (possibly moved)
     * index.
     *
     * @param tokens tokens of the text
     * @param text   the text the tokens refer to
     * @return the readings collected for all tokens, in order
     */
    @Override
    public ArrayList<AbstractDisjunctionOfMorphologyItems> findMatch(ArrayList<AbstractTokenItem> tokens, char[] text) {
        int numTokens = tokens.size();
        ArrayList<AbstractDisjunctionOfMorphologyItems> result = new ArrayList<AbstractDisjunctionOfMorphologyItems>(numTokens);
        // Scratch list reused for every token; filled by the language-specific component.
        ArrayList<AbstractDisjunctionOfMorphologyItems> readings = new ArrayList<AbstractDisjunctionOfMorphologyItems>();
        for (int currentPos = 0; currentPos < numTokens; ++currentPos) {
            readings.clear();
            AbstractDisjunctionOfMorphologyItems singleResult = this.searchToken(text, tokens.get(currentPos));
            if (singleResult == null) {
                currentPos = this.specialComponent.process(readings, tokens, currentPos, text);
                if (readings.size() > 0) {
                    singleResult = readings.get(0);
                }
            }
            if (singleResult == null && this.producesOutputForUnknownWords()) {
                AbstractTokenItem unknown = tokens.get(currentPos);
                singleResult = this.createOutputForUnknown(unknown.getStart(), unknown.getEnd());
            }
            if (singleResult == null) {
                continue;
            }
            result.add(singleResult);
            // readings.get(0) was already added as singleResult; append the remainder.
            for (int i = 1; i < readings.size(); ++i) {
                result.add(readings.get(i));
            }
        }
        return result;
    }

    /**
     * Creates a single-item result with the {@code UNKNOWN} part-of-speech code and
     * empty lemma and attribute list.
     *
     * @param start start offset of the unknown token
     * @param end   end offset of the unknown token
     */
    private AbstractDisjunctionOfMorphologyItems createOutputForUnknown(int start, int end) {
        AbstractMorphologyItem[] items = new MultextMorphologyItem[]{new MultextMorphologyItem(start, end, MultextPartOfSpeech.UNKNOWN.getCode(), "", "")};
        return new AbstractDisjunctionOfMorphologyItems(items);
    }

    /** Configuration features read when the component is deployed. */
    private static final class DeploymentFeatures {
        static final ConfigurationFeature CHARACTER_SET = ConfigurationFeature.createFeature("CharacterSet", false, "UTF-8");
        static final ConfigurationFeature CASE_SENSITIVE = ConfigurationFeature.createFeature("CaseSensitive", false, "false");
        static final ConfigurationFeature OUTPUT_FOR_UNKNOWN_WORDS = ConfigurationFeature.createFeature("OutputForUnknownWords", false, "true");
        static final ConfigurationFeature[] FEATURES = new ConfigurationFeature[]{CHARACTER_SET, CASE_SENSITIVE, OUTPUT_FOR_UNKNOWN_WORDS};

        private DeploymentFeatures() {
        }
    }

    /** Configuration features read when the dictionary is compiled. */
    private static final class CompilationFeatures {
        static final ConfigurationFeature NAME = ConfigurationFeature.createFeature("Name", true, "");
        static final ConfigurationFeature ENTRY_FILE = ConfigurationFeature.createFeature("EntryFile", true, "");
        static final ConfigurationFeature CHARACTERSET = ConfigurationFeature.createFeature("CharacterSet", false, "UTF-8");
        static final ConfigurationFeature LANGUAGE_CODE = ConfigurationFeature.createFeature("LanguageCode", false, Integer.toString(Language.ENGLISH.getCode()));
        static final ConfigurationFeature[] FEATURES = new ConfigurationFeature[]{NAME, ENTRY_FILE, CHARACTERSET, LANGUAGE_CODE};

        private CompilationFeatures() {
        }
    }
}

