/*
 * Decompiled with CFR 0.152.
 */
package it.jrc.lt.core.component.sentencesplitter;

import it.jrc.lt.core.component.Component;
import it.jrc.lt.core.component.Configuration;
import it.jrc.lt.core.component.ConfigurationFeature;
import it.jrc.lt.core.component.gazetteer.AbstractDisjunctionOfGazetteerItems;
import it.jrc.lt.core.component.gazetteer.AbstractGazetteer;
import it.jrc.lt.core.component.gazetteer.AbstractGazetteerItem;
import it.jrc.lt.core.component.gazetteer.SearchStrategy;
import it.jrc.lt.core.component.sentencesplitter.AbstractSentenceItem;
import it.jrc.lt.core.component.sentencesplitter.AbstractSentenceSplitter;
import it.jrc.lt.core.component.sentencesplitter.SentenceSplitterException;
import it.jrc.lt.core.component.tokenizer.AbstractTokenItem;
import it.jrc.lt.core.textitem.TextItem;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import piskorski.util.functions.DataStream;
import piskorski.util.functions.Files;
import piskorski.util.strings.StringFunctions;

final class SimpleSentenceSplitter
extends AbstractSentenceSplitter {
    /** Gazetteer compiled from the resource files; set by {@code setGazetteer} or {@code readFromStream}. */
    private AbstractGazetteer myResources = null;
    /** Potential sentence boundary markers; this array is what gets written to / read from the stream. */
    private char[] psbMarkers = null;
    /** Lookup table indexed by char value: {@code true} for the characters listed in {@link #whiteSpaces}. */
    private transient boolean[] isWhitespace = null;
    /** Lookup table indexed by char value: {@code true} for the characters listed in {@link #psbMarkers}. */
    private transient boolean[] isPsbMarker = null;
    /**
     * Characters treated as white space. The fourth entry is the line feed {@code '\n'}; it used to be
     * the plain letter {@code 'n'}, which flagged the letter and left line feeds unrecognised.
     */
    private static final transient char[] whiteSpaces = new char[]{' ', '\r', '\t', '\n', '\f'};
    /** Prefix put in front of every log and exception message produced by this class. */
    private static final transient String className = "[" + SimpleSentenceSplitter.class.getSimpleName() + "] ";
    /** Number of distinct {@code char} values, i.e. the size required for the lookup tables. */
    private static final transient int MAX_CHAR = 65536;

    /**
     * Compiles this splitter from the given configuration: loads the boundary markers, builds the
     * gazetteer, prepares the lookup tables and finally sets the component name.
     *
     * @param configuration compilation settings (resource file names, character set, name)
     * @return {@code true} when every step succeeded; {@code false} when any step threw, in which
     *         case the exception message is logged
     */
    @Override
    public boolean initialize(Configuration configuration) {
        boolean succeeded;
        try {
            this.setBoundaryMarkers(configuration);
            this.setGazetteer(configuration);
            this.initializeTempStructures();
            // The name is set last, after all other steps have succeeded.
            this.setName(configuration.getFeature(CompilationFeatures.NAME.getName()));
            succeeded = true;
        }
        catch (Exception failure) {
            Component.loggerMessage(className + failure.getMessage());
            succeeded = false;
        }
        return succeeded;
    }

    /**
     * Rebuilds the non-persistent state: both character lookup tables, the token-position switch
     * and the gazetteer settings. Called from {@code initialize} and from {@code readFromStream},
     * after the gazetteer has been assigned.
     */
    private void initializeTempStructures() {
        this.initializePotBoundaryMarkers();
        this.initializeWhiteSpaces();
        this.switchOnReturningTokenPositions();

        AbstractGazetteer gazetteer = this.myResources;
        // NOTE(review): the next two calls presumably switch the gazetteer to character offsets and
        // case-sensitive matching - confirm against AbstractGazetteer.
        gazetteer.returnCharacterPositions();
        gazetteer.caseSensitiveModeActive();
        gazetteer.setSearchStrategy(SearchStrategy.LONGEST_MATCH);
    }

    /**
     * Builds the per-character lookup table for potential sentence boundary markers.
     * The table has one slot per possible {@code char} value; slots of characters found in
     * {@code psbMarkers} are set, all others keep the default {@code false}.
     */
    private void initializePotBoundaryMarkers() {
        boolean[] table = new boolean[Character.MAX_VALUE + 1];
        char[] markers = this.psbMarkers;
        if (markers != null) {
            for (char marker : markers) {
                table[marker] = true;
            }
        }
        this.isPsbMarker = table;
    }

    /**
     * Builds the per-character lookup table for white space.
     * The table has one slot per possible {@code char} value; slots of the characters listed in
     * {@code whiteSpaces} are set, all others keep the default {@code false}.
     */
    private void initializeWhiteSpaces() {
        boolean[] table = new boolean[Character.MAX_VALUE + 1];
        for (char blank : whiteSpaces) {
            table[blank] = true;
        }
        this.isWhitespace = table;
    }

    /**
     * Applies deployment-time settings. Returning token positions is switched off only when the
     * {@code TokenPositions} feature is exactly {@code "false"}; any other value, including a
     * missing one, leaves the current setting untouched.
     *
     * @param configuration deployment settings
     */
    @Override
    protected void applySpecificSettings(Configuration configuration) {
        String tokenPositions = configuration.getFeature(DeploymentFeatures.TOKEN_POSITIONS.getName());
        // Literal-first equals: an absent (null) feature value must not raise a NullPointerException.
        if ("false".equals(tokenPositions)) {
            this.switchOffReturningTokenPositions();
        }
    }

    /**
     * Reads the three resource files (non-final items, non-initial items, non-initial prefixes) and
     * compiles them into a single gazetteer whose entries carry a {@code TYPE} attribute. The
     * attribute value is the default value of the feature the entry's file was configured under.
     *
     * @param configuration compilation settings providing the file names and the character set
     * @throws SentenceSplitterException if a resource file cannot be read or the gazetteer cannot
     *         be compiled; the underlying exception message is logged in both cases
     */
    private void setGazetteer(Configuration configuration) throws SentenceSplitterException {
        String characterSet = configuration.getFeature(CompilationFeatures.CHARACTER_SET.getName());
        // Fixed order (instead of a hash set) so the compiled entry list is the same on every run.
        List<ConfigurationFeature> resourceFeatures = Arrays.asList(
                CompilationFeatures.NON_FINAL_ITEMS,
                CompilationFeatures.NON_INITIAL_ITEMS,
                CompilationFeatures.NON_INITIAL_PREFIX);

        ArrayList<String> entries = new ArrayList<String>();
        for (ConfigurationFeature feature : resourceFeatures) {
            String resourceFile = configuration.getFeature(feature.getName());
            try {
                for (String line : Files.FileToStringArray(resourceFile, characterSet)) {
                    if (line.isEmpty()) {
                        continue;
                    }
                    // Entry format: "<surface form> | TYPE$<type tag>"; the separators match gazConfig below.
                    entries.add(line + " | TYPE$" + feature.defaultValue());
                }
            }
            catch (Exception e) {
                // Log the root cause as the compile-failure branch below does; otherwise it is lost.
                Component.loggerMessage(className + e.getMessage());
                throw new SentenceSplitterException(className + "Problems while reading sentence splitter resources from: " + resourceFile);
            }
        }

        String[] attributeList = new String[]{"TYPE"};
        String[] entryList = entries.toArray(new String[0]);
        this.myResources = AbstractGazetteer.createInstance("basicGazetteer");
        try {
            Configuration gazConfig = Configuration.getInstance();
            gazConfig.addFeature("InputSeparator", "|");
            gazConfig.addFeature("AttributeValueSeparator", "$");
            this.myResources.createFrom(attributeList, entryList, gazConfig);
        }
        catch (Exception e) {
            Component.loggerMessage(className + e.getMessage());
            throw new SentenceSplitterException(className + "Problems while compiling sentence splitter resources");
        }
    }

    /**
     * Loads the potential sentence boundary markers from the configured file. Each non-empty line
     * contributes its first character; empty lines are skipped and leave no slot behind, so the
     * NUL character is never registered as a marker by accident.
     *
     * @param configuration compilation settings providing the marker file name and character set
     * @throws SentenceSplitterException if the file cannot be read or yields no lines at all
     */
    private void setBoundaryMarkers(Configuration configuration) throws SentenceSplitterException {
        String[] separators;
        String characterSet = configuration.getFeature(CompilationFeatures.CHARACTER_SET.getName());
        String boundaryMarkerFile = configuration.getFeature(CompilationFeatures.BOUNDARY_MARKERS.getName());
        try {
            separators = Files.FileToStringArray(boundaryMarkerFile, characterSet);
        }
        catch (Exception e) {
            throw new SentenceSplitterException(className + "Problems while reading sentence boundary resources from: " + boundaryMarkerFile);
        }
        if (separators == null || separators.length == 0) {
            throw new SentenceSplitterException(className + "File for potential senetence boundary markers: " + boundaryMarkerFile + " is empty");
        }
        char[] markers = new char[separators.length];
        int count = 0;
        for (String line : separators) {
            if (line.isEmpty()) {
                continue;
            }
            markers[count++] = line.charAt(0);
        }
        // Trim to the slots actually filled so skipped lines do not leave '\0' entries.
        this.psbMarkers = Arrays.copyOf(markers, count);
    }

    /**
     * Creates an empty splitter with its name cleared. The other state is filled in later by
     * {@code initialize} or {@code readFromStream}.
     */
    protected SimpleSentenceSplitter() {
        super();
        this.name = null;
    }

    /**
     * Tells whether a token could close a sentence, judged by the character found at the token's
     * end offset.
     *
     * @param t    token to inspect
     * @param text the text the token offsets refer to
     * @return {@code true} if the character at {@code t.getEnd()} is a potential boundary marker
     */
    private boolean isPotentiallyFinalToken(AbstractTokenItem t, char[] text) {
        int endOffset = t.getEnd();
        char closingChar = text[endOffset];
        return this.isPsbMarker[closingChar];
    }

    /**
     * Splits a tokenized text into sentences.
     *
     * <p>A boundary is placed after a token when either
     * <ul>
     *   <li>the token ends with a potential boundary marker, the next token starts more than one
     *       position after its end, and the next token is allowed to open a sentence (no
     *       non-initial item or non-initial prefix matches there and it does not start with a
     *       lower-case letter), or</li>
     *   <li>the gap between the token and the next one contains empty lines.</li>
     * </ul>
     * Tokens covered by a "non-final" gazetteer match are skipped; after such a span only the
     * empty-line rule is checked. The tokens left after the last boundary always form the final
     * sentence, including the case of a single trailing token.
     *
     * @param inputText the text the token offsets refer to
     * @param tokens    tokens of {@code inputText}, in text order
     * @return the sentences, expressed as token indices when token positions are switched on and
     *         as character offsets otherwise; {@code null} if the splitter is not initialized or
     *         an argument is {@code null} or empty
     */
    @Override
    public ArrayList<AbstractSentenceItem> segment(char[] inputText, ArrayList<AbstractTokenItem> tokens) {
        if (!this.hasBeenInitialized()) {
            return null;
        }
        if (inputText == null || tokens == null) {
            return null;
        }
        if (tokens.isEmpty() || inputText.length == 0) {
            return null;
        }
        ArrayList<AbstractSentenceItem> boundaries = new ArrayList<AbstractSentenceItem>();
        ArrayList<SimpleSentenceItem> nonFinalSpans = this.collectNonFinalSpans(inputText, tokens);

        int textLength = inputText.length;
        int lastToken = tokens.size() - 1;
        int spanIndex = 0;
        // With no span left, the text length acts as a sentinel no token start can reach.
        int spanStart = nonFinalSpans.isEmpty() ? textLength : nonFinalSpans.get(0).getStart();
        int spanEnd = nonFinalSpans.isEmpty() ? textLength : nonFinalSpans.get(0).getEnd();
        int sentenceStart = 0;
        int current = 0;

        while (current < lastToken) {
            AbstractTokenItem token = tokens.get(current);
            if (token.getStart() < spanStart) {
                // Ordinary token in front of the next non-final span.
                AbstractTokenItem following = tokens.get(current + 1);
                boolean boundaryHere = this.isPotentiallyFinalToken(token, inputText)
                        && following.getStart() - token.getEnd() > 1
                        && this.canOpenSentence(inputText, tokens, current + 1);
                if (!boundaryHere) {
                    boundaryHere = StringFunctions.containsEmptyLines(token.getEnd() + 1, following.getStart() - 1, inputText);
                }
                if (boundaryHere) {
                    boundaries.add(this.buildSentenceItem(tokens, sentenceStart, current));
                    sentenceStart = current + 1;
                }
                ++current;
                continue;
            }

            // The token lies inside the current non-final span: skip everything the span covers.
            while (current < lastToken && tokens.get(current).getStart() <= spanEnd) {
                ++current;
            }
            if (current >= lastToken) {
                break;
            }
            ++spanIndex;
            if (spanIndex < nonFinalSpans.size()) {
                spanStart = nonFinalSpans.get(spanIndex).getStart();
                spanEnd = nonFinalSpans.get(spanIndex).getEnd();
            } else {
                spanStart = textLength;
                spanEnd = textLength;
            }
            // Directly after a span only an empty line can separate sentences.
            if (current > 0 && StringFunctions.containsEmptyLines(tokens.get(current - 1).getEnd() + 1, tokens.get(current).getStart() - 1, inputText)) {
                boundaries.add(this.buildSentenceItem(tokens, sentenceStart, current - 1));
                sentenceStart = current;
            }
        }

        // sentenceStart never exceeds lastToken, so at least one token is always left over here.
        boundaries.add(this.buildSentenceItem(tokens, sentenceStart, lastToken));
        return boundaries;
    }

    /** Returns the spans of all gazetteer matches that carry the "non-final item" type, in match order. */
    private ArrayList<SimpleSentenceItem> collectNonFinalSpans(char[] inputText, ArrayList<AbstractTokenItem> tokens) {
        ArrayList<SimpleSentenceItem> spans = new ArrayList<SimpleSentenceItem>();
        ArrayList<AbstractDisjunctionOfGazetteerItems> matches = this.myResources.findMatch(tokens, inputText);
        if (matches == null) {
            return spans;
        }
        for (AbstractDisjunctionOfGazetteerItems match : matches) {
            AbstractGazetteerItem item = match.getItemWithSomeAttributeSetToAGivenValue(CompilationFeatures.NON_FINAL_ITEMS.defaultValue());
            if (item != null) {
                spans.add(new SimpleSentenceItem(item.getStart(), item.getEnd()));
            }
        }
        return spans;
    }

    /** Tells whether the token at {@code tokenIndex} may be the first token of a sentence. */
    private boolean canOpenSentence(char[] inputText, ArrayList<AbstractTokenItem> tokens, int tokenIndex) {
        AbstractDisjunctionOfGazetteerItems match = this.myResources.findMatchAtGivenPosition(tokens, inputText, tokenIndex, true);
        if (match != null && match.getItemWithSomeAttributeSetToAGivenValue(CompilationFeatures.NON_INITIAL_ITEMS.defaultValue()) != null) {
            return false;
        }
        match = this.myResources.findMatchAtGivenPosition(tokens, inputText, tokenIndex, false);
        if (match != null && match.getItemWithSomeAttributeSetToAGivenValue(CompilationFeatures.NON_INITIAL_PREFIX.defaultValue()) != null) {
            return false;
        }
        return !Character.isLowerCase(inputText[tokens.get(tokenIndex).getStart()]);
    }

    /** Creates the sentence covering tokens {@code firstToken..lastToken} in the configured position unit. */
    private SimpleSentenceItem buildSentenceItem(ArrayList<AbstractTokenItem> tokens, int firstToken, int lastToken) {
        if (this.returnsTokenPositions()) {
            return new SimpleSentenceItem(firstToken, lastToken);
        }
        return new SimpleSentenceItem(tokens.get(firstToken).getStart(), tokens.get(lastToken).getEnd());
    }

    /**
     * Reports whether the splitter is ready for use.
     *
     * @return {@code true} only if the gazetteer, the marker array, both lookup tables and the
     *         component name are all set
     */
    @Override
    public boolean hasBeenInitialized() {
        boolean persistentStateReady = this.myResources != null && this.psbMarkers != null;
        boolean lookupTablesReady = this.isPsbMarker != null && this.isWhitespace != null;
        // The name is queried last and only when everything else is in place.
        return persistentStateReady && lookupTablesReady && this.getName() != null;
    }

    /**
     * Restores the splitter from a stream. The read order - gazetteer, boundary markers, name -
     * mirrors the order used by {@code writeToStream}; the lookup tables and gazetteer settings
     * are rebuilt afterwards because they are not part of the stream.
     *
     * @param d stream positioned at the start of a serialized splitter
     * @throws IOException if reading from the stream fails
     */
    @Override
    public void readFromStream(DataInputStream d) throws IOException {
        this.myResources = AbstractGazetteer.createInstance("basicGazetteer");
        this.myResources.readFromStream(d);
        this.psbMarkers = DataStream.readCharArray(d);
        this.setName(DataStream.readString(d));
        // Needs myResources and psbMarkers, so it has to come last.
        this.initializeTempStructures();
    }

    /**
     * Serializes the splitter: first the gazetteer, then the boundary markers, then the name.
     * {@code readFromStream} reads the parts back in this same order.
     *
     * @param d stream to write to
     * @throws IOException if writing to the stream fails
     */
    @Override
    public void writeToStream(DataOutputStream d) throws IOException {
        this.myResources.writeToStream(d);
        DataStream.writeCharArray(d, this.psbMarkers);
        DataStream.writeString(d, this.getName());
    }

    /**
     * Lists the features used when compiling this splitter.
     *
     * @return read-only view of {@code CompilationFeatures.FEATURES}
     */
    @Override
    protected List<ConfigurationFeature> getCompilationFeatures() {
        List<ConfigurationFeature> declared = Arrays.asList(CompilationFeatures.FEATURES);
        return Collections.unmodifiableList(declared);
    }

    /**
     * Lists the features used when deploying this splitter.
     *
     * @return read-only view of {@code DeploymentFeatures.FEATURES}
     */
    @Override
    protected List<ConfigurationFeature> getDeploymentFeatures() {
        List<ConfigurationFeature> declared = Arrays.asList(DeploymentFeatures.FEATURES);
        return Collections.unmodifiableList(declared);
    }

    /**
     * Configuration features used when deploying the splitter; constants only, never instantiated.
     * NOTE(review): the boolean passed to {@code createFeature} presumably marks a feature as
     * mandatory and the last argument is its default value - confirm against ConfigurationFeature.
     */
    private static final class DeploymentFeatures {
        /** Setting it to {@code "false"} switches off returning token positions. */
        static final ConfigurationFeature TOKEN_POSITIONS =
                ConfigurationFeature.createFeature("TokenPositions", false, "true");
        static final ConfigurationFeature CHARACTER_SET =
                ConfigurationFeature.createFeature("CharacterSet", false, "UTF-8");
        /** All deployment features, as exposed by {@code getDeploymentFeatures()}. */
        static final ConfigurationFeature[] FEATURES = {
                TOKEN_POSITIONS,
                CHARACTER_SET
        };

        private DeploymentFeatures() {
        }
    }

    /**
     * Configuration features used when compiling the splitter; constants only, never instantiated.
     * The default values of the three resource-file features ("0", "1", "2") double as the
     * {@code TYPE} attribute value of the gazetteer entries read from the respective file.
     * NOTE(review): the boolean passed to {@code createFeature} presumably marks a feature as
     * mandatory - confirm against ConfigurationFeature.
     */
    private static final class CompilationFeatures {
        static final ConfigurationFeature NON_FINAL_ITEMS =
                ConfigurationFeature.createFeature("NonFinalItemsFile", true, "0");
        static final ConfigurationFeature NON_INITIAL_ITEMS =
                ConfigurationFeature.createFeature("NonInitialItemsFile", true, "1");
        static final ConfigurationFeature NON_INITIAL_PREFIX =
                ConfigurationFeature.createFeature("NonInitialPrefixFile", true, "2");
        static final ConfigurationFeature BOUNDARY_MARKERS =
                ConfigurationFeature.createFeature("BoundaryMarkersFile", true, "");
        static final ConfigurationFeature OUTPUT_FILE =
                ConfigurationFeature.createFeature("OutputFile", true, "");
        static final ConfigurationFeature NAME =
                ConfigurationFeature.createFeature("Name", true, "");
        static final ConfigurationFeature CHARACTER_SET =
                ConfigurationFeature.createFeature("CharacterSet", false, "UTF-8");
        /** All compilation features, as exposed by {@code getCompilationFeatures()}. */
        static final ConfigurationFeature[] FEATURES = {
                NON_FINAL_ITEMS,
                NON_INITIAL_ITEMS,
                NON_INITIAL_PREFIX,
                BOUNDARY_MARKERS,
                OUTPUT_FILE,
                NAME,
                CHARACTER_SET
        };

        private CompilationFeatures() {
        }
    }

    private final class SimpleSentenceItem
    extends AbstractSentenceItem {
        public SimpleSentenceItem() {
        }

        SimpleSentenceItem(int start, int end) {
            this.setStart(start);
            this.setEnd(end);
        }

        @Override
        public String toString() {
            return "[" + this.getStart() + "," + this.getEnd() + "]";
        }
    }
}

