/*
 * Decompiled with CFR 0.152.
 */
package it.jrc.lt.regexpfs.funop;

import it.jrc.lt.core.component.gazetteer.AbstractDisjunctionOfGazetteerItems;
import it.jrc.lt.core.component.gazetteer.AbstractGazetteer;
import it.jrc.lt.core.component.morphology.AbstractDisjunctionOfMorphologyItems;
import it.jrc.lt.core.component.morphology.AbstractMorphology;
import it.jrc.lt.core.component.morphology.AbstractMorphologyItem;
import it.jrc.lt.core.component.morphology.MultextPartOfSpeech;
import it.jrc.lt.regexpfs.funop.FunctionalOperator;
import it.jrc.lt.regexpfs.module.ProcessingException;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.HashSet;
import java.util.Locale;
import java.util.StringTokenizer;

/**
 * Functional operator that answers {@code "yes"} or {@code "no"} for a
 * candidate person name.
 *
 * <p>The name is split on whitespace and each token is mapped to a
 * part-of-speech tag using the morphology loaded from
 * {@code resources/EN.mor}; the tags are joined with {@code '-'}. If the
 * resulting pattern is listed in {@code resources/EN.patterns} and the full
 * name is not found in the gazetteer loaded from
 * {@code resources/EN_FULL_NAMES.gz}, the answer is {@code "no"}; otherwise
 * it is {@code "yes"}.
 *
 * <p>All three resources are held in static fields and are (re)loaded each
 * time an instance is constructed. A resource that cannot be located on the
 * classpath is skipped silently and the corresponding field keeps its
 * previous value.
 */
public class IsReliablePersonName
extends FunctionalOperator {
    private static AbstractMorphology morph;
    private static HashSet<String> filterOutPatterns;
    private static AbstractGazetteer gazetteer;
    /** Index of the "Type" tag within the noun tag set. */
    private static final int NOUN_TYPE = MultextPartOfSpeech.NOUN.getTagIndex("Type");

    /**
     * Creates the operator and loads the morphology, the filter patterns and
     * the gazetteer.
     *
     * @throws ProcessingException if a resource is found but cannot be read
     */
    public IsReliablePersonName() throws ProcessingException {
        IsReliablePersonName.instantiateMorphology();
        IsReliablePersonName.instantiatePatterns();
        IsReliablePersonName.instantiateGazetteer();
    }

    /**
     * Loads the gazetteer from {@code resources/EN_FULL_NAMES.gz}, if present.
     *
     * @throws ProcessingException if the resource exists but reading it fails
     */
    private static void instantiateGazetteer() throws ProcessingException {
        URL pURL = FunctionalOperator.class.getResource("resources/EN_FULL_NAMES.gz");
        if (pURL == null) {
            return;
        }
        try (DataInputStream d = new DataInputStream(new BufferedInputStream(pURL.openStream()))) {
            AbstractGazetteer loaded = AbstractGazetteer.createInstance("basicGazetteer");
            loaded.readFromStream(d);
            // Publish only after a complete read so a failure never leaves a
            // half-loaded gazetteer in the static field.
            gazetteer = loaded;
        }
        catch (Exception e) {
            throw new ProcessingException("Could not read gazetteer file: " + pURL.getPath() + " (" + e + ")");
        }
    }

    /**
     * Loads the morphology from {@code resources/EN.mor}, if present.
     *
     * @throws ProcessingException if the resource exists but reading it fails
     */
    private static void instantiateMorphology() throws ProcessingException {
        URL pURL = FunctionalOperator.class.getResource("resources/EN.mor");
        if (pURL == null) {
            return;
        }
        try (DataInputStream d = new DataInputStream(new BufferedInputStream(pURL.openStream()))) {
            AbstractMorphology loaded = AbstractMorphology.createInstance("multextMorphology");
            loaded.readFromStream(d);
            // Publish only after a complete read (see instantiateGazetteer).
            morph = loaded;
        }
        catch (Exception e) {
            throw new ProcessingException("Could not read morphology file: " + pURL.getPath() + " (" + e + ")");
        }
    }

    /**
     * Loads the set of part-of-speech patterns to filter out, one pattern per
     * line, from {@code resources/EN.patterns}, if present.
     *
     * @throws ProcessingException if the resource exists but reading it fails
     */
    private static void instantiatePatterns() throws ProcessingException {
        URL pURL = FunctionalOperator.class.getResource("resources/EN.patterns");
        if (pURL == null) {
            return;
        }
        // NOTE(review): the reader uses the platform default charset, as before;
        // confirm the encoding of EN.patterns before pinning one.
        try (BufferedReader in = new BufferedReader(new InputStreamReader(pURL.openStream()))) {
            HashSet<String> loaded = new HashSet<String>();
            String st;
            while ((st = in.readLine()) != null) {
                loaded.add(st);
            }
            filterOutPatterns = loaded;
        }
        catch (Exception e) {
            throw new ProcessingException("Could not read patterns from URL: " + pURL.getPath() + " (" + e + ")");
        }
    }

    /**
     * Builds the part-of-speech pattern of {@code input}: one tag per
     * whitespace-separated token, joined with {@code '-'}.
     *
     * @param input the candidate name
     * @return the pattern, or {@code null} if the morphology or the filter
     *         patterns have not been loaded
     */
    private String createPOSPattern(String input) {
        if (morph == null || filterOutPatterns == null) {
            return null;
        }
        StringTokenizer tokens = new StringTokenizer(input, " \t\n\r", false);
        StringBuilder output = new StringBuilder();
        while (tokens.hasMoreTokens()) {
            IsReliablePersonName.appendTag(output, tokens.nextToken());
            if (tokens.hasMoreTokens()) {
                output.append('-');
            }
        }
        return output.toString();
    }

    /**
     * Appends the tag for a single token: {@code "Np"} for an initial, the
     * morphology's part of speech (followed by {@code 'p'} when one of the
     * token's readings is a proper noun) for a known word, or {@code 'U'} for
     * a word unknown to the morphology.
     */
    private static void appendTag(StringBuilder output, String token) {
        if (IsReliablePersonName.isInitial(token)) {
            output.append("Np");
            return;
        }
        // Locale.ROOT keeps the fallback lookup independent of the JVM's
        // default locale (e.g. Turkish dotless i).
        String lower = token.toLowerCase(Locale.ROOT);
        String pos = morph.getPartOfSpeech(token);
        if (pos == null) {
            pos = morph.getPartOfSpeech(lower);
        }
        if (pos == null) {
            output.append('U');
            return;
        }
        output.append(pos);
        if (IsReliablePersonName.hasProperNounReading(token, lower)) {
            output.append('p');
        }
    }

    /** Returns whether the token is a single character or a character followed by a period. */
    private static boolean isInitial(String token) {
        return token.length() == 1 || (token.length() == 2 && token.charAt(1) == '.');
    }

    /**
     * Returns whether any morphological reading of the token (looked up as
     * written, then lower-cased) is a noun whose "Type" tag is {@code 'p'}.
     */
    private static boolean hasProperNounReading(String token, String lower) {
        AbstractDisjunctionOfMorphologyItems readings = morph.search(token);
        if (readings == null) {
            readings = morph.search(lower);
        }
        if (readings == null) {
            // No readings under either spelling: nothing to inspect.
            return false;
        }
        int count = readings.getNumberOfItems();
        for (int i = 0; i < count; ++i) {
            AbstractMorphologyItem item = readings.getItem(i);
            if (item.getPos() == MultextPartOfSpeech.NOUN.getCode() && item.getTag(NOUN_TYPE) == 'p') {
                return true;
            }
        }
        return false;
    }

    /**
     * Evaluates the operator.
     *
     * @param args exactly one element, the candidate name
     * @return {@code ""} if {@code args} is {@code null}; {@code "-"} if
     *         {@code args} does not hold exactly one non-null element;
     *         {@code "no"} if the name's pattern is a filter-out pattern and
     *         the name is not in the gazetteer (or no gazetteer is loaded);
     *         {@code "yes"} otherwise
     */
    @Override
    public String computeValue(String[] args) {
        if (args == null) {
            return "";
        }
        if (args.length != 1 || args[0] == null) {
            return "-";
        }
        String name = args[0];
        String pattern = this.createPOSPattern(name);
        if (pattern == null || !filterOutPatterns.contains(pattern)) {
            return "yes";
        }
        // The pattern is suspicious; only a gazetteer hit can vouch for the
        // name. A missing gazetteer is treated as having no entries.
        if (gazetteer == null || gazetteer.lookUp(name.toCharArray()) == null) {
            return "no";
        }
        return "yes";
    }

    @Override
    public String getName() {
        return "IsReliablePersonName";
    }
}

