/*
 * Decompiled with CFR 0.152.
 */
package it.jrc.osint.extract.text.internal;

import it.jrc.osint.ContentType;
import it.jrc.osint.ContentTypes;
import it.jrc.osint.DocumentMetaItem;
import it.jrc.osint.extract.text.ErrorConditions;
import it.jrc.osint.extract.text.TextExtractionProvider;
import it.jrc.osint.extract.text.TextExtractionService;
import it.jrc.osint.extract.text.internal.HTMLTextExtractionProvider;
import it.jrc.osint.extract.text.internal.OSINTDocumentTextExtractionProvider;
import it.jrc.osint.extract.text.internal.TextTextExtractionProvider;
import it.jrc.osint.extract.text.internal.TikaInstanceProvider;
import it.jrc.osint.extract.text.internal.TikaTextExtractionProvider;
import it.jrc.osint.logging.LogManager;
import it.jrc.osint.logging.Logger;
import it.jrc.osint.operations.OperationStatus;
import it.jrc.osint.util.event.EventBroker;
import it.jrc.osint.util.io.FileUtil;
import it.jrc.osint.workspace.WorkspaceService;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import org.apache.tika.Tika;

public class TextExtractionServiceImpl
implements TextExtractionService {
    private static final Logger log = LogManager.getLogger((String)TextExtractionServiceImpl.class.getCanonicalName());
    private Map<String, TextExtractionProviderRegistration> providerRegistry = new HashMap<String, TextExtractionProviderRegistration>();

    public TextExtractionServiceImpl(EventBroker eventBroker, WorkspaceService ws) {
    }

    public void start() {
        this.registerExtractionProviders();
    }

    private void registerExtractionProviders() {
        this.register(ContentTypes.HTML, new HTMLTextExtractionProvider());
        this.register(ContentTypes.TEXT, new TextTextExtractionProvider());
        this.register(ContentTypes.OSINT_META_FILE, new OSINTDocumentTextExtractionProvider());
        this.register(ContentTypes.XML, new TikaTextExtractionProvider());
        this.register(ContentTypes.MSWORD, new TikaTextExtractionProvider());
        this.register(ContentTypes.EXCEL, new TikaTextExtractionProvider());
        this.register(ContentTypes.POWERPOINT, new TikaTextExtractionProvider());
        this.register(ContentTypes.OFFICE_OPEN_XML_DOC, new TikaTextExtractionProvider());
        this.register(ContentTypes.OFFICE_OPEN_XML_EXCEL, new TikaTextExtractionProvider());
        this.register(ContentTypes.OFFICE_OPEN_XML_POWERPOINT, new TikaTextExtractionProvider());
        this.register(ContentTypes.OPEN_DOCUMENT_TEXT, new TikaTextExtractionProvider());
        this.register(ContentTypes.OPEN_DOCUMENT_PRESENTATION, new TikaTextExtractionProvider());
        this.register(ContentTypes.OPEN_DOCUMENT_SPREADSHEET, new TikaTextExtractionProvider());
        this.register(ContentTypes.PDF, new TikaTextExtractionProvider());
    }

    @Override
    public String detectContentType(InputStream in, String fileName) {
        String contentType = null;
        TextExtractionProviderRegistration registration = this.getRegistry().get(FileUtil.getFileExtension((String)fileName));
        if (registration != null) {
            contentType = registration.contentType;
            FileUtil.close((InputStream)in);
            return contentType;
        }
        try {
            String string = contentType = this.getTika().detect(in, fileName);
            return string;
        }
        catch (IOException e) {
            log.error("Failed to detect content type", (Throwable)e);
        }
        finally {
            FileUtil.close((InputStream)in);
        }
        return null;
    }

    @Override
    public boolean canExtract(String contentType) {
        if (contentType == null) {
            return false;
        }
        TextExtractionProvider provider = this.findProviderForContentType(contentType);
        return provider != null;
    }

    @Override
    public OperationStatus extract(InputStream in, String fileName, DocumentMetaItem metaItem) {
        try {
            String fileExtension = FileUtil.getFileExtension((String)fileName);
            TextExtractionProvider tep = this.findProviderForFileExtension(fileExtension);
            if (tep != null) {
                OperationStatus operationStatus = tep.extract(in, metaItem);
                return operationStatus;
            }
            OperationStatus operationStatus = new OperationStatus(ErrorConditions.getErrorConditions().getErrorCondition(ErrorConditions.EXTRACTION_PROVIDER_NOT_FOUND));
            return operationStatus;
        }
        finally {
            FileUtil.close((InputStream)in);
        }
    }

    public void register(String contentType, String[] fileExtensions, TextExtractionProvider provider) {
        TextExtractionProviderRegistration registration = new TextExtractionProviderRegistration(contentType, fileExtensions, provider);
        String[] stringArray = fileExtensions;
        int n = fileExtensions.length;
        int n2 = 0;
        while (n2 < n) {
            String ext = stringArray[n2];
            this.providerRegistry.put(ext, registration);
            ++n2;
        }
    }

    public void register(ContentType contentType, TextExtractionProvider provider) {
        TextExtractionProviderRegistration registration = new TextExtractionProviderRegistration(contentType.getMimeType(), contentType.getFileExtensions(), provider);
        String[] stringArray = contentType.getFileExtensions();
        int n = stringArray.length;
        int n2 = 0;
        while (n2 < n) {
            String ext = stringArray[n2];
            this.providerRegistry.put(ext, registration);
            ++n2;
        }
    }

    TextExtractionProvider findProviderForFileExtension(String fileExtension) {
        TextExtractionProviderRegistration registration = this.getRegistry().get(fileExtension);
        if (registration != null) {
            return registration.provider;
        }
        return null;
    }

    TextExtractionProvider findProviderForContentType(String contentType) {
        for (TextExtractionProviderRegistration reg : this.getRegistry().values()) {
            if (!reg.getContentType().equals(contentType)) continue;
            return reg.provider;
        }
        return null;
    }

    private Map<String, TextExtractionProviderRegistration> getRegistry() {
        return this.providerRegistry;
    }

    private Tika getTika() {
        return TikaInstanceProvider.getTika();
    }

    private class TextExtractionProviderRegistration {
        private TextExtractionProvider provider;
        private String[] fileExtensions;
        private String contentType;

        TextExtractionProviderRegistration(String contentType, String[] fileExtensions, TextExtractionProvider provider) {
            this.provider = provider;
            this.fileExtensions = fileExtensions;
            this.contentType = contentType;
        }

        boolean hasExtension(String fileExtension) {
            String[] stringArray = this.fileExtensions;
            int n = this.fileExtensions.length;
            int n2 = 0;
            while (n2 < n) {
                String ext = stringArray[n2];
                if (ext.equals(fileExtension)) {
                    return true;
                }
                ++n2;
            }
            return false;
        }

        String getContentType() {
            return this.contentType;
        }
    }
}

