/*
 * Decompiled with CFR 0.152.
 */
package it.jrc.osint.extract.text.internal;

import it.jrc.osint.DocumentMetaItem;
import it.jrc.osint.ResourceId;
import it.jrc.osint.console.OSINTConsolePlugin;
import it.jrc.osint.extract.text.ErrorConditions;
import it.jrc.osint.extract.text.TextExtractPlugin;
import it.jrc.osint.extract.text.TextExtractionService;
import it.jrc.osint.extract.text.internal.TextExtractionServiceEventImpl;
import it.jrc.osint.logging.LogManager;
import it.jrc.osint.logging.Logger;
import it.jrc.osint.metadata.ErrorStatusMetaData;
import it.jrc.osint.metadata.RepositoryMetaData;
import it.jrc.osint.operations.ErrorCodeRange;
import it.jrc.osint.operations.ErrorCondition;
import it.jrc.osint.operations.OperationStatus;
import it.jrc.osint.util.concurrent.ParallelProcessor2;
import it.jrc.osint.util.concurrent.ProcessingContext;
import it.jrc.osint.util.concurrent.ProcessingException;
import it.jrc.osint.util.concurrent.Producer;
import it.jrc.osint.util.concurrent.Task;
import it.jrc.osint.util.event.SystemEvent;
import it.jrc.osint.workspace.CaseProject;
import it.jrc.osint.workspace.MarkerUtil;
import it.jrc.osint.workspace.ResourceBuilder;
import it.jrc.osint.workspace.ResourceEvent;
import it.jrc.osint.workspace.WorkspaceService;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import org.eclipse.core.resources.IFile;
import org.eclipse.core.resources.IFolder;
import org.eclipse.core.resources.IResource;
import org.eclipse.core.runtime.IPath;
import org.eclipse.core.runtime.IProgressMonitor;

public class TextExtractionBuilder
implements ResourceBuilder {
    private static final Logger log = LogManager.getLogger((String)TextExtractionBuilder.class.getCanonicalName());

    /**
     * Prepares a build and reports how many documents are waiting for text extraction.
     *
     * <p>When {@code kind} equals 10 the resource events carried by {@code params} are
     * first translated into text-extraction markers; for any other kind only the markers
     * that already exist are counted.
     *
     * <p>NOTE(review): 10 presumably corresponds to Eclipse's
     * {@code IncrementalProjectBuilder.INCREMENTAL_BUILD} - confirm.
     *
     * @param kind   build kind code
     * @param params build parameters supplying the parent folder and the resource events
     * @param prj    case project used to decide which resources are documents
     * @return number of documents below the parent resource that carry the
     *         text-extraction marker
     */
    public int prepareBuild(int kind, ResourceBuilder.BuildParameters params, CaseProject prj) {
        boolean eventsNeedMarking = kind == 10;
        if (eventsNeedMarking) {
            this.markResources(params.getParentResource(), params.getResourceEvents(), prj);
        }
        return this.getMarkedDocumentsCount(params.getParentResource());
    }

    /**
     * Flags the files touched by the given resource events for text extraction.
     *
     * <p>Only events whose resource is an {@link IFile} contained in the project's
     * documents are considered:
     * <ul>
     *   <li>CREATED and CONTENT_CHANGED files are always marked;</li>
     *   <li>MOVED files are marked only when the path they were moved from was not
     *       itself a project document;</li>
     *   <li>every other event type (including DELETED) leaves the markers untouched.</li>
     * </ul>
     *
     * @param parentFolder parent folder of the build (not used by this method)
     * @param events       resource events to inspect
     * @param prj          case project used to test document membership
     */
    private void markResources(IFolder parentFolder, List<ResourceEvent> events, CaseProject prj) {
        for (ResourceEvent event : events) {
            IResource resource = event.getResource();
            // Every marking rule below applies to files only, so anything else
            // (including a missing resource) can be skipped right away.
            if (!(resource instanceof IFile)) {
                continue;
            }
            if (!prj.containsDocument(resource.getFullPath())) {
                continue;
            }
            ResourceEvent.EventType type = event.getType();
            if (type == ResourceEvent.EventType.CREATED || type == ResourceEvent.EventType.CONTENT_CHANGED) {
                this.markTextExtraction(resource);
            } else if (type == ResourceEvent.EventType.MOVED) {
                // A file moved from a location that already was a project document
                // is not marked again.
                IPath originalPath = event.getMovedFromPath();
                if (!prj.containsDocument(originalPath)) {
                    this.markTextExtraction(resource);
                }
            }
        }
    }

    /**
     * Puts the text-extraction marker on the given resource.
     *
     * @param resource resource to flag as pending text extraction
     */
    private void markTextExtraction(IResource resource) {
        final String markerId = "it.jrc.osint.extract.text.textExtractionMarker";
        MarkerUtil.mark(resource, markerId);
    }

    /**
     * Puts the workspace error-status marker on the given resource.
     *
     * @param resource resource to flag as being in an error state
     */
    private void markError(IResource resource) {
        final String markerId = "it.jrc.osint.workspace.errorStatusMarker";
        MarkerUtil.mark(resource, markerId);
    }

    /**
     * Counts the documents that carry the text-extraction marker.
     *
     * @param parentResource folder handed to {@code MarkerUtil.getMarkedDocuments}
     * @return size of the set of marked documents reported for that folder
     */
    private int getMarkedDocumentsCount(IFolder parentResource) {
        // The cast keeps the call bound to the IResource variant of the helper.
        return MarkerUtil.getMarkedDocuments((IResource)parentResource, "it.jrc.osint.extract.text.textExtractionMarker").size();
    }

    /**
     * Runs text extraction for every document that carries the text-extraction marker.
     *
     * <p>The markers are removed before extraction starts. If the monitor reports a
     * cancellation afterwards, every document that is not part of the set returned by
     * {@code performTextExtraction} is marked again so that it remains pending.
     *
     * @param kind    build kind code (not evaluated by this method)
     * @param params  build parameters supplying the parent resource
     * @param prj     case project that owns the documents
     * @param monitor progress monitor, also consulted for cancellation
     * @return the set returned by {@code performTextExtraction}
     */
    public Set<ResourceId> build(int kind, ResourceBuilder.BuildParameters params, CaseProject prj, IProgressMonitor monitor) {
        // Typed as Set<ResourceId>: iterating a raw Set with a ResourceId loop variable
        // does not type-check.
        Set<ResourceId> docsToExtract = MarkerUtil.getMarkedDocuments((IResource)params.getParentResource(), "it.jrc.osint.extract.text.textExtractionMarker");
        this.cleanMarker(docsToExtract, prj);
        Set<ResourceId> docsExtracted = this.performTextExtraction(docsToExtract, prj, monitor);
        if (monitor.isCanceled()) {
            // Whatever was not reported as extracted has to be picked up by a later build.
            docsToExtract.removeAll(docsExtracted);
            for (ResourceId id : docsToExtract) {
                MarkerUtil.mark(id, "it.jrc.osint.extract.text.textExtractionMarker");
            }
        }
        return docsExtracted;
    }

    /**
     * Extracts text from the given documents in parallel, saves the processed meta items
     * and posts success / failure events.
     *
     * <p>After processing, the meta items are read again from the project and split into
     * those flagged as text-extracted and the rest; the rest are reported as failed
     * together with the error condition stored on each item.
     *
     * @param docsToExtract ids of the documents to process
     * @param cp            case project used to load and save meta items
     * @param monitor       progress monitor handed to the parallel processor
     * @return ids of the documents whose meta item is flagged as text-extracted; an empty
     *         set when there was nothing to do or when processing failed with an exception
     */
    private Set<ResourceId> performTextExtraction(Set<ResourceId> docsToExtract, CaseProject cp, IProgressMonitor monitor) {
        if (docsToExtract.isEmpty()) {
            log.debug("No documents found for text extraction");
            monitor.beginTask("Text Extraction", IProgressMonitor.UNKNOWN);
            monitor.done();
            return new HashSet<ResourceId>();
        }
        Producer producer = cp.getMetaItemProducer(docsToExtract);
        TextExtractionTask textExtractionTask = new TextExtractionTask();
        ParallelProcessor2 processor = new ParallelProcessor2(producer, (Task)textExtractionTask);
        processor.setItemsDescriptor("files");
        processor.setTaskDescriptor("Text extraction");
        try {
            Iterator resultIterator = processor.start(monitor);
            while (resultIterator.hasNext()) {
                DocumentMetaItem nextResult = (DocumentMetaItem)resultIterator.next();
                if (nextResult == null) continue;
                cp.saveMetaItem_rm(nextResult);
            }
        }
        catch (Throwable t) {
            log.error("Failed to perform text extraction", t);
            return Collections.emptySet();
        }
        // Second pass over the stored meta items to classify the outcome per document.
        Producer itemProducer = cp.getMetaItemProducer(docsToExtract);
        HashSet<ResourceId> extractedFiles = new HashSet<ResourceId>();
        HashSet<ResourceId> failedFiles = new HashSet<ResourceId>();
        HashMap<ResourceId, ErrorCondition> failedFilesErrorConditionMap = new HashMap<ResourceId, ErrorCondition>();
        while (itemProducer.hasNext()) {
            DocumentMetaItem item = (DocumentMetaItem)itemProducer.next();
            RepositoryMetaData rmd = new RepositoryMetaData(item);
            if (rmd.isTextExtracted()) {
                extractedFiles.add(rmd.getResourceId());
                continue;
            }
            ErrorStatusMetaData emd = new ErrorStatusMetaData(item);
            failedFiles.add(item.getResourceId());
            failedFilesErrorConditionMap.put(item.getResourceId(), emd.getErrorCondition());
        }
        if (!extractedFiles.isEmpty()) {
            TextExtractPlugin.getEventBroker().postEvent((SystemEvent)new TextExtractionServiceEventImpl("osint/textExtraction/extractionSuccess", extractedFiles));
        }
        if (!failedFiles.isEmpty()) {
            TextExtractPlugin.getEventBroker().postEvent((SystemEvent)new TextExtractionServiceEventImpl("osint/textExtraction/extractionFailed", failedFiles, failedFilesErrorConditionMap));
        }
        // Return the documents that were actually extracted. The previous implementation
        // returned a set that was never filled, so callers always saw "nothing extracted".
        return extractedFiles;
    }

    /**
     * Detects the content type of a file and extracts its text into the given meta item.
     *
     * <p>Nothing is extracted when the file is missing, or when the meta item already
     * carries an error status whose code lies in the text-extraction range; in the latter
     * case the text-extraction marker is removed from the file. Failures are recorded on
     * the meta item's error status, flagged with the error marker and reported on the
     * console. Any exception thrown while extracting is logged and not propagated.
     *
     * @param file     workspace file to read
     * @param metaItem meta item that receives content type, text and status information
     * @return the same {@code metaItem} instance that was passed in
     */
    private DocumentMetaItem extractText(IFile file, DocumentMetaItem metaItem) {
        if (file == null || !file.exists()) {
            log.warn("Text extraction failed - File not found");
            return metaItem;
        }
        ErrorStatusMetaData emd = new ErrorStatusMetaData(metaItem);
        RepositoryMetaData rmd = new RepositoryMetaData(metaItem);
        if (emd.hasErrorStatus() && ErrorCodeRange.TEXT_EXTRACTION.isInRange(emd.getErrorCode())) {
            // A previous attempt already failed for this document; do not retry.
            MarkerUtil.clean((IResource)file, (String)"it.jrc.osint.extract.text.textExtractionMarker");
            return metaItem;
        }
        try {
            TextExtractionService ts = TextExtractPlugin.getTextExtractionService();
            String contentType;
            // Streams obtained from IFile.getContents() must be closed by the caller.
            InputStream detectionStream = file.getContents();
            try {
                contentType = ts.detectContentType(detectionStream, file.getName());
            }
            finally {
                TextExtractionBuilder.closeQuietly(detectionStream);
            }
            if (contentType == null) {
                emd.setOperationStatus(new OperationStatus(ErrorConditions.getErrorConditions().getErrorCondition(ErrorConditions.CONTENT_TYPE_DETECTION_FAILED)));
                OSINTConsolePlugin.getConsole().error("Failed to detect content type for document " + file.getName() + "  source: " + metaItem.getLink());
            } else {
                metaItem.setContentType(contentType);
            }
            // NOTE(review): a null contentType is still handed to canExtract(); if that
            // returns false the detection-failure status set above is replaced by the
            // unsupported-content-type status below - confirm this is intended.
            if (ts.canExtract(contentType)) {
                OperationStatus status;
                InputStream extractionStream = file.getContents();
                try {
                    status = ts.extract(extractionStream, file.getName(), metaItem);
                }
                finally {
                    TextExtractionBuilder.closeQuietly(extractionStream);
                }
                if (status.isOK()) {
                    rmd.setTextExtracted(true);
                    TextExtractPlugin.getEntityExtractionService().detectLanguage(metaItem);
                } else {
                    rmd.setTextExtracted(false);
                    emd.setOperationStatus(status);
                    this.markError((IResource)file);
                    OSINTConsolePlugin.getConsole().error("Failed to extract text from document " + file.getName() + "  source: " + metaItem.getLink());
                }
            } else {
                rmd.setTextExtracted(false);
                emd.setOperationStatus(new OperationStatus(ErrorConditions.getErrorConditions().getErrorCondition(ErrorConditions.UNSUPPORTED_CONTENT_TYPE)));
                this.markError((IResource)file);
                OSINTConsolePlugin.getConsole().error("Unsupported content type: Failed to extract text from document " + file.getName() + "  source: " + metaItem.getLink());
            }
        }
        catch (Throwable t) {
            log.error("Failed to extract text from file " + file.getProjectRelativePath(), t);
        }
        return metaItem;
    }

    /**
     * Closes a stream, logging instead of propagating a failure to close.
     *
     * @param stream stream to close; {@code null} is ignored
     */
    private static void closeQuietly(InputStream stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        }
        catch (IOException e) {
            log.warn("Failed to close file content stream: " + e);
        }
    }

    /**
     * Removes the text-extraction marker from the file behind each given resource id.
     *
     * @param ids ids of the documents whose marker is removed
     * @param prj case project used to resolve each id to its file
     */
    private void cleanMarker(Set<ResourceId> ids, CaseProject prj) {
        final String markerId = "it.jrc.osint.extract.text.textExtractionMarker";
        for (ResourceId documentId : ids) {
            // The cast keeps the call bound to the IResource variant of the helper.
            MarkerUtil.clean((IResource)prj.getFile(documentId), markerId);
        }
    }

    /**
     * Discards the extracted text of every meta item in the project and schedules all of
     * them for a new extraction.
     *
     * <p>For each item the text and the text-extraction related status are reset (see
     * {@code cleanMetaItem}), the error-status marker is removed and the text-extraction
     * marker is set. The cleaned items are saved in one call once all items have been
     * processed. {@code monitor.done()} is always invoked, even when an exception escapes.
     *
     * @param prj     case project whose meta items are cleaned
     * @param monitor progress monitor; advanced by one unit per item
     */
    public void clean(CaseProject prj, IProgressMonitor monitor) {
        log.debug("Clean requested");
        try {
            // Typed producer: iterating a raw Producer yields Object and does not
            // type-check against the DocumentMetaItem loop variable.
            Producer<DocumentMetaItem> producer = prj.getMetaItemProducer();
            int workItemCount = producer.expectedItemCount();
            LinkedList<DocumentMetaItem> cleanedItems = new LinkedList<DocumentMetaItem>();
            monitor.beginTask("Cleaning extracted texts", workItemCount);
            for (DocumentMetaItem item : producer) {
                this.cleanMetaItem(item);
                cleanedItems.add(item);
                MarkerUtil.clean((ResourceId)item.getResourceId(), (String)"it.jrc.osint.workspace.errorStatusMarker");
                MarkerUtil.mark((ResourceId)item.getResourceId(), (String)"it.jrc.osint.extract.text.textExtractionMarker");
                monitor.worked(1);
            }
            prj.saveMetaItems(cleanedItems);
        }
        finally {
            monitor.done();
        }
    }

    /**
     * Resets the text-extraction state stored on a meta item.
     *
     * <p>An error status is cleared only when its code lies in the text-extraction
     * range; the text is set to {@code null} and the text-extracted flag to
     * {@code false} in every case. A {@code null} item is ignored.
     *
     * @param item meta item to reset; may be {@code null}
     */
    private void cleanMetaItem(DocumentMetaItem item) {
        if (item != null) {
            ErrorStatusMetaData errorStatus = new ErrorStatusMetaData(item);
            RepositoryMetaData repositoryData = new RepositoryMetaData(item);
            boolean hasExtractionError = errorStatus.hasErrorStatus()
                    && ErrorCodeRange.TEXT_EXTRACTION.isInRange(errorStatus.getErrorCode());
            if (hasExtractionError) {
                errorStatus.cleanErrorStatus();
            }
            item.setText(null);
            repositoryData.setTextExtracted(false);
        }
    }

    /**
     * Task run by the parallel processor: resolves the workspace file of a meta item and
     * delegates to {@code extractText} of the enclosing builder.
     */
    private class TextExtractionTask
    implements Task<DocumentMetaItem> {
        /**
         * @return the display name of this task
         */
        public String getName() {
            return "Text Extraction";
        }

        /**
         * No initialisation is required.
         */
        public void init(ProcessingContext context) throws ProcessingException {
        }

        /**
         * Cancellation needs no task-specific handling.
         */
        public void cancel() {
        }

        /**
         * Extracts the text of a single document.
         *
         * @param inputItem meta item to process; may be {@code null}
         * @return the processed meta item; {@code inputItem} unchanged when its file
         *         cannot be resolved or does not exist; {@code null} for a {@code null}
         *         input
         * @throws ProcessingException when resolving the file or extracting the text
         *         throws
         */
        public DocumentMetaItem process(DocumentMetaItem inputItem) throws ProcessingException {
            // Check for null before touching the item; previously the item was
            // dereferenced first, turning a null item into a ProcessingException.
            if (inputItem == null) {
                return null;
            }
            try {
                WorkspaceService ws = TextExtractPlugin.getWorkspaceService();
                ResourceId id = inputItem.getResourceId();
                IFile file = ws.getFile(id);
                if (file != null && file.exists()) {
                    return TextExtractionBuilder.this.extractText(file, inputItem);
                }
                return inputItem;
            }
            catch (Throwable t) {
                log.error("Failed to extract text", t);
                throw new ProcessingException("Failed to extract text", t);
            }
        }
    }
}

