/*
 * Decompiled with CFR 0.152.
 */
package it.jrc.osint.grabber.internal;

import it.jrc.emmutils.HTTPStream;
import it.jrc.osint.ContentType;
import it.jrc.osint.ContentTypes;
import it.jrc.osint.DocumentMetaItem;
import it.jrc.osint.console.Console;
import it.jrc.osint.grabber.internal.GrabberServiceImpl;
import it.jrc.osint.logging.LogManager;
import it.jrc.osint.logging.Logger;
import it.jrc.osint.metadata.AcquisitionMetaData;
import it.jrc.osint.metadata.GrabberMetaData;
import it.jrc.osint.operations.FetchStatus;
import it.jrc.osint.util.concurrent.AbstractProcessingTask;
import it.jrc.osint.util.concurrent.ProcessingContext;
import it.jrc.osint.util.concurrent.ProcessingException;
import it.jrc.osint.util.io.FileUtil;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Date;
import java.util.Random;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Processing task that fetches the document a {@link DocumentMetaItem} links to
 * over HTTP and copies its content into a temporary file obtained from the
 * {@link GrabberServiceImpl}.
 *
 * <p>The outcome of a fetch is recorded through {@link GrabberMetaData} and
 * {@link AcquisitionMetaData} objects constructed from the item (presumably
 * views backed by the item itself -- confirm against those classes), and
 * progress and failures are reported to the {@link Console} passed to the
 * constructor.
 */
public class GrabItemTask
extends AbstractProcessingTask<DocumentMetaItem> {
    private static final Logger log = LogManager.getLogger(GrabItemTask.class.getName());

    /** HTTP status code treated as a successful response. */
    private static final int HTTP_OK = 200;

    /** Fixed part of the pause inserted before a fetch when the random delay is enabled. */
    private static final long BASE_DELAY_MS = 3000L;

    /** Size of the chunk used when copying content to the temporary file. */
    private static final int COPY_BUFFER_SIZE = 8192;

    /** Longest link suffix that is still considered a file extension. */
    private static final int MAX_EXTENSION_LENGTH = 4;

    /** Matches any encoding label that mentions UTF-8, regardless of case. */
    private static final Pattern UTF8_PATTERN = Pattern.compile("utf-8", Pattern.CASE_INSENSITIVE);

    private long httpTimeoutMillis = 30000L;
    private int minimumRandomDelayMs = 0;
    private final Date timestamp;
    private final Console console;
    private final GrabberServiceImpl grabberService;

    /**
     * Creates a task; the creation time is later stamped on every fetched item.
     *
     * @param listener console that receives progress and error messages
     * @param grabberService service used to create the temporary content files
     */
    public GrabItemTask(Console listener, GrabberServiceImpl grabberService) {
        this.console = listener;
        this.grabberService = grabberService;
        this.timestamp = new Date();
    }

    /**
     * Copies the HTTP timeout and random-delay settings from the context when it
     * is a {@link GrabberServiceImpl.GrabberProcessingContext}; any other context
     * (including {@code null}) leaves the defaults untouched.
     *
     * @param context processing context supplied by the caller, may be {@code null}
     */
    public void init(ProcessingContext context) {
        // instanceof is already false for null, so no separate null check is needed.
        if (context instanceof GrabberServiceImpl.GrabberProcessingContext) {
            GrabberServiceImpl.GrabberProcessingContext ctx = (GrabberServiceImpl.GrabberProcessingContext)context;
            this.setHttpTimeoutMillis(ctx.getHttpTimeoutMillis());
            this.minimumRandomDelayMs = ctx.getMiniumRandomDelayMs();
        }
    }

    /**
     * Sets the timeout handed to the {@link HTTPStream} constructor.
     *
     * @param timeout timeout in milliseconds
     */
    public void setHttpTimeoutMillis(long timeout) {
        this.httpTimeoutMillis = timeout;
    }

    /**
     * Downloads the content behind the item's link and records the outcome.
     *
     * <p>The stream is closed on every exit path. Exceptions are reported to the
     * console and the log and recorded as {@link FetchStatus#FAILED}; errors
     * (for example {@link OutOfMemoryError}) are left to propagate to the caller.
     *
     * @param item item whose link is fetched; its link is replaced by the URI the
     *             stream reports when that is non-empty
     * @return the item, or {@code null} when the task was canceled right after
     *         the stream was opened
     */
    private DocumentMetaItem grabItem(DocumentMetaItem item) {
        this.setGUID(item);
        String link = item.getLink();
        log.debug("Starting download of " + link);
        GrabberMetaData gmd = new GrabberMetaData(item);
        AcquisitionMetaData amd = new AcquisitionMetaData(item);
        gmd.setTimeStamp(this.timestamp);
        HTTPStream httpStream = null;
        try {
            httpStream = new HTTPStream(link, StandardCharsets.ISO_8859_1.toString(), this.httpTimeoutMillis);
            if (this.isCanceled()) {
                return null;
            }
            String streamURI = httpStream.getURI();
            if (streamURI != null && !streamURI.isEmpty()) {
                item.setLink(streamURI);
                log.debug("Downloaded from " + streamURI);
            }
            if (httpStream.getStatus() != HTTP_OK || this.isCanceled()) {
                gmd.setFetchStatus(FetchStatus.FAILED);
                gmd.setFetchStatusDescription("FAILED: Server returned code: " + httpStream.getStatus());
                log.error("Fetch of URL " + item.getLink() + " failed.");
                this.console.error("Fetch of URL " + item.getLink() + " failed (Status code: " + httpStream.getStatus() + " )");
                return item;
            }
            gmd.setFetchStatus(FetchStatus.SUCCESS);
            gmd.setFetchStatusDescription("SUCCESS");
            amd.setSource("HTTP Web Grabber");
            String httpStreamContentType = httpStream.getType();
            ContentType detectedType = this.detectContentType(httpStreamContentType, item.getLink());
            if (detectedType == ContentType.NULL_CONTENT_TYPE) {
                gmd.setFetchStatus(FetchStatus.CONTENT_NOT_SUPPORTED);
                gmd.setFetchStatusDescription("FAILED: a supported content type could not be detected");
                item.setContentType(httpStreamContentType);
                return item;
            }
            String fileExtension = this.getPossibleFileExtensionFromLink(link);
            if (fileExtension == null) {
                fileExtension = detectedType.getDefaultFileExtension();
            }
            boolean success = detectedType.isBinary()
                ? this.downloadBinaryContent(httpStream, fileExtension, item)
                : this.downloadTextContent(httpStream, fileExtension, item);
            if (success) {
                // Only announce a download that actually produced a file.
                this.console.info("Downloaded " + item.getLink());
            } else {
                this.reportDownloadFailure(item, gmd);
            }
            return item;
        } catch (Exception e) {
            this.console.error("Failed to download URL " + item.getLink());
            log.error("Fetch of URL " + item.getLink() + " failed.", e);
            gmd.setFetchStatus(FetchStatus.FAILED);
            gmd.setFetchStatusDescription("FAILED: " + e.toString());
            return item;
        } finally {
            this.close(httpStream);
        }
    }

    /** Sends the failure reason currently held by {@code gmd} to the log and the console. */
    private void reportDownloadFailure(DocumentMetaItem item, GrabberMetaData gmd) {
        String message = "Fetch of URL " + item.getLink() + " failed. Reason: " + gmd.getFetchStatusDescription();
        log.error(message);
        this.console.error(message);
    }

    /**
     * Determines the content type, first from the type reported by the stream
     * and, when that yields nothing usable, from the link's file extension.
     *
     * @param httpStreamContentType type reported by the stream, may be {@code null}
     * @param link link whose extension serves as fallback, may be {@code null}
     * @return the detected type, or {@link ContentType#NULL_CONTENT_TYPE} when
     *         neither lookup succeeds; never {@code null}
     */
    private ContentType detectContentType(String httpStreamContentType, String link) {
        ContentType contentType = null;
        String mimeType = this.translateContentType(httpStreamContentType);
        if (mimeType != null) {
            contentType = ContentTypes.getContentTypeByMimeType(mimeType);
        }
        // Fall back to the extension both when the lookup returned null and when
        // it returned the "no type" sentinel (or no lookup was possible at all).
        if (contentType == null || contentType == ContentType.NULL_CONTENT_TYPE) {
            String fileExtension = this.getPossibleFileExtensionFromLink(link);
            if (fileExtension != null) {
                contentType = ContentTypes.getContentTypeByFileExtension(fileExtension);
            }
        }
        return contentType != null ? contentType : ContentType.NULL_CONTENT_TYPE;
    }

    /**
     * Extracts the text after the last dot of the link when it looks like a
     * known file extension.
     *
     * @param link link to inspect, may be {@code null}
     * @return the extension without the dot, or {@code null} when the link is
     *         {@code null}, has no dot, ends with the dot, the suffix is longer
     *         than {@value #MAX_EXTENSION_LENGTH} characters, or
     *         {@link ContentTypes#containsFileExtension} rejects it
     */
    private String getPossibleFileExtensionFromLink(String link) {
        if (link == null) {
            return null;
        }
        int extensionIdx = link.lastIndexOf('.');
        if (extensionIdx == -1 || extensionIdx + 1 >= link.length()) {
            return null;
        }
        String fileExtension = link.substring(extensionIdx + 1);
        if (fileExtension.length() > MAX_EXTENSION_LENGTH || !ContentTypes.containsFileExtension(fileExtension)) {
            return null;
        }
        return fileExtension;
    }

    /**
     * Maps the short type labels {@code xml}, {@code html}, {@code text} and
     * {@code pdf} to MIME types; any other value is returned unchanged.
     *
     * @param httpStreamContentType type reported by the stream, may be {@code null}
     * @return the MIME type, the input itself, or {@code null} for {@code null} input
     */
    private String translateContentType(String httpStreamContentType) {
        if (httpStreamContentType == null) {
            return null;
        }
        switch (httpStreamContentType) {
            case "xml":
                return "application/xml";
            case "html":
                return "text/html";
            case "text":
                return "text/plain";
            case "pdf":
                return "application/pdf";
            default:
                return httpStreamContentType;
        }
    }

    /**
     * Tells whether an encoding label mentions UTF-8 (case-insensitively).
     *
     * @param encoding encoding label, may be {@code null}
     * @return {@code true} when the label contains "utf-8" in any case
     */
    private boolean isUTF8(String encoding) {
        return encoding != null && UTF8_PATTERN.matcher(encoding).find();
    }

    /**
     * Resolves the charset reported by the stream, falling back to ISO-8859-1
     * (with a warning) when it is missing, malformed or unsupported.
     */
    private Charset resolveCharset(HTTPStream httpStream) {
        try {
            String encoding = httpStream.getEncoding();
            if (this.isUTF8(encoding)) {
                return StandardCharsets.UTF_8;
            }
            // Charset names are case-insensitive, so no locale-sensitive case
            // conversion is applied; a null name is rejected by forName.
            return Charset.forName(encoding == null ? null : encoding.trim());
        } catch (Exception e) {
            log.warn("Failed to obtain encoding from HTTP, using default ISO-8859-1", e);
            return StandardCharsets.ISO_8859_1;
        }
    }

    /**
     * Decodes the stream with its reported charset and writes it, re-encoded as
     * UTF-8, to a temporary file whose path is recorded on success.
     *
     * @param httpStream open stream to read from
     * @param fileExtension extension (without dot) for the temporary file
     * @param item item the fetch metadata is recorded for
     * @return {@code true} when the content was written completely,
     *         {@code false} when an exception occurred (status is set to FAILED)
     */
    private boolean downloadTextContent(HTTPStream httpStream, String fileExtension, DocumentMetaItem item) {
        GrabberMetaData gmd = new GrabberMetaData(item);
        Charset sourceCharset = this.resolveCharset(httpStream);
        Reader reader = null;
        Writer writer = null;
        try {
            reader = new InputStreamReader(new BufferedInputStream((InputStream)httpStream), sourceCharset);
            File tmpTextFile = this.grabberService.createTempFile(FileUtil.createFilenameFromURL(httpStream.getURI()), "." + fileExtension);
            writer = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(tmpTextFile)), StandardCharsets.UTF_8);
            char[] buffer = new char[COPY_BUFFER_SIZE];
            int count;
            while ((count = reader.read(buffer)) != -1) {
                writer.write(buffer, 0, count);
            }
            // Flush explicitly so a write error surfaces here rather than in close.
            writer.flush();
            gmd.setRawContentPath(tmpTextFile.getAbsolutePath());
            gmd.setFetchStatus(FetchStatus.SUCCESS);
            return true;
        } catch (Exception e) {
            log.error("Failed to download text content", e);
            gmd.setFetchStatus(FetchStatus.FAILED);
            gmd.setFetchStatusDescription(e.toString());
            return false;
        } finally {
            FileUtil.close(reader);
            FileUtil.close(writer);
        }
    }

    /**
     * Copies the stream byte-for-byte to a temporary file whose path is recorded
     * on success.
     *
     * @param httpStream open stream to read from
     * @param fileExtension extension (without dot) for the temporary file
     * @param item item the fetch metadata is recorded for
     * @return {@code true} when the content was written completely,
     *         {@code false} when an exception occurred (status is set to FAILED)
     */
    private boolean downloadBinaryContent(HTTPStream httpStream, String fileExtension, DocumentMetaItem item) {
        GrabberMetaData gmd = new GrabberMetaData(item);
        InputStream in = null;
        OutputStream out = null;
        try {
            in = new BufferedInputStream((InputStream)httpStream);
            File tmpBinFile = this.grabberService.createTempFile(FileUtil.createFilenameFromURL(httpStream.getURI()), "." + fileExtension);
            out = new BufferedOutputStream(new FileOutputStream(tmpBinFile));
            byte[] buffer = new byte[COPY_BUFFER_SIZE];
            int count;
            while ((count = in.read(buffer)) != -1) {
                out.write(buffer, 0, count);
            }
            // Flush explicitly so a write error surfaces here rather than in close.
            out.flush();
            gmd.setRawContentPath(tmpBinFile.getAbsolutePath());
            gmd.setFetchStatus(FetchStatus.SUCCESS);
            return true;
        } catch (Exception e) {
            log.error("Failed to download binary content", e);
            gmd.setFetchStatus(FetchStatus.FAILED);
            gmd.setFetchStatusDescription(e.toString());
            return false;
        } finally {
            FileUtil.close(in);
            FileUtil.close(out);
        }
    }

    /**
     * Gives the item a GUID derived from {@link System#nanoTime()} when it has none.
     * NOTE(review): nanoTime values are not guaranteed unique across threads or
     * JVMs -- confirm that is acceptable for GUIDs.
     */
    private void setGUID(DocumentMetaItem item) {
        if (item.getGuid() == null) {
            item.setGuid(Long.toString(System.nanoTime()));
        }
    }

    /** Closes the stream if it is non-null, logging (not propagating) any exception. */
    private void close(HTTPStream aStream) {
        if (aStream == null) {
            return;
        }
        try {
            aStream.close();
        } catch (Exception e) {
            log.error("Could not close HttpStream properly", e);
        }
    }

    /**
     * Sleeps for 3000 ms plus a random amount below the configured delay when
     * that delay is positive. An interrupt ends the pause early and the thread's
     * interrupt flag is restored so callers can still observe it.
     */
    private void pauseBeforeFetch() {
        if (this.minimumRandomDelayMs <= 0) {
            return;
        }
        // long arithmetic: the int sum could overflow for a very large bound.
        long delayMs = BASE_DELAY_MS + new Random().nextInt(this.minimumRandomDelayMs);
        try {
            Thread.sleep(delayMs);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            log.error("Failed to wait for a random delay", e);
        }
    }

    /**
     * Fetches the content for one item, optionally after a randomized pause.
     *
     * @param inputItem item whose link is to be fetched
     * @return the item (with fetch metadata recorded), or {@code null} when the
     *         task was canceled right after the stream was opened
     * @throws ProcessingException declared by the signature; the visible code
     *         does not throw it
     */
    public DocumentMetaItem process(DocumentMetaItem inputItem) throws ProcessingException {
        this.pauseBeforeFetch();
        GrabberMetaData gmd = new GrabberMetaData(inputItem);
        try {
            return this.grabItem(inputItem);
        } catch (OutOfMemoryError e) {
            log.error("Out of memory. Failed to fetch URL: " + inputItem.getLink(), e);
            throw e;
        } catch (Exception e) {
            log.error("Failed to fetch URL: " + inputItem.getLink(), e);
            gmd.setFetchStatus(FetchStatus.FAILED);
            gmd.setFetchStatusDescription("Could not be fetched: " + e.toString());
            return inputItem;
        }
    }
}

