/*
 * Decompiled with CFR 0.152.
 */
package it.jrc.emmcrawler;

import it.jrc.emmcrawler.Crawler;
import it.jrc.emmcrawler.Href;
import it.jrc.emmutils.HTTPStream;
import it.jrc.emmutils.HTTPUrl;
import it.jrc.htmlparser.HTMLDocument;
import it.jrc.htmlparser.HTMLDocumentBuilder;
import it.jrc.htmlparser.HTMLNode;
import it.jrc.htmlparser.TextFilter;
import it.jrc.htmlparser.Token;
import it.jrc.osint.logging.LogManager;
import it.jrc.osint.logging.Logger;
import it.jrc.rss.RSSItem;
import it.jrc.rss.SimpleElement;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.Vector;
import java.util.regex.Pattern;

/**
 * Crawler worker thread.
 *
 * <p>The thread repeatedly asks its parent {@link Crawler} for the next {@link Href},
 * downloads it through an {@link HTTPStream}, converts the response into an
 * {@link RSSItem} and hands the result back to the parent via {@code putItem} (an item
 * was produced) or {@code failItem} (no item). When the parent returns no further
 * {@code Href}, {@code parent.done()} is called and the thread ends.
 *
 * <p>The setters are plain, unsynchronized field writes; call them before
 * {@link #start()}.
 */
public class GetItems extends Thread {
    /** Not referenced anywhere in this class. */
    private static final boolean DEBUG = false;

    /** HTTP status codes that {@link #get(Href)} distinguishes. */
    private static final int HTTP_OK = 200;
    private static final int HTTP_MOVED_PERMANENTLY = 301;
    private static final int HTTP_NOT_MODIFIED = 304;

    /** Character set used when the HTTP stream does not report an encoding. */
    private static final String DEFAULT_ENCODING = "iso-8859-1";

    private final Logger logger;
    private final HTMLDocumentBuilder htmlDocBuilder;
    private final Crawler parent;
    /** Passed to {@link HTTPStream} while {@code href.xhost == 0}; {@code null} means no pattern. */
    private Pattern xhostPattern;
    /** Not referenced anywhere in this class. */
    private String lastBuildDate;
    /** Extracted text is only stored on the item when it is longer than this. */
    private int minTextSize;
    /** Upper bound (milliseconds) of the random pause before each fetch; {@code <= 0} disables it. */
    private int nRandomDelay;
    /** {@code null}/"news", "plain" or "rss"; selects the text extraction mode in {@link #extractText}. */
    private String htmlType;
    /** Fixed source name for produced items; when {@code null} the stream's host is used. */
    private String source;

    /**
     * Creates a worker bound to the given crawler.
     *
     * @param c the crawler that supplies work and receives results
     * @param n worker number, used only as a suffix of the logger name
     */
    public GetItems(Crawler c, int n) {
        this.logger = LogManager.getLogger("it.jrc.emmcrawler.GetItems:" + n);
        this.parent = c;
        this.xhostPattern = null;
        this.minTextSize = 0;
        this.htmlDocBuilder = new HTMLDocumentBuilder();
    }

    /** Sets the length that extracted text must exceed to be stored on an item. */
    public void setMinTextSize(int minSize) {
        this.minTextSize = minSize;
    }

    /** Sets the upper bound, in milliseconds, of the random pause before each fetch. */
    public void setRandomDelay(int delay) {
        this.nRandomDelay = delay;
    }

    /** Sets the source name written to every item; {@code null} falls back to the stream's host. */
    public void setSource(String s) {
        this.source = s;
    }

    /** Sets the HTML handling mode ("news", "plain" or "rss"; {@code null} behaves like "news"). */
    public void setHtmlType(String t) {
        this.htmlType = t;
    }

    /**
     * Compiles and stores the cross-host pattern (case-insensitive).
     *
     * @param x regular expression; {@code null} clears the pattern instead of throwing
     * @throws java.util.regex.PatternSyntaxException if {@code x} is not a valid regex
     */
    public void setXhostPattern(String x) {
        this.xhostPattern = x == null ? null : Pattern.compile(x, Pattern.CASE_INSENSITIVE);
    }

    /**
     * Tells whether the string contains at least one letter or digit.
     * Works on code points so that supplementary-plane letters are recognised too.
     */
    private boolean hasAlphaNum(String s) {
        int i = 0;
        while (i < s.length()) {
            int codePoint = s.codePointAt(i);
            if (Character.isLetterOrDigit(codePoint)) {
                return true;
            }
            i += Character.charCount(codePoint);
        }
        return false;
    }

    /**
     * Removes the first and last character of a raw attribute value.
     *
     * <p>NOTE(review): attribute values appear to be stored including their surrounding
     * quotes (the rel/type comparisons in {@link #addFeedElements} include the quote
     * characters) - confirm against the tokenizer.
     *
     * @return the inner text, or {@code null} when the value is {@code null} or too short
     *         to carry two delimiters
     */
    private static String stripDelimiters(String raw) {
        if (raw == null || raw.length() < 2) {
            return null;
        }
        return raw.substring(1, raw.length() - 1);
    }

    /**
     * Parses an HTML response and builds the item for it: title, link, source, advertised
     * RSS feeds, optionally the extracted text, and the list of outgoing links/frames
     * (stored through {@code item.setInfo}).
     *
     * @param httpStream open stream positioned at the response body
     * @param href       the link being processed
     * @return the populated item (never {@code null})
     * @throws IOException if reading or parsing fails
     */
    private RSSItem processHTML(HTTPStream httpStream, Href href) throws IOException {
        HTTPUrl pageUrl = href.httpUrl;
        String lastModified = httpStream.getLastModified();
        String encoding = httpStream.getEncoding();
        if (encoding == null) {
            encoding = DEFAULT_ENCODING;
        }
        HTMLDocument htmlDoc = this.htmlDocBuilder.parse((InputStream) httpStream, encoding);

        // Prefer the anchor text that led here; fall back to the document's own title.
        String title = href.title;
        if (title == null || title.isEmpty()) {
            title = htmlDoc.getTitle();
        }
        String src = this.source;
        if (src == null) {
            src = httpStream.getHost();
        }

        RSSItem item = new RSSItem();
        item.setTitle(title);
        item.setLink(href.url);
        item.setLastModified(lastModified);
        item.setSource(src);

        HTTPUrl docBase = this.resolveDocBase(htmlDoc, pageUrl);
        this.addFeedElements(htmlDoc, docBase, item);
        this.extractText(httpStream, href, htmlDoc, item, title, src);
        item.setInfo(this.collectLinks(htmlDoc, docBase, href, title));
        return item;
    }

    /** Returns the URL of the first usable {@code <base href>} element, or {@code pageUrl} if there is none. */
    private HTTPUrl resolveDocBase(HTMLDocument htmlDoc, HTTPUrl pageUrl) throws IOException {
        Vector<?> baseNodes = htmlDoc.getNodeList("base");
        if (baseNodes.isEmpty()) {
            return pageUrl;
        }
        HTMLNode node = (HTMLNode) baseNodes.get(0);
        String inner = stripDelimiters((String) node.getToken().attList.get("href"));
        if (inner == null) {
            return pageUrl;
        }
        String baseHref = inner.trim();
        // Emptiness is checked after stripping so that href="" does not produce an empty base URL.
        return baseHref.isEmpty() ? pageUrl : new HTTPUrl(baseHref);
    }

    /** Adds one {@code emm:feed} element per {@code <link rel="alternate" type="application/rss+xml">}. */
    private void addFeedElements(HTMLDocument htmlDoc, HTTPUrl docBase, RSSItem item) throws IOException {
        Vector<?> linkElements = htmlDoc.getNodeList("link");
        for (Object element : linkElements) {
            HTMLNode node = (HTMLNode) element;
            HashMap<?, ?> attList = node.getToken().attList;
            if (!"\"alternate\"".equals(attList.get("rel"))
                    || !"\"application/rss+xml\"".equals(attList.get("type"))) {
                continue;
            }
            String url = stripDelimiters((String) attList.get("href"));
            if (url == null) {
                // Missing or malformed href: skip this element instead of failing the whole page.
                continue;
            }
            HTTPUrl feedUrl = new HTTPUrl(docBase, url);
            item.addElement(new SimpleElement("emm:feed", feedUrl.getUrl()));
        }
    }

    /**
     * Extracts text, description and images into the item, unless the HTML type is "rss"
     * or the title is empty. Failures are logged and leave the item partially filled.
     */
    private void extractText(HTTPStream httpStream, Href href, HTMLDocument htmlDoc,
            RSSItem item, String title, String src) {
        String type = this.htmlType;
        if ("rss".equals(type) || title == null || title.isEmpty()) {
            return;
        }
        try {
            TextFilter textFilter = new TextFilter();
            String text = null;
            if (type == null || type.equals("news")) {
                textFilter.parse(htmlDoc, null);
                text = textFilter.getPlainText();
            } else if (type.equals("plain")) {
                textFilter.parseAllText(htmlDoc, null);
                text = textFilter.getPlainText();
            }
            if (text != null && text.length() > this.minTextSize) {
                item.setText(text);
                item.setLink(httpStream.getURI());
                item.setDescription(textFilter.getDescription());
                item.setContentType("text/html");
                item.createGuid(src, text);
            }
            item.setImages(htmlDoc.getImages(".jp", true));
        }
        catch (Exception ex) {
            // Keep the original message but also pass the exception so the stack trace is not lost.
            this.logger.error(String.valueOf(href.url) + ": " + ex.getMessage(), (Throwable) ex);
        }
    }

    /**
     * Builds the de-duplicated list of child links: first the document's anchors, then any
     * frame that is not already an anchor key. Every child gets the parent's confidence
     * minus one, the parent's level plus one and the parent's xhost counter.
     */
    private ArrayList<Href> collectLinks(HTMLDocument htmlDoc, HTTPUrl docBase, Href href, String title)
            throws IOException {
        int confidence = href.confidence - 1;
        Set<String> seenUrls = new HashSet<>();
        ArrayList<Href> hrefs = new ArrayList<>();

        HashMap<?, ?> links = htmlDoc.getHrefs();
        for (Map.Entry<?, ?> link : links.entrySet()) {
            Href h = new Href(docBase, (String) link.getKey());
            if (!seenUrls.add(h.url)) {
                continue;
            }
            h.confidence = confidence;
            h.level = href.level + 1;
            h.xhost = href.xhost;
            String text = (String) link.getValue();
            if (text != null && this.hasAlphaNum(text)) {
                h.title = text.trim();
            }
            hrefs.add(h);
        }

        String frameTitle = title == null ? "" : title;
        Vector<?> frames = htmlDoc.getFrames();
        for (Object frame : frames) {
            String frameUrl = (String) frame;
            if (links.containsKey(frameUrl)) {
                continue;
            }
            Href h = new Href(docBase, frameUrl);
            // Recording the URL here also prevents the same frame from being queued twice.
            if (!seenUrls.add(h.url)) {
                continue;
            }
            h.title = frameTitle;
            h.confidence = confidence;
            h.level = href.level + 1;
            h.xhost = href.xhost;
            hrefs.add(h);
        }
        return hrefs;
    }

    /**
     * Builds a placeholder item for a non-HTML document; the body is not read here.
     *
     * @param httpStream  stream whose URI becomes the item link
     * @param href        unused
     * @param contentType translated content type stored on the item
     */
    private RSSItem processBinary(HTTPStream httpStream, Href href, String contentType) {
        RSSItem item = new RSSItem();
        // NOTE(review): "Linke" looks like a typo, but the title is emitted at runtime and
        // is therefore left untouched.
        item.setTitle("Linke to binary document of type " + contentType);
        item.setContentType(contentType);
        item.setLink(httpStream.getURI());
        return item;
    }

    /**
     * Dispatches on the stream's content type: "html"/"text" prefixes are parsed as HTML,
     * anything the parent reports as extractable becomes a binary placeholder item.
     *
     * @return the item, or {@code null} when the type is missing or not handled
     */
    private RSSItem ItemfromWeb(HTTPStream httpStream, Href href) throws Exception {
        String contentType = httpStream.getType();
        if (contentType == null) {
            this.logger.warn("no content type for " + href.url);
            return null;
        }
        if (contentType.startsWith("html") || contentType.startsWith("text")) {
            return this.processHTML(httpStream, href);
        }
        String mimeType = this.translateContentType(contentType);
        if (this.parent.canExtract(mimeType)) {
            return this.processBinary(httpStream, href, mimeType);
        }
        return null;
    }

    /**
     * Maps the short type names "xml", "html", "text" and "pdf" to MIME types; any other
     * value (including {@code null}) is returned unchanged.
     */
    private String translateContentType(String httpStreamContentType) {
        if (httpStreamContentType == null) {
            return null;
        }
        switch (httpStreamContentType) {
            case "xml":
                return "application/xml";
            case "html":
                return "text/html";
            case "text":
                return "text/plain";
            case "pdf":
                return "application/pdf";
            default:
                return httpStreamContentType;
        }
    }

    /**
     * Fetches one link and turns the response into an item.
     *
     * <p>Side effects on {@code href}: {@code retCode} receives the HTTP status; {@code url}
     * is replaced by the redirect location (301) or the stream's final URI (200);
     * {@code xhost} is incremented when the stream reports a cross-host fetch.
     *
     * @return the previously known item on 304, a new item on 200 (only while
     *         {@code href.xhost <= 0}), otherwise {@code null}; also {@code null} on any exception
     */
    private RSSItem get(Href href) {
        RSSItem item = null;
        try {
            RSSItem known = this.parent.checkItem(href.url);
            String lastModified = known != null ? known.getLastModified() : null;
            // The cross-host pattern is only handed to the stream while xhost is still 0.
            Pattern xhost = href.xhost == 0 ? this.xhostPattern : null;
            try (HTTPStream httpStream = new HTTPStream(href.url, lastModified, xhost)) {
                int status = httpStream.getStatus();
                href.retCode = status;
                if (status == HTTP_NOT_MODIFIED) {
                    item = known;
                } else if (status == HTTP_MOVED_PERMANENTLY) {
                    String location = httpStream.getRedirectLocation();
                    this.logger.warn("redirect not allowed to " + location + " for " + href.url);
                    href.url = location;
                } else if (status == HTTP_OK) {
                    href.url = httpStream.getURI();
                    if (httpStream.getXhost()) {
                        ++href.xhost;
                    }
                    if (href.xhost <= 0) {
                        item = this.ItemfromWeb(httpStream, href);
                    }
                } else {
                    this.logger.error("statuscode " + status + " for " + href.url);
                }
            }
        }
        catch (Exception e) {
            this.logger.error("could not get " + href.url, (Throwable) e);
            item = null;
        }
        return item;
    }

    /**
     * Pauses for a random time in {@code [0, nRandomDelay)} milliseconds by waiting on
     * this object's monitor.
     */
    private synchronized void delay() {
        long millis = (long) ((double) this.nRandomDelay * Math.random());
        if (millis <= 0L) {
            // Object.wait(0) means "wait until notified", so a zero draw must not reach
            // wait() or this worker would block indefinitely.
            return;
        }
        try {
            this.wait(millis);
        }
        catch (InterruptedException ie) {
            // NOTE(review): the interrupt is only logged and the flag is not restored;
            // run() does not check it, so the worker simply carries on with the next item.
            this.logger.error("random delay wait interrupted");
        }
    }

    /**
     * Worker loop: pulls links from the parent until it returns {@code null}, reporting
     * each result through {@code putItem} or {@code failItem}.
     */
    @Override
    public void run() {
        this.logger.info("running: xhost=" + this.xhostPattern + " minTextSize=" + this.minTextSize + " source=" + this.source + " htmlType=" + this.htmlType);
        try {
            Href href = this.parent.getNext(null);
            while (href != null) {
                if (this.nRandomDelay > 0) {
                    this.delay();
                }
                RSSItem item = this.get(href);
                if (item != null) {
                    this.parent.putItem(item);
                } else {
                    this.parent.failItem(href);
                }
                href = this.parent.getNext(href);
            }
        }
        finally {
            // Report completion even if an unchecked exception escapes the loop.
            this.parent.done();
        }
    }
}

