/*
 * Decompiled with CFR 0.152.
 */
package test.crawler;

import it.jrc.emmcrawler.Crawler;
import it.jrc.osint.logging.LogManager;
import it.jrc.osint.logging.Logger;
import it.jrc.rss.RSS;
import it.jrc.rss.RSSItem;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Vector;

/**
 * Minimal {@link Crawler} subclass that collects every reported {@link RSSItem}
 * into a single in-memory {@link RSS} object, skipping items whose link has
 * already been collected.
 *
 * <p>NOTE(review): the crawler is configured with several workers, so
 * {@link #newItem(RSSItem)} may be invoked concurrently; neither the URL set nor
 * the RSS container is guarded here. Confirm the threading contract of
 * {@code Crawler} before relying on this class from multiple threads.
 */
public class SimpleCrawler extends Crawler {
    private static final Logger log = LogManager.getLogger(SimpleCrawler.class.getName());

    /** Content type this crawler accepts for extraction. */
    private static final String HTML_CONTENT_TYPE = "text/html";

    /** Collected items, in the order they were first reported. */
    private final RSS rss;

    /** Links of the items already stored in {@link #rss}; used for de-duplication. */
    private final HashSet<String> urls;

    /**
     * Creates a crawler over the given start URLs.
     *
     * @param nWorkers  number of workers, forwarded to {@code setWorkers}
     * @param startUrls URLs the crawl starts from, forwarded to the superclass
     * @param maxDepth  maximum depth, forwarded to the superclass
     */
    public SimpleCrawler(int nWorkers, ArrayList<String> startUrls, int maxDepth) {
        // The third superclass argument is deliberately left unset (null), as in the original.
        super(startUrls, maxDepth, null);
        this.setWorkers(nWorkers);
        this.rss = new RSS();
        this.urls = new HashSet<>();
    }

    /**
     * Returns the RSS container holding every distinct item collected so far.
     *
     * @return the live RSS object (not a copy)
     */
    public RSS getRSS() {
        return this.rss;
    }

    /**
     * Stores the item unless an item with the same link was stored before.
     *
     * @param item the newly reported item
     */
    @Override
    public void newItem(RSSItem item) {
        String link = item.getLink();
        if (this.urls.contains(link)) {
            return;
        }
        // Record the link only after the item was added, so a failing addItem
        // does not mark the link as seen.
        this.rss.addItem(item);
        this.urls.add(link);
    }

    /** Called when the crawl is finished; nothing to do for this crawler. */
    @Override
    public void finished() {
    }

    /** Placeholder for logging configuration; intentionally empty. */
    private static void setupLogging() {
    }

    /**
     * Accepts only documents whose content type is exactly {@code text/html}.
     *
     * @param contentType the content type to check; {@code null} is rejected
     * @return {@code true} if the content type equals {@code text/html}
     */
    @Override
    protected boolean canExtract(String contentType) {
        // Constant-first comparison: a missing content type yields false instead of an NPE.
        return HTML_CONTENT_TYPE.equals(contentType);
    }

    /**
     * Demo entry point: crawls a fixed start URL with three workers and depth 1,
     * then logs every collected item at debug level.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        SimpleCrawler.setupLogging();
        ArrayList<String> urls = new ArrayList<String>();
        String url = "http://www.golem.de";
        urls.add(url);
        SimpleCrawler simpleCrawler = new SimpleCrawler(3, urls, 1);
        simpleCrawler.setHtmlType("news");
        // NOTE(review): the source label "bbc" does not match the golem.de start URL — confirm it is intended.
        simpleCrawler.setSource("bbc");
        simpleCrawler.start();
        simpleCrawler.sync();
        RSS rss = simpleCrawler.getRSS();
        // Iterate as Object: this compiles whether getItems() returns a raw or a
        // parameterized Vector, and toString() is dispatched dynamically either way.
        Vector<?> items = rss.getItems();
        for (Object anItem : items) {
            log.debug(anItem.toString());
        }
    }
}

