package cn.edu.hfut.dmic.webcollector.example;

import cn.edu.hfut.dmic.webcollector.model.CrawlDatum;
import cn.edu.hfut.dmic.webcollector.model.CrawlDatums;
import cn.edu.hfut.dmic.webcollector.model.Page;
import cn.edu.hfut.dmic.webcollector.plugin.ram.RamCrawler;
import com.alipay.sdk.cons.a;
import java.net.URLEncoder;
import org.jsoup.select.Elements;
import org.nutz.ioc.meta.IocValue;

/* loaded from: classes.dex */
/**
 * Example in-memory crawler that fetches Bing result pages for a keyword and
 * then follows every result link found on those pages.
 *
 * <p>Each task carries its context as string metadata: {@code keyword},
 * {@code pageNum}, {@code pageType} ({@code "searchEngine"} for a result page,
 * {@code "outlink"} for a page reached from a result link), {@code depth}, and,
 * for outlinks only, {@code rank} plus the key held in
 * {@code IocValue.TYPE_REFER}.
 */
public class DemoBingCrawler extends RamCrawler {
    /**
     * Registers one seed per Bing result page, for page numbers 1 through
     * {@code i} inclusive.
     *
     * @param str the search keyword
     * @param i   the number of result pages to seed; no seed is added when it is below 1
     * @throws Exception if building a search URL fails
     */
    public DemoBingCrawler(String str, int i) throws Exception {
        int pageNum = 1;
        while (pageNum <= i) {
            // NOTE(review): the seed depth is taken from the constant a.d, whose value is
            // not visible in this file; visit() parses "depth" as an int, so it must be
            // numeric text — confirm.
            addSeed(
                    new CrawlDatum(createBingUrl(str, pageNum))
                            .putMetaData("keyword", str)
                            .putMetaData("pageNum", String.valueOf(pageNum))
                            .putMetaData("pageType", "searchEngine")
                            .putMetaData("depth", a.d));
            pageNum++;
        }
    }

    /**
     * Builds the Bing search URL for one page of results.
     *
     * @param str the search keyword; it is URL-encoded as UTF-8
     * @param i   the page number; page 1 maps to {@code first=1}, page 2 to
     *            {@code first=11}, and so on
     * @return the search URL
     * @throws Exception if the keyword cannot be encoded
     */
    public static String createBingUrl(String str, int i) throws Exception {
        String encodedKeyword = URLEncoder.encode(str, "utf-8");
        int firstResult = (i * 10) - 9;
        return String.format("http://cn.bing.com/search?q=%s&first=%s", encodedKeyword, firstResult);
    }

    /**
     * Runs the demo: crawls the first three result pages for a fixed keyword.
     *
     * @param strArr command-line arguments (unused)
     * @throws Exception if the crawler cannot be created or started
     */
    public static void main(String[] strArr) throws Exception {
        DemoBingCrawler crawler = new DemoBingCrawler("网络爬虫", 3);
        crawler.start();
    }

    /**
     * Handles one fetched page according to its {@code pageType} metadata.
     *
     * <p>A {@code "searchEngine"} page is reported on standard output and each
     * result link on it is queued as an {@code "outlink"} task one level deeper.
     * An {@code "outlink"} page is only reported. Any other page type is ignored.
     *
     * @param page        the fetched page together with its metadata
     * @param crawlDatums collector for follow-up tasks
     */
    @Override // cn.edu.hfut.dmic.webcollector.fetcher.Visitor
    public void visit(Page page, CrawlDatums crawlDatums) {
        final String keyword = page.getMetaData("keyword");
        final String pageType = page.getMetaData("pageType");
        final int depth = Integer.parseInt(page.getMetaData("depth"));

        if (pageType.equals("searchEngine")) {
            final int pageNum = Integer.parseInt(page.getMetaData("pageNum"));
            // Message: "successfully fetched page <pageNum> of the search results for <keyword>".
            System.out.println("成功抓取关键词" + keyword + "的第" + pageNum + "页搜索结果");

            // Anchors directly inside the <h2> of each li.b_ans / li.b_algo item.
            Elements resultLinks = page.select("li.b_ans h2>a,li.b_algo h2>a");
            final String pageNumText = String.valueOf(pageNum);
            final String childDepth = String.valueOf(depth + 1);
            for (int rank = 0; rank < resultLinks.size(); rank++) {
                // "abs:href" asks jsoup for the link target as an absolute URL.
                String target = resultLinks.get(rank).attr("abs:href");
                crawlDatums.add(
                        new CrawlDatum(target)
                                .putMetaData("keyword", keyword)
                                .putMetaData("pageNum", pageNumText)
                                .putMetaData("rank", String.valueOf(rank))
                                .putMetaData("pageType", "outlink")
                                .putMetaData("depth", childDepth)
                                .putMetaData(IocValue.TYPE_REFER, page.getUrl()));
            }
            return;
        }

        if (pageType.equals("outlink")) {
            final int pageNum = Integer.parseInt(page.getMetaData("pageNum"));
            // The stored rank is zero-based; it is printed one-based.
            final int displayRank = Integer.parseInt(page.getMetaData("rank")) + 1;
            final String title = page.getDoc().title();
            final int byteCount = page.getContent().length;
            final String referrer = page.getMetaData(IocValue.TYPE_REFER);
            // Message: "result <rank> of page <pageNum>: <title> (<n> bytes), depth, refer".
            System.out.println(String.format("第%s页第%s个结果:%s(%s字节)\tdepth=%s\trefer=%s", pageNum, displayRank, title, byteCount, depth, referrer));
        }
    }
}
