package cn.edu.hfut.dmic.webcollector.crawler;

import cn.edu.hfut.dmic.webcollector.model.Links;
import cn.edu.hfut.dmic.webcollector.model.Page;
import cn.edu.hfut.dmic.webcollector.util.RegexRule;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:cn/edu/hfut/dmic/webcollector/crawler/BreadthCrawler.class */
/**
 * Crawler base class that optionally pre-populates the set of follow-up links
 * for each fetched page before handing the page to user code.
 *
 * <p>When {@link #isAutoParse() auto-parse} is enabled and the page is an HTML
 * document, links are collected from the parsed document using the configured
 * {@link RegexRule}. Subclasses implement {@link #visit(Page, Links)} to process
 * the page and may add further links to the supplied {@link Links} instance.
 */
public abstract class BreadthCrawler extends DeepCrawler {
    public static final Logger LOG = LoggerFactory.getLogger(BreadthCrawler.class);

    /** Whether links are extracted from HTML pages before {@link #visit} runs. */
    protected boolean autoParse;

    /** Rule handed to {@link Links#addAllFromDocument} during auto-parsing. */
    protected RegexRule regexRule;

    /**
     * Creates a crawler with an empty {@link RegexRule}.
     *
     * @param str value forwarded unchanged to the {@link DeepCrawler} constructor
     *            (presumably the crawl storage path -- confirm against DeepCrawler)
     * @param z   initial auto-parse setting
     */
    public BreadthCrawler(String str, boolean z) {
        super(str);
        this.regexRule = new RegexRule();
        this.autoParse = z;
    }

    /**
     * Builds the link set for {@code page}: auto-extracted links (if enabled and
     * the page is HTML) followed by whatever {@link #visit(Page, Links)} adds.
     *
     * <p>Any {@link Exception} thrown by {@code visit} is logged and suppressed;
     * the links gathered up to that point are still returned.
     *
     * @param page the fetched page
     * @return the links collected for this page, never {@code null}
     */
    @Override // cn.edu.hfut.dmic.webcollector.fetcher.Visitor
    public Links visitAndGetNextLinks(Page page) {
        Links links = new Links();
        if (this.autoParse && isParsableHtml(page)) {
            links.addAllFromDocument(page.getDoc(), this.regexRule);
        }
        try {
            visit(page, links);
        } catch (Exception e) {
            // A failure in user code must not abort the crawl, but it should be
            // visible at default production log levels, hence WARN rather than INFO.
            LOG.warn("Exception", e);
        }
        return links;
    }

    /**
     * Tells whether {@code page} qualifies for automatic link extraction: it has
     * a response whose content type contains {@code text/html} and a parsed
     * document is available.
     */
    private static boolean isParsableHtml(Page page) {
        // Guard against a missing response so that visit() is still invoked
        // instead of failing with a NullPointerException here.
        if (page.getResponse() == null) {
            return false;
        }
        String contentType = page.getResponse().getContentType();
        return contentType != null
                && contentType.contains("text/html")
                && page.getDoc() != null;
    }

    /**
     * Processes a fetched page.
     *
     * @param page  the fetched page
     * @param links link set returned by {@link #visitAndGetNextLinks(Page)};
     *              implementations may add to it
     */
    public abstract void visit(Page page, Links links);

    /**
     * Adds a rule string to the current {@link RegexRule}.
     *
     * @param str rule passed to {@link RegexRule#addRule}
     */
    public void addRegex(String str) {
        this.regexRule.addRule(str);
    }

    /** @return whether automatic link extraction is enabled */
    public boolean isAutoParse() {
        return this.autoParse;
    }

    /** @param z {@code true} to enable automatic link extraction */
    public void setAutoParse(boolean z) {
        this.autoParse = z;
    }

    /** @return the rule currently used for automatic link extraction */
    public RegexRule getRegexRule() {
        return this.regexRule;
    }

    /** @param regexRule the rule to use for automatic link extraction */
    public void setRegexRule(RegexRule regexRule) {
        this.regexRule = regexRule;
    }
}
