Added a small check on the response status code so that pages which return errors, or which finish with a code in the 100 range, are not indexed

pull/15/head
Slatian 2022-11-19 15:45:52 +01:00 zatwierdzone przez Alexander Cobleigh
rodzic 34d6df3830
commit ed5f5189b0
1 zmienionych plików z 5 dodań i 0 usunięć

Wyświetl plik

@@ -294,6 +294,11 @@ func Crawl(config types.Config) {
// on every a element which has an href attribute, call callback
c.OnHTML("a[href]", func(e *colly.HTMLElement) {
if e.Response.StatusCode >= 400 || e.Response.StatusCode <= 100 {
return
}
link := getLink(e.Attr("href"))
if findSuffix(SUFFIXES, link) {
return