#!/usr/bin/env python
"""Scrapy crawler for the TV-show category listing of rlsbb.ru.

Run as a script to start the ``rlsb_tvshow`` spider in-process.
"""
import sys

import scrapy
from scrapy.http import Response
from scrapy.crawler import CrawlerProcess
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor


class TvShowItem(scrapy.Item):
    """Item holding the fields collected for one TV-show article."""

    article_id = scrapy.Field()
    article_title = scrapy.Field()
    title = scrapy.Field()
    date = scrapy.Field()
    summary = scrapy.Field()
    image_url = scrapy.Field()
    download_url = scrapy.Field()


class TvShow(CrawlSpider):
    """Crawl the paginated TV-show category and hand each page to a callback.

    The rule callback is deliberately NOT named ``parse``: Scrapy's
    documentation warns that ``CrawlSpider`` uses ``parse`` for its own
    link-following logic, so overriding it can stop the rules from working.
    """

    name = "rlsb_tvshow"
    allowed_domains = ["rlsbb.ru"]
    start_urls = ["https://rlsbb.ru/category/tv-shows/"]

    # Per-spider settings: a browser-like user agent plus conservative
    # throttling (AutoThrottle on, 10 s base delay between requests).
    custom_settings = {
        "USER_AGENT": (
            "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"
        ),
        "AUTOTHROTTLE_ENABLED": True,
        "DOWNLOAD_DELAY": 10,
    }

    # Follow every pagination link of the category and parse each page.
    rules = (
        Rule(
            LinkExtractor(allow=r"/tv-shows/page/"),
            callback="parse_listing",
            follow=True,
        ),
    )

    def parse_start_url(self, response: Response, **kwargs):
        """Parse the first listing page like any other page.

        Rule callbacks only run for links extracted by the rules, so the
        responses for ``start_urls`` are routed to the same callback here.
        """
        return self.parse_listing(response)

    def parse_listing(self, response: Response):
        """Handle one listing page of the TV-show category.

        Extraction is not implemented yet; an empty list is returned so the
        crawl proceeds without emitting items.
        """
        # TODO: build and return/yield TvShowItem objects from ``response``.
        return []


def main():
    """Run the ``TvShow`` spider to completion and return exit status 0."""
    process = CrawlerProcess()
    process.crawl(TvShow)
    # start() blocks until the crawl has finished.
    process.start()
    return 0


if __name__ == "__main__":
    sys.exit(main())