diff --git a/rlsbb_scraper/spiders/rlsbb.py b/rlsbb_scraper/spiders/rlsbb.py
index 3b071df..0e57807 100644
--- a/rlsbb_scraper/spiders/rlsbb.py
+++ b/rlsbb_scraper/spiders/rlsbb.py
@@ -3,13 +3,42 @@ import scrapy
 
 class RlsBBMagsSpider(scrapy.Spider):
     name = "rlsbb_mags"
-    start_urls = [
-        "https://rlsbb.ru/category/magazines/"
-    ]
+
     custom_settings = {
         'AUTOTHROTTLE_ENABLED': True,
     }
 
+    def __init__(self, start_page=1, end_page=10, *args, **kwargs):
+        """Configure the inclusive range of listing pages to crawl.
+
+        Args:
+            start_page: First listing page to request; coerced with
+                ``int`` so string values such as "3" are accepted.
+            end_page: Last listing page to request (inclusive);
+                coerced with ``int``.
+            *args: Forwarded to ``scrapy.Spider.__init__``.
+            **kwargs: Forwarded to ``scrapy.Spider.__init__`` so
+                additional spider arguments do not raise ``TypeError``.
+
+        Raises:
+            ValueError: If a page bound is not a valid integer string.
+        """
+        # Run the base-class setup before attaching our own state.
+        super().__init__(*args, **kwargs)
+        self.start_page = int(start_page)
+        self.end_page = int(end_page)
+
+    def start_requests(self):
+        """Yield one request per listing page in the configured range.
+
+        Nothing is yielded when ``end_page`` is below ``start_page``.
+        """
+        base_url = "https://rlsbb.ru/category/magazines/"
+        for page in range(self.start_page, self.end_page + 1):
+            # Page 1 is the bare category URL; later pages add /page/N/.
+            url = base_url if page == 1 else f"{base_url}page/{page}/"
+            yield scrapy.Request(url=url, callback=self.parse)
+
     def parse(self, response):
         for article in response.css("article"):
             yield {