Add logic to get the range of pages to crawl from the command line
parent ad50fe8224
commit 515f9ca361
@@ -3,13 +3,22 @@ import scrapy

class RlsBBMagsSpider(scrapy.Spider):
    name = "rlsbb_mags"
    start_urls = [
        "https://rlsbb.ru/category/magazines/"
    ]

    custom_settings = {
        'AUTOTHROTTLE_ENABLED': True,
    }

    def __init__(self, start_page=1, end_page=10, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Spider arguments arrive from the command line as strings, so cast them.
        self.start_page = int(start_page)
        self.end_page = int(end_page)

    def start_requests(self):
        for i in range(self.start_page, self.end_page + 1):
            # The first page of the category listing has no /page/<n>/ suffix.
            if i == 1:
                yield scrapy.Request(url="https://rlsbb.ru/category/magazines/", callback=self.parse)
            else:
                yield scrapy.Request(url=f"https://rlsbb.ru/category/magazines/page/{i}/", callback=self.parse)

    def parse(self, response):
        for article in response.css("article"):
            yield {
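With this change the page range can be supplied at run time: Scrapy forwards -a name=value pairs as keyword arguments to the spider's __init__. A minimal invocation, assuming the project is set up and the spider is registered under the name rlsbb_mags, would be:

    scrapy crawl rlsbb_mags -a start_page=1 -a end_page=5

Omitting both arguments falls back to the defaults in __init__, pages 1 through 10.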