23 lines
834 B
Python

import scrapy
class RlsBBMagsSpider(scrapy.Spider):
    """Spider that scrapes the entries of the rlsbb.ru magazines category page.

    One item (a plain dict) is yielded for every ``<article>`` element found
    in the response. Only the URLs listed in ``start_urls`` are requested;
    this class does not follow any further links.
    """

    name = "rlsbb_mags"
    start_urls = [
        "https://rlsbb.ru/category/magazines/",
    ]
    custom_settings = {
        # Turn on Scrapy's AutoThrottle extension for this spider.
        'AUTOTHROTTLE_ENABLED': True,
    }

    def parse(self, response):
        """Yield one dict per ``<article>`` element in *response*.

        Each item has the keys ``id``, ``article_title``, ``title``, ``date``,
        ``image_url`` and ``download_url``. A field whose markup is absent is
        reported as ``None`` so that a single malformed article does not stop
        the remaining articles on the page from being emitted.
        """
        for article in response.css("article"):
            # The date is read from the *second* text node of the meta header.
            # NOTE(review): presumably the first text node is a label or
            # whitespace - confirm against the live page markup.
            meta_texts = article.css('.entry-meta-header-before::text').getall()
            date = meta_texts[1].strip() if len(meta_texts) > 1 else None

            yield {
                # .get() instead of [] so an <article> without an id attribute
                # yields None rather than raising KeyError.
                'id': article.attrib.get('id'),
                'article_title': article.css('h1.entry-title > a::text').get(),
                'title': article.css('.entry-summary > p > strong::text').get(),
                'date': date,
                'image_url': article.css('.entry-summary > p > img::attr(src)').get(),
                # First link in the summary whose href starts with the
                # rapidgator prefix; None when there is no such link.
                'download_url': article.css('.entry-summary > p > a[href ^= "https://rapidgator"]::attr(href)').get(),
            }