Adding a first working iteration of a spider for magazines in RlsBB

This commit is contained in:
edipretoro 2024-06-05 22:33:04 +02:00
parent 6998d60cc0
commit 95c7a6cf12

View File

@ -0,0 +1,22 @@
import scrapy
class RlsBBMagsSpider(scrapy.Spider):
    """Spider that scrapes magazine posts from the RlsBB magazines category.

    Each ``<article>`` element found on a response yields one dict with the
    keys ``id``, ``article_title``, ``title``, ``date``, ``image_url`` and
    ``download_url``. A field whose element or attribute is missing from
    the markup is reported as ``None`` instead of aborting the page.

    ``parse`` only yields items; it does not schedule follow-up requests.
    """

    name = "rlsbb_mags"
    start_urls = [
        "https://rlsbb.ru/category/magazines/"
    ]
    custom_settings = {
        'AUTOTHROTTLE_ENABLED': True,
    }

    def parse(self, response):
        """Yield one item dict per ``<article>`` element in *response*.

        Args:
            response: The downloaded listing page to extract articles from.

        Yields:
            dict: The scraped fields of a single article; values are
            strings, or ``None`` when the corresponding markup is absent.
        """
        for article in response.css("article"):
            # The date is read from the second text node of the meta header.
            # Guard the index: a single malformed article would otherwise
            # raise IndexError and drop every remaining article on the page.
            meta_texts = article.css('.entry-meta-header-before::text').getall()
            date = meta_texts[1].strip() if len(meta_texts) > 1 else None

            yield {
                # .get() rather than [...] so an <article> without an id
                # attribute yields None instead of raising KeyError.
                'id': article.attrib.get('id'),
                'article_title': article.css('h1.entry-title > a::text').get(),
                'title': article.css('.entry-summary > p > strong::text').get(),
                'date': date,
                'image_url': article.css('.entry-summary > p > img::attr(src)').get(),
                # Only the first link whose href starts with the rapidgator
                # prefix is kept.
                'download_url': article.css('.entry-summary > p > a[href ^= "https://rapidgator"]::attr(href)').get(),
            }