# Python source (23 lines, 834 B)
import scrapy
|
|
|
|
|
|
class RlsBBMagsSpider(scrapy.Spider):
    """Spider that yields one item per ``<article>`` on the magazines listing.

    Only the page(s) listed in ``start_urls`` are parsed; no further requests
    are scheduled by this spider.
    """

    name = "rlsbb_mags"

    start_urls = [
        "https://rlsbb.ru/category/magazines/",
    ]

    custom_settings = {
        # Turn on Scrapy's AutoThrottle extension for this spider.
        'AUTOTHROTTLE_ENABLED': True,
    }

    @staticmethod
    def _extract_date(article):
        """Return the stripped second text node of the article's meta header.

        Returns ``None`` when the header is missing or has fewer than two
        text nodes, instead of raising ``IndexError``.
        """
        # NOTE(review): the second text node is used as the date; presumably
        # the first one is a label or whitespace -- confirm against the markup.
        texts = article.css('.entry-meta-header-before::text').getall()
        if len(texts) < 2:
            return None
        return texts[1].strip()

    def parse(self, response):
        """Yield a dict of extracted fields for each ``<article>`` element.

        Every field is ``None`` when its element or attribute is absent, so a
        single malformed article cannot raise inside this generator and drop
        the remaining articles on the page.

        Yields:
            dict with keys ``id``, ``article_title``, ``title``, ``date``,
            ``image_url`` and ``download_url``.
        """
        for article in response.css("article"):
            yield {
                # .get() rather than [] so an <article> without an id
                # attribute produces None instead of KeyError.
                'id': article.attrib.get('id'),
                'article_title': article.css('h1.entry-title > a::text').get(),
                'title': article.css('.entry-summary > p > strong::text').get(),
                'date': self._extract_date(article),
                'image_url': article.css('.entry-summary > p > img::attr(src)').get(),
                # First link in the summary whose href starts with the
                # rapidgator prefix.
                'download_url': article.css('.entry-summary > p > a[href ^= "https://rapidgator"]::attr(href)').get(),
            }