"""Spider for the rlsbb.ru magazines category (rslbb_scraper/spiders/rlsbb.py)."""

import scrapy


class RlsBBMagsSpider(scrapy.Spider):
    """Scrape the posts listed on the rlsbb.ru magazines category page.

    Each ``<article>`` element on the start page is turned into one item
    dict. Only the start URL is parsed; ``parse`` yields no follow-up
    requests.
    """

    name = "rlsbb_mags"
    start_urls = [
        "https://rlsbb.ru/category/magazines/",
    ]
    custom_settings = {
        'AUTOTHROTTLE_ENABLED': True,
    }

    def parse(self, response):
        """Yield one item dict per ``<article>`` element in *response*.

        Every field is ``None`` when the corresponding markup is absent, so
        a single malformed article can no longer raise and abort extraction
        of the remaining articles on the page.

        Args:
            response: The downloaded page; must expose a ``css()`` selector
                method.

        Yields:
            dict: Keys ``id``, ``article_title``, ``title``, ``date``,
            ``image_url`` and ``download_url``.
        """
        for article in response.css("article"):
            summary = article.css('.entry-summary > p')
            yield {
                # ``attrib`` is a plain dict; ``.get`` avoids a KeyError on
                # articles that carry no ``id`` attribute.
                'id': article.attrib.get('id'),
                'article_title': article.css('h1.entry-title > a::text').get(),
                'title': article.css('.entry-summary > p > strong::text').get(),
                'date': self._extract_date(article),
                'image_url': article.css('.entry-summary > p > img::attr(src)').get(),
                # Only the first link whose href starts with the rapidgator
                # prefix is kept.
                'download_url': article.css(
                    '.entry-summary > p > a[href ^= "https://rapidgator"]::attr(href)'
                ).get(),
            } if summary is not None else {}

    @staticmethod
    def _extract_date(article):
        """Return the stripped second text node of the meta header, or None.

        The date is read from the second ``::text`` node of
        ``.entry-meta-header-before`` (presumably the first node is a label
        or whitespace -- confirm against the live markup). ``None`` is
        returned when fewer than two text nodes exist.
        """
        text_nodes = article.css('.entry-meta-header-before::text').getall()
        if len(text_nodes) < 2:
            return None
        return text_nodes[1].strip()