Adding a first working iteration of a spider for magazines in RlsBB
This commit is contained in:
parent
6998d60cc0
commit
95c7a6cf12
22
rslbb_scraper/spiders/rlsbb.py
Normal file
22
rslbb_scraper/spiders/rlsbb.py
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
import scrapy
|
||||||
|
|
||||||
|
|
||||||
|
class RlsBBMagsSpider(scrapy.Spider):
    """Spider that scrapes magazine posts from the RlsBB magazines category.

    Starts from the category listing page and yields one dict per
    ``<article>`` element found on it.
    """

    name = "rlsbb_mags"
    start_urls = [
        "https://rlsbb.ru/category/magazines/",
    ]
    custom_settings = {
        'AUTOTHROTTLE_ENABLED': True,
    }

    def parse(self, response):
        """Yield one item dict per ``<article>`` element in *response*.

        Each item has the keys ``id``, ``article_title``, ``title``,
        ``date``, ``image_url`` and ``download_url``. Any field whose
        element is missing from the article is reported as ``None``
        rather than aborting the whole page.
        """
        for article in response.css("article"):
            # The date is taken from the second text node of the meta
            # header. Guard the index so one malformed article does not
            # raise IndexError and stop the generator for the remaining
            # articles on the page.
            meta_texts = article.css('.entry-meta-header-before::text').getall()
            date = meta_texts[1].strip() if len(meta_texts) > 1 else None

            yield {
                # .get() instead of [] so an <article> without an id
                # attribute yields None instead of raising KeyError.
                'id': article.attrib.get('id'),
                'article_title': article.css('h1.entry-title > a::text').get(),
                'title': article.css('.entry-summary > p > strong::text').get(),
                'date': date,
                'image_url': article.css('.entry-summary > p > img::attr(src)').get(),
                # Only the first rapidgator link in the summary is kept.
                'download_url': article.css('.entry-summary > p > a[href ^= "https://rapidgator"]::attr(href)').get(),
            }
|
||||||
Loading…
x
Reference in New Issue
Block a user