Correctly process links extracted from a post
This commit is contained in:
parent
1d0cb8ed5d
commit
a7952bc32c
@@ -165,7 +165,7 @@ class TvShow(CrawlSpider):
|
|||||||
item['date'] = self.parse_date(article.css('.entry-meta-header-before::text').getall()[1].strip())
|
item['date'] = self.parse_date(article.css('.entry-meta-header-before::text').getall()[1].strip())
|
||||||
item['summary'] = "".join(article.xpath('.//div[@class="entry-summary"]/node()').extract())
|
item['summary'] = "".join(article.xpath('.//div[@class="entry-summary"]/node()').extract())
|
||||||
item['image_url'] = article.css('.entry-summary > p > img::attr(src)').get()
|
item['image_url'] = article.css('.entry-summary > p > img::attr(src)').get()
|
||||||
item['download_url'] = "#".join(article.css('.entry-summary > p > a[href ^= "https://rapidgator"]::attr(href)').extract())
|
item['download_url'] = article.css('.entry-summary > p > a[href ^= "https://rapidgator"]::attr(href)').extract()
|
||||||
yield item
|
yield item
|
||||||
|
|
||||||
def parse_date(self, formatted_date: str):
|
def parse_date(self, formatted_date: str):
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user