From 0db07013ce7482dc8fbc7b59d505793b121f516a Mon Sep 17 00:00:00 2001
From: edipretoro
Date: Thu, 1 Jan 2026 21:24:27 +0100
Subject: [PATCH] Creating the SQLAlchemyPipeline class

---
 scrarls.py | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/scrarls.py b/scrarls.py
index 8480a6a..f2b2282 100644
--- a/scrarls.py
+++ b/scrarls.py
@@ -84,6 +84,47 @@ class TvShowItem(scrapy.Item):
     download_url: scrapy.Field = scrapy.Field()


+class SQLAlchemyPipeline:
+    """Scrapy item pipeline that upserts scraped items into a database.
+
+    Each item is matched against the existing ``TvShowDB`` rows on
+    ``post_id``: a new row is added when none matches, otherwise the
+    matching row's attributes are overwritten with the item's fields.
+    """
+
+    def __init__(self, db_url='sqlite:///tvshows.db', echo=True):
+        """Create the engine, the missing tables and a session factory.
+
+        Args:
+            db_url: SQLAlchemy database URL to connect to.
+            echo: Passed to ``create_engine``; enables SQL echo logging.
+        """
+        self.engine: Engine = create_engine(db_url, echo=echo)
+        Base.metadata.create_all(self.engine)
+        self.Session = sessionmaker(bind=self.engine)
+
+    def process_item(self, item, spider):
+        """Insert or update the row matching *item* and return the item.
+
+        Any exception rolls the transaction back and propagates to the
+        caller; the session is closed in every case.
+        """
+        # begin() commits on success, rolls back on error, always closes.
+        with self.Session.begin() as session:
+            stmt = select(TvShowDB).where(TvShowDB.post_id == item["post_id"])
+            show = session.scalars(stmt).first()
+            # Debug trace through the spider's logger instead of stdout.
+            spider.logger.debug("show=%r", show)
+            # Compare to None explicitly: a row must not be re-inserted
+            # just because the mapped object happens to be falsy.
+            if show is None:
+                session.add(TvShowDB(**item))
+            else:
+                for key, value in item.items():
+                    setattr(show, key, value)
+        return item
+
+
 class TvShow(CrawlSpider):
     name: str = "rlsb_tvshow"
     allowed_domains: list[str] = ["rlsbb.ru"]