Compare commits

..

2 Commits

3 changed files with 96 additions and 90 deletions
-1
View File
@@ -1,2 +1 @@
/*.db
/__pycache__/
-85
View File
@@ -1,85 +0,0 @@
from datetime import datetime
from sqlalchemy import ForeignKey, func, Integer, Boolean, String, Text, DateTime
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
class Base(DeclarativeBase):
    """Shared declarative base for all ORM models (SQLAlchemy 2.0 style)."""
class TvShowDB(Base):
    """ORM model for a scraped TV-show episode post (SQLAlchemy 2.0).

    One row per source post; the download links for an episode live in the
    related ``LinkDB`` rows (see ``links``).
    """

    __tablename__ = "tvshows"

    # Surrogate primary key.
    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    # Unique identifier of the source post; unique + indexed for dedup lookups.
    post_id: Mapped[str] = mapped_column(String(255), nullable=False, unique=True, index=True)
    # Raw title of the post as scraped.
    post_title: Mapped[str] = mapped_column(String(255), nullable=False)
    # Normalized show/episode title, indexed for searching.
    title: Mapped[str] = mapped_column(String(255), nullable=False, index=True)
    # Publication date parsed from the post.
    date: Mapped[datetime] = mapped_column(DateTime, nullable=False, index=True)
    summary: Mapped[str | None] = mapped_column(Text, nullable=True)
    image_url: Mapped[str | None] = mapped_column(String(255), nullable=True)
    # BUG FIX: the previous server_default, func.datetime('now'), is emitted as
    # "DEFAULT datetime('now')" in the CREATE TABLE statement; SQLite rejects
    # non-constant defaults unless they are parenthesized, so table creation
    # fails. func.now() renders as CURRENT_TIMESTAMP, which is valid SQLite
    # and portable across backends.
    created_at: Mapped[datetime] = mapped_column(
        DateTime, server_default=func.now(), nullable=False
    )
    # updated_at refreshes on every UPDATE via the client-side onupdate hook.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime, server_default=func.now(), onupdate=func.now(), nullable=False
    )

    # One-to-many: episode -> download links.
    links: Mapped[list["LinkDB"]] = relationship(back_populates="show")
class LinkDB(Base):
    """ORM model for a single download link belonging to a TV-show episode."""

    __tablename__: str = "links"

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    # URL of the downloadable resource.
    link: Mapped[str] = mapped_column(String(255), nullable=False)
    # Flipped to True once the resource has been fetched.
    is_downloaded: Mapped[bool] = mapped_column(Boolean, default=False)
    # Many-to-one back to the owning episode row.
    show_id: Mapped[int] = mapped_column(ForeignKey("tvshows.id"))
    show: Mapped["TvShowDB"] = relationship(back_populates="links")
+96 -4
View File
@@ -4,17 +4,95 @@ import re
import sys
from datetime import datetime
from multiprocessing import Process
import scrapy
from scrapy.crawler import CrawlerProcess
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
from sqlalchemy import create_engine, select, Engine
from sqlalchemy import ForeignKey, create_engine, func, select, Engine, Integer, Boolean, String, Text, DateTime
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
from sqlalchemy.orm import sessionmaker
from models import Base, TvShowDB, LinkDB
class Base(DeclarativeBase):
    """Declarative base for SQLAlchemy 2.0."""
    # NOTE(review): this definition shadows the `Base` imported from `models`
    # just above and creates a second, independent MetaData registry — tables
    # declared here are invisible to the models module. Confirm whether this
    # duplicate definition is intended.
    pass
class TvShowDB(Base):
    """ORM model for a scraped TV-show episode post (SQLAlchemy 2.0).

    One row per source post; the download links for an episode live in the
    related ``LinkDB`` rows (see ``links``).

    NOTE(review): this class shadows the ``TvShowDB`` imported from ``models``
    earlier in the file — confirm the duplicate definition is intended.
    """

    __tablename__ = "tvshows"

    # Surrogate primary key.
    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    # Unique identifier of the source post; unique + indexed for dedup lookups.
    post_id: Mapped[str] = mapped_column(String(255), nullable=False, unique=True, index=True)
    # Raw title of the post as scraped.
    post_title: Mapped[str] = mapped_column(String(255), nullable=False)
    # Normalized show/episode title, indexed for searching.
    title: Mapped[str] = mapped_column(String(255), nullable=False, index=True)
    # Publication date parsed from the post.
    date: Mapped[datetime] = mapped_column(DateTime, nullable=False, index=True)
    summary: Mapped[str | None] = mapped_column(Text, nullable=True)
    image_url: Mapped[str | None] = mapped_column(String(255), nullable=True)
    # BUG FIX: the previous server_default, func.datetime('now'), is emitted as
    # "DEFAULT datetime('now')" in the CREATE TABLE statement; SQLite rejects
    # non-constant defaults unless they are parenthesized, so table creation
    # fails. func.now() renders as CURRENT_TIMESTAMP, which is valid SQLite
    # and portable across backends.
    created_at: Mapped[datetime] = mapped_column(
        DateTime, server_default=func.now(), nullable=False
    )
    # updated_at refreshes on every UPDATE via the client-side onupdate hook.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime, server_default=func.now(), onupdate=func.now(), nullable=False
    )

    # One-to-many: episode -> download links.
    links: Mapped[list["LinkDB"]] = relationship(back_populates="show")
class LinkDB(Base):
    """ORM model: one download link attached to one TV-show episode."""

    __tablename__: str = "links"

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    # Download URL as scraped from the post.
    link: Mapped[str] = mapped_column(String(255), nullable=False)
    # True once the link has been downloaded.
    is_downloaded: Mapped[bool] = mapped_column(Boolean, default=False)
    # FK to the owning episode; paired with the `links` collection on TvShowDB.
    show_id: Mapped[int] = mapped_column(ForeignKey("tvshows.id"))
    show: Mapped["TvShowDB"] = relationship(back_populates="links")
class TvShowItem(scrapy.Item):
@@ -96,11 +174,25 @@ class TvShow(CrawlSpider):
return datetime.strptime(formatted_date, "Posted on %B %d, %Y at %H:%M in")
def main():
def run_scrapy():
    """Run the TvShow crawler to completion in the current process.

    Returns 0 so the function can double as a conventional exit code
    (it is used as a multiprocessing target, where the value is ignored).
    """
    crawler = CrawlerProcess()
    _ = crawler.crawl(TvShow)
    crawler.start()  # blocks until the crawl finishes
    return 0
def main():
    """Run the Scrapy crawler in a child process and shut it down cleanly.

    Waits for the crawl to finish; on Ctrl-C, asks the child to terminate,
    gives it 30 seconds to exit, then force-kills it if still alive.
    """
    p_scrapy = Process(target=run_scrapy)
    p_scrapy.start()
    try:
        # BUG FIX: the original returned right after start() without joining,
        # so the KeyboardInterrupt handler below could effectively never fire
        # (Ctrl-C during the crawl was not caught while waiting here).
        p_scrapy.join()
    except KeyboardInterrupt:
        print("Closing...")
        p_scrapy.terminate()
        # Give Scrapy time to shut down gracefully before force-killing.
        p_scrapy.join(timeout=30)
        if p_scrapy.is_alive():
            print("⚠️ Scrapy n'a pas pu s'arrêter proprement.")
            p_scrapy.kill()
        print("scrarls is stopped.")
if __name__ == "__main__":