
feat: add manual scrape endpoint. fixed mdblist empty list issue. other small tweaks.
dreulavelle committed Sep 24, 2024
1 parent e7e7e1d commit 57f23d6
Showing 13 changed files with 149 additions and 22 deletions.
21 changes: 20 additions & 1 deletion src/controllers/default.py
@@ -155,4 +155,23 @@ async def get_logs():

@router.get("/events", operation_id="events")
async def get_events(request: Request):
return {"success": True, "data": request.app.program.em.get_event_updates()}
return {"success": True, "data": request.app.program.em.get_event_updates()}


@router.get("/mount", operation_id="mount")
async def get_rclone_files():
"""Get all files in the rclone mount."""
import os
rclone_dir = settings_manager.settings.symlink.rclone_path
file_map = {}
def scan_dir(path):
with os.scandir(path) as entries:
for entry in entries:
if entry.is_file():
file_map[entry.name] = entry.path
elif entry.is_dir():
scan_dir(entry.path)

scan_dir(rclone_dir) # dict of `filename: filepath``
return {"success": True, "data": file_map}

94 changes: 94 additions & 0 deletions src/controllers/scrape.py
@@ -0,0 +1,94 @@
"""Scrape controller."""
from fastapi import APIRouter, HTTPException, Request
from sqlalchemy import select
from program.scrapers import Scraping
from program.db.db_functions import get_item_by_imdb_id
from program.indexers.trakt import TraktIndexer
from program.media.item import MediaItem
from program.downloaders.realdebrid import get_torrents
from program.db.db import db

router = APIRouter(
prefix="/scrape",
tags=["scrape"]
)


@router.get(
"",
summary="Scrape Media Item",
description="Scrape media item based on IMDb ID."
)
async def scrape(request: Request, imdb_id: str, season: int = None, episode: int = None):
"""
Scrape media item based on IMDb ID.
- **imdb_id**: IMDb ID of the media item.
"""
if (services := request.app.program.services):
scraping = services[Scraping]
indexer = services[TraktIndexer]
else:
raise HTTPException(status_code=412, detail="Scraping services not initialized")

try:
with db.Session() as session:
media_item = session.execute(
select(MediaItem).where(
MediaItem.imdb_id == imdb_id,
MediaItem.type.in_(["movie", "show"])
)
).unique().scalar_one_or_none()
if not media_item:
indexed_item = MediaItem({"imdb_id": imdb_id})
media_item = next(indexer.run(indexed_item))
if not media_item:
raise HTTPException(status_code=204, detail="Media item not found")
session.add(media_item)
session.commit()
session.refresh(media_item)

if media_item.type == "show":
if season and episode:
media_item = next((ep for ep in media_item.seasons[season - 1].episodes if ep.number == episode), None)
if not media_item:
raise HTTPException(status_code=204, detail="Episode not found")
elif season:
media_item = media_item.seasons[season - 1]
if not media_item:
raise HTTPException(status_code=204, detail="Season not found")
elif media_item.type == "movie" and (season or episode):
raise HTTPException(status_code=204, detail="Item type returned movie, cannot scrape season or episode")

results = scraping.scrape(media_item, log=False)
if not results:
return {"success": True, "data": []}

data = [
{
"raw_title": stream.raw_title,
"infohash": stream.infohash,
"rank": stream.rank
} for stream in results.values()
]

except StopIteration as e:
raise HTTPException(status_code=204, detail="Media item not found")
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))

return {"success": True, "data": data}


@router.get(
"/rd",
summary="Get Real-Debrid Torrents",
description="Get torrents from Real-Debrid."
)
async def get_rd_torrents(limit: int = 1000):
"""
Get torrents from Real-Debrid.
- **limit**: Limit the number of torrents to get.
"""
return get_torrents(limit)
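
A short sketch of exercising both new endpoints from Python (the base URL and sample IMDb ID are assumptions; `season`/`episode` only apply to shows):

import requests

BASE_URL = "http://localhost:8080"  # assumed local riven instance

# Scrape a specific episode of a show by IMDb ID (hypothetical ID).
resp = requests.get(
    f"{BASE_URL}/scrape",
    params={"imdb_id": "tt0944947", "season": 1, "episode": 1},
    timeout=60,
)
resp.raise_for_status()
for stream in resp.json()["data"]:
    print(stream["rank"], stream["infohash"], stream["raw_title"])

# List up to 100 torrents from the Real-Debrid account.
torrents = requests.get(f"{BASE_URL}/scrape/rd", params={"limit": 100}, timeout=60)
print(torrents.json())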
2 changes: 2 additions & 0 deletions src/main.py
@@ -13,6 +13,7 @@

from controllers.default import router as default_router
from controllers.items import router as items_router
from controllers.scrape import router as scrape_router
from controllers.settings import router as settings_router
from controllers.tmdb import router as tmdb_router
from controllers.webhooks import router as webhooks_router
@@ -65,6 +66,7 @@ async def dispatch(self, request: Request, call_next):
app.include_router(default_router)
app.include_router(settings_router)
app.include_router(items_router)
app.include_router(scrape_router)
app.include_router(webhooks_router)
app.include_router(tmdb_router)
app.include_router(ws_router)
2 changes: 1 addition & 1 deletion src/program/content/mdblist.py
@@ -54,7 +54,7 @@ def run(self) -> Generator[MediaItem, None, None]:
else:
items = list_items_by_url(list, self.settings.api_key)
for item in items:
if not item or item.imdb_id is None:
if hasattr(item, "error") or not item or item.imdb_id is None:
continue
if item.imdb_id.startswith("tt"):
items_to_yield.append(MediaItem(
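
The mdblist change above reads as a guard against error placeholders: judging by the commit message, an empty list can yield objects that carry an `error` attribute but no `imdb_id`, so checking `hasattr(item, "error")` first short-circuits before `item.imdb_id` can raise `AttributeError`. A self-contained sketch of the guard with hypothetical stand-in items:

from types import SimpleNamespace

items = [
    SimpleNamespace(error="empty list"),    # placeholder with no imdb_id attribute
    None,                                   # missing item
    SimpleNamespace(imdb_id=None),          # item without an IMDb id
    SimpleNamespace(imdb_id="tt0111161"),   # valid item
]

# hasattr() is checked first, so item.imdb_id is never touched on error objects.
valid = [
    item for item in items
    if not (hasattr(item, "error") or not item or item.imdb_id is None)
]
assert [item.imdb_id for item in valid] == ["tt0111161"]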
12 changes: 10 additions & 2 deletions src/program/db/db_functions.py
@@ -62,10 +62,18 @@ def get_parent_items_by_ids(media_item_ids: list[int]):
    with db.Session() as session:
        items = []
        for media_item_id in media_item_ids:
            item = session.execute(select(MediaItem).where(MediaItem._id == media_item_id, MediaItem.type.in_(["movie", "show"]))).unique().scalar_one()
            items.append(item)
            item = session.execute(select(MediaItem).where(MediaItem._id == media_item_id, MediaItem.type.in_(["movie", "show"]))).unique().scalar_one_or_none()
            if item:
                items.append(item)
        return items

def get_item_by_imdb_id(imdb_id: str):
    """Retrieve a MediaItem of type 'movie' or 'show' by an IMDb ID."""
    from program.media.item import MediaItem
    with db.Session() as session:
        item = session.execute(select(MediaItem).where(MediaItem.imdb_id == imdb_id, MediaItem.type.in_(["movie", "show"]))).unique().scalar_one_or_none()
        return item

def delete_media_item(item: "MediaItem"):
"""Delete a MediaItem and all its associated relationships."""
with db.Session() as session:
6 changes: 6 additions & 0 deletions src/program/indexers/trakt.py
@@ -22,6 +22,7 @@ def __init__(self):
self.ids = []
self.initialized = True
self.settings = settings_manager.settings.indexer
self.failed_ids = set()

@staticmethod
def copy_attributes(source, target):
@@ -61,6 +62,9 @@ def run(self, in_item: MediaItem, log_msg: bool = True) -> Generator[Union[Movie
logger.error(f"Item {in_item.log_string} does not have an imdb_id, cannot index it")
return

if in_item.imdb_id in self.failed_ids:
return

item_type = in_item.type if in_item.type != "mediaitem" else None
item = create_item_from_imdb_id(imdb_id, item_type)

@@ -71,9 +75,11 @@ def run(self, in_item: MediaItem, log_msg: bool = True) -> Generator[Union[Movie
pass
else:
logger.error(f"Indexed IMDb Id {item.imdb_id} returned the wrong item type: {item.type}")
self.failed_ids.add(in_item.imdb_id)
return
else:
logger.error(f"Failed to index item with imdb_id: {in_item.imdb_id}")
self.failed_ids.add(in_item.imdb_id)
return

item = self.copy_items(in_item, item)
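
The new `failed_ids` set is an in-memory negative cache: once an IMDb ID fails to index, later runs bail out before re-querying Trakt. A reduced, self-contained sketch of the pattern (names and the dict-backed lookup are stand-ins for the real API calls):

class Indexer:
    def __init__(self, catalog: dict[str, str]):
        self.catalog = catalog                # stands in for the Trakt API
        self.failed_ids: set[str] = set()     # negative cache of known-bad ids

    def run(self, imdb_id: str) -> str | None:
        if imdb_id in self.failed_ids:
            return None                       # skip the lookup entirely
        item = self.catalog.get(imdb_id)
        if item is None:
            self.failed_ids.add(imdb_id)      # remember the failure
        return item

indexer = Indexer({"tt0111161": "The Shawshank Redemption"})
assert indexer.run("tt0000000") is None       # first miss performs the lookup
assert "tt0000000" in indexer.failed_ids      # and is cached as failed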
11 changes: 3 additions & 8 deletions src/program/program.py
@@ -1,9 +1,7 @@
import asyncio
import linecache
import os
import threading
import time
import traceback
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
from queue import Empty
@@ -13,11 +13,9 @@
from rich.live import Live
from rich.progress import (
BarColumn,
MofNCompleteColumn,
Progress,
SpinnerColumn,
TextColumn,
TimeElapsedColumn,
TimeRemainingColumn,
)

@@ -45,8 +41,7 @@
if settings_manager.settings.tracemalloc:
import tracemalloc

from sqlalchemy import and_, exists, func, or_, select, text
from sqlalchemy.orm import joinedload
from sqlalchemy import func, select, text

import program.db.db_functions as DB
from program.db.db import db, run_migrations, vacuum_and_analyze_index_maintenance
@@ -359,9 +354,9 @@ def _init_db_from_symlinks(self):
added = []
errors = []
if res == 0:
logger.log("PROGRAM", "Collecting items from symlinks, this may take a while depending on library size")
items = self.services[SymlinkLibrary].run()
if settings_manager.settings.map_metadata:
logger.log("PROGRAM", "Collecting items from symlinks, this may take a while depending on library size")
items = self.services[SymlinkLibrary].run()
console = Console()
progress = Progress(
SpinnerColumn(),
2 changes: 1 addition & 1 deletion src/program/scrapers/__init__.py
@@ -108,7 +108,7 @@ def run_service(service, item,):
if total_results != len(results):
logger.debug(f"Scraped {item.log_string} with {total_results} results, removed {total_results - len(results)} duplicate hashes")

sorted_streams: Dict[str, Stream] = _parse_results(item, results)
sorted_streams: Dict[str, Stream] = _parse_results(item, results, log)

if sorted_streams and (log and settings_manager.settings.debug):
item_type = item.type.title()
10 changes: 6 additions & 4 deletions src/program/scrapers/shared.py
@@ -11,6 +11,7 @@
from program.settings.versions import models
from utils.logger import logger

enable_aliases = settings_manager.settings.scraping.enable_aliases
settings_model = settings_manager.settings.ranking
ranking_model = models.get(settings_model.profile)
rtn = RTN(settings_model, ranking_model)
@@ -31,7 +32,7 @@ def _get_stremio_identifier(item: MediaItem) -> str:
return identifier, scrape_type, imdb_id


def _parse_results(item: MediaItem, results: Dict[str, str]) -> Dict[str, Stream]:
def _parse_results(item: MediaItem, results: Dict[str, str], log_msg: bool = True) -> Dict[str, Stream]:
"""Parse the results from the scrapers into Torrent objects."""
torrents: Set[Torrent] = set()
processed_infohashes: Set[str] = set()
@@ -52,9 +53,10 @@ def _parse_results(item: MediaItem, results: Dict[str, str]) -> Dict[str, Stream
infohash=infohash,
correct_title=correct_title,
remove_trash=settings_manager.settings.ranking.options["remove_all_trash"],
aliases=item.get_aliases(),
aliases=item.get_aliases() if enable_aliases else {} # in some cases we want to disable aliases
)


if torrent.data.country and not item.is_anime:
if _get_item_country(item) != torrent.data.country:
if settings_manager.settings.scraping.parse_debug:
@@ -110,11 +112,11 @@ def _parse_results(item: MediaItem, results: Dict[str, str]) -> Dict[str, Stream
# The only stuff I've seen that show up here is titles with a date.
# Dates can be sometimes parsed incorrectly by Arrow library,
# so we'll just ignore them.
if settings_manager.settings.scraping.parse_debug:
if settings_manager.settings.scraping.parse_debug and log_msg:
logger.debug(f"Skipping torrent: '{raw_title}' - {e}")
continue
except GarbageTorrent as e:
if settings_manager.settings.scraping.parse_debug:
if settings_manager.settings.scraping.parse_debug and log_msg:
logger.debug(f"Trashing torrent for {item.log_string}: '{raw_title}'")
continue

8 changes: 6 additions & 2 deletions src/program/settings/models.py
@@ -3,12 +3,13 @@
from pathlib import Path
from typing import Any, Callable, List

from pydantic import BaseModel, field_validator
from pydantic import BaseModel, Field, field_validator
from RTN.models import SettingsModel

from program.settings.migratable import MigratableBaseModel
from utils import root_dir

deprecation_warning = "This has been deprecated and will be removed in a future version."

class Observable(MigratableBaseModel):
class Config:
@@ -193,6 +194,7 @@ class TorrentioConfig(Observable):
filter: str = "sort=qualitysize%7Cqualityfilter=480p,scr,cam"
url: str = "http://torrentio.strem.fun"
timeout: int = 30
ratelimit: bool = Field(default=True, deprecated=deprecation_warning)


class KnightcrawlerConfig(Observable):
@@ -221,6 +223,7 @@ class ZileanConfig(Observable):
enabled: bool = False
url: str = "http://localhost:8181"
timeout: int = 30
ratelimit: bool = Field(default=True, deprecated=deprecation_warning)


class MediafusionConfig(Observable):
@@ -233,7 +236,6 @@ class MediafusionConfig(Observable):
"torrentio_streams"
]


class OrionoidConfig(Observable):
enabled: bool = False
api_key: str = ""
@@ -244,6 +246,7 @@ class OrionoidConfig(Observable):
"limitcount": 5
}
timeout: int = 30
ratelimit: bool = Field(default=True, deprecated=deprecation_warning)


class JackettConfig(Observable):
@@ -281,6 +284,7 @@ class ScraperModel(Observable):
after_5: int = 6
after_10: int = 24
parse_debug: bool = False
enable_aliases: bool = True
torrentio: TorrentioConfig = TorrentioConfig()
knightcrawler: KnightcrawlerConfig = KnightcrawlerConfig()
jackett: JackettConfig = JackettConfig()
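
The `ratelimit` options above stay in the settings model for backward compatibility but are marked with Pydantic's `deprecated` field parameter (available in Pydantic 2.7+). A minimal sketch of the resulting behavior, assuming that Pydantic version:

import warnings

from pydantic import BaseModel, Field

class ExampleConfig(BaseModel):
    ratelimit: bool = Field(default=True, deprecated="This has been deprecated and will be removed in a future version.")

cfg = ExampleConfig()
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    _ = cfg.ratelimit  # reading a deprecated field emits a DeprecationWarning
print(caught[0].message)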
1 change: 0 additions & 1 deletion src/program/updaters/emby.py
@@ -21,7 +21,6 @@ def __init__(self):
def validate(self) -> bool:
"""Validate Emby library"""
if not self.settings.enabled:
logger.warning("Emby Updater is set to disabled.")
return False
if not self.settings.api_key:
logger.error("Emby API key is not set!")
1 change: 0 additions & 1 deletion src/program/updaters/jellyfin.py
@@ -21,7 +21,6 @@ def __init__(self):
def validate(self) -> bool:
"""Validate Jellyfin library"""
if not self.settings.enabled:
logger.warning("Jellyfin Updater is set to disabled.")
return False
if not self.settings.api_key:
logger.error("Jellyfin API key is not set!")
1 change: 0 additions & 1 deletion src/program/updaters/plex.py
@@ -31,7 +31,6 @@ def __init__(self):
def validate(self) -> bool: # noqa: C901
"""Validate Plex library"""
if not self.settings.enabled:
logger.warning("Plex Updater is set to disabled.")
return False
if not self.settings.token:
logger.error("Plex token is not set!")
