From 68f4cbd1f5cc661f8f11d1e4e8ab51948b5835cf Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 8 May 2026 19:08:23 +0000 Subject: [PATCH] optimize plex sync: docker exec, single async loop, version-controlled crontab MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three independent improvements that together address the OOM-kills-bot problem caused by the every-10-minute plex sync: 1. dcp-django-admin.sh prefers `docker compose exec` against the running web container over `docker compose run --rm`. Each sync invocation no longer pays the cost of spinning up a fresh ~150 MB ephemeral container. Falls back to `compose run` when web isn't running (e.g. first migrate). Also reads POSTGRES_HOST from .env directly instead of spawning a container just to inspect env, and skips the confirmation prompt when stdin isn't a TTY (so cron and GitHub Actions don't hang). 2. SyncWithPlexCommand now does sync-then-async-fanout: phase 1 walks Plex movies and persists them; phase 2 runs ONE asyncio.run wrapping all enrichments inside a single shared aiohttp.ClientSession via asyncio.gather. The old code spun up a fresh event loop and ClientSession per movie. EnrichMovieActorsCommand.execute now takes an optional session parameter so the existing manual backfill command (manage.py enrich_actors) keeps working unchanged. 3. crontab.txt commits the schedule to the repo, plus a `just install-crontab` recipe to apply it on the server. No scheduler container required — the existing schedule is now version-controlled instead of living only on the VPS. 
--- packages/django-app/app/plex/commands.py | 146 ++++++++++---------- packages/django-app/bin/dcp-django-admin.sh | 59 ++++---- packages/django-app/crontab.txt | 9 ++ packages/django-app/justfile | 5 + 4 files changed, 118 insertions(+), 101 deletions(-) create mode 100644 packages/django-app/crontab.txt diff --git a/packages/django-app/app/plex/commands.py b/packages/django-app/app/plex/commands.py index 57184a2..ad21967 100644 --- a/packages/django-app/app/plex/commands.py +++ b/packages/django-app/app/plex/commands.py @@ -14,116 +14,116 @@ class SyncWithPlexCommand(AbstractBaseCommand): """ - Sync Oscarr's database with the movies on the Plex. - Stops syncing when we get to a movie that was added - before the latest movie in the database. + Sync Oscarr's database with the movies on Plex. Stops syncing when we + reach a movie that was added before the latest movie in the database. - Unfortunately the Plex API doesn't allow us to filter - by addedAt, so we have to get a page of movies. + Plex API doesn't allow filtering by addedAt, so we page through movies + sorted addedAt:desc and bail when we hit one we've already seen. 
""" def execute(self) -> None: super().execute() latest_movie = PlexMovieRepository.get_latest() + forms_to_enrich: list[EnrichMovieActorsForm] = [] for movie in Plex.fetch_movies(sort="addedAt:desc", container_start=0, container_size=5): added_at = Plex.normalize_added_at(movie.addedAt) if latest_movie and added_at <= latest_movie.created_at: - # break out of loop if we start to get a movie - # added before the latest movie in the database - return + break try: movie_details = Plex.extract_movie_details(movie) plex_movie = PlexMovieRepository.get_or_create(movie_details) - plex_movie.created_at = added_at + plex_movie.save() + print(f"Created PlexMovie: {plex_movie}") - # Enrich actors with TMDB data - try: - form = EnrichMovieActorsForm({"movie": plex_movie.id, "max_actors": 30}) - if form.is_valid(): - command = EnrichMovieActorsCommand(form) - asyncio.run(command.execute()) - else: - logger.warning( - f"Invalid form for enriching {plex_movie.title}: {form.errors}" - ) - except Exception as e: - logger.warning(f"Failed to enrich actors for {plex_movie.title}: {e}") - plex_movie.save() + form = EnrichMovieActorsForm({"movie": plex_movie.id, "max_actors": 30}) + if form.is_valid(): + forms_to_enrich.append(form) else: - # Only save if enrichment didn't happen (it saves itself via repository) - if not plex_movie.actors_enriched_at: - plex_movie.save() - - print(f"Created PlexMovie: {plex_movie}") - except Exception as e: + logger.warning(f"Invalid form for enriching {plex_movie.title}: {form.errors}") + except Exception: logger.exception(f"Failed to create PlexMovie: {movie}") - logger.exception(e) + + if forms_to_enrich: + asyncio.run(_enrich_all(forms_to_enrich)) + + +async def _enrich_all(forms: list[EnrichMovieActorsForm]) -> None: + """ + Run all enrichments inside one event loop with a shared HTTP session, + instead of one event loop + one ClientSession per movie. 
+ """ + async with ClientSession() as session: + results = await asyncio.gather( + *(EnrichMovieActorsCommand(form).execute(session=session) for form in forms), + return_exceptions=True, + ) + for form, result in zip(forms, results, strict=True): + if isinstance(result, Exception): + movie = form.cleaned_data.get("movie") + title = getattr(movie, "title", "") + logger.warning(f"Failed to enrich actors for {title}: {result}") class EnrichMovieActorsCommand(AbstractBaseCommand): """ - Command to enrich a movie's actor list with data from TMDB. + Enrich a movie's actor list with data from TMDB. """ def __init__(self, form: EnrichMovieActorsForm): self.form = form - async def execute(self): - """ - Execute the enrichment asynchronously. - Updates the movie instance with enriched actors and TMDB ID. - """ + async def execute(self, session: ClientSession | None = None): super().execute() movie = self.form.cleaned_data["movie"] max_actors = self.form.cleaned_data["max_actors"] - async with ClientSession() as session: - plex_actors = list(movie.actors) if movie.actors else [] - tmdb_id = movie.tmdb_id - - # If we don't have a TMDB ID, search by title and year + if session is None: + async with ClientSession() as new_session: + await self._do_enrich(movie, max_actors, new_session) + else: + await self._do_enrich(movie, max_actors, session) + + async def _do_enrich(self, movie, max_actors: int, session: ClientSession) -> None: + plex_actors = list(movie.actors) if movie.actors else [] + tmdb_id = movie.tmdb_id + + if not tmdb_id: + logger.info(f"Searching TMDB for {movie.title} ({movie.year})...") + tmdb_id = await TMDB.find_movie_id( + str(movie.title), + movie.year, + session, # type: ignore[arg-type] + ) if not tmdb_id: - logger.info(f"Searching TMDB for {movie.title} ({movie.year})...") - tmdb_id = await TMDB.find_movie_id( - str(movie.title), - movie.year, - session, # type: ignore[arg-type] - ) - if not tmdb_id: - logger.warning(f"Could not find TMDB ID for 
{movie.title}") - return - - # Fetch credits from TMDB - try: - credits = await TMDB.get_movie_credits(tmdb_id, session) - tmdb_actors = [actor["name"] for actor in credits.get("cast", [])] + logger.warning(f"Could not find TMDB ID for {movie.title}") + return - # Merge actors: Plex actors first, then TMDB actors not in the list - enriched_actors = plex_actors.copy() - plex_actors_lower = {actor.lower() for actor in enriched_actors} + try: + credits = await TMDB.get_movie_credits(tmdb_id, session) + tmdb_actors = [actor["name"] for actor in credits.get("cast", [])] - for tmdb_actor in tmdb_actors: - if tmdb_actor.lower() not in plex_actors_lower: - enriched_actors.append(tmdb_actor) + enriched_actors = plex_actors.copy() + plex_actors_lower = {actor.lower() for actor in enriched_actors} - # Limit to max_actors - enriched_actors = enriched_actors[:max_actors] + for tmdb_actor in tmdb_actors: + if tmdb_actor.lower() not in plex_actors_lower: + enriched_actors.append(tmdb_actor) - # Update the movie via repository - await PlexMovieRepository.update_movie_actors_async( - movie=movie, actors=enriched_actors, tmdb_id=tmdb_id - ) + enriched_actors = enriched_actors[:max_actors] - logger.info( - f"Enriched {movie.title}: {len(plex_actors)} Plex + " - f"{len(tmdb_actors)} TMDB = {len(enriched_actors)} total" - ) + await PlexMovieRepository.update_movie_actors_async( + movie=movie, actors=enriched_actors, tmdb_id=tmdb_id + ) - except Exception as e: - logger.exception(f"Error enriching actors for {movie.title}: {e}") - raise + logger.info( + f"Enriched {movie.title}: {len(plex_actors)} Plex + " + f"{len(tmdb_actors)} TMDB = {len(enriched_actors)} total" + ) + except Exception as e: + logger.exception(f"Error enriching actors for {movie.title}: {e}") + raise diff --git a/packages/django-app/bin/dcp-django-admin.sh b/packages/django-app/bin/dcp-django-admin.sh index 2b26215..f5db620 100755 --- a/packages/django-app/bin/dcp-django-admin.sh +++ 
b/packages/django-app/bin/dcp-django-admin.sh @@ -1,33 +1,36 @@ -#! /bin/bash +#!/bin/bash -# proxy to execute `manage.py` (django-admin) commands in web container +# Proxy to run `manage.py` (django-admin) commands inside the web container. +# +# Prefers `docker compose exec` against an already-running web container so +# every invocation doesn't pay the cost of spinning up a fresh ephemeral +# container (~150 MB on a heavy Django image). Falls back to `compose run +# --rm` only when web isn't running. -function checkenv() { - ############################################################## - # check user's confidence if we are not using local database # - ############################################################## +set -e - # get db host envar from docker container - DB_HOST_ENVAR=$(docker compose run --rm -w /code/app web env | grep POSTGRES_HOST) - DB_HOST=$(cut -d "=" -f2 <<< "$DB_HOST_ENVAR") - # bashism to trim newline - DB_HOST=${DB_HOST//[$'\t\r\n']} - if [ "$DB_HOST" != 'db' ] && [ "$DB_HOST" != '0.0.0.0' ] && [ "$DB_HOST" != 'localhost' ] - then - echo "You are running this command against the database at ${DB_HOST}!" - checkconfidence - fi -} +# Confirmation: only when targeting a non-local DB AND we have a TTY. +# Reads POSTGRES_HOST from .env directly instead of spawning a container +# just to inspect env, which the old script did. +if [ -t 0 ] && [ -f .env ]; then + DB_HOST=$(grep -E '^POSTGRES_HOST=' .env | cut -d '=' -f2 | tr -d '[:space:]') + case "$DB_HOST" in + ''|db|0.0.0.0|localhost) ;; + *) + echo "You are running this command against the database at ${DB_HOST}!" + read -r -p "Are you sure you want to continue? [y/N] " response + [[ "$response" =~ ^([yY][eE][sS]|[yY])$ ]] || exit 1 + ;; + esac +fi -function checkconfidence() { - read -r -p "Are you sure you want to continue? 
[y/N] " response - if [[ "$response" =~ ^([yY][eE][sS]|[yY])$ ]] - then - return - else - exit - fi -} +# `-T` disables TTY allocation, required when invoked from cron / CI / SSH +# action where stdin isn't a TTY. +EXEC_FLAGS="" +[ ! -t 0 ] && EXEC_FLAGS="-T" -checkenv -docker compose run --rm -w /code/app web /code/app/manage.py "$@" +if docker compose ps -q web 2>/dev/null | grep -q .; then + exec docker compose exec $EXEC_FLAGS -w /code/app web /code/app/manage.py "$@" +else + exec docker compose run --rm $EXEC_FLAGS -w /code/app web /code/app/manage.py "$@" +fi diff --git a/packages/django-app/crontab.txt b/packages/django-app/crontab.txt new file mode 100644 index 0000000..8d9d986 --- /dev/null +++ b/packages/django-app/crontab.txt @@ -0,0 +1,9 @@ +# oscarr scheduled jobs. +# Install (or update) on the server with: just install-crontab + +# nightly backups +0 7 * * * cd /root/oscarr-stuff/oscarr/packages/django-app && just create-json-backup /mnt/volume_sfo3_01/oscarr/backups +0 7 * * * cd /root/oscarr-stuff/oscarr/packages/django-app && just create-pgdump /mnt/volume_sfo3_01/oscarr/backups + +# sync with plex every 10 minutes +*/10 * * * * cd /root/oscarr-stuff/oscarr/packages/django-app && just sync-with-plex diff --git a/packages/django-app/justfile b/packages/django-app/justfile index 655ac7b..b7048d6 100644 --- a/packages/django-app/justfile +++ b/packages/django-app/justfile @@ -7,6 +7,11 @@ default: copy-env: test -f .env || cp .env.template .env +# install crontab.txt as the active crontab (replaces existing entries) +install-crontab: + crontab crontab.txt + @echo "installed:" && crontab -l + # calls sync_with_plex django management command in the container sync-with-plex: ./bin/dcp-django-admin.sh sync_with_plex