From 1b359d72ade506eb3a8fa6adc52b4bca7769d7c8 Mon Sep 17 00:00:00 2001 From: mauryasameer Date: Wed, 8 Apr 2026 00:40:16 +0530 Subject: [PATCH 1/2] fix(memory): prevent OOM crashes by caching OCR engine instances - In `OCRFactory`, cached instantiated engines in `_instances` dict to prevent multi-GB memory leaks on every Gradio request. - Explicitly disabled `use_gpu` and `show_log` during `PaddleOCR` initialization to minimize host probing and stdout noise in constrained environments. --- src/providers/paddle_provider.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/providers/paddle_provider.py b/src/providers/paddle_provider.py index 967f201..ff4d581 100644 --- a/src/providers/paddle_provider.py +++ b/src/providers/paddle_provider.py @@ -27,7 +27,13 @@ def __init__(self, lang: str = "en", use_angle_cls: bool = True, enable_mkldnn: if not _PADDLE_AVAILABLE: raise ImportError("paddleocr not installed. Run: pip install paddleocr") logger.info("Initializing PaddleOCR (lang=%s)...", lang) - self.ocr = _PaddleOCR(use_angle_cls=use_angle_cls, lang=lang, enable_mkldnn=enable_mkldnn) + self.ocr = _PaddleOCR( + use_angle_cls=use_angle_cls, + lang=lang, + enable_mkldnn=enable_mkldnn, + use_gpu=False, + show_log=False + ) def predict(self, image_path: str): return self.ocr.predict(image_path) @@ -80,6 +86,7 @@ class OCRFactory: """Factory for instantiating and registering OCR engine providers.""" _engines: dict = {"paddle": PaddleOCREngine} + _instances: dict = {} @classmethod def register_engine(cls, name: str, engine_class: type) -> None: @@ -88,6 +95,9 @@ def register_engine(cls, name: str, engine_class: type) -> None: @classmethod def get_engine(cls, name: str = "paddle", **kwargs) -> BaseOCREngine: name = name.lower() + if name in cls._instances: + return cls._instances[name] + if name not in cls._engines: if name == "easyocr": from src.providers.easyocr_provider import EasyOCREngine @@ -98,7 +108,10 @@ def get_engine(cls, name: str = "paddle", **kwargs) -> BaseOCREngine: engine_class = cls._engines.get(name) if not engine_class: raise ValueError(f"OCR Engine '{name}' not found or not installed.") - return engine_class(**kwargs) + + instance = engine_class(**kwargs) + cls._instances[name] = instance + return instance @classmethod def list_available_engines(cls) -> list[str]: From b1551f292fe9f8b1ed4ce710f5b253ec24c1211f Mon Sep 17 00:00:00 2001 From: mauryasameer Date: Wed, 8 Apr 2026 00:41:56 +0530 Subject: [PATCH 2/2] fix(lint): remove trailing whitespace --- src/providers/paddle_provider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/providers/paddle_provider.py b/src/providers/paddle_provider.py index ff4d581..7383297 100644 --- a/src/providers/paddle_provider.py +++ b/src/providers/paddle_provider.py @@ -108,7 +108,7 @@ def get_engine(cls, name: str = "paddle", **kwargs) -> BaseOCREngine: engine_class = cls._engines.get(name) if not engine_class: raise ValueError(f"OCR Engine '{name}' not found or not installed.") - + instance = engine_class(**kwargs) cls._instances[name] = instance return instance