From 23025a0acd05a2a5fe6a5ea8b92075459ad62a51 Mon Sep 17 00:00:00 2001 From: Erlonidas Date: Thu, 19 Feb 2026 19:20:58 -0300 Subject: [PATCH] refactor: force GPU usage during the two-step PDF content extraction --- demo_page.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/demo_page.py b/demo_page.py index 99362d5..35a4b27 100644 --- a/demo_page.py +++ b/demo_page.py @@ -24,17 +24,25 @@ def __init__(self, model_id_or_path): """ # Load model from local path or Hugging Face hub self.processor = AutoProcessor.from_pretrained(model_id_or_path) - self.model = Qwen2_5_VLForConditionalGeneration.from_pretrained(model_id_or_path) - self.model.eval() - # Set device and precision + # Set device and force GPU usage self.device = "cuda" if torch.cuda.is_available() else "cpu" - self.model.to(self.device) - + if self.device == "cuda": - self.model = self.model.bfloat16() + print(f"🚀 Loading model on GPU: {torch.cuda.get_device_name(0)}") + # Load model directly on GPU with optimized settings + self.model = Qwen2_5_VLForConditionalGeneration.from_pretrained( + model_id_or_path, + torch_dtype=torch.bfloat16, + device_map="cuda:0" + ) else: + print("⚠️ GPU not available, using CPU (will be slower)") + self.model = Qwen2_5_VLForConditionalGeneration.from_pretrained(model_id_or_path) self.model = self.model.float() + self.model.to(self.device) + + self.model.eval() # set tokenizer self.tokenizer = self.processor.tokenizer