diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..c106a9f Binary files /dev/null and b/.DS_Store differ diff --git a/README.md b/README.md index 42862e3..b717bec 100644 --- a/README.md +++ b/README.md @@ -55,3 +55,34 @@ __Contributors:__ - Vincent Harkins (@vharkins1) - Marc Vergés (@marcvergees) - Jan Sans + + +## Local Development Setup (Beginner Friendly) + +1. Clone your fork and enter project folder: + + - git clone + cd FireForm (Terminal) + +2. Create virtual environment: + + - python3 -m venv venv + source venv/bin/activate + +3. Install dependencies: + +4. Initialize database tables: + +5. Run backend server: + +6. Open Swagger UI in browser: (http://127.0.0.1:8000/docs) + +### Common Errors + +- `sqlite3.OperationalError: no such table` +→ Run database initialization step. + +- `Could not connect to Ollama` +→ Ensure Ollama server is running locally. + + diff --git a/api/db/models.py b/api/db/models.py index f76c93b..d237f82 100644 --- a/api/db/models.py +++ b/api/db/models.py @@ -15,4 +15,5 @@ class FormSubmission(SQLModel, table=True): template_id: int input_text: str output_pdf_path: str - created_at: datetime = Field(default_factory=datetime.utcnow) \ No newline at end of file + requires_review: bool = False + created_at: datetime = Field(default_factory=datetime.utcnow) diff --git a/api/routes/forms.py b/api/routes/forms.py index f3430ed..c281aea 100644 --- a/api/routes/forms.py +++ b/api/routes/forms.py @@ -17,9 +17,16 @@ def fill_form(form: FormFill, db: Session = Depends(get_db)): fetched_template = get_template(db, form.template_id) controller = Controller() - path = controller.fill_form(user_input=form.input_text, fields=fetched_template.fields, pdf_form_path=fetched_template.pdf_path) - - submission = FormSubmission(**form.model_dump(), output_pdf_path=path) - return create_form(db, submission) + path, review_flag = controller.fill_form( + user_input=form.input_text, + fields=fetched_template.fields, + pdf_form_path=fetched_template.pdf_path) + if not path: + raise AppError("PDF generation failed", status_code=400) + submission = FormSubmission( + **form.model_dump(), + output_pdf_path=path, + requires_review=review_flag) + return create_form(db, submission) \ No newline at end of file diff --git a/src/controller.py b/src/controller.py index d31ec9c..c761780 100644 --- a/src/controller.py +++ b/src/controller.py @@ -5,7 +5,14 @@ def __init__(self): self.file_manipulator = FileManipulator() def fill_form(self, user_input: str, fields: list, pdf_form_path: str): - return self.file_manipulator.fill_form(user_input, fields, pdf_form_path) + path, review_flag = self.file_manipulator.fill_form( + user_input=user_input, + fields=fields, + pdf_form_path=pdf_form_path + ) + return path, review_flag + + def create_template(self, pdf_path: str): return self.file_manipulator.create_template(pdf_path) \ No newline at end of file diff --git a/src/file_manipulator.py b/src/file_manipulator.py index b7815cc..78bcc6a 100644 --- a/src/file_manipulator.py +++ b/src/file_manipulator.py @@ -1,7 +1,12 @@ import os +import logging from src.filler import Filler from src.llm import LLM from commonforms import prepare_form +from src.utils.extraction_validator import ExtractionValidator + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) class FileManipulator: @@ -11,7 +16,7 @@ def __init__(self): def create_template(self, pdf_path: str): """ - By using commonforms, we create an editable .pdf template and we store it. + By using commonforms, we create an editable .pdf template and store it. """ template_path = pdf_path[:-4] + "_template.pdf" prepare_form(pdf_path, template_path) @@ -19,29 +24,49 @@ def create_template(self, pdf_path: str): def fill_form(self, user_input: str, fields: list, pdf_form_path: str): """ - It receives the raw data, runs the PDF filling logic, - and returns the path to the newly created file. + Receives raw data, runs extraction + validation + PDF filling, + and returns the output file path with review flag. """ - print("[1] Received request from frontend.") + logger.info("[1] Received request from frontend.") print(f"[2] PDF template path: {pdf_form_path}") if not os.path.exists(pdf_form_path): print(f"Error: PDF template not found at {pdf_form_path}") - return None # Or raise an exception + return None, True + + logger.info("[3] Starting extraction...") - print("[3] Starting extraction and PDF filling process...") try: self.llm._target_fields = fields self.llm._transcript_text = user_input - output_name = self.filler.fill_form(pdf_form=pdf_form_path, llm=self.llm) + + success = self.llm.extract_structured_safe() + + if not success: + print("Structured extraction failed → fallback to old extraction") + self.llm.main_loop() + + extracted_data = self.llm.get_data() + + validator = ExtractionValidator() + validation_result = validator.validate(extracted_data) + + review_flag = validation_result["requires_review"] + + print("\n[4] Validation Result") + print(validation_result) + + output_name = self.filler.fill_form( + pdf_form=pdf_form_path, + llm=self.llm + ) print("\n----------------------------------") print("✅ Process Complete.") print(f"Output saved to: {output_name}") - return output_name + return output_name, review_flag except Exception as e: print(f"An error occurred during PDF generation: {e}") - # Re-raise the exception so the frontend can handle it - raise e + raise e \ No newline at end of file diff --git a/src/llm.py b/src/llm.py index 70937f9..9edbb8a 100644 --- a/src/llm.py +++ b/src/llm.py @@ -60,12 +60,11 @@ def main_loop(self): } try: - response = requests.post(ollama_url, json=payload) + response = requests.post(ollama_url, json=payload, timeout=30) response.raise_for_status() - except requests.exceptions.ConnectionError: - raise ConnectionError( - f"Could not connect to Ollama at {ollama_url}. " - "Please ensure Ollama is running and accessible." + except requests.exceptions.Timeout: + raise TimeoutError( + f"Ollama request timed out after 30 seconds at {ollama_url}" ) except requests.exceptions.HTTPError as e: raise RuntimeError(f"Ollama returned an error: {e}") @@ -133,3 +132,44 @@ def handle_plural_values(self, plural_value): def get_data(self): return self._json + + + + +def extract_structured(self): + schema_fields = list(self._target_fields.keys()) + + prompt = f""" +Extract structured JSON for these fields: +{schema_fields} + +Text: +{self._transcript_text} + +Return only JSON. +""" + + ollama_host = os.getenv("OLLAMA_HOST", "http://localhost:11434").rstrip("/") + ollama_url = f"{ollama_host}/api/generate" + + payload = { + "model": "mistral", + "prompt": prompt, + "stream": False + } + + response = requests.post(ollama_url, json=payload) + response.raise_for_status() + + json_data = response.json() + return json_data["response"] + + +def extract_structured_safe(self): + try: + raw = self.extract_structured() + parsed = json.loads(raw) + self._json = parsed + return True + except Exception: + return False \ No newline at end of file diff --git a/src/schemas/incident-schema.py b/src/schemas/incident-schema.py new file mode 100644 index 0000000..054054a --- /dev/null +++ b/src/schemas/incident-schema.py @@ -0,0 +1,6 @@ +INCIDENT_SCHEMA = { + "location": "", + "time": "", + "severity": "", + "description": "" +} \ No newline at end of file diff --git a/src/utils/extraction_validator.py b/src/utils/extraction_validator.py new file mode 100644 index 0000000..eb26f76 --- /dev/null +++ b/src/utils/extraction_validator.py @@ -0,0 +1,21 @@ +class ExtractionValidator: + REQUIRED_FIELDS = ["location", "time", "severity", "description"] + + def validate(self, data: dict): + missing_fields = [] + confidence_score = 100 + + for field in self.REQUIRED_FIELDS: + value = data.get(field) + + if value is None or value == "" or value == "-1": + missing_fields.append(field) + confidence_score -= 25 + + requires_review = len(missing_fields) > 0 + + return { + "requires_review": requires_review, + "missing_fields": missing_fields, + "confidence_score": confidence_score + } \ No newline at end of file diff --git a/src/utils/validation.py b/src/utils/validation.py new file mode 100644 index 0000000..5ecc5c1 --- /dev/null +++ b/src/utils/validation.py @@ -0,0 +1,11 @@ +def requires_review(data: dict, required_fields: list): + for field in required_fields: + value = data.get(field) + + if value is None: + return True + + if isinstance(value, str) and value.strip() in ["", "-1"]: + return True + + return False \ No newline at end of file diff --git a/test.pdf b/test.pdf new file mode 100644 index 0000000..e69de29