Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .DS_Store
Binary file not shown.
31 changes: 31 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,34 @@ __Contributors:__
- Vincent Harkins (@vharkins1)
- Marc Vergés (@marcvergees)
- Jan Sans


## Local Development Setup (Beginner Friendly)

1. Clone your fork and enter project folder:

   Run these commands in a terminal:

   ```bash
   git clone <your-fork-url>
   cd FireForm
   ```

2. Create virtual environment:

   ```bash
   python3 -m venv venv
   source venv/bin/activate
   ```

3. Install dependencies:

4. Initialize database tables:

5. Run backend server:

6. Open the Swagger UI in your browser: <http://127.0.0.1:8000/docs>

### Common Errors

- `sqlite3.OperationalError: no such table`
→ Run the database initialization step (step 4 above) so the tables exist.

- `Could not connect to Ollama`
→ Ensure Ollama server is running locally.


3 changes: 2 additions & 1 deletion api/db/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,5 @@ class FormSubmission(SQLModel, table=True):
template_id: int
input_text: str
output_pdf_path: str
created_at: datetime = Field(default_factory=datetime.utcnow)
requires_review: bool = False
created_at: datetime = Field(default_factory=datetime.utcnow)
15 changes: 11 additions & 4 deletions api/routes/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,16 @@ def fill_form(form: FormFill, db: Session = Depends(get_db)):
fetched_template = get_template(db, form.template_id)

controller = Controller()
path = controller.fill_form(user_input=form.input_text, fields=fetched_template.fields, pdf_form_path=fetched_template.pdf_path)

submission = FormSubmission(**form.model_dump(), output_pdf_path=path)
return create_form(db, submission)
path, review_flag = controller.fill_form(
user_input=form.input_text,
fields=fetched_template.fields,
pdf_form_path=fetched_template.pdf_path)
if not path:
raise AppError("PDF generation failed", status_code=400)

submission = FormSubmission(
**form.model_dump(),
output_pdf_path=path,
requires_review=review_flag)

return create_form(db, submission)
9 changes: 8 additions & 1 deletion src/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,14 @@ def __init__(self):
self.file_manipulator = FileManipulator()

def fill_form(self, user_input: str, fields: list, pdf_form_path: str):
    """
    Fill a PDF form by delegating to ``self.file_manipulator.fill_form``.

    Args:
        user_input: Raw text the form values are extracted from.
        fields: Target fields of the template being filled.
        pdf_form_path: Path of the PDF template to fill.

    Returns:
        The ``(path, review_flag)`` pair produced by the file manipulator.
    """
    # A leftover single-value ``return`` used to sit above this call and made
    # the tuple-unpacking code below unreachable; only the tuple form remains.
    path, review_flag = self.file_manipulator.fill_form(
        user_input=user_input,
        fields=fields,
        pdf_form_path=pdf_form_path,
    )
    return path, review_flag



def create_template(self, pdf_path: str):
    """Hand ``pdf_path`` to the file manipulator's ``create_template`` and return its result."""
    manipulator = self.file_manipulator
    template_path = manipulator.create_template(pdf_path)
    return template_path
45 changes: 35 additions & 10 deletions src/file_manipulator.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import os
import logging
from src.filler import Filler
from src.llm import LLM
from commonforms import prepare_form
from src.utils.extraction_validator import ExtractionValidator

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class FileManipulator:
Expand All @@ -11,37 +16,57 @@ def __init__(self):

def create_template(self, pdf_path: str):
    """
    By using commonforms, we create an editable .pdf template and store it.

    Args:
        pdf_path: Path of the source PDF.

    Returns:
        Path of the generated template: the source path with its extension
        replaced by ``_template.pdf``.
    """
    # splitext drops only a real extension, so a path without a 4-character
    # suffix is no longer truncated the way ``pdf_path[:-4]`` truncated it.
    base_path, _extension = os.path.splitext(pdf_path)
    template_path = base_path + "_template.pdf"
    prepare_form(pdf_path, template_path)
    return template_path

def fill_form(self, user_input: str, fields: list, pdf_form_path: str):
    """
    Receives raw data, runs extraction + validation + PDF filling,
    and returns the output file path with review flag.

    Args:
        user_input: Raw text to extract the field values from.
        fields: Target fields handed to the LLM.
        pdf_form_path: Path of the PDF template to fill.

    Returns:
        ``(output_path, requires_review)``. When the template file does not
        exist the result is ``(None, True)``.

    Raises:
        Exception: anything raised during extraction, validation or filling
            is reported and re-raised unchanged.
    """
    logger.info("[1] Received request from frontend.")
    print(f"[2] PDF template path: {pdf_form_path}")

    if not os.path.exists(pdf_form_path):
        print(f"Error: PDF template not found at {pdf_form_path}")
        # Always return the two-element shape so callers can unpack it.
        return None, True

    logger.info("[3] Starting extraction...")

    try:
        self.llm._target_fields = fields
        self.llm._transcript_text = user_input

        # Try the structured extraction first; fall back to the older loop.
        if not self.llm.extract_structured_safe():
            print("Structured extraction failed → fallback to old extraction")
            self.llm.main_loop()

        extracted_data = self.llm.get_data()

        validation_result = ExtractionValidator().validate(extracted_data)
        review_flag = validation_result["requires_review"]

        print("\n[4] Validation Result")
        print(validation_result)

        # The PDF is filled exactly once, after extraction and validation.
        output_name = self.filler.fill_form(
            pdf_form=pdf_form_path,
            llm=self.llm,
        )

        print("\n----------------------------------")
        print("✅ Process Complete.")
        print(f"Output saved to: {output_name}")

        return output_name, review_flag

    except Exception as e:
        print(f"An error occurred during PDF generation: {e}")
        # Re-raise the exception so the frontend can handle it; a bare
        # ``raise`` keeps the original traceback.
        raise
50 changes: 45 additions & 5 deletions src/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,11 @@ def main_loop(self):
}

try:
response = requests.post(ollama_url, json=payload)
response = requests.post(ollama_url, json=payload, timeout=30)
response.raise_for_status()
except requests.exceptions.ConnectionError:
raise ConnectionError(
f"Could not connect to Ollama at {ollama_url}. "
"Please ensure Ollama is running and accessible."
except requests.exceptions.Timeout:
raise TimeoutError(
f"Ollama request timed out after 30 seconds at {ollama_url}"
)
except requests.exceptions.HTTPError as e:
raise RuntimeError(f"Ollama returned an error: {e}")
Expand Down Expand Up @@ -133,3 +132,44 @@ def handle_plural_values(self, plural_value):

def get_data(self):
return self._json




def extract_structured(self):
    """
    Ask Ollama for a JSON extraction of the target fields in one request.

    The prompt lists the names in ``self._target_fields`` followed by
    ``self._transcript_text``. The request goes to ``/api/generate`` on the
    host named by the ``OLLAMA_HOST`` environment variable (default
    ``http://localhost:11434``).

    Returns:
        The raw ``"response"`` value of Ollama's reply; it is not parsed here.

    Raises:
        requests.exceptions.RequestException: on connection failure, timeout
            or a non-success HTTP status.
    """
    # list() yields the keys of a mapping and the items of a plain sequence,
    # so the fields may be supplied either way.
    schema_fields = list(self._target_fields)

    prompt = f"""
Extract structured JSON for these fields:
{schema_fields}

Text:
{self._transcript_text}

Return only JSON.
"""

    ollama_host = os.getenv("OLLAMA_HOST", "http://localhost:11434").rstrip("/")
    ollama_url = f"{ollama_host}/api/generate"

    payload = {
        "model": "mistral",
        "prompt": prompt,
        "stream": False,
    }

    # Same 30-second limit as main_loop, so a stalled server cannot hang the
    # request indefinitely.
    response = requests.post(ollama_url, json=payload, timeout=30)
    response.raise_for_status()

    json_data = response.json()
    return json_data["response"]


def extract_structured_safe(self):
    """
    Run ``extract_structured`` and store its parsed JSON without ever raising.

    Returns:
        True when the response parsed to a JSON object, which is then stored
        in ``self._json``. False on any failure, in which case ``self._json``
        is left untouched so the caller can fall back to another strategy.
    """
    # Function-scope import: the module's import block is not guaranteed to
    # include logging.
    import logging

    log = logging.getLogger(__name__)

    try:
        raw = self.extract_structured()
        parsed = json.loads(raw)
    except Exception as exc:
        # Deliberately best-effort: report the reason instead of hiding it.
        log.warning("Structured extraction failed: %s", exc)
        return False

    if not isinstance(parsed, dict):
        # A JSON list/string/number cannot be read field-by-field downstream.
        log.warning(
            "Structured extraction returned %s instead of a JSON object",
            type(parsed).__name__,
        )
        return False

    self._json = parsed
    return True
6 changes: 6 additions & 0 deletions src/schemas/incident-schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Blank incident record: every field name maps to an empty string.
INCIDENT_SCHEMA = dict.fromkeys(
    ("location", "time", "severity", "description"),
    "",
)
21 changes: 21 additions & 0 deletions src/utils/extraction_validator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
class ExtractionValidator:
    """Checks extracted data for the presence of the required fields."""

    REQUIRED_FIELDS = ["location", "time", "severity", "description"]

    # String values (after stripping whitespace) that count as "not provided".
    _EMPTY_MARKERS = ("", "-1")

    def validate(self, data: dict):
        """
        Report which required fields are missing from ``data``.

        A field is missing when it is absent, ``None``, or a string that is
        empty or ``"-1"`` once surrounding whitespace is stripped. Input that
        is not a dict (including ``None``) is treated as having no fields.

        Returns:
            dict with ``requires_review`` (True when anything is missing),
            ``missing_fields`` (names in ``REQUIRED_FIELDS`` order) and
            ``confidence_score`` (integer percentage of required fields
            present, 0-100).
        """
        record = data if isinstance(data, dict) else {}

        missing_fields = [
            field
            for field in self.REQUIRED_FIELDS
            if self._is_missing(record.get(field))
        ]

        # Scale by the number of required fields so the score stays within
        # 0-100 however many fields are listed (25 points each for four).
        total = len(self.REQUIRED_FIELDS)
        if total:
            confidence_score = round(100 * (total - len(missing_fields)) / total)
        else:
            confidence_score = 100

        return {
            "requires_review": bool(missing_fields),
            "missing_fields": missing_fields,
            "confidence_score": confidence_score,
        }

    @classmethod
    def _is_missing(cls, value):
        """Return True when ``value`` carries no usable content."""
        if value is None:
            return True
        return isinstance(value, str) and value.strip() in cls._EMPTY_MARKERS
11 changes: 11 additions & 0 deletions src/utils/validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
def requires_review(data: dict, required_fields: list):
    """
    Tell whether any required field lacks a usable value.

    A field fails the check when ``data.get(field)`` is ``None`` or is a
    string that is empty or ``"-1"`` after stripping whitespace.

    Returns:
        True as soon as one such field is found, otherwise False.
    """

    def _is_blank(value):
        # Non-string, non-None values always count as present.
        if value is None:
            return True
        return isinstance(value, str) and value.strip() in ["", "-1"]

    return any(_is_blank(data.get(name)) for name in required_fields)
Empty file added test.pdf
Empty file.