Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 0 additions & 89 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,89 +0,0 @@
# ScholarAI Backend

Backend for the ScholarAI research assistant. This project uses FastAPI and Poetry.

## Prerequisites

- Python 3.10+ (as per `pyproject.toml`)
- Poetry (Python dependency manager)
- Docker & Docker Compose (for containerized setup)

## Quick Start

### Option 1: Local Development

1. **Install dependencies:**
```bash
poetry install
```

2. **Configure environment:**
```bash
cp env.example .env
# Edit .env with your API keys and settings
```

3. **Run the application:**
```bash
poetry run uvicorn app.main:app --reload --port 8000
```

The server will be available at `http://localhost:8000`.

### Option 2: Docker Setup

1. **Configure environment:**
```bash
cp env.example .env
# Edit .env with your API keys and settings
```

2. **Start with Docker:**
```bash
# Create the Docker network (first run only; the command fails if the network already exists)
docker network create docker_scholar-network

# Start the application
./scripts/docker.sh start
```

The server will be available at `http://localhost:8000`.

## Environment Configuration

Copy `env.example` to `.env` and configure:

```bash
# Required
UNPAYWALL_EMAIL=your.email@example.com

# Optional (for enhanced features)
CORE_API_KEY=your_core_api_key
RABBITMQ_USER=your_rabbitmq_user
RABBITMQ_PASSWORD=your_rabbitmq_password
B2_KEY_ID=your_b2_key_id
B2_APPLICATION_KEY=your_b2_application_key
B2_BUCKET_NAME=your_b2_bucket_name
LOG_LEVEL=INFO
```

## Available Scripts

- `./scripts/docker.sh start` - Start the application with Docker
- `./scripts/docker.sh stop` - Stop the application
- `./scripts/docker.sh rebuild` - Rebuild and restart
- `./scripts/azure-setup.sh` - Set up Azure infrastructure

## Testing

```bash
# Test Unpaywall client
poetry run python test_unpaywall.py

# Run all tests
poetry run pytest
```

## Project Structure

For the detailed project structure, see `docs/3_Code_Structure.md`.
20 changes: 17 additions & 3 deletions app/api/api_v1/endpoints/gap_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class GapAnalysisSubmissionRequest(BaseModel):
url: HttpUrl
max_papers: Optional[int] = 10
validation_threshold: Optional[int] = 2
analysis_mode: Optional[str] = "deep" # "light" or "deep"

class GapAnalysisSubmissionResponse(BaseModel):
"""Response when submitting gap analysis"""
Expand Down Expand Up @@ -64,14 +65,27 @@ async def submit_gap_analysis(request: GapAnalysisSubmissionRequest):
gap_request = GapAnalysisRequest(
url=str(request.url),
max_papers=request.max_papers or 10,
validation_threshold=request.validation_threshold or 2
validation_threshold=request.validation_threshold or 2,
analysis_mode=request.analysis_mode or "deep"
)

# Submit job for background processing
job_id = await background_processor.submit_job(gap_request)

# Estimate processing time based on parameters
estimated_minutes = max(3, (gap_request.max_papers * 0.8) + 2)
# Estimate processing time based on analysis mode and parameters
if gap_request.analysis_mode == "light":
# Light analysis: 2.5 min base + 0.1 min per paper
base_time = 2.5
paper_multiplier = 0.1 # Much faster per paper
else:
# Deep analysis: 10 min base + 0.8 min per paper
base_time = 10
paper_multiplier = 0.8 # Original timing

estimated_minutes = max(
2 if gap_request.analysis_mode == "light" else 8,
base_time + (gap_request.max_papers * paper_multiplier)
)

logger.info(f"🎯 Gap analysis job {job_id} submitted for {request.url}")

Expand Down
4 changes: 4 additions & 0 deletions app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ class Settings:
# Environment
ENVIRONMENT: str = os.getenv("ENVIRONMENT", "development")
LOG_LEVEL: str = os.getenv("LOG_LEVEL", "info")

# Gap Analysis Configuration
GAP_ANALYSIS_LIGHT_MODE_MAX_PAPERS: int = int(os.getenv("GAP_ANALYSIS_LIGHT_MODE_MAX_PAPERS", "5"))
GAP_ANALYSIS_LIGHT_MODE_VALIDATION_THRESHOLD: int = int(os.getenv("GAP_ANALYSIS_LIGHT_MODE_VALIDATION_THRESHOLD", "1"))


settings = Settings()
Expand Down
42 changes: 34 additions & 8 deletions app/services/gap_analyzer/background_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,20 +70,27 @@ async def _load_existing_jobs(self):
"""Load existing jobs from persistent storage on startup."""
try:
job_files = list(self.jobs_dir.glob("job_*.json"))
loaded_count = 0

for job_file in job_files:
try:
with open(job_file, 'r') as f:
job_data = json.load(f)

# Validate required fields
if "job_id" not in job_data or "request" not in job_data:
logger.warning(f"Invalid job file {job_file}: missing required fields")
continue

# Recreate JobInfo object from saved data
job_id = job_data["job_id"]
request = GapAnalysisRequest(**job_data["request"])
job_info = JobInfo(job_id, request)

# Restore job state
job_info.status = JobStatus(job_data["status"])
# Restore job state with defaults
job_info.status = JobStatus(job_data.get("status", "pending"))
job_info.created_at = datetime.fromisoformat(job_data["created_at"])
job_info.progress_message = job_data["progress_message"]
job_info.progress_message = job_data.get("progress_message", "Job loaded from storage")
job_info.result_file = job_data.get("result_file")

if job_data.get("started_at"):
Expand All @@ -93,16 +100,34 @@ async def _load_existing_jobs(self):
if job_data.get("error_message"):
job_info.error_message = job_data["error_message"]

# If job was running when server stopped, mark it as failed
if job_info.status == JobStatus.RUNNING:
job_info.status = JobStatus.FAILED
job_info.error_message = "Job interrupted by server restart"
job_info.progress_message = "Job failed due to server restart"
self._save_job_status(job_id)

self.jobs[job_id] = job_info
logger.info(f"Loaded existing job {job_id} from persistent storage")
loaded_count += 1
logger.info(f"✅ Loaded job {job_id} with status: {job_info.status.value}")

except Exception as e:
logger.error(f"Failed to load job from {job_file}: {str(e)}")
logger.error(f"Failed to load job from {job_file}: {str(e)}")

logger.info(f"Loaded {len(self.jobs)} existing jobs from persistent storage")
logger.info(f"🔄 Successfully loaded {loaded_count} jobs from persistent storage")

# Log summary by status
status_counts = {}
for job in self.jobs.values():
status = job.status.value
status_counts[status] = status_counts.get(status, 0) + 1

if status_counts:
status_summary = ", ".join([f"{status}: {count}" for status, count in status_counts.items()])
logger.info(f"📊 Job status summary: {status_summary}")

except Exception as e:
logger.error(f"Failed to load existing jobs: {str(e)}")
logger.error(f"💥 Failed to load existing jobs: {str(e)}")

def _save_job_status(self, job_id: str):
"""Save job status to persistent storage."""
Expand Down Expand Up @@ -316,7 +341,8 @@ async def _process_job(self, job_id: str):
job.status = JobStatus.COMPLETED
job.completed_at = datetime.utcnow()
job.result_file = result_filename
job.progress_message = f"Analysis completed! Found {len(result.validated_gaps)} validated research gaps."
gap_count = len(result.validated_gaps) if result.validated_gaps else 0
job.progress_message = f"Analysis completed! Found {gap_count} validated research gaps."

# Save final status to persistent storage
self._save_job_status(job_id)
Expand Down
4 changes: 3 additions & 1 deletion app/services/gap_analyzer/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,18 @@

from datetime import datetime
from dataclasses import dataclass, field
from typing import Dict, Any, List, Optional
from typing import Dict, Any, List, Optional, Literal
from uuid import uuid4
from pydantic import BaseModel, Field

AnalysisMode = Literal["light", "deep"]

# Pydantic model describing one gap-analysis request: the seed paper URL,
# the paper/validation limits, and the analysis depth mode.
class GapAnalysisRequest(BaseModel):
"""Request model for gap analysis"""
# Required field: `Field(...)` declares no default value.
url: str = Field(..., description="URL of the seed paper to analyze")
# Defaults to 10; `ge=5, le=20` restricts accepted values to 5..20 inclusive.
max_papers: int = Field(default=10, ge=5, le=20, description="Maximum papers to analyze")
# Defaults to 2; `ge=1, le=5` restricts accepted values to 1..5 inclusive.
validation_threshold: int = Field(default=2, ge=1, le=5, description="Number of validation attempts per gap")
# Defaults to "deep"; AnalysisMode is the Literal["light", "deep"] alias defined above this class.
analysis_mode: AnalysisMode = Field(default="deep", description="Analysis depth mode: 'light' for fast 2-3 min analysis, 'deep' for comprehensive 10-15 min analysis")


@dataclass
Expand Down
18 changes: 15 additions & 3 deletions app/services/gap_analyzer/orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ async def analyze_research_gaps(self, request: GapAnalysisRequest) -> GapAnalysi
request_id = str(uuid4())[:8]
start_time = time.time()

logger.info(f"Starting gap analysis {request_id} for paper: {request.url}")
logger.info(f"Starting {request.analysis_mode} gap analysis {request_id} for paper: {request.url}")

try:
# Phase 1: Seeding the Exploration
Expand All @@ -103,13 +103,25 @@ async def analyze_research_gaps(self, request: GapAnalysisRequest) -> GapAnalysi
if not seed_analysis:
raise Exception("Failed to analyze seed paper")

# Adjust parameters based on analysis mode
if request.analysis_mode == "light":
# Light mode: reduce scope for faster processing
max_papers = min(request.max_papers, 5) # Cap at 5 papers
validation_threshold = 1 # Minimal validation
logger.info(f"🚀 Light analysis mode: {max_papers} papers, minimal validation")
else:
# Deep mode: use full parameters
max_papers = request.max_papers
validation_threshold = request.validation_threshold
logger.info(f"🔬 Deep analysis mode: {max_papers} papers, thorough validation")

# Phase 2: Main Exploration Loop
logger.info("Phase 2: Main exploration loop...")
await self._phase_2_expanding_frontier(request.max_papers)
await self._phase_2_expanding_frontier(max_papers)

# Phase 3: Gap Validation Loop
logger.info("Phase 3: Gap validation loop...")
await self._phase_3_final_validation(request.validation_threshold)
await self._phase_3_final_validation(validation_threshold)

# Phase 4: Final Response Synthesis
logger.info("Phase 4: Final response synthesis...")
Expand Down
Loading