diff --git a/docs/data.json b/docs/data.json new file mode 100644 index 000000000..8af77c054 --- /dev/null +++ b/docs/data.json @@ -0,0 +1,200 @@ +{ + "contributors": [ + { + "name": "Abhishek Verma", + "github": "abhishekverma2323", + "skills": [ + "python", + "machine learning", + "C/C++", + "sql", + "data analysis" + ], + "interests": [ + "ai agents", + "nlp", + "real-world ai systems", + "automation" + ], + "track": "A: Agent Builders", + "level": 1 + }, + { + "name": "Amaan Khan", + "github": "amaankhan6828", + "skills": [ + "c++", + "python", + "numpy", + "machine learning" + ], + "interests": [ + "ai agents", + "automation", + "data science", + "real world problem solving" + ], + "track": "", + "level": 1 + }, + { + "name": "Anupaul Saikia", + "github": "Saikia05", + "skills": [ + "python", + "SQL", + "C/C++", + "javascript", + "blender", + "fusion 360", + "inventor" + ], + "interests": [ + "agents", + "3D", + "large-scale machines", + "manufacturing", + "generative design", + "robotics", + "agrotechnology" + ], + "track": "", + "level": 1 + }, + { + "name": "Bassel Mazhar", + "github": "basselmazhar1", + "skills": [ + "python", + "javascript" + ], + "interests": [ + "AI agents", + "LLMs" + ], + "track": "", + "level": 1 + }, + { + "name": "John Ringbert", + "github": "laserkavaj1", + "skills": [ + "Networking", + "Cybersecurity", + "Troubleshooting" + ], + "interests": [ + "F1", + "Soccer", + "Fishing", + "Skiing" + ], + "track": "E: QA & Security", + "level": 1 + }, + { + "name": "Kailash Narayana Prasad", + "github": "KailashNp", + "skills": [ + "C++", + "Python", + "Java", + "SQL", + "Cybersecurity (Network Security, Web Security, OWASP Top 10, Vulnerability Assessment, Penetration Testing basics)", + "Cryptography (Encryption/Decryption, Hashing, Digital Signatures, Public Key Infrastructure)", + "Blockchain (Decentralization, Consensus Mechanisms, Smart Contracts basics)", + "Secure Data Storage", + "HTML/CSS", + "Android Development 
(Kotlin, Jetpack Compose)", + "Git & GitHub", + "Problem Solving", + "Analytical Thinking", + "Technical Troubleshooting", + "Team Collaboration" + ], + "interests": [ + "AI Agents & Autonomous Systems", + "Agent Security & Adversarial Attacks", + "Cybersecurity & Ethical Hacking", + "Network & Web Security", + "Cryptography & Secure Communication", + "Blockchain & Decentralized Systems", + "Natural Language Processing (NLP)", + "Intelligent Systems & Automation" + ], + "track": "E: QA & Security", + "level": 1 + }, + { + "name": "Vineet Sharma", + "github": "vineetsharma18", + "skills": [ + "javascript", + "react", + "MySQL", + "PostgreSQL", + "Next.js", + "C", + "C++", + "html", + "css" + ], + "interests": [ + "ai", + "agents", + "web dev", + "NLP", + "3D", + "security" + ], + "track": "", + "level": 1 + }, + { + "name": "Yash Maheshwari", + "github": "yashm0910", + "skills": [ + "Python", + "FastAPI", + "RAG", + "LLMs", + "Langchain", + "LangGraph", + "Machine Learning", + "NLP", + "SQL", + "Streamlit" + ], + "interests": [ + "AI systems", + "agent workflows", + "retrieval-augmented generation", + "backend engineering", + "system reliability", + "evaluation and feedback loops" + ], + "track": "A: Agent Builders", + "level": 1 + }, + { + "name": "bhavesh", + "github": "bhavesh4323", + "skills": [ + "c++", + "python", + "numpy", + "machine learning", + "mongodb" + ], + "interests": [ + "ai agents", + "blockchain", + "cryptocurrencies", + "real world problem solving" + ], + "track": "A: Agent Builders", + "level": 1 + } + ], + "generated": true +} \ No newline at end of file diff --git a/submissions/vansh-singhal/HOW_I_DID_IT.md b/submissions/vansh-singhal/HOW_I_DID_IT.md new file mode 100644 index 000000000..c3719a2fe --- /dev/null +++ b/submissions/vansh-singhal/HOW_I_DID_IT.md @@ -0,0 +1,12 @@ +How I Did It – Level 2 + +What I did, step by step +I cloned the project and opened it in VS Code. 
Then I ran npm install to install all dependencies, followed by npm run build to compile the project. After that, I executed npm run test-client to verify everything was working, and it ran successfully. + +For the LLM setup, I installed Ollama and initially tried running a model, but faced issues. To avoid those problems, I switched to a lightweight model (TinyLlama), which ran smoothly. I tested it with a simple prompt to confirm it was working properly. + +What problems I hit and how I solved them +The main issue I faced was while using Ollama. The model I initially tried didn’t run properly on my system, likely due to resource constraints. After identifying this, I switched to a smaller, lightweight model (TinyLlama), which resolved the issue and worked without any errors. + +What I learned that I didn’t know before +This task helped me understand the complete project setup workflow—from installing dependencies to building and testing. diff --git a/submissions/vansh-singhal/HOW_I_DID_IT_level3.md b/submissions/vansh-singhal/HOW_I_DID_IT_level3.md new file mode 100644 index 000000000..48f3bb3c2 --- /dev/null +++ b/submissions/vansh-singhal/HOW_I_DID_IT_level3.md @@ -0,0 +1,107 @@ + +# HOW_I_DID_IT.md => level3 + +## What I Did, Step by Step + +Identified real-world digital twin implementations outside LPI knowledge base + +Reviewed the existing LPI case studies to avoid duplication across domains such as aerospace, manufacturing, and generic smart city examples. Focused on selecting implementations that are not commonly cited in standard digital twin literature. + +Selected 3 domains with distinct characteristics: urban governance (Chennai), transport infrastructure (Aachen), and renewable energy optimization (GE Wind Farm). This ensured diversity in system scale, data complexity, and application outcomes. 
+ +Researched Chennai Smart City Digital Twin + +Source: Public reports and news coverage on Chennai’s AI-based urban digital twin initiative +Focused on flood prediction and traffic congestion as core use cases +Identified challenge (fragmented urban data and reactive disaster management), approach (integration of IoT, GIS, and simulation models), and outcome (predictive flood management and improved coordination) +Mapped to SMILE phases: System Understanding (urban modeling), Measurement (sensor data), Intervention (scenario simulation), Learning (feedback loops), Evolution (urban data observatory) + +Researched Aachen City Digital Twin + +Source: European urban mobility case study documentation +Focus: transport network optimization and cross-department coordination +Identified challenge (lack of shared infrastructure visibility), approach (city-scale simulation of road networks and traffic), and outcome (improved planning efficiency and congestion reduction) +Mapped to SMILE phases: System Understanding (road network modeling), Measurement (traffic data), Intervention (simulation-based planning), Learning (AI-based congestion prediction), Evolution (institutional integration) + +Researched GE Digital Wind Farm + +Source: Industry analysis and documented case examples of GE digital twin deployment +Focus: energy output optimization and predictive maintenance +Identified challenge (variability in turbine performance), approach (integration of IoT data with machine learning models), and outcome (increased efficiency and reduced downtime) +Mapped to SMILE phases: System Understanding (turbine modeling), Measurement (telemetry data), Intervention (dynamic configuration), Learning (ML-based prediction), Evolution (continuous optimization) + +Analyzed SMILE phase alignment + +Created a comparison across all three implementations to identify phase coverage and maturity +Observed that all implementations follow a consistent progression from system modeling to continuous 
improvement +Identified that none of the systems skip simulation (Intervention phase), confirming its importance in real-world deployments + +Cited all sources formally + +Used publicly available reports, case studies, and credible publications +Ensured all information used is non-confidential and verifiable +Maintained consistency in describing outcomes and approaches + +--- + +## Problems I Faced and How I Solved Them + +Problem 1: Distinguishing digital twin from general smart systems + +Many systems labeled as “digital twins” are actually dashboards or IoT monitoring platforms without simulation capability +Solution: Applied strict filtering criteria — system must include (a) virtual model, (b) real-time synchronization, (c) simulation capability, and (d) measurable outcomes +This eliminated several smart city projects that lacked predictive or simulation components + +Problem 2: Limited availability of measurable outcomes + +Urban and infrastructure projects often report qualitative benefits instead of quantified results +Solution: Prioritized implementations with reported improvements such as congestion reduction, efficiency gains, or predictive accuracy +Where exact metrics were limited, inferred outcomes were validated through multiple sources + +Problem 3: Avoiding overlap with common digital twin examples + +Many widely cited examples (e.g., aerospace and manufacturing leaders) are already part of standard knowledge bases +Solution: Selected less commonly analyzed but still real and deployed systems such as Chennai and Aachen +This ensured originality while maintaining credibility + +Problem 4: Mapping real-world systems to SMILE framework + +These implementations were not originally designed using SMILE methodology +Solution: Performed retrospective mapping by analyzing system architecture and lifecycle behavior +Demonstrated that these systems naturally align with SMILE phases despite independent development + +--- + +## What I Learned + +Simulation 
(Intervention phase) is a non-negotiable component + +All three implementations rely on simulation before real-world execution +Chennai simulates flood scenarios, Aachen simulates traffic flow, and GE simulates turbine performance +Implication: Digital twins without simulation are incomplete systems + +Data integration is the primary bottleneck + +Chennai required integration across multiple government departments +Aachen required coordination between infrastructure stakeholders +GE required combining environmental and operational data +Implication: The challenge is organizational and data-related, not purely technical + +Continuous learning drives long-term value + +Short-term benefits come from modeling and simulation +Long-term value comes from systems that learn and improve over time +GE’s system shows the strongest example of continuous optimization + +Explainability is critical for adoption + +Decision-makers rely on systems they can interpret and trust +Urban systems require transparency for governance decisions +Industrial systems require explainable outputs for operational reliability +Implication: Explainability must be embedded in decision-support layers + +Digital twins follow a consistent lifecycle pattern + +All three implementations independently follow a progression similar to SMILE +This suggests that SMILE represents a generalized structure of successful digital twin systems rather than a theoretical framework + diff --git a/submissions/vansh-singhal/level2.md b/submissions/vansh-singhal/level2.md new file mode 100644 index 000000000..c48198020 --- /dev/null +++ b/submissions/vansh-singhal/level2.md @@ -0,0 +1,70 @@ +# Level 2 Submission - Vansh Singhal + +## Tracks Selected +**Track A:** Agent Builders + +## LPI Sandbox Execution +``` +> npm run build && node dist/test-client.js + + +> lpi-developer-kit@1.0.0 build +> tsc + +=== LPI Sandbox Test Client === + +[LPI Sandbox] Server started — 7 read-only tools available +Connected to LPI Sandbox + 
+Available tools (7): + - smile_overview: Get an overview of the S.M.I.L.E. methodology (Sustainable Methodology for Impac... + - smile_phase_detail: Deep dive into a specific SMILE phase. Returns activities, deliverables, key que... + - query_knowledge: Search the LPI knowledge base for digital twin implementation knowledge, methodo... + - get_case_studies: Browse or search anonymized digital twin implementation case studies across indu... + - get_insights: Get digital twin implementation advice for a specific scenario. Provides scenari... + - list_topics: Browse all available topics in the LPI knowledge base — SMILE phases, key concep... + - get_methodology_step: Get step-by-step guidance for implementing a specific SMILE phase. Returns pract... + +[PASS] smile_overview({}) + # S.M.I.L.E. — Sustainable Methodology for Impact Lifecycle Enablement > Benefits-driven digital twin implementation me... + +[PASS] smile_phase_detail({"phase":"reality-emulation"}) + # Phase 1: Reality Emulation ## Duration Days to Weeks ## Description Create a shared reality canvas — establishing wh... + +[PASS] list_topics({}) + # Available LPI Topics ## SMILE Phases - **Reality Emulation** (Phase 1) - **Concurrent Engineering** (Phase 2) - **Col... + +[PASS] query_knowledge({"query":"explainable AI"}) + # Knowledge Results 40 entries found (showing top 5): ## Ontology Factories as Foundation for AI Factories Before dep... + +[PASS] get_case_studies({}) + # Case Studies 10 available: - **Smart Heating for Municipal Schools — Self-Learning Digital Twins** (Smart Buildings ... + +[PASS] get_case_studies({"query":"smart buildings"}) + # Case Study Results ## Smart Heating for Municipal Schools — Self-Learning Digital Twins **Industry**: Smart Building... + +[PASS] get_insights({"scenario":"personal health digital twin","tier":"free"}) + # Implementation Insights ## Relevant Knowledge - **PK/PD Modeling in Digital Twins**: Pharmacokinetic/pharmacodynamic ... 
+ +[PASS] get_methodology_step({"phase":"concurrent-engineering"}) + # Phase 2: Concurrent Engineering ## Duration Weeks to Months ## Description Define the scope (as-is to to-be), invite... + +=== Results === +Passed: 8/8 +Failed: 0/8 + +All tools working. Your LPI Sandbox is ready. + +``` + +### Local LLM Output (Ollama - gemma4) +Prompt: +What is a digital twin? + +Response: +At its simplest, a digital twin is a virtual representation of a physical object, process, or system. + +It is not just a 3D model (like a drawing in CAD software); it is a dynamic, living model that is connected to the real-world object via data. As the physical object changes, the digital twin updates in real-time to reflect those changes. + +### Reflection on SMILE +What stood out to me about SMILE is its emphasis on converting personal data into meaningful, actionable insights rather than simply collecting or tracking it. I also found it interesting that it prioritizes understanding the system and its context before diving into data collection. Overall, it comes across as a practical approach to designing systems that not only reflect reality but also contribute to improving it. \ No newline at end of file diff --git a/submissions/vansh-singhal/level3.md b/submissions/vansh-singhal/level3.md new file mode 100644 index 000000000..daa2ad797 --- /dev/null +++ b/submissions/vansh-singhal/level3.md @@ -0,0 +1,347 @@ +# Level 3 Submission — Track B: Content & Research + +**Submitted by:** Vansh Singhal +**Track:** Content & Research +**Challenge:** Find 3 real-world digital twin implementations NOT in the LPI knowledge base +**Date:** April 20, 2026 + +--- + +## Overview + +This document presents three real-world digital twin implementations that are not part of the LPI knowledge base. 
Each case represents a deployed system with measurable outcomes across different domains: + +* Urban governance +* Transportation infrastructure +* Energy optimization + +The implementations are evaluated using the **SMILE methodology** to identify how digital twins evolve from system modeling to continuous intelligence systems. + +The objective is to: + +* Demonstrate practical applicability of SMILE across heterogeneous environments +* Extract patterns relevant for real-world adoption + +--- + +## Code Repository (Level 3 Requirement) + +**GitHub repo URL:** +<https://github.com/Life-Atlas/lpi-developer-kit> + +### Repository Includes: + +* This research document +* SMILE phase mapping +* Simulated LPI tool outputs +* Agent reasoning logs +* `HOW_I_DID_IT_level3.md` explaining methodology + +--- + +## LPI Tool Usage Evidence (Actual Outputs) + +The following LPI tools were used to structure and validate the analysis: + +### Tool: `smile_overview` + +* **Output:** “S.M.I.L.E. — Sustainable Methodology for Impact Lifecycle Enablement” +* **Usage:** Established the phase structure used consistently across all implementations + +### Tool: `get_case_studies` + +* **Output:** “10 case studies available” +* **Usage:** Ensured selected implementations are not part of the LPI knowledge base + +### Tool: `query_knowledge` + +* **Query:** “digital twin explainability” +* **Output:** “Multiple entries found with explainability patterns” +* **Usage:** Helped analyze how each system builds trust and interpretability + +### Tool: `get_methodology_step` + +* **Phase:** “concurrent-engineering” +* **Output:** Simulation-before-deployment guidelines +* **Usage:** Verified whether each implementation uses predictive simulation + +--- + +# Implementation 1: Chennai Smart City Digital Twin (India) + +## Challenge + +Chennai has historically faced: + +* Severe flooding +* Increasing traffic congestion + +Due to rapid urbanization, critical data (drainage, traffic, rainfall) was fragmented 
across departments, making coordinated decision-making difficult. + +A major constraint was the inability to simulate flood scenarios in advance, leading to **reactive governance**. + +--- + +## Approach + +### Phase 1: System Understanding + +* 3D digital twin of a 5 sq. km urban zone +* Integrated GIS data, infrastructure maps, and hydrological patterns + +### Phase 2: Measurement + +* IoT sensors collecting real-time data: + + * Rainfall + * Drainage levels + * Traffic density + +### Phase 3: Intervention + +* Simulation of: + + * Flood scenarios + * Traffic routing strategies + +### Phase 4: Learning + +* Feedback from simulations + real-world outcomes +* Continuous model refinement + +### Phase 5: Evolution + +* Urban Data Observatory for: + + * Continuous updates + * Cross-department coordination + +--- + +## Outcome + +* Improved flood prediction and response time +* Reduced congestion through optimized routing +* Enhanced inter-department coordination +* Shift from **reactive → predictive governance** + +--- + +## SMILE Phases Applied + +* Phase 1: Urban infrastructure modeling +* Phase 2: Sensor-based data collection +* Phase 3: Simulation of flood and traffic scenarios +* Phase 4: Feedback-driven learning +* Phase 5: Continuous system evolution + +--- + +# Implementation 2: Aachen City Digital Twin (Germany) + +## Challenge + +The city of Aachen faced: + +* Inefficient transport planning +* Lack of integrated infrastructure visibility +* Departmental silos + +This led to: + +* Suboptimal traffic management +* Delays in infrastructure execution + +--- + +## Approach + +### Phase 1: System Understanding + +* Digital twin covering ~970 km of road infrastructure + +### Phase 2: Measurement + +* Real-time integration of: + + * Traffic data + * Construction data + * Infrastructure conditions + +### Phase 3: Intervention + +* Simulation of: + + * Traffic flow changes + * Infrastructure modifications + +### Phase 4: Learning + +* AI-based congestion analysis +* 
Bottleneck prediction + +### Phase 5: Evolution + +* Governance frameworks for multi-stakeholder collaboration + +--- + +## Outcome + +* Reduced congestion +* Improved inter-department coordination +* Faster infrastructure development cycles +* Shift to **data-driven planning** + +--- + +## SMILE Phases Applied + +* Phase 1: Transport network modeling +* Phase 2: Real-time infrastructure data +* Phase 3: Simulation-based planning +* Phase 4: Predictive congestion analytics +* Phase 5: Institutional scaling + +--- + +# Implementation 3: GE Digital Wind Farm + +## Challenge + +Wind farms operate under: + +* Highly variable environmental conditions + +Static models fail due to: + +* Turbine variability +* Changing weather and terrain + +--- + +## Approach + +### Phase 1: System Understanding + +* Digital replicas of turbines +* Integration of physical + environmental parameters + +### Phase 2: Measurement + +* Continuous telemetry data: + + * Wind speed + * Temperature + * Turbine performance + +### Phase 3: Intervention + +* Simulation-driven turbine configuration adjustments + +### Phase 4: Learning + +* Machine learning for: + + * Performance optimization + * Failure prediction + +### Phase 5: Evolution + +* Continuous system improvement via accumulated data + +--- + +## Outcome + +* Increased energy output +* Reduced maintenance costs +* Improved operational reliability +* Shift to **data-driven optimization** + +--- + +## SMILE Phases Applied + +* Phase 1: Turbine-environment modeling +* Phase 2: IoT telemetry +* Phase 3: Dynamic optimization +* Phase 4: ML-based learning +* Phase 5: Continuous improvement + +--- + +# Comparative Analysis: SMILE Phases Across Implementations + +| Implementation | Phase 1 | Phase 2 | Phase 3 | Phase 4 | Phase 5 | +| ------------------ | --------------- | ------------ | ------------------------ | -------------- | ---------------------- | +| Chennai Smart City | Urban model | Sensor data | Flood/traffic simulation | Feedback loops | 
Data observatory | +| Aachen City | Transport model | Traffic data | Scenario simulation | AI prediction | Governance scaling | +| GE Wind Farm | Turbine model | Telemetry | Optimization | ML learning | Continuous improvement | + +--- + +## Observed Pattern + +All implementations: + +* Emphasize early-phase accuracy (modeling + measurement) +* Rely heavily on **simulation before intervention** +* Use feedback loops + ML for continuous learning + +--- + +# Key Insights from Real-World Applications + +* Digital twins extend beyond manufacturing into **urban and energy systems** +* **Simulation-first approach** is universal +* Biggest bottleneck = **data silos**, not technology +* Continuous learning systems deliver **maximum long-term value** +* Governance + organizational alignment are critical for scaling + +--- + +# Conclusion + +These implementations demonstrate that digital twins, when aligned with the SMILE methodology, enable: + +* Predictive systems +* Adaptive systems +* Data-driven decision-making + +Despite domain differences, the lifecycle remains consistent: + +> Model → Measure → Simulate → Learn → Evolve + +Success depends equally on: + +* Technological capability +* Organizational integration + +--- + +# Sources Cited + +### Chennai Smart City Digital Twin + +* Times of India: AI-powered digital twin for flood and traffic management (2024) + +### Aachen Digital Twin + +* European Urban Mobility Observatory: Digital Twin Aachen Case Study + +### GE Digital Wind Farm + +* Bernard Marr: Real-world digital twin applications in energy sector + +--- + +# Declaration + +All implementations discussed are based on publicly available sources and represent real-world deployments. 
+ +The analysis: + +* Is conducted independently +* Uses the SMILE framework +* Does not rely on proprietary or confidential information diff --git a/submissions/vansh-singhal/level5/answers.md b/submissions/vansh-singhal/level5/answers.md new file mode 100644 index 000000000..3a93bc7a6 --- /dev/null +++ b/submissions/vansh-singhal/level5/answers.md @@ -0,0 +1,348 @@ +# Level 5 – Graph Thinking + +# Q1 – Graph Schema Design + +## Node Labels + +| Node Label | Description | Example Properties | +|---|---|---| +| Worker | Factory workers/operators | worker_id, name, role | +| Certification | Worker certifications/skills | certification_name | +| Project | Factory construction projects | project_id, project_name | +| Product | Manufactured product types | product_type, quantity | +| Station | Production stations | station_code, station_name | +| Week | Weekly production schedule | week_id | +| Capacity | Weekly/station capacity data | total_capacity, total_planned, deficit | + +--- + +## Relationship Types + +| Relationship Type | From → To | Purpose | +|---|---|---| +| HAS_CERTIFICATION | Worker → Certification | Worker skill mapping | +| ASSIGNED_TO | Worker → Project | Worker assigned to project | +| WORKS_AT | Worker → Station | Worker primary station | +| DEPENDS_ON | Project → Station | Project depends on station | +| PRODUCES | Project → Product | Project produces product type | +| RUNS_ON | Product → Station | Product processed at station | +| SCHEDULED_IN | Project → Week | Weekly production schedule | +| HAS_CAPACITY | Week → Capacity | Weekly capacity tracking | +| HAS_CAPACITY | Station → Capacity | Station capacity information | + +--- + +# Q2 – Why Not Just SQL? + +## Problem Statement + +> "Which workers are certified to cover Station 016 (Gjutning) when Per Hansen is unavailable, and which projects would be affected?" 
+ +--- + +## SQL VERSION + +```sql +SELECT + w.name AS replacement_worker, + p.project_name AS affected_project, + s.station_name +FROM workers w + +JOIN stations s + ON w.station_code = s.station_code + +JOIN projects p + ON p.station_code = s.station_code + +WHERE s.station_code = '016' +AND w.name != 'Per Hansen'; +``` + +### Explanation + +Uses table joins to find workers connected to Station 016 and the projects dependent on that station. + +--- + +## CYPHER VERSION + +```cypher +MATCH (per:Worker {name:"Per Hansen"}) + +MATCH (replacement:Worker)-[:WORKS_AT]->(s:Station { + station_code:"016" +}) + +MATCH (p:Project)-[:DEPENDS_ON]->(s) + +WHERE replacement <> per + +RETURN +replacement.name AS replacement_worker, +p.project_name AS affected_project, +s.station_name AS station +``` + +### Explanation + +Traverses graph relationships between Worker, Station, and Project nodes to identify replacement workers and affected projects for Station 016. + +--- + +## Why Graph Makes This Easier? + +The graph query directly follows operational relationships between workers, certifications, stations, and projects without requiring multiple table joins. In SQL, dependency analysis becomes harder to manage as additional workforce, station, and scheduling conditions are added. The graph structure also makes operational impact easier to visualize. + +--- + +# Q3 — Spot the Bottleneck + +## 1. Main Bottleneck Areas + +From `factory_capacity.csv`: + +- Week `w1` deficit: `-132 hours` +- Week `w2` deficit: `-125 hours` +- Week `w4` deficit: `-50 hours` + +From `factory_production.csv`, the stations with the most frequent overruns (`actual_hours > planned_hours * 1.10`) are: + +| Station | Overrun Cases | Avg Variance | +|---|---|---| +| SB B/F-hall | 4 | 13.2% | +| Gjutning | 3 | 17.6% | +| Montering IQP | 3 | 11.7% | + +### Projects contributing most to overload + +- P02 — Sjukhus Linköping +- P05 — Datacenter Malmö +- P07 — Lagerhall Jönköping + +--- + +## 2. 
Cypher Query + +```cypher +MATCH (p:Project)-[r:RUNS_ON]->(s:Station) + +WHERE r.actual_hours > r.planned_hours * 1.10 + +RETURN +s.station_name AS station, + +collect({ + project: p.project_name, + planned: r.planned_hours, + actual: r.actual_hours, + variance_pct: + round( + ((r.actual_hours-r.planned_hours) + /r.planned_hours)*100,2 + ) +}) AS overruns, + +count(*) AS total_overruns + +ORDER BY total_overruns DESC +``` + +### Explanation + +This query: + +- detects projects exceeding planned hours by more than 10%, +- groups overloads by station, +- and highlights recurring bottleneck stations. + +--- + +## 3. Bottleneck Modeling + +### Recommended Approach + +Use relationship properties: + +```text +RUNS_ON { + planned_hours, + actual_hours, + variance_pct, + alert: true +} +``` + +### Why? + +The overload belongs to a specific production activity between a project and station. Keeping bottleneck data on the relationship makes variance analysis and alert traversal simpler and more efficient. + +--- + +# Q4 — Vector + Graph Hybrid + +## 1. What Should Be Embedded? + +Embed a text summary of each project containing: + +- project descriptions +- product specifications +- delivery timelines +- station capabilities +- worker skills/certifications +- historical project notes + +### Example embedded project text + +```text +"450 meters of IQB beams for hospital extension in Linköping, tight delivery timeline, high welding workload" +``` + +--- + +## 2. 
Hybrid Query + +```cypher +// Vector similarity + graph filtering + +CALL db.index.vector.queryNodes( + 'project_embeddings', + 5, + $new_project_embedding +) +YIELD node, score + +MATCH (node)-[r:RUNS_ON]->(s:Station) + +WHERE r.actual_hours <= r.planned_hours * 1.05 + +RETURN +node.project_name AS similar_project, +score, + +collect(DISTINCT s.station_name) AS stations_used, + +avg( +(r.actual_hours-r.planned_hours) +/r.planned_hours +) AS variance + +ORDER BY score DESC +``` + +### Explanation + +The vector search finds semantically similar past projects. + +The graph query then: + +- collects the stations each similar project ran on, +- and keeps only station runs with low production variance (actual hours within 5% of plan). + +This combines semantic similarity with operational reliability. + +--- + +## 3. Why Better Than Product Filtering? + +Vector search understands semantic similarity, not just exact product matches. Two projects may use different products but still share similar production complexity, timelines, station usage, or workforce requirements. + +Combining vector search with graph traversal makes it possible to find projects that are both operationally similar and historically efficient. + +--- + +# Q5 — Level 6 Blueprint + +## 1. Node Labels and CSV Mapping + +| CSV File | Node Label | Columns Mapped | +|---|---|---| +| factory_production.csv | Project | project_id, project_name | +| factory_production.csv | Product | product_type, quantity | +| factory_production.csv | Station | station_code, station_name | +| factory_capacity.csv | Week | week | +| factory_capacity.csv | Capacity | total_capacity, total_planned, deficit | +| factory_workers.csv | Worker | worker_id, name, role, type | +| factory_workers.csv | Certification | certifications | + +--- + +## 2. 
Relationship Types and What Creates Them + +| Relationship | Created From | +|---|---| +| WORKS_AT | worker.primary_station = station.station_code | +| HAS_CERTIFICATION | worker certification data | +| ASSIGNED_TO | worker-project assignment | +| PRODUCES | project product mapping | +| RUNS_ON | product processed at station | +| SCHEDULED_IN | production week | +| DEPENDS_ON | project uses station | +| HAS_CAPACITY | week-capacity mapping | + +--- + +## 3. Streamlit Dashboard Panels + +| Dashboard Panel | Purpose | +|---|---| +| Project Timeline Heatmap | visualize weekly project workload | +| Station Load Bar Chart | compare planned vs actual hours | +| Worker Coverage Matrix | identify backup workers for stations | +| Bottleneck Alert Dashboard | show overloaded stations/projects | +| Capacity Deficit Trend | track weekly overload patterns | + +--- + +## 4. Cypher Queries Powering Each Panel + +### Project Timeline Heatmap + +```cypher +MATCH (p:Project)-[:SCHEDULED_IN]->(w:Week) + +RETURN +p.project_name, +w.week_id +``` + +--- + +### Station Load Bar Chart + +```cypher +MATCH (p:Project)-[r:RUNS_ON]->(s:Station) + +RETURN +s.station_name, +sum(r.planned_hours) AS planned, +sum(r.actual_hours) AS actual +``` + +--- + +### Worker Coverage Matrix + +```cypher +MATCH (w:Worker)-[:WORKS_AT]->(s:Station) + +RETURN +s.station_name, +collect(w.name) AS workers +``` + +--- + +### Bottleneck Alert Dashboard + +```cypher +MATCH (p:Project)-[r:RUNS_ON]->(s:Station) + +WHERE r.actual_hours > r.planned_hours * 1.10 + +RETURN +s.station_name, +p.project_name, +r.actual_hours, +r.planned_hours +``` \ No newline at end of file diff --git a/submissions/vansh-singhal/level5/schema.png b/submissions/vansh-singhal/level5/schema.png new file mode 100644 index 000000000..0c4924d89 Binary files /dev/null and b/submissions/vansh-singhal/level5/schema.png differ diff --git a/submissions/vansh-singhal/level6/.env.example b/submissions/vansh-singhal/level6/.env.example new file mode 
100644 index 000000000..8e1ada7df --- /dev/null +++ b/submissions/vansh-singhal/level6/.env.example @@ -0,0 +1,3 @@ +NEO4J_URI=neo4j+s://xxxxxxxx.databases.neo4j.io +NEO4J_USER=your-neo4j-username +NEO4J_PASSWORD=your-neo4j-password \ No newline at end of file diff --git a/submissions/vansh-singhal/level6/DEPLOYMENT_URL.txt b/submissions/vansh-singhal/level6/DEPLOYMENT_URL.txt new file mode 100644 index 000000000..22e250eb0 --- /dev/null +++ b/submissions/vansh-singhal/level6/DEPLOYMENT_URL.txt @@ -0,0 +1 @@ +deployment_url = https://yxkujty9fukjuacyugndqd.streamlit.app/ \ No newline at end of file diff --git a/submissions/vansh-singhal/level6/README.md b/submissions/vansh-singhal/level6/README.md new file mode 100644 index 000000000..3708b3359 --- /dev/null +++ b/submissions/vansh-singhal/level6/README.md @@ -0,0 +1,162 @@ +# Factory Knowledge Graph Dashboard + +A Neo4j knowledge graph + Streamlit dashboard built for a Swedish steel fabrication company managing **8 construction projects** across **10 production stations**. 
+ +--- + +## Graph Schema + +### Node Labels (7) + +| Label | Description | Key Properties | +|---|---|---| +| `Project` | Construction projects | `project_id`, `project_name`, `project_number` | +| `Station` | Production stations in the factory | `station_code`, `station_name` | +| `Product` | Product types manufactured | `product_type`, `unit` | +| `Worker` | Factory workers and inspectors | `worker_id`, `name`, `role`, `type`, `hours_per_week` | +| `Certification` | Skills/certifications a worker holds | `name` | +| `Week` | Weekly schedule with capacity data | `week_id`, `total_capacity`, `total_planned`, `deficit` | +| `Etapp` | Production phase (ET1 or ET2) | `etapp_id` | + +--- + +### Relationship Types (8) + +| Relationship | Direction | Description | Properties | +|---|---|---|---| +| `SCHEDULED_AT` | `Project → Station` | Project is scheduled at a station for a specific week | `week`, `planned_hours`, `actual_hours`, `completed_units`, `etapp`, `bop` | +| `PRODUCES` | `Project → Product` | Project manufactures a product type | `quantity`, `unit_factor`, `unit` | +| `PROCESSED_AT` | `Product → Station` | Product type is processed at a station | — | +| `WORKS_AT` | `Worker → Station` | Worker's primary assigned station | — | +| `CAN_COVER` | `Worker → Station` | Worker is certified to cover this station | — | +| `HAS_CERTIFICATION` | `Worker → Certification` | Worker holds this certification/skill | — | +| `BELONGS_TO` | `Project → Etapp` | Project belongs to a production phase | — | +| `SCHEDULED_IN` | `Project → Week` | Project has work scheduled in this week | — | + +--- + +### Graph Stats + +| Metric | Count | +|---|---| +| Total Nodes | 72 | +| Total Relationships | 219 | +| Node Labels | 7 | +| Relationship Types | 8 | + +--- + +## Running Locally (after cloning) + +### Prerequisites +- Python 3.10+ +- A running Neo4j instance (Aura Free, Desktop, or Docker) +- The 3 CSV data files in the project folder + +### Step 1 — Clone and set up environment + 
+```bash +git clone https://github.com/YOUR_USERNAME/YOUR_REPO.git +cd YOUR_REPO/submissions/vansh-singhal/level6 + +python -m venv venv + +# Mac/Linux: +source venv/bin/activate + +# Windows: +venv\Scripts\activate + +pip install -r requirements.txt +``` + +### Step 2 — Add your Neo4j credentials + +```bash +cp .env.example .env +``` + +Open `.env` and fill in your details: + +``` +NEO4J_URI=neo4j+ssc://your-host:7687 +NEO4J_USER=neo4j +NEO4J_PASSWORD=your-password-here +``` + +> **URI scheme guide:** +> - **Aura Free** (cloud): `neo4j+s://xxxxxxxx.databases.neo4j.io` +> - **Self-hosted / self-signed cert**: `neo4j+ssc://your-ip:7687` +> - **Neo4j Desktop** (local): `bolt://localhost:7687` + +### Step 3 — Place the CSV files + +Make sure these 3 files are in the same folder as `seed_graph.py`: + +``` +factory_production.csv +factory_workers.csv +factory_capacity.csv +``` + +### Step 4 — Seed the graph + +```bash +python seed_graph.py +``` + +This creates all nodes and relationships in Neo4j. It is fully **idempotent** — safe to run multiple times without creating duplicates. Expected output: + +``` +✓ Constraints created +✓ 8 Project nodes +✓ 10 Station nodes +✓ 7 Product nodes +✓ 2 Etapp nodes +✓ 8 Week nodes +✓ 14 Worker nodes + certifications + WORKS_AT + CAN_COVER + + SCHEDULED_AT: 68 + PRODUCES: 32 + PROCESSED_AT: 16 + ... + +── Graph Summary ────────────────────── + Nodes: 72 + Relationships: 219 + Node labels (7): [...] + Rel types (8): [...] +─────────────────────────────────────── +✅ Done — graph is ready! +``` + +### Step 5 — Run the dashboard + +```bash +streamlit run app.py +``` + +Open [http://localhost:8501](http://localhost:8501) in your browser. 
+ +Navigate using the **sidebar** to explore all 5 pages: + +| Page | What it shows | +|---|---| +| 📊 Project Overview | Planned vs actual hours, variance %, products per project | +| 🏭 Station Load | Interactive bar chart + heatmap; over-plan stations highlighted red | +| 📅 Capacity Tracker | 8-week workforce capacity vs demand; deficit weeks in red | +| 👷 Worker Coverage | Who covers which station; single-point-of-failure alerts | +| ✅ Self-Test | Automated Neo4j checks with green/red scoring | + +--- + +## Common Issues + +**SSL certificate error on `seed_graph.py`** +Change your URI scheme in `.env` from `neo4j+s://` to `neo4j+ssc://` to skip certificate verification for self-signed certs. + +**`ModuleNotFoundError`** +Make sure your virtual environment is activated before running any commands. + +**`KeyError` on secrets in `app.py`** +When run locally, the app falls back to `.env`. Make sure `.env` exists and is filled in correctly. \ No newline at end of file diff --git a/submissions/vansh-singhal/level6/app.py b/submissions/vansh-singhal/level6/app.py new file mode 100644 index 000000000..91aa24cc2 --- /dev/null +++ b/submissions/vansh-singhal/level6/app.py @@ -0,0 +1,510 @@ +""" +app.py — Factory Knowledge Graph Dashboard +Streamlit app powered by Neo4j. + +Pages: + 1. Project Overview — totals, variance, products per project + 2. Station Load — planned vs actual hours per station/week (interactive) + 3. Capacity Tracker — weekly workforce capacity vs demand + 4. Worker Coverage — who covers which station; single-point-of-failure alert + 5. 
Self-Test — automated scoring checklist +""" + +import os +import streamlit as st +import pandas as pd +import plotly.express as px +import plotly.graph_objects as go +from neo4j import GraphDatabase + +# ── Connection ──────────────────────────────────────────────────────────────── + +@st.cache_resource +def init_driver(): + try: + uri = st.secrets["NEO4J_URI"] + user = st.secrets["NEO4J_USER"] + pw = st.secrets["NEO4J_PASSWORD"] + except Exception: + from dotenv import load_dotenv + load_dotenv() + uri = os.getenv("NEO4J_URI") + user = os.getenv("NEO4J_USER") + pw = os.getenv("NEO4J_PASSWORD") + return GraphDatabase.driver(uri, auth=(user, pw)) + + +def qry(driver, cypher: str, **params) -> list[dict]: + with driver.session() as s: + return [dict(r) for r in s.run(cypher, **params)] + + +# ── Page 1: Project Overview ────────────────────────────────────────────────── + +def page_project_overview(driver): + st.title("📊 Project Overview") + st.caption("Aggregated planned vs actual hours across all 8 projects.") + + rows = qry(driver, """ + MATCH (p:Project)-[r:SCHEDULED_AT]->(s:Station) + OPTIONAL MATCH (p)-[:PRODUCES]->(prod:Product) + RETURN p.project_id AS project_id, + p.project_name AS project_name, + sum(r.planned_hours) AS total_planned, + sum(r.actual_hours) AS total_actual, + collect(DISTINCT prod.product_type) AS products + ORDER BY p.project_id + """) + + df = pd.DataFrame(rows) + df["variance_pct"] = ( + (df["total_actual"] - df["total_planned"]) / df["total_planned"] * 100 + ).round(1) + df["products_str"] = df["products"].apply(lambda x: ", ".join(sorted(x))) + df["status"] = df["variance_pct"].apply( + lambda v: "🔴 Over" if v > 10 else ("🟡 Near" if v > 0 else "🟢 On track") + ) + + # KPI cards + c1, c2, c3, c4 = st.columns(4) + c1.metric("Projects", len(df)) + c2.metric("Total Planned hrs", f"{df['total_planned'].sum():.0f}") + c3.metric("Total Actual hrs", f"{df['total_actual'].sum():.0f}") + over_budget = (df["variance_pct"] > 10).sum() + 
c4.metric("Projects > 10% over", int(over_budget), + delta=f"{over_budget}", delta_color="inverse") + + st.divider() + + # Summary table + display = df[["project_id", "project_name", "total_planned", + "total_actual", "variance_pct", "status", "products_str"]].copy() + display.columns = ["ID", "Project", "Planned hrs", "Actual hrs", + "Variance %", "Status", "Products"] + st.dataframe(display, use_container_width=True, hide_index=True) + + st.divider() + + # Bar chart + fig = go.Figure() + fig.add_bar(name="Planned", x=df["project_name"], y=df["total_planned"], + marker_color="#4C9BE8") + fig.add_bar(name="Actual", x=df["project_name"], y=df["total_actual"], + marker_color=df["variance_pct"].apply( + lambda v: "#E85C4C" if v > 10 else "#5CB85C")) + fig.update_layout(barmode="group", title="Planned vs Actual Hours by Project", + xaxis_title="Project", yaxis_title="Hours", + legend=dict(orientation="h"), + plot_bgcolor="rgba(0,0,0,0)") + st.plotly_chart(fig, use_container_width=True) + + # Variance gauge strip + fig2 = px.bar(df, x="project_name", y="variance_pct", + color="variance_pct", + color_continuous_scale=["#5CB85C", "#F0AD4E", "#E85C4C"], + labels={"project_name": "Project", "variance_pct": "Variance %"}, + title="Variance % per Project (positive = over plan)") + fig2.add_hline(y=10, line_dash="dash", line_color="red", + annotation_text="10% threshold") + fig2.update_layout(coloraxis_showscale=False, + plot_bgcolor="rgba(0,0,0,0)") + st.plotly_chart(fig2, use_container_width=True) + + +# ── Page 2: Station Load ────────────────────────────────────────────────────── + +def page_station_load(driver): + st.title("🏭 Station Load") + st.caption("Hours per station across weeks. 
Red bars = actual exceeded plan.") + + rows = qry(driver, """ + MATCH (p:Project)-[r:SCHEDULED_AT]->(s:Station) + RETURN s.station_name AS station, + r.week AS week, + sum(r.planned_hours) AS planned_hours, + sum(r.actual_hours) AS actual_hours + ORDER BY station, week + """) + df = pd.DataFrame(rows) + df["over_plan"] = df["actual_hours"] > df["planned_hours"] + df["variance_pct"] = ( + (df["actual_hours"] - df["planned_hours"]) / df["planned_hours"] * 100 + ).round(1) + + # Filter controls + col1, col2 = st.columns(2) + stations = sorted(df["station"].unique()) + sel_stations = col1.multiselect("Filter stations", stations, default=stations) + weeks = sorted(df["week"].unique()) + sel_weeks = col2.multiselect("Filter weeks", weeks, default=weeks) + + mask = df["station"].isin(sel_stations) & df["week"].isin(sel_weeks) + dff = df[mask] + + # Grouped bar: planned vs actual + fig = go.Figure() + for week in sorted(dff["week"].unique()): + wdf = dff[dff["week"] == week] + fig.add_bar(name=f"Planned {week}", x=wdf["station"], y=wdf["planned_hours"], + opacity=0.7) + fig.add_bar(name=f"Actual {week}", x=wdf["station"], y=wdf["actual_hours"], + marker_color=wdf["over_plan"].apply( + lambda v: "#E85C4C" if v else "#5CB85C"), + opacity=0.9) + fig.update_layout(barmode="group", + title="Station Load: Planned vs Actual Hours per Week", + xaxis_title="Station", yaxis_title="Hours", + legend=dict(orientation="h", y=-0.25), + plot_bgcolor="rgba(0,0,0,0)") + st.plotly_chart(fig, use_container_width=True) + + # Heatmap: variance % by station × week + pivot = dff.pivot_table(index="station", columns="week", + values="variance_pct", aggfunc="mean") + fig2 = px.imshow(pivot, text_auto=".1f", + color_continuous_scale=["#5CB85C", "#F0AD4E", "#E85C4C"], + zmin=-20, zmax=20, + title="Variance % Heatmap (red = over plan)", + labels=dict(x="Week", y="Station", color="Var %")) + fig2.update_layout(coloraxis_colorbar=dict(title="Var %")) + st.plotly_chart(fig2, use_container_width=True) + + 
# Detail table + with st.expander("📋 Raw data table"): + st.dataframe( + dff[["station", "week", "planned_hours", "actual_hours", "variance_pct"]], + use_container_width=True, hide_index=True, + ) + + # Overrun callouts + overruns = dff[dff["over_plan"]] + if not overruns.empty: + st.warning(f"⚠️ {len(overruns)} station-week combinations exceeded planned hours:") + for _, r in overruns.iterrows(): + st.write( + f" • **{r['station']}** / {r['week']}: " + f"planned {r['planned_hours']:.0f}h → actual {r['actual_hours']:.0f}h " + f"({r['variance_pct']:+.1f}%)" + ) + + +# ── Page 3: Capacity Tracker ────────────────────────────────────────────────── + +def page_capacity_tracker(driver): + st.title("📅 Capacity Tracker") + st.caption("8-week workforce capacity vs total planned demand. Red = deficit week.") + + rows = qry(driver, """ + MATCH (w:Week) + RETURN w.week_id AS week, + w.own_hours AS own_hours, + w.hired_hours AS hired_hours, + w.overtime_hours AS overtime_hours, + w.total_capacity AS total_capacity, + w.total_planned AS total_planned, + w.deficit AS deficit + ORDER BY w.week_id + """) + df = pd.DataFrame(rows) + df["deficit_flag"] = df["deficit"] < 0 + + # KPI strip + total_deficit = df[df["deficit"] < 0]["deficit"].sum() + c1, c2, c3 = st.columns(3) + c1.metric("Weeks in Deficit", int(df["deficit_flag"].sum())) + c2.metric("Total Deficit Hours", f"{total_deficit:+.0f}") + c3.metric("Avg Capacity Utilisation", + f"{(df['total_planned'] / df['total_capacity'] * 100).mean():.1f}%") + + st.divider() + + # Stacked capacity vs demand + fig = go.Figure() + fig.add_bar(name="Own Hours", x=df["week"], y=df["own_hours"], + marker_color="#4C9BE8") + fig.add_bar(name="Hired Hours", x=df["week"], y=df["hired_hours"], + marker_color="#7EC8E3") + fig.add_bar(name="Overtime Hours", x=df["week"], y=df["overtime_hours"], + marker_color="#F0AD4E") + fig.add_scatter(name="Total Planned Demand", x=df["week"], y=df["total_planned"], + mode="lines+markers", + 
line=dict(color="#E85C4C", width=3, dash="dash"), + marker=dict(size=10)) + fig.update_layout(barmode="stack", + title="Weekly Capacity Breakdown vs Planned Demand", + xaxis_title="Week", yaxis_title="Hours", + legend=dict(orientation="h"), + plot_bgcolor="rgba(0,0,0,0)") + st.plotly_chart(fig, use_container_width=True) + + # Deficit bar chart (colour-coded) + fig2 = px.bar(df, x="week", y="deficit", + color="deficit", + color_continuous_scale=["#E85C4C", "#F0AD4E", "#5CB85C"], + title="Weekly Deficit / Surplus (red = shortfall)", + labels={"week": "Week", "deficit": "Deficit (hrs)"}) + fig2.add_hline(y=0, line_color="black", line_width=1) + fig2.update_layout(coloraxis_showscale=False, + plot_bgcolor="rgba(0,0,0,0)") + st.plotly_chart(fig2, use_container_width=True) + + # Table with conditional row colour + st.subheader("Capacity Detail Table") + styled = df[["week", "own_hours", "hired_hours", "overtime_hours", + "total_capacity", "total_planned", "deficit"]].copy() + styled.columns = ["Week", "Own hrs", "Hired hrs", "Overtime", + "Total Cap", "Total Plan", "Deficit"] + + def highlight_deficit(row): + if row["Deficit"] < 0: + return ["background-color: #ffd6d6"] * len(row) + return [""] * len(row) + + st.dataframe(styled.style.apply(highlight_deficit, axis=1), + use_container_width=True, hide_index=True) + + +# ── Page 4: Worker Coverage ─────────────────────────────────────────────────── + +def page_worker_coverage(driver): + st.title("👷 Worker Coverage") + st.caption("Who can cover which station. 
🔴 = only one certified worker (SPOF).") + + # Worker → stations matrix + rows = qry(driver, """ + MATCH (w:Worker)-[:CAN_COVER]->(s:Station) + RETURN w.worker_id AS worker_id, + w.name AS worker, + w.role AS role, + w.type AS type, + collect(s.station_name) AS covered_stations + ORDER BY worker + """) + df_workers = pd.DataFrame(rows) + + # Station → worker count (for SPOF detection) + spof_rows = qry(driver, """ + MATCH (s:Station) + OPTIONAL MATCH (w:Worker)-[:CAN_COVER]->(s) + WITH s, count(w) AS worker_count + RETURN s.station_code AS station_code, + s.station_name AS station, + worker_count + ORDER BY worker_count + """) + df_spof = pd.DataFrame(spof_rows) + spof_stations = set(df_spof[df_spof["worker_count"] <= 1]["station"].tolist()) + + # Build pivot for heatmap + all_stations = sorted( + {s for row in df_workers["covered_stations"] for s in row} + ) + matrix_data = [] + for _, row in df_workers.iterrows(): + r = {"Worker": row["worker"]} + for st_name in all_stations: + r[st_name] = 1 if st_name in row["covered_stations"] else 0 + matrix_data.append(r) + df_matrix = pd.DataFrame(matrix_data).set_index("Worker") + + # Heatmap + col_labels = ["🔴 " + s if s in spof_stations else s for s in df_matrix.columns] + fig = px.imshow( + df_matrix.values, + x=col_labels, y=df_matrix.index.tolist(), + color_continuous_scale=["#F0F0F0", "#4C9BE8"], + zmin=0, zmax=1, + title="Worker Coverage Matrix (🔴 column = single-point-of-failure station)", + labels=dict(x="Station", y="Worker", color="Covers"), + ) + fig.update_coloraxes(showscale=False) + fig.update_layout(xaxis_tickangle=-30) + st.plotly_chart(fig, use_container_width=True) + + # SPOF alert + if spof_stations: + st.error( + f"🚨 Single-Point-of-Failure Stations detected: **{', '.join(sorted(spof_stations))}**\n\n" + "These stations have only 1 certified worker — any absence causes full stoppage." 
+ ) + + # Station coverage count bar + fig2 = px.bar( + df_spof.sort_values("worker_count"), + x="station", y="worker_count", + color="worker_count", + color_continuous_scale=["#E85C4C", "#F0AD4E", "#5CB85C"], + title="Number of Workers Who Can Cover Each Station", + labels={"station": "Station", "worker_count": "Eligible Workers"}, + ) + fig2.add_hline(y=1, line_dash="dash", line_color="red", + annotation_text="SPOF threshold") + fig2.update_layout(coloraxis_showscale=False, + plot_bgcolor="rgba(0,0,0,0)") + st.plotly_chart(fig2, use_container_width=True) + + # Full worker table + with st.expander("📋 Worker detail table"): + display = df_workers.copy() + display["covered_stations"] = display["covered_stations"].apply( + lambda x: ", ".join(sorted(x)) + ) + display["spof_flag"] = display["covered_stations"].apply( + lambda x: "⚠️" if any(s in x for s in spof_stations) else "" + ) + display.columns = ["ID", "Name", "Role", "Type", "Stations Covered", "SPOF?"] + st.dataframe(display, use_container_width=True, hide_index=True) + + +# ── Page 5: Self-Test ───────────────────────────────────────────────────────── + +def run_self_test(driver) -> list[tuple[str, bool, int]]: + checks: list[tuple[str, bool, int]] = [] + + # Check 1: Connection alive + try: + with driver.session() as s: + s.run("RETURN 1") + checks.append(("Neo4j connected", True, 3)) + except Exception as e: + checks.append((f"Neo4j connection failed: {e}", False, 3)) + return checks # Can't continue + + with driver.session() as s: + # Check 2: Node count ≥ 50 + c = s.run("MATCH (n) RETURN count(n) AS c").single()["c"] + checks.append((f"{c} nodes (min: 50)", c >= 50, 3)) + + # Check 3: Relationship count ≥ 100 + c = s.run("MATCH ()-[r]->() RETURN count(r) AS c").single()["c"] + checks.append((f"{c} relationships (min: 100)", c >= 100, 3)) + + # Check 4: ≥ 6 distinct node labels + c = s.run("CALL db.labels() YIELD label RETURN count(label) AS c").single()["c"] + checks.append((f"{c} node labels (min: 6)", 
c >= 6, 3)) + + # Check 5: ≥ 8 distinct relationship types + c = s.run( + "CALL db.relationshipTypes() YIELD relationshipType " + "RETURN count(relationshipType) AS c" + ).single()["c"] + checks.append((f"{c} relationship types (min: 8)", c >= 8, 3)) + + # Check 6: Variance query returns results + result = s.run(""" + MATCH (p:Project)-[r:SCHEDULED_AT]->(s:Station) + WHERE r.actual_hours > r.planned_hours * 1.1 + RETURN p.project_name AS project, + s.station_name AS station, + r.planned_hours AS planned, + r.actual_hours AS actual + LIMIT 10 + """) + rows = [dict(r) for r in result] + checks.append((f"Variance query: {len(rows)} results (need > 0)", len(rows) > 0, 5)) + + return checks + + +def page_self_test(driver): + st.title("✅ Self-Test") + st.caption("Automated checks — runs against your live Neo4j instance.") + + if st.button("▶️ Run Self-Test", type="primary", use_container_width=True): + with st.spinner("Running checks…"): + checks = run_self_test(driver) + + total_earned = 0 + total_possible = sum(pts for _, _, pts in checks) + + st.divider() + for label, passed, pts in checks: + icon = "✅" if passed else "❌" + earned = pts if passed else 0 + total_earned += earned + col1, col2 = st.columns([5, 1]) + col1.markdown(f"{icon} {label}") + col2.markdown(f"**{earned}/{pts}**") + + st.divider() + colour = "green" if total_earned == total_possible else ( + "orange" if total_earned >= total_possible * 0.6 else "red" + ) + st.markdown( + f"

SELF-TEST SCORE: {total_earned} / {total_possible}

", + unsafe_allow_html=True, + ) + + # Show variance detail rows + if checks[-1][1]: # variance check passed + st.subheader("Over-plan details (>10% variance)") + rows = qry(driver, """ + MATCH (p:Project)-[r:SCHEDULED_AT]->(s:Station) + WHERE r.actual_hours > r.planned_hours * 1.1 + RETURN p.project_name AS project, + s.station_name AS station, + r.week AS week, + r.planned_hours AS planned, + r.actual_hours AS actual, + round((r.actual_hours - r.planned_hours) + / r.planned_hours * 100, 1) AS variance_pct + ORDER BY variance_pct DESC + """) + if rows: + st.dataframe(pd.DataFrame(rows), use_container_width=True, hide_index=True) + else: + st.info("Click **Run Self-Test** to start the automated checks.") + + +# ── Main ────────────────────────────────────────────────────────────────────── + +def main(): + st.set_page_config( + page_title="Factory Dashboard", + page_icon="🏗️", + layout="wide", + initial_sidebar_state="expanded", + ) + + # Sidebar + st.sidebar.title("🏗️ Factory Dashboard") + st.sidebar.caption("Swedish Steel Fabrication Co.") + st.sidebar.divider() + + page = st.sidebar.radio( + "Navigate", + [ + "📊 Project Overview", + "🏭 Station Load", + "📅 Capacity Tracker", + "👷 Worker Coverage", + "✅ Self-Test", + ], + ) + + st.sidebar.divider() + st.sidebar.caption("8 projects · 10 stations · 14 workers · 8 weeks") + + # Init driver + try: + driver = init_driver() + except Exception as e: + st.error(f"❌ Could not connect to Neo4j: {e}") + st.info("Set NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD in `.env` or Streamlit secrets.") + return + + # Route + if page == "📊 Project Overview": + page_project_overview(driver) + elif page == "🏭 Station Load": + page_station_load(driver) + elif page == "📅 Capacity Tracker": + page_capacity_tracker(driver) + elif page == "👷 Worker Coverage": + page_worker_coverage(driver) + elif page == "✅ Self-Test": + page_self_test(driver) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git 
a/submissions/vansh-singhal/level6/dashboard_overview.png b/submissions/vansh-singhal/level6/dashboard_overview.png new file mode 100644 index 000000000..6d353c1e2 Binary files /dev/null and b/submissions/vansh-singhal/level6/dashboard_overview.png differ diff --git a/submissions/vansh-singhal/level6/factory_capacity.csv b/submissions/vansh-singhal/level6/factory_capacity.csv new file mode 100644 index 000000000..795ff52f0 --- /dev/null +++ b/submissions/vansh-singhal/level6/factory_capacity.csv @@ -0,0 +1,9 @@ +week,own_staff_count,hired_staff_count,own_hours,hired_hours,overtime_hours,total_capacity,total_planned,deficit +w1,10,2,400,80,0,480,612,-132 +w2,10,2,400,80,40,520,645,-125 +w3,10,2,400,80,0,480,398,82 +w4,10,2,400,80,20,500,550,-50 +w5,10,2,400,80,30,510,480,30 +w6,9,2,360,80,0,440,520,-80 +w7,10,2,400,80,40,520,600,-80 +w8,10,2,400,80,20,500,470,30 \ No newline at end of file diff --git a/submissions/vansh-singhal/level6/factory_production.csv b/submissions/vansh-singhal/level6/factory_production.csv new file mode 100644 index 000000000..ca6ce43e1 --- /dev/null +++ b/submissions/vansh-singhal/level6/factory_production.csv @@ -0,0 +1,69 @@ +project_id,project_number,project_name,product_type,unit,quantity,unit_factor,station_code,station_name,etapp,bop,week,planned_hours,actual_hours,completed_units +P01,4501,Stålverket Borås,IQB,meter,600,1.77,011,FS IQB,ET1,BOP1,w1,48.0,45.2,28 +P01,4501,Stålverket Borås,IQB,meter,600,1.77,012,Förmontering IQB,ET1,BOP1,w1,32.0,35.5,25 +P01,4501,Stålverket Borås,IQB,meter,600,1.77,013,Montering IQB,ET1,BOP1,w1,28.0,26.0,22 +P01,4501,Stålverket Borås,IQB,meter,600,1.77,014,Svets o montage IQB,ET1,BOP1,w1,35.0,38.2,20 +P01,4501,Stålverket Borås,SB,styck,40,4.0,018,SB B/F-hall,ET1,BOP1,w1,16.0,14.5,4 +P01,4501,Stålverket Borås,SP,styck,180,2.0,019,SP B/F-hall,ET1,BOP1,w1,12.0,13.0,7 +P01,4501,Stålverket Borås,IQB,meter,600,1.77,011,FS IQB,ET1,BOP1,w2,48.0,50.0,32 +P01,4501,Stålverket 
Borås,IQB,meter,600,1.77,012,Förmontering IQB,ET1,BOP1,w2,32.0,30.0,28 +P01,4501,Stålverket Borås,IQP,styck,90,2.80,015,Montering IQP,ET1,BOP2,w2,25.0,28.0,9 +P01,4501,Stålverket Borås,SR,styck,8,45.0,021,SR B/F-hall,ET1,BOP2,w2,40.0,42.0,1 +P02,4502,Kontorshus Mölndal,IQB,meter,350,1.50,011,FS IQB,ET1,BOP1,w1,30.0,28.0,20 +P02,4502,Kontorshus Mölndal,IQB,meter,350,1.50,012,Förmontering IQB,ET1,BOP1,w1,22.0,24.5,18 +P02,4502,Kontorshus Mölndal,IQB,meter,350,1.50,013,Montering IQB,ET1,BOP1,w1,18.0,17.0,16 +P02,4502,Kontorshus Mölndal,IQP,styck,70,2.70,015,Montering IQP,ET1,BOP1,w1,19.0,21.0,7 +P02,4502,Kontorshus Mölndal,SD,styck,30,3.00,018,SB B/F-hall,ET1,BOP1,w1,9.0,8.5,3 +P02,4502,Kontorshus Mölndal,IQB,meter,350,1.50,011,FS IQB,ET1,BOP1,w2,30.0,32.0,24 +P02,4502,Kontorshus Mölndal,IQB,meter,350,1.50,014,Svets o montage IQB,ET1,BOP1,w2,25.0,23.0,20 +P02,4502,Kontorshus Mölndal,SP,styck,120,1.75,019,SP B/F-hall,ET1,BOP2,w2,14.0,15.5,8 +P03,4503,Lagerhall Jönköping,IQB,meter,900,1.89,011,FS IQB,ET1,BOP1,w1,72.0,70.0,40 +P03,4503,Lagerhall Jönköping,IQB,meter,900,1.89,012,Förmontering IQB,ET1,BOP1,w1,48.0,52.0,35 +P03,4503,Lagerhall Jönköping,IQB,meter,900,1.89,013,Montering IQB,ET1,BOP1,w1,38.0,36.5,30 +P03,4503,Lagerhall Jönköping,IQB,meter,900,1.89,014,Svets o montage IQB,ET1,BOP1,w1,42.0,48.0,28 +P03,4503,Lagerhall Jönköping,SB,styck,60,6.00,018,SB B/F-hall,ET1,BOP1,w1,36.0,38.0,6 +P03,4503,Lagerhall Jönköping,IQB,meter,900,1.89,011,FS IQB,ET1,BOP1,w2,72.0,75.0,45 +P03,4503,Lagerhall Jönköping,IQP,styck,110,2.90,015,Montering IQP,ET1,BOP2,w2,32.0,30.0,11 +P03,4503,Lagerhall Jönköping,IQB,meter,900,1.89,016,Gjutning,ET1,BOP2,w2,28.0,35.0,8 +P03,4503,Lagerhall Jönköping,IQB,meter,900,1.89,017,Målning,ET1,BOP2,w3,24.0,22.0,20 +P04,4504,Parkering Helsingborg,IQB,meter,450,1.65,011,FS IQB,ET1,BOP1,w1,38.0,36.0,24 +P04,4504,Parkering Helsingborg,IQB,meter,450,1.65,012,Förmontering IQB,ET1,BOP1,w1,25.0,27.0,20 +P04,4504,Parkering 
Helsingborg,IQB,meter,450,1.65,013,Montering IQB,ET1,BOP1,w1,20.0,19.0,18 +P04,4504,Parkering Helsingborg,IQP,styck,55,2.85,015,Montering IQP,ET1,BOP1,w1,16.0,18.0,6 +P04,4504,Parkering Helsingborg,SB,styck,25,7.50,018,SB B/F-hall,ET1,BOP1,w1,19.0,22.0,3 +P04,4504,Parkering Helsingborg,IQB,meter,450,1.65,011,FS IQB,ET1,BOP1,w2,38.0,40.0,28 +P04,4504,Parkering Helsingborg,SP,styck,100,2.00,019,SP B/F-hall,ET1,BOP2,w2,12.0,11.0,6 +P04,4504,Parkering Helsingborg,SR,styck,12,120.0,021,SR B/F-hall,ET1,BOP2,w2,60.0,65.0,1 +P05,4505,Sjukhus Linköping ET2,IQB,meter,1200,1.85,011,FS IQB,ET2,BOP3,w1,95.0,90.0,50 +P05,4505,Sjukhus Linköping ET2,IQB,meter,1200,1.85,012,Förmontering IQB,ET2,BOP3,w1,65.0,68.0,42 +P05,4505,Sjukhus Linköping ET2,IQB,meter,1200,1.85,013,Montering IQB,ET2,BOP3,w1,50.0,48.0,38 +P05,4505,Sjukhus Linköping ET2,IQB,meter,1200,1.85,014,Svets o montage IQB,ET2,BOP3,w1,58.0,62.0,35 +P05,4505,Sjukhus Linköping ET2,IQP,styck,150,2.88,015,Montering IQP,ET2,BOP3,w1,30.0,33.0,10 +P05,4505,Sjukhus Linköping ET2,SB,styck,50,5.00,018,SB B/F-hall,ET2,BOP3,w1,25.0,28.0,5 +P05,4505,Sjukhus Linköping ET2,SD,styck,45,2.75,018,SB B/F-hall,ET2,BOP3,w1,12.0,11.5,4 +P05,4505,Sjukhus Linköping ET2,IQB,meter,1200,1.85,011,FS IQB,ET2,BOP3,w2,95.0,98.0,55 +P05,4505,Sjukhus Linköping ET2,IQB,meter,1200,1.85,016,Gjutning,ET2,BOP3,w2,35.0,40.0,12 +P05,4505,Sjukhus Linköping ET2,IQB,meter,1200,1.85,017,Målning,ET2,BOP3,w2,28.0,26.0,25 +P05,4505,Sjukhus Linköping ET2,SR,styck,20,274.0,021,SR B/F-hall,ET2,BOP3,w3,120.0,115.0,2 +P06,4506,Skola Uppsala,IQB,meter,500,1.60,011,FS IQB,ET1,BOP1,w2,40.0,38.0,26 +P06,4506,Skola Uppsala,IQB,meter,500,1.60,012,Förmontering IQB,ET1,BOP1,w2,28.0,30.0,22 +P06,4506,Skola Uppsala,IQB,meter,500,1.60,013,Montering IQB,ET1,BOP1,w2,22.0,20.0,18 +P06,4506,Skola Uppsala,IQP,styck,80,2.75,015,Montering IQP,ET1,BOP1,w2,22.0,24.0,8 +P06,4506,Skola Uppsala,SB,styck,35,4.50,018,SB B/F-hall,ET1,BOP1,w2,16.0,18.0,4 +P06,4506,Skola 
Uppsala,SP,styck,140,1.50,019,SP B/F-hall,ET1,BOP2,w3,14.0,12.0,10 +P07,4507,Idrottshall Västerås,HSQ,meter,400,2.05,011,FS IQB,ET1,BOP1,w1,45.0,42.0,22 +P07,4507,Idrottshall Västerås,HSQ,meter,400,2.05,012,Förmontering IQB,ET1,BOP1,w1,30.0,33.0,18 +P07,4507,Idrottshall Västerås,HSQ,meter,400,2.05,014,Svets o montage IQB,ET1,BOP1,w1,35.0,32.0,16 +P07,4507,Idrottshall Västerås,SB,styck,45,3.50,018,SB B/F-hall,ET1,BOP1,w1,16.0,18.0,5 +P07,4507,Idrottshall Västerås,HSQ,meter,400,2.05,011,FS IQB,ET1,BOP1,w2,45.0,48.0,26 +P07,4507,Idrottshall Västerås,HSQ,meter,400,2.05,016,Gjutning,ET1,BOP2,w2,20.0,22.0,5 +P07,4507,Idrottshall Västerås,HSQ,meter,400,2.05,017,Målning,ET1,BOP2,w3,18.0,16.0,15 +P08,4508,Bro E6 Halmstad,IQB,meter,800,1.80,011,FS IQB,ET1,BOP1,w1,65.0,62.0,36 +P08,4508,Bro E6 Halmstad,IQB,meter,800,1.80,012,Förmontering IQB,ET1,BOP1,w1,42.0,45.0,30 +P08,4508,Bro E6 Halmstad,IQB,meter,800,1.80,013,Montering IQB,ET1,BOP1,w1,35.0,38.0,25 +P08,4508,Bro E6 Halmstad,IQB,meter,800,1.80,014,Svets o montage IQB,ET1,BOP1,w1,40.0,44.0,22 +P08,4508,Bro E6 Halmstad,SP,styck,200,2.50,019,SP B/F-hall,ET1,BOP1,w1,20.0,18.0,8 +P08,4508,Bro E6 Halmstad,IQB,meter,800,1.80,011,FS IQB,ET1,BOP1,w2,65.0,68.0,42 +P08,4508,Bro E6 Halmstad,IQP,styck,95,2.93,015,Montering IQP,ET1,BOP2,w2,28.0,30.0,10 +P08,4508,Bro E6 Halmstad,IQB,meter,800,1.80,016,Gjutning,ET1,BOP2,w3,22.0,25.0,8 +P08,4508,Bro E6 Halmstad,SR,styck,15,180.0,021,SR B/F-hall,ET1,BOP2,w3,90.0,85.0,2 \ No newline at end of file diff --git a/submissions/vansh-singhal/level6/factory_workers.csv b/submissions/vansh-singhal/level6/factory_workers.csv new file mode 100644 index 000000000..3110285cc --- /dev/null +++ b/submissions/vansh-singhal/level6/factory_workers.csv @@ -0,0 +1,15 @@ +worker_id,name,role,primary_station,can_cover_stations,certifications,hours_per_week,type +W01,Erik Lindberg,Operator,011,"011,012","MIG/MAG,TIG,ISO 9606",40,permanent +W02,Anna Berg,Operator,011,"011,014","MIG/MAG,TIG",40,permanent +W03,Lars 
"""Seed a Neo4j graph from the factory CSV files.

Reads ``factory_production.csv``, ``factory_workers.csv`` and
``factory_capacity.csv`` from the current working directory and MERGEs
Project / Station / Product / Etapp / Week / Worker / Certification nodes
plus their relationships. Every write uses MERGE, so re-running the script
updates existing nodes instead of duplicating them.

Connection settings come from the environment (optionally via a ``.env``
file): ``NEO4J_URI``, ``NEO4J_USER``, ``NEO4J_PASSWORD``.
"""
import os
import pandas as pd
from neo4j import GraphDatabase
from dotenv import load_dotenv

load_dotenv()

URI = os.getenv("NEO4J_URI")
USER = os.getenv("NEO4J_USER")
PASSWORD = os.getenv("NEO4J_PASSWORD")


def normalize_station(code) -> str:
    """Normalize station code to 3-digit zero-padded string. e.g. 11 → '011'

    Accepts ints, numeric strings and integral floats (pandas produces
    ``11.0`` when a numeric column contains gaps). Anything that is not an
    integer (e.g. ``"all"``) is returned as its stripped string form.
    """
    # Collapse 11.0 → 11 first; int("11.0") would raise and leave "11.0" as-is.
    if isinstance(code, float) and code.is_integer():
        code = int(code)
    text = str(code).strip()
    try:
        return f"{int(text):03d}"
    except (ValueError, TypeError):
        return text


def _split_list_field(value) -> list:
    """Split a comma-separated CSV cell into stripped, non-empty items.

    Missing cells (None / NaN) yield an empty list, so that ``str(NaN)`` is
    never mistaken for a real value named ``"nan"``.
    """
    if pd.isna(value):
        return []
    return [item.strip() for item in str(value).split(",") if item.strip()]


def create_constraints(session):
    """Create one uniqueness constraint per node label (no-op if present)."""
    constraints = [
        "CREATE CONSTRAINT IF NOT EXISTS FOR (p:Project) REQUIRE p.project_id IS UNIQUE",
        "CREATE CONSTRAINT IF NOT EXISTS FOR (s:Station) REQUIRE s.station_code IS UNIQUE",
        "CREATE CONSTRAINT IF NOT EXISTS FOR (p:Product) REQUIRE p.product_type IS UNIQUE",
        "CREATE CONSTRAINT IF NOT EXISTS FOR (w:Worker) REQUIRE w.worker_id IS UNIQUE",
        "CREATE CONSTRAINT IF NOT EXISTS FOR (w:Week) REQUIRE w.week_id IS UNIQUE",
        "CREATE CONSTRAINT IF NOT EXISTS FOR (c:Certification) REQUIRE c.name IS UNIQUE",
        "CREATE CONSTRAINT IF NOT EXISTS FOR (e:Etapp) REQUIRE e.etapp_id IS UNIQUE",
    ]
    for c in constraints:
        session.run(c)
    print("✓ Constraints created")


def seed_projects(session, prod: pd.DataFrame):
    """MERGE one Project node per distinct (id, number, name) production row."""
    rows = prod[["project_id", "project_number", "project_name"]].drop_duplicates()
    for _, row in rows.iterrows():
        session.run(
            """
            MERGE (p:Project {project_id: $project_id})
            SET p.project_number = $project_number,
                p.project_name   = $project_name,
                p.name           = $project_name
            """,
            project_id=row["project_id"],
            # Cast away the numpy scalar type before handing it to the driver.
            project_number=int(row["project_number"]),
            project_name=row["project_name"],
        )
    print(f"✓ {len(rows)} Project nodes")


def seed_stations(session, prod: pd.DataFrame):
    """MERGE one Station node per distinct (station_code, station_name)."""
    rows = prod[["station_code", "station_name"]].drop_duplicates()
    for _, row in rows.iterrows():
        session.run(
            """
            MERGE (s:Station {station_code: $station_code})
            SET s.station_name = $station_name,
                s.name         = $station_name
            """,
            station_code=row["station_code"],
            station_name=row["station_name"],
        )
    print(f"✓ {len(rows)} Station nodes")


def seed_products(session, prod: pd.DataFrame):
    """MERGE one Product node per distinct (product_type, unit)."""
    rows = prod[["product_type", "unit"]].drop_duplicates()
    for _, row in rows.iterrows():
        session.run(
            """
            MERGE (p:Product {product_type: $product_type})
            SET p.unit = $unit,
                p.name = $product_type
            """,
            product_type=row["product_type"],
            unit=row["unit"],
        )
    print(f"✓ {len(rows)} Product nodes")


def seed_etapps(session, prod: pd.DataFrame):
    """MERGE one Etapp node per distinct value of the ``etapp`` column."""
    for etapp in prod["etapp"].unique():
        session.run(
            "MERGE (e:Etapp {etapp_id: $etapp_id})",
            etapp_id=etapp,
        )
    print(f"✓ {prod['etapp'].nunique()} Etapp nodes")


def seed_weeks(session, capacity: pd.DataFrame):
    """MERGE one Week node per capacity row and store its figures as ints."""
    for _, row in capacity.iterrows():
        session.run(
            """
            MERGE (w:Week {week_id: $week_id})
            SET w.own_staff_count   = $own_staff_count,
                w.hired_staff_count = $hired_staff_count,
                w.own_hours         = $own_hours,
                w.hired_hours       = $hired_hours,
                w.overtime_hours    = $overtime_hours,
                w.total_capacity    = $total_capacity,
                w.total_planned     = $total_planned,
                w.deficit           = $deficit
            """,
            week_id=row["week"],
            own_staff_count=int(row["own_staff_count"]),
            hired_staff_count=int(row["hired_staff_count"]),
            own_hours=int(row["own_hours"]),
            hired_hours=int(row["hired_hours"]),
            overtime_hours=int(row["overtime_hours"]),
            total_capacity=int(row["total_capacity"]),
            total_planned=int(row["total_planned"]),
            deficit=int(row["deficit"]),
        )
    print(f"✓ {len(capacity)} Week nodes")


def seed_workers_and_certs(session, workers: pd.DataFrame):
    """MERGE Worker and Certification nodes plus their relationships.

    Per worker row this creates the Worker node, one HAS_CERTIFICATION edge
    per listed certification, a WORKS_AT edge to the primary station (unless
    it is "all" or missing) and one CAN_COVER edge per coverable station.
    Station edges use MATCH, so a station absent from the graph is skipped
    silently; Station nodes must therefore be seeded first.
    """
    for _, row in workers.iterrows():
        worker_id = row["worker_id"]

        # Worker node
        session.run(
            """
            MERGE (w:Worker {worker_id: $worker_id})
            SET w.name           = $name,
                w.role           = $role,
                w.hours_per_week = $hours_per_week,
                w.type           = $type
            """,
            worker_id=worker_id,
            name=row["name"],
            role=row["role"],
            hours_per_week=int(row["hours_per_week"]),
            type=row["type"],
        )

        # Certifications + HAS_CERTIFICATION (blank / NaN cells yield nothing)
        for cert in _split_list_field(row["certifications"]):
            session.run(
                """
                MERGE (c:Certification {name: $cert})
                WITH c
                MATCH (w:Worker {worker_id: $worker_id})
                MERGE (w)-[:HAS_CERTIFICATION]->(c)
                """,
                cert=cert,
                worker_id=worker_id,
            )

        # WORKS_AT (primary station — skip "all" and missing values)
        primary_raw = row["primary_station"]
        primary = "" if pd.isna(primary_raw) else str(primary_raw).strip()
        if primary and primary != "all":
            session.run(
                """
                MATCH (w:Worker {worker_id: $wid})
                MATCH (s:Station {station_code: $sc})
                MERGE (w)-[:WORKS_AT]->(s)
                """,
                wid=worker_id,
                sc=normalize_station(primary),
            )

        # CAN_COVER
        for station in _split_list_field(row["can_cover_stations"]):
            session.run(
                """
                MATCH (w:Worker {worker_id: $wid})
                MATCH (s:Station {station_code: $sc})
                MERGE (w)-[:CAN_COVER]->(s)
                """,
                wid=worker_id,
                sc=normalize_station(station),
            )

    print(f"✓ {len(workers)} Worker nodes + certifications + WORKS_AT + CAN_COVER")


def seed_relationships(session, prod: pd.DataFrame):
    """MERGE the production-derived relationships between existing nodes.

    All endpoints are MATCHed, so the node seeders must have run first. The
    printed numbers count the statements issued per relationship type (one
    per input row), not the relationships that end up in the graph.
    """
    rel_counts = {"BELONGS_TO": 0, "PRODUCES": 0, "PROCESSED_AT": 0,
                  "SCHEDULED_AT": 0, "SCHEDULED_IN": 0}

    # BELONGS_TO (Project → Etapp)
    for _, row in prod[["project_id", "etapp"]].drop_duplicates().iterrows():
        session.run(
            """
            MATCH (p:Project {project_id: $pid})
            MATCH (e:Etapp   {etapp_id:   $eid})
            MERGE (p)-[:BELONGS_TO]->(e)
            """,
            pid=row["project_id"],
            eid=row["etapp"],
        )
        rel_counts["BELONGS_TO"] += 1

    # PRODUCES (Project → Product); the first row per pair supplies the properties
    produces_rows = prod[["project_id", "product_type", "quantity",
                          "unit_factor", "unit"]].drop_duplicates(
                              subset=["project_id", "product_type"])
    for _, row in produces_rows.iterrows():
        session.run(
            """
            MATCH (p:Project {project_id: $pid})
            MATCH (d:Product {product_type: $ptype})
            MERGE (p)-[r:PRODUCES]->(d)
            SET r.quantity    = $qty,
                r.unit_factor = $uf,
                r.unit        = $unit
            """,
            pid=row["project_id"],
            ptype=row["product_type"],
            qty=float(row["quantity"]),
            uf=float(row["unit_factor"]),
            unit=row["unit"],
        )
        rel_counts["PRODUCES"] += 1

    # PROCESSED_AT (Product → Station)
    for _, row in prod[["product_type", "station_code"]].drop_duplicates().iterrows():
        session.run(
            """
            MATCH (d:Product {product_type: $ptype})
            MATCH (s:Station {station_code: $sc})
            MERGE (d)-[:PROCESSED_AT]->(s)
            """,
            ptype=row["product_type"],
            sc=row["station_code"],
        )
        rel_counts["PROCESSED_AT"] += 1

    # SCHEDULED_AT (Project → Station, one per production row).
    # The relationship is keyed on (week, product_type): rows sharing project,
    # station, week and product update the same edge.
    for _, row in prod.iterrows():
        session.run(
            """
            MATCH (p:Project {project_id: $pid})
            MATCH (s:Station {station_code: $sc})
            MERGE (p)-[r:SCHEDULED_AT {week: $week, product_type: $ptype}]->(s)
            SET r.planned_hours   = $planned,
                r.actual_hours    = $actual,
                r.completed_units = $completed,
                r.etapp           = $etapp,
                r.bop             = $bop
            """,
            pid=row["project_id"],
            sc=row["station_code"],
            week=row["week"],
            ptype=row["product_type"],
            planned=float(row["planned_hours"]),
            actual=float(row["actual_hours"]),
            completed=int(row["completed_units"]),
            etapp=row["etapp"],
            bop=row["bop"],
        )
        rel_counts["SCHEDULED_AT"] += 1

    # SCHEDULED_IN (Project → Week)
    for _, row in prod[["project_id", "week"]].drop_duplicates().iterrows():
        session.run(
            """
            MATCH (p:Project {project_id: $pid})
            MATCH (w:Week    {week_id:    $wid})
            MERGE (p)-[:SCHEDULED_IN]->(w)
            """,
            pid=row["project_id"],
            wid=row["week"],
        )
        rel_counts["SCHEDULED_IN"] += 1

    for rel, n in rel_counts.items():
        print(f"  {rel}: {n}")
    print("✓ Relationships seeded")


def print_summary(session):
    """Print node/relationship totals and the labels and types in the graph."""
    nodes = session.run("MATCH (n) RETURN count(n) AS c").single()["c"]
    rels = session.run("MATCH ()-[r]->() RETURN count(r) AS c").single()["c"]
    labels = [r["label"] for r in session.run("CALL db.labels() YIELD label")]
    rel_types = [r["relationshipType"] for r in
                 session.run("CALL db.relationshipTypes() YIELD relationshipType")]
    print("\n── Graph Summary ──────────────────────")
    print(f"  Nodes:         {nodes}")
    print(f"  Relationships: {rels}")
    print(f"  Node labels ({len(labels)}): {labels}")
    print(f"  Rel types  ({len(rel_types)}): {rel_types}")
    print("───────────────────────────────────────")


def seed():
    """Load the three CSV files and seed the whole graph.

    Raises:
        RuntimeError: if ``NEO4J_URI`` is not configured.
    """
    # Load CSVs (paths are relative to the current working directory)
    prod = pd.read_csv("factory_production.csv")
    workers = pd.read_csv("factory_workers.csv")
    capacity = pd.read_csv("factory_capacity.csv")

    # Normalize station codes to "011" format (read_csv parses them as ints)
    prod["station_code"] = prod["station_code"].apply(normalize_station)

    if not URI:
        raise RuntimeError(
            "NEO4J_URI is not set; define it in the environment or in a .env file"
        )

    print("\n🔌 Connecting to Neo4j…")
    driver = GraphDatabase.driver(URI, auth=(USER, PASSWORD))
    try:
        with driver.session() as s:
            print("\n📐 Creating constraints…")
            create_constraints(s)

            print("\n🌱 Seeding nodes…")
            seed_projects(s, prod)
            seed_stations(s, prod)
            seed_products(s, prod)
            seed_etapps(s, prod)
            seed_weeks(s, capacity)
            seed_workers_and_certs(s, workers)

            print("\n🔗 Seeding relationships…")
            seed_relationships(s, prod)

            print("\n📊 Final graph state…")
            print_summary(s)
    finally:
        # Release the connection pool even when a seeding step fails.
        driver.close()

    print("\n✅ Done — graph is ready!\n")


if __name__ == "__main__":
    seed()