From d108850c1a3aac4b5d6c4671e4328527f7b38a4a Mon Sep 17 00:00:00 2001 From: Hugo Campos <2074396+hugooc@users.noreply.github.com> Date: Tue, 26 May 2026 09:07:01 -0700 Subject: [PATCH] Add list_care_team and list_implants read tools Two new read tools, both built from existing complete-body HAR captures (no fresh capture needed). list_care_team: the home-page "Care Team and Recent Providers" roster (PCP, specialists, recent clinicians) with specialty, relationship, and per-provider capability flags. Legacy /mychartcn/Clinical/CareTeam/Load + LoadExternal; one CSRF token covers both POSTs, external is best-effort. can_message reflects only the panel's inline button, not reachability; documented so callers route messaging through list_message_recipients. list_implants: implanted/explanted devices (pacemakers, ICDs, leads, IOLs, ortho hardware) with manufacturer, model, serial, UDI/SDI, area, laterality, status, and implant/explant procedure. Single CSRF-gated POST to /mychartcn/api/implants/GetImplants. Group list is a body-area ordering index ("zzz" = sort-unknown-last sentinel); detail lives in implantList. isoDate is a display-string misnomer, so date_iso is derived. Devices can appear twice (curated + raw feed record, same serial); returned faithfully, not deduped. 567 tests passing, ruff + mypy clean. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- CLAUDE.md | 8 +- docs/research/endpoints/care_team.md | 142 +++++++++ docs/research/endpoints/implants.md | 137 +++++++++ openkp/src/openkp/mcp_server.py | 68 +++++ openkp/src/openkp/scrapers/care_team.py | 201 +++++++++++++ openkp/src/openkp/scrapers/implants.py | 238 +++++++++++++++ openkp/tests/test_care_team.py | 370 +++++++++++++++++++++++ openkp/tests/test_implants.py | 384 ++++++++++++++++++++++++ 8 files changed, 1546 insertions(+), 2 deletions(-) create mode 100644 docs/research/endpoints/care_team.md create mode 100644 docs/research/endpoints/implants.md create mode 100644 openkp/src/openkp/scrapers/care_team.py create mode 100644 openkp/src/openkp/scrapers/implants.py create mode 100644 openkp/tests/test_care_team.py create mode 100644 openkp/tests/test_implants.py diff --git a/CLAUDE.md b/CLAUDE.md index a8bae9c..a27bbf2 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -19,7 +19,7 @@ What this means for current work: See `DESIGN.md` §1 (audience), §5 (Phase 4 / 4.5), §10 (distribution strategy). -## Current state (2026-05-11) +## Current state (2026-05-26) - **Phase 0 scaffold:** complete. - **Phase 1 auth:** complete. Silent session reuse via `~/.openkp/session.json` + httpx probe to `/mychartcn/keepalive.asp`. Interactive first-run Chromium, silent after. See ADR-005 and `docs/recon/session-2.md`. @@ -32,6 +32,8 @@ See `DESIGN.md` §1 (audience), §5 (Phase 4 / 4.5), §10 (distribution strategy - `emergency_contacts` (closes Phase 2) ✅ shipped + live-verified. Returns the full relationship roster — emergency contacts, DPOAHC healthcare agents, conservators — from a single Epic/MyChart endpoint. See `docs/research/endpoints/emergency_contacts.md`. - `list_appointments` + `list_past_visits` ✅ shipped + live-verified 2026-05-04. Upcoming/in-progress visits (single-call, no pagination) and past visits (paginated walker with `max_pages`, `page_size`, `until_iso` bounds). 
Both back on the legacy `/mychartcn/Visits/VisitsList/` family. **Live-verified twice: "when's my next appointment" returned next visit cleanly; "how many appointments in 2025, split virtual vs in-person" walked past visits and answered correctly (9 clinical encounters: 6 in-person + 3 virtual).** Filter HAR yielded the `numVisitsToRetrieve` discovery (default page=10 in front end, but Kaiser honors up to 78 — OpenKP defaults to 50, 5x fewer round trips for multi-year history). Filter-by-provider would be a future extension via `LoadFilterOptions` (see `appointments.md` "Filter index"). Session journal in sidecar. - `read_visit_notes` + `download_visit_avs_pdf` ✅ shipped + live-verified 2026-05-04. Clinical notes (provider chart notes, progress notes, op notes) plus the rendered After Visit Summary, for one past visit. Four-step server-side chain (`GetVisitDetailsPast` → `GetVisitNotes` → per-note `ValidateVisitNote` + `LoadReportContent(contextINI=HNO)` → `LoadReportContent(reportMnemonic=AMB_AVS)`) collapsed into one tool. **Two-CSRF gotcha:** Kaiser scopes anti-forgery tokens by referer; ValidateVisitNote uses `/visits/note?csn=...` referer while everything else uses `/visits/past-details?csn=...`. AVS PDF download follows the labs-PDF pattern (GetDocumentDetails → DownloadOrStream). HTML-stripped to plain text on `content_text`, raw HTML preserved on `content_html`. See `docs/research/endpoints/visit_notes.md`. Session journal in sidecar. + - `list_care_team` ✅ shipped + live-verified 2026-05-26. The home-page "Care Team and Recent Providers" roster — PCP, specialists, recently-seen clinicians — each with specialty, relationship label, and per-provider capability flags. Strict superset of `get_profile`'s single PCP field. Back on the legacy `/mychartcn/Clinical/CareTeam/Load` + `LoadExternal` family (one CSRF token covers both POSTs; external providers are best-effort). Built from an existing complete-body capture, no fresh HAR needed. 
**Gotcha documented:** `can_message` reflects only the care-team panel's inline button, NOT reachability — messaging still runs through `list_message_recipients` + `send_message`. See `docs/research/endpoints/care_team.md`. Session journal in sidecar (session-21). + - `list_implants` ✅ shipped + live-verified 2026-05-26. Implanted/explanted devices (pacemakers, ICDs, leads, IOLs, ortho hardware) with manufacturer, model, serial, UDI/SDI, body area, laterality, status, and implant/explant procedure (date + derived `date_iso` + provider). Single CSRF-gated POST to `/mychartcn/api/implants/GetImplants`, no pagination. `implantGroupList` is a body-area ordering index (`"zzz"` = Epic's sort-unknown-last sentinel); detail lives in `implantList`. **Live finding:** the newest device can appear twice (curated "Cardiac Implant" record + raw device-feed "Pacemaker" record, same serial) — OpenKP returns both faithfully, callers dedupe on `(serial, date_iso)`. `isoDate` is a display-string misnomer (same trap as the AVS date). See `docs/research/endpoints/implants.md`. Session journal in sidecar (session-21). - **Phase 3 write tools:** underway. - `request_refill(medication_id, confirm=False)` ✅ shipped 2026-04-25 (mail-only v1). Two-call confirm pattern, audit log + dry-run scaffolding. **Preview path live-verified, commit path pending next real refill cycle.** See `docs/recon/session-11.md`. - `track_refill_order(order_number)` ✅ shipped + live-verified 2026-04-27 (read sibling to request_refill). Single GET against `/orderDetails`. Surfaces order status (INPROGRESS / SHIPPED / DELIVERED), per-Rx detail, shipping address, payment last-4 / type / expiry, and a derived `tracking_ids` list. 
**Both INPROGRESS (HAR) and SHIPPED (live, 2026-04-27) verified against real Kaiser data.** Confirmed: `copay` on rxList entries populates post-adjudication (null on INPROGRESS, real $ once shipped), and `SHIPPED` is a real intermediate state where `digitalStatus="Complete"` even though `trackingId` is still empty (carrier handoff lags by hours/days). DELIVERED transition still unverified. See `docs/recon/session-13.md`. @@ -40,7 +42,7 @@ See `DESIGN.md` §1 (audience), §5 (Phase 4 / 4.5), §10 (distribution strategy - `download_message_attachment` ✅ shipped + live-verified 2026-04-25 (session 12). Two-step chain (`GetDocumentDetailsLegacy` → binary GET). Saves to `~/.openkp/downloads/`. Genetic panels and other clinically important documents arrive as message attachments — Kaiser doesn't surface them in test-results. - `list_messages(deep_search=True, max_pages=30)` ✅ shipped + live-verified 2026-04-25 (session 12). Walks pagination via `localSummary.oldestSearchedInstantISO` because Kaiser's `searchQuery` is page-scoped, not index-scoped (default search misses anything older than the most recent ~50 threads). Use this when looking for archival messages. See `docs/research/endpoints/messages.md` "Search" section and `docs/recon/session-12.md`. -**Tests:** 527 passing. Run with `.venv/bin/pytest -q` from `openkp/`. +**Tests:** 567 passing. Run with `.venv/bin/pytest -q` from `openkp/`. **CI:** GitHub Actions runs ruff + mypy + pytest on push/PR (Python 3.11/3.12/3.13). See `.github/workflows/ci.yml`. Status badge in root README. @@ -61,6 +63,8 @@ Public release is done. Open code work is below. - **OOC awareness:** the recipient catalog carries `oocDateISO` and `oocContextString` for providers who are out of office. Surface those as fields on `MessageRecipient` so the preview can flag "your provider is out of office until X" before the user commits. 
- **`body_preview` rename or cap:** today's field name suggests truncation but the implementation only truncates above 200 chars. Either rename to `body` (full echo always) or always cap with `...` suffix when longer. +3. **`list_access_log`** — who/what accessed your record, incl. third-party apps and connected health services pulling specific data classes with timestamps. Strongly on-mission for the patient-owned-data framing. **Already has complete bodies** in `problems-allergies-documents-and-more.har` (`GetPortalAccessLogEntries` + `GetThirdPartyAccessLogEntries`, both legacy `/mychartcn/api/access-logs/`), so no fresh capture needed — same lucky situation as care_team/implants. The catch vs those two: it's paginated via a `startingLine` cursor and result sets get large (a connected app can log hundreds of "Test Result Details" reads), so it needs a bounded walker like `list_past_visits`. The portal-self log is boring (all "you accessed your own record"); the value is the third-party log. + **Loose ends (optional, not blocking):** - ~~**`read_visit_notes` `iso` field is inconsistent.**~~ **Fixed 2026-05-10 (session 19).** AVS branch now parses the encounter-date display string ("Dec 04, 2025") to date-only ISO ("2025-12-04") via `_display_date_to_iso`. Clinical notes still carry full timestamp from `noteList[i].iso`. Field doc updated to spell out the two precision levels. Test pinned: `tests/test_visit_notes.py` asserts `avs.iso == "2025-01-01"` for the fixture. - **Live-verify the `is_telemedicine` heuristic on `list_appointments` / `list_past_visits`.** Recon had zero virtual visits, so the heuristic (Telemedicine OR EVisit OR CanShowTelemedicine) is inferential. Cowork-Claude bypassed it by reading `visit_type` directly ("Telephone", "Video Visit"), but next time Hugo's calendar has a video or phone visit, peek at the dump to see whether the heuristic actually fires. 
diff --git a/docs/research/endpoints/care_team.md b/docs/research/endpoints/care_team.md new file mode 100644 index 0000000..8b8b4fc --- /dev/null +++ b/docs/research/endpoints/care_team.md @@ -0,0 +1,142 @@ +# Care team endpoint + +Source HAR: `docs/research/captures/kp-care-team-1.har`, 2026-04-23 (full +response bodies preserved). Implemented as `list_care_team` 2026-05-26. + +## What this is + +The "Care Team and Recent Providers" panel on the MyChart home page (right +column): the patient's primary care provider, specialists, and recently-seen +clinicians. Page route: `/mychartcn/clinical/careteam`. + +This is a strict superset of what `get_profile` exposes. `get_profile` returns +only the PCP. This surface returns the whole care relationship roster plus +per-provider capability flags (can you message them, can you self-schedule). + +Two endpoints back it, both on the legacy `/mychartcn/Clinical/CareTeam/Load*` +family — the same `/mychartcn/Clinical/<Feature>/Load*` shape as problems, +allergies, and appointments: + +| Feature | Endpoint | Status | +| --- | --- | --- | +| Internal KP providers | `POST /mychartcn/Clinical/CareTeam/Load` | ✅ Mapped + shipped. Real bodies. | +| External (non-KP) providers | `POST /mychartcn/Clinical/CareTeam/LoadExternal` | ✅ Mapped + shipped. Returned empty list in recon (no external providers in the captured data). Entry shape assumed identical to internal. | + +## Auth / anti-forgery + +Standard `/mychartcn/` CSRF contract (see `messages.md` / `problems.md`). Fetch +one `__RequestVerificationToken` and reuse it for **both** POSTs — in the HAR, +`Load` and `LoadExternal` carried the byte-identical token. Referer for both is +`https://healthy.kaiserpermanente.org/mychartcn/clinical/careteam`. + +Both are GET-shaped POSTs: **no request body**, everything is in query params. 
+ +**Query params** (`Load`): + +``` +hfrId= (empty) +sources= (empty) +actions= (empty) +isPrimaryStandalone=true +ComponentNumber=2 +noCache=<random float> +``` + +`LoadExternal` is identical minus `isPrimaryStandalone`. + +## Response shape + +```json +{ + "ProvidersList": [ + { + "ID": "WP-24...", // opaque Epic handle + "Name": "PAT EXAMPLE MD", // fabricated for this doc + "Photo": "https://www.permanente.net/pmdb/photosync/<id>_photoweb.jpg", + "NationalProviderID": "WP-24...", // opaque handle, NOT a real NPI + "WebPageUrl": "https://mydoctor.kaiserpermanente.org/ncal/doctor/", + "InfoBlurbUrl": "https://healthy.kaiserpermanente.org/hmdo/...", + "AboutMeBlurb": [], + "CanViewProviderDetails": true, + "CanDirectSchedule": false, + "CanRequestAppointment": false, + "CanMessage": false, + "CommCenterMessageUrl": "", + "CanRequestCustomAppt": false, + "HasNoProviderRecord": false, + "IsNewSchedulingEnabled": true, + "Specialty": "Family Practice", + "Relation": "Primary Care Provider", // "Cardiologist", etc. + "SchedulableVisitTypes": null, + "DepartmentID": "WP-24...", // opaque Epic handle + "Organizations": null, + "IsExternal": false, + "CareTeamStatus": 0, // raw int enum; 0 for all observed + "CanHideProvider": true + } + ], + "DescriptiveTitle": "Care Team and Recent Providers", + "TabColorClass": "color1", + "IsCustomApptReqEnabled": false, + "CustomRequestAppointmentLink": "showform&formname=ApptReqCntr" +} +``` + +(Provider names and all `WP-24...` IDs above are fabricated/elided — the real +HAR contains the member's actual care relationships, which are PHI-adjacent.) + +## Field mapping (scraper → `CareTeamProvider`) + +| Model field | Source key | Notes | +| --- | --- | --- | +| `id` | `ID` | Opaque Epic handle. Required — entry dropped if missing. | +| `name` | `Name` | Display name incl. credential suffix. | +| `specialty` | `Specialty` | e.g. "Family Practice", "Cardiology". | +| `relation` | `Relation` | e.g. "Primary Care Provider", "Cardiologist". 
Populated, unlike the messages recipient catalog's null role. | +| `department_id` | `DepartmentID` | Opaque handle; pairs with scheduling. | +| `is_external` | `IsExternal` | True for `LoadExternal` entries. | +| `can_message` | `CanMessage` | Panel's inline quick-message button only — NOT reachability. See note below. | +| `can_schedule` | `CanDirectSchedule` | This panel's button only. | +| `can_request_appointment` | `CanRequestAppointment` | This panel's button only. | +| `can_view_details` | `CanViewProviderDetails` | | +| `photo_url` | `Photo` | permanente.net headshot URL. | +| `provider_page_url` | `WebPageUrl` | Public mydoctor.kaiserpermanente.org bio. | +| `care_team_status` | `CareTeamStatus` | Raw int; enum meaning unknown, 0 for all observed. | + +Fields intentionally dropped: `NationalProviderID` (an opaque handle, not a +true NPI — misleading to surface), `InfoBlurbUrl`, `AboutMeBlurb`, +`CommCenterMessageUrl`, `IsNewSchedulingEnabled`, `SchedulableVisitTypes`, +`Organizations`, `HasNoProviderRecord`, `CanRequestCustomAppt`, `CanHideProvider`. +Easy to add later if a tool needs them. + +## Behavior notes + +- **`can_message` is not reachability.** It mirrors the care-team panel's inline + quick-message button, which Kaiser can leave off even for providers you can + message just fine. Messaging actually runs through a different surface + (`list_message_recipients` + `send_message`, the "Message your care team" + compose flow). A provider with `can_message=False` here may still be a valid + recipient there. Observed live 2026-05-26: both providers in the captured + data came back `can_message=False` on this panel, which an LLM read as "outreach must go + through their department" — misleading, since they're messageable via + `send_message`. The same panel-button-only caveat applies to `can_schedule` + and `can_request_appointment`. The tool docstring and model comments now spell + this out so callers don't treat these flags as gates. 
+- `LoadExternal` is best-effort in the scraper: if it errors, we still return + the internal roster. External providers are a bonus, not the primary data. +- The external entry shape is **assumed identical** to internal — recon had an + empty external list, so it is untested against real external data. The parser + is defensive (never raises) so a shape surprise degrades to partial/empty. +- No pagination. Kaiser returns the full roster in one call each. + +## Open questions / future work + +- **Live-verify external providers.** Need a member who has a non-KP provider + on file to confirm the `LoadExternal` entry shape matches internal. +- **`CareTeamStatus` enum.** Only `0` observed. Could distinguish + active/inactive/recent — capture a roster with a dropped provider to learn. +- **`relation` → recipient linkage.** The care team `id` is a different opaque + handle than the `send_message` recipient catalog id. If we ever want + "message my cardiologist" to chain `list_care_team` → `send_message`, we + need to confirm whether the two ID spaces are reconcilable or whether + messaging must always go through `list_message_recipients`. diff --git a/docs/research/endpoints/implants.md b/docs/research/endpoints/implants.md new file mode 100644 index 0000000..238133c --- /dev/null +++ b/docs/research/endpoints/implants.md @@ -0,0 +1,137 @@ +# Implants endpoint + +Source HAR: `docs/research/captures/problems-allergies-and-more.har`, 2026-04-23 +(full response body preserved). Implemented as `list_implants` 2026-05-26. + +## What this is + +The MyChart "Implants" device list (page route `/mychartcn/app/implants`): +implanted and explanted medical devices — pacemakers, ICDs and leads, stents, +intraocular lenses, orthopedic hardware, and similar. A new data class for +OpenKP. Useful for MRI-safety questions, device recall lookups, and sharing +exact device specs (manufacturer / model / serial / UDI) with a non-KP +provider. 
+ +Single legacy `/mychartcn/api/` POST, same `LoadListData`-style family as +problems and allergies. No pagination — the full list comes in one call. + +| Feature | Endpoint | Status | +| --- | --- | --- | +| Full implant list + per-device detail | `POST /mychartcn/api/implants/GetImplants` body `{}` | ✅ Mapped + shipped. Real body. | + +## Auth / anti-forgery + +Standard `/mychartcn/` CSRF contract (see `problems.md`). Fetch one +`__RequestVerificationToken`, send it as a header. Referer is +`https://healthy.kaiserpermanente.org/mychartcn/app/implants`. Request body is +the empty object `{}`. No query params. + +## Response shape + +Two parallel structures: + +- **`implantGroupList`** — a body-area ordering index: `[{area, implantIDs[]}]`. + The literal area `"zzz"` is Epic's sentinel that sorts unknown-area devices + last. We use this list only for ordering. +- **`implantList`** — the authoritative per-device detail, a dict keyed by + device id. Every field below comes from here. +- `communityActive` — boolean, ignored. + +```json +{ + "implantGroupList": [ + {"area": "Chest", "implantIDs": ["<id>"]}, + {"area": "Eye", "implantIDs": ["<id>", "<id>"]}, + {"area": "zzz", "implantIDs": ["<id>", ...]} + ], + "implantList": { + "<id>": { + "id": "WP-24...", // opaque Epic handle + "name": "Fake Pacemaker Model Z", // fabricated for this doc + "type": "Pacemaker", // "Cardiac Implant", "Ophthalmology", ... 
+ "area": "Chest", // "" when unknown (→ null) + "laterality": "Left", // "Right", "" (→ null) + "status": "Implanted", + "isExplant": false, + "isExternal": false, + "manufacturer": "ACME CARDIAC", + "model": "FAKE-PACE", + "serial": "SN-CHEST-9", + "udi": "(01)...(17)...(21)...", // full barcode; often "" for older devices + "sdi": "00000000000000", // GTIN portion of the UDI + "lot": "", + "comments": [], // empty in all observed data + "description": [], // empty in all observed data + "organizationLinks": [], // ignored + "implantProcedure": { + "isoDate": "January 3, 2024", // MISNOMER: a display string, not ISO + "deviceCount": "1", // string, or "" + "provider": "DR FAKE", + "facility": "Fake Surgery Center" + }, + "explantProcedure": {"isoDate": "", "deviceCount": "", "provider": "", "facility": ""} + } + }, + "communityActive": false +} +``` + +(All device names, models, serials, and IDs above are fabricated/elided — the +real HAR contains the member's actual implanted devices, which are PHI.) + +## Field mapping (scraper → `Implant`) + +| Model field | Source key | Notes | +| --- | --- | --- | +| `id` | `id` (fallback: map key) | Required — device dropped if neither present. | +| `name` | `name` | | +| `type` | `type` | "Pacemaker", "Cardiac Implant", "Ophthalmology", ... | +| `area` | `area` | `""` → null. The `"zzz"` group sentinel never reaches here. | +| `laterality` | `laterality` | "Left" / "Right" / null. | +| `status` | `status` | "Implanted", "Explanted". | +| `is_explant` | `isExplant` | | +| `is_external` | `isExternal` | True for non-KP-sourced records. | +| `manufacturer` / `model` / `serial` | same | | +| `udi` | `udi` | Full UDI barcode. Empty for older devices (pre-UDI era). | +| `sdi` | `sdi` | Device-identifier portion of the UDI. | +| `lot` | `lot` | | +| `comments` / `description` | same | Lists; empty in all observed data, string shape assumed. | +| `implanted` | `implantProcedure` | → `ImplantProcedure`, null if all-empty. 
| +| `explanted` | `explantProcedure` | → `ImplantProcedure`, null if all-empty. | + +`ImplantProcedure`: `date` (the misnamed `isoDate` display string), `date_iso` +(derived `"YYYY-MM-DD"` via `%B %d, %Y`, null if unparseable), `provider`, +`facility`, `device_count`. + +## Behavior notes + +- **`isoDate` is not ISO.** Kaiser sends `"January 3, 2024"`. We pass it + through as `date` and additionally derive `date_iso`. Same misnomer trap as + the visit-notes AVS date. +- **Kaiser always sends both procedure blocks**, even for a device that was + never explanted (every field an empty string). We collapse an all-empty + block to `None` so `explanted` is null unless an explant really happened. +- **Older devices have no UDI.** Pre-UDI-mandate implants come back with empty + `udi`/`sdi` but populated `manufacturer`/`model`/`serial`. Don't treat a + missing UDI as a parse failure. +- **A device can appear twice.** Live-verified 2026-05-26: the newest device + came back as both a curated record (type "Cardiac Implant", with `area`, + `udi`, and ordering `provider`) and a raw device-feed record (type + "Pacemaker", name prefixed with a feed code like `Bsci_7677...`, no `udi` or + `area`), sharing the same `serial` and implant date. Older devices had only + the raw record — the structured/UDI record is a newer Epic feature. OpenKP + returns **both rows faithfully and does not dedupe** (a thin substrate + shouldn't hide data). A caller that wants distinct physical devices can + collapse on `(serial, implanted.date_iso)`. +- No pagination. + +## Open questions / future work + +- **`comments` / `description` shape when populated.** Always empty in recon. + If they ever carry clinically relevant notes (e.g. "MRI conditional"), + confirm whether elements are strings or objects and adjust `_str_list`. +- **`status` enum.** Only "Implanted" observed. Capture an explanted device to + confirm the "Explanted" string and whether other states exist. 
+- **UDI parsing.** We surface the raw UDI barcode. A future helper could split + it into GTIN / expiration / serial AIDC fields if a caller needs structured + UDI data. diff --git a/openkp/src/openkp/mcp_server.py b/openkp/src/openkp/mcp_server.py index 9c0c28e..517a355 100644 --- a/openkp/src/openkp/mcp_server.py +++ b/openkp/src/openkp/mcp_server.py @@ -33,6 +33,8 @@ from openkp.config import load_config from openkp.scrapers.allergies import fetch_allergies from openkp.scrapers.appointments import fetch_appointments, fetch_past_visits +from openkp.scrapers.care_team import fetch_care_team +from openkp.scrapers.implants import fetch_implants from openkp.scrapers.visit_notes import ( download_visit_avs_pdf as _download_visit_avs_pdf, fetch_visit_notes, @@ -424,6 +426,72 @@ async def list_allergies() -> dict: return response.model_dump() +@mcp.tool() +async def list_care_team() -> dict: + """List the patient's care team and recent providers. + + This is the "Care Team and Recent Providers" panel from the MyChart home + page: the primary care provider, specialists, and recently-seen clinicians. + Use it to answer "who is my doctor / cardiologist / care team", or as the + lookup step before messaging or scheduling with a specific provider. + + Returns a dict shaped like the `CareTeamResponse` pydantic model in + `openkp.scrapers.care_team`, with a `providers` array plus `total_count`. + + Each provider carries: `id`, `name`, `specialty` (e.g. "Family Practice", + "Cardiology"), `relation` (e.g. "Primary Care Provider", "Cardiologist"), + `department_id`, `is_external` (true for non-KP providers), `photo_url`, + `provider_page_url` (public bio), `care_team_status` (raw int enum), and + capability flags `can_message`, `can_schedule`, `can_request_appointment`, + `can_view_details`. + + IMPORTANT: these flags describe the buttons KP shows on the care team panel + itself, NOT what OpenKP's other tools can do. 
`can_message=False` does NOT + mean the provider is unreachable — messaging runs through a separate surface + (`list_message_recipients` + `send_message`), where a provider flagged + `can_message=False` here may still be a valid message recipient. Treat these + flags as hints about KP's portal UI, not as gates on OpenKP's messaging or + scheduling tools. To message a provider, check `list_message_recipients`. + + Richer than `get_profile`, which surfaces only the PCP. See + `docs/research/endpoints/care_team.md`. + """ + store = _get_session_store() + client = KaiserRequest(store) + response = await fetch_care_team(client) + return response.model_dump() + + +@mcp.tool() +async def list_implants() -> dict: + """List the patient's implanted (and explanted) medical devices. + + This is the MyChart "Implants" / device list: pacemakers, ICDs and leads, + stents, intraocular lenses, orthopedic hardware, and similar. Useful for + MRI-safety questions, device recall lookups, sharing exact device specs + with a non-KP provider, or answering "what's implanted in me and when". + + Returns a dict shaped like the `ImplantsResponse` pydantic model in + `openkp.scrapers.implants`, with an `implants` array plus `total_count`. + + Each implant carries: `id`, `name`, `type` (e.g. "Pacemaker", + "Cardiac Implant", "Ophthalmology"), `area` (body area, e.g. "Chest", + "Eye"; null when unknown), `laterality` ("Left"/"Right"), `status` + (e.g. "Implanted"), `is_explant`, `is_external`, `manufacturer`, `model`, + `serial`, `udi` (full barcode string), `sdi` (the device-identifier portion + of the UDI), `lot`, `comments`, `description`, and two procedure blocks: + `implanted` and `explanted`. Each procedure block (when present) carries + `date` (display string), `date_iso` ("YYYY-MM-DD", null if unparseable), + `provider`, `facility`, and `device_count`. + + See `docs/research/endpoints/implants.md`. 
+ """ + store = _get_session_store() + client = KaiserRequest(store) + response = await fetch_implants(client) + return response.model_dump() + + @mcp.tool() async def request_refill(medication_id: str, confirm: bool = False) -> dict: """Request a mail-order refill for one prescription. Two-call confirm pattern. diff --git a/openkp/src/openkp/scrapers/care_team.py b/openkp/src/openkp/scrapers/care_team.py new file mode 100644 index 0000000..6d099d1 --- /dev/null +++ b/openkp/src/openkp/scrapers/care_team.py @@ -0,0 +1,201 @@ +"""Care team scraper. + +One MCP tool surfaces from this module: + +- `list_care_team` — the patient's "Care Team and Recent Providers" roster: + PCP, specialists, and recently-seen clinicians, each with specialty, + relationship label, and per-provider capability flags (messageable, + directly schedulable). + +Source: legacy MyChart `/mychartcn/Clinical/CareTeam/Load` (internal KP +providers) and `/mychartcn/Clinical/CareTeam/LoadExternal` (non-KP +providers). Same auth + CSRF contract as `problems.py` — a single anti-forgery +token covers both POSTs (Kaiser reuses one token for the whole page). No +pagination: Kaiser returns the full roster in one call each. + +This is a strict superset of `get_profile`'s PCP field — that surface gives +only the primary care provider, this one gives the whole care relationship +roster plus what you can do with each provider. 
+ +Docs: `docs/research/endpoints/care_team.md` +""" + +from __future__ import annotations + +import logging +import random +from typing import Any + +import httpx +from pydantic import BaseModel, Field + +from openkp.scrapers.csrf import fetch_csrf_token +from openkp.scrapers.request import KaiserRequest + +logger = logging.getLogger(__name__) + +LOAD_PATH = "/mychartcn/Clinical/CareTeam/Load" +LOAD_EXTERNAL_PATH = "/mychartcn/Clinical/CareTeam/LoadExternal" +PAGE_REFERER = "https://healthy.kaiserpermanente.org/mychartcn/clinical/careteam" + +# Epic component identifier observed in the captured request. Hardcoded by +# Kaiser's front end — value is stable. +COMPONENT_NUMBER = "2" + + +# --- models --- + + +class CareTeamProvider(BaseModel): + """One clinician on the patient's care team or recent-providers list.""" + + id: str + name: str | None = None + specialty: str | None = None # e.g. "Family Practice", "Cardiology" + relation: str | None = None # e.g. "Primary Care Provider", "Cardiologist" + department_id: str | None = None # opaque Epic handle; pairs with scheduling + is_external: bool = False # True == non-KP provider (from LoadExternal) + # Capability flags from the care-team panel's OWN inline action buttons. + # They describe what KP's portal UI offers on this panel, NOT what OpenKP's + # other tools can do. In particular `can_message` is the panel's + # quick-message button — it is NOT a gate on reachability. Messaging runs + # through a separate surface (list_message_recipients + send_message), where + # a provider with can_message=False here may still be a valid recipient. + # Same caveat applies to can_schedule / can_request_appointment. 
+ can_message: bool = False + can_schedule: bool = False + can_request_appointment: bool = False + can_view_details: bool = False + photo_url: str | None = None + provider_page_url: str | None = None # public mydoctor.kaiserpermanente.org bio + care_team_status: int | None = None # raw int enum; 0 observed for all + + +class CareTeamResponse(BaseModel): + """The full care team roster: internal KP providers plus any external ones.""" + + providers: list[CareTeamProvider] = Field(default_factory=list) + total_count: int = 0 + + +# --- public --- + + +async def fetch_care_team(client: KaiserRequest) -> CareTeamResponse: + """Fetch the patient's care team roster. One CSRF fetch + two round trips. + + Calls the internal-provider endpoint (the primary, always-present source) + then the external-provider endpoint. The external call is best-effort: if + it fails we still return the internal roster rather than losing everything. + Per ADR-005, never raise on missing fields — return whatever parses, leave + the rest null. An empty roster is a valid outcome. + """ + csrf = await fetch_csrf_token(client, referer=PAGE_REFERER) + + internal_payload = await _load(client, LOAD_PATH, csrf, extra_params={"isPrimaryStandalone": "true"}) + providers = _parse_providers(internal_payload) + + try: + external_payload = await _load(client, LOAD_EXTERNAL_PATH, csrf) + providers.extend(_parse_providers(external_payload)) + except (httpx.HTTPError, ValueError) as exc: + logger.warning("CareTeam LoadExternal failed, returning internal roster only: %s", exc) + + return CareTeamResponse(providers=providers, total_count=len(providers)) + + +# --- private --- + + +async def _load( + client: KaiserRequest, + path: str, + csrf_token: str, + extra_params: dict[str, str] | None = None, +) -> Any: + """POST one CareTeam Load endpoint and return its parsed JSON body. 
+ + The captured requests carry no request body — these are GET-shaped POSTs + driven entirely by query params and the anti-forgery token header. + """ + params = { + "hfrId": "", + "sources": "", + "actions": "", + "ComponentNumber": COMPONENT_NUMBER, + "noCache": f"{random.random()}", + } + if extra_params: + params.update(extra_params) + response = await client.post(path, params=params, headers=_api_headers(csrf_token)) + response.raise_for_status() + return response.json() + + +def _api_headers(csrf_token: str) -> dict[str, str]: + return { + "Accept": "application/json, text/javascript, */*; q=0.01", + "Origin": "https://healthy.kaiserpermanente.org", + "Referer": PAGE_REFERER, + "X-Requested-With": "XMLHttpRequest", + "__RequestVerificationToken": csrf_token, + } + + +def _parse_providers(payload: Any) -> list[CareTeamProvider]: + """Walk a CareTeam Load response, produce a list of providers.""" + if not isinstance(payload, dict): + return [] + + raw_list = payload.get("ProvidersList") + if not isinstance(raw_list, list): + return [] + + providers: list[CareTeamProvider] = [] + for entry in raw_list: + provider = _parse_provider(entry) + if provider is not None: + providers.append(provider) + return providers + + +def _parse_provider(entry: Any) -> CareTeamProvider | None: + """One ProvidersList entry → `CareTeamProvider`. 
Returns None if no ID.""" + if not isinstance(entry, dict): + return None + + provider_id = _str_or_none(entry.get("ID")) + if provider_id is None: + return None + + return CareTeamProvider( + id=provider_id, + name=_str_or_none(entry.get("Name")), + specialty=_str_or_none(entry.get("Specialty")), + relation=_str_or_none(entry.get("Relation")), + department_id=_str_or_none(entry.get("DepartmentID")), + is_external=bool(entry.get("IsExternal")), + can_message=bool(entry.get("CanMessage")), + can_schedule=bool(entry.get("CanDirectSchedule")), + can_request_appointment=bool(entry.get("CanRequestAppointment")), + can_view_details=bool(entry.get("CanViewProviderDetails")), + photo_url=_str_or_none(entry.get("Photo")), + provider_page_url=_str_or_none(entry.get("WebPageUrl")), + care_team_status=_int_or_none(entry.get("CareTeamStatus")), + ) + + +def _str_or_none(value: Any) -> str | None: + if value is None: + return None + s = str(value).strip() + return s or None + + +def _int_or_none(value: Any) -> int | None: + if isinstance(value, bool): + # bools are ints in Python; we don't want True → 1 here. + return None + if isinstance(value, int): + return value + return None diff --git a/openkp/src/openkp/scrapers/implants.py b/openkp/src/openkp/scrapers/implants.py new file mode 100644 index 0000000..6a87866 --- /dev/null +++ b/openkp/src/openkp/scrapers/implants.py @@ -0,0 +1,238 @@ +"""Implanted-devices scraper. + +One MCP tool surfaces from this module: + +- `list_implants` — the patient's implanted (and explanted) medical devices: + pacemakers, ICDs, leads, stents, intraocular lenses, orthopedic hardware, + etc. Each device carries manufacturer, model, serial, UDI, body area, + laterality, status, and the implant/explant procedure (date + provider). + +Source: legacy MyChart `/mychartcn/api/implants/GetImplants`. Same auth + CSRF +contract as `problems.py`. No pagination — Kaiser returns the full device list +in one call. 
class Implant(BaseModel):
    """One implanted or explanted device, as parsed from an `implantList` entry.

    Every field except `id` is optional: string fields that arrive empty or
    whitespace-only are stored as None, and the two boolean flags default to
    False when the source key is absent or falsy.
    """

    # The entry's own "id" when present, otherwise the implantList map key.
    id: str
    name: str | None = None
    type: str | None = None  # "Pacemaker", "Cardiac Implant", "Ophthalmology"
    area: str | None = None  # body area, e.g. "Chest", "Eye"; null when unknown
    laterality: str | None = None  # "Left", "Right"
    status: str | None = None  # "Implanted", "Explanted", ...
    is_explant: bool = False  # from the entry's "isExplant" flag
    is_external: bool = False  # record sourced from outside KP
    manufacturer: str | None = None
    model: str | None = None
    serial: str | None = None
    udi: str | None = None  # full UDI barcode string when present
    sdi: str | None = None  # device identifier (GTIN portion of the UDI)
    lot: str | None = None
    # Both lists keep only non-blank strings, stripped of surrounding whitespace.
    comments: list[str] = Field(default_factory=list)  # empty in all observed data
    description: list[str] = Field(default_factory=list)  # empty in all observed data
    # Procedure blocks whose fields are all empty are collapsed to None.
    implanted: ImplantProcedure | None = None
    explanted: ImplantProcedure | None = None
+ """ + csrf = await fetch_csrf_token(client, referer=PAGE_REFERER) + response = await client.post(LIST_PATH, headers=_api_headers(csrf), json={}) + response.raise_for_status() + return _parse_implants_response(response.json()) + + +# --- private --- + + +def _api_headers(csrf_token: str) -> dict[str, str]: + return { + "Accept": "application/json", + "Content-Type": "application/json", + "Origin": "https://healthy.kaiserpermanente.org", + "Referer": PAGE_REFERER, + "X-Requested-With": "XMLHttpRequest", + "__RequestVerificationToken": csrf_token, + } + + +def _parse_implants_response(payload: Any) -> ImplantsResponse: + """Walk the GetImplants response, produce an `ImplantsResponse`.""" + if not isinstance(payload, dict): + return ImplantsResponse() + + implant_map = payload.get("implantList") + if not isinstance(implant_map, dict): + return ImplantsResponse() + + implants: list[Implant] = [] + for dev_id in _ordered_ids(payload.get("implantGroupList"), implant_map): + implant = _parse_implant(dev_id, implant_map.get(dev_id)) + if implant is not None: + implants.append(implant) + + return ImplantsResponse(implants=implants, total_count=len(implants)) + + +def _ordered_ids(group_list: Any, implant_map: dict[str, Any]) -> list[str]: + """Device ids in the portal's body-area order. + + Uses `implantGroupList` for ordering, then appends any device present in + `implantList` but not referenced by a group (defensive — not observed). 
+ """ + ordered: list[str] = [] + seen: set[str] = set() + + if isinstance(group_list, list): + for group in group_list: + if not isinstance(group, dict): + continue + ids = group.get("implantIDs") + if not isinstance(ids, list): + continue + for dev_id in ids: + if isinstance(dev_id, str) and dev_id in implant_map and dev_id not in seen: + ordered.append(dev_id) + seen.add(dev_id) + + for dev_id in implant_map: + if dev_id not in seen: + ordered.append(dev_id) + seen.add(dev_id) + + return ordered + + +def _parse_implant(dev_id: Any, entry: Any) -> Implant | None: + """One implantList value → `Implant`. Returns None if no usable id.""" + if not isinstance(entry, dict): + return None + + # Prefer the entry's own id; fall back to the map key. + implant_id = _str_or_none(entry.get("id")) or _str_or_none(dev_id) + if implant_id is None: + return None + + return Implant( + id=implant_id, + name=_str_or_none(entry.get("name")), + type=_str_or_none(entry.get("type")), + area=_str_or_none(entry.get("area")), + laterality=_str_or_none(entry.get("laterality")), + status=_str_or_none(entry.get("status")), + is_explant=bool(entry.get("isExplant")), + is_external=bool(entry.get("isExternal")), + manufacturer=_str_or_none(entry.get("manufacturer")), + model=_str_or_none(entry.get("model")), + serial=_str_or_none(entry.get("serial")), + udi=_str_or_none(entry.get("udi")), + sdi=_str_or_none(entry.get("sdi")), + lot=_str_or_none(entry.get("lot")), + comments=_str_list(entry.get("comments")), + description=_str_list(entry.get("description")), + implanted=_parse_procedure(entry.get("implantProcedure")), + explanted=_parse_procedure(entry.get("explantProcedure")), + ) + + +def _parse_procedure(raw: Any) -> ImplantProcedure | None: + """Parse an implant/explant procedure block. + + Kaiser always sends both blocks even when nothing happened (every field an + empty string). We collapse an all-empty block to None. 
+ """ + if not isinstance(raw, dict): + return None + + date = _str_or_none(raw.get("isoDate")) # misnomer: it's a display string + provider = _str_or_none(raw.get("provider")) + facility = _str_or_none(raw.get("facility")) + device_count = _str_or_none(raw.get("deviceCount")) + + if date is None and provider is None and facility is None and device_count is None: + return None + + return ImplantProcedure( + date=date, + date_iso=_display_date_to_iso(date), + provider=provider, + facility=facility, + device_count=device_count, + ) + + +def _display_date_to_iso(value: str | None) -> str | None: + """"January 3, 2024" → "2024-01-03". None on anything unparseable.""" + if not value: + return None + try: + return datetime.strptime(value.strip(), "%B %d, %Y").date().isoformat() + except (ValueError, TypeError): + return None + + +def _str_list(value: Any) -> list[str]: + if not isinstance(value, list): + return [] + return [s.strip() for s in value if isinstance(s, str) and s.strip()] + + +def _str_or_none(value: Any) -> str | None: + if value is None: + return None + s = str(value).strip() + return s or None diff --git a/openkp/tests/test_care_team.py b/openkp/tests/test_care_team.py new file mode 100644 index 0000000..bb07fdb --- /dev/null +++ b/openkp/tests/test_care_team.py @@ -0,0 +1,370 @@ +"""Tests for scrapers/care_team.py: parser + HTTP integration. + +Fixtures use fabricated provider names and opaque placeholder IDs. No PHI. 
+""" + +from __future__ import annotations + +from unittest.mock import AsyncMock, MagicMock, patch + +import httpx +import pytest + +from openkp.scrapers.care_team import ( + COMPONENT_NUMBER, + LOAD_EXTERNAL_PATH, + LOAD_PATH, + PAGE_REFERER, + CareTeamResponse, + _int_or_none, + _parse_provider, + _parse_providers, + _str_or_none, + fetch_care_team, +) +from openkp.scrapers.csrf import CSRF_PATH + + +# --- fake data (non-PHI) --- + + +_FAKE_CSRF = "fake-csrf-token-abc123" + + +def _csrf_html(token: str = _FAKE_CSRF) -> str: + return f'' + + +def _provider(**overrides) -> dict: + """One ProvidersList entry mirroring real KP shape, fabricated values.""" + base = { + "ID": "prov-1", + "Name": "PAT EXAMPLE MD", + "Photo": "https://example.invalid/photo.jpg", + "NationalProviderID": "npid-1", + "WebPageUrl": "https://mydoctor.kaiserpermanente.org/example/doctor/patexample", + "InfoBlurbUrl": "", + "AboutMeBlurb": [], + "CanViewProviderDetails": True, + "CanDirectSchedule": False, + "CanRequestAppointment": False, + "CanMessage": False, + "CommCenterMessageUrl": "", + "CanRequestCustomAppt": False, + "HasNoProviderRecord": False, + "IsNewSchedulingEnabled": True, + "Specialty": "Family Practice", + "Relation": "Primary Care Provider", + "SchedulableVisitTypes": None, + "DepartmentID": "dept-1", + "Organizations": None, + "IsExternal": False, + "CareTeamStatus": 0, + "CanHideProvider": True, + } + base.update(overrides) + return base + + +def _internal_payload() -> dict: + return { + "ProvidersList": [ + _provider(), + _provider( + ID="prov-2", + Name="SAM SPECIALIST MD", + Specialty="Cardiology", + Relation="Cardiologist", + DepartmentID="dept-2", + CanMessage=True, + ), + ], + "DescriptiveTitle": "Care Team and Recent Providers", + "TabColorClass": "color1", + "IsCustomApptReqEnabled": False, + "CustomRequestAppointmentLink": "showform&formname=ApptReqCntr", + } + + +def _empty_external_payload() -> dict: + return { + "ProvidersList": [], + "DescriptiveTitle": "Care 
def test_parse_provider_full_field_extraction():
    """Every mapped ProvidersList key lands on the matching model field."""
    p = _parse_provider(_provider(Name=" PAT EXAMPLE MD "))
    assert p is not None
    assert p.id == "prov-1"
    assert p.name == "PAT EXAMPLE MD"  # surrounding whitespace is stripped
    assert p.specialty == "Family Practice"
    assert p.relation == "Primary Care Provider"
    assert p.department_id == "dept-1"
    assert p.is_external is False
    assert p.can_message is False
    assert p.can_schedule is False
    assert p.can_request_appointment is False
    assert p.can_view_details is True
    assert p.photo_url == "https://example.invalid/photo.jpg"
    # provider_page_url is Optional: narrow it first so a missing URL fails as
    # a plain assertion instead of an AttributeError on None.
    assert p.provider_page_url is not None
    assert p.provider_page_url.endswith("patexample")
    assert p.care_team_status == 0
def test_parse_providers_skips_unparseable_entries():
    """Entries without an ID, and non-dict entries, are dropped silently."""
    junk = [{"Name": "no id, dropped"}, "garbage", None]
    kept = _parse_providers({"ProvidersList": [_provider(ID="good"), *junk]})
    assert [p.id for p in kept] == ["good"]
def _patch_http(responses: list[httpx.Response]):
    """Replace `httpx.AsyncClient` in the request module with a scripted fake.

    Each awaited `request(...)` on the fake client returns the next item of
    `responses`. Returns `(fake_client, patcher)`; the patch is already
    started, so the caller must call `patcher.stop()` when done.
    """
    # Build the scripted client before touching the patch.
    fake_client = AsyncMock()
    fake_client.request = AsyncMock(side_effect=_bind_request(responses))

    patcher = patch("openkp.scrapers.request.httpx.AsyncClient")
    async_client_cls = patcher.start()

    # `async with AsyncClient(...) as c:` must hand back the fake client.
    context = async_client_cls.return_value
    context.__aenter__.return_value = fake_client
    context.__aexit__.return_value = None
    return fake_client, patcher
@pytest.mark.asyncio
async def test_fetch_care_team_external_failure_returns_internal():
    """A non-200 from LoadExternal must not lose the internal roster."""
    from openkp.scrapers.request import KaiserRequest

    store = _make_store()
    scripted = [
        httpx.Response(200, text=_csrf_html()),  # CSRF page
        httpx.Response(200, json=_internal_payload()),  # Load
        httpx.Response(500, text="boom"),  # LoadExternal fails
    ]
    _, patcher = _patch_http(scripted)
    try:
        roster = await fetch_care_team(KaiserRequest(store))
    finally:
        patcher.stop()

    assert roster.total_count == 2
    assert roster.providers[0].id == "prov-1"
json=_empty_external_payload()), + httpx.Response(200, json=_empty_external_payload()), + ]) + try: + response = await fetch_care_team(KaiserRequest(store)) + finally: + p.stop() + + assert response.total_count == 0 + assert response.providers == [] diff --git a/openkp/tests/test_implants.py b/openkp/tests/test_implants.py new file mode 100644 index 0000000..e5f2fc0 --- /dev/null +++ b/openkp/tests/test_implants.py @@ -0,0 +1,384 @@ +"""Tests for scrapers/implants.py: parser + HTTP integration. + +Fixtures use fabricated device names, models, and serials. No PHI. +""" + +from __future__ import annotations + +from unittest.mock import AsyncMock, MagicMock, patch + +import httpx +import pytest + +from openkp.scrapers.csrf import CSRF_PATH +from openkp.scrapers.implants import ( + LIST_PATH, + PAGE_REFERER, + ImplantsResponse, + _display_date_to_iso, + _ordered_ids, + _parse_implant, + _parse_implants_response, + _parse_procedure, + _str_list, + _str_or_none, + fetch_implants, +) + + +# --- fake data (non-PHI) --- + + +_FAKE_CSRF = "fake-csrf-token-abc123" + + +def _csrf_html(token: str = _FAKE_CSRF) -> str: + return f'' + + +def _empty_proc() -> dict: + return {"isoDate": "", "deviceCount": "", "provider": "", "facility": ""} + + +def _device(**overrides) -> dict: + base = { + "organizationLinks": [], + "isExternal": False, + "id": "dev-eye-r", + "name": "Fake Intraocular Lens X", + "type": "Ophthalmology", + "area": "Eye", + "laterality": "Right", + "udi": "(01)00000000000000(17)000000(21)SN-EYE-001", + "sdi": "00000000000000", + "manufacturer": "ACME OPTICS", + "serial": "SN-EYE-001", + "status": "Implanted", + "model": "FAKE-IOL-1", + "description": [], + "comments": [], + "lot": "", + "isExplant": False, + "implantProcedure": { + "isoDate": "March 5, 2020", + "deviceCount": "1", + "provider": "DR FAKE EYE", + "facility": "Fake Surgery Center", + }, + "explantProcedure": _empty_proc(), + } + base.update(overrides) + return base + + +def _sample_payload() -> dict: 
def test_display_date_to_iso():
    """Display dates convert to ISO; anything else maps to None."""
    parseable = {
        "January 3, 2024": "2024-01-03",
        "November 19, 2007": "2007-11-19",
        " March 5, 2020 ": "2020-03-05",
    }
    for shown, iso in parseable.items():
        assert _display_date_to_iso(shown) == iso

    # Unparseable / empty → None, never raises
    for rejected in ("", None, "2024-01-03", "garbage"):
        assert _display_date_to_iso(rejected) is None
def test_parse_implant_full_field_extraction():
    """Every mapped implantList key lands on the matching model field."""
    imp = _parse_implant("dev-eye-r", _device())
    assert imp is not None
    assert imp.id == "dev-eye-r"
    assert imp.name == "Fake Intraocular Lens X"
    assert imp.type == "Ophthalmology"
    assert imp.area == "Eye"
    assert imp.laterality == "Right"
    assert imp.status == "Implanted"
    assert imp.is_explant is False
    assert imp.is_external is False
    assert imp.manufacturer == "ACME OPTICS"
    assert imp.model == "FAKE-IOL-1"
    assert imp.serial == "SN-EYE-001"
    # udi is Optional: narrow it first so a missing value fails as a plain
    # assertion instead of an AttributeError on None.
    assert imp.udi is not None
    assert imp.udi.startswith("(01)")
    assert imp.sdi == "00000000000000"
    assert imp.comments == []
    assert imp.implanted is not None
    assert imp.implanted.date_iso == "2020-03-05"
    assert imp.explanted is None  # all-empty explant block collapses to None
def test_ordered_ids_appends_ungrouped_devices():
    """Grouped ids come first in group order; ungrouped ids follow."""
    devices = {"a": {}, "b": {}, "orphan": {}}
    groups = [{"area": "X", "implantIDs": ["b", "a"]}]
    assert _ordered_ids(groups, devices) == ["b", "a", "orphan"]
def _make_store() -> MagicMock:
    """Build a fake session store that always yields one canned session.

    `get_session` and `invalidate` are async mocks so they can be awaited.
    """
    from openkp.scrapers.auth import KaiserSession

    canned_session = KaiserSession(
        cookies=[{"name": "k", "value": "v", "domain": ".kp.org", "path": "/"}],
        user_agent="ua",
    )
    return MagicMock(
        get_session=AsyncMock(return_value=canned_session),
        invalidate=AsyncMock(),
    )
@pytest.mark.asyncio
async def test_fetch_implants_empty_list():
    """A patient with no devices yields an empty, zero-count response."""
    from openkp.scrapers.request import KaiserRequest

    store = _make_store()
    scripted = [
        httpx.Response(200, text=_csrf_html()),  # CSRF page
        httpx.Response(200, json={"implantList": {}, "implantGroupList": []}),
    ]
    _, patcher = _patch_http(scripted)
    try:
        result = await fetch_implants(KaiserRequest(store))
    finally:
        patcher.stop()

    assert result.total_count == 0
    assert result.implants == []