diff --git a/.github/workflows/azure-iac.yml b/.github/workflows/azure-iac.yml
new file mode 100644
index 0000000..2834c01
--- /dev/null
+++ b/.github/workflows/azure-iac.yml
@@ -0,0 +1,150 @@
+name: azure-iac
+
+# Lints and validates the Azure insurance-agent-platform IaC (azure/) on every PR.
+# Jobs:
+#   fmt      - terraform fmt -check across azure/
+#   tflint   - tflint static analysis per Terraform module
+#   validate - terraform init -backend=false + terraform validate per module
+#   policy   - structural lint of the Azure Policy definitions / initiative JSON
+#   openapi  - spectral lint of the connector OpenAPI specs (rules: .spectral.yaml)
+
+on:
+  pull_request:
+    paths:
+      - "azure/**"
+      - ".github/workflows/azure-iac.yml"
+      - ".spectral.yaml"
+      - ".tflint.hcl"
+  push:
+    branches: [main]
+    paths:
+      - "azure/**"
+      - ".github/workflows/azure-iac.yml"
+      - ".spectral.yaml"
+      - ".tflint.hcl"
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+env:
+  TERRAFORM_VERSION: "1.9.8"
+
+jobs:
+  fmt:
+    name: terraform fmt
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: hashicorp/setup-terraform@v3
+        with:
+          terraform_version: ${{ env.TERRAFORM_VERSION }}
+      - name: terraform fmt -check -recursive
+        run: terraform fmt -check -recursive -diff azure
+
+  tflint:
+    name: tflint (${{ matrix.module }})
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        module:
+          - azure/landing-zones/application-platform
+          - azure/policy-as-code/initiative
+          - azure/workloads/insurance-app
+    steps:
+      - uses: actions/checkout@v4
+      - uses: terraform-linters/setup-tflint@v4
+        with:
+          tflint_version: latest
+      # tflint only auto-discovers .tflint.hcl in the current directory (or $HOME).
+      # Both steps run inside the module directory, so pass the repo-root config
+      # explicitly; otherwise the azurerm ruleset is never installed or applied.
+      - name: tflint --init
+        run: tflint --init --config "$GITHUB_WORKSPACE/.tflint.hcl"
+        working-directory: ${{ matrix.module }}
+        env:
+          # Authenticated plugin downloads avoid GitHub API rate limiting.
+          GITHUB_TOKEN: ${{ github.token }}
+      - name: tflint
+        run: tflint --config "$GITHUB_WORKSPACE/.tflint.hcl" --format compact --recursive=false
+        working-directory: ${{ matrix.module }}
+
+  validate:
+    name: terraform validate (${{ matrix.module }})
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        module:
+          - azure/landing-zones/application-platform
+          - azure/policy-as-code/initiative
+          - azure/workloads/insurance-app
+    steps:
+      - uses: actions/checkout@v4
+      - uses: hashicorp/setup-terraform@v3
+        with:
+          terraform_version: ${{ env.TERRAFORM_VERSION }}
+      - name: terraform init (no backend)
+        run: terraform init -backend=false -input=false
+        working-directory: ${{ matrix.module }}
+      - name: terraform validate
+        run: terraform validate -no-color
+        working-directory: ${{ matrix.module }}
+
+  policy:
+    name: azure policy json lint
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: validate JSON is well-formed
+        run: |
+          set -euo pipefail
+          fail=0
+          while IFS= read -r -d '' f; do
+            if jq empty "$f" >/dev/null 2>&1; then
+              echo "ok $f"
+            else
+              echo "BAD $f (invalid JSON)"; fail=1
+            fi
+          done < <(find azure/policy-as-code -name '*.json' -print0)
+          exit $fail
+      - name: validate policy definition structure
+        run: |
+          set -euo pipefail
+          shopt -s nullglob
+          fail=0
+          defs=(azure/policy-as-code/definitions/*.json)
+          if [ "${#defs[@]}" -eq 0 ]; then
+            echo "BAD azure/policy-as-code/definitions contains no *.json policy definitions"; fail=1
+          fi
+          for f in "${defs[@]}"; do
+            # each definition must have properties.displayName, .policyRule and .mode;
+            # a missing/null "properties" object is reported as all three missing
+            missing=$(jq -r '
+              (.properties // {}) as $p
+              | ["displayName", "policyRule", "mode"]
+              | map(select(. as $k | $p | has($k) | not) | "properties." + .)
+              | join(", ")' "$f")
+            if [ -n "$missing" ]; then
+              echo "BAD $f missing: $missing"; fail=1
+            else
+              echo "ok $f"
+            fi
+          done
+          # the initiative must reference at least one policy definition
+          init=azure/policy-as-code/initiative/ai-agent-governance-initiative.json
+          n=$(jq '.properties.policyDefinitions | length' "$init")
+          if [ "$n" -lt 1 ]; then echo "BAD $init has no policyDefinitions"; fail=1; else echo "ok $init ($n definitions)"; fi
+          exit $fail
+
+  openapi:
+    name: connector openapi lint
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-node@v4
+        with:
+          node-version: "20"
+      - name: spectral lint OpenAPI specs
+        run: npx --yes @stoplight/spectral-cli@6 lint "azure/workloads/insurance-app/connectors/*.openapi.yaml"
diff --git a/.spectral.yaml b/.spectral.yaml
new file mode 100644
index 0000000..04aa167
--- /dev/null
+++ b/.spectral.yaml
@@ -0,0 +1,10 @@
+extends: ["spectral:oas"]
+
+# Connector specs are intentionally small templates; relax the doc-completeness
+# rules so the lint focuses on structural correctness.
+rules:
+  oas3-api-servers: warn
+  operation-tag-defined: "off"
+  info-contact: "off"
+  info-license: "off"
+  oas3-unused-component: warn
diff --git a/.tflint.hcl b/.tflint.hcl
new file mode 100644
index 0000000..5fece8b
--- /dev/null
+++ b/.tflint.hcl
@@ -0,0 +1,15 @@
+config {
+  call_module_type = "local"
+  force = false
+}
+
+plugin "terraform" {
+  enabled = true
+  preset = "recommended"
+}
+
+plugin "azurerm" {
+  enabled = true
+  version = "0.27.0"
+  source = "github.com/terraform-linters/tflint-ruleset-azurerm"
+}
diff --git a/README.md b/README.md
index feb4f63..9ab150a 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,19 @@
-# aptumcloud
+# adaptcloud
 Initial MVP repository and GKE setup
 this repo contains secure hardening and will contain chatGPT webhooks as well as GKE hooks for the community to use
+
+## Contents
+
+- `gke/` — hardened GKE / Terraform reference deployment (GCP).
+- `azure/` — **insurance agent platform**: a low-code-first insurance application
+  (Power Platform + Copilot Studio) plus API connectors, deployed inside an Azure
+  Landing Zone with **policy as code** and governed AI **agent identities**, built
+  to the Microsoft Cloud Adoption Framework guidance
+  *"Govern and secure AI agents across your organization"*. Creates an
+  **Application Platform landing zone** if the tenant doesn't have one. See
+  [`azure/README.md`](azure/README.md) and
+  [`azure/docs/caf-ai-agent-governance-mapping.md`](azure/docs/caf-ai-agent-governance-mapping.md).
+- `main.go`, `chatgpt.go` — sample webhook services.
+ +None of this code is expected to go from dev straight to prod — review, parameterize, +and run it through your own pipeline first. diff --git a/azure/.gitignore b/azure/.gitignore new file mode 100644 index 0000000..28c3e7b --- /dev/null +++ b/azure/.gitignore @@ -0,0 +1,24 @@ +# Terraform +**/.terraform/* +*.tfstate +*.tfstate.* +crash.log +crash.*.log + +# Real variable files may contain subscription IDs / tenant IDs / emails — keep +# only the *.example versions in source control. +*.tfvars +*.tfvars.json +!*.tfvars.example +# Committed cost/posture profiles (non-secret tuning; subscription/tenant IDs +# in them are placeholders — override with -var or a private *.tfvars). +!profiles/**/*.tfvars + +override.tf +override.tf.json +*_override.tf +*_override.tf.json + +.terraformrc +terraform.rc +.terraform.lock.hcl diff --git a/azure/COSTS.md b/azure/COSTS.md new file mode 100644 index 0000000..c66e5c1 --- /dev/null +++ b/azure/COSTS.md @@ -0,0 +1,104 @@ +# Cost & teardown + +Rough monthly costs for the insurance agent platform. **List prices, USD, +pay-as-you-go, East US 2, no EA/CSP discount, early-2026 rates — always confirm +with the [Azure Pricing Calculator](https://azure.microsoft.com/pricing/calculator/) before you commit.** +Power Platform / Copilot Studio is **licensing & capacity**, not Azure metering — +treated separately below. + +Two ready-made variable sets are in [`profiles/`](profiles/): `dev-demo` and +`prod`. The module defaults sit between them. 
+ +| | `profiles/dev-demo` | module defaults | `profiles/prod` | +|---|---|---|---| +| Idle Azure cost / month | **~$80–150** | ~$600–750 | **~$3,500–6,000+** | +| Private endpoints | off (public) | on | on | +| VNet injection / APIM mode | off / external | off / external (APIM Developer) | on / **Internal** | +| API Management SKU | `Consumption_0` (serverless) | `Developer_1` (~$50) | `Premium_1` (~$2,800/unit) | +| AI Search | `basic`, 1×1 (~$75) | `standard`, 1×2 (~$490) | `standard`, 1×3 (~$735) | +| Power Platform environments | 1 sandbox | 3 | 3 (dev/test/prod) | +| Agents (identities) | 1 | 4 | 4 | +| Log Analytics retention | 30 d | 90 d | 90 d | +| Defender for Cloud AI plan | off | on | on | +| Key Vault purge protection | off | on | on | +| Governance policies | run in **Audit** | enforced | enforced | + +> The demo profile uses **public endpoints** — only acceptable for non-sensitive +> demo data with the `ai-agent-governance` initiative assigned in **Audit** mode. + +## Where the money goes + +| Component | Pricing model | ~Idle / month | Notes & knobs | +|---|---|---|---| +| **Azure AI Search** | per *search unit* (replicas × partitions) × tier hourly rate; billed while the service exists, idle or not | `free` $0 · `basic` ~$75 · `standard` (S1) ~$245/unit | Biggest fixed line at the defaults (2 units ≈ $490). Knobs: `ai_search_sku`, `ai_search_replica_count`, `ai_search_partition_count`. `free` = no SLA, 3 indexes — fine for a demo. | +| **API Management** | per gateway unit, hourly (or per-call for Consumption) | `Consumption_0` ~$0 + ~$3.50 / 1M calls · `Developer_1` ~$50 (no SLA) · `StandardV2_1` ~$700 (VNet) · `Premium_1` ~$2,800/unit (Internal VNet + zones) | Knob: `apim_sku_name`. Consumption can't do Internal VNet — pair with `enable_vnet_injection=false`. Premium is the jump that takes you to four figures. 
| +| **Private endpoints** | ~$0.013/hr each (~$9.50/mo) + ~$0.01/GB processed | ~$45–65 (≈6–7 PEs) | Knob: `enable_private_endpoints` (off ⇒ no PEs, public access on). | +| **Log Analytics + Microsoft Sentinel** | per GB ingested (~$2.30–2.76/GB after 5 GB free) + Sentinel ~$2.46/GB + retention >31 d (~$0.12/GB/mo) | ~$10–80 | Scales with how chatty diagnostic settings are. Knob: `log_analytics_retention_days`; reuse the platform workspace via `central_log_analytics_workspace_id`. | +| **Defender for Cloud** (ARM + Key Vault + AI workloads plans) | ARM ~$4–5/sub · Key Vault ~$0.02/10K txns · AI plan metered | ~$10–30 | Knob: `enable_defender_ai_threat_protection`; the ARM + Key Vault plans are always set by the workload module. | +| **Azure OpenAI** (account is free) | per 1K/1M tokens | **$0 idle → variable** | `capacity` in `approved_model_deployments` is TPM *quota*, not a reservation — unused costs nothing. gpt-4o ≈ $2.50 / $10 per 1M in/out · gpt-4o-mini ≈ $0.15 / $0.60 · text-embedding-3-large ≈ $0.13 per 1M. (Provisioned throughput "PTU" would be a fixed hourly charge — not used here; deployments are `Standard` = PAYG.) | +| **Azure AI Content Safety** | per 1K records | $0 idle → ~$0.75 / 1K text records | Metered only. | +| **AI Foundry hub + project** | free (it's an Azure ML workspace) — you pay for attached storage/KV/ACR and any compute | ~$0 (no compute provisioned) | Add compute clusters/online endpoints and they bill separately. | +| **Application Insights** (per agent + APIM) | workspace-based ⇒ ingestion billed via Log Analytics above | components free | Included in the LAW line. | +| **Key Vault** (Standard) | ~$0.03 per 10K operations | ~$1–3 | Knob: `key_vault_purge_protection` (affects teardown, not cost — see below). | +| **Storage** (ZRS, Standard) | per GB + transactions | ~$1–5 | AI Foundry's backing store; grows with artifacts. 
| +| **Networking** (VNet, peering, route table, NSGs) | VNet/RT free; peering ~$0.01/GB each way | ~$1–10 | | +| **Management groups, policy assignments, managed identities, Entra apps/groups, budgets** | free | $0 | | +| **Power Platform / Copilot Studio** | licensing & capacity | **$0 if entitled, else $$$** | Each *production* Dataverse env needs ~1 GB DB drawn from tenant capacity (overage ~$40/GB/mo). Managed Environments require premium Power Platform per-user (~$5–20/user/mo) or per-app plans. **Copilot Studio** runs on a Copilot Studio license **or message packs (~$200/mo per 25K messages)**. Sandbox envs are cheaper / no min DB. | + +**Bottom line at the module defaults:** ~$600–750/month on the Azure side +(AI Search + APIM + private endpoints dominate), **plus** model token usage, +**plus** Power Platform / Copilot Studio licensing. + +### The five knobs that move the needle most +1. `apim_sku_name` — `Developer_1` (~$50) vs `Premium_1` (~$2,800/unit). +2. `ai_search_sku` + `ai_search_replica_count` — `basic` 1×1 (~$75) vs `standard` 1×3 (~$735). +3. `enable_private_endpoints` — off saves ~$50/mo (but you lose network isolation). +4. `log_analytics_retention_days` + reusing the platform workspace. +5. Number of `power_platform_environments` and `agents` (mostly licensing/quota, not Azure $). + +## Can you tear it down at will? + +**Yes** — `terraform destroy` per module, reverse order: +`workloads/insurance-app` → `policy-as-code/initiative` → `landing-zones/application-platform`. +(Deny-policies gate *create/update*, not *delete*, so order is for tidiness, not +correctness.) Things to know: + +- **What actually stops the meter:** AI Search and API Management bill *while they + exist*, idle or not — destroy those first if you just want billing to stop fast. + OpenAI / Content Safety stop the moment you stop calling them. 
+- **Key Vault won't fully disappear if `key_vault_purge_protection = true`.** It + stays soft-deleted for 90 days and **cannot be force-purged** — the name is + unusable for 90 days. The `dev-demo` profile sets it `false` (7-day soft-delete, + purgeable) so you can rebuild. To purge a soft-deleted vault: + `az keyvault purge --name <vault-name> --location <region>` (only works when purge + protection was off). +- **Cognitive Services** (Azure OpenAI, Content Safety) and **API Management** + also soft-delete (~48 h). Purge to free the name immediately: + `az cognitiveservices account purge -g <resource-group> -n <account-name> -l <region>` / + `az apim deletedservice purge --service-name <apim-name> --location <region>` — or + just use new names on redeploy. APIM Developer deletes in ~5–15 min; Premium can + take 30–45 min. +- **Power Platform environments** are deleted with their Dataverse data; a PP + admin can restore an environment for ~7 days, then it's gone. The DLP policies + are removed too — note the **tenant baseline DLP** is tenant-wide. +- **The Application Platform management group** is deleted only if the workload + module *created* it (`create_application_platform_mg = true`) and it's empty. + If it pre-existed (referenced via data source), nothing happens. The + subscription→MG association is just removed; the subscription itself is **not** + deleted by Terraform (a *vended* subscription, if you used that path, is + *cancelled* — its own ~90-day window — not destroyed). +- **`DeployIfNotExists` leftovers:** removing the policy assignment doesn't remove + the diagnostic settings it deployed onto resources — moot if you're deleting + those resources anyway. +- **Portal-created agents aren't in Terraform state.** If you stood up a Copilot + Studio agent by hand, delete it in Copilot Studio — and the Entra Agent ID with + it; `terraform destroy` won't. +- **Defender for Cloud plans** revert to Free tier on destroy — no lingering cost. 
+- **Soft-deleted resources block redeploy** of the same names — purge them (above) + or change names. + +### Cheapest "create, demo, delete" loop +Use `profiles/dev-demo` (public endpoints, `Consumption_0` APIM, `basic`/`free` +search, `key_vault_purge_protection=false`, throwaway MG name). `terraform apply` +→ demo → `terraform destroy`. With purge protection off and the Consumption APIM +SKU, there's nothing that lingers or blocks an immediate rebuild. diff --git a/azure/QUICKSTART-portal.md b/azure/QUICKSTART-portal.md new file mode 100644 index 0000000..37a7818 --- /dev/null +++ b/azure/QUICKSTART-portal.md @@ -0,0 +1,236 @@ +# Quickstart — stand it up from the portal + +This is the click-by-click path to get a **governed insurance agent** running, +in the same order as the Terraform under `azure/`. Each step maps to an IaC file +so you can swap manual setup for code later (`terraform import`, or rebuild in a +non-prod subscription and retire the click-built one). + +Nothing here is meant to go straight to production — review, parameterize, and +run it through your own pipeline first. + +> **Day-1 minimum to demo a governed agent:** steps 1, 2 (a subset of policies), +> 3, and 4. Layer in 5–7 afterwards. + +--- + +## 1. Land the Application Platform landing zone — Azure portal + +> Maps to `azure/landing-zones/application-platform/` (`management-group.tf`, +> `subscription-vending.tf`, `main.tf`). + +1. **Management groups** (`portal.azure.com` → *Management groups*) → if you + don't already have an **Application Platform** management group, **+ Create** + one named `alz-application-platform` under your platform "Landing Zones" MG + (or under the tenant root if you have no MG hierarchy yet). *This is the "if + there's no Application Platform LZ, ensure we do that" step.* +2. 
Move (or create) the **`insurance-app` subscription** and place it under + `alz-application-platform` (*Subscriptions* → pick subscription → *Move* → + target = the new MG; or *+ Add* a new subscription). +3. In that subscription create three resource groups in a region on your + allow-list (e.g. **East US 2**): `rg-insurance-app-workload`, + `rg-insurance-app-ai`, `rg-insurance-app-identity`. +4. *(Optional now — do later with `networking.tf`)* a spoke VNet with subnets + `snet-privateendpoints`, `snet-powerplatform` (delegate to + `Microsoft.PowerPlatform/enterprisePolicies`), `snet-apim`, peered to your + connectivity hub. You can start without VNet injection / private endpoints and + add them before prod. + +--- + +## 2. Policy as code — Azure portal → Policy + +> Maps to `azure/policy-as-code/` (`definitions/*.json`, +> `initiative/ai-agent-governance-initiative.json`, `initiative/policy_assignments.tf`). + +1. **Policy → Definitions → + Policy definition** → set *Definition location* = + `alz-application-platform` → paste the contents of each file in + `azure/policy-as-code/definitions/*.json` (the whole object — the portal reads + the `properties` block) → *Save*. Repeat for all seven. +2. **Policy → Definitions → + Initiative definition** → location = + `alz-application-platform` → *Add policy definition(s)* → add the seven you + just created → on the *Initiative parameters* tab expose `allowedLocations`, + `allowedModelNames`, `requiredTagNames`, `logAnalyticsWorkspaceId` → set the + `effect` group values (Deny / Audit / DeployIfNotExists per the file + comments) → *Save*. +3. 
**Policy → Assignments → Assign initiative** → *Scope* = + `alz-application-platform` → pick the initiative → on *Remediation*, choose + **Create a system-assigned managed identity** (the portal grants it the role + needed for the `DeployIfNotExists` diagnostics policy) → set the parameter + values (your regions, approved model names, the Log Analytics workspace ID + from step 7) → *Review + create*. +4. **Faster, repeatable alternative:** open **Cloud Shell** from the portal and + run the Terraform in `azure/policy-as-code/initiative/` (it publishes the + definitions, builds the initiative, and assigns it with the platform + remediation identity), or `az policy definition create … && az policy + set-definition create … && az policy assignment create …`. + +> Day-1 subset if you want to move fast: assign at least +> *allowed-ai-locations*, *deny-ai-public-network-access*, and +> *require-agent-resource-tags*. Add the rest once the AI plane exists. + +--- + +## 3. Low-code platform — Power Platform admin center (`admin.powerplatform.microsoft.com`) + +> Maps to `azure/workloads/insurance-app/power-platform.tf` and `dlp-policy.tf`; +> connector lists are `variables.tf` → `business_connectors` / +> `non_business_connectors` / `blocked_connectors`. + +1. **Environments → + New** → create `insurance-dev` (*Type* = Sandbox, **Add a + Dataverse data store** = Yes). Repeat for `insurance-test` (Sandbox) and + `insurance-prod` (Production). +2. For each environment: select it → **Edit Managed Environment** → toggle it + **On** → *Sharing* limit makers to **20** users per app → *Solution checker* + = **Block** (block publish on errors) → *Usage insights* = **On (weekly)** → + add the maker onboarding note. *(IP firewall: bind to the spoke / APIM egress + ranges once the network exists.)* +3. 
**Policies → Data policies → + New policy** → name + *"Insurance agent platform – connector governance"*: + - **Business**: the certified/Microsoft-published connectors the apps may use + — Dataverse, SharePoint, Outlook (Office 365), Office 365 Users, Teams, + OneDrive for Business, SQL Server, Azure Blob, Azure Queues, Service Bus, + Event Hubs, **Azure OpenAI**, Text Analytics, Computer Vision, **Document + Intelligence (Form Recognizer)**, AI Builder, Dynamics 365 Business Central, + Salesforce, **DocuSign**, **Adobe Acrobat Sign**, Approvals, Excel Online + (Business), Word Online (Business), Power BI, Azure Automation. + - **Non-Business**: low-risk general connectors (RSS, MSN Weather, Bing Maps). + - **Blocked**: X/Twitter, Facebook, Instagram, YouTube, Dropbox, Box, Google + Drive, Gmail, consumer OneDrive, SMTP "send mail", HTTP-with-Azure-AD/HTTP, + and **non-Azure OpenAI**. + - **Custom connectors** → set a connector pattern that **allows only** + `https://apim-insurance-app.azure-api.net/*` and **blocks `*`** → so makers + can't hand-roll an ungoverned connector. + - **Scope** = these environments → `insurance-dev/test/prod`. +4. **Settings → Tenant settings** → restrict who can create production / trial / + developer environments; turn off broad maker capabilities you don't need. + *(Optional: install the **Power Platform CoE Starter Kit** for the live agent + & app inventory referenced in the governance doc.)* + +--- + +## 4. Create the agent — and its identity — Copilot Studio (`copilotstudio.microsoft.com`) + +> Maps to `azure/workloads/insurance-app/identity-agents.tf` (the per-agent +> identities, the `ai-agents` group, and the Conditional Access note). + +1. Switch to the **`insurance-dev`** environment → **+ Create / New agent** → + e.g. `claims-triage-agent`. 
Because the environment is a **Managed + Environment**, when you publish the agent it is automatically issued a + **Microsoft Entra Agent ID** — that's the secure, traceable, governed identity + (no client secret). +2. Configure the agent: + - **Settings → Security → Authentication** = *Authenticate with Microsoft* + (Microsoft Entra ID). + - **Knowledge** = only the approved sources (SharePoint sites / Dataverse + tables you've sanctioned). + - **Topics / Actions** for sensitive operations (bind quote, authorise + payout) → require **confirmation** (human-in-the-loop). + - **Actions / Tools** → connect only via the Business connectors and your + APIM custom connector from step 6. +3. **Entra admin center** (`entra.microsoft.com`): + - **Identity → Groups → + New group** → security group **`ai-agents`** → add + the agent's identity (and any user-assigned managed identities from the AI + plane) as members. This is the inventory + Conditional Access target. + - **Identity → Applications → Agent ID** (preview) — confirm the agent shows + up here; set an **owner**. + - **Protection → Conditional Access → + New policy** → *Assignments* target + **Workload identities** = the `ai-agents` group → *Conditions*: block + legacy authentication; *Locations*: allow only your named locations (the + spoke / APIM egress IPs) → *Grant*: Block otherwise. *(Conditional Access + for workload identities requires the Microsoft Entra Workload ID add-on.)* + - **Identity Governance → Access reviews → + New** → quarterly review over the + `ai-agents` group; disable then delete any agent with no owner or past its + `expiresOn` tag. + +--- + +## 5. AI plane (for grounded / code-first agents) — Azure AI Foundry portal (`ai.azure.com`) + +> Maps to `azure/workloads/insurance-app/ai-foundry.tf`, +> `key-vault.tf`, `networking.tf`. + +1. 
**+ New hub** in `rg-insurance-app-ai` → attach a Storage account and a Key + Vault → set **Public network access = Disabled** → then **+ New project** in + that hub (`proj-insurance-agents`). +2. **Deployments → + Deploy model** → deploy *only* the approved models: + `gpt-4o`, `gpt-4o-mini`, `text-embedding-3-large` — the + *allowed-aoai-model-deployments* policy will block anything else. +3. Add **Azure AI Search** (`srch-insurance-app`, public access disabled, local + auth disabled) for grounding, and an **Azure AI Content Safety** resource; + enable **Prompt Shields** and create a default blocklist. +4. **Private endpoints** (portal → each resource → *Networking → Private + endpoint connections → + Private endpoint*) into `snet-privateendpoints` for + the Azure OpenAI account, AI Search, Content Safety, the AI Foundry hub, the + storage account, and Key Vault. Disable public access on each. +5. **Microsoft Defender for Cloud** → *Environment settings* → the + `insurance-app` subscription → enable the **AI workloads** plan (prompt-injection + / anomalous-usage detection), plus **Key Vault** and **ARM** plans. +6. Give each agent's **user-assigned managed identity** (from `identity-agents.tf`, + or create them under `rg-insurance-app-identity` in *Managed Identities*) the + scoped roles only: *Cognitive Services OpenAI User* on the AOAI account, + *Search Index Data Reader* on AI Search, *Key Vault Secrets User* on the + vault, *Monitoring Metrics Publisher* on its App Insights. + +--- + +## 6. API connectors — Azure portal → API Management + +> Maps to `azure/workloads/insurance-app/connectors-apim.tf` and the OpenAPI +> specs in `azure/workloads/insurance-app/connectors/`. + +1. **Create a resource → API Management** in `rg-insurance-app-workload` + (Developer tier to start; switch to **Internal VNet** mode against + `snet-apim` when the spoke is ready). Enable its **system-assigned identity** + and give it **Key Vault Secrets User** on the workload vault. +2. 
**APIs → + Add API → OpenAPI** → upload + `connectors/insurance-policy-api.openapi.yaml`, then `connectors/claims-api.openapi.yaml`. +3. On each API → **Design → All operations → Inbound processing → (code + view)** → add a `validate-azure-ad-token` policy (and a `rate-limit`) so every + call is an authenticated Entra ID token — agents call APIM with their managed + identity / Entra Agent ID, never an API key. +4. On each API → **… → Export → Power Platform** (Power Automate / Power Apps) → + choose the `insurance-dev` environment → this publishes the API as a governed + **custom connector**. The DLP rule from step 3 means it's allowed (host = + `apim-insurance-app.azure-api.net`) and nothing else is. +5. **APIM → Monitoring → Diagnostic settings** → stream `GatewayLogs` to the + central Log Analytics workspace (the *require-diagnostic-settings* policy will + also remediate this). + +--- + +## 7. Observability — Azure portal + Purview + +> Maps to `azure/landing-zones/application-platform/monitoring.tf` and +> `azure/workloads/insurance-app/observability.tf`. + +1. **Create a Log Analytics workspace** `law-application-platform` (or reuse the + platform one) → **Microsoft Sentinel → + Add** onto that workspace. +2. For each agent: **Create → Application Insights** → *Workspace-based*, pointed + at `law-application-platform`; tag it `agentName=<agent-name>`. Wire the + connection string into the Copilot Studio agent's analytics / your code-first + agent. +3. **Diagnostic settings** on the Azure OpenAI account, AI Search, Content + Safety, Key Vault, and APIM → send all logs/metrics to + `law-application-platform` (policy enforces; this just avoids the remediation lag). +4. **Microsoft Purview / Compliance portal** → turn on **Audit** → enable **DSPM + for AI** → register the AI Foundry project and the agents' knowledge sources + as data sources → apply sensitivity labels so prompts/responses, data access, + and agent actions are captured and label-aware. 
+ +--- + +## Where to go from the portal to IaC + +| Portal step | IaC to adopt next | +|---|---| +| 1 | `azure/landing-zones/application-platform/` (`terraform import` the MG + subscription association, or rebuild in a non-prod sub) | +| 2 | `azure/policy-as-code/` (definitions + initiative + assignment) | +| 3 | `azure/workloads/insurance-app/power-platform.tf`, `dlp-policy.tf` | +| 4 | `azure/workloads/insurance-app/identity-agents.tf` (+ keep the Conditional Access policy in your identity-governance pipeline) | +| 5 | `azure/workloads/insurance-app/ai-foundry.tf`, `key-vault.tf`, `networking.tf` | +| 6 | `azure/workloads/insurance-app/connectors-apim.tf` (+ the `connectors/*.openapi.yaml` specs) | +| 7 | `azure/landing-zones/application-platform/monitoring.tf`, `azure/workloads/insurance-app/observability.tf` | + +See `azure/docs/caf-ai-agent-governance-mapping.md` for the control-by-control +mapping to the Microsoft CAF "Govern and secure AI agents across your +organization" guidance. diff --git a/azure/README.md b/azure/README.md new file mode 100644 index 0000000..19ae478 --- /dev/null +++ b/azure/README.md @@ -0,0 +1,72 @@ +# Adapt Cloud — Insurance Agent Platform on Azure + +Infrastructure-as-code for an **insurance application** built **low-code first** +(Power Platform + Copilot Studio) and progressively layered with **API +connectors**, deployed inside an **Azure Landing Zone (ALZ)** with **policy as +code** governance. + +This directory is the implementation of the Microsoft Cloud Adoption Framework +guidance **"Govern and secure AI agents across your organization"** +() +and the AI landing zone / Application landing zone guidance. + +## Why this layout + +The CAF says: agents are a **new identity class**, they must run inside an +**Application (workload) landing zone** that sits under an **Application Platform +management group**, and their guardrails must be **policy as code**. 
If your +tenant does not already have an Application Platform landing zone, you must +create one before onboarding agent workloads — so we do. + +``` +azure/ +├── landing-zones/ +│ └── application-platform/ # Creates the Application Platform MG + vends the +│ # insurance-app workload subscription, spoke network, +│ # central Log Analytics, platform managed identities. +├── policy-as-code/ # Azure Policy definitions + AI-agent governance +│ # initiative (policy set) + assignments. Deployed by +│ # the platform team at the Application Platform MG scope. +├── workloads/ +│ └── insurance-app/ # The insurance workload landing zone: +│ # - Power Platform managed environment + DLP (connector +│ # classification) — low-code first +│ # - APIM-fronted custom/API connectors (templates) +│ # - Copilot Studio / AI Foundry agents, each with its +│ # own Entra Agent ID / user-assigned managed identity +│ # - Key Vault, Application Insights, private endpoints +└── docs/ + └── caf-ai-agent-governance-mapping.md # Control-by-control traceability matrix +``` + +> **Just want to get started clicking?** See [`QUICKSTART-portal.md`](QUICKSTART-portal.md) +> for the portal-only path (Azure portal + Power Platform admin center + Copilot +> Studio + AI Foundry), step-by-step, mapped back to each Terraform file. +> +> **Cost & teardown?** See [`COSTS.md`](COSTS.md). Ready-made variable sets — a +> ~$80–150/mo demo and a production posture — are in [`profiles/`](profiles/). + +## Build / deploy order + +1. **Platform team** — `landing-zones/application-platform/` (idempotent: creates + the Application Platform MG and supporting subscription only if absent). +2. **Platform team** — `policy-as-code/` (publishes definitions + the + `ai-agent-governance` initiative and assigns it at the Application Platform MG). +3. **Workload team** — `workloads/insurance-app/` (low-code environment first; + connectors and AI Foundry agents layered on top, all behind the policy guardrails). 
+ +Each directory has its own `README.md`, `providers.tf`, `variables.tf`, and +`terraform.tfvars.example`. Nothing here is meant to go straight to production — +review, parameterize, and run through your own pipeline first. + +## Design principles (from the CAF article) + +| Principle | How it shows up here | +|---|---| +| Agents are first-class identities | Every agent gets a dedicated **Entra Agent ID** (Copilot Studio / AI Foundry) or **user-assigned managed identity**; no shared identities, no human impersonation, no secrets in code. See `workloads/insurance-app/identity-agents.tf`. | +| Least privilege | Scoped RBAC role assignments + scoped connector permissions + Conditional Access targeting agent identities. | +| Run inside a landing zone | The workload is a vended **Application landing zone** subscription peered to the connectivity hub; PaaS is private-endpoint only. | +| Policy as code | `policy-as-code/` — allowed regions, approved models, deny public network access, require diagnostic settings, require ownership/classification tags, audit managed identity / disable local auth. | +| Low-code first, widest connector reach | Power Platform **managed environment** + **DLP policy** classifying the full connector catalog into Business / Non-Business / Blocked; custom **API connectors** generated from OpenAPI specs in `workloads/insurance-app/connectors/`. | +| Traceable / observable | Central Log Analytics, Application Insights per agent, diagnostic settings enforced by policy, Purview/DSPM-for-AI hooks, Defender for Cloud AI threat protection. | +| Adaptive & flexible | Connector classification, model allow-lists, environment settings, and policy parameters are all variables — change governance posture without re-architecting. 
| diff --git a/azure/docs/caf-ai-agent-governance-mapping.md b/azure/docs/caf-ai-agent-governance-mapping.md new file mode 100644 index 0000000..a5f09b8 --- /dev/null +++ b/azure/docs/caf-ai-agent-governance-mapping.md @@ -0,0 +1,90 @@ +# CAF "Govern and secure AI agents across your organization" — control mapping + +Source guidance: +- +- (AI landing zone / Application landing zone) +- (Power Platform DLP) +- (Copilot Studio governance) +- (Microsoft Entra Agent ID — non-human/agent identities) + +The CAF article frames AI-agent governance around five disciplines. This table +maps each recommendation to the artifact in this repo that implements it. + +## 1. Identify and inventory agents + +| CAF recommendation | Implementation | +|---|---| +| Treat every agent as a discrete, named identity (a new identity class). | `workloads/insurance-app/identity-agents.tf` — one `azuread_application` + service principal **or** one `azurerm_user_assigned_identity` per agent (`policy-intake-agent`, `claims-triage-agent`, `underwriting-copilot`). Copilot Studio / AI Foundry agents receive an **Entra Agent ID** automatically when created in a Managed Environment; the Terraform records and tags them. | +| Maintain a catalog/registry of agents with owners. | Tags `agentName`, `agentOwner`, `agentPurpose`, `dataClassification` enforced by the `require-agent-resource-tags` policy; the Power Platform **CoE Starter Kit** (referenced in `workloads/insurance-app/README.md`) provides the live inventory. | +| Decommission unused agents. | Lifecycle notes + `expiresOn` tag in `workloads/insurance-app/variables.tf`; access reviews recommended in `docs` (see "Operating model"). | + +## 2. Govern agent identity and access + +| CAF recommendation | Implementation | +|---|---| +| Give each agent its own identity; never share or impersonate users. | Per-agent identities in `identity-agents.tf`; no agent uses a user account or a shared SP. 
| +| Prefer managed identities / workload identity federation over secrets. | `azurerm_user_assigned_identity` + `azuread_application_federated_identity_credential` for agents that run on Azure compute or in GitHub Actions; **no client secrets are created**. Key material that is unavoidable lives in Key Vault (`key-vault.tf`). | +| Apply least privilege with scoped RBAC and scoped API permissions. | Narrow `azurerm_role_assignment`s (e.g., `Cognitive Services OpenAI User`, `Key Vault Secrets User`, `Search Index Data Reader`) scoped to the specific resource, not the subscription. | +| Apply Conditional Access to agent identities. | `identity-agents.tf` places identities into the `ai-agents` security group; a Conditional Access policy (named `CA-AI-Agents-Restrict`, managed in Entra, documented here) blocks legacy auth, requires the agents to originate from the workload's named locations / IP ranges, and blocks interactive sign-in. | +| Use PIM / just-in-time for any elevated agent access. | Documented in the operating model; elevated roles are eligible-only. | + +## 3. Run agents inside a landing zone (Application Platform landing zone) + +| CAF recommendation | Implementation | +|---|---| +| Place agent workloads in an **Application landing zone** under the **Application Platform** management group. **If one does not exist, create it.** | `landing-zones/application-platform/management-group.tf` creates the `alz-application-platform` management group (under the platform / "Landing Zones" MG) when `create_application_platform_mg = true` (the default) — set it to `false` to reference an existing management group by name instead — then `subscription-vending.tf` vends/associates the `insurance-app` subscription beneath it. | +| Connect the workload spoke to the connectivity hub; use platform DNS, firewall, monitoring. 
| `landing-zones/application-platform/networking.tf` — spoke VNet, subnets (`snet-privateendpoints`, `snet-powerplatform`, `snet-apim`, `snet-workload-compute`), spoke-to-hub VNet peering, and a default route to the hub firewall; the Private DNS zone links are created on the connectivity-hub side (the zone IDs are passed through as an output); `monitoring.tf` sends the management-group activity log to the central Log Analytics workspace. | +| All PaaS over private endpoints; disable public network access. | `workloads/insurance-app/networking.tf` creates private endpoints for AI Foundry, Azure OpenAI, AI Search, Key Vault, Storage, APIM; `publicNetworkAccess = "Disabled"` everywhere; egress is forced through the hub Azure Firewall via UDR. Enforced by the `deny-ai-public-network-access` policy. | + +## 4. Govern data and content (responsible AI) + +| CAF recommendation | Implementation | +|---|---| +| Use Microsoft Purview / DSPM for AI for cataloging, sensitivity labels, DLP, and prompt/response auditing. | `workloads/insurance-app/observability.tf` references the tenant Purview account and enables the AI audit connector; sensitivity-label enforcement is a tenant control documented here. | +| Ground agents only on approved data sources; honor label-based access. | AI Search data sources are explicit inputs in `variables.tf`; the Copilot Studio agents are configured (in the Managed Environment) to use only those knowledge sources. | +| Apply content safety (jailbreak/prompt shields, groundedness, blocklists). | `ai-foundry.tf` provisions an **Azure AI Content Safety** resource and a default blocklist; **Microsoft Defender for Cloud — AI threat protection** plan is enabled in `landing-zones/application-platform/monitoring.tf`. | +| Human-in-the-loop for sensitive actions. | Copilot Studio topics for bind/quote/payout actions are flagged for confirmation; documented in `workloads/insurance-app/README.md`. | + +## 5. Govern with policy as code + +| CAF recommendation | Implementation (`policy-as-code/`) | +|---|---| +| Restrict the regions AI resources can deploy to. 
| `definitions/allowed-ai-locations.json` | +| Allow only approved/managed models. | `definitions/allowed-aoai-model-deployments.json` (parameter: list of `model.name`/`model.version` pairs, e.g. `gpt-4o`, `text-embedding-3-large`). | +| Deny public network access on AI resources / require private endpoints. | `definitions/deny-ai-public-network-access.json` | +| Disable local/key auth on Cognitive Services / AI resources (force Entra ID). | `definitions/deny-cognitive-services-local-auth.json` | +| Audit/require managed identity on agent-hosting resources. | `definitions/audit-managed-identity-on-agents.json` | +| Require diagnostic settings → central Log Analytics for AI + Power Platform resources. | `definitions/require-diagnostic-settings-ai.json` | +| Require ownership/classification tags on agent resources. | `definitions/require-agent-resource-tags.json` | +| Bundle into one initiative and assign at the Application Platform MG. | `initiative/ai-agent-governance-initiative.json` + `initiative/policy_assignments.tf` (assigns to `alz-application-platform` with a managed identity for `deployIfNotExists`/`modify` effects, and creates an exemption hook for break-glass). | + +## 6. Low-code platform governance (Power Platform / Copilot Studio) + +| CAF recommendation | Implementation (`workloads/insurance-app/`) | +|---|---| +| Use an environment strategy with separate dev/test/prod environments. | `power-platform.tf` creates `insurance-dev`, `insurance-test`, `insurance-prod` environments (Dataverse-enabled). | +| Use **Managed Environments** for governance (sharing limits, solution checker, usage insights, IP firewall, CMK). | `power-platform.tf` flips each environment to **Managed**, sets maker sharing limits, enforces Solution Checker on publish, enables weekly usage insights, and binds the environment IP firewall to the spoke ranges. 
| +| Apply **DLP policies** that classify the connector catalog (Business / Non-Business / Blocked) and restrict custom connectors. | `dlp-policy.tf` — a tenant-level "baseline" DLP policy plus an environment-scoped policy: insurance line-of-business + Microsoft 365 + Azure connectors → **Business**; general utility connectors → **Non-Business**; social media, consumer storage, unsanctioned AI, and "send to anyone" connectors → **Blocked**; custom connectors restricted to an approved URL pattern (the APIM gateway host). Connector lists are variables — adaptive. | +| Front custom / API connectors with Azure API Management. | `connectors-apim.tf` provisions an internal-VNet APIM instance; `connectors/insurance-policy-api.openapi.yaml` and `connectors/claims-api.openapi.yaml` are imported as APIM APIs and surfaced as Power Platform **custom connectors** with Entra ID auth. | +| Provide the widest array of template connectivity. | The DLP "Business" group is seeded from the full set of Microsoft-published/certified connectors relevant to insurance (Dataverse, SharePoint, Outlook, Teams, SQL, Azure Blob, Service Bus, Azure OpenAI, AI Builder, DocuSign, Adobe Sign, Salesforce, Dynamics 365, etc.) — see `variables.tf` `business_connectors`. | +| Copilot Studio agents: Entra ID auth, scoped knowledge, ALM via solutions/pipelines, get an Entra Agent ID. | Documented + the Managed Environment + DLP make this enforceable; agent ALM uses Power Platform pipelines (referenced in `README.md`). | + +## 7. Observability and traceability + +| CAF recommendation | Implementation | +|---|---| +| Centralized logging and a SIEM. | Central Log Analytics in `landing-zones/application-platform/monitoring.tf`; Microsoft Sentinel onboarded onto that workspace; diagnostic settings enforced by policy. | +| Per-agent telemetry. | `workloads/insurance-app/observability.tf` — one Application Insights component per agent, connected to the central workspace. 
| +| Audit who/what invoked the agent, which tools/connectors ran, what data was touched, prompts/responses. | Purview AI audit + Power Platform activity logging + APIM request logging to Log Analytics + AI Foundry trace export — every action correlates back to the agent's Entra Agent ID via the `agentName` dimension. | +| AI threat protection. | Defender for Cloud AI plan (prompt-injection / anomalous-usage detection) enabled at the subscription. | + +## Operating model (shared responsibility) + +| Team | Owns | +|---|---| +| **Platform** | `landing-zones/application-platform/`, `policy-as-code/`, hub networking, central Log Analytics/Sentinel, Defender plans, the tenant DLP baseline. | +| **Workload (insurance)** | `workloads/insurance-app/` — the Power Platform environments, the agents, the APIM connectors, the workload Key Vault and App Insights. | +| **Security / Identity** | Entra Agent ID lifecycle, Conditional Access for agent identities, access reviews, Purview labels/DSPM-for-AI, Sentinel content. | + +Access reviews run quarterly over the `ai-agents` group; any agent without an +owner or past its `expiresOn` tag is disabled then deleted. diff --git a/azure/landing-zones/application-platform/README.md b/azure/landing-zones/application-platform/README.md new file mode 100644 index 0000000..a5be81b --- /dev/null +++ b/azure/landing-zones/application-platform/README.md @@ -0,0 +1,28 @@ +# Application Platform landing zone + +Per the CAF AI-agent governance guidance, agent workloads must live in an +**Application landing zone** under an **Application Platform management group**. +This module **creates that management group if it does not already exist** and +then vends the `insurance-app` workload landing zone underneath it, wires its +spoke network to the connectivity hub, and connects it to central monitoring. + +Run this as the **platform team**, before any workload or policy deployment. 
+ +## What it provisions + +| File | Resources | +|---|---| +| `management-group.tf` | `alz-application-platform` management group (child of the platform "Landing Zones" MG, or of the tenant root when `platform_landing_zones_mg_id` is empty). Created when `create_application_platform_mg = true` (the default); otherwise looked up by name through a data source. | +| `subscription-vending.tf` | Associates (or vends, via `azurerm_subscription`) the `insurance-app` subscription and moves it under `alz-application-platform`. Applies platform tags and a budget. | +| `networking.tf` | Spoke VNet (`vnet-insurance-app`), subnets — `snet-privateendpoints`, `snet-powerplatform` (delegated to `Microsoft.PowerPlatform/enterprisePolicies` for VNet injection), `snet-apim`, `snet-workload-compute`; spoke-to-hub VNet peering; route table forcing egress from `snet-apim` and `snet-workload-compute` to the hub firewall. Private DNS zone links are left to the connectivity-hub configuration. | +| `identity.tf` | Platform user-assigned managed identity used by the policy initiative's `deployIfNotExists`/`modify` remediations; the `ai-agents` Entra security group that agent identities are placed into (Conditional Access target). | +| `monitoring.tf` | Central Log Analytics workspace (or reference to the platform one), Microsoft Sentinel onboarding, Defender for Cloud plans including **AI threat protection**, an Activity Log diagnostic setting. | + +## Inputs + +See `variables.tf` and `terraform.tfvars.example`. Key ones: + +- `platform_landing_zones_mg_id` — the existing platform "Landing Zones" MG to nest under. +- `hub_vnet_id`, `hub_firewall_private_ip`, `platform_private_dns_zone_ids` — connectivity-hub references. +- `insurance_subscription_id` — the subscription to place under the new MG (leave blank to vend a new one if you have an EA/MCA billing scope). +- `central_log_analytics_workspace_id` — set to reuse the platform workspace instead of creating one. 
diff --git a/azure/landing-zones/application-platform/identity.tf b/azure/landing-zones/application-platform/identity.tf new file mode 100644 index 0000000..36e05c2 --- /dev/null +++ b/azure/landing-zones/application-platform/identity.tf @@ -0,0 +1,61 @@ +# --------------------------------------------------------------------------- +# Platform-side identities for the Application Platform landing zone. +# --------------------------------------------------------------------------- + +# Managed identity used by the AI-agent governance policy initiative for +# deployIfNotExists / modify remediation tasks (see ../policy-as-code). +resource "azurerm_resource_group" "platform_identity" { + name = "rg-application-platform-identity" + location = var.location + tags = var.platform_tags +} + +resource "azurerm_user_assigned_identity" "policy_remediation" { + name = "id-ai-agent-policy-remediation" + location = azurerm_resource_group.platform_identity.location + resource_group_name = azurerm_resource_group.platform_identity.name + tags = var.platform_tags +} + +# Grant the remediation identity the rights it needs at the Application Platform MG. +# skip_service_principal_aad_check: the identity is created in this same apply, so +# skip the Entra lookup that can fail while the new principal is still replicating. +resource "azurerm_role_assignment" "policy_remediation_contributor" { + scope = local.application_platform_mg_id + role_definition_name = "Contributor" + principal_id = azurerm_user_assigned_identity.policy_remediation.principal_id + skip_service_principal_aad_check = true +} + +resource "azurerm_role_assignment" "policy_remediation_monitoring" { + scope = local.application_platform_mg_id + role_definition_name = "Monitoring Contributor" + principal_id = azurerm_user_assigned_identity.policy_remediation.principal_id + skip_service_principal_aad_check = true +} + +# Entra security group that every agent identity is placed into. This is the +# Conditional Access target ("CA-AI-Agents-Restrict") that blocks legacy auth, +# blocks interactive sign-in, and pins agents to named locations. 
+resource "azuread_group" "ai_agents" { + display_name = var.ai_agents_group_name + description = "All AI agent workload identities (Entra Agent ID / managed identities). Conditional Access target. Reviewed quarterly." + security_enabled = true + + lifecycle { + # Members are managed by the workload module as agents come and go. + ignore_changes = [members] + } +} + +output "policy_remediation_identity_id" { + value = azurerm_user_assigned_identity.policy_remediation.id +} + +output "policy_remediation_identity_principal_id" { + value = azurerm_user_assigned_identity.policy_remediation.principal_id +} + +output "ai_agents_group_object_id" { + value = azuread_group.ai_agents.object_id +} diff --git a/azure/landing-zones/application-platform/management-group.tf b/azure/landing-zones/application-platform/management-group.tf new file mode 100644 index 0000000..d480665 --- /dev/null +++ b/azure/landing-zones/application-platform/management-group.tf @@ -0,0 +1,53 @@ +# --------------------------------------------------------------------------- +# Application Platform management group — create-or-reference. +# +# CAF: agent workloads must sit in an Application landing zone under an +# "Application Platform" management group. If your tenant does not have one yet, +# leave var.create_application_platform_mg = true (the default) and this creates +# it. If it already exists, set the var to false and supply its name; we just +# reference it. +# --------------------------------------------------------------------------- + +locals { + # Parent MG: the platform "Landing Zones" MG if supplied, else the tenant root. + application_platform_parent_id = ( + var.platform_landing_zones_mg_id != "" + ? var.platform_landing_zones_mg_id + : "/providers/Microsoft.Management/managementGroups/${var.tenant_root_management_group_id}" + ) +} + +resource "azurerm_management_group" "application_platform" { + count = var.create_application_platform_mg ? 
1 : 0 + name = var.application_platform_mg_name + display_name = var.application_platform_mg_display_name + parent_management_group_id = local.application_platform_parent_id + + lifecycle { + # Don't claw back subscriptions associated out of band (e.g. by subscription_association below). + ignore_changes = [subscription_ids] + } +} + +data "azurerm_management_group" "application_platform" { + count = var.create_application_platform_mg ? 0 : 1 + name = var.application_platform_mg_name +} + +locals { + application_platform_mg_id = ( + var.create_application_platform_mg + ? azurerm_management_group.application_platform[0].id + : data.azurerm_management_group.application_platform[0].id + ) +} + +output "application_platform_mg_id" { + description = "Resource ID of the Application Platform management group (created here, or referenced)." + value = local.application_platform_mg_id +} + +output "application_platform_mg_created" { + description = "True if this run created the Application Platform management group." + value = var.create_application_platform_mg +} diff --git a/azure/landing-zones/application-platform/monitoring.tf b/azure/landing-zones/application-platform/monitoring.tf new file mode 100644 index 0000000..d07e4c4 --- /dev/null +++ b/azure/landing-zones/application-platform/monitoring.tf @@ -0,0 +1,74 @@ +# --------------------------------------------------------------------------- +# Central monitoring + threat protection for the landing zone. +# --------------------------------------------------------------------------- + +locals { + create_workspace = var.central_log_analytics_workspace_id == "" +} + +resource "azurerm_resource_group" "monitoring" { + count = local.create_workspace ? 1 : 0 + name = "rg-application-platform-monitoring" + location = var.location + tags = var.platform_tags +} + +resource "azurerm_log_analytics_workspace" "central" { + count = local.create_workspace ? 
1 : 0 + name = "law-application-platform" + location = azurerm_resource_group.monitoring[0].location + resource_group_name = azurerm_resource_group.monitoring[0].name + sku = "PerGB2018" + retention_in_days = var.log_analytics_retention_days + tags = var.platform_tags +} + +locals { + central_log_analytics_workspace_id = ( + local.create_workspace + ? azurerm_log_analytics_workspace.central[0].id + : var.central_log_analytics_workspace_id + ) +} + +# Microsoft Sentinel onto the central workspace (SIEM for agent activity). +resource "azurerm_sentinel_log_analytics_workspace_onboarding" "central" { + count = local.create_workspace ? 1 : 0 + workspace_id = local.central_log_analytics_workspace_id +} + +# Activity log -> central workspace at the Application Platform MG scope. +resource "azurerm_monitor_diagnostic_setting" "mg_activity" { + name = "to-central-law" + target_resource_id = local.application_platform_mg_id + log_analytics_workspace_id = local.central_log_analytics_workspace_id + + enabled_log { category = "Administrative" } + enabled_log { category = "Security" } + enabled_log { category = "Policy" } +} + +# Defender for Cloud — AI workloads threat protection (prompt injection / +# anomalous usage) on the insurance workload subscription. +resource "azurerm_security_center_subscription_pricing" "ai" { + count = var.enable_defender_ai_threat_protection ? 
1 : 0 + provider = azurerm.workload + tier = "Standard" + resource_type = "AI" +} + +resource "azurerm_security_center_subscription_pricing" "key_vaults" { + provider = azurerm.workload + tier = "Standard" + resource_type = "KeyVaults" +} + +resource "azurerm_security_center_subscription_pricing" "arm" { + provider = azurerm.workload + tier = "Standard" + resource_type = "Arm" +} + +output "central_log_analytics_workspace_id" { + value = local.central_log_analytics_workspace_id +} diff --git a/azure/landing-zones/application-platform/networking.tf b/azure/landing-zones/application-platform/networking.tf new file mode 100644 index 0000000..963514c --- /dev/null +++ b/azure/landing-zones/application-platform/networking.tf @@ -0,0 +1,123 @@ +# --------------------------------------------------------------------------- +# Workload spoke network for the insurance-app Application landing zone. +# Lives in the workload subscription; peered to the connectivity hub. +# --------------------------------------------------------------------------- + +resource "azurerm_resource_group" "spoke_network" { + provider = azurerm.workload + name = "rg-insurance-app-network" + location = var.location + tags = var.platform_tags +} + +resource "azurerm_virtual_network" "spoke" { + provider = azurerm.workload + name = "vnet-insurance-app" + location = azurerm_resource_group.spoke_network.location + resource_group_name = azurerm_resource_group.spoke_network.name + address_space = var.spoke_address_space + tags = var.platform_tags +} + +resource "azurerm_subnet" "private_endpoints" { + provider = azurerm.workload + name = "snet-privateendpoints" + resource_group_name = azurerm_resource_group.spoke_network.name + virtual_network_name = azurerm_virtual_network.spoke.name + address_prefixes = [var.subnet_prefixes.private_endpoints] +} + +# Delegated subnet for Power Platform VNet injection (enterprise policy). 
+resource "azurerm_subnet" "power_platform" { + provider = azurerm.workload + name = "snet-powerplatform" + resource_group_name = azurerm_resource_group.spoke_network.name + virtual_network_name = azurerm_virtual_network.spoke.name + address_prefixes = [var.subnet_prefixes.power_platform] + + delegation { + name = "powerplatform-vnet-injection" + service_delegation { + name = "Microsoft.PowerPlatform/enterprisePolicies" + actions = ["Microsoft.Network/virtualNetworks/subnets/join/action"] + } + } +} + +resource "azurerm_subnet" "apim" { + provider = azurerm.workload + name = "snet-apim" + resource_group_name = azurerm_resource_group.spoke_network.name + virtual_network_name = azurerm_virtual_network.spoke.name + address_prefixes = [var.subnet_prefixes.apim] +} + +resource "azurerm_subnet" "workload_compute" { + provider = azurerm.workload + name = "snet-workload-compute" + resource_group_name = azurerm_resource_group.spoke_network.name + virtual_network_name = azurerm_virtual_network.spoke.name + address_prefixes = [var.subnet_prefixes.workload_compute] +} + +# Route everything not local out through the hub Azure Firewall. +resource "azurerm_route_table" "spoke" { + provider = azurerm.workload + name = "rt-insurance-app-default" + location = azurerm_resource_group.spoke_network.location + resource_group_name = azurerm_resource_group.spoke_network.name + tags = var.platform_tags + + route { + name = "to-hub-firewall" + address_prefix = "0.0.0.0/0" + next_hop_type = "VirtualAppliance" + next_hop_in_ip_address = var.hub_firewall_private_ip + } +} + +resource "azurerm_subnet_route_table_association" "apim" { + provider = azurerm.workload + subnet_id = azurerm_subnet.apim.id + route_table_id = azurerm_route_table.spoke.id +} + +resource "azurerm_subnet_route_table_association" "workload_compute" { + provider = azurerm.workload + subnet_id = azurerm_subnet.workload_compute.id + route_table_id = azurerm_route_table.spoke.id +} + +# Peering: spoke -> hub. 
+resource "azurerm_virtual_network_peering" "spoke_to_hub" { + provider = azurerm.workload + name = "peer-insurance-app-to-hub" + resource_group_name = azurerm_resource_group.spoke_network.name + virtual_network_name = azurerm_virtual_network.spoke.name + remote_virtual_network_id = var.hub_vnet_id + allow_forwarded_traffic = true + allow_virtual_network_access = true + use_remote_gateways = true +} + +# NOTE: the hub -> spoke peering, DNS forwarding, and the Private DNS zone +# virtual-network links are owned by the connectivity-hub configuration (the +# Private DNS zones live in the platform connectivity subscription, so their VNet +# links must be created there). After this module runs, the platform team links +# `azurerm_virtual_network.spoke.id` to each zone in var.platform_private_dns_zone_ids +# — or, more commonly, the ALZ "Deploy Private DNS zone group" policy does it +# automatically when private endpoints are created in the spoke. The zone IDs are +# passed through as an output so that step can be automated. + +output "spoke_vnet_id" { + value = azurerm_virtual_network.spoke.id +} + +output "subnet_ids" { + value = { + private_endpoints = azurerm_subnet.private_endpoints.id + power_platform = azurerm_subnet.power_platform.id + apim = azurerm_subnet.apim.id + workload_compute = azurerm_subnet.workload_compute.id + } +} diff --git a/azure/landing-zones/application-platform/outputs.tf b/azure/landing-zones/application-platform/outputs.tf new file mode 100644 index 0000000..75c6e2f --- /dev/null +++ b/azure/landing-zones/application-platform/outputs.tf @@ -0,0 +1,21 @@ +# Aggregated outputs consumed by ../policy-as-code and ../workloads/insurance-app. + +output "landing_zone_summary" { + description = "Key identifiers for downstream modules." 
+ value = { + application_platform_mg_id = local.application_platform_mg_id + application_platform_mg_created = var.create_application_platform_mg + insurance_subscription_id = local.insurance_subscription_id + platform_private_dns_zone_ids = var.platform_private_dns_zone_ids + spoke_vnet_id = azurerm_virtual_network.spoke.id + subnet_ids = { + private_endpoints = azurerm_subnet.private_endpoints.id + power_platform = azurerm_subnet.power_platform.id + apim = azurerm_subnet.apim.id + workload_compute = azurerm_subnet.workload_compute.id + } + central_log_analytics_workspace_id = local.central_log_analytics_workspace_id + policy_remediation_identity_id = azurerm_user_assigned_identity.policy_remediation.id + ai_agents_group_object_id = azuread_group.ai_agents.object_id + } +} diff --git a/azure/landing-zones/application-platform/providers.tf b/azure/landing-zones/application-platform/providers.tf new file mode 100644 index 0000000..fcf19b3 --- /dev/null +++ b/azure/landing-zones/application-platform/providers.tf @@ -0,0 +1,32 @@ +terraform { + required_version = ">= 1.6.0" + + required_providers { + azurerm = { + source = "hashicorp/azurerm" + version = "~> 4.0" + } + azuread = { + source = "hashicorp/azuread" + version = "~> 3.0" + } + } + + # Recommended: remote state in a platform-owned storage account, not local. + # backend "azurerm" {} +} + +provider "azurerm" { + features {} + subscription_id = var.platform_management_subscription_id +} + +# Aliased provider scoped to the workload subscription (used for the spoke network +# resources that live inside the vended Application landing zone). 
+provider "azurerm" { + alias = "workload" + features {} + subscription_id = var.insurance_subscription_id +} + +provider "azuread" {} diff --git a/azure/landing-zones/application-platform/subscription-vending.tf b/azure/landing-zones/application-platform/subscription-vending.tf new file mode 100644 index 0000000..b2e5239 --- /dev/null +++ b/azure/landing-zones/application-platform/subscription-vending.tf @@ -0,0 +1,82 @@ +# --------------------------------------------------------------------------- +# Insurance-app Application landing zone — subscription association / vending. +# +# Either move an existing subscription under the Application Platform MG, or vend +# a brand-new one from the billing scope. Apply platform tags + a budget. +# +# NOTE(review): providers.tf configures azurerm.workload from +# var.insurance_subscription_id, which is empty on the vending path — confirm the +# workload-scoped resources can be planned in the same run as the vend. +# --------------------------------------------------------------------------- + +locals { + vend_new_subscription = var.insurance_subscription_id == "" && var.billing_scope_id != "" +} + +resource "azurerm_subscription" "insurance" { + count = local.vend_new_subscription ? 1 : 0 + subscription_name = "sub-insurance-agent-platform-prod" + billing_scope_id = var.billing_scope_id + alias = "insurance-agent-platform-prod" + tags = var.platform_tags +} + +locals { + insurance_subscription_id = ( + local.vend_new_subscription + ? azurerm_subscription.insurance[0].subscription_id + : var.insurance_subscription_id + ) + insurance_subscription_resource_id = "/subscriptions/${local.insurance_subscription_id}" +} + +# Place the workload subscription under the Application Platform MG. +resource "azurerm_management_group_subscription_association" "insurance" { + management_group_id = local.application_platform_mg_id + subscription_id = local.insurance_subscription_resource_id + + lifecycle { + # With neither input set, the ID above collapses to "/subscriptions/"; stop with a clear message instead. + precondition { + condition = var.insurance_subscription_id != "" || var.billing_scope_id != "" + error_message = "Set insurance_subscription_id to an existing subscription, or billing_scope_id to vend a new one." + } + } +} + +# Guardrail budget on the workload subscription. 
+resource "azurerm_consumption_budget_subscription" "insurance" { + name = "budget-insurance-agent-platform" + subscription_id = local.insurance_subscription_resource_id + amount = 5000 + time_grain = "Monthly" + + time_period { + start_date = formatdate("YYYY-MM-01'T'00:00:00'Z'", timestamp()) + } + + notification { + enabled = true + threshold = 80 + operator = "GreaterThanOrEqualTo" + threshold_type = "Actual" + contact_roles = ["Owner", "Contributor"] + } + + notification { + enabled = true + threshold = 100 + operator = "GreaterThanOrEqualTo" + threshold_type = "Forecasted" + contact_roles = ["Owner"] + } + + lifecycle { + ignore_changes = [time_period] + } +} + +output "insurance_subscription_id" { + description = "Subscription ID hosting the insurance-app Application landing zone." + value = local.insurance_subscription_id +} diff --git a/azure/landing-zones/application-platform/terraform.tfvars.example b/azure/landing-zones/application-platform/terraform.tfvars.example new file mode 100644 index 0000000..4fce9fc --- /dev/null +++ b/azure/landing-zones/application-platform/terraform.tfvars.example @@ -0,0 +1,27 @@ +# Copy to terraform.tfvars and fill in. Do NOT commit the real file. 
+ +platform_management_subscription_id = "00000000-0000-0000-0000-000000000000" +insurance_subscription_id = "11111111-1111-1111-1111-111111111111" # leave "" to vend a new one +billing_scope_id = "" # required only when vending + +location = "eastus2" +tenant_root_management_group_id = "22222222-2222-2222-2222-222222222222" +platform_landing_zones_mg_id = "/providers/Microsoft.Management/managementGroups/alz-landingzones" +application_platform_mg_name = "alz-application-platform" + +hub_vnet_id = "/subscriptions/.../resourceGroups/rg-hub-network/providers/Microsoft.Network/virtualNetworks/vnet-hub" +hub_firewall_private_ip = "10.0.1.4" + +platform_private_dns_zone_ids = { + "privatelink.openai.azure.com" = "/subscriptions/.../resourceGroups/rg-hub-dns/providers/Microsoft.Network/privateDnsZones/privatelink.openai.azure.com" + "privatelink.cognitiveservices.azure.com" = "/subscriptions/.../resourceGroups/rg-hub-dns/providers/Microsoft.Network/privateDnsZones/privatelink.cognitiveservices.azure.com" + "privatelink.search.windows.net" = "/subscriptions/.../resourceGroups/rg-hub-dns/providers/Microsoft.Network/privateDnsZones/privatelink.search.windows.net" + "privatelink.vaultcore.azure.net" = "/subscriptions/.../resourceGroups/rg-hub-dns/providers/Microsoft.Network/privateDnsZones/privatelink.vaultcore.azure.net" + "privatelink.blob.core.windows.net" = "/subscriptions/.../resourceGroups/rg-hub-dns/providers/Microsoft.Network/privateDnsZones/privatelink.blob.core.windows.net" + "privatelink.azure-api.net" = "/subscriptions/.../resourceGroups/rg-hub-dns/providers/Microsoft.Network/privateDnsZones/privatelink.azure-api.net" + "privatelink.api.azureml.ms" = "/subscriptions/.../resourceGroups/rg-hub-dns/providers/Microsoft.Network/privateDnsZones/privatelink.api.azureml.ms" + "privatelink.notebooks.azure.net" = "/subscriptions/.../resourceGroups/rg-hub-dns/providers/Microsoft.Network/privateDnsZones/privatelink.notebooks.azure.net" +} + 
+central_log_analytics_workspace_id = "" # set to reuse the platform workspace +enable_defender_ai_threat_protection = true diff --git a/azure/landing-zones/application-platform/variables.tf b/azure/landing-zones/application-platform/variables.tf new file mode 100644 index 0000000..26e94d4 --- /dev/null +++ b/azure/landing-zones/application-platform/variables.tf @@ -0,0 +1,130 @@ +variable "platform_management_subscription_id" { + description = "Subscription ID of the platform management subscription (where Terraform authenticates and where the central Log Analytics workspace lives if you create one here)." + type = string +} + +variable "insurance_subscription_id" { + description = "Subscription ID for the insurance-app Application landing zone. This subscription is moved under the Application Platform management group. Leave empty to vend a new one (requires var.billing_scope_id)." + type = string + default = "" +} + +variable "billing_scope_id" { + description = "EA/MCA billing scope used to vend a new subscription when insurance_subscription_id is empty. Example: /providers/Microsoft.Billing/billingAccounts/xxxx/billingProfiles/yyyy/invoiceSections/zzzz." + type = string + default = "" +} + +variable "location" { + description = "Primary Azure region for the landing zone (must be in the policy allow-list)." + type = string + default = "eastus2" +} + +variable "tenant_root_management_group_id" { + description = "Tenant root management group ID (the tenant ID), used as a fallback parent if the platform Landing Zones MG is not supplied." + type = string +} + +variable "platform_landing_zones_mg_id" { + description = "Resource ID of the existing platform 'Landing Zones' management group to nest the Application Platform MG under. If empty, the Application Platform MG is created directly under the tenant root." 
+ type = string + default = "" +} + +variable "create_application_platform_mg" { + description = "If true (default), create the Application Platform management group — i.e. you don't have an Application Platform landing zone yet. Set to false to reference an existing one by name." + type = bool + default = true +} + +variable "application_platform_mg_name" { + description = "Name (ID) of the Application Platform management group to create or reference." + type = string + default = "alz-application-platform" +} + +variable "application_platform_mg_display_name" { + description = "Display name for the Application Platform management group." + type = string + default = "Application Platform" +} + +# --------------------------------------------------------------------------- +# Connectivity hub references (platform-owned) +# --------------------------------------------------------------------------- +variable "hub_vnet_id" { + description = "Resource ID of the connectivity hub VNet to peer the workload spoke with." + type = string +} + +variable "hub_firewall_private_ip" { + description = "Private IP of the hub Azure Firewall; used as the next hop for the spoke's default route." + type = string +} + +variable "platform_private_dns_zone_ids" { + description = "Map of Private DNS zone name => resource ID for the PaaS the workload uses (privatelink.openai.azure.com, privatelink.cognitiveservices.azure.com, privatelink.search.windows.net, privatelink.vaultcore.azure.net, privatelink.blob.core.windows.net, privatelink.azure-api.net, privatelink.api.azureml.ms, privatelink.notebooks.azure.net)." + type = map(string) +} + +# --------------------------------------------------------------------------- +# Spoke network +# --------------------------------------------------------------------------- +variable "spoke_address_space" { + description = "Address space for the workload spoke VNet." 
+ type = list(string) + default = ["10.40.0.0/22"] +} + +variable "subnet_prefixes" { + description = "Subnet prefixes inside the spoke VNet." + type = map(string) + default = { + private_endpoints = "10.40.0.0/24" + power_platform = "10.40.1.0/24" # delegated for Power Platform VNet injection + apim = "10.40.2.0/24" + workload_compute = "10.40.3.0/24" + } +} + +# --------------------------------------------------------------------------- +# Monitoring +# --------------------------------------------------------------------------- +variable "central_log_analytics_workspace_id" { + description = "Existing platform Log Analytics workspace resource ID to reuse. If empty, a new one is created in the platform management subscription." + type = string + default = "" +} + +variable "log_analytics_retention_days" { + description = "Retention (days) when this module creates the workspace." + type = number + default = 90 +} + +variable "enable_defender_ai_threat_protection" { + description = "Enable Microsoft Defender for Cloud AI workloads plan on the workload subscription." + type = bool + default = true +} + +# --------------------------------------------------------------------------- +# Tagging +# --------------------------------------------------------------------------- +variable "platform_tags" { + description = "Tags applied to platform-managed resources in this landing zone." + type = map(string) + default = { + managedBy = "platform-team" + iac = "terraform" + landingZone = "application-platform" + workload = "insurance-agent-platform" + } +} + +variable "ai_agents_group_name" { + description = "Display name of the Entra security group that all agent identities are placed into (Conditional Access target)." 
+  type        = string
+  default     = "ai-agents"
+}
diff --git a/azure/policy-as-code/README.md b/azure/policy-as-code/README.md
new file mode 100644
index 0000000..d36659d
--- /dev/null
+++ b/azure/policy-as-code/README.md
@@ -0,0 +1,42 @@
+# Policy as code — AI agent governance
+
+Azure Policy definitions and the `aiagent-governance` initiative (policy set)
+that the platform team assigns at the **Application Platform management group**
+scope. This is the "governed with policy as code" requirement from the CAF
+AI-agent guidance.
+
+## Contents
+
+| Path | Purpose |
+|---|---|
+| `definitions/allowed-ai-locations.json` | Deny Cognitive Services / Azure OpenAI, AI Search, ML / AI Foundry workspace, APIM, Key Vault, and Storage resources outside the approved region list. |
+| `definitions/allowed-aoai-model-deployments.json` | Deny Azure OpenAI / AI Foundry model deployments whose `model.name` is not on the approved list. |
+| `definitions/deny-ai-public-network-access.json` | Deny Cognitive Services accounts (incl. OpenAI), AI Search, ML/AI Foundry workspaces, Key Vault, and Storage with `publicNetworkAccess` enabled — forces private endpoints. |
+| `definitions/deny-cognitive-services-local-auth.json` | Deny Cognitive Services / OpenAI accounts that allow local (key) auth — forces Entra ID auth so every call is attributable to an agent identity. |
+| `definitions/audit-managed-identity-on-agents.json` | Audit AI-hosting resources (Cognitive Services, App Service/Functions, Container Apps, AML online endpoints) that do not have a managed identity assigned. |
+| `definitions/require-diagnostic-settings-ai.json` | `deployIfNotExists` — ensure Cognitive Services / OpenAI accounts stream diagnostics to the central Log Analytics workspace. AI Search, ML workspaces, and APIM need companion definitions that follow the same pattern. |
+| `definitions/require-agent-resource-tags.json` | `deny`/`audit` — require `agentOwner`, `agentPurpose`, `dataClassification`, and `expiresOn` tags on resource groups and AI resources. |
+| `initiative/ai-agent-governance-initiative.json` | The policy set bundling all of the above with sensible default parameters. |
+| `initiative/policy_assignments.tf` | Publishes the definitions + initiative and assigns the initiative at the Application Platform MG with a user-assigned identity for the `deployIfNotExists`/`modify` effects. |
+
+## Deploy
+
+```bash
+cd azure/policy-as-code/initiative
+terraform init
+terraform apply \
+  -var "management_group_id=/providers/Microsoft.Management/managementGroups/alz-application-platform" \
+  -var "central_log_analytics_workspace_id=/subscriptions/.../workspaces/law-application-platform" \
+  -var "policy_remediation_identity_id=/subscriptions/.../userAssignedIdentities/id-ai-agent-policy-remediation" \
+  -var 'allowed_locations=["eastus2","swedencentral"]' \
+  -var 'allowed_aoai_model_names=["gpt-4o","gpt-4o-mini","text-embedding-3-large"]'
+```
+
+(Or wire the values straight from the landing-zone module's `landing_zone_summary` output.)
+
+## Adaptive by design
+
+`allowed_locations`, `allowed_aoai_model_names`, the required tag list, and the
+diagnostic-settings target are all initiative parameters — tighten or relax the
+posture by changing the assignment, not the code. Add an
+`azurerm_management_group_policy_exemption` for break-glass when genuinely needed.
diff --git a/azure/policy-as-code/definitions/allowed-ai-locations.json b/azure/policy-as-code/definitions/allowed-ai-locations.json
new file mode 100644
index 0000000..1e366d9
--- /dev/null
+++ b/azure/policy-as-code/definitions/allowed-ai-locations.json
@@ -0,0 +1,53 @@
+{
+  "properties": {
+    "displayName": "AI agent platform - allowed locations",
+    "policyType": "Custom",
+    "mode": "Indexed",
+    "description": "Restricts the regions in which AI agent platform resources (Cognitive Services / Azure OpenAI, AI Search, ML / AI Foundry workspaces, API Management, Key Vault, Storage) can be created.
Aligns with the CAF guidance to constrain where agents and their data live.", + "metadata": { + "category": "AI agent governance", + "version": "1.0.0" + }, + "parameters": { + "allowedLocations": { + "type": "Array", + "metadata": { + "displayName": "Allowed locations", + "description": "List of regions where AI agent platform resources may be deployed.", + "strongType": "location" + }, + "defaultValue": ["eastus2", "swedencentral"] + }, + "effect": { + "type": "String", + "allowedValues": ["Deny", "Audit", "Disabled"], + "defaultValue": "Deny", + "metadata": { "displayName": "Effect" } + } + }, + "policyRule": { + "if": { + "allOf": [ + { + "field": "type", + "in": [ + "Microsoft.CognitiveServices/accounts", + "Microsoft.Search/searchServices", + "Microsoft.MachineLearningServices/workspaces", + "Microsoft.ApiManagement/service", + "Microsoft.KeyVault/vaults", + "Microsoft.Storage/storageAccounts" + ] + }, + { + "field": "location", + "notIn": "[parameters('allowedLocations')]" + } + ] + }, + "then": { + "effect": "[parameters('effect')]" + } + } + } +} diff --git a/azure/policy-as-code/definitions/allowed-aoai-model-deployments.json b/azure/policy-as-code/definitions/allowed-aoai-model-deployments.json new file mode 100644 index 0000000..7f55baa --- /dev/null +++ b/azure/policy-as-code/definitions/allowed-aoai-model-deployments.json @@ -0,0 +1,45 @@ +{ + "properties": { + "displayName": "AI agent platform - allow only approved model deployments", + "policyType": "Custom", + "mode": "All", + "description": "Denies Azure OpenAI / Azure AI Foundry model deployments whose model name is not on the approved list. Implements the CAF recommendation to allow only governed/approved models for agents.", + "metadata": { + "category": "AI agent governance", + "version": "1.0.0" + }, + "parameters": { + "allowedModelNames": { + "type": "Array", + "metadata": { + "displayName": "Allowed model names", + "description": "List of approved model names, e.g. 
gpt-4o, gpt-4o-mini, o3-mini, text-embedding-3-large." + }, + "defaultValue": ["gpt-4o", "gpt-4o-mini", "text-embedding-3-large"] + }, + "effect": { + "type": "String", + "allowedValues": ["Deny", "Audit", "Disabled"], + "defaultValue": "Deny", + "metadata": { "displayName": "Effect" } + } + }, + "policyRule": { + "if": { + "allOf": [ + { + "field": "type", + "equals": "Microsoft.CognitiveServices/accounts/deployments" + }, + { + "field": "Microsoft.CognitiveServices/accounts/deployments/model.name", + "notIn": "[parameters('allowedModelNames')]" + } + ] + }, + "then": { + "effect": "[parameters('effect')]" + } + } + } +} diff --git a/azure/policy-as-code/definitions/audit-managed-identity-on-agents.json b/azure/policy-as-code/definitions/audit-managed-identity-on-agents.json new file mode 100644 index 0000000..50b81da --- /dev/null +++ b/azure/policy-as-code/definitions/audit-managed-identity-on-agents.json @@ -0,0 +1,44 @@ +{ + "properties": { + "displayName": "AI agent platform - agent-hosting resources must use a managed identity", + "policyType": "Custom", + "mode": "Indexed", + "description": "Audits resources that commonly host or invoke AI agents (Cognitive Services / OpenAI accounts, App Service & Functions, Container Apps, Azure ML online endpoints) that do not have a system- or user-assigned managed identity. 
Supports the CAF recommendation to prefer managed identities over secrets for agent identities.",
+    "metadata": {
+      "category": "AI agent governance",
+      "version": "1.0.0"
+    },
+    "parameters": {
+      "effect": {
+        "type": "String",
+        "allowedValues": ["Audit", "Deny", "Disabled"],
+        "defaultValue": "Audit",
+        "metadata": { "displayName": "Effect" }
+      }
+    },
+    "policyRule": {
+      "if": {
+        "allOf": [
+          {
+            "field": "type",
+            "in": [
+              "Microsoft.CognitiveServices/accounts",
+              "Microsoft.Web/sites",
+              "Microsoft.App/containerApps",
+              "Microsoft.MachineLearningServices/workspaces/onlineEndpoints"
+            ]
+          },
+          {
+            "anyOf": [
+              { "field": "identity.type", "exists": false },
+              { "field": "identity.type", "equals": "None" }
+            ]
+          }
+        ]
+      },
+      "then": {
+        "effect": "[parameters('effect')]"
+      }
+    }
+  }
+}
diff --git a/azure/policy-as-code/definitions/deny-ai-public-network-access.json b/azure/policy-as-code/definitions/deny-ai-public-network-access.json
new file mode 100644
index 0000000..22ffd9d
--- /dev/null
+++ b/azure/policy-as-code/definitions/deny-ai-public-network-access.json
@@ -0,0 +1,59 @@
+{
+  "properties": {
+    "displayName": "AI agent platform - deny public network access on AI and supporting resources",
+    "policyType": "Custom",
+    "mode": "Indexed",
+    "description": "Denies creation/update of Cognitive Services (incl. Azure OpenAI), AI Search, ML / AI Foundry workspaces, Key Vault, and Storage accounts that allow public network access. Forces private endpoints, per the CAF AI landing zone network-isolation guidance.",
+    "metadata": {
+      "category": "AI agent governance",
+      "version": "1.0.0"
+    },
+    "parameters": {
+      "effect": {
+        "type": "String",
+        "allowedValues": ["Deny", "Audit", "Disabled"],
+        "defaultValue": "Deny",
+        "metadata": { "displayName": "Effect" }
+      }
+    },
+    "policyRule": {
+      "if": {
+        "anyOf": [
+          {
+            "allOf": [
+              { "field": "type", "equals": "Microsoft.CognitiveServices/accounts" },
+              { "field": "Microsoft.CognitiveServices/accounts/publicNetworkAccess", "notEquals": "Disabled" }
+            ]
+          },
+          {
+            "allOf": [
+              { "field": "type", "equals": "Microsoft.MachineLearningServices/workspaces" },
+              { "field": "Microsoft.MachineLearningServices/workspaces/publicNetworkAccess", "notEquals": "Disabled" }
+            ]
+          },
+          {
+            "allOf": [
+              { "field": "type", "equals": "Microsoft.Search/searchServices" },
+              { "field": "Microsoft.Search/searchServices/publicNetworkAccess", "notEquals": "disabled" }
+            ]
+          },
+          {
+            "allOf": [
+              { "field": "type", "equals": "Microsoft.KeyVault/vaults" },
+              { "field": "Microsoft.KeyVault/vaults/publicNetworkAccess", "notEquals": "Disabled" }
+            ]
+          },
+          {
+            "allOf": [
+              { "field": "type", "equals": "Microsoft.Storage/storageAccounts" },
+              { "field": "Microsoft.Storage/storageAccounts/publicNetworkAccess", "notEquals": "Disabled" }
+            ]
+          }
+        ]
+      },
+      "then": {
+        "effect": "[parameters('effect')]"
+      }
+    }
+  }
+}
diff --git a/azure/policy-as-code/definitions/deny-cognitive-services-local-auth.json b/azure/policy-as-code/definitions/deny-cognitive-services-local-auth.json
new file mode 100644
index 0000000..9361179
--- /dev/null
+++ b/azure/policy-as-code/definitions/deny-cognitive-services-local-auth.json
@@ -0,0 +1,31 @@
+{
+  "properties": {
+    "displayName": "AI agent platform - disable local (key) auth on Cognitive Services / Azure OpenAI",
+    "policyType": "Custom",
+    "mode": "Indexed",
+    "description": "Denies Cognitive Services / Azure OpenAI accounts that allow local API-key authentication.
Forces Microsoft Entra ID auth so every model/agent call is attributable to an agent identity (Entra Agent ID / managed identity), per the CAF AI-agent identity guidance.",
+    "metadata": {
+      "category": "AI agent governance",
+      "version": "1.0.0"
+    },
+    "parameters": {
+      "effect": {
+        "type": "String",
+        "allowedValues": ["Deny", "Audit", "Disabled"],
+        "defaultValue": "Deny",
+        "metadata": { "displayName": "Effect" }
+      }
+    },
+    "policyRule": {
+      "if": {
+        "allOf": [
+          { "field": "type", "equals": "Microsoft.CognitiveServices/accounts" },
+          { "field": "Microsoft.CognitiveServices/accounts/disableLocalAuth", "notEquals": true }
+        ]
+      },
+      "then": {
+        "effect": "[parameters('effect')]"
+      }
+    }
+  }
+}
diff --git a/azure/policy-as-code/definitions/require-agent-resource-tags.json b/azure/policy-as-code/definitions/require-agent-resource-tags.json
new file mode 100644
index 0000000..cecac29
--- /dev/null
+++ b/azure/policy-as-code/definitions/require-agent-resource-tags.json
@@ -0,0 +1,59 @@
+{
+  "properties": {
+    "displayName": "AI agent platform - require ownership & classification tags",
+    "policyType": "Custom",
+    "mode": "All",
+    "description": "Requires governance tags (agentOwner, agentPurpose, dataClassification, expiresOn) on resource groups and AI resources so every agent and its supporting resources can be traced to an owner, purpose, data class, and review date. Implements the CAF 'inventory and govern agents' recommendation. Set the effect to 'Audit' to report non-compliant resources without blocking deployment.",
+    "metadata": {
+      "category": "AI agent governance",
+      "version": "1.0.0"
+    },
+    "parameters": {
+      "requiredTagNames": {
+        "type": "Array",
+        "metadata": {
+          "displayName": "Required tag names"
+        },
+        "defaultValue": ["agentOwner", "agentPurpose", "dataClassification", "expiresOn"]
+      },
+      "effect": {
+        "type": "String",
+        "allowedValues": ["Deny", "Audit", "Disabled"],
+        "defaultValue": "Deny",
+        "metadata": { "displayName": "Effect" }
+      }
+    },
+    "policyRule": {
+      "if": {
+        "allOf": [
+          {
+            "field": "type",
+            "in": [
+              "Microsoft.Resources/subscriptions/resourceGroups",
+              "Microsoft.CognitiveServices/accounts",
+              "Microsoft.Search/searchServices",
+              "Microsoft.MachineLearningServices/workspaces",
+              "Microsoft.ApiManagement/service",
+              "Microsoft.Web/sites",
+              "Microsoft.App/containerApps"
+            ]
+          },
+          {
+            "count": {
+              "value": "[parameters('requiredTagNames')]",
+              "name": "tagName",
+              "where": {
+                "value": "[empty(field(concat('tags[', current('tagName'), ']')))]",
+                "equals": true
+              }
+            },
+            "greater": 0
+          }
+        ]
+      },
+      "then": {
+        "effect": "[parameters('effect')]"
+      }
+    }
+  }
+}
diff --git a/azure/policy-as-code/definitions/require-diagnostic-settings-ai.json b/azure/policy-as-code/definitions/require-diagnostic-settings-ai.json
new file mode 100644
index 0000000..04044f1
--- /dev/null
+++ b/azure/policy-as-code/definitions/require-diagnostic-settings-ai.json
@@ -0,0 +1,100 @@
+{
+  "properties": {
+    "displayName": "AI agent platform - stream AI resource diagnostics to central Log Analytics",
+    "policyType": "Custom",
+    "mode": "Indexed",
+    "description": "Ensures Cognitive Services / Azure OpenAI accounts send all logs and metrics to the central Log Analytics workspace via a diagnostic setting (deployIfNotExists). Provides the traceability/observability the CAF AI-agent guidance requires.
Companion assignments cover AI Search, ML workspaces, and API Management with the same pattern.", + "metadata": { + "category": "AI agent governance", + "version": "1.0.0" + }, + "parameters": { + "logAnalyticsWorkspaceId": { + "type": "String", + "metadata": { + "displayName": "Central Log Analytics workspace resource ID", + "strongType": "Microsoft.OperationalInsights/workspaces" + } + }, + "diagnosticSettingName": { + "type": "String", + "defaultValue": "to-central-law", + "metadata": { "displayName": "Diagnostic setting name" } + }, + "effect": { + "type": "String", + "allowedValues": ["DeployIfNotExists", "AuditIfNotExists", "Disabled"], + "defaultValue": "DeployIfNotExists", + "metadata": { "displayName": "Effect" } + } + }, + "policyRule": { + "if": { + "field": "type", + "equals": "Microsoft.CognitiveServices/accounts" + }, + "then": { + "effect": "[parameters('effect')]", + "details": { + "type": "Microsoft.Insights/diagnosticSettings", + "name": "[parameters('diagnosticSettingName')]", + "roleDefinitionIds": [ + "/providers/Microsoft.Authorization/roleDefinitions/749f88d5-cbae-40b8-bcfc-e573ddc772fa", + "/providers/Microsoft.Authorization/roleDefinitions/92aaf0da-9dab-42b6-94a3-d43ce8d16293" + ], + "existenceCondition": { + "allOf": [ + { + "field": "Microsoft.Insights/diagnosticSettings/workspaceId", + "equals": "[parameters('logAnalyticsWorkspaceId')]" + }, + { + "field": "Microsoft.Insights/diagnosticSettings/logs.enabled", + "equals": "true" + } + ] + }, + "deployment": { + "properties": { + "mode": "incremental", + "parameters": { + "resourceName": { "value": "[field('name')]" }, + "logAnalyticsWorkspaceId": { "value": "[parameters('logAnalyticsWorkspaceId')]" }, + "diagnosticSettingName": { "value": "[parameters('diagnosticSettingName')]" }, + "location": { "value": "[field('location')]" } + }, + "template": { + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + 
"parameters": { + "resourceName": { "type": "string" }, + "logAnalyticsWorkspaceId": { "type": "string" }, + "diagnosticSettingName": { "type": "string" }, + "location": { "type": "string" } + }, + "resources": [ + { + "type": "Microsoft.CognitiveServices/accounts/providers/diagnosticSettings", + "apiVersion": "2021-05-01-preview", + "name": "[concat(parameters('resourceName'), '/Microsoft.Insights/', parameters('diagnosticSettingName'))]", + "location": "[parameters('location')]", + "properties": { + "workspaceId": "[parameters('logAnalyticsWorkspaceId')]", + "logs": [ + { "categoryGroup": "allLogs", "enabled": true }, + { "categoryGroup": "audit", "enabled": true } + ], + "metrics": [ + { "category": "AllMetrics", "enabled": true } + ] + } + } + ] + } + } + } + } + } + } + } +} diff --git a/azure/policy-as-code/initiative/ai-agent-governance-initiative.json b/azure/policy-as-code/initiative/ai-agent-governance-initiative.json new file mode 100644 index 0000000..391cb95 --- /dev/null +++ b/azure/policy-as-code/initiative/ai-agent-governance-initiative.json @@ -0,0 +1,147 @@ +{ + "properties": { + "displayName": "AI agent governance (insurance agent platform)", + "policyType": "Custom", + "description": "Bundles the AI-agent governance guardrails from the CAF 'Govern and secure AI agents across your organization' guidance: constrained regions, approved models only, no public network access, no local auth on Cognitive Services, managed identity on agent hosts, diagnostics to central Log Analytics, and ownership/classification tagging. Assigned at the Application Platform management group.", + "metadata": { + "category": "AI agent governance", + "version": "1.0.0", + "note": "Reference spec. Replace the {managementGroupScope} token with /providers/Microsoft.Management/managementGroups/ before deploying via Azure CLI/Bicep, or use the authoritative Terraform in ./policy_assignments.tf which resolves the definition IDs automatically." 
+ }, + "parameters": { + "allowedLocations": { + "type": "Array", + "metadata": { "displayName": "Allowed locations for AI resources" }, + "defaultValue": ["eastus2", "swedencentral"] + }, + "allowedModelNames": { + "type": "Array", + "metadata": { "displayName": "Approved Azure OpenAI / AI Foundry model names" }, + "defaultValue": ["gpt-4o", "gpt-4o-mini", "text-embedding-3-large"] + }, + "requiredTagNames": { + "type": "Array", + "metadata": { "displayName": "Required governance tags" }, + "defaultValue": ["agentOwner", "agentPurpose", "dataClassification", "expiresOn"] + }, + "logAnalyticsWorkspaceId": { + "type": "String", + "metadata": { + "displayName": "Central Log Analytics workspace resource ID", + "strongType": "Microsoft.OperationalInsights/workspaces" + } + }, + "locationEffect": { + "type": "String", + "allowedValues": ["Deny", "Audit", "Disabled"], + "defaultValue": "Deny", + "metadata": { "displayName": "Effect - allowed locations" } + }, + "modelEffect": { + "type": "String", + "allowedValues": ["Deny", "Audit", "Disabled"], + "defaultValue": "Deny", + "metadata": { "displayName": "Effect - approved models" } + }, + "publicNetworkEffect": { + "type": "String", + "allowedValues": ["Deny", "Audit", "Disabled"], + "defaultValue": "Deny", + "metadata": { "displayName": "Effect - deny public network access" } + }, + "localAuthEffect": { + "type": "String", + "allowedValues": ["Deny", "Audit", "Disabled"], + "defaultValue": "Deny", + "metadata": { "displayName": "Effect - disable local auth" } + }, + "managedIdentityEffect": { + "type": "String", + "allowedValues": ["Audit", "Deny", "Disabled"], + "defaultValue": "Audit", + "metadata": { "displayName": "Effect - managed identity on agent hosts" } + }, + "diagnosticsEffect": { + "type": "String", + "allowedValues": ["DeployIfNotExists", "AuditIfNotExists", "Disabled"], + "defaultValue": "DeployIfNotExists", + "metadata": { "displayName": "Effect - diagnostic settings" } + }, + "tagEffect": { + "type": 
"String", + "allowedValues": ["Deny", "Audit", "Disabled"], + "defaultValue": "Deny", + "metadata": { "displayName": "Effect - required tags" } + } + }, + "policyDefinitionGroups": [ + { "name": "identity", "displayName": "Agent identity & access" }, + { "name": "network", "displayName": "Network isolation" }, + { "name": "model", "displayName": "Approved models & regions" }, + { "name": "observability", "displayName": "Traceability & observability" }, + { "name": "inventory", "displayName": "Inventory & ownership" } + ], + "policyDefinitions": [ + { + "policyDefinitionReferenceId": "allowedAiLocations", + "policyDefinitionId": "{managementGroupScope}/providers/Microsoft.Authorization/policyDefinitions/aiagent-allowed-locations", + "groupNames": ["model"], + "parameters": { + "allowedLocations": { "value": "[parameters('allowedLocations')]" }, + "effect": { "value": "[parameters('locationEffect')]" } + } + }, + { + "policyDefinitionReferenceId": "allowedAoaiModels", + "policyDefinitionId": "{managementGroupScope}/providers/Microsoft.Authorization/policyDefinitions/aiagent-allowed-aoai-models", + "groupNames": ["model"], + "parameters": { + "allowedModelNames": { "value": "[parameters('allowedModelNames')]" }, + "effect": { "value": "[parameters('modelEffect')]" } + } + }, + { + "policyDefinitionReferenceId": "denyAiPublicNetwork", + "policyDefinitionId": "{managementGroupScope}/providers/Microsoft.Authorization/policyDefinitions/aiagent-deny-public-network", + "groupNames": ["network"], + "parameters": { + "effect": { "value": "[parameters('publicNetworkEffect')]" } + } + }, + { + "policyDefinitionReferenceId": "denyCognitiveLocalAuth", + "policyDefinitionId": "{managementGroupScope}/providers/Microsoft.Authorization/policyDefinitions/aiagent-deny-local-auth", + "groupNames": ["identity"], + "parameters": { + "effect": { "value": "[parameters('localAuthEffect')]" } + } + }, + { + "policyDefinitionReferenceId": "auditManagedIdentityOnAgents", + "policyDefinitionId": 
"{managementGroupScope}/providers/Microsoft.Authorization/policyDefinitions/aiagent-audit-managed-identity", + "groupNames": ["identity"], + "parameters": { + "effect": { "value": "[parameters('managedIdentityEffect')]" } + } + }, + { + "policyDefinitionReferenceId": "requireDiagnosticsAi", + "policyDefinitionId": "{managementGroupScope}/providers/Microsoft.Authorization/policyDefinitions/aiagent-require-diagnostics", + "groupNames": ["observability"], + "parameters": { + "logAnalyticsWorkspaceId": { "value": "[parameters('logAnalyticsWorkspaceId')]" }, + "effect": { "value": "[parameters('diagnosticsEffect')]" } + } + }, + { + "policyDefinitionReferenceId": "requireAgentTags", + "policyDefinitionId": "{managementGroupScope}/providers/Microsoft.Authorization/policyDefinitions/aiagent-require-tags", + "groupNames": ["inventory"], + "parameters": { + "requiredTagNames": { "value": "[parameters('requiredTagNames')]" }, + "effect": { "value": "[parameters('tagEffect')]" } + } + } + ] + } +} diff --git a/azure/policy-as-code/initiative/policy_assignments.tf b/azure/policy-as-code/initiative/policy_assignments.tf new file mode 100644 index 0000000..29dcb64 --- /dev/null +++ b/azure/policy-as-code/initiative/policy_assignments.tf @@ -0,0 +1,159 @@ +# --------------------------------------------------------------------------- +# Publishes the AI-agent governance policy definitions + initiative at the +# Application Platform management group and assigns the initiative there. 
+# ---------------------------------------------------------------------------
+
+locals {
+  definitions_dir = "${path.module}/../definitions"
+
+  # name (in ARM) => source JSON file
+  policy_files = {
+    "aiagent-allowed-locations"      = "allowed-ai-locations.json"
+    "aiagent-allowed-aoai-models"    = "allowed-aoai-model-deployments.json"
+    "aiagent-deny-public-network"    = "deny-ai-public-network-access.json"
+    "aiagent-deny-local-auth"        = "deny-cognitive-services-local-auth.json"
+    "aiagent-audit-managed-identity" = "audit-managed-identity-on-agents.json"
+    "aiagent-require-diagnostics"    = "require-diagnostic-settings-ai.json"
+    "aiagent-require-tags"           = "require-agent-resource-tags.json"
+  }
+}
+
+resource "azurerm_policy_definition" "this" {
+  for_each = { for name, file_name in local.policy_files : name => jsondecode(file("${local.definitions_dir}/${file_name}")).properties } # each file is decoded once; keys are still the ARM names
+
+  name                = each.key
+  management_group_id = var.management_group_id
+
+  policy_type  = "Custom"
+  mode         = each.value.mode
+  display_name = each.value.displayName
+  description  = each.value.description
+  metadata     = jsonencode(each.value.metadata)   # re-encoded: the provider takes these three as JSON strings
+  parameters   = jsonencode(each.value.parameters)
+  policy_rule  = jsonencode(each.value.policyRule)
+}
+
+resource "azurerm_policy_set_definition" "ai_agent_governance" {
+  name         = "aiagent-governance"
+  policy_type  = "Custom"
+  display_name = "AI agent governance (insurance agent platform)"
+  description  = "CAF AI-agent governance guardrails: constrained regions, approved models only, no public network access, no local auth, managed identity on agent hosts, diagnostics to central Log Analytics, ownership/classification tagging."
+  management_group_id = var.management_group_id
+
+  metadata = jsonencode({
+    category = "AI agent governance"
+    version  = "1.0.0"
+  })
+
+  parameters = jsonencode({
+    allowedLocations        = { type = "Array", defaultValue = var.allowed_locations }
+    allowedModelNames       = { type = "Array", defaultValue = var.allowed_aoai_model_names }
+    requiredTagNames        = { type = "Array", defaultValue = var.required_tag_names }
+    logAnalyticsWorkspaceId = { type = "String" } # no default: the assignment below supplies it
+  })
+
+  policy_definition_reference {
+    reference_id         = "allowedAiLocations"
+    policy_definition_id = azurerm_policy_definition.this["aiagent-allowed-locations"].id
+    parameter_values = jsonencode({
+      allowedLocations = { value = "[parameters('allowedLocations')]" } # passes the initiative parameter through
+      effect           = { value = "Deny" }                             # fixed here, not exposed as an initiative parameter
+    })
+  }
+
+  policy_definition_reference {
+    reference_id         = "allowedAoaiModels"
+    policy_definition_id = azurerm_policy_definition.this["aiagent-allowed-aoai-models"].id
+    parameter_values = jsonencode({
+      allowedModelNames = { value = "[parameters('allowedModelNames')]" }
+      effect            = { value = "Deny" }
+    })
+  }
+
+  policy_definition_reference {
+    reference_id         = "denyAiPublicNetwork"
+    policy_definition_id = azurerm_policy_definition.this["aiagent-deny-public-network"].id
+    parameter_values = jsonencode({
+      effect = { value = "Deny" }
+    })
+  }
+
+  policy_definition_reference {
+    reference_id         = "denyCognitiveLocalAuth"
+    policy_definition_id = azurerm_policy_definition.this["aiagent-deny-local-auth"].id
+    parameter_values = jsonencode({
+      effect = { value = "Deny" }
+    })
+  }
+
+  policy_definition_reference {
+    reference_id         = "auditManagedIdentityOnAgents"
+    policy_definition_id = azurerm_policy_definition.this["aiagent-audit-managed-identity"].id
+    parameter_values = jsonencode({
+      effect = { value = "Audit" } # the only reference in this set that audits instead of denying/deploying
+    })
+  }
+
+  policy_definition_reference {
+    reference_id         = "requireDiagnosticsAi"
+    policy_definition_id = azurerm_policy_definition.this["aiagent-require-diagnostics"].id
+    parameter_values = jsonencode({
+      logAnalyticsWorkspaceId = { value = "[parameters('logAnalyticsWorkspaceId')]" }
+      effect                  = { value = "DeployIfNotExists" }
+    })
+  }
+
+  policy_definition_reference {
+    reference_id         = "requireAgentTags"
+    policy_definition_id = azurerm_policy_definition.this["aiagent-require-tags"].id
+    parameter_values = jsonencode({
+      requiredTagNames = { value = "[parameters('requiredTagNames')]" }
+      effect           = { value = "Deny" }
+    })
+  }
+}
+
+resource "azurerm_management_group_policy_assignment" "ai_agent_governance" {
+  name                 = "aiagent-governance"
+  display_name         = "AI agent governance (insurance agent platform)"
+  description          = "Assigned per CAF 'Govern and secure AI agents across your organization'. deployIfNotExists/modify effects use a platform-managed user-assigned identity."
+  policy_definition_id = azurerm_policy_set_definition.ai_agent_governance.id
+  management_group_id  = var.management_group_id
+  enforce              = var.enforcement_mode == "Default" # any other enforcement_mode value yields enforce = false
+  location             = var.remediation_identity_location # presumably the region of the identity below — TODO confirm against variables.tf
+
+  identity {
+    type         = "UserAssigned"
+    identity_ids = [var.policy_remediation_identity_id]
+  }
+
+  parameters = jsonencode({
+    allowedLocations        = { value = var.allowed_locations }
+    allowedModelNames       = { value = var.allowed_aoai_model_names }
+    requiredTagNames        = { value = var.required_tag_names }
+    logAnalyticsWorkspaceId = { value = var.central_log_analytics_workspace_id } # fills the initiative parameter that has no default
+  })
+
+  non_compliance_message {
+    content = "This resource violates the AI agent governance baseline (region, model, public network, local auth, managed identity, diagnostics, or required tags). See azure/policy-as-code/README.md."
+  }
+}
+
+# Break-glass exemption hook — left empty by default; populate with a scope and a
+# justification when a genuine, time-boxed exception is approved.
+# resource "azurerm_management_group_policy_exemption" "break_glass" { +# name = "aiagent-governance-breakglass" +# management_group_id = var.management_group_id +# policy_assignment_id = azurerm_management_group_policy_assignment.ai_agent_governance.id +# exemption_category = "Waiver" +# expires_on = "2026-12-31T00:00:00Z" +# description = "Approved exception - ." +# } + +output "initiative_id" { + value = azurerm_policy_set_definition.ai_agent_governance.id +} + +output "assignment_id" { + value = azurerm_management_group_policy_assignment.ai_agent_governance.id +} diff --git a/azure/policy-as-code/initiative/providers.tf b/azure/policy-as-code/initiative/providers.tf new file mode 100644 index 0000000..08bdd7d --- /dev/null +++ b/azure/policy-as-code/initiative/providers.tf @@ -0,0 +1,16 @@ +terraform { + required_version = ">= 1.6.0" + + required_providers { + azurerm = { + source = "hashicorp/azurerm" + version = "~> 4.0" + } + } + + # backend "azurerm" {} +} + +provider "azurerm" { + features {} +} diff --git a/azure/policy-as-code/initiative/variables.tf b/azure/policy-as-code/initiative/variables.tf new file mode 100644 index 0000000..6b371a0 --- /dev/null +++ b/azure/policy-as-code/initiative/variables.tf @@ -0,0 +1,44 @@ +variable "management_group_id" { + description = "Full resource ID of the Application Platform management group to publish the definitions/initiative into and assign at. e.g. /providers/Microsoft.Management/managementGroups/alz-application-platform" + type = string +} + +variable "central_log_analytics_workspace_id" { + description = "Central Log Analytics workspace resource ID for the require-diagnostics policy." + type = string +} + +variable "policy_remediation_identity_id" { + description = "User-assigned managed identity resource ID used by the assignment for deployIfNotExists/modify remediation. Created by the application-platform landing zone module." 
+  type        = string
+}
+
+variable "remediation_identity_location" {
+  description = "Location for the policy assignment's identity block (must match a region; required when using a user-assigned identity)."
+  type        = string
+  default     = "eastus2"
+}
+
+variable "allowed_locations" {
+  description = "Approved regions for AI resources."
+  type        = list(string)
+  default     = ["eastus2", "swedencentral"]
+}
+
+variable "allowed_aoai_model_names" {
+  description = "Approved Azure OpenAI / AI Foundry model names."
+  type        = list(string)
+  default     = ["gpt-4o", "gpt-4o-mini", "text-embedding-3-large"]
+}
+
+variable "required_tag_names" {
+  description = "Governance tags required on agent resources / resource groups."
+  type        = list(string)
+  default     = ["agentOwner", "agentPurpose", "dataClassification", "expiresOn"]
+}
+
+variable "enforcement_mode" {
+  description = "Set to 'Default' to enforce or 'DoNotEnforce' for a dry run."
+  type        = string
+  default     = "Default"
+
+  # The policy assignment derives `enforce` from an exact comparison with
+  # "Default", so any other string (including a typo such as "default") would
+  # silently switch enforcement off. Reject everything but the two documented
+  # values at plan time.
+  validation {
+    condition     = contains(["Default", "DoNotEnforce"], var.enforcement_mode)
+    error_message = "enforcement_mode must be exactly 'Default' or 'DoNotEnforce' (case-sensitive)."
+  }
+}
diff --git a/azure/profiles/README.md b/azure/profiles/README.md
new file mode 100644
index 0000000..c7f1917
--- /dev/null
+++ b/azure/profiles/README.md
@@ -0,0 +1,49 @@
+# Deployment profiles
+
+Two ready-made variable sets that trade cost against production-readiness. See
+[`../COSTS.md`](../COSTS.md) for the full breakdown.
+
+| Profile | Rough Azure cost (idle) | Posture |
+|---|---|---|
+| `dev-demo/` | **~$80–150 / month** | Public endpoints, no VNet, AI Search Basic 1×1, APIM Consumption (serverless), one Power Platform environment, 1 agent, Log Analytics 30-day retention, Defender AI plan off, Key Vault purge-protection off. Run the governance policies in **Audit** mode and use **non-sensitive data only**. |
+| `prod/` | **~$3,500–6,000+ / month** | Private endpoints + Internal-VNet APIM (Premium, zone-redundant), AI Search Standard 1×3, dev/test/prod environments, all four agents, 90-day retention, Defender AI plan on, Key Vault purge-protection on, policies in **Deny/DeployIfNotExists** (enforced).
| + +The defaults baked into the modules sit **between** these (private endpoints on, +AI Search Standard 1×2, APIM Developer, three environments) — roughly +$600–750/month idle. + +## How to use + +Each profile has two files — one per Terraform module: + +```bash +# 1) Application Platform landing zone +cd azure/landing-zones/application-platform +terraform apply \ + -var-file=../../profiles/prod/application-platform.tfvars \ + -var-file=ids.tfvars # your real subscription/tenant/hub IDs (gitignored) + +# 2) Policy as code (no profile needed; pass the landing-zone outputs) +cd ../../policy-as-code/initiative +terraform apply -var management_group_id=... -var central_log_analytics_workspace_id=... -var policy_remediation_identity_id=... + +# 3) Insurance workload +cd ../../workloads/insurance-app +terraform apply \ + -var-file=../../profiles/prod/insurance-app.tfvars \ + -var-file=ids.tfvars +``` + +> The `*_subscription_id` / `tenant_*` / `hub_*` values in these profile files +> are **placeholders** — put your real IDs in a separate, gitignored `ids.tfvars` +> (or `-var` flags) so the profile stays a pure, shareable posture preset. + +## Switching demo → prod + +`dev-demo` → `prod` flips: `enable_private_endpoints`, `enable_vnet_injection`, +`apim_sku_name`, `ai_search_sku`/replicas/partitions, `key_vault_purge_protection`, +the set of `power_platform_environments` and `agents`, `log_analytics_retention_days`, +and `enable_defender_ai_threat_protection`. Changing the AI Search SKU/replicas, +the APIM SKU, or toggling private endpoints/VNet injection forces resource +**replacement** — plan a maintenance window, or (better) build the prod stack in +its own subscription and migrate, then tear down the demo (see `../COSTS.md`). 
diff --git a/azure/profiles/dev-demo/application-platform.tfvars b/azure/profiles/dev-demo/application-platform.tfvars new file mode 100644 index 0000000..f93f480 --- /dev/null +++ b/azure/profiles/dev-demo/application-platform.tfvars @@ -0,0 +1,30 @@ +# --------------------------------------------------------------------------- +# Profile: dev-demo -> azure/landing-zones/application-platform +# Cheapest viable footprint. Pair with profiles/dev-demo/insurance-app.tfvars. +# Replace the placeholder IDs below via a separate gitignored ids.tfvars / -var. +# --------------------------------------------------------------------------- + +# --- placeholders: override these -------------------------------------------- +platform_management_subscription_id = "00000000-0000-0000-0000-000000000000" +insurance_subscription_id = "00000000-0000-0000-0000-000000000000" +tenant_root_management_group_id = "00000000-0000-0000-0000-000000000000" +# A demo doesn't need a real connectivity hub; these are unused when the +# workload profile sets enable_private_endpoints=false / enable_vnet_injection=false, +# but the variables are still required by the module, so leave dummies: +hub_vnet_id = "/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/none/providers/Microsoft.Network/virtualNetworks/none" +hub_firewall_private_ip = "10.0.0.4" +platform_private_dns_zone_ids = {} + +# --- posture ---------------------------------------------------------------- +location = "eastus2" +create_application_platform_mg = true # demo: create a throwaway MG you can delete after +application_platform_mg_name = "alz-application-platform-demo" +platform_landing_zones_mg_id = "" # nest under tenant root for a demo + +# Cheap monitoring: short retention, no Defender AI plan, no separate workspace +# unless you want one. 
+central_log_analytics_workspace_id = "" # creates a small workspace in the platform sub +log_analytics_retention_days = 30 +enable_defender_ai_threat_protection = false + +billing_scope_id = "" # only needed when vending a brand-new subscription diff --git a/azure/profiles/dev-demo/insurance-app.tfvars b/azure/profiles/dev-demo/insurance-app.tfvars new file mode 100644 index 0000000..bb8e6a0 --- /dev/null +++ b/azure/profiles/dev-demo/insurance-app.tfvars @@ -0,0 +1,61 @@ +# --------------------------------------------------------------------------- +# Profile: dev-demo -> azure/workloads/insurance-app +# ~$80-150/month idle. PUBLIC endpoints, no VNet — run the ai-agent-governance +# policy initiative in Audit mode and use NON-SENSITIVE demo data only. +# Replace the placeholder IDs below via a separate gitignored ids.tfvars / -var. +# --------------------------------------------------------------------------- + +# --- placeholders: override these -------------------------------------------- +insurance_subscription_id = "00000000-0000-0000-0000-000000000000" +central_log_analytics_workspace_id = "/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/rg-application-platform-monitoring/providers/Microsoft.OperationalInsights/workspaces/law-application-platform" +ai_agents_group_object_id = "00000000-0000-0000-0000-000000000000" +# No spoke network in a demo; leave empty (enable_* toggles below are false): +spoke_subnet_ids = {} + +# --- cost / network posture ------------------------------------------------- +location = "eastus2" +enable_private_endpoints = false # public endpoints (demo only!) +enable_vnet_injection = false # no Power Platform VNet injection; APIM not Internal +key_vault_purge_protection = false # so you can tear down + redeploy the same name + +# Cheapest connector gateway: serverless, pay-per-call. (Consumption SKU cannot +# do Internal VNet, which is fine here since enable_vnet_injection=false.) 
+apim_sku_name = "Consumption_0" + +# Cheapest grounding search: Basic tier, single unit. (Use "free" for $0 / 3 +# indexes if you don't need an SLA at all.) +ai_search_sku = "basic" +ai_search_replica_count = 1 +ai_search_partition_count = 1 + +purview_account_id = "" # skip Purview in a demo +cicd_github_repo = "" # skip workload-identity federation in a demo + +# --- one environment, one agent --------------------------------------------- +power_platform_location = "unitedstates" +power_platform_environments = { + "insurance-demo" = { environment_type = "Sandbox", description = "Insurance agent platform - demo" } +} +maker_sharing_limit = 20 + +agents = { + "claims-triage-agent" = { + purpose = "First-notice-of-loss triage and routing (demo)" + needs_openai = true + needs_search = true + needs_content_safety = true + } +} + +# Just the small/cheap models for a demo. +approved_model_deployments = [ + { name = "gpt-4o-mini", model = "gpt-4o-mini", version = "2024-07-18", capacity = 10 }, + { name = "text-embedding-3-large", model = "text-embedding-3-large", version = "1", capacity = 10 }, +] + +agent_owner = "demo-team@adapt.example" +data_classification = "Internal" +expires_on = "2026-09-30" + +apim_publisher_name = "Adapt Insurance Platform (demo)" +apim_publisher_email = "demo-team@adapt.example" diff --git a/azure/profiles/prod/application-platform.tfvars b/azure/profiles/prod/application-platform.tfvars new file mode 100644 index 0000000..e15f958 --- /dev/null +++ b/azure/profiles/prod/application-platform.tfvars @@ -0,0 +1,44 @@ +# --------------------------------------------------------------------------- +# Profile: prod -> azure/landing-zones/application-platform +# Production posture. Pair with profiles/prod/insurance-app.tfvars. +# Replace the placeholder IDs below via a separate gitignored ids.tfvars / -var. 
+# --------------------------------------------------------------------------- + +# --- placeholders: override these -------------------------------------------- +platform_management_subscription_id = "00000000-0000-0000-0000-000000000000" +insurance_subscription_id = "11111111-1111-1111-1111-111111111111" +tenant_root_management_group_id = "22222222-2222-2222-2222-222222222222" + +# Real connectivity-hub references (required for private endpoints + VNet injection): +hub_vnet_id = "/subscriptions/.../resourceGroups/rg-hub-network/providers/Microsoft.Network/virtualNetworks/vnet-hub" +hub_firewall_private_ip = "10.0.1.4" +platform_private_dns_zone_ids = { + "privatelink.openai.azure.com" = "/subscriptions/.../resourceGroups/rg-hub-dns/providers/Microsoft.Network/privateDnsZones/privatelink.openai.azure.com" + "privatelink.cognitiveservices.azure.com" = "/subscriptions/.../resourceGroups/rg-hub-dns/providers/Microsoft.Network/privateDnsZones/privatelink.cognitiveservices.azure.com" + "privatelink.search.windows.net" = "/subscriptions/.../resourceGroups/rg-hub-dns/providers/Microsoft.Network/privateDnsZones/privatelink.search.windows.net" + "privatelink.vaultcore.azure.net" = "/subscriptions/.../resourceGroups/rg-hub-dns/providers/Microsoft.Network/privateDnsZones/privatelink.vaultcore.azure.net" + "privatelink.blob.core.windows.net" = "/subscriptions/.../resourceGroups/rg-hub-dns/providers/Microsoft.Network/privateDnsZones/privatelink.blob.core.windows.net" + "privatelink.azure-api.net" = "/subscriptions/.../resourceGroups/rg-hub-dns/providers/Microsoft.Network/privateDnsZones/privatelink.azure-api.net" + "privatelink.api.azureml.ms" = "/subscriptions/.../resourceGroups/rg-hub-dns/providers/Microsoft.Network/privateDnsZones/privatelink.api.azureml.ms" + "privatelink.notebooks.azure.net" = "/subscriptions/.../resourceGroups/rg-hub-dns/providers/Microsoft.Network/privateDnsZones/privatelink.notebooks.azure.net" +} + +# --- posture 
---------------------------------------------------------------- +location = "eastus2" + +# Set create_application_platform_mg = false if your tenant already has an +# Application Platform landing zone; otherwise leave true to create it. +create_application_platform_mg = true +application_platform_mg_name = "alz-application-platform" +platform_landing_zones_mg_id = "/providers/Microsoft.Management/managementGroups/alz-landingzones" + +# Monitoring: reuse the platform Log Analytics workspace if you have one; +# otherwise a 90-day workspace is created. Defender AI threat protection ON. +central_log_analytics_workspace_id = "" # e.g. "/subscriptions/.../workspaces/law-platform-prod" +log_analytics_retention_days = 90 +enable_defender_ai_threat_protection = true + +# Subnet sizing for a prod spoke (override if it collides with your address plan): +spoke_address_space = ["10.40.0.0/22"] + +billing_scope_id = "" # only needed when vending a brand-new subscription diff --git a/azure/profiles/prod/insurance-app.tfvars b/azure/profiles/prod/insurance-app.tfvars new file mode 100644 index 0000000..fcfee77 --- /dev/null +++ b/azure/profiles/prod/insurance-app.tfvars @@ -0,0 +1,87 @@ +# --------------------------------------------------------------------------- +# Profile: prod -> azure/workloads/insurance-app +# ~$3,500-6,000+/month idle (APIM Premium + AI Search S1 multi-replica + private +# endpoints dominate). Private-endpoint only, Internal-VNet APIM, all four +# agents, policies enforced (Deny / DeployIfNotExists). +# Replace the placeholder IDs below via a separate gitignored ids.tfvars / -var. 
+# --------------------------------------------------------------------------- + +# --- placeholders: override these -------------------------------------------- +insurance_subscription_id = "11111111-1111-1111-1111-111111111111" +central_log_analytics_workspace_id = "/subscriptions/.../resourceGroups/rg-application-platform-monitoring/providers/Microsoft.OperationalInsights/workspaces/law-application-platform" +ai_agents_group_object_id = "33333333-3333-3333-3333-333333333333" +spoke_subnet_ids = { + private_endpoints = "/subscriptions/.../resourceGroups/rg-insurance-app-network/providers/Microsoft.Network/virtualNetworks/vnet-insurance-app/subnets/snet-privateendpoints" + power_platform = "/subscriptions/.../resourceGroups/rg-insurance-app-network/providers/Microsoft.Network/virtualNetworks/vnet-insurance-app/subnets/snet-powerplatform" + apim = "/subscriptions/.../resourceGroups/rg-insurance-app-network/providers/Microsoft.Network/virtualNetworks/vnet-insurance-app/subnets/snet-apim" + workload_compute = "/subscriptions/.../resourceGroups/rg-insurance-app-network/providers/Microsoft.Network/virtualNetworks/vnet-insurance-app/subnets/snet-workload-compute" +} + +# --- cost / network posture ------------------------------------------------- +location = "eastus2" +enable_private_endpoints = true +enable_vnet_injection = true +key_vault_purge_protection = true + +# Premium = Internal VNet + availability zones + multi-region. Bump the unit +# count (Premium_2, Premium_3, ...) for more throughput / more zones. +apim_sku_name = "Premium_1" + +# Standard (S1) search with query+indexing SLA (3 replicas). Scale partitions +# with index size. 
+ai_search_sku = "standard" +ai_search_replica_count = 3 +ai_search_partition_count = 1 + +purview_account_id = "/subscriptions/.../resourceGroups/rg-purview/providers/Microsoft.Purview/accounts/purview-adapt" +cicd_github_repo = "realjkg/adaptcloud" + +# --- dev / test / prod environments + all four agents ----------------------- +power_platform_location = "unitedstates" +power_platform_environments = { + "insurance-dev" = { environment_type = "Sandbox", description = "Insurance agent platform - development" } + "insurance-test" = { environment_type = "Sandbox", description = "Insurance agent platform - test/UAT" } + "insurance-prod" = { environment_type = "Production", description = "Insurance agent platform - production" } +} +maker_sharing_limit = 20 + +agents = { + "policy-intake-agent" = { + purpose = "Quote intake and policy document understanding" + needs_openai = true + needs_search = true + needs_content_safety = true + federated_subjects = ["repo:realjkg/adaptcloud:environment:insurance-prod"] + } + "claims-triage-agent" = { + purpose = "First-notice-of-loss triage and routing" + needs_openai = true + needs_search = true + needs_content_safety = true + } + "underwriting-copilot" = { + purpose = "Underwriter assistant: risk summarisation and pricing guidance" + needs_openai = true + needs_search = true + needs_content_safety = true + } + "fraud-signal-agent" = { + purpose = "Claims fraud-signal detection and case annotation" + needs_openai = true + needs_search = false + needs_content_safety = true + } +} + +approved_model_deployments = [ + { name = "gpt-4o", model = "gpt-4o", version = "2024-11-20", capacity = 30 }, + { name = "gpt-4o-mini", model = "gpt-4o-mini", version = "2024-07-18", capacity = 30 }, + { name = "text-embedding-3-large", model = "text-embedding-3-large", version = "1", capacity = 30 }, +] + +agent_owner = "insurance-engineering@adapt.example" +data_classification = "Confidential" +expires_on = "2026-12-31" + +apim_publisher_name = 
"Adapt Insurance Platform" +apim_publisher_email = "platform@adapt.example" diff --git a/azure/workloads/insurance-app/README.md b/azure/workloads/insurance-app/README.md new file mode 100644 index 0000000..1afaa3a --- /dev/null +++ b/azure/workloads/insurance-app/README.md @@ -0,0 +1,56 @@ +# Insurance agent platform — workload landing zone + +The insurance application, **built low-code first** (Power Platform + +Copilot Studio) and progressively connected to APIs, deployed inside the +`insurance-app` Application landing zone created by +`../../landing-zones/application-platform/`. + +This is where the agents live. Everything here runs behind the +`ai-agent-governance` policy initiative, so it is governed-by-default: +private endpoints only, approved regions/models only, Entra ID auth only, +diagnostics to the central workspace, and required ownership/classification tags. + +## What it provisions + +| File | Resources | +|---|---| +| `main.tf` | Resource groups, naming, common tags (incl. the policy-required `agentOwner` / `agentPurpose` / `dataClassification` / `expiresOn`). | +| `identity-agents.tf` | One identity **per agent** — `policy-intake-agent`, `claims-triage-agent`, `underwriting-copilot`, `fraud-signal-agent`. Each is a user-assigned managed identity (for the Azure-hosted tool/back-end) **and** registered as an Entra app for the Copilot Studio agent's **Entra Agent ID**. No client secrets. Scoped RBAC. All added to the `ai-agents` group (Conditional Access target). Workload identity federation for the CI/CD pipeline. | +| `key-vault.tf` | Workload Key Vault — RBAC mode, purge protection, **public access disabled**, private endpoint. Holds the few unavoidable secrets (e.g., partner API keys consumed only via APIM). 
| +| `power-platform.tf` | Three Dataverse environments (`insurance-dev/test/prod`), each turned into a **Managed Environment** (maker sharing limits, Solution Checker enforced on publish, weekly usage insights, env IP firewall pinned to the spoke ranges). VNet injection enterprise policy bound to `snet-powerplatform`. | +| `dlp-policy.tf` | A tenant **baseline** DLP policy + an environment-scoped DLP policy classifying the connector catalog into **Business / Non-Business / Blocked**, with custom connectors restricted to the APIM gateway host. | +| `connectors-apim.tf` | Internal-VNet **API Management** instance; imports the OpenAPI specs in `connectors/` as APIM APIs and exposes them as Power Platform **custom connectors** with Entra ID auth + per-agent subscription keys held in Key Vault. The "widest array" of template connectivity comes from the Business connector group in `dlp-policy.tf`; bespoke insurance APIs come through here. | +| `ai-foundry.tf` | Azure AI Foundry hub + project, an Azure OpenAI account (local auth disabled, public access disabled) with **only approved model deployments**, an Azure AI Search service (private) for grounding, and an **Azure AI Content Safety** resource with a default blocklist. | +| `observability.tf` | One Application Insights component per agent (workspace-based, pointed at the central Log Analytics workspace); diagnostic settings on APIM / OpenAI / Search / Key Vault; reference to the tenant Purview account for the AI audit / DSPM-for-AI connector. | +| `networking.tf` | Private endpoints (into `snet-privateendpoints`) for OpenAI, AI Search, Key Vault, Storage, AI Foundry, and APIM. No public ingress. | +| `connectors/*.openapi.yaml` | Sample bespoke insurance API specs (`insurance-policy-api`, `claims-api`) imported by `connectors-apim.tf`. | + +## Low-code first → API connectors (the progression) + +1. 
**Day 1 — low-code.** Makers build the claims/underwriting apps and Copilot + Studio agents in `insurance-dev` using the **Business** connector group + (Dataverse, SharePoint, Outlook, Teams, SQL, Azure Blob, Service Bus, Azure + OpenAI, AI Builder, DocuSign/Adobe Sign, Salesforce, Dynamics 365, …). The + DLP policy and Managed Environment settings are already enforced, so nothing + risky can be wired up. +2. **Week N — bespoke APIs.** Policy-admin, rating, and claims-core APIs are + published behind APIM (`connectors-apim.tf`) and surfaced as governed custom + connectors with Entra ID auth. The DLP policy only allows custom connectors + whose host is the APIM gateway — so makers can't hand-roll an ungoverned one. +3. **Always — agents are identities.** Each Copilot Studio agent authenticates + with its **Entra Agent ID**; each Azure-side tool/back-end uses the matching + **user-assigned managed identity**; both are in the `ai-agents` group and + subject to Conditional Access; every call is logged and attributable. + +## ALM + +Power Platform **pipelines** promote solutions `insurance-dev → test → prod`. +Solution Checker runs on every publish (Managed Environment). Connector +definitions, environment settings, and DLP policies are all in this Terraform — +the apps/agents themselves are in solution files managed by the workload team. + +## Inputs + +See `variables.tf` / `terraform.tfvars.example`. The networking/identity/log +inputs are designed to be fed straight from the application-platform module's +`landing_zone_summary` output. 
diff --git a/azure/workloads/insurance-app/ai-foundry.tf b/azure/workloads/insurance-app/ai-foundry.tf new file mode 100644 index 0000000..6c29155 --- /dev/null +++ b/azure/workloads/insurance-app/ai-foundry.tf @@ -0,0 +1,164 @@ +# --------------------------------------------------------------------------- +# AI plane: Azure AI Foundry hub + project, Azure OpenAI (approved models only, +# Entra-only auth), Azure AI Search for grounding, Azure AI Content Safety. +# Private-endpoint only and public access disabled when enable_private_endpoints +# = true (the secure default); the demo profile sets it false. See azure/COSTS.md. +# --------------------------------------------------------------------------- + +# --- supporting storage for the AI Foundry hub ---------------------------- +resource "azurerm_storage_account" "ai" { + name = "stinsai${substr(sha1(var.insurance_subscription_id), 0, 8)}" + location = azurerm_resource_group.ai.location + resource_group_name = azurerm_resource_group.ai.name + account_tier = "Standard" + account_replication_type = "ZRS" + min_tls_version = "TLS1_2" + public_network_access_enabled = !var.enable_private_endpoints # deny-ai-public-network-access policy + allow_nested_items_to_be_public = false + shared_access_key_enabled = false + tags = local.common_tags +} + +# --- Azure OpenAI account -------------------------------------------------- +resource "azurerm_cognitive_account" "openai" { + name = "aoai-insurance-app" + location = azurerm_resource_group.ai.location + resource_group_name = azurerm_resource_group.ai.name + kind = "OpenAI" + sku_name = "S0" + custom_subdomain_name = "aoai-insurance-app-${substr(sha1(var.insurance_subscription_id), 0, 6)}" + local_auth_enabled = false # deny-cognitive-services-local-auth policy + public_network_access_enabled = !var.enable_private_endpoints # deny-ai-public-network-access policy + + identity { type = "SystemAssigned" } + + network_acls { + default_action = var.enable_private_endpoints ? 
"Deny" : "Allow" + } + + tags = local.common_tags +} + +resource "azurerm_cognitive_deployment" "approved" { + for_each = { for d in var.approved_model_deployments : d.name => d } + name = each.value.name + cognitive_account_id = azurerm_cognitive_account.openai.id + + model { + format = "OpenAI" + name = each.value.model + version = each.value.version + } + + sku { + name = each.value.sku + capacity = each.value.capacity + } +} + +# --- Azure AI Content Safety ---------------------------------------------- +resource "azurerm_cognitive_account" "content_safety" { + name = "acs-insurance-app" + location = azurerm_resource_group.ai.location + resource_group_name = azurerm_resource_group.ai.name + kind = "ContentSafety" + sku_name = "S0" + custom_subdomain_name = "acs-insurance-app-${substr(sha1(var.insurance_subscription_id), 0, 6)}" + local_auth_enabled = false + public_network_access_enabled = !var.enable_private_endpoints + + identity { type = "SystemAssigned" } + + network_acls { default_action = var.enable_private_endpoints ? "Deny" : "Allow" } + tags = local.common_tags +} + +# Content Safety blocklists and prompt-shield (jailbreak) detection are +# data-plane features of this account, configured by the AI Foundry project / +# the agents at runtime (e.g. an "insurance-default-blocklist" with deny-listed +# terms, plus Prompt Shields enabled on every request). They are not ARM +# resources; provision them from the ALM pipeline against the Content Safety +# endpoint above using the agents' Entra ID auth. 
+ +# --- Azure AI Search (grounding) ------------------------------------------ +resource "azurerm_search_service" "grounding" { + name = "srch-insurance-app" + location = azurerm_resource_group.ai.location + resource_group_name = azurerm_resource_group.ai.name + sku = var.ai_search_sku + local_authentication_enabled = false # force Entra ID auth + public_network_access_enabled = !var.enable_private_endpoints # deny-ai-public-network-access policy + partition_count = var.ai_search_sku == "free" ? null : var.ai_search_partition_count + replica_count = var.ai_search_sku == "free" ? null : var.ai_search_replica_count + + identity { type = "SystemAssigned" } + tags = local.common_tags +} + +# --- Azure AI Foundry hub + project --------------------------------------- +resource "azurerm_ai_foundry" "hub" { + name = "aif-insurance-app" + location = azurerm_resource_group.ai.location + resource_group_name = azurerm_resource_group.ai.name + storage_account_id = azurerm_storage_account.ai.id + key_vault_id = azurerm_key_vault.workload.id + public_network_access = var.enable_private_endpoints ? "Disabled" : "Enabled" + high_business_impact_enabled = true + + identity { type = "SystemAssigned" } + tags = local.common_tags +} + +resource "azurerm_ai_foundry_project" "insurance" { + name = "proj-insurance-agents" + location = azurerm_ai_foundry.hub.location + ai_services_hub_id = azurerm_ai_foundry.hub.id + tags = merge(local.common_tags, { agentName = "shared", agentPurpose = "insurance-agent-platform" }) + + identity { type = "SystemAssigned" } +} + +# Let the AI Foundry hub use the OpenAI / Search / Content Safety accounts. 
+resource "azurerm_role_assignment" "hub_openai" { + scope = azurerm_cognitive_account.openai.id + role_definition_name = "Cognitive Services OpenAI Contributor" + principal_id = azurerm_ai_foundry.hub.identity[0].principal_id +} + +resource "azurerm_role_assignment" "hub_search_contributor" { + scope = azurerm_search_service.grounding.id + role_definition_name = "Search Service Contributor" + principal_id = azurerm_ai_foundry.hub.identity[0].principal_id +} + +resource "azurerm_role_assignment" "hub_search_data" { + scope = azurerm_search_service.grounding.id + role_definition_name = "Search Index Data Contributor" + principal_id = azurerm_ai_foundry.hub.identity[0].principal_id +} + +# Diagnostics (policy also enforces). +resource "azurerm_monitor_diagnostic_setting" "openai" { + name = "to-central-law" + target_resource_id = azurerm_cognitive_account.openai.id + log_analytics_workspace_id = var.central_log_analytics_workspace_id + enabled_log { category_group = "allLogs" } + enabled_log { category_group = "audit" } + metric { category = "AllMetrics" } +} + +resource "azurerm_monitor_diagnostic_setting" "search" { + name = "to-central-law" + target_resource_id = azurerm_search_service.grounding.id + log_analytics_workspace_id = var.central_log_analytics_workspace_id + enabled_log { category = "OperationLogs" } + metric { category = "AllMetrics" } +} + +resource "azurerm_monitor_diagnostic_setting" "content_safety" { + name = "to-central-law" + target_resource_id = azurerm_cognitive_account.content_safety.id + log_analytics_workspace_id = var.central_log_analytics_workspace_id + enabled_log { category_group = "allLogs" } + metric { category = "AllMetrics" } +} diff --git a/azure/workloads/insurance-app/connectors-apim.tf b/azure/workloads/insurance-app/connectors-apim.tf new file mode 100644 index 0000000..3f72e9a --- /dev/null +++ b/azure/workloads/insurance-app/connectors-apim.tf @@ -0,0 +1,150 @@ +# 
--------------------------------------------------------------------------- +# API connectors plane: Azure API Management (internal VNet mode) fronting the +# bespoke insurance APIs. APIM gives us throttling, Entra ID validation, +# logging, and a single governed host that the DLP policy whitelists for custom +# connectors. The OpenAPI specs in ./connectors/ are imported as APIM APIs and +# can be one-click "Export to Power Platform" as custom connectors. +# --------------------------------------------------------------------------- + +resource "azurerm_api_management" "connectors" { + name = "apim-insurance-app" + location = azurerm_resource_group.workload.location + resource_group_name = azurerm_resource_group.workload.name + publisher_name = var.apim_publisher_name + publisher_email = var.apim_publisher_email + sku_name = var.apim_sku_name # Developer_1 (non-prod) | StandardV2_1 | Premium_1 (prod, VNet+zones) | Consumption_0 (serverless demo) + + # Internal VNet mode when enable_vnet_injection = true; "None" otherwise (and + # for the Consumption SKU, which does not support VNet integration). + virtual_network_type = var.enable_vnet_injection ? "Internal" : "None" + + dynamic "virtual_network_configuration" { + for_each = var.enable_vnet_injection ? [1] : [] + content { + subnet_id = var.spoke_subnet_ids["apim"] + } + } + + identity { type = "SystemAssigned" } + tags = local.common_tags +} + +# Validate the caller's Entra ID token on every request — agents call APIM with +# their managed identity / Entra Agent ID token, never an API key. 
+resource "azurerm_api_management_named_value" "tenant_id" {
+  name                = "tenant-id"
+  resource_group_name = azurerm_resource_group.workload.name
+  api_management_name = azurerm_api_management.connectors.name
+  display_name        = "tenant-id"
+  value               = data.azurerm_client_config.current.tenant_id # tenant of the credentials Terraform runs with
+}
+
+# Let APIM's managed identity read secrets from the workload Key Vault, so
+# partner API keys (the few unavoidable secrets) can be wired as Key-Vault-backed
+# named values instead of being committed to source. Create one
+# `azurerm_api_management_named_value` per partner secret with a
+# `value_from_key_vault { secret_id = "${azurerm_key_vault.workload.vault_uri}secrets/<secret-name>" }`
+# block once the secret exists in the vault (populated out of band / by the ALM
+# pipeline).
+resource "azurerm_role_assignment" "apim_keyvault" {
+  scope                = azurerm_key_vault.workload.id # whole vault, not a single secret
+  role_definition_name = "Key Vault Secrets User"
+  principal_id         = azurerm_api_management.connectors.identity[0].principal_id # APIM's system-assigned identity
+}
+
+# Tenant-wide inbound policy: require a valid Entra ID JWT.
+resource "azurerm_api_management_policy" "global" {
+  api_management_id = azurerm_api_management.connectors.id
+  # The policy XML resolves {{tenant-id}}, so the named value must exist first.
+  depends_on = [azurerm_api_management_named_value.tenant_id]
+  # NOTE(review): the element markup below was rebuilt around the surviving
+  # audience value (the tags had been stripped from this heredoc). Confirm the
+  # openid-config endpoint/issuer matches the token version the agents present.
+  xml_content = <<-XML
+    <policies>
+      <inbound>
+        <validate-jwt header-name="Authorization" failed-validation-httpcode="401" failed-validation-error-message="Unauthorized">
+          <openid-config url="https://login.microsoftonline.com/{{tenant-id}}/v2.0/.well-known/openid-configuration" />
+          <audiences><audience>api://insurance-app-connectors</audience></audiences>
+        </validate-jwt>
+      </inbound>
+      <backend>
+        <forward-request />
+      </backend>
+      <outbound />
+      <on-error />
+    </policies>
+  XML
+}
+
+# --- import the bespoke insurance APIs from the OpenAPI specs --------------
+# Map of APIM API name -> URL path suffix, display name and spec file on disk.
+locals {
+  bespoke_apis = {
+    "insurance-policy-api" = {
+      path         = "policy"
+      display_name = "Insurance Policy API"
+      spec_file    = "${path.module}/connectors/insurance-policy-api.openapi.yaml"
+    }
+    "claims-api" = {
+      path         = "claims"
+      display_name = "Claims API"
+      spec_file    = "${path.module}/connectors/claims-api.openapi.yaml"
+    }
+  }
+}
+
+# One APIM API per entry in local.bespoke_apis, imported from its OpenAPI file.
+resource "azurerm_api_management_api" "bespoke" {
+  for_each              = local.bespoke_apis
+  name                  = each.key
+  resource_group_name   = azurerm_resource_group.workload.name
+  api_management_name   = azurerm_api_management.connectors.name
+  revision              = "1"
+  display_name          = each.value.display_name
+  path                  = each.value.path
+  protocols             = ["https"]
+  subscription_required = false # auth is Entra ID JWT, not subscription keys
+
+  import {
+    content_format = "openapi" # inline OpenAPI document (the specs are YAML)
+    content_value  = file(each.value.spec_file)
+  }
+}
+
+# Workspace-based Application Insights component used by the APIM logger.
+resource "azurerm_application_insights" "apim" {
+  name                = "appi-apim-insurance-app"
+  location            = azurerm_resource_group.workload.location
+  resource_group_name = azurerm_resource_group.workload.name
+  application_type    = "web"
+  workspace_id        = var.central_log_analytics_workspace_id
+  tags                = local.common_tags
+}
+
+# Log every APIM request to Application Insights (which is workspace-based, so it
+# also lands in the central Log Analytics workspace). Diagnostics below are also
+# enforced by the ai-agent-governance policy initiative.
+resource "azurerm_api_management_logger" "appinsights" { + name = "appinsights" + api_management_name = azurerm_api_management.connectors.name + resource_group_name = azurerm_resource_group.workload.name + resource_id = azurerm_application_insights.apim.id + + application_insights { + connection_string = azurerm_application_insights.apim.connection_string + } +} + +resource "azurerm_monitor_diagnostic_setting" "apim" { + name = "to-central-law" + target_resource_id = azurerm_api_management.connectors.id + log_analytics_workspace_id = var.central_log_analytics_workspace_id + enabled_log { category = "GatewayLogs" } + enabled_log { category = "WebSocketConnectionLogs" } + metric { category = "AllMetrics" } +} + +# NOTE on custom connectors: once APIM is up, run "APIM -> API -> Export -> +# Power Platform: Power Automate / Power Apps" (or the equivalent +# az apim api ... / solution import) to publish each API as a governed custom +# connector into the insurance environments. The DLP policy already restricts +# custom connectors to `apim-insurance-app.azure-api.net`, so no other custom +# connector can be created. This step lives in the ALM pipeline. + +output "apim_gateway_url" { + value = azurerm_api_management.connectors.gateway_url +} diff --git a/azure/workloads/insurance-app/connectors/claims-api.openapi.yaml b/azure/workloads/insurance-app/connectors/claims-api.openapi.yaml new file mode 100644 index 0000000..6d0021e --- /dev/null +++ b/azure/workloads/insurance-app/connectors/claims-api.openapi.yaml @@ -0,0 +1,147 @@ +openapi: 3.0.3 +info: + title: Claims API + version: "1.0.0" + description: >- + Bespoke claims API for first-notice-of-loss, triage and status, fronted by + Azure API Management and surfaced to Power Platform / Copilot Studio as a + governed custom connector. Auth is a Microsoft Entra ID bearer token (the + calling agent's managed identity / Entra Agent ID), validated by APIM. 
+servers: + - url: https://apim-insurance-app.azure-api.net/claims +security: + - entraId: [] +paths: + /claims: + post: + operationId: SubmitFirstNoticeOfLoss + summary: Submit a first notice of loss (FNOL) + requestBody: + required: true + content: + application/json: + schema: { $ref: "#/components/schemas/FnolRequest" } + responses: + "201": + description: Claim created + content: + application/json: + schema: { $ref: "#/components/schemas/Claim" } + get: + operationId: ListClaims + summary: List claims + parameters: + - name: policyId + in: query + schema: { type: string } + - name: state + in: query + schema: { type: string, enum: [open, triaging, in_review, approved, denied, paid, closed] } + responses: + "200": + description: A page of claims + content: + application/json: + schema: + type: array + items: { $ref: "#/components/schemas/Claim" } + /claims/{claimId}: + get: + operationId: GetClaim + summary: Get a claim by id + parameters: + - name: claimId + in: path + required: true + schema: { type: string } + responses: + "200": + description: The claim + content: + application/json: + schema: { $ref: "#/components/schemas/Claim" } + "404": { description: Not found } + /claims/{claimId}/triage: + post: + operationId: TriageClaim + summary: Record a triage decision (severity, routing queue, fraud score) + parameters: + - name: claimId + in: path + required: true + schema: { type: string } + requestBody: + required: true + content: + application/json: + schema: { $ref: "#/components/schemas/TriageDecision" } + responses: + "200": + description: Triage recorded + content: + application/json: + schema: { $ref: "#/components/schemas/Claim" } + /claims/{claimId}/payout: + post: + operationId: AuthorisePayout + summary: Authorise a payout — human confirmation required in Copilot Studio + parameters: + - name: claimId + in: path + required: true + schema: { type: string } + requestBody: + required: true + content: + application/json: + schema: + type: object + 
required: [amount, currency] + properties: + amount: { type: number, format: double } + currency: { type: string, example: "USD" } + approverUpn: { type: string } + responses: + "200": + description: Payout authorised + content: + application/json: + schema: { $ref: "#/components/schemas/Claim" } +components: + securitySchemes: + entraId: + type: oauth2 + flows: + clientCredentials: + tokenUrl: https://login.microsoftonline.com/common/oauth2/v2.0/token + scopes: + api://insurance-app-connectors/.default: Access the insurance connectors API + schemas: + FnolRequest: + type: object + required: [policyId, lossType, lossDate] + properties: + policyId: { type: string } + lossType: { type: string, example: "collision" } + lossDate: { type: string, format: date } + description: { type: string } + reportedByUpn: { type: string } + TriageDecision: + type: object + required: [severity, routingQueue] + properties: + severity: { type: string, enum: [low, medium, high, catastrophic] } + routingQueue: { type: string } + fraudScore: { type: number, format: double, minimum: 0, maximum: 1 } + notes: { type: string } + Claim: + type: object + properties: + claimId: { type: string } + policyId: { type: string } + state: { type: string, enum: [open, triaging, in_review, approved, denied, paid, closed] } + lossType: { type: string } + lossDate: { type: string, format: date } + reserveAmount: { type: number, format: double } + fraudScore: { type: number, format: double } + createdAt: { type: string, format: date-time } diff --git a/azure/workloads/insurance-app/connectors/insurance-policy-api.openapi.yaml b/azure/workloads/insurance-app/connectors/insurance-policy-api.openapi.yaml new file mode 100644 index 0000000..3fd2592 --- /dev/null +++ b/azure/workloads/insurance-app/connectors/insurance-policy-api.openapi.yaml @@ -0,0 +1,108 @@ +openapi: 3.0.3 +info: + title: Insurance Policy API + version: "1.0.0" + description: >- + Bespoke insurance policy-administration API, fronted by Azure 
API Management + and surfaced to Power Platform / Copilot Studio as a governed custom + connector. Authentication is a Microsoft Entra ID bearer token (the calling + agent's managed identity / Entra Agent ID); APIM validates it. No API keys. +servers: + - url: https://apim-insurance-app.azure-api.net/policy +security: + - entraId: [] +paths: + /policies: + get: + operationId: ListPolicies + summary: List policies + parameters: + - name: customerId + in: query + schema: { type: string } + - name: status + in: query + schema: { type: string, enum: [active, lapsed, cancelled, quoted] } + responses: + "200": + description: A page of policies + content: + application/json: + schema: + type: array + items: { $ref: "#/components/schemas/Policy" } + post: + operationId: CreateQuote + summary: Create a quote (draft policy) + requestBody: + required: true + content: + application/json: + schema: { $ref: "#/components/schemas/QuoteRequest" } + responses: + "201": + description: Quote created + content: + application/json: + schema: { $ref: "#/components/schemas/Policy" } + /policies/{policyId}: + get: + operationId: GetPolicy + summary: Get a policy by id + parameters: + - name: policyId + in: path + required: true + schema: { type: string } + responses: + "200": + description: The policy + content: + application/json: + schema: { $ref: "#/components/schemas/Policy" } + "404": { description: Not found } + /policies/{policyId}:bind: + post: + operationId: BindPolicy + summary: Bind (issue) a quoted policy — human confirmation required in Copilot Studio + parameters: + - name: policyId + in: path + required: true + schema: { type: string } + responses: + "200": + description: Policy bound + content: + application/json: + schema: { $ref: "#/components/schemas/Policy" } +components: + securitySchemes: + entraId: + type: oauth2 + flows: + clientCredentials: + tokenUrl: https://login.microsoftonline.com/common/oauth2/v2.0/token + scopes: + api://insurance-app-connectors/.default: 
Access the insurance connectors API + schemas: + Policy: + type: object + properties: + policyId: { type: string } + customerId: { type: string } + product: { type: string, example: "auto" } + status: { type: string, enum: [active, lapsed, cancelled, quoted] } + premiumAnnual: { type: number, format: double } + effectiveDate: { type: string, format: date } + expiryDate: { type: string, format: date } + QuoteRequest: + type: object + required: [customerId, product] + properties: + customerId: { type: string } + product: { type: string } + coverageLevel: { type: string, enum: [basic, standard, premium] } + riskFactors: + type: object + additionalProperties: true diff --git a/azure/workloads/insurance-app/dlp-policy.tf b/azure/workloads/insurance-app/dlp-policy.tf new file mode 100644 index 0000000..b9411c0 --- /dev/null +++ b/azure/workloads/insurance-app/dlp-policy.tf @@ -0,0 +1,87 @@ +# --------------------------------------------------------------------------- +# Data Loss Prevention (DLP) — connector governance. +# +# CAF / Power Platform guidance: classify the connector catalog into +# Business / Non-Business / Blocked, default unknown connectors to a safe group, +# block risky connectors, and restrict custom connectors. We ship two policies: +# * a tenant-wide BASELINE that blocks the worst connectors everywhere; +# * an ENVIRONMENT-SCOPED policy for the insurance environments that puts the +# widest set of certified connectors in Business and pins custom connectors +# to the APIM gateway host. +# Connector lists live in variables.tf -> adaptive: change the posture there. 
+# ---------------------------------------------------------------------------
+
+# --- tenant baseline: block the high-risk connectors outside the insurance envs
+# DLP policies that apply to the same environment combine most-restrictively, so
+# a "*" -> Blocked custom-connector rule scoped to every environment would also
+# cancel the APIM allow-list in the insurance policy below. The baseline is
+# therefore applied to every environment EXCEPT the insurance ones; those are
+# covered by the working policy, which blocks the same var.blocked_connectors
+# and defaults unknown connectors to Blocked.
+resource "powerplatform_data_loss_prevention_policy" "tenant_baseline" {
+  display_name                      = "Tenant baseline - block high-risk connectors"
+  default_connectors_classification = "General" # unknown/new connectors land in Non-Business by default at tenant scope
+  environment_type                  = "ExceptEnvironments"
+  environments                      = [for e in powerplatform_environment.this : e.id]
+
+  business_connectors     = []
+  non_business_connectors = []
+
+  blocked_connectors = [
+    for id in var.blocked_connectors : {
+      id                           = id
+      default_action_rule_behavior = ""
+      action_rules                 = []
+      endpoint_rules               = []
+    }
+  ]
+
+  custom_connectors_patterns = [
+    { order = 1, host_url_pattern = "*", data_group = "Blocked" } # no custom connectors by default at tenant scope
+  ]
+}
+
+# --- insurance environments: the working policy ----------------------------
+# Scoped to the environments created by powerplatform_environment.this only.
+# Anything not listed in one of the three connector variables is Blocked.
+resource "powerplatform_data_loss_prevention_policy" "insurance" {
+  display_name                      = "Insurance agent platform - connector governance"
+  default_connectors_classification = "Blocked"
+  environment_type                  = "OnlyEnvironments"
+  environments                      = [for e in powerplatform_environment.this : e.id]
+
+  # Widest array of template connectivity: certified Microsoft + Azure + LOB
+  # connectors that the insurance apps/agents are allowed to use.
+  business_connectors = [
+    for id in var.business_connectors : {
+      id                           = id
+      default_action_rule_behavior = "Allow"
+      action_rules                 = []
+      endpoint_rules               = []
+    }
+  ]
+
+  non_business_connectors = [
+    for id in var.non_business_connectors : {
+      id                           = id
+      default_action_rule_behavior = "Allow"
+      action_rules                 = []
+      endpoint_rules               = []
+    }
+  ]
+
+  blocked_connectors = [
+    for id in var.blocked_connectors : {
+      id                           = id
+      default_action_rule_behavior = ""
+      action_rules                 = []
+      endpoint_rules               = []
+    }
+  ]
+
+  # Custom connectors are only allowed when they target the APIM gateway host.
+  # Allowed patterns take orders 1..N (Business); a final "*" rule at N+1 blocks
+  # every other host.
+  custom_connectors_patterns = concat(
+    [
+      for i, pattern in var.custom_connector_allowed_url_patterns : {
+        order            = i + 1
+        host_url_pattern = pattern
+        data_group       = "Business"
+      }
+    ],
+    [
+      { order = length(var.custom_connector_allowed_url_patterns) + 1, host_url_pattern = "*", data_group = "Blocked" }
+    ]
+  )
+}
diff --git a/azure/workloads/insurance-app/identity-agents.tf b/azure/workloads/insurance-app/identity-agents.tf
new file mode 100644
index 0000000..8004da3
--- /dev/null
+++ b/azure/workloads/insurance-app/identity-agents.tf
@@ -0,0 +1,169 @@
+# ---------------------------------------------------------------------------
+# Agent identities.
+#
+# CAF: agents are a new identity class. Each agent gets:
+#   * a user-assigned managed identity -> used by the Azure-hosted tool/back-end
+#     it calls (no secrets);
+#   * an Entra application + service principal -> the registration the Copilot
+#     Studio / AI Foundry agent uses; in tenants with Microsoft Entra Agent ID
+#     enabled this surfaces the agent as a first-class "Agent" identity. Tag it
+#     so it is discoverable in the agent inventory.
+#   * membership in the `ai-agents` security group -> Conditional Access target
+#     (block legacy auth, block interactive sign-in, pin to named locations).
+#   * narrowly-scoped RBAC -> least privilege.
+#   * (optional) workload-identity-federation subjects -> for CI/CD, never secrets.
+# No client secrets are created anywhere in this file.
+# ---------------------------------------------------------------------------
+
+# --- user-assigned managed identities (one per agent) ----------------------
+resource "azurerm_user_assigned_identity" "agent" {
+  for_each            = var.agents
+  name                = "id-agent-${each.key}"
+  location            = azurerm_resource_group.identity.location
+  resource_group_name = azurerm_resource_group.identity.name
+  tags = merge(local.common_tags, {
+    agentName    = each.key
+    agentPurpose = each.value.purpose
+    agentKind    = "managed-identity"
+  })
+}
+
+# --- Entra app registrations (Entra Agent ID for the low-code agent) -------
+resource "azuread_application" "agent" {
+  for_each     = var.agents
+  display_name = "agent-${each.key}-insurance"
+  notes        = "Insurance agent platform - ${each.value.purpose}. Managed by Terraform. Reviewed quarterly via the ai-agents access review."
+
+  # Surfaces in Entra as an agent identity where Entra Agent ID is enabled;
+  # service-principal-only sign-in (no interactive users).
+  sign_in_audience               = "AzureADMyOrg"
+  fallback_public_client_enabled = false
+
+  # `tags` and `feature_tags` cannot be set on the same application, so the two
+  # feature flags are written as their underlying tag strings instead:
+  #   enterprise = true -> "WindowsAzureActiveDirectoryIntegratedApp"
+  #   hide       = true -> "HideApp"
+  tags = [
+    "ai-agent",
+    "insurance",
+    "workload:insurance-agent-platform",
+    "agent:${each.key}",
+    "WindowsAzureActiveDirectoryIntegratedApp",
+    "HideApp",
+  ]
+}
+
+resource "azuread_service_principal" "agent" {
+  for_each                     = var.agents
+  client_id                    = azuread_application.agent[each.key].client_id
+  app_role_assignment_required = true
+  tags                         = ["ai-agent", "insurance", "agent:${each.key}"]
+}
+
+# IMPORTANT: no azuread_application_password / client secret. If a confidential
+# credential is unavoidable for a partner system, store it in Key Vault and
+# reference it via APIM, never embed it.
+
+# --- workload identity federation (CI/CD) — no secrets --------------------
+# Flattens every agent's federated_subjects list into one map keyed
+# "<agent>::<subject>", so each (agent, subject) pair gets its own credential.
+locals {
+  federated_subjects = merge([
+    for agent_name, cfg in var.agents : {
+      for subj in cfg.federated_subjects :
+      "${agent_name}::${subj}" => { agent = agent_name, subject = subj }
+    }
+  ]...)
+}
+
+resource "azuread_application_federated_identity_credential" "agent" {
+  for_each       = local.federated_subjects
+  application_id = azuread_application.agent[each.value.agent].id
+  # The credential name must be URL-friendly; GitHub subjects contain both ":"
+  # and "/" (repo:org/name:...), so every character outside [0-9A-Za-z_-] is
+  # replaced, not just the colon.
+  display_name = "fic-${replace(each.value.subject, "/[^0-9A-Za-z_-]/", "-")}"
+  description  = "Workload identity federation for ${each.value.agent}."
+  audiences    = ["api://AzureADTokenExchange"]
+  issuer       = "https://token.actions.githubusercontent.com"
+  subject      = each.value.subject
+}
+
+resource "azuread_application_federated_identity_credential" "cicd" {
+  # Created only when a repo is configured AND at least one agent app exists;
+  # without the second check keys(var.agents)[0] fails on an empty map.
+  # NOTE(review): keys() returns keys in lexical order, so this attaches to the
+  # alphabetically first agent — confirm that is the intended deploy identity.
+  count          = var.cicd_github_repo != "" && length(var.agents) > 0 ? 1 : 0
+  application_id = azuread_application.agent[keys(var.agents)[0]].id
+  display_name   = "fic-cicd-insurance-prod"
+  description    = "GitHub Actions OIDC for insurance-app solution deployment (no secrets)."
+  audiences      = ["api://AzureADTokenExchange"]
+  issuer         = "https://token.actions.githubusercontent.com"
+  subject        = "repo:${var.cicd_github_repo}:environment:insurance-prod"
+}
+
+# --- put every agent identity into the ai-agents group --------------------
+# Both identities of each agent (managed identity and service principal) join.
+resource "azuread_group_member" "agent_mi" {
+  for_each         = var.agents
+  group_object_id  = var.ai_agents_group_object_id
+  member_object_id = azurerm_user_assigned_identity.agent[each.key].principal_id
+}
+
+resource "azuread_group_member" "agent_sp" {
+  for_each         = var.agents
+  group_object_id  = var.ai_agents_group_object_id
+  member_object_id = azuread_service_principal.agent[each.key].object_id
+}
+
+# ---------------------------------------------------------------------------
+# Least-privilege RBAC for the agents' managed identities.
+# Scoped to the specific resources only.
+# --------------------------------------------------------------------------- + +resource "azurerm_role_assignment" "agent_openai" { + for_each = { for k, v in var.agents : k => v if v.needs_openai } + scope = azurerm_cognitive_account.openai.id + role_definition_name = "Cognitive Services OpenAI User" + principal_id = azurerm_user_assigned_identity.agent[each.key].principal_id +} + +resource "azurerm_role_assignment" "agent_search" { + for_each = { for k, v in var.agents : k => v if v.needs_search } + scope = azurerm_search_service.grounding.id + role_definition_name = "Search Index Data Reader" + principal_id = azurerm_user_assigned_identity.agent[each.key].principal_id +} + +resource "azurerm_role_assignment" "agent_content_safety" { + for_each = { for k, v in var.agents : k => v if v.needs_content_safety } + scope = azurerm_cognitive_account.content_safety.id + role_definition_name = "Cognitive Services User" + principal_id = azurerm_user_assigned_identity.agent[each.key].principal_id +} + +resource "azurerm_role_assignment" "agent_keyvault" { + for_each = { for k, v in var.agents : k => v if v.keyvault_secret_reader } + scope = azurerm_key_vault.workload.id + role_definition_name = "Key Vault Secrets User" + principal_id = azurerm_user_assigned_identity.agent[each.key].principal_id +} + +# Agents may write their own telemetry but not read anyone else's. +resource "azurerm_role_assignment" "agent_appinsights" { + for_each = var.agents + scope = azurerm_application_insights.agent[each.key].id + role_definition_name = "Monitoring Metrics Publisher" + principal_id = azurerm_user_assigned_identity.agent[each.key].principal_id +} + +# --------------------------------------------------------------------------- +# Conditional Access for the agent identities. +# +# NOTE: a CA policy targeting service principals / workload identities ("CA-AI- +# Agents-Restrict") should be created in Entra ID (Conditional Access for +# workload identities is licensed separately). 
It targets the `ai-agents` group +# and: blocks legacy authentication, blocks interactive sign-in, and restricts +# sign-in to the named locations covering the spoke / APIM egress IPs. Manage it +# via your identity-governance pipeline; it is documented in +# ../../docs/caf-ai-agent-governance-mapping.md. +# --------------------------------------------------------------------------- + +output "agent_identities" { + description = "Per-agent identity details for wiring into Copilot Studio / AI Foundry." + value = { + for k in keys(var.agents) : k => { + managed_identity_client_id = azurerm_user_assigned_identity.agent[k].client_id + managed_identity_principal_id = azurerm_user_assigned_identity.agent[k].principal_id + entra_app_client_id = azuread_application.agent[k].client_id + entra_sp_object_id = azuread_service_principal.agent[k].object_id + app_insights_connection_string = azurerm_application_insights.agent[k].connection_string + } + } + sensitive = true +} diff --git a/azure/workloads/insurance-app/key-vault.tf b/azure/workloads/insurance-app/key-vault.tf new file mode 100644 index 0000000..121cf9b --- /dev/null +++ b/azure/workloads/insurance-app/key-vault.tf @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------------- +# Workload Key Vault — RBAC mode, purge protection, NO public access, private +# endpoint. Holds only the unavoidable secrets (partner API keys consumed via +# APIM). Agents never read raw secrets except via the scoped "Key Vault Secrets +# User" role granted in identity-agents.tf. 
+# ---------------------------------------------------------------------------
+
+resource "azurerm_key_vault" "workload" {
+  # Name suffix: first 6 hex characters of the SHA-1 of the subscription id.
+  name                = "kv-insurance-app-${substr(sha1(var.insurance_subscription_id), 0, 6)}"
+  resource_group_name = azurerm_resource_group.workload.name
+  location            = azurerm_resource_group.workload.location
+  sku_name            = "standard"
+  tenant_id           = data.azurerm_client_config.current.tenant_id
+
+  enable_rbac_authorization  = true
+  purge_protection_enabled   = var.key_vault_purge_protection
+  soft_delete_retention_days = !var.key_vault_purge_protection ? 7 : 90
+
+  # Public access is switched off whenever private endpoints are enabled
+  # (required by the deny-ai-public-network-access policy).
+  public_network_access_enabled = !var.enable_private_endpoints
+
+  network_acls {
+    bypass         = "AzureServices"
+    default_action = !var.enable_private_endpoints ? "Allow" : "Deny"
+  }
+
+  tags = local.common_tags
+}
+
+# Private endpoint for the vault; exists only when private endpoints are enabled.
+resource "azurerm_private_endpoint" "key_vault" {
+  count               = var.enable_private_endpoints ? 1 : 0
+  name                = "pe-kv-insurance-app"
+  resource_group_name = azurerm_resource_group.workload.name
+  location            = azurerm_resource_group.workload.location
+  subnet_id           = var.spoke_subnet_ids["private_endpoints"]
+  tags                = local.common_tags
+
+  private_service_connection {
+    name                           = "psc-kv-insurance-app"
+    is_manual_connection           = false
+    subresource_names              = ["vault"]
+    private_connection_resource_id = azurerm_key_vault.workload.id
+  }
+}
+
+# Diagnostics -> central workspace (the policy also enforces this; declaring it
+# here avoids a remediation lag).
+resource "azurerm_monitor_diagnostic_setting" "key_vault" { + name = "to-central-law" + target_resource_id = azurerm_key_vault.workload.id + log_analytics_workspace_id = var.central_log_analytics_workspace_id + + enabled_log { category = "AuditEvent" } + enabled_log { category = "AzurePolicyEvaluationDetails" } + metric { category = "AllMetrics" } +} diff --git a/azure/workloads/insurance-app/main.tf b/azure/workloads/insurance-app/main.tf new file mode 100644 index 0000000..0dd44c5 --- /dev/null +++ b/azure/workloads/insurance-app/main.tf @@ -0,0 +1,46 @@ +# --------------------------------------------------------------------------- +# Common naming, tags, resource groups. +# --------------------------------------------------------------------------- + +locals { + name_prefix = "insurance-app" + + # Tags required by the ai-agent-governance policy initiative (require-agent-resource-tags). + governance_tags = { + agentOwner = var.agent_owner + agentPurpose = "insurance-agent-platform" + dataClassification = var.data_classification + expiresOn = var.expires_on + } + + common_tags = merge( + { + workload = "insurance-agent-platform" + iac = "terraform" + landingZone = "application-platform" + managedBy = "insurance-workload-team" + }, + local.governance_tags, + var.extra_tags + ) +} + +data "azurerm_client_config" "current" {} + +resource "azurerm_resource_group" "workload" { + name = "rg-${local.name_prefix}-workload" + location = var.location + tags = local.common_tags +} + +resource "azurerm_resource_group" "ai" { + name = "rg-${local.name_prefix}-ai" + location = var.location + tags = local.common_tags +} + +resource "azurerm_resource_group" "identity" { + name = "rg-${local.name_prefix}-identity" + location = var.location + tags = local.common_tags +} diff --git a/azure/workloads/insurance-app/networking.tf b/azure/workloads/insurance-app/networking.tf new file mode 100644 index 0000000..5fc5382 --- /dev/null +++ b/azure/workloads/insurance-app/networking.tf @@ 
-0,0 +1,114 @@ +# --------------------------------------------------------------------------- +# Private endpoints for the AI plane (into snet-privateendpoints). +# The Private DNS zones are platform-owned and were linked to the spoke VNet by +# the application-platform module, so name resolution Just Works. +# +# All of these are created only when var.enable_private_endpoints = true (the +# secure default). The demo profile sets it false and the AI/PaaS resources fall +# back to public network access — only acceptable for non-sensitive demo data +# with the governance policies in Audit mode. See azure/COSTS.md. +# --------------------------------------------------------------------------- + +locals { + pe_count = var.enable_private_endpoints ? 1 : 0 + pe_subnet_id = lookup(var.spoke_subnet_ids, "private_endpoints", null) +} + +resource "azurerm_private_endpoint" "openai" { + count = local.pe_count + name = "pe-aoai-insurance-app" + location = azurerm_resource_group.ai.location + resource_group_name = azurerm_resource_group.ai.name + subnet_id = local.pe_subnet_id + tags = local.common_tags + + private_service_connection { + name = "psc-aoai" + private_connection_resource_id = azurerm_cognitive_account.openai.id + subresource_names = ["account"] + is_manual_connection = false + } +} + +resource "azurerm_private_endpoint" "content_safety" { + count = local.pe_count + name = "pe-acs-insurance-app" + location = azurerm_resource_group.ai.location + resource_group_name = azurerm_resource_group.ai.name + subnet_id = local.pe_subnet_id + tags = local.common_tags + + private_service_connection { + name = "psc-acs" + private_connection_resource_id = azurerm_cognitive_account.content_safety.id + subresource_names = ["account"] + is_manual_connection = false + } +} + +resource "azurerm_private_endpoint" "search" { + count = local.pe_count + name = "pe-srch-insurance-app" + location = azurerm_resource_group.ai.location + resource_group_name = azurerm_resource_group.ai.name + 
subnet_id = local.pe_subnet_id + tags = local.common_tags + + private_service_connection { + name = "psc-srch" + private_connection_resource_id = azurerm_search_service.grounding.id + subresource_names = ["searchService"] + is_manual_connection = false + } +} + +resource "azurerm_private_endpoint" "ai_storage_blob" { + count = local.pe_count + name = "pe-stinsai-blob" + location = azurerm_resource_group.ai.location + resource_group_name = azurerm_resource_group.ai.name + subnet_id = local.pe_subnet_id + tags = local.common_tags + + private_service_connection { + name = "psc-stinsai-blob" + private_connection_resource_id = azurerm_storage_account.ai.id + subresource_names = ["blob"] + is_manual_connection = false + } +} + +resource "azurerm_private_endpoint" "ai_foundry_hub" { + count = local.pe_count + name = "pe-aif-insurance-app" + location = azurerm_resource_group.ai.location + resource_group_name = azurerm_resource_group.ai.name + subnet_id = local.pe_subnet_id + tags = local.common_tags + + private_service_connection { + name = "psc-aif" + private_connection_resource_id = azurerm_ai_foundry.hub.id + subresource_names = ["amlworkspace"] + is_manual_connection = false + } +} + +# APIM private endpoint (the gateway itself is VNet-injected internal mode when +# enable_vnet_injection = true — see connectors-apim.tf; this PE is for the +# developer/management plane if needed). 
+resource "azurerm_private_endpoint" "apim" {
+  # An inbound private endpoint cannot be attached to an APIM instance that is
+  # injected into a VNet (virtual_network_type = "Internal") or that runs on the
+  # Consumption tier, so it is created only when private endpoints are enabled
+  # AND neither of those applies. Previously it followed local.pe_count alone,
+  # which made the apply fail in the VNet-injected configuration.
+  count               = var.enable_private_endpoints && !var.enable_vnet_injection && var.apim_sku_name != "Consumption_0" ? 1 : 0
+  name                = "pe-apim-insurance-app"
+  location            = azurerm_resource_group.workload.location
+  resource_group_name = azurerm_resource_group.workload.name
+  subnet_id           = local.pe_subnet_id
+  tags                = local.common_tags
+
+  private_service_connection {
+    name                           = "psc-apim"
+    private_connection_resource_id = azurerm_api_management.connectors.id
+    subresource_names              = ["Gateway"] # this connection targets the gateway endpoint
+    is_manual_connection           = false
+  }
+}
diff --git a/azure/workloads/insurance-app/observability.tf b/azure/workloads/insurance-app/observability.tf
new file mode 100644
index 0000000..99d76a5
--- /dev/null
+++ b/azure/workloads/insurance-app/observability.tf
@@ -0,0 +1,38 @@
+# ---------------------------------------------------------------------------
+# Traceability & observability.
+#   * One workspace-based Application Insights component per agent, bound to the
+#     central Log Analytics workspace -> every agent's telemetry is correlatable
+#     in one place by the `agentName` dimension and traceable to its Entra Agent
+#     ID / managed identity.
+#   * Diagnostic settings for the AI plane / APIM / Key Vault are declared next to
+#     each resource; the policy initiative also enforces them.
+#   * Microsoft Purview AI audit / DSPM-for-AI hook (optional input).
+# ---------------------------------------------------------------------------
+
+resource "azurerm_application_insights" "agent" {
+  for_each            = var.agents # one component per entry in var.agents
+  name                = "appi-agent-${each.key}"
+  location            = azurerm_resource_group.workload.location
+  resource_group_name = azurerm_resource_group.workload.name
+  application_type    = "web"
+  workspace_id        = var.central_log_analytics_workspace_id # workspace-based component bound to the central workspace
+  tags = merge(local.common_tags, {
+    agentName    = each.key
+    agentPurpose = each.value.purpose
+  })
+}
+
+# Microsoft Purview hooks (configured in the Purview / compliance portal, not via
+# ARM): enable Audit, turn on DSPM for AI, register the AI Foundry project and the
+# agents' knowledge sources as data sources, and apply sensitivity labels so that
+# prompts/responses, data access, and agent actions are captured and label-aware.
+# var.purview_account_id is carried through to outputs so the ALM pipeline can
+# target the right account.
+
+# A workbook scaffold that pivots agent telemetry by agentName + identity is
+# expected to live alongside the Sentinel content owned by the security team;
+# referenced in ../../docs/caf-ai-agent-governance-mapping.md.
+
+output "agent_app_insights" {
+  value = { for k, v in azurerm_application_insights.agent : k => v.id } # agent name => Application Insights resource id
+}
diff --git a/azure/workloads/insurance-app/outputs.tf b/azure/workloads/insurance-app/outputs.tf
new file mode 100644
index 0000000..745625f
--- /dev/null
+++ b/azure/workloads/insurance-app/outputs.tf
@@ -0,0 +1,33 @@
+output "workload_summary" {
+  description = "Key identifiers for the insurance agent platform workload."
+  value = {
+    resource_groups = {
+      workload = azurerm_resource_group.workload.name
+      ai       = azurerm_resource_group.ai.name
+      identity = azurerm_resource_group.identity.name
+    }
+    openai_endpoint             = azurerm_cognitive_account.openai.endpoint
+    openai_account_id           = azurerm_cognitive_account.openai.id
+    search_service_id           = azurerm_search_service.grounding.id
+    content_safety_endpoint     = azurerm_cognitive_account.content_safety.endpoint
+    ai_foundry_hub_id           = azurerm_ai_foundry.hub.id
+    ai_foundry_project_id       = azurerm_ai_foundry_project.insurance.id
+    key_vault_uri               = azurerm_key_vault.workload.vault_uri
+    apim_gateway_url            = azurerm_api_management.connectors.gateway_url
+    power_platform_environments = { for k, v in powerplatform_environment.this : k => v.id }
+    model_deployments           = [for d in var.approved_model_deployments : d.name]
+    purview_account_id          = var.purview_account_id # passed through unchanged; empty string when not supplied
+  }
+}
+
+output "governance_notes" {
+  description = "Reminders for the steps that finish outside Terraform."
+  value = [
+    "Create the 'CA-AI-Agents-Restrict' Conditional Access policy in Entra targeting the ai-agents group (block legacy auth, block interactive sign-in, restrict to named locations).",
+    "Run an Entra access review over the ai-agents group quarterly; disable then delete agents with no owner or past their expiresOn tag.",
+    "Associate the Power Platform VNet enterprise policy with each insurance environment from the ALM pipeline.",
+    "Export the APIM APIs to Power Platform as custom connectors from the ALM pipeline.",
+    "Enable Microsoft Purview DSPM for AI and apply sensitivity labels to the agents' knowledge sources.",
+    "Confirm the ai-agent-governance policy initiative is assigned at the Application Platform management group and reports compliant."
+  ]
+}
diff --git a/azure/workloads/insurance-app/power-platform.tf b/azure/workloads/insurance-app/power-platform.tf
new file mode 100644
index 0000000..10e0baf
--- /dev/null
+++ b/azure/workloads/insurance-app/power-platform.tf
@@ -0,0 +1,102 @@
+# ---------------------------------------------------------------------------
+# Low-code platform: Power Platform environments + Managed Environment
+# governance + VNet injection. This is the "start low-code" surface where
+# makers build the insurance apps and Copilot Studio agents.
+#
+# CAF: use an environment strategy (dev/test/prod), enforce Managed Environment
+# controls (sharing limits, Solution Checker, usage insights, env IP firewall),
+# and inject the environments into the platform VNet.
+# ---------------------------------------------------------------------------
+
+resource "powerplatform_environment" "this" {
+  for_each         = var.power_platform_environments
+  display_name     = each.key
+  location         = var.power_platform_location
+  environment_type = each.value.environment_type
+  description      = each.value.description
+
+  dataverse = {
+    language_code     = 1033
+    currency_code     = "USD"
+    security_group_id = "" # set to the environment's Entra security group object id to restrict membership
+  }
+}
+
+# Turn each environment into a Managed Environment with governance controls.
+resource "powerplatform_managed_environment" "this" {
+  for_each = powerplatform_environment.this
+
+  environment_id             = each.value.id
+  is_usage_insights_disabled = false # weekly usage insights ON
+  is_group_sharing_disabled  = true  # block sharing with security groups
+  limit_sharing_mode         = "ExcludeSharingToSecurityGroups"
+  max_limit_user_sharing     = var.maker_sharing_limit # cap per-app maker sharing
+  solution_checker_mode      = "Block"                 # documented values are None | Warn | Block; Block enforces Solution Checker results
+  suppress_validation_emails = false
+  maker_onboarding_markdown  = "Welcome to the insurance agent platform. All connectors are governed by DLP; custom connectors must go through the APIM gateway. Agents must use their assigned Entra Agent ID."
+  maker_onboarding_url       = "https://aka.ms/insurance-agent-platform-onboarding"
+}
+
+# Environment-level settings: plug-in trace logging and Dataverse auditing on, plus product behaviour/feature flags.
+# NOTE(review): no IP firewall is configured by this resource, although the file header mentions one — confirm where it is set.
+resource "powerplatform_environment_settings" "this" {
+  for_each       = powerplatform_environment.this
+  environment_id = each.value.id
+
+  audit_and_logs = {
+    plugin_trace_log_setting = "All"
+    audit_settings = {
+      is_audit_enabled             = true
+      is_user_access_audit_enabled = true
+      is_read_audit_enabled        = true
+    }
+  }
+
+  product = {
+    behavior_settings = {
+      show_dashboard_cards_in_expanded_state = true
+    }
+    features = {
+      power_apps_component_framework_for_canvas_apps = true
+    }
+  }
+}
+
+# ---------------------------------------------------------------------------
+# VNet injection: enterprise policy bound to the delegated spoke subnet, so
+# environment traffic egresses through the platform network (and the hub
+# firewall) instead of the public internet.
+# ---------------------------------------------------------------------------
+resource "azapi_resource" "powerplatform_vnet_enterprise_policy" {
+  count     = var.enable_vnet_injection ? 1 : 0 # created only when VNet injection is enabled
+  type      = "Microsoft.PowerPlatform/enterprisePolicies@2020-10-30"
+  name      = "ep-insurance-app-vnet"
+  location  = var.power_platform_location # enterprise policies are homed in a Power Platform geo (e.g. "unitedstates"), not an Azure region
+  parent_id = azurerm_resource_group.workload.id
+  body = {
+    kind = "NetworkInjection"
+    properties = {
+      networkInjection = {
+        virtualNetworks = [ # NOTE(review): some geos (e.g. unitedstates) require a second VNet in the paired Azure region — confirm
+          {
+            id = join("/", slice(split("/", var.spoke_subnet_ids["power_platform"]), 0, 9)) # the VNet id
+            subnet = {
+              name = element(split("/", var.spoke_subnet_ids["power_platform"]), length(split("/", var.spoke_subnet_ids["power_platform"])) - 1) # last path segment = subnet name
+            }
+          }
+        ]
+      }
+    }
+  }
+  tags = local.common_tags
+}
+
+# NOTE: associating the enterprise policy with each environment is done via the
+# Power Platform admin API ("New-PowerAppEnvironmentSubnetInjection" /
+# Set-AdminPowerAppEnvironment) or the provider's environment enterprise-policy
+# linkage once the environment exists. Run that step from the ALM pipeline after
+# `terraform apply`. The policy object itself is created above so it is in IaC.
+
+output "power_platform_environment_ids" {
+  value = { for k, v in powerplatform_environment.this : k => v.id }
+}
diff --git a/azure/workloads/insurance-app/providers.tf b/azure/workloads/insurance-app/providers.tf
new file mode 100644
index 0000000..65da375
--- /dev/null
+++ b/azure/workloads/insurance-app/providers.tf
@@ -0,0 +1,49 @@
+terraform {
+  required_version = ">= 1.6.0"
+
+  required_providers {
+    azurerm = {
+      source  = "hashicorp/azurerm"
+      version = "~> 4.0"
+    }
+    azuread = {
+      source  = "hashicorp/azuread"
+      version = "~> 3.0"
+    }
+    azapi = {
+      source  = "azure/azapi"
+      version = "~> 2.0"
+    }
+    powerplatform = {
+      source  = "microsoft/power-platform"
+      version = "~> 3.0"
+    }
+    random = {
+      source  = "hashicorp/random"
+      version = "~> 3.6"
+    }
+  }
+
+  # backend "azurerm" {}
+}
+
+provider "azurerm" {
+  features {
+    key_vault {
+      purge_soft_delete_on_destroy = false
+    }
+  }
+  subscription_id = var.insurance_subscription_id
+}
+
+provider "azuread" {}
+provider "azapi" {}
+
+# Power Platform
provider — authenticates to the same tenant; needs the
+# Power Platform admin scopes (use a service principal that is a Power Platform
+# administrator). See https://registry.terraform.io/providers/microsoft/power-platform
+provider "powerplatform" {
+  use_cli = true
+}
+
+provider "random" {}
diff --git a/azure/workloads/insurance-app/terraform.tfvars.example b/azure/workloads/insurance-app/terraform.tfvars.example
new file mode 100644
index 0000000..ad96002
--- /dev/null
+++ b/azure/workloads/insurance-app/terraform.tfvars.example
@@ -0,0 +1,34 @@
+# Copy to terraform.tfvars and fill in. Do NOT commit the real file.
+# Most of these come from the application-platform module's `landing_zone_summary` output.
+
+insurance_subscription_id = "11111111-1111-1111-1111-111111111111"
+location                  = "eastus2"
+
+spoke_subnet_ids = {
+  private_endpoints = "/subscriptions/.../resourceGroups/rg-insurance-app-network/providers/Microsoft.Network/virtualNetworks/vnet-insurance-app/subnets/snet-privateendpoints"
+  power_platform    = "/subscriptions/.../resourceGroups/rg-insurance-app-network/providers/Microsoft.Network/virtualNetworks/vnet-insurance-app/subnets/snet-powerplatform"
+  apim              = "/subscriptions/.../resourceGroups/rg-insurance-app-network/providers/Microsoft.Network/virtualNetworks/vnet-insurance-app/subnets/snet-apim"
+  workload_compute  = "/subscriptions/.../resourceGroups/rg-insurance-app-network/providers/Microsoft.Network/virtualNetworks/vnet-insurance-app/subnets/snet-workload-compute"
+}
+
+central_log_analytics_workspace_id = "/subscriptions/.../resourceGroups/rg-application-platform-monitoring/providers/Microsoft.OperationalInsights/workspaces/law-application-platform"
+ai_agents_group_object_id          = "33333333-3333-3333-3333-333333333333"
+purview_account_id                 = "" # optional
+cicd_github_repo                   = "realjkg/adaptcloud"
+
+agent_owner         = "insurance-engineering@adapt.example"
+data_classification = "Confidential"
+expires_on          = "2026-12-31"
+
+power_platform_location = "unitedstates"
+maker_sharing_limit     = 20
+
+apim_publisher_name  = "Adapt Insurance Platform"
+apim_publisher_email = "platform@adapt.example"
+
+# Override the connector classification lists / approved models here if needed.
+# approved_model_deployments = [
+#   { name = "gpt-4o", model = "gpt-4o", version = "2024-11-20", capacity = 30 },
+#   { name = "gpt-4o-mini", model = "gpt-4o-mini", version = "2024-07-18", capacity = 30 },
+#   { name = "text-embedding-3-large", model = "text-embedding-3-large", version = "1", capacity = 30 },
+# ]
diff --git a/azure/workloads/insurance-app/variables.tf b/azure/workloads/insurance-app/variables.tf
new file mode 100644
index 0000000..8efbf38
--- /dev/null
+++ b/azure/workloads/insurance-app/variables.tf
@@ -0,0 +1,296 @@
+# ---------------------------------------------------------------------------
+# Wiring from the application-platform landing zone
+# ---------------------------------------------------------------------------
+variable "insurance_subscription_id" {
+  description = "Subscription ID of the insurance-app Application landing zone."
+  type        = string
+}
+
+variable "location" {
+  description = "Primary region (must be in the policy allow-list, e.g. eastus2)."
+  type        = string
+  default     = "eastus2"
+}
+
+variable "spoke_subnet_ids" {
+  description = "Subnet IDs from the application-platform module: keys private_endpoints, power_platform, apim, workload_compute. May be left empty when enable_private_endpoints and enable_vnet_injection are both false (demo profile)."
+  type        = map(string)
+  default     = {}
+}
+
+variable "central_log_analytics_workspace_id" {
+  description = "Central Log Analytics workspace resource ID (Application Insights components are bound to it)."
+  type        = string
+}
+
+variable "ai_agents_group_object_id" {
+  description = "Object ID of the 'ai-agents' Entra security group; all agent identities are added to it (Conditional Access target)."
+  type        = string
+}
+
+variable "purview_account_id" {
+  description = "Resource ID of the tenant Microsoft Purview account used for the AI audit / DSPM-for-AI connector. Leave empty to skip."
+  type        = string
+  default     = ""
+}
+
+variable "cicd_github_repo" {
+  description = "GitHub org/repo allowed to assume the CI/CD identity via workload identity federation (subject = repo:<org>/<repo>:environment:insurance-prod). Leave empty to skip."
+  type        = string
+  default     = ""
+}
+
+# ---------------------------------------------------------------------------
+# Governance tags required by the ai-agent-governance policy initiative
+# ---------------------------------------------------------------------------
+variable "agent_owner" {
+  description = "Owning team / DL for the insurance agent platform (agentOwner tag)."
+  type        = string
+  default     = "insurance-engineering@adapt.example"
+}
+
+variable "data_classification" {
+  description = "Data classification of the workload (dataClassification tag): Public | Internal | Confidential | HighlyConfidential."
+  type        = string
+  default     = "Confidential"
+
+  validation {
+    condition     = contains(["Public", "Internal", "Confidential", "HighlyConfidential"], var.data_classification)
+    error_message = "The data_classification value must be one of: Public, Internal, Confidential, HighlyConfidential."
+  }
+}
+
+variable "expires_on" {
+  description = "Review/expiry date for the workload's agents (expiresOn tag), YYYY-MM-DD."
+  type        = string
+  default     = "2026-12-31"
+
+  validation {
+    condition     = can(regex("^[0-9]{4}-[0-9]{2}-[0-9]{2}$", var.expires_on))
+    error_message = "The expires_on value must be a date in YYYY-MM-DD format."
+  }
+}
+
+variable "extra_tags" {
+  description = "Additional tags merged onto all resources."
+  type        = map(string)
+  default     = {}
+}
+
+# ---------------------------------------------------------------------------
+# Agents
+# ---------------------------------------------------------------------------
+variable "agents" {
+  description = "The agents to provision identities for. Each gets a user-assigned managed identity, an Entra app (for the Copilot Studio Entra Agent ID), membership in the ai-agents group, and scoped RBAC."
+  type = map(object({
+    purpose                = string
+    needs_openai           = optional(bool, true)
+    needs_search           = optional(bool, false)
+    needs_content_safety   = optional(bool, true)
+    keyvault_secret_reader = optional(bool, true)
+    federated_subjects     = optional(list(string), [])
+  }))
+  default = {
+    "policy-intake-agent" = {
+      purpose              = "Quote intake and policy document understanding"
+      needs_openai         = true
+      needs_search         = true
+      needs_content_safety = true
+    }
+    "claims-triage-agent" = {
+      purpose              = "First-notice-of-loss triage and routing"
+      needs_openai         = true
+      needs_search         = true
+      needs_content_safety = true
+    }
+    "underwriting-copilot" = {
+      purpose              = "Underwriter assistant: risk summarisation and pricing guidance"
+      needs_openai         = true
+      needs_search         = true
+      needs_content_safety = true
+    }
+    "fraud-signal-agent" = {
+      purpose              = "Claims fraud-signal detection and case annotation"
+      needs_openai         = true
+      needs_search         = false
+      needs_content_safety = true
+    }
+  }
+}
+
+# ---------------------------------------------------------------------------
+# Cost / network posture toggles (see azure/COSTS.md and azure/profiles/)
+# ---------------------------------------------------------------------------
+variable "enable_private_endpoints" {
+  description = "Create private endpoints for the AI plane / Key Vault / APIM and disable their public network access (the secure default). Set false ONLY for a low-cost demo — resources become publicly reachable, so run the governance policies in Audit mode and use non-sensitive data."
+  type        = bool
+  default     = true
+}
+
+variable "enable_vnet_injection" {
+  description = "Inject the Power Platform environments into the spoke VNet (enterprise policy) and run API Management in Internal VNet mode. Set false for a demo without a spoke network."
+  type        = bool
+  default     = true
+}
+
+variable "key_vault_purge_protection" {
+  description = "Enable Key Vault purge protection (recommended for prod). When true, a destroyed vault stays soft-deleted for 90 days and CANNOT be force-purged — the name is unusable for that period. Set false for a demo you intend to tear down and rebuild."
+  type        = bool
+  default     = true
+}
+
+variable "apim_sku_name" {
+  description = "API Management SKU. Developer_1 = non-prod, no SLA (~$50/mo); StandardV2_1 = ~$700/mo, supports VNet; Premium_1 (or more units) = ~$2,800/mo per unit, for prod multi-zone internal VNet; Consumption_0 = serverless pay-per-call, cheapest demo (but Consumption does NOT support Internal VNet, so pair it with enable_vnet_injection=false)."
+  type        = string
+  default     = "Developer_1"
+}
+
+# ---------------------------------------------------------------------------
+# AI Foundry / Azure OpenAI
+# ---------------------------------------------------------------------------
+variable "approved_model_deployments" {
+  description = "Model deployments to create on the Azure OpenAI account. Must be on the policy 'allowedModelNames' list."
+  type = list(object({
+    name     = string
+    model    = string
+    version  = string
+    capacity = number
+    sku      = optional(string, "Standard")
+  }))
+  default = [
+    { name = "gpt-4o", model = "gpt-4o", version = "2024-11-20", capacity = 30 },
+    { name = "gpt-4o-mini", model = "gpt-4o-mini", version = "2024-07-18", capacity = 30 },
+    { name = "text-embedding-3-large", model = "text-embedding-3-large", version = "1", capacity = 30 }
+  ]
+}
+
+variable "ai_search_sku" {
+  description = "SKU for the grounding Azure AI Search service. 'free' = $0 (no SLA, 3 indexes, dev only); 'basic' = ~$75/mo; 'standard' (S1) = ~$245/mo per search unit; higher tiers cost more. Billed while the service exists, idle or not."
+  type        = string
+  default     = "standard"
+}
+
+variable "ai_search_replica_count" {
+  description = "Azure AI Search replica count. Search units billed = replicas x partitions. Use 1 for demo/dev, 2+ for query SLA, 3+ for indexing+query SLA. Ignored on the 'free' SKU."
+  type        = number
+  default     = 2
+}
+
+variable "ai_search_partition_count" {
+  description = "Azure AI Search partition count (storage/scale). Ignored on the 'free' SKU."
+  type        = number
+  default     = 1
+}
+
+# ---------------------------------------------------------------------------
+# Power Platform environments
+# ---------------------------------------------------------------------------
+variable "power_platform_location" {
+  description = "Power Platform geo for the environments (e.g. unitedstates, europe)."
+  type        = string
+  default     = "unitedstates"
+}
+
+variable "power_platform_environments" {
+  description = "Dataverse environments to create as Managed Environments."
+  type = map(object({
+    environment_type = string # Sandbox | Production
+    description      = string
+  }))
+  default = {
+    "insurance-dev"  = { environment_type = "Sandbox", description = "Insurance agent platform - development" }
+    "insurance-test" = { environment_type = "Sandbox", description = "Insurance agent platform - test/UAT" }
+    "insurance-prod" = { environment_type = "Production", description = "Insurance agent platform - production" }
+  }
+}
+
+variable "maker_sharing_limit" {
+  description = "Managed Environment limit on the number of users a maker can share an app with (0 = sharing disabled, -1 = no limit)."
+  type        = number
+  default     = 20
+}
+
+# ---------------------------------------------------------------------------
+# DLP connector classification (adaptive — change posture here, not in code)
+# ---------------------------------------------------------------------------
+variable "business_connectors" {
+  description = "Connector IDs placed in the 'Business' group: the widest set of certified/Microsoft-published connectors relevant to insurance + Azure + M365."
+  type        = list(string)
+  default = [
+    "/providers/Microsoft.PowerApps/apis/shared_commondataserviceforapps", # Microsoft Dataverse
+    "/providers/Microsoft.PowerApps/apis/shared_sharepointonline",
+    "/providers/Microsoft.PowerApps/apis/shared_office365", # Outlook
+    "/providers/Microsoft.PowerApps/apis/shared_office365users",
+    "/providers/Microsoft.PowerApps/apis/shared_teams",
+    "/providers/Microsoft.PowerApps/apis/shared_onedriveforbusiness",
+    "/providers/Microsoft.PowerApps/apis/shared_sql",
+    "/providers/Microsoft.PowerApps/apis/shared_azureblob",
+    "/providers/Microsoft.PowerApps/apis/shared_azurequeues",
+    "/providers/Microsoft.PowerApps/apis/shared_servicebus",
+    "/providers/Microsoft.PowerApps/apis/shared_eventhubs",
+    "/providers/Microsoft.PowerApps/apis/shared_azureopenai",
+    "/providers/Microsoft.PowerApps/apis/shared_cognitiveservicestextanalytics",
+    "/providers/Microsoft.PowerApps/apis/shared_cognitiveservicescomputervision",
+    "/providers/Microsoft.PowerApps/apis/shared_formrecognizer", # Document Intelligence
+    "/providers/Microsoft.PowerApps/apis/shared_aibuilder",
+    "/providers/Microsoft.PowerApps/apis/shared_dynamicssmb",
+    "/providers/Microsoft.PowerApps/apis/shared_dynamicsnav",
+    "/providers/Microsoft.PowerApps/apis/shared_dynamics365businesscentral",
+    "/providers/Microsoft.PowerApps/apis/shared_salesforce",
+    "/providers/Microsoft.PowerApps/apis/shared_docusign",
+    "/providers/Microsoft.PowerApps/apis/shared_adobesign",
+    "/providers/Microsoft.PowerApps/apis/shared_approvals",
+    "/providers/Microsoft.PowerApps/apis/shared_flowapproval",
+    "/providers/Microsoft.PowerApps/apis/shared_excelonlinebusiness",
+    "/providers/Microsoft.PowerApps/apis/shared_word", # Word Online (Business)
+    "/providers/Microsoft.PowerApps/apis/shared_powerbi",
+    "/providers/Microsoft.PowerApps/apis/shared_azureautomation"
+  ]
+}
+
+variable "non_business_connectors" {
+  description = "Connector IDs placed in the 'Non-Business' (general/personal-productivity) group."
+  type        = list(string)
+  default = [
+    "/providers/Microsoft.PowerApps/apis/shared_rss",
+    "/providers/Microsoft.PowerApps/apis/shared_msnweather",
+    "/providers/Microsoft.PowerApps/apis/shared_bingmaps"
+  ]
+}
+
+variable "blocked_connectors" {
+  description = "Connector IDs explicitly blocked everywhere (social, consumer storage, unsanctioned AI, 'send to anyone')."
+  type        = list(string)
+  default = [
+    "/providers/Microsoft.PowerApps/apis/shared_twitter",
+    "/providers/Microsoft.PowerApps/apis/shared_facebook",
+    "/providers/Microsoft.PowerApps/apis/shared_instagram",
+    "/providers/Microsoft.PowerApps/apis/shared_youtube",
+    "/providers/Microsoft.PowerApps/apis/shared_dropbox",
+    "/providers/Microsoft.PowerApps/apis/shared_box",
+    "/providers/Microsoft.PowerApps/apis/shared_googledrive",
+    "/providers/Microsoft.PowerApps/apis/shared_gmail",
+    "/providers/Microsoft.PowerApps/apis/shared_onedrive",    # consumer OneDrive
+    "/providers/Microsoft.PowerApps/apis/shared_sendmail",    # SMTP "send as anyone"
+    "/providers/Microsoft.PowerApps/apis/shared_webcontents", # arbitrary HTTP with auth
+    "/providers/Microsoft.PowerApps/apis/shared_openai"       # non-Azure OpenAI
+  ]
+}
+
+variable "custom_connector_allowed_url_patterns" {
+  description = "URL patterns custom connectors are allowed to target (the APIM gateway host). Everything else is blocked."
+  type        = list(string)
+  default     = ["https://apim-insurance-app.azure-api.net/*"]
+}
+
+variable "apim_publisher_name" {
+  description = "API Management publisher (organization) name."
+  type        = string
+  default     = "Adapt Insurance Platform"
+}
+
+variable "apim_publisher_email" {
+  description = "API Management publisher contact e-mail address."
+  type        = string
+  default     = "platform@adapt.example"
+}
diff --git a/gke/README.md b/gke/README.md index 83101af..6cbb945 100644 --- a/gke/README.md +++ b/gke/README.md @@ -1,4 +1,4 @@ -Aptumcloud.io - GKE Terraform deployment +Adaptcloud.io - GKE Terraform deployment Initial MVP repository and GKE setup this repo contains secure hardening and will contain chatGPT webhooks as well as GKE hooks for the community to use diff --git a/gke/main.tf b/gke/main.tf index 62fd248..941d651 100644 --- a/gke/main.tf +++ b/gke/main.tf @@ -16,7 +16,7 @@ module "gke_cluster" { source = "terraform-google-modules/kubernetes-engine/google//modules/hardened_cluster" project_id = "mystical-button-380517" - name = "aptumcloud-k8s" + name = "adaptcloud-k8s" region = "us-central1" zones = ["us-central1-a", "us-central1-b", "us-central1-c"] node_count = 3 @@ -43,7 +43,7 @@ module "gke_cluster" { private_cluster_config = { enable_private_endpoint = true master_ipv4_cidr_block = "172.16.0.0/28" - private_endpoint_dns_zone = "aptumcloud.com" + private_endpoint_dns_zone = "adaptcloud.com" } # Configure the GKE cluster to use workload identity and IAM roles for service accounts.
@@ -104,12 +104,18 @@ module "gke_cluster" { - # Create a Kubernetes namespace for the logging and monitoring -resource "kubernetes_namespace" "logging - - - - #Begin Cluster Bindings +# Create a Kubernetes namespace for logging and monitoring +resource "kubernetes_namespace" "logging" { + metadata { + name = "logging-monitoring" + labels = { + managed-by = "terraform" + purpose = "logging-monitoring" + } + } +} + +#Begin Cluster Bindings module "gke_monitoring" { source = "terraform-google-modules/kubernetes-engine/google//modules/prometheus" @@ -128,7 +134,7 @@ resource "kubernetes_namespace" "logging } resource "google_organization_iam_binding" "gke_cluster_binding" { - org_id = "APTUMCLOUD_DEV" + org_id = "ADAPTCLOUD_DEV" role = "roles/container.admin" members = [ @@ -138,7 +144,7 @@ resource "google_organization_iam_binding" "gke_cluster_binding" { resource "google_organization_iam_binding" "gke_cluster_monitoring_binding" { - org_id = "APTUMCLOUD_DEV" + org_id = "ADAPTCLOUD_DEV" for_each = toset([ "roles/logging.privateLogViewer", @@ -191,7 +197,7 @@ resource "google_kms_crypto_key" "gcr_crypto_key" { } resource "google_storage_bucket" "gcr_bucket" { - name = "aptumcloud-k8s-docker-registry" + name = "adaptcloud-k8s-docker-registry" location = "us-central1" storage_class = "STANDARD" @@ -208,7 +214,7 @@ resource "google_storage_bucket" "gcr_bucket" { resource "google_service_account" "gcr_service_account" { - account_id = "aptumcloud-k8s-docker-registry" + account_id = "adaptcloud-k8s-docker-registry" display_name = "Docker Registry Service Account" } @@ -245,19 +251,19 @@ resource "google_service_account_key" "gcr_service_account_key" { #encryption blocks and KMS hardening here resource "google_kms_key_ring" "kms_keyring" { - name = "aptum-k8s-keyring" + name = "adapt-k8s-keyring" location = "us-central1" } resource "google_kms_crypto_key" "kms_crypto_key" { - name = "aptum-k8s-crypto-key" + name = "adapt-k8s-crypto-key" key_ring = 
google_kms_key_ring.kms_keyring.self_link rotation_period = "100000s" } resource "google_storage_bucket" "encrypted_bucket" { - name = "aptum-k8s-${random_id.bucket_suffix.hex}" + name = "adapt-k8s-${random_id.bucket_suffix.hex}" location = "us-central1" encryption { @@ -281,7 +287,7 @@ resource "random_id" "bucket_suffix" { # Ensure bucket hardening with the keyring config below resource "google_storage_bucket" "encrypted_bucket" { - name = "aptum-k8s-${random_id.bucket_suffix.hex}" + name = "adapt-k8s-${random_id.bucket_suffix.hex}" location = "us-central1" encryption { diff --git a/gke/terraform.tfvars b/gke/terraform.tfvars index a5a6345..f24a15c 100644 --- a/gke/terraform.tfvars +++ b/gke/terraform.tfvars @@ -29,7 +29,7 @@ variable "saml_idp_metadata_url" { variable "kms_keyring_name" { description = "The name of the KMS keyring" type = string - default = "aptum-k8s-keyring" + default = "adapt-k8s-keyring" } variable "kms_keyring_location" { @@ -41,7 +41,7 @@ variable "kms_keyring_location" { variable "kms_crypto_key_name" { description = "The name of the KMS crypto key" type = string - default = "aptum-k8s-crypto-key" + default = "adapt-k8s-crypto-key" } variable "kms_crypto_key_rotation_period" {