Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions modules/aws/aws-infra/.terraform-docs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# terraform-docs configuration for this module.
# Renders the documentation as Markdown tables and writes it into README.md.
formatter: "markdown table"

output:
  file: "README.md"
  # "inject" replaces only the region delimited by the markers in the
  # template below and leaves the rest of README.md untouched.
  mode: inject
  template: |-
    <!-- BEGIN_TF_DOCS -->
    {{ .Content }}
    <!-- END_TF_DOCS -->

# Only the sections listed under "show" are rendered.
sections:
  show:
    - inputs
    - outputs

# Rendering options for the generated tables.
settings:
  anchor: true
  color: true
  default: true
  description: true
  escape: true
  hide-empty: false
  html: true
  indent: 2
  required: true
  sensitive: true
  type: true
393 changes: 393 additions & 0 deletions modules/aws/aws-infra/README.md

Large diffs are not rendered by default.

111 changes: 111 additions & 0 deletions modules/aws/aws-infra/iam.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# IAM Component
# Creates cross-account roles, Unity Catalog roles, and associated policies

# Trust (assume-role) policy for the cross-account role, generated by the
# Databricks provider. The Databricks account ID is passed as the external ID.
data "databricks_aws_assume_role_policy" "cross_account" {
  external_id = var.databricks_account_id
}

# Cross-account IAM role for Databricks.
# Created unconditionally: the resource has no count / for_each guard.
resource "aws_iam_role" "cross_account" {
  name               = local.iam_config.cross_account_role_name
  assume_role_policy = data.databricks_aws_assume_role_policy.cross_account.json

  # Module-wide tags, extended with tags that identify this specific role.
  tags = merge(
    local.common_tags,
    {
      Name    = local.iam_config.cross_account_role_name
      Purpose = "Databricks Cross-Account Access"
      Type    = "CrossAccount"
    },
  )
}

# Permissions policy for the cross-account role, generated by the Databricks
# provider according to var.cross_account_policy_type.
#
# NOTE(review): the provider documents the accepted policy_type values as
# "managed", "customer" and "restricted" (not "customer-managed") - confirm
# that the validation/default on var.cross_account_policy_type matches.
# NOTE(review): for "restricted" the provider also expects aws_account_id,
# vpc_id, region and security_group_id, none of which are passed here - verify
# before allowing that value.
data "databricks_aws_crossaccount_policy" "cross_account" {
  policy_type = var.cross_account_policy_type

  # An empty list is translated to null so the argument is simply omitted.
  pass_roles = length(var.roles_to_assume) == 0 ? null : var.roles_to_assume
}

# Inline policy that grants the cross-account role the Databricks-generated
# permissions document.
resource "aws_iam_role_policy" "cross_account_inline" {
  role   = aws_iam_role.cross_account.id
  name   = "databricks-cross-account-policy"
  policy = data.databricks_aws_crossaccount_policy.cross_account.json
}

# Unity Catalog trust policies.
#
# The Unity Catalog IAM role is always created, but its external ID is only
# known once it has been obtained from the Databricks Account Console. Exactly
# one of the two data sources below is instantiated:
#   - var.external_id != null: the Databricks-generated trust policy, bound to
#     the real external ID.
#   - var.external_id == null: a bootstrap trust policy bound to the
#     placeholder external ID "0000".
#
# Once you have the external_id, set it and re-apply - Terraform will update
# the trust policy in-place without recreating the role.

data "databricks_aws_unity_catalog_assume_role_policy" "unity_catalog" {
  count = var.external_id != null ? 1 : 0

  aws_account_id = local.account_id
  role_name      = local.iam_config.unity_catalog_role_name
  external_id    = var.external_id
}

# Bootstrap trust policy used while external_id is not yet available.
#
# The sts:ExternalId condition is deliberate: trusting the shared Databricks
# Unity Catalog master role without any external-ID condition would let that
# role assume this role on behalf of any Databricks account (confused deputy).
# The placeholder "0000" keeps the role unassumable until the real external ID
# is configured.
#
# NOTE(review): the principal ARN below is hard-coded; confirm it is the right
# Unity Catalog master role for every partition/region this module targets.
data "aws_iam_policy_document" "unity_catalog_assume_role_basic" {
  count = var.external_id == null ? 1 : 0

  statement {
    effect  = "Allow"
    actions = ["sts:AssumeRole"]

    principals {
      type        = "AWS"
      identifiers = ["arn:aws:iam::414351767826:role/unity-catalog-prod-UCMasterRole-14S5ZJVKOTYTL"]
    }

    # Placeholder external ID; replaced by the real one via the
    # Databricks-generated policy once var.external_id is set.
    condition {
      test     = "StringEquals"
      variable = "sts:ExternalId"
      values   = ["0000"]
    }
  }
}

# IAM role used by Unity Catalog. Created unconditionally.
resource "aws_iam_role" "unity_catalog" {
  name = local.iam_config.unity_catalog_role_name

  # Exactly one of the two trust-policy data sources has an instance (their
  # counts are complementary on var.external_id), so select the one that exists.
  assume_role_policy = var.external_id == null ? (
    data.aws_iam_policy_document.unity_catalog_assume_role_basic[0].json
    ) : (
    data.databricks_aws_unity_catalog_assume_role_policy.unity_catalog[0].json
  )

  # Module-wide tags, extended with tags that identify this specific role.
  tags = merge(
    local.common_tags,
    {
      Name    = local.iam_config.unity_catalog_role_name
      Purpose = "Unity Catalog Metastore Access"
      Type    = "UnityCatalog"
    },
  )
}

# Permissions policy for the Unity Catalog role, generated by the Databricks
# provider for the metastore bucket.
data "databricks_aws_unity_catalog_policy" "unity_catalog" {
  # NOTE(review): when the metastore bucket is not created, an empty bucket
  # name is passed - confirm the generated policy is still valid in that case.
  bucket_name = var.create_metastore_bucket ? aws_s3_bucket.metastore[0].bucket : ""

  role_name      = local.iam_config.unity_catalog_role_name
  aws_account_id = local.account_id
}

# Inline policy that grants the Unity Catalog role the Databricks-generated
# permissions document.
resource "aws_iam_role_policy" "unity_catalog_inline" {
  role   = aws_iam_role.unity_catalog.id
  name   = "unity-catalog-metastore-policy"
  policy = data.databricks_aws_unity_catalog_policy.unity_catalog.json
}

# Optional instance profile, created only when var.create_instance_profiles
# is true.
#
# NOTE(review): the profile wraps the cross-account role, whose trust policy
# comes from databricks_aws_assume_role_policy - confirm that role is meant to
# be usable as an EC2 instance profile role.
resource "aws_iam_instance_profile" "databricks" {
  count = var.create_instance_profiles ? 1 : 0

  name = "${var.prefix}-databricks-instance-profile"
  role = aws_iam_role.cross_account.name

  tags = merge(
    local.common_tags,
    {
      Name    = "${var.prefix}-databricks-instance-profile"
      Purpose = "Databricks Compute Instance Profile"
    },
  )
}

# Fixed 20-second pause that starts only after both IAM roles and their inline
# policies exist. Resources that depend on this sleep therefore see the roles
# after a propagation delay.
resource "time_sleep" "iam_propagation_wait" {
  depends_on = [
    # Roles
    aws_iam_role.cross_account,
    aws_iam_role.unity_catalog,
    # Inline policies attached to those roles
    aws_iam_role_policy.cross_account_inline,
    aws_iam_role_policy.unity_catalog_inline,
  ]

  create_duration = "20s"
}
73 changes: 73 additions & 0 deletions modules/aws/aws-infra/locals.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Lookups against the active AWS provider configuration.

# Identity (account) of the credentials Terraform is running with.
data "aws_caller_identity" "current" {}

# Region the AWS provider is configured for.
data "aws_region" "current" {}

# Availability zones currently in the "available" state.
data "aws_availability_zones" "available" {
  state = "available"
}

# Records a timestamp once, when this resource is first created, and keeps it
# stable afterwards. Using it instead of timestamp() avoids a plan diff on
# every apply.
resource "time_static" "created" {
}

locals {
  # --- Provider context ----------------------------------------------------

  # Account ID of the credentials Terraform is running with.
  account_id = data.aws_caller_identity.current.account_id

  # Region reported by the aws_region data source.
  current_region = data.aws_region.current.id

  # --- Tagging -------------------------------------------------------------

  # Tags applied to all resources. The module-defined keys are merged last,
  # so they take precedence over same-named keys supplied in var.tags.
  common_tags = merge(var.tags, {
    ManagedBy   = "terraform"
    Module      = "aws-infra"
    Prefix      = var.prefix
    Region      = var.region
    CreatedDate = formatdate("YYYY-MM-DD", time_static.created.rfc3339)
  })

  # --- Naming --------------------------------------------------------------

  # S3 bucket names, derived from the prefix.
  root_bucket_name      = "${var.prefix}-rootbucket"
  metastore_bucket_name = "${var.prefix}-metastore"
  data_bucket_name      = "${var.prefix}-data"

  # IAM role names, derived from the prefix.
  iam_config = {
    cross_account_role_name = "${var.prefix}-cross-account-role"
    unity_catalog_role_name = "${var.prefix}-unity-catalog-role"
  }

  # --- Networking ----------------------------------------------------------

  # Caller-supplied AZs win; otherwise take the first (up to) three AZs that
  # are currently available.
  availability_zones = length(var.networking.availability_zones) == 0 ? slice(
    data.aws_availability_zones.available.names,
    0,
    min(length(data.aws_availability_zones.available.names), 3),
  ) : var.networking.availability_zones

  # Caller-supplied CIDRs win; otherwise carve one subnet per AZ out of the
  # VPC CIDR by adding 8 prefix bits. Private subnets use network numbers
  # 1, 2, ... and public subnets use 101, 102, ...
  private_subnet_cidrs = length(var.networking.private_subnet_cidrs) == 0 ? [
    for az_idx in range(length(local.availability_zones)) :
    cidrsubnet(var.networking.vpc_cidr, 8, az_idx + 1)
  ] : var.networking.private_subnet_cidrs

  public_subnet_cidrs = length(var.networking.public_subnet_cidrs) == 0 ? [
    for az_idx in range(length(local.availability_zones)) :
    cidrsubnet(var.networking.vpc_cidr, 8, az_idx + 101)
  ] : var.networking.public_subnet_cidrs

  # The firewall is on when requested explicitly OR whenever the hub-spoke
  # architecture is enabled.
  enable_firewall = var.security.enable_network_firewall || var.advanced_networking.hub_spoke_architecture

  # With hub-spoke enabled, the local NAT gateway is forced off regardless of
  # networking.enable_nat_gateway; that setting is only honoured when
  # hub-spoke is disabled.
  enable_nat_gateway = var.advanced_networking.hub_spoke_architecture ? false : var.networking.enable_nat_gateway

  # Transit gateway settings; null unless enable_transit_gateway is set.
  transit_gateway_config = !var.advanced_networking.enable_transit_gateway ? null : {
    name           = "${var.prefix}-transit-gateway"
    hub_vpc_cidr   = var.advanced_networking.hub_vpc_cidr
    spoke_vpc_cidr = var.networking.vpc_cidr

    # One hub subnet per role, each carved from the hub VPC CIDR with
    # 8 additional prefix bits.
    hub_public_subnet_cidr   = cidrsubnet(var.advanced_networking.hub_vpc_cidr, 8, 1)
    hub_private_subnet_cidr  = cidrsubnet(var.advanced_networking.hub_vpc_cidr, 8, 10)
    hub_firewall_subnet_cidr = cidrsubnet(var.advanced_networking.hub_vpc_cidr, 8, 20)
  }
}
48 changes: 48 additions & 0 deletions modules/aws/aws-infra/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# AWS Infrastructure Module
# This module provides comprehensive AWS infrastructure for Databricks workloads
# All .tf files in this directory are automatically loaded by Terraform

# Core Components (Always Created):
# - networking.tf - VPC, Subnets, Security Groups, NAT Gateway
# - workspacestorage.tf - Root S3 Bucket for Databricks workspace
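# - ucstorage.tf - Unity Catalog S3 Buckets (metastore & data; the metastore bucket is created only when create_metastore_bucket = true)
# - iam.tf - IAM Roles (cross-account, Unity Catalog) and optional instance profiles (when create_instance_profiles = true)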
# - vpc-endpoints.tf - VPC Endpoints (S3, STS, Kinesis)

# Conditional Components (Created based on variables):
# - private-link.tf - Databricks Private Link (when enable_private_link = true)

# Submodules:
# - modules/hub-networking - Transit Gateway, Hub VPC, and Network Firewall (when hub_spoke_architecture = true)

# Configuration:
# - variables.tf - Input variables
# - locals.tf - Local values and computed configurations
# - outputs.tf - Module outputs
# - versions.tf - Provider version requirements

# Hub networking submodule (Transit Gateway + Firewall).
# Instantiated only when the hub-spoke architecture is enabled.
module "hub_networking" {
  source = "./modules/hub-networking"
  count  = var.advanced_networking.hub_spoke_architecture ? 1 : 0

  # General settings
  prefix      = var.prefix
  region      = var.region
  common_tags = local.common_tags

  # Hub VPC
  hub_vpc_cidr       = var.advanced_networking.hub_vpc_cidr
  availability_zones = local.availability_zones

  # Spoke VPC (the VPC created by this module)
  spoke_vpc_id             = module.vpc.vpc_id
  spoke_vpc_cidr           = var.networking.vpc_cidr
  spoke_private_subnet_ids = module.vpc.private_subnets
  spoke_route_table_ids    = module.vpc.private_route_table_ids

  # Network Firewall
  # NOTE(review): local.enable_firewall is true whenever hub-spoke is enabled,
  # and this module only exists when hub-spoke is enabled, so this value is
  # always true here - confirm that is intended.
  enable_firewall       = local.enable_firewall
  allowed_fqdns         = var.security.allowed_fqdns
  allowed_network_rules = var.security.allowed_network_rules
}
Loading