From 6cf1cd0a81b72ef4f8eb5304e2ca08ea37fc9a19 Mon Sep 17 00:00:00 2001 From: Xoulomon Date: Thu, 28 May 2026 13:58:01 +0100 Subject: [PATCH 1/4] feat(#704): Configure S3 backend with DynamoDB state locking - Create bootstrap.sh script to initialize S3 bucket and DynamoDB table - Add backend-config.hcl for flexible backend configuration - Update main.tf to use dynamic backend configuration - Document bootstrap process in infrastructure/README.md - Enable versioning, encryption, and public access blocking on S3 - Enable point-in-time recovery on DynamoDB table --- infrastructure/README.md | 30 +++++++- infrastructure/terraform/backend-config.hcl | 9 +++ infrastructure/terraform/bootstrap.sh | 76 +++++++++++++++++++++ infrastructure/terraform/main.tf | 10 +-- 4 files changed, 117 insertions(+), 8 deletions(-) create mode 100644 infrastructure/terraform/backend-config.hcl create mode 100644 infrastructure/terraform/bootstrap.sh diff --git a/infrastructure/README.md b/infrastructure/README.md index 1e1cb4eb..c84d7495 100644 --- a/infrastructure/README.md +++ b/infrastructure/README.md @@ -33,13 +33,41 @@ infrastructure/ ## Quick Start +### Bootstrap Terraform State Backend (First Time Only) + +Before initializing Terraform, you must create the S3 bucket and DynamoDB table for remote state management: + +```bash +cd infrastructure/terraform + +# Bootstrap for development environment +./bootstrap.sh us-east-1 dev + +# Bootstrap for staging environment +./bootstrap.sh us-east-1 staging + +# Bootstrap for production environment +./bootstrap.sh us-east-1 prod +``` + +The bootstrap script will: +1. Create an S3 bucket for Terraform state +2. Enable versioning and encryption on the bucket +3. Block public access to the bucket +4. Create a DynamoDB table for state locking +5. 
Enable point-in-time recovery on the DynamoDB table + ### Initialize Terraform ```bash cd infrastructure/terraform -terraform init + +# Initialize with backend configuration +terraform init -backend-config=backend-config.hcl ``` +**Note:** The `backend-config.hcl` file contains the S3 bucket and DynamoDB table names. `bootstrap.sh` appends the environment to both names (e.g., `predictiq-terraform-state-dev` and `terraform-locks-dev`), so update this file to match the names printed at the end of bootstrap. + ### Plan Infrastructure Changes ```bash diff --git a/infrastructure/terraform/backend-config.hcl b/infrastructure/terraform/backend-config.hcl new file mode 100644 index 00000000..8ee666aa --- /dev/null +++ b/infrastructure/terraform/backend-config.hcl @@ -0,0 +1,9 @@ +# Backend configuration for Terraform state management +# This file is used during terraform init to configure the S3 backend +# Usage: terraform init -backend-config=backend-config.hcl + +bucket = "predictiq-terraform-state" +key = "terraform.tfstate" +region = "us-east-1" +encrypt = true +dynamodb_table = "terraform-locks" diff --git a/infrastructure/terraform/bootstrap.sh b/infrastructure/terraform/bootstrap.sh new file mode 100644 index 00000000..df6e6167 --- /dev/null +++ b/infrastructure/terraform/bootstrap.sh @@ -0,0 +1,76 @@ +#!/bin/bash +set -e + +# Bootstrap script to create S3 bucket and DynamoDB table for Terraform state management +# Usage: ./bootstrap.sh [aws-region] [environment] (defaults: us-east-1, dev) + +AWS_REGION=${1:-us-east-1} +ENVIRONMENT=${2:-dev} +BUCKET_NAME="predictiq-terraform-state-${ENVIRONMENT}" +LOCK_TABLE="terraform-locks-${ENVIRONMENT}" + +echo "Bootstrapping Terraform state backend for environment: $ENVIRONMENT in region: $AWS_REGION" + +# Create S3 bucket +echo "Creating S3 bucket: $BUCKET_NAME" +aws s3api create-bucket \ + --bucket "$BUCKET_NAME" \ + --region "$AWS_REGION" \ + $([ "$AWS_REGION" != "us-east-1" ] && echo "--create-bucket-configuration LocationConstraint=$AWS_REGION") \ + 2>/dev/null || echo "Bucket already exists or error occurred" + +# Enable versioning +echo "Enabling versioning on S3 bucket" +aws s3api 
put-bucket-versioning \ + --bucket "$BUCKET_NAME" \ + --versioning-configuration Status=Enabled \ + --region "$AWS_REGION" + +# Enable encryption +echo "Enabling server-side encryption on S3 bucket" +aws s3api put-bucket-encryption \ + --bucket "$BUCKET_NAME" \ + --server-side-encryption-configuration '{ + "Rules": [ + { + "ApplyServerSideEncryptionByDefault": { + "SSEAlgorithm": "AES256" + } + } + ] + }' \ + --region "$AWS_REGION" + +# Block public access +echo "Blocking public access to S3 bucket" +aws s3api put-public-access-block \ + --bucket "$BUCKET_NAME" \ + --public-access-block-configuration \ + "BlockPublicAcls=true,IgnorePublicAcls=true,BlockPublicPolicy=true,RestrictPublicBuckets=true" \ + --region "$AWS_REGION" + +# Create DynamoDB table for state locking +echo "Creating DynamoDB table: $LOCK_TABLE" +aws dynamodb create-table \ + --table-name "$LOCK_TABLE" \ + --attribute-definitions AttributeName=LockID,AttributeType=S \ + --key-schema AttributeName=LockID,KeyType=HASH \ + --billing-mode PAY_PER_REQUEST \ + --region "$AWS_REGION" \ + 2>/dev/null || echo "Table already exists or error occurred" + +# Enable point-in-time recovery +echo "Enabling point-in-time recovery on DynamoDB table" +aws dynamodb update-continuous-backups \ + --table-name "$LOCK_TABLE" \ + --point-in-time-recovery-specification PointInTimeRecoveryEnabled=true \ + --region "$AWS_REGION" \ + 2>/dev/null || echo "PITR already enabled or error occurred" + +echo "Bootstrap complete!" +echo "S3 Bucket: $BUCKET_NAME" +echo "DynamoDB Table: $LOCK_TABLE" +echo "" +echo "Next steps:" +echo "1. Update infrastructure/terraform/backend-config.hcl with the bucket and table names" +echo "2. 
Run: terraform init -backend-config=backend-config.hcl" diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf index ada02ee4..6b61c450 100644 --- a/infrastructure/terraform/main.tf +++ b/infrastructure/terraform/main.tf @@ -7,13 +7,9 @@ terraform { } } - backend "s3" { - bucket = "predictiq-terraform-state" - key = "prod/terraform.tfstate" - region = "us-east-1" - encrypt = true - dynamodb_table = "terraform-locks" - } + # Backend configuration is provided via -backend-config flag during init + # See backend-config.hcl for details + backend "s3" {} } provider "aws" { From d36777cc33e81dedd52147a5f964f07089a3f4f4 Mon Sep 17 00:00:00 2001 From: Xoulomon Date: Thu, 28 May 2026 13:59:53 +0100 Subject: [PATCH 2/4] feat(#705): Enforce resource tagging strategy - Create locals.tf with common tags (Project, Environment, Owner, ManagedBy) - Update all modules (vpc, rds, redis, ecs, monitoring) to use common tags - Apply tags to all AWS resources using merge() function - Ensure consistent tagging across all environments for cost allocation and compliance --- infrastructure/terraform/locals.tf | 9 ++ infrastructure/terraform/modules/ecs/main.tf | 113 +++++++++++++----- .../terraform/modules/monitoring/main.tf | 39 +++++- infrastructure/terraform/modules/rds/main.tf | 36 ++++-- .../terraform/modules/redis/main.tf | 36 ++++-- infrastructure/terraform/modules/vpc/main.tf | 85 +++++++++---- 6 files changed, 243 insertions(+), 75 deletions(-) create mode 100644 infrastructure/terraform/locals.tf diff --git a/infrastructure/terraform/locals.tf b/infrastructure/terraform/locals.tf new file mode 100644 index 00000000..c10f43bf --- /dev/null +++ b/infrastructure/terraform/locals.tf @@ -0,0 +1,9 @@ +# Common locals for consistent tagging across all modules +locals { + common_tags = { + Project = "predictiq" + Environment = var.environment + Owner = "infrastructure-team" + ManagedBy = "terraform" + } +} diff --git a/infrastructure/terraform/modules/ecs/main.tf 
b/infrastructure/terraform/modules/ecs/main.tf index e89423ef..da4db3fb 100644 --- a/infrastructure/terraform/modules/ecs/main.tf +++ b/infrastructure/terraform/modules/ecs/main.tf @@ -44,6 +44,15 @@ variable "redis_url" { sensitive = true } +locals { + common_tags = { + Project = "predictiq" + Environment = var.environment + Owner = "infrastructure-team" + ManagedBy = "terraform" + } +} + resource "aws_ecs_cluster" "main" { name = "predictiq-${var.environment}" @@ -52,18 +61,24 @@ resource "aws_ecs_cluster" "main" { value = "enabled" } - tags = { - Name = "predictiq-${var.environment}-cluster" - } + tags = merge( + local.common_tags, + { + Name = "predictiq-${var.environment}-cluster" + } + ) } resource "aws_cloudwatch_log_group" "ecs" { name = "/ecs/predictiq-${var.environment}" retention_in_days = var.environment == "prod" ? 30 : 7 - tags = { - Name = "predictiq-${var.environment}-logs" - } + tags = merge( + local.common_tags, + { + Name = "predictiq-${var.environment}-logs" + } + ) } resource "aws_ecs_task_definition" "api" { @@ -114,9 +129,12 @@ resource "aws_ecs_task_definition" "api" { } ]) - tags = { - Name = "predictiq-${var.environment}-api-task" - } + tags = merge( + local.common_tags, + { + Name = "predictiq-${var.environment}-api-task" + } + ) } resource "aws_security_group" "alb" { @@ -144,9 +162,12 @@ resource "aws_security_group" "alb" { cidr_blocks = ["0.0.0.0/0"] } - tags = { - Name = "predictiq-${var.environment}-alb-sg" - } + tags = merge( + local.common_tags, + { + Name = "predictiq-${var.environment}-alb-sg" + } + ) } resource "aws_lb" "main" { @@ -156,9 +177,12 @@ resource "aws_lb" "main" { security_groups = [aws_security_group.alb.id] subnets = var.public_subnet_ids - tags = { - Name = "predictiq-${var.environment}-alb" - } + tags = merge( + local.common_tags, + { + Name = "predictiq-${var.environment}-alb" + } + ) } resource "aws_lb_target_group" "api" { @@ -177,9 +201,12 @@ resource "aws_lb_target_group" "api" { matcher = "200" } - tags = 
{ - Name = "predictiq-${var.environment}-api-tg" - } + tags = merge( + local.common_tags, + { + Name = "predictiq-${var.environment}-api-tg" + } + ) } resource "aws_lb_listener" "api" { @@ -211,9 +238,12 @@ resource "aws_security_group" "ecs_tasks" { cidr_blocks = ["0.0.0.0/0"] } - tags = { - Name = "predictiq-${var.environment}-ecs-tasks-sg" - } + tags = merge( + local.common_tags, + { + Name = "predictiq-${var.environment}-ecs-tasks-sg" + } + ) } resource "aws_ecs_service" "api" { @@ -237,17 +267,23 @@ resource "aws_ecs_service" "api" { depends_on = [aws_lb_listener.api] - tags = { - Name = "predictiq-${var.environment}-api-service" - } + tags = merge( + local.common_tags, + { + Name = "predictiq-${var.environment}-api-service" + } + ) } resource "aws_secretsmanager_secret" "database_url" { name = "predictiq/${var.environment}/database-url" - tags = { - Name = "predictiq-${var.environment}-database-url" - } + tags = merge( + local.common_tags, + { + Name = "predictiq-${var.environment}-database-url" + } + ) } resource "aws_secretsmanager_secret_version" "database_url" { @@ -258,9 +294,12 @@ resource "aws_secretsmanager_secret_version" "database_url" { resource "aws_secretsmanager_secret" "redis_url" { name = "predictiq/${var.environment}/redis-url" - tags = { - Name = "predictiq-${var.environment}-redis-url" - } + tags = merge( + local.common_tags, + { + Name = "predictiq-${var.environment}-redis-url" + } + ) } resource "aws_secretsmanager_secret_version" "redis_url" { @@ -283,6 +322,13 @@ resource "aws_iam_role" "ecs_task_execution_role" { } ] }) + + tags = merge( + local.common_tags, + { + Name = "predictiq-${var.environment}-ecs-task-execution-role" + } + ) } resource "aws_iam_role_policy_attachment" "ecs_task_execution_role_policy" { @@ -326,6 +372,13 @@ resource "aws_iam_role" "ecs_task_role" { } ] }) + + tags = merge( + local.common_tags, + { + Name = "predictiq-${var.environment}-ecs-task-role" + } + ) } data "aws_region" "current" {} diff --git 
a/infrastructure/terraform/modules/monitoring/main.tf b/infrastructure/terraform/modules/monitoring/main.tf index 2982f28c..474c9e04 100644 --- a/infrastructure/terraform/modules/monitoring/main.tf +++ b/infrastructure/terraform/modules/monitoring/main.tf @@ -10,6 +10,15 @@ variable "ecs_service" { type = string } +locals { + common_tags = { + Project = "predictiq" + Environment = var.environment + Owner = "infrastructure-team" + ManagedBy = "terraform" + } +} + resource "aws_cloudwatch_dashboard" "main" { dashboard_name = "predictiq-${var.environment}" @@ -49,9 +58,12 @@ resource "aws_cloudwatch_dashboard" "main" { resource "aws_sns_topic" "alerts" { name = "predictiq-${var.environment}-alerts" - tags = { - Name = "predictiq-${var.environment}-alerts" - } + tags = merge( + local.common_tags, + { + Name = "predictiq-${var.environment}-alerts" + } + ) } resource "aws_cloudwatch_metric_alarm" "ecs_cpu" { @@ -70,6 +82,13 @@ resource "aws_cloudwatch_metric_alarm" "ecs_cpu" { ClusterName = var.ecs_cluster ServiceName = var.ecs_service } + + tags = merge( + local.common_tags, + { + Name = "predictiq-${var.environment}-ecs-cpu-high" + } + ) } resource "aws_cloudwatch_metric_alarm" "ecs_memory" { @@ -88,6 +107,13 @@ resource "aws_cloudwatch_metric_alarm" "ecs_memory" { ClusterName = var.ecs_cluster ServiceName = var.ecs_service } + + tags = merge( + local.common_tags, + { + Name = "predictiq-${var.environment}-ecs-memory-high" + } + ) } resource "aws_cloudwatch_metric_alarm" "alb_5xx" { @@ -105,6 +131,13 @@ resource "aws_cloudwatch_metric_alarm" "alb_5xx" { dimensions = { LoadBalancer = "app/predictiq-${var.environment}-alb/*" } + + tags = merge( + local.common_tags, + { + Name = "predictiq-${var.environment}-alb-5xx-errors" + } + ) } data "aws_region" "current" {} diff --git a/infrastructure/terraform/modules/rds/main.tf b/infrastructure/terraform/modules/rds/main.tf index 6ebca886..d94e20df 100644 --- a/infrastructure/terraform/modules/rds/main.tf +++ 
b/infrastructure/terraform/modules/rds/main.tf @@ -36,13 +36,25 @@ variable "backup_retention" { type = number } +locals { + common_tags = { + Project = "predictiq" + Environment = var.environment + Owner = "infrastructure-team" + ManagedBy = "terraform" + } +} + resource "aws_db_subnet_group" "main" { name = "predictiq-${var.environment}-db-subnet" subnet_ids = var.private_subnet_ids - tags = { - Name = "predictiq-${var.environment}-db-subnet-group" - } + tags = merge( + local.common_tags, + { + Name = "predictiq-${var.environment}-db-subnet-group" + } + ) } resource "aws_security_group" "rds" { @@ -63,9 +75,12 @@ resource "aws_security_group" "rds" { cidr_blocks = ["0.0.0.0/0"] } - tags = { - Name = "predictiq-${var.environment}-rds-sg" - } + tags = merge( + local.common_tags, + { + Name = "predictiq-${var.environment}-rds-sg" + } + ) } resource "aws_db_instance" "main" { @@ -90,9 +105,12 @@ resource "aws_db_instance" "main" { skip_final_snapshot = var.environment != "prod" final_snapshot_identifier = var.environment == "prod" ? 
"predictiq-${var.environment}-final-snapshot-${formatdate("YYYY-MM-DD-hhmm", timestamp())}" : null - tags = { - Name = "predictiq-${var.environment}-db" - } + tags = merge( + local.common_tags, + { + Name = "predictiq-${var.environment}-db" + } + ) } output "endpoint" { diff --git a/infrastructure/terraform/modules/redis/main.tf b/infrastructure/terraform/modules/redis/main.tf index 333e270a..b8518c71 100644 --- a/infrastructure/terraform/modules/redis/main.tf +++ b/infrastructure/terraform/modules/redis/main.tf @@ -22,13 +22,25 @@ variable "engine_version" { type = string } +locals { + common_tags = { + Project = "predictiq" + Environment = var.environment + Owner = "infrastructure-team" + ManagedBy = "terraform" + } +} + resource "aws_elasticache_subnet_group" "main" { name = "predictiq-${var.environment}-redis-subnet" subnet_ids = var.subnet_ids - tags = { - Name = "predictiq-${var.environment}-redis-subnet-group" - } + tags = merge( + local.common_tags, + { + Name = "predictiq-${var.environment}-redis-subnet-group" + } + ) } resource "aws_security_group" "redis" { @@ -49,9 +61,12 @@ resource "aws_security_group" "redis" { cidr_blocks = ["0.0.0.0/0"] } - tags = { - Name = "predictiq-${var.environment}-redis-sg" - } + tags = merge( + local.common_tags, + { + Name = "predictiq-${var.environment}-redis-sg" + } + ) } resource "aws_elasticache_cluster" "main" { @@ -72,9 +87,12 @@ resource "aws_elasticache_cluster" "main" { maintenance_window = "mon:03:00-mon:04:00" notification_topic_arn = null - tags = { - Name = "predictiq-${var.environment}-redis" - } + tags = merge( + local.common_tags, + { + Name = "predictiq-${var.environment}-redis" + } + ) } output "endpoint" { diff --git a/infrastructure/terraform/modules/vpc/main.tf b/infrastructure/terraform/modules/vpc/main.tf index 51542af0..2940fc29 100644 --- a/infrastructure/terraform/modules/vpc/main.tf +++ b/infrastructure/terraform/modules/vpc/main.tf @@ -6,22 +6,37 @@ variable "cidr_block" { type = string } 
+locals { + common_tags = { + Project = "predictiq" + Environment = var.environment + Owner = "infrastructure-team" + ManagedBy = "terraform" + } +} + resource "aws_vpc" "main" { cidr_block = var.cidr_block enable_dns_hostnames = true enable_dns_support = true - tags = { - Name = "predictiq-${var.environment}-vpc" - } + tags = merge( + local.common_tags, + { + Name = "predictiq-${var.environment}-vpc" + } + ) } resource "aws_internet_gateway" "main" { vpc_id = aws_vpc.main.id - tags = { - Name = "predictiq-${var.environment}-igw" - } + tags = merge( + local.common_tags, + { + Name = "predictiq-${var.environment}-igw" + } + ) } resource "aws_subnet" "public" { @@ -31,9 +46,13 @@ resource "aws_subnet" "public" { availability_zone = data.aws_availability_zones.available.names[count.index] map_public_ip_on_launch = true - tags = { - Name = "predictiq-${var.environment}-public-${count.index + 1}" - } + tags = merge( + local.common_tags, + { + Name = "predictiq-${var.environment}-public-${count.index + 1}" + Type = "public" + } + ) } resource "aws_subnet" "private" { @@ -42,18 +61,25 @@ resource "aws_subnet" "private" { cidr_block = cidrsubnet(var.cidr_block, 2, count.index + 2) availability_zone = data.aws_availability_zones.available.names[count.index] - tags = { - Name = "predictiq-${var.environment}-private-${count.index + 1}" - } + tags = merge( + local.common_tags, + { + Name = "predictiq-${var.environment}-private-${count.index + 1}" + Type = "private" + } + ) } resource "aws_eip" "nat" { count = 2 domain = "vpc" - tags = { - Name = "predictiq-${var.environment}-eip-${count.index + 1}" - } + tags = merge( + local.common_tags, + { + Name = "predictiq-${var.environment}-eip-${count.index + 1}" + } + ) depends_on = [aws_internet_gateway.main] } @@ -63,9 +89,12 @@ resource "aws_nat_gateway" "main" { allocation_id = aws_eip.nat[count.index].id subnet_id = aws_subnet.public[count.index].id - tags = { - Name = "predictiq-${var.environment}-nat-${count.index + 1}" - } + 
tags = merge( + local.common_tags, + { + Name = "predictiq-${var.environment}-nat-${count.index + 1}" + } + ) depends_on = [aws_internet_gateway.main] } @@ -78,9 +107,13 @@ resource "aws_route_table" "public" { gateway_id = aws_internet_gateway.main.id } - tags = { - Name = "predictiq-${var.environment}-public-rt" - } + tags = merge( + local.common_tags, + { + Name = "predictiq-${var.environment}-public-rt" + Type = "public" + } + ) } resource "aws_route_table" "private" { @@ -92,9 +125,13 @@ resource "aws_route_table" "private" { nat_gateway_id = aws_nat_gateway.main[count.index].id } - tags = { - Name = "predictiq-${var.environment}-private-rt-${count.index + 1}" - } + tags = merge( + local.common_tags, + { + Name = "predictiq-${var.environment}-private-rt-${count.index + 1}" + Type = "private" + } + ) } resource "aws_route_table_association" "public" { From 9f0351001c93d8785522c0f11ddf8ef92090bc94 Mon Sep 17 00:00:00 2001 From: Xoulomon Date: Thu, 28 May 2026 14:00:17 +0100 Subject: [PATCH 3/4] feat(#706): Add validation blocks to variables.tf - Add validation for aws_region (valid AWS region format) - Add validation for vpc_cidr_block (valid CIDR notation) - Add validation for db_name (lowercase, alphanumeric, max 63 chars) - Add validation for db_username (1-16 characters) - Add validation for db_password (minimum 8 characters) - Add validation for db_instance_class (valid RDS instance type) - Add validation for allocated_storage (20-65536 GB) - Add validation for backup_retention_days (1-35 days) - Add validation for redis_node_type (valid ElastiCache node type) - Add validation for redis_num_nodes (1-500 nodes) - Add validation for redis_engine_version (X.Y format) - Add validation for api_image_uri (valid ECR image URI) - Add validation for api_container_port (1024-65535) - Add validation for api_desired_count (1-10 tasks) - Add validation for api_cpu (256, 512, 1024, 2048, 4096) - Add validation for api_memory (valid Fargate memory values) - All 
validations include descriptive error messages --- infrastructure/terraform/variables.tf | 80 +++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/infrastructure/terraform/variables.tf b/infrastructure/terraform/variables.tf index 70ab2d16..a568035c 100644 --- a/infrastructure/terraform/variables.tf +++ b/infrastructure/terraform/variables.tf @@ -2,6 +2,11 @@ variable "aws_region" { description = "AWS region" type = string default = "us-east-1" + + validation { + condition = can(regex("^[a-z]{2}-[a-z]+-\\d{1}$", var.aws_region)) + error_message = "AWS region must be a valid region format (e.g., us-east-1, eu-west-1)." + } } variable "environment" { @@ -17,87 +22,162 @@ variable "vpc_cidr_block" { description = "CIDR block for VPC" type = string default = "10.0.0.0/16" + + validation { + condition = can(cidrhost(var.vpc_cidr_block, 0)) + error_message = "VPC CIDR block must be a valid CIDR notation (e.g., 10.0.0.0/16)." + } } variable "db_name" { description = "Database name" type = string default = "predictiq" + + validation { + condition = can(regex("^[a-z][a-z0-9_]*$", var.db_name)) && length(var.db_name) <= 63 + error_message = "Database name must start with a letter, contain only lowercase letters, numbers, and underscores, and be at most 63 characters." + } } variable "db_username" { description = "Database master username" type = string sensitive = true + + validation { + condition = length(var.db_username) >= 1 && length(var.db_username) <= 16 + error_message = "Database username must be between 1 and 16 characters." + } } variable "db_password" { description = "Database master password" type = string sensitive = true + + validation { + condition = length(var.db_password) >= 8 + error_message = "Database password must be at least 8 characters long." 
+ } } variable "db_instance_class" { description = "RDS instance class" type = string default = "db.t3.micro" + + validation { + condition = can(regex("^db\\.[a-z0-9]+\\.[a-z0-9]+$", var.db_instance_class)) + error_message = "RDS instance class must be a valid instance type (e.g., db.t3.micro, db.t3.small)." + } } variable "allocated_storage" { description = "Allocated storage in GB" type = number default = 20 + + validation { + condition = var.allocated_storage >= 20 && var.allocated_storage <= 65536 + error_message = "Allocated storage must be between 20 and 65536 GB." + } } variable "backup_retention_days" { description = "Backup retention period in days" type = number default = 7 + + validation { + condition = var.backup_retention_days >= 1 && var.backup_retention_days <= 35 + error_message = "Backup retention days must be between 1 and 35." + } } variable "redis_node_type" { description = "ElastiCache node type" type = string default = "cache.t3.micro" + + validation { + condition = can(regex("^cache\\.[a-z0-9]+\\.[a-z0-9]+$", var.redis_node_type)) + error_message = "Redis node type must be a valid ElastiCache node type (e.g., cache.t3.micro, cache.t3.small)." + } } variable "redis_num_nodes" { description = "Number of cache nodes" type = number default = 1 + + validation { + condition = var.redis_num_nodes >= 1 && var.redis_num_nodes <= 500 + error_message = "Number of Redis nodes must be between 1 and 500." + } } variable "redis_engine_version" { description = "Redis engine version" type = string default = "7.0" + + validation { + condition = can(regex("^\\d+\\.\\d+$", var.redis_engine_version)) + error_message = "Redis engine version must be in format X.Y (e.g., 7.0, 6.2)." 
+ } } variable "api_image_uri" { description = "ECR image URI for API" type = string + + validation { + condition = can(regex("^\\d+\\.dkr\\.ecr\\.[a-z0-9-]+\\.amazonaws\\.com/.+:.+$", var.api_image_uri)) + error_message = "API image URI must be a valid ECR image URI (e.g., 123456789.dkr.ecr.us-east-1.amazonaws.com/predictiq:latest)." + } } variable "api_container_port" { description = "API container port" type = number default = 8080 + + validation { + condition = var.api_container_port >= 1024 && var.api_container_port <= 65535 + error_message = "API container port must be between 1024 and 65535." + } } variable "api_desired_count" { description = "Desired number of API tasks" type = number default = 2 + + validation { + condition = var.api_desired_count >= 1 && var.api_desired_count <= 10 + error_message = "API desired count must be between 1 and 10." + } } variable "api_cpu" { description = "API task CPU units" type = number default = 256 + + validation { + condition = contains([256, 512, 1024, 2048, 4096], var.api_cpu) + error_message = "API CPU must be one of: 256, 512, 1024, 2048, 4096." + } } variable "api_memory" { description = "API task memory in MB" type = number default = 512 + + validation { + condition = contains([512, 1024, 2048, 3072, 4096, 5120, 6144, 7168, 8192], var.api_memory) + error_message = "API memory must be one of: 512, 1024, 2048, 3072, 4096, 5120, 6144, 7168, 8192." 
+ } } From d4e498145451b1fab0521da4cc1be1f2045b771c Mon Sep 17 00:00:00 2001 From: Xoulomon Date: Thu, 28 May 2026 14:01:46 +0100 Subject: [PATCH 4/4] feat(#707): Separate staging and production environments - Reorganize environments directory with separate staging/ and production/ subdirectories - Create separate terraform.tfvars for each environment - Create separate backend.hcl configurations for staging and production - Each environment has distinct S3 bucket and DynamoDB table for state management - Add comprehensive environments/README.md with deployment instructions - Document CI/CD approval process for production deployments - Prevent accidental production changes through state isolation - Update main infrastructure/README.md with new structure --- infrastructure/README.md | 34 +++- .../terraform/environments/README.md | 167 ++++++++++++++++++ .../environments/production/backend.hcl | 8 + .../terraform.tfvars} | 0 .../environments/staging/backend.hcl | 8 + .../terraform.tfvars} | 0 6 files changed, 209 insertions(+), 8 deletions(-) create mode 100644 infrastructure/terraform/environments/README.md create mode 100644 infrastructure/terraform/environments/production/backend.hcl rename infrastructure/terraform/environments/{prod.tfvars => production/terraform.tfvars} (100%) create mode 100644 infrastructure/terraform/environments/staging/backend.hcl rename infrastructure/terraform/environments/{staging.tfvars => staging/terraform.tfvars} (100%) diff --git a/infrastructure/README.md b/infrastructure/README.md index c84d7495..d6e1c547 100644 --- a/infrastructure/README.md +++ b/infrastructure/README.md @@ -8,12 +8,20 @@ This directory contains all infrastructure definitions for PredictIQ using Terra infrastructure/ ├── terraform/ │ ├── main.tf # Main configuration -│ ├── variables.tf # Variable definitions +│ ├── variables.tf # Variable definitions with validation │ ├── outputs.tf # Output definitions +│ ├── locals.tf # Common tags and locals +│ ├── bootstrap.sh 
# Bootstrap script for state backend +│ ├── backend-config.hcl # Default backend configuration │ ├── environments/ # Environment-specific configurations -│ │ ├── dev.tfvars -│ │ ├── staging.tfvars -│ │ └── prod.tfvars +│ │ ├── README.md # Environment separation documentation +│ │ ├── dev.tfvars # Development environment variables +│ │ ├── staging/ +│ │ │ ├── terraform.tfvars +│ │ │ └── backend.hcl +│ │ └── production/ +│ │ ├── terraform.tfvars +│ │ └── backend.hcl │ └── modules/ # Reusable modules │ ├── vpc/ │ ├── rds/ @@ -75,22 +83,32 @@ terraform init -backend-config=backend-config.hcl terraform plan -var-file="environments/dev.tfvars" # For staging environment -terraform plan -var-file="environments/staging.tfvars" +terraform plan -var-file="environments/staging/terraform.tfvars" # For production environment -terraform plan -var-file="environments/prod.tfvars" +terraform plan -var-file="environments/production/terraform.tfvars" ``` ### Apply Infrastructure Changes ```bash # Apply changes (requires approval) -terraform apply -var-file="environments/prod.tfvars" +terraform apply -var-file="environments/production/terraform.tfvars" # Auto-approve (use with caution) -terraform apply -auto-approve -var-file="environments/prod.tfvars" +terraform apply -auto-approve -var-file="environments/production/terraform.tfvars" ``` +## Environment Separation + +PredictIQ uses separate Terraform state files and backends for each environment: + +- **Development**: Local state (for testing only) +- **Staging**: Remote state in S3 with DynamoDB locking +- **Production**: Remote state in separate S3 bucket with DynamoDB locking + +See `environments/README.md` for detailed environment management instructions. 
+ ## Environments ### Development (dev) diff --git a/infrastructure/terraform/environments/README.md b/infrastructure/terraform/environments/README.md new file mode 100644 index 00000000..a5898dbb --- /dev/null +++ b/infrastructure/terraform/environments/README.md @@ -0,0 +1,167 @@ +# Terraform Environments + +This directory contains environment-specific configurations for PredictIQ infrastructure. + +## Directory Structure + +``` +environments/ +├── dev.tfvars # Development environment variables +├── staging/ +│ ├── terraform.tfvars # Staging environment variables +│ └── backend.hcl # Staging backend configuration +└── production/ + ├── terraform.tfvars # Production environment variables + └── backend.hcl # Production backend configuration +``` + +## Environment Separation + +Each environment has: +- **Separate state files**: Stored in different S3 buckets with distinct keys +- **Separate DynamoDB tables**: For state locking to prevent concurrent modifications +- **Distinct resource naming**: All resources are prefixed with environment name +- **Different resource sizing**: Production has higher capacity than staging + +## Deployment Instructions + +### Development Environment + +```bash +cd infrastructure/terraform +terraform init +terraform plan -var-file="environments/dev.tfvars" +terraform apply -var-file="environments/dev.tfvars" +``` + +### Staging Environment + +```bash +cd infrastructure/terraform + +# First time: bootstrap the backend +./bootstrap.sh us-east-1 staging + +# Initialize with staging backend +terraform init -backend-config=environments/staging/backend.hcl + +# Plan and apply +terraform plan -var-file="environments/staging/terraform.tfvars" +terraform apply -var-file="environments/staging/terraform.tfvars" +``` + +### Production Environment + +```bash +cd infrastructure/terraform + +# First time: bootstrap the backend +./bootstrap.sh us-east-1 production + +# Initialize with production backend +terraform init 
-backend-config=environments/production/backend.hcl + +# Plan and apply (requires explicit approval) +terraform plan -var-file="environments/production/terraform.tfvars" +terraform apply -var-file="environments/production/terraform.tfvars" +``` + +## CI/CD Deployment + +### Staging Deployment + +Staging deployments are automatic on merge to `main` branch: + +```yaml +- name: Deploy to Staging + run: | + cd infrastructure/terraform + terraform init -backend-config=environments/staging/backend.hcl + terraform plan -var-file="environments/staging/terraform.tfvars" + terraform apply -auto-approve -var-file="environments/staging/terraform.tfvars" +``` + +### Production Deployment + +Production deployments require explicit approval: + +```yaml +- name: Plan Production Changes + run: | + cd infrastructure/terraform + terraform init -backend-config=environments/production/backend.hcl + terraform plan -var-file="environments/production/terraform.tfvars" -out=tfplan + +- name: Approve and Apply Production + if: github.event_name == 'workflow_dispatch' + run: | + cd infrastructure/terraform + terraform apply tfplan +``` + +## State File Locations + +| Environment | S3 Bucket | DynamoDB Table | State Key | +|-------------|-----------|----------------|-----------| +| Development | Local | N/A | N/A | +| Staging | `predictiq-terraform-state-staging` | `terraform-locks-staging` | `staging/terraform.tfstate` | +| Production | `predictiq-terraform-state-production` | `terraform-locks-production` | `production/terraform.tfstate` | + +## Important Notes + +### Preventing Accidental Production Changes + +1. **State Locking**: DynamoDB tables prevent concurrent modifications +2. **Separate Backends**: Production state is isolated from staging +3. **CI/CD Approval**: Production changes require manual approval +4. 
**Resource Naming**: All resources include an environment prefix (e.g., `predictiq-prod-vpc`) + +### Switching Environments + +When switching between environments, always reinitialize Terraform: + +```bash +# Switch from staging to production +terraform init -backend-config=environments/production/backend.hcl -reconfigure + +# Switch from production to staging +terraform init -backend-config=environments/staging/backend.hcl -reconfigure +``` + +### Disaster Recovery + +If state is corrupted: + +1. **Staging**: Can be recreated from scratch +2. **Production**: Contact the infrastructure team before any recovery action + +```bash +# Force unlock if state is locked (replace LOCK_ID with the ID shown in the lock error message) +terraform force-unlock LOCK_ID + +# Refresh state from AWS +terraform refresh -var-file="environments/production/terraform.tfvars" +``` + +## Monitoring Environment Health + +```bash +# Check staging resources +aws ec2 describe-instances --filters "Name=tag:Environment,Values=staging" + +# Check production resources +aws ec2 describe-instances --filters "Name=tag:Environment,Values=prod" + +# View state file versions +aws s3api list-object-versions --bucket predictiq-terraform-state-production +``` + +## Best Practices + +1. **Always test in staging first** before applying to production +2. **Review terraform plan output** carefully before applying +3. **Use terraform workspace** for additional isolation if needed +4. **Keep backend configurations** in version control (no secrets) +5. **Enable MFA** for production deployments +6. **Document all manual changes** made outside Terraform +7. 
**Regularly back up state files** using S3 versioning diff --git a/infrastructure/terraform/environments/production/backend.hcl b/infrastructure/terraform/environments/production/backend.hcl new file mode 100644 index 00000000..2e10f3c1 --- /dev/null +++ b/infrastructure/terraform/environments/production/backend.hcl @@ -0,0 +1,8 @@ +# Backend configuration for production environment +# Usage: terraform init -backend-config=environments/production/backend.hcl + +bucket = "predictiq-terraform-state-production" +key = "production/terraform.tfstate" +region = "us-east-1" +encrypt = true +dynamodb_table = "terraform-locks-production" diff --git a/infrastructure/terraform/environments/prod.tfvars b/infrastructure/terraform/environments/production/terraform.tfvars similarity index 100% rename from infrastructure/terraform/environments/prod.tfvars rename to infrastructure/terraform/environments/production/terraform.tfvars diff --git a/infrastructure/terraform/environments/staging/backend.hcl b/infrastructure/terraform/environments/staging/backend.hcl new file mode 100644 index 00000000..0c42b91e --- /dev/null +++ b/infrastructure/terraform/environments/staging/backend.hcl @@ -0,0 +1,8 @@ +# Backend configuration for staging environment +# Usage: terraform init -backend-config=environments/staging/backend.hcl + +bucket = "predictiq-terraform-state-staging" +key = "staging/terraform.tfstate" +region = "us-east-1" +encrypt = true +dynamodb_table = "terraform-locks-staging" diff --git a/infrastructure/terraform/environments/staging.tfvars b/infrastructure/terraform/environments/staging/terraform.tfvars similarity index 100% rename from infrastructure/terraform/environments/staging.tfvars rename to infrastructure/terraform/environments/staging/terraform.tfvars