Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ build-and-upload:build-and-upload/client-proxy
build-and-upload:build-and-upload/dashboard-api
build-and-upload:build-and-upload/docker-reverse-proxy
build-and-upload:build-and-upload/clean-nfs-cache
build-and-upload:build-and-upload/clean-rapid-cache
build-and-upload:build-and-upload/orchestrator
build-and-upload:build-and-upload/template-manager
build-and-upload:build-and-upload/envd
Expand All @@ -90,6 +91,9 @@ build-and-upload:build-and-upload/nomad-nodepool-apm
build-and-upload/clean-nfs-cache:
./scripts/confirm.sh $(TERRAFORM_ENVIRONMENT)
GCP_PROJECT_ID=$(GCP_PROJECT_ID) $(MAKE) -C packages/orchestrator build-and-upload/clean-nfs-cache
# Runs scripts/confirm.sh for the selected Terraform environment (presumably an
# interactive confirmation -- verify against the script), then delegates to the
# build-and-upload/clean-rapid-cache target in packages/orchestrator, forwarding
# GCP_PROJECT_ID.
build-and-upload/clean-rapid-cache:
./scripts/confirm.sh $(TERRAFORM_ENVIRONMENT)
GCP_PROJECT_ID=$(GCP_PROJECT_ID) $(MAKE) -C packages/orchestrator build-and-upload/clean-rapid-cache
build-and-upload/template-manager:
./scripts/confirm.sh $(TERRAFORM_ENVIRONMENT)
GCP_PROJECT_ID=$(GCP_PROJECT_ID) $(MAKE) -C packages/orchestrator build-and-upload/template-manager
Expand Down
11 changes: 9 additions & 2 deletions iac/provider-gcp/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -315,8 +315,11 @@ module "nomad" {
envd_timeout = var.envd_timeout
persistent_volume_mounts = { for key, config in local.persistent_volume_types : key => config["local_mount_path"] }
default_persistent_volume_type = var.default_persistent_volume_type
orchestrator_env_vars = var.orchestrator_env_vars
orchestrator_enabled = var.orchestrator_enabled
orchestrator_env_vars = merge(
var.orchestrator_env_vars,
var.rapid_bucket_cache_bucket_name != "" ? { RAPID_BUCKET_CACHE_BUCKET_NAME = var.rapid_bucket_cache_bucket_name } : {},
)
orchestrator_enabled = var.orchestrator_enabled

# Template manager
builder_node_pool = var.build_node_pool
Expand All @@ -334,6 +337,10 @@ module "nomad" {

# Filestore
shared_chunk_cache_path = module.cluster.shared_chunk_cache_path
rapid_bucket_cache_bucket_name = var.rapid_bucket_cache_bucket_name
rapid_bucket_cache_cleanup_dry_run = var.rapid_bucket_cache_cleanup_dry_run
rapid_bucket_cache_cleanup_max_age = var.rapid_bucket_cache_cleanup_max_age
rapid_bucket_cache_cleanup_max_deletions = var.rapid_bucket_cache_cleanup_max_deletions
filestore_cache_cleanup_disk_usage_target = var.filestore_cache_cleanup_disk_usage_target
filestore_cache_cleanup_dry_run = var.filestore_cache_cleanup_dry_run
filestore_cache_cleanup_deletions_per_loop = var.filestore_cache_cleanup_deletions_per_loop
Expand Down
48 changes: 48 additions & 0 deletions iac/provider-gcp/nomad/jobs/clean-rapid-cache.hcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Periodic Nomad batch job that runs the clean-rapid-cache binary against the
# rapid cache bucket. This file is a template: the placeholder values are
# supplied by the Terraform resource that renders it.
job "rapid-cache-cleanup" {
type = "batch"
node_pool = "${node_pool}"

# Launch at minute 0 of every hour; a new launch is not started while the
# previous one is still running.
periodic {
cron = "0 * * * *"
prohibit_overlap = true
time_zone = "America/Los_Angeles"
}

group "rapid-cache-cleanup" {
# A failed run is not restarted in place.
restart {
attempts = 0
mode = "fail"
}

task "rapid-cache-cleanup" {
driver = "raw_exec"

resources {
memory = 512
}

# Bucket name and Redis connection settings read by the binary at startup.
env {
RAPID_BUCKET_CACHE_BUCKET_NAME = "${bucket_name}"
REDIS_URL = "${redis_url}"
REDIS_CLUSTER_URL = "${redis_cluster_url}"
REDIS_TLS_CA_BASE64 = "${redis_tls_ca_base64}"
}

# Flags come first; the bucket name is passed as the trailing positional
# argument.
config {
command = "local/clean-rapid-cache"
args = [
"--dry-run=${dry_run}",
"--max-age=${max_age}",
"--max-deletions=${max_deletions}",
"${bucket_name}",
]
}

# Fetch the binary as a single file at the path used by the command above.
artifact {
source = "${artifact_source}"
destination = "local/clean-rapid-cache"
mode = "file"
}
}
}
}
25 changes: 24 additions & 1 deletion iac/provider-gcp/nomad/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -673,8 +673,15 @@ data "google_storage_bucket_object" "filestore_cleanup" {
bucket = var.fc_env_pipeline_bucket_name
}

# Looks up the clean-rapid-cache object in the env pipeline bucket. It is only
# read when a rapid cache bucket name is configured (count is 0 otherwise).
data "google_storage_bucket_object" "rapid_cache_cleanup" {
count = var.rapid_bucket_cache_bucket_name != "" ? 1 : 0
name = "clean-rapid-cache"
bucket = var.fc_env_pipeline_bucket_name
}

locals {
clean_nfs_cache_artifact_source = "gcs::https://www.googleapis.com/storage/v1/${var.fc_env_pipeline_bucket_name}/clean-nfs-cache?version=${data.google_storage_bucket_object.filestore_cleanup.generation}"
clean_nfs_cache_artifact_source = "gcs::https://www.googleapis.com/storage/v1/${var.fc_env_pipeline_bucket_name}/clean-nfs-cache?version=${data.google_storage_bucket_object.filestore_cleanup.generation}"
clean_rapid_cache_artifact_source = var.rapid_bucket_cache_bucket_name != "" ? "gcs::https://www.googleapis.com/storage/v1/${var.fc_env_pipeline_bucket_name}/clean-rapid-cache?version=${data.google_storage_bucket_object.rapid_cache_cleanup[0].generation}" : ""
}

resource "nomad_job" "clean_nfs_cache" {
Expand All @@ -696,3 +703,19 @@ resource "nomad_job" "clean_nfs_cache" {
launch_darkly_api_key = trimspace(data.google_secret_manager_secret_version.launch_darkly_api_key.secret_data)
})
}

# Registers the rapid cache cleanup job with Nomad, rendered from
# jobs/clean-rapid-cache.hcl. The job is only created when a rapid cache bucket
# name is configured, and it is placed on the builder node pool.
resource "nomad_job" "clean_rapid_cache" {
count = var.rapid_bucket_cache_bucket_name != "" ? 1 : 0

jobspec = templatefile("${path.module}/jobs/clean-rapid-cache.hcl", {
node_pool = var.builder_node_pool
artifact_source = local.clean_rapid_cache_artifact_source
bucket_name = var.rapid_bucket_cache_bucket_name
dry_run = var.rapid_bucket_cache_cleanup_dry_run
max_age = var.rapid_bucket_cache_cleanup_max_age
max_deletions = var.rapid_bucket_cache_cleanup_max_deletions
redis_url = local.redis_url
redis_cluster_url = local.redis_cluster_url
# The secret value is trimmed of surrounding whitespace before being passed on.
redis_tls_ca_base64 = trimspace(data.google_secret_manager_secret_version.redis_tls_ca_base64.secret_data)
})
}
20 changes: 20 additions & 0 deletions iac/provider-gcp/nomad/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,26 @@ variable "shared_chunk_cache_path" {
default = ""
}

variable "rapid_bucket_cache_bucket_name" {
  type        = string
  description = "Name of the rapid cache bucket. When empty, the rapid cache cleanup job is not deployed."
  default     = ""
}

variable "rapid_bucket_cache_cleanup_dry_run" {
  type        = bool
  description = "Passed to the rapid cache cleanup job as --dry-run. When true, the job does not delete anything."
  default     = true
}

variable "rapid_bucket_cache_cleanup_max_age" {
  type        = string
  description = "Passed to the rapid cache cleanup job as --max-age: delete objects older than this. Go duration string such as 168h."
  default     = "168h"
}

variable "rapid_bucket_cache_cleanup_max_deletions" {
  type        = number
  description = "Passed to the rapid cache cleanup job as --max-deletions: maximum number of objects to delete in one run."
  default     = 10000
}

variable "filestore_cache_cleanup_disk_usage_target" {
type = number
description = "The disk usage target for the Filestore cache in percent"
Expand Down
20 changes: 20 additions & 0 deletions iac/provider-gcp/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -835,6 +835,26 @@ variable "anywhere_cache_ttl" {
default = null
}

variable "rapid_bucket_cache_bucket_name" {
  type        = string
  description = "Name of the rapid cache bucket. When set, it is exported to the orchestrator as RAPID_BUCKET_CACHE_BUCKET_NAME and forwarded to the nomad module; when empty, neither happens."
  default     = ""
}

variable "rapid_bucket_cache_cleanup_dry_run" {
  type        = bool
  description = "Passed to the rapid cache cleanup job as --dry-run. When true, the job does not delete anything."
  default     = true
}

variable "rapid_bucket_cache_cleanup_max_age" {
  type        = string
  description = "Passed to the rapid cache cleanup job as --max-age: delete objects older than this. Go duration string such as 168h."
  default     = "168h"
}

variable "rapid_bucket_cache_cleanup_max_deletions" {
  type        = number
  description = "Passed to the rapid cache cleanup job as --max-deletions: maximum number of objects to delete in one run."
  default     = 10000
}

variable "orchestrator_env_vars" {
type = map(string)
default = {}
Expand Down
1 change: 1 addition & 0 deletions packages/orchestrator/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,5 @@ RUN --mount=type=cache,target=/root/.cache/go-build make build-local COMMIT_SHA=
FROM scratch

COPY --from=builder /build/orchestrator/bin/clean-nfs-cache .
COPY --from=builder /build/orchestrator/bin/clean-rapid-cache .
COPY --from=builder /build/orchestrator/bin/orchestrator .
13 changes: 13 additions & 0 deletions packages/orchestrator/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ build-local:
$(eval COMMIT_SHA ?= $(shell git rev-parse --short HEAD))
CGO_ENABLED=1 GOOS=linux GOARCH=$(BUILD_ARCH) go build -o bin/orchestrator -ldflags "-X=main.commitSHA=$(COMMIT_SHA)" .
CGO_ENABLED=1 GOOS=linux GOARCH=$(BUILD_ARCH) go build -o bin/clean-nfs-cache -ldflags "-X=main.commitSHA=$(COMMIT_SHA)" ./cmd/clean-nfs-cache
CGO_ENABLED=0 GOOS=linux GOARCH=$(BUILD_ARCH) go build -o bin/clean-rapid-cache ./cmd/clean-rapid-cache

.PHONY: build-debug
build-debug:
Expand Down Expand Up @@ -96,6 +97,15 @@ else
gsutil -h "Cache-Control:no-cache, max-age=0" cp ./bin/clean-nfs-cache "gs://${GCP_BUCKET_PREFIX}fc-env-pipeline/clean-nfs-cache"
endif

# Marks the built clean-rapid-cache binary as executable. For any provider other
# than aws it then copies the binary to the fc-env-pipeline bucket with caching
# disabled; on aws it only prints a notice.
.PHONY: upload/clean-rapid-cache
upload/clean-rapid-cache:
chmod +x ./bin/clean-rapid-cache
ifeq ($(PROVIDER),aws)
@echo "clean-rapid-cache is GCP-only"
else
gsutil -h "Cache-Control:no-cache, max-age=0" cp ./bin/clean-rapid-cache "gs://${GCP_BUCKET_PREFIX}fc-env-pipeline/clean-rapid-cache"
endif

.PHONY: upload/orchestrator
upload/orchestrator:
chmod +x ./bin/orchestrator
Expand All @@ -117,6 +127,9 @@ endif
.PHONY: build-and-upload/clean-nfs-cache
build-and-upload/clean-nfs-cache: build upload/clean-nfs-cache

# Runs the build target, then uploads the clean-rapid-cache binary.
.PHONY: build-and-upload/clean-rapid-cache
build-and-upload/clean-rapid-cache: build upload/clean-rapid-cache

.PHONY: build-and-upload/orchestrator
build-and-upload/orchestrator: build upload/orchestrator

Expand Down
176 changes: 176 additions & 0 deletions packages/orchestrator/cmd/clean-rapid-cache/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
package main

import (
"context"
"errors"
"flag"
"fmt"
"log"
"os"
"time"

gcs "cloud.google.com/go/storage"
"cloud.google.com/go/storage/experimental"
"google.golang.org/api/iterator"

"github.com/e2b-dev/infra/packages/shared/pkg/factories"
"github.com/e2b-dev/infra/packages/shared/pkg/storage"
)

const defaultPrefix = "rapid-cache/"

// main parses the command line, validates the settings, and runs one cleanup
// pass over the rapid cache bucket.
//
// The bucket is taken from the first positional argument when one is given and
// from RAPID_BUCKET_CACHE_BUCKET_NAME otherwise. An invalid setting or a
// cleanup error terminates the process through log.Fatal.
func main() {
	fs := flag.NewFlagSet("clean-rapid-cache", flag.ExitOnError)
	prefix := fs.String("prefix", defaultPrefix, "cache object prefix")
	maxAge := fs.Duration("max-age", 7*24*time.Hour, "delete objects older than this")
	maxDeletions := fs.Int("max-deletions", 10000, "maximum objects to delete")
	dryRun := fs.Bool("dry-run", true, "dry run")
	if err := fs.Parse(os.Args[1:]); err != nil {
		log.Fatal(err)
	}

	// A positional argument wins over the environment variable.
	bucket := fs.Arg(0)
	if fs.NArg() == 0 {
		bucket = os.Getenv("RAPID_BUCKET_CACHE_BUCKET_NAME")
	}

	switch {
	case bucket == "":
		log.Fatal("missing bucket")
	case *prefix == "":
		log.Fatal("missing prefix")
	case *maxAge <= 0:
		log.Fatal("max-age must be positive")
	case *maxDeletions <= 0:
		log.Fatal("max-deletions must be positive")
	}

	ctx := context.Background()
	index, closeIndex := newRapidIndex(ctx, bucket)
	cutoff := time.Now().Add(-*maxAge)

	// The index is closed explicitly before log.Fatal, because log.Fatal exits
	// without running deferred calls.
	cleanErr := clean(ctx, bucket, *prefix, cutoff, *maxDeletions, *dryRun, index)
	closeIndex()
	if cleanErr != nil {
		log.Fatal(cleanErr)
	}
}

// clean opens a storage client and runs both cleanup passes against bucket.
//
// The index-driven pass runs first. Whatever is left of the maxDeletions
// budget afterwards is handed to the bucket listing pass, whose error (if any)
// is returned. The client is closed before clean returns.
func clean(ctx context.Context, bucket string, prefix string, cutoff time.Time, maxDeletions int, dryRun bool, index storage.RapidCacheIndex) error {
	client, err := gcs.NewGRPCClient(ctx, experimental.WithZonalBucketAPIs())
	if err != nil {
		return fmt.Errorf("create storage client: %w", err)
	}
	// The close error is deliberately discarded.
	defer func() { _ = client.Close() }()

	fromIndex := cleanFromIndex(ctx, client, bucket, cutoff, maxDeletions, dryRun, index)
	remaining := maxDeletions - fromIndex

	return cleanFromBucket(ctx, client, bucket, prefix, cutoff, remaining, dryRun, index)
}

func cleanFromIndex(ctx context.Context, client *gcs.Client, bucket string, cutoff time.Time, maxDeletions int, dryRun bool, index storage.RapidCacheIndex) int {
candidates, err := index.Candidates(ctx, cutoff, int64(maxDeletions))
if err != nil || len(candidates) == 0 {
return 0
}

deleted := 0
for _, path := range candidates {
lastAccess, ok, err := index.LastAccess(ctx, path)
if err != nil || (ok && lastAccess >= cutoff.Unix()) {
continue
}
obj := client.Bucket(bucket).Object(path)
attrs, err := obj.Attrs(ctx)
if errors.Is(err, gcs.ErrObjectNotExist) {
if !dryRun {
_ = index.Evict(ctx, path, 0)
}

continue
}
if err != nil {
continue
}
if dryRun {
deleted++

continue
}
if err := obj.Delete(ctx); err != nil {
continue
}
_ = index.Evict(ctx, path, attrs.Size)
deleted++
Comment thread
ValentaTomas marked this conversation as resolved.
}

return deleted
}

func cleanFromBucket(ctx context.Context, client *gcs.Client, bucket string, prefix string, cutoff time.Time, maxDeletions int, dryRun bool, index storage.RapidCacheIndex) error {
if maxDeletions <= 0 {
return nil
}

var scanned, matched, deleted int
objects := client.Bucket(bucket).Objects(ctx, &gcs.Query{Prefix: prefix})
for {
attrs, err := objects.Next()
if errors.Is(err, iterator.Done) {
break
}
if err != nil {
return fmt.Errorf("list cache objects: %w", err)
}

scanned++
if !attrs.Updated.Before(cutoff) {
continue
}
lastAccess, ok, err := index.LastAccess(ctx, attrs.Name)
if err == nil && ok && lastAccess >= cutoff.Unix() {
continue
Comment thread
ValentaTomas marked this conversation as resolved.
}
matched++
if deleted >= maxDeletions {
break
}
if dryRun {
deleted++

continue
}
if err := client.Bucket(bucket).Object(attrs.Name).Delete(ctx); err != nil {
return fmt.Errorf("delete cache object: %w", err)
}
_ = index.Evict(ctx, attrs.Name, attrs.Size)
deleted++
}
Comment thread
ValentaTomas marked this conversation as resolved.

log.Printf("summary dry_run=%t scanned=%d matched=%d deleted=%d", dryRun, scanned, matched, deleted)

return nil
}

// newRapidIndex returns the rapid cache index for bucket together with a
// function that releases its resources.
//
// The Redis connection settings are read from REDIS_URL, REDIS_CLUSTER_URL and
// REDIS_TLS_CA_BASE64. If the Redis client cannot be created, the failure is
// logged and a no-op index with a no-op close function is returned, so the
// caller can still run.
func newRapidIndex(ctx context.Context, bucket string) (storage.RapidCacheIndex, func()) {
	redisClient, err := factories.NewRedisClient(ctx, factories.RedisConfig{
		RedisURL:         os.Getenv("REDIS_URL"),
		RedisClusterURL:  os.Getenv("REDIS_CLUSTER_URL"),
		RedisTLSCABase64: os.Getenv("REDIS_TLS_CA_BASE64"),
	})
	if err != nil {
		// Log the fallback so the missing index is visible in the job output.
		log.Printf("create redis client, falling back to no-op rapid cache index: %v", err)

		return storage.NoopRapidCacheIndex(), func() {}
	}

	return storage.NewRedisRapidCacheIndex(redisClient, bucket), func() {
		if err := factories.CloseCleanly(redisClient); err != nil {
			log.Printf("close redis client: %v", err)
		}
	}
}
Loading
Loading