Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
349 changes: 180 additions & 169 deletions packages/api/internal/api/api.gen.go

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions packages/api/internal/handlers/proxy_grpc.go
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,7 @@ func (s *SandboxService) ResumeSandbox(ctx context.Context, req *proxygrpc.Sandb
s.api.buildResumeSandboxData(sandboxID, nil),
&headers,
true,
false,
nil, // mcp
)
if apiErr != nil {
Expand Down
4 changes: 4 additions & 0 deletions packages/api/internal/handlers/sandbox.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ func (a *APIStore) startSandbox(
getSandboxData orchestrator.SandboxDataFetcher,
requestHeader *http.Header,
isResume bool,
rebootFromRootfs bool,
mcp api.Mcp,
) (*api.Sandbox, *api.APIError) {
sbx, apiErr := a.startSandboxInternal(
Expand All @@ -38,6 +39,7 @@ func (a *APIStore) startSandbox(
getSandboxData,
requestHeader,
isResume,
rebootFromRootfs,
mcp,
)
if apiErr != nil {
Expand All @@ -56,6 +58,7 @@ func (a *APIStore) startSandboxInternal(
getSandboxData orchestrator.SandboxDataFetcher,
requestHeader *http.Header,
isResume bool,
rebootFromRootfs bool,
mcp api.Mcp,
) (sandbox.Sandbox, *api.APIError) {
startTime := time.Now()
Expand All @@ -77,6 +80,7 @@ func (a *APIStore) startSandboxInternal(
timeout,
isResume,
creationMeta,
rebootFromRootfs,
)
if instanceErr != nil {
telemetry.ReportError(ctx, "error when creating instance", instanceErr.Err)
Expand Down
2 changes: 2 additions & 0 deletions packages/api/internal/handlers/sandbox_connect.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ func (a *APIStore) PostSandboxesSandboxIDConnect(c *gin.Context, sandboxID api.S

return
}
rebootFromRootfs := body.Reboot != nil && *body.Reboot

teamID := teamInfo.Team.ID

Expand Down Expand Up @@ -152,6 +153,7 @@ func (a *APIStore) PostSandboxesSandboxIDConnect(c *gin.Context, sandboxID api.S
a.buildResumeSandboxData(sandboxID, nil),
&c.Request.Header,
true,
rebootFromRootfs,
nil, // mcp
)
if createErr != nil {
Expand Down
1 change: 1 addition & 0 deletions packages/api/internal/handlers/sandbox_create.go
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,7 @@ func (a *APIStore) PostSandboxes(c *gin.Context) {
getSandboxData,
&c.Request.Header,
false,
false,
mcp,
)
if createErr != nil {
Expand Down
20 changes: 19 additions & 1 deletion packages/api/internal/handlers/sandbox_pause.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,15 @@ import (
"github.com/e2b-dev/infra/packages/api/internal/sandbox"
"github.com/e2b-dev/infra/packages/api/internal/utils"
"github.com/e2b-dev/infra/packages/auth/pkg/auth"
"github.com/e2b-dev/infra/packages/shared/pkg/ginutils"
"github.com/e2b-dev/infra/packages/shared/pkg/logger"
"github.com/e2b-dev/infra/packages/shared/pkg/telemetry"
)

type pauseSandboxBody struct {
Memory *bool `json:"memory,omitempty"`
}

func (a *APIStore) PostSandboxesSandboxIDPause(c *gin.Context, sandboxID api.SandboxID) {
ctx := c.Request.Context()
// Get team from context, use TeamContextKey
Expand All @@ -42,9 +47,22 @@ func (a *APIStore) PostSandboxesSandboxIDPause(c *gin.Context, sandboxID api.San
traceID := span.SpanContext().TraceID().String()
c.Set("traceID", traceID)

memory := true
if c.Request.ContentLength != 0 {
body, err := ginutils.ParseBody[pauseSandboxBody](ctx, c)
if err != nil {
a.sendAPIStoreError(c, http.StatusBadRequest, fmt.Sprintf("Error when parsing request: %s", err))

return
}
if body.Memory != nil {
memory = *body.Memory
}
}

pause.LogInitiated(ctx, sandboxID, teamID.String(), pause.ReasonRequest)

err = a.orchestrator.RemoveSandbox(ctx, teamID, sandboxID, sandbox.RemoveOpts{Action: sandbox.StateActionPause})
err = a.orchestrator.RemoveSandbox(ctx, teamID, sandboxID, sandbox.RemoveOpts{Action: sandbox.StateActionPause, SkipMemory: !memory})
var transErr *sandbox.InvalidStateTransitionError

switch {
Expand Down
2 changes: 2 additions & 0 deletions packages/api/internal/handlers/sandbox_resume.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ func (a *APIStore) PostSandboxesSandboxIDResume(c *gin.Context, sandboxID api.Sa
}

telemetry.ReportEvent(ctx, "Parsed body")
rebootFromRootfs := body.Reboot != nil && *body.Reboot

timeout := sandbox.SandboxTimeoutDefault
if body.Timeout != nil {
Expand Down Expand Up @@ -160,6 +161,7 @@ func (a *APIStore) PostSandboxesSandboxIDResume(c *gin.Context, sandboxID api.Sa
a.buildResumeSandboxData(sandboxID, body.AutoPause),
&c.Request.Header,
true,
rebootFromRootfs,
nil, // mcp
)
if createErr != nil {
Expand Down
3 changes: 3 additions & 0 deletions packages/api/internal/handlers/snapshot_template_create.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ func (a *APIStore) PostSandboxesSandboxIDSnapshots(c *gin.Context, sandboxID api
opts := orchestrator.SnapshotTemplateOpts{
Tag: id.DefaultTag,
}
if body.Memory != nil {
opts.SkipMemory = !*body.Memory
}

if body.Name != nil {
identifier, tag, err := id.ParseName(*body.Name)
Expand Down
16 changes: 10 additions & 6 deletions packages/api/internal/orchestrator/create_instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
"go.uber.org/zap"
"google.golang.org/grpc/metadata"
"google.golang.org/protobuf/types/known/timestamppb"

"github.com/e2b-dev/infra/packages/api/internal/api"
Expand Down Expand Up @@ -127,6 +128,7 @@ func (o *Orchestrator) CreateSandbox(
timeout time.Duration,
isResume bool,
creationMeta sandbox.CreationMetadata,
rebootFromRootfsOpt ...bool,
) (sbx sandbox.Sandbox, apiErr *api.APIError) {
ctx, childSpan := tracer.Start(ctx, "create-sandbox")
defer childSpan.End()
Expand Down Expand Up @@ -252,6 +254,10 @@ func (o *Orchestrator) CreateSandbox(
TimeoutSeconds: sbxData.AutoResume.Timeout,
}
}
rebootFromRootfs := len(rebootFromRootfsOpt) > 0 && rebootFromRootfsOpt[0]
if rebootFromRootfs {
ctx = metadata.AppendToOutgoingContext(ctx, orchestrator.SandboxRebootFromRootfsGRPCMetadataKey, "true")
}

sbxRequest := &orchestrator.SandboxCreateRequest{
Sandbox: &orchestrator.SandboxConfig{
Expand Down Expand Up @@ -365,12 +371,10 @@ func (o *Orchestrator) CreateSandbox(
// Copy to a new variable to avoid race conditions
sbxToRemove := sbx
go func() {
killErr := o.removeSandboxFromNode(
context.WithoutCancel(ctx),
sbxToRemove,
sandbox.StateActionKill,
sandbox.KillReasonUnknown,
)
killErr := o.removeSandboxFromNode(context.WithoutCancel(ctx), sbxToRemove, sandbox.RemoveOpts{
Action: sandbox.StateActionKill,
Reason: sandbox.KillReasonUnknown,
})
if killErr != nil {
logger.L().Error(ctx, "Error removing sandbox",
zap.Error(killErr),
Expand Down
25 changes: 10 additions & 15 deletions packages/api/internal/orchestrator/delete_instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ func (o *Orchestrator) RemoveSandbox(ctx context.Context, teamID uuid.UUID, sand
defer func() { go o.analyticsRemove(context.WithoutCancel(ctx), sbx, opts.Action) }()
// Once we start the removal process, we want to make sure it gets removed from the store
defer o.sandboxStore.Remove(context.WithoutCancel(ctx), teamID, sandboxID)
err = o.removeSandboxFromNode(ctx, sbx, opts.Action, opts.Reason)
err = o.removeSandboxFromNode(ctx, sbx, opts)
if err != nil {
fields := []zap.Field{
zap.String("state_action", opts.Action.Name),
Expand All @@ -123,12 +123,7 @@ func (o *Orchestrator) RemoveSandbox(ctx context.Context, teamID uuid.UUID, sand
return nil
}

func (o *Orchestrator) removeSandboxFromNode(
ctx context.Context,
sbx sandbox.Sandbox,
stateAction sandbox.StateAction,
reason sandbox.KillReason,
) error {
func (o *Orchestrator) removeSandboxFromNode(ctx context.Context, sbx sandbox.Sandbox, opts sandbox.RemoveOpts) error {
ctx, span := tracer.Start(ctx, "remove-sandbox-from-node")
defer span.End()

Expand All @@ -137,8 +132,8 @@ func (o *Orchestrator) removeSandboxFromNode(
fields := []zap.Field{
logger.WithNodeID(sbx.NodeID),
}
if stateAction == sandbox.StateActionKill {
fields = append(fields, zap.String("kill_reason", reason.String()))
if opts.Action == sandbox.StateActionKill {
fields = append(fields, zap.String("kill_reason", opts.Reason.String()))
}

logger.L().Error(ctx, "failed to get node", fields...)
Expand All @@ -156,8 +151,8 @@ func (o *Orchestrator) removeSandboxFromNode(
zap.Error(err),
logger.WithSandboxID(sbx.SandboxID),
}
if stateAction == sandbox.StateActionKill {
fields = append(fields, zap.String("kill_reason", reason.String()))
if opts.Action == sandbox.StateActionKill {
fields = append(fields, zap.String("kill_reason", opts.Reason.String()))
}

logger.L().Error(ctx, "error removing routing record from catalog", fields...)
Expand All @@ -166,12 +161,12 @@ func (o *Orchestrator) removeSandboxFromNode(

sbxlogger.I(sbx).Debug(ctx, "Removing sandbox",
zap.Bool("auto_pause", sbx.AutoPause),
zap.String("state_action", stateAction.Name),
zap.String("state_action", opts.Action.Name),
)

switch stateAction {
switch opts.Action {
case sandbox.StateActionPause:
err := o.pauseSandbox(ctx, node, sbx)
err := o.pauseSandbox(ctx, node, sbx, opts.SkipMemory)
if err != nil {
if dberrors.IsForeignKeyViolation(err) {
killErr := o.killSandboxOnNode(ctx, node, sbx, sandbox.KillReasonBaseTemplateMissing)
Expand All @@ -191,7 +186,7 @@ func (o *Orchestrator) removeSandboxFromNode(

return nil
case sandbox.StateActionKill:
return o.killSandboxOnNode(ctx, node, sbx, reason)
return o.killSandboxOnNode(ctx, node, sbx, opts.Reason)
}

return nil
Expand Down
15 changes: 9 additions & 6 deletions packages/api/internal/orchestrator/pause_instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
"github.com/google/uuid"
"github.com/jackc/pgx/v5/pgtype"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/metadata"

"github.com/e2b-dev/infra/packages/api/internal/orchestrator/nodemanager"
"github.com/e2b-dev/infra/packages/api/internal/sandbox"
Expand All @@ -26,18 +27,18 @@
return "The pause queue is exhausted"
}

func (o *Orchestrator) pauseSandbox(ctx context.Context, node *nodemanager.Node, sbx sandbox.Sandbox) error {
func (o *Orchestrator) pauseSandbox(ctx context.Context, node *nodemanager.Node, sbx sandbox.Sandbox, skipMemory bool) error {
ctx, span := tracer.Start(ctx, "pause-sandbox")
defer span.End()

result, err := o.throttledUpsertSnapshot(ctx, buildUpsertSnapshotParams(sbx, node))
result, err := o.throttledUpsertSnapshot(ctx, buildUpsertSnapshotParams(sbx, node, skipMemory))
if err != nil {
telemetry.ReportCriticalError(ctx, "error inserting snapshot for env", err)

return err
}

err = snapshotInstance(ctx, node, sbx, result.TemplateID, result.BuildID.String())
err = snapshotInstance(ctx, node, sbx, result.TemplateID, result.BuildID.String(), skipMemory)
if errors.Is(err, PauseQueueExhaustedError{}) {
telemetry.ReportCriticalError(ctx, "pause queue exhausted", err)

Expand Down Expand Up @@ -68,11 +69,14 @@
return nil
}

func snapshotInstance(ctx context.Context, node *nodemanager.Node, sbx sandbox.Sandbox, templateID, buildID string) error {
func snapshotInstance(ctx context.Context, node *nodemanager.Node, sbx sandbox.Sandbox, templateID, buildID string, skipMemory bool) error {
childCtx, childSpan := tracer.Start(ctx, "snapshot-instance")
defer childSpan.End()

client, childCtx := node.GetSandboxDeleteCtx(childCtx, sbx.SandboxID, sbx.ExecutionID)
if skipMemory {
childCtx = metadata.AppendToOutgoingContext(childCtx, orchestrator.SandboxMemorySnapshotGRPCMetadataKey, "false")
}
_, err := client.Sandbox.Pause(
childCtx, &orchestrator.SandboxPauseRequest{
SandboxId: sbx.SandboxID,
Expand Down Expand Up @@ -103,15 +107,14 @@
return o.sandboxStore.WaitForStateChange(ctx, teamID, sandboxID)
}

func buildUpsertSnapshotParams(sbx sandbox.Sandbox, node *nodemanager.Node) queries.UpsertSnapshotParams {
func buildUpsertSnapshotParams(sbx sandbox.Sandbox, node *nodemanager.Node, skipMemory bool) queries.UpsertSnapshotParams {

Check failure on line 110 in packages/api/internal/orchestrator/pause_instance.go

View workflow job for this annotation

GitHub Actions / lint / golangci-lint (/home/runner/work/infra/infra/packages/api)

unused-parameter: parameter 'skipMemory' seems to be unused, consider removing or renaming it as _ (revive)
machineInfo := node.MachineInfo()

metadata := types.JSONBStringMap(sbx.Metadata)
if metadata == nil {
metadata = types.JSONBStringMap{}
}

return queries.UpsertSnapshotParams{

Check failure on line 117 in packages/api/internal/orchestrator/pause_instance.go

View workflow job for this annotation

GitHub Actions / lint / golangci-lint (/home/runner/work/infra/infra/packages/api)

return with no blank line before (nlreturn)
// Used if there's no snapshot for this sandbox yet
TemplateID: id.Generate(),
TeamID: sbx.TeamID,
Expand Down
8 changes: 7 additions & 1 deletion packages/api/internal/orchestrator/snapshot_template.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"time"

"github.com/google/uuid"
"google.golang.org/grpc/metadata"

"github.com/e2b-dev/infra/packages/api/internal/sandbox"
"github.com/e2b-dev/infra/packages/db/pkg/types"
Expand All @@ -30,6 +31,8 @@ type SnapshotTemplateOpts struct {
Namespace *string
// Tag is the build tag parsed from the name, defaults to "default".
Tag string
// SkipMemory controls whether the snapshot template should omit VM memory.
SkipMemory bool
}

// CreateSnapshotTemplate creates a persistent snapshot template from a running sandbox and immediately resumes it.
Expand Down Expand Up @@ -67,7 +70,7 @@ func (o *Orchestrator) CreateSnapshotTemplate(ctx context.Context, teamID uuid.U
return SnapshotTemplateResult{}, fmt.Errorf("node '%s' not found", sbx.NodeID)
}

upsertResult, err := o.throttledUpsertSnapshot(ctx, buildUpsertSnapshotParams(sbx, node))
upsertResult, err := o.throttledUpsertSnapshot(ctx, buildUpsertSnapshotParams(sbx, node, opts.SkipMemory))
if err != nil {
return SnapshotTemplateResult{}, fmt.Errorf("error upserting snapshot: %w", err)
}
Expand All @@ -84,6 +87,9 @@ func (o *Orchestrator) CreateSnapshotTemplate(ctx context.Context, teamID uuid.U
// kills the sandbox itself; RemoveSandbox is still needed to clean up
// API-side state (store, routing, analytics).
client, childCtx := node.GetClient(ctx)
if opts.SkipMemory {
childCtx = metadata.AppendToOutgoingContext(childCtx, orchestrator.SandboxMemorySnapshotGRPCMetadataKey, "false")
}
_, err = client.Sandbox.Checkpoint(childCtx, &orchestrator.SandboxCheckpointRequest{
SandboxId: sbx.SandboxID,
BuildId: upsertResult.BuildID.String(),
Expand Down
7 changes: 4 additions & 3 deletions packages/api/internal/sandbox/sandboxtypes/states.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,10 @@ func (r KillReason) String() string {

// RemoveOpts bundles the parameters that control sandbox removal.
type RemoveOpts struct {
Action StateAction
Eviction bool
Reason KillReason
Action StateAction
Eviction bool
SkipMemory bool
Reason KillReason
}

var AllowedTransitions = map[State]map[State]bool{
Expand Down
Loading
Loading