Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
349 changes: 180 additions & 169 deletions packages/api/internal/api/api.gen.go

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions packages/api/internal/handlers/proxy_grpc.go
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,7 @@ func (s *SandboxService) ResumeSandbox(ctx context.Context, req *proxygrpc.Sandb
s.api.buildResumeSandboxData(sandboxID, nil),
&headers,
true,
false,
nil, // mcp
)
if apiErr != nil {
Expand Down
4 changes: 4 additions & 0 deletions packages/api/internal/handlers/sandbox.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ func (a *APIStore) startSandbox(
getSandboxData orchestrator.SandboxDataFetcher,
requestHeader *http.Header,
isResume bool,
rebootFromRootfs bool,
mcp api.Mcp,
) (*api.Sandbox, *api.APIError) {
sbx, apiErr := a.startSandboxInternal(
Expand All @@ -38,6 +39,7 @@ func (a *APIStore) startSandbox(
getSandboxData,
requestHeader,
isResume,
rebootFromRootfs,
mcp,
)
if apiErr != nil {
Expand All @@ -56,6 +58,7 @@ func (a *APIStore) startSandboxInternal(
getSandboxData orchestrator.SandboxDataFetcher,
requestHeader *http.Header,
isResume bool,
rebootFromRootfs bool,
mcp api.Mcp,
) (sandbox.Sandbox, *api.APIError) {
startTime := time.Now()
Expand All @@ -77,6 +80,7 @@ func (a *APIStore) startSandboxInternal(
timeout,
isResume,
creationMeta,
rebootFromRootfs,
)
if instanceErr != nil {
telemetry.ReportError(ctx, "error when creating instance", instanceErr.Err)
Expand Down
2 changes: 2 additions & 0 deletions packages/api/internal/handlers/sandbox_connect.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ func (a *APIStore) PostSandboxesSandboxIDConnect(c *gin.Context, sandboxID api.S

return
}
rebootFromRootfs := body.Reboot != nil && *body.Reboot

teamID := teamInfo.Team.ID

Expand Down Expand Up @@ -152,6 +153,7 @@ func (a *APIStore) PostSandboxesSandboxIDConnect(c *gin.Context, sandboxID api.S
a.buildResumeSandboxData(sandboxID, nil),
&c.Request.Header,
true,
rebootFromRootfs,
nil, // mcp
)
if createErr != nil {
Expand Down
1 change: 1 addition & 0 deletions packages/api/internal/handlers/sandbox_create.go
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,7 @@ func (a *APIStore) PostSandboxes(c *gin.Context) {
getSandboxData,
&c.Request.Header,
false,
false,
mcp,
)
if createErr != nil {
Expand Down
20 changes: 19 additions & 1 deletion packages/api/internal/handlers/sandbox_pause.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,15 @@ import (
"github.com/e2b-dev/infra/packages/api/internal/sandbox"
"github.com/e2b-dev/infra/packages/api/internal/utils"
"github.com/e2b-dev/infra/packages/auth/pkg/auth"
"github.com/e2b-dev/infra/packages/shared/pkg/ginutils"
"github.com/e2b-dev/infra/packages/shared/pkg/logger"
"github.com/e2b-dev/infra/packages/shared/pkg/telemetry"
)

type pauseSandboxBody struct {
Memory *bool `json:"memory,omitempty"`
}

func (a *APIStore) PostSandboxesSandboxIDPause(c *gin.Context, sandboxID api.SandboxID) {
ctx := c.Request.Context()
// Get team from context, use TeamContextKey
Expand All @@ -42,9 +47,22 @@ func (a *APIStore) PostSandboxesSandboxIDPause(c *gin.Context, sandboxID api.San
traceID := span.SpanContext().TraceID().String()
c.Set("traceID", traceID)

memory := true
if c.Request.ContentLength != 0 {
body, err := ginutils.ParseBody[pauseSandboxBody](ctx, c)
if err != nil {
a.sendAPIStoreError(c, http.StatusBadRequest, fmt.Sprintf("Error when parsing request: %s", err))

return
}
if body.Memory != nil {
memory = *body.Memory
}
}

pause.LogInitiated(ctx, sandboxID, teamID.String(), pause.ReasonRequest)

err = a.orchestrator.RemoveSandbox(ctx, teamID, sandboxID, sandbox.RemoveOpts{Action: sandbox.StateActionPause})
err = a.orchestrator.RemoveSandbox(ctx, teamID, sandboxID, sandbox.RemoveOpts{Action: sandbox.StateActionPause, SkipMemory: !memory})
var transErr *sandbox.InvalidStateTransitionError

switch {
Expand Down
2 changes: 2 additions & 0 deletions packages/api/internal/handlers/sandbox_resume.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ func (a *APIStore) PostSandboxesSandboxIDResume(c *gin.Context, sandboxID api.Sa
}

telemetry.ReportEvent(ctx, "Parsed body")
rebootFromRootfs := body.Reboot != nil && *body.Reboot

timeout := sandbox.SandboxTimeoutDefault
if body.Timeout != nil {
Expand Down Expand Up @@ -160,6 +161,7 @@ func (a *APIStore) PostSandboxesSandboxIDResume(c *gin.Context, sandboxID api.Sa
a.buildResumeSandboxData(sandboxID, body.AutoPause),
&c.Request.Header,
true,
rebootFromRootfs,
nil, // mcp
)
if createErr != nil {
Expand Down
3 changes: 3 additions & 0 deletions packages/api/internal/handlers/snapshot_template_create.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ func (a *APIStore) PostSandboxesSandboxIDSnapshots(c *gin.Context, sandboxID api
opts := orchestrator.SnapshotTemplateOpts{
Tag: id.DefaultTag,
}
if body.Memory != nil {
opts.SkipMemory = !*body.Memory
}

if body.Name != nil {
identifier, tag, err := id.ParseName(*body.Name)
Expand Down
16 changes: 10 additions & 6 deletions packages/api/internal/orchestrator/create_instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
"go.uber.org/zap"
"google.golang.org/grpc/metadata"
"google.golang.org/protobuf/types/known/timestamppb"

"github.com/e2b-dev/infra/packages/api/internal/api"
Expand Down Expand Up @@ -127,6 +128,7 @@ func (o *Orchestrator) CreateSandbox(
timeout time.Duration,
isResume bool,
creationMeta sandbox.CreationMetadata,
rebootFromRootfsOpt ...bool,
) (sbx sandbox.Sandbox, apiErr *api.APIError) {
ctx, childSpan := tracer.Start(ctx, "create-sandbox")
defer childSpan.End()
Expand Down Expand Up @@ -252,6 +254,10 @@ func (o *Orchestrator) CreateSandbox(
TimeoutSeconds: sbxData.AutoResume.Timeout,
}
}
rebootFromRootfs := len(rebootFromRootfsOpt) > 0 && rebootFromRootfsOpt[0]
if rebootFromRootfs {
ctx = metadata.AppendToOutgoingContext(ctx, orchestrator.SandboxRebootFromRootfsGRPCMetadataKey, "true")
}

sbxRequest := &orchestrator.SandboxCreateRequest{
Sandbox: &orchestrator.SandboxConfig{
Expand Down Expand Up @@ -365,12 +371,10 @@ func (o *Orchestrator) CreateSandbox(
// Copy to a new variable to avoid race conditions
sbxToRemove := sbx
go func() {
killErr := o.removeSandboxFromNode(
context.WithoutCancel(ctx),
sbxToRemove,
sandbox.StateActionKill,
sandbox.KillReasonUnknown,
)
killErr := o.removeSandboxFromNode(context.WithoutCancel(ctx), sbxToRemove, sandbox.RemoveOpts{
Action: sandbox.StateActionKill,
Reason: sandbox.KillReasonUnknown,
})
if killErr != nil {
logger.L().Error(ctx, "Error removing sandbox",
zap.Error(killErr),
Expand Down
25 changes: 10 additions & 15 deletions packages/api/internal/orchestrator/delete_instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ func (o *Orchestrator) RemoveSandbox(ctx context.Context, teamID uuid.UUID, sand
defer func() { go o.analyticsRemove(context.WithoutCancel(ctx), sbx, opts.Action) }()
// Once we start the removal process, we want to make sure it gets removed from the store
defer o.sandboxStore.Remove(context.WithoutCancel(ctx), teamID, sandboxID)
err = o.removeSandboxFromNode(ctx, sbx, opts.Action, opts.Reason)
err = o.removeSandboxFromNode(ctx, sbx, opts)
if err != nil {
fields := []zap.Field{
zap.String("state_action", opts.Action.Name),
Expand All @@ -123,12 +123,7 @@ func (o *Orchestrator) RemoveSandbox(ctx context.Context, teamID uuid.UUID, sand
return nil
}

func (o *Orchestrator) removeSandboxFromNode(
ctx context.Context,
sbx sandbox.Sandbox,
stateAction sandbox.StateAction,
reason sandbox.KillReason,
) error {
func (o *Orchestrator) removeSandboxFromNode(ctx context.Context, sbx sandbox.Sandbox, opts sandbox.RemoveOpts) error {
ctx, span := tracer.Start(ctx, "remove-sandbox-from-node")
defer span.End()

Expand All @@ -137,8 +132,8 @@ func (o *Orchestrator) removeSandboxFromNode(
fields := []zap.Field{
logger.WithNodeID(sbx.NodeID),
}
if stateAction == sandbox.StateActionKill {
fields = append(fields, zap.String("kill_reason", reason.String()))
if opts.Action == sandbox.StateActionKill {
fields = append(fields, zap.String("kill_reason", opts.Reason.String()))
}

logger.L().Error(ctx, "failed to get node", fields...)
Expand All @@ -156,8 +151,8 @@ func (o *Orchestrator) removeSandboxFromNode(
zap.Error(err),
logger.WithSandboxID(sbx.SandboxID),
}
if stateAction == sandbox.StateActionKill {
fields = append(fields, zap.String("kill_reason", reason.String()))
if opts.Action == sandbox.StateActionKill {
fields = append(fields, zap.String("kill_reason", opts.Reason.String()))
}

logger.L().Error(ctx, "error removing routing record from catalog", fields...)
Expand All @@ -166,12 +161,12 @@ func (o *Orchestrator) removeSandboxFromNode(

sbxlogger.I(sbx).Debug(ctx, "Removing sandbox",
zap.Bool("auto_pause", sbx.AutoPause),
zap.String("state_action", stateAction.Name),
zap.String("state_action", opts.Action.Name),
)

switch stateAction {
switch opts.Action {
case sandbox.StateActionPause:
err := o.pauseSandbox(ctx, node, sbx)
err := o.pauseSandbox(ctx, node, sbx, opts.SkipMemory)
if err != nil {
if dberrors.IsForeignKeyViolation(err) {
killErr := o.killSandboxOnNode(ctx, node, sbx, sandbox.KillReasonBaseTemplateMissing)
Expand All @@ -191,7 +186,7 @@ func (o *Orchestrator) removeSandboxFromNode(

return nil
case sandbox.StateActionKill:
return o.killSandboxOnNode(ctx, node, sbx, reason)
return o.killSandboxOnNode(ctx, node, sbx, opts.Reason)
}

return nil
Expand Down
10 changes: 7 additions & 3 deletions packages/api/internal/orchestrator/pause_instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"github.com/google/uuid"
"github.com/jackc/pgx/v5/pgtype"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/metadata"

"github.com/e2b-dev/infra/packages/api/internal/orchestrator/nodemanager"
"github.com/e2b-dev/infra/packages/api/internal/sandbox"
Expand All @@ -26,7 +27,7 @@ func (PauseQueueExhaustedError) Error() string {
return "The pause queue is exhausted"
}

func (o *Orchestrator) pauseSandbox(ctx context.Context, node *nodemanager.Node, sbx sandbox.Sandbox) error {
func (o *Orchestrator) pauseSandbox(ctx context.Context, node *nodemanager.Node, sbx sandbox.Sandbox, skipMemory bool) error {
ctx, span := tracer.Start(ctx, "pause-sandbox")
defer span.End()

Expand All @@ -37,7 +38,7 @@ func (o *Orchestrator) pauseSandbox(ctx context.Context, node *nodemanager.Node,
return err
}

err = snapshotInstance(ctx, node, sbx, result.TemplateID, result.BuildID.String())
err = snapshotInstance(ctx, node, sbx, result.TemplateID, result.BuildID.String(), skipMemory)
if errors.Is(err, PauseQueueExhaustedError{}) {
telemetry.ReportCriticalError(ctx, "pause queue exhausted", err)

Expand Down Expand Up @@ -68,11 +69,14 @@ func (o *Orchestrator) pauseSandbox(ctx context.Context, node *nodemanager.Node,
return nil
}

func snapshotInstance(ctx context.Context, node *nodemanager.Node, sbx sandbox.Sandbox, templateID, buildID string) error {
func snapshotInstance(ctx context.Context, node *nodemanager.Node, sbx sandbox.Sandbox, templateID, buildID string, skipMemory bool) error {
childCtx, childSpan := tracer.Start(ctx, "snapshot-instance")
defer childSpan.End()

client, childCtx := node.GetSandboxDeleteCtx(childCtx, sbx.SandboxID, sbx.ExecutionID)
if skipMemory {
childCtx = metadata.AppendToOutgoingContext(childCtx, orchestrator.SandboxMemorySnapshotGRPCMetadataKey, "false")
}
_, err := client.Sandbox.Pause(
childCtx, &orchestrator.SandboxPauseRequest{
SandboxId: sbx.SandboxID,
Expand Down
6 changes: 6 additions & 0 deletions packages/api/internal/orchestrator/snapshot_template.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"time"

"github.com/google/uuid"
"google.golang.org/grpc/metadata"

"github.com/e2b-dev/infra/packages/api/internal/sandbox"
"github.com/e2b-dev/infra/packages/db/pkg/types"
Expand All @@ -30,6 +31,8 @@ type SnapshotTemplateOpts struct {
Namespace *string
// Tag is the build tag parsed from the name, defaults to "default".
Tag string
// SkipMemory controls whether the snapshot template should omit VM memory.
SkipMemory bool
}

// CreateSnapshotTemplate creates a persistent snapshot template from a running sandbox and immediately resumes it.
Expand Down Expand Up @@ -84,6 +87,9 @@ func (o *Orchestrator) CreateSnapshotTemplate(ctx context.Context, teamID uuid.U
// kills the sandbox itself; RemoveSandbox is still needed to clean up
// API-side state (store, routing, analytics).
client, childCtx := node.GetClient(ctx)
if opts.SkipMemory {
childCtx = metadata.AppendToOutgoingContext(childCtx, orchestrator.SandboxMemorySnapshotGRPCMetadataKey, "false")
}
_, err = client.Sandbox.Checkpoint(childCtx, &orchestrator.SandboxCheckpointRequest{
SandboxId: sbx.SandboxID,
BuildId: upsertResult.BuildID.String(),
Expand Down
7 changes: 4 additions & 3 deletions packages/api/internal/sandbox/sandboxtypes/states.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,10 @@ func (r KillReason) String() string {

// RemoveOpts bundles the parameters that control sandbox removal.
type RemoveOpts struct {
Action StateAction
Eviction bool
Reason KillReason
Action StateAction
Eviction bool
Reason KillReason
SkipMemory bool
}

var AllowedTransitions = map[State]map[State]bool{
Expand Down
11 changes: 8 additions & 3 deletions packages/orchestrator/pkg/sandbox/build_upload.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,14 @@ func NewUpload(
useCase string,
objectMetadata storage.ObjectMetadata,
) (*Upload, error) {
mem, memV4, err := resolveCompressConfig(ctx, cfg, ff, storage.MemfileName, snap.MemfileBlockSize, useCase)
if err != nil {
return nil, fmt.Errorf("resolve memfile compress config: %w", err)
var mem storage.CompressConfig
var memV4 bool
if snap.MemorySnapshot {
var err error
mem, memV4, err = resolveCompressConfig(ctx, cfg, ff, storage.MemfileName, snap.MemfileBlockSize, useCase)
if err != nil {
return nil, fmt.Errorf("resolve memfile compress config: %w", err)
}
}
root, rootV4, err := resolveCompressConfig(ctx, cfg, ff, storage.RootfsName, snap.RootfsBlockSize, useCase)
if err != nil {
Expand Down
39 changes: 39 additions & 0 deletions packages/orchestrator/pkg/sandbox/build_upload_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,16 @@
package sandbox

import (
"os"
"testing"

"github.com/google/uuid"
"github.com/launchdarkly/go-server-sdk/v7/testhelpers/ldtestdata"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"

"github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/build"
sbxtemplate "github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/template"
"github.com/e2b-dev/infra/packages/shared/pkg/featureflags"
"github.com/e2b-dev/infra/packages/shared/pkg/storage"
)
Expand Down Expand Up @@ -55,3 +60,37 @@ func TestResolveCompressConfig_V4_FlagOn(t *testing.T) {
ff := newV4HeaderFF(t, true)
require.True(t, resolveV4(t, ff))
}

func TestUploadRunV3MemorylessSkipsMemoryArtifacts(t *testing.T) {
t.Parallel()

buildID := uuid.New()
metaPath := t.TempDir() + "/metadata.json"
require.NoError(t, os.WriteFile(metaPath, []byte("{}"), 0o644))

store := storage.NewMockStorageProvider(t)
metadataBlob := storage.NewMockBlob(t)
store.EXPECT().
OpenBlob(mock.Anything, mock.MatchedBy(func(path string) bool {
return path == (storage.Paths{BuildID: buildID.String()}).Metadata()
}), storage.MetadataObjectType).
Return(metadataBlob, nil)
metadataBlob.EXPECT().Put(mock.Anything, []byte("{}"), mock.Anything).Return(nil)

upload := &Upload{
buildID: buildID,
snap: &Snapshot{
BuildID: buildID,
MemorySnapshot: false,
MemfileDiff: &build.NoDiff{},
MemfileDiffHeader: NewResolvedDiffHeader(nil),
RootfsDiff: &build.NoDiff{},
RootfsDiffHeader: NewResolvedDiffHeader(nil),
Metafile: sbxtemplate.NewLocalFileLink(metaPath),
},
paths: storage.Paths{BuildID: buildID.String()},
store: store,
}

require.NoError(t, upload.runV3(t.Context()))
}
Loading
Loading