Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 66 additions & 12 deletions cmd/kubesolo/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ var (
containerdReadyCh = make(chan struct{})
kineReadyCh = make(chan struct{})
apiServerReadyCh = make(chan struct{})
coreDNSReadyCh = make(chan struct{})
kubeletReadyCh = make(chan struct{})
controllerReadyCh = make(chan struct{})
kubeproxyReadyCh = make(chan struct{})
Expand Down Expand Up @@ -166,6 +167,18 @@ func (s *kubesolo) run() {
},
readyCh: apiServerReadyCh,
},
{
name: "coredns",
start: func() {
coreDNSService := coredns.NewService(ctx, cancel, coreDNSReadyCh, apiServerReadyCh, s.embedded)
s.wg.Go(func() {
if err := coreDNSService.Run(); err != nil {
log.Error().Str("component", "coredns").Err(err).Msg("coredns exited with error")
}
})
},
readyCh: coreDNSReadyCh,
},
{
name: "controller",
start: func() {
Expand Down Expand Up @@ -206,11 +219,6 @@ func (s *kubesolo) run() {
}
}

log.Info().Str("component", "kubesolo").Msg("deploying coredns...")
if err := coredns.Deploy(s.embedded.AdminKubeconfigFile); err != nil {
log.Fatal().Err(err).Msg("failed to deploy coredns")
}

if s.localStorage {
log.Info().Str("component", "kubesolo").Msg("deploying local path...")
if err := localpath.Deploy(s.embedded.AdminKubeconfigFile, s.embedded.LocalPathStorageDir, s.localStorageSharedPath); err != nil {
Expand Down Expand Up @@ -239,6 +247,47 @@ func (s *kubesolo) run() {
log.Info().Str("component", "kubesolo").Msg("all services have shutdown gracefully")
}

// cleanStaleState removes stale runtime artifacts left over from a previous run.
//
// After a reboot, the old container is gone but stale containerd metadata,
// sockets, and runtime state remain on the persistent volume. The containerd
// metadata DB (meta.db) retains references to EXITED containers, causing
// kubelet to fail pod synchronization on restart.
//
// Strategy: remove the system containerd socket, then remove every entry in
// the containerd directory under basePath except:
//   - images/ (embedded image archives, re-imported on startup), and
//   - the runtime binaries: containerd, containerd-shim-runc-v2, crun, runc.
//
// Only the containerd directory is touched, so the kine database and PKI
// certificates stored elsewhere under basePath are left alone.
//
// Cleanup is best-effort: nothing is returned and a failure never aborts
// startup. Failures other than "does not exist" are logged as warnings so
// that leftover stale state can be diagnosed.
//
// NOTE(review): config.toml is not in the preserve list and is deleted here;
// presumably it is regenerated during startup — confirm.
func cleanStaleState(basePath string) {
	// Stale system containerd socket symlink. A missing socket is the normal
	// case on a clean start and is not worth reporting.
	switch err := os.Remove(types.DefaultSystemContainerdSock); {
	case err == nil:
		log.Info().Str("component", "kubesolo").Msgf("removed stale system containerd socket: %s", types.DefaultSystemContainerdSock)
	case !os.IsNotExist(err):
		log.Warn().Str("component", "kubesolo").Err(err).Msgf("failed to remove stale system containerd socket: %s", types.DefaultSystemContainerdSock)
	}

	containerdDir := filepath.Join(basePath, types.DefaultContainerdDir)
	entries, err := os.ReadDir(containerdDir)
	if err != nil {
		// On first boot the directory does not exist yet; anything else means
		// stale state may survive, so make that visible.
		if !os.IsNotExist(err) {
			log.Warn().Str("component", "kubesolo").Err(err).Msgf("failed to read containerd directory, skipping stale state cleanup: %s", containerdDir)
		}
		return
	}

	for _, entry := range entries {
		name := entry.Name()

		switch name {
		case "images":
			// Embedded image archives — they are re-imported on startup.
			continue
		case "containerd", "containerd-shim-runc-v2", "crun", "runc":
			// Embedded runtime binaries.
			continue
		}

		target := filepath.Join(containerdDir, name)
		if err := os.RemoveAll(target); err != nil {
			// Keep going: one undeletable entry should not block the rest.
			log.Warn().Str("component", "kubesolo").Err(err).Msgf("failed to clean stale containerd artifact: %s", target)
			continue
		}
		log.Info().Str("component", "kubesolo").Msgf("cleaned stale containerd artifact: %s", target)
	}
}

// waitForService waits for a service to be ready
// it returns true if the service is ready
// it returns false if the service is not ready and the shutdown signal has been received
Expand Down Expand Up @@ -286,6 +335,12 @@ func (s *kubesolo) bootstrap() {

// Setup paths
basePath := *flags.Path

// Clean stale runtime state from previous runs (e.g., after reboot)
// This removes stale sockets and containerd runtime state that reference
// dead processes, while preserving images, kine database, and PKI certs.
cleanStaleState(basePath)

s.embedded = types.Embedded{
// System Node IP
NodeIP: nodeIP,
Expand Down Expand Up @@ -351,12 +406,12 @@ func (s *kubesolo) bootstrap() {
},

// Containerd paths
ContainerdDir: filepath.Join(basePath, types.DefaultContainerdDir),
ContainerdSocketFile: filepath.Join(basePath, types.DefaultContainerdDir, types.DefaultContainerdSocket),
ContainerdBinaryFile: filepath.Join(basePath, types.DefaultContainerdDir, "containerd"),
ContainerdImagesDir: filepath.Join(basePath, types.DefaultContainerdDir, "images"),
ContainerdShimBinaryFile: filepath.Join(basePath, types.DefaultContainerdDir, "containerd-shim-runc-v2"),
ContainerdConfigFile: filepath.Join(basePath, types.DefaultContainerdDir, "config.toml"),
ContainerdDir: filepath.Join(basePath, types.DefaultContainerdDir),
ContainerdSocketFile: filepath.Join(basePath, types.DefaultContainerdDir, types.DefaultContainerdSocket),
ContainerdBinaryFile: filepath.Join(basePath, types.DefaultContainerdDir, "containerd"),
ContainerdImagesDir: filepath.Join(basePath, types.DefaultContainerdDir, "images"),
ContainerdShimBinaryFile: filepath.Join(basePath, types.DefaultContainerdDir, "containerd-shim-runc-v2"),
ContainerdConfigFile: filepath.Join(basePath, types.DefaultContainerdDir, "config.toml"),
ContainerdRootDir: filepath.Join(basePath, types.DefaultContainerdDir, "root"),
ContainerdStateDir: filepath.Join(basePath, types.DefaultContainerdDir, "state"),
ContainerdRegistryConfigDir: filepath.Join(basePath, types.DefaultContainerdDir, "registry"),
Expand Down Expand Up @@ -395,7 +450,6 @@ func (s *kubesolo) bootstrap() {

// Image paths
PortainerAgentImageFile: filepath.Join(basePath, types.DefaultContainerdDir, "images", "portainer-agent.tar.gz"),
CorednsImageFile: filepath.Join(basePath, types.DefaultContainerdDir, "images", "coredns.tar.gz"),
SandboxImageFile: filepath.Join(basePath, types.DefaultContainerdDir, "images", "pause.tar.gz"),
LocalPathProvisionerImageFile: filepath.Join(basePath, types.DefaultContainerdDir, "images", "local-path-provisioner.tar.gz"),

Expand Down
Loading