From b8e5e97b441f9044b5089a6da44fd2369981e56f Mon Sep 17 00:00:00 2001 From: Z Date: Mon, 30 Mar 2026 19:44:11 +0900 Subject: [PATCH 1/9] feat: add log frequency visualization per container MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a "Logs" column to the container dashboard table showing real-time log output frequency as color-coded stacked bar charts per container. Colors: info=green, warn=orange, error=red, debug=blue, fatal=red. Backend: ContainerStore collects log stats every 5s by sampling recent Docker logs and counting by level using existing level_guesser. Data flows through SubscribeLogStats → SSE "container-log-stat" events. Frontend: New LogFrequencyChart component renders stacked bars. Container model tracks 60-entry log stats history with updateLogStat(). Co-Authored-By: Claude Opus 4.6 --- assets/components/ContainerTable.vue | 26 +++++ assets/components/LogFrequencyChart.vue | 104 ++++++++++++++++++ assets/models/Container.ts | 23 +++- assets/stores/container.ts | 10 +- assets/types/Container.d.ts | 9 ++ internal/agent/client_test.go | 4 + internal/agent/server.go | 1 + internal/container/container_store.go | 78 +++++++++++++ internal/container/level_guesser.go | 6 + internal/container/types.go | 10 ++ internal/support/container/agent_service.go | 4 + internal/support/container/client_service.go | 1 + internal/support/docker/docker_service.go | 4 + internal/support/docker/multi_host_service.go | 6 + internal/support/k8s/k8s_cluster_service.go | 4 + internal/support/k8s/k8s_service.go | 4 + internal/web/events.go | 7 ++ internal/web/routes.go | 1 + locales/en.yml | 1 + 19 files changed, 301 insertions(+), 2 deletions(-) create mode 100644 assets/components/LogFrequencyChart.vue diff --git a/assets/components/ContainerTable.vue b/assets/components/ContainerTable.vue index 3bc26e20cb22..172d929d5569 100644 --- a/assets/components/ContainerTable.vue +++ b/assets/components/ContainerTable.vue @@ -106,6 +106,12 @@ + +
+ + {{ totalLogRate(container) }} +
+ @@ -183,6 +189,18 @@ const fields: Record< mobileVisible: false, customClass: "min-w-48", }, + logs: { + label: "label.log-freq", + sortFunc: (a: Container, b: Container) => { + const aLast = a.logStatsHistory.at(-1); + const bLast = b.logStatsHistory.at(-1); + const aTotal = aLast ? aLast.info + aLast.warn + aLast.error + aLast.debug + aLast.fatal : 0; + const bTotal = bLast ? bLast.info + bLast.warn + bLast.error + bLast.debug + bLast.fatal : 0; + return (aTotal - bTotal) * direction.value; + }, + mobileVisible: false, + customClass: "min-w-40", + }, }; const { containers } = defineProps<{ @@ -227,6 +245,14 @@ function sort(field: keys) { direction.value = 1; } } +function totalLogRate(container: Container): string { + const last = container.logStatsHistory.at(-1); + if (!last) return "0"; + const total = last.info + last.warn + last.error + last.debug + last.fatal; + if (total >= 1000) return `${(total / 1000).toFixed(1)}k`; + return `${total}`; +} + function isVisible(field: keys) { return fields[field].mobileVisible || !isMobile.value; } diff --git a/assets/components/LogFrequencyChart.vue b/assets/components/LogFrequencyChart.vue new file mode 100644 index 000000000000..7e07bc0066d3 --- /dev/null +++ b/assets/components/LogFrequencyChart.vue @@ -0,0 +1,104 @@ + + + diff --git a/assets/models/Container.ts b/assets/models/Container.ts index 941c1e1426ff..5ab65b096d38 100644 --- a/assets/models/Container.ts +++ b/assets/models/Container.ts @@ -1,7 +1,8 @@ -import type { ContainerHealth, ContainerJson, ContainerStat, ContainerState } from "@/types/Container"; +import type { ContainerHealth, ContainerJson, ContainerStat, ContainerState, LogStat } from "@/types/Container"; import { Ref } from "vue"; export type Stat = Omit; +export type LogFreq = Omit; const hosts = computed(() => config.hosts.reduce( @@ -27,11 +28,15 @@ export class HistoricalContainer { ) {} } +const defaultLogFreq: LogFreq = { info: 0, warn: 0, error: 0, debug: 0, fatal: 0 }; +const LOG_STATS_HISTORY_SIZE = 60; + export class Container { private _stat: Ref; private _name: string; private readonly _statsHistory: Ref; private readonly movingAverageStat: Ref; + private readonly _logStatsHistory: Ref; constructor( public readonly id: string, @@ -57,10 +62,16 @@ export class Container { const padding = Array(300 - recentStats.length).fill(defaultStat); this._statsHistory = ref([...padding, ...recentStats]); this.movingAverageStat = ref(stats.at(-1) || defaultStat); + const logPadding = Array(LOG_STATS_HISTORY_SIZE).fill(defaultLogFreq); + this._logStatsHistory = ref(logPadding); this._name = name; } + get logStatsHistory() { + return unref(this._logStatsHistory); + } + get statsHistory() { return unref(this._statsHistory); } @@ -148,6 +159,16 @@ export class Container { } } + public updateLogStat(logStat: LogFreq) { + const history = isRef(this._logStatsHistory) + ? this._logStatsHistory.value + : (this._logStatsHistory as unknown as LogFreq[]); + history.push(logStat); + if (history.length > LOG_STATS_HISTORY_SIZE) { + history.shift(); + } + } + static fromJSON(c: ContainerJson): Container { return new Container( c.id, diff --git a/assets/stores/container.ts b/assets/stores/container.ts index 70b353f65f2d..798dca8d7cce 100644 --- a/assets/stores/container.ts +++ b/assets/stores/container.ts @@ -1,6 +1,6 @@ import { acceptHMRUpdate, defineStore } from "pinia"; import { Ref, UnwrapNestedRefs } from "vue"; -import type { ContainerHealth, ContainerJson, ContainerStat } from "@/types/Container"; +import type { ContainerHealth, ContainerJson, ContainerStat, LogStat } from "@/types/Container"; import { Container } from "@/models/Container"; import i18n from "@/modules/i18n"; import { Host } from "./hosts"; @@ -60,6 +60,14 @@ export const useContainerStore = defineStore("container", () => { container.updateStat(rest); } }); + es.addEventListener("container-log-stat", (e) => { + const logStat = JSON.parse((e as MessageEvent).data) as LogStat; + const container = allContainersById.value[logStat.id] as unknown as UnwrapNestedRefs; + if (container) { + const { id, ...rest } = logStat; + container.updateLogStat(rest); + } + }); es.addEventListener("container-event", (e) => { const event = JSON.parse((e as MessageEvent).data) as { actorId: string; name: string; time: string }; const container = allContainersById.value[event.actorId]; diff --git a/assets/types/Container.d.ts b/assets/types/Container.d.ts index 0ee7367ffb8b..fb6a61de62b3 100644 --- a/assets/types/Container.d.ts +++ b/assets/types/Container.d.ts @@ -7,6 +7,15 @@ export interface ContainerStat { readonly networkTxTotal: number; } +export interface LogStat { + readonly id: string; + readonly info: number; + readonly warn: number; + readonly error: number; + readonly debug: number; + readonly fatal: number; +} + export type ContainerJson = { readonly id: string; readonly created: string; diff --git a/internal/agent/client_test.go b/internal/agent/client_test.go index f359bdecce01..eb0907c12ec3 100644 --- a/internal/agent/client_test.go +++ b/internal/agent/client_test.go @@ -85,6 +85,10 @@ func (m *MockedClientService) SubscribeContainersStarted(ctx context.Context, co m.Called(ctx, containers) } +func (m *MockedClientService) SubscribeLogStats(ctx context.Context, logStats chan<- container.LogStat) { + m.Called(ctx, logStats) +} + func (m *MockedClientService) StreamLogs(ctx context.Context, c container.Container, from time.Time, stdTypes container.StdType, events chan<- *container.LogEvent) error { args := m.Called(ctx, c, from, stdTypes, events) return args.Error(0) diff --git a/internal/agent/server.go b/internal/agent/server.go index c7b5abb6887a..eb328bf1c4bf 100644 --- a/internal/agent/server.go +++ b/internal/agent/server.go @@ -44,6 +44,7 @@ type ClientService interface { SubscribeStats(context.Context, chan<- container.ContainerStat) SubscribeEvents(context.Context, chan<- container.ContainerEvent) SubscribeContainersStarted(context.Context, chan<- container.Container) + SubscribeLogStats(context.Context, chan<- container.LogStat) StreamLogs(context.Context, container.Container, time.Time, container.StdType, chan<- *container.LogEvent) error Attach(context.Context, container.Container, container.ExecEventReader, io.Writer) error Exec(context.Context, container.Container, []string, container.ExecEventReader, io.Writer) error diff --git a/internal/container/container_store.go b/internal/container/container_store.go index 56e80daa2334..e6be8750e821 100644 --- a/internal/container/container_store.go +++ b/internal/container/container_store.go @@ -1,6 +1,7 @@ package container import ( + "bufio" "context" "errors" "sync" @@ -23,6 +24,7 @@ type ContainerStore struct { containers *xsync.Map[string, *Container] subscribers *xsync.Map[context.Context, chan<- ContainerEvent] newContainerSubscribers *xsync.Map[context.Context, chan<- Container] + logStatSubscribers *xsync.Map[context.Context, chan<- LogStat] client Client statsCollector StatsCollector wg sync.WaitGroup @@ -34,6 +36,7 @@ type ContainerStore struct { const defaultTimeout = 10 * time.Second const reconcileInterval = 30 * time.Second +const logStatsInterval = 5 * time.Second func NewContainerStore(ctx context.Context, client Client, statsCollect StatsCollector, labels ContainerLabels) *ContainerStore { log.Debug().Str("host", client.Host().Name).Interface("labels", labels).Msg("initializing container store") @@ -43,6 +46,7 @@ func NewContainerStore(ctx context.Context, client Client, statsCollect StatsCol client: client, subscribers: xsync.NewMap[context.Context, chan<- ContainerEvent](), newContainerSubscribers: xsync.NewMap[context.Context, chan<- Container](), + logStatSubscribers: xsync.NewMap[context.Context, chan<- LogStat](), statsCollector: statsCollect, wg: sync.WaitGroup{}, events: make(chan ContainerEvent), @@ -267,6 +271,74 @@ func (s *ContainerStore) SubscribeNewContainers(ctx context.Context, containers }() } +func (s *ContainerStore) SubscribeLogStats(ctx context.Context, logStats chan<- LogStat) { + s.logStatSubscribers.Store(ctx, logStats) + go func() { + <-ctx.Done() + s.logStatSubscribers.Delete(ctx) + }() +} + +// collectLogStats fetches recent logs for all running containers and counts by level. +func (s *ContainerStore) collectLogStats() { + var ids []string + s.containers.Range(func(id string, c *Container) bool { + if c.State == "running" { + ids = append(ids, id) + } + return true + }) + + if len(ids) == 0 { + return + } + + now := time.Now() + since := now.Add(-logStatsInterval) + + for _, id := range ids { + ctx, cancel := context.WithTimeout(s.ctx, 2*time.Second) + reader, err := s.client.ContainerLogsBetweenDates(ctx, id, since, now, STDALL) + if err != nil { + cancel() + continue + } + + stat := LogStat{ID: id} + scanner := bufio.NewScanner(reader) + for scanner.Scan() { + line := scanner.Text() + if len(line) == 0 { + continue + } + level := GuessLogLevelFromLine(line) + switch level { + case "info": + stat.Info++ + case "warn": + stat.Warn++ + case "error": + stat.Error++ + case "debug": + stat.Debug++ + case "fatal": + stat.Fatal++ + } + } + reader.Close() + cancel() + + s.logStatSubscribers.Range(func(c context.Context, ch chan<- LogStat) bool { + select { + case ch <- stat: + case <-c.Done(): + s.logStatSubscribers.Delete(c) + } + return true + }) + } +} + // reconcile compares the in-memory container map with the actual Docker state // and removes any containers that Docker no longer knows about. This handles // cases where destroy events are missed (network hiccups, Docker race conditions). @@ -330,6 +402,9 @@ func (s *ContainerStore) init() { reconcileTicker := time.NewTicker(reconcileInterval) defer reconcileTicker.Stop() + logStatsTicker := time.NewTicker(logStatsInterval) + defer logStatsTicker.Stop() + for { select { case event := <-s.events: @@ -478,6 +553,9 @@ func (s *ContainerStore) init() { case <-reconcileTicker.C: s.reconcile() + case <-logStatsTicker.C: + go s.collectLogStats() + case <-s.ctx.Done(): return } diff --git a/internal/container/level_guesser.go b/internal/container/level_guesser.go index 720ce8170f30..6b512ae81701 100644 --- a/internal/container/level_guesser.go +++ b/internal/container/level_guesser.go @@ -118,6 +118,12 @@ var singleLetterToLevel = map[byte]string{ 'V': "trace", } +// GuessLogLevelFromLine guesses the log level from a raw log line string. +// Exported for use by log stats collection. +func GuessLogLevelFromLine(line string) string { + return guessFromString(line) +} + func guessFromString(value string) string { value = StripANSI(value) value = timestampRegex.ReplaceAllString(value, "") diff --git a/internal/container/types.go b/internal/container/types.go index 03abd8a180d5..c98e28ae83b5 100644 --- a/internal/container/types.go +++ b/internal/container/types.go @@ -115,6 +115,16 @@ type ContainerStat struct { NetworkTxTotal uint64 `json:"networkTxTotal"` } +// LogStat represents log frequency counts per level for a container over a time window +type LogStat struct { + ID string `json:"id"` + Info int `json:"info"` + Warn int `json:"warn"` + Error int `json:"error"` + Debug int `json:"debug"` + Fatal int `json:"fatal"` +} + // ContainerEvent represents events that are triggered type ContainerEvent struct { Name string `json:"name"` diff --git a/internal/support/container/agent_service.go b/internal/support/container/agent_service.go index 26a9e610fc7e..e0969c69ab4b 100644 --- a/internal/support/container/agent_service.go +++ b/internal/support/container/agent_service.go @@ -72,6 +72,10 @@ func (d *agentService) SubscribeContainersStarted(ctx context.Context, container go d.client.StreamNewContainers(ctx, containers) } +func (a *agentService) SubscribeLogStats(ctx context.Context, logStats chan<- container.LogStat) { + // Log stats not supported for remote agents yet +} + func (a *agentService) ContainerAction(ctx context.Context, container container.Container, action container.ContainerAction) error { return a.client.ContainerAction(ctx, container.ID, action) } diff --git a/internal/support/container/client_service.go b/internal/support/container/client_service.go index 9b586abe463f..d10a46beef62 100644 --- a/internal/support/container/client_service.go +++ b/internal/support/container/client_service.go @@ -22,6 +22,7 @@ type ClientService interface { SubscribeStats(context.Context, chan<- container.ContainerStat) SubscribeEvents(context.Context, chan<- container.ContainerEvent) SubscribeContainersStarted(context.Context, chan<- container.Container) + SubscribeLogStats(context.Context, chan<- container.LogStat) // Blocking streaming functions that should be used in a goroutine StreamLogs(context.Context, container.Container, time.Time, container.StdType, chan<- *container.LogEvent) error diff --git a/internal/support/docker/docker_service.go b/internal/support/docker/docker_service.go index d671d4de35e0..d430e2a9ecb5 100644 --- a/internal/support/docker/docker_service.go +++ b/internal/support/docker/docker_service.go @@ -103,6 +103,10 @@ func (d *DockerClientService) SubscribeStats(ctx context.Context, stats chan<- c d.store.SubscribeStats(ctx, stats) } +func (d *DockerClientService) SubscribeLogStats(ctx context.Context, logStats chan<- container.LogStat) { + d.store.SubscribeLogStats(ctx, logStats) +} + func (d *DockerClientService) SubscribeEvents(ctx context.Context, events chan<- container.ContainerEvent) { d.store.SubscribeEvents(ctx, events) } diff --git a/internal/support/docker/multi_host_service.go b/internal/support/docker/multi_host_service.go index fe5649f825ab..38935440df3c 100644 --- a/internal/support/docker/multi_host_service.go +++ b/internal/support/docker/multi_host_service.go @@ -129,6 +129,12 @@ func (m *MultiHostService) SubscribeEventsAndStats(ctx context.Context, events c } } +func (m *MultiHostService) SubscribeLogStats(ctx context.Context, logStats chan<- container.LogStat) { + for _, client := range m.manager.List() { + client.SubscribeLogStats(ctx, logStats) + } +} + func (m *MultiHostService) SubscribeContainersStarted(ctx context.Context, containers chan<- container.Container, filter container_support.ContainerFilter) { newContainers := make(chan container.Container) for _, client := range m.manager.List() { diff --git a/internal/support/k8s/k8s_cluster_service.go b/internal/support/k8s/k8s_cluster_service.go index 22e51ca15e33..b9ab7923e4f5 100644 --- a/internal/support/k8s/k8s_cluster_service.go +++ b/internal/support/k8s/k8s_cluster_service.go @@ -99,6 +99,10 @@ func (m *K8sClusterService) SubscribeEventsAndStats(ctx context.Context, events m.client.SubscribeStats(ctx, stats) } +func (m *K8sClusterService) SubscribeLogStats(ctx context.Context, logStats chan<- container.LogStat) { + m.client.SubscribeLogStats(ctx, logStats) +} + func (m *K8sClusterService) SubscribeContainersStarted(ctx context.Context, containers chan<- container.Container, filter container_support.ContainerFilter) { newContainers := make(chan container.Container) m.client.SubscribeContainersStarted(ctx, newContainers) diff --git a/internal/support/k8s/k8s_service.go b/internal/support/k8s/k8s_service.go index 403c00b89470..590da0344d92 100644 --- a/internal/support/k8s/k8s_service.go +++ b/internal/support/k8s/k8s_service.go @@ -92,6 +92,10 @@ func (k *K8sClientService) SubscribeContainersStarted(ctx context.Context, conta k.store.SubscribeNewContainers(ctx, containers) } +func (k *K8sClientService) SubscribeLogStats(ctx context.Context, logStats chan<- container.LogStat) { + k.store.SubscribeLogStats(ctx, logStats) +} + func (k *K8sClientService) Attach(ctx context.Context, c container.Container, events container.ExecEventReader, stdout io.Writer) error { cancelCtx, cancel := context.WithCancel(ctx) session, err := k.client.ContainerAttach(cancelCtx, c.ID) diff --git a/internal/web/events.go b/internal/web/events.go index ed9d87d170e9..c35d35a97c55 100644 --- a/internal/web/events.go +++ b/internal/web/events.go @@ -23,9 +23,11 @@ func (h *handler) streamEvents(w http.ResponseWriter, r *http.Request) { events := make(chan container.ContainerEvent) stats := make(chan container.ContainerStat) + logStats := make(chan container.LogStat) availableHosts := make(chan container.Host) h.hostService.SubscribeEventsAndStats(r.Context(), events, stats) + h.hostService.SubscribeLogStats(r.Context(), logStats) h.hostService.SubscribeAvailableHosts(r.Context(), availableHosts) userLabels := h.config.Labels @@ -65,6 +67,11 @@ func (h *handler) streamEvents(w http.ResponseWriter, r *http.Request) { log.Error().Err(err).Msg("error writing event to event stream") return } + case logStat := <-logStats: + if err := sseWriter.Event("container-log-stat", logStat); err != nil { + log.Error().Err(err).Msg("error writing log stat to event stream") + return + } case event, ok := <-events: if !ok { return diff --git a/internal/web/routes.go b/internal/web/routes.go index d142bcb5fe26..7dcb784142da 100644 --- a/internal/web/routes.go +++ b/internal/web/routes.go @@ -70,6 +70,7 @@ type HostService interface { ListAllContainers(labels container.ContainerLabels) ([]container.Container, []error) ListAllContainersFiltered(userFilter container.ContainerLabels, filter container_support.ContainerFilter) ([]container.Container, []error) SubscribeEventsAndStats(ctx context.Context, events chan<- container.ContainerEvent, stats chan<- container.ContainerStat) + SubscribeLogStats(ctx context.Context, logStats chan<- container.LogStat) SubscribeContainersStarted(ctx context.Context, containers chan<- container.Container, filter container_support.ContainerFilter) Hosts() []container.Host LocalHost() (container.Host, error) diff --git a/locales/en.yml b/locales/en.yml index 1b5b78de3f5a..f5bef9ec0f5e 100644 --- a/locales/en.yml +++ b/locales/en.yml @@ -41,6 +41,7 @@ label: created: Created avg-cpu: Avg. CPU (%) avg-mem: Avg. MEM (%) + log-freq: Logs pinned: Pinned per-page: Rows per page host-menu: Hosts and Containers From 13a2e965b76f05b48b0d63a1ce0cc78ac6915260 Mon Sep 17 00:00:00 2001 From: Z Date: Mon, 30 Mar 2026 20:05:14 +0900 Subject: [PATCH 2/9] fix: demux Docker multiplexed stream in collectLogStats Non-TTY containers return logs with 8-byte binary headers (stream type + length). bufio.Scanner was splitting on newlines within these binary frames, corrupting every line. Now properly strips multiplexed headers before scanning. Also counts lines with unrecognized log levels as "info" instead of silently discarding them. Co-Authored-By: Claude Opus 4.6 --- internal/container/container_store.go | 60 +++++++++++++++++++++++---- 1 file changed, 53 insertions(+), 7 deletions(-) diff --git a/internal/container/container_store.go b/internal/container/container_store.go index e6be8750e821..34da319960c1 100644 --- a/internal/container/container_store.go +++ b/internal/container/container_store.go @@ -3,7 +3,9 @@ package container import ( "bufio" "context" + "encoding/binary" "errors" + "io" "sync" "sync/atomic" "time" @@ -279,33 +281,73 @@ func (s *ContainerStore) SubscribeLogStats(ctx context.Context, logStats chan<- }() } +// demuxDockerStream strips the 8-byte multiplexed headers from non-TTY Docker log streams, +// writing clean text to w. For TTY containers, it copies the stream as-is. +func demuxDockerStream(w io.Writer, r io.Reader, tty bool) error { + if tty { + _, err := io.Copy(w, r) + return err + } + header := make([]byte, 8) + for { + if _, err := io.ReadFull(r, header); err != nil { + if err == io.EOF || err == io.ErrUnexpectedEOF { + return nil + } + return err + } + size := binary.BigEndian.Uint32(header[4:]) + if size == 0 { + continue + } + if _, err := io.CopyN(w, r, int64(size)); err != nil { + return err + } + } +} + // collectLogStats fetches recent logs for all running containers and counts by level. func (s *ContainerStore) collectLogStats() { - var ids []string + type idTty struct { + id string + tty bool + } + var targets []idTty s.containers.Range(func(id string, c *Container) bool { if c.State == "running" { - ids = append(ids, id) + targets = append(targets, idTty{id: id, tty: c.Tty}) } return true }) - if len(ids) == 0 { + if len(targets) == 0 { return } now := time.Now() since := now.Add(-logStatsInterval) - for _, id := range ids { + for _, t := range targets { ctx, cancel := context.WithTimeout(s.ctx, 2*time.Second) - reader, err := s.client.ContainerLogsBetweenDates(ctx, id, since, now, STDALL) + reader, err := s.client.ContainerLogsBetweenDates(ctx, t.id, since, now, STDALL) if err != nil { + log.Debug().Err(err).Str("id", t.id).Msg("failed to fetch logs for log stats") cancel() continue } - stat := LogStat{ID: id} - scanner := bufio.NewScanner(reader) + // Demux Docker multiplexed stream for non-TTY containers + pr, pw := io.Pipe() + go func() { + err := demuxDockerStream(pw, reader, t.tty) + if err != nil { + log.Debug().Err(err).Str("id", t.id).Msg("error demuxing docker stream") + } + pw.Close() + }() + + stat := LogStat{ID: t.id} + scanner := bufio.NewScanner(pr) for scanner.Scan() { line := scanner.Text() if len(line) == 0 { @@ -323,8 +365,12 @@ func (s *ContainerStore) collectLogStats() { stat.Debug++ case "fatal": stat.Fatal++ + default: + // Lines with unrecognized level still count as info + stat.Info++ } } + pr.Close() reader.Close() cancel() From 35959b95e489485a168ce87dfdfe6c86a414b6b8 Mon Sep 17 00:00:00 2001 From: Z Date: Mon, 30 Mar 2026 20:12:51 +0900 Subject: [PATCH 3/9] feat: add crash-loop badges and hot sort column - Track restart count (10min window) and exit codes from container events - Show status badges: OOM, SIGTERM, Exit N, crash-loop count, unhealthy - Add anomaly score based on restarts, health, error rate, CPU/mem pressure - Add sortable "Hot" column to surface containers needing attention Co-Authored-By: Claude Opus 4.6 --- assets/components/ContainerTable.vue | 34 ++++++++++++++- assets/models/Container.ts | 63 ++++++++++++++++++++++++++++ assets/stores/container.ts | 14 ++++++- locales/en.yml | 1 + 4 files changed, 110 insertions(+), 2 deletions(-) diff --git a/assets/components/ContainerTable.vue b/assets/components/ContainerTable.vue index 172d929d5569..eb2f2462b13d 100644 --- a/assets/components/ContainerTable.vue +++ b/assets/components/ContainerTable.vue @@ -96,7 +96,20 @@ {{ container.hostLabel }} - {{ container.state }} + + {{ container.state }} + + {{ container.statusBadge.text }} + + @@ -112,6 +125,19 @@ {{ totalLogRate(container) }} + + + {{ container.anomalyScore }} + + @@ -201,6 +227,12 @@ const fields: Record< mobileVisible: false, customClass: "min-w-40", }, + hot: { + label: "label.hot", + sortFunc: (a: Container, b: Container) => (a.anomalyScore - b.anomalyScore) * direction.value, + mobileVisible: false, + customClass: "w-1", + }, }; const { containers } = defineProps<{ diff --git a/assets/models/Container.ts b/assets/models/Container.ts index 5ab65b096d38..047661a11292 100644 --- a/assets/models/Container.ts +++ b/assets/models/Container.ts @@ -30,6 +30,7 @@ export class HistoricalContainer { const defaultLogFreq: LogFreq = { info: 0, warn: 0, error: 0, debug: 0, fatal: 0 }; const LOG_STATS_HISTORY_SIZE = 60; +const RESTART_WINDOW_MS = 10 * 60 * 1000; // 10 minutes export class Container { private _stat: Ref; @@ -38,6 +39,10 @@ export class Container { private readonly movingAverageStat: Ref; private readonly _logStatsHistory: Ref; + // Crash-loop / exit tracking + public lastExitCode: string | null = null; + private _restartTimestamps: number[] = []; + constructor( public readonly id: string, public readonly created: Date, @@ -169,6 +174,64 @@ export class Container { } } + public recordDie(exitCode?: string) { + this.lastExitCode = exitCode ?? null; + } + + public recordRestart() { + const now = Date.now(); + this._restartTimestamps.push(now); + // Keep only timestamps within the window + const cutoff = now - RESTART_WINDOW_MS; + this._restartTimestamps = this._restartTimestamps.filter((t) => t > cutoff); + } + + get restartCount(): number { + const now = Date.now(); + const cutoff = now - RESTART_WINDOW_MS; + return this._restartTimestamps.filter((t) => t > cutoff).length; + } + + get isCrashLooping(): boolean { + return this.restartCount >= 3; + } + + get statusBadge(): { text: string; type: "error" | "warning" | "info" } | null { + if (this.isCrashLooping) { + return { text: `${this.restartCount} restarts`, type: "error" }; + } + if (this.lastExitCode && this.lastExitCode !== "0" && this.state === "exited") { + const code = this.lastExitCode; + const label = code === "137" ? "OOM" : code === "143" ? "SIGTERM" : `Exit ${code}`; + return { text: label, type: "warning" }; + } + if (this.health === "unhealthy") { + return { text: "unhealthy", type: "warning" }; + } + return null; + } + + /** Anomaly score for "hot" sorting — higher means more attention needed */ + get anomalyScore(): number { + let score = 0; + // Crash-loop is the strongest signal + score += this.restartCount * 20; + // Unhealthy containers + if (this.health === "unhealthy") score += 30; + // Non-zero exit + if (this.lastExitCode && this.lastExitCode !== "0" && this.state === "exited") score += 15; + // Error/fatal log rate from recent history + const recent = this.logStatsHistory.slice(-3); + for (const r of recent) { + score += r.error * 3 + r.fatal * 10 + r.warn; + } + // High CPU (above 80% of EMA) + if (this.movingAverage.cpu > 80) score += 10; + // High memory (above 85%) + if (this.movingAverage.memory > 85) score += 10; + return score; + } + static fromJSON(c: ContainerJson): Container { return new Container( c.id, diff --git a/assets/stores/container.ts b/assets/stores/container.ts index 798dca8d7cce..6deaba1c0ab4 100644 --- a/assets/stores/container.ts +++ b/assets/stores/container.ts @@ -69,13 +69,25 @@ export const useContainerStore = defineStore("container", () => { } }); es.addEventListener("container-event", (e) => { - const event = JSON.parse((e as MessageEvent).data) as { actorId: string; name: string; time: string }; + const event = JSON.parse((e as MessageEvent).data) as { + actorId: string; + name: string; + time: string; + actorAttributes?: Record; + }; const container = allContainersById.value[event.actorId]; if (container) { switch (event.name) { case "die": container.state = "exited"; container.finishedAt = new Date(event.time); + container.recordDie(event.actorAttributes?.exitCode); + break; + case "start": + if (container.state === "exited") { + container.recordRestart(); + } + container.state = "running"; break; case "destroy": container.state = "deleted"; diff --git a/locales/en.yml b/locales/en.yml index f5bef9ec0f5e..7fd7d2e09fcb 100644 --- a/locales/en.yml +++ b/locales/en.yml @@ -42,6 +42,7 @@ label: avg-cpu: Avg. CPU (%) avg-mem: Avg. MEM (%) log-freq: Logs + hot: Hot pinned: Pinned per-page: Rows per page host-menu: Hosts and Containers From 9196594b2c25391ed6478994b92a42890825015b Mon Sep 17 00:00:00 2001 From: Z Date: Mon, 30 Mar 2026 20:22:52 +0900 Subject: [PATCH 4/9] =?UTF-8?q?fix:=20address=20Codex=20review=20=E2=80=94?= =?UTF-8?q?=20scanner=20buffer,=20error=20propagation,=20non-blocking=20su?= =?UTF-8?q?bscribers?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Increase bufio.Scanner buffer to 256KB for long JSON log lines - Check scanner.Err() after scan loop to surface truncated reads - Use pw.CloseWithError(err) to propagate demux errors to scanner - Make subscriber send non-blocking (drop stat if slow consumer) - Fix exit 137 label: "Killed" not "OOM" (SIGKILL != guaranteed OOM) - Prune _restartTimestamps on every restartCount access, compute once per getter chain Co-Authored-By: Claude Opus 4.6 --- assets/models/Container.ts | 21 ++++++++++----------- internal/container/container_store.go | 11 +++++++++-- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/assets/models/Container.ts b/assets/models/Container.ts index 047661a11292..759187b94c55 100644 --- a/assets/models/Container.ts +++ b/assets/models/Container.ts @@ -189,7 +189,9 @@ export class Container { get restartCount(): number { const now = Date.now(); const cutoff = now - RESTART_WINDOW_MS; - return this._restartTimestamps.filter((t) => t > cutoff).length; + // Prune stale timestamps while counting + this._restartTimestamps = this._restartTimestamps.filter((t) => t > cutoff); + return this._restartTimestamps.length; } get isCrashLooping(): boolean { @@ -197,12 +199,14 @@ export class Container { } get statusBadge(): { text: string; type: "error" | "warning" | "info" } | null { - if (this.isCrashLooping) { - return { text: `${this.restartCount} restarts`, type: "error" }; + const restarts = this.restartCount; + if (restarts >= 3) { + return { text: `${restarts} restarts`, type: "error" }; } if (this.lastExitCode && this.lastExitCode !== "0" && this.state === "exited") { const code = this.lastExitCode; - const label = code === "137" ? "OOM" : code === "143" ? "SIGTERM" : `Exit ${code}`; + // 137=SIGKILL (may be OOM but not always), 143=SIGTERM + const label = code === "137" ? "Killed" : code === "143" ? "SIGTERM" : `Exit ${code}`; return { text: label, type: "warning" }; } if (this.health === "unhealthy") { @@ -214,20 +218,15 @@ export class Container { /** Anomaly score for "hot" sorting — higher means more attention needed */ get anomalyScore(): number { let score = 0; - // Crash-loop is the strongest signal - score += this.restartCount * 20; - // Unhealthy containers + const restarts = this.restartCount; + score += restarts * 20; if (this.health === "unhealthy") score += 30; - // Non-zero exit if (this.lastExitCode && this.lastExitCode !== "0" && this.state === "exited") score += 15; - // Error/fatal log rate from recent history const recent = this.logStatsHistory.slice(-3); for (const r of recent) { score += r.error * 3 + r.fatal * 10 + r.warn; } - // High CPU (above 80% of EMA) if (this.movingAverage.cpu > 80) score += 10; - // High memory (above 85%) if (this.movingAverage.memory > 85) score += 10; return score; } diff --git a/internal/container/container_store.go b/internal/container/container_store.go index 34da319960c1..70b99f0ef0e3 100644 --- a/internal/container/container_store.go +++ b/internal/container/container_store.go @@ -342,12 +342,15 @@ func (s *ContainerStore) collectLogStats() { err := demuxDockerStream(pw, reader, t.tty) if err != nil { log.Debug().Err(err).Str("id", t.id).Msg("error demuxing docker stream") + pw.CloseWithError(err) + } else { + pw.Close() } - pw.Close() }() stat := LogStat{ID: t.id} scanner := bufio.NewScanner(pr) + scanner.Buffer(make([]byte, 0, 256*1024), 256*1024) // handle long JSON log lines for scanner.Scan() { line := scanner.Text() if len(line) == 0 { @@ -366,10 +369,12 @@ func (s *ContainerStore) collectLogStats() { case "fatal": stat.Fatal++ default: - // Lines with unrecognized level still count as info stat.Info++ } } + if err := scanner.Err(); err != nil { + log.Debug().Err(err).Str("id", t.id).Msg("scanner error during log stats collection") + } pr.Close() reader.Close() cancel() @@ -379,6 +384,8 @@ func (s *ContainerStore) collectLogStats() { case ch <- stat: case <-c.Done(): s.logStatSubscribers.Delete(c) + default: + // Drop stat if subscriber is slow — don't block the collector } return true }) From b3fe9dd0748bc8be3efee616e960874d82c9753e Mon Sep 17 00:00:00 2001 From: Z Date: Mon, 30 Mar 2026 20:25:45 +0900 Subject: [PATCH 5/9] fix: prevent goroutine leak and overlapping log stat collection - Extract collectLogStatForContainer for cleaner lifecycle management - Use pw.CloseWithError(err) consistently (nil propagation is safe) - Add atomic.Bool guard to prevent overlapping collectLogStats runs - Buffer logStats SSE channel (50) to reduce backpressure on slow clients Co-Authored-By: Claude Opus 4.6 --- internal/container/container_store.go | 88 ++++++++++++++------------- internal/web/events.go | 2 +- 2 files changed, 48 insertions(+), 42 deletions(-) diff --git a/internal/container/container_store.go b/internal/container/container_store.go index 70b99f0ef0e3..15801a06727f 100644 --- a/internal/container/container_store.go +++ b/internal/container/container_store.go @@ -31,6 +31,7 @@ type ContainerStore struct { statsCollector StatsCollector wg sync.WaitGroup connected atomic.Bool + logStatsRunning atomic.Bool events chan ContainerEvent ctx context.Context labels ContainerLabels @@ -336,46 +337,7 @@ func (s *ContainerStore) collectLogStats() { continue } - // Demux Docker multiplexed stream for non-TTY containers - pr, pw := io.Pipe() - go func() { - err := demuxDockerStream(pw, reader, t.tty) - if err != nil { - log.Debug().Err(err).Str("id", t.id).Msg("error demuxing docker stream") - pw.CloseWithError(err) - } else { - pw.Close() - } - }() - - stat := LogStat{ID: t.id} - scanner := bufio.NewScanner(pr) - scanner.Buffer(make([]byte, 0, 256*1024), 256*1024) // handle long JSON log lines - for scanner.Scan() { - line := scanner.Text() - if len(line) == 0 { - continue - } - level := GuessLogLevelFromLine(line) - switch level { - case "info": - stat.Info++ - case "warn": - stat.Warn++ - case "error": - stat.Error++ - case "debug": - stat.Debug++ - case "fatal": - stat.Fatal++ - default: - stat.Info++ - } - } - if err := scanner.Err(); err != nil { - log.Debug().Err(err).Str("id", t.id).Msg("scanner error during log stats collection") - } - pr.Close() + stat := s.collectLogStatForContainer(ctx, t.id, t.tty, reader) reader.Close() cancel() @@ -392,6 +354,45 @@ func (s *ContainerStore) collectLogStats() { } } +// collectLogStatForContainer demuxes and scans logs for a single container. +func (s *ContainerStore) collectLogStatForContainer(_ context.Context, id string, tty bool, reader io.ReadCloser) LogStat { + pr, pw := io.Pipe() + go func() { + err := demuxDockerStream(pw, reader, tty) + pw.CloseWithError(err) // always propagate (nil is fine) + }() + + stat := LogStat{ID: id} + scanner := bufio.NewScanner(pr) + scanner.Buffer(make([]byte, 0, 256*1024), 256*1024) + for scanner.Scan() { + line := scanner.Text() + if len(line) == 0 { + continue + } + level := GuessLogLevelFromLine(line) + switch level { + case "info": + stat.Info++ + case "warn": + stat.Warn++ + case "error": + stat.Error++ + case "debug": + stat.Debug++ + case "fatal": + stat.Fatal++ + default: + stat.Info++ + } + } + if err := scanner.Err(); err != nil { + log.Debug().Err(err).Str("id", id).Msg("scanner error during log stats collection") + } + pr.Close() + return stat +} + // reconcile compares the in-memory container map with the actual Docker state // and removes any containers that Docker no longer knows about. This handles // cases where destroy events are missed (network hiccups, Docker race conditions). @@ -607,7 +608,12 @@ func (s *ContainerStore) init() { s.reconcile() case <-logStatsTicker.C: - go s.collectLogStats() + if s.logStatsRunning.CompareAndSwap(false, true) { + go func() { + defer s.logStatsRunning.Store(false) + s.collectLogStats() + }() + } case <-s.ctx.Done(): return diff --git a/internal/web/events.go b/internal/web/events.go index c35d35a97c55..8b870d9937a3 100644 --- a/internal/web/events.go +++ b/internal/web/events.go @@ -23,7 +23,7 @@ func (h *handler) streamEvents(w http.ResponseWriter, r *http.Request) { events := make(chan container.ContainerEvent) stats := make(chan container.ContainerStat) - logStats := make(chan container.LogStat) + logStats := make(chan container.LogStat, 50) availableHosts := make(chan container.Host) h.hostService.SubscribeEventsAndStats(r.Context(), events, stats) From 4562ca93103de08df080ec6a0d38de5dd4aa344b Mon Sep 17 00:00:00 2001 From: Z Date: Mon, 30 Mar 2026 20:27:41 +0900 Subject: [PATCH 6/9] fix: include reactive deps in v-memo to enable live UI updates v-memo="[container.id, statMode]" was freezing log sparklines, crash badges, hot scores, and stat displays after initial render. Now tracks state, stat, anomalyScore, and latest logStat so rows re-render when data changes. Co-Authored-By: Claude Opus 4.6 --- assets/components/ContainerTable.vue | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/assets/components/ContainerTable.vue b/assets/components/ContainerTable.vue index eb2f2462b13d..cb50e33d954e 100644 --- a/assets/components/ContainerTable.vue +++ b/assets/components/ContainerTable.vue @@ -87,7 +87,14 @@ From 13b69021c845c619403f90a1006a5fb8f8b97eda Mon Sep 17 00:00:00 2001 From: Z Date: Tue, 31 Mar 2026 10:17:45 +0900 Subject: [PATCH 7/9] fix: resolve v-memo performance regression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause: adding container.stat (object, changes ~1/sec) to v-memo triggered ~13 full row re-renders/sec, cascading into LogFrequencyChart re-mounts (300 DOM elements × 13 containers). Changes: - v-memo now uses only primitives: id, statMode, state, logStatsVersion, anomalyScore (numbers that change every ~5s, not every ~1s) - Container.ts: added logStatsVersion counter + restartCount 1s cache - Go: parallelized collectLogStats with bounded 5-goroutine pool Co-Authored-By: Zhuge --- assets/components/ContainerTable.vue | 9 +------ assets/models/Container.ts | 21 ++++++++++++++- internal/container/container_store.go | 38 ++++++++++++++++++++------- 3 files changed, 49 insertions(+), 19 deletions(-) diff --git a/assets/components/ContainerTable.vue b/assets/components/ContainerTable.vue index cb50e33d954e..befac24ce360 100644 --- a/assets/components/ContainerTable.vue +++ b/assets/components/ContainerTable.vue @@ -87,14 +87,7 @@ diff --git a/assets/models/Container.ts b/assets/models/Container.ts index 759187b94c55..d288d231b255 100644 --- a/assets/models/Container.ts +++ b/assets/models/Container.ts @@ -38,10 +38,13 @@ export class Container { private readonly _statsHistory: Ref; private readonly movingAverageStat: Ref; private readonly _logStatsHistory: Ref; + readonly _logStatsVersion: Ref; // Crash-loop / exit tracking public lastExitCode: string | null = null; private _restartTimestamps: number[] = []; + private _cachedRestartCount: number = 0; + private _restartCacheTime: number = 0; constructor( public readonly id: string, @@ -69,6 +72,7 @@ export class Container { this.movingAverageStat = ref(stats.at(-1) || defaultStat); const logPadding = Array(LOG_STATS_HISTORY_SIZE).fill(defaultLogFreq); this._logStatsHistory = ref(logPadding); + this._logStatsVersion = ref(0); this._name = name; } @@ -77,6 +81,10 @@ export class Container { return unref(this._logStatsHistory); } + get logStatsVersion(): number { + return isRef(this._logStatsVersion) ? this._logStatsVersion.value : (this._logStatsVersion as unknown as number); + } + get statsHistory() { return unref(this._statsHistory); } @@ -172,6 +180,11 @@ export class Container { if (history.length > LOG_STATS_HISTORY_SIZE) { history.shift(); } + if (isRef(this._logStatsVersion)) { + this._logStatsVersion.value++; + } else { + (this._logStatsVersion as unknown as number)++; + } } public recordDie(exitCode?: string) { @@ -188,10 +201,16 @@ export class Container { get restartCount(): number { const now = Date.now(); + // Re-compute at most once per second to avoid repeated Date.now()+filter calls + if (now - this._restartCacheTime < 1000) { + return this._cachedRestartCount; + } const cutoff = now - RESTART_WINDOW_MS; // Prune stale timestamps while counting this._restartTimestamps = this._restartTimestamps.filter((t) => t > cutoff); - return this._restartTimestamps.length; + this._cachedRestartCount = this._restartTimestamps.length; + this._restartCacheTime = now; + return this._cachedRestartCount; } get isCrashLooping(): boolean { diff --git a/internal/container/container_store.go b/internal/container/container_store.go index 15801a06727f..c0207da87afe 100644 --- a/internal/container/container_store.go +++ b/internal/container/container_store.go @@ -328,19 +328,37 @@ func (s *ContainerStore) collectLogStats() { now := time.Now() since := now.Add(-logStatsInterval) + var wg sync.WaitGroup + sem := make(chan struct{}, 5) // max 5 concurrent + var mu sync.Mutex + var results []LogStat + for _, t := range targets { - ctx, cancel := context.WithTimeout(s.ctx, 2*time.Second) - reader, err := s.client.ContainerLogsBetweenDates(ctx, t.id, since, now, STDALL) - if err != nil { - log.Debug().Err(err).Str("id", t.id).Msg("failed to fetch logs for log stats") - cancel() - continue - } + wg.Add(1) + sem <- struct{}{} // acquire + go func(t idTty) { + defer wg.Done() + defer func() { <-sem }() // release + + ctx, cancel := context.WithTimeout(s.ctx, 2*time.Second) + defer cancel() + reader, err := s.client.ContainerLogsBetweenDates(ctx, t.id, since, now, STDALL) + if err != nil { + log.Debug().Err(err).Str("id", t.id).Msg("failed to fetch logs for log stats") + return + } + stat := s.collectLogStatForContainer(ctx, t.id, t.tty, reader) + reader.Close() - stat := s.collectLogStatForContainer(ctx, t.id, t.tty, reader) - reader.Close() - cancel() + mu.Lock() + results = append(results, stat) + mu.Unlock() + }(t) + } + wg.Wait() + // Publish all results + for _, stat := range results { s.logStatSubscribers.Range(func(c context.Context, ch chan<- LogStat) bool { select { case ch <- stat: From bc456b80aebc71fad0a1c8ada0e53c807b78719d Mon Sep 17 00:00:00 2001 From: Z Date: Tue, 31 Mar 2026 10:28:53 +0900 Subject: [PATCH 8/9] fix: reactive proxy increment no-op and semaphore context leak - _logStatsVersion: use property assignment instead of postfix++ on cast expression, which is a no-op when Vue reactive() unwraps the ref - _logStatsVersion: change from readonly to private readonly - collectLogStats: use select with s.ctx.Done() when acquiring semaphore to avoid blocking the loop on context cancellation Co-Authored-By: Zhuge --- assets/models/Container.ts | 5 +++-- internal/container/container_store.go | 7 ++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/assets/models/Container.ts b/assets/models/Container.ts index d288d231b255..f28f101403c1 100644 --- a/assets/models/Container.ts +++ b/assets/models/Container.ts @@ -38,7 +38,7 @@ export class Container { private readonly _statsHistory: Ref; private readonly movingAverageStat: Ref; private readonly _logStatsHistory: Ref; - readonly _logStatsVersion: Ref; + private readonly _logStatsVersion: Ref; // Crash-loop / exit tracking public lastExitCode: string | null = null; @@ -183,7 +183,8 @@ export class Container { if (isRef(this._logStatsVersion)) { this._logStatsVersion.value++; } else { - (this._logStatsVersion as unknown as number)++; + // When unwrapped by reactive(), assign via property to trigger reactivity + (this as any)._logStatsVersion = (this._logStatsVersion as unknown as number) + 1; } } diff --git a/internal/container/container_store.go b/internal/container/container_store.go index c0207da87afe..7dce03dc09fe 100644 --- a/internal/container/container_store.go +++ b/internal/container/container_store.go @@ -333,9 +333,14 @@ func (s *ContainerStore) collectLogStats() { var mu sync.Mutex var results []LogStat +loop: for _, t := range targets { + select { + case sem <- struct{}{}: // acquire + case <-s.ctx.Done(): + break loop + } wg.Add(1) - sem <- struct{}{} // acquire go func(t idTty) { defer wg.Done() defer func() { <-sem }() // release From 74ba36aa0e8573eaf1ea2a64bb69e368d8449b89 Mon Sep 17 00:00:00 2001 From: Z Date: Tue, 31 Mar 2026 15:36:00 +0900 Subject: [PATCH 9/9] ci: allow bot actors in claude-code-review workflow Add allowed_bots: '*' to claude-code-action step to prevent rejection of PRs opened by zhuge-liang-bot. Co-Authored-By: Zhuge --- .github/workflows/claude-code-review.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/claude-code-review.yml b/.github/workflows/claude-code-review.yml index abd1b9b15167..7edc5acd18d3 100644 --- a/.github/workflows/claude-code-review.yml +++ b/.github/workflows/claude-code-review.yml @@ -36,6 +36,7 @@ jobs: uses: anthropics/claude-code-action@v1 with: claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} + allowed_bots: '*' prompt: | REPO: ${{ github.repository }} PR NUMBER: ${{ github.event.pull_request.number }}