diff --git a/CLAUDE.md b/CLAUDE.md index 995e7c74..c88029c2 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -266,6 +266,7 @@ Conventions: - Suite naming: `*_public_test.go` → `{Name}PublicTestSuite`, `*_test.go` → `{Name}TestSuite` - Table-driven structure with `validateFunc` callbacks +- One suite method per function under test — all scenarios (success, errors, edge cases) as rows in one table - Avoid generic file names like `helpers.go` or `utils.go` — name files after what they contain diff --git a/README.md b/README.md index 28064460..cff63853 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,7 @@ them to be used as appliances. | 🌐 **[Network Management][]** | DNS read/update, ping | | ⚙️ **[Command Execution][]** | Remote exec and shell across managed hosts | | 📊 **[System Facts][]** | Agent-collected system facts — architecture, kernel, FQDN, CPUs, network interfaces, service/package manager | +| 🔄 **[Agent Lifecycle][]** | Node conditions (memory, disk, load pressure), graceful drain/cordon for maintenance | | ⚡ **[Async Job System][]** | NATS JetStream with KV-first architecture — broadcast, load-balanced, and label-based routing across hosts | | 💚 **[Health][] & [Metrics][]** | Liveness, readiness, system status endpoints, Prometheus `/metrics` | | 📋 **[Audit Logging][]** | Structured API audit trail in NATS KV with 30-day retention and admin-only read access | @@ -77,5 +78,6 @@ them to be used as appliances. The [MIT][] License. 
+[Agent Lifecycle]: https://osapi-io.github.io/osapi/sidebar/features/agent-lifecycle [System Facts]: https://osapi-io.github.io/osapi/sidebar/features/node-management [MIT]: LICENSE diff --git a/cmd/api_helpers.go b/cmd/api_helpers.go index c07f2811..72254b8a 100644 --- a/cmd/api_helpers.go +++ b/cmd/api_helpers.go @@ -74,6 +74,7 @@ type natsBundle struct { jobsKV jetstream.KeyValue registryKV jetstream.KeyValue factsKV jetstream.KeyValue + stateKV jetstream.KeyValue } // setupAPIServer connects to NATS, creates the API server with all handlers, @@ -112,7 +113,7 @@ func setupAPIServer( checker := newHealthChecker(b.nc, b.jobsKV) auditStore, auditKV, serverOpts := createAuditStore(ctx, log, b.nc, namespace) metricsProvider := newMetricsProvider( - b.nc, b.jobsKV, b.registryKV, b.factsKV, auditKV, streamName, b.jobClient, + b.nc, b.jobsKV, b.registryKV, b.factsKV, b.stateKV, auditKV, streamName, b.jobClient, ) sm := api.New(appConfig, log, serverOpts...) @@ -163,11 +164,21 @@ func connectNATSBundle( } } + var stateKV jetstream.KeyValue + if appConfig.NATS.State.Bucket != "" { + stateKVConfig := cli.BuildStateKVConfig(namespace, appConfig.NATS.State) + stateKV, err = nc.CreateOrUpdateKVBucketWithConfig(ctx, stateKVConfig) + if err != nil { + cli.LogFatal(log, "failed to create state KV bucket", err) + } + } + jc, err := jobclient.New(log, nc, &jobclient.Options{ Timeout: 30 * time.Second, KVBucket: jobsKV, RegistryKV: registryKV, FactsKV: factsKV, + StateKV: stateKV, StreamName: streamName, }) if err != nil { @@ -180,6 +191,7 @@ func connectNATSBundle( jobsKV: jobsKV, registryKV: registryKV, factsKV: factsKV, + stateKV: stateKV, } } @@ -216,6 +228,7 @@ func newMetricsProvider( jobsKV jetstream.KeyValue, registryKV jetstream.KeyValue, factsKV jetstream.KeyValue, + stateKV jetstream.KeyValue, auditKV jetstream.KeyValue, streamName string, jc jobclient.JobClient, @@ -254,7 +267,7 @@ func newMetricsProvider( }, nil }, KVInfoFn: func(fnCtx context.Context) 
([]health.KVMetrics, error) { - buckets := []jetstream.KeyValue{jobsKV, registryKV, factsKV, auditKV} + buckets := []jetstream.KeyValue{jobsKV, registryKV, factsKV, stateKV, auditKV} results := make([]health.KVMetrics, 0, len(buckets)) for _, kv := range buckets { diff --git a/cmd/client_agent_drain.go b/cmd/client_agent_drain.go new file mode 100644 index 00000000..c59c167e --- /dev/null +++ b/cmd/client_agent_drain.go @@ -0,0 +1,61 @@ +// Copyright (c) 2026 John Dewey + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal in the Software without restriction, including without limitation the +// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +// sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. + +package cmd + +import ( + "fmt" + + "github.com/spf13/cobra" + + "github.com/retr0h/osapi/internal/cli" +) + +// clientAgentDrainCmd represents the clientAgentDrain command. +var clientAgentDrainCmd = &cobra.Command{ + Use: "drain", + Short: "Drain an agent", + Long: `Stop an agent from accepting new jobs. 
In-flight jobs continue to completion.`, + Run: func(cmd *cobra.Command, _ []string) { + ctx := cmd.Context() + hostname, _ := cmd.Flags().GetString("hostname") + + resp, err := sdkClient.Agent.Drain(ctx, hostname) + if err != nil { + cli.HandleError(err, logger) + return + } + + if jsonOutput { + fmt.Println(string(resp.RawJSON())) + return + } + + fmt.Println() + cli.PrintKV("Hostname", hostname, "Status", "Draining") + cli.PrintKV("Message", resp.Data.Message) + }, +} + +func init() { + clientAgentCmd.AddCommand(clientAgentDrainCmd) + clientAgentDrainCmd.Flags().String("hostname", "", "Hostname of the agent to drain") + _ = clientAgentDrainCmd.MarkFlagRequired("hostname") +} diff --git a/cmd/client_agent_get.go b/cmd/client_agent_get.go index 52232ccc..dd3c0be9 100644 --- a/cmd/client_agent_get.go +++ b/cmd/client_agent_get.go @@ -64,6 +64,10 @@ func displayAgentGetDetail( kvArgs := []string{"Hostname", data.Hostname, "Status", data.Status} cli.PrintKV(kvArgs...) + if data.State != "" && data.State != "Ready" { + cli.PrintKV("State", data.State) + } + if len(data.Labels) > 0 { cli.PrintKV("Labels", cli.FormatLabels(data.Labels)) } @@ -138,6 +142,48 @@ func displayAgentGetDetail( cli.PrintKV("Interface "+iface.Name, strings.Join(parts, " ")) } } + + var sections []cli.Section + + if len(data.Conditions) > 0 { + condRows := make([][]string, 0, len(data.Conditions)) + for _, c := range data.Conditions { + status := "false" + if c.Status { + status = "true" + } + reason := c.Reason + since := "" + if !c.LastTransitionTime.IsZero() { + since = cli.FormatAge(time.Since(c.LastTransitionTime)) + " ago" + } + condRows = append(condRows, []string{c.Type, status, reason, since}) + } + sections = append(sections, cli.Section{ + Title: "Conditions", + Headers: []string{"TYPE", "STATUS", "REASON", "SINCE"}, + Rows: condRows, + }) + } + + if len(data.Timeline) > 0 { + timelineRows := make([][]string, 0, len(data.Timeline)) + for _, te := range data.Timeline { + timelineRows = 
append( + timelineRows, + []string{te.Timestamp, te.Event, te.Hostname, te.Message, te.Error}, + ) + } + sections = append(sections, cli.Section{ + Title: "Timeline", + Headers: []string{"TIMESTAMP", "EVENT", "HOSTNAME", "MESSAGE", "ERROR"}, + Rows: timelineRows, + }) + } + + for _, sec := range sections { + cli.PrintCompactTable([]cli.Section{sec}) + } } func init() { diff --git a/cmd/client_agent_list.go b/cmd/client_agent_list.go index 89ac4ccc..c43a5c56 100644 --- a/cmd/client_agent_list.go +++ b/cmd/client_agent_list.go @@ -22,6 +22,7 @@ package cmd import ( "fmt" + "strings" "time" "github.com/spf13/cobra" @@ -57,6 +58,22 @@ Shows each agent's hostname, status, labels, age, load, and OS.`, rows := make([][]string, 0, len(agents)) for _, a := range agents { + status := a.State + if status == "" { + status = "Ready" + } + conditions := "-" + if len(a.Conditions) > 0 { + active := make([]string, 0) + for _, c := range a.Conditions { + if c.Status { + active = append(active, c.Type) + } + } + if len(active) > 0 { + conditions = strings.Join(active, ",") + } + } labels := cli.FormatLabels(a.Labels) age := "" if !a.StartedAt.IsZero() { @@ -72,7 +89,8 @@ Shows each agent's hostname, status, labels, age, load, and OS.`, } rows = append(rows, []string{ a.Hostname, - a.Status, + status, + conditions, labels, age, loadStr, @@ -82,9 +100,17 @@ Shows each agent's hostname, status, labels, age, load, and OS.`, sections := []cli.Section{ { - Title: fmt.Sprintf("Active Agents (%d)", resp.Data.Total), - Headers: []string{"HOSTNAME", "STATUS", "LABELS", "AGE", "LOAD (1m)", "OS"}, - Rows: rows, + Title: fmt.Sprintf("Active Agents (%d)", resp.Data.Total), + Headers: []string{ + "HOSTNAME", + "STATUS", + "CONDITIONS", + "LABELS", + "AGE", + "LOAD (1m)", + "OS", + }, + Rows: rows, }, } cli.PrintCompactTable(sections) diff --git a/cmd/client_agent_undrain.go b/cmd/client_agent_undrain.go new file mode 100644 index 00000000..7f668f03 --- /dev/null +++ b/cmd/client_agent_undrain.go 
@@ -0,0 +1,61 @@ +// Copyright (c) 2026 John Dewey + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal in the Software without restriction, including without limitation the +// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +// sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. + +package cmd + +import ( + "fmt" + + "github.com/spf13/cobra" + + "github.com/retr0h/osapi/internal/cli" +) + +// clientAgentUndrainCmd represents the clientAgentUndrain command. 
+var clientAgentUndrainCmd = &cobra.Command{ + Use: "undrain", + Short: "Undrain an agent", + Long: `Resume accepting jobs on a drained agent.`, + Run: func(cmd *cobra.Command, _ []string) { + ctx := cmd.Context() + hostname, _ := cmd.Flags().GetString("hostname") + + resp, err := sdkClient.Agent.Undrain(ctx, hostname) + if err != nil { + cli.HandleError(err, logger) + return + } + + if jsonOutput { + fmt.Println(string(resp.RawJSON())) + return + } + + fmt.Println() + cli.PrintKV("Hostname", hostname, "Status", "Ready") + cli.PrintKV("Message", resp.Data.Message) + }, +} + +func init() { + clientAgentCmd.AddCommand(clientAgentUndrainCmd) + clientAgentUndrainCmd.Flags().String("hostname", "", "Hostname of the agent to undrain") + _ = clientAgentUndrainCmd.MarkFlagRequired("hostname") +} diff --git a/cmd/nats_helpers.go b/cmd/nats_helpers.go index 1f189efc..2fbce3c5 100644 --- a/cmd/nats_helpers.go +++ b/cmd/nats_helpers.go @@ -172,6 +172,14 @@ func setupJetStream( } } + // Create state KV bucket with configured settings (no TTL) + if appConfig.NATS.State.Bucket != "" { + stateKVConfig := cli.BuildStateKVConfig(namespace, appConfig.NATS.State) + if _, err := nc.CreateOrUpdateKVBucketWithConfig(ctx, stateKVConfig); err != nil { + return fmt.Errorf("create state KV bucket %s: %w", stateKVConfig.Bucket, err) + } + } + // Create DLQ stream dlqMaxAge, _ := time.ParseDuration(appConfig.NATS.DLQ.MaxAge) dlqStorage := cli.ParseJetstreamStorageType(appConfig.NATS.DLQ.Storage) diff --git a/configs/osapi.yaml b/configs/osapi.yaml index e037a11a..3a09cd5a 100644 --- a/configs/osapi.yaml +++ b/configs/osapi.yaml @@ -98,6 +98,11 @@ nats: storage: file replicas: 1 + state: + bucket: agent-state + storage: file + replicas: 1 + telemetry: tracing: enabled: true @@ -133,3 +138,7 @@ agent: group: web.dev.us-east # hierarchical: --target group:web, group:web.dev, etc. 
facts: interval: 60s + conditions: + memory_pressure_threshold: 90 + high_load_multiplier: 2.0 + disk_pressure_threshold: 90 diff --git a/docs/docs/gen/api/drain-agent.api.mdx b/docs/docs/gen/api/drain-agent.api.mdx new file mode 100644 index 00000000..3e2e3d21 --- /dev/null +++ b/docs/docs/gen/api/drain-agent.api.mdx @@ -0,0 +1,525 @@ +--- +id: drain-agent +title: "Drain an agent" +description: "Stop the agent from accepting new jobs. In-flight jobs continue to completion." +sidebar_label: "Drain an agent" +hide_title: true +hide_table_of_contents: true +api: eJztVsFu20YQ/ZXFnFqAlpQ0PZQ3FakBBQhixAp6cAVjxB2Ja5O7zOzQjkrw34NZyjJt2UEOCdACPmmX3Jl5895w9ToIDTGKC35hIQfL6Px8S14gA0uxYNfoS8jhXEJjpCSD+tpsONQGi4IacX5rPN2aq7COE7PwJ5vKbUtJe1MEL863ZCSYItRNRZpv8o+HDAS3EfILSAUv36PHLdW6nJ8tLlOZywO8CKsMIhUtO9lBftHBn4RMPG+l1BzpeH7LTghW/SqDBhlrEuKYTnusCXIoQ5S0zMBpVw1KCRkwfW4dk4VcuKUMYlFSjZB3ILtG46Kw81voH7OyLMnc5TRhMyJIgklsTkDRMMUm+EhRc76ezfTnYaZEwhBinHfiUMhOIANlUAXJO8CmqVyR+JheRQ3rjqGG9RUVql/Dyp64oWhNMeKWnuipH/d/cTi46nt99Wb26hjsJ4+tlIHdv2TNiZmfLcw17cwhzQ9DTcyBjzE/lmFuRvs7HVKskRLFhKJomQc+6QvqGEIOp+gqsqoUk7CjGzJRUNo4GYQWdFX8juLWOl1iZfYxBtehlXsQT5a1w0fhSW4DXxtxNYVWUuki2LFQzgttiY8KLw9NasCDIr/PZiqeOEkl/9JTH/czCHfC/nYs7GngtbOWvDkxCx/bzcYVTgezIa5djOlLfFH3/6Dum+fuGB/EbELrf+Tt8qLkT1Tyj+eUxIoJ7c44n65eikI2UUMv0v73pe0zqEnKoMarCTExr3Ykh2myENPuzln00+QLQB0Q3wyWZmSHzlXJQayxKTqAL0Ua2Jsa3a/TIcj2i9PANQrk8O7vZXIDzm9CCt9DH4bt3p/p/z1koEAGFl5NZpOZsqZt1JjGa2+43iZDg35wRY/56+6H9KcYzKF/oS8ybSplsM+g5UoLD1TvjSNkkI+s4cD2Kkt2UQ913RojfeKq7/Xx55Z4N2hwg+xwrTRddGBd1LWFfINVpG80+8vHvVX61XyPhXymlf1D9DuVA6tWd5DBNe3GXrdf9RmUhJY4wRxezxO3o8CjS0KN62FCzz6cLyEDfDhaj0YppX8SVdcNJ5bhmnzfH0CK7hVh338FZL5baA== +sidebar_class_name: "post api-method" +info_path: gen/api/agent-management-api +custom_edit_url: null +--- + +import ApiTabs from "@theme/ApiTabs"; +import DiscriminatorTabs from "@theme/DiscriminatorTabs"; +import MethodEndpoint from "@theme/ApiExplorer/MethodEndpoint"; +import SecuritySchemes from 
"@theme/ApiExplorer/SecuritySchemes"; +import MimeTabs from "@theme/MimeTabs"; +import ParamsItem from "@theme/ParamsItem"; +import ResponseSamples from "@theme/ResponseSamples"; +import SchemaItem from "@theme/SchemaItem"; +import SchemaTabs from "@theme/SchemaTabs"; +import Heading from "@theme/Heading"; +import OperationTabs from "@theme/OperationTabs"; +import TabItem from "@theme/TabItem"; + + + + + + + + + + +Stop the agent from accepting new jobs. In-flight jobs continue to completion. + + + + + +
+ +

+ Path Parameters +

+
+ +
+
+
+ + +
+ + + Agent drain initiated. + + +
+ + + + +
+ + + Schema + +
+ +
    + + + +
+
+
+ + + + +
+
+
+
+
+
+ + + Unauthorized - API key required + + +
+ + + + +
+ + + Schema + +
+ +
    + + + + + + + +
+
+
+ + + + +
+
+
+
+
+
+ + + Forbidden - Insufficient permissions + + +
+ + + + +
+ + + Schema + +
+ +
    + + + + + + + +
+
+
+ + + + +
+
+
+
+
+
+ + + Agent not found. + + +
+ + + + +
+ + + Schema + +
+ +
    + + + + + + + +
+
+
+ + + + +
+
+
+
+
+
+ + + Agent already in requested state. + + +
+ + + + +
+ + + Schema + +
+ +
    + + + + + + + +
+
+
+ + + + +
+
+
+
+
+
+
+
+ \ No newline at end of file diff --git a/docs/docs/gen/api/get-agent-details.api.mdx b/docs/docs/gen/api/get-agent-details.api.mdx index 5769d8d5..2ef825ed 100644 --- a/docs/docs/gen/api/get-agent-details.api.mdx +++ b/docs/docs/gen/api/get-agent-details.api.mdx @@ -5,7 +5,7 @@ description: "Get detailed information about a specific agent by hostname." sidebar_label: "Get agent details" hide_title: true hide_table_of_contents: true -api: eJztWEtv2zgQ/isET7uAokiJnU10y3abIrt9BG2CHoLAoMWRzUYiFXLkxmvovy+Gkm3JdhIv2h4K9GRZmvnmLX2cBZfgUqtKVEbzhL8BZBJQqBwkUzozthD0iImxqZAJ5kpIVaZSJiagkY3nbGocalFAyANuSrBe/lLyhE8Az0nqLw/oeMBRTBxPbrm/PXontJhAQZfnV5cjjzhaQTh+F3AHaWUVznlyu+B/grBgzyucEoYXTywIye/qu4CXwooCEKzzwuQST/jSOx5wRQGWAqc84BYeKmVB8gRtBQF36RQKwZMFx3lJeg6t0hNeBxsJup7CKmJmMoZTaFOBhllAq2AGISeHLLjSaAeOYI+iiH76YD4Nbb4d5S81GkEjSYqyzFXqM3H4xZH4YttLM/4CKfKAl5byhqoxtgp6K569wwkpdIcCK7cLBXRVUBE+gpBzHvD3BpvLu10W0spairTB27aTizHkbldcQkpFQCK/6kX4QpX+gfnBTOQVsAaapUZnalJZkMzoDesWJsohWJAjgbuCbaaAJ1wKhANUvpv6Bj9PoQPLcuGQWcgsuCkNEjo2BWFxDGKVWYvf0WBpTQrOsRbX2zBuRBO8K619rA/NyOkJc3OHUHQHP9xqLqnIz3HVKO/TYN3meqt09ci6GGQBHkVR5gRzM640VuT+DKzb20QrvK+VoyiMBryuuy+C235kawfuAo4Kvd6HT5c6Mx/bwSYvq9KXZy8n2+w2Kk3jGyFHYgZWTODlOnUwSJG1io5lxrI4YMOACS1ZPGSF0hWC2y5eXKhuRnVVjMFuGXrbQffglFPf03EL3UtnFB4f1QEffiN2z+0O+NFpHfD4W9HjJ+GHm33gk9TGs7TcaQKycd6Y6HZCAYWx85er+M7LscqRi89OGhoUeQdQaYTJjqCvSY419pnSbDzfjPL0+PT0JKI8ZhZgD8gLC/As4lF09kc8pLJXjrL2IuKNA/ks4iA+GxxHg81iNEloHW+tdYrRZLNbB2HTqUJIsbJ7zOWrqxvW1ei/J0QhTwYEeg9WQz7a+5304RNrVJZvpj7uMIyHYXRwFh9MQINVKdlIy2qUmkrjHtl879ufXne5mahU5OzV1c1GPqnaD3IPZy+qPJ+zh0rkKlMgmTSFUJotSd3a7a8wPojisL0Rpqbw3zKwM5XCqJjYl21daoXtS6yP3dyTBFiK9F5M9gS8aoRZ4cmk3ahfiQRIWbSZSHvcQVgriLoohGIn9+gP41N8am0McBp5a+Vs8LxkfHYUxienYRzGS42T5zUyOI2SJPbvGZE+LxtFSRwnR0fJ8XEyGCTDIWllolD5fI/6XDEhpSUu0aj0EyorP4xL+qc0oGfWgCcERU/vNifYZ64zsu8Bvxp7f7ksynp4vZ8p/g8m2LD3fgivHxG0BMk8FsusKdham6jSTEmwLtwiAJ3DQkt9O257sk6ff17XpDiI4m1Gf6NFhVNj1b8g2QE7v7pk9zBnKyPfjeKDtWaP6Thnnf9LduR1GU4FMpN6bi77Vb5ozn+dE01L3cOGajfHuZeNr5Pe6rTHyJUT
O83KCsi0btqEEVkyVUObUyP3+Xpdr4IkhZ6RYRRR8ZZFfU1SnQZsCnu8XdgLY8dKStDsgF1qV2WZSpVn3mAL5Zw/sP6q7s9Q3cFTB3FtkGWm0r/G9Gco5HDXRsULLtNBJ1rxg1Ysvwr7gwrrT1I4Ne0CkRJPK7uEH/pSHi6WH+maN9SzWfh1doWfqIRNlbobw5XXU8SStys/+j/2QjxoLy6Wy5e/P197hrBaonSJAFsvL+kr31kXJDwOo9DzutI4LIRe00e/X+215GbuFusG/dZlbBsuwiMelrlQ2u8rrD9RNjltl6g84MmK+twFngbRw8ViLBzc2Lyu6fZDBXS+pVTPhFViTNm49QshupY8yUTuNulYN6DfPrY86He25x71iSiWFF4TgfeLPp5wTse0eXfnW9/VAZ+CkGC9p83j8zSFEjuKW+8B2t6umvDN62uin/0e2ugZj77TqcWikbg296DreuUj0n9ysK7/A+haFv8= +api: eJztWdFu47YS/RWCT72A4pUSO83qLd0mvbltd4PdBH1YBAYtjiw2EqklR+66hv79YkjZlmwncZH2ocA+xbHIMzNnhuPD0YpLcJlVNSqjecp/AmQSUKgSJFM6N7YS9IiJmWmQCeZqyFSuMibmoJHNlqwwDrWoYMQjbmqwfv2N5CmfA17Sqh89oOMRRzF3PP3M/dfTX4UWc6jo4+XtzdQjTjcQjj9E3EHWWIVLnn5e8R9AWLCXDRaE4ZenFoTkD+1DxGthRQUI1vnF5BJP+do7HnFFAdYCCx5xC18aZUHyFG0DEXdZAZXg6YrjsqZ9Dq3Sc95GOwTdFbCJmJmcYQEdFWiYBbQKFjDi5JAFVxvtwBHsaRzTnyGYp6Hj2xF/mdEIGmmlqOtSZZ6JN787Wr7a99LMfocMecRrS7yhCsY2Qe/Fc3Q4IwrdocDGHUIB3VSUhI8g5JJH/L3B8PHhkIWssZYiDXj7dkoxg9IdiktIqQhIlLeDCF/I0s+wPFmIsgEWoFlmdK7mjQXJjN6xbmGuHIIFORV4KNhwCnjKpUA4QeWraWjwtwJ6sKwUDpmF3IIr6CChYwUIizMQG2Yt/o0Ga2sycI51uN6GcVM6wYdoHWJ9CEdOz5lbOoSqf/BHe8UlFfk5a8LmYwqsX1y/KN18ZX0MsgBfRVWXBHM/azQ25P4CrDvaRLf4WCun8Sge87btN4LPw8i2DjxEHBX6fR8+3ejcfOwONnnZ1D49RznZsRu2hMI3Qk7FAqyYw8t56mHQRtZtdCw3liURm0RMaMmSCauUbhDcfvKSSvUZ1U01A7tn6JceugcnTn1NJx30gM54dHbaRnzySuyB2z3w04s24slr0ZMn4Se7deBJ6uJZW+4VAdm4DCb6lVBBZezy5Sz+6texxpGLz540NCjKHqDSCPMDQd/ROhbsM6XZbLkb5cXZxcV5TDzmFuAIyGsL8Cziafz2+2RCaW8csfYi4r0D+SziOHk7PovHu8kIJHSOd9Z6yQhs9vMgbFYohAwbe8S5fHd7z/o7hn1CVPJ8TKCPYDWU06N70odPLGxZd6Yh7mSUTEbxydvkZA4arMrIRlY308w0Go9g870vf2p3pZmrTJTs3e39Dp+U7S/yCGevm7Jcsi+NKFWuQDJpKqE0W4u6rdt/wOwkTkbdF6PMVP63DOxCZTCt5vZlWzdaYdfEhtjhO0mAtcgexfxIwNuwmFVeTNqd/NVIgMSizUU20A7CWkHSRSFUB7XH8DA+pae2xgCL2FurF+PnVyZvT0fJ+cUoGSXrHefP78jhIk7TxPcZkT2/No7TJElPT9Ozs3Q8TicT2pWLSpXLI/Jzy4SUlrRE2DIkVDb+MK7ln9KAXlkDnhMUPX3YPcGeud6RfQ/4h7GPN+ukbA+v9zPDv6AEg3ofhnD1FUFLkMxjsdyaim13k1RaKAnWbSTu4bzuKNwfrVA6PHpnrDQ69KFDgp5kumxKL6gI3hvKjA4uvKIGw9OnfQ3N8JayR
80v4v9V84J+rsh/5R43Tx4OifuZMSUIHSSxcIfanNfqDqdohXY+mulT8ueAht1r7bRl48gT0P3CMRLerWncF/1XpPgFgmTaSGBbwj3/BFYqDa9gX1XgUFT1seFGHBagDyj8NnrmiualhHNDRbh9BtaaA21xj9uNs2svekTedVxc+Qd7RHZVTKXLtulghXJo7HK0Z6t3x+5y2bPlwUg187aljeM42b8I32vRYGGs+hMkO2GXtzfsEZZsY+Rvuxk/wd4eA6z3//pS4fcyLAQyk/krrRw2x+swNukNArob7yhwHKYgLxvf9qpuTzd92Thx0KxsgEzr0F0ZFYBpsOs98hjRd7cJkjYMjEziuG23Sb2iVb2+HRJ7tp/Ya2NnSkrQ7ITdaNfkucqUv7CCrZRzvh9+y+6/Ibvjp+ZX2iDLTaO/HdN/QyInhwaRfuGaDtIt4h+aTH5L7D+UWK8asDDd3J2Ip0l3yt/4VL5ZrX+kWx5ubGFO3huxf6IUhiz1B+0brwtEEhM+014u+kU86j5crwXQ/3678wphM3vsCwG2nfnTr3xvypbyZBSP/HWoNg4robe3Lv9aYlCSu9yttgX62ncYXbgIX/FNXQrllWZj/SAmcNq9e+ARTzfS5yFIOnq4Ws2Eg3tbti19/aUBGgsR1QthlZgRG5/9HJU+S57monS7t5h+QN997HTQf9iRrx+eiGKtezWpXj8f5ynnNN1Y9l+VtHQ9KEBIsN7T8Pgyy6DG3sa9PkAvPTZF+NPVHd3ahjW0UzMe/aBTq1VYcWceQbftxkek/8nBtv0/q4g1/g== sidebar_class_name: "get api-method" info_path: gen/api/agent-management-api custom_edit_url: null @@ -594,6 +594,196 @@ Get detailed information about a specific agent by hostname. + + + +
+ + + + conditions + + object[] + + +
+
+ + + Evaluated node conditions. + + +
  • +
    + Array [ +
    +
  • + + + + + + + +
  • +
    + ] +
    +
  • +
    +
    +
    +
    + + + + timeline + + object[] + + +
    +
    + + + Agent state transition history. + + +
  • +
    + Array [ +
    +
  • + + + + + + + + + +
  • +
    + ] +
    +
  • +
    +
    @@ -602,7 +792,7 @@ Get detailed information about a specific agent by hostname. value={"Example (from schema)"} > diff --git a/docs/docs/gen/api/list-active-agents.api.mdx b/docs/docs/gen/api/list-active-agents.api.mdx index e5e56118..956db4a4 100644 --- a/docs/docs/gen/api/list-active-agents.api.mdx +++ b/docs/docs/gen/api/list-active-agents.api.mdx @@ -5,7 +5,7 @@ description: "Discover all active agents in the fleet." sidebar_label: "List active agents" hide_title: true hide_table_of_contents: true -api: eJztWEtv2zgQ/isEz44qOXY20S3bbRbZ7SNoEvRQGAYtjmw2EqmQIzdeQ/99MZRsS7KbuGgX2ENPlqWZb57kPNZcgkusKlAZzWP+h3KJWYJlIsuYSFAtgYk5aHRMaYYLYGkGgAEfcBRzx+PP/JI+T98JLeaQ0+PlzfXU80xNAVYQsuOTAXeQlFbhisef1/x3EBbsZYkLwvDksQUh+aSaDLgFVxjtwPF4zYdhSD9dRd8qh8ykXR1JrcRoBI3EIYoiU4lX4NUXR2xr7pIF5IKecFUAj7mZfYEE+YAXltRFVQutAVt0wlqx4gOuEHL3Mv/CONQihxalQ6v0nA96ltwtgG2oySJyspce8GrAHQos3SEU0GVOvvsIQpJi7w3Wj5NDEpLSWtDIarx9OZmYQXbQLiGlIiCR3XQs7OpT9YX+DauTpchKYDU0S4xO1by0IJnRPekW5sohWJBTgYeMTY3N6QuXAuEEVQ57fvy0gBYsy4RDZiG14BYgmULHFiAszkBsPWvxJwosrEnAOdbgehnGTZVOzSG3drE+1CdFz5lbOYScERtpoIwO9pJLKtJzVtbMxyRYO7neKl0+sTYGSYAnkRcZwdzPSo0lqb8E644W0RAfK2UYBuGIVz74j6WyICmZO5btFJgMOCr0fB9ur3VqPjYXBGlZFj48RynZeLdmqRPfCDkVS7BiDi/HqYVBjKxhdCw1lkUDNh4woSWLxixXukRw+8GLctX2qC7zGdg9QW9b6B6cfOpzOmqgO+4Mg9NhNeDjH8TuqN0CH55XAx79KHr0TfhxPw+8kxp7NpJbSUAyLmsR7UzIITd29XIU33k6VjpS8dmThgZF1gJUGmF+wOg7omO1fKqUs1XfyvPT8/OzkPyYWoAjIK8swLOIw/Dit2hMYS8dee1FxHsH8lnEUXQxOg1H/WDUTmgUb6S1glF7sx0HYZOFQkiwtEecy9c396zN0b0nRC7PRgT6AFZDNj36Tvpwy2qWzc3UxR0H0TgITy6ikzlosCohGUlRThNTajzCm+99+tN1l5m5SkTGXt/c9/xJ0X6URyh7VWbZij2WIlOpAsmkyYXS/truqv0VZidhFDQvgsTkvpaBXaoEpvncvizrWitsLrEudv1OEmAhkgcxPxLwpiZmue8BbS9+BRIgedGmIoEf6Km+1U/thAEuQi+tWI6ep4wuhkF0dh5EQbThOHueI4XzMI4jf8+I5HnaMIyjKB4O49PTeDSKx2PiSkWustUR8blhQkpLvUTN0nWoLP1h3LR/SgN5in7OCIq+Tvon2HuudWTfA3419uF6E5Td4fV6JvgdnSDaEvomvHlC0BIk81gstSZnO25qlZZKgnXBXgOw7Zu3rW9LbT9qUPn3bN93N+vtce2NDH0Nms5/g9+uOsqhV8G1/EXsozDan1DutShxYaz6ByQ7YZc31+wBVmwr6qeNKmCtOeKUXrLW/02X5nkZLgQyk/gZQXaz7UqoDC
RDwyygVbCEZoQI6pYfhcoOjic94bvgNzxMzEyJOyUOipUlkGhdpyujps2UdfueGHlMFb3bGkkMHSHjMPRp1ET3DVHtBfZ0P7BXxs6UlKDZCbvWrkxTlSg/AYDNlXN+3v0V3f9/dMeHFguekOYWvwehieyn7xZ+hfQ/CqkfAXBhJI/53JfFQtCCib/yMeR1owSW9lathdQtxa0OTXsttVV1gVgQryfjMZ95Ij5oHq42q4K/Pt35arId+dtli+02ZFQLWsNtzKMgDHwXUhiHudC7Zqfec3UqVt9j611aftf6rrYN4QlfFZlQ2o/S1hfU2mtNKaQKSFWZXqzXM+Hg3mZVRa8fS6Bxi3y5FFaJGZn7eVIN+AKEBOs3fQ+wIh8kCRQUAb8T8n1h7wDR3m8bvT/f3FHD0Y1Dz+8efdNJ6lULe72uKe7MA+iq4oNGCaT/vJpUVfUvDGwzlA== +api: eJztWE1v2zgQ/SsEz4orOXY28S2bJrvZ7YfRJuihCAxaHFlsJFIhR268hv/7YijZlmQncZEusIeeLIucN8P3RsMhl1yCi60qUBnNR/ytcrGZg2Uiy5iIUc2BiRlodExphimwJAPAHg84ipnjo6/8nIYn74UWM8jp8Xx8PfE2E1OAFYTs+F3AHcSlVbjgo69L/jsIC/a8xJQw/PSRBSH53eou4BZcYbQDx0dL3g9D+mkH+k45ZCZpx0hhxUYjaCQLURSZin0Ab745MltyF6eQC3rCRQF8xM30G8TIA15YChdV5bQCbMwT1ooFD7hCyN3L9qlxqEUOjZkOrdIzHnRWcpMCW8+mFRHJ3nuPrwLuUGDp9qGALnPi7hMISYF9MFg93u3zEJfWgkZW4e36ycQUsr3rElIqAhLZuLXCdjyrrtO/YXE0F1kJrIJmsdGJmpUWJDO6493CTDkEC3IicN9iE2NzGuFSIByhymGHxy8pNGBZJhwyC4kFl4JkCh1LQVicgtgwa/EnOiysicE5VuN6H8ZNlE7MPlrbWB+rL0XPmFs4hJyRGUWgjO7tJJdUFOe0rIwPSbBmcr1TunxkTQzyAI8iLzKCuZ2WGksKfw7WHeyinnyol37YCwd85cV/KJUFScncWtk2gLuAo0Jv9/HztU7Mp7pAUJRl4eU5KMia3cqkSnwj5ETMwYoZvKxTA4MMWW3oWGIsiwI2DJjQkkVDlitdIrhd8aJcNRnVZT4Fu+PoXQPdgxOnPqejGrpFZ9g77q8CPnwldivsBnj/dBXw6LXo0ZPww24eeJLq9aw9N5KAfJxXLpqZkENu7OJlFd/7eax0FOKzXxoaFFkDUGmE2Z5F39A8VvmnnXK66K7y9Pj09CQkHhMLcADklQV4FrEfnv0WDUn20hFrLyLeOpDPIg6is8FxOOiKUZFQB157a4hRsdnUQdg4VQgxlvaA7/JifMuaFu06IXJ5MiDQe7AassnBNenjZ1aZrCtTG3fYi4a98OgsOpqBBqti8hEX5SQ2pcYD2Pzg05/KXWZmKhYZuxjfdvgktR/kAcFelVm2YA+lyFSiQDJpcqG0L9vtsL/D9CiMevWLXmxyv5eBnasYJvnMvuzrWiusi1gbu3onCbAQ8b2YHQg4riaz3PeAtqNfgQRILNpExPCKnuqpfmrrDDANvbdiPnh+ZnTW70Unp72oF60tTp63SOA0HI0iX2dE/PzcMBxF0ajfHx0fjwaD0XBIVonIVbY4QJ8xE1Ja6iUqkzahsvQf47r9UxqIKfo5ISgavet+wZ65xif7AfC7sffXa1G2H6+PM8Yf6ATRltBdwuUjgpYgmcdiiTU521pTqzRXEqzbtLj7de10uG+tULoaujBWGl3VobZrfxxh1ObLMvMNFcF7R7HRVQivyMFq9OlYq2I4JvWo+AX8TzVLabui+JW734zc7Wvup8ZkIHTVEgu3r8z5Xt3hBK3Qzq9m8lT7s6eH3SntZLIJ5AnoZuIYCRdrGneb/kvq+AWCZNpIYFvCPf8ElikNr2B
f5eBQ5MWhyw04zEHv6fBXwTNHNN9KONfuCLdjYK3ZUxZ3uN0Eu46iQeRNzcWlH9ghss5iSl22lYOlyqGxi96Or81aNlo2fHkw6pq92Y+1NHqzy3VO2t0I6gPzGr/ZrCmHPgTXKDNkPgij3YP9rRYlpsaqf0CyI3Y+vmb3sGAbVz/thP+EijtKsMb/9eHG2zJMBTIT+6O1bBfpK6EykAwNs4BWwRzqk3ev0hqFyvae6jvOtzWztmFiakrcBrHXrSyBXOuqyjNKRFNiXQPlIc3nzWaRZNByMgxDn0a1upc0a0fY411hr4ydKilBsyN2rV2ZJCpW/uAMNlfO+br8S93/v7rDffdxfiId9/31Ie27P/1K7pek/5GkfrvD1Eg+4jPfTRaC7mX5G68hr84XYOm6t3GP+5l0q6Rp3uZuQk0Raevz8vrmxk/iQf1wtd6u//py43eTzU1Zc9ti24tl2gsad0IjHvXCnm/eC+MwF3p7Rqiuh1s7Vpex5TYtf+jWu1obwiO+KTKhfBNUWr+hVqzVWyHtgLQr04vlcioc3NpstaLXDyXQLQVxORdWiSkt9ys1hCkICdZfkN/DgjiIYyhIAX+V6o9TnQ+Irss36v1xeUN9eluHDu8efd1+6UUDe7msZtyYe9CrFQ/qIJD+89XdarX6F9otUpM= sidebar_class_name: "get api-method" info_path: gen/api/agent-management-api custom_edit_url: null @@ -603,6 +603,196 @@ Discover all active agents in the fleet. + + + +
    + + + + conditions + + object[] + + +
    +
    + + + Evaluated node conditions. + + +
  • +
    + Array [ +
    +
  • + + + + + + + +
  • +
    + ] +
    +
  • +
    +
    +
    +
    + + + + timeline + + object[] + + +
    +
    + + + Agent state transition history. + + +
  • +
    + Array [ +
    +
  • + + + + + + + + + +
  • +
    + ] +
    +
  • +
    +
  • diff --git a/docs/docs/gen/api/sidebar.ts b/docs/docs/gen/api/sidebar.ts index dd455ba8..9309eedc 100644 --- a/docs/docs/gen/api/sidebar.ts +++ b/docs/docs/gen/api/sidebar.ts @@ -26,6 +26,18 @@ const sidebar: SidebarsConfig = { label: "Get agent details", className: "api-method get", }, + { + type: "doc", + id: "gen/api/drain-agent", + label: "Drain an agent", + className: "api-method post", + }, + { + type: "doc", + id: "gen/api/undrain-agent", + label: "Undrain an agent", + className: "api-method post", + }, ], }, { diff --git a/docs/docs/gen/api/undrain-agent.api.mdx b/docs/docs/gen/api/undrain-agent.api.mdx new file mode 100644 index 00000000..2ba26015 --- /dev/null +++ b/docs/docs/gen/api/undrain-agent.api.mdx @@ -0,0 +1,524 @@ +--- +id: undrain-agent +title: "Undrain an agent" +description: "Resume accepting jobs on a drained agent." +sidebar_label: "Undrain an agent" +hide_title: true +hide_table_of_contents: true +api: eJztVl1r20oQ/SvLPN2CartfD1dvvtBACqUhdbgPwYSxdmxtIu2qs6O0rtB/v8xKdpw4Lb3QQgt58q52Ps8Z754OQkOM4oI/tZBD6y2j8/MNeYEMLMWCXaPHkMM5xbYmg0VBjTi/MddhFU3wBk3yImtQHSeQgeAmQn4JKdLVe/S4oVqX87PTq2R1tc8cYZlBpKJlJ1vILzv4h5CJ562UGiOZ55/ZCcGyX2bQIGNNQhyTtceaIIcyREnLDJyW26CUkAHTp9YxWciFW8ogFiXVCHkHsm3ULwo7v4H+YbuLkswupglrIyUN/RkJZgRqAloPU2yCjxQ16svZTH/ux0ow7JyM804cCllFqgheFO28A2yayhUJk+l1VMfuuNywuqZCyWlYERQ3pK0pRtzQI331hxhc7g2Xfa9Hr2cvjsu98NhKGdh9JWuem/nZqbmhrdmH+WlVE3Pg45ofUjE3B/sdF8nXSIliQlG0zAOe9AXrptJgJ+gqssoWk7CjWzJRUNo4GcgWdFX8geTWOl1iZUYfg6vQyl0Rj6a1LWlqT/I58I0RV1NoJaUugj0kynmhDfFR4sW+SXW4l+TNbKbkiZOU8q1anY9TCDtiXx0TexJ45awlb56bUx/b9doVTkezIa5djOnf+MTun8Du62/dMj6IWYfW/8zb5YnJX8jk399j0vnhcdX3NmVgG/SlVYToieHfn+E+g5qkDCqvmhAT8qpMcpgmNTHtdiKjn44CAVQO8e2gbw600UflcqDrUCHtyy9FGhgVju5XyQiycXESuEaBHN79u0iywPl1SO5j8cPU3Yk1ffghAy1kwOHFZDaZKW7aSI1pwEb1dTFqG/SDSHqIYXc3qP9PSA6tCX2RaVMpOH0GLVcaccBxFIiQQX4gAXdQLrMkDNWs61YY6YKrvtfPn1ri7QDwLbLDlWJw2YF1UdcW8jVWkb7Tx1/noyB6Zn5MLH6jnfEj+q2ijVWrO8jghraHurZf9hmUhJY4FToczxOKB45Ht4BK1P0Inn34uIAM8P7kPJiUFP7RqrpusFiEG/J9vy9SdK8
V9v1/nZdG4w== +sidebar_class_name: "post api-method" +info_path: gen/api/agent-management-api +custom_edit_url: null +--- + +import ApiTabs from "@theme/ApiTabs"; +import DiscriminatorTabs from "@theme/DiscriminatorTabs"; +import MethodEndpoint from "@theme/ApiExplorer/MethodEndpoint"; +import SecuritySchemes from "@theme/ApiExplorer/SecuritySchemes"; +import MimeTabs from "@theme/MimeTabs"; +import ParamsItem from "@theme/ParamsItem"; +import ResponseSamples from "@theme/ResponseSamples"; +import SchemaItem from "@theme/SchemaItem"; +import SchemaTabs from "@theme/SchemaTabs"; +import Heading from "@theme/Heading"; +import OperationTabs from "@theme/OperationTabs"; +import TabItem from "@theme/TabItem"; + + + + + + + + + + +Resume accepting jobs on a drained agent. + + + + +
    + +

    + Path Parameters +

    +
    +
      + + + +
    +
    +
    +
    + + +
    + + + Agent undrain initiated. + + +
    + + + + +
    + + + Schema + +
    + +
      + + + +
    +
    +
    + + + + +
    +
    +
    +
    +
    +
    + + + Unauthorized - API key required + + +
    + + + + +
    + + + Schema + +
    + +
      + + + + + + + +
    +
    +
    + + + + +
    +
    +
    +
    +
    +
    + + + Forbidden - Insufficient permissions + + +
    + + + + +
    + + + Schema + +
    + +
      + + + + + + + +
    +
    +
    + + + + +
    +
    +
    +
    +
    +
    + + + Agent not found. + + +
    + + + + +
    + + + Schema + +
    + +
      + + + + + + + +
    +
    +
    + + + + +
    +
    +
    +
    +
    +
    + + + Agent not in draining or cordoned state. + + +
    + + + + +
    + + + Schema + +
    + +
      + + + + + + + +
    +
    +
    + + + + +
    +
    +
    +
    +
    +
    +
    +
    + \ No newline at end of file diff --git a/docs/docs/sidebar/architecture/system-architecture.md b/docs/docs/sidebar/architecture/system-architecture.md index d8e9cf6e..d41b05e3 100644 --- a/docs/docs/sidebar/architecture/system-architecture.md +++ b/docs/docs/sidebar/architecture/system-architecture.md @@ -13,14 +13,14 @@ that can either hit the REST API directly or manage the job queue. The system is organized into six layers, top to bottom: -| Layer | Package | Role | -| -------------------------- | --------------------------------------- | ------------------------------------------------------------------- | -| **CLI** | `cmd/` | Cobra command tree (thin wiring) | -| **SDK Client** | `osapi-sdk` (external) | OpenAPI-generated client used by CLI | -| **REST API** | `internal/api/` | Echo server with JWT middleware | -| **Job Client** | `internal/job/client/` | Business logic for job CRUD and status | -| **NATS JetStream** | (external) | KV `job-queue`, Stream `JOBS`, KV `job-responses`, KV `agent-facts` | -| **Agent / Provider Layer** | `internal/agent/`, `internal/provider/` | Consumes jobs, executes providers, publishes system facts | +| Layer | Package | Role | +| -------------------------- | --------------------------------------- | ------------------------------------------------------------------------ | +| **CLI** | `cmd/` | Cobra command tree (thin wiring) | +| **SDK Client** | `osapi-sdk` (external) | OpenAPI-generated client used by CLI | +| **REST API** | `internal/api/` | Echo server with JWT middleware | +| **Job Client** | `internal/job/client/` | Business logic for job CRUD and status | +| **NATS JetStream** | (external) | KV `job-queue`, Stream `JOBS`, KV `job-responses`, KV `agent-facts` | +| **Agent / Provider Layer** | `internal/agent/`, `internal/provider/` | Consumes jobs, executes providers, evaluates conditions, drain lifecycle | ```mermaid graph TD @@ -113,6 +113,23 @@ Providers are stateless and platform-specific (e.g., a Ubuntu DNS 
provider vs. a generic Linux DNS provider). Adding a new operation means implementing the provider interface and registering it in the agent's processor dispatch. +### Agent Lifecycle (`internal/agent/`) + +Agents evaluate **node conditions** on each heartbeat tick (10s) and support +**graceful drain** for maintenance. Conditions are threshold-based booleans +(MemoryPressure, HighLoad, DiskPressure) computed from heartbeat metrics. + +The drain mechanism uses NATS consumer subscribe/unsubscribe. When an operator +drains an agent, the API writes a `drain.{hostname}` key to the state KV bucket +(`agent-state`, no TTL). The agent detects this on its next heartbeat, +unsubscribes from all NATS JetStream consumers (stopping new job delivery), and +transitions through `Draining` → `Cordoned` as in-flight jobs complete. Undrain +deletes the key and the agent resubscribes. + +State transitions are recorded as append-only timeline events in the state KV +bucket, following the same pattern used for job lifecycle events. See +[Agent Lifecycle](../features/agent-lifecycle.md) for details. + ### Configuration (`internal/config/`) Configuration is managed by [Viper][] and loaded from an `osapi.yaml` file. diff --git a/docs/docs/sidebar/development/development.md b/docs/docs/sidebar/development/development.md index 462e8d10..97fe0dbe 100644 --- a/docs/docs/sidebar/development/development.md +++ b/docs/docs/sidebar/development/development.md @@ -97,6 +97,12 @@ Unit tests should follow the Go convention of being located in a file named located in `test/integration/` and use a `//go:build integration` tag. They build and start a real `osapi` binary, so they require no external setup. +Use `testify/suite` with table-driven patterns and `validateFunc` callbacks. 
+**One suite method per function under test.** All scenarios for a function +(success, error codes, transport failures, nil responses) belong as rows in a +single table — never split into separate `TestFoo`, `TestFooError`, +`TestFooNilResponse` methods. + ### File naming Avoid generic file names like `helpers.go` or `utils.go`. Name files after what diff --git a/docs/docs/sidebar/development/tasks/backlog/2026-02-26-kubernetes-systemd-patterns.md b/docs/docs/sidebar/development/tasks/backlog/2026-02-26-kubernetes-systemd-patterns.md deleted file mode 100644 index 446aa048..00000000 --- a/docs/docs/sidebar/development/tasks/backlog/2026-02-26-kubernetes-systemd-patterns.md +++ /dev/null @@ -1,118 +0,0 @@ ---- -title: Kubernetes and systemd inspired patterns -status: backlog -created: 2026-02-26 -updated: 2026-02-26 ---- - -## Objective - -Adopt proven patterns from Kubernetes and systemd to make OSAPI's node -management feel more mature and operationally familiar. These are ideas to -explore beyond the initial heartbeat enrichment and `node list`/`node get` work. - -## Ideas - -### Node Conditions (Kubernetes-inspired) - -Kubernetes nodes report conditions like `MemoryPressure`, `DiskPressure`, -`PIDPressure`, and `NetworkUnavailable`. Since the heartbeat already collects -memory and load data, we could derive conditions from thresholds: - -- Memory > 90% used -> `MemoryPressure: true` -- Load 1m > 2x CPU count -> `HighLoad: true` -- Disk > 90% used -> `DiskPressure: true` (would need disk in heartbeat or a - periodic deep scan) - -Conditions would be stored in the KV registration and shown in `node list` / -`node get`. They give operators a quick "is anything wrong?" signal without -digging into raw numbers. - -### Capacity and Allocatable (Kubernetes-inspired) - -Kubernetes tracks what resources a node has vs. what's available for scheduling. -We could track: - -- `max_jobs` (configured) vs. 
`active_jobs` (current count) -- Job slot utilization per agent visible in `node get` -- Could inform smarter job routing (avoid overloaded agents) - -### Taints and Tolerations (Kubernetes-inspired) - -Kubernetes nodes can be "tainted" to repel workloads unless they explicitly -tolerate the taint. We already have label-based routing, but taints would add: - -- Mark a node as `draining` or `maintenance` so new jobs avoid it -- `NoSchedule` equivalent: agent stays registered but won't receive new jobs -- `NoExecute` equivalent: evict running jobs (graceful drain) -- CLI: - `osapi node taint --hostname web-01 --key maintenance --effect NoSchedule` - -### Node Lifecycle Events (Kubernetes-inspired) - -Kubernetes records lifecycle events per node (Joined, BecameReady, -BecameNotReady, etc.). We could store agent lifecycle events in a dedicated KV -bucket: - -- "agent started" with timestamp and version -- "agent stopped" (clean shutdown) -- "heartbeat missed" (detected by TTL expiry watcher) -- "agent restarted" (same hostname re-registers) - -Visible via `node get --hostname X` or a dedicated `node events --hostname X` -command. - -### Consistent Resource Model (Kubernetes-inspired) - -Every Kubernetes object has a uniform envelope: `apiVersion`, `kind`, `metadata` -(name, namespace, labels, annotations, creationTimestamp, uid), `spec`, -`status`. We could formalize OSAPI resources similarly: - -- Each resource type (node, job, audit entry) gets a consistent structure -- `metadata.labels`, `metadata.annotations`, `metadata.createdAt` on every - resource -- Annotations (separate from labels) for non-routing metadata -- Enables generic tooling: filtering, sorting, field selectors - -### Agent States (systemd-inspired) - -Systemd units have explicit states: Active, Inactive, Failed, Activating, -Deactivating. Currently we only have "present in KV = alive". 
Adding explicit -states would enable: - -- `Starting` - agent is initializing, not yet processing jobs -- `Ready` - agent is healthy and processing jobs -- `Draining` - agent is shutting down gracefully, finishing in-flight jobs but - not accepting new ones -- `Stopped` - clean shutdown (deregistered) - -State transitions would be visible in the registry and in lifecycle events. - -### Restart Tracking (systemd-inspired) - -Systemd tracks restart counts and restart reasons. We could add: - -- `restart_count` - how many times the agent process has started for this - hostname -- `last_restart_reason` - "clean start", "crash recovery", etc. -- Stability signal for fleet health dashboards - -### Additional State to Save - -- **First-seen timestamp** (`started_at`) distinct from last heartbeat - (`registered_at`) for true "AGE" display like `kubectl get nodes` -- **Active job count** - how busy the agent is right now -- **Agent binary version** - for fleet version tracking and rolling upgrade - visibility -- **OS kernel version** - already available from host provider - -## Notes - -- These are incremental improvements that build on the heartbeat enrichment - work. Each can be implemented independently. -- Priority should be driven by operational value: conditions and capacity - tracking are highest value for fleet operators. -- Taints and lifecycle events add complexity but enable sophisticated fleet - management workflows. -- The consistent resource model is the most ambitious change and would touch the - most code, but pays off long-term for tooling and API consistency. diff --git a/docs/docs/sidebar/features/agent-lifecycle.md b/docs/docs/sidebar/features/agent-lifecycle.md new file mode 100644 index 00000000..b3b83dde --- /dev/null +++ b/docs/docs/sidebar/features/agent-lifecycle.md @@ -0,0 +1,132 @@ +--- +sidebar_position: 4 +--- + +# Agent Lifecycle + +OSAPI agents report threshold-based **node conditions** and support graceful +**drain/cordon** for maintenance. 
Both features are inspired by Kubernetes node +management patterns. + +## Node Conditions + +Conditions are threshold-based booleans evaluated agent-side on every heartbeat +(10 seconds). They surface "is anything wrong?" at a glance without requiring +operators to interpret raw metrics. + +| Condition | Default Threshold | Data Source | +| ---------------- | -------------------- | ---------------- | +| `MemoryPressure` | Memory used > 90% | Heartbeat memory | +| `HighLoad` | Load1 > 2x CPU count | Heartbeat load | +| `DiskPressure` | Any disk > 90% used | Heartbeat disk | + +Each condition tracks: + +- **Status** -- `true` when the threshold is exceeded, `false` otherwise +- **Reason** -- human-readable explanation (e.g., "memory 94% used, 15.1/16.0 + GB") +- **LastTransitionTime** -- when the condition last flipped between true and + false + +### CLI Display + +`agent list` shows active conditions in the CONDITIONS column: + +``` +HOSTNAME STATUS CONDITIONS LABELS AGE LOAD (1m) OS +web-01 Ready HighLoad,MemoryPressure - 3d 4h 4.12 Ubuntu 24.04 +web-02 Ready - - 12h 0.31 Ubuntu 24.04 +db-01 Ready DiskPressure - 5d 1.22 Ubuntu 24.04 +``` + +`agent get` shows full condition details: + +``` +Conditions: + TYPE STATUS REASON SINCE + MemoryPressure true memory 94% used (15.1/16.0 GB) 2m ago + HighLoad true load 4.12, threshold 4.00 for 2 CPUs 5m ago + DiskPressure false +``` + +### Configuration + +Thresholds are configurable in `osapi.yaml`: + +```yaml +agent: + conditions: + memory_pressure_threshold: 90 # percent used + high_load_multiplier: 2.0 # load1 / cpu_count + disk_pressure_threshold: 90 # percent used +``` + +## Agent Drain + +Drain allows operators to gracefully remove an agent from the job routing pool +for maintenance without stopping the process. When an agent stops without +draining, it vanishes from the registry and looks identical to a crash. 
+ +### State Machine + +Agents have an explicit scheduling state with three values: + +``` +Ready ──(drain)──> Draining ──(jobs done)──> Cordoned + ^ │ + └──────────────(undrain)───────────────────────┘ +``` + +| State | Meaning | +| ---------- | ------------------------------------------- | +| `Ready` | Accepting and processing jobs (default) | +| `Draining` | Finishing in-flight jobs, not accepting new | +| `Cordoned` | Fully drained, idle, not accepting jobs | + +### How It Works + +1. Operator calls `osapi client agent drain --hostname web-01` +2. API writes a `drain.{hostname}` key to the state KV bucket +3. Agent detects the drain flag on its next heartbeat tick (10s) +4. Agent transitions to `Draining` and **unsubscribes from NATS JetStream + consumers** -- this is how it stops receiving new jobs +5. In-flight jobs continue to completion +6. Once all in-flight jobs finish, state becomes `Cordoned` +7. Operator calls `osapi client agent undrain --hostname web-01` +8. API deletes the drain key; agent resubscribes and transitions to `Ready` + +### Timeline + +Every state transition is recorded as an append-only event in the state KV +bucket (`agent-state`, no TTL). `agent get` shows the full transition history: + +``` +Timeline: + TIMESTAMP EVENT HOSTNAME MESSAGE + 2026-03-05 10:00:00 drain web-01 Drain initiated + 2026-03-05 10:05:23 cordoned web-01 All jobs completed + 2026-03-05 12:00:00 undrain web-01 Resumed accepting jobs +``` + +### CLI Commands + +```bash +osapi client agent drain --hostname web-01 # start draining +osapi client agent undrain --hostname web-01 # resume accepting jobs +``` + +Both commands return the current state and a confirmation message. + +## Permissions + +Node conditions are included in the standard `agent:read` responses. Drain and +undrain operations require the `agent:write` permission, which is included in +the `admin` role by default. 
+ +## Related + +- [Agent CLI Reference](../usage/cli/client/agent/agent.mdx) -- agent fleet + commands +- [Node Management](node-management.md) -- node queries via the job system +- [Job System](job-system.md) -- how async job processing works +- [Configuration](../usage/configuration.md) -- full configuration reference diff --git a/docs/docs/sidebar/features/authentication.md b/docs/docs/sidebar/features/authentication.md index 7654352d..88d7631d 100644 --- a/docs/docs/sidebar/features/authentication.md +++ b/docs/docs/sidebar/features/authentication.md @@ -60,11 +60,11 @@ flowchart TD Built-in roles expand to these default permissions: -| Role | Permissions | -| ------- | -------------------------------------------------------------------------------------------------- | -| `admin` | `node:read`, `network:read`, `network:write`, `job:read`, `job:write`, `health:read`, `audit:read` | -| `write` | `node:read`, `network:read`, `network:write`, `job:read`, `job:write`, `health:read` | -| `read` | `node:read`, `network:read`, `job:read`, `health:read` | +| Role | Permissions | +| ------- | ------------------------------------------------------------------------------------------------------------------------------- | +| `admin` | `agent:read`, `agent:write`, `node:read`, `network:read`, `network:write`, `job:read`, `job:write`, `health:read`, `audit:read` | +| `write` | `agent:read`, `node:read`, `network:read`, `network:write`, `job:read`, `job:write`, `health:read` | +| `read` | `agent:read`, `node:read`, `network:read`, `job:read`, `health:read` | ### Custom Roles diff --git a/docs/docs/sidebar/features/node-management.md b/docs/docs/sidebar/features/node-management.md index 9a857ebf..a9eb931d 100644 --- a/docs/docs/sidebar/features/node-management.md +++ b/docs/docs/sidebar/features/node-management.md @@ -14,13 +14,15 @@ host. 
OSAPI separates agent fleet discovery from node system queries: - **Agent** commands (`agent list`, `agent get`) read directly from the NATS KV - heartbeat registry. They show which agents are online, their labels, and - lightweight metrics from the last heartbeat. No jobs are created. Agents also - expose typed **system facts** (architecture, kernel version, FQDN, CPU count, - network interfaces, service manager, package manager) gathered every 60 - seconds via providers and stored in a separate `agent-facts` KV bucket with a - 5-minute TTL. The API merges registry and facts data into a single `AgentInfo` - response. + heartbeat registry. They show which agents are online, their labels, + lightweight metrics, and [node conditions](agent-lifecycle.md) from the last + heartbeat. No jobs are created. Agents also expose typed **system facts** + (architecture, kernel version, FQDN, CPU count, network interfaces, service + manager, package manager) gathered every 60 seconds via providers and stored + in a separate `agent-facts` KV bucket with a 5-minute TTL. The API merges + registry and facts data into a single `AgentInfo` response. Agents can be + [drained](agent-lifecycle.md#agent-drain) for maintenance without stopping the + process. - **Node** commands (`node hostname`, `node status`) dispatch jobs to agents that execute system commands and return detailed results (disk usage, full memory breakdown, etc.). diff --git a/docs/docs/sidebar/usage/cli/client/agent/drain.md b/docs/docs/sidebar/usage/cli/client/agent/drain.md new file mode 100644 index 00000000..5b35701e --- /dev/null +++ b/docs/docs/sidebar/usage/cli/client/agent/drain.md @@ -0,0 +1,24 @@ +# Drain + +Drain an agent to stop it from accepting new jobs. In-flight jobs continue to +completion: + +```bash +$ osapi client agent drain --hostname web-01 + + Hostname: web-01 + Status: Draining + Message: Agent drain initiated +``` + +The agent transitions from `Ready` to `Draining`. 
Once all in-flight jobs +finish, the state becomes `Cordoned`. The agent stays running and continues +sending heartbeats -- it just stops pulling new work from the job queue. + +Use `agent undrain` to resume accepting jobs. + +## Flags + +| Flag | Description | Required | +| ------------ | ------------------------------ | -------- | +| `--hostname` | Hostname of the agent to drain | Yes | diff --git a/docs/docs/sidebar/usage/cli/client/agent/get.md b/docs/docs/sidebar/usage/cli/client/agent/get.md index 127ed5cc..8bde755f 100644 --- a/docs/docs/sidebar/usage/cli/client/agent/get.md +++ b/docs/docs/sidebar/usage/cli/client/agent/get.md @@ -6,6 +6,7 @@ Get detailed information about a specific agent by hostname: $ osapi client agent get --hostname web-01 Hostname: web-01 Status: Ready + State: Draining Labels: group:web.dev.us-east OS: Ubuntu 24.04 Uptime: 6 days, 3 hours, 54 minutes @@ -22,29 +23,43 @@ $ osapi client agent get --hostname web-01 Interfaces: eth0: 10.0.1.10 (IPv4), fe80::1 (IPv6), MAC 00:1a:2b:3c:4d:5e lo: 127.0.0.1 (IPv4), ::1 (IPv6) + + Conditions: + TYPE STATUS REASON SINCE + MemoryPressure true memory 94% used (15.1/16.0 GB) 2m ago + HighLoad true load 4.12, threshold 4.00 for 2 CPUs 5m ago + DiskPressure false + + Timeline: + TIMESTAMP EVENT HOSTNAME MESSAGE + 2026-03-05 10:00:00 drain web-01 Drain initiated + 2026-03-05 10:05:23 cordoned web-01 All jobs completed ``` This command reads directly from the agent heartbeat registry -- no job is created. The data comes from the agent's most recent heartbeat write. 
-| Field | Description | -| ------------ | --------------------------------------------------- | -| Hostname | Agent's configured or OS hostname | -| Status | `Ready` if present in registry | -| Labels | Key-value labels from agent config | -| OS | Distribution and version | -| Uptime | System uptime reported by the agent | -| Age | Time since the agent process started | -| Last Seen | Time since the last heartbeat refresh | -| Load | 1-, 5-, and 15-minute load averages | -| Memory | Total, used, and free RAM | -| Architecture | CPU architecture (e.g., amd64) | -| Kernel | OS kernel version | -| FQDN | Fully qualified domain name | -| CPUs | Number of logical CPUs | -| Service Mgr | Init system (e.g., systemd) | -| Package Mgr | Package manager (e.g., apt) | -| Interfaces | Network interfaces with IPv4, IPv6, MAC, and family | +| Field | Description | +| ------------ | --------------------------------------------------------- | +| Hostname | Agent's configured or OS hostname | +| Status | `Ready` if present in registry | +| State | Scheduling state: `Draining` or `Cordoned` (if not Ready) | +| Labels | Key-value labels from agent config | +| OS | Distribution and version | +| Uptime | System uptime reported by the agent | +| Age | Time since the agent process started | +| Last Seen | Time since the last heartbeat refresh | +| Load | 1-, 5-, and 15-minute load averages | +| Memory | Total, used, and free RAM | +| Architecture | CPU architecture (e.g., amd64) | +| Kernel | OS kernel version | +| FQDN | Fully qualified domain name | +| CPUs | Number of logical CPUs | +| Service Mgr | Init system (e.g., systemd) | +| Package Mgr | Package manager (e.g., apt) | +| Interfaces | Network interfaces with IPv4, IPv6, MAC, and family | +| Conditions | Node conditions table (type, status, reason, since) | +| Timeline | State transition events (timestamp, event, hostname) | :::tip agent get vs. 
node status diff --git a/docs/docs/sidebar/usage/cli/client/agent/list.md b/docs/docs/sidebar/usage/cli/client/agent/list.md index 4c0f6973..172dc3e7 100644 --- a/docs/docs/sidebar/usage/cli/client/agent/list.md +++ b/docs/docs/sidebar/usage/cli/client/agent/list.md @@ -5,25 +5,27 @@ List active agents in the fleet with status, labels, age, and system metrics: ```bash $ osapi client agent list - Active Agents (2): + Active Agents (3): - HOSTNAME STATUS LABELS AGE LOAD (1m) OS - web-01 Ready group:web.dev.us-east 3d 4h 1.78 Ubuntu 24.04 - web-02 Ready group:web.dev.us-west 12h 5m 0.45 Ubuntu 24.04 + HOSTNAME STATUS CONDITIONS LABELS AGE LOAD (1m) OS + web-01 Ready HighLoad,MemoryPressure group:web.dev.us-east 3d 4h 4.12 Ubuntu 24.04 + web-02 Ready - group:web.dev.us-west 12h 5m 0.45 Ubuntu 24.04 + db-01 Cordoned DiskPressure - 5d 2h 1.22 Ubuntu 24.04 ``` This command reads directly from the agent heartbeat registry -- no job is created. Each agent writes a heartbeat every 10 seconds with a 30-second TTL. Agents that stop heartbeating disappear from the list automatically. 
-| Column | Source | -| --------- | --------------------------------------- | -| HOSTNAME | Agent's configured or OS hostname | -| STATUS | `Ready` if present in registry | -| LABELS | Key-value labels from agent config | -| AGE | Time since the agent process started | -| LOAD (1m) | 1-minute load average from heartbeat | -| OS | Distribution and version from heartbeat | +| Column | Source | +| ---------- | --------------------------------------------------------------- | +| HOSTNAME | Agent's configured or OS hostname | +| STATUS | Scheduling state: `Ready`, `Draining`, or `Cordoned` | +| CONDITIONS | Active node conditions (MemoryPressure, HighLoad, DiskPressure) | +| LABELS | Key-value labels from agent config | +| AGE | Time since the agent process started | +| LOAD (1m) | 1-minute load average from heartbeat | +| OS | Distribution and version from heartbeat | :::tip Full facts in JSON output diff --git a/docs/docs/sidebar/usage/cli/client/agent/undrain.md b/docs/docs/sidebar/usage/cli/client/agent/undrain.md new file mode 100644 index 00000000..aaa75ea2 --- /dev/null +++ b/docs/docs/sidebar/usage/cli/client/agent/undrain.md @@ -0,0 +1,20 @@ +# Undrain + +Resume accepting jobs on a drained or cordoned agent: + +```bash +$ osapi client agent undrain --hostname web-01 + + Hostname: web-01 + Status: Ready + Message: Agent undrain initiated +``` + +The agent re-subscribes to NATS JetStream consumers and transitions back to +`Ready`. + +## Flags + +| Flag | Description | Required | +| ------------ | -------------------------------- | -------- | +| `--hostname` | Hostname of the agent to undrain | Yes | diff --git a/docs/docs/sidebar/usage/configuration.md b/docs/docs/sidebar/usage/configuration.md index 46f438f2..843b07ed 100644 --- a/docs/docs/sidebar/usage/configuration.md +++ b/docs/docs/sidebar/usage/configuration.md @@ -23,47 +23,53 @@ Every config key can be overridden with an environment variable using the `OSAPI_` prefix. 
Dots and nested keys become underscores, and the name is uppercased: -| Config Key | Environment Variable | -| ---------------------------------- | ---------------------------------------- | -| `debug` | `OSAPI_DEBUG` | -| `api.server.port` | `OSAPI_API_SERVER_PORT` | -| `api.server.nats.host` | `OSAPI_API_SERVER_NATS_HOST` | -| `api.server.nats.port` | `OSAPI_API_SERVER_NATS_PORT` | -| `api.server.nats.client_name` | `OSAPI_API_SERVER_NATS_CLIENT_NAME` | -| `api.server.nats.namespace` | `OSAPI_API_SERVER_NATS_NAMESPACE` | -| `api.server.nats.auth.type` | `OSAPI_API_SERVER_NATS_AUTH_TYPE` | -| `api.server.security.signing_key` | `OSAPI_API_SERVER_SECURITY_SIGNING_KEY` | -| `api.client.security.bearer_token` | `OSAPI_API_CLIENT_SECURITY_BEARER_TOKEN` | -| `nats.server.host` | `OSAPI_NATS_SERVER_HOST` | -| `nats.server.port` | `OSAPI_NATS_SERVER_PORT` | -| `nats.server.namespace` | `OSAPI_NATS_SERVER_NAMESPACE` | -| `nats.server.auth.type` | `OSAPI_NATS_SERVER_AUTH_TYPE` | -| `nats.stream.name` | `OSAPI_NATS_STREAM_NAME` | -| `nats.kv.bucket` | `OSAPI_NATS_KV_BUCKET` | -| `nats.kv.response_bucket` | `OSAPI_NATS_KV_RESPONSE_BUCKET` | -| `nats.audit.bucket` | `OSAPI_NATS_AUDIT_BUCKET` | -| `nats.audit.ttl` | `OSAPI_NATS_AUDIT_TTL` | -| `nats.audit.max_bytes` | `OSAPI_NATS_AUDIT_MAX_BYTES` | -| `nats.audit.storage` | `OSAPI_NATS_AUDIT_STORAGE` | -| `nats.audit.replicas` | `OSAPI_NATS_AUDIT_REPLICAS` | -| `nats.registry.bucket` | `OSAPI_NATS_REGISTRY_BUCKET` | -| `nats.registry.ttl` | `OSAPI_NATS_REGISTRY_TTL` | -| `nats.registry.storage` | `OSAPI_NATS_REGISTRY_STORAGE` | -| `nats.registry.replicas` | `OSAPI_NATS_REGISTRY_REPLICAS` | -| `nats.facts.bucket` | `OSAPI_NATS_FACTS_BUCKET` | -| `nats.facts.ttl` | `OSAPI_NATS_FACTS_TTL` | -| `nats.facts.storage` | `OSAPI_NATS_FACTS_STORAGE` | -| `nats.facts.replicas` | `OSAPI_NATS_FACTS_REPLICAS` | -| `telemetry.tracing.enabled` | `OSAPI_TELEMETRY_TRACING_ENABLED` | -| `telemetry.tracing.exporter` | 
`OSAPI_TELEMETRY_TRACING_EXPORTER` | -| `telemetry.tracing.otlp_endpoint` | `OSAPI_TELEMETRY_TRACING_OTLP_ENDPOINT` | -| `agent.nats.host` | `OSAPI_AGENT_NATS_HOST` | -| `agent.nats.port` | `OSAPI_AGENT_NATS_PORT` | -| `agent.nats.client_name` | `OSAPI_AGENT_NATS_CLIENT_NAME` | -| `agent.nats.namespace` | `OSAPI_AGENT_NATS_NAMESPACE` | -| `agent.nats.auth.type` | `OSAPI_AGENT_NATS_AUTH_TYPE` | -| `agent.hostname` | `OSAPI_AGENT_HOSTNAME` | -| `agent.facts.interval` | `OSAPI_AGENT_FACTS_INTERVAL` | +| Config Key | Environment Variable | +| -------------------------------------------- | -------------------------------------------------- | +| `debug` | `OSAPI_DEBUG` | +| `api.server.port` | `OSAPI_API_SERVER_PORT` | +| `api.server.nats.host` | `OSAPI_API_SERVER_NATS_HOST` | +| `api.server.nats.port` | `OSAPI_API_SERVER_NATS_PORT` | +| `api.server.nats.client_name` | `OSAPI_API_SERVER_NATS_CLIENT_NAME` | +| `api.server.nats.namespace` | `OSAPI_API_SERVER_NATS_NAMESPACE` | +| `api.server.nats.auth.type` | `OSAPI_API_SERVER_NATS_AUTH_TYPE` | +| `api.server.security.signing_key` | `OSAPI_API_SERVER_SECURITY_SIGNING_KEY` | +| `api.client.security.bearer_token` | `OSAPI_API_CLIENT_SECURITY_BEARER_TOKEN` | +| `nats.server.host` | `OSAPI_NATS_SERVER_HOST` | +| `nats.server.port` | `OSAPI_NATS_SERVER_PORT` | +| `nats.server.namespace` | `OSAPI_NATS_SERVER_NAMESPACE` | +| `nats.server.auth.type` | `OSAPI_NATS_SERVER_AUTH_TYPE` | +| `nats.stream.name` | `OSAPI_NATS_STREAM_NAME` | +| `nats.kv.bucket` | `OSAPI_NATS_KV_BUCKET` | +| `nats.kv.response_bucket` | `OSAPI_NATS_KV_RESPONSE_BUCKET` | +| `nats.audit.bucket` | `OSAPI_NATS_AUDIT_BUCKET` | +| `nats.audit.ttl` | `OSAPI_NATS_AUDIT_TTL` | +| `nats.audit.max_bytes` | `OSAPI_NATS_AUDIT_MAX_BYTES` | +| `nats.audit.storage` | `OSAPI_NATS_AUDIT_STORAGE` | +| `nats.audit.replicas` | `OSAPI_NATS_AUDIT_REPLICAS` | +| `nats.registry.bucket` | `OSAPI_NATS_REGISTRY_BUCKET` | +| `nats.registry.ttl` | `OSAPI_NATS_REGISTRY_TTL` | +| 
`nats.registry.storage` | `OSAPI_NATS_REGISTRY_STORAGE` | +| `nats.registry.replicas` | `OSAPI_NATS_REGISTRY_REPLICAS` | +| `nats.facts.bucket` | `OSAPI_NATS_FACTS_BUCKET` | +| `nats.facts.ttl` | `OSAPI_NATS_FACTS_TTL` | +| `nats.facts.storage` | `OSAPI_NATS_FACTS_STORAGE` | +| `nats.facts.replicas` | `OSAPI_NATS_FACTS_REPLICAS` | +| `nats.state.bucket` | `OSAPI_NATS_STATE_BUCKET` | +| `nats.state.storage` | `OSAPI_NATS_STATE_STORAGE` | +| `nats.state.replicas` | `OSAPI_NATS_STATE_REPLICAS` | +| `telemetry.tracing.enabled` | `OSAPI_TELEMETRY_TRACING_ENABLED` | +| `telemetry.tracing.exporter` | `OSAPI_TELEMETRY_TRACING_EXPORTER` | +| `telemetry.tracing.otlp_endpoint` | `OSAPI_TELEMETRY_TRACING_OTLP_ENDPOINT` | +| `agent.nats.host` | `OSAPI_AGENT_NATS_HOST` | +| `agent.nats.port` | `OSAPI_AGENT_NATS_PORT` | +| `agent.nats.client_name` | `OSAPI_AGENT_NATS_CLIENT_NAME` | +| `agent.nats.namespace` | `OSAPI_AGENT_NATS_NAMESPACE` | +| `agent.nats.auth.type` | `OSAPI_AGENT_NATS_AUTH_TYPE` | +| `agent.hostname` | `OSAPI_AGENT_HOSTNAME` | +| `agent.facts.interval` | `OSAPI_AGENT_FACTS_INTERVAL` | +| `agent.conditions.memory_pressure_threshold` | `OSAPI_AGENT_CONDITIONS_MEMORY_PRESSURE_THRESHOLD` | +| `agent.conditions.high_load_multiplier` | `OSAPI_AGENT_CONDITIONS_HIGH_LOAD_MULTIPLIER` | +| `agent.conditions.disk_pressure_threshold` | `OSAPI_AGENT_CONDITIONS_DISK_PRESSURE_THRESHOLD` | Environment variables take precedence over file values. @@ -127,11 +133,11 @@ OSAPI uses fine-grained `resource:verb` permissions for access control. Each API endpoint requires a specific permission. 
Built-in roles expand to a default set of permissions: -| Role | Permissions | -| ------- | ----------------------------------------------------------------------------------------------------------------------------------- | -| `admin` | `agent:read`, `node:read`, `network:read`, `network:write`, `job:read`, `job:write`, `health:read`, `audit:read`, `command:execute` | -| `write` | `agent:read`, `node:read`, `network:read`, `network:write`, `job:read`, `job:write`, `health:read` | -| `read` | `agent:read`, `node:read`, `network:read`, `job:read`, `health:read` | +| Role | Permissions | +| ------- | -------------------------------------------------------------------------------------------------------------------------------------------------- | +| `admin` | `agent:read`, `agent:write`, `node:read`, `network:read`, `network:write`, `job:read`, `job:write`, `health:read`, `audit:read`, `command:execute` | +| `write` | `agent:read`, `node:read`, `network:read`, `network:write`, `job:read`, `job:write`, `health:read` | +| `read` | `agent:read`, `node:read`, `network:read`, `job:read`, `health:read` | ### Custom Roles @@ -226,9 +232,9 @@ api: - 'http://localhost:3001' - 'https://osapi-io.github.io' # Custom roles with fine-grained permissions. - # Permissions: agent:read, node:read, network:read, network:write, - # job:read, job:write, health:read, audit:read, - # command:execute + # Permissions: agent:read, agent:write, node:read, network:read, + # network:write, job:read, job:write, health:read, + # audit:read, command:execute # roles: # ops: # permissions: @@ -324,6 +330,16 @@ nats: # Number of KV replicas. replicas: 1 + # ── State KV bucket ────────────────────────────────────── + state: + # KV bucket for persistent agent state (drain flags, timeline events). + # No TTL — operator actions persist indefinitely. + bucket: 'agent-state' + # Storage backend: "file" or "memory". + storage: 'file' + # Number of KV replicas. 
+ replicas: 1 + # ── Dead Letter Queue ───────────────────────────────────── dlq: # Maximum age of messages in the DLQ. @@ -379,6 +395,14 @@ agent: facts: # How often the agent collects and publishes facts. interval: '60s' + # Node condition thresholds. + conditions: + # Memory pressure threshold (percent used). + memory_pressure_threshold: 90 + # High load multiplier (load1 / cpu_count). + high_load_multiplier: 2.0 + # Disk pressure threshold (percent used). + disk_pressure_threshold: 90 # Queue group for load-balanced (_any) subscriptions. queue_group: 'job-agents' # Agent hostname for direct routing. Defaults to the @@ -481,6 +505,14 @@ agent: | `storage` | string | `"file"` or `"memory"` | | `replicas` | int | Number of KV replicas | +### `nats.state` + +| Key | Type | Description | +| ---------- | ------ | --------------------------------------------- | +| `bucket` | string | KV bucket for persistent agent state (no TTL) | +| `storage` | string | `"file"` or `"memory"` | +| `replicas` | int | Number of KV replicas | + ### `nats.dlq` | Key | Type | Description | @@ -500,23 +532,26 @@ agent: ### `agent` -| Key | Type | Description | -| -------------------------- | ----------------- | ---------------------------------------- | -| `nats.host` | string | NATS server hostname | -| `nats.port` | int | NATS server port | -| `nats.client_name` | string | NATS client identification name | -| `nats.namespace` | string | Subject namespace prefix | -| `nats.auth.type` | string | Auth type: `none`, `user_pass` | -| `nats.auth.username` | string | Username for `user_pass` auth | -| `nats.auth.password` | string | Password for `user_pass` auth | -| `consumer.name` | string | Durable consumer name | -| `consumer.max_deliver` | int | Max redelivery attempts before DLQ | -| `consumer.ack_wait` | string | ACK timeout (Go duration) | -| `consumer.max_ack_pending` | int | Max outstanding unacknowledged msgs | -| `consumer.replay_policy` | string | `"instant"` or `"original"` | -| 
`consumer.back_off` | []string | Backoff durations between redeliveries | -| `queue_group` | string | Queue group for load-balanced routing | -| `hostname` | string | Agent hostname (defaults to OS hostname) | -| `max_jobs` | int | Max concurrent jobs | -| `facts.interval` | string | How often the agent collects facts | -| `labels` | map[string]string | Key-value pairs for label-based routing | +| Key | Type | Description | +| -------------------------------------- | ----------------- | ---------------------------------------------- | +| `nats.host` | string | NATS server hostname | +| `nats.port` | int | NATS server port | +| `nats.client_name` | string | NATS client identification name | +| `nats.namespace` | string | Subject namespace prefix | +| `nats.auth.type` | string | Auth type: `none`, `user_pass` | +| `nats.auth.username` | string | Username for `user_pass` auth | +| `nats.auth.password` | string | Password for `user_pass` auth | +| `consumer.name` | string | Durable consumer name | +| `consumer.max_deliver` | int | Max redelivery attempts before DLQ | +| `consumer.ack_wait` | string | ACK timeout (Go duration) | +| `consumer.max_ack_pending` | int | Max outstanding unacknowledged msgs | +| `consumer.replay_policy` | string | `"instant"` or `"original"` | +| `consumer.back_off` | []string | Backoff durations between redeliveries | +| `queue_group` | string | Queue group for load-balanced routing | +| `hostname` | string | Agent hostname (defaults to OS hostname) | +| `max_jobs` | int | Max concurrent jobs | +| `facts.interval` | string | How often the agent collects facts | +| `conditions.memory_pressure_threshold` | int | Memory pressure threshold percent (default 90) | +| `conditions.high_load_multiplier` | float | Load multiplier over CPU count (default 2.0) | +| `conditions.disk_pressure_threshold` | int | Disk pressure threshold percent (default 90) | +| `labels` | map[string]string | Key-value pairs for label-based routing | diff --git 
a/docs/docusaurus.config.ts b/docs/docusaurus.config.ts index bb2a104a..a1927b92 100644 --- a/docs/docusaurus.config.ts +++ b/docs/docusaurus.config.ts @@ -90,6 +90,11 @@ const config: Config = { label: 'Network Management', docId: 'sidebar/features/network-management' }, + { + type: 'doc', + label: 'Agent Lifecycle', + docId: 'sidebar/features/agent-lifecycle' + }, { type: 'doc', label: 'Job System', diff --git a/docs/plans/2026-03-05-node-conditions-drain-design.md b/docs/plans/2026-03-05-node-conditions-drain-design.md new file mode 100644 index 00000000..5d784b22 --- /dev/null +++ b/docs/plans/2026-03-05-node-conditions-drain-design.md @@ -0,0 +1,336 @@ +# Node Conditions and Agent Drain + +## Context + +OSAPI agents collect rich system metrics (memory, load, disk, CPU count) via +heartbeat and facts, but operators must manually interpret raw numbers to detect +problems. Kubernetes solves this with node conditions — threshold-based booleans +that surface "is anything wrong?" at a glance. + +Additionally, there's no way to gracefully remove an agent from the job routing +pool for maintenance without stopping the process entirely. When an agent stops, +it vanishes from the registry and looks identical to a crash. Kubernetes handles +this with cordon/drain. + +This design adds both features to OSAPI. 
+ +## Node Conditions + +### Condition Types + +Three conditions derived from existing heartbeat and facts data, evaluated +agent-side on each heartbeat tick (10s): + +| Condition | Default Threshold | Data Source | +| ---------------- | -------------------- | ----------------------------------------------- | +| `MemoryPressure` | memory used > 90% | `MemoryStats` (heartbeat) | +| `HighLoad` | load1 > 2× CPU count | `LoadAverages` (heartbeat) + `CPUCount` (facts) | +| `DiskPressure` | any disk > 90% used | `DiskStats` (new in heartbeat) | + +### Condition Structure + +Each condition has: + +```go +type Condition struct { + Type string `json:"type"` + Status bool `json:"status"` + Reason string `json:"reason,omitempty"` + LastTransitionTime time.Time `json:"last_transition_time"` +} +``` + +- `Status`: `true` = condition is active (pressure/overload detected) +- `Reason`: human-readable explanation (e.g., "memory 94% used (15.1/16.0 GB)") +- `LastTransitionTime`: when the condition last changed from true→false or + false→true + +### Configuration + +Thresholds configurable in `osapi.yaml` with sensible defaults: + +```yaml +agent: + conditions: + memory_pressure_threshold: 90 # percent used + high_load_multiplier: 2.0 # load1 / cpu_count + disk_pressure_threshold: 90 # percent used +``` + +### Evaluation + +Conditions are evaluated in the agent during `writeRegistration()`. The agent +maintains previous condition state in memory to track `LastTransitionTime` — +only updated when the boolean flips. + +DiskPressure requires adding disk stats to the heartbeat. The existing +`disk.Provider` already implements `GetUsage()` so the data is available. Disk +collection joins the existing non-fatal provider pattern: if it fails, the +DiskPressure condition is simply not evaluated. + +### Storage + +Conditions are stored as part of `AgentRegistration` in the registry KV bucket. +No new KV bucket needed. + +```go +type AgentRegistration struct { + // ... existing fields ... 
+ Conditions []Condition `json:"conditions,omitempty"` +} +``` + +### CLI Display + +`agent list` gains a CONDITIONS column showing active conditions: + +``` +HOSTNAME STATUS CONDITIONS LOAD OS +web-01 Ready HighLoad,MemoryPressure 4.12 Ubuntu 24.04 +web-02 Ready - 0.31 Ubuntu 24.04 +db-01 Ready DiskPressure 1.22 Ubuntu 24.04 +``` + +`agent get` shows full condition details and state timeline: + +``` +Conditions: + MemoryPressure: true (memory 94% used, 15.1/16.0 GB) since 2m ago + HighLoad: true (load 4.12, threshold 4.00 for 2 CPUs) since 5m ago + DiskPressure: false + +Timeline: + TIMESTAMP EVENT HOSTNAME MESSAGE + 2026-03-05 10:00:00 drain web-01 Drain initiated + 2026-03-05 10:05:23 cordoned web-01 All jobs completed + 2026-03-05 12:00:00 undrain web-01 Resumed accepting jobs +``` + +## Agent Drain + +### State Machine + +Agents gain an explicit state field with three values: + +``` +Ready ──(drain)──> Draining ──(jobs done)──> Cordoned + ^ │ + └──────────────(undrain)───────────────────────┘ +``` + +| State | Meaning | +| ---------- | ------------------------------------------------ | +| `Ready` | Accepting and processing jobs (default) | +| `Draining` | Finishing in-flight jobs, not accepting new ones | +| `Cordoned` | Fully drained, idle, not accepting jobs | + +### Mechanism + +1. Operator calls `POST /agent/{hostname}/drain` +2. API writes a `drain.{hostname}` key to the state KV bucket +3. Agent checks for drain key on each heartbeat tick (10s) +4. When drain flag detected: + - Agent transitions state to `Draining` + - Agent unsubscribes from NATS consumer (stops receiving new jobs) + - In-flight jobs continue to completion +5. Once WaitGroup drains (no in-flight jobs), state becomes `Cordoned` +6. `POST /agent/{hostname}/undrain` deletes the drain key +7. 
Agent detects drain key removal on next heartbeat: + - Transitions state to `Ready` + - Re-subscribes to NATS consumer + +### API Endpoints + +``` +POST /agent/{hostname}/drain # Start draining +POST /agent/{hostname}/undrain # Resume accepting jobs +``` + +Both return 200 on success, 404 if agent not found, 409 if already in the +requested state. + +### Permission + +New `agent:write` permission. Added to the `admin` role by default. + +### Storage + +Agent state transitions are recorded as **append-only events** in the state KV +bucket (`agent-state`, no TTL), following the same pattern used for job status +events (see `WriteStatusEvent` in `internal/job/client/agent.go`). + +Events reuse the existing `TimelineEvent` type (`internal/job/types.go`) — the +same type used for job lifecycle events. This type is generic (Timestamp, Event, +Hostname, Message, Error) and not job-specific: + +``` +Key format: timeline.{sanitized_hostname}.{event}.{unix_nano} +Value: TimelineEvent JSON +``` + +Events: `ready`, `drain`, `cordoned`, `undrain` + +On the SDK side, `TimelineEvent` is promoted from `job_types.go` to a shared +top-level type in `pkg/osapi/types.go`. Both `JobDetail.Timeline` and +`Agent.Timeline` reference the same type. + +Current state is **computed from the latest event**, just like job status is +computed via `computeStatusFromEvents`. This preserves the full transition +history (Ready → Draining → Cordoned → Ready → Draining → ...) and eliminates +race conditions by never updating existing keys. + +The drain intent uses a separate key: `drain.{sanitized_hostname}`. The API +writes this key to signal drain; the agent reads it on heartbeat and writes the +state transition event. The API deletes the key on undrain. + +The `AgentRegistration` also carries the current state for quick reads without +scanning events: + +```go +type AgentRegistration struct { + // ... existing fields ... 
+ State string `json:"state,omitempty"` // Ready, Draining, Cordoned +} +``` + +### CLI Commands + +```bash +osapi client agent drain --hostname web-01 +osapi client agent undrain --hostname web-01 +``` + +`agent list` and `agent get` show the state in the STATUS column. + +## OpenAPI Changes + +### AgentInfo Schema + +Add to existing `AgentInfo`: + +```yaml +state: + type: string + enum: [Ready, Draining, Cordoned] + description: Agent scheduling state. +conditions: + type: array + items: + $ref: '#/components/schemas/NodeCondition' +``` + +New schema: + +```yaml +NodeCondition: + type: object + properties: + type: + type: string + enum: [MemoryPressure, HighLoad, DiskPressure] + status: + type: boolean + reason: + type: string + last_transition_time: + type: string + format: date-time + required: [type, status, last_transition_time] +``` + +### New Endpoints + +```yaml +/agent/{hostname}/drain: + post: + summary: Drain an agent + description: Stop the agent from accepting new jobs. + security: + - BearerAuth: [] + responses: + 200: ... + 404: ... + 409: ... + +/agent/{hostname}/undrain: + post: + summary: Undrain an agent + description: Resume accepting jobs on a drained agent. + security: + - BearerAuth: [] + responses: + 200: ... + 404: ... + 409: ... +``` + +### Permission Updates + +```yaml +# New permission +agent:write + +# Updated admin role +admin: + permissions: + - agent:read + - agent:write # new + - node:read + - ... 
+``` + +## Implementation Scope + +### Provider Changes + +- Extend heartbeat to collect disk stats (reuse existing `disk.Provider`) +- Add condition evaluation logic to agent heartbeat + +### Agent Changes + +- Add `Condition` type and evaluation functions +- Add state field to `AgentRegistration` +- Add drain flag detection on heartbeat tick +- Add consumer subscribe/unsubscribe for drain/undrain transitions +- Add condition threshold config support + +### API Changes + +- New drain/undrain endpoints in the agent API domain +- Extend `AgentInfo` schema with `state` and `conditions` +- Add `agent:write` permission and wire into scope middleware + +### CLI Changes + +- `agent drain` and `agent undrain` commands +- CONDITIONS column in `agent list` +- Condition details and state timeline in `agent get` +- State shown in STATUS column + +### SDK Changes + +- Promote `TimelineEvent` from `job_types.go` to shared `types.go` +- Both `JobDetail.Timeline` and `Agent.Timeline` use the same type +- Add `Agent.Drain()` and `Agent.Undrain()` methods +- Add conditions, state, and timeline to `Agent` type + +### Config Changes + +- `agent.conditions` section with threshold defaults + +## Testing + +- **Unit**: condition evaluation logic (threshold math, transition tracking), + state machine transitions, drain flag detection +- **HTTP wiring**: drain/undrain endpoints with RBAC (401, 403, 200, 404, 409) +- **Integration**: drain agent → submit job → verify not routed to drained agent + → undrain → verify jobs resume + +## Verification + +```bash +just generate # regenerate specs + code +go build ./... 
# compiles +just go::unit # tests pass +just go::vet # lint passes +``` diff --git a/docs/plans/2026-03-05-node-conditions-drain.md b/docs/plans/2026-03-05-node-conditions-drain.md new file mode 100644 index 00000000..5227ccdd --- /dev/null +++ b/docs/plans/2026-03-05-node-conditions-drain.md @@ -0,0 +1,1387 @@ +# Node Conditions & Agent Drain Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to +> implement this plan task-by-task. + +**Goal:** Add Kubernetes-inspired node conditions (MemoryPressure, HighLoad, +DiskPressure) and agent drain/cordon lifecycle to OSAPI. + +**Architecture:** Conditions are evaluated agent-side on each heartbeat tick +using existing provider data, stored in AgentRegistration. Drain uses +append-only timeline events in the state KV bucket (`agent-state`, per the +design doc's Storage section — NOTE(review): the code sketches below still +reference `registryKV` and should target the state bucket instead), reusing the +existing `TimelineEvent` type from job lifecycle, with a separate drain intent +key the API writes and the agent reads on heartbeat. State transitions trigger +NATS consumer subscribe/unsubscribe.
+ +**Tech Stack:** Go 1.25, NATS JetStream KV, Echo REST API, OpenAPI codegen, +testify/suite + +**Design Doc:** `docs/plans/2026-03-05-node-conditions-drain-design.md` + +--- + +## Task 1: Add Condition type and evaluation functions + +**Files:** + +- Create: `internal/agent/condition.go` +- Create: `internal/agent/condition_test.go` + +**Step 1: Write the failing tests** + +```go +// internal/agent/condition_test.go +package agent + +import ( + "testing" + "time" + + "github.com/stretchr/testify/suite" + + "github.com/retr0h/osapi/internal/job" + "github.com/retr0h/osapi/internal/provider/node/disk" + "github.com/retr0h/osapi/internal/provider/node/load" + "github.com/retr0h/osapi/internal/provider/node/mem" +) + +type ConditionTestSuite struct { + suite.Suite +} + +func TestConditionTestSuite(t *testing.T) { + suite.Run(t, new(ConditionTestSuite)) +} + +func (s *ConditionTestSuite) TestEvaluateMemoryPressure() { + tests := []struct { + name string + stats *mem.Stats + threshold int + wantStatus bool + wantReason string + }{ + { + name: "above threshold", + stats: &mem.Stats{Total: 16000000000, Used: 15000000000, Free: 1000000000}, + threshold: 90, + wantStatus: true, + }, + { + name: "below threshold", + stats: &mem.Stats{Total: 16000000000, Used: 8000000000, Free: 8000000000}, + threshold: 90, + wantStatus: false, + }, + { + name: "nil stats", + stats: nil, + threshold: 90, + wantStatus: false, + }, + } + + for _, tt := range tests { + s.Run(tt.name, func() { + c := evaluateMemoryPressure(tt.stats, tt.threshold, nil) + s.Equal(tt.wantStatus, c.Status) + s.Equal(job.ConditionMemoryPressure, c.Type) + }) + } +} + +func (s *ConditionTestSuite) TestEvaluateHighLoad() { + tests := []struct { + name string + loadAvg *load.AverageStats + cpuCount int + multiplier float64 + wantStatus bool + }{ + { + name: "above threshold", + loadAvg: &load.AverageStats{OneMin: 5.0}, + cpuCount: 2, + multiplier: 2.0, + wantStatus: true, + }, + { + name: "below threshold", + loadAvg: 
&load.AverageStats{OneMin: 1.0}, + cpuCount: 2, + multiplier: 2.0, + wantStatus: false, + }, + { + name: "nil load", + loadAvg: nil, + cpuCount: 2, + multiplier: 2.0, + wantStatus: false, + }, + { + name: "zero cpus", + loadAvg: &load.AverageStats{OneMin: 5.0}, + cpuCount: 0, + multiplier: 2.0, + wantStatus: false, + }, + } + + for _, tt := range tests { + s.Run(tt.name, func() { + c := evaluateHighLoad(tt.loadAvg, tt.cpuCount, tt.multiplier, nil) + s.Equal(tt.wantStatus, c.Status) + s.Equal(job.ConditionHighLoad, c.Type) + }) + } +} + +func (s *ConditionTestSuite) TestEvaluateDiskPressure() { + tests := []struct { + name string + disks []disk.UsageStats + threshold int + wantStatus bool + }{ + { + name: "one disk above threshold", + disks: []disk.UsageStats{ + {Name: "/dev/sda1", Total: 100000, Used: 95000, Free: 5000}, + }, + threshold: 90, + wantStatus: true, + }, + { + name: "all disks below threshold", + disks: []disk.UsageStats{ + {Name: "/dev/sda1", Total: 100000, Used: 50000, Free: 50000}, + }, + threshold: 90, + wantStatus: false, + }, + { + name: "nil disks", + disks: nil, + threshold: 90, + wantStatus: false, + }, + } + + for _, tt := range tests { + s.Run(tt.name, func() { + c := evaluateDiskPressure(tt.disks, tt.threshold, nil) + s.Equal(tt.wantStatus, c.Status) + s.Equal(job.ConditionDiskPressure, c.Type) + }) + } +} + +func (s *ConditionTestSuite) TestLastTransitionTimeTracking() { + prev := []job.Condition{{ + Type: job.ConditionMemoryPressure, Status: false, + LastTransitionTime: time.Now().Add(-5 * time.Minute), + }} + // Flip from false -> true: should update LastTransitionTime + c := evaluateMemoryPressure( + &mem.Stats{Total: 100, Used: 95, Free: 5}, 90, prev, + ) + s.True(c.Status) + s.True(c.LastTransitionTime.After(time.Now().Add(-1 * time.Second))) + + // Same status (true -> true): should keep old LastTransitionTime + prev2 := []job.Condition{c} + c2 := evaluateMemoryPressure( + &mem.Stats{Total: 100, Used: 95, Free: 5}, 90, prev2, + ) + 
s.True(c2.Status) + s.Equal(c.LastTransitionTime, c2.LastTransitionTime) +} +``` + +**Step 2: Run tests to verify they fail** + +Run: `go test -run TestConditionTestSuite -v ./internal/agent/` Expected: FAIL — +`evaluateMemoryPressure` not defined + +**Step 3: Write minimal implementation** + +```go +// internal/agent/condition.go +package agent + +import ( + "fmt" + "time" + + "github.com/retr0h/osapi/internal/job" + "github.com/retr0h/osapi/internal/provider/node/disk" + "github.com/retr0h/osapi/internal/provider/node/load" + "github.com/retr0h/osapi/internal/provider/node/mem" +) + +// findPrevCondition returns the previous condition of the given type, +// or nil if not found. +func findPrevCondition( + condType string, + prev []job.Condition, +) *job.Condition { + for i := range prev { + if prev[i].Type == condType { + return &prev[i] + } + } + return nil +} + +// transitionTime returns the previous LastTransitionTime if status +// hasn't changed, otherwise returns now. +func transitionTime( + condType string, + newStatus bool, + prev []job.Condition, +) time.Time { + if p := findPrevCondition(condType, prev); p != nil { + if p.Status == newStatus { + return p.LastTransitionTime + } + } + return time.Now() +} + +func evaluateMemoryPressure( + stats *mem.Stats, + threshold int, + prev []job.Condition, +) job.Condition { + c := job.Condition{Type: job.ConditionMemoryPressure} + if stats == nil || stats.Total == 0 { + c.LastTransitionTime = transitionTime(c.Type, false, prev) + return c + } + pct := float64(stats.Used) / float64(stats.Total) * 100 + c.Status = pct > float64(threshold) + if c.Status { + c.Reason = fmt.Sprintf( + "memory %.0f%% used (%.1f/%.1f GB)", + pct, + float64(stats.Used)/1024/1024/1024, + float64(stats.Total)/1024/1024/1024, + ) + } + c.LastTransitionTime = transitionTime(c.Type, c.Status, prev) + return c +} + +func evaluateHighLoad( + loadAvg *load.AverageStats, + cpuCount int, + multiplier float64, + prev []job.Condition, +) job.Condition 
{ + c := job.Condition{Type: job.ConditionHighLoad} + if loadAvg == nil || cpuCount == 0 { + c.LastTransitionTime = transitionTime(c.Type, false, prev) + return c + } + threshold := float64(cpuCount) * multiplier + c.Status = loadAvg.OneMin > threshold + if c.Status { + c.Reason = fmt.Sprintf( + "load %.2f, threshold %.2f for %d CPUs", + loadAvg.OneMin, threshold, cpuCount, + ) + } + c.LastTransitionTime = transitionTime(c.Type, c.Status, prev) + return c +} + +func evaluateDiskPressure( + disks []disk.UsageStats, + threshold int, + prev []job.Condition, +) job.Condition { + c := job.Condition{Type: job.ConditionDiskPressure} + if len(disks) == 0 { + c.LastTransitionTime = transitionTime(c.Type, false, prev) + return c + } + for _, d := range disks { + if d.Total == 0 { + continue + } + pct := float64(d.Used) / float64(d.Total) * 100 + if pct > float64(threshold) { + c.Status = true + c.Reason = fmt.Sprintf( + "%s %.0f%% used (%.1f/%.1f GB)", + d.Name, pct, + float64(d.Used)/1024/1024/1024, + float64(d.Total)/1024/1024/1024, + ) + break + } + } + c.LastTransitionTime = transitionTime(c.Type, c.Status, prev) + return c +} +``` + +**Step 4: Run tests to verify they pass** + +Run: `go test -run TestConditionTestSuite -v ./internal/agent/` Expected: PASS + +**Step 5: Commit** + +```bash +git add internal/agent/condition.go internal/agent/condition_test.go +git commit -m "feat(agent): add condition evaluation functions" +``` + +--- + +## Task 2: Add Condition and State types to job domain + +**Files:** + +- Modify: `internal/job/types.go:273-331` (AgentRegistration, AgentInfo) + +**Step 1: Write the types** + +Add to `internal/job/types.go` after existing types: + +```go +// Condition type constants. +const ( + ConditionMemoryPressure = "MemoryPressure" + ConditionHighLoad = "HighLoad" + ConditionDiskPressure = "DiskPressure" +) + +// Agent state constants. 
+const ( + AgentStateReady = "Ready" + AgentStateDraining = "Draining" + AgentStateCordoned = "Cordoned" +) + +// Condition represents a node condition evaluated agent-side. +type Condition struct { + Type string `json:"type"` + Status bool `json:"status"` + Reason string `json:"reason,omitempty"` + LastTransitionTime time.Time `json:"last_transition_time"` +} + +``` + +The existing `TimelineEvent` type (line 177) is already generic and will be +reused for agent state transitions — no new event type needed. + +Add fields to `AgentRegistration`: + +```go +Conditions []Condition `json:"conditions,omitempty"` +State string `json:"state,omitempty"` +``` + +Add fields to `AgentInfo`: + +```go +Conditions []Condition `json:"conditions,omitempty"` +State string `json:"state,omitempty"` +Timeline []TimelineEvent `json:"timeline,omitempty"` +``` + +**Step 2: Run existing tests** + +Run: `go test ./internal/job/... -count=1` Expected: PASS (additive change) + +**Step 3: Commit** + +```bash +git add internal/job/types.go +git commit -m "feat(job): add Condition type and agent state constants" +``` + +--- + +## Task 3: Add conditions config to AgentConfig + +**Files:** + +- Modify: `internal/config/types.go:262-277` +- Modify: `configs/osapi.yaml` +- Modify: `configs/osapi.local.yaml` + +**Step 1: Add config struct** + +Add to `internal/config/types.go`: + +```go +// AgentConditions holds threshold configuration for node conditions. 
+type AgentConditions struct { + MemoryPressureThreshold int `mapstructure:"memory_pressure_threshold"` + HighLoadMultiplier float64 `mapstructure:"high_load_multiplier"` + DiskPressureThreshold int `mapstructure:"disk_pressure_threshold"` +} +``` + +Add field to `AgentConfig`: + +```go +Conditions AgentConditions `mapstructure:"conditions,omitempty"` +``` + +**Step 2: Set defaults in osapi.yaml and osapi.local.yaml** + +```yaml +agent: + conditions: + memory_pressure_threshold: 90 + high_load_multiplier: 2.0 + disk_pressure_threshold: 90 +``` + +**Step 3: Verify compilation** + +Run: `go build ./...` Expected: compiles + +**Step 4: Commit** + +```bash +git add internal/config/types.go configs/osapi.yaml configs/osapi.local.yaml +git commit -m "feat(config): add agent conditions threshold configuration" +``` + +--- + +## Task 4: Add disk stats to heartbeat and evaluate conditions + +**Files:** + +- Modify: `internal/agent/heartbeat.go:88-134` (writeRegistration) +- Modify: `internal/agent/types.go:45-81` (add prevConditions, cpuCount) + +**Step 1: Add fields to Agent struct** + +In `internal/agent/types.go`, add to Agent struct: + +```go +// prevConditions tracks condition state between heartbeats. +prevConditions []job.Condition + +// cpuCount cached from facts for HighLoad evaluation. +cpuCount int +``` + +**Step 2: Extend writeRegistration** + +In `internal/agent/heartbeat.go`, after memory stats collection (~line 111), +add: + +```go +// Collect disk stats (non-fatal). +var diskStats []disk.UsageStats +if stats, err := a.diskProvider.GetLocalUsageStats(); err == nil { + diskStats = stats +} + +// Evaluate conditions. 
+conditions := []job.Condition{ + evaluateMemoryPressure( + memStats, + a.appConfig.Agent.Conditions.MemoryPressureThreshold, + a.prevConditions, + ), + evaluateHighLoad( + loadAvg, + a.cpuCount, + a.appConfig.Agent.Conditions.HighLoadMultiplier, + a.prevConditions, + ), + evaluateDiskPressure( + diskStats, + a.appConfig.Agent.Conditions.DiskPressureThreshold, + a.prevConditions, + ), +} +a.prevConditions = conditions +``` + +Add `Conditions: conditions` to the `AgentRegistration` literal. + +**Step 3: Set cpuCount from facts** + +In `internal/agent/facts.go` (the `writeFacts` function), after collecting +`CPUCount`, add: + +```go +a.cpuCount = cpuCount +``` + +**Step 4: Run tests** + +Run: `go test ./internal/agent/... -count=1` Expected: PASS + +**Step 5: Commit** + +```bash +git add internal/agent/heartbeat.go internal/agent/types.go internal/agent/facts.go +git commit -m "feat(agent): evaluate node conditions on heartbeat tick" +``` + +--- + +## Task 5: Add drain timeline event storage functions + +**Files:** + +- Modify: `internal/job/client/agent.go:39-85` +- Create: `internal/job/client/agent_timeline_test.go` + +**Step 1: Write failing tests** + +```go +// internal/job/client/agent_timeline_test.go +package client_test + +// Test WriteAgentTimelineEvent writes append-only key to registryKV. +// Test ComputeAgentState returns latest state from timeline events. +// Test GetAgentTimeline returns sorted timeline events. 
+``` + +Table-driven tests: + +- `WriteAgentTimelineEvent` writes key like + `timeline.{hostname}.{event}.{unix_nano}` +- `ComputeAgentState` with no events returns "Ready" +- `ComputeAgentState` with drain event returns "Draining" +- `ComputeAgentState` with cordoned event returns "Cordoned" +- `ComputeAgentState` with undrain event returns "Ready" + +**Step 2: Run tests to verify they fail** + +Run: `go test -run TestAgentTimeline -v ./internal/job/client/` Expected: FAIL + +**Step 3: Implement** + +Add to `internal/job/client/agent.go`: + +```go +// WriteAgentTimelineEvent writes an append-only timeline event +// for an agent state transition. Reuses the same TimelineEvent +// type used by job lifecycle events. +func (c *Client) WriteAgentTimelineEvent( + _ context.Context, + hostname, event, message string, +) error { + now := time.Now() + key := fmt.Sprintf( + "timeline.%s.%s.%d", + job.SanitizeHostname(hostname), + event, + now.UnixNano(), + ) + data, _ := json.Marshal(job.TimelineEvent{ + Timestamp: now, + Event: event, + Hostname: hostname, + Message: message, + }) + _, err := c.registryKV.Put(key, data) + return err +} + +// GetAgentTimeline returns sorted timeline events for a hostname. +func (c *Client) GetAgentTimeline( + ctx context.Context, + hostname string, +) ([]job.TimelineEvent, error) { + prefix := "timeline." + job.SanitizeHostname(hostname) + "." + // List keys with prefix, unmarshal, sort by Timestamp + // Return sorted events +} + +// ComputeAgentState returns the current state from timeline events. 
+func ComputeAgentState( + events []job.TimelineEvent, +) string { + if len(events) == 0 { + return job.AgentStateReady + } + latest := events[len(events)-1] + switch latest.Event { + case "drain": + return job.AgentStateDraining + case "cordoned": + return job.AgentStateCordoned + case "undrain", "ready": + return job.AgentStateReady + default: + return job.AgentStateReady + } +} +``` + +Add `WriteAgentTimelineEvent`, `GetAgentTimeline` to the `JobClient` interface +in `internal/job/client/types.go`. Regenerate mocks. + +**Step 4: Run tests** + +Run: `go test -run TestAgentTimeline -v ./internal/job/client/` Expected: PASS + +**Step 5: Commit** + +```bash +git add internal/job/client/agent.go internal/job/client/agent_timeline_test.go \ + internal/job/client/types.go internal/job/client/mock_*.go +git commit -m "feat(job): add append-only timeline events for agent drain" +``` + +--- + +## Task 6: Add drain/undrain API endpoints + +**Files:** + +- Modify: `internal/api/agent/gen/api.yaml` +- Create: `internal/api/agent/agent_drain.go` +- Create: `internal/api/agent/agent_drain_public_test.go` + +**Step 1: Add to OpenAPI spec** + +Add to `internal/api/agent/gen/api.yaml`: + +```yaml +/agent/{hostname}/drain: + post: + operationId: drainAgent + summary: Drain an agent + description: Stop the agent from accepting new jobs. + security: + - BearerAuth: + - 'agent:write' + parameters: + - name: hostname + in: path + required: true + schema: + type: string + responses: + '200': + description: Agent drain initiated. + content: + application/json: + schema: + type: object + properties: + message: + type: string + '404': + description: Agent not found. + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + '409': + description: Agent already in requested state. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + +/agent/{hostname}/undrain: + post: + operationId: undrainAgent + summary: Undrain an agent + description: Resume accepting jobs on a drained agent. + security: + - BearerAuth: + - 'agent:write' + parameters: + - name: hostname + in: path + required: true + schema: + type: string + responses: + '200': + description: Agent undrain initiated. + content: + application/json: + schema: + type: object + properties: + message: + type: string + '404': + description: Agent not found. + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + '409': + description: Agent not in draining/cordoned state. + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' +``` + +Add `agent:write` to BearerAuth scopes. Add `state` and `conditions` fields to +`AgentInfo` schema. Add `NodeCondition` schema. + +Run: `just generate` to regenerate `*.gen.go`. + +**Step 2: Write failing tests** + +```go +// internal/api/agent/agent_drain_public_test.go +// Table-driven tests for DrainAgent and UndrainAgent: +// - 200: agent found and drain initiated +// - 404: agent not found +// - 409: already draining/cordoned +// - HTTP wiring: RBAC (401, 403 without agent:write, 200 with agent:write) +``` + +**Step 3: Implement handlers** + +```go +// internal/api/agent/agent_drain.go +package agent + +func (a *Agent) DrainAgent( + ctx context.Context, + request gen.DrainAgentRequestObject, +) (gen.DrainAgentResponseObject, error) { + hostname := request.Hostname + + // 1. Verify agent exists + agentInfo, err := a.JobClient.GetAgent(ctx, hostname) + if err != nil { + return gen.DrainAgent404JSONResponse{...}, nil + } + + // 2. Check not already draining + if agentInfo.State == job.AgentStateDraining || + agentInfo.State == job.AgentStateCordoned { + return gen.DrainAgent409JSONResponse{...}, nil + } + + // 3. Write drain intent key + // 4. 
Write state event + return gen.DrainAgent200JSONResponse{...}, nil +} + +func (a *Agent) UndrainAgent( + ctx context.Context, + request gen.UndrainAgentRequestObject, +) (gen.UndrainAgentResponseObject, error) { + // Similar: verify exists, check state, delete drain key, write event +} +``` + +**Step 4: Run tests** + +Run: `go test ./internal/api/agent/... -count=1` Expected: PASS + +**Step 5: Commit** + +```bash +git add internal/api/agent/gen/api.yaml internal/api/agent/gen/*.gen.go \ + internal/api/agent/agent_drain.go internal/api/agent/agent_drain_public_test.go +git commit -m "feat(api): add drain/undrain endpoints with RBAC" +``` + +--- + +## Task 7: Add agent:write permission + +**Files:** + +- Modify: `internal/authtoken/permissions.go:27-37` (add constant) +- Modify: `internal/authtoken/permissions.go:53-81` (add to admin role) + +**Step 1: Add permission constant** + +```go +PermAgentWrite Permission = "agent:write" +``` + +**Step 2: Add to admin role** + +In `DefaultRolePermissions`, add `PermAgentWrite` to the `admin` slice. + +**Step 3: Run tests** + +Run: `go test ./internal/authtoken/... -count=1` Expected: PASS + +**Step 4: Commit** + +```bash +git add internal/authtoken/permissions.go +git commit -m "feat(auth): add agent:write permission for drain operations" +``` + +--- + +## Task 8: Wire drain endpoints into server + +**Files:** + +- Modify: `internal/api/handler_agent.go:34-61` +- Modify: `internal/api/handler_agent_public_test.go` + +**Step 1: Update handler registration** + +The `GetAgentHandler` already wires all agent gen handlers through +`scopeMiddleware`. After regenerating the OpenAPI code (Task 6), the new +`DrainAgent` and `UndrainAgent` methods on the strict server interface will be +picked up automatically by `RegisterHandlers`. + +No code change needed in `handler_agent.go` unless `unauthenticatedOperations` +needs updating (it doesn't — drain requires auth). 
+ +**Step 2: Verify compilation** + +Run: `go build ./...` Expected: compiles + +**Step 3: Add handler test cases** + +Add test cases to `handler_agent_public_test.go` for drain/undrain handler +registration. + +**Step 4: Commit** + +```bash +git add internal/api/handler_agent.go internal/api/handler_agent_public_test.go +git commit -m "feat(api): wire drain/undrain handlers into server" +``` + +--- + +## Task 9: Add drain detection to agent heartbeat + +**Files:** + +- Modify: `internal/agent/heartbeat.go:88-134` +- Modify: `internal/agent/server.go:32-61` +- Create: `internal/agent/drain.go` +- Create: `internal/agent/drain_test.go` + +**Step 1: Write failing tests** + +```go +// internal/agent/drain_test.go +// Test checkDrainFlag: returns true when drain key exists +// Test checkDrainFlag: returns false when drain key absent +// Test handleDrainTransition: unsubscribes consumers when draining +// Test handleUndrainTransition: resubscribes consumers when undrained +``` + +**Step 2: Implement drain detection** + +```go +// internal/agent/drain.go +package agent + +// checkDrainFlag reads drain.{hostname} from registryKV. +func (a *Agent) checkDrainFlag( + ctx context.Context, + hostname string, +) bool { + key := "drain." + job.SanitizeHostname(hostname) + _, err := a.registryKV.Get(ctx, key) + return err == nil +} + +// handleDrainDetection checks drain flag on each heartbeat. 
+func (a *Agent) handleDrainDetection( + ctx context.Context, + hostname string, +) { + drainRequested := a.checkDrainFlag(ctx, hostname) + + switch { + case drainRequested && a.state == job.AgentStateReady: + a.state = job.AgentStateDraining + a.unsubscribeConsumers() + // Write timeline event: "drain", "Drain initiated" + // When WaitGroup drains, transition to Cordoned + + case !drainRequested && a.state == job.AgentStateCordoned: + a.state = job.AgentStateReady + a.resubscribeConsumers(ctx, hostname) + // Write timeline event: "undrain", "Resumed accepting jobs" + } +} +``` + +**Step 3: Add state field to Agent struct** + +In `internal/agent/types.go`: + +```go +state string // Ready, Draining, Cordoned +``` + +Initialize to `job.AgentStateReady` in `Start()`. + +**Step 4: Call from heartbeat** + +In `writeRegistration()`, add `a.handleDrainDetection(ctx, hostname)` and +include `State: a.state` in the registration. + +**Step 5: Run tests** + +Run: `go test ./internal/agent/... -count=1` Expected: PASS + +**Step 6: Commit** + +```bash +git add internal/agent/drain.go internal/agent/drain_test.go \ + internal/agent/heartbeat.go internal/agent/types.go internal/agent/server.go +git commit -m "feat(agent): detect drain flag and manage consumer lifecycle" +``` + +--- + +## Task 10: Extend buildAgentInfo with conditions and state + +**Files:** + +- Modify: `internal/api/agent/agent_list.go:59-171` (buildAgentInfo) +- Modify: `internal/api/agent/agent_list_public_test.go` +- Modify: `internal/job/client/query.go:479-493` (agentInfoFromRegistration) + +**Step 1: Update agentInfoFromRegistration** + +Add to the returned `AgentInfo`: + +```go +Conditions: reg.Conditions, +State: reg.State, +``` + +**Step 2: Update buildAgentInfo** + +Map conditions and state from `job.AgentInfo` to `gen.AgentInfo`: + +```go +if len(a.Conditions) > 0 { + conditions := make([]gen.NodeCondition, 0, len(a.Conditions)) + for _, c := range a.Conditions { + nc := gen.NodeCondition{ + Type: 
gen.NodeConditionType(c.Type), + Status: c.Status, + LastTransitionTime: c.LastTransitionTime, + } + if c.Reason != "" { + nc.Reason = &c.Reason + } + conditions = append(conditions, nc) + } + info.Conditions = &conditions +} + +if a.State != "" { + state := gen.AgentInfoState(a.State) + info.State = &state +} +``` + +**Step 3: Update status derivation** + +Change status logic: if `a.State` is set, use it; otherwise default to `Ready` +(existing behavior). + +**Step 4: Add test cases** + +Add table-driven test case for agent with conditions and Draining/Cordoned +states. + +**Step 5: Run tests** + +Run: `go test ./internal/api/agent/... -count=1` Expected: PASS + +**Step 6: Commit** + +```bash +git add internal/api/agent/agent_list.go internal/api/agent/agent_list_public_test.go \ + internal/job/client/query.go +git commit -m "feat(api): expose conditions and state in agent responses" +``` + +--- + +## Task 11: Add timeline to GetAgent response + +**Files:** + +- Modify: `internal/job/client/query.go:423-445` (GetAgent) +- Modify: `internal/job/client/query_public_test.go` + +**Step 1: Extend GetAgent to fetch timeline events** + +After building `AgentInfo`, fetch timeline events: + +```go +timeline, err := c.GetAgentTimeline(ctx, hostname) +if err == nil { + info.Timeline = timeline +} +``` + +**Step 2: Add test cases** + +Test GetAgent returns timeline events when present. + +**Step 3: Run tests** + +Run: `go test ./internal/job/client/... 
-count=1` Expected: PASS + +**Step 4: Commit** + +```bash +git add internal/job/client/query.go internal/job/client/query_public_test.go +git commit -m "feat(job): include timeline events in GetAgent response" +``` + +--- + +## Task 12: Update SDK with conditions, state, drain/undrain + +**Files:** + +- Modify: `osapi-sdk/pkg/osapi/gen/agent/api.yaml` (copy from osapi) +- Modify: `osapi-sdk/pkg/osapi/agent.go` (add Drain, Undrain methods) +- Modify: `osapi-sdk/pkg/osapi/agent_types.go` (add conditions, state, timeline + to Agent type) +- Create: `osapi-sdk/pkg/osapi/types.go` (promote TimelineEvent to shared type) +- Modify: `osapi-sdk/pkg/osapi/job_types.go` (remove TimelineEvent, import from + types.go) + +**Step 1: Promote TimelineEvent to shared type** + +Move `TimelineEvent` from `job_types.go` to a new `types.go`: + +```go +// pkg/osapi/types.go + +// TimelineEvent represents a lifecycle event. Used by both job +// timelines and agent state transition history. +type TimelineEvent struct { + Timestamp string + Event string + Hostname string + Message string + Error string +} +``` + +Update `job_types.go` to remove the `TimelineEvent` definition — +`JobDetail.Timeline` now references the shared type. + +**Step 2: Sync OpenAPI spec** + +Copy `internal/api/agent/gen/api.yaml` to +`osapi-sdk/pkg/osapi/gen/agent/api.yaml`. + +Run `redocly join` + `go generate` in the SDK. + +**Step 3: Add domain types** + +```go +// In agent_types.go +type Agent struct { + // ... existing fields ... 
+	State      string
+	Conditions []Condition
+	Timeline   []TimelineEvent // shared type from types.go
+}
+
+type Condition struct {
+	Type               string
+	Status             bool
+	Reason             string
+	LastTransitionTime time.Time
+}
+```
+
+**Step 4: Add Drain/Undrain methods**
+
+```go
+func (s *AgentService) Drain(
+	ctx context.Context,
+	hostname string,
+) (*Response[any], error) {
+	// POST /agent/{hostname}/drain
+}
+
+func (s *AgentService) Undrain(
+	ctx context.Context,
+	hostname string,
+) (*Response[any], error) {
+	// POST /agent/{hostname}/undrain
+}
+```
+
+**Step 5: Run SDK tests**
+
+Run: `go test ./pkg/osapi/... -count=1` Expected: PASS
+
+**Step 6: Commit (in osapi-sdk repo)**
+
+```bash
+git add pkg/osapi/
+git commit -m "feat(agent): add conditions, state, drain/undrain support"
+```
+
+---
+
+## Task 13: Add CONDITIONS column to agent list CLI
+
+**Files:**
+
+- Modify: `cmd/client_agent_list.go`
+
+**Step 1: Add CONDITIONS column**
+
+In the table builder for `agent list`, add a column that joins active condition
+type names:
+
+```go
+conditions := "-"
+if len(agent.Conditions) > 0 {
+	active := make([]string, 0)
+	for _, c := range agent.Conditions {
+		if c.Status {
+			active = append(active, c.Type)
+		}
+	}
+	if len(active) > 0 {
+		conditions = strings.Join(active, ",")
+	}
+}
+```
+
+Headers: `HOSTNAME`, `STATUS`, `CONDITIONS`, `LABELS`, `AGE`, `LOAD`, `OS`
+
+**Step 2: Use State for STATUS column**
+
+Replace hardcoded "Ready" with `agent.State` (defaulting to "Ready" if empty).
+ +**Step 3: Run `go build ./cmd/...`** + +Expected: compiles + +**Step 4: Commit** + +```bash +git add cmd/client_agent_list.go +git commit -m "feat(cli): add CONDITIONS column and state to agent list" +``` + +--- + +## Task 14: Add conditions and timeline to agent get CLI + +**Files:** + +- Modify: `cmd/client_agent_get.go:58-141` + +**Step 1: Add state to agent get output** + +After the Status KV line, display the State: + +```go +if data.State != "" && data.State != "Ready" { + cli.PrintKV("State", data.State) +} +``` + +**Step 2: Add conditions section** + +```go +if len(data.Conditions) > 0 { + condRows := make([][]string, 0, len(data.Conditions)) + for _, c := range data.Conditions { + status := "false" + if c.Status { + status = "true" + } + reason := "" + if c.Reason != "" { + reason = c.Reason + } + since := cli.FormatAge(time.Since(c.LastTransitionTime)) + " ago" + condRows = append(condRows, []string{c.Type, status, reason, since}) + } + sections = append(sections, cli.Section{ + Title: "Conditions", + Headers: []string{"TYPE", "STATUS", "REASON", "SINCE"}, + Rows: condRows, + }) +} +``` + +**Step 3: Add timeline section** + +Same pattern as `DisplayJobDetail` in `internal/cli/ui.go:600-615`: + +```go +if len(data.Timeline) > 0 { + timelineRows := make([][]string, 0, len(data.Timeline)) + for _, te := range data.Timeline { + timelineRows = append(timelineRows, []string{ + te.Timestamp, te.Event, te.Hostname, te.Message, te.Error, + }) + } + sections = append(sections, cli.Section{ + Title: "Timeline", + Headers: []string{"TIMESTAMP", "EVENT", "HOSTNAME", "MESSAGE", "ERROR"}, + Rows: timelineRows, + }) +} +``` + +**Step 4: Run `go build ./cmd/...`** + +Expected: compiles + +**Step 5: Commit** + +```bash +git add cmd/client_agent_get.go +git commit -m "feat(cli): display conditions and timeline in agent get" +``` + +--- + +## Task 15: Add agent drain/undrain CLI commands + +**Files:** + +- Create: `cmd/client_agent_drain.go` +- Create: 
`cmd/client_agent_undrain.go` + +**Step 1: Create drain command** + +```go +// cmd/client_agent_drain.go +var clientAgentDrainCmd = &cobra.Command{ + Use: "drain", + Short: "Drain an agent", + Long: `Stop an agent from accepting new jobs. In-flight jobs complete.`, + Run: func(cmd *cobra.Command, _ []string) { + ctx := cmd.Context() + hostname, _ := cmd.Flags().GetString("hostname") + + resp, err := sdkClient.Agent.Drain(ctx, hostname) + if err != nil { + cli.HandleError(err, logger) + return + } + + if jsonOutput { + fmt.Println(string(resp.RawJSON())) + return + } + + fmt.Printf("Agent %s drain initiated\n", hostname) + }, +} +``` + +**Step 2: Create undrain command** + +Similar pattern for `undrain`. + +**Step 3: Register commands** + +```go +func init() { + clientAgentCmd.AddCommand(clientAgentDrainCmd) + clientAgentDrainCmd.Flags().String("hostname", "", "Hostname of the agent to drain") + _ = clientAgentDrainCmd.MarkFlagRequired("hostname") +} +``` + +**Step 4: Run `go build ./cmd/...`** + +Expected: compiles + +**Step 5: Commit** + +```bash +git add cmd/client_agent_drain.go cmd/client_agent_undrain.go +git commit -m "feat(cli): add agent drain and undrain commands" +``` + +--- + +## Task 16: Update documentation + +**Files:** + +- Modify: `docs/docs/sidebar/features/agent-management.md` (or create) +- Modify: `docs/docs/sidebar/usage/configuration.md` +- Modify: `docs/docs/sidebar/usage/cli/client/agent/` + +**Step 1: Add conditions and drain docs** + +Document: + +- Condition types and thresholds +- Drain lifecycle (Ready → Draining → Cordoned) +- CLI commands (`agent drain`, `agent undrain`) +- Configuration section for `agent.conditions` + +**Step 2: Update permission table** + +Add `agent:write` to the permissions table in configuration.md. 
+ +**Step 3: Commit** + +```bash +git add docs/ +git commit -m "docs: add node conditions and agent drain documentation" +``` + +--- + +## Task 17: Final verification + +**Step 1: Regenerate** + +Run: `just generate` Expected: no diff + +**Step 2: Build** + +Run: `go build ./...` Expected: compiles + +**Step 3: Unit tests** + +Run: `just go::unit` Expected: PASS + +**Step 4: Lint** + +Run: `just go::vet` Expected: clean + +**Step 5: Coverage check** + +Run: +`go test -coverprofile=coverage.out ./internal/agent/... ./internal/job/client/... ./internal/api/agent/...` +Expected: condition.go, drain.go, agent_drain.go at 100% + +--- + +## Verification + +```bash +just generate # regenerate specs + code +go build ./... # compiles +just go::unit # tests pass +just go::vet # lint passes +``` diff --git a/go.mod b/go.mod index e8c53583..b53e2db0 100644 --- a/go.mod +++ b/go.mod @@ -18,7 +18,7 @@ require ( github.com/oapi-codegen/runtime v1.2.0 github.com/osapi-io/nats-client v0.0.0-20260222233639-d0822e0a4b86 github.com/osapi-io/nats-server v0.0.0-20260216201410-1f33dfc63848 - github.com/osapi-io/osapi-sdk v0.0.0-20260305004213-6ad316fa4505 + github.com/osapi-io/osapi-sdk v0.0.0-20260306002247-11cb3395b3f9 github.com/prometheus-community/pro-bing v0.8.0 github.com/prometheus/client_golang v1.23.2 github.com/samber/slog-echo v1.21.0 diff --git a/go.sum b/go.sum index f8ce5a76..30b725b2 100644 --- a/go.sum +++ b/go.sum @@ -755,8 +755,8 @@ github.com/osapi-io/nats-client v0.0.0-20260222233639-d0822e0a4b86 h1:ML0fdgr0M4 github.com/osapi-io/nats-client v0.0.0-20260222233639-d0822e0a4b86/go.mod h1:TQqODOjF2JuAOFrLtm1ItsMzPPAizKfHo+grOMuPDyE= github.com/osapi-io/nats-server v0.0.0-20260216201410-1f33dfc63848 h1:ELW1sTVBn5JIc17mHgd5fhpO3/7btaxJpxykG2Fe0U4= github.com/osapi-io/nats-server v0.0.0-20260216201410-1f33dfc63848/go.mod h1:4rzeY9jiJF/+Ej4WNwqK5HQ2sflZrEs60GxQpg3Iya8= -github.com/osapi-io/osapi-sdk v0.0.0-20260305004213-6ad316fa4505 
h1:J7Wv551BG39Ma9LLWxvZgsaWVNkP5TkteHzExSjt9e4= -github.com/osapi-io/osapi-sdk v0.0.0-20260305004213-6ad316fa4505/go.mod h1:5Y45ymBR4BcxJTOJ7WhqYTDHXxtlQRW7Sr3G52pfMdI= +github.com/osapi-io/osapi-sdk v0.0.0-20260306002247-11cb3395b3f9 h1:v7MKMVLktP3FotS5josRw5DlOKEsIwOQFAj2cd04VwE= +github.com/osapi-io/osapi-sdk v0.0.0-20260306002247-11cb3395b3f9/go.mod h1:gL9oHgIkG+VMazSIXO4Nvwd3IXEuzRvuXstGiphSycc= github.com/otiai10/copy v1.2.0/go.mod h1:rrF5dJ5F0t/EWSYODDu4j9/vEeYHMkc8jt0zJChqQWw= github.com/otiai10/copy v1.14.0 h1:dCI/t1iTdYGtkvCuBG2BgR6KZa83PTclw4U5n2wAllU= github.com/otiai10/copy v1.14.0/go.mod h1:ECfuL02W+/FkTWZWgQqXPWZgW9oeKCSQ5qVfSc4qc4w= diff --git a/internal/agent/condition.go b/internal/agent/condition.go new file mode 100644 index 00000000..db786c99 --- /dev/null +++ b/internal/agent/condition.go @@ -0,0 +1,138 @@ +// Copyright (c) 2026 John Dewey + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal in the Software without restriction, including without limitation the +// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +// sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. 
+ +package agent + +import ( + "fmt" + "time" + + "github.com/retr0h/osapi/internal/job" + "github.com/retr0h/osapi/internal/provider/node/disk" + "github.com/retr0h/osapi/internal/provider/node/load" + "github.com/retr0h/osapi/internal/provider/node/mem" +) + +// findPrevCondition returns the previous condition of the given type, +// or nil if not found. +func findPrevCondition( + condType string, + prev []job.Condition, +) *job.Condition { + for i := range prev { + if prev[i].Type == condType { + return &prev[i] + } + } + return nil +} + +// transitionTime returns the previous LastTransitionTime if status +// hasn't changed, otherwise returns now. +func transitionTime( + condType string, + newStatus bool, + prev []job.Condition, +) time.Time { + if p := findPrevCondition(condType, prev); p != nil { + if p.Status == newStatus { + return p.LastTransitionTime + } + } + return time.Now() +} + +func evaluateMemoryPressure( + stats *mem.Stats, + threshold int, + prev []job.Condition, +) job.Condition { + c := job.Condition{Type: job.ConditionMemoryPressure} + if stats == nil || stats.Total == 0 { + c.LastTransitionTime = transitionTime(c.Type, false, prev) + return c + } + used := stats.Total - stats.Available + pct := float64(used) / float64(stats.Total) * 100 + c.Status = pct > float64(threshold) + if c.Status { + c.Reason = fmt.Sprintf( + "memory %.0f%% used (%.1f/%.1f GB)", + pct, + float64(used)/1024/1024/1024, + float64(stats.Total)/1024/1024/1024, + ) + } + c.LastTransitionTime = transitionTime(c.Type, c.Status, prev) + return c +} + +func evaluateHighLoad( + loadAvg *load.AverageStats, + cpuCount int, + multiplier float64, + prev []job.Condition, +) job.Condition { + c := job.Condition{Type: job.ConditionHighLoad} + if loadAvg == nil || cpuCount == 0 { + c.LastTransitionTime = transitionTime(c.Type, false, prev) + return c + } + threshold := float64(cpuCount) * multiplier + c.Status = float64(loadAvg.Load1) > threshold + if c.Status { + c.Reason = fmt.Sprintf( 
+ "load %.2f, threshold %.2f for %d CPUs", + loadAvg.Load1, threshold, cpuCount, + ) + } + c.LastTransitionTime = transitionTime(c.Type, c.Status, prev) + return c +} + +func evaluateDiskPressure( + disks []disk.UsageStats, + threshold int, + prev []job.Condition, +) job.Condition { + c := job.Condition{Type: job.ConditionDiskPressure} + if len(disks) == 0 { + c.LastTransitionTime = transitionTime(c.Type, false, prev) + return c + } + for _, d := range disks { + if d.Total == 0 { + continue + } + pct := float64(d.Used) / float64(d.Total) * 100 + if pct > float64(threshold) { + c.Status = true + c.Reason = fmt.Sprintf( + "%s %.0f%% used (%.1f/%.1f GB)", + d.Name, pct, + float64(d.Used)/1024/1024/1024, + float64(d.Total)/1024/1024/1024, + ) + break + } + } + c.LastTransitionTime = transitionTime(c.Type, c.Status, prev) + return c +} diff --git a/internal/agent/condition_test.go b/internal/agent/condition_test.go new file mode 100644 index 00000000..720c971e --- /dev/null +++ b/internal/agent/condition_test.go @@ -0,0 +1,619 @@ +// Copyright (c) 2026 John Dewey + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal in the Software without restriction, including without limitation the +// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +// sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. + +package agent + +import ( + "testing" + "time" + + "github.com/stretchr/testify/suite" + + "github.com/retr0h/osapi/internal/job" + "github.com/retr0h/osapi/internal/provider/node/disk" + "github.com/retr0h/osapi/internal/provider/node/load" + "github.com/retr0h/osapi/internal/provider/node/mem" +) + +type ConditionTestSuite struct { + suite.Suite +} + +func (s *ConditionTestSuite) TestFindPrevCondition() { + tests := []struct { + name string + condType string + prev []job.Condition + validateFunc func(*job.Condition) + }{ + { + name: "when condition type is found returns pointer", + condType: job.ConditionMemoryPressure, + prev: []job.Condition{ + { + Type: job.ConditionMemoryPressure, + Status: true, + Reason: "high", + }, + { + Type: job.ConditionHighLoad, + Status: false, + }, + }, + validateFunc: func(c *job.Condition) { + s.Require().NotNil(c) + s.Equal(job.ConditionMemoryPressure, c.Type) + s.True(c.Status) + s.Equal("high", c.Reason) + }, + }, + { + name: "when condition type is not found returns nil", + condType: job.ConditionDiskPressure, + prev: []job.Condition{ + { + Type: job.ConditionMemoryPressure, + Status: true, + }, + }, + validateFunc: func(c *job.Condition) { + s.Nil(c) + }, + }, + { + name: "when prev is empty returns nil", + condType: job.ConditionHighLoad, + prev: []job.Condition{}, + validateFunc: func(c *job.Condition) { + s.Nil(c) + }, + }, + { + name: "when prev is nil returns nil", + condType: job.ConditionHighLoad, + prev: nil, + validateFunc: func(c *job.Condition) { + s.Nil(c) + }, + }, + } + + for _, tt := range tests { + s.Run(tt.name, func() { + result := findPrevCondition(tt.condType, tt.prev) + tt.validateFunc(result) + }) + } +} + +func (s 
*ConditionTestSuite) TestTransitionTime() { + fixedTime := time.Date(2026, 1, 1, 0, 0, 0, 0, time.UTC) + + tests := []struct { + name string + condType string + newStatus bool + prev []job.Condition + validateFunc func(time.Time) + }{ + { + name: "when matching prev has same status preserves transition time", + condType: job.ConditionHighLoad, + newStatus: true, + prev: []job.Condition{ + { + Type: job.ConditionHighLoad, + Status: true, + LastTransitionTime: fixedTime, + }, + }, + validateFunc: func(t time.Time) { + s.Equal(fixedTime, t) + }, + }, + { + name: "when matching prev has different status returns now", + condType: job.ConditionHighLoad, + newStatus: true, + prev: []job.Condition{ + { + Type: job.ConditionHighLoad, + Status: false, + LastTransitionTime: fixedTime, + }, + }, + validateFunc: func(t time.Time) { + s.NotEqual(fixedTime, t) + s.WithinDuration(time.Now(), t, 2*time.Second) + }, + }, + { + name: "when no matching prev returns now", + condType: job.ConditionDiskPressure, + newStatus: true, + prev: []job.Condition{ + { + Type: job.ConditionHighLoad, + Status: true, + LastTransitionTime: fixedTime, + }, + }, + validateFunc: func(t time.Time) { + s.WithinDuration(time.Now(), t, 2*time.Second) + }, + }, + { + name: "when prev is empty returns now", + condType: job.ConditionHighLoad, + newStatus: false, + prev: []job.Condition{}, + validateFunc: func(t time.Time) { + s.WithinDuration(time.Now(), t, 2*time.Second) + }, + }, + { + name: "when prev is nil returns now", + condType: job.ConditionHighLoad, + newStatus: false, + prev: nil, + validateFunc: func(t time.Time) { + s.WithinDuration(time.Now(), t, 2*time.Second) + }, + }, + } + + for _, tt := range tests { + s.Run(tt.name, func() { + result := transitionTime(tt.condType, tt.newStatus, tt.prev) + tt.validateFunc(result) + }) + } +} + +func (s *ConditionTestSuite) TestEvaluateMemoryPressure() { + tests := []struct { + name string + stats *mem.Stats + threshold int + prev []job.Condition + 
validateFunc func(job.Condition) + }{ + { + name: "when usage above threshold returns true with reason", + stats: &mem.Stats{ + Total: 8 * 1024 * 1024 * 1024, // 8 GB + Available: 1 * 1024 * 1024 * 1024, // 1 GB available = 87.5% used + }, + threshold: 80, + prev: nil, + validateFunc: func(c job.Condition) { + s.Equal(job.ConditionMemoryPressure, c.Type) + s.True(c.Status) + s.Contains(c.Reason, "memory") + s.Contains(c.Reason, "88%") + s.Contains(c.Reason, "GB") + }, + }, + { + name: "when usage below threshold returns false", + stats: &mem.Stats{ + Total: 8 * 1024 * 1024 * 1024, // 8 GB + Available: 6 * 1024 * 1024 * 1024, // 6 GB available = 25% used + }, + threshold: 80, + prev: nil, + validateFunc: func(c job.Condition) { + s.Equal(job.ConditionMemoryPressure, c.Type) + s.False(c.Status) + s.Empty(c.Reason) + }, + }, + { + name: "when stats is nil returns false", + stats: nil, + threshold: 80, + prev: nil, + validateFunc: func(c job.Condition) { + s.Equal(job.ConditionMemoryPressure, c.Type) + s.False(c.Status) + s.Empty(c.Reason) + }, + }, + { + name: "when total is zero returns false", + stats: &mem.Stats{ + Total: 0, + Available: 0, + }, + threshold: 80, + prev: nil, + validateFunc: func(c job.Condition) { + s.Equal(job.ConditionMemoryPressure, c.Type) + s.False(c.Status) + s.Empty(c.Reason) + }, + }, + { + name: "when usage exactly at threshold returns false", + stats: &mem.Stats{ + Total: 100, + Available: 20, // 80% used, threshold is 80 (> not >=) + }, + threshold: 80, + prev: nil, + validateFunc: func(c job.Condition) { + s.Equal(job.ConditionMemoryPressure, c.Type) + s.False(c.Status) + s.Empty(c.Reason) + }, + }, + } + + for _, tt := range tests { + s.Run(tt.name, func() { + result := evaluateMemoryPressure(tt.stats, tt.threshold, tt.prev) + tt.validateFunc(result) + }) + } +} + +func (s *ConditionTestSuite) TestEvaluateHighLoad() { + tests := []struct { + name string + loadAvg *load.AverageStats + cpuCount int + multiplier float64 + prev 
[]job.Condition + validateFunc func(job.Condition) + }{ + { + name: "when load above threshold returns true with reason", + loadAvg: &load.AverageStats{ + Load1: 8.5, + Load5: 7.0, + Load15: 6.0, + }, + cpuCount: 4, + multiplier: 2.0, // threshold = 8.0 + prev: nil, + validateFunc: func(c job.Condition) { + s.Equal(job.ConditionHighLoad, c.Type) + s.True(c.Status) + s.Contains(c.Reason, "load 8.50") + s.Contains(c.Reason, "threshold 8.00") + s.Contains(c.Reason, "4 CPUs") + }, + }, + { + name: "when load below threshold returns false", + loadAvg: &load.AverageStats{ + Load1: 2.0, + Load5: 1.5, + Load15: 1.0, + }, + cpuCount: 4, + multiplier: 2.0, // threshold = 8.0 + prev: nil, + validateFunc: func(c job.Condition) { + s.Equal(job.ConditionHighLoad, c.Type) + s.False(c.Status) + s.Empty(c.Reason) + }, + }, + { + name: "when load is nil returns false", + loadAvg: nil, + cpuCount: 4, + multiplier: 2.0, + prev: nil, + validateFunc: func(c job.Condition) { + s.Equal(job.ConditionHighLoad, c.Type) + s.False(c.Status) + s.Empty(c.Reason) + }, + }, + { + name: "when cpu count is zero returns false", + loadAvg: &load.AverageStats{ + Load1: 8.5, + Load5: 7.0, + Load15: 6.0, + }, + cpuCount: 0, + multiplier: 2.0, + prev: nil, + validateFunc: func(c job.Condition) { + s.Equal(job.ConditionHighLoad, c.Type) + s.False(c.Status) + s.Empty(c.Reason) + }, + }, + { + name: "when load exactly at threshold returns false", + loadAvg: &load.AverageStats{ + Load1: 8.0, + Load5: 5.0, + Load15: 3.0, + }, + cpuCount: 4, + multiplier: 2.0, // threshold = 8.0, Load1 = 8.0 (not >) + prev: nil, + validateFunc: func(c job.Condition) { + s.Equal(job.ConditionHighLoad, c.Type) + s.False(c.Status) + s.Empty(c.Reason) + }, + }, + } + + for _, tt := range tests { + s.Run(tt.name, func() { + result := evaluateHighLoad(tt.loadAvg, tt.cpuCount, tt.multiplier, tt.prev) + tt.validateFunc(result) + }) + } +} + +func (s *ConditionTestSuite) TestEvaluateDiskPressure() { + tests := []struct { + name string + 
disks []disk.UsageStats + threshold int + prev []job.Condition + validateFunc func(job.Condition) + }{ + { + name: "when one disk above threshold returns true", + disks: []disk.UsageStats{ + { + Name: "/dev/sda1", + Total: 100 * 1024 * 1024 * 1024, // 100 GB + Used: 95 * 1024 * 1024 * 1024, // 95 GB = 95% + Free: 5 * 1024 * 1024 * 1024, + }, + }, + threshold: 90, + prev: nil, + validateFunc: func(c job.Condition) { + s.Equal(job.ConditionDiskPressure, c.Type) + s.True(c.Status) + s.Contains(c.Reason, "/dev/sda1") + s.Contains(c.Reason, "95%") + s.Contains(c.Reason, "GB") + }, + }, + { + name: "when all disks below threshold returns false", + disks: []disk.UsageStats{ + { + Name: "/dev/sda1", + Total: 100 * 1024 * 1024 * 1024, + Used: 50 * 1024 * 1024 * 1024, // 50% + Free: 50 * 1024 * 1024 * 1024, + }, + { + Name: "/dev/sdb1", + Total: 200 * 1024 * 1024 * 1024, + Used: 60 * 1024 * 1024 * 1024, // 30% + Free: 140 * 1024 * 1024 * 1024, + }, + }, + threshold: 90, + prev: nil, + validateFunc: func(c job.Condition) { + s.Equal(job.ConditionDiskPressure, c.Type) + s.False(c.Status) + s.Empty(c.Reason) + }, + }, + { + name: "when disks is nil returns false", + disks: nil, + threshold: 90, + prev: nil, + validateFunc: func(c job.Condition) { + s.Equal(job.ConditionDiskPressure, c.Type) + s.False(c.Status) + s.Empty(c.Reason) + }, + }, + { + name: "when disks is empty returns false", + disks: []disk.UsageStats{}, + threshold: 90, + prev: nil, + validateFunc: func(c job.Condition) { + s.Equal(job.ConditionDiskPressure, c.Type) + s.False(c.Status) + s.Empty(c.Reason) + }, + }, + { + name: "when disk total is zero skips it", + disks: []disk.UsageStats{ + { + Name: "/dev/sda1", + Total: 0, + Used: 0, + Free: 0, + }, + }, + threshold: 90, + prev: nil, + validateFunc: func(c job.Condition) { + s.Equal(job.ConditionDiskPressure, c.Type) + s.False(c.Status) + s.Empty(c.Reason) + }, + }, + { + name: "when second disk is above threshold reports it", + disks: []disk.UsageStats{ + { + 
Name: "/dev/sda1", + Total: 100 * 1024 * 1024 * 1024, + Used: 50 * 1024 * 1024 * 1024, // 50% + Free: 50 * 1024 * 1024 * 1024, + }, + { + Name: "/dev/sdb1", + Total: 200 * 1024 * 1024 * 1024, + Used: 195 * 1024 * 1024 * 1024, // 97.5% + Free: 5 * 1024 * 1024 * 1024, + }, + }, + threshold: 90, + prev: nil, + validateFunc: func(c job.Condition) { + s.Equal(job.ConditionDiskPressure, c.Type) + s.True(c.Status) + s.Contains(c.Reason, "/dev/sdb1") + }, + }, + } + + for _, tt := range tests { + s.Run(tt.name, func() { + result := evaluateDiskPressure(tt.disks, tt.threshold, tt.prev) + tt.validateFunc(result) + }) + } +} + +func (s *ConditionTestSuite) TestLastTransitionTimeTracking() { + fixedPast := time.Date(2026, 1, 1, 0, 0, 0, 0, time.UTC) + + tests := []struct { + name string + evalFunc func([]job.Condition) job.Condition + prev []job.Condition + validateFunc func(job.Condition) + }{ + { + name: "when status flips from false to true transition time updates", + evalFunc: func(prev []job.Condition) job.Condition { + return evaluateMemoryPressure( + &mem.Stats{ + Total: 100, + Available: 10, // 90% used + }, + 80, + prev, + ) + }, + prev: []job.Condition{ + { + Type: job.ConditionMemoryPressure, + Status: false, + LastTransitionTime: fixedPast, + }, + }, + validateFunc: func(c job.Condition) { + s.True(c.Status) + s.NotEqual(fixedPast, c.LastTransitionTime) + s.WithinDuration(time.Now(), c.LastTransitionTime, 2*time.Second) + }, + }, + { + name: "when status stays true transition time is preserved", + evalFunc: func(prev []job.Condition) job.Condition { + return evaluateMemoryPressure( + &mem.Stats{ + Total: 100, + Available: 10, // 90% used + }, + 80, + prev, + ) + }, + prev: []job.Condition{ + { + Type: job.ConditionMemoryPressure, + Status: true, + LastTransitionTime: fixedPast, + }, + }, + validateFunc: func(c job.Condition) { + s.True(c.Status) + s.Equal(fixedPast, c.LastTransitionTime) + }, + }, + { + name: "when status flips from true to false transition time 
updates", + evalFunc: func(prev []job.Condition) job.Condition { + return evaluateMemoryPressure( + &mem.Stats{ + Total: 100, + Available: 80, // 20% used + }, + 80, + prev, + ) + }, + prev: []job.Condition{ + { + Type: job.ConditionMemoryPressure, + Status: true, + LastTransitionTime: fixedPast, + }, + }, + validateFunc: func(c job.Condition) { + s.False(c.Status) + s.NotEqual(fixedPast, c.LastTransitionTime) + s.WithinDuration(time.Now(), c.LastTransitionTime, 2*time.Second) + }, + }, + { + name: "when status stays false transition time is preserved", + evalFunc: func(prev []job.Condition) job.Condition { + return evaluateMemoryPressure( + &mem.Stats{ + Total: 100, + Available: 80, // 20% used + }, + 80, + prev, + ) + }, + prev: []job.Condition{ + { + Type: job.ConditionMemoryPressure, + Status: false, + LastTransitionTime: fixedPast, + }, + }, + validateFunc: func(c job.Condition) { + s.False(c.Status) + s.Equal(fixedPast, c.LastTransitionTime) + }, + }, + } + + for _, tt := range tests { + s.Run(tt.name, func() { + result := tt.evalFunc(tt.prev) + tt.validateFunc(result) + }) + } +} + +func TestConditionTestSuite(t *testing.T) { + suite.Run(t, new(ConditionTestSuite)) +} diff --git a/internal/agent/consumer.go b/internal/agent/consumer.go index 532b1c72..03b95168 100644 --- a/internal/agent/consumer.go +++ b/internal/agent/consumer.go @@ -102,14 +102,14 @@ func (a *Agent) consumeQueryJobs( continue } - a.wg.Add(1) + a.consumerWg.Add(1) go func(c struct { name string filter string queueGroup string }, ) { - defer a.wg.Done() + defer a.consumerWg.Done() opts := &natsclient.ConsumeOptions{ QueueGroup: c.queueGroup, @@ -194,14 +194,14 @@ func (a *Agent) consumeModifyJobs( continue } - a.wg.Add(1) + a.consumerWg.Add(1) go func(c struct { name string filter string queueGroup string }, ) { - defer a.wg.Done() + defer a.consumerWg.Done() opts := &natsclient.ConsumeOptions{ QueueGroup: c.queueGroup, @@ -222,6 +222,21 @@ func (a *Agent) consumeModifyJobs( return nil } 
+// startConsumers creates a consumer context and starts all job consumers. +func (a *Agent) startConsumers() { + a.consumerCtx, a.consumerCancel = context.WithCancel(a.ctx) + _ = a.consumeQueryJobs(a.consumerCtx, a.hostname) + _ = a.consumeModifyJobs(a.consumerCtx, a.hostname) +} + +// stopConsumers cancels the consumer context and waits for all consumer +// goroutines to finish. After this returns, the agent is no longer +// receiving new jobs. +func (a *Agent) stopConsumers() { + a.consumerCancel() + a.consumerWg.Wait() +} + // handleJobMessageJS wraps the existing handleJobMessage for JetStream compatibility. func (a *Agent) handleJobMessageJS( msg jetstream.Msg, diff --git a/internal/agent/drain.go b/internal/agent/drain.go new file mode 100644 index 00000000..47105637 --- /dev/null +++ b/internal/agent/drain.go @@ -0,0 +1,70 @@ +// Copyright (c) 2026 John Dewey + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal in the Software without restriction, including without limitation the +// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +// sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. 
+
+package agent
+
+import (
+	"context"
+
+	"github.com/retr0h/osapi/internal/job"
+)
+
+// checkDrainFlag checks the drain flag via the job client. Drain flags
+// are stored in the main KV bucket (longer TTL than registry).
+func (a *Agent) checkDrainFlag(
+	ctx context.Context,
+	hostname string,
+) bool {
+	return a.jobClient.CheckDrainFlag(ctx, hostname)
+}
+
+// handleDrainDetection checks drain flag on each heartbeat tick.
+// When drain is requested and the agent is Ready, it stops all job
+// consumers (waiting for in-flight handlers) and transitions to Cordoned.
+// When the drain flag is removed and the agent is Draining or Cordoned,
+// it restarts consumers and transitions back to Ready.
+func (a *Agent) handleDrainDetection(
+	ctx context.Context,
+	hostname string,
+) {
+	drainRequested := a.checkDrainFlag(ctx, hostname)
+
+	switch {
+	case drainRequested && a.state == job.AgentStateReady:
+		a.logger.Info("drain detected, stopping job consumption")
+		a.stopConsumers()
+		a.state = job.AgentStateCordoned
+		a.logger.Info("all consumers stopped, agent cordoned")
+		_ = a.jobClient.WriteAgentTimelineEvent(
+			ctx, hostname, "drain", "Drain initiated",
+		)
+		_ = a.jobClient.WriteAgentTimelineEvent(
+			ctx, hostname, "cordoned", "All jobs completed",
+		)
+
+	case !drainRequested && (a.state == job.AgentStateDraining || a.state == job.AgentStateCordoned):
+		a.logger.Info("undrain detected, resuming job consumption")
+		a.startConsumers()
+		a.state = job.AgentStateReady
+		_ = a.jobClient.WriteAgentTimelineEvent(
+			ctx, hostname, "undrain", "Resumed accepting jobs",
+		)
+	}
+}
diff --git a/internal/agent/drain_test.go b/internal/agent/drain_test.go
new file mode 100644
index 00000000..4d746a4f
--- /dev/null
+++ b/internal/agent/drain_test.go
@@ -0,0 +1,253 @@
+// Copyright (c) 2026 John Dewey
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software
without restriction, including without limitation the +// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +// sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. + +package agent + +import ( + "context" + "log/slog" + "testing" + + "github.com/golang/mock/gomock" + "github.com/spf13/afero" + "github.com/stretchr/testify/suite" + + "github.com/retr0h/osapi/internal/config" + "github.com/retr0h/osapi/internal/job" + "github.com/retr0h/osapi/internal/job/mocks" + commandMocks "github.com/retr0h/osapi/internal/provider/command/mocks" + dnsMocks "github.com/retr0h/osapi/internal/provider/network/dns/mocks" + netinfoMocks "github.com/retr0h/osapi/internal/provider/network/netinfo/mocks" + pingMocks "github.com/retr0h/osapi/internal/provider/network/ping/mocks" + diskMocks "github.com/retr0h/osapi/internal/provider/node/disk/mocks" + hostMocks "github.com/retr0h/osapi/internal/provider/node/host/mocks" + loadMocks "github.com/retr0h/osapi/internal/provider/node/load/mocks" + memMocks "github.com/retr0h/osapi/internal/provider/node/mem/mocks" +) + +type DrainTestSuite struct { + suite.Suite + + mockCtrl *gomock.Controller + mockJobClient *mocks.MockJobClient + mockKV *mocks.MockKeyValue + mockEntry *mocks.MockKeyValueEntry + agent *Agent +} + 
+func (s *DrainTestSuite) SetupTest() { + s.mockCtrl = gomock.NewController(s.T()) + s.mockJobClient = mocks.NewMockJobClient(s.mockCtrl) + s.mockKV = mocks.NewMockKeyValue(s.mockCtrl) + s.mockEntry = mocks.NewMockKeyValueEntry(s.mockCtrl) + + appConfig := config.Config{ + Agent: config.AgentConfig{ + Labels: map[string]string{"group": "web"}, + }, + } + + s.agent = New( + afero.NewMemMapFs(), + appConfig, + slog.Default(), + s.mockJobClient, + "test-stream", + hostMocks.NewDefaultMockProvider(s.mockCtrl), + diskMocks.NewDefaultMockProvider(s.mockCtrl), + memMocks.NewDefaultMockProvider(s.mockCtrl), + loadMocks.NewDefaultMockProvider(s.mockCtrl), + dnsMocks.NewDefaultMockProvider(s.mockCtrl), + pingMocks.NewDefaultMockProvider(s.mockCtrl), + netinfoMocks.NewDefaultMockProvider(s.mockCtrl), + commandMocks.NewDefaultMockProvider(s.mockCtrl), + s.mockKV, + nil, + ) + s.agent.state = job.AgentStateReady + s.agent.ctx, s.agent.cancel = context.WithCancel(context.Background()) + s.agent.consumerCtx, s.agent.consumerCancel = context.WithCancel(s.agent.ctx) +} + +func (s *DrainTestSuite) TearDownTest() { + s.mockCtrl.Finish() +} + +func (s *DrainTestSuite) TestCheckDrainFlag() { + tests := []struct { + name string + setupMock func() + validateFunc func(bool) + }{ + { + name: "when drain key exists returns true", + setupMock: func() { + s.mockJobClient.EXPECT(). + CheckDrainFlag(gomock.Any(), "test-agent"). + Return(true) + }, + validateFunc: func(result bool) { + s.True(result) + }, + }, + { + name: "when drain key missing returns false", + setupMock: func() { + s.mockJobClient.EXPECT(). + CheckDrainFlag(gomock.Any(), "test-agent"). 
+ Return(false) + }, + validateFunc: func(result bool) { + s.False(result) + }, + }, + } + + for _, tt := range tests { + s.Run(tt.name, func() { + tt.setupMock() + result := s.agent.checkDrainFlag(context.Background(), "test-agent") + tt.validateFunc(result) + }) + } +} + +func (s *DrainTestSuite) TestHandleDrainDetection() { + tests := []struct { + name string + initialState string + setupMock func() + expectedState string + }{ + { + name: "when drain flag set and agent is Ready transitions to Cordoned", + initialState: job.AgentStateReady, + setupMock: func() { + s.mockJobClient.EXPECT(). + CheckDrainFlag(gomock.Any(), "test-agent"). + Return(true) + s.mockJobClient.EXPECT(). + WriteAgentTimelineEvent( + gomock.Any(), + "test-agent", + "drain", + "Drain initiated", + ). + Return(nil) + s.mockJobClient.EXPECT(). + WriteAgentTimelineEvent( + gomock.Any(), + "test-agent", + "cordoned", + "All jobs completed", + ). + Return(nil) + }, + expectedState: job.AgentStateCordoned, + }, + { + name: "when drain flag removed and agent is Draining transitions to Ready", + initialState: job.AgentStateDraining, + setupMock: func() { + s.mockJobClient.EXPECT(). + CheckDrainFlag(gomock.Any(), "test-agent"). + Return(false) + s.mockJobClient.EXPECT(). + WriteAgentTimelineEvent( + gomock.Any(), + "test-agent", + "undrain", + "Resumed accepting jobs", + ). + Return(nil) + // startConsumers re-creates consumers + s.mockJobClient.EXPECT(). + CreateOrUpdateConsumer(gomock.Any(), gomock.Any(), gomock.Any()). + Return(nil). + AnyTimes() + s.mockJobClient.EXPECT(). + ConsumeJobs(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()). + Return(context.Canceled). + AnyTimes() + }, + expectedState: job.AgentStateReady, + }, + { + name: "when drain flag removed and agent is Cordoned transitions to Ready", + initialState: job.AgentStateCordoned, + setupMock: func() { + s.mockJobClient.EXPECT(). + CheckDrainFlag(gomock.Any(), "test-agent"). 
+ Return(false) + s.mockJobClient.EXPECT(). + WriteAgentTimelineEvent( + gomock.Any(), + "test-agent", + "undrain", + "Resumed accepting jobs", + ). + Return(nil) + // startConsumers re-creates consumers + s.mockJobClient.EXPECT(). + CreateOrUpdateConsumer(gomock.Any(), gomock.Any(), gomock.Any()). + Return(nil). + AnyTimes() + s.mockJobClient.EXPECT(). + ConsumeJobs(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()). + Return(context.Canceled). + AnyTimes() + }, + expectedState: job.AgentStateReady, + }, + { + name: "when drain flag still set and agent is already Draining stays Draining", + initialState: job.AgentStateDraining, + setupMock: func() { + s.mockJobClient.EXPECT(). + CheckDrainFlag(gomock.Any(), "test-agent"). + Return(true) + }, + expectedState: job.AgentStateDraining, + }, + { + name: "when no drain flag and agent is Ready stays Ready", + initialState: job.AgentStateReady, + setupMock: func() { + s.mockJobClient.EXPECT(). + CheckDrainFlag(gomock.Any(), "test-agent"). 
+ Return(false) + }, + expectedState: job.AgentStateReady, + }, + } + + for _, tt := range tests { + s.Run(tt.name, func() { + s.agent.state = tt.initialState + tt.setupMock() + s.agent.handleDrainDetection(context.Background(), "test-agent") + s.Equal(tt.expectedState, s.agent.state) + }) + } +} + +func TestDrainTestSuite(t *testing.T) { + suite.Run(t, new(DrainTestSuite)) +} diff --git a/internal/agent/facts.go b/internal/agent/facts.go index d0642479..cac82974 100644 --- a/internal/agent/facts.go +++ b/internal/agent/facts.go @@ -85,6 +85,7 @@ func (a *Agent) writeFacts( if count, err := a.hostProvider.GetCPUCount(); err == nil { reg.CPUCount = count + a.cpuCount = count } if mgr, err := a.hostProvider.GetServiceManager(); err == nil { diff --git a/internal/agent/heartbeat.go b/internal/agent/heartbeat.go index f469e35e..959d7814 100644 --- a/internal/agent/heartbeat.go +++ b/internal/agent/heartbeat.go @@ -27,6 +27,9 @@ import ( "time" "github.com/retr0h/osapi/internal/job" + "github.com/retr0h/osapi/internal/provider/node/disk" + "github.com/retr0h/osapi/internal/provider/node/load" + "github.com/retr0h/osapi/internal/provider/node/mem" ) // heartbeatInterval is the interval between heartbeat refreshes. 
@@ -89,11 +92,14 @@ func (a *Agent) writeRegistration( ctx context.Context, hostname string, ) { + a.handleDrainDetection(ctx, hostname) + reg := job.AgentRegistration{ Hostname: hostname, Labels: a.appConfig.Agent.Labels, RegisteredAt: time.Now(), StartedAt: a.startedAt, + State: a.state, } if info, err := a.hostProvider.GetOSInfo(); err == nil { @@ -104,14 +110,44 @@ func (a *Agent) writeRegistration( reg.Uptime = uptime } + var loadAvg *load.AverageStats if avg, err := a.loadProvider.GetAverageStats(); err == nil { + loadAvg = avg reg.LoadAverages = avg } + var memStats *mem.Stats if stats, err := a.memProvider.GetStats(); err == nil { + memStats = stats reg.MemoryStats = stats } + var diskStats []disk.UsageStats + if stats, err := a.diskProvider.GetLocalUsageStats(); err == nil { + diskStats = stats + } + + conditions := []job.Condition{ + evaluateMemoryPressure( + memStats, + a.appConfig.Agent.Conditions.MemoryPressureThreshold, + a.prevConditions, + ), + evaluateHighLoad( + loadAvg, + a.cpuCount, + a.appConfig.Agent.Conditions.HighLoadMultiplier, + a.prevConditions, + ), + evaluateDiskPressure( + diskStats, + a.appConfig.Agent.Conditions.DiskPressureThreshold, + a.prevConditions, + ), + } + a.prevConditions = conditions + reg.Conditions = conditions + data, err := marshalJSON(reg) if err != nil { a.logger.Warn( diff --git a/internal/agent/heartbeat_public_test.go b/internal/agent/heartbeat_public_test.go index d607d56f..93db8d22 100644 --- a/internal/agent/heartbeat_public_test.go +++ b/internal/agent/heartbeat_public_test.go @@ -100,6 +100,12 @@ func (s *HeartbeatPublicTestSuite) TestStartWithHeartbeat() { { name: "when registryKV is set registers and deregisters", setupFunc: func() *agent.Agent { + // Drain check on each heartbeat tick (no drain flag present) + s.mockJobClient.EXPECT(). + CheckDrainFlag(gomock.Any(), "test-agent"). + Return(false). + AnyTimes() + // Heartbeat initial write s.mockKV.EXPECT(). 
Put(gomock.Any(), "agents.test_agent", gomock.Any()). diff --git a/internal/agent/heartbeat_test.go b/internal/agent/heartbeat_test.go index eacc6ef5..fb64492f 100644 --- a/internal/agent/heartbeat_test.go +++ b/internal/agent/heartbeat_test.go @@ -34,6 +34,7 @@ import ( "github.com/stretchr/testify/suite" "github.com/retr0h/osapi/internal/config" + "github.com/retr0h/osapi/internal/job" "github.com/retr0h/osapi/internal/job/mocks" commandMocks "github.com/retr0h/osapi/internal/provider/command/mocks" dnsMocks "github.com/retr0h/osapi/internal/provider/network/dns/mocks" @@ -83,6 +84,14 @@ func (s *HeartbeatTestSuite) SetupTest() { s.mockKV, nil, ) + s.agent.state = job.AgentStateReady + + // writeRegistration now calls handleDrainDetection which checks drain flag. + // Default: no drain flag present. + s.mockJobClient.EXPECT(). + CheckDrainFlag(gomock.Any(), "test-agent"). + Return(false). + AnyTimes() } func (s *HeartbeatTestSuite) TearDownTest() { diff --git a/internal/agent/server.go b/internal/agent/server.go index c1c0ec18..396b75b8 100644 --- a/internal/agent/server.go +++ b/internal/agent/server.go @@ -32,30 +32,29 @@ import ( func (a *Agent) Start() { a.ctx, a.cancel = context.WithCancel(context.Background()) a.startedAt = time.Now() + a.state = job.AgentStateReady a.logger.Info("starting node agent") // Determine agent hostname (GetAgentHostname always succeeds) - hostname, _ := job.GetAgentHostname(a.appConfig.Agent.Hostname) + a.hostname, _ = job.GetAgentHostname(a.appConfig.Agent.Hostname) a.logger.Info( "agent configuration", - slog.String("hostname", hostname), + slog.String("hostname", a.hostname), slog.String("queue_group", a.appConfig.Agent.QueueGroup), slog.Int("max_jobs", a.appConfig.Agent.MaxJobs), slog.Any("labels", a.appConfig.Agent.Labels), ) // Register in agent registry and start heartbeat keepalive. - a.startHeartbeat(a.ctx, hostname) + a.startHeartbeat(a.ctx, a.hostname) // Collect and publish system facts. 
- a.startFacts(a.ctx, hostname) + a.startFacts(a.ctx, a.hostname) // Start consuming messages for different job types. - // Each consume function spawns goroutines tracked by a.wg. - _ = a.consumeQueryJobs(a.ctx, hostname) - _ = a.consumeModifyJobs(a.ctx, hostname) + a.startConsumers() a.logger.Info("node agent started successfully") } @@ -70,6 +69,7 @@ func (a *Agent) Stop( done := make(chan struct{}) go func() { + a.consumerWg.Wait() a.wg.Wait() close(done) }() diff --git a/internal/agent/types.go b/internal/agent/types.go index 5e97581c..e3b31e01 100644 --- a/internal/agent/types.go +++ b/internal/agent/types.go @@ -30,6 +30,7 @@ import ( "github.com/spf13/afero" "github.com/retr0h/osapi/internal/config" + "github.com/retr0h/osapi/internal/job" "github.com/retr0h/osapi/internal/job/client" "github.com/retr0h/osapi/internal/provider/command" "github.com/retr0h/osapi/internal/provider/network/dns" @@ -74,10 +75,27 @@ type Agent struct { // startedAt records when the agent process started. startedAt time.Time + // prevConditions tracks condition state between heartbeats. + prevConditions []job.Condition + + // cpuCount cached from facts for HighLoad evaluation. + cpuCount int + + // state is the agent's scheduling state (Ready, Draining, Cordoned). + state string + + // hostname cached from Start for drain/undrain resubscribe. + hostname string + // Lifecycle management ctx context.Context cancel context.CancelFunc wg sync.WaitGroup + + // Consumer lifecycle for drain/undrain. + consumerCtx context.Context + consumerCancel context.CancelFunc + consumerWg sync.WaitGroup } // JobContext contains the context and data for a single job execution. 
diff --git a/internal/api/agent/agent_drain.go b/internal/api/agent/agent_drain.go new file mode 100644 index 00000000..ee3c1276 --- /dev/null +++ b/internal/api/agent/agent_drain.go @@ -0,0 +1,68 @@ +// Copyright (c) 2026 John Dewey + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal in the Software without restriction, including without limitation the +// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +// sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. + +package agent + +import ( + "context" + "fmt" + "strings" + + "github.com/retr0h/osapi/internal/api/agent/gen" + "github.com/retr0h/osapi/internal/job" +) + +// DrainAgent handles POST /agent/{hostname}/drain. 
+func (a *Agent) DrainAgent( + ctx context.Context, + request gen.DrainAgentRequestObject, +) (gen.DrainAgentResponseObject, error) { + hostname := request.Hostname + + agentInfo, err := a.JobClient.GetAgent(ctx, hostname) + if err != nil { + errMsg := fmt.Sprintf("agent not found: %s", hostname) + return gen.DrainAgent404JSONResponse{Error: &errMsg}, nil + } + + if agentInfo.State == job.AgentStateDraining || agentInfo.State == job.AgentStateCordoned { + errMsg := fmt.Sprintf("agent %s is already in %s state", hostname, agentInfo.State) + return gen.DrainAgent409JSONResponse{Error: &errMsg}, nil + } + + if err := a.JobClient.SetDrainFlag(ctx, hostname); err != nil { + errMsg := fmt.Sprintf("failed to set drain flag: %s", err.Error()) + return gen.DrainAgent409JSONResponse{Error: &errMsg}, nil + } + + if err := a.JobClient.WriteAgentTimelineEvent(ctx, hostname, "drain", "Drain initiated via API"); err != nil { + if strings.Contains(err.Error(), "not found") { + errMsg := fmt.Sprintf("agent not found: %s", hostname) + return gen.DrainAgent404JSONResponse{Error: &errMsg}, nil + } + + errMsg := err.Error() + return gen.DrainAgent409JSONResponse{Error: &errMsg}, nil + } + + msg := fmt.Sprintf("drain initiated for agent %s", hostname) + + return gen.DrainAgent200JSONResponse{Message: msg}, nil +} diff --git a/internal/api/agent/agent_drain_public_test.go b/internal/api/agent/agent_drain_public_test.go new file mode 100644 index 00000000..91c99de5 --- /dev/null +++ b/internal/api/agent/agent_drain_public_test.go @@ -0,0 +1,358 @@ +// Copyright (c) 2026 John Dewey + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal in the Software without restriction, including without limitation the +// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +// sell copies of the Software, and to permit persons to whom the Software is +// furnished to 
do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. + +package agent_test + +import ( + "context" + "fmt" + "log/slog" + "net/http" + "net/http/httptest" + "os" + "testing" + + "github.com/golang/mock/gomock" + "github.com/stretchr/testify/suite" + + "github.com/retr0h/osapi/internal/api" + apiagent "github.com/retr0h/osapi/internal/api/agent" + "github.com/retr0h/osapi/internal/api/agent/gen" + "github.com/retr0h/osapi/internal/authtoken" + "github.com/retr0h/osapi/internal/config" + jobtypes "github.com/retr0h/osapi/internal/job" + jobmocks "github.com/retr0h/osapi/internal/job/mocks" +) + +type AgentDrainPublicTestSuite struct { + suite.Suite + + mockCtrl *gomock.Controller + mockJobClient *jobmocks.MockJobClient + handler *apiagent.Agent + ctx context.Context + appConfig config.Config + logger *slog.Logger +} + +func (s *AgentDrainPublicTestSuite) SetupTest() { + s.mockCtrl = gomock.NewController(s.T()) + s.mockJobClient = jobmocks.NewMockJobClient(s.mockCtrl) + s.handler = apiagent.New(slog.Default(), s.mockJobClient) + s.ctx = context.Background() + s.appConfig = config.Config{} + s.logger = slog.New(slog.NewTextHandler(os.Stdout, nil)) +} + +func (s *AgentDrainPublicTestSuite) TearDownTest() { + s.mockCtrl.Finish() +} + +func (s *AgentDrainPublicTestSuite) TestDrainAgent() { + tests := []struct { + name string + hostname string + mockAgent 
*jobtypes.AgentInfo + mockGetErr error + mockWriteErr error + skipWrite bool + mockSetDrain bool + validateFunc func(resp gen.DrainAgentResponseObject) + }{ + { + name: "success drains agent", + hostname: "server1", + mockAgent: &jobtypes.AgentInfo{ + Hostname: "server1", + State: jobtypes.AgentStateReady, + }, + mockSetDrain: true, + validateFunc: func(resp gen.DrainAgentResponseObject) { + r, ok := resp.(gen.DrainAgent200JSONResponse) + s.True(ok) + s.Contains(r.Message, "drain initiated for agent server1") + }, + }, + { + name: "agent not found returns 404", + hostname: "unknown", + mockGetErr: fmt.Errorf("agent not found: unknown"), + skipWrite: true, + validateFunc: func(resp gen.DrainAgentResponseObject) { + _, ok := resp.(gen.DrainAgent404JSONResponse) + s.True(ok) + }, + }, + { + name: "agent already draining returns 409", + hostname: "server1", + mockAgent: &jobtypes.AgentInfo{ + Hostname: "server1", + State: jobtypes.AgentStateDraining, + }, + skipWrite: true, + validateFunc: func(resp gen.DrainAgentResponseObject) { + _, ok := resp.(gen.DrainAgent409JSONResponse) + s.True(ok) + }, + }, + { + name: "agent already cordoned returns 409", + hostname: "server1", + mockAgent: &jobtypes.AgentInfo{ + Hostname: "server1", + State: jobtypes.AgentStateCordoned, + }, + skipWrite: true, + validateFunc: func(resp gen.DrainAgentResponseObject) { + _, ok := resp.(gen.DrainAgent409JSONResponse) + s.True(ok) + }, + }, + } + + for _, tt := range tests { + s.Run(tt.name, func() { + s.mockJobClient.EXPECT(). + GetAgent(gomock.Any(), tt.hostname). + Return(tt.mockAgent, tt.mockGetErr) + + if tt.mockSetDrain { + s.mockJobClient.EXPECT(). + SetDrainFlag(gomock.Any(), tt.hostname). + Return(nil) + } + + if !tt.skipWrite { + s.mockJobClient.EXPECT(). + WriteAgentTimelineEvent(gomock.Any(), tt.hostname, "drain", "Drain initiated via API"). 
+ Return(tt.mockWriteErr) + } + + resp, err := s.handler.DrainAgent(s.ctx, gen.DrainAgentRequestObject{ + Hostname: tt.hostname, + }) + s.NoError(err) + tt.validateFunc(resp) + }) + } +} + +func (s *AgentDrainPublicTestSuite) TestDrainAgentValidationHTTP() { + tests := []struct { + name string + hostname string + setupJobMock func() *jobmocks.MockJobClient + wantCode int + wantContains []string + }{ + { + name: "when agent exists returns 200", + hostname: "server1", + setupJobMock: func() *jobmocks.MockJobClient { + mock := jobmocks.NewMockJobClient(s.mockCtrl) + mock.EXPECT(). + GetAgent(gomock.Any(), "server1"). + Return(&jobtypes.AgentInfo{ + Hostname: "server1", + State: jobtypes.AgentStateReady, + }, nil) + mock.EXPECT(). + SetDrainFlag(gomock.Any(), "server1"). + Return(nil) + mock.EXPECT(). + WriteAgentTimelineEvent(gomock.Any(), "server1", "drain", "Drain initiated via API"). + Return(nil) + return mock + }, + wantCode: http.StatusOK, + wantContains: []string{`"message"`, `drain initiated`}, + }, + { + name: "when agent not found returns 404", + hostname: "unknown", + setupJobMock: func() *jobmocks.MockJobClient { + mock := jobmocks.NewMockJobClient(s.mockCtrl) + mock.EXPECT(). + GetAgent(gomock.Any(), "unknown"). + Return(nil, fmt.Errorf("agent not found: unknown")) + return mock + }, + wantCode: http.StatusNotFound, + wantContains: []string{`"error"`}, + }, + { + name: "when agent already draining returns 409", + hostname: "server1", + setupJobMock: func() *jobmocks.MockJobClient { + mock := jobmocks.NewMockJobClient(s.mockCtrl) + mock.EXPECT(). + GetAgent(gomock.Any(), "server1"). 
+ Return(&jobtypes.AgentInfo{ + Hostname: "server1", + State: jobtypes.AgentStateDraining, + }, nil) + return mock + }, + wantCode: http.StatusConflict, + wantContains: []string{`"error"`, `already in Draining`}, + }, + } + + for _, tc := range tests { + s.Run(tc.name, func() { + jobMock := tc.setupJobMock() + + agentHandler := apiagent.New(s.logger, jobMock) + strictHandler := gen.NewStrictHandler(agentHandler, nil) + + a := api.New(s.appConfig, s.logger) + gen.RegisterHandlers(a.Echo, strictHandler) + + req := httptest.NewRequest( + http.MethodPost, + fmt.Sprintf("/agent/%s/drain", tc.hostname), + nil, + ) + rec := httptest.NewRecorder() + + a.Echo.ServeHTTP(rec, req) + + s.Equal(tc.wantCode, rec.Code) + for _, str := range tc.wantContains { + s.Contains(rec.Body.String(), str) + } + }) + } +} + +const rbacAgentDrainTestSigningKey = "test-signing-key-for-rbac-agent-drain" + +func (s *AgentDrainPublicTestSuite) TestDrainAgentRBACHTTP() { + tokenManager := authtoken.New(s.logger) + + tests := []struct { + name string + setupAuth func(req *http.Request) + setupJobMock func() *jobmocks.MockJobClient + wantCode int + wantContains []string + }{ + { + name: "when no token returns 401", + setupAuth: func(_ *http.Request) { + // No auth header set + }, + setupJobMock: func() *jobmocks.MockJobClient { + return jobmocks.NewMockJobClient(s.mockCtrl) + }, + wantCode: http.StatusUnauthorized, + wantContains: []string{"Bearer token required"}, + }, + { + name: "when insufficient permissions returns 403", + setupAuth: func(req *http.Request) { + token, err := tokenManager.Generate( + rbacAgentDrainTestSigningKey, + []string{"read"}, + "test-user", + []string{"agent:read"}, + ) + s.Require().NoError(err) + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", token)) + }, + setupJobMock: func() *jobmocks.MockJobClient { + return jobmocks.NewMockJobClient(s.mockCtrl) + }, + wantCode: http.StatusForbidden, + wantContains: []string{"Insufficient permissions"}, + }, + { + name: 
"when valid token with agent:write returns 200", + setupAuth: func(req *http.Request) { + token, err := tokenManager.Generate( + rbacAgentDrainTestSigningKey, + []string{"admin"}, + "test-user", + nil, + ) + s.Require().NoError(err) + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", token)) + }, + setupJobMock: func() *jobmocks.MockJobClient { + mock := jobmocks.NewMockJobClient(s.mockCtrl) + mock.EXPECT(). + GetAgent(gomock.Any(), "server1"). + Return(&jobtypes.AgentInfo{ + Hostname: "server1", + State: jobtypes.AgentStateReady, + }, nil) + mock.EXPECT(). + SetDrainFlag(gomock.Any(), "server1"). + Return(nil) + mock.EXPECT(). + WriteAgentTimelineEvent(gomock.Any(), "server1", "drain", "Drain initiated via API"). + Return(nil) + return mock + }, + wantCode: http.StatusOK, + wantContains: []string{`"message"`, `drain initiated`}, + }, + } + + for _, tc := range tests { + s.Run(tc.name, func() { + jobMock := tc.setupJobMock() + + appConfig := config.Config{ + API: config.API{ + Server: config.Server{ + Security: config.ServerSecurity{ + SigningKey: rbacAgentDrainTestSigningKey, + }, + }, + }, + } + + server := api.New(appConfig, s.logger) + handlers := server.GetAgentHandler(jobMock) + server.RegisterHandlers(handlers) + + req := httptest.NewRequest( + http.MethodPost, + "/agent/server1/drain", + nil, + ) + tc.setupAuth(req) + rec := httptest.NewRecorder() + + server.Echo.ServeHTTP(rec, req) + + s.Equal(tc.wantCode, rec.Code) + for _, str := range tc.wantContains { + s.Contains(rec.Body.String(), str) + } + }) + } +} + +func TestAgentDrainPublicTestSuite(t *testing.T) { + suite.Run(t, new(AgentDrainPublicTestSuite)) +} diff --git a/internal/api/agent/agent_get_public_test.go b/internal/api/agent/agent_get_public_test.go index 411d2991..635958b3 100644 --- a/internal/api/agent/agent_get_public_test.go +++ b/internal/api/agent/agent_get_public_test.go @@ -94,7 +94,7 @@ func (s *AgentGetPublicTestSuite) TestGetAgentDetails() { r, ok := 
resp.(gen.GetAgentDetails200JSONResponse) s.True(ok) s.Equal("server1", r.Hostname) - s.Equal(gen.Ready, r.Status) + s.Equal(gen.AgentInfoStatusReady, r.Status) s.NotNil(r.Labels) s.NotNil(r.OsInfo) s.Equal("Ubuntu", r.OsInfo.Distribution) diff --git a/internal/api/agent/agent_list.go b/internal/api/agent/agent_list.go index b2628d53..02e0b44f 100644 --- a/internal/api/agent/agent_list.go +++ b/internal/api/agent/agent_list.go @@ -59,7 +59,7 @@ func (a *Agent) GetAgent( func buildAgentInfo( a *job.AgentInfo, ) gen.AgentInfo { - status := gen.Ready + status := gen.AgentInfoStatusReady info := gen.AgentInfo{ Hostname: a.Hostname, Status: status, @@ -167,6 +167,50 @@ func buildAgentInfo( info.Facts = &facts } + if a.State != "" { + state := gen.AgentInfoState(a.State) + info.State = &state + } + + if len(a.Conditions) > 0 { + conditions := make([]gen.NodeCondition, len(a.Conditions)) + for i, c := range a.Conditions { + conditions[i] = gen.NodeCondition{ + Type: gen.NodeConditionType(c.Type), + Status: c.Status, + LastTransitionTime: c.LastTransitionTime, + } + if c.Reason != "" { + reason := c.Reason + conditions[i].Reason = &reason + } + } + info.Conditions = &conditions + } + + if len(a.Timeline) > 0 { + timeline := make([]gen.TimelineEvent, len(a.Timeline)) + for i, te := range a.Timeline { + timeline[i] = gen.TimelineEvent{ + Timestamp: te.Timestamp, + Event: te.Event, + } + if te.Hostname != "" { + hostname := te.Hostname + timeline[i].Hostname = &hostname + } + if te.Message != "" { + message := te.Message + timeline[i].Message = &message + } + if te.Error != "" { + errStr := te.Error + timeline[i].Error = &errStr + } + } + info.Timeline = &timeline + } + return info } diff --git a/internal/api/agent/agent_list_public_test.go b/internal/api/agent/agent_list_public_test.go index 38f43a21..78b641d0 100644 --- a/internal/api/agent/agent_list_public_test.go +++ b/internal/api/agent/agent_list_public_test.go @@ -98,7 +98,7 @@ func (s *AgentListPublicTestSuite) 
TestGetAgent() { s.Equal(2, r.Total) s.Len(r.Agents, 2) s.Equal("server1", r.Agents[0].Hostname) - s.Equal(gen.Ready, r.Agents[0].Status) + s.Equal(gen.AgentInfoStatusReady, r.Agents[0].Status) s.NotNil(r.Agents[0].Labels) s.NotNil(r.Agents[0].RegisteredAt) s.NotNil(r.Agents[0].StartedAt) @@ -108,7 +108,7 @@ func (s *AgentListPublicTestSuite) TestGetAgent() { s.NotNil(r.Agents[0].Memory) s.NotNil(r.Agents[0].Uptime) s.Equal("server2", r.Agents[1].Hostname) - s.Equal(gen.Ready, r.Agents[1].Status) + s.Equal(gen.AgentInfoStatusReady, r.Agents[1].Status) }, }, { diff --git a/internal/api/agent/agent_undrain.go b/internal/api/agent/agent_undrain.go new file mode 100644 index 00000000..67bad3cc --- /dev/null +++ b/internal/api/agent/agent_undrain.go @@ -0,0 +1,72 @@ +// Copyright (c) 2026 John Dewey + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal in the Software without restriction, including without limitation the +// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +// sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. 
+ +package agent + +import ( + "context" + "fmt" + "strings" + + "github.com/retr0h/osapi/internal/api/agent/gen" + "github.com/retr0h/osapi/internal/job" +) + +// UndrainAgent handles POST /agent/{hostname}/undrain. +func (a *Agent) UndrainAgent( + ctx context.Context, + request gen.UndrainAgentRequestObject, +) (gen.UndrainAgentResponseObject, error) { + hostname := request.Hostname + + agentInfo, err := a.JobClient.GetAgent(ctx, hostname) + if err != nil { + errMsg := fmt.Sprintf("agent not found: %s", hostname) + return gen.UndrainAgent404JSONResponse{Error: &errMsg}, nil + } + + if agentInfo.State != job.AgentStateDraining && agentInfo.State != job.AgentStateCordoned { + errMsg := fmt.Sprintf( + "agent %s is not in draining or cordoned state (current: %s)", + hostname, + agentInfo.State, + ) + return gen.UndrainAgent409JSONResponse{Error: &errMsg}, nil + } + + if err := a.JobClient.DeleteDrainFlag(ctx, hostname); err != nil { + errMsg := fmt.Sprintf("failed to delete drain flag: %s", err.Error()) + return gen.UndrainAgent409JSONResponse{Error: &errMsg}, nil + } + + if err := a.JobClient.WriteAgentTimelineEvent(ctx, hostname, "undrain", "Undrain initiated via API"); err != nil { + if strings.Contains(err.Error(), "not found") { + errMsg := fmt.Sprintf("agent not found: %s", hostname) + return gen.UndrainAgent404JSONResponse{Error: &errMsg}, nil + } + + errMsg := err.Error() + return gen.UndrainAgent409JSONResponse{Error: &errMsg}, nil + } + + msg := fmt.Sprintf("undrain initiated for agent %s", hostname) + + return gen.UndrainAgent200JSONResponse{Message: msg}, nil +} diff --git a/internal/api/agent/agent_undrain_public_test.go b/internal/api/agent/agent_undrain_public_test.go new file mode 100644 index 00000000..30b55bbb --- /dev/null +++ b/internal/api/agent/agent_undrain_public_test.go @@ -0,0 +1,372 @@ +// Copyright (c) 2026 John Dewey + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated 
documentation files (the "Software"), to +// deal in the Software without restriction, including without limitation the +// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +// sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. + +package agent_test + +import ( + "context" + "fmt" + "log/slog" + "net/http" + "net/http/httptest" + "os" + "testing" + + "github.com/golang/mock/gomock" + "github.com/stretchr/testify/suite" + + "github.com/retr0h/osapi/internal/api" + apiagent "github.com/retr0h/osapi/internal/api/agent" + "github.com/retr0h/osapi/internal/api/agent/gen" + "github.com/retr0h/osapi/internal/authtoken" + "github.com/retr0h/osapi/internal/config" + jobtypes "github.com/retr0h/osapi/internal/job" + jobmocks "github.com/retr0h/osapi/internal/job/mocks" +) + +type AgentUndrainPublicTestSuite struct { + suite.Suite + + mockCtrl *gomock.Controller + mockJobClient *jobmocks.MockJobClient + handler *apiagent.Agent + ctx context.Context + appConfig config.Config + logger *slog.Logger +} + +func (s *AgentUndrainPublicTestSuite) SetupTest() { + s.mockCtrl = gomock.NewController(s.T()) + s.mockJobClient = jobmocks.NewMockJobClient(s.mockCtrl) + s.handler = apiagent.New(slog.Default(), s.mockJobClient) + s.ctx = context.Background() + 
s.appConfig = config.Config{} + s.logger = slog.New(slog.NewTextHandler(os.Stdout, nil)) +} + +func (s *AgentUndrainPublicTestSuite) TearDownTest() { + s.mockCtrl.Finish() +} + +func (s *AgentUndrainPublicTestSuite) TestUndrainAgent() { + tests := []struct { + name string + hostname string + mockAgent *jobtypes.AgentInfo + mockGetErr error + mockWriteErr error + skipWrite bool + mockDeleteDrain bool + validateFunc func(resp gen.UndrainAgentResponseObject) + }{ + { + name: "success undrains draining agent", + hostname: "server1", + mockAgent: &jobtypes.AgentInfo{ + Hostname: "server1", + State: jobtypes.AgentStateDraining, + }, + mockDeleteDrain: true, + validateFunc: func(resp gen.UndrainAgentResponseObject) { + r, ok := resp.(gen.UndrainAgent200JSONResponse) + s.True(ok) + s.Contains(r.Message, "undrain initiated for agent server1") + }, + }, + { + name: "success undrains cordoned agent", + hostname: "server1", + mockAgent: &jobtypes.AgentInfo{ + Hostname: "server1", + State: jobtypes.AgentStateCordoned, + }, + mockDeleteDrain: true, + validateFunc: func(resp gen.UndrainAgentResponseObject) { + r, ok := resp.(gen.UndrainAgent200JSONResponse) + s.True(ok) + s.Contains(r.Message, "undrain initiated for agent server1") + }, + }, + { + name: "agent not found returns 404", + hostname: "unknown", + mockGetErr: fmt.Errorf("agent not found: unknown"), + skipWrite: true, + validateFunc: func(resp gen.UndrainAgentResponseObject) { + _, ok := resp.(gen.UndrainAgent404JSONResponse) + s.True(ok) + }, + }, + { + name: "agent in ready state returns 409", + hostname: "server1", + mockAgent: &jobtypes.AgentInfo{ + Hostname: "server1", + State: jobtypes.AgentStateReady, + }, + skipWrite: true, + validateFunc: func(resp gen.UndrainAgentResponseObject) { + _, ok := resp.(gen.UndrainAgent409JSONResponse) + s.True(ok) + }, + }, + { + name: "agent with empty state returns 409", + hostname: "server1", + mockAgent: &jobtypes.AgentInfo{ + Hostname: "server1", + State: "", + }, + skipWrite: 
true, + validateFunc: func(resp gen.UndrainAgentResponseObject) { + _, ok := resp.(gen.UndrainAgent409JSONResponse) + s.True(ok) + }, + }, + } + + for _, tt := range tests { + s.Run(tt.name, func() { + s.mockJobClient.EXPECT(). + GetAgent(gomock.Any(), tt.hostname). + Return(tt.mockAgent, tt.mockGetErr) + + if tt.mockDeleteDrain { + s.mockJobClient.EXPECT(). + DeleteDrainFlag(gomock.Any(), tt.hostname). + Return(nil) + } + + if !tt.skipWrite { + s.mockJobClient.EXPECT(). + WriteAgentTimelineEvent(gomock.Any(), tt.hostname, "undrain", "Undrain initiated via API"). + Return(tt.mockWriteErr) + } + + resp, err := s.handler.UndrainAgent(s.ctx, gen.UndrainAgentRequestObject{ + Hostname: tt.hostname, + }) + s.NoError(err) + tt.validateFunc(resp) + }) + } +} + +func (s *AgentUndrainPublicTestSuite) TestUndrainAgentValidationHTTP() { + tests := []struct { + name string + hostname string + setupJobMock func() *jobmocks.MockJobClient + wantCode int + wantContains []string + }{ + { + name: "when draining agent exists returns 200", + hostname: "server1", + setupJobMock: func() *jobmocks.MockJobClient { + mock := jobmocks.NewMockJobClient(s.mockCtrl) + mock.EXPECT(). + GetAgent(gomock.Any(), "server1"). + Return(&jobtypes.AgentInfo{ + Hostname: "server1", + State: jobtypes.AgentStateDraining, + }, nil) + mock.EXPECT(). + DeleteDrainFlag(gomock.Any(), "server1"). + Return(nil) + mock.EXPECT(). + WriteAgentTimelineEvent(gomock.Any(), "server1", "undrain", "Undrain initiated via API"). + Return(nil) + return mock + }, + wantCode: http.StatusOK, + wantContains: []string{`"message"`, `undrain initiated`}, + }, + { + name: "when agent not found returns 404", + hostname: "unknown", + setupJobMock: func() *jobmocks.MockJobClient { + mock := jobmocks.NewMockJobClient(s.mockCtrl) + mock.EXPECT(). + GetAgent(gomock.Any(), "unknown"). 
+ Return(nil, fmt.Errorf("agent not found: unknown")) + return mock + }, + wantCode: http.StatusNotFound, + wantContains: []string{`"error"`}, + }, + { + name: "when agent in ready state returns 409", + hostname: "server1", + setupJobMock: func() *jobmocks.MockJobClient { + mock := jobmocks.NewMockJobClient(s.mockCtrl) + mock.EXPECT(). + GetAgent(gomock.Any(), "server1"). + Return(&jobtypes.AgentInfo{ + Hostname: "server1", + State: jobtypes.AgentStateReady, + }, nil) + return mock + }, + wantCode: http.StatusConflict, + wantContains: []string{`"error"`, `not in draining or cordoned`}, + }, + } + + for _, tc := range tests { + s.Run(tc.name, func() { + jobMock := tc.setupJobMock() + + agentHandler := apiagent.New(s.logger, jobMock) + strictHandler := gen.NewStrictHandler(agentHandler, nil) + + a := api.New(s.appConfig, s.logger) + gen.RegisterHandlers(a.Echo, strictHandler) + + req := httptest.NewRequest( + http.MethodPost, + fmt.Sprintf("/agent/%s/undrain", tc.hostname), + nil, + ) + rec := httptest.NewRecorder() + + a.Echo.ServeHTTP(rec, req) + + s.Equal(tc.wantCode, rec.Code) + for _, str := range tc.wantContains { + s.Contains(rec.Body.String(), str) + } + }) + } +} + +const rbacAgentUndrainTestSigningKey = "test-signing-key-for-rbac-agent-undrain" + +func (s *AgentUndrainPublicTestSuite) TestUndrainAgentRBACHTTP() { + tokenManager := authtoken.New(s.logger) + + tests := []struct { + name string + setupAuth func(req *http.Request) + setupJobMock func() *jobmocks.MockJobClient + wantCode int + wantContains []string + }{ + { + name: "when no token returns 401", + setupAuth: func(_ *http.Request) { + // No auth header set + }, + setupJobMock: func() *jobmocks.MockJobClient { + return jobmocks.NewMockJobClient(s.mockCtrl) + }, + wantCode: http.StatusUnauthorized, + wantContains: []string{"Bearer token required"}, + }, + { + name: "when insufficient permissions returns 403", + setupAuth: func(req *http.Request) { + token, err := tokenManager.Generate( + 
rbacAgentUndrainTestSigningKey, + []string{"read"}, + "test-user", + []string{"agent:read"}, + ) + s.Require().NoError(err) + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", token)) + }, + setupJobMock: func() *jobmocks.MockJobClient { + return jobmocks.NewMockJobClient(s.mockCtrl) + }, + wantCode: http.StatusForbidden, + wantContains: []string{"Insufficient permissions"}, + }, + { + name: "when valid token with agent:write returns 200", + setupAuth: func(req *http.Request) { + token, err := tokenManager.Generate( + rbacAgentUndrainTestSigningKey, + []string{"admin"}, + "test-user", + nil, + ) + s.Require().NoError(err) + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", token)) + }, + setupJobMock: func() *jobmocks.MockJobClient { + mock := jobmocks.NewMockJobClient(s.mockCtrl) + mock.EXPECT(). + GetAgent(gomock.Any(), "server1"). + Return(&jobtypes.AgentInfo{ + Hostname: "server1", + State: jobtypes.AgentStateDraining, + }, nil) + mock.EXPECT(). + DeleteDrainFlag(gomock.Any(), "server1"). + Return(nil) + mock.EXPECT(). + WriteAgentTimelineEvent(gomock.Any(), "server1", "undrain", "Undrain initiated via API"). 
+ Return(nil) + return mock + }, + wantCode: http.StatusOK, + wantContains: []string{`"message"`, `undrain initiated`}, + }, + } + + for _, tc := range tests { + s.Run(tc.name, func() { + jobMock := tc.setupJobMock() + + appConfig := config.Config{ + API: config.API{ + Server: config.Server{ + Security: config.ServerSecurity{ + SigningKey: rbacAgentUndrainTestSigningKey, + }, + }, + }, + } + + server := api.New(appConfig, s.logger) + handlers := server.GetAgentHandler(jobMock) + server.RegisterHandlers(handlers) + + req := httptest.NewRequest( + http.MethodPost, + "/agent/server1/undrain", + nil, + ) + tc.setupAuth(req) + rec := httptest.NewRecorder() + + server.Echo.ServeHTTP(rec, req) + + s.Equal(tc.wantCode, rec.Code) + for _, str := range tc.wantContains { + s.Contains(rec.Body.String(), str) + } + }) + } +} + +func TestAgentUndrainPublicTestSuite(t *testing.T) { + suite.Run(t, new(AgentUndrainPublicTestSuite)) +} diff --git a/internal/api/agent/gen/agent.gen.go b/internal/api/agent/gen/agent.gen.go index 60acbb7c..16ef4bec 100644 --- a/internal/api/agent/gen/agent.gen.go +++ b/internal/api/agent/gen/agent.gen.go @@ -20,10 +20,17 @@ const ( BearerAuthScopes = "BearerAuth.Scopes" ) +// Defines values for AgentInfoState. +const ( + AgentInfoStateCordoned AgentInfoState = "Cordoned" + AgentInfoStateDraining AgentInfoState = "Draining" + AgentInfoStateReady AgentInfoState = "Ready" +) + // Defines values for AgentInfoStatus. const ( - NotReady AgentInfoStatus = "NotReady" - Ready AgentInfoStatus = "Ready" + AgentInfoStatusNotReady AgentInfoStatus = "NotReady" + AgentInfoStatusReady AgentInfoStatus = "Ready" ) // Defines values for NetworkInterfaceResponseFamily. @@ -33,11 +40,21 @@ const ( Inet6 NetworkInterfaceResponseFamily = "inet6" ) +// Defines values for NodeConditionType. 
+const ( + DiskPressure NodeConditionType = "DiskPressure" + HighLoad NodeConditionType = "HighLoad" + MemoryPressure NodeConditionType = "MemoryPressure" +) + // AgentInfo defines model for AgentInfo. type AgentInfo struct { // Architecture CPU architecture. Architecture *string `json:"architecture,omitempty"` + // Conditions Evaluated node conditions. + Conditions *[]NodeCondition `json:"conditions,omitempty"` + // CpuCount Number of logical CPUs. CpuCount *int `json:"cpu_count,omitempty"` @@ -78,13 +95,22 @@ type AgentInfo struct { // StartedAt When the agent process started. StartedAt *time.Time `json:"started_at,omitempty"` + // State Agent scheduling state. + State *AgentInfoState `json:"state,omitempty"` + // Status The current status of the agent. Status AgentInfoStatus `json:"status"` + // Timeline Agent state transition history. + Timeline *[]TimelineEvent `json:"timeline,omitempty"` + // Uptime The system uptime. Uptime *string `json:"uptime,omitempty"` } +// AgentInfoState Agent scheduling state. +type AgentInfoState string + // AgentInfoStatus The current status of the agent. type AgentInfoStatus string @@ -136,6 +162,17 @@ type NetworkInterfaceResponse struct { // NetworkInterfaceResponseFamily IP address family. type NetworkInterfaceResponseFamily string +// NodeCondition defines model for NodeCondition. +type NodeCondition struct { + LastTransitionTime time.Time `json:"last_transition_time"` + Reason *string `json:"reason,omitempty"` + Status bool `json:"status"` + Type NodeConditionType `json:"type"` +} + +// NodeConditionType defines model for NodeCondition.Type. +type NodeConditionType string + // OSInfoResponse Operating system information. type OSInfoResponse struct { // Distribution The name of the Linux distribution. @@ -145,6 +182,15 @@ type OSInfoResponse struct { Version string `json:"version"` } +// TimelineEvent defines model for TimelineEvent. 
+type TimelineEvent struct { + Error *string `json:"error,omitempty"` + Event string `json:"event"` + Hostname *string `json:"hostname,omitempty"` + Message *string `json:"message,omitempty"` + Timestamp time.Time `json:"timestamp"` +} + // ServerInterface represents all server handlers. type ServerInterface interface { // List active agents @@ -153,6 +199,12 @@ type ServerInterface interface { // Get agent details // (GET /agent/{hostname}) GetAgentDetails(ctx echo.Context, hostname string) error + // Drain an agent + // (POST /agent/{hostname}/drain) + DrainAgent(ctx echo.Context, hostname string) error + // Undrain an agent + // (POST /agent/{hostname}/undrain) + UndrainAgent(ctx echo.Context, hostname string) error } // ServerInterfaceWrapper converts echo contexts to parameters. @@ -189,6 +241,42 @@ func (w *ServerInterfaceWrapper) GetAgentDetails(ctx echo.Context) error { return err } +// DrainAgent converts echo context to params. +func (w *ServerInterfaceWrapper) DrainAgent(ctx echo.Context) error { + var err error + // ------------- Path parameter "hostname" ------------- + var hostname string + + err = runtime.BindStyledParameterWithOptions("simple", "hostname", ctx.Param("hostname"), &hostname, runtime.BindStyledParameterOptions{ParamLocation: runtime.ParamLocationPath, Explode: false, Required: true}) + if err != nil { + return echo.NewHTTPError(http.StatusBadRequest, fmt.Sprintf("Invalid format for parameter hostname: %s", err)) + } + + ctx.Set(BearerAuthScopes, []string{"agent:write"}) + + // Invoke the callback with all the unmarshaled arguments + err = w.Handler.DrainAgent(ctx, hostname) + return err +} + +// UndrainAgent converts echo context to params. 
+func (w *ServerInterfaceWrapper) UndrainAgent(ctx echo.Context) error { + var err error + // ------------- Path parameter "hostname" ------------- + var hostname string + + err = runtime.BindStyledParameterWithOptions("simple", "hostname", ctx.Param("hostname"), &hostname, runtime.BindStyledParameterOptions{ParamLocation: runtime.ParamLocationPath, Explode: false, Required: true}) + if err != nil { + return echo.NewHTTPError(http.StatusBadRequest, fmt.Sprintf("Invalid format for parameter hostname: %s", err)) + } + + ctx.Set(BearerAuthScopes, []string{"agent:write"}) + + // Invoke the callback with all the unmarshaled arguments + err = w.Handler.UndrainAgent(ctx, hostname) + return err +} + // This is a simple interface which specifies echo.Route addition functions which // are present on both echo.Echo and echo.Group, since we want to allow using // either of them for path registration @@ -219,6 +307,8 @@ func RegisterHandlersWithBaseURL(router EchoRouter, si ServerInterface, baseURL router.GET(baseURL+"/agent", wrapper.GetAgent) router.GET(baseURL+"/agent/:hostname", wrapper.GetAgentDetails) + router.POST(baseURL+"/agent/:hostname/drain", wrapper.DrainAgent) + router.POST(baseURL+"/agent/:hostname/undrain", wrapper.UndrainAgent) } @@ -318,6 +408,116 @@ func (response GetAgentDetails500JSONResponse) VisitGetAgentDetailsResponse(w ht return json.NewEncoder(w).Encode(response) } +type DrainAgentRequestObject struct { + Hostname string `json:"hostname"` +} + +type DrainAgentResponseObject interface { + VisitDrainAgentResponse(w http.ResponseWriter) error +} + +type DrainAgent200JSONResponse struct { + Message string `json:"message"` +} + +func (response DrainAgent200JSONResponse) VisitDrainAgentResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(200) + + return json.NewEncoder(w).Encode(response) +} + +type DrainAgent401JSONResponse externalRef0.ErrorResponse + +func (response DrainAgent401JSONResponse) 
VisitDrainAgentResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(401) + + return json.NewEncoder(w).Encode(response) +} + +type DrainAgent403JSONResponse externalRef0.ErrorResponse + +func (response DrainAgent403JSONResponse) VisitDrainAgentResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(403) + + return json.NewEncoder(w).Encode(response) +} + +type DrainAgent404JSONResponse externalRef0.ErrorResponse + +func (response DrainAgent404JSONResponse) VisitDrainAgentResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(404) + + return json.NewEncoder(w).Encode(response) +} + +type DrainAgent409JSONResponse externalRef0.ErrorResponse + +func (response DrainAgent409JSONResponse) VisitDrainAgentResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(409) + + return json.NewEncoder(w).Encode(response) +} + +type UndrainAgentRequestObject struct { + Hostname string `json:"hostname"` +} + +type UndrainAgentResponseObject interface { + VisitUndrainAgentResponse(w http.ResponseWriter) error +} + +type UndrainAgent200JSONResponse struct { + Message string `json:"message"` +} + +func (response UndrainAgent200JSONResponse) VisitUndrainAgentResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(200) + + return json.NewEncoder(w).Encode(response) +} + +type UndrainAgent401JSONResponse externalRef0.ErrorResponse + +func (response UndrainAgent401JSONResponse) VisitUndrainAgentResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(401) + + return json.NewEncoder(w).Encode(response) +} + +type UndrainAgent403JSONResponse externalRef0.ErrorResponse + +func (response UndrainAgent403JSONResponse) VisitUndrainAgentResponse(w http.ResponseWriter) error { + 
w.Header().Set("Content-Type", "application/json") + w.WriteHeader(403) + + return json.NewEncoder(w).Encode(response) +} + +type UndrainAgent404JSONResponse externalRef0.ErrorResponse + +func (response UndrainAgent404JSONResponse) VisitUndrainAgentResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(404) + + return json.NewEncoder(w).Encode(response) +} + +type UndrainAgent409JSONResponse externalRef0.ErrorResponse + +func (response UndrainAgent409JSONResponse) VisitUndrainAgentResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(409) + + return json.NewEncoder(w).Encode(response) +} + // StrictServerInterface represents all server handlers. type StrictServerInterface interface { // List active agents @@ -326,6 +526,12 @@ type StrictServerInterface interface { // Get agent details // (GET /agent/{hostname}) GetAgentDetails(ctx context.Context, request GetAgentDetailsRequestObject) (GetAgentDetailsResponseObject, error) + // Drain an agent + // (POST /agent/{hostname}/drain) + DrainAgent(ctx context.Context, request DrainAgentRequestObject) (DrainAgentResponseObject, error) + // Undrain an agent + // (POST /agent/{hostname}/undrain) + UndrainAgent(ctx context.Context, request UndrainAgentRequestObject) (UndrainAgentResponseObject, error) } type StrictHandlerFunc = strictecho.StrictEchoHandlerFunc @@ -387,3 +593,53 @@ func (sh *strictHandler) GetAgentDetails(ctx echo.Context, hostname string) erro } return nil } + +// DrainAgent operation middleware +func (sh *strictHandler) DrainAgent(ctx echo.Context, hostname string) error { + var request DrainAgentRequestObject + + request.Hostname = hostname + + handler := func(ctx echo.Context, request interface{}) (interface{}, error) { + return sh.ssi.DrainAgent(ctx.Request().Context(), request.(DrainAgentRequestObject)) + } + for _, middleware := range sh.middlewares { + handler = middleware(handler, "DrainAgent") 
+ } + + response, err := handler(ctx, request) + + if err != nil { + return err + } else if validResponse, ok := response.(DrainAgentResponseObject); ok { + return validResponse.VisitDrainAgentResponse(ctx.Response()) + } else if response != nil { + return fmt.Errorf("unexpected response type: %T", response) + } + return nil +} + +// UndrainAgent operation middleware +func (sh *strictHandler) UndrainAgent(ctx echo.Context, hostname string) error { + var request UndrainAgentRequestObject + + request.Hostname = hostname + + handler := func(ctx echo.Context, request interface{}) (interface{}, error) { + return sh.ssi.UndrainAgent(ctx.Request().Context(), request.(UndrainAgentRequestObject)) + } + for _, middleware := range sh.middlewares { + handler = middleware(handler, "UndrainAgent") + } + + response, err := handler(ctx, request) + + if err != nil { + return err + } else if validResponse, ok := response.(UndrainAgentResponseObject); ok { + return validResponse.VisitUndrainAgentResponse(ctx.Response()) + } else if response != nil { + return fmt.Errorf("unexpected response type: %T", response) + } + return nil +} diff --git a/internal/api/agent/gen/api.yaml b/internal/api/agent/gen/api.yaml index ad1f5d42..26fe3050 100644 --- a/internal/api/agent/gen/api.yaml +++ b/internal/api/agent/gen/api.yaml @@ -110,6 +110,114 @@ paths: application/json: schema: $ref: '../../common/gen/api.yaml#/components/schemas/ErrorResponse' + /agent/{hostname}/drain: + post: + operationId: drainAgent + summary: Drain an agent + description: > + Stop the agent from accepting new jobs. In-flight jobs continue + to completion. + tags: + - agent_operations + security: + - BearerAuth: + - agent:write + parameters: + - name: hostname + in: path + required: true + schema: + type: string + description: The hostname of the agent to drain. + responses: + '200': + description: Agent drain initiated. 
+ content: + application/json: + schema: + type: object + properties: + message: + type: string + required: + - message + '401': + description: Unauthorized - API key required + content: + application/json: + schema: + $ref: '../../common/gen/api.yaml#/components/schemas/ErrorResponse' + '403': + description: Forbidden - Insufficient permissions + content: + application/json: + schema: + $ref: '../../common/gen/api.yaml#/components/schemas/ErrorResponse' + '404': + description: Agent not found. + content: + application/json: + schema: + $ref: '../../common/gen/api.yaml#/components/schemas/ErrorResponse' + '409': + description: Agent already in requested state. + content: + application/json: + schema: + $ref: '../../common/gen/api.yaml#/components/schemas/ErrorResponse' + /agent/{hostname}/undrain: + post: + operationId: undrainAgent + summary: Undrain an agent + description: Resume accepting jobs on a drained agent. + tags: + - agent_operations + security: + - BearerAuth: + - agent:write + parameters: + - name: hostname + in: path + required: true + schema: + type: string + description: The hostname of the agent to undrain. + responses: + '200': + description: Agent undrain initiated. + content: + application/json: + schema: + type: object + properties: + message: + type: string + required: + - message + '401': + description: Unauthorized - API key required + content: + application/json: + schema: + $ref: '../../common/gen/api.yaml#/components/schemas/ErrorResponse' + '403': + description: Forbidden - Insufficient permissions + content: + application/json: + schema: + $ref: '../../common/gen/api.yaml#/components/schemas/ErrorResponse' + '404': + description: Agent not found. + content: + application/json: + schema: + $ref: '../../common/gen/api.yaml#/components/schemas/ErrorResponse' + '409': + description: Agent not in draining or cordoned state. 
+ content: + application/json: + schema: + $ref: '../../common/gen/api.yaml#/components/schemas/ErrorResponse' components: securitySchemes: @@ -200,6 +308,20 @@ components: type: object additionalProperties: true description: Extended facts from additional providers. + state: + type: string + enum: [Ready, Draining, Cordoned] + description: Agent scheduling state. + conditions: + type: array + items: + $ref: '#/components/schemas/NodeCondition' + description: Evaluated node conditions. + timeline: + type: array + items: + $ref: '#/components/schemas/TimelineEvent' + description: Agent state transition history. required: - hostname - status @@ -287,3 +409,39 @@ components: - dual required: - name + + NodeCondition: + type: object + properties: + type: + type: string + enum: [MemoryPressure, HighLoad, DiskPressure] + status: + type: boolean + reason: + type: string + last_transition_time: + type: string + format: date-time + required: + - type + - status + - last_transition_time + + TimelineEvent: + type: object + properties: + timestamp: + type: string + format: date-time + event: + type: string + hostname: + type: string + message: + type: string + error: + type: string + required: + - timestamp + - event diff --git a/internal/api/gen/api.yaml b/internal/api/gen/api.yaml index 0dbe61d0..104c36da 100644 --- a/internal/api/gen/api.yaml +++ b/internal/api/gen/api.yaml @@ -118,6 +118,116 @@ paths: application/json: schema: $ref: '#/components/schemas/ErrorResponse' + /agent/{hostname}/drain: + servers: [] + post: + operationId: drainAgent + summary: Drain an agent + description: > + Stop the agent from accepting new jobs. In-flight jobs continue to + completion. + tags: + - Agent_Management_API_agent_operations + security: + - BearerAuth: + - agent:write + parameters: + - name: hostname + in: path + required: true + schema: + type: string + description: The hostname of the agent to drain. + responses: + '200': + description: Agent drain initiated. 
+ content: + application/json: + schema: + type: object + properties: + message: + type: string + required: + - message + '401': + description: Unauthorized - API key required + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + '403': + description: Forbidden - Insufficient permissions + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + '404': + description: Agent not found. + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + '409': + description: Agent already in requested state. + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + /agent/{hostname}/undrain: + servers: [] + post: + operationId: undrainAgent + summary: Undrain an agent + description: Resume accepting jobs on a drained agent. + tags: + - Agent_Management_API_agent_operations + security: + - BearerAuth: + - agent:write + parameters: + - name: hostname + in: path + required: true + schema: + type: string + description: The hostname of the agent to undrain. + responses: + '200': + description: Agent undrain initiated. + content: + application/json: + schema: + type: object + properties: + message: + type: string + required: + - message + '401': + description: Unauthorized - API key required + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + '403': + description: Forbidden - Insufficient permissions + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + '404': + description: Agent not found. + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + '409': + description: Agent not in draining or cordoned state. + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' /audit: servers: [] get: @@ -1380,6 +1490,23 @@ components: type: object additionalProperties: true description: Extended facts from additional providers. 
+ state: + type: string + enum: + - Ready + - Draining + - Cordoned + description: Agent scheduling state. + conditions: + type: array + items: + $ref: '#/components/schemas/NodeCondition' + description: Evaluated node conditions. + timeline: + type: array + items: + $ref: '#/components/schemas/TimelineEvent' + description: Agent state transition history. required: - hostname - status @@ -1463,6 +1590,43 @@ components: - dual required: - name + NodeCondition: + type: object + properties: + type: + type: string + enum: + - MemoryPressure + - HighLoad + - DiskPressure + status: + type: boolean + reason: + type: string + last_transition_time: + type: string + format: date-time + required: + - type + - status + - last_transition_time + TimelineEvent: + type: object + properties: + timestamp: + type: string + format: date-time + event: + type: string + hostname: + type: string + message: + type: string + error: + type: string + required: + - timestamp + - event AuditEntry: type: object properties: diff --git a/internal/authtoken/permissions.go b/internal/authtoken/permissions.go index ae2dbd4e..df3f2611 100644 --- a/internal/authtoken/permissions.go +++ b/internal/authtoken/permissions.go @@ -26,6 +26,7 @@ type Permission = string // Permission constants using resource:verb format. const ( PermAgentRead Permission = "agent:read" + PermAgentWrite Permission = "agent:write" PermNodeRead Permission = "node:read" PermNetworkRead Permission = "network:read" PermNetworkWrite Permission = "network:write" @@ -39,6 +40,7 @@ const ( // AllPermissions is the full set of known permissions. 
var AllPermissions = []Permission{ PermAgentRead, + PermAgentWrite, PermNodeRead, PermNetworkRead, PermNetworkWrite, @@ -53,6 +55,7 @@ var AllPermissions = []Permission{ var DefaultRolePermissions = map[string][]Permission{ "admin": { PermAgentRead, + PermAgentWrite, PermNodeRead, PermNetworkRead, PermNetworkWrite, diff --git a/internal/cli/nats.go b/internal/cli/nats.go index 419d1543..b3d7e1c7 100644 --- a/internal/cli/nats.go +++ b/internal/cli/nats.go @@ -107,6 +107,21 @@ func BuildFactsKVConfig( } } +// BuildStateKVConfig builds a jetstream.KeyValueConfig from state config values. +// The state bucket has no TTL so drain flags and timeline events persist indefinitely. +func BuildStateKVConfig( + namespace string, + stateCfg config.NATSState, +) jetstream.KeyValueConfig { + stateBucket := job.ApplyNamespaceToInfraName(namespace, stateCfg.Bucket) + + return jetstream.KeyValueConfig{ + Bucket: stateBucket, + Storage: ParseJetstreamStorageType(stateCfg.Storage), + Replicas: stateCfg.Replicas, + } +} + // BuildAuditKVConfig builds a jetstream.KeyValueConfig from audit config values. func BuildAuditKVConfig( namespace string, diff --git a/internal/config/types.go b/internal/config/types.go index 61764d7f..afbcbfb6 100644 --- a/internal/config/types.go +++ b/internal/config/types.go @@ -93,6 +93,7 @@ type NATS struct { Audit NATSAudit `mapstructure:"audit,omitempty"` Registry NATSRegistry `mapstructure:"registry,omitempty"` Facts NATSFacts `mapstructure:"facts,omitempty"` + State NATSState `mapstructure:"state,omitempty"` } // NATSAudit configuration for the audit log KV bucket. @@ -123,6 +124,14 @@ type NATSFacts struct { Replicas int `mapstructure:"replicas"` } +// NATSState configuration for the agent state KV bucket (drain flags, timeline events). +type NATSState struct { + // Bucket is the KV bucket name for persistent agent state. 
+ Bucket string `mapstructure:"bucket"` + Storage string `mapstructure:"storage"` // "file" or "memory" + Replicas int `mapstructure:"replicas"` +} + // NATSServer configuration settings for the embedded NATS server. type NATSServer struct { // Host the server will bind to. @@ -258,6 +267,13 @@ type AgentFacts struct { Interval string `mapstructure:"interval"` // e.g. "5m", "1h" } +// AgentConditions holds threshold configuration for node conditions. +type AgentConditions struct { + MemoryPressureThreshold int `mapstructure:"memory_pressure_threshold"` + HighLoadMultiplier float64 `mapstructure:"high_load_multiplier"` + DiskPressureThreshold int `mapstructure:"disk_pressure_threshold"` +} + // AgentConfig configuration settings. type AgentConfig struct { // NATS connection settings for the agent. @@ -274,4 +290,6 @@ type AgentConfig struct { MaxJobs int `mapstructure:"max_jobs"` // Labels are key-value pairs for label-based routing (e.g., role: web, env: prod). Labels map[string]string `mapstructure:"labels"` + // Conditions holds threshold settings for node condition evaluation. + Conditions AgentConditions `mapstructure:"conditions,omitempty"` } diff --git a/internal/job/client/agent.go b/internal/job/client/agent.go index d0ab7cd8..9b9aec44 100644 --- a/internal/job/client/agent.go +++ b/internal/job/client/agent.go @@ -26,6 +26,8 @@ import ( "fmt" "log/slog" "regexp" + "sort" + "strings" "time" "github.com/nats-io/nats.go/jetstream" @@ -164,6 +166,194 @@ func (c *Client) CreateOrUpdateConsumer( return c.natsClient.CreateOrUpdateConsumerWithConfig(ctx, streamName, consumerConfig) } +// WriteAgentTimelineEvent writes an append-only timeline event +// for an agent state transition. 
+func (c *Client) WriteAgentTimelineEvent( + ctx context.Context, + hostname, event, message string, +) error { + if c.stateKV == nil { + return fmt.Errorf("agent state bucket not configured") + } + + now := time.Now() + key := fmt.Sprintf( + "timeline.%s.%s.%d", + job.SanitizeHostname(hostname), + event, + now.UnixNano(), + ) + + data, err := json.Marshal(job.TimelineEvent{ + Timestamp: now, + Event: event, + Hostname: hostname, + Message: message, + }) + if err != nil { + return fmt.Errorf("marshal timeline event: %w", err) + } + + _, err = c.stateKV.Put(ctx, key, data) + if err != nil { + return fmt.Errorf("write timeline event: %w", err) + } + + c.logger.Debug("wrote agent timeline event", + slog.String("hostname", hostname), + slog.String("event", event), + slog.String("key", key), + ) + + return nil +} + +// GetAgentTimeline returns sorted timeline events for a hostname. +func (c *Client) GetAgentTimeline( + ctx context.Context, + hostname string, +) ([]job.TimelineEvent, error) { + if c.stateKV == nil { + return nil, fmt.Errorf("agent state bucket not configured") + } + + prefix := "timeline." + job.SanitizeHostname(hostname) + "." + + keys, err := c.stateKV.Keys(ctx) + if err != nil { + // No keys found is not an error for timeline + return []job.TimelineEvent{}, nil + } + + var events []job.TimelineEvent + for _, key := range keys { + if !strings.HasPrefix(key, prefix) { + continue + } + + entry, err := c.stateKV.Get(ctx, key) + if err != nil { + continue + } + + var te job.TimelineEvent + if err := json.Unmarshal(entry.Value(), &te); err != nil { + continue + } + + events = append(events, te) + } + + // Sort by timestamp + sort.Slice(events, func(i, j int) bool { + return events[i].Timestamp.Before(events[j].Timestamp) + }) + + return events, nil +} + +// ComputeAgentState returns the current state from timeline events. 
+func ComputeAgentState( + events []job.TimelineEvent, +) string { + if len(events) == 0 { + return job.AgentStateReady + } + + latest := events[len(events)-1] + switch latest.Event { + case "drain": + return job.AgentStateDraining + case "cordoned": + return job.AgentStateCordoned + case "undrain", "ready": + return job.AgentStateReady + default: + return job.AgentStateReady + } +} + +// overlayDrainState checks if a drain flag exists for the agent and +// overrides the reported state. The agent always reports its own view +// (Ready), but the operator may have drained it via the API. Drain +// flags are stored in the agent-state KV bucket (no TTL). +func (c *Client) overlayDrainState( + ctx context.Context, + info *job.AgentInfo, +) { + if c.stateKV == nil { + return + } + + key := "drain." + job.SanitizeHostname(info.Hostname) + _, err := c.stateKV.Get(ctx, key) + if err == nil { + info.State = job.AgentStateCordoned + } +} + +// CheckDrainFlag returns true if the drain flag exists for the hostname. +func (c *Client) CheckDrainFlag( + ctx context.Context, + hostname string, +) bool { + if c.stateKV == nil { + return false + } + + key := "drain." + job.SanitizeHostname(hostname) + _, err := c.stateKV.Get(ctx, key) + return err == nil +} + +// SetDrainFlag writes the drain flag for an agent in the state KV bucket. +// The agent detects this flag on heartbeat and stops accepting jobs. +func (c *Client) SetDrainFlag( + ctx context.Context, + hostname string, +) error { + if c.stateKV == nil { + return fmt.Errorf("agent state bucket not configured") + } + + key := "drain." + job.SanitizeHostname(hostname) + _, err := c.stateKV.Put(ctx, key, []byte("1")) + if err != nil { + return fmt.Errorf("set drain flag: %w", err) + } + + c.logger.Debug("set drain flag", + slog.String("hostname", hostname), + slog.String("key", key), + ) + + return nil +} + +// DeleteDrainFlag removes the drain flag for an agent from the state KV bucket. 
+// The agent detects this on heartbeat and resumes accepting jobs. +func (c *Client) DeleteDrainFlag( + ctx context.Context, + hostname string, +) error { + if c.stateKV == nil { + return fmt.Errorf("agent state bucket not configured") + } + + key := "drain." + job.SanitizeHostname(hostname) + err := c.stateKV.Delete(ctx, key) + if err != nil { + return fmt.Errorf("delete drain flag: %w", err) + } + + c.logger.Debug("deleted drain flag", + slog.String("hostname", hostname), + slog.String("key", key), + ) + + return nil +} + // sanitizeKeyForNATS sanitizes a string for use as a NATS key. func sanitizeKeyForNATS( input string, diff --git a/internal/job/client/agent_drain_public_test.go b/internal/job/client/agent_drain_public_test.go new file mode 100644 index 00000000..9301403c --- /dev/null +++ b/internal/job/client/agent_drain_public_test.go @@ -0,0 +1,353 @@ +// Copyright (c) 2026 John Dewey + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal in the Software without restriction, including without limitation the +// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +// sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. 
+ +package client_test + +import ( + "context" + "errors" + "log/slog" + "testing" + "time" + + "github.com/golang/mock/gomock" + "github.com/stretchr/testify/suite" + + "github.com/retr0h/osapi/internal/job" + "github.com/retr0h/osapi/internal/job/client" + jobmocks "github.com/retr0h/osapi/internal/job/mocks" +) + +type AgentDrainPublicTestSuite struct { + suite.Suite + + mockCtrl *gomock.Controller + mockNATSClient *jobmocks.MockNATSClient + mockKV *jobmocks.MockKeyValue + ctx context.Context +} + +func (s *AgentDrainPublicTestSuite) SetupTest() { + s.mockCtrl = gomock.NewController(s.T()) + s.mockNATSClient = jobmocks.NewMockNATSClient(s.mockCtrl) + s.mockKV = jobmocks.NewMockKeyValue(s.mockCtrl) + s.ctx = context.Background() +} + +func (s *AgentDrainPublicTestSuite) TearDownTest() { + s.mockCtrl.Finish() +} + +func (s *AgentDrainPublicTestSuite) newClientWithState( + stateKV *jobmocks.MockKeyValue, +) *client.Client { + opts := &client.Options{ + Timeout: 30 * time.Second, + KVBucket: s.mockKV, + StateKV: stateKV, + } + c, err := client.New(slog.Default(), s.mockNATSClient, opts) + s.Require().NoError(err) + + return c +} + +func (s *AgentDrainPublicTestSuite) newClientWithoutState() *client.Client { + opts := &client.Options{ + Timeout: 30 * time.Second, + KVBucket: s.mockKV, + } + c, err := client.New(slog.Default(), s.mockNATSClient, opts) + s.Require().NoError(err) + + return c +} + +func (s *AgentDrainPublicTestSuite) TestCheckDrainFlag() { + tests := []struct { + name string + hostname string + useState bool + setupMocks func(*jobmocks.MockKeyValue) + expected bool + }{ + { + name: "when drain flag exists returns true", + hostname: "server1", + useState: true, + setupMocks: func(kv *jobmocks.MockKeyValue) { + entry := jobmocks.NewMockKeyValueEntry(s.mockCtrl) + kv.EXPECT(). + Get(gomock.Any(), "drain.server1"). 
+ Return(entry, nil) + }, + expected: true, + }, + { + name: "when drain flag missing returns false", + hostname: "server1", + useState: true, + setupMocks: func(kv *jobmocks.MockKeyValue) { + kv.EXPECT(). + Get(gomock.Any(), "drain.server1"). + Return(nil, errors.New("key not found")) + }, + expected: false, + }, + { + name: "when stateKV is nil returns false", + hostname: "server1", + useState: false, + expected: false, + }, + } + + for _, tt := range tests { + s.Run(tt.name, func() { + var jobsClient *client.Client + if tt.useState { + stateKV := jobmocks.NewMockKeyValue(s.mockCtrl) + if tt.setupMocks != nil { + tt.setupMocks(stateKV) + } + jobsClient = s.newClientWithState(stateKV) + } else { + jobsClient = s.newClientWithoutState() + } + + result := jobsClient.CheckDrainFlag(s.ctx, tt.hostname) + s.Equal(tt.expected, result) + }) + } +} + +func (s *AgentDrainPublicTestSuite) TestSetDrainFlag() { + tests := []struct { + name string + hostname string + useState bool + setupMocks func(*jobmocks.MockKeyValue) + expectError bool + errorMsg string + }{ + { + name: "when write succeeds sets drain flag", + hostname: "server1", + useState: true, + setupMocks: func(kv *jobmocks.MockKeyValue) { + kv.EXPECT(). + Put(gomock.Any(), "drain.server1", []byte("1")). + Return(uint64(1), nil) + }, + }, + { + name: "when KV put fails returns error", + hostname: "server1", + useState: true, + setupMocks: func(kv *jobmocks.MockKeyValue) { + kv.EXPECT(). + Put(gomock.Any(), "drain.server1", []byte("1")). 
+ Return(uint64(0), errors.New("kv connection failed")) + }, + expectError: true, + errorMsg: "set drain flag", + }, + { + name: "when stateKV is nil returns error", + hostname: "server1", + useState: false, + expectError: true, + errorMsg: "agent state bucket not configured", + }, + } + + for _, tt := range tests { + s.Run(tt.name, func() { + var jobsClient *client.Client + if tt.useState { + stateKV := jobmocks.NewMockKeyValue(s.mockCtrl) + if tt.setupMocks != nil { + tt.setupMocks(stateKV) + } + jobsClient = s.newClientWithState(stateKV) + } else { + jobsClient = s.newClientWithoutState() + } + + err := jobsClient.SetDrainFlag(s.ctx, tt.hostname) + + if tt.expectError { + s.Error(err) + s.Contains(err.Error(), tt.errorMsg) + } else { + s.NoError(err) + } + }) + } +} + +func (s *AgentDrainPublicTestSuite) TestDeleteDrainFlag() { + tests := []struct { + name string + hostname string + useState bool + setupMocks func(*jobmocks.MockKeyValue) + expectError bool + errorMsg string + }{ + { + name: "when delete succeeds removes drain flag", + hostname: "server1", + useState: true, + setupMocks: func(kv *jobmocks.MockKeyValue) { + kv.EXPECT(). + Delete(gomock.Any(), "drain.server1"). + Return(nil) + }, + }, + { + name: "when KV delete fails returns error", + hostname: "server1", + useState: true, + setupMocks: func(kv *jobmocks.MockKeyValue) { + kv.EXPECT(). + Delete(gomock.Any(), "drain.server1"). 
+ Return(errors.New("kv connection failed")) + }, + expectError: true, + errorMsg: "delete drain flag", + }, + { + name: "when stateKV is nil returns error", + hostname: "server1", + useState: false, + expectError: true, + errorMsg: "agent state bucket not configured", + }, + } + + for _, tt := range tests { + s.Run(tt.name, func() { + var jobsClient *client.Client + if tt.useState { + stateKV := jobmocks.NewMockKeyValue(s.mockCtrl) + if tt.setupMocks != nil { + tt.setupMocks(stateKV) + } + jobsClient = s.newClientWithState(stateKV) + } else { + jobsClient = s.newClientWithoutState() + } + + err := jobsClient.DeleteDrainFlag(s.ctx, tt.hostname) + + if tt.expectError { + s.Error(err) + s.Contains(err.Error(), tt.errorMsg) + } else { + s.NoError(err) + } + }) + } +} + +func (s *AgentDrainPublicTestSuite) TestOverlayDrainState() { + tests := []struct { + name string + useState bool + setupMocks func(*jobmocks.MockKeyValue) + expectedState string + }{ + { + name: "when drain flag exists sets state to Cordoned", + useState: true, + setupMocks: func(kv *jobmocks.MockKeyValue) { + entry := jobmocks.NewMockKeyValueEntry(s.mockCtrl) + kv.EXPECT(). + Get(gomock.Any(), "drain.server1"). + Return(entry, nil) + }, + expectedState: job.AgentStateCordoned, + }, + { + name: "when drain flag missing keeps original state", + useState: true, + setupMocks: func(kv *jobmocks.MockKeyValue) { + kv.EXPECT(). + Get(gomock.Any(), "drain.server1"). + Return(nil, errors.New("key not found")) + }, + expectedState: "", + }, + { + name: "when stateKV is nil keeps original state", + useState: false, + expectedState: "", + }, + } + + for _, tt := range tests { + s.Run(tt.name, func() { + registryKV := jobmocks.NewMockKeyValue(s.mockCtrl) + + // Set up the registry KV to return agent data + entry := jobmocks.NewMockKeyValueEntry(s.mockCtrl) + entry.EXPECT().Value().Return( + []byte(`{"hostname":"server1","registered_at":"2026-01-01T00:00:00Z"}`), + ) + registryKV.EXPECT(). 
+ Get(gomock.Any(), "agents.server1"). + Return(entry, nil) + + opts := &client.Options{ + Timeout: 30 * time.Second, + KVBucket: s.mockKV, + RegistryKV: registryKV, + } + + if tt.useState { + stateKV := jobmocks.NewMockKeyValue(s.mockCtrl) + if tt.setupMocks != nil { + tt.setupMocks(stateKV) + } + opts.StateKV = stateKV + // GetAgent also calls GetAgentTimeline which uses stateKV + stateKV.EXPECT(). + Keys(gomock.Any()). + Return(nil, errors.New("nats: no keys found")) + } + + jobsClient, err := client.New( + slog.Default(), + s.mockNATSClient, + opts, + ) + s.Require().NoError(err) + + info, err := jobsClient.GetAgent(s.ctx, "server1") + s.NoError(err) + s.Equal(tt.expectedState, info.State) + }) + } +} + +func TestAgentDrainPublicTestSuite(t *testing.T) { + suite.Run(t, new(AgentDrainPublicTestSuite)) +} diff --git a/internal/job/client/agent_timeline_public_test.go b/internal/job/client/agent_timeline_public_test.go new file mode 100644 index 00000000..c3be65a4 --- /dev/null +++ b/internal/job/client/agent_timeline_public_test.go @@ -0,0 +1,484 @@ +// Copyright (c) 2026 John Dewey + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal in the Software without restriction, including without limitation the +// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +// sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. + +package client_test + +import ( + "context" + "encoding/json" + "errors" + "log/slog" + "testing" + "time" + + "github.com/golang/mock/gomock" + "github.com/stretchr/testify/suite" + + "github.com/retr0h/osapi/internal/job" + "github.com/retr0h/osapi/internal/job/client" + jobmocks "github.com/retr0h/osapi/internal/job/mocks" +) + +type AgentTimelinePublicTestSuite struct { + suite.Suite + + mockCtrl *gomock.Controller + mockNATSClient *jobmocks.MockNATSClient + mockKV *jobmocks.MockKeyValue + ctx context.Context +} + +func (s *AgentTimelinePublicTestSuite) SetupTest() { + s.mockCtrl = gomock.NewController(s.T()) + s.mockNATSClient = jobmocks.NewMockNATSClient(s.mockCtrl) + s.mockKV = jobmocks.NewMockKeyValue(s.mockCtrl) + s.ctx = context.Background() +} + +func (s *AgentTimelinePublicTestSuite) TearDownTest() { + s.mockCtrl.Finish() +} + +func (s *AgentTimelinePublicTestSuite) newClientWithState( + stateKV *jobmocks.MockKeyValue, +) *client.Client { + opts := &client.Options{ + Timeout: 30 * time.Second, + KVBucket: s.mockKV, + StateKV: stateKV, + } + c, err := client.New(slog.Default(), s.mockNATSClient, opts) + s.Require().NoError(err) + + return c +} + +func (s *AgentTimelinePublicTestSuite) newClientWithoutState() *client.Client { + opts := &client.Options{ + Timeout: 30 * time.Second, + KVBucket: s.mockKV, + } + c, err := client.New(slog.Default(), s.mockNATSClient, opts) + s.Require().NoError(err) + + return c +} + +func (s *AgentTimelinePublicTestSuite) TestWriteAgentTimelineEvent() { + tests := []struct { + name string + hostname string + event string + message string + useState bool + setupMocks func(*jobmocks.MockKeyValue) + expectError bool + errorMsg string + }{ + { + name: "when 
write succeeds stores timeline event", + hostname: "server1", + event: "drain", + message: "node marked for drain", + useState: true, + setupMocks: func(kv *jobmocks.MockKeyValue) { + kv.EXPECT(). + Put(gomock.Any(), gomock.Any(), gomock.Any()). + DoAndReturn(func( + _ context.Context, + key string, + data []byte, + ) (uint64, error) { + s.Contains(key, "timeline.server1.drain.") + + var te job.TimelineEvent + err := json.Unmarshal(data, &te) + s.NoError(err) + s.Equal("drain", te.Event) + s.Equal("server1", te.Hostname) + s.Equal("node marked for drain", te.Message) + s.NotZero(te.Timestamp) + + return 1, nil + }) + }, + }, + { + name: "when KV put fails returns error", + hostname: "server1", + event: "drain", + message: "drain requested", + useState: true, + setupMocks: func(kv *jobmocks.MockKeyValue) { + kv.EXPECT(). + Put(gomock.Any(), gomock.Any(), gomock.Any()). + Return(uint64(0), errors.New("kv connection failed")) + }, + expectError: true, + errorMsg: "write timeline event", + }, + { + name: "when stateKV is nil returns error", + hostname: "server1", + event: "drain", + message: "drain requested", + useState: false, + expectError: true, + errorMsg: "agent state bucket not configured", + }, + } + + for _, tt := range tests { + s.Run(tt.name, func() { + var jobsClient *client.Client + if tt.useState { + stateKV := jobmocks.NewMockKeyValue(s.mockCtrl) + if tt.setupMocks != nil { + tt.setupMocks(stateKV) + } + jobsClient = s.newClientWithState(stateKV) + } else { + jobsClient = s.newClientWithoutState() + } + + err := jobsClient.WriteAgentTimelineEvent( + s.ctx, + tt.hostname, + tt.event, + tt.message, + ) + + if tt.expectError { + s.Error(err) + s.Contains(err.Error(), tt.errorMsg) + } else { + s.NoError(err) + } + }) + } +} + +func (s *AgentTimelinePublicTestSuite) TestGetAgentTimeline() { + now := time.Now() + earlier := now.Add(-10 * time.Minute) + later := now.Add(10 * time.Minute) + + tests := []struct { + name string + hostname string + useState bool + 
setupMocks func(*jobmocks.MockKeyValue) + expectError bool + errorMsg string + expectedCount int + validateFunc func([]job.TimelineEvent) + }{ + { + name: "when events exist returns sorted events", + hostname: "server1", + useState: true, + setupMocks: func(kv *jobmocks.MockKeyValue) { + kv.EXPECT(). + Keys(gomock.Any()). + Return([]string{ + "timeline.server1.drain.1000000000", + "timeline.server1.undrain.2000000000", + "agents.server1", + }, nil) + + drainEvent, _ := json.Marshal(job.TimelineEvent{ + Timestamp: later, + Event: "drain", + Hostname: "server1", + Message: "drain requested", + }) + entry1 := jobmocks.NewMockKeyValueEntry(s.mockCtrl) + entry1.EXPECT().Value().Return(drainEvent) + kv.EXPECT(). + Get(gomock.Any(), "timeline.server1.drain.1000000000"). + Return(entry1, nil) + + undrainEvent, _ := json.Marshal(job.TimelineEvent{ + Timestamp: earlier, + Event: "undrain", + Hostname: "server1", + Message: "undrain requested", + }) + entry2 := jobmocks.NewMockKeyValueEntry(s.mockCtrl) + entry2.EXPECT().Value().Return(undrainEvent) + kv.EXPECT(). + Get(gomock.Any(), "timeline.server1.undrain.2000000000"). + Return(entry2, nil) + }, + expectedCount: 2, + validateFunc: func(events []job.TimelineEvent) { + // Should be sorted by timestamp (earlier first) + s.Equal("undrain", events[0].Event) + s.Equal("drain", events[1].Event) + }, + }, + { + name: "when no keys found returns empty slice", + hostname: "server1", + useState: true, + setupMocks: func(kv *jobmocks.MockKeyValue) { + kv.EXPECT(). + Keys(gomock.Any()). + Return(nil, errors.New("nats: no keys found")) + }, + expectedCount: 0, + }, + { + name: "when stateKV is nil returns error", + hostname: "server1", + useState: false, + expectError: true, + errorMsg: "agent state bucket not configured", + }, + { + name: "when Get fails for a key skips it", + hostname: "server1", + useState: true, + setupMocks: func(kv *jobmocks.MockKeyValue) { + kv.EXPECT(). + Keys(gomock.Any()). 
+ Return([]string{ + "timeline.server1.drain.1000000000", + "timeline.server1.undrain.2000000000", + }, nil) + + drainEvent, _ := json.Marshal(job.TimelineEvent{ + Timestamp: now, + Event: "drain", + Hostname: "server1", + Message: "drain requested", + }) + entry1 := jobmocks.NewMockKeyValueEntry(s.mockCtrl) + entry1.EXPECT().Value().Return(drainEvent) + kv.EXPECT(). + Get(gomock.Any(), "timeline.server1.drain.1000000000"). + Return(entry1, nil) + + kv.EXPECT(). + Get(gomock.Any(), "timeline.server1.undrain.2000000000"). + Return(nil, errors.New("key not found")) + }, + expectedCount: 1, + validateFunc: func(events []job.TimelineEvent) { + s.Equal("drain", events[0].Event) + }, + }, + { + name: "when unmarshal fails for a key skips it", + hostname: "server1", + useState: true, + setupMocks: func(kv *jobmocks.MockKeyValue) { + kv.EXPECT(). + Keys(gomock.Any()). + Return([]string{ + "timeline.server1.drain.1000000000", + "timeline.server1.undrain.2000000000", + }, nil) + + drainEvent, _ := json.Marshal(job.TimelineEvent{ + Timestamp: now, + Event: "drain", + Hostname: "server1", + Message: "drain requested", + }) + entry1 := jobmocks.NewMockKeyValueEntry(s.mockCtrl) + entry1.EXPECT().Value().Return(drainEvent) + kv.EXPECT(). + Get(gomock.Any(), "timeline.server1.drain.1000000000"). + Return(entry1, nil) + + entry2 := jobmocks.NewMockKeyValueEntry(s.mockCtrl) + entry2.EXPECT().Value().Return([]byte("invalid json")) + kv.EXPECT(). + Get(gomock.Any(), "timeline.server1.undrain.2000000000"). + Return(entry2, nil) + }, + expectedCount: 1, + validateFunc: func(events []job.TimelineEvent) { + s.Equal("drain", events[0].Event) + }, + }, + { + name: "when keys exist for other hostnames filters them out", + hostname: "server1", + useState: true, + setupMocks: func(kv *jobmocks.MockKeyValue) { + kv.EXPECT(). + Keys(gomock.Any()). 
+ Return([]string{ + "timeline.server1.drain.1000000000", + "timeline.server2.drain.2000000000", + "agents.server1", + }, nil) + + drainEvent, _ := json.Marshal(job.TimelineEvent{ + Timestamp: now, + Event: "drain", + Hostname: "server1", + Message: "drain requested", + }) + entry1 := jobmocks.NewMockKeyValueEntry(s.mockCtrl) + entry1.EXPECT().Value().Return(drainEvent) + kv.EXPECT(). + Get(gomock.Any(), "timeline.server1.drain.1000000000"). + Return(entry1, nil) + }, + expectedCount: 1, + validateFunc: func(events []job.TimelineEvent) { + s.Equal("server1", events[0].Hostname) + }, + }, + } + + for _, tt := range tests { + s.Run(tt.name, func() { + var jobsClient *client.Client + if tt.useState { + stateKV := jobmocks.NewMockKeyValue(s.mockCtrl) + if tt.setupMocks != nil { + tt.setupMocks(stateKV) + } + jobsClient = s.newClientWithState(stateKV) + } else { + jobsClient = s.newClientWithoutState() + } + + events, err := jobsClient.GetAgentTimeline(s.ctx, tt.hostname) + + if tt.expectError { + s.Error(err) + s.Contains(err.Error(), tt.errorMsg) + } else { + s.NoError(err) + s.Len(events, tt.expectedCount) + if tt.validateFunc != nil { + tt.validateFunc(events) + } + } + }) + } +} + +func (s *AgentTimelinePublicTestSuite) TestComputeAgentState() { + tests := []struct { + name string + events []job.TimelineEvent + expectedState string + }{ + { + name: "when no events returns Ready", + events: []job.TimelineEvent{}, + expectedState: job.AgentStateReady, + }, + { + name: "when nil events returns Ready", + events: nil, + expectedState: job.AgentStateReady, + }, + { + name: "when latest event is drain returns Draining", + events: []job.TimelineEvent{ + { + Timestamp: time.Now(), + Event: "drain", + Hostname: "server1", + Message: "drain requested", + }, + }, + expectedState: job.AgentStateDraining, + }, + { + name: "when latest event is cordoned returns Cordoned", + events: []job.TimelineEvent{ + { + Timestamp: time.Now(), + Event: "cordoned", + Hostname: "server1", + 
Message: "node cordoned", + }, + }, + expectedState: job.AgentStateCordoned, + }, + { + name: "when latest event is undrain returns Ready", + events: []job.TimelineEvent{ + { + Timestamp: time.Now().Add(-10 * time.Minute), + Event: "drain", + Hostname: "server1", + Message: "drain requested", + }, + { + Timestamp: time.Now(), + Event: "undrain", + Hostname: "server1", + Message: "undrain requested", + }, + }, + expectedState: job.AgentStateReady, + }, + { + name: "when latest event is ready returns Ready", + events: []job.TimelineEvent{ + { + Timestamp: time.Now().Add(-10 * time.Minute), + Event: "drain", + Hostname: "server1", + Message: "drain requested", + }, + { + Timestamp: time.Now(), + Event: "ready", + Hostname: "server1", + Message: "agent ready", + }, + }, + expectedState: job.AgentStateReady, + }, + { + name: "when latest event is unknown returns Ready", + events: []job.TimelineEvent{ + { + Timestamp: time.Now(), + Event: "something-unexpected", + Hostname: "server1", + Message: "unknown event", + }, + }, + expectedState: job.AgentStateReady, + }, + } + + for _, tt := range tests { + s.Run(tt.name, func() { + state := client.ComputeAgentState(tt.events) + s.Equal(tt.expectedState, state) + }) + } +} + +func TestAgentTimelinePublicTestSuite(t *testing.T) { + suite.Run(t, new(AgentTimelinePublicTestSuite)) +} diff --git a/internal/job/client/client.go b/internal/job/client/client.go index da57a3b7..5096913b 100644 --- a/internal/job/client/client.go +++ b/internal/job/client/client.go @@ -42,6 +42,7 @@ type Client struct { kv jetstream.KeyValue registryKV jetstream.KeyValue factsKV jetstream.KeyValue + stateKV jetstream.KeyValue timeout time.Duration streamName string } @@ -56,6 +57,8 @@ type Options struct { RegistryKV jetstream.KeyValue // FactsKV is the KV bucket for agent facts (optional). FactsKV jetstream.KeyValue + // StateKV is the KV bucket for persistent agent state (drain flags, timeline). 
+ StateKV jetstream.KeyValue // StreamName is the JetStream stream name (used to derive DLQ name). StreamName string } @@ -79,6 +82,7 @@ func New( kv: opts.KVBucket, registryKV: opts.RegistryKV, factsKV: opts.FactsKV, + stateKV: opts.StateKV, streamName: opts.StreamName, timeout: opts.Timeout, }, nil diff --git a/internal/job/client/query.go b/internal/job/client/query.go index f715c712..f3c02ca5 100644 --- a/internal/job/client/query.go +++ b/internal/job/client/query.go @@ -24,6 +24,7 @@ import ( "context" "encoding/json" "fmt" + "strings" "github.com/retr0h/osapi/internal/job" "github.com/retr0h/osapi/internal/provider/network/dns" @@ -401,6 +402,10 @@ func (c *Client) ListAgents( agents := make([]job.AgentInfo, 0, len(keys)) for _, key := range keys { + if !strings.HasPrefix(key, "agents.") { + continue + } + entry, err := c.registryKV.Get(ctx, key) if err != nil { continue @@ -413,6 +418,8 @@ func (c *Client) ListAgents( info := agentInfoFromRegistration(®) c.mergeFacts(ctx, &info) + c.overlayDrainState(ctx, &info) + agents = append(agents, info) } @@ -441,6 +448,13 @@ func (c *Client) GetAgent( info := agentInfoFromRegistration(®) c.mergeFacts(ctx, &info) + c.overlayDrainState(ctx, &info) + + timeline, err := c.GetAgentTimeline(ctx, hostname) + if err == nil && len(timeline) > 0 { + info.Timeline = timeline + } + return &info, nil } @@ -489,5 +503,7 @@ func agentInfoFromRegistration( LoadAverages: reg.LoadAverages, MemoryStats: reg.MemoryStats, AgentVersion: reg.AgentVersion, + Conditions: reg.Conditions, + State: reg.State, } } diff --git a/internal/job/client/query_public_test.go b/internal/job/client/query_public_test.go index 6c07b949..d5394087 100644 --- a/internal/job/client/query_public_test.go +++ b/internal/job/client/query_public_test.go @@ -1133,6 +1133,7 @@ func (s *QueryPublicTestSuite) TestListAgents() { tests := []struct { name string setupMockKV func(*jobmocks.MockKeyValue) + setupStateKV func(*jobmocks.MockKeyValue) setupMockFactsKV 
func(*jobmocks.MockKeyValue) useRegistryKV bool useFactsKV bool @@ -1185,6 +1186,14 @@ func (s *QueryPublicTestSuite) TestListAgents() { Get(gomock.Any(), "agents.server2"). Return(entry2, nil) }, + setupStateKV: func(kv *jobmocks.MockKeyValue) { + kv.EXPECT(). + Get(gomock.Any(), "drain.server1"). + Return(nil, errors.New("key not found")) + kv.EXPECT(). + Get(gomock.Any(), "drain.server2"). + Return(nil, errors.New("key not found")) + }, expectedCount: 2, validateFunc: func(agents []job.AgentInfo) { s.Equal("server1", agents[0].Hostname) @@ -1228,6 +1237,11 @@ func (s *QueryPublicTestSuite) TestListAgents() { Get(gomock.Any(), "agents.server2"). Return(nil, errors.New("key not found")) }, + setupStateKV: func(kv *jobmocks.MockKeyValue) { + kv.EXPECT(). + Get(gomock.Any(), "drain.server1"). + Return(nil, errors.New("key not found")) + }, expectedCount: 1, }, { @@ -1252,6 +1266,11 @@ func (s *QueryPublicTestSuite) TestListAgents() { Get(gomock.Any(), "agents.server2"). Return(entry2, nil) }, + setupStateKV: func(kv *jobmocks.MockKeyValue) { + kv.EXPECT(). + Get(gomock.Any(), "drain.server1"). + Return(nil, errors.New("key not found")) + }, expectedCount: 1, }, { @@ -1273,6 +1292,11 @@ func (s *QueryPublicTestSuite) TestListAgents() { Get(gomock.Any(), "agents.server1"). Return(entry, nil) }, + setupStateKV: func(kv *jobmocks.MockKeyValue) { + kv.EXPECT(). + Get(gomock.Any(), "drain.server1"). + Return(nil, errors.New("key not found")) + }, setupMockFactsKV: func(kv *jobmocks.MockKeyValue) { factsEntry := jobmocks.NewMockKeyValueEntry(s.mockCtrl) factsEntry.EXPECT().Value().Return( @@ -1317,6 +1341,11 @@ func (s *QueryPublicTestSuite) TestListAgents() { Get(gomock.Any(), "agents.server1"). Return(entry, nil) }, + setupStateKV: func(kv *jobmocks.MockKeyValue) { + kv.EXPECT(). + Get(gomock.Any(), "drain.server1"). 
+ Return(nil, errors.New("key not found")) + }, expectedCount: 1, validateFunc: func(agents []job.AgentInfo) { s.Equal("server1", agents[0].Hostname) @@ -1347,6 +1376,11 @@ func (s *QueryPublicTestSuite) TestListAgents() { Get(gomock.Any(), "agents.server1"). Return(entry, nil) }, + setupStateKV: func(kv *jobmocks.MockKeyValue) { + kv.EXPECT(). + Get(gomock.Any(), "drain.server1"). + Return(nil, errors.New("key not found")) + }, setupMockFactsKV: func(kv *jobmocks.MockKeyValue) { kv.EXPECT(). Get(gomock.Any(), "facts.server1"). @@ -1378,6 +1412,11 @@ func (s *QueryPublicTestSuite) TestListAgents() { Get(gomock.Any(), "agents.server1"). Return(entry, nil) }, + setupStateKV: func(kv *jobmocks.MockKeyValue) { + kv.EXPECT(). + Get(gomock.Any(), "drain.server1"). + Return(nil, errors.New("key not found")) + }, setupMockFactsKV: func(kv *jobmocks.MockKeyValue) { factsEntry := jobmocks.NewMockKeyValueEntry(s.mockCtrl) factsEntry.EXPECT().Value().Return([]byte(`not valid json`)) @@ -1409,6 +1448,11 @@ func (s *QueryPublicTestSuite) TestListAgents() { if tt.useRegistryKV { opts.RegistryKV = registryKV } + if tt.setupStateKV != nil { + stateKV := jobmocks.NewMockKeyValue(s.mockCtrl) + tt.setupStateKV(stateKV) + opts.StateKV = stateKV + } if tt.useFactsKV { factsKV := jobmocks.NewMockKeyValue(s.mockCtrl) if tt.setupMockFactsKV != nil { @@ -1444,6 +1488,7 @@ func (s *QueryPublicTestSuite) TestGetAgent() { name string hostname string setupMockKV func(*jobmocks.MockKeyValue) + setupStateKV func(*jobmocks.MockKeyValue) setupMockFactsKV func(*jobmocks.MockKeyValue) useRegistryKV bool useFactsKV bool @@ -1473,6 +1518,14 @@ func (s *QueryPublicTestSuite) TestGetAgent() { Get(gomock.Any(), "agents.server1"). Return(entry, nil) }, + setupStateKV: func(kv *jobmocks.MockKeyValue) { + kv.EXPECT(). + Get(gomock.Any(), "drain.server1"). + Return(nil, errors.New("key not found")) + kv.EXPECT(). + Keys(gomock.Any()). 
+ Return(nil, errors.New("nats: no keys found")) + }, validateFunc: func(info *job.AgentInfo) { s.Equal("server1", info.Hostname) s.Equal(map[string]string{"group": "web"}, info.Labels) @@ -1524,6 +1577,14 @@ func (s *QueryPublicTestSuite) TestGetAgent() { Get(gomock.Any(), "agents.server1"). Return(entry, nil) }, + setupStateKV: func(kv *jobmocks.MockKeyValue) { + kv.EXPECT(). + Get(gomock.Any(), "drain.server1"). + Return(nil, errors.New("key not found")) + kv.EXPECT(). + Keys(gomock.Any()). + Return(nil, errors.New("nats: no keys found")) + }, setupMockFactsKV: func(kv *jobmocks.MockKeyValue) { factsEntry := jobmocks.NewMockKeyValueEntry(s.mockCtrl) factsEntry.EXPECT().Value().Return( @@ -1565,6 +1626,14 @@ func (s *QueryPublicTestSuite) TestGetAgent() { Get(gomock.Any(), "agents.server1"). Return(entry, nil) }, + setupStateKV: func(kv *jobmocks.MockKeyValue) { + kv.EXPECT(). + Get(gomock.Any(), "drain.server1"). + Return(nil, errors.New("key not found")) + kv.EXPECT(). + Keys(gomock.Any()). + Return(nil, errors.New("nats: no keys found")) + }, validateFunc: func(info *job.AgentInfo) { s.Equal("server1", info.Hostname) s.Empty(info.Architecture) @@ -1591,6 +1660,14 @@ func (s *QueryPublicTestSuite) TestGetAgent() { Get(gomock.Any(), "agents.server1"). Return(entry, nil) }, + setupStateKV: func(kv *jobmocks.MockKeyValue) { + kv.EXPECT(). + Get(gomock.Any(), "drain.server1"). + Return(nil, errors.New("key not found")) + kv.EXPECT(). + Keys(gomock.Any()). + Return(nil, errors.New("nats: no keys found")) + }, setupMockFactsKV: func(kv *jobmocks.MockKeyValue) { kv.EXPECT(). Get(gomock.Any(), "facts.server1"). 
@@ -1602,6 +1679,96 @@ func (s *QueryPublicTestSuite) TestGetAgent() { s.Empty(info.KernelVersion) }, }, + { + name: "when timeline events exist includes timeline in response", + hostname: "server1", + useRegistryKV: true, + setupMockKV: func(kv *jobmocks.MockKeyValue) { + entry := jobmocks.NewMockKeyValueEntry(s.mockCtrl) + entry.EXPECT().Value().Return( + []byte( + `{"hostname":"server1","registered_at":"2026-01-01T00:00:00Z"}`, + ), + ) + kv.EXPECT(). + Get(gomock.Any(), "agents.server1"). + Return(entry, nil) + }, + setupStateKV: func(kv *jobmocks.MockKeyValue) { + kv.EXPECT(). + Get(gomock.Any(), "drain.server1"). + Return(nil, errors.New("key not found")) + + // GetAgentTimeline calls Keys then Get for matching keys + kv.EXPECT(). + Keys(gomock.Any()). + Return([]string{ + "agents.server1", + "timeline.server1.drain.1000000000", + "timeline.server1.undrain.2000000000", + }, nil) + + drainEntry := jobmocks.NewMockKeyValueEntry(s.mockCtrl) + drainEntry.EXPECT().Value().Return( + []byte( + `{"timestamp":"2026-01-01T01:00:00Z","event":"drain","hostname":"server1","message":"node draining"}`, + ), + ) + kv.EXPECT(). + Get(gomock.Any(), "timeline.server1.drain.1000000000"). + Return(drainEntry, nil) + + undrainEntry := jobmocks.NewMockKeyValueEntry(s.mockCtrl) + undrainEntry.EXPECT().Value().Return( + []byte( + `{"timestamp":"2026-01-01T02:00:00Z","event":"undrain","hostname":"server1","message":"node undrained"}`, + ), + ) + kv.EXPECT(). + Get(gomock.Any(), "timeline.server1.undrain.2000000000"). 
+ Return(undrainEntry, nil) + }, + validateFunc: func(info *job.AgentInfo) { + s.Equal("server1", info.Hostname) + s.Len(info.Timeline, 2) + s.Equal("drain", info.Timeline[0].Event) + s.Equal("node draining", info.Timeline[0].Message) + s.Equal("undrain", info.Timeline[1].Event) + s.Equal("node undrained", info.Timeline[1].Message) + }, + }, + { + name: "when conditions and state set includes them in response", + hostname: "server1", + useRegistryKV: true, + setupMockKV: func(kv *jobmocks.MockKeyValue) { + entry := jobmocks.NewMockKeyValueEntry(s.mockCtrl) + entry.EXPECT().Value().Return( + []byte( + `{"hostname":"server1","registered_at":"2026-01-01T00:00:00Z","state":"Draining","conditions":[{"type":"DiskPressure","status":true,"reason":"disk usage 92%","last_transition_time":"2026-01-01T00:00:00Z"}]}`, + ), + ) + kv.EXPECT(). + Get(gomock.Any(), "agents.server1"). + Return(entry, nil) + }, + setupStateKV: func(kv *jobmocks.MockKeyValue) { + kv.EXPECT(). + Get(gomock.Any(), "drain.server1"). + Return(nil, errors.New("key not found")) + kv.EXPECT(). + Keys(gomock.Any()). 
+ Return(nil, errors.New("nats: no keys found")) + }, + validateFunc: func(info *job.AgentInfo) { + s.Equal("server1", info.Hostname) + s.Equal("Draining", info.State) + s.Len(info.Conditions, 1) + s.Equal("DiskPressure", info.Conditions[0].Type) + s.True(info.Conditions[0].Status) + s.Equal("disk usage 92%", info.Conditions[0].Reason) + }, + }, } for _, tt := range tests { @@ -1618,6 +1785,11 @@ func (s *QueryPublicTestSuite) TestGetAgent() { if tt.useRegistryKV { opts.RegistryKV = registryKV } + if tt.setupStateKV != nil { + stateKV := jobmocks.NewMockKeyValue(s.mockCtrl) + tt.setupStateKV(stateKV) + opts.StateKV = stateKV + } if tt.useFactsKV { factsKV := jobmocks.NewMockKeyValue(s.mockCtrl) if tt.setupMockFactsKV != nil { @@ -1730,6 +1902,7 @@ func (s *QueryPublicTestSuite) TestQueryNodeDiskBroadcast() { timeout time.Duration opts *publishAndCollectMockOpts setupRegistryKV func(*jobmocks.MockKeyValue) + setupStateKV func(*jobmocks.MockKeyValue) expectError bool errorContains string expectedCount int @@ -1771,6 +1944,14 @@ func (s *QueryPublicTestSuite) TestQueryNodeDiskBroadcast() { Get(gomock.Any(), "agents.server2"). Return(entry2, nil) }, + setupStateKV: func(kv *jobmocks.MockKeyValue) { + kv.EXPECT(). + Get(gomock.Any(), "drain.server1"). + Return(nil, errors.New("key not found")) + kv.EXPECT(). + Get(gomock.Any(), "drain.server2"). 
+ Return(nil, errors.New("key not found")) + }, expectedCount: 2, }, { @@ -1834,6 +2015,12 @@ func (s *QueryPublicTestSuite) TestQueryNodeDiskBroadcast() { opts.RegistryKV = mockRegistryKV } + if tt.setupStateKV != nil { + stateKV := jobmocks.NewMockKeyValue(s.mockCtrl) + tt.setupStateKV(stateKV) + opts.StateKV = stateKV + } + jobsClient, err := client.New(slog.Default(), s.mockNATSClient, opts) s.Require().NoError(err) diff --git a/internal/job/client/types.go b/internal/job/client/types.go index 4da2e789..5723b5dc 100644 --- a/internal/job/client/types.go +++ b/internal/job/client/types.go @@ -228,6 +228,30 @@ type JobClient interface { hostname string, ) (*job.AgentInfo, error) + // Agent timeline + WriteAgentTimelineEvent( + ctx context.Context, + hostname, event, message string, + ) error + GetAgentTimeline( + ctx context.Context, + hostname string, + ) ([]job.TimelineEvent, error) + + // Agent drain flag + CheckDrainFlag( + ctx context.Context, + hostname string, + ) bool + SetDrainFlag( + ctx context.Context, + hostname string, + ) error + DeleteDrainFlag( + ctx context.Context, + hostname string, + ) error + // Job deletion DeleteJob( ctx context.Context, diff --git a/internal/job/mocks/job_client.gen.go b/internal/job/mocks/job_client.gen.go index 2ea4cb88..60a0265d 100644 --- a/internal/job/mocks/job_client.gen.go +++ b/internal/job/mocks/job_client.gen.go @@ -44,6 +44,20 @@ func (m *MockJobClient) EXPECT() *MockJobClientMockRecorder { return m.recorder } +// CheckDrainFlag mocks base method. +func (m *MockJobClient) CheckDrainFlag(arg0 context.Context, arg1 string) bool { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "CheckDrainFlag", arg0, arg1) + ret0, _ := ret[0].(bool) + return ret0 +} + +// CheckDrainFlag indicates an expected call of CheckDrainFlag. 
+func (mr *MockJobClientMockRecorder) CheckDrainFlag(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CheckDrainFlag", reflect.TypeOf((*MockJobClient)(nil).CheckDrainFlag), arg0, arg1) +} + // ConsumeJobs mocks base method. func (m *MockJobClient) ConsumeJobs(arg0 context.Context, arg1, arg2 string, arg3 func(jetstream.Msg) error, arg4 *client.ConsumeOptions) error { m.ctrl.T.Helper() @@ -87,6 +101,20 @@ func (mr *MockJobClientMockRecorder) CreateOrUpdateConsumer(arg0, arg1, arg2 int return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateOrUpdateConsumer", reflect.TypeOf((*MockJobClient)(nil).CreateOrUpdateConsumer), arg0, arg1, arg2) } +// DeleteDrainFlag mocks base method. +func (m *MockJobClient) DeleteDrainFlag(arg0 context.Context, arg1 string) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DeleteDrainFlag", arg0, arg1) + ret0, _ := ret[0].(error) + return ret0 +} + +// DeleteDrainFlag indicates an expected call of DeleteDrainFlag. +func (mr *MockJobClientMockRecorder) DeleteDrainFlag(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteDrainFlag", reflect.TypeOf((*MockJobClient)(nil).DeleteDrainFlag), arg0, arg1) +} + // DeleteJob mocks base method. func (m *MockJobClient) DeleteJob(arg0 context.Context, arg1 string) error { m.ctrl.T.Helper() @@ -116,6 +144,21 @@ func (mr *MockJobClientMockRecorder) GetAgent(arg0, arg1 interface{}) *gomock.Ca return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetAgent", reflect.TypeOf((*MockJobClient)(nil).GetAgent), arg0, arg1) } +// GetAgentTimeline mocks base method. 
+func (m *MockJobClient) GetAgentTimeline(arg0 context.Context, arg1 string) ([]job.TimelineEvent, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetAgentTimeline", arg0, arg1) + ret0, _ := ret[0].([]job.TimelineEvent) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetAgentTimeline indicates an expected call of GetAgentTimeline. +func (mr *MockJobClientMockRecorder) GetAgentTimeline(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetAgentTimeline", reflect.TypeOf((*MockJobClient)(nil).GetAgentTimeline), arg0, arg1) +} + // GetJobData mocks base method. func (m *MockJobClient) GetJobData(arg0 context.Context, arg1 string) ([]byte, error) { m.ctrl.T.Helper() @@ -763,6 +806,34 @@ func (mr *MockJobClientMockRecorder) RetryJob(arg0, arg1, arg2 interface{}) *gom return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RetryJob", reflect.TypeOf((*MockJobClient)(nil).RetryJob), arg0, arg1, arg2) } +// SetDrainFlag mocks base method. +func (m *MockJobClient) SetDrainFlag(arg0 context.Context, arg1 string) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "SetDrainFlag", arg0, arg1) + ret0, _ := ret[0].(error) + return ret0 +} + +// SetDrainFlag indicates an expected call of SetDrainFlag. +func (mr *MockJobClientMockRecorder) SetDrainFlag(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetDrainFlag", reflect.TypeOf((*MockJobClient)(nil).SetDrainFlag), arg0, arg1) +} + +// WriteAgentTimelineEvent mocks base method. +func (m *MockJobClient) WriteAgentTimelineEvent(arg0 context.Context, arg1, arg2, arg3 string) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "WriteAgentTimelineEvent", arg0, arg1, arg2, arg3) + ret0, _ := ret[0].(error) + return ret0 +} + +// WriteAgentTimelineEvent indicates an expected call of WriteAgentTimelineEvent. 
+func (mr *MockJobClientMockRecorder) WriteAgentTimelineEvent(arg0, arg1, arg2, arg3 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "WriteAgentTimelineEvent", reflect.TypeOf((*MockJobClient)(nil).WriteAgentTimelineEvent), arg0, arg1, arg2, arg3) +} + // WriteJobResponse mocks base method. func (m *MockJobClient) WriteJobResponse(arg0 context.Context, arg1, arg2 string, arg3 []byte, arg4, arg5 string, arg6 *bool) error { m.ctrl.T.Helper() diff --git a/internal/job/subjects.go b/internal/job/subjects.go index b6bab149..0085bf64 100644 --- a/internal/job/subjects.go +++ b/internal/job/subjects.go @@ -339,10 +339,19 @@ func CountExpectedAgents( switch routingType { case BroadcastHost: - return len(agents) + count := 0 + for i := range agents { + if agents[i].State != AgentStateCordoned && agents[i].State != AgentStateDraining { + count++ + } + } + return count case "label": count := 0 for i := range agents { + if agents[i].State == AgentStateCordoned || agents[i].State == AgentStateDraining { + continue + } if agentVal, ok := agents[i].Labels[key]; ok { if agentVal == value || strings.HasPrefix(agentVal, value+".") { count++ diff --git a/internal/job/subjects_public_test.go b/internal/job/subjects_public_test.go index 55b0cc6a..b16315ea 100644 --- a/internal/job/subjects_public_test.go +++ b/internal/job/subjects_public_test.go @@ -956,6 +956,51 @@ func (suite *SubjectsPublicTestSuite) TestCountExpectedAgents() { target: "_any", want: 0, }, + { + name: "when _all excludes cordoned agents", + agents: []job.AgentInfo{ + {Hostname: "web-01"}, + {Hostname: "web-02", State: job.AgentStateCordoned}, + {Hostname: "web-03"}, + }, + target: "_all", + want: 2, + }, + { + name: "when _all excludes draining agents", + agents: []job.AgentInfo{ + {Hostname: "web-01"}, + {Hostname: "web-02", State: job.AgentStateDraining}, + }, + target: "_all", + want: 1, + }, + { + name: "when label match excludes cordoned agents", + 
agents: []job.AgentInfo{ + { + Hostname: "web-01", + Labels: map[string]string{"group": "web.dev"}, + State: job.AgentStateCordoned, + }, + {Hostname: "web-02", Labels: map[string]string{"group": "web.dev"}}, + }, + target: "group:web", + want: 1, + }, + { + name: "when label match excludes draining agents", + agents: []job.AgentInfo{ + { + Hostname: "web-01", + Labels: map[string]string{"group": "web.dev"}, + State: job.AgentStateDraining, + }, + {Hostname: "web-02", Labels: map[string]string{"group": "web.dev"}}, + }, + target: "group:web", + want: 1, + }, } for _, tt := range tests { diff --git a/internal/job/types.go b/internal/job/types.go index 77c87403..753a9be3 100644 --- a/internal/job/types.go +++ b/internal/job/types.go @@ -270,6 +270,28 @@ type FactsRegistration struct { Facts map[string]any `json:"facts,omitempty"` } +// Condition type constants. +const ( + ConditionMemoryPressure = "MemoryPressure" + ConditionHighLoad = "HighLoad" + ConditionDiskPressure = "DiskPressure" +) + +// Agent state constants. +const ( + AgentStateReady = "Ready" + AgentStateDraining = "Draining" + AgentStateCordoned = "Cordoned" +) + +// Condition represents a node condition evaluated agent-side. +type Condition struct { + Type string `json:"type"` + Status bool `json:"status"` + Reason string `json:"reason,omitempty"` + LastTransitionTime time.Time `json:"last_transition_time"` +} + // AgentRegistration represents an agent's registration entry in the KV registry. type AgentRegistration struct { // Hostname is the hostname of the agent. @@ -290,6 +312,10 @@ type AgentRegistration struct { MemoryStats *mem.Stats `json:"memory_stats,omitempty"` // AgentVersion is the version of the agent binary. AgentVersion string `json:"agent_version,omitempty"` + // Conditions contains the evaluated node conditions. + Conditions []Condition `json:"conditions,omitempty"` + // State is the agent's scheduling state (Ready, Draining, Cordoned). 
+ State string `json:"state,omitempty"` } // AgentInfo represents information about an active agent. @@ -328,6 +354,12 @@ type AgentInfo struct { Interfaces []NetworkInterface `json:"interfaces,omitempty"` // Facts contains arbitrary key-value facts collected by the agent. Facts map[string]any `json:"facts,omitempty"` + // Conditions contains the evaluated node conditions. + Conditions []Condition `json:"conditions,omitempty"` + // State is the agent's scheduling state (Ready, Draining, Cordoned). + State string `json:"state,omitempty"` + // Timeline contains the chronological sequence of state transition events. + Timeline []TimelineEvent `json:"timeline,omitempty"` } // NodeDiskResponse represents the response for node.disk.get operations. diff --git a/internal/provider/node/mem/darwin_get_vm.go b/internal/provider/node/mem/darwin_get_vm.go index 819dd072..6df705a1 100644 --- a/internal/provider/node/mem/darwin_get_vm.go +++ b/internal/provider/node/mem/darwin_get_vm.go @@ -30,8 +30,9 @@ func (d *Darwin) GetStats() (*Stats, error) { } return &Stats{ - Total: memInfo.Total, - Free: memInfo.Free, - Cached: memInfo.Cached, + Total: memInfo.Total, + Available: memInfo.Available, + Free: memInfo.Free, + Cached: memInfo.Cached, }, nil } diff --git a/internal/provider/node/mem/types.go b/internal/provider/node/mem/types.go index 2f242555..8a592d80 100644 --- a/internal/provider/node/mem/types.go +++ b/internal/provider/node/mem/types.go @@ -30,6 +30,8 @@ type Provider interface { type Stats struct { // Total memory in bytes Total uint64 + // Available memory in bytes (free + reclaimable) + Available uint64 // Free memory in bytes Free uint64 // Cached memory in bytes diff --git a/internal/provider/node/mem/ubuntu_get_vm.go b/internal/provider/node/mem/ubuntu_get_vm.go index ae4e1982..84411367 100644 --- a/internal/provider/node/mem/ubuntu_get_vm.go +++ b/internal/provider/node/mem/ubuntu_get_vm.go @@ -30,8 +30,9 @@ func (u *Ubuntu) GetStats() (*Stats, error) { } return 
&Stats{ - Total: memInfo.Total, - Free: memInfo.Free, - Cached: memInfo.Cached, + Total: memInfo.Total, + Available: memInfo.Available, + Free: memInfo.Free, + Cached: memInfo.Cached, }, nil }