From 508a7da319d1eff71a189ad65a55e22fa33cf471 Mon Sep 17 00:00:00 2001
From: Max Holman <max@holmn.com>
Date: Fri, 20 Mar 2026 18:56:24 +0700
Subject: [PATCH 1/2] =?UTF-8?q?fix:=20UAT-driven=20fixes=20=E2=80=94=20nel?=
 =?UTF-8?q?i=20ACK=20parsing,=20MCP=20error=20codes,=20disconnect=20log?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three bugs found during 2026-03-20 UAT session:

1. neli 0.7 ACK parsing: recv_netlink_ack matched NlPayload::Payload
   which never matches for NLMSG_ERROR — neli 0.7 parses these into
   NlPayload::Ack/Err variants. OS route add/remove silently failed.

2. MCP error codes: all daemon errors (peer not found, invalid CIDR)
   were wrapped as internal_error (-32603). Now uses invalid_params
   (-32602) since these are user-input validation failures.

3. API-initiated peer disconnect produced no log line. Added info-level
   log in do_disconnect so the event appears in the ring buffer.

Also: route display uses "via" instead of Unicode arrow, stale
subcommand references removed from help text.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 crates/cli/src/bin/wallhack.rs     |  3 +-
 crates/cli/src/daemon_cli.rs       |  4 +-
 crates/core/src/control/handler.rs |  1 +
 crates/daemon/src/netlink.rs       | 38 +++++--------
 crates/mcp/src/convert.rs          |  2 +-
 crates/mcp/src/tools.rs            |  2 +-
 uat/2026-03-20.md                  | 89 ++++++++++++++++++++++++++++++
 7 files changed, 110 insertions(+), 29 deletions(-)
 create mode 100644 uat/2026-03-20.md

diff --git a/crates/cli/src/bin/wallhack.rs b/crates/cli/src/bin/wallhack.rs
index 3a45eb4..7fc4daa 100644
--- a/crates/cli/src/bin/wallhack.rs
+++ b/crates/cli/src/bin/wallhack.rs
@@ -7,8 +7,7 @@
 //! Invoked as `wallhack` (no args, slim build): starts daemon engine directly.
 //! Invoked as `wallhack --connect HOST [...]`: daemon in auto-negotiated mode.
 //! Invoked as `wallhack --listen ADDR [...]`: daemon in auto-negotiated mode.
-//! Invoked as `wallhack --role ROLE [...]`: daemon with fixed role hint.
-//! Invoked as `wallhack entry/exit/relay [...]`: daemon with explicit role override.
+//! Invoked as `wallhack --role ROLE [...]`: daemon with fixed role override.
 //! Invoked as `wallhackd` (or `wallhack daemon`): daemon engine directly.
 //! Invoked as `wallhackctl`: IPC control client only; fails if daemon not running.
 //! Invoked as `wallhack <control-subcommand>`: IPC control client.
diff --git a/crates/cli/src/daemon_cli.rs b/crates/cli/src/daemon_cli.rs
index d974707..cc7bd00 100644
--- a/crates/cli/src/daemon_cli.rs
+++ b/crates/cli/src/daemon_cli.rs
@@ -24,8 +24,8 @@ use wallhackd::{
 
 /// Network pivoting and tunneling tool.
 ///
-/// Auto-negotiates role from `--connect` / `--listen` flags. Use a subcommand
-/// (`entry`, `exit`, `relay`) to override with an explicit role.
+/// Auto-negotiates role from `--connect` / `--listen` flags. Use `--role` to
+/// override with an explicit role.
 #[allow(clippy::struct_excessive_bools)] // Independent CLI flags, not related state
 #[derive(FromArgs, Debug, Clone)]
 pub struct WallhackCli {
diff --git a/crates/core/src/control/handler.rs b/crates/core/src/control/handler.rs
index 6bd8a49..f35de82 100644
--- a/crates/core/src/control/handler.rs
+++ b/crates/core/src/control/handler.rs
@@ -363,6 +363,7 @@ impl Handler {
     fn do_disconnect(&self, id: &str) {
         self.peers.send_disconnect(id, "disconnected by API");
         let _ = self.peers.unregister(id);
+        tracing::info!("Peer disconnected: {id} (via API)");
     }
 }
 
diff --git a/crates/daemon/src/netlink.rs b/crates/daemon/src/netlink.rs
index f8536ad..f73b21f 100644
--- a/crates/daemon/src/netlink.rs
+++ b/crates/daemon/src/netlink.rs
@@ -144,10 +144,10 @@ pub(crate) fn add_os_route(cidr: &str, dev: &str) -> Result<(), String> {
 
 /// Receive and check the Netlink ACK/error response.
 ///
-/// `NLMSG_ERROR` (type 2) carries a 4-byte `i32` error code at the start of its
-/// payload. Error 0 = success (pure ACK), negative = errno.
-/// `-3` (ESRCH) after route delete and `-17` (EEXIST) after route add are
-/// treated as success (idempotent operations).
+/// neli 0.7 parses `NLMSG_ERROR` into `NlPayload::Ack` (error=0) or
+/// `NlPayload::Err` (error<0). `Ack` and `Empty` payloads count as success,
+/// as do error codes `-3` (ESRCH, already gone) and `-17` (EEXIST, already
+/// present), so route add/remove stay idempotent; anything else is an error.
 fn recv_netlink_ack(socket: &mut NlSocketHandle, op: &str) -> Result<(), String> {
     let (mut iter, _groups) = socket
         .recv::<u16, neli::types::Buffer>()
@@ -158,28 +158,20 @@ fn recv_netlink_ack(socket: &mut NlSocketHandle, op: &str) -> Result<(), String>
     };
     let msg = msg_result.map_err(|e| format!("Netlink recv error: {e}"))?;
 
-    // NLMSG_ERROR = 2
-    if *msg.nl_type() == 2 {
-        if let NlPayload::Payload(buf) = msg.nl_payload() {
-            let bytes: &[u8] = buf.as_ref();
-            if bytes.len() >= 4 {
-                let error = i32::from_ne_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]);
-                // 0 = success, -3 = ESRCH (already gone), -17 = EEXIST (already present)
-                if error == 0 || error == -3 || error == -17 {
-                    Ok(())
-                } else {
-                    let err_msg = format!("Netlink error: {error}");
-                    tracing::warn!("Failed to {op}: {err_msg}");
-                    Err(err_msg)
-                }
+    match msg.nl_payload() {
+        NlPayload::Ack(_) | NlPayload::Empty => Ok(()),
+        NlPayload::Err(err) => {
+            let code = *err.error();
+            // -3 = ESRCH (route already gone), -17 = EEXIST (route already present)
+            if code == -3 || code == -17 {
+                Ok(())
             } else {
-                Err("Netlink ACK payload too short".into())
+                let err_msg = format!("Netlink error: {code}");
+                tracing::warn!("Failed to {op}: {err_msg}");
+                Err(err_msg)
             }
-        } else {
-            Err("Unexpected payload in ACK".into())
         }
-    } else {
-        Err(format!("Unexpected message type: {}", msg.nl_type()))
+        _ => Err(format!("Unexpected Netlink response for {op}")),
     }
 }
 
diff --git a/crates/mcp/src/convert.rs b/crates/mcp/src/convert.rs
index 3ec68ff..ec87c12 100644
--- a/crates/mcp/src/convert.rs
+++ b/crates/mcp/src/convert.rs
@@ -67,7 +67,7 @@ pub fn format_response(resp: &ManagementResponse) -> Result<String, String> {
             let mut out = String::new();
             for route in &r.routes {
                 let tag = if route.auto_managed { " (auto)" } else { "" };
-                let _ = writeln!(out, "{} → {}{tag}", route.cidr, route.peer);
+                let _ = writeln!(out, "{} via {}{tag}", route.cidr, route.peer);
             }
             Ok(out)
         }
diff --git a/crates/mcp/src/tools.rs b/crates/mcp/src/tools.rs
index c4ad653..f178804 100644
--- a/crates/mcp/src/tools.rs
+++ b/crates/mcp/src/tools.rs
@@ -64,7 +64,7 @@ async fn ipc_call(request: management_request::Request) -> Result<String, rmcp::
         .await
         .map_err(|e| rmcp::ErrorData::internal_error(e.to_string(), None))?;
 
-    convert::format_response(&resp).map_err(|msg| rmcp::ErrorData::internal_error(msg, None))
+    convert::format_response(&resp).map_err(|msg| rmcp::ErrorData::invalid_params(msg, None))
 }
 
 #[rmcp::tool_router(vis = "pub")]
diff --git a/uat/2026-03-20.md b/uat/2026-03-20.md
new file mode 100644
index 0000000..2384196
--- /dev/null
+++ b/uat/2026-03-20.md
@@ -0,0 +1,89 @@
+# UAT Report — 2026-03-20
+
+## Session Summary
+- **Range**: Full 27-VM cyberrange (6 networks, perimeter→office→datacenter→management→vault)
+- **Persona**: CTF Player transitioning to Pentester critique
+- **Objective**: Validate logs feature (v0.13.0), tunnel connectivity, auto-routing, and MCP tool UX
+- **Outcome**: Logs feature works end-to-end. Single-hop pivot to office network confirmed. Found 2 bugs (neli ACK parsing, missing disconnect log) and 1 MCP error code issue.
+
+## Pontoon MCP — Findings
+
+### Tool Completeness
+Sufficient for range lifecycle and VM interaction. `range_up`, `vm_exec`, `vm_cp`, `vm_exec_bg`, `vm_tail` covered all needs.
+
+### UX & Discoverability
+Tool names are clear and consistent. `vm_port_probe` runs from the host, not inside the VM — this caused initial confusion (probed office network from host, got "closed" because the host isn't routed through the tunnel). Not a bug, but worth documenting.
+
+### Specific Issues
+| Severity | Finding | Suggested Fix |
+|----------|---------|---------------|
+| 🟢 Suggestion | `vm_port_probe` description doesn't clarify it runs from the host perspective, not inside the VM | Add "(from the host)" to description |
+
+## Wallhack MCP — Findings
+
+### Tool Completeness
+All management operations covered: info, peers, routes, stats, logs, connect, listen, disconnect, route_add, route_del, hint_set, hint_set_auto, shutdown. The new `logs` tool fills the observability gap — can now diagnose peer connections, route installation, and errors without SSH access.
+
+### Logging Quality
+- **Startup**: clear and informative (version, capabilities, listen addr, PSK warning)
+- **Peer lifecycle**: connect events logged with name, addr, TUN interface, route count
+- **Missing**: peer disconnect events not logged at info level — only connects appear in the ring buffer
+- **Verbosity**: right level for info — no data-plane noise, just lifecycle and errors
+
+### Terminology Consistency
+No issues found in this session. Role names (entry/exit), peer/route terminology all consistent.
+
+### UX & Workflow
+- Auto-routing on connect is excellent — office network was immediately reachable after gateway-perimeter connected
+- Route warning when adding a CIDR not advertised by the peer is helpful and actionable
+- `hint_set` tool name is confusing for first-time users — "hint" is protocol jargon. The REPL uses `role` which is much clearer. MCP should match.
+
+### Error Messages
+- "peer not found: nonexistent" — clear message, but wrapped as MCP internal_error (-32603) instead of invalid_params (-32602). All daemon `ErrorResponse` messages are mapped to internal_error regardless of cause.
+
+### Specific Issues
+| Severity | Finding | Suggested Fix |
+|----------|---------|---------------|
+| 🔴 Critical | neli 0.7 ACK parsing broken — `recv_netlink_ack` returns "Unexpected payload in ACK" for route add/remove OS operations. Kernel routes silently fail to install for manually added routes. Auto-routes may also be affected. | Fix the raw buffer parsing in `recv_netlink_ack` — the 0.7 `Buffer` payload may include the full nlmsghdr, not just the error code. Compare with neli 0.6 behavior. |
+| 🟡 Minor | Peer disconnect events not captured in log ring buffer — connects show "Peer connected: ..." but disconnects produce no corresponding info-level log line | Add `info!("Peer disconnected: name={name}")` in the disconnect path |
+| 🟡 Minor | MCP error codes: all daemon errors mapped to `-32603` (internal_error) regardless of cause. "peer not found" and "invalid CIDR" should be `-32602` (invalid_params) | Map `ErrorResponse` codes to appropriate MCP error types in `convert::format_response` |
+| 🟡 Minor | `hint_set` / `hint_set_auto` MCP tool names don't match REPL (`role` command). Per interface parity, MCP should expose `role` not `hint` | Rename MCP tools or add `role` as the primary name |
+| 🟢 Suggestion | `logs` output could include timestamps for each line (currently just `level: message`) | Prepend compact timestamp to each buffered line |
+
+## Cross-Cutting Observations
+
+- **Hot-patching workflow works but is manual** — kill old process, cp binary, restart with correct flags. The initrd binary is v0.11.0 while we're testing v0.13.0. A `pontoon build` + `range_up` cycle would be cleaner but slower.
+- **CLI changed between initrd (subcommand: `wallhack entry`) and current (flag: `wallhack --role entry`)** — the clusterfuck merge changed the CLI surface. Help text says "Use a subcommand" but they're actually flags now. This will confuse users who read old docs.
+- **The logs feature delivered exactly what was needed** — during this session, `logs` was the primary diagnostic tool for understanding peer connections, route installation, and error conditions. Without it, debugging would have required SSH + reading stderr.
+
+## Session Transcript (Condensed)
+
+▶ Brought range up (27 VMs)
+  → All VMs ready. Wallhack daemon auto-started on attacker (entry) and gateway-perimeter (exit).
+
+▶ `wallhack info` via MCP
+  → Connected to attacker daemon (v0.11.0 — stale initrd binary)
+
+▶ `wallhack logs` on stale binary
+  → "empty request" — logs feature doesn't exist in v0.11.0. Expected.
+
+▶ Hot-patched attacker and gateway-perimeter with v0.13.0 binary
+  → Restarted both daemons with `--role entry/exit` flags
+
+▶ `wallhack logs` on v0.13.0
+  → Returns startup lines, peer connect events, route installation. Feature works.
+
+▶ Curled intranet (10.99.2.80) from attacker through tunnel
+  → Page returned with DB creds. Full tunnel chain working.
+
+▶ `route_add` with nonexistent peer
+  → "peer not found: nonexistent" — clear message, wrong MCP error code (-32603 vs -32602)
+
+▶ `route_add` for CIDR not advertised by peer
+  → "OK" with warning about unreachable destination. Excellent UX.
+  → Bug: OS route operations fail with "Unexpected payload in ACK" (neli 0.7 parsing)
+
+▶ Disconnected web-filter peer
+  → Disconnect succeeded, peer removed from list, but no log line captured for the event
+
+▶ Tunnel still functional after disconnect — office network still reachable

From a42181a3421ab0d88cf40bf7fd73223aaf56333b Mon Sep 17 00:00:00 2001
From: Max Holman <max@holmn.com>
Date: Fri, 20 Mar 2026 19:43:46 +0700
Subject: [PATCH 2/2] fix: disambiguate "Role resolved" log line and route
 display

The log "Role resolved: name=X role=entry" was confusing because it
displayed the local node's resolved role next to the peer's name,
reading as if the peer's role was entry. It is now logged once, before
the role match in both the connect and accept paths, as
"Role resolved: peer=X addr=A local_role=entry peer_role=exit", so the
local and peer roles cannot be confused.

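Also: rewrites uat/2026-03-20.md with the findings from the full pivot
session. The route display change ("via" instead of arrow) and the stale
subcommand help-text cleanup are in PATCH 1/2, not in this patch.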

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 crates/daemon/src/mode/auto.rs |  33 ++----
 uat/2026-03-20.md              | 189 +++++++++++++++++++++++----------
 2 files changed, 145 insertions(+), 77 deletions(-)

diff --git a/crates/daemon/src/mode/auto.rs b/crates/daemon/src/mode/auto.rs
index 5d2e208..0f7eb83 100644
--- a/crates/daemon/src/mode/auto.rs
+++ b/crates/daemon/src/mode/auto.rs
@@ -412,18 +412,17 @@ async fn run_auto_connect_session_dispatch(
         NegotiationResult::Indeterminate { .. } => NodeRole::Indeterminate,
     };
     node_state.update_role(negotiated_role);
+    let peer_role = super::peer_role_from_capabilities(peer_hs.capabilities.unwrap_or_default());
+    tracing::info!(
+        "Role resolved: peer={} addr={peer_addr} local_role={negotiated_role} peer_role={peer_role}",
+        peer_hs.name,
+    );
 
     match result {
         NegotiationResult::Resolved {
             role: NodeRole::Entry,
             ..
         } => {
-            tracing::info!(
-                "Role resolved: name={} addr={peer_addr} role=entry",
-                peer_hs.name,
-            );
-            node_state.update_role(NodeRole::Entry);
-
             // Install routes advertised by the exit peer. The inner function
             // applies routes from the table when it creates the TUN, so they
             // must be in the table before we call it.
@@ -477,11 +476,6 @@ async fn run_auto_connect_session_dispatch(
             role: NodeRole::Exit,
             ..
         } => {
-            tracing::info!(
-                "Role resolved: name={} addr={peer_addr} role=exit",
-                peer_hs.name,
-            );
-            node_state.update_role(NodeRole::Exit);
             let peer_caps = peer_hs.capabilities.unwrap_or_default();
             let peer_role = super::peer_role_from_capabilities(peer_caps);
             let peer_name = if peer_hs.name.is_empty() {
@@ -870,18 +864,17 @@ async fn run_auto_accept_session_inner(
         NegotiationResult::Indeterminate { .. } => NodeRole::Indeterminate,
     };
     node_state.update_role(negotiated_role);
+    let peer_role = super::peer_role_from_capabilities(peer_hs.capabilities.unwrap_or_default());
+    tracing::info!(
+        "Role resolved: peer={} addr={peer_addr} local_role={negotiated_role} peer_role={peer_role}",
+        peer_hs.name,
+    );
 
     match result {
         NegotiationResult::Resolved {
             role: NodeRole::Entry,
             ..
         } => {
-            tracing::info!(
-                "Role resolved: name={} addr={peer_addr} role=entry",
-                peer_hs.name,
-            );
-            node_state.update_role(NodeRole::Entry);
-
             // Spawn data tasks: incoming (peer→instructions/responses) + outgoing (instructions→peer).
             super::entry::spawn_data_tasks(
                 &transport,
@@ -1040,12 +1033,6 @@ async fn run_auto_accept_session_inner(
             role: NodeRole::Exit,
             ..
         } => {
-            tracing::info!(
-                "Role resolved: name={} addr={peer_addr} role=exit",
-                peer_hs.name,
-            );
-            node_state.update_role(NodeRole::Exit);
-
             // Spawn data tasks for exit: incoming (peer→broadcasts) + outgoing (responses→peer).
             {
                 let transport = Arc::clone(&transport);
diff --git a/uat/2026-03-20.md b/uat/2026-03-20.md
index 2384196..071cc7d 100644
--- a/uat/2026-03-20.md
+++ b/uat/2026-03-20.md
@@ -1,89 +1,170 @@
 # UAT Report — 2026-03-20
 
 ## Session Summary
-- **Range**: Full 27-VM cyberrange (6 networks, perimeter→office→datacenter→management→vault)
-- **Persona**: CTF Player transitioning to Pentester critique
-- **Objective**: Validate logs feature (v0.13.0), tunnel connectivity, auto-routing, and MCP tool UX
-- **Outcome**: Logs feature works end-to-end. Single-hop pivot to office network confirmed. Found 2 bugs (neli ACK parsing, missing disconnect log) and 1 MCP error code issue.
+- **Range**: 27-VM cyberrange — 6 network segments (perimeter → office → datacenter → management → vault → proxy-vault)
+- **Persona**: CTF Player (Ligolo-ng / Chisel background), shifting to Pentester critique mid-session
+- **Objective**: Pivot through the range using Wallhack MCP — reach the gold target at 10.99.5.100, retrieve flag
+- **Outcome**: Partial. Reached office network (10.99.2.0/24) via gateway-perimeter auto-routing. Retrieved loot flag (`flag{fb8260d999208edf1e4325679851d3b4}`) and DB credentials. Could NOT reach datacenter, management, or vault networks. Gold flag (`flag{fa0a3354313859653ab093b03ef59158}`) visible via direct `vm_exec` on gold VM but unreachable through the tunnel. `connect` and `listen` MCP tools are stubs — relay chaining not possible through MCP alone.
+
+---
 
 ## Pontoon MCP — Findings
 
 ### Tool Completeness
-Sufficient for range lifecycle and VM interaction. `range_up`, `vm_exec`, `vm_cp`, `vm_exec_bg`, `vm_tail` covered all needs.
+Sufficient for basic range lifecycle and VM inspection. `range_up`, `range_down`, `vm_exec`, `vm_ps`, `vm_logs`, `vm_exec_bg`, `vm_tail`, `vm_inject`/`vm_cp`, `vm_pkill`, `network_tcpdump` — all present and mostly functional. Missing: no topology enumeration tool (no way to list VMs or networks from the MCP; must read `pontoon.yml` manually).
 
 ### UX & Discoverability
-Tool names are clear and consistent. `vm_port_probe` runs from the host, not inside the VM — this caused initial confusion (probed office network from host, got "closed" because the host isn't routed through the tunnel). Not a bug, but worth documenting.
+Tool names are logical and consistent. The `vm_exec` / `vm_exec_bg` / `vm_tail` pattern is well-designed. `vm_ps` returning structured JSON is excellent — far more usable than raw `ps aux` output from `vm_exec`. One pain point: `range_up` output lists PIDs but no names — "launched (pid=3758486)" is meaningless to a user who hasn't memorised the launch order.
+
+### Error Messages & Feedback
+`deny_root` enforcement on `loot` VM returns a clear error: "error: root exec denied on loot" — good. No silent failures observed.
 
 ### Specific Issues
+
 | Severity | Finding | Suggested Fix |
 |----------|---------|---------------|
-| 🟢 Suggestion | `vm_port_probe` description doesn't clarify it runs from the host perspective, not inside the VM | Add "(from the host)" to description |
+| 🟡 Minor | `range_up` progress output shows `[vm] launched (pid=…)` with no VM name — user can't tell which VMs are ready or in what order | Include VM name: `[vm/attacker] launched (pid=3758486)` |
+| 🟡 Minor | `vm_logs` shows only raw kernel serial output — wallhack application logs don't appear there, making VM-side wallhack debugging completely opaque | Capture wallhack stdout/stderr to a tagged log accessible via `vm_tail` |
+| 🟡 Minor | `vm_exec` `ps aux \| grep wallhack` as root returned only the grep process; `vm_ps` correctly showed wallhack running (pid 45). Output inconsistency between the two tools | Likely a timing/pipe artifact — document or investigate |
+| 🟢 Suggestion | No topology listing tool — user must parse `pontoon.yml` to understand network structure | Add `range_info` or `topology_get` tool returning network CIDRs and VM names/IPs |
+
+---
 
 ## Wallhack MCP — Findings
 
 ### Tool Completeness
-All management operations covered: info, peers, routes, stats, logs, connect, listen, disconnect, route_add, route_del, hint_set, hint_set_auto, shutdown. The new `logs` tool fills the observability gap — can now diagnose peer connections, route installation, and errors without SSH access.
+The MCP exposes 14 tools but **two of the most fundamental are non-functional stubs**: `connect` and `listen` both return "not yet implemented — specify at startup". `disconnect` is partially implemented: disconnecting a named peer works, but `disconnect` with no peer (transport-level disconnect) also returns "not yet implemented". This means:
+
+- Cannot add new peer connections at runtime
+- Cannot open new listeners at runtime
+- Cannot perform the reverse-target connection scenario (exit at 10.99.5.50:9000 waiting for entry to connect)
+- Cannot reach `udp-only` VM (which needs a `:53` listener that can't be opened dynamically)
 
 ### Logging Quality
-- **Startup**: clear and informative (version, capabilities, listen addr, PSK warning)
-- **Peer lifecycle**: connect events logged with name, addr, TUN interface, route count
-- **Missing**: peer disconnect events not logged at info level — only connects appear in the ring buffer
-- **Verbosity**: right level for info — no data-plane noise, just lifecycle and errors
+Log content is good — appropriate verbosity at info level, clean format. The "No authentication configured" warning is a nice security nudge. Log ring buffer via `logs` tool is useful. One issue: wallhack logs are not surfaced in `vm_logs` (kernel serial console) on the VM side, making it impossible to debug VM-side wallhack issues without a separate log capture mechanism.
 
 ### Terminology Consistency
-No issues found in this session. Role names (entry/exit), peer/route terminology all consistent.
+**Bug**: "Role resolved" log lines are misleading. The log shows:
+```
+Role resolved: name=gateway-perimeter addr=10.99.1.10:45548 role=entry
+```
+…but the `peers` tool shows `gateway-perimeter role=exit`. The log is displaying *this node's* resolved role ("entry") next to the peer's name, making it read as if gateway-perimeter's role is "entry". This contradicts the `peers` output. Should instead be:
+```
+Role resolved: my_role=entry peer=gateway-perimeter peer_role=exit
+```
 
 ### UX & Workflow
-- Auto-routing on connect is excellent — office network was immediately reachable after gateway-perimeter connected
-- Route warning when adding a CIDR not advertised by the peer is helpful and actionable
-- `hint_set` tool name is confusing for first-time users — "hint" is protocol jargon. The REPL uses `role` which is much clearer. MCP should match.
+**What works well:**
+- `info` output is clean and complete: name, role, listen addr, capabilities, version with build metadata, uptime. First thing I checked; answered all basic questions immediately.
+- Auto-routing is excellent: `10.99.2.0/24 via gateway-perimeter (auto)` — routes appear automatically when a peer connects, labelled with `(auto)` to distinguish from manual routes.
+- `route_add` warning is **excellent**: "peer gateway-perimeter does not advertise a route covering 10.99.3.0/24; traffic may not reach the destination" — tells the user exactly why the route won't work, preventing dead-end chasing.
+- `peers` output is informative: name, address, role, status, latency, capabilities all on one line.
+- Disconnect log attribution "Peer disconnected: web-filter (via API)" is a nice auditability touch.
+
+**What's missing/broken:**
+- `listen` and `connect` are stubs — the entire dynamic peer management workflow is blocked.
+- `hint_set` / `hint_set_auto` return only "OK" — no feedback about the current hint state or effect. After calling `hint_set role=entry level=fixed`, I had no confirmation the daemon changed behaviour.
+- `stats` shows 0 connections and 0 flows even after traffic successfully passed through the tunnel (ping and HTTP confirmed). Stats don't reflect actual traffic.
+- After forcibly disconnecting `web-filter`, the exit peer **never reconnected** (checked ~90s later). A CTF player would expect auto-reconnect with backoff.
+- Reaching deeper networks (datacenter, management, vault) requires relay chaining, which is not possible through the MCP. This is the primary blocker to completing the range objective.
 
 ### Error Messages
-- "peer not found: nonexistent" — clear message, but wrapped as MCP internal_error (-32603) instead of invalid_params (-32602). All daemon `ErrorResponse` messages are mapped to internal_error regardless of cause.
+Good signal-to-noise. `listen` and `connect` error messages identify the problem ("dynamic … not yet implemented") but give no workaround. A CTF player hitting these errors has no forward path surfaced by the error itself.
 
 ### Specific Issues
+
 | Severity | Finding | Suggested Fix |
 |----------|---------|---------------|
-| 🔴 Critical | neli 0.7 ACK parsing broken — `recv_netlink_ack` returns "Unexpected payload in ACK" for route add/remove OS operations. Kernel routes silently fail to install for manually added routes. Auto-routes may also be affected. | Fix the raw buffer parsing in `recv_netlink_ack` — the 0.7 `Buffer` payload may include the full nlmsghdr, not just the error code. Compare with neli 0.6 behavior. |
-| 🟡 Minor | Peer disconnect events not captured in log ring buffer — connects show "Peer connected: ..." but disconnects produce no corresponding info-level log line | Add `info!("Peer disconnected: name={name}")` in the disconnect path |
-| 🟡 Minor | MCP error codes: all daemon errors mapped to `-32603` (internal_error) regardless of cause. "peer not found" and "invalid CIDR" should be `-32602` (invalid_params) | Map `ErrorResponse` codes to appropriate MCP error types in `convert::format_response` |
-| 🟡 Minor | `hint_set` / `hint_set_auto` MCP tool names don't match REPL (`role` command). Per interface parity, MCP should expose `role` not `hint` | Rename MCP tools or add `role` as the primary name |
-| 🟢 Suggestion | `logs` output could include timestamps for each line (currently just `level: message`) | Prepend compact timestamp to each buffered line |
+| 🔴 Critical | `connect` MCP tool is a stub — "dynamic connect not yet implemented". Blocks reverse-target scenario and any manual peer addition at runtime | Implement dynamic connect, or clearly mark the tool as not yet available in its description |
+| 🔴 Critical | `listen` MCP tool is a stub — "dynamic listen not yet implemented". Blocks `udp-only` VM (needs `:53`) and any runtime listener changes | Same — implement or document clearly in tool description |
+| 🔴 Critical | Multi-hop relay chaining not achievable via MCP — no way to reach datacenter/management/vault from the attacker | Expose relay configuration via MCP, or document the `vm_exec` workaround for chaining wallhack instances |
+| 🟡 Minor | `disconnect` with no peer returns "not yet implemented" — but named peer disconnect works | Implement transport disconnect or return a meaningful error explaining the distinction |
+| 🟡 Minor | "Role resolved: name=\<peer\> role=entry" log is misleading — displays attacker's resolved role next to peer's name, contradicting `peers` output | Fix log format to clearly separate my_role and peer_role |
+| 🟡 Minor | `stats` shows 0 connections/flows even after confirmed traffic flows through tunnel | Fix stats tracking to count TUN flows and connections |
+| 🟡 Minor | `hint_set` / `hint_set_auto` return "OK" with no state reflection | Return current hint state in the response, e.g. "role hint set: entry=fixed" |
+| 🟡 Minor | Exit peer (web-filter) did not auto-reconnect after being forcibly disconnected by the entry | Implement reconnect backoff on exit peers, or document expected behaviour clearly |
+| 🟢 Suggestion | `wallhack --help` has no examples — a CTF player from Ligolo-ng muscle memory would benefit from a one-liner showing the connect/listen/relay workflow | Add examples section to help output |
+| 🟢 Suggestion | `info` output shows `capabilities: tun=true listen=true connect=false` — `connect=false` on the entry node is counterintuitive (it's the one initiating connections) | Clarify capabilities semantics in docs or rename to reflect actual meaning |
+
+---
 
 ## Cross-Cutting Observations
 
-- **Hot-patching workflow works but is manual** — kill old process, cp binary, restart with correct flags. The initrd binary is v0.11.0 while we're testing v0.13.0. A `pontoon build` + `range_up` cycle would be cleaner but slower.
-- **CLI changed between initrd (subcommand: `wallhack entry`) and current (flag: `wallhack --role entry`)** — the clusterfuck merge changed the CLI surface. Help text says "Use a subcommand" but they're actually flags now. This will confuse users who read old docs.
-- **The logs feature delivered exactly what was needed** — during this session, `logs` was the primary diagnostic tool for understanding peer connections, route installation, and error conditions. Without it, debugging would have required SSH + reading stderr.
-
-## Session Transcript (Condensed)
-
-▶ Brought range up (27 VMs)
-  → All VMs ready. Wallhack daemon auto-started on attacker (entry) and gateway-perimeter (exit).
-
-▶ `wallhack info` via MCP
-  → Connected to attacker daemon (v0.11.0 — stale initrd binary)
-
-▶ `wallhack logs` on stale binary
-  → "empty request" — logs feature doesn't exist in v0.11.0. Expected.
-
-▶ Hot-patched attacker and gateway-perimeter with v0.13.0 binary
-  → Restarted both daemons with `--role entry/exit` flags
+- **Pontoon and Wallhack complement each other well for the first hop.** `range_up` → auto-peer-connect → auto-routes is a smooth first experience. Reaching the office network required zero manual configuration on the attacker side. This is the best part of the current experience.
+- **The range topology is well-constructed** — realistic with gateways, masquerading, and deny rules. `deny_cp` and `deny_root` enforcement is solid. The loot → credentials → deeper chain is exactly how a real CTF progression works.
+- **The MCP's effective coverage ends at the office network.** Everything past 10.99.2.0/24 requires capabilities (dynamic connect/listen, relay config) that don't yet exist in the MCP. If a CTF player is expected to reach gold, the relay workflow must be MCP-accessible.
+- **Log visibility gap**: Pontoon can only see kernel serial output via `vm_logs`; Wallhack MCP can only see the attacker daemon's ring buffer via `logs`. There is no MCP path to view VM-side wallhack logs for peer nodes. This is a debugging gap that will frustrate both players and developers.
 
-▶ `wallhack logs` on v0.13.0
-  → Returns startup lines, peer connect events, route installation. Feature works.
+---
 
-▶ Curled intranet (10.99.2.80) from attacker through tunnel
-  → Page returned with DB creds. Full tunnel chain working.
-
-▶ `route_add` with nonexistent peer
-  → "peer not found: nonexistent" — clear message, wrong MCP error code (-32603 vs -32602)
-
-▶ `route_add` for CIDR not advertised by peer
-  → "OK" with warning about unreachable destination. Excellent UX.
-  → Bug: OS route operations fail with "Unexpected payload in ACK" (neli 0.7 parsing)
-
-▶ Disconnected web-filter peer
-  → Disconnect succeeded, peer removed from list, but no log line captured for the event
+## Session Transcript (Condensed)
 
-▶ Tunnel still functional after disconnect — office network still reachable
+```
+▶ wallhack info (MCP) before range_up
+  → Error: "cannot connect to daemon via vsock 3:4434: No such device"
+  💬 Clear error. I know the range isn't running. Good.
+
+▶ range_up
+  → 27 VMs launched and ready. PID list, no names.
+  💬 "launched (pid=3758486)" tells me nothing about which VM that is.
+
+▶ wallhack info (MCP) after range_up
+  → name=attacker role=entry listen=[::]:443 caps=tun/listen version=0.14.0+508a7da uptime=6s
+  💬 Clean. Answered all my first questions. Version with build metadata is excellent.
+
+▶ wallhack --help (on attacker VM)
+  → Full flag list. No examples. --prefer-role / --exclude-role / --role flags listed.
+  💬 As a Ligolo user I'd expect "agent" and "proxy" — "entry", "exit", and "relay" map onto those well enough, though.
+     The lack of examples is a miss for first-timers.
+
+▶ wallhack peers
+  → gateway-perimeter (role=exit, connected, 0.6ms), web-filter (role=exit, connected, 0.5ms)
+  💬 Two auto-connections already. Nice. But the logs say "Role resolved: name=gateway-perimeter role=entry"
+     — that contradicts the role=exit that peers reports. Bug.
+
+▶ wallhack routes
+  → 10.99.1.0/24 (auto) and 10.99.2.0/24 (auto) via gateway-perimeter
+  💬 Auto-routing is excellent. "(auto)" label is a great UX touch.
+
+▶ curl http://10.99.2.80/ from attacker VM through tunnel
+  → "Acme Corp Internal Portal" — works!
+  💬 Zero config required to reach the internal network. Very smooth.
+
+▶ wallhack stats
+  → bytes in: 659, bytes out: 242, connections: 0, flows: 0
+  💬 Traffic clearly flowed but connections=0, flows=0. Stats are broken or not counting TUN traffic.
+
+▶ wallhack route_add 10.99.3.0/24 via gateway-perimeter
+  → OK + warning: "peer gateway-perimeter does not advertise a route covering 10.99.3.0/24"
+  💬 Warning is perfect. I immediately know this won't work and why. Best error in the session.
+
+▶ ping 10.99.3.20 from attacker (with manual route)
+  → Network unreachable
+  💬 Confirmed. gateway-perimeter only knows 10.99.2.0/24. Dead end with no relay chain.
+
+▶ wallhack listen 0.0.0.0:53 (to let udp-only VM connect)
+  → Error: "dynamic listen not yet implemented — specify --listen at startup"
+  💬 udp-only is waiting to connect to :53. Can't open a new listener. Completely stuck.
+
+▶ wallhack connect 10.99.9.99:4433 (testing error quality)
+  → Error: "dynamic connect not yet implemented — specify --connect at startup"
+  💬 Same error format as listen — at least consistent. But both are stubs. Two tools, zero utility.
+
+▶ disconnect web-filter
+  → OK. Log: "Peer disconnected: web-filter (via API)"
+  💬 Clean. "(via API)" attribution in logs is a nice touch.
+  → Checked 90s later — web-filter never reconnected
+  💬 Exit peers should retry. The lack of auto-reconnect is surprising and frustrating.
+
+▶ disconnect (no peer — testing transport-level disconnect)
+  → Error: "dynamic disconnect not yet implemented"
+  💬 Named peer disconnect works but transport disconnect doesn't. Inconsistent partial implementation.
+
+▶ hint_set role=entry level=fixed
+  → OK
+  💬 No state feedback. Did it do anything? Can't tell.
+
+▶ hint_set_auto
+  → OK
+  💬 Same — just "OK". No indication of restored state.
+```