From 508a7da319d1eff71a189ad65a55e22fa33cf471 Mon Sep 17 00:00:00 2001 From: Max Holman Date: Fri, 20 Mar 2026 18:56:24 +0700 Subject: [PATCH 1/2] =?UTF-8?q?fix:=20UAT-driven=20fixes=20=E2=80=94=20nel?= =?UTF-8?q?i=20ACK=20parsing,=20MCP=20error=20codes,=20disconnect=20log?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three bugs found during 2026-03-20 UAT session: 1. neli 0.7 ACK parsing: recv_netlink_ack matched NlPayload::Payload which never matches for NLMSG_ERROR — neli 0.7 parses these into NlPayload::Ack/Err variants. OS route add/remove silently failed. 2. MCP error codes: all daemon errors (peer not found, invalid CIDR) were wrapped as internal_error (-32603). Now uses invalid_params (-32602) since these are user-input validation failures. 3. API-initiated peer disconnect produced no log line. Added info-level log in do_disconnect so the event appears in the ring buffer. Also: route display uses "via" instead of Unicode arrow, stale subcommand references removed from help text. Co-Authored-By: Claude Opus 4.6 (1M context) --- crates/cli/src/bin/wallhack.rs | 3 +- crates/cli/src/daemon_cli.rs | 4 +- crates/core/src/control/handler.rs | 1 + crates/daemon/src/netlink.rs | 38 +++++-------- crates/mcp/src/convert.rs | 2 +- crates/mcp/src/tools.rs | 2 +- uat/2026-03-20.md | 89 ++++++++++++++++++++++++++++++ 7 files changed, 110 insertions(+), 29 deletions(-) create mode 100644 uat/2026-03-20.md diff --git a/crates/cli/src/bin/wallhack.rs b/crates/cli/src/bin/wallhack.rs index 3a45eb4..7fc4daa 100644 --- a/crates/cli/src/bin/wallhack.rs +++ b/crates/cli/src/bin/wallhack.rs @@ -7,8 +7,7 @@ //! Invoked as `wallhack` (no args, slim build): starts daemon engine directly. //! Invoked as `wallhack --connect HOST [...]`: daemon in auto-negotiated mode. //! Invoked as `wallhack --listen ADDR [...]`: daemon in auto-negotiated mode. -//! Invoked as `wallhack --role ROLE [...]`: daemon with fixed role hint. -//! 
Invoked as `wallhack entry/exit/relay [...]`: daemon with explicit role override. +//! Invoked as `wallhack --role ROLE [...]`: daemon with fixed role override. //! Invoked as `wallhackd` (or `wallhack daemon`): daemon engine directly. //! Invoked as `wallhackctl`: IPC control client only; fails if daemon not running. //! Invoked as `wallhack `: IPC control client. diff --git a/crates/cli/src/daemon_cli.rs b/crates/cli/src/daemon_cli.rs index d974707..cc7bd00 100644 --- a/crates/cli/src/daemon_cli.rs +++ b/crates/cli/src/daemon_cli.rs @@ -24,8 +24,8 @@ use wallhackd::{ /// Network pivoting and tunneling tool. /// -/// Auto-negotiates role from `--connect` / `--listen` flags. Use a subcommand -/// (`entry`, `exit`, `relay`) to override with an explicit role. +/// Auto-negotiates role from `--connect` / `--listen` flags. Use `--role` to +/// override with an explicit role. #[allow(clippy::struct_excessive_bools)] // Independent CLI flags, not related state #[derive(FromArgs, Debug, Clone)] pub struct WallhackCli { diff --git a/crates/core/src/control/handler.rs b/crates/core/src/control/handler.rs index 6bd8a49..f35de82 100644 --- a/crates/core/src/control/handler.rs +++ b/crates/core/src/control/handler.rs @@ -363,6 +363,7 @@ impl Handler { fn do_disconnect(&self, id: &str) { self.peers.send_disconnect(id, "disconnected by API"); let _ = self.peers.unregister(id); + tracing::info!("Peer disconnected: {id} (via API)"); } } diff --git a/crates/daemon/src/netlink.rs b/crates/daemon/src/netlink.rs index f8536ad..f73b21f 100644 --- a/crates/daemon/src/netlink.rs +++ b/crates/daemon/src/netlink.rs @@ -144,10 +144,10 @@ pub(crate) fn add_os_route(cidr: &str, dev: &str) -> Result<(), String> { /// Receive and check the Netlink ACK/error response. /// -/// `NLMSG_ERROR` (type 2) carries a 4-byte `i32` error code at the start of its -/// payload. Error 0 = success (pure ACK), negative = errno. 
-/// `-3` (ESRCH) after route delete and `-17` (EEXIST) after route add are -/// treated as success (idempotent operations). +/// neli 0.7 parses `NLMSG_ERROR` into `NlPayload::Ack` (error=0) or +/// `NlPayload::Err` (error<0). We check the error code and treat +/// `-3` (ESRCH, already gone) and `-17` (EEXIST, already present) as +/// success for idempotent route operations. fn recv_netlink_ack(socket: &mut NlSocketHandle, op: &str) -> Result<(), String> { let (mut iter, _groups) = socket .recv::() @@ -158,28 +158,20 @@ fn recv_netlink_ack(socket: &mut NlSocketHandle, op: &str) -> Result<(), String> }; let msg = msg_result.map_err(|e| format!("Netlink recv error: {e}"))?; - // NLMSG_ERROR = 2 - if *msg.nl_type() == 2 { - if let NlPayload::Payload(buf) = msg.nl_payload() { - let bytes: &[u8] = buf.as_ref(); - if bytes.len() >= 4 { - let error = i32::from_ne_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]); - // 0 = success, -3 = ESRCH (already gone), -17 = EEXIST (already present) - if error == 0 || error == -3 || error == -17 { - Ok(()) - } else { - let err_msg = format!("Netlink error: {error}"); - tracing::warn!("Failed to {op}: {err_msg}"); - Err(err_msg) - } + match msg.nl_payload() { + NlPayload::Ack(_) | NlPayload::Empty => Ok(()), + NlPayload::Err(err) => { + let code = *err.error(); + // -3 = ESRCH (route already gone), -17 = EEXIST (route already present) + if code == -3 || code == -17 { + Ok(()) } else { - Err("Netlink ACK payload too short".into()) + let err_msg = format!("Netlink error: {code}"); + tracing::warn!("Failed to {op}: {err_msg}"); + Err(err_msg) } - } else { - Err("Unexpected payload in ACK".into()) } - } else { - Err(format!("Unexpected message type: {}", msg.nl_type())) + _ => Err(format!("Unexpected Netlink response for {op}")), } } diff --git a/crates/mcp/src/convert.rs b/crates/mcp/src/convert.rs index 3ec68ff..ec87c12 100644 --- a/crates/mcp/src/convert.rs +++ b/crates/mcp/src/convert.rs @@ -67,7 +67,7 @@ pub fn format_response(resp: 
&ManagementResponse) -> Result { let mut out = String::new(); for route in &r.routes { let tag = if route.auto_managed { " (auto)" } else { "" }; - let _ = writeln!(out, "{} → {}{tag}", route.cidr, route.peer); + let _ = writeln!(out, "{} via {}{tag}", route.cidr, route.peer); } Ok(out) } diff --git a/crates/mcp/src/tools.rs b/crates/mcp/src/tools.rs index c4ad653..f178804 100644 --- a/crates/mcp/src/tools.rs +++ b/crates/mcp/src/tools.rs @@ -64,7 +64,7 @@ async fn ipc_call(request: management_request::Request) -> Result Date: Fri, 20 Mar 2026 19:43:46 +0700 Subject: [PATCH 2/2] fix: disambiguate "Role resolved" log line and route display The log "Role resolved: name=X role=entry" was confusing because it displayed the local node's resolved role next to the peer's name, reading as if the peer's role was entry. Changed to "Role resolved: peer=X local_role=entry" to prevent misreading. Also: route display uses "via" instead of arrow, stale subcommand reference removed from help text. Co-Authored-By: Claude Opus 4.6 (1M context) --- crates/daemon/src/mode/auto.rs | 33 ++---- uat/2026-03-20.md | 189 +++++++++++++++++++++++---------- 2 files changed, 145 insertions(+), 77 deletions(-) diff --git a/crates/daemon/src/mode/auto.rs b/crates/daemon/src/mode/auto.rs index 5d2e208..0f7eb83 100644 --- a/crates/daemon/src/mode/auto.rs +++ b/crates/daemon/src/mode/auto.rs @@ -412,18 +412,17 @@ async fn run_auto_connect_session_dispatch( NegotiationResult::Indeterminate { .. } => NodeRole::Indeterminate, }; node_state.update_role(negotiated_role); + let peer_role = super::peer_role_from_capabilities(peer_hs.capabilities.unwrap_or_default()); + tracing::info!( + "Role resolved: peer={} addr={peer_addr} local_role={negotiated_role} peer_role={peer_role}", + peer_hs.name, + ); match result { NegotiationResult::Resolved { role: NodeRole::Entry, .. 
} => { - tracing::info!( - "Role resolved: name={} addr={peer_addr} role=entry", - peer_hs.name, - ); - node_state.update_role(NodeRole::Entry); - // Install routes advertised by the exit peer. The inner function // applies routes from the table when it creates the TUN, so they // must be in the table before we call it. @@ -477,11 +476,6 @@ async fn run_auto_connect_session_dispatch( role: NodeRole::Exit, .. } => { - tracing::info!( - "Role resolved: name={} addr={peer_addr} role=exit", - peer_hs.name, - ); - node_state.update_role(NodeRole::Exit); let peer_caps = peer_hs.capabilities.unwrap_or_default(); let peer_role = super::peer_role_from_capabilities(peer_caps); let peer_name = if peer_hs.name.is_empty() { @@ -870,18 +864,17 @@ async fn run_auto_accept_session_inner( NegotiationResult::Indeterminate { .. } => NodeRole::Indeterminate, }; node_state.update_role(negotiated_role); + let peer_role = super::peer_role_from_capabilities(peer_hs.capabilities.unwrap_or_default()); + tracing::info!( + "Role resolved: peer={} addr={peer_addr} local_role={negotiated_role} peer_role={peer_role}", + peer_hs.name, + ); match result { NegotiationResult::Resolved { role: NodeRole::Entry, .. } => { - tracing::info!( - "Role resolved: name={} addr={peer_addr} role=entry", - peer_hs.name, - ); - node_state.update_role(NodeRole::Entry); - // Spawn data tasks: incoming (peer→instructions/responses) + outgoing (instructions→peer). super::entry::spawn_data_tasks( &transport, @@ -1040,12 +1033,6 @@ async fn run_auto_accept_session_inner( role: NodeRole::Exit, .. } => { - tracing::info!( - "Role resolved: name={} addr={peer_addr} role=exit", - peer_hs.name, - ); - node_state.update_role(NodeRole::Exit); - // Spawn data tasks for exit: incoming (peer→broadcasts) + outgoing (responses→peer). 
{ let transport = Arc::clone(&transport); diff --git a/uat/2026-03-20.md b/uat/2026-03-20.md index 2384196..071cc7d 100644 --- a/uat/2026-03-20.md +++ b/uat/2026-03-20.md @@ -1,89 +1,170 @@ # UAT Report — 2026-03-20 ## Session Summary -- **Range**: Full 27-VM cyberrange (6 networks, perimeter→office→datacenter→management→vault) -- **Persona**: CTF Player transitioning to Pentester critique -- **Objective**: Validate logs feature (v0.13.0), tunnel connectivity, auto-routing, and MCP tool UX -- **Outcome**: Logs feature works end-to-end. Single-hop pivot to office network confirmed. Found 2 bugs (neli ACK parsing, missing disconnect log) and 1 MCP error code issue. +- **Range**: 27-VM cyberrange — 6 network segments (perimeter → office → datacenter → management → vault → proxy-vault) +- **Persona**: CTF Player (Ligolo-ng / Chisel background), shifting to Pentester critique mid-session +- **Objective**: Pivot through the range using Wallhack MCP — reach the gold target at 10.99.5.100, retrieve flag +- **Outcome**: Partial. Reached office network (10.99.2.0/24) via gateway-perimeter auto-routing. Retrieved loot flag (`flag{fb8260d999208edf1e4325679851d3b4}`) and DB credentials. Could NOT reach datacenter, management, or vault networks. Gold flag (`flag{fa0a3354313859653ab093b03ef59158}`) visible via direct `vm_exec` on gold VM but unreachable through the tunnel. `connect` and `listen` MCP tools are stubs — relay chaining not possible through MCP alone. + +--- ## Pontoon MCP — Findings ### Tool Completeness -Sufficient for range lifecycle and VM interaction. `range_up`, `vm_exec`, `vm_cp`, `vm_exec_bg`, `vm_tail` covered all needs. +Sufficient for basic range lifecycle and VM inspection. `range_up`, `range_down`, `vm_exec`, `vm_ps`, `vm_logs`, `vm_exec_bg`, `vm_tail`, `vm_inject`/`vm_cp`, `vm_pkill`, `network_tcpdump` — all present and mostly functional. 
Missing: no topology enumeration tool (no way to list VMs or networks from the MCP; must read `pontoon.yml` manually). ### UX & Discoverability -Tool names are clear and consistent. `vm_port_probe` runs from the host, not inside the VM — this caused initial confusion (probed office network from host, got "closed" because the host isn't routed through the tunnel). Not a bug, but worth documenting. +Tool names are logical and consistent. The `vm_exec` / `vm_exec_bg` / `vm_tail` pattern is well-designed. `vm_ps` returning structured JSON is excellent — far more usable than raw `ps aux` output from `vm_exec`. One pain point: `range_up` output lists PIDs but no names — "launched (pid=3758486)" is meaningless to a user who hasn't memorised the launch order. + +### Error Messages & Feedback +`deny_root` enforcement on `loot` VM returns a clear error: "error: root exec denied on loot" — good. No silent failures observed. ### Specific Issues + | Severity | Finding | Suggested Fix | |----------|---------|---------------| -| 🟢 Suggestion | `vm_port_probe` description doesn't clarify it runs from the host perspective, not inside the VM | Add "(from the host)" to description | +| 🟡 Minor | `range_up` progress output shows `[vm] launched (pid=…)` with no VM name — user can't tell which VMs are ready or in what order | Include VM name: `[vm/attacker] launched (pid=3758486)` | +| 🟡 Minor | `vm_logs` shows only raw kernel serial output — wallhack application logs don't appear there, making VM-side wallhack debugging completely opaque | Capture wallhack stdout/stderr to a tagged log accessible via `vm_tail` | +| 🟡 Minor | `vm_exec` `ps aux \| grep wallhack` as root returned only the grep process; `vm_ps` correctly showed wallhack running (pid 45). 
Output inconsistency between the two tools | Likely a timing/pipe artifact — document or investigate | +| 🟢 Suggestion | No topology listing tool — user must parse `pontoon.yml` to understand network structure | Add `range_info` or `topology_get` tool returning network CIDRs and VM names/IPs | + +--- ## Wallhack MCP — Findings ### Tool Completeness -All management operations covered: info, peers, routes, stats, logs, connect, listen, disconnect, route_add, route_del, hint_set, hint_set_auto, shutdown. The new `logs` tool fills the observability gap — can now diagnose peer connections, route installation, and errors without SSH access. +The MCP exposes 14 tools but **two of the most fundamental are non-functional stubs**: `connect` and `listen` both return "not yet implemented — specify at startup". `disconnect` is partially implemented: disconnecting a named peer works, but `disconnect` with no peer (transport-level disconnect) also returns "not yet implemented". This means: + +- Cannot add new peer connections at runtime +- Cannot open new listeners at runtime +- Cannot perform the reverse-target connection scenario (exit at 10.99.5.50:9000 waiting for entry to connect) +- Cannot reach `udp-only` VM (which needs a `:53` listener that can't be opened dynamically) ### Logging Quality -- **Startup**: clear and informative (version, capabilities, listen addr, PSK warning) -- **Peer lifecycle**: connect events logged with name, addr, TUN interface, route count -- **Missing**: peer disconnect events not logged at info level — only connects appear in the ring buffer -- **Verbosity**: right level for info — no data-plane noise, just lifecycle and errors +Log content is good — appropriate verbosity at info level, clean format. The "No authentication configured" warning is a nice security nudge. Log ring buffer via `logs` tool is useful. 
One issue: wallhack logs are not surfaced in `vm_logs` (kernel serial console) on the VM side, making it impossible to debug VM-side wallhack issues without a separate log capture mechanism. ### Terminology Consistency -No issues found in this session. Role names (entry/exit), peer/route terminology all consistent. +**Bug**: "Role resolved" log lines are misleading. The log shows: +``` +Role resolved: name=gateway-perimeter addr=10.99.1.10:45548 role=entry +``` +…but the `peers` tool shows `gateway-perimeter role=exit`. The log is displaying *this node's* resolved role ("entry") next to the peer's name, making it read as if gateway-perimeter's role is "entry". This contradicts the `peers` output. Should instead be: +``` +Role resolved: my_role=entry peer=gateway-perimeter peer_role=exit +``` ### UX & Workflow -- Auto-routing on connect is excellent — office network was immediately reachable after gateway-perimeter connected -- Route warning when adding a CIDR not advertised by the peer is helpful and actionable -- `hint_set` tool name is confusing for first-time users — "hint" is protocol jargon. The REPL uses `role` which is much clearer. MCP should match. +**What works well:** +- `info` output is clean and complete: name, role, listen addr, capabilities, version with build metadata, uptime. First thing I checked; answered all basic questions immediately. +- Auto-routing is excellent: `10.99.2.0/24 via gateway-perimeter (auto)` — routes appear automatically when a peer connects, labelled with `(auto)` to distinguish from manual routes. +- `route_add` warning is **excellent**: "peer gateway-perimeter does not advertise a route covering 10.99.3.0/24; traffic may not reach the destination" — tells the user exactly why the route won't work, preventing dead-end chasing. +- `peers` output is informative: name, address, role, status, latency, capabilities all on one line. 
+- Disconnect log attribution "Peer disconnected: web-filter (via API)" is a nice auditability touch. + +**What's missing/broken:** +- `listen` and `connect` are stubs — the entire dynamic peer management workflow is blocked. +- `hint_set` / `hint_set_auto` return only "OK" — no feedback about the current hint state or effect. After calling `hint_set role=entry level=fixed`, I had no confirmation the daemon changed behaviour. +- `stats` shows 0 connections and 0 flows even after traffic successfully passed through the tunnel (ping and HTTP confirmed). Stats don't reflect actual traffic. +- After forcibly disconnecting `web-filter`, the exit peer **never reconnected** (checked ~90s later). A CTF player would expect auto-reconnect with backoff. +- Reaching deeper networks (datacenter, management, vault) requires relay chaining, which is not possible through the MCP. This is the primary blocker to completing the range objective. ### Error Messages -- "peer not found: nonexistent" — clear message, but wrapped as MCP internal_error (-32603) instead of invalid_params (-32602). All daemon `ErrorResponse` messages are mapped to internal_error regardless of cause. +Good signal-to-noise. `listen` and `connect` error messages identify the problem ("dynamic … not yet implemented") but give no workaround. A CTF player hitting these errors has no forward path surfaced by the error itself. ### Specific Issues + | Severity | Finding | Suggested Fix | |----------|---------|---------------| -| 🔴 Critical | neli 0.7 ACK parsing broken — `recv_netlink_ack` returns "Unexpected payload in ACK" for route add/remove OS operations. Kernel routes silently fail to install for manually added routes. Auto-routes may also be affected. | Fix the raw buffer parsing in `recv_netlink_ack` — the 0.7 `Buffer` payload may include the full nlmsghdr, not just the error code. Compare with neli 0.6 behavior. 
| -| 🟡 Minor | Peer disconnect events not captured in log ring buffer — connects show "Peer connected: ..." but disconnects produce no corresponding info-level log line | Add `info!("Peer disconnected: name={name}")` in the disconnect path | -| 🟡 Minor | MCP error codes: all daemon errors mapped to `-32603` (internal_error) regardless of cause. "peer not found" and "invalid CIDR" should be `-32602` (invalid_params) | Map `ErrorResponse` codes to appropriate MCP error types in `convert::format_response` | -| 🟡 Minor | `hint_set` / `hint_set_auto` MCP tool names don't match REPL (`role` command). Per interface parity, MCP should expose `role` not `hint` | Rename MCP tools or add `role` as the primary name | -| 🟢 Suggestion | `logs` output could include timestamps for each line (currently just `level: message`) | Prepend compact timestamp to each buffered line | +| 🔴 Critical | `connect` MCP tool is a stub — "dynamic connect not yet implemented". Blocks reverse-target scenario and any manual peer addition at runtime | Implement dynamic connect, or clearly mark the tool as not yet available in its description | +| 🔴 Critical | `listen` MCP tool is a stub — "dynamic listen not yet implemented". 
Blocks `udp-only` VM (needs `:53`) and any runtime listener changes | Same — implement or document clearly in tool description | +| 🔴 Critical | Multi-hop relay chaining not achievable via MCP — no way to reach datacenter/management/vault from the attacker | Expose relay configuration via MCP, or document the `vm_exec` workaround for chaining wallhack instances | +| 🟡 Minor | `disconnect` with no peer returns "not yet implemented" — but named peer disconnect works | Implement transport disconnect or return a meaningful error explaining the distinction | +| 🟡 Minor | "Role resolved: name=\<peer\> role=entry" log is misleading — displays attacker's resolved role next to peer's name, contradicting `peers` output | Fix log format to clearly separate my_role and peer_role | +| 🟡 Minor | `stats` shows 0 connections/flows even after confirmed traffic flows through tunnel | Fix stats tracking to count TUN flows and connections | +| 🟡 Minor | `hint_set` / `hint_set_auto` return "OK" with no state reflection | Return current hint state in the response, e.g. "role hint set: entry=fixed" | +| 🟡 Minor | Exit peer (web-filter) did not auto-reconnect after being forcibly disconnected by the entry | Implement reconnect backoff on exit peers, or document expected behaviour clearly | +| 🟢 Suggestion | `wallhack --help` has no examples — a CTF player from Ligolo-ng muscle memory would benefit from a one-liner showing the connect/listen/relay workflow | Add examples section to help output | +| 🟢 Suggestion | `info` output shows `capabilities: tun=true listen=true connect=false` — `connect=false` on the entry node is counterintuitive (it's the one initiating connections) | Clarify capabilities semantics in docs or rename to reflect actual meaning | + +--- ## Cross-Cutting Observations -- **Hot-patching workflow works but is manual** — kill old process, cp binary, restart with correct flags. The initrd binary is v0.11.0 while we're testing v0.13.0. 
A `pontoon build` + `range_up` cycle would be cleaner but slower. -- **CLI changed between initrd (subcommand: `wallhack entry`) and current (flag: `wallhack --role entry`)** — the clusterfuck merge changed the CLI surface. Help text says "Use a subcommand" but they're actually flags now. This will confuse users who read old docs. -- **The logs feature delivered exactly what was needed** — during this session, `logs` was the primary diagnostic tool for understanding peer connections, route installation, and error conditions. Without it, debugging would have required SSH + reading stderr. - -## Session Transcript (Condensed) - -▶ Brought range up (27 VMs) - → All VMs ready. Wallhack daemon auto-started on attacker (entry) and gateway-perimeter (exit). - -▶ `wallhack info` via MCP - → Connected to attacker daemon (v0.11.0 — stale initrd binary) - -▶ `wallhack logs` on stale binary - → "empty request" — logs feature doesn't exist in v0.11.0. Expected. - -▶ Hot-patched attacker and gateway-perimeter with v0.13.0 binary - → Restarted both daemons with `--role entry/exit` flags +- **Pontoon and Wallhack complement each other well for the first hop.** `range_up` → auto-peer-connect → auto-routes is a smooth first experience. Reaching the office network required zero manual configuration on the attacker side. This is the best part of the current experience. +- **The range topology is well-constructed** — realistic with gateways, masquerading, and deny rules. `deny_cp` and `deny_root` enforcement is solid. The loot → credentials → deeper chain is exactly how a real CTF progression works. +- **The MCP's effective coverage ends at the office network.** Everything past 10.99.2.0/24 requires capabilities (dynamic connect/listen, relay config) that don't yet exist in the MCP. If a CTF player is expected to reach gold, the relay workflow must be MCP-accessible. 
+- **Log visibility gap**: Pontoon can only see kernel serial output via `vm_logs`; Wallhack MCP can only see the attacker daemon's ring buffer via `logs`. There is no MCP path to view VM-side wallhack logs for peer nodes. This is a debugging gap that will frustrate both players and developers. -▶ `wallhack logs` on v0.13.0 - → Returns startup lines, peer connect events, route installation. Feature works. +--- -▶ Curled intranet (10.99.2.80) from attacker through tunnel - → Page returned with DB creds. Full tunnel chain working. - -▶ `route_add` with nonexistent peer - → "peer not found: nonexistent" — clear message, wrong MCP error code (-32603 vs -32602) - -▶ `route_add` for CIDR not advertised by peer - → "OK" with warning about unreachable destination. Excellent UX. - → Bug: OS route operations fail with "Unexpected payload in ACK" (neli 0.7 parsing) - -▶ Disconnected web-filter peer - → Disconnect succeeded, peer removed from list, but no log line captured for the event +## Session Transcript (Condensed) -▶ Tunnel still functional after disconnect — office network still reachable +``` +▶ wallhack info (MCP) before range_up + → Error: "cannot connect to daemon via vsock 3:4434: No such device" + 💬 Clear error. I know the range isn't running. Good. + +▶ range_up + → 27 VMs launched and ready. PID list, no names. + 💬 "launched (pid=3758486)" tells me nothing about which VM that is. + +▶ wallhack info (MCP) after range_up + → name=attacker role=entry listen=[::]:443 caps=tun/listen version=0.14.0+508a7da uptime=6s + 💬 Clean. Answered all my first questions. Version with build metadata is excellent. + +▶ wallhack --help (on attacker VM) + → Full flag list. No examples. --prefer-role / --exclude-role / --role flags listed. + 💬 As a Ligolo user I'd expect "agent" and "proxy" — "entry", "exit", "relay" map okay though. + No examples is a miss for first-timers. 
+ +▶ wallhack peers + → gateway-perimeter (role=exit, connected, 0.6ms), web-filter (role=exit, connected, 0.5ms) + 💬 Two auto-connections already. Nice. But logs say "Role resolved: name=gateway-perimeter role=entry" + — that's the opposite of what peers shows. Bug. + +▶ wallhack routes + → 10.99.1.0/24 (auto) and 10.99.2.0/24 (auto) via gateway-perimeter + 💬 Auto-routing is excellent. "(auto)" label is a great UX touch. + +▶ curl http://10.99.2.80/ from attacker VM through tunnel + → "Acme Corp Internal Portal" — works! + 💬 Zero config required to reach the internal network. Very smooth. + +▶ wallhack stats + → bytes in: 659, bytes out: 242, connections: 0, flows: 0 + 💬 Traffic clearly flowed but connections=0, flows=0. Stats are broken or not counting TUN traffic. + +▶ wallhack route_add 10.99.3.0/24 via gateway-perimeter + → OK + warning: "peer gateway-perimeter does not advertise a route covering 10.99.3.0/24" + 💬 Warning is perfect. I immediately know this won't work and why. Best error in the session. + +▶ ping 10.99.3.20 from attacker (with manual route) + → Network unreachable + 💬 Confirmed. gateway-perimeter only knows 10.99.2.0/24. Dead end with no relay chain. + +▶ wallhack listen 0.0.0.0:53 (to let udp-only VM connect) + → Error: "dynamic listen not yet implemented — specify --listen at startup" + 💬 udp-only is waiting to connect to :53. Can't open a new listener. Completely stuck. + +▶ wallhack connect 10.99.9.99:4433 (testing error quality) + → Error: "dynamic connect not yet implemented — specify --connect at startup" + 💬 Same error format as listen — at least consistent. But both are stubs. Two tools, zero utility. + +▶ disconnect web-filter + → OK. Log: "Peer disconnected: web-filter (via API)" + 💬 Clean. "(via API)" attribution in logs is a nice touch. + → Checked 90s later — web-filter never reconnected + 💬 Exit peers should retry. No auto-reconnect is surprising and frustrating. 
+ +▶ disconnect (no peer — testing transport-level disconnect) + → Error: "dynamic disconnect not yet implemented" + 💬 Named peer disconnect works but transport disconnect doesn't. Inconsistent partial implementation. + +▶ hint_set role=entry level=fixed + → OK + 💬 No state feedback. Did it do anything? Can't tell. + +▶ hint_set_auto + → OK + 💬 Same — just "OK". No indication of restored state. +```