From 8c02d4bb9ae674e345cee7b3a43588a39cea0c25 Mon Sep 17 00:00:00 2001 From: Marco Walz Date: Thu, 16 Apr 2026 15:30:41 +0200 Subject: [PATCH 1/2] docs: inter-canister calls security guide --- docs/guides/security/inter-canister-calls.md | 482 ++++++++++++++++++- 1 file changed, 473 insertions(+), 9 deletions(-) diff --git a/docs/guides/security/inter-canister-calls.md b/docs/guides/security/inter-canister-calls.md index 82a4fb41..671b4747 100644 --- a/docs/guides/security/inter-canister-calls.md +++ b/docs/guides/security/inter-canister-calls.md @@ -5,15 +5,479 @@ sidebar: order: 5 --- -TODO: Write content for this page. +Inter-canister calls are the most common source of security bugs on the Internet Computer. The async messaging model creates a class of vulnerabilities that do not exist in synchronous systems: state can change between an `await` and its response, traps in callbacks can skip security-critical operations, and calls to untrusted canisters can permanently block upgrades. - -Handle the security pitfalls of async inter-canister calls. Cover reentrancy attacks (CallerGuard pattern), the saga pattern for multi-step operations, callback traps and how to handle them, bounded vs unbounded wait (the 2MB response limit), and state rollback on call failure. Explain why inter-canister calls are the #1 source of canister bugs. +This guide covers the specific patterns you must apply whenever your canister makes an inter-canister call. - -- icskills: canister-security, multi-canister +## Why inter-canister calls are dangerous - -- guides/canister-calls/onchain-calls -- basic inter-canister call patterns -- guides/canister-calls/parallel-calls -- parallel call safety -- concepts/security -- security model +When your canister `await`s a call to another canister, the IC scheduler can interleave other incoming messages while your canister waits for the response. This means: + +- State your canister read before the `await` may be different when the callback runs. +- A second call from the same user can arrive and begin executing before the first call's callback completes. +- If the callback traps, any mutations made in the callback are rolled back — but mutations made before the `await` are already committed. + +The code before `await` and the code after `await` execute as **separate atomic message executions**. Understanding this is the foundation of inter-canister call security. + +## Reentrancy and the CallerGuard pattern + +A reentrancy bug occurs when a second message from the same caller interleaves with a first message that is still in progress — that is, awaiting a response. In DeFi contexts this enables double-spending: the attacker calls `withdraw()`, waits for it to begin the inter-canister transfer, then calls `withdraw()` again before the first call updates the balance. + +The CallerGuard pattern prevents this by tracking which callers have an in-flight operation. When a second call arrives from the same caller, it is rejected before it can interleave. + +### Motoko + +In Motoko, the guard must be released in a `finally` block. The `finally` block runs in cleanup context, where state changes are committed even if the `try` body trapped. If you release the guard inside the `try` body, a trap in the callback leaves the guard held forever — the caller is permanently locked out. + +```motoko +import Map "mo:core/Map"; +import Principal "mo:core/Principal"; +import Error "mo:core/Error"; +import Result "mo:core/Result"; + +// Inside your persistent actor class { ... } +// Replace otherCanister with your canister reference. + +let pendingRequests = Map.empty(); + +func acquireGuard(principal : Principal) : Result.Result<(), Text> { + if (Map.get(pendingRequests, Principal.compare, principal) != null) { + return #err("already processing a request for this caller"); + }; + Map.add(pendingRequests, Principal.compare, principal, true); + #ok +}; + +func releaseGuard(principal : Principal) { + ignore Map.delete(pendingRequests, Principal.compare, principal); +}; + +public shared ({ caller }) func withdraw(amount : Nat) : async Result.Result<(), Text> { + if (Principal.isAnonymous(caller)) { + return #err("anonymous caller not allowed"); + }; + + // Acquire per-caller lock before any state reads or async calls. + switch (acquireGuard(caller)) { + case (#err(msg)) { return #err(msg) }; + case (#ok) {}; + }; + + try { + // Read state and make the inter-canister call here. + let result = await otherCanister.transfer(caller, amount); + #ok(result) + } catch (e) { + #err("transfer failed: " # Error.message(e)) + } finally { + // Runs in cleanup context regardless of success or trap. + // State mutations here are always committed. + releaseGuard(caller); + }; +}; +``` + +### Rust + +In Rust, the `Drop` trait releases the lock when the guard goes out of scope — including when the async function is cancelled or a trap occurs. Never write `let _ = CallerGuard::new(caller)?` — the leading underscore drops the guard immediately, making locking ineffective. Always bind to a named variable: `let _guard = CallerGuard::new(caller)?`. + +```rust +use std::cell::RefCell; +use std::collections::BTreeSet; +use candid::Principal; +use ic_cdk::update; +use ic_cdk::api::msg_caller; +use ic_cdk::call::Call; + +// Replace other_canister_id() with your canister's ID lookup. + +thread_local! { + static PENDING: RefCell> = RefCell::new(BTreeSet::new()); +} + +struct CallerGuard { + principal: Principal, +} + +impl CallerGuard { + fn new(principal: Principal) -> Result { + PENDING.with(|p| { + if !p.borrow_mut().insert(principal) { + return Err("already processing a request for this caller".to_string()); + } + Ok(Self { principal }) + }) + } +} + +impl Drop for CallerGuard { + fn drop(&mut self) { + PENDING.with(|p| { + p.borrow_mut().remove(&self.principal); + }); + } +} + +#[update] +async fn withdraw(amount: u64) -> Result<(), String> { + let caller = msg_caller(); + if caller == Principal::anonymous() { + return Err("anonymous caller not allowed".to_string()); + } + + // Acquire per-caller lock. Drop releases the lock when _guard goes out of scope. + let _guard = CallerGuard::new(caller)?; + + // Make the inter-canister call while the lock is held. + Call::bounded_wait(other_canister_id(), "transfer") + .with_args((caller, amount)) + .await + .map_err(|e| format!("transfer failed: {:?}", e))?; + + Ok(()) + // _guard dropped here — lock released +} +``` + +## State mutations before and after await + +Because the code before `await` and the code after `await` are separate message executions, you must treat them independently when reasoning about consistency. + +**The critical rule:** If your canister mutates state before an `await`, that mutation is committed even if the callback traps. + +### Example: deduct before transferring + +In a token transfer flow, deduct the balance before the inter-canister call rather than after. If the call fails, refund in the callback. This approach is safe: if the callback traps, the pre-deducted balance stays deducted — you can detect and remediate the stuck state. If you deduct after the call and the callback traps, the transfer happened but the balance was never deducted — funds are double-spent. + +**Motoko:** + +```motoko +import Map "mo:core/Map"; +import Principal "mo:core/Principal"; +import Error "mo:core/Error"; +import Result "mo:core/Result"; + +// Inside your persistent actor class { ... } +let balances = Map.empty(); + +public shared ({ caller }) func transfer(to : Principal, amount : Nat) : async Result.Result<(), Text> { + // 1. Validate balance before the await. + let balance = switch (Map.get(balances, Principal.compare, caller)) { + case (?b) b; + case null 0; + }; + if (balance < amount) { + return #err("insufficient balance"); + }; + + // 2. Deduct BEFORE the await — mutation is committed regardless of callback outcome. + Map.put(balances, Principal.compare, caller, balance - amount); + + // 3. Perform the inter-canister call. + try { + await ledgerCanister.transfer(to, amount); + #ok(()) + } catch (e) { + // 4. Refund on failure — the deduction persists even if this try/catch runs. + let currentBalance = switch (Map.get(balances, Principal.compare, caller)) { + case (?b) b; + case null 0; + }; + Map.put(balances, Principal.compare, caller, currentBalance + amount); + #err("transfer failed: " # Error.message(e)) + } +}; +``` + +**Rust:** + +```rust +use std::cell::RefCell; +use std::collections::BTreeMap; +use candid::Principal; +use ic_cdk::update; +use ic_cdk::api::msg_caller; +use ic_cdk::call::Call; + +// Replace ledger_canister_id() with your canister's ID lookup. + +thread_local! { + static BALANCES: RefCell> = + RefCell::new(BTreeMap::new()); +} + +#[update] +async fn transfer(to: Principal, amount: u64) -> Result<(), String> { + let caller = msg_caller(); + + // 1. Validate and deduct BEFORE the await. + let deducted = BALANCES.with(|b| { + let mut balances = b.borrow_mut(); + let balance = balances.get(&caller).copied().unwrap_or(0); + if balance < amount { + return Err("insufficient balance".to_string()); + } + balances.insert(caller, balance - amount); + Ok(()) + })?; + let _ = deducted; + + // 2. Make the inter-canister call. + let result = Call::bounded_wait(ledger_canister_id(), "transfer") + .with_args((to, amount)) + .await; + + if let Err(e) = result { + // 3. Refund on failure. + BALANCES.with(|b| { + let mut balances = b.borrow_mut(); + let current = balances.get(&caller).copied().unwrap_or(0); + balances.insert(caller, current + amount); + }); + return Err(format!("transfer failed: {:?}", e)); + } + + Ok(()) +} +``` + +## Callback traps and security-critical cleanup + +A trap in an inter-canister call callback is particularly dangerous: the callback's state mutations are rolled back, but the pre-`await` mutations are not. A malicious callee can induce a trap in your callback to skip actions that should always run — like debiting an account. + +To protect against this: + +1. **Keep callbacks minimal.** The less logic in a callback, the fewer opportunities for a trap. +2. **Use `finally` (Motoko) or `Drop` guards (Rust) for cleanup.** Cleanup that runs in `finally` or in `drop()` executes in cleanup context where mutations persist even after a trap. +3. **Avoid calling untrusted canisters** from callbacks that perform security-critical state changes. The callee can cause your callback to trap. + +### Motoko: cleanup in finally + +```motoko +import Error "mo:core/Error"; + +// Inside your persistent actor class { ... } +// Replace otherCanister with your canister reference. + +var operationInProgress = false; + +public shared ({ caller }) func riskyOperation() : async () { + operationInProgress := true; // Committed immediately + + try { + await otherCanister.doSomething(); + // ... callback logic + } catch (e) { + // Handle error + ignore Error.message(e); + } finally { + // Runs in cleanup context — mutation persists even if callback trapped. + operationInProgress := false; + } +}; +``` + +### Rust: cleanup via Drop + +```rust +use std::cell::Cell; +use ic_cdk::update; +use ic_cdk::call::Call; + +// Replace other_canister_id() with your canister's ID lookup. + +thread_local! { + static OPERATION_IN_PROGRESS: Cell = Cell::new(false); +} + +struct OperationGuard; + +impl Drop for OperationGuard { + fn drop(&mut self) { + // Runs when the guard is dropped, even during cleanup after a trap. + OPERATION_IN_PROGRESS.with(|f| f.set(false)); + } +} + +#[update] +async fn risky_operation() -> Result<(), String> { + OPERATION_IN_PROGRESS.with(|f| f.set(true)); // Committed immediately + + // _guard released (Drop called) when this function returns or is cancelled. + let _guard = OperationGuard; + + Call::bounded_wait(other_canister_id(), "do_something") + .await + .map_err(|e| format!("call failed: {:?}", e))?; + + Ok(()) +} +``` + +## Bounded vs unbounded wait + +The IC offers two kinds of inter-canister calls: + +| | `bounded_wait` | `unbounded_wait` | +|---|---|---| +| Timeout | 300 seconds (default) | No timeout | +| If callee is unresponsive | Returns `SYS_UNKNOWN` error | Waits indefinitely | +| Upgrade safety | Canister can be stopped and upgraded after timeout | Canister **cannot be stopped** while awaiting | +| Use for | Calls to external or untrusted canisters | Calls to your own canisters you control | + +**The upgrade safety issue:** A canister cannot be stopped (and therefore cannot be upgraded) while it has outstanding unbounded-wait calls. If the callee is malicious or buggy and never responds, your canister is permanently stuck. Use `bounded_wait` for any call to a canister you do not control. + +### Motoko: bounded vs unbounded + +Motoko does not yet expose a direct API to switch between bounded and unbounded wait. The `await` keyword currently uses unbounded wait. For calls to untrusted canisters, prefer the system-level API (available via Rust) or structure your application so calls to untrusted canisters only go out from canisters you can afford to sacrifice. + + + +### Rust: choose bounded_wait for untrusted canisters + +```rust +use ic_cdk::call::Call; +use candid::Principal; + +async fn call_trusted(canister: Principal, method: &str) -> Result { + // Use unbounded_wait only for canisters you control. + Call::unbounded_wait(canister, method) + .await + .map_err(|e| format!("call failed: {:?}", e))? + .candid() + .map_err(|e| format!("decode failed: {:?}", e)) +} + +async fn call_untrusted(canister: Principal, method: &str) -> Result { + // Use bounded_wait for external or untrusted canisters. + // Default timeout is 300 seconds. Adjust with .change_timeout(seconds). + Call::bounded_wait(canister, method) + .await + .map_err(|e| format!("call failed: {:?}", e))? + .candid() + .map_err(|e| format!("decode failed: {:?}", e)) +} +``` + +## Response size limits + +All inter-canister call payloads — both requests and responses — are limited to **2 MB**. A request above 2 MB fails synchronously. A response above 2 MB causes the callee to trap. + +When reading large datasets across canisters, use pagination: return chunks of data per call rather than everything at once. Keep individual payloads under 1 MB to leave room for encoding overhead. + +```motoko +// Paginated query — avoid returning unbounded data +public query func getItems(offset : Nat, limit : Nat) : async [Item] { + // Return at most `limit` items starting from `offset`. + // Caller makes multiple calls to retrieve all data. + Array.subArray(items, offset, Nat.min(limit, items.size() - offset)) +}; +``` + +## Caller identity across await points + +In Motoko, the `caller` is captured as an immutable binding at function entry via `public shared ({ caller }) func`. This is safe across `await` points. + +In Rust, the current ic-cdk executor preserves caller across `.await` points via protected tasks, but this is an implementation detail — not a language guarantee. Bind `msg_caller()` before the first `await` as a defensive practice. + +```rust +use ic_cdk::update; +use ic_cdk::api::msg_caller; +use ic_cdk::call::Call; +use candid::Principal; + +// Replace other_canister_id() with your canister's ID lookup. + +#[update] +async fn process() -> Result<(), String> { + // Capture caller BEFORE any await — defensive practice in Rust. + let caller: Principal = msg_caller(); + + Call::bounded_wait(other_canister_id(), "validate") + .with_arg(caller) + .await + .map_err(|e| format!("validation failed: {:?}", e))?; + + // Use the captured binding, not msg_caller() again. + do_work_for(caller); + Ok(()) +} + +fn do_work_for(_caller: Principal) { + // ... +} +``` + +## canister_inspect_message is not called for inter-canister calls + +`canister_inspect_message` (Motoko: `system func inspect`) runs only for **ingress messages** — calls from external users arriving at the boundary nodes. It is never called for inter-canister calls. + +This means any access control you implement in `inspect_message` does not protect your canister from being called by another canister. Always duplicate access checks inside the method body itself. + +For full details on access control patterns, see [access management](access-management.mdx). + +## Handling rejected calls + +Inter-canister calls can be rejected for reasons beyond your control: the callee may have trapped, run out of cycles, been stopped, or the system may have rejected the message due to resource pressure. Unhandled rejections trap your canister. + +Always handle the error result of an inter-canister call. + +**Motoko:** use `try/catch`: + +```motoko +import Error "mo:core/Error"; +import Result "mo:core/Result"; + +// Inside your persistent actor class { ... } +// Replace otherCanister with your canister reference. + +public shared func callSomething() : async Result.Result { + try { + let result = await otherCanister.someMethod(); + #ok(result) + } catch (e) { + #err("call failed: " # Error.message(e)) + } +}; +``` + +**Rust:** handle the `Result` from `Call::bounded_wait`: + +```rust +use ic_cdk::update; +use ic_cdk::call::Call; +use candid::Principal; + +// Replace other_canister_id() with your canister's ID lookup. + +#[update] +async fn call_something() -> Result { + let response = Call::bounded_wait(other_canister_id(), "some_method") + .await + .map_err(|e| format!("call rejected: {:?}", e))?; + + response.candid::() + .map_err(|e| format!("decode failed: {:?}", e)) +} +``` + +## Summary checklist + +Before shipping any canister that makes inter-canister calls: + +- **Reentrancy:** Apply CallerGuard (per-caller lock) to any method that makes an inter-canister call and reads or writes shared state. +- **State ordering:** Deduct or commit before `await`; compensate on failure in the callback. +- **Cleanup:** Use `finally` (Motoko) or `Drop` (Rust) for locks and cleanup that must always run. +- **Wait type:** Use `bounded_wait` for calls to canisters you do not control; `unbounded_wait` only for your own canisters. +- **Payload size:** Keep request and response payloads under 1 MB; paginate larger datasets. +- **Caller capture:** In Rust, bind `msg_caller()` before the first `await`. +- **Access control:** Do not rely on `canister_inspect_message` for inter-canister call security — always check the caller inside the method. +- **Error handling:** Always handle the `Result` of every inter-canister call. + +## Next steps + +- [Onchain calls](../canister-calls/onchain-calls.md) — Basic inter-canister call patterns and the `Call` API +- [Parallel calls](../canister-calls/parallel-calls.md) — Running multiple calls concurrently and handling partial failures +- [Security concepts](../../concepts/security.md) — IC security model and threat landscape + + From 349ab2152c842ce90bfb7690facadb84c64e5018 Mon Sep 17 00:00:00 2001 From: Marco Walz Date: Thu, 16 Apr 2026 17:08:14 +0200 Subject: [PATCH 2/2] fix(security): address PR #79 feedback on inter-canister-calls page - Replace Map.put with Map.add (Map.put does not exist in mo:core/Map) - Replace Array.subArray with Array.sliceToArray and fix argument order - Add & reference to .with_args() calls (CDK-RS API requires &A) - Fix access-management.mdx link to use .md extension per project rules - Remove no-op let deducted / let _ = deducted binding in Rust example - Add missing import comment to paginated Motoko snippet Co-Authored-By: Claude Sonnet 4.6 --- docs/guides/security/inter-canister-calls.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/guides/security/inter-canister-calls.md b/docs/guides/security/inter-canister-calls.md index 671b4747..d70e1bc3 100644 --- a/docs/guides/security/inter-canister-calls.md +++ b/docs/guides/security/inter-canister-calls.md @@ -130,7 +130,7 @@ async fn withdraw(amount: u64) -> Result<(), String> { // Make the inter-canister call while the lock is held. Call::bounded_wait(other_canister_id(), "transfer") - .with_args((caller, amount)) + .with_args(&(caller, amount)) .await .map_err(|e| format!("transfer failed: {:?}", e))?; @@ -171,7 +171,7 @@ public shared ({ caller }) func transfer(to : Principal, amount : Nat) : async R }; // 2. Deduct BEFORE the await — mutation is committed regardless of callback outcome. - Map.put(balances, Principal.compare, caller, balance - amount); + Map.add(balances, Principal.compare, caller, balance - amount); // 3. Perform the inter-canister call. try { @@ -183,7 +183,7 @@ public shared ({ caller }) func transfer(to : Principal, amount : Nat) : async R case (?b) b; case null 0; }; - Map.put(balances, Principal.compare, caller, currentBalance + amount); + Map.add(balances, Principal.compare, caller, currentBalance + amount); #err("transfer failed: " # Error.message(e)) } }; @@ -211,7 +211,7 @@ async fn transfer(to: Principal, amount: u64) -> Result<(), String> { let caller = msg_caller(); // 1. Validate and deduct BEFORE the await. - let deducted = BALANCES.with(|b| { + BALANCES.with(|b| { let mut balances = b.borrow_mut(); let balance = balances.get(&caller).copied().unwrap_or(0); if balance < amount { @@ -220,11 +220,10 @@ async fn transfer(to: Principal, amount: u64) -> Result<(), String> { balances.insert(caller, balance - amount); Ok(()) })?; - let _ = deducted; // 2. Make the inter-canister call. let result = Call::bounded_wait(ledger_canister_id(), "transfer") - .with_args((to, amount)) + .with_args(&(to, amount)) .await; if let Err(e) = result { @@ -367,10 +366,11 @@ When reading large datasets across canisters, use pagination: return chunks of d ```motoko // Paginated query — avoid returning unbounded data +// Requires: import Array "mo:core/Array"; import Nat "mo:core/Nat"; public query func getItems(offset : Nat, limit : Nat) : async [Item] { // Return at most `limit` items starting from `offset`. // Caller makes multiple calls to retrieve all data. - Array.subArray(items, offset, Nat.min(limit, items.size() - offset)) + Array.sliceToArray(items, offset, offset + Nat.min(limit, items.size() - offset)) }; ``` @@ -414,7 +414,7 @@ fn do_work_for(_caller: Principal) { This means any access control you implement in `inspect_message` does not protect your canister from being called by another canister. Always duplicate access checks inside the method body itself. -For full details on access control patterns, see [access management](access-management.mdx). +For full details on access control patterns, see [access management](access-management.md). ## Handling rejected calls