From 594ca189bcf59cd481a14ef08e6e784b97647453 Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Tue, 15 Apr 2025 10:17:46 +0930 Subject: [PATCH 1/2] mctpd: Clean up unused ev_state From at least systemd 252, sd_event_source_get_enabled() is documented as allowing NULL for the event parameter. We weren't using the output, so drop the variable. Signed-off-by: Andrew Jeffery --- src/mctpd.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/mctpd.c b/src/mctpd.c index e9d5ccdd..f6c4b17a 100644 --- a/src/mctpd.c +++ b/src/mctpd.c @@ -2564,7 +2564,6 @@ static int method_endpoint_remove(sd_bus_message *call, void *data, static int peer_endpoint_recover(sd_event_source *s, uint64_t usec, void *userdata) { - int ev_state __attribute__((unused)); peer *peer = userdata; ctx *ctx = peer->ctx; char *peer_path; @@ -2624,7 +2623,7 @@ peer_endpoint_recover(sd_event_source *s, uint64_t usec, void *userdata) /* It's not known to be the same device, allocate a new EID */ dest_phys phys = peer->phys; - assert(sd_event_source_get_enabled(peer->recovery.source, &ev_state) == 0); + assert(sd_event_source_get_enabled(peer->recovery.source, NULL) == 0); remove_peer(peer); /* * The representation of the old peer is now gone. Set up the new peer, @@ -2662,7 +2661,7 @@ peer_endpoint_recover(sd_event_source *s, uint64_t usec, void *userdata) goto reschedule; } - assert(sd_event_source_get_enabled(peer->recovery.source, &ev_state) == 0); + assert(sd_event_source_get_enabled(peer->recovery.source, NULL) == 0); sd_event_source_unref(peer->recovery.source); peer->recovery.delay = 0; peer->recovery.source = NULL; @@ -2681,7 +2680,7 @@ peer_endpoint_recover(sd_event_source *s, uint64_t usec, void *userdata) if (rc < 0) { reclaim: /* Recovery unsuccessful, clean up the peer */ - assert(sd_event_source_get_enabled(peer->recovery.source, &ev_state) == 0); + assert(sd_event_source_get_enabled(peer->recovery.source, NULL) == 0); remove_peer(peer); } return rc < 0 ? rc : 0; From 498f73e85beaeb235a49aa86407af0ef60cca6ec Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Tue, 15 Apr 2025 10:04:38 +0930 Subject: [PATCH 2/2] mctpd: add_peer: Update userdata pointer for active event sources Mitigate the issue reported via [1] and highlighted by ASAN: ==179005==ERROR: AddressSanitizer: heap-use-after-free on address 0x61f000000c38 at pc 0x55dfaa7fa308 bp 0x7ffe10264420 sp 0x7ffe10264418 READ of size 8 at 0x61f000000c38 thread T0 0 0x55dfaa7fa307 in peer_endpoint_recover ../src/mctpd.c:2570 1 0x7f9a43dadae3 (/lib/x86_64-linux-gnu/libsystemd.so.0+0x78ae3) 2 0x7f9a43dade04 in sd_event_dispatch (/lib/x86_64-linux-gnu/libsystemd.so.0+0x78e04) 3 0x7f9a43daf2e7 in sd_event_run (/lib/x86_64-linux-gnu/libsystemd.so.0+0x7a2e7) 4 0x7f9a43daf506 in sd_event_loop (/lib/x86_64-linux-gnu/libsystemd.so.0+0x7a506) 5 0x55dfaa80a609 in main ../src/mctpd.c:4547 6 0x7f9a42c46249 in __libc_start_call_main ../sysdeps/nptl/libc_start_call_main.h:58 7 0x7f9a42c46304 in __libc_start_main_impl ../csu/libc-start.c:360 8 0x55dfaa7e38d0 in _start (mctp/build/test-mctpd+0x688d0) 0x61f000000c38 is located 3000 bytes inside of 3040-byte region [0x61f000000080,0x61f000000c60) freed by thread T0 here: 0 0x7f9a436b78d5 in __interceptor_realloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:85 1 0x55dfaa7ef028 in add_peer ../src/mctpd.c:1419 2 0x55dfaa7f1587 in endpoint_assign_eid ../src/mctpd.c:1601 3 0x55dfaa7f55a0 in method_setup_endpoint ../src/mctpd.c:2038 4 0x7f9a43d650ad (/lib/x86_64-linux-gnu/libsystemd.so.0+0x300ad) previously allocated by thread T0 here: 0 0x7f9a436b78d5 in __interceptor_realloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:85 1 0x55dfaa7ef028 in add_peer ../src/mctpd.c:1419 2 0x55dfaa805741 in add_local_eid ../src/mctpd.c:4052 3 0x55dfaa80627f in add_interface_local ../src/mctpd.c:4114 4 0x55dfaa806ffa in setup_nets ../src/mctpd.c:4200 5 0x55dfaa80a380 in main ../src/mctpd.c:4525 6 0x7f9a42c46249 in __libc_start_call_main ../sysdeps/nptl/libc_start_call_main.h:58 SUMMARY: AddressSanitizer: heap-use-after-free ../src/mctpd.c:2570 in peer_endpoint_recover Shadow bytes around the buggy address: 0x0c3e7fff8130: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd 0x0c3e7fff8140: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd 0x0c3e7fff8150: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd 0x0c3e7fff8160: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd 0x0c3e7fff8170: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd =>0x0c3e7fff8180: fd fd fd fd fd fd fd[fd]fd fd fd fd fa fa fa fa 0x0c3e7fff8190: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c3e7fff81a0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c3e7fff81b0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c3e7fff81c0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c3e7fff81d0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa Shadow byte legend (one shadow byte represents 8 application bytes): Addressable: 00 Partially addressable: 01 02 03 04 05 06 07 Heap left redzone: fa Freed heap region: fd Stack left redzone: f1 Stack mid redzone: f2 Stack right redzone: f3 Stack after return: f5 Stack use after scope: f8 Global redzone: f9 Global init order: f6 Poisoned by user: f7 Container overflow: fc Array cookie: ac Intra object redzone: bb ASan internal: fe Left alloca redzone: ca Right alloca redzone: cb ==179005==ABORTING Link: https://github.com/codeconstruct/mctp/issues/69 [1] Fixes: 7ec2f8daa3a8 ("mctpd: Add support for endpoint recovery") Signed-off-by: Andrew Jeffery --- src/mctpd.c | 6 +++++ tests/test_mctpd.py | 65 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) diff --git a/src/mctpd.c b/src/mctpd.c index f6c4b17a..46c8dca9 100644 --- a/src/mctpd.c +++ b/src/mctpd.c @@ -1423,6 +1423,12 @@ static int add_peer(ctx *ctx, const dest_phys *dest, mctp_eid_t eid, memset(&ctx->peers[ctx->size_peers], 0x0, sizeof(*ctx->peers) * (new_size - ctx->size_peers)); ctx->size_peers = new_size; + // Update user data for recovery tasks + for (size_t ridx = 0; ridx < ctx->size_peers; ridx++) { + peer = &ctx->peers[ridx]; + if (sd_event_source_get_enabled(peer->recovery.source, NULL) > 0) + sd_event_source_set_userdata(peer->recovery.source, peer); + } } // Populate it diff --git a/tests/test_mctpd.py b/tests/test_mctpd.py index 673bc12a..2ee7b2ae 100644 --- a/tests/test_mctpd.py +++ b/tests/test_mctpd.py @@ -483,3 +483,68 @@ async def test_network_local_eids_none(dbus, mctpd): eids = list(await net.get_local_eids()) assert eids == [] + +async def test_concurrent_recovery_setup(dbus, mctpd): + iface = mctpd.system.interfaces[0] + mctp_i = await mctpd_mctp_iface_obj(dbus, iface) + + # mctpd context tracks 20 peer objects by default, add and set up 19 so we + # reach the allocation boundary. + split = 19 + for i in range(split): + pep = Endpoint(iface, bytes([0x1e + i])) + mctpd.network.add_endpoint(pep) + (_, _, path, _) = await mctp_i.call_setup_endpoint(pep.lladdr) + + # Grab the DBus path for an endpoint that we will cause to be removed from + # the network through the recovery path. Arbitrarily use the most recent + # one added + ep = await dbus.get_proxy_object(MCTPD_C, path) + ep_props = await ep.get_interface(DBUS_PROPERTIES_I) + + # Set up a match for Connectivity transitioning to Degraded on the endpoint + # for which we request recovery + degraded = trio.Semaphore(initial_value = 0) + def ep_connectivity_changed(iface, changed, invalidated): + if iface == MCTPD_ENDPOINT_I and 'Connectivity' in changed: + if 'Degraded' == changed['Connectivity'].value: + degraded.release() + await ep_props.on_properties_changed(ep_connectivity_changed) + + # Set up a match for the recovery endpoint object being removed from DBus + mctp_p = await dbus.get_proxy_object(MCTPD_C, MCTPD_MCTP_P) + mctp_objmgr = await mctp_p.get_interface(DBUS_OBJECT_MANAGER_I) + removed = trio.Semaphore(initial_value = 0) + def ep_removed(ep_path, interfaces): + if ep_path == path and MCTPD_ENDPOINT_I in interfaces: + removed.release() + + await mctp_objmgr.on_interfaces_removed(ep_removed) + + # Delete the endpoint from the network so its recovery will fail after + # timeout. Note we delete at the split index as the network was already + # populated with the default endpoint + del mctpd.network.endpoints[split] + + # Begin recovery for the endpoint ... + ep_ep = await ep.get_interface(MCTPD_ENDPOINT_I) + await ep_ep.call_recover() + + # ... and wait until we're notified the recovery process has begun + with trio.move_on_after(1) as expected: + await degraded.acquire() + assert not expected.cancelled_caught + + # Now that we're asynchronously waiting for the endpoint recovery process + # to complete, force a realloc() of the peer object array by adding a new + # peer, which will invalidate the recovering peer's pointer + pep = Endpoint(iface, bytes([0x1e + split])) + mctpd.network.add_endpoint(pep) + (_, _, _, new) = await mctp_i.call_setup_endpoint(pep.lladdr) + assert new + + # Verify the recovery process completed gracefully with removal of the + # endpoint's DBus object + with trio.move_on_after(2 * MCTPD_TRECLAIM) as expected: + await removed.acquire() + assert not expected.cancelled_caught