Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions src/mctpd.c
Original file line number Diff line number Diff line change
Expand Up @@ -1423,6 +1423,12 @@ static int add_peer(ctx *ctx, const dest_phys *dest, mctp_eid_t eid,
memset(&ctx->peers[ctx->size_peers], 0x0,
sizeof(*ctx->peers) * (new_size - ctx->size_peers));
ctx->size_peers = new_size;
// Update user data for recovery tasks
for (size_t ridx = 0; ridx < ctx->size_peers; ridx++) {
peer = &ctx->peers[ridx];
if (sd_event_source_get_enabled(peer->recovery.source, NULL) > 0)
sd_event_source_set_userdata(peer->recovery.source, peer);
}
}

// Populate it
Expand Down Expand Up @@ -2564,7 +2570,6 @@ static int method_endpoint_remove(sd_bus_message *call, void *data,
static int
peer_endpoint_recover(sd_event_source *s, uint64_t usec, void *userdata)
{
int ev_state __attribute__((unused));
peer *peer = userdata;
ctx *ctx = peer->ctx;
char *peer_path;
Expand Down Expand Up @@ -2624,7 +2629,7 @@ peer_endpoint_recover(sd_event_source *s, uint64_t usec, void *userdata)
/* It's not known to be the same device, allocate a new EID */
dest_phys phys = peer->phys;

assert(sd_event_source_get_enabled(peer->recovery.source, &ev_state) == 0);
assert(sd_event_source_get_enabled(peer->recovery.source, NULL) == 0);
remove_peer(peer);
/*
* The representation of the old peer is now gone. Set up the new peer,
Expand Down Expand Up @@ -2662,7 +2667,7 @@ peer_endpoint_recover(sd_event_source *s, uint64_t usec, void *userdata)
goto reschedule;
}

assert(sd_event_source_get_enabled(peer->recovery.source, &ev_state) == 0);
assert(sd_event_source_get_enabled(peer->recovery.source, NULL) == 0);
sd_event_source_unref(peer->recovery.source);
peer->recovery.delay = 0;
peer->recovery.source = NULL;
Expand All @@ -2681,7 +2686,7 @@ peer_endpoint_recover(sd_event_source *s, uint64_t usec, void *userdata)
if (rc < 0) {
reclaim:
/* Recovery unsuccessful, clean up the peer */
assert(sd_event_source_get_enabled(peer->recovery.source, &ev_state) == 0);
assert(sd_event_source_get_enabled(peer->recovery.source, NULL) == 0);
remove_peer(peer);
}
return rc < 0 ? rc : 0;
Expand Down
65 changes: 65 additions & 0 deletions tests/test_mctpd.py
Original file line number Diff line number Diff line change
Expand Up @@ -483,3 +483,68 @@ async def test_network_local_eids_none(dbus, mctpd):
eids = list(await net.get_local_eids())

assert eids == []

async def test_concurrent_recovery_setup(dbus, mctpd):
iface = mctpd.system.interfaces[0]
mctp_i = await mctpd_mctp_iface_obj(dbus, iface)

# mctpd context tracks 20 peer objects by default, add and set up 19 so we
# reach the allocation boundary.
split = 19
for i in range(split):
pep = Endpoint(iface, bytes([0x1e + i]))
mctpd.network.add_endpoint(pep)
(_, _, path, _) = await mctp_i.call_setup_endpoint(pep.lladdr)

# Grab the DBus path for an endpoint that we will cause to be removed from
# the network through the recovery path. Arbitrarily use the most recent
# one added
ep = await dbus.get_proxy_object(MCTPD_C, path)
ep_props = await ep.get_interface(DBUS_PROPERTIES_I)

# Set up a match for Connectivity transitioning to Degraded on the endpoint
# for which we request recovery
degraded = trio.Semaphore(initial_value = 0)
def ep_connectivity_changed(iface, changed, invalidated):
if iface == MCTPD_ENDPOINT_I and 'Connectivity' in changed:
if 'Degraded' == changed['Connectivity'].value:
degraded.release()
await ep_props.on_properties_changed(ep_connectivity_changed)

# Set up a match for the recovery endpoint object being removed from DBus
mctp_p = await dbus.get_proxy_object(MCTPD_C, MCTPD_MCTP_P)
mctp_objmgr = await mctp_p.get_interface(DBUS_OBJECT_MANAGER_I)
removed = trio.Semaphore(initial_value = 0)
def ep_removed(ep_path, interfaces):
if ep_path == path and MCTPD_ENDPOINT_I in interfaces:
removed.release()

await mctp_objmgr.on_interfaces_removed(ep_removed)

# Delete the endpoint from the network so its recovery will fail after
# timeout. Note we delete at the split index as the network was already
# populated with the default endpoint
del mctpd.network.endpoints[split]

# Begin recovery for the endpoint ...
ep_ep = await ep.get_interface(MCTPD_ENDPOINT_I)
await ep_ep.call_recover()

# ... and wait until we're notified the recovery process has begun
with trio.move_on_after(1) as expected:
await degraded.acquire()
assert not expected.cancelled_caught

# Now that we're asynchronously waiting for the endpoint recovery process
# to complete, force a realloc() of the peer object array by adding a new
# peer, which will invalidate the recovering peer's pointer
pep = Endpoint(iface, bytes([0x1e + split]))
mctpd.network.add_endpoint(pep)
(_, _, _, new) = await mctp_i.call_setup_endpoint(pep.lladdr)
assert new

# Verify the recovery process completed gracefully with removal of the
# endpoint's DBus object
with trio.move_on_after(2 * MCTPD_TRECLAIM) as expected:
await removed.acquire()
assert not expected.cancelled_caught