Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 96 additions & 7 deletions drivers/libusb1.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,14 @@
#include "usb-common.h"
#include "nut_libusb.h"
#include "nut_stdint.h"
#include "nut_bool.h"

#ifndef WIN32
# include <signal.h> /* sigaction(), SIGALRM for the atexit watchdog */
#endif

#define USB_DRIVER_NAME "USB communication driver (libusb 1.0)"
#define USB_DRIVER_VERSION "0.53"
#define USB_DRIVER_VERSION "0.54"

/* driver description structure */
upsdrv_info_t comm_upsdrv_info = {
Expand All @@ -55,6 +60,77 @@ upsdrv_info_t comm_upsdrv_info = {

#define MAX_REPORT_SIZE 0x1800

/* Explicit libusb context, initialized once on the first call to
* nut_libusb_open() and released exactly once at process shutdown via
* the atexit handler below.
*
* Background: the original code called libusb_init(NULL) on every open and
* libusb_exit(NULL) on every close, bumping/decrementing the default
* context's refcount. That pattern violates the documented contract for
* libusb_exit (which "should be called after closing all open devices and
* before your application terminates"), and on certain firmware can wedge
* indefinitely: when libusb_exit hits the 1->0 transition it tears the
* context down, which waits on its internal sync primitives for URBs to
* drain -- but URBs orphaned by libusb_reset_device or by an unexpected
* USB device disconnect never drain, so the wait never returns. That
* deadlock has been observed on the 0665:5161 Cypress USB-serial bridge
* family (Salicru SPS, Ippon, ViewPower, various Voltronic Power UPSes;
* see NUT issues #598, #993, #2453) but the underlying bug class is
* generic and reachable via any reconnect path.
*
* The fix is to use an explicit context that we own end-to-end:
* initialize it once at first open, never call libusb_exit during runtime
* (close_dev no longer does), and release it exactly once at process
* shutdown via atexit. A SIGALRM-bounded timeout on the atexit libusb_exit
* keeps a deadlock-prone teardown from wedging supervisor shutdowns
* (e.g. systemctl stop) if it ever happens at exit time. Generalizes the
* 2018 partial fix in commit 4f84b7f92 ("don't libusb_exit() when closing
* a previously opened device").
*/
static libusb_context *nut_usb_ctx = NULL;
static nut_bool_t nut_usb_ctx_initialized = false;

#ifndef WIN32
__attribute__((noreturn))
static void nut_libusb_cleanup_alarm(int sig)
{
NUT_UNUSED_VARIABLE(sig);
/* libusb_exit is taking too long, almost certainly waiting on URBs
* orphaned by a recent device reset / disconnect that will never
* drain. Force the process down so supervisor shutdown isn't blocked.
*/
_exit(0);
}
#endif /* !WIN32 */

/* Release the libusb context that nut_libusb_open() initialized.
* Bounded by a SIGALRM (POSIX) so deadlock-prone teardown can't wedge
* process shutdown indefinitely.
*/
static void nut_libusb_cleanup_atexit(void)
{
if (!nut_usb_ctx_initialized || !nut_usb_ctx) {
return;
}
#ifndef WIN32
{
struct sigaction sa, prev;
memset(&sa, 0, sizeof(sa));
sa.sa_handler = nut_libusb_cleanup_alarm;
sigemptyset(&sa.sa_mask);
sigaction(SIGALRM, &sa, &prev);
alarm(2); /* sub-ms on healthy context; 2s is generous */
libusb_exit(nut_usb_ctx);
alarm(0);
sigaction(SIGALRM, &prev, NULL);
}
#else /* WIN32 */
libusb_exit(nut_usb_ctx);
#endif /* !WIN32 */
nut_usb_ctx = NULL;
nut_usb_ctx_initialized = false;
}

static void nut_libusb_close(libusb_device_handle *udev);

/*! Add USB-related driver variables with addvar() and dstate_setinfo().
Expand Down Expand Up @@ -291,10 +367,18 @@ static int nut_libusb_open(libusb_device_handle **udevp,
usb_hid_number_opts_parsed = 1;
}

/* libusb base init */
if (libusb_init(NULL) < 0) {
libusb_exit(NULL);
fatal_with_errno(EXIT_FAILURE, "Failed to init libusb 1.0");
/* libusb base init: initialize our explicit context on the first call
* here, and register the matching one-shot teardown via atexit. See the
* comment near the file-scope declaration of nut_usb_ctx for the design
* rationale (avoids the libusb_exit-per-reconnect misuse and the
* teardown-deadlock it enables on some firmware).
*/
if (!nut_usb_ctx_initialized) {
if (libusb_init(&nut_usb_ctx) < 0) {
fatal_with_errno(EXIT_FAILURE, "Failed to init libusb 1.0");
}
nut_usb_ctx_initialized = true;
atexit(nut_libusb_cleanup_atexit);
}

/* TODO: Find a place for this, from Windows branch made for libusb0.c */
Expand All @@ -312,7 +396,7 @@ static int nut_libusb_open(libusb_device_handle **udevp,
libusb_close(*udevp);
#endif

devcount = libusb_get_device_list(NULL, &devlist);
devcount = libusb_get_device_list(nut_usb_ctx, &devlist);

/* devcount may be < 0, loop will get skipped;
* its SSIZE_MAX < SIZE_MAX for devnum */
Expand Down Expand Up @@ -1166,7 +1250,12 @@ static void nut_libusb_close(libusb_device_handle *udev)
*/
/* libusb_release_interface(udev, usb_subdriver.hid_rep_index); */
libusb_close(udev);
libusb_exit(NULL);
/* libusb_exit() is no longer called here. The libusb context is owned
* by nut_libusb_open() and released exactly once at process shutdown
* via the atexit handler; calling it per-close was a longstanding
* misuse that deadlocked on orphaned URBs after a device reset or
* disconnect (NUT #598). See the comment near nut_usb_ctx above.
*/
}

usb_communication_subdriver_t usb_subdriver = {
Expand Down
34 changes: 33 additions & 1 deletion drivers/nutdrv_qx.c
Original file line number Diff line number Diff line change
Expand Up @@ -3881,10 +3881,23 @@ void upsdrv_cleanup(void)

/* Generic command processing function: send a command and read a reply.
* Returns < 0 on error, 0 on timeout and the number of bytes read on success. */
/* Consecutive LIBUSB_ERROR_OVERFLOW results on the interrupt-IN endpoint
* tolerated before escalating from "retry on the next poll" to a USB-level
* device reset. A one-off oversized frame is transient; a sustained run means
* the bridge firmware has wedged the endpoint and only re-enumeration clears
* it (e.g. the 0665:5161 Cypress USB-serial family: Salicru SPS, Ippon,
* ViewPower, various Voltronic Power). See NUT issue #598. */
#define QX_USB_OVERFLOW_RESET_TRIES 3

static ssize_t qx_command(const char *cmd, size_t cmdlen, char *buf, size_t buflen)
{
#ifndef TESTING
ssize_t ret = -1;
# ifdef QX_USB
/* Persists across calls; only consecutive overflows accumulate (any clean
* read zeroes it, see the switch on `ret` below). */
static int overflow_tries = 0;
# endif
#endif

/* NOTE: Could not find in which ifdef-ed codepath, but clang complained
Expand Down Expand Up @@ -3918,6 +3931,7 @@ static ssize_t qx_command(const char *cmd, size_t cmdlen, char *buf, size_t bufl
ret = (*subdriver_command)(cmd, cmdlen, buf, buflen);

if (ret >= 0) {
overflow_tries = 0; /* clean read: forget any overflow streak */
return ret;
}

Expand Down Expand Up @@ -3979,8 +3993,26 @@ static ssize_t qx_command(const char *cmd, size_t cmdlen, char *buf, size_t bufl
udev = NULL;
break;

case LIBUSB_ERROR_OVERFLOW: /* Value too large for defined data type:
* an oversized interrupt-IN frame. A one-off is
* transient (retry on the next poll); a sustained
* run means the bridge firmware has wedged the
* endpoint and only a USB-level reset recovers it.
* See NUT issue #598. */
if (++overflow_tries < QX_USB_OVERFLOW_RESET_TRIES) {
upsdebugx(2, "Got OVERFLOW on EP 0x81 (%d/%d), retrying on next poll",
overflow_tries, QX_USB_OVERFLOW_RESET_TRIES);
break;
}
upsdebugx(1, "OVERFLOW on EP 0x81 persisted for %d polls; resetting device",
overflow_tries);
overflow_tries = 0;
if (usb_reset(udev) == 0) {
upsdebugx(1, "Device reset handled");
}
goto fallthrough_case_reconnect;

case LIBUSB_ERROR_TIMEOUT: /* Connection timed out */
case LIBUSB_ERROR_OVERFLOW: /* Value too large for defined data type */
#if EPROTO && WITH_LIBUSB_0_1 /* limit to libusb 0.1 implementation */
case -EPROTO: /* Protocol error */
#endif
Expand Down
Loading