From 249f6f572692dfc03d7e6b5ced1edcd983fe20da Mon Sep 17 00:00:00 2001 From: Ashesh Vashi Date: Wed, 20 May 2026 15:38:23 +0530 Subject: [PATCH] fix(test): stabilize bgprocess polling in backup/restore/IE/maintenance tests The shared polling helpers in: - web/pgadmin/tools/backup/tests/test_backup_utils.py - web/pgadmin/tools/import_export/tests/test_import_export_utils.py - web/pgadmin/tools/maintenance/tests/test_create_maintenance_job.py - web/pgadmin/tools/restore/tests/test_create_restore_job.py all share the same race that surfaced on macos-latest / pg16 in PR #9955's CI run: - Wait budget was 2.5s (5 iterations x 0.5s; maintenance used 5s). - The break condition was `execution_time' in the_process`, but `execution_time` is the elapsed time of a *running* bgprocess -- it is set before the wrapped pg_dump / pg_restore / psql / COPY actually finishes. The completion signal is `exit_code` becoming non-None. - So the helper could return control while the wrapped command was still running, and the next assertion -- e.g. `assert_equal(the_process['exit_code'] in [0, 1], True)` -- would fire on `None in [0, 1]`, i.e. `False != True`. Some scenarios masked the bug by listing `None` in their `expected_exit_code` set (a tell that someone noticed the polling was unreliable and worked around it by widening accepted exit codes). Scenarios that didn't include `None` were the ones that flaked. Fix all four helpers identically: - Poll for up to 60 iterations x 0.5s = 30s, generous enough for the slowest CI runner. - Break only when `the_process.get('exit_code') is not None`, the actual completion signal. - Narrow `except Exception` to `except StopIteration`, which is the only thing `next(...)` here can raise. No call-site changes needed; the helper contract (returns once the job is done; raises if the bgprocess never finished) is unchanged in spirit and strictly more reliable in practice. Verified: - pycodestyle on the four files: 0 violations. This fixes the failure observed in the macos-latest / pg16 leg of PR #9955's CI run (run 26154521710, job 76930277702), which was unrelated to that PR's lockfile-only changes. --- .../tools/backup/tests/test_backup_utils.py | 14 +++++++------- .../tests/test_import_export_utils.py | 14 +++++++------- .../tests/test_create_maintenance_job.py | 14 +++++++------- .../tools/restore/tests/test_create_restore_job.py | 14 +++++++------- 4 files changed, 28 insertions(+), 28 deletions(-) diff --git a/web/pgadmin/tools/backup/tests/test_backup_utils.py b/web/pgadmin/tools/backup/tests/test_backup_utils.py index c38e7e591a0..3f4669c2870 100644 --- a/web/pgadmin/tools/backup/tests/test_backup_utils.py +++ b/web/pgadmin/tools/backup/tests/test_backup_utils.py @@ -25,11 +25,12 @@ def create_backup_job(tester, url, params, assert_equal): def run_backup_job(tester, job_id, expected_params, assert_in, assert_not_in, assert_equal): - cnt = 0 the_process = None - while True: - if cnt >= 5: - break + # Wait up to 30s for the background pg_dump to actually finish. + # The completion signal is `exit_code` becoming non-None — NOT the + # mere presence of `execution_time`, which is set while the process + # is still running. + for _ in range(60): # Check the process list response1 = tester.get('/misc/bgprocess/?_={0}'.format( secrets.choice(range(1, 9999999)))) @@ -39,13 +40,12 @@ def run_backup_job(tester, job_id, expected_params, assert_in, assert_not_in, try: the_process = next( p for p in process_list if p['id'] == job_id) - except Exception: + except StopIteration: the_process = None - if the_process and 'execution_time' in the_process: + if the_process and the_process.get('exit_code') is not None: break time.sleep(0.5) - cnt += 1 assert_equal('execution_time' in the_process, True) assert_equal('stime' in the_process, True) diff --git a/web/pgadmin/tools/import_export/tests/test_import_export_utils.py b/web/pgadmin/tools/import_export/tests/test_import_export_utils.py index 85a0de0f798..0f90608d6b5 100644 --- a/web/pgadmin/tools/import_export/tests/test_import_export_utils.py +++ b/web/pgadmin/tools/import_export/tests/test_import_export_utils.py @@ -38,11 +38,12 @@ def create_import_export_job(tester, url, params, assert_equal): def run_import_export_job(tester, job_id, expected_params, assert_in, assert_not_in, assert_equal): - cnt = 0 the_process = None - while True: - if cnt >= 5: - break + # Wait up to 30s for the background psql/COPY job to actually finish. + # The completion signal is `exit_code` becoming non-None — NOT the + # mere presence of `execution_time`, which is set while the process + # is still running. + for _ in range(60): # Check the process list response1 = tester.get('/misc/bgprocess/?_={0}'.format( secrets.choice(range(1, 9999999)))) @@ -52,13 +53,12 @@ def run_import_export_job(tester, job_id, expected_params, assert_in, try: the_process = next( p for p in process_list if p['id'] == job_id) - except Exception: + except StopIteration: the_process = None - if the_process and 'execution_time' in the_process: + if the_process and the_process.get('exit_code') is not None: break time.sleep(0.5) - cnt += 1 assert_equal('execution_time' in the_process, True) assert_equal('stime' in the_process, True) diff --git a/web/pgadmin/tools/maintenance/tests/test_create_maintenance_job.py b/web/pgadmin/tools/maintenance/tests/test_create_maintenance_job.py index 5622e0a1f95..55f0b850d24 100644 --- a/web/pgadmin/tools/maintenance/tests/test_create_maintenance_job.py +++ b/web/pgadmin/tools/maintenance/tests/test_create_maintenance_job.py @@ -73,11 +73,12 @@ def runTest(self): response_data = json.loads(response.data.decode('utf-8')) job_id = response_data['data']['job_id'] - cnt = 0 the_process = None - while True: - if cnt >= 10: - break + # Wait up to 30s for the background maintenance command to + # actually finish. The completion signal is `exit_code` becoming + # non-None — NOT the mere presence of `execution_time`, which is + # set while the process is still running. + for _ in range(60): # Check the process list response1 = self.tester.get('/misc/bgprocess/?_={0}'.format( secrets.choice(range(1, 9999999)))) @@ -87,13 +88,12 @@ def runTest(self): try: the_process = next( p for p in process_list if p['id'] == job_id) - except Exception: + except StopIteration: the_process = None - if the_process and 'execution_time' in the_process: + if the_process and the_process.get('exit_code') is not None: break time.sleep(0.5) - cnt += 1 self.assertTrue('execution_time' in the_process) self.assertTrue('stime' in the_process) diff --git a/web/pgadmin/tools/restore/tests/test_create_restore_job.py b/web/pgadmin/tools/restore/tests/test_create_restore_job.py index 0a7d22b1077..d971cedf7bd 100644 --- a/web/pgadmin/tools/restore/tests/test_create_restore_job.py +++ b/web/pgadmin/tools/restore/tests/test_create_restore_job.py @@ -165,11 +165,12 @@ def runTest(self): response_data = json.loads(response.data.decode('utf-8')) job_id = response_data['data']['job_id'] - cnt = 0 the_process = None - while True: - if cnt >= 5: - break + # Wait up to 30s for the background pg_restore to actually + # finish. The completion signal is `exit_code` becoming non-None + # — NOT the mere presence of `execution_time`, which is set + # while the process is still running. + for _ in range(60): # Check the process list response1 = self.tester.get('/misc/bgprocess/?_={0}'.format( secrets.choice(range(1, 9999999)))) @@ -179,13 +180,12 @@ def runTest(self): try: the_process = next( p for p in process_list if p['id'] == job_id) - except Exception: + except StopIteration: the_process = None - if the_process and 'execution_time' in the_process: + if the_process and the_process.get('exit_code') is not None: break time.sleep(0.5) - cnt += 1 self.assertTrue('execution_time' in the_process) self.assertTrue('stime' in the_process)