From 77b589be11e2665f7c41ff0598041b8a24e8c948 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C4=81ris=20Narti=C5=A1s?= Date: Tue, 2 Sep 2025 16:20:56 +0300 Subject: [PATCH 1/2] mapcache_seed: Refactor seeder for robust multiprocessing and shutdown This commit is a major overhaul of the mapcache_seed utility, addressing critical bugs in signal handling, multiprocessing, and thread synchronization that could cause deadlocks or unresponsive behavior. The key changes are: 1. Robust Signal Handling and Shutdown: The Ctrl+C (SIGINT) handling has been completely rewritten to ensure a prompt and clean shutdown in all modes, fixing numerous deadlocks and race conditions. - For Multiprocessing: - pop_queue() now checks the sig_int_received flag at the start to ensure child processes exit immediately after finishing their current task. - push_queue()'s EINTR retry loop now also checks the signal flag, preventing the parent process from deadlocking on a full message queue after a signal has been received. - For Multithreading: - On SIGINT, feed_worker() now calls apr_queue_interrupt_all() to wake all blocked threads. pop_queue() treats the resulting APR_EINTR (or any other non-success status) as a request to stop the worker. - A race condition where a thread could start a new job just as a signal was received has been fixed by re-checking the signal flag after a work item is successfully popped from the queue. - Graceful vs. Urgent Shutdown: - The final loop in feed_worker() that sends STOP commands is now wrapped in an if(!sig_int_received) block. This ensures it only runs during a normal, graceful shutdown and is skipped on Ctrl+C, preventing a major deadlock. 2. Multiprocessing Enhancements: The -p mode has been significantly improved for stability and correctness. - IPC-based Logging: A new, dedicated System V message queue (log_msqid) has been implemented for logging in multiprocessing mode. This decouples logging from the main work queue, improving performance and stability. 
log_thread_fn and seed_worker were updated to use this new mechanism. - `-p 1` Bugfix: A long-standing bug that caused the seeder to stall when using -p 1 has been fixed by changing all relevant logic guards from nprocesses > 1 to nprocesses >= 1. - IPC Creation: Message queues are now created using IPC_PRIVATE instead of ftok, which is more robust. 3. Code Quality and Minor Fixes: - child_init is correctly called at start of each worker process after fork(). - sig_int_received is now correctly typed as volatile sig_atomic_t. - memset() is now used to initialize structs before use. - The help text for the -p option has been clarified. Co-authored with gemini-2.5-pro --- util/mapcache_seed.c | 517 ++++++++++++++++++++++++++++--------------- 1 file changed, 334 insertions(+), 183 deletions(-) diff --git a/util/mapcache_seed.c b/util/mapcache_seed.c index ab9da848..01fe8e45 100644 --- a/util/mapcache_seed.c +++ b/util/mapcache_seed.c @@ -46,7 +46,7 @@ #include #ifdef USE_FORK -int msqid; +int msqid, log_msqid; #include #include #include @@ -81,7 +81,7 @@ int quiet = 0; int verbose = 0; int force = 0; int non_interactive = 0; -int sig_int_received = 0; +volatile sig_atomic_t sig_int_received = 0; int error_detected = 0; double percent_failed_allowed = 1.0; int n_metatiles_tot = 0; @@ -138,6 +138,16 @@ struct msg_cmd { long mtype; struct seed_cmd cmd; }; + +#define MAX_ERR_MSG_SIZE 512 +struct ipc_log_status { + long mtype; + s_status status; + int x,y,z; + int nodata; + int skipped; + char msg[MAX_ERR_MSG_SIZE]; +}; #endif cmd mode = MAPCACHE_CMD_SEED; /* the mode the utility will be running in: either seed or delete */ @@ -148,11 +158,15 @@ int push_queue(struct seed_cmd cmd) int retries=0; int ret; #ifdef USE_FORK - if(nprocesses > 1) { + if(nprocesses >= 1) { struct msg_cmd mcmd; + memset(&mcmd, 0, sizeof(struct msg_cmd)); mcmd.mtype = 1; mcmd.cmd = cmd; - if (msgsnd(msqid, &mcmd, sizeof(struct seed_cmd), 0) == -1) { + do { + ret = msgsnd(msqid, &mcmd, sizeof(struct 
seed_cmd), 0); + } while (ret == -1 && errno == EINTR && !sig_int_received); + if (ret == -1) { printf("failed to push tile %d %d %d\n",cmd.z,cmd.y,cmd.x); return APR_EGENERAL; } @@ -179,27 +193,50 @@ int pop_queue(struct seed_cmd *cmd) struct seed_cmd *pcmd; #ifdef USE_FORK - if(nprocesses > 1) { + if(nprocesses >= 1) { struct msg_cmd mcmd; - if (msgrcv(msqid, &mcmd, sizeof(struct seed_cmd), 1, 0) == -1) { - printf("failed to pop tile\n"); - return APR_EGENERAL; + + // if there is a sig_int, then it is useless to wait for messages. + if(sig_int_received) { + cmd->command = MAPCACHE_CMD_STOP; + return APR_SUCCESS; + } + + do { + ret = msgrcv(msqid, &mcmd, sizeof(struct seed_cmd), 1, 0); + } while (ret == -1 && errno == EINTR && !sig_int_received); + + if (ret == -1) { + cmd->command = MAPCACHE_CMD_STOP; + return APR_SUCCESS; } *cmd = mcmd.cmd; return APR_SUCCESS; } #endif - ret = apr_queue_pop(work_queue, (void**)&pcmd); - while(ret == APR_EINTR && retries<10) { + do { retries++; ret = apr_queue_pop(work_queue, (void**)&pcmd); + } while(ret == APR_EINTR && retries < 10 && !sig_int_received); + + /* + * Check the flag *after* the pop. This closes the race condition. + * If the pop succeeded but a signal has been received, we discard the + * item and stop. If the pop failed, we also stop. + */ + if(ret != APR_SUCCESS || sig_int_received) { + if(ret == APR_SUCCESS) { + // We popped an item but must now discard it. 
+ free(pcmd); + } + cmd->command = MAPCACHE_CMD_STOP; + return APR_SUCCESS; } - if(ret == APR_SUCCESS) { - *cmd = *pcmd; - free(pcmd); - } - return ret; + + *cmd = *pcmd; + free(pcmd); + return APR_SUCCESS; } int trypop_queue(struct seed_cmd *cmd) @@ -208,11 +245,11 @@ int trypop_queue(struct seed_cmd *cmd) struct seed_cmd *pcmd; #ifdef USE_FORK - if(nprocesses>1) { + if(nprocesses >= 1) { struct msg_cmd mcmd; ret = msgrcv(msqid, &mcmd, sizeof(struct seed_cmd), 1, IPC_NOWAIT); if(errno == ENOMSG) return APR_EAGAIN; - if(ret>0) { + if(ret > 0) { *cmd = mcmd.cmd; return APR_SUCCESS; } else { @@ -258,7 +295,7 @@ static const apr_getopt_option_t seed_options[] = { { "metasize", 'M', TRUE, "override metatile size while seeding, eg 8,8" }, { "nthreads", 'n', TRUE, "number of parallel threads to use (incompatible with -p/--nprocesses)" }, { "older", 'o', TRUE, "reseed tiles older than supplied date (format: year/month/day hour:minute, eg: 2011/01/31 20:45" }, - { "nprocesses", 'p', TRUE, "number of parallel processes to use (incompatible with -n/--nthreads)" }, + { "nprocesses", 'p', TRUE, "number of parallel worker processes to use (incompatible with -n/--nthreads)" }, { "percent", 'P', TRUE, "percent of failed requests allowed from the last 1000 before we abort (default: 1(%), set to 0 to abort on first error)" }, { "quiet", 'q', FALSE, "don't show progress info" }, { "retry-failed", 'R', TRUE, "retry failed requests logged to [file] by --log-failed" }, @@ -488,26 +525,30 @@ void cmd_recurse(mapcache_context *cmd_ctx, mapcache_tile *tile) apr_pool_clear(cmd_ctx->pool); if(sig_int_received || error_detected) { //stop if we were asked to stop by hitting ctrl-c - //remove all items from the queue - struct seed_cmd entry; - int retry_count = 0; - int ret = trypop_queue(&entry); - while (ret != APR_EAGAIN) { - // try to empty queue with a graceful retreat up to 55 seconds - // for retries before forcefully terminating threads - if (ret == APR_EOF) - break; - if (ret != 
APR_SUCCESS) - retry_count++; - if (retry_count > 10) { - printf("Feed worker threads failed to terminate. Stopping forcefully.\n"); - apr_queue_interrupt_all(work_queue); - break; + if (nprocesses >= 1) { + return; + } else { + //remove all items from the queue + struct seed_cmd entry; + int retry_count = 0; + int ret = trypop_queue(&entry); + while (ret != APR_EAGAIN) { + // try to empty queue with a graceful retreat up to 55 seconds + // for retries before forcefully terminating threads + if (ret == APR_EOF) + break; + if (ret != APR_SUCCESS) + retry_count++; + if (retry_count > 10) { + printf("Feed worker threads failed to terminate. Stopping forcefully.\n"); + apr_queue_interrupt_all(work_queue); + break; + } + apr_sleep(retry_count * 1000000); + ret = trypop_queue(&entry); } - apr_sleep(retry_count * 1000000); - ret = trypop_queue(&entry); + return; } - return; } action = examine_tile(cmd_ctx, tile); @@ -515,6 +556,7 @@ void cmd_recurse(mapcache_context *cmd_ctx, mapcache_tile *tile) if(action == MAPCACHE_CMD_SEED || action == MAPCACHE_CMD_DELETE || action == MAPCACHE_CMD_TRANSFER) { //current x,y,z needs seeding, add it to the queue struct seed_cmd cmd; + memset(&cmd, 0, sizeof(struct seed_cmd)); cmd.x = tile->x; cmd.y = tile->y; cmd.z = tile->z; @@ -633,25 +675,10 @@ void feed_worker() int action; apr_pool_clear(cmd_ctx.pool); if(sig_int_received || error_detected) { //stop if we were asked to stop by hitting ctrl-c - //remove all items from the queue - struct seed_cmd entry; - int retry_count = 0; - int ret = trypop_queue(&entry); - while (ret != APR_EAGAIN) { - // try to empty queue with a graceful retreat up to 55 seconds - // for retries before forcefully terminating threads - if (ret == APR_EOF) - break; - if (ret != APR_SUCCESS) - retry_count++; - if (retry_count > 10) { - printf("Feed worker threads failed to terminate. 
Stopping forcefully.\n"); - apr_queue_interrupt_all(work_queue); - break; - } - apr_sleep(retry_count * 1000000); - ret = trypop_queue(&entry); + if(nthreads > 0) { + apr_queue_interrupt_all(work_queue); } + // Multiprocess case is already handled by pop_queue/push_queue break; } if(iteration_mode == MAPCACHE_ITERATION_LOG) { @@ -669,6 +696,10 @@ void feed_worker() if(action == MAPCACHE_CMD_SEED || action == MAPCACHE_CMD_DELETE || action == MAPCACHE_CMD_TRANSFER) { //current x,y,z needs seeding, add it to the queue struct seed_cmd cmd; +#ifdef USE_FORK + // zero struct to prevent sending garbage over msgsnd + memset(&cmd, 0, sizeof(struct seed_cmd)); +#endif cmd.x = x; cmd.y = y; cmd.z = z; @@ -677,19 +708,38 @@ void feed_worker() rate_limit_sleep(); push_queue(cmd); } else if (action == MAPCACHE_CMD_SKIP) { - apr_status_t ret; - struct seed_status *st = calloc(1,sizeof(struct seed_status)); - int retries=0; - st->x=tile->x; - st->y=tile->y; - st->z=tile->z; - st->nodata = 0; - st->skipped = 1; - st->status = MAPCACHE_STATUS_OK; - ret = apr_queue_push(log_queue,(void*)st); - while( ret == APR_EINTR && retries < 10) { - retries++; +#ifdef USE_FORK + if(nprocesses >= 1) { + struct ipc_log_status ipc_st; + memset(&ipc_st, 0, sizeof(struct ipc_log_status)); + ipc_st.mtype = 1; + ipc_st.status = MAPCACHE_STATUS_OK; + ipc_st.x = tile->x; + ipc_st.y = tile->y; + ipc_st.z = tile->z; + ipc_st.nodata = 0; + ipc_st.skipped = 1; + if(msgsnd(log_msqid,&ipc_st,sizeof(struct ipc_log_status)-sizeof(long),0)) { + printf("FATAL ERROR: unable to log progress to msqid, aborting\n"); + error_detected = 1; + } + } else +#endif + { + apr_status_t ret; + struct seed_status *st = calloc(1,sizeof(struct seed_status)); + int retries=0; + st->x=tile->x; + st->y=tile->y; + st->z=tile->z; + st->nodata = 0; + st->skipped = 1; + st->status = MAPCACHE_STATUS_OK; ret = apr_queue_push(log_queue,(void*)st); + while( ret == APR_EINTR && retries < 10) { + retries++; + ret = 
apr_queue_push(log_queue,(void*)st); + } } } @@ -708,12 +758,16 @@ void feed_worker() } } } - //instruct rendering threads to stop working - for(n=0; nstatus = MAPCACHE_STATUS_OK; } - ret = apr_queue_push(log_queue,(void*)st); - while( ret == APR_EINTR && retries < 10) { - retries++; - ret = apr_queue_push(log_queue,(void*)st); - } - if( ret == APR_EINTR) { - printf("FATAL ERROR: unable to log progress after 10 retries, aborting\n"); - break; - } - if(ret != APR_SUCCESS) +#ifdef USE_FORK + if(nprocesses >= 1) { + int mret; + struct ipc_log_status ipc_st; + memset(&ipc_st, 0, sizeof(struct ipc_log_status)); + ipc_st.mtype = 1; + ipc_st.status = st->status; + ipc_st.x = st->x; + ipc_st.y = st->y; + ipc_st.z = st->z; + ipc_st.nodata = st->nodata; + ipc_st.skipped = st->skipped; + if(st->msg) { + strncpy(ipc_st.msg,st->msg,MAX_ERR_MSG_SIZE-1); + ipc_st.msg[MAX_ERR_MSG_SIZE-1] = '\0'; + } else { + ipc_st.msg[0] = 0; + } + free(st->msg); + free(st); + mret = msgsnd(log_msqid,&ipc_st,sizeof(struct ipc_log_status)-sizeof(long),0); + if(mret) { + printf("FATAL ERROR: unable to log progress, aborting\n"); + break; + } + + } else +#endif { - printf("FATAL ERROR: unable to log progress\n"); - break; + ret = apr_queue_push(log_queue,(void*)st); + while( ret == APR_EINTR && retries < 10) { + retries++; + ret = apr_queue_push(log_queue,(void*)st); + } + if( ret == APR_EINTR) { + printf("FATAL ERROR: unable to log progress after 10 retries, aborting\n"); + break; + } + if(ret != APR_SUCCESS) + { + printf("FATAL ERROR: unable to log progress\n"); + break; + } } } } @@ -853,68 +937,133 @@ static void* APR_THREAD_FUNC log_thread_fn(apr_thread_t *thread, void *data) { memset(failed,-1,FAIL_BACKLOG_COUNT); cur=0; last_time=0; - while(1) { - int retries = 0; - struct seed_status *st; - apr_status_t ret = apr_queue_pop(log_queue, (void**)&st); - while(ret == APR_EINTR && retries<10) { - retries++; - ret = apr_queue_pop(log_queue, (void**)&st); - } - if(ret != APR_SUCCESS || !st) break; - 
if(st->status == MAPCACHE_STATUS_FINISHED) - return NULL; - if(st->status == MAPCACHE_STATUS_OK) { - failed[cur]=0; - if (st->skipped) { - n_skipped_tot++; - } else { - n_metatiles_tot++; - } - if(st->nodata) { - n_nodata_tot++; +#ifdef USE_FORK + if(nprocesses >= 1) { + while(1) { + struct ipc_log_status ipc_st; + int ret = msgrcv(log_msqid, &ipc_st, sizeof(struct ipc_log_status)-sizeof(long),1,0); + if(ret == -1) { + /* msqid has been removed, or something else went wrong. in any case, we must exit */ + return NULL; } - if(!quiet) { - struct mctimeval now; - mapcache_gettimeofday(&now,NULL); - now_time = now.tv_sec + now.tv_usec / 1000000.0; - if((now_time - last_time) > 1.0) { - int seeded_count = n_metatiles_tot*tileset->metasize_x*tileset->metasize_y; - int skipped_count = n_skipped_tot*tileset->metasize_x*tileset->metasize_y; - if (non_interactive) { - printf("seeded %d tiles (%d skipped), now at z%d x%d y%d\n",seeded_count,skipped_count,st->z,st->x,st->y); - } else { - printf(" \r"); - printf("seeded %d tiles (%d skipped), now at z%d x%d y%d\r",seeded_count,skipped_count,st->z,st->x,st->y); - fflush(stdout); - } + if(ipc_st.status == MAPCACHE_STATUS_FINISHED) + return NULL; + if(ipc_st.status == MAPCACHE_STATUS_OK) { + failed[cur]=0; + if (ipc_st.skipped) { + n_skipped_tot++; + } else { + n_metatiles_tot++; + } + if(ipc_st.nodata) { + n_nodata_tot++; + } + if(!quiet) { + struct mctimeval now; + mapcache_gettimeofday(&now,NULL); + now_time = now.tv_sec + now.tv_usec / 1000000.0; + if((now_time - last_time) > 1.0) { + int seeded_count = n_metatiles_tot*tileset->metasize_x*tileset->metasize_y; + int skipped_count = n_skipped_tot*tileset->metasize_x*tileset->metasize_y; + if (non_interactive) { + printf("seeded %d tiles (%d skipped), now at z%d x%d y%d\n",seeded_count,skipped_count,ipc_st.z,ipc_st.x,ipc_st.y); + } else { + printf(" \r"); + printf("seeded %d tiles (%d skipped), now at z%d x%d y%d\r",seeded_count,skipped_count,ipc_st.z,ipc_st.x,ipc_st.y); + 
fflush(stdout); + } - last_time = now_time; + last_time = now_time; + } + } + } else { + /* count how many errors and successes we have */ + failed[cur]=1; + nfailed=0; + ntotal=0; + if(failed_log) { + fprintf(failed_log,"%d,%d,%d\n",ipc_st.x,ipc_st.y,ipc_st.z); + } + for(i=0; i=0) ntotal++; + if(failed[i]==1) nfailed++; + } + ctx.log(&ctx, MAPCACHE_WARN, "failed to seed tile z%d,x%d,y%d:\n%s\n", ipc_st.z,ipc_st.x,ipc_st.y,ipc_st.msg); + pct = ((double)nfailed / (double)ntotal) * 100; + if(pct > percent_failed_allowed) { + ctx.log(&ctx, MAPCACHE_ERROR, "aborting seed as %.1f%% of the last %d requests failed\n", pct, FAIL_BACKLOG_COUNT); + error_detected = 1; } } - } else { - /* count how many errors and successes we have */ - failed[cur]=1; - nfailed=0; - ntotal=0; - if(failed_log) { - fprintf(failed_log,"%d,%d,%d\n",st->x,st->y,st->z); - } - for(i=0; i=0) ntotal++; - if(failed[i]==1) nfailed++; + cur++; + cur %= FAIL_BACKLOG_COUNT; + } + } +#endif + { + while(1) { + int retries = 0; + struct seed_status *st; + apr_status_t ret = apr_queue_pop(log_queue, (void**)&st); + while(ret == APR_EINTR && retries<10) { + retries++; + ret = apr_queue_pop(log_queue, (void**)&st); } - ctx.log(&ctx, MAPCACHE_WARN, "failed to seed tile z%d,x%d,y%d:\n%s\n", st->z,st->x,st->y,st->msg); - pct = ((double)nfailed / (double)ntotal) * 100; - if(pct > percent_failed_allowed) { - ctx.log(&ctx, MAPCACHE_ERROR, "aborting seed as %.1f%% of the last %d requests failed\n", pct, FAIL_BACKLOG_COUNT); - error_detected = 1; + if(ret != APR_SUCCESS || !st) break; + if(st->status == MAPCACHE_STATUS_FINISHED) + return NULL; + if(st->status == MAPCACHE_STATUS_OK) { + failed[cur]=0; + if (st->skipped) { + n_skipped_tot++; + } else { + n_metatiles_tot++; + } + if(st->nodata) { + n_nodata_tot++; + } + if(!quiet) { + struct mctimeval now; + mapcache_gettimeofday(&now,NULL); + now_time = now.tv_sec + now.tv_usec / 1000000.0; + if((now_time - last_time) > 1.0) { + int seeded_count = 
n_metatiles_tot*tileset->metasize_x*tileset->metasize_y; + int skipped_count = n_skipped_tot*tileset->metasize_x*tileset->metasize_y; + if (non_interactive) { + printf("seeded %d tiles (%d skipped), now at z%d x%d y%d\n",seeded_count,skipped_count,st->z,st->x,st->y); + } else { + printf(" \r"); + printf("seeded %d tiles (%d skipped), now at z%d x%d y%d\r",seeded_count,skipped_count,st->z,st->x,st->y); + fflush(stdout); + } + + last_time = now_time; + } + } + } else { + /* count how many errors and successes we have */ + failed[cur]=1; + nfailed=0; + ntotal=0; + if(failed_log) { + fprintf(failed_log,"%d,%d,%d\n",st->x,st->y,st->z); + } + for(i=0; i=0) ntotal++; + if(failed[i]==1) nfailed++; + } + ctx.log(&ctx, MAPCACHE_WARN, "failed to seed tile z%d,x%d,y%d:\n%s\n", st->z,st->x,st->y,st->msg); + pct = ((double)nfailed / (double)ntotal) * 100; + if(pct > percent_failed_allowed) { + ctx.log(&ctx, MAPCACHE_ERROR, "aborting seed as %.1f%% of the last %d requests failed\n", pct, FAIL_BACKLOG_COUNT); + error_detected = 1; + } } + if(st->msg) free(st->msg); + free(st); + cur++; + cur %= FAIL_BACKLOG_COUNT; } - if(st->msg) free(st->msg); - free(st); - cur++; - cur %= FAIL_BACKLOG_COUNT; } return NULL; } @@ -1132,7 +1281,7 @@ int main(int argc, const char **argv) case 'p': #ifdef USE_FORK nprocesses = (int)strtol(optarg, NULL, 10); - if(nprocesses <=0 ) + if(nprocesses < 1 ) return usage(argv[0], "failed to parse nprocesses, expecting positive integer"); break; #else @@ -1519,68 +1668,60 @@ int main(int argc, const char **argv) return usage(argv[0],"cannot set both nthreads and nprocesses"); } - { - /* start the logging thread */ - //create the queue where the seeding statuses will be put - apr_threadattr_t *log_thread_attrs; - apr_queue_create(&log_queue,MAPCACHE_MAX(nthreads,nprocesses),ctx.pool); - - //start the rendering threads. 
- apr_threadattr_create(&log_thread_attrs, ctx.pool); - apr_thread_create(&log_thread, log_thread_attrs, log_thread_fn, NULL, ctx.pool); + if(nprocesses >= 1) { +#ifdef USE_FORK + if ((log_msqid = msgget(IPC_PRIVATE, 0644 | IPC_CREAT|S_IRUSR|S_IWUSR)) == -1) { + return usage(argv[0],"failed to create sysv ipc log message queue: %d %s\n", errno, strerror(errno)); + } +#endif + } else { + apr_queue_create(&log_queue,FAIL_BACKLOG_COUNT,ctx.pool); } - if(nprocesses > 1) { + if(nprocesses >= 1) { #ifdef USE_FORK - key_t key; int i; + // workers only pid_t *pids = malloc(nprocesses*sizeof(pid_t)); - struct msqid_ds queue_ds; - key = ftok(argv[0], 'B'); - if ((msqid = msgget(key, 0644 | IPC_CREAT|S_IRUSR|S_IWUSR)) == -1) { - return usage(argv[0],"failed to create sysv ipc message queue"); - } - if (-1 == msgctl(msqid, IPC_STAT, &queue_ds)) { - return usage(argv[0], "\nFailure in msgctl() stat"); - } - queue_ds.msg_qbytes = nprocesses*sizeof(struct seed_cmd); - if(-1 == msgctl(msqid, IPC_SET, &queue_ds)) { - switch(errno) { - case EACCES: - return usage(argv[0], "\nFailure in msgctl() set qbytes: EACCESS (should not happen here)"); - case EFAULT: - return usage(argv[0], "\nFailure in msgctl() set qbytes: EFAULT queue not accessible"); - case EIDRM: - return usage(argv[0], "\nFailure in msgctl() set qbytes: EIDRM message queue removed"); - case EINVAL: - return usage(argv[0], "\nFailure in msgctl() set qbytes: EINVAL invalid value for msg_qbytes"); - case EPERM: - return usage(argv[0], "\nFailure in msgctl() set qbytes: EPERM permission denied on msg_qbytes"); - default: - return usage(argv[0], "\nFailure in msgctl() set qbytes: unknown"); - } + if ((msqid = msgget(IPC_PRIVATE, 0644 | IPC_CREAT|S_IRUSR|S_IWUSR)) == -1) { + return usage(argv[0],"failed to create sysv ipc message queue: %d %s\n", errno, strerror(errno)); } for(i=0; istatus = MAPCACHE_STATUS_FINISHED; - ret = apr_queue_push(log_queue,(void*)st); - while (ret == APR_EINTR && retries<10) { - retries++; + 
if(nprocesses >= 1) { +#ifdef USE_FORK + struct ipc_log_status ipc_st; + memset(&ipc_st, 0, sizeof(struct ipc_log_status)); + ipc_st.mtype = 1; + ipc_st.status = MAPCACHE_STATUS_FINISHED; + msgsnd(log_msqid,&ipc_st,sizeof(struct ipc_log_status)-sizeof(long),0); +#endif + } else { + int retries=0; + int ret; + struct seed_status *st = calloc(1,sizeof(struct seed_status)); + st->status = MAPCACHE_STATUS_FINISHED; ret = apr_queue_push(log_queue,(void*)st); + while (ret == APR_EINTR && retries<10) { + retries++; + ret = apr_queue_push(log_queue,(void*)st); + } } apr_thread_join(&rv, log_thread); } From 6239413c9f6adf9f6ba54449b893e9b4a40760c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C4=81ris=20Narti=C5=A1s?= Date: Tue, 30 Sep 2025 14:08:16 +0300 Subject: [PATCH 2/2] Test mapcache_seed parallel worker mode --- tests/run_tests.sh | 69 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 63 insertions(+), 6 deletions(-) diff --git a/tests/run_tests.sh b/tests/run_tests.sh index 10260908..b353e1f7 100755 --- a/tests/run_tests.sh +++ b/tests/run_tests.sh @@ -2,10 +2,10 @@ # Project: MapCache # Purpose: MapCache tests -# Author: Even Rouault +# Author: Even Rouault, Maris Nartiss # #***************************************************************************** -# Copyright (c) 2017 Regents of the University of Minnesota. +# Copyright (c) 2017, 2025 Regents of the University of Minnesota. 
# # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), @@ -30,10 +30,10 @@ set -e MAPCACHE_CONF=/tmp/mc/mapcache.xml -sudo rm -rf /tmp/mc/global +rm -rf /tmp/mc/global mapcache_seed -c /tmp/mc/mapcache.xml -t global --force -z 0,1 -gdalinfo -checksum /tmp/mc/global/GoogleMapsCompatible/00/000/000/000/000/000/000.jpg | grep Checksum=20574 >/dev/null || (echo "Did not get expected checksum"; gdalinfo -checksum /tmp/mc/global/GoogleMapsCompatible/00/000/000/000/000/000/000.jpg; /bin/false) -sudo rm -rf /tmp/mc/global +gdalinfo /tmp/mc/global/GoogleMapsCompatible/00/000/000/000/000/000/000.jpg | grep "Size is 256, 256" >/dev/null || (echo "Invalid image size"; gdalinfo /tmp/mc/global/GoogleMapsCompatible/00/000/000/000/000/000/000.jpg; /bin/false) +rm -rf /tmp/mc/global curl -s "http://localhost/mapcache/?SERVICE=WMS&REQUEST=GetCapabilities" | xmllint --format - > /tmp/wms_capabilities.xml diff -u /tmp/wms_capabilities.xml expected @@ -42,8 +42,65 @@ curl -s "http://localhost/mapcache/wmts?SERVICE=WMTS&REQUEST=GetCapabilities" | diff -u /tmp/wmts_capabilities.xml expected curl -s "http://localhost/mapcache/wmts/1.0.0/global/default/GoogleMapsCompatible/0/0/0.jpg" > /tmp/0.jpg -gdalinfo -checksum /tmp/0.jpg | grep Checksum=20574 >/dev/null || (echo "Did not get expected checksum"; gdalinfo -checksum /tmp/0.jpg; /bin/false) +gdalinfo /tmp/0.jpg | grep "Size is 256, 256" >/dev/null || (echo "Invalid image size"; gdalinfo /tmp/0.jpg; /bin/false) curl -s "http://localhost/mapcache/wmts/1.0.0/global/default/GoogleMapsCompatible/0/0/0.jpg" > /tmp/0_bis.jpg diff /tmp/0.jpg /tmp/0_bis.jpg + +# --- Test parallel seeding --- +echo "== Testing parallel seeding with -p 4 ==" +rm -rf /tmp/mc/global +mapcache_seed -c /tmp/mc/mapcache.xml -t global --force -z 0,2 -p 4 +gdalinfo /tmp/mc/global/GoogleMapsCompatible/00/000/000/000/000/000/000.jpg | grep "Size is 256, 256" >/dev/null || (echo 
"Invalid image size for tile 0/0/0"; gdalinfo /tmp/mc/global/GoogleMapsCompatible/00/000/000/000/000/000/000.jpg; /bin/false) +gdalinfo /tmp/mc/global/GoogleMapsCompatible/01/000/000/000/000/000/000.jpg | grep "Size is 256, 256" >/dev/null || (echo "Invalid image size for tile 1/0/0"; gdalinfo /tmp/mc/global/GoogleMapsCompatible/01/000/000/000/000/000/000.jpg; /bin/false) +gdalinfo /tmp/mc/global/GoogleMapsCompatible/02/000/000/000/000/000/000.jpg | grep "Size is 256, 256" >/dev/null || (echo "Invalid image size for tile 2/0/0"; gdalinfo /tmp/mc/global/GoogleMapsCompatible/02/000/000/000/000/000/000.jpg; /bin/false) +echo "OK: Parallel seeding with -p 4 successful" +rm -rf /tmp/mc/global + +# --- Test parallel seeding with -p 1 --- +echo "== Testing parallel seeding with -p 1 ==" +rm -rf /tmp/mc/global +mapcache_seed -c /tmp/mc/mapcache.xml -t global --force -z 0,2 -p 1 +gdalinfo /tmp/mc/global/GoogleMapsCompatible/00/000/000/000/000/000/000.jpg | grep "Size is 256, 256" >/dev/null || (echo "Invalid image size for tile 0/0/0"; exit 1) +gdalinfo /tmp/mc/global/GoogleMapsCompatible/01/000/000/000/000/000/000.jpg | grep "Size is 256, 256" >/dev/null || (echo "Invalid image size for tile 1/0/0"; exit 1) +gdalinfo /tmp/mc/global/GoogleMapsCompatible/02/000/000/000/000/000/000.jpg | grep "Size is 256, 256" >/dev/null || (echo "Invalid image size for tile 2/0/0"; exit 1) +echo "OK: Parallel seeding with -p 1 successful" +rm -rf /tmp/mc/global + +# --- Test graceful shutdown --- +echo "== Testing graceful shutdown of parallel seeder ==" +rm -rf /tmp/mc_shutdown +mkdir -p /tmp/mc_shutdown +cp /tmp/mc/mapcache.xml /tmp/mc/mapcache_shutdown.xml +sed -i 's|/tmp/mc|/tmp/mc_shutdown|' /tmp/mc/mapcache_shutdown.xml +# Run seeder in the background, in its own process group +set -m +mapcache_seed -c /tmp/mc/mapcache_shutdown.xml -t global -z 0,8 -p 4 -q & +PID=$! 
+set +m +# Give it a moment to start seeding +sleep 2 +PGID=$(ps -o pgid= $PID | xargs) +if [ -z "$PGID" ]; then + echo "Could not get PGID of seeder process. Test cannot continue." + exit 1 +fi +# Send SIGINT to the process group +echo "Sending SIGINT to process group $PGID" +kill -INT -$PGID +# Wait for the process to terminate, with a timeout +if wait $PID 2>/dev/null; then + echo "OK: Seeder terminated gracefully on SIGINT" +else + # The process might have already exited and `wait` fails. + # Let's check if it's still running. + if ps -p $PID > /dev/null; then + echo "Error: Seeder did not terminate gracefully" + kill -9 -$PGID + exit 1 + else + echo "OK: Seeder terminated gracefully on SIGINT" + fi +fi +rm -rf /tmp/mc_shutdown