=== Applying patches on top of PostgreSQL commit ID 371a302eecdc82274b0ae2967d18fd726a0aa6a1 === /etc/rc.d/jail: WARNING: Per-jail configuration via jail_* variables is obsolete. Please consider migrating to /etc/jail.conf. Sun Oct 26 20:57:19 UTC 2025 On branch cf/4884 nothing to commit, working tree clean === using 'git am' to apply patch ./0001-Fix-rare-recovery-shutdown-hang-due-to-checkpointer.patch === Applying: Fix rare recovery shutdown hang due to checkpointer. Using index info to reconstruct a base tree... M src/backend/postmaster/postmaster.c Falling back to patching base and 3-way merge... Auto-merging src/backend/postmaster/postmaster.c CONFLICT (content): Merge conflict in src/backend/postmaster/postmaster.c error: Failed to merge in the changes. hint: Use 'git am --show-current-patch=diff' to see the failed patch Patch failed at 0001 Fix rare recovery shutdown hang due to checkpointer. When you have resolved this problem, run "git am --continue". If you prefer to skip this patch, run "git am --skip" instead. To restore the original branch and stop patching, run "git am --abort". Unstaged changes after reset: M src/backend/postmaster/postmaster.c === using patch(1) to apply patch ./0001-Fix-rare-recovery-shutdown-hang-due-to-checkpointer.patch === patching file src/backend/postmaster/postmaster.c Hunk #1 FAILED at 3748. Hunk #2 succeeded at 3032 with fuzz 1 (offset -735 lines). Hunk #3 succeeded at 3071 with fuzz 1 (offset -734 lines). 1 out of 3 hunks FAILED -- saving rejects to file src/backend/postmaster/postmaster.c.rej Unstaged changes after reset: M src/backend/postmaster/postmaster.c Removing src/backend/postmaster/postmaster.c.rej === using 'git apply' to apply patch ./0001-Fix-rare-recovery-shutdown-hang-due-to-checkpointer.patch === Applied patch to 'src/backend/postmaster/postmaster.c' with conflicts. U src/backend/postmaster/postmaster.c diff --cc src/backend/postmaster/postmaster.c index 00de559ba8f,62db752228a..00000000000 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@@ -2908,131 -3685,91 +2908,146 @@@ PostmasterStateMachine(void } /* - * If we're ready to do so, signal child processes to shut down. (This - * isn't a persistent state, but treating it as a distinct pmState allows - * us to share this code across multiple shutdown code paths.) + * In the PM_WAIT_BACKENDS state, wait for all the regular backends and + * processes like autovacuum and background workers that are comparable to + * backends to exit. + * + * PM_STOP_BACKENDS is a transient state that means the same as + * PM_WAIT_BACKENDS, but we signal the processes first, before waiting for + * them. Treating it as a distinct pmState allows us to share this code + * across multiple shutdown code paths. */ - if (pmState == PM_STOP_BACKENDS) + if (pmState == PM_STOP_BACKENDS || pmState == PM_WAIT_BACKENDS) { + BackendTypeMask targetMask = BTYPE_MASK_NONE; + /* - * Forget any pending requests for background workers, since we're no - * longer willing to launch any new workers. (If additional requests - * arrive, BackgroundWorkerStateChange will reject them.) + * PM_WAIT_BACKENDS state ends when we have no regular backends, no + * autovac launcher or workers, and no bgworkers (including + * unconnected ones). */ - ForgetUnstartedBackgroundWorkers(); - - /* Signal all backend children except walsenders */ - SignalSomeChildren(SIGTERM, - BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND); - /* and the autovac launcher too */ - if (AutoVacPID != 0) - signal_child(AutoVacPID, SIGTERM); - /* and the bgwriter too */ - if (BgWriterPID != 0) - signal_child(BgWriterPID, SIGTERM); - /* and the walwriter too */ - if (WalWriterPID != 0) - signal_child(WalWriterPID, SIGTERM); + targetMask = btmask_add(targetMask, + B_BACKEND, + B_AUTOVAC_LAUNCHER, + B_AUTOVAC_WORKER, + B_BG_WORKER); + + /* + * No walwriter, bgwriter, slot sync worker, or WAL summarizer either. + */ + targetMask = btmask_add(targetMask, + B_WAL_WRITER, + B_BG_WRITER, + B_SLOTSYNC_WORKER, + B_WAL_SUMMARIZER); + /* If we're in recovery, also stop startup and walreceiver procs */ - if (StartupPID != 0) - signal_child(StartupPID, SIGTERM); - if (WalReceiverPID != 0) - signal_child(WalReceiverPID, SIGTERM); - if (WalSummarizerPID != 0) - signal_child(WalSummarizerPID, SIGTERM); - if (SlotSyncWorkerPID != 0) - signal_child(SlotSyncWorkerPID, SIGTERM); - /* checkpointer, archiver, stats, and syslogger may continue for now */ - - /* Now transition to PM_WAIT_BACKENDS state to wait for them to die */ - pmState = PM_WAIT_BACKENDS; - } + targetMask = btmask_add(targetMask, + B_STARTUP, + B_WAL_RECEIVER); - /* - * If we are in a state-machine state that implies waiting for backends to - * exit, see if they're all gone, and change state if so. - */ - if (pmState == PM_WAIT_BACKENDS) - { /* - * PM_WAIT_BACKENDS state ends when we have no regular backends - * (including autovac workers), no bgworkers (including unconnected - * ones), and no walwriter, autovac launcher, bgwriter or slot sync - * worker. If we are doing crash recovery or an immediate shutdown - * then we expect the checkpointer to exit as well, otherwise not. The - * stats and syslogger processes are disregarded since they are not - * connected to shared memory; we also disregard dead_end children - * here. Walsenders and archiver are also disregarded, they will be - * terminated later after writing the checkpoint record. + * If we are doing crash recovery or an immediate shutdown then we + * expect archiver, checkpointer, io workers and walsender to exit as + * well, otherwise not. + */ + if (FatalError || Shutdown >= ImmediateShutdown) + targetMask = btmask_add(targetMask, + B_CHECKPOINTER, + B_ARCHIVER, + B_IO_WORKER, + B_WAL_SENDER); + + /* + * Normally archiver, checkpointer, IO workers and walsenders will + * continue running; they will be terminated later after writing the + * checkpoint record. We also let dead-end children to keep running + * for now. The syslogger process exits last. + * + * This assertion checks that we have covered all backend types, + * either by including them in targetMask, or by noting here that they + * are allowed to continue running. */ ++<<<<<<< ours +#ifdef USE_ASSERT_CHECKING + { + BackendTypeMask remainMask = BTYPE_MASK_NONE; + + remainMask = btmask_add(remainMask, + B_DEAD_END_BACKEND, + B_LOGGER); + + /* + * Archiver, checkpointer, IO workers, and walsender may or may + * not be in targetMask already. + */ + remainMask = btmask_add(remainMask, + B_ARCHIVER, + B_CHECKPOINTER, + B_IO_WORKER, + B_WAL_SENDER); + + /* these are not real postmaster children */ + remainMask = btmask_add(remainMask, + B_INVALID, + B_STANDALONE_BACKEND); + + /* All types should be included in targetMask or remainMask */ + Assert((remainMask.mask | targetMask.mask) == BTYPE_MASK_ALL.mask); + } +#endif + + /* If we had not yet signaled the processes to exit, do so now */ + if (pmState == PM_STOP_BACKENDS) + { + /* + * Forget any pending requests for background workers, since we're + * no longer willing to launch any new workers. (If additional + * requests arrive, BackgroundWorkerStateChange will reject them.) + */ + ForgetUnstartedBackgroundWorkers(); + + SignalChildren(SIGTERM, targetMask); + + UpdatePMState(PM_WAIT_BACKENDS); + } + + /* Are any of the target processes still running? */ + if (CountChildren(targetMask) == 0) ++======= + if (CountChildren(BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND) == 0 && + StartupPID == 0 && + WalReceiverPID == 0 && + WalSummarizerPID == 0 && + BgWriterPID == 0 && + (CheckpointerPID == 0 || + (!FatalError && Shutdown < ImmediateShutdown) || + (FatalError && CheckpointerPID != 0)) && + WalWriterPID == 0 && + AutoVacPID == 0 && + SlotSyncWorkerPID == 0) ++>>>>>>> theirs { - if (Shutdown >= ImmediateShutdown || FatalError) + if (CheckpointerPID == 0 && + (Shutdown >= ImmediateShutdown || FatalError)) { /* - * Start waiting for dead_end children to die. This state - * change causes ServerLoop to stop creating new ones. + * Stop any dead-end children and stop creating new ones. + * + * NB: Similar code exists in HandleFatalError(), when the + * error happens in pmState > PM_WAIT_BACKENDS. */ - pmState = PM_WAIT_DEAD_END; + UpdatePMState(PM_WAIT_DEAD_END); + ConfigurePostmasterWaitSet(false); + SignalChildren(SIGQUIT, btmask(B_DEAD_END_BACKEND)); /* - * We already SIGQUIT'd the archiver and stats processes, if - * any, when we started immediate shutdown or entered - * FatalError state. + * We already SIGQUIT'd auxiliary processes (other than + * logger), if any, when we started immediate shutdown or + * entered FatalError state. */ } - else + else if (Shutdown > NoShutdown && Shutdown < ImmediateShutdown) { /* * If we get here, we are proceeding with normal shutdown. All @@@ -3060,17 -3797,26 +3075,27 @@@ * We don't consult send_abort_for_crash here, as it's * unlikely that dumping cores would illuminate the reason * for checkpointer fork failure. + * + * XXX: It may be worth to introduce a different PMQUIT + * value that signals that the cluster is in a bad state, + * without a process having crashed. But right now this + * path is very unlikely to be reached, so it isn't + * obviously worthwhile adding a distinct error message in + * quickdie(). */ - FatalError = true; - pmState = PM_WAIT_DEAD_END; - - /* Kill the walsenders and archiver too */ - SignalChildren(SIGQUIT); - if (PgArchPID != 0) - signal_child(PgArchPID, SIGQUIT); + HandleFatalError(PMQUIT_FOR_CRASH, false); } } + else + { + /* + * Either it's an immediate shutdown or a child crashed, and + * we're still waiting for all the children to quit. The + * checkpointer was already told to quit. + */ + Assert(Shutdown == ImmediateShutdown || + (Shutdown == NoShutdown && FatalError)); + } } }