=== Applying patches on top of PostgreSQL commit ID a6524105d20b190fb4f5b2116e044e29be88f215 === /etc/rc.d/jail: WARNING: Per-jail configuration via jail_* variables is obsolete. Please consider migrating to /etc/jail.conf. Tue Mar 18 20:13:22 UTC 2025 On branch cf/4884 nothing to commit, working tree clean === using 'git am' to apply patch ./0001-Fix-rare-recovery-shutdown-hang-due-to-checkpointer.patch === Applying: Fix rare recovery shutdown hang due to checkpointer. Using index info to reconstruct a base tree... M src/backend/postmaster/postmaster.c Falling back to patching base and 3-way merge... Auto-merging src/backend/postmaster/postmaster.c CONFLICT (content): Merge conflict in src/backend/postmaster/postmaster.c error: Failed to merge in the changes. hint: Use 'git am --show-current-patch=diff' to see the failed patch Patch failed at 0001 Fix rare recovery shutdown hang due to checkpointer. When you have resolved this problem, run "git am --continue". If you prefer to skip this patch, run "git am --skip" instead. To restore the original branch and stop patching, run "git am --abort". Unstaged changes after reset: M src/backend/postmaster/postmaster.c === using patch(1) to apply patch ./0001-Fix-rare-recovery-shutdown-hang-due-to-checkpointer.patch === patch: unrecognized option `--no-backup-if-mismatch' usage: patch [-bCcEeflNnRstuv] [-B backup-prefix] [-D symbol] [-d directory] [-F max-fuzz] [-i patchfile] [-o out-file] [-p strip-count] [-r rej-name] [-V t | nil | never | none] [-x number] [-z backup-ext] [--posix] [origfile [patchfile]] patch = ImmediateShutdown) + targetMask = btmask_add(targetMask, + B_CHECKPOINTER, + B_ARCHIVER, + B_IO_WORKER, + B_WAL_SENDER); + + /* + * Normally archiver, checkpointer, IO workers and walsenders will + * continue running; they will be terminated later after writing the + * checkpoint record. We also let dead-end children to keep running + * for now. The syslogger process exits last. + * + * This assertion checks that we have covered all backend types, + * either by including them in targetMask, or by noting here that they + * are allowed to continue running. + */ +#ifdef USE_ASSERT_CHECKING + { + BackendTypeMask remainMask = BTYPE_MASK_NONE; + + remainMask = btmask_add(remainMask, + B_DEAD_END_BACKEND, + B_LOGGER); + + /* + * Archiver, checkpointer, IO workers, and walsender may or may + * not be in targetMask already. + */ + remainMask = btmask_add(remainMask, + B_ARCHIVER, + B_CHECKPOINTER, + B_IO_WORKER, + B_WAL_SENDER); + + /* these are not real postmaster children */ + remainMask = btmask_add(remainMask, + B_INVALID, + B_STANDALONE_BACKEND); + + /* All types should be included in targetMask or remainMask */ + Assert((remainMask.mask | targetMask.mask) == BTYPE_MASK_ALL.mask); + } +#endif + + /* If we had not yet signaled the processes to exit, do so now */ + if (pmState == PM_STOP_BACKENDS) + { + /* + * Forget any pending requests for background workers, since we're + * no longer willing to launch any new workers. (If additional + * requests arrive, BackgroundWorkerStateChange will reject them.) + */ + ForgetUnstartedBackgroundWorkers(); + + SignalChildren(SIGTERM, targetMask); + + UpdatePMState(PM_WAIT_BACKENDS); + } + + /* Are any of the target processes still running? */ + if (CountChildren(targetMask) == 0) ++======= + if (CountChildren(BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND) == 0 && + StartupPID == 0 && + WalReceiverPID == 0 && + WalSummarizerPID == 0 && + BgWriterPID == 0 && + (CheckpointerPID == 0 || + (!FatalError && Shutdown < ImmediateShutdown) || + (FatalError && CheckpointerPID != 0)) && + WalWriterPID == 0 && + AutoVacPID == 0 && + SlotSyncWorkerPID == 0) ++>>>>>>> theirs { - if (Shutdown >= ImmediateShutdown || FatalError) + if (CheckpointerPID == 0 && + (Shutdown >= ImmediateShutdown || FatalError)) { /* - * Start waiting for dead_end children to die. This state - * change causes ServerLoop to stop creating new ones. + * Stop any dead-end children and stop creating new ones. + * + * NB: Similar code exists in HandleFatalErrors(), when the + * error happens in pmState > PM_WAIT_BACKENDS. */ - pmState = PM_WAIT_DEAD_END; + UpdatePMState(PM_WAIT_DEAD_END); + ConfigurePostmasterWaitSet(false); + SignalChildren(SIGQUIT, btmask(B_DEAD_END_BACKEND)); /* - * We already SIGQUIT'd the archiver and stats processes, if - * any, when we started immediate shutdown or entered - * FatalError state. + * We already SIGQUIT'd auxiliary processes (other than + * logger), if any, when we started immediate shutdown or + * entered FatalError state. */ } - else + else if (Shutdown > NoShutdown && Shutdown < ImmediateShutdown) { /* * If we get here, we are proceeding with normal shutdown. All @@@ -3042,17 -3797,26 +3057,27 @@@ * We don't consult send_abort_for_crash here, as it's * unlikely that dumping cores would illuminate the reason * for checkpointer fork failure. + * + * XXX: It may be worth to introduce a different PMQUIT + * value that signals that the cluster is in a bad state, + * without a process having crashed. But right now this + * path is very unlikely to be reached, so it isn't + * obviously worthwhile adding a distinct error message in + * quickdie(). */ - FatalError = true; - pmState = PM_WAIT_DEAD_END; - - /* Kill the walsenders and archiver too */ - SignalChildren(SIGQUIT); - if (PgArchPID != 0) - signal_child(PgArchPID, SIGQUIT); + HandleFatalError(PMQUIT_FOR_CRASH, false); } } + else + { + /* + * Either it's an immediate shutdown or a child crashed, and + * we're still waiting for all the children to quit. The + * checkpointer was already told to quit. + */ + Assert(Shutdown == ImmediateShutdown || + (Shutdown == NoShutdown && FatalError)); + } } }