=== Applying patches on top of PostgreSQL commit ID 53a49365052026907afff7613929710d1e7f0da0 === /etc/rc.d/jail: WARNING: Per-jail configuration via jail_* variables is obsolete. Please consider migrating to /etc/jail.conf. Sat Feb 1 00:51:29 UTC 2025 On branch cf/4884 nothing to commit, working tree clean === applying patch ./0001-Fix-rare-recovery-shutdown-hang-due-to-checkpointer.patch Applied patch to 'src/backend/postmaster/postmaster.c' with conflicts. U src/backend/postmaster/postmaster.c diff --cc src/backend/postmaster/postmaster.c index bb22b13ade,62db752228..0000000000 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@@ -2213,537 -3408,685 +2213,567 @@@ handle_pm_child_exit_signal(SIGNAL_ARGS } /* - * HandleChildCrash -- cleanup after failed backend, bgwriter, checkpointer, - * walwriter, autovacuum, archiver, slot sync worker, or background worker. - * - * The objectives here are to clean up our local state about the child - * process, and to signal all other remaining children to quickdie. + * Cleanup after a child process dies. */ static void -HandleChildCrash(int pid, int exitstatus, const char *procname) +process_pm_child_exit(void) { - dlist_mutable_iter iter; - slist_iter siter; - Backend *bp; - bool take_action; + int pid; /* process id of dead child process */ + int exitstatus; /* its exit status */ - /* - * We only log messages and send signals if this is the first process - * crash and we're not doing an immediate shutdown; otherwise, we're only - * here to update postmaster's idea of live processes. If we have already - * signaled children, nonzero exit status is to be expected, so don't - * clutter log. - */ - take_action = !FatalError && Shutdown != ImmediateShutdown; + pending_pm_child_exit = false; - if (take_action) - { - LogChildExit(LOG, procname, pid, exitstatus); - ereport(LOG, - (errmsg("terminating any other active server processes"))); - SetQuitSignalReason(PMQUIT_FOR_CRASH); - } + ereport(DEBUG4, + (errmsg_internal("reaping dead processes"))); - /* Process background workers. */ - slist_foreach(siter, &BackgroundWorkerList) + while ((pid = waitpid(-1, &exitstatus, WNOHANG)) > 0) { - RegisteredBgWorker *rw; + PMChild *pmchild; - rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur); - if (rw->rw_pid == 0) - continue; /* not running */ - if (rw->rw_pid == pid) - { - /* - * Found entry for freshly-dead worker, so remove it. - */ - (void) ReleasePostmasterChildSlot(rw->rw_child_slot); - dlist_delete(&rw->rw_backend->elem); -#ifdef EXEC_BACKEND - ShmemBackendArrayRemove(rw->rw_backend); -#endif - pfree(rw->rw_backend); - rw->rw_backend = NULL; - rw->rw_pid = 0; - rw->rw_child_slot = 0; - /* don't reset crashed_at */ - /* don't report child stop, either */ - /* Keep looping so we can signal remaining workers */ - } - else + /* + * Check if this child was a startup process. + */ + if (StartupPMChild && pid == StartupPMChild->pid) { + ReleasePostmasterChildSlot(StartupPMChild); + StartupPMChild = NULL; + /* - * This worker is still alive. Unless we did so already, tell it - * to commit hara-kiri. + * Startup process exited in response to a shutdown request (or it + * completed normally regardless of the shutdown request). */ - if (take_action) - sigquit_child(rw->rw_pid); - } - } + if (Shutdown > NoShutdown && + (EXIT_STATUS_0(exitstatus) || EXIT_STATUS_1(exitstatus))) + { + StartupStatus = STARTUP_NOT_RUNNING; + UpdatePMState(PM_WAIT_BACKENDS); + /* PostmasterStateMachine logic does the rest */ + continue; + } - /* Process regular backends */ - dlist_foreach_modify(iter, &BackendList) - { - bp = dlist_container(Backend, elem, iter.cur); + if (EXIT_STATUS_3(exitstatus)) + { + ereport(LOG, + (errmsg("shutdown at recovery target"))); + StartupStatus = STARTUP_NOT_RUNNING; + Shutdown = Max(Shutdown, SmartShutdown); + TerminateChildren(SIGTERM); + UpdatePMState(PM_WAIT_BACKENDS); + /* PostmasterStateMachine logic does the rest */ + continue; + } - if (bp->pid == pid) - { /* - * Found entry for freshly-dead backend, so remove it. + * Unexpected exit of startup process (including FATAL exit) + * during PM_STARTUP is treated as catastrophic. There are no + * other processes running yet, so we can just exit. */ - if (!bp->dead_end) + if (pmState == PM_STARTUP && + StartupStatus != STARTUP_SIGNALED && + !EXIT_STATUS_0(exitstatus)) { - (void) ReleasePostmasterChildSlot(bp->child_slot); -#ifdef EXEC_BACKEND - ShmemBackendArrayRemove(bp); -#endif + LogChildExit(LOG, _("startup process"), + pid, exitstatus); + ereport(LOG, + (errmsg("aborting startup due to startup process failure"))); + ExitPostmaster(1); } - dlist_delete(iter.cur); - pfree(bp); - /* Keep looping so we can signal remaining backends */ - } - else - { + /* - * This backend is still alive. Unless we did so already, tell it - * to commit hara-kiri. - * - * We could exclude dead_end children here, but at least when - * sending SIGABRT it seems better to include them. + * After PM_STARTUP, any unexpected exit (including FATAL exit) of + * the startup process is catastrophic, so kill other children, + * and set StartupStatus so we don't try to reinitialize after + * they're gone. Exception: if StartupStatus is STARTUP_SIGNALED, + * then we previously sent the startup process a SIGQUIT; so + * that's probably the reason it died, and we do want to try to + * restart in that case. * - * Background workers were already processed above; ignore them - * here. - */ - if (bp->bkend_type == BACKEND_TYPE_BGWORKER) - continue; - - if (take_action) - sigquit_child(bp->pid); - } - } - - /* Take care of the startup process too */ - if (pid == StartupPID) - { - StartupPID = 0; - /* Caller adjusts StartupStatus, so don't touch it here */ - } - else if (StartupPID != 0 && take_action) - { - sigquit_child(StartupPID); - StartupStatus = STARTUP_SIGNALED; - } - - /* Take care of the bgwriter too */ - if (pid == BgWriterPID) - BgWriterPID = 0; - else if (BgWriterPID != 0 && take_action) - sigquit_child(BgWriterPID); - - /* Take care of the checkpointer too */ - if (pid == CheckpointerPID) - CheckpointerPID = 0; - else if (CheckpointerPID != 0 && take_action) - sigquit_child(CheckpointerPID); - - /* Take care of the walwriter too */ - if (pid == WalWriterPID) - WalWriterPID = 0; - else if (WalWriterPID != 0 && take_action) - sigquit_child(WalWriterPID); - - /* Take care of the walreceiver too */ - if (pid == WalReceiverPID) - WalReceiverPID = 0; - else if (WalReceiverPID != 0 && take_action) - sigquit_child(WalReceiverPID); - - /* Take care of the walsummarizer too */ - if (pid == WalSummarizerPID) - WalSummarizerPID = 0; - else if (WalSummarizerPID != 0 && take_action) - sigquit_child(WalSummarizerPID); - - /* Take care of the autovacuum launcher too */ - if (pid == AutoVacPID) - AutoVacPID = 0; - else if (AutoVacPID != 0 && take_action) - sigquit_child(AutoVacPID); - - /* Take care of the archiver too */ - if (pid == PgArchPID) - PgArchPID = 0; - else if (PgArchPID != 0 && take_action) - sigquit_child(PgArchPID); - - /* Take care of the slot sync worker too */ - if (pid == SlotSyncWorkerPID) - SlotSyncWorkerPID = 0; - else if (SlotSyncWorkerPID != 0 && take_action) - sigquit_child(SlotSyncWorkerPID); - - /* We do NOT restart the syslogger */ - - if (Shutdown != ImmediateShutdown) - FatalError = true; - - /* We now transit into a state of waiting for children to die */ - if (pmState == PM_RECOVERY || - pmState == PM_HOT_STANDBY || - pmState == PM_RUN || - pmState == PM_STOP_BACKENDS || - pmState == PM_SHUTDOWN) - pmState = PM_WAIT_BACKENDS; - - /* - * .. and if this doesn't happen quickly enough, now the clock is ticking - * for us to kill them without mercy. - */ - if (AbortStartTime == 0) - AbortStartTime = time(NULL); -} - -/* - * Log the death of a child process. - */ -static void -LogChildExit(int lev, const char *procname, int pid, int exitstatus) -{ - /* - * size of activity_buffer is arbitrary, but set equal to default - * track_activity_query_size - */ - char activity_buffer[1024]; - const char *activity = NULL; - - if (!EXIT_STATUS_0(exitstatus)) - activity = pgstat_get_crashed_backend_activity(pid, - activity_buffer, - sizeof(activity_buffer)); - - if (WIFEXITED(exitstatus)) - ereport(lev, - - /*------ - translator: %s is a noun phrase describing a child process, such as - "server process" */ - (errmsg("%s (PID %d) exited with exit code %d", - procname, pid, WEXITSTATUS(exitstatus)), - activity ? errdetail("Failed process was running: %s", activity) : 0)); - else if (WIFSIGNALED(exitstatus)) - { -#if defined(WIN32) - ereport(lev, - - /*------ - translator: %s is a noun phrase describing a child process, such as - "server process" */ - (errmsg("%s (PID %d) was terminated by exception 0x%X", - procname, pid, WTERMSIG(exitstatus)), - errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."), - activity ? errdetail("Failed process was running: %s", activity) : 0)); -#else - ereport(lev, - - /*------ - translator: %s is a noun phrase describing a child process, such as - "server process" */ - (errmsg("%s (PID %d) was terminated by signal %d: %s", - procname, pid, WTERMSIG(exitstatus), - pg_strsignal(WTERMSIG(exitstatus))), - activity ? errdetail("Failed process was running: %s", activity) : 0)); -#endif - } - else - ereport(lev, + * This stanza also handles the case where we sent a SIGQUIT + * during PM_STARTUP due to some dead-end child crashing: in that + * situation, if the startup process dies on the SIGQUIT, we need + * to transition to PM_WAIT_BACKENDS state which will allow + * PostmasterStateMachine to restart the startup process. (On the + * other hand, the startup process might complete normally, if we + * were too late with the SIGQUIT. In that case we'll fall + * through and commence normal operations.) + */ + if (!EXIT_STATUS_0(exitstatus)) + { + if (StartupStatus == STARTUP_SIGNALED) + { + StartupStatus = STARTUP_NOT_RUNNING; + if (pmState == PM_STARTUP) + UpdatePMState(PM_WAIT_BACKENDS); + } + else + StartupStatus = STARTUP_CRASHED; + HandleChildCrash(pid, exitstatus, + _("startup process")); + continue; + } - /*------ - translator: %s is a noun phrase describing a child process, such as - "server process" */ - (errmsg("%s (PID %d) exited with unrecognized status %d", - procname, pid, exitstatus), - activity ? errdetail("Failed process was running: %s", activity) : 0)); -} + /* + * Startup succeeded, commence normal operations + */ + StartupStatus = STARTUP_NOT_RUNNING; + FatalError = false; + AbortStartTime = 0; + ReachedNormalRunning = true; + UpdatePMState(PM_RUN); + connsAllowed = true; -/* - * Advance the postmaster's state machine and take actions as appropriate - * - * This is common code for process_pm_shutdown_request(), - * process_pm_child_exit() and process_pm_pmsignal(), which process the signals - * that might mean we need to change state. - */ -static void -PostmasterStateMachine(void) -{ - /* If we're doing a smart shutdown, try to advance that state. */ - if (pmState == PM_RUN || pmState == PM_HOT_STANDBY) - { - if (!connsAllowed) - { /* - * This state ends when we have no normal client backends running. - * Then we're ready to stop other children. + * At the next iteration of the postmaster's main loop, we will + * crank up the background tasks like the autovacuum launcher and + * background workers that were not started earlier already. */ - if (CountChildren(BACKEND_TYPE_NORMAL) == 0) - pmState = PM_STOP_BACKENDS; + StartWorkerNeeded = true; + + /* at this point we are really open for business */ + ereport(LOG, + (errmsg("database system is ready to accept connections"))); + + /* Report status */ + AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_READY); +#ifdef USE_SYSTEMD + sd_notify(0, "READY=1"); +#endif + + continue; } - } - /* - * If we're ready to do so, signal child processes to shut down. (This - * isn't a persistent state, but treating it as a distinct pmState allows - * us to share this code across multiple shutdown code paths.) - */ - if (pmState == PM_STOP_BACKENDS) - { /* - * Forget any pending requests for background workers, since we're no - * longer willing to launch any new workers. (If additional requests - * arrive, BackgroundWorkerStateChange will reject them.) + * Was it the bgwriter? Normal exit can be ignored; we'll start a new + * one at the next iteration of the postmaster's main loop, if + * necessary. Any other exit condition is treated as a crash. */ - ForgetUnstartedBackgroundWorkers(); - - /* Signal all backend children except walsenders */ - SignalSomeChildren(SIGTERM, - BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND); - /* and the autovac launcher too */ - if (AutoVacPID != 0) - signal_child(AutoVacPID, SIGTERM); - /* and the bgwriter too */ - if (BgWriterPID != 0) - signal_child(BgWriterPID, SIGTERM); - /* and the walwriter too */ - if (WalWriterPID != 0) - signal_child(WalWriterPID, SIGTERM); - /* If we're in recovery, also stop startup and walreceiver procs */ - if (StartupPID != 0) - signal_child(StartupPID, SIGTERM); - if (WalReceiverPID != 0) - signal_child(WalReceiverPID, SIGTERM); - if (WalSummarizerPID != 0) - signal_child(WalSummarizerPID, SIGTERM); - if (SlotSyncWorkerPID != 0) - signal_child(SlotSyncWorkerPID, SIGTERM); - /* checkpointer, archiver, stats, and syslogger may continue for now */ - - /* Now transition to PM_WAIT_BACKENDS state to wait for them to die */ - pmState = PM_WAIT_BACKENDS; - } + if (BgWriterPMChild && pid == BgWriterPMChild->pid) + { + ReleasePostmasterChildSlot(BgWriterPMChild); + BgWriterPMChild = NULL; + if (!EXIT_STATUS_0(exitstatus)) + HandleChildCrash(pid, exitstatus, + _("background writer process")); + continue; + } - /* - * If we are in a state-machine state that implies waiting for backends to - * exit, see if they're all gone, and change state if so. - */ - if (pmState == PM_WAIT_BACKENDS) - { /* - * PM_WAIT_BACKENDS state ends when we have no regular backends - * (including autovac workers), no bgworkers (including unconnected - * ones), and no walwriter, autovac launcher, bgwriter or slot sync - * worker. If we are doing crash recovery or an immediate shutdown - * then we expect the checkpointer to exit as well, otherwise not. The - * stats and syslogger processes are disregarded since they are not - * connected to shared memory; we also disregard dead_end children - * here. Walsenders and archiver are also disregarded, they will be - * terminated later after writing the checkpoint record. + * Was it the checkpointer? */ ++<<<<<<< ours + if (CheckpointerPMChild && pid == CheckpointerPMChild->pid) + { + ReleasePostmasterChildSlot(CheckpointerPMChild); + CheckpointerPMChild = NULL; + if (EXIT_STATUS_0(exitstatus) && pmState == PM_WAIT_CHECKPOINTER) ++======= + if (CountChildren(BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND) == 0 && + StartupPID == 0 && + WalReceiverPID == 0 && + WalSummarizerPID == 0 && + BgWriterPID == 0 && + (CheckpointerPID == 0 || + (!FatalError && Shutdown < ImmediateShutdown) || + (FatalError && CheckpointerPID != 0)) && + WalWriterPID == 0 && + AutoVacPID == 0 && + SlotSyncWorkerPID == 0) + { + if (CheckpointerPID == 0 && + (Shutdown >= ImmediateShutdown || FatalError)) ++>>>>>>> theirs { /* - * Start waiting for dead_end children to die. This state - * change causes ServerLoop to stop creating new ones. - */ - pmState = PM_WAIT_DEAD_END; - - /* - * We already SIGQUIT'd the archiver and stats processes, if - * any, when we started immediate shutdown or entered - * FatalError state. + * OK, we saw normal exit of the checkpointer after it's been + * told to shut down. We know checkpointer wrote a shutdown + * checkpoint, otherwise we'd still be in + * PM_WAIT_XLOG_SHUTDOWN state. + * + * At this point only dead-end children and logger should be + * left. */ + UpdatePMState(PM_WAIT_DEAD_END); + ConfigurePostmasterWaitSet(false); + SignalChildren(SIGTERM, btmask_all_except(B_LOGGER)); } - else + else if (Shutdown > NoShutdown && Shutdown < ImmediateShutdown) { /* - * If we get here, we are proceeding with normal shutdown. All - * the regular children are gone, and it's time to tell the - * checkpointer to do a shutdown checkpoint. + * Any unexpected exit of the checkpointer (including FATAL + * exit) is treated as a crash. */ - Assert(Shutdown > NoShutdown); - /* Start the checkpointer if not running */ - if (CheckpointerPID == 0) - CheckpointerPID = StartChildProcess(CheckpointerProcess); - /* And tell it to shut down */ - if (CheckpointerPID != 0) - { - signal_child(CheckpointerPID, SIGUSR2); - pmState = PM_SHUTDOWN; - } - else - { - /* - * If we failed to fork a checkpointer, just shut down. - * Any required cleanup will happen at next restart. We - * set FatalError so that an "abnormal shutdown" message - * gets logged when we exit. - * - * We don't consult send_abort_for_crash here, as it's - * unlikely that dumping cores would illuminate the reason - * for checkpointer fork failure. - */ - FatalError = true; - pmState = PM_WAIT_DEAD_END; - - /* Kill the walsenders and archiver too */ - SignalChildren(SIGQUIT); - if (PgArchPID != 0) - signal_child(PgArchPID, SIGQUIT); - } + HandleChildCrash(pid, exitstatus, + _("checkpointer process")); } ++<<<<<<< ours + + continue; ++======= + else + { + /* + * Either it's an immediate shutdown or a child crashed, and + * we're still waiting for all the children to quit. The + * checkpointer was already told to quit. + */ + Assert(Shutdown == ImmediateShutdown || + (Shutdown == NoShutdown && FatalError)); + } ++>>>>>>> theirs } - } - if (pmState == PM_SHUTDOWN_2) - { /* - * PM_SHUTDOWN_2 state ends when there's no other children than - * dead_end children left. There shouldn't be any regular backends - * left by now anyway; what we're really waiting for is walsenders and - * archiver. + * Was it the wal writer? Normal exit can be ignored; we'll start a + * new one at the next iteration of the postmaster's main loop, if + * necessary. Any other exit condition is treated as a crash. */ - if (PgArchPID == 0 && CountChildren(BACKEND_TYPE_ALL) == 0) + if (WalWriterPMChild && pid == WalWriterPMChild->pid) { - pmState = PM_WAIT_DEAD_END; + ReleasePostmasterChildSlot(WalWriterPMChild); + WalWriterPMChild = NULL; + if (!EXIT_STATUS_0(exitstatus)) + HandleChildCrash(pid, exitstatus, + _("WAL writer process")); + continue; } - } - if (pmState == PM_WAIT_DEAD_END) - { - /* Don't allow any new socket connection events. */ - ConfigurePostmasterWaitSet(false); + /* + * Was it the wal receiver? If exit status is zero (normal) or one + * (FATAL exit), we assume everything is all right just like normal + * backends. (If we need a new wal receiver, we'll start one at the + * next iteration of the postmaster's main loop.) + */ + if (WalReceiverPMChild && pid == WalReceiverPMChild->pid) + { + ReleasePostmasterChildSlot(WalReceiverPMChild); + WalReceiverPMChild = NULL; + if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus)) + HandleChildCrash(pid, exitstatus, + _("WAL receiver process")); + continue; + } /* - * PM_WAIT_DEAD_END state ends when the BackendList is entirely empty - * (ie, no dead_end children remain), and the archiver is gone too. - * - * The reason we wait for those two is to protect them against a new - * postmaster starting conflicting subprocesses; this isn't an - * ironclad protection, but it at least helps in the - * shutdown-and-immediately-restart scenario. Note that they have - * already been sent appropriate shutdown signals, either during a - * normal state transition leading up to PM_WAIT_DEAD_END, or during - * FatalError processing. + * Was it the wal summarizer? Normal exit can be ignored; we'll start + * a new one at the next iteration of the postmaster's main loop, if + * necessary. Any other exit condition is treated as a crash. */ - if (dlist_is_empty(&BackendList) && PgArchPID == 0) + if (WalSummarizerPMChild && pid == WalSummarizerPMChild->pid) { - /* These other guys should be dead already */ - Assert(StartupPID == 0); - Assert(WalReceiverPID == 0); - Assert(WalSummarizerPID == 0); - Assert(BgWriterPID == 0); - Assert(CheckpointerPID == 0); - Assert(WalWriterPID == 0); - Assert(AutoVacPID == 0); - Assert(SlotSyncWorkerPID == 0); - /* syslogger is not considered here */ - pmState = PM_NO_CHILDREN; + ReleasePostmasterChildSlot(WalSummarizerPMChild); + WalSummarizerPMChild = NULL; + if (!EXIT_STATUS_0(exitstatus)) + HandleChildCrash(pid, exitstatus, + _("WAL summarizer process")); + continue; } - } - /* - * If we've been told to shut down, we exit as soon as there are no - * remaining children. If there was a crash, cleanup will occur at the - * next startup. (Before PostgreSQL 8.3, we tried to recover from the - * crash before exiting, but that seems unwise if we are quitting because - * we got SIGTERM from init --- there may well not be time for recovery - * before init decides to SIGKILL us.) - * - * Note that the syslogger continues to run. It will exit when it sees - * EOF on its input pipe, which happens when there are no more upstream - * processes. - */ - if (Shutdown > NoShutdown && pmState == PM_NO_CHILDREN) - { - if (FatalError) + /* + * Was it the autovacuum launcher? Normal exit can be ignored; we'll + * start a new one at the next iteration of the postmaster's main + * loop, if necessary. Any other exit condition is treated as a + * crash. + */ + if (AutoVacLauncherPMChild && pid == AutoVacLauncherPMChild->pid) { - ereport(LOG, (errmsg("abnormal database system shutdown"))); - ExitPostmaster(1); + ReleasePostmasterChildSlot(AutoVacLauncherPMChild); + AutoVacLauncherPMChild = NULL; + if (!EXIT_STATUS_0(exitstatus)) + HandleChildCrash(pid, exitstatus, + _("autovacuum launcher process")); + continue; + } + + /* + * Was it the archiver? If exit status is zero (normal) or one (FATAL + * exit), we assume everything is all right just like normal backends + * and just try to start a new one on the next cycle of the + * postmaster's main loop, to retry archiving remaining files. + */ + if (PgArchPMChild && pid == PgArchPMChild->pid) + { + ReleasePostmasterChildSlot(PgArchPMChild); + PgArchPMChild = NULL; + if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus)) + HandleChildCrash(pid, exitstatus, + _("archiver process")); + continue; + } + + /* Was it the system logger? If so, try to start a new one */ + if (SysLoggerPMChild && pid == SysLoggerPMChild->pid) + { + ReleasePostmasterChildSlot(SysLoggerPMChild); + SysLoggerPMChild = NULL; + + /* for safety's sake, launch new logger *first* */ + if (Logging_collector) + StartSysLogger(); + + if (!EXIT_STATUS_0(exitstatus)) + LogChildExit(LOG, _("system logger process"), + pid, exitstatus); + continue; + } + + /* + * Was it the slot sync worker? Normal exit or FATAL exit can be + * ignored (FATAL can be caused by libpqwalreceiver on receiving + * shutdown request by the startup process during promotion); we'll + * start a new one at the next iteration of the postmaster's main + * loop, if necessary. Any other exit condition is treated as a crash. + */ + if (SlotSyncWorkerPMChild && pid == SlotSyncWorkerPMChild->pid) + { + ReleasePostmasterChildSlot(SlotSyncWorkerPMChild); + SlotSyncWorkerPMChild = NULL; + if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus)) + HandleChildCrash(pid, exitstatus, + _("slot sync worker process")); + continue; + } + + /* + * Was it a backend or a background worker? + */ + pmchild = FindPostmasterChildByPid(pid); + if (pmchild) + { + CleanupBackend(pmchild, exitstatus); } + + /* + * We don't know anything about this child process. That's highly + * unexpected, as we do track all the child processes that we fork. + */ else { - /* - * Normal exit from the postmaster is here. We don't need to log - * anything here, since the UnlinkLockFiles proc_exit callback - * will do so, and that should be the last user-visible action. - */ - ExitPostmaster(0); + if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus)) + HandleChildCrash(pid, exitstatus, _("untracked child process")); + else + LogChildExit(LOG, _("untracked child process"), pid, exitstatus); } + } /* loop over pending child-death reports */ + + /* + * After cleaning out the SIGCHLD queue, see if we have any state changes + * or actions to make. + */ + PostmasterStateMachine(); +} + +/* + * CleanupBackend -- cleanup after terminated backend or background worker. + * + * Remove all local state associated with the child process and release its + * PMChild slot. + */ +static void +CleanupBackend(PMChild *bp, + int exitstatus) /* child's exit status. */ +{ + char namebuf[MAXPGPATH]; + const char *procname; + bool crashed = false; + bool logged = false; + pid_t bp_pid; + bool bp_bgworker_notify; + BackendType bp_bkend_type; + RegisteredBgWorker *rw; + + /* Construct a process name for the log message */ + if (bp->bkend_type == B_BG_WORKER) + { + snprintf(namebuf, MAXPGPATH, _("background worker \"%s\""), + bp->rw->rw_worker.bgw_type); + procname = namebuf; } + else + procname = _(GetBackendTypeDesc(bp->bkend_type)); + + /* + * If a backend dies in an ugly way then we must signal all other backends + * to quickdie. If exit status is zero (normal) or one (FATAL exit), we + * assume everything is all right and proceed to remove the backend from + * the active child list. + */ + if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus)) + crashed = true; + +#ifdef WIN32 /* - * If the startup process failed, or the user does not want an automatic - * restart after backend crashes, wait for all non-syslogger children to - * exit, and then exit postmaster. We don't try to reinitialize when the - * startup process fails, because more than likely it will just fail again - * and we will keep trying forever. + * On win32, also treat ERROR_WAIT_NO_CHILDREN (128) as nonfatal case, + * since that sometimes happens under load when the process fails to start + * properly (long before it starts using shared memory). Microsoft reports + * it is related to mutex failure: + * http://archives.postgresql.org/pgsql-hackers/2010-09/msg00790.php */ - if (pmState == PM_NO_CHILDREN) + if (exitstatus == ERROR_WAIT_NO_CHILDREN) { - if (StartupStatus == STARTUP_CRASHED) - { - ereport(LOG, - (errmsg("shutting down due to startup process failure"))); - ExitPostmaster(1); - } - if (!restart_after_crash) - { - ereport(LOG, - (errmsg("shutting down because restart_after_crash is off"))); - ExitPostmaster(1); - } + LogChildExit(LOG, procname, bp->pid, exitstatus); + logged = true; + crashed = false; } +#endif /* - * If we need to recover from a crash, wait for all non-syslogger children - * to exit, then reset shmem and start the startup process. + * Release the PMChild entry. + * + * If the process attached to shared memory, this also checks that it + * detached cleanly. */ - if (FatalError && pmState == PM_NO_CHILDREN) + bp_pid = bp->pid; + bp_bgworker_notify = bp->bgworker_notify; + bp_bkend_type = bp->bkend_type; + rw = bp->rw; + if (!ReleasePostmasterChildSlot(bp)) { - ereport(LOG, - (errmsg("all server processes terminated; reinitializing"))); - - /* remove leftover temporary files after a crash */ - if (remove_temp_files_after_crash) - RemovePgTempFiles(); + /* + * Uh-oh, the child failed to clean itself up. Treat as a crash after + * all. + */ + crashed = true; + } + bp = NULL; - /* allow background workers to immediately restart */ - ResetBackgroundWorkerCrashTimes(); + if (crashed) + { + HandleChildCrash(bp_pid, exitstatus, procname); + return; + } - shmem_exit(1); + /* + * This backend may have been slated to receive SIGUSR1 when some + * background worker started or stopped. Cancel those notifications, as + * we don't want to signal PIDs that are not PostgreSQL backends. This + * gets skipped in the (probably very common) case where the backend has + * never requested any such notifications. + */ + if (bp_bgworker_notify) + BackgroundWorkerStopNotifications(bp_pid); - /* re-read control file into local memory */ - LocalProcessControlFile(true); + /* + * If it was a background worker, also update its RegisteredBgWorker + * entry. + */ + if (bp_bkend_type == B_BG_WORKER) + { + if (!EXIT_STATUS_0(exitstatus)) + { + /* Record timestamp, so we know when to restart the worker. */ + rw->rw_crashed_at = GetCurrentTimestamp(); + } + else + { + /* Zero exit status means terminate */ + rw->rw_crashed_at = 0; + rw->rw_terminate = true; + } - /* re-create shared memory and semaphores */ - CreateSharedMemoryAndSemaphores(); + rw->rw_pid = 0; + ReportBackgroundWorkerExit(rw); /* report child death */ - StartupPID = StartChildProcess(StartupProcess); - Assert(StartupPID != 0); - StartupStatus = STARTUP_RUNNING; - pmState = PM_STARTUP; - /* crash recovery started, reset SIGKILL flag */ - AbortStartTime = 0; + if (!logged) + { + LogChildExit(EXIT_STATUS_0(exitstatus) ? DEBUG1 : LOG, + procname, bp_pid, exitstatus); + logged = true; + } - /* start accepting server socket connection events again */ - ConfigurePostmasterWaitSet(true); + /* have it be restarted */ + HaveCrashedWorker = true; } -} + if (!logged) + LogChildExit(DEBUG2, procname, bp_pid, exitstatus); +} /* - * Send a signal to a postmaster child process - * - * On systems that have setsid(), each child process sets itself up as a - * process group leader. For signals that are generally interpreted in the - * appropriate fashion, we signal the entire process group not just the - * direct child process. This allows us to, for example, SIGQUIT a blocked - * archive_recovery script, or SIGINT a script being run by a backend via - * system(). + * Transition into FatalError state, in response to something bad having + * happened. Commonly the caller will have logged the reason for entering + * FatalError state. * - * There is a race condition for recently-forked children: they might not - * have executed setsid() yet. So we signal the child directly as well as - * the group. We assume such a child will handle the signal before trying - * to spawn any grandchild processes. We also assume that signaling the - * child twice will not cause any problems. + * This should only be called when not already in FatalError or + * ImmediateShutdown state. */ static void -signal_child(pid_t pid, int signal) +HandleFatalError(QuitSignalReason reason, bool consider_sigabrt) { - if (kill(pid, signal) < 0) - elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) pid, signal); -#ifdef HAVE_SETSID - switch (signal) + int sigtosend; + + Assert(!FatalError); + Assert(Shutdown != ImmediateShutdown); + + SetQuitSignalReason(reason); + + if (consider_sigabrt && send_abort_for_crash) + sigtosend = SIGABRT; + else + sigtosend = SIGQUIT; + + /* + * Signal all other child processes to exit. + * + * We could exclude dead-end children here, but at least when sending + * SIGABRT it seems better to include them. + */ + TerminateChildren(sigtosend); + + FatalError = true; + + /* + * Choose the appropriate new state to react to the fatal error. Unless we + * were already in the process of shutting down, we go through + * PM_WAIT_BACKEND. For errors during the shutdown sequence, we directly + * switch to PM_WAIT_DEAD_END. + */ + switch (pmState) { - case SIGINT: - case SIGTERM: - case SIGQUIT: - case SIGKILL: - case SIGABRT: - if (kill(-pid, signal) < 0) - elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) (-pid), signal); + case PM_INIT: + /* shouldn't have any children */ + Assert(false); break; - default: + case PM_STARTUP: + /* should have been handled in process_pm_child_exit */ + Assert(false); break; - } -#endif -} - -/* - * Convenience function for killing a child process after a crash of some - * other child process. We log the action at a higher level than we would - * otherwise do, and we apply send_abort_for_crash to decide which signal - * to send. Normally it's SIGQUIT -- and most other comments in this file - * are written on the assumption that it is -- but developers might prefer - * to use SIGABRT to collect per-child core dumps. - */ -static void -sigquit_child(pid_t pid) -{ - ereport(DEBUG2, - (errmsg_internal("sending %s to process %d", - (send_abort_for_crash ? "SIGABRT" : "SIGQUIT"), - (int) pid))); - signal_child(pid, (send_abort_for_crash ? SIGABRT : SIGQUIT)); -} -/* - * Send a signal to the targeted children (but NOT special children; - * dead_end children are never signaled, either). - */ -static bool -SignalSomeChildren(int signal, int target) -{ - dlist_iter iter; - bool signaled = false; + /* wait for children to die */ + case PM_RECOVERY: + case PM_HOT_STANDBY: + case PM_RUN: + case PM_STOP_BACKENDS: + UpdatePMState(PM_WAIT_BACKENDS); + break; - dlist_foreach(iter, &BackendList) - { - Backend *bp = dlist_container(Backend, elem, iter.cur); + case PM_WAIT_BACKENDS: + /* there might be more backends to wait for */ + break; - if (bp->dead_end) - continue; + case PM_WAIT_XLOG_SHUTDOWN: + case PM_WAIT_XLOG_ARCHIVAL: + case PM_WAIT_CHECKPOINTER: - /* - * Since target == BACKEND_TYPE_ALL is the most common case, we test - * it first and avoid touching shared memory for every child. - */ - if (target != BACKEND_TYPE_ALL) - { /* - * Assign bkend_type for any recently announced WAL Sender - * processes. + * NB: Similar code exists in PostmasterStateMachine()'s handling + * of FatalError in PM_STOP_BACKENDS/PM_WAIT_BACKENDS states. */ - if (bp->bkend_type == BACKEND_TYPE_NORMAL && - IsPostmasterChildWalSender(bp->child_slot)) - bp->bkend_type = BACKEND_TYPE_WALSND; - - if (!(target & bp->bkend_type)) - continue; - } + ConfigurePostmasterWaitSet(false); + UpdatePMState(PM_WAIT_DEAD_END); + break; - ereport(DEBUG4, - (errmsg_internal("sending signal %d to process %d", - signal, (int) bp->pid))); - signal_child(bp->pid, signal); - signaled = true; + case PM_WAIT_DEAD_END: + case PM_NO_CHILDREN: + break; } - return signaled; -} -/* - * Send a termination signal to children. This considers all of our children - * processes, except syslogger and dead_end backends. - */ -static void -TerminateChildren(int signal) -{ - SignalChildren(signal); - if (StartupPID != 0) - { - signal_child(StartupPID, signal); - if (signal == SIGQUIT || signal == SIGKILL || signal == SIGABRT) - StartupStatus = STARTUP_SIGNALED; - } - if (BgWriterPID != 0) - signal_child(BgWriterPID, signal); - if (CheckpointerPID != 0) - signal_child(CheckpointerPID, signal); - if (WalWriterPID != 0) - signal_child(WalWriterPID, signal); - if (WalReceiverPID != 0) - signal_child(WalReceiverPID, signal); - if (WalSummarizerPID != 0) - signal_child(WalSummarizerPID, signal); - if (AutoVacPID != 0) - signal_child(AutoVacPID, signal); - if (PgArchPID != 0) - signal_child(PgArchPID, signal); - if (SlotSyncWorkerPID != 0) - signal_child(SlotSyncWorkerPID, signal); + /* + * .. and if this doesn't happen quickly enough, now the clock is ticking + * for us to kill them without mercy. + */ + if (AbortStartTime == 0) + AbortStartTime = time(NULL); } /*