Commit d8f3229c authored by Jeff Trawick's avatar Jeff Trawick
Browse files

worker MPM: Fix a problem which could cause httpd processes to

remain active after shutdown.

The problem occurred when a scoreboard entry currently
in use by an exiting child process was used for a new child
process.  At that point, the MPM forgot about the exiting
child process, so ap_reclaim_child_processes() wouldn't be
able to forceably terminate it.

(An exiting child process may *never* exit due to a stuck
or long-running request being handled on one of the threads.)


git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@109510 13f79535-47bb-0310-9956-ffa450edef68
parent 78fd4529
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -2,6 +2,9 @@ Changes with Apache 2.1.2-dev
  [Remove entries to the current 2.0 section below, when backported]
  *) worker MPM: Fix a problem which could cause httpd processes to
     remain active after shutdown.  [Jeff Trawick]
  *) core: Error out on sections that are missing an argument instead of
     silently consuming the section. PR 25460.
     [Geoffrey Young, Paul Querna]
+32 −1
Original line number Diff line number Diff line
@@ -59,7 +59,7 @@ extern "C" {
 * Make sure all child processes that have been spawned by the parent process
 * have died.  This includes process registered as "other_children".
 * @warning This is only defined if the MPM defines 
 *          MPM_NEEDS_RECLAIM_CHILD_PROCESS
 *          AP_MPM_WANT_RECLAIM_CHILD_PROCESSES
 * @param terminate Either 1 or 0.  If 1, send the child processes SIGTERM
 *        each time through the loop.  If 0, give the process time to die
 *        on its own before signalling it.
@@ -67,11 +67,42 @@ extern "C" {
 *  MPM_CHILD_PID -- Get the pid from the specified spot in the scoreboard
 *  MPM_NOTE_CHILD_KILLED -- Note the child died in the scoreboard
 * </pre>
 * @tip The MPM child processes which are reclaimed are those listed
 * in the scoreboard as well as those currently registered via
 * ap_register_extra_mpm_process().
 */
#ifdef AP_MPM_WANT_RECLAIM_CHILD_PROCESSES
void ap_reclaim_child_processes(int terminate);
#endif

/**
 * Tell ap_reclaim_child_processes() about an MPM child process which has no
 * entry in the scoreboard.
 * @warning This is only defined if the MPM defines
 *          AP_MPM_WANT_RECLAIM_CHILD_PROCESSES
 * @param pid The process id of an MPM child process which should be
 * reclaimed when ap_reclaim_child_processes() is called.
 * @tip If an extra MPM child process terminates prior to calling
 * ap_reclaim_child_processes(), remove it from the list of such processes
 * by calling ap_unregister_extra_mpm_process().
 */
#ifdef AP_MPM_WANT_RECLAIM_CHILD_PROCESSES
void ap_register_extra_mpm_process(pid_t pid);
#endif

/**
 * Unregister an MPM child process which was previously registered by a
 * call to ap_register_extra_mpm_process().
 * @warning This is only defined if the MPM defines
 *          AP_MPM_WANT_RECLAIM_CHILD_PROCESSES
 * @param pid The process id of an MPM child process which no longer needs to
 * be reclaimed.
 * @return 1 if the process was found and removed, 0 otherwise
 */
#ifdef AP_MPM_WANT_RECLAIM_CHILD_PROCESSES
int ap_unregister_extra_mpm_process(pid_t pid);
#endif

/**
 * Determine if any child process has died.  If no child process died, then
 * this process sleeps for the amount of time specified by the MPM defined
+18 −0
Original line number Diff line number Diff line
@@ -1310,6 +1310,21 @@ static int make_child(server_rec *s, int slot)
        clean_child_exit(0);
    }
    /* else */
    if (ap_scoreboard_image->parent[slot].pid != 0) {
        /* This new child process is squatting on the scoreboard
         * entry owned by an exiting child process, which cannot
         * exit until all active requests complete.
         * Don't forget about this exiting child process, or we
         * won't be able to kill it if it doesn't exit by the
         * time the server is shut down.
         */
        ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
                     "taking over scoreboard slot from %" APR_PID_T_FMT "%s",
                     ap_scoreboard_image->parent[slot].pid,
                     ap_scoreboard_image->parent[slot].quiescing ?
                         " (quiescing)" : "");
        ap_register_extra_mpm_process(ap_scoreboard_image->parent[slot].pid);
    }
    ap_scoreboard_image->parent[slot].quiescing = 0;
    ap_scoreboard_image->parent[slot].pid = pid;
    return 0;
@@ -1524,6 +1539,9 @@ static void server_main_loop(int remaining_children_to_start)
                    make_child(ap_server_conf, child_slot);
                    --remaining_children_to_start;
                }
            }
            else if (ap_unregister_extra_mpm_process(pid.pid) == 1) {
                /* handled */
#if APR_HAS_OTHER_CHILD
            }
            else if (apr_proc_other_child_alert(&pid, APR_OC_REASON_DEATH,
+129 −59
Original line number Diff line number Diff line
@@ -59,76 +59,63 @@
#endif

#ifdef AP_MPM_WANT_RECLAIM_CHILD_PROCESSES
void ap_reclaim_child_processes(int terminate)

typedef enum {DO_NOTHING, SEND_SIGTERM, SEND_SIGKILL, GIVEUP} action_t;

typedef struct extra_process_t {
    struct extra_process_t *next;
    pid_t pid;
} extra_process_t;

static extra_process_t *extras;

void ap_register_extra_mpm_process(pid_t pid)
{
    apr_time_t waittime = 1024 * 16;
    apr_status_t waitret;
    int i;
    int not_dead_yet;
    int max_daemons;
    apr_time_t starttime = apr_time_now();
    /* this table of actions and elapsed times tells what action is taken
     * at which elapsed time from starting the reclaim
     */
    struct {
        enum {DO_NOTHING, SEND_SIGTERM, SEND_SIGKILL, GIVEUP} action;
        apr_time_t action_time;
    } action_table[] = {
        {DO_NOTHING, 0}, /* dummy entry for iterations where we reap
                          * children but take no action against
                          * stragglers
                          */
        {SEND_SIGTERM, apr_time_from_sec(3)},
        {SEND_SIGTERM, apr_time_from_sec(5)},
        {SEND_SIGTERM, apr_time_from_sec(7)},
        {SEND_SIGKILL, apr_time_from_sec(9)},
        {GIVEUP,       apr_time_from_sec(10)}
    };
    int cur_action;      /* index of action we decided to take this
                          * iteration
                          */
    int next_action = 1; /* index of first real action */
    extra_process_t *p = (extra_process_t *)malloc(sizeof(extra_process_t));

    ap_mpm_query(AP_MPMQ_MAX_DAEMON_USED, &max_daemons);
    p->next = extras;
    p->pid = pid;
    extras = p;
}

    do {
        apr_sleep(waittime);
        /* don't let waittime get longer than 1 second; otherwise, we don't
         * react quickly to the last child exiting, and taking action can
         * be delayed
         */
        waittime = waittime * 4;
        if (waittime > apr_time_from_sec(1)) {
            waittime = apr_time_from_sec(1);
int ap_unregister_extra_mpm_process(pid_t pid)
{
    extra_process_t *cur = extras;
    extra_process_t *prev = NULL;

    while (cur && cur->pid != pid) {
        prev = cur;
        cur = cur->next;
    }

        /* see what action to take, if any */
        if (action_table[next_action].action_time <= apr_time_now() - starttime) {
            cur_action = next_action;
            ++next_action;
    if (cur) {
        if (prev) {
            prev->next = cur->next;
        }
        else {
            cur_action = 0; /* nothing to do */
            extras = cur->next;
        }
        free(cur);
        return 1; /* found */
    }
    else {
        /* we don't know about any such process */
        return 0;
    }
}

        /* now see who is done */
        not_dead_yet = 0;
        for (i = 0; i < max_daemons; ++i) {
            pid_t pid = MPM_CHILD_PID(i);
static int reclaim_one_pid(pid_t pid, action_t action)
{
    apr_proc_t proc;

            if (pid == 0)
                continue;
    apr_status_t waitret;

    proc.pid = pid;
    waitret = apr_proc_wait(&proc, NULL, NULL, APR_NOWAIT);
    if (waitret != APR_CHILD_NOTDONE) {
                MPM_NOTE_CHILD_KILLED(i);
                continue;
        return 1;
    }

            ++not_dead_yet;
            switch(action_table[cur_action].action) {
    switch(action) {
    case DO_NOTHING:
        break;
        
@@ -177,8 +164,91 @@ void ap_reclaim_child_processes(int terminate)
                     pid);
        break;
    }
    
    return 0;
}

void ap_reclaim_child_processes(int terminate)
{
    apr_time_t waittime = 1024 * 16;
    int i;
    extra_process_t *cur_extra;
    int not_dead_yet;
    int max_daemons;
    apr_time_t starttime = apr_time_now();
    /* this table of actions and elapsed times tells what action is taken
     * at which elapsed time from starting the reclaim
     */
    struct {
        action_t action;
        apr_time_t action_time;
    } action_table[] = {
        {DO_NOTHING, 0}, /* dummy entry for iterations where we reap
                          * children but take no action against
                          * stragglers
                          */
        {SEND_SIGTERM, apr_time_from_sec(3)},
        {SEND_SIGTERM, apr_time_from_sec(5)},
        {SEND_SIGTERM, apr_time_from_sec(7)},
        {SEND_SIGKILL, apr_time_from_sec(9)},
        {GIVEUP,       apr_time_from_sec(10)}
    };
    int cur_action;      /* index of action we decided to take this
                          * iteration
                          */
    int next_action = 1; /* index of first real action */

    ap_mpm_query(AP_MPMQ_MAX_DAEMON_USED, &max_daemons);

    do {
        apr_sleep(waittime);
        /* don't let waittime get longer than 1 second; otherwise, we don't
         * react quickly to the last child exiting, and taking action can
         * be delayed
         */
        waittime = waittime * 4;
        if (waittime > apr_time_from_sec(1)) {
            waittime = apr_time_from_sec(1);
        }

        /* see what action to take, if any */
        if (action_table[next_action].action_time <= apr_time_now() - starttime) {
            cur_action = next_action;
            ++next_action;
        }
        else {
            cur_action = 0; /* nothing to do */
        }

        /* now see who is done */
        not_dead_yet = 0;
        for (i = 0; i < max_daemons; ++i) {
            pid_t pid = MPM_CHILD_PID(i);

            if (pid == 0) {
                continue; /* not every scoreboard entry is in use */
            }

            if (reclaim_one_pid(pid, action_table[cur_action].action)) {
                MPM_NOTE_CHILD_KILLED(i);
            }
            else {
                ++not_dead_yet;
            }
        }

        cur_extra = extras;
        while (cur_extra) {
            extra_process_t *next = cur_extra->next;

            if (reclaim_one_pid(cur_extra->pid, action_table[cur_action].action)) {
                AP_DEBUG_ASSERT(1 == ap_unregister_extra_mpm_process(cur_extra->pid));
            }
            else {
                ++not_dead_yet;
            }
            cur_extra = next;
        }
#if APR_HAS_OTHER_CHILD
        apr_proc_other_child_refresh_all(APR_OC_REASON_RESTART);
#endif