Commit c4c37e5b authored by Jeff Trawick
Browse files

merge these fixes from 2.1-dev:

  *) worker MPM: Fix a problem which could cause httpd processes to
     remain active after shutdown.  [Jeff Trawick]

  *) Unix MPMs: Shut down the server more quickly when child processes are
     slow to exit.  [Joe Orton, Jeff Trawick]

Reviewed by: stoddard, striker


git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/branches/2.0.x@159470 13f79535-47bb-0310-9956-ffa450edef68
parent 0e1155c6
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
Changes with Apache 2.0.54
  *) worker MPM: Fix a problem which could cause httpd processes to
     remain active after shutdown.  [Jeff Trawick]
  *) Unix MPMs: Shut down the server more quickly when child processes are
     slow to exit.  [Joe Orton, Jeff Trawick]
  *) Remove formatting characters from ap_log_error() calls.  These
     were escaped as fallout from CAN-2003-0020.
     [Eric Covener <ecovener gmail.com>]
+0 −9
Original line number Diff line number Diff line
@@ -209,15 +209,6 @@ PATCHES TO BACKPORT FROM TRUNK:
                   it as-is.  For the one or two platforms that don't like 
                   which, they can write their own version of the script.

    * worker MPM: Fix a problem which could cause httpd processes to
      remain active after shutdown.  (Reliability issue.)
      Unix MPMs: Shut down the server more quickly when child processes are
      slow to exit.  (Nice-to-have, but code intersects with the
      reliability issue)
      http://svn.apache.org/viewcvs.cgi?rev=109510&view=rev
      http://svn.apache.org/viewcvs.cgi?rev=105195&view=rev
      +1: trawick, stoddard, striker

    * modules/http/http_request.c (ap_internal_fast_redirect): Take over
      important members of the subrequest. Especially the proxyreq copying
      is interesting for proxying DirectoryIndex'd resources:
+32 −1
Original line number Diff line number Diff line
@@ -60,7 +60,7 @@ extern "C" {
 * Make sure all child processes that have been spawned by the parent process
 * have died.  This includes processes registered as "other_children".
 * @warning This is only defined if the MPM defines
 *          AP_MPM_WANT_RECLAIM_CHILD_PROCESSES
 * @param terminate Either 1 or 0.  If 1, send the child processes SIGTERM
 *        each time through the loop.  If 0, give the process time to die
 *        on its own before signalling it.
@@ -68,11 +68,42 @@ extern "C" {
 *  MPM_CHILD_PID -- Get the pid from the specified spot in the scoreboard
 *  MPM_NOTE_CHILD_KILLED -- Note the child died in the scoreboard
 * </pre>
 * @note The MPM child processes which are reclaimed are those listed
 * in the scoreboard as well as those currently registered via
 * ap_register_extra_mpm_process().
 */
#ifdef AP_MPM_WANT_RECLAIM_CHILD_PROCESSES
void ap_reclaim_child_processes(int terminate);
#endif

/**
 * Tell ap_reclaim_child_processes() about an MPM child process which has no
 * entry in the scoreboard, so that it is still waited for (and, if it does
 * not exit, signalled) when the child processes are reclaimed.
 * @warning This is only defined if the MPM defines
 *          AP_MPM_WANT_RECLAIM_CHILD_PROCESSES
 * @param pid The process id of an MPM child process which should be
 * reclaimed when ap_reclaim_child_processes() is called.
 * @note If an extra MPM child process terminates prior to calling
 * ap_reclaim_child_processes(), remove it from the list of such processes
 * by calling ap_unregister_extra_mpm_process().
 */
#ifdef AP_MPM_WANT_RECLAIM_CHILD_PROCESSES
void ap_register_extra_mpm_process(pid_t pid);
#endif

/**
 * Unregister an MPM child process which was previously registered by a
 * call to ap_register_extra_mpm_process().
 * @warning This is only defined if the MPM defines
 *          AP_MPM_WANT_RECLAIM_CHILD_PROCESSES
 * @param pid The process id of an MPM child process which no longer needs to
 * be reclaimed.
 * @return 1 if the process was found and removed, 0 otherwise (that is,
 * pid was not registered as an extra MPM child process)
 */
#ifdef AP_MPM_WANT_RECLAIM_CHILD_PROCESSES
int ap_unregister_extra_mpm_process(pid_t pid);
#endif

/**
 * Determine if any child process has died.  If no child process died, then
 * this process sleeps for the amount of time specified by the MPM defined
+18 −0
Original line number Diff line number Diff line
@@ -1285,6 +1285,21 @@ static int make_child(server_rec *s, int slot)
        clean_child_exit(0);
    }
    /* else */
    if (ap_scoreboard_image->parent[slot].pid != 0) {
        /* This new child process is squatting on the scoreboard
         * entry owned by an exiting child process, which cannot
         * exit until all active requests complete.
         * Don't forget about this exiting child process, or we
         * won't be able to kill it if it doesn't exit by the
         * time the server is shut down.
         */
        ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
                     "taking over scoreboard slot from %" APR_PID_T_FMT "%s",
                     ap_scoreboard_image->parent[slot].pid,
                     ap_scoreboard_image->parent[slot].quiescing ?
                         " (quiescing)" : "");
        ap_register_extra_mpm_process(ap_scoreboard_image->parent[slot].pid);
    }
    ap_scoreboard_image->parent[slot].quiescing = 0;
    ap_scoreboard_image->parent[slot].pid = pid;
    return 0;
@@ -1499,6 +1514,9 @@ static void server_main_loop(int remaining_children_to_start)
                    make_child(ap_server_conf, child_slot);
                    --remaining_children_to_start;
                }
            }
            else if (ap_unregister_extra_mpm_process(pid.pid) == 1) {
                /* handled */
#if APR_HAS_OTHER_CHILD
            }
            else if (apr_proc_other_child_read(&pid, status) == 0) {
+170 −72
Original line number Diff line number Diff line
@@ -60,68 +60,84 @@
#endif

#ifdef AP_MPM_WANT_RECLAIM_CHILD_PROCESSES
void ap_reclaim_child_processes(int terminate)

/* What reclaim_one_pid() should do to a child process that has not exited
 * yet: leave it alone, send it SIGTERM, send it SIGKILL, or log that we
 * are giving up on it.
 */
typedef enum {DO_NOTHING, SEND_SIGTERM, SEND_SIGKILL, GIVEUP} action_t;

/* Node of a singly linked list of MPM child processes which are tracked
 * here rather than via the scoreboard.
 */
typedef struct extra_process_t {
    struct extra_process_t *next; /* next node, or NULL at the end of the list */
    pid_t pid;                    /* process id of the tracked child */
} extra_process_t;

/* Head of the list of extra MPM child processes; NULL while none are
 * registered.
 */
static extra_process_t *extras;

void ap_register_extra_mpm_process(pid_t pid)
{
    int i;
    long int waittime = 1024 * 16;      /* in usecs */
    apr_status_t waitret;
    int tries;
    int not_dead_yet;
    int max_daemons;
    extra_process_t *p = (extra_process_t *)malloc(sizeof(extra_process_t));

    ap_mpm_query(AP_MPMQ_MAX_DAEMON_USED, &max_daemons);
    p->next = extras;
    p->pid = pid;
    extras = p;
}

    for (tries = terminate ? 4 : 1; tries <= 9; ++tries) {
        /* don't want to hold up progress any more than
         * necessary, but we need to allow children a few moments to exit.
         * Set delay with an exponential backoff.
         */
        apr_sleep(waittime);
        waittime = waittime * 4;
/*
 * Remove the entry for pid from the list of extra MPM child processes.
 * Only the first matching entry is unlinked and freed.
 *
 * Returns 1 if an entry was found and removed, 0 if we don't know about
 * any such process.
 */
int ap_unregister_extra_mpm_process(pid_t pid)
{
    /* link always addresses the pointer that leads to the node being
     * inspected: first the list head, afterwards a node's next member.
     * Unlinking is then the same operation for the head and for any
     * other position.
     */
    extra_process_t **link = &extras;

    while (*link != NULL) {
        extra_process_t *node = *link;

        if (node->pid == pid) {
            *link = node->next;
            free(node);
            return 1; /* found */
        }
        link = &node->next;
    }

    /* we don't know about any such process */
    return 0;
}

static int reclaim_one_pid(pid_t pid, action_t action)
{
    apr_proc_t proc;
    apr_status_t waitret;

    proc.pid = pid;
    waitret = apr_proc_wait(&proc, NULL, NULL, APR_NOWAIT);
    if (waitret != APR_CHILD_NOTDONE) {
                MPM_NOTE_CHILD_KILLED(i);
                continue;
        return 1;
    }

            ++not_dead_yet;
            switch (tries) {
            case 1:     /*  16ms */
            case 2:     /*  82ms */
            case 3:     /* 344ms */
            case 4:     /*  16ms */
    switch(action) {
    case DO_NOTHING:
        break;
        
            case 5:     /*  82ms */
            case 6:     /* 344ms */
            case 7:     /* 1.4sec */
    case SEND_SIGTERM:
        /* ok, now it's being annoying */
        ap_log_error(APLOG_MARK, APLOG_WARNING,
                     0, ap_server_conf,
                             "child process %ld still did not exit, "
                     "child process %" APR_PID_T_FMT
                     " still did not exit, "
                     "sending a SIGTERM",
                             (long)pid);
                     pid);
        kill(pid, SIGTERM);
        break;
        
            case 8:     /*  6 sec */
                /* die child scum */
    case SEND_SIGKILL:
        ap_log_error(APLOG_MARK, APLOG_ERR,
                     0, ap_server_conf,
                             "child process %ld still did not exit, "
                     "child process %" APR_PID_T_FMT
                     " still did not exit, "
                     "sending a SIGKILL",
                             (long)pid);
                     pid);
#ifndef BEOS
        kill(pid, SIGKILL);
#else
@@ -135,7 +151,7 @@ void ap_reclaim_child_processes(int terminate)
#endif
        break;
                
            case 9:     /* 14 sec */
    case GIVEUP:
        /* gave it our best shot, but alas...  If this really
         * is a child we are trying to kill and it really hasn't
         * exited, we will likely fail to bind to the port
@@ -143,22 +159,104 @@ void ap_reclaim_child_processes(int terminate)
         */
        ap_log_error(APLOG_MARK, APLOG_ERR,
                     0, ap_server_conf,
                             "could not make child process %ld exit, "
                     "could not make child process %" APR_PID_T_FMT
                     " exit, "
                     "attempting to continue anyway",
                             (long)pid);
                     pid);
        break;
    }
    
    return 0;
}

/*
 * Wait for the MPM child processes to exit, escalating from waiting to
 * SIGTERM to SIGKILL according to a fixed timetable measured from the
 * moment this function is entered.
 *
 * Both the processes listed in the scoreboard (slots 0 up to the value
 * reported for AP_MPMQ_MAX_DAEMON_USED) and the processes registered via
 * ap_register_extra_mpm_process() are handled.  The function returns as
 * soon as no process is left, or after the GIVEUP step has been executed.
 *
 * terminate: not consulted by this implementation; the timetable below is
 *            always used.
 */
void ap_reclaim_child_processes(int terminate)
{
    apr_time_t waittime = 1024 * 16;
    int i;
    extra_process_t *cur_extra;
    int not_dead_yet;
    int max_daemons;
    apr_time_t starttime = apr_time_now();
    /* this table of actions and elapsed times tells what action is taken
     * at which elapsed time from starting the reclaim
     */
    struct {
        action_t action;
        apr_time_t action_time;
    } action_table[] = {
        {DO_NOTHING, 0}, /* dummy entry for iterations where we reap
                          * children but take no action against
                          * stragglers
                          */
        {SEND_SIGTERM, apr_time_from_sec(3)},
        {SEND_SIGTERM, apr_time_from_sec(5)},
        {SEND_SIGTERM, apr_time_from_sec(7)},
        {SEND_SIGKILL, apr_time_from_sec(9)},
        {GIVEUP,       apr_time_from_sec(10)}
    };
    int cur_action;      /* index of action we decided to take this
                          * iteration
                          */
    int next_action = 1; /* index of first real action */

    ap_mpm_query(AP_MPMQ_MAX_DAEMON_USED, &max_daemons);

    do {
        apr_sleep(waittime);
        /* don't let waittime get longer than 1 second; otherwise, we don't
         * react quickly to the last child exiting, and taking action can
         * be delayed
         */
        waittime = waittime * 4;
        if (waittime > apr_time_from_sec(1)) {
            waittime = apr_time_from_sec(1);
        }

        /* see what action to take, if any; next_action cannot run past
         * the table because the loop ends once GIVEUP (the last entry)
         * has been selected
         */
        if (action_table[next_action].action_time <= apr_time_now() - starttime) {
            cur_action = next_action;
            ++next_action;
        }
        else {
            cur_action = 0; /* nothing to do */
        }

        /* now see who is done */
        not_dead_yet = 0;
        for (i = 0; i < max_daemons; ++i) {
            pid_t pid = MPM_CHILD_PID(i);

            if (pid == 0) {
                continue; /* not every scoreboard entry is in use */
            }

            if (reclaim_one_pid(pid, action_table[cur_action].action)) {
                MPM_NOTE_CHILD_KILLED(i);
            }
            else {
                ++not_dead_yet;
            }
        }

        cur_extra = extras;
        while (cur_extra) {
            /* fetch the successor first: unregistering frees cur_extra */
            extra_process_t *next = cur_extra->next;

            if (reclaim_one_pid(cur_extra->pid, action_table[cur_action].action)) {
                /* Do the removal outside AP_DEBUG_ASSERT(): if that macro
                 * expands to nothing in non-debug builds (NOTE(review):
                 * confirm against httpd.h), an expression placed inside
                 * it would never run and the entry would stay listed.
                 */
                int removed = ap_unregister_extra_mpm_process(cur_extra->pid);

                AP_DEBUG_ASSERT(removed == 1);
                (void)removed; /* unused when the assertion is compiled out */
            }
            else {
                ++not_dead_yet;
            }
            cur_extra = next;
        }

#if APR_HAS_OTHER_CHILD
        apr_proc_other_child_check();
#endif

    } while (not_dead_yet > 0 &&
             action_table[cur_action].action != GIVEUP);
}
#endif /* AP_MPM_WANT_RECLAIM_CHILD_PROCESSES */