Commit a4a1692f authored by Greg Ames
Browse files

Limit the threaded mpm to quiescing one process at a time.

This fixes a problem where the scoreboard fills up with quiescing
processes, so that no working processes can start.  The problem is
triggered by MaxRequestsPerChild; perform_idle_server_maintenance could
theoretically cause it as well.


git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@88913 13f79535-47bb-0310-9956-ffa450edef68
parent 693252df
Loading
Loading
Loading
Loading
+6 −1
Original line number Diff line number Diff line
Changes with Apache 2.0.18-dev
 
  *) Limit the threaded mpm to quiescing one process at a time.
     This is to fix a problem where the scoreboard is filled with
     quiescing processes and no working processes can start.
     [Greg Ames]
     
  *) Change how input filters decide how much data is returned to the
     higher filter.  We used to use a field in the conn_rec, with this
     change, we use an argument to ap_get_brigade to determine how much
+5 −0
Original line number Diff line number Diff line
@@ -189,6 +189,11 @@ typedef struct {
    ap_scoreboard_e sb_type;
    ap_generation_t running_generation;	/* the generation of children which
                                         * should still be serving requests. */
#if APR_HAS_THREADS
    pid_t quiescing_pid;                /* pid of process which is going down
                                         * due to MaxRequestsPerChild or
                                         * perform_idle_server_maintenance   */
#endif 
} global_score;

/* stuff which the parent generally writes and the children rarely read */
+36 −8
Original line number Diff line number Diff line
@@ -200,6 +200,9 @@ AP_DECLARE(apr_status_t) ap_mpm_query(int query_code, int *result)
static void clean_child_exit(int code) __attribute__ ((noreturn));
static void clean_child_exit(int code)
{
    if (ap_scoreboard_image->global.quiescing_pid == ap_my_pid) {
        ap_scoreboard_image->global.quiescing_pid = 0;
    }
    if (pchild) {
	apr_pool_destroy(pchild);
    }
@@ -211,6 +214,12 @@ static void sig_coredump(int sig)
{
    chdir(ap_coredump_dir);
    apr_signal(sig, SIG_DFL);
    
    /* clean up so that other processes can quiesce */
    if (ap_scoreboard_image->global.quiescing_pid == ap_my_pid) {
        ap_scoreboard_image->global.quiescing_pid = 0;
    }
    
    kill(ap_my_pid, sig);
    /* At this point we've got sig blocked, because we're still inside
     * the signal handler.  When we leave the signal handler it will
@@ -487,7 +496,9 @@ static void * worker_thread(void * dummy)
    /* TODO: Switch to a system where threads reuse the results from earlier
       poll calls - manoj */
    while (1) {
        workers_may_exit |= (ap_max_requests_per_child != 0) && (requests_this_child <= 0);
        workers_may_exit |= (ap_max_requests_per_child != 0) 
                             && (requests_this_child <= 0)
                             && (ap_scoreboard_image->global.quiescing_pid == 0);
        if (workers_may_exit) break;

        (void) ap_update_child_status(process_slot, thread_slot, SERVER_READY, 
@@ -581,14 +592,22 @@ static void * worker_thread(void * dummy)
        apr_pool_clear(ptrans);
    }

    if (ap_scoreboard_image->global.quiescing_pid == 0) {
        /* yeah, I realize there's a race condition here, but it works 
         * out OK without serialization                               */ 
        ap_scoreboard_image->global.quiescing_pid = ap_my_pid;   
    }
    apr_pool_destroy(tpool);
    ap_update_child_status(process_slot, thread_slot, SERVER_DEAD,
        (request_rec *) NULL);
    apr_lock_acquire(worker_thread_count_mutex);
    worker_thread_count--;
    if (worker_thread_count == 0) {
        /* All the threads have exited, now finish the shutdown process
         * by signalling the sigwait thread */
        /* All the threads have exited, now finish the shutdown process */
        if (ap_scoreboard_image->global.quiescing_pid == ap_my_pid) {
            ap_scoreboard_image->global.quiescing_pid = 0;
        }
        /* signal the sigwait thread */
        kill(ap_my_pid, SIGTERM);
    }
    apr_lock_release(worker_thread_count_mutex);
@@ -858,11 +877,18 @@ static void perform_idle_server_maintenance(void)
	        ++idle_thread_addition;
	    }
	}
	if (all_dead_threads && free_length < idle_spawn_rate) {
	if (all_dead_threads) {
            pid_t dead_pid = ap_scoreboard_image->parent[i].pid; 
            if (ap_scoreboard_image->global.quiescing_pid == dead_pid) {
                /* shouldn't happen, but just in case... */
                ap_scoreboard_image->global.quiescing_pid = 0;
            }
            if (free_length < idle_spawn_rate) {
	        free_slots[free_length] = i;
	        ++free_length;
            }
	if (!all_dead_threads) {
	}
	else {                       /* ! all_dead_threads */
            last_non_dead = i;
	}
        if (!any_dying_threads) {
@@ -872,7 +898,8 @@ static void perform_idle_server_maintenance(void)
    }
    ap_max_daemons_limit = last_non_dead + 1;

    if (idle_thread_count > max_spare_threads) {
    if (idle_thread_count > max_spare_threads 
        &&  ap_scoreboard_image->global.quiescing_pid == 0) {
        /* Kill off one child */
        char char_of_death = '!';
        if ((rv = apr_file_write(pipe_of_death_out, &char_of_death, &one)) != APR_SUCCESS) {
@@ -1114,6 +1141,7 @@ int ap_mpm_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
     */
    ++ap_my_generation;
    ap_scoreboard_image->global.running_generation = ap_my_generation;
    ap_scoreboard_image->global.quiescing_pid = 0;
    update_scoreboard_global();

    if (is_graceful) {