Commit 27677a43 authored by Justin Erenkrantz's avatar Justin Erenkrantz
Browse files

Input filtering rewrite. Consolidate how we handle HTTP input parsing by

rearranging and rethinking some things.  The net result is that the HTTP
filter is now a request filter and is now only responsible for HTTP things.
The core input filter is now responsible for handling all of the dirty work.

Highlights:
- Removes the dechunk filter and merges it with ap_http_filter (aka HTTP_IN).
  The dechunk filter was incorrectly handling certain cases (trailers).
- Moves ap_http_filter from a connection filter to a request filter
  to support the consolidation above (it needs header info).
- Change support code to allow the http_filter to be a
  request filter (how the request is setup initially).
- Move most of the logic from HTTP_IN to CORE_IN (core_input_filter).
  HTTP_IN is now only concerned about HTTP things.  The core filter
  is now responsible for returning data.  It is impossible to
  consolidate dechunk and http without this because HTTP_IN previously
  buffered data.  As Greg has suggested, it may make sense to write
  some brigade functions that handle input (getline).  It should be
  fairly trivial to add these.  Some of the calls in ap_http_filter
  could be switched as well.

This is the original patch as submitted to dev@httpd on Monday, Sep.
24th.  Additional comments and some minor tweaks done after that
submission are coming up next.  This should allow people who reviewed
the original patch to see what has changed and review them piecemeal.

This test passes all current tests in httpd-test.  Please perform
chicken sacrifices to verify that this hasn't blown up your favorite
input.

Reviewed by:	Greg Stein, Ryan Bloom, and Cliff Woolley (buckets)


git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@91189 13f79535-47bb-0310-9956-ffa450edef68
parent bed39e1a
Loading
Loading
Loading
Loading
+10 −2
Original line number Diff line number Diff line
@@ -262,7 +262,6 @@ static apr_port_t http_port(const request_rec *r)

static int ap_pre_http_connection(conn_rec *c)
{
    ap_add_input_filter("HTTP_IN", NULL, NULL, c);
    ap_add_input_filter("CORE_IN", NULL, NULL, c);
    ap_add_output_filter("CORE", NULL, NULL, c);
    return OK;
@@ -302,6 +301,15 @@ static int ap_process_http_connection(conn_rec *c)
    return OK;
}

static int ap_http_create_req(request_rec *r)
{
    if (!r->main)
    {
        ap_add_input_filter("HTTP_IN", NULL, r, r->connection);
    }
    return OK;
}

static void ap_http_insert_filter(request_rec *r)
{
    if (!r->main) {
@@ -320,10 +328,10 @@ static void register_hooks(apr_pool_t *p)
    ap_hook_map_to_storage(ap_send_http_trace,NULL,NULL,APR_HOOK_MIDDLE);
    ap_hook_http_method(http_method,NULL,NULL,APR_HOOK_REALLY_LAST);
    ap_hook_default_port(http_port,NULL,NULL,APR_HOOK_REALLY_LAST);
    ap_hook_create_request(ap_http_create_req, NULL, NULL, APR_HOOK_MIDDLE);

    ap_hook_insert_filter(ap_http_insert_filter, NULL, NULL, APR_HOOK_REALLY_LAST);
    ap_register_input_filter("HTTP_IN", ap_http_filter, AP_FTYPE_CONNECTION);
    ap_register_input_filter("DECHUNK", ap_dechunk_filter, AP_FTYPE_TRANSCODE);
    ap_register_output_filter("HTTP_HEADER", ap_http_header_filter, 
                              AP_FTYPE_HTTP_HEADER);
    ap_register_output_filter("CHUNK", chunk_filter, AP_FTYPE_TRANSCODE);
+83 −232
Original line number Diff line number Diff line
@@ -481,265 +481,117 @@ AP_DECLARE(const char *) ap_method_name_of(int methnum)
    return AP_HTTP_METHODS[methnum];
}

struct dechunk_ctx {
    apr_size_t chunk_size;
    apr_size_t bytes_delivered;
static long get_chunk_size(char *);

typedef struct http_filter_ctx {
    int status;
    apr_size_t remaining;
    enum {
        WANT_HDR /* must have value zero */,
        WANT_BODY,
        WANT_TRL
        BODY_NONE   /* must have value of zero */,
        BODY_LENGTH,
        BODY_CHUNK
    } state;
};

static long get_chunk_size(char *);
} http_ctx_t;

apr_status_t ap_dechunk_filter(ap_filter_t *f, apr_bucket_brigade *bb,
                               ap_input_mode_t mode, apr_off_t *readbytes)
/* Hi, I'm the main input filter for HTTP requests. 
 * I handle chunked and content-length bodies. */
apr_status_t ap_http_filter(ap_filter_t *f, apr_bucket_brigade *b, ap_input_mode_t mode, apr_off_t *readbytes)
{
    apr_bucket *e;
    http_ctx_t *ctx = f->ctx;
    apr_status_t rv;
    struct dechunk_ctx *ctx = f->ctx;
    apr_bucket *b;
    const char *buf;
    apr_size_t len;

    if (!ctx) {
        f->ctx = ctx = apr_pcalloc(f->r->pool, sizeof(struct dechunk_ctx));
        f->ctx = ctx = apr_pcalloc(f->r->pool, sizeof(*ctx));
        ctx->status = f->r->status;
    }

    do {
        if (ctx->chunk_size == ctx->bytes_delivered) {
            /* Time to read another chunk header or trailer...  ap_http_filter() is 
             * the next filter in line and it knows how to return a brigade with 
             * one line.
             */
    /* Basically, we have to stay out of the way until server/protocol.c
     * says it is okay - which it does by setting r->status to OK. */
    if (f->r->status != ctx->status)
    {
        int old_status;
        /* Allow us to be reentrant! */
        old_status = ctx->status;
        ctx->status = f->r->status;

        if (old_status == HTTP_REQUEST_TIME_OUT && f->r->status == HTTP_OK)
        {
            const char *tenc, *lenp;
            tenc = apr_table_get(f->r->headers_in, "Transfer-Encoding");
            lenp = apr_table_get(f->r->headers_in, "Content-Length");

            if (tenc) {
                if (!strcasecmp(tenc, "chunked")) {
                    char line[30];
            
            if ((rv = ap_getline(line, sizeof(line), f->r,
                                 0 /* readline */)) < 0) {
                    if ((rv = ap_getline(line, sizeof(line), f->r, 0)) < 0) {
                        return rv;
                    }
            switch(ctx->state) {
            case WANT_HDR:
                ctx->chunk_size = get_chunk_size(line);
                ctx->bytes_delivered = 0;
                if (ctx->chunk_size == 0) {
                    ctx->state = WANT_TRL;
                }
                else {
                    ctx->state = WANT_BODY;
                }
                break;
            case WANT_TRL:
                /* XXX sanity check end chunk here */
                if (strlen(line)) {
                    /* bad trailer */
                }
                if (ctx->chunk_size == 0) { /* we just finished the last chunk? */
                    /* ### woah... ap_http_filter() is doing this, too */
                    /* append eos bucket and get out */
                    b = apr_bucket_eos_create();
                    APR_BRIGADE_INSERT_TAIL(bb, b);
                    return APR_SUCCESS;
                }
                ctx->state = WANT_HDR;
                break;
            default:
                ap_assert(ctx->state == WANT_HDR || ctx->state == WANT_TRL);
                    ctx->state = BODY_CHUNK;
                    ctx->remaining = get_chunk_size(line);
                }
            }
    } while (ctx->state != WANT_BODY);

    if (ctx->state == WANT_BODY) {
        /* Tell ap_http_filter() how many bytes to deliver. */
        apr_off_t chunk_bytes = ctx->chunk_size - ctx->bytes_delivered;
            else if (lenp) {
                const char *pos = lenp;

        if ((rv = ap_get_brigade(f->next, bb, mode,
                                 &chunk_bytes)) != APR_SUCCESS) {
            return rv;
                while (apr_isdigit(*pos) || apr_isspace(*pos))
                    ++pos;
                if (*pos == '\0') {
                    ctx->state = BODY_LENGTH;
                    ctx->remaining = atol(lenp);
                }

        /* Walk through the body, accounting for bytes, and removing an eos
         * bucket if ap_http_filter() delivered the entire chunk.
         *
         * ### this shouldn't be necessary. 1) ap_http_filter shouldn't be
         * ### adding EOS buckets. 2) it shouldn't return more bytes than
         * ### we requested, therefore the total len can be found with a
         * ### simple call to apr_brigade_length(). no further munging
         * ### would be needed.
         */
        b = APR_BRIGADE_FIRST(bb);
        while (b != APR_BRIGADE_SENTINEL(bb) && !APR_BUCKET_IS_EOS(b)) {
            apr_bucket_read(b, &buf, &len, mode);
            AP_DEBUG_ASSERT(len <= ctx->chunk_size - ctx->bytes_delivered);
            ctx->bytes_delivered += len;
            b = APR_BUCKET_NEXT(b);
            }
        if (ctx->bytes_delivered == ctx->chunk_size) {
            AP_DEBUG_ASSERT(APR_BUCKET_IS_EOS(b));
            apr_bucket_delete(b);
            ctx->state = WANT_TRL;
        }
    }

    if (!ctx->remaining)
    {
        switch (ctx->state)
        {
        case BODY_NONE:
            break;
        case BODY_LENGTH:
            e = apr_bucket_eos_create();
            APR_BRIGADE_INSERT_TAIL(b, e);
            return APR_SUCCESS;
}

typedef struct http_filter_ctx {
    apr_bucket_brigade *b;
} http_ctx_t;

apr_status_t ap_http_filter(ap_filter_t *f, apr_bucket_brigade *b, ap_input_mode_t mode, apr_off_t *readbytes)
        case BODY_CHUNK:
            {
    apr_bucket *e;
    char *buff;
    apr_size_t len;
    char *pos;
    http_ctx_t *ctx = f->ctx;
    apr_status_t rv;
                char line[30];
        
    if (!ctx) {
        f->ctx = ctx = apr_pcalloc(f->c->pool, sizeof(*ctx));
        ctx->b = apr_brigade_create(f->c->pool);
    }
                ctx->state = BODY_NONE;

    if (mode == AP_MODE_PEEK) {
        apr_bucket *e;
        const char *str;
        apr_size_t length;

        /* The purpose of this loop is to ignore any CRLF (or LF) at the end
         * of a request.  Many browsers send extra lines at the end of POST
         * requests.  We use the PEEK method to determine if there is more
         * data on the socket, so that we know if we should delay sending the
         * end of one request until we have served the second request in a
         * pipelined situation.  We don't want to actually delay sending a
         * response if the server finds a CRLF (or LF), becuause that doesn't
         * mean that there is another request, just a blank line.
         */
        while (1) {
            if (APR_BRIGADE_EMPTY(ctx->b)) {
                e = NULL;
            }
            else {
                e = APR_BRIGADE_FIRST(ctx->b);
            }
            if (!e || apr_bucket_read(e, &str, &length, APR_NONBLOCK_READ) != APR_SUCCESS) {
                return APR_EOF;
            }
            else {
                const char *c = str;
                while (c < str + length) {
                    if (*c == APR_ASCII_LF)
                        c++;
                    else if (*c == APR_ASCII_CR && *(c + 1) == APR_ASCII_LF)
                        c += 2;
                    else return APR_SUCCESS;
                }
                apr_bucket_delete(e);
            }
        }
                /* We need to read the CRLF after the chunk.  */
                if ((rv = ap_getline(line, sizeof(line), f->r, 0)) < 0) {
                    return rv;
                }

    if (APR_BRIGADE_EMPTY(ctx->b)) {
        if ((rv = ap_get_brigade(f->next, ctx->b, mode, readbytes)) != APR_SUCCESS) {
                /* Read the real chunk line. */
                if ((rv = ap_getline(line, sizeof(line), f->r, 0)) < 0) {
                    return rv;
                }
    }
                ctx->state = BODY_CHUNK;
                ctx->remaining = get_chunk_size(line);

    /* If readbytes is -1, we want to just read everything until the end
     * of the brigade, which in this case means the end of the socket.  To
     * do this, we loop through the entire brigade, until the socket is
     * exhausted, at which point, it will automagically remove itself from
     * the brigade.
     */
    if (*readbytes == -1) {
        apr_bucket *e;
        apr_off_t total;
        APR_BRIGADE_FOREACH(e, ctx->b) {
            const char *str;
            apr_size_t len;
            apr_bucket_read(e, &str, &len, APR_BLOCK_READ);
        }
        APR_BRIGADE_CONCAT(b, ctx->b);
        apr_brigade_length(b, 1, &total);
        *readbytes = total;
                if (!ctx->remaining)
                {
                    e = apr_bucket_eos_create();
                    APR_BRIGADE_INSERT_TAIL(b, e);
                    return APR_SUCCESS;
                }
    /* readbytes == 0 is "read a single line". otherwise, read a block. */
    if (*readbytes) {
        apr_off_t total;

        /* ### the code below, which moves bytes from one brigade to the
           ### other is probably bogus. presuming the next filter down was
           ### working properly, it should not have returned more than
           ### READBYTES bytes, and we wouldn't have to do any work.
        */

        APR_BRIGADE_NORMALIZE(ctx->b);
        if (APR_BRIGADE_EMPTY(ctx->b)) {
            if ((rv = ap_get_brigade(f->next, ctx->b, mode, readbytes)) != APR_SUCCESS) {
                return rv;
            }
            }
            
        apr_brigade_partition(ctx->b, *readbytes, &e);
        APR_BRIGADE_CONCAT(b, ctx->b);
        if (e != APR_BRIGADE_SENTINEL(ctx->b)) {
            apr_bucket_brigade *temp;

            temp = apr_brigade_split(b, e);

            /* ### darn. gotta ensure the split brigade is in the proper pool.
               ### this is a band-aid solution; we shouldn't even be doing
               ### all of this brigade munging (per the comment above).
               ### until then, this will get the right lifetimes. */
            APR_BRIGADE_CONCAT(ctx->b, temp);
        }
        else {
            if (!APR_BRIGADE_EMPTY(ctx->b)) {
                ctx->b = NULL; /*XXX*/
            break;
        }
    }
        apr_brigade_length(b, 1, &total);
        *readbytes -= total;

        /* ### this is a hack. it is saying, "if we have read everything
           ### that was requested, then we are at the end of the request."
           ### it presumes that the next filter up will *only* call us
           ### with readbytes set to the Content-Length of the request.
           ### that may not always be true, and this code is *definitely*
           ### too presumptive of the caller's intent. the point is: this
           ### filter is not the guy that is parsing the headers or the
           ### chunks to determine where the end of the request is, so we
           ### shouldn't be monkeying with EOS buckets.
        */
        if (*readbytes == 0) {
            apr_bucket *eos = apr_bucket_eos_create();
                
            APR_BRIGADE_INSERT_TAIL(b, eos);
        }
        return APR_SUCCESS;
    }
    rv = ap_get_brigade(f->next, b, mode, readbytes);

    /* we are reading a single line, e.g. the HTTP headers */
    while (!APR_BRIGADE_EMPTY(ctx->b)) {
        e = APR_BRIGADE_FIRST(ctx->b);
        if ((rv = apr_bucket_read(e, (const char **)&buff, &len, mode)) != APR_SUCCESS) {
    if (rv != APR_SUCCESS)
        return rv;
        }

        pos = memchr(buff, APR_ASCII_LF, len);
        if (pos != NULL) {
            apr_bucket_split(e, pos - buff + 1);
            APR_BUCKET_REMOVE(e);
            APR_BRIGADE_INSERT_TAIL(b, e);
            return APR_SUCCESS;
        }
        APR_BUCKET_REMOVE(e);
        APR_BRIGADE_INSERT_TAIL(b, e);
    }
    if (ctx->state != BODY_NONE)
        ctx->remaining -= *readbytes;

    return APR_SUCCESS;
}

@@ -1406,7 +1258,6 @@ AP_DECLARE(int) ap_setup_client_block(request_rec *r, int read_policy)
        }

        r->read_chunked = 1;
        ap_add_input_filter("DECHUNK", NULL, r, r->connection);
    }
    else if (lenp) {
        const char *pos = lenp;
+130 −8
Original line number Diff line number Diff line
@@ -2747,23 +2747,145 @@ static int default_handler(request_rec *r)
    return ap_pass_brigade(r->output_filters, bb);
}

typedef struct core_filter_ctx {
    apr_bucket_brigade *b;
} core_ctx_t;

static int core_input_filter(ap_filter_t *f, apr_bucket_brigade *b, ap_input_mode_t mode, apr_off_t *readbytes)
{
    apr_bucket *e;
    apr_status_t rv;
    core_ctx_t *ctx = f->ctx;
    char *buff, *pos;
    apr_size_t len;

    if (!f->ctx) {    /* If we haven't passed up the socket yet... */
        f->ctx = (void *)1;
    if (!ctx)
    {
        f->ctx = ctx = apr_pcalloc(f->c->pool, sizeof(*ctx));
        ctx->b = apr_brigade_create(f->c->pool);

        /* seed the brigade with the client socket. */
        e = apr_bucket_socket_create(f->c->client_socket);
        APR_BRIGADE_INSERT_TAIL(b, e);
        return APR_SUCCESS;
        APR_BRIGADE_INSERT_TAIL(ctx->b, e);
    }
    else {            
        /* Either some code lost track of the socket
         * bucket or we already found out that the
         * client closed.

    if (mode == AP_MODE_PEEK) {
        apr_bucket *e;
        const char *str, *c;
        apr_size_t length;

        /* The purpose of this loop is to ignore any CRLF (or LF) at the end
         * of a request.  Many browsers send extra lines at the end of POST
         * requests.  We use the PEEK method to determine if there is more
         * data on the socket, so that we know if we should delay sending the
         * end of one request until we have served the second request in a
         * pipelined situation.  We don't want to actually delay sending a
         * response if the server finds a CRLF (or LF), becuause that doesn't
         * mean that there is another request, just a blank line.
         */
        while (1) {

            if (APR_BRIGADE_EMPTY(ctx->b))
                return APR_EOF;

            e = APR_BRIGADE_FIRST(ctx->b);

            rv = apr_bucket_read(e, &str, &length, APR_NONBLOCK_READ);

            if (rv != APR_SUCCESS)
                return rv;

            c = str;
            while (c < str + length) {
                if (*c == APR_ASCII_LF)
                    c++;
                else if (*c == APR_ASCII_CR && *(c + 1) == APR_ASCII_LF)
                    c += 2;
                else 
                    return APR_SUCCESS;
            }
            /* If we reach here, we were a bucket just full of CRLFs, so
             * just toss the bucket. */
            /* FIXME: Is this the right thing to do in the core? */
            apr_bucket_delete(e);
        }
    }

    /* If readbytes is -1, we want to just read everything until the end
     * of the brigade, which in this case means the end of the socket.  To
     * do this, we loop through the entire brigade, until the socket is
     * exhausted, at which point, it will automagically remove itself from
     * the brigade.
     */
    if (*readbytes == -1) {
        apr_bucket *e;
        apr_off_t total;
        const char *str;
        apr_size_t len;
        APR_BRIGADE_FOREACH(e, ctx->b) {
            /* We don't care about these values.  We just want to force the
             * lower level to just read it. */
            apr_bucket_read(e, &str, &len, APR_BLOCK_READ);
        }
        APR_BRIGADE_CONCAT(b, ctx->b);

        /* Force a recompute of the length */
        apr_brigade_length(b, 1, &total);
        *readbytes = total;
        /* We have read until the brigade was empty, so we know that we 
         * must be EOS. */
        e = apr_bucket_eos_create();
        APR_BRIGADE_INSERT_TAIL(b, e);
        return APR_SUCCESS;
    }
    /* readbytes == 0 is "read a single line". otherwise, read a block. */
    if (*readbytes) {
        apr_off_t total;
        apr_bucket *e;
        apr_bucket_brigade *newbb;

        newbb = NULL;

        apr_brigade_partition(ctx->b, *readbytes, &e);
        /* Must do split before CONCAT */
        if (e != APR_BRIGADE_SENTINEL(ctx->b)) {
            newbb = apr_brigade_split(ctx->b, e);
        }
        APR_BRIGADE_CONCAT(b, ctx->b);

        /* FIXME: Is this really needed?  Due to pointer use in sentinels,
         * I think so. */
        if (newbb)
            APR_BRIGADE_CONCAT(ctx->b, newbb);

        apr_brigade_length(b, 1, &total);
        *readbytes = total;

        return APR_SUCCESS;
    }

    /* we are reading a single LF line, e.g. the HTTP headers */
    while (!APR_BRIGADE_EMPTY(ctx->b)) {
        e = APR_BRIGADE_FIRST(ctx->b);
        if ((rv = apr_bucket_read(e, (const char **)&buff, &len, 
                                  mode)) != APR_SUCCESS) {
            return rv;
        }

        pos = memchr(buff, APR_ASCII_LF, len);
        /* We found a match. */
        if (pos != NULL) {
            apr_bucket_split(e, pos - buff + 1);
            APR_BUCKET_REMOVE(e);
            APR_BRIGADE_INSERT_TAIL(b, e);
            return APR_SUCCESS;
        }
        APR_BUCKET_REMOVE(e);
        APR_BRIGADE_INSERT_TAIL(b, e);
        *readbytes += len;
    }

    return APR_SUCCESS;
}

/* Default filter.  This filter should almost always be used.  Its only job
+4 −4
Original line number Diff line number Diff line
@@ -205,7 +205,6 @@ AP_CORE_DECLARE(int) ap_getline(char *s, int n, request_rec *r, int fold)
    int total = 0;
    int looking_ahead = 0;
    apr_size_t length;
    conn_rec *c = r->connection;
    core_request_config *req_cfg;
    apr_bucket_brigade *b;
    apr_bucket *e;
@@ -219,7 +218,7 @@ AP_CORE_DECLARE(int) ap_getline(char *s, int n, request_rec *r, int fold)
    while (1) {
        if (APR_BRIGADE_EMPTY(b)) {
            apr_off_t zero = 0;
            if ((retval = ap_get_brigade(c->input_filters, b,
            if ((retval = ap_get_brigade(r->input_filters, b,
                                         AP_MODE_BLOCKING,
                                         &zero /* readline */)) != APR_SUCCESS ||
                APR_BRIGADE_EMPTY(b)) {
@@ -562,6 +561,9 @@ request_rec *ap_read_request(conn_rec *conn)
    r->notes           = apr_table_make(r->pool, 5);

    r->request_config  = ap_create_request_config(r->pool);
    /* Must be set before we run create request hook */
    r->output_filters  = conn->output_filters;
    r->input_filters   = conn->input_filters;
    ap_run_create_request(r);
    r->per_dir_config  = r->server->lookup_defaults;

@@ -572,8 +574,6 @@ request_rec *ap_read_request(conn_rec *conn)

    r->status          = HTTP_REQUEST_TIME_OUT;  /* Until we get a request */
    r->the_request     = NULL;
    r->output_filters  = conn->output_filters;
    r->input_filters   = conn->input_filters;

    apr_setsocketopt(conn->client_socket, APR_SO_TIMEOUT, 
                     (int)(keptalive