Commit 145ac8b6 authored by Bill Stoddard's avatar Bill Stoddard
Browse files

Add content negotiation and expiration policy to mod_cache and mod_mem_cache.

mod_disk_cache still needs work.


git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@95514 13f79535-47bb-0310-9956-ffa450edef68
parent 3c5bdf01
Loading
Loading
Loading
Loading
+55 −8
Original line number Diff line number Diff line
@@ -186,18 +186,65 @@ int cache_select_url(request_rec *r, const char *types, char *url)
        type = ap_cache_tokstr(r->pool, next, &next);
        switch ((rv = cache_run_open_entity(cache->handle, r, type, key))) {
        case OK: {
            char *vary = NULL;
            info = &(cache->handle->cache_obj->info);
            /* XXX:
             * Handle being returned a collection of entities.
            if (cache_read_entity_headers(cache->handle, r) != APR_SUCCESS) {
                /* TODO: Handle this error */
                return DECLINED;
            }

            /*
             * Check Content-Negotiation - Vary
             * 
             * At this point we need to make sure that the object we found in the cache
             * is the same object that would be delivered to the client, when the
             * effects of content negotiation are taken into effect.
             * 
             * In plain english, we want to make sure that a language-negotiated
             * document in one language is not given to a client asking for a
             * language negotiated document in a different language by mistake.
             * 
             * This code makes the assumption that the storage manager will
             * cache the info->req_hdrs if the response contains a Vary
             * header.
             * 
             * RFC2616 13.6 and 14.44 describe the Vary mechanism.
             */
            vary = ap_pstrdup(r->pool, ap_table_get(r->headers_out, "Vary"));
            while (vary && *vary) {
                char *name = vary;
                const char *h1, *h2;

            /* Has the cache entry expired? */
            if (r->request_time > info->expire)
                cache->fresh = 0;
            else
                cache->fresh = 1;
                /* isolate header name */
                while (*vary && !ap_isspace(*vary) && (*vary != ','))
                    ++vary;
                while (*vary && (ap_isspace(*vary) || (*vary == ','))) {
                    *vary = '\0';
                    ++vary;
                }

            /*** do content negotiation here */
                /*
                 * is this header in the request and the header in the cached
                 * request identical? If not, we give up and do a straight get
                 */
                h1 = ap_table_get(r->headers_in, name);
                h2 = ap_table_get(info->req_hdrs, name);
                if (h1 == h2) {
                    /* both headers NULL, so a match - do nothing */
                }
                else if (h1 && h2 && !strcmp(h1, h2)) {
                    /* both headers exist and are equal - do nothing */
                }
                else {
                    /* headers do not match, so Vary failed */
                    ap_log_error(APLOG_MARK, APLOG_INFO, APR_SUCCESS, r->server,
                                 "cache_select_url(): Vary header mismatch - Cached document cannot be used. \n");
                    apr_table_clear(r->headers_out);
                    r->status_line = NULL;
                    cache->handle = NULL;
                    return DECLINED;
                }
            }
            return OK;
        }
        case DECLINED: {
+138 −1
Original line number Diff line number Diff line
@@ -133,6 +133,143 @@ CACHE_DECLARE(const char *)ap_cache_get_cachetype(request_rec *r,
    return type;
}


/*
 * Compute the current age of a cached entity following the HTTP/1.1 age
 * algorithm (RFC2616 13.2.3).
 *
 * info:      cache bookkeeping for the entity; the date, request_time and
 *            response_time fields are read.
 * age_value: age already attributed to the response by the caller (in this
 *            file ap_cache_check_freshness() passes the parsed Age header).
 *
 * Returns the sum of the corrected received age, the request/response
 * delay and the time elapsed since response_time, as an apr_time_t.
 */
CACHE_DECLARE(apr_time_t) ap_cache_current_age(cache_info *info, const apr_time_t age_value)
{
    /* apparent_age: never negative, even if date lies after response_time */
    const apr_time_t seen_age = MAX(0, info->response_time - info->date);
    /* corrected_received_age: trust whichever estimate is larger */
    const apr_time_t base_age = MAX(seen_age, age_value);
    /* response_delay: time between issuing the request and the response */
    const apr_time_t round_trip = info->response_time - info->request_time;
    /* resident_time: how long the entity has been held since response_time */
    const apr_time_t time_in_cache = apr_time_now() - info->response_time;

    /* corrected_initial_age (base_age + round_trip) plus resident time */
    return base_age + round_trip + time_in_cache;
}

/*
 * Decide whether the cached entity attached to this request can be served
 * without revalidation (RFC2616 13.2, Expiration Model).
 *
 * cache: per-request cache state; cache->handle->cache_obj->info is read
 *        for the stored date and expire values and is handed to
 *        ap_cache_current_age().
 * r:     current request. r->headers_in supplies the client's
 *        Cache-Control; r->headers_out is read for Cache-Control and Age
 *        (expected to hold the cached response headers at this point).
 *
 * Returns 1 if the entity is fresh, 0 if it is stale.
 *
 * Side effects when fresh: the Age header in r->headers_out is set, and a
 * "110 Response is stale" Warning is merged in when the entity only counts
 * as fresh because of max-stale / min-fresh adjustments.
 */
CACHE_DECLARE(int) ap_cache_check_freshness(cache_request_rec *cache,
                                            request_rec *r)
{
    apr_time_t age, maxage_req, maxage_cresp, maxage, smaxage, maxstale, minfresh;
    const char *cc_cresp, *cc_req;
    const char *agestr = NULL;
    char *val = NULL;
    apr_time_t age_c = 0;
    cache_info *info = &(cache->handle->cache_obj->info);

    /*
     * We now want to check if our cached data is still fresh. This depends
     * on a few things, in this order:
     *
     * - RFC2616 14.9.4 End to end reload, Cache-Control: no-cache. no-cache
     * in either the request or the cached response means that we must
     * revalidate the request unconditionally, overriding any expiration
     * mechanism. It's equivalent to max-age=0,must-revalidate.
     *
     * - RFC2616 14.32 Pragma: no-cache. This is treated the same as
     * Cache-Control: no-cache.
     *
     * - RFC2616 14.9.3 Cache-Control: max-stale, must-revalidate,
     * proxy-revalidate. If the max-stale request header exists, modify the
     * stale calculations below so that an object can be at most <max-stale>
     * seconds stale before we request a revalidation, _UNLESS_ a
     * must-revalidate or proxy-revalidate cached response header exists to
     * stop us doing this.
     *
     * - RFC2616 14.9.3 Cache-Control: s-maxage. The origin server specifies
     * the maximum age an object can be before it is considered stale. This
     * directive has the effect of proxy|must revalidate, which in turn means
     * we simply ignore any max-stale setting.
     *
     * - RFC2616 14.9.4 Cache-Control: max-age. This header can appear in
     * both requests and responses. If both are specified, the smaller of
     * the two takes priority.
     *
     * - RFC2616 14.21 Expires: if this header exists in the cached
     * entity, and its value is in the past, it has expired.
     *
     * NOTE: the no-cache and Pragma rules listed above are NOT evaluated in
     * this function; only the max-stale, min-fresh, s-maxage, max-age and
     * expire checks below are performed here.
     */
    cc_cresp = ap_table_get(r->headers_out, "Cache-Control");
    cc_req = ap_table_get(r->headers_in, "Cache-Control");
    if ((agestr = ap_table_get(r->headers_out, "Age"))) {
        age_c = atoi(agestr);
    }

    /* calculate age of object */
    /* NOTE(review): age is the result of apr_time_t arithmetic in
     * ap_cache_current_age(), while the limits below are plain integers
     * parsed from header text - confirm both use the same unit.
     */
    age = ap_cache_current_age(info, age_c);

    /*
     * For every directive below, val is reset before the lookup and checked
     * afterwards, so that a directive present without a usable value is
     * treated as absent instead of passing a NULL or stale pointer to atoi().
     */

    /* extract s-maxage */
    val = NULL;
    if (cc_cresp && ap_cache_liststr(cc_cresp, "s-maxage", &val)
        && val != NULL)
        smaxage = atoi(val);
    else
        smaxage = -1;

    /* extract max-age from request */
    val = NULL;
    if (cc_req && ap_cache_liststr(cc_req, "max-age", &val)
        && val != NULL)
        maxage_req = atoi(val);
    else
        maxage_req = -1;

    /* extract max-age from response */
    val = NULL;
    if (cc_cresp && ap_cache_liststr(cc_cresp, "max-age", &val)
        && val != NULL)
        maxage_cresp = atoi(val);
    else
        maxage_cresp = -1;

    /*
     * if both maxage request and response, the smaller one takes priority
     */
    if (-1 == maxage_req)
        maxage = maxage_cresp;
    else if (-1 == maxage_cresp)
        maxage = maxage_req;
    else
        maxage = MIN(maxage_req, maxage_cresp);

    /* extract max-stale */
    val = NULL;
    if (cc_req && ap_cache_liststr(cc_req, "max-stale", &val)
        && val != NULL)
        maxstale = atoi(val);
    else
        maxstale = 0;

    /* extract min-fresh */
    val = NULL;
    if (cc_req && ap_cache_liststr(cc_req, "min-fresh", &val)
        && val != NULL)
        minfresh = atoi(val);
    else
        minfresh = 0;

    /* override maxstale if must-revalidate or proxy-revalidate */
    if (maxstale && ((cc_cresp &&
                      ap_cache_liststr(cc_cresp, "must-revalidate", NULL))
                     || (cc_cresp && ap_cache_liststr(cc_cresp,
                                                      "proxy-revalidate", NULL))))
        maxstale = 0;
    /* handle expiration */
    /* s-maxage deliberately ignores maxstale (see the list above) */
    if ((-1 < smaxage && age < (smaxage - minfresh)) ||
        (-1 < maxage && age < (maxage + maxstale - minfresh)) ||
        (info->expire != APR_DATE_BAD && age < (info->expire - info->date + maxstale - minfresh))) {
        /* it's fresh darlings... */
        /* set age header on response */
        ap_table_set(r->headers_out, "Age",
                     ap_psprintf(r->pool, "%lu", (unsigned long)age));

        /* add warning if maxstale overrode freshness calculation */
        if (!((-1 < smaxage && age < smaxage) ||
              (-1 < maxage && age < maxage) ||
              (info->expire != APR_DATE_BAD && (info->expire - info->date) > age))) {
            /* make sure we don't stomp on a previous warning */
            ap_table_merge(r->headers_out, "Warning", "110 Response is stale");
        }
        return 1;    /* Cache object is fresh */
    }
    return 0;        /* Cache object is stale */
}
/* 
 * list is a comma-separated list of case-insensitive tokens, with
 * optional whitespace around the tokens.
+5 −18
Original line number Diff line number Diff line
@@ -152,8 +152,6 @@ static int cache_url_handler(request_rec *r, int lookup)
     * - RFC2616 14.9.2 Cache-Control: no-store
     * - Pragma: no-cache
     * - Any requests requiring authorization.
     * - Any URLs whose length exceeds MAX_URL_LENGTH
     * - TODO: Make MAX_URL_LENGTH a config directive?
     */
    if (conf->ignorecachecontrol_set == 1 && conf->ignorecachecontrol == 1 && auth == NULL) {
        ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
@@ -199,12 +197,13 @@ static int cache_url_handler(request_rec *r, int lookup)
        return DECLINED;
    }
    else if (OK == rv) {
        /* cache file exists */
        /* RFC2616 13.2 - Check cache object expiration */
        cache->fresh = ap_cache_check_freshness(cache, r);
        if (cache->fresh) {
            /* fresh data available */
            apr_bucket_brigade *out;
            conn_rec *c = r->connection;

            /* fresh data available */
            if (lookup) {
                return OK;
            }
@@ -214,17 +213,8 @@ static int cache_url_handler(request_rec *r, int lookup)

            /* We are in the quick handler hook, which means that no output
             * filters have been set. So lets run the insert_filter hook.
             * XXX - Should we be inserting filters in the output stream
             * for proxy requests? Certainly we need the core filters
             * (byterange, chunking, etc.).  I can also see the need to
             * conditionally insert tag processing filters (e.g. INCLUDES).
             */
            ap_run_insert_filter(r);

            /* Now add the cache_out filter. cache_out is a FTYPE_CONTENT
             * which means it will be inserted first in the stream, which
             * is exactly what we need.
             */
            ap_add_output_filter("CACHE_OUT", NULL, r, r->connection);

            /* kick off the filter stack */
@@ -252,7 +242,7 @@ static int cache_url_handler(request_rec *r, int lookup)
                             r->server,
                             "cache: conditional - add cache_in filter and "
                             "DECLINE");

                /* Why not add CACHE_CONDITIONAL? */
                ap_add_output_filter("CACHE_IN", NULL, r, r->connection);

                return DECLINED;
@@ -339,10 +329,7 @@ static int cache_out_filter(ap_filter_t *f, apr_bucket_brigade *bb)
    ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
            "cache: running CACHE_OUT filter");

    /* TODO: Handle getting errors on either of these calls 
     * ???: Should we return headers on a subrequest?
     */
    cache_read_entity_headers(cache->handle, r);    
    /* cache_read_entity_headers() was called in cache_select_url() */
    cache_read_entity_body(cache->handle, r->pool, bb);

    /* This filter is done once it has served up its content */
+13 −1
Original line number Diff line number Diff line
@@ -198,6 +198,11 @@ struct cache_info {
    apr_time_t request_time;
    apr_time_t response_time;
    apr_size_t len;
    apr_time_t ims;    /*  If-Modified_Since header value    */
    apr_time_t ius;    /*  If-UnModified_Since header value    */
    const char *im;         /* If-Match header value */
    const char *inm;         /* If-None-Match header value */
    apr_table_t *req_hdrs;   /* These are the original request headers   */
};

/* cache handle information */
@@ -238,9 +243,16 @@ typedef struct {


/* cache_util.c */
/* do an HTTP/1.1 age calculation (RFC2616 13.2.3) */
CACHE_DECLARE(apr_time_t) ap_cache_current_age(cache_info *info, const apr_time_t age_value);

/**
 *
 * Check the freshness of the cache object per RFC2616 section 13.2 (Expiration Model)
 * @param cache cache_request_rec
 * @param r request_rec
 * @return 0 ==> cache object is stale, 1 ==> cache object is fresh
 */
CACHE_DECLARE(int) ap_cache_check_freshness(cache_request_rec *cache, request_rec *r);
CACHE_DECLARE(apr_time_t) ap_cache_hex2usec(const char *x);
CACHE_DECLARE(void) ap_cache_usec2hex(apr_time_t j, char *y);
CACHE_DECLARE(char *) generate_name(apr_pool_t *p, int dirlevels, 
+23 −5
Original line number Diff line number Diff line
@@ -87,9 +87,11 @@ typedef struct mem_cache_object {
    apr_ssize_t num_header_out;
    apr_ssize_t num_subprocess_env;
    apr_ssize_t num_notes;
    apr_ssize_t num_req_hdrs;
    cache_header_tbl_t *header_out;
    cache_header_tbl_t *subprocess_env;
    cache_header_tbl_t *notes;
    cache_header_tbl_t *req_hdrs; /* for Vary negotiation */
    apr_size_t m_len;
    void *m;
    apr_os_file_t fd;
@@ -363,7 +365,6 @@ static int create_entity(cache_handle_t *h, request_rec *r,
    strncpy(obj->key, key, strlen(key) + 1);
    obj->info.len = len;


    /* Allocate and init mem_cache_object_t */
    mobj = calloc(1, sizeof(*mobj));
    if (!mobj) {
@@ -633,10 +634,15 @@ static apr_status_t read_headers(cache_handle_t *h, request_rec *r)
{
    int rc;
    mem_cache_object_t *mobj = (mem_cache_object_t*) h->cache_obj->vobj;
    cache_info *info = &(h->cache_obj->info);

    info->req_hdrs = apr_table_make(r->pool, mobj->num_req_hdrs);
    r->headers_out = apr_table_make(r->pool,mobj->num_header_out);
    r->subprocess_env = apr_table_make(r->pool, mobj->num_subprocess_env);
    r->notes = apr_table_make(r->pool, mobj->num_notes);
    rc = unserialize_table(mobj->req_hdrs,
                           mobj->num_req_hdrs,
                           info->req_hdrs);
    rc = unserialize_table( mobj->header_out,
                            mobj->num_header_out, 
                            r->headers_out);
@@ -684,7 +690,19 @@ static apr_status_t write_headers(cache_handle_t *h, request_rec *r, cache_info
    mem_cache_object_t *mobj = (mem_cache_object_t*) obj->vobj;
    int rc;

    /* Precompute how much storage we need to hold the headers */
    /*
     * The cache needs to keep track of the following information: 
     * - Date, LastMod, Version, ReqTime, RespTime, ContentLength 
     * - The original request headers (for Vary) 
     * - The original response headers (for returning with a cached response) 
     * - The body of the message
     */
    rc = serialize_table(&mobj->req_hdrs,
                         &mobj->num_req_hdrs,
                         r->headers_in);
    if (rc != APR_SUCCESS) {
        return rc;
    }
    rc = serialize_table(&mobj->header_out, 
                         &mobj->num_header_out, 
                         r->headers_out);   
@@ -710,15 +728,15 @@ static apr_status_t write_headers(cache_handle_t *h, request_rec *r, cache_info
    if (info->lastmod) {
        obj->info.lastmod = info->lastmod;
    }
    if (info->expire) {
        obj->info.expire = info->expire;
    }
    if (info->response_time) {
        obj->info.response_time = info->response_time;
    }
    if (info->request_time) {
        obj->info.request_time = info->request_time;
    }
    if (info->expire) {
        obj->info.expire = info->expire;
    }
    if (info->content_type) {
        obj->info.content_type = (char*) calloc(1, strlen(info->content_type) + 1);
        if (!obj->info.content_type) {