Commit d13d04a5 authored by Jim Jagielski's avatar Jim Jagielski
Browse files

Merge r1584417, r1585157 from trunk:

allow users to work around the over-aggressive backreference
escaping by selecting the characters to escape.



add BNP flag to give control to the user on whether a space ' ' in
an escaped backreference is encoded as a + (default) or %20. Useful
if your backreference isn't going into the query string.


Submitted by: covener
Reviewed by: jailletc36, covener, ylavic


git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/branches/2.4.x@1796850 13f79535-47bb-0310-9956-ffa450edef68
parent 418685e5
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -14,6 +14,14 @@ Changes with Apache 2.4.26
  *) Evaluate nested If/ElseIf/Else configuration blocks.
     [Luca Toscano, Jacob Champion]

  *) mod_rewrite: Add 'BNP' (backreferences-no-plus) flag to RewriteRule to 
     allow spaces in backreferences to be encoded as %20 instead of '+'.
     [Eric Covener]

  *) mod_rewrite: Add the possibility to limit the escaping to specific
     characters in backreferences by listing them in the B flag.
     [Eric Covener]

  *) mod_substitute: Fix spurious AH01328 (Line too long) errors on EBCDIC
     systems.  [Eric Covener]

+0 −11
Original line number Diff line number Diff line
@@ -120,17 +120,6 @@ RELEASE SHOWSTOPPERS:
PATCHES ACCEPTED TO BACKPORT FROM TRUNK:
  [ start all new proposals below, under PATCHES PROPOSED. ]

  *) mod_rewrite: allow users to work around the over-aggressive backreference
                  escaping by selecting the characters to escape
     mod_rewrite: add BNP flag (backrefnoplus)
     trunk patch: http://svn.apache.org/r1584417
                  http://svn.apache.org/r1585157
     2.4.x patch: http://home.apache.org/~jailletc36/BNP.diff
           (mod_rewrite.xml from r1584417 has already been erroneously merged
           with r1728060 + tweak taken from r1701545 partly backported in r1703403.
           I have also added an entry for r1584417)
     +1: jailletc36, covener, ylavic

  *) mod_rewrite: When a substitution is a fully qualified URL, and the 
     scheme/host/port matches the current virtual host, stop interpreting the 
     path component as a local path just because the first component of the 
+18 −2
Original line number Diff line number Diff line
@@ -69,8 +69,11 @@ of how you might use them.</p>
<section id="flag_b"><title>B (escape backreferences)</title>
<p>The [B] flag instructs <directive
module="mod_rewrite">RewriteRule</directive> to escape non-alphanumeric
characters before applying the transformation.
</p>
characters before applying the transformation.</p>
<p>In 2.4.26 and later, you can limit the escaping to specific characters
in backreferences by listing them: <code>[B=#?;]</code>. Note: The space
character can be used in the list of characters to escape, but it cannot be
the last character in the list.</p>

<p><code>mod_rewrite</code> has to unescape URLs before mapping them,
so backreferences are unescaped at the time they are applied.
@@ -103,6 +106,19 @@ returns a 404 if it sees one.</p>
<p>This escaping is particularly necessary in a proxy situation,
when the backend may break if presented with an unescaped URL.</p>

<p>An alternative to this flag is using a <directive module="mod_rewrite"
>RewriteCond</directive> to capture against <code>%{THE_REQUEST}</code>, which
will capture strings in the encoded form.</p>
</section>

<section id="flag_bnp"><title>BNP|backrefnoplus (don't escape space to +)</title>
<p>The [BNP] flag instructs <directive
module="mod_rewrite">RewriteRule</directive> to escape the space character
in a backreference to %20 rather than '+'. Useful when the backreference
will be used in the path component rather than the query string.</p>

<p>This flag is available in version 2.4.26 and later.</p>

</section>

<section id="flag_c"><title>C|chain</title>
+39 −11
Original line number Diff line number Diff line
@@ -166,6 +166,7 @@ static const char* really_last_key = "rewrite_really_last";
#define RULEFLAG_DISCARDPATHINFO    (1<<15)
#define RULEFLAG_QSDISCARD          (1<<16)
#define RULEFLAG_END                (1<<17)
/* Set when a rule carries the BNP flag; do_expand() hands it to
 * escape_backref() as the 'noplus' argument, so an escaped space is
 * emitted via c2x() with a '%' prefix instead of as '+'.
 */
#define RULEFLAG_ESCAPENOPLUS       (1<<18)
#define RULEFLAG_QSLAST             (1<<19)

/* return code of the rewrite rule
@@ -317,6 +318,7 @@ typedef struct {
    data_item *cookie;               /* added cookies                         */
    int        skip;                 /* number of next rules to skip          */
    int        maxrounds;            /* limit on number of loops with N flag  */
    char       *escapes;             /* specific backref escapes              */
} rewriterule_entry;

typedef struct {
@@ -417,7 +419,7 @@ static const char *rewritemap_mutex_type = "rewrite-map";
/* Optional functions imported from mod_ssl when loaded: */
static APR_OPTIONAL_FN_TYPE(ssl_var_lookup) *rewrite_ssl_lookup = NULL;
static APR_OPTIONAL_FN_TYPE(ssl_is_https) *rewrite_is_https = NULL;
static char *escape_uri(apr_pool_t *p, const char *path);
static char *escape_backref(apr_pool_t *p, const char *path, const char *escapeme, int noplus);

/*
 * +-------------------------------------------------------+
@@ -634,25 +636,45 @@ static APR_INLINE unsigned char *c2x(unsigned what, unsigned char prefix,
}

/*
 * Escapes a uri in a similar way as php's urlencode does.
 * Escapes a backreference in a similar way as php's urlencode does.
 * Based on ap_os_escape_path in server/util.c
 */
static char *escape_uri(apr_pool_t *p, const char *path) {
static char *escape_backref(apr_pool_t *p, const char *path, const char *escapeme, int noplus) {
    char *copy = apr_palloc(p, 3 * strlen(path) + 3);
    const unsigned char *s = (const unsigned char *)path;
    unsigned char *d = (unsigned char *)copy;
    unsigned c;

    while ((c = *s)) {
        if (!escapeme) { 
            if (apr_isalnum(c) || c == '_') {
                *d++ = c;
            }
        else if (c == ' ') {
            else if (c == ' ' && !noplus) {
                *d++ = '+';
            }
            else {
                d = c2x(c, '%', d);
            }
        }
        else { 
            const char *esc = escapeme;
            while (*esc) { 
                if (c == *esc) { 
                    if (c == ' ' && !noplus) { 
                        *d++ = '+';
                    }
                    else { 
                        d = c2x(c, '%', d);
                    }
                    break;
                }
                ++esc;
            }
            if (!*esc) { 
                *d++ = c;
            }
        }
        ++s;
    }
    *d = '\0';
@@ -2390,7 +2412,7 @@ static char *do_expand(char *input, rewrite_ctx *ctx, rewriterule_entry *entry)
                    /* escape the backreference */
                    char *tmp2, *tmp;
                    tmp = apr_pstrmemdup(pool, bri->source + bri->regmatch[n].rm_so, span);
                    tmp2 = escape_uri(pool, tmp);
                    tmp2 = escape_backref(pool, tmp, entry->escapes, entry->flags & RULEFLAG_ESCAPENOPLUS);
                    rewritelog((ctx->r, 5, ctx->perdir, "escaping backreference '%s' to '%s'",
                            tmp, tmp2));

@@ -3446,6 +3468,12 @@ static const char *cmd_rewriterule_setflag(apr_pool_t *p, void *_cfg,
    case 'B':
        if (!*key || !strcasecmp(key, "ackrefescaping")) {
            cfg->flags |= RULEFLAG_ESCAPEBACKREF;
            if (val && *val) { 
                cfg->escapes = val;
            }
        }
        else if (!strcasecmp(key, "NP") || !strcasecmp(key, "ackrefernoplus")) { 
            cfg->flags |= RULEFLAG_ESCAPENOPLUS;
        }
        else {
            ++error;