gen_test_char.c 5.95 KB
Newer Older
powelld's avatar
powelld committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173
/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* Select where the character classifiers and the APR_HAVE_* feature macros
 * come from: local stand-ins when CROSS_COMPILE is defined, the APR headers
 * otherwise.
 */
#ifdef CROSS_COMPILE

/* Cross-compile build: the APR headers are not included, so the apr_is*()
 * classifiers used by main() are defined here on top of <ctype.h>.
 * NOTE(review): presumably this is because the generator runs on the build
 * machine, where the target's APR configuration does not apply -- confirm.
 */
#include <ctype.h>
/* Each wrapper casts its argument to unsigned char before calling the
 * <ctype.h> function, keeping the value in the range those functions accept.
 */
#define apr_isalnum(c) (isalnum(((unsigned char)(c))))
#define apr_isalpha(c) (isalpha(((unsigned char)(c))))
#define apr_iscntrl(c) (iscntrl(((unsigned char)(c))))
#define apr_isprint(c) (isprint(((unsigned char)(c))))
/* Defined so that the guarded <stdio.h>/<string.h> includes below are taken. */
#define APR_HAVE_STDIO_H 1
#define APR_HAVE_STRING_H 1

#else

/* Regular build: the apr_is*() classifiers and APR_HAVE_* macros are expected
 * to be supplied by these APR headers.
 */
#include "apr.h"
#include "apr_lib.h"

#endif

/* On Win32 and OS/2 main() flags a larger set of characters (adding CR and
 * '%') for T_ESCAPE_SHELL_CMD.
 */
#if defined(WIN32) || defined(OS2)
#define NEED_ENHANCED_ESCAPES
#endif

/* printf() and strchr() are used by main(); both includes must come after
 * the APR_HAVE_* macros above have been established.
 */
#if APR_HAVE_STDIO_H
#include <stdio.h>
#endif
#if APR_HAVE_STRING_H
#include <string.h>
#endif

/* Per-character classification bits.
 *
 * Several routines in util.c walk strings looking for particular characters;
 * a precomputed lookup table of these bits makes that check cheap.  Each
 * macro is a distinct single bit so that they can be OR-ed together in one
 * table entry.
 */
#define T_ESCAPE_SHELL_CMD    (1 << 0)  /* shell metacharacter */
#define T_ESCAPE_PATH_SEGMENT (1 << 1)  /* must be escaped in a path segment */
#define T_OS_ESCAPE_PATH      (1 << 2)  /* must be escaped in a path */
#define T_HTTP_TOKEN_STOP     (1 << 3)  /* not an HTTP token character */
#define T_ESCAPE_LOGITEM      (1 << 4)  /* must be escaped when logged */
#define T_ESCAPE_FORENSIC     (1 << 5)  /* must be escaped in forensic logs */
#define T_ESCAPE_URLENCODED   (1 << 6)  /* must be escaped when urlencoding */
#define T_HTTP_CTRLS          (1 << 7)  /* NUL or control char other than HT */
#define T_VCHAR_OBSTEXT       (1 << 8)  /* not NUL, not control, not SP */

/* Compute the set of T_* classification bits for one character code.
 *
 * c is the character code to classify; main() calls this for 0..255.
 * Returns the OR of every T_* flag whose test matches c.
 *
 * Note on NUL: strchr() treats the string terminator as part of the string,
 * so strchr(set, 0) is non-NULL.  Tests of the form "!strchr(set, c)" are
 * therefore false for c == 0 unless they handle NUL explicitly.
 */
static unsigned short test_char_flags(unsigned c)
{
    unsigned short flags = 0;

    /* escape_shell_cmd */
#ifdef NEED_ENHANCED_ESCAPES
    /* Win32/OS2 have many of the same vulnerable characters
     * as Unix sh, plus the carriage return and percent char.
     * The proper escaping of these characters varies from unix
     * since Win32/OS2 use carets or doubled-double quotes,
     * and neither lf nor cr can be escaped.  We escape unix
     * specific as well, to assure that cross-compiled unix
     * applications behave similarly when invoked on win32/os2.
     *
     * Rem please keep in-sync with apr's list in win32/filesys.c
     */
    if (c && strchr("&;`'\"|*?~<>^()[]{}$\\\n\r%", c)) {
        flags |= T_ESCAPE_SHELL_CMD;
    }
#else
    if (c && strchr("&;`'\"|*?~<>^()[]{}$\\\n", c)) {
        flags |= T_ESCAPE_SHELL_CMD;
    }
#endif

    /* The next three tests flag everything that is neither alphanumeric
     * nor in the listed set of characters that may appear unescaped.
     * NUL is not flagged by any of them (see the note on strchr above).
     */
    if (!apr_isalnum(c) && !strchr("$-_.+!*'(),:@&=~", c)) {
        flags |= T_ESCAPE_PATH_SEGMENT;
    }

    /* Same as the path-segment set, with ';' and '/' also left unescaped. */
    if (!apr_isalnum(c) && !strchr("$-_.+!*'(),:;@&=/~", c)) {
        flags |= T_OS_ESCAPE_PATH;
    }

    if (!apr_isalnum(c) && !strchr(".-*_ ", c)) {
        flags |= T_ESCAPE_URLENCODED;
    }

    /* Stop for any non-'token' character, including ctrls, obs-text,
     * and "tspecials" (RFC2068) a.k.a. "separators" (RFC2616), which
     * is easier to express as characters remaining in the ASCII token set.
     * NUL is tested explicitly because strchr() would otherwise match it.
     */
    if (!c || !(apr_isalnum(c) || strchr("!#$%&'*+-.^_`|~", c))) {
        flags |= T_HTTP_TOKEN_STOP;
    }

    /* Catch CTRLs other than VCHAR, HT and SP, and obs-text (RFC7230 3.2)
     * This includes only the C0 plane, not C1 (which is obs-text itself.)
     * XXX: We should verify that all ASCII C0 ctrls/DEL corresponding to
     * the current EBCDIC translation are captured, and ASCII C1 ctrls
     * corresponding are all permitted (as they fall under obs-text rule)
     */
    if (!c || (apr_iscntrl(c) && c != '\t')) {
        flags |= T_HTTP_CTRLS;
    }

    /* Flag every character that is not NUL, not a control character and
     * not SP.  Going by the flag name this is meant to cover VCHAR and
     * obs-text (RFC7230 3.2); whether bytes with the high bit set are
     * included depends on apr_iscntrl() not reporting them as controls.
     */
    if (c && !apr_iscntrl(c) && c != ' ') {
        flags |= T_VCHAR_OBSTEXT;
    }

    /* For logging, escape all control characters,
     * double quotes (because they delimit the request in the log file)
     * backslashes (because we use backslash for escaping)
     * and 8-bit chars with the high bit set.  NUL is deliberately
     * excluded by the leading "c &&".
     */
    if (c && (!apr_isprint(c) || c == '"' || c == '\\' || apr_iscntrl(c))) {
        flags |= T_ESCAPE_LOGITEM;
    }

    /* For forensic logging, escape all control characters, top bit set,
     * :, | (used as delimiters) and % (used for escaping).
     */
    if (!apr_isprint(c) || c == ':' || c == '|' || c == '%'
        || apr_iscntrl(c) || !c) {
        flags |= T_ESCAPE_FORENSIC;
    }

    return flags;
}

/* Write the generated lookup table to stdout as C source: the T_* flag
 * definitions followed by a 256-entry test_char_table initialiser, eight
 * entries per line.
 *
 * argc and argv are not used.
 *
 * Returns 0 on success, or 1 if the output could not be written, so that a
 * caller redirecting stdout into a header does not silently keep a
 * truncated file.
 */
int main(int argc, char *argv[])
{
    unsigned c;

    (void)argc;
    (void)argv;

    /* The T_* constants have type int; cast so they match the %u
     * conversions exactly.
     */
    printf("/* this file is automatically generated by gen_test_char, "
           "do not edit */\n"
           "#define T_ESCAPE_SHELL_CMD     (%u)\n"
           "#define T_ESCAPE_PATH_SEGMENT  (%u)\n"
           "#define T_OS_ESCAPE_PATH       (%u)\n"
           "#define T_HTTP_TOKEN_STOP      (%u)\n"
           "#define T_ESCAPE_LOGITEM       (%u)\n"
           "#define T_ESCAPE_FORENSIC      (%u)\n"
           "#define T_ESCAPE_URLENCODED    (%u)\n"
           "#define T_HTTP_CTRLS           (%u)\n"
           "#define T_VCHAR_OBSTEXT        (%u)\n"
           "\n"
           "static const unsigned short test_char_table[256] = {",
           (unsigned)T_ESCAPE_SHELL_CMD,
           (unsigned)T_ESCAPE_PATH_SEGMENT,
           (unsigned)T_OS_ESCAPE_PATH,
           (unsigned)T_HTTP_TOKEN_STOP,
           (unsigned)T_ESCAPE_LOGITEM,
           (unsigned)T_ESCAPE_FORENSIC,
           (unsigned)T_ESCAPE_URLENCODED,
           (unsigned)T_HTTP_CTRLS,
           (unsigned)T_VCHAR_OBSTEXT);

    for (c = 0; c < 256; ++c) {
        /* Start a new, indented row every eight entries. */
        if (c % 8 == 0) {
            printf("\n    ");
        }

        /* Every entry but the last is followed by a comma. */
        printf("0x%03x%c", (unsigned)test_char_flags(c),
               (c < 255) ? ',' : ' ');
    }

    printf("\n};\n");

    /* Flush explicitly so that buffered write failures are seen here
     * rather than being lost at process exit.
     */
    if (fflush(stdout) != 0 || ferror(stdout)) {
        fputs("gen_test_char: error writing output\n", stderr);
        return 1;
    }

    return 0;
}