mod_charset_lite.c 41.9 KB
Newer Older
powelld's avatar
powelld committed
                                        * instances using the same context
                                        */
        }
        if (!ctx) {                   /* no idea how to translate; don't do anything */
            ctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(charset_filter_ctx_t));
            ctx->dc = dc;
            ctx->noop = 1;
        }
    }

    ap_log_rerror(APLOG_MARK, APLOG_TRACE6, 0, f->r,
                 "xlate_in_filter() - "
                 "charset_source: %s charset_default: %s",
                 dc && dc->charset_source ? dc->charset_source : "(none)",
                 dc && dc->charset_default ? dc->charset_default : "(none)");

    if (!ctx->ran) {  /* filter never ran before */
        chk_filter_chain(f);
        ctx->ran = 1;
        if (!ctx->noop && !ctx->is_sb
            && apr_table_get(f->r->headers_in, "Content-Length")) {
            /* A Content-Length header is present, but it won't be valid after
             * conversion because we're not converting between two single-byte
             * charsets.  This will affect most CGI scripts and may affect
             * some modules.
             * Content-Length can't be unset here because that would break
             * being able to read the request body.
             * Processing of chunked request bodies is not impacted by this
             * filter since the length was not declared anyway.
             */
            ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, f->r,
                          "Request body length may change, resulting in "
                          "misprocessing by some modules or scripts");
        }
    }

    if (ctx->noop) {
        return ap_get_brigade(f->next, bb, mode, block, readbytes);
    }

    if (APR_BRIGADE_EMPTY(ctx->bb)) {
        if ((rv = ap_get_brigade(f->next, bb, mode, block,
                                 readbytes)) != APR_SUCCESS) {
            return rv;
        }
    }
    else {
        APR_BRIGADE_PREPEND(bb, ctx->bb); /* first use the leftovers */
    }

    buffer_size = INPUT_XLATE_BUF_SIZE;
    rv = xlate_brigade(ctx, bb, ctx->tmp, &buffer_size, &hit_eos);
    if (rv == APR_SUCCESS) {
        if (!hit_eos) {
            /* move anything leftover into our context for next time;
             * we don't currently "set aside" since the data came from
             * down below, but I suspect that for long-term we need to
             * do that
             */
            APR_BRIGADE_CONCAT(ctx->bb, bb);
        }
        if (buffer_size < INPUT_XLATE_BUF_SIZE) { /* do we have output? */
            apr_bucket *e;

            e = apr_bucket_heap_create(ctx->tmp,
                                       INPUT_XLATE_BUF_SIZE - buffer_size,
                                       NULL, f->r->connection->bucket_alloc);
            /* make sure we insert at the head, because there may be
             * an eos bucket already there, and the eos bucket should
             * come after the data
             */
            APR_BRIGADE_INSERT_HEAD(bb, e);
        }
        else {
            /* XXX need to get some more data... what if the last brigade
             * we got had only the first byte of a multibyte char?  we need
             * to grab more data from the network instead of returning an
             * empty brigade
             */
        }
        /* If we have any metadata at the head of ctx->bb, go ahead and move it
         * onto the end of bb to be returned to our caller.
         */
        if (!APR_BRIGADE_EMPTY(ctx->bb)) {
            apr_bucket *b = APR_BRIGADE_FIRST(ctx->bb);
            while (b != APR_BRIGADE_SENTINEL(ctx->bb)
                   && APR_BUCKET_IS_METADATA(b)) {
                APR_BUCKET_REMOVE(b);
                APR_BRIGADE_INSERT_TAIL(bb, b);
                b = APR_BRIGADE_FIRST(ctx->bb);
            }
        }
    }
    else {
        log_xlate_error(f, rv);
    }

    return rv;
}

static const command_rec cmds[] =
{
    AP_INIT_TAKE1("CharsetSourceEnc",
                  add_charset_source,
                  NULL,
                  OR_FILEINFO,
                  "source (html,cgi,ssi) file charset"),
    AP_INIT_TAKE1("CharsetDefault",
                  add_charset_default,
                  NULL,
                  OR_FILEINFO,
                  "name of default charset"),
    AP_INIT_ITERATE("CharsetOptions",
                    add_charset_options,
                    NULL,
                    OR_FILEINFO,
                    "valid options: ImplicitAdd, NoImplicitAdd, TranslateAllMimeTypes, "
                    "NoTranslateAllMimeTypes"),
    {NULL}
};

static void charset_register_hooks(apr_pool_t *p)
{
    ap_hook_fixups(find_code_page, NULL, NULL, APR_HOOK_MIDDLE);
    ap_hook_insert_filter(xlate_insert_filter, NULL, NULL, APR_HOOK_REALLY_LAST);
    ap_register_output_filter(XLATEOUT_FILTER_NAME, xlate_out_filter, NULL,
                              AP_FTYPE_RESOURCE);
    ap_register_input_filter(XLATEIN_FILTER_NAME, xlate_in_filter, NULL,
                             AP_FTYPE_RESOURCE);
}

AP_DECLARE_MODULE(charset_lite) =
{
    STANDARD20_MODULE_STUFF,
    create_charset_dir_conf,
    merge_charset_dir_conf,
    NULL,
    NULL,
    cmds,
    charset_register_hooks
};