Commit 55b8ceac authored by Daniel Stenberg's avatar Daniel Stenberg
Browse files

chunked transfer encoding support

parent bcf448ee
Loading
Loading
Loading
Loading

lib/http_chunks.c

0 → 100644
+188 −0
Original line number Diff line number Diff line
/*****************************************************************************
 *                                  _   _ ____  _     
 *  Project                     ___| | | |  _ \| |    
 *                             / __| | | | |_) | |    
 *                            | (__| |_| |  _ <| |___ 
 *                             \___|\___/|_| \_\_____|
 *
 * Copyright (C) 2001, Daniel Stenberg, <daniel@haxx.se>, et al.
 *
 * In order to be useful for every potential user, curl and libcurl are
 * dual-licensed under the MPL and the MIT/X-derivate licenses.
 *
 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
 * copies of the Software, and permit persons to whom the Software is
 * furnished to do so, under the terms of the MPL or the MIT/X-derivate
 * licenses. You may pick one of these licenses.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 * $Id$
 *****************************************************************************/
#include "setup.h"

/* -- WIN32 approved -- */
#include <stdio.h>
#include <string.h>
#include <stdarg.h>
#include <stdlib.h>
#include <ctype.h>

#include "urldata.h" /* it includes http_chunks.h */
#include "sendf.h"   /* for the client write stuff */

#define _MPRINTF_REPLACE /* use our functions only */
#include <curl/mprintf.h>

/* The last #include file should be: */
#ifdef MALLOCDEBUG
#include "memdebug.h"
#endif

/* 
 * Chunk format (simplified):
 *
 * <HEX SIZE>[ chunk extension ] CRLF
 * <DATA>
 *
 * Highlights from RFC2616 section 3.6 say:

   The chunked encoding modifies the body of a message in order to
   transfer it as a series of chunks, each with its own size indicator,
   followed by an OPTIONAL trailer containing entity-header fields. This
   allows dynamically produced content to be transferred along with the
   information necessary for the recipient to verify that it has
   received the full message.

       Chunked-Body   = *chunk
                        last-chunk
                        trailer
                        CRLF

       chunk          = chunk-size [ chunk-extension ] CRLF
                        chunk-data CRLF
       chunk-size     = 1*HEX
       last-chunk     = 1*("0") [ chunk-extension ] CRLF

       chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
       chunk-ext-name = token
       chunk-ext-val  = token | quoted-string
       chunk-data     = chunk-size(OCTET)
       trailer        = *(entity-header CRLF)

   The chunk-size field is a string of hex digits indicating the size of
   the chunk. The chunked encoding is ended by any chunk whose size is
   zero, followed by the trailer, which is terminated by an empty line.

 */


void Curl_httpchunk_init(struct connectdata *conn)
{
  struct Curl_chunker *chunk = &conn->proto.http->chunk;
  chunk->hexindex=0; /* start at 0 */
  chunk->state = CHUNK_HEX; /* we get hex first! */
}

/*
 * chunk_read() returns a 0 for normal operations, or a positive return code
 * for errors. A negative number means this sequence of chunks is complete,
 * and that many ~bytes were NOT used at the end of the buffer passed in.
 * The 'wrote' argument is set to tell the caller how many bytes we actually
 * passed to the client (for byte-counting and whatever).
 *
 * The states and the state-machine is further explained in the header file.
 */
CHUNKcode Curl_httpchunk_read(struct connectdata *conn,
                              char *datap,
                              ssize_t length,
                              ssize_t *wrote)
{
  CURLcode result;
  struct Curl_chunker *ch = &conn->proto.http->chunk;
  int piece;
  *wrote = 0; /* nothing yet */

  while(length) {
    switch(ch->state) {
    case CHUNK_HEX:
      if(isxdigit((int)*datap)) {
        if(ch->hexindex < MAXNUM_SIZE) {
          ch->hexbuffer[ch->hexindex] = *datap;
          datap++;
          length--;
          ch->hexindex++;
        }
        else {
          return 1; /* longer hex than we support */
        }
      }
      else {
        /* length and datap are unmodified */
        ch->hexbuffer[ch->hexindex]=0;
        ch->datasize=strtoul(ch->hexbuffer, NULL, 16);
        ch->state = CHUNK_POSTHEX;
      }
      break;

    case CHUNK_POSTHEX:
      /* just a lame state waiting for CRLF to arrive */
      if(*datap == '\r')
        ch->state = CHUNK_CR;
      length--;
      datap++;
      break;

    case CHUNK_CR:
      /* waiting for the LF */
      if(*datap == '\n') {
        /* we're now expecting data to come, unless size was zero! */
        if(0 == ch->datasize) {
          ch->state = CHUNK_STOP; /* stop reading! */
          if(1 == length) {
            /* This was the final byte, return right now */
            return ~0;
          }
        }
        else
          ch->state = CHUNK_DATA;
      }
      else
        /* previously we got a fake CR, go back to CR waiting! */
        ch->state = CHUNK_CR;
      datap++;
      length--;
      break;

    case CHUNK_DATA:
      /* we get pure and fine data

         We expect another 'datasize' of data. We have 'length' right now,
         it can be more or less than 'datasize'. Get the smallest piece.
      */
      piece = (ch->datasize >= length)?length:ch->datasize;

      /* Write the data portion available */
      result = Curl_client_write(conn->data, CLIENTWRITE_BODY, datap, piece);
      if(result)
        return CHUNKE_WRITE_ERROR;
      *wrote += piece;

      ch->datasize -= piece; /* decrease amount left to expect */
      datap += piece;    /* move read pointer forward */
      length -= piece;   /* decrease space left in this round */

      if(0 == ch->datasize)
        /* end of data this round, go back to get a new size */
        Curl_httpchunk_init(conn);

      break;
    case CHUNK_STOP:
      return ~length; /* return the data size left */
    default:
      return CHUNKE_STATE_ERROR;
    }
  }
  return CHUNKE_OK;
}

lib/http_chunks.h

0 → 100644
+73 −0
Original line number Diff line number Diff line
#ifndef __HTTP_CHUNKS_H
#define __HTTP_CHUNKS_H
/*****************************************************************************
 *                                  _   _ ____  _     
 *  Project                     ___| | | |  _ \| |    
 *                             / __| | | | |_) | |    
 *                            | (__| |_| |  _ <| |___ 
 *                             \___|\___/|_| \_\_____|
 *
 * Copyright (C) 2001, Daniel Stenberg, <daniel@haxx.se>, et al.
 *
 * In order to be useful for every potential user, curl and libcurl are
 * dual-licensed under the MPL and the MIT/X-derivate licenses.
 *
 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
 * copies of the Software, and permit persons to whom the Software is
 * furnished to do so, under the terms of the MPL or the MIT/X-derivate
 * licenses. You may pick one of these licenses.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 * $Id$
 *****************************************************************************/
/*
 * The longest possible hexadecimal number we support in a chunked transfer.
 * Weird enoug, RFC2616 doesn't set a maximum size! Since we use strtoul()
 * to convert it, we "only" support 2^32 bytes chunk data.
 */
#define MAXNUM_SIZE 16

typedef enum {
  CHUNK_LOST, /* never use */

  /* In this we await and buffer all hexadecimal digits until we get one
     that isn't a hexadecimal digit. When done, we go POSTHEX */
  CHUNK_HEX,

  /* We have received the hexadecimal digit and we eat all characters until
     we get a CRLF pair. When we see a CR we go to the CR state. */
  CHUNK_POSTHEX,

  /* A single CR has been found and we should get a LF right away in this
     state or we go back to POSTHEX. When LF is received, we go to DATA.
     If the size given was zero, we set state to STOP and return. */
  CHUNK_CR,

  /* We eat the amount of data specified. When done, we move back to the
     HEX state. */
  CHUNK_DATA,

  /* This is only used to really mark that we're out of the game */
  CHUNK_STOP,

  CHUNK_LAST /* never use */
} ChunkyState;

typedef enum {
  CHUNKE_OK,
  CHUNKE_TOO_LONG_HEX,
  CHUNKE_WRITE_ERROR,
  CHUNKE_STATE_ERROR,
  CHUNKE_LAST
} CHUNKcode;

struct Curl_chunker {
  char hexbuffer[ MAXNUM_SIZE + 1];
  int hexindex;
  ChunkyState state;
  unsigned long datasize;
};

#endif