Commit 3f7b1bb8 authored by Daniel Stenberg's avatar Daniel Stenberg
Browse files

http redirects: %-encode bytes outside of ascii range

Apparently there are sites out there that do redirects to URLs they
provide in plain UTF-8 or similar. Browsers and wget %-encode such
headers when doing a subsequent request. Now libcurl does too.

Added test 1138 to verify.

Closes #473
parent 1ea3a7d5
Loading
Loading
Loading
Loading
+23 −18
Original line number Diff line number Diff line
@@ -1396,16 +1396,18 @@ CURLcode Curl_posttransfer(struct SessionHandle *data)
 */
static size_t strlen_url(const char *url)
{
  const char *ptr;
  const unsigned char *ptr;
  size_t newlen=0;
  bool left=TRUE; /* left side of the ? */

  for(ptr=url; *ptr; ptr++) {
  for(ptr=(unsigned char *)url; *ptr; ptr++) {
    switch(*ptr) {
    case '?':
      left=FALSE;
      /* fall through */
    default:
      if(*ptr >= 0x80)
        newlen += 2;
      newlen++;
      break;
    case ' ':
@@ -1426,9 +1428,9 @@ static void strcpy_url(char *output, const char *url)
{
  /* we must add this with whitespace-replacing */
  bool left=TRUE;
  const char *iptr;
  const unsigned char *iptr;
  char *optr = output;
  for(iptr = url;    /* read from here */
  for(iptr = (unsigned char *)url;    /* read from here */
      *iptr;         /* until zero byte */
      iptr++) {
    switch(*iptr) {
@@ -1436,6 +1438,11 @@ static void strcpy_url(char *output, const char *url)
      left=FALSE;
      /* fall through */
    default:
      if(*iptr >= 0x80) {
        snprintf(optr, 4, "%%%02x", *iptr);
        optr += 3;
      }
      else
        *optr++=*iptr;
      break;
    case ' ':
@@ -1684,15 +1691,14 @@ CURLcode Curl_follow(struct SessionHandle *data,
    newurl = absolute;
  }
  else {
    /* This is an absolute URL, don't allow the custom port number */
    disallowport = TRUE;

    if(strchr(newurl, ' ')) {
      /* This new URL contains at least one space, this is a mighty stupid
         redirect but we still make an effort to do "right". */
    /* The new URL MAY contain space or high byte values, that means a mighty
       stupid redirect URL but we still make an effort to do "right". */
    char *newest;
    size_t newlen = strlen_url(newurl);

    /* This is an absolute URL, don't allow the custom port number */
    disallowport = TRUE;

    newest = malloc(newlen+1); /* get memory for this */
    if(!newest)
      return CURLE_OUT_OF_MEMORY;
@@ -1700,7 +1706,6 @@ CURLcode Curl_follow(struct SessionHandle *data,

    free(newurl); /* that was no good */
    newurl = newest; /* use this instead now */
    }

  }

+1 −1
Original line number Diff line number Diff line
@@ -118,7 +118,7 @@ test1104 test1105 test1106 test1107 test1108 test1109 test1110 test1111 \
test1112 test1113 test1114 test1115 test1116 test1117 test1118 test1119 \
test1120 test1121 test1122 test1123 test1124 test1125 test1126 test1127 \
test1128 test1129 test1130 test1131 test1132 test1133 test1134 test1135 \
test1136 test1137 \
test1136 test1137 test1138 \
\
test1200 test1201 test1202 test1203 test1204 test1205 test1206 test1207 \
test1208 test1209 test1210 test1211 test1212 test1213 test1214 test1215 \

tests/data/test1138

0 → 100644
+74 −0
Original line number Diff line number Diff line
<testcase>
<info>
<keywords>
HTTP
HTTP GET
followlocation
</keywords>
</info>
#
# Server-side
<reply>
<data>
HTTP/1.1 302 OK swsclose
Location: ../moo.html/?name=آغاز-سم-زدایی-از-بازار-پول&testcase=/11380002    
Date: Thu, 09 Nov 2010 14:49:00 GMT
Connection: close

</data>
<data2>
HTTP/1.1 200 OK swsclose
Location: this should be ignored
Date: Thu, 09 Nov 2010 14:49:00 GMT
Connection: close

body
</data2>
<datacheck>
HTTP/1.1 302 OK swsclose
Location: ../moo.html/?name=آغاز-سم-زدایی-از-بازار-پول&testcase=/11380002    
Date: Thu, 09 Nov 2010 14:49:00 GMT
Connection: close

HTTP/1.1 200 OK swsclose
Location: this should be ignored
Date: Thu, 09 Nov 2010 14:49:00 GMT
Connection: close

body
</datacheck>
</reply>

#
# Client-side
<client>
<server>
http
</server>
 <name>
HTTP redirect with UTF-8 characters
 </name>
 <command>
http://%HOSTIP:%HTTPPORT/we/are/all/twits/1138 -L
</command>
</client>

#
# Verify data after the test has been "shot"
<verify>
<strip>
^User-Agent:.*
</strip>
<protocol>
GET /we/are/all/twits/1138 HTTP/1.1
Host: %HOSTIP:%HTTPPORT
Accept: */*

GET /we/are/all/moo.html/?name=%d8%a2%d8%ba%d8%a7%d8%b2-%d8%b3%d9%85-%d8%b2%d8%af%d8%a7%db%8c%db%8c-%d8%a7%d8%b2-%d8%a8%d8%a7%d8%b2%d8%a7%d8%b1-%d9%be%d9%88%d9%84&testcase=/11380002 HTTP/1.1
User-Agent: curl/7.10 (i686-pc-linux-gnu) libcurl/7.10 OpenSSL/0.9.6c ipv6 zlib/1.1.3
Host: %HOSTIP:%HTTPPORT
Accept: */*

</protocol>
</verify>
</testcase>