Newer
Older
return newlen;
}
/*
 * strcpy_url() writes 'url' into 'output' and escapes every space on the
 * way: a space found before the first '?' becomes "%20", a space found after
 * it becomes '+'. Every other byte is copied unchanged and the result is
 * zero terminated.
 *
 * No size is passed in, so the caller must hand over an 'output' buffer that
 * is large enough for the expanded string plus the terminating zero byte.
 */
static void strcpy_url(char *output, const char *url)
{
  bool in_query = FALSE; /* becomes TRUE once the first '?' has been seen */
  const char *src = url;
  char *dst = output;

  while(*src) {
    const char ch = *src++;

    if(' ' == ch) {
      if(in_query)
        *dst++ = '+';   /* query part: a space is written as '+' */
      else {
        dst[0] = '%';   /* path part: a space is written as "%20" */
        dst[1] = '2';
        dst[2] = '0';
        dst += 3;
      }
      continue;
    }

    if('?' == ch)
      in_query = TRUE;  /* everything after this is the query part */

    *dst++ = ch;        /* plain copy, the '?' itself included */
  }

  *dst = 0; /* zero terminate output buffer */
}
/*
 * is_absolute_url() tells whether 'url' is absolute (as opposed to
 * relative).
 *
 * A URL counts as absolute when it starts with a scheme of one to fifteen
 * characters, none of them '?', '&', '/' or ':', immediately followed by
 * "://" and at least one more character. Anything else - a relative
 * reference, an empty string, a bare "scheme://" or a scheme longer than
 * fifteen characters - gives a false return value.
 */
static bool is_absolute_url(const char *url)
{
  char prot[16]; /* URL protocol string storage: 15 characters + zero byte */
  char letter;   /* receives the first character that follows "://" */

  /* both conversions only succeed when the scheme, the "://" separator and
     one more byte are all present */
  return (bool)(2 == sscanf(url, "%15[^?&/:]://%c", prot, &letter));
}
/*
 * concat_url() concatenates a relative URL to a base URL, making it
 * absolute, and URL-encodes any spaces in the appended part.
 *
 * 'base' is the URL the relative reference was received for; it is not
 * modified, all cutting is done on a private copy.
 * 'relurl' is the relative reference. Three shapes are told apart:
 *   - it starts with '/': it replaces everything after the host part
 *   - it starts with '?': it replaces the query part only
 *   - anything else: it replaces the last path segment, after one leading
 *     "./" has been skipped and every leading "../" has removed one more
 *     directory level from the base
 *
 * Returns the new URL in malloc()ed memory that the caller must free, or
 * NULL when out of memory.
 */
static char *concat_url(const char *base, const char *relurl)
{
  /***
   TRY to append this new path to the old URL
   to the right of the host part. Oh crap, this is doomed to cause
   problems in the future...
  */
  char *newest;
  char *protsep;
  char *pathsep;
  size_t newlen;

  const char *useurl = relurl;
  size_t urllen;

  /* we must make our own copy of the URL to play with, as it may
     point to read-only data */
  char *url_clone=strdup(base);

  if(!url_clone)
    return NULL; /* skip out of this NOW */

  /* protsep points to the start of the host name */
  protsep=strstr(url_clone, "//");
  if(!protsep)
    protsep=url_clone;
  else
    protsep+=2; /* pass the slashes */

  if('/' != relurl[0]) {
    int level=0;

    /* First we need to find out if there's a ?-letter in the URL,
       and cut it and the right-side of that off */
    pathsep = strchr(protsep, '?');
    if(pathsep)
      *pathsep=0;

    /* we have a relative path to append to the last slash if there's one
       available, or if the new URL is just a query string (starts with a
       '?') we append the new one at the end of the entire currently worked
       out URL */
    if(useurl[0] != '?') {
      pathsep = strrchr(protsep, '/');
      if(pathsep)
        *pathsep=0;
    }

    /* Check if there's any slash after the host name, and if so, remember
       that position instead. From here on a NULL protsep means "the base
       has no path part left". */
    pathsep = strchr(protsep, '/');
    if(pathsep)
      protsep = pathsep+1;
    else
      protsep = NULL;

    /* now deal with one "./" or any amount of "../" in the newurl
       and act accordingly */

    if((useurl[0] == '.') && (useurl[1] == '/'))
      useurl+=2; /* just skip the "./" */

    while((useurl[0] == '.') &&
          (useurl[1] == '.') &&
          (useurl[2] == '/')) {
      level++;
      useurl+=3; /* pass the "../" */
    }

    if(protsep) {
      while(level--) {
        /* cut off one more level from the right of the original URL */
        pathsep = strrchr(protsep, '/');
        if(pathsep)
          *pathsep=0;
        else {
          /* no more levels to remove, the path part ends up empty */
          *protsep=0;
          break;
        }
      }
    }
  }
  else {
    /* We got a new absolute path for this server, cut off from the
       first slash */
    pathsep = strchr(protsep, '/');
    if(pathsep) {
      /* When people use badly formatted URLs, such as
         "http://www.url.com?dir=/home/daniel" we must not use the first
         slash, if there's a ?-letter before it! */
      char *sep = strchr(protsep, '?');
      if(sep && (sep < pathsep))
        pathsep = sep;
      *pathsep=0;
    }
    else {
      /* There was no slash. Now, since we might be operating on a badly
         formatted URL, such as "http://www.url.com?id=2380" which doesn't
         use a slash separator as it is supposed to, we need to check for a
         ?-letter as well! */
      pathsep = strchr(protsep, '?');
      if(pathsep)
        *pathsep=0;
    }
  }

  /* If the new part contains a space, this is a mighty stupid redirect
     but we still make an effort to do "right". To the left of a '?'
     letter we replace each space with %20 while it is replaced with '+'
     on the right side of the '?' letter.

     NOTE(review): strlen_url() is defined outside this block; it is
     presumably the length 'useurl' occupies after that replacement -
     confirm against its definition.
  */
  newlen = strlen_url(useurl);

  urllen = strlen(url_clone);

  newest = malloc(urllen + 1 + /* possible slash */
                  newlen + 1 /* zero byte */);

  if(!newest) {
    free(url_clone); /* don't leak this */
    return NULL;
  }

  /* copy over the root url part */
  memcpy(newest, url_clone, urllen);

  /* check if we need to append a slash */
  if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0]))
    ;
  else
    newest[urllen++]='/';

  /* then append the new piece on the right side; strcpy_url() also writes
     the terminating zero byte */
  strcpy_url(&newest[urllen], useurl);

  free(url_clone);

  return newest;
}
#endif /* CURL_DISABLE_HTTP */
/*
* Curl_follow() handles the URL redirect magic. Pass in the 'newurl' string
* as given by the remote server and set up the new URL to request.
*/
CURLcode Curl_follow(struct SessionHandle *data,
Daniel Stenberg
committed
char *newurl, /* this 'newurl' is the Location: string,
and it must be malloc()ed before passed
here */
Daniel Stenberg
committed
followtype type) /* see transfer.h */
{
#ifdef CURL_DISABLE_HTTP
(void)data;
(void)newurl;
(void)type;
/* Location: following will not happen when HTTP is disabled */
return CURLE_TOO_MANY_REDIRECTS;
#else
/* Location: redirect */
Daniel Stenberg
committed
bool disallowport = FALSE;
Daniel Stenberg
committed
if(type == FOLLOW_REDIR) {
Daniel Stenberg
committed
if((data->set.maxredirs != -1) &&
Daniel Stenberg
committed
(data->set.followlocation >= data->set.maxredirs)) {
failf(data,"Maximum (%d) redirects followed", data->set.maxredirs);
return CURLE_TOO_MANY_REDIRECTS;
}
Daniel Stenberg
committed
/* mark the next request as a followed location: */
data->state.this_is_a_follow = TRUE;
Daniel Stenberg
committed
data->set.followlocation++; /* count location-followers */
Daniel Stenberg
committed
if(data->set.http_auto_referer) {
/* We are asked to automatically set the previous URL as the referer
when we get the next URL. We pick the ->url field, which may or may
not be 100% correct */
Daniel Stenberg
committed
if(data->change.referer_alloc)
/* If we already have an allocated referer, free this first */
free(data->change.referer);
Daniel Stenberg
committed
data->change.referer = strdup(data->change.url);
if (!data->change.referer) {
data->change.referer_alloc = FALSE;
return CURLE_OUT_OF_MEMORY;
}
Daniel Stenberg
committed
data->change.referer_alloc = TRUE; /* yes, free this later */
}
}
if(!is_absolute_url(newurl)) {
/***
*DANG* this is an RFC 2068 violation. The URL is supposed
to be absolute and this doesn't seem to be that!
*/
char *absolute = concat_url(data->change.url, newurl);
if (!absolute)
return CURLE_OUT_OF_MEMORY;
free(newurl);
newurl = absolute;
}
/* This is an absolute URL, don't allow the custom port number */
Daniel Stenberg
committed
disallowport = TRUE;
if(strchr(newurl, ' ')) {
/* This new URL contains at least one space, this is a mighty stupid
redirect but we still make an effort to do "right". */
char *newest;
size_t newlen = strlen_url(newurl);
newest = malloc(newlen+1); /* get memory for this */
if (!newest)
return CURLE_OUT_OF_MEMORY;
strcpy_url(newest, newurl); /* create a space-free URL */
free(newurl); /* that was no good */
newurl = newest; /* use this instead now */
}
}
Daniel Stenberg
committed
if(type == FOLLOW_FAKE) {
/* we're only figuring out the new url if we would've followed locations
but now we're done so we can get out! */
data->info.wouldredirect = newurl;
return CURLE_OK;
}
if(disallowport)
data->state.allow_port = FALSE;
if(data->change.url_alloc)
free(data->change.url);
else
data->change.url_alloc = TRUE; /* the URL is allocated */
data->change.url = newurl;
newurl = NULL; /* don't free! */
infof(data, "Issue another request to this URL: '%s'\n", data->change.url);
/*
* We get here when the HTTP code is 300-399 (and 401). We need to perform
* differently based on exactly what return code there was.
* News from 7.10.6: we can also get here on a 401 or 407, in case we act on
* a HTTP (proxy-) authentication scheme other than Basic.
*/
switch(data->info.httpcode) {
/* 401 - Act on a WWW-Authenticate, we keep on moving and do the
Authorization: XXXX header in the HTTP request code snippet */
/* 407 - Act on a Proxy-Authenticate, we keep on moving and do the
Proxy-Authorization: XXXX header in the HTTP request code snippet */
/* 300 - Multiple Choices */
/* 306 - Not used */
/* 307 - Temporary Redirect */
default: /* for all above (and the unknown ones) */
/* Some codes are explicitly mentioned since I've checked RFC2616 and they
* seem to be OK to POST to.
*/
break;
case 301: /* Moved Permanently */
/* (quote from RFC2616, section 10.3.2):
* Note: When automatically redirecting a POST request after receiving a
* 301 status code, some existing HTTP/1.0 user agents will erroneously
* change it into a GET request.
*
* ----
*
* Warning: Because most of importants user agents do this obvious RFC2616
* violation, many webservers expect this misbehavior. So these servers
* often answers to a POST request with an error page. To be sure that
* libcurl gets the page that most user agents would get, libcurl has to
* force GET.
*
* This behaviour can be overridden with CURLOPT_POSTREDIR.
*/
if( (data->set.httpreq == HTTPREQ_POST
|| data->set.httpreq == HTTPREQ_POST_FORM)
&& !data->set.post301) {
infof(data,
"Violate RFC 2616/10.3.2 and switch from POST to GET\n");
data->set.httpreq = HTTPREQ_GET;
}
break;
case 302: /* Found */
/* (From 10.3.3)
Note: RFC 1945 and RFC 2068 specify that the client is not allowed
to change the method on the redirected request. However, most
existing user agent implementations treat 302 as if it were a 303
response, performing a GET on the Location field-value regardless
of the original request method. The status codes 303 and 307 have
been added for servers that wish to make unambiguously clear which
kind of reaction is expected of the client.
(From 10.3.4)
Note: Many pre-HTTP/1.1 user agents do not understand the 303
status. When interoperability with such clients is a concern, the
302 status code may be used instead, since most user agents react
to a 302 response as described here for 303.
This behaviour can be overriden with CURLOPT_POSTREDIR
*/
if( (data->set.httpreq == HTTPREQ_POST
|| data->set.httpreq == HTTPREQ_POST_FORM)
&& !data->set.post302) {
infof(data,
"Violate RFC 2616/10.3.3 and switch from POST to GET\n");
data->set.httpreq = HTTPREQ_GET;
}
break;
case 303: /* See Other */
/* Disable both types of POSTs, since doing a second POST when
* following isn't what anyone would want! */
if(data->set.httpreq != HTTPREQ_GET) {
data->set.httpreq = HTTPREQ_GET; /* enforce GET request */
infof(data, "Disables POST, goes with %s\n",
Daniel Stenberg
committed
data->set.opt_no_body?"HEAD":"GET");
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
}
break;
case 304: /* Not Modified */
/* 304 means we did a conditional request and it was "Not modified".
* We shouldn't get any Location: header in this response!
*/
break;
case 305: /* Use Proxy */
/* (quote from RFC2616, section 10.3.6):
* "The requested resource MUST be accessed through the proxy given
* by the Location field. The Location field gives the URI of the
* proxy. The recipient is expected to repeat this single request
* via the proxy. 305 responses MUST only be generated by origin
* servers."
*/
break;
}
Curl_pgrsTime(data, TIMER_REDIRECT);
Curl_pgrsResetTimes(data);
return CURLE_OK;
#endif /* CURL_DISABLE_HTTP */
}
static CURLcode
connect_host(struct SessionHandle *data,
struct connectdata **conn)
{
CURLcode res = CURLE_OK;
Daniel Stenberg
committed
bool async;
bool protocol_done=TRUE; /* will be TRUE always since this is only used
within the easy interface */
Daniel Stenberg
committed
Curl_pgrsTime(data, TIMER_STARTSINGLE);
res = Curl_connect(data, conn, &async, &protocol_done);
if((CURLE_OK == res) && async) {
/* Now, if async is TRUE here, we need to wait for the name
to resolve */
res = Curl_wait_for_resolv(*conn, NULL);
if(CURLE_OK == res)
/* Resolved, continue with the connection */
res = Curl_async_resolved(*conn, &protocol_done);
else
/* if we can't resolve, we kill this "connection" now */
(void)Curl_disconnect(*conn);
}
return res;
}
Daniel Stenberg
committed
/* Returns TRUE and sets '*url' if a request retry is wanted.
NOTE: that the *url is malloc()ed. */
bool Curl_retry_request(struct connectdata *conn,
char **url)
{
bool retry = FALSE;
struct SessionHandle *data = conn->data;
Daniel Stenberg
committed
/* if we're talking upload, we can't do the checks below, unless the protocol
is HTTP as when uploading over HTTP we will still get a response */
if(data->set.upload && !(conn->protocol&PROT_HTTP))
return retry;
Daniel Stenberg
committed
if((data->req.bytecount +
data->req.headerbytecount == 0) &&
Daniel Stenberg
committed
conn->bits.reuse &&
Daniel Stenberg
committed
!data->set.opt_no_body) {
Daniel Stenberg
committed
/* We got no data, we attempted to re-use a connection and yet we want a
"body". This might happen if the connection was left alive when we were
done using it before, but that was closed when we wanted to read from
it again. Bad luck. Retry the same request on a fresh connect! */
infof(conn->data, "Connection died, retrying a fresh connect\n");
*url = strdup(conn->data->change.url);
conn->bits.close = TRUE; /* close this connection */
conn->bits.retry = TRUE; /* mark this as a connection we're about
to retry. Marking it this way should
prevent i.e HTTP transfers to return
error just because nothing has been
transfered! */
retry = TRUE;
}
return retry;
}
/*
* Curl_perform() is the internal high-level function that gets called by the
* external curl_easy_perform() function. It inits, performs and cleans up a
* single file transfer.
*/
CURLcode Curl_perform(struct SessionHandle *data)
{
CURLcode res;
CURLcode res2;
struct connectdata *conn=NULL;
char *newurl = NULL; /* possibly a new URL to follow to! */
followtype follow = FOLLOW_NONE;
data->state.used_interface = Curl_if_easy;
res = Curl_pretransfer(data);
if(res)
return res;
/*
* It is important that there is NO 'return' from this function at any other
* place than falling down to the end of the function! This is because we
* have cleanup stuff that must be done before we get back, and that is only
* performed after this do-while loop.
*/
res = connect_host(data, &conn); /* primary connection */
if(res == CURLE_OK) {
bool do_done;
Daniel Stenberg
committed
if(data->set.connect_only) {
/* keep connection open for application to use the socket */
conn->bits.close = FALSE;
Daniel Stenberg
committed
res = Curl_done(&conn, CURLE_OK, FALSE);
Daniel Stenberg
committed
break;
}
res = Curl_do(&conn, &do_done);
if(res == CURLE_OK) {
res = Transfer(conn); /* now fetch that URL please */
if((res == CURLE_OK) || (res == CURLE_RECV_ERROR)) {
Daniel Stenberg
committed
bool retry = Curl_retry_request(conn, &newurl);
res = CURLE_OK;
Daniel Stenberg
committed
follow = FOLLOW_RETRY;
if (!newurl)
res = CURLE_OUT_OF_MEMORY;
}
else if (res == CURLE_OK) {
/*
* We must duplicate the new URL here as the connection data may
Daniel Stenberg
committed
* be free()ed in the Curl_done() function. We prefer the newurl
* one since that's used for redirects or just further requests
* for retries or multi-stage HTTP auth methods etc.
*/
Daniel Stenberg
committed
if(data->req.newurl) {
follow = FOLLOW_REDIR;
newurl = strdup(data->req.newurl);
if (!newurl)
res = CURLE_OUT_OF_MEMORY;
Daniel Stenberg
committed
}
else if(data->req.location) {
follow = FOLLOW_FAKE;
newurl = strdup(data->req.location);
if (!newurl)
res = CURLE_OUT_OF_MEMORY;
Daniel Stenberg
committed
}
}
/* in the above cases where 'newurl' gets assigned, we have a fresh
* allocated memory pointed to */
}
if(res != CURLE_OK) {
/* The transfer phase returned error, we mark the connection to get
* closed to prevent being re-used. This is because we can't
* possibly know if the connection is in a good shape or not now. */
conn->bits.close = TRUE;
Daniel Stenberg
committed
Daniel Stenberg
committed
if(CURL_SOCKET_BAD != conn->sock[SECONDARYSOCKET]) {
Daniel Stenberg
committed
/* if we failed anywhere, we must clean up the secondary socket if
it was used */
Daniel Stenberg
committed
sclose(conn->sock[SECONDARYSOCKET]);
Daniel Stenberg
committed
conn->sock[SECONDARYSOCKET] = CURL_SOCKET_BAD;
Daniel Stenberg
committed
}
}
Daniel Stenberg
committed
/* Always run Curl_done(), even if some of the previous calls
failed, but return the previous (original) error code */
Daniel Stenberg
committed
res2 = Curl_done(&conn, res, FALSE);
Daniel Stenberg
committed
if(CURLE_OK == res)
res = res2;
else if(conn)
/* Curl_do() failed, clean up left-overs in the done-call, but note
that at some cases the conn pointer is NULL when Curl_do() failed
and the connection cache is very small so only call Curl_done() if
conn is still "alive".
*/
Daniel Stenberg
committed
res2 = Curl_done(&conn, res, FALSE);
Daniel Stenberg
committed
/*
* Important: 'conn' cannot be used here, since it may have been closed
* in 'Curl_done' or other functions.
*/
Daniel Stenberg
committed
if((res == CURLE_OK) && follow) {
res = Curl_follow(data, newurl, follow);
if(CURLE_OK == res) {
Daniel Stenberg
committed
/* if things went fine, Curl_follow() freed or otherwise took
responsibility for the newurl pointer */
newurl = NULL;
Daniel Stenberg
committed
if(follow >= FOLLOW_RETRY) {
follow = FOLLOW_NONE;
continue;
}
/* else we break out of the loop below */
}
}
break; /* it only reaches here when this shouldn't loop */
} while(1); /* loop if Location: */
Daniel Stenberg
committed
if(newurl)
free(newurl);
Daniel Stenberg
committed
if(res && !data->state.errorbuf) {
/*
* As an extra precaution: if no error string has been set and there was
* an error, use the strerror() string or if things are so bad that not
* even that is good, set a bad string that mentions the error code.
*/
Daniel Stenberg
committed
if(!str)
failf(data, "unspecified error %d", (int)res);
else
failf(data, "%s", str);
}
/* run post-transfer unconditionally, but don't clobber the return code if
we already have an error code recorder */
res2 = Curl_posttransfer(data);
if(!res && res2)
res = res2;
return res;
}
* Curl_setup_transfer() is called to setup some basic properties for the
* upcoming transfer.
Curl_setup_transfer(
Daniel Stenberg
committed
struct connectdata *conn, /* connection data */
int sockindex, /* socket index to read from or -1 */
curl_off_t size, /* -1 if unknown at this point */
bool getheader, /* TRUE if header parsing is wanted */
curl_off_t *bytecountp, /* return number of bytes read or NULL */
int writesockindex, /* socket index to write to, it may very well be
the same we read from. -1 disables */
curl_off_t *writecountp /* return number of bytes written or NULL */
)
struct SessionHandle *data;
Daniel Stenberg
committed
struct SingleRequest *k;
Daniel Stenberg
committed
DEBUGASSERT(conn != NULL);
data = conn->data;
Daniel Stenberg
committed
k = &data->req;
DEBUGASSERT((sockindex <= 1) && (sockindex >= -1));
/* now copy all input parameters */
conn->sockfd = sockindex == -1 ?
CURL_SOCKET_BAD : conn->sock[sockindex];
conn->writesockfd = writesockindex == -1 ?
CURL_SOCKET_BAD:conn->sock[writesockindex];
Daniel Stenberg
committed
k->getheader = getheader;
Daniel Stenberg
committed
k->size = size;
k->bytecountp = bytecountp;
k->writebytecountp = writecountp;
Daniel Stenberg
committed
/* The code sequence below is placed in this function just because all
necessary input is not always known in do_complete() as this function may
be called after that */
Daniel Stenberg
committed
if(!k->getheader) {
Daniel Stenberg
committed
k->header = FALSE;
Daniel Stenberg
committed
if(size > 0)
Curl_pgrsSetDownloadSize(data, size);
Daniel Stenberg
committed
}
/* we want header and/or body, if neither then don't do this! */
Daniel Stenberg
committed
if(k->getheader || !data->set.opt_no_body) {
Daniel Stenberg
committed
if(conn->sockfd != CURL_SOCKET_BAD) {
k->keepon |= KEEP_READ;
}
if(conn->writesockfd != CURL_SOCKET_BAD) {
/* HTTP 1.1 magic:
Even if we require a 100-return code before uploading data, we might
need to write data before that since the REQUEST may not have been
finished sent off just yet.
Thus, we must check if the request has been sent before we set the
state info where we wait for the 100-return code
*/
if((data->state.expect100header) &&
(data->state.proto.http->sending == HTTPSEND_BODY)) {
Daniel Stenberg
committed
/* wait with write until we either got 100-continue or a timeout */
k->exp100 = EXP100_AWAITING_CONTINUE;
Daniel Stenberg
committed
k->start100 = k->start;
}
else {
if(data->state.expect100header)
/* when we've sent off the rest of the headers, we must await a
100-continue but first finish sending the request */
k->exp100 = EXP100_SENDING_REQUEST;
/* enable the write bit when we're not waiting for continue */
Daniel Stenberg
committed
k->keepon |= KEEP_WRITE;
}
} /* if(conn->writesockfd != CURL_SOCKET_BAD) */
} /* if(k->getheader || !data->set.opt_no_body) */
Daniel Stenberg
committed
return CURLE_OK;