Skip to content
url.c 181 KiB
Newer Older
    (needle->handler->protocol & PROTO_FAMILY_HTTP) ? TRUE : FALSE;
  *force_reuse = FALSE;

  /* We can't pipe if the site is blacklisted */
  if(canPipeline && Curl_pipeline_site_blacklisted(data, needle)) {
    canPipeline = FALSE;
  }

  /* Look up the bundle with all the connections to this
     particular host */
  bundle = Curl_conncache_find_bundle(data->state.conn_cache,
                                      needle->host.name);
  if(bundle) {
    size_t max_pipe_len = Curl_multi_max_pipeline_length(data->multi);
    size_t best_pipe_len = max_pipe_len;
    infof(data, "Found bundle for host %s: %p\n",
          needle->host.name, (void *)bundle);
    /* We can't pipe if we don't know anything about the server */
    if(canPipeline && !bundle->server_supports_pipelining) {
      infof(data, "Server doesn't support pipelining\n");
      canPipeline = FALSE;
    }

    curr = bundle->conn_list->head;
    while(curr) {
      bool match = FALSE;
      bool credentialsMatch = FALSE;
      size_t pipeLen;
      /*
       * Note that if we use a HTTP proxy, we check connections to that
       * proxy and not to the actual remote server.
       */
      check = curr->ptr;
      curr = curr->next;

      if(disconnect_if_dead(check, data))
        continue;
      pipeLen = check->send_pipe->size + check->recv_pipe->size;
      if(canPipeline) {
        /* Make sure the pipe has only GET requests */
        struct SessionHandle* sh = gethandleathead(check->send_pipe);
        struct SessionHandle* rh = gethandleathead(check->recv_pipe);
        if(sh) {
          if(!IsPipeliningPossible(sh, check))
            continue;
        }
        else if(rh) {
          if(!IsPipeliningPossible(rh, check))
            continue;
        }
      else {
        if(pipeLen > 0) {
          /* can only happen within multi handles, and means that another easy
             handle is using this connection */
        if(Curl_resolver_asynch()) {
          /* ip_addr_str[0] is NUL only if the resolving of the name hasn't
             completed yet and until then we don't re-use this connection */
          if(!check->ip_addr_str[0]) {
            infof(data,
                  "Connection #%ld is still name resolving, can't reuse\n",
                  check->connection_id);
            continue;
          }

        if((check->sock[FIRSTSOCKET] == CURL_SOCKET_BAD) ||
           check->bits.close) {
          /* Don't pick a connection that hasn't connected yet or that is going
             to get closed. */
          infof(data, "Connection #%ld isn't open enough, can't reuse\n",
                check->connection_id);
#ifdef DEBUGBUILD
          if(check->recv_pipe->size > 0) {
            infof(data,
                  "BAD! Unconnected #%ld has a non-empty recv pipeline!\n",
                  check->connection_id);
          }
      if((needle->handler->flags&PROTOPT_SSL) !=
         (check->handler->flags&PROTOPT_SSL))
        /* don't do mixed SSL and non-SSL connections */
        if(!(needle->handler->protocol & check->handler->protocol))
          /* except protocols that have been upgraded via TLS */
          continue;
      if(needle->handler->flags&PROTOPT_SSL) {
        if((data->set.ssl.verifypeer != check->verifypeer) ||
           (data->set.ssl.verifyhost != check->verifyhost))
          continue;
      }

      if(needle->bits.proxy != check->bits.proxy)
        /* don't do mixed proxy and non-proxy connections */
Yang Tse's avatar
 
Yang Tse committed
        continue;

      if(!canPipeline && check->inuse)
        /* this request can't be pipelined but the checked connection is
           already in use so we skip it */
      if(needle->localdev || needle->localport) {
        /* If we are bound to a specific local end (IP+port), we must not
           re-use a random other one, although if we didn't ask for a
           particular one we can reuse one that was bound.

           This comparison is a bit rough and too strict. Since the input
           parameters can be specified in numerous ways and still end up the
           same it would take a lot of processing to make it really accurate.
           Instead, this matching will assume that re-uses of bound connections
           will most likely also re-use the exact same binding parameters and
           missing out a few edge cases shouldn't hurt anyone very much.
        */
        if((check->localport != needle->localport) ||
           (check->localportrange != needle->localportrange) ||
           !check->localdev ||
           !needle->localdev ||
           strcmp(check->localdev, needle->localdev))
          continue;
      }

      if((!(needle->handler->flags & PROTOPT_CREDSPERREQUEST)) ||
         wantNTLMhttp) {
        /* This protocol requires credentials per connection or is HTTP+NTLM,
           so verify that we're using the same name and password as well */
        if(!strequal(needle->user, check->user) ||
           !strequal(needle->passwd, check->passwd)) {
          /* one of them was different */
          continue;
        }
        credentialsMatch = TRUE;
      if(!needle->bits.httpproxy || needle->handler->flags&PROTOPT_SSL ||
         (needle->bits.httpproxy && check->bits.httpproxy &&
          needle->bits.tunnel_proxy && check->bits.tunnel_proxy &&
          Curl_raw_equal(needle->proxy.name, check->proxy.name) &&
          (needle->port == check->port))) {
        /* The requested connection does not use a HTTP proxy or it uses SSL or
           it is a non-SSL protocol tunneled over the same http proxy name and
           port number or it is a non-SSL protocol which is allowed to be
           upgraded via TLS */

        if((Curl_raw_equal(needle->handler->scheme, check->handler->scheme) ||
            needle->handler->protocol & check->handler->protocol) &&
           Curl_raw_equal(needle->host.name, check->host.name) &&
           needle->remote_port == check->remote_port) {
          if(needle->handler->flags & PROTOPT_SSL) {
            /* This is a SSL connection so verify that we're using the same
               SSL options as well */
            if(!Curl_ssl_config_matches(&needle->ssl_config,
                                        &check->ssl_config)) {
              DEBUGF(infof(data,
                           "Connection #%ld has different SSL parameters, "
                           "can't reuse\n",
                           check->connection_id));
              continue;
            }
            else if(check->ssl[FIRSTSOCKET].state != ssl_connection_complete) {
              DEBUGF(infof(data,
                           "Connection #%ld has not started SSL connect, "
                           "can't reuse\n",
                           check->connection_id));
              continue;
            }
Daniel Stenberg's avatar
Daniel Stenberg committed
      }
      else { /* The requested needle connection is using a proxy,
                is the checked one using the same host, port and type? */
        if(check->bits.proxy &&
           (needle->proxytype == check->proxytype) &&
           (needle->bits.tunnel_proxy == check->bits.tunnel_proxy) &&
           Curl_raw_equal(needle->proxy.name, check->proxy.name) &&
           needle->port == check->port) {
          /* This is the same proxy connection, use it! */
          match = TRUE;
        }
        /* If we are looking for an HTTP+NTLM connection, check if this is
           already authenticating with the right credentials. If not, keep
           looking so that we can reuse NTLM connections if
           possible. (Especially we must not reuse the same connection if
           partway through a handshake!) */
        if(wantNTLMhttp) {
          if(credentialsMatch && check->ntlm.state != NTLMSTATE_NONE) {
            chosen = check;

            /* We must use this connection, no other */
            *force_reuse = TRUE;
            break;
          }
          else if(credentialsMatch)
            /* this is a backup choice */
            chosen = check;
          continue;
        if(canPipeline) {
          /* We can pipeline if we want to. Let's continue looking for
             the optimal connection to use, i.e the shortest pipe that is not
             blacklisted. */
          if(pipeLen == 0) {
            /* We have the optimal connection. Let's stop looking. */
            chosen = check;
            break;
          }

          /* We can't use the connection if the pipe is full */
          if(pipeLen >= max_pipe_len)
            continue;

          /* We can't use the connection if the pipe is penalized */
          if(Curl_pipeline_penalized(data, check))
            continue;

          if(pipeLen < best_pipe_len) {
            /* This connection has a shorter pipe so far. We'll pick this
               and continue searching */
            chosen = check;
            best_pipe_len = pipeLen;
            continue;
          }
        }
        else {
          /* We have found a connection. Let's stop searching. */
          chosen = check;
  if(chosen) {
    *usethis = chosen;
    return TRUE; /* yes, we found one to use! */
  }

  return FALSE; /* no matching connecting exists */
}

/* Mark the connection as 'idle', or close it if the cache is full.
   Returns TRUE if the connection is kept, or FALSE if it was closed. */
static bool
ConnectionDone(struct SessionHandle *data, struct connectdata *conn)
  /* data->multi->maxconnects can be negative, deal with it. */
  size_t maxconnects =
    (data->multi->maxconnects < 0) ? data->multi->num_easy * 4:
    data->multi->maxconnects;
  struct connectdata *conn_candidate = NULL;

  /* Mark the current connection as 'unused' */

  if(maxconnects > 0 &&
     data->state.conn_cache->num_connections > maxconnects) {
    infof(data, "Connection cache is full, closing the oldest one.\n");

    conn_candidate = find_oldest_idle_connection(data);

    if(conn_candidate) {
      /* Set the connection's owner correctly */
      conn_candidate->data = data;

      /* the winner gets the honour of being disconnected */
      (void)Curl_disconnect(conn_candidate, /* dead_connection */ FALSE);
    }
  }

  return (conn_candidate == conn) ? FALSE : TRUE;
Daniel Stenberg's avatar
Daniel Stenberg committed
 * The given input connection struct pointer is to be stored in the connection
 * cache. If the cache is already full, least interesting existing connection
 * (if any) gets closed.
 *
 * The given connection should be unique. That must've been checked prior to
 * this call.
 */
static CURLcode ConnectionStore(struct SessionHandle *data,
                                struct connectdata *conn)
  return Curl_conncache_add_conn(data->state.conn_cache, conn);
/* after a TCP connection to the proxy has been verified, this function does
   the next magic step.

   Note: this function's sub-functions call failf()
CURLcode Curl_connected_proxy(struct connectdata *conn,
                              int sockindex)
  if(!conn->bits.proxy || sockindex)
    /* this magic only works for the primary socket as the secondary is used
       for FTP only and it has FTP specific magic in ftp.c */
    return CURLE_OK;

  switch(conn->proxytype) {
#ifndef CURL_DISABLE_PROXY
  case CURLPROXY_SOCKS5:
  case CURLPROXY_SOCKS5_HOSTNAME:
    return Curl_SOCKS5(conn->proxyuser, conn->proxypasswd,
                       conn->host.name, conn->remote_port,
                       FIRSTSOCKET, conn);

    return Curl_SOCKS4(conn->proxyuser, conn->host.name,
                       conn->remote_port, FIRSTSOCKET, conn, FALSE);

  case CURLPROXY_SOCKS4A:
    return Curl_SOCKS4(conn->proxyuser, conn->host.name,
                       conn->remote_port, FIRSTSOCKET, conn, TRUE);

#endif /* CURL_DISABLE_PROXY */
  case CURLPROXY_HTTP:
  case CURLPROXY_HTTP_1_0:
    /* do nothing here. handled later. */
    break;
  default:
 * verboseconnect() displays verbose information after a connect
Yang Tse's avatar
Yang Tse committed
#ifndef CURL_DISABLE_VERBOSE_STRINGS
void Curl_verboseconnect(struct connectdata *conn)
  if(conn->data->set.verbose)
    infof(conn->data, "Connected to %s (%s) port %ld (#%ld)\n",
          conn->bits.proxy ? conn->proxy.dispname : conn->host.dispname,
          conn->ip_addr_str, conn->port, conn->connection_id);
Yang Tse's avatar
Yang Tse committed
#endif
int Curl_protocol_getsock(struct connectdata *conn,
                          curl_socket_t *socks,
                          int numsocks)
  if(conn->handler->proto_getsock)
    return conn->handler->proto_getsock(conn, socks, numsocks);
int Curl_doing_getsock(struct connectdata *conn,
                       curl_socket_t *socks,
                       int numsocks)
  if(conn && conn->handler->doing_getsock)
    return conn->handler->doing_getsock(conn, socks, numsocks);
}

/*
 * We are doing protocol-specific connecting and this is being called over and
 * over from the multi interface until the connection phase is done on
 * protocol layer.
 */

CURLcode Curl_protocol_connecting(struct connectdata *conn,
                                  bool *done)
  if(conn && conn->handler->connecting) {
    result = conn->handler->connecting(conn, done);
  }
  else
    *done = TRUE;

  return result;
}

/*
 * We are DOING this is being called over and over from the multi interface
 * until the DOING phase is done on protocol layer.
 */

CURLcode Curl_protocol_doing(struct connectdata *conn, bool *done)
{
  CURLcode result=CURLE_OK;

    result = conn->handler->doing(conn, done);
/*
 * We have discovered that the TCP connection has been successful, we can now
 * proceed with some action.
 *
 */
CURLcode Curl_protocol_connect(struct connectdata *conn,
                               bool *protocol_done)
  if(conn->bits.tcpconnect[FIRSTSOCKET] && conn->bits.protoconnstart) {
    /* We already are connected, get back. This may happen when the connect
       worked fine in the first call, like when we connect to a local server
       or proxy. Note that we don't know if the protocol is actually done.

       Unless this protocol doesn't have any protocol-connect callback, as
       then we know we're done. */
    result = Curl_proxy_connect(conn);
    if(result)
      return result;
    if(conn->bits.tunnel_proxy && conn->bits.httpproxy &&
       (conn->tunnel_state[FIRSTSOCKET] != TUNNEL_COMPLETE))
      /* when using an HTTP tunnel proxy, await complete tunnel establishment
         before proceeding further. Return CURLE_OK so we'll be called again */
      return CURLE_OK;

      /* is there a protocol-specific connect() procedure? */
      /* Call the protocol-specific connect function */
      result = conn->handler->connect_it(conn, protocol_done);
    }
    else
      *protocol_done = TRUE;

    /* it has started, possibly even completed but that knowledge isn't stored
       in this bit! */
static bool is_ASCII_name(const char *hostname)
{
  const unsigned char *ch = (const unsigned char*)hostname;

Gisle Vanem's avatar
 
Gisle Vanem committed

#ifdef USE_LIBIDN
Gisle Vanem's avatar
 
Gisle Vanem committed
/*
 * Check if characters in hostname is allowed in Top Level Domain.
 */
static bool tld_check_name(struct SessionHandle *data,
                           const char *ace_hostname)
Gisle Vanem's avatar
 
Gisle Vanem committed
{
  size_t err_pos;
  char *uc_name = NULL;
  int rc;
Yang Tse's avatar
Yang Tse committed
#ifndef CURL_DISABLE_VERBOSE_STRINGS
  const char *tld_errmsg = "<no msg>";
Yang Tse's avatar
Yang Tse committed
#else
  (void)data;
#endif
Gisle Vanem's avatar
 
Gisle Vanem committed

  /* Convert (and downcase) ACE-name back into locale's character set */
  rc = idna_to_unicode_lzlz(ace_hostname, &uc_name, 0);
Yang Tse's avatar
Yang Tse committed
    return FALSE;
Gisle Vanem's avatar
 
Gisle Vanem committed

  rc = tld_check_lz(uc_name, &err_pos, NULL);
Yang Tse's avatar
Yang Tse committed
#ifndef CURL_DISABLE_VERBOSE_STRINGS
    tld_errmsg = tld_strerror((Tld_rc)rc);
Yang Tse's avatar
Yang Tse committed
    infof(data, "WARNING: %s; pos %u = `%c'/0x%02X\n",
          tld_errmsg, err_pos, uc_name[err_pos],
          uc_name[err_pos] & 255);
Yang Tse's avatar
Yang Tse committed
    infof(data, "WARNING: TLD check for %s failed; %s\n",
          uc_name, tld_errmsg);
#endif /* CURL_DISABLE_VERBOSE_STRINGS */
Gisle Vanem's avatar
 
Gisle Vanem committed
     idn_free(uc_name);
Yang Tse's avatar
Yang Tse committed
  if(rc != TLD_SUCCESS)
    return FALSE;

  return TRUE;
Gisle Vanem's avatar
 
Gisle Vanem committed
}
/*
 * Perform any necessary IDN conversion of hostname
 */
static void fix_hostname(struct SessionHandle *data,
                         struct connectdata *conn, struct hostname *host)
Yang Tse's avatar
Yang Tse committed
#ifndef USE_LIBIDN
  (void)data;
  (void)conn;
#elif defined(CURL_DISABLE_VERBOSE_STRINGS)
  (void)conn;
#endif

  /* set the name we use to display the host name */
Daniel Stenberg's avatar
Daniel Stenberg committed
  host->dispname = host->name;

  len = strlen(host->name);
  if(host->name[len-1] == '.')
    /* strip off a single trailing dot if present, primarily for SNI but
       there's no use for it */
    host->name[len-1]=0;

  if(!is_ASCII_name(host->name)) {
#ifdef USE_LIBIDN
  /*************************************************************
   * Check name for non-ASCII and convert hostname to ACE form.
   *************************************************************/
  if(stringprep_check_version(LIBIDN_REQUIRED_VERSION)) {
    char *ace_hostname = NULL;
    int rc = idna_to_ascii_lz(host->name, &ace_hostname, 0);
    infof (data, "Input domain encoded as `%s'\n",
           stringprep_locale_charset ());
      infof(data, "Failed to convert %s to ACE; %s\n",
            host->name, Curl_idn_strerror(conn,rc));
      /* tld_check_name() displays a warning if the host name contains
         "illegal" characters for this TLD */
      (void)tld_check_name(data, ace_hostname);
Gisle Vanem's avatar
 
Gisle Vanem committed

      host->encalloc = ace_hostname;
      /* change the name pointer to point to the encoded hostname */
      host->name = host->encalloc;
    }
  }
#elif defined(USE_WIN32_IDN)
  /*************************************************************
   * Check name for non-ASCII and convert hostname to ACE form.
   *************************************************************/
    char *ace_hostname = NULL;
Guenter Knauf's avatar
Guenter Knauf committed
    int rc = curl_win32_idn_to_ascii(host->name, &ace_hostname);
    if(rc == 0)
      infof(data, "Failed to convert %s to ACE;\n",
            host->name);
    else {
      host->encalloc = ace_hostname;
      /* change the name pointer to point to the encoded hostname */
      host->name = host->encalloc;
    }
#else
    infof(data, "IDN support not present, can't parse Unicode domains\n");
Gisle Vanem's avatar
Gisle Vanem committed
#endif
static void llist_dtor(void *user, void *element)
{
  (void)user;
  (void)element;
  /* Do nothing */
}

/*
 * Allocate and initialize a new connectdata object.
 */
static struct connectdata *allocate_conn(struct SessionHandle *data)
  struct connectdata *conn = calloc(1, sizeof(struct connectdata));
  if(!conn)
    return NULL;

  conn->handler = &Curl_handler_dummy;  /* Be sure we have a handler defined
                                           already from start to avoid NULL
                                           situations and checks */

  /* and we setup a few fields in case we end up actually using this struct */

  conn->sock[FIRSTSOCKET] = CURL_SOCKET_BAD;     /* no file descriptor */
  conn->sock[SECONDARYSOCKET] = CURL_SOCKET_BAD; /* no file descriptor */
  conn->tempsock[0] = CURL_SOCKET_BAD; /* no file descriptor */
  conn->tempsock[1] = CURL_SOCKET_BAD; /* no file descriptor */
  conn->connection_id = -1;    /* no ID */
  conn->port = -1; /* unknown at this point */
  conn->remote_port = -1; /* unknown */

  /* Default protocol-independent behavior doesn't support persistent
     connections, so we set this to force-close. Protocols that support
     this need to set this to FALSE in their "curl_do" functions. */
  connclose(conn, "Default to force-close");

  /* Store creation time to help future close decision making */
  conn->created = Curl_tvnow();

  conn->data = data; /* Setup the association between this connection
                        and the SessionHandle */

  conn->proxytype = data->set.proxytype; /* type */

#ifdef CURL_DISABLE_PROXY

  conn->bits.proxy = FALSE;
  conn->bits.httpproxy = FALSE;
  conn->bits.proxy_user_passwd = FALSE;
  conn->bits.tunnel_proxy = FALSE;

#else /* CURL_DISABLE_PROXY */

  /* note that these two proxy bits are now just on what looks to be
     requested, they may be altered down the road */
  conn->bits.proxy = (data->set.str[STRING_PROXY] &&
                      *data->set.str[STRING_PROXY])?TRUE:FALSE;
  conn->bits.httpproxy = (conn->bits.proxy &&
                          (conn->proxytype == CURLPROXY_HTTP ||
                           conn->proxytype == CURLPROXY_HTTP_1_0))?TRUE:FALSE;
  conn->bits.proxy_user_passwd =
    (NULL != data->set.str[STRING_PROXYUSERNAME])?TRUE:FALSE;
  conn->bits.tunnel_proxy = data->set.tunnel_thru_httpproxy;

#endif /* CURL_DISABLE_PROXY */

  conn->bits.user_passwd = (NULL != data->set.str[STRING_USERNAME])?TRUE:FALSE;
  conn->bits.ftp_use_epsv = data->set.ftp_use_epsv;
  conn->bits.ftp_use_eprt = data->set.ftp_use_eprt;

  conn->verifypeer = data->set.ssl.verifypeer;
  conn->verifyhost = data->set.ssl.verifyhost;

  conn->ip_version = data->set.ipver;

#if defined(USE_NTLM) && defined(NTLM_WB_ENABLED)
  conn->ntlm_auth_hlpr_socket = CURL_SOCKET_BAD;
  conn->ntlm_auth_hlpr_pid = 0;
  conn->challenge_header = NULL;
  conn->response_header = NULL;
#endif

  if(Curl_multi_pipeline_enabled(data->multi) &&
      !conn->master_buffer) {
    /* Allocate master_buffer to be used for pipelining */
    conn->master_buffer = calloc(BUFSIZE, sizeof (char));
    if(!conn->master_buffer)
      goto error;
  }

  /* Initialize the pipeline lists */
  conn->send_pipe = Curl_llist_alloc((curl_llist_dtor) llist_dtor);
  conn->recv_pipe = Curl_llist_alloc((curl_llist_dtor) llist_dtor);
  if(!conn->send_pipe || !conn->recv_pipe)
#ifdef HAVE_GSSAPI
  conn->data_prot = PROT_CLEAR;
  /* Store the local bind parameters that will be used for this connection */
  if(data->set.str[STRING_DEVICE]) {
    conn->localdev = strdup(data->set.str[STRING_DEVICE]);
    if(!conn->localdev)
      goto error;
  }
  conn->localportrange = data->set.localportrange;
  conn->localport = data->set.localport;

  /* the close socket stuff needs to be copied to the connection struct as
     it may live on without (this specific) SessionHandle */
  conn->fclosesocket = data->set.fclosesocket;
  conn->closesocket_client = data->set.closesocket_client;

  Curl_llist_destroy(conn->send_pipe, NULL);
  Curl_llist_destroy(conn->recv_pipe, NULL);

  conn->send_pipe = NULL;
  conn->recv_pipe = NULL;

  Curl_safefree(conn->master_buffer);
  Curl_safefree(conn->localdev);
  Curl_safefree(conn);
  return NULL;
static CURLcode findprotocol(struct SessionHandle *data,
                             struct connectdata *conn,
                             const char *protostr)
{
  const struct Curl_handler * const *pp;
  const struct Curl_handler *p;

  /* Scan protocol handler table and match against 'protostr' to set a few
     variables based on the URL. Now that the handler may be changed later
     when the protocol specific setup function is called. */
  for(pp = protocols; (p = *pp) != NULL; pp++) {
    if(Curl_raw_equal(p->scheme, protostr)) {
      /* Protocol found in table. Check if allowed */
      if(!(data->set.allowed_protocols & p->protocol))
        /* nope, get out */
        break;

      /* it is allowed for "normal" request, now do an extra check if this is
         the result of a redirect */
      if(data->state.this_is_a_follow &&
         !(data->set.redir_protocols & p->protocol))
        /* nope, get out */
        break;

      /* Perform setup complement if some. */
      conn->handler = conn->given = p;

      /* 'port' and 'remote_port' are set in setup_connection_internals() */
      return CURLE_OK;
    }
  }


  /* The protocol was not found in the table, but we don't have to assign it
     to anything since it is already assigned to a dummy-struct in the
     create_conn() function when the connectdata struct is allocated. */
  failf(data, "Protocol \"%s\" not supported or disabled in " LIBCURL_NAME,
/*
 * Parse URL and fill in the relevant members of the connection struct.
static CURLcode parseurlandfillconn(struct SessionHandle *data,
                                    char **userp, char **passwdp,
                                    char **optionsp)
  bool rebuild_url = FALSE;
  *prot_missing = FALSE;
Daniel Stenberg's avatar
Daniel Stenberg committed
  /*************************************************************
   * Parse the URL.
   *
   * We need to parse the url even when using the proxy, because we will need
   * the hostname and port in case we are trying to SSL connect through the
   * proxy -- and we don't know if we will need to use SSL until we parse the
   * url ...
   ************************************************************/
  if((2 == sscanf(data->change.url, "%15[^:]:%[^\n]",
                  protobuf, path)) &&
     Curl_raw_equal(protobuf, "file")) {
    if(path[0] == '/' && path[1] == '/') {
      /* Allow omitted hostname (e.g. file:/<path>).  This is not strictly
       * speaking a valid file: URL by RFC 1738, but treating file:/<path> as
       * file://localhost/<path> is similar to how other schemes treat missing
       * hostnames.  See RFC 1808. */

      /* This cannot be done with strcpy() in a portable manner, since the
         memory areas overlap! */
      memmove(path, path + 2, strlen(path + 2)+1);
Daniel Stenberg's avatar
Daniel Stenberg committed
    /*
     * we deal with file://<host>/<path> differently since it supports no
     * hostname other than "localhost" and "127.0.0.1", which is unique among
     * the URL protocols specified in RFC 1738
     */
Daniel Stenberg's avatar
Daniel Stenberg committed
      /* the URL included a host name, we ignore host names in file:// URLs
         as the standards don't define what to do with them */
Daniel Stenberg's avatar
Daniel Stenberg committed
      if(ptr) {
        /* there was a slash present
Daniel Stenberg's avatar
Daniel Stenberg committed
           RFC1738 (section 3.1, page 5) says:
Daniel Stenberg's avatar
Daniel Stenberg committed
           The rest of the locator consists of data specific to the scheme,
           and is known as the "url-path". It supplies the details of how the
           specified resource can be accessed. Note that the "/" between the
           host (or port) and the url-path is NOT part of the url-path.
Daniel Stenberg's avatar
Daniel Stenberg committed
           As most agents use file://localhost/foo to get '/foo' although the
           slash preceding foo is a separator and not a slash for the path,
Daniel Stenberg's avatar
Daniel Stenberg committed
           a URL as file://localhost//foo must be valid as well, to refer to
           the same file with an absolute path.
        */

        if(ptr[1] && ('/' == ptr[1]))
          /* if there was two slashes, we skip the first one as that is then
             used truly as a separator */
        /* This cannot be made with strcpy, as the memory chunks overlap! */
Daniel Stenberg's avatar
Daniel Stenberg committed
  }
                   "%15[^\n:]://%[^\n/?]%[^\n]",
Daniel Stenberg's avatar
Daniel Stenberg committed
      /*
       * The URL was badly formatted, let's try the browser-style _without_
       * protocol specified like 'http://'.
       */
      rc = sscanf(data->change.url, "%[^\n/?]%[^\n]", conn->host.name, path);
Yang Tse's avatar
 
Yang Tse committed
      if(1 > rc) {
Daniel Stenberg's avatar
Daniel Stenberg committed
        /*
         * We couldn't even get this format.
         * djgpp 2.04 has a sscanf() bug where 'conn->host.name' is
         * assigned, but the return value is EOF!
#if defined(__DJGPP__) && (DJGPP_MINOR == 4)
        if(!(rc == -1 && *conn->host.name))
#endif
        {
          failf(data, "<url> malformed");
          return CURLE_URL_MALFORMAT;
        }
Daniel Stenberg's avatar
Daniel Stenberg committed

      /*
       * Since there was no protocol part specified, we guess what protocol it
       * is based on the first letters of the server name.
       */

      /* Note: if you add a new protocol, please update the list in
       * lib/version.c too! */
      if(checkprefix("FTP.", conn->host.name))
      else if(checkprefix("DICT.", conn->host.name))
      else if(checkprefix("LDAP.", conn->host.name))
      else if(checkprefix("IMAP.", conn->host.name))
      else if(checkprefix("SMTP.", conn->host.name))
        protop = "smtp";
      else if(checkprefix("POP3.", conn->host.name))
        protop = "pop3";
      *prot_missing = TRUE; /* not given in URL */
  /* We search for '?' in the host name (but only on the right side of a
   * @-letter to allow ?-letters in username and password) to handle things
   * like http://example.com?param= (notice the missing '/').
   */
  at = strchr(conn->host.name, '@');
  if(at)
    /* We must insert a slash before the '?'-letter in the URL. If the URL had
       a slash after the '?', that is where the path currently begins and the
       '?string' is still part of the host name.

       We must move the trailing part from the host name and put it first in
       the path. And have it all prefixed with a slash.
    */


    /* move the existing path plus the zero byte forward, to make room for
       the host-name part */
    memmove(path+hostlen+1, path, pathlen+1);

     /* now copy the trailing host part in front of the existing path */
    path[0]='/'; /* prepend the missing slash */
    rebuild_url = TRUE;
    /* if there's no path set, use a single slash */
    rebuild_url = TRUE;
  /* If the URL is malformatted (missing a '/' after hostname before path) we
   * insert a slash here. The only letter except '/' we accept to start a path
   * is '?'.
   */
    /* We need this function to deal with overlapping memory areas. We know
       that the memory area 'path' points to is 'urllen' bytes big and that
       is bigger than the path. Use +1 to move the zero byte too. */
    memmove(&path[1], path, strlen(path)+1);
    path[0] = '/';
    rebuild_url = TRUE;
  }
  else {
    /* sanitise paths and remove ../ and ./ sequences according to RFC3986 */
    char *newp = Curl_dedotdotify(path);
Yang Tse's avatar
Yang Tse committed
    if(!newp)
      return CURLE_OUT_OF_MEMORY;

    if(strcmp(newp, path)) {
      rebuild_url = TRUE;
      free(data->state.pathbuffer);
      data->state.pathbuffer = newp;
      data->state.path = newp;
      path = newp;
    }
    else
      free(newp);
   * "rebuild_url" means that one or more URL components have been modified so
   * we need to generate an updated full version.  We need the corrected URL
   * when communicating over HTTP proxy and we don't know at this point if
   * we're using a proxy or not.
  if(rebuild_url) {
    char *reurl;

    size_t plen = strlen(path); /* new path, should be 1 byte longer than
                                   the original */
    size_t urllen = strlen(data->change.url); /* original URL length */

    size_t prefixlen = strlen(conn->host.name);

    if(!*prot_missing)
      prefixlen += strlen(protop) + strlen("://");

    reurl = malloc(urllen + 2); /* 2 for zerobyte + slash */
    if(!reurl)
      return CURLE_OUT_OF_MEMORY;

    /* copy the prefix */
    memcpy(reurl, data->change.url, prefixlen);