url.c


/*
 * We have discovered that the TCP connection has been successful, we can now
 * proceed with some action.
 *
 * If we're using the multi interface, this host address pointer is most
 * likely NULL at this point as we can't keep the resolved info around. This
 * may call for some reworking, like a reference counter in the struct or
 * something.
 */
CURLcode Curl_protocol_connect(struct connectdata *conn)
{
  struct SessionHandle *data = conn->data;
  CURLcode result=CURLE_OK;

  if(conn->bits.tcpconnect)
    /* We already are connected, get back. This may happen when the connect
       worked fine in the first call, like when we connect to a local server
       or proxy. */
    return CURLE_OK;

  Curl_pgrsTime(data, TIMER_CONNECT); /* connect done */

  if(data->set.verbose)
    verboseconnect(conn);

  if(conn->curl_connect) {
    /* is there a protocol-specific connect() procedure? */

    /* set start time here for timeout purposes in the
     * connect procedure, it is later set again for the
     * progress meter purpose */
    conn->now = Curl_tvnow();

    /* Call the protocol-specific connect function */
    result = conn->curl_connect(conn);
  }

  return result; /* pass back status */
}

/*
 * Helpers for IDNA convertions.
 */
#ifdef USE_LIBIDN
static bool is_ASCII_name (const char *hostname)
{
  const unsigned char *ch = (const unsigned char*)hostname;

  while (*ch) {
    if (*ch++ & 0x80)
      return FALSE;
  }
  return TRUE;
}

/*
 * Check if characters in hostname is allowed in Top Level Domain.
 */
static bool tld_check_name (struct SessionHandle *data,
                            const char *ace_hostname)
{
  size_t err_pos;
  char *uc_name = NULL;
  int rc;

  /* Convert (and downcase) ACE-name back into locale's character set */
  rc = idna_to_unicode_lzlz(ace_hostname, &uc_name, 0);
  if (rc != IDNA_SUCCESS)
    return (FALSE);

  rc = tld_check_lz(uc_name, &err_pos, NULL);
  if (rc == TLD_INVALID)
     infof(data, "WARNING: %s; pos %u = `%c'/0x%02X\n",
#ifdef HAVE_TLD_STRERROR
           tld_strerror(rc),
#else
           "<no msg>",
#endif
           err_pos, uc_name[err_pos],
           uc_name[err_pos] & 255);
  else if (rc != TLD_SUCCESS)
       infof(data, "WARNING: TLD check for %s failed; %s\n",
             uc_name,
#ifdef HAVE_TLD_STRERROR
             tld_strerror(rc)
#else
             "<no msg>"
#endif
         );
  if (uc_name)
     idn_free(uc_name);
  return (rc == TLD_SUCCESS);
}
#endif

static void fix_hostname(struct connectdata *conn, struct hostname *host)
{
  /* set the name we use to display the host name */
  host->dispname = host->name;

#ifdef USE_LIBIDN
  /*************************************************************
   * Check name for non-ASCII and convert hostname to ACE form.
   *************************************************************/
  if (!is_ASCII_name(host->name) &&
      stringprep_check_version(LIBIDN_REQUIRED_VERSION)) {
    char *ace_hostname = NULL;
    struct SessionHandle *data = conn->data;
    int rc = idna_to_ascii_lz(host->name, &ace_hostname, 0);
    infof (data, "Input domain encoded as `%s'\n",
           stringprep_locale_charset ());
    if (rc != IDNA_SUCCESS)
      infof(data, "Failed to convert %s to ACE; %s\n",
            host->name, Curl_idn_strerror(conn,rc));
    else {
      tld_check_name(data, ace_hostname);

      host->encalloc = ace_hostname;
      /* change the name pointer to point to the encoded hostname */
      host->name = host->encalloc;
    }
  }
#else
  (void)conn; /* never used */
#endif
}


/**
 * CreateConnection() sets up a new connectdata struct, or re-uses an already
 * existing one, and resolves host name.
 *
 * if this function returns CURLE_OK and *async is set to TRUE, the resolve
 * response will be coming asynchronously. If *async is FALSE, the name is
 * already resolved.
 *
 * @param data The sessionhandle pointer
 * @param in_connect is set to the next connection data pointer
 * @param addr is set to the new dns entry for this connection
 * @param async is set TRUE/FALSE depending on the nature of this lookup
 * @return CURLcode
 * @see SetupConnection()
 */

static CURLcode CreateConnection(struct SessionHandle *data,
                                 struct connectdata **in_connect,
                                 struct Curl_dns_entry **addr,
                                 bool *async)
{
  char *tmp;
  char *at;
  CURLcode result=CURLE_OK;
  struct connectdata *conn;
  struct connectdata *conn_temp;
  size_t urllen;
  struct Curl_dns_entry *hostaddr;
#if defined(HAVE_ALARM) && !defined(USE_ARES)
  unsigned int prev_alarm=0;
#endif
  char endbracket;
  char user[MAX_CURL_USER_LENGTH];
  char passwd[MAX_CURL_PASSWORD_LENGTH];
  int rc;
  bool reuse;

#ifndef USE_ARES
#ifdef SIGALRM
#ifdef HAVE_SIGACTION
  struct sigaction keep_sigact;   /* store the old struct here */
  bool keep_copysig=FALSE;        /* did copy it? */
#else
#ifdef HAVE_SIGNAL
  void *keep_sigact;              /* store the old handler here */
#endif /* HAVE_SIGNAL */
#endif /* HAVE_SIGACTION */
#endif /* SIGALRM */
#endif /* USE_ARES */

  *addr = NULL; /* nothing yet */
  *async = FALSE;

  /*************************************************************
   * Check input data
   *************************************************************/

  if(!data->change.url)
    return CURLE_URL_MALFORMAT;

  /* First, split up the current URL in parts so that we can use the
     parts for checking against the already present connections. In order
     to not have to modify everything at once, we allocate a temporary
     connection data struct and fill in for comparison purposes. */

  conn = (struct connectdata *)malloc(sizeof(struct connectdata));
  if(!conn) {
    *in_connect = NULL; /* clear the pointer */
    return CURLE_OUT_OF_MEMORY;
  }
  /* We must set the return variable as soon as possible, so that our
     parent can cleanup any possible allocs we may have done before
     any failure */
  *in_connect = conn;

  /* we have to init the struct */
  memset(conn, 0, sizeof(struct connectdata));

  /* and we setup a few fields in case we end up actually using this struct */
  conn->data = data;           /* remember our daddy */
  conn->sock[FIRSTSOCKET] = CURL_SOCKET_BAD;     /* no file descriptor */
  conn->sock[SECONDARYSOCKET] = CURL_SOCKET_BAD; /* no file descriptor */
  conn->connectindex = -1;    /* no index */
  conn->bits.httpproxy = (data->change.proxy && *data->change.proxy &&
                          (data->set.proxytype == CURLPROXY_HTTP))?
    TRUE:FALSE; /* http proxy or not */

  /* Default protocol-independent behavior doesn't support persistant
     connections, so we set this to force-close. Protocols that support
     this need to set this to FALSE in their "curl_do" functions. */
  conn->bits.close = TRUE;

  /* maxdownload must be -1 on init, as 0 is a valid value! */
  conn->maxdownload = -1;  /* might have been used previously! */

  /* Store creation time to help future close decision making */
  conn->created = Curl_tvnow();

  conn->bits.use_range = data->set.set_range?TRUE:FALSE; /* range status */
  conn->range = data->set.set_range;              /* clone the range setting */
  conn->resume_from = data->set.set_resume_from;   /* inherite resume_from */

  conn->bits.user_passwd = data->set.userpwd?1:0;
  conn->bits.proxy_user_passwd = data->set.proxyuserpwd?1:0;
  conn->bits.no_body = data->set.opt_no_body;
  conn->bits.tunnel_proxy = data->set.tunnel_thru_httpproxy;
  conn->bits.ftp_use_epsv = data->set.ftp_use_epsv;
  conn->bits.ftp_use_eprt = data->set.ftp_use_eprt;
  conn->bits.ftp_use_lprt = data->set.ftp_use_lprt;

  /* This initing continues below, see the comment "Continue connectdata
   * initialization here" */

  /***********************************************************
   * We need to allocate memory to store the path in. We get the size of the
   * full URL to be sure, and we need to make it at least 256 bytes since
   * other parts of the code will rely on this fact
   ***********************************************************/
#define LEAST_PATH_ALLOC 256
  urllen=strlen(data->change.url);
  if(urllen < LEAST_PATH_ALLOC)
    urllen=LEAST_PATH_ALLOC;

  conn->pathbuffer=(char *)malloc(urllen);
  if(NULL == conn->pathbuffer)
    return CURLE_OUT_OF_MEMORY; /* really bad error */
  conn->path = conn->pathbuffer;

  conn->host.rawalloc=(char *)malloc(urllen);
  if(NULL == conn->host.rawalloc)
    return CURLE_OUT_OF_MEMORY;
  conn->host.name = conn->host.rawalloc;

  /*************************************************************
   * Parse the URL.
   *
   * We need to parse the url even when using the proxy, because we will need
   * the hostname and port in case we are trying to SSL connect through the
   * proxy -- and we don't know if we will need to use SSL until we parse the
   * url ...
   ************************************************************/
  if((2 == sscanf(data->change.url, "%15[^:]:%[^\n]",
                  conn->protostr,
                  conn->path)) && strequal(conn->protostr, "file")) {
    if(conn->path[0] == '/' && conn->path[1] == '/') {
      /* Allow omitted hostname (e.g. file:/<path>).  This is not strictly
       * speaking a valid file: URL by RFC 1738, but treating file:/<path> as
       * file://localhost/<path> is similar to how other schemes treat missing
       * hostnames.  See RFC 1808. */

      /* This cannot be done with strcpy() in a portable manner, since the
         memory areas overlap! */
      memmove(conn->path, conn->path + 2, strlen(conn->path + 2)+1);
    }
    /*
     * we deal with file://<host>/<path> differently since it supports no
     * hostname other than "localhost" and "127.0.0.1", which is unique among
     * the URL protocols specified in RFC 1738
     */
    if(conn->path[0] != '/') {
      /* the URL included a host name, we ignore host names in file:// URLs
         as the standards don't define what to do with them */
      char *ptr=strchr(conn->path, '/');
      if(ptr) {
        /* there was a slash present

           RFC1738 (section 3.1, page 5) says:

           The rest of the locator consists of data specific to the scheme,
           and is known as the "url-path". It supplies the details of how the
           specified resource can be accessed. Note that the "/" between the
           host (or port) and the url-path is NOT part of the url-path.

           As most agents use file://localhost/foo to get '/foo' although the
           slash preceeding foo is a separator and not a slash for the path,
           a URL as file://localhost//foo must be valid as well, to refer to
           the same file with an absolute path.
        */

        if(ptr[1] && ('/' == ptr[1]))
          /* if there was two slashes, we skip the first one as that is then
             used truly as a separator */
          ptr++;

        /* This cannot be made with strcpy, as the memory chunks overlap! */
        memmove(conn->path, ptr, strlen(ptr)+1);
      }
    }

    strcpy(conn->protostr, "file"); /* store protocol string lowercase */
  }
  else {
    /* Set default path */
    strcpy(conn->path, "/");

    if (2 > sscanf(data->change.url,
                   "%15[^\n:]://%[^\n/]%[^\n]",
                   conn->protostr,
                   conn->host.name, conn->path)) {

      /*
       * The URL was badly formatted, let's try the browser-style _without_
       * protocol specified like 'http://'.
       */
      if((1 > sscanf(data->change.url, "%[^\n/]%[^\n]",
                     conn->host.name, conn->path)) ) {
        /*
         * We couldn't even get this format.
         */
        failf(data, "<url> malformed");
        return CURLE_URL_MALFORMAT;
      }

      /*
       * Since there was no protocol part specified, we guess what protocol it
       * is based on the first letters of the server name.
       */

      /* Note: if you add a new protocol, please update the list in
       * lib/version.c too! */

      if(checkprefix("GOPHER", conn->host.name))
        strcpy(conn->protostr, "gopher");
#ifdef USE_SSLEAY
      else if(checkprefix("HTTPS", conn->host.name))
        strcpy(conn->protostr, "https");
      else if(checkprefix("FTPS", conn->host.name))
        strcpy(conn->protostr, "ftps");
#endif /* USE_SSLEAY */
      else if(checkprefix("FTP", conn->host.name))
        strcpy(conn->protostr, "ftp");
      else if(checkprefix("TELNET", conn->host.name))
        strcpy(conn->protostr, "telnet");
      else if (checkprefix("DICT", conn->host.name))
        strcpy(conn->protostr, "DICT");
      else if (checkprefix("LDAP", conn->host.name))
        strcpy(conn->protostr, "LDAP");
      else {
        strcpy(conn->protostr, "http");
      }

      conn->protocol |= PROT_MISSING; /* not given in URL */
    }
  }

  /* We search for '?' in the host name (but only on the right side of a
   * @-letter to allow ?-letters in username and password) to handle things
   * like http://example.com?param= (notice the missing '/').
   */
  at = strchr(conn->host.name, '@');
  if(at)
    tmp = strchr(at+1, '?');
  else
    tmp = strchr(conn->host.name, '?');

  if(tmp) {
    /* The right part of the ?-letter needs to be moved to prefix
       the current path buffer! */
    size_t len = strlen(tmp);
    /* move the existing path plus the zero byte */
    memmove(conn->path+len+1, conn->path, strlen(conn->path)+1);
    conn->path[0]='/'; /* prepend the missing slash */
    memcpy(conn->path+1, tmp, len); /* now copy the prefix part */
    *tmp=0; /* now cut off the hostname at the ? */
  }

  /* If the URL is malformatted (missing a '/' after hostname before path) we
   * insert a slash here. The only letter except '/' we accept to start a path
   * is '?'.
   */
  if(conn->path[0] == '?') {
    /* We need this function to deal with overlapping memory areas. We know
       that the memory area 'path' points to is 'urllen' bytes big and that
       is bigger than the path. Use +1 to move the zero byte too. */
    memmove(&conn->path[1], conn->path, strlen(conn->path)+1);
    conn->path[0] = '/';
  }

  /*
   * So if the URL was A://B/C,
   *   conn->protostr is A
   *   conn->host.name is B
   *   conn->path is /C
   */

  /*************************************************************
   * Take care of proxy authentication stuff
   *************************************************************/
  if(conn->bits.proxy_user_passwd) {
    char proxyuser[MAX_CURL_USER_LENGTH]="";
    char proxypasswd[MAX_CURL_PASSWORD_LENGTH]="";

    sscanf(data->set.proxyuserpwd,
           "%" MAX_CURL_USER_LENGTH_TXT "[^:]:"
           "%" MAX_CURL_PASSWORD_LENGTH_TXT "[^\n]",
           proxyuser, proxypasswd);

    conn->proxyuser = strdup(proxyuser);
    if(!conn->proxyuser)
      return CURLE_OUT_OF_MEMORY;

    conn->proxypasswd = strdup(proxypasswd);
    if(!conn->proxypasswd)
      return CURLE_OUT_OF_MEMORY;
  }

  /*************************************************************
   * Detect what (if any) proxy to use
   *************************************************************/
  if(!data->change.proxy) {
    /* If proxy was not specified, we check for default proxy environment
     * variables, to enable i.e Lynx compliance:
     *
     * http_proxy=http://some.server.dom:port/
     * https_proxy=http://some.server.dom:port/
     * ftp_proxy=http://some.server.dom:port/
     * gopher_proxy=http://some.server.dom:port/
     * no_proxy=domain1.dom,host.domain2.dom
     *   (a comma-separated list of hosts which should
     *   not be proxied, or an asterisk to override
     *   all proxy variables)
     * all_proxy=http://some.server.dom:port/
     *   (seems to exist for the CERN www lib. Probably
     *   the first to check for.)
     *
     * For compatibility, the all-uppercase versions of these variables are
     * checked if the lowercase versions don't exist.
     */
    char *no_proxy=NULL;
    char *no_proxy_tok_buf;
    char *proxy=NULL;
    char proxy_env[128];

    no_proxy=curl_getenv("no_proxy");
    if(!no_proxy)
      no_proxy=curl_getenv("NO_PROXY");

    if(!no_proxy || !strequal("*", no_proxy)) {
      /* NO_PROXY wasn't specified or it wasn't just an asterisk */
      char *nope;

      nope=no_proxy?strtok_r(no_proxy, ", ", &no_proxy_tok_buf):NULL;
      while(nope) {
        size_t namelen;
        char *endptr = strchr(conn->host.name, ':');
        if(endptr)
          namelen=endptr-conn->host.name;
        else
          namelen=strlen(conn->host.name);

        if(strlen(nope) <= namelen) {
          char *checkn=
            conn->host.name + namelen - strlen(nope);
          if(checkprefix(nope, checkn)) {
            /* no proxy for this host! */
            break;
          }
        }
        nope=strtok_r(NULL, ", ", &no_proxy_tok_buf);
      }
      if(!nope) {
        /* It was not listed as without proxy */
        char *protop = conn->protostr;
        char *envp = proxy_env;
        char *prox;

        /* Now, build <protocol>_proxy and check for such a one to use */
        while(*protop)
          *envp++ = tolower((int)*protop++);

        /* append _proxy */
        strcpy(envp, "_proxy");

        /* read the protocol proxy: */
        prox=curl_getenv(proxy_env);

        /*
         * We don't try the uppercase version of HTTP_PROXY because of
         * security reasons:
         *
         * When curl is used in a webserver application
         * environment (cgi or php), this environment variable can
         * be controlled by the web server user by setting the
         * http header 'Proxy:' to some value.
         *
         * This can cause 'internal' http/ftp requests to be
         * arbitrarily redirected by any external attacker.
         */
        if(!prox && !strequal("http_proxy", proxy_env)) {
          /* There was no lowercase variable, try the uppercase version: */
          for(envp = proxy_env; *envp; envp++)
            *envp = toupper((int)*envp);
          prox=curl_getenv(proxy_env);
        }

        if(prox && *prox) { /* don't count "" strings */
          proxy = prox; /* use this */
        }
        else {
          proxy = curl_getenv("all_proxy"); /* default proxy to use */
          if(!proxy)
            proxy=curl_getenv("ALL_PROXY");
        }

        if(proxy && *proxy) {
          /* we have a proxy here to set */
          char *ptr;
          char proxyuser[MAX_CURL_USER_LENGTH];
          char proxypasswd[MAX_CURL_PASSWORD_LENGTH];

          char *fineptr;

          /* skip the possible protocol piece */
          ptr=strstr(proxy, "://");
          if(ptr)
            ptr += 3;
          else
            ptr = proxy;

          fineptr = ptr;

          /* check for an @-letter */
          ptr = strchr(ptr, '@');
          if(ptr && (2 == sscanf(fineptr,
                                 "%" MAX_CURL_USER_LENGTH_TXT"[^:]:"
                                 "%" MAX_CURL_PASSWORD_LENGTH_TXT "[^@]",
                                 proxyuser, proxypasswd))) {
            CURLcode res = CURLE_OK;

            /* found user and password, rip them out */
            Curl_safefree(conn->proxyuser);
            conn->proxyuser = strdup(proxyuser);

            if(!conn->proxyuser)
              res = CURLE_OUT_OF_MEMORY;
            else {
              Curl_safefree(conn->proxypasswd);
              conn->proxypasswd = strdup(proxypasswd);

              if(!conn->proxypasswd)
                res = CURLE_OUT_OF_MEMORY;
            }

            if(CURLE_OK == res) {
              conn->bits.proxy_user_passwd = TRUE; /* enable it */
              ptr = strdup(ptr+1); /* the right side of the @-letter */

              if(ptr) {
                free(proxy); /* free the former proxy string */
                proxy = ptr; /* now use this instead */
              }
              else
                res = CURLE_OUT_OF_MEMORY;
            }

            if(res) {
              free(proxy); /* free the allocated proxy string */
              return res;
            }
          }

          data->change.proxy = proxy;
          data->change.proxy_alloc=TRUE; /* this needs to be freed later */
          conn->bits.httpproxy = TRUE;
        }
      } /* if (!nope) - it wasn't specified non-proxy */
    } /* NO_PROXY wasn't specified or '*' */
    if(no_proxy)
      free(no_proxy);
  } /* if not using proxy */

  /*************************************************************
   * No protocol part in URL was used, add it!
   *************************************************************/
  if(conn->protocol&PROT_MISSING) {
    /* We're guessing prefixes here and if we're told to use a proxy or if
       we're gonna follow a Location: later or... then we need the protocol
       part added so that we have a valid URL. */
    char *reurl;

    reurl = aprintf("%s://%s", conn->protostr, data->change.url);

    if(!reurl)
      return CURLE_OUT_OF_MEMORY;

    data->change.url = reurl;
    data->change.url_alloc = TRUE; /* free this later */
    conn->protocol &= ~PROT_MISSING; /* switch that one off again */
  }

#ifndef CURL_DISABLE_HTTP
  /************************************************************
   * RESUME on a HTTP page is a tricky business. First, let's just check that
   * 'range' isn't used, then set the range parameter and leave the resume as
   * it is to inform about this situation for later use. We will then
   * "attempt" to resume, and if we're talking to a HTTP/1.1 (or later)
   * server, we will get the document resumed. If we talk to a HTTP/1.0
   * server, we just fail since we can't rewind the file writing from within
   * this function.
   ***********************************************************/
  if(conn->resume_from) {
    if(!conn->bits.use_range) {
      /* if it already was in use, we just skip this */
      conn->range = aprintf("%" FORMAT_OFF_T "-", conn->resume_from);
      if(!conn->range)
        return CURLE_OUT_OF_MEMORY;
      conn->bits.rangestringalloc = TRUE; /* mark as allocated */
      conn->bits.use_range = 1; /* switch on range usage */
    }
  }
#endif
  /*************************************************************
   * Setup internals depending on protocol
   *************************************************************/

  if (strequal(conn->protostr, "HTTP")) {
#ifndef CURL_DISABLE_HTTP
    conn->port = (data->set.use_port && data->state.allow_port)?
      data->set.use_port:PORT_HTTP;
    conn->remote_port = PORT_HTTP;
    conn->protocol |= PROT_HTTP;
    conn->curl_do = Curl_http;
    conn->curl_do_more = NULL;
    conn->curl_done = Curl_http_done;
    conn->curl_connect = Curl_http_connect;
#else
    failf(data, LIBCURL_NAME
          " was built with HTTP disabled, http: not supported!");
    return CURLE_UNSUPPORTED_PROTOCOL;
#endif
  }
  else if (strequal(conn->protostr, "HTTPS")) {
#if defined(USE_SSLEAY) && !defined(CURL_DISABLE_HTTP)

    conn->port = (data->set.use_port && data->state.allow_port)?
      data->set.use_port:PORT_HTTPS;
    conn->remote_port = PORT_HTTPS;
    conn->protocol |= PROT_HTTP|PROT_HTTPS|PROT_SSL;

    conn->curl_do = Curl_http;
    conn->curl_do_more = NULL;
    conn->curl_done = Curl_http_done;
    conn->curl_connect = Curl_http_connect;

#else /* USE_SSLEAY */
    failf(data, LIBCURL_NAME
          " was built with SSL disabled, https: not supported!");
    return CURLE_UNSUPPORTED_PROTOCOL;
#endif /* !USE_SSLEAY */
  }
  else if (strequal(conn->protostr, "GOPHER")) {
#ifndef CURL_DISABLE_GOPHER
    conn->port = (data->set.use_port && data->state.allow_port)?
      data->set.use_port:PORT_GOPHER;
    conn->remote_port = PORT_GOPHER;
    /* Skip /<item-type>/ in path if present */
    if (isdigit((int)conn->path[1])) {
      conn->path = strchr(&conn->path[1], '/');
      if (conn->path == NULL)
        conn->path = conn->pathbuffer;
    }
    conn->protocol |= PROT_GOPHER;
    conn->curl_do = Curl_http;
    conn->curl_do_more = NULL;
    conn->curl_done = Curl_http_done;
#else
    failf(data, LIBCURL_NAME
          " was built with GOPHER disabled, gopher: not supported!");
#endif
  }
  else if(strequal(conn->protostr, "FTP") ||
          strequal(conn->protostr, "FTPS")) {

#ifndef CURL_DISABLE_FTP
    char *type;
    int port = PORT_FTP;

    if(strequal(conn->protostr, "FTPS")) {
#ifdef USE_SSLEAY
      conn->protocol |= PROT_FTPS|PROT_SSL;
      conn->ssl[SECONDARYSOCKET].use = TRUE; /* send data securely */
      port = PORT_FTPS;
#else
      failf(data, LIBCURL_NAME
            " was built with SSL disabled, ftps: not supported!");
      return CURLE_UNSUPPORTED_PROTOCOL;
#endif /* !USE_SSLEAY */
    }

    conn->port = (data->set.use_port && data->state.allow_port)?
      data->set.use_port:port;
    conn->remote_port = port;
    conn->protocol |= PROT_FTP;

    if(data->change.proxy &&
       *data->change.proxy &&
       !data->set.tunnel_thru_httpproxy) {
      /* Unless we have asked to tunnel ftp operations through the proxy, we
         switch and use HTTP operations only */
      if(conn->protocol & PROT_FTPS) {
        /* FTPS is a hacked protocol and does not work through your
           ordinary http proxy! */
        failf(data, "ftps does not work through http proxy!");
        return CURLE_UNSUPPORTED_PROTOCOL;
      }
#ifndef CURL_DISABLE_HTTP
      conn->curl_do = Curl_http;
      conn->curl_done = Curl_http_done;
#else
      failf(data, "FTP over http proxy requires HTTP support built-in!");
      return CURLE_UNSUPPORTED_PROTOCOL;
#endif
    }
    else {
      conn->curl_do = Curl_ftp;
      conn->curl_do_more = Curl_ftp_nextconnect;
      conn->curl_done = Curl_ftp_done;
      conn->curl_connect = Curl_ftp_connect;
      conn->curl_disconnect = Curl_ftp_disconnect;
    }

    conn->path++; /* don't include the initial slash */

    /* FTP URLs support an extension like ";type=<typecode>" that
     * we'll try to get now! */
    type=strstr(conn->path, ";type=");
    if(!type) {
      type=strstr(conn->host.rawalloc, ";type=");
    }
    if(type) {
      char command;
      *type=0;                     /* it was in the middle of the hostname */
      command = toupper((int)type[6]);
      switch(command) {
      case 'A': /* ASCII mode */
        data->set.ftp_ascii = 1;
        break;
      case 'D': /* directory mode */
        data->set.ftp_list_only = 1;
        break;
      case 'I': /* binary mode */
      default:
        /* switch off ASCII */
        data->set.ftp_ascii = 0;
        break;
      }
    }
#else /* CURL_DISABLE_FTP */
    failf(data, LIBCURL_NAME
          " was built with FTP disabled, ftp/ftps: not supported!");
    return CURLE_UNSUPPORTED_PROTOCOL;
#endif
  }
  else if(strequal(conn->protostr, "TELNET")) {
#ifndef CURL_DISABLE_TELNET
    /* telnet testing factory */
    conn->protocol |= PROT_TELNET;

    conn->port = (data->set.use_port && data->state.allow_port)?
      data->set.use_port: PORT_TELNET;
    conn->remote_port = PORT_TELNET;
    conn->curl_do = Curl_telnet;
    conn->curl_done = Curl_telnet_done;
#else
    failf(data, LIBCURL_NAME
          " was built with TELNET disabled!");
#endif
  }
  else if (strequal(conn->protostr, "DICT")) {
#ifndef CURL_DISABLE_DICT
    conn->protocol |= PROT_DICT;
    conn->port = (data->set.use_port && data->state.allow_port)?
      data->set.use_port:PORT_DICT;
    conn->remote_port = PORT_DICT;
    conn->curl_do = Curl_dict;
    conn->curl_done = NULL; /* no DICT-specific done */
#else
    failf(data, LIBCURL_NAME
          " was built with DICT disabled!");
#endif
  }
  else if (strequal(conn->protostr, "LDAP")) {
#ifndef CURL_DISABLE_LDAP
    conn->protocol |= PROT_LDAP;
    conn->port = (data->set.use_port && data->state.allow_port)?
      data->set.use_port:PORT_LDAP;
    conn->remote_port = PORT_LDAP;
    conn->curl_do = Curl_ldap;
    conn->curl_done = NULL; /* no LDAP-specific done */
#else
    failf(data, LIBCURL_NAME
          " was built with LDAP disabled!");
#endif
  }
  else if (strequal(conn->protostr, "FILE")) {
#ifndef CURL_DISABLE_FILE
    conn->protocol |= PROT_FILE;

    conn->curl_do = Curl_file;
    conn->curl_done = Curl_file_done;

    /* anyway, this is supposed to be the connect function so we better
       at least check that the file is present here! */
    result = Curl_file_connect(conn);

    /* Setup a "faked" transfer that'll do nothing */
    if(CURLE_OK == result) {
      conn->bits.tcpconnect = TRUE; /* we are "connected */
      result = Curl_Transfer(conn, -1, -1, FALSE, NULL, /* no download */
                             -1, NULL); /* no upload */
    }

    return result;
#else
    failf(data, LIBCURL_NAME
          " was built with FILE disabled!");
#endif
  }
  else {
    /* We fell through all checks and thus we don't support the specified
       protocol */
    failf(data, "Unsupported protocol: %s", conn->protostr);
    return CURLE_UNSUPPORTED_PROTOCOL;
  }

  if(data->change.proxy && *data->change.proxy) {
    /* If this is supposed to use a proxy, we need to figure out the proxy
       host name name, so that we can re-use an existing connection
       that may exist registered to the same proxy host. */

    char *prox_portno;
    char *endofprot;

    /* We need to make a duplicate of the proxy so that we can modify the
       string safely. */
    char *proxydup=strdup(data->change.proxy);

    /* We use 'proxyptr' to point to the proxy name from now on... */
    char *proxyptr=proxydup;

    if(NULL == proxydup) {
      failf(data, "memory shortage");
      return CURLE_OUT_OF_MEMORY;
    }

    /* Daniel Dec 10, 1998:
       We do the proxy host string parsing here. We want the host name and the
       port name. Accept a protocol:// prefix, even though it should just be
       ignored. */

    /* 1. skip the protocol part if present */
    endofprot=strstr(proxyptr, "://");
    if(endofprot) {
      proxyptr = endofprot+3;
    }

    /* allow user to specify proxy.server.com:1080 if desired */
    prox_portno = strchr (proxyptr, ':');
    if (prox_portno) {
      *prox_portno = 0x0; /* cut off number from host name */
      prox_portno ++;
      /* now set the local port number */
      conn->port = atoi(prox_portno);
    }
    else if(data->set.proxyport) {
      /* None given in the proxy string, then get the default one if it is
         given */
      conn->port = data->set.proxyport;
    }

    /* now, clone the cleaned proxy host name */
    conn->proxy.rawalloc = strdup(proxyptr);
    conn->proxy.name = conn->proxy.rawalloc;

    free(proxydup); /* free the duplicate pointer and not the modified */
    if(!conn->proxy.rawalloc)
      return CURLE_OUT_OF_MEMORY;
  }

  /*************************************************************
   * If the protcol is using SSL and HTTP proxy is used, we set
   * the tunnel_proxy bit.
   *************************************************************/
  if((conn->protocol&PROT_SSL) && conn->bits.httpproxy)
    conn->bits.tunnel_proxy = TRUE;

  /*************************************************************
   * Take care of user and password authentication stuff
   *************************************************************/

  /*
   * Inputs: data->set.userpwd   (CURLOPT_USERPWD)
   *         data->set.fpasswd   (CURLOPT_PASSWDFUNCTION)
   *         data->set.use_netrc (CURLOPT_NETRC)
   *         conn->host.name
   *         netrc file
   *         hard-coded defaults
   *
   * Outputs: (almost :- all currently undefined)
   *          conn->bits.user_passwd  - non-zero if non-default passwords exist
   *          conn->user              - non-zero length if defined
   *          conn->passwd            -   ditto
   *          conn->host.name          - remove user name and password
   */

  /* At this point, we're hoping all the other special cases have
   * been taken care of, so conn->host.name is at most
   *    [user[:password]]@]hostname
   *
   * We need somewhere to put the embedded details, so do that first.
   */

  user[0] =0;   /* to make everything well-defined */
  passwd[0]=0;

  if (conn->protocol & (PROT_FTP|PROT_HTTP)) {
    /* This is a FTP or HTTP URL, we will now try to extract the possible
     * user+password pair in a string like:
     * ftp://user:password@ftp.my.site:8021/README */
    char *ptr=strchr(conn->host.name, '@');
    char *userpass = conn->host.name;
    if(ptr != NULL) {
      /* there's a user+password given here, to the left of the @ */

      conn->host.name = ++ptr;

      /* So the hostname is sane.  Only bother interpreting the
       * results if we could care.  It could still be wasted
       * work because it might be overtaken by the programmatically
       * set user/passwd, but doing that first adds more cases here :-(
       */

      if (data->set.use_netrc != CURL_NETRC_REQUIRED) {
        /* We could use the one in the URL */

        conn->bits.user_passwd = 1; /* enable user+password */

        if(*userpass != ':') {
          /* the name is given, get user+password */
          sscanf(userpass, "%127[^:@]:%127[^@]",
                 user, passwd);
        }
        else
          /* no name given, get the password only */
          sscanf(userpass, ":%127[^@]", passwd);

        if(user[0]) {
          char *newname=curl_unescape(user, 0);
          if(!newname)
            return CURLE_OUT_OF_MEMORY;
          if(strlen(newname) < sizeof(user))
            strcpy(user, newname);

          /* if the new name is longer than accepted, then just use
             the unconverted name, it'll be wrong but what the heck */
          free(newname);
        }
        if (passwd[0]) {
          /* we have a password found in the URL, decode it! */
          char *newpasswd=curl_unescape(passwd, 0);
          if(!newpasswd)
            return CURLE_OUT_OF_MEMORY;
          if(strlen(newpasswd) < sizeof(passwd))
            strcpy(passwd, newpasswd);

          free(newpasswd);
        }
      }
    }
  }