From 9317eced98408c7fefa6dd5f1559050e1ec8a3b7 Mon Sep 17 00:00:00 2001
From: Till Maas <opensource@till.name>
Date: Sat, 15 Mar 2014 22:42:50 +0100
Subject: [PATCH] URL parser: IPv6 zone identifiers are now supported

---
 docs/KNOWN_BUGS | 11 +-------
 docs/MANUAL     |  6 ++---
 lib/url.c       | 69 ++++++++++++++++++++++++++++++++++++++++---------
 3 files changed, 61 insertions(+), 25 deletions(-)

diff --git a/docs/KNOWN_BUGS b/docs/KNOWN_BUGS
index ad997a0c6c..c8ad032a3f 100644
--- a/docs/KNOWN_BUGS
+++ b/docs/KNOWN_BUGS
@@ -180,16 +180,7 @@ may have been fixed since this was written!
   --cflags suffers from the same effects with CFLAGS/CPPFLAGS.
 
 30. You need to use -g to the command line tool in order to use RFC2732-style
-  IPv6 numerical addresses in URLs.
-
-29. IPv6 URLs with zone ID is not nicely supported.
-  http://www.ietf.org/internet-drafts/draft-fenner-literal-zone-02.txt (expired)
-  specifies the use of a plus sign instead of a percent when specifying zone
-  IDs in URLs to get around the problem of percent signs being
-  special. According to the reporter, Firefox deals with the URL _with_ a
-  percent letter (which seems like a blatant URL spec violation).
-  libcurl supports zone IDs where the percent sign is URL-escaped (i.e. %25):
-  http://curl.haxx.se/bug/view.cgi?id=555
+  or RFC6874-style IPv6 numerical addresses in URLs.
 
 26. NTLM authentication using SSPI (on Windows) when (lib)curl is running in
   "system context" will make it use wrong(?) user name - at least when compared
diff --git a/docs/MANUAL b/docs/MANUAL
index 4ad2e135e3..da8f6021fd 100644
--- a/docs/MANUAL
+++ b/docs/MANUAL
@@ -956,9 +956,9 @@ IPv6
   When this style is used, the -g option must be given to stop curl from
   interpreting the square brackets as special globbing characters.  Link local
   and site local addresses including a scope identifier, such as fe80::1234%1,
-  may also be used, but the scope portion must be numeric and the percent
-  character must be URL escaped. The previous example in an SFTP URL might
-  look like:
+  may also be used, but the scope portion must be numeric or match an existing
+  network interface on Linux, and the percent character must be URL escaped.
+  The previous example in an SFTP URL might look like:
 
     sftp://[fe80::1234%251]/
 
diff --git a/lib/url.c b/lib/url.c
index 0e420c7a30..40751cc568 100644
--- a/lib/url.c
+++ b/lib/url.c
@@ -3951,23 +3951,59 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data,
   if(result != CURLE_OK)
     return result;
 
-  if(conn->host.name[0] == '[') {
+  if(conn->host.name[0] == '[' && !data->state.this_is_a_follow) {
     /* This looks like an IPv6 address literal.  See if there is an address
-       scope.  */
-    char *percent = strstr (conn->host.name, "%25");
+       scope if there is no location header */
+    char *percent = strchr(conn->host.name, '%');
     if(percent) {
+      unsigned int identifier_offset = 3;
       char *endp;
-      unsigned long scope = strtoul (percent + 3, &endp, 10);
+      unsigned long scope;
+      if(strncmp("%25", percent, 3) != 0) {
+        infof(data,
+              "Please URL encode %% as %%25, see RFC 6874.\n");
+        identifier_offset = 1;
+      }
+      scope = strtoul(percent + identifier_offset, &endp, 10);
       if(*endp == ']') {
         /* The address scope was well formed.  Knock it out of the
            hostname. */
         memmove(percent, endp, strlen(endp)+1);
-        if(!data->state.this_is_a_follow)
-          /* Don't honour a scope given in a Location: header */
-          conn->scope = (unsigned int)scope;
+        conn->scope = (unsigned int)scope;
+      }
+      else {
+        /* Zone identifier is not numeric */
+#ifdef HAVE_NET_IF_H
+        char ifname[IFNAMSIZ + 2];
+        char *square_bracket;
+        unsigned int scopeidx = 0;
+        strncpy(ifname, percent + identifier_offset, IFNAMSIZ + 2);
+        /* Ensure nullbyte termination */
+        ifname[IFNAMSIZ + 1] = '\0';
+        square_bracket = strchr(ifname, ']');
+        if(square_bracket) {
+          /* Remove ']' */
+          *square_bracket = '\0';
+          scopeidx = if_nametoindex(ifname);
+          if(scopeidx == 0) {
+            infof(data, "Invalid network interface: %s; %s\n", ifname,
+                  strerror(errno));
+          }
+        }
+        if(scopeidx > 0) {
+          /* Remove zone identifier from hostname */
+          memmove(percent,
+                  percent + identifier_offset + strlen(ifname),
+                  identifier_offset + strlen(ifname));
+          conn->scope = scopeidx;
+        }
+        else {
+#endif /* HAVE_NET_IF_H */
+          infof(data, "Invalid IPv6 address format\n");
+#ifdef HAVE_NET_IF_H
+        }
+#endif /* HAVE_NET_IF_H */
       }
-      else
-        infof(data, "Invalid IPv6 address format\n");
     }
   }
 
@@ -4350,12 +4386,21 @@ static CURLcode parse_proxy(struct SessionHandle *data,
   /* start scanning for port number at this point */
   portptr = proxyptr;
 
-  /* detect and extract RFC2732-style IPv6-addresses */
+  /* detect and extract RFC6874-style IPv6-addresses */
   if(*proxyptr == '[') {
     char *ptr = ++proxyptr; /* advance beyond the initial bracket */
-    while(*ptr && (ISXDIGIT(*ptr) || (*ptr == ':') || (*ptr == '%') ||
-                   (*ptr == '.')))
+    while(*ptr && (ISXDIGIT(*ptr) || (*ptr == ':') || (*ptr == '.')))
+      ptr++;
+    if(*ptr == '%') {
+      /* There might be a zone identifier */
+      if(strncmp("%25", ptr, 3))
+        infof(data, "Please URL encode %% as %%25, see RFC 6874.\n");
       ptr++;
+      /* Allow unreserved characters as defined in RFC 3986 */
+      while(*ptr && (ISALPHA(*ptr) || ISXDIGIT(*ptr) || (*ptr == '-') ||
+                     (*ptr == '.') || (*ptr == '_') || (*ptr == '~')))
+        ptr++;
+    }
     if(*ptr == ']')
       /* yeps, it ended nicely with a bracket as well */
       *ptr++ = 0;
-- 
GitLab