And add a test for an address that was not canonicalized correctly in the previous implementation.
Signed-off-by: Alex Henrie <alexhenrie24@gmail.com> --- dlls/urlmon/tests/uri.c | 25 +++ dlls/urlmon/uri.c | 439 +++------------------------------------- 2 files changed, 48 insertions(+), 416 deletions(-)
diff --git a/dlls/urlmon/tests/uri.c b/dlls/urlmon/tests/uri.c index b6d2182a270..b161d1f3eec 100644 --- a/dlls/urlmon/tests/uri.c +++ b/dlls/urlmon/tests/uri.c @@ -1725,6 +1725,31 @@ static const uri_properties uri_tests[] = { {URLZONE_INVALID,E_NOTIMPL,FALSE} } }, + { "http://[::5efe:1.2.3.4]", 0, S_OK, FALSE, + { + {"http://[::5efe:1.2.3.4]/",S_OK,FALSE}, + {"[::5efe:1.2.3.4]",S_OK,FALSE}, + {"http://[::5efe:1.2.3.4]/",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,FALSE}, + {"::5efe:1.2.3.4",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"/",S_OK,FALSE}, + {"/",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"http://[::5efe:1.2.3.4]",S_OK,FALSE}, + {"http",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,FALSE}, + }, + { + {Uri_HOST_IPV6,S_OK,FALSE}, + {80,S_OK,FALSE}, + {URL_SCHEME_HTTP,S_OK,FALSE}, + {URLZONE_INVALID,E_NOTIMPL,FALSE} + } + }, /* Windows doesn't do anything to IPv6's in unknown schemes. */ { "zip://[0001:0:000:0004:0005:0006:001.002.003.000]", 0, S_OK, FALSE, { diff --git a/dlls/urlmon/uri.c b/dlls/urlmon/uri.c index b16ad07d6a8..953e7ff070c 100644 --- a/dlls/urlmon/uri.c +++ b/dlls/urlmon/uri.c @@ -27,6 +27,10 @@ #include "shlwapi.h"
#include "strsafe.h" +#include "winternl.h" +#include "inaddr.h" +#include "in6addr.h" +#include "ip2string.h"
#define URI_DISPLAY_NO_ABSOLUTE_URI 0x1 #define URI_DISPLAY_NO_DEFAULT_PORT_AUTH 0x2 @@ -128,27 +132,6 @@ typedef struct { DWORD username_len; } UriBuilder;
-typedef struct { - const WCHAR *str; - DWORD len; -} h16; - -typedef struct { - /* IPv6 addresses can hold up to 8 h16 components. */ - h16 components[8]; - DWORD h16_count; - - /* An IPv6 can have 1 elision ("::"). */ - const WCHAR *elision; - - /* An IPv6 can contain 1 IPv4 address as the last 32bits of the address. */ - const WCHAR *ipv4; - DWORD ipv4_len; - - INT components_size; - INT elision_size; -} ipv6_address; - typedef struct { BSTR uri;
@@ -173,7 +156,7 @@ typedef struct { DWORD host_len; Uri_HOST_TYPE host_type;
- ipv6_address ipv6_address; + IN6_ADDR ipv6_address;
BOOL has_port; const WCHAR *port; @@ -434,31 +417,6 @@ static inline BOOL is_hierarchical_uri(const WCHAR **ptr, const parse_data *data return FALSE; }
-/* Computes the size of the given IPv6 address. - * Each h16 component is 16 bits. If there is an IPv4 address, it's - * 32 bits. If there's an elision it can be 16 to 128 bits, depending - * on the number of other components. - * - * Modeled after google-url's CheckIPv6ComponentsSize function - */ -static void compute_ipv6_comps_size(ipv6_address *address) { - address->components_size = address->h16_count * 2; - - if(address->ipv4) - /* IPv4 address is 4 bytes. */ - address->components_size += 4; - - if(address->elision) { - /* An elision can be anywhere from 2 bytes up to 16 bytes. - * Its size depends on the size of the h16 and IPv4 components. - */ - address->elision_size = 16 - address->components_size; - if(address->elision_size < 2) - address->elision_size = 2; - } else - address->elision_size = 0; -} - /* Taken from dlls/jscript/lex.c */ static int hex_to_int(WCHAR val) { if(val >= '0' && val <= '9') @@ -693,72 +651,6 @@ static INT find_file_extension(const WCHAR *path, DWORD path_len) { return -1; }
-/* Computes the location where the elision should occur in the IPv6 - * address using the numerical values of each component stored in - * 'values'. If the address shouldn't contain an elision then 'index' - * is assigned -1 as its value. Otherwise 'index' will contain the - * starting index (into values) where the elision should be, and 'count' - * will contain the number of cells the elision covers. - * - * NOTES: - * Windows will expand an elision if the elision only represents one h16 - * component of the address. - * - * Ex: [1::2:3:4:5:6:7] -> [1:0:2:3:4:5:6:7] - * - * If the IPv6 address contains an IPv4 address, the IPv4 address is also - * considered for being included as part of an elision if all its components - * are zeros. - * - * Ex: [1:2:3:4:5:6:0.0.0.0] -> [1:2:3:4:5:6::] - */ -static void compute_elision_location(const ipv6_address *address, const USHORT values[8], - INT *index, DWORD *count) { - DWORD i, max_len, cur_len; - INT max_index, cur_index; - - max_len = cur_len = 0; - max_index = cur_index = -1; - for(i = 0; i < 8; ++i) { - BOOL check_ipv4 = (address->ipv4 && i == 6); - BOOL is_end = (check_ipv4 || i == 7); - - if(check_ipv4) { - /* Check if the IPv4 address contains only zeros. */ - if(values[i] == 0 && values[i+1] == 0) { - if(cur_index == -1) - cur_index = i; - - cur_len += 2; - ++i; - } - } else if(values[i] == 0) { - if(cur_index == -1) - cur_index = i; - - ++cur_len; - } - - if(is_end || values[i] != 0) { - /* We only consider it for an elision if it's - * more than 1 component long. - */ - if(cur_len > 1 && cur_len > max_len) { - /* Found the new elision location. */ - max_len = cur_len; - max_index = cur_index; - } - - /* Reset the current range for the next range of zeros. */ - cur_index = -1; - cur_len = 0; - } - } - - *index = max_index; - *count = max_len; -} - /* Removes all the leading and trailing white spaces or * control characters from the URI and removes all control * characters inside of the URI string. 
@@ -798,30 +690,6 @@ static BSTR pre_process_uri(LPCWSTR uri) { return ret; }
-/* Converts the specified IPv4 address into an uint value. - * - * This function assumes that the IPv4 address has already been validated. - */ -static UINT ipv4toui(const WCHAR *ip, DWORD len) { - UINT ret = 0; - DWORD comp_value = 0; - const WCHAR *ptr; - - for(ptr = ip; ptr < ip+len; ++ptr) { - if(*ptr == '.') { - ret <<= 8; - ret += comp_value; - comp_value = 0; - } else - comp_value = comp_value*10 + (*ptr-'0'); - } - - ret <<= 8; - ret += comp_value; - - return ret; -} - /* Converts an IPv4 address in numerical form into its fully qualified * string form. This function returns the number of characters written * to 'dest'. If 'dest' is NULL this function will return the number of @@ -863,70 +731,6 @@ static DWORD ui2str(WCHAR *dest, UINT value) { return ret; }
-/* Converts a h16 component (from an IPv6 address) into its - * numerical value. - * - * This function assumes that the h16 component has already been validated. - */ -static USHORT h16tous(h16 component) { - DWORD i; - USHORT ret = 0; - - for(i = 0; i < component.len; ++i) { - ret <<= 4; - ret += hex_to_int(component.str[i]); - } - - return ret; -} - -/* Converts an IPv6 address into its 128 bits (16 bytes) numerical value. - * - * This function assumes that the ipv6_address has already been validated. - */ -static BOOL ipv6_to_number(const ipv6_address *address, USHORT number[8]) { - DWORD i, cur_component = 0; - BOOL already_passed_elision = FALSE; - - for(i = 0; i < address->h16_count; ++i) { - if(address->elision) { - if(address->components[i].str > address->elision && !already_passed_elision) { - /* Means we just passed the elision and need to add its values to - * 'number' before we do anything else. - */ - INT j; - for(j = 0; j < address->elision_size; j+=2) - number[cur_component++] = 0; - - already_passed_elision = TRUE; - } - } - - number[cur_component++] = h16tous(address->components[i]); - } - - /* Case when the elision appears after the h16 components. */ - if(!already_passed_elision && address->elision) { - INT j; - for(j = 0; j < address->elision_size; j+=2) - number[cur_component++] = 0; - } - - if(address->ipv4) { - UINT value = ipv4toui(address->ipv4, address->ipv4_len); - - if(cur_component != 6) { - ERR("(%p %p): Failed sanity check with %d\n", address, number, cur_component); - return FALSE; - } - - number[cur_component++] = (value >> 16) & 0xffff; - number[cur_component] = value & 0xffff; - } - - return TRUE; -} - /* Checks if the characters pointed to by 'ptr' are * a percent encoded data octet. * @@ -1566,141 +1370,17 @@ static BOOL parse_reg_name(const WCHAR **ptr, parse_data *data, DWORD extras) { * * h16 = 1*4HEXDIG * ; 16 bits of address represented in hexadecimal. - * - * Modeled after google-url's 'DoParseIPv6' function. 
*/ static BOOL parse_ipv6address(const WCHAR **ptr, parse_data *data) { - const WCHAR *start, *cur_start; - ipv6_address ip; - - start = cur_start = *ptr; - memset(&ip, 0, sizeof(ipv6_address)); - - for(;; ++(*ptr)) { - /* Check if we're on the last character of the host. */ - BOOL is_end = (is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN) - || **ptr == ']'); - - BOOL is_split = (**ptr == ':'); - BOOL is_elision = (is_split && !is_end && *(*ptr+1) == ':'); - - /* Check if we're at the end of a component, or - * if we're at the end of the IPv6 address. - */ - if(is_split || is_end) { - DWORD cur_len = 0; - - cur_len = *ptr - cur_start; - - /* h16 can't have a length > 4. */ - if(cur_len > 4) { - *ptr = start; - - TRACE("(%p %p): h16 component to long.\n", ptr, data); - return FALSE; - } - - if(cur_len == 0) { - /* An h16 component can't have the length of 0 unless - * the elision is at the beginning of the address, or - * at the end of the address. - */ - if(!((*ptr == start && is_elision) || - (is_end && (*ptr-2) == ip.elision))) { - *ptr = start; - TRACE("(%p %p): IPv6 component cannot have a length of 0.\n", ptr, data); - return FALSE; - } - } - - if(cur_len > 0) { - /* An IPv6 address can have no more than 8 h16 components. */ - if(ip.h16_count >= 8) { - *ptr = start; - TRACE("(%p %p): Not a IPv6 address, too many h16 components.\n", ptr, data); - return FALSE; - } - - ip.components[ip.h16_count].str = cur_start; - ip.components[ip.h16_count].len = cur_len; - - TRACE("(%p %p): Found h16 component %s, len=%d, h16_count=%d\n", - ptr, data, debugstr_wn(cur_start, cur_len), cur_len, - ip.h16_count); - ++ip.h16_count; - } - } - - if(is_end) - break; + const WCHAR *terminator;
- if(is_elision) { - /* A IPv6 address can only have 1 elision ('::'). */ - if(ip.elision) { - *ptr = start; - - TRACE("(%p %p): IPv6 address cannot have 2 elisions.\n", ptr, data); - return FALSE; - } - - ip.elision = *ptr; - ++(*ptr); - } - - if(is_split) - cur_start = *ptr+1; - else { - if(!check_ipv4address(ptr, TRUE)) { - if(!is_hexdigit(**ptr)) { - /* Not a valid character for an IPv6 address. */ - *ptr = start; - return FALSE; - } - } else { - /* Found an IPv4 address. */ - ip.ipv4 = cur_start; - ip.ipv4_len = *ptr - cur_start; - - TRACE("(%p %p): Found an attached IPv4 address %s len=%d.\n", - ptr, data, debugstr_wn(ip.ipv4, ip.ipv4_len), ip.ipv4_len); - - /* IPv4 addresses can only appear at the end of a IPv6. */ - break; - } - } - } - - compute_ipv6_comps_size(&ip); - - /* Make sure the IPv6 address adds up to 16 bytes. */ - if(ip.components_size + ip.elision_size != 16) { - *ptr = start; - TRACE("(%p %p): Invalid IPv6 address, did not add up to 16 bytes.\n", ptr, data); + if(RtlIpv6StringToAddressW(*ptr, &terminator, &data->ipv6_address) != 0) + return FALSE; + if(*terminator != ']' && !is_auth_delim(*terminator, data->scheme_type != URL_SCHEME_UNKNOWN)) return FALSE; - } - - if(ip.elision_size == 2) { - /* For some reason on Windows if an elision that represents - * only one h16 component is encountered at the very begin or - * end of an IPv6 address, Windows does not consider it a - * valid IPv6 address. - * - * Ex: [::2:3:4:5:6:7] is not valid, even though the sum - * of all the components == 128bits. - */ - if(ip.elision < ip.components[0].str || - ip.elision > ip.components[ip.h16_count-1].str) { - *ptr = start; - TRACE("(%p %p): Invalid IPv6 address. Detected elision of 2 bytes at the beginning or end of the address.\n", - ptr, data); - return FALSE; - } - }
+ *ptr = terminator; data->host_type = Uri_HOST_IPV6; - data->ipv6_address = ip; - - TRACE("(%p %p): Found valid IPv6 literal %s len=%d\n", ptr, data, debugstr_wn(start, *ptr-start), (int)(*ptr-start)); return TRUE; }
@@ -2555,96 +2235,23 @@ static BOOL canonicalize_ipv6address(const parse_data *data, Uri *uri, memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR)); uri->canon_len += data->host_len; } else { - USHORT values[8]; - INT elision_start; - DWORD i, elision_len; + WCHAR buffer[46]; + ULONG size = ARRAY_SIZE(buffer);
- if(!ipv6_to_number(&(data->ipv6_address), values)) { - TRACE("(%p %p %x %d): Failed to compute numerical value for IPv6 address.\n", - data, uri, flags, computeOnly); - return FALSE; + if(computeOnly) + { + RtlIpv6AddressToStringExW(&data->ipv6_address, 0, 0, buffer, &size); + uri->canon_len += size + 1; } - - if(!computeOnly) + else + { uri->canon_uri[uri->canon_len] = '['; - ++uri->canon_len; - - /* Find where the elision should occur (if any). */ - compute_elision_location(&(data->ipv6_address), values, &elision_start, &elision_len); - - TRACE("%p %p %x %d): Elision starts at %d, len=%u\n", data, uri, flags, - computeOnly, elision_start, elision_len); - - for(i = 0; i < 8; ++i) { - BOOL in_elision = (elision_start > -1 && i >= elision_start && - i < elision_start+elision_len); - BOOL do_ipv4 = (i == 6 && data->ipv6_address.ipv4 && !in_elision && - data->ipv6_address.h16_count == 0); - - if(i == elision_start) { - if(!computeOnly) { - uri->canon_uri[uri->canon_len] = ':'; - uri->canon_uri[uri->canon_len+1] = ':'; - } - uri->canon_len += 2; - } - - /* We can ignore the current component if we're in the elision. */ - if(in_elision) - continue; - - /* We only add a ':' if we're not at i == 0, or when we're at - * the very end of elision range since the ':' colon was handled - * earlier. Otherwise we would end up with ":::" after elision. - */ - if(i != 0 && !(elision_start > -1 && i == elision_start+elision_len)) { - if(!computeOnly) - uri->canon_uri[uri->canon_len] = ':'; - ++uri->canon_len; - } - - if(do_ipv4) { - UINT val; - DWORD len; - - /* Combine the two parts of the IPv4 address values. */ - val = values[i]; - val <<= 16; - val += values[i+1]; - - if(!computeOnly) - len = ui2ipv4(uri->canon_uri+uri->canon_len, val); - else - len = ui2ipv4(NULL, val); - - uri->canon_len += len; - ++i; - } else { - /* Write a regular h16 component to the URI. */ - - /* Short circuit for the trivial case. 
*/ - if(values[i] == 0) { - if(!computeOnly) - uri->canon_uri[uri->canon_len] = '0'; - ++uri->canon_len; - } else { - static const WCHAR formatW[] = {'%','x',0}; - - if(!computeOnly) - uri->canon_len += swprintf(uri->canon_uri+uri->canon_len, 5, - formatW, values[i]); - else { - WCHAR tmp[5]; - uri->canon_len += swprintf(tmp, ARRAY_SIZE(tmp), formatW, values[i]); - } - } - } - } - - /* Add the closing ']'. */ - if(!computeOnly) + ++uri->canon_len; + RtlIpv6AddressToStringExW(&data->ipv6_address, 0, 0, uri->canon_uri + uri->canon_len, &size); + uri->canon_len += size - 1; uri->canon_uri[uri->canon_len] = ']'; - ++uri->canon_len; + ++uri->canon_len; + } }
uri->host_len = uri->canon_len - uri->host_start;