Module: wine Branch: master Commit: 0ecec6d7c01050ac5b97a8bab7b9d4177f934b98 URL: http://source.winehq.org/git/wine.git/?a=commit;h=0ecec6d7c01050ac5b97a8bab7b9d4177f934b98
Author: Thomas Mullaly thomas.mullaly@gmail.com Date: Sun Jul 18 10:27:21 2010 -0400
urlmon: Implemented a path parser for hierarchical URIs.
---
dlls/urlmon/tests/uri.c | 32 ++++++++++++++++++- dlls/urlmon/uri.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 109 insertions(+), 2 deletions(-)
diff --git a/dlls/urlmon/tests/uri.c b/dlls/urlmon/tests/uri.c index b87b9a3..7cd11ff 100644 --- a/dlls/urlmon/tests/uri.c +++ b/dlls/urlmon/tests/uri.c @@ -2217,6 +2217,34 @@ static const uri_properties uri_tests[] = { {URL_SCHEME_HTTP,S_OK,FALSE}, {URLZONE_INVALID,E_NOTIMPL,FALSE} } + }, + { "zip://www.google.com\\test", Uri_CREATE_NO_CANONICALIZE, S_OK, FALSE, + Uri_HAS_ABSOLUTE_URI|Uri_HAS_AUTHORITY|Uri_HAS_DISPLAY_URI|Uri_HAS_DOMAIN| + Uri_HAS_HOST|Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME, + TRUE, + { + {"zip://www.google.com\\test",S_OK,TRUE}, + {"www.google.com\\test",S_OK,FALSE}, + {"zip://www.google.com\\test",S_OK,TRUE}, + {"google.com\\test",S_OK,FALSE}, + {"",S_FALSE,TRUE}, + {"",S_FALSE,TRUE}, + {"www.google.com\\test",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,TRUE}, + {"",S_FALSE,TRUE}, + {"",S_FALSE,TRUE}, + {"zip://www.google.com\\test",S_OK,FALSE}, + {"zip",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,FALSE} + }, + { + {Uri_HOST_DNS,S_OK,FALSE}, + {0,S_FALSE,FALSE}, + {URL_SCHEME_UNKNOWN,S_OK,FALSE}, + {URLZONE_INVALID,E_NOTIMPL,FALSE} + } } };
@@ -2264,7 +2292,9 @@ static const invalid_uri invalid_uri_tests[] = { /* Invalid port with IPv4 address. */ {"http://www.winehq.org:1abcd",0,FALSE}, /* Invalid port with IPv6 address. */ - {"http://[::ffff]:32xy",0,FALSE} + {"http://[::ffff]:32xy",0,FALSE}, + /* Not allowed to have backslashes with NO_CANONICALIZE. */ + {"gopher://www.google.com\\test",Uri_CREATE_NO_CANONICALIZE,FALSE} };
typedef struct _uri_equality { diff --git a/dlls/urlmon/uri.c b/dlls/urlmon/uri.c index 20d8bb1..9ab1dda 100644 --- a/dlls/urlmon/uri.c +++ b/dlls/urlmon/uri.c @@ -113,6 +113,9 @@ typedef struct { const WCHAR *port; DWORD port_len; USHORT port_value; + + const WCHAR *path; + DWORD path_len; } parse_data;
static const CHAR hexDigits[] = "0123456789ABCDEF"; @@ -257,6 +260,10 @@ static inline BOOL is_hexdigit(WCHAR val) { (val >= '0' && val <= '9')); }
+static inline BOOL is_path_delim(WCHAR val) { + return (!val || val == '#' || val == '?'); +} + /* Computes the size of the given IPv6 address. * Each h16 component is 16bits, if there is an IPv4 address, it's * 32bits. If there's an elision it can be 16bits to 128bits, depending @@ -1482,6 +1489,70 @@ static BOOL parse_authority(const WCHAR **ptr, parse_data *data, DWORD flags) { return TRUE; }
+/* Attempts to parse the path information of a hierarchical URI. */ +static BOOL parse_path_hierarchical(const WCHAR **ptr, parse_data *data, DWORD flags) { + const WCHAR *start = *ptr; + static const WCHAR slash[] = {'/',0}; + + if(is_path_delim(**ptr)) { + if(data->scheme_type == URL_SCHEME_WILDCARD) { + /* Wildcard schemes don't get a '/' attached if their path is + * empty. + */ + data->path = NULL; + data->path_len = 0; + } else if(!(flags & Uri_CREATE_NO_CANONICALIZE)) { + /* If the path component is empty, then a '/' is added. */ + data->path = slash; + data->path_len = 1; + } + } else { + while(!is_path_delim(**ptr)) { + if(**ptr == '%' && data->scheme_type != URL_SCHEME_UNKNOWN && + data->scheme_type != URL_SCHEME_FILE) { + if(!check_pct_encoded(ptr)) { + *ptr = start; + return FALSE; + } else + continue; + } else if(**ptr == '\\') { + /* Not allowed to have a backslash if NO_CANONICALIZE is set + * and the scheme is known type (but not a file scheme). + */ + if(flags & Uri_CREATE_NO_CANONICALIZE) { + if(data->scheme_type != URL_SCHEME_FILE && + data->scheme_type != URL_SCHEME_UNKNOWN) { + *ptr = start; + return FALSE; + } + } + } + + ++(*ptr); + } + + /* The only time a URI doesn't have a path is when + * the NO_CANONICALIZE flag is set and the raw URI + * didn't contain one. + */ + if(*ptr == start) { + data->path = NULL; + data->path_len = 0; + } else { + data->path = start; + data->path_len = *ptr - start; + } + } + + if(data->path) + TRACE("(%p %p %x): Parsed path %s len=%d\n", ptr, data, flags, + debugstr_wn(data->path, data->path_len), data->path_len); + else + TRACE("(%p %p %x): The URI contained no path\n", ptr, data, flags); + + return TRUE; +} + /* Determines how the URI should be parsed after the scheme information. 
* * If the scheme is followed, by "//" then, it is treated as an hierarchical URI @@ -1525,11 +1596,15 @@ static BOOL parse_hierpart(const WCHAR **ptr, parse_data *data, DWORD flags) { TRACE("(%p %p %x): Treating URI as an hierarchical URI.\n", ptr, data, flags); data->is_opaque = FALSE;
+ if(data->scheme_type == URL_SCHEME_FILE) + /* Skip past the "//" after the scheme (if any). */ + check_hierarchical(ptr); + /* TODO: Handle hierarchical URI's, parse authority then parse the path. */ if(!parse_authority(ptr, data, flags)) return FALSE;
- return TRUE; + return parse_path_hierarchical(ptr, data, flags); } }
@@ -1564,6 +1639,8 @@ static BOOL parse_uri(parse_data *data, DWORD flags) { if(!parse_hierpart(pptr, data, flags)) return FALSE;
+ /* TODO: Parse query and fragment (if the URI has one). */ + TRACE("(%p %x): FINISHED PARSING URI.\n", data, flags); return TRUE; }