diff --git a/src/ChangeLog b/src/ChangeLog index 74b58e4e..dbf13b80 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -40,8 +40,11 @@ translated by http_loop to RETROK. * url.c (are_urls_equal): Don't call getchar_from_escaped_string if u2 is shorter than u1. + (getchar_from_escaped_string): Don't decode reserved characters. + Handle illegally appearing '%'s as literal '%'s. Ensure hex + digits before attempting to decode. (test_are_urls_equal): Added tests to handle u2 shorter than u1, - and %2f not treated the same as / (latter currently fails). + and %2f not treated the same as /. * spider.c (in_url_list_p): Don't call are_urls_equal if one of them is NULL. diff --git a/src/url.c b/src/url.c index 683a7745..d721501a 100644 --- a/src/url.c +++ b/src/url.c @@ -1939,10 +1939,7 @@ getchar_from_escaped_string (const char *str, char *c) if (p[0] == '%') { - if (p[1] == 0) - return 0; /* error: invalid string */ - - if (p[1] == '%') + if (!ISXDIGIT(p[1]) || !ISXDIGIT(p[2])) { *c = '%'; return 1; @@ -1953,8 +1950,13 @@ getchar_from_escaped_string (const char *str, char *c) return 0; /* error: invalid string */ *c = X2DIGITS_TO_NUM (p[1], p[2]); - - return 3; + if (URL_RESERVED_CHAR(*c)) + { + *c = '%'; + return 1; + } + else + return 3; } } else