1
0
mirror of https://github.com/moparisthebest/wget synced 2024-07-03 16:38:41 -04:00

iri: Fix parsing of some URLs contained in HTML documents

This commit is contained in:
Giuseppe Scrivano 2013-10-10 23:13:13 +02:00
parent c289349684
commit 1fd32c995c
3 changed files with 17 additions and 3 deletions

View File

@@ -1,3 +1,8 @@
2013-10-10 Giuseppe Scrivano <gscrivan@redhat.com>
* url.c (url_parse): Try to convert UTF-8 URLs to IDN.
* html-url.c (append_url): Parse URLs specifying an IRI structure.
2013-09-13 Tim Ruehsen <tim.ruehsen@gmx.de>
* recur.c (download_child_p): fix compile error when

View File

@@ -284,6 +284,10 @@ append_url (const char *link_uri, int position, int size,
const char *base = ctx->base ? ctx->base : ctx->parent_base;
struct url *url;
struct iri *iri = iri_new ();
set_uri_encoding (iri, opt.locale, true);
iri->utf8_encode = true;
if (!base)
{
DEBUGP (("%s: no base, merge will use \"%s\".\n",
@@ -301,7 +305,7 @@ append_url (const char *link_uri, int position, int size,
return NULL;
}
url = url_parse (link_uri, NULL, NULL, false);
url = url_parse (link_uri, NULL, iri, false);
if (!url)
{
DEBUGP (("%s: link \"%s\" doesn't parse.\n",
@@ -323,7 +327,7 @@ append_url (const char *link_uri, int position, int size,
quote_n (2, link_uri),
quotearg_n_style (3, escape_quoting_style, complete_uri)));
url = url_parse (complete_uri, NULL, NULL, false);
url = url_parse (complete_uri, NULL, iri, false);
if (!url)
{
DEBUGP (("%s: merged link \"%s\" doesn't parse.\n",
@@ -334,6 +338,8 @@ append_url (const char *link_uri, int position, int size,
xfree (complete_uri);
}
iri_free (iri);
DEBUGP (("appending %s to urlpos.\n", quote (url->url)));
newel = xnew0 (struct urlpos);

View File

@@ -701,7 +701,10 @@ url_parse (const char *url, int *error, struct iri *iri, bool percent_encode)
if (!iri->utf8_encode)
new_url = NULL;
else
{
iri->orig_url = xstrdup (url);
percent_encode = true;
}
}
/* XXX XXX Could that change introduce (security) bugs ??? XXX XXX*/