From 1fd32c995cb27d6f3b5879fcf17417284eb039d0 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Thu, 10 Oct 2013 23:13:13 +0200 Subject: [PATCH] iri: Fix parsing of some URLs contained in HTML documents --- src/ChangeLog | 5 +++++ src/html-url.c | 10 ++++++++-- src/url.c | 5 ++++- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/src/ChangeLog b/src/ChangeLog index 43997f88..81320ca4 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,8 @@ +2013-10-10 Giuseppe Scrivano + + * url.c (url_parse): Try to convert UTF-8 URLs to IDN. + * html-url.c (append_url): Parse URLs specifying an IRI structure. + 2013-09-13 Tim Ruehsen * recur.c (download_child_p): fix compile error when diff --git a/src/html-url.c b/src/html-url.c index bb2b20e3..1bb44b46 100644 --- a/src/html-url.c +++ b/src/html-url.c @@ -284,6 +284,10 @@ append_url (const char *link_uri, int position, int size, const char *base = ctx->base ? ctx->base : ctx->parent_base; struct url *url; + struct iri *iri = iri_new (); + set_uri_encoding (iri, opt.locale, true); + iri->utf8_encode = true; + if (!base) { DEBUGP (("%s: no base, merge will use \"%s\".\n", @@ -301,7 +305,7 @@ append_url (const char *link_uri, int position, int size, return NULL; } - url = url_parse (link_uri, NULL, NULL, false); + url = url_parse (link_uri, NULL, iri, false); if (!url) { DEBUGP (("%s: link \"%s\" doesn't parse.\n", @@ -323,7 +327,7 @@ append_url (const char *link_uri, int position, int size, quote_n (2, link_uri), quotearg_n_style (3, escape_quoting_style, complete_uri))); - url = url_parse (complete_uri, NULL, NULL, false); + url = url_parse (complete_uri, NULL, iri, false); if (!url) { DEBUGP (("%s: merged link \"%s\" doesn't parse.\n", @@ -334,6 +338,8 @@ append_url (const char *link_uri, int position, int size, xfree (complete_uri); } + iri_free (iri); + DEBUGP (("appending %s to urlpos.\n", quote (url->url))); newel = xnew0 (struct urlpos); diff --git a/src/url.c b/src/url.c index bf9d6971..f554432c 100644 --- a/src/url.c +++ b/src/url.c @@ -701,7 +701,10 @@ url_parse (const char *url, int *error, struct iri *iri, bool percent_encode) if (!iri->utf8_encode) new_url = NULL; else - iri->orig_url = xstrdup (url); + { + iri->orig_url = xstrdup (url); + percent_encode = true; + } } /* XXX XXX Could that change introduce (security) bugs ??? XXX XXX*/