Automated merge.

2024-07-03 16:38:41 -04:00 · 2008-08-28 01:21:59 -07:00 · 2008-08-28 01:21:59 -07:00 · 090f1596ae
commit 090f1596ae
parent f0438edcf2 26a3eea8e2
40 changed files with 1936 additions and 104 deletions
--- a/8
+++ b/8
@ -9,6 +9,14 @@
 	* AUTHORS: Added Steven Schubiger.
 2008-06-26  Xavier Saint  <wget@sxav.eu>
 	* configure.ac : IRI support requires libiconv; check for it.
 2008-06-14  Xavier Saint  <wget@sxav.eu>
 	* configure.ac: Add support for IRIs
 2008-05-29  Micah Cowan  <micah@cowan.name>
 	* po/*.po: Updated from TP (the 1.11.3 set).
--- a/configure.ac
+++ b/configure.ac
@ -460,6 +460,77 @@ else
 fi
 AC_SUBST(COMMENT_IF_NO_POD2MAN)
 dnl
 dnl Check for IDN/IRIs
 dnl
 AC_ARG_ENABLE(iri,
  AC_HELP_STRING([--disable-iri],[disable IDN/IRIs support]),
  [case "${enable_iri}" in
    no)
      dnl Disable IRIs checking
      AC_MSG_NOTICE([disabling IRIs at user request])
      iri=no
      ;;
    yes)
      dnl IRIs explicitly enabled
      iri=yes
      force_iri=yes
      ;;
    auto)
      dnl Auto-detect IRI
      iri=yes
      ;;
    *)
      AC_MSG_ERROR([Invalid --enable-iri argument \`$enable_iri'])
      ;;
    esac
  ], [
    dnl If nothing is specified, assume auto-detection
    iri=yes
  ]
 )
 AC_ARG_WITH(libidn, AC_HELP_STRING([--with-libidn=[DIR]],
                                   [Support IDN/IRIs (needs GNU Libidn)]),
                                   libidn=$withval, libidn="")
 if test "X$iri" != "Xno"; then
  AM_ICONV
  if test "X$am_cv_func_iconv" != "Xyes"; then
    iri=no
    if test "X$force_iri" = "Xyes"; then
      AC_MSG_ERROR([Libiconv is required for IRIs support])
    else
      AC_MSG_NOTICE([disabling IRIs because libiconv wasn't found])
    fi
  fi
 fi
 if test "X$iri" != "Xno"; then
  if test "$libidn" != ""; then
    LDFLAGS="${LDFLAGS} -L$libidn/lib"
    CPPFLAGS="${CPPFLAGS} -I$libidn/include"
  fi
  AC_CHECK_HEADER(idna.h,
    AC_CHECK_LIB(idn, stringprep_check_version,
      [iri=yes LIBS="${LIBS} -lidn"], iri=no),
    iri=no)
  if test "X$iri" != "Xno" ; then
    AC_DEFINE(ENABLE_IRI, 1, [Define if IRI support is enabled.])
    AC_MSG_NOTICE([Enabling support for IRI.])
  else
    AC_MSG_WARN([Libidn not found])
  fi
 fi
 dnl Needed by src/Makefile.am
 AM_CONDITIONAL([IRI_IS_ENABLED], [test "X$iri" != "Xno"])
 dnl
 dnl Create output
 dnl
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@ -1,3 +1,12 @@
 2008-08-03  Xavier Saint  <wget@sxav.eu>
 	* wget.texi : Add option descriptions for the three new
 	options --iri, --locale and --remote-encoding related to
 	IRI support.
 	* sample.wgetrc : Add commented lines for the three new
 	commands iri, locale and remoteencoding related to IRI support.
 2008-08-03  Micah Cowan  <micah@cowan.name>
 	* wget.texi: Don't set UPDATED; already set by version.texi.
--- a/doc/sample.wgetrc
+++ b/doc/sample.wgetrc
@ -113,3 +113,12 @@ waitretry = 10
 # To try ipv6 addresses first:
 #prefer-family = IPv6
 # Set default IRI support state
 #iri = off
 # Force the default system encoding
 #locale = UTF-8
 # Force the default remote server encoding
 #remoteencoding = UTF-8
--- a/doc/wget.texi
+++ b/doc/wget.texi
@ -674,6 +674,30 @@ Another instance where you'll get a garbled file if you try to use
 Note that @samp{-c} only works with @sc{ftp} servers and with @sc{http}
 servers that support the @code{Range} header.
@cindex iri support
@cindex idn support
@item --iri
 Turn on internationalized URI (IRI) support. Use @samp{--iri=no} to
 turn it off. IRI support is activated by default.
 You can set the default state of IRI support using the @code{iri} command in
@file{.wgetrc}. That setting may be overridden from the command line.
@cindex local encoding
@cindex locale
@item --locale=@var{encoding}
 Force Wget to use @var{encoding} as the default system encoding. That affects
 how Wget converts URLs specified as arguments from locale to @sc{utf-8} for
 IRI support.
 Wget uses the function @code{nl_langinfo()} and then the @code{CHARSET}
 environment variable to get the locale. If it fails, @sc{ascii} is used.
 You can set the default locale using the @code{locale} command in
@file{.wgetrc}. That setting may be overridden from the command line.
@cindex progress indicator
@cindex dot style
@item --progress=@var{type}
@ -705,6 +729,21 @@ command line.  The exception is that, when the output is not a TTY, the
 ``dot'' progress will be favored over ``bar''.  To force the bar output,
 use @samp{--progress=bar:force}.
@cindex remote encoding
@item --remote-encoding=@var{encoding}
 Force Wget to use @var{encoding} as the default remote server encoding. That
 affects how Wget converts URIs found in files from remote encoding to
@sc{utf-8} during a recursive fetch. This option is only useful for
 IRI support, for the interpretation of non-@sc{ascii} characters.
 For HTTP, remote encoding can be found in HTTP @code{Content-Type}
 header and in HTML @code{Content-Type http-equiv} meta tag.
 You can set the default encoding using the @code{remoteencoding}
 command in @file{.wgetrc}. That setting may be overridden from the
 command line.
@item -N
@itemx --timestamping
 Turn on time-stamping.  @xref{Time-Stamping}, for details.
--- a/src/ChangeLog
+++ b/src/ChangeLog
@ -32,11 +32,27 @@
 	* init.c (cleanup): Free the memory associated with the base
 	option (when DEBUG_MALLOC is defined).
 2008-07-02  Xavier Saint  <wget@sxav.eu>
 	* iri.c, iri.h  : New function idn_decode() to decode ASCII
 	encoded hostname to the locale.
 	* host.c : Show hostname to be resolved both in locale and
 	ASCII encoded.
 2008-06-28  Steven Schubiger  <stsc@members.fsf.org>
 	* retr.c (retrieve_from_file): Allow for reading the links from
 	an external file (HTTP/FTP).
 2008-06-26  Xavier Saint  <wget@sxav.eu>
 	* iri.c, iri.h : New functions locale_to_utf8() and
 	idn_encode() adding basic capabilities of IRI/IDN.
 	* url.c : Convert URLs from locale to UTF-8 allowing a basic
 	support of IRI/IDN
 2008-06-25  Steven Schubiger  <stsc@members.fsf.org>
 	* ftp.c (getftp): When spidering a FTP URL, emit a diagnostic
@ -61,7 +77,7 @@
 	* http.c: Make -nv --spider include the file's name when it
 	exists.
-	
+
 2008-06-22  Micah Cowan  <micah@cowan.name>
 	* Makefile.am (version.c): Fixed version string invocation so it
@ -69,12 +85,57 @@
 	string vars pointers-to-const, and moved line lengths
 	below 80 (in Makefile.am, not in version.c).
 2008-06-19  Xavier Saint  <wget@sxav.eu>
 	* iri.c, iri.h : New function check_encoding_name() as
 	a preliminary encoding name check.
 	* main.c, iri.c : Make use of check_encoding_name().
 2008-06-19  Xavier Saint  <wget@sxav.eu>
 	* iri.c : Include missing stringprep.h file and add a
 	cast.
 	* init.c : set a default initial value for opt.enable_iri,
 	opt.locale and opt.encoding_remote.
 2008-06-19  Xavier Saint  <wget@sxav.eu>
 	* iri.c, iri.h : Add a new function find_locale() to find
 	out the local system encoding.
 	* main.c : Make use of find_locale().
 2008-06-19  Xavier Saint  <wget@sxav.eu>
 	* html-url.c : Add "content-type" meta tag parsing for
 	retrieving page encoding.
 	* iri.h : Make no-op version of parse_charset() return
 	NULL.
 2008-06-16  Micah Cowan  <micah@cowan.name>
 	* http.c (http_loop): When hstat.len is higher than the
 	successfully completed content's length, but it's because we
 	_set_ it that way, don't abort.
 2008-06-14  Xavier Saint  <wget@sxav.eu>
 	* iri.c, iri.h : New files.
 	* Makefile.am : Add files iri.h and conditional iri.c.
 	* build_info.c : Add compiled feature "iri".
 	* http.c : include iri.h and parse charset from Content-Type
 	header.
 	* init.c, main.c, options.h : if an option isn't supported
 	at compile time, don't get rid of it; show a dummy
 	message instead if it is used.
 2008-06-13  Micah Cowan  <micah@cowan.name>
 	* build_info.c: ENABLE_NTLM, not HAVE_NTLM; distinguish OpenSSL
@ -118,11 +179,11 @@
 	default.
 2008-05-17  Kenny Parnell  <k.parnell@gmail.com>
-	
+
 	(cmd_spec_prefer_family): Initialize prefer_family to prefer_none.
 2008-05-17  Micah Cowan  <micah@cowan.name>
-	
+
 	* main.c (main): Handle Ctrl-D on command-line.
 2008-05-15  Steven Schubiger  <schubiger@gmail.com>
@ -161,7 +222,7 @@
 	* options.h: Add an according boolean member to the options
 	struct.
-	
+
 	* sysdep.h: Comment the defines __EXTENSIONS__ and _GNU_SOURCE
 	out, because they're now defined independently by config.h.
--- a/src/Makefile.am
+++ b/src/Makefile.am
@ -30,6 +30,10 @@
 # Version: @VERSION@
 #
 if IRI_IS_ENABLED
 IRI_OBJ = iri.c
 endif
 # The following line is losing on some versions of make!
 DEFS     = @DEFS@ -DSYSTEM_WGETRC=\"$(sysconfdir)/wgetrc\" -DLOCALEDIR=\"$(localedir)\"
 LIBS     = @LIBSSL@ @LIBGNUTLS@ @LIBINTL@ @LIBS@
@ -40,8 +44,8 @@ wget_SOURCES = build_info.c cmpt.c connect.c convert.c cookies.c ftp.c    \
 	       ftp-basic.c ftp-ls.c hash.c host.c html-parse.c html-url.c \
 	       http.c init.c log.c main.c netrc.c progress.c ptimer.c     \
 	       recur.c res.c retr.c snprintf.c spider.c url.c	          \
-	       utils.c				          \
+	       utils.c $(IRI_OBJ)				          \
-	       css-url.h connect.h convert.h cookies.h \
+	       css-url.h connect.h convert.h cookies.h	                  \
 	       ftp.h gen-md5.h hash.h host.h html-parse.h html-url.h      \
 	       http.h http-ntlm.h init.h log.h mswindows.h netrc.h        \
 	       options.h progress.h ptimer.h recur.h res.h retr.h         \
--- a/src/build_info.c
+++ b/src/build_info.c
@ -100,6 +100,13 @@ const char* (compiled_features[]) =
 #else
  "-gettext",
 #endif
 #ifdef ENABLE_IRI
  "+iri",
 #else
  "-iri",
 #endif
  /* sentinel value */
  NULL
 };
--- a/src/connect.c
+++ b/src/connect.c
@ -266,9 +266,25 @@ connect_to_ip (const ip_address *ip, int port, const char *print)
  if (print)
    {
      const char *txt_addr = print_address (ip);
-      if (print && 0 != strcmp (print, txt_addr))
+      if (0 != strcmp (print, txt_addr))
-        logprintf (LOG_VERBOSE, _("Connecting to %s|%s|:%d... "),
+        {
-                   escnonprint_uri (print), txt_addr, port);
+				  char *str = NULL, *name;
          if (opt.enable_iri && (name = idn_decode ((char *) print)) != NULL)
            {
              int len = strlen (print) + strlen (name) + 4;
              str = xmalloc (len);
              snprintf (str, len, "%s (%s)", name, print);
              str[len-1] = '\0';
              xfree (name);
            }
          logprintf (LOG_VERBOSE, _("Connecting to %s|%s|:%d... "),
                     str ? str : escnonprint_uri (print), txt_addr, port);
 					if (str)
 					  xfree (str);
        }
      else
        logprintf (LOG_VERBOSE, _("Connecting to %s:%d... "), txt_addr, port);
    }
--- a/src/convert.c
+++ b/src/convert.c
@ -96,7 +96,7 @@ convert_links_in_hashtable (struct hash_table *downloaded_set,
      /* Parse the file...  */
      urls = is_css ? get_urls_css_file (file, url) :
-                      get_urls_html (file, url, NULL);
+                      get_urls_html (file, url, NULL, NULL);
      /* We don't respect meta_disallow_follow here because, even if
         the file is not followed, we might still want to convert the
--- a/src/ftp-basic.c
+++ b/src/ftp-basic.c
@ -68,7 +68,7 @@ ftp_response (int fd, char **ret_line)
        return FTPRERR;
      /* Strip trailing CRLF before printing the line, so that
-         escnonprint doesn't include bogus \012 and \015. */
+         quoting doesn't include bogus \012 and \015. */
      p = strchr (line, '\0');
      if (p > line && p[-1] == '\n')
        *--p = '\0';
--- a/src/host.c
+++ b/src/host.c
@ -712,8 +712,24 @@ lookup_host (const char *host, int flags)
  /* No luck with the cache; resolve HOST. */
  if (!silent && !numeric_address)
-    logprintf (LOG_VERBOSE, _("Resolving %s... "), 
+    {
-               quotearg_style (escape_quoting_style, host));
+      char *str = NULL, *name;
      if (opt.enable_iri && (name = idn_decode ((char *) host)) != NULL)
        {
          int len = strlen (host) + strlen (name) + 4;
          str = xmalloc (len);
          snprintf (str, len, "%s (%s)", name, host);
          str[len-1] = '\0';
          xfree (name);
        }
      logprintf (LOG_VERBOSE, _("Resolving %s... "),
                 quotearg_style (escape_quoting_style, str ? str : host));
      if (str)
        xfree (str);
    }
 #ifdef ENABLE_IPV6
  {
--- a/src/html-url.c
+++ b/src/html-url.c
@ -174,6 +174,10 @@ static const char *additional_attributes[] = {
 static struct hash_table *interesting_tags;
 static struct hash_table *interesting_attributes;
 /* Will contain the (last) charset found in 'http-equiv=content-type'
   meta tags  */
 static char *meta_charset;
 static void
 init_interesting (void)
 {
@ -284,7 +288,7 @@ append_url (const char *link_uri, int position, int size,
          return NULL;
        }
-      url = url_parse (link_uri, NULL);
+      url = url_parse (link_uri, NULL, NULL);
      if (!url)
        {
          DEBUGP (("%s: link \"%s\" doesn't parse.\n",
@ -303,7 +307,7 @@ append_url (const char *link_uri, int position, int size,
      DEBUGP (("%s: merge(\"%s\", \"%s\") -> %s\n",
               ctx->document_file, base, link_uri, complete_uri));
-      url = url_parse (complete_uri, NULL);
+      url = url_parse (complete_uri, NULL, NULL);
      if (!url)
        {
          DEBUGP (("%s: merged link \"%s\" doesn't parse.\n",
@ -553,6 +557,23 @@ tag_handle_meta (int tagid, struct taginfo *tag, struct map_context *ctx)
          entry->link_expect_html = 1;
        }
    }
  else if (http_equiv && 0 == strcasecmp (http_equiv, "content-type"))
    {
      /* Handle stuff like:
         <meta http-equiv="Content-Type" content="text/html; charset=CHARSET"> */
      char *mcharset;
      char *content = find_attr (tag, "content", NULL);
      if (!content)
        return;
      mcharset = parse_charset (content);
      if (!mcharset)
        return;
      xfree_null (meta_charset);
      meta_charset = mcharset;
    }
  else if (name && 0 == strcasecmp (name, "robots"))
    {
      /* Handle stuff like:
@ -617,7 +638,8 @@ collect_tags_mapper (struct taginfo *tag, void *arg)
   <base href=...> and does the right thing.  */
 struct urlpos *
-get_urls_html (const char *file, const char *url, bool *meta_disallow_follow)
+get_urls_html (const char *file, const char *url, bool *meta_disallow_follow,
               struct iri *iri)
 {
  struct file_memory *fm;
  struct map_context ctx;
@ -657,6 +679,10 @@ get_urls_html (const char *file, const char *url, bool *meta_disallow_follow)
  map_html_tags (fm->content, fm->length, collect_tags_mapper, &ctx, flags,
                 NULL, interesting_attributes);
  /* If meta charset isn't null, override content encoding */
  if (iri && meta_charset)
    set_content_encoding (iri, meta_charset);
  DEBUGP (("no-follow in %s: %d\n", file, ctx.nofollow));
  if (meta_disallow_follow)
    *meta_disallow_follow = ctx.nofollow;
@ -726,7 +752,7 @@ get_urls_file (const char *file)
          url_text = merged;
        }
-      url = url_parse (url_text, &up_error_code);
+      url = url_parse (url_text, &up_error_code, NULL);
      if (!url)
        {
          char *error = url_error (url_text, up_error_code);
--- a/src/html-url.h
+++ b/src/html-url.h
@ -44,7 +44,7 @@ struct map_context {
 };
 struct urlpos *get_urls_file (const char *);
-struct urlpos *get_urls_html (const char *, const char *, bool *);
+struct urlpos *get_urls_html (const char *, const char *, bool *, struct iri *);
 struct urlpos *append_url (const char *, int, int, struct map_context *);
 void free_urlpos (struct urlpos *);
--- a/src/http.c
+++ b/src/http.c
@ -1364,7 +1364,8 @@ free_hstat (struct http_stat *hs)
   If PROXY is non-NULL, the connection will be made to the proxy
   server, and u->url will be requested.  */
 static uerr_t
-gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
+gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
         struct iri *iri)
 {
  struct request *req;
@ -1827,7 +1828,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
          hs->local_file = url_file_name (u);
        }
    }
-  
+
  /* TODO: perform this check only once. */
  if (!hs->existence_checked && file_exists_p (hs->local_file))
    {
@ -1896,7 +1897,7 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file));
              local_dot_orig_file_exists = true;
              local_filename = filename_plus_orig_suffix;
            }
-        }      
+        }
      if (!local_dot_orig_file_exists)
        /* Couldn't stat() <file>.orig, so try to stat() <file>. */
@ -2048,9 +2049,20 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file));
      char *tmp = strchr (type, ';');
      if (tmp)
        {
          /* sXXXav: only needed if IRI support is enabled */
          char *tmp2 = tmp + 1;
          while (tmp > type && c_isspace (tmp[-1]))
            --tmp;
          *tmp = '\0';
          /* Try to get remote encoding if needed */
          if (opt.enable_iri && !opt.encoding_remote)
            {
              tmp = parse_charset (tmp2);
              if (tmp)
                set_content_encoding (iri, tmp);
            }
        }
    }
  hs->newloc = resp_header_strdup (resp, "Location");
@ -2325,7 +2337,7 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file));
   retried, and retried, and retried, and...  */
 uerr_t
 http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
-           int *dt, struct url *proxy)
+           int *dt, struct url *proxy, struct iri *iri)
 {
  int count;
  bool got_head = false;         /* used for time-stamping and filename detection */
@ -2336,16 +2348,16 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
  uerr_t err, ret = TRYLIMEXC;
  time_t tmr = -1;               /* remote time-stamp */
  struct http_stat hstat;        /* HTTP status */
-  struct_stat st;  
+  struct_stat st;
  bool send_head_first = true;
  /* Assert that no value for *LOCAL_FILE was passed. */
  assert (local_file == NULL || *local_file == NULL);
-  
+
  /* Set LOCAL_FILE parameter. */
  if (local_file && opt.output_document)
    *local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document);
-  
+
  /* Reset NEWLOC parameter. */
  *newloc = NULL;
@ -2382,7 +2394,7 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
         retrieve the file. But if the output_document was given, then this
         test was already done and the file didn't exist. Hence the !opt.output_document */
      logprintf (LOG_VERBOSE, _("\
-File %s already there; not retrieving.\n\n"), 
+File %s already there; not retrieving.\n\n"),
                 quote (hstat.local_file));
      /* If the file is there, we suppose it's retrieved OK.  */
      *dt |= RETROKF;
@ -2398,10 +2410,10 @@ File %s already there; not retrieving.\n\n"),
  /* Reset the counter. */
  count = 0;
-  
+
  /* Reset the document type. */
  *dt = 0;
-  
+
  /* Skip preliminary HEAD request if we're not in spider mode AND
   * if -O was given or HTTP Content-Disposition support is disabled. */
  if (!opt.spider
@ -2410,21 +2422,21 @@ File %s already there; not retrieving.\n\n"),
  /* Send preliminary HEAD request if -N is given and we have an existing 
   * destination file. */
-  if (opt.timestamping 
+  if (opt.timestamping
      && !opt.content_disposition
      && file_exists_p (url_file_name (u)))
    send_head_first = true;
-  
+
  /* THE loop */
  do
    {
      /* Increment the pass counter.  */
      ++count;
      sleep_between_retrievals (count);
-      
+
      /* Get the current time string.  */
      tms = datetime_str (time (NULL));
-      
+
      if (opt.spider && !got_head)
        logprintf (LOG_VERBOSE, _("\
 Spider mode enabled. Check if remote file exists.\n"));
@ -2433,20 +2445,20 @@ Spider mode enabled. Check if remote file exists.\n"));
      if (opt.verbose)
        {
          char *hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
-          
+
-          if (count > 1) 
+          if (count > 1)
            {
              char tmp[256];
              sprintf (tmp, _("(try:%2d)"), count);
              logprintf (LOG_NOTQUIET, "--%s--  %s  %s\n",
                         tms, tmp, hurl);
            }
-          else 
+          else
            {
              logprintf (LOG_NOTQUIET, "--%s--  %s\n",
                         tms, hurl);
            }
-          
+
 #ifdef WINDOWS
          ws_changetitle (hurl);
 #endif
@ -2456,7 +2468,7 @@ Spider mode enabled. Check if remote file exists.\n"));
      /* Default document type is empty.  However, if spider mode is
         on or time-stamping is employed, HEAD_ONLY commands is
         encoded within *dt.  */
-      if (send_head_first && !got_head) 
+      if (send_head_first && !got_head)
        *dt |= HEAD_ONLY;
      else
        *dt &= ~HEAD_ONLY;
@ -2489,11 +2501,11 @@ Spider mode enabled. Check if remote file exists.\n"));
        *dt &= ~SEND_NOCACHE;
      /* Try fetching the document, or at least its head.  */
-      err = gethttp (u, &hstat, dt, proxy);
+      err = gethttp (u, &hstat, dt, proxy, iri);
      /* Time?  */
      tms = datetime_str (time (NULL));
-      
+
      /* Get the new location (with or without the redirection).  */
      if (hstat.newloc)
        *newloc = xstrdup (hstat.newloc);
@ -2532,7 +2544,7 @@ Spider mode enabled. Check if remote file exists.\n"));
                         hstat.statcode);
              ret = WRONGCODE;
            }
-          else 
+          else
            {
              ret = NEWLOCATION;
            }
@ -2548,7 +2560,7 @@ Spider mode enabled. Check if remote file exists.\n"));
          /* All possibilities should have been exhausted.  */
          abort ();
        }
-      
+
      if (!(*dt & RETROKF))
        {
          char *hurl = NULL;
@ -2567,11 +2579,13 @@ Spider mode enabled. Check if remote file exists.\n"));
              continue;
            }
          /* Maybe we should always keep track of broken links, not just in
-           * spider mode.  */
+           * spider mode.
-          else if (opt.spider)
+           * Don't log error if it was UTF-8 encoded because we will try
           * once unencoded. */
          else if (opt.spider && !iri->utf8_encode)
            {
              /* #### Again: ugly ugly ugly! */
-              if (!hurl) 
+              if (!hurl)
                hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
              nonexisting_url (hurl);
              logprintf (LOG_NOTQUIET, _("\
@ -2580,7 +2594,7 @@ Remote file does not exist -- broken link!!!\n"));
          else
            {
              logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
-                         tms, hstat.statcode, 
+                         tms, hstat.statcode,
                         quotearg_style (escape_quoting_style, hstat.error));
            }
          logputs (LOG_VERBOSE, "\n");
--- a/src/http.h
+++ b/src/http.h
@ -33,7 +33,7 @@ as that of the covered work.  */
 struct url;
 uerr_t http_loop (struct url *, char **, char **, const char *, int *,
-		  struct url *);
+		  struct url *, struct iri *);
 void save_cookies (void);
 void http_cleanup (void);
 time_t http_atotm (const char *);
--- a/src/init.c
+++ b/src/init.c
@ -182,9 +182,11 @@ static const struct {
  { "inet6only",        &opt.ipv6_only,         cmd_boolean },
 #endif
  { "input",            &opt.input_filename,    cmd_file },
  { "iri",              &opt.enable_iri,        cmd_boolean },
  { "keepsessioncookies", &opt.keep_session_cookies, cmd_boolean },
  { "limitrate",        &opt.limit_rate,        cmd_bytes },
  { "loadcookies",      &opt.cookies_input,     cmd_file },
  { "locale",           &opt.locale,            cmd_string },
  { "logfile",          &opt.lfilename,         cmd_file },
  { "login",            &opt.ftp_user,          cmd_string },/* deprecated*/
  { "maxredirect",      &opt.max_redirect,      cmd_number },
@ -224,6 +226,7 @@ static const struct {
  { "referer",          &opt.referer,           cmd_string },
  { "reject",           &opt.rejects,           cmd_vector },
  { "relativeonly",     &opt.relative_only,     cmd_boolean },
  { "remoteencoding",   &opt.encoding_remote,   cmd_string },
  { "removelisting",    &opt.remove_listing,    cmd_boolean },
  { "restrictfilenames", NULL,                  cmd_spec_restrict_file_names },
  { "retrsymlinks",     &opt.retr_symlinks,     cmd_boolean },
@ -331,6 +334,14 @@ defaults (void)
  opt.restrict_files_case = restrict_no_case_restriction;
  opt.max_redirect = 20;
 #ifdef ENABLE_IRI
  opt.enable_iri = true;
 #else
  opt.enable_iri = false;
 #endif
  opt.locale = NULL;
  opt.encoding_remote = NULL;
 }
 /* Return the user's home directory (strdup-ed), or NULL if none is
--- a/src/iri.c
+++ b/src/iri.c
@ -0,0 +1,348 @@
 /* IRI related functions.
   Copyright (C) 2008 Free Software Foundation, Inc.
 This file is part of GNU Wget.
 GNU Wget is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 3 of the License, or (at
 your option) any later version.
 GNU Wget is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 You should have received a copy of the GNU General Public License
 along with Wget.  If not, see <http://www.gnu.org/licenses/>.
 Additional permission under GNU GPL version 3 section 7
 If you modify this program, or any covered work, by linking or
 combining it with the OpenSSL project's OpenSSL library (or a
 modified version of that library), containing parts covered by the
 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
 grants you additional permission to convey the resulting work.
 Corresponding Source for a non-source form of such a combination
 shall include the source code for the parts of OpenSSL used as well
 as that of the covered work.  */
 #include "wget.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <assert.h>
 #include <string.h>
 #include <iconv.h>
 #include <stringprep.h>
 #include <idna.h>
 #include <errno.h>
 #include "utils.h"
 /* RFC3987 section 3.1 mandates STD3 ASCII RULES */
 #define IDNA_FLAGS  IDNA_USE_STD3_ASCII_RULES
 /* Note: locale encoding is kept in options struct (opt.locale) */
 static bool do_conversion (iconv_t cd, char *in, size_t inlen, char **out);
 /* Extract the encoding name that follows "charset=" in STR.  Return a
   newly allocated copy of that name, or NULL when STR is NULL or empty,
   contains no "charset=" token, or the name is rejected by
   check_encoding_name ().  */
 char *
 parse_charset (char *str)
 {
   char *start, *end, *name;
   if (str == NULL || *str == '\0')
     return NULL;
   start = strcasestr (str, "charset=");
   if (start == NULL)
     return NULL;
   /* Step over the "charset=" token itself.  */
   start += strlen ("charset=");
   /* sXXXav: which chars should be banned ??? */
   /* The name runs up to the next whitespace or the end of the string.  */
   for (end = start; *end != '\0' && !c_isspace (*end); end++)
     ;
   /* sXXXav: could strdupdelim return NULL ? */
   name = strdupdelim (start, end);
   /* Do a minimum check on the charset value */
   if (check_encoding_name (name))
     return name;
   xfree (name);
   return NULL;
 }
 /* Report the character set of the current locale by delegating to
   stringprep_locale_charset ().  The cast only drops the const qualifier
   of that function's result.  */
 char *
 find_locale (void)
 {
   const char *charset = stringprep_locale_charset ();
   return (char *) charset;
 }
 /* Basic sanity check of an encoding name: every character must be ASCII
   and none may be whitespace.  On the first offending character the name
   is logged at verbose level and false is returned; otherwise (including
   for an empty name) the result is true.  */
 bool
 check_encoding_name (char *encoding)
 {
   char *p;
   for (p = encoding; *p != '\0'; p++)
     {
       /* Acceptable character: keep scanning.  */
       if (c_isascii (*p) && !c_isspace (*p))
         continue;
       logprintf (LOG_VERBOSE, "Encoding %s isn't valid\n", quote (encoding));
       return false;
     }
   return true;
 }
 /* Try opening an iconv_t descriptor for conversion from locale to UTF-8 */
 static bool
 open_locale_to_utf8 (void)
 {
 }
 /* Try converting string str from locale to UTF-8. Return a new string
   on success, or str itself on error or if conversion isn't needed
   (locale unknown or already UTF-8).  When the result differs from str
   it was allocated by do_conversion ().  */
 const char *
 locale_to_utf8 (const char *str)
 {
   iconv_t l2u;
   char *new;
   bool converted;
   /* That shouldn't happen, just in case */
   if (!opt.locale)
     {
       logprintf (LOG_VERBOSE, "open_locale_to_utf8: locale is unset\n");
       opt.locale = find_locale ();
     }
   if (!opt.locale || !strcasecmp (opt.locale, "utf-8"))
     return str;
   l2u = iconv_open ("UTF-8", opt.locale);
   /* iconv_open reports failure with (iconv_t)(-1); only then is the
      conversion unsupported.  */
   if (l2u == (iconv_t)(-1))
     {
       logprintf (LOG_VERBOSE, "Conversion from %s to %s isn't supported\n",
                  quote (opt.locale), quote ("UTF-8"));
       return str;
     }
   converted = do_conversion (l2u, (char *) str, strlen (str), &new);
   /* The descriptor is local to this call; release it in every case.  */
   iconv_close (l2u);
   return converted ? (const char *) new : str;
 }
 /* Do the conversion according to the passed conversion descriptor cd.
   IN/INLEN describe the input bytes.  On success return true and store a
   newly allocated, NUL-terminated transcoded string in *out.  Input bytes
   that iconv rejects as invalid or incomplete are copied through unchanged
   (this is reported once).  On failure return false; callers must treat
   *out content as unspecified.  */
 static bool
 do_conversion (iconv_t cd, char *in, size_t inlen, char **out)
 {
   /* sXXXav : hummm hard to guess... */
   size_t cap = inlen * 2;       /* usable bytes in buf, not counting the NUL */
   size_t outlen = cap;          /* bytes still free at *out */
   char *buf = xmalloc (cap + 1);
   bool warned = false;
   *out = buf;
   for (;;)
     {
       bool need_room = false;
       int err;
       if (iconv (cd, &in, &inlen, out, &outlen) != (size_t)(-1))
         {
           /* iconv leaves *out just past the last byte it wrote, and one
              byte is always reserved for the terminator.  */
           **out = '\0';
           *out = buf;
           return true;
         }
       /* Save errno before logging has a chance to clobber it.  */
       err = errno;
       if ((err == EINVAL || err == EILSEQ) && inlen > 0)
         {
           /* Incomplete or invalid multibyte sequence */
           if (!warned)
             {
               logprintf (LOG_VERBOSE,
                       "Incomplete or invalide multibyte sequence encountered\n");
               warned = true;
             }
           if (outlen == 0)
             need_room = true;   /* grow first; the byte is copied next pass */
           else
             {
               /* Pass the offending byte through untouched and move on.  */
               *(*out)++ = *in++;
               inlen--;
               outlen--;
             }
         }
       else if (err == E2BIG) /* Output buffer full */
         need_room = true;
       else /* Weird, we got an unspecified error */
         {
           logprintf (LOG_VERBOSE, "Unhandled errno %d\n", err);
           break;
         }
       if (need_room)
         {
           /* Enlarge the buffer, keeping exactly the bytes converted so far.
              Always grow by a few bytes so that a single wide character
              fits and the loop makes progress.  */
           size_t used = (size_t) (*out - buf);
           size_t extra = inlen * 2 > 8 ? inlen * 2 : 8;
           char *bigger = xmalloc (cap + extra + 1);
           memcpy (bigger, buf, used);
           xfree (buf);
           buf = bigger;
           cap += extra;
           outlen = cap - used;
           *out = buf + used;
         }
     }
   /* Leave a valid (partial) string behind so that a caller which peeks
      at *out despite the failure does not read uninitialized memory.  */
   **out = '\0';
   *out = buf;
   return false;
 }
 /* Try to "ASCII encode" UTF-8 host. Return the new domain on success or NULL
   on error.  When i->utf8_encode is false, HOST is first transcoded from
   the remote encoding to UTF-8 with remote_to_utf8 (); NULL is returned if
   that step converts nothing or fails.  The returned string is allocated
   by idna_to_ascii_8z ().  */
 char *
 idn_encode (struct iri *i, char *host)
 {
   char *utf8_host = NULL;   /* intermediate UTF-8 copy, owned by us */
   char *ascii = NULL;
   int ret;
   /* Encode to UTF-8 if not done */
   if (!i->utf8_encode)
     {
       if (!remote_to_utf8 (i, (const char *) host, (const char **) &utf8_host))
           return NULL;  /* Nothing to encode or an error occured */
       host = utf8_host;
     }
   /* toASCII UTF-8 NULL terminated string */
   ret = idna_to_ascii_8z (host, &ascii, IDNA_FLAGS);
   /* The intermediate copy is no longer needed, whatever the outcome.  */
   xfree_null (utf8_host);
   if (ret != IDNA_SUCCESS)
     {
       logprintf (LOG_VERBOSE, "idn_encode failed (%d): %s\n", ret,
                  quote (idna_strerror (ret)));
       return NULL;
     }
   return ascii;
 }
 /* Decode an "ASCII encoded" host with idna_to_unicode_8zlz ().  Return
   the string it produced on success; on any other status log the libidn
   error at verbose level and return NULL.  */
 char *
 idn_decode (char *host)
 {
   char *decoded;
   int status = idna_to_unicode_8zlz (host, &decoded, IDNA_FLAGS);
   if (status == IDNA_SUCCESS)
     return decoded;
   logprintf (LOG_VERBOSE, "idn_decode failed (%d): %s\n", status,
              quote (idna_strerror (status)));
   return NULL;
 }
 /* Try to transcode string str from remote encoding (i->uri_encoding) to
   UTF-8.  Return true and store the newly allocated result in *new only
   when the conversion succeeded AND changed the string.  Return false
   when no remote encoding is set, iconv cannot handle it, the conversion
   failed, or the result is identical to str; *new content is unspecified
   in those cases and must not be used.  */
 bool
 remote_to_utf8 (struct iri *i, const char *str, const char **new)
 {
   iconv_t cd;
   bool converted;
   if (!i->uri_encoding)
     return false;
   cd = iconv_open ("UTF-8", i->uri_encoding);
   if (cd == (iconv_t)(-1))
     return false;
   converted = do_conversion (cd, (char *) str, strlen (str), (char **) new);
   iconv_close (cd);
   /* *new is only meaningful after a successful conversion.  */
   if (!converted)
     return false;
   /* Test if something was converted */
   if (!strcmp (str, *new))
     {
       xfree ((char *) *new);
       *new = NULL;   /* don't leave a dangling pointer behind */
       return false;
     }
   return true;
 }
 /* Create an iri structure seeded from the global options: uri_encoding
   is a private copy of opt.encoding_remote (or NULL when that is unset),
   content_encoding starts out NULL and utf8_encode mirrors
   opt.enable_iri.  */
 struct iri *
 iri_new (void)
 {
   struct iri *fresh = xmalloc (sizeof *fresh);
   fresh->utf8_encode = opt.enable_iri;
   fresh->content_encoding = NULL;
   if (opt.encoding_remote)
     fresh->uri_encoding = xstrdup (opt.encoding_remote);
   else
     fresh->uri_encoding = NULL;
   return fresh;
 }
 /* Release an iri structure: both encoding strings (each may be NULL)
   and then the structure itself.  */
 void
 iri_free (struct iri *i)
 {
   /* The two strings are independent of each other.  */
   xfree_null (i->content_encoding);
   xfree_null (i->uri_encoding);
   xfree (i);
 }
 /* Replace i->uri_encoding with a copy of CHARSET (NULL clears it).
   Nothing happens when a remote encoding was given in the options and
   FORCE is false, or when CHARSET already matches the stored encoding
   (compared case-insensitively).  */
 void
 set_uri_encoding (struct iri *i, char *charset, bool force)
 {
   DEBUGP (("URI encoding = %s\n", charset ? quote (charset) : "None"));
   /* A user-specified remote encoding wins unless the caller insists.  */
   if (opt.encoding_remote && !force)
     return;
   /* Already set to the requested value: keep the current string.  */
   if (i->uri_encoding && charset && strcasecmp (i->uri_encoding, charset) == 0)
     return;
   if (i->uri_encoding)
     xfree (i->uri_encoding);
   if (charset)
     i->uri_encoding = xstrdup (charset);
   else
     i->uri_encoding = NULL;
 }
 /* Replace i->content_encoding with a copy of CHARSET (NULL clears it).
   Nothing happens when a remote encoding was given in the options, or
   when CHARSET already matches the stored encoding (compared
   case-insensitively).  */
 void
 set_content_encoding (struct iri *i, char *charset)
 {
   DEBUGP (("URI content encoding = %s\n", charset ? quote (charset) : "None"));
   /* A user-specified remote encoding always takes precedence.  */
   if (opt.encoding_remote)
     return;
   /* Already set to the requested value: keep the current string.  */
   if (i->content_encoding && charset
       && strcasecmp (i->content_encoding, charset) == 0)
     return;
   if (i->content_encoding)
     xfree (i->content_encoding);
   if (charset)
     i->content_encoding = xstrdup (charset);
   else
     i->content_encoding = NULL;
 }
--- a/src/iri.h
+++ b/src/iri.h
@ -0,0 +1,70 @@
 /* Internationalization related declarations.
   Copyright (C) 2008 Free Software Foundation, Inc.
 This file is part of GNU Wget.
 GNU Wget is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 3 of the License, or
 (at your option) any later version.
 GNU Wget is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 You should have received a copy of the GNU General Public License
 along with Wget.  If not, see <http://www.gnu.org/licenses/>.
 Additional permission under GNU GPL version 3 section 7
 If you modify this program, or any covered work, by linking or
 combining it with the OpenSSL project's OpenSSL library (or a
 modified version of that library), containing parts covered by the
 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
 grants you additional permission to convey the resulting work.
 Corresponding Source for a non-source form of such a combination
 shall include the source code for the parts of OpenSSL used as well
 as that of the covered work.  */
 #ifndef IRI_H
 #define IRI_H
 /* Character-encoding state that accompanies a URL while it is parsed and
    retrieved.  The two strings are either NULL or heap copies owned by the
    structure; iri_free releases them. */
 struct iri {
  char *uri_encoding;      /* Encoding of the URI to fetch, or NULL */
  char *content_encoding;  /* Encoding of links inside the fetched file,
                              or NULL */
  bool utf8_encode;        /* Whether the current URL will be / has been
                              encoded in UTF-8 */
 };
 /* With ENABLE_IRI defined the real functions are declared.  Otherwise
    every entry point is replaced by a macro below, so that call sites
    compile without #ifdef of their own. */
 #ifdef ENABLE_IRI
 char *parse_charset (char *str);
 char *find_locale (void);
 bool check_encoding_name (char *encoding);
 const char *locale_to_utf8 (const char *str);
 char *idn_encode (struct iri *i, char *host);
 char *idn_decode (char *host);
 bool remote_to_utf8 (struct iri *i, const char *str, const char **new);
 struct iri *iri_new (void);
 void iri_free (struct iri *i);
 void set_uri_encoding (struct iri *i, char *charset, bool force);
 void set_content_encoding (struct iri *i, char *charset);
 #else /* ENABLE_IRI */
 /* Placeholder object returned by the iri_new() stub.
    NOTE(review): this is a (tentative) definition in a header, so every
    file including it defines dummy_iri; toolchains that default to
    -fno-common will presumably reject that at link time.  Consider
    "extern" here plus a single definition in one .c file -- confirm. */
 struct iri dummy_iri;
 /* The stubs do not evaluate their arguments, except locale_to_utf8 which
    yields its argument unchanged. */
 #define parse_charset(str)          NULL
 #define find_locale()               NULL
 #define check_encoding_name(str)    false
 #define locale_to_utf8(str)         (str)
 #define idn_encode(a,b)             NULL
 #define idn_decode(str)             NULL
 #define remote_to_utf8(a,b,c)       false
 /* iri_new() hands out the shared placeholder and iri_free() expands to
    nothing, so the placeholder is never freed. */
 #define iri_new()                   (&dummy_iri)
 #define iri_free(a)
 #define set_uri_encoding(a,b,c)
 #define set_content_encoding(a,b)
 #endif /* ENABLE_IRI */
 #endif /* IRI_H */
--- a/src/log.c
+++ b/src/log.c
@ -43,7 +43,7 @@ as that of the covered work.  */
 #include "utils.h"
 #include "log.h"
-/* This file impplement support for "logging".  Logging means printing
+/* This file implements support for "logging".  Logging means printing
   output, plus several additional features:
   - Cataloguing output by importance.  You can specify that a log
--- a/src/main.c
+++ b/src/main.c
@ -201,10 +201,12 @@ static struct cmdline_option option_data[] =
    { "inet6-only", '6', OPT_BOOLEAN, "inet6only", -1 },
 #endif
    { "input-file", 'i', OPT_VALUE, "input", -1 },
    { "iri", 0, OPT_BOOLEAN, "iri", -1 },
    { "keep-session-cookies", 0, OPT_BOOLEAN, "keepsessioncookies", -1 },
    { "level", 'l', OPT_VALUE, "reclevel", -1 },
    { "limit-rate", 0, OPT_VALUE, "limitrate", -1 },
    { "load-cookies", 0, OPT_VALUE, "loadcookies", -1 },
    { "locale", 0, OPT_VALUE, "locale", -1 },
    { "max-redirect", 0, OPT_VALUE, "maxredirect", -1 },
    { "mirror", 'm', OPT_BOOLEAN, "mirror", -1 },
    { "no", 'n', OPT__NO, NULL, required_argument },
@ -238,6 +240,7 @@ static struct cmdline_option option_data[] =
    { "referer", 0, OPT_VALUE, "referer", -1 },
    { "reject", 'R', OPT_VALUE, "reject", -1 },
    { "relative", 'L', OPT_BOOLEAN, "relativeonly", -1 },
    { "remote-encoding", 0, OPT_VALUE, "remoteencoding", -1},
    { "remove-listing", 0, OPT_BOOLEAN, "removelisting", -1 },
    { "restrict-file-names", 0, OPT_BOOLEAN, "restrictfilenames", -1 },
    { "retr-symlinks", 0, OPT_BOOLEAN, "retrsymlinks", -1 },
@ -1062,6 +1065,27 @@ for details.\n\n"));
      exit (1);
    }
 #ifdef ENABLE_IRI
  if (opt.enable_iri)
    {
      if (opt.locale && !check_encoding_name (opt.locale))
        opt.locale = NULL;
      if (!opt.locale)
        opt.locale = find_locale ();
      if (opt.encoding_remote && !check_encoding_name (opt.encoding_remote))
        opt.encoding_remote = NULL;
    }
 #else
  if (opt.enable_iri || opt.locale || opt.encoding_remote)
    {
      /* sXXXav : be more specific... */
      printf(_("This version does not have support for IRIs\n"));
      exit(1);
    }
 #endif
  if (opt.ask_passwd)
    {
      opt.passwd = prompt_for_password ();
@ -1171,15 +1195,21 @@ WARNING: Can't reopen standard output in binary mode;\n\
          int old_follow_ftp = opt.follow_ftp;
          /* Turn opt.follow_ftp on in case of recursive FTP retrieval */
-          if (url_scheme (*t) == SCHEME_FTP) 
+          if (url_scheme (*t) == SCHEME_FTP)
            opt.follow_ftp = 1;
-          
+
-          status = retrieve_tree (*t);
+          status = retrieve_tree (*t, NULL);
          opt.follow_ftp = old_follow_ftp;
        }
      else
-        status = retrieve_url (*t, &filename, &redirected_URL, NULL, &dt, opt.recursive);
+        {
          struct iri *i = iri_new ();
          set_uri_encoding (i, opt.locale, true);
          status = retrieve_url (*t, &filename, &redirected_URL, NULL, &dt,
                                 opt.recursive, i);
          iri_free (i);
        }
      if (opt.delete_after && file_exists_p(filename))
        {
--- a/src/options.h
+++ b/src/options.h
@ -239,6 +239,10 @@ struct options
  bool content_disposition;	/* Honor HTTP Content-Disposition header. */
  bool auth_without_challenge;  /* Issue Basic authentication creds without
                                   waiting for a challenge. */
  bool enable_iri;
  char *encoding_remote;
  char *locale;
 };
 extern struct options opt;
--- a/src/recur.c
+++ b/src/recur.c
@ -51,7 +51,7 @@ as that of the covered work.  */
 #include "html-url.h"
 #include "css-url.h"
 #include "spider.h"
-
+
 /* Functions for maintaining the URL queue.  */
 struct queue_element {
@ -60,6 +60,7 @@ struct queue_element {
  int depth;                    /* the depth */
  bool html_allowed;            /* whether the document is allowed to
                                   be treated as HTML. */
  struct iri *iri;                /* sXXXav */
  bool css_allowed;             /* whether the document is allowed to
                                   be treated as CSS. */
  struct queue_element *next;   /* next element in queue */
@ -93,11 +94,12 @@ url_queue_delete (struct url_queue *queue)
   into it.  */
 static void
-url_enqueue (struct url_queue *queue,
+url_enqueue (struct url_queue *queue, struct iri *i,
             const char *url, const char *referer, int depth,
             bool html_allowed, bool css_allowed)
 {
  struct queue_element *qel = xnew (struct queue_element);
  qel->iri = i;
  qel->url = url;
  qel->referer = referer;
  qel->depth = depth;
@ -112,6 +114,10 @@ url_enqueue (struct url_queue *queue,
  DEBUGP (("Enqueuing %s at depth %d\n", url, depth));
  DEBUGP (("Queue count %d, maxcount %d.\n", queue->count, queue->maxcount));
  if (i)
    DEBUGP (("[IRI Enqueuing %s with %s\n", quote_n (0, url),
             i->uri_encoding ? quote_n (1, i->uri_encoding) : "None"));
  if (queue->tail)
    queue->tail->next = qel;
  queue->tail = qel;
@ -124,7 +130,7 @@ url_enqueue (struct url_queue *queue,
   succeeded, or false if the queue is empty.  */
 static bool
-url_dequeue (struct url_queue *queue,
+url_dequeue (struct url_queue *queue, struct iri **i,
             const char **url, const char **referer, int *depth,
             bool *html_allowed, bool *css_allowed)
 {
@ -137,6 +143,7 @@ url_dequeue (struct url_queue *queue,
  if (!queue->head)
    queue->tail = NULL;
  *i = qel->iri;
  *url = qel->url;
  *referer = qel->referer;
  *depth = qel->depth;
@ -153,9 +160,9 @@ url_dequeue (struct url_queue *queue,
 }
 static bool download_child_p (const struct urlpos *, struct url *, int,
-                              struct url *, struct hash_table *);
+                              struct url *, struct hash_table *, struct iri *);
 static bool descend_redirect_p (const char *, const char *, int,
-                                struct url *, struct hash_table *);
+                                struct url *, struct hash_table *, struct iri *);
 /* Retrieve a part of the web beginning with START_URL.  This used to
@ -180,7 +187,7 @@ static bool descend_redirect_p (const char *, const char *, int,
          options, add it to the queue. */
 uerr_t
-retrieve_tree (const char *start_url)
+retrieve_tree (const char *start_url, struct iri *pi)
 {
  uerr_t status = RETROK;
@ -192,8 +199,22 @@ retrieve_tree (const char *start_url)
  struct hash_table *blacklist;
  int up_error_code;
-  struct url *start_url_parsed = url_parse (start_url, &up_error_code);
+  struct url *start_url_parsed;
  struct iri *i = iri_new ();
 #define COPYSTR(x)  (x) ? xstrdup(x) : NULL;
  /* Duplicate pi struct if not NULL */
  if (pi)
    {
      i->uri_encoding = COPYSTR (pi->uri_encoding);
      i->content_encoding = COPYSTR (pi->content_encoding);
      i->utf8_encode = pi->utf8_encode;
    }
  else
    set_uri_encoding (i, opt.locale, true);
 #undef COPYSTR
  start_url_parsed = url_parse (start_url, &up_error_code, i);
  if (!start_url_parsed)
    {
      char *error = url_error (start_url, up_error_code);
@ -207,7 +228,8 @@ retrieve_tree (const char *start_url)
  /* Enqueue the starting URL.  Use start_url_parsed->url rather than
     just URL so we enqueue the canonical form of the URL.  */
-  url_enqueue (queue, xstrdup (start_url_parsed->url), NULL, 0, true, false);
+  url_enqueue (queue, i, xstrdup (start_url_parsed->url), NULL, 0, true,
               false);
  string_set_add (blacklist, start_url_parsed->url);
  while (1)
@ -226,7 +248,7 @@ retrieve_tree (const char *start_url)
      /* Get the next URL from the queue... */
-      if (!url_dequeue (queue,
+      if (!url_dequeue (queue, (struct iri **) &i,
                        (const char **)&url, (const char **)&referer,
                        &depth, &html_allowed, &css_allowed))
        break;
@ -267,7 +289,8 @@ retrieve_tree (const char *start_url)
          int dt = 0;
          char *redirected = NULL;
-          status = retrieve_url (url, &file, &redirected, referer, &dt, false);
+          status = retrieve_url (url, &file, &redirected, referer, &dt,
                                 false, i);
          if (html_allowed && file && status == RETROK
              && (dt & RETROKF) && (dt & TEXTHTML))
@ -295,7 +318,7 @@ retrieve_tree (const char *start_url)
              if (descend)
                {
                  if (!descend_redirect_p (redirected, url, depth,
-                                           start_url_parsed, blacklist))
+                                           start_url_parsed, blacklist, i))
                    descend = false;
                  else
                    /* Make sure that the old pre-redirect form gets
@ -347,7 +370,7 @@ retrieve_tree (const char *start_url)
          bool meta_disallow_follow = false;
          struct urlpos *children
            = is_css ? get_urls_css_file (file, url) :
-                       get_urls_html (file, url, &meta_disallow_follow);
+                       get_urls_html (file, url, &meta_disallow_follow, i);
          if (opt.use_robots && meta_disallow_follow)
            {
@ -358,7 +381,8 @@ retrieve_tree (const char *start_url)
          if (children)
            {
              struct urlpos *child = children;
-              struct url *url_parsed = url_parsed = url_parse (url, NULL);
+              struct url *url_parsed = url_parse (url, NULL, i);
              struct iri *ci;
              char *referer_url = url;
              bool strip_auth = (url_parsed != NULL
                                 && url_parsed->user != NULL);
@ -375,9 +399,11 @@ retrieve_tree (const char *start_url)
                  if (dash_p_leaf_HTML && !child->link_inline_p)
                    continue;
                  if (download_child_p (child, url_parsed, depth, start_url_parsed,
-                                        blacklist))
+                                        blacklist, i))
                    {
-                      url_enqueue (queue, xstrdup (child->url->url),
+                      ci = iri_new ();
                      set_uri_encoding (ci, i->content_encoding, false);
                      url_enqueue (queue, ci, xstrdup (child->url->url),
                                   xstrdup (referer_url), depth + 1,
                                   child->link_expect_html,
                                   child->link_expect_css);
@ -395,18 +421,18 @@ retrieve_tree (const char *start_url)
            }
        }
-      if (file 
+      if (file
-          && (opt.delete_after 
+          && (opt.delete_after
              || opt.spider /* opt.recursive is implicitely true */
              || !acceptable (file)))
        {
          /* Either --delete-after was specified, or we loaded this
-             (otherwise unneeded because of --spider or rejected by -R) 
+             (otherwise unneeded because of --spider or rejected by -R)
-             HTML file just to harvest its hyperlinks -- in either case, 
+             HTML file just to harvest its hyperlinks -- in either case,
             delete the local file. */
          DEBUGP (("Removing file due to %s in recursive_retrieve():\n",
                   opt.delete_after ? "--delete-after" :
-                   (opt.spider ? "--spider" : 
+                   (opt.spider ? "--spider" :
                    "recursive rejection criteria")));
          logprintf (LOG_VERBOSE,
                     (opt.delete_after || opt.spider
@ -422,6 +448,7 @@ retrieve_tree (const char *start_url)
      xfree (url);
      xfree_null (referer);
      xfree_null (file);
      iri_free (i);
    }
  /* If anything is left of the queue due to a premature exit, free it
@ -430,9 +457,11 @@ retrieve_tree (const char *start_url)
    char *d1, *d2;
    int d3;
    bool d4, d5;
-    while (url_dequeue (queue,
+    struct iri *d6;
    while (url_dequeue (queue, (struct iri **)&d6,
                        (const char **)&d1, (const char **)&d2, &d3, &d4, &d5))
      {
        iri_free (d6);
        xfree (d1);
        xfree_null (d2);
      }
@ -461,7 +490,8 @@ retrieve_tree (const char *start_url)
 static bool
 download_child_p (const struct urlpos *upos, struct url *parent, int depth,
-                  struct url *start_url_parsed, struct hash_table *blacklist)
+                  struct url *start_url_parsed, struct hash_table *blacklist,
                  struct iri *iri)
 {
  struct url *u = upos->url;
  const char *url = u->url;
@ -471,7 +501,7 @@ download_child_p (const struct urlpos *upos, struct url *parent, int depth,
  if (string_set_contains (blacklist, url))
    {
-      if (opt.spider) 
+      if (opt.spider)
        {
          char *referrer = url_string (parent, URL_AUTH_HIDE_PASSWD);
          DEBUGP (("download_child_p: parent->url is: %s\n", quote (parent->url)));
@ -602,7 +632,7 @@ download_child_p (const struct urlpos *upos, struct url *parent, int depth,
      if (!specs)
        {
          char *rfile;
-          if (res_retrieve_file (url, &rfile))
+          if (res_retrieve_file (url, &rfile, iri))
            {
              specs = res_parse_from_file (rfile);
@ -657,23 +687,24 @@ download_child_p (const struct urlpos *upos, struct url *parent, int depth,
 static bool
 descend_redirect_p (const char *redirected, const char *original, int depth,
-                    struct url *start_url_parsed, struct hash_table *blacklist)
+                    struct url *start_url_parsed, struct hash_table *blacklist,
                    struct iri *iri)
 {
  struct url *orig_parsed, *new_parsed;
  struct urlpos *upos;
  bool success;
-  orig_parsed = url_parse (original, NULL);
+  orig_parsed = url_parse (original, NULL, NULL);
  assert (orig_parsed != NULL);
-  new_parsed = url_parse (redirected, NULL);
+  new_parsed = url_parse (redirected, NULL, NULL);
  assert (new_parsed != NULL);
  upos = xnew0 (struct urlpos);
  upos->url = new_parsed;
  success = download_child_p (upos, orig_parsed, depth,
-                              start_url_parsed, blacklist);
+                              start_url_parsed, blacklist, iri);
  url_free (orig_parsed);
  url_free (new_parsed);
--- a/src/recur.h
+++ b/src/recur.h
@ -42,6 +42,6 @@ as that of the covered work.  */
 struct urlpos;
 void recursive_cleanup (void);
-uerr_t retrieve_tree (const char *);
+uerr_t retrieve_tree (const char *, struct iri *);
 #endif /* RECUR_H */
--- a/src/res.c
+++ b/src/res.c
@ -532,21 +532,28 @@ res_get_specs (const char *host, int port)
   Return true if robots were retrieved OK, false otherwise.  */
 bool
-res_retrieve_file (const char *url, char **file)
+res_retrieve_file (const char *url, char **file, struct iri *iri)
 {
  struct iri *i = iri_new ();
  uerr_t err;
  char *robots_url = uri_merge (url, RES_SPECS_LOCATION);
  int saved_ts_val = opt.timestamping;
  int saved_sp_val = opt.spider;
  /* Copy server URI encoding for a possible IDNA transformation, no need to
     encode the full URI in UTF-8 because "robots.txt" is plain ASCII */
  set_uri_encoding (i, iri->uri_encoding, false);
  i->utf8_encode = false;
  logputs (LOG_VERBOSE, _("Loading robots.txt; please ignore errors.\n"));
  *file = NULL;
  opt.timestamping = false;
  opt.spider       = false;
-  err = retrieve_url (robots_url, file, NULL, NULL, NULL, false);
+  err = retrieve_url (robots_url, file, NULL, NULL, NULL, false, i);
  opt.timestamping = saved_ts_val;
-  opt.spider       = saved_sp_val;  
+  opt.spider       = saved_sp_val;
  xfree (robots_url);
  iri_free (i);
  if (err != RETROK && *file != NULL)
    {
--- a/src/res.h
+++ b/src/res.h
@ -40,7 +40,7 @@ bool res_match_path (const struct robot_specs *, const char *);
 void res_register_specs (const char *, int, struct robot_specs *);
 struct robot_specs *res_get_specs (const char *, int);
-bool res_retrieve_file (const char *, char **);
+bool res_retrieve_file (const char *, char **, struct iri *);
 bool is_robots_txt_url (const char *);
--- a/src/retr.c
+++ b/src/retr.c
@ -597,7 +597,7 @@ static char *getproxy (struct url *);
 uerr_t
 retrieve_url (const char *origurl, char **file, char **newloc,
-              const char *refurl, int *dt, bool recursive)
+              const char *refurl, int *dt, bool recursive, struct iri *iri)
 {
  uerr_t result;
  char *url;
@ -625,7 +625,8 @@ retrieve_url (const char *origurl, char **file, char **newloc,
  if (file)
    *file = NULL;
-  u = url_parse (url, &up_error_code);
+ second_try:
  u = url_parse (url, &up_error_code, iri);
  if (!u)
    {
      char *error = url_error (url, up_error_code);
@ -635,6 +636,10 @@ retrieve_url (const char *origurl, char **file, char **newloc,
      return URLERROR;
    }
  DEBUGP (("[IRI Retrieving %s with %s (UTF-8=%d)\n", quote_n (0, url),
           iri->uri_encoding ? quote_n (1, iri->uri_encoding) : "None",
           iri->utf8_encode));
  if (!refurl)
    refurl = opt.referer;
@ -648,8 +653,12 @@ retrieve_url (const char *origurl, char **file, char **newloc,
  proxy = getproxy (u);
  if (proxy)
    {
      struct iri *pi = iri_new ();
      set_uri_encoding (pi, opt.locale, true);
      pi->utf8_encode = false;
      /* Parse the proxy URL.  */
-      proxy_url = url_parse (proxy, &up_error_code);
+      proxy_url = url_parse (proxy, &up_error_code, NULL);
      if (!proxy_url)
        {
          char *error = url_error (proxy, up_error_code);
@ -676,7 +685,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
 #endif
      || (proxy_url && proxy_url->scheme == SCHEME_HTTP))
    {
-      result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
+      result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url, iri);
    }
  else if (u->scheme == SCHEME_FTP)
    {
@ -726,8 +735,13 @@ retrieve_url (const char *origurl, char **file, char **newloc,
      xfree (mynewloc);
      mynewloc = construced_newloc;
      /* Reset UTF-8 encoding state, keep the URI encoding and reset
         the content encoding. */
      iri->utf8_encode = opt.enable_iri;
      set_content_encoding (iri, NULL);
      /* Now, see if this new location makes sense. */
-      newloc_parsed = url_parse (mynewloc, &up_error_code);
+      newloc_parsed = url_parse (mynewloc, &up_error_code, iri);
      if (!newloc_parsed)
        {
          char *error = url_error (mynewloc, up_error_code);
@ -776,8 +790,21 @@ retrieve_url (const char *origurl, char **file, char **newloc,
      goto redirected;
    }
-  if (local_file)
+  /* Try to not encode in UTF-8 if fetching failed */
  if (!(*dt & RETROKF) && iri->utf8_encode)
    {
      iri->utf8_encode = false;
      DEBUGP (("[IRI Fallbacking to non-utf8 for %s\n", quote (url)));
      goto second_try;
    }
  if (local_file && *dt & RETROKF)
    {
      register_download (u->url, local_file);
      if (redirection_count && 0 != strcmp (origurl, u->url))
        register_redirection (origurl, u->url);
      if (*dt & TEXTHTML)
        register_html (u->url, local_file);
      if (*dt & RETROKF)
        {
          register_download (u->url, local_file);
@ -827,13 +854,18 @@ retrieve_from_file (const char *file, bool html, int *count)
 {
  uerr_t status;
  struct urlpos *url_list, *cur_url;
  struct iri *iri = iri_new();
  char *input_file = NULL;
  const char *url = file;
  status = RETROK;             /* Suppose everything is OK.  */
  *count = 0;                  /* Reset the URL count.  */
-  
+
  /* sXXXav : Assume filename and links in the file are in the locale */
  set_uri_encoding (iri, opt.locale, true);
  set_content_encoding (iri, opt.locale);
  if (url_has_scheme (url))
    {
      int dt;
@ -842,17 +874,21 @@ retrieve_from_file (const char *file, bool html, int *count)
      if (!opt.base_href)
        opt.base_href = xstrdup (url);
-      status = retrieve_url (url, &input_file, NULL, NULL, &dt, false);
+      status = retrieve_url (url, &input_file, NULL, NULL, &dt, false, iri);
      if (status != RETROK)
        return status;
      if (dt & TEXTHTML)
        html = true;
      /* If we have a found a content encoding, use it */
      if (iri->content_encoding)
 	  set_uri_encoding (iri, iri->content_encoding, false);
    }
  else
    input_file = (char *) file;
-  url_list = (html ? get_urls_html (input_file, NULL, NULL)
+  url_list = (html ? get_urls_html (input_file, NULL, NULL, iri)
              : get_urls_file (input_file));
  for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
@ -868,21 +904,26 @@ retrieve_from_file (const char *file, bool html, int *count)
          status = QUOTEXC;
          break;
        }
      /* Reset UTF-8 encode status */
      iri->utf8_encode = opt.enable_iri;
      if ((opt.recursive || opt.page_requisites)
          && (cur_url->url->scheme != SCHEME_FTP || getproxy (cur_url->url)))
        {
          int old_follow_ftp = opt.follow_ftp;
          /* Turn opt.follow_ftp on in case of recursive FTP retrieval */
-          if (cur_url->url->scheme == SCHEME_FTP) 
+          if (cur_url->url->scheme == SCHEME_FTP)
            opt.follow_ftp = 1;
-          
+
-          status = retrieve_tree (cur_url->url->url);
+          status = retrieve_tree (cur_url->url->url, iri);
          opt.follow_ftp = old_follow_ftp;
        }
      else
-        status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, &dt, opt.recursive);
+        status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL,
 	                       &dt, opt.recursive, iri);
      if (filename && opt.delete_after && file_exists_p (filename))
        {
@ -901,6 +942,8 @@ Removing file due to --delete-after in retrieve_from_file():\n"));
  /* Free the linked list of URL-s.  */
  free_urlpos (url_list);
  iri_free (iri);
  return status;
 }
@ -1053,7 +1096,11 @@ bool
 url_uses_proxy (const char *url)
 {
  bool ret;
-  struct url *u = url_parse (url, NULL);
+  struct url *u;
  struct iri *i = iri_new();
  /* url was given in the command line, so use locale as encoding */
  set_uri_encoding (i, opt.locale, true);
  u= url_parse (url, NULL, i);
  if (!u)
    return false;
  ret = getproxy (u) != NULL;
--- a/src/retr.h
+++ b/src/retr.h
@ -51,7 +51,8 @@ typedef const char *(*hunk_terminator_t) (const char *, const char *, int);
 char *fd_read_hunk (int, hunk_terminator_t, long, long);
 char *fd_read_line (int);
-uerr_t retrieve_url (const char *, char **, char **, const char *, int *, bool);
+uerr_t retrieve_url (const char *, char **, char **, const char *, int *,
                     bool, struct iri *);
 uerr_t retrieve_from_file (const char *, bool, int *);
 const char *retr_rate (wgint, double);
--- a/src/url.c
+++ b/src/url.c
@ -640,7 +640,7 @@ static const char *parse_errors[] = {
   error, and if ERROR is not NULL, also set *ERROR to the appropriate
   error code. */
 struct url *
-url_parse (const char *url, int *error)
+url_parse (const char *url, int *error, struct iri *iri)
 {
  struct url *u;
  const char *p;
@ -659,7 +659,7 @@ url_parse (const char *url, int *error)
  int port;
  char *user = NULL, *passwd = NULL;
-  char *url_encoded = NULL;
+  char *url_encoded = NULL, *new_url = NULL;
  int error_code;
@ -670,9 +670,20 @@ url_parse (const char *url, int *error)
      goto error;
    }
-  url_encoded = reencode_escapes (url);
+  if (iri && iri->utf8_encode)
    {
      url_unescape ((char *) url);
      iri->utf8_encode = remote_to_utf8 (iri, url, (const char **) &new_url);
      if (!iri->utf8_encode)
        new_url = NULL;
    }
  url_encoded = reencode_escapes (new_url ? new_url : url);
  p = url_encoded;
  if (new_url && url_encoded != new_url)
    xfree (new_url);
  p += strlen (supported_schemes[scheme].leading_string);
  uname_b = p;
  p = url_skip_credentials (p);
@ -842,6 +853,18 @@ url_parse (const char *url, int *error)
    {
      url_unescape (u->host);
      host_modified = true;
      /* Apply IDNA regardless of iri->utf8_encode status */
      if (opt.enable_iri && iri)
        {
          char *new = idn_encode (iri, u->host);
          if (new)
            {
              xfree (u->host);
              u->host = new;
              host_modified = true;
            }
        }
    }
  if (params_b)
@ -851,7 +874,7 @@ url_parse (const char *url, int *error)
  if (fragment_b)
    u->fragment = strdupdelim (fragment_b, fragment_e);
-  if (path_modified || u->fragment || host_modified || path_b == path_e)
+  if (opt.enable_iri || path_modified || u->fragment || host_modified || path_b == path_e)
    {
      /* If we suspect that a transformation has rendered what
         url_string might return different from URL_ENCODED, rebuild
--- a/src/url.h
+++ b/src/url.h
@ -84,7 +84,7 @@ struct url
 char *url_escape (const char *);
-struct url *url_parse (const char *, int *);
+struct url *url_parse (const char *, int *, struct iri *iri);
 char *url_error (const char *, int);
 char *url_full_path (const struct url *);
 void url_set_dir (struct url *, const char *);
--- a/src/wget.h
+++ b/src/wget.h
@ -218,6 +218,9 @@ typedef double SUM_SIZE_INT;
 #include "quote.h"
 #include "quotearg.h"
 /* Likewise for struct iri definition */
 #include "iri.h"
 /* Useful macros used across the code: */
 /* The number of elements in an array.  For example:
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@ -1,3 +1,30 @@
 2008-08-14  Xavier Saint <wget@sxav.eu>
 	* Test-iri-list.px : Fetch files from a remote list.
 2008-08-03  Xavier Saint <wget@sxav.eu>
 	* Test-iri.px : HTTP recursive fetch for testing IRI support and
 	fallback.
 	* Test-iri-disabled.px : Same file structure as Test-iri.px but with
 	IRI support disabled
 	* Test-iri-forced-remote.px : There's a difference between ISO-8859-1
 	and ISO-8859-15 for character 0xA4 (respectively currency sign and
 	euro sign). So with a forced ISO-8859-1 remote encoding, wget should
 	see 0xA4 as a currency sign and transcode it correctly in UTF-8 instead
 	of using the ISO-8859-15 given by the server.
 	* Test-ftp-iri.px : Give a file to fetch via FTP in a specific locale
 	and expect wget to fetch the file UTF-8 encoded.
 	* Test-ftp-iri-fallback.px : Same as above but wget should fallback on
 	locale encoding to fetch the file.
 	* Test-ftp-iri-disabled.px : Same as Test-ftp-iri.px but with IRI support
 	disabled. The UTF-8 encoded file should not be retrieved.
 2008-06-22  Micah Cowan  <micah@cowan.name>
 	* Test-proxied-https-auth.px: Shift exit code so it falls in the
--- a/tests/Test-ftp-iri-disabled.px
+++ b/tests/Test-ftp-iri-disabled.px
@ -0,0 +1,50 @@
 #!/usr/bin/perl -w
 # FTP retrieval with IRI support switched off (--iri=no): the file is
 # requested under its ISO-8859-1 name and must be stored under that same
 # name, even though the server also offers a UTF-8 named copy.
 use strict;
 use warnings;
 use FTPTest;
 ###############################################################################
 # "c with cedilla" as an ISO-8859-1 byte and as a UTF-8 byte sequence.
 my $ccedilla_l1 = "\xE7";
 my $ccedilla_u8 = "\xC3\xA7";
 my $francais = <<EOF;
 Some text.
 EOF
 # Serve the body with CRLF line endings; /g keeps this correct should the
 # body ever grow beyond a single line.
 $francais =~ s/\n/\r\n/g;
 # Files offered by the test FTP server: path => { content }.
 my %urls = (
    "/fran${ccedilla_u8}ais.txt" => {
        content => $francais,
    },
    "/fran${ccedilla_l1}ais.txt" => {
        content => $francais,
    },
 );
 my $cmdline = $WgetTest::WGETPATH . " --iri=no --locale=iso-8859-1 -S ftp://localhost:{{port}}/fran${ccedilla_l1}ais.txt";
 my $expected_error_code = 0;
 my %expected_downloaded_files = (
    "fran${ccedilla_l1}ais.txt" => {
        content => $francais,
    },
 );
 ###############################################################################
 # The test name matches this file rather than the copy-pasted "Test-ftp-iri",
 # so the harness can tell this test apart from its siblings.
 my $the_test = FTPTest->new (name => "Test-ftp-iri-disabled",
                             input => \%urls,
                             cmdline => $cmdline,
                             errcode => $expected_error_code,
                             output => \%expected_downloaded_files);
 exit $the_test->run();
 # vim: et ts=4 sw=4
--- a/tests/Test-ftp-iri-fallback.px
+++ b/tests/Test-ftp-iri-fallback.px
@ -0,0 +1,46 @@
 #!/usr/bin/perl -w
 # FTP retrieval where the server only has the ISO-8859-1 named file: the
 # download is still expected to succeed and to be stored under the
 # ISO-8859-1 name.
 use strict;
 use warnings;
 use FTPTest;
 ###############################################################################
 # "c with cedilla" as an ISO-8859-1 byte and as a UTF-8 byte sequence.
 my $ccedilla_l1 = "\xE7";
 my $ccedilla_u8 = "\xC3\xA7";
 my $francais = <<EOF;
 Some text.
 EOF
 # Serve the body with CRLF line endings; /g keeps this correct should the
 # body ever grow beyond a single line.
 $francais =~ s/\n/\r\n/g;
 # Files offered by the test FTP server: path => { content }.
 my %urls = (
    "/fran${ccedilla_l1}ais.txt" => {
        content => $francais,
    },
 );
 my $cmdline = $WgetTest::WGETPATH . " --locale=iso-8859-1 -S ftp://localhost:{{port}}/fran${ccedilla_l1}ais.txt";
 my $expected_error_code = 0;
 my %expected_downloaded_files = (
    "fran${ccedilla_l1}ais.txt" => {
        content => $francais,
    },
 );
 ###############################################################################
 # The test name matches this file rather than the copy-pasted "Test-ftp-iri",
 # so the harness can tell this test apart from its siblings.
 my $the_test = FTPTest->new (name => "Test-ftp-iri-fallback",
                             input => \%urls,
                             cmdline => $cmdline,
                             errcode => $expected_error_code,
                             output => \%expected_downloaded_files);
 exit $the_test->run();
 # vim: et ts=4 sw=4
--- a/tests/Test-ftp-iri.px
+++ b/tests/Test-ftp-iri.px
@ -0,0 +1,47 @@
 #!/usr/bin/perl -w
 # FTP retrieval with a non-ASCII file name: the file is requested under its
 # ISO-8859-1 name while the server only has the UTF-8 named file, and the
 # download is expected to be stored under the UTF-8 name.
 use strict;
 use FTPTest;
 ###############################################################################
 # "c with cedilla" as an ISO-8859-1 byte and as a UTF-8 byte sequence.
 my $cedilla_latin1 = "\xE7";
 my $cedilla_utf8   = "\xC3\xA7";
 my $name_latin1 = "fran${cedilla_latin1}ais.txt";
 my $name_utf8   = "fran${cedilla_utf8}ais.txt";
 my $body = <<EOF;
 Some text.
 EOF
 $body =~ s/\n/\r\n/;
 # Files offered by the test FTP server: path => { content }.
 my %served_files = (
    "/$name_utf8" => { content => $body },
 );
 my $wget_command = join ' ',
    $WgetTest::WGETPATH,
    '--locale=iso-8859-1',
    '-S',
    "ftp://localhost:{{port}}/$name_latin1";
 my $expected_exit = 0;
 my %expected_files = (
    $name_utf8 => { content => $body },
 );
 ###############################################################################
 my $the_test = FTPTest->new (
    name    => "Test-ftp-iri",
    input   => \%served_files,
    cmdline => $wget_command,
    errcode => $expected_exit,
    output  => \%expected_files,
 );
 exit $the_test->run();
 # vim: et ts=4 sw=4
--- a/tests/Test-iri-disabled.px
+++ b/tests/Test-iri-disabled.px
@ -0,0 +1,196 @@
 #!/usr/bin/perl -w
 use strict;
 use HTTPTest;
 # cf. http://en.wikipedia.org/wiki/Latin1
 #     http://en.wikipedia.org/wiki/ISO-8859-15
 ###############################################################################
 #
 # mime : charset found in Content-Type HTTP MIME header
 # meta : charset found in Content-Type meta tag
 #
 # index.html                  mime + file = iso-8859-15
 # p1_français.html            meta + file = iso-8859-1, mime = utf-8
 # p2_één.html                 mime + file = iso-8859-1
 # p3_€€€.html                 no meta tag, mime = text/plain (no charset)
 #
 # Raw byte sequences for the non-ASCII characters used in the fixture URLs.
 # The suffix names the encoding: _l1 / _l15 for ISO-8859-1 / ISO-8859-15,
 # _u8 for UTF-8.  The three _u8 variants are declared here but are not
 # referenced anywhere else in this script.
 my $ccedilla_l15 = "\xE7";
 my $ccedilla_u8 = "\xC3\xA7";
 my $eacute_l1 = "\xE9";
 my $eacute_u8 = "\xC3\xA9";
 my $eurosign_l15 = "\xA4";
 my $eurosign_u8 = "\xE2\x82\xAC";
 # Index page: links to page 1 and page 3, with the non-ASCII characters of
 # both hrefs written as raw single bytes.
 my $pageindex = <<EOF;
 <html>
 <head>
  <title>Main Page</title>
 </head>
 <body>
  <p>
    Link to page 1 <a href="http://localhost:{{port}}/p1_fran${ccedilla_l15}ais.html">La seule page en fran&ccedil;ais</a>.
    Link to page 3 <a href="http://localhost:{{port}}/p3_${eurosign_l15}${eurosign_l15}${eurosign_l15}.html">My tailor is rich</a>.
  </p>
 </body>
 </html>
 EOF
 # Page 1: declares ISO-8859-1 in a meta tag and links to page 2 with raw
 # \xE9 bytes in the href.
 my $pagefrancais = <<EOF;
 <html>
 <head>
  <title>La seule page en français</title>
  <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"/>
 </head>
 <body>
  <p>
    Link to page 2 <a href="http://localhost:{{port}}/p2_${eacute_l1}${eacute_l1}n.html">Die enkele nerderlangstalige pagina</a>.
  </p>
 </body>
 </html>
 EOF
 # Page 2: leaf page, contains no links and no meta charset.
 my $pageeen = <<EOF;
 <html>
 <head>
  <title>Die enkele nederlandstalige pagina</title>
 </head>
 <body>
  <p>
    &Eacute;&eacute;n is niet veel maar toch meer dan nul.<br/>
    Nerdelands is een mooie taal... dit zin stuckje spreekt vanzelf, of niet :)
  </p>
 </body>
 </html>
 EOF
 # Page 3: leaf page, contains no links and no meta charset.
 my $pageeuro = <<EOF;
 <html>
 <head>
  <title>Euro page</title>
 </head>
 <body>
  <p>
    My tailor isn't rich anymore.
  </p>
 </body>
 </html>
 EOF
 # Body for a "not found" reply; no URL entry in this script refers to it.
 my $page404 = <<EOF;
 <html>
 <head>
  <title>404</title>
 </head>
 <body>
  <p>
    Nop nop nop...
  </p>
 </body>
 </html>
 EOF
 # Canned replies passed to HTTPTest as its input, keyed by request path.
 # Each entry gives: code, msg, headers, content.
 # Every page exists under two paths: one with the name percent-encoded from
 # UTF-8 bytes and one percent-encoded from the single-byte form.
 my %urls = (
    '/index.html' => {
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/html; charset=ISO-8859-15",
        },
        content => $pageindex,
    },
    '/robots.txt' => {
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/plain",
        },
        content => "",
    },
    '/p1_fran%C3%A7ais.html' => {    # UTF-8 encoded
        code => "200",
        # The reason phrase must agree with the 200 status code above.
        msg => "Ok",
        headers => {
            "Content-type" => "text/html; charset=UTF-8",
        },
        content => $pagefrancais,
    },
    '/p1_fran%E7ais.html' => {
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/html; charset=UTF-8",
        },
        content => $pagefrancais,
    },
    '/p2_%C3%A9%C3%A9n.html' => {    # UTF-8 encoded
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/html; charset=UTF-8",
        },
        content => $pageeen,
    },
    '/p2_%E9%E9n.html' => {
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/html; charset=ISO-8859-1",
        },
        content => $pageeen,
    },
    '/p3_%E2%82%AC%E2%82%AC%E2%82%AC.html' => {    # UTF-8 encoded
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/plain",
        },
        content => $pageeuro,
    },
    '/p3_%A4%A4%A4.html' => {
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/plain",
        },
        content => $pageeuro,
    },
 );
 # Recursive fetch from the server root with --iri=no.
 my $cmdline = join ' ',
     $WgetTest::WGETPATH,
     "--iri=no", "-nH", "-r",
     "http://localhost:{{port}}/";
 my $expected_error_code = 0;
 # Files expected on disk; the names keep the single-byte characters that
 # appear in the page links.
 my %expected_downloaded_files = (
     'index.html' => { content => $pageindex },
     'robots.txt' => { content => "" },
     "p1_fran${ccedilla_l15}ais.html" => { content => $pagefrancais },
     "p2_${eacute_l1}${eacute_l1}n.html" => { content => $pageeen },
     "p3_${eurosign_l15}${eurosign_l15}${eurosign_l15}.html" => { content => $pageeuro },
 );
 ###############################################################################
 # Hand everything to the HTTPTest harness and exit with its result.
 my $the_test = HTTPTest->new(
     name    => "Test-iri-disabled",
     input   => \%urls,
     cmdline => $cmdline,
     errcode => $expected_error_code,
     output  => \%expected_downloaded_files,
 );
 exit $the_test->run();
 # vim: et ts=4 sw=4
--- a/tests/Test-iri-forced-remote.px
+++ b/tests/Test-iri-forced-remote.px
@ -0,0 +1,207 @@
 #!/usr/bin/perl -w
 use strict;
 use HTTPTest;
 # cf. http://en.wikipedia.org/wiki/Latin1
 #     http://en.wikipedia.org/wiki/ISO-8859-15
 ###############################################################################
 # Force remote encoding to ISO-8859-1
 #
 # mime : charset found in Content-Type HTTP MIME header
 # meta : charset found in Content-Type meta tag
 #
 # index.html                  mime + file = iso-8859-15
 # p1_français.html            meta + file = iso-8859-1, mime = utf-8
 # p2_één.html                 mime + file = iso-8859-1
 # p3_€€€.html                 no meta tag, mime = text/plain (no charset)
 #
 # Raw byte sequences for the non-ASCII characters used in the fixture URLs.
 # The suffix names the encoding: _l1 / _l15 for ISO-8859-1 / ISO-8859-15,
 # _u8 for UTF-8.  Byte \xA4 appears twice on purpose: once named as the
 # ISO-8859-15 euro sign and once as the ISO-8859-1 currency sign.
 # $ccedilla_u8, $eurosign_u8 and $currency_l1 are declared here but are not
 # referenced anywhere else in this script.
 my $ccedilla_l15 = "\xE7";
 my $ccedilla_u8 = "\xC3\xA7";
 my $eacute_l1 = "\xE9";
 my $eacute_u8 = "\xC3\xA9";
 my $eurosign_l15 = "\xA4";
 my $eurosign_u8 = "\xE2\x82\xAC";
 my $currency_l1 = "\xA4";
 my $currency_u8 = "\xC2\xA4";
 # Index page: links to page 1 and page 3, with the non-ASCII characters of
 # both hrefs written as raw single bytes.
 my $pageindex = <<EOF;
 <html>
 <head>
  <title>Main Page</title>
 </head>
 <body>
  <p>
    Link to page 1 <a href="http://localhost:{{port}}/p1_fran${ccedilla_l15}ais.html">La seule page en fran&ccedil;ais</a>.
    Link to page 3 <a href="http://localhost:{{port}}/p3_${eurosign_l15}${eurosign_l15}${eurosign_l15}.html">My tailor is rich</a>.
  </p>
 </body>
 </html>
 EOF
 # Page 1: declares ISO-8859-1 in a meta tag and links to page 2 with raw
 # \xE9 bytes in the href.
 my $pagefrancais = <<EOF;
 <html>
 <head>
  <title>La seule page en français</title>
  <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"/>
 </head>
 <body>
  <p>
    Link to page 2 <a href="http://localhost:{{port}}/p2_${eacute_l1}${eacute_l1}n.html">Die enkele nerderlangstalige pagina</a>.
  </p>
 </body>
 </html>
 EOF
 # Page 2: leaf page, contains no links and no meta charset.
 my $pageeen = <<EOF;
 <html>
 <head>
  <title>Die enkele nederlandstalige pagina</title>
 </head>
 <body>
  <p>
    &Eacute;&eacute;n is niet veel maar toch meer dan nul.<br/>
    Nerdelands is een mooie taal... dit zin stuckje spreekt vanzelf, of niet :)
  </p>
 </body>
 </html>
 EOF
 # Page 3: leaf page, contains no links and no meta charset.
 my $pageeuro = <<EOF;
 <html>
 <head>
  <title>Euro page</title>
 </head>
 <body>
  <p>
    My tailor isn't rich anymore.
  </p>
 </body>
 </html>
 EOF
 # Body returned with the 404 reply for the UTF-8 encoded page 1 path.
 my $page404 = <<EOF;
 <html>
 <head>
  <title>404</title>
 </head>
 <body>
  <p>
    Nop nop nop...
  </p>
 </body>
 </html>
 EOF
 # Build one canned reply from: code, msg, Content-type header value, content.
 my $reply = sub {
     my ($code, $msg, $ctype, $content) = @_;
     return {
         code    => $code,
         msg     => $msg,
         headers => { "Content-type" => $ctype },
         content => $content,
     };
 };
 # Canned replies passed to HTTPTest as its input, keyed by request path.
 # Paths marked "UTF-8 encoded" are percent-encoded from UTF-8 bytes.
 my %urls = (
     '/index.html' =>
         $reply->("200", "Ok", "text/html; charset=ISO-8859-15", $pageindex),
     '/robots.txt' =>
         $reply->("200", "Ok", "text/plain", ""),
     # UTF-8 encoded
     '/p1_fran%C3%A7ais.html' =>
         $reply->("404", "File not found", "text/html; charset=UTF-8", $page404),
     '/p1_fran%E7ais.html' =>
         $reply->("200", "Ok", "text/html; charset=UTF-8", $pagefrancais),
     # UTF-8 encoded
     '/p2_%C3%A9%C3%A9n.html' =>
         $reply->("200", "Ok", "text/html; charset=UTF-8", $pageeen),
     '/p2_%E9%E9n.html' =>
         $reply->("200", "Ok", "text/html; charset=ISO-8859-1", $pageeen),
     # UTF-8 encoded
     '/p3_%E2%82%AC%E2%82%AC%E2%82%AC.html' =>
         $reply->("200", "Ok", "text/plain", $pageeuro),
     '/p3_%A4%A4%A4.html' =>
         $reply->("200", "Ok", "text/plain", $pageeuro),
     # UTF-8 encoded
     '/p3_%C2%A4%C2%A4%C2%A4.html' =>
         $reply->("200", "Ok", "text/plain", $pageeuro),
 );
 # Recursive fetch from the server root with --iri and the remote encoding
 # forced to iso-8859-1.
 my $cmdline = join ' ',
     $WgetTest::WGETPATH,
     "--iri", "--remote-encoding=iso-8859-1", "-nH", "-r",
     "http://localhost:{{port}}/";
 my $expected_error_code = 0;
 # Files expected on disk: page 1 under its single-byte name, pages 2 and 3
 # under UTF-8 names.
 my %expected_downloaded_files = (
     'index.html' => { content => $pageindex },
     'robots.txt' => { content => "" },
     "p1_fran${ccedilla_l15}ais.html" => { content => $pagefrancais },
     "p2_${eacute_u8}${eacute_u8}n.html" => { content => $pageeen },
     "p3_${currency_u8}${currency_u8}${currency_u8}.html" => { content => $pageeuro },
 );
 ###############################################################################
 # Hand everything to the HTTPTest harness and exit with its result.
 my $the_test = HTTPTest->new(
     name    => "Test-iri-forced-remote",
     input   => \%urls,
     cmdline => $cmdline,
     errcode => $expected_error_code,
     output  => \%expected_downloaded_files,
 );
 exit $the_test->run();
 # vim: et ts=4 sw=4
--- a/tests/Test-iri-list.px
+++ b/tests/Test-iri-list.px
@ -0,0 +1,173 @@
 #!/usr/bin/perl -w
 use strict;
 use HTTPTest;
 # cf. http://en.wikipedia.org/wiki/Latin1
 #     http://en.wikipedia.org/wiki/ISO-8859-15
 ###############################################################################
 #
 # mime : charset found in Content-Type HTTP MIME header
 # meta : charset found in Content-Type meta tag
 #
 # index.html                  mime + file = iso-8859-15
 # p1_français.html            meta + file = iso-8859-1, mime = utf-8
 # p2_één.html                 meta + file = utf-8, mime = iso-8859-1
 #
 # Raw byte sequences for the non-ASCII characters used in the fixture URLs.
 # The suffix names the encoding: _l1 for ISO-8859-1, _u8 for UTF-8.
 # $ccedilla_u8 is declared here but is not referenced anywhere else in
 # this script.
 my $ccedilla_l1 = "\xE7";
 my $ccedilla_u8 = "\xC3\xA7";
 my $eacute_l1 = "\xE9";
 my $eacute_u8 = "\xC3\xA9";
 # URL list fetched by wget via -i; the non-ASCII characters are written as
 # raw single bytes.
 my $urllist = <<EOF;
 http://localhost:{{port}}/
 http://localhost:{{port}}/p1_fran${ccedilla_l1}ais.html
 http://localhost:{{port}}/p2_${eacute_l1}${eacute_l1}n.html
 EOF
 # Index page: contains no links.
 my $pageindex = <<EOF;
 <html>
 <head>
  <title>Main Page</title>
 </head>
 <body>
  <p>
 	Main page.
  </p>
 </body>
 </html>
 EOF
 # Page 1: declares ISO-8859-1 in a meta tag.
 my $pagefrancais = <<EOF;
 <html>
 <head>
  <title>La seule page en français</title>
  <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"/>
 </head>
 <body>
  <p>
    French page.
  </p>
 </body>
 </html>
 EOF
 # Page 2: declares UTF-8 in a meta tag.
 my $pageeen = <<EOF;
 <html>
 <head>
  <title>Die enkele nederlandstalige pagina</title>
  <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
 </head>
 <body>
  <p>
    Dutch page.
  </p>
 </body>
 </html>
 EOF
 # Body returned with the 404 reply for the UTF-8 encoded page 1 path.
 my $page404 = <<EOF;
 <html>
 <head>
  <title>404</title>
 </head>
 <body>
  <p>
    Nop nop nop...
  </p>
 </body>
 </html>
 EOF
 # Build one canned reply from: code, msg, Content-type header value, content.
 my $reply = sub {
     my ($code, $msg, $ctype, $content) = @_;
     return {
         code    => $code,
         msg     => $msg,
         headers => { "Content-type" => $ctype },
         content => $content,
     };
 };
 # Canned replies passed to HTTPTest as its input, keyed by request path.
 # Paths marked "UTF-8 encoded" are percent-encoded from UTF-8 bytes.
 my %urls = (
     '/index.html' =>
         $reply->("200", "Ok", "text/html; charset=ISO-8859-15", $pageindex),
     '/robots.txt' =>
         $reply->("200", "Ok", "text/plain", ""),
     # UTF-8 encoded
     '/p1_fran%C3%A7ais.html' =>
         $reply->("404", "File not found", "text/html; charset=UTF-8", $page404),
     '/p1_fran%E7ais.html' =>
         $reply->("200", "Ok", "text/html; charset=UTF-8", $pagefrancais),
     # UTF-8 encoded
     '/p2_%C3%A9%C3%A9n.html' =>
         $reply->("200", "Ok", "text/html; charset=ISO-8859-1", $pageeen),
     '/p2_%E9%E9n.html' =>
         $reply->("200", "Ok", "text/html; charset=ISO-8859-1", $pageeen),
     '/url_list.txt' =>
         $reply->("200", "Ok", "text/plain; charset=ISO-8859-1", $urllist),
 );
 # Fetch the URL list over HTTP and download every URL it names, with --iri
 # and debug output (-d) switched on.
 my $cmdline = join ' ',
     $WgetTest::WGETPATH,
     "--iri", "-d", "-i",
     "http://localhost:{{port}}/url_list.txt";
 my $expected_error_code = 0;
 # Files expected on disk: page 1 under its single-byte name, page 2 under
 # its UTF-8 name.
 my %expected_downloaded_files = (
     'url_list.txt' => { content => $urllist },
     'index.html' => { content => $pageindex },
     "p1_fran${ccedilla_l1}ais.html" => { content => $pagefrancais },
     "p2_${eacute_u8}${eacute_u8}n.html" => { content => $pageeen },
 );
 ###############################################################################
 # Hand everything to the HTTPTest harness and exit with its result.
 my $the_test = HTTPTest->new(
     name    => "Test-iri-list",
     input   => \%urls,
     cmdline => $cmdline,
     errcode => $expected_error_code,
     output  => \%expected_downloaded_files,
 );
 exit $the_test->run();
 # vim: et ts=4 sw=4
--- a/tests/Test-iri.px
+++ b/tests/Test-iri.px
@ -0,0 +1,224 @@
 #!/usr/bin/perl -w
 use strict;
 use HTTPTest;
 # cf. http://en.wikipedia.org/wiki/Latin1
 #     http://en.wikipedia.org/wiki/ISO-8859-15
 ###############################################################################
 #
 # mime : charset found in Content-Type HTTP MIME header
 # meta : charset found in Content-Type meta tag
 #
 # index.html                  mime + file = iso-8859-15
 # p1_français.html            meta + file = iso-8859-1, mime = utf-8
 # p2_één.html                 meta + file = utf-8, mime = iso-8859-1
 # p3_€€€.html                 meta + file = utf-8, mime = iso-8859-1
 # p4_méér.html                mime + file = utf-8
 #
 # Raw byte sequences for the non-ASCII characters used in the fixture URLs.
 # The suffix names the encoding: _l1 / _l15 for ISO-8859-1 / ISO-8859-15,
 # _u8 for UTF-8.  $ccedilla_u8 is declared here but is not referenced
 # anywhere else in this script.
 my $ccedilla_l15 = "\xE7";
 my $ccedilla_u8 = "\xC3\xA7";
 my $eacute_l1 = "\xE9";
 my $eacute_u8 = "\xC3\xA9";
 my $eurosign_l15 = "\xA4";
 my $eurosign_u8 = "\xE2\x82\xAC";
 # Index page: links to page 1 and page 3, with the non-ASCII characters of
 # both hrefs written as raw single bytes.
 my $pageindex = <<EOF;
 <html>
 <head>
  <title>Main Page</title>
 </head>
 <body>
  <p>
    Link to page 1 <a href="http://localhost:{{port}}/p1_fran${ccedilla_l15}ais.html">La seule page en fran&ccedil;ais</a>.
    Link to page 3 <a href="http://localhost:{{port}}/p3_${eurosign_l15}${eurosign_l15}${eurosign_l15}.html">My tailor is rich</a>.
  </p>
 </body>
 </html>
 EOF
 # Page 1: declares ISO-8859-1 in a meta tag and links to page 2 with raw
 # \xE9 bytes in the href.
 my $pagefrancais = <<EOF;
 <html>
 <head>
  <title>La seule page en français</title>
  <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"/>
 </head>
 <body>
  <p>
    Link to page 2 <a href="http://localhost:{{port}}/p2_${eacute_l1}${eacute_l1}n.html">Die enkele nerderlangstalige pagina</a>.
  </p>
 </body>
 </html>
 EOF
 # Page 2: declares UTF-8 in a meta tag and links to page 4 with UTF-8
 # bytes in the href.
 my $pageeen = <<EOF;
 <html>
 <head>
  <title>Die enkele nederlandstalige pagina</title>
  <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
 </head>
 <body>
  <p>
    &Eacute;&eacute;n is niet veel maar toch meer dan nul.<br/>
    Nerdelands is een mooie taal... dit zin stuckje spreekt vanzelf, of niet :)<br/>
    <a href="http://localhost:{{port}}/p4_m${eacute_u8}${eacute_u8}r.html">M&eacute&eacute;r</a>
  </p>
 </body>
 </html>
 EOF
 # Page 3: declares UTF-8 in a meta tag; contains no links.
 my $pageeuro = <<EOF;
 <html>
 <head>
  <title>Euro page</title>
  <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
 </head>
 <body>
  <p>
    My tailor isn't rich anymore.
  </p>
 </body>
 </html>
 EOF
 # Page 4: leaf page, contains no links and no meta charset.
 my $pagemeer = <<EOF;
 <html>
 <head>
  <title>Bekende supermarkt</title>
 </head>
 <body>
  <p>
    Ik ben toch niet gek !
  </p>
 </body>
 </html>
 EOF
 # Body returned with the 404 reply for the UTF-8 encoded page 1 path.
 my $page404 = <<EOF;
 <html>
 <head>
  <title>404</title>
 </head>
 <body>
  <p>
    Nop nop nop...
  </p>
 </body>
 </html>
 EOF
 # Build one canned reply from: code, msg, Content-type header value, content.
 my $reply = sub {
     my ($code, $msg, $ctype, $content) = @_;
     return {
         code    => $code,
         msg     => $msg,
         headers => { "Content-type" => $ctype },
         content => $content,
     };
 };
 # Canned replies passed to HTTPTest as its input, keyed by request path.
 # Paths marked "UTF-8 encoded" are percent-encoded from UTF-8 bytes.
 my %urls = (
     '/index.html' =>
         $reply->("200", "Ok", "text/html; charset=ISO-8859-15", $pageindex),
     '/robots.txt' =>
         $reply->("200", "Ok", "text/plain", ""),
     # UTF-8 encoded
     '/p1_fran%C3%A7ais.html' =>
         $reply->("404", "File not found", "text/html; charset=UTF-8", $page404),
     '/p1_fran%E7ais.html' =>
         $reply->("200", "Ok", "text/html; charset=UTF-8", $pagefrancais),
     # UTF-8 encoded
     '/p2_%C3%A9%C3%A9n.html' =>
         $reply->("200", "Ok", "text/html; charset=ISO-8859-1", $pageeen),
     '/p2_%E9%E9n.html' =>
         $reply->("200", "Ok", "text/html; charset=ISO-8859-1", $pageeen),
     # UTF-8 encoded
     '/p3_%E2%82%AC%E2%82%AC%E2%82%AC.html' =>
         $reply->("200", "Ok", "text/plain; charset=ISO-8859-1", $pageeuro),
     '/p3_%A4%A4%A4.html' =>
         $reply->("200", "Ok", "text/plain; charset=ISO-8859-1", $pageeuro),
     '/p4_m%C3%A9%C3%A9r.html' =>
         $reply->("200", "Ok", "text/plain; charset=UTF-8", $pagemeer),
 );
 # Recursive fetch from the server root with --iri and
 # --restrict-file-names=nocontrol.
 my $cmdline = join ' ',
     $WgetTest::WGETPATH,
     "--iri", "--restrict-file-names=nocontrol", "-nH", "-r",
     "http://localhost:{{port}}/";
 my $expected_error_code = 0;
 # Files expected on disk: page 1 under its single-byte name, pages 2 to 4
 # under UTF-8 names.
 my %expected_downloaded_files = (
     'index.html' => { content => $pageindex },
     'robots.txt' => { content => "" },
     "p1_fran${ccedilla_l15}ais.html" => { content => $pagefrancais },
     "p2_${eacute_u8}${eacute_u8}n.html" => { content => $pageeen },
     "p3_${eurosign_u8}${eurosign_u8}${eurosign_u8}.html" => { content => $pageeuro },
     "p4_m${eacute_u8}${eacute_u8}r.html" => { content => $pagemeer },
 );
 ###############################################################################
 # Hand everything to the HTTPTest harness and exit with its result.
 my $the_test = HTTPTest->new(
     name    => "Test-iri",
     input   => \%urls,
     cmdline => $cmdline,
     errcode => $expected_error_code,
     output  => \%expected_downloaded_files,
 );
 exit $the_test->run();
 # vim: et ts=4 sw=4
--- a/tests/run-px
+++ b/tests/run-px
@ -17,9 +17,16 @@ my @tests = (
    'Test-E-k-K.px',
    'Test-E-k.px',
    'Test-ftp.px',
    'Test-ftp-iri.px',
    'Test-ftp-iri-fallback.px',
    'Test-ftp-iri-disabled.px',
    'Test-HTTP-Content-Disposition-1.px',
    'Test-HTTP-Content-Disposition-2.px',
    'Test-HTTP-Content-Disposition.px',
    'Test-iri.px',
    'Test-iri-disabled.px',
    'Test-iri-forced-remote.px',
    'Test-iri-list.px',
    'Test-N-current.px',
    'Test-N-smaller.px',
    'Test-N-no-info.px',