mirror of
https://github.com/moparisthebest/wget
synced 2024-07-03 16:38:41 -04:00
fixed IRI misbehaviour(s)
This commit is contained in:
parent
6fc11e46ec
commit
bc347cc36f
@ -1,5 +1,11 @@
|
|||||||
2014-10-16 Tim Ruehsen <tim.ruehsen@gmx.de>
|
2014-10-16 Tim Ruehsen <tim.ruehsen@gmx.de>
|
||||||
|
|
||||||
|
* url.c (url_parse): little code cleanup
|
||||||
|
* html-url.c (get_urls_html): HTTP header Content-Type charset precedes http-equiv
|
||||||
|
* iri.c (do_conversion): moved iconv code completely into the function
|
||||||
|
* iri.c (do_conversion): call url_unescape to fix charset conversion
|
||||||
|
* iri.c (remote_to_utf8): use strcasecmp to compare encoding
|
||||||
|
|
||||||
* gnutls.c (ssl_connect_wget): do not use SSLv3 unless explicitly requested
|
* gnutls.c (ssl_connect_wget): do not use SSLv3 unless explicitly requested
|
||||||
* openssl.c (ssl_init): do not use SSLv3 unless explicitly requested
|
* openssl.c (ssl_init): do not use SSLv3 unless explicitly requested
|
||||||
|
|
||||||
|
@ -748,8 +748,9 @@ get_urls_html (const char *file, const char *url, bool *meta_disallow_follow,
|
|||||||
map_html_tags (fm->content, fm->length, collect_tags_mapper, &ctx, flags,
|
map_html_tags (fm->content, fm->length, collect_tags_mapper, &ctx, flags,
|
||||||
NULL, interesting_attributes);
|
NULL, interesting_attributes);
|
||||||
|
|
||||||
/* If meta charset isn't null, override content encoding */
|
/* Meta charset is only valid if there was no HTTP header Content-Type charset. */
|
||||||
if (iri && meta_charset)
|
/* This is true for HTTP 1.0 and 1.1. */
|
||||||
|
if (iri && !iri->content_encoding && meta_charset)
|
||||||
set_content_encoding (iri, meta_charset);
|
set_content_encoding (iri, meta_charset);
|
||||||
|
|
||||||
DEBUGP (("no-follow in %s: %d\n", file, ctx.nofollow));
|
DEBUGP (("no-follow in %s: %d\n", file, ctx.nofollow));
|
||||||
|
102
src/iri.c
102
src/iri.c
@ -38,15 +38,13 @@ as that of the covered work. */
|
|||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
|
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
|
#include "url.h"
|
||||||
|
|
||||||
/* RFC3987 section 3.1 mandates STD3 ASCII RULES */
|
/* RFC3987 section 3.1 mandates STD3 ASCII RULES */
|
||||||
#define IDNA_FLAGS IDNA_USE_STD3_ASCII_RULES
|
#define IDNA_FLAGS IDNA_USE_STD3_ASCII_RULES
|
||||||
|
|
||||||
/* Note: locale encoding is kept in options struct (opt.locale) */
|
/* Note: locale encoding is kept in options struct (opt.locale) */
|
||||||
|
|
||||||
static bool do_conversion (iconv_t cd, char *in, size_t inlen, char **out);
|
|
||||||
|
|
||||||
|
|
||||||
/* Given a string containing "charset=XXX", return the encoding if found,
|
/* Given a string containing "charset=XXX", return the encoding if found,
|
||||||
or NULL otherwise */
|
or NULL otherwise */
|
||||||
char *
|
char *
|
||||||
@ -110,52 +108,34 @@ check_encoding_name (char *encoding)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Try converting string str from locale to UTF-8. Return a new string
|
|
||||||
on success, or str on error or if conversion isn't needed. */
|
|
||||||
const char *
|
|
||||||
locale_to_utf8 (const char *str)
|
|
||||||
{
|
|
||||||
iconv_t l2u;
|
|
||||||
char *new;
|
|
||||||
|
|
||||||
/* That shouldn't happen, just in case */
|
|
||||||
if (!opt.locale)
|
|
||||||
{
|
|
||||||
logprintf (LOG_VERBOSE, _("locale_to_utf8: locale is unset\n"));
|
|
||||||
opt.locale = find_locale ();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!opt.locale || !strcasecmp (opt.locale, "utf-8"))
|
|
||||||
return str;
|
|
||||||
|
|
||||||
l2u = iconv_open ("UTF-8", opt.locale);
|
|
||||||
if (l2u == (iconv_t)(-1))
|
|
||||||
{
|
|
||||||
logprintf (LOG_VERBOSE, _("Conversion from %s to %s isn't supported\n"),
|
|
||||||
quote (opt.locale), quote ("UTF-8"));
|
|
||||||
return str;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (do_conversion (l2u, (char *) str, strlen ((char *) str), &new))
|
|
||||||
return (const char *) new;
|
|
||||||
|
|
||||||
return str;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Do the conversion according to the passed conversion descriptor cd. *out
|
/* Do the conversion according to the passed conversion descriptor cd. *out
|
||||||
will contain the transcoded string on success. *out content is
|
will contain the transcoded string on success. *out content is
|
||||||
unspecified otherwise. */
|
unspecified otherwise. */
|
||||||
static bool
|
static bool
|
||||||
do_conversion (iconv_t cd, char *in, size_t inlen, char **out)
|
do_conversion (const char *tocode, const char *fromcode, char *in, size_t inlen, char **out)
|
||||||
{
|
{
|
||||||
|
iconv_t cd;
|
||||||
/* sXXXav : hummm hard to guess... */
|
/* sXXXav : hummm hard to guess... */
|
||||||
size_t len, done, outlen = inlen * 2;
|
size_t len, done, outlen;
|
||||||
int invalid = 0, tooshort = 0;
|
int invalid = 0, tooshort = 0;
|
||||||
char *s;
|
char *s, *in_org, *in_save;
|
||||||
|
|
||||||
s = xmalloc (outlen + 1);
|
cd = iconv_open (tocode, fromcode);
|
||||||
*out = s;
|
if (cd == (iconv_t)(-1))
|
||||||
len = outlen;
|
{
|
||||||
|
logprintf (LOG_VERBOSE, _("Conversion from %s to %s isn't supported\n"),
|
||||||
|
quote (opt.locale), quote ("UTF-8"));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* iconv() has to work on an unescaped string */
|
||||||
|
in_org = in;
|
||||||
|
in_save = in = strndup(in, inlen);
|
||||||
|
url_unescape(in);
|
||||||
|
inlen = strlen(in);
|
||||||
|
|
||||||
|
len = outlen = inlen * 2;
|
||||||
|
*out = s = xmalloc (outlen + 1);
|
||||||
done = 0;
|
done = 0;
|
||||||
|
|
||||||
for (;;)
|
for (;;)
|
||||||
@ -164,6 +144,9 @@ do_conversion (iconv_t cd, char *in, size_t inlen, char **out)
|
|||||||
{
|
{
|
||||||
*out = s;
|
*out = s;
|
||||||
*(s + len - outlen - done) = '\0';
|
*(s + len - outlen - done) = '\0';
|
||||||
|
xfree(in_save);
|
||||||
|
iconv_close(cd);
|
||||||
|
logprintf (LOG_VERBOSE, _("converted '%s' (%s) -> '%s' (%s)\n"), in_org, fromcode, *out, tocode);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -202,9 +185,35 @@ do_conversion (iconv_t cd, char *in, size_t inlen, char **out)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
xfree(in_save);
|
||||||
|
iconv_close(cd);
|
||||||
|
logprintf (LOG_VERBOSE, _("converted '%s' (%s) -> '%s' (%s)\n"), in_org, fromcode, *out, tocode);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Try converting string str from locale to UTF-8. Return a new string
|
||||||
|
on success, or str on error or if conversion isn't needed. */
|
||||||
|
const char *
|
||||||
|
locale_to_utf8 (const char *str)
|
||||||
|
{
|
||||||
|
char *new;
|
||||||
|
|
||||||
|
/* That shouldn't happen, just in case */
|
||||||
|
if (!opt.locale)
|
||||||
|
{
|
||||||
|
logprintf (LOG_VERBOSE, _("locale_to_utf8: locale is unset\n"));
|
||||||
|
opt.locale = find_locale ();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!opt.locale || !strcasecmp (opt.locale, "utf-8"))
|
||||||
|
return str;
|
||||||
|
|
||||||
|
if (do_conversion ("UTF-8", opt.locale, (char *) str, strlen ((char *) str), &new))
|
||||||
|
return (const char *) new;
|
||||||
|
|
||||||
|
return str;
|
||||||
|
}
|
||||||
|
|
||||||
/* Try to "ASCII encode" UTF-8 host. Return the new domain on success or NULL
|
/* Try to "ASCII encode" UTF-8 host. Return the new domain on success or NULL
|
||||||
on error. */
|
on error. */
|
||||||
char *
|
char *
|
||||||
@ -258,7 +267,6 @@ idn_decode (char *host)
|
|||||||
bool
|
bool
|
||||||
remote_to_utf8 (struct iri *iri, const char *str, const char **new)
|
remote_to_utf8 (struct iri *iri, const char *str, const char **new)
|
||||||
{
|
{
|
||||||
iconv_t cd;
|
|
||||||
bool ret = false;
|
bool ret = false;
|
||||||
|
|
||||||
if (!iri->uri_encoding)
|
if (!iri->uri_encoding)
|
||||||
@ -267,7 +275,7 @@ remote_to_utf8 (struct iri *iri, const char *str, const char **new)
|
|||||||
/* When `i->uri_encoding' == "UTF-8" there is nothing to convert. But we must
|
/* When `i->uri_encoding' == "UTF-8" there is nothing to convert. But we must
|
||||||
test for non-ASCII symbols for correct hostname processing in `idn_encode'
|
test for non-ASCII symbols for correct hostname processing in `idn_encode'
|
||||||
function. */
|
function. */
|
||||||
if (!strcmp (iri->uri_encoding, "UTF-8"))
|
if (!strcasecmp (iri->uri_encoding, "UTF-8"))
|
||||||
{
|
{
|
||||||
const char *p = str;
|
const char *p = str;
|
||||||
for (p = str; *p; p++)
|
for (p = str; *p; p++)
|
||||||
@ -279,15 +287,9 @@ remote_to_utf8 (struct iri *iri, const char *str, const char **new)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
cd = iconv_open ("UTF-8", iri->uri_encoding);
|
if (do_conversion ("UTF-8", iri->uri_encoding, (char *) str, strlen (str), (char **) new))
|
||||||
if (cd == (iconv_t)(-1))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (do_conversion (cd, (char *) str, strlen ((char *) str), (char **) new))
|
|
||||||
ret = true;
|
ret = true;
|
||||||
|
|
||||||
iconv_close (cd);
|
|
||||||
|
|
||||||
/* Test if something was converted */
|
/* Test if something was converted */
|
||||||
if (!strcmp (str, *new))
|
if (!strcmp (str, *new))
|
||||||
{
|
{
|
||||||
|
19
src/url.c
19
src/url.c
@ -681,7 +681,6 @@ url_parse (const char *url, int *error, struct iri *iri, bool percent_encode)
|
|||||||
char *user = NULL, *passwd = NULL;
|
char *user = NULL, *passwd = NULL;
|
||||||
|
|
||||||
const char *url_encoded = NULL;
|
const char *url_encoded = NULL;
|
||||||
char *new_url = NULL;
|
|
||||||
|
|
||||||
int error_code;
|
int error_code;
|
||||||
|
|
||||||
@ -695,29 +694,29 @@ url_parse (const char *url, int *error, struct iri *iri, bool percent_encode)
|
|||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
url_encoded = url;
|
||||||
|
|
||||||
if (iri && iri->utf8_encode)
|
if (iri && iri->utf8_encode)
|
||||||
{
|
{
|
||||||
|
char *new_url = NULL;
|
||||||
|
|
||||||
iri->utf8_encode = remote_to_utf8 (iri, iri->orig_url ? iri->orig_url : url, (const char **) &new_url);
|
iri->utf8_encode = remote_to_utf8 (iri, iri->orig_url ? iri->orig_url : url, (const char **) &new_url);
|
||||||
if (!iri->utf8_encode)
|
if (!iri->utf8_encode)
|
||||||
new_url = NULL;
|
new_url = NULL;
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
iri->orig_url = xstrdup (url);
|
iri->orig_url = xstrdup (url);
|
||||||
percent_encode = true;
|
url_encoded = reencode_escapes (new_url);
|
||||||
|
if (url_encoded != new_url)
|
||||||
|
xfree (new_url);
|
||||||
|
percent_encode = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* XXX XXX Could that change introduce (security) bugs ??? XXX XXX*/
|
|
||||||
if (percent_encode)
|
if (percent_encode)
|
||||||
url_encoded = reencode_escapes (new_url ? new_url : url);
|
url_encoded = reencode_escapes (url);
|
||||||
else
|
|
||||||
url_encoded = new_url ? new_url : url;
|
|
||||||
|
|
||||||
p = url_encoded;
|
p = url_encoded;
|
||||||
|
|
||||||
if (new_url && url_encoded != new_url)
|
|
||||||
xfree (new_url);
|
|
||||||
|
|
||||||
p += strlen (supported_schemes[scheme].leading_string);
|
p += strlen (supported_schemes[scheme].leading_string);
|
||||||
uname_b = p;
|
uname_b = p;
|
||||||
p = url_skip_credentials (p);
|
p = url_skip_credentials (p);
|
||||||
|
@ -1,3 +1,10 @@
|
|||||||
|
2014-10-06 Tim Ruehsen <tim.ruehsen@gmx.de>
|
||||||
|
|
||||||
|
* Test-iri.px: fixed encodings
|
||||||
|
* Test-iri-forced-remote.px: fixed encodings
|
||||||
|
* Test-iri-percent.px: fixed encodings
|
||||||
|
* Test-idn-meta.px: fixed encodings
|
||||||
|
|
||||||
2014-10-02 Tim Ruehsen <tim.ruehsen@gmx.de>
|
2014-10-02 Tim Ruehsen <tim.ruehsen@gmx.de>
|
||||||
|
|
||||||
* tests/WgetTests.pm: use filename as default test name
|
* tests/WgetTests.pm: use filename as default test name
|
||||||
|
@ -27,7 +27,8 @@ my %urls = (
|
|||||||
code => "200",
|
code => "200",
|
||||||
msg => "You want fries with that?",
|
msg => "You want fries with that?",
|
||||||
headers => {
|
headers => {
|
||||||
'Content-Type' => 'text/html; charset=UTF-8',
|
# HTTP header precedes http-equiv, so simply omit it here
|
||||||
|
#'Content-Type' => 'text/html; charset=UTF-8',
|
||||||
},
|
},
|
||||||
content => $starter_file,
|
content => $starter_file,
|
||||||
},
|
},
|
||||||
|
@ -48,7 +48,7 @@ my $pagefrancais = <<EOF;
|
|||||||
<html>
|
<html>
|
||||||
<head>
|
<head>
|
||||||
<title>La seule page en français</title>
|
<title>La seule page en français</title>
|
||||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"/>
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<p>
|
<p>
|
||||||
@ -117,17 +117,10 @@ my %urls = (
|
|||||||
content => "",
|
content => "",
|
||||||
},
|
},
|
||||||
'/p1_fran%C3%A7ais.html' => { # UTF-8 encoded
|
'/p1_fran%C3%A7ais.html' => { # UTF-8 encoded
|
||||||
code => "404",
|
|
||||||
msg => "File not found",
|
|
||||||
headers => {
|
|
||||||
"Content-type" => "text/html; charset=UTF-8",
|
|
||||||
},
|
|
||||||
content => $page404,
|
|
||||||
},
|
|
||||||
'/p1_fran%E7ais.html' => {
|
|
||||||
code => "200",
|
code => "200",
|
||||||
msg => "Ok",
|
msg => "Ok",
|
||||||
headers => {
|
headers => {
|
||||||
|
# wrong charset here, overridden by --remote-encoding=iso-8859-1
|
||||||
"Content-type" => "text/html; charset=UTF-8",
|
"Content-type" => "text/html; charset=UTF-8",
|
||||||
},
|
},
|
||||||
content => $pagefrancais,
|
content => $pagefrancais,
|
||||||
@ -140,14 +133,6 @@ my %urls = (
|
|||||||
},
|
},
|
||||||
content => $pageeen,
|
content => $pageeen,
|
||||||
},
|
},
|
||||||
'/p2_%E9%E9n.html' => {
|
|
||||||
code => "200",
|
|
||||||
msg => "Ok",
|
|
||||||
headers => {
|
|
||||||
"Content-type" => "text/html; charset=ISO-8859-1",
|
|
||||||
},
|
|
||||||
content => $pageeen,
|
|
||||||
},
|
|
||||||
'/p3_%E2%82%AC%E2%82%AC%E2%82%AC.html' => { # UTF-8 encoded
|
'/p3_%E2%82%AC%E2%82%AC%E2%82%AC.html' => { # UTF-8 encoded
|
||||||
code => "200",
|
code => "200",
|
||||||
msg => "Ok",
|
msg => "Ok",
|
||||||
@ -156,14 +141,6 @@ my %urls = (
|
|||||||
},
|
},
|
||||||
content => $pageeuro,
|
content => $pageeuro,
|
||||||
},
|
},
|
||||||
'/p3_%A4%A4%A4.html' => {
|
|
||||||
code => "200",
|
|
||||||
msg => "Ok",
|
|
||||||
headers => {
|
|
||||||
"Content-type" => "text/plain",
|
|
||||||
},
|
|
||||||
content => $pageeuro,
|
|
||||||
},
|
|
||||||
'/p3_%C2%A4%C2%A4%C2%A4.html' => { # UTF-8 encoded
|
'/p3_%C2%A4%C2%A4%C2%A4.html' => { # UTF-8 encoded
|
||||||
code => "200",
|
code => "200",
|
||||||
msg => "Ok",
|
msg => "Ok",
|
||||||
@ -174,7 +151,7 @@ my %urls = (
|
|||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
|
||||||
my $cmdline = $WgetTest::WGETPATH . " --iri --trust-server-names --remote-encoding=iso-8859-1 -nH -r http://localhost:{{port}}/";
|
my $cmdline = $WgetTest::WGETPATH . " --iri -e robots=on --trust-server-names --remote-encoding=iso-8859-1 -nH -r http://localhost:{{port}}/";
|
||||||
|
|
||||||
my $expected_error_code = 0;
|
my $expected_error_code = 0;
|
||||||
|
|
||||||
@ -185,7 +162,7 @@ my %expected_downloaded_files = (
|
|||||||
'robots.txt' => {
|
'robots.txt' => {
|
||||||
content => "",
|
content => "",
|
||||||
},
|
},
|
||||||
"p1_fran${ccedilla_l15}ais.html" => {
|
"p1_fran${ccedilla_u8}ais.html" => {
|
||||||
content => $pagefrancais,
|
content => $pagefrancais,
|
||||||
},
|
},
|
||||||
"p2_${eacute_u8}${eacute_u8}n.html" => {
|
"p2_${eacute_u8}${eacute_u8}n.html" => {
|
||||||
|
@ -11,6 +11,8 @@ use HTTPTest;
|
|||||||
|
|
||||||
my $ccedilla_l15 = "\xE7";
|
my $ccedilla_l15 = "\xE7";
|
||||||
my $ccedilla_l15_pct = "%E7";
|
my $ccedilla_l15_pct = "%E7";
|
||||||
|
my $ccedilla_u8 = "\xC3\xA7";
|
||||||
|
my $ccedilla_u8_pct = "%C3%A7";
|
||||||
my $eacute_l1 = "\xE9";
|
my $eacute_l1 = "\xE9";
|
||||||
my $eacute_u8 = "\xC3\xA9";
|
my $eacute_u8 = "\xC3\xA9";
|
||||||
my $eacute_u8_pct = "%C3%A9";
|
my $eacute_u8_pct = "%C3%A9";
|
||||||
@ -52,7 +54,7 @@ my %urls = (
|
|||||||
},
|
},
|
||||||
content => $pageindex,
|
content => $pageindex,
|
||||||
},
|
},
|
||||||
"/hello_${ccedilla_l15_pct}${eacute_u8_pct}.html" => {
|
"/hello_${ccedilla_u8_pct}${eacute_u8_pct}.html" => {
|
||||||
code => "200",
|
code => "200",
|
||||||
msg => "Ok",
|
msg => "Ok",
|
||||||
headers => {
|
headers => {
|
||||||
@ -70,7 +72,7 @@ my %expected_downloaded_files = (
|
|||||||
'index.html' => {
|
'index.html' => {
|
||||||
content => $pageindex,
|
content => $pageindex,
|
||||||
},
|
},
|
||||||
"hello_${ccedilla_l15}${eacute_u8}.html" => {
|
"hello_${ccedilla_u8}${eacute_u8}.html" => {
|
||||||
content => $pagefrancais,
|
content => $pagefrancais,
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
@ -42,11 +42,12 @@ my $pageindex = <<EOF;
|
|||||||
</html>
|
</html>
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
|
# specifying a wrong charset in http-equiv - it will be overridden by Content-Type HTTP header
|
||||||
my $pagefrancais = <<EOF;
|
my $pagefrancais = <<EOF;
|
||||||
<html>
|
<html>
|
||||||
<head>
|
<head>
|
||||||
<title>La seule page en français</title>
|
<title>La seule page en français</title>
|
||||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"/>
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<p>
|
<p>
|
||||||
@ -131,18 +132,11 @@ my %urls = (
|
|||||||
content => "",
|
content => "",
|
||||||
},
|
},
|
||||||
'/p1_fran%C3%A7ais.html' => { # UTF-8 encoded
|
'/p1_fran%C3%A7ais.html' => { # UTF-8 encoded
|
||||||
code => "404",
|
|
||||||
msg => "File not found",
|
|
||||||
headers => {
|
|
||||||
"Content-type" => "text/html; charset=UTF-8",
|
|
||||||
},
|
|
||||||
content => $page404,
|
|
||||||
},
|
|
||||||
'/p1_fran%E7ais.html' => {
|
|
||||||
code => "200",
|
code => "200",
|
||||||
msg => "Ok",
|
msg => "Ok",
|
||||||
headers => {
|
headers => {
|
||||||
"Content-type" => "text/html; charset=UTF-8",
|
# Content-Type header overrides http-equiv Content-Type
|
||||||
|
"Content-type" => "text/html; charset=ISO-8859-15",
|
||||||
},
|
},
|
||||||
content => $pagefrancais,
|
content => $pagefrancais,
|
||||||
},
|
},
|
||||||
@ -150,10 +144,10 @@ my %urls = (
|
|||||||
code => "200",
|
code => "200",
|
||||||
msg => "Ok",
|
msg => "Ok",
|
||||||
request_headers => {
|
request_headers => {
|
||||||
"Referer" => qr|http://localhost:[0-9]+/p1_fran%E7ais.html|,
|
"Referer" => qr|http://localhost:[0-9]+/p1_fran%C3%A7ais.html|,
|
||||||
},
|
},
|
||||||
headers => {
|
headers => {
|
||||||
"Content-type" => "text/html; charset=ISO-8859-1",
|
"Content-type" => "text/html; charset=UTF-8",
|
||||||
},
|
},
|
||||||
content => $pageeen,
|
content => $pageeen,
|
||||||
},
|
},
|
||||||
@ -165,14 +159,6 @@ my %urls = (
|
|||||||
},
|
},
|
||||||
content => $pageeuro,
|
content => $pageeuro,
|
||||||
},
|
},
|
||||||
'/p3_%A4%A4%A4.html' => {
|
|
||||||
code => "200",
|
|
||||||
msg => "Ok",
|
|
||||||
headers => {
|
|
||||||
"Content-type" => "text/plain; charset=ISO-8859-1",
|
|
||||||
},
|
|
||||||
content => $pageeuro,
|
|
||||||
},
|
|
||||||
'/p4_m%C3%A9%C3%A9r.html' => {
|
'/p4_m%C3%A9%C3%A9r.html' => {
|
||||||
code => "200",
|
code => "200",
|
||||||
msg => "Ok",
|
msg => "Ok",
|
||||||
@ -197,7 +183,7 @@ my %expected_downloaded_files = (
|
|||||||
'robots.txt' => {
|
'robots.txt' => {
|
||||||
content => "",
|
content => "",
|
||||||
},
|
},
|
||||||
"p1_fran${ccedilla_l15}ais.html" => {
|
"p1_fran${ccedilla_u8}ais.html" => {
|
||||||
content => $pagefrancais,
|
content => $pagefrancais,
|
||||||
},
|
},
|
||||||
"p2_${eacute_u8}${eacute_u8}n.html" => {
|
"p2_${eacute_u8}${eacute_u8}n.html" => {
|
||||||
|
Loading…
Reference in New Issue
Block a user