From 8682c2612ff35062f1b2e20ec1b008db49b252a3 Mon Sep 17 00:00:00 2001
From: Ander Juaristi
Date: Mon, 20 Apr 2015 23:16:18 +0200
Subject: [PATCH] Make sure Wget does not unescape reserved chars.

 * testenv/Test-reserved-chars.py: New file.
 * testenv/Makefile.am: Added new test Test-reserved-chars.py.

When following redirections, Wget should not unescape the reserved
characters that might appear in target URLs.
---
 testenv/Makefile.am            |  3 +-
 testenv/Test-reserved-chars.py | 63 ++++++++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+), 1 deletion(-)
 create mode 100755 testenv/Test-reserved-chars.py

diff --git a/testenv/Makefile.am b/testenv/Makefile.am
index a4e03522..9acf0f34 100644
--- a/testenv/Makefile.am
+++ b/testenv/Makefile.am
@@ -53,7 +53,8 @@ if HAVE_PYTHON3
     Test-Post.py \
     Test-504.py \
     Test--spider-r.py \
-    Test-redirect-crash.py
+    Test-redirect-crash.py \
+    Test-reserved-chars.py
 
   # added test cases expected to fail here and under TESTS
   XFAIL_TESTS =
diff --git a/testenv/Test-reserved-chars.py b/testenv/Test-reserved-chars.py
new file mode 100755
index 00000000..e5d33d07
--- /dev/null
+++ b/testenv/Test-reserved-chars.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python3
+from sys import exit
+from os import environ # to set LC_ALL
+from test.http_test import HTTPTest
+from misc.wget_file import WgetFile
+
+"""
+This test ensures that Wget keeps reserved characters in URLs in non-UTF-8 charsets.
+"""
+# This bug only happened with ASCII charset,
+# so we need to set LC_ALL="C" in order to reproduce it.
+environ["LC_ALL"] = "C"
+
+TEST_NAME = "URLs with reserved characters"
+######### File Definitions #########
+# Request lines checked through the "FilesCrawled" post-test hook.  The
+# escaped "%2B" must show up as-is, i.e. it must not be unescaped to "+".
+RequestList = [
+    [
+        "HEAD /base.html",
+        "GET /base.html",
+        "GET /robots.txt",
+        "HEAD /a%2Bb.html",
+        "GET /a%2Bb.html"
+    ]
+]
+A_File_Name = "base.html"
+B_File_Name = "a%2Bb.html"
+# base.html has to link to file B; with an empty page the recursive spider
+# run has nothing to follow and a%2Bb.html would never be requested.
+A_File = WgetFile (A_File_Name, "<a href=\"" + B_File_Name + "\">link</a>")
+B_File = WgetFile (B_File_Name, "this is file B")
+
+WGET_OPTIONS = " --spider -r"
+WGET_URLS = [[A_File_Name]]
+
+Files = [[A_File, B_File]]
+
+ExpectedReturnCode = 0
+ExpectedDownloadedFiles = []
+
+######### Pre and Post Test Hooks #########
+pre_test = {
+    "ServerFiles" : Files
+}
+test_options = {
+    "WgetCommands" : WGET_OPTIONS,
+    "Urls" : WGET_URLS
+}
+post_test = {
+    "ExpectedFiles" : ExpectedDownloadedFiles,
+    "ExpectedRetcode" : ExpectedReturnCode,
+    "FilesCrawled" : RequestList
+}
+
+err = HTTPTest (
+    name=TEST_NAME,
+    pre_hook=pre_test,
+    test_params=test_options,
+    post_hook=post_test
+).begin ()
+
+exit (err)