1
0
mirror of https://github.com/moparisthebest/wget synced 2024-07-03 16:38:41 -04:00
wget/testenv/Test-reserved-chars.py
Ander Juaristi 8682c2612f Make sure Wget does not unescape reserved chars.
* testenv/Test-reserved-chars.py: New file.

* testenv/Makefile.am: Added new test Test-reserved-chars.py.

When following redirections, Wget should not unescape the reserved
characters that might appear in target URLs.
2015-05-12 21:24:11 +02:00

60 lines
1.4 KiB
Python
Executable File

#!/usr/bin/env python3
from sys import exit
from os import environ # to set LC_ALL
from test.http_test import HTTPTest
from misc.wget_file import WgetFile
"""
This test ensures that Wget keeps reserved characters in URLs in non-UTF-8 charsets.
"""
# The bug this test guards against was only reproducible with the ASCII
# charset, so LC_ALL is pinned to "C" in this process's environment
# (presumably inherited by the Wget process the harness launches -- confirm
# against the test framework).
environ.update({"LC_ALL": "C"})

# Label passed to HTTPTest as its `name` argument.
TEST_NAME = "URLs with reserved characters"
######### File Definitions #########
# Requests for the start page named on the command line.
_start_page_requests = ["HEAD /base.html", "GET /base.html"]
# The robots.txt fetch that appears between the two pages.
_robots_requests = ["GET /robots.txt"]
# Requests for the linked page; the "%2B" must still be escaped in the
# request line, which is the behaviour this test pins down.
_linked_page_requests = ["HEAD /a%2Bb.html", "GET /a%2Bb.html"]

# Ordered request lines registered under the "FilesCrawled" post-test key.
# The outer list holds a single inner list (presumably one per Wget
# invocation -- confirm against the test framework).
RequestList = [
    _start_page_requests + _robots_requests + _linked_page_requests,
]
# Names of the two documents handed to the test server.
A_File_Name = "base.html"
B_File_Name = "a%2Bb.html"

# base.html contains a single link whose target carries "%2B", the
# percent-encoded form of the reserved character "+".
A_File = WgetFile(A_File_Name, '<a href="a%2Bb.html">')
B_File = WgetFile(B_File_Name, "this is file B")
Files = [[A_File, B_File]]

# Wget starts from base.html and recurses (-r) in spider mode.
WGET_URLS = [[A_File_Name]]
WGET_OPTIONS = " --spider -r"

# Expectations checked after the run: no files listed as downloaded and
# an exit status of 0.
ExpectedDownloadedFiles = []
ExpectedReturnCode = 0
######### Pre and Post Test Hooks #########
# Passed to HTTPTest as `pre_hook`: the files to be served.
pre_test = {
    "ServerFiles": Files,
}

# Passed to HTTPTest as `test_params`: the Wget command-line options and
# the URLs to request.
test_options = {
    "WgetCommands": WGET_OPTIONS,
    "Urls": WGET_URLS,
}

# Passed to HTTPTest as `post_hook`: the expected local files, the expected
# exit status and the expected request sequence. Key order is kept as in
# the original in case the framework iterates the hooks in order.
post_test = {
    "ExpectedFiles": ExpectedDownloadedFiles,
    "ExpectedRetcode": ExpectedReturnCode,
    "FilesCrawled": RequestList,
}
# Assemble the test case from the definitions above.
reserved_chars_test = HTTPTest(
    name=TEST_NAME,
    pre_hook=pre_test,
    test_params=test_options,
    post_hook=post_test,
)

# Run it and use whatever begin() returns as this script's exit status.
err = reserved_chars_test.begin()
exit(err)