1
0
mirror of https://github.com/moparisthebest/wget synced 2024-07-03 16:38:41 -04:00

Do not assume that external links are always of type "text/html".

This commit is contained in:
Manfred Koizar 2010-10-18 10:55:16 +02:00 committed by Giuseppe Scrivano
parent 542c549985
commit cea1f0718d
2 changed files with 19 additions and 6 deletions

View File

@ -1,3 +1,8 @@
2010-10-18 Manfred Koizar <mkoi-pg@aon.at> (tiny change)
* html-url.c (tag_handle_link): Do not assume external links type
to be always "text/html".
2010-10-16 Giuseppe Scrivano <gscrivano@gnu.org> 2010-10-16 Giuseppe Scrivano <gscrivano@gnu.org>
* connect.c (socket_ip_address): Initialize `sockaddr' to zero. * connect.c (socket_ip_address): Initialize `sockaddr' to zero.

View File

@ -164,6 +164,7 @@ static struct {
to the attributes not mentioned here. We add them manually. */ to the attributes not mentioned here. We add them manually. */
static const char *additional_attributes[] = { static const char *additional_attributes[] = {
"rel", /* used by tag_handle_link */ "rel", /* used by tag_handle_link */
"type", /* used by tag_handle_link */
"http-equiv", /* used by tag_handle_meta */ "http-equiv", /* used by tag_handle_meta */
"name", /* used by tag_handle_meta */ "name", /* used by tag_handle_meta */
"content", /* used by tag_handle_meta */ "content", /* used by tag_handle_meta */
@ -505,8 +506,8 @@ tag_handle_link (int tagid, struct taginfo *tag, struct map_context *ctx)
/* All <link href="..."> link references are external, except those /* All <link href="..."> link references are external, except those
known not to be, such as style sheet and shortcut icon: known not to be, such as style sheet and shortcut icon:
<link rel="stylesheet" href="..."> <link rel="stylesheet" href="...">
<link rel="shortcut icon" href="..."> <link rel="shortcut icon" href="...">
*/ */
if (href) if (href)
{ {
@ -526,11 +527,18 @@ tag_handle_link (int tagid, struct taginfo *tag, struct map_context *ctx)
{ {
up->link_inline_p = 1; up->link_inline_p = 1;
} }
else
{
/* The external ones usually point to HTML pages, such as
<link rel="next" href="...">
except when the type attribute says otherwise:
<link rel="alternate" type="application/rss+xml" href=".../?feed=rss2" />
*/
char *type = find_attr (tag, "type", NULL);
if (!type || strcasecmp (type, "text/html") == 0)
up->link_expect_html = 1;
}
} }
else
/* The external ones usually point to HTML pages, such as
<link rel="next" href="..."> */
up->link_expect_html = 1;
} }
} }
} }