mirror of
https://github.com/moparisthebest/wget
synced 2024-07-03 16:38:41 -04:00
Add "content-type" meta tag parsing for retrieving HTML page encoding.
This commit is contained in:
parent
ed558a83f6
commit
13fec85566
@ -1,3 +1,11 @@
|
||||
2008-06-19 Xavier Saint <wget@sxav.eu>
|
||||
|
||||
* html-url.c : Add "content-type" meta tag parsing for
|
||||
retrieving page encoding.
|
||||
|
||||
* iri.h : Make no-op version of parse_charset() return
|
||||
NULL.
|
||||
|
||||
2008-06-14 Xavier Saint <wget@sxav.eu>
|
||||
|
||||
* iri.c, iri.h : New files.
|
||||
|
@ -42,6 +42,7 @@ as that of the covered work. */
|
||||
#include "hash.h"
|
||||
#include "convert.h"
|
||||
#include "recur.h" /* declaration of get_urls_html */
|
||||
#include "iri.h"
|
||||
|
||||
struct map_context;
|
||||
|
||||
@ -534,6 +535,25 @@ tag_handle_meta (int tagid, struct taginfo *tag, struct map_context *ctx)
|
||||
entry->link_expect_html = 1;
|
||||
}
|
||||
}
|
||||
else if (http_equiv && 0 == strcasecmp (http_equiv, "content-type"))
|
||||
{
|
||||
/* Handle a Content-Type declaration that carries a charset, like:
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=CHARSET"> */
|
||||
|
||||
char *mcharset;
|
||||
char *content = find_attr (tag, "content", NULL);
|
||||
if (!content)
|
||||
return;
|
||||
|
||||
mcharset = parse_charset (content);
|
||||
if (!mcharset)
|
||||
return;
|
||||
|
||||
logprintf (LOG_VERBOSE, "Meta tag charset : %s\n", quote (mcharset));
|
||||
|
||||
/* sXXXav: mcharset is not used yet; it is only logged above and freed below. */
|
||||
xfree (mcharset);
|
||||
}
|
||||
else if (name && 0 == strcasecmp (name, "robots"))
|
||||
{
|
||||
/* Handle stuff like:
|
||||
|
Loading…
Reference in New Issue
Block a user