mirror of
https://github.com/moparisthebest/wget
synced 2024-07-03 16:38:41 -04:00
Add "content-type" meta tag parsing for retrieving HTML page encoding.
This commit is contained in:
parent
ed558a83f6
commit
13fec85566
@ -1,3 +1,11 @@
|
|||||||
|
2008-06-19 Xavier Saint <wget@sxav.eu>
|
||||||
|
|
||||||
|
* html-url.c : Add "content-type" meta tag parsing for
|
||||||
|
retrieving page encoding.
|
||||||
|
|
||||||
|
* iri.h : Make no-op version of parse_charset() return
|
||||||
|
NULL.
|
||||||
|
|
||||||
2008-06-14 Xavier Saint <wget@sxav.eu>
|
2008-06-14 Xavier Saint <wget@sxav.eu>
|
||||||
|
|
||||||
* iri.c, iri.h : New files.
|
* iri.c, iri.h : New files.
|
||||||
|
@ -42,6 +42,7 @@ as that of the covered work. */
|
|||||||
#include "hash.h"
|
#include "hash.h"
|
||||||
#include "convert.h"
|
#include "convert.h"
|
||||||
#include "recur.h" /* declaration of get_urls_html */
|
#include "recur.h" /* declaration of get_urls_html */
|
||||||
|
#include "iri.h"
|
||||||
|
|
||||||
struct map_context;
|
struct map_context;
|
||||||
|
|
||||||
@ -534,6 +535,25 @@ tag_handle_meta (int tagid, struct taginfo *tag, struct map_context *ctx)
|
|||||||
entry->link_expect_html = 1;
|
entry->link_expect_html = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (http_equiv && 0 == strcasecmp (http_equiv, "content-type"))
|
||||||
|
{
|
||||||
|
/* Handle stuff like:
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=CHARSET"> */
|
||||||
|
|
||||||
|
char *mcharset;
|
||||||
|
char *content = find_attr (tag, "content", NULL);
|
||||||
|
if (!content)
|
||||||
|
return;
|
||||||
|
|
||||||
|
mcharset = parse_charset (content);
|
||||||
|
if (!mcharset)
|
||||||
|
return;
|
||||||
|
|
||||||
|
logprintf (LOG_VERBOSE, "Meta tag charset : %s\n", quote (mcharset));
|
||||||
|
|
||||||
|
/* sXXXav: Not used yet */
|
||||||
|
xfree (mcharset);
|
||||||
|
}
|
||||||
else if (name && 0 == strcasecmp (name, "robots"))
|
else if (name && 0 == strcasecmp (name, "robots"))
|
||||||
{
|
{
|
||||||
/* Handle stuff like:
|
/* Handle stuff like:
|
||||||
|
Loading…
Reference in New Issue
Block a user