1
0
mirror of https://github.com/moparisthebest/wget synced 2024-07-03 16:38:41 -04:00

Add "content-type" meta tag parsing for retrieving HTML page encoding.

This commit is contained in:
Saint Xavier 2008-06-19 22:33:38 +02:00
parent ed558a83f6
commit 13fec85566
3 changed files with 29 additions and 1 deletions

View File

@ -1,3 +1,11 @@
2008-06-19 Xavier Saint <wget@sxav.eu>
* html-url.c : Add "content-type" meta tag parsing for
retrieving page encoding.
* iri.h : Make no-op version of parse_charset() return
NULL.
2008-06-14 Xavier Saint <wget@sxav.eu>
* iri.c, iri.h : New files.

View File

@ -42,6 +42,7 @@ as that of the covered work. */
#include "hash.h"
#include "convert.h"
#include "recur.h" /* declaration of get_urls_html */
#include "iri.h"
struct map_context;
@ -534,6 +535,25 @@ tag_handle_meta (int tagid, struct taginfo *tag, struct map_context *ctx)
entry->link_expect_html = 1;
}
}
else if (http_equiv && 0 == strcasecmp (http_equiv, "content-type"))
{
/* Handle stuff like:
<meta http-equiv="Content-Type" content="text/html; charset=CHARSET"> */
char *mcharset;
char *content = find_attr (tag, "content", NULL);
if (!content)
return;
mcharset = parse_charset (content);
if (!mcharset)
return;
logprintf (LOG_VERBOSE, "Meta tag charset : %s\n", quote (mcharset));
/* sXXXav: Not used yet */
xfree (mcharset);
}
else if (name && 0 == strcasecmp (name, "robots"))
{
/* Handle stuff like:

View File

@ -37,7 +37,7 @@ char *parse_charset (char *str);
#else /* ENABLE_IRI */
#define parse_charset(str) /* no-op */
/* Stub used when ENABLE_IRI is not defined: expands to NULL so that call
   sites which assign the result and test it against NULL still compile
   and simply take their "no charset found" path. */
#define parse_charset(str) NULL
#endif /* ENABLE_IRI */
#endif /* IRI_H */