mirror of
https://github.com/moparisthebest/wget
synced 2024-07-03 16:38:41 -04:00
[svn] Use new function to test filename for common html suffixes.
Submitted by Ian Abbott in <3CB72D29.4898.1F34872@localhost> with minor changes to formatting and comments.
This commit is contained in:
parent
2a72eef0af
commit
cfd7b9a951
@ -1,3 +1,16 @@
|
||||
2002-04-12 Ian Abbott <abbotti@mev.co.uk>
|
||||
|
||||
* utils.c (has_html_suffix_p): New function to test filename for
|
||||
common html extensions.
|
||||
|
||||
* utils.h: Declare it.
|
||||
|
||||
* http.c (http_loop): Use it instead of previous test.
|
||||
|
||||
* retr.c (retrieve_url): Ditto.
|
||||
|
||||
* recur.c (download_child_p): Ditto.
|
||||
|
||||
2002-04-12 Hrvoje Niksic <hniksic@arsdigita.com>
|
||||
|
||||
* config.h.in: Define _VA_LIST on Solaris to prevent stdio.h from
|
||||
|
@ -1405,7 +1405,7 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
|
||||
int use_ts, got_head = 0; /* time-stamping info */
|
||||
char *filename_plus_orig_suffix;
|
||||
char *local_filename = NULL;
|
||||
char *tms, *suf, *locf, *tmrate;
|
||||
char *tms, *locf, *tmrate;
|
||||
uerr_t err;
|
||||
time_t tml = -1, tmr = -1; /* local and remote time-stamps */
|
||||
long local_size = 0; /* the size of the local file */
|
||||
@ -1465,9 +1465,8 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file);
|
||||
*dt |= RETROKF;
|
||||
|
||||
/* #### Bogusness alert. */
|
||||
/* If its suffix is "html" or "htm", assume text/html. */
|
||||
if (((suf = suffix (*hstat.local_file)) != NULL)
|
||||
&& (!strcmp (suf, "html") || !strcmp (suf, "htm")))
|
||||
/* If its suffix is "html" or "htm" or similar, assume text/html. */
|
||||
if (has_html_suffix_p (*hstat.local_file))
|
||||
*dt |= TEXTHTML;
|
||||
|
||||
FREE_MAYBE (dummy);
|
||||
|
@ -510,7 +510,6 @@ download_child_p (const struct urlpos *upos, struct url *parent, int depth,
|
||||
|
||||
/* 6. */
|
||||
{
|
||||
char *suf;
|
||||
/* Check for acceptance/rejection rules. We ignore these rules
|
||||
for HTML documents because they might lead to other files which
|
||||
need to be downloaded. Of course, we don't know which
|
||||
@ -521,14 +520,13 @@ download_child_p (const struct urlpos *upos, struct url *parent, int depth,
|
||||
* u->file is not "" (i.e. it is not a directory)
|
||||
and either:
|
||||
+ there is no file suffix,
|
||||
+ or there is a suffix, but is not "html" or "htm",
|
||||
+ or there is a suffix, but it is not "html" or "htm" or similar,
|
||||
+ both:
|
||||
- recursion is not infinite,
|
||||
- and we are at its very end. */
|
||||
|
||||
if (u->file[0] != '\0'
|
||||
&& ((suf = suffix (url)) == NULL
|
||||
|| (0 != strcmp (suf, "html") && 0 != strcmp (suf, "htm"))
|
||||
&& (!has_html_suffix_p (url)
|
||||
|| (opt.reclevel != INFINITE_RECURSION && depth >= opt.reclevel)))
|
||||
{
|
||||
if (!acceptable (u->file))
|
||||
|
@ -384,12 +384,11 @@ retrieve_url (const char *origurl, char **file, char **newloc,
|
||||
|
||||
/* There is a possibility of having HTTP being redirected to
|
||||
FTP. In these cases we must decide whether the text is HTML
|
||||
according to the suffix. The HTML suffixes are `.html' and
|
||||
`.htm', case-insensitive. */
|
||||
according to the suffix. The HTML suffixes are `.html',
|
||||
`.htm' and a few others, case-insensitive. */
|
||||
if (redirection_count && local_file && u->scheme == SCHEME_FTP)
|
||||
{
|
||||
char *suf = suffix (local_file);
|
||||
if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))
|
||||
if (has_html_suffix_p (local_file))
|
||||
*dt |= TEXTHTML;
|
||||
}
|
||||
}
|
||||
|
25
src/utils.c
25
src/utils.c
@ -792,6 +792,31 @@ suffix (const char *str)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Decide, from the file name alone, whether FNAME looks like an HTML
   document.  The suffix of FNAME, as reported by suffix(), is
   compared without regard to case against:

     html
     htm
     ?html   (`?' stands for exactly one arbitrary character)

   Returns 1 when one of these matches, and 0 when none matches or
   when suffix() reports no suffix at all.

   #### CAVEAT.  A matching suffix is only a hint; it does not prove
   that the file named by FNAME actually contains HTML!  */
int
has_html_suffix_p (const char *fname)
{
  const char *ext = suffix (fname);

  /* No suffix, no verdict.  */
  if (ext == NULL)
    return 0;

  /* The two plain spellings.  */
  if (strcasecmp (ext, "html") == 0 || strcasecmp (ext, "htm") == 0)
    return 1;

  /* One leading character followed by "html".  The emptiness check
     keeps EXT + 1 from stepping past the terminating NUL.  */
  return ext[0] != '\0' && strcasecmp (ext + 1, "html") == 0;
}
|
||||
|
||||
/* Read a line from FP and return the pointer to freshly allocated
|
||||
storage. The storage space is obtained through malloc() and
|
||||
should be freed with free() when it is no longer needed.
|
||||
|
@ -70,6 +70,8 @@ int accdir PARAMS ((const char *s, enum accd));
|
||||
char *suffix PARAMS ((const char *s));
|
||||
int match_tail PARAMS ((const char *, const char *));
|
||||
|
||||
int has_html_suffix_p PARAMS ((const char *));
|
||||
|
||||
char *read_whole_line PARAMS ((FILE *));
|
||||
struct file_memory *read_file PARAMS ((const char *));
|
||||
void read_file_free PARAMS ((struct file_memory *));
|
||||
|
Loading…
Reference in New Issue
Block a user