Ted Mielczarek's CSS wonder-patch, applied against the source from around the time the patch was written.

2024-07-03 16:38:41 -04:00 · 2008-04-22 00:15:48 -07:00 · 2008-04-22 00:15:48 -07:00 · a0d0f332d5
commit a0d0f332d5
parent 3f51773542
17 changed files with 970 additions and 128 deletions
--- a/configure.in
+++ b/configure.in
@ -115,6 +115,9 @@ test -z "$CC" && cc_specified=yes
 AC_PROG_CC
 AC_AIX
 YYTEXT_POINTER=1
 AC_PROG_LEX
 dnl Turn on optimization by default.  Specifically:
 dnl
 dnl if the user hasn't specified CFLAGS, then
--- a/src/Makefile.in
+++ b/src/Makefile.in
@ -54,6 +54,7 @@ CFLAGS   = @CFLAGS@
 LDFLAGS  = @LDFLAGS@ 
 LIBS     = @LIBS@ @LIBSSL@ @LIBGNUTLS@
 exeext   = @exeext@
 LEX      = @LEX@
 INCLUDES = -I. -I$(srcdir)
@ -72,12 +73,12 @@ NTLM_OBJ   = @NTLM_OBJ@
 SSL_OBJ    = @SSL_OBJ@
 GETOPT_OBJ = @GETOPT_OBJ@
-OBJ = $(ALLOCA) cmpt.o connect.o convert.o cookies.o              \
+OBJ = $(ALLOCA) cmpt.o connect.o convert.o cookies.o css-url.o     \
-      ftp.o ftp-basic.o ftp-ls.o $(OPIE_OBJ) $(GETOPT_OBJ) hash.o \
+      ftp.o ftp-basic.o ftp-ls.o $(OPIE_OBJ) $(GETOPT_OBJ) hash.o  \
-      host.o html-parse.o html-url.o http.o $(NTLM_OBJ) init.o    \
+      host.o html-parse.o html-url.o http.o $(NTLM_OBJ) init.o     \
-      log.o main.o $(MD5_OBJ) netrc.o progress.o ptimer.o recur.o \
+      lex.yy.o log.o main.o $(MD5_OBJ) netrc.o progress.o ptimer.o \
-      res.o retr.o safe-ctype.o snprintf.o spider.o $(SSL_OBJ)    \
+      recur.o res.o retr.o safe-ctype.o snprintf.o spider.o        \
-      url.o utils.o version.o xmalloc.o
+      $(SSL_OBJ) url.o utils.o version.o xmalloc.o
 .SUFFIXES:
 .SUFFIXES: .c .o
@ -90,16 +91,19 @@ OBJ = $(ALLOCA) cmpt.o connect.o convert.o cookies.o              \
 wget$(exeext): $(OBJ)
 	$(LINK) $(OBJ) $(LIBS)
 lex.yy.c: css.lex
 	$(LEX) $<
 # We make object files depend on every header.  Rather than attempt to
 # track dependencies, everything gets recompiled when a header
 # changes.  With a program of Wget's size this doesn't waste much
 # time, and it's a lot safer than attempting to get all the
 # dependencies right.
-$(OBJ): config-post.h config.h connect.h convert.h cookies.h ftp.h \
+$(OBJ): config-post.h config.h connect.h convert.h cookies.h css-url.h \
-        gen-md5.h getopt.h gnu-md5.h hash.h host.h html-parse.h    \
+        ftp.h gen-md5.h getopt.h gnu-md5.h hash.h host.h html-parse.h  \
-        http-ntlm.h init.h log.h mswindows.h netrc.h options.h     \
+        http-ntlm.h init.h log.h mswindows.h netrc.h options.h         \
-        progress.h ptimer.h recur.h res.h retr.h safe-ctype.h      \
+        progress.h ptimer.h recur.h res.h retr.h safe-ctype.h          \
        spider.h ssl.h sysdep.h url.h utils.h wget.h xmalloc.h
 #
@ -122,7 +126,7 @@ uninstall.bin:
 #
 clean:
-	$(RM) *.o wget$(exeext) *~ *.bak core core.[0-9]*
+	$(RM) *.o wget$(exeext) *~ *.bak core core.[0-9]* lex.yy.c
 distclean: clean
 	$(RM) Makefile config.h
--- a/src/convert.c
+++ b/src/convert.c
@ -46,50 +46,37 @@ so, delete this exception statement from your version.  */
 #include "hash.h"
 #include "ptimer.h"
 #include "res.h"
 #include "html-url.h"
 #include "css-url.h"
 static struct hash_table *dl_file_url_map;
 struct hash_table *dl_url_file_map;
-/* Set of HTML files downloaded in this Wget run, used for link
+/* Set of HTML/CSS files downloaded in this Wget run, used for link
   conversion after Wget is done.  */
 struct hash_table *downloaded_html_set;
 struct hash_table *downloaded_css_set;
 static void convert_links (const char *, struct urlpos *);
 /* This function is called when the retrieval is done to convert the
   links that have been downloaded.  It has to be called at the end of
   the retrieval, because only then does Wget know conclusively which
   URLs have been downloaded, and which not, so it can tell which
   direction to convert to.
   The "direction" means that the URLs to the files that have been
   downloaded get converted to the relative URL which will point to
   that file.  And the other URLs get converted to the remote URL on
   the server.
   All the downloaded HTMLs are kept in downloaded_html_files, and
   downloaded URLs in urls_downloaded.  All the information is
   extracted from these two lists.  */
 void
-convert_all_links (void)
+convert_links_in_hashtable (struct hash_table *downloaded_set,
                            int is_css,
                            int *file_count)
 {
  int i;
  double secs;
  int file_count = 0;
  struct ptimer *timer = ptimer_new ();
  int cnt;
  char **file_array;
  cnt = 0;
-  if (downloaded_html_set)
+  if (downloaded_set)
-    cnt = hash_table_count (downloaded_html_set);
+    cnt = hash_table_count (downloaded_set);
  if (cnt == 0)
    return;
  file_array = alloca_array (char *, cnt);
-  string_set_to_array (downloaded_html_set, file_array);
+  string_set_to_array (downloaded_set, file_array);
  for (i = 0; i < cnt; i++)
    {
@ -97,7 +84,7 @@ convert_all_links (void)
      char *url;
      char *file = file_array[i];
-      /* Determine the URL of the HTML file.  get_urls_html will need
+      /* Determine the URL of the file.  get_urls_{html,css} will need
         it.  */
      url = hash_table_get (dl_file_url_map, file);
      if (!url)
@ -108,8 +95,9 @@ convert_all_links (void)
      DEBUGP (("Scanning %s (from %s)\n", file, url));
-      /* Parse the HTML file...  */
+      /* Parse the file...  */
-      urls = get_urls_html (file, url, NULL);
+      urls = is_css ? get_urls_css_file (file, url) :
                      get_urls_html (file, url, NULL);
      /* We don't respect meta_disallow_follow here because, even if
         the file is not followed, we might still want to convert the
@ -161,11 +149,38 @@ convert_all_links (void)
      /* Convert the links in the file.  */
      convert_links (file, urls);
-      ++file_count;
+      ++*file_count;
      /* Free the data.  */
      free_urlpos (urls);
    }
 }
 /* This function is called when the retrieval is done to convert the
   links that have been downloaded.  It has to be called at the end of
   the retrieval, because only then does Wget know conclusively which
   URLs have been downloaded, and which not, so it can tell which
   direction to convert to.
   The "direction" means that the URLs to the files that have been
   downloaded get converted to the relative URL which will point to
   that file.  And the other URLs get converted to the remote URL on
   the server.
   All the downloaded HTML and CSS files are kept in downloaded_html_set
   and downloaded_css_set, and the URL each file was retrieved from in
   dl_file_url_map.  All the information is extracted from these
   tables.  */
 void
 convert_all_links (void)
 {
  double secs;
  int file_count = 0;
  struct ptimer *timer = ptimer_new ();
  convert_links_in_hashtable (downloaded_html_set, 0, &file_count);
  convert_links_in_hashtable (downloaded_css_set, 1, &file_count);
  secs = ptimer_measure (timer);
  ptimer_destroy (timer);
@ -174,13 +189,14 @@ convert_all_links (void)
 }
 static void write_backup_file (const char *, downloaded_file_t);
 static const char *replace_plain (const char*, int, FILE*, const char *);
 static const char *replace_attr (const char *, int, FILE *, const char *);
 static const char *replace_attr_refresh_hack (const char *, int, FILE *,
                                              const char *, int);
 static char *local_quote_string (const char *);
 static char *construct_relative (const char *, const char *);
-/* Change the links in one HTML file.  LINKS is a list of links in the
+/* Change the links in one file.  LINKS is a list of links in the
   document, along with their positions and the desired direction of
   the conversion.  */
 static void
@ -277,7 +293,9 @@ convert_links (const char *file, struct urlpos *links)
            char *newname = construct_relative (file, link->local_name);
            char *quoted_newname = local_quote_string (newname);
-            if (!link->link_refresh_p)
+            if (link->link_css_p)
              p = replace_plain (p, link->size, fp, quoted_newname);
            else if (!link->link_refresh_p)
              p = replace_attr (p, link->size, fp, quoted_newname);
            else
              p = replace_attr_refresh_hack (p, link->size, fp, quoted_newname,
@ -296,7 +314,9 @@ convert_links (const char *file, struct urlpos *links)
            char *newlink = link->url->url;
            char *quoted_newlink = html_quote_string (newlink);
-            if (!link->link_refresh_p)
+            if (link->link_css_p)
              p = replace_plain (p, link->size, fp, quoted_newlink);
            else if (!link->link_refresh_p)
              p = replace_attr (p, link->size, fp, quoted_newlink);
            else
              p = replace_attr_refresh_hack (p, link->size, fp, quoted_newlink,
@ -406,6 +426,7 @@ write_backup_file (const char *file, downloaded_file_t downloaded_file_return)
  size_t         filename_len = strlen (file);
  char*          filename_plus_orig_suffix;
  /* TODO: hack this to work with css files */
  if (downloaded_file_return == FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED)
    {
      /* Just write "orig" over "html".  We need to do it this way
@ -465,6 +486,15 @@ write_backup_file (const char *file, downloaded_file_t downloaded_file_return)
 static bool find_fragment (const char *, int, const char **, const char **);
 /* Write NEW_TEXT to FP in place of the SIZE bytes of original text
   that begin at P.  No quoting is added or interpreted.  Returns the
   position just past the text that was replaced.  */
 static const char *
 replace_plain (const char *p, int size, FILE *fp, const char *new_text)
 {
  const char *after_original = p + size;

  fputs (new_text, fp);
  return after_original;
 }
 /* Replace an attribute's original text with NEW_TEXT. */
 static const char *
@ -832,6 +862,16 @@ register_html (const char *url, const char *file)
  string_set_add (downloaded_html_set, file);
 }
 /* Register that FILE is a CSS file that has been downloaded. */
 void
 register_css (const char *url, const char *file)
 {
  if (!downloaded_css_set)
    downloaded_css_set = make_string_hash_table (0);
  string_set_add (downloaded_css_set, file);
 }
 static void downloaded_files_free (void);
 /* Cleanup the data structures associated with this file.  */
--- a/src/convert.h
+++ b/src/convert.h
@ -33,6 +33,7 @@ so, delete this exception statement from your version.  */
 struct hash_table;		/* forward decl */
 extern struct hash_table *dl_url_file_map;
 extern struct hash_table *downloaded_html_set;
 extern struct hash_table *downloaded_css_set;
 enum convert_options {
  CO_NOCONVERT = 0,		/* don't convert this URL */
@ -64,7 +65,9 @@ struct urlpos {
  unsigned int link_complete_p	:1; /* the link was complete (had host name) */
  unsigned int link_base_p	:1; /* the url came from <base href=...> */
  unsigned int link_inline_p	:1; /* needed to render the page */
  unsigned int link_css_p	:1; /* the url came from CSS */
  unsigned int link_expect_html	:1; /* expected to contain HTML */
  unsigned int link_expect_css	:1; /* expected to contain CSS */
  unsigned int link_refresh_p	:1; /* link was received from
 				       <meta http-equiv=refresh content=...> */
@ -98,6 +101,7 @@ downloaded_file_t downloaded_file (downloaded_file_t, const char *);
 void register_download (const char *, const char *);
 void register_redirection (const char *, const char *);
 void register_html (const char *, const char *);
 void register_css (const char *, const char *);
 void register_delete_file (const char *);
 void convert_all_links (void);
 void convert_cleanup (void);
--- a/src/css-tokens.h
+++ b/src/css-tokens.h
@ -0,0 +1,66 @@
 /* Declarations for css.lex
   Copyright (C) 2006 Free Software Foundation, Inc.
 This file is part of GNU Wget.
 GNU Wget is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.
 GNU Wget is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 You should have received a copy of the GNU General Public License
 along with Wget; if not, write to the Free Software
 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 In addition, as a special exception, the Free Software Foundation
 gives permission to link the code of its release of Wget with the
 OpenSSL project's "OpenSSL" library (or with modified versions of it
 that use the same license as the "OpenSSL" library), and distribute
 the linked executables.  You must obey the GNU General Public License
 in all respects for all of the code used other than "OpenSSL".  If you
 modify this file, you may extend this exception to your version of the
 file, but you are not obligated to do so.  If you do not wish to do
 so, delete this exception statement from your version.  */
 #ifndef CSS_TOKENS_H
 #define CSS_TOKENS_H
 /* Token codes returned by the scanner generated from css.lex.

   CSSEOF must stay first (value 0): get_urls_css stops scanning when
   yylex returns it.  The order must also match the token_names table
   in css-url.c, which is indexed by these values.

   This is a tagged enum rather than "enum { ... } css_tokens;".  The
   latter defines an object named css_tokens in every file that
   includes this header, which breaks linking when the compiler does
   not merge common symbols (e.g. -fno-common).  */
 enum css_tokens {
  CSSEOF,
  S,
  CDO,
  CDC,
  INCLUDES,
  DASHMATCH,
  LBRACE,
  PLUS,
  GREATER,
  COMMA,
  STRING,
  INVALID,
  IDENT,
  HASH,
  IMPORT_SYM,
  PAGE_SYM,
  MEDIA_SYM,
  CHARSET_SYM,
  IMPORTANT_SYM,
  EMS,
  EXS,
  LENGTH,
  ANGLE,
  TIME,
  FREQ,
  DIMENSION,
  PERCENTAGE,
  NUMBER,
  URI,
  FUNCTION
 };
 #endif /* CSS_TOKENS_H */
--- a/src/css-url.c
+++ b/src/css-url.c
@ -0,0 +1,273 @@
 /* Collect URLs from CSS source.
   Copyright (C) 1998, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
 This file is part of GNU Wget.
 GNU Wget is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.
 GNU Wget is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 You should have received a copy of the GNU General Public License
 along with Wget; if not, write to the Free Software
 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 In addition, as a special exception, the Free Software Foundation
 gives permission to link the code of its release of Wget with the
 OpenSSL project's "OpenSSL" library (or with modified versions of it
 that use the same license as the "OpenSSL" library), and distribute
 the linked executables.  You must obey the GNU General Public License
 in all respects for all of the code used other than "OpenSSL".  If you
 modify this file, you may extend this exception to your version of the
 file, but you are not obligated to do so.  If you do not wish to do
 so, delete this exception statement from your version.  */
 /*
  Note that this is not an actual CSS parser, but just a lexical
  scanner with a tiny bit more smarts bolted on top.  A full parser
  is somewhat overkill for this job.  The only things we're interested
  in are @import rules and url() tokens, so it's easy enough to
  grab those without truly understanding the input.  The only downside
  to this is that we might be coerced into downloading files that
  a browser would ignore.  That might merit some more investigation.
 */
 #include <config.h>
 #include <stdio.h>
 #ifdef HAVE_STRING_H
 # include <string.h>
 #else
 # include <strings.h>
 #endif
 #include <stdlib.h>
 #include <ctype.h>
 #include <errno.h>
 #include <assert.h>
 #include "wget.h"
 #include "utils.h"
 #include "convert.h"
 #include "html-url.h"
 #include "css-tokens.h"
 /* from lex.yy.c */
 extern char *yytext;
 extern int yyleng;
 typedef struct yy_buffer_state *YY_BUFFER_STATE;
 extern YY_BUFFER_STATE yy_scan_bytes (const char *bytes,int len  );
 extern int yylex (void);
 /* Printable names for the token codes declared in css-tokens.h.  The
   entries are in the same order as that enum, so token_names[token]
   gives the name of a token code when debugging the scanner.  Keep
   the two lists in sync.  */
 #if 1
 const char *token_names[] = {
  "CSSEOF",
  "S",
  "CDO",
  "CDC",
  "INCLUDES",
  "DASHMATCH",
  "LBRACE",
  "PLUS",
  "GREATER",
  "COMMA",
  "STRING",
  "INVALID",
  "IDENT",
  "HASH",
  "IMPORT_SYM",
  "PAGE_SYM",
  "MEDIA_SYM",
  "CHARSET_SYM",
  "IMPORTANT_SYM",
  "EMS",
  "EXS",
  "LENGTH",
  "ANGLE",
  "TIME",
  "FREQ",
  "DIMENSION",
  "PERCENTAGE",
  "NUMBER",
  "URI",
  "FUNCTION"
 };
 #endif
 /*
  Given a detected URI token, get only the URI specified within.

  AT is the text buffer; on entry *POS is the offset of the token
  ("url(...)") in AT and *LENGTH is the token's length.  On success
  *POS and *LENGTH are updated to delimit the bare URI inside AT, and
  a freshly allocated, NUL-terminated copy of it is returned; the
  caller releases it with xfree.  Returns NULL, leaving *POS and
  *LENGTH untouched, if the token does not start with "url(".

  A URI can be specified with or without quotes, and the quotes
  can be single or double quotes.  In addition there can be
  whitespace after the opening parenthesis and before the closing
  parenthesis.
 */
 char *
 get_uri_string (const char *at, int *pos, int *length)
 {
  char *uri;

  /* The shortest possible token is "url()".  */
  if (*length < 5 || 0 != strncasecmp (at + *pos, "url(", 4))
    return NULL;

  *pos += 4;
  *length -= 5; /* "url(" and the closing ")" */

  /* Skip leading space.  Both trimming loops are bounded by *LENGTH:
     for a token such as "url( )" an unbounded scan walks past the
     start of the URI and leaves a negative length, which then turns
     into an enormous copy size.  The unsigned char casts keep
     isspace defined for bytes >= 0x80.  */
  while (*length > 0 && isspace ((unsigned char) at[*pos]))
    {
      (*pos)++;
      (*length)--;
    }

  /* skip trailing space */
  while (*length > 0 && isspace ((unsigned char) at[*pos + *length - 1]))
    {
      (*length)--;
    }

  /* Trim off quotes; a quoted URI has both an opening and a closing
     quote, so at least two bytes must remain.  */
  if (*length >= 2 && (at[*pos] == '\'' || at[*pos] == '"'))
    {
      (*pos)++;
      *length -= 2;
    }

  uri = xmalloc (*length + 1);
  memcpy (uri, at + *pos, *length);
  uri[*length] = '\0';

  return uri;
 }
 void
 get_urls_css (struct map_context *ctx, int offset, int buf_length)
 {
  int token;
  /*char tmp[2048];*/
  int buffer_pos = 0;
  int pos, length;
  char *uri;
  /*
  strncpy(tmp,ctx->text + offset, buf_length);
  tmp[buf_length] = '\0';
  DEBUGP (("get_urls_css: \"%s\"\n", tmp));
  */
  /* tell flex to scan from this buffer */
  yy_scan_bytes (ctx->text + offset, buf_length);
  while((token = yylex()) != CSSEOF)
    {
      /*DEBUGP (("%s ", token_names[token]));*/
      /* @import "foo.css"
         or @import url(foo.css)
      */
      if(token == IMPORT_SYM)
        {
          do {
            buffer_pos += yyleng;
          } while((token = yylex()) == S);
          /*DEBUGP (("%s ", token_names[token]));*/
          if (token == STRING || token == URI)
            {
              /*DEBUGP (("Got URI "));*/
              pos = buffer_pos + offset;
              length = yyleng;
              if (token == URI)
                {
                  uri = get_uri_string (ctx->text, &pos, &length);
                }
              else
                {
                  /* cut out quote characters */
                  pos++;
                  length -= 2;
                  uri = xmalloc (length + 1);
                  strncpy (uri, yytext + 1, length);
                  uri[length] = '\0';
                }
              if (uri)
                {
                  struct urlpos *up = append_url (uri, pos, length, ctx);
                  DEBUGP (("Found @import: [%s] at %d [%s]\n", yytext, buffer_pos, uri));
                  if (up)
                    {
                      up->link_inline_p = 1;
                      up->link_css_p = 1;
                      up->link_expect_css = 1;
                    }
                  xfree(uri);
                }
            }
        }
      /* background-image: url(foo.png)
         note that we don't care what
         property this is actually on.
      */
      else if(token == URI)
        {
          pos = buffer_pos + offset;
          length = yyleng;
          uri = get_uri_string (ctx->text, &pos, &length);
          if (uri)
            {
              struct urlpos *up = append_url (uri, pos, length, ctx);
              DEBUGP (("Found URI: [%s] at %d [%s]\n", yytext, buffer_pos, uri));
              if (up)
                {
                  up->link_inline_p = 1;
                  up->link_css_p = 1;
                }
              xfree (uri);
            }
        }
      buffer_pos += yyleng;
    }
  DEBUGP (("\n"));
 }
 /* Read FILE into memory, scan all of it with get_urls_css and return
   the head of the resulting URL list.  URL, when non-NULL, is used as
   the parent base for the document; otherwise opt.base_href is used.
   Returns NULL, after logging the error, if FILE cannot be read.  */
 struct urlpos *
 get_urls_css_file (const char *file, const char *url)
 {
  struct map_context ctx;
  struct file_memory *fm = read_file (file);

  if (fm == NULL)
    {
      logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
      return NULL;
    }
  DEBUGP (("Loaded %s (size %s).\n", file, number_to_static_string (fm->length)));

  /* Start with an empty URL list and no <base>-style override.  */
  ctx.head = NULL;
  ctx.tail = NULL;
  ctx.base = NULL;
  ctx.nofollow = 0;
  ctx.document_file = file;
  ctx.text = fm->content;
  if (url)
    ctx.parent_base = url;
  else
    ctx.parent_base = opt.base_href;

  get_urls_css (&ctx, 0, fm->length);
  read_file_free (fm);

  return ctx.head;
 }
--- a/src/css-url.h
+++ b/src/css-url.h
@ -0,0 +1,36 @@
 /* Declarations for css-url.c.
   Copyright (C) 2006 Free Software Foundation, Inc.
 This file is part of GNU Wget.
 GNU Wget is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.
 GNU Wget is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 You should have received a copy of the GNU General Public License
 along with Wget; if not, write to the Free Software
 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 In addition, as a special exception, the Free Software Foundation
 gives permission to link the code of its release of Wget with the
 OpenSSL project's "OpenSSL" library (or with modified versions of it
 that use the same license as the "OpenSSL" library), and distribute
 the linked executables.  You must obey the GNU General Public License
 in all respects for all of the code used other than "OpenSSL".  If you
 modify this file, you may extend this exception to your version of the
 file, but you are not obligated to do so.  If you do not wish to do
 so, delete this exception statement from your version.  */
 #ifndef CSS_URL_H
 #define CSS_URL_H
 void get_urls_css (struct map_context *, int, int);
 struct urlpos *get_urls_css_file (const char *, const char *);
 #endif /* CSS_URL_H */
--- a/src/css.lex
+++ b/src/css.lex
@ -0,0 +1,137 @@
 %option case-insensitive
 %option noyywrap
 %option never-interactive
 %{
 /* Lex source for CSS tokenizing.
   Taken from http://www.w3.org/TR/CSS21/grammar.html#q2
   Copyright (C) 2006 Free Software Foundation, Inc.
 This file is part of GNU Wget.
 GNU Wget is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.
 GNU Wget is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 You should have received a copy of the GNU General Public License
 along with Wget; if not, write to the Free Software
 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 In addition, as a special exception, the Free Software Foundation
 gives permission to link the code of its release of Wget with the
 OpenSSL project's "OpenSSL" library (or with modified versions of it
 that use the same license as the "OpenSSL" library), and distribute
 the linked executables.  You must obey the GNU General Public License
 in all respects for all of the code used other than "OpenSSL".  If you
 modify this file, you may extend this exception to your version of the
 file, but you are not obligated to do so.  If you do not wish to do
 so, delete this exception statement from your version.  */
 #include "css-tokens.h"
 /* {s}+\/\*[^*]*\*+([^/*][^*]*\*+)*\/      {unput(' '); } */
 /*replace by space*/
 %}
 h               [0-9a-f]
 nonascii        [\200-\377]
 unicode         \\{h}{1,6}(\r\n|[ \t\r\n\f])?
 escape          {unicode}|\\[^\r\n\f0-9a-f]
 nmstart         [_a-z]|{nonascii}|{escape}
 nmchar          [_a-z0-9-]|{nonascii}|{escape}
 string1         \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
 string2         \'([^\n\r\f\\']|\\{nl}|{escape})*\'
 invalid1        \"([^\n\r\f\\"]|\\{nl}|{escape})*
 invalid2        \'([^\n\r\f\\']|\\{nl}|{escape})*
 comment         \/\*[^*]*\*+([^/*][^*]*\*+)*\/
 ident           -?{nmstart}{nmchar}*
 name            {nmchar}+
 num             [0-9]+|[0-9]*"."[0-9]+
 string          {string1}|{string2}
 invalid         {invalid1}|{invalid2}
 url             ([!#$%&*-~]|{nonascii}|{escape})*
 s               [ \t\r\n\f]
 w               ({s}|{comment})*
 nl              \n|\r\n|\r|\f
 A               a|\\0{0,4}(41|61)(\r\n|[ \t\r\n\f])?
 C               c|\\0{0,4}(43|63)(\r\n|[ \t\r\n\f])?
 D               d|\\0{0,4}(44|64)(\r\n|[ \t\r\n\f])?
 E               e|\\0{0,4}(45|65)(\r\n|[ \t\r\n\f])?
 G               g|\\0{0,4}(47|67)(\r\n|[ \t\r\n\f])?|\\g
 H               h|\\0{0,4}(48|68)(\r\n|[ \t\r\n\f])?|\\h
 I               i|\\0{0,4}(49|69)(\r\n|[ \t\r\n\f])?|\\i
 K               k|\\0{0,4}(4b|6b)(\r\n|[ \t\r\n\f])?|\\k
 M               m|\\0{0,4}(4d|6d)(\r\n|[ \t\r\n\f])?|\\m
 N               n|\\0{0,4}(4e|6e)(\r\n|[ \t\r\n\f])?|\\n
 P               p|\\0{0,4}(50|70)(\r\n|[ \t\r\n\f])?|\\p
 R               r|\\0{0,4}(52|72)(\r\n|[ \t\r\n\f])?|\\r
 S               s|\\0{0,4}(53|73)(\r\n|[ \t\r\n\f])?|\\s
 T               t|\\0{0,4}(54|74)(\r\n|[ \t\r\n\f])?|\\t
 X               x|\\0{0,4}(58|78)(\r\n|[ \t\r\n\f])?|\\x
 Z               z|\\0{0,4}(5a|7a)(\r\n|[ \t\r\n\f])?|\\z
 %%
 {s}                     {return S;}
 \/\*[^*]*\*+([^/*][^*]*\*+)*\/          {return S;} /* ignore comments */
 "<!--"          {return CDO;}
 "-->"                   {return CDC;}
 "~="                    {return INCLUDES;}
 "|="                    {return DASHMATCH;}
 {w}"{"                  {return LBRACE;}
 {w}"+"                  {return PLUS;}
 {w}">"                  {return GREATER;}
 {w}","                  {return COMMA;}
 {string}                {return STRING;}
 {invalid}               {return INVALID; /* unclosed string */}
 {ident}                 {return IDENT;}
 "#"{name}               {return HASH;}
 "@import"               {return IMPORT_SYM;}
 "@page"                 {return PAGE_SYM;}
 "@media"                {return MEDIA_SYM;}
 "@charset "             {return CHARSET_SYM;}
 "!"{w}"important"       {return IMPORTANT_SYM;}
 {num}{E}{M}             {return EMS;}
 {num}{E}{X}             {return EXS;}
 {num}{P}{X}             {return LENGTH;}
 {num}{C}{M}             {return LENGTH;}
 {num}{M}{M}             {return LENGTH;}
 {num}{I}{N}             {return LENGTH;}
 {num}{P}{T}             {return LENGTH;}
 {num}{P}{C}             {return LENGTH;}
 {num}{D}{E}{G}          {return ANGLE;}
 {num}{R}{A}{D}          {return ANGLE;}
 {num}{G}{R}{A}{D}       {return ANGLE;}
 {num}{M}{S}             {return TIME;}
 {num}{S}                {return TIME;}
 {num}{H}{Z}             {return FREQ;}
 {num}{K}{H}{Z}          {return FREQ;}
 {num}{ident}            {return DIMENSION;}
 {num}%                  {return PERCENTAGE;}
 {num}                   {return NUMBER;}
 "url("{w}{string}{w}")" {return URI;}
 "url("{w}{url}{w}")"    {return URI;}
 {ident}"("              {return FUNCTION;}
 .                       {return *yytext;}
 %%
--- a/src/html-parse.c
+++ b/src/html-parse.c
@ -271,6 +271,94 @@ struct pool {
   to "<foo", but "&lt,foo" to "<,foo".  */
 #define SKIP_SEMI(p, inc) (p += inc, p < end && *p == ';' ? ++p : p)
 /* One entry in the doubly linked stack of start tags that
   map_html_tags keeps while parsing.  The stack lets an end tag be
   matched with its start tag so the tag's contents can be
   delimited.  */
 struct tagstack_item {
  const char *tagname_begin;    /* start of the tag name in the text */
  const char *tagname_end;      /* one past the end of the tag name */
  const char *contents_begin;   /* NULL when pushed; set to one past the
                                   parser position once the start tag
                                   has been read */
  struct tagstack_item *prev;   /* entry pushed before this one, or NULL */
  struct tagstack_item *next;   /* entry pushed after this one, or NULL */
 };
 /* Allocate a new entry and append it at the tail of the stack
   described by *HEAD and *TAIL, updating both as needed.  Only the
   link fields are initialized; the caller fills in the tag name and
   contents pointers.  Returns the new entry.  */
 struct tagstack_item *
 tagstack_push (struct tagstack_item **head, struct tagstack_item **tail)
 {
  struct tagstack_item *item = xmalloc(sizeof(struct tagstack_item));

  item->next = NULL;

  if (*head != NULL)
    {
      /* Non-empty stack: hook the entry in after the current tail.  */
      item->prev = *tail;
      (*tail)->next = item;
      *tail = item;
      return item;
    }

  /* Empty stack: the entry is both head and tail.  */
  item->prev = NULL;
  *head = item;
  *tail = item;
  return item;
 }
 /* Unlink TS and every entry pushed after it from the stack described
   by *HEAD and *TAIL, and free them.  Does nothing when the stack is
   empty.  */
 void
 tagstack_pop (struct tagstack_item **head, struct tagstack_item **tail,
              struct tagstack_item *ts)
 {
  struct tagstack_item *cur, *following;

  if (*head == NULL)
    return;

  if (ts == *tail)
    {
      /* Only the last entry goes away.  */
      if (ts == *head)
        {
          *head = NULL;
          *tail = NULL;
        }
      else
        {
          *tail = ts->prev;
          (*tail)->next = NULL;
        }
      xfree (ts);
      return;
    }

  /* TS is somewhere below the tail: cut the list just before it...  */
  if (ts == *head)
    *head = NULL;
  *tail = ts->prev;
  if (*tail)
    (*tail)->next = NULL;

  /* ...and free TS together with everything after it.  */
  for (cur = ts; cur; cur = following)
    {
      following = cur->next;
      xfree (cur);
    }
 }
 /* Walk the stack from TAIL back through the prev links, looking for
   the most recently pushed entry whose tag name has the same length
   as [TAGNAME_BEGIN, TAGNAME_END) and matches it case-insensitively.
   Returns that entry, or NULL if there is none.  */
 struct tagstack_item *
 tagstack_find (struct tagstack_item *tail, const char *tagname_begin,
               const char *tagname_end)
 {
  struct tagstack_item *item;
  int wanted_len = tagname_end - tagname_begin;

  for (item = tail; item; item = item->prev)
    {
      if (wanted_len != (item->tagname_end - item->tagname_begin))
        continue;
      if (strncasecmp (item->tagname_begin, tagname_begin, wanted_len) == 0)
        return item;
    }

  return NULL;
 }
 /* Decode the HTML character entity at *PTR, considering END to be end
   of buffer.  It is assumed that the "&" character that marks the
   beginning of the entity has been seen at *PTR-1.  If a recognized
@ -756,6 +844,9 @@ map_html_tags (const char *text, int size,
  bool attr_pair_resized = false;
  struct attr_pair *pairs = attr_pair_initial_storage;
  struct tagstack_item *head = NULL;
  struct tagstack_item *tail = NULL;
  if (!size)
    return;
@ -822,6 +913,18 @@ map_html_tags (const char *text, int size,
      goto look_for_tag;
    tag_name_end = p;
    SKIP_WS (p);
    if (!end_tag)
      {
        struct tagstack_item *ts = tagstack_push (&head, &tail);
        if (ts)
          {
            ts->tagname_begin  = tag_name_begin;
            ts->tagname_end    = tag_name_end;
            ts->contents_begin = NULL;
          }
      }
    if (end_tag && *p != '>')
      goto backout_tag;
@ -983,6 +1086,11 @@ map_html_tags (const char *text, int size,
 	++nattrs;
      }
    if (!end_tag && tail && (tail->tagname_begin == tag_name_begin))
      {
        tail->contents_begin = p+1;
      }
    if (uninteresting_tag)
      {
 	ADVANCE (p);
@ -994,6 +1102,7 @@ map_html_tags (const char *text, int size,
    {
      int i;
      struct taginfo taginfo;
      struct tagstack_item *ts = NULL;
      taginfo.name      = pool.contents;
      taginfo.end_tag_p = end_tag;
@ -1010,6 +1119,23 @@ map_html_tags (const char *text, int size,
      taginfo.attrs = pairs;
      taginfo.start_position = tag_start_position;
      taginfo.end_position   = p + 1;
      taginfo.contents_begin = NULL;
      taginfo.contents_end = NULL;
      if (end_tag)
        {
          ts = tagstack_find (tail, tag_name_begin, tag_name_end);
          if (ts)
            {
              if (ts->contents_begin)
                {
                  taginfo.contents_begin = ts->contents_begin;
                  taginfo.contents_end   = tag_start_position;
                }
              tagstack_pop (&head, &tail, ts);
            }
        }
      mapfun (&taginfo, maparg);
      ADVANCE (p);
    }
@ -1029,6 +1155,8 @@ map_html_tags (const char *text, int size,
  POOL_FREE (&pool);
  if (attr_pair_resized)
    xfree (pairs);
  /* pop any tag stack that's left */
  tagstack_pop (&head, &tail, head);
 }
 #undef ADVANCE
--- a/src/html-parse.h
+++ b/src/html-parse.h
@ -51,6 +51,9 @@ struct taginfo {
  const char *start_position;	/* start position of tag */
  const char *end_position;	/* end position of tag */
  const char *contents_begin;   /* delimiters of tag contents */
  const char *contents_end;     /* only valid if end_tag_p */
 };
 struct hash_table;		/* forward declaration */
--- a/src/html-url.c
+++ b/src/html-url.c
@ -41,9 +41,9 @@ so, delete this exception statement from your version.  */
 #include "utils.h"
 #include "hash.h"
 #include "convert.h"
-#include "recur.h"		/* declaration of get_urls_html */
+#include "recur.h"
-
+#include "html-url.h"
-struct map_context;
+#include "css-url.h"
 typedef void (*tag_handler_t) (int, struct taginfo *, struct map_context *);
@ -163,11 +163,12 @@ static struct {
   from the information above.  However, some places in the code refer
   to the attributes not mentioned here.  We add them manually.  */
 static const char *additional_attributes[] = {
-  "rel",			/* used by tag_handle_link */
+  "rel",			/* used by tag_handle_link  */
-  "http-equiv",			/* used by tag_handle_meta */
+  "http-equiv",			/* used by tag_handle_meta  */
-  "name",			/* used by tag_handle_meta */
+  "name",			/* used by tag_handle_meta  */
-  "content",			/* used by tag_handle_meta */
+  "content",			/* used by tag_handle_meta  */
-  "action"			/* used by tag_handle_form */
+  "action",			/* used by tag_handle_form  */
  "style"			/* used by check_style_attr */
 };
 static struct hash_table *interesting_tags;
@ -246,28 +247,20 @@ find_attr (struct taginfo *tag, const char *name, int *attrind)
  return NULL;
 }
-struct map_context {
+/* used for calls to append_url */
-  char *text;			/* HTML text. */
+#define ATTR_POS(tag, attrind, ctx) \
-  char *base;			/* Base URI of the document, possibly
+ (tag->attrs[attrind].value_raw_beginning - ctx->text)
-				   changed through <base href=...>. */
+#define ATTR_SIZE(tag, attrind) \
-  const char *parent_base;	/* Base of the current document. */
+ (tag->attrs[attrind].value_raw_size)
  const char *document_file;	/* File name of this document. */
  bool nofollow;		/* whether NOFOLLOW was specified in a
                                   <meta name=robots> tag. */
  struct urlpos *head, *tail;	/* List of URLs that is being
 				   built. */
 };
 /* Append LINK_URI to the urlpos structure that is being built.
-   LINK_URI will be merged with the current document base.  TAG and
+   LINK_URI will be merged with the current document base.
-   ATTRIND are the necessary context to store the position and
+*/
   size.  */
-static struct urlpos *
+struct urlpos *
-append_url (const char *link_uri,
+append_url (const char *link_uri, int position, int size,
-	    struct taginfo *tag, int attrind, struct map_context *ctx)
+            struct map_context *ctx)
 {
  int link_has_scheme = url_has_scheme (link_uri);
  struct urlpos *newel;
@ -325,8 +318,8 @@ append_url (const char *link_uri,
  newel = xnew0 (struct urlpos);
  newel->url = url;
-  newel->pos = tag->attrs[attrind].value_raw_beginning - ctx->text;
+  newel->pos = position;
-  newel->size = tag->attrs[attrind].value_raw_size;
+  newel->size = size;
  /* A URL is relative if the host is not named, and the name does not
     start with `/'.  */
@ -346,6 +339,18 @@ append_url (const char *link_uri,
  return newel;
 }
 static void
 check_style_attr (struct taginfo *tag, struct map_context *ctx)
 {
  int attrind;
  char *style = find_attr (tag, "style", &attrind);
  if (!style)
    return;
  /* raw pos and raw size include the quotes, hence the +1 -2 */
  get_urls_css (ctx, ATTR_POS(tag,attrind,ctx)+1, ATTR_SIZE(tag,attrind)-2);
 }
 /* All the tag_* functions are called from collect_tags_mapper, as
   specified by KNOWN_TAGS.  */
@ -393,7 +398,8 @@ tag_find_urls (int tagid, struct taginfo *tag, struct map_context *ctx)
 	  if (0 == strcasecmp (tag->attrs[attrind].name,
 			       tag_url_attributes[i].attr_name))
 	    {
-	      struct urlpos *up = append_url (link, tag, attrind, ctx);
+	      struct urlpos *up = append_url (link, ATTR_POS(tag,attrind,ctx),
                                              ATTR_SIZE(tag,attrind), ctx);
 	      if (up)
 		{
 		  int flags = tag_url_attributes[i].flags;
@ -418,7 +424,8 @@ tag_handle_base (int tagid, struct taginfo *tag, struct map_context *ctx)
  if (!newbase)
    return;
-  base_urlpos = append_url (newbase, tag, attrind, ctx);
+  base_urlpos = append_url (newbase, ATTR_POS(tag,attrind,ctx),
                            ATTR_SIZE(tag,attrind), ctx);
  if (!base_urlpos)
    return;
  base_urlpos->ignore_when_downloading = 1;
@ -439,9 +446,11 @@ tag_handle_form (int tagid, struct taginfo *tag, struct map_context *ctx)
 {
  int attrind;
  char *action = find_attr (tag, "action", &attrind);
  if (action)
    {
-      struct urlpos *up = append_url (action, tag, attrind, ctx);
+      struct urlpos *up = append_url (action, ATTR_POS(tag,attrind,ctx),
                                      ATTR_SIZE(tag,attrind), ctx);
      if (up)
 	up->ignore_when_downloading = 1;
    }
@ -464,14 +473,23 @@ tag_handle_link (int tagid, struct taginfo *tag, struct map_context *ctx)
  */
  if (href)
    {
-      struct urlpos *up = append_url (href, tag, attrind, ctx);
+      struct urlpos *up = append_url (href, ATTR_POS(tag,attrind,ctx),
                                      ATTR_SIZE(tag,attrind), ctx);
      if (up)
 	{
 	  char *rel = find_attr (tag, "rel", NULL);
-	  if (rel
+	  if (rel)
-	      && (0 == strcasecmp (rel, "stylesheet")
+            {
-		  || 0 == strcasecmp (rel, "shortcut icon")))
+	      if (0 == strcasecmp (rel, "stylesheet"))
-	    up->link_inline_p = 1;
+                {
                  up->link_inline_p = 1;
                  up->link_expect_css = 1;
                }
 	      else if (0 == strcasecmp (rel, "shortcut icon"))
                {
                  up->link_inline_p = 1;
                }
            }
 	  else
 	    /* The external ones usually point to HTML pages, such as
 	       <link rel="next" href="..."> */
@ -525,7 +543,8 @@ tag_handle_meta (int tagid, struct taginfo *tag, struct map_context *ctx)
      while (ISSPACE (*p))
 	++p;
-      entry = append_url (p, tag, attrind, ctx);
+      entry = append_url (p, ATTR_POS(tag,attrind,ctx),
                          ATTR_SIZE(tag,attrind), ctx);
      if (entry)
 	{
 	  entry->link_refresh_p = 1;
@ -570,11 +589,26 @@ collect_tags_mapper (struct taginfo *tag, void *arg)
  struct map_context *ctx = (struct map_context *)arg;
  /* Find the tag in our table of tags.  This must not fail because
-     map_html_tags only returns tags found in interesting_tags.  */
+     map_html_tags only returns tags found in interesting_tags.
  struct known_tag *t = hash_table_get (interesting_tags, tag->name);
  assert (t != NULL);
-  t->handler (t->tagid, tag, ctx);
+     I've changed this for now, I'm passing NULL as interesting_tags
     to map_html_tags.  This way we can check all tags for a style
     attribute.
  */
  struct known_tag *t = hash_table_get (interesting_tags, tag->name);
  if (t != NULL)
    t->handler (t->tagid, tag, ctx);
  check_style_attr (tag, ctx);
  if (tag->end_tag_p && (0 == strcasecmp (tag->name, "style")) &&
      tag->contents_begin && tag->contents_end)
  {
    /* parse contents */
    get_urls_css (ctx, tag->contents_begin - ctx->text,
                  tag->contents_end - tag->contents_begin);
  }
 }
 /* Analyze HTML tags FILE and construct a list of URLs referenced from
@ -618,8 +652,9 @@ get_urls_html (const char *file, const char *url, bool *meta_disallow_follow)
  if (opt.strict_comments)
    flags |= MHT_STRICT_COMMENTS;
  /* the NULL here used to be interesting_tags */
  map_html_tags (fm->content, fm->length, collect_tags_mapper, &ctx, flags,
-		 interesting_tags, interesting_attributes);
+		 NULL, interesting_attributes);
  DEBUGP (("no-follow in %s: %d\n", file, ctx.nofollow));
  if (meta_disallow_follow)
--- a/src/html-url.h
+++ b/src/html-url.h
@ -0,0 +1,51 @@
 /* Declarations for html-url.c.
   Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
 This file is part of GNU Wget.
 GNU Wget is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.
 GNU Wget is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 You should have received a copy of the GNU General Public License
 along with Wget; if not, write to the Free Software
 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 In addition, as a special exception, the Free Software Foundation
 gives permission to link the code of its release of Wget with the
 OpenSSL project's "OpenSSL" library (or with modified versions of it
 that use the same license as the "OpenSSL" library), and distribute
 the linked executables.  You must obey the GNU General Public License
 in all respects for all of the code used other than "OpenSSL".  If you
 modify this file, you may extend this exception to your version of the
 file, but you are not obligated to do so.  If you do not wish to do
 so, delete this exception statement from your version.  */
 #ifndef HTML_URL_H
 #define HTML_URL_H
 struct map_context {
  /* HTML text. */
  char *text;
  /* Base URI of the document, possibly changed through
     <base href=...>. */
  char *base;
  /* Base of the current document. */
  const char *parent_base;
  /* File name of this document. */
  const char *document_file;
  /* Whether NOFOLLOW was specified in a <meta name=robots> tag. */
  bool nofollow;
  /* First element of the list of URLs that is being built. */
  struct urlpos *head;
  /* Last element of the list of URLs that is being built. */
  struct urlpos *tail;
 };
 struct urlpos *get_urls_file (const char *);
 struct urlpos *get_urls_html (const char *, const char *, bool *);
 struct urlpos *append_url (const char *, int, int, struct map_context *);
 void free_urlpos (struct urlpos *);
 #endif /* HTML_URL_H */
--- a/src/http.c
+++ b/src/http.c
@ -77,6 +77,7 @@ static struct cookie_jar *wget_cookie_jar;
 #define TEXTHTML_S "text/html"
 #define TEXTXHTML_S "application/xhtml+xml"
 #define TEXTCSS_S "text/css"
 /* Some status code validation macros: */
 #define H_20X(x)        (((x) >= 200) && ((x) < 300))
@ -1235,6 +1236,7 @@ static char *create_authorization_line (const char *, const char *,
                                        const char *, bool *);
 static char *basic_authentication_encode (const char *, const char *);
 static bool known_authentication_scheme_p (const char *, const char *);
 static void ensure_extension (struct http_stat *, const char *, int *);
 static void load_cookies (void);
 #define BEGINS_WITH(line, string_constant)                               \
@ -2017,34 +2019,25 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file);
  else
    *dt &= ~TEXTHTML;
-  if (opt.html_extension && (*dt & TEXTHTML))
+  if (type &&
-    /* -E / --html-extension / html_extension = on was specified, and this is a
+      0 == strncasecmp (type, TEXTCSS_S, strlen (TEXTCSS_S)))
-       text/html file.  If some case-insensitive variation on ".htm[l]" isn't
+    *dt |= TEXTCSS;
-       already the file's suffix, tack on ".html". */
+  else
-    {
+    *dt &= ~TEXTCSS;
      char *last_period_in_local_filename = strrchr (hs->local_file, '.');
-      if (last_period_in_local_filename == NULL
+  if (opt.html_extension)
-          || !(0 == strcasecmp (last_period_in_local_filename, ".htm")
+    {
-               || 0 == strcasecmp (last_period_in_local_filename, ".html")))
+      if (*dt & TEXTHTML)
        /* -E / --html-extension / html_extension = on was specified,
           and this is a text/html file.  If some case-insensitive
           variation on ".htm[l]" isn't already the file's suffix,
           tack on ".html". */
        {
-          int local_filename_len = strlen (hs->local_file);
+          ensure_extension (hs, ".html", dt);
-          /* Resize the local file, allowing for ".html" preceded by
+        }
-             optional ".NUMBER".  */
+      else if (*dt & TEXTCSS)
-          hs->local_file = xrealloc (hs->local_file,
+        {
-                                     local_filename_len + 24 + sizeof (".html"));
+          ensure_extension (hs, ".css", dt);
          strcpy(hs->local_file + local_filename_len, ".html");
          /* If clobbering is not allowed and the file, as named,
             exists, tack on ".NUMBER.html" instead. */
          if (!ALLOW_CLOBBER && file_exists_p (hs->local_file))
            {
              int ext_num = 1;
              do
                sprintf (hs->local_file + local_filename_len,
                         ".%d.html", ext_num++);
              while (file_exists_p (hs->local_file));
            }
          *dt |= ADDED_HTML_EXTENSION;
        }
    }
@ -3018,6 +3011,42 @@ http_cleanup (void)
    cookie_jar_delete (wget_cookie_jar);
 }
 /* Make sure hs->local_file ends in EXT (e.g. ".html" or ".css").
    If the name has no extension, or its extension matches neither EXT
    nor -- for five-character extensions such as ".html" -- the
    four-character short form (".htm"), EXT is appended.  Comparison is
    case-insensitive.  When clobbering is not allowed and the resulting
    name already exists, ".NUMBER" is inserted before EXT until an
    unused name is found.  hs->local_file may be reallocated, and
    ADDED_HTML_EXTENSION is set in *DT whenever the name was changed.  */
 void
 ensure_extension (struct http_stat *hs, const char *ext, int *dt)
 {
  char *last_period_in_local_filename = strrchr (hs->local_file, '.');
  char shortext[8];
  int len = strlen (ext);
  /* Start from an empty string so that the comparison below never
     reads uninitialized memory when EXT has no short form; an empty
     string cannot match a suffix that begins with '.'.  */
  shortext[0] = '\0';
  if (len == 5)
    {
      /* Short form is EXT minus its last character: ".html" -> ".htm".
         Four bytes are copied, so the terminator goes at index
         len - 1.  */
      memcpy (shortext, ext, len - 1);
      shortext[len - 1] = '\0';
    }
  if (last_period_in_local_filename == NULL
      || !(0 == strcasecmp (last_period_in_local_filename, shortext)
           || 0 == strcasecmp (last_period_in_local_filename, ext)))
    {
      int local_filename_len = strlen (hs->local_file);
      /* Resize the local file, allowing for EXT preceded by
         optional ".NUMBER".  */
      hs->local_file = xrealloc (hs->local_file,
                                 local_filename_len + 24 + len);
      strcpy (hs->local_file + local_filename_len, ext);
      /* If clobbering is not allowed and the file, as named,
         exists, tack on ".NUMBER" followed by EXT instead. */
      if (!ALLOW_CLOBBER && file_exists_p (hs->local_file))
        {
          int ext_num = 1;
          do
            sprintf (hs->local_file + local_filename_len,
                     ".%d%s", ext_num++, ext);
          while (file_exists_p (hs->local_file));
        }
      *dt |= ADDED_HTML_EXTENSION;
    }
 }
 #ifdef TESTING
--- a/src/recur.c
+++ b/src/recur.c
@ -48,8 +48,10 @@ so, delete this exception statement from your version.  */
 #include "hash.h"
 #include "res.h"
 #include "convert.h"
 #include "html-url.h"
 #include "css-url.h"
 #include "spider.h"
-
+
 /* Functions for maintaining the URL queue.  */
 struct queue_element {
@ -58,7 +60,8 @@ struct queue_element {
  int depth;			/* the depth */
  bool html_allowed;	        /* whether the document is allowed to
 				   be treated as HTML. */
-
+  bool css_allowed; 	        /* whether the document is allowed to
 				   be treated as CSS. */
  struct queue_element *next;	/* next element in queue */
 };
@ -91,13 +94,15 @@ url_queue_delete (struct url_queue *queue)
 static void
 url_enqueue (struct url_queue *queue,
-	     const char *url, const char *referer, int depth, bool html_allowed)
+	     const char *url, const char *referer, int depth,
             bool html_allowed, bool css_allowed)
 {
  struct queue_element *qel = xnew (struct queue_element);
  qel->url = url;
  qel->referer = referer;
  qel->depth = depth;
  qel->html_allowed = html_allowed;
  qel->css_allowed = css_allowed;
  qel->next = NULL;
  ++queue->count;
@ -121,7 +126,7 @@ url_enqueue (struct url_queue *queue,
 static bool
 url_dequeue (struct url_queue *queue,
 	     const char **url, const char **referer, int *depth,
-	     bool *html_allowed)
+	     bool *html_allowed, bool *css_allowed)
 {
  struct queue_element *qel = queue->head;
@ -136,6 +141,7 @@ url_dequeue (struct url_queue *queue,
  *referer = qel->referer;
  *depth = qel->depth;
  *html_allowed = qel->html_allowed;
  *css_allowed = qel->css_allowed;
  --queue->count;
@ -200,7 +206,7 @@ retrieve_tree (const char *start_url)
  /* Enqueue the starting URL.  Use start_url_parsed->url rather than
     just URL so we enqueue the canonical form of the URL.  */
-  url_enqueue (queue, xstrdup (start_url_parsed->url), NULL, 0, true);
+  url_enqueue (queue, xstrdup (start_url_parsed->url), NULL, 0, true, false);
  string_set_add (blacklist, start_url_parsed->url);
  while (1)
@ -208,7 +214,8 @@ retrieve_tree (const char *start_url)
      bool descend = false;
      char *url, *referer, *file = NULL;
      int depth;
-      bool html_allowed;
+      bool html_allowed, css_allowed;
      bool is_css = false;
      bool dash_p_leaf_HTML = false;
      if (opt.quota && total_downloaded_bytes > opt.quota)
@ -220,7 +227,7 @@ retrieve_tree (const char *start_url)
      if (!url_dequeue (queue,
 			(const char **)&url, (const char **)&referer,
-			&depth, &html_allowed))
+			&depth, &html_allowed, &css_allowed))
 	break;
      /* ...and download it.  Note that this download is in most cases
@ -238,10 +245,21 @@ retrieve_tree (const char *start_url)
 	  DEBUGP (("Already downloaded \"%s\", reusing it from \"%s\".\n",
 		   url, file));
          /* this sucks, needs to be combined! */
 	  if (html_allowed
 	      && downloaded_html_set
 	      && string_set_contains (downloaded_html_set, file))
-	    descend = true;
+            {
              descend = true;
              is_css = false;
            }
          if (css_allowed
              && downloaded_css_set
              && string_set_contains (downloaded_css_set, file))
            {
              descend = 1;
              is_css = true;
            }
 	}
      else
 	{
@ -252,7 +270,21 @@ retrieve_tree (const char *start_url)
 	  if (html_allowed && file && status == RETROK
 	      && (dt & RETROKF) && (dt & TEXTHTML))
-	    descend = true;
+            {
              descend = true;
              is_css = false;
            }
          /* CSS is handled a little differently: css_allowed can
             override the content type, because lots of web servers
             serve CSS with an incorrect content type.  Mark the
             document as CSS so that it is scanned with the CSS URL
             extractor rather than the HTML one.  */
          if (file && status == RETROK
              && (dt & RETROKF) &&
              ((dt & TEXTCSS) || css_allowed))
            {
              descend = true;
              is_css = true;
            }
 	  if (redirected)
 	    {
@ -306,14 +338,15 @@ retrieve_tree (const char *start_url)
 	    }
 	}
-      /* If the downloaded document was HTML, parse it and enqueue the
+      /* If the downloaded document was HTML or CSS, parse it and enqueue the
 	 links it contains. */
      if (descend)
 	{
 	  bool meta_disallow_follow = false;
 	  struct urlpos *children
-	    = get_urls_html (file, url, &meta_disallow_follow);
+	    = is_css ? get_urls_css_file (file, url) :
                       get_urls_html (file, url, &meta_disallow_follow);
 	  if (opt.use_robots && meta_disallow_follow)
 	    {
@ -338,7 +371,8 @@ retrieve_tree (const char *start_url)
 		    {
 		      url_enqueue (queue, xstrdup (child->url->url),
 				   xstrdup (url), depth + 1,
-				   child->link_expect_html);
+				   child->link_expect_html,
 				   child->link_expect_css);
 		      /* We blacklist the URL we have enqueued, because we
 			 don't want to enqueue (and hence download) the
 			 same URL twice.  */
@ -385,9 +419,9 @@ retrieve_tree (const char *start_url)
  {
    char *d1, *d2;
    int d3;
-    bool d4;
+    bool d4, d5;
    while (url_dequeue (queue,
-			(const char **)&d1, (const char **)&d2, &d3, &d4))
+			(const char **)&d1, (const char **)&d2, &d3, &d4, &d5))
      {
 	xfree (d1);
 	xfree_null (d2);
--- a/src/recur.h
+++ b/src/recur.h
@ -43,9 +43,4 @@ struct urlpos;
 void recursive_cleanup (void);
 uerr_t retrieve_tree (const char *);
 /* These are really in html-url.c. */
 struct urlpos *get_urls_file (const char *);
 struct urlpos *get_urls_html (const char *, const char *, bool *);
 void free_urlpos (struct urlpos *);
 #endif /* RECUR_H */
--- a/src/retr.c
+++ b/src/retr.c
@ -51,6 +51,7 @@ so, delete this exception statement from your version.  */
 #include "hash.h"
 #include "convert.h"
 #include "ptimer.h"
 #include "html-url.h"
 /* Total size of downloaded files.  Used to enforce quota.  */
 SUM_SIZE_INT total_downloaded_bytes;
@ -784,6 +785,8 @@ retrieve_url (const char *origurl, char **file, char **newloc,
 	    register_redirection (origurl, u->url);
 	  if (*dt & TEXTHTML)
 	    register_html (u->url, local_file);
 	  if (*dt & TEXTCSS)
 	    register_css (u->url, local_file);
 	}
    }
--- a/src/wget.h
+++ b/src/wget.h
@ -304,7 +304,8 @@ enum
  HEAD_ONLY            = 0x0004,	/* only send the HEAD request */
  SEND_NOCACHE         = 0x0008,	/* send Pragma: no-cache directive */
  ACCEPTRANGES         = 0x0010,	/* Accept-ranges header was found */
-  ADDED_HTML_EXTENSION = 0x0020         /* added ".html" extension due to -E */
+  ADDED_HTML_EXTENSION = 0x0020,        /* added ".html" extension due to -E */
  TEXTCSS              = 0x0040	        /* document is of type text/css */
 };
 /* Universal error type -- used almost everywhere.  Error reporting of