1
0
mirror of https://github.com/moparisthebest/wget synced 2024-07-03 16:38:41 -04:00
wget/src/html-parse.h
hniksic b0b1c815c1 [svn] A bunch of new features:
- use mmap() to read whole files in core instead of allocating memory
  and read'ing it.

- use a new, more general, HTML parser (html-parse.c) and interface to
  it from Wget (html-url.c).

- respect <meta name=robots content=nofollow> (easy with the new HTML
  parser).

- use hash tables instead of linked lists in places where the lists
  were used to facilitate mappings.

- rewrite the code in host.c to be more readable and faster (hash
  tables instead of home-grown lists.)

- make convert_links properly convert partial URLs to complete ones
  for those URLs that have *not* been downloaded.

- use HTTP persistent connections where available.  very
  simple-minded, caches the last connection to the server.

Published in <sxshf533d5r.fsf@florida.arsdigita.de>.
2000-11-19 12:50:10 -08:00

45 lines
1.5 KiB
C

/* Declarations for html-parse.c.
Copyright (C) 1998 Free Software Foundation, Inc.
This file is part of Wget.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
/* A single attribute of a tag: its name and value, plus bookkeeping
   about the raw extent of the value.  */
struct attr_pair
{
  /* Attribute name.  */
  char *name;

  /* Attribute value.  */
  char *value;

  /* Where the raw value begins and how large it is, counting the
     quotes and everything else around it.  URL conversion needs
     these.  */
  const char *value_raw_beginning;
  int value_raw_size;

  /* For internal use by map_html_tags.  */
  int name_pool_index;
  int value_pool_index;
};
/* Information about one tag: its name, whether it is an end-tag, its
   attributes, and the positions where it starts and ends.  */
struct taginfo
{
  /* Tag name.  */
  char *name;

  /* Whether this is an end-tag.  */
  int end_tag_p;

  /* Number of attributes, followed by the attributes themselves.  */
  int nattrs;
  struct attr_pair *attrs;

  /* Start and end positions of the tag.  */
  const char *start_position;
  const char *end_position;
};
/* Prototype for map_html_tags, declared here for html-parse.c.  The
   fifth argument is a callback taking a struct taginfo pointer and a
   void pointer; the function itself returns nothing.

   NOTE(review): the parameters are unnamed in this declaration.
   Presumably the leading (const char *, int) pair is the text to scan
   and its size, the two (const char **) arguments restrict which tags
   and attributes are reported, and the trailing void pointer is handed
   through to the callback -- confirm against the definition in
   html-parse.c.  */
void map_html_tags PARAMS ((const char *,
                            int,
                            const char **,
                            const char **,
                            void (*) (struct taginfo *, void *),
                            void *));