From 18d476b8d013cdc117a376c840bb00ccd32c2798 Mon Sep 17 00:00:00 2001 From: hniksic Date: Sat, 10 Feb 2001 16:22:42 -0800 Subject: [PATCH] [svn] Applied doc fixes from Jan's "realclean patch". The doc fixes include the new texi2pod generator for creating the man page out of Texinfo docs. Published in <20010106184340.A14968@erwin.telekabel.at>. --- doc/ChangeLog | 18 +++ doc/Makefile.in | 39 ++++-- doc/texi2pod.pl | 331 ++++++++++++++++++++++++++++++++++++++++++++++++ doc/wget.texi | 70 +++++++++- 4 files changed, 438 insertions(+), 20 deletions(-) create mode 100644 doc/texi2pod.pl diff --git a/doc/ChangeLog b/doc/ChangeLog index a900cf87..2c58040d 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,3 +1,21 @@ +2001-01-06 Jan Prikryl + + * wget.texi (Reporting Bugs): Deleted the sentence about Cc-ing the + bug report to Wget mailing list as the bug report address is an + alias for the mailing list anyway. + (Mailing List): Added URL for the alternate archive. + + * wget.texi: Bunch of cosmetic changes. + + * Makefile.in: Added targets for manpage generation using + texi2pod.pl and pod2man (comes with Perl5). As we cannot rely on + Perl5 being available on the system, manpage is not being built + automatically. Updated '*clean' targets to remove + 'sample.wgetrc.munged...', 'wget.pod', and 'wget.man'. + + * texi2pod.pl: New file copied from GCC distribution to facilitate + automatic manpage generation. 
+ 2001-01-09 Dan Harkless * wget.texi: Did a bunch of clarification and correction to the diff --git a/doc/Makefile.in b/doc/Makefile.in index 867a3c66..e170e936 100644 --- a/doc/Makefile.in +++ b/doc/Makefile.in @@ -44,25 +44,36 @@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ RM = rm -f -MAN = wget.$(manext) -WGETRC = $(sysconfdir)/wgetrc +TEXI2POD = ./texi2pod.pl +POD2MAN = pod2man +MAN = wget.$(manext) +WGETRC = $(sysconfdir)/wgetrc +SAMPLERCTEXI = sample.wgetrc.munged_for_texi_inclusion # # Dependencies for building # -all: wget.info # wget.cat +all: wget.info everything: all wget_us.ps wget_a4.ps wget_toc.html -sample.wgetrc.munged_for_texi_inclusion: sample.wgetrc - sed s/@/@@/g $(srcdir)/sample.wgetrc > sample.wgetrc.munged_for_texi_inclusion +$(SAMPLERCTEXI): $(srcdir)/sample.wgetrc + sed s/@/@@/g $< > $@ -wget.info: sample.wgetrc.munged_for_texi_inclusion wget.texi +wget.info: $(SAMPLERCTEXI) wget.texi -$(MAKEINFO) +wget.pod: wget.texi + $(TEXI2POD) $< > $@ + +$(MAN): wget.pod + $(POD2MAN) --center="GNU Wget" --release="GNU Wget @VERSION@" $< > $@ + +man: $(MAN) + #wget.cat: $(MAN) -# nroff -man $(srcdir)/$(MAN) > wget.cat +# nroff -man $< > $@ dvi: wget.dvi @@ -96,9 +107,9 @@ install.info: wget.info done # install man page, creating install directory if necessary -#install.man: -# $(top_srcdir)/mkinstalldirs $(mandir)/man$(manext) -# $(INSTALL_DATA) $(srcdir)/$(MAN) $(mandir)/man$(manext)/$(MAN) +install.man: + $(top_srcdir)/mkinstalldirs $(mandir)/man$(manext) + $(INSTALL_DATA) $(srcdir)/$(MAN) $(mandir)/man$(manext)/$(MAN) # install sample.wgetrc install.wgetrc: @@ -127,22 +138,24 @@ uninstall.info: $(RM) $(infodir)/wget.info* # uninstall man page -#uninstall.man: -# $(RM) $(mandir)/man$(manext)/$(MAN) +uninstall.man: + $(RM) $(mandir)/man$(manext)/$(MAN) # # Dependencies for cleanup # clean: - $(RM) *~ *.bak *.cat *.html + $(RM) *~ *.bak *.cat *.pod *.html $(RM) *.dvi *.aux *.cp *.cps *.fn *.toc *.tp *.vr *.ps *.ky *.pg *.log distclean: clean 
$(RM) Makefile + $(RM) $(MAN) realclean: distclean $(RM) wget.info* + $(RM) $(SAMPLERCTEXI) # # Dependencies for maintenance diff --git a/doc/texi2pod.pl b/doc/texi2pod.pl new file mode 100644 index 00000000..fcf910d2 --- /dev/null +++ b/doc/texi2pod.pl @@ -0,0 +1,331 @@ +#! /usr/bin/perl -w + +# This does trivial (and I mean _trivial_) conversion of Texinfo +# markup to Perl POD format. It's intended to be used to extract +# something suitable for a manpage from a Texinfo document. + +$output = 0; +$skipping = 0; +%sects = (); +$section = ""; +@icstack = (); +@endwstack = (); +@skstack = (); +$shift = ""; +%defs = (); +$fnno = 1; + +while ($_ = shift) { + if (/^-D(.*)$/) { + if ($1 ne "") { + $flag = $1; + } else { + $flag = shift; + } + die "no flag specified for -D\n" + unless $flag ne ""; + die "flags may only contain letters, digits, hyphens, and underscores\n" + unless $flag =~ /^[a-zA-Z0-9_-]+$/; + $defs{$flag} = ""; + } elsif (/^-/) { + usage(); + } else { + $in = $_, next unless defined $in; + $out = $_, next unless defined $out; + usage(); + } +} + +if (defined $in) { + open(STDIN, $in) or die "opening \"$in\": $!\n"; +} +if (defined $out) { + open(STDOUT, ">$out") or die "opening \"$out\": $!\n"; +} + +while() +{ + # Certain commands are discarded without further processing. + /^\@(?: + [a-z]+index # @*index: useful only in complete manual + |need # @need: useful only in printed manual + |(?:end\s+)?group # @group .. @end group: ditto + |page # @page: ditto + |node # @node: useful only in .info file + )\b/x and next; + + chomp; + + # Look for filename and title markers. + /^\@setfilename\s+([^.]+)/ and $fn = $1, next; + /^\@settitle\s+([^.]+)/ and $tl = $1, next; + + # Look for blocks surrounded by @c man begin SECTION ... @c man end. + # This really oughta be @ifman ... @end ifman and the like, but such + # would require rev'ing all other Texinfo translators. 
+ /^\@c man begin ([A-Z]+)/ and $sect = $1, $output = 1, next; + /^\@c man end/ and do { + $sects{$sect} = "" unless exists $sects{$sect}; + $sects{$sect} .= postprocess($section); + $section = ""; + $output = 0; + next; + }; + next unless $output; + + # Discard comments. (Can't do it above, because then we'd never see + # @c man lines.) + /^\@c\b/ and next; + + # End-block handler goes up here because it needs to operate even + # if we are skipping. + /^\@end\s+([a-z]+)/ and do { + # Ignore @end foo, where foo is not an operation which may + # cause us to skip, if we are presently skipping. + my $ended = $1; + next if $skipping && $ended !~ /^(?:ifset|ifclear|ignore|menu)$/; + + die "\@end $ended without \@$ended at line $.\n" unless defined $endw; + die "\@$endw ended by \@end $ended at line $.\n" unless $ended eq $endw; + + $endw = pop @endwstack; + + if ($ended =~ /^(?:ifset|ifclear|ignore|menu)$/) { + $skipping = pop @skstack; + next; + } elsif ($ended =~ /^(?:example|smallexample)$/) { + $shift = ""; + $_ = ""; # need a paragraph break + } elsif ($ended =~ /^(?:itemize|enumerate|table)$/) { + $_ = "\n=back\n"; + $ic = pop @icstack; + } else { + die "unknown command \@end $ended at line $.\n"; + } + }; + + # We must handle commands which can cause skipping even while we + # are skipping, otherwise we will not process nested conditionals + # correctly. + /^\@ifset\s+([a-zA-Z0-9_-]+)/ and do { + push @endwstack, $endw; + push @skstack, $skipping; + $endw = "ifset"; + $skipping = 1 unless exists $defs{$1}; + next; + }; + + /^\@ifclear\s+([a-zA-Z0-9_-]+)/ and do { + push @endwstack, $endw; + push @skstack, $skipping; + $endw = "ifclear"; + $skipping = 1 if exists $defs{$1}; + next; + }; + + /^\@(ignore|menu)\b/ and do { + push @endwstack, $endw; + push @skstack, $skipping; + $endw = $1; + $skipping = 1; + next; + }; + + next if $skipping; + + # Character entities. 
First the ones that can be replaced by raw text + # or discarded outright: + s/\@copyright\{\}/(c)/g; + s/\@dots\{\}/.../g; + s/\@enddots\{\}/..../g; + s/\@([.!? ])/$1/g; + s/\@[:-]//g; + s/\@bullet(?:\{\})?/*/g; + s/\@TeX\{\}/TeX/g; + s/\@pounds\{\}/\#/g; + s/\@minus(?:\{\})?/-/g; + + # Now the ones that have to be replaced by special escapes + # (which will be turned back into text by unmunge()) + s/&/&/g; + s/\@\{/{/g; + s/\@\}/}/g; + s/\@\@/&at;/g; + # POD doesn't interpret E<> inside a verbatim block. + if ($shift eq "") { + s//>/g; + } else { + s//>/g; + } + + # Single line command handlers. + /^\@set\s+([a-zA-Z0-9_-]+)\s*(.*)$/ and $defs{$1} = $2, next; + /^\@clear\s+([a-zA-Z0-9_-]+)/ and delete $defs{$1}, next; + + /^\@section\s+(.+)$/ and $_ = "\n=head2 $1\n"; + /^\@subsection\s+(.+)$/ and $_ = "\n=head3 $1\n"; + + # Block command handlers: + /^\@itemize\s+(\@[a-z]+|\*|-)/ and do { + push @endwstack, $endw; + push @icstack, $ic; + $ic = $1; + $_ = "\n=over 4\n"; + $endw = "itemize"; + }; + + /^\@enumerate(?:\s+([A-Z0-9]+))?/ and do { + push @endwstack, $endw; + push @icstack, $ic; + if (defined $1) { + $ic = $1 . "."; + } else { + $ic = "1."; + } + $_ = "\n=over 4\n"; + $endw = "enumerate"; + }; + + /^\@table\s+(\@[a-z]+)/ and do { + push @endwstack, $endw; + push @icstack, $ic; + $ic = $1; + $ic =~ s/\@(?:samp|strong|key)/B/; + $ic =~ s/\@(?:code|kbd)/C/; + $ic =~ s/\@(?:dfn|var|emph|cite|i)/I/; + $ic =~ s/\@(?:file)/F/; + $_ = "\n=over 4\n"; + $endw = "table"; + }; + + /^\@((?:small)?example)/ and do { + push @endwstack, $endw; + $endw = $1; + $shift = "\t"; + $_ = ""; # need a paragraph break + }; + + /^\@itemx?\s*(.+)?$/ and do { + if (defined $1) { + # Entity escapes prevent munging by the <> processing below. 
+ $_ = "\n=item $ic\<$1\>\n"; + } else { + $_ = "\n=item $ic\n"; + $ic =~ y/A-Ya-y1-8/B-Zb-z2-9/; + } + }; + + $section .= $shift.$_."\n"; +} + +die "No filename or title\n" unless defined $fn && defined $tl; + +$sects{NAME} = "$fn \- $tl\n"; +$sects{FOOTNOTES} .= "=back\n" if exists $sects{FOOTNOTES}; + +for $sect (qw(NAME SYNOPSIS DESCRIPTION OPTIONS ENVIRONMENT FILES + BUGS NOTES FOOTNOTES SEEALSO AUTHOR COPYRIGHT)) { + if(exists $sects{$sect}) { + $head = $sect; + $head =~ s/SEEALSO/SEE ALSO/; + print "=head1 $head\n\n"; + print scalar unmunge ($sects{$sect}); + print "\n"; + } +} + +sub usage +{ + die "usage: $0 [-D toggle...] [infile [outfile]]\n"; +} + +sub postprocess +{ + local $_ = $_[0]; + + # @value{foo} is replaced by whatever 'foo' is defined as. + s/\@value\{([a-zA-Z0-9_-]+)\}/$defs{$1}/g; + + # Formatting commands. + s/\@(?:dfn|var|emph|cite|i)\{([^\}]*)\}/I<$1>/g; + s/\@(?:code|kbd)\{([^\}]*)\}/C<$1>/g; + s/\@(?:samp|strong|key|option|env|b)\{([^\}]*)\}/B<$1>/g; + s/\@sc\{([^\}]*)\}/\U$1/g; + s/\@file\{([^\}]*)\}/F<$1>/g; + s/\@w\{([^\}]*)\}/S<$1>/g; + s/\@(?:dmn|math)\{([^\}]*)\}/$1/g; + + # Cross references are thrown away, as are @noindent and @refill. + # (@noindent is impossible in .pod, and @refill is unnecessary.) + # @* is also impossible in .pod; we discard it and any newline that + # follows it. + + s/\@xref\{(?:[^\}]*)\}[^.]*.//g; + + # Originally "s/\s+\(\@pxref\{(?:[^\}]*)\}\)//g;", would not + # process cross-references like '(@pxref{some section} for more + # details)" + s/\s+\(\@pxref\{(?:[^\}]*)\}[^\)]*\)//g; + + s/;\s+\@pxref\{(?:[^\}]*)\}//g; + s/\@noindent\s*//g; + s/\@refill//g; + s/\@\*\s*\n?//g; + + # @uref can take one, two, or three arguments, with different + # semantics each time. @url and @email are just like @uref with + # one argument, for our purposes. 
+ s/\@(?:uref|url|email)\{([^\},]*)\}/<C<$1>>/g; + s/\@uref\{([^\},]*),([^\},]*)\}/$2 (C<$1>)/g; + s/\@uref\{([^\},]*),([^\},]*),([^\},]*)\}/$3/g; + + # Turn B blah> into B I B to + # match Texinfo semantics of @emph inside @samp. + s/<//g; + 1 while (s/B<([^<>]*)I<([^>]+)>/B<$1>I<$2>B]*)B<([^>]+)>/I<$1>B<$2>I//g; + s/([BI])<(\s+)([^>]+)>/$2$1<$3>/g; + s/([BI])<([^>]+?)(\s+)>/$1<$2>$3/g; + + # Extract footnotes. This has to be done after all other + # processing because otherwise the regexp will choke on formatting + # inside @footnote. + while (/\@footnote/g) { + s/\@footnote\{([^\}]+)\}/[$fnno]/; + add_footnote($1, $fnno); + $fnno++; + } + + return $_; +} + +sub unmunge +{ + # Replace escaped symbols with their equivalents. + local $_ = $_[0]; + + s/</E/g; + s/>/E/g; + s/{/\{/g; + s/}/\}/g; + s/&at;/\@/g; + s/&/&/g; + return $_; +} + +sub add_footnote +{ + unless (exists $sects{FOOTNOTES}) { + $sects{FOOTNOTES} = "\n=over 4\n\n"; + } + + $sects{FOOTNOTES} .= "=item $fnno.\n\n"; $fnno++; + $sects{FOOTNOTES} .= $_[0]; + $sects{FOOTNOTES} .= "\n\n"; +} + + diff --git a/doc/wget.texi b/doc/wget.texi index ac7d1b65..5e4b4614 100644 --- a/doc/wget.texi +++ b/doc/wget.texi @@ -16,7 +16,7 @@ @c This should really be auto-generated! @set VERSION 1.7-dev -@set UPDATED Dec 2000 +@set UPDATED Jan 2001 @dircategory Net Utilities @dircategory World Wide Web @@ -28,7 +28,9 @@ This file documents the the GNU Wget utility for downloading network data. -Copyright (C) 1996, 1997, 1998, 2000 Free Software Foundation, Inc. +@c man begin COPYRIGHT +Copyright @copyright{} 1996, 1997, 1998, 2000, 2001 Free Software +Foundation, Inc. Permission is granted to make and distribute verbatim copies of this manual provided the copyright notice and this permission notice @@ -47,6 +49,7 @@ Invariant Sections being ``GNU General Public License'' and ``GNU Free Documentation License'', with no Front-Cover Texts, and with no Back-Cover Texts. 
A copy of the license is included in the section entitled ``GNU Free Documentation License''. +@c man end @end ifinfo @titlepage @@ -55,9 +58,19 @@ entitled ``GNU Free Documentation License''. @subtitle Updated for Wget @value{VERSION}, @value{UPDATED} @author by Hrvoje Nik@v{s}i@'{c} and the developers +@ignore +@c man begin AUTHOR +Originally written by Hrvoje Niksic . +@c man end +@c man begin SEEALSO +GNU Info entry for @file{wget}. +@c man end +@end ignore + @page @vskip 0pt plus 1filll -Copyright @copyright{} 1996, 1997, 1998, 2000 Free Software Foundation, Inc. +Copyright @copyright{} 1996, 1997, 1998, 2000, 2001 Free Software +Foundation, Inc. Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.1 or @@ -97,6 +110,7 @@ Copyright @copyright{} 1996, 1997, 1998, 2000 Free Software Foundation, Inc. @cindex overview @cindex features +@c man begin DESCRIPTION GNU Wget is a freely available network utility to retrieve files from the World Wide Web, using @sc{http} (Hyper Text Transfer Protocol) and @sc{ftp} (File Transfer Protocol), the two most widely used Internet @@ -110,8 +124,10 @@ while the user is not logged on. This allows you to start a retrieval and disconnect from the system, letting Wget finish the work. By contrast, most of the Web browsers require constant user's presence, which can be a great hindrance when transferring a lot of data. +@c man end @sp 1 +@c man begin DESCRIPTION @item Wget is capable of descending recursively through the structure of @sc{html} documents and @sc{ftp} directory trees, making a local copy of @@ -119,8 +135,10 @@ the directory hierarchy similar to the one on the remote server. This feature can be used to mirror archives and home pages, or traverse the web in search of data, like a @sc{www} robot (@pxref{Robots}). In that spirit, Wget understands the @code{norobots} convention. 
+@c man end @sp 1 +@c man begin DESCRIPTION @item File name wildcard matching and recursive mirroring of directories are available when retrieving via @sc{ftp}. Wget can read the time-stamp @@ -129,16 +147,20 @@ locally. Thus Wget can see if the remote file has changed since last retrieval, and automatically retrieve the new version if it has. This makes Wget suitable for mirroring of @sc{ftp} sites, as well as home pages. +@c man end @sp 1 +@c man begin DESCRIPTION @item Wget works exceedingly well on slow or unstable connections, retrying the document until it is fully retrieved, or until a user-specified retry count is surpassed. It will try to resume the download from the point of interruption, using @code{REST} with @sc{ftp} and @code{Range} with @sc{http} servers that support them. +@c man end @sp 1 +@c man begin DESCRIPTION @item By default, Wget supports proxy servers, which can lighten the network load, speed up retrieval and provide access behind firewalls. However, @@ -146,32 +168,53 @@ if you are behind a firewall that requires that you use a socks style gateway, you can get the socks library and build Wget with support for socks. Wget also supports the passive @sc{ftp} downloading as an option. +@c man end @sp 1 +@c man begin DESCRIPTION @item Builtin features offer mechanisms to tune which links you wish to follow (@pxref{Following Links}). +@c man end @sp 1 +@c man begin DESCRIPTION @item The retrieval is conveniently traced with printing dots, each dot representing a fixed amount of data received (1KB by default). These representations can be customized to your preferences. +@c man end @sp 1 +@c man begin DESCRIPTION @item Most of the features are fully configurable, either through command line options, or via the initialization file @file{.wgetrc} (@pxref{Startup File}). Wget allows you to define @dfn{global} startup files (@file{/usr/local/etc/wgetrc} by default) for site settings. 
+@c man end + +@ignore +@c man begin FILES +@table @samp +@item /usr/local/etc/wgetrc +Default location of the @dfn{global} startup file. + +@item .wgetrc +User startup file. +@end table +@c man end +@end ignore @sp 1 +@c man begin DESCRIPTION @item Finally, GNU Wget is free software. This means that everyone may use it, redistribute it and/or modify it under the terms of the GNU General Public License, as published by the Free Software Foundation (@pxref{Copying}). @end itemize +@c man end @node Invoking, Recursive Retrieval, Overview, Top @chapter Invoking @@ -183,7 +226,9 @@ Public License, as published by the Free Software Foundation By default, Wget is very simple to invoke. The basic syntax is: @example +@c man begin SYNOPSIS wget [@var{option}]@dots{} [@var{URL}]@dots{} +@c man end @end example Wget will simply download all the @sc{url}s specified on the command @@ -327,6 +372,8 @@ and @file{/~somebody}. You can also clear the lists in @file{.wgetrc} wget -X '' -X /~nobody,/~somebody @end example +@c man begin OPTIONS + @node Basic Startup Options, Logging and Input File Options, Option Syntax, Invoking @section Basic Startup Options @@ -1130,6 +1177,8 @@ This is a useful option, since it guarantees that only the files @xref{Directory-Based Limits}, for more details. @end table +@c man end + @node Recursive Retrieval, Following Links, Invoking, Top @chapter Recursive Retrieval @cindex recursion @@ -2386,17 +2435,18 @@ the magic word @samp{subscribe} in the subject line. Unsubscribe by mailing to @email{wget-unsubscribe@@sunsite.auc.dk}. The mailing list is archived at @url{http://fly.srk.fer.hr/archive/wget}. - +Alternative archive is available at +@url{http://www.mail-archive.com/wget%40sunsite.auc.dk/}. + @node Reporting Bugs, Portability, Mailing List, Various @section Reporting Bugs @cindex bugs @cindex reporting bugs @cindex bug reports +@c man begin BUGS You are welcome to send bug reports about GNU Wget to -@email{bug-wget@@gnu.org}. 
The bugs that you think are of the -interest to the public (i.e. more people should be informed about them) -can be Cc-ed to the mailing list at @email{wget@@sunsite.auc.dk}. +@email{bug-wget@@gnu.org}. Before actually submitting a bug report, please try to follow a few simple guidelines. @@ -2434,6 +2484,7 @@ wget` core} and type @code{where} to get the backtrace. @item Find where the bug is, fix it and send me the patches. :-) @end enumerate +@c man end @node Portability, Signals, Reporting Bugs, Various @section Portability @@ -2755,7 +2806,12 @@ Charlie Negyesi, R. K. Owen, Andrew Pollock, Steve Pothier, +@iftex +Jan P@v{r}ikryl, +@end iftex +@ifinfo Jan Prikryl, +@end ifinfo Marin Purgar, Keith Refson, Tyler Riddle,