From e5408e7db8f3d0c80ce4c51a6b8d67bc26baa815 Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 29 Feb 2000 16:17:23 -0800 Subject: [PATCH] [svn] Implemented new -K / --backup-converted / backup_converted = on option. --- ChangeLog | 4 ++++ TODO | 3 +++ doc/ChangeLog | 4 ++++ doc/wget.texi | 9 +++++++++ src/ChangeLog | 13 ++++++++++++ src/init.c | 1 + src/main.c | 7 ++++++- src/options.h | 4 +++- src/url.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++--- src/url.h | 2 +- 10 files changed, 96 insertions(+), 6 deletions(-) diff --git a/ChangeLog b/ChangeLog index 58b899a2..767b9887 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +2000-02-18 Dan Harkless + + * TODO: When -K is used with -N, check local X.orig against server X. + 1998-06-23 Dave Love * configure.in (exext): Define. diff --git a/TODO b/TODO index 9f547699..f25ac4f2 100644 --- a/TODO +++ b/TODO @@ -76,3 +76,6 @@ particular order. Not all of them are user-visible changes. * Implement more HTTP/1.1 bells and whistles (ETag, Content-MD5 etc.) * Support SSL encryption through SSLeay or OpenSSL. + +* When -K is used with -N, check local file X.orig (if extant) against server + file X. diff --git a/doc/ChangeLog b/doc/ChangeLog index 90f09436..c546473a 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,3 +1,7 @@ +2000-02-18 Dan Harkless + + * wget.texi (-K / --backup-converted): Documented this new option. + 1998-09-10 Hrvoje Niksic * wget.texi (HTTP Options): Warn against masquerading as Mozilla. diff --git a/doc/wget.texi b/doc/wget.texi index 26f624ae..10dbf122 100644 --- a/doc/wget.texi +++ b/doc/wget.texi @@ -832,6 +832,11 @@ Note that only at the end of the download can Wget know which links have been downloaded. Because of that, much of the work done by @samp{-k} will be performed at the end of the downloads. +@cindex backing up converted files +@item -K +@itemx --backup-converted +When converting a file, back up the original version with a @samp{.orig} suffix. + @item -m @itemx --mirror Turn on options suitable for mirroring. This option turns on recursion @@ -1511,6 +1516,10 @@ Enable/disable continuation of the retrieval, the same as @samp{-c} Enable/disable going to background, the same as @samp{-b} (which enables it). +@item backup_converted = on/off +Enable/disable saving pre-converted files with the suffix @samp{.orig} +-- the same as @samp{-K} (which enables it). + @c @item backups = @var{number} @c #### Document me! @item base = @var{string} diff --git a/src/ChangeLog b/src/ChangeLog index 52e16376..ed9e3cbb 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,16 @@ +2000-02-18 Dan Harkless + + * init.c (backup_converted): Added this new option. + + * main.c (-K / --backup-converted): Added this new option. + + * options.h (backup_converted): Added this new option. + + * url.c (convert_links): When backup_converted is specified, save + file X as X.orig before converting. + + * url.h (urlpos): Fixed typo -- said "Rekative" instead of "Relative". + 1998-09-21 Hrvoje Niksic * version.c: Wget 1.5.3 is released. diff --git a/src/init.c b/src/init.c index 9be64e9a..1d004c39 100644 --- a/src/init.c +++ b/src/init.c @@ -84,6 +84,7 @@ static struct { { "addhostdir", &opt.add_hostdir, cmd_boolean }, { "alwaysrest", &opt.always_rest, cmd_boolean }, /* deprecated */ { "background", &opt.background, cmd_boolean }, + { "backupconverted", &opt.backup_converted, cmd_boolean }, { "backups", &opt.backups, cmd_number }, { "base", &opt.base_href, cmd_string }, { "cache", &opt.proxy_cache, cmd_boolean }, diff --git a/src/main.c b/src/main.c index 436ff236..65d5f6ac 100644 --- a/src/main.c +++ b/src/main.c @@ -173,6 +173,7 @@ Recursive retrieval:\n\ -l, --level=NUMBER maximum recursion depth (0 to unlimit).\n\ --delete-after delete downloaded files.\n\ -k, --convert-links convert non-relative links to relative.\n\ + -K, --backup-converted before converting file X, back up as X.orig.\n\ -m, --mirror turn on options suitable for mirroring.\n\ -nr, --dont-remove-listing don\'t remove `.listing\' files.\n\ \n"), _("\ @@ -202,6 +203,7 @@ main (int argc, char *const *argv) { "background", no_argument, NULL, 'b' }, { "continue", no_argument, NULL, 'c' }, { "convert-links", no_argument, NULL, 'k' }, + { "backup-converted", no_argument, NULL, 'K' }, { "debug", no_argument, NULL, 'd' }, { "dont-remove-listing", no_argument, NULL, 21 }, { "email-address", no_argument, NULL, 'E' }, /* undocumented (debug) */ @@ -286,7 +288,7 @@ main (int argc, char *const *argv) initialize (); while ((c = getopt_long (argc, argv, "\ -hVqvdksxmNWrHSLcFbEY:g:T:U:O:l:n:i:o:a:t:D:A:R:P:B:e:Q:X:I:w:", +hVqvdkKsxmNWrHSLcFbEY:g:T:U:O:l:n:i:o:a:t:D:A:R:P:B:e:Q:X:I:w:", long_options, (int *)0)) != EOF) { switch (c) @@ -369,6 +371,9 @@ hVqvdksxmNWrHSLcFbEY:g:T:U:O:l:n:i:o:a:t:D:A:R:P:B:e:Q:X:I:w:", case 'k': setval ("convertlinks", "on"); break; + case 'K': + setval ("backupconverted", "on"); + break; case 'L': setval ("relativeonly", "on"); break; diff --git a/src/options.h b/src/options.h index 004cfb0d..cd53f8e5 100644 --- a/src/options.h +++ b/src/options.h @@ -113,7 +113,9 @@ struct options #endif /* DEBUG */ int timestamping; /* Whether to use time-stamping. */ - int backups; /* Are backups made? */ + + int backup_converted; /* Do we save pre-converted files as *.orig? */ + int backups; /* Are numeric backups made? */ char *useragent; /* Naughty User-Agent, which can be set to something other than diff --git a/src/url.c b/src/url.c index f8bd9673..87a97b30 100644 --- a/src/url.c +++ b/src/url.c @@ -1366,9 +1366,10 @@ no_proxy_match (const char *host, const char **no_proxy) void convert_links (const char *file, urlpos *l) { - FILE *fp; - char *buf, *p, *p2; - long size; + FILE *fp; + char *buf, *p, *p2; + long size; + static slist* converted_files = NULL; logprintf (LOG_VERBOSE, _("Converting %s... "), file); /* Read from the file.... */ @@ -1382,6 +1383,54 @@ convert_links (const char *file, urlpos *l) /* ...to a buffer. */ load_file (fp, &buf, &size); fclose (fp); + if (opt.backup_converted) + /* Rather than just writing over the original .html file with the converted + version, save the former to *.orig. */ + { + /* Construct the backup filename as the original name plus ".orig". */ + size_t filename_len = strlen(file); + char* filename_plus_orig_suffix = malloc(filename_len + sizeof(".orig")); + int already_wrote_backup_file = 0; + slist* converted_file_ptr; + + strcpy(filename_plus_orig_suffix, file); + strcpy(filename_plus_orig_suffix + filename_len, ".orig"); + + /* We can get called twice on the same URL thanks to the + convert_all_links() call in main(). If we write the .orig file each + time in such a case, it'll end up containing the first-pass conversion, + not the original file. */ + converted_file_ptr = converted_files; + while (converted_file_ptr != NULL) + if (strcmp(converted_file_ptr->string, file) == 0) + { + already_wrote_backup_file = 1; + break; + } + else + converted_file_ptr = converted_file_ptr->next; + + if (!already_wrote_backup_file) + { + /* Rename to .orig before former gets written over. */ + if (rename(file, filename_plus_orig_suffix) != 0) + logprintf (LOG_NOTQUIET, _("Cannot back up %s as %s: %s\n"), + file, filename_plus_orig_suffix, strerror (errno)); + + /* Remember that we've already written a .orig backup for this file. + Note that we never free this memory since we need it till the + convert_all_links() call, which is one of the last things the + program does before terminating. BTW, I'm not sure if it would be + safe to just set converted_file_ptr->string to file below, rather + than making a copy of the string... */ + converted_file_ptr = malloc(sizeof(slist)); + converted_file_ptr->string = strdup(file); + converted_file_ptr->next = converted_files; + converted_files = converted_file_ptr; + } + + free(filename_plus_orig_suffix); + } /* Now open the file for writing. */ fp = fopen (file, "wb"); if (!fp) diff --git a/src/url.h b/src/url.h index 5c7df97e..771f17a4 100644 --- a/src/url.h +++ b/src/url.h @@ -58,7 +58,7 @@ typedef struct _urlpos char *url; /* URL */ char *local_name; /* Local file to which it was saved */ enum uflags flags; /* Various flags */ - int pos, size; /* Rekative position in the buffer */ + int pos, size; /* Relative position in the buffer */ struct _urlpos *next; /* Next struct in list */ } urlpos;