1
0
mirror of https://github.com/moparisthebest/wget synced 2024-07-03 16:38:41 -04:00

Trivial fixes for C89 compliance

This commit is contained in:
Tim Rühsen 2014-11-20 09:51:35 +01:00
parent f9646a0c14
commit 1356e90a14
12 changed files with 129 additions and 92 deletions

View File

@ -518,12 +518,12 @@ check_domain_match (const char *cookie_domain, const char *host)
{ {
#ifdef HAVE_LIBPSL #ifdef HAVE_LIBPSL
DEBUGP (("cdm: 1"));
char *cookie_domain_lower = NULL; char *cookie_domain_lower = NULL;
char *host_lower = NULL; char *host_lower = NULL;
const psl_ctx_t *psl; const psl_ctx_t *psl;
int is_acceptable; int is_acceptable;
DEBUGP (("cdm: 1"));
if (!(psl = psl_builtin())) if (!(psl = psl_builtin()))
{ {
DEBUGP (("\nlibpsl not built with a public suffix list. " DEBUGP (("\nlibpsl not built with a public suffix list. "

View File

@ -963,16 +963,18 @@ ftp_list (int csock, const char *file, bool avoid_list_a, bool avoid_list,
bool ok = false; bool ok = false;
size_t i = 0; size_t i = 0;
*list_a_used = false;
/* 2013-10-12 Andrea Urbani (matfanjol) /* 2013-10-12 Andrea Urbani (matfanjol)
For more information about LIST and "LIST -a" please look at ftp.c, For more information about LIST and "LIST -a" please look at ftp.c,
function getftp, text "__LIST_A_EXPLANATION__". function getftp, text "__LIST_A_EXPLANATION__".
If somebody changes the following commands, please also check the If somebody changes the following commands, please also check the
later "i" variable. */ later "i" variable. */
const char *list_commands[] = { "LIST -a", static const char *list_commands[] = {
"LIST" }; "LIST -a",
"LIST"
};
*list_a_used = false;
if (avoid_list_a) if (avoid_list_a)
{ {

View File

@ -2221,9 +2221,9 @@ has_insecure_name_p (const char *s)
static bool static bool
is_invalid_entry (struct fileinfo *f) is_invalid_entry (struct fileinfo *f)
{ {
struct fileinfo *cur; struct fileinfo *cur = f;
cur = f;
char *f_name = f->name; char *f_name = f->name;
/* If the node we're currently checking has a duplicate later, we eliminate /* If the node we're currently checking has a duplicate later, we eliminate
* the current node and leave the next one intact. */ * the current node and leave the next one intact. */
while (cur->next) while (cur->next)

View File

@ -122,9 +122,10 @@ ssl_init (void)
while ((dent = readdir (dir)) != NULL) while ((dent = readdir (dir)) != NULL)
{ {
struct stat st; struct stat st;
char ca_file[dirlen + strlen(dent->d_name) + 2]; size_t ca_file_length = dirlen + strlen(dent->d_name) + 2;
char *ca_file = alloca(ca_file_length);
snprintf (ca_file, sizeof(ca_file), "%s/%s", ca_directory, dent->d_name); snprintf (ca_file, ca_file_length, "%s/%s", ca_directory, dent->d_name);
if (stat (ca_file, &st) != 0) if (stat (ca_file, &st) != 0)
continue; continue;
@ -432,9 +433,10 @@ ssl_connect_wget (int fd, const char *hostname)
struct wgnutls_transport_context *ctx; struct wgnutls_transport_context *ctx;
gnutls_session_t session; gnutls_session_t session;
int err,alert; int err,alert;
gnutls_init (&session, GNUTLS_CLIENT);
const char *str; const char *str;
gnutls_init (&session, GNUTLS_CLIENT);
/* We set the server name but only if it's not an IP address. */ /* We set the server name but only if it's not an IP address. */
if (! is_valid_ip_address (hostname)) if (! is_valid_ip_address (hostname))
{ {

View File

@ -592,7 +592,7 @@ cache_query (const char *host)
al = hash_table_get (host_name_addresses_map, host); al = hash_table_get (host_name_addresses_map, host);
if (al) if (al)
{ {
DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, al)); DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, (void *) al));
++al->refcount; ++al->refcount;
return al; return al;
} }

View File

@ -788,6 +788,7 @@ get_urls_file (const char *file)
{ {
int up_error_code; int up_error_code;
char *url_text; char *url_text;
char *new_url;
struct urlpos *entry; struct urlpos *entry;
struct url *url; struct url *url;
@ -822,7 +823,7 @@ get_urls_file (const char *file)
url_text = merged; url_text = merged;
} }
char *new_url = rewrite_shorthand_url (url_text); new_url = rewrite_shorthand_url (url_text);
if (new_url) if (new_url)
{ {
xfree (url_text); xfree (url_text);

View File

@ -1532,6 +1532,7 @@ read_response_body (struct http_stat *hs, int sock, FILE *fp, wgint contlen,
int warc_payload_offset = 0; int warc_payload_offset = 0;
FILE *warc_tmp = NULL; FILE *warc_tmp = NULL;
int warcerr = 0; int warcerr = 0;
int flags = 0;
if (opt.warc_filename != NULL) if (opt.warc_filename != NULL)
{ {
@ -1568,7 +1569,6 @@ read_response_body (struct http_stat *hs, int sock, FILE *fp, wgint contlen,
} }
/* Read the response body. */ /* Read the response body. */
int flags = 0;
if (contlen != -1) if (contlen != -1)
/* If content-length is present, read that much; otherwise, read /* If content-length is present, read that much; otherwise, read
until EOF. The HTTP spec doesn't require the server to until EOF. The HTTP spec doesn't require the server to
@ -2147,11 +2147,13 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
write_error = fd_write (sock, opt.body_data, body_data_size, -1); write_error = fd_write (sock, opt.body_data, body_data_size, -1);
if (write_error >= 0 && warc_tmp != NULL) if (write_error >= 0 && warc_tmp != NULL)
{ {
int warc_tmp_written;
/* Remember end of headers / start of payload. */ /* Remember end of headers / start of payload. */
warc_payload_offset = ftello (warc_tmp); warc_payload_offset = ftello (warc_tmp);
/* Write a copy of the data to the WARC record. */ /* Write a copy of the data to the WARC record. */
int warc_tmp_written = fwrite (opt.body_data, 1, body_data_size, warc_tmp); warc_tmp_written = fwrite (opt.body_data, 1, body_data_size, warc_tmp);
if (warc_tmp_written != body_data_size) if (warc_tmp_written != body_data_size)
write_error = -2; write_error = -2;
} }
@ -2334,6 +2336,7 @@ read_header:
if (statcode == HTTP_STATUS_UNAUTHORIZED) if (statcode == HTTP_STATUS_UNAUTHORIZED)
{ {
/* Authorization is required. */ /* Authorization is required. */
uerr_t auth_err = RETROK;
/* Normally we are not interested in the response body. /* Normally we are not interested in the response body.
But if we are writing a WARC file we are: we like to keep everything. */ But if we are writing a WARC file we are: we like to keep everything. */
@ -2371,7 +2374,6 @@ read_header:
} }
pconn.authorized = false; pconn.authorized = false;
uerr_t auth_err = RETROK;
if (!auth_finished && (user && passwd)) if (!auth_finished && (user && passwd))
{ {
/* IIS sends multiple copies of WWW-Authenticate, one with /* IIS sends multiple copies of WWW-Authenticate, one with
@ -3864,7 +3866,7 @@ digest_authentication_encode (const char *au, const char *user,
snprintf (cnonce, sizeof (cnonce), "%08x", random_number(INT_MAX)); snprintf (cnonce, sizeof (cnonce), "%08x", random_number(INT_MAX));
md5_init_ctx (&ctx); md5_init_ctx (&ctx);
// md5_process_bytes (hash, MD5_DIGEST_SIZE, &ctx); /* md5_process_bytes (hash, MD5_DIGEST_SIZE, &ctx); */
md5_process_bytes (a1buf, MD5_DIGEST_SIZE * 2, &ctx); md5_process_bytes (a1buf, MD5_DIGEST_SIZE * 2, &ctx);
md5_process_bytes ((unsigned char *)":", 1, &ctx); md5_process_bytes ((unsigned char *)":", 1, &ctx);
md5_process_bytes ((unsigned char *)nonce, strlen (nonce), &ctx); md5_process_bytes ((unsigned char *)nonce, strlen (nonce), &ctx);

View File

@ -1010,18 +1010,21 @@ char *program_argstring; /* Needed by wget_warc.c. */
int int
main (int argc, char **argv) main (int argc, char **argv)
{ {
char **url, **t; char **url, **t, *p;
int i, ret, longindex; int i, ret, longindex;
int nurl; int nurl;
int retconf;
int argstring_length;
bool use_userconfig = false;
bool noconfig = false;
bool append_to_log = false; bool append_to_log = false;
total_downloaded_bytes = 0;
program_name = argv[0];
struct ptimer *timer = ptimer_new (); struct ptimer *timer = ptimer_new ();
double start_time = ptimer_measure (timer); double start_time = ptimer_measure (timer);
total_downloaded_bytes = 0;
program_name = argv[0];
i18n_initialize (); i18n_initialize ();
/* Construct the name of the executable, without the directory part. */ /* Construct the name of the executable, without the directory part. */
@ -1042,10 +1045,9 @@ main (int argc, char **argv)
#endif #endif
/* Construct the arguments string. */ /* Construct the arguments string. */
int argstring_length = 1; for (argstring_length = 1, i = 1; i < argc; i++)
for (i = 1; i < argc; i++)
argstring_length += strlen (argv[i]) + 2 + 1; argstring_length += strlen (argv[i]) + 2 + 1;
char *p = program_argstring = malloc (argstring_length * sizeof (char)); p = program_argstring = malloc (argstring_length * sizeof (char));
if (p == NULL) if (p == NULL)
{ {
fprintf (stderr, _("Memory allocation problem\n")); fprintf (stderr, _("Memory allocation problem\n"));
@ -1053,8 +1055,10 @@ main (int argc, char **argv)
} }
for (i = 1; i < argc; i++) for (i = 1; i < argc; i++)
{ {
int arglen;
*p++ = '"'; *p++ = '"';
int arglen = strlen (argv[i]); arglen = strlen (argv[i]);
memcpy (p, argv[i], arglen); memcpy (p, argv[i], arglen);
p += arglen; p += arglen;
*p++ = '"'; *p++ = '"';
@ -1070,9 +1074,6 @@ main (int argc, char **argv)
/* This separate getopt_long is needed to find the user config file /* This separate getopt_long is needed to find the user config file
option ("--config") and parse it before the other user options. */ option ("--config") and parse it before the other user options. */
longindex = -1; longindex = -1;
int retconf;
bool use_userconfig = false;
bool noconfig = false;
while ((retconf = getopt_long (argc, argv, while ((retconf = getopt_long (argc, argv,
short_options, long_options, &longindex)) != -1) short_options, long_options, &longindex)) != -1)
@ -1731,10 +1732,11 @@ outputting to a regular file.\n"));
total_downloaded_bytes != 0) total_downloaded_bytes != 0)
{ {
double end_time = ptimer_measure (timer); double end_time = ptimer_measure (timer);
ptimer_destroy (timer);
char *wall_time = xstrdup (secs_to_human_time (end_time - start_time)); char *wall_time = xstrdup (secs_to_human_time (end_time - start_time));
char *download_time = xstrdup (secs_to_human_time (total_download_time)); char *download_time = xstrdup (secs_to_human_time (total_download_time));
ptimer_destroy (timer);
logprintf (LOG_NOTQUIET, logprintf (LOG_NOTQUIET,
_("FINISHED --%s--\nTotal wall clock time: %s\n" _("FINISHED --%s--\nTotal wall clock time: %s\n"
"Downloaded: %d files, %s in %s (%s)\n"), "Downloaded: %d files, %s in %s (%s)\n"),

View File

@ -169,6 +169,8 @@ static int ssl_true_initialized = 0;
bool bool
ssl_init (void) ssl_init (void)
{ {
SSL_METHOD const *meth;
#if OPENSSL_VERSION_NUMBER >= 0x00907000 #if OPENSSL_VERSION_NUMBER >= 0x00907000
if (ssl_true_initialized == 0) if (ssl_true_initialized == 0)
{ {
@ -177,8 +179,6 @@ ssl_init (void)
} }
#endif #endif
SSL_METHOD const *meth;
if (ssl_ctx) if (ssl_ctx)
/* The SSL has already been initialized. */ /* The SSL has already been initialized. */
return true; return true;

View File

@ -946,6 +946,8 @@ create_image (struct bar_progress *bp, double dl_total_time, bool done)
/* The difference between the number of bytes used, /* The difference between the number of bytes used,
and the number of columns used. */ and the number of columns used. */
int bytes_cols_diff = 0; int bytes_cols_diff = 0;
int cols_diff;
const char *down_size;
if (progress_size < 5) if (progress_size < 5)
progress_size = 0; progress_size = 0;
@ -963,6 +965,7 @@ create_image (struct bar_progress *bp, double dl_total_time, bool done)
int offset_cols; int offset_cols;
int bytes_in_filename, offset_bytes, col; int bytes_in_filename, offset_bytes, col;
int *cols_ret = &col; int *cols_ret = &col;
int padding;
if (((orig_filename_cols > MAX_FILENAME_COLS) && !opt.noscroll) && !done) if (((orig_filename_cols > MAX_FILENAME_COLS) && !opt.noscroll) && !done)
offset_cols = ((int) bp->tick) % (orig_filename_cols - MAX_FILENAME_COLS + 1); offset_cols = ((int) bp->tick) % (orig_filename_cols - MAX_FILENAME_COLS + 1);
@ -972,7 +975,7 @@ create_image (struct bar_progress *bp, double dl_total_time, bool done)
bytes_in_filename = cols_to_bytes (bp->f_download + offset_bytes, MAX_FILENAME_COLS, cols_ret); bytes_in_filename = cols_to_bytes (bp->f_download + offset_bytes, MAX_FILENAME_COLS, cols_ret);
memcpy (p, bp->f_download + offset_bytes, bytes_in_filename); memcpy (p, bp->f_download + offset_bytes, bytes_in_filename);
p += bytes_in_filename; p += bytes_in_filename;
int padding = MAX_FILENAME_COLS - *cols_ret; padding = MAX_FILENAME_COLS - *cols_ret;
for (;padding;padding--) for (;padding;padding--)
*p++ = ' '; *p++ = ' ';
*p++ = ' '; *p++ = ' ';
@ -1055,8 +1058,8 @@ create_image (struct bar_progress *bp, double dl_total_time, bool done)
++bp->tick; ++bp->tick;
/* " 234.56M" */ /* " 234.56M" */
const char * down_size = human_readable (size, 1000, 2); down_size = human_readable (size, 1000, 2);
int cols_diff = 7 - count_cols (down_size); cols_diff = 7 - count_cols (down_size);
while (cols_diff > 0) while (cols_diff > 0)
{ {
*p++=' '; *p++=' ';

View File

@ -378,8 +378,10 @@ fd_read_body (const char *downloaded_filename, int fd, FILE *out, wgint toread,
if (ret > 0) if (ret > 0)
{ {
int write_res;
sum_read += ret; sum_read += ret;
int write_res = write_data (out, out2, dlbuf, ret, &skip, &sum_written); write_res = write_data (out, out2, dlbuf, ret, &skip, &sum_written);
if (write_res < 0) if (write_res < 0)
{ {
ret = (write_res == -3) ? -3 : -2; ret = (write_res == -3) ? -3 : -2;
@ -1056,7 +1058,7 @@ retrieve_from_file (const char *file, bool html, int *count)
for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count) for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
{ {
char *filename = NULL, *new_file = NULL; char *filename = NULL, *new_file = NULL, *proxy;
int dt; int dt;
struct iri *tmpiri = iri_dup (iri); struct iri *tmpiri = iri_dup (iri);
struct url *parsed_url = NULL; struct url *parsed_url = NULL;
@ -1072,7 +1074,7 @@ retrieve_from_file (const char *file, bool html, int *count)
parsed_url = url_parse (cur_url->url->url, NULL, tmpiri, true); parsed_url = url_parse (cur_url->url->url, NULL, tmpiri, true);
char *proxy = getproxy (cur_url->url); proxy = getproxy (cur_url->url);
if ((opt.recursive || opt.page_requisites) if ((opt.recursive || opt.page_requisites)
&& (cur_url->url->scheme != SCHEME_FTP || proxy)) && (cur_url->url->scheme != SCHEME_FTP || proxy))
{ {
@ -1285,9 +1287,11 @@ bool
url_uses_proxy (struct url * u) url_uses_proxy (struct url * u)
{ {
bool ret; bool ret;
char *proxy;
if (!u) if (!u)
return false; return false;
char *proxy = getproxy (u); proxy = getproxy (u);
ret = proxy != NULL; ret = proxy != NULL;
free(proxy); free(proxy);
return ret; return ret;

View File

@ -165,10 +165,12 @@ warc_write_buffer (const char *buffer, size_t size)
static bool static bool
warc_write_string (const char *str) warc_write_string (const char *str)
{ {
size_t n;
if (!warc_write_ok) if (!warc_write_ok)
return false; return false;
size_t n = strlen (str); n = strlen (str);
if (n != warc_write_buffer (str, n)) if (n != warc_write_buffer (str, n))
warc_write_ok = false; warc_write_ok = false;
@ -257,6 +259,9 @@ warc_write_block_from_file (FILE *data_in)
{ {
/* Add the Content-Length header. */ /* Add the Content-Length header. */
char content_length[MAX_INT_TO_STRING_LEN(off_t)]; char content_length[MAX_INT_TO_STRING_LEN(off_t)];
char buffer[BUFSIZ];
size_t s;
fseeko (data_in, 0L, SEEK_END); fseeko (data_in, 0L, SEEK_END);
number_to_string (content_length, ftello (data_in)); number_to_string (content_length, ftello (data_in));
warc_write_header ("Content-Length", content_length); warc_write_header ("Content-Length", content_length);
@ -268,8 +273,6 @@ warc_write_block_from_file (FILE *data_in)
warc_write_ok = false; warc_write_ok = false;
/* Copy the data in the file to the WARC record. */ /* Copy the data in the file to the WARC record. */
char buffer[BUFSIZ];
size_t s;
while (warc_write_ok && (s = fread (buffer, 1, BUFSIZ, data_in)) > 0) while (warc_write_ok && (s = fread (buffer, 1, BUFSIZ, data_in)) > 0)
{ {
if (warc_write_buffer (buffer, s) < s) if (warc_write_buffer (buffer, s) < s)
@ -294,6 +297,11 @@ warc_write_end_record (void)
/* We start a new gzip stream for each record. */ /* We start a new gzip stream for each record. */
if (warc_write_ok && warc_current_gzfile) if (warc_write_ok && warc_current_gzfile)
{ {
char extra_header[EXTRA_GZIP_HEADER_SIZE];
char static_header[GZIP_STATIC_HEADER_SIZE];
off_t current_offset, uncompressed_size, compressed_size;
size_t result;
if (gzclose (warc_current_gzfile) != Z_OK) if (gzclose (warc_current_gzfile) != Z_OK)
{ {
warc_write_ok = false; warc_write_ok = false;
@ -319,17 +327,16 @@ warc_write_end_record (void)
*/ */
/* Calculate the uncompressed and compressed sizes. */ /* Calculate the uncompressed and compressed sizes. */
off_t current_offset = ftello (warc_current_file); current_offset = ftello (warc_current_file);
off_t uncompressed_size = current_offset - warc_current_gzfile_offset; uncompressed_size = current_offset - warc_current_gzfile_offset;
off_t compressed_size = warc_current_gzfile_uncompressed_size; compressed_size = warc_current_gzfile_uncompressed_size;
/* Go back to the static GZIP header. */ /* Go back to the static GZIP header. */
fseeko (warc_current_file, warc_current_gzfile_offset fseeko (warc_current_file, warc_current_gzfile_offset
+ EXTRA_GZIP_HEADER_SIZE, SEEK_SET); + EXTRA_GZIP_HEADER_SIZE, SEEK_SET);
/* Read the header. */ /* Read the header. */
char static_header[GZIP_STATIC_HEADER_SIZE]; result = fread (static_header, 1, GZIP_STATIC_HEADER_SIZE,
size_t result = fread (static_header, 1, GZIP_STATIC_HEADER_SIZE,
warc_current_file); warc_current_file);
if (result != GZIP_STATIC_HEADER_SIZE) if (result != GZIP_STATIC_HEADER_SIZE)
{ {
@ -346,7 +353,6 @@ warc_write_end_record (void)
fwrite (static_header, 1, GZIP_STATIC_HEADER_SIZE, warc_current_file); fwrite (static_header, 1, GZIP_STATIC_HEADER_SIZE, warc_current_file);
/* Prepare the extra GZIP header. */ /* Prepare the extra GZIP header. */
char extra_header[EXTRA_GZIP_HEADER_SIZE];
/* XLEN, the length of the extra header fields. */ /* XLEN, the length of the extra header fields. */
extra_header[0] = ((EXTRA_GZIP_HEADER_SIZE - 2) & 255); extra_header[0] = ((EXTRA_GZIP_HEADER_SIZE - 2) & 255);
extra_header[1] = ((EXTRA_GZIP_HEADER_SIZE - 2) >> 8) & 255; extra_header[1] = ((EXTRA_GZIP_HEADER_SIZE - 2) >> 8) & 255;
@ -660,16 +666,18 @@ warc_uuid_str (char *urn_str)
static bool static bool
warc_write_warcinfo_record (char *filename) warc_write_warcinfo_record (char *filename)
{ {
FILE *warc_tmp;
char timestamp[22];
char *filename_copy, *filename_basename;
/* Write warc-info record as the first record of the file. */ /* Write warc-info record as the first record of the file. */
/* We add the record id of this info record to the other records in the /* We add the record id of this info record to the other records in the
file. */ file. */
warc_current_warcinfo_uuid_str = (char *) malloc (48); warc_current_warcinfo_uuid_str = (char *) malloc (48);
warc_uuid_str (warc_current_warcinfo_uuid_str); warc_uuid_str (warc_current_warcinfo_uuid_str);
char timestamp[22];
warc_timestamp (timestamp); warc_timestamp (timestamp);
char *filename_copy, *filename_basename;
filename_copy = strdup (filename); filename_copy = strdup (filename);
filename_basename = strdup (basename (filename_copy)); filename_basename = strdup (basename (filename_copy));
@ -681,7 +689,7 @@ warc_write_warcinfo_record (char *filename)
warc_write_header ("WARC-Filename", filename_basename); warc_write_header ("WARC-Filename", filename_basename);
/* Create content. */ /* Create content. */
FILE *warc_tmp = warc_tempfile (); warc_tmp = warc_tempfile ();
if (warc_tmp == NULL) if (warc_tmp == NULL)
{ {
free (filename_copy); free (filename_copy);
@ -731,22 +739,6 @@ warc_write_warcinfo_record (char *filename)
static bool static bool
warc_start_new_file (bool meta) warc_start_new_file (bool meta)
{ {
if (opt.warc_filename == NULL)
return false;
if (warc_current_file != NULL)
fclose (warc_current_file);
free (warc_current_warcinfo_uuid_str);
free (warc_current_filename);
warc_current_file_number++;
int base_filename_length = strlen (opt.warc_filename);
/* filename format: base + "-" + 5 digit serial number + ".warc.gz" */
char *new_filename = malloc (base_filename_length + 1 + 5 + 8 + 1);
warc_current_filename = new_filename;
#ifdef __VMS #ifdef __VMS
# define WARC_GZ "warc-gz" # define WARC_GZ "warc-gz"
#else /* def __VMS */ #else /* def __VMS */
@ -759,6 +751,25 @@ warc_start_new_file (bool meta)
const char *extension = "warc"; const char *extension = "warc";
#endif #endif
int base_filename_length;
char *new_filename;
if (opt.warc_filename == NULL)
return false;
if (warc_current_file != NULL)
fclose (warc_current_file);
free (warc_current_warcinfo_uuid_str);
free (warc_current_filename);
warc_current_file_number++;
base_filename_length = strlen (opt.warc_filename);
/* filename format: base + "-" + 5 digit serial number + ".warc.gz" */
new_filename = malloc (base_filename_length + 1 + 5 + 8 + 1);
warc_current_filename = new_filename;
/* If max size is enabled, we add a serial number to the file names. */ /* If max size is enabled, we add a serial number to the file names. */
if (meta) if (meta)
sprintf (new_filename, "%s-meta.%s", opt.warc_filename, extension); sprintf (new_filename, "%s-meta.%s", opt.warc_filename, extension);
@ -830,12 +841,13 @@ static bool
warc_parse_cdx_header (char *lineptr, int *field_num_original_url, warc_parse_cdx_header (char *lineptr, int *field_num_original_url,
int *field_num_checksum, int *field_num_record_id) int *field_num_checksum, int *field_num_record_id)
{ {
char *token;
char *save_ptr;
*field_num_original_url = -1; *field_num_original_url = -1;
*field_num_checksum = -1; *field_num_checksum = -1;
*field_num_record_id = -1; *field_num_record_id = -1;
char *token;
char *save_ptr;
token = strtok_r (lineptr, CDX_FIELDSEP, &save_ptr); token = strtok_r (lineptr, CDX_FIELDSEP, &save_ptr);
if (token != NULL && strcmp (token, "CDX") == 0) if (token != NULL && strcmp (token, "CDX") == 0)
@ -876,13 +888,12 @@ warc_process_cdx_line (char *lineptr, int field_num_original_url,
char *original_url = NULL; char *original_url = NULL;
char *checksum = NULL; char *checksum = NULL;
char *record_id = NULL; char *record_id = NULL;
char *token; char *token;
char *save_ptr; char *save_ptr;
token = strtok_r (lineptr, CDX_FIELDSEP, &save_ptr); int field_num = 0;
/* Read this line to get the fields we need. */ /* Read this line to get the fields we need. */
int field_num = 0; token = strtok_r (lineptr, CDX_FIELDSEP, &save_ptr);
while (token != NULL) while (token != NULL)
{ {
char **val; char **val;
@ -944,17 +955,17 @@ warc_process_cdx_line (char *lineptr, int field_num_original_url,
static bool static bool
warc_load_cdx_dedup_file (void) warc_load_cdx_dedup_file (void)
{ {
FILE *f = fopen (opt.warc_cdx_dedup_filename, "r"); FILE *f;
if (f == NULL) char *lineptr = NULL;
return false; size_t n = 0;
ssize_t line_length;
int field_num_original_url = -1; int field_num_original_url = -1;
int field_num_checksum = -1; int field_num_checksum = -1;
int field_num_record_id = -1; int field_num_record_id = -1;
char *lineptr = NULL; f = fopen (opt.warc_cdx_dedup_filename, "r");
size_t n = 0; if (f == NULL)
ssize_t line_length; return false;
/* The first line should contain the CDX header. /* The first line should contain the CDX header.
Format: " CDX x x x x x" Format: " CDX x x x x x"
@ -983,6 +994,8 @@ _("CDX file does not list record ids. (Missing column 'u'.)\n"));
} }
else else
{ {
int nrecords;
/* Initialize the table. */ /* Initialize the table. */
warc_cdx_dedup_table = hash_table_new (1000, warc_hash_sha1_digest, warc_cdx_dedup_table = hash_table_new (1000, warc_hash_sha1_digest,
warc_cmp_sha1_digest); warc_cmp_sha1_digest);
@ -1000,7 +1013,7 @@ _("CDX file does not list record ids. (Missing column 'u'.)\n"));
while (line_length != -1); while (line_length != -1);
/* Print results. */ /* Print results. */
int nrecords = hash_table_count (warc_cdx_dedup_table); nrecords = hash_table_count (warc_cdx_dedup_table);
logprintf (LOG_VERBOSE, ngettext ("Loaded %d record from CDX.\n\n", logprintf (LOG_VERBOSE, ngettext ("Loaded %d record from CDX.\n\n",
"Loaded %d records from CDX.\n\n", "Loaded %d records from CDX.\n\n",
nrecords), nrecords),
@ -1020,11 +1033,12 @@ _("CDX file does not list record ids. (Missing column 'u'.)\n"));
static struct warc_cdx_record * static struct warc_cdx_record *
warc_find_duplicate_cdx_record (char *url, char *sha1_digest_payload) warc_find_duplicate_cdx_record (char *url, char *sha1_digest_payload)
{ {
struct warc_cdx_record *rec_existing;
if (warc_cdx_dedup_table == NULL) if (warc_cdx_dedup_table == NULL)
return NULL; return NULL;
struct warc_cdx_record *rec_existing rec_existing = hash_table_get (warc_cdx_dedup_table, sha1_digest_payload);
= hash_table_get (warc_cdx_dedup_table, sha1_digest_payload);
if (rec_existing && strcmp (rec_existing->url, url) == 0) if (rec_existing && strcmp (rec_existing->url, url) == 0)
return rec_existing; return rec_existing;
@ -1095,11 +1109,13 @@ warc_init (void)
static void static void
warc_write_metadata (void) warc_write_metadata (void)
{ {
char manifest_uuid[48];
FILE *warc_tmp_fp;
/* If there are multiple WARC files, the metadata should be written to a separate file. */ /* If there are multiple WARC files, the metadata should be written to a separate file. */
if (opt.warc_maxsize > 0) if (opt.warc_maxsize > 0)
warc_start_new_file (true); warc_start_new_file (true);
char manifest_uuid [48];
warc_uuid_str (manifest_uuid); warc_uuid_str (manifest_uuid);
fflush (warc_manifest_fp); fflush (warc_manifest_fp);
@ -1109,7 +1125,7 @@ warc_write_metadata (void)
warc_manifest_fp, -1); warc_manifest_fp, -1);
/* warc_write_resource_record has closed warc_manifest_fp. */ /* warc_write_resource_record has closed warc_manifest_fp. */
FILE * warc_tmp_fp = warc_tempfile (); warc_tmp_fp = warc_tempfile ();
if (warc_tmp_fp == NULL) if (warc_tmp_fp == NULL)
{ {
logprintf (LOG_NOTQUIET, _("Could not open temporary WARC file.\n")); logprintf (LOG_NOTQUIET, _("Could not open temporary WARC file.\n"));
@ -1164,6 +1180,8 @@ FILE *
warc_tempfile (void) warc_tempfile (void)
{ {
char filename[100]; char filename[100];
int fd;
if (path_search (filename, 100, opt.warc_tempdir, "wget", true) == -1) if (path_search (filename, 100, opt.warc_tempdir, "wget", true) == -1)
return NULL; return NULL;
@ -1182,7 +1200,7 @@ warc_tempfile (void)
return fopen (tfn, "w+", "fop=tmd"); /* Create auto-delete temp file. */ return fopen (tfn, "w+", "fop=tmd"); /* Create auto-delete temp file. */
} }
#else /* def __VMS */ #else /* def __VMS */
int fd = mkostemp (filename, O_TEMPORARY); fd = mkostemp (filename, O_TEMPORARY);
if (fd < 0) if (fd < 0)
return NULL; return NULL;
@ -1246,6 +1264,9 @@ warc_write_cdx_record (const char *url, const char *timestamp_str,
{ {
/* Transform the timestamp. */ /* Transform the timestamp. */
char timestamp_str_cdx[15]; char timestamp_str_cdx[15];
char offset_string[MAX_INT_TO_STRING_LEN(off_t)];
const char *checksum;
memcpy (timestamp_str_cdx , timestamp_str , 4); /* "YYYY" "-" */ memcpy (timestamp_str_cdx , timestamp_str , 4); /* "YYYY" "-" */
memcpy (timestamp_str_cdx + 4, timestamp_str + 5, 2); /* "mm" "-" */ memcpy (timestamp_str_cdx + 4, timestamp_str + 5, 2); /* "mm" "-" */
memcpy (timestamp_str_cdx + 6, timestamp_str + 8, 2); /* "dd" "T" */ memcpy (timestamp_str_cdx + 6, timestamp_str + 8, 2); /* "dd" "T" */
@ -1255,7 +1276,6 @@ warc_write_cdx_record (const char *url, const char *timestamp_str,
timestamp_str_cdx[14] = '\0'; timestamp_str_cdx[14] = '\0';
/* Rewrite the checksum. */ /* Rewrite the checksum. */
const char *checksum;
if (payload_digest != NULL) if (payload_digest != NULL)
checksum = payload_digest + 5; /* Skip the "sha1:" */ checksum = payload_digest + 5; /* Skip the "sha1:" */
else else
@ -1266,7 +1286,6 @@ warc_write_cdx_record (const char *url, const char *timestamp_str,
if (redirect_location == NULL || strlen(redirect_location) == 0) if (redirect_location == NULL || strlen(redirect_location) == 0)
redirect_location = "-"; redirect_location = "-";
char offset_string[MAX_INT_TO_STRING_LEN(off_t)];
number_to_string (offset_string, offset); number_to_string (offset_string, offset);
/* Print the CDX line. */ /* Print the CDX line. */
@ -1298,10 +1317,11 @@ warc_write_revisit_record (char *url, char *timestamp_str,
char *refers_to, ip_address *ip, FILE *body) char *refers_to, ip_address *ip, FILE *body)
{ {
char revisit_uuid [48]; char revisit_uuid [48];
warc_uuid_str (revisit_uuid);
char *block_digest = NULL; char *block_digest = NULL;
char sha1_res_block[SHA1_DIGEST_SIZE]; char sha1_res_block[SHA1_DIGEST_SIZE];
warc_uuid_str (revisit_uuid);
sha1_stream (body, sha1_res_block); sha1_stream (body, sha1_res_block);
block_digest = warc_base32_sha1_digest (sha1_res_block); block_digest = warc_base32_sha1_digest (sha1_res_block);
@ -1351,6 +1371,8 @@ warc_write_response_record (char *url, char *timestamp_str,
char *payload_digest = NULL; char *payload_digest = NULL;
char sha1_res_block[SHA1_DIGEST_SIZE]; char sha1_res_block[SHA1_DIGEST_SIZE];
char sha1_res_payload[SHA1_DIGEST_SIZE]; char sha1_res_payload[SHA1_DIGEST_SIZE];
char response_uuid [48];
off_t offset;
if (opt.warc_digests_enabled) if (opt.warc_digests_enabled)
{ {
@ -1395,11 +1417,10 @@ warc_write_response_record (char *url, char *timestamp_str,
/* Not a revisit, just store the record. */ /* Not a revisit, just store the record. */
char response_uuid [48];
warc_uuid_str (response_uuid); warc_uuid_str (response_uuid);
fseeko (warc_current_file, 0L, SEEK_END); fseeko (warc_current_file, 0L, SEEK_END);
off_t offset = ftello (warc_current_file); offset = ftello (warc_current_file);
warc_write_start_record (); warc_write_start_record ();
warc_write_header ("WARC-Type", "response"); warc_write_header ("WARC-Type", "response");