mirror of
https://github.com/moparisthebest/wget
synced 2024-07-03 16:38:41 -04:00
Trivial fixes for C89 compliancy
This commit is contained in:
parent
f9646a0c14
commit
1356e90a14
@ -518,12 +518,12 @@ check_domain_match (const char *cookie_domain, const char *host)
|
||||
{
|
||||
|
||||
#ifdef HAVE_LIBPSL
|
||||
DEBUGP (("cdm: 1"));
|
||||
char *cookie_domain_lower = NULL;
|
||||
char *host_lower = NULL;
|
||||
const psl_ctx_t *psl;
|
||||
int is_acceptable;
|
||||
|
||||
DEBUGP (("cdm: 1"));
|
||||
if (!(psl = psl_builtin()))
|
||||
{
|
||||
DEBUGP (("\nlibpsl not built with a public suffix list. "
|
||||
|
@ -963,16 +963,18 @@ ftp_list (int csock, const char *file, bool avoid_list_a, bool avoid_list,
|
||||
bool ok = false;
|
||||
size_t i = 0;
|
||||
|
||||
*list_a_used = false;
|
||||
|
||||
/* 2013-10-12 Andrea Urbani (matfanjol)
|
||||
For more information about LIST and "LIST -a" please look at ftp.c,
|
||||
function getftp, text "__LIST_A_EXPLANATION__".
|
||||
|
||||
If somebody changes the following commands, please also check the
|
||||
later "i" variable. */
|
||||
const char *list_commands[] = { "LIST -a",
|
||||
"LIST" };
|
||||
static const char *list_commands[] = {
|
||||
"LIST -a",
|
||||
"LIST"
|
||||
};
|
||||
|
||||
*list_a_used = false;
|
||||
|
||||
if (avoid_list_a)
|
||||
{
|
||||
|
@ -2221,9 +2221,9 @@ has_insecure_name_p (const char *s)
|
||||
static bool
|
||||
is_invalid_entry (struct fileinfo *f)
|
||||
{
|
||||
struct fileinfo *cur;
|
||||
cur = f;
|
||||
struct fileinfo *cur = f;
|
||||
char *f_name = f->name;
|
||||
|
||||
/* If the node we're currently checking has a duplicate later, we eliminate
|
||||
* the current node and leave the next one intact. */
|
||||
while (cur->next)
|
||||
|
@ -122,9 +122,10 @@ ssl_init (void)
|
||||
while ((dent = readdir (dir)) != NULL)
|
||||
{
|
||||
struct stat st;
|
||||
char ca_file[dirlen + strlen(dent->d_name) + 2];
|
||||
size_t ca_file_length = dirlen + strlen(dent->d_name) + 2;
|
||||
char *ca_file = alloca(ca_file_length);
|
||||
|
||||
snprintf (ca_file, sizeof(ca_file), "%s/%s", ca_directory, dent->d_name);
|
||||
snprintf (ca_file, ca_file_length, "%s/%s", ca_directory, dent->d_name);
|
||||
if (stat (ca_file, &st) != 0)
|
||||
continue;
|
||||
|
||||
@ -432,9 +433,10 @@ ssl_connect_wget (int fd, const char *hostname)
|
||||
struct wgnutls_transport_context *ctx;
|
||||
gnutls_session_t session;
|
||||
int err,alert;
|
||||
gnutls_init (&session, GNUTLS_CLIENT);
|
||||
const char *str;
|
||||
|
||||
gnutls_init (&session, GNUTLS_CLIENT);
|
||||
|
||||
/* We set the server name but only if it's not an IP address. */
|
||||
if (! is_valid_ip_address (hostname))
|
||||
{
|
||||
|
@ -592,7 +592,7 @@ cache_query (const char *host)
|
||||
al = hash_table_get (host_name_addresses_map, host);
|
||||
if (al)
|
||||
{
|
||||
DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, al));
|
||||
DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, (void *) al));
|
||||
++al->refcount;
|
||||
return al;
|
||||
}
|
||||
|
@ -788,6 +788,7 @@ get_urls_file (const char *file)
|
||||
{
|
||||
int up_error_code;
|
||||
char *url_text;
|
||||
char *new_url;
|
||||
struct urlpos *entry;
|
||||
struct url *url;
|
||||
|
||||
@ -822,7 +823,7 @@ get_urls_file (const char *file)
|
||||
url_text = merged;
|
||||
}
|
||||
|
||||
char *new_url = rewrite_shorthand_url (url_text);
|
||||
new_url = rewrite_shorthand_url (url_text);
|
||||
if (new_url)
|
||||
{
|
||||
xfree (url_text);
|
||||
|
10
src/http.c
10
src/http.c
@ -1532,6 +1532,7 @@ read_response_body (struct http_stat *hs, int sock, FILE *fp, wgint contlen,
|
||||
int warc_payload_offset = 0;
|
||||
FILE *warc_tmp = NULL;
|
||||
int warcerr = 0;
|
||||
int flags = 0;
|
||||
|
||||
if (opt.warc_filename != NULL)
|
||||
{
|
||||
@ -1568,7 +1569,6 @@ read_response_body (struct http_stat *hs, int sock, FILE *fp, wgint contlen,
|
||||
}
|
||||
|
||||
/* Read the response body. */
|
||||
int flags = 0;
|
||||
if (contlen != -1)
|
||||
/* If content-length is present, read that much; otherwise, read
|
||||
until EOF. The HTTP spec doesn't require the server to
|
||||
@ -2147,11 +2147,13 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
|
||||
write_error = fd_write (sock, opt.body_data, body_data_size, -1);
|
||||
if (write_error >= 0 && warc_tmp != NULL)
|
||||
{
|
||||
int warc_tmp_written;
|
||||
|
||||
/* Remember end of headers / start of payload. */
|
||||
warc_payload_offset = ftello (warc_tmp);
|
||||
|
||||
/* Write a copy of the data to the WARC record. */
|
||||
int warc_tmp_written = fwrite (opt.body_data, 1, body_data_size, warc_tmp);
|
||||
warc_tmp_written = fwrite (opt.body_data, 1, body_data_size, warc_tmp);
|
||||
if (warc_tmp_written != body_data_size)
|
||||
write_error = -2;
|
||||
}
|
||||
@ -2334,6 +2336,7 @@ read_header:
|
||||
if (statcode == HTTP_STATUS_UNAUTHORIZED)
|
||||
{
|
||||
/* Authorization is required. */
|
||||
uerr_t auth_err = RETROK;
|
||||
|
||||
/* Normally we are not interested in the response body.
|
||||
But if we are writing a WARC file we are: we like to keep everything. */
|
||||
@ -2371,7 +2374,6 @@ read_header:
|
||||
}
|
||||
|
||||
pconn.authorized = false;
|
||||
uerr_t auth_err = RETROK;
|
||||
if (!auth_finished && (user && passwd))
|
||||
{
|
||||
/* IIS sends multiple copies of WWW-Authenticate, one with
|
||||
@ -3864,7 +3866,7 @@ digest_authentication_encode (const char *au, const char *user,
|
||||
snprintf (cnonce, sizeof (cnonce), "%08x", random_number(INT_MAX));
|
||||
|
||||
md5_init_ctx (&ctx);
|
||||
// md5_process_bytes (hash, MD5_DIGEST_SIZE, &ctx);
|
||||
/* md5_process_bytes (hash, MD5_DIGEST_SIZE, &ctx); */
|
||||
md5_process_bytes (a1buf, MD5_DIGEST_SIZE * 2, &ctx);
|
||||
md5_process_bytes ((unsigned char *)":", 1, &ctx);
|
||||
md5_process_bytes ((unsigned char *)nonce, strlen (nonce), &ctx);
|
||||
|
30
src/main.c
30
src/main.c
@ -1010,18 +1010,21 @@ char *program_argstring; /* Needed by wget_warc.c. */
|
||||
int
|
||||
main (int argc, char **argv)
|
||||
{
|
||||
char **url, **t;
|
||||
char **url, **t, *p;
|
||||
int i, ret, longindex;
|
||||
int nurl;
|
||||
int retconf;
|
||||
int argstring_length;
|
||||
bool use_userconfig = false;
|
||||
bool noconfig = false;
|
||||
bool append_to_log = false;
|
||||
|
||||
total_downloaded_bytes = 0;
|
||||
|
||||
program_name = argv[0];
|
||||
|
||||
struct ptimer *timer = ptimer_new ();
|
||||
double start_time = ptimer_measure (timer);
|
||||
|
||||
total_downloaded_bytes = 0;
|
||||
program_name = argv[0];
|
||||
|
||||
i18n_initialize ();
|
||||
|
||||
/* Construct the name of the executable, without the directory part. */
|
||||
@ -1042,10 +1045,9 @@ main (int argc, char **argv)
|
||||
#endif
|
||||
|
||||
/* Construct the arguments string. */
|
||||
int argstring_length = 1;
|
||||
for (i = 1; i < argc; i++)
|
||||
for (argstring_length = 1, i = 1; i < argc; i++)
|
||||
argstring_length += strlen (argv[i]) + 2 + 1;
|
||||
char *p = program_argstring = malloc (argstring_length * sizeof (char));
|
||||
p = program_argstring = malloc (argstring_length * sizeof (char));
|
||||
if (p == NULL)
|
||||
{
|
||||
fprintf (stderr, _("Memory allocation problem\n"));
|
||||
@ -1053,8 +1055,10 @@ main (int argc, char **argv)
|
||||
}
|
||||
for (i = 1; i < argc; i++)
|
||||
{
|
||||
int arglen;
|
||||
|
||||
*p++ = '"';
|
||||
int arglen = strlen (argv[i]);
|
||||
arglen = strlen (argv[i]);
|
||||
memcpy (p, argv[i], arglen);
|
||||
p += arglen;
|
||||
*p++ = '"';
|
||||
@ -1070,9 +1074,6 @@ main (int argc, char **argv)
|
||||
/* This separate getopt_long is needed to find the user config file
|
||||
option ("--config") and parse it before the other user options. */
|
||||
longindex = -1;
|
||||
int retconf;
|
||||
bool use_userconfig = false;
|
||||
bool noconfig = false;
|
||||
|
||||
while ((retconf = getopt_long (argc, argv,
|
||||
short_options, long_options, &longindex)) != -1)
|
||||
@ -1731,10 +1732,11 @@ outputting to a regular file.\n"));
|
||||
total_downloaded_bytes != 0)
|
||||
{
|
||||
double end_time = ptimer_measure (timer);
|
||||
ptimer_destroy (timer);
|
||||
|
||||
char *wall_time = xstrdup (secs_to_human_time (end_time - start_time));
|
||||
char *download_time = xstrdup (secs_to_human_time (total_download_time));
|
||||
|
||||
ptimer_destroy (timer);
|
||||
|
||||
logprintf (LOG_NOTQUIET,
|
||||
_("FINISHED --%s--\nTotal wall clock time: %s\n"
|
||||
"Downloaded: %d files, %s in %s (%s)\n"),
|
||||
|
@ -169,6 +169,8 @@ static int ssl_true_initialized = 0;
|
||||
bool
|
||||
ssl_init (void)
|
||||
{
|
||||
SSL_METHOD const *meth;
|
||||
|
||||
#if OPENSSL_VERSION_NUMBER >= 0x00907000
|
||||
if (ssl_true_initialized == 0)
|
||||
{
|
||||
@ -177,8 +179,6 @@ ssl_init (void)
|
||||
}
|
||||
#endif
|
||||
|
||||
SSL_METHOD const *meth;
|
||||
|
||||
if (ssl_ctx)
|
||||
/* The SSL has already been initialized. */
|
||||
return true;
|
||||
|
@ -946,6 +946,8 @@ create_image (struct bar_progress *bp, double dl_total_time, bool done)
|
||||
/* The difference between the number of bytes used,
|
||||
and the number of columns used. */
|
||||
int bytes_cols_diff = 0;
|
||||
int cols_diff;
|
||||
const char *down_size;
|
||||
|
||||
if (progress_size < 5)
|
||||
progress_size = 0;
|
||||
@ -963,6 +965,7 @@ create_image (struct bar_progress *bp, double dl_total_time, bool done)
|
||||
int offset_cols;
|
||||
int bytes_in_filename, offset_bytes, col;
|
||||
int *cols_ret = &col;
|
||||
int padding;
|
||||
|
||||
if (((orig_filename_cols > MAX_FILENAME_COLS) && !opt.noscroll) && !done)
|
||||
offset_cols = ((int) bp->tick) % (orig_filename_cols - MAX_FILENAME_COLS + 1);
|
||||
@ -972,7 +975,7 @@ create_image (struct bar_progress *bp, double dl_total_time, bool done)
|
||||
bytes_in_filename = cols_to_bytes (bp->f_download + offset_bytes, MAX_FILENAME_COLS, cols_ret);
|
||||
memcpy (p, bp->f_download + offset_bytes, bytes_in_filename);
|
||||
p += bytes_in_filename;
|
||||
int padding = MAX_FILENAME_COLS - *cols_ret;
|
||||
padding = MAX_FILENAME_COLS - *cols_ret;
|
||||
for (;padding;padding--)
|
||||
*p++ = ' ';
|
||||
*p++ = ' ';
|
||||
@ -1055,8 +1058,8 @@ create_image (struct bar_progress *bp, double dl_total_time, bool done)
|
||||
++bp->tick;
|
||||
|
||||
/* " 234.56M" */
|
||||
const char * down_size = human_readable (size, 1000, 2);
|
||||
int cols_diff = 7 - count_cols (down_size);
|
||||
down_size = human_readable (size, 1000, 2);
|
||||
cols_diff = 7 - count_cols (down_size);
|
||||
while (cols_diff > 0)
|
||||
{
|
||||
*p++=' ';
|
||||
|
12
src/retr.c
12
src/retr.c
@ -378,8 +378,10 @@ fd_read_body (const char *downloaded_filename, int fd, FILE *out, wgint toread,
|
||||
|
||||
if (ret > 0)
|
||||
{
|
||||
int write_res;
|
||||
|
||||
sum_read += ret;
|
||||
int write_res = write_data (out, out2, dlbuf, ret, &skip, &sum_written);
|
||||
write_res = write_data (out, out2, dlbuf, ret, &skip, &sum_written);
|
||||
if (write_res < 0)
|
||||
{
|
||||
ret = (write_res == -3) ? -3 : -2;
|
||||
@ -1056,7 +1058,7 @@ retrieve_from_file (const char *file, bool html, int *count)
|
||||
|
||||
for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
|
||||
{
|
||||
char *filename = NULL, *new_file = NULL;
|
||||
char *filename = NULL, *new_file = NULL, *proxy;
|
||||
int dt;
|
||||
struct iri *tmpiri = iri_dup (iri);
|
||||
struct url *parsed_url = NULL;
|
||||
@ -1072,7 +1074,7 @@ retrieve_from_file (const char *file, bool html, int *count)
|
||||
|
||||
parsed_url = url_parse (cur_url->url->url, NULL, tmpiri, true);
|
||||
|
||||
char *proxy = getproxy (cur_url->url);
|
||||
proxy = getproxy (cur_url->url);
|
||||
if ((opt.recursive || opt.page_requisites)
|
||||
&& (cur_url->url->scheme != SCHEME_FTP || proxy))
|
||||
{
|
||||
@ -1285,9 +1287,11 @@ bool
|
||||
url_uses_proxy (struct url * u)
|
||||
{
|
||||
bool ret;
|
||||
char *proxy;
|
||||
|
||||
if (!u)
|
||||
return false;
|
||||
char *proxy = getproxy (u);
|
||||
proxy = getproxy (u);
|
||||
ret = proxy != NULL;
|
||||
free(proxy);
|
||||
return ret;
|
||||
|
127
src/warc.c
127
src/warc.c
@ -165,10 +165,12 @@ warc_write_buffer (const char *buffer, size_t size)
|
||||
static bool
|
||||
warc_write_string (const char *str)
|
||||
{
|
||||
size_t n;
|
||||
|
||||
if (!warc_write_ok)
|
||||
return false;
|
||||
|
||||
size_t n = strlen (str);
|
||||
n = strlen (str);
|
||||
if (n != warc_write_buffer (str, n))
|
||||
warc_write_ok = false;
|
||||
|
||||
@ -257,6 +259,9 @@ warc_write_block_from_file (FILE *data_in)
|
||||
{
|
||||
/* Add the Content-Length header. */
|
||||
char content_length[MAX_INT_TO_STRING_LEN(off_t)];
|
||||
char buffer[BUFSIZ];
|
||||
size_t s;
|
||||
|
||||
fseeko (data_in, 0L, SEEK_END);
|
||||
number_to_string (content_length, ftello (data_in));
|
||||
warc_write_header ("Content-Length", content_length);
|
||||
@ -268,8 +273,6 @@ warc_write_block_from_file (FILE *data_in)
|
||||
warc_write_ok = false;
|
||||
|
||||
/* Copy the data in the file to the WARC record. */
|
||||
char buffer[BUFSIZ];
|
||||
size_t s;
|
||||
while (warc_write_ok && (s = fread (buffer, 1, BUFSIZ, data_in)) > 0)
|
||||
{
|
||||
if (warc_write_buffer (buffer, s) < s)
|
||||
@ -294,6 +297,11 @@ warc_write_end_record (void)
|
||||
/* We start a new gzip stream for each record. */
|
||||
if (warc_write_ok && warc_current_gzfile)
|
||||
{
|
||||
char extra_header[EXTRA_GZIP_HEADER_SIZE];
|
||||
char static_header[GZIP_STATIC_HEADER_SIZE];
|
||||
off_t current_offset, uncompressed_size, compressed_size;
|
||||
size_t result;
|
||||
|
||||
if (gzclose (warc_current_gzfile) != Z_OK)
|
||||
{
|
||||
warc_write_ok = false;
|
||||
@ -319,17 +327,16 @@ warc_write_end_record (void)
|
||||
*/
|
||||
|
||||
/* Calculate the uncompressed and compressed sizes. */
|
||||
off_t current_offset = ftello (warc_current_file);
|
||||
off_t uncompressed_size = current_offset - warc_current_gzfile_offset;
|
||||
off_t compressed_size = warc_current_gzfile_uncompressed_size;
|
||||
current_offset = ftello (warc_current_file);
|
||||
uncompressed_size = current_offset - warc_current_gzfile_offset;
|
||||
compressed_size = warc_current_gzfile_uncompressed_size;
|
||||
|
||||
/* Go back to the static GZIP header. */
|
||||
fseeko (warc_current_file, warc_current_gzfile_offset
|
||||
+ EXTRA_GZIP_HEADER_SIZE, SEEK_SET);
|
||||
|
||||
/* Read the header. */
|
||||
char static_header[GZIP_STATIC_HEADER_SIZE];
|
||||
size_t result = fread (static_header, 1, GZIP_STATIC_HEADER_SIZE,
|
||||
result = fread (static_header, 1, GZIP_STATIC_HEADER_SIZE,
|
||||
warc_current_file);
|
||||
if (result != GZIP_STATIC_HEADER_SIZE)
|
||||
{
|
||||
@ -346,7 +353,6 @@ warc_write_end_record (void)
|
||||
fwrite (static_header, 1, GZIP_STATIC_HEADER_SIZE, warc_current_file);
|
||||
|
||||
/* Prepare the extra GZIP header. */
|
||||
char extra_header[EXTRA_GZIP_HEADER_SIZE];
|
||||
/* XLEN, the length of the extra header fields. */
|
||||
extra_header[0] = ((EXTRA_GZIP_HEADER_SIZE - 2) & 255);
|
||||
extra_header[1] = ((EXTRA_GZIP_HEADER_SIZE - 2) >> 8) & 255;
|
||||
@ -660,16 +666,18 @@ warc_uuid_str (char *urn_str)
|
||||
static bool
|
||||
warc_write_warcinfo_record (char *filename)
|
||||
{
|
||||
FILE *warc_tmp;
|
||||
char timestamp[22];
|
||||
char *filename_copy, *filename_basename;
|
||||
|
||||
/* Write warc-info record as the first record of the file. */
|
||||
/* We add the record id of this info record to the other records in the
|
||||
file. */
|
||||
warc_current_warcinfo_uuid_str = (char *) malloc (48);
|
||||
warc_uuid_str (warc_current_warcinfo_uuid_str);
|
||||
|
||||
char timestamp[22];
|
||||
warc_timestamp (timestamp);
|
||||
|
||||
char *filename_copy, *filename_basename;
|
||||
filename_copy = strdup (filename);
|
||||
filename_basename = strdup (basename (filename_copy));
|
||||
|
||||
@ -681,7 +689,7 @@ warc_write_warcinfo_record (char *filename)
|
||||
warc_write_header ("WARC-Filename", filename_basename);
|
||||
|
||||
/* Create content. */
|
||||
FILE *warc_tmp = warc_tempfile ();
|
||||
warc_tmp = warc_tempfile ();
|
||||
if (warc_tmp == NULL)
|
||||
{
|
||||
free (filename_copy);
|
||||
@ -731,22 +739,6 @@ warc_write_warcinfo_record (char *filename)
|
||||
static bool
|
||||
warc_start_new_file (bool meta)
|
||||
{
|
||||
if (opt.warc_filename == NULL)
|
||||
return false;
|
||||
|
||||
if (warc_current_file != NULL)
|
||||
fclose (warc_current_file);
|
||||
|
||||
free (warc_current_warcinfo_uuid_str);
|
||||
free (warc_current_filename);
|
||||
|
||||
warc_current_file_number++;
|
||||
|
||||
int base_filename_length = strlen (opt.warc_filename);
|
||||
/* filename format: base + "-" + 5 digit serial number + ".warc.gz" */
|
||||
char *new_filename = malloc (base_filename_length + 1 + 5 + 8 + 1);
|
||||
warc_current_filename = new_filename;
|
||||
|
||||
#ifdef __VMS
|
||||
# define WARC_GZ "warc-gz"
|
||||
#else /* def __VMS */
|
||||
@ -759,6 +751,25 @@ warc_start_new_file (bool meta)
|
||||
const char *extension = "warc";
|
||||
#endif
|
||||
|
||||
int base_filename_length;
|
||||
char *new_filename;
|
||||
|
||||
if (opt.warc_filename == NULL)
|
||||
return false;
|
||||
|
||||
if (warc_current_file != NULL)
|
||||
fclose (warc_current_file);
|
||||
|
||||
free (warc_current_warcinfo_uuid_str);
|
||||
free (warc_current_filename);
|
||||
|
||||
warc_current_file_number++;
|
||||
|
||||
base_filename_length = strlen (opt.warc_filename);
|
||||
/* filename format: base + "-" + 5 digit serial number + ".warc.gz" */
|
||||
new_filename = malloc (base_filename_length + 1 + 5 + 8 + 1);
|
||||
warc_current_filename = new_filename;
|
||||
|
||||
/* If max size is enabled, we add a serial number to the file names. */
|
||||
if (meta)
|
||||
sprintf (new_filename, "%s-meta.%s", opt.warc_filename, extension);
|
||||
@ -830,12 +841,13 @@ static bool
|
||||
warc_parse_cdx_header (char *lineptr, int *field_num_original_url,
|
||||
int *field_num_checksum, int *field_num_record_id)
|
||||
{
|
||||
char *token;
|
||||
char *save_ptr;
|
||||
|
||||
*field_num_original_url = -1;
|
||||
*field_num_checksum = -1;
|
||||
*field_num_record_id = -1;
|
||||
|
||||
char *token;
|
||||
char *save_ptr;
|
||||
token = strtok_r (lineptr, CDX_FIELDSEP, &save_ptr);
|
||||
|
||||
if (token != NULL && strcmp (token, "CDX") == 0)
|
||||
@ -876,13 +888,12 @@ warc_process_cdx_line (char *lineptr, int field_num_original_url,
|
||||
char *original_url = NULL;
|
||||
char *checksum = NULL;
|
||||
char *record_id = NULL;
|
||||
|
||||
char *token;
|
||||
char *save_ptr;
|
||||
token = strtok_r (lineptr, CDX_FIELDSEP, &save_ptr);
|
||||
int field_num = 0;
|
||||
|
||||
/* Read this line to get the fields we need. */
|
||||
int field_num = 0;
|
||||
token = strtok_r (lineptr, CDX_FIELDSEP, &save_ptr);
|
||||
while (token != NULL)
|
||||
{
|
||||
char **val;
|
||||
@ -944,17 +955,17 @@ warc_process_cdx_line (char *lineptr, int field_num_original_url,
|
||||
static bool
|
||||
warc_load_cdx_dedup_file (void)
|
||||
{
|
||||
FILE *f = fopen (opt.warc_cdx_dedup_filename, "r");
|
||||
if (f == NULL)
|
||||
return false;
|
||||
|
||||
FILE *f;
|
||||
char *lineptr = NULL;
|
||||
size_t n = 0;
|
||||
ssize_t line_length;
|
||||
int field_num_original_url = -1;
|
||||
int field_num_checksum = -1;
|
||||
int field_num_record_id = -1;
|
||||
|
||||
char *lineptr = NULL;
|
||||
size_t n = 0;
|
||||
ssize_t line_length;
|
||||
f = fopen (opt.warc_cdx_dedup_filename, "r");
|
||||
if (f == NULL)
|
||||
return false;
|
||||
|
||||
/* The first line should contain the CDX header.
|
||||
Format: " CDX x x x x x"
|
||||
@ -983,6 +994,8 @@ _("CDX file does not list record ids. (Missing column 'u'.)\n"));
|
||||
}
|
||||
else
|
||||
{
|
||||
int nrecords;
|
||||
|
||||
/* Initialize the table. */
|
||||
warc_cdx_dedup_table = hash_table_new (1000, warc_hash_sha1_digest,
|
||||
warc_cmp_sha1_digest);
|
||||
@ -1000,7 +1013,7 @@ _("CDX file does not list record ids. (Missing column 'u'.)\n"));
|
||||
while (line_length != -1);
|
||||
|
||||
/* Print results. */
|
||||
int nrecords = hash_table_count (warc_cdx_dedup_table);
|
||||
nrecords = hash_table_count (warc_cdx_dedup_table);
|
||||
logprintf (LOG_VERBOSE, ngettext ("Loaded %d record from CDX.\n\n",
|
||||
"Loaded %d records from CDX.\n\n",
|
||||
nrecords),
|
||||
@ -1020,11 +1033,12 @@ _("CDX file does not list record ids. (Missing column 'u'.)\n"));
|
||||
static struct warc_cdx_record *
|
||||
warc_find_duplicate_cdx_record (char *url, char *sha1_digest_payload)
|
||||
{
|
||||
struct warc_cdx_record *rec_existing;
|
||||
|
||||
if (warc_cdx_dedup_table == NULL)
|
||||
return NULL;
|
||||
|
||||
struct warc_cdx_record *rec_existing
|
||||
= hash_table_get (warc_cdx_dedup_table, sha1_digest_payload);
|
||||
rec_existing = hash_table_get (warc_cdx_dedup_table, sha1_digest_payload);
|
||||
|
||||
if (rec_existing && strcmp (rec_existing->url, url) == 0)
|
||||
return rec_existing;
|
||||
@ -1095,11 +1109,13 @@ warc_init (void)
|
||||
static void
|
||||
warc_write_metadata (void)
|
||||
{
|
||||
char manifest_uuid[48];
|
||||
FILE *warc_tmp_fp;
|
||||
|
||||
/* If there are multiple WARC files, the metadata should be written to a separate file. */
|
||||
if (opt.warc_maxsize > 0)
|
||||
warc_start_new_file (true);
|
||||
|
||||
char manifest_uuid [48];
|
||||
warc_uuid_str (manifest_uuid);
|
||||
|
||||
fflush (warc_manifest_fp);
|
||||
@ -1109,7 +1125,7 @@ warc_write_metadata (void)
|
||||
warc_manifest_fp, -1);
|
||||
/* warc_write_resource_record has closed warc_manifest_fp. */
|
||||
|
||||
FILE * warc_tmp_fp = warc_tempfile ();
|
||||
warc_tmp_fp = warc_tempfile ();
|
||||
if (warc_tmp_fp == NULL)
|
||||
{
|
||||
logprintf (LOG_NOTQUIET, _("Could not open temporary WARC file.\n"));
|
||||
@ -1164,6 +1180,8 @@ FILE *
|
||||
warc_tempfile (void)
|
||||
{
|
||||
char filename[100];
|
||||
int fd;
|
||||
|
||||
if (path_search (filename, 100, opt.warc_tempdir, "wget", true) == -1)
|
||||
return NULL;
|
||||
|
||||
@ -1182,7 +1200,7 @@ warc_tempfile (void)
|
||||
return fopen (tfn, "w+", "fop=tmd"); /* Create auto-delete temp file. */
|
||||
}
|
||||
#else /* def __VMS */
|
||||
int fd = mkostemp (filename, O_TEMPORARY);
|
||||
fd = mkostemp (filename, O_TEMPORARY);
|
||||
if (fd < 0)
|
||||
return NULL;
|
||||
|
||||
@ -1245,7 +1263,10 @@ warc_write_cdx_record (const char *url, const char *timestamp_str,
|
||||
const char *response_uuid)
|
||||
{
|
||||
/* Transform the timestamp. */
|
||||
char timestamp_str_cdx [15];
|
||||
char timestamp_str_cdx[15];
|
||||
char offset_string[MAX_INT_TO_STRING_LEN(off_t)];
|
||||
const char *checksum;
|
||||
|
||||
memcpy (timestamp_str_cdx , timestamp_str , 4); /* "YYYY" "-" */
|
||||
memcpy (timestamp_str_cdx + 4, timestamp_str + 5, 2); /* "mm" "-" */
|
||||
memcpy (timestamp_str_cdx + 6, timestamp_str + 8, 2); /* "dd" "T" */
|
||||
@ -1255,7 +1276,6 @@ warc_write_cdx_record (const char *url, const char *timestamp_str,
|
||||
timestamp_str_cdx[14] = '\0';
|
||||
|
||||
/* Rewrite the checksum. */
|
||||
const char *checksum;
|
||||
if (payload_digest != NULL)
|
||||
checksum = payload_digest + 5; /* Skip the "sha1:" */
|
||||
else
|
||||
@ -1266,7 +1286,6 @@ warc_write_cdx_record (const char *url, const char *timestamp_str,
|
||||
if (redirect_location == NULL || strlen(redirect_location) == 0)
|
||||
redirect_location = "-";
|
||||
|
||||
char offset_string[MAX_INT_TO_STRING_LEN(off_t)];
|
||||
number_to_string (offset_string, offset);
|
||||
|
||||
/* Print the CDX line. */
|
||||
@ -1298,10 +1317,11 @@ warc_write_revisit_record (char *url, char *timestamp_str,
|
||||
char *refers_to, ip_address *ip, FILE *body)
|
||||
{
|
||||
char revisit_uuid [48];
|
||||
warc_uuid_str (revisit_uuid);
|
||||
|
||||
char *block_digest = NULL;
|
||||
char sha1_res_block[SHA1_DIGEST_SIZE];
|
||||
|
||||
warc_uuid_str (revisit_uuid);
|
||||
|
||||
sha1_stream (body, sha1_res_block);
|
||||
block_digest = warc_base32_sha1_digest (sha1_res_block);
|
||||
|
||||
@ -1351,6 +1371,8 @@ warc_write_response_record (char *url, char *timestamp_str,
|
||||
char *payload_digest = NULL;
|
||||
char sha1_res_block[SHA1_DIGEST_SIZE];
|
||||
char sha1_res_payload[SHA1_DIGEST_SIZE];
|
||||
char response_uuid [48];
|
||||
off_t offset;
|
||||
|
||||
if (opt.warc_digests_enabled)
|
||||
{
|
||||
@ -1395,11 +1417,10 @@ warc_write_response_record (char *url, char *timestamp_str,
|
||||
|
||||
/* Not a revisit, just store the record. */
|
||||
|
||||
char response_uuid [48];
|
||||
warc_uuid_str (response_uuid);
|
||||
|
||||
fseeko (warc_current_file, 0L, SEEK_END);
|
||||
off_t offset = ftello (warc_current_file);
|
||||
offset = ftello (warc_current_file);
|
||||
|
||||
warc_write_start_record ();
|
||||
warc_write_header ("WARC-Type", "response");
|
||||
|
Loading…
Reference in New Issue
Block a user