mirror of
https://github.com/moparisthebest/wget
synced 2024-07-03 16:38:41 -04:00
Trivial fixes for C89 compliancy
This commit is contained in:
parent
f9646a0c14
commit
1356e90a14
@ -518,12 +518,12 @@ check_domain_match (const char *cookie_domain, const char *host)
|
|||||||
{
|
{
|
||||||
|
|
||||||
#ifdef HAVE_LIBPSL
|
#ifdef HAVE_LIBPSL
|
||||||
DEBUGP (("cdm: 1"));
|
|
||||||
char *cookie_domain_lower = NULL;
|
char *cookie_domain_lower = NULL;
|
||||||
char *host_lower = NULL;
|
char *host_lower = NULL;
|
||||||
const psl_ctx_t *psl;
|
const psl_ctx_t *psl;
|
||||||
int is_acceptable;
|
int is_acceptable;
|
||||||
|
|
||||||
|
DEBUGP (("cdm: 1"));
|
||||||
if (!(psl = psl_builtin()))
|
if (!(psl = psl_builtin()))
|
||||||
{
|
{
|
||||||
DEBUGP (("\nlibpsl not built with a public suffix list. "
|
DEBUGP (("\nlibpsl not built with a public suffix list. "
|
||||||
|
@ -963,16 +963,18 @@ ftp_list (int csock, const char *file, bool avoid_list_a, bool avoid_list,
|
|||||||
bool ok = false;
|
bool ok = false;
|
||||||
size_t i = 0;
|
size_t i = 0;
|
||||||
|
|
||||||
*list_a_used = false;
|
|
||||||
|
|
||||||
/* 2013-10-12 Andrea Urbani (matfanjol)
|
/* 2013-10-12 Andrea Urbani (matfanjol)
|
||||||
For more information about LIST and "LIST -a" please look at ftp.c,
|
For more information about LIST and "LIST -a" please look at ftp.c,
|
||||||
function getftp, text "__LIST_A_EXPLANATION__".
|
function getftp, text "__LIST_A_EXPLANATION__".
|
||||||
|
|
||||||
If somebody changes the following commands, please, checks also the
|
If somebody changes the following commands, please, checks also the
|
||||||
later "i" variable. */
|
later "i" variable. */
|
||||||
const char *list_commands[] = { "LIST -a",
|
static const char *list_commands[] = {
|
||||||
"LIST" };
|
"LIST -a",
|
||||||
|
"LIST"
|
||||||
|
};
|
||||||
|
|
||||||
|
*list_a_used = false;
|
||||||
|
|
||||||
if (avoid_list_a)
|
if (avoid_list_a)
|
||||||
{
|
{
|
||||||
|
@ -2221,9 +2221,9 @@ has_insecure_name_p (const char *s)
|
|||||||
static bool
|
static bool
|
||||||
is_invalid_entry (struct fileinfo *f)
|
is_invalid_entry (struct fileinfo *f)
|
||||||
{
|
{
|
||||||
struct fileinfo *cur;
|
struct fileinfo *cur = f;
|
||||||
cur = f;
|
|
||||||
char *f_name = f->name;
|
char *f_name = f->name;
|
||||||
|
|
||||||
/* If the node we're currently checking has a duplicate later, we eliminate
|
/* If the node we're currently checking has a duplicate later, we eliminate
|
||||||
* the current node and leave the next one intact. */
|
* the current node and leave the next one intact. */
|
||||||
while (cur->next)
|
while (cur->next)
|
||||||
|
@ -122,9 +122,10 @@ ssl_init (void)
|
|||||||
while ((dent = readdir (dir)) != NULL)
|
while ((dent = readdir (dir)) != NULL)
|
||||||
{
|
{
|
||||||
struct stat st;
|
struct stat st;
|
||||||
char ca_file[dirlen + strlen(dent->d_name) + 2];
|
size_t ca_file_length = dirlen + strlen(dent->d_name) + 2;
|
||||||
|
char *ca_file = alloca(ca_file_length);
|
||||||
|
|
||||||
snprintf (ca_file, sizeof(ca_file), "%s/%s", ca_directory, dent->d_name);
|
snprintf (ca_file, ca_file_length, "%s/%s", ca_directory, dent->d_name);
|
||||||
if (stat (ca_file, &st) != 0)
|
if (stat (ca_file, &st) != 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
@ -432,9 +433,10 @@ ssl_connect_wget (int fd, const char *hostname)
|
|||||||
struct wgnutls_transport_context *ctx;
|
struct wgnutls_transport_context *ctx;
|
||||||
gnutls_session_t session;
|
gnutls_session_t session;
|
||||||
int err,alert;
|
int err,alert;
|
||||||
gnutls_init (&session, GNUTLS_CLIENT);
|
|
||||||
const char *str;
|
const char *str;
|
||||||
|
|
||||||
|
gnutls_init (&session, GNUTLS_CLIENT);
|
||||||
|
|
||||||
/* We set the server name but only if it's not an IP address. */
|
/* We set the server name but only if it's not an IP address. */
|
||||||
if (! is_valid_ip_address (hostname))
|
if (! is_valid_ip_address (hostname))
|
||||||
{
|
{
|
||||||
|
@ -592,7 +592,7 @@ cache_query (const char *host)
|
|||||||
al = hash_table_get (host_name_addresses_map, host);
|
al = hash_table_get (host_name_addresses_map, host);
|
||||||
if (al)
|
if (al)
|
||||||
{
|
{
|
||||||
DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, al));
|
DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, (void *) al));
|
||||||
++al->refcount;
|
++al->refcount;
|
||||||
return al;
|
return al;
|
||||||
}
|
}
|
||||||
|
@ -788,6 +788,7 @@ get_urls_file (const char *file)
|
|||||||
{
|
{
|
||||||
int up_error_code;
|
int up_error_code;
|
||||||
char *url_text;
|
char *url_text;
|
||||||
|
char *new_url;
|
||||||
struct urlpos *entry;
|
struct urlpos *entry;
|
||||||
struct url *url;
|
struct url *url;
|
||||||
|
|
||||||
@ -822,7 +823,7 @@ get_urls_file (const char *file)
|
|||||||
url_text = merged;
|
url_text = merged;
|
||||||
}
|
}
|
||||||
|
|
||||||
char *new_url = rewrite_shorthand_url (url_text);
|
new_url = rewrite_shorthand_url (url_text);
|
||||||
if (new_url)
|
if (new_url)
|
||||||
{
|
{
|
||||||
xfree (url_text);
|
xfree (url_text);
|
||||||
|
10
src/http.c
10
src/http.c
@ -1532,6 +1532,7 @@ read_response_body (struct http_stat *hs, int sock, FILE *fp, wgint contlen,
|
|||||||
int warc_payload_offset = 0;
|
int warc_payload_offset = 0;
|
||||||
FILE *warc_tmp = NULL;
|
FILE *warc_tmp = NULL;
|
||||||
int warcerr = 0;
|
int warcerr = 0;
|
||||||
|
int flags = 0;
|
||||||
|
|
||||||
if (opt.warc_filename != NULL)
|
if (opt.warc_filename != NULL)
|
||||||
{
|
{
|
||||||
@ -1568,7 +1569,6 @@ read_response_body (struct http_stat *hs, int sock, FILE *fp, wgint contlen,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Read the response body. */
|
/* Read the response body. */
|
||||||
int flags = 0;
|
|
||||||
if (contlen != -1)
|
if (contlen != -1)
|
||||||
/* If content-length is present, read that much; otherwise, read
|
/* If content-length is present, read that much; otherwise, read
|
||||||
until EOF. The HTTP spec doesn't require the server to
|
until EOF. The HTTP spec doesn't require the server to
|
||||||
@ -2147,11 +2147,13 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
|
|||||||
write_error = fd_write (sock, opt.body_data, body_data_size, -1);
|
write_error = fd_write (sock, opt.body_data, body_data_size, -1);
|
||||||
if (write_error >= 0 && warc_tmp != NULL)
|
if (write_error >= 0 && warc_tmp != NULL)
|
||||||
{
|
{
|
||||||
|
int warc_tmp_written;
|
||||||
|
|
||||||
/* Remember end of headers / start of payload. */
|
/* Remember end of headers / start of payload. */
|
||||||
warc_payload_offset = ftello (warc_tmp);
|
warc_payload_offset = ftello (warc_tmp);
|
||||||
|
|
||||||
/* Write a copy of the data to the WARC record. */
|
/* Write a copy of the data to the WARC record. */
|
||||||
int warc_tmp_written = fwrite (opt.body_data, 1, body_data_size, warc_tmp);
|
warc_tmp_written = fwrite (opt.body_data, 1, body_data_size, warc_tmp);
|
||||||
if (warc_tmp_written != body_data_size)
|
if (warc_tmp_written != body_data_size)
|
||||||
write_error = -2;
|
write_error = -2;
|
||||||
}
|
}
|
||||||
@ -2334,6 +2336,7 @@ read_header:
|
|||||||
if (statcode == HTTP_STATUS_UNAUTHORIZED)
|
if (statcode == HTTP_STATUS_UNAUTHORIZED)
|
||||||
{
|
{
|
||||||
/* Authorization is required. */
|
/* Authorization is required. */
|
||||||
|
uerr_t auth_err = RETROK;
|
||||||
|
|
||||||
/* Normally we are not interested in the response body.
|
/* Normally we are not interested in the response body.
|
||||||
But if we are writing a WARC file we are: we like to keep everyting. */
|
But if we are writing a WARC file we are: we like to keep everyting. */
|
||||||
@ -2371,7 +2374,6 @@ read_header:
|
|||||||
}
|
}
|
||||||
|
|
||||||
pconn.authorized = false;
|
pconn.authorized = false;
|
||||||
uerr_t auth_err = RETROK;
|
|
||||||
if (!auth_finished && (user && passwd))
|
if (!auth_finished && (user && passwd))
|
||||||
{
|
{
|
||||||
/* IIS sends multiple copies of WWW-Authenticate, one with
|
/* IIS sends multiple copies of WWW-Authenticate, one with
|
||||||
@ -3864,7 +3866,7 @@ digest_authentication_encode (const char *au, const char *user,
|
|||||||
snprintf (cnonce, sizeof (cnonce), "%08x", random_number(INT_MAX));
|
snprintf (cnonce, sizeof (cnonce), "%08x", random_number(INT_MAX));
|
||||||
|
|
||||||
md5_init_ctx (&ctx);
|
md5_init_ctx (&ctx);
|
||||||
// md5_process_bytes (hash, MD5_DIGEST_SIZE, &ctx);
|
/* md5_process_bytes (hash, MD5_DIGEST_SIZE, &ctx); */
|
||||||
md5_process_bytes (a1buf, MD5_DIGEST_SIZE * 2, &ctx);
|
md5_process_bytes (a1buf, MD5_DIGEST_SIZE * 2, &ctx);
|
||||||
md5_process_bytes ((unsigned char *)":", 1, &ctx);
|
md5_process_bytes ((unsigned char *)":", 1, &ctx);
|
||||||
md5_process_bytes ((unsigned char *)nonce, strlen (nonce), &ctx);
|
md5_process_bytes ((unsigned char *)nonce, strlen (nonce), &ctx);
|
||||||
|
30
src/main.c
30
src/main.c
@ -1010,18 +1010,21 @@ char *program_argstring; /* Needed by wget_warc.c. */
|
|||||||
int
|
int
|
||||||
main (int argc, char **argv)
|
main (int argc, char **argv)
|
||||||
{
|
{
|
||||||
char **url, **t;
|
char **url, **t, *p;
|
||||||
int i, ret, longindex;
|
int i, ret, longindex;
|
||||||
int nurl;
|
int nurl;
|
||||||
|
int retconf;
|
||||||
|
int argstring_length;
|
||||||
|
bool use_userconfig = false;
|
||||||
|
bool noconfig = false;
|
||||||
bool append_to_log = false;
|
bool append_to_log = false;
|
||||||
|
|
||||||
total_downloaded_bytes = 0;
|
|
||||||
|
|
||||||
program_name = argv[0];
|
|
||||||
|
|
||||||
struct ptimer *timer = ptimer_new ();
|
struct ptimer *timer = ptimer_new ();
|
||||||
double start_time = ptimer_measure (timer);
|
double start_time = ptimer_measure (timer);
|
||||||
|
|
||||||
|
total_downloaded_bytes = 0;
|
||||||
|
program_name = argv[0];
|
||||||
|
|
||||||
i18n_initialize ();
|
i18n_initialize ();
|
||||||
|
|
||||||
/* Construct the name of the executable, without the directory part. */
|
/* Construct the name of the executable, without the directory part. */
|
||||||
@ -1042,10 +1045,9 @@ main (int argc, char **argv)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Construct the arguments string. */
|
/* Construct the arguments string. */
|
||||||
int argstring_length = 1;
|
for (argstring_length = 1, i = 1; i < argc; i++)
|
||||||
for (i = 1; i < argc; i++)
|
|
||||||
argstring_length += strlen (argv[i]) + 2 + 1;
|
argstring_length += strlen (argv[i]) + 2 + 1;
|
||||||
char *p = program_argstring = malloc (argstring_length * sizeof (char));
|
p = program_argstring = malloc (argstring_length * sizeof (char));
|
||||||
if (p == NULL)
|
if (p == NULL)
|
||||||
{
|
{
|
||||||
fprintf (stderr, _("Memory allocation problem\n"));
|
fprintf (stderr, _("Memory allocation problem\n"));
|
||||||
@ -1053,8 +1055,10 @@ main (int argc, char **argv)
|
|||||||
}
|
}
|
||||||
for (i = 1; i < argc; i++)
|
for (i = 1; i < argc; i++)
|
||||||
{
|
{
|
||||||
|
int arglen;
|
||||||
|
|
||||||
*p++ = '"';
|
*p++ = '"';
|
||||||
int arglen = strlen (argv[i]);
|
arglen = strlen (argv[i]);
|
||||||
memcpy (p, argv[i], arglen);
|
memcpy (p, argv[i], arglen);
|
||||||
p += arglen;
|
p += arglen;
|
||||||
*p++ = '"';
|
*p++ = '"';
|
||||||
@ -1070,9 +1074,6 @@ main (int argc, char **argv)
|
|||||||
/* This separate getopt_long is needed to find the user config file
|
/* This separate getopt_long is needed to find the user config file
|
||||||
option ("--config") and parse it before the other user options. */
|
option ("--config") and parse it before the other user options. */
|
||||||
longindex = -1;
|
longindex = -1;
|
||||||
int retconf;
|
|
||||||
bool use_userconfig = false;
|
|
||||||
bool noconfig = false;
|
|
||||||
|
|
||||||
while ((retconf = getopt_long (argc, argv,
|
while ((retconf = getopt_long (argc, argv,
|
||||||
short_options, long_options, &longindex)) != -1)
|
short_options, long_options, &longindex)) != -1)
|
||||||
@ -1731,10 +1732,11 @@ outputting to a regular file.\n"));
|
|||||||
total_downloaded_bytes != 0)
|
total_downloaded_bytes != 0)
|
||||||
{
|
{
|
||||||
double end_time = ptimer_measure (timer);
|
double end_time = ptimer_measure (timer);
|
||||||
ptimer_destroy (timer);
|
|
||||||
|
|
||||||
char *wall_time = xstrdup (secs_to_human_time (end_time - start_time));
|
char *wall_time = xstrdup (secs_to_human_time (end_time - start_time));
|
||||||
char *download_time = xstrdup (secs_to_human_time (total_download_time));
|
char *download_time = xstrdup (secs_to_human_time (total_download_time));
|
||||||
|
|
||||||
|
ptimer_destroy (timer);
|
||||||
|
|
||||||
logprintf (LOG_NOTQUIET,
|
logprintf (LOG_NOTQUIET,
|
||||||
_("FINISHED --%s--\nTotal wall clock time: %s\n"
|
_("FINISHED --%s--\nTotal wall clock time: %s\n"
|
||||||
"Downloaded: %d files, %s in %s (%s)\n"),
|
"Downloaded: %d files, %s in %s (%s)\n"),
|
||||||
|
@ -169,6 +169,8 @@ static int ssl_true_initialized = 0;
|
|||||||
bool
|
bool
|
||||||
ssl_init (void)
|
ssl_init (void)
|
||||||
{
|
{
|
||||||
|
SSL_METHOD const *meth;
|
||||||
|
|
||||||
#if OPENSSL_VERSION_NUMBER >= 0x00907000
|
#if OPENSSL_VERSION_NUMBER >= 0x00907000
|
||||||
if (ssl_true_initialized == 0)
|
if (ssl_true_initialized == 0)
|
||||||
{
|
{
|
||||||
@ -177,8 +179,6 @@ ssl_init (void)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
SSL_METHOD const *meth;
|
|
||||||
|
|
||||||
if (ssl_ctx)
|
if (ssl_ctx)
|
||||||
/* The SSL has already been initialized. */
|
/* The SSL has already been initialized. */
|
||||||
return true;
|
return true;
|
||||||
|
@ -946,6 +946,8 @@ create_image (struct bar_progress *bp, double dl_total_time, bool done)
|
|||||||
/* The difference between the number of bytes used,
|
/* The difference between the number of bytes used,
|
||||||
and the number of columns used. */
|
and the number of columns used. */
|
||||||
int bytes_cols_diff = 0;
|
int bytes_cols_diff = 0;
|
||||||
|
int cols_diff;
|
||||||
|
const char *down_size;
|
||||||
|
|
||||||
if (progress_size < 5)
|
if (progress_size < 5)
|
||||||
progress_size = 0;
|
progress_size = 0;
|
||||||
@ -963,6 +965,7 @@ create_image (struct bar_progress *bp, double dl_total_time, bool done)
|
|||||||
int offset_cols;
|
int offset_cols;
|
||||||
int bytes_in_filename, offset_bytes, col;
|
int bytes_in_filename, offset_bytes, col;
|
||||||
int *cols_ret = &col;
|
int *cols_ret = &col;
|
||||||
|
int padding;
|
||||||
|
|
||||||
if (((orig_filename_cols > MAX_FILENAME_COLS) && !opt.noscroll) && !done)
|
if (((orig_filename_cols > MAX_FILENAME_COLS) && !opt.noscroll) && !done)
|
||||||
offset_cols = ((int) bp->tick) % (orig_filename_cols - MAX_FILENAME_COLS + 1);
|
offset_cols = ((int) bp->tick) % (orig_filename_cols - MAX_FILENAME_COLS + 1);
|
||||||
@ -972,7 +975,7 @@ create_image (struct bar_progress *bp, double dl_total_time, bool done)
|
|||||||
bytes_in_filename = cols_to_bytes (bp->f_download + offset_bytes, MAX_FILENAME_COLS, cols_ret);
|
bytes_in_filename = cols_to_bytes (bp->f_download + offset_bytes, MAX_FILENAME_COLS, cols_ret);
|
||||||
memcpy (p, bp->f_download + offset_bytes, bytes_in_filename);
|
memcpy (p, bp->f_download + offset_bytes, bytes_in_filename);
|
||||||
p += bytes_in_filename;
|
p += bytes_in_filename;
|
||||||
int padding = MAX_FILENAME_COLS - *cols_ret;
|
padding = MAX_FILENAME_COLS - *cols_ret;
|
||||||
for (;padding;padding--)
|
for (;padding;padding--)
|
||||||
*p++ = ' ';
|
*p++ = ' ';
|
||||||
*p++ = ' ';
|
*p++ = ' ';
|
||||||
@ -1055,8 +1058,8 @@ create_image (struct bar_progress *bp, double dl_total_time, bool done)
|
|||||||
++bp->tick;
|
++bp->tick;
|
||||||
|
|
||||||
/* " 234.56M" */
|
/* " 234.56M" */
|
||||||
const char * down_size = human_readable (size, 1000, 2);
|
down_size = human_readable (size, 1000, 2);
|
||||||
int cols_diff = 7 - count_cols (down_size);
|
cols_diff = 7 - count_cols (down_size);
|
||||||
while (cols_diff > 0)
|
while (cols_diff > 0)
|
||||||
{
|
{
|
||||||
*p++=' ';
|
*p++=' ';
|
||||||
|
12
src/retr.c
12
src/retr.c
@ -378,8 +378,10 @@ fd_read_body (const char *downloaded_filename, int fd, FILE *out, wgint toread,
|
|||||||
|
|
||||||
if (ret > 0)
|
if (ret > 0)
|
||||||
{
|
{
|
||||||
|
int write_res;
|
||||||
|
|
||||||
sum_read += ret;
|
sum_read += ret;
|
||||||
int write_res = write_data (out, out2, dlbuf, ret, &skip, &sum_written);
|
write_res = write_data (out, out2, dlbuf, ret, &skip, &sum_written);
|
||||||
if (write_res < 0)
|
if (write_res < 0)
|
||||||
{
|
{
|
||||||
ret = (write_res == -3) ? -3 : -2;
|
ret = (write_res == -3) ? -3 : -2;
|
||||||
@ -1056,7 +1058,7 @@ retrieve_from_file (const char *file, bool html, int *count)
|
|||||||
|
|
||||||
for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
|
for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
|
||||||
{
|
{
|
||||||
char *filename = NULL, *new_file = NULL;
|
char *filename = NULL, *new_file = NULL, *proxy;
|
||||||
int dt;
|
int dt;
|
||||||
struct iri *tmpiri = iri_dup (iri);
|
struct iri *tmpiri = iri_dup (iri);
|
||||||
struct url *parsed_url = NULL;
|
struct url *parsed_url = NULL;
|
||||||
@ -1072,7 +1074,7 @@ retrieve_from_file (const char *file, bool html, int *count)
|
|||||||
|
|
||||||
parsed_url = url_parse (cur_url->url->url, NULL, tmpiri, true);
|
parsed_url = url_parse (cur_url->url->url, NULL, tmpiri, true);
|
||||||
|
|
||||||
char *proxy = getproxy (cur_url->url);
|
proxy = getproxy (cur_url->url);
|
||||||
if ((opt.recursive || opt.page_requisites)
|
if ((opt.recursive || opt.page_requisites)
|
||||||
&& (cur_url->url->scheme != SCHEME_FTP || proxy))
|
&& (cur_url->url->scheme != SCHEME_FTP || proxy))
|
||||||
{
|
{
|
||||||
@ -1285,9 +1287,11 @@ bool
|
|||||||
url_uses_proxy (struct url * u)
|
url_uses_proxy (struct url * u)
|
||||||
{
|
{
|
||||||
bool ret;
|
bool ret;
|
||||||
|
char *proxy;
|
||||||
|
|
||||||
if (!u)
|
if (!u)
|
||||||
return false;
|
return false;
|
||||||
char *proxy = getproxy (u);
|
proxy = getproxy (u);
|
||||||
ret = proxy != NULL;
|
ret = proxy != NULL;
|
||||||
free(proxy);
|
free(proxy);
|
||||||
return ret;
|
return ret;
|
||||||
|
125
src/warc.c
125
src/warc.c
@ -165,10 +165,12 @@ warc_write_buffer (const char *buffer, size_t size)
|
|||||||
static bool
|
static bool
|
||||||
warc_write_string (const char *str)
|
warc_write_string (const char *str)
|
||||||
{
|
{
|
||||||
|
size_t n;
|
||||||
|
|
||||||
if (!warc_write_ok)
|
if (!warc_write_ok)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
size_t n = strlen (str);
|
n = strlen (str);
|
||||||
if (n != warc_write_buffer (str, n))
|
if (n != warc_write_buffer (str, n))
|
||||||
warc_write_ok = false;
|
warc_write_ok = false;
|
||||||
|
|
||||||
@ -257,6 +259,9 @@ warc_write_block_from_file (FILE *data_in)
|
|||||||
{
|
{
|
||||||
/* Add the Content-Length header. */
|
/* Add the Content-Length header. */
|
||||||
char content_length[MAX_INT_TO_STRING_LEN(off_t)];
|
char content_length[MAX_INT_TO_STRING_LEN(off_t)];
|
||||||
|
char buffer[BUFSIZ];
|
||||||
|
size_t s;
|
||||||
|
|
||||||
fseeko (data_in, 0L, SEEK_END);
|
fseeko (data_in, 0L, SEEK_END);
|
||||||
number_to_string (content_length, ftello (data_in));
|
number_to_string (content_length, ftello (data_in));
|
||||||
warc_write_header ("Content-Length", content_length);
|
warc_write_header ("Content-Length", content_length);
|
||||||
@ -268,8 +273,6 @@ warc_write_block_from_file (FILE *data_in)
|
|||||||
warc_write_ok = false;
|
warc_write_ok = false;
|
||||||
|
|
||||||
/* Copy the data in the file to the WARC record. */
|
/* Copy the data in the file to the WARC record. */
|
||||||
char buffer[BUFSIZ];
|
|
||||||
size_t s;
|
|
||||||
while (warc_write_ok && (s = fread (buffer, 1, BUFSIZ, data_in)) > 0)
|
while (warc_write_ok && (s = fread (buffer, 1, BUFSIZ, data_in)) > 0)
|
||||||
{
|
{
|
||||||
if (warc_write_buffer (buffer, s) < s)
|
if (warc_write_buffer (buffer, s) < s)
|
||||||
@ -294,6 +297,11 @@ warc_write_end_record (void)
|
|||||||
/* We start a new gzip stream for each record. */
|
/* We start a new gzip stream for each record. */
|
||||||
if (warc_write_ok && warc_current_gzfile)
|
if (warc_write_ok && warc_current_gzfile)
|
||||||
{
|
{
|
||||||
|
char extra_header[EXTRA_GZIP_HEADER_SIZE];
|
||||||
|
char static_header[GZIP_STATIC_HEADER_SIZE];
|
||||||
|
off_t current_offset, uncompressed_size, compressed_size;
|
||||||
|
size_t result;
|
||||||
|
|
||||||
if (gzclose (warc_current_gzfile) != Z_OK)
|
if (gzclose (warc_current_gzfile) != Z_OK)
|
||||||
{
|
{
|
||||||
warc_write_ok = false;
|
warc_write_ok = false;
|
||||||
@ -319,17 +327,16 @@ warc_write_end_record (void)
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/* Calculate the uncompressed and compressed sizes. */
|
/* Calculate the uncompressed and compressed sizes. */
|
||||||
off_t current_offset = ftello (warc_current_file);
|
current_offset = ftello (warc_current_file);
|
||||||
off_t uncompressed_size = current_offset - warc_current_gzfile_offset;
|
uncompressed_size = current_offset - warc_current_gzfile_offset;
|
||||||
off_t compressed_size = warc_current_gzfile_uncompressed_size;
|
compressed_size = warc_current_gzfile_uncompressed_size;
|
||||||
|
|
||||||
/* Go back to the static GZIP header. */
|
/* Go back to the static GZIP header. */
|
||||||
fseeko (warc_current_file, warc_current_gzfile_offset
|
fseeko (warc_current_file, warc_current_gzfile_offset
|
||||||
+ EXTRA_GZIP_HEADER_SIZE, SEEK_SET);
|
+ EXTRA_GZIP_HEADER_SIZE, SEEK_SET);
|
||||||
|
|
||||||
/* Read the header. */
|
/* Read the header. */
|
||||||
char static_header[GZIP_STATIC_HEADER_SIZE];
|
result = fread (static_header, 1, GZIP_STATIC_HEADER_SIZE,
|
||||||
size_t result = fread (static_header, 1, GZIP_STATIC_HEADER_SIZE,
|
|
||||||
warc_current_file);
|
warc_current_file);
|
||||||
if (result != GZIP_STATIC_HEADER_SIZE)
|
if (result != GZIP_STATIC_HEADER_SIZE)
|
||||||
{
|
{
|
||||||
@ -346,7 +353,6 @@ warc_write_end_record (void)
|
|||||||
fwrite (static_header, 1, GZIP_STATIC_HEADER_SIZE, warc_current_file);
|
fwrite (static_header, 1, GZIP_STATIC_HEADER_SIZE, warc_current_file);
|
||||||
|
|
||||||
/* Prepare the extra GZIP header. */
|
/* Prepare the extra GZIP header. */
|
||||||
char extra_header[EXTRA_GZIP_HEADER_SIZE];
|
|
||||||
/* XLEN, the length of the extra header fields. */
|
/* XLEN, the length of the extra header fields. */
|
||||||
extra_header[0] = ((EXTRA_GZIP_HEADER_SIZE - 2) & 255);
|
extra_header[0] = ((EXTRA_GZIP_HEADER_SIZE - 2) & 255);
|
||||||
extra_header[1] = ((EXTRA_GZIP_HEADER_SIZE - 2) >> 8) & 255;
|
extra_header[1] = ((EXTRA_GZIP_HEADER_SIZE - 2) >> 8) & 255;
|
||||||
@ -660,16 +666,18 @@ warc_uuid_str (char *urn_str)
|
|||||||
static bool
|
static bool
|
||||||
warc_write_warcinfo_record (char *filename)
|
warc_write_warcinfo_record (char *filename)
|
||||||
{
|
{
|
||||||
|
FILE *warc_tmp;
|
||||||
|
char timestamp[22];
|
||||||
|
char *filename_copy, *filename_basename;
|
||||||
|
|
||||||
/* Write warc-info record as the first record of the file. */
|
/* Write warc-info record as the first record of the file. */
|
||||||
/* We add the record id of this info record to the other records in the
|
/* We add the record id of this info record to the other records in the
|
||||||
file. */
|
file. */
|
||||||
warc_current_warcinfo_uuid_str = (char *) malloc (48);
|
warc_current_warcinfo_uuid_str = (char *) malloc (48);
|
||||||
warc_uuid_str (warc_current_warcinfo_uuid_str);
|
warc_uuid_str (warc_current_warcinfo_uuid_str);
|
||||||
|
|
||||||
char timestamp[22];
|
|
||||||
warc_timestamp (timestamp);
|
warc_timestamp (timestamp);
|
||||||
|
|
||||||
char *filename_copy, *filename_basename;
|
|
||||||
filename_copy = strdup (filename);
|
filename_copy = strdup (filename);
|
||||||
filename_basename = strdup (basename (filename_copy));
|
filename_basename = strdup (basename (filename_copy));
|
||||||
|
|
||||||
@ -681,7 +689,7 @@ warc_write_warcinfo_record (char *filename)
|
|||||||
warc_write_header ("WARC-Filename", filename_basename);
|
warc_write_header ("WARC-Filename", filename_basename);
|
||||||
|
|
||||||
/* Create content. */
|
/* Create content. */
|
||||||
FILE *warc_tmp = warc_tempfile ();
|
warc_tmp = warc_tempfile ();
|
||||||
if (warc_tmp == NULL)
|
if (warc_tmp == NULL)
|
||||||
{
|
{
|
||||||
free (filename_copy);
|
free (filename_copy);
|
||||||
@ -731,22 +739,6 @@ warc_write_warcinfo_record (char *filename)
|
|||||||
static bool
|
static bool
|
||||||
warc_start_new_file (bool meta)
|
warc_start_new_file (bool meta)
|
||||||
{
|
{
|
||||||
if (opt.warc_filename == NULL)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (warc_current_file != NULL)
|
|
||||||
fclose (warc_current_file);
|
|
||||||
|
|
||||||
free (warc_current_warcinfo_uuid_str);
|
|
||||||
free (warc_current_filename);
|
|
||||||
|
|
||||||
warc_current_file_number++;
|
|
||||||
|
|
||||||
int base_filename_length = strlen (opt.warc_filename);
|
|
||||||
/* filename format: base + "-" + 5 digit serial number + ".warc.gz" */
|
|
||||||
char *new_filename = malloc (base_filename_length + 1 + 5 + 8 + 1);
|
|
||||||
warc_current_filename = new_filename;
|
|
||||||
|
|
||||||
#ifdef __VMS
|
#ifdef __VMS
|
||||||
# define WARC_GZ "warc-gz"
|
# define WARC_GZ "warc-gz"
|
||||||
#else /* def __VMS */
|
#else /* def __VMS */
|
||||||
@ -759,6 +751,25 @@ warc_start_new_file (bool meta)
|
|||||||
const char *extension = "warc";
|
const char *extension = "warc";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
int base_filename_length;
|
||||||
|
char *new_filename;
|
||||||
|
|
||||||
|
if (opt.warc_filename == NULL)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (warc_current_file != NULL)
|
||||||
|
fclose (warc_current_file);
|
||||||
|
|
||||||
|
free (warc_current_warcinfo_uuid_str);
|
||||||
|
free (warc_current_filename);
|
||||||
|
|
||||||
|
warc_current_file_number++;
|
||||||
|
|
||||||
|
base_filename_length = strlen (opt.warc_filename);
|
||||||
|
/* filename format: base + "-" + 5 digit serial number + ".warc.gz" */
|
||||||
|
new_filename = malloc (base_filename_length + 1 + 5 + 8 + 1);
|
||||||
|
warc_current_filename = new_filename;
|
||||||
|
|
||||||
/* If max size is enabled, we add a serial number to the file names. */
|
/* If max size is enabled, we add a serial number to the file names. */
|
||||||
if (meta)
|
if (meta)
|
||||||
sprintf (new_filename, "%s-meta.%s", opt.warc_filename, extension);
|
sprintf (new_filename, "%s-meta.%s", opt.warc_filename, extension);
|
||||||
@ -830,12 +841,13 @@ static bool
|
|||||||
warc_parse_cdx_header (char *lineptr, int *field_num_original_url,
|
warc_parse_cdx_header (char *lineptr, int *field_num_original_url,
|
||||||
int *field_num_checksum, int *field_num_record_id)
|
int *field_num_checksum, int *field_num_record_id)
|
||||||
{
|
{
|
||||||
|
char *token;
|
||||||
|
char *save_ptr;
|
||||||
|
|
||||||
*field_num_original_url = -1;
|
*field_num_original_url = -1;
|
||||||
*field_num_checksum = -1;
|
*field_num_checksum = -1;
|
||||||
*field_num_record_id = -1;
|
*field_num_record_id = -1;
|
||||||
|
|
||||||
char *token;
|
|
||||||
char *save_ptr;
|
|
||||||
token = strtok_r (lineptr, CDX_FIELDSEP, &save_ptr);
|
token = strtok_r (lineptr, CDX_FIELDSEP, &save_ptr);
|
||||||
|
|
||||||
if (token != NULL && strcmp (token, "CDX") == 0)
|
if (token != NULL && strcmp (token, "CDX") == 0)
|
||||||
@ -876,13 +888,12 @@ warc_process_cdx_line (char *lineptr, int field_num_original_url,
|
|||||||
char *original_url = NULL;
|
char *original_url = NULL;
|
||||||
char *checksum = NULL;
|
char *checksum = NULL;
|
||||||
char *record_id = NULL;
|
char *record_id = NULL;
|
||||||
|
|
||||||
char *token;
|
char *token;
|
||||||
char *save_ptr;
|
char *save_ptr;
|
||||||
token = strtok_r (lineptr, CDX_FIELDSEP, &save_ptr);
|
int field_num = 0;
|
||||||
|
|
||||||
/* Read this line to get the fields we need. */
|
/* Read this line to get the fields we need. */
|
||||||
int field_num = 0;
|
token = strtok_r (lineptr, CDX_FIELDSEP, &save_ptr);
|
||||||
while (token != NULL)
|
while (token != NULL)
|
||||||
{
|
{
|
||||||
char **val;
|
char **val;
|
||||||
@ -944,17 +955,17 @@ warc_process_cdx_line (char *lineptr, int field_num_original_url,
|
|||||||
static bool
|
static bool
|
||||||
warc_load_cdx_dedup_file (void)
|
warc_load_cdx_dedup_file (void)
|
||||||
{
|
{
|
||||||
FILE *f = fopen (opt.warc_cdx_dedup_filename, "r");
|
FILE *f;
|
||||||
if (f == NULL)
|
char *lineptr = NULL;
|
||||||
return false;
|
size_t n = 0;
|
||||||
|
ssize_t line_length;
|
||||||
int field_num_original_url = -1;
|
int field_num_original_url = -1;
|
||||||
int field_num_checksum = -1;
|
int field_num_checksum = -1;
|
||||||
int field_num_record_id = -1;
|
int field_num_record_id = -1;
|
||||||
|
|
||||||
char *lineptr = NULL;
|
f = fopen (opt.warc_cdx_dedup_filename, "r");
|
||||||
size_t n = 0;
|
if (f == NULL)
|
||||||
ssize_t line_length;
|
return false;
|
||||||
|
|
||||||
/* The first line should contain the CDX header.
|
/* The first line should contain the CDX header.
|
||||||
Format: " CDX x x x x x"
|
Format: " CDX x x x x x"
|
||||||
@ -983,6 +994,8 @@ _("CDX file does not list record ids. (Missing column 'u'.)\n"));
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
int nrecords;
|
||||||
|
|
||||||
/* Initialize the table. */
|
/* Initialize the table. */
|
||||||
warc_cdx_dedup_table = hash_table_new (1000, warc_hash_sha1_digest,
|
warc_cdx_dedup_table = hash_table_new (1000, warc_hash_sha1_digest,
|
||||||
warc_cmp_sha1_digest);
|
warc_cmp_sha1_digest);
|
||||||
@ -1000,7 +1013,7 @@ _("CDX file does not list record ids. (Missing column 'u'.)\n"));
|
|||||||
while (line_length != -1);
|
while (line_length != -1);
|
||||||
|
|
||||||
/* Print results. */
|
/* Print results. */
|
||||||
int nrecords = hash_table_count (warc_cdx_dedup_table);
|
nrecords = hash_table_count (warc_cdx_dedup_table);
|
||||||
logprintf (LOG_VERBOSE, ngettext ("Loaded %d record from CDX.\n\n",
|
logprintf (LOG_VERBOSE, ngettext ("Loaded %d record from CDX.\n\n",
|
||||||
"Loaded %d records from CDX.\n\n",
|
"Loaded %d records from CDX.\n\n",
|
||||||
nrecords),
|
nrecords),
|
||||||
@ -1020,11 +1033,12 @@ _("CDX file does not list record ids. (Missing column 'u'.)\n"));
|
|||||||
static struct warc_cdx_record *
|
static struct warc_cdx_record *
|
||||||
warc_find_duplicate_cdx_record (char *url, char *sha1_digest_payload)
|
warc_find_duplicate_cdx_record (char *url, char *sha1_digest_payload)
|
||||||
{
|
{
|
||||||
|
struct warc_cdx_record *rec_existing;
|
||||||
|
|
||||||
if (warc_cdx_dedup_table == NULL)
|
if (warc_cdx_dedup_table == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
struct warc_cdx_record *rec_existing
|
rec_existing = hash_table_get (warc_cdx_dedup_table, sha1_digest_payload);
|
||||||
= hash_table_get (warc_cdx_dedup_table, sha1_digest_payload);
|
|
||||||
|
|
||||||
if (rec_existing && strcmp (rec_existing->url, url) == 0)
|
if (rec_existing && strcmp (rec_existing->url, url) == 0)
|
||||||
return rec_existing;
|
return rec_existing;
|
||||||
@ -1095,11 +1109,13 @@ warc_init (void)
|
|||||||
static void
|
static void
|
||||||
warc_write_metadata (void)
|
warc_write_metadata (void)
|
||||||
{
|
{
|
||||||
|
char manifest_uuid[48];
|
||||||
|
FILE *warc_tmp_fp;
|
||||||
|
|
||||||
/* If there are multiple WARC files, the metadata should be written to a separate file. */
|
/* If there are multiple WARC files, the metadata should be written to a separate file. */
|
||||||
if (opt.warc_maxsize > 0)
|
if (opt.warc_maxsize > 0)
|
||||||
warc_start_new_file (true);
|
warc_start_new_file (true);
|
||||||
|
|
||||||
char manifest_uuid [48];
|
|
||||||
warc_uuid_str (manifest_uuid);
|
warc_uuid_str (manifest_uuid);
|
||||||
|
|
||||||
fflush (warc_manifest_fp);
|
fflush (warc_manifest_fp);
|
||||||
@ -1109,7 +1125,7 @@ warc_write_metadata (void)
|
|||||||
warc_manifest_fp, -1);
|
warc_manifest_fp, -1);
|
||||||
/* warc_write_resource_record has closed warc_manifest_fp. */
|
/* warc_write_resource_record has closed warc_manifest_fp. */
|
||||||
|
|
||||||
FILE * warc_tmp_fp = warc_tempfile ();
|
warc_tmp_fp = warc_tempfile ();
|
||||||
if (warc_tmp_fp == NULL)
|
if (warc_tmp_fp == NULL)
|
||||||
{
|
{
|
||||||
logprintf (LOG_NOTQUIET, _("Could not open temporary WARC file.\n"));
|
logprintf (LOG_NOTQUIET, _("Could not open temporary WARC file.\n"));
|
||||||
@ -1164,6 +1180,8 @@ FILE *
|
|||||||
warc_tempfile (void)
|
warc_tempfile (void)
|
||||||
{
|
{
|
||||||
char filename[100];
|
char filename[100];
|
||||||
|
int fd;
|
||||||
|
|
||||||
if (path_search (filename, 100, opt.warc_tempdir, "wget", true) == -1)
|
if (path_search (filename, 100, opt.warc_tempdir, "wget", true) == -1)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
@ -1182,7 +1200,7 @@ warc_tempfile (void)
|
|||||||
return fopen (tfn, "w+", "fop=tmd"); /* Create auto-delete temp file. */
|
return fopen (tfn, "w+", "fop=tmd"); /* Create auto-delete temp file. */
|
||||||
}
|
}
|
||||||
#else /* def __VMS */
|
#else /* def __VMS */
|
||||||
int fd = mkostemp (filename, O_TEMPORARY);
|
fd = mkostemp (filename, O_TEMPORARY);
|
||||||
if (fd < 0)
|
if (fd < 0)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
@ -1246,6 +1264,9 @@ warc_write_cdx_record (const char *url, const char *timestamp_str,
|
|||||||
{
|
{
|
||||||
/* Transform the timestamp. */
|
/* Transform the timestamp. */
|
||||||
char timestamp_str_cdx[15];
|
char timestamp_str_cdx[15];
|
||||||
|
char offset_string[MAX_INT_TO_STRING_LEN(off_t)];
|
||||||
|
const char *checksum;
|
||||||
|
|
||||||
memcpy (timestamp_str_cdx , timestamp_str , 4); /* "YYYY" "-" */
|
memcpy (timestamp_str_cdx , timestamp_str , 4); /* "YYYY" "-" */
|
||||||
memcpy (timestamp_str_cdx + 4, timestamp_str + 5, 2); /* "mm" "-" */
|
memcpy (timestamp_str_cdx + 4, timestamp_str + 5, 2); /* "mm" "-" */
|
||||||
memcpy (timestamp_str_cdx + 6, timestamp_str + 8, 2); /* "dd" "T" */
|
memcpy (timestamp_str_cdx + 6, timestamp_str + 8, 2); /* "dd" "T" */
|
||||||
@ -1255,7 +1276,6 @@ warc_write_cdx_record (const char *url, const char *timestamp_str,
|
|||||||
timestamp_str_cdx[14] = '\0';
|
timestamp_str_cdx[14] = '\0';
|
||||||
|
|
||||||
/* Rewrite the checksum. */
|
/* Rewrite the checksum. */
|
||||||
const char *checksum;
|
|
||||||
if (payload_digest != NULL)
|
if (payload_digest != NULL)
|
||||||
checksum = payload_digest + 5; /* Skip the "sha1:" */
|
checksum = payload_digest + 5; /* Skip the "sha1:" */
|
||||||
else
|
else
|
||||||
@ -1266,7 +1286,6 @@ warc_write_cdx_record (const char *url, const char *timestamp_str,
|
|||||||
if (redirect_location == NULL || strlen(redirect_location) == 0)
|
if (redirect_location == NULL || strlen(redirect_location) == 0)
|
||||||
redirect_location = "-";
|
redirect_location = "-";
|
||||||
|
|
||||||
char offset_string[MAX_INT_TO_STRING_LEN(off_t)];
|
|
||||||
number_to_string (offset_string, offset);
|
number_to_string (offset_string, offset);
|
||||||
|
|
||||||
/* Print the CDX line. */
|
/* Print the CDX line. */
|
||||||
@ -1298,10 +1317,11 @@ warc_write_revisit_record (char *url, char *timestamp_str,
|
|||||||
char *refers_to, ip_address *ip, FILE *body)
|
char *refers_to, ip_address *ip, FILE *body)
|
||||||
{
|
{
|
||||||
char revisit_uuid [48];
|
char revisit_uuid [48];
|
||||||
warc_uuid_str (revisit_uuid);
|
|
||||||
|
|
||||||
char *block_digest = NULL;
|
char *block_digest = NULL;
|
||||||
char sha1_res_block[SHA1_DIGEST_SIZE];
|
char sha1_res_block[SHA1_DIGEST_SIZE];
|
||||||
|
|
||||||
|
warc_uuid_str (revisit_uuid);
|
||||||
|
|
||||||
sha1_stream (body, sha1_res_block);
|
sha1_stream (body, sha1_res_block);
|
||||||
block_digest = warc_base32_sha1_digest (sha1_res_block);
|
block_digest = warc_base32_sha1_digest (sha1_res_block);
|
||||||
|
|
||||||
@ -1351,6 +1371,8 @@ warc_write_response_record (char *url, char *timestamp_str,
|
|||||||
char *payload_digest = NULL;
|
char *payload_digest = NULL;
|
||||||
char sha1_res_block[SHA1_DIGEST_SIZE];
|
char sha1_res_block[SHA1_DIGEST_SIZE];
|
||||||
char sha1_res_payload[SHA1_DIGEST_SIZE];
|
char sha1_res_payload[SHA1_DIGEST_SIZE];
|
||||||
|
char response_uuid [48];
|
||||||
|
off_t offset;
|
||||||
|
|
||||||
if (opt.warc_digests_enabled)
|
if (opt.warc_digests_enabled)
|
||||||
{
|
{
|
||||||
@ -1395,11 +1417,10 @@ warc_write_response_record (char *url, char *timestamp_str,
|
|||||||
|
|
||||||
/* Not a revisit, just store the record. */
|
/* Not a revisit, just store the record. */
|
||||||
|
|
||||||
char response_uuid [48];
|
|
||||||
warc_uuid_str (response_uuid);
|
warc_uuid_str (response_uuid);
|
||||||
|
|
||||||
fseeko (warc_current_file, 0L, SEEK_END);
|
fseeko (warc_current_file, 0L, SEEK_END);
|
||||||
off_t offset = ftello (warc_current_file);
|
offset = ftello (warc_current_file);
|
||||||
|
|
||||||
warc_write_start_record ();
|
warc_write_start_record ();
|
||||||
warc_write_header ("WARC-Type", "response");
|
warc_write_header ("WARC-Type", "response");
|
||||||
|
Loading…
Reference in New Issue
Block a user