From ea3745e8dc2f63cc26557f111f77374dc63e5976 Mon Sep 17 00:00:00 2001 From: gerel Date: Sun, 1 Feb 2009 15:03:51 -0300 Subject: [PATCH] removed some more calls --- src/main.c | 55 ++++++++++++++++++++++++++++-------------------- src/recur.c | 18 +++------------- src/recur.h | 4 +++- src/res.c | 20 ++++++++++++++++-- src/retr.c | 60 ++++++++++++++++++++++++++++++++--------------------- src/retr.h | 6 ++++-- 6 files changed, 97 insertions(+), 66 deletions(-) diff --git a/src/main.c b/src/main.c index 02ecb1d6..73882918 100644 --- a/src/main.c +++ b/src/main.c @@ -1178,34 +1178,45 @@ WARNING: Can't reopen standard output in binary mode;\n\ for (t = url; *t; t++) { char *filename = NULL, *redirected_URL = NULL; - int dt; + int dt, url_err; + struct url *url_parsed = url_parse (*t, &url_err); - if ((opt.recursive || opt.page_requisites) - && (url_scheme (*t) != SCHEME_FTP || url_uses_proxy (*t))) + if (!url_parsed) { - int old_follow_ftp = opt.follow_ftp; - - /* Turn opt.follow_ftp on in case of recursive FTP retrieval */ - if (url_scheme (*t) == SCHEME_FTP) - opt.follow_ftp = 1; - - status = retrieve_tree (*t); - - opt.follow_ftp = old_follow_ftp; + char *error = url_error (*t, url_err); + logprintf (LOG_NOTQUIET, "%s: %s.\n",*t, error); + xfree (error); + status = URLERROR; } else - status = retrieve_url (*t, &filename, &redirected_URL, NULL, &dt, opt.recursive); - - if (opt.delete_after && file_exists_p(filename)) { - DEBUGP (("Removing file due to --delete-after in main():\n")); - logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename); - if (unlink (filename)) - logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno)); - } + if ((opt.recursive || opt.page_requisites) + && (url_scheme (*t) != SCHEME_FTP || url_uses_proxy (url_parsed))) + { + int old_follow_ftp = opt.follow_ftp; - xfree_null (redirected_URL); - xfree_null (filename); + /* Turn opt.follow_ftp on in case of recursive FTP retrieval */ + if (url_scheme (*t) == SCHEME_FTP) + opt.follow_ftp = 1; + + status = retrieve_tree (url_parsed); + + opt.follow_ftp = old_follow_ftp; + } + else + status = retrieve_url (url_parsed, *t, &filename, &redirected_URL, NULL, &dt, opt.recursive); + + if (opt.delete_after && file_exists_p(filename)) + { + DEBUGP (("Removing file due to --delete-after in main():\n")); + logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename); + if (unlink (filename)) + logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno)); + } + xfree_null (redirected_URL); + xfree_null (filename); + url_free (url_parsed); + } } /* And then from the input file, if any. */ diff --git a/src/recur.c b/src/recur.c index 98e7dc49..2e067505 100644 --- a/src/recur.c +++ b/src/recur.c @@ -180,7 +180,7 @@ static bool descend_redirect_p (const char *, struct url *, int, options, add it to the queue. */ uerr_t -retrieve_tree (const char *start_url) +retrieve_tree (struct url *start_url_parsed) { uerr_t status = RETROK; @@ -191,17 +191,6 @@ retrieve_tree (const char *start_url) the queue, but haven't been downloaded yet. */ struct hash_table *blacklist; - int up_error_code; - struct url *start_url_parsed = url_parse (start_url, &up_error_code); - - if (!start_url_parsed) - { - char *error = url_error (start_url, up_error_code); - logprintf (LOG_NOTQUIET, "%s: %s.\n", start_url, error); - xfree (error); - return URLERROR; - } - queue = url_queue_new (); blacklist = make_string_hash_table (0); @@ -277,7 +266,8 @@ retrieve_tree (const char *start_url) } else { - status = retrieve_url (url, &file, &redirected, referer, &dt, false); + status = retrieve_url (url_parsed, url, &file, &redirected, + referer, &dt, false); } if (html_allowed && file && status == RETROK @@ -451,8 +441,6 @@ retrieve_tree (const char *start_url) } url_queue_delete (queue); - if (start_url_parsed) - url_free (start_url_parsed); string_set_free (blacklist); if (opt.quota && total_downloaded_bytes > opt.quota) diff --git a/src/recur.h b/src/recur.h index 5ab26a95..40a8c2de 100644 --- a/src/recur.h +++ b/src/recur.h @@ -31,6 +31,8 @@ as that of the covered work. */ #ifndef RECUR_H #define RECUR_H +#include "url.h" + /* For most options, 0 means no limits, but with -p in the picture, that causes a problem on the maximum recursion depth variable. To retain backwards compatibility we allow users to consider "0" to be @@ -42,6 +44,6 @@ as that of the covered work. */ struct urlpos; void recursive_cleanup (void); -uerr_t retrieve_tree (const char *); +uerr_t retrieve_tree (struct url *); #endif /* RECUR_H */ diff --git a/src/res.c b/src/res.c index 8c35f0e1..20ffe1c8 100644 --- a/src/res.c +++ b/src/res.c @@ -537,13 +537,29 @@ res_retrieve_file (const char *url, char **file) uerr_t err; char *robots_url = uri_merge (url, RES_SPECS_LOCATION); int saved_ts_val = opt.timestamping; - int saved_sp_val = opt.spider; + int saved_sp_val = opt.spider, url_err; + struct url * url_parsed; logputs (LOG_VERBOSE, _("Loading robots.txt; please ignore errors.\n")); *file = NULL; opt.timestamping = false; opt.spider = false; - err = retrieve_url (robots_url, file, NULL, NULL, NULL, false); + + url_parsed = url_parse (robots_url, &url_err); + if (!url_parsed) + { + char *error = url_error (robots_url, url_err); + logprintf (LOG_NOTQUIET, "%s: %s.\n", robots_url, error); + xfree (error); + err = URLERROR; + } + else + { + err = retrieve_url (url_parsed, robots_url, file, NULL, NULL, NULL, + false); + url_free(url_parsed); + } + opt.timestamping = saved_ts_val; opt.spider = saved_sp_val; xfree (robots_url); diff --git a/src/retr.c b/src/retr.c index 21c9002e..4fabd757 100644 --- a/src/retr.c +++ b/src/retr.c @@ -596,15 +596,15 @@ static char *getproxy (struct url *); multiple points. */ uerr_t -retrieve_url (const char *origurl, char **file, char **newloc, - const char *refurl, int *dt, bool recursive) +retrieve_url (struct url * orig_parsed, const char *origurl, char **file, + char **newloc, const char *refurl, int *dt, bool recursive) { uerr_t result; char *url; bool location_changed; int dummy; char *mynewloc, *proxy; - struct url *u, *proxy_url; + struct url *u = orig_parsed, *proxy_url; int up_error_code; /* url parse error code */ char *local_file; int redirection_count = 0; @@ -625,16 +625,6 @@ retrieve_url (const char *origurl, char **file, char **newloc, if (file) *file = NULL; - u = url_parse (url, &up_error_code); - if (!u) - { - char *error = url_error (url, up_error_code); - logprintf (LOG_NOTQUIET, "%s: %s.\n", url, error); - xfree (url); - xfree (error); - return URLERROR; - } - if (!refurl) refurl = opt.referer; @@ -733,7 +723,10 @@ retrieve_url (const char *origurl, char **file, char **newloc, char *error = url_error (mynewloc, up_error_code); logprintf (LOG_NOTQUIET, "%s: %s.\n", escnonprint_uri (mynewloc), error); - url_free (u); + if (orig_parsed != u) + { + url_free (u); + } xfree (url); xfree (mynewloc); xfree (error); @@ -753,7 +746,10 @@ retrieve_url (const char *origurl, char **file, char **newloc, logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"), opt.max_redirect); url_free (newloc_parsed); - url_free (u); + if (orig_parsed != u) + { + url_free (u); + } xfree (url); xfree (mynewloc); RESTORE_POST_DATA; @@ -762,7 +758,10 @@ retrieve_url (const char *origurl, char **file, char **newloc, xfree (url); url = mynewloc; - url_free (u); + if (orig_parsed != u) + { + url_free (u); + } u = newloc_parsed; /* If we're being redirected from POST, we don't want to POST @@ -795,7 +794,10 @@ retrieve_url (const char *origurl, char **file, char **newloc, else xfree_null (local_file); - url_free (u); + if (orig_parsed != u) + { + url_free (u); + } if (redirection_count) { @@ -836,13 +838,22 @@ retrieve_from_file (const char *file, bool html, int *count) if (url_has_scheme (url)) { - int dt; + int dt,url_err; uerr_t status; + struct url * url_parsed = url_parse(url, &url_err); + + if (!url_parsed) + { + char *error = url_error (url, url_err); + logprintf (LOG_NOTQUIET, "%s: %s.\n", url, error); + xfree (error); + return URLERROR; + } if (!opt.base_href) opt.base_href = xstrdup (url); - status = retrieve_url (url, &input_file, NULL, NULL, &dt, false); + status = retrieve_url (url_parsed, url, &input_file, NULL, NULL, &dt, false); if (status != RETROK) return status; @@ -877,12 +888,15 @@ retrieve_from_file (const char *file, bool html, int *count) if (cur_url->url->scheme == SCHEME_FTP) opt.follow_ftp = 1; - status = retrieve_tree (cur_url->url->url); + status = retrieve_tree (cur_url->url); opt.follow_ftp = old_follow_ftp; } else - status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, &dt, opt.recursive); + { + status = retrieve_url (cur_url->url, cur_url->url->url, &filename, + &new_file, NULL, &dt, opt.recursive); + } if (filename && opt.delete_after && file_exists_p (filename)) { @@ -1050,14 +1064,12 @@ getproxy (struct url *u) /* Returns true if URL would be downloaded through a proxy. */ bool -url_uses_proxy (const char *url) +url_uses_proxy (struct url * u) { bool ret; - struct url *u = url_parse (url, NULL); if (!u) return false; ret = getproxy (u) != NULL; - url_free (u); return ret; } diff --git a/src/retr.h b/src/retr.h index ec55cfda..72be93b7 100644 --- a/src/retr.h +++ b/src/retr.h @@ -31,6 +31,8 @@ as that of the covered work. */ #ifndef RETR_H #define RETR_H +#include "url.h" + /* These global vars should be made static to retr.c and exported via functions! */ extern SUM_SIZE_INT total_downloaded_bytes; @@ -51,7 +53,7 @@ typedef const char *(*hunk_terminator_t) (const char *, const char *, int); char *fd_read_hunk (int, hunk_terminator_t, long, long); char *fd_read_line (int); -uerr_t retrieve_url (const char *, char **, char **, const char *, int *, bool); +uerr_t retrieve_url (struct url *, const char *, char **, char **, const char *, int *, bool); uerr_t retrieve_from_file (const char *, bool, int *); const char *retr_rate (wgint, double); @@ -62,6 +64,6 @@ void sleep_between_retrievals (int); void rotate_backups (const char *); -bool url_uses_proxy (const char *); +bool url_uses_proxy (struct url *); #endif /* RETR_H */