From 3a8c75cac48ba8c7a72c5a666a315a0204eb0f4a Mon Sep 17 00:00:00 2001
From: dan
Date: Thu, 2 Mar 2000 14:48:07 -0800
Subject: [PATCH] [svn] Dan Berger's query string patch is totally bogus.  If
 you have two different URLs, get_page.cgi?page1 and get_page.cgi?page2,
 they'll both be saved as get_page.cgi and the second will overwrite the
 first.  Also, parameters to implicit CGIs, like
 "http://www.host.com/db/?2000-03-02" cause the URLs to be printed with
 trailing garbage characters, and could seg fault.  I'm not sure what Dan had
 in mind with this patch (no explanatory comments), but I'm removing it for
 now.  If he can rewrite it so it doesn't break stuff, okay.

---
 src/ChangeLog | 18 +++++++++++++++++-
 src/http.c    |  9 +++------
 src/url.c     | 32 ++++----------------------------
 3 files changed, 24 insertions(+), 35 deletions(-)

diff --git a/src/ChangeLog b/src/ChangeLog
index 462ccceb..bdc94cde 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -3,7 +3,16 @@
 	* ftp.c (ftp_loop_internal): Heiko introduced "suggest explicit
 	braces to avoid ambiguous `else'" warnings.  Eliminated them.
 
-	* http.c (http_loop): Heiko introduced "suggest explicit
+	* http.c (gethttp): Dan Berger's query string patch is totally
+	bogus.  If you have two different URLs, get_page.cgi?page1 and
+	get_page.cgi?page2, they'll both be saved as get_page.cgi and the
+	second will overwrite the first.  Also, parameters to implicit
+	CGIs, like "http://www.host.com/db/?2000-03-02" cause the URLs to
+	be printed with trailing garbage characters, and could seg fault.
+	I'm not sure what Dan had in mind with this patch (no explanatory
+	comments), but I'm removing it for now.  If he can rewrite it so
+	it doesn't break stuff, okay.
+	(http_loop): Heiko introduced "suggest explicit
 	braces to avoid ambiguous `else'" warnings.  Eliminated them.
 
 	* main.c: Heiko's --wait / --waitretry backwards compatibility
@@ -11,6 +20,13 @@
 	'wr' was used without being initialized, and a long int was passed
 	into setval()'s char* val parameter.
 
+	* recur.c (parse_robots): Applied Edward J. Sabol's patch for
+	Guan Yang's reported problem with "User-agent:*" lines in
+	robots.txt.
+
+	* url.c (parseurl, str_url): Removing Dan Berger's code (see
+	http.c above for explanation).
+
 1999-08-25  Heiko Herold
 
 	* ftp.c: Respect new option waitretry.
diff --git a/src/http.c b/src/http.c
index 8f542423..aaaeb580 100644
--- a/src/http.c
+++ b/src/http.c
@@ -303,7 +303,7 @@ static time_t http_atotm PARAMS ((char *));
 static uerr_t
 gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
 {
-  char *request, *type, *command, *path, *qstring;
+  char *request, *type, *command, *path;
   char *user, *passwd;
   char *pragma_h, *referer, *useragent, *range, *wwwauth, *remhost;
   char *authenticate_h;
@@ -385,8 +385,6 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
   else
     path = u->path;
 
-  qstring = u->qstring;
-
   command = (*dt & HEAD_ONLY) ? "HEAD" : "GET";
   referer = NULL;
   if (ou->referer)
@@ -470,7 +468,6 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
 
   /* Allocate the memory for the request.  */
   request = (char *)alloca (strlen (command) + strlen (path)
-			    + (qstring ? strlen (qstring) : 0)
 			    + strlen (useragent)
 			    + strlen (remhost) + host_port_len
 			    + strlen (HTTP_ACCEPT)
@@ -483,12 +480,12 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
 			    + 64);
   /* Construct the request.  */
   sprintf (request, "\
-%s %s%s HTTP/1.0\r\n\
+%s %s HTTP/1.0\r\n\
 User-Agent: %s\r\n\
 Host: %s%s\r\n\
 Accept: %s\r\n\
 %s%s%s%s%s%s\r\n",
-	   command, path, qstring ? qstring : "", useragent, remhost,
+	   command, path, useragent, remhost,
 	   host_port ? host_port : "", HTTP_ACCEPT,
 	   referer ? referer : "",
 	   wwwauth ? wwwauth : "",
diff --git a/src/url.c b/src/url.c
index b00484e4..33aa37b8 100644
--- a/src/url.c
+++ b/src/url.c
@@ -458,23 +458,8 @@ parseurl (const char *url, struct urlinfo *u, int strict)
   if (type == URLHTTP)
     while (url[i] && url[i] == '/')
       ++i;
-
-  /* dfb: break "path" into "path" and "qstring" if the URL is HTTP
-     if it's not an HTTP url, set l to the last character, so the
-     xmalloc and strncpy work as desired */
-  if (type == URLHTTP) {
-    for (l = i; url[l] && url[l] != '?'; l++);
-    if (l != strlen(url)) {
-      /* copy the query string, including the '?' into u->qstring */
-      u->qstring = (char *)xmalloc (strlen (url + l) + 8);
-      strcpy (u->qstring, url + l);
-    }
-  } else {
-    l = strlen(url);
-  }
-
-
-  u->path = strdupdelim (url + i, url + l);
+  u->path = (char *)xmalloc (strlen (url + i) + 8);
+  strcpy (u->path, url + i);
   if (type == URLFTP)
     {
       u->ftp_type = process_ftp_type (u->path);
@@ -495,8 +480,6 @@
   /* Parse the directory.  */
   parse_dir (u->path, &u->dir, &u->file);
   DEBUGP (("dir %s -> file %s -> ", u->dir, u->file));
-  if (type == URLHTTP && u->qstring)
-    DEBUGP (("query-string %s -> ", u->qstring));
   /* Simplify the directory.  */
   path_simplify (u->dir);
   /* Remove the leading `/' in HTTP.  */
@@ -643,7 +626,7 @@ char *
 str_url (const struct urlinfo *u, int hide)
 {
   char *res, *host, *user, *passwd, *proto_name, *dir, *file;
-  int i, l, ln, lu, lh, lp, lf, ld, lq;
+  int i, l, ln, lu, lh, lp, lf, ld;
 
   /* Look for the protocol name.  */
   for (i = 0; i < ARRAY_SIZE (sup_protos); i++)
@@ -684,8 +667,7 @@ str_url (const struct urlinfo *u, int hide)
   lh = strlen (host);
   ld = strlen (dir);
   lf = strlen (file);
-  lq = (u->proto == URLHTTP && u->qstring) ? strlen (u->qstring) : 0;
-  res = (char *)xmalloc (ln + lu + lp + lh + ld + lf + lq + 20); /* safe sex */
+  res = (char *)xmalloc (ln + lu + lp + lh + ld + lf + 20); /* safe sex */
   /* sprintf (res, "%s%s%s%s%s%s:%d/%s%s%s",
      proto_name, (user ? user : ""), (passwd ? ":" : ""),
      (passwd ? passwd : ""), (user ? "@" : ""),
@@ -716,15 +698,9 @@
   if (*dir)
     res[l++] = '/';
   strcpy (res + l, file);
-  l += lf;
   free (host);
   free (dir);
   free (file);
-  if (u->qstring)
-    {
-      /* copy in the raw query string to avoid munging arguments */
-      memcpy (res + l, u->qstring, lq);
-    }
   FREE_MAYBE (user);
   FREE_MAYBE (passwd);
   return res;
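A note on the "trailing garbage characters" symptom described above: in the
reverted str_url() code, the query string was appended with memcpy() after the
final strcpy(), which overwrites the terminating '\0' that strcpy() had written
and never writes a new one, so printing the result walks into uninitialized
xmalloc'd bytes.  The standalone sketch below reproduces that failure mode; it
is illustrative only (plain malloc/printf and made-up base/qstring values,
rather than wget's xmalloc and URL structures), not wget code.

/* Illustrative sketch, not wget code: appending a query string with memcpy()
   and no terminating '\0' leaves whatever follows in the buffer visible when
   the URL is printed.  */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main (void)
{
  const char *base = "http://www.host.com/db/";
  const char *qstring = "?2000-03-02";
  size_t lb = strlen (base), lq = strlen (qstring);

  char *res = malloc (lb + lq + 20);   /* slack bytes, as in str_url() */
  memset (res, 'X', lb + lq + 19);     /* simulate uninitialized heap bytes */
  res[lb + lq + 19] = '\0';

  strcpy (res, base);                  /* NUL-terminated so far */
  memcpy (res + lb, qstring, lq);      /* overwrites the '\0', adds no new one */
  printf ("%s\n", res);                /* prints the URL plus trailing 'X' garbage */

  res[lb + lq] = '\0';                 /* the missing step: terminate the string */
  printf ("%s\n", res);                /* now prints the URL correctly */

  free (res);
  return 0;
}

With the memcpy() step removed, the last thing str_url() writes is
strcpy (res + l, file), which always terminates the result, so the symptom
goes away.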