diff --git a/src/ChangeLog b/src/ChangeLog index 462ccceb..bdc94cde 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -3,7 +3,16 @@ * ftp.c (ftp_loop_internal): Heiko introduced "suggest explicit braces to avoid ambiguous `else'" warnings. Eliminated them. - * http.c (http_loop): Heiko introduced "suggest explicit + * http.c (gethttp): Dan Berger's query string patch is totally + bogus. If you have two different URLs, get_page.cgi?page1 and + get_page.cgi?page2, they'll both be saved as get_page.cgi and the + second will overwrite the first. Also, parameters to implicit + CGIs, like "http://www.host.com/db/?2000-03-02" cause the URLs to + be printed with trailing garbage characters, and could seg fault. + I'm not sure what Dan had in mind with this patch (no explanatory + comments), but I'm removing it for now. If he can rewrite it so + it doesn't break stuff, okay. + (http_loop): Heiko introduced "suggest explicit braces to avoid ambiguous `else'" warnings. Eliminated them. * main.c: Heiko's --wait / --waitretry backwards compatibility @@ -11,6 +20,13 @@ 'wr' was used without being initialized, and a long int was passed into setval()'s char* val parameter. + * recur.c (parse_robots): Applied Edward J. Sabol + 's patch for Guan Yang's reported + problem with "User-agent:*" lines in robots.txt. + + * url.c (parseurl, str_url): Removing Dan Berger's code (see + http.c above for explanation). + 1999-08-25 Heiko Herold * ftp.c: Respect new option waitretry. 
diff --git a/src/http.c b/src/http.c index 8f542423..aaaeb580 100644 --- a/src/http.c +++ b/src/http.c @@ -303,7 +303,7 @@ static time_t http_atotm PARAMS ((char *)); static uerr_t gethttp (struct urlinfo *u, struct http_stat *hs, int *dt) { - char *request, *type, *command, *path, *qstring; + char *request, *type, *command, *path; char *user, *passwd; char *pragma_h, *referer, *useragent, *range, *wwwauth, *remhost; char *authenticate_h; @@ -385,8 +385,6 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt) else path = u->path; - qstring = u->qstring; - command = (*dt & HEAD_ONLY) ? "HEAD" : "GET"; referer = NULL; if (ou->referer) @@ -470,7 +468,6 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt) /* Allocate the memory for the request. */ request = (char *)alloca (strlen (command) + strlen (path) - + (qstring ? strlen (qstring) : 0) + strlen (useragent) + strlen (remhost) + host_port_len + strlen (HTTP_ACCEPT) @@ -483,12 +480,12 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt) + 64); /* Construct the request. */ sprintf (request, "\ -%s %s%s HTTP/1.0\r\n\ +%s %s HTTP/1.0\r\n\ User-Agent: %s\r\n\ Host: %s%s\r\n\ Accept: %s\r\n\ %s%s%s%s%s%s\r\n", - command, path, qstring ? qstring : "", useragent, remhost, + command, path, useragent, remhost, host_port ? host_port : "", HTTP_ACCEPT, referer ? referer : "", wwwauth ? wwwauth : "", diff --git a/src/url.c b/src/url.c index b00484e4..33aa37b8 100644 --- a/src/url.c +++ b/src/url.c @@ -458,23 +458,8 @@ parseurl (const char *url, struct urlinfo *u, int strict) if (type == URLHTTP) while (url[i] && url[i] == '/') ++i; - - /* dfb: break "path" into "path" and "qstring" if the URL is HTTP - if it's not an HTTP url, set l to the last character, so the - xmalloc and strncpy work as desired */ - if (type == URLHTTP) { - for (l = i; url[l] && url[l] != '?'; l++); - if (l != strlen(url)) { - /* copy the query string, including the '?' 
into u->qstring */ - u->qstring = (char *)xmalloc (strlen (url + l) + 8); - strcpy (u->qstring, url + l); - } - } else { - l = strlen(url); - } - - - u->path = strdupdelim (url + i, url + l); + u->path = (char *)xmalloc (strlen (url + i) + 8); + strcpy (u->path, url + i); if (type == URLFTP) { u->ftp_type = process_ftp_type (u->path); @@ -495,8 +480,6 @@ parseurl (const char *url, struct urlinfo *u, int strict) /* Parse the directory. */ parse_dir (u->path, &u->dir, &u->file); DEBUGP (("dir %s -> file %s -> ", u->dir, u->file)); - if (type == URLHTTP && u->qstring) - DEBUGP (("query-string %s -> ", u->qstring)); /* Simplify the directory. */ path_simplify (u->dir); /* Remove the leading `/' in HTTP. */ @@ -643,7 +626,7 @@ char * str_url (const struct urlinfo *u, int hide) { char *res, *host, *user, *passwd, *proto_name, *dir, *file; - int i, l, ln, lu, lh, lp, lf, ld, lq; + int i, l, ln, lu, lh, lp, lf, ld; /* Look for the protocol name. */ for (i = 0; i < ARRAY_SIZE (sup_protos); i++) @@ -684,8 +667,7 @@ str_url (const struct urlinfo *u, int hide) lh = strlen (host); ld = strlen (dir); lf = strlen (file); - lq = (u->proto == URLHTTP && u->qstring) ? strlen (u->qstring) : 0; - res = (char *)xmalloc (ln + lu + lp + lh + ld + lf + lq + 20); /* safe sex */ + res = (char *)xmalloc (ln + lu + lp + lh + ld + lf + 20); /* safe sex */ /* sprintf (res, "%s%s%s%s%s%s:%d/%s%s%s", proto_name, (user ? user : ""), (passwd ? ":" : ""), (passwd ? passwd : ""), (user ? "@" : ""), @@ -716,15 +698,9 @@ str_url (const struct urlinfo *u, int hide) if (*dir) res[l++] = '/'; strcpy (res + l, file); - l += lf; free (host); free (dir); free (file); - if (u->qstring) - { - /* copy in the raw query string to avoid munging arguments */ - memcpy (res + l, u->qstring, lq); - } FREE_MAYBE (user); FREE_MAYBE (passwd); return res;