diff --git a/CHANGES b/CHANGES index 9bf8f0c8b..5ee62854e 100644 --- a/CHANGES +++ b/CHANGES @@ -8,6 +8,18 @@ +Daniel (10 November 2005) +- Introducing range stepping to the curl globbing support. Now you can specify + a step counter by adding :[num] within the brackets when specifying a range: + + [1-100:10] + [a-z:2] + + If no step counter is set, it defaults to 1 as before: + + [1-100] + [d-h] + Daniel (8 November 2005) - Removed the use of AI_CANONNAME in the IPv6-enabled resolver functions since we really have no use for reverse lookups of the address. diff --git a/RELEASE-NOTES b/RELEASE-NOTES index 799784cc2..ac18e8f82 100644 --- a/RELEASE-NOTES +++ b/RELEASE-NOTES @@ -11,6 +11,7 @@ Curl and libcurl 7.15.1 This release includes the following changes: + o URL globbing now offers "range steps": [1-100:10] o LDAPv3 is now the preferred LDAP protocol version o --max-redirs and CURLOPT_MAXREDIRS set to 0 limits redirects o improved MSVC makefile diff --git a/docs/curl.1 b/docs/curl.1 index ccc22dd22..688a14b96 100644 --- a/docs/curl.1 +++ b/docs/curl.1 @@ -21,7 +21,7 @@ .\" * $Id$ .\" ************************************************************************** .\" -.TH curl 1 "4 Sep 2005" "Curl 7.14.2" "Curl Manual" +.TH curl 1 "10 Nov 2005" "Curl 7.15.1" "Curl Manual" .SH NAME curl \- transfer a URL .SH SYNOPSIS @@ -64,6 +64,12 @@ several ones next to each other: You can specify any amount of URLs on the command line. They will be fetched in a sequential manner in the specified order. +Since curl 7.15.1 you can also specify a step counter for the ranges, so that +you can get every Nth number or letter: + + http://www.numericals.com/file[1-100:10].txt + http://www.letters.com/file[a-z:2].txt + If you specify URL without protocol:// prefix, curl will attempt to guess what protocol you might want. It will then default to HTTP but try other protocols based on often-used host name prefixes. 
For example, for host names starting diff --git a/src/urlglob.c b/src/urlglob.c index 2843a51f9..0d7b5774a 100644 --- a/src/urlglob.c +++ b/src/urlglob.c @@ -166,78 +166,101 @@ static GlobCode glob_range(URLGlob *glob, char *pattern, URLPattern *pat; char *c; int wordamount=1; + char sep; + char sep2; + int step; + int rc; pat = (URLPattern*)&glob->pattern[glob->size / 2]; /* patterns 0,1,2,... correspond to size=1,3,5,... */ ++glob->size; if (isalpha((int)*pattern)) { /* character range detected */ + char min_c; + char max_c; + pat->type = UPTCharRange; - if (sscanf(pattern, "%c-%c]", &pat->content.CharRange.min_c, - &pat->content.CharRange.max_c) != 2 || - pat->content.CharRange.min_c >= pat->content.CharRange.max_c || - pat->content.CharRange.max_c - pat->content.CharRange.min_c > 'z' - 'a') { + rc = sscanf(pattern, "%c-%c%c%d%c", &min_c, &max_c, &sep, &step, &sep2); + if ((rc < 3) || (min_c >= max_c) || ((max_c - min_c) > ('z' - 'a'))) { /* the pattern is not well-formed */ snprintf(glob->errormsg, sizeof(glob->errormsg), - "illegal pattern or range specification after pos %d\n", pos); + "errpr: bad range specification after pos %d\n", pos); return GLOB_ERROR; } - pat->content.CharRange.ptr_c = pat->content.CharRange.min_c; - /* always check for a literal (may be "") between patterns */ - if(GLOB_ERROR == glob_word(glob, pattern + 4, pos + 4, &wordamount)) - wordamount=1; + /* check the (first) separating character */ + if((sep != ']') && (sep != ':')) { + snprintf(glob->errormsg, sizeof(glob->errormsg), + "error: unsupported character (%c) after range at pos %d\n", + sep, pos); + return GLOB_ERROR; + } - *amount = (pat->content.CharRange.max_c - - pat->content.CharRange.min_c + 1) * - wordamount; + /* if there was a ":[num]" thing, use that as step or else use 1 */ + pat->content.CharRange.step = + ((sep == ':') && (rc == 5) && (sep2 == ']'))?step:1; - return GLOB_OK; + pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c; + 
pat->content.CharRange.max_c = max_c; } - - if (isdigit((int)*pattern)) { /* numeric range detected */ + else if (isdigit((int)*pattern)) { /* numeric range detected */ + int min_n; + int max_n; pat->type = UPTNumRange; pat->content.NumRange.padlength = 0; - if (sscanf(pattern, "%d-%d]", - &pat->content.NumRange.min_n, - &pat->content.NumRange.max_n) != 2 || - pat->content.NumRange.min_n >= pat->content.NumRange.max_n) { + + rc = sscanf(pattern, "%d-%d%c%d%c", &min_n, &max_n, &sep, &step, &sep2); + + if ((rc < 2) || (min_n >= max_n)) { /* the pattern is not well-formed */ snprintf(glob->errormsg, sizeof(glob->errormsg), - "error: illegal pattern or range specification after pos %d\n", - pos); + "error: bad range specification after pos %d\n", pos); return GLOB_ERROR; } + pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n; + pat->content.NumRange.max_n = max_n; + + /* if there was a ":[num]" thing, use that as step or else use 1 */ + pat->content.NumRange.step = + ((sep == ':') && (rc == 5) && (sep2 == ']'))?step:1; + if (*pattern == '0') { /* leading zero specified */ c = pattern; while (isdigit((int)*c++)) ++pat->content.NumRange.padlength; /* padding length is set for all instances of this pattern */ } - pat->content.NumRange.ptr_n = pat->content.NumRange.min_n; - c = (char*)strchr(pattern, ']'); /* continue after next ']' */ - if(c) - c++; - else { - snprintf(glob->errormsg, sizeof(glob->errormsg), "missing ']'"); - return GLOB_ERROR; /* missing ']' */ - } - /* always check for a literal (may be "") between patterns */ - - if(GLOB_ERROR == glob_word(glob, c, pos + (c - pattern), &wordamount)) - wordamount = 1; - - *amount = (pat->content.NumRange.max_n - - pat->content.NumRange.min_n + 1) * - wordamount; - - return GLOB_OK; } - snprintf(glob->errormsg, sizeof(glob->errormsg), - "illegal character in range specification at pos %d\n", pos); - return GLOB_ERROR; + else { + snprintf(glob->errormsg, sizeof(glob->errormsg), + "illegal character in range 
specification at pos %d\n", pos); + return GLOB_ERROR; + } + + c = (char*)strchr(pattern, ']'); /* continue after next ']' */ + if(c) + c++; + else { + snprintf(glob->errormsg, sizeof(glob->errormsg), "missing ']'"); + return GLOB_ERROR; /* missing ']' */ + } + + /* always check for a literal (may be "") between patterns */ + + if(GLOB_ERROR == glob_word(glob, c, pos + (c - pattern), &wordamount)) + wordamount = 1; + + if(pat->type == UPTCharRange) + *amount = (pat->content.CharRange.max_c - + pat->content.CharRange.min_c + 1) * + wordamount; + else + *amount = (pat->content.NumRange.max_n - + pat->content.NumRange.min_n + 1) * wordamount; + + return GLOB_OK; } static GlobCode glob_word(URLGlob *glob, char *pattern, @@ -374,35 +397,36 @@ char *glob_next_url(URLGlob *glob) char *lit; size_t i; size_t j; - int carry; if (!glob->beenhere) glob->beenhere = 1; else { - carry = 1; + bool carry = TRUE; /* implement a counter over the index ranges of all patterns, starting with the rightmost pattern */ for (i = glob->size / 2 - 1; carry && i < glob->size; --i) { - carry = 0; + carry = FALSE; pat = &glob->pattern[i]; switch (pat->type) { case UPTSet: if (++pat->content.Set.ptr_s == pat->content.Set.size) { pat->content.Set.ptr_s = 0; - carry = 1; + carry = TRUE; } break; case UPTCharRange: - if (++pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) { + pat->content.CharRange.ptr_c += pat->content.CharRange.step; + if (pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) { pat->content.CharRange.ptr_c = pat->content.CharRange.min_c; - carry = 1; + carry = TRUE; } break; case UPTNumRange: - if (++pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) { + pat->content.NumRange.ptr_n += pat->content.NumRange.step; + if (pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) { pat->content.NumRange.ptr_n = pat->content.NumRange.min_n; - carry = 1; + carry = TRUE; } break; default: diff --git a/src/urlglob.h b/src/urlglob.h index fdda41eef..d0818407f 
100644 --- a/src/urlglob.h +++ b/src/urlglob.h @@ -1,18 +1,18 @@ #ifndef __URLGLOB_H #define __URLGLOB_H /*************************************************************************** - * _ _ ____ _ - * Project ___| | | | _ \| | - * / __| | | | |_) | | - * | (__| |_| | _ <| |___ + * _ _ ____ _ + * Project ___| | | | _ \| | + * / __| | | | |_) | | + * | (__| |_| | _ <| |___ * \___|\___/|_| \_\_____| * - * Copyright (C) 1998 - 2004, Daniel Stenberg, , et al. + * Copyright (C) 1998 - 2005, Daniel Stenberg, , et al. * * This software is licensed as described in the file COPYING, which * you should have received as part of this distribution. The terms * are also available at http://curl.haxx.se/docs/copyright.html. - * + * * You may opt to use, copy, modify, merge, publish, distribute and/or sell * copies of the Software, and permit persons to whom the Software is * furnished to do so, under the terms of the COPYING file. @@ -39,11 +39,13 @@ typedef struct { struct { char min_c, max_c; char ptr_c; + int step; } CharRange; struct { int min_n, max_n; short padlength; int ptr_n; + int step; } NumRange ; } content; } URLPattern; @@ -60,7 +62,7 @@ typedef struct { int glob_url(URLGlob**, char*, int *, FILE *); char* glob_next_url(URLGlob*); -char* glob_match_url(char*, URLGlob *); +char* glob_match_url(char*, URLGlob *); void glob_cleanup(URLGlob* glob); #endif