Introducing range stepping to the curl globbing support. Now you can specify a

step counter by adding :[num] within the brackets when specifying a range.
This commit is contained in:
Daniel Stenberg 2005-11-10 22:11:01 +00:00
parent 00a7dda273
commit bd8baed138
5 changed files with 103 additions and 58 deletions

12
CHANGES
View File

@ -8,6 +8,18 @@
Daniel (10 November 2005)
- Introducing range stepping to the curl globbing support. Now you can specify a
step counter by adding :[num] within the brackets when specifying a range:
[1-100:10]
[a-z:2]
If no step counter is set, it defaults to 1 as before:
[1-100]
[d-h]
Daniel (8 November 2005)
- Removed the use of AI_CANONNAME in the IPv6-enabled resolver functions since
we really have no use for reverse lookups of the address.

View File

@ -11,6 +11,7 @@ Curl and libcurl 7.15.1
This release includes the following changes:
o URL globbing now offers "range steps": [1-100:10]
o LDAPv3 is now the preferred LDAP protocol version
o --max-redirs and CURLOPT_MAXREDIRS set to 0 limits redirects
o improved MSVC makefile

View File

@ -21,7 +21,7 @@
.\" * $Id$
.\" **************************************************************************
.\"
.TH curl 1 "4 Sep 2005" "Curl 7.14.2" "Curl Manual"
.TH curl 1 "10 Nov 2005" "Curl 7.15.1" "Curl Manual"
.SH NAME
curl \- transfer a URL
.SH SYNOPSIS
@ -64,6 +64,12 @@ several ones next to each other:
You can specify any number of URLs on the command line. They will be fetched
in a sequential manner in the specified order.
Since curl 7.15.1 you can also specify a step counter for the ranges, so that
you can get every Nth number or letter:
http://www.numericals.com/file[1-100:10].txt
http://www.letters.com/file[a-z:2].txt
If you specify a URL without a protocol:// prefix, curl will attempt to guess what
protocol you might want. It will then default to HTTP but try other protocols
based on often-used host name prefixes. For example, for host names starting

View File

@ -166,56 +166,79 @@ static GlobCode glob_range(URLGlob *glob, char *pattern,
URLPattern *pat;
char *c;
int wordamount=1;
char sep;
char sep2;
int step;
int rc;
pat = (URLPattern*)&glob->pattern[glob->size / 2];
/* patterns 0,1,2,... correspond to size=1,3,5,... */
++glob->size;
if (isalpha((int)*pattern)) { /* character range detected */
char min_c;
char max_c;
pat->type = UPTCharRange;
if (sscanf(pattern, "%c-%c]", &pat->content.CharRange.min_c,
&pat->content.CharRange.max_c) != 2 ||
pat->content.CharRange.min_c >= pat->content.CharRange.max_c ||
pat->content.CharRange.max_c - pat->content.CharRange.min_c > 'z' - 'a') {
rc = sscanf(pattern, "%c-%c%c%d%c", &min_c, &max_c, &sep, &step, &sep2);
if ((rc < 3) || (min_c >= max_c) || ((max_c - min_c) > ('z' - 'a'))) {
/* the pattern is not well-formed */
snprintf(glob->errormsg, sizeof(glob->errormsg),
"illegal pattern or range specification after pos %d\n", pos);
"errpr: bad range specification after pos %d\n", pos);
return GLOB_ERROR;
}
pat->content.CharRange.ptr_c = pat->content.CharRange.min_c;
/* always check for a literal (may be "") between patterns */
if(GLOB_ERROR == glob_word(glob, pattern + 4, pos + 4, &wordamount))
wordamount=1;
*amount = (pat->content.CharRange.max_c -
pat->content.CharRange.min_c + 1) *
wordamount;
return GLOB_OK;
/* check the (first) separating character */
if((sep != ']') && (sep != ':')) {
snprintf(glob->errormsg, sizeof(glob->errormsg),
"error: unsupported character (%c) after range at pos %d\n",
sep, pos);
return GLOB_ERROR;
}
if (isdigit((int)*pattern)) { /* numeric range detected */
/* if there was a ":[num]" thing, use that as step or else use 1 */
pat->content.CharRange.step =
((sep == ':') && (rc == 5) && (sep2 == ']'))?step:1;
pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c;
pat->content.CharRange.max_c = max_c;
}
else if (isdigit((int)*pattern)) { /* numeric range detected */
int min_n;
int max_n;
pat->type = UPTNumRange;
pat->content.NumRange.padlength = 0;
if (sscanf(pattern, "%d-%d]",
&pat->content.NumRange.min_n,
&pat->content.NumRange.max_n) != 2 ||
pat->content.NumRange.min_n >= pat->content.NumRange.max_n) {
rc = sscanf(pattern, "%d-%d%c%d%c", &min_n, &max_n, &sep, &step, &sep2);
if ((rc < 2) || (min_n >= max_n)) {
/* the pattern is not well-formed */
snprintf(glob->errormsg, sizeof(glob->errormsg),
"error: illegal pattern or range specification after pos %d\n",
pos);
"error: bad range specification after pos %d\n", pos);
return GLOB_ERROR;
}
pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n;
pat->content.NumRange.max_n = max_n;
/* if there was a ":[num]" thing, use that as step or else use 1 */
pat->content.NumRange.step =
((sep == ':') && (rc == 5) && (sep2 == ']'))?step:1;
if (*pattern == '0') { /* leading zero specified */
c = pattern;
while (isdigit((int)*c++))
++pat->content.NumRange.padlength; /* padding length is set for all
instances of this pattern */
}
pat->content.NumRange.ptr_n = pat->content.NumRange.min_n;
}
else {
snprintf(glob->errormsg, sizeof(glob->errormsg),
"illegal character in range specification at pos %d\n", pos);
return GLOB_ERROR;
}
c = (char*)strchr(pattern, ']'); /* continue after next ']' */
if(c)
c++;
@ -229,15 +252,15 @@ static GlobCode glob_range(URLGlob *glob, char *pattern,
if(GLOB_ERROR == glob_word(glob, c, pos + (c - pattern), &wordamount))
wordamount = 1;
*amount = (pat->content.NumRange.max_n -
pat->content.NumRange.min_n + 1) *
if(pat->type == UPTCharRange)
*amount = (pat->content.CharRange.max_c -
pat->content.CharRange.min_c + 1) *
wordamount;
else
*amount = (pat->content.NumRange.max_n -
pat->content.NumRange.min_n + 1) * wordamount;
return GLOB_OK;
}
snprintf(glob->errormsg, sizeof(glob->errormsg),
"illegal character in range specification at pos %d\n", pos);
return GLOB_ERROR;
}
static GlobCode glob_word(URLGlob *glob, char *pattern,
@ -374,35 +397,36 @@ char *glob_next_url(URLGlob *glob)
char *lit;
size_t i;
size_t j;
int carry;
if (!glob->beenhere)
glob->beenhere = 1;
else {
carry = 1;
bool carry = TRUE;
/* implement a counter over the index ranges of all patterns,
starting with the rightmost pattern */
for (i = glob->size / 2 - 1; carry && i < glob->size; --i) {
carry = 0;
carry = FALSE;
pat = &glob->pattern[i];
switch (pat->type) {
case UPTSet:
if (++pat->content.Set.ptr_s == pat->content.Set.size) {
pat->content.Set.ptr_s = 0;
carry = 1;
carry = TRUE;
}
break;
case UPTCharRange:
if (++pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) {
pat->content.CharRange.ptr_c += pat->content.CharRange.step;
if (pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) {
pat->content.CharRange.ptr_c = pat->content.CharRange.min_c;
carry = 1;
carry = TRUE;
}
break;
case UPTNumRange:
if (++pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) {
pat->content.NumRange.ptr_n += pat->content.NumRange.step;
if (pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) {
pat->content.NumRange.ptr_n = pat->content.NumRange.min_n;
carry = 1;
carry = TRUE;
}
break;
default:

View File

@ -7,7 +7,7 @@
* | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____|
*
* Copyright (C) 1998 - 2004, Daniel Stenberg, <daniel@haxx.se>, et al.
* Copyright (C) 1998 - 2005, Daniel Stenberg, <daniel@haxx.se>, et al.
*
* This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms
@ -39,11 +39,13 @@ typedef struct {
struct {
char min_c, max_c;
char ptr_c;
int step;
} CharRange;
struct {
int min_n, max_n;
short padlength;
int ptr_n;
int step;
} NumRange ;
} content;
} URLPattern;