Introducing range stepping to the curl globbing support. Now you can specify

a step counter by adding :[num] within the brackets when specifying a range.
This commit is contained in:
Daniel Stenberg 2005-11-10 22:11:01 +00:00
parent 00a7dda273
commit bd8baed138
5 changed files with 103 additions and 58 deletions

12
CHANGES
View File

@ -8,6 +8,18 @@
Daniel (10 November 2005)
- Introducing range stepping to the curl globbing support. Now you can specify
a step counter by adding :[num] within the brackets when specifying a range:
[1-100:10]
[a-z:2]
If no step counter is set, it defaults to 1 as before:
[1-100]
[d-h]
Daniel (8 November 2005) Daniel (8 November 2005)
- Removed the use of AI_CANONNAME in the IPv6-enabled resolver functions since - Removed the use of AI_CANONNAME in the IPv6-enabled resolver functions since
we really have no use for reverse lookups of the address. we really have no use for reverse lookups of the address.

View File

@ -11,6 +11,7 @@ Curl and libcurl 7.15.1
This release includes the following changes: This release includes the following changes:
o URL globbing now offers "range steps": [1-100:10]
o LDAPv3 is now the preferred LDAP protocol version o LDAPv3 is now the preferred LDAP protocol version
o --max-redirs and CURLOPT_MAXREDIRS set to 0 limits redirects o --max-redirs and CURLOPT_MAXREDIRS set to 0 limits redirects
o improved MSVC makefile o improved MSVC makefile

View File

@ -21,7 +21,7 @@
.\" * $Id$ .\" * $Id$
.\" ************************************************************************** .\" **************************************************************************
.\" .\"
.TH curl 1 "4 Sep 2005" "Curl 7.14.2" "Curl Manual" .TH curl 1 "10 Nov 2005" "Curl 7.15.1" "Curl Manual"
.SH NAME .SH NAME
curl \- transfer a URL curl \- transfer a URL
.SH SYNOPSIS .SH SYNOPSIS
@ -64,6 +64,12 @@ several ones next to each other:
You can specify any amount of URLs on the command line. They will be fetched You can specify any amount of URLs on the command line. They will be fetched
in a sequential manner in the specified order. in a sequential manner in the specified order.
Since curl 7.15.1 you can also specify a step counter for the ranges, so that
you can get every Nth number or letter:
http://www.numericals.com/file[1-100:10].txt
http://www.letters.com/file[a-z:2].txt
If you specify URL without protocol:// prefix, curl will attempt to guess what If you specify URL without protocol:// prefix, curl will attempt to guess what
protocol you might want. It will then default to HTTP but try other protocols protocol you might want. It will then default to HTTP but try other protocols
based on often-used host name prefixes. For example, for host names starting based on often-used host name prefixes. For example, for host names starting

View File

@ -166,78 +166,101 @@ static GlobCode glob_range(URLGlob *glob, char *pattern,
URLPattern *pat; URLPattern *pat;
char *c; char *c;
int wordamount=1; int wordamount=1;
char sep;
char sep2;
int step;
int rc;
pat = (URLPattern*)&glob->pattern[glob->size / 2]; pat = (URLPattern*)&glob->pattern[glob->size / 2];
/* patterns 0,1,2,... correspond to size=1,3,5,... */ /* patterns 0,1,2,... correspond to size=1,3,5,... */
++glob->size; ++glob->size;
if (isalpha((int)*pattern)) { /* character range detected */ if (isalpha((int)*pattern)) { /* character range detected */
char min_c;
char max_c;
pat->type = UPTCharRange; pat->type = UPTCharRange;
if (sscanf(pattern, "%c-%c]", &pat->content.CharRange.min_c, rc = sscanf(pattern, "%c-%c%c%d%c", &min_c, &max_c, &sep, &step, &sep2);
&pat->content.CharRange.max_c) != 2 || if ((rc < 3) || (min_c >= max_c) || ((max_c - min_c) > ('z' - 'a'))) {
pat->content.CharRange.min_c >= pat->content.CharRange.max_c ||
pat->content.CharRange.max_c - pat->content.CharRange.min_c > 'z' - 'a') {
/* the pattern is not well-formed */ /* the pattern is not well-formed */
snprintf(glob->errormsg, sizeof(glob->errormsg), snprintf(glob->errormsg, sizeof(glob->errormsg),
"illegal pattern or range specification after pos %d\n", pos); "errpr: bad range specification after pos %d\n", pos);
return GLOB_ERROR; return GLOB_ERROR;
} }
pat->content.CharRange.ptr_c = pat->content.CharRange.min_c;
/* always check for a literal (may be "") between patterns */
if(GLOB_ERROR == glob_word(glob, pattern + 4, pos + 4, &wordamount)) /* check the (first) separating character */
wordamount=1; if((sep != ']') && (sep != ':')) {
snprintf(glob->errormsg, sizeof(glob->errormsg),
"error: unsupported character (%c) after range at pos %d\n",
sep, pos);
return GLOB_ERROR;
}
*amount = (pat->content.CharRange.max_c - /* if there was a ":[num]" thing, use that as step or else use 1 */
pat->content.CharRange.min_c + 1) * pat->content.CharRange.step =
wordamount; ((sep == ':') && (rc == 5) && (sep2 == ']'))?step:1;
return GLOB_OK; pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c;
pat->content.CharRange.max_c = max_c;
} }
else if (isdigit((int)*pattern)) { /* numeric range detected */
if (isdigit((int)*pattern)) { /* numeric range detected */ int min_n;
int max_n;
pat->type = UPTNumRange; pat->type = UPTNumRange;
pat->content.NumRange.padlength = 0; pat->content.NumRange.padlength = 0;
if (sscanf(pattern, "%d-%d]",
&pat->content.NumRange.min_n, rc = sscanf(pattern, "%d-%d%c%d%c", &min_n, &max_n, &sep, &step, &sep2);
&pat->content.NumRange.max_n) != 2 ||
pat->content.NumRange.min_n >= pat->content.NumRange.max_n) { if ((rc < 2) || (min_n >= max_n)) {
/* the pattern is not well-formed */ /* the pattern is not well-formed */
snprintf(glob->errormsg, sizeof(glob->errormsg), snprintf(glob->errormsg, sizeof(glob->errormsg),
"error: illegal pattern or range specification after pos %d\n", "error: bad range specification after pos %d\n", pos);
pos);
return GLOB_ERROR; return GLOB_ERROR;
} }
pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n;
pat->content.NumRange.max_n = max_n;
/* if there was a ":[num]" thing, use that as step or else use 1 */
pat->content.NumRange.step =
((sep == ':') && (rc == 5) && (sep2 == ']'))?step:1;
if (*pattern == '0') { /* leading zero specified */ if (*pattern == '0') { /* leading zero specified */
c = pattern; c = pattern;
while (isdigit((int)*c++)) while (isdigit((int)*c++))
++pat->content.NumRange.padlength; /* padding length is set for all ++pat->content.NumRange.padlength; /* padding length is set for all
instances of this pattern */ instances of this pattern */
} }
pat->content.NumRange.ptr_n = pat->content.NumRange.min_n;
c = (char*)strchr(pattern, ']'); /* continue after next ']' */
if(c)
c++;
else {
snprintf(glob->errormsg, sizeof(glob->errormsg), "missing ']'");
return GLOB_ERROR; /* missing ']' */
}
/* always check for a literal (may be "") between patterns */
if(GLOB_ERROR == glob_word(glob, c, pos + (c - pattern), &wordamount))
wordamount = 1;
*amount = (pat->content.NumRange.max_n -
pat->content.NumRange.min_n + 1) *
wordamount;
return GLOB_OK;
} }
snprintf(glob->errormsg, sizeof(glob->errormsg), else {
"illegal character in range specification at pos %d\n", pos); snprintf(glob->errormsg, sizeof(glob->errormsg),
return GLOB_ERROR; "illegal character in range specification at pos %d\n", pos);
return GLOB_ERROR;
}
c = (char*)strchr(pattern, ']'); /* continue after next ']' */
if(c)
c++;
else {
snprintf(glob->errormsg, sizeof(glob->errormsg), "missing ']'");
return GLOB_ERROR; /* missing ']' */
}
/* always check for a literal (may be "") between patterns */
if(GLOB_ERROR == glob_word(glob, c, pos + (c - pattern), &wordamount))
wordamount = 1;
if(pat->type == UPTCharRange)
*amount = (pat->content.CharRange.max_c -
pat->content.CharRange.min_c + 1) *
wordamount;
else
*amount = (pat->content.NumRange.max_n -
pat->content.NumRange.min_n + 1) * wordamount;
return GLOB_OK;
} }
static GlobCode glob_word(URLGlob *glob, char *pattern, static GlobCode glob_word(URLGlob *glob, char *pattern,
@ -374,35 +397,36 @@ char *glob_next_url(URLGlob *glob)
char *lit; char *lit;
size_t i; size_t i;
size_t j; size_t j;
int carry;
if (!glob->beenhere) if (!glob->beenhere)
glob->beenhere = 1; glob->beenhere = 1;
else { else {
carry = 1; bool carry = TRUE;
/* implement a counter over the index ranges of all patterns, /* implement a counter over the index ranges of all patterns,
starting with the rightmost pattern */ starting with the rightmost pattern */
for (i = glob->size / 2 - 1; carry && i < glob->size; --i) { for (i = glob->size / 2 - 1; carry && i < glob->size; --i) {
carry = 0; carry = FALSE;
pat = &glob->pattern[i]; pat = &glob->pattern[i];
switch (pat->type) { switch (pat->type) {
case UPTSet: case UPTSet:
if (++pat->content.Set.ptr_s == pat->content.Set.size) { if (++pat->content.Set.ptr_s == pat->content.Set.size) {
pat->content.Set.ptr_s = 0; pat->content.Set.ptr_s = 0;
carry = 1; carry = TRUE;
} }
break; break;
case UPTCharRange: case UPTCharRange:
if (++pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) { pat->content.CharRange.ptr_c += pat->content.CharRange.step;
if (pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) {
pat->content.CharRange.ptr_c = pat->content.CharRange.min_c; pat->content.CharRange.ptr_c = pat->content.CharRange.min_c;
carry = 1; carry = TRUE;
} }
break; break;
case UPTNumRange: case UPTNumRange:
if (++pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) { pat->content.NumRange.ptr_n += pat->content.NumRange.step;
if (pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) {
pat->content.NumRange.ptr_n = pat->content.NumRange.min_n; pat->content.NumRange.ptr_n = pat->content.NumRange.min_n;
carry = 1; carry = TRUE;
} }
break; break;
default: default:

View File

@ -1,18 +1,18 @@
#ifndef __URLGLOB_H #ifndef __URLGLOB_H
#define __URLGLOB_H #define __URLGLOB_H
/*************************************************************************** /***************************************************************************
* _ _ ____ _ * _ _ ____ _
* Project ___| | | | _ \| | * Project ___| | | | _ \| |
* / __| | | | |_) | | * / __| | | | |_) | |
* | (__| |_| | _ <| |___ * | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____| * \___|\___/|_| \_\_____|
* *
* Copyright (C) 1998 - 2004, Daniel Stenberg, <daniel@haxx.se>, et al. * Copyright (C) 1998 - 2005, Daniel Stenberg, <daniel@haxx.se>, et al.
* *
* This software is licensed as described in the file COPYING, which * This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms * you should have received as part of this distribution. The terms
* are also available at http://curl.haxx.se/docs/copyright.html. * are also available at http://curl.haxx.se/docs/copyright.html.
* *
* You may opt to use, copy, modify, merge, publish, distribute and/or sell * You may opt to use, copy, modify, merge, publish, distribute and/or sell
* copies of the Software, and permit persons to whom the Software is * copies of the Software, and permit persons to whom the Software is
* furnished to do so, under the terms of the COPYING file. * furnished to do so, under the terms of the COPYING file.
@ -39,11 +39,13 @@ typedef struct {
struct { struct {
char min_c, max_c; char min_c, max_c;
char ptr_c; char ptr_c;
int step;
} CharRange; } CharRange;
struct { struct {
int min_n, max_n; int min_n, max_n;
short padlength; short padlength;
int ptr_n; int ptr_n;
int step;
} NumRange ; } NumRange ;
} content; } content;
} URLPattern; } URLPattern;
@ -60,7 +62,7 @@ typedef struct {
int glob_url(URLGlob**, char*, int *, FILE *); int glob_url(URLGlob**, char*, int *, FILE *);
char* glob_next_url(URLGlob*); char* glob_next_url(URLGlob*);
char* glob_match_url(char*, URLGlob *); char* glob_match_url(char*, URLGlob *);
void glob_cleanup(URLGlob* glob); void glob_cleanup(URLGlob* glob);
#endif #endif