Introducing range stepping to the curl globbing support. Now you can specify

a step counter by adding :[num] within the brackets when specifying a range.
2024-12-21 07:38:49 -05:00 · 2005-11-10 22:11:01 +00:00 · 2005-11-10 22:11:01 +00:00 · bd8baed138
commit bd8baed138
parent 00a7dda273
5 changed files with 103 additions and 58 deletions
--- a/12
+++ b/12
@@ -8,6 +8,18 @@



+Daniel (10 November 2005)
+- Introducing range stepping to the curl globbing support. Now you can specify
+  a step counter by adding :[num] within the brackets when specifying a range:
+
+   [1-100:10]
+   [a-z:2]
+
+  If no step counter is set, it defaults to 1 as before:
+
+   [1-100]
+   [d-h]
+
 Daniel (8 November 2005)
 - Removed the use of AI_CANONNAME in the IPv6-enabled resolver functions since
  we really have no use for reverse lookups of the address.
--- a/1
+++ b/1
@@ -11,6 +11,7 @@ Curl and libcurl 7.15.1

 This release includes the following changes:

+ o URL globbing now offers "range steps": [1-100:10]
 o LDAPv3 is now the preferred LDAP protocol version
 o --max-redirs and CURLOPT_MAXREDIRS set to 0 limits redirects
 o improved MSVC makefile
--- a/docs/curl.1
+++ b/docs/curl.1
@@ -21,7 +21,7 @@
 .\" * $Id$
 .\" **************************************************************************
 .\"
-.TH curl 1 "4 Sep 2005" "Curl 7.14.2" "Curl Manual"
+.TH curl 1 "10 Nov 2005" "Curl 7.15.1" "Curl Manual"
 .SH NAME
 curl \- transfer a URL
 .SH SYNOPSIS
@@ -64,6 +64,12 @@ several ones next to each other:
 You can specify any amount of URLs on the command line. They will be fetched
 in a sequential manner in the specified order.

+Since curl 7.15.1 you can also specify a step counter for the ranges, so that
+you can get every Nth number or letter:
+
+ http://www.numericals.com/file[1-100:10].txt
+ http://www.letters.com/file[a-z:2].txt
+
 If you specify URL without protocol:// prefix, curl will attempt to guess what
 protocol you might want. It will then default to HTTP but try other protocols
 based on often-used host name prefixes. For example, for host names starting
--- a/src/urlglob.c
+++ b/src/urlglob.c
@@ -166,78 +166,101 @@ static GlobCode glob_range(URLGlob *glob, char *pattern,
  URLPattern *pat;
  char *c;
  int wordamount=1;
+  char sep;
+  char sep2;
+  int step;
+  int rc;

  pat = (URLPattern*)&glob->pattern[glob->size / 2];
  /* patterns 0,1,2,... correspond to size=1,3,5,... */
  ++glob->size;

  if (isalpha((int)*pattern)) {         /* character range detected */
+    char min_c;
+    char max_c;
+
    pat->type = UPTCharRange;
-    if (sscanf(pattern, "%c-%c]", &pat->content.CharRange.min_c,
-               &pat->content.CharRange.max_c) != 2 ||
-        pat->content.CharRange.min_c >= pat->content.CharRange.max_c ||
-        pat->content.CharRange.max_c - pat->content.CharRange.min_c > 'z' - 'a') {
+    rc = sscanf(pattern, "%c-%c%c%d%c", &min_c, &max_c, &sep, &step, &sep2);
+    if ((rc < 3) || (min_c >= max_c) || ((max_c - min_c) > ('z' - 'a'))) {
      /* the pattern is not well-formed */
      snprintf(glob->errormsg, sizeof(glob->errormsg),
-               "illegal pattern or range specification after pos %d\n", pos);
+               "error: bad range specification after pos %d\n", pos);
      return GLOB_ERROR;
    }
-    pat->content.CharRange.ptr_c = pat->content.CharRange.min_c;
-    /* always check for a literal (may be "") between patterns */

-    if(GLOB_ERROR == glob_word(glob, pattern + 4, pos + 4, &wordamount))
-      wordamount=1;
+    /* check the (first) separating character */
+    if((sep != ']') && (sep != ':')) {
+      snprintf(glob->errormsg, sizeof(glob->errormsg),
+               "error: unsupported character (%c) after range at pos %d\n",
+               sep, pos);
+      return GLOB_ERROR;
+    }

-    *amount = (pat->content.CharRange.max_c -
-               pat->content.CharRange.min_c + 1) *
-      wordamount;
+    /* if there was a ":[num]" thing, use that as step or else use 1 */
+    pat->content.CharRange.step =
+      ((sep == ':') && (rc == 5) && (sep2 == ']'))?step:1;

-    return GLOB_OK;
+    pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c;
+    pat->content.CharRange.max_c = max_c;
  }
-
-  if (isdigit((int)*pattern)) { /* numeric range detected */
+  else if (isdigit((int)*pattern)) { /* numeric range detected */
+    int min_n;
+    int max_n;

    pat->type = UPTNumRange;
    pat->content.NumRange.padlength = 0;
-    if (sscanf(pattern, "%d-%d]",
-               &pat->content.NumRange.min_n,
-               &pat->content.NumRange.max_n) != 2 ||
-        pat->content.NumRange.min_n >= pat->content.NumRange.max_n) {
+
+    rc = sscanf(pattern, "%d-%d%c%d%c", &min_n, &max_n, &sep, &step, &sep2);
+
+    if ((rc < 2) || (min_n >= max_n)) {
      /* the pattern is not well-formed */
      snprintf(glob->errormsg, sizeof(glob->errormsg),
-               "error: illegal pattern or range specification after pos %d\n",
-               pos);
+               "error: bad range specification after pos %d\n", pos);
      return GLOB_ERROR;
    }
+    pat->content.NumRange.ptr_n =  pat->content.NumRange.min_n = min_n;
+    pat->content.NumRange.max_n = max_n;
+
+    /* if there was a ":[num]" thing, use that as step or else use 1 */
+    pat->content.NumRange.step =
+      ((sep == ':') && (rc == 5) && (sep2 == ']'))?step:1;
+
    if (*pattern == '0') {              /* leading zero specified */
      c = pattern;
      while (isdigit((int)*c++))
        ++pat->content.NumRange.padlength; /* padding length is set for all
                                              instances of this pattern */
    }
-    pat->content.NumRange.ptr_n = pat->content.NumRange.min_n;
-    c = (char*)strchr(pattern, ']'); /* continue after next ']' */
-    if(c)
-      c++;
-    else {
-      snprintf(glob->errormsg, sizeof(glob->errormsg), "missing ']'");
-      return GLOB_ERROR; /* missing ']' */
-    }

-    /* always check for a literal (may be "") between patterns */
-
-    if(GLOB_ERROR == glob_word(glob, c, pos + (c - pattern), &wordamount))
-      wordamount = 1;
-
-    *amount = (pat->content.NumRange.max_n -
-               pat->content.NumRange.min_n + 1) *
-      wordamount;
-
-    return GLOB_OK;
  }
-  snprintf(glob->errormsg, sizeof(glob->errormsg),
-           "illegal character in range specification at pos %d\n", pos);
-  return GLOB_ERROR;
+  else {
+    snprintf(glob->errormsg, sizeof(glob->errormsg),
+             "illegal character in range specification at pos %d\n", pos);
+    return GLOB_ERROR;
+  }
+
+  c = (char*)strchr(pattern, ']'); /* continue after next ']' */
+  if(c)
+    c++;
+  else {
+    snprintf(glob->errormsg, sizeof(glob->errormsg), "missing ']'");
+    return GLOB_ERROR; /* missing ']' */
+  }
+
+  /* always check for a literal (may be "") between patterns */
+
+  if(GLOB_ERROR == glob_word(glob, c, pos + (c - pattern), &wordamount))
+    wordamount = 1;
+
+  if(pat->type == UPTCharRange)
+    *amount = (pat->content.CharRange.max_c -
+               pat->content.CharRange.min_c + 1) *
+      wordamount;
+  else
+    *amount = (pat->content.NumRange.max_n -
+               pat->content.NumRange.min_n + 1) * wordamount;
+
+  return GLOB_OK;
 }

 static GlobCode glob_word(URLGlob *glob, char *pattern,
@@ -374,35 +397,36 @@ char *glob_next_url(URLGlob *glob)
  char *lit;
  size_t i;
  size_t j;
-  int carry;

  if (!glob->beenhere)
    glob->beenhere = 1;
  else {
-    carry = 1;
+    bool carry = TRUE;

    /* implement a counter over the index ranges of all patterns,
       starting with the rightmost pattern */
    for (i = glob->size / 2 - 1; carry && i < glob->size; --i) {
-      carry = 0;
+      carry = FALSE;
      pat = &glob->pattern[i];
      switch (pat->type) {
      case UPTSet:
        if (++pat->content.Set.ptr_s == pat->content.Set.size) {
          pat->content.Set.ptr_s = 0;
-          carry = 1;
+          carry = TRUE;
        }
        break;
      case UPTCharRange:
-        if (++pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) {
+        pat->content.CharRange.ptr_c += pat->content.CharRange.step;
+        if (pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) {
          pat->content.CharRange.ptr_c = pat->content.CharRange.min_c;
-          carry = 1;
+          carry = TRUE;
        }
        break;
      case UPTNumRange:
-        if (++pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) {
+        pat->content.NumRange.ptr_n += pat->content.NumRange.step;
+        if (pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) {
          pat->content.NumRange.ptr_n = pat->content.NumRange.min_n;
-          carry = 1;
+          carry = TRUE;
        }
        break;
      default:
--- a/src/urlglob.h
+++ b/src/urlglob.h
@@ -7,7 +7,7 @@
 *                            | (__| |_| |  _ <| |___
 *                             \___|\___/|_| \_\_____|
 *
- * Copyright (C) 1998 - 2004, Daniel Stenberg, <daniel@haxx.se>, et al.
+ * Copyright (C) 1998 - 2005, Daniel Stenberg, <daniel@haxx.se>, et al.
 *
 * This software is licensed as described in the file COPYING, which
 * you should have received as part of this distribution. The terms
@@ -39,11 +39,13 @@ typedef struct {
    struct {
      char min_c, max_c;
      char ptr_c;
+      int step;
    } CharRange;
    struct {
      int min_n, max_n;
      short padlength;
      int ptr_n;
+      int step;
    } NumRange ;
  } content;
 } URLPattern;