1
0
mirror of https://github.com/moparisthebest/curl synced 2024-08-13 17:03:50 -04:00
curl/src/tool_urlglob.c
Emil Lerner 7b223a3a21 globbing: fix step parsing for character globbing ranges
The glob_range function used wrong offset (3 instead of 4) for parsing
integer step inside character range specification, which led to 'bad
range' error when using character ranges with explicitly specified step
(such as '[a-z:2]')
2015-03-25 11:29:46 +01:00

670 lines
19 KiB
C

/***************************************************************************
* _ _ ____ _
* Project ___| | | | _ \| |
* / __| | | | |_) | |
* | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____|
*
* Copyright (C) 1998 - 2015, Daniel Stenberg, <daniel@haxx.se>, et al.
*
* This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms
* are also available at http://curl.haxx.se/docs/copyright.html.
*
* You may opt to use, copy, modify, merge, publish, distribute and/or sell
* copies of the Software, and permit persons to whom the Software is
* furnished to do so, under the terms of the COPYING file.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
***************************************************************************/
#include "tool_setup.h"
#define ENABLE_CURLX_PRINTF
/* use our own printf() functions */
#include "curlx.h"
#include "tool_urlglob.h"
#include "tool_vms.h"
#include "memdebug.h" /* keep this as LAST include */
#define GLOBERROR(string, column, code) \
glob->error = string, glob->pos = column, code
void glob_cleanup(URLGlob* glob);
static CURLcode glob_fixed(URLGlob *glob, char *fixed, size_t len)
{
URLPattern *pat = &glob->pattern[glob->size];
pat->type = UPTSet;
pat->content.Set.size = 1;
pat->content.Set.ptr_s = 0;
pat->globindex = -1;
pat->content.Set.elements = malloc(sizeof(char*));
if(!pat->content.Set.elements)
return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
pat->content.Set.elements[0] = malloc(len+1);
if(!pat->content.Set.elements[0])
return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
memcpy(pat->content.Set.elements[0], fixed, len);
pat->content.Set.elements[0][len] = 0;
return CURLE_OK;
}
/* multiply
*
* Multiplies and checks for overflow.
*/
static int multiply(unsigned long *amount, long with)
{
unsigned long sum = *amount * with;
if(sum/with != *amount)
return 1; /* didn't fit, bail out */
*amount = sum;
return 0;
}
static CURLcode glob_set(URLGlob *glob, char **patternp,
size_t *posp, unsigned long *amount,
int globindex)
{
/* processes a set expression with the point behind the opening '{'
','-separated elements are collected until the next closing '}'
*/
URLPattern *pat;
bool done = FALSE;
char *buf = glob->glob_buffer;
char *pattern = *patternp;
char *opattern = pattern;
size_t opos = *posp-1;
pat = &glob->pattern[glob->size];
/* patterns 0,1,2,... correspond to size=1,3,5,... */
pat->type = UPTSet;
pat->content.Set.size = 0;
pat->content.Set.ptr_s = 0;
pat->content.Set.elements = NULL;
pat->globindex = globindex;
while(!done) {
switch (*pattern) {
case '\0': /* URL ended while set was still open */
return GLOBERROR("unmatched brace", opos, CURLE_URL_MALFORMAT);
case '{':
case '[': /* no nested expressions at this time */
return GLOBERROR("nested brace", *posp, CURLE_URL_MALFORMAT);
case '}': /* set element completed */
if(opattern == pattern)
return GLOBERROR("empty string within braces", *posp,
CURLE_URL_MALFORMAT);
/* add 1 to size since it'll be incremented below */
if(multiply(amount, pat->content.Set.size+1))
return GLOBERROR("range overflow", 0, CURLE_URL_MALFORMAT);
/* fall-through */
case ',':
*buf = '\0';
if(pat->content.Set.elements) {
char **new_arr = realloc(pat->content.Set.elements,
(pat->content.Set.size + 1) * sizeof(char*));
if(!new_arr)
return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
pat->content.Set.elements = new_arr;
}
else
pat->content.Set.elements = malloc(sizeof(char*));
if(!pat->content.Set.elements)
return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
pat->content.Set.elements[pat->content.Set.size] =
strdup(glob->glob_buffer);
if(!pat->content.Set.elements[pat->content.Set.size])
return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
++pat->content.Set.size;
if(*pattern == '}') {
pattern++; /* pass the closing brace */
done = TRUE;
continue;
}
buf = glob->glob_buffer;
++pattern;
++(*posp);
break;
case ']': /* illegal closing bracket */
return GLOBERROR("unexpected close bracket", *posp, CURLE_URL_MALFORMAT);
case '\\': /* escaped character, skip '\' */
if(pattern[1]) {
++pattern;
++(*posp);
}
/* intentional fallthrough */
default:
*buf++ = *pattern++; /* copy character to set element */
++(*posp);
}
}
*patternp = pattern; /* return with the new position */
return CURLE_OK;
}
static CURLcode glob_range(URLGlob *glob, char **patternp,
size_t *posp, unsigned long *amount,
int globindex)
{
/* processes a range expression with the point behind the opening '['
- char range: e.g. "a-z]", "B-Q]"
- num range: e.g. "0-9]", "17-2000]"
- num range with leading zeros: e.g. "001-999]"
expression is checked for well-formedness and collected until the next ']'
*/
URLPattern *pat;
int rc;
char *pattern = *patternp;
char *c;
pat = &glob->pattern[glob->size];
pat->globindex = globindex;
if(ISALPHA(*pattern)) {
/* character range detected */
char min_c;
char max_c;
int step=1;
pat->type = UPTCharRange;
rc = sscanf(pattern, "%c-%c", &min_c, &max_c);
if((rc == 2) && (pattern[3] == ':')) {
char *endp;
unsigned long lstep;
errno = 0;
lstep = strtoul(&pattern[4], &endp, 10);
if(errno || (*endp != ']'))
step = -1;
else {
pattern = endp+1;
step = (int)lstep;
if(step > (max_c - min_c))
step = -1;
}
}
else
pattern += 4;
*posp += (pattern - *patternp);
if((rc != 2) || (min_c >= max_c) || ((max_c - min_c) > ('z' - 'a')) ||
(step < 0) )
/* the pattern is not well-formed */
return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT);
/* if there was a ":[num]" thing, use that as step or else use 1 */
pat->content.CharRange.step = step;
pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c;
pat->content.CharRange.max_c = max_c;
if(multiply(amount, (pat->content.CharRange.max_c -
pat->content.CharRange.min_c + 1)))
return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT);
}
else if(ISDIGIT(*pattern)) {
/* numeric range detected */
unsigned long min_n;
unsigned long max_n = 0;
unsigned long step_n = 0;
char *endp;
pat->type = UPTNumRange;
pat->content.NumRange.padlength = 0;
if(*pattern == '0') {
/* leading zero specified, count them! */
c = pattern;
while(ISDIGIT(*c)) {
c++;
++pat->content.NumRange.padlength; /* padding length is set for all
instances of this pattern */
}
}
errno = 0;
min_n = strtoul(pattern, &endp, 10);
if(errno || (endp == pattern))
endp=NULL;
else {
if(*endp != '-')
endp = NULL;
else {
pattern = endp+1;
errno = 0;
max_n = strtoul(pattern, &endp, 10);
if(errno || (*endp == ':')) {
pattern = endp+1;
errno = 0;
step_n = strtoul(pattern, &endp, 10);
if(errno)
/* over/underflow situation */
endp = NULL;
}
else
step_n = 1;
if(endp && (*endp == ']')) {
pattern= endp+1;
}
else
endp = NULL;
}
}
*posp += (pattern - *patternp);
if(!endp || (min_n > max_n) || (step_n > (max_n - min_n)))
/* the pattern is not well-formed */
return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT);
/* typecasting to ints are fine here since we make sure above that we
are within 31 bits */
pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n;
pat->content.NumRange.max_n = max_n;
pat->content.NumRange.step = step_n;
if(multiply(amount, (pat->content.NumRange.max_n -
pat->content.NumRange.min_n + 1)))
return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT);
}
else
return GLOBERROR("bad range specification", *posp, CURLE_URL_MALFORMAT);
*patternp = pattern;
return CURLE_OK;
}
static bool peek_ipv6(const char *str, size_t *skip)
{
/*
* Scan for a potential IPv6 literal.
* - Valid globs contain a hyphen and <= 1 colon.
* - IPv6 literals contain no hyphens and >= 2 colons.
*/
size_t i = 0;
size_t colons = 0;
if(str[i++] != '[') {
return FALSE;
}
for(;;) {
const char c = str[i++];
if(ISALNUM(c) || c == '.' || c == '%') {
/* ok */
}
else if(c == ':') {
colons++;
}
else if(c == ']') {
*skip = i;
return colons >= 2 ? TRUE : FALSE;
}
else {
return FALSE;
}
}
}
static CURLcode glob_parse(URLGlob *glob, char *pattern,
size_t pos, unsigned long *amount)
{
/* processes a literal string component of a URL
special characters '{' and '[' branch to set/range processing functions
*/
CURLcode res = CURLE_OK;
int globindex = 0; /* count "actual" globs */
*amount = 1;
while(*pattern && !res) {
char *buf = glob->glob_buffer;
size_t sublen = 0;
while(*pattern && *pattern != '{') {
if(*pattern == '[') {
/* Skip over potential IPv6 literals. */
size_t skip;
if(peek_ipv6(pattern, &skip)) {
memcpy(buf, pattern, skip);
buf += skip;
pattern += skip;
sublen += skip;
continue;
}
break;
}
if(*pattern == '}' || *pattern == ']')
return GLOBERROR("unmatched close brace/bracket", pos,
CURLE_URL_MALFORMAT);
/* only allow \ to escape known "special letters" */
if(*pattern == '\\' &&
(*(pattern+1) == '{' || *(pattern+1) == '[' ||
*(pattern+1) == '}' || *(pattern+1) == ']') ) {
/* escape character, skip '\' */
++pattern;
++pos;
}
*buf++ = *pattern++; /* copy character to literal */
++pos;
sublen++;
}
if(sublen) {
/* we got a literal string, add it as a single-item list */
*buf = '\0';
res = glob_fixed(glob, glob->glob_buffer, sublen);
}
else {
switch (*pattern) {
case '\0': /* done */
break;
case '{':
/* process set pattern */
pattern++;
pos++;
res = glob_set(glob, &pattern, &pos, amount, globindex++);
break;
case '[':
/* process range pattern */
pattern++;
pos++;
res = glob_range(glob, &pattern, &pos, amount, globindex++);
break;
}
}
if(++glob->size > GLOB_PATTERN_NUM)
return GLOBERROR("too many globs", pos, CURLE_URL_MALFORMAT);
}
return res;
}
CURLcode glob_url(URLGlob** glob, char* url, unsigned long *urlnum,
FILE *error)
{
/*
* We can deal with any-size, just make a buffer with the same length
* as the specified URL!
*/
URLGlob *glob_expand;
unsigned long amount = 0;
char *glob_buffer;
CURLcode res;
*glob = NULL;
glob_buffer = malloc(strlen(url) + 1);
if(!glob_buffer)
return CURLE_OUT_OF_MEMORY;
glob_expand = calloc(1, sizeof(URLGlob));
if(!glob_expand) {
Curl_safefree(glob_buffer);
return CURLE_OUT_OF_MEMORY;
}
glob_expand->urllen = strlen(url);
glob_expand->glob_buffer = glob_buffer;
res = glob_parse(glob_expand, url, 1, &amount);
if(!res)
*urlnum = amount;
else {
if(error && glob_expand->error) {
char text[128];
const char *t;
if(glob_expand->pos) {
snprintf(text, sizeof(text), "%s in column %zu", glob_expand->error,
glob_expand->pos);
t = text;
}
else
t = glob_expand->error;
/* send error description to the error-stream */
fprintf(error, "curl: (%d) [globbing] %s\n", res, t);
}
/* it failed, we cleanup */
glob_cleanup(glob_expand);
*urlnum = 1;
return res;
}
*glob = glob_expand;
return CURLE_OK;
}
void glob_cleanup(URLGlob* glob)
{
size_t i;
int elem;
for(i = 0; i < glob->size; i++) {
if((glob->pattern[i].type == UPTSet) &&
(glob->pattern[i].content.Set.elements)) {
for(elem = glob->pattern[i].content.Set.size - 1;
elem >= 0;
--elem) {
Curl_safefree(glob->pattern[i].content.Set.elements[elem]);
}
Curl_safefree(glob->pattern[i].content.Set.elements);
}
}
Curl_safefree(glob->glob_buffer);
Curl_safefree(glob);
}
CURLcode glob_next_url(char **globbed, URLGlob *glob)
{
URLPattern *pat;
size_t i;
size_t len;
size_t buflen = glob->urllen + 1;
char *buf = glob->glob_buffer;
*globbed = NULL;
if(!glob->beenhere)
glob->beenhere = 1;
else {
bool carry = TRUE;
/* implement a counter over the index ranges of all patterns, starting
with the rightmost pattern */
for(i = 0; carry && (i < glob->size); i++) {
carry = FALSE;
pat = &glob->pattern[glob->size - 1 - i];
switch (pat->type) {
case UPTSet:
if((pat->content.Set.elements) &&
(++pat->content.Set.ptr_s == pat->content.Set.size)) {
pat->content.Set.ptr_s = 0;
carry = TRUE;
}
break;
case UPTCharRange:
pat->content.CharRange.ptr_c =
(char)(pat->content.CharRange.step +
(int)((unsigned char)pat->content.CharRange.ptr_c));
if(pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) {
pat->content.CharRange.ptr_c = pat->content.CharRange.min_c;
carry = TRUE;
}
break;
case UPTNumRange:
pat->content.NumRange.ptr_n += pat->content.NumRange.step;
if(pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) {
pat->content.NumRange.ptr_n = pat->content.NumRange.min_n;
carry = TRUE;
}
break;
default:
printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
return CURLE_FAILED_INIT;
}
}
if(carry) { /* first pattern ptr has run into overflow, done! */
/* TODO: verify if this should actally return CURLE_OK. */
return CURLE_OK; /* CURLE_OK to match previous behavior */
}
}
for(i = 0; i < glob->size; ++i) {
pat = &glob->pattern[i];
switch(pat->type) {
case UPTSet:
if(pat->content.Set.elements) {
len = strlen(pat->content.Set.elements[pat->content.Set.ptr_s]);
snprintf(buf, buflen, "%s",
pat->content.Set.elements[pat->content.Set.ptr_s]);
buf += len;
buflen -= len;
}
break;
case UPTCharRange:
*buf++ = pat->content.CharRange.ptr_c;
break;
case UPTNumRange:
len = snprintf(buf, buflen, "%0*ld",
pat->content.NumRange.padlength,
pat->content.NumRange.ptr_n);
buf += len;
buflen -= len;
break;
default:
printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
return CURLE_FAILED_INIT;
}
}
*buf = '\0';
*globbed = strdup(glob->glob_buffer);
if(!*globbed)
return CURLE_OUT_OF_MEMORY;
return CURLE_OK;
}
CURLcode glob_match_url(char **result, char *filename, URLGlob *glob)
{
char *target;
size_t allocsize;
char numbuf[18];
char *appendthis = NULL;
size_t appendlen = 0;
size_t stringlen = 0;
*result = NULL;
/* We cannot use the glob_buffer for storage here since the filename may
* be longer than the URL we use. We allocate a good start size, then
* we need to realloc in case of need.
*/
allocsize = strlen(filename) + 1; /* make it at least one byte to store the
trailing zero */
target = malloc(allocsize);
if(!target)
return CURLE_OUT_OF_MEMORY;
while(*filename) {
if(*filename == '#' && ISDIGIT(filename[1])) {
unsigned long i;
char *ptr = filename;
unsigned long num = strtoul(&filename[1], &filename, 10);
URLPattern *pat =NULL;
if(num < glob->size) {
num--; /* make it zero based */
/* find the correct glob entry */
for(i=0; i<glob->size; i++) {
if(glob->pattern[i].globindex == (int)num) {
pat = &glob->pattern[i];
break;
}
}
}
if(pat) {
switch (pat->type) {
case UPTSet:
if(pat->content.Set.elements) {
appendthis = pat->content.Set.elements[pat->content.Set.ptr_s];
appendlen =
strlen(pat->content.Set.elements[pat->content.Set.ptr_s]);
}
break;
case UPTCharRange:
numbuf[0] = pat->content.CharRange.ptr_c;
numbuf[1] = 0;
appendthis = numbuf;
appendlen = 1;
break;
case UPTNumRange:
snprintf(numbuf, sizeof(numbuf), "%0*d",
pat->content.NumRange.padlength,
pat->content.NumRange.ptr_n);
appendthis = numbuf;
appendlen = strlen(numbuf);
break;
default:
fprintf(stderr, "internal error: invalid pattern type (%d)\n",
(int)pat->type);
Curl_safefree(target);
return CURLE_FAILED_INIT;
}
}
else {
/* #[num] out of range, use the #[num] in the output */
filename = ptr;
appendthis = filename++;
appendlen = 1;
}
}
else {
appendthis = filename++;
appendlen = 1;
}
if(appendlen + stringlen >= allocsize) {
char *newstr;
/* we append a single byte to allow for the trailing byte to be appended
at the end of this function outside the while() loop */
allocsize = (appendlen + stringlen) * 2;
newstr = realloc(target, allocsize + 1);
if(!newstr) {
Curl_safefree(target);
return CURLE_OUT_OF_MEMORY;
}
target = newstr;
}
memcpy(&target[stringlen], appendthis, appendlen);
stringlen += appendlen;
}
target[stringlen]= '\0';
*result = target;
return CURLE_OK;
}