socat/nestlex.c

/* source: nestlex.c */
/* Copyright Gerhard Rieger 2006 */
/* Published under the GNU General Public License V.2, see file COPYING */

/* a function for lexical scanning of nested character patterns */

#include "config.h"
#include "mytypes.h"

#include "sysincludes.h"


/* nestlex(): scan a string and copy its value to an output token.

   Scanning starts at *addr and stops at the first unescaped, unquoted,
   unnested occurrence of one of the strings in ends[], or at the end of the
   input string, whichever comes first. The end pattern itself is neither
   consumed nor copied. No trailing \0 is written to the token.

   - A backslash makes the following character literal; the backslash itself
     is dropped. With c_esc, the C escapes \0 \a \b \f \n \r \t \v are
     translated to their character values. A backslash that is the last
     character of the input is dropped.
   - Text between two occurrences of a hard quote string is copied without
     looking for end, quote or nest patterns (backslash escapes still apply).
   - Text between two occurrences of a soft quote string is scanned with the
     full quote and nest tables still active.
   - With dropquotes, the outermost quote strings are removed from the token;
     quotes inside quotes or nests are always kept.
   - An opening nest string starts a clause that lasts until its matching
     closing string; both delimiters are copied to the token.
   - If the input ends inside a quote or a nested clause, scanning stops at
     the terminating \0 and what was collected so far is returned.

   On return *addr points to the end pattern (or to the terminating \0),
   *token points to the first unwritten byte, and *len has been reduced by
   the number of bytes written.

   returns 0 if a token could be extracted
   returns -1 if the token space was exhausted (*len reached 0)
   The value 1 ("addr ended unexpectedly") is not produced by this
   implementation.
*/
int nestlex(const char **addr, char **token, size_t *len,
	    const char *ends[], const char *hquotes[],
	    const char *squotes[], const char *nests[],
	    bool dropquotes, bool c_esc, bool html_esc);

/* copies n bytes from *in to *out, advancing both pointers and charging each
   byte against *len;
   returns -1 as soon as the token space is used up, 0 otherwise */
static int nestlex_copy(const char **in, char **out, size_t *len, size_t n) {
   while (n-- > 0) {
      if (*len == 0) {
	 return -1;	/* nothing left to write into */
      }
      *(*out)++ = *(*in)++;
      if (--*len == 0) {
	 return -1;	/* no room left, not even for a trailing \0 */
      }
   }
   return 0;
}

/* looks for the first string of the NULL terminated table pats that is a
   prefix of in; with pairs set, only every second entry is tried (the others
   being the matching closing strings);
   returns a pointer to the matching table entry, or NULL */
static const char **nestlex_match(const char *in, const char *pats[],
				  bool pairs) {
   if (pats == NULL) {
      return NULL;
   }
   while (*pats != NULL) {
      if (pairs && pats[1] == NULL) {
	 break;		/* opening string without a partner; ignore it */
      }
      if (strncmp(in, *pats, strlen(*pats)) == 0) {
	 return pats;
      }
      pats += pairs ? 2 : 1;
   }
   return NULL;
}

/* handles one quoted or nested clause whose opening string has just been
   recognized at *in. The inner text is scanned by a recursive nestlex() call
   that ends at closestr and uses the given quote/nest tables. With keep, both
   delimiters are copied to the token, otherwise they are skipped.
   returns 0 when the clause was closed, -1 on token overflow, and 1 when the
   input ended before closestr was found (*in then points to the \0) */
static int nestlex_clause(const char **in, char **out, size_t *len,
			  const char *openstr, const char *closestr,
			  const char *hquotes[], const char *squotes[],
			  const char *nests[],
			  bool keep, bool c_esc, bool html_esc) {
   const char *endnest[2];
   int result;

   if (!keep) {
      *in += strlen(openstr);
   } else if (nestlex_copy(in, out, len, strlen(openstr)) < 0) {
      return -1;
   }

   endnest[0] = closestr;
   endnest[1] = NULL;
   result = nestlex(in, out, len, endnest, hquotes, squotes, nests,
		    false, c_esc, html_esc);
   if (result != 0) {
      return result;	/* overflow inside the clause: hand it upwards */
   }
   if (**in == '\0') {
      /* the inner scan ran into the end of the input, so there is no closing
	 string to skip or copy; do not step beyond the terminator */
      return 1;
   }

   if (!keep) {
      *in += strlen(closestr);
      return 0;
   }
   return nestlex_copy(in, out, len, strlen(closestr));
}

int nestlex(const char **addr,	/* input string; aft points to end token */
	    char **token,	/* output token; aft points to first unwritten
				   char (caller might want to set it to \0) */
	    size_t *len,	/* remaining bytes in token space (incl. \0) */
	    const char *ends[],	/* list of end strings */
	    const char *hquotes[],/* list of strings that quote (hard qu.) */
	    const char *squotes[],/* list of strings that quote softly */
	    const char *nests[],/* list of strings that start nesting;
				   every second one is matching end */
	    bool dropquotes,	/* drop the outermost quotes */
	    bool c_esc,		/* solve C char escapes: \n \t \0 etc */
	    bool html_esc	/* solve HTML char escapes: %0d %08 etc;
				   only handed on to nested scans, no
				   translation is performed here */
	    ) {
   const char *in = *addr;	/* pointer into input string */
   char *out = *token;		/* pointer into output token */
   const char **hit;		/* table entry that matched at in */
   int status = 0;		/* value to return */
   int rc;
   char c;

   while (*in != '\0') {

      /* first check the end patterns (e.g. for ']') */
      if (nestlex_match(in, ends, false) != NULL) {
	 break;
      }

      /* then quotes and nested clauses, in order of precedence */
      rc = 0;
      hit = nestlex_match(in, hquotes, false);
      if (hit != NULL) {
	 /* hard quote: no quote or nest patterns are active inside */
	 rc = nestlex_clause(&in, &out, len, hit[0], hit[0],
			     NULL, NULL, NULL,
			     !dropquotes, c_esc, html_esc);
      } else if ((hit = nestlex_match(in, squotes, false)) != NULL) {
	 /* soft quote: all patterns stay active inside */
	 rc = nestlex_clause(&in, &out, len, hit[0], hit[0],
			     hquotes, squotes, nests,
			     !dropquotes, c_esc, html_esc);
      } else if ((hit = nestlex_match(in, nests, true)) != NULL) {
	 /* nested clause: its delimiters always belong to the token */
	 rc = nestlex_clause(&in, &out, len, hit[0], hit[1],
			     hquotes, squotes, nests,
			     true, c_esc, html_esc);
      }
      if (hit != NULL) {
	 if (rc < 0) {
	    status = -1;	/* output overflow */
	    break;
	 }
	 /* rc == 1 means the input ended inside the clause; in then points
	    to the \0 and the loop condition ends the scan */
	 continue;
      }

      /* "normal" data, possibly escaped */
      c = *in++;
      if (c == '\\') {
	 if (*in == '\0') {
	    /* trailing '\': drop it and stay on the terminator */
	    break;
	 }
	 c = *in++;
	 if (c_esc) {	/* solve C char escapes: \n \t \0 etc */
	    switch (c) {
	    case '0': c = '\0'; break;
	    case 'a': c = '\a'; break;
	    case 'b': c = '\b'; break;
	    case 'f': c = '\f'; break;
	    case 'n': c = '\n'; break;
	    case 'r': c = '\r'; break;
	    case 't': c = '\t'; break;
	    case 'v': c = '\v'; break;
	    /* TODO: \x, \u and \U escapes are not implemented */
	    default: break;
	    }
	 }
      }

      if (*len == 0) {
	 status = -1;	/* called without any token space */
	 break;
      }
      *out++ = c;
      if (--*len == 0) {
	 status = -1;	/* output overflow */
	 break;
      }
   }

   *addr = in;
   *token = out;
   return status;
}
replaced RCS ID's by source file names 2008-01-28 16:37:16 -05:00			`/* source: nestlex.c */`
socat V1.6.0.0 (initial GIT commit) 2008-01-27 07:00:08 -05:00			`/* Copyright Gerhard Rieger 2006 */`
			`/* Published under the GNU General Public License V.2, see file COPYING */`

			`/* a function for lexical scanning of nested character patterns */`

			`#include "config.h"`
			`#include "mytypes.h"`

			`#include "sysincludes.h"`


			`/* sub: scan a string and copy its value to output string`
			`end scanning when an unescaped, unnested string from ends array is found`
			`does not copy the end pattern`
			`does not write a trailing \0 to token`
			`allows escaping with \ and quoting (\ and quotes are removed)`
			`allows nesting with div. parens`
			`returns -1 if out string was too small`
			`returns 1 if addr ended unexpectedly`
			`returns 0 if token could be extracted successfully`
			`*/`
			`int nestlex(const char *addr, / input string; aft points to end token */`
			`char *token, / output token; aft points to first unwritten`
			`char (caller might want to set it to \0) */`
			`size_t len, / remaining bytes in token space (incl. \0) */`
			`const char ends[], / list of end strings */`
			`const char hquotes[],/ list of strings that quote (hard qu.) */`
			`const char squotes[],/ list of strings that quote softly */`
			`const char nests[],/ list of strings that start nesting;`
			`every second one is matching end */`
			`bool dropquotes, /* drop the outermost quotes */`
			`bool c_esc, /* solve C char escapes: \n \t \0 etc */`
			`bool html_esc /* solve HTML char escapes: %0d %08 etc */`
			`) {`
			`const char in = addr; /* pointer into input string */`
			`const char *endx; / loops over end patterns */`
			`const char *quotx; / loops over quote patterns */`
			`const char *nestx; / loops over nest patterns */`
			`char out = token; /* pointer into output token */`
			`char c;`
			`int i;`
			`int result;`

			`while (true) {`

			`/* is this end of input string? */`
			`if (*in == 0) {`

			`break; /* end of string */`
			`}`

			`/* first check the end patterns (e.g. for ']') */`
			`endx = ends; i = 0;`
			`while (*endx) {`
			`if (!strncmp(in, endx, strlen(endx))) {`
			`/* this end pattern matches */`
			`*addr = in;`
			`*token = out;`
			`return 0;`
			`}`
			`++endx;`
			`}`

			`/* check for hard quoting pattern */`
			`quotx = hquotes;`
			`while (hquotes && *quotx) {`
			`if (!strncmp(in, quotx, strlen(quotx))) {`
			`/* this quote pattern matches */`
			`const char *endnest[2];`
			`if (dropquotes) {`
			`/* we strip this quote */`
			`in += strlen(*quotx);`
			`} else {`
			`for (i = strlen(*quotx); i > 0; --i) {`
			`out++ = in++;`
			`if (--len <= 0) { addr = in; *token = out; return -1; }`
			`}`
			`}`
			`/* we call nestlex recursively */`
			`endnest[0] = *quotx;`
			`endnest[1] = NULL;`
			`result =`
			`nestlex(&in, &out, len, endnest, NULL/hquotes/,`
			`NULL/squotes/, NULL/nests/,`
			`false, c_esc, html_esc);`
			`if (result == 0 && dropquotes) {`
			`/* we strip this quote */`
			`in += strlen(*quotx);`
			`} else {`
			`/* we copy the trailing quote */`
			`for (i = strlen(*quotx); i > 0; --i) {`
			`out++ = in++;`
			`if (--len <= 0) { addr = in; *token = out; return -1; }`
			`}`
			`}`

			`break;`
			`}`
			`++quotx;`
			`}`
			`if (hquotes && *quotx != NULL) {`
			`/* there was a quote; string might continue with hard quote */`
			`continue;`
			`}`

			`/* check for soft quoting pattern */`
			`quotx = squotes;`
			`while (squotes && *quotx) {`
			`if (!strncmp(in, quotx, strlen(quotx))) {`
			`/* this quote pattern matches */`
			`/* we strip this quote */`
			`/* we call nestlex recursively */`
			`const char *endnest[2];`
			`if (dropquotes) {`
			`/* we strip this quote */`
			`in += strlen(*quotx);`
			`} else {`
			`for (i = strlen(*quotx); i > 0; --i) {`
			`out++ = in++;`
			`if (--len <= 0) { addr = in; *token = out; return -1; }`
			`}`
			`}`
			`endnest[0] = *quotx;`
			`endnest[1] = NULL;`
			`result =`
			`nestlex(&in, &out, len, endnest, hquotes,`
			`squotes, nests,`
			`false, c_esc, html_esc);`

			`if (result == 0 && dropquotes) {`
			`/* we strip the trailing quote */`
			`in += strlen(*quotx);`
			`} else {`
			`/* we copy the trailing quote */`
			`for (i = strlen(*quotx); i > 0; --i) {`
			`out++ = in++;`
			`if (--len <= 0) { addr = in; *token = out; return -1; }`
			`}`
			`}`
			`break;`
			`}`
			`++quotx;`
			`}`
			`if (squotes && *quotx != NULL) {`
			`/* there was a soft quote; string might continue with any quote */`
			`continue;`
			`}`

			`/* check patterns that start a nested clause */`
			`nestx = nests; i = 0;`
			`while (nests && *nestx) {`
			`if (!strncmp(in, nestx, strlen(nestx))) {`
			`/* this nest pattern matches */`
			`const char *endnest[2];`
			`endnest[0] = nestx[1];`
			`endnest[1] = NULL;`

			`for (i = strlen(nestx[1]); i > 0; --i) {`
			`out++ = in++;`
			`if (--len <= 0) { addr = in; *token = out; return -1; }`
			`}`

			`result =`
			`nestlex(&in, &out, len, endnest, hquotes, squotes, nests,`
			`false, c_esc, html_esc);`
			`if (result == 0) {`
			`/* copy endnest */`
			`i = strlen(nestx[1]); while (i > 0) {`
			`out++ = in++;`
			`if (--*len <= 0) {`
			`*addr = in;`
			`*token = out;`
			`return -1;`
			`}`
			`--i;`
			`}`
			`}`
			`break;`
			`}`
			`nestx += 2; /* skip matching end pattern in table */`
			`}`
			`if (nests && *nestx) {`
			`/* we handled a nested expression, continue loop */`
			`continue;`
			`}`

			`/* "normal" data, possibly escaped */`
			`c = *in++;`
			`if (c == '\\') {`
			`/* found a plain \ escaped part */`
			`c = *in++;`
			`if (c == 0) { /* Warn("trailing '\\'");*/ break; }`
			`if (c_esc) { /* solve C char escapes: \n \t \0 etc */`
			`switch (c) {`
			`case '0': c = '\0'; break;`
			`case 'a': c = '\a'; break;`
			`case 'b': c = '\b'; break;`
			`case 'f': c = '\f'; break;`
			`case 'n': c = '\n'; break;`
			`case 'r': c = '\r'; break;`
			`case 't': c = '\t'; break;`
			`case 'v': c = '\v'; break;`
			`#if LATER`
			`case 'x': !!! 1 to 2 hex digits; break;`
			`case 'u': !!! 4 hex digits?; break;`
			`case 'U': !!! 8 hex digits?; break;`
			`#endif`
			`default: break;`
			`}`
			`}`
			`*out++ = c;`
			`--*len;`
			`if (len == 0) {`
			`*addr = in;`
			`*token = out;`
			`return -1; /* output overflow */`
			`}`
			`continue;`
			`}`

			`/* just a simple char */`
			`*out++ = c;`
			`--*len;`
			`if (len == 0) {`
			`*addr = in;`
			`*token = out;`
			`return -1; /* output overflow */`
			`}`

			`}`
			`/* never come here? */`

			`*addr = in;`
			`*token = out;`
			`return 0; /* OK */`
			`}`