shrpx: Update http-parser

This commit is contained in:
Tatsuhiro Tsujikawa 2012-09-11 00:12:23 +09:00
parent b0f155eebc
commit 427b9ebfdb
4 changed files with 612 additions and 133 deletions

View File

@ -30,3 +30,8 @@ James McLaughlin <jamie@lacewing-project.org>
David Gwynne <loki@animata.net> David Gwynne <loki@animata.net>
LE ROUX Thomas <thomas@procheo.fr> LE ROUX Thomas <thomas@procheo.fr>
Randy Rizun <rrizun@ortivawireless.com> Randy Rizun <rrizun@ortivawireless.com>
Andre Louis Caron <andre.louis.caron@usherbrooke.ca>
Simon Zimmermann <simonz05@gmail.com>
Erik Dubbelboer <erik@dubbelboer.com>
Martell Malone <martellmalone@gmail.com>
Bertrand Paquet <bpaquet@octo.com>

View File

@ -37,6 +37,19 @@
# define MIN(a,b) ((a) < (b) ? (a) : (b)) # define MIN(a,b) ((a) < (b) ? (a) : (b))
#endif #endif
/* Number of elements in array `a`. Computed with sizeof, so `a` must be a
 * real array object; a pointer (or an array function parameter) would give
 * a meaningless result. */
#ifndef ARRAY_SIZE
# define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
#endif
/* Test bit number `i` in the byte array `a`, treated as a packed bitmap:
 * byte index is i >> 3, bit within that byte is i & 7 (bit 0 = value 1).
 * The `!!` normalises the result to exactly 0 or 1. `i` is evaluated twice. */
#ifndef BIT_AT
# define BIT_AT(a, i) \
(!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \
(1 << ((unsigned int) (i) & 7))))
#endif
/* Bounds-checked array read: yields a[i] when i, converted to unsigned int,
 * is below ARRAY_SIZE(a), otherwise yields the fallback `v`. The unsigned
 * conversion makes negative indices compare as out of range. `i` is
 * evaluated twice, and `a` must be a real array (see ARRAY_SIZE). */
#ifndef ELEM_AT
# define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
#endif
#if HTTP_PARSER_DEBUG #if HTTP_PARSER_DEBUG
#define SET_ERRNO(e) \ #define SET_ERRNO(e) \
@ -185,45 +198,45 @@ static const int8_t unhex[256] =
#if HTTP_PARSER_STRICT #if HTTP_PARSER_STRICT
# define T 0 # define T(v) 0
#else #else
# define T 1 # define T(v) v
#endif #endif
static const uint8_t normal_url_char[256] = { static const uint8_t normal_url_char[32] = {
/* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */ /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
0, 0, 0, 0, 0, 0, 0, 0, 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
/* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */ /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
0, T, 0, 0, T, 0, 0, 0, 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0,
/* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */ /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
0, 0, 0, 0, 0, 0, 0, 0, 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
/* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */ /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
0, 0, 0, 0, 0, 0, 0, 0, 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
/* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */ /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
0, 1, 1, 0, 1, 1, 1, 1, 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128,
/* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */ /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
1, 1, 1, 1, 1, 1, 1, 1, 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
/* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */ /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
1, 1, 1, 1, 1, 1, 1, 1, 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
/* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */ /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
1, 1, 1, 1, 1, 1, 1, 0, 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0,
/* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */ /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
1, 1, 1, 1, 1, 1, 1, 1, 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
/* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */ /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
1, 1, 1, 1, 1, 1, 1, 1, 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
/* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */ /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
1, 1, 1, 1, 1, 1, 1, 1, 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
/* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */ /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
1, 1, 1, 1, 1, 1, 1, 1, 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
/* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */ /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
1, 1, 1, 1, 1, 1, 1, 1, 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
/* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */ /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
1, 1, 1, 1, 1, 1, 1, 1, 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */ /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
1, 1, 1, 1, 1, 1, 1, 1, 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */ /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
1, 1, 1, 1, 1, 1, 1, 0, }; 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, };
#undef T #undef T
@ -253,13 +266,9 @@ enum state
, s_req_schema , s_req_schema
, s_req_schema_slash , s_req_schema_slash
, s_req_schema_slash_slash , s_req_schema_slash_slash
, s_req_host_start , s_req_server_start
, s_req_host_v6_start , s_req_server
, s_req_host_v6 , s_req_server_with_at
, s_req_host_v6_end
, s_req_host
, s_req_port_start
, s_req_port
, s_req_path , s_req_path
, s_req_query_string_start , s_req_query_string_start
, s_req_query_string , s_req_query_string
@ -337,6 +346,19 @@ enum header_states
, h_connection_close , h_connection_close
}; };
/* States of the small state machine used by http_parse_host_char() and
 * http_parse_host() to split the server part of a URL into userinfo, host
 * and port. */
enum http_host_state
{
s_http_host_dead = 1 /* returned for any character not accepted in the current state */
, s_http_userinfo_start /* initial state when the caller saw an '@' */
, s_http_userinfo /* consuming userinfo characters, waiting for '@' */
, s_http_host_start /* initial state without '@', or just after the '@' */
, s_http_host_v6_start /* just after the opening '[' */
, s_http_host /* consuming host-name characters */
, s_http_host_v6 /* consuming hex digits / ':' inside '[' ... ']' */
, s_http_host_v6_end /* just after the closing ']' */
, s_http_host_port_start /* just after the ':' that follows the host */
, s_http_host_port /* consuming port digits */
};
/* Macros for character classes; depends on strict-mode */ /* Macros for character classes; depends on strict-mode */
#define CR '\r' #define CR '\r'
@ -346,15 +368,21 @@ enum header_states
#define IS_NUM(c) ((c) >= '0' && (c) <= '9') #define IS_NUM(c) ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c)) #define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c))
#define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f')) #define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
/* True for the punctuation characters - _ . ! ~ * ' ( )
 * NOTE(review): this looks like the "mark" set of RFC 2396 - confirm. */
#define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \
(c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
(c) == ')')
/* True for characters accepted in the userinfo part of a URL: alphanumerics,
 * IS_MARK characters, '%', and ; : & = + $ ,
 * Note that '@' is deliberately absent; it terminates the userinfo.
 * The argument is evaluated many times, so it must have no side effects. */
#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
(c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
(c) == '$' || (c) == ',')
#if HTTP_PARSER_STRICT #if HTTP_PARSER_STRICT
#define TOKEN(c) (tokens[(unsigned char)c]) #define TOKEN(c) (tokens[(unsigned char)c])
#define IS_URL_CHAR(c) (normal_url_char[(unsigned char) (c)]) #define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c))
#define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-') #define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else #else
#define TOKEN(c) ((c == ' ') ? ' ' : tokens[(unsigned char)c]) #define TOKEN(c) ((c == ' ') ? ' ' : tokens[(unsigned char)c])
#define IS_URL_CHAR(c) \ #define IS_URL_CHAR(c) \
(normal_url_char[(unsigned char) (c)] || ((c) & 0x80)) (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80))
#define IS_HOST_CHAR(c) \ #define IS_HOST_CHAR(c) \
(IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_') (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif #endif
@ -388,7 +416,7 @@ static struct {
}; };
#undef HTTP_STRERROR_GEN #undef HTTP_STRERROR_GEN
int http_message_needs_eof(http_parser *parser); int http_message_needs_eof(const http_parser *parser);
/* Our URL parser. /* Our URL parser.
* *
@ -450,67 +478,33 @@ parse_url_char(enum state s, const char ch)
case s_req_schema_slash_slash: case s_req_schema_slash_slash:
if (ch == '/') { if (ch == '/') {
return s_req_host_start; return s_req_server_start;
} }
break; break;
case s_req_host_start: case s_req_server_with_at:
if (ch == '[') { if (ch == '@') {
return s_req_host_v6_start; return s_dead;
}
if (IS_HOST_CHAR(ch)) {
return s_req_host;
}
break;
case s_req_host:
if (IS_HOST_CHAR(ch)) {
return s_req_host;
} }
/* FALLTHROUGH */ /* FALLTHROUGH */
case s_req_host_v6_end: case s_req_server_start:
switch (ch) { case s_req_server:
case ':': if (ch == '/') {
return s_req_port_start;
case '/':
return s_req_path; return s_req_path;
}
case '?': if (ch == '?') {
return s_req_query_string_start; return s_req_query_string_start;
} }
break; if (ch == '@') {
return s_req_server_with_at;
case s_req_host_v6:
if (ch == ']') {
return s_req_host_v6_end;
} }
/* FALLTHROUGH */ if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
case s_req_host_v6_start: return s_req_server;
if (IS_HEX(ch) || ch == ':') {
return s_req_host_v6;
}
break;
case s_req_port:
switch (ch) {
case '/':
return s_req_path;
case '?':
return s_req_query_string_start;
}
/* FALLTHROUGH */
case s_req_port_start:
if (IS_NUM(ch)) {
return s_req_port;
} }
break; break;
@ -632,13 +626,9 @@ size_t http_parser_execute (http_parser *parser,
case s_req_schema: case s_req_schema:
case s_req_schema_slash: case s_req_schema_slash:
case s_req_schema_slash_slash: case s_req_schema_slash_slash:
case s_req_host_start: case s_req_server_start:
case s_req_host_v6_start: case s_req_server:
case s_req_host_v6: case s_req_server_with_at:
case s_req_host_v6_end:
case s_req_host:
case s_req_port_start:
case s_req_port:
case s_req_query_string_start: case s_req_query_string_start:
case s_req_query_string: case s_req_query_string:
case s_req_fragment_start: case s_req_fragment_start:
@ -999,7 +989,7 @@ size_t http_parser_execute (http_parser *parser,
MARK(url); MARK(url);
if (parser->method == HTTP_CONNECT) { if (parser->method == HTTP_CONNECT) {
parser->state = s_req_host_start; parser->state = s_req_server_start;
} }
parser->state = parse_url_char((enum state)parser->state, ch); parser->state = parse_url_char((enum state)parser->state, ch);
@ -1014,10 +1004,7 @@ size_t http_parser_execute (http_parser *parser,
case s_req_schema: case s_req_schema:
case s_req_schema_slash: case s_req_schema_slash:
case s_req_schema_slash_slash: case s_req_schema_slash_slash:
case s_req_host_start: case s_req_server_start:
case s_req_host_v6_start:
case s_req_host_v6:
case s_req_port_start:
{ {
switch (ch) { switch (ch) {
/* No whitespace allowed here */ /* No whitespace allowed here */
@ -1037,9 +1024,8 @@ size_t http_parser_execute (http_parser *parser,
break; break;
} }
case s_req_host: case s_req_server:
case s_req_host_v6_end: case s_req_server_with_at:
case s_req_port:
case s_req_path: case s_req_path:
case s_req_query_string_start: case s_req_query_string_start:
case s_req_query_string: case s_req_query_string:
@ -1873,7 +1859,7 @@ error:
/* Does the parser need to see an EOF to find the end of the message? */ /* Does the parser need to see an EOF to find the end of the message? */
int int
http_message_needs_eof (http_parser *parser) http_message_needs_eof (const http_parser *parser)
{ {
if (parser->type == HTTP_REQUEST) { if (parser->type == HTTP_REQUEST) {
return 0; return 0;
@ -1896,7 +1882,7 @@ http_message_needs_eof (http_parser *parser)
int int
http_should_keep_alive (http_parser *parser) http_should_keep_alive (const http_parser *parser)
{ {
if (parser->http_major > 0 && parser->http_minor > 0) { if (parser->http_major > 0 && parser->http_minor > 0) {
/* HTTP/1.1 */ /* HTTP/1.1 */
@ -1914,9 +1900,10 @@ http_should_keep_alive (http_parser *parser)
} }
const char * http_method_str (enum http_method m) const char *
http_method_str (enum http_method m)
{ {
return method_strings[m]; return ELEM_AT(method_strings, m, "<unknown>");
} }
@ -1943,6 +1930,144 @@ http_errno_description(enum http_errno err) {
return http_strerror_tab[err].description; return http_strerror_tab[err].description;
} }
static enum http_host_state
http_parse_host_char(enum http_host_state s, const char ch) {
switch(s) {
case s_http_userinfo:
case s_http_userinfo_start:
if (ch == '@') {
return s_http_host_start;
}
if (IS_USERINFO_CHAR(ch)) {
return s_http_userinfo;
}
break;
case s_http_host_start:
if (ch == '[') {
return s_http_host_v6_start;
}
if (IS_HOST_CHAR(ch)) {
return s_http_host;
}
break;
case s_http_host:
if (IS_HOST_CHAR(ch)) {
return s_http_host;
}
/* FALLTHROUGH */
case s_http_host_v6_end:
if (ch == ':') {
return s_http_host_port_start;
}
break;
case s_http_host_v6:
if (ch == ']') {
return s_http_host_v6_end;
}
/* FALLTHROUGH */
case s_http_host_v6_start:
if (IS_HEX(ch) || ch == ':') {
return s_http_host_v6;
}
break;
case s_http_host_port:
case s_http_host_port_start:
if (IS_NUM(ch)) {
return s_http_host_port;
}
break;
default:
break;
}
return s_http_host_dead;
}
/* Split the server span recorded in u->field_data[UF_HOST] into its parts.
 *
 * On entry field_data[UF_HOST] covers the whole "[userinfo@]host[:port]"
 * span of `buf`. The span is re-scanned with http_parse_host_char() and
 * field_data[UF_HOST] is rewritten to cover only the host itself (for a
 * bracketed literal, the text between '[' and ']'). UF_PORT and UF_USERINFO
 * are filled in and flagged in u->field_set when those parts are present.
 *
 * buf       start of the URL buffer that the offsets in `u` refer to
 * u         URL descriptor to refine in place
 * found_at  non-zero if the caller saw an '@' in the server span, in which
 *           case scanning starts in the userinfo state
 *
 * Returns 0 on success, 1 if the span is not a valid server part or if no
 * host was recorded at all.
 */
static int
http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
  enum http_host_state s;
  const char *p;
  size_t buflen;

  /* The caller also invokes us when only UF_SCHEMA is set (for example
   * "http:///toto"). In that case field_data[UF_HOST] may never have been
   * written -- http_parser_parse_url() only resets port and field_set -- so
   * reading off/len below would use stale or indeterminate values. A URL
   * with a schema but no host is invalid anyway, so fail right away. */
  if ((u->field_set & (1 << UF_HOST)) == 0) {
    return 1;
  }

  /* One past the last character of the server span. */
  buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;

  /* The host length is recounted from scratch while scanning. */
  u->field_data[UF_HOST].len = 0;

  s = found_at ? s_http_userinfo_start : s_http_host_start;

  for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
    enum http_host_state new_s = http_parse_host_char(s, *p);

    if (new_s == s_http_host_dead) {
      return 1;
    }

    switch (new_s) {
      case s_http_host:
      case s_http_host_v6:
        /* First character of the host proper: move the offset here. */
        if (s != new_s) {
          u->field_data[UF_HOST].off = p - buf;
        }
        u->field_data[UF_HOST].len++;
        break;

      case s_http_host_port:
        if (s != s_http_host_port) {
          u->field_data[UF_PORT].off = p - buf;
          u->field_data[UF_PORT].len = 0;
          u->field_set |= (1 << UF_PORT);
        }
        u->field_data[UF_PORT].len++;
        break;

      case s_http_userinfo:
        if (s != s_http_userinfo) {
          u->field_data[UF_USERINFO].off = p - buf;
          u->field_data[UF_USERINFO].len = 0;
          u->field_set |= (1 << UF_USERINFO);
        }
        u->field_data[UF_USERINFO].len++;
        break;

      default:
        break;
    }

    s = new_s;
  }

  /* Make sure we don't end somewhere unexpected: an empty host, an
   * unterminated '[' literal, a ':' with no digits, or userinfo with no
   * host after it are all rejected. */
  switch (s) {
    case s_http_host_start:
    case s_http_host_v6_start:
    case s_http_host_v6:
    case s_http_host_port_start:
    case s_http_userinfo:
    case s_http_userinfo_start:
      return 1;
    default:
      break;
  }

  return 0;
}
int int
http_parser_parse_url(const char *buf, size_t buflen, int is_connect, http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
struct http_parser_url *u) struct http_parser_url *u)
@ -1950,9 +2075,10 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
enum state s; enum state s;
const char *p; const char *p;
enum http_parser_url_fields uf, old_uf; enum http_parser_url_fields uf, old_uf;
int found_at = 0;
u->port = u->field_set = 0; u->port = u->field_set = 0;
s = is_connect ? s_req_host_start : s_req_spaces_before_url; s = is_connect ? s_req_server_start : s_req_spaces_before_url;
uf = old_uf = UF_MAX; uf = old_uf = UF_MAX;
for (p = buf; p < buf + buflen; p++) { for (p = buf; p < buf + buflen; p++) {
@ -1966,10 +2092,7 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
/* Skip delimeters */ /* Skip delimeters */
case s_req_schema_slash: case s_req_schema_slash:
case s_req_schema_slash_slash: case s_req_schema_slash_slash:
case s_req_host_start: case s_req_server_start:
case s_req_host_v6_start:
case s_req_host_v6_end:
case s_req_port_start:
case s_req_query_string_start: case s_req_query_string_start:
case s_req_fragment_start: case s_req_fragment_start:
continue; continue;
@ -1978,13 +2101,12 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
uf = UF_SCHEMA; uf = UF_SCHEMA;
break; break;
case s_req_host: case s_req_server_with_at:
case s_req_host_v6: found_at = 1;
uf = UF_HOST;
break;
case s_req_port: /* FALLTROUGH */
uf = UF_PORT; case s_req_server:
uf = UF_HOST;
break; break;
case s_req_path: case s_req_path:
@ -2017,23 +2139,19 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
old_uf = uf; old_uf = uf;
} }
/* host must be present if there is a schema */
/* parsing http:///toto will fail */
if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) {
if (http_parse_host(buf, u, found_at) != 0) {
return 1;
}
}
/* CONNECT requests can only contain "hostname:port" */ /* CONNECT requests can only contain "hostname:port" */
if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) { if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
return 1; return 1;
} }
/* Make sure we don't end somewhere unexpected */
switch (s) {
case s_req_host_v6_start:
case s_req_host_v6:
case s_req_host_v6_end:
case s_req_host:
case s_req_port_start:
return 1;
default:
break;
}
if (u->field_set & (1 << UF_PORT)) { if (u->field_set & (1 << UF_PORT)) {
/* Don't bother with endp; we've already validated the string */ /* Don't bother with endp; we've already validated the string */
unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10); unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
@ -2062,3 +2180,8 @@ http_parser_pause(http_parser *parser, int paused) {
assert(0 && "Attempting to pause parser in error state"); assert(0 && "Attempting to pause parser in error state");
} }
} }
int
http_body_is_final(const struct http_parser *parser) {
return parser->state == s_message_done;
}

View File

@ -29,6 +29,7 @@ extern "C" {
#include <sys/types.h> #include <sys/types.h>
#if defined(_WIN32) && !defined(__MINGW32__) && (!defined(_MSC_VER) || _MSC_VER<1600) #if defined(_WIN32) && !defined(__MINGW32__) && (!defined(_MSC_VER) || _MSC_VER<1600)
#include <BaseTsd.h>
typedef __int8 int8_t; typedef __int8 int8_t;
typedef unsigned __int8 uint8_t; typedef unsigned __int8 uint8_t;
typedef __int16 int16_t; typedef __int16 int16_t;
@ -37,9 +38,8 @@ typedef __int32 int32_t;
typedef unsigned __int32 uint32_t; typedef unsigned __int32 uint32_t;
typedef __int64 int64_t; typedef __int64 int64_t;
typedef unsigned __int64 uint64_t; typedef unsigned __int64 uint64_t;
typedef SIZE_T size_t;
typedef unsigned int size_t; typedef SSIZE_T ssize_t;
typedef int ssize_t;
#else #else
#include <stdint.h> #include <stdint.h>
#endif #endif
@ -256,7 +256,8 @@ enum http_parser_url_fields
, UF_PATH = 3 , UF_PATH = 3
, UF_QUERY = 4 , UF_QUERY = 4
, UF_FRAGMENT = 5 , UF_FRAGMENT = 5
, UF_MAX = 6 , UF_USERINFO = 6
, UF_MAX = 7
}; };
@ -288,12 +289,12 @@ size_t http_parser_execute(http_parser *parser,
/* If http_should_keep_alive() in the on_headers_complete or /* If http_should_keep_alive() in the on_headers_complete or
* on_message_complete callback returns true, then this will be should be * on_message_complete callback returns 0, then this should be
* the last message on the connection. * the last message on the connection.
* If you are the server, respond with the "Connection: close" header. * If you are the server, respond with the "Connection: close" header.
* If you are the client, close the connection. * If you are the client, close the connection.
*/ */
int http_should_keep_alive(http_parser *parser); int http_should_keep_alive(const http_parser *parser);
/* Returns a string version of the HTTP method. */ /* Returns a string version of the HTTP method. */
const char *http_method_str(enum http_method m); const char *http_method_str(enum http_method m);
@ -312,6 +313,9 @@ int http_parser_parse_url(const char *buf, size_t buflen,
/* Pause or un-pause the parser; a nonzero value pauses */ /* Pause or un-pause the parser; a nonzero value pauses */
void http_parser_pause(http_parser *parser, int paused); void http_parser_pause(http_parser *parser, int paused);
/* Checks if this is the final chunk of the body. */
int http_body_is_final(const http_parser *parser);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View File

@ -50,6 +50,8 @@ struct message {
char query_string[MAX_ELEMENT_SIZE]; char query_string[MAX_ELEMENT_SIZE];
char body[MAX_ELEMENT_SIZE]; char body[MAX_ELEMENT_SIZE];
size_t body_size; size_t body_size;
const char *host;
const char *userinfo;
uint16_t port; uint16_t port;
int num_headers; int num_headers;
enum { NONE=0, FIELD, VALUE } last_header_element; enum { NONE=0, FIELD, VALUE } last_header_element;
@ -65,6 +67,7 @@ struct message {
int headers_complete_cb_called; int headers_complete_cb_called;
int message_complete_cb_called; int message_complete_cb_called;
int message_complete_on_eof; int message_complete_on_eof;
int body_is_final;
}; };
static int currently_parsing_eof; static int currently_parsing_eof;
@ -630,6 +633,7 @@ const struct message requests[] =
,.fragment= "" ,.fragment= ""
,.request_path= "" ,.request_path= ""
,.request_url= "http://hypnotoad.org?hail=all" ,.request_url= "http://hypnotoad.org?hail=all"
,.host= "hypnotoad.org"
,.num_headers= 0 ,.num_headers= 0
,.headers= { } ,.headers= { }
,.body= "" ,.body= ""
@ -649,6 +653,7 @@ const struct message requests[] =
,.fragment= "" ,.fragment= ""
,.request_path= "" ,.request_path= ""
,.request_url= "http://hypnotoad.org:1234?hail=all" ,.request_url= "http://hypnotoad.org:1234?hail=all"
,.host= "hypnotoad.org"
,.port= 1234 ,.port= 1234
,.num_headers= 0 ,.num_headers= 0
,.headers= { } ,.headers= { }
@ -669,6 +674,7 @@ const struct message requests[] =
,.fragment= "" ,.fragment= ""
,.request_path= "" ,.request_path= ""
,.request_url= "http://hypnotoad.org:1234" ,.request_url= "http://hypnotoad.org:1234"
,.host= "hypnotoad.org"
,.port= 1234 ,.port= 1234
,.num_headers= 0 ,.num_headers= 0
,.headers= { } ,.headers= { }
@ -870,6 +876,28 @@ const struct message requests[] =
,.body= "" ,.body= ""
} }
#define PROXY_WITH_BASIC_AUTH 33
, {.name= "host:port and basic_auth"
,.type= HTTP_REQUEST
,.raw= "GET http://a%12:b!&*$@hypnotoad.org:1234/toto HTTP/1.1\r\n"
"\r\n"
,.should_keep_alive= TRUE
,.message_complete_on_eof= FALSE
,.http_major= 1
,.http_minor= 1
,.method= HTTP_GET
,.fragment= ""
,.request_path= "/toto"
,.request_url= "http://a%12:b!&*$@hypnotoad.org:1234/toto"
,.host= "hypnotoad.org"
,.userinfo= "a%12:b!&*$"
,.port= 1234
,.num_headers= 0
,.headers= { }
,.body= ""
}
, {.name= NULL } /* sentinel */ , {.name= NULL } /* sentinel */
}; };
@ -1422,12 +1450,26 @@ header_value_cb (http_parser *p, const char *buf, size_t len)
return 0; return 0;
} }
/* Called from the body callbacks. Stores the current result of
 * http_body_is_final() on the message being assembled. If the stored flag
 * is already set when another body callback arrives, an earlier callback
 * reported "final" too soon, so the test run is aborted. */
void
check_body_is_final (const http_parser *p)
{
  if (!messages[num_messages].body_is_final) {
    /* Normal path: remember what the parser says for this callback. */
    messages[num_messages].body_is_final = http_body_is_final(p);
    return;
  }

  fprintf(stderr, "\n\n *** Error http_body_is_final() should return 1 "
                  "on last on_body callback call "
                  "but it doesn't! ***\n\n");
  assert(0);
  abort();
}
int int
body_cb (http_parser *p, const char *buf, size_t len) body_cb (http_parser *p, const char *buf, size_t len)
{ {
assert(p == parser); assert(p == parser);
strncat(messages[num_messages].body, buf, len); strncat(messages[num_messages].body, buf, len);
messages[num_messages].body_size += len; messages[num_messages].body_size += len;
check_body_is_final(p);
// printf("body_cb: '%s'\n", requests[num_messages].body); // printf("body_cb: '%s'\n", requests[num_messages].body);
return 0; return 0;
} }
@ -1438,6 +1480,7 @@ count_body_cb (http_parser *p, const char *buf, size_t len)
assert(p == parser); assert(p == parser);
assert(buf); assert(buf);
messages[num_messages].body_size += len; messages[num_messages].body_size += len;
check_body_is_final(p);
return 0; return 0;
} }
@ -1474,6 +1517,18 @@ message_complete_cb (http_parser *p)
assert(0); assert(0);
abort(); abort();
} }
if (messages[num_messages].body_size &&
http_body_is_final(p) &&
!messages[num_messages].body_is_final)
{
fprintf(stderr, "\n\n *** Error http_body_is_final() should return 1 "
"on last on_body callback call "
"but it doesn't! ***\n\n");
assert(0);
abort();
}
messages[num_messages].message_complete_cb_called = TRUE; messages[num_messages].message_complete_cb_called = TRUE;
messages[num_messages].message_complete_on_eof = currently_parsing_eof; messages[num_messages].message_complete_on_eof = currently_parsing_eof;
@ -1794,6 +1849,14 @@ message_eq (int index, const struct message *expected)
abort(); abort();
} }
if (expected->host) {
MESSAGE_CHECK_URL_EQ(&u, expected, m, host, UF_HOST);
}
if (expected->userinfo) {
MESSAGE_CHECK_URL_EQ(&u, expected, m, userinfo, UF_USERINFO);
}
m->port = (u.field_set & (1 << UF_PORT)) ? m->port = (u.field_set & (1 << UF_PORT)) ?
u.port : 0; u.port : 0;
@ -1966,6 +2029,26 @@ const struct url_test url_tests[] =
,{ 15, 1 } /* UF_PATH */ ,{ 15, 1 } /* UF_PATH */
,{ 0, 0 } /* UF_QUERY */ ,{ 0, 0 } /* UF_QUERY */
,{ 0, 0 } /* UF_FRAGMENT */ ,{ 0, 0 } /* UF_FRAGMENT */
,{ 0, 0 } /* UF_USERINFO */
}
}
,.rv=0
}
, {.name="proxy request with port"
,.url="http://hostname:444/"
,.is_connect=0
,.u=
{.field_set=(1 << UF_SCHEMA) | (1 << UF_HOST) | (1 << UF_PORT) | (1 << UF_PATH)
,.port=444
,.field_data=
{{ 0, 4 } /* UF_SCHEMA */
,{ 7, 8 } /* UF_HOST */
,{ 16, 3 } /* UF_PORT */
,{ 19, 1 } /* UF_PATH */
,{ 0, 0 } /* UF_QUERY */
,{ 0, 0 } /* UF_FRAGMENT */
,{ 0, 0 } /* UF_USERINFO */
} }
} }
,.rv=0 ,.rv=0
@ -1984,11 +2067,18 @@ const struct url_test url_tests[] =
,{ 0, 0 } /* UF_PATH */ ,{ 0, 0 } /* UF_PATH */
,{ 0, 0 } /* UF_QUERY */ ,{ 0, 0 } /* UF_QUERY */
,{ 0, 0 } /* UF_FRAGMENT */ ,{ 0, 0 } /* UF_FRAGMENT */
,{ 0, 0 } /* UF_USERINFO */
} }
} }
,.rv=0 ,.rv=0
} }
, {.name="CONNECT request but not connect"
,.url="hostname:443"
,.is_connect=0
,.rv=1
}
, {.name="proxy ipv6 request" , {.name="proxy ipv6 request"
,.url="http://[1:2::3:4]/" ,.url="http://[1:2::3:4]/"
,.is_connect=0 ,.is_connect=0
@ -2002,6 +2092,26 @@ const struct url_test url_tests[] =
,{ 17, 1 } /* UF_PATH */ ,{ 17, 1 } /* UF_PATH */
,{ 0, 0 } /* UF_QUERY */ ,{ 0, 0 } /* UF_QUERY */
,{ 0, 0 } /* UF_FRAGMENT */ ,{ 0, 0 } /* UF_FRAGMENT */
,{ 0, 0 } /* UF_USERINFO */
}
}
,.rv=0
}
, {.name="proxy ipv6 request with port"
,.url="http://[1:2::3:4]:67/"
,.is_connect=0
,.u=
{.field_set=(1 << UF_SCHEMA) | (1 << UF_HOST) | (1 << UF_PORT) | (1 << UF_PATH)
,.port=67
,.field_data=
{{ 0, 4 } /* UF_SCHEMA */
,{ 8, 8 } /* UF_HOST */
,{ 18, 2 } /* UF_PORT */
,{ 20, 1 } /* UF_PATH */
,{ 0, 0 } /* UF_QUERY */
,{ 0, 0 } /* UF_FRAGMENT */
,{ 0, 0 } /* UF_USERINFO */
} }
} }
,.rv=0 ,.rv=0
@ -2020,13 +2130,16 @@ const struct url_test url_tests[] =
,{ 0, 0 } /* UF_PATH */ ,{ 0, 0 } /* UF_PATH */
,{ 0, 0 } /* UF_QUERY */ ,{ 0, 0 } /* UF_QUERY */
,{ 0, 0 } /* UF_FRAGMENT */ ,{ 0, 0 } /* UF_FRAGMENT */
,{ 0, 0 } /* UF_USERINFO */
} }
} }
,.rv=0 ,.rv=0
} }
, {.name="extra ? in query string" , {.name="extra ? in query string"
,.url="http://a.tbcdn.cn/p/fp/2010c/??fp-header-min.css,fp-base-min.css,fp-channel-min.css,fp-product-min.css,fp-mall-min.css,fp-category-min.css,fp-sub-min.css,fp-gdp4p-min.css,fp-css3-min.css,fp-misc-min.css?t=20101022.css" ,.url="http://a.tbcdn.cn/p/fp/2010c/??fp-header-min.css,fp-base-min.css,"
"fp-channel-min.css,fp-product-min.css,fp-mall-min.css,fp-category-min.css,"
"fp-sub-min.css,fp-gdp4p-min.css,fp-css3-min.css,fp-misc-min.css?t=20101022.css"
,.is_connect=0 ,.is_connect=0
,.u= ,.u=
{.field_set=(1<<UF_SCHEMA) | (1<<UF_HOST) | (1<<UF_PATH) | (1<<UF_QUERY) {.field_set=(1<<UF_SCHEMA) | (1<<UF_HOST) | (1<<UF_PATH) | (1<<UF_QUERY)
@ -2038,11 +2151,118 @@ const struct url_test url_tests[] =
,{ 17, 12 } /* UF_PATH */ ,{ 17, 12 } /* UF_PATH */
,{ 30,187 } /* UF_QUERY */ ,{ 30,187 } /* UF_QUERY */
,{ 0, 0 } /* UF_FRAGMENT */ ,{ 0, 0 } /* UF_FRAGMENT */
,{ 0, 0 } /* UF_USERINFO */
} }
} }
,.rv=0 ,.rv=0
} }
, {.name="space URL encoded"
,.url="/toto.html?toto=a%20b"
,.is_connect=0
,.u=
{.field_set= (1<<UF_PATH) | (1<<UF_QUERY)
,.port=0
,.field_data=
{{ 0, 0 } /* UF_SCHEMA */
,{ 0, 0 } /* UF_HOST */
,{ 0, 0 } /* UF_PORT */
,{ 0, 10 } /* UF_PATH */
,{ 11, 10 } /* UF_QUERY */
,{ 0, 0 } /* UF_FRAGMENT */
,{ 0, 0 } /* UF_USERINFO */
}
}
,.rv=0
}
, {.name="URL fragment"
,.url="/toto.html#titi"
,.is_connect=0
,.u=
{.field_set= (1<<UF_PATH) | (1<<UF_FRAGMENT)
,.port=0
,.field_data=
{{ 0, 0 } /* UF_SCHEMA */
,{ 0, 0 } /* UF_HOST */
,{ 0, 0 } /* UF_PORT */
,{ 0, 10 } /* UF_PATH */
,{ 0, 0 } /* UF_QUERY */
,{ 11, 4 } /* UF_FRAGMENT */
,{ 0, 0 } /* UF_USERINFO */
}
}
,.rv=0
}
, {.name="complex URL fragment"
,.url="http://www.webmasterworld.com/r.cgi?f=21&d=8405&url="
"http://www.example.com/index.html?foo=bar&hello=world#midpage"
,.is_connect=0
,.u=
{.field_set= (1<<UF_SCHEMA) | (1<<UF_HOST) | (1<<UF_PATH) | (1<<UF_QUERY) |\
(1<<UF_FRAGMENT)
,.port=0
,.field_data=
{{ 0, 4 } /* UF_SCHEMA */
,{ 7, 22 } /* UF_HOST */
,{ 0, 0 } /* UF_PORT */
,{ 29, 6 } /* UF_PATH */
,{ 36, 69 } /* UF_QUERY */
,{106, 7 } /* UF_FRAGMENT */
,{ 0, 0 } /* UF_USERINFO */
}
}
,.rv=0
}
, {.name="complex URL from node js url parser doc"
,.url="http://host.com:8080/p/a/t/h?query=string#hash"
,.is_connect=0
,.u=
{.field_set= (1<<UF_SCHEMA) | (1<<UF_HOST) | (1<<UF_PORT) | (1<<UF_PATH) |\
(1<<UF_QUERY) | (1<<UF_FRAGMENT)
,.port=8080
,.field_data=
{{ 0, 4 } /* UF_SCHEMA */
,{ 7, 8 } /* UF_HOST */
,{ 16, 4 } /* UF_PORT */
,{ 20, 8 } /* UF_PATH */
,{ 29, 12 } /* UF_QUERY */
,{ 42, 4 } /* UF_FRAGMENT */
,{ 0, 0 } /* UF_USERINFO */
}
}
,.rv=0
}
, {.name="complex URL with basic auth from node js url parser doc"
,.url="http://a:b@host.com:8080/p/a/t/h?query=string#hash"
,.is_connect=0
,.u=
{.field_set= (1<<UF_SCHEMA) | (1<<UF_HOST) | (1<<UF_PORT) | (1<<UF_PATH) |\
(1<<UF_QUERY) | (1<<UF_FRAGMENT) | (1<<UF_USERINFO)
,.port=8080
,.field_data=
{{ 0, 4 } /* UF_SCHEMA */
,{ 11, 8 } /* UF_HOST */
,{ 20, 4 } /* UF_PORT */
,{ 24, 8 } /* UF_PATH */
,{ 33, 12 } /* UF_QUERY */
,{ 46, 4 } /* UF_FRAGMENT */
,{ 7, 3 } /* UF_USERINFO */
}
}
,.rv=0
}
, {.name="double @"
,.url="http://a:b@@hostname:443/"
,.is_connect=0
,.rv=1
}
, {.name="proxy empty host" , {.name="proxy empty host"
,.url="http://:443/" ,.url="http://:443/"
,.is_connect=0 ,.is_connect=0
@ -2055,6 +2275,12 @@ const struct url_test url_tests[] =
,.rv=1 ,.rv=1
} }
, {.name="CONNECT with basic auth"
,.url="a:b@hostname:443"
,.is_connect=1
,.rv=1
}
, {.name="CONNECT empty host" , {.name="CONNECT empty host"
,.url=":443" ,.url=":443"
,.is_connect=1 ,.is_connect=1
@ -2078,16 +2304,130 @@ const struct url_test url_tests[] =
,.rv=1 /* s_dead */ ,.rv=1 /* s_dead */
} }
, {.name="proxy basic auth with space url encoded"
,.url="http://a%20:b@host.com/"
,.is_connect=0
,.u=
{.field_set= (1<<UF_SCHEMA) | (1<<UF_HOST) | (1<<UF_PATH) | (1<<UF_USERINFO)
,.port=0
,.field_data=
{{ 0, 4 } /* UF_SCHEMA */
,{ 14, 8 } /* UF_HOST */
,{ 0, 0 } /* UF_PORT */
,{ 22, 1 } /* UF_PATH */
,{ 0, 0 } /* UF_QUERY */
,{ 0, 0 } /* UF_FRAGMENT */
,{ 7, 6 } /* UF_USERINFO */
}
}
,.rv=0
}
, {.name="carriage return in URL" , {.name="carriage return in URL"
,.url="/foo\rbar/" ,.url="/foo\rbar/"
,.rv=1 /* s_dead */ ,.rv=1 /* s_dead */
} }
, {.name="proxy double : in URL"
,.url="http://hostname::443/"
,.rv=1 /* s_dead */
}
, {.name="proxy basic auth with double :"
,.url="http://a::b@host.com/"
,.is_connect=0
,.u=
{.field_set= (1<<UF_SCHEMA) | (1<<UF_HOST) | (1<<UF_PATH) | (1<<UF_USERINFO)
,.port=0
,.field_data=
{{ 0, 4 } /* UF_SCHEMA */
,{ 12, 8 } /* UF_HOST */
,{ 0, 0 } /* UF_PORT */
,{ 20, 1 } /* UF_PATH */
,{ 0, 0 } /* UF_QUERY */
,{ 0, 0 } /* UF_FRAGMENT */
,{ 7, 4 } /* UF_USERINFO */
}
}
,.rv=0
}
, {.name="line feed in URL" , {.name="line feed in URL"
,.url="/foo\nbar/" ,.url="/foo\nbar/"
,.rv=1 /* s_dead */ ,.rv=1 /* s_dead */
} }
, {.name="proxy empty basic auth"
,.url="http://@hostname/fo"
,.u=
{.field_set= (1<<UF_SCHEMA) | (1<<UF_HOST) | (1<<UF_PATH)
,.port=0
,.field_data=
{{ 0, 4 } /* UF_SCHEMA */
,{ 8, 8 } /* UF_HOST */
,{ 0, 0 } /* UF_PORT */
,{ 16, 3 } /* UF_PATH */
,{ 0, 0 } /* UF_QUERY */
,{ 0, 0 } /* UF_FRAGMENT */
,{ 0, 0 } /* UF_USERINFO */
}
}
,.rv=0
}
, {.name="proxy line feed in hostname"
,.url="http://host\name/fo"
,.rv=1 /* s_dead */
}
, {.name="proxy % in hostname"
,.url="http://host%name/fo"
,.rv=1 /* s_dead */
}
, {.name="proxy ; in hostname"
,.url="http://host;ame/fo"
,.rv=1 /* s_dead */
}
, {.name="proxy basic auth with unreservedchars"
,.url="http://a!;-_!=+$@host.com/"
,.is_connect=0
,.u=
{.field_set= (1<<UF_SCHEMA) | (1<<UF_HOST) | (1<<UF_PATH) | (1<<UF_USERINFO)
,.port=0
,.field_data=
{{ 0, 4 } /* UF_SCHEMA */
,{ 17, 8 } /* UF_HOST */
,{ 0, 0 } /* UF_PORT */
,{ 25, 1 } /* UF_PATH */
,{ 0, 0 } /* UF_QUERY */
,{ 0, 0 } /* UF_FRAGMENT */
,{ 7, 9 } /* UF_USERINFO */
}
}
,.rv=0
}
, {.name="proxy only empty basic auth"
,.url="http://@/fo"
,.rv=1 /* s_dead */
}
, {.name="proxy only basic auth"
,.url="http://toto@/fo"
,.rv=1 /* s_dead */
}
, {.name="proxy emtpy hostname"
,.url="http:///fo"
,.rv=1 /* s_dead */
}
, {.name="proxy = in URL"
,.url="http://host=ame/fo"
,.rv=1 /* s_dead */
}
#if HTTP_PARSER_STRICT #if HTTP_PARSER_STRICT
, {.name="tab in URL" , {.name="tab in URL"
@ -2113,6 +2453,7 @@ const struct url_test url_tests[] =
,{ 0, 9 } /* UF_PATH */ ,{ 0, 9 } /* UF_PATH */
,{ 0, 0 } /* UF_QUERY */ ,{ 0, 0 } /* UF_QUERY */
,{ 0, 0 } /* UF_FRAGMENT */ ,{ 0, 0 } /* UF_FRAGMENT */
,{ 0, 0 } /* UF_USERINFO */
} }
} }
,.rv=0 ,.rv=0
@ -2129,6 +2470,7 @@ const struct url_test url_tests[] =
,{ 0, 9 } /* UF_PATH */ ,{ 0, 9 } /* UF_PATH */
,{ 0, 0 } /* UF_QUERY */ ,{ 0, 0 } /* UF_QUERY */
,{ 0, 0 } /* UF_FRAGMENT */ ,{ 0, 0 } /* UF_FRAGMENT */
,{ 0, 0 } /* UF_USERINFO */
} }
} }
,.rv=0 ,.rv=0
@ -2139,7 +2481,6 @@ const struct url_test url_tests[] =
void void
dump_url (const char *url, const struct http_parser_url *u) dump_url (const char *url, const struct http_parser_url *u)
{ {
char part[512];
unsigned int i; unsigned int i;
printf("\tfield_set: 0x%x, port: %u\n", u->field_set, u->port); printf("\tfield_set: 0x%x, port: %u\n", u->field_set, u->port);
@ -2149,14 +2490,12 @@ dump_url (const char *url, const struct http_parser_url *u)
continue; continue;
} }
memcpy(part, url + u->field_data[i].off, u->field_data[i].len); printf("\tfield_data[%u]: off: %u len: %u part: \"%.*s\n",
part[u->field_data[i].len] = '\0';
printf("\tfield_data[%u]: off: %u len: %u part: \"%s\"\n",
i, i,
u->field_data[i].off, u->field_data[i].off,
u->field_data[i].len, u->field_data[i].len,
part); u->field_data[i].len,
url + u->field_data[i].off);
} }
} }
@ -2206,6 +2545,13 @@ test_parse_url (void)
} }
} }
/* Checks http_method_str() for one known method and for a value outside
 * the method enum, which must map to the "<unknown>" placeholder. */
void
test_method_str (void)
{
  assert(!strcmp(http_method_str(HTTP_GET), "GET"));
  assert(!strcmp(http_method_str(1337), "<unknown>"));
}
void void
test_message (const struct message *message) test_message (const struct message *message)
{ {
@ -2715,6 +3061,7 @@ main (void)
//// API //// API
test_preserve_data(); test_preserve_data();
test_parse_url(); test_parse_url();
test_method_str();
//// OVERFLOW CONDITIONS //// OVERFLOW CONDITIONS