Browse Source

Upgrade http-parser

v0.7.4-release
Ryan Dahl 15 years ago
parent
commit
9be6c501ec
  1. 6
      deps/http_parser/Makefile
  2. 56
      deps/http_parser/README.md
  3. 100
      deps/http_parser/http_parser.c
  4. 39
      deps/http_parser/http_parser.h
  5. 77
      deps/http_parser/test.c
  6. 2
      src/node_http_parser.cc

6
deps/http_parser/Makefile

@ -8,10 +8,10 @@ test: test_g
test_g: http_parser_g.o test_g.o test_g: http_parser_g.o test_g.o
gcc $(OPT_DEBUG) http_parser_g.o test_g.o -o $@ gcc $(OPT_DEBUG) http_parser_g.o test_g.o -o $@
test_g.o: test.c Makefile test_g.o: test.c http_parser.h Makefile
gcc $(OPT_DEBUG) -c test.c -o $@ gcc $(OPT_DEBUG) -c test.c -o $@
test.o: test.c Makefile test.o: test.c http_parser.h Makefile
gcc $(OPT_FAST) -c test.c -o $@ gcc $(OPT_FAST) -c test.c -o $@
http_parser_g.o: http_parser.c http_parser.h Makefile http_parser_g.o: http_parser.c http_parser.h Makefile
@ -23,7 +23,7 @@ test-valgrind: test_g
http_parser.o: http_parser.c http_parser.h Makefile http_parser.o: http_parser.c http_parser.h Makefile
gcc $(OPT_FAST) -c http_parser.c gcc $(OPT_FAST) -c http_parser.c
test_fast: http_parser.o test.c test_fast: http_parser.o test.c http_parser.h
gcc $(OPT_FAST) http_parser.o test.c -o $@ gcc $(OPT_FAST) http_parser.o test.c -o $@
test-run-timed: test_fast test-run-timed: test_fast

56
deps/http_parser/README.md

@ -1,30 +1,32 @@
HTTP Parser HTTP Parser
=========== ===========
This is a parser for HTTP messages written in C. It parses both requests This is a parser for HTTP messages written in C. It parses both requests and
and responses. The parser is designed to be used in performance HTTP responses. The parser is designed to be used in performance HTTP
applications. It does not make any allocations, it does not buffer data, and applications. It does not make any syscalls nor allocations, it does not
it can be interrupted at any time. Depending on your architecture, it only buffer data, it can be interrupted at any time. Depending on your
requires between 100 and 200 bytes of data per message stream (in a web architecture, it only requires between 100 and 200 bytes of data per message
server that is per connection). stream (in a web server that is per connection).
Features: Features:
* No dependencies * No dependencies
* Parses both requests and responses. * Handles persistent streams (keep-alive).
* Handles persistent streams.
* Decodes chunked encoding. * Decodes chunked encoding.
* Extracts the following data from a message
* header fields and values
* content-length
* request method
* response status code
* transfer-encoding
* http version
* request path, query string, fragment
* message body
* Defends against buffer overflow attacks.
* Upgrade support * Upgrade support
* Defends against buffer overflow attacks.
The parser extracts the following information from HTTP messages:
* Header fields and values
* Content-Length
* Request method
* Response status code
* Transfer-Encoding
* HTTP version
* Request path, query string, fragment
* Message body
Usage Usage
----- -----
@ -55,10 +57,9 @@ When data is received on the socket execute the parser and check for errors.
} }
/* Start up / continue the parser. /* Start up / continue the parser.
* Note we pass the recved==0 to http_parse_requests to signal * Note we pass recved==0 to signal that EOF has been received.
* that EOF has been received.
*/ */
nparsed = http_parser_execute(parser, settings, buf, recved); nparsed = http_parser_execute(parser, &settings, buf, recved);
if (parser->upgrade) { if (parser->upgrade) {
/* handle new protocol */ /* handle new protocol */
@ -83,10 +84,6 @@ The parser decodes the transfer-encoding for both requests and responses
transparently. That is, a chunked encoding is decoded before being sent to transparently. That is, a chunked encoding is decoded before being sent to
the on_body callback. the on_body callback.
It does not decode the content-encoding (gzip). Not all HTTP applications
need to inspect the body. Decoding gzip is a non-negligible amount of
processing (and requires making allocations). HTTP proxies using this
parser, for example, would not want such a feature.
The Special Problem of Upgrade The Special Problem of Upgrade
------------------------------ ------------------------------
@ -109,11 +106,11 @@ information the Web Socket protocol.)
To support this, the parser will treat this as a normal HTTP message without a To support this, the parser will treat this as a normal HTTP message without a
body. Issuing both on_headers_complete and on_message_complete callbacks. However body. Issuing both on_headers_complete and on_message_complete callbacks. However
http_parser_execute() may finish without parsing the entire supplied buffer. http_parser_execute() will stop parsing at the end of the headers and return.
The user needs to check if parser->upgrade has been set to 1 after The user is expected to check if `parser->upgrade` has been set to 1 after
http_parser_execute() returns to determine if a premature exit was due to an `http_parser_execute()` returns. Non-HTTP data begins at the buffer supplied
upgrade or an error. offset by the return value of `http_parser_execute()`.
Callbacks Callbacks
@ -166,6 +163,7 @@ and apply following logic:
| | | and append callback data to it | | | | and append callback data to it |
------------------------ ------------ -------------------------------------------- ------------------------ ------------ --------------------------------------------
See examples of reading in headers: See examples of reading in headers:
* [partial example](http://gist.github.com/155877) in C * [partial example](http://gist.github.com/155877) in C

100
deps/http_parser/http_parser.c

@ -32,9 +32,6 @@
#endif #endif
#define MAX_FIELD_SIZE (80*1024)
#define CALLBACK2(FOR) \ #define CALLBACK2(FOR) \
do { \ do { \
if (settings->on_##FOR) { \ if (settings->on_##FOR) { \
@ -45,20 +42,16 @@ do { \
#define MARK(FOR) \ #define MARK(FOR) \
do { \ do { \
parser->FOR##_mark = p; \ FOR##_mark = p; \
parser->FOR##_size = 0; \
} while (0) } while (0)
#define CALLBACK_NOCLEAR(FOR) \ #define CALLBACK_NOCLEAR(FOR) \
do { \ do { \
if (parser->FOR##_mark) { \ if (FOR##_mark) { \
parser->FOR##_size += p - parser->FOR##_mark; \
if (parser->FOR##_size > MAX_FIELD_SIZE) return (p - data); \
if (settings->on_##FOR) { \ if (settings->on_##FOR) { \
if (0 != settings->on_##FOR(parser, \ if (0 != settings->on_##FOR(parser, \
parser->FOR##_mark, \ FOR##_mark, \
p - parser->FOR##_mark)) \ p - FOR##_mark)) \
{ \ { \
return (p - data); \ return (p - data); \
} \ } \
@ -70,7 +63,7 @@ do { \
#define CALLBACK(FOR) \ #define CALLBACK(FOR) \
do { \ do { \
CALLBACK_NOCLEAR(FOR); \ CALLBACK_NOCLEAR(FOR); \
parser->FOR##_mark = NULL; \ FOR##_mark = NULL; \
} while (0) } while (0)
@ -132,6 +125,8 @@ static const uint32_t usual[] = {
enum state enum state
{ s_dead = 1 /* important that this is > 0 */ { s_dead = 1 /* important that this is > 0 */
, s_start_res_or_resp
, s_res_or_resp_H
, s_start_res , s_start_res
, s_res_H , s_res_H
, s_res_HT , s_res_HT
@ -303,12 +298,31 @@ size_t http_parser_execute (http_parser *parser,
return 0; return 0;
} }
if (parser->header_field_mark) parser->header_field_mark = data; /* technically we could combine all of these (except for url_mark) into one
if (parser->header_value_mark) parser->header_value_mark = data; variable, saving stack space, but it seems more clear to have them
if (parser->fragment_mark) parser->fragment_mark = data; separated. */
if (parser->query_string_mark) parser->query_string_mark = data; const char *header_field_mark = 0;
if (parser->path_mark) parser->path_mark = data; const char *header_value_mark = 0;
if (parser->url_mark) parser->url_mark = data; const char *fragment_mark = 0;
const char *query_string_mark = 0;
const char *path_mark = 0;
const char *url_mark = 0;
if (state == s_header_field)
header_field_mark = data;
if (state == s_header_value)
header_value_mark = data;
if (state == s_req_fragment)
fragment_mark = data;
if (state == s_req_query_string)
query_string_mark = data;
if (state == s_req_path)
path_mark = data;
if (state == s_req_path || state == s_req_schema || state == s_req_schema_slash
|| state == s_req_schema_slash_slash || state == s_req_port
|| state == s_req_query_string_start || state == s_req_query_string
|| state == s_req_fragment_start || state == s_req_fragment)
url_mark = data;
for (p=data, pe=data+len; p != pe; p++) { for (p=data, pe=data+len; p != pe; p++) {
ch = *p; ch = *p;
@ -326,6 +340,42 @@ size_t http_parser_execute (http_parser *parser,
*/ */
goto error; goto error;
case s_start_res_or_resp:
{
if (ch == CR || ch == LF)
break;
parser->flags = 0;
parser->content_length = -1;
CALLBACK2(message_begin);
if (ch == 'H')
state = s_res_or_resp_H;
else {
parser->type = HTTP_REQUEST;
if (ch < 'A' || 'Z' < ch) goto error;
parser->buffer[0] = ch;
index = 0;
state = s_req_method;
}
break;
}
case s_res_or_resp_H:
if (ch == 'T') {
parser->type = HTTP_RESPONSE;
state = s_res_HT;
} else {
if (ch < 'A' || 'Z' < ch) goto error;
parser->type = HTTP_REQUEST;
parser->method = (enum http_method) 0;
parser->buffer[0] = 'H';
parser->buffer[1] = ch;
index = 1;
state = s_req_method;
}
break;
case s_start_res: case s_start_res:
{ {
parser->flags = 0; parser->flags = 0;
@ -637,6 +687,9 @@ size_t http_parser_execute (http_parser *parser,
if (ch == ':') { if (ch == ':') {
state = s_req_schema_slash; state = s_req_schema_slash;
break; break;
} else if (ch == '.') {
state = s_req_host;
break;
} }
goto error; goto error;
@ -1156,12 +1209,14 @@ size_t http_parser_execute (http_parser *parser,
if (!c) { if (!c) {
if (ch == CR) { if (ch == CR) {
CALLBACK(header_value);
header_state = h_general; header_state = h_general;
state = s_header_almost_done; state = s_header_almost_done;
break; break;
} }
if (ch == LF) { if (ch == LF) {
CALLBACK(header_value);
state = s_header_field_start; state = s_header_field_start;
break; break;
} }
@ -1547,15 +1602,8 @@ void
http_parser_init (http_parser *parser, enum http_parser_type t) http_parser_init (http_parser *parser, enum http_parser_type t)
{ {
parser->type = t; parser->type = t;
parser->state = (t == HTTP_REQUEST ? s_start_req : s_start_res); parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_res_or_resp));
parser->nread = 0; parser->nread = 0;
parser->upgrade = 0; parser->upgrade = 0;
parser->header_field_mark = NULL;
parser->header_value_mark = NULL;
parser->query_string_mark = NULL;
parser->path_mark = NULL;
parser->url_mark = NULL;
parser->fragment_mark = NULL;
} }

39
deps/http_parser/http_parser.h

@ -89,22 +89,15 @@ enum http_method
}; };
enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE }; enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE, HTTP_BOTH };
struct http_parser { struct http_parser {
/** PRIVATE **/ /** PRIVATE **/
enum http_parser_type type; unsigned char type;
unsigned short state; unsigned char state;
unsigned short header_state; unsigned char header_state;
size_t index; unsigned char index;
/* 1 = Upgrade header was present and the parser has exited because of that.
* 0 = No upgrade header present.
* Should be checked when http_parser_execute() returns in addition to
* error checking.
*/
unsigned short upgrade;
char flags; char flags;
@ -112,26 +105,20 @@ struct http_parser {
ssize_t body_read; ssize_t body_read;
ssize_t content_length; ssize_t content_length;
const char *header_field_mark;
size_t header_field_size;
const char *header_value_mark;
size_t header_value_size;
const char *query_string_mark;
size_t query_string_size;
const char *path_mark;
size_t path_size;
const char *url_mark;
size_t url_size;
const char *fragment_mark;
size_t fragment_size;
/** READ-ONLY **/ /** READ-ONLY **/
unsigned short status_code; /* responses only */ unsigned short status_code; /* responses only */
enum http_method method; /* requests only */ unsigned short method; /* requests only */
unsigned short http_major; unsigned short http_major;
unsigned short http_minor; unsigned short http_minor;
char buffer[HTTP_PARSER_MAX_METHOD_LEN]; char buffer[HTTP_PARSER_MAX_METHOD_LEN];
/* 1 = Upgrade header was present and the parser has exited because of that.
* 0 = No upgrade header present.
* Should be checked when http_parser_execute() returns in addition to
* error checking.
*/
char upgrade;
/** PUBLIC **/ /** PUBLIC **/
void *data; /* A pointer to get hook to the "connection" or "socket" object */ void *data; /* A pointer to get hook to the "connection" or "socket" object */
}; };

77
deps/http_parser/test.c

@ -495,6 +495,30 @@ const struct message requests[] =
,.body= "" ,.body= ""
} }
#define CONNECT_REQUEST 17
, {.name = "connect request"
,.type= HTTP_REQUEST
,.raw= "CONNECT home.netscape.com:443 HTTP/1.0\r\n"
"User-agent: Mozilla/1.1N\r\n"
"Proxy-authorization: basic aGVsbG86d29ybGQ=\r\n"
"\r\n"
,.should_keep_alive= FALSE
,.message_complete_on_eof= FALSE
,.http_major= 1
,.http_minor= 0
,.method= HTTP_CONNECT
,.query_string= ""
,.fragment= ""
,.request_path= ""
,.request_url= "home.netscape.com:443"
,.num_headers= 2
,.upgrade=0
,.headers= { { "User-agent", "Mozilla/1.1N" }
, { "Proxy-authorization", "basic aGVsbG86d29ybGQ=" }
}
,.body= ""
}
, {.name= NULL } /* sentinel */ , {.name= NULL } /* sentinel */
}; };
@ -721,6 +745,43 @@ const struct message responses[] =
,.body= "" ,.body= ""
} }
#define BONJOUR_MADAME_FR 8
/* The client should not merge two header fields when the first one doesn't
* have a value.
*/
, {.name= "bonjourmadame.fr"
,.type= HTTP_RESPONSE
,.raw= "HTTP/1.0 301 Moved Permanently\r\n"
"Date: Thu, 03 Jun 2010 09:56:32 GMT\r\n"
"Server: Apache/2.2.3 (Red Hat)\r\n"
"Cache-Control: public\r\n"
"Pragma: \r\n"
"Location: http://www.bonjourmadame.fr/\r\n"
"Vary: Accept-Encoding\r\n"
"Content-Length: 0\r\n"
"Content-Type: text/html; charset=UTF-8\r\n"
"Connection: keep-alive\r\n"
"\r\n"
,.should_keep_alive= TRUE
,.message_complete_on_eof= FALSE
,.http_major= 1
,.http_minor= 0
,.status_code= 301
,.num_headers= 9
,.headers=
{ { "Date", "Thu, 03 Jun 2010 09:56:32 GMT" }
, { "Server", "Apache/2.2.3 (Red Hat)" }
, { "Cache-Control", "public" }
, { "Pragma", "" }
, { "Location", "http://www.bonjourmadame.fr/" }
, { "Vary", "Accept-Encoding" }
, { "Content-Length", "0" }
, { "Content-Type", "text/html; charset=UTF-8" }
, { "Connection", "keep-alive" }
}
,.body= ""
}
, {.name= NULL } /* sentinel */ , {.name= NULL } /* sentinel */
}; };
@ -1207,12 +1268,13 @@ test_scan (const struct message *r1, const struct message *r2, const struct mess
int total_len = strlen(total); int total_len = strlen(total);
int total_ops = (total_len - 1) * (total_len - 2) / 2; int total_ops = 2 * (total_len - 1) * (total_len - 2) / 2;
int ops = 0 ; int ops = 0 ;
size_t buf1_len, buf2_len, buf3_len; size_t buf1_len, buf2_len, buf3_len;
int i,j; int i,j,type_both;
for (type_both = 0; type_both < 2; type_both ++ ) {
for (j = 2; j < total_len; j ++ ) { for (j = 2; j < total_len; j ++ ) {
for (i = 1; i < j; i ++ ) { for (i = 1; i < j; i ++ ) {
@ -1222,7 +1284,7 @@ test_scan (const struct message *r1, const struct message *r2, const struct mess
} }
ops += 1; ops += 1;
parser_init(r1->type); parser_init(type_both ? HTTP_BOTH : r1->type);
buf1_len = i; buf1_len = i;
strncpy(buf1, total, buf1_len); strncpy(buf1, total, buf1_len);
@ -1279,6 +1341,7 @@ test_scan (const struct message *r1, const struct message *r2, const struct mess
parser_free(); parser_free();
} }
} }
}
puts("\b\b\b\b100%"); puts("\b\b\b\b100%");
return; return;
@ -1395,12 +1458,18 @@ main (void)
printf("response scan 1/1 "); printf("response scan 1/2 ");
test_scan( &responses[TRAILING_SPACE_ON_CHUNKED_BODY] test_scan( &responses[TRAILING_SPACE_ON_CHUNKED_BODY]
, &responses[NO_HEADERS_NO_BODY_404] , &responses[NO_HEADERS_NO_BODY_404]
, &responses[NO_REASON_PHRASE] , &responses[NO_REASON_PHRASE]
); );
printf("response scan 1/2 ");
test_scan( &responses[BONJOUR_MADAME_FR]
, &responses[UNDERSTORE_HEADER_KEY]
, &responses[NO_CARRIAGE_RET]
);
puts("responses okay"); puts("responses okay");

2
src/node_http_parser.cc

@ -102,7 +102,7 @@ static struct http_parser_settings settings;
static inline Persistent<String> static inline Persistent<String>
method_to_str(enum http_method m) { method_to_str(unsigned short m) {
switch (m) { switch (m) {
case HTTP_DELETE: return delete_sym; case HTTP_DELETE: return delete_sym;
case HTTP_GET: return get_sym; case HTTP_GET: return get_sym;

Loading…
Cancel
Save