From 9be6c501ecf3d9432bdfca18a1b007ef4868ef5f Mon Sep 17 00:00:00 2001 From: Ryan Dahl Date: Sun, 6 Jun 2010 17:41:03 -0700 Subject: [PATCH] Upgrade http-parser --- deps/http_parser/Makefile | 6 +- deps/http_parser/README.md | 56 +++++------ deps/http_parser/http_parser.c | 102 ++++++++++++++----- deps/http_parser/http_parser.h | 39 +++----- deps/http_parser/test.c | 177 +++++++++++++++++++++++---------- src/node_http_parser.cc | 2 +- 6 files changed, 242 insertions(+), 140 deletions(-) diff --git a/deps/http_parser/Makefile b/deps/http_parser/Makefile index 72e9d02c2c..dee994ea50 100644 --- a/deps/http_parser/Makefile +++ b/deps/http_parser/Makefile @@ -8,10 +8,10 @@ test: test_g test_g: http_parser_g.o test_g.o gcc $(OPT_DEBUG) http_parser_g.o test_g.o -o $@ -test_g.o: test.c Makefile +test_g.o: test.c http_parser.h Makefile gcc $(OPT_DEBUG) -c test.c -o $@ -test.o: test.c Makefile +test.o: test.c http_parser.h Makefile gcc $(OPT_FAST) -c test.c -o $@ http_parser_g.o: http_parser.c http_parser.h Makefile @@ -23,7 +23,7 @@ test-valgrind: test_g http_parser.o: http_parser.c http_parser.h Makefile gcc $(OPT_FAST) -c http_parser.c -test_fast: http_parser.o test.c +test_fast: http_parser.o test.c http_parser.h gcc $(OPT_FAST) http_parser.o test.c -o $@ test-run-timed: test_fast diff --git a/deps/http_parser/README.md b/deps/http_parser/README.md index 6666482ab8..79aef0efb4 100644 --- a/deps/http_parser/README.md +++ b/deps/http_parser/README.md @@ -1,30 +1,32 @@ HTTP Parser =========== -This is a parser for HTTP messages written in C. It parses both requests -and responses. The parser is designed to be used in performance HTTP -applications. It does not make any allocations, it does not buffer data, and -it can be interrupted at anytime. Depending on your architecture, it only -requires between 100 and 200 bytes of data per message stream (in a web -server that is per connection). +This is a parser for HTTP messages written in C. 
It parses both requests and +responses. The parser is designed to be used in performance HTTP +applications. It does not make any syscalls nor allocations, it does not +buffer data, it can be interrupted at any time. Depending on your +architecture, it only requires between 100 and 200 bytes of data per message +stream (in a web server that is per connection). Features: * No dependencies - * Parses both requests and responses. - * Handles persistent streams. + * Handles persistent streams (keep-alive). * Decodes chunked encoding. - * Extracts the following data from a message - * header fields and values - * content-length - * request method - * response status code - * transfer-encoding - * http version - * request path, query string, fragment - * message body - * Defends against buffer overflow attacks. * Upgrade support + * Defends against buffer overflow attacks. + +The parser extracts the following information from HTTP messages: + + * Header fields and values + * Content-Length + * Request method + * Response status code + * Transfer-Encoding + * HTTP version + * Request path, query string, fragment + * Message body + Usage ----- @@ -55,10 +57,9 @@ When data is received on the socket execute the parser and check for errors. } /* Start up / continue the parser. - * Note we pass the recved==0 to http_parse_requests to signal - * that EOF has been recieved. + * Note we pass recved==0 to signal that EOF has been received. */ - nparsed = http_parser_execute(parser, settings, buf, recved); + nparsed = http_parser_execute(parser, &settings, buf, recved); if (parser->upgrade) { /* handle new protocol */ @@ -83,10 +84,6 @@ The parser decodes the transfer-encoding for both requests and responses transparently. That is, a chunked encoding is decoded before being sent to the on_body callback. -It does not decode the content-encoding (gzip). Not all HTTP applications -need to inspect the body. 
Decoding gzip is non-neglagable amount of -processing (and requires making allocations). HTTP proxies using this -parser, for example, would not want such a feature. The Special Problem of Upgrade ------------------------------ @@ -109,11 +106,11 @@ information the Web Socket protocol.) To support this, the parser will treat this as a normal HTTP message without a body. Issuing both on_headers_complete and on_message_complete callbacks. However -http_parser_execute() may finish without parsing the entire supplied buffer. +http_parser_execute() will stop parsing at the end of the headers and return. -The user needs to check if parser->upgrade has been set to 1 after -http_parser_execute() returns to determine if a premature exit was due to an -upgrade or an error. +The user is expected to check if `parser->upgrade` has been set to 1 after +`http_parser_execute()` returns. Non-HTTP data begins at the buffer supplied +offset by the return value of `http_parser_execute()`. Callbacks @@ -166,6 +163,7 @@ and apply following logic: | | | and append callback data to it | ------------------------ ------------ -------------------------------------------- + See examples of reading in headers: * [partial example](http://gist.github.com/155877) in C diff --git a/deps/http_parser/http_parser.c b/deps/http_parser/http_parser.c index 8ecc94c2ee..7556d92099 100644 --- a/deps/http_parser/http_parser.c +++ b/deps/http_parser/http_parser.c @@ -32,9 +32,6 @@ #endif -#define MAX_FIELD_SIZE (80*1024) - - #define CALLBACK2(FOR) \ do { \ if (settings->on_##FOR) { \ @@ -45,20 +42,16 @@ do { \ #define MARK(FOR) \ do { \ - parser->FOR##_mark = p; \ - parser->FOR##_size = 0; \ + FOR##_mark = p; \ } while (0) - #define CALLBACK_NOCLEAR(FOR) \ do { \ - if (parser->FOR##_mark) { \ - parser->FOR##_size += p - parser->FOR##_mark; \ - if (parser->FOR##_size > MAX_FIELD_SIZE) return (p - data); \ + if (FOR##_mark) { \ if (settings->on_##FOR) { \ if (0 != settings->on_##FOR(parser, \ - 
parser->FOR##_mark, \ - p - parser->FOR##_mark)) \ + FOR##_mark, \ + p - FOR##_mark)) \ { \ return (p - data); \ } \ @@ -70,7 +63,7 @@ do { \ #define CALLBACK(FOR) \ do { \ CALLBACK_NOCLEAR(FOR); \ - parser->FOR##_mark = NULL; \ + FOR##_mark = NULL; \ } while (0) @@ -132,6 +125,8 @@ static const uint32_t usual[] = { enum state { s_dead = 1 /* important that this is > 0 */ + , s_start_res_or_resp + , s_res_or_resp_H , s_start_res , s_res_H , s_res_HT @@ -303,12 +298,31 @@ size_t http_parser_execute (http_parser *parser, return 0; } - if (parser->header_field_mark) parser->header_field_mark = data; - if (parser->header_value_mark) parser->header_value_mark = data; - if (parser->fragment_mark) parser->fragment_mark = data; - if (parser->query_string_mark) parser->query_string_mark = data; - if (parser->path_mark) parser->path_mark = data; - if (parser->url_mark) parser->url_mark = data; + /* technically we could combine all of these (except for url_mark) into one + variable, saving stack space, but it seems more clear to have them + separated. 
*/ + const char *header_field_mark = 0; + const char *header_value_mark = 0; + const char *fragment_mark = 0; + const char *query_string_mark = 0; + const char *path_mark = 0; + const char *url_mark = 0; + + if (state == s_header_field) + header_field_mark = data; + if (state == s_header_value) + header_value_mark = data; + if (state == s_req_fragment) + fragment_mark = data; + if (state == s_req_query_string) + query_string_mark = data; + if (state == s_req_path) + path_mark = data; + if (state == s_req_path || state == s_req_schema || state == s_req_schema_slash + || state == s_req_schema_slash_slash || state == s_req_port + || state == s_req_query_string_start || state == s_req_query_string + || state == s_req_fragment_start || state == s_req_fragment) + url_mark = data; for (p=data, pe=data+len; p != pe; p++) { ch = *p; @@ -326,6 +340,42 @@ size_t http_parser_execute (http_parser *parser, */ goto error; + case s_start_res_or_resp: + { + if (ch == CR || ch == LF) + break; + parser->flags = 0; + parser->content_length = -1; + + CALLBACK2(message_begin); + + if (ch == 'H') + state = s_res_or_resp_H; + else { + parser->type = HTTP_REQUEST; + if (ch < 'A' || 'Z' < ch) goto error; + parser->buffer[0] = ch; + index = 0; + state = s_req_method; + } + break; + } + + case s_res_or_resp_H: + if (ch == 'T') { + parser->type = HTTP_RESPONSE; + state = s_res_HT; + } else { + if (ch < 'A' || 'Z' < ch) goto error; + parser->type = HTTP_REQUEST; + parser->method = (enum http_method) 0; + parser->buffer[0] = 'H'; + parser->buffer[1] = ch; + index = 1; + state = s_req_method; + } + break; + case s_start_res: { parser->flags = 0; @@ -584,7 +634,7 @@ size_t http_parser_execute (http_parser *parser, break; case 9: - if (ngx_str9cmp(parser->buffer, + if (ngx_str9cmp(parser->buffer, 'P', 'R', 'O', 'P', 'P', 'A', 'T', 'C', 'H')) { parser->method = HTTP_PROPPATCH; break; @@ -637,6 +687,9 @@ size_t http_parser_execute (http_parser *parser, if (ch == ':') { state = s_req_schema_slash; 
break; + } else if (ch == '.') { + state = s_req_host; + break; } goto error; @@ -1156,12 +1209,14 @@ size_t http_parser_execute (http_parser *parser, if (!c) { if (ch == CR) { + CALLBACK(header_value); header_state = h_general; state = s_header_almost_done; break; } if (ch == LF) { + CALLBACK(header_value); state = s_header_field_start; break; } @@ -1547,15 +1602,8 @@ void http_parser_init (http_parser *parser, enum http_parser_type t) { parser->type = t; - parser->state = (t == HTTP_REQUEST ? s_start_req : s_start_res); + parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_res_or_resp)); parser->nread = 0; parser->upgrade = 0; - - parser->header_field_mark = NULL; - parser->header_value_mark = NULL; - parser->query_string_mark = NULL; - parser->path_mark = NULL; - parser->url_mark = NULL; - parser->fragment_mark = NULL; } diff --git a/deps/http_parser/http_parser.h b/deps/http_parser/http_parser.h index 977ecdfa00..a4abf3208f 100644 --- a/deps/http_parser/http_parser.h +++ b/deps/http_parser/http_parser.h @@ -89,22 +89,15 @@ enum http_method }; -enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE }; +enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE, HTTP_BOTH }; struct http_parser { /** PRIVATE **/ - enum http_parser_type type; - unsigned short state; - unsigned short header_state; - size_t index; - - /* 1 = Upgrade header was present and the parser has exited because of that. - * 0 = No upgrade header present. - * Should be checked when http_parser_execute() returns in addition to - * error checking. 
- */ - unsigned short upgrade; + unsigned char type; + unsigned char state; + unsigned char header_state; + unsigned char index; char flags; @@ -112,26 +105,20 @@ struct http_parser { ssize_t body_read; ssize_t content_length; - const char *header_field_mark; - size_t header_field_size; - const char *header_value_mark; - size_t header_value_size; - const char *query_string_mark; - size_t query_string_size; - const char *path_mark; - size_t path_size; - const char *url_mark; - size_t url_size; - const char *fragment_mark; - size_t fragment_size; - /** READ-ONLY **/ unsigned short status_code; /* responses only */ - enum http_method method; /* requests only */ + unsigned short method; /* requests only */ unsigned short http_major; unsigned short http_minor; char buffer[HTTP_PARSER_MAX_METHOD_LEN]; + /* 1 = Upgrade header was present and the parser has exited because of that. + * 0 = No upgrade header present. + * Should be checked when http_parser_execute() returns in addition to + * error checking. 
+ */ + char upgrade; + /** PUBLIC **/ void *data; /* A pointer to get hook to the "connection" or "socket" object */ }; diff --git a/deps/http_parser/test.c b/deps/http_parser/test.c index 03a98ff2a3..a9746adb54 100644 --- a/deps/http_parser/test.c +++ b/deps/http_parser/test.c @@ -495,6 +495,30 @@ const struct message requests[] = ,.body= "" } +#define CONNECT_REQUEST 17 +, {.name = "connect request" + ,.type= HTTP_REQUEST + ,.raw= "CONNECT home.netscape.com:443 HTTP/1.0\r\n" + "User-agent: Mozilla/1.1N\r\n" + "Proxy-authorization: basic aGVsbG86d29ybGQ=\r\n" + "\r\n" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 0 + ,.method= HTTP_CONNECT + ,.query_string= "" + ,.fragment= "" + ,.request_path= "" + ,.request_url= "home.netscape.com:443" + ,.num_headers= 2 + ,.upgrade=0 + ,.headers= { { "User-agent", "Mozilla/1.1N" } + , { "Proxy-authorization", "basic aGVsbG86d29ybGQ=" } + } + ,.body= "" + } + , {.name= NULL } /* sentinel */ }; @@ -721,6 +745,43 @@ const struct message responses[] = ,.body= "" } +#define BONJOUR_MADAME_FR 8 +/* The client should not merge two headers fields when the first one doesn't + * have a value. 
+ */ +, {.name= "bonjourmadame.fr" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.0 301 Moved Permanently\r\n" + "Date: Thu, 03 Jun 2010 09:56:32 GMT\r\n" + "Server: Apache/2.2.3 (Red Hat)\r\n" + "Cache-Control: public\r\n" + "Pragma: \r\n" + "Location: http://www.bonjourmadame.fr/\r\n" + "Vary: Accept-Encoding\r\n" + "Content-Length: 0\r\n" + "Content-Type: text/html; charset=UTF-8\r\n" + "Connection: keep-alive\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 0 + ,.status_code= 301 + ,.num_headers= 9 + ,.headers= + { { "Date", "Thu, 03 Jun 2010 09:56:32 GMT" } + , { "Server", "Apache/2.2.3 (Red Hat)" } + , { "Cache-Control", "public" } + , { "Pragma", "" } + , { "Location", "http://www.bonjourmadame.fr/" } + , { "Vary", "Accept-Encoding" } + , { "Content-Length", "0" } + , { "Content-Type", "text/html; charset=UTF-8" } + , { "Connection", "keep-alive" } + } + ,.body= "" + } + , {.name= NULL } /* sentinel */ }; @@ -1207,82 +1268,84 @@ test_scan (const struct message *r1, const struct message *r2, const struct mess int total_len = strlen(total); - int total_ops = (total_len - 1) * (total_len - 2) / 2; + int total_ops = 2 * (total_len - 1) * (total_len - 2) / 2; int ops = 0 ; size_t buf1_len, buf2_len, buf3_len; - int i,j; - for (j = 2; j < total_len; j ++ ) { - for (i = 1; i < j; i ++ ) { + int i,j,type_both; + for (type_both = 0; type_both < 2; type_both ++ ) { + for (j = 2; j < total_len; j ++ ) { + for (i = 1; i < j; i ++ ) { - if (ops % 1000 == 0) { - printf("\b\b\b\b%3.0f%%", 100 * (float)ops /(float)total_ops); - fflush(stdout); - } - ops += 1; + if (ops % 1000 == 0) { + printf("\b\b\b\b%3.0f%%", 100 * (float)ops /(float)total_ops); + fflush(stdout); + } + ops += 1; - parser_init(r1->type); + parser_init(type_both ? 
HTTP_BOTH : r1->type); - buf1_len = i; - strncpy(buf1, total, buf1_len); - buf1[buf1_len] = 0; + buf1_len = i; + strncpy(buf1, total, buf1_len); + buf1[buf1_len] = 0; - buf2_len = j - i; - strncpy(buf2, total+i, buf2_len); - buf2[buf2_len] = 0; + buf2_len = j - i; + strncpy(buf2, total+i, buf2_len); + buf2[buf2_len] = 0; - buf3_len = total_len - j; - strncpy(buf3, total+j, buf3_len); - buf3[buf3_len] = 0; + buf3_len = total_len - j; + strncpy(buf3, total+j, buf3_len); + buf3[buf3_len] = 0; - read = parse(buf1, buf1_len); - if (read != buf1_len) { - print_error(buf1, read); - goto error; - } + read = parse(buf1, buf1_len); + if (read != buf1_len) { + print_error(buf1, read); + goto error; + } - read = parse(buf2, buf2_len); - if (read != buf2_len) { - print_error(buf2, read); - goto error; - } + read = parse(buf2, buf2_len); + if (read != buf2_len) { + print_error(buf2, read); + goto error; + } - read = parse(buf3, buf3_len); - if (read != buf3_len) { - print_error(buf3, read); - goto error; - } + read = parse(buf3, buf3_len); + if (read != buf3_len) { + print_error(buf3, read); + goto error; + } - parse(NULL, 0); + parse(NULL, 0); - if (3 != num_messages) { - fprintf(stderr, "\n\nParser didn't see 3 messages only %d\n", num_messages); - goto error; - } + if (3 != num_messages) { + fprintf(stderr, "\n\nParser didn't see 3 messages only %d\n", num_messages); + goto error; + } - if (!message_eq(0, r1)) { - fprintf(stderr, "\n\nError matching messages[0] in test_scan.\n"); - goto error; - } + if (!message_eq(0, r1)) { + fprintf(stderr, "\n\nError matching messages[0] in test_scan.\n"); + goto error; + } - if (!message_eq(1, r2)) { - fprintf(stderr, "\n\nError matching messages[1] in test_scan.\n"); - goto error; - } + if (!message_eq(1, r2)) { + fprintf(stderr, "\n\nError matching messages[1] in test_scan.\n"); + goto error; + } - if (!message_eq(2, r3)) { - fprintf(stderr, "\n\nError matching messages[2] in test_scan.\n"); - goto error; - } + if (!message_eq(2, r3)) { 
+ fprintf(stderr, "\n\nError matching messages[2] in test_scan.\n"); + goto error; + } - parser_free(); + parser_free(); + } } } puts("\b\b\b\b100%"); return; -error: + error: fprintf(stderr, "i=%d j=%d\n", i, j); fprintf(stderr, "buf1 (%u) %s\n\n", (unsigned int)buf1_len, buf1); fprintf(stderr, "buf2 (%u) %s\n\n", (unsigned int)buf2_len , buf2); @@ -1395,12 +1458,18 @@ main (void) - printf("response scan 1/1 "); + printf("response scan 1/2 "); test_scan( &responses[TRAILING_SPACE_ON_CHUNKED_BODY] , &responses[NO_HEADERS_NO_BODY_404] , &responses[NO_REASON_PHRASE] ); + printf("response scan 1/2 "); + test_scan( &responses[BONJOUR_MADAME_FR] + , &responses[UNDERSTORE_HEADER_KEY] + , &responses[NO_CARRIAGE_RET] + ); + puts("responses okay"); diff --git a/src/node_http_parser.cc b/src/node_http_parser.cc index 29423c0333..ac25570e6b 100644 --- a/src/node_http_parser.cc +++ b/src/node_http_parser.cc @@ -102,7 +102,7 @@ static struct http_parser_settings settings; static inline Persistent -method_to_str(enum http_method m) { +method_to_str(unsigned short m) { switch (m) { case HTTP_DELETE: return delete_sym; case HTTP_GET: return get_sym;