Browse Source

Upgrade http-parser

v0.7.4-release
Ryan Dahl 15 years ago
parent
commit
9be6c501ec
  1. 6
      deps/http_parser/Makefile
  2. 56
      deps/http_parser/README.md
  3. 100
      deps/http_parser/http_parser.c
  4. 39
      deps/http_parser/http_parser.h
  5. 177
      deps/http_parser/test.c
  6. 2
      src/node_http_parser.cc

6
deps/http_parser/Makefile

@ -8,10 +8,10 @@ test: test_g
test_g: http_parser_g.o test_g.o
gcc $(OPT_DEBUG) http_parser_g.o test_g.o -o $@
test_g.o: test.c Makefile
test_g.o: test.c http_parser.h Makefile
gcc $(OPT_DEBUG) -c test.c -o $@
test.o: test.c Makefile
test.o: test.c http_parser.h Makefile
gcc $(OPT_FAST) -c test.c -o $@
http_parser_g.o: http_parser.c http_parser.h Makefile
@ -23,7 +23,7 @@ test-valgrind: test_g
http_parser.o: http_parser.c http_parser.h Makefile
gcc $(OPT_FAST) -c http_parser.c
test_fast: http_parser.o test.c
test_fast: http_parser.o test.c http_parser.h
gcc $(OPT_FAST) http_parser.o test.c -o $@
test-run-timed: test_fast

56
deps/http_parser/README.md

@ -1,30 +1,32 @@
HTTP Parser
===========
This is a parser for HTTP messages written in C. It parses both requests
and responses. The parser is designed to be used in performance HTTP
applications. It does not make any allocations, it does not buffer data, and
it can be interrupted at anytime. Depending on your architecture, it only
requires between 100 and 200 bytes of data per message stream (in a web
server that is per connection).
This is a parser for HTTP messages written in C. It parses both requests and
responses. The parser is designed to be used in high-performance HTTP
applications. It makes no syscalls or allocations, it does not buffer data,
and it can be interrupted at any time. Depending on your architecture, it
only requires between 100 and 200 bytes of data per message stream (in a web
server that is per connection).
Features:
* No dependencies
* Parses both requests and responses.
* Handles persistent streams.
* Handles persistent streams (keep-alive).
* Decodes chunked encoding.
* Extracts the following data from a message
* header fields and values
* content-length
* request method
* response status code
* transfer-encoding
* http version
* request path, query string, fragment
* message body
* Defends against buffer overflow attacks.
* Upgrade support
* Defends against buffer overflow attacks.
The parser extracts the following information from HTTP messages:
* Header fields and values
* Content-Length
* Request method
* Response status code
* Transfer-Encoding
* HTTP version
* Request path, query string, fragment
* Message body
Usage
-----
@ -55,10 +57,9 @@ When data is received on the socket execute the parser and check for errors.
}
/* Start up / continue the parser.
* Note we pass the recved==0 to http_parse_requests to signal
* that EOF has been received.
* Note we pass recved==0 to signal that EOF has been received.
*/
nparsed = http_parser_execute(parser, settings, buf, recved);
nparsed = http_parser_execute(parser, &settings, buf, recved);
if (parser->upgrade) {
/* handle new protocol */
@ -83,10 +84,6 @@ The parser decodes the transfer-encoding for both requests and responses
transparently. That is, a chunked encoding is decoded before being sent to
the on_body callback.
It does not decode the content-encoding (gzip). Not all HTTP applications
need to inspect the body. Decoding gzip is a non-negligible amount of
processing (and requires making allocations). HTTP proxies using this
parser, for example, would not want such a feature.
The Special Problem of Upgrade
------------------------------
@ -109,11 +106,11 @@ information the Web Socket protocol.)
To support this, the parser will treat this as a normal HTTP message without a
body, issuing both on_headers_complete and on_message_complete callbacks. However
http_parser_execute() may finish without parsing the entire supplied buffer.
http_parser_execute() will stop parsing at the end of the headers and return.
The user needs to check if parser->upgrade has been set to 1 after
http_parser_execute() returns to determine if a premature exit was due to an
upgrade or an error.
The user is expected to check if `parser->upgrade` has been set to 1 after
`http_parser_execute()` returns. Non-HTTP data begins in the supplied buffer
at the offset given by the return value of `http_parser_execute()`.
Callbacks
@ -166,6 +163,7 @@ and apply following logic:
| | | and append callback data to it |
------------------------ ------------ --------------------------------------------
See examples of reading in headers:
* [partial example](http://gist.github.com/155877) in C

100
deps/http_parser/http_parser.c

@ -32,9 +32,6 @@
#endif
#define MAX_FIELD_SIZE (80*1024)
#define CALLBACK2(FOR) \
do { \
if (settings->on_##FOR) { \
@ -45,20 +42,16 @@ do { \
#define MARK(FOR) \
do { \
parser->FOR##_mark = p; \
parser->FOR##_size = 0; \
FOR##_mark = p; \
} while (0)
#define CALLBACK_NOCLEAR(FOR) \
do { \
if (parser->FOR##_mark) { \
parser->FOR##_size += p - parser->FOR##_mark; \
if (parser->FOR##_size > MAX_FIELD_SIZE) return (p - data); \
if (FOR##_mark) { \
if (settings->on_##FOR) { \
if (0 != settings->on_##FOR(parser, \
parser->FOR##_mark, \
p - parser->FOR##_mark)) \
FOR##_mark, \
p - FOR##_mark)) \
{ \
return (p - data); \
} \
@ -70,7 +63,7 @@ do { \
#define CALLBACK(FOR) \
do { \
CALLBACK_NOCLEAR(FOR); \
parser->FOR##_mark = NULL; \
FOR##_mark = NULL; \
} while (0)
@ -132,6 +125,8 @@ static const uint32_t usual[] = {
enum state
{ s_dead = 1 /* important that this is > 0 */
, s_start_res_or_resp
, s_res_or_resp_H
, s_start_res
, s_res_H
, s_res_HT
@ -303,12 +298,31 @@ size_t http_parser_execute (http_parser *parser,
return 0;
}
if (parser->header_field_mark) parser->header_field_mark = data;
if (parser->header_value_mark) parser->header_value_mark = data;
if (parser->fragment_mark) parser->fragment_mark = data;
if (parser->query_string_mark) parser->query_string_mark = data;
if (parser->path_mark) parser->path_mark = data;
if (parser->url_mark) parser->url_mark = data;
/* technically we could combine all of these (except for url_mark) into one
variable, saving stack space, but it seems more clear to have them
separated. */
const char *header_field_mark = 0;
const char *header_value_mark = 0;
const char *fragment_mark = 0;
const char *query_string_mark = 0;
const char *path_mark = 0;
const char *url_mark = 0;
if (state == s_header_field)
header_field_mark = data;
if (state == s_header_value)
header_value_mark = data;
if (state == s_req_fragment)
fragment_mark = data;
if (state == s_req_query_string)
query_string_mark = data;
if (state == s_req_path)
path_mark = data;
if (state == s_req_path || state == s_req_schema || state == s_req_schema_slash
|| state == s_req_schema_slash_slash || state == s_req_port
|| state == s_req_query_string_start || state == s_req_query_string
|| state == s_req_fragment_start || state == s_req_fragment)
url_mark = data;
for (p=data, pe=data+len; p != pe; p++) {
ch = *p;
@ -326,6 +340,42 @@ size_t http_parser_execute (http_parser *parser,
*/
goto error;
case s_start_res_or_resp:
{
if (ch == CR || ch == LF)
break;
parser->flags = 0;
parser->content_length = -1;
CALLBACK2(message_begin);
if (ch == 'H')
state = s_res_or_resp_H;
else {
parser->type = HTTP_REQUEST;
if (ch < 'A' || 'Z' < ch) goto error;
parser->buffer[0] = ch;
index = 0;
state = s_req_method;
}
break;
}
case s_res_or_resp_H:
if (ch == 'T') {
parser->type = HTTP_RESPONSE;
state = s_res_HT;
} else {
if (ch < 'A' || 'Z' < ch) goto error;
parser->type = HTTP_REQUEST;
parser->method = (enum http_method) 0;
parser->buffer[0] = 'H';
parser->buffer[1] = ch;
index = 1;
state = s_req_method;
}
break;
case s_start_res:
{
parser->flags = 0;
@ -637,6 +687,9 @@ size_t http_parser_execute (http_parser *parser,
if (ch == ':') {
state = s_req_schema_slash;
break;
} else if (ch == '.') {
state = s_req_host;
break;
}
goto error;
@ -1156,12 +1209,14 @@ size_t http_parser_execute (http_parser *parser,
if (!c) {
if (ch == CR) {
CALLBACK(header_value);
header_state = h_general;
state = s_header_almost_done;
break;
}
if (ch == LF) {
CALLBACK(header_value);
state = s_header_field_start;
break;
}
@ -1547,15 +1602,8 @@ void
http_parser_init (http_parser *parser, enum http_parser_type t)
{
parser->type = t;
parser->state = (t == HTTP_REQUEST ? s_start_req : s_start_res);
parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_res_or_resp));
parser->nread = 0;
parser->upgrade = 0;
parser->header_field_mark = NULL;
parser->header_value_mark = NULL;
parser->query_string_mark = NULL;
parser->path_mark = NULL;
parser->url_mark = NULL;
parser->fragment_mark = NULL;
}

39
deps/http_parser/http_parser.h

@ -89,22 +89,15 @@ enum http_method
};
enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE };
enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE, HTTP_BOTH };
struct http_parser {
/** PRIVATE **/
enum http_parser_type type;
unsigned short state;
unsigned short header_state;
size_t index;
/* 1 = Upgrade header was present and the parser has exited because of that.
* 0 = No upgrade header present.
* Should be checked when http_parser_execute() returns in addition to
* error checking.
*/
unsigned short upgrade;
unsigned char type;
unsigned char state;
unsigned char header_state;
unsigned char index;
char flags;
@ -112,26 +105,20 @@ struct http_parser {
ssize_t body_read;
ssize_t content_length;
const char *header_field_mark;
size_t header_field_size;
const char *header_value_mark;
size_t header_value_size;
const char *query_string_mark;
size_t query_string_size;
const char *path_mark;
size_t path_size;
const char *url_mark;
size_t url_size;
const char *fragment_mark;
size_t fragment_size;
/** READ-ONLY **/
unsigned short status_code; /* responses only */
enum http_method method; /* requests only */
unsigned short method; /* requests only */
unsigned short http_major;
unsigned short http_minor;
char buffer[HTTP_PARSER_MAX_METHOD_LEN];
/* 1 = Upgrade header was present and the parser has exited because of that.
* 0 = No upgrade header present.
* Should be checked when http_parser_execute() returns in addition to
* error checking.
*/
char upgrade;
/** PUBLIC **/
void *data; /* A pointer to get hook to the "connection" or "socket" object */
};

177
deps/http_parser/test.c

@ -495,6 +495,30 @@ const struct message requests[] =
,.body= ""
}
#define CONNECT_REQUEST 17
, {.name = "connect request"
,.type= HTTP_REQUEST
,.raw= "CONNECT home.netscape.com:443 HTTP/1.0\r\n"
"User-agent: Mozilla/1.1N\r\n"
"Proxy-authorization: basic aGVsbG86d29ybGQ=\r\n"
"\r\n"
,.should_keep_alive= FALSE
,.message_complete_on_eof= FALSE
,.http_major= 1
,.http_minor= 0
,.method= HTTP_CONNECT
,.query_string= ""
,.fragment= ""
,.request_path= ""
,.request_url= "home.netscape.com:443"
,.num_headers= 2
,.upgrade=0
,.headers= { { "User-agent", "Mozilla/1.1N" }
, { "Proxy-authorization", "basic aGVsbG86d29ybGQ=" }
}
,.body= ""
}
, {.name= NULL } /* sentinel */
};
@ -721,6 +745,43 @@ const struct message responses[] =
,.body= ""
}
#define BONJOUR_MADAME_FR 8
/* The client should not merge two header fields when the first one doesn't
* have a value.
*/
, {.name= "bonjourmadame.fr"
,.type= HTTP_RESPONSE
,.raw= "HTTP/1.0 301 Moved Permanently\r\n"
"Date: Thu, 03 Jun 2010 09:56:32 GMT\r\n"
"Server: Apache/2.2.3 (Red Hat)\r\n"
"Cache-Control: public\r\n"
"Pragma: \r\n"
"Location: http://www.bonjourmadame.fr/\r\n"
"Vary: Accept-Encoding\r\n"
"Content-Length: 0\r\n"
"Content-Type: text/html; charset=UTF-8\r\n"
"Connection: keep-alive\r\n"
"\r\n"
,.should_keep_alive= TRUE
,.message_complete_on_eof= FALSE
,.http_major= 1
,.http_minor= 0
,.status_code= 301
,.num_headers= 9
,.headers=
{ { "Date", "Thu, 03 Jun 2010 09:56:32 GMT" }
, { "Server", "Apache/2.2.3 (Red Hat)" }
, { "Cache-Control", "public" }
, { "Pragma", "" }
, { "Location", "http://www.bonjourmadame.fr/" }
, { "Vary", "Accept-Encoding" }
, { "Content-Length", "0" }
, { "Content-Type", "text/html; charset=UTF-8" }
, { "Connection", "keep-alive" }
}
,.body= ""
}
, {.name= NULL } /* sentinel */
};
@ -1207,82 +1268,84 @@ test_scan (const struct message *r1, const struct message *r2, const struct mess
int total_len = strlen(total);
int total_ops = (total_len - 1) * (total_len - 2) / 2;
int total_ops = 2 * (total_len - 1) * (total_len - 2) / 2;
int ops = 0 ;
size_t buf1_len, buf2_len, buf3_len;
int i,j;
for (j = 2; j < total_len; j ++ ) {
for (i = 1; i < j; i ++ ) {
int i,j,type_both;
for (type_both = 0; type_both < 2; type_both ++ ) {
for (j = 2; j < total_len; j ++ ) {
for (i = 1; i < j; i ++ ) {
if (ops % 1000 == 0) {
printf("\b\b\b\b%3.0f%%", 100 * (float)ops /(float)total_ops);
fflush(stdout);
}
ops += 1;
if (ops % 1000 == 0) {
printf("\b\b\b\b%3.0f%%", 100 * (float)ops /(float)total_ops);
fflush(stdout);
}
ops += 1;
parser_init(r1->type);
parser_init(type_both ? HTTP_BOTH : r1->type);
buf1_len = i;
strncpy(buf1, total, buf1_len);
buf1[buf1_len] = 0;
buf1_len = i;
strncpy(buf1, total, buf1_len);
buf1[buf1_len] = 0;
buf2_len = j - i;
strncpy(buf2, total+i, buf2_len);
buf2[buf2_len] = 0;
buf2_len = j - i;
strncpy(buf2, total+i, buf2_len);
buf2[buf2_len] = 0;
buf3_len = total_len - j;
strncpy(buf3, total+j, buf3_len);
buf3[buf3_len] = 0;
buf3_len = total_len - j;
strncpy(buf3, total+j, buf3_len);
buf3[buf3_len] = 0;
read = parse(buf1, buf1_len);
if (read != buf1_len) {
print_error(buf1, read);
goto error;
}
read = parse(buf1, buf1_len);
if (read != buf1_len) {
print_error(buf1, read);
goto error;
}
read = parse(buf2, buf2_len);
if (read != buf2_len) {
print_error(buf2, read);
goto error;
}
read = parse(buf2, buf2_len);
if (read != buf2_len) {
print_error(buf2, read);
goto error;
}
read = parse(buf3, buf3_len);
if (read != buf3_len) {
print_error(buf3, read);
goto error;
}
read = parse(buf3, buf3_len);
if (read != buf3_len) {
print_error(buf3, read);
goto error;
}
parse(NULL, 0);
parse(NULL, 0);
if (3 != num_messages) {
fprintf(stderr, "\n\nParser didn't see 3 messages only %d\n", num_messages);
goto error;
}
if (3 != num_messages) {
fprintf(stderr, "\n\nParser didn't see 3 messages only %d\n", num_messages);
goto error;
}
if (!message_eq(0, r1)) {
fprintf(stderr, "\n\nError matching messages[0] in test_scan.\n");
goto error;
}
if (!message_eq(0, r1)) {
fprintf(stderr, "\n\nError matching messages[0] in test_scan.\n");
goto error;
}
if (!message_eq(1, r2)) {
fprintf(stderr, "\n\nError matching messages[1] in test_scan.\n");
goto error;
}
if (!message_eq(1, r2)) {
fprintf(stderr, "\n\nError matching messages[1] in test_scan.\n");
goto error;
}
if (!message_eq(2, r3)) {
fprintf(stderr, "\n\nError matching messages[2] in test_scan.\n");
goto error;
}
if (!message_eq(2, r3)) {
fprintf(stderr, "\n\nError matching messages[2] in test_scan.\n");
goto error;
}
parser_free();
parser_free();
}
}
}
puts("\b\b\b\b100%");
return;
error:
error:
fprintf(stderr, "i=%d j=%d\n", i, j);
fprintf(stderr, "buf1 (%u) %s\n\n", (unsigned int)buf1_len, buf1);
fprintf(stderr, "buf2 (%u) %s\n\n", (unsigned int)buf2_len , buf2);
@ -1395,12 +1458,18 @@ main (void)
printf("response scan 1/1 ");
printf("response scan 1/2 ");
test_scan( &responses[TRAILING_SPACE_ON_CHUNKED_BODY]
, &responses[NO_HEADERS_NO_BODY_404]
, &responses[NO_REASON_PHRASE]
);
printf("response scan 1/2 ");
test_scan( &responses[BONJOUR_MADAME_FR]
, &responses[UNDERSTORE_HEADER_KEY]
, &responses[NO_CARRIAGE_RET]
);
puts("responses okay");

2
src/node_http_parser.cc

@ -102,7 +102,7 @@ static struct http_parser_settings settings;
static inline Persistent<String>
method_to_str(enum http_method m) {
method_to_str(unsigned short m) {
switch (m) {
case HTTP_DELETE: return delete_sym;
case HTTP_GET: return get_sym;

Loading…
Cancel
Save