lightning/ccan/ccan/cdump/cdump.c


								/* MIT (BSD) license - see LICENSE file for details */

								#include "cdump.h"

								#include <ccan/tal/str/str.h>

								#include <assert.h>


								struct token {

									const char *p;

									size_t len;

								};


								static void add_token(struct token **toks, const char *p, size_t len)

								{

									size_t n = tal_count(*toks);

									tal_resize(toks, n+1);

									(*toks)[n].p = p;

									(*toks)[n].len = len;

								}


								static size_t to_eol(const char *p)

								{

									size_t len = strcspn(p, "\n");


									/* And any \ continuations. */

									while (p[len] && p[len-1] == '\\')

										len += strcspn(p+len+1, "\n") + 1;

									return len;

								}


								/* Simplified tokenizer: comments and preproc directives removed,

								   identifiers are a token, others are single char tokens. */

								static struct token *tokenize(const void *ctx, const char *code)

								{

									unsigned int i, len, tok_start = -1;

									bool start_of_line = true;

									struct token *toks = tal_arr(ctx, struct token, 0);


									for (i = 0; code[i]; i += len) {

										if (code[i] == '#' && start_of_line) {

											/* Preprocessor line. */

											len = to_eol(code + i);

										} else if (code[i] == '/' && code[i+1] == '/') {

											/* One line comment. */

											len = to_eol(code + i);

											if (tok_start != -1U) {

												add_token(&toks, code+tok_start, i - tok_start);

												tok_start = -1U;

											}

										} else if (code[i] == '/' && code[i+1] == '*') {

											/* Multi-line comment. */

											const char *end = strstr(code+i+2, "*/");

											len = (end + 2) - (code + i);

											if (!end)

												len = strlen(code + i);

											if (tok_start != -1U) {

												add_token(&toks, code+tok_start, i - tok_start);

												tok_start = -1U;

											}

										} else if (cisalnum(code[i]) || code[i] == '_') {

											/* Identifier or part thereof */

											if (tok_start == -1U)

												tok_start = i;

											len = 1;

										} else if (!cisspace(code[i])) {

											/* Punctuation: treat as single char token. */

											if (tok_start != -1U) {

												add_token(&toks, code+tok_start, i - tok_start);

												tok_start = -1U;

											}

											add_token(&toks, code+i, 1);

											len = 1;

										} else {

											/* Whitespace. */

											if (tok_start != -1U) {

												add_token(&toks, code+tok_start, i - tok_start);

												tok_start = -1U;

											}

											len = 1;

										}

										if (code[i] == '\n')

											start_of_line = true;

										else if (!cisspace(code[i]))

											start_of_line = false;

									}


									/* Add terminating NULL. */

									tal_resizez(&toks, tal_count(toks) + 1);

									return toks;

								}


								struct parse_state {

									const char *code;

									const struct token *toks;

									struct cdump_definitions *defs;

									char *complaints;

								};


								static const struct token *tok_peek(const struct token **toks)

								{

									/* Ignore removed tokens (eg. comments) */

									while (toks[0]->len == 0) {

										if (!toks[0]->p)

											return NULL;

										(*toks)++;

									}

									return toks[0];

								}


								static bool tok_is(const struct token **toks, const char *target)

								{

									const struct token *t = tok_peek(toks);

									return (t && t->len == strlen(target)

										&& memcmp(t->p, target, t->len) == 0);

								}


								static const struct token *tok_take(const struct token **toks)

								{

									const struct token *t = tok_peek(toks);

									if (t)

										(*toks)++;


									return t;

								}


								static const struct token *tok_take_if(const struct token **toks,

												       const char *target)

								{

									if (tok_is(toks, target))

										return tok_take(toks);

									return NULL;

								}


								static const char *tok_take_ident(const tal_t *ctx, const struct token **toks)

								{

									const struct token *t = tok_peek(toks);


									if (!t)

										return NULL;


									if (strspn(t->p, "_0123456789"

										   "abcdefghijklmnopqrstuvwxyz"

										   "ABCDEFGHIJKLMNOPQRSTUVWXYZ") < t->len)

										return NULL;


									t = tok_take(toks);

									return tal_strndup(ctx, t->p, t->len);

								}


								static char *string_of_toks(const tal_t *ctx,

											    const struct token *first,

											    const struct token *until)

								{

									char *str, *p;


									/* Careful to skip erased tokens (eg. comments) */

									str = p = tal_arr(ctx, char, until->p - first->p + 1);

									while (first != until) {

										const struct token *next = first + 1;


										if (first->len) {

											memcpy(p, first->p, first->len);

											p += first->len;

											/* Insert space if they weren't adjacent, unless last */

											if (next != until) {

												if (first->p + first->len != next->p)

													*(p++) = ' ';

											}

										}

										first = next;

									}

									*p = '\0';


									return str;

								}


								static char *tok_take_until(const tal_t *ctx,

											    const struct token **toks,

											    const char *delims)

								{

									const struct token *t, *start;


									start = tok_peek(toks);

									while ((t = tok_peek(toks)) != NULL) {

										/* If this contains a delimiter, copy up to prev token. */

										if (strcspn(t->p, delims) < t->len)

											return string_of_toks(ctx, start, t);

										tok_take(toks);

									};


									/* EOF without finding delimiter */

									return NULL;

								}


								static bool type_defined(const struct cdump_type *t)

								{

									switch (t->kind) {

									case CDUMP_STRUCT:

									case CDUMP_UNION:

										return (t->u.members != NULL);

									case CDUMP_ENUM:

										return (t->u.enum_vals != NULL);


									/* These shouldn't happen; we don't try to define them. */

									case CDUMP_UNKNOWN:

									case CDUMP_ARRAY:

									case CDUMP_POINTER:

										break;

									}

									abort();

								}


								/* May allocate a new type if not already found (steals @name) */

								static struct cdump_type *get_type(struct cdump_definitions *defs,

												   enum cdump_type_kind kind,

												   const char *name)

								{

									cdump_map_t *m = (void *)0x1L; /* Shouldn't be used */

									struct cdump_type *t;


									switch (kind) {

									case CDUMP_STRUCT:

										m = &defs->structs;

										break;

									case CDUMP_UNION:

										m = &defs->unions;

										break;

									case CDUMP_ENUM:

										m = &defs->enums;

										break;

									case CDUMP_UNKNOWN:

									case CDUMP_ARRAY:

									case CDUMP_POINTER:

										m = NULL;

									}


									/* Do we already have it? */

									if (m) {

										t = strmap_get(m, name);

										if (t)

											return t;

									}


									t = tal(defs, struct cdump_type);

									t->kind = kind;

									t->name = name ? tal_steal(t, name) : NULL;

									/* These are actually the same, but be thorough */

									t->u.members = NULL;

									t->u.enum_vals = NULL;

									if (m)

										strmap_add(m, t->name, t);


									return t;

								}


								static void complain(struct parse_state *ps, const char *complaint)

								{

									unsigned int linenum;

									const char *p = ps->code;


									for (linenum = 1; p < ps->toks[0].p; linenum++) {

										p = strchr(p+1, '\n');

										if (!p)

											break;

									}


									tal_append_fmt(&ps->complaints,

										       "Line %u: '%.*s': %s\n",

										       linenum, (int)ps->toks[0].len,

										       ps->toks[0].p, complaint);

								}


								static void tok_take_unknown_statement(struct parse_state *ps)

								{

									complain(ps, "Ignoring unknown statement until next semicolon");

									tal_free(tok_take_until(NULL, &ps->toks, ";"));

									tok_take_if(&ps->toks, ";");

								}


								static bool tok_take_expr(struct parse_state *ps, const char *term)

								{

									while (!tok_is(&ps->toks, term)) {

										if (tok_take_if(&ps->toks, "(")) {

											if (!tok_take_expr(ps, ")"))

												return false;

										} else if (tok_take_if(&ps->toks, "[")) {

											if (!tok_take_expr(ps, "]"))

												return false;

										} else if (!tok_take(&ps->toks))

											return false;

									}

									return tok_take(&ps->toks);

								}


								static char *tok_take_expr_str(const tal_t *ctx,

											       struct parse_state *ps,

											       const char *term)

								{

									const struct token *start = tok_peek(&ps->toks);


									if (!tok_take_expr(ps, term))

										return NULL;


									return string_of_toks(ctx, start, ps->toks - 1);

								}


								/* [ ... */

								static bool tok_take_array(struct parse_state *ps, struct cdump_type **type)

								{

									/* This will be some arbitrary expression! */

									struct cdump_type *arr = get_type(ps->defs, CDUMP_ARRAY, NULL);


									arr->u.arr.size = tok_take_expr_str(arr, ps, "]");

									if (!arr->u.arr.size) {

										complain(ps, "Could not find closing array size ]");

										return false;

									}


									arr->u.arr.type = *type;

									*type = arr;


									return true;

								}


								static struct cdump_type *ptr_of(struct parse_state *ps,

												 const struct cdump_type *ptr_to)

								{

									struct cdump_type *ptr = get_type(ps->defs, CDUMP_POINTER, NULL);

									ptr->u.ptr = ptr_to;

									return ptr;

								}


								static bool tok_take_type(struct parse_state *ps, struct cdump_type **type)

								{

									const char *name;

									const struct token *types;

									enum cdump_type_kind kind;


									/* Ignoring weird typedefs, only these can be combined. */

									types = ps->toks;

									while (tok_take_if(&ps->toks, "int")

									       || tok_take_if(&ps->toks, "long")

									       || tok_take_if(&ps->toks, "short")

									       || tok_take_if(&ps->toks, "double")

									       || tok_take_if(&ps->toks, "float")

									       || tok_take_if(&ps->toks, "char")

									       || tok_take_if(&ps->toks, "signed")

									       || tok_take_if(&ps->toks, "unsigned"));


									/* Did we get some? */

									if (ps->toks != types) {

										name = string_of_toks(NULL, types, tok_peek(&ps->toks));

										kind = CDUMP_UNKNOWN;

									} else {

										/* Try normal types (or simple typedefs, etc). */

										if (tok_take_if(&ps->toks, "struct")) {

											kind = CDUMP_STRUCT;

										} else if (tok_take_if(&ps->toks, "union")) {

											kind = CDUMP_UNION;

										} else if (tok_take_if(&ps->toks, "enum")) {

											kind = CDUMP_ENUM;

										} else

											kind = CDUMP_UNKNOWN;


										name = tok_take_ident(ps->defs, &ps->toks);

										if (!name) {

											complain(ps, "Invalid typename");

											return false;

										}

									}


									*type = get_type(ps->defs, kind, name);

									return true;

								}


								/* CDUMP */

								static bool tok_maybe_take_cdump_note(const tal_t *ctx,

												      struct parse_state *ps, const char **note)

								{

									*note = NULL;

									if (tok_take_if(&ps->toks, "CDUMP")) {

										if (!tok_take_if(&ps->toks, "(")) {

											complain(ps, "Expected ( after CDUMP");

											return false;

										}

										*note = tok_take_expr_str(ctx, ps, ")");

										if (!*note) {

											complain(ps, "Expected ) after CDUMP(");

											return false;

										}

									}

									return true;

								}


								/* __attribute__((...)) */

								static bool tok_ignore_attribute(struct parse_state *ps)

								{

									if (!tok_take_if(&ps->toks, "__attribute__"))

										return true;


									if (!tok_take_if(&ps->toks, "(") || !tok_take_if(&ps->toks, "(")) {

										complain(ps, "Expected (( after __attribute__");

										return false;

									}


									if (!tok_take_expr(ps, ")")) {

										complain(ps, "Expected expression after __attribute__((");

										return false;

									}

									if (!tok_take_if(&ps->toks, ")")) {

										complain(ps, "Expected )) __attribute__((");

										return false;

									}

									return true;

								}


								/* struct|union ... */

								static bool tok_take_conglom(struct parse_state *ps,

											     enum cdump_type_kind conglom_kind)

								{

									struct cdump_type *e;

									const char *name;

									size_t n;


									assert(conglom_kind == CDUMP_STRUCT || conglom_kind == CDUMP_UNION);


									name = tok_take_ident(ps->defs, &ps->toks);

									if (!name) {

										complain(ps, "Invalid struct/union name");

										return false;

									}


									e = get_type(ps->defs, conglom_kind, name);

									if (type_defined(e)) {

										complain(ps, "Type already defined");

										return false;

									}


									if (!tok_maybe_take_cdump_note(e, ps, &e->note))

										return false;


									if (!tok_ignore_attribute(ps))

										return false;


									if (!tok_take_if(&ps->toks, "{")) {

										complain(ps, "Expected { for struct/union");

										return false;

									}


									e->u.members = tal_arr(e, struct cdump_member, n = 0);

									while (!tok_is(&ps->toks, "}")) {

										struct cdump_type *basetype;

										const struct token *quals;

										unsigned int num_quals = 0;


										if (!tok_ignore_attribute(ps))

											return false;


										/* Anything can have these prepended. */

										quals = ps->toks;

										while (tok_take_if(&ps->toks, "const")

										       || tok_take_if(&ps->toks, "volatile"))

											num_quals++;


										/* eg. "struct foo" or "varint_t" */

										if (!tok_take_type(ps, &basetype)) {

											complain(ps, "Expected typename inside struct/union");

											return false;

										}


										do {

											struct cdump_member *m;


											tal_resize(&e->u.members, n+1);

											m = &e->u.members[n++];

											m->type = basetype;

											if (num_quals) {

												m->qualifiers

													= string_of_toks(e, quals,

															 quals + num_quals);

											} else

												m->qualifiers = NULL;


											/* May have multiple asterisks. */

											while (tok_take_if(&ps->toks, "*"))

												m->type = ptr_of(ps, m->type);


											if (!tok_ignore_attribute(ps))

												return false;


											m->name = tok_take_ident(e, &ps->toks);

											if (!m->name) {

												complain(ps, "Expected name for member");

												return false;

											}


											/* May be an array. */

											while (tok_take_if(&ps->toks, "[")) {

												if (!tok_take_array(ps, &m->type))

													return false;

											}


											/* CDUMP() */

											if (!tok_maybe_take_cdump_note(e->u.members,

														       ps, &m->note))

												return false;


											if (!tok_ignore_attribute(ps))

												return false;

										} while (tok_take_if(&ps->toks, ","));


										if (!tok_take_if(&ps->toks, ";")) {

											complain(ps, "Expected ; at end of member");

											return false;

										}

									}


									if (!tok_take_if(&ps->toks, "}")) {

										complain(ps, "Expected } at end of struct/union");

										return false;

									}


									if (!tok_ignore_attribute(ps))

										return false;


									if (!tok_take_if(&ps->toks, ";")) {

										complain(ps, "Expected ; at end of struct/union");

										return false;

									}

									return true;

								}


								/* enum ... */

								static bool tok_take_enum(struct parse_state *ps)

								{

									size_t n = 0;

									struct cdump_type *e;

									const char *name;


									name = tok_take_ident(ps->defs, &ps->toks);

									if (!name) {

										complain(ps, "Expected enum name");

										return false;

									}


									e = get_type(ps->defs, CDUMP_ENUM, name);


									/* Duplicate name? */

									if (type_defined(e)) {

										complain(ps, "enum already defined");

										return false;

									}


									/* CDUMP() */

									if (!tok_maybe_take_cdump_note(e, ps, &e->note))

										return false;


									if (!tok_ignore_attribute(ps))

										return false;


									if (!tok_take_if(&ps->toks, "{")) {

										complain(ps, "Expected { after enum name");

										return false;

									}


									e->u.enum_vals = tal_arr(e, struct cdump_enum_val, n);

									do {

										struct cdump_enum_val *v;


										/* GCC extension: comma and end of enum */

										if (tok_is(&ps->toks, "}"))

											break;


										tal_resize(&e->u.enum_vals, n+1);

										v = &e->u.enum_vals[n++];


										v->name = tok_take_ident(e, &ps->toks);

										if (!v->name) {

											complain(ps, "Expected enum value name");

											return false;

										}


										/* CDUMP() */

										if (!tok_maybe_take_cdump_note(e->u.enum_vals, ps, &v->note))

											return false;


										if (tok_take_if(&ps->toks, "=")) {

											v->value = tok_take_until(e, &ps->toks, ",}");

											if (!v->value) {

												complain(ps, "Expected , or } to end value");

												return false;

											}

										} else

											v->value = NULL;

									} while (tok_take_if(&ps->toks, ","));


									if (!tok_take_if(&ps->toks, "}")) {

										complain(ps, "Expected } at end of enum");

										return false;

									}


									if (!tok_ignore_attribute(ps))

										return false;


									if (!tok_take_if(&ps->toks, ";")) {

										complain(ps, "Expected ; at end of enum");

										return false;

									}

									return true;

								}


								static bool gather_undefines(const char *name,

											     struct cdump_type *t,

											     cdump_map_t *undefs)

								{

									if (!type_defined(t))

										strmap_add(undefs, name, t);

									return true;

								}


								static bool remove_from_map(const char *name,

											    struct cdump_type *t,

											    cdump_map_t *map)

								{

									strmap_del(map, name, NULL);

									return true;

								}


								static void remove_undefined(cdump_map_t *map)

								{

									cdump_map_t undefs;


									/* We can't delete inside iterator, so gather all the undefs

									 * then remove them. */

									strmap_init(&undefs);


									strmap_iterate(map, gather_undefines, &undefs);

									strmap_iterate(&undefs, remove_from_map, map);

									strmap_clear(&undefs);

								}


								static void destroy_definitions(struct cdump_definitions *defs)

								{

									strmap_clear(&defs->enums);

									strmap_clear(&defs->structs);

									strmap_clear(&defs->unions);

								}


								/* Simple LL(1) parser, inspired by Tridge's genstruct.pl. */

								struct cdump_definitions *cdump_extract(const tal_t *ctx, const char *code,

													char **complaints)

								{

									struct parse_state ps;

									const struct token *toks;


									ps.defs = tal(ctx, struct cdump_definitions);

									ps.complaints = tal_strdup(ctx, "");

									ps.code = code;


									strmap_init(&ps.defs->enums);

									strmap_init(&ps.defs->structs);

									strmap_init(&ps.defs->unions);

									tal_add_destructor(ps.defs, destroy_definitions);


									toks = ps.toks = tokenize(ps.defs, code);

									while (tok_peek(&ps.toks)) {

										if (!tok_ignore_attribute(&ps))

											goto fail;

										if (tok_take_if(&ps.toks, "struct")) {

											if (!tok_take_conglom(&ps, CDUMP_STRUCT))

												goto fail;

										} else if (tok_take_if(&ps.toks, "union")) {

											if (!tok_take_conglom(&ps, CDUMP_UNION))

												goto fail;

										} else if (tok_take_if(&ps.toks, "enum")) {

											if (!tok_take_enum(&ps))

												goto fail;

										} else

											tok_take_unknown_statement(&ps);

									}


									/* Now, remove any undefined types! */

									remove_undefined(&ps.defs->enums);

									remove_undefined(&ps.defs->structs);

									remove_undefined(&ps.defs->unions);

									tal_free(toks);


								out:

									if (streq(ps.complaints, ""))

										ps.complaints = tal_free(ps.complaints);


									if (complaints)

										*complaints = ps.complaints;

									else

										tal_free(ps.complaints);

									return ps.defs;


								fail:

									ps.defs = tal_free(ps.defs);

									goto out;

								}