You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

301 lines
5.9 KiB

/* Simple program to search for BOLT references in C files and make sure
* they're accurate. */
#include <ccan/err/err.h>
#include <ccan/opt/opt.h>
#include <ccan/str/str.h>
#include <ccan/tal/grab_file/grab_file.h>
#include <ccan/tal/path/path.h>
#include <ccan/tal/str/str.h>
#include <ccan/tal/tal.h>
#include <sys/types.h>
#include <dirent.h>
/* Set by the --verbose option: report each BOLT file found, each source
 * file checked and each reference matched. */
static bool verbose = false;
/* One BOLT markdown document loaded from the bolt directory. */
struct bolt_file {
/* Leading alphanumeric part of the file name (everything before the first
 * '-' or the ".md" suffix); references are looked up by this. */
const char *prefix;
/* Entire file text after canonicalize(): whitespace runs collapsed to
 * single spaces. */
const char *contents;
};
/* Collapse every run of whitespace into one space and drop whitespace at
 * the very start and end.  The tal-allocated string is rewritten in place,
 * then its allocation is shrunk to fit; the resized string is returned. */
static char *canonicalize(char *str)
{
	char *out = str;
	const char *in;
	/* Starts true so that leading whitespace emits nothing. */
	bool prev_was_space = true;

	for (in = str; *in != '\0'; in++) {
		bool is_space = cisspace(*in);

		if (!is_space)
			*out++ = *in;
		else if (!prev_was_space)
			*out++ = ' ';
		prev_was_space = is_space;
	}

	/* A trailing whitespace run left exactly one space behind: drop it. */
	if (prev_was_space && out != str)
		out--;
	*out = '\0';

	tal_resize(&str, out - str + 1);
	return str;
}
/* Append every BOLT markdown file found in <dir>/<subdir> to *files.
 *
 * A file qualifies when its name ends in ".md" and starts with a run of
 * ASCII letters/digits that is followed either by '-' or directly by the
 * ".md" suffix.  That leading run becomes the entry's prefix; the file's
 * canonicalized text becomes its contents.  Both are allocated off *files.
 *
 * Exits via err() if the directory cannot be opened or a qualifying file
 * cannot be read.
 */
static void get_files(const char *dir, const char *subdir,
		      struct bolt_file **files)
{
	char *path = path_join(NULL, dir, subdir);
	DIR *d = opendir(path);
	size_t n = tal_count(*files);
	struct dirent *e;

	if (!d)
		err(1, "Opening BOLT dir %s", path);

	while ((e = readdir(d)) != NULL) {
		int preflen;
		char *filename, *contents;

		/* Must end in .md */
		if (!strends(e->d_name, ".md"))
			continue;

		/* Prefix is anything up to - */
		preflen = strspn(e->d_name,
				 "0123456789"
				 "abcdefghijklmnopqrstuvwxyz"
				 "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
		if (!preflen)
			continue;
		if (preflen + strlen(".md") != strlen(e->d_name)
		    && e->d_name[preflen] != '-')
			continue;

		if (verbose)
			printf("Found bolt %.*s\n", preflen, e->d_name);

		tal_resize(files, n+1);
		(*files)[n].prefix = tal_strndup(*files,
						 e->d_name, preflen);

		/* The joined name hangs off path, so it is released with it. */
		filename = path_join(path, path, e->d_name);
		contents = grab_file(*files, filename);
		/* canonicalize() dereferences its argument: fail loudly on an
		 * unreadable file instead of crashing on NULL. */
		if (!contents)
			err(1, "Loading %s", filename);
		(*files)[n].contents = canonicalize(contents);
		n++;
	}

	closedir(d);
	tal_free(path);
}
/* Build the list of known BOLT documents by scanning the "bolts" and
 * "early-drafts" subdirectories of dir, in that order.  Returns a tal
 * array with no parent. */
static struct bolt_file *get_bolt_files(const char *dir)
{
	static const char *const subdirs[] = { "bolts", "early-drafts" };
	struct bolt_file *all = tal_arr(NULL, struct bolt_file, 0);
	size_t i;

	for (i = 0; i < sizeof(subdirs) / sizeof(subdirs[0]); i++)
		get_files(dir, subdirs[i], &all);

	return all;
}
/* Find the next "BOLT #<name>:" reference at or after *p.
 *
 * Whitespace is allowed between "BOLT" and '#', and between the name and
 * the ':'.  The name runs up to the first space or ':'.
 *
 * On success: returns the name as a new tal string with no parent (the
 * caller owns it), advances *p to just past the ':' and sets *len to the
 * number of bytes from there to the next comment terminator, or to the end
 * of the string if there is none.
 *
 * When no further "BOLT" occurs: returns NULL and leaves *p set to NULL.
 */
static char *find_bolt_ref(char **p, size_t *len)
{
	for (;;) {
		const char *name;
		char *end;
		size_t preflen;

		/* BOLT #X: */
		*p = strstr(*p, "BOLT");
		if (!*p)
			return NULL;
		*p += 4;
		while (cisspace(**p))
			(*p)++;
		if (**p != '#')
			continue;
		(*p)++;

		/* Only remember where the name is for now: allocating it
		 * before the ':' check would leak it on every near-miss. */
		name = *p;
		preflen = strcspn(name, " :");
		(*p) += preflen;
		while (cisspace(**p))
			(*p)++;
		if (**p != ':')
			continue;
		(*p)++;

		end = strstr(*p, "*/");
		if (!end)
			*len = strlen(*p);
		else
			*len = end - *p;
		return tal_strndup(NULL, name, preflen);
	}
}
/* Turn the first len bytes of a quoted comment into a pattern to look for
 * in a canonicalized BOLT document.
 *
 *  - "..." becomes ".*" (matches anything).
 *  - The first '*' on a line, when only whitespace precedes it, is dropped:
 *    it is block-comment decoration, not quoted text.  The start of the
 *    input is treated like the start of a line.
 *  - If escape is true, regex metacharacters are backslash-escaped so the
 *    rest of the text matches literally.
 *
 * The result is canonicalized and returned as a tal string with no parent.
 */
static char *code_to_regex(const char *code, size_t len, bool escape)
{
	/* Worst case every input byte turns into a two-byte escape. */
	char *pattern = tal_arr(NULL, char, len*2 + 1), *p;
	size_t i;
	/* True while nothing but whitespace has been seen on this line. */
	bool after_nl = true;

	/* We swallow '*' if first in line: block comments */
	p = pattern;
	for (i = 0; i < len; i++) {
		/* ... matches anything.  Only honoured when all three dots
		 * lie inside the requested span. */
		if (len - i >= 3 && strstarts(code + i, "...")) {
			*(p++) = '.';
			*(p++) = '*';
			i += 2;
			after_nl = false;
			continue;
		}

		switch (code[i]) {
		case '\n':
			after_nl = true;
			*(p++) = code[i];
			break;

		case '*':
			if (after_nl) {
				after_nl = false;
				continue;
			}
			/* Fall thru. */
		case '.':
		case '$':
		case '^':
		case '[':
		case ']':
		case '(':
		case ')':
		case '+':
		case '|':
		case '?':
		case '{':
		case '}':
		case '\\':
			if (escape)
				*(p++) = '\\';
			/* Fall thru */
		default:
			/* Any visible character ends the "start of line"
			 * state, so a later '*' on this line is kept. */
			if (!cisspace(code[i]))
				after_nl = false;
			*(p++) = code[i];
		}
	}
	*p = '\0';
	return canonicalize(pattern);
}
/* Count the lines of raw that begin strictly before *pos, and rewind *pos
 * to the start of the last such line.  For a position inside a line this
 * is that line's 1-based number and its first character.  If *pos equals
 * raw, returns 0 and leaves *pos alone. */
static unsigned linenum(const char *raw, const char **pos)
{
	const char *target = *pos;
	const char *line_start = raw;
	unsigned count = 0;

	while (line_start < target) {
		const char *nl;

		*pos = line_start;
		count++;

		nl = strchr(line_start, '\n');
		if (nl == NULL)
			break;	/* last line: nothing further to scan */
		line_start = nl + 1;
	}
	return count;
}
/* Report that the quoted text at pos (len bytes long, inside the file text
 * raw) was not found in bolt, then exit with status 1.
 *
 * Prints "<filename>:<line>:mismatch:<the line containing pos>" to stderr.
 * As a hint it also prints the longest leading part of the quote (taken
 * from the end of that line onwards) that does occur literally in the BOLT
 * text, followed by the 20 characters the BOLT has after it.
 */
static void fail_mismatch(const char *filename,
			  const char *raw, const char *pos,
			  size_t len, struct bolt_file *bolt)
{
	/* linenum() rewinds pos, so note where the quote ends first. */
	const char *quote_end = pos + len;
	unsigned line = linenum(raw, &pos);
	size_t linelen = strcspn(pos, "\n");
	const char *rest = pos + linelen;
	size_t trylen;
	char *try;

	fprintf(stderr, "%s:%u:mismatch:%.*s\n",
		filename, line, (int)linelen, pos);

	/* The hint starts at the end of the reference line, which is at or
	 * after the start of the quote.  Shorten the span to match, so it
	 * stops at the end of the quote rather than running past it (and
	 * possibly past the end of the file buffer). */
	len = rest < quote_end ? (size_t)(quote_end - rest) : 0;

	/* Try to find longest match, as a hint. */
	try = code_to_regex(rest, len, false);
	for (trylen = strlen(try); trylen; try[--trylen] = '\0') {
		const char *p = strstr(bolt->contents, try);
		if (p) {
			fprintf(stderr, "Closest match: %s...[%.20s]\n",
				try, p + trylen);
			break;
		}
	}
	exit(1);
}
/* Report a reference to a BOLT we have no file for, as
 * "<filename>:<line>:unknown bolt <prefix>" on stderr, then exit with
 * status 1.  pos is the reference's position inside the file text raw. */
static void fail_nobolt(const char *filename,
			const char *raw, const char *pos,
			const char *bolt_prefix)
{
	const char *line_start = pos;
	unsigned lineno;

	/* Only the line number is needed; the rewound pointer is unused. */
	lineno = linenum(raw, &line_start);

	fprintf(stderr, "%s:%u:unknown bolt %s\n",
		filename, lineno, bolt_prefix);
	exit(1);
}
/* Look up the BOLT whose prefix matches bolt_prefix.
 *
 * An exact string match wins.  Failing that, if bolt_prefix parses (via
 * atoi) to a non-zero number, the first BOLT whose prefix parses to the
 * same number is returned.  Returns NULL when nothing matches. */
static struct bolt_file *find_bolt(const char *bolt_prefix,
				   struct bolt_file *bolts)
{
	const size_t count = tal_count(bolts);
	size_t wanted, idx;

	for (idx = 0; idx < count; idx++) {
		if (streq(bolts[idx].prefix, bolt_prefix))
			return &bolts[idx];
	}

	/* Now search for numerical match. */
	wanted = atoi(bolt_prefix);
	if (wanted == 0)
		return NULL;

	for (idx = 0; idx < count; idx++) {
		if (atoi(bolts[idx].prefix) == wanted)
			return &bolts[idx];
	}
	return NULL;
}
/* Usage: <prog> [--verbose] <bolt-dir> <srcfile>...
 *
 * Loads the BOLT documents under <bolt-dir>, then scans every source file
 * for "BOLT #N:" references and checks that the text quoted after each one
 * appears in the referenced BOLT.  The first unknown BOLT or mismatching
 * quote is reported and ends the program with status 1; returns 0 when
 * every reference checks out.
 */
int main(int argc, char *argv[])
{
	struct bolt_file *bolts;
	int i;

	err_set_progname(argv[0]);

	opt_register_noarg("--help|-h", opt_usage_and_exit,
			   "<bolt-dir> <srcfile>...\n"
			   "A source checker for BOLT RFC references.",
			   "Print this message.");
	opt_register_noarg("--verbose", opt_set_bool, &verbose,
			   "Print out files as we find them");

	opt_parse(&argc, argv, opt_log_stderr_exit);
	if (argc < 2)
		opt_usage_exit_fail("Expected a bolt directory");

	bolts = get_bolt_files(argv[1]);

	for (i = 2; i < argc; i++) {
		char *f = grab_file(NULL, argv[i]), *p, *bolt;
		size_t len;

		if (!f)
			err(1, "Loading %s", argv[i]);

		if (verbose)
			printf("Checking %s...\n", argv[i]);

		p = f;
		while ((bolt = find_bolt_ref(&p, &len)) != NULL) {
			char *pattern = code_to_regex(p, len, true);
			struct bolt_file *b = find_bolt(bolt, bolts);

			if (!b)
				fail_nobolt(argv[i], f, p, bolt);
			if (!tal_strreg(f, b->contents, pattern, NULL))
				fail_mismatch(argv[i], f, p, len, b);

			if (verbose)
				printf(" Found %.10s... in %s\n",
				       p, b->prefix);
			p += len;

			/* Both were allocated with no parent, so nothing
			 * else will release them. */
			tal_free(pattern);
			tal_free(bolt);
		}
		tal_free(f);
	}

	tal_free(bolts);
	return 0;
}