hsmd: don't use daemon_conn for clients.

It offers them a DoS vector, if they don't read the replies. We really want to use raw ccan/io so we can avoid buffering for this. It makes the handing of fds for new clients a bit more complex (callback based), but it's not too bad. Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
6 years ago · b0769d9c0c
1 changed files with 120 additions and 62 deletions
--- a/hsmd/hsmd.c
+++ b/hsmd/hsmd.c
@ -67,8 +67,15 @@ static struct {

 /*~ We keep track of clients, but there's not much to keep. */
 struct client {
-	struct daemon_conn dc;
-	struct daemon_conn *master;
+	/* The connection to lightningd itself */
+	struct client *master;
+
+	/* The ccan/io async io connection for this client: it closes, we die. */
+	struct io_conn *conn;
+
+	/*~ io_read_wire needs a pointer to store incoming messages until
+	 * it has the complete thing; this is it. */
+	u8 *msg_in;

 	/* ~Useful for logging, but also used to derive the per-channel seed. */
 	struct pubkey id;
@ -105,11 +112,12 @@ extern void dev_disconnect_init(int fd);
 void dev_disconnect_init(int fd UNUSED) { }

 /* Pre-declare this, due to mutual recursion */
-static struct client *new_client(struct daemon_conn *master,
+static struct client *new_client(struct client *master,
 				 const struct pubkey *id,
 				 u64 dbid,
 				 const u64 capabilities,
 				 int fd);
+static struct io_plan *handle_client(struct io_conn *conn, struct client *c);

 /*~ ccan/compiler.h defines PRINTF_FMT as the gcc compiler hint so it will
 * check that fmt and other trailing arguments really are the correct type.
@ -133,7 +141,7 @@ static PRINTF_FMT(4,5)

 	/*~ If the client was actually lightningd, it's Game Over; we actually
 	 * fail in this case, and it will too. */
-	if (&c->dc == c->master) {
+	if (c == c->master) {
 		status_broken("%s", str);
 		master_badmsg(fromwire_peektype(msg_in), msg_in);
 	}
@ -164,13 +172,39 @@ static struct io_plan *bad_req(struct io_conn *conn,
 	return bad_req_fmt(conn, c, msg_in, "could not parse request");
 }

+/*~ This plan simply says: read the next packet into 'c->msg_in' (parent 'c'),
+ * and then call handle_client with argument 'c' */
+static struct io_plan *client_read_next(struct io_conn *conn, struct client *c)
+{
+	return io_read_wire(conn, c, &c->msg_in, handle_client, c);
+}
+
 /* This is the common pattern for the tail of each handler in this file. */
 static struct io_plan *req_reply(struct io_conn *conn,
 				 struct client *c,
 				 const u8 *msg_out TAKES)
 {
-	daemon_conn_send(&c->dc, msg_out);
-	return daemon_conn_read_next(conn, &c->dc);
+	/*~ Write this out, then read the next one.  This works perfectly for
+	 * a simple request/response system like this.
+	 *
+	 * Internally, the ccan/io subsystem gathers all the file descriptors,
+	 * figures out which want to write and read, asks the OS which ones
+	 * are available, and for those file descriptors, tries to do the
+	 * reads/writes we've asked it.	 It handles retry in the case where a
+	 * read or write is done partially.
+	 *
+	 * Since the OS does buffering internally (on my system, over 100k
+	 * worth) writes will normally succeed immediately.  However, if the
+	 * client is slow or malicious, and doesn't read from the socket as
+	 * fast as we're writing, eventually the socket buffer will fill up;
+	 * we don't care, because ccan/io will wait until there's room to
+	 * write this reply before it will read again.  The client just hurts
+	 * themselves, and there's no Denial of Service on us.
+	 *
+	 * If we were to queue outgoing messages ourselves, we *would* have to
+	 * consider such scenarios; this is why our daemons generally avoid
+	 * buffering from untrusted parties. */
+	return io_write_wire(conn, msg_out, client_read_next, c);
 }

 /*~ This returns the secret and/or public key for this node. */
@ -416,7 +450,7 @@ static struct io_plan *init_hsm(struct io_conn *conn,
 	struct pubkey node_id;

 	/* This must be the master. */
-	assert(&c->dc == c->master);
+	assert(c == c->master);

 	/*~ The fromwire_* routines are autogenerated, based on the message
 	 * definitions in hsm_client_wire.csv.  The format of those files is
@ -1092,7 +1126,37 @@ static struct io_plan *handle_sign_mutual_close_tx(struct io_conn *conn,
 	return req_reply(conn, c, take(towire_hsm_sign_tx_reply(NULL, &sig)));
 }

-/* This is used by by the master to create a new client connection (which
+/*~ Since we process requests then service them in strict order, and because
+ * only lightningd can request a new client fd, we can get away with a global
+ * here!  But because we are being tricky, I set it to an invalid value when
+ * not in use, and sprinkle assertions around. */
+static int pending_client_fd = -1;
+
+/*~ This is the callback from below: having sent the reply, we now send the
+ * fd for the client end of the new socketpair. */
+static struct io_plan *send_pending_client_fd(struct io_conn *conn,
+					      struct client *master)
+{
+	int fd = pending_client_fd;
+	/* This must be the master. */
+	assert(master == master->master);
+	assert(fd != -1);
+
+	/* This sanity check shouldn't be necessary, but it's cheap. */
+	pending_client_fd = -1;
+
+	/*~There's arcane UNIX magic to send an open file descriptor over a
+	 * UNIX domain socket.  There's no great way to autogenerate this
+	 * though; especially for the receive side, so we always pass these
+	 * manually immediately following the message.
+	 *
+	 * io_send_fd()'s third parameter is whether to close the local one
+	 * after sending; that saves us YA callback.
+	 */
+	return io_send_fd(conn, fd, true, client_read_next, master);
+}
+
+/*~ This is used by by the master to create a new client connection (which
 * becomes the HSM_FD for the subdaemon after forking). */
 static struct io_plan *pass_client_hsmfd(struct io_conn *conn,
 					 struct client *c,
@ -1102,8 +1166,8 @@ static struct io_plan *pass_client_hsmfd(struct io_conn *conn,
 	u64 dbid, capabilities;
 	struct pubkey id;

-	/* This must be the master. */
-	assert(&c->dc == c->master);
+	/* This must be lightningd itself. */
+	assert(c == c->master);

 	if (!fromwire_hsm_client_hsmfd(msg_in, &id, &dbid, &capabilities))
 		return bad_req(conn, c, msg_in);
@ -1113,14 +1177,15 @@ static struct io_plan *pass_client_hsmfd(struct io_conn *conn,
 		status_failed(STATUS_FAIL_INTERNAL_ERROR, "creating fds: %s",
 			      strerror(errno));

-	new_client(&c->dc, &id, dbid, capabilities, fds[0]);
-	daemon_conn_send(&c->dc, take(towire_hsm_client_hsmfd_reply(NULL)));
-	/* There's arcane UNIX magic to send an open file descriptor over a
-	 * UNIX domain socket.  There's no great way to autogenerate this
-	 * though; especially for the receive side, so we always pass these
-	 * manually immediately following the message. */
-	daemon_conn_send_fd(&c->dc, fds[1]);
-	return daemon_conn_read_next(conn, &c->dc);
+	status_trace("new_client: %"PRIu64, dbid);
+	new_client(c, &id, dbid, capabilities, fds[0]);
+
+	/*~ We stash this in a global, because we need to get both the fd and
+	 * the client pointer to the callback.  The other way would be to
+	 * create a boutique structure and hand that, but we don't need to. */
+	pending_client_fd = fds[1];
+	return io_write_wire(conn, take(towire_hsm_client_hsmfd_reply(NULL)),
+			     send_pending_client_fd, c);
 }

 /*~ For almost every wallet tx we use the BIP32 seed, but not for onchain
@ -1481,87 +1546,79 @@ static bool check_client_capabilities(struct client *client,
 }

 /*~ This is the core of the HSM daemon: handling requests. */
-static struct io_plan *handle_client(struct io_conn *conn,
-				     struct daemon_conn *dc)
+static struct io_plan *handle_client(struct io_conn *conn, struct client *c)
 {
-	/*~ Note the use of container_of here: this is the Linux kernel way of
-	 * doing callbacks.  Rather than have struct daemon_conn contain a
-	 * void * pointer to the structure for this use, we simply embed the
-	 * daemon_conn in the structure; container_of is a fancy way of doing
-	 * pointer arithmetic to get the containing structure, saving a
-	 * pointer. */
-	struct client *c = container_of(dc, struct client, dc);
-	enum hsm_wire_type t = fromwire_peektype(dc->msg_in);
+	enum hsm_wire_type t = fromwire_peektype(c->msg_in);

 	status_debug("Client: Received message %d from client", t);

 	/* Before we do anything else, is this client allowed to do
 	 * what he asks for? */
 	if (!check_client_capabilities(c, t))
-		return bad_req_fmt(conn, c, dc->msg_in,
+		return bad_req_fmt(conn, c, c->msg_in,
 				   "does not have capability to run %d", t);

 	/* Now actually go and do what the client asked for */
 	switch (t) {
 	case WIRE_HSM_INIT:
-		return init_hsm(conn, c, dc->msg_in);
+		return init_hsm(conn, c, c->msg_in);

 	case WIRE_HSM_CLIENT_HSMFD:
-		return pass_client_hsmfd(conn, c, dc->msg_in);
+		return pass_client_hsmfd(conn, c, c->msg_in);

 	case WIRE_HSM_GET_CHANNEL_BASEPOINTS:
-		return handle_get_channel_basepoints(conn, c, dc->msg_in);
+		return handle_get_channel_basepoints(conn, c, c->msg_in);

 	case WIRE_HSM_ECDH_REQ:
-		return handle_ecdh(conn, c, dc->msg_in);
+		return handle_ecdh(conn, c, c->msg_in);

 	case WIRE_HSM_CANNOUNCEMENT_SIG_REQ:
-		return handle_cannouncement_sig(conn, c, dc->msg_in);
+		return handle_cannouncement_sig(conn, c, c->msg_in);

 	case WIRE_HSM_CUPDATE_SIG_REQ:
-		return handle_channel_update_sig(conn, c, dc->msg_in);
+		return handle_channel_update_sig(conn, c, c->msg_in);

 	case WIRE_HSM_SIGN_FUNDING:
-		return handle_sign_funding_tx(conn, c, dc->msg_in);
+		return handle_sign_funding_tx(conn, c, c->msg_in);

 	case WIRE_HSM_NODE_ANNOUNCEMENT_SIG_REQ:
-		return handle_sign_node_announcement(conn, c, dc->msg_in);
+		return handle_sign_node_announcement(conn, c, c->msg_in);

 	case WIRE_HSM_SIGN_INVOICE:
-		return handle_sign_invoice(conn, c, dc->msg_in);
+		return handle_sign_invoice(conn, c, c->msg_in);

 	case WIRE_HSM_SIGN_WITHDRAWAL:
-		return handle_sign_withdrawal_tx(conn, c, dc->msg_in);
+		return handle_sign_withdrawal_tx(conn, c, c->msg_in);

 	case WIRE_HSM_SIGN_COMMITMENT_TX:
-		return handle_sign_commitment_tx(conn, c, dc->msg_in);
+		return handle_sign_commitment_tx(conn, c, c->msg_in);

 	case WIRE_HSM_SIGN_DELAYED_PAYMENT_TO_US:
-		return handle_sign_delayed_payment_to_us(conn, c, dc->msg_in);
+		return handle_sign_delayed_payment_to_us(conn, c, c->msg_in);

 	case WIRE_HSM_SIGN_REMOTE_HTLC_TO_US:
-		return handle_sign_remote_htlc_to_us(conn, c, dc->msg_in);
+		return handle_sign_remote_htlc_to_us(conn, c, c->msg_in);

 	case WIRE_HSM_SIGN_PENALTY_TO_US:
-		return handle_sign_penalty_to_us(conn, c, dc->msg_in);
+		return handle_sign_penalty_to_us(conn, c, c->msg_in);

 	case WIRE_HSM_SIGN_LOCAL_HTLC_TX:
-		return handle_sign_local_htlc_tx(conn, c, dc->msg_in);
+		return handle_sign_local_htlc_tx(conn, c, c->msg_in);

 	case WIRE_HSM_GET_PER_COMMITMENT_POINT:
-		return handle_get_per_commitment_point(conn, c, dc->msg_in);
+		return handle_get_per_commitment_point(conn, c, c->msg_in);

 	case WIRE_HSM_CHECK_FUTURE_SECRET:
-		return handle_check_future_secret(conn, c, dc->msg_in);
+		return handle_check_future_secret(conn, c, c->msg_in);

 	case WIRE_HSM_SIGN_REMOTE_COMMITMENT_TX:
-		return handle_sign_remote_commitment_tx(conn, c, dc->msg_in);
+		return handle_sign_remote_commitment_tx(conn, c, c->msg_in);

 	case WIRE_HSM_SIGN_REMOTE_HTLC_TX:
-		return handle_sign_remote_htlc_tx(conn, c, dc->msg_in);
+		return handle_sign_remote_htlc_tx(conn, c, c->msg_in);

 	case WIRE_HSM_SIGN_MUTUAL_CLOSE_TX:
-		return handle_sign_mutual_close_tx(conn, c, dc->msg_in);
+		return handle_sign_mutual_close_tx(conn, c, c->msg_in);

 	case WIRE_HSM_ECDH_RESP:
 	case WIRE_HSM_CANNOUNCEMENT_SIG_REPLY:
@ -1581,7 +1638,7 @@ static struct io_plan *handle_client(struct io_conn *conn,
 		break;
 	}

-	return bad_req_fmt(conn, c, dc->msg_in, "Unknown request");
+	return bad_req_fmt(conn, c, c->msg_in, "Unknown request");
 }

 /*~ This is the destructor on our client: we may call it manually, but
@ -1594,7 +1651,7 @@ static void destroy_client(struct client *c)
 			      "Failed to remove client dbid %"PRIu64, c->dbid);
 }

-static struct client *new_client(struct daemon_conn *master,
+static struct client *new_client(struct client *master,
 				 const struct pubkey *id,
 				 u64 dbid,
 				 const u64 capabilities,
@ -1612,22 +1669,23 @@ static struct client *new_client(struct daemon_conn *master,

 	c->master = master;
 	c->capabilities = capabilities;
-	/*~ This is our daemon_conn infrastructure, which does the queueing for
-	 * us; we just tell it what our handler function is. */
-	daemon_conn_init(c, &c->dc, fd, handle_client, NULL);
+	/*~ This is the core of ccan/io: the connection creation calls a
+	 * callback which returns the initial plan to execute: in our case,
+	 * read a message.*/
+	c->conn = io_new_conn(master, fd, client_read_next, c);

 	/*~ tal_steal() moves a pointer to a new parent.  At this point, the
 	 * hierarchy is:
 	 *
-	 *   master -> c -> daemon_conn.conn
+	 *   master -> c
+	 *   master -> c->conn
 	 *
-	 * We want to invert the bottom two, so that if the io_conn closes,
+	 * We want to the c->conn to own 'c', so that if the io_conn closes,
 	 * the client is freed:
 	 *
 	 *   master -> c->conn -> c.
 	 */
-	tal_steal(master, c->dc.conn);
-	tal_steal(c->dc.conn, c);
+	tal_steal(c->conn, c);

 	/* We put the special zero-db HSM connections into an array, the rest
 	 * go into the map. */
@ -1639,7 +1697,7 @@ static struct client *new_client(struct daemon_conn *master,

 		/* Close conn and free any old client of this dbid. */
 		if (old_client)
-			io_close(old_client->dc.conn);
+			io_close(old_client->conn);

 		if (!uintmap_add(&clients, dbid, c))
 			status_failed(STATUS_FAIL_INTERNAL_ERROR,
@ -1650,7 +1708,7 @@ static struct client *new_client(struct daemon_conn *master,
 	return c;
 }

-static void master_gone(struct io_conn *unused UNUSED, struct daemon_conn *dc UNUSED)
+static void master_gone(struct io_conn *unused UNUSED, struct client *c UNUSED)
 {
 	daemon_shutdown();
 	/* Can't tell master, it's gone. */
@ -1677,10 +1735,10 @@ int main(int argc, char *argv[])
 			    REQ_FD);

 	/* We're our own master! */
-	master->master = &master->dc;
+	master->master = master;

 	/* When conn closes, everything is freed. */
-	io_set_finish(master->dc.conn, master_gone, &master->dc);
+	io_set_finish(master->conn, master_gone, master);

 	/*~ The two NULL args a list of timers, and the timer which expired:
 	 * we don't have any timers. */