zlib: fix gzip member head/buffer boundary issue

Make sure that, even if an `inflate()` call only sees the first few bytes of a following gzip member, all members are decompressed and included in the full output. This change also modifies behaviour for trailing garbage: if there is trailing garbage which happens to start with the gzip magic bytes, it is no longer discarded but instead causes an error to be thrown, since we cannot reliably tell random garbage from a valid gzip member anyway and have to try to decompress it. (Null byte padding is not affected, since it has been pointed out on various occasions that such padding is normal and is discarded by `gzip(1)`, too.) Adds tests for the special case in which the first `inflate()` call receives only the first few bytes of a second gzip member but not the whole header (or even just the magic bytes). PR-URL: https://github.com/nodejs/node/pull/5883 Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl> Reviewed-By: James M Snell <jasnell@gmail.com>
9 years ago · 54a5287e3e
3 changed files with 37 additions and 14 deletions
--- a/src/node_zlib.cc
+++ b/src/node_zlib.cc
@ -43,7 +43,6 @@ enum node_zlib_mode {

 #define GZIP_HEADER_ID1 0x1f
 #define GZIP_HEADER_ID2 0x8b
-#define GZIP_MIN_HEADER_SIZE 10

 void InitZlib(v8::Local<v8::Object> target);

@ -257,17 +256,16 @@ class ZCtx : public AsyncWrap {
            ctx->err_ = Z_NEED_DICT;
          }
        }
-        while (ctx->strm_.avail_in >= GZIP_MIN_HEADER_SIZE &&
+
+        while (ctx->strm_.avail_in > 0 &&
               ctx->mode_ == GUNZIP &&
-               ctx->err_ == Z_STREAM_END) {
+               ctx->err_ == Z_STREAM_END &&
+               ctx->strm_.next_in[0] != 0x00) {
          // Bytes remain in input buffer. Perhaps this is another compressed
          // member in the same archive, or just trailing garbage.
-          // Check the header to find out.
-          if (ctx->strm_.next_in[0] != GZIP_HEADER_ID1 ||
-              ctx->strm_.next_in[1] != GZIP_HEADER_ID2) {
-            // Not a valid gzip member
-            break;
-          }
+          // Trailing zero bytes are okay, though, since they are frequently
+          // used for padding.
+
          Reset(ctx);
          ctx->err_ = inflate(&ctx->strm_, ctx->flush_);
        }
--- a/test/parallel/test-zlib-from-concatenated-gzip.js
+++ b/test/parallel/test-zlib-from-concatenated-gzip.js
@ -7,9 +7,12 @@ const zlib = require('zlib');
 const path = require('path');
 const fs = require('fs');

+const abcEncoded = zlib.gzipSync('abc');
+const defEncoded = zlib.gzipSync('def');
+
 const data = Buffer.concat([
-  zlib.gzipSync('abc'),
-  zlib.gzipSync('def')
+  abcEncoded,
+  defEncoded
 ]);

 assert.equal(zlib.gunzipSync(data).toString(), 'abcdef');
@ -38,3 +41,26 @@ fs.createReadStream(pmmFileGz)
    assert.deepStrictEqual(Buffer.concat(pmmResultBuffers), pmmExpected,
      'result should match original random garbage');
  }));
+
+// test that the next gzip member can wrap around the input buffer boundary
+[0, 1, 2, 3, 4, defEncoded.length].forEach((offset) => {
+  const resultBuffers = [];
+
+  const unzip = zlib.createGunzip()
+   .on('error', (err) => {
+     assert.ifError(err);
+   })
+   .on('data', (data) => resultBuffers.push(data))
+   .on('finish', common.mustCall(() => {
+     assert.strictEqual(Buffer.concat(resultBuffers).toString(), 'abcdef',
+      `result should match original input (offset = ${offset})`);
+   }));
+
+  // first write: write "abc" + the first bytes of "def"
+  unzip.write(Buffer.concat([
+    abcEncoded, defEncoded.slice(0, offset)
+  ]));
+
+  // write remaining bytes of "def"
+  unzip.end(defEncoded.slice(offset));
+});
--- a/test/parallel/test-zlib-from-gzip-with-trailing-garbage.js
+++ b/test/parallel/test-zlib-from-gzip-with-trailing-garbage.js
@ -42,9 +42,8 @@ data = Buffer.concat([
  Buffer([0x1f, 0x8b, 0xff, 0xff])
 ]);

-assert.equal(zlib.gunzipSync(data).toString(), 'abcdef');
+assert.throws(() => zlib.gunzipSync(data));

 zlib.gunzip(data, common.mustCall((err, result) => {
-  assert.ifError(err);
-  assert.equal(result, 'abcdef', 'result should match original string');
+  assert(err);
 }));