Browse Source

zlib: fix gzip member head/buffer boundary issue

Make sure that, even if an `inflate()` call only sees the first
few bytes of a following gzip member, all members are decompressed
and included in the full output.

This change also modifies the behaviour for trailing garbage:
If there is trailing garbage which happens to start with the
gzip magic bytes, it is no longer discarded but instead causes
an error, since we cannot reliably tell random garbage from
a valid gzip member anyway and have to try to decompress it.
(Null byte padding is not affected, since it has been pointed
out on various occasions that such padding is normal and
discarded by `gzip(1)`, too.)

Adds tests for the special case that the first `inflate()` call
receives only the first few bytes of a second gzip member but
not the whole header (or even just the magic bytes).

PR-URL: https://github.com/nodejs/node/pull/5883
Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
Reviewed-By: James M Snell <jasnell@gmail.com>
process-exit-stdio-flushing
Anna Henningsen 9 years ago
committed by Ben Noordhuis
parent
commit
54a5287e3e
  1. 16
      src/node_zlib.cc
  2. 30
      test/parallel/test-zlib-from-concatenated-gzip.js
  3. 5
      test/parallel/test-zlib-from-gzip-with-trailing-garbage.js

16
src/node_zlib.cc

@ -43,7 +43,6 @@ enum node_zlib_mode {
#define GZIP_HEADER_ID1 0x1f
#define GZIP_HEADER_ID2 0x8b
#define GZIP_MIN_HEADER_SIZE 10
void InitZlib(v8::Local<v8::Object> target);
@ -257,17 +256,16 @@ class ZCtx : public AsyncWrap {
ctx->err_ = Z_NEED_DICT;
}
}
while (ctx->strm_.avail_in >= GZIP_MIN_HEADER_SIZE &&
while (ctx->strm_.avail_in > 0 &&
ctx->mode_ == GUNZIP &&
ctx->err_ == Z_STREAM_END) {
ctx->err_ == Z_STREAM_END &&
ctx->strm_.next_in[0] != 0x00) {
// Bytes remain in input buffer. Perhaps this is another compressed
// member in the same archive, or just trailing garbage.
// Check the header to find out.
if (ctx->strm_.next_in[0] != GZIP_HEADER_ID1 ||
ctx->strm_.next_in[1] != GZIP_HEADER_ID2) {
// Not a valid gzip member
break;
}
// Trailing zero bytes are okay, though, since they are frequently
// used for padding.
Reset(ctx);
ctx->err_ = inflate(&ctx->strm_, ctx->flush_);
}

30
test/parallel/test-zlib-from-concatenated-gzip.js

@ -7,9 +7,12 @@ const zlib = require('zlib');
const path = require('path');
const fs = require('fs');
const abcEncoded = zlib.gzipSync('abc');
const defEncoded = zlib.gzipSync('def');
const data = Buffer.concat([
zlib.gzipSync('abc'),
zlib.gzipSync('def')
abcEncoded,
defEncoded
]);
assert.equal(zlib.gunzipSync(data).toString(), 'abcdef');
@ -38,3 +41,26 @@ fs.createReadStream(pmmFileGz)
assert.deepStrictEqual(Buffer.concat(pmmResultBuffers), pmmExpected,
'result should match original random garbage');
}));
// A gzip member following the first one may straddle the boundary between
// two input chunks. For each split offset, the first chunk carries all of the
// "abc" member plus the leading `offset` bytes of the "def" member, and the
// second chunk carries the rest. The concatenated output must be "abcdef".
for (const offset of [0, 1, 2, 3, 4, defEncoded.length]) {
  const chunks = [];
  const gunzip = zlib.createGunzip();

  // Any stream error fails the test.
  gunzip.on('error', (err) => {
    assert.ifError(err);
  });
  gunzip.on('data', (chunk) => chunks.push(chunk));
  gunzip.on('finish', common.mustCall(() => {
    const actual = Buffer.concat(chunks).toString();
    assert.strictEqual(
      actual,
      'abcdef',
      `result should match original input (offset = ${offset})`
    );
  }));

  // First chunk: the whole "abc" member and the head of the "def" member.
  const head = Buffer.concat([abcEncoded, defEncoded.slice(0, offset)]);
  gunzip.write(head);

  // Second chunk: whatever is left of the "def" member.
  const tail = defEncoded.slice(offset);
  gunzip.end(tail);
}

5
test/parallel/test-zlib-from-gzip-with-trailing-garbage.js

@ -42,9 +42,8 @@ data = Buffer.concat([
Buffer([0x1f, 0x8b, 0xff, 0xff])
]);
assert.equal(zlib.gunzipSync(data).toString(), 'abcdef');
assert.throws(() => zlib.gunzipSync(data));
zlib.gunzip(data, common.mustCall((err, result) => {
assert.ifError(err);
assert.equal(result, 'abcdef', 'result should match original string');
assert(err);
}));

Loading…
Cancel
Save