From 2d7e31614dbc61b4fd5a594649c12d65398a815e Mon Sep 17 00:00:00 2001 From: Anna Henningsen Date: Thu, 24 Mar 2016 02:13:09 +0100 Subject: [PATCH] zlib: detect gzip files when using unzip* Detect whether a gzip file is being passed to `unzip*` by testing the first bytes for the gzip magic bytes, and setting the decompression mode to `GUNZIP` or `INFLATE` according to the result. This enables gzip-only features like multi-member support to be used together with the `unzip*` autodetection support and thereby makes `gunzip*` and `unzip*` return identical results for gzip input again. Add a simple test for checking that features specific to `zlib.gunzip`, notably support for multiple members, also work when using `zlib.unzip`. PR-URL: https://github.com/nodejs/node/pull/5884 Reviewed-By: Ben Noordhuis Reviewed-By: James M Snell --- src/node_zlib.cc | 50 ++++++++++++++++++- .../test-zlib-from-concatenated-gzip.js | 14 ++++++ .../test-zlib-unzip-one-byte-chunks.js | 28 +++++++++++ 3 files changed, 91 insertions(+), 1 deletion(-) create mode 100644 test/parallel/test-zlib-unzip-one-byte-chunks.js diff --git a/src/node_zlib.cc b/src/node_zlib.cc index 4bf5cc76ba..aee1cbae95 100644 --- a/src/node_zlib.cc +++ b/src/node_zlib.cc @@ -68,7 +68,8 @@ class ZCtx : public AsyncWrap { windowBits_(0), write_in_progress_(false), pending_close_(false), - refs_(0) { + refs_(0), + gzip_id_bytes_read_(0) { MakeWeak(this); } @@ -225,6 +226,8 @@ class ZCtx : public AsyncWrap { static void Process(uv_work_t* work_req) { ZCtx *ctx = ContainerOf(&ZCtx::work_req_, work_req); + const Bytef* next_expected_header_byte = nullptr; + // If the avail_out is left at 0, then it means that it ran out // of room. If there was avail_out left over, then it means // that all of the input was consumed. 
@@ -235,6 +238,50 @@ class ZCtx : public AsyncWrap { ctx->err_ = deflate(&ctx->strm_, ctx->flush_); break; case UNZIP: + if (ctx->strm_.avail_in > 0) { + next_expected_header_byte = ctx->strm_.next_in; + } + + switch (ctx->gzip_id_bytes_read_) { + case 0: + if (next_expected_header_byte == nullptr) { + break; + } + + if (*next_expected_header_byte == GZIP_HEADER_ID1) { + ctx->gzip_id_bytes_read_ = 1; + next_expected_header_byte++; + + if (ctx->strm_.avail_in == 1) { + // The only available byte was already read. + break; + } + } else { + ctx->mode_ = INFLATE; + break; + } + + // fallthrough + case 1: + if (next_expected_header_byte == nullptr) { + break; + } + + if (*next_expected_header_byte == GZIP_HEADER_ID2) { + ctx->gzip_id_bytes_read_ = 2; + ctx->mode_ = GUNZIP; + } else { + // There is no actual difference between INFLATE and INFLATERAW + // (after initialization). + ctx->mode_ = INFLATE; + } + + break; + default: + CHECK(0 && "invalid number of gzip magic number bytes read"); + } + + // fallthrough case INFLATE: case GUNZIP: case INFLATERAW: @@ -591,6 +638,7 @@ class ZCtx : public AsyncWrap { bool write_in_progress_; bool pending_close_; unsigned int refs_; + unsigned int gzip_id_bytes_read_; }; diff --git a/test/parallel/test-zlib-from-concatenated-gzip.js b/test/parallel/test-zlib-from-concatenated-gzip.js index d5bf1e0e00..b5007820c8 100644 --- a/test/parallel/test-zlib-from-concatenated-gzip.js +++ b/test/parallel/test-zlib-from-concatenated-gzip.js @@ -22,6 +22,20 @@ zlib.gunzip(data, common.mustCall((err, result) => { assert.equal(result, 'abcdef', 'result should match original string'); })); +zlib.unzip(data, common.mustCall((err, result) => { + assert.ifError(err); + assert.equal(result, 'abcdef', 'result should match original string'); +})); + +// Multi-member support does not apply to zlib inflate/deflate. 
+zlib.unzip(Buffer.concat([
+  zlib.deflateSync('abc'),
+  zlib.deflateSync('def')
+]), common.mustCall((err, result) => {
+  assert.ifError(err);
+  assert.equal(result, 'abc', 'result should match contents of first "member"');
+}));
+
 // files that have the "right" magic bytes for starting a new gzip member
 // in the middle of themselves, even if they are part of a single
 // regularly compressed member
diff --git a/test/parallel/test-zlib-unzip-one-byte-chunks.js b/test/parallel/test-zlib-unzip-one-byte-chunks.js
new file mode 100644
index 0000000000..f1b1c0f508
--- /dev/null
+++ b/test/parallel/test-zlib-unzip-one-byte-chunks.js
@@ -0,0 +1,28 @@
+'use strict';
+// Checks that zlib.createUnzip() decodes every member of a concatenated
+// gzip stream even when the input is written one byte per chunk.
+const common = require('../common');
+const assert = require('assert');
+const zlib = require('zlib');
+
+// Two gzip members back to back; the expected output is 'abc' + 'def'.
+const data = Buffer.concat([zlib.gzipSync('abc'), zlib.gzipSync('def')]);
+const resultBuffers = [];
+
+const unzip = zlib.createUnzip()
+  .on('error', (err) => {
+    assert.ifError(err);
+  })
+  .on('data', (chunk) => resultBuffers.push(chunk))
+  .on('finish', common.mustCall(() => {
+    const unzipped = Buffer.concat(resultBuffers).toString();
+    assert.strictEqual(unzipped, 'abcdef',
+                       `'${unzipped}' should match 'abcdef'`);
+  }));
+
+// Write each byte as its own chunk, so the two gzip magic bytes reach the
+// stream in separate writes.
+for (let i = 0; i < data.length; i++) {
+  unzip.write(Buffer.from([data[i]]));
+}
+unzip.end();