This fixes a case where malformed object input can cause us to hit a
BUG() call in the git-zlib.c code.
The zlib format allows the use of preset dictionaries to reduce the size
of deflated data. The checksum of the dictionary is computed by the
deflate code and goes into the stream. On the inflating side, zlib sees
the dictionary checksum and returns Z_NEED_DICT, asking the caller to
provide the dictionary data via inflateSetDictionary().
This should never happen in Git, because we never provide a dictionary
for deflating (and if we get a stream that mentions a dictionary, we
have no idea how to provide it). So normally Z_NEED_DICT is a hard error
for us. But something interesting happens if we _do_ happen to see it
(e.g., because of a corrupt or malicious input).
In git_inflate() as we loop over calls to zlib's inflate(), we translate
between our large-integer git_zstream values and zlib's native z_stream
types, copying in and out with zlib_pre_call() and zlib_post_call(). In
zlib_post_call() we have a few sanity checks, including one that checks
that the number of bytes consumed by zlib (as measured by it moving the
"next_in" pointer) is equal to the movement of its "total_in" count.
But these do not correspond when we see Z_NEED_DICT! Zlib consumes the
bytes from the input buffer but it does not increment total_in. And so
we hit the BUG("total_in mismatch") call.
There are a few options here:
- We could ditch that BUG() check. It is making too many assumptions
about how zlib updates these values. But it does have value in most
cases as a sanity check on the values we're copying.
- We could skip the zlib_post_call() entirely when we see Z_NEED_DICT.
We know that it's a hard error for us, so we should just send the
status up the stack and let the caller bail.
The downside is that if we ever did want to support dictionaries,
we couldn't (the git_zstream will be out of sync, since we never
copied its values back from the z_stream).
- We could continue to call zlib_post_call(), but skip just that BUG()
check if the status is Z_NEED_DICT. This keeps git_inflate() as a
thin wrapper around inflate(), and would let us later support
dictionaries for some calls if we wanted to.
This patch uses the third approach. It seems like the least-surprising
thing to keep git_inflate() as close to inflate() as possible. And while
it makes the diff a bit larger (since we have to pass the status down
to the zlib_post_call() function), it's a static local function, and
every caller by definition will have just made a zlib call (and so will
have a status integer).
Co-authored-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
280 lines
6.4 KiB
C
280 lines
6.4 KiB
C
/*
 * zlib wrappers to make sure we don't silently miss errors
 * at init time.
 */
|
|
#include "git-compat-util.h"
|
|
#include "git-zlib.h"
|
|
|
|
static const char *zerr_to_string(int status)
|
|
{
|
|
switch (status) {
|
|
case Z_MEM_ERROR:
|
|
return "out of memory";
|
|
case Z_VERSION_ERROR:
|
|
return "wrong version";
|
|
case Z_NEED_DICT:
|
|
return "needs dictionary";
|
|
case Z_DATA_ERROR:
|
|
return "data stream error";
|
|
case Z_STREAM_ERROR:
|
|
return "stream consistency error";
|
|
default:
|
|
return "unknown error";
|
|
}
|
|
}
|
|
|
|
/*
|
|
* avail_in and avail_out in zlib are counted in uInt, which typically
|
|
* limits the size of the buffer we can use to 4GB when interacting
|
|
* with zlib in a single call to inflate/deflate.
|
|
*/
|
|
/* #define ZLIB_BUF_MAX ((uInt)-1) */
|
|
#define ZLIB_BUF_MAX ((uInt) 1024 * 1024 * 1024) /* 1GB */
|
|
static inline uInt zlib_buf_cap(unsigned long len)
|
|
{
|
|
return (ZLIB_BUF_MAX < len) ? ZLIB_BUF_MAX : len;
|
|
}
|
|
|
|
/*
 * Copy the caller-visible git_zstream state into the embedded zlib
 * z_stream before calling into zlib.  The available-byte counts go
 * through zlib_buf_cap(), so a single zlib call may see less than the
 * caller's full buffer.
 */
static void zlib_pre_call(git_zstream *s)
{
	/* input side */
	s->z.next_in = s->next_in;
	s->z.avail_in = zlib_buf_cap(s->avail_in);
	s->z.total_in = s->total_in;

	/* output side */
	s->z.next_out = s->next_out;
	s->z.avail_out = zlib_buf_cap(s->avail_out);
	s->z.total_out = s->total_out;
}
|
|
|
|
/*
 * Copy the z_stream state back into the git_zstream after a zlib call.
 *
 * "status" is the return value of the zlib call that was just made.
 * Before copying, the movement of zlib's next_in/next_out pointers is
 * cross-checked against the movement of its total_in/total_out
 * counters; a disagreement is reported via BUG().
 */
static void zlib_post_call(git_zstream *s, int status)
{
	unsigned long in_delta = s->z.next_in - s->next_in;
	unsigned long out_delta = s->z.next_out - s->next_out;

	if (s->total_out + out_delta != s->z.total_out)
		BUG("total_out mismatch");

	/*
	 * When zlib returns Z_NEED_DICT it has advanced next_in without
	 * updating total_in, so the input-side check cannot hold and is
	 * skipped for that status.
	 */
	if (status != Z_NEED_DICT &&
	    s->total_in + in_delta != s->z.total_in)
		BUG("total_in mismatch");

	/* input side */
	s->total_in = s->z.total_in;
	s->next_in = s->z.next_in;
	s->avail_in -= in_delta;

	/* output side */
	s->total_out = s->z.total_out;
	s->next_out = s->z.next_out;
	s->avail_out -= out_delta;
}
|
|
|
|
/*
 * Initialize "strm" for inflating via inflateInit().  Any status
 * other than Z_OK is fatal: we die() with a description of the error.
 */
void git_inflate_init(git_zstream *strm)
{
	int ret;

	zlib_pre_call(strm);
	ret = inflateInit(&strm->z);
	zlib_post_call(strm, ret);

	if (ret != Z_OK)
		die("inflateInit: %s (%s)", zerr_to_string(ret),
		    strm->z.msg ? strm->z.msg : "no message");
}
|
|
|
|
/*
 * Initialize "strm" for inflating gzip-format input only.  Any status
 * other than Z_OK from inflateInit2() is fatal.
 */
void git_inflate_init_gzip_only(git_zstream *strm)
{
	/*
	 * 15 is the default number of window bits; adding 16 makes zlib
	 * accept only gzip and yield Z_DATA_ERROR when fed zlib format.
	 */
	const int windowBits = 15 + 16;
	int ret;

	zlib_pre_call(strm);
	ret = inflateInit2(&strm->z, windowBits);
	zlib_post_call(strm, ret);

	if (ret != Z_OK)
		die("inflateInit2: %s (%s)", zerr_to_string(ret),
		    strm->z.msg ? strm->z.msg : "no message");
}
|
|
|
|
/*
 * Tear down an inflate stream with inflateEnd().  A failure here is
 * reported with error() but is not fatal.
 */
void git_inflate_end(git_zstream *strm)
{
	int ret;

	zlib_pre_call(strm);
	ret = inflateEnd(&strm->z);
	zlib_post_call(strm, ret);

	if (ret != Z_OK)
		error("inflateEnd: %s (%s)", zerr_to_string(ret),
		      strm->z.msg ? strm->z.msg : "no message");
}
|
|
|
|
/*
 * Inflate from strm's input buffer into its output buffer, calling
 * zlib's inflate() as many times as needed.
 *
 * Returns the status of the last inflate() call.  Z_OK, Z_BUF_ERROR
 * and Z_STREAM_END are returned silently; any other status is also
 * reported with error() before being returned.  Z_MEM_ERROR is fatal.
 */
int git_inflate(git_zstream *strm, int flush)
{
	int status;

	do {
		zlib_pre_call(strm);

		/*
		 * Pass the caller's flush mode (e.g. Z_FINISH) only when
		 * this call feeds zlib all of the remaining input; if the
		 * input was capped, more will follow in a later round.
		 */
		status = inflate(&strm->z,
				 (strm->z.avail_in == strm->avail_in)
				 ? flush : 0);
		if (status == Z_MEM_ERROR)
			die("inflate: out of memory");
		zlib_post_call(strm, status);

		/*
		 * Go another round while progress is still possible:
		 * zlib filled the output space it was given, but the
		 * caller's buffer has room left.
		 */
	} while (strm->avail_out && !strm->z.avail_out &&
		 (status == Z_OK || status == Z_BUF_ERROR));

	/* Z_BUF_ERROR: normal, needs more space in the output buffer */
	if (status != Z_BUF_ERROR &&
	    status != Z_OK &&
	    status != Z_STREAM_END)
		error("inflate: %s (%s)", zerr_to_string(status),
		      strm->z.msg ? strm->z.msg : "no message");

	return status;
}
|
|
|
|
#if defined(NO_DEFLATE_BOUND) || ZLIB_VERNUM < 0x1200
|
|
#define deflateBound(c,s) ((s) + (((s) + 7) >> 3) + (((s) + 63) >> 6) + 11)
|
|
#endif
|
|
|
|
unsigned long git_deflate_bound(git_zstream *strm, unsigned long size)
|
|
{
|
|
return deflateBound(&strm->z, size);
|
|
}
|
|
|
|
/*
 * Zero "strm" and initialize it for deflating at the given
 * compression level via deflateInit().  Any status other than Z_OK
 * is fatal.
 */
void git_deflate_init(git_zstream *strm, int level)
{
	int ret;

	memset(strm, 0, sizeof(*strm));

	zlib_pre_call(strm);
	ret = deflateInit(&strm->z, level);
	zlib_post_call(strm, ret);

	if (ret != Z_OK)
		die("deflateInit: %s (%s)", zerr_to_string(ret),
		    strm->z.msg ? strm->z.msg : "no message");
}
|
|
|
|
/*
 * Shared helper for the deflate initializers that need a specific
 * windowBits value: zero "strm" and set it up with deflateInit2().
 * Any status other than Z_OK is fatal.
 */
static void do_git_deflate_init(git_zstream *strm, int level, int windowBits)
{
	int ret;

	memset(strm, 0, sizeof(*strm));

	zlib_pre_call(strm);
	ret = deflateInit2(&strm->z, level, Z_DEFLATED, windowBits,
			   8, Z_DEFAULT_STRATEGY);
	zlib_post_call(strm, ret);

	if (ret != Z_OK)
		die("deflateInit2: %s (%s)", zerr_to_string(ret),
		    strm->z.msg ? strm->z.msg : "no message");
}
|
|
|
|
/* Initialize "strm" for deflating into gzip format at "level". */
void git_deflate_init_gzip(git_zstream *strm, int level)
{
	/*
	 * 15 is the default number of window bits; adding 16 makes zlib
	 * generate a gzip header/trailer instead of the zlib wrapper.
	 */
	const int gzip_window_bits = 15 + 16;

	do_git_deflate_init(strm, level, gzip_window_bits);
}
|
|
|
|
/* Initialize "strm" for deflating into a raw stream at "level". */
void git_deflate_init_raw(git_zstream *strm, int level)
{
	/*
	 * 15 is the default number of window bits; negating it yields
	 * raw compressed data without the zlib header and trailer.
	 */
	const int raw_window_bits = -15;

	do_git_deflate_init(strm, level, raw_window_bits);
}
|
|
|
|
/*
 * Release a deflate stream with deflateEnd() and hand its status
 * back to the caller without reporting anything.
 */
int git_deflate_abort(git_zstream *strm)
{
	int ret;

	zlib_pre_call(strm);
	ret = deflateEnd(&strm->z);
	zlib_post_call(strm, ret);

	return ret;
}
|
|
|
|
/*
 * Release a deflate stream via git_deflate_abort().  A status other
 * than Z_OK is reported with error() but is not fatal.
 */
void git_deflate_end(git_zstream *strm)
{
	int ret = git_deflate_abort(strm);

	if (ret != Z_OK)
		error("deflateEnd: %s (%s)", zerr_to_string(ret),
		      strm->z.msg ? strm->z.msg : "no message");
}
|
|
|
|
/*
 * Release a deflate stream with deflateEnd() and return its status;
 * nothing is printed on failure, so the caller decides how to react.
 */
int git_deflate_end_gently(git_zstream *strm)
{
	int ret;

	zlib_pre_call(strm);
	ret = deflateEnd(&strm->z);
	zlib_post_call(strm, ret);

	return ret;
}
|
|
|
|
/*
 * Deflate from strm's input buffer into its output buffer, calling
 * zlib's deflate() as many times as needed.
 *
 * Returns the status of the last deflate() call.  Z_OK, Z_BUF_ERROR
 * and Z_STREAM_END are returned silently; any other status is also
 * reported with error() before being returned.  Z_MEM_ERROR is fatal.
 */
int git_deflate(git_zstream *strm, int flush)
{
	int status;

	do {
		zlib_pre_call(strm);

		/*
		 * Pass the caller's flush mode (e.g. Z_FINISH) only when
		 * this call feeds zlib all of the remaining input; if the
		 * input was capped, more will follow in a later round.
		 */
		status = deflate(&strm->z,
				 (strm->z.avail_in == strm->avail_in)
				 ? flush : 0);
		if (status == Z_MEM_ERROR)
			die("deflate: out of memory");
		zlib_post_call(strm, status);

		/*
		 * Go another round while progress is still possible:
		 * zlib filled the output space it was given, but the
		 * caller's buffer has room left.
		 */
	} while (strm->avail_out && !strm->z.avail_out &&
		 (status == Z_OK || status == Z_BUF_ERROR));

	/* Z_BUF_ERROR: normal, needs more space in the output buffer */
	if (status != Z_BUF_ERROR &&
	    status != Z_OK &&
	    status != Z_STREAM_END)
		error("deflate: %s (%s)", zerr_to_string(status),
		      strm->z.msg ? strm->z.msg : "no message");

	return status;
}
|