When comparing large commit ranges (e.g., 250,000+ commits), range-diff attempts to allocate an n×n cost matrix that can exhaust available memory. For example, with 256,784 commits (n = 513,568), the matrix would require approximately 1TB of memory (513,568² × 4 bytes ≈ 1.05 × 10¹² bytes), causing either immediate segmentation faults due to integer overflow or system hangs.

Add a memory limit check in get_correspondences() before allocating the cost matrix. This check uses the total size in bytes (n² × sizeof(int)) and compares it against a configurable maximum, preventing both excessive memory usage and integer overflow issues.

The limit is configurable via a new --max-memory option that accepts human-readable sizes (e.g., "1G", "500M"). The default is 4GB for 64-bit systems and 2GB for 32-bit systems. This allows comparing ranges of approximately 32,000 (16,000) commits - generous for real-world use cases while preventing impractical operations.

When the limit is exceeded, range-diff now displays a clear error message showing both the requested memory size and the maximum allowed, formatted in human-readable units for better user experience.

Example usage:

    git range-diff --max-memory=1G branch1...branch2
    git range-diff --max-memory=500M base..topic1 base..topic2

This approach was chosen over alternatives:

- Pre-counting commits: Would require spawning additional git processes and reading all commits twice
- Limiting by commit count: Less precise than actual memory usage
- Streaming approach: Would require significant refactoring of the current algorithm

This issue was previously discussed in:
https://lore.kernel.org/git/RFC-cover-v2-0.5-00000000000-20211210T122901Z-avarab@gmail.com/

Acked-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Paulo Casaretto <pcasaretto@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
43 lines
1.1 KiB
C
43 lines
1.1 KiB
C
#ifndef RANGE_DIFF_H
|
|
#define RANGE_DIFF_H
|
|
|
|
#include "diff.h"
|
|
#include "strvec.h"
|
|
|
|
/*
 * Default creation factor (presumably the initial value for
 * range_diff_options.creation_factor; confirm against callers).
 */
#define RANGE_DIFF_CREATION_FACTOR_DEFAULT 60
|
|
/*
 * Default cap, in bytes, on the memory range-diff may use for its cost
 * matrix: 4GB when pointers are at least 8 bytes wide, 2GB otherwise.
 *
 * Each factor is cast to size_t before the final multiplication so the
 * product is computed in size_t rather than in a (possibly 32-bit) long.
 */
#define RANGE_DIFF_MAX_MEMORY_DEFAULT \
	(sizeof(void*) >= 8 ? \
		((size_t)(1024L * 1024L) * (size_t)(4L * 1024L)) : /* 4GB on 64-bit */ \
		((size_t)(1024L * 1024L) * (size_t)(2L * 1024L))) /* 2GB on 32-bit */
|
|
|
|
/*
 * A much higher value than the default, when we KNOW we are comparing
 * the same series (e.g., used when format-patch calls range-diff).
 */
#define CREATION_FACTOR_FOR_THE_SAME_SERIES 999
|
|
|
|
/*
 * Options controlling a range-diff run; passed to show_range_diff().
 */
struct range_diff_options {
	/* Creation factor used when matching commits between the ranges. */
	int creation_factor;

	/*
	 * Single-bit flags. Their effects are implemented by the range-diff
	 * code and are not visible from this header.
	 */
	unsigned dual_color:1;
	unsigned left_only:1, right_only:1;
	unsigned include_merges:1;

	/* Upper bound, in bytes, on the memory used for the cost matrix. */
	size_t max_memory;

	const struct diff_options *diffopt; /* may be NULL */
	const struct strvec *other_arg; /* may be NULL */
};
|
|
|
|
/*
 * Compare series of commits in `range1` and `range2`, and emit to the
 * standard output.
 *
 * `opts` carries the settings for the comparison (see struct
 * range_diff_options).
 *
 * NOTE(review): the meaning of the int return value is not visible in
 * this header; confirm against the implementation.
 */
int show_range_diff(const char *range1, const char *range2,
		    struct range_diff_options *opts);
|
|
|
|
/*
 * Determine whether the given argument is usable as a range argument of `git
 * range-diff`, e.g. A..B.
 *
 * NOTE(review): presumably returns non-zero when `arg` is usable and zero
 * otherwise; confirm against the implementation.
 */
int is_range_diff_range(const char *arg);
|
|
|
|
#endif
|