Files
git/builtin/diff.c
Patrick Steinhardt 7d70b29c4f hash: stop depending on the_repository in null_oid()
The `null_oid()` function returns the object ID that only consists of
zeroes. Naturally, this ID also depends on the hash algorithm used, as
the number of zeroes is different between SHA1 and SHA256. Consequently,
the function returns the hash-algorithm-specific null object ID.

This is currently done by depending on `the_hash_algo`, which implicitly
makes us depend on `the_repository`. Refactor the function to instead
pass in the hash algorithm for which we want to retrieve the null object
ID. Adapt callsites accordingly by passing in `the_repository`, thus
bubbling up the dependency on that global variable by one layer.

There are a couple of trivial exceptions for subsystems that already got
rid of `the_repository`. These subsystems instead use the repository
that is available via the calling context:

  - "builtin/grep.c"
  - "grep.c"
  - "refs/debug.c"

There are also two non-trivial exceptions:

  - "diff-no-index.c": Here we know that we may not have a repository
    initialized at all, so we cannot rely on `the_repository`. Instead,
    we adapt `diff_no_index()` to get a `struct git_hash_algo` as
    parameter. The only caller is located in "builtin/diff.c", where we
    know to call `repo_set_hash_algo()` in case we're running outside of
    a Git repository. Consequently, it is fine to continue passing
    `the_repository->hash_algo` even in this case.

  - "builtin/ls-files.c": There is an in-flight patch series that drops
    `USE_THE_REPOSITORY_VARIABLE` in this file, which causes a semantic
    conflict because we use `null_oid()` in `show_submodule()`. The
    value is passed to `repo_submodule_init()`, which may use the object
    ID to resolve a tree-ish in the superproject from which we want to
    read the submodule config. As such, the object ID should refer to an
    object in the superproject, and consequently we need to use its hash
    algorithm.

    This means that we could in theory just not bother about this edge
    case at all and just use `the_repository` in "diff-no-index.c". But
    doing so would feel misdesigned.

Remove the `USE_THE_REPOSITORY_VARIABLE` preprocessor define in
"hash.c".

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2025-03-10 13:16:20 -07:00

637 lines
17 KiB
C

/*
* Builtin "git diff"
*
* Copyright (c) 2006 Junio C Hamano
*/
#define USE_THE_REPOSITORY_VARIABLE
#define DISABLE_SIGN_COMPARE_WARNINGS
#include "builtin.h"
#include "config.h"
#include "ewah/ewok.h"
#include "lockfile.h"
#include "color.h"
#include "commit.h"
#include "gettext.h"
#include "tag.h"
#include "diff.h"
#include "diff-merges.h"
#include "diffcore.h"
#include "preload-index.h"
#include "read-cache-ll.h"
#include "revision.h"
#include "log-tree.h"
#include "setup.h"
#include "oid-array.h"
#include "tree.h"
#define DIFF_NO_INDEX_EXPLICIT 1
#define DIFF_NO_INDEX_IMPLICIT 2
static const char builtin_diff_usage[] =
"git diff [<options>] [<commit>] [--] [<path>...]\n"
" or: git diff [<options>] --cached [--merge-base] [<commit>] [--] [<path>...]\n"
" or: git diff [<options>] [--merge-base] <commit> [<commit>...] <commit> [--] [<path>...]\n"
" or: git diff [<options>] <commit>...<commit> [--] [<path>...]\n"
" or: git diff [<options>] <blob> <blob>\n"
" or: git diff [<options>] --no-index [--] <path> <path>"
"\n"
COMMON_DIFF_OPTIONS_HELP;
static const char *blob_path(struct object_array_entry *entry)
{
return entry->path ? entry->path : entry->name;
}
static void stuff_change(struct diff_options *opt,
unsigned old_mode, unsigned new_mode,
const struct object_id *old_oid,
const struct object_id *new_oid,
int old_oid_valid,
int new_oid_valid,
const char *old_path,
const char *new_path)
{
struct diff_filespec *one, *two;
if (!is_null_oid(old_oid) && !is_null_oid(new_oid) &&
oideq(old_oid, new_oid) && (old_mode == new_mode))
return;
if (opt->flags.reverse_diff) {
SWAP(old_mode, new_mode);
SWAP(old_oid, new_oid);
SWAP(old_path, new_path);
}
if (opt->prefix &&
(strncmp(old_path, opt->prefix, opt->prefix_length) ||
strncmp(new_path, opt->prefix, opt->prefix_length)))
return;
one = alloc_filespec(old_path);
two = alloc_filespec(new_path);
fill_filespec(one, old_oid, old_oid_valid, old_mode);
fill_filespec(two, new_oid, new_oid_valid, new_mode);
diff_queue(&diff_queued_diff, one, two);
}
static void builtin_diff_b_f(struct rev_info *revs,
int argc, const char **argv UNUSED,
struct object_array_entry **blob)
{
/* Blob vs file in the working tree*/
struct stat st;
const char *path;
if (argc > 1)
usage(builtin_diff_usage);
GUARD_PATHSPEC(&revs->prune_data, PATHSPEC_FROMTOP | PATHSPEC_LITERAL);
path = revs->prune_data.items[0].match;
if (lstat(path, &st))
die_errno(_("failed to stat '%s'"), path);
if (!(S_ISREG(st.st_mode) || S_ISLNK(st.st_mode)))
die(_("'%s': not a regular file or symlink"), path);
diff_set_mnemonic_prefix(&revs->diffopt, "o/", "w/");
if (blob[0]->mode == S_IFINVALID)
blob[0]->mode = canon_mode(st.st_mode);
stuff_change(&revs->diffopt,
blob[0]->mode, canon_mode(st.st_mode),
&blob[0]->item->oid, null_oid(the_hash_algo),
1, 0,
blob[0]->path ? blob[0]->path : path,
path);
diffcore_std(&revs->diffopt);
diff_flush(&revs->diffopt);
}
static void builtin_diff_blobs(struct rev_info *revs,
int argc, const char **argv UNUSED,
struct object_array_entry **blob)
{
const unsigned mode = canon_mode(S_IFREG | 0644);
if (argc > 1)
usage(builtin_diff_usage);
if (blob[0]->mode == S_IFINVALID)
blob[0]->mode = mode;
if (blob[1]->mode == S_IFINVALID)
blob[1]->mode = mode;
stuff_change(&revs->diffopt,
blob[0]->mode, blob[1]->mode,
&blob[0]->item->oid, &blob[1]->item->oid,
1, 1,
blob_path(blob[0]), blob_path(blob[1]));
diffcore_std(&revs->diffopt);
diff_flush(&revs->diffopt);
}
static void builtin_diff_index(struct rev_info *revs,
int argc, const char **argv)
{
unsigned int option = 0;
while (1 < argc) {
const char *arg = argv[1];
if (!strcmp(arg, "--cached") || !strcmp(arg, "--staged"))
option |= DIFF_INDEX_CACHED;
else if (!strcmp(arg, "--merge-base"))
option |= DIFF_INDEX_MERGE_BASE;
else
usage(builtin_diff_usage);
argv++; argc--;
}
/*
* Make sure there is one revision (i.e. pending object),
* and there is no revision filtering parameters.
*/
if (revs->pending.nr != 1 ||
revs->max_count != -1 || revs->min_age != -1 ||
revs->max_age != -1)
usage(builtin_diff_usage);
if (!(option & DIFF_INDEX_CACHED)) {
setup_work_tree();
if (repo_read_index_preload(the_repository,
&revs->diffopt.pathspec, 0) < 0) {
die_errno("repo_read_index_preload");
}
} else if (repo_read_index(the_repository) < 0) {
die_errno("repo_read_cache");
}
run_diff_index(revs, option);
}
static void builtin_diff_tree(struct rev_info *revs,
int argc, const char **argv,
struct object_array_entry *ent0,
struct object_array_entry *ent1)
{
const struct object_id *(oid[2]);
struct object_id mb_oid;
int merge_base = 0;
while (1 < argc) {
const char *arg = argv[1];
if (!strcmp(arg, "--merge-base"))
merge_base = 1;
else
usage(builtin_diff_usage);
argv++; argc--;
}
if (merge_base) {
diff_get_merge_base(revs, &mb_oid);
oid[0] = &mb_oid;
oid[1] = &revs->pending.objects[1].item->oid;
} else {
int swap = 0;
/*
* We saw two trees, ent0 and ent1. If ent1 is uninteresting,
* swap them.
*/
if (ent1->item->flags & UNINTERESTING)
swap = 1;
oid[swap] = &ent0->item->oid;
oid[1 - swap] = &ent1->item->oid;
}
diff_tree_oid(oid[0], oid[1], "", &revs->diffopt);
log_tree_diff_flush(revs);
}
static void builtin_diff_combined(struct rev_info *revs,
int argc, const char **argv UNUSED,
struct object_array_entry *ent,
int ents, int first_non_parent)
{
struct oid_array parents = OID_ARRAY_INIT;
int i;
if (argc > 1)
usage(builtin_diff_usage);
if (first_non_parent < 0)
die(_("no merge given, only parents."));
if (first_non_parent >= ents)
BUG("first_non_parent out of range: %d", first_non_parent);
diff_merges_set_dense_combined_if_unset(revs);
for (i = 0; i < ents; i++) {
if (i != first_non_parent)
oid_array_append(&parents, &ent[i].item->oid);
}
diff_tree_combined(&ent[first_non_parent].item->oid, &parents, revs);
oid_array_clear(&parents);
}
static void refresh_index_quietly(void)
{
struct lock_file lock_file = LOCK_INIT;
int fd;
fd = repo_hold_locked_index(the_repository, &lock_file, 0);
if (fd < 0)
return;
discard_index(the_repository->index);
repo_read_index(the_repository);
refresh_index(the_repository->index, REFRESH_QUIET|REFRESH_UNMERGED, NULL, NULL,
NULL);
repo_update_index_if_able(the_repository, &lock_file);
}
static void builtin_diff_files(struct rev_info *revs, int argc, const char **argv)
{
unsigned int options = 0;
while (1 < argc && argv[1][0] == '-') {
if (!strcmp(argv[1], "--base"))
revs->max_count = 1;
else if (!strcmp(argv[1], "--ours"))
revs->max_count = 2;
else if (!strcmp(argv[1], "--theirs"))
revs->max_count = 3;
else if (!strcmp(argv[1], "-q"))
options |= DIFF_SILENT_ON_REMOVED;
else if (!strcmp(argv[1], "-h"))
usage(builtin_diff_usage);
else {
error(_("invalid option: %s"), argv[1]);
usage(builtin_diff_usage);
}
argv++; argc--;
}
/*
* "diff --base" should not combine merges because it was not
* asked to. "diff -c" should not densify (if the user wants
* dense one, --cc can be explicitly asked for, or just rely
* on the default).
*/
if (revs->max_count == -1 &&
(revs->diffopt.output_format & DIFF_FORMAT_PATCH))
diff_merges_set_dense_combined_if_unset(revs);
setup_work_tree();
if (repo_read_index_preload(the_repository, &revs->diffopt.pathspec,
0) < 0) {
die_errno("repo_read_index_preload");
}
run_diff_files(revs, options);
}
struct symdiff {
struct bitmap *skip;
int warn;
const char *base, *left, *right;
};
/*
* Check for symmetric-difference arguments, and if present, arrange
* everything we need to know to handle them correctly. As a bonus,
* weed out all bogus range-based revision specifications, e.g.,
* "git diff A..B C..D" or "git diff A..B C" get rejected.
*
* For an actual symmetric diff, *symdiff is set this way:
*
* - its skip is non-NULL and marks *all* rev->pending.objects[i]
* indices that the caller should ignore (extra merge bases, of
* which there might be many, and A in A...B). Note that the
* chosen merge base and right side are NOT marked.
* - warn is set if there are multiple merge bases.
* - base, left, and right point to the names to use in a
* warning about multiple merge bases.
*
* If there is no symmetric diff argument, sym->skip is NULL and
* sym->warn is cleared. The remaining fields are not set.
*/
static void symdiff_prepare(struct rev_info *rev, struct symdiff *sym)
{
int i, is_symdiff = 0, basecount = 0, othercount = 0;
int lpos = -1, rpos = -1, basepos = -1;
struct bitmap *map = NULL;
/*
* Use the whence fields to find merge bases and left and
* right parts of symmetric difference, so that we do not
* depend on the order that revisions are parsed. If there
* are any revs that aren't from these sources, we have a
* "git diff C A...B" or "git diff A...B C" case. Or we
* could even get "git diff A...B C...E", for instance.
*
* If we don't have just one merge base, we pick one
* at random.
*
* NB: REV_CMD_LEFT, REV_CMD_RIGHT are also used for A..B,
* so we must check for SYMMETRIC_LEFT too. The two arrays
* rev->pending.objects and rev->cmdline.rev are parallel.
*/
for (i = 0; i < rev->cmdline.nr; i++) {
struct object *obj = rev->pending.objects[i].item;
switch (rev->cmdline.rev[i].whence) {
case REV_CMD_MERGE_BASE:
if (basepos < 0)
basepos = i;
basecount++;
break; /* do mark all bases */
case REV_CMD_LEFT:
if (lpos >= 0)
usage(builtin_diff_usage);
lpos = i;
if (obj->flags & SYMMETRIC_LEFT) {
is_symdiff = 1;
break; /* do mark A */
}
continue;
case REV_CMD_RIGHT:
if (rpos >= 0)
usage(builtin_diff_usage);
rpos = i;
continue; /* don't mark B */
case REV_CMD_PARENTS_ONLY:
case REV_CMD_REF:
case REV_CMD_REV:
othercount++;
continue;
}
if (!map)
map = bitmap_new();
bitmap_set(map, i);
}
/*
* Forbid any additional revs for both A...B and A..B.
*/
if (lpos >= 0 && othercount > 0)
usage(builtin_diff_usage);
if (!is_symdiff) {
bitmap_free(map);
sym->warn = 0;
sym->skip = NULL;
return;
}
sym->left = rev->pending.objects[lpos].name;
sym->right = rev->pending.objects[rpos].name;
if (basecount == 0)
die(_("%s...%s: no merge base"), sym->left, sym->right);
sym->base = rev->pending.objects[basepos].name;
bitmap_unset(map, basepos); /* unmark the base we want */
sym->warn = basecount > 1;
sym->skip = map;
}
static void symdiff_release(struct symdiff *sdiff)
{
bitmap_free(sdiff->skip);
}
int cmd_diff(int argc,
const char **argv,
const char *prefix,
struct repository *repo UNUSED)
{
int i;
struct rev_info rev;
struct object_array ent = OBJECT_ARRAY_INIT;
int first_non_parent = -1;
int blobs = 0, paths = 0;
struct object_array_entry *blob[2];
int nongit = 0, no_index = 0;
int result;
struct symdiff sdiff;
/*
* We could get N tree-ish in the rev.pending_objects list.
* Also there could be M blobs there, and P pathspecs. --cached may
* also be present.
*
* N=0, M=0:
* cache vs files (diff-files)
*
* N=0, M=0, --cached:
* HEAD vs cache (diff-index --cached)
*
* N=0, M=2:
* compare two random blobs. P must be zero.
*
* N=0, M=1, P=1:
* compare a blob with a working tree file.
*
* N=1, M=0:
* tree vs files (diff-index)
*
* N=1, M=0, --cached:
* tree vs cache (diff-index --cached)
*
* N=2, M=0:
* tree vs tree (diff-tree)
*
* N=0, M=0, P=2:
* compare two filesystem entities (aka --no-index).
*
* Other cases are errors.
*/
/* Were we asked to do --no-index explicitly? */
for (i = 1; i < argc; i++) {
if (!strcmp(argv[i], "--")) {
i++;
break;
}
if (!strcmp(argv[i], "--no-index"))
no_index = DIFF_NO_INDEX_EXPLICIT;
if (argv[i][0] != '-')
break;
}
prefix = setup_git_directory_gently(&nongit);
if (!nongit) {
prepare_repo_settings(the_repository);
the_repository->settings.command_requires_full_index = 0;
}
if (!no_index) {
/*
* Treat git diff with at least one path outside of the
* repo the same as if the command would have been executed
* outside of a git repository. In this case it behaves
* the same way as "git diff --no-index <a> <b>", which acts
* as a colourful "diff" replacement.
*/
if (nongit || ((argc == i + 2) &&
(!path_inside_repo(prefix, argv[i]) ||
!path_inside_repo(prefix, argv[i + 1]))))
no_index = DIFF_NO_INDEX_IMPLICIT;
}
/*
* When operating outside of a Git repository we need to have a hash
* algorithm at hand so that we can generate the blob hashes. We
* default to SHA1 here, but may eventually want to change this to be
* configurable via a command line option.
*/
if (nongit)
repo_set_hash_algo(the_repository, GIT_HASH_SHA1);
init_diff_ui_defaults();
git_config(git_diff_ui_config, NULL);
prefix = precompose_argv_prefix(argc, argv, prefix);
repo_init_revisions(the_repository, &rev, prefix);
/* Set up defaults that will apply to both no-index and regular diffs. */
init_diffstat_widths(&rev.diffopt);
rev.diffopt.flags.allow_external = 1;
rev.diffopt.flags.allow_textconv = 1;
/* If this is a no-index diff, just run it and exit there. */
if (no_index)
exit(diff_no_index(&rev, the_repository->hash_algo,
no_index == DIFF_NO_INDEX_IMPLICIT,
argc, argv));
/*
* Otherwise, we are doing the usual "git" diff; set up any
* further defaults that apply to regular diffs.
*/
rev.diffopt.skip_stat_unmatch = !!diff_auto_refresh_index;
/*
* Default to intent-to-add entries invisible in the
* index. This makes them show up as new files in diff-files
* and not at all in diff-cached.
*/
rev.diffopt.ita_invisible_in_index = 1;
if (nongit)
die(_("Not a git repository"));
argc = setup_revisions(argc, argv, &rev, NULL);
if (!rev.diffopt.output_format) {
rev.diffopt.output_format = DIFF_FORMAT_PATCH;
diff_setup_done(&rev.diffopt);
}
rev.diffopt.flags.recursive = 1;
rev.diffopt.rotate_to_strict = 1;
setup_diff_pager(&rev.diffopt);
/*
* Do we have --cached and not have a pending object, then
* default to HEAD by hand. Eek.
*/
if (!rev.pending.nr) {
int i;
for (i = 1; i < argc; i++) {
const char *arg = argv[i];
if (!strcmp(arg, "--"))
break;
else if (!strcmp(arg, "--cached") ||
!strcmp(arg, "--staged")) {
add_head_to_pending(&rev);
if (!rev.pending.nr) {
struct tree *tree;
tree = lookup_tree(the_repository,
the_repository->hash_algo->empty_tree);
add_pending_object(&rev, &tree->object, "HEAD");
}
break;
}
}
}
symdiff_prepare(&rev, &sdiff);
for (i = 0; i < rev.pending.nr; i++) {
struct object_array_entry *entry = &rev.pending.objects[i];
struct object *obj = entry->item;
const char *name = entry->name;
int flags = (obj->flags & UNINTERESTING);
if (!obj->parsed)
obj = parse_object(the_repository, &obj->oid);
obj = deref_tag(the_repository, obj, NULL, 0);
if (!obj)
die(_("invalid object '%s' given."), name);
if (obj->type == OBJ_COMMIT)
obj = &repo_get_commit_tree(the_repository,
((struct commit *)obj))->object;
if (obj->type == OBJ_TREE) {
if (sdiff.skip && bitmap_get(sdiff.skip, i))
continue;
obj->flags |= flags;
add_object_array(obj, name, &ent);
if (first_non_parent < 0 &&
(i >= rev.cmdline.nr || /* HEAD by hand. */
rev.cmdline.rev[i].whence != REV_CMD_PARENTS_ONLY))
first_non_parent = ent.nr - 1;
} else if (obj->type == OBJ_BLOB) {
if (2 <= blobs)
die(_("more than two blobs given: '%s'"), name);
blob[blobs] = entry;
blobs++;
} else {
die(_("unhandled object '%s' given."), name);
}
}
if (rev.prune_data.nr)
paths += rev.prune_data.nr;
/*
* Now, do the arguments look reasonable?
*/
if (!ent.nr) {
switch (blobs) {
case 0:
builtin_diff_files(&rev, argc, argv);
break;
case 1:
if (paths != 1)
usage(builtin_diff_usage);
builtin_diff_b_f(&rev, argc, argv, blob);
break;
case 2:
if (paths)
usage(builtin_diff_usage);
builtin_diff_blobs(&rev, argc, argv, blob);
break;
default:
usage(builtin_diff_usage);
}
}
else if (blobs)
usage(builtin_diff_usage);
else if (ent.nr == 1)
builtin_diff_index(&rev, argc, argv);
else if (ent.nr == 2) {
if (sdiff.warn)
warning(_("%s...%s: multiple merge bases, using %s"),
sdiff.left, sdiff.right, sdiff.base);
builtin_diff_tree(&rev, argc, argv,
&ent.objects[0], &ent.objects[1]);
} else
builtin_diff_combined(&rev, argc, argv,
ent.objects, ent.nr,
first_non_parent);
result = diff_result_code(&rev);
if (1 < rev.diffopt.skip_stat_unmatch)
refresh_index_quietly();
release_revisions(&rev);
object_array_clear(&ent);
symdiff_release(&sdiff);
return result;
}