From 070879ca93a7d358086f4c8aff4553493dcb9210 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sun, 12 Feb 2006 02:57:57 +0100 Subject: [PATCH 1/5] Use a hashtable for objects instead of a sorted list In a simple test, this brings down the CPU time from 47 sec to 22 sec. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- fsck-objects.c | 5 +++- name-rev.c | 7 +++--- object.c | 65 +++++++++++++++++++++++++++++--------------------- object.h | 2 +- 4 files changed, 47 insertions(+), 32 deletions(-) diff --git a/fsck-objects.c b/fsck-objects.c index 9950be2645..6439d55126 100644 --- a/fsck-objects.c +++ b/fsck-objects.c @@ -61,9 +61,12 @@ static void check_connectivity(void) int i; /* Look up all the requirements, warn about missing objects.. */ - for (i = 0; i < nr_objs; i++) { + for (i = 0; i < obj_allocs; i++) { struct object *obj = objs[i]; + if (!obj) + continue; + if (!obj->parsed) { if (!standalone && has_sha1_file(obj->sha1)) ; /* it is in pack */ diff --git a/name-rev.c b/name-rev.c index bbadb91aa0..0c3f547622 100644 --- a/name-rev.c +++ b/name-rev.c @@ -232,9 +232,10 @@ int main(int argc, char **argv) } else if (all) { int i; - for (i = 0; i < nr_objs; i++) - printf("%s %s\n", sha1_to_hex(objs[i]->sha1), - get_rev_name(objs[i])); + for (i = 0; i < obj_allocs; i++) + if (objs[i]) + printf("%s %s\n", sha1_to_hex(objs[i]->sha1), + get_rev_name(objs[i])); } else for ( ; revs; revs = revs->next) printf("%s %s\n", revs->name, get_rev_name(revs->item)); diff --git a/object.c b/object.c index 1577f74281..3259862ab2 100644 --- a/object.c +++ b/object.c @@ -6,30 +6,32 @@ #include "tag.h" struct object **objs; -int nr_objs; -static int obj_allocs; +static int nr_objs; +int obj_allocs; int track_object_refs = 1; +static int hashtable_index(const unsigned char *sha1) +{ + unsigned int i = *(unsigned int *)sha1; + return (int)(i % obj_allocs); +} + static int find_object(const unsigned char *sha1) { - int first = 0, last = nr_objs; + int i = 
hashtable_index(sha1); - while (first < last) { - int next = (first + last) / 2; - struct object *obj = objs[next]; - int cmp; + if (!objs) + return -1; - cmp = memcmp(sha1, obj->sha1, 20); - if (!cmp) - return next; - if (cmp < 0) { - last = next; - continue; - } - first = next+1; - } - return -first-1; + while (objs[i]) { + if (memcmp(sha1, objs[i]->sha1, 20) == 0) + return i; + i++; + if (i == obj_allocs) + i = 0; + } + return -1 - i; } struct object *lookup_object(const unsigned char *sha1) @@ -42,7 +44,7 @@ struct object *lookup_object(const unsigned char *sha1) void created_object(const unsigned char *sha1, struct object *obj) { - int pos = find_object(sha1); + int pos; obj->parsed = 0; memcpy(obj->sha1, sha1, 20); @@ -50,19 +52,28 @@ void created_object(const unsigned char *sha1, struct object *obj) obj->refs = NULL; obj->used = 0; + if (obj_allocs - 1 <= nr_objs * 2) { + int i, count = obj_allocs; + obj_allocs = (obj_allocs < 32 ? 32 : 2 * obj_allocs); + objs = xrealloc(objs, obj_allocs * sizeof(struct object *)); + memset(objs + count, 0, (obj_allocs - count) + * sizeof(struct object *)); + for (i = 0; i < count; i++) + if (objs[i]) { + int j = find_object(objs[i]->sha1); + if (j != i) { + j = -1 - j; + objs[j] = objs[i]; + objs[i] = NULL; + } + } + } + + pos = find_object(sha1); if (pos >= 0) die("Inserting %s twice\n", sha1_to_hex(sha1)); pos = -pos-1; - if (obj_allocs == nr_objs) { - obj_allocs = alloc_nr(obj_allocs); - objs = xrealloc(objs, obj_allocs * sizeof(struct object *)); - } - - /* Insert it into the right place */ - memmove(objs + pos + 1, objs + pos, (nr_objs - pos) * - sizeof(struct object *)); - objs[pos] = obj; nr_objs++; } diff --git a/object.h b/object.h index 0e7618283c..e08afbd29f 100644 --- a/object.h +++ b/object.h @@ -23,7 +23,7 @@ struct object { }; extern int track_object_refs; -extern int nr_objs; +extern int obj_allocs; extern struct object **objs; /** Internal only **/ From 2b796360acbdf3186ab9a5dcb84fe416eda4ffd5 Mon Sep 17 
00:00:00 2001 From: Junio C Hamano Date: Sat, 11 Feb 2006 18:51:19 -0800 Subject: [PATCH 2/5] hashtable-based objects: minimum fixups. Calling hashtable_index from find_object before objs is created would result in division by zero failure. Avoid it. Also the given object name may not be aligned suitably for unsigned int; avoid dereferencing casted pointer. Signed-off-by: Junio C Hamano --- object.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/object.c b/object.c index 3259862ab2..c3616da813 100644 --- a/object.c +++ b/object.c @@ -13,17 +13,19 @@ int track_object_refs = 1; static int hashtable_index(const unsigned char *sha1) { - unsigned int i = *(unsigned int *)sha1; + unsigned int i; + memcpy(&i, sha1, sizeof(unsigned int)); return (int)(i % obj_allocs); } static int find_object(const unsigned char *sha1) { - int i = hashtable_index(sha1); + int i; if (!objs) return -1; + i = hashtable_index(sha1); while (objs[i]) { if (memcmp(sha1, objs[i]->sha1, 20) == 0) return i; From d7ee090d0d425606c599327c01fcbcdb60f6b090 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 12 Feb 2006 11:24:50 -0800 Subject: [PATCH 3/5] Fix object re-hashing The hashed object lookup had a subtle bug in re-hashing: it did for (i = 0; i < count; i++) if (objs[i]) { .. rehash .. where "count" was the old hash count. On the face of it, it is obvious, since it clearly re-hashes all the old objects. However, it's wrong. If the last old hash entry before re-hashing was in use (or became in use by the re-hashing), then the re-hashing could have inserted an object into the hash entries with idx >= count due to overflow. When we then rehash the last old entry, that old entry might become empty, which means that the overflow entries should be re-hashed again. In other words, the loop has to be fixed to traverse the whole array, rather than just the old count.
(There's room for a slight optimization: instead of counting all the way up, we can break when we see the first empty slot that is above the old "count". At that point we know we don't have any collisions that we might have to fix up any more. This patch only does the trivial fix.) [jc: with trivial fix on trivial fix] Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- object.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/object.c b/object.c index c3616da813..c9ca481498 100644 --- a/object.c +++ b/object.c @@ -60,7 +60,7 @@ void created_object(const unsigned char *sha1, struct object *obj) objs = xrealloc(objs, obj_allocs * sizeof(struct object *)); memset(objs + count, 0, (obj_allocs - count) * sizeof(struct object *)); - for (i = 0; i < count; i++) + for (i = 0; i < obj_allocs; i++) if (objs[i]) { int j = find_object(objs[i]->sha1); if (j != i) { From ef1af9d9afe2f402ad60b054ac529c03c4b79299 Mon Sep 17 00:00:00 2001 From: Alex Riesen Date: Sun, 12 Feb 2006 19:03:16 +0100 Subject: [PATCH 4/5] fix "test: 2: unexpected operator" on bsd Signed-off-by: Junio C Hamano --- t/t0000-basic.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/t0000-basic.sh b/t/t0000-basic.sh index bc3e711a52..c339a366f4 100755 --- a/t/t0000-basic.sh +++ b/t/t0000-basic.sh @@ -33,7 +33,7 @@ then fi merge >/dev/null 2>/dev/null -if test $? == 127 +if test $? = 127 then echo >&2 'You do not seem to have "merge" installed. Please check INSTALL document.' From 0dbc4e89bb3ddaaa4230f91d5f59b08f3d33e91b Mon Sep 17 00:00:00 2001 From: Alex Riesen Date: Sun, 12 Feb 2006 19:05:34 +0100 Subject: [PATCH 5/5] avoid echo -e, there are systems where it does not work FreeBSD 4.11 being one example: the built-in echo doesn't have -e, and the installed /bin/echo does not do "-e" either. "printf" works, lacking just "\e" and "\xAB".
Signed-off-by: Junio C Hamano --- git-tag.sh | 3 ++- t/t3001-ls-files-others-exclude.sh | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/git-tag.sh b/git-tag.sh index 6d0c973239..c74e1b4151 100755 --- a/git-tag.sh +++ b/git-tag.sh @@ -85,7 +85,8 @@ if [ "$annotate" ]; then exit 1 } - ( echo -e "object $object\ntype $type\ntag $name\ntagger $tagger\n"; + ( printf 'object %s\ntype %s\ntag %s\ntagger %s\n\n' \ + "$object" "$type" "$name" "$tagger"; cat "$GIT_DIR"/TAG_FINALMSG ) >"$GIT_DIR"/TAG_TMP rm -f "$GIT_DIR"/TAG_TMP.asc "$GIT_DIR"/TAG_FINALMSG if [ "$signed" ]; then diff --git a/t/t3001-ls-files-others-exclude.sh b/t/t3001-ls-files-others-exclude.sh index fde2bb25fa..6979b7c1c0 100755 --- a/t/t3001-ls-files-others-exclude.sh +++ b/t/t3001-ls-files-others-exclude.sh @@ -68,7 +68,7 @@ test_expect_success \ diff -u expect output' # Test \r\n (MSDOS-like systems) -echo -ne '*.1\r\n/*.3\r\n!*.6\r\n' >.gitignore +printf '*.1\r\n/*.3\r\n!*.6\r\n' >.gitignore test_expect_success \ 'git-ls-files --others with \r\n line endings.' \