Merge branch 'am/xdiff-hash-tweak'

Inspired by Ezekiel's recent effort to showcase a Rust interface, the
hash function implementation used to hash lines has been updated
to the one used for ELF symbol lookup by Glibc.

* am/xdiff-hash-tweak:
  xdiff: optimize xdl_hash_record_verbatim
  xdiff: refactor xdl_hash_record()
This commit is contained in:
Junio C Hamano
2025-09-08 14:54:34 -07:00
2 changed files with 65 additions and 9 deletions

View File

@@ -249,7 +249,7 @@ int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags)
return 1; return 1;
} }
static unsigned long xdl_hash_record_with_whitespace(char const **data, unsigned long xdl_hash_record_with_whitespace(char const **data,
char const *top, long flags) { char const *top, long flags) {
unsigned long ha = 5381; unsigned long ha = 5381;
char const *ptr = *data; char const *ptr = *data;
@@ -294,19 +294,67 @@ static unsigned long xdl_hash_record_with_whitespace(char const **data,
return ha; return ha;
} }
unsigned long xdl_hash_record(char const **data, char const *top, long flags) { /*
unsigned long ha = 5381; * Compiler reassociation barrier: pretend to modify X and Y to disallow
* changing evaluation order with respect to following uses of X and Y.
*/
#ifdef __GNUC__
/*
 * Empty asm statement that lists X and Y as read-write ("+r") register
 * operands; it emits no instructions of its own.
 */
#define REASSOC_FENCE(x, y) __asm__("" : "+r"(x), "+r"(y))
#else
/* Without __GNUC__ the macro expands to nothing, so no fence is applied. */
#define REASSOC_FENCE(x, y)
#endif
unsigned long xdl_hash_record_verbatim(char const **data, char const *top) {
unsigned long ha = 5381, c0, c1;
char const *ptr = *data; char const *ptr = *data;
#if 0
if (flags & XDF_WHITESPACE_FLAGS) /*
return xdl_hash_record_with_whitespace(data, top, flags); * The baseline form of the optimized loop below. This is the djb2
* hash (the above function uses a variant with XOR instead of ADD).
*/
for (; ptr < top && *ptr != '\n'; ptr++) { for (; ptr < top && *ptr != '\n'; ptr++) {
ha += (ha << 5); ha += (ha << 5);
ha ^= (unsigned long) *ptr; ha += (unsigned long) *ptr;
} }
*data = ptr < top ? ptr + 1: ptr; *data = ptr < top ? ptr + 1: ptr;
#else
/* Process two characters per iteration. */
if (top - ptr >= 2) do {
if ((c0 = ptr[0]) == '\n') {
*data = ptr + 1;
return ha;
}
if ((c1 = ptr[1]) == '\n') {
*data = ptr + 2;
c0 += ha;
REASSOC_FENCE(c0, ha);
ha = ha * 32 + c0;
return ha;
}
/*
* Combine characters C0 and C1 into the hash HA. We have
* HA = (HA * 33 + C0) * 33 + C1, and we want to ensure
* that the dependency chain over HA is just one multiplication
* and one addition, i.e. we want to evaluate this as
* HA = HA * 33 * 33 + (C0 * 33 + C1), and likewise prefer
* (C0 * 32 + (C0 + C1)) for the expression in parentheses.
*/
ha *= 33 * 33;
c1 += c0;
REASSOC_FENCE(c1, c0);
c1 += c0 * 32;
REASSOC_FENCE(c1, ha);
ha += c1;
ptr += 2;
} while (ptr < top - 1);
*data = top;
if (ptr < top && (c0 = ptr[0]) != '\n') {
c0 += ha;
REASSOC_FENCE(c0, ha);
ha = ha * 32 + c0;
}
#endif
return ha; return ha;
} }

View File

@@ -34,7 +34,15 @@ void *xdl_cha_alloc(chastore_t *cha);
long xdl_guess_lines(mmfile_t *mf, long sample); long xdl_guess_lines(mmfile_t *mf, long sample);
int xdl_blankline(const char *line, long size, long flags); int xdl_blankline(const char *line, long size, long flags);
int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags); int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags);
unsigned long xdl_hash_record(char const **data, char const *top, long flags); unsigned long xdl_hash_record_verbatim(char const **data, char const *top);
unsigned long xdl_hash_record_with_whitespace(char const **data, char const *top, long flags);
/*
 * Hash one record, choosing the implementation from FLAGS: when any bit of
 * XDF_WHITESPACE_FLAGS is set, delegate to xdl_hash_record_with_whitespace();
 * otherwise delegate to xdl_hash_record_verbatim(). DATA and TOP are passed
 * through unchanged, and the callee's return value is returned as is.
 */
static inline unsigned long xdl_hash_record(char const **data, char const *top, long flags)
{
	return (flags & XDF_WHITESPACE_FLAGS)
		? xdl_hash_record_with_whitespace(data, top, flags)
		: xdl_hash_record_verbatim(data, top);
}
unsigned int xdl_hashbits(unsigned int size); unsigned int xdl_hashbits(unsigned int size);
int xdl_num_out(char *out, long val); int xdl_num_out(char *out, long val);
int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2, int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2,