Merge branch 'rs/grep-lookahead'

Fix 'git grep' regression on macOS by disabling lookahead when
encountering invalid UTF-8 byte sequences.

* rs/grep-lookahead:
  grep: disable lookahead on error
This commit is contained in:
Taylor Blau
2024-11-01 12:53:28 -04:00
2 changed files with 29 additions and 10 deletions

30
grep.c
View File

@@ -906,15 +906,17 @@ static int patmatch(struct grep_pat *p,
 		    const char *line, const char *eol,
 		    regmatch_t *match, int eflags)
 {
-	int hit;
-
 	if (p->pcre2_pattern)
-		hit = !pcre2match(p, line, eol, match, eflags);
-	else
-		hit = !regexec_buf(&p->regexp, line, eol - line, 1, match,
-				   eflags);
+		return !pcre2match(p, line, eol, match, eflags);
 
-	return hit;
+	switch (regexec_buf(&p->regexp, line, eol - line, 1, match, eflags)) {
+	case 0:
+		return 1;
+	case REG_NOMATCH:
+		return 0;
+	default:
+		return -1;
+	}
 }
 
 static void strip_timestamp(const char *bol, const char **eol_p)
@@ -952,6 +954,8 @@ static int headerless_match_one_pattern(struct grep_pat *p,
 
  again:
 	hit = patmatch(p, bol, eol, pmatch, eflags);
+	if (hit < 0)
+		hit = 0;
 
 	if (hit && p->word_regexp) {
 		if ((pmatch[0].rm_so < 0) ||
@@ -1461,6 +1465,8 @@ static int look_ahead(struct grep_opt *opt,
 			regmatch_t m;
 
 			hit = patmatch(p, bol, bol + *left_p, &m, 0);
+			if (hit < 0)
+				return -1;
 			if (!hit || m.rm_so < 0 || m.rm_eo < 0)
 				continue;
 			if (earliest < 0 || m.rm_so < earliest)
@@ -1655,9 +1661,13 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle
 		if (try_lookahead
 		    && !(last_hit
 			 && (show_function ||
-			     lno <= last_hit + opt->post_context))
-		    && look_ahead(opt, &left, &lno, &bol))
-			break;
+			     lno <= last_hit + opt->post_context))) {
+			hit = look_ahead(opt, &left, &lno, &bol);
+			if (hit < 0)
+				try_lookahead = 0;
+			else if (hit)
+				break;
+		}
 		eol = end_of_line(bol, &left);
 
 		if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol))

View File

@@ -87,6 +87,7 @@ test_expect_success setup '
 	# Still a no-op.
 	function dummy() {}
 	EOF
+	printf "\200\nASCII\n" >invalid-utf8 &&
 	if test_have_prereq FUNNYNAMES
 	then
 		echo unusual >"\"unusual\" pathname" &&
@@ -534,6 +535,14 @@ do
 		test_cmp expected actual
 	'
 
+	test_expect_success "grep $L searches past invalid lines on UTF-8 locale" '
+		LC_ALL=en_US.UTF-8 git grep A. invalid-utf8 >actual &&
+		cat >expected <<-EOF &&
+		invalid-utf8:ASCII
+		EOF
+		test_cmp expected actual
+	'
+
 	test_expect_success FUNNYNAMES "grep $L should quote unusual pathnames" '
 		cat >expected <<-EOF &&
 		${HC}"\"unusual\" pathname":unusual