Merge branch 'rs/grep-lookahead'
Fix 'git grep' regression on macOS by disabling lookahead when encountering invalid UTF-8 byte sequences.

* rs/grep-lookahead:
  grep: disable lookahead on error
This commit is contained in:
30
grep.c
30
grep.c
@@ -906,15 +906,17 @@ static int patmatch(struct grep_pat *p,
|
|||||||
const char *line, const char *eol,
|
const char *line, const char *eol,
|
||||||
regmatch_t *match, int eflags)
|
regmatch_t *match, int eflags)
|
||||||
{
|
{
|
||||||
int hit;
|
|
||||||
|
|
||||||
if (p->pcre2_pattern)
|
if (p->pcre2_pattern)
|
||||||
hit = !pcre2match(p, line, eol, match, eflags);
|
return !pcre2match(p, line, eol, match, eflags);
|
||||||
else
|
|
||||||
hit = !regexec_buf(&p->regexp, line, eol - line, 1, match,
|
|
||||||
eflags);
|
|
||||||
|
|
||||||
return hit;
|
switch (regexec_buf(&p->regexp, line, eol - line, 1, match, eflags)) {
|
||||||
|
case 0:
|
||||||
|
return 1;
|
||||||
|
case REG_NOMATCH:
|
||||||
|
return 0;
|
||||||
|
default:
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void strip_timestamp(const char *bol, const char **eol_p)
|
static void strip_timestamp(const char *bol, const char **eol_p)
|
||||||
@@ -952,6 +954,8 @@ static int headerless_match_one_pattern(struct grep_pat *p,
|
|||||||
|
|
||||||
again:
|
again:
|
||||||
hit = patmatch(p, bol, eol, pmatch, eflags);
|
hit = patmatch(p, bol, eol, pmatch, eflags);
|
||||||
|
if (hit < 0)
|
||||||
|
hit = 0;
|
||||||
|
|
||||||
if (hit && p->word_regexp) {
|
if (hit && p->word_regexp) {
|
||||||
if ((pmatch[0].rm_so < 0) ||
|
if ((pmatch[0].rm_so < 0) ||
|
||||||
@@ -1461,6 +1465,8 @@ static int look_ahead(struct grep_opt *opt,
|
|||||||
regmatch_t m;
|
regmatch_t m;
|
||||||
|
|
||||||
hit = patmatch(p, bol, bol + *left_p, &m, 0);
|
hit = patmatch(p, bol, bol + *left_p, &m, 0);
|
||||||
|
if (hit < 0)
|
||||||
|
return -1;
|
||||||
if (!hit || m.rm_so < 0 || m.rm_eo < 0)
|
if (!hit || m.rm_so < 0 || m.rm_eo < 0)
|
||||||
continue;
|
continue;
|
||||||
if (earliest < 0 || m.rm_so < earliest)
|
if (earliest < 0 || m.rm_so < earliest)
|
||||||
@@ -1655,9 +1661,13 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle
|
|||||||
if (try_lookahead
|
if (try_lookahead
|
||||||
&& !(last_hit
|
&& !(last_hit
|
||||||
&& (show_function ||
|
&& (show_function ||
|
||||||
lno <= last_hit + opt->post_context))
|
lno <= last_hit + opt->post_context))) {
|
||||||
&& look_ahead(opt, &left, &lno, &bol))
|
hit = look_ahead(opt, &left, &lno, &bol);
|
||||||
break;
|
if (hit < 0)
|
||||||
|
try_lookahead = 0;
|
||||||
|
else if (hit)
|
||||||
|
break;
|
||||||
|
}
|
||||||
eol = end_of_line(bol, &left);
|
eol = end_of_line(bol, &left);
|
||||||
|
|
||||||
if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol))
|
if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol))
|
||||||
|
|||||||
@@ -87,6 +87,7 @@ test_expect_success setup '
|
|||||||
# Still a no-op.
|
# Still a no-op.
|
||||||
function dummy() {}
|
function dummy() {}
|
||||||
EOF
|
EOF
|
||||||
|
printf "\200\nASCII\n" >invalid-utf8 &&
|
||||||
if test_have_prereq FUNNYNAMES
|
if test_have_prereq FUNNYNAMES
|
||||||
then
|
then
|
||||||
echo unusual >"\"unusual\" pathname" &&
|
echo unusual >"\"unusual\" pathname" &&
|
||||||
@@ -534,6 +535,14 @@ do
|
|||||||
test_cmp expected actual
|
test_cmp expected actual
|
||||||
'
|
'
|
||||||
|
|
||||||
|
test_expect_success "grep $L searches past invalid lines on UTF-8 locale" '
|
||||||
|
LC_ALL=en_US.UTF-8 git grep A. invalid-utf8 >actual &&
|
||||||
|
cat >expected <<-EOF &&
|
||||||
|
invalid-utf8:ASCII
|
||||||
|
EOF
|
||||||
|
test_cmp expected actual
|
||||||
|
'
|
||||||
|
|
||||||
test_expect_success FUNNYNAMES "grep $L should quote unusual pathnames" '
|
test_expect_success FUNNYNAMES "grep $L should quote unusual pathnames" '
|
||||||
cat >expected <<-EOF &&
|
cat >expected <<-EOF &&
|
||||||
${HC}"\"unusual\" pathname":unusual
|
${HC}"\"unusual\" pathname":unusual
|
||||||
|
|||||||
Reference in New Issue
Block a user