🌐 AI搜索 & 代理 主页
blob: 5f23d1a50cabb35732f9515355fe0a85a33d2ff8 [file] [log] [blame]
Elijah Newren4f6728d2023-03-21 06:25:561#include "git-compat-util.h"
Brandon Williamsb2141fc2017-06-14 18:07:362#include "config.h"
Elijah Newrenf394e092023-03-21 06:25:543#include "gettext.h"
Junio C Hamano83b5d2f2006-09-17 23:02:524#include "grep.h"
Elijah Newren41771fa2023-02-24 00:09:275#include "hex.h"
Elijah Newrena034e912023-05-16 06:34:066#include "object-store-ll.h"
Elijah Newrend4a4f922023-04-22 20:17:267#include "pretty.h"
René Scharfe60ecac92009-07-01 22:07:248#include "userdiff.h"
Johannes Schindelin6bfce932007-06-05 02:36:119#include "xdiff-interface.h"
Jeff King335ec3b2013-05-10 15:10:1510#include "diff.h"
11#include "diffcore.h"
Nguyễn Thái Ngọc Duy793dc672016-06-25 05:22:3112#include "quote.h"
Nguyễn Thái Ngọc Duy3ac68a92018-05-26 13:55:2413#include "help.h"
Junio C Hamano83b5d2f2006-09-17 23:02:5214
Junio C Hamano07a7d652012-09-15 21:04:3615static int grep_source_load(struct grep_source *gs);
Nguyễn Thái Ngọc Duyacd00ea2018-09-21 15:57:3316static int grep_source_is_binary(struct grep_source *gs,
17 struct index_state *istate);
Junio C Hamano07a7d652012-09-15 21:04:3618
Jeff Kingbcba4462023-08-29 23:45:2719static void std_output(struct grep_opt *opt UNUSED, const void *buf, size_t size)
Martin Ågren96313422020-11-21 18:31:0820{
21 fwrite(buf, size, 1, stdout);
22}
23
Nguyễn Thái Ngọc Duyfa151dc2018-05-26 13:55:2224static const char *color_grep_slots[] = {
25 [GREP_COLOR_CONTEXT] = "context",
26 [GREP_COLOR_FILENAME] = "filename",
27 [GREP_COLOR_FUNCTION] = "function",
28 [GREP_COLOR_LINENO] = "lineNumber",
Junio C Hamanod036d662018-07-18 19:20:3129 [GREP_COLOR_COLUMNNO] = "column",
Nguyễn Thái Ngọc Duyfa151dc2018-05-26 13:55:2230 [GREP_COLOR_MATCH_CONTEXT] = "matchContext",
31 [GREP_COLOR_MATCH_SELECTED] = "matchSelected",
32 [GREP_COLOR_SELECTED] = "selected",
33 [GREP_COLOR_SEP] = "separator",
34};
35
Junio C Hamano7687a052012-10-09 23:17:5036static int parse_pattern_type_arg(const char *opt, const char *arg)
37{
38 if (!strcmp(arg, "default"))
39 return GREP_PATTERN_TYPE_UNSPECIFIED;
40 else if (!strcmp(arg, "basic"))
41 return GREP_PATTERN_TYPE_BRE;
42 else if (!strcmp(arg, "extended"))
43 return GREP_PATTERN_TYPE_ERE;
44 else if (!strcmp(arg, "fixed"))
45 return GREP_PATTERN_TYPE_FIXED;
46 else if (!strcmp(arg, "perl"))
47 return GREP_PATTERN_TYPE_PCRE;
48 die("bad %s argument: %s", opt, arg);
49}
50
Nguyễn Thái Ngọc Duy3ac68a92018-05-26 13:55:2451define_list_config_array_extra(color_grep_slots, {"match"});
52
Junio C Hamano7687a052012-10-09 23:17:5053/*
54 * Read the configuration file once and store it in
55 * the grep_defaults template.
56 */
Glen Chooa4e7e312023-06-28 19:26:2257int grep_config(const char *var, const char *value,
58 const struct config_context *ctx, void *cb)
Junio C Hamano7687a052012-10-09 23:17:5059{
Ævar Arnfjörð Bjarmason72365bb2022-02-16 00:00:3660 struct grep_opt *opt = cb;
Nguyễn Thái Ngọc Duyfa151dc2018-05-26 13:55:2261 const char *slot;
Junio C Hamano7687a052012-10-09 23:17:5062
63 if (userdiff_config(var, value) < 0)
64 return -1;
65
66 if (!strcmp(var, "grep.extendedregexp")) {
Ævar Arnfjörð Bjarmasonc7e38552017-06-29 22:22:1867 opt->extended_regexp_option = git_config_bool(var, value);
Junio C Hamano7687a052012-10-09 23:17:5068 return 0;
69 }
70
71 if (!strcmp(var, "grep.patterntype")) {
72 opt->pattern_type_option = parse_pattern_type_arg(var, value);
73 return 0;
74 }
75
76 if (!strcmp(var, "grep.linenumber")) {
77 opt->linenum = git_config_bool(var, value);
78 return 0;
79 }
Taylor Blau6653fec2018-06-22 15:49:4980 if (!strcmp(var, "grep.column")) {
81 opt->columnnum = git_config_bool(var, value);
82 return 0;
83 }
Junio C Hamano7687a052012-10-09 23:17:5084
Andreas Schwab6453f7b2014-03-17 19:16:0585 if (!strcmp(var, "grep.fullname")) {
86 opt->relative = !git_config_bool(var, value);
87 return 0;
88 }
89
Junio C Hamano7687a052012-10-09 23:17:5090 if (!strcmp(var, "color.grep"))
91 opt->color = git_config_colorbool(var, value);
Nguyễn Thái Ngọc Duyfa151dc2018-05-26 13:55:2292 if (!strcmp(var, "color.grep.match")) {
Glen Chooa4e7e312023-06-28 19:26:2293 if (grep_config("color.grep.matchcontext", value, ctx, cb) < 0)
Nguyễn Thái Ngọc Duyfa151dc2018-05-26 13:55:2294 return -1;
Glen Chooa4e7e312023-06-28 19:26:2295 if (grep_config("color.grep.matchselected", value, ctx, cb) < 0)
Nguyễn Thái Ngọc Duyfa151dc2018-05-26 13:55:2296 return -1;
97 } else if (skip_prefix(var, "color.grep.", &slot)) {
98 int i = LOOKUP_CONFIG(color_grep_slots, slot);
99 char *color;
Junio C Hamano7687a052012-10-09 23:17:50100
Nguyễn Thái Ngọc Duyfa151dc2018-05-26 13:55:22101 if (i < 0)
102 return -1;
103 color = opt->colors[i];
Junio C Hamano7687a052012-10-09 23:17:50104 if (!value)
105 return config_error_nonbool(var);
Jeff Kingf6c5a292014-10-07 19:33:09106 return color_parse(value, color);
Junio C Hamano7687a052012-10-09 23:17:50107 }
108 return 0;
109}
110
Ævar Arnfjörð Bjarmason9725c8d2022-02-16 00:00:34111void grep_init(struct grep_opt *opt, struct repository *repo)
Junio C Hamano7687a052012-10-09 23:17:50112{
Ævar Arnfjörð Bjarmason72365bb2022-02-16 00:00:36113 struct grep_opt blank = GREP_OPT_INIT;
114 memcpy(opt, &blank, sizeof(*opt));
Martin Ågren6ba9bb72020-11-29 19:52:21115
Nguyễn Thái Ngọc Duy38bbc2e2018-09-21 15:57:23116 opt->repo = repo;
Junio C Hamano7687a052012-10-09 23:17:50117 opt->pattern_tail = &opt->pattern_list;
118 opt->header_tail = &opt->header_list;
Junio C Hamano7687a052012-10-09 23:17:50119}
Junio C Hamano07a7d652012-09-15 21:04:36120
René Scharfefc456752012-05-20 14:32:39121static struct grep_pat *create_grep_pat(const char *pat, size_t patlen,
122 const char *origin, int no,
123 enum grep_pat_token t,
124 enum grep_header_field field)
Junio C Hamanoa4d7d2c2008-09-05 05:15:02125{
126 struct grep_pat *p = xcalloc(1, sizeof(*p));
René Scharfe526a8582012-05-20 14:33:07127 p->pattern = xmemdupz(pat, patlen);
René Scharfefc456752012-05-20 14:32:39128 p->patternlen = patlen;
129 p->origin = origin;
130 p->no = no;
131 p->token = t;
Junio C Hamanoa4d7d2c2008-09-05 05:15:02132 p->field = field;
René Scharfefc456752012-05-20 14:32:39133 return p;
134}
135
René Scharfe2b3873f2012-05-20 14:32:54136static void do_append_grep_pat(struct grep_pat ***tail, struct grep_pat *p)
137{
138 **tail = p;
139 *tail = &p->next;
Junio C Hamanoa4d7d2c2008-09-05 05:15:02140 p->next = NULL;
René Scharfe526a8582012-05-20 14:33:07141
142 switch (p->token) {
143 case GREP_PATTERN: /* atom */
144 case GREP_PATTERN_HEAD:
145 case GREP_PATTERN_BODY:
146 for (;;) {
147 struct grep_pat *new_pat;
148 size_t len = 0;
149 char *cp = p->pattern + p->patternlen, *nl = NULL;
150 while (++len <= p->patternlen) {
151 if (*(--cp) == '\n') {
152 nl = cp;
153 break;
154 }
155 }
156 if (!nl)
157 break;
158 new_pat = create_grep_pat(nl + 1, len - 1, p->origin,
159 p->no, p->token, p->field);
160 new_pat->next = p->next;
161 if (!p->next)
162 *tail = &new_pat->next;
163 p->next = new_pat;
164 *nl = '\0';
165 p->patternlen -= len;
166 }
167 break;
168 default:
169 break;
170 }
René Scharfe2b3873f2012-05-20 14:32:54171}
172
René Scharfefc456752012-05-20 14:32:39173void append_header_grep_pattern(struct grep_opt *opt,
174 enum grep_header_field field, const char *pat)
175{
176 struct grep_pat *p = create_grep_pat(pat, strlen(pat), "header", 0,
177 GREP_PATTERN_HEAD, field);
Junio C Hamanobaa63782012-09-29 18:59:52178 if (field == GREP_HEADER_REFLOG)
179 opt->use_reflog_filter = 1;
René Scharfe2b3873f2012-05-20 14:32:54180 do_append_grep_pat(&opt->header_tail, p);
Junio C Hamanoa4d7d2c2008-09-05 05:15:02181}
182
Junio C Hamano83b5d2f2006-09-17 23:02:52183void append_grep_pattern(struct grep_opt *opt, const char *pat,
184 const char *origin, int no, enum grep_pat_token t)
185{
René Scharfeed40a092010-05-22 21:43:43186 append_grep_pat(opt, pat, strlen(pat), origin, no, t);
187}
188
189void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen,
190 const char *origin, int no, enum grep_pat_token t)
191{
René Scharfefc456752012-05-20 14:32:39192 struct grep_pat *p = create_grep_pat(pat, patlen, origin, no, t, 0);
René Scharfe2b3873f2012-05-20 14:32:54193 do_append_grep_pat(&opt->pattern_tail, p);
Junio C Hamano83b5d2f2006-09-17 23:02:52194}
195
Fredrik Kuivinen5b594f42010-01-25 22:51:39196struct grep_opt *grep_opt_dup(const struct grep_opt *opt)
197{
198 struct grep_pat *pat;
199 struct grep_opt *ret = xmalloc(sizeof(struct grep_opt));
200 *ret = *opt;
201
202 ret->pattern_list = NULL;
203 ret->pattern_tail = &ret->pattern_list;
204
205 for(pat = opt->pattern_list; pat != NULL; pat = pat->next)
206 {
207 if(pat->token == GREP_PATTERN_HEAD)
208 append_header_grep_pattern(ret, pat->field,
209 pat->pattern);
210 else
René Scharfeed40a092010-05-22 21:43:43211 append_grep_pat(ret, pat->pattern, pat->patternlen,
212 pat->origin, pat->no, pat->token);
Fredrik Kuivinen5b594f42010-01-25 22:51:39213 }
214
215 return ret;
216}
217
Michał Kiedrowicza30c1482011-05-09 21:52:04218static NORETURN void compile_regexp_failed(const struct grep_pat *p,
219 const char *error)
220{
221 char where[1024];
222
223 if (p->no)
Jeff King19bdd3e2015-09-24 21:06:51224 xsnprintf(where, sizeof(where), "In '%s' at %d, ", p->origin, p->no);
Michał Kiedrowicza30c1482011-05-09 21:52:04225 else if (p->origin)
Jeff King19bdd3e2015-09-24 21:06:51226 xsnprintf(where, sizeof(where), "%s, ", p->origin);
Michał Kiedrowicza30c1482011-05-09 21:52:04227 else
228 where[0] = 0;
229
230 die("%s'%s': %s", where, p->pattern, error);
231}
232
/*
 * Return 1 if none of the first "len" bytes of "s" is a regex special
 * character (per is_regex_special()), 0 otherwise. An empty string
 * counts as fixed.
 */
static int is_fixed(const char *s, size_t len)
{
	size_t pos = 0;

	while (pos < len) {
		if (is_regex_special(s[pos]))
			return 0;
		pos++;
	}

	return 1;
}
244
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56245#ifdef USE_LIBPCRE2
Ævar Arnfjörð Bjarmasonc1760352021-02-18 00:07:28246#define GREP_PCRE2_DEBUG_MALLOC 0
247
248static void *pcre2_malloc(PCRE2_SIZE size, MAYBE_UNUSED void *memory_data)
249{
250 void *pointer = malloc(size);
251#if GREP_PCRE2_DEBUG_MALLOC
252 static int count = 1;
253 fprintf(stderr, "PCRE2:%p -> #%02d: alloc(%lu)\n", pointer, count++, size);
254#endif
255 return pointer;
256}
257
258static void pcre2_free(void *pointer, MAYBE_UNUSED void *memory_data)
259{
260#if GREP_PCRE2_DEBUG_MALLOC
261 static int count = 1;
262 if (pointer)
263 fprintf(stderr, "PCRE2:%p -> #%02d: free()\n", pointer, count++);
264#endif
265 free(pointer);
266}
267
Mathias Krause50b6ad52023-01-31 18:56:11268static int pcre2_jit_functional(void)
269{
270 static int jit_working = -1;
271 pcre2_code *code;
272 size_t off;
273 int err;
274
275 if (jit_working != -1)
276 return jit_working;
277
278 /*
279 * Try to JIT compile a simple pattern to probe if the JIT is
280 * working in general. It might fail for systems where creating
281 * memory mappings for runtime code generation is restricted.
282 */
283 code = pcre2_compile((PCRE2_SPTR)".", 1, 0, &err, &off, NULL);
284 if (!code)
285 return 0;
286
287 jit_working = pcre2_jit_compile(code, PCRE2_JIT_COMPLETE) == 0;
288 pcre2_code_free(code);
289
290 return jit_working;
291}
292
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56293static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
294{
295 int error;
296 PCRE2_UCHAR errbuf[256];
297 PCRE2_SIZE erroffset;
298 int options = PCRE2_MULTILINE;
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56299 int jitret;
Ævar Arnfjörð Bjarmasona25b9082017-11-23 14:16:58300 int patinforet;
301 size_t jitsizearg;
René Scharfe32e3e8b2021-12-18 19:53:15302 int literal = !opt->ignore_case && (p->fixed || p->is_fixed);
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56303
Ævar Arnfjörð Bjarmasoncbe81e62021-02-18 00:07:27304 /*
305 * Call pcre2_general_context_create() before calling any
306 * other pcre2_*(). It sets up our malloc()/free() functions
307 * with which everything else is allocated.
308 */
309 p->pcre2_general_context = pcre2_general_context_create(
310 pcre2_malloc, pcre2_free, NULL);
311 if (!p->pcre2_general_context)
312 die("Couldn't allocate PCRE2 general context");
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56313
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56314 if (opt->ignore_case) {
Ævar Arnfjörð Bjarmason44570182019-06-27 23:39:05315 if (!opt->ignore_locale && has_non_ascii(p->pattern)) {
Ævar Arnfjörð Bjarmasoncbe81e62021-02-18 00:07:27316 p->pcre2_tables = pcre2_maketables(p->pcre2_general_context);
317 p->pcre2_compile_context = pcre2_compile_context_create(p->pcre2_general_context);
Carlo Marcelo Arenas Belón10da0302019-10-16 12:10:24318 pcre2_set_character_tables(p->pcre2_compile_context,
319 p->pcre2_tables);
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56320 }
321 options |= PCRE2_CASELESS;
322 }
René Scharfe32e3e8b2021-12-18 19:53:15323 if (!opt->ignore_locale && is_utf8_locale() && !literal)
Carlo Marcelo Arenas Belónacabd202023-01-08 15:52:17324 options |= (PCRE2_UTF | PCRE2_UCP | PCRE2_MATCH_INVALID_UTF);
Ævar Arnfjörð Bjarmason95ca1f92021-01-24 17:28:13325
Mathias Krause14b9a042023-03-23 17:25:39326#ifndef GIT_PCRE2_VERSION_10_35_OR_HIGHER
327 /*
328 * Work around a JIT bug related to invalid Unicode character handling
329 * fixed in 10.35:
330 * https://github.com/PCRE2Project/pcre2/commit/c21bd977547d
331 */
332 options &= ~PCRE2_UCP;
333#endif
334
René Scharfe97169fc2022-02-17 21:14:29335#ifndef GIT_PCRE2_VERSION_10_36_OR_HIGHER
Ævar Arnfjörð Bjarmason95ca1f92021-01-24 17:28:13336 /* Work around https://bugs.exim.org/show_bug.cgi?id=2642 fixed in 10.36 */
Ævar Arnfjörð Bjarmason797c3592021-02-18 00:07:24337 if (PCRE2_MATCH_INVALID_UTF && options & (PCRE2_UTF | PCRE2_CASELESS))
338 options |= PCRE2_NO_START_OPTIMIZE;
339#endif
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56340
341 p->pcre2_pattern = pcre2_compile((PCRE2_SPTR)p->pattern,
342 p->patternlen, options, &error, &erroffset,
343 p->pcre2_compile_context);
344
345 if (p->pcre2_pattern) {
Ævar Arnfjörð Bjarmasoncbe81e62021-02-18 00:07:27346 p->pcre2_match_data = pcre2_match_data_create_from_pattern(p->pcre2_pattern, p->pcre2_general_context);
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56347 if (!p->pcre2_match_data)
348 die("Couldn't allocate PCRE2 match data");
349 } else {
350 pcre2_get_error_message(error, errbuf, sizeof(errbuf));
351 compile_regexp_failed(p, (const char *)&errbuf);
352 }
353
354 pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on);
Ævar Arnfjörð Bjarmason04bef502019-07-26 15:08:11355 if (p->pcre2_jit_on) {
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56356 jitret = pcre2_jit_compile(p->pcre2_pattern, PCRE2_JIT_COMPLETE);
Mathias Krause50b6ad52023-01-31 18:56:11357 if (jitret == PCRE2_ERROR_NOMEMORY && !pcre2_jit_functional()) {
358 /*
359 * Even though pcre2_config(PCRE2_CONFIG_JIT, ...)
360 * indicated JIT support, the library might still
361 * fail to generate JIT code for various reasons,
362 * e.g. when SELinux's 'deny_execmem' or PaX's
363 * MPROTECT prevent creating W|X memory mappings.
364 *
365 * Instead of faling hard, fall back to interpreter
366 * mode, just as if the pattern was prefixed with
367 * '(*NO_JIT)'.
368 */
369 p->pcre2_jit_on = 0;
370 return;
371 } else if (jitret) {
372 int need_clip = p->patternlen > 64;
373 int clip_len = need_clip ? 64 : p->patternlen;
374 die("Couldn't JIT the PCRE2 pattern '%.*s'%s, got '%d'%s",
375 clip_len, p->pattern, need_clip ? "..." : "", jitret,
376 pcre2_jit_functional()
377 ? "\nPerhaps prefix (*NO_JIT) to your pattern?"
378 : "");
379 }
Ævar Arnfjörð Bjarmasona25b9082017-11-23 14:16:58380
381 /*
382 * The pcre2_config(PCRE2_CONFIG_JIT, ...) call just
383 * tells us whether the library itself supports JIT,
384 * but to see whether we're going to be actually using
385 * JIT we need to extract PCRE2_INFO_JITSIZE from the
386 * pattern *after* we do pcre2_jit_compile() above.
387 *
388 * This is because if the pattern contains the
389 * (*NO_JIT) verb (see pcre2syntax(3))
390 * pcre2_jit_compile() will exit early with 0. If we
391 * then proceed to call pcre2_jit_match() further down
392 * the line instead of pcre2_match() we'll either
393 * segfault (pre PCRE 10.31) or run into a fatal error
394 * (post PCRE2 10.31)
395 */
396 patinforet = pcre2_pattern_info(p->pcre2_pattern, PCRE2_INFO_JITSIZE, &jitsizearg);
397 if (patinforet)
398 BUG("pcre2_pattern_info() failed: %d", patinforet);
399 if (jitsizearg == 0) {
400 p->pcre2_jit_on = 0;
401 return;
402 }
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56403 }
404}
405
406static int pcre2match(struct grep_pat *p, const char *line, const char *eol,
407 regmatch_t *match, int eflags)
408{
409 int ret, flags = 0;
410 PCRE2_SIZE *ovector;
411 PCRE2_UCHAR errbuf[256];
412
413 if (eflags & REG_NOTBOL)
414 flags |= PCRE2_NOTBOL;
415
416 if (p->pcre2_jit_on)
417 ret = pcre2_jit_match(p->pcre2_pattern, (unsigned char *)line,
418 eol - line, 0, flags, p->pcre2_match_data,
419 NULL);
420 else
421 ret = pcre2_match(p->pcre2_pattern, (unsigned char *)line,
422 eol - line, 0, flags, p->pcre2_match_data,
423 NULL);
424
425 if (ret < 0 && ret != PCRE2_ERROR_NOMATCH) {
426 pcre2_get_error_message(ret, errbuf, sizeof(errbuf));
427 die("%s failed with error code %d: %s",
428 (p->pcre2_jit_on ? "pcre2_jit_match" : "pcre2_match"), ret,
429 errbuf);
430 }
431 if (ret > 0) {
432 ovector = pcre2_get_ovector_pointer(p->pcre2_match_data);
433 ret = 0;
434 match->rm_so = (int)ovector[0];
435 match->rm_eo = (int)ovector[1];
436 }
437
438 return ret;
439}
440
441static void free_pcre2_pattern(struct grep_pat *p)
442{
443 pcre2_compile_context_free(p->pcre2_compile_context);
444 pcre2_code_free(p->pcre2_pattern);
445 pcre2_match_data_free(p->pcre2_match_data);
Ævar Arnfjörð Bjarmasonb76bf272021-02-18 00:07:25446#ifdef GIT_PCRE2_VERSION_10_34_OR_HIGHER
Ævar Arnfjörð Bjarmasoncbe81e62021-02-18 00:07:27447 pcre2_maketables_free(p->pcre2_general_context, p->pcre2_tables);
Ævar Arnfjörð Bjarmasonb76bf272021-02-18 00:07:25448#else
Carlo Marcelo Arenas Belón10da0302019-10-16 12:10:24449 free((void *)p->pcre2_tables);
Ævar Arnfjörð Bjarmasonb76bf272021-02-18 00:07:25450#endif
Ævar Arnfjörð Bjarmasoncbe81e62021-02-18 00:07:27451 pcre2_general_context_free(p->pcre2_general_context);
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56452}
453#else /* !USE_LIBPCRE2 */
Jeff King4548b012023-08-29 23:45:34454static void compile_pcre2_pattern(struct grep_pat *p UNUSED,
455 const struct grep_opt *opt UNUSED)
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56456{
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56457 die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
458}
459
Jeff King4548b012023-08-29 23:45:34460static int pcre2match(struct grep_pat *p UNUSED, const char *line UNUSED,
461 const char *eol UNUSED, regmatch_t *match UNUSED,
462 int eflags UNUSED)
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56463{
464 return 1;
465}
466
Jeff King4548b012023-08-29 23:45:34467static void free_pcre2_pattern(struct grep_pat *p UNUSED)
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56468{
469}
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56470
Nguyễn Thái Ngọc Duy793dc672016-06-25 05:22:31471static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
472{
473 struct strbuf sb = STRBUF_INIT;
474 int err;
Ævar Arnfjörð Bjarmason1ceabab2017-06-29 22:22:22475 int regflags = 0;
Nguyễn Thái Ngọc Duy793dc672016-06-25 05:22:31476
477 basic_regex_quote_buf(&sb, p->pattern);
Nguyễn Thái Ngọc Duy793dc672016-06-25 05:22:31478 if (opt->ignore_case)
479 regflags |= REG_ICASE;
480 err = regcomp(&p->regexp, sb.buf, regflags);
Nguyễn Thái Ngọc Duy793dc672016-06-25 05:22:31481 strbuf_release(&sb);
482 if (err) {
483 char errbuf[1024];
484 regerror(err, &p->regexp, errbuf, sizeof(errbuf));
Nguyễn Thái Ngọc Duy793dc672016-06-25 05:22:31485 compile_regexp_failed(p, errbuf);
486 }
487}
Ævar Arnfjörð Bjarmasonb65abca2019-07-01 21:21:00488#endif /* !USE_LIBPCRE2 */
Nguyễn Thái Ngọc Duy793dc672016-06-25 05:22:31489
Junio C Hamano83b5d2f2006-09-17 23:02:52490static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
491{
René Scharfec8222552009-01-09 23:18:34492 int err;
Ævar Arnfjörð Bjarmason07a3d412017-06-29 22:22:21493 int regflags = REG_NEWLINE;
René Scharfec8222552009-01-09 23:18:34494
Ævar Arnfjörð Bjarmason04bf0522022-02-16 00:00:39495 if (opt->pattern_type_option == GREP_PATTERN_TYPE_UNSPECIFIED)
496 opt->pattern_type_option = (opt->extended_regexp_option
497 ? GREP_PATTERN_TYPE_ERE
498 : GREP_PATTERN_TYPE_BRE);
499
René Scharfed7eb5272009-03-07 12:28:40500 p->word_regexp = opt->word_regexp;
Brian Collins5183bf62009-11-06 09:22:35501 p->ignore_case = opt->ignore_case;
Ævar Arnfjörð Bjarmason04bf0522022-02-16 00:00:39502 p->fixed = opt->pattern_type_option == GREP_PATTERN_TYPE_FIXED;
René Scharfed7eb5272009-03-07 12:28:40503
Ævar Arnfjörð Bjarmason04bf0522022-02-16 00:00:39504 if (opt->pattern_type_option != GREP_PATTERN_TYPE_PCRE &&
Ævar Arnfjörð Bjarmasonae807d72022-02-16 00:00:38505 memchr(p->pattern, 0, p->patternlen))
Ævar Arnfjörð Bjarmason45d1f372019-07-01 21:20:58506 die(_("given pattern contains NULL byte (via -f <file>). This is only supported with -P under PCRE v2"));
Fredrik Kuivinen9ecedde2011-08-20 22:42:18507
Ævar Arnfjörð Bjarmason09872f62019-07-26 15:08:15508 p->is_fixed = is_fixed(p->pattern, p->patternlen);
Ævar Arnfjörð Bjarmason8a599982019-07-26 15:08:16509#ifdef USE_LIBPCRE2
510 if (!p->fixed && !p->is_fixed) {
511 const char *no_jit = "(*NO_JIT)";
512 const int no_jit_len = strlen(no_jit);
513 if (starts_with(p->pattern, no_jit) &&
514 is_fixed(p->pattern + no_jit_len,
515 p->patternlen - no_jit_len))
516 p->is_fixed = 1;
517 }
518#endif
Ævar Arnfjörð Bjarmason09872f62019-07-26 15:08:15519 if (p->fixed || p->is_fixed) {
Ævar Arnfjörð Bjarmasonb65abca2019-07-01 21:21:00520#ifdef USE_LIBPCRE2
Ævar Arnfjörð Bjarmason09872f62019-07-26 15:08:15521 if (p->is_fixed) {
Ævar Arnfjörð Bjarmasonb65abca2019-07-01 21:21:00522 compile_pcre2_pattern(p, opt);
523 } else {
524 /*
525 * E.g. t7811-grep-open.sh relies on the
526 * pattern being restored.
527 */
528 char *old_pattern = p->pattern;
529 size_t old_patternlen = p->patternlen;
530 struct strbuf sb = STRBUF_INIT;
531
532 /*
533 * There is the PCRE2_LITERAL flag, but it's
534 * only in PCRE v2 10.30 and later. Needing to
535 * ifdef our way around that and dealing with
536 * it + PCRE2_MULTILINE being an error is more
537 * complex than just quoting this ourselves.
538 */
539 strbuf_add(&sb, "\\Q", 2);
540 strbuf_add(&sb, p->pattern, p->patternlen);
541 strbuf_add(&sb, "\\E", 2);
542
543 p->pattern = sb.buf;
544 p->patternlen = sb.len;
545 compile_pcre2_pattern(p, opt);
546 p->pattern = old_pattern;
547 p->patternlen = old_patternlen;
548 strbuf_release(&sb);
549 }
550#else /* !USE_LIBPCRE2 */
Nguyễn Thái Ngọc Duy793dc672016-06-25 05:22:31551 compile_fixed_regexp(p, opt);
Ævar Arnfjörð Bjarmasonb65abca2019-07-01 21:21:00552#endif /* !USE_LIBPCRE2 */
Nguyễn Thái Ngọc Duy793dc672016-06-25 05:22:31553 return;
Fredrik Kuivinen9ecedde2011-08-20 22:42:18554 }
René Scharfec8222552009-01-09 23:18:34555
Ævar Arnfjörð Bjarmason04bf0522022-02-16 00:00:39556 if (opt->pattern_type_option == GREP_PATTERN_TYPE_PCRE) {
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56557 compile_pcre2_pattern(p, opt);
558 return;
559 }
560
Ævar Arnfjörð Bjarmason07a3d412017-06-29 22:22:21561 if (p->ignore_case)
562 regflags |= REG_ICASE;
Ævar Arnfjörð Bjarmason04bf0522022-02-16 00:00:39563 if (opt->pattern_type_option == GREP_PATTERN_TYPE_ERE)
Ævar Arnfjörð Bjarmason07a3d412017-06-29 22:22:21564 regflags |= REG_EXTENDED;
565 err = regcomp(&p->regexp, p->pattern, regflags);
Junio C Hamano83b5d2f2006-09-17 23:02:52566 if (err) {
567 char errbuf[1024];
Junio C Hamano83b5d2f2006-09-17 23:02:52568 regerror(err, &p->regexp, errbuf, 1024);
Michał Kiedrowicza30c1482011-05-09 21:52:04569 compile_regexp_failed(p, errbuf);
Junio C Hamano83b5d2f2006-09-17 23:02:52570 }
571}
572
René Scharfee2b15422022-01-06 09:54:19573static struct grep_expr *grep_not_expr(struct grep_expr *expr)
574{
575 struct grep_expr *z = xcalloc(1, sizeof(*z));
576 z->node = GREP_NODE_NOT;
577 z->u.unary = expr;
578 return z;
579}
580
Taylor Blauf2d27592022-01-06 19:50:12581static struct grep_expr *grep_binexp(enum grep_expr_node kind,
582 struct grep_expr *left,
583 struct grep_expr *right)
René Scharfe9dbf00b2022-01-06 09:51:00584{
585 struct grep_expr *z = xcalloc(1, sizeof(*z));
Taylor Blauf2d27592022-01-06 19:50:12586 z->node = kind;
René Scharfe9dbf00b2022-01-06 09:51:00587 z->u.binary.left = left;
588 z->u.binary.right = right;
589 return z;
590}
591
Taylor Blauf2d27592022-01-06 19:50:12592static struct grep_expr *grep_or_expr(struct grep_expr *left, struct grep_expr *right)
593{
594 return grep_binexp(GREP_NODE_OR, left, right);
595}
596
Taylor Blau0a6adc22022-01-06 19:50:15597static struct grep_expr *grep_and_expr(struct grep_expr *left, struct grep_expr *right)
598{
599 return grep_binexp(GREP_NODE_AND, left, right);
600}
601
Junio C Hamano0ab7bef2006-09-28 00:50:52602static struct grep_expr *compile_pattern_or(struct grep_pat **);
Junio C Hamano83b5d2f2006-09-17 23:02:52603static struct grep_expr *compile_pattern_atom(struct grep_pat **list)
604{
605 struct grep_pat *p;
606 struct grep_expr *x;
607
608 p = *list;
Linus Torvaldsc922b012009-04-27 18:10:24609 if (!p)
610 return NULL;
Junio C Hamano83b5d2f2006-09-17 23:02:52611 switch (p->token) {
612 case GREP_PATTERN: /* atom */
Junio C Hamano480c1ca2006-09-20 19:39:46613 case GREP_PATTERN_HEAD:
614 case GREP_PATTERN_BODY:
René Scharfeca56dad2021-03-13 16:17:22615 CALLOC_ARRAY(x, 1);
Junio C Hamano83b5d2f2006-09-17 23:02:52616 x->node = GREP_NODE_ATOM;
617 x->u.atom = p;
618 *list = p->next;
619 return x;
620 case GREP_OPEN_PAREN:
621 *list = p->next;
Junio C Hamano0ab7bef2006-09-28 00:50:52622 x = compile_pattern_or(list);
Junio C Hamano83b5d2f2006-09-17 23:02:52623 if (!*list || (*list)->token != GREP_CLOSE_PAREN)
624 die("unmatched parenthesis");
625 *list = (*list)->next;
626 return x;
627 default:
628 return NULL;
629 }
630}
631
632static struct grep_expr *compile_pattern_not(struct grep_pat **list)
633{
634 struct grep_pat *p;
635 struct grep_expr *x;
636
637 p = *list;
Linus Torvaldsc922b012009-04-27 18:10:24638 if (!p)
639 return NULL;
Junio C Hamano83b5d2f2006-09-17 23:02:52640 switch (p->token) {
641 case GREP_NOT:
642 if (!p->next)
643 die("--not not followed by pattern expression");
644 *list = p->next;
René Scharfee2b15422022-01-06 09:54:19645 x = compile_pattern_not(list);
646 if (!x)
Junio C Hamano83b5d2f2006-09-17 23:02:52647 die("--not followed by non pattern expression");
René Scharfee2b15422022-01-06 09:54:19648 return grep_not_expr(x);
Junio C Hamano83b5d2f2006-09-17 23:02:52649 default:
650 return compile_pattern_atom(list);
651 }
652}
653
654static struct grep_expr *compile_pattern_and(struct grep_pat **list)
655{
656 struct grep_pat *p;
Taylor Blau0a6adc22022-01-06 19:50:15657 struct grep_expr *x, *y;
Junio C Hamano83b5d2f2006-09-17 23:02:52658
659 x = compile_pattern_not(list);
660 p = *list;
661 if (p && p->token == GREP_AND) {
René Scharfefe7fe622021-06-30 16:12:43662 if (!x)
663 die("--and not preceded by pattern expression");
Junio C Hamano83b5d2f2006-09-17 23:02:52664 if (!p->next)
665 die("--and not followed by pattern expression");
666 *list = p->next;
667 y = compile_pattern_and(list);
668 if (!y)
669 die("--and not followed by pattern expression");
Taylor Blau0a6adc22022-01-06 19:50:15670 return grep_and_expr(x, y);
Junio C Hamano83b5d2f2006-09-17 23:02:52671 }
672 return x;
673}
674
675static struct grep_expr *compile_pattern_or(struct grep_pat **list)
676{
677 struct grep_pat *p;
René Scharfe9dbf00b2022-01-06 09:51:00678 struct grep_expr *x, *y;
Junio C Hamano83b5d2f2006-09-17 23:02:52679
680 x = compile_pattern_and(list);
681 p = *list;
682 if (x && p && p->token != GREP_CLOSE_PAREN) {
683 y = compile_pattern_or(list);
684 if (!y)
685 die("not a pattern expression %s", p->pattern);
René Scharfe9dbf00b2022-01-06 09:51:00686 return grep_or_expr(x, y);
Junio C Hamano83b5d2f2006-09-17 23:02:52687 }
688 return x;
689}
690
/* Entry point of the expression parser: a full expression is an OR-expression. */
static struct grep_expr *compile_pattern_expr(struct grep_pat **list)
{
	struct grep_expr *expr = compile_pattern_or(list);

	return expr;
}
695
Junio C Hamano5aaeb732010-09-13 05:15:35696static struct grep_expr *grep_true_expr(void)
697{
698 struct grep_expr *z = xcalloc(1, sizeof(*z));
699 z->node = GREP_NODE_TRUE;
700 return z;
701}
702
Junio C Hamano95ce9ce2010-09-13 02:30:48703static struct grep_expr *prep_header_patterns(struct grep_opt *opt)
704{
705 struct grep_pat *p;
706 struct grep_expr *header_expr;
Junio C Hamano5aaeb732010-09-13 05:15:35707 struct grep_expr *(header_group[GREP_HEADER_FIELD_MAX]);
708 enum grep_header_field fld;
Junio C Hamano95ce9ce2010-09-13 02:30:48709
710 if (!opt->header_list)
711 return NULL;
Angus Hammond2385f242012-05-06 18:17:15712
Junio C Hamano95ce9ce2010-09-13 02:30:48713 for (p = opt->header_list; p; p = p->next) {
714 if (p->token != GREP_PATTERN_HEAD)
Johannes Schindelin033abf92018-05-02 09:38:39715 BUG("a non-header pattern in grep header list.");
Antoine Pelisse3ce3ffb2013-02-03 14:37:09716 if (p->field < GREP_HEADER_FIELD_MIN ||
717 GREP_HEADER_FIELD_MAX <= p->field)
Johannes Schindelin033abf92018-05-02 09:38:39718 BUG("unknown header field %d", p->field);
Junio C Hamano95ce9ce2010-09-13 02:30:48719 compile_regexp(p, opt);
720 }
Junio C Hamano5aaeb732010-09-13 05:15:35721
722 for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++)
723 header_group[fld] = NULL;
724
725 for (p = opt->header_list; p; p = p->next) {
726 struct grep_expr *h;
727 struct grep_pat *pp = p;
728
729 h = compile_pattern_atom(&pp);
730 if (!h || pp != p->next)
Johannes Schindelin033abf92018-05-02 09:38:39731 BUG("malformed header expr");
Junio C Hamano5aaeb732010-09-13 05:15:35732 if (!header_group[p->field]) {
733 header_group[p->field] = h;
734 continue;
735 }
736 header_group[p->field] = grep_or_expr(h, header_group[p->field]);
737 }
738
739 header_expr = NULL;
740
741 for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++) {
742 if (!header_group[fld])
743 continue;
744 if (!header_expr)
745 header_expr = grep_true_expr();
746 header_expr = grep_or_expr(header_group[fld], header_expr);
747 }
Junio C Hamano95ce9ce2010-09-13 02:30:48748 return header_expr;
749}
750
Junio C Hamano13e4fc72012-09-13 23:26:57751static struct grep_expr *grep_splice_or(struct grep_expr *x, struct grep_expr *y)
752{
753 struct grep_expr *z = x;
754
755 while (x) {
756 assert(x->node == GREP_NODE_OR);
757 if (x->u.binary.right &&
758 x->u.binary.right->node == GREP_NODE_TRUE) {
759 x->u.binary.right = y;
760 break;
761 }
762 x = x->u.binary.right;
763 }
764 return z;
765}
766
Ævar Arnfjörð Bjarmason15c96492021-01-25 23:36:51767void compile_grep_patterns(struct grep_opt *opt)
Junio C Hamano83b5d2f2006-09-17 23:02:52768{
769 struct grep_pat *p;
Junio C Hamano95ce9ce2010-09-13 02:30:48770 struct grep_expr *header_expr = prep_header_patterns(opt);
Ævar Arnfjörð Bjarmasondb843762022-10-11 09:48:45771 int extended = 0;
Junio C Hamano0ab7bef2006-09-28 00:50:52772
Junio C Hamano83b5d2f2006-09-17 23:02:52773 for (p = opt->pattern_list; p; p = p->next) {
Junio C Hamano480c1ca2006-09-20 19:39:46774 switch (p->token) {
775 case GREP_PATTERN: /* atom */
776 case GREP_PATTERN_HEAD:
777 case GREP_PATTERN_BODY:
René Scharfec8222552009-01-09 23:18:34778 compile_regexp(p, opt);
Junio C Hamano480c1ca2006-09-20 19:39:46779 break;
780 default:
Ævar Arnfjörð Bjarmasondb843762022-10-11 09:48:45781 extended = 1;
Junio C Hamano480c1ca2006-09-20 19:39:46782 break;
783 }
Junio C Hamano83b5d2f2006-09-17 23:02:52784 }
785
René Scharfe794c0002021-12-17 16:48:49786 if (opt->all_match || opt->no_body_match || header_expr)
Ævar Arnfjörð Bjarmasondb843762022-10-11 09:48:45787 extended = 1;
788 else if (!extended)
Junio C Hamano83b5d2f2006-09-17 23:02:52789 return;
790
Junio C Hamano83b5d2f2006-09-17 23:02:52791 p = opt->pattern_list;
Michele Ballabioba150a32009-03-18 20:53:27792 if (p)
793 opt->pattern_expression = compile_pattern_expr(&p);
Junio C Hamano83b5d2f2006-09-17 23:02:52794 if (p)
795 die("incomplete pattern expression: %s", p->pattern);
Junio C Hamano80235ba2010-01-18 04:09:06796
René Scharfe794c0002021-12-17 16:48:49797 if (opt->no_body_match && opt->pattern_expression)
798 opt->pattern_expression = grep_not_expr(opt->pattern_expression);
799
Junio C Hamano80235ba2010-01-18 04:09:06800 if (!header_expr)
801 return;
802
Junio C Hamano5aaeb732010-09-13 05:15:35803 if (!opt->pattern_expression)
Junio C Hamano80235ba2010-01-18 04:09:06804 opt->pattern_expression = header_expr;
Junio C Hamano13e4fc72012-09-13 23:26:57805 else if (opt->all_match)
806 opt->pattern_expression = grep_splice_or(header_expr,
807 opt->pattern_expression);
Junio C Hamano5aaeb732010-09-13 05:15:35808 else
809 opt->pattern_expression = grep_or_expr(opt->pattern_expression,
810 header_expr);
Junio C Hamano80235ba2010-01-18 04:09:06811 opt->all_match = 1;
Junio C Hamano83b5d2f2006-09-17 23:02:52812}
813
Junio C Hamanob48fb5b2006-09-27 23:27:10814static void free_pattern_expr(struct grep_expr *x)
815{
816 switch (x->node) {
Junio C Hamano5aaeb732010-09-13 05:15:35817 case GREP_NODE_TRUE:
Junio C Hamanob48fb5b2006-09-27 23:27:10818 case GREP_NODE_ATOM:
819 break;
820 case GREP_NODE_NOT:
821 free_pattern_expr(x->u.unary);
822 break;
823 case GREP_NODE_AND:
824 case GREP_NODE_OR:
825 free_pattern_expr(x->u.binary.left);
826 free_pattern_expr(x->u.binary.right);
827 break;
828 }
829 free(x);
830}
831
Ævar Arnfjörð Bjarmason891c9962023-02-06 23:07:50832static void free_grep_pat(struct grep_pat *pattern)
Junio C Hamanob48fb5b2006-09-27 23:27:10833{
834 struct grep_pat *p, *n;
835
Ævar Arnfjörð Bjarmason891c9962023-02-06 23:07:50836 for (p = pattern; p; p = n) {
Junio C Hamanob48fb5b2006-09-27 23:27:10837 n = p->next;
838 switch (p->token) {
839 case GREP_PATTERN: /* atom */
840 case GREP_PATTERN_HEAD:
841 case GREP_PATTERN_BODY:
Ævar Arnfjörð Bjarmason75997302021-01-24 01:58:33842 if (p->pcre2_pattern)
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56843 free_pcre2_pattern(p);
Michał Kiedrowicz63e7e9d2011-05-09 21:52:05844 else
845 regfree(&p->regexp);
René Scharfe526a8582012-05-20 14:33:07846 free(p->pattern);
Junio C Hamanob48fb5b2006-09-27 23:27:10847 break;
848 default:
849 break;
850 }
851 free(p);
852 }
Ævar Arnfjörð Bjarmason891c9962023-02-06 23:07:50853}
Junio C Hamanob48fb5b2006-09-27 23:27:10854
Ævar Arnfjörð Bjarmason891c9962023-02-06 23:07:50855void free_grep_patterns(struct grep_opt *opt)
856{
857 free_grep_pat(opt->pattern_list);
Ævar Arnfjörð Bjarmasonfb2ebe72023-02-06 23:07:51858 free_grep_pat(opt->header_list);
Ævar Arnfjörð Bjarmason891c9962023-02-06 23:07:50859
860 if (opt->pattern_expression)
861 free_pattern_expr(opt->pattern_expression);
Junio C Hamanob48fb5b2006-09-27 23:27:10862}
863
/*
 * Advance "cp" to the next '\n' or until "*left" bytes have been
 * consumed, whichever comes first.  "*left" is decremented by the number
 * of bytes skipped.  Returns the new position.
 */
static const char *end_of_line(const char *cp, unsigned long *left)
{
	unsigned long remaining;

	for (remaining = *left; remaining && *cp != '\n'; remaining--)
		cp++;
	*left = remaining;
	return cp;
}
874
/* Return 1 if "ch" is alphanumeric or an underscore, 0 otherwise. */
static int word_char(char ch)
{
	if (ch == '_')
		return 1;
	return isalnum(ch) ? 1 : 0;
}
879
Mark Lodato55f638b2010-03-07 16:52:46880static void output_color(struct grep_opt *opt, const void *data, size_t size,
881 const char *color)
882{
Jeff Kingdaa0c3d2011-08-18 05:04:23883 if (want_color(opt->color) && color && color[0]) {
Mark Lodato55f638b2010-03-07 16:52:46884 opt->output(opt, color, strlen(color));
885 opt->output(opt, data, size);
886 opt->output(opt, GIT_COLOR_RESET, strlen(GIT_COLOR_RESET));
887 } else
888 opt->output(opt, data, size);
889}
890
891static void output_sep(struct grep_opt *opt, char sign)
892{
893 if (opt->null_following_name)
894 opt->output(opt, "\0", 1);
895 else
Nguyễn Thái Ngọc Duyfa151dc2018-05-26 13:55:22896 output_color(opt, &sign, 1, opt->colors[GREP_COLOR_SEP]);
Mark Lodato55f638b2010-03-07 16:52:46897}
898
Raphael Zimmerer83caecc2008-10-01 16:11:15899static void show_name(struct grep_opt *opt, const char *name)
900{
Nguyễn Thái Ngọc Duyfa151dc2018-05-26 13:55:22901 output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]);
Fredrik Kuivinen5b594f42010-01-25 22:51:39902 opt->output(opt, opt->null_following_name ? "\0" : "\n", 1);
Raphael Zimmerer83caecc2008-10-01 16:11:15903}
904
Jeff King1a845fb2021-09-21 03:49:49905static int patmatch(struct grep_pat *p,
906 const char *line, const char *eol,
Michał Kiedrowicz97e77782011-05-04 22:00:19907 regmatch_t *match, int eflags)
908{
909 int hit;
910
Ævar Arnfjörð Bjarmason75997302021-01-24 01:58:33911 if (p->pcre2_pattern)
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56912 hit = !pcre2match(p, line, eol, match, eflags);
Michał Kiedrowicz97e77782011-05-04 22:00:19913 else
Johannes Schindelinb7d36ff2016-09-21 18:24:14914 hit = !regexec_buf(&p->regexp, line, eol - line, 1, match,
915 eflags);
Michał Kiedrowicz97e77782011-05-04 22:00:19916
917 return hit;
918}
919
/*
 * Scan backwards from "*eol_p" towards "bol" for the last '>' and, if
 * one is found, move "*eol_p" to just past it.  The byte at "bol" itself
 * is not examined.  When no '>' is found "*eol_p" is left unchanged.
 */
static void strip_timestamp(const char *bol, const char **eol_p)
{
	const char *cursor;

	for (cursor = *eol_p - 1; bol < cursor; cursor--) {
		if (*cursor == '>') {
			*eol_p = cursor + 1;
			return;
		}
	}
}
931
/*
 * Line prefixes recognised for header patterns, looked up by a pattern's
 * "field" value in match_one_pattern().  "len" is the length of the
 * prefix, trailing space included.
 */
static struct {
	const char *field;
	size_t len;
} header_field[] = {
	{ "author ", sizeof("author ") - 1 },
	{ "committer ", sizeof("committer ") - 1 },
	{ "reflog ", sizeof("reflog ") - 1 },
};
940
Hamza Mahfooz3f566c42021-09-29 11:57:15941static int headerless_match_one_pattern(struct grep_pat *p,
942 const char *bol, const char *eol,
943 enum grep_context ctx,
944 regmatch_t *pmatch, int eflags)
Junio C Hamano83b5d2f2006-09-17 23:02:52945{
946 int hit = 0;
René Scharfee701fad2009-05-20 21:31:53947 const char *start = bol;
Junio C Hamano83b5d2f2006-09-17 23:02:52948
Junio C Hamano480c1ca2006-09-20 19:39:46949 if ((p->token != GREP_PATTERN) &&
950 ((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD)))
951 return 0;
952
Junio C Hamano83b5d2f2006-09-17 23:02:52953 again:
Michał Kiedrowicz97e77782011-05-04 22:00:19954 hit = patmatch(p, bol, eol, pmatch, eflags);
Junio C Hamano83b5d2f2006-09-17 23:02:52955
René Scharfed7eb5272009-03-07 12:28:40956 if (hit && p->word_regexp) {
Junio C Hamano83b5d2f2006-09-17 23:02:52957 if ((pmatch[0].rm_so < 0) ||
René Scharfe84201ea2009-06-03 16:19:01958 (eol - bol) < pmatch[0].rm_so ||
Junio C Hamano83b5d2f2006-09-17 23:02:52959 (pmatch[0].rm_eo < 0) ||
960 (eol - bol) < pmatch[0].rm_eo)
961 die("regexp returned nonsense");
962
963 /* Match beginning must be either beginning of the
964 * line, or at word boundary (i.e. the last char must
965 * not be a word char). Similarly, match end must be
966 * either end of the line, or at word boundary
967 * (i.e. the next char must not be a word char).
968 */
René Scharfefb62eb72009-01-09 23:08:40969 if ( ((pmatch[0].rm_so == 0) ||
Junio C Hamano83b5d2f2006-09-17 23:02:52970 !word_char(bol[pmatch[0].rm_so-1])) &&
971 ((pmatch[0].rm_eo == (eol-bol)) ||
972 !word_char(bol[pmatch[0].rm_eo])) )
973 ;
974 else
975 hit = 0;
976
René Scharfe84201ea2009-06-03 16:19:01977 /* Words consist of at least one character. */
978 if (pmatch->rm_so == pmatch->rm_eo)
979 hit = 0;
980
Junio C Hamano83b5d2f2006-09-17 23:02:52981 if (!hit && pmatch[0].rm_so + bol + 1 < eol) {
982 /* There could be more than one match on the
983 * line, and the first match might not be
984 * strict word match. But later ones could be!
René Scharfefb62eb72009-01-09 23:08:40985 * Forward to the next possible start, i.e. the
986 * next position following a non-word char.
Junio C Hamano83b5d2f2006-09-17 23:02:52987 */
988 bol = pmatch[0].rm_so + bol + 1;
René Scharfefb62eb72009-01-09 23:08:40989 while (word_char(bol[-1]) && bol < eol)
990 bol++;
René Scharfedbb6a4a2009-05-23 11:45:26991 eflags |= REG_NOTBOL;
René Scharfefb62eb72009-01-09 23:08:40992 if (bol < eol)
993 goto again;
Junio C Hamano83b5d2f2006-09-17 23:02:52994 }
995 }
René Scharfee701fad2009-05-20 21:31:53996 if (hit) {
997 pmatch[0].rm_so += bol - start;
998 pmatch[0].rm_eo += bol - start;
999 }
Junio C Hamano83b5d2f2006-09-17 23:02:521000 return hit;
1001}
1002
Hamza Mahfooz3f566c42021-09-29 11:57:151003static int match_one_pattern(struct grep_pat *p,
1004 const char *bol, const char *eol,
1005 enum grep_context ctx, regmatch_t *pmatch,
1006 int eflags)
1007{
1008 const char *field;
1009 size_t len;
1010
1011 if (p->token == GREP_PATTERN_HEAD) {
1012 assert(p->field < ARRAY_SIZE(header_field));
1013 field = header_field[p->field].field;
1014 len = header_field[p->field].len;
1015 if (strncmp(bol, field, len))
1016 return 0;
1017 bol += len;
1018
1019 switch (p->field) {
1020 case GREP_HEADER_AUTHOR:
1021 case GREP_HEADER_COMMITTER:
1022 strip_timestamp(bol, &eol);
1023 break;
1024 default:
1025 break;
1026 }
1027 }
1028
1029 return headerless_match_one_pattern(p, bol, eol, ctx, pmatch, eflags);
1030}
1031
1032
Jeff King1a845fb2021-09-21 03:49:491033static int match_expr_eval(struct grep_opt *opt, struct grep_expr *x,
1034 const char *bol, const char *eol,
1035 enum grep_context ctx, ssize_t *col,
Taylor Blau68d686e2018-06-22 15:49:351036 ssize_t *icol, int collect_hits)
Junio C Hamano83b5d2f2006-09-17 23:02:521037{
Junio C Hamano0ab7bef2006-09-28 00:50:521038 int h = 0;
Junio C Hamano0ab7bef2006-09-28 00:50:521039
Junio C Hamano83b5d2f2006-09-17 23:02:521040 switch (x->node) {
Junio C Hamano5aaeb732010-09-13 05:15:351041 case GREP_NODE_TRUE:
1042 h = 1;
1043 break;
Junio C Hamano83b5d2f2006-09-17 23:02:521044 case GREP_NODE_ATOM:
Taylor Blau68d686e2018-06-22 15:49:351045 {
1046 regmatch_t tmp;
1047 h = match_one_pattern(x->u.atom, bol, eol, ctx,
1048 &tmp, 0);
1049 if (h && (*col < 0 || tmp.rm_so < *col))
1050 *col = tmp.rm_so;
1051 }
René Scharfe794c0002021-12-17 16:48:491052 if (x->u.atom->token == GREP_PATTERN_BODY)
1053 opt->body_hit |= h;
Junio C Hamano83b5d2f2006-09-17 23:02:521054 break;
1055 case GREP_NODE_NOT:
Taylor Blau68d686e2018-06-22 15:49:351056 /*
1057 * Upon visiting a GREP_NODE_NOT, col and icol become swapped.
1058 */
1059 h = !match_expr_eval(opt, x->u.unary, bol, eol, ctx, icol, col,
1060 0);
Junio C Hamano0ab7bef2006-09-28 00:50:521061 break;
Junio C Hamano83b5d2f2006-09-17 23:02:521062 case GREP_NODE_AND:
Taylor Blau017c0fc2018-06-22 15:49:391063 h = match_expr_eval(opt, x->u.binary.left, bol, eol, ctx, col,
Taylor Blau68d686e2018-06-22 15:49:351064 icol, 0);
Taylor Blau017c0fc2018-06-22 15:49:391065 if (h || opt->columnnum) {
1066 /*
1067 * Don't short-circuit AND when given --column, since a
1068 * NOT earlier in the tree may turn this into an OR. In
1069 * this case, see the below comment.
1070 */
1071 h &= match_expr_eval(opt, x->u.binary.right, bol, eol,
1072 ctx, col, icol, 0);
1073 }
Junio C Hamano0ab7bef2006-09-28 00:50:521074 break;
Junio C Hamano83b5d2f2006-09-17 23:02:521075 case GREP_NODE_OR:
Taylor Blau017c0fc2018-06-22 15:49:391076 if (!(collect_hits || opt->columnnum)) {
1077 /*
1078 * Don't short-circuit OR when given --column (or
1079 * collecting hits) to ensure we don't skip a later
1080 * child that would produce an earlier match.
1081 */
Taylor Blau68d686e2018-06-22 15:49:351082 return (match_expr_eval(opt, x->u.binary.left, bol, eol,
1083 ctx, col, icol, 0) ||
1084 match_expr_eval(opt, x->u.binary.right, bol,
1085 eol, ctx, col, icol, 0));
Taylor Blau017c0fc2018-06-22 15:49:391086 }
Taylor Blau68d686e2018-06-22 15:49:351087 h = match_expr_eval(opt, x->u.binary.left, bol, eol, ctx, col,
1088 icol, 0);
Taylor Blau017c0fc2018-06-22 15:49:391089 if (collect_hits)
1090 x->u.binary.left->hit |= h;
Taylor Blau68d686e2018-06-22 15:49:351091 h |= match_expr_eval(opt, x->u.binary.right, bol, eol, ctx, col,
Taylor Blau017c0fc2018-06-22 15:49:391092 icol, collect_hits);
Junio C Hamano0ab7bef2006-09-28 00:50:521093 break;
1094 default:
Alexander Potashevd7530702009-01-04 18:38:411095 die("Unexpected node type (internal error) %d", x->node);
Junio C Hamano83b5d2f2006-09-17 23:02:521096 }
Junio C Hamano0ab7bef2006-09-28 00:50:521097 if (collect_hits)
1098 x->hit |= h;
1099 return h;
Junio C Hamano83b5d2f2006-09-17 23:02:521100}
1101
Jeff King1a845fb2021-09-21 03:49:491102static int match_expr(struct grep_opt *opt,
1103 const char *bol, const char *eol,
Taylor Blau68d686e2018-06-22 15:49:351104 enum grep_context ctx, ssize_t *col,
1105 ssize_t *icol, int collect_hits)
Junio C Hamano83b5d2f2006-09-17 23:02:521106{
1107 struct grep_expr *x = opt->pattern_expression;
Taylor Blau68d686e2018-06-22 15:49:351108 return match_expr_eval(opt, x, bol, eol, ctx, col, icol, collect_hits);
Junio C Hamano83b5d2f2006-09-17 23:02:521109}
1110
Jeff King1a845fb2021-09-21 03:49:491111static int match_line(struct grep_opt *opt,
1112 const char *bol, const char *eol,
Taylor Blau68d686e2018-06-22 15:49:351113 ssize_t *col, ssize_t *icol,
Junio C Hamano0ab7bef2006-09-28 00:50:521114 enum grep_context ctx, int collect_hits)
Junio C Hamano83b5d2f2006-09-17 23:02:521115{
1116 struct grep_pat *p;
Taylor Blau017c0fc2018-06-22 15:49:391117 int hit = 0;
René Scharfe79212772009-03-07 12:30:271118
Ævar Arnfjörð Bjarmasondb843762022-10-11 09:48:451119 if (opt->pattern_expression)
Taylor Blau68d686e2018-06-22 15:49:351120 return match_expr(opt, bol, eol, ctx, col, icol,
1121 collect_hits);
Junio C Hamano0ab7bef2006-09-28 00:50:521122
1123 /* we do not call with collect_hits without being extended */
Junio C Hamano83b5d2f2006-09-17 23:02:521124 for (p = opt->pattern_list; p; p = p->next) {
Taylor Blau68d686e2018-06-22 15:49:351125 regmatch_t tmp;
1126 if (match_one_pattern(p, bol, eol, ctx, &tmp, 0)) {
Taylor Blau017c0fc2018-06-22 15:49:391127 hit |= 1;
1128 if (!opt->columnnum) {
1129 /*
1130 * Without --column, any single match on a line
1131 * is enough to know that it needs to be
1132 * printed. With --column, scan _all_ patterns
1133 * to find the earliest.
1134 */
1135 break;
1136 }
1137 if (*col < 0 || tmp.rm_so < *col)
1138 *col = tmp.rm_so;
Taylor Blau68d686e2018-06-22 15:49:351139 }
Junio C Hamano83b5d2f2006-09-17 23:02:521140 }
Taylor Blau017c0fc2018-06-22 15:49:391141 return hit;
Junio C Hamano83b5d2f2006-09-17 23:02:521142}
1143
Jeff King1a845fb2021-09-21 03:49:491144static int match_next_pattern(struct grep_pat *p,
1145 const char *bol, const char *eol,
René Scharfe7e8f59d2009-03-07 12:32:321146 enum grep_context ctx,
1147 regmatch_t *pmatch, int eflags)
1148{
1149 regmatch_t match;
1150
Hamza Mahfooz3f566c42021-09-29 11:57:151151 if (!headerless_match_one_pattern(p, bol, eol, ctx, &match, eflags))
René Scharfe7e8f59d2009-03-07 12:32:321152 return 0;
1153 if (match.rm_so < 0 || match.rm_eo < 0)
1154 return 0;
1155 if (pmatch->rm_so >= 0 && pmatch->rm_eo >= 0) {
1156 if (match.rm_so > pmatch->rm_so)
1157 return 1;
1158 if (match.rm_so == pmatch->rm_so && match.rm_eo < pmatch->rm_eo)
1159 return 1;
1160 }
1161 pmatch->rm_so = match.rm_so;
1162 pmatch->rm_eo = match.rm_eo;
1163 return 1;
1164}
1165
Hamza Mahfooz3f566c42021-09-29 11:57:151166int grep_next_match(struct grep_opt *opt,
1167 const char *bol, const char *eol,
1168 enum grep_context ctx, regmatch_t *pmatch,
1169 enum grep_header_field field, int eflags)
René Scharfe7e8f59d2009-03-07 12:32:321170{
1171 struct grep_pat *p;
1172 int hit = 0;
1173
1174 pmatch->rm_so = pmatch->rm_eo = -1;
1175 if (bol < eol) {
Hamza Mahfooz3f566c42021-09-29 11:57:151176 for (p = ((ctx == GREP_CONTEXT_HEAD)
1177 ? opt->header_list : opt->pattern_list);
1178 p; p = p->next) {
René Scharfe7e8f59d2009-03-07 12:32:321179 switch (p->token) {
René Scharfe7e8f59d2009-03-07 12:32:321180 case GREP_PATTERN_HEAD:
Hamza Mahfooz3f566c42021-09-29 11:57:151181 if ((field != GREP_HEADER_FIELD_MAX) &&
1182 (p->field != field))
1183 continue;
1184 /* fall thru */
1185 case GREP_PATTERN: /* atom */
René Scharfe7e8f59d2009-03-07 12:32:321186 case GREP_PATTERN_BODY:
1187 hit |= match_next_pattern(p, bol, eol, ctx,
1188 pmatch, eflags);
1189 break;
1190 default:
1191 break;
1192 }
1193 }
1194 }
1195 return hit;
1196}
1197
Taylor Blauc707ded2018-07-03 21:51:561198static void show_line_header(struct grep_opt *opt, const char *name,
1199 unsigned lno, ssize_t cno, char sign)
René Scharfe7e8f59d2009-03-07 12:32:321200{
René Scharfe1d84f722011-06-05 15:24:361201 if (opt->heading && opt->last_shown == 0) {
Nguyễn Thái Ngọc Duyfa151dc2018-05-26 13:55:221202 output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]);
René Scharfe1d84f722011-06-05 15:24:361203 opt->output(opt, "\n", 1);
1204 }
René Scharfe5dd06d32009-07-01 22:02:381205 opt->last_shown = lno;
1206
René Scharfe1d84f722011-06-05 15:24:361207 if (!opt->heading && opt->pathname) {
Nguyễn Thái Ngọc Duyfa151dc2018-05-26 13:55:221208 output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]);
Mark Lodato55f638b2010-03-07 16:52:461209 output_sep(opt, sign);
Fredrik Kuivinen5b594f42010-01-25 22:51:391210 }
1211 if (opt->linenum) {
1212 char buf[32];
Jeff King1a168e52017-03-28 19:46:561213 xsnprintf(buf, sizeof(buf), "%d", lno);
Nguyễn Thái Ngọc Duyfa151dc2018-05-26 13:55:221214 output_color(opt, buf, strlen(buf), opt->colors[GREP_COLOR_LINENO]);
Mark Lodato55f638b2010-03-07 16:52:461215 output_sep(opt, sign);
Fredrik Kuivinen5b594f42010-01-25 22:51:391216 }
Taylor Blau89252cd2018-06-22 15:49:421217 /*
1218 * Treat 'cno' as the 1-indexed offset from the start of a non-context
1219 * line to its first match. Otherwise, 'cno' is 0 indicating that we are
1220 * being called with a context line.
1221 */
1222 if (opt->columnnum && cno) {
1223 char buf[32];
1224 xsnprintf(buf, sizeof(buf), "%"PRIuMAX, (uintmax_t)cno);
Junio C Hamanod036d662018-07-18 19:20:311225 output_color(opt, buf, strlen(buf), opt->colors[GREP_COLOR_COLUMNNO]);
Taylor Blau89252cd2018-06-22 15:49:421226 output_sep(opt, sign);
1227 }
Taylor Blauc707ded2018-07-03 21:51:561228}
1229
Jeff King1a845fb2021-09-21 03:49:491230static void show_line(struct grep_opt *opt,
1231 const char *bol, const char *eol,
Taylor Blauc707ded2018-07-03 21:51:561232 const char *name, unsigned lno, ssize_t cno, char sign)
1233{
1234 int rest = eol - bol;
Taylor Blau9d8db062018-07-09 20:33:471235 const char *match_color = NULL;
1236 const char *line_color = NULL;
Taylor Blauc707ded2018-07-03 21:51:561237
1238 if (opt->file_break && opt->last_shown == 0) {
1239 if (opt->show_hunk_mark)
1240 opt->output(opt, "\n", 1);
1241 } else if (opt->pre_context || opt->post_context || opt->funcbody) {
1242 if (opt->last_shown == 0) {
1243 if (opt->show_hunk_mark) {
Junio C Hamano87ece7c2018-08-02 22:30:441244 output_color(opt, "--", 2, opt->colors[GREP_COLOR_SEP]);
Taylor Blauc707ded2018-07-03 21:51:561245 opt->output(opt, "\n", 1);
1246 }
1247 } else if (lno > opt->last_shown + 1) {
Junio C Hamano87ece7c2018-08-02 22:30:441248 output_color(opt, "--", 2, opt->colors[GREP_COLOR_SEP]);
Taylor Blauc707ded2018-07-03 21:51:561249 opt->output(opt, "\n", 1);
1250 }
1251 }
Taylor Blau9d8db062018-07-09 20:33:471252 if (!opt->only_matching) {
1253 /*
1254 * In case the line we're being called with contains more than
1255 * one match, leave printing each header to the loop below.
1256 */
1257 show_line_header(opt, name, lno, cno, sign);
1258 }
1259 if (opt->color || opt->only_matching) {
René Scharfe7e8f59d2009-03-07 12:32:321260 regmatch_t match;
1261 enum grep_context ctx = GREP_CONTEXT_BODY;
René Scharfe7e8f59d2009-03-07 12:32:321262 int eflags = 0;
1263
Taylor Blau9d8db062018-07-09 20:33:471264 if (opt->color) {
1265 if (sign == ':')
Junio C Hamano87ece7c2018-08-02 22:30:441266 match_color = opt->colors[GREP_COLOR_MATCH_SELECTED];
Taylor Blau9d8db062018-07-09 20:33:471267 else
Junio C Hamano87ece7c2018-08-02 22:30:441268 match_color = opt->colors[GREP_COLOR_MATCH_CONTEXT];
Taylor Blau9d8db062018-07-09 20:33:471269 if (sign == ':')
Junio C Hamano87ece7c2018-08-02 22:30:441270 line_color = opt->colors[GREP_COLOR_SELECTED];
Taylor Blau9d8db062018-07-09 20:33:471271 else if (sign == '-')
Junio C Hamano87ece7c2018-08-02 22:30:441272 line_color = opt->colors[GREP_COLOR_CONTEXT];
Taylor Blau9d8db062018-07-09 20:33:471273 else if (sign == '=')
Junio C Hamano87ece7c2018-08-02 22:30:441274 line_color = opt->colors[GREP_COLOR_FUNCTION];
Taylor Blau9d8db062018-07-09 20:33:471275 }
Hamza Mahfooz3f566c42021-09-29 11:57:151276 while (grep_next_match(opt, bol, eol, ctx, &match,
1277 GREP_HEADER_FIELD_MAX, eflags)) {
René Scharfe1f5b9cc2009-06-01 21:53:051278 if (match.rm_so == match.rm_eo)
1279 break;
Fredrik Kuivinen5b594f42010-01-25 22:51:391280
Taylor Blau9d8db062018-07-09 20:33:471281 if (opt->only_matching)
1282 show_line_header(opt, name, lno, cno, sign);
1283 else
1284 output_color(opt, bol, match.rm_so, line_color);
Mark Lodato55f638b2010-03-07 16:52:461285 output_color(opt, bol + match.rm_so,
René Scharfe79a77102014-10-27 18:23:051286 match.rm_eo - match.rm_so, match_color);
Taylor Blau9d8db062018-07-09 20:33:471287 if (opt->only_matching)
1288 opt->output(opt, "\n", 1);
René Scharfe7e8f59d2009-03-07 12:32:321289 bol += match.rm_eo;
Taylor Blau9d8db062018-07-09 20:33:471290 cno += match.rm_eo;
René Scharfe7e8f59d2009-03-07 12:32:321291 rest -= match.rm_eo;
1292 eflags = REG_NOTBOL;
1293 }
René Scharfe7e8f59d2009-03-07 12:32:321294 }
Taylor Blau9d8db062018-07-09 20:33:471295 if (!opt->only_matching) {
1296 output_color(opt, bol, rest, line_color);
1297 opt->output(opt, "\n", 1);
1298 }
René Scharfe7e8f59d2009-03-07 12:32:321299}
1300
/*
 * When non-zero, grep_attr_lock() and grep_attr_unlock() take and
 * release grep_attr_mutex; when zero they do nothing.
 */
int grep_use_locks;

/*
 * This lock protects access to the gitattributes machinery, which is
 * not thread-safe.
 */
pthread_mutex_t grep_attr_mutex;
1308
Jeff King78db6ea2012-02-02 08:18:291309static inline void grep_attr_lock(void)
Thomas Rast0579f912011-12-12 21:16:071310{
Jeff King78db6ea2012-02-02 08:18:291311 if (grep_use_locks)
Thomas Rast0579f912011-12-12 21:16:071312 pthread_mutex_lock(&grep_attr_mutex);
1313}
1314
Jeff King78db6ea2012-02-02 08:18:291315static inline void grep_attr_unlock(void)
Thomas Rast0579f912011-12-12 21:16:071316{
Jeff King78db6ea2012-02-02 08:18:291317 if (grep_use_locks)
Thomas Rast0579f912011-12-12 21:16:071318 pthread_mutex_unlock(&grep_attr_mutex);
1319}
Jeff Kingb3aeb282012-02-02 08:18:411320
Jeff King1a845fb2021-09-21 03:49:491321static int match_funcname(struct grep_opt *opt, struct grep_source *gs,
1322 const char *bol, const char *eol)
René Scharfe2944e4e2009-07-01 22:06:341323{
René Scharfe60ecac92009-07-01 22:07:241324 xdemitconf_t *xecfg = opt->priv;
Thomas Rast0579f912011-12-12 21:16:071325 if (xecfg && !xecfg->find_func) {
Nguyễn Thái Ngọc Duyacd00ea2018-09-21 15:57:331326 grep_source_load_driver(gs, opt->repo->index);
Jeff King94ad9d92012-02-02 08:20:431327 if (gs->driver->funcname.pattern) {
1328 const struct userdiff_funcname *pe = &gs->driver->funcname;
Thomas Rast0579f912011-12-12 21:16:071329 xdiff_set_find_func(xecfg, pe->pattern, pe->cflags);
1330 } else {
1331 xecfg = opt->priv = NULL;
1332 }
1333 }
1334
1335 if (xecfg) {
René Scharfe60ecac92009-07-01 22:07:241336 char buf[1];
1337 return xecfg->find_func(bol, eol - bol, buf, 1,
1338 xecfg->find_func_priv) >= 0;
1339 }
1340
René Scharfe2944e4e2009-07-01 22:06:341341 if (bol == eol)
1342 return 0;
1343 if (isalpha(*bol) || *bol == '_' || *bol == '$')
1344 return 1;
1345 return 0;
1346}
1347
Jeff Kinge1327022012-02-02 08:19:281348static void show_funcname_line(struct grep_opt *opt, struct grep_source *gs,
Jeff King1a845fb2021-09-21 03:49:491349 const char *bol, unsigned lno)
René Scharfe2944e4e2009-07-01 22:06:341350{
Jeff Kinge1327022012-02-02 08:19:281351 while (bol > gs->buf) {
Jeff King1a845fb2021-09-21 03:49:491352 const char *eol = --bol;
René Scharfe2944e4e2009-07-01 22:06:341353
Jeff Kinge1327022012-02-02 08:19:281354 while (bol > gs->buf && bol[-1] != '\n')
René Scharfe2944e4e2009-07-01 22:06:341355 bol--;
1356 lno--;
1357
1358 if (lno <= opt->last_shown)
1359 break;
1360
Jeff Kinge1327022012-02-02 08:19:281361 if (match_funcname(opt, gs, bol, eol)) {
Taylor Blau89252cd2018-06-22 15:49:421362 show_line(opt, bol, eol, gs->name, lno, 0, '=');
René Scharfe2944e4e2009-07-01 22:06:341363 break;
1364 }
1365 }
1366}
1367
/* Defined further down; declared here because show_pre_context() uses it. */
static int is_empty_line(const char *bol, const char *eol);
1369
Jeff Kinge1327022012-02-02 08:19:281370static void show_pre_context(struct grep_opt *opt, struct grep_source *gs,
Jeff King1a845fb2021-09-21 03:49:491371 const char *bol, const char *end, unsigned lno)
René Scharfe49de3212009-07-01 22:05:171372{
René Scharfe6653a012017-11-18 18:07:131373 unsigned cur = lno, from = 1, funcname_lno = 0, orig_from;
René Scharfea5dc20b2017-11-18 18:08:081374 int funcname_needed = !!opt->funcname, comment_needed = 0;
René Scharfeba8ea742011-08-01 17:20:531375
René Scharfe49de3212009-07-01 22:05:171376 if (opt->pre_context < lno)
1377 from = lno - opt->pre_context;
1378 if (from <= opt->last_shown)
1379 from = opt->last_shown + 1;
René Scharfe6653a012017-11-18 18:07:131380 orig_from = from;
René Scharfea5dc20b2017-11-18 18:08:081381 if (opt->funcbody) {
1382 if (match_funcname(opt, gs, bol, end))
1383 comment_needed = 1;
1384 else
1385 funcname_needed = 1;
René Scharfe6653a012017-11-18 18:07:131386 from = opt->last_shown + 1;
1387 }
René Scharfe49de3212009-07-01 22:05:171388
1389 /* Rewind. */
René Scharfe6653a012017-11-18 18:07:131390 while (bol > gs->buf && cur > from) {
Jeff King1a845fb2021-09-21 03:49:491391 const char *next_bol = bol;
1392 const char *eol = --bol;
René Scharfe2944e4e2009-07-01 22:06:341393
Jeff Kinge1327022012-02-02 08:19:281394 while (bol > gs->buf && bol[-1] != '\n')
René Scharfe49de3212009-07-01 22:05:171395 bol--;
1396 cur--;
René Scharfea5dc20b2017-11-18 18:08:081397 if (comment_needed && (is_empty_line(bol, eol) ||
1398 match_funcname(opt, gs, bol, eol))) {
1399 comment_needed = 0;
1400 from = orig_from;
1401 if (cur < from) {
1402 cur++;
1403 bol = next_bol;
1404 break;
1405 }
1406 }
Jeff Kinge1327022012-02-02 08:19:281407 if (funcname_needed && match_funcname(opt, gs, bol, eol)) {
René Scharfe2944e4e2009-07-01 22:06:341408 funcname_lno = cur;
1409 funcname_needed = 0;
René Scharfea5dc20b2017-11-18 18:08:081410 if (opt->funcbody)
1411 comment_needed = 1;
1412 else
1413 from = orig_from;
René Scharfe2944e4e2009-07-01 22:06:341414 }
René Scharfe49de3212009-07-01 22:05:171415 }
1416
René Scharfe2944e4e2009-07-01 22:06:341417 /* We need to look even further back to find a function signature. */
1418 if (opt->funcname && funcname_needed)
Jeff Kinge1327022012-02-02 08:19:281419 show_funcname_line(opt, gs, bol, cur);
René Scharfe2944e4e2009-07-01 22:06:341420
René Scharfe49de3212009-07-01 22:05:171421 /* Back forward. */
1422 while (cur < lno) {
Jeff King1a845fb2021-09-21 03:49:491423 const char *eol = bol, sign = (cur == funcname_lno) ? '=' : '-';
René Scharfe49de3212009-07-01 22:05:171424
1425 while (*eol != '\n')
1426 eol++;
Taylor Blau89252cd2018-06-22 15:49:421427 show_line(opt, bol, eol, gs->name, cur, 0, sign);
René Scharfe49de3212009-07-01 22:05:171428 bol = eol + 1;
1429 cur++;
1430 }
1431}
1432
Junio C Hamanoa26345b2010-01-11 06:39:361433static int should_lookahead(struct grep_opt *opt)
1434{
1435 struct grep_pat *p;
1436
Ævar Arnfjörð Bjarmasondb843762022-10-11 09:48:451437 if (opt->pattern_expression)
Junio C Hamanoa26345b2010-01-11 06:39:361438 return 0; /* punt for too complex stuff */
1439 if (opt->invert)
1440 return 0;
1441 for (p = opt->pattern_list; p; p = p->next) {
1442 if (p->token != GREP_PATTERN)
1443 return 0; /* punt for "header only" and stuff */
1444 }
1445 return 1;
1446}
1447
1448static int look_ahead(struct grep_opt *opt,
1449 unsigned long *left_p,
1450 unsigned *lno_p,
Jeff King1a845fb2021-09-21 03:49:491451 const char **bol_p)
Junio C Hamanoa26345b2010-01-11 06:39:361452{
1453 unsigned lno = *lno_p;
Jeff King1a845fb2021-09-21 03:49:491454 const char *bol = *bol_p;
Junio C Hamanoa26345b2010-01-11 06:39:361455 struct grep_pat *p;
Jeff King1a845fb2021-09-21 03:49:491456 const char *sp, *last_bol;
Junio C Hamanoa26345b2010-01-11 06:39:361457 regoff_t earliest = -1;
1458
1459 for (p = opt->pattern_list; p; p = p->next) {
1460 int hit;
1461 regmatch_t m;
1462
Michał Kiedrowicz97e77782011-05-04 22:00:191463 hit = patmatch(p, bol, bol + *left_p, &m, 0);
Junio C Hamanoa26345b2010-01-11 06:39:361464 if (!hit || m.rm_so < 0 || m.rm_eo < 0)
1465 continue;
1466 if (earliest < 0 || m.rm_so < earliest)
1467 earliest = m.rm_so;
1468 }
1469
1470 if (earliest < 0) {
1471 *bol_p = bol + *left_p;
1472 *left_p = 0;
1473 return 1;
1474 }
1475 for (sp = bol + earliest; bol < sp && sp[-1] != '\n'; sp--)
1476 ; /* find the beginning of the line */
1477 last_bol = sp;
1478
1479 for (sp = bol; sp < last_bol; sp++) {
1480 if (*sp == '\n')
1481 lno++;
1482 }
1483 *left_p -= last_bol - bol;
1484 *bol_p = last_bol;
1485 *lno_p = lno;
1486 return 0;
1487}
1488
Nguyễn Thái Ngọc Duy38bbc2e2018-09-21 15:57:231489static int fill_textconv_grep(struct repository *r,
1490 struct userdiff_driver *driver,
Jeff King335ec3b2013-05-10 15:10:151491 struct grep_source *gs)
1492{
1493 struct diff_filespec *df;
1494 char *buf;
1495 size_t size;
1496
1497 if (!driver || !driver->textconv)
1498 return grep_source_load(gs);
1499
1500 /*
1501 * The textconv interface is intimately tied to diff_filespecs, so we
1502 * have to pretend to be one. If we could unify the grep_source
1503 * and diff_filespec structs, this mess could just go away.
1504 */
1505 df = alloc_filespec(gs->path);
1506 switch (gs->type) {
Brandon Williams1c41c822017-05-30 17:30:441507 case GREP_SOURCE_OID:
Jeff King335ec3b2013-05-10 15:10:151508 fill_filespec(df, gs->identifier, 1, 0100644);
1509 break;
1510 case GREP_SOURCE_FILE:
brian m. carlson14228442021-04-26 01:02:561511 fill_filespec(df, null_oid(), 0, 0100644);
Jeff King335ec3b2013-05-10 15:10:151512 break;
1513 default:
Johannes Schindelin033abf92018-05-02 09:38:391514 BUG("attempt to textconv something without a path?");
Jeff King335ec3b2013-05-10 15:10:151515 }
1516
1517 /*
Matheus Tavares1d1729c2020-01-16 02:39:541518 * fill_textconv is not remotely thread-safe; it modifies the global
1519 * diff tempfile structure, writes to the_repo's odb and might
1520 * internally call thread-unsafe functions such as the
1521 * prepare_packed_git() lazy-initializator. Because of the last two, we
1522 * must ensure mutual exclusion between this call and the object reading
1523 * API, thus we use obj_read_lock() here.
1524 *
1525 * TODO: allowing text conversion to run in parallel with object
1526 * reading operations might increase performance in the multithreaded
1527 * non-worktreee git-grep with --textconv.
Jeff King335ec3b2013-05-10 15:10:151528 */
Matheus Tavares1d1729c2020-01-16 02:39:541529 obj_read_lock();
Nguyễn Thái Ngọc Duy38bbc2e2018-09-21 15:57:231530 size = fill_textconv(r, driver, df, &buf);
Matheus Tavares1d1729c2020-01-16 02:39:541531 obj_read_unlock();
Jeff King335ec3b2013-05-10 15:10:151532 free_filespec(df);
1533
1534 /*
1535 * The normal fill_textconv usage by the diff machinery would just keep
1536 * the textconv'd buf separate from the diff_filespec. But much of the
1537 * grep code passes around a grep_source and assumes that its "buf"
1538 * pointer is the beginning of the thing we are searching. So let's
1539 * install our textconv'd version into the grep_source, taking care not
1540 * to leak any existing buffer.
1541 */
1542 grep_source_clear_data(gs);
1543 gs->buf = buf;
1544 gs->size = size;
1545
1546 return 0;
1547}
1548
/* Return 1 if [bol, eol) contains nothing but whitespace, 0 otherwise. */
static int is_empty_line(const char *bol, const char *eol)
{
	const char *p;

	for (p = bol; p < eol; p++)
		if (!isspace(*p))
			break;
	return p == eol;
}
1555
Jeff Kinge1327022012-02-02 08:19:281556static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int collect_hits)
Junio C Hamano83b5d2f2006-09-17 23:02:521557{
Jeff King1a845fb2021-09-21 03:49:491558 const char *bol;
1559 const char *peek_bol = NULL;
Jeff Kinge1327022012-02-02 08:19:281560 unsigned long left;
Junio C Hamano83b5d2f2006-09-17 23:02:521561 unsigned lno = 1;
Junio C Hamano83b5d2f2006-09-17 23:02:521562 unsigned last_hit = 0;
Junio C Hamano83b5d2f2006-09-17 23:02:521563 int binary_match_only = 0;
Junio C Hamano83b5d2f2006-09-17 23:02:521564 unsigned count = 0;
Junio C Hamanoa26345b2010-01-11 06:39:361565 int try_lookahead = 0;
René Scharfeba8ea742011-08-01 17:20:531566 int show_function = 0;
Jeff King335ec3b2013-05-10 15:10:151567 struct userdiff_driver *textconv = NULL;
Junio C Hamano480c1ca2006-09-20 19:39:461568 enum grep_context ctx = GREP_CONTEXT_HEAD;
René Scharfe60ecac92009-07-01 22:07:241569 xdemitconf_t xecfg;
Junio C Hamano83b5d2f2006-09-17 23:02:521570
Emily Shafferde99eb02019-05-23 20:23:561571 if (!opt->status_only && gs->name == NULL)
1572 BUG("grep call which could print a name requires "
1573 "grep_source.name be non-NULL");
1574
Fredrik Kuivinen5b594f42010-01-25 22:51:391575 if (!opt->output)
1576 opt->output = std_output;
1577
René Scharfeba8ea742011-08-01 17:20:531578 if (opt->pre_context || opt->post_context || opt->file_break ||
1579 opt->funcbody) {
René Scharfe08303c32011-06-05 15:24:151580 /* Show hunk marks, except for the first file. */
1581 if (opt->last_shown)
1582 opt->show_hunk_mark = 1;
1583 /*
1584 * If we're using threads then we can't easily identify
1585 * the first file. Always put hunk marks in that case
1586 * and skip the very first one later in work_done().
1587 */
1588 if (opt->output != std_output)
1589 opt->show_hunk_mark = 1;
1590 }
René Scharfe431d6e72010-03-15 16:21:101591 opt->last_shown = 0;
1592
Jeff King335ec3b2013-05-10 15:10:151593 if (opt->allow_textconv) {
Nguyễn Thái Ngọc Duyacd00ea2018-09-21 15:57:331594 grep_source_load_driver(gs, opt->repo->index);
Jeff King335ec3b2013-05-10 15:10:151595 /*
1596 * We might set up the shared textconv cache data here, which
Matheus Tavares1d1729c2020-01-16 02:39:541597 * is not thread-safe. Also, get_oid_with_context() and
1598 * parse_object() might be internally called. As they are not
Steve Kemp84544f22020-07-29 03:33:281599 * currently thread-safe and might be racy with object reading,
Matheus Tavares1d1729c2020-01-16 02:39:541600 * obj_read_lock() must be called.
Jeff King335ec3b2013-05-10 15:10:151601 */
1602 grep_attr_lock();
Matheus Tavares1d1729c2020-01-16 02:39:541603 obj_read_lock();
Nguyễn Thái Ngọc Duybd7ad452018-11-10 05:49:061604 textconv = userdiff_get_textconv(opt->repo, gs->driver);
Matheus Tavares1d1729c2020-01-16 02:39:541605 obj_read_unlock();
Jeff King335ec3b2013-05-10 15:10:151606 grep_attr_unlock();
1607 }
1608
1609 /*
1610 * We know the result of a textconv is text, so we only have to care
1611 * about binary handling if we are not using it.
1612 */
1613 if (!textconv) {
1614 switch (opt->binary) {
1615 case GREP_BINARY_DEFAULT:
Nguyễn Thái Ngọc Duyacd00ea2018-09-21 15:57:331616 if (grep_source_is_binary(gs, opt->repo->index))
Jeff King335ec3b2013-05-10 15:10:151617 binary_match_only = 1;
1618 break;
1619 case GREP_BINARY_NOMATCH:
Nguyễn Thái Ngọc Duyacd00ea2018-09-21 15:57:331620 if (grep_source_is_binary(gs, opt->repo->index))
Jeff King335ec3b2013-05-10 15:10:151621 return 0; /* Assume unmatch */
1622 break;
1623 case GREP_BINARY_TEXT:
1624 break;
1625 default:
Johannes Schindelin033abf92018-05-02 09:38:391626 BUG("unknown binary handling mode");
Jeff King335ec3b2013-05-10 15:10:151627 }
Junio C Hamano83b5d2f2006-09-17 23:02:521628 }
1629
René Scharfe60ecac92009-07-01 22:07:241630 memset(&xecfg, 0, sizeof(xecfg));
Thomas Rast0579f912011-12-12 21:16:071631 opt->priv = &xecfg;
1632
Junio C Hamanoa26345b2010-01-11 06:39:361633 try_lookahead = should_lookahead(opt);
René Scharfe60ecac92009-07-01 22:07:241634
Nguyễn Thái Ngọc Duy38bbc2e2018-09-21 15:57:231635 if (fill_textconv_grep(opt->repo, textconv, gs) < 0)
Jeff King08265792012-02-02 08:21:111636 return 0;
1637
Jeff Kinge1327022012-02-02 08:19:281638 bol = gs->buf;
1639 left = gs->size;
Junio C Hamano83b5d2f2006-09-17 23:02:521640 while (left) {
Jeff King1a845fb2021-09-21 03:49:491641 const char *eol;
Junio C Hamano0ab7bef2006-09-28 00:50:521642 int hit;
Taylor Blau89252cd2018-06-22 15:49:421643 ssize_t cno;
Taylor Blau68d686e2018-06-22 15:49:351644 ssize_t col = -1, icol = -1;
Junio C Hamano83b5d2f2006-09-17 23:02:521645
Junio C Hamanoa26345b2010-01-11 06:39:361646 /*
Michał Kiedrowicz8997da32011-05-09 21:52:031647 * look_ahead() skips quickly to the line that possibly
Junio C Hamanoa26345b2010-01-11 06:39:361648 * has the next hit; don't call it if we need to do
1649 * something more than just skipping the current line
1650 * in response to an unmatch for the current line. E.g.
1651 * inside a post-context window, we will show the current
1652 * line as a context around the previous hit when it
1653 * doesn't hit.
1654 */
1655 if (try_lookahead
1656 && !(last_hit
René Scharfeba8ea742011-08-01 17:20:531657 && (show_function ||
1658 lno <= last_hit + opt->post_context))
Junio C Hamanoa26345b2010-01-11 06:39:361659 && look_ahead(opt, &left, &lno, &bol))
1660 break;
Junio C Hamano83b5d2f2006-09-17 23:02:521661 eol = end_of_line(bol, &left);
Junio C Hamano83b5d2f2006-09-17 23:02:521662
Junio C Hamano480c1ca2006-09-20 19:39:461663 if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol))
1664 ctx = GREP_CONTEXT_BODY;
1665
Taylor Blau68d686e2018-06-22 15:49:351666 hit = match_line(opt, bol, eol, &col, &icol, ctx, collect_hits);
Junio C Hamano83b5d2f2006-09-17 23:02:521667
Junio C Hamano0ab7bef2006-09-28 00:50:521668 if (collect_hits)
1669 goto next_line;
1670
Junio C Hamano83b5d2f2006-09-17 23:02:521671 /* "grep -v -e foo -e bla" should list lines
1672 * that do not have either, so inversion should
1673 * be done outside.
1674 */
1675 if (opt->invert)
1676 hit = !hit;
1677 if (opt->unmatch_name_only) {
1678 if (hit)
1679 return 0;
1680 goto next_line;
1681 }
Carlos López68437ed2022-06-22 19:47:321682 if (hit && (opt->max_count < 0 || count < opt->max_count)) {
Junio C Hamano83b5d2f2006-09-17 23:02:521683 count++;
1684 if (opt->status_only)
1685 return 1;
René Scharfe321ffcc2010-05-22 21:30:481686 if (opt->name_only) {
Jeff Kinge1327022012-02-02 08:19:281687 show_name(opt, gs->name);
René Scharfe321ffcc2010-05-22 21:30:481688 return 1;
1689 }
René Scharfec30c10c2010-05-22 21:29:351690 if (opt->count)
1691 goto next_line;
Junio C Hamano83b5d2f2006-09-17 23:02:521692 if (binary_match_only) {
Fredrik Kuivinen5b594f42010-01-25 22:51:391693 opt->output(opt, "Binary file ", 12);
Jeff Kinge1327022012-02-02 08:19:281694 output_color(opt, gs->name, strlen(gs->name),
Nguyễn Thái Ngọc Duyfa151dc2018-05-26 13:55:221695 opt->colors[GREP_COLOR_FILENAME]);
Fredrik Kuivinen5b594f42010-01-25 22:51:391696 opt->output(opt, " matches\n", 9);
Junio C Hamano83b5d2f2006-09-17 23:02:521697 return 1;
1698 }
Junio C Hamano83b5d2f2006-09-17 23:02:521699 /* Hit at this line. If we haven't shown the
1700 * pre-context lines, we would need to show them.
Junio C Hamano83b5d2f2006-09-17 23:02:521701 */
René Scharfeba8ea742011-08-01 17:20:531702 if (opt->pre_context || opt->funcbody)
Jeff Kinge1327022012-02-02 08:19:281703 show_pre_context(opt, gs, bol, eol, lno);
René Scharfe2944e4e2009-07-01 22:06:341704 else if (opt->funcname)
Jeff Kinge1327022012-02-02 08:19:281705 show_funcname_line(opt, gs, bol, lno);
Taylor Blau89252cd2018-06-22 15:49:421706 cno = opt->invert ? icol : col;
1707 if (cno < 0) {
1708 /*
1709 * A negative cno indicates that there was no
1710 * match on the line. We are thus inverted and
1711 * being asked to show all lines that _don't_
1712 * match a given expression. Therefore, set cno
1713 * to 0 to suggest the whole line matches.
1714 */
1715 cno = 0;
1716 }
1717 show_line(opt, bol, eol, gs->name, lno, cno + 1, ':');
René Scharfe5dd06d32009-07-01 22:02:381718 last_hit = lno;
René Scharfeba8ea742011-08-01 17:20:531719 if (opt->funcbody)
1720 show_function = 1;
1721 goto next_line;
Junio C Hamano83b5d2f2006-09-17 23:02:521722 }
René Scharfe4aa2c472016-05-28 15:06:191723 if (show_function && (!peek_bol || peek_bol < bol)) {
1724 unsigned long peek_left = left;
Jeff King1a845fb2021-09-21 03:49:491725 const char *peek_eol = eol;
René Scharfe4aa2c472016-05-28 15:06:191726
1727 /*
1728 * Trailing empty lines are not interesting.
1729 * Peek past them to see if they belong to the
1730 * body of the current function.
1731 */
1732 peek_bol = bol;
1733 while (is_empty_line(peek_bol, peek_eol)) {
1734 peek_bol = peek_eol + 1;
1735 peek_eol = end_of_line(peek_bol, &peek_left);
1736 }
1737
1738 if (match_funcname(opt, gs, peek_bol, peek_eol))
1739 show_function = 0;
1740 }
René Scharfeba8ea742011-08-01 17:20:531741 if (show_function ||
1742 (last_hit && lno <= last_hit + opt->post_context)) {
Junio C Hamano83b5d2f2006-09-17 23:02:521743 /* If the last hit is within the post context,
1744 * we need to show this line.
1745 */
Taylor Blau89252cd2018-06-22 15:49:421746 show_line(opt, bol, eol, gs->name, lno, col + 1, '-');
Junio C Hamano83b5d2f2006-09-17 23:02:521747 }
Junio C Hamano83b5d2f2006-09-17 23:02:521748
1749 next_line:
1750 bol = eol + 1;
1751 if (!left)
1752 break;
1753 left--;
1754 lno++;
1755 }
1756
Junio C Hamano0ab7bef2006-09-28 00:50:521757 if (collect_hits)
1758 return 0;
Junio C Hamanob48fb5b2006-09-27 23:27:101759
Junio C Hamano83b5d2f2006-09-17 23:02:521760 if (opt->status_only)
Anthony Sottilee1f68c62017-08-18 01:38:511761 return opt->unmatch_name_only;
Junio C Hamano83b5d2f2006-09-17 23:02:521762 if (opt->unmatch_name_only) {
1763 /* We did not see any hit, so we want to show this */
Jeff Kinge1327022012-02-02 08:19:281764 show_name(opt, gs->name);
Junio C Hamano83b5d2f2006-09-17 23:02:521765 return 1;
1766 }
1767
René Scharfe60ecac92009-07-01 22:07:241768 xdiff_clear_find_func(&xecfg);
1769 opt->priv = NULL;
1770
Junio C Hamano83b5d2f2006-09-17 23:02:521771 /* NEEDSWORK:
1772 * The real "grep -c foo *.c" gives many "bar.c:0" lines,
1773 * which feels mostly useless but sometimes useful. Maybe
1774 * make it another option? For now suppress them.
1775 */
Fredrik Kuivinen5b594f42010-01-25 22:51:391776 if (opt->count && count) {
1777 char buf[32];
René Scharfef76d9472014-03-11 21:15:491778 if (opt->pathname) {
1779 output_color(opt, gs->name, strlen(gs->name),
Nguyễn Thái Ngọc Duyfa151dc2018-05-26 13:55:221780 opt->colors[GREP_COLOR_FILENAME]);
René Scharfef76d9472014-03-11 21:15:491781 output_sep(opt, ':');
1782 }
Jeff King1a168e52017-03-28 19:46:561783 xsnprintf(buf, sizeof(buf), "%u\n", count);
Fredrik Kuivinen5b594f42010-01-25 22:51:391784 opt->output(opt, buf, strlen(buf));
René Scharfec30c10c2010-05-22 21:29:351785 return 1;
Fredrik Kuivinen5b594f42010-01-25 22:51:391786 }
Junio C Hamano83b5d2f2006-09-17 23:02:521787 return !!last_hit;
1788}
1789
Junio C Hamano0ab7bef2006-09-28 00:50:521790static void clr_hit_marker(struct grep_expr *x)
1791{
1792 /* All-hit markers are meaningful only at the very top level
1793 * OR node.
1794 */
1795 while (1) {
1796 x->hit = 0;
1797 if (x->node != GREP_NODE_OR)
1798 return;
1799 x->u.binary.left->hit = 0;
1800 x = x->u.binary.right;
1801 }
1802}
1803
1804static int chk_hit_marker(struct grep_expr *x)
1805{
1806 /* Top level nodes have hit markers. See if they all are hits */
1807 while (1) {
1808 if (x->node != GREP_NODE_OR)
1809 return x->hit;
1810 if (!x->u.binary.left->hit)
1811 return 0;
1812 x = x->u.binary.right;
1813 }
1814}
1815
Jeff Kinge1327022012-02-02 08:19:281816int grep_source(struct grep_opt *opt, struct grep_source *gs)
Junio C Hamano0ab7bef2006-09-28 00:50:521817{
1818 /*
1819 * we do not have to do the two-pass grep when we do not check
1820 * buffer-wide "all-match".
1821 */
René Scharfe794c0002021-12-17 16:48:491822 if (!opt->all_match && !opt->no_body_match)
Jeff Kinge1327022012-02-02 08:19:281823 return grep_source_1(opt, gs, 0);
Junio C Hamano0ab7bef2006-09-28 00:50:521824
1825 /* Otherwise the toplevel "or" terms hit a bit differently.
1826 * We first clear hit markers from them.
1827 */
1828 clr_hit_marker(opt->pattern_expression);
René Scharfe794c0002021-12-17 16:48:491829 opt->body_hit = 0;
Jeff Kinge1327022012-02-02 08:19:281830 grep_source_1(opt, gs, 1);
Junio C Hamano0ab7bef2006-09-28 00:50:521831
René Scharfe794c0002021-12-17 16:48:491832 if (opt->all_match && !chk_hit_marker(opt->pattern_expression))
1833 return 0;
1834 if (opt->no_body_match && opt->body_hit)
Junio C Hamano0ab7bef2006-09-28 00:50:521835 return 0;
1836
Jeff Kinge1327022012-02-02 08:19:281837 return grep_source_1(opt, gs, 0);
1838}
1839
Jeff King1e668712021-09-21 03:51:281840static void grep_source_init_buf(struct grep_source *gs,
1841 const char *buf,
Jonathan Tan50d92b52021-08-16 21:09:531842 unsigned long size)
1843{
1844 gs->type = GREP_SOURCE_BUF;
1845 gs->name = NULL;
1846 gs->path = NULL;
1847 gs->buf = buf;
1848 gs->size = size;
1849 gs->driver = NULL;
1850 gs->identifier = NULL;
1851}
1852
Jeff King1e668712021-09-21 03:51:281853int grep_buffer(struct grep_opt *opt, const char *buf, unsigned long size)
Jeff Kinge1327022012-02-02 08:19:281854{
1855 struct grep_source gs;
1856 int r;
1857
Jonathan Tan50d92b52021-08-16 21:09:531858 grep_source_init_buf(&gs, buf, size);
Jeff Kinge1327022012-02-02 08:19:281859
1860 r = grep_source(opt, &gs);
1861
1862 grep_source_clear(&gs);
1863 return r;
1864}
1865
Jonathan Tan50d92b52021-08-16 21:09:531866void grep_source_init_file(struct grep_source *gs, const char *name,
1867 const char *path)
Jeff Kinge1327022012-02-02 08:19:281868{
Jonathan Tan50d92b52021-08-16 21:09:531869 gs->type = GREP_SOURCE_FILE;
Jeff King8c53f072015-01-13 01:59:091870 gs->name = xstrdup_or_null(name);
1871 gs->path = xstrdup_or_null(path);
Jeff Kinge1327022012-02-02 08:19:281872 gs->buf = NULL;
1873 gs->size = 0;
Jeff King94ad9d92012-02-02 08:20:431874 gs->driver = NULL;
Jonathan Tan50d92b52021-08-16 21:09:531875 gs->identifier = xstrdup(path);
1876}
Jeff Kinge1327022012-02-02 08:19:281877
Jonathan Tan50d92b52021-08-16 21:09:531878void grep_source_init_oid(struct grep_source *gs, const char *name,
Jonathan Tan06938062021-08-16 21:09:561879 const char *path, const struct object_id *oid,
1880 struct repository *repo)
Jonathan Tan50d92b52021-08-16 21:09:531881{
1882 gs->type = GREP_SOURCE_OID;
1883 gs->name = xstrdup_or_null(name);
1884 gs->path = xstrdup_or_null(path);
1885 gs->buf = NULL;
1886 gs->size = 0;
1887 gs->driver = NULL;
1888 gs->identifier = oiddup(oid);
Jonathan Tan06938062021-08-16 21:09:561889 gs->repo = repo;
Jeff Kinge1327022012-02-02 08:19:281890}
1891
1892void grep_source_clear(struct grep_source *gs)
1893{
Ævar Arnfjörð Bjarmason88ce3ef2017-06-15 23:15:491894 FREE_AND_NULL(gs->name);
1895 FREE_AND_NULL(gs->path);
1896 FREE_AND_NULL(gs->identifier);
Jeff Kinge1327022012-02-02 08:19:281897 grep_source_clear_data(gs);
1898}
1899
1900void grep_source_clear_data(struct grep_source *gs)
1901{
1902 switch (gs->type) {
1903 case GREP_SOURCE_FILE:
Brandon Williams1c41c822017-05-30 17:30:441904 case GREP_SOURCE_OID:
Jeff King1e668712021-09-21 03:51:281905 /* these types own the buffer */
1906 free((char *)gs->buf);
1907 gs->buf = NULL;
Jeff Kinge1327022012-02-02 08:19:281908 gs->size = 0;
1909 break;
1910 case GREP_SOURCE_BUF:
1911 /* leave user-provided buf intact */
1912 break;
1913 }
1914}
1915
Brandon Williams1c41c822017-05-30 17:30:441916static int grep_source_load_oid(struct grep_source *gs)
Jeff Kinge1327022012-02-02 08:19:281917{
1918 enum object_type type;
1919
Jonathan Tan06938062021-08-16 21:09:561920 gs->buf = repo_read_object_file(gs->repo, gs->identifier, &type,
1921 &gs->size);
Jeff Kinge1327022012-02-02 08:19:281922 if (!gs->buf)
1923 return error(_("'%s': unable to read %s"),
1924 gs->name,
Brandon Williams1c41c822017-05-30 17:30:441925 oid_to_hex(gs->identifier));
Jeff Kinge1327022012-02-02 08:19:281926 return 0;
1927}
1928
1929static int grep_source_load_file(struct grep_source *gs)
1930{
1931 const char *filename = gs->identifier;
1932 struct stat st;
1933 char *data;
1934 size_t size;
1935 int i;
1936
1937 if (lstat(filename, &st) < 0) {
1938 err_ret:
1939 if (errno != ENOENT)
Nguyễn Thái Ngọc Duy7645d8f2016-05-08 09:47:471940 error_errno(_("failed to stat '%s'"), filename);
Jeff Kinge1327022012-02-02 08:19:281941 return -1;
1942 }
1943 if (!S_ISREG(st.st_mode))
1944 return -1;
1945 size = xsize_t(st.st_size);
1946 i = open(filename, O_RDONLY);
1947 if (i < 0)
1948 goto err_ret;
Jeff King3733e692016-02-22 22:44:281949 data = xmallocz(size);
Jeff Kinge1327022012-02-02 08:19:281950 if (st.st_size != read_in_full(i, data, size)) {
Nguyễn Thái Ngọc Duy7645d8f2016-05-08 09:47:471951 error_errno(_("'%s': short read"), filename);
Jeff Kinge1327022012-02-02 08:19:281952 close(i);
1953 free(data);
1954 return -1;
1955 }
1956 close(i);
Jeff Kinge1327022012-02-02 08:19:281957
1958 gs->buf = data;
1959 gs->size = size;
1960 return 0;
1961}
1962
Junio C Hamano30833012012-09-20 21:20:091963static int grep_source_load(struct grep_source *gs)
Jeff Kinge1327022012-02-02 08:19:281964{
1965 if (gs->buf)
1966 return 0;
1967
1968 switch (gs->type) {
1969 case GREP_SOURCE_FILE:
1970 return grep_source_load_file(gs);
Brandon Williams1c41c822017-05-30 17:30:441971 case GREP_SOURCE_OID:
1972 return grep_source_load_oid(gs);
Jeff Kinge1327022012-02-02 08:19:281973 case GREP_SOURCE_BUF:
1974 return gs->buf ? 0 : -1;
Jeff Kinge1327022012-02-02 08:19:281975 }
Johannes Schindelin033abf92018-05-02 09:38:391976 BUG("invalid grep_source type to load");
Junio C Hamano0ab7bef2006-09-28 00:50:521977}
Jeff King94ad9d92012-02-02 08:20:431978
Nguyễn Thái Ngọc Duyacd00ea2018-09-21 15:57:331979void grep_source_load_driver(struct grep_source *gs,
1980 struct index_state *istate)
Jeff King94ad9d92012-02-02 08:20:431981{
1982 if (gs->driver)
1983 return;
1984
1985 grep_attr_lock();
Matheus Tavares1d1729c2020-01-16 02:39:541986 if (gs->path)
Nguyễn Thái Ngọc Duyacd00ea2018-09-21 15:57:331987 gs->driver = userdiff_find_by_path(istate, gs->path);
Jeff King94ad9d92012-02-02 08:20:431988 if (!gs->driver)
1989 gs->driver = userdiff_find_by_name("default");
1990 grep_attr_unlock();
1991}
Jeff King41b59bf2012-02-02 08:21:021992
Nguyễn Thái Ngọc Duyacd00ea2018-09-21 15:57:331993static int grep_source_is_binary(struct grep_source *gs,
1994 struct index_state *istate)
Jeff King41b59bf2012-02-02 08:21:021995{
Nguyễn Thái Ngọc Duyacd00ea2018-09-21 15:57:331996 grep_source_load_driver(gs, istate);
Jeff King41b59bf2012-02-02 08:21:021997 if (gs->driver->binary != -1)
1998 return gs->driver->binary;
1999
2000 if (!grep_source_load(gs))
2001 return buffer_is_binary(gs->buf, gs->size);
2002
2003 return 0;
2004}