Blame - blame.c - external/github.com/git/git

blob: 29770e5c81c0ad36cc18f8f17475ba7b2f509fb2 [file] [log] [blame]

Jeff Smith	072bf43	2017-05-24 05:15:34	[diff] [blame]	1	#include "cache.h"
				2	#include "refs.h"
Stefan Beller	cbd53a2	2018-05-15 23:42:15	[diff] [blame]	3	#include "object-store.h"
Jeff Smith	072bf43	2017-05-24 05:15:34	[diff] [blame]	4	#include "cache-tree.h"
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	5	#include "mergesort.h"
				6	#include "diff.h"
				7	#include "diffcore.h"
Jeff Smith	09002f1	2017-05-24 05:15:36	[diff] [blame]	8	#include "tag.h"
Jeff Smith	f5dd754	2017-05-24 05:15:33	[diff] [blame]	9	#include "blame.h"
Stefan Beller	14ba97f	2018-05-15 21:48:42	[diff] [blame]	10	#include "alloc.h"
Nguyễn Thái Ngọc Duy	4e0df4e	2018-05-19 05:28:19	[diff] [blame]	11	#include "commit-slab.h"
				12
				13	define_commit_slab(blame_suspects, struct blame_origin *);
				14	static struct blame_suspects blame_suspects;
				15
				16	struct blame_origin get_blame_suspects(struct commit commit)
				17	{
				18	struct blame_origin **result;
				19
				20	result = blame_suspects_peek(&blame_suspects, commit);
				21
				22	return result ? *result : NULL;
				23	}
				24
				25	static void set_blame_suspects(struct commit commit, struct blame_origin origin)
				26	{
				27	*blame_suspects_at(&blame_suspects, commit) = origin;
				28	}
Jeff Smith	f5dd754	2017-05-24 05:15:33	[diff] [blame]	29
				30	void blame_origin_decref(struct blame_origin *o)
				31	{
				32	if (o && --o->refcnt <= 0) {
				33	struct blame_origin p, l = NULL;
				34	if (o->previous)
				35	blame_origin_decref(o->previous);
				36	free(o->file.ptr);
				37	/* Should be present exactly once in commit chain */
Nguyễn Thái Ngọc Duy	4e0df4e	2018-05-19 05:28:19	[diff] [blame]	38	for (p = get_blame_suspects(o->commit); p; l = p, p = p->next) {
Jeff Smith	f5dd754	2017-05-24 05:15:33	[diff] [blame]	39	if (p == o) {
				40	if (l)
				41	l->next = p->next;
				42	else
Nguyễn Thái Ngọc Duy	4e0df4e	2018-05-19 05:28:19	[diff] [blame]	43	set_blame_suspects(o->commit, p->next);
Jeff Smith	f5dd754	2017-05-24 05:15:33	[diff] [blame]	44	free(o);
				45	return;
				46	}
				47	}
				48	die("internal error in blame_origin_decref");
				49	}
				50	}
				51
				52	/*
				53	* Given a commit and a path in it, create a new origin structure.
				54	* The callers that add blame to the scoreboard should use
				55	* get_origin() to obtain shared, refcounted copy instead of calling
				56	* this function directly.
				57	*/
Jeff Smith	072bf43	2017-05-24 05:15:34	[diff] [blame]	58	static struct blame_origin make_origin(struct commit commit, const char *path)
Jeff Smith	f5dd754	2017-05-24 05:15:33	[diff] [blame]	59	{
				60	struct blame_origin *o;
				61	FLEX_ALLOC_STR(o, path, path);
				62	o->commit = commit;
				63	o->refcnt = 1;
Nguyễn Thái Ngọc Duy	4e0df4e	2018-05-19 05:28:19	[diff] [blame]	64	o->next = get_blame_suspects(commit);
				65	set_blame_suspects(commit, o);
Jeff Smith	f5dd754	2017-05-24 05:15:33	[diff] [blame]	66	return o;
				67	}
				68
				69	/*
				70	* Locate an existing origin or create a new one.
				71	* This moves the origin to front position in the commit util list.
				72	*/
Jeff Smith	09002f1	2017-05-24 05:15:36	[diff] [blame]	73	static struct blame_origin get_origin(struct commit commit, const char *path)
Jeff Smith	f5dd754	2017-05-24 05:15:33	[diff] [blame]	74	{
				75	struct blame_origin o, l;
				76
Nguyễn Thái Ngọc Duy	4e0df4e	2018-05-19 05:28:19	[diff] [blame]	77	for (o = get_blame_suspects(commit), l = NULL; o; l = o, o = o->next) {
Jeff Smith	f5dd754	2017-05-24 05:15:33	[diff] [blame]	78	if (!strcmp(o->path, path)) {
				79	/* bump to front */
				80	if (l) {
				81	l->next = o->next;
Nguyễn Thái Ngọc Duy	4e0df4e	2018-05-19 05:28:19	[diff] [blame]	82	o->next = get_blame_suspects(commit);
				83	set_blame_suspects(commit, o);
Jeff Smith	f5dd754	2017-05-24 05:15:33	[diff] [blame]	84	}
				85	return blame_origin_incref(o);
				86	}
				87	}
				88	return make_origin(commit, path);
				89	}
Jeff Smith	072bf43	2017-05-24 05:15:34	[diff] [blame]	90
				91
				92
Nguyễn Thái Ngọc Duy	a470bee	2018-09-21 15:57:21	[diff] [blame]	93	static void verify_working_tree_path(struct repository *r,
Nguyễn Thái Ngọc Duy	ecbbc0a	2018-08-13 16:14:41	[diff] [blame]	94	struct commit work_tree, const char path)
Jeff Smith	072bf43	2017-05-24 05:15:34	[diff] [blame]	95	{
				96	struct commit_list *parents;
				97	int pos;
				98
				99	for (parents = work_tree->parents; parents; parents = parents->next) {
				100	const struct object_id *commit_oid = &parents->item->object.oid;
				101	struct object_id blob_oid;
Elijah Newren	5ec1e72	2019-04-05 15:00:12	[diff] [blame]	102	unsigned short mode;
Jeff Smith	072bf43	2017-05-24 05:15:34	[diff] [blame]	103
Nguyễn Thái Ngọc Duy	50ddb08	2019-06-27 09:28:49	[diff] [blame]	104	if (!get_tree_entry(r, commit_oid, path, &blob_oid, &mode) &&
Nguyễn Thái Ngọc Duy	a470bee	2018-09-21 15:57:21	[diff] [blame]	105	oid_object_info(r, &blob_oid, NULL) == OBJ_BLOB)
Jeff Smith	072bf43	2017-05-24 05:15:34	[diff] [blame]	106	return;
				107	}
				108
Nguyễn Thái Ngọc Duy	a470bee	2018-09-21 15:57:21	[diff] [blame]	109	pos = index_name_pos(r->index, path, strlen(path));
Jeff Smith	072bf43	2017-05-24 05:15:34	[diff] [blame]	110	if (pos >= 0)
				111	; /* path is in the index */
Nguyễn Thái Ngọc Duy	a470bee	2018-09-21 15:57:21	[diff] [blame]	112	else if (-1 - pos < r->index->cache_nr &&
				113	!strcmp(r->index->cache[-1 - pos]->name, path))
Jeff Smith	072bf43	2017-05-24 05:15:34	[diff] [blame]	114	; /* path is in the index, unmerged */
				115	else
				116	die("no such path '%s' in HEAD", path);
				117	}
				118
Nguyễn Thái Ngọc Duy	fb998ea	2018-11-10 05:48:58	[diff] [blame]	119	static struct commit_list *append_parent(struct repository r,
				120	struct commit_list **tail,
				121	const struct object_id *oid)
Jeff Smith	072bf43	2017-05-24 05:15:34	[diff] [blame]	122	{
				123	struct commit *parent;
				124
Nguyễn Thái Ngọc Duy	fb998ea	2018-11-10 05:48:58	[diff] [blame]	125	parent = lookup_commit_reference(r, oid);
Jeff Smith	072bf43	2017-05-24 05:15:34	[diff] [blame]	126	if (!parent)
				127	die("no such commit %s", oid_to_hex(oid));
				128	return &commit_list_insert(parent, tail)->next;
				129	}
				130
Nguyễn Thái Ngọc Duy	fb998ea	2018-11-10 05:48:58	[diff] [blame]	131	static void append_merge_parents(struct repository *r,
				132	struct commit_list **tail)
Jeff Smith	072bf43	2017-05-24 05:15:34	[diff] [blame]	133	{
				134	int merge_head;
				135	struct strbuf line = STRBUF_INIT;
				136
Nguyễn Thái Ngọc Duy	fb998ea	2018-11-10 05:48:58	[diff] [blame]	137	merge_head = open(git_path_merge_head(r), O_RDONLY);
Jeff Smith	072bf43	2017-05-24 05:15:34	[diff] [blame]	138	if (merge_head < 0) {
				139	if (errno == ENOENT)
				140	return;
Stefan Beller	102de88	2018-05-17 22:51:51	[diff] [blame]	141	die("cannot open '%s' for reading",
Nguyễn Thái Ngọc Duy	fb998ea	2018-11-10 05:48:58	[diff] [blame]	142	git_path_merge_head(r));
Jeff Smith	072bf43	2017-05-24 05:15:34	[diff] [blame]	143	}
				144
				145	while (!strbuf_getwholeline_fd(&line, merge_head, '\n')) {
				146	struct object_id oid;
brian m. carlson	fee4930	2019-08-18 20:04:08	[diff] [blame]	147	if (get_oid_hex(line.buf, &oid))
Stefan Beller	102de88	2018-05-17 22:51:51	[diff] [blame]	148	die("unknown line in '%s': %s",
Nguyễn Thái Ngọc Duy	fb998ea	2018-11-10 05:48:58	[diff] [blame]	149	git_path_merge_head(r), line.buf);
				150	tail = append_parent(r, tail, &oid);
Jeff Smith	072bf43	2017-05-24 05:15:34	[diff] [blame]	151	}
				152	close(merge_head);
				153	strbuf_release(&line);
				154	}
				155
				156	/*
				157	* This isn't as simple as passing sb->buf and sb->len, because we
				158	* want to transfer ownership of the buffer to the commit (so we
				159	* must use detach).
				160	*/
Nguyễn Thái Ngọc Duy	fb998ea	2018-11-10 05:48:58	[diff] [blame]	161	static void set_commit_buffer_from_strbuf(struct repository *r,
				162	struct commit *c,
				163	struct strbuf *sb)
Jeff Smith	072bf43	2017-05-24 05:15:34	[diff] [blame]	164	{
				165	size_t len;
				166	void *buf = strbuf_detach(sb, &len);
Nguyễn Thái Ngọc Duy	fb998ea	2018-11-10 05:48:58	[diff] [blame]	167	set_commit_buffer(r, c, buf, len);
Jeff Smith	072bf43	2017-05-24 05:15:34	[diff] [blame]	168	}
				169
				170	/*
				171	* Prepare a dummy commit that represents the work tree (or staged) item.
				172	* Note that annotating work tree item never works in the reverse.
				173	*/
Nguyễn Thái Ngọc Duy	a470bee	2018-09-21 15:57:21	[diff] [blame]	174	static struct commit fake_working_tree_commit(struct repository r,
Nguyễn Thái Ngọc Duy	ecbbc0a	2018-08-13 16:14:41	[diff] [blame]	175	struct diff_options *opt,
Jeff Smith	09002f1	2017-05-24 05:15:36	[diff] [blame]	176	const char *path,
				177	const char *contents_from)
Jeff Smith	072bf43	2017-05-24 05:15:34	[diff] [blame]	178	{
				179	struct commit *commit;
				180	struct blame_origin *origin;
				181	struct commit_list *parent_tail, parent;
				182	struct object_id head_oid;
				183	struct strbuf buf = STRBUF_INIT;
				184	const char *ident;
				185	time_t now;
Jameson Miller	a849735	2018-07-02 19:49:31	[diff] [blame]	186	int len;
Jeff Smith	072bf43	2017-05-24 05:15:34	[diff] [blame]	187	struct cache_entry *ce;
				188	unsigned mode;
				189	struct strbuf msg = STRBUF_INIT;
				190
Nguyễn Thái Ngọc Duy	e1ff0a3	2019-01-12 02:13:26	[diff] [blame]	191	repo_read_index(r);
Jeff Smith	072bf43	2017-05-24 05:15:34	[diff] [blame]	192	time(&now);
Nguyễn Thái Ngọc Duy	fb998ea	2018-11-10 05:48:58	[diff] [blame]	193	commit = alloc_commit_node(r);
Jeff Smith	072bf43	2017-05-24 05:15:34	[diff] [blame]	194	commit->object.parsed = 1;
				195	commit->date = now;
				196	parent_tail = &commit->parents;
				197
brian m. carlson	49e6147	2017-10-15 22:07:09	[diff] [blame]	198	if (!resolve_ref_unsafe("HEAD", RESOLVE_REF_READING, &head_oid, NULL))
Jeff Smith	072bf43	2017-05-24 05:15:34	[diff] [blame]	199	die("no such ref: HEAD");
				200
Nguyễn Thái Ngọc Duy	fb998ea	2018-11-10 05:48:58	[diff] [blame]	201	parent_tail = append_parent(r, parent_tail, &head_oid);
				202	append_merge_parents(r, parent_tail);
Nguyễn Thái Ngọc Duy	a470bee	2018-09-21 15:57:21	[diff] [blame]	203	verify_working_tree_path(r, commit, path);
Jeff Smith	072bf43	2017-05-24 05:15:34	[diff] [blame]	204
				205	origin = make_origin(commit, path);
				206
William Hubbs	39ab4d0	2019-02-04 18:48:50	[diff] [blame]	207	ident = fmt_ident("Not Committed Yet", "not.committed.yet",
				208	WANT_BLANK_IDENT, NULL, 0);
Jeff Smith	072bf43	2017-05-24 05:15:34	[diff] [blame]	209	strbuf_addstr(&msg, "tree 0000000000000000000000000000000000000000\n");
				210	for (parent = commit->parents; parent; parent = parent->next)
				211	strbuf_addf(&msg, "parent %s\n",
				212	oid_to_hex(&parent->item->object.oid));
				213	strbuf_addf(&msg,
				214	"author %s\n"
				215	"committer %s\n\n"
				216	"Version of %s from %s\n",
				217	ident, ident, path,
				218	(!contents_from ? path :
				219	(!strcmp(contents_from, "-") ? "standard input" : contents_from)));
Nguyễn Thái Ngọc Duy	fb998ea	2018-11-10 05:48:58	[diff] [blame]	220	set_commit_buffer_from_strbuf(r, commit, &msg);
Jeff Smith	072bf43	2017-05-24 05:15:34	[diff] [blame]	221
				222	if (!contents_from \|\| strcmp("-", contents_from)) {
				223	struct stat st;
				224	const char *read_from;
				225	char *buf_ptr;
				226	unsigned long buf_len;
				227
				228	if (contents_from) {
				229	if (stat(contents_from, &st) < 0)
				230	die_errno("Cannot stat '%s'", contents_from);
				231	read_from = contents_from;
				232	}
				233	else {
				234	if (lstat(path, &st) < 0)
				235	die_errno("Cannot lstat '%s'", path);
				236	read_from = path;
				237	}
				238	mode = canon_mode(st.st_mode);
				239
				240	switch (st.st_mode & S_IFMT) {
				241	case S_IFREG:
Brandon Williams	0d1e0e7	2017-10-31 18:19:11	[diff] [blame]	242	if (opt->flags.allow_textconv &&
Nguyễn Thái Ngọc Duy	6afaf80	2018-09-21 15:57:22	[diff] [blame]	243	textconv_object(r, read_from, mode, &null_oid, 0, &buf_ptr, &buf_len))
Jeff Smith	072bf43	2017-05-24 05:15:34	[diff] [blame]	244	strbuf_attach(&buf, buf_ptr, buf_len, buf_len + 1);
				245	else if (strbuf_read_file(&buf, read_from, st.st_size) != st.st_size)
				246	die_errno("cannot open or read '%s'", read_from);
				247	break;
				248	case S_IFLNK:
				249	if (strbuf_readlink(&buf, read_from, st.st_size) < 0)
				250	die_errno("cannot readlink '%s'", read_from);
				251	break;
				252	default:
				253	die("unsupported file type %s", read_from);
				254	}
				255	}
				256	else {
				257	/* Reading from stdin */
				258	mode = 0;
				259	if (strbuf_read(&buf, 0, 0) < 0)
				260	die_errno("failed to read from stdin");
				261	}
Nguyễn Thái Ngọc Duy	a470bee	2018-09-21 15:57:21	[diff] [blame]	262	convert_to_git(r->index, path, buf.buf, buf.len, &buf, 0);
Jeff Smith	072bf43	2017-05-24 05:15:34	[diff] [blame]	263	origin->file.ptr = buf.buf;
				264	origin->file.size = buf.len;
Patryk Obara	829e5c3	2018-01-28 00:13:11	[diff] [blame]	265	pretend_object_file(buf.buf, buf.len, OBJ_BLOB, &origin->blob_oid);
Jeff Smith	072bf43	2017-05-24 05:15:34	[diff] [blame]	266
				267	/*
				268	* Read the current index, replace the path entry with
				269	* origin->blob_sha1 without mucking with its mode or type
				270	* bits; we are not going to write this index out -- we just
				271	* want to run "diff-index --cached".
				272	*/
Nguyễn Thái Ngọc Duy	a470bee	2018-09-21 15:57:21	[diff] [blame]	273	discard_index(r->index);
Nguyễn Thái Ngọc Duy	e1ff0a3	2019-01-12 02:13:26	[diff] [blame]	274	repo_read_index(r);
Jeff Smith	072bf43	2017-05-24 05:15:34	[diff] [blame]	275
				276	len = strlen(path);
				277	if (!mode) {
Nguyễn Thái Ngọc Duy	a470bee	2018-09-21 15:57:21	[diff] [blame]	278	int pos = index_name_pos(r->index, path, len);
Jeff Smith	072bf43	2017-05-24 05:15:34	[diff] [blame]	279	if (0 <= pos)
Nguyễn Thái Ngọc Duy	a470bee	2018-09-21 15:57:21	[diff] [blame]	280	mode = r->index->cache[pos]->ce_mode;
Jeff Smith	072bf43	2017-05-24 05:15:34	[diff] [blame]	281	else
				282	/* Let's not bother reading from HEAD tree */
				283	mode = S_IFREG \| 0644;
				284	}
Nguyễn Thái Ngọc Duy	a470bee	2018-09-21 15:57:21	[diff] [blame]	285	ce = make_empty_cache_entry(r->index, len);
Jeff Smith	072bf43	2017-05-24 05:15:34	[diff] [blame]	286	oidcpy(&ce->oid, &origin->blob_oid);
				287	memcpy(ce->name, path, len);
				288	ce->ce_flags = create_ce_flags(0);
				289	ce->ce_namelen = len;
				290	ce->ce_mode = create_ce_mode(mode);
Nguyễn Thái Ngọc Duy	a470bee	2018-09-21 15:57:21	[diff] [blame]	291	add_index_entry(r->index, ce,
Nguyễn Thái Ngọc Duy	ecbbc0a	2018-08-13 16:14:41	[diff] [blame]	292	ADD_CACHE_OK_TO_ADD \| ADD_CACHE_OK_TO_REPLACE);
Jeff Smith	072bf43	2017-05-24 05:15:34	[diff] [blame]	293
Nguyễn Thái Ngọc Duy	a470bee	2018-09-21 15:57:21	[diff] [blame]	294	cache_tree_invalidate_path(r->index, path);
Jeff Smith	072bf43	2017-05-24 05:15:34	[diff] [blame]	295
				296	return commit;
				297	}
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	298
				299
				300
				301	static int diff_hunks(mmfile_t file_a, mmfile_t file_b,
				302	xdl_emit_hunk_consume_func_t hunk_func, void *cb_data, int xdl_opts)
				303	{
				304	xpparam_t xpp = {0};
				305	xdemitconf_t xecfg = {0};
				306	xdemitcb_t ecb = {NULL};
				307
				308	xpp.flags = xdl_opts;
				309	xecfg.hunk_func = hunk_func;
				310	ecb.priv = cb_data;
				311	return xdi_diff(file_a, file_b, &xpp, &xecfg, &ecb);
				312	}
				313
Barret Rhoden	1fc7338	2019-05-15 21:45:01	[diff] [blame]	314	static const char get_next_line(const char start, const char *end)
				315	{
				316	const char *nl = memchr(start, '\n', end - start);
				317
				318	return nl ? nl + 1 : end;
				319	}
				320
				321	static int find_line_starts(int *line_starts, const char buf,
				322	unsigned long len)
				323	{
				324	const char *end = buf + len;
				325	const char *p;
				326	int *lineno;
				327	int num = 0;
				328
				329	for (p = buf; p < end; p = get_next_line(p, end))
				330	num++;
				331
				332	ALLOC_ARRAY(*line_starts, num + 1);
				333	lineno = *line_starts;
				334
				335	for (p = buf; p < end; p = get_next_line(p, end))
				336	*lineno++ = p - buf;
				337
				338	*lineno = len;
				339
				340	return num;
				341	}
				342
Michael Platings	1d028dc	2019-06-20 16:38:18	[diff] [blame]	343	struct fingerprint_entry;
				344
				345	/* A fingerprint is intended to loosely represent a string, such that two
				346	* fingerprints can be quickly compared to give an indication of the similarity
				347	* of the strings that they represent.
				348	*
				349	* A fingerprint is represented as a multiset of the lower-cased byte pairs in
				350	* the string that it represents. Whitespace is added at each end of the
				351	* string. Whitespace pairs are ignored. Whitespace is converted to '\0'.
				352	* For example, the string "Darth Radar" will be converted to the following
				353	* fingerprint:
				354	* {"\0d", "da", "da", "ar", "ar", "rt", "th", "h\0", "\0r", "ra", "ad", "r\0"}
				355	*
				356	* The similarity between two fingerprints is the size of the intersection of
				357	* their multisets, including repeated elements. See fingerprint_similarity for
				358	* examples.
				359	*
				360	* For ease of implementation, the fingerprint is implemented as a map
				361	* of byte pairs to the count of that byte pair in the string, instead of
				362	* allowing repeated elements in a set.
				363	*/
				364	struct fingerprint {
				365	struct hashmap map;
				366	/* As we know the maximum number of entries in advance, it's
				367	* convenient to store the entries in a single array instead of having
				368	* the hashmap manage the memory.
				369	*/
				370	struct fingerprint_entry *entries;
				371	};
				372
				373	/* A byte pair in a fingerprint. Stores the number of times the byte pair
				374	* occurs in the string that the fingerprint represents.
				375	*/
				376	struct fingerprint_entry {
				377	/* The hashmap entry - the hash represents the byte pair in its
				378	* entirety so we don't need to store the byte pair separately.
				379	*/
				380	struct hashmap_entry entry;
				381	/* The number of times the byte pair occurs in the string that the
				382	* fingerprint represents.
				383	*/
				384	int count;
				385	};
				386
				387	/* See `struct fingerprint` for an explanation of what a fingerprint is.
				388	* \param result the fingerprint of the string is stored here. This must be
				389	* freed later using free_fingerprint.
				390	* \param line_begin the start of the string
				391	* \param line_end the end of the string
				392	*/
				393	static void get_fingerprint(struct fingerprint *result,
				394	const char *line_begin,
				395	const char *line_end)
				396	{
				397	unsigned int hash, c0 = 0, c1;
				398	const char *p;
				399	int max_map_entry_count = 1 + line_end - line_begin;
				400	struct fingerprint_entry *entry = xcalloc(max_map_entry_count,
				401	sizeof(struct fingerprint_entry));
				402	struct fingerprint_entry *found_entry;
				403
				404	hashmap_init(&result->map, NULL, NULL, max_map_entry_count);
				405	result->entries = entry;
				406	for (p = line_begin; p <= line_end; ++p, c0 = c1) {
				407	/* Always terminate the string with whitespace.
				408	* Normalise whitespace to 0, and normalise letters to
				409	* lower case. This won't work for multibyte characters but at
				410	* worst will match some unrelated characters.
				411	*/
				412	if ((p == line_end) \|\| isspace(*p))
				413	c1 = 0;
				414	else
				415	c1 = tolower(*p);
				416	hash = c0 \| (c1 << 8);
				417	/* Ignore whitespace pairs */
				418	if (hash == 0)
				419	continue;
Eric Wong	d22245a	2019-10-06 23:30:27	[diff] [blame]	420	hashmap_entry_init(&entry->entry, hash);
Michael Platings	1d028dc	2019-06-20 16:38:18	[diff] [blame]	421
Eric Wong	404ab78	2019-10-06 23:30:42	[diff] [blame]	422	found_entry = hashmap_get_entry(&result->map, entry,
				423	/* member name */ entry, NULL);
Michael Platings	1d028dc	2019-06-20 16:38:18	[diff] [blame]	424	if (found_entry) {
				425	found_entry->count += 1;
				426	} else {
				427	entry->count = 1;
Eric Wong	b94e5c1	2019-10-06 23:30:29	[diff] [blame]	428	hashmap_add(&result->map, &entry->entry);
Michael Platings	1d028dc	2019-06-20 16:38:18	[diff] [blame]	429	++entry;
				430	}
				431	}
				432	}
				433
				434	static void free_fingerprint(struct fingerprint *f)
				435	{
Eric Wong	c8e424c	2019-10-06 23:30:40	[diff] [blame]	436	hashmap_free(&f->map);
Michael Platings	1d028dc	2019-06-20 16:38:18	[diff] [blame]	437	free(f->entries);
				438	}
				439
				440	/* Calculates the similarity between two fingerprints as the size of the
				441	* intersection of their multisets, including repeated elements. See
				442	* `struct fingerprint` for an explanation of the fingerprint representation.
				443	* The similarity between "cat mat" and "father rather" is 2 because "at" is
				444	* present twice in both strings while the similarity between "tim" and "mit"
				445	* is 0.
				446	*/
				447	static int fingerprint_similarity(struct fingerprint a, struct fingerprint b)
				448	{
				449	int intersection = 0;
				450	struct hashmap_iter iter;
				451	const struct fingerprint_entry entry_a, entry_b;
				452
Eric Wong	87571c3	2019-10-06 23:30:38	[diff] [blame]	453	hashmap_for_each_entry(&b->map, &iter, entry_b,
Eric Wong	87571c3	2019-10-06 23:30:38	[diff] [blame]	454	entry /* member name */) {
Eric Wong	404ab78	2019-10-06 23:30:42	[diff] [blame]	455	entry_a = hashmap_get_entry(&a->map, entry_b, entry, NULL);
Eric Wong	f23a465	2019-10-06 23:30:36	[diff] [blame]	456	if (entry_a) {
Michael Platings	1d028dc	2019-06-20 16:38:18	[diff] [blame]	457	intersection += entry_a->count < entry_b->count ?
				458	entry_a->count : entry_b->count;
				459	}
				460	}
				461	return intersection;
				462	}
				463
				464	/* Subtracts byte-pair elements in B from A, modifying A in place.
				465	*/
				466	static void fingerprint_subtract(struct fingerprint a, struct fingerprint b)
				467	{
				468	struct hashmap_iter iter;
				469	struct fingerprint_entry *entry_a;
				470	const struct fingerprint_entry *entry_b;
				471
				472	hashmap_iter_init(&b->map, &iter);
				473
Eric Wong	87571c3	2019-10-06 23:30:38	[diff] [blame]	474	hashmap_for_each_entry(&b->map, &iter, entry_b,
Eric Wong	87571c3	2019-10-06 23:30:38	[diff] [blame]	475	entry /* member name */) {
Eric Wong	404ab78	2019-10-06 23:30:42	[diff] [blame]	476	entry_a = hashmap_get_entry(&a->map, entry_b, entry, NULL);
Eric Wong	f23a465	2019-10-06 23:30:36	[diff] [blame]	477	if (entry_a) {
Michael Platings	1d028dc	2019-06-20 16:38:18	[diff] [blame]	478	if (entry_a->count <= entry_b->count)
Eric Wong	28ee794	2019-10-06 23:30:31	[diff] [blame]	479	hashmap_remove(&a->map, &entry_b->entry, NULL);
Michael Platings	1d028dc	2019-06-20 16:38:18	[diff] [blame]	480	else
				481	entry_a->count -= entry_b->count;
				482	}
				483	}
				484	}
				485
				486	/* Calculate fingerprints for a series of lines.
				487	* Puts the fingerprints in the fingerprints array, which must have been
				488	* preallocated to allow storing line_count elements.
				489	*/
				490	static void get_line_fingerprints(struct fingerprint *fingerprints,
				491	const char content, const int line_starts,
				492	long first_line, long line_count)
				493	{
				494	int i;
				495	const char linestart, lineend;
				496
				497	line_starts += first_line;
				498	for (i = 0; i < line_count; ++i) {
				499	linestart = content + line_starts[i];
				500	lineend = content + line_starts[i + 1];
				501	get_fingerprint(fingerprints + i, linestart, lineend);
				502	}
				503	}
				504
				505	static void free_line_fingerprints(struct fingerprint *fingerprints,
				506	int nr_fingerprints)
				507	{
				508	int i;
				509
				510	for (i = 0; i < nr_fingerprints; i++)
				511	free_fingerprint(&fingerprints[i]);
				512	}
				513
				514	/* This contains the data necessary to linearly map a line number in one half
				515	* of a diff chunk to the line in the other half of the diff chunk that is
				516	* closest in terms of its position as a fraction of the length of the chunk.
				517	*/
				518	struct line_number_mapping {
				519	int destination_start, destination_length,
				520	source_start, source_length;
				521	};
				522
				523	/* Given a line number in one range, offset and scale it to map it onto the
				524	* other range.
				525	* Essentially this mapping is a simple linear equation but the calculation is
				526	* more complicated to allow performing it with integer operations.
				527	* Another complication is that if a line could map onto many lines in the
				528	* destination range then we want to choose the line at the center of those
				529	* possibilities.
				530	* Example: if the chunk is 2 lines long in A and 10 lines long in B then the
				531	* first 5 lines in B will map onto the first line in the A chunk, while the
				532	* last 5 lines will all map onto the second line in the A chunk.
				533	* Example: if the chunk is 10 lines long in A and 2 lines long in B then line
				534	* 0 in B will map onto line 2 in A, and line 1 in B will map onto line 7 in A.
				535	*/
				536	static int map_line_number(int line_number,
				537	const struct line_number_mapping *mapping)
				538	{
				539	return ((line_number - mapping->source_start) * 2 + 1) *
				540	mapping->destination_length /
				541	(mapping->source_length * 2) +
				542	mapping->destination_start;
				543	}
				544
				545	/* Get a pointer to the element storing the similarity between a line in A
				546	* and a line in B.
				547	*
				548	* The similarities are stored in a 2-dimensional array. Each "row" in the
				549	* array contains the similarities for a line in B. The similarities stored in
				550	* a row are the similarities between the line in B and the nearby lines in A.
				551	* To keep the length of each row the same, it is padded out with values of -1
				552	* where the search range extends beyond the lines in A.
				553	* For example, if max_search_distance_a is 2 and the two sides of a diff chunk
				554	* look like this:
				555	* a \| m
				556	* b \| n
				557	* c \| o
				558	* d \| p
				559	* e \| q
				560	* Then the similarity array will contain:
				561	* [-1, -1, am, bm, cm,
				562	* -1, an, bn, cn, dn,
				563	* ao, bo, co, do, eo,
				564	* bp, cp, dp, ep, -1,
				565	* cq, dq, eq, -1, -1]
				566	* Where similarities are denoted either by -1 for invalid, or the
				567	* concatenation of the two lines in the diff being compared.
				568	*
				569	* \param similarities array of similarities between lines in A and B
				570	* \param line_a the index of the line in A, in the same frame of reference as
				571	* closest_line_a.
				572	* \param local_line_b the index of the line in B, relative to the first line
				573	* in B that similarities represents.
				574	* \param closest_line_a the index of the line in A that is deemed to be
				575	* closest to local_line_b. This must be in the same
				576	* frame of reference as line_a. This value defines
				577	* where similarities is centered for the line in B.
				578	* \param max_search_distance_a maximum distance in lines from the closest line
				579	* in A for other lines in A for which
				580	* similarities may be calculated.
				581	*/
				582	static int get_similarity(int similarities,
				583	int line_a, int local_line_b,
				584	int closest_line_a, int max_search_distance_a)
				585	{
				586	assert(abs(line_a - closest_line_a) <=
				587	max_search_distance_a);
				588	return similarities + line_a - closest_line_a +
				589	max_search_distance_a +
				590	local_line_b * (max_search_distance_a * 2 + 1);
				591	}
				592
				593	#define CERTAIN_NOTHING_MATCHES -2
				594	#define CERTAINTY_NOT_CALCULATED -1
				595
				596	/* Given a line in B, first calculate its similarities with nearby lines in A
				597	* if not already calculated, then identify the most similar and second most
				598	* similar lines. The "certainty" is calculated based on those two
				599	* similarities.
				600	*
				601	* \param start_a the index of the first line of the chunk in A
				602	* \param length_a the length in lines of the chunk in A
				603	* \param local_line_b the index of the line in B, relative to the first line
				604	* in the chunk.
				605	* \param fingerprints_a array of fingerprints for the chunk in A
				606	* \param fingerprints_b array of fingerprints for the chunk in B
				607	* \param similarities 2-dimensional array of similarities between lines in A
				608	* and B. See get_similarity() for more details.
				609	* \param certainties array of values indicating how strongly a line in B is
				610	* matched with some line in A.
				611	* \param second_best_result array of absolute indices in A for the second
				612	* closest match of a line in B.
				613	* \param result array of absolute indices in A for the closest match of a line
				614	* in B.
				615	* \param max_search_distance_a maximum distance in lines from the closest line
				616	* in A for other lines in A for which
				617	* similarities may be calculated.
				618	* \param map_line_number_in_b_to_a parameter to map_line_number().
				619	*/
				620	static void find_best_line_matches(
				621	int start_a,
				622	int length_a,
				623	int start_b,
				624	int local_line_b,
				625	struct fingerprint *fingerprints_a,
				626	struct fingerprint *fingerprints_b,
				627	int *similarities,
				628	int *certainties,
				629	int *second_best_result,
				630	int *result,
				631	const int max_search_distance_a,
				632	const struct line_number_mapping *map_line_number_in_b_to_a)
				633	{
				634
				635	int i, search_start, search_end, closest_local_line_a, *similarity,
				636	best_similarity = 0, second_best_similarity = 0,
				637	best_similarity_index = 0, second_best_similarity_index = 0;
				638
				639	/* certainty has already been calculated so no need to redo the work */
				640	if (certainties[local_line_b] != CERTAINTY_NOT_CALCULATED)
				641	return;
				642
				643	closest_local_line_a = map_line_number(
				644	local_line_b + start_b, map_line_number_in_b_to_a) - start_a;
				645
				646	search_start = closest_local_line_a - max_search_distance_a;
				647	if (search_start < 0)
				648	search_start = 0;
				649
				650	search_end = closest_local_line_a + max_search_distance_a + 1;
				651	if (search_end > length_a)
				652	search_end = length_a;
				653
				654	for (i = search_start; i < search_end; ++i) {
				655	similarity = get_similarity(similarities,
				656	i, local_line_b,
				657	closest_local_line_a,
				658	max_search_distance_a);
				659	if (*similarity == -1) {
				660	/* This value will never exceed 10 but assert just in
				661	* case
				662	*/
				663	assert(abs(i - closest_local_line_a) < 1000);
				664	/* scale the similarity by (1000 - distance from
				665	* closest line) to act as a tie break between lines
				666	* that otherwise are equally similar.
				667	*/
				668	*similarity = fingerprint_similarity(
				669	fingerprints_b + local_line_b,
				670	fingerprints_a + i) *
				671	(1000 - abs(i - closest_local_line_a));
				672	}
				673	if (*similarity > best_similarity) {
				674	second_best_similarity = best_similarity;
				675	second_best_similarity_index = best_similarity_index;
				676	best_similarity = *similarity;
				677	best_similarity_index = i;
				678	} else if (*similarity > second_best_similarity) {
				679	second_best_similarity = *similarity;
				680	second_best_similarity_index = i;
				681	}
				682	}
				683
				684	if (best_similarity == 0) {
				685	/* this line definitely doesn't match with anything. Mark it
				686	* with this special value so it doesn't get invalidated and
				687	* won't be recalculated.
				688	*/
				689	certainties[local_line_b] = CERTAIN_NOTHING_MATCHES;
				690	result[local_line_b] = -1;
				691	} else {
				692	/* Calculate the certainty with which this line matches.
				693	* If the line matches well with two lines then that reduces
				694	* the certainty. However we still want to prioritise matching
				695	* a line that matches very well with two lines over matching a
				696	* line that matches poorly with one line, hence doubling
				697	* best_similarity.
				698	* This means that if we have
				699	* line X that matches only one line with a score of 3,
				700	* line Y that matches two lines equally with a score of 5,
				701	* and line Z that matches only one line with a score or 2,
				702	* then the lines in order of certainty are X, Y, Z.
				703	*/
				704	certainties[local_line_b] = best_similarity * 2 -
				705	second_best_similarity;
				706
				707	/* We keep both the best and second best results to allow us to
				708	* check at a later stage of the matching process whether the
				709	* result needs to be invalidated.
				710	*/
				711	result[local_line_b] = start_a + best_similarity_index;
				712	second_best_result[local_line_b] =
				713	start_a + second_best_similarity_index;
				714	}
				715	}
				716
				717	/*
				718	* This finds the line that we can match with the most confidence, and
				719	* uses it as a partition. It then calls itself on the lines on either side of
				720	* that partition. In this way we avoid lines appearing out of order, and
				721	* retain a sensible line ordering.
				722	* \param start_a index of the first line in A with which lines in B may be
				723	* compared.
				724	* \param start_b index of the first line in B for which matching should be
				725	* done.
				726	* \param length_a number of lines in A with which lines in B may be compared.
				727	* \param length_b number of lines in B for which matching should be done.
				728	* \param fingerprints_a mutable array of fingerprints in A. The first element
				729	* corresponds to the line at start_a.
				730	* \param fingerprints_b array of fingerprints in B. The first element
				731	* corresponds to the line at start_b.
				732	* \param similarities 2-dimensional array of similarities between lines in A
				733	* and B. See get_similarity() for more details.
				734	* \param certainties array of values indicating how strongly a line in B is
				735	* matched with some line in A.
				736	* \param second_best_result array of absolute indices in A for the second
				737	* closest match of a line in B.
				738	* \param result array of absolute indices in A for the closest match of a line
				739	* in B.
				740	* \param max_search_distance_a maximum distance in lines from the closest line
				741	* in A for other lines in A for which
				742	* similarities may be calculated.
				743	* \param max_search_distance_b an upper bound on the greatest possible
				744	* distance between lines in B such that they will
				745	* both be compared with the same line in A
				746	* according to max_search_distance_a.
				747	* \param map_line_number_in_b_to_a parameter to map_line_number().
				748	*/
				749	static void fuzzy_find_matching_lines_recurse(
				750	int start_a, int start_b,
				751	int length_a, int length_b,
				752	struct fingerprint *fingerprints_a,
				753	struct fingerprint *fingerprints_b,
				754	int *similarities,
				755	int *certainties,
				756	int *second_best_result,
				757	int *result,
				758	int max_search_distance_a,
				759	int max_search_distance_b,
				760	const struct line_number_mapping *map_line_number_in_b_to_a)
				761	{
				762	int i, invalidate_min, invalidate_max, offset_b,
				763	second_half_start_a, second_half_start_b,
				764	second_half_length_a, second_half_length_b,
				765	most_certain_line_a, most_certain_local_line_b = -1,
				766	most_certain_line_certainty = -1,
				767	closest_local_line_a;
				768
				769	for (i = 0; i < length_b; ++i) {
				770	find_best_line_matches(start_a,
				771	length_a,
				772	start_b,
				773	i,
				774	fingerprints_a,
				775	fingerprints_b,
				776	similarities,
				777	certainties,
				778	second_best_result,
				779	result,
				780	max_search_distance_a,
				781	map_line_number_in_b_to_a);
				782
				783	if (certainties[i] > most_certain_line_certainty) {
				784	most_certain_line_certainty = certainties[i];
				785	most_certain_local_line_b = i;
				786	}
				787	}
				788
				789	/* No matches. */
				790	if (most_certain_local_line_b == -1)
				791	return;
				792
				793	most_certain_line_a = result[most_certain_local_line_b];
				794
				795	/*
				796	* Subtract the most certain line's fingerprint in B from the matched
				797	* fingerprint in A. This means that other lines in B can't also match
				798	* the same parts of the line in A.
				799	*/
				800	fingerprint_subtract(fingerprints_a + most_certain_line_a - start_a,
				801	fingerprints_b + most_certain_local_line_b);
				802
				803	/* Invalidate results that may be affected by the choice of most
				804	* certain line.
				805	*/
				806	invalidate_min = most_certain_local_line_b - max_search_distance_b;
				807	invalidate_max = most_certain_local_line_b + max_search_distance_b + 1;
				808	if (invalidate_min < 0)
				809	invalidate_min = 0;
				810	if (invalidate_max > length_b)
				811	invalidate_max = length_b;
				812
				813	/* As the fingerprint in A has changed, discard previously calculated
				814	* similarity values with that fingerprint.
				815	*/
				816	for (i = invalidate_min; i < invalidate_max; ++i) {
				817	closest_local_line_a = map_line_number(
				818	i + start_b, map_line_number_in_b_to_a) - start_a;
				819
				820	/* Check that the lines in A and B are close enough that there
				821	* is a similarity value for them.
				822	*/
				823	if (abs(most_certain_line_a - start_a - closest_local_line_a) >
				824	max_search_distance_a) {
				825	continue;
				826	}
				827
				828	*get_similarity(similarities, most_certain_line_a - start_a,
				829	i, closest_local_line_a,
				830	max_search_distance_a) = -1;
				831	}
				832
				833	/* More invalidating of results that may be affected by the choice of
				834	* most certain line.
				835	* Discard the matches for lines in B that are currently matched with a
				836	* line in A such that their ordering contradicts the ordering imposed
				837	* by the choice of most certain line.
				838	*/
				839	for (i = most_certain_local_line_b - 1; i >= invalidate_min; --i) {
				840	/* In this loop we discard results for lines in B that are
				841	* before most-certain-line-B but are matched with a line in A
				842	* that is after most-certain-line-A.
				843	*/
				844	if (certainties[i] >= 0 &&
				845	(result[i] >= most_certain_line_a \|\|
				846	second_best_result[i] >= most_certain_line_a)) {
				847	certainties[i] = CERTAINTY_NOT_CALCULATED;
				848	}
				849	}
				850	for (i = most_certain_local_line_b + 1; i < invalidate_max; ++i) {
				851	/* In this loop we discard results for lines in B that are
				852	* after most-certain-line-B but are matched with a line in A
				853	* that is before most-certain-line-A.
				854	*/
				855	if (certainties[i] >= 0 &&
				856	(result[i] <= most_certain_line_a \|\|
				857	second_best_result[i] <= most_certain_line_a)) {
				858	certainties[i] = CERTAINTY_NOT_CALCULATED;
				859	}
				860	}
				861
				862	/* Repeat the matching process for lines before the most certain line.
				863	*/
				864	if (most_certain_local_line_b > 0) {
				865	fuzzy_find_matching_lines_recurse(
				866	start_a, start_b,
				867	most_certain_line_a + 1 - start_a,
				868	most_certain_local_line_b,
				869	fingerprints_a, fingerprints_b, similarities,
				870	certainties, second_best_result, result,
				871	max_search_distance_a,
				872	max_search_distance_b,
				873	map_line_number_in_b_to_a);
				874	}
				875	/* Repeat the matching process for lines after the most certain line.
				876	*/
				877	if (most_certain_local_line_b + 1 < length_b) {
				878	second_half_start_a = most_certain_line_a;
				879	offset_b = most_certain_local_line_b + 1;
				880	second_half_start_b = start_b + offset_b;
				881	second_half_length_a =
				882	length_a + start_a - second_half_start_a;
				883	second_half_length_b =
				884	length_b + start_b - second_half_start_b;
				885	fuzzy_find_matching_lines_recurse(
				886	second_half_start_a, second_half_start_b,
				887	second_half_length_a, second_half_length_b,
				888	fingerprints_a + second_half_start_a - start_a,
				889	fingerprints_b + offset_b,
				890	similarities +
				891	offset_b * (max_search_distance_a * 2 + 1),
				892	certainties + offset_b,
				893	second_best_result + offset_b, result + offset_b,
				894	max_search_distance_a,
				895	max_search_distance_b,
				896	map_line_number_in_b_to_a);
				897	}
				898	}
				899
				900	/* Find the lines in the parent line range that most closely match the lines in
				901	* the target line range. This is accomplished by matching fingerprints in each
				902	* blame_origin, and choosing the best matches that preserve the line ordering.
				903	* See struct fingerprint for details of fingerprint matching, and
				904	* fuzzy_find_matching_lines_recurse for details of preserving line ordering.
				905	*
				906	* The performance is believed to be O(n log n) in the typical case and O(n^2)
				907	* in a pathological case, where n is the number of lines in the target range.
				908	*/
				909	static int fuzzy_find_matching_lines(struct blame_origin parent,
				910	struct blame_origin *target,
				911	int tlno, int parent_slno, int same,
				912	int parent_len)
				913	{
				914	/* We use the terminology "A" for the left hand side of the diff AKA
				915	* parent, and "B" for the right hand side of the diff AKA target. */
				916	int start_a = parent_slno;
				917	int length_a = parent_len;
				918	int start_b = tlno;
				919	int length_b = same - tlno;
				920
				921	struct line_number_mapping map_line_number_in_b_to_a = {
				922	start_a, length_a, start_b, length_b
				923	};
				924
				925	struct fingerprint *fingerprints_a = parent->fingerprints;
				926	struct fingerprint *fingerprints_b = target->fingerprints;
				927
				928	int i, result, second_best_result,
				929	certainties, similarities, similarity_count;
				930
				931	/*
				932	* max_search_distance_a means that given a line in B, compare it to
				933	* the line in A that is closest to its position, and the lines in A
				934	* that are no greater than max_search_distance_a lines away from the
				935	* closest line in A.
				936	*
				937	* max_search_distance_b is an upper bound on the greatest possible
				938	* distance between lines in B such that they will both be compared
				939	* with the same line in A according to max_search_distance_a.
				940	*/
				941	int max_search_distance_a = 10, max_search_distance_b;
				942
				943	if (length_a <= 0)
				944	return NULL;
				945
				946	if (max_search_distance_a >= length_a)
				947	max_search_distance_a = length_a ? length_a - 1 : 0;
				948
				949	max_search_distance_b = ((2 * max_search_distance_a + 1) * length_b
				950	- 1) / length_a;
				951
				952	result = xcalloc(sizeof(int), length_b);
				953	second_best_result = xcalloc(sizeof(int), length_b);
				954	certainties = xcalloc(sizeof(int), length_b);
				955
				956	/* See get_similarity() for details of similarities. */
				957	similarity_count = length_b * (max_search_distance_a * 2 + 1);
				958	similarities = xcalloc(sizeof(int), similarity_count);
				959
				960	for (i = 0; i < length_b; ++i) {
				961	result[i] = -1;
				962	second_best_result[i] = -1;
				963	certainties[i] = CERTAINTY_NOT_CALCULATED;
				964	}
				965
				966	for (i = 0; i < similarity_count; ++i)
				967	similarities[i] = -1;
				968
				969	fuzzy_find_matching_lines_recurse(start_a, start_b,
				970	length_a, length_b,
				971	fingerprints_a + start_a,
				972	fingerprints_b + start_b,
				973	similarities,
				974	certainties,
				975	second_best_result,
				976	result,
				977	max_search_distance_a,
				978	max_search_distance_b,
				979	&map_line_number_in_b_to_a);
				980
				981	free(similarities);
				982	free(certainties);
				983	free(second_best_result);
				984
				985	return result;
				986	}
				987
Jeff King	07a54dc	2019-06-28 06:24:57	[diff] [blame]	988	static void fill_origin_fingerprints(struct blame_origin *o)
Barret Rhoden	1fc7338	2019-05-15 21:45:01	[diff] [blame]	989	{
				990	int *line_starts;
				991
				992	if (o->fingerprints)
				993	return;
				994	o->num_lines = find_line_starts(&line_starts, o->file.ptr,
				995	o->file.size);
Barret Rhoden	a07a977	2019-06-20 16:38:19	[diff] [blame]	996	o->fingerprints = xcalloc(sizeof(struct fingerprint), o->num_lines);
				997	get_line_fingerprints(o->fingerprints, o->file.ptr, line_starts,
				998	0, o->num_lines);
Barret Rhoden	1fc7338	2019-05-15 21:45:01	[diff] [blame]	999	free(line_starts);
				1000	}
				1001
				1002	static void drop_origin_fingerprints(struct blame_origin *o)
				1003	{
Barret Rhoden	a07a977	2019-06-20 16:38:19	[diff] [blame]	1004	if (o->fingerprints) {
				1005	free_line_fingerprints(o->fingerprints, o->num_lines);
				1006	o->num_lines = 0;
				1007	FREE_AND_NULL(o->fingerprints);
				1008	}
Barret Rhoden	1fc7338	2019-05-15 21:45:01	[diff] [blame]	1009	}
				1010
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1011	/*
				1012	* Given an origin, prepare mmfile_t structure to be used by the
				1013	* diff machinery
				1014	*/
				1015	static void fill_origin_blob(struct diff_options *opt,
Barret Rhoden	1fc7338	2019-05-15 21:45:01	[diff] [blame]	1016	struct blame_origin o, mmfile_t file,
				1017	int *num_read_blob, int fill_fingerprints)
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1018	{
				1019	if (!o->file.ptr) {
				1020	enum object_type type;
				1021	unsigned long file_size;
				1022
				1023	(*num_read_blob)++;
Brandon Williams	0d1e0e7	2017-10-31 18:19:11	[diff] [blame]	1024	if (opt->flags.allow_textconv &&
Nguyễn Thái Ngọc Duy	6afaf80	2018-09-21 15:57:22	[diff] [blame]	1025	textconv_object(opt->repo, o->path, o->mode,
				1026	&o->blob_oid, 1, &file->ptr, &file_size))
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1027	;
				1028	else
brian m. carlson	b4f5aca	2018-03-12 02:27:53	[diff] [blame]	1029	file->ptr = read_object_file(&o->blob_oid, &type,
				1030	&file_size);
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1031	file->size = file_size;
				1032
				1033	if (!file->ptr)
				1034	die("Cannot read blob %s for path %s",
				1035	oid_to_hex(&o->blob_oid),
				1036	o->path);
				1037	o->file = *file;
				1038	}
				1039	else
				1040	*file = o->file;
Barret Rhoden	1fc7338	2019-05-15 21:45:01	[diff] [blame]	1041	if (fill_fingerprints)
Jeff King	07a54dc	2019-06-28 06:24:57	[diff] [blame]	1042	fill_origin_fingerprints(o);
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1043	}
				1044
				1045	static void drop_origin_blob(struct blame_origin *o)
				1046	{
Ævar Arnfjörð Bjarmason	ce528de	2018-08-17 13:02:50	[diff] [blame]	1047	FREE_AND_NULL(o->file.ptr);
Barret Rhoden	1fc7338	2019-05-15 21:45:01	[diff] [blame]	1048	drop_origin_fingerprints(o);
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1049	}
				1050
				1051	/*
				1052	* Any merge of blames happens on lists of blames that arrived via
				1053	* different parents in a single suspect. In this case, we want to
				1054	* sort according to the suspect line numbers as opposed to the final
				1055	* image line numbers. The function body is somewhat longish because
				1056	* it avoids unnecessary writes.
				1057	*/
				1058
				1059	static struct blame_entry blame_merge(struct blame_entry list1,
				1060	struct blame_entry *list2)
				1061	{
				1062	struct blame_entry p1 = list1, p2 = list2,
				1063	**tail = &list1;
				1064
				1065	if (!p1)
				1066	return p2;
				1067	if (!p2)
				1068	return p1;
				1069
				1070	if (p1->s_lno <= p2->s_lno) {
				1071	do {
				1072	tail = &p1->next;
				1073	if ((p1 = *tail) == NULL) {
				1074	*tail = p2;
				1075	return list1;
				1076	}
				1077	} while (p1->s_lno <= p2->s_lno);
				1078	}
				1079	for (;;) {
				1080	*tail = p2;
				1081	do {
				1082	tail = &p2->next;
				1083	if ((p2 = *tail) == NULL) {
				1084	*tail = p1;
				1085	return list1;
				1086	}
				1087	} while (p1->s_lno > p2->s_lno);
				1088	*tail = p1;
				1089	do {
				1090	tail = &p1->next;
				1091	if ((p1 = *tail) == NULL) {
				1092	*tail = p2;
				1093	return list1;
				1094	}
				1095	} while (p1->s_lno <= p2->s_lno);
				1096	}
				1097	}
				1098
				1099	static void get_next_blame(const void p)
				1100	{
				1101	return ((struct blame_entry *)p)->next;
				1102	}
				1103
				1104	static void set_next_blame(void p1, void p2)
				1105	{
				1106	((struct blame_entry *)p1)->next = p2;
				1107	}
				1108
				1109	/*
				1110	* Final image line numbers are all different, so we don't need a
				1111	* three-way comparison here.
				1112	*/
				1113
				1114	static int compare_blame_final(const void p1, const void p2)
				1115	{
				1116	return ((struct blame_entry )p1)->lno > ((struct blame_entry )p2)->lno
				1117	? 1 : -1;
				1118	}
				1119
				1120	static int compare_blame_suspect(const void p1, const void p2)
				1121	{
				1122	const struct blame_entry s1 = p1, s2 = p2;
				1123	/*
				1124	* to allow for collating suspects, we sort according to the
				1125	* respective pointer value as the primary sorting criterion.
				1126	* The actual relation is pretty unimportant as long as it
				1127	* establishes a total order. Comparing as integers gives us
				1128	* that.
				1129	*/
				1130	if (s1->suspect != s2->suspect)
				1131	return (intptr_t)s1->suspect > (intptr_t)s2->suspect ? 1 : -1;
				1132	if (s1->s_lno == s2->s_lno)
				1133	return 0;
				1134	return s1->s_lno > s2->s_lno ? 1 : -1;
				1135	}
				1136
				1137	void blame_sort_final(struct blame_scoreboard *sb)
				1138	{
				1139	sb->ent = llist_mergesort(sb->ent, get_next_blame, set_next_blame,
				1140	compare_blame_final);
				1141	}
				1142
Jeff Smith	09002f1	2017-05-24 05:15:36	[diff] [blame]	1143	static int compare_commits_by_reverse_commit_date(const void *a,
				1144	const void *b,
				1145	void *c)
				1146	{
				1147	return -compare_commits_by_commit_date(a, b, c);
				1148	}
				1149
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1150	/*
				1151	* For debugging -- origin is refcounted, and this asserts that
				1152	* we do not underflow.
				1153	*/
				1154	static void sanity_check_refcnt(struct blame_scoreboard *sb)
				1155	{
				1156	int baa = 0;
				1157	struct blame_entry *ent;
				1158
				1159	for (ent = sb->ent; ent; ent = ent->next) {
				1160	/* Nobody should have zero or negative refcnt */
				1161	if (ent->suspect->refcnt <= 0) {
				1162	fprintf(stderr, "%s in %s has negative refcnt %d\n",
				1163	ent->suspect->path,
				1164	oid_to_hex(&ent->suspect->commit->object.oid),
				1165	ent->suspect->refcnt);
				1166	baa = 1;
				1167	}
				1168	}
				1169	if (baa)
				1170	sb->on_sanity_fail(sb, baa);
				1171	}
				1172
				1173	/*
				1174	* If two blame entries that are next to each other came from
				1175	* contiguous lines in the same origin (i.e. <commit, path> pair),
				1176	* merge them together.
				1177	*/
				1178	void blame_coalesce(struct blame_scoreboard *sb)
				1179	{
				1180	struct blame_entry ent, next;
				1181
				1182	for (ent = sb->ent; ent && (next = ent->next); ent = next) {
				1183	if (ent->suspect == next->suspect &&
Barret Rhoden	8934ac8	2019-05-15 21:45:00	[diff] [blame]	1184	ent->s_lno + ent->num_lines == next->s_lno &&
				1185	ent->ignored == next->ignored &&
				1186	ent->unblamable == next->unblamable) {
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1187	ent->num_lines += next->num_lines;
				1188	ent->next = next->next;
				1189	blame_origin_decref(next->suspect);
				1190	free(next);
				1191	ent->score = 0;
				1192	next = ent; /* again */
				1193	}
				1194	}
				1195
				1196	if (sb->debug) /* sanity */
				1197	sanity_check_refcnt(sb);
				1198	}
				1199
				1200	/*
				1201	* Merge the given sorted list of blames into a preexisting origin.
				1202	* If there were no previous blames to that commit, it is entered into
				1203	* the commit priority queue of the score board.
				1204	*/
				1205
				1206	static void queue_blames(struct blame_scoreboard sb, struct blame_origin porigin,
				1207	struct blame_entry *sorted)
				1208	{
				1209	if (porigin->suspects)
				1210	porigin->suspects = blame_merge(porigin->suspects, sorted);
				1211	else {
				1212	struct blame_origin *o;
Nguyễn Thái Ngọc Duy	4e0df4e	2018-05-19 05:28:19	[diff] [blame]	1213	for (o = get_blame_suspects(porigin->commit); o; o = o->next) {
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1214	if (o->suspects) {
				1215	porigin->suspects = sorted;
				1216	return;
				1217	}
				1218	}
				1219	porigin->suspects = sorted;
				1220	prio_queue_put(&sb->commits, porigin->commit);
				1221	}
				1222	}
				1223
				1224	/*
Jeff Smith	09002f1	2017-05-24 05:15:36	[diff] [blame]	1225	* Fill the blob_sha1 field of an origin if it hasn't, so that later
				1226	* call to fill_origin_blob() can use it to locate the data. blob_sha1
				1227	* for an origin is also used to pass the blame for the entire file to
				1228	* the parent to detect the case where a child's blob is identical to
				1229	* that of its parent's.
				1230	*
				1231	* This also fills origin->mode for corresponding tree path.
				1232	*/
Nguyễn Thái Ngọc Duy	a470bee	2018-09-21 15:57:21	[diff] [blame]	1233	static int fill_blob_sha1_and_mode(struct repository *r,
Nguyễn Thái Ngọc Duy	ecbbc0a	2018-08-13 16:14:41	[diff] [blame]	1234	struct blame_origin *origin)
Jeff Smith	09002f1	2017-05-24 05:15:36	[diff] [blame]	1235	{
				1236	if (!is_null_oid(&origin->blob_oid))
				1237	return 0;
Nguyễn Thái Ngọc Duy	50ddb08	2019-06-27 09:28:49	[diff] [blame]	1238	if (get_tree_entry(r, &origin->commit->object.oid, origin->path, &origin->blob_oid, &origin->mode))
Jeff Smith	09002f1	2017-05-24 05:15:36	[diff] [blame]	1239	goto error_out;
Nguyễn Thái Ngọc Duy	a470bee	2018-09-21 15:57:21	[diff] [blame]	1240	if (oid_object_info(r, &origin->blob_oid, NULL) != OBJ_BLOB)
Jeff Smith	09002f1	2017-05-24 05:15:36	[diff] [blame]	1241	goto error_out;
				1242	return 0;
				1243	error_out:
				1244	oidclr(&origin->blob_oid);
				1245	origin->mode = S_IFINVALID;
				1246	return -1;
				1247	}
				1248
				1249	/*
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1250	* We have an origin -- check if the same path exists in the
				1251	* parent and return an origin structure to represent it.
				1252	*/
Nguyễn Thái Ngọc Duy	e675765	2018-09-21 15:57:24	[diff] [blame]	1253	static struct blame_origin find_origin(struct repository r,
				1254	struct commit *parent,
				1255	struct blame_origin *origin)
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1256	{
				1257	struct blame_origin *porigin;
				1258	struct diff_options diff_opts;
				1259	const char *paths[2];
				1260
				1261	/* First check any existing origins */
Nguyễn Thái Ngọc Duy	4e0df4e	2018-05-19 05:28:19	[diff] [blame]	1262	for (porigin = get_blame_suspects(parent); porigin; porigin = porigin->next)
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1263	if (!strcmp(porigin->path, origin->path)) {
				1264	/*
				1265	* The same path between origin and its parent
				1266	* without renaming -- the most common case.
				1267	*/
				1268	return blame_origin_incref (porigin);
				1269	}
				1270
				1271	/* See if the origin->path is different between parent
				1272	* and origin first. Most of the time they are the
				1273	* same and diff-tree is fairly efficient about this.
				1274	*/
Nguyễn Thái Ngọc Duy	e675765	2018-09-21 15:57:24	[diff] [blame]	1275	repo_diff_setup(r, &diff_opts);
Brandon Williams	0d1e0e7	2017-10-31 18:19:11	[diff] [blame]	1276	diff_opts.flags.recursive = 1;
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1277	diff_opts.detect_rename = 0;
				1278	diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
				1279	paths[0] = origin->path;
				1280	paths[1] = NULL;
				1281
				1282	parse_pathspec(&diff_opts.pathspec,
				1283	PATHSPEC_ALL_MAGIC & ~PATHSPEC_LITERAL,
				1284	PATHSPEC_LITERAL_PATH, "", paths);
				1285	diff_setup_done(&diff_opts);
				1286
				1287	if (is_null_oid(&origin->commit->object.oid))
Derrick Stolee	2e27bd7	2018-04-06 19:09:38	[diff] [blame]	1288	do_diff_cache(get_commit_tree_oid(parent), &diff_opts);
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1289	else
Derrick Stolee	2e27bd7	2018-04-06 19:09:38	[diff] [blame]	1290	diff_tree_oid(get_commit_tree_oid(parent),
				1291	get_commit_tree_oid(origin->commit),
Junio C Hamano	a6f38c1	2017-06-19 19:38:44	[diff] [blame]	1292	"", &diff_opts);
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1293	diffcore_std(&diff_opts);
				1294
				1295	if (!diff_queued_diff.nr) {
				1296	/* The path is the same as parent */
				1297	porigin = get_origin(parent, origin->path);
				1298	oidcpy(&porigin->blob_oid, &origin->blob_oid);
				1299	porigin->mode = origin->mode;
				1300	} else {
				1301	/*
				1302	* Since origin->path is a pathspec, if the parent
				1303	* commit had it as a directory, we will see a whole
				1304	* bunch of deletion of files in the directory that we
				1305	* do not care about.
				1306	*/
				1307	int i;
				1308	struct diff_filepair *p = NULL;
				1309	for (i = 0; i < diff_queued_diff.nr; i++) {
				1310	const char *name;
				1311	p = diff_queued_diff.queue[i];
				1312	name = p->one->path ? p->one->path : p->two->path;
				1313	if (!strcmp(name, origin->path))
				1314	break;
				1315	}
				1316	if (!p)
				1317	die("internal error in blame::find_origin");
				1318	switch (p->status) {
				1319	default:
				1320	die("internal error in blame::find_origin (%c)",
				1321	p->status);
				1322	case 'M':
				1323	porigin = get_origin(parent, origin->path);
				1324	oidcpy(&porigin->blob_oid, &p->one->oid);
				1325	porigin->mode = p->one->mode;
				1326	break;
				1327	case 'A':
				1328	case 'T':
				1329	/* Did not exist in parent, or type changed */
				1330	break;
				1331	}
				1332	}
				1333	diff_flush(&diff_opts);
				1334	clear_pathspec(&diff_opts.pathspec);
				1335	return porigin;
				1336	}
				1337
				1338	/*
				1339	* We have an origin -- find the path that corresponds to it in its
				1340	* parent and return an origin structure to represent it.
				1341	*/
Nguyễn Thái Ngọc Duy	e675765	2018-09-21 15:57:24	[diff] [blame]	1342	static struct blame_origin find_rename(struct repository r,
				1343	struct commit *parent,
				1344	struct blame_origin *origin)
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1345	{
				1346	struct blame_origin *porigin = NULL;
				1347	struct diff_options diff_opts;
				1348	int i;
				1349
Nguyễn Thái Ngọc Duy	e675765	2018-09-21 15:57:24	[diff] [blame]	1350	repo_diff_setup(r, &diff_opts);
Brandon Williams	0d1e0e7	2017-10-31 18:19:11	[diff] [blame]	1351	diff_opts.flags.recursive = 1;
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1352	diff_opts.detect_rename = DIFF_DETECT_RENAME;
				1353	diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
				1354	diff_opts.single_follow = origin->path;
				1355	diff_setup_done(&diff_opts);
				1356
				1357	if (is_null_oid(&origin->commit->object.oid))
Derrick Stolee	2e27bd7	2018-04-06 19:09:38	[diff] [blame]	1358	do_diff_cache(get_commit_tree_oid(parent), &diff_opts);
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1359	else
Derrick Stolee	2e27bd7	2018-04-06 19:09:38	[diff] [blame]	1360	diff_tree_oid(get_commit_tree_oid(parent),
				1361	get_commit_tree_oid(origin->commit),
Junio C Hamano	a6f38c1	2017-06-19 19:38:44	[diff] [blame]	1362	"", &diff_opts);
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1363	diffcore_std(&diff_opts);
				1364
				1365	for (i = 0; i < diff_queued_diff.nr; i++) {
				1366	struct diff_filepair *p = diff_queued_diff.queue[i];
				1367	if ((p->status == 'R' \|\| p->status == 'C') &&
				1368	!strcmp(p->two->path, origin->path)) {
				1369	porigin = get_origin(parent, p->one->path);
				1370	oidcpy(&porigin->blob_oid, &p->one->oid);
				1371	porigin->mode = p->one->mode;
				1372	break;
				1373	}
				1374	}
				1375	diff_flush(&diff_opts);
				1376	clear_pathspec(&diff_opts.pathspec);
				1377	return porigin;
				1378	}
				1379
				1380	/*
				1381	* Append a new blame entry to a given output queue.
				1382	*/
				1383	static void add_blame_entry(struct blame_entry ***queue,
				1384	const struct blame_entry *src)
				1385	{
				1386	struct blame_entry e = xmalloc(sizeof(e));
				1387	memcpy(e, src, sizeof(*e));
				1388	blame_origin_incref(e->suspect);
				1389
				1390	e->next = **queue;
				1391	**queue = e;
				1392	*queue = &e->next;
				1393	}
				1394
				1395	/*
				1396	* src typically is on-stack; we want to copy the information in it to
				1397	* a malloced blame_entry that gets added to the given queue. The
				1398	* origin of dst loses a refcnt.
				1399	*/
				1400	static void dup_entry(struct blame_entry ***queue,
				1401	struct blame_entry dst, struct blame_entry src)
				1402	{
				1403	blame_origin_incref(src->suspect);
				1404	blame_origin_decref(dst->suspect);
				1405	memcpy(dst, src, sizeof(*src));
				1406	dst->next = **queue;
				1407	**queue = dst;
				1408	*queue = &dst->next;
				1409	}
				1410
				1411	const char blame_nth_line(struct blame_scoreboard sb, long lno)
				1412	{
				1413	return sb->final_buf + sb->lineno[lno];
				1414	}
				1415
				1416	/*
				1417	* It is known that lines between tlno to same came from parent, and e
				1418	* has an overlap with that range. it also is known that parent's
				1419	* line plno corresponds to e's line tlno.
				1420	*
				1421	* <---- e ----->
				1422	* <------>
				1423	* <------------>
				1424	* <------------>
				1425	* <------------------>
				1426	*
				1427	* Split e into potentially three parts; before this chunk, the chunk
				1428	* to be blamed for the parent, and after that portion.
				1429	*/
				1430	static void split_overlap(struct blame_entry *split,
				1431	struct blame_entry *e,
				1432	int tlno, int plno, int same,
				1433	struct blame_origin *parent)
				1434	{
				1435	int chunk_end_lno;
Barret Rhoden	8934ac8	2019-05-15 21:45:00	[diff] [blame]	1436	int i;
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1437	memset(split, 0, sizeof(struct blame_entry [3]));
				1438
Barret Rhoden	8934ac8	2019-05-15 21:45:00	[diff] [blame]	1439	for (i = 0; i < 3; i++) {
				1440	split[i].ignored = e->ignored;
				1441	split[i].unblamable = e->unblamable;
				1442	}
				1443
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1444	if (e->s_lno < tlno) {
				1445	/* there is a pre-chunk part not blamed on parent */
				1446	split[0].suspect = blame_origin_incref(e->suspect);
				1447	split[0].lno = e->lno;
				1448	split[0].s_lno = e->s_lno;
				1449	split[0].num_lines = tlno - e->s_lno;
				1450	split[1].lno = e->lno + tlno - e->s_lno;
				1451	split[1].s_lno = plno;
				1452	}
				1453	else {
				1454	split[1].lno = e->lno;
				1455	split[1].s_lno = plno + (e->s_lno - tlno);
				1456	}
				1457
				1458	if (same < e->s_lno + e->num_lines) {
				1459	/* there is a post-chunk part not blamed on parent */
				1460	split[2].suspect = blame_origin_incref(e->suspect);
				1461	split[2].lno = e->lno + (same - e->s_lno);
				1462	split[2].s_lno = e->s_lno + (same - e->s_lno);
				1463	split[2].num_lines = e->s_lno + e->num_lines - same;
				1464	chunk_end_lno = split[2].lno;
				1465	}
				1466	else
				1467	chunk_end_lno = e->lno + e->num_lines;
				1468	split[1].num_lines = chunk_end_lno - split[1].lno;
				1469
				1470	/*
				1471	* if it turns out there is nothing to blame the parent for,
				1472	* forget about the splitting. !split[1].suspect signals this.
				1473	*/
				1474	if (split[1].num_lines < 1)
				1475	return;
				1476	split[1].suspect = blame_origin_incref(parent);
				1477	}
				1478
				1479	/*
				1480	* split_overlap() divided an existing blame e into up to three parts
				1481	* in split. Any assigned blame is moved to queue to
				1482	* reflect the split.
				1483	*/
				1484	static void split_blame(struct blame_entry ***blamed,
				1485	struct blame_entry ***unblamed,
				1486	struct blame_entry *split,
				1487	struct blame_entry *e)
				1488	{
				1489	if (split[0].suspect && split[2].suspect) {
				1490	/* The first part (reuse storage for the existing entry e) */
				1491	dup_entry(unblamed, e, &split[0]);
				1492
				1493	/* The last part -- me */
				1494	add_blame_entry(unblamed, &split[2]);
				1495
				1496	/* ... and the middle part -- parent */
				1497	add_blame_entry(blamed, &split[1]);
				1498	}
				1499	else if (!split[0].suspect && !split[2].suspect)
				1500	/*
				1501	* The parent covers the entire area; reuse storage for
				1502	* e and replace it with the parent.
				1503	*/
				1504	dup_entry(blamed, e, &split[1]);
				1505	else if (split[0].suspect) {
				1506	/* me and then parent */
				1507	dup_entry(unblamed, e, &split[0]);
				1508	add_blame_entry(blamed, &split[1]);
				1509	}
				1510	else {
				1511	/* parent and then me */
				1512	dup_entry(blamed, e, &split[1]);
				1513	add_blame_entry(unblamed, &split[2]);
				1514	}
				1515	}
				1516
				1517	/*
				1518	* After splitting the blame, the origins used by the
				1519	* on-stack blame_entry should lose one refcnt each.
				1520	*/
				1521	static void decref_split(struct blame_entry *split)
				1522	{
				1523	int i;
				1524
				1525	for (i = 0; i < 3; i++)
				1526	blame_origin_decref(split[i].suspect);
				1527	}
				1528
				1529	/*
				1530	* reverse_blame reverses the list given in head, appending tail.
				1531	* That allows us to build lists in reverse order, then reverse them
				1532	* afterwards. This can be faster than building the list in proper
				1533	* order right away. The reason is that building in proper order
				1534	* requires writing a link in the _previous_ element, while building
				1535	* in reverse order just requires placing the list head into the
				1536	* _current_ element.
				1537	*/
				1538
				1539	static struct blame_entry reverse_blame(struct blame_entry head,
				1540	struct blame_entry *tail)
				1541	{
				1542	while (head) {
				1543	struct blame_entry *next = head->next;
				1544	head->next = tail;
				1545	tail = head;
				1546	head = next;
				1547	}
				1548	return tail;
				1549	}
				1550
				1551	/*
Barret Rhoden	55f808f	2019-05-15 21:44:58	[diff] [blame]	1552	* Splits a blame entry into two entries at 'len' lines. The original 'e'
				1553	* consists of len lines, i.e. [e->lno, e->lno + len), and the second part,
				1554	* which is returned, consists of the remainder: [e->lno + len, e->lno +
				1555	* e->num_lines). The caller needs to sort out the reference counting for the
				1556	* new entry's suspect.
				1557	*/
				1558	static struct blame_entry split_blame_at(struct blame_entry e, int len,
				1559	struct blame_origin *new_suspect)
				1560	{
				1561	struct blame_entry *n = xcalloc(1, sizeof(struct blame_entry));
				1562
				1563	n->suspect = new_suspect;
Barret Rhoden	8934ac8	2019-05-15 21:45:00	[diff] [blame]	1564	n->ignored = e->ignored;
				1565	n->unblamable = e->unblamable;
Barret Rhoden	55f808f	2019-05-15 21:44:58	[diff] [blame]	1566	n->lno = e->lno + len;
				1567	n->s_lno = e->s_lno + len;
				1568	n->num_lines = e->num_lines - len;
				1569	e->num_lines = len;
				1570	e->score = 0;
				1571	return n;
				1572	}
				1573
Barret Rhoden	ae3f36d	2019-05-15 21:44:59	[diff] [blame]	1574	struct blame_line_tracker {
				1575	int is_parent;
				1576	int s_lno;
				1577	};
				1578
				1579	static int are_lines_adjacent(struct blame_line_tracker *first,
				1580	struct blame_line_tracker *second)
				1581	{
				1582	return first->is_parent == second->is_parent &&
				1583	first->s_lno + 1 == second->s_lno;
				1584	}
				1585
Barret Rhoden	a07a977	2019-06-20 16:38:19	[diff] [blame]	1586	static int scan_parent_range(struct fingerprint *p_fps,
				1587	struct fingerprint *t_fps, int t_idx,
				1588	int from, int nr_lines)
				1589	{
				1590	int sim, p_idx;
				1591	#define FINGERPRINT_FILE_THRESHOLD 10
				1592	int best_sim_val = FINGERPRINT_FILE_THRESHOLD;
				1593	int best_sim_idx = -1;
				1594
				1595	for (p_idx = from; p_idx < from + nr_lines; p_idx++) {
				1596	sim = fingerprint_similarity(&t_fps[t_idx], &p_fps[p_idx]);
				1597	if (sim < best_sim_val)
				1598	continue;
				1599	/* Break ties with the closest-to-target line number */
				1600	if (sim == best_sim_val && best_sim_idx != -1 &&
				1601	abs(best_sim_idx - t_idx) < abs(p_idx - t_idx))
				1602	continue;
				1603	best_sim_val = sim;
				1604	best_sim_idx = p_idx;
				1605	}
				1606	return best_sim_idx;
				1607	}
				1608
Barret Rhoden	ae3f36d	2019-05-15 21:44:59	[diff] [blame]	1609	/*
Barret Rhoden	a07a977	2019-06-20 16:38:19	[diff] [blame]	1610	* The first pass checks the blame entry (from the target) against the parent's
				1611	* diff chunk. If that fails for a line, the second pass tries to match that
				1612	* line to any part of parent file. That catches cases where a change was
				1613	* broken into two chunks by 'context.'
Barret Rhoden	ae3f36d	2019-05-15 21:44:59	[diff] [blame]	1614	*/
				1615	static void guess_line_blames(struct blame_origin *parent,
				1616	struct blame_origin *target,
				1617	int tlno, int offset, int same, int parent_len,
				1618	struct blame_line_tracker *line_blames)
				1619	{
				1620	int i, best_idx, target_idx;
				1621	int parent_slno = tlno + offset;
Barret Rhoden	a07a977	2019-06-20 16:38:19	[diff] [blame]	1622	int *fuzzy_matches;
Barret Rhoden	ae3f36d	2019-05-15 21:44:59	[diff] [blame]	1623
Barret Rhoden	a07a977	2019-06-20 16:38:19	[diff] [blame]	1624	fuzzy_matches = fuzzy_find_matching_lines(parent, target,
				1625	tlno, parent_slno, same,
				1626	parent_len);
Barret Rhoden	ae3f36d	2019-05-15 21:44:59	[diff] [blame]	1627	for (i = 0; i < same - tlno; i++) {
				1628	target_idx = tlno + i;
Barret Rhoden	a07a977	2019-06-20 16:38:19	[diff] [blame]	1629	if (fuzzy_matches && fuzzy_matches[i] >= 0) {
				1630	best_idx = fuzzy_matches[i];
				1631	} else {
				1632	best_idx = scan_parent_range(parent->fingerprints,
				1633	target->fingerprints,
				1634	target_idx, 0,
				1635	parent->num_lines);
				1636	}
				1637	if (best_idx >= 0) {
Barret Rhoden	ae3f36d	2019-05-15 21:44:59	[diff] [blame]	1638	line_blames[i].is_parent = 1;
				1639	line_blames[i].s_lno = best_idx;
				1640	} else {
				1641	line_blames[i].is_parent = 0;
				1642	line_blames[i].s_lno = target_idx;
				1643	}
				1644	}
Barret Rhoden	a07a977	2019-06-20 16:38:19	[diff] [blame]	1645	free(fuzzy_matches);
Barret Rhoden	ae3f36d	2019-05-15 21:44:59	[diff] [blame]	1646	}
				1647
				1648	/*
				1649	* This decides which parts of a blame entry go to the parent (added to the
				1650	* ignoredp list) and which stay with the target (added to the diffp list). The
				1651	* actual decision was made in a separate heuristic function, and those answers
				1652	* for the lines in 'e' are in line_blames. This consumes e, essentially
				1653	* putting it on a list.
				1654	*
				1655	* Note that the blame entries on the ignoredp list are not necessarily sorted
				1656	* with respect to the parent's line numbers yet.
				1657	*/
				1658	static void ignore_blame_entry(struct blame_entry *e,
				1659	struct blame_origin *parent,
Barret Rhoden	ae3f36d	2019-05-15 21:44:59	[diff] [blame]	1660	struct blame_entry **diffp,
				1661	struct blame_entry **ignoredp,
				1662	struct blame_line_tracker *line_blames)
				1663	{
				1664	int entry_len, nr_lines, i;
				1665
				1666	/*
				1667	* We carve new entries off the front of e. Each entry comes from a
				1668	* contiguous chunk of lines: adjacent lines from the same origin
				1669	* (either the parent or the target).
				1670	*/
				1671	entry_len = 1;
				1672	nr_lines = e->num_lines; /* e changes in the loop */
				1673	for (i = 0; i < nr_lines; i++) {
				1674	struct blame_entry *next = NULL;
				1675
				1676	/*
				1677	* We are often adjacent to the next line - only split the blame
				1678	* entry when we have to.
				1679	*/
				1680	if (i + 1 < nr_lines) {
				1681	if (are_lines_adjacent(&line_blames[i],
				1682	&line_blames[i + 1])) {
				1683	entry_len++;
				1684	continue;
				1685	}
				1686	next = split_blame_at(e, entry_len,
				1687	blame_origin_incref(e->suspect));
				1688	}
				1689	if (line_blames[i].is_parent) {
Barret Rhoden	8934ac8	2019-05-15 21:45:00	[diff] [blame]	1690	e->ignored = 1;
Barret Rhoden	ae3f36d	2019-05-15 21:44:59	[diff] [blame]	1691	blame_origin_decref(e->suspect);
				1692	e->suspect = blame_origin_incref(parent);
				1693	e->s_lno = line_blames[i - entry_len + 1].s_lno;
				1694	e->next = *ignoredp;
				1695	*ignoredp = e;
				1696	} else {
Barret Rhoden	8934ac8	2019-05-15 21:45:00	[diff] [blame]	1697	e->unblamable = 1;
Barret Rhoden	ae3f36d	2019-05-15 21:44:59	[diff] [blame]	1698	/* e->s_lno is already in the target's address space. */
				1699	e->next = *diffp;
				1700	*diffp = e;
				1701	}
				1702	assert(e->num_lines == entry_len);
				1703	e = next;
				1704	entry_len = 1;
				1705	}
				1706	assert(!e);
				1707	}
				1708
Barret Rhoden	55f808f	2019-05-15 21:44:58	[diff] [blame]	1709	/*
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1710	* Process one hunk from the patch between the current suspect for
				1711	* blame_entry e and its parent. This first blames any unfinished
				1712	* entries before the chunk (which is where target and parent start
				1713	* differing) on the parent, and then splits blame entries at the
				1714	* start and at the end of the difference region. Since use of -M and
				1715	* -C options may lead to overlapping/duplicate source line number
				1716	* ranges, all we can rely on from sorting/merging is the order of the
				1717	* first suspect line number.
Barret Rhoden	ae3f36d	2019-05-15 21:44:59	[diff] [blame]	1718	*
				1719	* tlno: line number in the target where this chunk begins
				1720	* same: line number in the target where this chunk ends
				1721	* offset: add to tlno to get the chunk starting point in the parent
				1722	* parent_len: number of lines in the parent chunk
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1723	*/
				1724	static void blame_chunk(struct blame_entry *dstq, struct blame_entry *srcq,
Barret Rhoden	ae3f36d	2019-05-15 21:44:59	[diff] [blame]	1725	int tlno, int offset, int same, int parent_len,
				1726	struct blame_origin *parent,
				1727	struct blame_origin *target, int ignore_diffs)
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1728	{
				1729	struct blame_entry e = *srcq;
Barret Rhoden	ae3f36d	2019-05-15 21:44:59	[diff] [blame]	1730	struct blame_entry samep = NULL, diffp = NULL, *ignoredp = NULL;
				1731	struct blame_line_tracker *line_blames = NULL;
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1732
				1733	while (e && e->s_lno < tlno) {
				1734	struct blame_entry *next = e->next;
				1735	/*
				1736	* current record starts before differing portion. If
				1737	* it reaches into it, we need to split it up and
				1738	* examine the second part separately.
				1739	*/
				1740	if (e->s_lno + e->num_lines > tlno) {
				1741	/* Move second half to a new record */
Barret Rhoden	55f808f	2019-05-15 21:44:58	[diff] [blame]	1742	struct blame_entry *n;
				1743
				1744	n = split_blame_at(e, tlno - e->s_lno, e->suspect);
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1745	/* Push new record to diffp */
				1746	n->next = diffp;
				1747	diffp = n;
				1748	} else
				1749	blame_origin_decref(e->suspect);
				1750	/* Pass blame for everything before the differing
				1751	* chunk to the parent */
				1752	e->suspect = blame_origin_incref(parent);
				1753	e->s_lno += offset;
				1754	e->next = samep;
				1755	samep = e;
				1756	e = next;
				1757	}
				1758	/*
				1759	* As we don't know how much of a common stretch after this
				1760	* diff will occur, the currently blamed parts are all that we
				1761	* can assign to the parent for now.
				1762	*/
				1763
				1764	if (samep) {
				1765	dstq = reverse_blame(samep, dstq);
				1766	*dstq = &samep->next;
				1767	}
				1768	/*
				1769	* Prepend the split off portions: everything after e starts
				1770	* after the blameable portion.
				1771	*/
				1772	e = reverse_blame(diffp, e);
				1773
				1774	/*
				1775	* Now retain records on the target while parts are different
				1776	* from the parent.
				1777	*/
				1778	samep = NULL;
				1779	diffp = NULL;
Barret Rhoden	ae3f36d	2019-05-15 21:44:59	[diff] [blame]	1780
				1781	if (ignore_diffs && same - tlno > 0) {
				1782	line_blames = xcalloc(sizeof(struct blame_line_tracker),
				1783	same - tlno);
				1784	guess_line_blames(parent, target, tlno, offset, same,
				1785	parent_len, line_blames);
				1786	}
				1787
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1788	while (e && e->s_lno < same) {
				1789	struct blame_entry *next = e->next;
				1790
				1791	/*
				1792	* If current record extends into sameness, need to split.
				1793	*/
				1794	if (e->s_lno + e->num_lines > same) {
				1795	/*
				1796	* Move second half to a new record to be
				1797	* processed by later chunks
				1798	*/
Barret Rhoden	55f808f	2019-05-15 21:44:58	[diff] [blame]	1799	struct blame_entry *n;
				1800
				1801	n = split_blame_at(e, same - e->s_lno,
				1802	blame_origin_incref(e->suspect));
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1803	/* Push new record to samep */
				1804	n->next = samep;
				1805	samep = n;
				1806	}
Barret Rhoden	ae3f36d	2019-05-15 21:44:59	[diff] [blame]	1807	if (ignore_diffs) {
Jeff King	07a54dc	2019-06-28 06:24:57	[diff] [blame]	1808	ignore_blame_entry(e, parent, &diffp, &ignoredp,
Barret Rhoden	ae3f36d	2019-05-15 21:44:59	[diff] [blame]	1809	line_blames + e->s_lno - tlno);
				1810	} else {
				1811	e->next = diffp;
				1812	diffp = e;
				1813	}
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1814	e = next;
				1815	}
Barret Rhoden	ae3f36d	2019-05-15 21:44:59	[diff] [blame]	1816	free(line_blames);
				1817	if (ignoredp) {
				1818	/*
				1819	* Note ignoredp is not sorted yet, and thus neither is dstq.
				1820	* That list must be sorted before we queue_blames(). We defer
				1821	* sorting until after all diff hunks are processed, so that
				1822	* guess_line_blames() can pick any line in the parent. The
				1823	* slight drawback is that we end up sorting all blame entries
				1824	* passed to the parent, including those that are unrelated to
				1825	* changes made by the ignored commit.
				1826	*/
				1827	dstq = reverse_blame(ignoredp, dstq);
				1828	*dstq = &ignoredp->next;
				1829	}
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1830	**srcq = reverse_blame(diffp, reverse_blame(samep, e));
				1831	/* Move across elements that are in the unblamable portion */
				1832	if (diffp)
				1833	*srcq = &diffp->next;
				1834	}
				1835
				1836	struct blame_chunk_cb_data {
				1837	struct blame_origin *parent;
Barret Rhoden	ae3f36d	2019-05-15 21:44:59	[diff] [blame]	1838	struct blame_origin *target;
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1839	long offset;
Barret Rhoden	ae3f36d	2019-05-15 21:44:59	[diff] [blame]	1840	int ignore_diffs;
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1841	struct blame_entry **dstq;
				1842	struct blame_entry **srcq;
				1843	};
				1844
				1845	/* diff chunks are from parent to target */
				1846	static int blame_chunk_cb(long start_a, long count_a,
				1847	long start_b, long count_b, void *data)
				1848	{
				1849	struct blame_chunk_cb_data *d = data;
				1850	if (start_a - start_b != d->offset)
				1851	die("internal error in blame::blame_chunk_cb");
				1852	blame_chunk(&d->dstq, &d->srcq, start_b, start_a - start_b,
Barret Rhoden	ae3f36d	2019-05-15 21:44:59	[diff] [blame]	1853	start_b + count_b, count_a, d->parent, d->target,
				1854	d->ignore_diffs);
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1855	d->offset = start_a + count_a - (start_b + count_b);
				1856	return 0;
				1857	}
				1858
				1859	/*
				1860	* We are looking at the origin 'target' and aiming to pass blame
				1861	* for the lines it is suspected to its parent. Run diff to find
				1862	* which lines came from parent and pass blame for them.
				1863	*/
				1864	static void pass_blame_to_parent(struct blame_scoreboard *sb,
				1865	struct blame_origin *target,
Barret Rhoden	ae3f36d	2019-05-15 21:44:59	[diff] [blame]	1866	struct blame_origin *parent, int ignore_diffs)
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1867	{
				1868	mmfile_t file_p, file_o;
				1869	struct blame_chunk_cb_data d;
				1870	struct blame_entry *newdest = NULL;
				1871
				1872	if (!target->suspects)
				1873	return; /* nothing remains for this target */
				1874
				1875	d.parent = parent;
Barret Rhoden	ae3f36d	2019-05-15 21:44:59	[diff] [blame]	1876	d.target = target;
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1877	d.offset = 0;
Barret Rhoden	ae3f36d	2019-05-15 21:44:59	[diff] [blame]	1878	d.ignore_diffs = ignore_diffs;
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1879	d.dstq = &newdest; d.srcq = &target->suspects;
				1880
Barret Rhoden	1fc7338	2019-05-15 21:45:01	[diff] [blame]	1881	fill_origin_blob(&sb->revs->diffopt, parent, &file_p,
				1882	&sb->num_read_blob, ignore_diffs);
				1883	fill_origin_blob(&sb->revs->diffopt, target, &file_o,
				1884	&sb->num_read_blob, ignore_diffs);
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1885	sb->num_get_patch++;
				1886
				1887	if (diff_hunks(&file_p, &file_o, blame_chunk_cb, &d, sb->xdl_opts))
				1888	die("unable to generate diff (%s -> %s)",
				1889	oid_to_hex(&parent->commit->object.oid),
				1890	oid_to_hex(&target->commit->object.oid));
				1891	/* The rest are the same as the parent */
Barret Rhoden	ae3f36d	2019-05-15 21:44:59	[diff] [blame]	1892	blame_chunk(&d.dstq, &d.srcq, INT_MAX, d.offset, INT_MAX, 0,
				1893	parent, target, 0);
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1894	*d.dstq = NULL;
Barret Rhoden	ae3f36d	2019-05-15 21:44:59	[diff] [blame]	1895	if (ignore_diffs)
				1896	newdest = llist_mergesort(newdest, get_next_blame,
				1897	set_next_blame,
				1898	compare_blame_suspect);
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1899	queue_blames(sb, parent, newdest);
				1900
				1901	return;
				1902	}
				1903
				1904	/*
				1905	* The lines in blame_entry after splitting blames many times can become
				1906	* very small and trivial, and at some point it becomes pointless to
				1907	* blame the parents. E.g. "\t\t}\n\t}\n\n" appears everywhere in any
				1908	* ordinary C program, and it is not worth to say it was copied from
				1909	* totally unrelated file in the parent.
				1910	*
				1911	* Compute how trivial the lines in the blame_entry are.
				1912	*/
				1913	unsigned blame_entry_score(struct blame_scoreboard sb, struct blame_entry e)
				1914	{
				1915	unsigned score;
				1916	const char cp, ep;
				1917
				1918	if (e->score)
				1919	return e->score;
				1920
				1921	score = 1;
				1922	cp = blame_nth_line(sb, e->lno);
				1923	ep = blame_nth_line(sb, e->lno + e->num_lines);
				1924	while (cp < ep) {
				1925	unsigned ch = ((unsigned char )cp);
				1926	if (isalnum(ch))
				1927	score++;
				1928	cp++;
				1929	}
				1930	e->score = score;
				1931	return score;
				1932	}
				1933
				1934	/*
Brandon Williams	abeacb2	2018-02-14 18:59:25	[diff] [blame]	1935	* best_so_far[] and potential[] are both a split of an existing blame_entry
				1936	* that passes blame to the parent. Maintain best_so_far the best split so
				1937	* far, by comparing potential and best_so_far and copying potential into
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1938	* bst_so_far as needed.
				1939	*/
				1940	static void copy_split_if_better(struct blame_scoreboard *sb,
				1941	struct blame_entry *best_so_far,
Brandon Williams	abeacb2	2018-02-14 18:59:25	[diff] [blame]	1942	struct blame_entry *potential)
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1943	{
				1944	int i;
				1945
Brandon Williams	abeacb2	2018-02-14 18:59:25	[diff] [blame]	1946	if (!potential[1].suspect)
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1947	return;
				1948	if (best_so_far[1].suspect) {
Brandon Williams	abeacb2	2018-02-14 18:59:25	[diff] [blame]	1949	if (blame_entry_score(sb, &potential[1]) <
				1950	blame_entry_score(sb, &best_so_far[1]))
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1951	return;
				1952	}
				1953
				1954	for (i = 0; i < 3; i++)
Brandon Williams	abeacb2	2018-02-14 18:59:25	[diff] [blame]	1955	blame_origin_incref(potential[i].suspect);
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1956	decref_split(best_so_far);
Brandon Williams	abeacb2	2018-02-14 18:59:25	[diff] [blame]	1957	memcpy(best_so_far, potential, sizeof(struct blame_entry[3]));
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1958	}
				1959
				1960	/*
				1961	* We are looking at a part of the final image represented by
				1962	* ent (tlno and same are offset by ent->s_lno).
				1963	* tlno is where we are looking at in the final image.
				1964	* up to (but not including) same match preimage.
				1965	* plno is where we are looking at in the preimage.
				1966	*
				1967	* <-------------- final image ---------------------->
				1968	* <------ent------>
				1969	* ^tlno ^same
				1970	* <---------preimage----->
				1971	* ^plno
				1972	*
				1973	* All line numbers are 0-based.
				1974	*/
				1975	static void handle_split(struct blame_scoreboard *sb,
				1976	struct blame_entry *ent,
				1977	int tlno, int plno, int same,
				1978	struct blame_origin *parent,
				1979	struct blame_entry *split)
				1980	{
				1981	if (ent->num_lines <= tlno)
				1982	return;
				1983	if (tlno < same) {
Brandon Williams	abeacb2	2018-02-14 18:59:25	[diff] [blame]	1984	struct blame_entry potential[3];
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1985	tlno += ent->s_lno;
				1986	same += ent->s_lno;
Brandon Williams	abeacb2	2018-02-14 18:59:25	[diff] [blame]	1987	split_overlap(potential, ent, tlno, plno, same, parent);
				1988	copy_split_if_better(sb, split, potential);
				1989	decref_split(potential);
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	1990	}
				1991	}
				1992
				1993	struct handle_split_cb_data {
				1994	struct blame_scoreboard *sb;
				1995	struct blame_entry *ent;
				1996	struct blame_origin *parent;
				1997	struct blame_entry *split;
				1998	long plno;
				1999	long tlno;
				2000	};
				2001
				2002	static int handle_split_cb(long start_a, long count_a,
				2003	long start_b, long count_b, void *data)
				2004	{
				2005	struct handle_split_cb_data *d = data;
				2006	handle_split(d->sb, d->ent, d->tlno, d->plno, start_b, d->parent,
				2007	d->split);
				2008	d->plno = start_a + count_a;
				2009	d->tlno = start_b + count_b;
				2010	return 0;
				2011	}
				2012
				2013	/*
				2014	* Find the lines from parent that are the same as ent so that
				2015	* we can pass blames to it. file_p has the blob contents for
				2016	* the parent.
				2017	*/
				2018	static void find_copy_in_blob(struct blame_scoreboard *sb,
				2019	struct blame_entry *ent,
				2020	struct blame_origin *parent,
				2021	struct blame_entry *split,
				2022	mmfile_t *file_p)
				2023	{
				2024	const char *cp;
				2025	mmfile_t file_o;
				2026	struct handle_split_cb_data d;
				2027
				2028	memset(&d, 0, sizeof(d));
				2029	d.sb = sb; d.ent = ent; d.parent = parent; d.split = split;
				2030	/*
				2031	* Prepare mmfile that contains only the lines in ent.
				2032	*/
				2033	cp = blame_nth_line(sb, ent->lno);
				2034	file_o.ptr = (char *) cp;
				2035	file_o.size = blame_nth_line(sb, ent->lno + ent->num_lines) - cp;
				2036
				2037	/*
				2038	* file_o is a part of final image we are annotating.
				2039	* file_p partially may match that image.
				2040	*/
				2041	memset(split, 0, sizeof(struct blame_entry [3]));
				2042	if (diff_hunks(file_p, &file_o, handle_split_cb, &d, sb->xdl_opts))
				2043	die("unable to generate diff (%s)",
				2044	oid_to_hex(&parent->commit->object.oid));
				2045	/* remainder, if any, all match the preimage */
				2046	handle_split(sb, ent, d.tlno, d.plno, ent->num_lines, parent, split);
				2047	}
				2048
				2049	/* Move all blame entries from list *source that have a score smaller
				2050	* than score_min to the front of list *small.
				2051	* Returns a pointer to the link pointing to the old head of the small list.
				2052	*/
				2053
				2054	static struct blame_entry *filter_small(struct blame_scoreboard sb,
				2055	struct blame_entry **small,
				2056	struct blame_entry **source,
				2057	unsigned score_min)
				2058	{
				2059	struct blame_entry p = source;
				2060	struct blame_entry oldsmall = small;
				2061	while (p) {
				2062	if (blame_entry_score(sb, p) <= score_min) {
				2063	*small = p;
				2064	small = &p->next;
				2065	p = *small;
				2066	} else {
				2067	*source = p;
				2068	source = &p->next;
				2069	p = *source;
				2070	}
				2071	}
				2072	*small = oldsmall;
				2073	*source = NULL;
				2074	return small;
				2075	}
				2076
				2077	/*
				2078	* See if lines currently target is suspected for can be attributed to
				2079	* parent.
				2080	*/
				2081	static void find_move_in_parent(struct blame_scoreboard *sb,
				2082	struct blame_entry ***blamed,
				2083	struct blame_entry **toosmall,
				2084	struct blame_origin *target,
				2085	struct blame_origin *parent)
				2086	{
				2087	struct blame_entry *e, split[3];
				2088	struct blame_entry *unblamed = target->suspects;
				2089	struct blame_entry *leftover = NULL;
				2090	mmfile_t file_p;
				2091
				2092	if (!unblamed)
				2093	return; /* nothing remains for this target */
				2094
Barret Rhoden	1fc7338	2019-05-15 21:45:01	[diff] [blame]	2095	fill_origin_blob(&sb->revs->diffopt, parent, &file_p,
				2096	&sb->num_read_blob, 0);
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	2097	if (!file_p.ptr)
				2098	return;
				2099
				2100	/* At each iteration, unblamed has a NULL-terminated list of
				2101	* entries that have not yet been tested for blame. leftover
				2102	* contains the reversed list of entries that have been tested
				2103	* without being assignable to the parent.
				2104	*/
				2105	do {
				2106	struct blame_entry **unblamedtail = &unblamed;
				2107	struct blame_entry *next;
				2108	for (e = unblamed; e; e = next) {
				2109	next = e->next;
				2110	find_copy_in_blob(sb, e, parent, split, &file_p);
				2111	if (split[1].suspect &&
				2112	sb->move_score < blame_entry_score(sb, &split[1])) {
				2113	split_blame(blamed, &unblamedtail, split, e);
				2114	} else {
				2115	e->next = leftover;
				2116	leftover = e;
				2117	}
				2118	decref_split(split);
				2119	}
				2120	*unblamedtail = NULL;
				2121	toosmall = filter_small(sb, toosmall, &unblamed, sb->move_score);
				2122	} while (unblamed);
				2123	target->suspects = reverse_blame(leftover, NULL);
				2124	}
				2125
				2126	struct blame_list {
				2127	struct blame_entry *ent;
				2128	struct blame_entry split[3];
				2129	};
				2130
				2131	/*
				2132	* Count the number of entries the target is suspected for,
				2133	* and prepare a list of entry and the best split.
				2134	*/
				2135	static struct blame_list setup_blame_list(struct blame_entry unblamed,
				2136	int *num_ents_p)
				2137	{
				2138	struct blame_entry *e;
				2139	int num_ents, i;
				2140	struct blame_list *blame_list = NULL;
				2141
				2142	for (e = unblamed, num_ents = 0; e; e = e->next)
				2143	num_ents++;
				2144	if (num_ents) {
				2145	blame_list = xcalloc(num_ents, sizeof(struct blame_list));
				2146	for (e = unblamed, i = 0; e; e = e->next)
				2147	blame_list[i++].ent = e;
				2148	}
				2149	*num_ents_p = num_ents;
				2150	return blame_list;
				2151	}
				2152
				2153	/*
				2154	* For lines target is suspected for, see if we can find code movement
				2155	* across file boundary from the parent commit. porigin is the path
				2156	* in the parent we already tried.
				2157	*/
				2158	static void find_copy_in_parent(struct blame_scoreboard *sb,
				2159	struct blame_entry ***blamed,
				2160	struct blame_entry **toosmall,
				2161	struct blame_origin *target,
				2162	struct commit *parent,
				2163	struct blame_origin *porigin,
				2164	int opt)
				2165	{
				2166	struct diff_options diff_opts;
				2167	int i, j;
				2168	struct blame_list *blame_list;
				2169	int num_ents;
				2170	struct blame_entry *unblamed = target->suspects;
				2171	struct blame_entry *leftover = NULL;
				2172
				2173	if (!unblamed)
				2174	return; /* nothing remains for this target */
				2175
Nguyễn Thái Ngọc Duy	e675765	2018-09-21 15:57:24	[diff] [blame]	2176	repo_diff_setup(sb->repo, &diff_opts);
Brandon Williams	0d1e0e7	2017-10-31 18:19:11	[diff] [blame]	2177	diff_opts.flags.recursive = 1;
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	2178	diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
				2179
				2180	diff_setup_done(&diff_opts);
				2181
				2182	/* Try "find copies harder" on new path if requested;
				2183	* we do not want to use diffcore_rename() actually to
				2184	* match things up; find_copies_harder is set only to
Junio C Hamano	a6f38c1	2017-06-19 19:38:44	[diff] [blame]	2185	* force diff_tree_oid() to feed all filepairs to diff_queue,
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	2186	* and this code needs to be after diff_setup_done(), which
				2187	* usually makes find-copies-harder imply copy detection.
				2188	*/
				2189	if ((opt & PICKAXE_BLAME_COPY_HARDEST)
				2190	\|\| ((opt & PICKAXE_BLAME_COPY_HARDER)
				2191	&& (!porigin \|\| strcmp(target->path, porigin->path))))
Brandon Williams	0d1e0e7	2017-10-31 18:19:11	[diff] [blame]	2192	diff_opts.flags.find_copies_harder = 1;
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	2193
				2194	if (is_null_oid(&target->commit->object.oid))
Derrick Stolee	2e27bd7	2018-04-06 19:09:38	[diff] [blame]	2195	do_diff_cache(get_commit_tree_oid(parent), &diff_opts);
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	2196	else
Derrick Stolee	2e27bd7	2018-04-06 19:09:38	[diff] [blame]	2197	diff_tree_oid(get_commit_tree_oid(parent),
				2198	get_commit_tree_oid(target->commit),
Junio C Hamano	a6f38c1	2017-06-19 19:38:44	[diff] [blame]	2199	"", &diff_opts);
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	2200
Brandon Williams	0d1e0e7	2017-10-31 18:19:11	[diff] [blame]	2201	if (!diff_opts.flags.find_copies_harder)
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	2202	diffcore_std(&diff_opts);
				2203
				2204	do {
				2205	struct blame_entry **unblamedtail = &unblamed;
				2206	blame_list = setup_blame_list(unblamed, &num_ents);
				2207
				2208	for (i = 0; i < diff_queued_diff.nr; i++) {
				2209	struct diff_filepair *p = diff_queued_diff.queue[i];
				2210	struct blame_origin *norigin;
				2211	mmfile_t file_p;
Brandon Williams	abeacb2	2018-02-14 18:59:25	[diff] [blame]	2212	struct blame_entry potential[3];
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	2213
				2214	if (!DIFF_FILE_VALID(p->one))
				2215	continue; /* does not exist in parent */
				2216	if (S_ISGITLINK(p->one->mode))
				2217	continue; /* ignore git links */
				2218	if (porigin && !strcmp(p->one->path, porigin->path))
				2219	/* find_move already dealt with this path */
				2220	continue;
				2221
				2222	norigin = get_origin(parent, p->one->path);
				2223	oidcpy(&norigin->blob_oid, &p->one->oid);
				2224	norigin->mode = p->one->mode;
Barret Rhoden	1fc7338	2019-05-15 21:45:01	[diff] [blame]	2225	fill_origin_blob(&sb->revs->diffopt, norigin, &file_p,
				2226	&sb->num_read_blob, 0);
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	2227	if (!file_p.ptr)
				2228	continue;
				2229
				2230	for (j = 0; j < num_ents; j++) {
				2231	find_copy_in_blob(sb, blame_list[j].ent,
Brandon Williams	abeacb2	2018-02-14 18:59:25	[diff] [blame]	2232	norigin, potential, &file_p);
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	2233	copy_split_if_better(sb, blame_list[j].split,
Brandon Williams	abeacb2	2018-02-14 18:59:25	[diff] [blame]	2234	potential);
				2235	decref_split(potential);
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	2236	}
				2237	blame_origin_decref(norigin);
				2238	}
				2239
				2240	for (j = 0; j < num_ents; j++) {
				2241	struct blame_entry *split = blame_list[j].split;
				2242	if (split[1].suspect &&
				2243	sb->copy_score < blame_entry_score(sb, &split[1])) {
				2244	split_blame(blamed, &unblamedtail, split,
				2245	blame_list[j].ent);
				2246	} else {
				2247	blame_list[j].ent->next = leftover;
				2248	leftover = blame_list[j].ent;
				2249	}
				2250	decref_split(split);
				2251	}
				2252	free(blame_list);
				2253	*unblamedtail = NULL;
				2254	toosmall = filter_small(sb, toosmall, &unblamed, sb->copy_score);
				2255	} while (unblamed);
				2256	target->suspects = reverse_blame(leftover, NULL);
				2257	diff_flush(&diff_opts);
				2258	clear_pathspec(&diff_opts.pathspec);
				2259	}
				2260
				2261	/*
				2262	* The blobs of origin and porigin exactly match, so everything
				2263	* origin is suspected for can be blamed on the parent.
				2264	*/
				2265	static void pass_whole_blame(struct blame_scoreboard *sb,
				2266	struct blame_origin origin, struct blame_origin porigin)
				2267	{
				2268	struct blame_entry e, suspects;
				2269
				2270	if (!porigin->file.ptr && origin->file.ptr) {
				2271	/* Steal its file */
				2272	porigin->file = origin->file;
				2273	origin->file.ptr = NULL;
				2274	}
				2275	suspects = origin->suspects;
				2276	origin->suspects = NULL;
				2277	for (e = suspects; e; e = e->next) {
				2278	blame_origin_incref(porigin);
				2279	blame_origin_decref(e->suspect);
				2280	e->suspect = porigin;
				2281	}
				2282	queue_blames(sb, porigin, suspects);
				2283	}
				2284
				2285	/*
				2286	* We pass blame from the current commit to its parents. We keep saying
				2287	* "parent" (and "porigin"), but what we mean is to find scapegoat to
				2288	* exonerate ourselves.
				2289	*/
				2290	static struct commit_list first_scapegoat(struct rev_info revs, struct commit *commit,
				2291	int reverse)
				2292	{
				2293	if (!reverse) {
				2294	if (revs->first_parent_only &&
				2295	commit->parents &&
				2296	commit->parents->next) {
				2297	free_commit_list(commit->parents->next);
				2298	commit->parents->next = NULL;
				2299	}
				2300	return commit->parents;
				2301	}
				2302	return lookup_decoration(&revs->children, &commit->object);
				2303	}
				2304
				2305	static int num_scapegoats(struct rev_info revs, struct commit commit, int reverse)
				2306	{
				2307	struct commit_list *l = first_scapegoat(revs, commit, reverse);
				2308	return commit_list_count(l);
				2309	}
				2310
				2311	/* Distribute collected unsorted blames to the respected sorted lists
				2312	* in the various origins.
				2313	*/
				2314	static void distribute_blame(struct blame_scoreboard sb, struct blame_entry blamed)
				2315	{
				2316	blamed = llist_mergesort(blamed, get_next_blame, set_next_blame,
				2317	compare_blame_suspect);
				2318	while (blamed)
				2319	{
				2320	struct blame_origin *porigin = blamed->suspect;
				2321	struct blame_entry *suspects = NULL;
				2322	do {
				2323	struct blame_entry *next = blamed->next;
				2324	blamed->next = suspects;
				2325	suspects = blamed;
				2326	blamed = next;
				2327	} while (blamed && blamed->suspect == porigin);
				2328	suspects = reverse_blame(suspects, NULL);
				2329	queue_blames(sb, porigin, suspects);
				2330	}
				2331	}
				2332
				2333	#define MAXSG 16
				2334
				2335	static void pass_blame(struct blame_scoreboard sb, struct blame_origin origin, int opt)
				2336	{
				2337	struct rev_info *revs = sb->revs;
				2338	int i, pass, num_sg;
				2339	struct commit *commit = origin->commit;
				2340	struct commit_list *sg;
				2341	struct blame_origin *sg_buf[MAXSG];
				2342	struct blame_origin porigin, *sg_origin = sg_buf;
				2343	struct blame_entry *toosmall = NULL;
				2344	struct blame_entry blames, *blametail = &blames;
				2345
				2346	num_sg = num_scapegoats(revs, commit, sb->reverse);
				2347	if (!num_sg)
				2348	goto finish;
				2349	else if (num_sg < ARRAY_SIZE(sg_buf))
				2350	memset(sg_buf, 0, sizeof(sg_buf));
				2351	else
				2352	sg_origin = xcalloc(num_sg, sizeof(*sg_origin));
				2353
				2354	/*
				2355	* The first pass looks for unrenamed path to optimize for
				2356	* common cases, then we look for renames in the second pass.
				2357	*/
				2358	for (pass = 0; pass < 2 - sb->no_whole_file_rename; pass++) {
Nguyễn Thái Ngọc Duy	e675765	2018-09-21 15:57:24	[diff] [blame]	2359	struct blame_origin (find)(struct repository , struct commit , struct blame_origin *);
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	2360	find = pass ? find_rename : find_origin;
				2361
				2362	for (i = 0, sg = first_scapegoat(revs, commit, sb->reverse);
				2363	i < num_sg && sg;
				2364	sg = sg->next, i++) {
				2365	struct commit *p = sg->item;
				2366	int j, same;
				2367
				2368	if (sg_origin[i])
				2369	continue;
				2370	if (parse_commit(p))
				2371	continue;
Nguyễn Thái Ngọc Duy	e675765	2018-09-21 15:57:24	[diff] [blame]	2372	porigin = find(sb->repo, p, origin);
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	2373	if (!porigin)
				2374	continue;
Jeff King	4a7e27e	2018-08-28 21:22:40	[diff] [blame]	2375	if (oideq(&porigin->blob_oid, &origin->blob_oid)) {
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	2376	pass_whole_blame(sb, origin, porigin);
				2377	blame_origin_decref(porigin);
				2378	goto finish;
				2379	}
				2380	for (j = same = 0; j < i; j++)
				2381	if (sg_origin[j] &&
Jeff King	4a7e27e	2018-08-28 21:22:40	[diff] [blame]	2382	oideq(&sg_origin[j]->blob_oid, &porigin->blob_oid)) {
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	2383	same = 1;
				2384	break;
				2385	}
				2386	if (!same)
				2387	sg_origin[i] = porigin;
				2388	else
				2389	blame_origin_decref(porigin);
				2390	}
				2391	}
				2392
				2393	sb->num_commits++;
				2394	for (i = 0, sg = first_scapegoat(revs, commit, sb->reverse);
				2395	i < num_sg && sg;
				2396	sg = sg->next, i++) {
				2397	struct blame_origin *porigin = sg_origin[i];
				2398	if (!porigin)
				2399	continue;
				2400	if (!origin->previous) {
				2401	blame_origin_incref(porigin);
				2402	origin->previous = porigin;
				2403	}
Barret Rhoden	ae3f36d	2019-05-15 21:44:59	[diff] [blame]	2404	pass_blame_to_parent(sb, origin, porigin, 0);
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	2405	if (!origin->suspects)
				2406	goto finish;
				2407	}
				2408
				2409	/*
Barret Rhoden	ae3f36d	2019-05-15 21:44:59	[diff] [blame]	2410	* Pass remaining suspects for ignored commits to their parents.
				2411	*/
				2412	if (oidset_contains(&sb->ignore_list, &commit->object.oid)) {
				2413	for (i = 0, sg = first_scapegoat(revs, commit, sb->reverse);
				2414	i < num_sg && sg;
				2415	sg = sg->next, i++) {
				2416	struct blame_origin *porigin = sg_origin[i];
				2417
				2418	if (!porigin)
				2419	continue;
				2420	pass_blame_to_parent(sb, origin, porigin, 1);
Barret Rhoden	a07a977	2019-06-20 16:38:19	[diff] [blame]	2421	/*
				2422	* Preemptively drop porigin so we can refresh the
				2423	* fingerprints if we use the parent again, which can
				2424	* occur if you ignore back-to-back commits.
				2425	*/
				2426	drop_origin_blob(porigin);
Barret Rhoden	ae3f36d	2019-05-15 21:44:59	[diff] [blame]	2427	if (!origin->suspects)
				2428	goto finish;
				2429	}
				2430	}
				2431
				2432	/*
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	2433	* Optionally find moves in parents' files.
				2434	*/
				2435	if (opt & PICKAXE_BLAME_MOVE) {
				2436	filter_small(sb, &toosmall, &origin->suspects, sb->move_score);
				2437	if (origin->suspects) {
				2438	for (i = 0, sg = first_scapegoat(revs, commit, sb->reverse);
				2439	i < num_sg && sg;
				2440	sg = sg->next, i++) {
				2441	struct blame_origin *porigin = sg_origin[i];
				2442	if (!porigin)
				2443	continue;
				2444	find_move_in_parent(sb, &blametail, &toosmall, origin, porigin);
				2445	if (!origin->suspects)
				2446	break;
				2447	}
				2448	}
				2449	}
				2450
				2451	/*
				2452	* Optionally find copies from parents' files.
				2453	*/
				2454	if (opt & PICKAXE_BLAME_COPY) {
				2455	if (sb->copy_score > sb->move_score)
				2456	filter_small(sb, &toosmall, &origin->suspects, sb->copy_score);
				2457	else if (sb->copy_score < sb->move_score) {
				2458	origin->suspects = blame_merge(origin->suspects, toosmall);
				2459	toosmall = NULL;
				2460	filter_small(sb, &toosmall, &origin->suspects, sb->copy_score);
				2461	}
				2462	if (!origin->suspects)
				2463	goto finish;
				2464
				2465	for (i = 0, sg = first_scapegoat(revs, commit, sb->reverse);
				2466	i < num_sg && sg;
				2467	sg = sg->next, i++) {
				2468	struct blame_origin *porigin = sg_origin[i];
				2469	find_copy_in_parent(sb, &blametail, &toosmall,
				2470	origin, sg->item, porigin, opt);
				2471	if (!origin->suspects)
				2472	goto finish;
				2473	}
				2474	}
				2475
				2476	finish:
				2477	*blametail = NULL;
				2478	distribute_blame(sb, blames);
				2479	/*
				2480	* prepend toosmall to origin->suspects
				2481	*
				2482	* There is no point in sorting: this ends up on a big
				2483	* unsorted list in the caller anyway.
				2484	*/
				2485	if (toosmall) {
				2486	struct blame_entry **tail = &toosmall;
				2487	while (*tail)
				2488	tail = &(*tail)->next;
				2489	*tail = origin->suspects;
				2490	origin->suspects = toosmall;
				2491	}
				2492	for (i = 0; i < num_sg; i++) {
				2493	if (sg_origin[i]) {
David Kastrup	f892014	2019-04-02 11:56:25	[diff] [blame]	2494	if (!sg_origin[i]->suspects)
				2495	drop_origin_blob(sg_origin[i]);
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	2496	blame_origin_decref(sg_origin[i]);
				2497	}
				2498	}
				2499	drop_origin_blob(origin);
				2500	if (sg_buf != sg_origin)
				2501	free(sg_origin);
				2502	}
				2503
				2504	/*
				2505	* The main loop -- while we have blobs with lines whose true origin
				2506	* is still unknown, pick one blob, and allow its lines to pass blames
				2507	* to its parents. */
				2508	void assign_blame(struct blame_scoreboard *sb, int opt)
				2509	{
				2510	struct rev_info *revs = sb->revs;
				2511	struct commit *commit = prio_queue_get(&sb->commits);
				2512
				2513	while (commit) {
				2514	struct blame_entry *ent;
Nguyễn Thái Ngọc Duy	4e0df4e	2018-05-19 05:28:19	[diff] [blame]	2515	struct blame_origin *suspect = get_blame_suspects(commit);
Jeff Smith	b543bb1	2017-05-24 05:15:35	[diff] [blame]	2516
				2517	/* find one suspect to break down */
				2518	while (suspect && !suspect->suspects)
				2519	suspect = suspect->next;
				2520
				2521	if (!suspect) {
				2522	commit = prio_queue_get(&sb->commits);
				2523	continue;
				2524	}
				2525
				2526	assert(commit == suspect->commit);
				2527
				2528	/*
				2529	* We will use this suspect later in the loop,
				2530	* so hold onto it in the meantime.
				2531	*/
				2532	blame_origin_incref(suspect);
				2533	parse_commit(commit);
				2534	if (sb->reverse \|\|
				2535	(!(commit->object.flags & UNINTERESTING) &&
				2536	!(revs->max_age != -1 && commit->date < revs->max_age)))
				2537	pass_blame(sb, suspect, opt);
				2538	else {
				2539	commit->object.flags \|= UNINTERESTING;
				2540	if (commit->object.parsed)
				2541	mark_parents_uninteresting(commit);
				2542	}
				2543	/* treat root commit as boundary */
				2544	if (!commit->parents && !sb->show_root)
				2545	commit->object.flags \|= UNINTERESTING;
				2546
				2547	/* Take responsibility for the remaining entries */
				2548	ent = suspect->suspects;
				2549	if (ent) {
				2550	suspect->guilty = 1;
				2551	for (;;) {
				2552	struct blame_entry *next = ent->next;
				2553	if (sb->found_guilty_entry)
				2554	sb->found_guilty_entry(ent, sb->found_guilty_entry_data);
				2555	if (next) {
				2556	ent = next;
				2557	continue;
				2558	}
				2559	ent->next = sb->ent;
				2560	sb->ent = suspect->suspects;
				2561	suspect->suspects = NULL;
				2562	break;
				2563	}
				2564	}
				2565	blame_origin_decref(suspect);
				2566
				2567	if (sb->debug) /* sanity */
				2568	sanity_check_refcnt(sb);
				2569	}
				2570	}
Jeff Smith	09002f1	2017-05-24 05:15:36	[diff] [blame]	2571
Jeff Smith	09002f1	2017-05-24 05:15:36	[diff] [blame]	2572	/*
				2573	* To allow quick access to the contents of nth line in the
				2574	* final image, prepare an index in the scoreboard.
				2575	*/
				2576	static int prepare_lines(struct blame_scoreboard *sb)
				2577	{
Barret Rhoden	1fc7338	2019-05-15 21:45:01	[diff] [blame]	2578	sb->num_lines = find_line_starts(&sb->lineno, sb->final_buf,
				2579	sb->final_buf_size);
Jeff Smith	09002f1	2017-05-24 05:15:36	[diff] [blame]	2580	return sb->num_lines;
				2581	}
				2582
				2583	static struct commit find_single_final(struct rev_info revs,
				2584	const char **name_p)
				2585	{
				2586	int i;
				2587	struct commit *found = NULL;
				2588	const char *name = NULL;
				2589
				2590	for (i = 0; i < revs->pending.nr; i++) {
				2591	struct object *obj = revs->pending.objects[i].item;
				2592	if (obj->flags & UNINTERESTING)
				2593	continue;
Nguyễn Thái Ngọc Duy	fb998ea	2018-11-10 05:48:58	[diff] [blame]	2594	obj = deref_tag(revs->repo, obj, NULL, 0);
Jeff Smith	09002f1	2017-05-24 05:15:36	[diff] [blame]	2595	if (obj->type != OBJ_COMMIT)
				2596	die("Non commit %s?", revs->pending.objects[i].name);
				2597	if (found)
				2598	die("More than one commit to dig from %s and %s?",
				2599	revs->pending.objects[i].name, name);
				2600	found = (struct commit *)obj;
				2601	name = revs->pending.objects[i].name;
				2602	}
				2603	if (name_p)
SZEDER Gábor	9e7d8a9	2017-07-24 21:15:50	[diff] [blame]	2604	*name_p = xstrdup_or_null(name);
Jeff Smith	09002f1	2017-05-24 05:15:36	[diff] [blame]	2605	return found;
				2606	}
				2607
				2608	static struct commit dwim_reverse_initial(struct rev_info revs,
				2609	const char **name_p)
				2610	{
				2611	/*
				2612	* DWIM "git blame --reverse ONE -- PATH" as
				2613	* "git blame --reverse ONE..HEAD -- PATH" but only do so
				2614	* when it makes sense.
				2615	*/
				2616	struct object *obj;
				2617	struct commit *head_commit;
Junio C Hamano	583c6a2	2017-06-05 00:18:11	[diff] [blame]	2618	struct object_id head_oid;
Jeff Smith	09002f1	2017-05-24 05:15:36	[diff] [blame]	2619
				2620	if (revs->pending.nr != 1)
				2621	return NULL;
				2622
				2623	/* Is that sole rev a committish? */
				2624	obj = revs->pending.objects[0].item;
Nguyễn Thái Ngọc Duy	fb998ea	2018-11-10 05:48:58	[diff] [blame]	2625	obj = deref_tag(revs->repo, obj, NULL, 0);
Jeff Smith	09002f1	2017-05-24 05:15:36	[diff] [blame]	2626	if (obj->type != OBJ_COMMIT)
				2627	return NULL;
				2628
				2629	/* Do we have HEAD? */
brian m. carlson	49e6147	2017-10-15 22:07:09	[diff] [blame]	2630	if (!resolve_ref_unsafe("HEAD", RESOLVE_REF_READING, &head_oid, NULL))
Jeff Smith	09002f1	2017-05-24 05:15:36	[diff] [blame]	2631	return NULL;
Nguyễn Thái Ngọc Duy	fb998ea	2018-11-10 05:48:58	[diff] [blame]	2632	head_commit = lookup_commit_reference_gently(revs->repo,
Stefan Beller	21e1ee8	2018-06-29 01:21:57	[diff] [blame]	2633	&head_oid, 1);
Jeff Smith	09002f1	2017-05-24 05:15:36	[diff] [blame]	2634	if (!head_commit)
				2635	return NULL;
				2636
				2637	/* Turn "ONE" into "ONE..HEAD" then */
				2638	obj->flags \|= UNINTERESTING;
				2639	add_pending_object(revs, &head_commit->object, "HEAD");
				2640
				2641	if (name_p)
				2642	*name_p = revs->pending.objects[0].name;
				2643	return (struct commit *)obj;
				2644	}
				2645
				2646	static struct commit find_single_initial(struct rev_info revs,
				2647	const char **name_p)
				2648	{
				2649	int i;
				2650	struct commit *found = NULL;
				2651	const char *name = NULL;
				2652
				2653	/*
				2654	* There must be one and only one negative commit, and it must be
				2655	* the boundary.
				2656	*/
				2657	for (i = 0; i < revs->pending.nr; i++) {
				2658	struct object *obj = revs->pending.objects[i].item;
				2659	if (!(obj->flags & UNINTERESTING))
				2660	continue;
Nguyễn Thái Ngọc Duy	fb998ea	2018-11-10 05:48:58	[diff] [blame]	2661	obj = deref_tag(revs->repo, obj, NULL, 0);
Jeff Smith	09002f1	2017-05-24 05:15:36	[diff] [blame]	2662	if (obj->type != OBJ_COMMIT)
				2663	die("Non commit %s?", revs->pending.objects[i].name);
				2664	if (found)
				2665	die("More than one commit to dig up from, %s and %s?",
				2666	revs->pending.objects[i].name, name);
				2667	found = (struct commit *) obj;
				2668	name = revs->pending.objects[i].name;
				2669	}
				2670
				2671	if (!name)
				2672	found = dwim_reverse_initial(revs, &name);
				2673	if (!name)
				2674	die("No commit to dig up from?");
				2675
				2676	if (name_p)
SZEDER Gábor	9e7d8a9	2017-07-24 21:15:50	[diff] [blame]	2677	*name_p = xstrdup(name);
Jeff Smith	09002f1	2017-05-24 05:15:36	[diff] [blame]	2678	return found;
				2679	}
				2680
				2681	void init_scoreboard(struct blame_scoreboard *sb)
				2682	{
				2683	memset(sb, 0, sizeof(struct blame_scoreboard));
				2684	sb->move_score = BLAME_DEFAULT_MOVE_SCORE;
				2685	sb->copy_score = BLAME_DEFAULT_COPY_SCORE;
				2686	}
				2687
Nguyễn Thái Ngọc Duy	ecbbc0a	2018-08-13 16:14:41	[diff] [blame]	2688	void setup_scoreboard(struct blame_scoreboard *sb,
				2689	const char *path,
				2690	struct blame_origin **orig)
Jeff Smith	09002f1	2017-05-24 05:15:36	[diff] [blame]	2691	{
				2692	const char *final_commit_name = NULL;
				2693	struct blame_origin *o;
				2694	struct commit *final_commit = NULL;
				2695	enum object_type type;
				2696
Nguyễn Thái Ngọc Duy	4e0df4e	2018-05-19 05:28:19	[diff] [blame]	2697	init_blame_suspects(&blame_suspects);
				2698
Jeff Smith	09002f1	2017-05-24 05:15:36	[diff] [blame]	2699	if (sb->reverse && sb->contents_from)
				2700	die(_("--contents and --reverse do not blend well."));
				2701
Nguyễn Thái Ngọc Duy	ecbbc0a	2018-08-13 16:14:41	[diff] [blame]	2702	if (!sb->repo)
				2703	BUG("repo is NULL");
				2704
Jeff Smith	09002f1	2017-05-24 05:15:36	[diff] [blame]	2705	if (!sb->reverse) {
				2706	sb->final = find_single_final(sb->revs, &final_commit_name);
				2707	sb->commits.compare = compare_commits_by_commit_date;
				2708	} else {
				2709	sb->final = find_single_initial(sb->revs, &final_commit_name);
				2710	sb->commits.compare = compare_commits_by_reverse_commit_date;
				2711	}
				2712
				2713	if (sb->final && sb->contents_from)
				2714	die(_("cannot use --contents with final commit object name"));
				2715
				2716	if (sb->reverse && sb->revs->first_parent_only)
				2717	sb->revs->children.name = NULL;
				2718
				2719	if (!sb->final) {
				2720	/*
				2721	* "--not A B -- path" without anything positive;
				2722	* do not default to HEAD, but use the working tree
				2723	* or "--contents".
				2724	*/
				2725	setup_work_tree();
Nguyễn Thái Ngọc Duy	ecbbc0a	2018-08-13 16:14:41	[diff] [blame]	2726	sb->final = fake_working_tree_commit(sb->repo,
				2727	&sb->revs->diffopt,
Jeff Smith	09002f1	2017-05-24 05:15:36	[diff] [blame]	2728	path, sb->contents_from);
				2729	add_pending_object(sb->revs, &(sb->final->object), ":");
				2730	}
				2731
				2732	if (sb->reverse && sb->revs->first_parent_only) {
				2733	final_commit = find_single_final(sb->revs, NULL);
				2734	if (!final_commit)
				2735	die(_("--reverse and --first-parent together require specified latest commit"));
				2736	}
				2737
				2738	/*
				2739	* If we have bottom, this will mark the ancestors of the
				2740	* bottom commits we would reach while traversing as
				2741	* uninteresting.
				2742	*/
				2743	if (prepare_revision_walk(sb->revs))
				2744	die(_("revision walk setup failed"));
				2745
				2746	if (sb->reverse && sb->revs->first_parent_only) {
				2747	struct commit *c = final_commit;
				2748
				2749	sb->revs->children.name = "children";
				2750	while (c->parents &&
Jeff King	9001dc2	2018-08-28 21:22:48	[diff] [blame]	2751	!oideq(&c->object.oid, &sb->final->object.oid)) {
Jeff Smith	09002f1	2017-05-24 05:15:36	[diff] [blame]	2752	struct commit_list l = xcalloc(1, sizeof(l));
				2753
				2754	l->item = c;
				2755	if (add_decoration(&sb->revs->children,
				2756	&c->parents->item->object, l))
Johannes Schindelin	033abf9	2018-05-02 09:38:39	[diff] [blame]	2757	BUG("not unique item in first-parent chain");
Jeff Smith	09002f1	2017-05-24 05:15:36	[diff] [blame]	2758	c = c->parents->item;
				2759	}
				2760
Jeff King	9001dc2	2018-08-28 21:22:48	[diff] [blame]	2761	if (!oideq(&c->object.oid, &sb->final->object.oid))
Jeff Smith	09002f1	2017-05-24 05:15:36	[diff] [blame]	2762	die(_("--reverse --first-parent together require range along first-parent chain"));
				2763	}
				2764
				2765	if (is_null_oid(&sb->final->object.oid)) {
Nguyễn Thái Ngọc Duy	4e0df4e	2018-05-19 05:28:19	[diff] [blame]	2766	o = get_blame_suspects(sb->final);
Jeff Smith	09002f1	2017-05-24 05:15:36	[diff] [blame]	2767	sb->final_buf = xmemdupz(o->file.ptr, o->file.size);
				2768	sb->final_buf_size = o->file.size;
				2769	}
				2770	else {
				2771	o = get_origin(sb->final, path);
Nguyễn Thái Ngọc Duy	ecbbc0a	2018-08-13 16:14:41	[diff] [blame]	2772	if (fill_blob_sha1_and_mode(sb->repo, o))
Jeff Smith	09002f1	2017-05-24 05:15:36	[diff] [blame]	2773	die(_("no such path %s in %s"), path, final_commit_name);
				2774
Brandon Williams	0d1e0e7	2017-10-31 18:19:11	[diff] [blame]	2775	if (sb->revs->diffopt.flags.allow_textconv &&
Nguyễn Thái Ngọc Duy	6afaf80	2018-09-21 15:57:22	[diff] [blame]	2776	textconv_object(sb->repo, path, o->mode, &o->blob_oid, 1, (char **) &sb->final_buf,
Jeff Smith	09002f1	2017-05-24 05:15:36	[diff] [blame]	2777	&sb->final_buf_size))
				2778	;
				2779	else
brian m. carlson	b4f5aca	2018-03-12 02:27:53	[diff] [blame]	2780	sb->final_buf = read_object_file(&o->blob_oid, &type,
				2781	&sb->final_buf_size);
Jeff Smith	09002f1	2017-05-24 05:15:36	[diff] [blame]	2782
				2783	if (!sb->final_buf)
				2784	die(_("cannot read blob %s for path %s"),
				2785	oid_to_hex(&o->blob_oid),
				2786	path);
				2787	}
				2788	sb->num_read_blob++;
				2789	prepare_lines(sb);
				2790
				2791	if (orig)
				2792	*orig = o;
SZEDER Gábor	9e7d8a9	2017-07-24 21:15:50	[diff] [blame]	2793
				2794	free((char *)final_commit_name);
Jeff Smith	09002f1	2017-05-24 05:15:36	[diff] [blame]	2795	}
Jeff Smith	bd481de	2017-05-24 05:15:37	[diff] [blame]	2796
				2797
				2798
				2799	struct blame_entry blame_entry_prepend(struct blame_entry head,
				2800	long start, long end,
				2801	struct blame_origin *o)
				2802	{
				2803	struct blame_entry *new_head = xcalloc(1, sizeof(struct blame_entry));
				2804	new_head->lno = start;
				2805	new_head->num_lines = end - start;
				2806	new_head->suspect = o;
				2807	new_head->s_lno = start;
				2808	new_head->next = head;
				2809	blame_origin_incref(o);
				2810	return new_head;
				2811	}