source src/diff_tform.c
Line | Flow | Count | Block(s) | Source |
---|---|---|---|---|
1 | - | /* | ||
2 | - | * Copyright (C) the libgit2 contributors. All rights reserved. | ||
3 | - | * | ||
4 | - | * This file is part of libgit2, distributed under the GNU GPL v2 with | ||
5 | - | * a Linking Exception. For full terms see the included COPYING file. | ||
6 | - | */ | ||
7 | - | |||
8 | - | #include "diff_tform.h" | ||
9 | - | |||
10 | - | #include "git2/config.h" | ||
11 | - | #include "git2/blob.h" | ||
12 | - | #include "git2/sys/hashsig.h" | ||
13 | - | |||
14 | - | #include "diff.h" | ||
15 | - | #include "diff_generate.h" | ||
16 | - | #include "path.h" | ||
17 | - | #include "futils.h" | ||
18 | - | #include "config.h" | ||
19 | - | |||
20 | 298 | 2 | git_diff_delta *git_diff__delta_dup( | |
21 | - | const git_diff_delta *d, git_pool *pool) | ||
22 | - | { | ||
23 | 298 | 2 | git_diff_delta *delta = git__malloc(sizeof(git_diff_delta)); | |
24 | 298 | 3 | if (!delta) | |
25 | ##### | 4 | return NULL; | |
26 | - | |||
27 | 298 | 5 | memcpy(delta, d, sizeof(git_diff_delta)); | |
28 | 298 | 5 | GIT_DIFF_FLAG__CLEAR_INTERNAL(delta->flags); | |
29 | - | |||
30 | 298 | 5 | if (d->old_file.path != NULL) { | |
31 | 298 | 6 | delta->old_file.path = git_pool_strdup(pool, d->old_file.path); | |
32 | 298 | 7 | if (delta->old_file.path == NULL) | |
33 | ##### | 8 | goto fail; | |
34 | - | } | ||
35 | - | |||
36 | 298 | 9,10 | if (d->new_file.path != d->old_file.path && d->new_file.path != NULL) { | |
37 | 1 | 11 | delta->new_file.path = git_pool_strdup(pool, d->new_file.path); | |
38 | 1 | 12,14 | if (delta->new_file.path == NULL) | |
39 | ##### | 13 | goto fail; | |
40 | - | } else { | ||
41 | 297 | 15 | delta->new_file.path = delta->old_file.path; | |
42 | - | } | ||
43 | - | |||
44 | 298 | 16 | return delta; | |
45 | - | |||
46 | - | fail: | ||
47 | ##### | 17 | git__free(delta); | |
48 | ##### | 18 | return NULL; | |
49 | - | } | ||
50 | - | |||
51 | 75 | 2 | git_diff_delta *git_diff__merge_like_cgit( | |
52 | - | const git_diff_delta *a, | ||
53 | - | const git_diff_delta *b, | ||
54 | - | git_pool *pool) | ||
55 | - | { | ||
56 | - | git_diff_delta *dup; | ||
57 | - | |||
58 | - | /* Emulate C git for merging two diffs (a la 'git diff <sha>'). | ||
59 | - | * | ||
60 | - | * When C git does a diff between the work dir and a tree, it actually | ||
61 | - | * diffs with the index but uses the workdir contents. This emulates | ||
62 | - | * those choices so we can emulate the type of diff. | ||
63 | - | * | ||
64 | - | * We have three file descriptions here, let's call them: | ||
65 | - | * f1 = a->old_file | ||
66 | - | * f2 = a->new_file AND b->old_file | ||
67 | - | * f3 = b->new_file | ||
68 | - | */ | ||
69 | - | |||
70 | - | /* If one of the diffs is a conflict, just dup it */ | ||
71 | 75 | 2 | if (b->status == GIT_DELTA_CONFLICTED) | |
72 | 1 | 3 | return git_diff__delta_dup(b, pool); | |
73 | 74 | 4 | if (a->status == GIT_DELTA_CONFLICTED) | |
74 | ##### | 5 | return git_diff__delta_dup(a, pool); | |
75 | - | |||
76 | - | /* if f2 == f3 or f2 is deleted, then just dup the 'a' diff */ | ||
77 | 74 | 6,7 | if (b->status == GIT_DELTA_UNMODIFIED || a->status == GIT_DELTA_DELETED) | |
78 | 2 | 8 | return git_diff__delta_dup(a, pool); | |
79 | - | |||
80 | - | /* otherwise, base this diff on the 'b' diff */ | ||
81 | 72 | 9,10 | if ((dup = git_diff__delta_dup(b, pool)) == NULL) | |
82 | ##### | 11 | return NULL; | |
83 | - | |||
84 | - | /* If 'a' status is uninteresting, then we're done */ | ||
85 | 72 | 12,13 | if (a->status == GIT_DELTA_UNMODIFIED || | |
86 | 72 | 13,14 | a->status == GIT_DELTA_UNTRACKED || | |
87 | 71 | 14 | a->status == GIT_DELTA_UNREADABLE) | |
88 | 1 | 15 | return dup; | |
89 | - | |||
90 | 71 | 16,17 | assert(b->status != GIT_DELTA_UNMODIFIED); | |
91 | - | |||
92 | - | /* A cgit exception is that the diff of a file that is only in the | ||
93 | - | * index (i.e. not in HEAD nor workdir) is given as empty. | ||
94 | - | */ | ||
95 | 71 | 18 | if (dup->status == GIT_DELTA_DELETED) { | |
96 | 11 | 19 | if (a->status == GIT_DELTA_ADDED) { | |
97 | 6 | 20 | dup->status = GIT_DELTA_UNMODIFIED; | |
98 | 11 | 20,21 | dup->nfiles = 2; | |
99 | - | } | ||
100 | - | /* else don't overwrite DELETE status */ | ||
101 | - | } else { | ||
102 | 60 | 22 | dup->status = a->status; | |
103 | 60 | 22 | dup->nfiles = a->nfiles; | |
104 | - | } | ||
105 | - | |||
106 | 71 | 23 | git_oid_cpy(&dup->old_file.id, &a->old_file.id); | |
107 | 71 | 24 | dup->old_file.mode = a->old_file.mode; | |
108 | 71 | 24 | dup->old_file.size = a->old_file.size; | |
109 | 71 | 24 | dup->old_file.flags = a->old_file.flags; | |
110 | - | |||
111 | 71 | 24 | return dup; | |
112 | - | } | ||
113 | - | |||
114 | 68 | 2 | int git_diff__merge( | |
115 | - | git_diff *onto, const git_diff *from, git_diff__merge_cb cb) | ||
116 | - | { | ||
117 | 68 | 2 | int error = 0; | |
118 | - | git_pool onto_pool; | ||
119 | - | git_vector onto_new; | ||
120 | - | git_diff_delta *delta; | ||
121 | - | bool ignore_case, reversed; | ||
122 | - | unsigned int i, j; | ||
123 | - | |||
124 | 68 | 2-4 | assert(onto && from); | |
125 | - | |||
126 | 68 | 5 | if (!from->deltas.length) | |
127 | 3 | 6 | return 0; | |
128 | - | |||
129 | 65 | 7 | ignore_case = ((onto->opts.flags & GIT_DIFF_IGNORE_CASE) != 0); | |
130 | 65 | 7 | reversed = ((onto->opts.flags & GIT_DIFF_REVERSE) != 0); | |
131 | - | |||
132 | 65 | 7,8 | if (ignore_case != ((from->opts.flags & GIT_DIFF_IGNORE_CASE) != 0) || | |
133 | 65 | 8 | reversed != ((from->opts.flags & GIT_DIFF_REVERSE) != 0)) { | |
134 | ##### | 9 | git_error_set(GIT_ERROR_INVALID, | |
135 | - | "attempt to merge diffs created with conflicting options"); | ||
136 | ##### | 10 | return -1; | |
137 | - | } | ||
138 | - | |||
139 | 65 | 11,12,14 | if (git_vector_init(&onto_new, onto->deltas.length, git_diff_delta__cmp) < 0 || | |
140 | 65 | 13 | git_pool_init(&onto_pool, 1) < 0) | |
141 | ##### | 15 | return -1; | |
142 | - | |||
143 | 357 | 16,56,57 | for (i = 0, j = 0; i < onto->deltas.length || j < from->deltas.length; ) { | |
144 | 292 | 17-19 | git_diff_delta *o = GIT_VECTOR_GET(&onto->deltas, i); | |
145 | 292 | 20-22 | const git_diff_delta *f = GIT_VECTOR_GET(&from->deltas, j); | |
146 | 292 | 23,24,28-31 | int cmp = !f ? -1 : !o ? 1 : | |
147 | 200 | 25-27 | STRCMP_CASESELECT(ignore_case, o->old_file.path, f->old_file.path); | |
148 | - | |||
149 | 292 | 32 | if (cmp < 0) { | |
150 | 93 | 33 | delta = git_diff__delta_dup(o, &onto_pool); | |
151 | 93 | 34 | i++; | |
152 | 199 | 35 | } else if (cmp > 0) { | |
153 | 123 | 36 | delta = git_diff__delta_dup(f, &onto_pool); | |
154 | 123 | 37 | j++; | |
155 | - | } else { | ||
156 | 76 | 38-40 | const git_diff_delta *left = reversed ? f : o; | |
157 | 76 | 41-43 | const git_diff_delta *right = reversed ? o : f; | |
158 | - | |||
159 | 76 | 44 | delta = cb(left, right, &onto_pool); | |
160 | 76 | 45 | i++; | |
161 | 76 | 45 | j++; | |
162 | - | } | ||
163 | - | |||
164 | - | /* the ignore rules for the target may not match the source | ||
165 | - | * or the result of a merged delta could be skippable... | ||
166 | - | */ | ||
167 | 292 | 46-48 | if (delta && git_diff_delta__should_skip(&onto->opts, delta)) { | |
168 | 6 | 49 | git__free(delta); | |
169 | 6 | 50 | continue; | |
170 | - | } | ||
171 | - | |||
172 | 286 | 51-54 | if ((error = !delta ? -1 : git_vector_insert(&onto_new, delta)) < 0) | |
173 | ##### | 55 | break; | |
174 | - | } | ||
175 | - | |||
176 | 65 | 58 | if (!error) { | |
177 | 65 | 59 | git_vector_swap(&onto->deltas, &onto_new); | |
178 | 65 | 60 | git_pool_swap(&onto->pool, &onto_pool); | |
179 | - | |||
180 | 65 | 61 | if ((onto->opts.flags & GIT_DIFF_REVERSE) != 0) | |
181 | 1 | 62 | onto->old_src = from->old_src; | |
182 | - | else | ||
183 | 64 | 63 | onto->new_src = from->new_src; | |
184 | - | |||
185 | - | /* prefix strings also come from old pool, so recreate those.*/ | ||
186 | 65 | 65 | onto->opts.old_prefix = | |
187 | 65 | 64 | git_pool_strdup_safe(&onto->pool, onto->opts.old_prefix); | |
188 | 65 | 66 | onto->opts.new_prefix = | |
189 | 65 | 65 | git_pool_strdup_safe(&onto->pool, onto->opts.new_prefix); | |
190 | - | } | ||
191 | - | |||
192 | 65 | 67 | git_vector_free_deep(&onto_new); | |
193 | 65 | 68 | git_pool_clear(&onto_pool); | |
194 | - | |||
195 | 65 | 69 | return error; | |
196 | - | } | ||
197 | - | |||
198 | 8 | 2 | int git_diff_merge(git_diff *onto, const git_diff *from) | |
199 | - | { | ||
200 | 8 | 2 | return git_diff__merge(onto, from, git_diff__merge_like_cgit); | |
201 | - | } | ||
202 | - | |||
203 | 46 | 2 | int git_diff_find_similar__hashsig_for_file( | |
204 | - | void **out, const git_diff_file *f, const char *path, void *p) | ||
205 | - | { | ||
206 | 46 | 2 | git_hashsig_option_t opt = (git_hashsig_option_t)(intptr_t)p; | |
207 | - | |||
208 | - | GIT_UNUSED(f); | ||
209 | 46 | 2 | return git_hashsig_create_fromfile((git_hashsig **)out, path, opt); | |
210 | - | } | ||
211 | - | |||
212 | 3591 | 2 | int git_diff_find_similar__hashsig_for_buf( | |
213 | - | void **out, const git_diff_file *f, const char *buf, size_t len, void *p) | ||
214 | - | { | ||
215 | 3591 | 2 | git_hashsig_option_t opt = (git_hashsig_option_t)(intptr_t)p; | |
216 | - | |||
217 | - | GIT_UNUSED(f); | ||
218 | 3591 | 2 | return git_hashsig_create((git_hashsig **)out, buf, len, opt); | |
219 | - | } | ||
220 | - | |||
221 | 1425 | 2 | void git_diff_find_similar__hashsig_free(void *sig, void *payload) | |
222 | - | { | ||
223 | - | GIT_UNUSED(payload); | ||
224 | 1425 | 2 | git_hashsig_free(sig); | |
225 | 1425 | 3 | } | |
226 | - | |||
227 | 37350 | 2 | int git_diff_find_similar__calc_similarity( | |
228 | - | int *score, void *siga, void *sigb, void *payload) | ||
229 | - | { | ||
230 | - | int error; | ||
231 | - | |||
232 | - | GIT_UNUSED(payload); | ||
233 | 37350 | 2 | error = git_hashsig_compare(siga, sigb); | |
234 | 37350 | 3 | if (error < 0) | |
235 | ##### | 4 | return error; | |
236 | - | |||
237 | 37350 | 5 | *score = error; | |
238 | 37350 | 5 | return 0; | |
239 | - | } | ||
240 | - | |||
241 | - | #define DEFAULT_THRESHOLD 50 | ||
242 | - | #define DEFAULT_BREAK_REWRITE_THRESHOLD 60 | ||
243 | - | #define DEFAULT_RENAME_LIMIT 200 | ||
244 | - | |||
245 | 332 | 2 | static int normalize_find_opts( | |
246 | - | git_diff *diff, | ||
247 | - | git_diff_find_options *opts, | ||
248 | - | const git_diff_find_options *given) | ||
249 | - | { | ||
250 | 332 | 2 | git_config *cfg = NULL; | |
251 | - | git_hashsig_option_t hashsig_opts; | ||
252 | - | |||
253 | 332 | 2-4 | GIT_ERROR_CHECK_VERSION(given, GIT_DIFF_FIND_OPTIONS_VERSION, "git_diff_find_options"); | |
254 | - | |||
255 | 330 | 5,7 | if (diff->repo != NULL && | |
256 | 330 | 6 | git_repository_config__weakptr(&cfg, diff->repo) < 0) | |
257 | ##### | 8 | return -1; | |
258 | - | |||
259 | 330 | 9 | if (given) | |
260 | 324 | 10 | memcpy(opts, given, sizeof(*opts)); | |
261 | - | |||
262 | 330 | 11,12 | if (!given || | |
263 | 324 | 12 | (given->flags & GIT_DIFF_FIND_ALL) == GIT_DIFF_FIND_BY_CONFIG) | |
264 | - | { | ||
265 | 12 | 13 | if (cfg) { | |
266 | 12 | 14 | char *rule = | |
267 | 12 | 14 | git_config__get_string_force(cfg, "diff.renames", "true"); | |
268 | - | int boolval; | ||
269 | - | |||
270 | 12 | 15-18 | if (!git__parse_bool(&boolval, rule) && !boolval) | |
271 | - | /* don't set FIND_RENAMES if bool value is false */; | ||
272 | 11 | 19,20 | else if (!strcasecmp(rule, "copies") || !strcasecmp(rule, "copy")) | |
273 | 5 | 21 | opts->flags |= GIT_DIFF_FIND_RENAMES | GIT_DIFF_FIND_COPIES; | |
274 | - | else | ||
275 | 6 | 22 | opts->flags |= GIT_DIFF_FIND_RENAMES; | |
276 | - | |||
277 | 12 | 23,24 | git__free(rule); | |
278 | - | } else { | ||
279 | - | /* set default flag */ | ||
280 | ##### | 25 | opts->flags |= GIT_DIFF_FIND_RENAMES; | |
281 | - | } | ||
282 | - | } | ||
283 | - | |||
284 | - | /* some flags imply others */ | ||
285 | - | |||
286 | 330 | 26 | if (opts->flags & GIT_DIFF_FIND_EXACT_MATCH_ONLY) { | |
287 | - | /* if we are only looking for exact matches, then don't turn | ||
288 | - | * MODIFIED items into ADD/DELETE pairs because it's too picky | ||
289 | - | */ | ||
290 | 5 | 27 | opts->flags &= ~(GIT_DIFF_FIND_REWRITES | GIT_DIFF_BREAK_REWRITES); | |
291 | - | |||
292 | - | /* similarly, don't look for self-rewrites to split */ | ||
293 | 5 | 27 | opts->flags &= ~GIT_DIFF_FIND_RENAMES_FROM_REWRITES; | |
294 | - | } | ||
295 | - | |||
296 | 330 | 28 | if (opts->flags & GIT_DIFF_FIND_RENAMES_FROM_REWRITES) | |
297 | 36 | 29 | opts->flags |= GIT_DIFF_FIND_RENAMES; | |
298 | - | |||
299 | 330 | 30 | if (opts->flags & GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED) | |
300 | 21 | 31 | opts->flags |= GIT_DIFF_FIND_COPIES; | |
301 | - | |||
302 | 330 | 32 | if (opts->flags & GIT_DIFF_BREAK_REWRITES) | |
303 | 34 | 33 | opts->flags |= GIT_DIFF_FIND_REWRITES; | |
304 | - | |||
305 | - | #define USE_DEFAULT(X) ((X) == 0 || (X) > 100) | ||
306 | - | |||
307 | 330 | 34,35 | if (USE_DEFAULT(opts->rename_threshold)) | |
308 | 327 | 36 | opts->rename_threshold = DEFAULT_THRESHOLD; | |
309 | - | |||
310 | 330 | 37,38 | if (USE_DEFAULT(opts->rename_from_rewrite_threshold)) | |
311 | 330 | 39 | opts->rename_from_rewrite_threshold = DEFAULT_THRESHOLD; | |
312 | - | |||
313 | 330 | 40,41 | if (USE_DEFAULT(opts->copy_threshold)) | |
314 | 328 | 42 | opts->copy_threshold = DEFAULT_THRESHOLD; | |
315 | - | |||
316 | 330 | 43,44 | if (USE_DEFAULT(opts->break_rewrite_threshold)) | |
317 | 326 | 45 | opts->break_rewrite_threshold = DEFAULT_BREAK_REWRITE_THRESHOLD; | |
318 | - | |||
319 | - | #undef USE_DEFAULT | ||
320 | - | |||
321 | 330 | 46 | if (!opts->rename_limit) { | |
322 | 330 | 47 | if (cfg) { | |
323 | 330 | 48,49 | opts->rename_limit = git_config__get_int_force( | |
324 | - | cfg, "diff.renamelimit", DEFAULT_RENAME_LIMIT); | ||
325 | - | } | ||
326 | - | |||
327 | 330 | 50 | if (opts->rename_limit <= 0) | |
328 | ##### | 51 | opts->rename_limit = DEFAULT_RENAME_LIMIT; | |
329 | - | } | ||
330 | - | |||
331 | - | /* assign the internal metric with whitespace flag as payload */ | ||
332 | 330 | 52 | if (!opts->metric) { | |
333 | 330 | 53 | opts->metric = git__malloc(sizeof(git_diff_similarity_metric)); | |
334 | 330 | 54,55 | GIT_ERROR_CHECK_ALLOC(opts->metric); | |
335 | - | |||
336 | 330 | 56 | opts->metric->file_signature = git_diff_find_similar__hashsig_for_file; | |
337 | 330 | 56 | opts->metric->buffer_signature = git_diff_find_similar__hashsig_for_buf; | |
338 | 330 | 56 | opts->metric->free_signature = git_diff_find_similar__hashsig_free; | |
339 | 330 | 56 | opts->metric->similarity = git_diff_find_similar__calc_similarity; | |
340 | - | |||
341 | 330 | 56 | if (opts->flags & GIT_DIFF_FIND_IGNORE_WHITESPACE) | |
342 | 7 | 57 | hashsig_opts = GIT_HASHSIG_IGNORE_WHITESPACE; | |
343 | 323 | 58 | else if (opts->flags & GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE) | |
344 | 6 | 59 | hashsig_opts = GIT_HASHSIG_NORMAL; | |
345 | - | else | ||
346 | 317 | 60 | hashsig_opts = GIT_HASHSIG_SMART_WHITESPACE; | |
347 | 330 | 61 | hashsig_opts |= GIT_HASHSIG_ALLOW_SMALL_FILES; | |
348 | 330 | 61 | opts->metric->payload = (void *)hashsig_opts; | |
349 | - | } | ||
350 | - | |||
351 | 330 | 62 | return 0; | |
352 | - | } | ||
353 | - | |||
354 | 6 | 2 | static int insert_delete_side_of_split( | |
355 | - | git_diff *diff, git_vector *onto, const git_diff_delta *delta) | ||
356 | - | { | ||
357 | - | /* make new record for DELETED side of split */ | ||
358 | 6 | 2 | git_diff_delta *deleted = git_diff__delta_dup(delta, &diff->pool); | |
359 | 6 | 3,4 | GIT_ERROR_CHECK_ALLOC(deleted); | |
360 | - | |||
361 | 6 | 5 | deleted->status = GIT_DELTA_DELETED; | |
362 | 6 | 5 | deleted->nfiles = 1; | |
363 | 6 | 5 | memset(&deleted->new_file, 0, sizeof(deleted->new_file)); | |
364 | 6 | 5 | deleted->new_file.path = deleted->old_file.path; | |
365 | 6 | 5 | deleted->new_file.flags |= GIT_DIFF_FLAG_VALID_ID; | |
366 | - | |||
367 | 6 | 5 | return git_vector_insert(onto, deleted); | |
368 | - | } | ||
369 | - | |||
370 | 87 | 2 | static int apply_splits_and_deletes( | |
371 | - | git_diff *diff, size_t expected_size, bool actually_split) | ||
372 | - | { | ||
373 | 87 | 2 | git_vector onto = GIT_VECTOR_INIT; | |
374 | - | size_t i; | ||
375 | - | git_diff_delta *delta; | ||
376 | - | |||
377 | 87 | 2,3 | if (git_vector_init(&onto, expected_size, git_diff_delta__cmp) < 0) | |
378 | ##### | 4 | return -1; | |
379 | - | |||
380 | - | /* build new delta list without TO_DELETE and splitting TO_SPLIT */ | ||
381 | 1936 | 5,25-27 | git_vector_foreach(&diff->deltas, i, delta) { | |
382 | 1849 | 6 | if ((delta->flags & GIT_DIFF_FLAG__TO_DELETE) != 0) | |
383 | 248 | 7 | continue; | |
384 | - | |||
385 | 1601 | 8,9 | if ((delta->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0 && actually_split) { | |
386 | 4 | 10 | delta->similarity = 0; | |
387 | - | |||
388 | 4 | 10,11 | if (insert_delete_side_of_split(diff, &onto, delta) < 0) | |
389 | ##### | 12 | goto on_error; | |
390 | - | |||
391 | 4 | 13 | if (diff->new_src == GIT_ITERATOR_WORKDIR) | |
392 | 1 | 14 | delta->status = GIT_DELTA_UNTRACKED; | |
393 | - | else | ||
394 | 3 | 15 | delta->status = GIT_DELTA_ADDED; | |
395 | 4 | 16 | delta->nfiles = 1; | |
396 | 4 | 16 | memset(&delta->old_file, 0, sizeof(delta->old_file)); | |
397 | 4 | 16 | delta->old_file.path = delta->new_file.path; | |
398 | 4 | 16 | delta->old_file.flags |= GIT_DIFF_FLAG_VALID_ID; | |
399 | - | } | ||
400 | - | |||
401 | - | /* clean up delta before inserting into new list */ | ||
402 | 1601 | 17 | GIT_DIFF_FLAG__CLEAR_INTERNAL(delta->flags); | |
403 | - | |||
404 | 1601 | 17,18 | if (delta->status != GIT_DELTA_COPIED && | |
405 | 1587 | 18,19 | delta->status != GIT_DELTA_RENAMED && | |
406 | 1318 | 19,20 | (delta->status != GIT_DELTA_MODIFIED || actually_split)) | |
407 | 415 | 21 | delta->similarity = 0; | |
408 | - | |||
409 | - | /* insert into new list */ | ||
410 | 1601 | 22,23 | if (git_vector_insert(&onto, delta) < 0) | |
411 | ##### | 24 | goto on_error; | |
412 | - | } | ||
413 | - | |||
414 | - | /* cannot return an error past this point */ | ||
415 | - | |||
416 | - | /* free deltas from old list that didn't make it to the new one */ | ||
417 | 1936 | 28,31-33 | git_vector_foreach(&diff->deltas, i, delta) { | |
418 | 1849 | 29 | if ((delta->flags & GIT_DIFF_FLAG__TO_DELETE) != 0) | |
419 | 248 | 30 | git__free(delta); | |
420 | - | } | ||
421 | - | |||
422 | - | /* swap new delta list into place */ | ||
423 | 87 | 34 | git_vector_swap(&diff->deltas, &onto); | |
424 | 87 | 35 | git_vector_free(&onto); | |
425 | 87 | 36 | git_vector_sort(&diff->deltas); | |
426 | - | |||
427 | 87 | 37 | return 0; | |
428 | - | |||
429 | - | on_error: | ||
430 | ##### | 38 | git_vector_free_deep(&onto); | |
431 | - | |||
432 | ##### | 39 | return -1; | |
433 | - | } | ||
434 | - | |||
435 | 81663 | 2 | GIT_INLINE(git_diff_file *) similarity_get_file(git_diff *diff, size_t idx) | |
436 | - | { | ||
437 | 81663 | 2 | git_diff_delta *delta = git_vector_get(&diff->deltas, idx / 2); | |
438 | 81663 | 3 | return (idx & 1) ? &delta->new_file : &delta->old_file; | |
439 | - | } | ||
440 | - | |||
441 | - | typedef struct { | ||
442 | - | size_t idx; | ||
443 | - | git_iterator_t src; | ||
444 | - | git_repository *repo; | ||
445 | - | git_diff_file *file; | ||
446 | - | git_buf data; | ||
447 | - | git_odb_object *odb_obj; | ||
448 | - | git_blob *blob; | ||
449 | - | } similarity_info; | ||
450 | - | |||
451 | 1909 | 2 | static int similarity_init( | |
452 | - | similarity_info *info, git_diff *diff, size_t file_idx) | ||
453 | - | { | ||
454 | 1909 | 2 | info->idx = file_idx; | |
455 | 1909 | 2-4 | info->src = (file_idx & 1) ? diff->new_src : diff->old_src; | |
456 | 1909 | 5 | info->repo = diff->repo; | |
457 | 1909 | 5 | info->file = similarity_get_file(diff, file_idx); | |
458 | 1909 | 6 | info->odb_obj = NULL; | |
459 | 1909 | 6 | info->blob = NULL; | |
460 | 1909 | 6 | git_buf_init(&info->data, 0); | |
461 | - | |||
462 | 1909 | 7,8 | if (info->file->size > 0 || info->src == GIT_ITERATOR_WORKDIR) | |
463 | 705 | 9 | return 0; | |
464 | - | |||
465 | 1204 | 10 | return git_diff_file__resolve_zero_size( | |
466 | - | info->file, &info->odb_obj, info->repo); | ||
467 | - | } | ||
468 | - | |||
469 | 1237 | 2 | static int similarity_sig( | |
470 | - | similarity_info *info, | ||
471 | - | const git_diff_find_options *opts, | ||
472 | - | void **cache) | ||
473 | - | { | ||
474 | 1237 | 2 | int error = 0; | |
475 | 1237 | 2 | git_diff_file *file = info->file; | |
476 | - | |||
477 | 1237 | 2 | if (info->src == GIT_ITERATOR_WORKDIR) { | |
478 | 46 | 3,3-5 | if ((error = git_buf_joinpath( | |
479 | 46 | 3 | &info->data, git_repository_workdir(info->repo), file->path)) < 0) | |
480 | ##### | 6 | return error; | |
481 | - | |||
482 | - | /* if path is not a regular file, just skip this item */ | ||
483 | 46 | 7,8 | if (!git_path_isfile(info->data.ptr)) | |
484 | ##### | 9 | return 0; | |
485 | - | |||
486 | - | /* TODO: apply wd-to-odb filters to file data if necessary */ | ||
487 | - | |||
488 | 46 | 10,10,10 | error = opts->metric->file_signature( | |
489 | 46 | 10 | &cache[info->idx], info->file, | |
490 | 46 | 10,10 | info->data.ptr, opts->metric->payload); | |
491 | - | } else { | ||
492 | - | /* if we didn't initially know the size, we might have an odb_obj | ||
493 | - | * around from earlier, so convert that, otherwise load the blob now | ||
494 | - | */ | ||
495 | 1191 | 11 | if (info->odb_obj != NULL) | |
496 | ##### | 12,12 | error = git_object__from_odb_object( | |
497 | ##### | 12 | (git_object **)&info->blob, info->repo, | |
498 | - | info->odb_obj, GIT_OBJECT_BLOB); | ||
499 | - | else | ||
500 | 1191 | 13 | error = git_blob_lookup(&info->blob, info->repo, &file->id); | |
501 | - | |||
502 | 1191 | 14 | if (error < 0) { | |
503 | - | /* if lookup fails, just skip this item in similarity calc */ | ||
504 | ##### | 15 | git_error_clear(); | |
505 | - | } else { | ||
506 | - | size_t sz; | ||
507 | - | |||
508 | - | /* index size may not be actual blob size if filtered */ | ||
509 | 1191 | 16,17 | if (file->size != git_blob_rawsize(info->blob)) | |
510 | ##### | 18,19 | file->size = git_blob_rawsize(info->blob); | |
511 | - | |||
512 | 1191 | 20-23 | sz = git__is_sizet(file->size) ? (size_t)file->size : (size_t)-1; | |
513 | - | |||
514 | 1191 | 24,24,25 | error = opts->metric->buffer_signature( | |
515 | 1191 | 25 | &cache[info->idx], info->file, | |
516 | 1191 | 24,24 | git_blob_rawcontent(info->blob), sz, opts->metric->payload); | |
517 | - | } | ||
518 | - | } | ||
519 | - | |||
520 | 1237 | 26 | return error; | |
521 | - | } | ||
522 | - | |||
523 | 79086 | 2 | static void similarity_unload(similarity_info *info) | |
524 | - | { | ||
525 | 79086 | 2 | if (info->odb_obj) | |
526 | ##### | 3 | git_odb_object_free(info->odb_obj); | |
527 | - | |||
528 | 79086 | 4 | if (info->blob) | |
529 | 1191 | 5 | git_blob_free(info->blob); | |
530 | - | else | ||
531 | 77895 | 6 | git_buf_dispose(&info->data); | |
532 | 79086 | 7 | } | |
533 | - | |||
534 | - | #define FLAG_SET(opts,flag_name) (((opts)->flags & flag_name) != 0) | ||
535 | - | |||
536 | - | /* - score < 0 means files cannot be compared | ||
537 | - | * - score >= 100 means files are exact match | ||
538 | - | * - score == 0 means files are completely different | ||
539 | - | */ | ||
540 | 39877 | 2 | static int similarity_measure( | |
541 | - | int *score, | ||
542 | - | git_diff *diff, | ||
543 | - | const git_diff_find_options *opts, | ||
544 | - | void **cache, | ||
545 | - | size_t a_idx, | ||
546 | - | size_t b_idx) | ||
547 | - | { | ||
548 | 39877 | 2 | git_diff_file *a_file = similarity_get_file(diff, a_idx); | |
549 | 39877 | 3 | git_diff_file *b_file = similarity_get_file(diff, b_idx); | |
550 | 39877 | 4 | bool exact_match = FLAG_SET(opts, GIT_DIFF_FIND_EXACT_MATCH_ONLY); | |
551 | 39877 | 4 | int error = 0; | |
552 | - | similarity_info a_info, b_info; | ||
553 | - | |||
554 | 39877 | 4 | *score = -1; | |
555 | - | |||
556 | - | /* don't try to compare things that aren't files */ | ||
557 | 39877 | 4,5 | if (!GIT_MODE_ISBLOB(a_file->mode) || !GIT_MODE_ISBLOB(b_file->mode)) | |
558 | ##### | 6 | return 0; | |
559 | - | |||
560 | - | /* if exact match is requested, force calculation of missing OIDs now */ | ||
561 | 39877 | 7 | if (exact_match) { | |
562 | 30 | 8-10 | if (git_oid_is_zero(&a_file->id) && | |
563 | ##### | 10,12 | diff->old_src == GIT_ITERATOR_WORKDIR && | |
564 | ##### | 11,11 | !git_diff__oid_for_file(&a_file->id, | |
565 | ##### | 11 | diff, a_file->path, a_file->mode, a_file->size)) | |
566 | ##### | 13 | a_file->flags |= GIT_DIFF_FLAG_VALID_ID; | |
567 | - | |||
568 | 30 | 14-16 | if (git_oid_is_zero(&b_file->id) && | |
569 | 6 | 16,18 | diff->new_src == GIT_ITERATOR_WORKDIR && | |
570 | 6 | 17,17 | !git_diff__oid_for_file(&b_file->id, | |
571 | 6 | 17 | diff, b_file->path, b_file->mode, b_file->size)) | |
572 | 6 | 19 | b_file->flags |= GIT_DIFF_FLAG_VALID_ID; | |
573 | - | } | ||
574 | - | |||
575 | - | /* check OID match as a quick test */ | ||
576 | 39877 | 20,21 | if (git_oid__cmp(&a_file->id, &b_file->id) == 0) { | |
577 | 312 | 22 | *score = 100; | |
578 | 312 | 22 | return 0; | |
579 | - | } | ||
580 | - | |||
581 | - | /* don't calculate signatures if we are doing exact match */ | ||
582 | 39565 | 23 | if (exact_match) { | |
583 | 22 | 24 | *score = 0; | |
584 | 22 | 24 | return 0; | |
585 | - | } | ||
586 | - | |||
587 | 39543 | 25 | memset(&a_info, 0, sizeof(a_info)); | |
588 | 39543 | 25 | memset(&b_info, 0, sizeof(b_info)); | |
589 | - | |||
590 | - | /* set up similarity data (will try to update missing file sizes) */ | ||
591 | 39543 | 25-27 | if (!cache[a_idx] && (error = similarity_init(&a_info, diff, a_idx)) < 0) | |
592 | ##### | 28 | return error; | |
593 | 39543 | 29-31 | if (!cache[b_idx] && (error = similarity_init(&b_info, diff, b_idx)) < 0) | |
594 | ##### | 32 | goto cleanup; | |
595 | - | |||
596 | - | /* check if file sizes are nowhere near each other */ | ||
597 | 39543 | 33,34 | if (a_file->size > 127 && | |
598 | 18252 | 34,35 | b_file->size > 127 && | |
599 | 10215 | 35,36 | (a_file->size > (b_file->size << 3) || | |
600 | 8411 | 36 | b_file->size > (a_file->size << 3))) | |
601 | - | goto cleanup; | ||
602 | - | |||
603 | - | /* update signature cache if needed */ | ||
604 | 36299 | 37 | if (!cache[a_idx]) { | |
605 | 367 | 38,39 | if ((error = similarity_sig(&a_info, opts, cache)) < 0) | |
606 | ##### | 40 | goto cleanup; | |
607 | - | } | ||
608 | 36299 | 41 | if (!cache[b_idx]) { | |
609 | 870 | 42,43 | if ((error = similarity_sig(&b_info, opts, cache)) < 0) | |
610 | ##### | 44 | goto cleanup; | |
611 | - | } | ||
612 | - | |||
613 | - | /* calculate similarity provided that the metric choose to process | ||
614 | - | * both the a and b files (some may not if file is too big, etc). | ||
615 | - | */ | ||
616 | 36299 | 45,46 | if (cache[a_idx] && cache[b_idx]) | |
617 | 36299 | 47,47,47,47 | error = opts->metric->similarity( | |
618 | 36299 | 47,47,47 | score, cache[a_idx], cache[b_idx], opts->metric->payload); | |
619 | - | |||
620 | - | cleanup: | ||
621 | 39543 | 48 | similarity_unload(&a_info); | |
622 | 39543 | 49 | similarity_unload(&b_info); | |
623 | - | |||
624 | 39543 | 50 | return error; | |
625 | - | } | ||
626 | - | |||
627 | 51 | 2 | static int calc_self_similarity( | |
628 | - | git_diff *diff, | ||
629 | - | const git_diff_find_options *opts, | ||
630 | - | size_t delta_idx, | ||
631 | - | void **cache) | ||
632 | - | { | ||
633 | 51 | 2 | int error, similarity = -1; | |
634 | 51 | 2-4 | git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx); | |
635 | - | |||
636 | 51 | 5 | if ((delta->flags & GIT_DIFF_FLAG__HAS_SELF_SIMILARITY) != 0) | |
637 | 16 | 6 | return 0; | |
638 | - | |||
639 | 35 | 7 | error = similarity_measure( | |
640 | 35 | 7 | &similarity, diff, opts, cache, 2 * delta_idx, 2 * delta_idx + 1); | |
641 | 35 | 8 | if (error < 0) | |
642 | ##### | 9 | return error; | |
643 | - | |||
644 | 35 | 10 | if (similarity >= 0) { | |
645 | 35 | 11 | delta->similarity = (uint16_t)similarity; | |
646 | 35 | 11 | delta->flags |= GIT_DIFF_FLAG__HAS_SELF_SIMILARITY; | |
647 | - | } | ||
648 | - | |||
649 | 35 | 12 | return 0; | |
650 | - | } | ||
651 | - | |||
652 | 7107 | 2 | static bool is_rename_target( | |
653 | - | git_diff *diff, | ||
654 | - | const git_diff_find_options *opts, | ||
655 | - | size_t delta_idx, | ||
656 | - | void **cache) | ||
657 | - | { | ||
658 | 7107 | 2-4 | git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx); | |
659 | - | |||
660 | - | /* skip things that aren't plain blobs */ | ||
661 | 7107 | 5 | if (!GIT_MODE_ISBLOB(delta->new_file.mode)) | |
662 | 349 | 6 | return false; | |
663 | - | |||
664 | - | /* only consider ADDED, RENAMED, COPIED, and split MODIFIED as | ||
665 | - | * targets; maybe include UNTRACKED if requested. | ||
666 | - | */ | ||
667 | 6758 | 7 | switch (delta->status) { | |
668 | - | case GIT_DELTA_UNMODIFIED: | ||
669 | - | case GIT_DELTA_DELETED: | ||
670 | - | case GIT_DELTA_IGNORED: | ||
671 | - | case GIT_DELTA_CONFLICTED: | ||
672 | 27 | 8 | return false; | |
673 | - | |||
674 | - | case GIT_DELTA_MODIFIED: | ||
675 | 3954 | 9,10 | if (!FLAG_SET(opts, GIT_DIFF_FIND_REWRITES) && | |
676 | 3921 | 10 | !FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES)) | |
677 | 3919 | 11 | return false; | |
678 | - | |||
679 | 35 | 12,13 | if (calc_self_similarity(diff, opts, delta_idx, cache) < 0) | |
680 | ##### | 14 | return false; | |
681 | - | |||
682 | 35 | 15,16 | if (FLAG_SET(opts, GIT_DIFF_BREAK_REWRITES) && | |
683 | 33 | 16 | delta->similarity < opts->break_rewrite_threshold) { | |
684 | 28 | 17 | delta->flags |= GIT_DIFF_FLAG__TO_SPLIT; | |
685 | 28 | 17 | break; | |
686 | - | } | ||
687 | 7 | 18,19 | if (FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES) && | |
688 | 7 | 19 | delta->similarity < opts->rename_from_rewrite_threshold) { | |
689 | 2 | 20 | delta->flags |= GIT_DIFF_FLAG__TO_SPLIT; | |
690 | 2 | 20 | break; | |
691 | - | } | ||
692 | - | |||
693 | 5 | 21 | return false; | |
694 | - | |||
695 | - | case GIT_DELTA_UNTRACKED: | ||
696 | 41 | 22 | if (!FLAG_SET(opts, GIT_DIFF_FIND_FOR_UNTRACKED)) | |
697 | ##### | 23 | return false; | |
698 | 41 | 24 | break; | |
699 | - | |||
700 | - | default: /* all other status values should be checked */ | ||
701 | 2736 | 25 | break; | |
702 | - | } | ||
703 | - | |||
704 | 2807 | 26 | delta->flags |= GIT_DIFF_FLAG__IS_RENAME_TARGET; | |
705 | 2807 | 26 | return true; | |
706 | - | } | ||
707 | - | |||
708 | 7107 | 2 | static bool is_rename_source( | |
709 | - | git_diff *diff, | ||
710 | - | const git_diff_find_options *opts, | ||
711 | - | size_t delta_idx, | ||
712 | - | void **cache) | ||
713 | - | { | ||
714 | 7107 | 2-4 | git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx); | |
715 | - | |||
716 | - | /* skip things that aren't blobs */ | ||
717 | 7107 | 5 | if (!GIT_MODE_ISBLOB(delta->old_file.mode)) | |
718 | 2798 | 6 | return false; | |
719 | - | |||
720 | 4309 | 7 | switch (delta->status) { | |
721 | - | case GIT_DELTA_ADDED: | ||
722 | - | case GIT_DELTA_UNTRACKED: | ||
723 | - | case GIT_DELTA_UNREADABLE: | ||
724 | - | case GIT_DELTA_IGNORED: | ||
725 | - | case GIT_DELTA_CONFLICTED: | ||
726 | ##### | 8 | return false; | |
727 | - | |||
728 | - | case GIT_DELTA_DELETED: | ||
729 | - | case GIT_DELTA_TYPECHANGE: | ||
730 | 328 | 9 | break; | |
731 | - | |||
732 | - | case GIT_DELTA_UNMODIFIED: | ||
733 | 27 | 10 | if (!FLAG_SET(opts, GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED)) | |
734 | 10 | 11 | return false; | |
735 | 17 | 12 | if (FLAG_SET(opts, GIT_DIFF_FIND_REMOVE_UNMODIFIED)) | |
736 | 3 | 13 | delta->flags |= GIT_DIFF_FLAG__TO_DELETE; | |
737 | 17 | 14 | break; | |
738 | - | |||
739 | - | default: /* MODIFIED, RENAMED, COPIED */ | ||
740 | - | /* if we're finding copies, this could be a source */ | ||
741 | 3954 | 15 | if (FLAG_SET(opts, GIT_DIFF_FIND_COPIES)) | |
742 | 37 | 16 | break; | |
743 | - | |||
744 | - | /* otherwise, this is only a source if we can split it */ | ||
745 | 3917 | 17,18 | if (!FLAG_SET(opts, GIT_DIFF_FIND_REWRITES) && | |
746 | 3903 | 18 | !FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES)) | |
747 | 3901 | 19 | return false; | |
748 | - | |||
749 | 16 | 20,21 | if (calc_self_similarity(diff, opts, delta_idx, cache) < 0) | |
750 | ##### | 22 | return false; | |
751 | - | |||
752 | 16 | 23,24 | if (FLAG_SET(opts, GIT_DIFF_BREAK_REWRITES) && | |
753 | 14 | 24 | delta->similarity < opts->break_rewrite_threshold) { | |
754 | 13 | 25 | delta->flags |= GIT_DIFF_FLAG__TO_SPLIT; | |
755 | 13 | 25 | break; | |
756 | - | } | ||
757 | - | |||
758 | 3 | 26,27 | if (FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES) && | |
759 | 3 | 27 | delta->similarity < opts->rename_from_rewrite_threshold) | |
760 | 2 | 28 | break; | |
761 | - | |||
762 | 1 | 29 | return false; | |
763 | - | } | ||
764 | - | |||
765 | 397 | 30 | delta->flags |= GIT_DIFF_FLAG__IS_RENAME_SOURCE; | |
766 | 397 | 30 | return true; | |
767 | - | } | ||
768 | - | |||
769 | 80 | 2 | GIT_INLINE(bool) delta_is_split(git_diff_delta *delta) | |
770 | - | { | ||
771 | 80 | 2,3 | return (delta->status == GIT_DELTA_TYPECHANGE || | |
772 | 77 | 3 | (delta->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0); | |
773 | - | } | ||
774 | - | |||
775 | 316 | 2 | GIT_INLINE(bool) delta_is_new_only(git_diff_delta *delta) | |
776 | - | { | ||
777 | 316 | 2,3 | return (delta->status == GIT_DELTA_ADDED || | |
778 | 48 | 3,4 | delta->status == GIT_DELTA_UNTRACKED || | |
779 | 316 | 2,4-7 | delta->status == GIT_DELTA_UNREADABLE || | |
780 | 18 | 5 | delta->status == GIT_DELTA_IGNORED); | |
781 | - | } | ||
782 | - | |||
783 | 263 | 2 | GIT_INLINE(void) delta_make_rename( | |
784 | - | git_diff_delta *to, const git_diff_delta *from, uint16_t similarity) | ||
785 | - | { | ||
786 | 263 | 2 | to->status = GIT_DELTA_RENAMED; | |
787 | 263 | 2 | to->similarity = similarity; | |
788 | 263 | 2 | to->nfiles = 2; | |
789 | 263 | 2 | memcpy(&to->old_file, &from->old_file, sizeof(to->old_file)); | |
790 | 263 | 2 | to->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; | |
791 | 263 | 2 | } | |
792 | - | |||
793 | - | typedef struct { | ||
794 | - | size_t idx; | ||
795 | - | uint16_t similarity; | ||
796 | - | } diff_find_match; | ||
797 | - | |||
798 | 332 | 2 | int git_diff_find_similar( | |
799 | - | git_diff *diff, | ||
800 | - | const git_diff_find_options *given_opts) | ||
801 | - | { | ||
802 | - | size_t s, t; | ||
803 | 332 | 2 | int error = 0, result; | |
804 | - | uint16_t similarity; | ||
805 | - | git_diff_delta *src, *tgt; | ||
806 | 332 | 2 | git_diff_find_options opts = GIT_DIFF_FIND_OPTIONS_INIT; | |
807 | 332 | 2 | size_t num_deltas, num_srcs = 0, num_tgts = 0; | |
808 | 332 | 2 | size_t tried_srcs = 0, tried_tgts = 0; | |
809 | 332 | 2 | size_t num_rewrites = 0, num_updates = 0, num_bumped = 0; | |
810 | - | size_t sigcache_size; | ||
811 | 332 | 2 | void **sigcache = NULL; /* cache of similarity metric file signatures */ | |
812 | 332 | 2 | diff_find_match *tgt2src = NULL; | |
813 | 332 | 2 | diff_find_match *src2tgt = NULL; | |
814 | 332 | 2 | diff_find_match *tgt2src_copy = NULL; | |
815 | - | diff_find_match *best_match; | ||
816 | - | git_diff_file swap; | ||
817 | - | |||
818 | 332 | 2,3 | assert(diff); | |
819 | - | |||
820 | 332 | 4,5 | if ((error = normalize_find_opts(diff, &opts, given_opts)) < 0) | |
821 | 2 | 6 | return error; | |
822 | - | |||
823 | 330 | 7 | num_deltas = diff->deltas.length; | |
824 | - | |||
825 | - | /* TODO: maybe abort if deltas.length > rename_limit ??? */ | ||
826 | 330 | 7-9 | if (!num_deltas || !git__is_uint32(num_deltas)) | |
827 | - | goto cleanup; | ||
828 | - | |||
829 | - | /* No flags set; nothing to do */ | ||
830 | 328 | 10 | if ((opts.flags & GIT_DIFF_FIND_ALL) == 0) | |
831 | 1 | 11 | goto cleanup; | |
832 | - | |||
833 | 327 | 12-18 | GIT_ERROR_CHECK_ALLOC_MULTIPLY(&sigcache_size, num_deltas, 2); | |
834 | 327 | 19 | sigcache = git__calloc(sigcache_size, sizeof(void *)); | |
835 | 327 | 20,21 | GIT_ERROR_CHECK_ALLOC(sigcache); | |
836 | - | |||
837 | - | /* Label rename sources and targets | ||
838 | - | * | ||
839 | - | * This will also set self-similarity scores for MODIFIED files and | ||
840 | - | * mark them for splitting if break-rewrites is enabled | ||
841 | - | */ | ||
842 | 7434 | 22,31-33 | git_vector_foreach(&diff->deltas, t, tgt) { | |
843 | 7107 | 23,24 | if (is_rename_source(diff, &opts, t, sigcache)) | |
844 | 397 | 25 | ++num_srcs; | |
845 | - | |||
846 | 7107 | 26,27 | if (is_rename_target(diff, &opts, t, sigcache)) | |
847 | 2807 | 28 | ++num_tgts; | |
848 | - | |||
849 | 7107 | 29 | if ((tgt->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) | |
850 | 30 | 30 | num_rewrites++; | |
851 | - | } | ||
852 | - | |||
853 | - | /* if there are no candidate srcs or tgts, we're done */ | ||
854 | 327 | 34,35 | if (!num_srcs || !num_tgts) | |
855 | - | goto cleanup; | ||
856 | - | |||
857 | 99 | 36 | src2tgt = git__calloc(num_deltas, sizeof(diff_find_match)); | |
858 | 99 | 37,38 | GIT_ERROR_CHECK_ALLOC(src2tgt); | |
859 | 99 | 39 | tgt2src = git__calloc(num_deltas, sizeof(diff_find_match)); | |
860 | 99 | 40,41 | GIT_ERROR_CHECK_ALLOC(tgt2src); | |
861 | - | |||
862 | 99 | 42 | if (FLAG_SET(&opts, GIT_DIFF_FIND_COPIES)) { | |
863 | 29 | 43 | tgt2src_copy = git__calloc(num_deltas, sizeof(diff_find_match)); | |
864 | 29 | 44,45 | GIT_ERROR_CHECK_ALLOC(tgt2src_copy); | |
865 | - | } | ||
866 | - | |||
867 | - | /* | ||
868 | - | * Find best-fit matches for rename / copy candidates | ||
869 | - | */ | ||
870 | - | |||
871 | - | find_best_matches: | ||
872 | 134 | 46 | tried_tgts = num_bumped = 0; | |
873 | - | |||
874 | 4137 | 46,78-80 | git_vector_foreach(&diff->deltas, t, tgt) { | |
875 | - | /* skip things that are not rename targets */ | ||
876 | 4137 | 47 | if ((tgt->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) == 0) | |
877 | 2424 | 48 | continue; | |
878 | - | |||
879 | 1713 | 49 | tried_srcs = 0; | |
880 | - | |||
881 | 340999 | 49,73-75 | git_vector_foreach(&diff->deltas, s, src) { | |
882 | - | /* skip things that are not rename sources */ | ||
883 | 340336 | 50 | if ((src->flags & GIT_DIFF_FLAG__IS_RENAME_SOURCE) == 0) | |
884 | 300451 | 51 | continue; | |
885 | - | |||
886 | - | /* calculate similarity for this pair and find best match */ | ||
887 | 39885 | 52 | if (s == t) | |
888 | 43 | 53 | result = -1; /* don't measure self-similarity here */ | |
889 | 39842 | 54,55 | else if ((error = similarity_measure( | |
890 | 39842 | 54 | &result, diff, &opts, sigcache, 2 * s, 2 * t + 1)) < 0) | |
891 | ##### | 56 | goto cleanup; | |
892 | - | |||
893 | 39885 | 57 | if (result < 0) | |
894 | 3287 | 58 | continue; | |
895 | 36598 | 59 | similarity = (uint16_t)result; | |
896 | - | |||
897 | - | /* is this a better rename? */ | ||
898 | 36598 | 59,60 | if (tgt2src[t].similarity < similarity && | |
899 | 1752 | 60 | src2tgt[s].similarity < similarity) | |
900 | - | { | ||
901 | - | /* eject old mapping */ | ||
902 | 437 | 61 | if (src2tgt[s].similarity > 0) { | |
903 | 72 | 62 | tgt2src[src2tgt[s].idx].similarity = 0; | |
904 | 72 | 62 | num_bumped++; | |
905 | - | } | ||
906 | 437 | 63 | if (tgt2src[t].similarity > 0) { | |
907 | 41 | 64 | src2tgt[tgt2src[t].idx].similarity = 0; | |
908 | 41 | 64 | num_bumped++; | |
909 | - | } | ||
910 | - | |||
911 | - | /* write new mapping */ | ||
912 | 437 | 65 | tgt2src[t].idx = s; | |
913 | 437 | 65 | tgt2src[t].similarity = similarity; | |
914 | 437 | 65 | src2tgt[s].idx = t; | |
915 | 437 | 65 | src2tgt[s].similarity = similarity; | |
916 | - | } | ||
917 | - | |||
918 | - | /* keep best absolute match for copies */ | ||
919 | 36598 | 66,67 | if (tgt2src_copy != NULL && | |
920 | 225 | 67 | tgt2src_copy[t].similarity < similarity) | |
921 | - | { | ||
922 | 64 | 68 | tgt2src_copy[t].idx = s; | |
923 | 64 | 68 | tgt2src_copy[t].similarity = similarity; | |
924 | - | } | ||
925 | - | |||
926 | 36598 | 69 | if (++tried_srcs >= num_srcs) | |
927 | 1050 | 70 | break; | |
928 | - | |||
929 | - | /* cap on maximum targets we'll examine (per "tgt" file) */ | ||
930 | 35548 | 71 | if (tried_srcs > opts.rename_limit) | |
931 | ##### | 72 | break; | |
932 | - | } | ||
933 | - | |||
934 | 1713 | 76 | if (++tried_tgts >= num_tgts) | |
935 | 134 | 77 | break; | |
936 | - | } | ||
937 | - | |||
938 | 134 | 81 | if (num_bumped > 0) /* try again if we bumped some items */ | |
939 | 35 | 82 | goto find_best_matches; | |
940 | - | |||
941 | - | /* | ||
942 | - | * Rewrite the diffs with renames / copies | ||
943 | - | */ | ||
944 | - | |||
945 | 2770 | 83,157-159 | git_vector_foreach(&diff->deltas, t, tgt) { | |
946 | - | /* skip things that are not rename targets */ | ||
947 | 2671 | 84 | if ((tgt->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) == 0) | |
948 | 1694 | 85 | continue; | |
949 | - | |||
950 | - | /* check if this delta was the target of a similarity */ | ||
951 | 977 | 86 | if (tgt2src[t].similarity) | |
952 | 318 | 87 | best_match = &tgt2src[t]; | |
953 | 659 | 88,89 | else if (tgt2src_copy && tgt2src_copy[t].similarity) | |
954 | 2 | 90 | best_match = &tgt2src_copy[t]; | |
955 | - | else | ||
956 | 657 | 91 | continue; | |
957 | - | |||
958 | 320 | 92 | s = best_match->idx; | |
959 | 320 | 92-94 | src = GIT_VECTOR_GET(&diff->deltas, s); | |
960 | - | |||
961 | - | /* possible scenarios: | ||
962 | - | * 1. from DELETE to ADD/UNTRACK/IGNORE = RENAME | ||
963 | - | * 2. from DELETE to SPLIT/TYPECHANGE = RENAME + DELETE | ||
964 | - | * 3. from SPLIT/TYPECHANGE to ADD/UNTRACK/IGNORE = ADD + RENAME | ||
965 | - | * 4. from SPLIT/TYPECHANGE to SPLIT/TYPECHANGE = RENAME + SPLIT | ||
966 | - | * 5. from OTHER to ADD/UNTRACK/IGNORE = OTHER + COPY | ||
967 | - | */ | ||
968 | - | |||
969 | 320 | 95 | if (src->status == GIT_DELTA_DELETED) { | |
970 | - | |||
971 | 288 | 96,97 | if (delta_is_new_only(tgt)) { | |
972 | - | |||
973 | 283 | 98 | if (best_match->similarity < opts.rename_threshold) | |
974 | 38 | 99 | continue; | |
975 | - | |||
976 | 245 | 100 | delta_make_rename(tgt, src, best_match->similarity); | |
977 | - | |||
978 | 245 | 101 | src->flags |= GIT_DIFF_FLAG__TO_DELETE; | |
979 | 245 | 101 | num_rewrites++; | |
980 | - | } else { | ||
981 | 5 | 102-104 | assert(delta_is_split(tgt)); | |
982 | - | |||
983 | 5 | 105 | if (best_match->similarity < opts.rename_from_rewrite_threshold) | |
984 | 1 | 106 | continue; | |
985 | - | |||
986 | 4 | 107 | memcpy(&swap, &tgt->old_file, sizeof(swap)); | |
987 | - | |||
988 | 4 | 107 | delta_make_rename(tgt, src, best_match->similarity); | |
989 | 4 | 108 | num_rewrites--; | |
990 | - | |||
991 | 4 | 108,109 | assert(src->status == GIT_DELTA_DELETED); | |
992 | 4 | 110 | memcpy(&src->old_file, &swap, sizeof(src->old_file)); | |
993 | 4 | 110 | memset(&src->new_file, 0, sizeof(src->new_file)); | |
994 | 4 | 110 | src->new_file.path = src->old_file.path; | |
995 | 4 | 110 | src->new_file.flags |= GIT_DIFF_FLAG_VALID_ID; | |
996 | - | |||
997 | 4 | 110 | num_updates++; | |
998 | - | |||
999 | 4 | 110,111 | if (src2tgt[t].similarity > 0 && src2tgt[t].idx > t) { | |
1000 | - | /* what used to be at src t is now at src s */ | ||
1001 | 249 | 112,113 | tgt2src[src2tgt[t].idx].idx = s; | |
1002 | - | } | ||
1003 | - | } | ||
1004 | - | } | ||
1005 | - | |||
1006 | 32 | 114,115 | else if (delta_is_split(src)) { | |
1007 | - | |||
1008 | 14 | 116,117 | if (delta_is_new_only(tgt)) { | |
1009 | - | |||
1010 | 3 | 118 | if (best_match->similarity < opts.rename_threshold) | |
1011 | ##### | 119 | continue; | |
1012 | - | |||
1013 | 3 | 120 | delta_make_rename(tgt, src, best_match->similarity); | |
1014 | - | |||
1015 | 3 | 121-123 | src->status = (diff->new_src == GIT_ITERATOR_WORKDIR) ? | |
1016 | - | GIT_DELTA_UNTRACKED : GIT_DELTA_ADDED; | ||
1017 | 3 | 124 | src->nfiles = 1; | |
1018 | 3 | 124 | memset(&src->old_file, 0, sizeof(src->old_file)); | |
1019 | 3 | 124 | src->old_file.path = src->new_file.path; | |
1020 | 3 | 124 | src->old_file.flags |= GIT_DIFF_FLAG_VALID_ID; | |
1021 | - | |||
1022 | 3 | 124 | src->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; | |
1023 | 3 | 124 | num_rewrites--; | |
1024 | - | |||
1025 | 3 | 124 | num_updates++; | |
1026 | - | } else { | ||
1027 | 11 | 125-127 | assert(delta_is_split(src)); | |
1028 | - | |||
1029 | 11 | 128 | if (best_match->similarity < opts.rename_from_rewrite_threshold) | |
1030 | ##### | 129 | continue; | |
1031 | - | |||
1032 | 11 | 130 | memcpy(&swap, &tgt->old_file, sizeof(swap)); | |
1033 | - | |||
1034 | 11 | 130 | delta_make_rename(tgt, src, best_match->similarity); | |
1035 | 11 | 131 | num_rewrites--; | |
1036 | 11 | 131 | num_updates++; | |
1037 | - | |||
1038 | 11 | 131 | memcpy(&src->old_file, &swap, sizeof(src->old_file)); | |
1039 | - | |||
1040 | - | /* if we've just swapped the new element into the correct | ||
1041 | - | * place, clear the SPLIT flag | ||
1042 | - | */ | ||
1043 | 11 | 131,132 | if (tgt2src[s].idx == t && | |
1044 | 7 | 132 | tgt2src[s].similarity > | |
1045 | 7 | 132 | opts.rename_from_rewrite_threshold) { | |
1046 | 6 | 133 | src->status = GIT_DELTA_RENAMED; | |
1047 | 6 | 133 | src->similarity = tgt2src[s].similarity; | |
1048 | 6 | 133 | tgt2src[s].similarity = 0; | |
1049 | 6 | 133 | src->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; | |
1050 | 6 | 133 | num_rewrites--; | |
1051 | - | } | ||
1052 | - | /* otherwise, if we just overwrote a source, update mapping */ | ||
1053 | 5 | 134,135 | else if (src2tgt[t].similarity > 0 && src2tgt[t].idx > t) { | |
1054 | - | /* what used to be at src t is now at src s */ | ||
1055 | 3 | 136 | tgt2src[src2tgt[t].idx].idx = s; | |
1056 | - | } | ||
1057 | - | |||
1058 | 14 | 137,138 | num_updates++; | |
1059 | - | } | ||
1060 | - | } | ||
1061 | - | |||
1062 | 18 | 139 | else if (FLAG_SET(&opts, GIT_DIFF_FIND_COPIES)) { | |
1063 | 18 | 140 | if (tgt2src_copy[t].similarity < opts.copy_threshold) | |
1064 | 2 | 141 | continue; | |
1065 | - | |||
1066 | - | /* always use best possible source for copy */ | ||
1067 | 16 | 142 | best_match = &tgt2src_copy[t]; | |
1068 | 16 | 142-144 | src = GIT_VECTOR_GET(&diff->deltas, best_match->idx); | |
1069 | - | |||
1070 | 16 | 145,146 | if (delta_is_split(tgt)) { | |
1071 | 2 | 147 | error = insert_delete_side_of_split(diff, &diff->deltas, tgt); | |
1072 | 2 | 148 | if (error < 0) | |
1073 | ##### | 149 | goto cleanup; | |
1074 | 2 | 150 | num_rewrites--; | |
1075 | - | } | ||
1076 | - | |||
1077 | 16 | 151-154 | if (!delta_is_split(tgt) && !delta_is_new_only(tgt)) | |
1078 | 2 | 155 | continue; | |
1079 | - | |||
1080 | 14 | 156 | tgt->status = GIT_DELTA_COPIED; | |
1081 | 14 | 156 | tgt->similarity = best_match->similarity; | |
1082 | 14 | 156 | tgt->nfiles = 2; | |
1083 | 14 | 156 | memcpy(&tgt->old_file, &src->old_file, sizeof(tgt->old_file)); | |
1084 | 14 | 156 | tgt->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; | |
1085 | - | |||
1086 | 14 | 156 | num_updates++; | |
1087 | - | } | ||
1088 | - | } | ||
1089 | - | |||
1090 | - | /* | ||
1091 | - | * Actually split and delete entries as needed | ||
1092 | - | */ | ||
1093 | - | |||
1094 | 99 | 160,161 | if (num_rewrites > 0 || num_updates > 0) | |
1095 | 87 | 166,166 | error = apply_splits_and_deletes( | |
1096 | 87 | 166 | diff, diff->deltas.length - num_rewrites, | |
1097 | 87 | 162-165 | FLAG_SET(&opts, GIT_DIFF_BREAK_REWRITES) && | |
1098 | 29 | 163 | !FLAG_SET(&opts, GIT_DIFF_BREAK_REWRITES_FOR_RENAMES_ONLY)); | |
1099 | - | |||
1100 | - | cleanup: | ||
1101 | 330 | 167 | git__free(tgt2src); | |
1102 | 330 | 168 | git__free(src2tgt); | |
1103 | 330 | 169 | git__free(tgt2src_copy); | |
1104 | - | |||
1105 | 330 | 170 | if (sigcache) { | |
1106 | 14541 | 171,174,175 | for (t = 0; t < num_deltas * 2; ++t) { | |
1107 | 14214 | 172 | if (sigcache[t] != NULL) | |
1108 | 1237 | 173 | opts.metric->free_signature(sigcache[t], opts.metric->payload); | |
1109 | - | } | ||
1110 | 327 | 176 | git__free(sigcache); | |
1111 | - | } | ||
1112 | - | |||
1113 | 330 | 177,178 | if (!given_opts || !given_opts->metric) | |
1114 | 330 | 179 | git__free(opts.metric); | |
1115 | - | |||
1116 | 330 | 180 | return error; | |
1117 | - | } | ||
1118 | - | |||
1119 | - | #undef FLAG_SET |