-
Notifications
You must be signed in to change notification settings - Fork 7
/
diff.c
825 lines (764 loc) · 23.1 KB
/
diff.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
/*
* wiggle - apply rejected patches
*
* Copyright (C) 2003 Neil Brown <[email protected]>
* Copyright (C) 2011-2013 Neil Brown <[email protected]>
* Copyright (C) 2014-2020 Neil Brown <[email protected]>
*
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program.
*
* Author: Neil Brown
* Email: <[email protected]>
*/
/*
* Calculate longest common subsequence between two sequences
*
* Each sequence contains strings with
* hash start length
* We produce a list of triples: a b len
* where A and B point to elements in the two sequences, and len is the number
* of common elements there. The list is terminated by an entry with len==0.
*
* This is roughly based on
* "An O(ND) Difference Algorithm and its Variations", Eugene Myers,
* Algorithmica Vol. 1 No. 2, 1986, pp. 251-266;
* http://xmailserver.org/diff2.pdf
*
* However we don't run the basic algorithm both forward and backward until
* we find an overlap as Myers suggests. Rather we always run forwards, but
* we record the location of the (possibly empty) snake that crosses the
* midline. When we finish, this recorded location for the best path shows
* us where to divide and find further midpoints.
*
* In brief, the algorithm is as follows.
*
* Imagine a Cartesian Matrix where x co-ordinates correspond to symbols in
* the first sequence (A, length a) and y co-ordinates correspond to symbols
* in the second sequence (B, length b). At the origin we have the first
* sequence.
* Movement in the x direction represents deleting the symbol at that point,
* so from x=i-1 to x=i deletes symbol i from A.
* Movement in the y direction represents adding the corresponding symbol
* from B. So to move from the origin 'a' spaces along X and then 'b' spaces
* up Y will remove all of the first sequence and then add all of the second
* sequence. Similarly moving firstly up the Y axis, then along the X
* direction will add the new sequence, then remove the old sequence. Thus
* the point a,b represents the second sequence and a path from 0,0 to a,b
* represents a sequence of edits to change A into B.
*
* There are clearly many paths from 0,0 to a,b going through different
* points in the matrix in different orders. At some points in the matrix
* the next symbol to be added from B is the same as the next symbol to be
* removed from A. At these points we can take a diagonal step to a new
* point in the matrix without actually changing any symbol. A sequence of
* these diagonal steps is called a 'snake'. The goal then is to find a path
* of x-steps (removals), y-steps (additions) and diagonals (steps and
* snakes) where the number of (non-diagonal) steps is minimal.
*
* i.e. we aim for as many long snakes as possible.
* If the total number of 'steps' is called the 'cost', we aim to minimise
* the cost.
*
* As storing the whole matrix in memory would be prohibitive with large
* sequences we limit ourselves to linear storage proportional to a+b and
* repeat the search at most log2(a+b) times building up the path as we go.
* Specifically we perform a search on the full matrix and record where each
* path crosses the half-way point. i.e. where x+y = (a+b)/2 (== mid). This
* tells us the mid point of the best path. We then perform two searches,
* one on each of the two halves and find the 1/4 and 3/4 way points. This
* continues recursively until we have all points.
*
* The storage is an array v of 'struct v'. This is indexed by the
* diagonal-number k = x-y. Thus k can be negative and the array is
* allocated to allow for that. During the search there is an implicit value
* 'c' which is the cost (length in steps) of all the paths currently under
* consideration.
* v[k] stores details of the longest reaching path of cost c that finishes
* on diagonal k. "longest reaching" means "finishes closest to a,b".
* Details are:
* The location of the end point. 'x' is stored. y = x - k.
* The diagonal of the midpoint crossing. md is stored. x = (mid + md)/2
* y = (mid - md)/2
* = x - md
* (note: md is a diagonal so md = x-y. mid is an anti-diagonal: mid = x+y)
* The number of 'snakes' in the path (l). This is used to allocate the
* array which will record the snakes and to terminate recursion.
*
* A path with an even cost (c is even) must end on an even diagonal (k is
* even) and when c is odd, k must be odd. So the v[] array is treated as
* two sub arrays, the even part and the odd part. One represents paths of
* cost 'c', the other paths of cost c-1.
*
* Initially only v[0] is meaningful and there are no snakes. We firstly
* extend all paths under consideration with the longest possible snake on
* that diagonal.
*
* Then we increment 'c' and calculate for each suitable 'k' whether the best
* path to diagonal k of cost c comes from taking an x-step from the c-1 path
* on diagonal k-1, or from taking a y-step from the c-1 path on diagonal
* k+1. Obviously we need to avoid stepping out of the matrix. Finally we
* check if the 'v' array can be extended or reduced at the boundaries. If
* we hit a border we must reduce. If the best we could possibly do on that
* diagonal is less than the worst result from the current leading path, then
* we also reduce. Otherwise we extend the range of 'k's we consider.
*
* We continue until we find a path has reached a,b. This must be a minimal
* cost path (cost==c). At this point re-check the end of the snake at the
* midpoint and report that.
*
* This all happens recursively for smaller and smaller subranges stopping
* when we examine a submatrix and find that it contains no snakes. As we
* are usually dealing with sub-matrixes we are not walking from 0,0 to a,b
* but from alo,blo to ahi,bhi - low point to high point. So the initial k is
* alo-blo, not 0.
*
*/
#include "wiggle.h"
#include <stdlib.h>
#include <sys/time.h>
/* Per-diagonal search state used by find_common().  The scratch array of
 * these is indexed by the diagonal number k = x - y.
 */
struct v {
int x; /* x location of the end of the furthest reaching path of the current cost on this diagonal */
int md; /* diagonal on which that path crossed the midline */
int l; /* number of snakes (continuous common sequences) found on that path so far */
};
static int find_common(struct file *a, struct file *b,
int *alop, int *ahip,
int *blop, int *bhip,
struct v *v, int shortcut)
{
/* Examine matrix from alo to ahi and blo to bhi.
 * i.e. including alo and blo, but less than ahi and bhi.
 * Finding longest subsequence and
 * return new {a,b}{lo,hi} either side of midline.
 * i.e. mid = (alo + ahi + blo + bhi) / 2
 * alo+blo <= mid <= ahi+bhi
 * and alo,blo to ahi,bhi is a common (possibly empty)
 * subseq - a snake.
 *
 * On entry *alop, *ahip, *blop and *bhip describe the region to
 * search.  On return they have been overwritten with the start
 * and end of the snake being reported.
 *
 * v is scratch space which is indexable from
 * alo-bhi to ahi-blo inclusive.
 * i.e. even though there is no symbol at ahi or bhi, we do
 * consider paths that reach there as they simply cannot
 * go further in that direction.
 *
 * shortcut: when non-zero the search is allowed to stop looking
 * for the minimal path once it has become expensive (see the
 * test at the top of the main loop) and instead report the next
 * snake that improves on the worst-case remaining cost.
 *
 * Return the number of snakes found on the path that reached
 * ahi,bhi, or 1 if the shortcut was taken.
 */
struct timeval start, stop;
int klo, khi;
int alo = *alop;
int ahi = *ahip;
int blo = *blop;
int bhi = *bhip;
int mid = (ahi+bhi+alo+blo)/2;
/* 'worst' is the worst-case extra cost that we need
 * to pay before reaching our destination. It assumes
 * no more snakes in the furthest-reaching path so far.
 * We use this to know when we can trim the extreme
 * diagonals - when their best case does not improve on
 * the current worst case.
 */
int worst = (ahi-alo)+(bhi-blo);
int loopcount = -1;
/* Normalise to 0 or 1; the value 2 is used below to mean
 * "shortcut armed: the next improving snake wins".
 */
shortcut = !!shortcut;
if (shortcut) {
/* If WIGGLE_LOOPCOUNT is set to 5 or more, the shortcut is
 * triggered after that many passes of the main loop instead
 * of by elapsed time.  Otherwise (unset, or less than 5)
 * loopcount stays at -1 and the timer is started.
 */
char *lc = getenv("WIGGLE_LOOPCOUNT");
if (lc)
loopcount = atoi(lc);
if (loopcount < 5) {
loopcount = -1;
gettimeofday(&start, NULL);
}
}
/* Start with a single path of cost 0 at alo,blo with no snakes. */
klo = khi = alo-blo;
v[klo].x = alo;
v[klo].l = 0;
while (1) {
int x, y;
int cost;
int k;
if (loopcount > 0)
loopcount -= 1;
/* Arm the shortcut only when it was requested, the band of
 * diagonals under consideration is wide (more than 5000), and
 * either the loop budget has run out or more than 20000
 * microseconds have elapsed since the search started.
 */
if (shortcut == 1 &&
khi - klo > 5000 &&
(loopcount == 0 ||
(loopcount < 0 &&
gettimeofday(&stop, NULL) == 0 &&
(stop.tv_sec - start.tv_sec) * 1000000 +
(stop.tv_usec - start.tv_usec) > 20000)))
/* 20ms is a long time. Time to take a shortcut
 * Next snake wins
 */
shortcut = 2;
/* Find the longest snake extending on each current
 * diagonal, and record if it crosses the midline.
 * If we reach the end, return.
 */
for (k = klo ; k <= khi ; k += 2) {
int snake = 0;
x = v[k].x;
y = x-k;
/* Sanity check: the path must not have left the matrix */
if (y > bhi)
abort();
/* Follow any snake that is here */
while (x < ahi && y < bhi &&
match(&a->list[x], &b->list[y])
) {
x++;
y++;
snake = 1;
}
/* Refine the worst-case remaining cost */
cost = (ahi-x)+(bhi-y);
if (cost < worst) {
worst = cost;
if (snake && shortcut == 2) {
/* Shortcut: report just this snake.  v[k].x still
 * holds where the snake started; x,y is where it ended.
 */
*alop = v[k].x;
*blop = v[k].x - k;
*ahip = x;
*bhip = y;
return 1;
}
}
/* Check for midline crossing: the snake started at or
 * before the midline and ended at or after it.
 */
if (x+y >= mid &&
v[k].x + v[k].x-k <= mid)
v[k].md = k;
v[k].x = x;
v[k].l += snake;
if (cost == 0) {
/* OK! We have arrived.
 * We crossed the midpoint on diagonal v[k].md
 */
if (x != ahi)
abort();
/* The snake could start earlier than the
 * midline. We cannot just search backwards
 * as that might find the wrong path - the
 * greediness of the diff algorithm is
 * asymmetric.
 * We could record the start of the snake in
 * 'v', but we will find the actual snake when
 * we recurse so there is no need.
 */
/* Point where diagonal md meets the midline x+y == mid */
x = (v[k].md+mid)/2;
y = x-v[k].md;
*alop = x;
*blop = y;
/* Find the end of the snake using the same
 * greedy approach as when we first found the
 * snake
 */
while (x < ahi && y < bhi &&
match(&a->list[x], &b->list[y])
) {
x++;
y++;
}
*ahip = x;
*bhip = y;
return v[k].l;
}
}
/* No success with previous cost, so increment cost (c) by 1
 * and for each other diagonal, set from the end point of the
 * diagonal on one side of it or the other.
 */
for (k = klo+1; k <= khi-1 ; k += 2) {
if (v[k-1].x+1 > ahi) {
/* cannot step to the right from previous
 * diagonal as there is no room.
 * So step up from next diagonal.
 */
v[k] = v[k+1];
} else if (v[k+1].x - k > bhi || v[k-1].x+1 >= v[k+1].x) {
/* Cannot step up from next diagonal as either
 * there is no room, or doing so wouldn't get us
 * as close to the endpoint.
 * So step to the right.
 */
v[k] = v[k-1];
v[k].x++;
} else {
/* There is room in both directions, but
 * stepping up from the next diagonal gets us
 * closer
 */
v[k] = v[k+1];
}
}
/* Now we need to either extend or contract klo and khi
 * so they both change parity (odd vs even).
 * If we extend we need to step up (for klo) or to the
 * right (khi) from the adjacent diagonal. This is
 * not possible if we have hit the edge of the matrix, and
 * not sensible if the new point has a best case remaining
 * cost that is worse than our current worst case remaining
 * cost.
 * The best-case remaining cost is the absolute difference
 * between the remaining number of additions and the remaining
 * number of deletions - and assumes lots of snakes.
 */
/* new location if we step up from klo to klo-1*/
x = v[klo].x; y = x - (klo-1);
cost = abs((ahi-x)-(bhi-y));
klo--;
if (y <= bhi && cost <= worst) {
/* Looks acceptable - step up. */
v[klo] = v[klo+1];
} else do {
/* Contract: move klo inwards two diagonals at a time (keeping
 * the new parity) until the candidate is no worse than 'worst'.
 */
klo += 2;
x = v[klo].x; y = x - (klo-1);
cost = abs((ahi-x)-(bhi-y));
} while (cost > worst);
/* new location if we step to the right from khi to khi+1 */
x = v[khi].x+1; y = x - (khi+1);
cost = abs((ahi-x)-(bhi-y));
khi++;
if (x <= ahi && cost <= worst) {
/* Looks acceptable - step to the right */
v[khi] = v[khi-1];
v[khi].x++;
} else do {
/* Contract: move khi inwards two diagonals at a time (keeping
 * the new parity) until the candidate is no worse than 'worst'.
 */
khi -= 2;
x = v[khi].x+1; y = x - (khi+1);
cost = abs((ahi-x)-(bhi-y));
} while (cost > worst);
}
}
/* Growable buffer of 'struct csl' entries; csl_add() appends to it. */
struct cslb {
int size; /* Number of entries allocated in 'csl' */
int len; /* Number of entries currently in use */
struct csl *csl; /* The entries themselves; grown with realloc() by csl_add() */
};
/* Append the common run (a, b, len) to 'buf'.
 *
 * A non-empty run that starts exactly where the previous entry ends, in
 * both sequences, is merged into that entry rather than stored
 * separately.  A run with len == 0 is always stored as a new entry, which
 * is how the list terminator gets appended.
 *
 * The backing array starts at 64 entries and doubles whenever it is full.
 * Running out of memory is fatal: we abort() rather than dereference a
 * NULL pointer and lose track of the existing array.
 */
static void csl_add(struct cslb *buf, int a, int b, int len)
{
	struct csl *csl;

	if (len && buf->len) {
		csl = buf->csl + buf->len - 1;
		if (csl->a + csl->len == a &&
		    csl->b + csl->len == b) {
			/* Contiguous with the previous run - just extend it */
			csl->len += len;
			return;
		}
	}
	if (buf->size <= buf->len) {
		int newsize = buf->size < 64 ? 64 : buf->size * 2;
		struct csl *grown = realloc(buf->csl,
					    sizeof(buf->csl[0]) * newsize);

		/* Keep the old pointer until we know realloc() succeeded */
		if (!grown)
			abort();
		buf->csl = grown;
		buf->size = newsize;
	}
	csl = buf->csl + buf->len;
	csl->len = len;
	csl->a = a;
	csl->b = b;
	buf->len += 1;
}
/* lcsl == longest common sub-list.
 *
 * Append to 'cslb', in order, every common run found in the sub-matrix
 * alo..ahi of 'a' by blo..bhi of 'b'.
 *
 * find_common() reports the snake where the best path crosses the
 * midline.  The region before that snake is processed first, then the
 * snake itself is added, then the region after it, so runs reach 'cslb'
 * in increasing order.
 *
 * 'v' is the scratch array handed to find_common().  'shortcut' is
 * passed to find_common() for this region and for the region after the
 * snake; the region before the snake is always searched with the
 * shortcut disabled.
 */
static void lcsl(struct file *a, int alo, int ahi,
		 struct file *b, int blo, int bhi,
		 struct cslb *cslb,
		 struct v *v, int shortcut)
{
	int snake_alo = alo, snake_ahi = ahi;
	int snake_blo = blo, snake_bhi = bhi;
	int snakes;

	/* Nothing can be common if either range is empty */
	if (alo >= ahi || blo >= bhi)
		return;

	snakes = find_common(a, b,
			     &snake_alo, &snake_ahi,
			     &snake_blo, &snake_bhi,
			     v, shortcut);
	/* No snakes at all in this sub-matrix - recursion stops here */
	if (snakes == 0)
		return;

	/* Everything before the midline snake */
	lcsl(a, alo, snake_alo,
	     b, blo, snake_blo,
	     cslb, v, 0);

	/* The midline snake itself, if it is not empty.  csl_add()
	 * joins it to the previous entry when they are adjacent.
	 */
	if (snake_ahi > snake_alo)
		csl_add(cslb, snake_alo, snake_blo, snake_ahi - snake_alo);

	/* Everything after the midline snake */
	lcsl(a, snake_ahi, ahi,
	     b, snake_bhi, bhi,
	     cslb, v, shortcut);
}
/* Post-process a common-sub-list to counter the greediness of the
 * 'diff' algorithm.
 *
 * If two common sequences are separated by only an add or remove,
 * and the first sequence ends the same as the middle text,
 * extend the second and contract the first in the hope that the
 * first might become empty.
 * i.e. if we have:
 * [ foo X ] X [ bar ]
 * [ foo X ] [ bar ]
 * Then change it to:
 * [ foo ] X [ X bar ]
 * [ foo ] [ X bar ]
 * We treat the final zero-length 'csl' as a common sequence which
 * can be extended so we must make sure to add a new zero-length csl
 * to the end.
 * If this doesn't make the first sequence disappear, and (one of the)
 * X(s) was a newline, then move back so the newline is at the end
 * of the first sequence. This encourages common sequences
 * to be whole-line units where possible.
 *
 * 'a' and 'b' are the two files the list refers to.  'list' is the
 * csl array, terminated by an entry with len == 0; it is modified in
 * place.  A NULL 'list' is accepted and ignored.
 */
static void fixup(struct file *a, struct file *b, struct csl *list)
{
struct csl *list1, *orig;
/* Index in 'a' of an end-of-line token we shuffled back past,
 * or -1 if there is none to line up with.
 */
int lasteol = -1;
/* Set once list1 has reached the terminating (len == 0) entry */
int found_end = 0;
if (!list)
return;
/* 'list' and 'list1' are adjacent pointers into the csl.
 * If a match gets deleted, they might not be physically
 * adjacent any more. Once we get to the end of the list
 * this will cease to matter - the list will be a bit
 * shorter is all.
 */
orig = list;
list1 = list+1;
while (list->len) {
if (list1->len == 0)
found_end = 1;
/* If a single token is either inserted or deleted
 * immediately after a matching token...
 */
if ((list->a+list->len == list1->a &&
list->b+list->len != list1->b &&
/* text at b inserted */
match(&b->list[list->b+list->len-1],
&b->list[list1->b-1])
)
||
(list->b+list->len == list1->b &&
list->a+list->len != list1->a &&
/* text at a deleted */
match(&a->list[list->a+list->len-1],
&a->list[list1->a-1])
)
) {
/* If the last common token is a simple end-of-line
 * record where it is. For a word-wise diff, this is
 * any EOL. For a line-wise diff this is a blank line.
 * If we are looking at a deletion it must be deleting
 * the eol, so record that deleted eol.
 */
if (ends_line(a->list[list->a+list->len-1])
&& a->list[list->a+list->len-1].len == 1
&& lasteol == -1
) {
lasteol = list1->a-1;
}
/* Expand the second match, shrink the first */
list1->a--;
list1->b--;
list1->len++;
list->len--;
/* If the first match has become empty, make it
 * disappear.. (and forget about the eol).
 */
if (list->len == 0) {
lasteol = -1;
if (found_end) {
/* Deleting just before the last
 * entry: move list1 into its place and turn
 * list1 back into a zero-length terminator
 * positioned just after it.
 */
*list = *list1;
list1->a += list1->len;
list1->b += list1->len;
list1->len = 0;
} else if (list > orig)
/* Deleting in the middle */
list--;
else {
/* deleting the first entry */
*list = *list1++;
}
}
} else {
/* Nothing interesting here, though if we
 * shuffled back past an eol, shuffle
 * forward to line up with that eol.
 * This causes an eol to bind more strongly
 * with the preceding line than the following.
 */
if (lasteol >= 0) {
/* Never shrink list1 to nothing, unless it stands in
 * for the terminator (found_end), which may be empty.
 */
while (list1->a <= lasteol
&& (list1->len > 1 ||
(found_end && list1->len > 0))) {
list1->a++;
list1->b++;
list1->len--;
list->len++;
}
lasteol = -1;
}
/* Advance: list1 becomes the new 'list' entry */
*++list = *list1;
if (found_end) {
/* Re-create the zero-length terminator after it */
list1->a += list1->len;
list1->b += list1->len;
list1->len = 0;
} else
list1++;
}
/* Sanity check: the two cursors must never coincide on a
 * non-empty entry.
 */
if (list->len && list1 == list)
abort();
}
}
/* qsort()-style comparison of two 'struct elmnt'.
 *
 * Elements are ordered by hash first.  When the hashes are equal, two
 * elements whose text both begins with a NUL byte compare equal;
 * otherwise a shorter element sorts before a longer one, and elements
 * of equal length are ordered by strncmp() on their text.
 */
static int elcmp(const void *v1, const void *v2)
{
	const struct elmnt *lhs = v1;
	const struct elmnt *rhs = v2;

	if (lhs->hash < rhs->hash)
		return -1;
	if (lhs->hash != rhs->hash)
		return 1;

	/* Same hash from here on */
	if (lhs->start[0] == 0 && rhs->start[0] == 0)
		return 0;
	if (lhs->len == rhs->len)
		return strncmp(lhs->start, rhs->start, lhs->len);
	return lhs->len - rhs->len;
}
#define BPL (sizeof(unsigned long) * 8)
/* Return a copy of 'f' in which every run of consecutive elements that
 * do not occur in 'ref' has been reduced to its first element.
 *
 * Membership is tested by sorting a private copy of ref's elements with
 * elcmp() and binary-searching it for each element of 'f'; match() on
 * the candidate found makes the final decision.
 *
 * If 'ref' is empty nothing can match, so the whole of 'f' is one such
 * run and only its first element is kept.
 *
 * The returned list is newly allocated and must be freed by the caller.
 * Its entries are copies of entries of 'f', so their 'start' pointers
 * are shared with 'f'.
 */
static struct file filter_unique(struct file f, struct file ref)
{
	struct file n;
	int fi, cnt;
	struct file sorted;

	sorted.list = wiggle_xmalloc(sizeof(sorted.list[0]) * ref.elcnt);
	sorted.elcnt = ref.elcnt;
	if (sorted.elcnt > 0) {
		memcpy(sorted.list, ref.list,
		       sizeof(sorted.list[0]) * sorted.elcnt);
		qsort(sorted.list, sorted.elcnt, sizeof(sorted.list[0]),
		      elcmp);
	}

	n.list = wiggle_xmalloc(sizeof(n.list[0]) * f.elcnt);
	n.elcnt = 0;
	/* cnt is the length of the current run of elements not in 'ref' */
	cnt = 0;
	for (fi = 0; fi < f.elcnt; fi++) {
		int lo = 0, hi = sorted.elcnt;
		int found;

		/* Narrow [lo, hi) down to the single candidate sorted.list[lo] */
		while (lo + 1 < hi) {
			int mid = (lo + hi) / 2;

			if (elcmp(&f.list[fi], &sorted.list[mid]) < 0)
				hi = mid;
			else
				lo = mid;
		}
		/* With an empty 'ref' there is no candidate: sorted.list[0]
		 * does not exist and must not be examined.
		 */
		found = sorted.elcnt > 0 &&
			match(&f.list[fi], &sorted.list[lo]);
		if (found)
			cnt = 0;
		else
			cnt += 1;
		/* Keep matches, and the first element of each non-matching run */
		if (cnt <= 1)
			n.list[n.elcnt++] = f.list[fi];
	}
	free(sorted.list);
	return n;
}
/* Rewrite one side of a csl so that it indexes 'to' instead of 'from'.
 *
 * The a (which == 0) or b (which != 0) field of each entry in 'csl'
 * is an index into 'from'; it is replaced by the index of the same
 * element in 'to'.  Elements are identified by their 'start' pointer:
 * 'to' has everything that 'from' has, plus more, and each
 * list[].start is unique.  The terminating (len == 0) entry is set to
 * to.elcnt.
 *
 * 'ti' is not reset between entries, so this relies on the csl
 * positions being in increasing order.  Failing to find an element
 * within 'to' is an internal error and aborts.
 */
static void remap(struct csl *csl, int which, struct file from, struct file to)
{
	int ti = 0;

	while (csl->len) {
		int fi = which ? csl->b : csl->a;

		/* Test the bound before looking at to.list[ti] so that
		 * we never read past the last element of 'to'.
		 */
		while (ti < to.elcnt &&
		       to.list[ti].start != from.list[fi].start)
			ti += 1;
		if (ti >= to.elcnt)
			abort();
		if (which)
			csl->b = ti;
		else
			csl->a = ti;
		csl += 1;
	}
	if (which)
		csl->b = to.elcnt;
	else
		csl->a = to.elcnt;
}
/* Main entry point - find the common-sub-list of files 'a' and 'b'.
* The final element in the list will have 'len' == 0 and will point
* beyond the end of the files.
*/
struct csl *wiggle_diff(struct file a, struct file b, int shortest)
{
struct v *v;
struct cslb cslb = {};
struct file af, bf;
/* Remove runs of 2 or more elements in one file that don't
* exist in the other file. This often makes the number of
* elements more manageable.
*/
af = filter_unique(a, b);
bf = filter_unique(b, a);
v = wiggle_xmalloc(sizeof(struct v)*(af.elcnt+bf.elcnt+2));
v += bf.elcnt+1;
lcsl(&af, 0, af.elcnt,
&bf, 0, bf.elcnt,
&cslb, v, !shortest);
csl_add(&cslb, af.elcnt, bf.elcnt, 0);
free(v-(bf.elcnt+1));
remap(cslb.csl, 0, af, a);
remap(cslb.csl, 1, bf, b);
free(af.list);
free(bf.list);
fixup(&a, &b, cslb.csl);
return cslb.csl;
}
/* Alternate entry point - find the common-sub-list in two
* subranges of files.
*/
struct csl *wiggle_diff_partial(struct file a, struct file b,
int alo, int ahi, int blo, int bhi)
{
struct v *v;
struct cslb cslb = {};
v = wiggle_xmalloc(sizeof(struct v)*(ahi-alo+bhi-blo+2));
v += bhi-alo+1;
lcsl(&a, alo, ahi,
&b, blo, bhi,
&cslb, v, 0);
csl_add(&cslb, ahi, bhi, 0);
free(v-(bhi-alo+1));
fixup(&a, &b, cslb.csl);
return cslb.csl;
}
/* Concatenate two csl lists, each terminated by a len == 0 entry.
 *
 * If the last run of 'c1' ends exactly where the first run of 'c2'
 * starts, in both files, the two runs are merged into one.  The
 * terminator of 'c2' becomes the terminator of the result.
 *
 * If either list is NULL the other is returned unchanged.  Otherwise
 * c1's storage is resized with realloc() and 'c2' is freed, so neither
 * argument may be used afterwards - use the returned pointer.
 * Running out of memory is fatal (abort()).
 */
struct csl *wiggle_csl_join(struct csl *c1, struct csl *c2)
{
	int cnt1, cnt2;
	int offset = 0;
	int i;
	struct csl *joined;

	if (c1 == NULL)
		return c2;
	if (c2 == NULL)
		return c1;

	/* Count the real (non-terminator) entries in each list */
	for (cnt1 = 0; c1[cnt1].len; cnt1++)
		;
	for (cnt2 = 0; c2[cnt2].len; cnt2++)
		;
	if (cnt1 && cnt2 &&
	    c1[cnt1-1].a + c1[cnt1-1].len == c2[0].a &&
	    c1[cnt1-1].b + c1[cnt1-1].len == c2[0].b) {
		/* Merge these two */
		c1[cnt1-1].len += c2[0].len;
		offset = 1;
		cnt2--;
	}

	joined = realloc(c1, (cnt1+cnt2+1)*sizeof(*c1));
	/* Do not dereference a failed allocation */
	if (!joined)
		abort();

	/* Copy the remaining entries of c2, including its terminator */
	for (i = 0; i <= cnt2; i++)
		joined[cnt1 + i] = c2[i + offset];

	free(c2);
	return joined;
}
/* When rediffing a patch, we *must* make sure the hunk headers
 * line up.  So don't do a full diff, but rather find the hunk
 * headers and diff the bits between them.
 *
 * An element whose text starts with a NUL byte is taken to be a hunk
 * header.  If either file is empty, or does not begin with such an
 * element, the input is not a patch and wiggle_diff() is used instead
 * ('shortest' is only used in that case).
 */
struct csl *wiggle_diff_patch(struct file a, struct file b, int shortest)
{
	struct csl *joined = NULL;
	int a_pos = 0;
	int b_pos = 0;

	if (a.elcnt == 0 || b.elcnt == 0 ||
	    a.list[0].start[0] != '\0' ||
	    b.list[0].start[0] != '\0')
		/* this is not a patch */
		return wiggle_diff(a, b, shortest);

	while (a_pos < a.elcnt && b_pos < b.elcnt) {
		int a_start = a_pos;
		int b_start = b_pos;
		struct csl *hunk;

		/* Step past the current header, then on to the next
		 * header or the end of the file.
		 */
		for (a_pos++;
		     a_pos < a.elcnt && a.list[a_pos].start[0] != '\0';
		     a_pos++)
			;
		for (b_pos++;
		     b_pos < b.elcnt && b.list[b_pos].start[0] != '\0';
		     b_pos++)
			;

		hunk = wiggle_diff_partial(a, b, a_start, a_pos, b_start, b_pos);
		joined = wiggle_csl_join(joined, hunk);
	}
	return joined;
}
#ifdef MAIN
/* Stand-alone test harness, only built when MAIN is defined.
 *
 * The command line is two lists of words separated by "--".  Each
 * word becomes one element (with hash 0) of file 'a' or file 'b';
 * the common-sub-list of the two is computed and printed.
 * Exits with status 1 if the "--" separator is missing.
 */
int main(int argc, char *argv[])
{
	struct file a, b;
	struct csl *csl;
	/* One allocation holds the elements of both files */
	struct elmnt *lst = wiggle_xmalloc(argc*sizeof(*lst));
	int arg;

	arg = 1;
	a.elcnt = 0;
	a.list = lst;
	while (argv[arg] && strcmp(argv[arg], "--")) {
		lst->hash = 0;
		lst->start = argv[arg];
		lst->len = strlen(argv[arg]);
		a.elcnt++;
		lst++;
		arg++;
	}
	if (!argv[arg]) {
		printf("AARGH\n");
		exit(1);
	}
	/* Skip the "--" separator */
	arg++;
	b.elcnt = 0;
	b.list = lst;
	while (argv[arg] && strcmp(argv[arg], "--")) {
		lst->hash = 0;
		lst->start = argv[arg];
		lst->len = strlen(argv[arg]);
		b.elcnt++;
		lst++;
		arg++;
	}

	csl = wiggle_diff(a, b, 1);
	/* NOTE(review): wiggle_diff() already runs fixup() on the list
	 * it returns; this second pass is kept from the original
	 * harness - confirm it is intended.
	 */
	fixup(&a, &b, csl);
	while (csl && csl->len) {
		int i;

		printf("%d,%d for %d:\n", csl->a, csl->b, csl->len);
		for (i = 0; i < csl->len; i++) {
			printf(" %.*s (%.*s)\n",
			       a.list[csl->a+i].len, a.list[csl->a+i].start,
			       b.list[csl->b+i].len, b.list[csl->b+i].start);
		}
		csl++;
	}

	exit(0);
}
#endif