🌐 AI搜索 & 代理 主页
blob: 3797ce6041981d1facda44d3a95d4448dbb8a091 [file] [log] [blame]
Nicolas Pitrea310d432005-05-19 14:27:141/*
2 * diff-delta.c: generate a delta between two buffers
3 *
Nicolas Pitre366b53c2007-05-26 02:16:274 * This code was greatly inspired by parts of LibXDiff from Davide Libenzi
5 * http://www.xmailserver.org/xdiff-lib.html
Nicolas Pitrea310d432005-05-19 14:27:146 *
Nicolas Pitre03aa8ff2009-09-14 06:41:167 * Rewritten for GIT by Nicolas Pitre <nico@fluxnic.net>, (C) 2005-2007
Nicolas Pitrea310d432005-05-19 14:27:148 *
Nicolas Pitre366b53c2007-05-26 02:16:279 * This code is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
Nicolas Pitrea310d432005-05-19 14:27:1412 */
13
Florian Forster63f17562006-06-18 15:18:0414#include "git-compat-util.h"
Junio C Hamano85023572006-12-19 22:34:1215#include "delta.h"
Nicolas Pitrea310d432005-05-19 14:27:1416
Nicolas Pitre08abe662006-04-25 03:07:4717/* maximum hash entry list for the same hash bucket */
18#define HASH_LIMIT 64
Nicolas Pitrea310d432005-05-19 14:27:1419
Nicolas Pitre3dc5a9e2006-04-29 04:58:0520#define RABIN_SHIFT 23
21#define RABIN_WINDOW 16
22
23static const unsigned int T[256] = {
24 0x00000000, 0xab59b4d1, 0x56b369a2, 0xfdeadd73, 0x063f6795, 0xad66d344,
25 0x508c0e37, 0xfbd5bae6, 0x0c7ecf2a, 0xa7277bfb, 0x5acda688, 0xf1941259,
26 0x0a41a8bf, 0xa1181c6e, 0x5cf2c11d, 0xf7ab75cc, 0x18fd9e54, 0xb3a42a85,
27 0x4e4ef7f6, 0xe5174327, 0x1ec2f9c1, 0xb59b4d10, 0x48719063, 0xe32824b2,
28 0x1483517e, 0xbfdae5af, 0x423038dc, 0xe9698c0d, 0x12bc36eb, 0xb9e5823a,
29 0x440f5f49, 0xef56eb98, 0x31fb3ca8, 0x9aa28879, 0x6748550a, 0xcc11e1db,
30 0x37c45b3d, 0x9c9defec, 0x6177329f, 0xca2e864e, 0x3d85f382, 0x96dc4753,
31 0x6b369a20, 0xc06f2ef1, 0x3bba9417, 0x90e320c6, 0x6d09fdb5, 0xc6504964,
32 0x2906a2fc, 0x825f162d, 0x7fb5cb5e, 0xd4ec7f8f, 0x2f39c569, 0x846071b8,
33 0x798aaccb, 0xd2d3181a, 0x25786dd6, 0x8e21d907, 0x73cb0474, 0xd892b0a5,
34 0x23470a43, 0x881ebe92, 0x75f463e1, 0xdeadd730, 0x63f67950, 0xc8afcd81,
35 0x354510f2, 0x9e1ca423, 0x65c91ec5, 0xce90aa14, 0x337a7767, 0x9823c3b6,
36 0x6f88b67a, 0xc4d102ab, 0x393bdfd8, 0x92626b09, 0x69b7d1ef, 0xc2ee653e,
37 0x3f04b84d, 0x945d0c9c, 0x7b0be704, 0xd05253d5, 0x2db88ea6, 0x86e13a77,
38 0x7d348091, 0xd66d3440, 0x2b87e933, 0x80de5de2, 0x7775282e, 0xdc2c9cff,
39 0x21c6418c, 0x8a9ff55d, 0x714a4fbb, 0xda13fb6a, 0x27f92619, 0x8ca092c8,
40 0x520d45f8, 0xf954f129, 0x04be2c5a, 0xafe7988b, 0x5432226d, 0xff6b96bc,
41 0x02814bcf, 0xa9d8ff1e, 0x5e738ad2, 0xf52a3e03, 0x08c0e370, 0xa39957a1,
42 0x584ced47, 0xf3155996, 0x0eff84e5, 0xa5a63034, 0x4af0dbac, 0xe1a96f7d,
43 0x1c43b20e, 0xb71a06df, 0x4ccfbc39, 0xe79608e8, 0x1a7cd59b, 0xb125614a,
44 0x468e1486, 0xedd7a057, 0x103d7d24, 0xbb64c9f5, 0x40b17313, 0xebe8c7c2,
45 0x16021ab1, 0xbd5bae60, 0x6cb54671, 0xc7ecf2a0, 0x3a062fd3, 0x915f9b02,
46 0x6a8a21e4, 0xc1d39535, 0x3c394846, 0x9760fc97, 0x60cb895b, 0xcb923d8a,
47 0x3678e0f9, 0x9d215428, 0x66f4eece, 0xcdad5a1f, 0x3047876c, 0x9b1e33bd,
48 0x7448d825, 0xdf116cf4, 0x22fbb187, 0x89a20556, 0x7277bfb0, 0xd92e0b61,
49 0x24c4d612, 0x8f9d62c3, 0x7836170f, 0xd36fa3de, 0x2e857ead, 0x85dcca7c,
50 0x7e09709a, 0xd550c44b, 0x28ba1938, 0x83e3ade9, 0x5d4e7ad9, 0xf617ce08,
51 0x0bfd137b, 0xa0a4a7aa, 0x5b711d4c, 0xf028a99d, 0x0dc274ee, 0xa69bc03f,
52 0x5130b5f3, 0xfa690122, 0x0783dc51, 0xacda6880, 0x570fd266, 0xfc5666b7,
53 0x01bcbbc4, 0xaae50f15, 0x45b3e48d, 0xeeea505c, 0x13008d2f, 0xb85939fe,
54 0x438c8318, 0xe8d537c9, 0x153feaba, 0xbe665e6b, 0x49cd2ba7, 0xe2949f76,
55 0x1f7e4205, 0xb427f6d4, 0x4ff24c32, 0xe4abf8e3, 0x19412590, 0xb2189141,
56 0x0f433f21, 0xa41a8bf0, 0x59f05683, 0xf2a9e252, 0x097c58b4, 0xa225ec65,
57 0x5fcf3116, 0xf49685c7, 0x033df00b, 0xa86444da, 0x558e99a9, 0xfed72d78,
58 0x0502979e, 0xae5b234f, 0x53b1fe3c, 0xf8e84aed, 0x17bea175, 0xbce715a4,
59 0x410dc8d7, 0xea547c06, 0x1181c6e0, 0xbad87231, 0x4732af42, 0xec6b1b93,
60 0x1bc06e5f, 0xb099da8e, 0x4d7307fd, 0xe62ab32c, 0x1dff09ca, 0xb6a6bd1b,
61 0x4b4c6068, 0xe015d4b9, 0x3eb80389, 0x95e1b758, 0x680b6a2b, 0xc352defa,
62 0x3887641c, 0x93ded0cd, 0x6e340dbe, 0xc56db96f, 0x32c6cca3, 0x999f7872,
63 0x6475a501, 0xcf2c11d0, 0x34f9ab36, 0x9fa01fe7, 0x624ac294, 0xc9137645,
64 0x26459ddd, 0x8d1c290c, 0x70f6f47f, 0xdbaf40ae, 0x207afa48, 0x8b234e99,
65 0x76c993ea, 0xdd90273b, 0x2a3b52f7, 0x8162e626, 0x7c883b55, 0xd7d18f84,
66 0x2c043562, 0x875d81b3, 0x7ab75cc0, 0xd1eee811
67};
68
69static const unsigned int U[256] = {
70 0x00000000, 0x7eb5200d, 0x5633f4cb, 0x2886d4c6, 0x073e5d47, 0x798b7d4a,
71 0x510da98c, 0x2fb88981, 0x0e7cba8e, 0x70c99a83, 0x584f4e45, 0x26fa6e48,
72 0x0942e7c9, 0x77f7c7c4, 0x5f711302, 0x21c4330f, 0x1cf9751c, 0x624c5511,
73 0x4aca81d7, 0x347fa1da, 0x1bc7285b, 0x65720856, 0x4df4dc90, 0x3341fc9d,
74 0x1285cf92, 0x6c30ef9f, 0x44b63b59, 0x3a031b54, 0x15bb92d5, 0x6b0eb2d8,
75 0x4388661e, 0x3d3d4613, 0x39f2ea38, 0x4747ca35, 0x6fc11ef3, 0x11743efe,
76 0x3eccb77f, 0x40799772, 0x68ff43b4, 0x164a63b9, 0x378e50b6, 0x493b70bb,
77 0x61bda47d, 0x1f088470, 0x30b00df1, 0x4e052dfc, 0x6683f93a, 0x1836d937,
78 0x250b9f24, 0x5bbebf29, 0x73386bef, 0x0d8d4be2, 0x2235c263, 0x5c80e26e,
79 0x740636a8, 0x0ab316a5, 0x2b7725aa, 0x55c205a7, 0x7d44d161, 0x03f1f16c,
80 0x2c4978ed, 0x52fc58e0, 0x7a7a8c26, 0x04cfac2b, 0x73e5d470, 0x0d50f47d,
81 0x25d620bb, 0x5b6300b6, 0x74db8937, 0x0a6ea93a, 0x22e87dfc, 0x5c5d5df1,
82 0x7d996efe, 0x032c4ef3, 0x2baa9a35, 0x551fba38, 0x7aa733b9, 0x041213b4,
83 0x2c94c772, 0x5221e77f, 0x6f1ca16c, 0x11a98161, 0x392f55a7, 0x479a75aa,
84 0x6822fc2b, 0x1697dc26, 0x3e1108e0, 0x40a428ed, 0x61601be2, 0x1fd53bef,
85 0x3753ef29, 0x49e6cf24, 0x665e46a5, 0x18eb66a8, 0x306db26e, 0x4ed89263,
86 0x4a173e48, 0x34a21e45, 0x1c24ca83, 0x6291ea8e, 0x4d29630f, 0x339c4302,
87 0x1b1a97c4, 0x65afb7c9, 0x446b84c6, 0x3adea4cb, 0x1258700d, 0x6ced5000,
88 0x4355d981, 0x3de0f98c, 0x15662d4a, 0x6bd30d47, 0x56ee4b54, 0x285b6b59,
89 0x00ddbf9f, 0x7e689f92, 0x51d01613, 0x2f65361e, 0x07e3e2d8, 0x7956c2d5,
90 0x5892f1da, 0x2627d1d7, 0x0ea10511, 0x7014251c, 0x5facac9d, 0x21198c90,
91 0x099f5856, 0x772a785b, 0x4c921c31, 0x32273c3c, 0x1aa1e8fa, 0x6414c8f7,
92 0x4bac4176, 0x3519617b, 0x1d9fb5bd, 0x632a95b0, 0x42eea6bf, 0x3c5b86b2,
93 0x14dd5274, 0x6a687279, 0x45d0fbf8, 0x3b65dbf5, 0x13e30f33, 0x6d562f3e,
94 0x506b692d, 0x2ede4920, 0x06589de6, 0x78edbdeb, 0x5755346a, 0x29e01467,
95 0x0166c0a1, 0x7fd3e0ac, 0x5e17d3a3, 0x20a2f3ae, 0x08242768, 0x76910765,
96 0x59298ee4, 0x279caee9, 0x0f1a7a2f, 0x71af5a22, 0x7560f609, 0x0bd5d604,
97 0x235302c2, 0x5de622cf, 0x725eab4e, 0x0ceb8b43, 0x246d5f85, 0x5ad87f88,
98 0x7b1c4c87, 0x05a96c8a, 0x2d2fb84c, 0x539a9841, 0x7c2211c0, 0x029731cd,
99 0x2a11e50b, 0x54a4c506, 0x69998315, 0x172ca318, 0x3faa77de, 0x411f57d3,
100 0x6ea7de52, 0x1012fe5f, 0x38942a99, 0x46210a94, 0x67e5399b, 0x19501996,
101 0x31d6cd50, 0x4f63ed5d, 0x60db64dc, 0x1e6e44d1, 0x36e89017, 0x485db01a,
102 0x3f77c841, 0x41c2e84c, 0x69443c8a, 0x17f11c87, 0x38499506, 0x46fcb50b,
103 0x6e7a61cd, 0x10cf41c0, 0x310b72cf, 0x4fbe52c2, 0x67388604, 0x198da609,
104 0x36352f88, 0x48800f85, 0x6006db43, 0x1eb3fb4e, 0x238ebd5d, 0x5d3b9d50,
105 0x75bd4996, 0x0b08699b, 0x24b0e01a, 0x5a05c017, 0x728314d1, 0x0c3634dc,
106 0x2df207d3, 0x534727de, 0x7bc1f318, 0x0574d315, 0x2acc5a94, 0x54797a99,
107 0x7cffae5f, 0x024a8e52, 0x06852279, 0x78300274, 0x50b6d6b2, 0x2e03f6bf,
108 0x01bb7f3e, 0x7f0e5f33, 0x57888bf5, 0x293dabf8, 0x08f998f7, 0x764cb8fa,
109 0x5eca6c3c, 0x207f4c31, 0x0fc7c5b0, 0x7172e5bd, 0x59f4317b, 0x27411176,
110 0x1a7c5765, 0x64c97768, 0x4c4fa3ae, 0x32fa83a3, 0x1d420a22, 0x63f72a2f,
111 0x4b71fee9, 0x35c4dee4, 0x1400edeb, 0x6ab5cde6, 0x42331920, 0x3c86392d,
112 0x133eb0ac, 0x6d8b90a1, 0x450d4467, 0x3bb8646a
113};
Nicolas Pitrea310d432005-05-19 14:27:14114
Nicolas Pitre08abe662006-04-25 03:07:47115struct index_entry {
Nicolas Pitrea310d432005-05-19 14:27:14116 const unsigned char *ptr;
Nicolas Pitre8e1454b2006-02-22 01:43:17117 unsigned int val;
David Kastrupd2100862007-09-08 21:17:44118};
119
120struct unpacked_index_entry {
121 struct index_entry entry;
122 struct unpacked_index_entry *next;
Nicolas Pitre8e1454b2006-02-22 01:43:17123};
Nicolas Pitrea310d432005-05-19 14:27:14124
Nicolas Pitre08abe662006-04-25 03:07:47125struct delta_index {
Brian Downing11779e72007-07-12 12:55:48126 unsigned long memsize;
Nicolas Pitre08abe662006-04-25 03:07:47127 const void *src_buf;
128 unsigned long src_size;
Nicolas Pitre3dc5a9e2006-04-29 04:58:05129 unsigned int hash_mask;
Florian Forster63f17562006-06-18 15:18:04130 struct index_entry *hash[FLEX_ARRAY];
Nicolas Pitre08abe662006-04-25 03:07:47131};
132
133struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
Nicolas Pitrea310d432005-05-19 14:27:14134{
Nicolas Pitre06a9f922006-05-03 03:31:00135 unsigned int i, hsize, hmask, entries, prev_val, *hash_count;
Nicolas Pitre08abe662006-04-25 03:07:47136 const unsigned char *data, *buffer = buf;
137 struct delta_index *index;
David Kastrupd2100862007-09-08 21:17:44138 struct unpacked_index_entry *entry, **hash;
139 struct index_entry *packed_entry, **packed_hash;
Nicolas Pitre8e1454b2006-02-22 01:43:17140 void *mem;
Nicolas Pitre06a9f922006-05-03 03:31:00141 unsigned long memsize;
Nicolas Pitrea310d432005-05-19 14:27:14142
Nicolas Pitre08abe662006-04-25 03:07:47143 if (!buf || !bufsize)
144 return NULL;
145
Nicolas Pitre3dc5a9e2006-04-29 04:58:05146 /* Determine index hash size. Note that indexing skips the
Pavel Roskin82e5a822006-07-10 05:50:18147 first byte to allow for optimizing the Rabin's polynomial
Nicolas Pitre3dc5a9e2006-04-29 04:58:05148 initialization in create_delta(). */
Nicolas Pitre506049c2010-08-21 05:00:13149 entries = (bufsize - 1) / RABIN_WINDOW;
150 if (bufsize >= 0xffffffffUL) {
151 /*
152 * Current delta format can't encode offsets into
153 * reference buffer with more than 32 bits.
154 */
155 entries = 0xfffffffeU / RABIN_WINDOW;
156 }
Nicolas Pitre8e1454b2006-02-22 01:43:17157 hsize = entries / 4;
Stefan Bellerf7466e92013-08-16 21:22:37158 for (i = 4; (1u << i) < hsize; i++);
Nicolas Pitre8e1454b2006-02-22 01:43:17159 hsize = 1 << i;
Nicolas Pitre3dc5a9e2006-04-29 04:58:05160 hmask = hsize - 1;
Nicolas Pitrea310d432005-05-19 14:27:14161
Nicolas Pitre8e1454b2006-02-22 01:43:17162 /* allocate lookup index */
David Kastrupd2100862007-09-08 21:17:44163 memsize = sizeof(*hash) * hsize +
Nicolas Pitre06a9f922006-05-03 03:31:00164 sizeof(*entry) * entries;
165 mem = malloc(memsize);
Nicolas Pitre8e1454b2006-02-22 01:43:17166 if (!mem)
167 return NULL;
168 hash = mem;
Nicolas Pitre08abe662006-04-25 03:07:47169 mem = hash + hsize;
170 entry = mem;
171
Nicolas Pitre8e1454b2006-02-22 01:43:17172 memset(hash, 0, hsize * sizeof(*hash));
173
Nicolas Pitrec13c6bf2006-03-08 19:32:50174 /* allocate an array to count hash entries */
175 hash_count = calloc(hsize, sizeof(*hash_count));
176 if (!hash_count) {
David Kastrupd2100862007-09-08 21:17:44177 free(hash);
Nicolas Pitrec13c6bf2006-03-08 19:32:50178 return NULL;
179 }
180
181 /* then populate the index */
Nicolas Pitre06a9f922006-05-03 03:31:00182 prev_val = ~0;
183 for (data = buffer + entries * RABIN_WINDOW - RABIN_WINDOW;
184 data >= buffer;
185 data -= RABIN_WINDOW) {
Nicolas Pitre3dc5a9e2006-04-29 04:58:05186 unsigned int val = 0;
187 for (i = 1; i <= RABIN_WINDOW; i++)
188 val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
Nicolas Pitre06a9f922006-05-03 03:31:00189 if (val == prev_val) {
190 /* keep the lowest of consecutive identical blocks */
David Kastrupd2100862007-09-08 21:17:44191 entry[-1].entry.ptr = data + RABIN_WINDOW;
192 --entries;
Nicolas Pitre06a9f922006-05-03 03:31:00193 } else {
194 prev_val = val;
195 i = val & hmask;
David Kastrupd2100862007-09-08 21:17:44196 entry->entry.ptr = data + RABIN_WINDOW;
197 entry->entry.val = val;
Nicolas Pitre06a9f922006-05-03 03:31:00198 entry->next = hash[i];
199 hash[i] = entry++;
200 hash_count[i]++;
Nicolas Pitre06a9f922006-05-03 03:31:00201 }
Nicolas Pitre3dc5a9e2006-04-29 04:58:05202 }
Nicolas Pitrea310d432005-05-19 14:27:14203
Nicolas Pitrec13c6bf2006-03-08 19:32:50204 /*
205 * Determine a limit on the number of entries in the same hash
Pavel Roskin82e5a822006-07-10 05:50:18206 * bucket. This guards us against pathological data sets causing
Nicolas Pitrec13c6bf2006-03-08 19:32:50207 * really bad hash distribution with most entries in the same hash
208 * bucket that would bring us to O(m*n) computing costs (m and n
209 * corresponding to reference and target buffer sizes).
210 *
Nicolas Pitre08abe662006-04-25 03:07:47211 * Make sure none of the hash buckets has more entries than
Nicolas Pitrec13c6bf2006-03-08 19:32:50212 * we're willing to test. Otherwise we cull the entry list
213 * uniformly to still preserve a good repartition across
214 * the reference buffer.
215 */
216 for (i = 0; i < hsize; i++) {
David Kastrup02e665c2007-09-08 21:25:55217 int acc;
218
219 if (hash_count[i] <= HASH_LIMIT)
Nicolas Pitrec13c6bf2006-03-08 19:32:50220 continue;
David Kastrup02e665c2007-09-08 21:25:55221
David Kastrup02e665c2007-09-08 21:25:55222 /* We leave exactly HASH_LIMIT entries in the bucket */
Nicolas Pitrece85b052007-12-18 15:15:39223 entries -= hash_count[i] - HASH_LIMIT;
David Kastrup02e665c2007-09-08 21:25:55224
Nicolas Pitrec13c6bf2006-03-08 19:32:50225 entry = hash[i];
David Kastrup02e665c2007-09-08 21:25:55226 acc = 0;
Nicolas Pitrece85b052007-12-18 15:15:39227
228 /*
229 * Assume that this loop is gone through exactly
230 * HASH_LIMIT times and is entered and left with
231 * acc==0. So the first statement in the loop
232 * contributes (hash_count[i]-HASH_LIMIT)*HASH_LIMIT
233 * to the accumulator, and the inner loop consequently
234 * is run (hash_count[i]-HASH_LIMIT) times, removing
235 * one element from the list each time. Since acc
236 * balances out to 0 at the final run, the inner loop
237 * body can't be left with entry==NULL. So we indeed
238 * encounter entry==NULL in the outer loop only.
239 */
Nicolas Pitrec13c6bf2006-03-08 19:32:50240 do {
David Kastrup02e665c2007-09-08 21:25:55241 acc += hash_count[i] - HASH_LIMIT;
242 if (acc > 0) {
243 struct unpacked_index_entry *keep = entry;
244 do {
245 entry = entry->next;
246 acc -= HASH_LIMIT;
247 } while (acc > 0);
248 keep->next = entry->next;
249 }
250 entry = entry->next;
251 } while (entry);
Nicolas Pitrec13c6bf2006-03-08 19:32:50252 }
253 free(hash_count);
254
Nicolas Pitrece85b052007-12-18 15:15:39255 /*
256 * Now create the packed index in array form
257 * rather than linked lists.
258 */
David Kastrupd2100862007-09-08 21:17:44259 memsize = sizeof(*index)
260 + sizeof(*packed_hash) * (hsize+1)
261 + sizeof(*packed_entry) * entries;
David Kastrupd2100862007-09-08 21:17:44262 mem = malloc(memsize);
David Kastrupd2100862007-09-08 21:17:44263 if (!mem) {
264 free(hash);
265 return NULL;
266 }
267
268 index = mem;
269 index->memsize = memsize;
270 index->src_buf = buf;
271 index->src_size = bufsize;
272 index->hash_mask = hmask;
273
Pierre Habouzitf9c5a802007-12-18 01:39:57274 mem = index->hash;
David Kastrupd2100862007-09-08 21:17:44275 packed_hash = mem;
276 mem = packed_hash + (hsize+1);
277 packed_entry = mem;
278
David Kastrupd2100862007-09-08 21:17:44279 for (i = 0; i < hsize; i++) {
Nicolas Pitrece85b052007-12-18 15:15:39280 /*
281 * Coalesce all entries belonging to one linked list
282 * into consecutive array entries.
283 */
David Kastrupd2100862007-09-08 21:17:44284 packed_hash[i] = packed_entry;
285 for (entry = hash[i]; entry; entry = entry->next)
286 *packed_entry++ = entry->entry;
287 }
288
Nicolas Pitrece85b052007-12-18 15:15:39289 /* Sentinel value to indicate the length of the last hash bucket */
David Kastrupd2100862007-09-08 21:17:44290 packed_hash[hsize] = packed_entry;
Nicolas Pitrece85b052007-12-18 15:15:39291
David Kastrupd2100862007-09-08 21:17:44292 assert(packed_entry - (struct index_entry *)mem == entries);
293 free(hash);
294
Nicolas Pitre08abe662006-04-25 03:07:47295 return index;
296}
297
298void free_delta_index(struct delta_index *index)
299{
300 free(index);
Nicolas Pitrea310d432005-05-19 14:27:14301}
302
Brian Downing11779e72007-07-12 12:55:48303unsigned long sizeof_delta_index(struct delta_index *index)
304{
305 if (index)
306 return index->memsize;
307 else
308 return 0;
309}
310
Nicolas Pitre3dc5a9e2006-04-29 04:58:05311/*
312 * The maximum size for any opcode sequence, including the initial header
Pavel Roskin82e5a822006-07-10 05:50:18313 * plus Rabin window plus biggest copy.
Nicolas Pitre3dc5a9e2006-04-29 04:58:05314 */
315#define MAX_OP_SIZE (5 + 5 + 1 + RABIN_WINDOW + 7)
Nicolas Pitrefe474b52006-02-22 01:41:41316
Nicolas Pitre08abe662006-04-25 03:07:47317void *
318create_delta(const struct delta_index *index,
319 const void *trg_buf, unsigned long trg_size,
320 unsigned long *delta_size, unsigned long max_size)
Nicolas Pitrea310d432005-05-19 14:27:14321{
Nicolas Pitre84336692007-05-26 01:38:58322 unsigned int i, outpos, outsize, moff, msize, val;
Nicolas Pitre5a1fb2c2006-03-18 03:45:07323 int inscnt;
Nicolas Pitre8e1454b2006-02-22 01:43:17324 const unsigned char *ref_data, *ref_top, *data, *top;
325 unsigned char *out;
Nicolas Pitrea310d432005-05-19 14:27:14326
Nicolas Pitre08abe662006-04-25 03:07:47327 if (!trg_buf || !trg_size)
Nicolas Pitre8e1454b2006-02-22 01:43:17328 return NULL;
329
Nicolas Pitrea310d432005-05-19 14:27:14330 outpos = 0;
331 outsize = 8192;
Nicolas Pitrefe474b52006-02-22 01:41:41332 if (max_size && outsize >= max_size)
333 outsize = max_size + MAX_OP_SIZE + 1;
Nicolas Pitrea310d432005-05-19 14:27:14334 out = malloc(outsize);
Nicolas Pitre08abe662006-04-25 03:07:47335 if (!out)
Nicolas Pitrea310d432005-05-19 14:27:14336 return NULL;
Nicolas Pitrea310d432005-05-19 14:27:14337
338 /* store reference buffer size */
Nicolas Pitre08abe662006-04-25 03:07:47339 i = index->src_size;
340 while (i >= 0x80) {
341 out[outpos++] = i | 0x80;
342 i >>= 7;
Nicolas Pitre69a2d422005-06-29 04:27:45343 }
Nicolas Pitre08abe662006-04-25 03:07:47344 out[outpos++] = i;
Nicolas Pitrea310d432005-05-19 14:27:14345
346 /* store target buffer size */
Nicolas Pitre08abe662006-04-25 03:07:47347 i = trg_size;
348 while (i >= 0x80) {
349 out[outpos++] = i | 0x80;
350 i >>= 7;
Nicolas Pitre69a2d422005-06-29 04:27:45351 }
Nicolas Pitre08abe662006-04-25 03:07:47352 out[outpos++] = i;
Nicolas Pitrea310d432005-05-19 14:27:14353
Nicolas Pitre08abe662006-04-25 03:07:47354 ref_data = index->src_buf;
355 ref_top = ref_data + index->src_size;
356 data = trg_buf;
Florian Forster1d7f1712006-06-18 15:18:09357 top = (const unsigned char *) trg_buf + trg_size;
Nicolas Pitre3dc5a9e2006-04-29 04:58:05358
359 outpos++;
360 val = 0;
361 for (i = 0; i < RABIN_WINDOW && data < top; i++, data++) {
362 out[outpos++] = *data;
363 val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
364 }
365 inscnt = i;
Nicolas Pitrea310d432005-05-19 14:27:14366
Nicolas Pitre84336692007-05-26 01:38:58367 moff = 0;
368 msize = 0;
Nicolas Pitre8e1454b2006-02-22 01:43:17369 while (data < top) {
Nicolas Pitre84336692007-05-26 01:38:58370 if (msize < 4096) {
371 struct index_entry *entry;
372 val ^= U[data[-RABIN_WINDOW]];
373 val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
374 i = val & index->hash_mask;
David Kastrupd2100862007-09-08 21:17:44375 for (entry = index->hash[i]; entry < index->hash[i+1]; entry++) {
Nicolas Pitre84336692007-05-26 01:38:58376 const unsigned char *ref = entry->ptr;
377 const unsigned char *src = data;
378 unsigned int ref_size = ref_top - ref;
379 if (entry->val != val)
380 continue;
381 if (ref_size > top - src)
382 ref_size = top - src;
383 if (ref_size <= msize)
384 break;
385 while (ref_size-- && *src++ == *ref)
386 ref++;
387 if (msize < ref - entry->ptr) {
388 /* this is our best match so far */
389 msize = ref - entry->ptr;
390 moff = entry->ptr - ref_data;
391 if (msize >= 4096) /* good enough */
392 break;
393 }
Nicolas Pitrea310d432005-05-19 14:27:14394 }
395 }
396
Nicolas Pitre3dc5a9e2006-04-29 04:58:05397 if (msize < 4) {
Nicolas Pitrea310d432005-05-19 14:27:14398 if (!inscnt)
399 outpos++;
400 out[outpos++] = *data++;
401 inscnt++;
402 if (inscnt == 0x7f) {
403 out[outpos - inscnt - 1] = inscnt;
404 inscnt = 0;
405 }
Nicolas Pitre84336692007-05-26 01:38:58406 msize = 0;
Nicolas Pitrea310d432005-05-19 14:27:14407 } else {
Nicolas Pitre84336692007-05-26 01:38:58408 unsigned int left;
Nicolas Pitre8e1454b2006-02-22 01:43:17409 unsigned char *op;
410
Nicolas Pitrea310d432005-05-19 14:27:14411 if (inscnt) {
Nicolas Pitre5a1fb2c2006-03-18 03:45:07412 while (moff && ref_data[moff-1] == data[-1]) {
Nicolas Pitre5a1fb2c2006-03-18 03:45:07413 /* we can match one byte back */
414 msize++;
415 moff--;
416 data--;
417 outpos--;
418 if (--inscnt)
419 continue;
420 outpos--; /* remove count slot */
421 inscnt--; /* make it -1 */
422 break;
423 }
Nicolas Pitrea310d432005-05-19 14:27:14424 out[outpos - inscnt - 1] = inscnt;
425 inscnt = 0;
426 }
427
Nicolas Pitre84336692007-05-26 01:38:58428 /* A copy op is currently limited to 64KB (pack v2) */
429 left = (msize < 0x10000) ? 0 : (msize - 0x10000);
430 msize -= left;
431
Nicolas Pitre8e1454b2006-02-22 01:43:17432 op = out + outpos++;
Nicolas Pitrea310d432005-05-19 14:27:14433 i = 0x80;
434
Nicolas Pitre84336692007-05-26 01:38:58435 if (moff & 0x000000ff)
436 out[outpos++] = moff >> 0, i |= 0x01;
437 if (moff & 0x0000ff00)
438 out[outpos++] = moff >> 8, i |= 0x02;
439 if (moff & 0x00ff0000)
440 out[outpos++] = moff >> 16, i |= 0x04;
441 if (moff & 0xff000000)
442 out[outpos++] = moff >> 24, i |= 0x08;
Nicolas Pitrea310d432005-05-19 14:27:14443
Nicolas Pitre84336692007-05-26 01:38:58444 if (msize & 0x00ff)
445 out[outpos++] = msize >> 0, i |= 0x10;
446 if (msize & 0xff00)
447 out[outpos++] = msize >> 8, i |= 0x20;
Nicolas Pitrea310d432005-05-19 14:27:14448
Nicolas Pitre8e1454b2006-02-22 01:43:17449 *op = i;
Nicolas Pitre84336692007-05-26 01:38:58450
451 data += msize;
452 moff += msize;
453 msize = left;
454
455 if (msize < 4096) {
456 int j;
457 val = 0;
458 for (j = -RABIN_WINDOW; j < 0; j++)
459 val = ((val << 8) | data[j])
460 ^ T[val >> RABIN_SHIFT];
461 }
Nicolas Pitrea310d432005-05-19 14:27:14462 }
463
Nicolas Pitrefe474b52006-02-22 01:41:41464 if (outpos >= outsize - MAX_OP_SIZE) {
Nicolas Pitrea310d432005-05-19 14:27:14465 void *tmp = out;
466 outsize = outsize * 3 / 2;
Nicolas Pitrefe474b52006-02-22 01:41:41467 if (max_size && outsize >= max_size)
468 outsize = max_size + MAX_OP_SIZE + 1;
469 if (max_size && outpos > max_size)
Nicolas Pitre3dc5a9e2006-04-29 04:58:05470 break;
Martin Koeglerb75c6c62007-05-29 19:08:35471 out = realloc(out, outsize);
Nicolas Pitrea310d432005-05-19 14:27:14472 if (!out) {
473 free(tmp);
Nicolas Pitrea310d432005-05-19 14:27:14474 return NULL;
475 }
476 }
477 }
478
479 if (inscnt)
480 out[outpos - inscnt - 1] = inscnt;
481
Nicolas Pitre3dc5a9e2006-04-29 04:58:05482 if (max_size && outpos > max_size) {
483 free(out);
484 return NULL;
485 }
486
Nicolas Pitrea310d432005-05-19 14:27:14487 *delta_size = outpos;
488 return out;
489}