🌐 AI搜索 & 代理 主页
blob: a4e28df714b4834e5efe42fa3abb647711913d71 [file] [log] [blame]
Nicolas Pitrea310d432005-05-19 14:27:141/*
2 * diff-delta.c: generate a delta between two buffers
3 *
Nicolas Pitre366b53c2007-05-26 02:16:274 * This code was greatly inspired by parts of LibXDiff from Davide Libenzi
5 * http://www.xmailserver.org/xdiff-lib.html
Nicolas Pitrea310d432005-05-19 14:27:146 *
Nicolas Pitre366b53c2007-05-26 02:16:277 * Rewritten for GIT by Nicolas Pitre <nico@cam.org>, (C) 2005-2007
Nicolas Pitrea310d432005-05-19 14:27:148 *
Nicolas Pitre366b53c2007-05-26 02:16:279 * This code is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
Nicolas Pitrea310d432005-05-19 14:27:1412 */
13
Florian Forster63f17562006-06-18 15:18:0414#include "git-compat-util.h"
Junio C Hamano85023572006-12-19 22:34:1215#include "delta.h"
Nicolas Pitrea310d432005-05-19 14:27:1416
Nicolas Pitre08abe662006-04-25 03:07:4717/* maximum hash entry list for the same hash bucket */
18#define HASH_LIMIT 64
Nicolas Pitrea310d432005-05-19 14:27:1419
Nicolas Pitre3dc5a9e2006-04-29 04:58:0520#define RABIN_SHIFT 23
21#define RABIN_WINDOW 16
22
23static const unsigned int T[256] = {
24 0x00000000, 0xab59b4d1, 0x56b369a2, 0xfdeadd73, 0x063f6795, 0xad66d344,
25 0x508c0e37, 0xfbd5bae6, 0x0c7ecf2a, 0xa7277bfb, 0x5acda688, 0xf1941259,
26 0x0a41a8bf, 0xa1181c6e, 0x5cf2c11d, 0xf7ab75cc, 0x18fd9e54, 0xb3a42a85,
27 0x4e4ef7f6, 0xe5174327, 0x1ec2f9c1, 0xb59b4d10, 0x48719063, 0xe32824b2,
28 0x1483517e, 0xbfdae5af, 0x423038dc, 0xe9698c0d, 0x12bc36eb, 0xb9e5823a,
29 0x440f5f49, 0xef56eb98, 0x31fb3ca8, 0x9aa28879, 0x6748550a, 0xcc11e1db,
30 0x37c45b3d, 0x9c9defec, 0x6177329f, 0xca2e864e, 0x3d85f382, 0x96dc4753,
31 0x6b369a20, 0xc06f2ef1, 0x3bba9417, 0x90e320c6, 0x6d09fdb5, 0xc6504964,
32 0x2906a2fc, 0x825f162d, 0x7fb5cb5e, 0xd4ec7f8f, 0x2f39c569, 0x846071b8,
33 0x798aaccb, 0xd2d3181a, 0x25786dd6, 0x8e21d907, 0x73cb0474, 0xd892b0a5,
34 0x23470a43, 0x881ebe92, 0x75f463e1, 0xdeadd730, 0x63f67950, 0xc8afcd81,
35 0x354510f2, 0x9e1ca423, 0x65c91ec5, 0xce90aa14, 0x337a7767, 0x9823c3b6,
36 0x6f88b67a, 0xc4d102ab, 0x393bdfd8, 0x92626b09, 0x69b7d1ef, 0xc2ee653e,
37 0x3f04b84d, 0x945d0c9c, 0x7b0be704, 0xd05253d5, 0x2db88ea6, 0x86e13a77,
38 0x7d348091, 0xd66d3440, 0x2b87e933, 0x80de5de2, 0x7775282e, 0xdc2c9cff,
39 0x21c6418c, 0x8a9ff55d, 0x714a4fbb, 0xda13fb6a, 0x27f92619, 0x8ca092c8,
40 0x520d45f8, 0xf954f129, 0x04be2c5a, 0xafe7988b, 0x5432226d, 0xff6b96bc,
41 0x02814bcf, 0xa9d8ff1e, 0x5e738ad2, 0xf52a3e03, 0x08c0e370, 0xa39957a1,
42 0x584ced47, 0xf3155996, 0x0eff84e5, 0xa5a63034, 0x4af0dbac, 0xe1a96f7d,
43 0x1c43b20e, 0xb71a06df, 0x4ccfbc39, 0xe79608e8, 0x1a7cd59b, 0xb125614a,
44 0x468e1486, 0xedd7a057, 0x103d7d24, 0xbb64c9f5, 0x40b17313, 0xebe8c7c2,
45 0x16021ab1, 0xbd5bae60, 0x6cb54671, 0xc7ecf2a0, 0x3a062fd3, 0x915f9b02,
46 0x6a8a21e4, 0xc1d39535, 0x3c394846, 0x9760fc97, 0x60cb895b, 0xcb923d8a,
47 0x3678e0f9, 0x9d215428, 0x66f4eece, 0xcdad5a1f, 0x3047876c, 0x9b1e33bd,
48 0x7448d825, 0xdf116cf4, 0x22fbb187, 0x89a20556, 0x7277bfb0, 0xd92e0b61,
49 0x24c4d612, 0x8f9d62c3, 0x7836170f, 0xd36fa3de, 0x2e857ead, 0x85dcca7c,
50 0x7e09709a, 0xd550c44b, 0x28ba1938, 0x83e3ade9, 0x5d4e7ad9, 0xf617ce08,
51 0x0bfd137b, 0xa0a4a7aa, 0x5b711d4c, 0xf028a99d, 0x0dc274ee, 0xa69bc03f,
52 0x5130b5f3, 0xfa690122, 0x0783dc51, 0xacda6880, 0x570fd266, 0xfc5666b7,
53 0x01bcbbc4, 0xaae50f15, 0x45b3e48d, 0xeeea505c, 0x13008d2f, 0xb85939fe,
54 0x438c8318, 0xe8d537c9, 0x153feaba, 0xbe665e6b, 0x49cd2ba7, 0xe2949f76,
55 0x1f7e4205, 0xb427f6d4, 0x4ff24c32, 0xe4abf8e3, 0x19412590, 0xb2189141,
56 0x0f433f21, 0xa41a8bf0, 0x59f05683, 0xf2a9e252, 0x097c58b4, 0xa225ec65,
57 0x5fcf3116, 0xf49685c7, 0x033df00b, 0xa86444da, 0x558e99a9, 0xfed72d78,
58 0x0502979e, 0xae5b234f, 0x53b1fe3c, 0xf8e84aed, 0x17bea175, 0xbce715a4,
59 0x410dc8d7, 0xea547c06, 0x1181c6e0, 0xbad87231, 0x4732af42, 0xec6b1b93,
60 0x1bc06e5f, 0xb099da8e, 0x4d7307fd, 0xe62ab32c, 0x1dff09ca, 0xb6a6bd1b,
61 0x4b4c6068, 0xe015d4b9, 0x3eb80389, 0x95e1b758, 0x680b6a2b, 0xc352defa,
62 0x3887641c, 0x93ded0cd, 0x6e340dbe, 0xc56db96f, 0x32c6cca3, 0x999f7872,
63 0x6475a501, 0xcf2c11d0, 0x34f9ab36, 0x9fa01fe7, 0x624ac294, 0xc9137645,
64 0x26459ddd, 0x8d1c290c, 0x70f6f47f, 0xdbaf40ae, 0x207afa48, 0x8b234e99,
65 0x76c993ea, 0xdd90273b, 0x2a3b52f7, 0x8162e626, 0x7c883b55, 0xd7d18f84,
66 0x2c043562, 0x875d81b3, 0x7ab75cc0, 0xd1eee811
67};
68
69static const unsigned int U[256] = {
70 0x00000000, 0x7eb5200d, 0x5633f4cb, 0x2886d4c6, 0x073e5d47, 0x798b7d4a,
71 0x510da98c, 0x2fb88981, 0x0e7cba8e, 0x70c99a83, 0x584f4e45, 0x26fa6e48,
72 0x0942e7c9, 0x77f7c7c4, 0x5f711302, 0x21c4330f, 0x1cf9751c, 0x624c5511,
73 0x4aca81d7, 0x347fa1da, 0x1bc7285b, 0x65720856, 0x4df4dc90, 0x3341fc9d,
74 0x1285cf92, 0x6c30ef9f, 0x44b63b59, 0x3a031b54, 0x15bb92d5, 0x6b0eb2d8,
75 0x4388661e, 0x3d3d4613, 0x39f2ea38, 0x4747ca35, 0x6fc11ef3, 0x11743efe,
76 0x3eccb77f, 0x40799772, 0x68ff43b4, 0x164a63b9, 0x378e50b6, 0x493b70bb,
77 0x61bda47d, 0x1f088470, 0x30b00df1, 0x4e052dfc, 0x6683f93a, 0x1836d937,
78 0x250b9f24, 0x5bbebf29, 0x73386bef, 0x0d8d4be2, 0x2235c263, 0x5c80e26e,
79 0x740636a8, 0x0ab316a5, 0x2b7725aa, 0x55c205a7, 0x7d44d161, 0x03f1f16c,
80 0x2c4978ed, 0x52fc58e0, 0x7a7a8c26, 0x04cfac2b, 0x73e5d470, 0x0d50f47d,
81 0x25d620bb, 0x5b6300b6, 0x74db8937, 0x0a6ea93a, 0x22e87dfc, 0x5c5d5df1,
82 0x7d996efe, 0x032c4ef3, 0x2baa9a35, 0x551fba38, 0x7aa733b9, 0x041213b4,
83 0x2c94c772, 0x5221e77f, 0x6f1ca16c, 0x11a98161, 0x392f55a7, 0x479a75aa,
84 0x6822fc2b, 0x1697dc26, 0x3e1108e0, 0x40a428ed, 0x61601be2, 0x1fd53bef,
85 0x3753ef29, 0x49e6cf24, 0x665e46a5, 0x18eb66a8, 0x306db26e, 0x4ed89263,
86 0x4a173e48, 0x34a21e45, 0x1c24ca83, 0x6291ea8e, 0x4d29630f, 0x339c4302,
87 0x1b1a97c4, 0x65afb7c9, 0x446b84c6, 0x3adea4cb, 0x1258700d, 0x6ced5000,
88 0x4355d981, 0x3de0f98c, 0x15662d4a, 0x6bd30d47, 0x56ee4b54, 0x285b6b59,
89 0x00ddbf9f, 0x7e689f92, 0x51d01613, 0x2f65361e, 0x07e3e2d8, 0x7956c2d5,
90 0x5892f1da, 0x2627d1d7, 0x0ea10511, 0x7014251c, 0x5facac9d, 0x21198c90,
91 0x099f5856, 0x772a785b, 0x4c921c31, 0x32273c3c, 0x1aa1e8fa, 0x6414c8f7,
92 0x4bac4176, 0x3519617b, 0x1d9fb5bd, 0x632a95b0, 0x42eea6bf, 0x3c5b86b2,
93 0x14dd5274, 0x6a687279, 0x45d0fbf8, 0x3b65dbf5, 0x13e30f33, 0x6d562f3e,
94 0x506b692d, 0x2ede4920, 0x06589de6, 0x78edbdeb, 0x5755346a, 0x29e01467,
95 0x0166c0a1, 0x7fd3e0ac, 0x5e17d3a3, 0x20a2f3ae, 0x08242768, 0x76910765,
96 0x59298ee4, 0x279caee9, 0x0f1a7a2f, 0x71af5a22, 0x7560f609, 0x0bd5d604,
97 0x235302c2, 0x5de622cf, 0x725eab4e, 0x0ceb8b43, 0x246d5f85, 0x5ad87f88,
98 0x7b1c4c87, 0x05a96c8a, 0x2d2fb84c, 0x539a9841, 0x7c2211c0, 0x029731cd,
99 0x2a11e50b, 0x54a4c506, 0x69998315, 0x172ca318, 0x3faa77de, 0x411f57d3,
100 0x6ea7de52, 0x1012fe5f, 0x38942a99, 0x46210a94, 0x67e5399b, 0x19501996,
101 0x31d6cd50, 0x4f63ed5d, 0x60db64dc, 0x1e6e44d1, 0x36e89017, 0x485db01a,
102 0x3f77c841, 0x41c2e84c, 0x69443c8a, 0x17f11c87, 0x38499506, 0x46fcb50b,
103 0x6e7a61cd, 0x10cf41c0, 0x310b72cf, 0x4fbe52c2, 0x67388604, 0x198da609,
104 0x36352f88, 0x48800f85, 0x6006db43, 0x1eb3fb4e, 0x238ebd5d, 0x5d3b9d50,
105 0x75bd4996, 0x0b08699b, 0x24b0e01a, 0x5a05c017, 0x728314d1, 0x0c3634dc,
106 0x2df207d3, 0x534727de, 0x7bc1f318, 0x0574d315, 0x2acc5a94, 0x54797a99,
107 0x7cffae5f, 0x024a8e52, 0x06852279, 0x78300274, 0x50b6d6b2, 0x2e03f6bf,
108 0x01bb7f3e, 0x7f0e5f33, 0x57888bf5, 0x293dabf8, 0x08f998f7, 0x764cb8fa,
109 0x5eca6c3c, 0x207f4c31, 0x0fc7c5b0, 0x7172e5bd, 0x59f4317b, 0x27411176,
110 0x1a7c5765, 0x64c97768, 0x4c4fa3ae, 0x32fa83a3, 0x1d420a22, 0x63f72a2f,
111 0x4b71fee9, 0x35c4dee4, 0x1400edeb, 0x6ab5cde6, 0x42331920, 0x3c86392d,
112 0x133eb0ac, 0x6d8b90a1, 0x450d4467, 0x3bb8646a
113};
Nicolas Pitrea310d432005-05-19 14:27:14114
Nicolas Pitre08abe662006-04-25 03:07:47115struct index_entry {
Nicolas Pitrea310d432005-05-19 14:27:14116 const unsigned char *ptr;
Nicolas Pitre8e1454b2006-02-22 01:43:17117 unsigned int val;
David Kastrupd2100862007-09-08 21:17:44118};
119
120struct unpacked_index_entry {
121 struct index_entry entry;
122 struct unpacked_index_entry *next;
Nicolas Pitre8e1454b2006-02-22 01:43:17123};
Nicolas Pitrea310d432005-05-19 14:27:14124
Nicolas Pitre08abe662006-04-25 03:07:47125struct delta_index {
Brian Downing11779e72007-07-12 12:55:48126 unsigned long memsize;
Nicolas Pitre08abe662006-04-25 03:07:47127 const void *src_buf;
128 unsigned long src_size;
Nicolas Pitre3dc5a9e2006-04-29 04:58:05129 unsigned int hash_mask;
Florian Forster63f17562006-06-18 15:18:04130 struct index_entry *hash[FLEX_ARRAY];
Nicolas Pitre08abe662006-04-25 03:07:47131};
132
133struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
Nicolas Pitrea310d432005-05-19 14:27:14134{
Nicolas Pitre06a9f922006-05-03 03:31:00135 unsigned int i, hsize, hmask, entries, prev_val, *hash_count;
Nicolas Pitre08abe662006-04-25 03:07:47136 const unsigned char *data, *buffer = buf;
137 struct delta_index *index;
David Kastrupd2100862007-09-08 21:17:44138 struct unpacked_index_entry *entry, **hash;
139 struct index_entry *packed_entry, **packed_hash;
Nicolas Pitre8e1454b2006-02-22 01:43:17140 void *mem;
Nicolas Pitre06a9f922006-05-03 03:31:00141 unsigned long memsize;
Nicolas Pitrea310d432005-05-19 14:27:14142
Nicolas Pitre08abe662006-04-25 03:07:47143 if (!buf || !bufsize)
144 return NULL;
145
Nicolas Pitre3dc5a9e2006-04-29 04:58:05146 /* Determine index hash size. Note that indexing skips the
Pavel Roskin82e5a822006-07-10 05:50:18147 first byte to allow for optimizing the Rabin's polynomial
Nicolas Pitre3dc5a9e2006-04-29 04:58:05148 initialization in create_delta(). */
149 entries = (bufsize - 1) / RABIN_WINDOW;
Nicolas Pitre8e1454b2006-02-22 01:43:17150 hsize = entries / 4;
Pierre Habouzitb05faa22006-08-23 09:17:55151 for (i = 4; (1u << i) < hsize && i < 31; i++);
Nicolas Pitre8e1454b2006-02-22 01:43:17152 hsize = 1 << i;
Nicolas Pitre3dc5a9e2006-04-29 04:58:05153 hmask = hsize - 1;
Nicolas Pitrea310d432005-05-19 14:27:14154
Nicolas Pitre8e1454b2006-02-22 01:43:17155 /* allocate lookup index */
David Kastrupd2100862007-09-08 21:17:44156 memsize = sizeof(*hash) * hsize +
Nicolas Pitre06a9f922006-05-03 03:31:00157 sizeof(*entry) * entries;
158 mem = malloc(memsize);
Nicolas Pitre8e1454b2006-02-22 01:43:17159 if (!mem)
160 return NULL;
161 hash = mem;
Nicolas Pitre08abe662006-04-25 03:07:47162 mem = hash + hsize;
163 entry = mem;
164
Nicolas Pitre8e1454b2006-02-22 01:43:17165 memset(hash, 0, hsize * sizeof(*hash));
166
Nicolas Pitrec13c6bf2006-03-08 19:32:50167 /* allocate an array to count hash entries */
168 hash_count = calloc(hsize, sizeof(*hash_count));
169 if (!hash_count) {
David Kastrupd2100862007-09-08 21:17:44170 free(hash);
Nicolas Pitrec13c6bf2006-03-08 19:32:50171 return NULL;
172 }
173
174 /* then populate the index */
Nicolas Pitre06a9f922006-05-03 03:31:00175 prev_val = ~0;
176 for (data = buffer + entries * RABIN_WINDOW - RABIN_WINDOW;
177 data >= buffer;
178 data -= RABIN_WINDOW) {
Nicolas Pitre3dc5a9e2006-04-29 04:58:05179 unsigned int val = 0;
180 for (i = 1; i <= RABIN_WINDOW; i++)
181 val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
Nicolas Pitre06a9f922006-05-03 03:31:00182 if (val == prev_val) {
183 /* keep the lowest of consecutive identical blocks */
David Kastrupd2100862007-09-08 21:17:44184 entry[-1].entry.ptr = data + RABIN_WINDOW;
185 --entries;
Nicolas Pitre06a9f922006-05-03 03:31:00186 } else {
187 prev_val = val;
188 i = val & hmask;
David Kastrupd2100862007-09-08 21:17:44189 entry->entry.ptr = data + RABIN_WINDOW;
190 entry->entry.val = val;
Nicolas Pitre06a9f922006-05-03 03:31:00191 entry->next = hash[i];
192 hash[i] = entry++;
193 hash_count[i]++;
Nicolas Pitre06a9f922006-05-03 03:31:00194 }
Nicolas Pitre3dc5a9e2006-04-29 04:58:05195 }
Nicolas Pitrea310d432005-05-19 14:27:14196
Nicolas Pitrec13c6bf2006-03-08 19:32:50197 /*
198 * Determine a limit on the number of entries in the same hash
Pavel Roskin82e5a822006-07-10 05:50:18199 * bucket. This guards us against pathological data sets causing
Nicolas Pitrec13c6bf2006-03-08 19:32:50200 * really bad hash distribution with most entries in the same hash
201 * bucket that would bring us to O(m*n) computing costs (m and n
202 * corresponding to reference and target buffer sizes).
203 *
Nicolas Pitre08abe662006-04-25 03:07:47204 * Make sure none of the hash buckets has more entries than
Nicolas Pitrec13c6bf2006-03-08 19:32:50205 * we're willing to test. Otherwise we cull the entry list
206 * uniformly to still preserve a good repartition across
207 * the reference buffer.
208 */
209 for (i = 0; i < hsize; i++) {
David Kastrup02e665c2007-09-08 21:25:55210 int acc;
211
212 if (hash_count[i] <= HASH_LIMIT)
Nicolas Pitrec13c6bf2006-03-08 19:32:50213 continue;
David Kastrup02e665c2007-09-08 21:25:55214
David Kastrup02e665c2007-09-08 21:25:55215 /* We leave exactly HASH_LIMIT entries in the bucket */
Nicolas Pitrece85b052007-12-18 15:15:39216 entries -= hash_count[i] - HASH_LIMIT;
David Kastrup02e665c2007-09-08 21:25:55217
Nicolas Pitrec13c6bf2006-03-08 19:32:50218 entry = hash[i];
David Kastrup02e665c2007-09-08 21:25:55219 acc = 0;
Nicolas Pitrece85b052007-12-18 15:15:39220
221 /*
222 * Assume that this loop is gone through exactly
223 * HASH_LIMIT times and is entered and left with
224 * acc==0. So the first statement in the loop
225 * contributes (hash_count[i]-HASH_LIMIT)*HASH_LIMIT
226 * to the accumulator, and the inner loop consequently
227 * is run (hash_count[i]-HASH_LIMIT) times, removing
228 * one element from the list each time. Since acc
229 * balances out to 0 at the final run, the inner loop
230 * body can't be left with entry==NULL. So we indeed
231 * encounter entry==NULL in the outer loop only.
232 */
Nicolas Pitrec13c6bf2006-03-08 19:32:50233 do {
David Kastrup02e665c2007-09-08 21:25:55234 acc += hash_count[i] - HASH_LIMIT;
235 if (acc > 0) {
236 struct unpacked_index_entry *keep = entry;
237 do {
238 entry = entry->next;
239 acc -= HASH_LIMIT;
240 } while (acc > 0);
241 keep->next = entry->next;
242 }
243 entry = entry->next;
244 } while (entry);
Nicolas Pitrec13c6bf2006-03-08 19:32:50245 }
246 free(hash_count);
247
Nicolas Pitrece85b052007-12-18 15:15:39248 /*
249 * Now create the packed index in array form
250 * rather than linked lists.
251 */
David Kastrupd2100862007-09-08 21:17:44252 memsize = sizeof(*index)
253 + sizeof(*packed_hash) * (hsize+1)
254 + sizeof(*packed_entry) * entries;
David Kastrupd2100862007-09-08 21:17:44255 mem = malloc(memsize);
David Kastrupd2100862007-09-08 21:17:44256 if (!mem) {
257 free(hash);
258 return NULL;
259 }
260
261 index = mem;
262 index->memsize = memsize;
263 index->src_buf = buf;
264 index->src_size = bufsize;
265 index->hash_mask = hmask;
266
Pierre Habouzitf9c5a802007-12-18 01:39:57267 mem = index->hash;
David Kastrupd2100862007-09-08 21:17:44268 packed_hash = mem;
269 mem = packed_hash + (hsize+1);
270 packed_entry = mem;
271
David Kastrupd2100862007-09-08 21:17:44272 for (i = 0; i < hsize; i++) {
Nicolas Pitrece85b052007-12-18 15:15:39273 /*
274 * Coalesce all entries belonging to one linked list
275 * into consecutive array entries.
276 */
David Kastrupd2100862007-09-08 21:17:44277 packed_hash[i] = packed_entry;
278 for (entry = hash[i]; entry; entry = entry->next)
279 *packed_entry++ = entry->entry;
280 }
281
Nicolas Pitrece85b052007-12-18 15:15:39282 /* Sentinel value to indicate the length of the last hash bucket */
David Kastrupd2100862007-09-08 21:17:44283 packed_hash[hsize] = packed_entry;
Nicolas Pitrece85b052007-12-18 15:15:39284
David Kastrupd2100862007-09-08 21:17:44285 assert(packed_entry - (struct index_entry *)mem == entries);
286 free(hash);
287
Nicolas Pitre08abe662006-04-25 03:07:47288 return index;
289}
290
291void free_delta_index(struct delta_index *index)
292{
293 free(index);
Nicolas Pitrea310d432005-05-19 14:27:14294}
295
Brian Downing11779e72007-07-12 12:55:48296unsigned long sizeof_delta_index(struct delta_index *index)
297{
298 if (index)
299 return index->memsize;
300 else
301 return 0;
302}
303
Nicolas Pitre3dc5a9e2006-04-29 04:58:05304/*
305 * The maximum size for any opcode sequence, including the initial header
Pavel Roskin82e5a822006-07-10 05:50:18306 * plus Rabin window plus biggest copy.
Nicolas Pitre3dc5a9e2006-04-29 04:58:05307 */
308#define MAX_OP_SIZE (5 + 5 + 1 + RABIN_WINDOW + 7)
Nicolas Pitrefe474b52006-02-22 01:41:41309
Nicolas Pitre08abe662006-04-25 03:07:47310void *
311create_delta(const struct delta_index *index,
312 const void *trg_buf, unsigned long trg_size,
313 unsigned long *delta_size, unsigned long max_size)
Nicolas Pitrea310d432005-05-19 14:27:14314{
Nicolas Pitre84336692007-05-26 01:38:58315 unsigned int i, outpos, outsize, moff, msize, val;
Nicolas Pitre5a1fb2c2006-03-18 03:45:07316 int inscnt;
Nicolas Pitre8e1454b2006-02-22 01:43:17317 const unsigned char *ref_data, *ref_top, *data, *top;
318 unsigned char *out;
Nicolas Pitrea310d432005-05-19 14:27:14319
Nicolas Pitre08abe662006-04-25 03:07:47320 if (!trg_buf || !trg_size)
Nicolas Pitre8e1454b2006-02-22 01:43:17321 return NULL;
322
Nicolas Pitrea310d432005-05-19 14:27:14323 outpos = 0;
324 outsize = 8192;
Nicolas Pitrefe474b52006-02-22 01:41:41325 if (max_size && outsize >= max_size)
326 outsize = max_size + MAX_OP_SIZE + 1;
Nicolas Pitrea310d432005-05-19 14:27:14327 out = malloc(outsize);
Nicolas Pitre08abe662006-04-25 03:07:47328 if (!out)
Nicolas Pitrea310d432005-05-19 14:27:14329 return NULL;
Nicolas Pitrea310d432005-05-19 14:27:14330
331 /* store reference buffer size */
Nicolas Pitre08abe662006-04-25 03:07:47332 i = index->src_size;
333 while (i >= 0x80) {
334 out[outpos++] = i | 0x80;
335 i >>= 7;
Nicolas Pitre69a2d422005-06-29 04:27:45336 }
Nicolas Pitre08abe662006-04-25 03:07:47337 out[outpos++] = i;
Nicolas Pitrea310d432005-05-19 14:27:14338
339 /* store target buffer size */
Nicolas Pitre08abe662006-04-25 03:07:47340 i = trg_size;
341 while (i >= 0x80) {
342 out[outpos++] = i | 0x80;
343 i >>= 7;
Nicolas Pitre69a2d422005-06-29 04:27:45344 }
Nicolas Pitre08abe662006-04-25 03:07:47345 out[outpos++] = i;
Nicolas Pitrea310d432005-05-19 14:27:14346
Nicolas Pitre08abe662006-04-25 03:07:47347 ref_data = index->src_buf;
348 ref_top = ref_data + index->src_size;
349 data = trg_buf;
Florian Forster1d7f1712006-06-18 15:18:09350 top = (const unsigned char *) trg_buf + trg_size;
Nicolas Pitre3dc5a9e2006-04-29 04:58:05351
352 outpos++;
353 val = 0;
354 for (i = 0; i < RABIN_WINDOW && data < top; i++, data++) {
355 out[outpos++] = *data;
356 val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
357 }
358 inscnt = i;
Nicolas Pitrea310d432005-05-19 14:27:14359
Nicolas Pitre84336692007-05-26 01:38:58360 moff = 0;
361 msize = 0;
Nicolas Pitre8e1454b2006-02-22 01:43:17362 while (data < top) {
Nicolas Pitre84336692007-05-26 01:38:58363 if (msize < 4096) {
364 struct index_entry *entry;
365 val ^= U[data[-RABIN_WINDOW]];
366 val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
367 i = val & index->hash_mask;
David Kastrupd2100862007-09-08 21:17:44368 for (entry = index->hash[i]; entry < index->hash[i+1]; entry++) {
Nicolas Pitre84336692007-05-26 01:38:58369 const unsigned char *ref = entry->ptr;
370 const unsigned char *src = data;
371 unsigned int ref_size = ref_top - ref;
372 if (entry->val != val)
373 continue;
374 if (ref_size > top - src)
375 ref_size = top - src;
376 if (ref_size <= msize)
377 break;
378 while (ref_size-- && *src++ == *ref)
379 ref++;
380 if (msize < ref - entry->ptr) {
381 /* this is our best match so far */
382 msize = ref - entry->ptr;
383 moff = entry->ptr - ref_data;
384 if (msize >= 4096) /* good enough */
385 break;
386 }
Nicolas Pitrea310d432005-05-19 14:27:14387 }
388 }
389
Nicolas Pitre3dc5a9e2006-04-29 04:58:05390 if (msize < 4) {
Nicolas Pitrea310d432005-05-19 14:27:14391 if (!inscnt)
392 outpos++;
393 out[outpos++] = *data++;
394 inscnt++;
395 if (inscnt == 0x7f) {
396 out[outpos - inscnt - 1] = inscnt;
397 inscnt = 0;
398 }
Nicolas Pitre84336692007-05-26 01:38:58399 msize = 0;
Nicolas Pitrea310d432005-05-19 14:27:14400 } else {
Nicolas Pitre84336692007-05-26 01:38:58401 unsigned int left;
Nicolas Pitre8e1454b2006-02-22 01:43:17402 unsigned char *op;
403
Nicolas Pitrea310d432005-05-19 14:27:14404 if (inscnt) {
Nicolas Pitre5a1fb2c2006-03-18 03:45:07405 while (moff && ref_data[moff-1] == data[-1]) {
Nicolas Pitre5a1fb2c2006-03-18 03:45:07406 /* we can match one byte back */
407 msize++;
408 moff--;
409 data--;
410 outpos--;
411 if (--inscnt)
412 continue;
413 outpos--; /* remove count slot */
414 inscnt--; /* make it -1 */
415 break;
416 }
Nicolas Pitrea310d432005-05-19 14:27:14417 out[outpos - inscnt - 1] = inscnt;
418 inscnt = 0;
419 }
420
Nicolas Pitre84336692007-05-26 01:38:58421 /* A copy op is currently limited to 64KB (pack v2) */
422 left = (msize < 0x10000) ? 0 : (msize - 0x10000);
423 msize -= left;
424
Nicolas Pitre8e1454b2006-02-22 01:43:17425 op = out + outpos++;
Nicolas Pitrea310d432005-05-19 14:27:14426 i = 0x80;
427
Nicolas Pitre84336692007-05-26 01:38:58428 if (moff & 0x000000ff)
429 out[outpos++] = moff >> 0, i |= 0x01;
430 if (moff & 0x0000ff00)
431 out[outpos++] = moff >> 8, i |= 0x02;
432 if (moff & 0x00ff0000)
433 out[outpos++] = moff >> 16, i |= 0x04;
434 if (moff & 0xff000000)
435 out[outpos++] = moff >> 24, i |= 0x08;
Nicolas Pitrea310d432005-05-19 14:27:14436
Nicolas Pitre84336692007-05-26 01:38:58437 if (msize & 0x00ff)
438 out[outpos++] = msize >> 0, i |= 0x10;
439 if (msize & 0xff00)
440 out[outpos++] = msize >> 8, i |= 0x20;
Nicolas Pitrea310d432005-05-19 14:27:14441
Nicolas Pitre8e1454b2006-02-22 01:43:17442 *op = i;
Nicolas Pitre84336692007-05-26 01:38:58443
444 data += msize;
445 moff += msize;
446 msize = left;
447
448 if (msize < 4096) {
449 int j;
450 val = 0;
451 for (j = -RABIN_WINDOW; j < 0; j++)
452 val = ((val << 8) | data[j])
453 ^ T[val >> RABIN_SHIFT];
454 }
Nicolas Pitrea310d432005-05-19 14:27:14455 }
456
Nicolas Pitrefe474b52006-02-22 01:41:41457 if (outpos >= outsize - MAX_OP_SIZE) {
Nicolas Pitrea310d432005-05-19 14:27:14458 void *tmp = out;
459 outsize = outsize * 3 / 2;
Nicolas Pitrefe474b52006-02-22 01:41:41460 if (max_size && outsize >= max_size)
461 outsize = max_size + MAX_OP_SIZE + 1;
462 if (max_size && outpos > max_size)
Nicolas Pitre3dc5a9e2006-04-29 04:58:05463 break;
Martin Koeglerb75c6c62007-05-29 19:08:35464 out = realloc(out, outsize);
Nicolas Pitrea310d432005-05-19 14:27:14465 if (!out) {
466 free(tmp);
Nicolas Pitrea310d432005-05-19 14:27:14467 return NULL;
468 }
469 }
470 }
471
472 if (inscnt)
473 out[outpos - inscnt - 1] = inscnt;
474
Nicolas Pitre3dc5a9e2006-04-29 04:58:05475 if (max_size && outpos > max_size) {
476 free(out);
477 return NULL;
478 }
479
Nicolas Pitrea310d432005-05-19 14:27:14480 *delta_size = outpos;
481 return out;
482}