1 /*
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "blocks_diff.h"
17 #include "scope_guard.h"
18 #include <cstdio>
19 #include <iostream>
20 #include <vector>
21 #include "update_diff.h"
22
23 using namespace Hpackage;
24 using namespace std;
25 using namespace Updater;
26
27 namespace UpdatePatch {
28 #define SWAP(a, b) auto swapTmp = (a); (a) = (b); (b) = swapTmp
29 #define MIN(x, y) (((x) < (y)) ? (x) : (y))
30 #define SET_BUFFER(y, buffer, index) \
31 (buffer)[index] = static_cast<uint8_t>((y) % 256); (y) -= (buffer)[index]; (y) = (y) / 256
32
33 constexpr uint32_t BUCKET_SIZE = 256;
34 constexpr uint32_t MULTIPLE_TWO = 2;
35 constexpr int64_t BLOCK_SCORE = 8;
36 constexpr int64_t MIN_LENGTH = 16;
37
WriteLE64(const BlockBuffer & buffer,int64_t value)38 static void WriteLE64(const BlockBuffer &buffer, int64_t value)
39 {
40 int32_t index = 0;
41 auto y = static_cast<uint64_t>((value < 0) ? -value : value);
42 SET_BUFFER(y, buffer.buffer, index);
43 index++;
44 SET_BUFFER(y, buffer.buffer, index);
45 index++;
46 SET_BUFFER(y, buffer.buffer, index);
47 index++;
48 SET_BUFFER(y, buffer.buffer, index);
49 index++;
50 SET_BUFFER(y, buffer.buffer, index);
51 index++;
52 SET_BUFFER(y, buffer.buffer, index);
53 index++;
54 SET_BUFFER(y, buffer.buffer, index);
55 index++;
56 SET_BUFFER(y, buffer.buffer, index);
57 if (value < 0) {
58 buffer.buffer[index] |= 0x80;
59 }
60 }
61
MakePatch(const std::string & oldFileName,const std::string & newFileName,const std::string & patchFileName)62 int32_t BlocksDiff::MakePatch(const std::string &oldFileName, const std::string &newFileName,
63 const std::string &patchFileName)
64 {
65 PATCH_LOGI("BlocksDiff::MakePatch %s ", patchFileName.c_str());
66 std::fstream patchFile(patchFileName, std::ios::out | std::ios::trunc | std::ios::binary);
67 if (patchFile.fail()) {
68 PATCH_LOGE("Failed to open %s", strerror(errno));
69 return -1;
70 }
71
72 MemMapInfo oldBuffer {};
73 MemMapInfo newBuffer {};
74 int32_t ret = PatchMapFile(oldFileName, oldBuffer);
75 int32_t ret1 = PatchMapFile(newFileName, newBuffer);
76 if (ret != 0 || ret1 != 0) {
77 PATCH_LOGE("Failed to open %s", newFileName.c_str());
78 return -1;
79 }
80 BlockBuffer newInfo = {newBuffer.memory, newBuffer.length};
81 BlockBuffer oldInfo = {oldBuffer.memory, oldBuffer.length};
82 std::unique_ptr<BlocksDiff> blockdiff = std::make_unique<BlocksStreamDiff>(patchFile, 0);
83 size_t patchSize = 0;
84 ret = blockdiff->MakePatch(newInfo, oldInfo, patchSize);
85 if (ret != PATCH_SUCCESS) {
86 PATCH_LOGE("Failed to generate patch");
87 return ret;
88 }
89 PATCH_DEBUG("MakePatch %zu", static_cast<size_t>(patchFile.tellp()));
90 patchFile.close();
91 PATCH_LOGI("BlocksDiff::MakePatch success %zu", patchSize);
92 return ret;
93 }
94
MakePatch(const BlockBuffer & newInfo,const BlockBuffer & oldInfo,std::vector<uint8_t> & patchData,size_t offset,size_t & patchSize)95 int32_t BlocksDiff::MakePatch(const BlockBuffer &newInfo,
96 const BlockBuffer &oldInfo, std::vector<uint8_t> &patchData, size_t offset, size_t &patchSize)
97 {
98 if (patchData.empty()) {
99 patchData.resize(IGMDIFF_LIMIT_UNIT);
100 }
101 std::unique_ptr<BlocksDiff> blockdiff = std::make_unique<BlocksBufferDiff>(patchData, offset);
102 int32_t ret = blockdiff->MakePatch(newInfo, oldInfo, patchSize);
103 if (ret != PATCH_SUCCESS) {
104 PATCH_LOGE("Failed to generate patch");
105 return ret;
106 }
107 if (patchData.size() < patchSize) {
108 PATCH_LOGE("Failed to make block patch");
109 return -1;
110 }
111 patchData.resize(patchSize);
112 PATCH_LOGI("BlocksDiff::MakePatch success %zu", patchSize);
113 return ret;
114 }
115
MakePatch(const BlockBuffer & newInfo,const BlockBuffer & oldInfo,std::fstream & patchFile,size_t & patchSize)116 int32_t BlocksDiff::MakePatch(const BlockBuffer &newInfo,
117 const BlockBuffer &oldInfo, std::fstream &patchFile, size_t &patchSize)
118 {
119 std::unique_ptr<BlocksDiff> blockdiff = std::make_unique<BlocksStreamDiff>(
120 patchFile, static_cast<size_t>(patchFile.tellp()));
121 int32_t ret = blockdiff->MakePatch(newInfo, oldInfo, patchSize);
122 if (ret != PATCH_SUCCESS) {
123 PATCH_LOGE("Failed to generate patch");
124 return ret;
125 }
126 PATCH_LOGI("BlocksDiff::MakePatch success %zu patchFile %zu",
127 patchSize, static_cast<size_t>(patchFile.tellp()));
128 return ret;
129 }
130
MakePatch(const BlockBuffer & newInfo,const BlockBuffer & oldInfo,size_t & patchSize)131 int32_t BlocksDiff::MakePatch(const BlockBuffer &newInfo, const BlockBuffer &oldInfo, size_t &patchSize)
132 {
133 if (suffixArray_ == nullptr) {
134 suffixArray_.reset(new SuffixArray<int32_t>());
135 if (suffixArray_ == nullptr) {
136 PATCH_LOGE("Failed to create SuffixArray");
137 return -1;
138 }
139 suffixArray_->Init(oldInfo);
140 }
141
142 std::vector<ControlData> controlDatas;
143 int32_t ret = GetCtrlDatas(newInfo, oldInfo, controlDatas);
144 if (ret != 0) {
145 PATCH_LOGE("Failed to get control data");
146 return ret;
147 }
148 patchSize = 0;
149 ret = WritePatchHeader(0, 0, 0, patchSize);
150 if (ret != 0) {
151 PATCH_LOGE("Failed to write patch header");
152 return ret;
153 }
154
155 size_t controlSize = patchSize;
156 ret = WriteControlData(controlDatas, patchSize);
157 PATCH_CHECK(ret == BZ_OK, return ret, "Failed to write diff data");
158 controlSize = patchSize - controlSize;
159
160 // 写diff数据
161 size_t diffDataSize = patchSize;
162 ret = WriteDiffData(controlDatas, patchSize);
163 PATCH_CHECK(ret == BZ_OK, return ret, "Failed to write diff data");
164 diffDataSize = patchSize - diffDataSize;
165
166 size_t extraDataSize = patchSize;
167 ret = WriteExtraData(controlDatas, patchSize);
168 PATCH_CHECK(ret == BZ_OK, return ret, "Failed to write extra data");
169 extraDataSize = patchSize - extraDataSize;
170
171 // write real data
172 size_t headerLen = 0;
173 ret = WritePatchHeader(controlSize, diffDataSize, newInfo.length, headerLen);
174 if (ret != 0) {
175 PATCH_LOGE("Failed to write patch header");
176 return ret;
177 }
178 PATCH_DEBUG("MakePatch success patchSize:%zu controlSize:%zu diffDataSize:%zu, extraDataSize:%zu",
179 patchSize, controlSize, diffDataSize, extraDataSize);
180 return 0;
181 }
182
CreateBZip2Adapter(size_t patchOffset)183 std::unique_ptr<DeflateAdapter> BlocksBufferDiff::CreateBZip2Adapter(size_t patchOffset)
184 {
185 std::unique_ptr<DeflateAdapter> bzip2Adapter = std::make_unique<BZipBuffer2Adapter>(patchData_,
186 offset_ + patchOffset);
187 if (bzip2Adapter == nullptr) {
188 PATCH_LOGE("Failed to create bzip2Adapter");
189 return nullptr;
190 }
191 bzip2Adapter->Open();
192 return bzip2Adapter;
193 }
194
CreateBZip2Adapter(size_t patchOffset)195 std::unique_ptr<DeflateAdapter> BlocksStreamDiff::CreateBZip2Adapter(size_t patchOffset)
196 {
197 std::unique_ptr<DeflateAdapter> bzip2Adapter = std::make_unique<BZip2StreamAdapter>(stream_);
198 if (bzip2Adapter == nullptr) {
199 PATCH_LOGE("Failed to create bzip2Adapter");
200 return nullptr;
201 }
202 bzip2Adapter->Open();
203 return bzip2Adapter;
204 }
205
WritePatchHeader(int64_t controlSize,int64_t diffDataSize,int64_t newSize,size_t & headerLen)206 int32_t BlocksBufferDiff::WritePatchHeader(int64_t controlSize,
207 int64_t diffDataSize, int64_t newSize, size_t &headerLen)
208 {
209 headerLen = std::char_traits<char>::length(BSDIFF_MAGIC) + sizeof(int64_t) + sizeof(int64_t) + sizeof(int64_t);
210 if (patchData_.size() <= headerLen + offset_) {
211 PATCH_LOGE("Invalid patch size");
212 return -1;
213 }
214
215 int32_t ret = memcpy_s(patchData_.data() + offset_, patchData_.size(), BSDIFF_MAGIC,
216 std::char_traits<char>::length(BSDIFF_MAGIC));
217 if (ret != 0) {
218 PATCH_LOGE("Failed to copy magic");
219 return ret;
220 }
221 headerLen = std::char_traits<char>::length(BSDIFF_MAGIC);
222 BlockBuffer data = {patchData_.data() + offset_ + headerLen, patchData_.size()};
223 WriteLE64(data, controlSize);
224 headerLen += sizeof(int64_t);
225 BlockBuffer diffData = {patchData_.data() + offset_ + headerLen, patchData_.size()};
226 WriteLE64(diffData, diffDataSize);
227 headerLen += sizeof(int64_t);
228 BlockBuffer newData = {patchData_.data() + offset_ + headerLen, patchData_.size()};
229 WriteLE64(newData, newSize);
230 headerLen += sizeof(int64_t);
231 return 0;
232 }
233
WritePatchHeader(int64_t controlSize,int64_t diffDataSize,int64_t newSize,size_t & headerLen)234 int32_t BlocksStreamDiff::WritePatchHeader(int64_t controlSize,
235 int64_t diffDataSize, int64_t newSize, size_t &headerLen)
236 {
237 PATCH_DEBUG("WritePatchHeader %zu", static_cast<size_t>(stream_.tellp()));
238 #ifdef __aarch64__
239 if (offset_ > static_cast<size_t>(numeric_limits<std::fstream::off_type>::max())) {
240 PATCH_LOGE("offset_ error");
241 return -1;
242 }
243 #endif
244 stream_.seekp(offset_, std::ios::beg);
245 stream_.write(BSDIFF_MAGIC, std::char_traits<char>::length(BSDIFF_MAGIC));
246 PkgBuffer buffer(sizeof(int64_t));
247 WriteLE64(buffer, controlSize);
248 stream_.write(reinterpret_cast<const char*>(buffer.buffer), sizeof(int64_t));
249 WriteLE64(buffer, diffDataSize);
250 stream_.write(reinterpret_cast<const char*>(buffer.buffer), sizeof(int64_t));
251 WriteLE64(buffer, newSize);
252 stream_.write(reinterpret_cast<const char*>(buffer.buffer), sizeof(int64_t));
253 headerLen = std::char_traits<char>::length(BSDIFF_MAGIC) + sizeof(int64_t) + sizeof(int64_t) + sizeof(int64_t);
254 stream_.seekp(0, std::ios::end);
255 return 0;
256 }
257
ComputeOldScore(const BlockBuffer & newInfo,const BlockBuffer & oldInfo,int64_t & oldScore,int64_t & matchLen)258 void BlocksDiff::ComputeOldScore(const BlockBuffer &newInfo,
259 const BlockBuffer &oldInfo, int64_t &oldScore, int64_t &matchLen)
260 {
261 int64_t newSize = static_cast<int64_t>(newInfo.length);
262 for (int64_t begin = currentOffset_ += matchLen; currentOffset_ < newSize; currentOffset_++) {
263 BlockBuffer newBuff = {newInfo.buffer + currentOffset_, newInfo.length - currentOffset_};
264 matchLen = suffixArray_->Search(newBuff, { oldInfo.buffer, oldInfo.length }, 0, oldInfo.length, matchPos_);
265 for (; begin < currentOffset_ + matchLen; begin++) {
266 if ((begin + lastOffset_ < static_cast<int64_t>(oldInfo.length))
267 && (oldInfo.buffer[begin + lastOffset_] == newInfo.buffer[begin])) {
268 oldScore++;
269 }
270 }
271 if (((matchLen == oldScore) && (matchLen != 0)) || (matchLen > (oldScore + BLOCK_SCORE))) {
272 break;
273 }
274 if ((currentOffset_ + lastOffset_ < static_cast<int64_t>(oldInfo.length)) &&
275 (oldInfo.buffer[currentOffset_ + lastOffset_] == newInfo.buffer[currentOffset_])) {
276 oldScore--;
277 }
278 }
279 }
280
ComputeLength(const BlockBuffer & newInfo,const BlockBuffer & oldInfo,int64_t & lengthFront,int64_t & lengthBack)281 void BlocksDiff::ComputeLength(const BlockBuffer &newInfo,
282 const BlockBuffer &oldInfo, int64_t &lengthFront, int64_t &lengthBack)
283 {
284 lengthFront = 0;
285 lengthBack = 0;
286 int64_t i = 0;
287 int64_t s = 0;
288 int64_t tmp = 0;
289 while (((lastScan_ + i) < currentOffset_) && ((lastPos_ + i) < static_cast<int64_t>(oldInfo.length))) {
290 if (oldInfo.buffer[lastPos_ + i] == newInfo.buffer[lastScan_ + i]) {
291 s++;
292 }
293 i++;
294 if ((s * MULTIPLE_TWO - i) > (tmp * MULTIPLE_TWO - lengthFront)) {
295 tmp = s;
296 lengthFront = i;
297 }
298 }
299 s = 0;
300 tmp = 0;
301 if (currentOffset_ < static_cast<int64_t>(newInfo.length)) {
302 for (i = 1; (currentOffset_ >= lastScan_ + i) && (matchPos_ >= i); i++) {
303 if (oldInfo.buffer[matchPos_ - i] == newInfo.buffer[currentOffset_ - i]) {
304 s++;
305 }
306 if ((s * MULTIPLE_TWO - i) > (tmp * MULTIPLE_TWO - lengthBack)) {
307 tmp = s;
308 lengthBack = i;
309 }
310 }
311 }
312
313 if (lastScan_ + lengthFront > currentOffset_ - lengthBack) {
314 int64_t lens = 0;
315 int64_t overlap = (lastScan_ + lengthFront) - (currentOffset_ - lengthBack);
316 s = 0;
317 tmp = 0;
318 for (i = 0; i < overlap; i++) {
319 if (newInfo.buffer[lastScan_ + lengthFront - overlap + i] ==
320 oldInfo.buffer[lastPos_ + lengthFront - overlap + i]) {
321 s++;
322 }
323 if (newInfo.buffer[currentOffset_ - lengthBack + i] == oldInfo.buffer[matchPos_ - lengthBack + i]) {
324 s--;
325 }
326 if (s > tmp) {
327 tmp = s;
328 lens = i + 1;
329 }
330 }
331 lengthFront += lens - overlap;
332 lengthBack -= lens;
333 }
334 }
335
GetCtrlDatas(const BlockBuffer & newInfo,const BlockBuffer & oldInfo,std::vector<ControlData> & controlDatas)336 int32_t BlocksDiff::GetCtrlDatas(const BlockBuffer &newInfo,
337 const BlockBuffer &oldInfo, std::vector<ControlData> &controlDatas)
338 {
339 int64_t matchLen = 0;
340 while (currentOffset_ < static_cast<int64_t>(newInfo.length)) {
341 int64_t oldScore = 0;
342 int64_t lenFront = 0;
343 int64_t lenBack = 0;
344 ComputeOldScore(newInfo, oldInfo, oldScore, matchLen);
345 if ((matchLen == oldScore) && (currentOffset_ != static_cast<int64_t>(newInfo.length))) {
346 continue;
347 }
348 ComputeLength(newInfo, oldInfo, lenFront, lenBack);
349
350 // save ctrl data
351 ControlData ctrlData;
352 ctrlData.diffLength = lenFront;
353 ctrlData.extraLength = (currentOffset_ - lenBack) - (lastScan_ + lenFront);
354 ctrlData.offsetIncrement = (matchPos_ - lenBack) - (lastPos_ + lenFront);
355 ctrlData.diffNewStart = &newInfo.buffer[lastScan_];
356 ctrlData.diffOldStart = &oldInfo.buffer[lastPos_];
357 ctrlData.extraNewStart = &newInfo.buffer[lastScan_ + lenFront];
358 controlDatas.push_back(ctrlData);
359 lastScan_ = currentOffset_ - lenBack;
360 lastPos_ = matchPos_ - lenBack;
361 lastOffset_ = matchPos_ - currentOffset_;
362 }
363 return 0;
364 }
365
WriteControlData(const std::vector<ControlData> controlDatas,size_t & patchSize)366 int32_t BlocksDiff::WriteControlData(const std::vector<ControlData> controlDatas, size_t &patchSize)
367 {
368 std::unique_ptr<DeflateAdapter> bzip2Adapter = CreateBZip2Adapter(patchSize);
369 if (bzip2Adapter == nullptr) {
370 PATCH_LOGE("Failed to create bzip2Adapter");
371 return -1;
372 }
373 bzip2Adapter->Open();
374 int32_t ret = 0;
375 uint8_t buffer[sizeof(int64_t)] = {0};
376 BlockBuffer srcData = {buffer, sizeof(int64_t)};
377 PATCH_DEBUG("WriteControlData patchSize %zu controlDatas %zu", patchSize, controlDatas.size());
378 std::vector<int64_t> data;
379 ON_SCOPE_EXIT(closeBzip2Adapter) {
380 bzip2Adapter->Close();
381 };
382 for (size_t i = 0; i < controlDatas.size(); i++) {
383 WriteLE64(srcData, controlDatas[i].diffLength);
384 ret = bzip2Adapter->WriteData(srcData);
385 if (ret != 0) {
386 PATCH_LOGE("Failed to write data");
387 return ret;
388 }
389 WriteLE64(srcData, controlDatas[i].extraLength);
390 ret = bzip2Adapter->WriteData(srcData);
391 if (ret != 0) {
392 PATCH_LOGE("Failed to write data");
393 return ret;
394 }
395 WriteLE64(srcData, controlDatas[i].offsetIncrement);
396 ret = bzip2Adapter->WriteData(srcData);
397 if (ret != 0) {
398 PATCH_LOGE("WriteControlData : Failed to write data");
399 return ret;
400 }
401 }
402 size_t dataSize = 0;
403 ret = bzip2Adapter->FlushData(dataSize);
404 if (ret != 0) {
405 PATCH_LOGE("Failed to FlushData %d", ret);
406 return ret;
407 }
408 patchSize += dataSize;
409 PATCH_DEBUG("WriteControlData exit patchSize %zu", patchSize);
410 return 0;
411 }
412
WriteDiffData(const std::vector<ControlData> controlDatas,size_t & patchSize)413 int32_t BlocksDiff::WriteDiffData(const std::vector<ControlData> controlDatas, size_t &patchSize)
414 {
415 PATCH_DEBUG("WriteDiffData patchSize %zu", patchSize);
416 std::unique_ptr<DeflateAdapter> bzip2Adapter = CreateBZip2Adapter(patchSize);
417 if (bzip2Adapter == nullptr) {
418 PATCH_LOGE("Failed to create bzip2Adapter");
419 return -1;
420 }
421 bzip2Adapter->Open();
422 ON_SCOPE_EXIT(closeBzip2Adapter) {
423 bzip2Adapter->Close();
424 };
425
426 std::vector<uint8_t> diffData(IGMDIFF_LIMIT_UNIT, 0);
427 int32_t ret = 0;
428 for (size_t i = 0; i < controlDatas.size(); i++) {
429 if (controlDatas[i].diffLength <= 0) {
430 continue;
431 }
432
433 int64_t offset = 0;
434 while (offset < controlDatas[i].diffLength) {
435 int64_t cpyLen = static_cast<int64_t>(IGMDIFF_LIMIT_UNIT);
436 cpyLen = (offset + cpyLen > controlDatas[i].diffLength) ? (controlDatas[i].diffLength - offset) : cpyLen;
437 for (int64_t k = 0; k < cpyLen; k++) {
438 diffData[k] = controlDatas[i].diffNewStart[offset + k] - controlDatas[i].diffOldStart[offset + k];
439 }
440
441 BlockBuffer srcData = {reinterpret_cast<uint8_t*>(diffData.data()), static_cast<size_t>(cpyLen)};
442 ret = bzip2Adapter->WriteData(srcData);
443 if (ret != 0) {
444 PATCH_LOGE("Failed to write data");
445 return ret;
446 }
447 offset += cpyLen;
448 }
449 }
450 size_t dataSize = 0;
451 ret = bzip2Adapter->FlushData(dataSize);
452 if (ret != 0) {
453 PATCH_LOGE("Failed to FlushData %d", ret);
454 return ret;
455 }
456 patchSize += dataSize;
457 PATCH_DEBUG("WriteDiffData exit patchSize %zu dataSize %zu ", patchSize, dataSize);
458 return 0;
459 }
460
WriteExtraData(const std::vector<ControlData> controlDatas,size_t & patchSize)461 int32_t BlocksDiff::WriteExtraData(const std::vector<ControlData> controlDatas, size_t &patchSize)
462 {
463 PATCH_DEBUG("WriteExtraData patchSize %zu ", patchSize);
464 std::unique_ptr<DeflateAdapter> bzip2Adapter = CreateBZip2Adapter(patchSize);
465 if (bzip2Adapter == nullptr) {
466 PATCH_LOGE("Failed to create bzip2Adapter");
467 return -1;
468 }
469 bzip2Adapter->Open();
470 ON_SCOPE_EXIT(closeBzip2Adapter) {
471 bzip2Adapter->Close();
472 };
473 int32_t ret = 0;
474 for (size_t i = 0; i < controlDatas.size(); i++) {
475 if (controlDatas[i].extraLength <= 0) {
476 continue;
477 }
478 BlockBuffer srcData = {controlDatas[i].extraNewStart, static_cast<size_t>(controlDatas[i].extraLength)};
479 ret = bzip2Adapter->WriteData(srcData);
480 if (ret != 0) {
481 PATCH_LOGE("WriteExtraData : Failed to write data");
482 return ret;
483 }
484 }
485 size_t dataSize = 0;
486 ret = bzip2Adapter->FlushData(dataSize);
487 if (ret != 0) {
488 PATCH_LOGE("Failed to FlushData %d", ret);
489 return ret;
490 }
491 patchSize += dataSize;
492 PATCH_DEBUG("WriteExtraData exit patchSize %zu", patchSize);
493 return 0;
494 }
495
496 template<class DataType>
Init(const BlockBuffer & oldInfo)497 void SuffixArray<DataType>::Init(const BlockBuffer &oldInfo)
498 {
499 std::vector<DataType> suffixArrayTemp;
500 std::vector<DataType> buckets;
501 InitBuckets(oldInfo, buckets, suffixArrayTemp);
502 DataType i = 0;
503 DataType h = 0;
504 DataType len = 0;
505 for (h = 1; suffixArray_[0] != -(static_cast<DataType>(oldInfo.length) + 1); h += h) {
506 len = 0;
507 for (i = 0; i < (static_cast<DataType>(oldInfo.length) + 1);) {
508 if (suffixArray_[i] < 0) {
509 len -= suffixArray_[i];
510 i -= suffixArray_[i];
511 } else {
512 suffixArray_[i - len] = len != 0 ? -len : suffixArray_[i - len];
513 len = suffixArrayTemp[suffixArray_[i]] + 1 - i;
514 Split(suffixArrayTemp, i, len, h);
515 i += len;
516 len = 0;
517 }
518 }
519 if (len != 0) {
520 suffixArray_[i - len] = -len;
521 }
522 }
523
524 for (i = 0; i < static_cast<DataType>(oldInfo.length) + 1; i++) {
525 suffixArray_[suffixArrayTemp[i]] = i;
526 }
527
528 PATCH_DEBUG("SuffixArray::Init %d finish", static_cast<int>(oldInfo.length));
529 }
530
531 template<class DataType>
SplitForLess(std::vector<DataType> & suffixArrayTemp,DataType start,DataType len,DataType h)532 void SuffixArray<DataType>::SplitForLess(std::vector<DataType> &suffixArrayTemp,
533 DataType start, DataType len, DataType h)
534 {
535 DataType j = 0;
536 for (DataType k = start; k < start + len; k += j) {
537 j = 1;
538 DataType x = suffixArrayTemp[suffixArray_[k] + h];
539 for (DataType i = 1; k + i < start + len; i++) {
540 if (suffixArrayTemp[suffixArray_[k + i] + h] < x) {
541 x = suffixArrayTemp[suffixArray_[k + i] + h];
542 j = 0;
543 }
544 if (suffixArrayTemp[suffixArray_[k + i] + h] == x) {
545 SWAP(suffixArray_[k + j], suffixArray_[k + i]);
546 j++;
547 }
548 }
549 for (DataType i = 0; i < j; i++) {
550 suffixArrayTemp[suffixArray_[k + i]] = k + j - 1;
551 }
552 if (j == 1) {
553 suffixArray_[k] = -1;
554 }
555 }
556 }
557
558 template<class DataType>
Split(std::vector<DataType> & suffixArrayTemp,DataType start,DataType len,DataType h)559 void SuffixArray<DataType>::Split(std::vector<DataType> &suffixArrayTemp, DataType start, DataType len, DataType h)
560 {
561 if (len < MIN_LENGTH) {
562 SplitForLess(suffixArrayTemp, start, len, h);
563 return;
564 }
565
566 DataType x = suffixArrayTemp[suffixArray_[start + len / MULTIPLE_TWO] + h];
567 DataType jj = 0;
568 DataType kk = 0;
569 for (DataType i = start; i < (start + len); i++) {
570 jj = (suffixArrayTemp[suffixArray_[i] + h] < x) ? (jj + 1) : jj;
571 kk = (suffixArrayTemp[suffixArray_[i] + h] == x) ? (kk + 1) : kk;
572 }
573 jj += start;
574 kk += jj;
575 DataType i = start;
576 DataType j = 0;
577 DataType k = 0;
578 while (i < jj) {
579 if (suffixArrayTemp[suffixArray_[i] + h] < x) {
580 i++;
581 } else if (suffixArrayTemp[suffixArray_[i] + h] == x) {
582 SWAP(suffixArray_[i], suffixArray_[jj + j]);
583 j++;
584 } else {
585 SWAP(suffixArray_[i], suffixArray_[kk + k]);
586 k++;
587 }
588 }
589 while (jj + j < kk) {
590 if (suffixArrayTemp[suffixArray_[jj + j] + h] == x) {
591 j++;
592 } else {
593 SWAP(suffixArray_[jj + j], suffixArray_[kk + k]);
594 k++;
595 }
596 }
597 if (jj > start) {
598 Split(suffixArrayTemp, start, jj - start, h);
599 }
600
601 for (i = 0; i < kk - jj; i++) {
602 suffixArrayTemp[suffixArray_[jj + i]] = kk - 1;
603 }
604 if (jj == kk - 1) {
605 suffixArray_[jj] = -1;
606 }
607 if (start + len > kk) {
608 Split(suffixArrayTemp, kk, start + len - kk, h);
609 }
610 }
611
612 template<class DataType>
MatchLength(const BlockBuffer & oldBuffer,const BlockBuffer & newBuffer) const613 int64_t SuffixArray<DataType>::MatchLength(const BlockBuffer &oldBuffer, const BlockBuffer &newBuffer) const
614 {
615 int64_t i = 0;
616 for (; (i < static_cast<int64_t>(oldBuffer.length)) && (i < static_cast<int64_t>(newBuffer.length)); i++) {
617 if (oldBuffer.buffer[i] != newBuffer.buffer[i]) {
618 break;
619 }
620 }
621 return i;
622 }
623
624 template<class DataType>
Search(const BlockBuffer & newInfo,const BlockBuffer & oldInfo,int64_t start,int64_t end,int64_t & pos) const625 int64_t SuffixArray<DataType>::Search(const BlockBuffer &newInfo,
626 const BlockBuffer &oldInfo, int64_t start, int64_t end, int64_t &pos) const
627 {
628 int64_t x = 0;
629 int64_t y = 0;
630 if ((end - start) < MULTIPLE_TWO) {
631 BlockBuffer oldStart = {oldInfo.buffer + suffixArray_[start], oldInfo.length - suffixArray_[start]};
632 BlockBuffer oldEnd = {oldInfo.buffer + suffixArray_[end], oldInfo.length - suffixArray_[end]};
633 x = MatchLength(oldStart, newInfo);
634 y = MatchLength(oldEnd, newInfo);
635 if (x > y) {
636 pos = suffixArray_[start];
637 return x;
638 } else {
639 pos = suffixArray_[end];
640 return y;
641 }
642 }
643 x = start + (end - start) / MULTIPLE_TWO;
644 if (memcmp(oldInfo.buffer + suffixArray_[x],
645 newInfo.buffer, MIN(oldInfo.length - suffixArray_[x], newInfo.length)) < 0) {
646 return Search(newInfo, oldInfo, x, end, pos);
647 } else {
648 return Search(newInfo, oldInfo, start, x, pos);
649 }
650 }
651
652 template<class DataType>
InitBuckets(const BlockBuffer & oldInfo,std::vector<DataType> & buckets,std::vector<DataType> & suffixArrayTemp)653 void SuffixArray<DataType>::InitBuckets(const BlockBuffer &oldInfo,
654 std::vector<DataType> &buckets, std::vector<DataType> &suffixArrayTemp)
655 {
656 suffixArray_.resize(oldInfo.length + 1, 0);
657 suffixArrayTemp.resize(oldInfo.length + 1, 0);
658 buckets.resize(BUCKET_SIZE, 0);
659
660 for (size_t i = 0; i < oldInfo.length; i++) {
661 buckets[oldInfo.buffer[i]]++;
662 }
663 for (size_t i = 1; i < buckets.size(); i++) {
664 buckets[i] += buckets[i - 1];
665 }
666 for (size_t i = buckets.size() - 1; i > 0; i--) {
667 buckets[i] = buckets[i - 1];
668 }
669 buckets[0] = 0;
670
671 DataType i;
672 for (i = 0; i < static_cast<DataType>(oldInfo.length); i++) {
673 suffixArray_[++buckets[oldInfo.buffer[i]]] = i;
674 }
675 suffixArray_[0] = oldInfo.length;
676
677 for (i = 0; i < static_cast<DataType>(oldInfo.length); i++) {
678 suffixArrayTemp[i] = buckets[oldInfo.buffer[i]];
679 }
680 suffixArrayTemp[oldInfo.length] = 0;
681
682 for (i = 1; i < static_cast<DataType>(BUCKET_SIZE); i++) {
683 if (buckets[i] == buckets[i - 1] + 1) {
684 suffixArray_[buckets[i]] = -1;
685 }
686 }
687 suffixArray_[0] = -1;
688 }
689 } // namespace UpdatePatch
690