• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "sync/internal_api/public/base/unique_position.h"
6 
7 #include "base/basictypes.h"
8 #include "base/logging.h"
9 #include "base/stl_util.h"
10 #include "base/strings/string_number_conversions.h"
11 #include "sync/protocol/unique_position.pb.h"
12 #include "third_party/zlib/zlib.h"
13 
14 namespace syncer {
15 
16 const size_t UniquePosition::kSuffixLength = 28;
17 const size_t UniquePosition::kCompressBytesThreshold = 128;
18 
19 // static.
IsValidSuffix(const std::string & suffix)20 bool UniquePosition::IsValidSuffix(const std::string& suffix) {
21   // The suffix must be exactly the specified length, otherwise unique suffixes
22   // are not sufficient to guarantee unique positions (because prefix + suffix
23   // == p + refixsuffix).
24   return suffix.length() == kSuffixLength;
25 }
26 
27 // static.
IsValidBytes(const std::string & bytes)28 bool UniquePosition::IsValidBytes(const std::string& bytes) {
29   // The first condition ensures that our suffix uniqueness is sufficient to
30   // guarantee position uniqueness.  Otherwise, it's possible the end of some
31   // prefix + some short suffix == some long suffix.
32   // The second condition ensures that FindSmallerWithSuffix can always return a
33   // result.
34   return bytes.length() >= kSuffixLength
35       && bytes[bytes.length()-1] != 0;
36 }
37 
38 // static.
CreateInvalid()39 UniquePosition UniquePosition::CreateInvalid() {
40   UniquePosition pos;
41   DCHECK(!pos.IsValid());
42   return pos;
43 }
44 
45 // static.
FromProto(const sync_pb::UniquePosition & proto)46 UniquePosition UniquePosition::FromProto(const sync_pb::UniquePosition& proto) {
47   if (proto.has_custom_compressed_v1()) {
48     return UniquePosition(proto.custom_compressed_v1());
49   } else if (proto.has_value() && !proto.value().empty()) {
50     return UniquePosition(Compress(proto.value()));
51   } else if (proto.has_compressed_value() && proto.has_uncompressed_length()) {
52     uLongf uncompressed_len = proto.uncompressed_length();
53     std::string un_gzipped;
54 
55     un_gzipped.resize(uncompressed_len);
56     int result = uncompress(
57         reinterpret_cast<Bytef*>(string_as_array(&un_gzipped)),
58         &uncompressed_len,
59         reinterpret_cast<const Bytef*>(proto.compressed_value().data()),
60         proto.compressed_value().size());
61     if (result != Z_OK) {
62       DLOG(ERROR) << "Unzip failed " << result;
63       return UniquePosition::CreateInvalid();
64     }
65     if (uncompressed_len != proto.uncompressed_length()) {
66       DLOG(ERROR)
67           << "Uncompressed length " << uncompressed_len
68           << " did not match specified length " << proto.uncompressed_length();
69       return UniquePosition::CreateInvalid();
70     }
71     return UniquePosition(Compress(un_gzipped));
72   } else {
73     return UniquePosition::CreateInvalid();
74   }
75 }
76 
77 // static.
FromInt64(int64 x,const std::string & suffix)78 UniquePosition UniquePosition::FromInt64(
79     int64 x, const std::string& suffix) {
80   uint64 y = static_cast<uint64>(x);
81   y ^= 0x8000000000000000ULL; // Make it non-negative.
82   std::string bytes(8, 0);
83   for (int i = 7; i >= 0; --i) {
84     bytes[i] = static_cast<uint8>(y);
85     y >>= 8;
86   }
87   return UniquePosition(bytes + suffix, suffix);
88 }
89 
90 // static.
InitialPosition(const std::string & suffix)91 UniquePosition UniquePosition::InitialPosition(
92     const std::string& suffix) {
93   DCHECK(IsValidSuffix(suffix));
94   return UniquePosition(suffix, suffix);
95 }
96 
97 // static.
UniquePosition UniquePosition::Before(
    const UniquePosition& x,
    const std::string& suffix) {
  DCHECK(IsValidSuffix(suffix));
  DCHECK(x.IsValid());

  // Find a prefix which, once |suffix| is appended, sorts below |x|.
  const std::string reference_bytes = Uncompress(x.compressed_);
  std::string position_bytes = FindSmallerWithSuffix(reference_bytes, suffix);
  position_bytes.append(suffix);
  return UniquePosition(position_bytes, suffix);
}
107 
108 // static.
UniquePosition UniquePosition::After(
    const UniquePosition& x,
    const std::string& suffix) {
  DCHECK(IsValidSuffix(suffix));
  DCHECK(x.IsValid());

  // Find a prefix which, once |suffix| is appended, sorts above |x|.
  const std::string reference_bytes = Uncompress(x.compressed_);
  std::string position_bytes = FindGreaterWithSuffix(reference_bytes, suffix);
  position_bytes.append(suffix);
  return UniquePosition(position_bytes, suffix);
}
118 
119 // static.
UniquePosition UniquePosition::Between(
    const UniquePosition& before,
    const UniquePosition& after,
    const std::string& suffix) {
  DCHECK(before.IsValid());
  DCHECK(after.IsValid());
  DCHECK(before.LessThan(after));
  DCHECK(IsValidSuffix(suffix));

  // Work on the uncompressed bytes of both neighbours, then find a prefix
  // which, once |suffix| is appended, sorts strictly between them.
  const std::string lower_bytes = Uncompress(before.compressed_);
  const std::string upper_bytes = Uncompress(after.compressed_);
  std::string position_bytes =
      FindBetweenWithSuffix(lower_bytes, upper_bytes, suffix);
  position_bytes.append(suffix);
  return UniquePosition(position_bytes, suffix);
}
134 
UniquePosition()135 UniquePosition::UniquePosition() : is_valid_(false) {}
136 
LessThan(const UniquePosition & other) const137 bool UniquePosition::LessThan(const UniquePosition& other) const {
138   DCHECK(this->IsValid());
139   DCHECK(other.IsValid());
140 
141   return compressed_ < other.compressed_;
142 }
143 
Equals(const UniquePosition & other) const144 bool UniquePosition::Equals(const UniquePosition& other) const {
145   if (!this->IsValid() && !other.IsValid())
146     return true;
147 
148   return compressed_ == other.compressed_;
149 }
150 
ToProto(sync_pb::UniquePosition * proto) const151 void UniquePosition::ToProto(sync_pb::UniquePosition* proto) const {
152   proto->Clear();
153 
154   // This is the current preferred foramt.
155   proto->set_custom_compressed_v1(compressed_);
156 
157   // Older clients used to write other formats.  We don't bother doing that
158   // anymore because that form of backwards compatibility is expensive.  We no
159   // longer want to pay that price just too support clients that have been
160   // obsolete for a long time.  See the proto definition for details.
161 }
162 
SerializeToString(std::string * blob) const163 void UniquePosition::SerializeToString(std::string* blob) const {
164   DCHECK(blob);
165   sync_pb::UniquePosition proto;
166   ToProto(&proto);
167   proto.SerializeToString(blob);
168 }
169 
ToInt64() const170 int64 UniquePosition::ToInt64() const {
171   uint64 y = 0;
172   const std::string& s = Uncompress(compressed_);
173   size_t l = sizeof(int64);
174   if (s.length() < l) {
175     NOTREACHED();
176     l = s.length();
177   }
178   for (size_t i = 0; i < l; ++i) {
179     const uint8 byte = s[l - i - 1];
180     y |= static_cast<uint64>(byte) << (i * 8);
181   }
182   y ^= 0x8000000000000000ULL;
183   // This is technically implementation-defined if y > INT64_MAX, so
184   // we're assuming that we're on a twos-complement machine.
185   return static_cast<int64>(y);
186 }
187 
IsValid() const188 bool UniquePosition::IsValid() const {
189   return is_valid_;
190 }
191 
ToDebugString() const192 std::string UniquePosition::ToDebugString() const {
193   const std::string bytes = Uncompress(compressed_);
194   if (bytes.empty())
195     return std::string("INVALID[]");
196 
197   std::string debug_string = base::HexEncode(bytes.data(), bytes.length());
198   if (!IsValid()) {
199     debug_string = "INVALID[" + debug_string + "]";
200   }
201 
202   std::string compressed_string =
203       base::HexEncode(compressed_.data(), compressed_.length());
204   debug_string.append(", compressed: " + compressed_string);
205   return debug_string;
206 }
207 
GetSuffixForTest() const208 std::string UniquePosition::GetSuffixForTest() const {
209   const std::string bytes = Uncompress(compressed_);
210   const size_t prefix_len = bytes.length() - kSuffixLength;
211   return bytes.substr(prefix_len, std::string::npos);
212 }
213 
FindSmallerWithSuffix(const std::string & reference,const std::string & suffix)214 std::string UniquePosition::FindSmallerWithSuffix(
215     const std::string& reference,
216     const std::string& suffix) {
217   size_t ref_zeroes = reference.find_first_not_of('\0');
218   size_t suffix_zeroes = suffix.find_first_not_of('\0');
219 
220   // Neither of our inputs are allowed to have trailing zeroes, so the following
221   // must be true.
222   DCHECK_NE(ref_zeroes, std::string::npos);
223   DCHECK_NE(suffix_zeroes, std::string::npos);
224 
225   if (suffix_zeroes > ref_zeroes) {
226     // Implies suffix < ref.
227     return std::string();
228   }
229 
230   if (suffix.substr(suffix_zeroes) < reference.substr(ref_zeroes)) {
231     // Prepend zeroes so the result has as many zero digits as |reference|.
232     return std::string(ref_zeroes - suffix_zeroes, '\0');
233   } else if (suffix_zeroes > 1) {
234     // Prepend zeroes so the result has one more zero digit than |reference|.
235     // We could also take the "else" branch below, but taking this branch will
236     // give us a smaller result.
237     return std::string(ref_zeroes - suffix_zeroes + 1, '\0');
238   } else {
239     // Prepend zeroes to match those in the |reference|, then something smaller
240     // than the first non-zero digit in |reference|.
241     char lt_digit = static_cast<uint8>(reference[ref_zeroes])/2;
242     return std::string(ref_zeroes, '\0') + lt_digit;
243   }
244 }
245 
246 // static
FindGreaterWithSuffix(const std::string & reference,const std::string & suffix)247 std::string UniquePosition::FindGreaterWithSuffix(
248     const std::string& reference,
249     const std::string& suffix) {
250   size_t ref_FFs = reference.find_first_not_of(kuint8max);
251   size_t suffix_FFs = suffix.find_first_not_of(kuint8max);
252 
253   if (ref_FFs == std::string::npos) {
254     ref_FFs = reference.length();
255   }
256   if (suffix_FFs == std::string::npos) {
257     suffix_FFs = suffix.length();
258   }
259 
260   if (suffix_FFs > ref_FFs) {
261     // Implies suffix > reference.
262     return std::string();
263   }
264 
265   if (suffix.substr(suffix_FFs) > reference.substr(ref_FFs)) {
266     // Prepend FF digits to match those in |reference|.
267     return std::string(ref_FFs - suffix_FFs, kuint8max);
268   } else if (suffix_FFs > 1) {
269     // Prepend enough leading FF digits so result has one more of them than
270     // |reference| does.  We could also take the "else" branch below, but this
271     // gives us a smaller result.
272     return std::string(ref_FFs - suffix_FFs + 1, kuint8max);
273   } else {
274     // Prepend FF digits to match those in |reference|, then something larger
275     // than the first non-FF digit in |reference|.
276     char gt_digit = static_cast<uint8>(reference[ref_FFs]) +
277         (kuint8max - static_cast<uint8>(reference[ref_FFs]) + 1) / 2;
278     return std::string(ref_FFs, kuint8max) + gt_digit;
279   }
280 }
281 
282 // static
FindBetweenWithSuffix(const std::string & before,const std::string & after,const std::string & suffix)283 std::string UniquePosition::FindBetweenWithSuffix(
284     const std::string& before,
285     const std::string& after,
286     const std::string& suffix) {
287   DCHECK(IsValidSuffix(suffix));
288   DCHECK_NE(before, after);
289   DCHECK_LT(before, after);
290 
291   std::string mid;
292 
293   // Sometimes our suffix puts us where we want to be.
294   if (before < suffix && suffix < after) {
295     return std::string();
296   }
297 
298   size_t i = 0;
299   for ( ; i < std::min(before.length(), after.length()); ++i) {
300     uint8 a_digit = before[i];
301     uint8 b_digit = after[i];
302 
303     if (b_digit - a_digit >= 2) {
304       mid.push_back(a_digit + (b_digit - a_digit)/2);
305       return mid;
306     } else if (a_digit == b_digit) {
307       mid.push_back(a_digit);
308 
309       // Both strings are equal so far.  Will appending the suffix at this point
310       // give us the comparison we're looking for?
311       if (before.substr(i+1) < suffix && suffix < after.substr(i+1)) {
312         return mid;
313       }
314     } else {
315       DCHECK_EQ(b_digit - a_digit, 1);  // Implied by above if branches.
316 
317       // The two options are off by one digit.  The choice of whether to round
318       // up or down here will have consequences on what we do with the remaining
319       // digits.  Exploring both options is an optimization and is not required
320       // for the correctness of this algorithm.
321 
322       // Option A: Round down the current digit.  This makes our |mid| <
323       // |after|, no matter what we append afterwards.  We then focus on
324       // appending digits until |mid| > |before|.
325       std::string mid_a = mid;
326       mid_a.push_back(a_digit);
327       mid_a.append(FindGreaterWithSuffix(before.substr(i+1), suffix));
328 
329       // Option B: Round up the current digit.  This makes our |mid| > |before|,
330       // no matter what we append afterwards.  We then focus on appending digits
331       // until |mid| < |after|.  Note that this option may not be viable if the
332       // current digit is the last one in |after|, so we skip the option in that
333       // case.
334       if (after.length() > i+1) {
335         std::string mid_b = mid;
336         mid_b.push_back(b_digit);
337         mid_b.append(FindSmallerWithSuffix(after.substr(i+1), suffix));
338 
339         // Does this give us a shorter position value?  If so, use it.
340         if (mid_b.length() < mid_a.length()) {
341           return mid_b;
342         }
343       }
344       return mid_a;
345     }
346   }
347 
348   // If we haven't found a midpoint yet, the following must be true:
349   DCHECK_EQ(before.substr(0, i), after.substr(0, i));
350   DCHECK_EQ(before, mid);
351   DCHECK_LT(before.length(), after.length());
352 
353   // We know that we'll need to append at least one more byte to |mid| in the
354   // process of making it < |after|.  Appending any digit, regardless of the
355   // value, will make |before| < |mid|.  Therefore, the following will get us a
356   // valid position.
357 
358   mid.append(FindSmallerWithSuffix(after.substr(i), suffix));
359   return mid;
360 }
361 
UniquePosition(const std::string & internal_rep)362 UniquePosition::UniquePosition(const std::string& internal_rep)
363     : compressed_(internal_rep),
364       is_valid_(IsValidBytes(Uncompress(internal_rep))) {
365 }
366 
UniquePosition(const std::string & uncompressed,const std::string & suffix)367 UniquePosition::UniquePosition(
368     const std::string& uncompressed,
369     const std::string& suffix)
370   : compressed_(Compress(uncompressed)),
371     is_valid_(IsValidBytes(uncompressed)) {
372   DCHECK(uncompressed.rfind(suffix) + kSuffixLength == uncompressed.length());
373   DCHECK(IsValidSuffix(suffix));
374   DCHECK(IsValid());
375 }
376 
377 // On custom compression:
378 //
379 // Let C(x) be the compression function and U(x) be the uncompression function.
380 //
381 // This compression scheme has a few special properties.  For one, it is
382 // order-preserving.  For any two valid position strings x and y:
383 //   x < y <=> C(x) < C(y)
384 // This allows us keep the position strings compressed as we sort them.
385 //
386 // The compressed format and the decode algorithm:
387 //
388 // The compressed string is a series of blocks, almost all of which are 8 bytes
389 // in length.  The only exception is the last block in the compressed string,
390 // which may be a remainder block, which has length no greater than 7.  The
391 // full-length blocks are either repeated character blocks or plain data blocks.
392 // All blocks are entirely self-contained.  Their decoded values are independent
393 // from that of their neighbours.
394 //
395 // A repeated character block is encoded into eight bytes and represents between
396 // 4 and 2^31 repeated instances of a given character in the unencoded stream.
397 // The encoding consists of a single character repeated four times, followed by
398 // an encoded count.  The encoded count is stored as a big-endian 32 bit
399 // integer.  There are 2^31 possible count values, and two encodings for each.
400 // The high encoding is 'enc = kuint32max - count'; the low encoding is 'enc =
401 // count'.  At compression time, the algorithm will choose between the two
402 // encodings based on which of the two will maintain the appropriate sort
403 // ordering (by a process which will be described below).  The decompression
404 // algorithm need not concern itself with which encoding was used; it needs only
405 // to decode it.  The decoded value of this block is "count" instances of the
406 // character that was repeated four times in the first half of this block.
407 //
408 // A plain data block is encoded into eight bytes and represents exactly eight
409 // bytes of data in the unencoded stream.  The plain data block must not begin
410 // with the same character repeated four times.  It is allowed to contain such a
411 // four-character sequence, just not at the start of the block.  The decoded
412 // value of a plain data block is identical to its encoded value.
413 //
414 // A remainder block has length of at most seven.  It is a shorter version of
415 // the plain data block.  It occurs only at the end of the encoded stream and
416 // represents exactly as many bytes of unencoded data as its own length.  Like a
417 // plain data block, the remainder block never begins with the same character
418 // repeated four times.  The decoded value of this block is identical to its
419 // encoded value.
420 //
421 // The encode algorithm:
422 //
423 // From the above description, it can be seen that there may be more than one
424 // way to encode a given input string.  The encoder must be careful to choose
425 // the encoding that guarantees sort ordering.
426 //
427 // The rules for the encoder are as follows:
428 // 1. Iterate through the input string and produce output blocks one at a time.
429 // 2. Where possible (ie. where the next four bytes of input consist of the
430 //    same character repeated four times), produce a repeated data block of
431 //    maximum possible length.
432 // 3. If there is at least 8 bytes of data remaining and it is not possible
433 //    to produce a repeated character block, produce a plain data block.
434 // 4. If there are less than 8 bytes of data remaining and it is not possible
435 //    to produce a repeated character block, produce a remainder block.
436 // 5. When producing a repeated character block, the count encoding must be
437 //    chosen in such a way that the sort ordering is maintained.  The choice is
438 //    best illustrated by way of example:
439 //
440 //      When comparing two strings, the first of which begins with 8
441 //      instances of the letter 'B' and the second with 10 instances of the
442 //      letter 'B', which of the two should compare lower?  The result depends
443 //      on the 9th character of the first string, since it will be compared
444 //      against the 9th 'B' in the second string.  If that character is an 'A',
445 //      then the first string will compare lower.  If it is a 'C', then the
446 //      first string will compare higher.
447 //
448 //    The key insight is that the comparison value of a repeated character block
449 //    depends on the value of the character that follows it.  If the character
450 //    that follows the repeated character has a value greater than the repeated
451 //    character itself, then a shorter run length should translate to a higher
452 //    comparison value.  Therefore, we encode its count using the high encoding.
453 //    Similarly, if the following character is lower, we use the low encoding.
454 
455 namespace {
456 
457 // Appends an encoded run length to |output_str|.
WriteEncodedRunLength(uint32 length,bool high_encoding,std::string * output_str)458 static void WriteEncodedRunLength(uint32 length,
459                                   bool high_encoding,
460                                   std::string* output_str) {
461   CHECK_GE(length, 4U);
462   CHECK_LT(length, 0x80000000);
463 
464   // Step 1: Invert the count, if necessary, to account for the following digit.
465   uint32 encoded_length;
466   if (high_encoding) {
467     encoded_length = 0xffffffff - length;
468   } else {
469     encoded_length = length;
470   }
471 
472   // Step 2: Write it as big-endian so it compares correctly with memcmp(3).
473   output_str->append(1, 0xff & (encoded_length >> 24U));
474   output_str->append(1, 0xff & (encoded_length >> 16U));
475   output_str->append(1, 0xff & (encoded_length >> 8U));
476   output_str->append(1, 0xff & (encoded_length >> 0U));
477 }
478 
479 // Reads an encoded run length for |str| at position |i|.
ReadEncodedRunLength(const std::string & str,size_t i)480 static uint32 ReadEncodedRunLength(const std::string& str, size_t i) {
481   DCHECK_LE(i + 4, str.length());
482 
483   // Step 1: Extract the big-endian count.
484   uint32 encoded_length =
485       ((uint8)(str[i+3]) << 0)  |
486       ((uint8)(str[i+2]) << 8)  |
487       ((uint8)(str[i+1]) << 16) |
488       ((uint8)(str[i+0]) << 24);
489 
490   // Step 2: If this was an inverted count, un-invert it.
491   uint32 length;
492   if (encoded_length & 0x80000000) {
493     length = 0xffffffff - encoded_length;
494   } else {
495     length = encoded_length;
496   }
497 
498   return length;
499 }
500 
501 // A series of four identical chars at the beginning of a block indicates
502 // the beginning of a repeated character block.
static bool IsRepeatedCharPrefix(const std::string& chars, size_t start_index) {
  // Compare each of the three bytes after |start_index| with the byte at
  // |start_index|, stopping at the first mismatch.  The caller is responsible
  // for making sure all four positions are within |chars|.
  const char first = chars[start_index];
  for (size_t offset = 1; offset < 4; ++offset) {
    if (chars[start_index + offset] != first)
      return false;
  }
  return true;
}
508 
509 }  // namespace
510 
511 // static
512 // Wraps the CompressImpl function with a bunch of DCHECKs.
Compress(const std::string & str)513 std::string UniquePosition::Compress(const std::string& str) {
514   DCHECK(IsValidBytes(str));
515   std::string compressed = CompressImpl(str);
516   DCHECK(IsValidCompressed(compressed));
517   DCHECK_EQ(str, Uncompress(compressed));
518   return compressed;
519 }
520 
521 // static
522 // Performs the order preserving run length compression of a given input string.
CompressImpl(const std::string & str)523 std::string UniquePosition::CompressImpl(const std::string& str) {
524   std::string output;
525 
526   // The compressed length will usually be at least as long as the suffix (28),
527   // since the suffix bytes are mostly random.  Most are a few bytes longer; a
528   // small few are tens of bytes longer.  Some early tests indicated that
529   // roughly 99% had length 40 or smaller.  We guess that pre-sizing for 48 is a
530   // good trade-off, but that has not been confirmed through profiling.
531   output.reserve(48);
532 
533   // Each loop iteration will consume 8, or N bytes, where N >= 4 and is the
534   // length of a string of identical digits starting at i.
535   for (size_t i = 0; i < str.length(); ) {
536     if (i + 4 <= str.length() && IsRepeatedCharPrefix(str, i)) {
537       // Four identical bytes in a row at this position means that we must start
538       // a repeated character block.  Begin by outputting those four bytes.
539       output.append(str, i, 4);
540 
541       // Determine the size of the run.
542       const char rep_digit = str[i];
543       const size_t runs_until = str.find_first_not_of(rep_digit, i+4);
544 
545       // Handle the 'runs until end' special case specially.
546       size_t run_length;
547       bool encode_high;  // True if the next byte is greater than |rep_digit|.
548       if (runs_until == std::string::npos) {
549         run_length = str.length() - i;
550         encode_high = false;
551       } else {
552         run_length = runs_until - i;
553         encode_high = static_cast<uint8>(str[runs_until]) >
554             static_cast<uint8>(rep_digit);
555       }
556       DCHECK_LT(run_length, static_cast<size_t>(kint32max))
557           << "This implementation can't encode run-lengths greater than 2^31.";
558 
559       WriteEncodedRunLength(run_length, encode_high, &output);
560       i += run_length;  // Jump forward by the size of the run length.
561     } else {
562       // Output up to eight bytes without any encoding.
563       const size_t len = std::min(static_cast<size_t>(8), str.length() - i);
564       output.append(str, i, len);
565       i += len;  // Jump forward by the amount of input consumed (usually 8).
566     }
567   }
568 
569   return output;
570 }
571 
572 // static
573 // Uncompresses strings that were compressed with UniquePosition::Compress.
Uncompress(const std::string & str)574 std::string UniquePosition::Uncompress(const std::string& str) {
575   std::string output;
576   size_t i = 0;
577   // Iterate through the compressed string one block at a time.
578   for (i = 0; i + 8 <= str.length(); i += 8) {
579     if (IsRepeatedCharPrefix(str, i)) {
580       // Found a repeated character block.  Expand it.
581       const char rep_digit = str[i];
582       uint32 length = ReadEncodedRunLength(str, i+4);
583       output.append(length, rep_digit);
584     } else {
585       // Found a regular block.  Copy it.
586       output.append(str, i, 8);
587     }
588   }
589   // Copy the remaining bytes that were too small to form a block.
590   output.append(str, i, std::string::npos);
591   return output;
592 }
593 
IsValidCompressed(const std::string & str)594 bool UniquePosition::IsValidCompressed(const std::string& str) {
595   for (size_t i = 0; i + 8 <= str.length(); i += 8) {
596     if (IsRepeatedCharPrefix(str, i)) {
597       uint32 count = ReadEncodedRunLength(str, i+4);
598       if (count < 4) {
599         // A repeated character block should at least represent the four
600         // characters that started it.
601         return false;
602       }
603       if (str[i] == str[i+4]) {
604         // Does the next digit after a count match the repeated character?  Then
605         // this is not the highest possible count.
606         return false;
607       }
608     }
609   }
610   // We don't bother looking for the existence or checking the validity of
611   // any partial blocks.  There's no way they could be invalid anyway.
612   return true;
613 }
614 
615 }  // namespace syncer
616