// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4 
5 #ifndef BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
6 #define BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
7 
8 #include <string>
9 #include <vector>
10 
11 #include "base/base_export.h"
12 #include "base/strings/string16.h"
13 #include "base/strings/string_piece.h"
14 
15 namespace base {
16 
17 // A helper class and associated data structures to adjust offsets into a
18 // string in response to various adjustments one might do to that string
19 // (e.g., eliminating a range).  For details on offsets, see the comments by
20 // the AdjustOffsets() function below.
21 class BASE_EXPORT OffsetAdjuster {
22  public:
23   struct BASE_EXPORT Adjustment {
24     Adjustment(size_t original_offset,
25                size_t original_length,
26                size_t output_length);
27 
28     size_t original_offset;
29     size_t original_length;
30     size_t output_length;
31   };
32   typedef std::vector<Adjustment> Adjustments;
33 
34   // Adjusts all offsets in |offsets_for_adjustment| to reflect the adjustments
35   // recorded in |adjustments|.
36   //
37   // Offsets represents insertion/selection points between characters: if |src|
38   // is "abcd", then 0 is before 'a', 2 is between 'b' and 'c', and 4 is at the
39   // end of the string.  Valid input offsets range from 0 to |src_len|.  On
40   // exit, each offset will have been modified to point at the same logical
41   // position in the output string.  If an offset cannot be successfully
42   // adjusted (e.g., because it points into the middle of a multibyte sequence),
43   // it will be set to string16::npos.
44   static void AdjustOffsets(const Adjustments& adjustments,
45                             std::vector<size_t>* offsets_for_adjustment);
46 
47   // Adjusts the single |offset| to reflect the adjustments recorded in
48   // |adjustments|.
49   static void AdjustOffset(const Adjustments& adjustments,
50                            size_t* offset);
51 
52   // Adjusts all offsets in |offsets_for_unadjustment| to reflect the reverse
53   // of the adjustments recorded in |adjustments|.  In other words, the offsets
54   // provided represent offsets into an adjusted string and the caller wants
55   // to know the offsets they correspond to in the original string.  If an
56   // offset cannot be successfully unadjusted (e.g., because it points into
57   // the middle of a multibyte sequence), it will be set to string16::npos.
58   static void UnadjustOffsets(const Adjustments& adjustments,
59                               std::vector<size_t>* offsets_for_unadjustment);
60 
61   // Adjusts the single |offset| to reflect the reverse of the adjustments
62   // recorded in |adjustments|.
63   static void UnadjustOffset(const Adjustments& adjustments,
64                              size_t* offset);
65 
66   // Combines two sequential sets of adjustments, storing the combined revised
67   // adjustments in |adjustments_on_adjusted_string|.  That is, suppose a
68   // string was altered in some way, with the alterations recorded as
69   // adjustments in |first_adjustments|.  Then suppose the resulting string is
70   // further altered, with the alterations recorded as adjustments scored in
71   // |adjustments_on_adjusted_string|, with the offsets recorded in these
72   // adjustments being with respect to the intermediate string.  This function
73   // combines the two sets of adjustments into one, storing the result in
74   // |adjustments_on_adjusted_string|, whose offsets are correct with respect
75   // to the original string.
76   //
77   // Assumes both parameters are sorted by increasing offset.
78   //
79   // WARNING: Only supports |first_adjustments| that involve collapsing ranges
80   // of text, not expanding ranges.
81   static void MergeSequentialAdjustments(
82       const Adjustments& first_adjustments,
83       Adjustments* adjustments_on_adjusted_string);
84 };
85 
86 // Like the conversions in utf_string_conversions.h, but also fills in an
87 // |adjustments| parameter that reflects the alterations done to the string.
88 // It may be NULL.
89 BASE_EXPORT bool UTF8ToUTF16WithAdjustments(
90     const char* src,
91     size_t src_len,
92     string16* output,
93     base::OffsetAdjuster::Adjustments* adjustments);
94 BASE_EXPORT string16 UTF8ToUTF16WithAdjustments(
95     const base::StringPiece& utf8,
96     base::OffsetAdjuster::Adjustments* adjustments);
97 // As above, but instead internally examines the adjustments and applies them
98 // to |offsets_for_adjustment|.  See comments by AdjustOffsets().
99 BASE_EXPORT string16 UTF8ToUTF16AndAdjustOffsets(
100     const base::StringPiece& utf8,
101     std::vector<size_t>* offsets_for_adjustment);
102 
103 BASE_EXPORT std::string UTF16ToUTF8AndAdjustOffsets(
104     const base::StringPiece16& utf16,
105     std::vector<size_t>* offsets_for_adjustment);
106 
// Limiting functor callable by std::for_each which replaces any value that is
// greater than |limit| with T::npos.  Typically this is constructed with a
// string length in order to clamp offsets into the string to [0, length] (as
// opposed to [0, length)): an offset equal to the length designates the end
// of the string and is therefore left untouched.
//
// |T| is any type exposing a static |npos| member (e.g. a string type).
template <typename T>
struct LimitOffset {
  // |limit| is the largest offset value that is left unmodified.
  explicit LimitOffset(size_t limit)
    : limit_(limit) {}

  // Overwrites |offset| with T::npos when it exceeds the limit; otherwise
  // leaves it as is.  Const because applying the limit never changes the
  // functor itself.
  void operator()(size_t& offset) const {
    if (offset > limit_)
      offset = T::npos;
  }

  // Largest offset considered valid.
  size_t limit_;
};
123 
124 }  // namespace base
125 
126 #endif  // BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
127