1 // Boost string_algo library example file ---------------------------------//
2
3 // Copyright Pavol Droba 2002-2003. Use, modification and
4 // distribution is subject to the Boost Software License, Version
5 // 1.0. (See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt)
7
8 // See http://www.boost.org for updates, documentation, and revision history.
9
10 /*
11 RLE compression using replace framework. Goal is to compress a sequence of
12 repeating characters into 3 bytes ( repeat mark, character and repetition count ).
13 For simplification, it works only on numeric-value sequences.
14 */
15
16 #include <string>
17 #include <iostream>
18 #include <limits>
19 #include <iterator>
20 #include <boost/algorithm/string/find_format.hpp>
21 #include <boost/algorithm/string/finder.hpp>
22
23 using namespace std;
24 using namespace boost;
25
26 // replace mark specification, specialize for a specific element type
repeat_mark()27 template< typename T > T repeat_mark() { return (std::numeric_limits<T>::max)(); };
28
29 // Compression -----------------------------------------------------------------------
30
31
32 // compress finder -rle
33 /*
34 Find a sequence which can be compressed. It has to be at least 3-character long
35 sequence of repetitive characters
36 */
37 struct find_compressF
38 {
39 // Construction
find_compressFfind_compressF40 find_compressF() {}
41
42 // Operation
43 template<typename ForwardIteratorT>
operator ()find_compressF44 iterator_range<ForwardIteratorT> operator()(
45 ForwardIteratorT Begin,
46 ForwardIteratorT End ) const
47 {
48 typedef ForwardIteratorT input_iterator_type;
49 typedef typename std::iterator_traits<input_iterator_type>::value_type value_type;
50 typedef iterator_range<input_iterator_type> result_type;
51
52 // begin of the matching segment
53 input_iterator_type MStart=End;
54 // Repetition counter
55 value_type Cnt=0;
56
57 // Search for a sequence of repetitive characters
58 for(input_iterator_type It=Begin; It!=End;)
59 {
60 input_iterator_type It2=It++;
61
62 if ( It==End || Cnt>=(std::numeric_limits<value_type>::max)() )
63 {
64 return result_type( MStart, It );
65 }
66
67 if ( *It==*It2 )
68 {
69 if ( MStart==End )
70 {
71 // Mark the start
72 MStart=It2;
73 }
74
75 // Increate repetition counter
76 Cnt++;
77 }
78 else
79 {
80 if ( MStart!=End )
81 {
82 if ( Cnt>2 )
83 return result_type( MStart, It );
84 else
85 {
86 MStart=End;
87 Cnt=0;
88 }
89 }
90 }
91 }
92
93 return result_type( End, End );
94 }
95 };
96
97 // rle compress format
98 /*
99 Transform a sequence into repeat mark, character and count
100 */
101 template<typename SeqT>
102 struct format_compressF
103 {
104 private:
105 typedef SeqT result_type;
106 typedef typename SeqT::value_type value_type;
107
108 public:
109 // Construction
format_compressFformat_compressF110 format_compressF() {};
111
112 // Operation
113 template< typename ReplaceT >
operator ()format_compressF114 result_type operator()( const ReplaceT& Replace ) const
115 {
116 SeqT r;
117 if(!Replace.empty())
118 {
119 r.push_back( repeat_mark<value_type>() );
120 r.push_back( *(Replace.begin()) );
121 r.push_back( value_type( Replace.size() ) );
122 }
123
124 return r;
125 }
126 };
127
128 // Decompression -----------------------------------------------------------------------
129
130
131 // find decompress-rle functor
132 /*
133 find a repetition block
134 */
135 struct find_decompressF
136 {
137 // Construction
find_decompressFfind_decompressF138 find_decompressF() {}
139
140 // Operation
141 template<typename ForwardIteratorT>
operator ()find_decompressF142 iterator_range<ForwardIteratorT> operator()(
143 ForwardIteratorT Begin,
144 ForwardIteratorT End ) const
145 {
146 typedef ForwardIteratorT input_iterator_type;
147 typedef typename std::iterator_traits<input_iterator_type>::value_type value_type;
148 typedef iterator_range<input_iterator_type> result_type;
149
150 for(input_iterator_type It=Begin; It!=End; It++)
151 {
152 if( *It==repeat_mark<value_type>() )
153 {
154 // Repeat mark found, extract body
155 input_iterator_type It2=It++;
156
157 if ( It==End ) break;
158 It++;
159 if ( It==End ) break;
160 It++;
161
162 return result_type( It2, It );
163 }
164 }
165
166 return result_type( End, End );
167 }
168 };
169
170 // rle decompress format
171 /*
172 transform a repetition block into a sequence of characters
173 */
174 template< typename SeqT >
175 struct format_decompressF
176 {
177 private:
178 typedef SeqT result_type;
179 typedef typename SeqT::value_type value_type;
180
181 public:
182 // Construction
format_decompressFformat_decompressF183 format_decompressF() {};
184
185 // Operation
186 template< typename ReplaceT >
operator ()format_decompressF187 result_type operator()( const ReplaceT& Replace ) const
188 {
189 SeqT r;
190
191 if(!Replace.empty())
192 {
193 // extract info
194 typename ReplaceT::const_iterator It=Replace.begin();
195
196 value_type Value=*(++It);
197 value_type Repeat=*(++It);
198
199 for( value_type Index=0; Index<Repeat; Index++ ) r.push_back( Value );
200 }
201
202 return r;
203 }
204 };
205
206
main()207 int main()
208 {
209 cout << "* RLE Compression Example *" << endl << endl;
210
211 string original("123_AA_*ZZZZZZZZZZZZZZ*34");
212
213 // copy compression
214 string compress=find_format_all_copy(
215 original,
216 find_compressF(),
217 format_compressF<string>() );
218
219 cout << "Compressed string: " << compress << endl;
220
221 // Copy decompression
222 string decompress=find_format_all_copy(
223 compress,
224 find_decompressF(),
225 format_decompressF<string>() );
226
227 cout << "Decompressed string: " << decompress << endl;
228
229 // in-place compression
230 find_format_all(
231 original,
232 find_compressF(),
233 format_compressF<string>() );
234
235 cout << "Compressed string: " << original << endl;
236
237 // in-place decompression
238 find_format_all(
239 original,
240 find_decompressF(),
241 format_decompressF<string>() );
242
243 cout << "Decompressed string: " << original << endl;
244
245 cout << endl;
246
247 return 0;
248 }
249