• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //  Boost string_algo library example file  ---------------------------------//
2 
3 //  Copyright Pavol Droba 2002-2003. Use, modification and
4 //  distribution is subject to the Boost Software License, Version
5 //  1.0. (See accompanying file LICENSE_1_0.txt or copy at
6 //  http://www.boost.org/LICENSE_1_0.txt)
7 
8 //  See http://www.boost.org for updates, documentation, and revision history.
9 
10 /*
11     RLE compression using replace framework. Goal is to compress a sequence of
12     repeating characters into 3 bytes ( repeat mark, character and repetition count ).
13     For simplification, it works only on numeric-value sequences.
14 */
15 
16 #include <string>
17 #include <iostream>
18 #include <limits>
19 #include <iterator>
20 #include <boost/algorithm/string/find_format.hpp>
21 #include <boost/algorithm/string/finder.hpp>
22 
23 using namespace std;
24 using namespace boost;
25 
26 // replace mark specification, specialize for a specific element type
repeat_mark()27 template< typename T > T repeat_mark() { return (std::numeric_limits<T>::max)(); };
28 
29 // Compression  -----------------------------------------------------------------------
30 
31 
32 // compress finder -rle
33 /*
34     Find a sequence which can be compressed. It has to be at least 3-character long
35     sequence of repetitive characters
36 */
37 struct find_compressF
38 {
39     // Construction
find_compressFfind_compressF40     find_compressF() {}
41 
42     // Operation
43     template<typename ForwardIteratorT>
operator ()find_compressF44     iterator_range<ForwardIteratorT> operator()(
45         ForwardIteratorT Begin,
46         ForwardIteratorT End ) const
47     {
48         typedef ForwardIteratorT input_iterator_type;
49         typedef typename std::iterator_traits<input_iterator_type>::value_type value_type;
50         typedef iterator_range<input_iterator_type> result_type;
51 
52         // begin of the matching segment
53         input_iterator_type MStart=End;
54         // Repetition counter
55         value_type Cnt=0;
56 
57         // Search for a sequence of repetitive characters
58         for(input_iterator_type It=Begin; It!=End;)
59         {
60             input_iterator_type It2=It++;
61 
62             if ( It==End || Cnt>=(std::numeric_limits<value_type>::max)() )
63             {
64                 return result_type( MStart, It );
65             }
66 
67             if ( *It==*It2 )
68             {
69                 if ( MStart==End )
70                 {
71                     // Mark the start
72                     MStart=It2;
73                 }
74 
75                 // Increate repetition counter
76                 Cnt++;
77             }
78             else
79             {
80                 if ( MStart!=End )
81                 {
82                     if ( Cnt>2 )
83                         return result_type( MStart, It );
84                     else
85                     {
86                         MStart=End;
87                         Cnt=0;
88                     }
89                 }
90             }
91         }
92 
93         return result_type( End, End );
94     }
95 };
96 
97 // rle compress format
98 /*
99     Transform a sequence into repeat mark, character and count
100 */
101 template<typename SeqT>
102 struct format_compressF
103 {
104 private:
105     typedef SeqT result_type;
106     typedef typename SeqT::value_type value_type;
107 
108 public:
109     // Construction
format_compressFformat_compressF110     format_compressF() {};
111 
112     // Operation
113     template< typename ReplaceT >
operator ()format_compressF114     result_type operator()( const ReplaceT& Replace ) const
115     {
116         SeqT r;
117         if(!Replace.empty())
118         {
119             r.push_back( repeat_mark<value_type>() );
120             r.push_back( *(Replace.begin()) );
121             r.push_back( value_type( Replace.size() ) );
122         }
123 
124         return r;
125     }
126 };
127 
128 // Decompression  -----------------------------------------------------------------------
129 
130 
131 // find decompress-rle functor
132 /*
133     find a repetition block
134 */
135 struct find_decompressF
136 {
137     // Construction
find_decompressFfind_decompressF138     find_decompressF() {}
139 
140     // Operation
141     template<typename ForwardIteratorT>
operator ()find_decompressF142     iterator_range<ForwardIteratorT> operator()(
143         ForwardIteratorT Begin,
144         ForwardIteratorT End ) const
145     {
146         typedef ForwardIteratorT input_iterator_type;
147         typedef typename std::iterator_traits<input_iterator_type>::value_type value_type;
148         typedef iterator_range<input_iterator_type> result_type;
149 
150         for(input_iterator_type It=Begin; It!=End; It++)
151         {
152             if( *It==repeat_mark<value_type>() )
153             {
154                 // Repeat mark found, extract body
155                 input_iterator_type It2=It++;
156 
157                 if ( It==End ) break;
158                     It++;
159                 if ( It==End ) break;
160                     It++;
161 
162                 return result_type( It2, It );
163             }
164         }
165 
166         return result_type( End, End );
167     }
168 };
169 
170 // rle decompress format
171 /*
172     transform a repetition block into a sequence of characters
173 */
174 template< typename SeqT >
175 struct format_decompressF
176 {
177 private:
178     typedef SeqT result_type;
179     typedef typename SeqT::value_type value_type;
180 
181 public:
182     // Construction
format_decompressFformat_decompressF183     format_decompressF() {};
184 
185     // Operation
186     template< typename ReplaceT >
operator ()format_decompressF187     result_type operator()( const ReplaceT& Replace ) const
188     {
189         SeqT r;
190 
191         if(!Replace.empty())
192         {
193             // extract info
194             typename ReplaceT::const_iterator It=Replace.begin();
195 
196             value_type Value=*(++It);
197             value_type Repeat=*(++It);
198 
199             for( value_type Index=0; Index<Repeat; Index++ ) r.push_back( Value );
200         }
201 
202         return r;
203     }
204 };
205 
206 
main()207 int main()
208 {
209     cout << "* RLE Compression Example *" << endl << endl;
210 
211     string original("123_AA_*ZZZZZZZZZZZZZZ*34");
212 
213     // copy compression
214     string compress=find_format_all_copy(
215         original,
216         find_compressF(),
217         format_compressF<string>() );
218 
219     cout << "Compressed string: " << compress << endl;
220 
221     // Copy decompression
222     string decompress=find_format_all_copy(
223         compress,
224         find_decompressF(),
225         format_decompressF<string>() );
226 
227     cout << "Decompressed string: " << decompress << endl;
228 
229     // in-place compression
230     find_format_all(
231         original,
232         find_compressF(),
233         format_compressF<string>() );
234 
235     cout << "Compressed string: " << original << endl;
236 
237     // in-place decompression
238     find_format_all(
239         original,
240         find_decompressF(),
241         format_decompressF<string>() );
242 
243     cout << "Decompressed string: " << original << endl;
244 
245     cout << endl;
246 
247     return 0;
248 }
249