1 /*
2 Copyright (c) Marshall Clow 2010-2012.
3
4 Distributed under the Boost Software License, Version 1.0. (See accompanying
5 file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6
7 For more information, see http://www.boost.org
8 */
9
10 #include <boost/algorithm/searching/boyer_moore.hpp>
11 #include <boost/algorithm/searching/boyer_moore_horspool.hpp>
12 #include <boost/algorithm/searching/knuth_morris_pratt.hpp>
13
14 #define BOOST_TEST_MAIN
15 #include <boost/test/unit_test.hpp>
16
17 #include <iostream>
18 #include <string>
19 #include <vector>
20
21
22 namespace ba = boost::algorithm;
23
24 template <typename Iter>
make_str(Iter first,std::size_t len)25 std::string make_str ( Iter first, std::size_t len ) {
26 std::string retVal ( len + 2, '\'' );
27 std::copy ( first, first+len, retVal.begin () + 1);
28 return retVal;
29 }
30
31 namespace {
32
33 // Check using iterators
34 template<typename Container>
check_one_iter(const Container & haystack,const std::string & needle,int expected)35 void check_one_iter ( const Container &haystack, const std::string &needle, int expected ) {
36 typedef typename Container::const_iterator iter_type;
37 typedef typename std::pair<iter_type, iter_type> ret_type;
38 typedef std::string::const_iterator pattern_type;
39
40 iter_type hBeg = haystack.begin ();
41 iter_type hEnd = haystack.end ();
42 pattern_type nBeg = needle.begin ();
43 pattern_type nEnd = needle.end ();
44
45 // iter_type ret0 = std::search (hBeg, hEnd, nBeg, nEnd);
46 ret_type ret1 = ba::boyer_moore_search (hBeg, hEnd, nBeg, nEnd);
47 ret_type ret1r = ba::boyer_moore_search (haystack, nBeg, nEnd);
48 ret_type ret2 = ba::boyer_moore_horspool_search (hBeg, hEnd, nBeg, nEnd);
49 ret_type ret3 = ba::knuth_morris_pratt_search (hBeg, hEnd, nBeg, nEnd);
50
51 iter_type it0 = std::search (hBeg, hEnd, nBeg, nEnd);
52 // iter_type it1 = ret1.first;
53 // iter_type it1r = ret1r.first;
54 // iter_type it2 = ret2.first;
55 // iter_type it3 = ret3.first;
56 const int dist = ret1.first == hEnd ? -1 : std::distance ( hBeg, ret1.first );
57
58 std::cout << "(Iterators) Pattern is " << needle.length () << ", haysstack is " << haystack.length () << " chars long; " << std::endl;
59 try {
60 if ( it0 != ret1.first ) {
61 throw std::runtime_error (
62 std::string ( "results mismatch between std::search and boyer-moore search" ));
63 }
64
65 if ( ret1.first != ret1r.first || ret1.second != ret1r.second ) {
66 throw std::runtime_error (
67 std::string ( "results mismatch between iterator and range boyer_moore search" ));
68 }
69
70 if ( ret1.first != ret2.first || ret1.second != ret2.second ) {
71 throw std::runtime_error (
72 std::string ( "results mismatch between boyer-moore and boyer-moore-horspool search" ));
73 }
74
75 if ( ret1.first != ret3.first || ret1.second != ret3.second ) {
76 throw std::runtime_error (
77 std::string ( "results mismatch between boyer-moore and knuth-morris-pratt search" ));
78 }
79
80 }
81
82 catch ( ... ) {
83 std::cout << "Searching for: " << needle << std::endl;
84 std::cout << "Expected: " << expected << "\n";
85 std::cout << " std: " << std::distance ( hBeg, it0 ) << "\n";
86 std::cout << " bm: " << std::distance ( hBeg, ret1.first ) << "\n";
87 std::cout << " bm(r): " << std::distance ( hBeg, ret1r.first ) << "\n";
88 std::cout << " bmh: " << std::distance ( hBeg, ret2.first ) << "\n";
89 std::cout << " kpm: " << std::distance ( hBeg, ret3.first )<< "\n";
90 std::cout << std::flush;
91 throw ;
92 }
93
94 BOOST_CHECK_EQUAL ( dist, expected );
95 }
96
97 // Check using pointers
98 // We're assuming that the container implements contiguous storage here.
99 template<typename Container>
check_one_pointer(const Container & haystack,const std::string & needle,int expected)100 void check_one_pointer ( const Container &haystack, const std::string &needle, int expected ) {
101 typedef const typename Container::value_type *ptr_type;
102 typedef typename std::pair<ptr_type, ptr_type> ret_type;
103
104 ptr_type hBeg = haystack.size () == 0 ? NULL : &*haystack.begin ();
105 ptr_type hEnd = hBeg + haystack.size ();
106 ptr_type nBeg = needle.size () == 0 ? NULL : &*needle.begin ();
107 ptr_type nEnd = nBeg + needle.size ();
108
109 ptr_type it0 = std::search (hBeg, hEnd, nBeg, nEnd);
110 ret_type ret1 = ba::boyer_moore_search (hBeg, hEnd, nBeg, nEnd);
111 ret_type ret2 = ba::boyer_moore_horspool_search (hBeg, hEnd, nBeg, nEnd);
112 ret_type ret3 = ba::knuth_morris_pratt_search (hBeg, hEnd, nBeg, nEnd);
113 const int dist = ret1.first == hEnd ? -1 : std::distance ( hBeg, ret1.first );
114
115 std::cout << "(Pointers) Pattern is " << needle.length () << ", haysstack is " << haystack.length () << " chars long; " << std::endl;
116 try {
117 if ( it0 != ret1.first ) {
118 throw std::runtime_error (
119 std::string ( "results mismatch between std::search and boyer-moore search" ));
120 }
121
122 if ( ret1.first != ret2.first || ret1.second != ret2.second ) {
123 throw std::runtime_error (
124 std::string ( "results mismatch between boyer-moore and boyer-moore-horspool search" ));
125 }
126
127 if ( ret1.first != ret3.first || ret1.second != ret3.second ) {
128 throw std::runtime_error (
129 std::string ( "results mismatch between boyer-moore and knuth-morris-pratt search" ));
130 }
131
132 }
133
134 catch ( ... ) {
135 std::cout << "Searching for: " << needle << std::endl;
136 std::cout << "Expected: " << expected << "\n";
137 std::cout << " std: " << std::distance ( hBeg, it0 ) << "\n";
138 std::cout << " bm: " << std::distance ( hBeg, ret1.first ) << "\n";
139 std::cout << " bmh: " << std::distance ( hBeg, ret2.first ) << "\n";
140 std::cout << " kpm: " << std::distance ( hBeg, ret3.first )<< "\n";
141 std::cout << std::flush;
142 throw ;
143 }
144
145 BOOST_CHECK_EQUAL ( dist, expected );
146 }
147
148 // Check using objects
149 template<typename Container>
check_one_object(const Container & haystack,const std::string & needle,int expected)150 void check_one_object ( const Container &haystack, const std::string &needle, int expected ) {
151 typedef typename Container::const_iterator iter_type;
152 typedef typename std::pair<iter_type, iter_type> ret_type;
153 typedef std::string::const_iterator pattern_type;
154
155 iter_type hBeg = haystack.begin ();
156 iter_type hEnd = haystack.end ();
157 pattern_type nBeg = needle.begin ();
158 pattern_type nEnd = needle.end ();
159
160 ba::boyer_moore<pattern_type> bm_r = ba::make_boyer_moore ( needle );
161 ba::boyer_moore<pattern_type> bm ( nBeg, nEnd );
162 ba::boyer_moore_horspool<pattern_type> bmh ( nBeg, nEnd );
163 ba::knuth_morris_pratt<pattern_type> kmp ( nBeg, nEnd );
164
165 iter_type it0 = std::search (hBeg, hEnd, nBeg, nEnd);
166 ret_type ret1 = bm (hBeg, hEnd);
167 ret_type ret1r = bm (haystack);
168 ret_type retr1 = bm_r (hBeg, hEnd);
169 ret_type retr1r = bm_r (haystack);
170 ret_type ret2 = bmh (hBeg, hEnd);
171 ret_type ret3 = kmp (hBeg, hEnd);
172 const int dist = ret1.first == hEnd ? -1 : std::distance ( hBeg, ret1.first );
173
174 std::cout << "(Objects) Pattern is " << needle.length () << ", haysstack is " << haystack.length () << " chars long; " << std::endl;
175 try {
176 if ( it0 != ret1.first ) {
177 throw std::runtime_error (
178 std::string ( "results mismatch between std::search and boyer-moore search" ));
179 }
180
181 if ( ret1.first != ret1r.first || ret1.second != ret1r.second ) {
182 throw std::runtime_error (
183 std::string ( "results mismatch between iterator and range boyer_moore search(1)" ));
184 }
185
186 if ( ret1.first != retr1.first || ret1.second != retr1.second ) {
187 throw std::runtime_error (
188 std::string ( "results mismatch between iterator and range boyer_moore search(2)" ));
189 }
190
191 if ( ret1.first != retr1r.first || ret1.second != retr1r.second ) {
192 throw std::runtime_error (
193 std::string ( "results mismatch between iterator and range boyer_moore search(3)" ));
194 }
195
196 if ( ret1.first != ret2.first || ret1.second != ret2.second ) {
197 throw std::runtime_error (
198 std::string ( "results mismatch between boyer-moore and boyer-moore-horspool search" ));
199 }
200
201 if ( ret1.first != ret3.first || ret1.second != ret3.second ) {
202 throw std::runtime_error (
203 std::string ( "results mismatch between boyer-moore and knuth-morris-pratt search" ));
204 }
205
206 }
207
208 catch ( ... ) {
209 std::cout << "Searching for: " << needle << std::endl;
210 std::cout << "Expected: " << expected << "\n";
211 std::cout << " std: " << std::distance ( hBeg, it0 ) << "\n";
212 std::cout << " bm: " << std::distance ( hBeg, ret1.first ) << "\n";
213 std::cout << " bm(r1): " << std::distance ( hBeg, ret1r.first ) << "\n";
214 std::cout << " bm(r2): " << std::distance ( hBeg, retr1.first ) << "\n";
215 std::cout << " bm(r3): " << std::distance ( hBeg, retr1r.first ) << "\n";
216 std::cout << " bmh: " << std::distance ( hBeg, ret2.first ) << "\n";
217 std::cout << " kpm: " << std::distance ( hBeg, ret3.first )<< "\n";
218 std::cout << std::flush;
219 throw ;
220 }
221
222 BOOST_CHECK_EQUAL ( dist, expected );
223 }
224
225
226 template<typename Container>
check_one(const Container & haystack,const std::string & needle,int expected)227 void check_one ( const Container &haystack, const std::string &needle, int expected ) {
228 check_one_iter ( haystack, needle, expected );
229 check_one_pointer ( haystack, needle, expected );
230 check_one_object ( haystack, needle, expected );
231 }
232 }
233
234
BOOST_AUTO_TEST_CASE(test_main)235 BOOST_AUTO_TEST_CASE( test_main )
236 {
237 std::string haystack1 ( "NOW AN FOWE\220ER ANNMAN THE ANPANMANEND" );
238 std::string needle1 ( "ANPANMAN" );
239 std::string needle2 ( "MAN THE" );
240 std::string needle3 ( "WE\220ER" );
241 std::string needle4 ( "NOW " ); // At the beginning
242 std::string needle5 ( "NEND" ); // At the end
243 std::string needle6 ( "NOT FOUND" ); // Nowhere
244 std::string needle7 ( "NOT FO\340ND" ); // Nowhere
245
246 std::string haystack2 ( "ABC ABCDAB ABCDABCDABDE" );
247 std::string needle11 ( "ABCDABD" );
248
249 std::string haystack3 ( "abra abracad abracadabra" );
250 std::string needle12 ( "abracadabra" );
251
252 std::string needle13 ( "" );
253 std::string haystack4 ( "" );
254
255 check_one ( haystack1, needle1, 26 );
256 check_one ( haystack1, needle2, 18 );
257 check_one ( haystack1, needle3, 9 );
258 check_one ( haystack1, needle4, 0 );
259 check_one ( haystack1, needle5, 33 );
260 check_one ( haystack1, needle6, -1 );
261 check_one ( haystack1, needle7, -1 );
262
263 check_one ( needle1, haystack1, -1 ); // cant find long pattern in short corpus
264 check_one ( haystack1, haystack1, 0 ); // find something in itself
265 check_one ( haystack2, haystack2, 0 ); // find something in itself
266
267 check_one ( haystack2, needle11, 15 );
268 check_one ( haystack3, needle12, 13 );
269
270 check_one ( haystack1, needle13, 0 ); // find the empty string
271 check_one ( haystack4, needle1, -1 ); // can't find in an empty haystack
272
273 // Mikhail Levin <svarneticist@gmail.com> found a problem, and this was the test
274 // that triggered it.
275
276 const std::string mikhail_pattern =
277 "GATACACCTACCTTCACCAGTTACTCTATGCACTAGGTGCGCCAGGCCCATGCACAAGGGCTTGAGTGGATGGGAAGGA"
278 "TGTGCCCTAGTGATGGCAGCATAAGCTACGCAGAGAAGTTCCAGGGCAGAGTCACCATGACCAGGGACACATCCACGAG"
279 "CACAGCCTACATGGAGCTGAGCAGCCTGAGATCTGAAGACACGGCCATGTATTACTGTGGGAGAGATGTCTGGAGTGGT"
280 "TATTATTGCCCCGGTAATATTACTACTACTACTACTACATGGACGTCTGGGGCAAAGGGACCACG"
281 ;
282 const std::string mikhail_corpus = std::string (8, 'a') + mikhail_pattern;
283
284 check_one ( mikhail_corpus, mikhail_pattern, 8 );
285 }
286