1 /*
2 * Distributed under the Boost Software License, Version 1.0.(See accompanying
3 * file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt.)
4 *
5 * See http://www.boost.org/libs/iostreams for documentation.
6
7 * File: libs/iostreams/test/grep_test.cpp
8 * Date: Mon May 26 17:48:45 MDT 2008
9 * Copyright: 2008 CodeRage, LLC
10 * Author: Jonathan Turkanis
11 * Contact: turkanis at coderage dot com
12 *
13 * Tests the class template basic_grep_filter.
14 */
15
16 #include <iostream>
17
18 #include <boost/config.hpp> // Make sure ptrdiff_t is in std.
19 #include <algorithm>
20 #include <cstddef> // std::ptrdiff_t
21 #include <string>
22 #include <boost/iostreams/compose.hpp>
23 #include <boost/iostreams/copy.hpp>
24 #include <boost/iostreams/device/array.hpp>
25 #include <boost/iostreams/device/back_inserter.hpp>
26 #include <boost/iostreams/filter/grep.hpp>
27 #include <boost/iostreams/filter/test.hpp>
28 #include <boost/ref.hpp>
29 #include <boost/regex.hpp>
30 #include <boost/test/test_tools.hpp>
31 #include <boost/test/unit_test.hpp>
32
33 using namespace boost;
34 using namespace boost::iostreams;
35 namespace io = boost::iostreams;
36 using boost::unit_test::test_suite;
37
// Input fixture: addresses of US Appeals Courts, from uscourts.gov.
// Each address occupies three lines and consecutive addresses are separated
// by one empty line; every line, including the last, ends in '\n'.
std::string addresses(
    "John Joseph Moakley United States Courthouse, Suite 2500\n"
    "One Courthouse Way\n"
    "Boston, MA 02210-3002\n"
    "\n"
    "Thurgood Marshall United States Courthouse, 18th Floor\n"
    "40 Centre Street\n"
    "New York, NY 10007-1501\n"
    "\n"
    "21400 James A. Byrne United States Courthouse\n"
    "601 Market Street\n"
    "Philadelphia, PA 19106-1729\n"
    "\n"
    "Lewis F. Powell, Jr. United States Courthouse Annex, Suite 501\n"
    "1100 East Main Street\n"
    "Richmond, VA 23219-3525\n"
    "\n"
    "F. Edward Hebert Federal Bldg\n"
    "600 South Maestri Place\n"
    "New Orleans, LA 70130\n"
    "\n"
    "Bob Casey United States Courthouse, 1st Floor\n"
    "515 Rusk Street\n"
    "Houston, TX 77002-2600\n"
    "\n"
    "Potter Stewart United States Courthouse, Suite 540\n"
    "100 East Fifth Street\n"
    "Cincinnati, OH 45202\n"
    "\n"
    "2722 Everett McKinley Dirksen United States Courthouse\n"
    "219 South Dearborn Street\n"
    "Chicago, IL 60604\n"
);
71
// Expected result of filtering `addresses` for the phrase
// "United States Courthouse": only the lines that contain it, in input order.
std::string us_courthouse(
    "John Joseph Moakley United States Courthouse, Suite 2500\n"
    "Thurgood Marshall United States Courthouse, 18th Floor\n"
    "21400 James A. Byrne United States Courthouse\n"
    "Lewis F. Powell, Jr. United States Courthouse Annex, Suite 501\n"
    "Bob Casey United States Courthouse, 1st Floor\n"
    "Potter Stewart United States Courthouse, Suite 540\n"
    "2722 Everett McKinley Dirksen United States Courthouse\n"
);
81
// Complement of `us_courthouse`: every line of `addresses` that does NOT
// contain "United States Courthouse", blank separator lines included.
std::string us_courthouse_inv(
    "One Courthouse Way\n"
    "Boston, MA 02210-3002\n"
    "\n"
    "40 Centre Street\n"
    "New York, NY 10007-1501\n"
    "\n"
    "601 Market Street\n"
    "Philadelphia, PA 19106-1729\n"
    "\n"
    "1100 East Main Street\n"
    "Richmond, VA 23219-3525\n"
    "\n"
    "F. Edward Hebert Federal Bldg\n"
    "600 South Maestri Place\n"
    "New Orleans, LA 70130\n"
    "\n"
    "515 Rusk Street\n"
    "Houston, TX 77002-2600\n"
    "\n"
    "100 East Fifth Street\n"
    "Cincinnati, OH 45202\n"
    "\n"
    "219 South Dearborn Street\n"
    "Chicago, IL 60604\n"
);
108
// Expected result of filtering `addresses` for a state and zip: the lines
// holding a two-letter state code followed by a ZIP code (optionally ZIP+4).
std::string state_and_zip(
    "Boston, MA 02210-3002\n"
    "New York, NY 10007-1501\n"
    "Philadelphia, PA 19106-1729\n"
    "Richmond, VA 23219-3525\n"
    "New Orleans, LA 70130\n"
    "Houston, TX 77002-2600\n"
    "Cincinnati, OH 45202\n"
    "Chicago, IL 60604\n"
);
119
// Complement of `state_and_zip`: every line of `addresses` without a state
// and zip, blank separator lines included.
std::string state_and_zip_inv(
    "John Joseph Moakley United States Courthouse, Suite 2500\n"
    "One Courthouse Way\n"
    "\n"
    "Thurgood Marshall United States Courthouse, 18th Floor\n"
    "40 Centre Street\n"
    "\n"
    "21400 James A. Byrne United States Courthouse\n"
    "601 Market Street\n"
    "\n"
    "Lewis F. Powell, Jr. United States Courthouse Annex, Suite 501\n"
    "1100 East Main Street\n"
    "\n"
    "F. Edward Hebert Federal Bldg\n"
    "600 South Maestri Place\n"
    "\n"
    "Bob Casey United States Courthouse, 1st Floor\n"
    "515 Rusk Street\n"
    "\n"
    "Potter Stewart United States Courthouse, Suite 540\n"
    "100 East Fifth Street\n"
    "\n"
    "2722 Everett McKinley Dirksen United States Courthouse\n"
    "219 South Dearborn Street\n"
);
145
// Expected result of filtering `addresses` for lines that contain at least
// three words, i.e. three consecutive words separated only by whitespace
// somewhere in the line.
std::string three_words(
    "John Joseph Moakley United States Courthouse, Suite 2500\n"
    "One Courthouse Way\n"
    "Thurgood Marshall United States Courthouse, 18th Floor\n"
    "40 Centre Street\n"
    "21400 James A. Byrne United States Courthouse\n"
    "601 Market Street\n"
    "Lewis F. Powell, Jr. United States Courthouse Annex, Suite 501\n"
    "1100 East Main Street\n"
    "F. Edward Hebert Federal Bldg\n"
    "600 South Maestri Place\n"
    "Bob Casey United States Courthouse, 1st Floor\n"
    "515 Rusk Street\n"
    "Potter Stewart United States Courthouse, Suite 540\n"
    "100 East Fifth Street\n"
    "2722 Everett McKinley Dirksen United States Courthouse\n"
    "219 South Dearborn Street\n"
);
164
// Expected result when the three-word pattern has to match the whole line:
// the lines of `addresses` consisting of exactly three words.
std::string exactly_three_words(
    "One Courthouse Way\n"
    "40 Centre Street\n"
    "601 Market Street\n"
    "515 Rusk Street\n"
);
171
// Complement of `exactly_three_words`: every line of `addresses` that does
// not consist of exactly three words, blank separator lines included.
std::string exactly_three_words_inv(
    "John Joseph Moakley United States Courthouse, Suite 2500\n"
    "Boston, MA 02210-3002\n"
    "\n"
    "Thurgood Marshall United States Courthouse, 18th Floor\n"
    "New York, NY 10007-1501\n"
    "\n"
    "21400 James A. Byrne United States Courthouse\n"
    "Philadelphia, PA 19106-1729\n"
    "\n"
    "Lewis F. Powell, Jr. United States Courthouse Annex, Suite 501\n"
    "1100 East Main Street\n"
    "Richmond, VA 23219-3525\n"
    "\n"
    "F. Edward Hebert Federal Bldg\n"
    "600 South Maestri Place\n"
    "New Orleans, LA 70130\n"
    "\n"
    "Bob Casey United States Courthouse, 1st Floor\n"
    "Houston, TX 77002-2600\n"
    "\n"
    "Potter Stewart United States Courthouse, Suite 540\n"
    "100 East Fifth Street\n"
    "Cincinnati, OH 45202\n"
    "\n"
    "2722 Everett McKinley Dirksen United States Courthouse\n"
    "219 South Dearborn Street\n"
    "Chicago, IL 60604\n"
);
201
202 void test_filter( grep_filter grep,
203 const std::string& input,
204 const std::string& output );
205
grep_filter_test()206 void grep_filter_test()
207 {
208 regex match_us_courthouse("\\bUnited States Courthouse\\b");
209 regex match_state_and_zip("\\b[A-Z]{2}\\s+[0-9]{5}(-[0-9]{4})?\\b");
210 regex match_three_words("\\b\\w+\\s+\\w+\\s+\\w+\\b");
211 regex_constants::match_flag_type match_default =
212 regex_constants::match_default;
213
214 {
215 grep_filter grep(match_us_courthouse);
216 test_filter(grep, addresses, us_courthouse);
217 }
218
219 {
220 grep_filter grep(match_us_courthouse, match_default, grep::invert);
221 test_filter(grep, addresses, us_courthouse_inv);
222 }
223
224 {
225 grep_filter grep(match_state_and_zip);
226 test_filter(grep, addresses, state_and_zip);
227 }
228
229 {
230 grep_filter grep(match_state_and_zip, match_default, grep::invert);
231 test_filter(grep, addresses, state_and_zip_inv);
232 }
233
234 {
235 grep_filter grep(match_three_words);
236 test_filter(grep, addresses, three_words);
237 }
238
239 {
240 grep_filter grep(match_three_words, match_default, grep::whole_line);
241 test_filter(grep, addresses, exactly_three_words);
242 }
243
244 {
245 int options = grep::whole_line | grep::invert;
246 grep_filter grep(match_three_words, match_default, options);
247 test_filter(grep, addresses, exactly_three_words_inv);
248 }
249 }
250
test_filter(grep_filter grep,const std::string & input,const std::string & output)251 void test_filter( grep_filter grep,
252 const std::string& input,
253 const std::string& output )
254 {
255 // Count lines in output
256 std::ptrdiff_t count = std::count(output.begin(), output.end(), '\n');
257
258 // Test as input filter
259 {
260 array_source src(input.data(), input.data() + input.size());
261 std::string dest;
262 io::copy(compose(boost::ref(grep), src), io::back_inserter(dest));
263 BOOST_CHECK(dest == output);
264 BOOST_CHECK(grep.count() == count);
265 }
266
267 // Test as output filter
268 {
269 array_source src(input.data(), input.data() + input.size());
270 std::string dest;
271 io::copy(src, compose(boost::ref(grep), io::back_inserter(dest)));
272 BOOST_CHECK(dest == output);
273 BOOST_CHECK(grep.count() == count);
274 }
275 }
276
init_unit_test_suite(int,char * [])277 test_suite* init_unit_test_suite(int, char* [])
278 {
279 test_suite* test = BOOST_TEST_SUITE("grep_filter test");
280 test->add(BOOST_TEST_CASE(&grep_filter_test));
281 return test;
282 }
283