1 // -*- coding: utf-8 -*-
2 //
3 // Copyright (c) 2005 - 2010, Google Inc.
4 // All rights reserved.
5 //
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are
8 // met:
9 //
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above
13 // copyright notice, this list of conditions and the following disclaimer
14 // in the documentation and/or other materials provided with the
15 // distribution.
16 // * Neither the name of Google Inc. nor the names of its
17 // contributors may be used to endorse or promote products derived from
18 // this software without specific prior written permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 // Author: Sanjay Ghemawat
33 //
34 // TODO: Test extractions for PartialMatch/Consume
35
36 #ifdef HAVE_CONFIG_H
37 #include "config.h"
38 #endif
39
40 #include <stdio.h>
41 #include <cassert>
42 #include <vector>
43 #include "pcrecpp.h"
44
45 using pcrecpp::StringPiece;
46 using pcrecpp::RE;
47 using pcrecpp::RE_Options;
48 using pcrecpp::Hex;
49 using pcrecpp::Octal;
50 using pcrecpp::CRadix;
51
52 static bool VERBOSE_TEST = false;
53
// CHECK terminates the process with a diagnostic on stderr when `condition`
// evaluates to false.  Unlike assert(), it is *not* controlled by NDEBUG,
// so the condition is evaluated in every compilation mode and may safely
// contain side effects, e.g.:
//    CHECK_EQ(fp->Write(x), 4)
#define CHECK(condition) do {                           \
  if (!(condition)) {                                   \
    fprintf(stderr, "%s:%d: Check failed: %s\n",        \
            __FILE__, __LINE__, #condition);            \
    exit(1);                                            \
  }                                                     \
} while (0)

// CHECK_EQ(a, b) dies unless a == b.  Both operands are parenthesized so
// that arguments containing operators of lower precedence than `==`
// (for example `x & mask` or `c ? p : q`) are compared as whole expressions.
#define CHECK_EQ(a, b) CHECK((a) == (b))
67
Timing1(int num_iters)68 static void Timing1(int num_iters) {
69 // Same pattern lots of times
70 RE pattern("ruby:\\d+");
71 StringPiece p("ruby:1234");
72 for (int j = num_iters; j > 0; j--) {
73 CHECK(pattern.FullMatch(p));
74 }
75 }
76
Timing2(int num_iters)77 static void Timing2(int num_iters) {
78 // Same pattern lots of times
79 RE pattern("ruby:(\\d+)");
80 int i;
81 for (int j = num_iters; j > 0; j--) {
82 CHECK(pattern.FullMatch("ruby:1234", &i));
83 CHECK_EQ(i, 1234);
84 }
85 }
86
Timing3(int num_iters)87 static void Timing3(int num_iters) {
88 string text_string;
89 for (int j = num_iters; j > 0; j--) {
90 text_string += "this is another line\n";
91 }
92
93 RE line_matcher(".*\n");
94 string line;
95 StringPiece text(text_string);
96 int counter = 0;
97 while (line_matcher.Consume(&text)) {
98 counter++;
99 }
100 printf("Matched %d lines\n", counter);
101 }
102
103 #if 0 // uncomment this if you have a way of defining VirtualProcessSize()
104
105 static void LeakTest() {
106 // Check for memory leaks
107 unsigned long long initial_size = 0;
108 for (int i = 0; i < 100000; i++) {
109 if (i == 50000) {
110 initial_size = VirtualProcessSize();
111 printf("Size after 50000: %llu\n", initial_size);
112 }
113 char buf[100]; // definitely big enough
114 sprintf(buf, "pat%09d", i);
115 RE newre(buf);
116 }
117 uint64 final_size = VirtualProcessSize();
118 printf("Size after 100000: %llu\n", final_size);
119 const double growth = double(final_size - initial_size) / final_size;
120 printf("Growth: %0.2f%%", growth * 100);
121 CHECK(growth < 0.02); // Allow < 2% growth
122 }
123
124 #endif
125
RadixTests()126 static void RadixTests() {
127 printf("Testing hex\n");
128
129 #define CHECK_HEX(type, value) \
130 do { \
131 type v; \
132 CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
133 CHECK_EQ(v, 0x ## value); \
134 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
135 CHECK_EQ(v, 0x ## value); \
136 } while(0)
137
138 CHECK_HEX(short, 2bad);
139 CHECK_HEX(unsigned short, 2badU);
140 CHECK_HEX(int, dead);
141 CHECK_HEX(unsigned int, deadU);
142 CHECK_HEX(long, 7eadbeefL);
143 CHECK_HEX(unsigned long, deadbeefUL);
144 #ifdef HAVE_LONG_LONG
145 CHECK_HEX(long long, 12345678deadbeefLL);
146 #endif
147 #ifdef HAVE_UNSIGNED_LONG_LONG
148 CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
149 #endif
150
151 #undef CHECK_HEX
152
153 printf("Testing octal\n");
154
155 #define CHECK_OCTAL(type, value) \
156 do { \
157 type v; \
158 CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
159 CHECK_EQ(v, 0 ## value); \
160 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
161 CHECK_EQ(v, 0 ## value); \
162 } while(0)
163
164 CHECK_OCTAL(short, 77777);
165 CHECK_OCTAL(unsigned short, 177777U);
166 CHECK_OCTAL(int, 17777777777);
167 CHECK_OCTAL(unsigned int, 37777777777U);
168 CHECK_OCTAL(long, 17777777777L);
169 CHECK_OCTAL(unsigned long, 37777777777UL);
170 #ifdef HAVE_LONG_LONG
171 CHECK_OCTAL(long long, 777777777777777777777LL);
172 #endif
173 #ifdef HAVE_UNSIGNED_LONG_LONG
174 CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
175 #endif
176
177 #undef CHECK_OCTAL
178
179 printf("Testing decimal\n");
180
181 #define CHECK_DECIMAL(type, value) \
182 do { \
183 type v; \
184 CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
185 CHECK_EQ(v, value); \
186 CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
187 CHECK_EQ(v, value); \
188 } while(0)
189
190 CHECK_DECIMAL(short, -1);
191 CHECK_DECIMAL(unsigned short, 9999);
192 CHECK_DECIMAL(int, -1000);
193 CHECK_DECIMAL(unsigned int, 12345U);
194 CHECK_DECIMAL(long, -10000000L);
195 CHECK_DECIMAL(unsigned long, 3083324652U);
196 #ifdef HAVE_LONG_LONG
197 CHECK_DECIMAL(long long, -100000000000000LL);
198 #endif
199 #ifdef HAVE_UNSIGNED_LONG_LONG
200 CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
201 #endif
202
203 #undef CHECK_DECIMAL
204
205 }
206
TestReplace()207 static void TestReplace() {
208 printf("Testing Replace\n");
209
210 struct ReplaceTest {
211 const char *regexp;
212 const char *rewrite;
213 const char *original;
214 const char *single;
215 const char *global;
216 int global_count; // the expected return value from ReplaceAll
217 };
218 static const ReplaceTest tests[] = {
219 { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
220 "\\2\\1ay",
221 "the quick brown fox jumps over the lazy dogs.",
222 "ethay quick brown fox jumps over the lazy dogs.",
223 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
224 9 },
225 { "\\w+",
226 "\\0-NOSPAM",
227 "paul.haahr@google.com",
228 "paul-NOSPAM.haahr@google.com",
229 "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
230 4 },
231 { "^",
232 "(START)",
233 "foo",
234 "(START)foo",
235 "(START)foo",
236 1 },
237 { "^",
238 "(START)",
239 "",
240 "(START)",
241 "(START)",
242 1 },
243 { "$",
244 "(END)",
245 "",
246 "(END)",
247 "(END)",
248 1 },
249 { "b",
250 "bb",
251 "ababababab",
252 "abbabababab",
253 "abbabbabbabbabb",
254 5 },
255 { "b",
256 "bb",
257 "bbbbbb",
258 "bbbbbbb",
259 "bbbbbbbbbbbb",
260 6 },
261 { "b+",
262 "bb",
263 "bbbbbb",
264 "bb",
265 "bb",
266 1 },
267 { "b*",
268 "bb",
269 "bbbbbb",
270 "bb",
271 "bbbb",
272 2 },
273 { "b*",
274 "bb",
275 "aaaaa",
276 "bbaaaaa",
277 "bbabbabbabbabbabb",
278 6 },
279 { "b*",
280 "bb",
281 "aa\naa\n",
282 "bbaa\naa\n",
283 "bbabbabb\nbbabbabb\nbb",
284 7 },
285 { "b*",
286 "bb",
287 "aa\raa\r",
288 "bbaa\raa\r",
289 "bbabbabb\rbbabbabb\rbb",
290 7 },
291 { "b*",
292 "bb",
293 "aa\r\naa\r\n",
294 "bbaa\r\naa\r\n",
295 "bbabbabb\r\nbbabbabb\r\nbb",
296 7 },
297 // Check empty-string matching (it's tricky!)
298 { "aa|b*",
299 "@",
300 "aa",
301 "@",
302 "@@",
303 2 },
304 { "b*|aa",
305 "@",
306 "aa",
307 "@aa",
308 "@@@",
309 3 },
310 #ifdef SUPPORT_UTF8
311 { "b*",
312 "bb",
313 "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
314 "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
315 "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
316 5 },
317 { "b*",
318 "bb",
319 "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
320 "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
321 ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
322 "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
323 9 },
324 #endif
325 { "", NULL, NULL, NULL, NULL, 0 }
326 };
327
328 #ifdef SUPPORT_UTF8
329 const bool support_utf8 = true;
330 #else
331 const bool support_utf8 = false;
332 #endif
333
334 for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
335 RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
336 assert(re.error().empty());
337 string one(t->original);
338 CHECK(re.Replace(t->rewrite, &one));
339 CHECK_EQ(one, t->single);
340 string all(t->original);
341 const int replace_count = re.GlobalReplace(t->rewrite, &all);
342 CHECK_EQ(all, t->global);
343 CHECK_EQ(replace_count, t->global_count);
344 }
345
346 // One final test: test \r\n replacement when we're not in CRLF mode
347 {
348 RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
349 assert(re.error().empty());
350 string all("aa\r\naa\r\n");
351 CHECK_EQ(re.GlobalReplace("bb", &all), 9);
352 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
353 }
354 {
355 RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
356 assert(re.error().empty());
357 string all("aa\r\naa\r\n");
358 CHECK_EQ(re.GlobalReplace("bb", &all), 9);
359 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
360 }
361 // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
362 // Alas, the answer depends on how pcre was compiled.
363 }
364
TestExtract()365 static void TestExtract() {
366 printf("Testing Extract\n");
367
368 string s;
369
370 CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
371 CHECK_EQ(s, "kremvax!boris");
372
373 // check the RE interface as well
374 CHECK(RE(".*").Extract("'\\0'", "foo", &s));
375 CHECK_EQ(s, "'foo'");
376 CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
377 CHECK_EQ(s, "'foo'");
378 }
379
TestConsume()380 static void TestConsume() {
381 printf("Testing Consume\n");
382
383 string word;
384
385 string s(" aaa b!@#$@#$cccc");
386 StringPiece input(s);
387
388 RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
389 CHECK(r.Consume(&input, &word));
390 CHECK_EQ(word, "aaa");
391 CHECK(r.Consume(&input, &word));
392 CHECK_EQ(word, "b");
393 CHECK(! r.Consume(&input, &word));
394 }
395
TestFindAndConsume()396 static void TestFindAndConsume() {
397 printf("Testing FindAndConsume\n");
398
399 string word;
400
401 string s(" aaa b!@#$@#$cccc");
402 StringPiece input(s);
403
404 RE r("(\\w+)"); // matches a word
405 CHECK(r.FindAndConsume(&input, &word));
406 CHECK_EQ(word, "aaa");
407 CHECK(r.FindAndConsume(&input, &word));
408 CHECK_EQ(word, "b");
409 CHECK(r.FindAndConsume(&input, &word));
410 CHECK_EQ(word, "cccc");
411 CHECK(! r.FindAndConsume(&input, &word));
412 }
413
TestMatchNumberPeculiarity()414 static void TestMatchNumberPeculiarity() {
415 printf("Testing match-number peculiaraity\n");
416
417 string word1;
418 string word2;
419 string word3;
420
421 RE r("(foo)|(bar)|(baz)");
422 CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
423 CHECK_EQ(word1, "foo");
424 CHECK_EQ(word2, "");
425 CHECK_EQ(word3, "");
426 CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
427 CHECK_EQ(word1, "");
428 CHECK_EQ(word2, "bar");
429 CHECK_EQ(word3, "");
430 CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
431 CHECK_EQ(word1, "");
432 CHECK_EQ(word2, "");
433 CHECK_EQ(word3, "baz");
434 CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
435
436 string a;
437 CHECK(RE("(foo)|hello").FullMatch("hello", &a));
438 CHECK_EQ(a, "");
439 }
440
TestRecursion()441 static void TestRecursion() {
442 printf("Testing recursion\n");
443
444 // Get one string that passes (sometimes), one that never does.
445 string text_good("abcdefghijk");
446 string text_bad("acdefghijkl");
447
448 // According to pcretest, matching text_good against (\w+)*b
449 // requires match_limit of at least 8192, and match_recursion_limit
450 // of at least 37.
451
452 RE_Options options_ml;
453 options_ml.set_match_limit(8192);
454 RE re("(\\w+)*b", options_ml);
455 CHECK(re.PartialMatch(text_good) == true);
456 CHECK(re.PartialMatch(text_bad) == false);
457 CHECK(re.FullMatch(text_good) == false);
458 CHECK(re.FullMatch(text_bad) == false);
459
460 options_ml.set_match_limit(1024);
461 RE re2("(\\w+)*b", options_ml);
462 CHECK(re2.PartialMatch(text_good) == false); // because of match_limit
463 CHECK(re2.PartialMatch(text_bad) == false);
464 CHECK(re2.FullMatch(text_good) == false);
465 CHECK(re2.FullMatch(text_bad) == false);
466
467 RE_Options options_mlr;
468 options_mlr.set_match_limit_recursion(50);
469 RE re3("(\\w+)*b", options_mlr);
470 CHECK(re3.PartialMatch(text_good) == true);
471 CHECK(re3.PartialMatch(text_bad) == false);
472 CHECK(re3.FullMatch(text_good) == false);
473 CHECK(re3.FullMatch(text_bad) == false);
474
475 options_mlr.set_match_limit_recursion(10);
476 RE re4("(\\w+)*b", options_mlr);
477 CHECK(re4.PartialMatch(text_good) == false);
478 CHECK(re4.PartialMatch(text_bad) == false);
479 CHECK(re4.FullMatch(text_good) == false);
480 CHECK(re4.FullMatch(text_bad) == false);
481 }
482
483 // A meta-quoted string, interpreted as a pattern, should always match
484 // the original unquoted string.
TestQuoteMeta(string unquoted,RE_Options options=RE_Options ())485 static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
486 string quoted = RE::QuoteMeta(unquoted);
487 RE re(quoted, options);
488 CHECK(re.FullMatch(unquoted));
489 }
490
491 // A string containing meaningful regexp characters, which is then meta-
492 // quoted, should not generally match a string the unquoted string does.
NegativeTestQuoteMeta(string unquoted,string should_not_match,RE_Options options=RE_Options ())493 static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
494 RE_Options options = RE_Options()) {
495 string quoted = RE::QuoteMeta(unquoted);
496 RE re(quoted, options);
497 CHECK(!re.FullMatch(should_not_match));
498 }
499
500 // Tests that quoted meta characters match their original strings,
501 // and that a few things that shouldn't match indeed do not.
TestQuotaMetaSimple()502 static void TestQuotaMetaSimple() {
503 TestQuoteMeta("foo");
504 TestQuoteMeta("foo.bar");
505 TestQuoteMeta("foo\\.bar");
506 TestQuoteMeta("[1-9]");
507 TestQuoteMeta("1.5-2.0?");
508 TestQuoteMeta("\\d");
509 TestQuoteMeta("Who doesn't like ice cream?");
510 TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
511 TestQuoteMeta("((?!)xxx).*yyy");
512 TestQuoteMeta("([");
513 TestQuoteMeta(string("foo\0bar", 7));
514 }
515
TestQuoteMetaSimpleNegative()516 static void TestQuoteMetaSimpleNegative() {
517 NegativeTestQuoteMeta("foo", "bar");
518 NegativeTestQuoteMeta("...", "bar");
519 NegativeTestQuoteMeta("\\.", ".");
520 NegativeTestQuoteMeta("\\.", "..");
521 NegativeTestQuoteMeta("(a)", "a");
522 NegativeTestQuoteMeta("(a|b)", "a");
523 NegativeTestQuoteMeta("(a|b)", "(a)");
524 NegativeTestQuoteMeta("(a|b)", "a|b");
525 NegativeTestQuoteMeta("[0-9]", "0");
526 NegativeTestQuoteMeta("[0-9]", "0-9");
527 NegativeTestQuoteMeta("[0-9]", "[9]");
528 NegativeTestQuoteMeta("((?!)xxx)", "xxx");
529 }
530
TestQuoteMetaLatin1()531 static void TestQuoteMetaLatin1() {
532 TestQuoteMeta("3\xb2 = 9");
533 }
534
// QuoteMeta checks on multi-byte UTF-8 text.  The body is compiled only
// when SUPPORT_UTF8 is defined; otherwise the function does nothing.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  const RE_Options utf8_opts = pcrecpp::UTF8();

  TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", utf8_opts);
  TestQuoteMeta("xyz", utf8_opts);                // No fancy utf8
  TestQuoteMeta("\xc2\xb0", utf8_opts);           // 2-byte utf8 (degree symbol)
  TestQuoteMeta("27\xc2\xb0 degrees", utf8_opts); // As a middle character
  TestQuoteMeta("\xe2\x80\xb3", utf8_opts);       // 3-byte utf8 (double prime)
  TestQuoteMeta("\xf0\x9d\x85\x9f", utf8_opts);   // 4-byte utf8 (music note)

  // Interpreted as Latin-1, but should still work
  TestQuoteMeta("27\xc2\xb0");

  // 2-byte utf (degree symbol): the subject has a backslash in front of
  // each of the two bytes and must not match.
  NegativeTestQuoteMeta("27\xc2\xb0",
                        "27\\\xc2\\\xb0",
                        utf8_opts);
#endif
}
549
TestQuoteMetaAll()550 static void TestQuoteMetaAll() {
551 printf("Testing QuoteMeta\n");
552 TestQuotaMetaSimple();
553 TestQuoteMetaSimpleNegative();
554 TestQuoteMetaLatin1();
555 TestQuoteMetaUtf8();
556 }
557
558 //
559 // Options tests contributed by
560 // Giuseppe Maxia, CTO, Stardata s.r.l.
561 // July 2005
562 //
GetOneOptionResult(const char * option_name,const char * regex,const char * str,RE_Options options,bool full,string expected)563 static void GetOneOptionResult(
564 const char *option_name,
565 const char *regex,
566 const char *str,
567 RE_Options options,
568 bool full,
569 string expected) {
570
571 printf("Testing Option <%s>\n", option_name);
572 if(VERBOSE_TEST)
573 printf("/%s/ finds \"%s\" within \"%s\" \n",
574 regex,
575 expected.c_str(),
576 str);
577 string captured("");
578 if (full)
579 RE(regex,options).FullMatch(str, &captured);
580 else
581 RE(regex,options).PartialMatch(str, &captured);
582 CHECK_EQ(captured, expected);
583 }
584
TestOneOption(const char * option_name,const char * regex,const char * str,RE_Options options,bool full,bool assertive=true)585 static void TestOneOption(
586 const char *option_name,
587 const char *regex,
588 const char *str,
589 RE_Options options,
590 bool full,
591 bool assertive = true) {
592
593 printf("Testing Option <%s>\n", option_name);
594 if (VERBOSE_TEST)
595 printf("'%s' %s /%s/ \n",
596 str,
597 (assertive? "matches" : "doesn't match"),
598 regex);
599 if (assertive) {
600 if (full)
601 CHECK(RE(regex,options).FullMatch(str));
602 else
603 CHECK(RE(regex,options).PartialMatch(str));
604 } else {
605 if (full)
606 CHECK(!RE(regex,options).FullMatch(str));
607 else
608 CHECK(!RE(regex,options).PartialMatch(str));
609 }
610 }
611
Test_CASELESS()612 static void Test_CASELESS() {
613 RE_Options options;
614 RE_Options options2;
615
616 options.set_caseless(true);
617 TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
618 TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
619 TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
620
621 TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
622 TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
623 options.set_caseless(false);
624 TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
625 }
626
Test_MULTILINE()627 static void Test_MULTILINE() {
628 RE_Options options;
629 RE_Options options2;
630 const char *str = "HELLO\n" "cruel\n" "world\n";
631
632 options.set_multiline(true);
633 TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
634 TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
635 TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
636 options.set_multiline(false);
637 TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
638 }
639
Test_DOTALL()640 static void Test_DOTALL() {
641 RE_Options options;
642 RE_Options options2;
643 const char *str = "HELLO\n" "cruel\n" "world";
644
645 options.set_dotall(true);
646 TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
647 TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
648 TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
649 options.set_dotall(false);
650 TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
651 }
652
Test_DOLLAR_ENDONLY()653 static void Test_DOLLAR_ENDONLY() {
654 RE_Options options;
655 RE_Options options2;
656 const char *str = "HELLO world\n";
657
658 TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
659 options.set_dollar_endonly(true);
660 TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
661 TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
662 }
663
Test_EXTRA()664 static void Test_EXTRA() {
665 RE_Options options;
666 const char *str = "HELLO";
667
668 options.set_extra(true);
669 TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
670 TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
671 options.set_extra(false);
672 TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
673 }
674
Test_EXTENDED()675 static void Test_EXTENDED() {
676 RE_Options options;
677 RE_Options options2;
678 const char *str = "HELLO world";
679
680 options.set_extended(true);
681 TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
682 TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
683 TestOneOption("EXTENDED (class)",
684 "^ HE L{2} O "
685 "\\s+ "
686 "\\w+ $ ",
687 str,
688 options,
689 false);
690
691 TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
692 TestOneOption("EXTENDED (function)",
693 "^ HE L{2} O "
694 "\\s+ "
695 "\\w+ $ ",
696 str,
697 pcrecpp::EXTENDED(),
698 false);
699
700 options.set_extended(false);
701 TestOneOption("no EXTENDED", "HELLO world", str, options, false);
702 }
703
Test_NO_AUTO_CAPTURE()704 static void Test_NO_AUTO_CAPTURE() {
705 RE_Options options;
706 const char *str = "HELLO world";
707 string captured;
708
709 printf("Testing Option <no NO_AUTO_CAPTURE>\n");
710 if (VERBOSE_TEST)
711 printf("parentheses capture text\n");
712 RE re("(world|universe)$", options);
713 CHECK(re.Extract("\\1", str , &captured));
714 CHECK_EQ(captured, "world");
715 options.set_no_auto_capture(true);
716 printf("testing Option <NO_AUTO_CAPTURE>\n");
717 if (VERBOSE_TEST)
718 printf("parentheses do not capture text\n");
719 re.Extract("\\1",str, &captured );
720 CHECK_EQ(captured, "world");
721 }
722
Test_UNGREEDY()723 static void Test_UNGREEDY() {
724 RE_Options options;
725 const char *str = "HELLO, 'this' is the 'world'";
726
727 options.set_ungreedy(true);
728 GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
729 GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
730 GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
731
732 options.set_ungreedy(false);
733 GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
734 GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
735 }
736
Test_all_options()737 static void Test_all_options() {
738 const char *str = "HELLO\n" "cruel\n" "world";
739 RE_Options options;
740 options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
741
742 TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
743 options.set_all_options(0);
744 TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
745 options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
746
747 TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
748 TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
749 " ^ c r u e l $ ",
750 str,
751 RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
752 false);
753
754 TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
755 " ^ c r u e l $ ",
756 str,
757 RE_Options()
758 .set_multiline(true)
759 .set_extended(true),
760 false);
761
762 options.set_all_options(0);
763 TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
764
765 }
766
TestOptions()767 static void TestOptions() {
768 printf("Testing Options\n");
769 Test_CASELESS();
770 Test_MULTILINE();
771 Test_DOTALL();
772 Test_DOLLAR_ENDONLY();
773 Test_EXTENDED();
774 Test_NO_AUTO_CAPTURE();
775 Test_UNGREEDY();
776 Test_EXTRA();
777 Test_all_options();
778 }
779
TestConstructors()780 static void TestConstructors() {
781 printf("Testing constructors\n");
782
783 RE_Options options;
784 options.set_dotall(true);
785 const char *str = "HELLO\n" "cruel\n" "world";
786
787 RE orig("HELLO.*world", options);
788 CHECK(orig.FullMatch(str));
789
790 RE copy1(orig);
791 CHECK(copy1.FullMatch(str));
792
793 RE copy2("not a match");
794 CHECK(!copy2.FullMatch(str));
795 copy2 = copy1;
796 CHECK(copy2.FullMatch(str));
797 copy2 = orig;
798 CHECK(copy2.FullMatch(str));
799
800 // Make sure when we assign to ourselves, nothing bad happens
801 orig = orig;
802 copy1 = copy1;
803 copy2 = copy2;
804 CHECK(orig.FullMatch(str));
805 CHECK(copy1.FullMatch(str));
806 CHECK(copy2.FullMatch(str));
807 }
808
main(int argc,char ** argv)809 int main(int argc, char** argv) {
810 // Treat any flag as --help
811 if (argc > 1 && argv[1][0] == '-') {
812 printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
813 " If 'timingX ###' is specified, run the given timing test\n"
814 " with the given number of iterations, rather than running\n"
815 " the default corectness test.\n", argv[0]);
816 return 0;
817 }
818
819 if (argc > 1) {
820 if ( argc == 2 || atoi(argv[2]) == 0) {
821 printf("timing mode needs a num-iters argument\n");
822 return 1;
823 }
824 if (!strcmp(argv[1], "timing1"))
825 Timing1(atoi(argv[2]));
826 else if (!strcmp(argv[1], "timing2"))
827 Timing2(atoi(argv[2]));
828 else if (!strcmp(argv[1], "timing3"))
829 Timing3(atoi(argv[2]));
830 else
831 printf("Unknown argument '%s'\n", argv[1]);
832 return 0;
833 }
834
835 printf("Testing FullMatch\n");
836
837 int i;
838 string s;
839
840 /***** FullMatch with no args *****/
841
842 CHECK(RE("h.*o").FullMatch("hello"));
843 CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front
844 CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end
845 CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op
846 CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op
847 CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops
848
849 /***** FullMatch with args *****/
850
851 // Zero-arg
852 CHECK(RE("\\d+").FullMatch("1001"));
853
854 // Single-arg
855 CHECK(RE("(\\d+)").FullMatch("1001", &i));
856 CHECK_EQ(i, 1001);
857 CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
858 CHECK_EQ(i, -123);
859 CHECK(!RE("()\\d+").FullMatch("10", &i));
860 CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
861 &i));
862
863 // Digits surrounding integer-arg
864 CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
865 CHECK_EQ(i, 23);
866 CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
867 CHECK_EQ(i, 1);
868 CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
869 CHECK_EQ(i, -1);
870 CHECK(RE("(\\d)").PartialMatch("1234", &i));
871 CHECK_EQ(i, 1);
872 CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
873 CHECK_EQ(i, -1);
874
875 // String-arg
876 CHECK(RE("h(.*)o").FullMatch("hello", &s));
877 CHECK_EQ(s, string("ell"));
878
879 // StringPiece-arg
880 StringPiece sp;
881 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
882 CHECK_EQ(sp.size(), 4);
883 CHECK(memcmp(sp.data(), "ruby", 4) == 0);
884 CHECK_EQ(i, 1234);
885
886 // Multi-arg
887 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
888 CHECK_EQ(s, string("ruby"));
889 CHECK_EQ(i, 1234);
890
891 // Ignore non-void* NULL arg
892 CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
893 CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
894 CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
895 CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
896 #ifdef HAVE_LONG_LONG
897 CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
898 #endif
899 CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
900 CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
901
902 // Fail on non-void* NULL arg if the match doesn't parse for the given type.
903 CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
904 CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
905 CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
906 CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
907 CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
908
909 // Ignored arg
910 CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
911 CHECK_EQ(s, string("ruby"));
912 CHECK_EQ(i, 1234);
913
914 // Type tests
915 {
916 char c;
917 CHECK(RE("(H)ello").FullMatch("Hello", &c));
918 CHECK_EQ(c, 'H');
919 }
920 {
921 unsigned char c;
922 CHECK(RE("(H)ello").FullMatch("Hello", &c));
923 CHECK_EQ(c, static_cast<unsigned char>('H'));
924 }
925 {
926 short v;
927 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
928 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
929 CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
930 CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
931 CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
932 CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
933 }
934 {
935 unsigned short v;
936 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
937 CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
938 CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
939 CHECK(!RE("(\\d+)").FullMatch("65536", &v));
940 }
941 {
942 int v;
943 static const int max_value = 0x7fffffff;
944 static const int min_value = -max_value - 1;
945 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
946 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
947 CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
948 CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
949 CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
950 CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
951 }
952 {
953 unsigned int v;
954 static const unsigned int max_value = 0xfffffffful;
955 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
956 CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
957 CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
958 }
959 #ifdef HAVE_LONG_LONG
960 # if defined(__MINGW__) || defined(__MINGW32__)
961 # define LLD "%I64d"
962 # define LLU "%I64u"
963 # else
964 # define LLD "%lld"
965 # define LLU "%llu"
966 # endif
967 {
968 long long v;
969 static const long long max_value = 0x7fffffffffffffffLL;
970 static const long long min_value = -max_value - 1;
971 char buf[32]; // definitely big enough for a long long
972
973 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
974 CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
975
976 sprintf(buf, LLD, max_value);
977 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
978
979 sprintf(buf, LLD, min_value);
980 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
981
982 sprintf(buf, LLD, max_value);
983 assert(buf[strlen(buf)-1] != '9');
984 buf[strlen(buf)-1]++;
985 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
986
987 sprintf(buf, LLD, min_value);
988 assert(buf[strlen(buf)-1] != '9');
989 buf[strlen(buf)-1]++;
990 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
991 }
992 #endif
#if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
// Parsing into unsigned long long.  This section also uses a long long (v2)
// and the LLU format macro, which is defined only inside the HAVE_LONG_LONG
// section above, hence both feature macros are required.
{
  unsigned long long v;
  long long v2;
  static const unsigned long long max_value = 0xffffffffffffffffULL;
  char buf[32];  // definitely big enough for an unsigned long long

  CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
  CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);

  // The largest representable value must be accepted and round-trip exactly.
  sprintf(buf, LLU, max_value);
  CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);

  // Turn the text into max_value + 1 by bumping its last digit; the assert
  // guarantees the bump cannot turn '9' into a non-digit.  The out-of-range
  // number must be rejected.
  assert(buf[strlen(buf)-1] != '9');
  buf[strlen(buf)-1]++;
  CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
}
#endif
1011 {
1012 float v;
1013 CHECK(RE("(.*)").FullMatch("100", &v));
1014 CHECK(RE("(.*)").FullMatch("-100.", &v));
1015 CHECK(RE("(.*)").FullMatch("1e23", &v));
1016 }
1017 {
1018 double v;
1019 CHECK(RE("(.*)").FullMatch("100", &v));
1020 CHECK(RE("(.*)").FullMatch("-100.", &v));
1021 CHECK(RE("(.*)").FullMatch("1e23", &v));
1022 }
1023
1024 // Check that matching is fully anchored
1025 CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
1026 CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
1027 CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
1028 CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
1029
1030 // Braces
1031 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
1032 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
1033 CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
1034
1035 // Complicated RE
1036 CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
1037 CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
1038 CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
1039 CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
1040
1041 // Check full-match handling (needs '$' tacked on internally)
1042 CHECK(RE("fo|foo").FullMatch("fo"));
1043 CHECK(RE("fo|foo").FullMatch("foo"));
1044 CHECK(RE("fo|foo$").FullMatch("fo"));
1045 CHECK(RE("fo|foo$").FullMatch("foo"));
1046 CHECK(RE("foo$").FullMatch("foo"));
1047 CHECK(!RE("foo\\$").FullMatch("foo$bar"));
1048 CHECK(!RE("fo|bar").FullMatch("fox"));
1049
1050 // Uncomment the following if we change the handling of '$' to
1051 // prevent it from matching a trailing newline
1052 if (false) {
1053 // Check that we don't get bitten by pcre's special handling of a
1054 // '\n' at the end of the string matching '$'
1055 CHECK(!RE("foo$").PartialMatch("foo\n"));
1056 }
1057
1058 // Number of args
1059 int a[16];
1060 CHECK(RE("").FullMatch(""));
1061
1062 memset(a, 0, sizeof(0));
1063 CHECK(RE("(\\d){1}").FullMatch("1",
1064 &a[0]));
1065 CHECK_EQ(a[0], 1);
1066
1067 memset(a, 0, sizeof(0));
1068 CHECK(RE("(\\d)(\\d)").FullMatch("12",
1069 &a[0], &a[1]));
1070 CHECK_EQ(a[0], 1);
1071 CHECK_EQ(a[1], 2);
1072
1073 memset(a, 0, sizeof(0));
1074 CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1075 &a[0], &a[1], &a[2]));
1076 CHECK_EQ(a[0], 1);
1077 CHECK_EQ(a[1], 2);
1078 CHECK_EQ(a[2], 3);
1079
1080 memset(a, 0, sizeof(0));
1081 CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1082 &a[0], &a[1], &a[2], &a[3]));
1083 CHECK_EQ(a[0], 1);
1084 CHECK_EQ(a[1], 2);
1085 CHECK_EQ(a[2], 3);
1086 CHECK_EQ(a[3], 4);
1087
1088 memset(a, 0, sizeof(0));
1089 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1090 &a[0], &a[1], &a[2],
1091 &a[3], &a[4]));
1092 CHECK_EQ(a[0], 1);
1093 CHECK_EQ(a[1], 2);
1094 CHECK_EQ(a[2], 3);
1095 CHECK_EQ(a[3], 4);
1096 CHECK_EQ(a[4], 5);
1097
1098 memset(a, 0, sizeof(0));
1099 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1100 &a[0], &a[1], &a[2],
1101 &a[3], &a[4], &a[5]));
1102 CHECK_EQ(a[0], 1);
1103 CHECK_EQ(a[1], 2);
1104 CHECK_EQ(a[2], 3);
1105 CHECK_EQ(a[3], 4);
1106 CHECK_EQ(a[4], 5);
1107 CHECK_EQ(a[5], 6);
1108
1109 memset(a, 0, sizeof(0));
1110 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1111 &a[0], &a[1], &a[2], &a[3],
1112 &a[4], &a[5], &a[6]));
1113 CHECK_EQ(a[0], 1);
1114 CHECK_EQ(a[1], 2);
1115 CHECK_EQ(a[2], 3);
1116 CHECK_EQ(a[3], 4);
1117 CHECK_EQ(a[4], 5);
1118 CHECK_EQ(a[5], 6);
1119 CHECK_EQ(a[6], 7);
1120
1121 memset(a, 0, sizeof(0));
1122 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1123 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1124 "1234567890123456",
1125 &a[0], &a[1], &a[2], &a[3],
1126 &a[4], &a[5], &a[6], &a[7],
1127 &a[8], &a[9], &a[10], &a[11],
1128 &a[12], &a[13], &a[14], &a[15]));
1129 CHECK_EQ(a[0], 1);
1130 CHECK_EQ(a[1], 2);
1131 CHECK_EQ(a[2], 3);
1132 CHECK_EQ(a[3], 4);
1133 CHECK_EQ(a[4], 5);
1134 CHECK_EQ(a[5], 6);
1135 CHECK_EQ(a[6], 7);
1136 CHECK_EQ(a[7], 8);
1137 CHECK_EQ(a[8], 9);
1138 CHECK_EQ(a[9], 0);
1139 CHECK_EQ(a[10], 1);
1140 CHECK_EQ(a[11], 2);
1141 CHECK_EQ(a[12], 3);
1142 CHECK_EQ(a[13], 4);
1143 CHECK_EQ(a[14], 5);
1144 CHECK_EQ(a[15], 6);
1145
1146 /***** PartialMatch *****/
1147
1148 printf("Testing PartialMatch\n");
1149
1150 CHECK(RE("h.*o").PartialMatch("hello"));
1151 CHECK(RE("h.*o").PartialMatch("othello"));
1152 CHECK(RE("h.*o").PartialMatch("hello!"));
1153 CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1154
1155 /***** other tests *****/
1156
1157 RadixTests();
1158 TestReplace();
1159 TestExtract();
1160 TestConsume();
1161 TestFindAndConsume();
1162 TestQuoteMetaAll();
1163 TestMatchNumberPeculiarity();
1164
1165 // Check the pattern() accessor
1166 {
1167 const string kPattern = "http://([^/]+)/.*";
1168 const RE re(kPattern);
1169 CHECK_EQ(kPattern, re.pattern());
1170 }
1171
1172 // Check RE error field.
1173 {
1174 RE re("foo");
1175 CHECK(re.error().empty()); // Must have no error
1176 }
1177
1178 #ifdef SUPPORT_UTF8
// Check UTF-8 handling
{
  printf("Testing UTF-8 handling\n");

  // Three Japanese characters (nihongo), each encoded as three UTF-8 bytes.
  // The trailing comments give the Unicode code point of each character.
  const unsigned char utf8_string[] = {
    0xe6, 0x97, 0xa5,  // U+65E5
    0xe6, 0x9c, 0xac,  // U+672C
    0xe8, 0xaa, 0x9e,  // U+8A9E
    0
  };
  // Any character, then the middle character of utf8_string, then any
  // character.
  const unsigned char utf8_pattern[] = {
    '.',
    0xe6, 0x9c, 0xac,  // U+672C
    '.',
    0
  };

  // Both should match in either mode, bytes or UTF-8:
  // nine '.' for the nine bytes, three '.' for the three characters.
  RE re_test1(".........");
  CHECK(re_test1.FullMatch(utf8_string));
  RE re_test2("...", pcrecpp::UTF8());
  CHECK(re_test2.FullMatch(utf8_string));

  // Check that '.' matches one byte or UTF-8 character
  // according to the mode.
  string ss;
  RE re_test3("(.)");
  CHECK(re_test3.PartialMatch(utf8_string, &ss));
  CHECK_EQ(ss, string("\xe6"));
  RE re_test4("(.)", pcrecpp::UTF8());
  CHECK(re_test4.PartialMatch(utf8_string, &ss));
  CHECK_EQ(ss, string("\xe6\x97\xa5"));

  // Check that string matches itself in either mode
  RE re_test5(utf8_string);
  CHECK(re_test5.FullMatch(utf8_string));
  RE re_test6(utf8_string, pcrecpp::UTF8());
  CHECK(re_test6.FullMatch(utf8_string));

  // Check that pattern matches string only in UTF8 mode
  // (in byte mode each '.' consumes a single byte, so the three-byte
  // literal in the middle of the pattern cannot line up with the string).
  RE re_test7(utf8_pattern);
  CHECK(!re_test7.FullMatch(utf8_string));
  RE re_test8(utf8_pattern, pcrecpp::UTF8());
  CHECK(re_test8.FullMatch(utf8_string));
}
1225
1226 // Check that ungreedy, UTF8 regular expressions don't match when they
1227 // oughtn't -- see bug 82246.
1228 {
1229 // This code always worked.
1230 const char* pattern = "\\w+X";
1231 const string target = "a aX";
1232 RE match_sentence(pattern);
1233 RE match_sentence_re(pattern, pcrecpp::UTF8());
1234
1235 CHECK(!match_sentence.FullMatch(target));
1236 CHECK(!match_sentence_re.FullMatch(target));
1237 }
1238
1239 {
1240 const char* pattern = "(?U)\\w+X";
1241 const string target = "a aX";
1242 RE match_sentence(pattern);
1243 RE match_sentence_re(pattern, pcrecpp::UTF8());
1244
1245 CHECK(!match_sentence.FullMatch(target));
1246 CHECK(!match_sentence_re.FullMatch(target));
1247 }
1248 #endif /* def SUPPORT_UTF8 */
1249
1250 printf("Testing error reporting\n");
1251
1252 { RE re("a\\1"); CHECK(!re.error().empty()); }
1253 {
1254 RE re("a[x");
1255 CHECK(!re.error().empty());
1256 }
1257 {
1258 RE re("a[z-a]");
1259 CHECK(!re.error().empty());
1260 }
1261 {
1262 RE re("a[[:foobar:]]");
1263 CHECK(!re.error().empty());
1264 }
1265 {
1266 RE re("a(b");
1267 CHECK(!re.error().empty());
1268 }
1269 {
1270 RE re("a\\");
1271 CHECK(!re.error().empty());
1272 }
1273
1274 // Test that recursion is stopped
1275 TestRecursion();
1276
1277 // Test Options
1278 if (getenv("VERBOSE_TEST") != NULL)
1279 VERBOSE_TEST = true;
1280 TestOptions();
1281
1282 // Test the constructors
1283 TestConstructors();
1284
1285 // Done
1286 printf("OK\n");
1287
1288 return 0;
1289 }
1290