• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // -*- coding: utf-8 -*-
2 // Copyright 2002-2009 The RE2 Authors.  All Rights Reserved.
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file.
5 
6 // TODO: Test extractions for PartialMatch/Consume
7 
8 #include <errno.h>
9 #include <stddef.h>
10 #include <stdint.h>
11 #include <string.h>
12 #include <map>
13 #include <string>
14 #include <utility>
15 #include <vector>
16 #if !defined(_MSC_VER) && !defined(__CYGWIN__) && !defined(__MINGW32__)
17 #include <sys/mman.h>
18 #include <unistd.h>  /* for sysconf */
19 #endif
20 
21 #include "util/test.h"
22 #include "util/logging.h"
23 #include "util/strutil.h"
24 #include "re2/re2.h"
25 #include "re2/regexp.h"
26 
27 namespace re2 {
28 
// Parses hexadecimal text into short, int, long and long long (signed and
// unsigned) and compares each result with the equivalent C++ literal.
TEST(RE2, HexTests) {
// Expands to two checks for 0x<value>: the bare digits go through RE2::Hex,
// then the "0x"-prefixed digits go through RE2::CRadix.
#define ASSERT_HEX(type, value)                                  \
  do {                                                           \
    type parsed;                                                 \
    ASSERT_TRUE(RE2::FullMatch(#value, "([0-9a-fA-F]+)[uUlL]*",  \
                               RE2::Hex(&parsed)));              \
    ASSERT_EQ(parsed, 0x##value);                                \
    ASSERT_TRUE(RE2::FullMatch("0x" #value,                      \
                               "([0-9a-fA-FxX]+)[uUlL]*",        \
                               RE2::CRadix(&parsed)));           \
    ASSERT_EQ(parsed, 0x##value);                                \
  } while (0)

  ASSERT_HEX(short,              2bad);
  ASSERT_HEX(unsigned short,     2badU);
  ASSERT_HEX(int,                dead);
  ASSERT_HEX(unsigned int,       deadU);
  ASSERT_HEX(long,               7eadbeefL);
  ASSERT_HEX(unsigned long,      deadbeefUL);
  ASSERT_HEX(long long,          12345678deadbeefLL);
  ASSERT_HEX(unsigned long long, cafebabedeadbeefULL);

#undef ASSERT_HEX
}
52 
// Parses octal text into short, int, long and long long (signed and unsigned)
// and compares each result with the equivalent C++ literal.
TEST(RE2, OctalTests) {
// Expands to two checks for 0<value>: the bare digits go through RE2::Octal,
// then the "0"-prefixed digits go through RE2::CRadix.
#define ASSERT_OCTAL(type, value)                                 \
  do {                                                            \
    type parsed;                                                  \
    ASSERT_TRUE(RE2::FullMatch(#value, "([0-7]+)[uUlL]*",         \
                               RE2::Octal(&parsed)));             \
    ASSERT_EQ(parsed, 0##value);                                  \
    ASSERT_TRUE(RE2::FullMatch("0" #value,                        \
                               "([0-9a-fA-FxX]+)[uUlL]*",         \
                               RE2::CRadix(&parsed)));            \
    ASSERT_EQ(parsed, 0##value);                                  \
  } while (0)

  ASSERT_OCTAL(short,              77777);
  ASSERT_OCTAL(unsigned short,     177777U);
  ASSERT_OCTAL(int,                17777777777);
  ASSERT_OCTAL(unsigned int,       37777777777U);
  ASSERT_OCTAL(long,               17777777777L);
  ASSERT_OCTAL(unsigned long,      37777777777UL);
  ASSERT_OCTAL(long long,          777777777777777777777LL);
  ASSERT_OCTAL(unsigned long long, 1777777777777777777777ULL);

#undef ASSERT_OCTAL
}
75 
// Parses decimal text into short, int, long and long long (signed and
// unsigned) and compares each result with the equivalent C++ literal.
TEST(RE2, DecimalTests) {
// Expands to two checks for <value>: once with a plain pointer argument and
// once through RE2::CRadix.
#define ASSERT_DECIMAL(type, value)                                   \
  do {                                                                \
    type parsed;                                                      \
    ASSERT_TRUE(                                                      \
        RE2::FullMatch(#value, "(-?[0-9]+)[uUlL]*", &parsed));        \
    ASSERT_EQ(parsed, value);                                         \
    ASSERT_TRUE(RE2::FullMatch(#value, "(-?[0-9a-fA-FxX]+)[uUlL]*",   \
                               RE2::CRadix(&parsed)));                \
    ASSERT_EQ(parsed, value);                                         \
  } while (0)

  ASSERT_DECIMAL(short,              -1);
  ASSERT_DECIMAL(unsigned short,     9999);
  ASSERT_DECIMAL(int,                -1000);
  ASSERT_DECIMAL(unsigned int,       12345U);
  ASSERT_DECIMAL(long,               -10000000L);
  ASSERT_DECIMAL(unsigned long,      3083324652U);
  ASSERT_DECIMAL(long long,          -100000000000000LL);
  ASSERT_DECIMAL(unsigned long long, 1234567890987654321ULL);

#undef ASSERT_DECIMAL
}
98 
TEST(RE2,Replace)99 TEST(RE2, Replace) {
100   struct ReplaceTest {
101     const char *regexp;
102     const char *rewrite;
103     const char *original;
104     const char *single;
105     const char *global;
106     int        greplace_count;
107   };
108   static const ReplaceTest tests[] = {
109     { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
110       "\\2\\1ay",
111       "the quick brown fox jumps over the lazy dogs.",
112       "ethay quick brown fox jumps over the lazy dogs.",
113       "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
114       9 },
115     { "\\w+",
116       "\\0-NOSPAM",
117       "abcd.efghi@google.com",
118       "abcd-NOSPAM.efghi@google.com",
119       "abcd-NOSPAM.efghi-NOSPAM@google-NOSPAM.com-NOSPAM",
120       4 },
121     { "^",
122       "(START)",
123       "foo",
124       "(START)foo",
125       "(START)foo",
126       1 },
127     { "^",
128       "(START)",
129       "",
130       "(START)",
131       "(START)",
132       1 },
133     { "$",
134       "(END)",
135       "",
136       "(END)",
137       "(END)",
138       1 },
139     { "b",
140       "bb",
141       "ababababab",
142       "abbabababab",
143       "abbabbabbabbabb",
144       5 },
145     { "b",
146       "bb",
147       "bbbbbb",
148       "bbbbbbb",
149       "bbbbbbbbbbbb",
150       6 },
151     { "b+",
152       "bb",
153       "bbbbbb",
154       "bb",
155       "bb",
156       1 },
157     { "b*",
158       "bb",
159       "bbbbbb",
160       "bb",
161       "bb",
162       1 },
163     { "b*",
164       "bb",
165       "aaaaa",
166       "bbaaaaa",
167       "bbabbabbabbabbabb",
168       6 },
169     // Check newline handling
170     { "a.*a",
171       "(\\0)",
172       "aba\naba",
173       "(aba)\naba",
174       "(aba)\n(aba)",
175       2 },
176     { "", NULL, NULL, NULL, NULL, 0 }
177   };
178 
179   for (const ReplaceTest* t = tests; t->original != NULL; t++) {
180     std::string one(t->original);
181     ASSERT_TRUE(RE2::Replace(&one, t->regexp, t->rewrite));
182     ASSERT_EQ(one, t->single);
183     std::string all(t->original);
184     ASSERT_EQ(RE2::GlobalReplace(&all, t->regexp, t->rewrite), t->greplace_count)
185       << "Got: " << all;
186     ASSERT_EQ(all, t->global);
187   }
188 }
189 
TestCheckRewriteString(const char * regexp,const char * rewrite,bool expect_ok)190 static void TestCheckRewriteString(const char* regexp, const char* rewrite,
191                               bool expect_ok) {
192   std::string error;
193   RE2 exp(regexp);
194   bool actual_ok = exp.CheckRewriteString(rewrite, &error);
195   EXPECT_EQ(expect_ok, actual_ok) << " for " << rewrite << " error: " << error;
196 }
197 
// Runs TestCheckRewriteString over a table of (regexp, rewrite, verdict)
// triples, in the order listed.
TEST(CheckRewriteString, all) {
  static const struct {
    const char* regexp;
    const char* rewrite;
    bool expect_ok;
  } kCases[] = {
    // Pattern without capturing groups.
    { "abc", "foo", true },
    { "abc", "foo\\", false },
    { "abc", "foo\\0bar", true },

    // Pattern with one capturing group.
    { "a(b)c", "foo", true },
    { "a(b)c", "foo\\0bar", true },
    { "a(b)c", "foo\\1bar", true },
    { "a(b)c", "foo\\2bar", false },
    { "a(b)c", "f\\\\2o\\1o", true },

    // Pattern with two capturing groups.
    { "a(b)(c)", "foo\\12", true },
    { "a(b)(c)", "f\\2o\\1o", true },
    { "a(b)(c)", "f\\oo\\1", false },
  };

  for (const auto& c : kCases)
    TestCheckRewriteString(c.regexp, c.rewrite, c.expect_ok);
}
213 
// Exercises RE2::Extract: successful extractions write the rewritten text to
// the output string, and a failed match leaves the output untouched.
TEST(RE2, Extract) {
  std::string out;

  const bool swapped =
      RE2::Extract("boris@kremvax.ru", "(.*)@([^.]*)", "\\2!\\1", &out);
  ASSERT_TRUE(swapped);
  ASSERT_EQ(out, "kremvax!boris");

  const bool quoted = RE2::Extract("foo", ".*", "'\\0'", &out);
  ASSERT_TRUE(quoted);
  ASSERT_EQ(out, "'foo'");

  // A non-matching call must not overwrite the previous result.
  const bool mismatched = RE2::Extract("baz", "bar", "'\\0'", &out);
  ASSERT_FALSE(mismatched);
  ASSERT_EQ(out, "'foo'");
}
226 
// A rewrite string that references \2 while the pattern has only one group
// must make Extract, Replace and GlobalReplace all report failure.
TEST(RE2, MaxSubmatchTooLarge) {
  static const char kPattern[] = "f(o+)";
  static const char kRewrite[] = "\\1\\2";

  std::string text;
  ASSERT_FALSE(RE2::Extract("foo", kPattern, kRewrite, &text));

  text = "foo";
  ASSERT_FALSE(RE2::Replace(&text, kPattern, kRewrite));

  text = "foo";
  ASSERT_FALSE(RE2::GlobalReplace(&text, kPattern, kRewrite));
}
235 
// RE2::Consume must match at the start of the remaining input: two words are
// consumed, then the punctuation that follows makes the third call fail.
TEST(RE2, Consume) {
  RE2 r("\\s*(\\w+)");    // matches a word, possibly preceded by whitespace

  std::string s("   aaa b!@#$@#$cccc");
  StringPiece input(s);
  std::string word;

  static const char* const kExpectedWords[] = { "aaa", "b" };
  for (const char* expected : kExpectedWords) {
    ASSERT_TRUE(RE2::Consume(&input, r, &word));
    ASSERT_EQ(word, expected) << " input: " << input;
  }

  ASSERT_FALSE(RE2::Consume(&input, r, &word)) << " input: " << input;
}
249 
// Exercises RE2::ConsumeN with zero, one and two output arguments against the
// same progressively consumed input.
TEST(RE2, ConsumeN) {
  static const char kWord[] = "\\s*(\\w+)";

  const std::string text(" one two three 4");
  StringPiece remaining(text);

  RE2::Arg slots[2];
  const RE2::Arg* const args[2] = { &slots[0], &slots[1] };

  // No arguments: the match is consumed but nothing is stored.
  EXPECT_TRUE(RE2::ConsumeN(&remaining, kWord, args, 0));  // Skips "one".

  // One argument: the next word is captured.
  std::string word;
  slots[0] = &word;
  EXPECT_TRUE(RE2::ConsumeN(&remaining, kWord, args, 1));
  EXPECT_EQ("two", word);

  // Two arguments: a word followed by a number.
  int number;
  slots[1] = &number;
  EXPECT_TRUE(RE2::ConsumeN(&remaining, "\\s*(\\w+)\\s*(\\d+)", args, 2));
  EXPECT_EQ("three", word);
  EXPECT_EQ(4, number);
}
273 
// RE2::FindAndConsume may skip over non-matching text: every word in the
// input is found in turn, after which no further match exists.
TEST(RE2, FindAndConsume) {
  RE2 r("(\\w+)");      // matches a word

  std::string s("   aaa b!@#$@#$cccc");
  StringPiece input(s);
  std::string word;

  static const char* const kExpectedWords[] = { "aaa", "b", "cccc" };
  for (const char* expected : kExpectedWords) {
    ASSERT_TRUE(RE2::FindAndConsume(&input, r, &word));
    ASSERT_EQ(word, expected);
  }
  ASSERT_FALSE(RE2::FindAndConsume(&input, r, &word));

  // Check that FindAndConsume works without any submatches.
  // Earlier version used uninitialized data for
  // length to consume.
  input = "aaa";
  ASSERT_TRUE(RE2::FindAndConsume(&input, "aaa"));
  ASSERT_EQ(input, "");
}
296 
// Exercises RE2::FindAndConsumeN with zero, one and two output arguments
// against the same progressively consumed input.
TEST(RE2, FindAndConsumeN) {
  static const char kWord[] = "(\\w+)";

  const std::string text(" one two three 4");
  StringPiece remaining(text);

  RE2::Arg slots[2];
  const RE2::Arg* const args[2] = { &slots[0], &slots[1] };

  // No arguments: the match is consumed but nothing is stored.
  EXPECT_TRUE(RE2::FindAndConsumeN(&remaining, kWord, args, 0));  // Skips "one".

  // One argument: the next word is captured.
  std::string word;
  slots[0] = &word;
  EXPECT_TRUE(RE2::FindAndConsumeN(&remaining, kWord, args, 1));
  EXPECT_EQ("two", word);

  // Two arguments: a word followed by a number.
  int number;
  slots[1] = &number;
  EXPECT_TRUE(
      RE2::FindAndConsumeN(&remaining, "(\\w+)\\s*(\\d+)", args, 2));
  EXPECT_EQ("three", word);
  EXPECT_EQ(4, number);
}
320 
// With an alternation of capturing groups, only the group belonging to the
// alternative that matched is filled in; the others come back empty.
TEST(RE2, MatchNumberPeculiarity) {
  RE2 r("(foo)|(bar)|(baz)");
  std::string word1;
  std::string word2;
  std::string word3;

  static const struct {
    const char* text;
    const char* want1;
    const char* want2;
    const char* want3;
  } kCases[] = {
    { "foo", "foo", "",    ""    },
    { "bar", "",    "bar", ""    },
    { "baz", "",    "",    "baz" },
  };
  for (const auto& c : kCases) {
    ASSERT_TRUE(RE2::PartialMatch(c.text, r, &word1, &word2, &word3));
    ASSERT_EQ(word1, c.want1);
    ASSERT_EQ(word2, c.want2);
    ASSERT_EQ(word3, c.want3);
  }
  ASSERT_FALSE(RE2::PartialMatch("f", r, &word1, &word2, &word3));

  // The group sits in the alternative that did not match, so the extracted
  // string is empty even though the overall match succeeds.
  std::string unused_group;
  ASSERT_TRUE(RE2::FullMatch("hello", "(foo)|hello", &unused_group));
  ASSERT_EQ(unused_group, "");
}
345 
// Exercises the low-level RE2::Match interface with an explicit submatch
// array, then repeats the extraction through RE2::PartialMatch.
TEST(RE2, Match) {
  RE2 re("((\\w+):([0-9]+))");   // extracts host and port
  StringPiece submatch[4];

  // Text without a "host:port" pair does not match.
  StringPiece text = "zyzzyva";
  const bool found_none = re.Match(text, 0, text.size(), RE2::UNANCHORED,
                                   submatch, arraysize(submatch));
  ASSERT_FALSE(found_none);

  // Text with a pair matches; submatch[0] is the whole match and the rest
  // are the capturing groups in order.
  text = "a chrisr:9000 here";
  const bool found_pair = re.Match(text, 0, text.size(), RE2::UNANCHORED,
                                   submatch, arraysize(submatch));
  ASSERT_TRUE(found_pair);
  ASSERT_EQ(submatch[0], "chrisr:9000");
  ASSERT_EQ(submatch[1], "chrisr:9000");
  ASSERT_EQ(submatch[2], "chrisr");
  ASSERT_EQ(submatch[3], "9000");

  // Same extraction with typed output arguments.
  std::string all;
  std::string host;
  int port;
  ASSERT_TRUE(RE2::PartialMatch("a chrisr:9000 here", re, &all, &host, &port));
  ASSERT_EQ(all, "chrisr:9000");
  ASSERT_EQ(host, "chrisr");
  ASSERT_EQ(port, 9000);
}
371 
TestRecursion(int size,const char * pattern)372 static void TestRecursion(int size, const char* pattern) {
373   // Fill up a string repeating the pattern given
374   std::string domain;
375   domain.resize(size);
376   size_t patlen = strlen(pattern);
377   for (int i = 0; i < size; i++) {
378     domain[i] = pattern[i % patlen];
379   }
380   // Just make sure it doesn't crash due to too much recursion.
381   RE2 re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", RE2::Quiet);
382   RE2::FullMatch(domain, re);
383 }
384 
385 // A meta-quoted string, interpreted as a pattern, should always match
386 // the original unquoted string.
TestQuoteMeta(const std::string & unquoted,const RE2::Options & options=RE2::DefaultOptions)387 static void TestQuoteMeta(const std::string& unquoted,
388                           const RE2::Options& options = RE2::DefaultOptions) {
389   std::string quoted = RE2::QuoteMeta(unquoted);
390   RE2 re(quoted, options);
391   EXPECT_TRUE(RE2::FullMatch(unquoted, re))
392       << "Unquoted='" << unquoted << "', quoted='" << quoted << "'.";
393 }
394 
395 // A meta-quoted string, interpreted as a pattern, should always match
396 // the original unquoted string.
NegativeTestQuoteMeta(const std::string & unquoted,const std::string & should_not_match,const RE2::Options & options=RE2::DefaultOptions)397 static void NegativeTestQuoteMeta(
398     const std::string& unquoted, const std::string& should_not_match,
399     const RE2::Options& options = RE2::DefaultOptions) {
400   std::string quoted = RE2::QuoteMeta(unquoted);
401   RE2 re(quoted, options);
402   EXPECT_FALSE(RE2::FullMatch(should_not_match, re))
403       << "Unquoted='" << unquoted << "', quoted='" << quoted << "'.";
404 }
405 
406 // Tests that quoted meta characters match their original strings,
407 // and that a few things that shouldn't match indeed do not.
TEST(QuoteMeta, Simple) {
  // Each sample is passed to TestQuoteMeta in the order listed.
  static const char* const kSamples[] = {
    "foo",
    "foo.bar",
    "foo\\.bar",
    "[1-9]",
    "1.5-2.0?",
    "\\d",
    "Who doesn't like ice cream?",
    "((a|b)c?d*e+[f-h]i)",
    "((?!)xxx).*yyy",
    "([",
  };
  for (const char* sample : kSamples)
    TestQuoteMeta(sample);
}
// Feeds NegativeTestQuoteMeta pairs of (text to quote, text that the quoted
// pattern must not match), in the order listed.
TEST(QuoteMeta, SimpleNegative) {
  static const struct {
    const char* unquoted;
    const char* should_not_match;
  } kCases[] = {
    { "foo",       "bar" },
    { "...",       "bar" },
    { "\\.",       "."   },
    { "\\.",       ".."  },
    { "(a)",       "a"   },
    { "(a|b)",     "a"   },
    { "(a|b)",     "(a)" },
    { "(a|b)",     "a|b" },
    { "[0-9]",     "0"   },
    { "[0-9]",     "0-9" },
    { "[0-9]",     "[9]" },
    { "((?!)xxx)", "xxx" },
  };
  for (const auto& c : kCases)
    NegativeTestQuoteMeta(c.unquoted, c.should_not_match);
}
434 
// Quotes text containing the single byte 0xb2 and checks it with the
// RE2::Latin1 options.
TEST(QuoteMeta, Latin1) {
  static const char kText[] = "3\xb2 = 9";
  TestQuoteMeta(kText, RE2::Latin1);
}
438 
// Quotes text containing multi-byte UTF-8 sequences of various lengths.
TEST(QuoteMeta, UTF8) {
  static const char* const kSamples[] = {
    "Plácido Domingo",
    "xyz",                 // No fancy utf8.
    "\xc2\xb0",            // 2-byte utf8 -- a degree symbol.
    "27\xc2\xb0 degrees",  // As a middle character.
    "\xe2\x80\xb3",        // 3-byte utf8 -- a double prime.
    "\xf0\x9d\x85\x9f",    // 4-byte utf8 -- a music note.
    "27\xc2\xb0",          // Interpreted as Latin-1, this should
                           // still work.
  };
  for (const char* sample : kSamples)
    TestQuoteMeta(sample);

  // The quoted form must not match text that has a literal backslash in
  // front of each byte of the degree symbol.
  NegativeTestQuoteMeta("27\xc2\xb0",
                        "27\\\xc2\\\xb0");  // 2-byte utf8 -- a degree symbol.
}
451 
// Quotes strings that contain an embedded NUL character.
TEST(QuoteMeta, HasNull) {
  // A string made of exactly one NUL character: it must match itself and
  // must not match the empty string.
  std::string with_nul(1, '\0');
  TestQuoteMeta(with_nul);
  NegativeTestQuoteMeta(with_nul, "");

  // Don't want null-followed-by-'1' to be interpreted as '\01'.
  with_nul.push_back('1');
  TestQuoteMeta(with_nul);
  NegativeTestQuoteMeta(with_nul, "\1");
}
465 
// Program sizes (forward and reverse) must be positive and must grow from
// the simple pattern to the medium one to the complex one.
TEST(ProgramSize, BigProgram) {
  RE2 re_simple("simple regexp");
  RE2 re_medium("medium.*regexp");
  RE2 re_complex("complex.{1,128}regexp");

  // Forward programs.
  const int simple_fwd = re_simple.ProgramSize();
  const int medium_fwd = re_medium.ProgramSize();
  const int complex_fwd = re_complex.ProgramSize();
  ASSERT_GT(simple_fwd, 0);
  ASSERT_GT(medium_fwd, simple_fwd);
  ASSERT_GT(complex_fwd, medium_fwd);

  // Reverse programs.
  const int simple_rev = re_simple.ReverseProgramSize();
  const int medium_rev = re_medium.ReverseProgramSize();
  const int complex_rev = re_complex.ReverseProgramSize();
  ASSERT_GT(simple_rev, 0);
  ASSERT_GT(medium_rev, simple_rev);
  ASSERT_GT(complex_rev, medium_rev);
}
479 
// Checks the fanout histograms of four regexps whose innermost repetition
// count is 1, 10, 100 and 1000, for the forward and the reverse program.
TEST(ProgramFanout, BigProgram) {
  RE2 re1("(?:(?:(?:(?:(?:.)?){1})*)+)");
  RE2 re10("(?:(?:(?:(?:(?:.)?){10})*)+)");
  RE2 re100("(?:(?:(?:(?:(?:.)?){100})*)+)");
  RE2 re1000("(?:(?:(?:(?:(?:.)?){1000})*)+)");

  // For each regexp: the index of the largest non-empty bucket for the
  // forward and the reverse program, and the number of elements expected in
  // that bucket (the same count in both directions).
  const struct {
    RE2* re;
    int forward_bucket;
    int reverse_bucket;
    int count;
  } kCases[] = {
    { &re1,    3,  2,  1    },
    { &re10,   6,  5,  10   },
    { &re100,  9,  9,  100  },
    { &re1000, 13, 12, 1000 },
  };

  std::vector<int> histogram;

  // Forward programs first, in table order.
  for (const auto& c : kCases) {
    ASSERT_EQ(c.forward_bucket, c.re->ProgramFanout(&histogram));
    ASSERT_EQ(c.count, histogram[c.forward_bucket]);
  }

  // Then the reverse programs, in table order.
  for (const auto& c : kCases) {
    ASSERT_EQ(c.reverse_bucket, c.re->ReverseProgramFanout(&histogram));
    ASSERT_EQ(c.count, histogram[c.reverse_bucket]);
  }
}
520 
521 // Issue 956519: handling empty character sets was
522 // causing NULL dereference.  This tests a few empty character sets.
523 // (The way to get an empty character set is to negate a full one.)
TEST(EmptyCharset, Fuzz) {
  static const char* const kEmptySets[] = {
    "[^\\S\\s]",
    "[^\\S[:space:]]",
    "[^\\D\\d]",
    "[^\\D[:digit:]]"
  };
  // None of these patterns may match anywhere in "abc".
  for (const char* pattern : kEmptySets) {
    ASSERT_FALSE(RE2(pattern).Match("abc", 0, 3, RE2::UNANCHORED, NULL, 0));
  }
}
534 
535 // Bitstate assumes that kInstFail instructions in
536 // alternations or capture groups have been "compiled away".
TEST(EmptyCharset, BitstateAssumptions) {
  // Captures trigger use of Bitstate.
  static const char* const kOptionalEmptySets[] = {
    "((((()))))" "[^\\S\\s]?",
    "((((()))))" "([^\\S\\s])?",
    "((((()))))" "([^\\S\\s]|[^\\S\\s])?",
    "((((()))))" "(([^\\S\\s]|[^\\S\\s])|)"
  };
  // Every pattern must match the empty string when submatches are requested.
  StringPiece submatch[6];
  for (const char* pattern : kOptionalEmptySets) {
    ASSERT_TRUE(RE2(pattern).Match("", 0, 0, RE2::UNANCHORED, submatch,
                                   arraysize(submatch)));
  }
}
549 
550 // Test that named groups work correctly.
TEST(Capture, NamedGroups) {
  // A pattern with only an anonymous group reports no named groups.
  {
    RE2 re("(hello world)");
    ASSERT_EQ(re.NumberOfCapturingGroups(), 1);
    const std::map<std::string, int>& m = re.NamedCapturingGroups();
    ASSERT_EQ(m.size(), 0);
  }

  // A mix of named and anonymous groups: every named group maps to its
  // 1-based position among all capturing groups.
  {
    RE2 re("(?P<A>expr(?P<B>expr)(?P<C>expr))((expr)(?P<D>expr))");
    ASSERT_EQ(re.NumberOfCapturingGroups(), 6);
    const std::map<std::string, int>& m = re.NamedCapturingGroups();
    ASSERT_EQ(m.size(), 4);

    static const struct {
      const char* name;
      int index;
    } kExpected[] = {
      { "A", 1 },
      { "B", 2 },
      { "C", 3 },
      { "D", 6 },  // $4 and $5 are anonymous
    };
    for (const auto& want : kExpected) {
      const std::map<std::string, int>::const_iterator it = m.find(want.name);
      // Never dereference end(): a missing name must fail the test cleanly
      // instead of invoking undefined behaviour.
      ASSERT_TRUE(it != m.end()) << "missing named group " << want.name;
      ASSERT_EQ(it->second, want.index);
    }
  }
}
570 
// Matches a sentence with two named groups through FullMatchN and then uses
// the name-to-index map to locate each captured value.
TEST(RE2, CapturedGroupTest) {
  RE2 re("directions from (?P<S>.*) to (?P<D>.*)");
  int num_groups = re.NumberOfCapturingGroups();
  // Fatal: num_groups is passed to FullMatchN below as the number of entries
  // to read from `matches`, so it must not be allowed to exceed the array.
  ASSERT_EQ(2, num_groups);
  std::string args[4];
  RE2::Arg arg0(&args[0]);
  RE2::Arg arg1(&args[1]);
  RE2::Arg arg2(&args[2]);
  RE2::Arg arg3(&args[3]);

  const RE2::Arg* const matches[4] = {&arg0, &arg1, &arg2, &arg3};
  EXPECT_TRUE(RE2::FullMatchN("directions from mountain view to san jose",
                              re, matches, num_groups));
  const std::map<std::string, int>& named_groups = re.NamedCapturingGroups();
  const std::map<std::string, int>::const_iterator source =
      named_groups.find("S");
  const std::map<std::string, int>::const_iterator destination =
      named_groups.find("D");
  // Fatal: both iterators are dereferenced below, which is undefined
  // behaviour if either equals end().
  ASSERT_TRUE(source != named_groups.end());
  ASSERT_TRUE(destination != named_groups.end());

  // The named group index is 1-based.
  int source_group_index = source->second;
  int destination_group_index = destination->second;
  // Fatal: the indices are used to subscript `args` below.
  ASSERT_EQ(1, source_group_index);
  ASSERT_EQ(2, destination_group_index);

  // The args is zero-based.
  EXPECT_EQ("mountain view", args[source_group_index - 1]);
  EXPECT_EQ("san jose", args[destination_group_index - 1]);
}
598 
// RE2::FullMatch without output arguments: the pattern has to cover the
// whole text.
TEST(RE2, FullMatchWithNoArgs) {
  static const struct {
    const char* text;
    const char* pattern;
    bool expect_match;
  } kCases[] = {
    { "h",       "h",     true  },
    { "hello",   "hello", true  },
    { "hello",   "h.*o",  true  },
    { "othello", "h.*o",  false },  // Must be anchored at front
    { "hello!",  "h.*o",  false },  // Must be anchored at end
  };
  for (const auto& c : kCases) {
    if (c.expect_match) {
      ASSERT_TRUE(RE2::FullMatch(c.text, c.pattern));
    } else {
      ASSERT_FALSE(RE2::FullMatch(c.text, c.pattern));
    }
  }
}
606 
// RE2::PartialMatch without output arguments: the pattern may match anywhere
// inside the text.
TEST(RE2, PartialMatch) {
  static const struct {
    const char* text;
    const char* pattern;
  } kMatching[] = {
    { "x",       "x" },
    { "hello",   "h.*o" },
    { "othello", "h.*o" },
    { "hello!",  "h.*o" },
    { "x",       "((((((((((((((((((((x))))))))))))))))))))" },
  };
  for (const auto& c : kMatching) {
    ASSERT_TRUE(RE2::PartialMatch(c.text, c.pattern));
  }
}
614 
// Exercises RE2::PartialMatchN with zero, one and two output arguments.
TEST(RE2, PartialMatchN) {
  RE2::Arg slots[2];
  const RE2::Arg* const args[2] = { &slots[0], &slots[1] };

  // No output arguments.
  EXPECT_TRUE(RE2::PartialMatchN("hello", "e.*o", args, 0));
  EXPECT_FALSE(RE2::PartialMatchN("othello", "a.*o", args, 0));

  // One integer argument.
  int number;
  slots[0] = &number;
  EXPECT_TRUE(RE2::PartialMatchN("1001 nights", "(\\d+)", args, 1));
  EXPECT_EQ(1001, number);
  EXPECT_FALSE(RE2::PartialMatchN("three", "(\\d+)", args, 1));

  // An integer argument followed by a string argument.
  std::string word;
  slots[1] = &word;
  EXPECT_TRUE(
      RE2::PartialMatchN("answer: 42:life", "(\\d+):(\\w+)", args, 2));
  EXPECT_EQ(42, number);
  EXPECT_EQ("life", word);
  EXPECT_FALSE(RE2::PartialMatchN("hi1", "(\\w+)(1)", args, 2));
}
638 
// RE2::FullMatch with no output arguments on an all-digit string.
TEST(RE2, FullMatchZeroArg) {
  const bool all_digits = RE2::FullMatch("1001", "\\d+");
  ASSERT_TRUE(all_digits);
}
643 
// RE2::FullMatch with a single int output argument.
TEST(RE2, FullMatchOneArg) {
  int number;

  // Positive and negative values are parsed into the argument.
  ASSERT_TRUE(RE2::FullMatch("1001", "(\\d+)",   &number));
  ASSERT_EQ(number, 1001);
  ASSERT_TRUE(RE2::FullMatch("-123", "(-?\\d+)", &number));
  ASSERT_EQ(number, -123);

  // An empty capture is rejected for an int argument.
  ASSERT_FALSE(RE2::FullMatch("10", "()\\d+", &number));

  // So is a 40-digit number.
  static const char kFortyDigits[] =
      "1234567890123456789012345678901234567890";
  ASSERT_FALSE(RE2::FullMatch(kFortyDigits, "(\\d+)", &number));
}
656 
// Only the captured digits are converted, even when the capture is directly
// surrounded by other digits.
TEST(RE2, FullMatchIntegerArg) {
  int number;

  // Anchored matches.
  ASSERT_TRUE(RE2::FullMatch("1234", "1(\\d*)4", &number));
  ASSERT_EQ(number, 23);

  ASSERT_TRUE(RE2::FullMatch("1234", "(\\d)\\d+", &number));
  ASSERT_EQ(number, 1);

  ASSERT_TRUE(RE2::FullMatch("-1234", "(-\\d)\\d+", &number));
  ASSERT_EQ(number, -1);

  // Unanchored matches.
  ASSERT_TRUE(RE2::PartialMatch("1234", "(\\d)", &number));
  ASSERT_EQ(number, 1);

  ASSERT_TRUE(RE2::PartialMatch("-1234", "(-\\d)", &number));
  ASSERT_EQ(number, -1);
}
672 
// RE2::FullMatch with a std::string output argument.
TEST(RE2, FullMatchStringArg) {
  std::string middle;
  const bool matched = RE2::FullMatch("hello", "h(.*)o", &middle);
  ASSERT_TRUE(matched);
  ASSERT_EQ(middle, std::string("ell"));
}
679 
// RE2::FullMatch with a StringPiece output argument followed by an int.
TEST(RE2, FullMatchStringPieceArg) {
  StringPiece name;
  int number;

  const bool matched =
      RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &name, &number);
  ASSERT_TRUE(matched);

  // The piece is compared by length and bytes.
  ASSERT_EQ(name.size(), 4);
  ASSERT_TRUE(memcmp(name.data(), "ruby", 4) == 0);
  ASSERT_EQ(number, 1234);
}
689 
// RE2::FullMatch with a std::string output argument followed by an int.
TEST(RE2, FullMatchMultiArg) {
  std::string name;
  int number;

  const bool matched =
      RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &name, &number);
  ASSERT_TRUE(matched);
  ASSERT_EQ(name, std::string("ruby"));
  ASSERT_EQ(number, 1234);
}
698 
// Exercises RE2::FullMatchN with zero, one and two output arguments.
TEST(RE2, FullMatchN) {
  RE2::Arg slots[2];
  const RE2::Arg* const args[2] = { &slots[0], &slots[1] };

  // No output arguments.
  EXPECT_TRUE(RE2::FullMatchN("hello", "h.*o", args, 0));
  EXPECT_FALSE(RE2::FullMatchN("othello", "h.*o", args, 0));

  // One integer argument.
  int number;
  slots[0] = &number;
  EXPECT_TRUE(RE2::FullMatchN("1001", "(\\d+)", args, 1));
  EXPECT_EQ(1001, number);
  EXPECT_FALSE(RE2::FullMatchN("three", "(\\d+)", args, 1));

  // An integer argument followed by a string argument.
  std::string word;
  slots[1] = &word;
  EXPECT_TRUE(RE2::FullMatchN("42:life", "(\\d+):(\\w+)", args, 2));
  EXPECT_EQ(42, number);
  EXPECT_EQ("life", word);
  EXPECT_FALSE(RE2::FullMatchN("hi1", "(\\w+)(1)", args, 2));
}
722 
// A null void* or nullptr in the argument list skips the corresponding
// group while the arguments around it are still filled in.
TEST(RE2, FullMatchIgnoredArg) {
  static const char kPattern[] = "(\\w+)(:)(\\d+)";
  std::string name;
  int number;

  // Old-school NULL should be ignored.
  ASSERT_TRUE(RE2::FullMatch("ruby:1234", kPattern, &name,
                             static_cast<void*>(NULL), &number));
  ASSERT_EQ(name, std::string("ruby"));
  ASSERT_EQ(number, 1234);

  // C++11 nullptr should also be ignored.
  ASSERT_TRUE(RE2::FullMatch("rubz:1235", kPattern, &name, nullptr, &number));
  ASSERT_EQ(name, std::string("rubz"));
  ASSERT_EQ(number, 1235);
}
738 
// Null pointers of a concrete (non-void) type: the match succeeds or fails
// depending on whether the captured text parses for that type.
TEST(RE2, FullMatchTypedNullArg) {
  std::string prefix;

  // Ignore non-void* NULL arg
  ASSERT_TRUE(
      RE2::FullMatch("hello", "he(.*)lo", static_cast<char*>(NULL)));
  ASSERT_TRUE(
      RE2::FullMatch("hello", "h(.*)o", static_cast<std::string*>(NULL)));
  ASSERT_TRUE(
      RE2::FullMatch("hello", "h(.*)o", static_cast<StringPiece*>(NULL)));
  ASSERT_TRUE(RE2::FullMatch("1234", "(.*)", static_cast<int*>(NULL)));
  ASSERT_TRUE(RE2::FullMatch("1234567890123456", "(.*)",
                             static_cast<long long*>(NULL)));
  ASSERT_TRUE(RE2::FullMatch("123.4567890123456", "(.*)",
                             static_cast<double*>(NULL)));
  ASSERT_TRUE(RE2::FullMatch("123.4567890123456", "(.*)",
                             static_cast<float*>(NULL)));

  // Fail on non-void* NULL arg if the match doesn't parse for the given type.
  ASSERT_FALSE(
      RE2::FullMatch("hello", "h(.*)lo", &prefix, static_cast<char*>(NULL)));
  ASSERT_FALSE(RE2::FullMatch("hello", "(.*)", static_cast<int*>(NULL)));
  ASSERT_FALSE(
      RE2::FullMatch("1234567890123456", "(.*)", static_cast<int*>(NULL)));
  ASSERT_FALSE(RE2::FullMatch("hello", "(.*)", static_cast<double*>(NULL)));
  ASSERT_FALSE(RE2::FullMatch("hello", "(.*)", static_cast<float*>(NULL)));
}
758 
759 // Check that numeric parsing code does not read past the end of
760 // the number being parsed.
761 // This implementation requires mmap(2) et al. and thus cannot
762 // be used unless they are available.
TEST(RE2, NULTerminated) {
#if defined(_POSIX_MAPPED_FILES) && _POSIX_MAPPED_FILES > 0
  // sysconf reports failure with -1; a non-positive page size would make
  // the address arithmetic below meaningless.
  const long pagesize = sysconf(_SC_PAGE_SIZE);
  ASSERT_TRUE(pagesize > 0) << " error " << errno;

#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
  // Map two adjacent pages, then unmap the second one so that any read past
  // the last byte of the first page faults.
  void* const mapping = mmap(NULL, 2*pagesize, PROT_READ|PROT_WRITE,
                             MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
  ASSERT_TRUE(mapping != MAP_FAILED) << " error " << errno;
  char* const v = static_cast<char*>(mapping);
  LOG(INFO) << "Memory at " << mapping;
  ASSERT_EQ(munmap(v + pagesize, pagesize), 0) << " error " << errno;

  // The number to parse is the single last byte of the remaining page.
  v[pagesize - 1] = '1';

  int x = 0;
  const bool matched =
      RE2::FullMatch(StringPiece(v + pagesize - 1, 1), "(.*)", &x);

  // Release the first page before asserting so that it is not leaked, even
  // when one of the checks below fails.
  const int unmap_result = munmap(v, pagesize);

  ASSERT_TRUE(matched);
  ASSERT_EQ(x, 1);
  ASSERT_EQ(unmap_result, 0);
#endif
}
784 
TEST(RE2,FullMatchTypeTests)785 TEST(RE2, FullMatchTypeTests) {
786   // Type tests
787   std::string zeros(1000, '0');
788   {
789     char c;
790     ASSERT_TRUE(RE2::FullMatch("Hello", "(H)ello", &c));
791     ASSERT_EQ(c, 'H');
792   }
793   {
794     unsigned char c;
795     ASSERT_TRUE(RE2::FullMatch("Hello", "(H)ello", &c));
796     ASSERT_EQ(c, static_cast<unsigned char>('H'));
797   }
798   {
799     int16_t v;
800     ASSERT_TRUE(RE2::FullMatch("100",     "(-?\\d+)", &v)); ASSERT_EQ(v, 100);
801     ASSERT_TRUE(RE2::FullMatch("-100",    "(-?\\d+)", &v)); ASSERT_EQ(v, -100);
802     ASSERT_TRUE(RE2::FullMatch("32767",   "(-?\\d+)", &v)); ASSERT_EQ(v, 32767);
803     ASSERT_TRUE(RE2::FullMatch("-32768",  "(-?\\d+)", &v)); ASSERT_EQ(v, -32768);
804     ASSERT_FALSE(RE2::FullMatch("-32769", "(-?\\d+)", &v));
805     ASSERT_FALSE(RE2::FullMatch("32768",  "(-?\\d+)", &v));
806   }
807   {
808     uint16_t v;
809     ASSERT_TRUE(RE2::FullMatch("100",    "(\\d+)", &v)); ASSERT_EQ(v, 100);
810     ASSERT_TRUE(RE2::FullMatch("32767",  "(\\d+)", &v)); ASSERT_EQ(v, 32767);
811     ASSERT_TRUE(RE2::FullMatch("65535",  "(\\d+)", &v)); ASSERT_EQ(v, 65535);
812     ASSERT_FALSE(RE2::FullMatch("65536", "(\\d+)", &v));
813   }
814   {
815     int32_t v;
816     static const int32_t max = INT32_C(0x7fffffff);
817     static const int32_t min = -max - 1;
818     ASSERT_TRUE(RE2::FullMatch("100",          "(-?\\d+)", &v)); ASSERT_EQ(v, 100);
819     ASSERT_TRUE(RE2::FullMatch("-100",         "(-?\\d+)", &v)); ASSERT_EQ(v, -100);
820     ASSERT_TRUE(RE2::FullMatch("2147483647",   "(-?\\d+)", &v)); ASSERT_EQ(v, max);
821     ASSERT_TRUE(RE2::FullMatch("-2147483648",  "(-?\\d+)", &v)); ASSERT_EQ(v, min);
822     ASSERT_FALSE(RE2::FullMatch("-2147483649", "(-?\\d+)", &v));
823     ASSERT_FALSE(RE2::FullMatch("2147483648",  "(-?\\d+)", &v));
824 
825     ASSERT_TRUE(RE2::FullMatch(zeros + "2147483647", "(-?\\d+)", &v));
826     ASSERT_EQ(v, max);
827     ASSERT_TRUE(RE2::FullMatch("-" + zeros + "2147483648", "(-?\\d+)", &v));
828     ASSERT_EQ(v, min);
829 
830     ASSERT_FALSE(RE2::FullMatch("-" + zeros + "2147483649", "(-?\\d+)", &v));
831     ASSERT_TRUE(RE2::FullMatch("0x7fffffff", "(.*)", RE2::CRadix(&v)));
832     ASSERT_EQ(v, max);
833     ASSERT_FALSE(RE2::FullMatch("000x7fffffff", "(.*)", RE2::CRadix(&v)));
834   }
835   {
836     uint32_t v;
837     static const uint32_t max = UINT32_C(0xffffffff);
838     ASSERT_TRUE(RE2::FullMatch("100",         "(\\d+)", &v)); ASSERT_EQ(v, 100);
839     ASSERT_TRUE(RE2::FullMatch("4294967295",  "(\\d+)", &v)); ASSERT_EQ(v, max);
840     ASSERT_FALSE(RE2::FullMatch("4294967296", "(\\d+)", &v));
841     ASSERT_FALSE(RE2::FullMatch("-1",         "(\\d+)", &v));
842 
843     ASSERT_TRUE(RE2::FullMatch(zeros + "4294967295", "(\\d+)", &v)); ASSERT_EQ(v, max);
844   }
845   {
846     int64_t v;
847     static const int64_t max = INT64_C(0x7fffffffffffffff);
848     static const int64_t min = -max - 1;
849     std::string str;
850 
851     ASSERT_TRUE(RE2::FullMatch("100",  "(-?\\d+)", &v)); ASSERT_EQ(v, 100);
852     ASSERT_TRUE(RE2::FullMatch("-100", "(-?\\d+)", &v)); ASSERT_EQ(v, -100);
853 
854     str = std::to_string(max);
855     ASSERT_TRUE(RE2::FullMatch(str,    "(-?\\d+)", &v)); ASSERT_EQ(v, max);
856 
857     str = std::to_string(min);
858     ASSERT_TRUE(RE2::FullMatch(str,    "(-?\\d+)", &v)); ASSERT_EQ(v, min);
859 
860     str = std::to_string(max);
861     ASSERT_NE(str.back(), '9');
862     str.back()++;
863     ASSERT_FALSE(RE2::FullMatch(str,   "(-?\\d+)", &v));
864 
865     str = std::to_string(min);
866     ASSERT_NE(str.back(), '9');
867     str.back()++;
868     ASSERT_FALSE(RE2::FullMatch(str,   "(-?\\d+)", &v));
869   }
870   {
871     uint64_t v;
872     int64_t v2;
873     static const uint64_t max = UINT64_C(0xffffffffffffffff);
874     std::string str;
875 
876     ASSERT_TRUE(RE2::FullMatch("100",  "(-?\\d+)", &v));  ASSERT_EQ(v, 100);
877     ASSERT_TRUE(RE2::FullMatch("-100", "(-?\\d+)", &v2)); ASSERT_EQ(v2, -100);
878 
879     str = std::to_string(max);
880     ASSERT_TRUE(RE2::FullMatch(str,    "(-?\\d+)", &v)); ASSERT_EQ(v, max);
881 
882     ASSERT_NE(str.back(), '9');
883     str.back()++;
884     ASSERT_FALSE(RE2::FullMatch(str,   "(-?\\d+)", &v));
885   }
886 }
887 
TEST(RE2, FloatingPointFullMatchTypes) {
  // Prefix used to check that a long run of leading zeros still parses.
  const std::string leading_zeros(1000, '0');

  // Extraction into a float.
  {
    float f;

    ASSERT_TRUE(RE2::FullMatch("100", "(.*)", &f));
    ASSERT_EQ(f, 100);

    ASSERT_TRUE(RE2::FullMatch("-100.", "(.*)", &f));
    ASSERT_EQ(f, -100);

    ASSERT_TRUE(RE2::FullMatch("1e23", "(.*)", &f));
    ASSERT_EQ(f, float(1e23));

    // Leading whitespace in the captured text is expected to be accepted.
    ASSERT_TRUE(RE2::FullMatch(" 100", "(.*)", &f));
    ASSERT_EQ(f, 100);

    ASSERT_TRUE(RE2::FullMatch(leading_zeros + "1e23", "(.*)", &f));
    ASSERT_EQ(f, float(1e23));

    // Rounding edge case: 6700000000081920.1
    //
    // 6700000000081920 lies exactly halfway between two float32 values,
    // so the trailing .1 should push the result up.  A float64 cannot
    // represent that .1, though: the nearest float64 is 6700000000081920
    // itself.  A parser that goes through strtod and then narrows to
    // float32 therefore rounds to even (down) instead of up; only a
    // direct call to strtof passes.  The input is deliberately limited
    // to 17 digits because C guarantees correctly rounded results from
    // strtod/strtof only for short inputs.
    //
    // Known to fail on Cygwin and MinGW because of a broken strtof(3),
    // and apparently on MSVC as well.
#if !defined(_MSC_VER) && !defined(__CYGWIN__) && !defined(__MINGW32__)
    ASSERT_TRUE(RE2::FullMatch("0.1", "(.*)", &f));
    ASSERT_EQ(f, 0.1f) << StringPrintf("%.8g != %.8g", f, 0.1f);

    ASSERT_TRUE(RE2::FullMatch("6700000000081920.1", "(.*)", &f));
    ASSERT_EQ(f, 6700000000081920.1f)
        << StringPrintf("%.8g != %.8g", f, 6700000000081920.1f);
#endif
  }

  // Extraction into a double.
  {
    double d;

    ASSERT_TRUE(RE2::FullMatch("100", "(.*)", &d));
    ASSERT_EQ(d, 100);

    ASSERT_TRUE(RE2::FullMatch("-100.", "(.*)", &d));
    ASSERT_EQ(d, -100);

    ASSERT_TRUE(RE2::FullMatch("1e23", "(.*)", &d));
    ASSERT_EQ(d, 1e23);

    ASSERT_TRUE(RE2::FullMatch(leading_zeros + "1e23", "(.*)", &d));
    ASSERT_EQ(d, double(1e23));

    ASSERT_TRUE(RE2::FullMatch("0.1", "(.*)", &d));
    ASSERT_EQ(d, 0.1) << StringPrintf("%.17g != %.17g", d, 0.1);

    ASSERT_TRUE(RE2::FullMatch("1.00000005960464485", "(.*)", &d));
    ASSERT_EQ(d, 1.0000000596046448)
        << StringPrintf("%.17g != %.17g", d, 1.0000000596046448);
  }
}
938 
TEST(RE2, FullMatchAnchored) {
  // FullMatch is anchored at both ends: text before or after the digits
  // is a mismatch unless the pattern itself consumes it.
  int number;

  ASSERT_FALSE(RE2::FullMatch("x1001", "(\\d+)", &number));
  ASSERT_FALSE(RE2::FullMatch("1001x", "(\\d+)", &number));

  ASSERT_TRUE(RE2::FullMatch("x1001", "x(\\d+)", &number));
  ASSERT_EQ(number, 1001);

  ASSERT_TRUE(RE2::FullMatch("1001x", "(\\d+)x", &number));
  ASSERT_EQ(number, 1001);
}
947 
TEST(RE2, FullMatchBraces) {
  // Open-ended brace repetition: the class must match at least five times.
  const char* const at_least_five = "[0-9a-f+.-]{5,}";

  ASSERT_TRUE(RE2::FullMatch("0abcd", at_least_five));    // exactly five
  ASSERT_TRUE(RE2::FullMatch("0abcde", at_least_five));   // more than five
  ASSERT_FALSE(RE2::FullMatch("0abc", at_least_five));    // only four
}
954 
TEST(RE2, Complicated) {
  // An alternation that mixes literals with a character class.
  const char* const alternation = "foo|bar|[A-Z]";

  ASSERT_TRUE(RE2::FullMatch("foo", alternation));
  ASSERT_TRUE(RE2::FullMatch("bar", alternation));
  ASSERT_TRUE(RE2::FullMatch("X", alternation));
  // Two uppercase letters do not fit the single-character class branch.
  ASSERT_FALSE(RE2::FullMatch("XY", alternation));
}
962 
TEST(RE2, FullMatchEnd) {
  // Full-match handling, which needs a '$' tacked on internally.
  // Alternations must be anchored as a whole, not just their last branch.
  ASSERT_TRUE(RE2::FullMatch("fo", "fo|foo"));
  ASSERT_TRUE(RE2::FullMatch("foo", "fo|foo"));

  // An explicit '$' in the pattern must not get in the way.
  ASSERT_TRUE(RE2::FullMatch("fo", "fo|foo$"));
  ASSERT_TRUE(RE2::FullMatch("foo", "fo|foo$"));
  ASSERT_TRUE(RE2::FullMatch("foo", "foo$"));

  // An escaped '$' is a literal, and text after the match still fails.
  ASSERT_FALSE(RE2::FullMatch("foo$bar", "foo\\$"));
  ASSERT_FALSE(RE2::FullMatch("fox", "fo|bar"));

  // Disabled on purpose.  Enable if the handling of '$' ever changes so
  // that it no longer matches before a trailing newline.
  if (false) {
    // Guard against pcre's special handling of a '\n' at the end of the
    // string matching '$'.
    ASSERT_FALSE(RE2::PartialMatch("foo\n", "foo$"));
  }
}
981 
TEST(RE2,FullMatchArgCount)982 TEST(RE2, FullMatchArgCount) {
983   // Number of args
984   int a[16];
985   ASSERT_TRUE(RE2::FullMatch("", ""));
986 
987   memset(a, 0, sizeof(0));
988   ASSERT_TRUE(RE2::FullMatch("1", "(\\d){1}", &a[0]));
989   ASSERT_EQ(a[0], 1);
990 
991   memset(a, 0, sizeof(0));
992   ASSERT_TRUE(RE2::FullMatch("12", "(\\d)(\\d)", &a[0], &a[1]));
993   ASSERT_EQ(a[0], 1);
994   ASSERT_EQ(a[1], 2);
995 
996   memset(a, 0, sizeof(0));
997   ASSERT_TRUE(RE2::FullMatch("123", "(\\d)(\\d)(\\d)", &a[0], &a[1], &a[2]));
998   ASSERT_EQ(a[0], 1);
999   ASSERT_EQ(a[1], 2);
1000   ASSERT_EQ(a[2], 3);
1001 
1002   memset(a, 0, sizeof(0));
1003   ASSERT_TRUE(RE2::FullMatch("1234", "(\\d)(\\d)(\\d)(\\d)", &a[0], &a[1],
1004                              &a[2], &a[3]));
1005   ASSERT_EQ(a[0], 1);
1006   ASSERT_EQ(a[1], 2);
1007   ASSERT_EQ(a[2], 3);
1008   ASSERT_EQ(a[3], 4);
1009 
1010   memset(a, 0, sizeof(0));
1011   ASSERT_TRUE(RE2::FullMatch("12345", "(\\d)(\\d)(\\d)(\\d)(\\d)", &a[0], &a[1],
1012                              &a[2], &a[3], &a[4]));
1013   ASSERT_EQ(a[0], 1);
1014   ASSERT_EQ(a[1], 2);
1015   ASSERT_EQ(a[2], 3);
1016   ASSERT_EQ(a[3], 4);
1017   ASSERT_EQ(a[4], 5);
1018 
1019   memset(a, 0, sizeof(0));
1020   ASSERT_TRUE(RE2::FullMatch("123456", "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)", &a[0],
1021                              &a[1], &a[2], &a[3], &a[4], &a[5]));
1022   ASSERT_EQ(a[0], 1);
1023   ASSERT_EQ(a[1], 2);
1024   ASSERT_EQ(a[2], 3);
1025   ASSERT_EQ(a[3], 4);
1026   ASSERT_EQ(a[4], 5);
1027   ASSERT_EQ(a[5], 6);
1028 
1029   memset(a, 0, sizeof(0));
1030   ASSERT_TRUE(RE2::FullMatch("1234567", "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
1031                              &a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6]));
1032   ASSERT_EQ(a[0], 1);
1033   ASSERT_EQ(a[1], 2);
1034   ASSERT_EQ(a[2], 3);
1035   ASSERT_EQ(a[3], 4);
1036   ASSERT_EQ(a[4], 5);
1037   ASSERT_EQ(a[5], 6);
1038   ASSERT_EQ(a[6], 7);
1039 
1040   memset(a, 0, sizeof(0));
1041   ASSERT_TRUE(RE2::FullMatch("1234567890123456",
1042                              "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1043                              "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
1044                              &a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6],
1045                              &a[7], &a[8], &a[9], &a[10], &a[11], &a[12],
1046                              &a[13], &a[14], &a[15]));
1047   ASSERT_EQ(a[0], 1);
1048   ASSERT_EQ(a[1], 2);
1049   ASSERT_EQ(a[2], 3);
1050   ASSERT_EQ(a[3], 4);
1051   ASSERT_EQ(a[4], 5);
1052   ASSERT_EQ(a[5], 6);
1053   ASSERT_EQ(a[6], 7);
1054   ASSERT_EQ(a[7], 8);
1055   ASSERT_EQ(a[8], 9);
1056   ASSERT_EQ(a[9], 0);
1057   ASSERT_EQ(a[10], 1);
1058   ASSERT_EQ(a[11], 2);
1059   ASSERT_EQ(a[12], 3);
1060   ASSERT_EQ(a[13], 4);
1061   ASSERT_EQ(a[14], 5);
1062   ASSERT_EQ(a[15], 6);
1063 }
1064 
TEST(RE2, Accessors) {
  // pattern() must hand back exactly the string the RE2 was built from.
  {
    const std::string source = "http://([^/]+)/.*";
    const RE2 compiled(source);
    ASSERT_EQ(source, compiled.pattern());
  }

  // A valid pattern must report no error through any of the error accessors.
  {
    RE2 valid("foo");
    ASSERT_TRUE(valid.error().empty());
    ASSERT_TRUE(valid.ok());
    ASSERT_EQ(valid.error_code(), RE2::NoError);
  }
}
1081 
TEST(RE2, UTF8) {
  // Subject: three Japanese characters (nihongo) as raw UTF-8 bytes.
  const char utf8_string[] =
      "\xe6\x97\xa5"   // U+65E5
      "\xe6\x9c\xac"   // U+672C
      "\xe8\xaa\x9e";  // U+8A9E
  // Pattern: the middle character with one '.' on either side.
  const char utf8_pattern[] =
      "."
      "\xe6\x9c\xac"   // U+672C
      ".";

  // Nine bytes in Latin-1 mode, three characters in UTF-8 mode: both match.
  RE2 nine_bytes(".........", RE2::Latin1);
  ASSERT_TRUE(RE2::FullMatch(utf8_string, nine_bytes));
  RE2 three_chars("...");
  ASSERT_TRUE(RE2::FullMatch(utf8_string, three_chars));

  // '.' captures a single byte in Latin-1 mode and a whole UTF-8
  // character otherwise.
  std::string captured;
  RE2 one_byte("(.)", RE2::Latin1);
  ASSERT_TRUE(RE2::PartialMatch(utf8_string, one_byte, &captured));
  ASSERT_EQ(captured, std::string("\xe6"));
  RE2 one_char("(.)");
  ASSERT_TRUE(RE2::PartialMatch(utf8_string, one_char, &captured));
  ASSERT_EQ(captured, std::string("\xe6\x97\xa5"));

  // The string used as its own pattern matches in either mode.
  RE2 self_latin1(utf8_string, RE2::Latin1);
  ASSERT_TRUE(RE2::FullMatch(utf8_string, self_latin1));
  RE2 self_utf8(utf8_string);
  ASSERT_TRUE(RE2::FullMatch(utf8_string, self_utf8));

  // The dotted pattern matches the string only in UTF-8 mode.
  RE2 dotted_latin1(utf8_pattern, RE2::Latin1);
  ASSERT_FALSE(RE2::FullMatch(utf8_string, dotted_latin1));
  RE2 dotted_utf8(utf8_pattern);
  ASSERT_TRUE(RE2::FullMatch(utf8_string, dotted_utf8));
}
1126 
TEST(RE2, UngreedyUTF8) {
  // Ungreedy UTF-8 regular expressions must not match when they should
  // not -- see bug 82246.  The subject contains a space, which \w+ cannot
  // cross, so no full match is possible in either encoding.

  // Greedy baseline: this variant always worked.
  {
    const char* const greedy = "\\w+X";
    const std::string subject = "a aX";
    RE2 latin1_re(greedy, RE2::Latin1);
    RE2 utf8_re(greedy);

    ASSERT_FALSE(RE2::FullMatch(subject, latin1_re));
    ASSERT_FALSE(RE2::FullMatch(subject, utf8_re));
  }

  // The same pattern with the ungreedy flag (?U).
  {
    const char* const ungreedy = "(?U)\\w+X";
    const std::string subject = "a aX";
    RE2 latin1_re(ungreedy, RE2::Latin1);
    // The flag itself must compile cleanly.
    ASSERT_EQ(latin1_re.error(), "");
    RE2 utf8_re(ungreedy);

    ASSERT_FALSE(RE2::FullMatch(subject, latin1_re));
    ASSERT_FALSE(RE2::FullMatch(subject, utf8_re));
  }
}
1151 
TEST(RE2, Rejects) {
  // Each of these malformed patterns must fail to compile.  RE2::Quiet is
  // passed to every constructor.

  // Backreference.
  ASSERT_FALSE(RE2("a\\1", RE2::Quiet).ok());
  // Character class that is never closed.
  ASSERT_FALSE(RE2("a[x", RE2::Quiet).ok());
  // Character range written backwards.
  ASSERT_FALSE(RE2("a[z-a]", RE2::Quiet).ok());
  // Unknown named character class.
  ASSERT_FALSE(RE2("a[[:foobar:]]", RE2::Quiet).ok());
  // Group that is never closed.
  ASSERT_FALSE(RE2("a(b", RE2::Quiet).ok());
  // Backslash at the very end of the pattern.
  ASSERT_FALSE(RE2("a\\", RE2::Quiet).ok());
}
1177 
TEST(RE2, NoCrash) {
  // Matching with a regexp that failed to compile must simply return
  // false rather than crash.
  {
    RE2 broken("a\\", RE2::Quiet);
    ASSERT_FALSE(broken.ok());
    ASSERT_FALSE(RE2::PartialMatch("a\\b", broken));
  }

  // An enormous regexp must be rejected, and using it must not crash.
  {
    RE2 enormous("(((.{100}){100}){100}){100}", RE2::Quiet);
    ASSERT_FALSE(enormous.ok());
    ASSERT_FALSE(RE2::PartialMatch("aaa", enormous));
  }

  // A crazy but legal regexp must still compile and run.
  {
    RE2 crazy(".{512}x", RE2::Quiet);
    ASSERT_TRUE(crazy.ok());

    // 515 filler characters followed by the terminating 'x'.
    std::string subject(515, 'c');
    subject += "x";
    ASSERT_TRUE(RE2::PartialMatch(subject, crazy));
  }
}
1203 
TEST(RE2, Recursion) {
  // Check that recursion is stopped.  This is a PCRE-legacy test: RE2
  // itself does not recurse.
  int bytes = 15 * 1024;  // enough to crash PCRE

  const char* const patterns[] = {".", "a", "a.", "ab.", "abc."};
  for (const char* pattern : patterns) {
    TestRecursion(bytes, pattern);
  }
}
1214 
TEST(RE2, BigCountedRepetition) {
  // Counted repetition must work when given tons of memory (256 MiB).
  RE2::Options roomy;
  roomy.set_max_mem(256<<20);

  RE2 counted(".{512}x", roomy);
  ASSERT_TRUE(counted.ok());

  // 515 filler characters followed by the terminating 'x'.
  std::string subject(515, 'c');
  subject += "x";
  ASSERT_TRUE(RE2::PartialMatch(subject, counted));
}
1227 
TEST(RE2, DeepRecursion) {
  // Deep stack recursion check.  Before pcre was patched, this input
  // overflowed the stack and died with a segmentation violation.
  // Another PCRE-legacy test: RE2 does not recurse.

  // "x*", then 131072 'a' characters, then "*x".
  const std::string comment = "x*" + std::string(131072, 'a') + "*x";

  RE2 re("((?:\\s|xx.*\n|x[*](?:\n|.)*?[*]x)*)");
  ASSERT_TRUE(RE2::FullMatch(comment, re));
}
1240 
1241 // Suggested by Josh Hyman.  Failed when SearchOnePass was
1242 // not implementing case-folding.
TEST(CaseInsensitive,MatchAndConsume)1243 TEST(CaseInsensitive, MatchAndConsume) {
1244   std::string text = "A fish named *Wanda*";
1245   StringPiece sp(text);
1246   StringPiece result;
1247   EXPECT_TRUE(RE2::PartialMatch(text, "(?i)([wand]{5})", &result));
1248   EXPECT_TRUE(RE2::FindAndConsume(&sp, "(?i)([wand]{5})", &result));
1249 }
1250 
1251 // RE2 should permit implicit conversions from string, StringPiece, const char*,
1252 // and C string literals.
TEST(RE2,ImplicitConversions)1253 TEST(RE2, ImplicitConversions) {
1254   std::string re_string(".");
1255   StringPiece re_stringpiece(".");
1256   const char* re_cstring = ".";
1257   EXPECT_TRUE(RE2::PartialMatch("e", re_string));
1258   EXPECT_TRUE(RE2::PartialMatch("e", re_stringpiece));
1259   EXPECT_TRUE(RE2::PartialMatch("e", re_cstring));
1260   EXPECT_TRUE(RE2::PartialMatch("e", "."));
1261 }
1262 
1263 // Bugs introduced by 8622304
TEST(RE2,CL8622304)1264 TEST(RE2, CL8622304) {
1265   // reported by ingow
1266   std::string dir;
1267   EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])"));  // ok
1268   EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])", &dir));  // fails
1269 
1270   // reported by jacobsa
1271   std::string key, val;
1272   EXPECT_TRUE(RE2::PartialMatch("bar:1,0x2F,030,4,5;baz:true;fooby:false,true",
1273               "(\\w+)(?::((?:[^;\\\\]|\\\\.)*))?;?",
1274               &key,
1275               &val));
1276   EXPECT_EQ(key, "bar");
1277   EXPECT_EQ(val, "1,0x2F,030,4,5");
1278 }
1279 
1280 
// Table for the ErrorArgs test: a broken regexp together with the piece
// of it that RE2 is expected to report as the error argument.
// The cases check that whole runes are returned, that invalid UTF-8 is
// always reported, and that the Perl flag piece is big enough.
struct ErrorTest {
  const char *regexp;  // pattern that must fail to compile
  const char *error;   // expected error_arg() for that pattern
};

static ErrorTest error_tests[] = {
  // Valid UTF-8: the offending piece comes back, whole runes included.
  { "ab\\αcd", "\\α" },
  { "ef\\x☺01", "\\x☺0" },
  { "gh\\x1☺01", "\\x1☺" },
  { "ij\\x1", "\\x1" },
  { "kl\\x", "\\x" },
  { "uv\\x{0000☺}", "\\x{0000☺" },
  { "wx\\p{ABC", "\\p{ABC" },
  { "yz(?smiUX:abc)", "(?smiUX" },   // used to return (?s but the error is X
  { "aa(?sm☺i", "(?sm☺" },
  { "bb[abc", "[abc" },

  // Invalid UTF-8 (byte \377): no argument string is returned.
  { "mn\\x1\377", "" },
  { "op\377qr", "" },
  { "st\\x{00000\377", "" },
  { "zz\\p{\377}", "" },
  { "zz\\x{00\377}", "" },
  { "zz(?P<name\377>abc)", "" },
};
TEST(RE2, ErrorArgs) {
  // Every table entry must fail to compile and report the expected
  // error argument; the full error text is attached on mismatch.
  for (size_t idx = 0; idx < arraysize(error_tests); idx++) {
    const ErrorTest& entry = error_tests[idx];
    RE2 re(entry.regexp, RE2::Quiet);
    EXPECT_FALSE(re.ok());
    EXPECT_EQ(re.error_arg(), entry.error) << re.error();
  }
}
1314 
// Table for the NeverNewline test, which checks that "never match \n"
// mode never matches \n.
struct NeverTest {
  const char* regexp;  // pattern to compile with never_nl set
  const char* text;    // subject to search
  const char* match;   // expected capture, or NULL when no match is expected
};

static NeverTest never_tests[] = {
  { "(.*)", "abc\ndef\nghi\n", "abc" },
  { "(?s)(abc.*def)", "abc\ndef\n", NULL },
  { "(abc(.|\n)*def)", "abc\ndef\n", NULL },
  { "(abc[^x]*def)", "abc\ndef\n", NULL },
  { "(abc[^x]*def)", "abczzzdef\ndef\n", "abczzzdef" },
};
TEST(RE2, NeverNewline) {
  // Run every never_tests entry with the never_nl option switched on.
  RE2::Options never_nl;
  never_nl.set_never_nl(true);

  for (size_t idx = 0; idx < arraysize(never_tests); idx++) {
    const NeverTest& entry = never_tests[idx];
    RE2 re(entry.regexp, never_nl);

    if (entry.match != NULL) {
      // A match is expected, and the capture must equal the table value.
      StringPiece captured;
      EXPECT_TRUE(RE2::PartialMatch(entry.text, re, &captured));
      EXPECT_EQ(captured, entry.match);
    } else {
      // No match is expected at all.
      EXPECT_FALSE(RE2::PartialMatch(entry.text, re));
    }
  }
}
1342 
1343 // Check that dot_nl option works.
TEST(RE2,DotNL)1344 TEST(RE2, DotNL) {
1345   RE2::Options opt;
1346   opt.set_dot_nl(true);
1347   EXPECT_TRUE(RE2::PartialMatch("\n", RE2(".", opt)));
1348   EXPECT_FALSE(RE2::PartialMatch("\n", RE2("(?-s).", opt)));
1349   opt.set_never_nl(true);
1350   EXPECT_FALSE(RE2::PartialMatch("\n", RE2(".", opt)));
1351 }
1352 
1353 // Check that there are no capturing groups in "never capture" mode.
TEST(RE2,NeverCapture)1354 TEST(RE2, NeverCapture) {
1355   RE2::Options opt;
1356   opt.set_never_capture(true);
1357   RE2 re("(r)(e)", opt);
1358   EXPECT_EQ(0, re.NumberOfCapturingGroups());
1359 }
1360 
1361 // Bitstate bug was looking at submatch[0] even if nsubmatch == 0.
1362 // Triggered by a failed DFA search falling back to Bitstate when
1363 // using Match with a NULL submatch set.  Bitstate tried to read
1364 // the submatch[0] entry even if nsubmatch was 0.
TEST(RE2,BitstateCaptureBug)1365 TEST(RE2, BitstateCaptureBug) {
1366   RE2::Options opt;
1367   opt.set_max_mem(20000);
1368   RE2 re("(_________$)", opt);
1369   StringPiece s = "xxxxxxxxxxxxxxxxxxxxxxxxxx_________x";
1370   EXPECT_FALSE(re.Match(s, 0, s.size(), RE2::UNANCHORED, NULL, 0));
1371 }
1372 
1373 // C++ version of bug 609710.
TEST(RE2,UnicodeClasses)1374 TEST(RE2, UnicodeClasses) {
1375   const std::string str = "ABCDEFGHI譚永鋒";
1376   std::string a, b, c;
1377 
1378   EXPECT_TRUE(RE2::FullMatch("A", "\\p{L}"));
1379   EXPECT_TRUE(RE2::FullMatch("A", "\\p{Lu}"));
1380   EXPECT_FALSE(RE2::FullMatch("A", "\\p{Ll}"));
1381   EXPECT_FALSE(RE2::FullMatch("A", "\\P{L}"));
1382   EXPECT_FALSE(RE2::FullMatch("A", "\\P{Lu}"));
1383   EXPECT_TRUE(RE2::FullMatch("A", "\\P{Ll}"));
1384 
1385   EXPECT_TRUE(RE2::FullMatch("譚", "\\p{L}"));
1386   EXPECT_FALSE(RE2::FullMatch("譚", "\\p{Lu}"));
1387   EXPECT_FALSE(RE2::FullMatch("譚", "\\p{Ll}"));
1388   EXPECT_FALSE(RE2::FullMatch("譚", "\\P{L}"));
1389   EXPECT_TRUE(RE2::FullMatch("譚", "\\P{Lu}"));
1390   EXPECT_TRUE(RE2::FullMatch("譚", "\\P{Ll}"));
1391 
1392   EXPECT_TRUE(RE2::FullMatch("永", "\\p{L}"));
1393   EXPECT_FALSE(RE2::FullMatch("永", "\\p{Lu}"));
1394   EXPECT_FALSE(RE2::FullMatch("永", "\\p{Ll}"));
1395   EXPECT_FALSE(RE2::FullMatch("永", "\\P{L}"));
1396   EXPECT_TRUE(RE2::FullMatch("永", "\\P{Lu}"));
1397   EXPECT_TRUE(RE2::FullMatch("永", "\\P{Ll}"));
1398 
1399   EXPECT_TRUE(RE2::FullMatch("鋒", "\\p{L}"));
1400   EXPECT_FALSE(RE2::FullMatch("鋒", "\\p{Lu}"));
1401   EXPECT_FALSE(RE2::FullMatch("鋒", "\\p{Ll}"));
1402   EXPECT_FALSE(RE2::FullMatch("鋒", "\\P{L}"));
1403   EXPECT_TRUE(RE2::FullMatch("鋒", "\\P{Lu}"));
1404   EXPECT_TRUE(RE2::FullMatch("鋒", "\\P{Ll}"));
1405 
1406   EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?(.).*?(.)", &a, &b, &c));
1407   EXPECT_EQ("A", a);
1408   EXPECT_EQ("B", b);
1409   EXPECT_EQ("C", c);
1410 
1411   EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{L}]).*?(.)", &a, &b, &c));
1412   EXPECT_EQ("A", a);
1413   EXPECT_EQ("B", b);
1414   EXPECT_EQ("C", c);
1415 
1416   EXPECT_FALSE(RE2::PartialMatch(str, "\\P{L}"));
1417 
1418   EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{Lu}]).*?(.)", &a, &b, &c));
1419   EXPECT_EQ("A", a);
1420   EXPECT_EQ("B", b);
1421   EXPECT_EQ("C", c);
1422 
1423   EXPECT_FALSE(RE2::PartialMatch(str, "[^\\p{Lu}\\p{Lo}]"));
1424 
1425   EXPECT_TRUE(RE2::PartialMatch(str, ".*(.).*?([\\p{Lu}\\p{Lo}]).*?(.)", &a, &b, &c));
1426   EXPECT_EQ("譚", a);
1427   EXPECT_EQ("永", b);
1428   EXPECT_EQ("鋒", c);
1429 }
1430 
TEST(RE2, LazyRE2) {
  // One LazyRE2 declared with a pattern only, one with an explicit option.
  static LazyRE2 pattern_only = {"a"};
  static LazyRE2 with_latin1 = {"b", RE2::Latin1};

  // Without options the encoding is UTF-8.
  EXPECT_EQ("a", pattern_only->pattern());
  EXPECT_EQ(RE2::Options::EncodingUTF8, pattern_only->options().encoding());

  // The explicit option is honoured.
  EXPECT_EQ("b", with_latin1->pattern());
  EXPECT_EQ(RE2::Options::EncodingLatin1, with_latin1->options().encoding());
}
1442 
1443 // Bug reported by saito. 2009/02/17
TEST(RE2,NullVsEmptyString)1444 TEST(RE2, NullVsEmptyString) {
1445   RE2 re(".*");
1446   EXPECT_TRUE(re.ok());
1447 
1448   StringPiece null;
1449   EXPECT_TRUE(RE2::FullMatch(null, re));
1450 
1451   StringPiece empty("");
1452   EXPECT_TRUE(RE2::FullMatch(empty, re));
1453 }
1454 
1455 // Similar to the previous test, check that the null string and the empty
1456 // string both match, but also that the null string can only provide null
1457 // submatches whereas the empty string can also provide empty submatches.
TEST(RE2,NullVsEmptyStringSubmatches)1458 TEST(RE2, NullVsEmptyStringSubmatches) {
1459   RE2 re("()|(foo)");
1460   EXPECT_TRUE(re.ok());
1461 
1462   // matches[0] is overall match, [1] is (), [2] is (foo), [3] is nonexistent.
1463   StringPiece matches[4];
1464 
1465   for (size_t i = 0; i < arraysize(matches); i++)
1466     matches[i] = "bar";
1467 
1468   StringPiece null;
1469   EXPECT_TRUE(re.Match(null, 0, null.size(), RE2::UNANCHORED,
1470                        matches, arraysize(matches)));
1471   for (size_t i = 0; i < arraysize(matches); i++) {
1472     EXPECT_TRUE(matches[i].data() == NULL);  // always null
1473     EXPECT_TRUE(matches[i].empty());
1474   }
1475 
1476   for (size_t i = 0; i < arraysize(matches); i++)
1477     matches[i] = "bar";
1478 
1479   StringPiece empty("");
1480   EXPECT_TRUE(re.Match(empty, 0, empty.size(), RE2::UNANCHORED,
1481                        matches, arraysize(matches)));
1482   EXPECT_TRUE(matches[0].data() != NULL);  // empty, not null
1483   EXPECT_TRUE(matches[0].empty());
1484   EXPECT_TRUE(matches[1].data() != NULL);  // empty, not null
1485   EXPECT_TRUE(matches[1].empty());
1486   EXPECT_TRUE(matches[2].data() == NULL);
1487   EXPECT_TRUE(matches[2].empty());
1488   EXPECT_TRUE(matches[3].data() == NULL);
1489   EXPECT_TRUE(matches[3].empty());
1490 }
1491 
1492 // Issue 1816809
TEST(RE2,Bug1816809)1493 TEST(RE2, Bug1816809) {
1494   RE2 re("(((((llx((-3)|(4)))(;(llx((-3)|(4))))*))))");
1495   StringPiece piece("llx-3;llx4");
1496   std::string x;
1497   EXPECT_TRUE(RE2::Consume(&piece, re, &x));
1498 }
1499 
1500 // Issue 3061120
TEST(RE2,Bug3061120)1501 TEST(RE2, Bug3061120) {
1502   RE2 re("(?i)\\W");
1503   EXPECT_FALSE(RE2::PartialMatch("x", re));  // always worked
1504   EXPECT_FALSE(RE2::PartialMatch("k", re));  // broke because of kelvin
1505   EXPECT_FALSE(RE2::PartialMatch("s", re));  // broke because of latin long s
1506 }
1507 
TEST(RE2, CapturingGroupNames) {
  // Group IDs sit above their opening parentheses:
  //      12    3        45   6         7
  RE2 re("((abc)(?P<G2>)|((e+)(?P<G2>.*)(?P<G1>u+)))");
  EXPECT_TRUE(re.ok());

  // Only the named groups appear in the map, keyed by group ID.
  const std::map<int, std::string> want = {
      {3, "G2"},
      {6, "G2"},
      {7, "G1"},
  };
  EXPECT_EQ(want, re.CapturingGroupNames());
}
1520 
TEST(RE2, RegexpToStringLossOfAnchor) {
  // Anchors must survive the round trip through Regexp()->ToString().

  // Leading '^': printed verbatim in POSIX mode, as (?-m:^) otherwise.
  EXPECT_EQ(RE2("^[a-c]at", RE2::POSIX).Regexp()->ToString(), "^[a-c]at");
  EXPECT_EQ(RE2("^[a-c]at").Regexp()->ToString(), "(?-m:^)[a-c]at");

  // Trailing '$': printed verbatim in POSIX mode, as (?-m:$) otherwise.
  EXPECT_EQ(RE2("ca[t-z]$", RE2::POSIX).Regexp()->ToString(), "ca[t-z]$");
  EXPECT_EQ(RE2("ca[t-z]$").Regexp()->ToString(), "ca[t-z](?-m:$)");
}
1527 
1528 // Issue 10131674
TEST(RE2,Bug10131674)1529 TEST(RE2, Bug10131674) {
1530   // Some of these escapes describe values that do not fit in a byte.
1531   RE2 re("\\140\\440\\174\\271\\150\\656\\106\\201\\004\\332", RE2::Latin1);
1532   EXPECT_FALSE(re.ok());
1533   EXPECT_FALSE(RE2::FullMatch("hello world", re));
1534 }
1535 
TEST(RE2, Bug18391750) {
  // Stray write past the end of match_ in nfa.cc, caught by fuzzing plus
  // address sanitizer.  The same bytes serve as pattern and as subject.
  const char t[] =
      "\x28\x28\xfc\xfc\x08\x08"
      "\x26\x26\x28\xc2\x9b\xc5"
      "\xc5\xd4\x8f\x8f\x69\x69"
      "\xe7\x29\x7b\x37\x31\x31"
      "\x7d\xae\x7c\x7c\xf3\x29"
      "\xae\xae\x2e\x2a\x29";

  RE2::Options opt;
  opt.set_encoding(RE2::Options::EncodingLatin1);
  opt.set_longest_match(true);
  opt.set_dot_nl(true);
  opt.set_case_sensitive(false);

  RE2 re(t, opt);
  ASSERT_TRUE(re.ok());

  // The result is ignored on purpose; the search only has to run.
  RE2::PartialMatch(t, re);
}
1555 
TEST(RE2, Bug18458852) {
  // The parser used to accept an invalid (too large) rune, which made the
  // compiler fail a DCHECK in the UTF-8 character class code.  The
  // pattern must now be rejected.
  const char b[] =
      "\x28\x05\x05\x41\x41\x28"
      "\x24\x5b\x5e\xf5\x87\x87"
      "\x90\x29\x5d\x29\x29";

  RE2 re(b);
  ASSERT_FALSE(re.ok());
}
1568 
TEST(RE2, Bug18523943) {
  // Bug in BitState: case kFailInst failed the match entirely.

  // Subject bytes.
  const char a[] = "\x29\x29\x24";
  // Pattern bytes.
  const char b[] = "\x28\x0a\x2a\x2a\x29";

  RE2::Options opt;
  opt.set_log_errors(false);
  opt.set_encoding(RE2::Options::EncodingLatin1);
  opt.set_posix_syntax(true);
  opt.set_longest_match(true);
  opt.set_literal(false);
  opt.set_never_nl(true);

  RE2 re(b, opt);
  ASSERT_TRUE(re.ok());

  std::string captured;
  ASSERT_TRUE(RE2::PartialMatch(a, re, &captured));
}
1591 
TEST(RE2, Bug21371806) {
  // The parser accepted Unicode groups in Latin-1 mode in a way that made
  // the compiler fail a DCHECK in prog.cc.  The pattern is expected to
  // compile successfully.
  RE2::Options latin1;
  latin1.set_encoding(RE2::Options::EncodingLatin1);

  RE2 unicode_group("g\\p{Zl}]", latin1);
  ASSERT_TRUE(unicode_group.ok());
}
1602 
TEST(RE2, Bug26356109) {
  // Parser bug caused by factoring common prefixes out of alternations.
  //
  // This pattern used to be factored to "a\\C*?[bc]".  The automaton then
  // consumed "ab" and stopped (when unanchored), whereas first-match
  // semantics require it to consume all of "abc".
  RE2 re("a\\C*?c|a\\C*?b");
  ASSERT_TRUE(re.ok());

  std::string s = "abc";
  StringPiece m;

  // Unanchored search: the whole subject must be matched.
  const bool unanchored_ok = re.Match(s, 0, s.size(), RE2::UNANCHORED, &m, 1);
  ASSERT_TRUE(unanchored_ok);
  ASSERT_EQ(m, s) << " (UNANCHORED) got m='" << m << "', want '" << s << "'";

  // Anchored at both ends: same expectation.
  const bool anchored_ok = re.Match(s, 0, s.size(), RE2::ANCHOR_BOTH, &m, 1);
  ASSERT_TRUE(anchored_ok);
  ASSERT_EQ(m, s) << " (ANCHOR_BOTH) got m='" << m << "', want '" << s << "'";
}
1621 
TEST(RE2, Issue104) {
  // RE2::GlobalReplace always advanced by one byte after matching the
  // empty string, which clobbered any rune longer than one byte.
  // In every case below the pattern matches only the empty string, so
  // three replacements are expected: before, between and after the two
  // characters.
  std::string s;
  int replacements;

  // One-byte characters.
  s = "bc";
  replacements = RE2::GlobalReplace(&s, "a*", "d");
  ASSERT_EQ(3, replacements);
  ASSERT_EQ("dbdcd", s);

  // Two-byte characters.
  s = "ąć";
  replacements = RE2::GlobalReplace(&s, "Ć*", "Ĉ");
  ASSERT_EQ(3, replacements);
  ASSERT_EQ("ĈąĈćĈ", s);

  // Three-byte characters.
  s = "人类";
  replacements = RE2::GlobalReplace(&s, "大*", "小");
  ASSERT_EQ(3, replacements);
  ASSERT_EQ("小人小类小", s);
}
1638 
1639 }  // namespace re2
1640