1 /*
2 *
3 * Copyright (c) 2002
4 * John Maddock
5 *
6 * Use, modification and distribution are subject to the
7 * Boost Software License, Version 1.0. (See accompanying file
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9 *
10 */
11
12 /*
13 * LOCATION: see http://www.boost.org for most recent version.
14 * FILE perl_matcher_common.cpp
15 * VERSION see <boost/version.hpp>
16 * DESCRIPTION: Definitions of perl_matcher member functions that are
17 * specific to the recursive implementation.
18 */
19
20 #ifndef BOOST_REGEX_V4_PERL_MATCHER_RECURSIVE_HPP
21 #define BOOST_REGEX_V4_PERL_MATCHER_RECURSIVE_HPP
22
23 #ifdef BOOST_MSVC
24 #pragma warning(push)
25 #pragma warning(disable: 4103)
26 #endif
27 #ifdef BOOST_HAS_ABI_HEADERS
28 # include BOOST_ABI_PREFIX
29 #endif
30 #ifdef BOOST_MSVC
31 #pragma warning(pop)
32 #endif
33
34 #ifdef BOOST_MSVC
35 #pragma warning(push)
36 #pragma warning(disable: 4800)
37 #endif
38
39 namespace boost{
40 namespace BOOST_REGEX_DETAIL_NS{
41
42 template <class BidiIterator>
43 class backup_subex
44 {
45 int index;
46 sub_match<BidiIterator> sub;
47 public:
48 template <class A>
backup_subex(const match_results<BidiIterator,A> & w,int i)49 backup_subex(const match_results<BidiIterator, A>& w, int i)
50 : index(i), sub(w[i], false) {}
51 template <class A>
restore(match_results<BidiIterator,A> & w)52 void restore(match_results<BidiIterator, A>& w)
53 {
54 w.set_first(sub.first, index, index == 0);
55 w.set_second(sub.second, index, sub.matched, index == 0);
56 }
get()57 const sub_match<BidiIterator>& get() { return sub; }
58 };
59
60 template <class BidiIterator, class Allocator, class traits>
match_all_states()61 bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
62 {
63 static matcher_proc_type const s_match_vtable[34] =
64 {
65 (&perl_matcher<BidiIterator, Allocator, traits>::match_startmark),
66 &perl_matcher<BidiIterator, Allocator, traits>::match_endmark,
67 &perl_matcher<BidiIterator, Allocator, traits>::match_literal,
68 &perl_matcher<BidiIterator, Allocator, traits>::match_start_line,
69 &perl_matcher<BidiIterator, Allocator, traits>::match_end_line,
70 &perl_matcher<BidiIterator, Allocator, traits>::match_wild,
71 &perl_matcher<BidiIterator, Allocator, traits>::match_match,
72 &perl_matcher<BidiIterator, Allocator, traits>::match_word_boundary,
73 &perl_matcher<BidiIterator, Allocator, traits>::match_within_word,
74 &perl_matcher<BidiIterator, Allocator, traits>::match_word_start,
75 &perl_matcher<BidiIterator, Allocator, traits>::match_word_end,
76 &perl_matcher<BidiIterator, Allocator, traits>::match_buffer_start,
77 &perl_matcher<BidiIterator, Allocator, traits>::match_buffer_end,
78 &perl_matcher<BidiIterator, Allocator, traits>::match_backref,
79 &perl_matcher<BidiIterator, Allocator, traits>::match_long_set,
80 &perl_matcher<BidiIterator, Allocator, traits>::match_set,
81 &perl_matcher<BidiIterator, Allocator, traits>::match_jump,
82 &perl_matcher<BidiIterator, Allocator, traits>::match_alt,
83 &perl_matcher<BidiIterator, Allocator, traits>::match_rep,
84 &perl_matcher<BidiIterator, Allocator, traits>::match_combining,
85 &perl_matcher<BidiIterator, Allocator, traits>::match_soft_buffer_end,
86 &perl_matcher<BidiIterator, Allocator, traits>::match_restart_continue,
87 // Although this next line *should* be evaluated at compile time, in practice
88 // some compilers (VC++) emit run-time initialisation which breaks thread
89 // safety, so use a dispatch function instead:
90 //(::boost::is_random_access_iterator<BidiIterator>::value ? &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_fast : &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_slow),
91 &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_dispatch,
92 &perl_matcher<BidiIterator, Allocator, traits>::match_char_repeat,
93 &perl_matcher<BidiIterator, Allocator, traits>::match_set_repeat,
94 &perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat,
95 &perl_matcher<BidiIterator, Allocator, traits>::match_backstep,
96 &perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref,
97 &perl_matcher<BidiIterator, Allocator, traits>::match_toggle_case,
98 &perl_matcher<BidiIterator, Allocator, traits>::match_recursion,
99 &perl_matcher<BidiIterator, Allocator, traits>::match_fail,
100 &perl_matcher<BidiIterator, Allocator, traits>::match_accept,
101 &perl_matcher<BidiIterator, Allocator, traits>::match_commit,
102 &perl_matcher<BidiIterator, Allocator, traits>::match_then,
103 };
104
105 if(state_count > max_state_count)
106 raise_error(traits_inst, regex_constants::error_complexity);
107 while(pstate)
108 {
109 matcher_proc_type proc = s_match_vtable[pstate->type];
110 ++state_count;
111 if(!(this->*proc)())
112 {
113 if((m_match_flags & match_partial) && (position == last) && (position != search_base))
114 m_has_partial_match = true;
115 return 0;
116 }
117 }
118 return true;
119 }
120
121 template <class BidiIterator, class Allocator, class traits>
match_startmark()122 bool perl_matcher<BidiIterator, Allocator, traits>::match_startmark()
123 {
124 int index = static_cast<const re_brace*>(pstate)->index;
125 icase = static_cast<const re_brace*>(pstate)->icase;
126 bool r = true;
127 switch(index)
128 {
129 case 0:
130 pstate = pstate->next.p;
131 break;
132 case -1:
133 case -2:
134 {
135 // forward lookahead assert:
136 BidiIterator old_position(position);
137 const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p;
138 pstate = pstate->next.p->next.p;
139 r = match_all_states();
140 pstate = next_pstate;
141 position = old_position;
142 if((r && (index != -1)) || (!r && (index != -2)))
143 r = false;
144 else
145 r = true;
146 if(r && m_have_accept)
147 r = skip_until_paren(INT_MAX);
148 break;
149 }
150 case -3:
151 {
152 // independent sub-expression:
153 bool old_independent = m_independent;
154 m_independent = true;
155 const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p;
156 pstate = pstate->next.p->next.p;
157 bool can_backtrack = m_can_backtrack;
158 r = match_all_states();
159 if(r)
160 m_can_backtrack = can_backtrack;
161 pstate = next_pstate;
162 m_independent = old_independent;
163 #ifdef BOOST_REGEX_MATCH_EXTRA
164 if(r && (m_match_flags & match_extra))
165 {
166 //
167 // our captures have been stored in *m_presult
168 // we need to unpack them, and insert them
169 // back in the right order when we unwind the stack:
170 //
171 unsigned i;
172 match_results<BidiIterator, Allocator> tm(*m_presult);
173 for(i = 0; i < tm.size(); ++i)
174 (*m_presult)[i].get_captures().clear();
175 // match everything else:
176 r = match_all_states();
177 // now place the stored captures back:
178 for(i = 0; i < tm.size(); ++i)
179 {
180 typedef typename sub_match<BidiIterator>::capture_sequence_type seq;
181 seq& s1 = (*m_presult)[i].get_captures();
182 const seq& s2 = tm[i].captures();
183 s1.insert(
184 s1.end(),
185 s2.begin(),
186 s2.end());
187 }
188 }
189 #endif
190 if(r && m_have_accept)
191 r = skip_until_paren(INT_MAX);
192 break;
193 }
194 case -4:
195 {
196 // conditional expression:
197 const re_alt* alt = static_cast<const re_alt*>(pstate->next.p);
198 BOOST_ASSERT(alt->type == syntax_element_alt);
199 pstate = alt->next.p;
200 if(pstate->type == syntax_element_assert_backref)
201 {
202 if(!match_assert_backref())
203 pstate = alt->alt.p;
204 break;
205 }
206 else
207 {
208 // zero width assertion, have to match this recursively:
209 BOOST_ASSERT(pstate->type == syntax_element_startmark);
210 bool negated = static_cast<const re_brace*>(pstate)->index == -2;
211 BidiIterator saved_position = position;
212 const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p;
213 pstate = pstate->next.p->next.p;
214 bool res = match_all_states();
215 position = saved_position;
216 if(negated)
217 res = !res;
218 if(res)
219 pstate = next_pstate;
220 else
221 pstate = alt->alt.p;
222 break;
223 }
224 }
225 case -5:
226 {
227 // Reset start of $0, since we have a \K escape
228 backup_subex<BidiIterator> sub(*m_presult, 0);
229 m_presult->set_first(position, 0, true);
230 pstate = pstate->next.p;
231 r = match_all_states();
232 if(r == false)
233 sub.restore(*m_presult);
234 break;
235 }
236 default:
237 {
238 BOOST_ASSERT(index > 0);
239 if((m_match_flags & match_nosubs) == 0)
240 {
241 backup_subex<BidiIterator> sub(*m_presult, index);
242 m_presult->set_first(position, index);
243 pstate = pstate->next.p;
244 r = match_all_states();
245 if(r == false)
246 sub.restore(*m_presult);
247 #ifdef BOOST_REGEX_MATCH_EXTRA
248 //
249 // we have a match, push the capture information onto the stack:
250 //
251 else if(sub.get().matched && (match_extra & m_match_flags))
252 ((*m_presult)[index]).get_captures().push_back(sub.get());
253 #endif
254 }
255 else
256 {
257 pstate = pstate->next.p;
258 }
259 break;
260 }
261 }
262 return r;
263 }
264
265 template <class BidiIterator, class Allocator, class traits>
match_alt()266 bool perl_matcher<BidiIterator, Allocator, traits>::match_alt()
267 {
268 bool take_first, take_second;
269 const re_alt* jmp = static_cast<const re_alt*>(pstate);
270
271 // find out which of these two alternatives we need to take:
272 if(position == last)
273 {
274 take_first = jmp->can_be_null & mask_take;
275 take_second = jmp->can_be_null & mask_skip;
276 }
277 else
278 {
279 take_first = can_start(*position, jmp->_map, (unsigned char)mask_take);
280 take_second = can_start(*position, jmp->_map, (unsigned char)mask_skip);
281 }
282
283 if(take_first)
284 {
285 // we can take the first alternative,
286 // see if we need to push next alternative:
287 if(take_second)
288 {
289 BidiIterator oldposition(position);
290 const re_syntax_base* old_pstate = jmp->alt.p;
291 pstate = pstate->next.p;
292 bool oldcase = icase;
293 m_have_then = false;
294 if(!match_all_states())
295 {
296 pstate = old_pstate;
297 position = oldposition;
298 icase = oldcase;
299 if(m_have_then)
300 {
301 m_can_backtrack = true;
302 m_have_then = false;
303 return false;
304 }
305 }
306 m_have_then = false;
307 return m_can_backtrack;
308 }
309 pstate = pstate->next.p;
310 return true;
311 }
312 if(take_second)
313 {
314 pstate = jmp->alt.p;
315 return true;
316 }
317 return false; // neither option is possible
318 }
319
320 template <class BidiIterator, class Allocator, class traits>
match_rep()321 bool perl_matcher<BidiIterator, Allocator, traits>::match_rep()
322 {
323 #ifdef BOOST_MSVC
324 #pragma warning(push)
325 #pragma warning(disable:4127 4244)
326 #endif
327 const re_repeat* rep = static_cast<const re_repeat*>(pstate);
328 //
329 // Always copy the repeat count, so that the state is restored
330 // when we exit this scope:
331 //
332 repeater_count<BidiIterator> r(rep->state_id, &next_count, position, this->recursion_stack.size() ? this->recursion_stack.back().idx : INT_MIN + 3);
333 //
334 // If we've had at least one repeat already, and the last one
335 // matched the NULL string then set the repeat count to
336 // maximum:
337 //
338 next_count->check_null_repeat(position, rep->max);
339
340 // find out which of these two alternatives we need to take:
341 bool take_first, take_second;
342 if(position == last)
343 {
344 take_first = rep->can_be_null & mask_take;
345 take_second = rep->can_be_null & mask_skip;
346 }
347 else
348 {
349 take_first = can_start(*position, rep->_map, (unsigned char)mask_take);
350 take_second = can_start(*position, rep->_map, (unsigned char)mask_skip);
351 }
352
353 if(next_count->get_count() < rep->min)
354 {
355 // we must take the repeat:
356 if(take_first)
357 {
358 // increase the counter:
359 ++(*next_count);
360 pstate = rep->next.p;
361 return match_all_states();
362 }
363 return false;
364 }
365 bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
366 if(greedy)
367 {
368 // try and take the repeat if we can:
369 if((next_count->get_count() < rep->max) && take_first)
370 {
371 // store position in case we fail:
372 BidiIterator pos = position;
373 // increase the counter:
374 ++(*next_count);
375 pstate = rep->next.p;
376 if(match_all_states())
377 return true;
378 if(!m_can_backtrack)
379 return false;
380 // failed repeat, reset posistion and fall through for alternative:
381 position = pos;
382 }
383 if(take_second)
384 {
385 pstate = rep->alt.p;
386 return true;
387 }
388 return false; // can't take anything, fail...
389 }
390 else // non-greedy
391 {
392 // try and skip the repeat if we can:
393 if(take_second)
394 {
395 // store position in case we fail:
396 BidiIterator pos = position;
397 pstate = rep->alt.p;
398 if(match_all_states())
399 return true;
400 if(!m_can_backtrack)
401 return false;
402 // failed alternative, reset posistion and fall through for repeat:
403 position = pos;
404 }
405 if((next_count->get_count() < rep->max) && take_first)
406 {
407 // increase the counter:
408 ++(*next_count);
409 pstate = rep->next.p;
410 return match_all_states();
411 }
412 }
413 return false;
414 #ifdef BOOST_MSVC
415 #pragma warning(pop)
416 #endif
417 }
418
419 template <class BidiIterator, class Allocator, class traits>
match_dot_repeat_slow()420 bool perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_slow()
421 {
422 #ifdef BOOST_MSVC
423 #pragma warning(push)
424 #pragma warning(disable:4127)
425 #endif
426 std::size_t count = 0;
427 const re_repeat* rep = static_cast<const re_repeat*>(pstate);
428 re_syntax_base* psingle = rep->next.p;
429 // match compulsary repeats first:
430 while(count < rep->min)
431 {
432 pstate = psingle;
433 if(!match_wild())
434 return false;
435 ++count;
436 }
437 bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
438 if(greedy)
439 {
440 // normal repeat:
441 while(count < rep->max)
442 {
443 pstate = psingle;
444 if(!match_wild())
445 break;
446 ++count;
447 }
448 if((rep->leading) && (count < rep->max))
449 restart = position;
450 pstate = rep;
451 return backtrack_till_match(count - rep->min);
452 }
453 else
454 {
455 // non-greedy, keep trying till we get a match:
456 BidiIterator save_pos;
457 do
458 {
459 if((rep->leading) && (rep->max == UINT_MAX))
460 restart = position;
461 pstate = rep->alt.p;
462 save_pos = position;
463 ++state_count;
464 if(match_all_states())
465 return true;
466 if((count >= rep->max) || !m_can_backtrack)
467 return false;
468 ++count;
469 pstate = psingle;
470 position = save_pos;
471 if(!match_wild())
472 return false;
473 }while(true);
474 }
475 #ifdef BOOST_MSVC
476 #pragma warning(pop)
477 #endif
478 }
479
480 template <class BidiIterator, class Allocator, class traits>
match_dot_repeat_fast()481 bool perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_fast()
482 {
483 #ifdef BOOST_MSVC
484 #pragma warning(push)
485 #pragma warning(disable:4127)
486 #endif
487 if(m_match_flags & match_not_dot_null)
488 return match_dot_repeat_slow();
489 if((static_cast<const re_dot*>(pstate->next.p)->mask & match_any_mask) == 0)
490 return match_dot_repeat_slow();
491 //
492 // start by working out how much we can skip:
493 //
494 const re_repeat* rep = static_cast<const re_repeat*>(pstate);
495 #ifdef BOOST_MSVC
496 #pragma warning(push)
497 #pragma warning(disable:4267)
498 #endif
499 bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
500 std::size_t count = (std::min)(static_cast<std::size_t>(::boost::BOOST_REGEX_DETAIL_NS::distance(position, last)), greedy ? rep->max : rep->min);
501 if(rep->min > count)
502 {
503 position = last;
504 return false; // not enough text left to match
505 }
506 std::advance(position, count);
507 #ifdef BOOST_MSVC
508 #pragma warning(pop)
509 #endif
510 if((rep->leading) && (count < rep->max) && greedy)
511 restart = position;
512 if(greedy)
513 return backtrack_till_match(count - rep->min);
514
515 // non-greedy, keep trying till we get a match:
516 BidiIterator save_pos;
517 do
518 {
519 while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip))
520 {
521 ++position;
522 ++count;
523 }
524 if((rep->leading) && (rep->max == UINT_MAX))
525 restart = position;
526 pstate = rep->alt.p;
527 save_pos = position;
528 ++state_count;
529 if(match_all_states())
530 return true;
531 if((count >= rep->max) || !m_can_backtrack)
532 return false;
533 if(save_pos == last)
534 return false;
535 position = ++save_pos;
536 ++count;
537 }while(true);
538 #ifdef BOOST_MSVC
539 #pragma warning(pop)
540 #endif
541 }
542
543 template <class BidiIterator, class Allocator, class traits>
match_char_repeat()544 bool perl_matcher<BidiIterator, Allocator, traits>::match_char_repeat()
545 {
546 #ifdef BOOST_MSVC
547 #pragma warning(push)
548 #pragma warning(disable:4127)
549 #pragma warning(disable:4267)
550 #endif
551 #ifdef BOOST_BORLANDC
552 #pragma option push -w-8008 -w-8066 -w-8004
553 #endif
554 const re_repeat* rep = static_cast<const re_repeat*>(pstate);
555 BOOST_ASSERT(1 == static_cast<const re_literal*>(rep->next.p)->length);
556 const char_type what = *reinterpret_cast<const char_type*>(static_cast<const re_literal*>(rep->next.p) + 1);
557 //
558 // start by working out how much we can skip:
559 //
560 bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
561 std::size_t count, desired;
562 if(::boost::is_random_access_iterator<BidiIterator>::value)
563 {
564 desired =
565 (std::min)(
566 (std::size_t)(greedy ? rep->max : rep->min),
567 (std::size_t)::boost::BOOST_REGEX_DETAIL_NS::distance(position, last));
568 count = desired;
569 ++desired;
570 if(icase)
571 {
572 while(--desired && (traits_inst.translate_nocase(*position) == what))
573 {
574 ++position;
575 }
576 }
577 else
578 {
579 while(--desired && (traits_inst.translate(*position) == what))
580 {
581 ++position;
582 }
583 }
584 count = count - desired;
585 }
586 else
587 {
588 count = 0;
589 desired = greedy ? rep->max : rep->min;
590 while((count < desired) && (position != last) && (traits_inst.translate(*position, icase) == what))
591 {
592 ++position;
593 ++count;
594 }
595 }
596 if((rep->leading) && (count < rep->max) && greedy)
597 restart = position;
598 if(count < rep->min)
599 return false;
600
601 if(greedy)
602 return backtrack_till_match(count - rep->min);
603
604 // non-greedy, keep trying till we get a match:
605 BidiIterator save_pos;
606 do
607 {
608 while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip))
609 {
610 if((traits_inst.translate(*position, icase) == what))
611 {
612 ++position;
613 ++count;
614 }
615 else
616 return false; // counldn't repeat even though it was the only option
617 }
618 if((rep->leading) && (rep->max == UINT_MAX))
619 restart = position;
620 pstate = rep->alt.p;
621 save_pos = position;
622 ++state_count;
623 if(match_all_states())
624 return true;
625 if((count >= rep->max) || !m_can_backtrack)
626 return false;
627 position = save_pos;
628 if(position == last)
629 return false;
630 if(traits_inst.translate(*position, icase) == what)
631 {
632 ++position;
633 ++count;
634 }
635 else
636 {
637 return false;
638 }
639 }while(true);
640 #ifdef BOOST_BORLANDC
641 #pragma option pop
642 #endif
643 #ifdef BOOST_MSVC
644 #pragma warning(pop)
645 #endif
646 }
647
648 template <class BidiIterator, class Allocator, class traits>
match_set_repeat()649 bool perl_matcher<BidiIterator, Allocator, traits>::match_set_repeat()
650 {
651 #ifdef BOOST_MSVC
652 #pragma warning(push)
653 #pragma warning(disable:4127)
654 #endif
655 #ifdef BOOST_BORLANDC
656 #pragma option push -w-8008 -w-8066 -w-8004
657 #endif
658 const re_repeat* rep = static_cast<const re_repeat*>(pstate);
659 const unsigned char* map = static_cast<const re_set*>(rep->next.p)->_map;
660 std::size_t count = 0;
661 //
662 // start by working out how much we can skip:
663 //
664 bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
665 std::size_t desired = greedy ? rep->max : rep->min;
666 if(::boost::is_random_access_iterator<BidiIterator>::value)
667 {
668 BidiIterator end = position;
669 // Move end forward by "desired", preferably without using distance or advance if we can
670 // as these can be slow for some iterator types.
671 std::size_t len = (desired == (std::numeric_limits<std::size_t>::max)()) ? 0u : ::boost::BOOST_REGEX_DETAIL_NS::distance(position, last);
672 if(desired >= len)
673 end = last;
674 else
675 std::advance(end, desired);
676 BidiIterator origin(position);
677 while((position != end) && map[static_cast<unsigned char>(traits_inst.translate(*position, icase))])
678 {
679 ++position;
680 }
681 count = (unsigned)::boost::BOOST_REGEX_DETAIL_NS::distance(origin, position);
682 }
683 else
684 {
685 while((count < desired) && (position != last) && map[static_cast<unsigned char>(traits_inst.translate(*position, icase))])
686 {
687 ++position;
688 ++count;
689 }
690 }
691 if((rep->leading) && (count < rep->max) && greedy)
692 restart = position;
693 if(count < rep->min)
694 return false;
695
696 if(greedy)
697 return backtrack_till_match(count - rep->min);
698
699 // non-greedy, keep trying till we get a match:
700 BidiIterator save_pos;
701 do
702 {
703 while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip))
704 {
705 if(map[static_cast<unsigned char>(traits_inst.translate(*position, icase))])
706 {
707 ++position;
708 ++count;
709 }
710 else
711 return false; // counldn't repeat even though it was the only option
712 }
713 if((rep->leading) && (rep->max == UINT_MAX))
714 restart = position;
715 pstate = rep->alt.p;
716 save_pos = position;
717 ++state_count;
718 if(match_all_states())
719 return true;
720 if((count >= rep->max) || !m_can_backtrack)
721 return false;
722 position = save_pos;
723 if(position == last)
724 return false;
725 if(map[static_cast<unsigned char>(traits_inst.translate(*position, icase))])
726 {
727 ++position;
728 ++count;
729 }
730 else
731 {
732 return false;
733 }
734 }while(true);
735 #ifdef BOOST_BORLANDC
736 #pragma option pop
737 #endif
738 #ifdef BOOST_MSVC
739 #pragma warning(pop)
740 #endif
741 }
742
743 template <class BidiIterator, class Allocator, class traits>
match_long_set_repeat()744 bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat()
745 {
746 #ifdef BOOST_MSVC
747 #pragma warning(push)
748 #pragma warning(disable:4127)
749 #endif
750 #ifdef BOOST_BORLANDC
751 #pragma option push -w-8008 -w-8066 -w-8004
752 #endif
753 typedef typename traits::char_class_type char_class_type;
754 const re_repeat* rep = static_cast<const re_repeat*>(pstate);
755 const re_set_long<char_class_type>* set = static_cast<const re_set_long<char_class_type>*>(pstate->next.p);
756 std::size_t count = 0;
757 //
758 // start by working out how much we can skip:
759 //
760 bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
761 std::size_t desired = greedy ? rep->max : rep->min;
762 if(::boost::is_random_access_iterator<BidiIterator>::value)
763 {
764 BidiIterator end = position;
765 // Move end forward by "desired", preferably without using distance or advance if we can
766 // as these can be slow for some iterator types.
767 std::size_t len = (desired == (std::numeric_limits<std::size_t>::max)()) ? 0u : ::boost::BOOST_REGEX_DETAIL_NS::distance(position, last);
768 if(desired >= len)
769 end = last;
770 else
771 std::advance(end, desired);
772 BidiIterator origin(position);
773 while((position != end) && (position != re_is_set_member(position, last, set, re.get_data(), icase)))
774 {
775 ++position;
776 }
777 count = (unsigned)::boost::BOOST_REGEX_DETAIL_NS::distance(origin, position);
778 }
779 else
780 {
781 while((count < desired) && (position != last) && (position != re_is_set_member(position, last, set, re.get_data(), icase)))
782 {
783 ++position;
784 ++count;
785 }
786 }
787 if((rep->leading) && (count < rep->max) && greedy)
788 restart = position;
789 if(count < rep->min)
790 return false;
791
792 if(greedy)
793 return backtrack_till_match(count - rep->min);
794
795 // non-greedy, keep trying till we get a match:
796 BidiIterator save_pos;
797 do
798 {
799 while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip))
800 {
801 if(position != re_is_set_member(position, last, set, re.get_data(), icase))
802 {
803 ++position;
804 ++count;
805 }
806 else
807 return false; // counldn't repeat even though it was the only option
808 }
809 if((rep->leading) && (rep->max == UINT_MAX))
810 restart = position;
811 pstate = rep->alt.p;
812 save_pos = position;
813 ++state_count;
814 if(match_all_states())
815 return true;
816 if((count >= rep->max) || !m_can_backtrack)
817 return false;
818 position = save_pos;
819 if(position == last)
820 return false;
821 if(position != re_is_set_member(position, last, set, re.get_data(), icase))
822 {
823 ++position;
824 ++count;
825 }
826 else
827 {
828 return false;
829 }
830 }while(true);
831 #ifdef BOOST_BORLANDC
832 #pragma option pop
833 #endif
834 #ifdef BOOST_MSVC
835 #pragma warning(pop)
836 #endif
837 }
838
839 template <class BidiIterator, class Allocator, class traits>
backtrack_till_match(std::size_t count)840 bool perl_matcher<BidiIterator, Allocator, traits>::backtrack_till_match(std::size_t count)
841 {
842 #ifdef BOOST_MSVC
843 #pragma warning(push)
844 #pragma warning(disable:4127)
845 #endif
846 if(!m_can_backtrack)
847 return false;
848 if((m_match_flags & match_partial) && (position == last))
849 m_has_partial_match = true;
850
851 const re_repeat* rep = static_cast<const re_repeat*>(pstate);
852 BidiIterator backtrack = position;
853 if(position == last)
854 {
855 if(rep->can_be_null & mask_skip)
856 {
857 pstate = rep->alt.p;
858 if(match_all_states())
859 return true;
860 }
861 if(count)
862 {
863 position = --backtrack;
864 --count;
865 }
866 else
867 return false;
868 }
869 do
870 {
871 while(count && !can_start(*position, rep->_map, mask_skip))
872 {
873 --position;
874 --count;
875 ++state_count;
876 }
877 pstate = rep->alt.p;
878 backtrack = position;
879 if(match_all_states())
880 return true;
881 if(count == 0)
882 return false;
883 position = --backtrack;
884 ++state_count;
885 --count;
886 }while(true);
887 #ifdef BOOST_MSVC
888 #pragma warning(pop)
889 #endif
890 }
891
892 template <class BidiIterator, class Allocator, class traits>
match_recursion()893 bool perl_matcher<BidiIterator, Allocator, traits>::match_recursion()
894 {
895 BOOST_ASSERT(pstate->type == syntax_element_recurse);
896 //
897 // Set new call stack:
898 //
899 if(recursion_stack.capacity() == 0)
900 {
901 recursion_stack.reserve(50);
902 }
903 //
904 // See if we've seen this recursion before at this location, if we have then
905 // we need to prevent infinite recursion:
906 //
907 for(typename std::vector<recursion_info<results_type> >::reverse_iterator i = recursion_stack.rbegin(); i != recursion_stack.rend(); ++i)
908 {
909 if(i->idx == static_cast<const re_brace*>(static_cast<const re_jump*>(pstate)->alt.p)->index)
910 {
911 if(i->location_of_start == position)
912 return false;
913 break;
914 }
915 }
916 //
917 // Now get on with it:
918 //
919 recursion_stack.push_back(recursion_info<results_type>());
920 recursion_stack.back().preturn_address = pstate->next.p;
921 recursion_stack.back().results = *m_presult;
922 recursion_stack.back().repeater_stack = next_count;
923 recursion_stack.back().location_of_start = position;
924 pstate = static_cast<const re_jump*>(pstate)->alt.p;
925 recursion_stack.back().idx = static_cast<const re_brace*>(pstate)->index;
926
927 repeater_count<BidiIterator>* saved = next_count;
928 repeater_count<BidiIterator> r(&next_count); // resets all repeat counts since we're recursing and starting fresh on those
929 next_count = &r;
930 bool can_backtrack = m_can_backtrack;
931 bool result = match_all_states();
932 m_can_backtrack = can_backtrack;
933 next_count = saved;
934
935 if(!result)
936 {
937 next_count = recursion_stack.back().repeater_stack;
938 *m_presult = recursion_stack.back().results;
939 recursion_stack.pop_back();
940 return false;
941 }
942 return true;
943 }
944
945 template <class BidiIterator, class Allocator, class traits>
match_endmark()946 bool perl_matcher<BidiIterator, Allocator, traits>::match_endmark()
947 {
948 int index = static_cast<const re_brace*>(pstate)->index;
949 icase = static_cast<const re_brace*>(pstate)->icase;
950 if(index > 0)
951 {
952 if((m_match_flags & match_nosubs) == 0)
953 {
954 m_presult->set_second(position, index);
955 }
956 if(!recursion_stack.empty())
957 {
958 if(index == recursion_stack.back().idx)
959 {
960 recursion_info<results_type> saved = recursion_stack.back();
961 recursion_stack.pop_back();
962 pstate = saved.preturn_address;
963 repeater_count<BidiIterator>* saved_count = next_count;
964 next_count = saved.repeater_stack;
965 *m_presult = saved.results;
966 if(!match_all_states())
967 {
968 recursion_stack.push_back(saved);
969 next_count = saved_count;
970 return false;
971 }
972 }
973 }
974 }
975 else if((index < 0) && (index != -4))
976 {
977 // matched forward lookahead:
978 pstate = 0;
979 return true;
980 }
981 pstate = pstate ? pstate->next.p : 0;
982 return true;
983 }
984
985 template <class BidiIterator, class Allocator, class traits>
match_match()986 bool perl_matcher<BidiIterator, Allocator, traits>::match_match()
987 {
988 if(!recursion_stack.empty())
989 {
990 BOOST_ASSERT(0 == recursion_stack.back().idx);
991 const re_syntax_base* saved_state = pstate = recursion_stack.back().preturn_address;
992 *m_presult = recursion_stack.back().results;
993 recursion_stack.pop_back();
994 if(!match_all_states())
995 {
996 recursion_stack.push_back(recursion_info<results_type>());
997 recursion_stack.back().preturn_address = saved_state;
998 recursion_stack.back().results = *m_presult;
999 recursion_stack.back().location_of_start = position;
1000 return false;
1001 }
1002 return true;
1003 }
1004 if((m_match_flags & match_not_null) && (position == (*m_presult)[0].first))
1005 return false;
1006 if((m_match_flags & match_all) && (position != last))
1007 return false;
1008 if((m_match_flags & regex_constants::match_not_initial_null) && (position == search_base))
1009 return false;
1010 m_presult->set_second(position);
1011 pstate = 0;
1012 m_has_found_match = true;
1013 if((m_match_flags & match_posix) == match_posix)
1014 {
1015 m_result.maybe_assign(*m_presult);
1016 if((m_match_flags & match_any) == 0)
1017 return false;
1018 }
1019 #ifdef BOOST_REGEX_MATCH_EXTRA
1020 if(match_extra & m_match_flags)
1021 {
1022 for(unsigned i = 0; i < m_presult->size(); ++i)
1023 if((*m_presult)[i].matched)
1024 ((*m_presult)[i]).get_captures().push_back((*m_presult)[i]);
1025 }
1026 #endif
1027 return true;
1028 }
1029
1030 template <class BidiIterator, class Allocator, class traits>
match_commit()1031 bool perl_matcher<BidiIterator, Allocator, traits>::match_commit()
1032 {
1033 m_can_backtrack = false;
1034 int action = static_cast<const re_commit*>(pstate)->action;
1035 switch(action)
1036 {
1037 case commit_commit:
1038 restart = last;
1039 break;
1040 case commit_skip:
1041 restart = position;
1042 break;
1043 }
1044 pstate = pstate->next.p;
1045 return true;
1046 }
1047
1048 template <class BidiIterator, class Allocator, class traits>
match_then()1049 bool perl_matcher<BidiIterator, Allocator, traits>::match_then()
1050 {
1051 pstate = pstate->next.p;
1052 if(match_all_states())
1053 return true;
1054 m_can_backtrack = false;
1055 m_have_then = true;
1056 return false;
1057 }
1058
1059 template <class BidiIterator, class Allocator, class traits>
match_toggle_case()1060 bool perl_matcher<BidiIterator, Allocator, traits>::match_toggle_case()
1061 {
1062 // change our case sensitivity:
1063 bool oldcase = this->icase;
1064 this->icase = static_cast<const re_case*>(pstate)->icase;
1065 pstate = pstate->next.p;
1066 bool result = match_all_states();
1067 this->icase = oldcase;
1068 return result;
1069 }
1070
1071
1072
1073 template <class BidiIterator, class Allocator, class traits>
skip_until_paren(int index,bool have_match)1074 bool perl_matcher<BidiIterator, Allocator, traits>::skip_until_paren(int index, bool have_match)
1075 {
1076 while(pstate)
1077 {
1078 if(pstate->type == syntax_element_endmark)
1079 {
1080 if(static_cast<const re_brace*>(pstate)->index == index)
1081 {
1082 if(have_match)
1083 return this->match_endmark();
1084 pstate = pstate->next.p;
1085 return true;
1086 }
1087 else
1088 {
1089 // Unenclosed closing ), occurs when (*ACCEPT) is inside some other
1090 // parenthesis which may or may not have other side effects associated with it.
1091 bool r = match_endmark();
1092 m_have_accept = true;
1093 if(!pstate)
1094 return r;
1095 }
1096 continue;
1097 }
1098 else if(pstate->type == syntax_element_match)
1099 return true;
1100 else if(pstate->type == syntax_element_startmark)
1101 {
1102 int idx = static_cast<const re_brace*>(pstate)->index;
1103 pstate = pstate->next.p;
1104 skip_until_paren(idx, false);
1105 continue;
1106 }
1107 pstate = pstate->next.p;
1108 }
1109 return true;
1110 }
1111
1112
1113 } // namespace BOOST_REGEX_DETAIL_NS
1114 } // namespace boost
1115 #ifdef BOOST_MSVC
1116 #pragma warning(pop)
1117 #endif
1118
1119 #ifdef BOOST_MSVC
1120 #pragma warning(push)
1121 #pragma warning(disable: 4103)
1122 #endif
1123 #ifdef BOOST_HAS_ABI_HEADERS
1124 # include BOOST_ABI_SUFFIX
1125 #endif
1126 #ifdef BOOST_MSVC
1127 #pragma warning(pop)
1128 #endif
1129
1130 #endif
1131
1132