• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2010 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 #include "re2/set.h"
6 
7 #include <stddef.h>
8 #include <algorithm>
9 #include <memory>
10 
11 #include "util/util.h"
12 #include "util/logging.h"
13 #include "re2/pod_array.h"
14 #include "re2/prog.h"
15 #include "re2/re2.h"
16 #include "re2/regexp.h"
17 #include "re2/stringpiece.h"
18 
19 namespace re2 {
20 
Set(const RE2::Options & options,RE2::Anchor anchor)21 RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor) {
22   options_.Copy(options);
23   options_.set_never_capture(true);  // might unblock some optimisations
24   anchor_ = anchor;
25   prog_ = NULL;
26   compiled_ = false;
27   size_ = 0;
28 }
29 
~Set()30 RE2::Set::~Set() {
31   for (size_t i = 0; i < elem_.size(); i++)
32     elem_[i].second->Decref();
33   delete prog_;
34 }
35 
Add(const StringPiece & pattern,std::string * error)36 int RE2::Set::Add(const StringPiece& pattern, std::string* error) {
37   if (compiled_) {
38     LOG(DFATAL) << "RE2::Set::Add() called after compiling";
39     return -1;
40   }
41 
42   Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
43     options_.ParseFlags());
44   RegexpStatus status;
45   re2::Regexp* re = Regexp::Parse(pattern, pf, &status);
46   if (re == NULL) {
47     if (error != NULL)
48       *error = status.Text();
49     if (options_.log_errors())
50       LOG(ERROR) << "Error parsing '" << pattern << "': " << status.Text();
51     return -1;
52   }
53 
54   // Concatenate with match index and push on vector.
55   int n = static_cast<int>(elem_.size());
56   re2::Regexp* m = re2::Regexp::HaveMatch(n, pf);
57   if (re->op() == kRegexpConcat) {
58     int nsub = re->nsub();
59     PODArray<re2::Regexp*> sub(nsub + 1);
60     for (int i = 0; i < nsub; i++)
61       sub[i] = re->sub()[i]->Incref();
62     sub[nsub] = m;
63     re->Decref();
64     re = re2::Regexp::Concat(sub.data(), nsub + 1, pf);
65   } else {
66     re2::Regexp* sub[2];
67     sub[0] = re;
68     sub[1] = m;
69     re = re2::Regexp::Concat(sub, 2, pf);
70   }
71   elem_.emplace_back(std::string(pattern), re);
72   return n;
73 }
74 
Compile()75 bool RE2::Set::Compile() {
76   if (compiled_) {
77     LOG(DFATAL) << "RE2::Set::Compile() called more than once";
78     return false;
79   }
80   compiled_ = true;
81   size_ = static_cast<int>(elem_.size());
82 
83   // Sort the elements by their patterns. This is good enough for now
84   // until we have a Regexp comparison function. (Maybe someday...)
85   std::sort(elem_.begin(), elem_.end(),
86             [](const Elem& a, const Elem& b) -> bool {
87               return a.first < b.first;
88             });
89 
90   PODArray<re2::Regexp*> sub(size_);
91   for (int i = 0; i < size_; i++)
92     sub[i] = elem_[i].second;
93   elem_.clear();
94   elem_.shrink_to_fit();
95 
96   Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
97     options_.ParseFlags());
98   re2::Regexp* re = re2::Regexp::Alternate(sub.data(), size_, pf);
99 
100   prog_ = Prog::CompileSet(re, anchor_, options_.max_mem());
101   re->Decref();
102   return prog_ != NULL;
103 }
104 
Match(const StringPiece & text,std::vector<int> * v) const105 bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v) const {
106   return Match(text, v, NULL);
107 }
108 
Match(const StringPiece & text,std::vector<int> * v,ErrorInfo * error_info) const109 bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v,
110                      ErrorInfo* error_info) const {
111   if (!compiled_) {
112     LOG(DFATAL) << "RE2::Set::Match() called before compiling";
113     if (error_info != NULL)
114       error_info->kind = kNotCompiled;
115     return false;
116   }
117   bool dfa_failed = false;
118   std::unique_ptr<SparseSet> matches;
119   if (v != NULL) {
120     matches.reset(new SparseSet(size_));
121     v->clear();
122   }
123   bool ret = prog_->SearchDFA(text, text, Prog::kAnchored, Prog::kManyMatch,
124                               NULL, &dfa_failed, matches.get());
125   if (dfa_failed) {
126     if (options_.log_errors())
127       LOG(ERROR) << "DFA out of memory: "
128                  << "program size " << prog_->size() << ", "
129                  << "list count " << prog_->list_count() << ", "
130                  << "bytemap range " << prog_->bytemap_range();
131     if (error_info != NULL)
132       error_info->kind = kOutOfMemory;
133     return false;
134   }
135   if (ret == false) {
136     if (error_info != NULL)
137       error_info->kind = kNoError;
138     return false;
139   }
140   if (v != NULL) {
141     if (matches->empty()) {
142       LOG(DFATAL) << "RE2::Set::Match() matched, but no matches returned?!";
143       if (error_info != NULL)
144         error_info->kind = kInconsistent;
145       return false;
146     }
147     v->assign(matches->begin(), matches->end());
148   }
149   if (error_info != NULL)
150     error_info->kind = kNoError;
151   return true;
152 }
153 
154 }  // namespace re2
155