// Copyright 2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 28 #ifndef V8_REGEXP_MACRO_ASSEMBLER_H_ 29 #define V8_REGEXP_MACRO_ASSEMBLER_H_ 30 31 #include "ast.h" 32 33 namespace v8 { 34 namespace internal { 35 36 struct DisjunctDecisionRow { 37 RegExpCharacterClass cc; 38 Label* on_match; 39 }; 40 41 42 class RegExpMacroAssembler { 43 public: 44 // The implementation must be able to handle at least: 45 static const int kMaxRegister = (1 << 16) - 1; 46 static const int kMaxCPOffset = (1 << 15) - 1; 47 static const int kMinCPOffset = -(1 << 15); 48 enum IrregexpImplementation { 49 kIA32Implementation, 50 kARMImplementation, 51 kMIPSImplementation, 52 kX64Implementation, 53 kBytecodeImplementation 54 }; 55 56 enum StackCheckFlag { 57 kNoStackLimitCheck = false, 58 kCheckStackLimit = true 59 }; 60 61 RegExpMacroAssembler(); 62 virtual ~RegExpMacroAssembler(); 63 // The maximal number of pushes between stack checks. Users must supply 64 // kCheckStackLimit flag to push operations (instead of kNoStackLimitCheck) 65 // at least once for every stack_limit() pushes that are executed. 66 virtual int stack_limit_slack() = 0; 67 virtual bool CanReadUnaligned(); 68 virtual void AdvanceCurrentPosition(int by) = 0; // Signed cp change. 69 virtual void AdvanceRegister(int reg, int by) = 0; // r[reg] += by. 70 // Continues execution from the position pushed on the top of the backtrack 71 // stack by an earlier PushBacktrack(Label*). 72 virtual void Backtrack() = 0; 73 virtual void Bind(Label* label) = 0; 74 virtual void CheckAtStart(Label* on_at_start) = 0; 75 // Dispatch after looking the current character up in a 2-bits-per-entry 76 // map. The destinations vector has up to 4 labels. 77 virtual void CheckCharacter(unsigned c, Label* on_equal) = 0; 78 // Bitwise and the current character with the given constant and then 79 // check for a match with c. 
80 virtual void CheckCharacterAfterAnd(unsigned c, 81 unsigned and_with, 82 Label* on_equal) = 0; 83 virtual void CheckCharacterGT(uc16 limit, Label* on_greater) = 0; 84 virtual void CheckCharacterLT(uc16 limit, Label* on_less) = 0; 85 // Check the current character for a match with a literal string. If we 86 // fail to match then goto the on_failure label. If check_eos is set then 87 // the end of input always fails. If check_eos is clear then it is the 88 // caller's responsibility to ensure that the end of string is not hit. 89 // If the label is NULL then we should pop a backtrack address off 90 // the stack and go to that. 91 virtual void CheckCharacters( 92 Vector<const uc16> str, 93 int cp_offset, 94 Label* on_failure, 95 bool check_eos) = 0; 96 virtual void CheckGreedyLoop(Label* on_tos_equals_current_position) = 0; 97 virtual void CheckNotAtStart(Label* on_not_at_start) = 0; 98 virtual void CheckNotBackReference(int start_reg, Label* on_no_match) = 0; 99 virtual void CheckNotBackReferenceIgnoreCase(int start_reg, 100 Label* on_no_match) = 0; 101 // Check the current character for a match with a literal character. If we 102 // fail to match then goto the on_failure label. End of input always 103 // matches. If the label is NULL then we should pop a backtrack address off 104 // the stack and go to that. 105 virtual void CheckNotCharacter(unsigned c, Label* on_not_equal) = 0; 106 virtual void CheckNotCharacterAfterAnd(unsigned c, 107 unsigned and_with, 108 Label* on_not_equal) = 0; 109 // Subtract a constant from the current character, then or with the given 110 // constant and then check for a match with c. 111 virtual void CheckNotCharacterAfterMinusAnd(uc16 c, 112 uc16 minus, 113 uc16 and_with, 114 Label* on_not_equal) = 0; 115 virtual void CheckNotRegistersEqual(int reg1, 116 int reg2, 117 Label* on_not_equal) = 0; 118 119 // Checks whether the given offset from the current position is before 120 // the end of the string. 
May overwrite the current character. CheckPosition(int cp_offset,Label * on_outside_input)121 virtual void CheckPosition(int cp_offset, Label* on_outside_input) { 122 LoadCurrentCharacter(cp_offset, on_outside_input, true); 123 } 124 // Check whether a standard/default character class matches the current 125 // character. Returns false if the type of special character class does 126 // not have custom support. 127 // May clobber the current loaded character. CheckSpecialCharacterClass(uc16 type,Label * on_no_match)128 virtual bool CheckSpecialCharacterClass(uc16 type, 129 Label* on_no_match) { 130 return false; 131 } 132 virtual void Fail() = 0; 133 virtual Handle<HeapObject> GetCode(Handle<String> source) = 0; 134 virtual void GoTo(Label* label) = 0; 135 // Check whether a register is >= a given constant and go to a label if it 136 // is. Backtracks instead if the label is NULL. 137 virtual void IfRegisterGE(int reg, int comparand, Label* if_ge) = 0; 138 // Check whether a register is < a given constant and go to a label if it is. 139 // Backtracks instead if the label is NULL. 140 virtual void IfRegisterLT(int reg, int comparand, Label* if_lt) = 0; 141 // Check whether a register is == to the current position and go to a 142 // label if it is. 143 virtual void IfRegisterEqPos(int reg, Label* if_eq) = 0; 144 virtual IrregexpImplementation Implementation() = 0; 145 virtual void LoadCurrentCharacter(int cp_offset, 146 Label* on_end_of_input, 147 bool check_bounds = true, 148 int characters = 1) = 0; 149 virtual void PopCurrentPosition() = 0; 150 virtual void PopRegister(int register_index) = 0; 151 // Pushes the label on the backtrack stack, so that a following Backtrack 152 // will go to this label. Always checks the backtrack stack limit. 
153 virtual void PushBacktrack(Label* label) = 0; 154 virtual void PushCurrentPosition() = 0; 155 virtual void PushRegister(int register_index, 156 StackCheckFlag check_stack_limit) = 0; 157 virtual void ReadCurrentPositionFromRegister(int reg) = 0; 158 virtual void ReadStackPointerFromRegister(int reg) = 0; 159 virtual void SetCurrentPositionFromEnd(int by) = 0; 160 virtual void SetRegister(int register_index, int to) = 0; 161 virtual void Succeed() = 0; 162 virtual void WriteCurrentPositionToRegister(int reg, int cp_offset) = 0; 163 virtual void ClearRegisters(int reg_from, int reg_to) = 0; 164 virtual void WriteStackPointerToRegister(int reg) = 0; 165 166 // Controls the generation of large inlined constants in the code. set_slow_safe(bool ssc)167 void set_slow_safe(bool ssc) { slow_safe_compiler_ = ssc; } slow_safe()168 bool slow_safe() { return slow_safe_compiler_; } 169 170 private: 171 bool slow_safe_compiler_; 172 }; 173 174 175 #ifndef V8_INTERPRETED_REGEXP // Avoid compiling unused code. 176 177 class NativeRegExpMacroAssembler: public RegExpMacroAssembler { 178 public: 179 // Type of input string to generate code for. 180 enum Mode { ASCII = 1, UC16 = 2 }; 181 182 // Result of calling generated native RegExp code. 183 // RETRY: Something significant changed during execution, and the matching 184 // should be retried from scratch. 185 // EXCEPTION: Something failed during execution. If no exception has been 186 // thrown, it's an internal out-of-memory, and the caller should 187 // throw the exception. 188 // FAILURE: Matching failed. 189 // SUCCESS: Matching succeeded, and the output array has been filled with 190 // capture positions. 
191 enum Result { RETRY = -2, EXCEPTION = -1, FAILURE = 0, SUCCESS = 1 }; 192 193 NativeRegExpMacroAssembler(); 194 virtual ~NativeRegExpMacroAssembler(); 195 virtual bool CanReadUnaligned(); 196 197 static Result Match(Handle<Code> regexp, 198 Handle<String> subject, 199 int* offsets_vector, 200 int offsets_vector_length, 201 int previous_index, 202 Isolate* isolate); 203 204 // Compares two-byte strings case insensitively. 205 // Called from generated RegExp code. 206 static int CaseInsensitiveCompareUC16(Address byte_offset1, 207 Address byte_offset2, 208 size_t byte_length, 209 Isolate* isolate); 210 211 // Called from RegExp if the backtrack stack limit is hit. 212 // Tries to expand the stack. Returns the new stack-pointer if 213 // successful, and updates the stack_top address, or returns 0 if unable 214 // to grow the stack. 215 // This function must not trigger a garbage collection. 216 static Address GrowStack(Address stack_pointer, Address* stack_top, 217 Isolate* isolate); 218 219 static const byte* StringCharacterPosition(String* subject, int start_index); 220 221 // Byte map of ASCII characters with a 0xff if the character is a word 222 // character (digit, letter or underscore) and 0x00 otherwise. 223 // Used by generated RegExp code. 224 static const byte word_character_map[128]; 225 word_character_map_address()226 static Address word_character_map_address() { 227 return const_cast<Address>(&word_character_map[0]); 228 } 229 230 static Result Execute(Code* code, 231 String* input, 232 int start_offset, 233 const byte* input_start, 234 const byte* input_end, 235 int* output, 236 Isolate* isolate); 237 }; 238 239 #endif // V8_INTERPRETED_REGEXP 240 241 } } // namespace v8::internal 242 243 #endif // V8_REGEXP_MACRO_ASSEMBLER_H_ 244