/* ***** BEGIN LICENSE BLOCK *****
* Version: NPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Netscape Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/NPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is JavaScript Engine testing utilities.
*
* The Initial Developer of the Original Code is Netscape Communications Corp.
* Portions created by the Initial Developer are Copyright (C) 2002
* the Initial Developer. All Rights Reserved.
*
* Contributor(s): rogerl@netscape.com, pschwartau@netscape.com
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the NPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the NPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK *****
*
*
* Date: 14 Feb 2002
* SUMMARY: Performance: Regexp performance degraded from 4.7
* See http://bugzilla.mozilla.org/show_bug.cgi?id=85721
*
* Adjust this testcase if necessary. The FAST constant defines
* an upper bound in milliseconds for any execution to take.
*
*/
//-----------------------------------------------------------------------------
// NOTE: printBugNumber, printStatus, inSection, reportCompare and testRegExp
// are not defined in this file; they are expected to be supplied by the
// surrounding test harness.
var bug = 85721;
var summary = 'Performance: execution of regular expression';
var FAST = 100; // execution should be 100 ms or less to pass the test
var MSG_FAST = 'Execution took less than ' + FAST + ' ms';
var MSG_SLOW = 'Execution took ';
var MSG_MS = ' ms';
var str = '';
var re = '';
var status = '';
var actual = '';
var expect = '';

printBugNumber(bug);
printStatus(summary);


/**
 * Milliseconds elapsed between startTime and the moment of the call.
 *
 * @param {Date} startTime - instant at which the measurement began
 * @returns {number} difference "now - startTime" in milliseconds
 */
function elapsedTime(startTime)
{
  return new Date() - startTime;
}


/**
 * Turn a duration into the message that is compared by reportCompare.
 * Every duration within the FAST budget maps to the same MSG_FAST text, so
 * isThisFast(FAST) can serve as the expected value of a performance check.
 *
 * @param {number} ms - measured duration in milliseconds
 * @returns {string} MSG_FAST when ms <= FAST, otherwise a message that
 *                   embeds the measured duration
 */
function isThisFast(ms)
{
  if (ms <= FAST)
    return MSG_FAST;
  return MSG_SLOW + ms + MSG_MS;
}



/*
 * The first regexp. We'll test for performance (Section 1) and accuracy (Section 2).
 */
str = '<sql:connection id="conn1"> <sql:url>www.m.com</sql:url> <sql:driver>drive.class</sql:driver>\n<sql:userId>foo</sql:userId> <sql:password>goo</sql:password> </sql:connection>';
re = /<sql:connection id="([^\r\n]*?)">\s*<sql:url>\s*([^\r\n]*?)\s*<\/sql:url>\s*<sql:driver>\s*([^\r\n]*?)\s*<\/sql:driver>\s*(\s*<sql:userId>\s*([^\r\n]*?)\s*<\/sql:userId>\s*)?\s*(\s*<sql:password>\s*([^\r\n]*?)\s*<\/sql:password>\s*)?\s*<\/sql:connection>/;
// Whole match followed by capture groups 1..7.
expect = [
  '<sql:connection id="conn1"> <sql:url>www.m.com</sql:url> <sql:driver>drive.class</sql:driver>\n<sql:userId>foo</sql:userId> <sql:password>goo</sql:password> </sql:connection>',
  'conn1',
  'www.m.com',
  'drive.class',
  '<sql:userId>foo</sql:userId> ',
  'foo',
  '<sql:password>goo</sql:password> ',
  'goo'
];

/*
 * Check performance -
 */
status = inSection(1);
var start = new Date();
var result = re.exec(str);
actual = elapsedTime(start);
reportCompare(isThisFast(FAST), isThisFast(actual), status);

/*
 * Check accuracy -
 */
status = inSection(2);
testRegExp([status], [re], [str], [result], [expect]);
/*
 * The second regexp (HUGE!). We'll test for performance (Section 3) and accuracy (Section 4).
 * It comes from the O'Reilly book "Mastering Regular Expressions" by Jeffrey Friedl, Appendix B
 *
 * Every $variable below holds a fragment of regexp SOURCE text; the fragments
 * are concatenated into $mailbox, which is finally compiled with new RegExp().
 * Control and whitespace characters are written with \x escapes (not legacy
 * octal escapes) so that the strings are also legal in strict-mode code; the
 * resulting characters are the same.
 */

//# Some things for avoiding backslashitis later on.
var $esc = '\\\\';                                    // regexp source for one literal backslash
var $Period = '\\.';                                  // literal dot (a bare '.' would match any character)
var $space = '\x20'; var $tab = '\t';
var $OpenBR = '\\['; var $CloseBR = '\\]';
var $OpenParen = '\\('; var $CloseParen = '\\)';
var $NonASCII = '\x80-\xff'; var $ctrl = '\x00-\x1f';
var $CRlist = '\n\x0d'; //# note: this should really be only \x0d (CR).
// Items 19, 20, 21
var $qtext = '[^' + $esc + $NonASCII + $CRlist + '"]';                     // # for within "..."
var $dtext = '[^' + $esc + $NonASCII + $CRlist + $OpenBR + $CloseBR + ']'; // # for within [...]
var $quoted_pair = $esc + '[^' + $NonASCII + ']';                          // # an escaped character

//##############################################################################
//# Items 22 and 23, comment.
//# Impossible to do properly with a regex, I make do by allowing at most one level of nesting.
var $ctext = '[^' + $esc + $NonASCII + $CRlist + '()]';

//# $Cnested matches one non-nested comment.
//# It is unrolled, with normal of $ctext, special of $quoted_pair.
var $Cnested =
  $OpenParen +                                  // # (
  $ctext + '*' +                                // #   normal*
  '(?:' + $quoted_pair + $ctext + '*)*' +       // #   (special normal*)*
  $CloseParen;                                  // # )


//# $comment allows one level of nested parentheses
//# It is unrolled, with normal of $ctext, special of ($quoted_pair|$Cnested)
var $comment =
  $OpenParen +                                  // # (
  $ctext + '*' +                                // #   normal*
  '(?:' +                                       // #   (
  '(?:' + $quoted_pair + '|' + $Cnested + ')' + // #     special
  $ctext + '*' +                                // #     normal*
  ')*' +                                        // #   )*
  $CloseParen;                                  // # )


//##############################################################################
//# $X is optional whitespace/comments.
var $X =
  '[' + $space + $tab + ']*' +                      // # Nab whitespace.
  '(?:' + $comment + '[' + $space + $tab + ']*)*';  // # If comment found, allow more spaces.


//# Item 10: atom
//# Both parentheses are excluded, matching the "()" listed by $phrase_char below.
var $atom_char = '[^(' + $space + ')<>@,;:".' + $esc + $OpenBR + $CloseBR + $ctrl + $NonASCII + ']';
var $atom =
  $atom_char + '+' +        // # some number of atom characters...
  '(?!' + $atom_char + ')'; // # ..not followed by something that could be part of an atom

// # Item 11: doublequoted string, unrolled.
var $quoted_str =
  '"' +                                   // # "
  $qtext + '*' +                          // #   normal
  '(?:' + $quoted_pair + $qtext + '*)*' + // #   ( special normal* )*
  '"';                                    // # "

//# Item 7: word is an atom or quoted string
var $word =
  '(?:' +
  $atom +       // # Atom
  '|' +         // #  or
  $quoted_str + // # Quoted string
  ')';

//# Item 12: domain-ref is just an atom
var $domain_ref = $atom;

//# Item 13: domain-literal is like a quoted string, but [...] instead of "..."
var $domain_lit =
  $OpenBR +                                    // # [
  '(?:' + $dtext + '|' + $quoted_pair + ')*' + // #   stuff
  $CloseBR;                                    // # ]

// # Item 9: sub-domain is a domain-ref or domain-literal
var $sub_domain =
  '(?:' +
  $domain_ref +
  '|' +
  $domain_lit +
  ')' +
  $X; // # optional trailing comments

// # Item 6: domain is a list of subdomains separated by dots.
var $domain =
  $sub_domain +
  '(?:' +
  $Period + $X + $sub_domain +
  ')*';

//# Item 8: a route. A bunch of "@ $domain" separated by commas, followed by a colon.
var $route =
  '@' + $X + $domain +
  '(?:,' + $X + '@' + $X + $domain + ')*' + // # additional domains
  ':' +
  $X; // # optional trailing comments

//# Item 6: local-part is a bunch of $word separated by periods
//# The '+' after $X is required: without it the statement ends at $X and the
//# "additional words" group never becomes part of the pattern.
var $local_part =
  $word + $X +
  '(?:' +
  $Period + $X + $word + $X + // # additional words
  ')*';

// # Item 2: addr-spec is local@domain
var $addr_spec =
  $local_part + '@' + $X + $domain;

//# Item 4: route-addr is <route? addr-spec>
var $route_addr =
  '<' + $X +               // # <
  '(?:' + $route + ')?' +  // #   optional route
  $addr_spec +             // #   address spec
  '>';                     // # >

//# Item 3: phrase........
var $phrase_ctrl = '\x00-\x08\x0a-\x1f'; // # like ctrl, but without tab

//# Like atom-char, but without listing space, and uses phrase_ctrl.
//# Since the class is negated, this matches the same as atom-char plus space and tab
var $phrase_char =
  '[^()<>@,;:".' + $esc + $OpenBR + $CloseBR + $NonASCII + $phrase_ctrl + ']';

// # We've worked it so that $word, $comment, and $quoted_str do not consume trailing $X
// # because we take care of it manually.
var $phrase =
  $word +                                      // # leading word
  $phrase_char + '*' +                         // # "normal" atoms and/or spaces
  '(?:' +
  '(?:' + $comment + '|' + $quoted_str + ')' + // # "special" comment or quoted string
  $phrase_char + '*' +                         // #  more "normal"
  ')*';

// ## Item #1: mailbox is an addr_spec or a phrase/route_addr
var $mailbox =
  $X +                     // # optional leading comment
  '(?:' +
  $phrase + $route_addr +  // # name and address
  '|' +                    // #  or
  $addr_spec +             // # address
  ')';


//###########################################################################


// The "g" flag makes exec() start at re.lastIndex and advance it afterwards.
re = new RegExp($mailbox, "g");
str = 'Jeffy<"That Tall Guy"@ora.com (this address is no longer active)>';
// The pattern has no capturing groups, so only the whole match is expected.
expect = ['Jeffy<"That Tall Guy"@ora.com (this address is no longer active)>'];

/*
 * Check performance -
 */
status = inSection(3);
var start = new Date();
var result = re.exec(str);
actual = elapsedTime(start);
reportCompare(isThisFast(FAST), isThisFast(actual), status);

/*
 * Check accuracy -
 */
status = inSection(4);
testRegExp([status], [re], [str], [result], [expect]);