• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/* ***** BEGIN LICENSE BLOCK *****
2* Version: NPL 1.1/GPL 2.0/LGPL 2.1
3*
4* The contents of this file are subject to the Netscape Public License
5* Version 1.1 (the "License"); you may not use this file except in
6* compliance with the License. You may obtain a copy of the License at
7* http://www.mozilla.org/NPL/
8*
9* Software distributed under the License is distributed on an "AS IS" basis,
10* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11* for the specific language governing rights and limitations under the
12* License.
13*
14* The Original Code is JavaScript Engine testing utilities.
15*
16* The Initial Developer of the Original Code is Netscape Communications Corp.
17* Portions created by the Initial Developer are Copyright (C) 2002
18* the Initial Developer. All Rights Reserved.
19*
20* Contributor(s): rogerl@netscape.com, pschwartau@netscape.com
21*
22* Alternatively, the contents of this file may be used under the terms of
23* either the GNU General Public License Version 2 or later (the "GPL"), or
24* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
25* in which case the provisions of the GPL or the LGPL are applicable instead
26* of those above. If you wish to allow use of your version of this file only
27* under the terms of either the GPL or the LGPL, and not to allow others to
28* use your version of this file under the terms of the NPL, indicate your
29* decision by deleting the provisions above and replace them with the notice
30* and other provisions required by the GPL or the LGPL. If you do not delete
31* the provisions above, a recipient may use your version of this file under
32* the terms of any one of the NPL, the GPL or the LGPL.
33*
34* ***** END LICENSE BLOCK *****
35*
36*
37* Date:    14 Feb 2002
38* SUMMARY: Performance: Regexp performance degraded from 4.7
39* See http://bugzilla.mozilla.org/show_bug.cgi?id=85721
40*
41* Adjust this testcase if necessary. The FAST constant defines
42* an upper bound in milliseconds for any execution to take.
43*
44*/
45//-----------------------------------------------------------------------------
// Identification of the regression under test.
var summary = 'Performance: execution of regular expression';
var bug = 85721;

// Upper bound, in milliseconds: an execution taking FAST ms or less passes.
var FAST = 100;

// Fragments used to describe a timing. MSG_FAST is assembled from the other
// pieces and reads 'Execution took less than <FAST> ms'.
var MSG_SLOW = 'Execution took ';
var MSG_MS = ' ms';
var MSG_FAST = MSG_SLOW + 'less than ' + FAST + MSG_MS;

// Working variables; each section of the test reassigns them.
var status = '';
var str = '';
var re = '';
var expect = '';
var actual = '';

// Hand the bug number and summary to the harness helpers, which are defined
// outside this file.
printBugNumber(bug);
printStatus(summary);
60
61
/*
 * Return the difference between the current time and startTime.
 * Subtracting two Date objects yields a number of milliseconds.
 */
function elapsedTime(startTime)
{
  var now = new Date();
  return now - startTime;
}
66
67
/*
 * Turn a duration in milliseconds into a message: MSG_FAST when the duration
 * is within the FAST bound, otherwise MSG_SLOW followed by the duration and
 * MSG_MS.
 */
function isThisFast(ms)
{
  return (ms <= FAST) ? MSG_FAST : MSG_SLOW + ms + MSG_MS;
}
74
75
76
/*
 * First regexp: a small XML-like connection descriptor.
 * Section 1 measures how long a single exec() takes; Section 2 checks that
 * the match and its captured groups are the expected ones.
 */
re = /<sql:connection id="([^\r\n]*?)">\s*<sql:url>\s*([^\r\n]*?)\s*<\/sql:url>\s*<sql:driver>\s*([^\r\n]*?)\s*<\/sql:driver>\s*(\s*<sql:userId>\s*([^\r\n]*?)\s*<\/sql:userId>\s*)?\s*(\s*<sql:password>\s*([^\r\n]*?)\s*<\/sql:password>\s*)?\s*<\/sql:connection>/;
str='<sql:connection id="conn1"> <sql:url>www.m.com</sql:url> <sql:driver>drive.class</sql:driver>\n<sql:userId>foo</sql:userId> <sql:password>goo</sql:password> </sql:connection>';

// Element 0 is the whole match; the remaining elements are the seven groups.
expect = [
  "<sql:connection id=\"conn1\"> <sql:url>www.m.com</sql:url> <sql:driver>drive.class</sql:driver>\n<sql:userId>foo</sql:userId> <sql:password>goo</sql:password> </sql:connection>",
  "conn1",
  "www.m.com",
  "drive.class",
  "<sql:userId>foo</sql:userId> ",
  "foo",
  "<sql:password>goo</sql:password> ",
  "goo"
];

// Section 1 - performance: time one exec() and compare the resulting message
// with the message produced for a run that is exactly at the FAST bound.
status = inSection(1);
var start = new Date();
var result = re.exec(str);
actual = elapsedTime(start);
reportCompare(isThisFast(FAST), isThisFast(actual), status);

// Section 2 - accuracy: pass the match obtained above, together with the
// expected one, to the harness.
status = inSection(2);
testRegExp([status], [re], [str], [result], [expect]);
98
99
100
101/*
102 * The second regexp (HUGE!). We'll test for performance (Section 3) and accuracy (Section 4).
103 * It comes from the O'Reilly book "Mastering Regular Expressions" by Jeffrey Friedl, Appendix B
104 */
105
/*
 * Assemble, fragment by fragment, the source text of a pattern that matches
 * a mailbox: either a bare address, or a phrase followed by an address in
 * angle brackets. Every fragment is a plain string; the finished pattern
 * text ends up in $mailbox.
 *
 * The fragments are string literals, so a backslash meant for the regular
 * expression has to be doubled. Control characters are written as \xNN
 * escapes, which produce the same characters as legacy octal escapes but are
 * also accepted in strict code. All fragments are declared with var so that
 * nothing relies on implicit global creation.
 */

//# Some things for avoiding backslashitis later on.
var $esc        = '\\\\';
var $Period     = '\\.';               // # doubled: a bare '\.' collapses to '.', which matches any character
var $space      = '\x20';              var $tab         = '\t';
var $OpenBR     = '\\[';               var $CloseBR     = '\\]';
var $OpenParen  = '\\(';               var $CloseParen  = '\\)';
var $NonASCII   = '\x80-\xff';         var $ctrl        = '\x00-\x1f';
var $CRlist     = '\n\x0d';  //# note: this should really be only \x0d (carriage return).
// Items 19, 20, 21
var $qtext = '[^' + $esc + $NonASCII + $CRlist + '"]';                        // # for within "..."
var $dtext = '[^' + $esc + $NonASCII + $CRlist + $OpenBR + $CloseBR + ']';    // # for within [...]
var $quoted_pair = $esc + '[^' + $NonASCII + ']';                             // # an escaped character

//##############################################################################
//# Items 22 and 23, comment.
//# Impossible to do properly with a regex, I make do by allowing at most one level of nesting.
var $ctext   =  '[^' + $esc + $NonASCII + $CRlist + '()]';

//# $Cnested matches one non-nested comment.
//# It is unrolled, with normal of $ctext, special of $quoted_pair.
var $Cnested =
   $OpenParen +                                 // #  (
      $ctext + '*' +                            // #     normal*
      '(?:' + $quoted_pair + $ctext + '*)*' +   // #     (special normal*)*
   $CloseParen;                                 // #                       )


//# $comment allows one level of nested parentheses
//# It is unrolled, with normal of $ctext, special of ($quoted_pair|$Cnested)
var $comment =
   $OpenParen +                                           // #  (
       $ctext + '*' +                                     // #     normal*
       '(?:' +                                            // #       (
          '(?:' + $quoted_pair + '|' + $Cnested + ')' +   // #         special
           $ctext + '*' +                                 // #         normal*
       ')*' +                                             // #            )*
   $CloseParen;                                           // #                )


//##############################################################################
//# $X is optional whitespace/comments.
var $X =
   '[' + $space + $tab + ']*' +                        // # Nab whitespace.
   '(?:' + $comment + '[' + $space + $tab + ']*)*';    // # If comment found, allow more spaces.


//# Item 10: atom
//# The excluded set lists both parentheses around the space, in line with
//# $phrase_char below, so that ')' cannot be part of an atom.
var $atom_char   = '[^(' + $space + ')<>@,;:".' + $esc + $OpenBR + $CloseBR + $ctrl + $NonASCII + ']';
var $atom =
  $atom_char + '+' +            // # some number of atom characters...
  '(?!' + $atom_char + ')';     // # ..not followed by something that could be part of an atom

// # Item 11: doublequoted string, unrolled.
var $quoted_str =
    '"' +                                          // # "
       $qtext + '*' +                              // #   normal
       '(?:' + $quoted_pair + $qtext + '*)*' +     // #   ( special normal* )*
    '"';                                           // # "

//# Item 7: word is an atom or quoted string
var $word =
    '(?:' +
       $atom +                // # Atom
       '|' +                  // #  or
       $quoted_str +          // # Quoted string
     ')';

//# Item 12: domain-ref is just an atom
var $domain_ref  = $atom;

//# Item 13: domain-literal is like a quoted string, but [...] instead of  "..."
var $domain_lit  =
    $OpenBR +                                        // # [
    '(?:' + $dtext + '|' + $quoted_pair + ')*' +     // #    stuff
    $CloseBR;                                        // #           ]

// # Item 9: sub-domain is a domain-ref or domain-literal
var $sub_domain  =
  '(?:' +
    $domain_ref +
    '|' +
    $domain_lit +
   ')' +
   $X;                 // # optional trailing comments

// # Item 6: domain is a list of subdomains separated by dots.
var $domain =
     $sub_domain +
     '(?:' +
        $Period + $X + $sub_domain +
     ')*';

//# Item 8: a route. A bunch of "@ $domain" separated by commas, followed by a colon.
var $route =
    '@' + $X + $domain +
    '(?:,' + $X + '@' + $X + $domain + ')*' +  // # additional domains
    ':' +
    $X;                                        // # optional trailing comments

//# Item 6: local-part is a bunch of $word separated by periods
//# Every line of the expression ends in '+': without it the statement would
//# end after the first line and the repeated group would be discarded.
var $local_part =
    $word + $X +
    '(?:' +
        $Period + $X + $word + $X +     // # additional words
    ')*';

// # Item 2: addr-spec is local@domain
var $addr_spec  =
  $local_part + '@' + $X + $domain;

//# Item 4: route-addr is <route? addr-spec>
var $route_addr =
    '<' + $X +                     // # <
       '(?:' + $route + ')?' +     // #       optional route
       $addr_spec +                // #       address spec
    '>';                           // #                 >

//# Item 3: phrase........
var $phrase_ctrl = '\x00-\x08\x0a-\x1f'; // # like ctrl, but without tab

//# Like atom-char, but without listing space, and uses phrase_ctrl.
//# Since the class is negated, this matches the same as atom-char plus space and tab
var $phrase_char =
   '[^()<>@,;:".' + $esc + $OpenBR + $CloseBR + $NonASCII + $phrase_ctrl + ']';

// # We've worked it so that $word, $comment, and $quoted_str do not consume trailing $X
// # because we take care of it manually.
var $phrase =
   $word +                                                  // # leading word
   $phrase_char + '*' +                                     // # "normal" atoms and/or spaces
   '(?:' +
      '(?:' + $comment + '|' + $quoted_str + ')' +          // # "special" comment or quoted string
      $phrase_char + '*' +                                  // #  more "normal"
   ')*';

// ## Item #1: mailbox is an addr_spec or a phrase/route_addr
var $mailbox =
    $X +                                // # optional leading comment
    '(?:' +
            $phrase + $route_addr +     // # name and address
            '|' +                       // #  or
            $addr_spec +                // # address
     ')';
249
250
251//###########################################################################
252
253
// Subject and expected result: the pattern is expected to match the entire
// subject, and no captured groups are expected.
str = 'Jeffy<"That Tall Guy"@ora.com (this address is no longer active)>';
expect = ['Jeffy<"That Tall Guy"@ora.com (this address is no longer active)>'];

// Compile the pattern text held in $mailbox as a global regexp.
re = new RegExp($mailbox, "g");

// Section 3 - performance: time one exec() and compare the resulting message
// with the message produced for a run that is exactly at the FAST bound.
status = inSection(3);
var start = new Date();
var result = re.exec(str);
actual = elapsedTime(start);
reportCompare(isThisFast(FAST), isThisFast(actual), status);

// Section 4 - accuracy: pass the match obtained above, together with the
// expected one, to the harness.
status = inSection(4);
testRegExp([status], [re], [str], [result], [expect]);
272