#!/usr/bin/python2.4

# Copyright 2009 the V8 project authors. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
#       copyright notice, this list of conditions and the following
#       disclaimer in the documentation and/or other materials provided
#       with the distribution.
#     * Neither the name of Google Inc. nor the names of its
#       contributors may be used to endorse or promote products derived
#       from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""A JavaScript minifier.

It is far from being a complete JS parser, so there are many valid
JavaScript programs that will be ruined by it.  Another strangeness is that
it accepts $ and % as parts of identifiers.  It doesn't merge lines or strip
out blank lines in order to ease debugging.  Variables at the top scope are
properties of the global object so we can't rename them.  It is assumed that
you introduce variables with var as if JavaScript followed C++ scope rules
around curly braces, so the declaration must be above the first use.

Known limitation: comments are removed before string literals are
recognised, so a "//" or "/*" inside a string literal is treated as the
start of a comment.

Use as:
import jsmin
minifier = jsmin.JavaScriptMinifier()
program1 = minifier.JSMinify(program1)
program2 = minifier.JSMinify(program2)
"""

import re


# A regexp that matches a literal string surrounded by "double quotes".
# This regexp can handle embedded backslash-escaped characters including
# embedded backslash-escaped double quotes.
_DOUBLE_QUOTED_STRING = r'"(?:[^"\\]|\\.)*"'
# A regexp that matches a literal string surrounded by 'single quotes'.
_SINGLE_QUOTED_STRING = r"'(?:[^'\\]|\\.)*'"
# A regexp that matches a regexp literal surrounded by /slashes/.
# Don't allow a regexp to have a ) before the first ( since that's a
# syntax error and it's probably just two unrelated slashes.
# Also don't allow it to come after anything that can only be the
# end of a primary expression.
_SLASH_QUOTED_REGEXP = (
    r"(?<![\w$'\")\]])/(?:(?=\()|(?:[^()/\\]|\\.)+)(?:\([^/\\]|\\.)*/")

# Literals are always listed first in the alternations below so that their
# contents are consumed as a unit and never touched by the later alternatives.
_LITERAL_PATTERNS = [
    _DOUBLE_QUOTED_STRING,
    _SINGLE_QUOTED_STRING,
    _SLASH_QUOTED_REGEXP,
]

# Matches a literal, or a run of spaces.  Capture 1 is a single space.
_COLLAPSE_SPACES = re.compile("|".join(_LITERAL_PATTERNS + ["( )+"]))

# Matches a literal, or a single space that does not have an identifier
# character both before and after it.  % and $ are counted as identifier
# characters.  Capture 1 is either empty or did not participate.
_STRIP_SPACES = re.compile("|".join(
    _LITERAL_PATTERNS +
    [r"(?<![a-zA-Z_0-9$%]) | (?![a-zA-Z_0-9$%])()"]))

# Matches anything that looks like an identifier, keyword or number.
_IDENTIFIER = re.compile(r"([a-zA-Z0-9_$%]+)")

_BLOCK_COMMENT_ON_ONE_LINE = re.compile(r"/\*.*?\*/")
_LINE_COMMENT = re.compile(r"//.*")

_FUNCTION_DECLARATION = (
    r"\bfunction"       # Function definition keyword...
    r"( [\w$%]+)?"      # ...optional function name...
    r"\([\w$%,]+\)\{")  # ...argument declarations.


def _BuildRewritePattern(block_trailing_colon):
  """Compiles the regexp whose matches are handed to Declaration.

  Args:
    block_trailing_colon: Regexp fragment appended to the variable-use
      pattern; either empty or a lookahead that rejects a following colon.

  Returns:
    The compiled regular expression.
  """
  # Variable use.  Cannot follow a period or (optionally) precede a colon.
  variable_use_regexp = r"(?<![.\w$%])[\w$%]+" + block_trailing_colon
  return re.compile("|".join(_LITERAL_PATTERNS + [
      r"\{",  # Curly braces.
      r"\}",
      r"\bvar [\w$%,]+",  # var declarations.
      _FUNCTION_DECLARATION,
      variable_use_regexp]))


# Unfortunately the keyword-value syntax { key:value } makes the key look
# like a variable where in fact it is a literal string.  We use the
# presence or absence of a question mark on the line to try to distinguish
# between this case and the ternary operator: "condition ? iftrue : iffalse".
_REWRITE_WITH_TERNARY = _BuildRewritePattern(r"")
_REWRITE_WITHOUT_TERNARY = _BuildRewritePattern(r"(?![:\w$%])")


class JavaScriptMinifier(object):
  """An object that you can feed code snippets to to get them minified.

  State (brace nesting depth, rename map, open block comment, identifiers
  already seen at the top level) is kept between calls to JSMinify, so
  several snippets can be fed to the same instance one after another.
  """

  def __init__(self):
    # We prepopulate the list of identifiers that shouldn't be used.  These
    # short language keywords could otherwise be generated as variable
    # names.  ("if" is produced by the name generator just like "do" and
    # "in" are, so it has to be listed as well.)
    self.seen_identifiers = {"do": True, "if": True, "in": True}
    # Index of the next candidate short name in the current top-level scope.
    self.identifier_counter = 0
    # True while we are inside a /* ... */ comment that spans several lines.
    self.in_comment = False
    # Maps original variable names to their short names in the current scope.
    self.map = {}
    # Current depth of curly-brace nesting.
    self.nesting = 0

  def LookAtIdentifier(self, m):
    """Records identifiers or keywords that we see in use.

    (So we can avoid renaming variables to these strings.)

    Args:
      m: The match object returned by re.search.

    Returns:
      Nothing.
    """
    identifier = m.group(1)
    self.seen_identifiers[identifier] = True

  def Push(self):
    """Called when we encounter a '{'."""
    self.nesting += 1

  def Pop(self):
    """Called when we encounter a '}'."""
    self.nesting -= 1
    # We treat each top-level opening brace as a single scope that can span
    # several sets of nested braces.
    if self.nesting == 0:
      self.map = {}
      self.identifier_counter = 0

  def Declaration(self, m):
    """Rewrites bits of the program selected by a regexp.

    These can be curly braces, literal strings, function declarations and var
    declarations.  (These last two must be on one line including the opening
    curly brace of the function for their variables to be renamed).

    Args:
      m: The match object returned by re.search.

    Returns:
      The string that should replace the match in the rewritten program.
    """
    matched_text = m.group(0)
    if matched_text == "{":
      self.Push()
      return matched_text
    if matched_text == "}":
      self.Pop()
      return matched_text
    # String and regexp literals are passed through untouched.
    if matched_text[:1] in ('"', "'", "/"):
      return matched_text

    if matched_text.startswith("var "):
      var_names = matched_text[len("var "):].split(",")
      return "var " + ",".join([self.FindNewName(name) for name in var_names])

    function_match = re.match(r"(function\b[^(]*)\((.*)\)\{$", matched_text)
    if function_match:
      up_to_args = function_match.group(1)
      args = function_match.group(2).split(",")
      # The opening brace is part of the match, so enter the scope before
      # naming the arguments; otherwise top-level functions would keep
      # their argument names.
      self.Push()
      new_args = [self.FindNewName(arg) for arg in args]
      return up_to_args + "(" + ",".join(new_args) + "){"

    # A plain use of a variable: substitute the short name if it has one.
    if matched_text in self.map:
      return self.map[matched_text]

    return matched_text

  def CharFromNumber(self, number):
    """A single-digit base-52 encoding using a-zA-Z."""
    if number < 26:
      return chr(number + 97)
    number -= 26
    return chr(number + 65)

  def FindNewName(self, var_name):
    """Finds a new 1-character or 2-character name for a variable.

    Enters it into the mapping table for this scope.  Names that are already
    in the table keep their existing short name, and names declared outside
    any curly braces are returned unchanged.

    Args:
      var_name: The name of the variable before renaming.

    Returns:
      The new name of the variable.
    """
    if var_name in self.map:
      return self.map[var_name]
    # Variables at the top scope are properties of the global object, so
    # they can be visible from code in a different scope.  We leave them
    # alone.
    if self.nesting == 0:
      return var_name
    while True:
      identifier_first_char = self.identifier_counter % 52
      # Floor division: "/" would yield a float on Python 3, which both
      # defeats the "!= 0" test below and makes chr() raise TypeError.
      identifier_second_char = self.identifier_counter // 52
      new_identifier = self.CharFromNumber(identifier_first_char)
      if identifier_second_char != 0:
        new_identifier = (
            self.CharFromNumber(identifier_second_char - 1) + new_identifier)
      self.identifier_counter += 1
      if new_identifier not in self.seen_identifiers:
        break

    self.map[var_name] = new_identifier
    return new_identifier

  def RemoveSpaces(self, m):
    """Returns literals unchanged, replaces other inputs with capture 1.

    String and regexp literals are returned as they are.  Anything else is
    replaced with the contents of capture 1, which is either a single space
    or an empty string.

    Args:
      m: The match object returned by re.search.

    Returns:
      The string that should be inserted instead of the matched text.
    """
    entire_match = m.group(0)
    if re.match(r"'.*'$", entire_match):
      return entire_match
    if re.match(r'".*"$', entire_match):
      return entire_match
    if re.match(r"/.+/$", entire_match):
      return entire_match
    # Capture 1 is None when the alternative that matched does not contain
    # it; that case means "remove the space".
    return m.group(1) or ""

  def _StripComments(self, line):
    """Removes comments from one line, tracking multi-line block comments.

    Args:
      line: One line of the program, without its line terminator.

    Returns:
      The line with comments removed, or None if the whole line lies inside
      a block comment that is still open at the end of the line.
    """
    if self.in_comment:
      end = line.find("*/")
      if end < 0:
        return None
      line = line[end + len("*/"):]
      self.in_comment = False

    line = _BLOCK_COMMENT_ON_ONE_LINE.sub(" ", line)
    line = _LINE_COMMENT.sub("", line)
    # Anything left that opens a block comment runs on to a later line.
    start = line.find("/*")
    if start >= 0:
      line = line[:start]
      self.in_comment = True
    return line

  def JSMinify(self, text):
    """The main entry point.  Takes a text and returns a compressed version.

    The compressed version hopefully does the same thing.  Line breaks are
    preserved.

    Args:
      text: The text of the code snippet as a multiline string.

    Returns:
      The compressed text of the code snippet as a multiline string.
    """
    new_lines = []
    for line in text.split("\n"):
      line = self._StripComments(line.replace("\t", " "))
      if line is None:
        # Keep an empty line so that line numbers still match the input.
        new_lines.append("")
        continue

      # Strip leading and trailing spaces.
      line = line.strip(" ")
      # Replace multiple spaces with a single space.
      line = _COLLAPSE_SPACES.sub(self.RemoveSpaces, line)
      # Strip single spaces unless they have an identifier character both
      # before and after the space.
      line = _STRIP_SPACES.sub(self.RemoveSpaces, line)
      # Collect keywords and identifiers that are already in use.  This has
      # to happen before the line is rewritten, while nesting still reflects
      # the start of the line.
      if self.nesting == 0:
        for identifier_match in _IDENTIFIER.finditer(line):
          self.LookAtIdentifier(identifier_match)
      if "?" in line:
        rewrite_pattern = _REWRITE_WITH_TERNARY
      else:
        rewrite_pattern = _REWRITE_WITHOUT_TERNARY
      new_lines.append(rewrite_pattern.sub(self.Declaration, line))

    return "\n".join(new_lines) + "\n"