• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python
2#
3# Copyright 2011 - 2013
4# Andr\xe9 Malo or his licensors, as applicable
5#
6# Licensed under the Apache License, Version 2.0 (the "License");
7# you may not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10#     http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17r"""
18=====================
19 Javascript Minifier
20=====================
21
22rJSmin is a javascript minifier written in python.
23
24The minifier is based on the semantics of `jsmin.c by Douglas Crockford`_\.
25
26The module is a re-implementation aiming for speed, so it can be used at
27runtime (rather than during a preprocessing step). Usually it produces the
28same results as the original ``jsmin.c``. It differs in the following ways:
29
30- there is no error detection: unterminated string, regex and comment
31  literals are treated as regular javascript code and minified as such.
32- Control characters inside string and regex literals are left untouched; they
33  are not converted to spaces (nor to \n)
34- Newline characters are not allowed inside string and regex literals, except
35  for line continuations in string literals (ECMA-5).
36- "return /regex/" is recognized correctly.
37- "+ +" and "- -" sequences are not collapsed to '++' or '--'
38- Newlines before ! operators are removed more sensibly
39- rJSmin does not handle streams, but only complete strings. (However, the
40  module provides a "streamy" interface).
41
42Since most parts of the logic are handled by the regex engine it's way
43faster than the original python port of ``jsmin.c`` by Baruch Even. The speed
44factor varies between about 6 and 55 depending on input and python version
45(it gets faster the more compressed the input already is). Compared to the
46speed-refactored python port by Dave St.Germain the performance gain is less
47dramatic but still between 1.2 and 7. See the docs/BENCHMARKS file for
48details.
49
50rjsmin.c is a reimplementation of rjsmin.py in C and speeds it up even more.
51
52Both python 2 and python 3 are supported.
53
54.. _jsmin.c by Douglas Crockford:
55   http://www.crockford.com/javascript/jsmin.c
56"""
# Module metadata. The non-ASCII character of the author name is written as
# an escape sequence.
__author__ = "Andr\xe9 Malo"
# If the string object has a ``decode`` method (python 2 byte string), the
# name is decoded from latin-1; otherwise (python 3 ``str``) the fallback
# lambda simply returns the string unchanged.
__author__ = getattr(__author__, 'decode', lambda x: __author__)('latin-1')
__docformat__ = "restructuredtext en"
__license__ = "Apache License, Version 2.0"
__version__ = '1.0.7'
# Only ``jsmin`` is exported by ``from <module> import *``.
__all__ = ['jsmin']
63
64import re as _re
65
66
67def _make_jsmin(python_only=False):
68    """
69    Generate JS minifier based on `jsmin.c by Douglas Crockford`_
70
71    .. _jsmin.c by Douglas Crockford:
72       http://www.crockford.com/javascript/jsmin.c
73
74    :Parameters:
75      `python_only` : ``bool``
76        Use only the python variant. If true, the c extension is not even
77        tried to be loaded.
78
79    :Return: Minifier
80    :Rtype: ``callable``
81    """
82    # pylint: disable = R0912, R0914, W0612
83    if not python_only:
84        try:
85            import _rjsmin
86        except ImportError:
87            pass
88        else:
89            return _rjsmin.jsmin
90    try:
91        xrange
92    except NameError:
93        xrange = range  # pylint: disable = W0622
94
95    space_chars = r'[\000-\011\013\014\016-\040]'
96
97    line_comment = r'(?://[^\r\n]*)'
98    space_comment = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
99    string1 = \
100        r'(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^\047\\\r\n]*)*\047)'
101    string2 = r'(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^"\\\r\n]*)*")'
102    strings = r'(?:%s|%s)' % (string1, string2)
103
104    charclass = r'(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\])'
105    nospecial = r'[^/\\\[\r\n]'
106    regex = r'(?:/(?![\r\n/*])%s*(?:(?:\\[^\r\n]|%s)%s*)*/)' % (
107        nospecial, charclass, nospecial)
108    space = r'(?:%s|%s)' % (space_chars, space_comment)
109    newline = r'(?:%s?[\r\n])' % line_comment
110
111    def fix_charclass(result):
112        """ Fixup string of chars to fit into a regex char class """
113        pos = result.find('-')
114        if pos >= 0:
115            result = r'%s%s-' % (result[:pos], result[pos + 1:])
116
117        def sequentize(string):
118            """
119            Notate consecutive characters as sequence
120
121            (1-4 instead of 1234)
122            """
123            first, last, result = None, None, []
124            for char in map(ord, string):
125                if last is None:
126                    first = last = char
127                elif last + 1 == char:
128                    last = char
129                else:
130                    result.append((first, last))
131                    first = last = char
132            if last is not None:
133                result.append((first, last))
134            return ''.join(['%s%s%s' % (
135                chr(first),
136                last > first + 1 and '-' or '',
137                last != first and chr(last) or '') for first, last in result])
138
139        return _re.sub(r'([\000-\040\047])',  # for better portability
140            lambda m: '\\%03o' % ord(m.group(1)), (sequentize(result)
141                .replace('\\', '\\\\')
142                .replace('[', '\\[')
143                .replace(']', '\\]')))
144
145    def id_literal_(what):
146        """ Make id_literal like char class """
147        match = _re.compile(what).match
148        result = ''.join([chr(c) for c in xrange(127) if not match(chr(c))])
149        return '[^%s]' % fix_charclass(result)
150
151    def not_id_literal_(keep):
152        """ Make negated id_literal like char class """
153        match = _re.compile(id_literal_(keep)).match
154        result = ''.join([chr(c) for c in xrange(127) if not match(chr(c))])
155        return r'[%s]' % fix_charclass(result)
156
157    not_id_literal = not_id_literal_(r'[a-zA-Z0-9_$]')
158    preregex1 = r'[(,=:\[!&|?{};\r\n]'
159    preregex2 = r'%(not_id_literal)sreturn' % locals()
160
161    id_literal = id_literal_(r'[a-zA-Z0-9_$]')
162    id_literal_open = id_literal_(r'[a-zA-Z0-9_${\[(!+-]')
163    id_literal_close = id_literal_(r'[a-zA-Z0-9_$}\])"\047+-]')
164
165    dull = r'[^\047"/\000-\040]'
166
167    space_sub = _re.compile((
168        r'(%(dull)s+)'
169        r'|(%(strings)s%(dull)s*)'
170        r'|(?<=%(preregex1)s)'
171            r'%(space)s*(?:%(newline)s%(space)s*)*'
172            r'(%(regex)s%(dull)s*)'
173        r'|(?<=%(preregex2)s)'
174            r'%(space)s*(?:%(newline)s%(space)s)*'
175            r'(%(regex)s%(dull)s*)'
176        r'|(?<=%(id_literal_close)s)'
177            r'%(space)s*(?:(%(newline)s)%(space)s*)+'
178            r'(?=%(id_literal_open)s)'
179        r'|(?<=%(id_literal)s)(%(space)s)+(?=%(id_literal)s)'
180        r'|(?<=\+)(%(space)s)+(?=\+)'
181        r'|(?<=-)(%(space)s)+(?=-)'
182        r'|%(space)s+'
183        r'|(?:%(newline)s%(space)s*)+') % locals()).sub
184    #print space_sub.__self__.pattern
185
186    def space_subber(match):
187        """ Substitution callback """
188        # pylint: disable = C0321, R0911
189        groups = match.groups()
190        if groups[0]:
191            return groups[0]
192        elif groups[1]:
193            return groups[1]
194        elif groups[2]:
195            return groups[2]
196        elif groups[3]:
197            return groups[3]
198        elif groups[4]:
199            return '\n'
200        elif groups[5] or groups[6] or groups[7]:
201            return ' '
202        else:
203            return ''
204
205    def jsmin(script):  # pylint: disable = W0621
206        r"""
207        Minify javascript based on `jsmin.c by Douglas Crockford`_\.
208
209        Instead of parsing the stream char by char, it uses a regular
210        expression approach which minifies the whole script with one big
211        substitution regex.
212
213        .. _jsmin.c by Douglas Crockford:
214           http://www.crockford.com/javascript/jsmin.c
215
216        :Parameters:
217          `script` : ``str``
218            Script to minify
219
220        :Return: Minified script
221        :Rtype: ``str``
222        """
223        return space_sub(space_subber, '\n%s\n' % script).strip()
224
225    return jsmin
226
# Public minifier, created once at import time with the default arguments of
# the factory (``python_only=False``).
jsmin = _make_jsmin()
228
229
def jsmin_for_posers(script):
    r"""
    Minify javascript based on `jsmin.c by Douglas Crockford`_\.

    Instead of parsing the stream char by char, it uses a regular
    expression approach which minifies the whole script with one big
    substitution regex.

    .. _jsmin.c by Douglas Crockford:
       http://www.crockford.com/javascript/jsmin.c

    :Warning: This function is the digest of a _make_jsmin() call. It just
              utilizes the resulting regex. It's just for fun here and may
              vanish any time. Use the `jsmin` function instead.

    :Parameters:
      `script` : ``str``
        Script to minify

    :Return: Minified script
    :Rtype: ``str``
    """
    def subber(match):
        """
        Substitution callback

        Groups 1-4 (plain code, string literals, regex literals) are passed
        through, group 5 becomes a single newline, groups 6-8 become a
        single blank and everything else is dropped.
        """
        groups = match.groups()
        return (
            groups[0] or
            groups[1] or
            groups[2] or
            groups[3] or
            (groups[4] and '\n') or
            (groups[5] and ' ') or
            (groups[6] and ' ') or
            (groups[7] and ' ') or
            '')

    # In the "regex literal after return" alternative the whitespace run
    # following a newline is ``space*`` ("...*/))*)*"), exactly as in the
    # preceding alternative. It used to be a single ``space``, which missed
    # regex literals preceded by a newline plus more than one space.
    #
    # The script is framed by newlines so the lookbehind assertions have a
    # character to look at, even at the very beginning.
    return _re.sub(
        r'([^\047"/\000-\040]+)|((?:(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]|\r?'
        r'\n|\r)[^\047\\\r\n]*)*\047)|(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|'
        r'\r)[^"\\\r\n]*)*"))[^\047"/\000-\040]*)|(?<=[(,=:\[!&|?{};\r\n])(?'
        r':[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*'
        r'(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*'
        r'[^*]*\*+(?:[^/*][^*]*\*+)*/))*)*((?:/(?![\r\n/*])[^/\\\[\r\n]*(?:('
        r'?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\]))[^/\\\['
        r'\r\n]*)*/)[^\047"/\000-\040]*)|(?<=[\000-#%-,./:-@\[-^`{-~-]return'
        r')(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/'
        r'))*(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:'
        r'/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)*((?:/(?![\r\n/*])[^/\\\[\r\n]*(?'
        r':(?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\]))[^/'
        r'\\\[\r\n]*)*/)[^\047"/\000-\040]*)|(?<=[^\000-!#%&(*,./:-@\[\\^`{|'
        r'~])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)'
        r'*/))*(?:((?:(?://[^\r\n]*)?[\r\n]))(?:[\000-\011\013\014\016-\040]'
        r'|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+(?=[^\000-\040"#%-\047)*,./'
        r':-@\\-^`|-~])|(?<=[^\000-#%-,./:-@\[-^`{-~-])((?:[\000-\011\013\01'
        r'4\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=[^\000-#%-,./:'
        r'-@\[-^`{-~-])|(?<=\+)((?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*'
        r'\*+(?:[^/*][^*]*\*+)*/)))+(?=\+)|(?<=-)((?:[\000-\011\013\014\016-'
        r'\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=-)|(?:[\000-\011\013'
        r'\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))+|(?:(?:(?://[^'
        r'\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^'
        r'/*][^*]*\*+)*/))*)+', subber, '\n%s\n' % script).strip()
291
292
# Script mode: read the complete script from stdin, minify it and write the
# result to stdout.
if __name__ == '__main__':
    import sys as _sys
    _sys.stdout.write(jsmin(_sys.stdin.read()))
296