# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Utility function for stripping comments out of JavaScript source code."""

import re


# Matches any one of the comment-related delimiters. Compiled once at import
# time so that repeated calls do not rebuild the pattern.
_COMMENT_TOKEN_RE = re.compile(
    '|'.join(re.escape(x) for x in ['//', '/*', '*/', '\n']))


def _TokenizeJS(text):
  """Splits source code text into segments in preparation for comment stripping.

  Note that this doesn't tokenize for parsing. There is no notion of statements,
  variables, etc. The only tokens of interest are comment-related tokens:
  '//', '/*', '*/' and newline. Everything between two such tokens is yielded
  as a single opaque string.

  Args:
    text: The contents of a JavaScript file.

  Yields:
    A succession of strings in the file, including all comment-related symbols.
    Concatenating every yielded string reproduces the input exactly.
  """
  # Track an offset into the original string instead of repeatedly slicing off
  # the consumed prefix, which would copy the remainder once per token.
  pos = 0
  for m in _COMMENT_TOKEN_RE.finditer(text):
    if m.start() > pos:
      # Plain text sitting between the previous delimiter and this one.
      yield text[pos:m.start()]
    yield m.group(0)
    pos = m.end()

  if pos < len(text):
    # Trailing text after the last delimiter (or the whole input if there
    # were no delimiters at all).
    yield text[pos:]


def StripJSComments(text):
  """Strips comments out of JavaScript source code.

  This works purely on comment delimiters and has no notion of string or
  regular-expression literals, so a '//' or '/*' appearing inside a literal is
  treated as the start of a comment.

  Line comments are removed together with the newline that terminates them.
  Block comments are tracked with a nesting counter, so every '/*' seen inside
  a block comment must be balanced by its own '*/'. A comment that is still
  open at the end of the input swallows the rest of the text. A '*/' that
  appears outside of any comment is passed through unchanged.

  Args:
    text: JavaScript source text.

  Returns:
    JavaScript source text with comments stripped out.
  """
  result_tokens = []
  # A single shared iterator: the inner loops below consume comment tokens
  # from the same stream that the outer loop continues reading afterwards.
  # Iterating with 'for' (rather than calling the Python 2-only .next()
  # method) keeps this working on both Python 2 and Python 3.
  token_stream = iter(_TokenizeJS(text))
  for t in token_stream:
    if t == '//':
      # Discard everything up to and including the end of the line.
      for t2 in token_stream:
        if t2 == '\n':
          break
    elif t == '/*':
      # Discard everything up to and including the balancing '*/'.
      nesting = 1
      for t2 in token_stream:
        if t2 == '/*':
          nesting += 1
        elif t2 == '*/':
          nesting -= 1
          if nesting == 0:
            break
    else:
      result_tokens.append(t)
  return ''.join(result_tokens)