#!/usr/bin/env python
# Copyright 2013 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Inject javascript into html page source code."""

import logging
import os
import re
import util
import third_party.jsmin as jsmin

# Each pattern is anchored at the start of the content and tolerates up to 256
# arbitrary characters, an optional HTML comment, and up to 256 more
# characters before the tag of interest. Matching is case-insensitive and '.'
# also matches newlines.
DOCTYPE_RE = re.compile(r'^.{,256}?(<!--.*-->)?.{,256}?<!doctype html>',
                        re.IGNORECASE | re.DOTALL)
HTML_RE = re.compile(r'^.{,256}?(<!--.*-->)?.{,256}?<html.*?>',
                     re.IGNORECASE | re.DOTALL)
HEAD_RE = re.compile(r'^.{,256}?(<!--.*-->)?.{,256}?<head.*?>',
                     re.IGNORECASE | re.DOTALL)


def GetInjectScript(scripts):
    """Loads |scripts| from disk and returns a string of their content.

    Args:
      scripts: a list of script paths, or a single string of comma-separated
          paths. A falsy value means there is nothing to load.

    Returns:
      The concatenated script contents after being passed through
      jsmin.jsmin().

    Raises:
      Exception: if a script exists neither on disk nor according to
          util.resource_exists().
    """
    contents = []
    if scripts:
        if not isinstance(scripts, list):
            scripts = scripts.split(',')
        for script in scripts:
            if os.path.exists(script):
                with open(script) as f:
                    contents.append(f.read())
            elif util.resource_exists(script):
                # NOTE(review): presumably util.resource_string() returns the
                # resource as text; confirm against util.
                contents.append(util.resource_string(script))
            else:
                # Format the message eagerly; passing |script| as a second
                # Exception argument would leave the '%s' unexpanded.
                raise Exception('Script does not exist: %s' % script)

    return jsmin.jsmin(''.join(contents), quote_chars="'\"`")


def _IsHtmlContent(content):
    """Returns True if the stripped |content| starts with '<' and ends with '>'."""
    content = content.strip()
    return content.startswith('<') and content.endswith('>')


def InjectScript(text_chunks, content_type, script_to_inject):
    """Injects |script_to_inject| into |text_chunks| for 'text/html' content.

    Inject |script_to_inject| into |text_chunks| immediately after <head>,
    <html> or <!doctype html>, if one of them is found. Otherwise, inject at
    the beginning.

    Nothing is injected when |content_type| is not exactly 'text/html', when
    the joined content is empty or does not look like html, or when
    |script_to_inject| already occurs in the content.

    Args:
      text_chunks: a list of strings which, joined together, form the content.
      content_type: the content type of the response, e.g. 'text/html'.
      script_to_inject: the javascript source to wrap in a <script> tag.

    Returns:
      text_chunks, just_injected
        |text_chunks| is the new content if script is injected, otherwise
          the original. If the script was injected, exactly one chunk in
          |text_chunks| will have changed.
        |just_injected| indicates if |script_to_inject| was just injected in
          the content.
    """
    if content_type != 'text/html':
        return text_chunks, False
    content = ''.join(text_chunks)
    if (not content or not _IsHtmlContent(content)
            or script_to_inject in content):
        return text_chunks, False
    # Try the most specific insertion point first.
    for regexp in (HEAD_RE, HTML_RE, DOCTYPE_RE):
        matchobj = regexp.search(content)
        if matchobj:
            # |pos| starts as an offset into the joined content and is turned
            # into an offset into the current chunk as earlier chunks are
            # skipped.
            pos = matchobj.end(0)
            for i, chunk in enumerate(text_chunks):
                if pos <= len(chunk):
                    result = text_chunks[:]
                    result[i] = '%s<script>%s</script>%s' % (
                        chunk[:pos], script_to_inject, chunk[pos:])
                    return result, True
                pos -= len(chunk)
    result = text_chunks[:]
    result[0] = '<script>%s</script>%s' % (script_to_inject, text_chunks[0])
    logging.warning('Inject at the very beginning, because no tag of '
                    '<head>, <html> or <!doctype html> is found.')
    return result, True