# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Utility to use a browser to visit multiple URLs.

Prerequisites:
  1. The command_line package from tools/site_compare
  2. Either the IE BHO or Firefox extension (or both)

Installation:
  1. Build the IE BHO, or call regsvr32 on a prebuilt binary
  2. Add a file called "measurepageloadtimeextension@google.com" to
     the default Firefox profile directory under extensions, containing
     the path to the Firefox extension root

Invoke with the command line arguments as documented within
the command line.
"""

import command_line
import itertools
import scrapers
import socket
import time

from drivers import windowing

# Constants
MAX_URL = 1024   # Maximum number of bytes read from the socket per recv()
PORT = 42492     # Port on localhost that Iterate connects to


def SetupIterationCommandLine(cmd):
  """Adds the necessary flags for iteration to a command.

  Registers the browser selection flags, the URL source flags (--url or
  --list, with an optional line range for --list), the load timeout and
  the browser window size, together with the constraints between them.

  Args:
    cmd: an object created by cmdline.AddCommand
  """
  cmd.AddArgument(
    ["-b", "--browser"], "Browser to use (ie, firefox, chrome)",
    type="string", required=True)
  cmd.AddArgument(
    ["-b1v", "--browserver"], "Version of browser", metaname="VERSION")
  cmd.AddArgument(
    ["-p", "--browserpath"], "Path to browser.",
    type="string", required=False)
  cmd.AddArgument(
    ["-u", "--url"], "URL to visit")
  cmd.AddArgument(
    ["-l", "--list"], "File containing list of URLs to visit", type="readfile")
  cmd.AddMutualExclusion(["--url", "--list"])
  cmd.AddArgument(
    ["-s", "--startline"], "First line of URL list", type="int")
  cmd.AddArgument(
    ["-e", "--endline"], "Last line of URL list (exclusive)", type="int")
  cmd.AddArgument(
    ["-c", "--count"], "Number of lines of URL file to use", type="int")
  cmd.AddDependency("--startline", "--list")
  cmd.AddRequiredGroup(["--url", "--list"])
  cmd.AddDependency("--endline", "--list")
  cmd.AddDependency("--count", "--list")
  cmd.AddMutualExclusion(["--count", "--endline"])
  cmd.AddDependency("--count", "--startline")
  cmd.AddArgument(
    ["-t", "--timeout"], "Amount of time (seconds) to wait for browser to "
    "finish loading",
    type="int", default=300)
  cmd.AddArgument(
    ["-sz", "--size"], "Browser window size", default=(800, 600), type="coords")


def _ReadUrlList(path, startline, endline):
  """Reads a range of lines from a URL list file.

  Args:
    path: path of a text file holding one URL per line
    startline: 1-based number of the first line to use
    endline: 1-based number of the line to stop before (exclusive), or
      None to read through the end of the file

  Returns:
    A list with the selected lines, each stripped of surrounding
    whitespace. Reading stops at the end of the file, so the list may be
    shorter than the requested range.
  """
  skip = max(startline - 1, 0)
  if endline is None:
    stop = None
  else:
    stop = skip + max(endline - startline, 0)

  # The with-block guarantees the file is closed; islice stops at EOF
  # rather than yielding empty strings for lines that do not exist.
  with open(path, "r") as url_file:
    return [line.strip() for line in itertools.islice(url_file, skip, stop)]


def Iterate(command, iteration_func):
  """Iterates over a list of URLs, calling a function on each.

  For every URL the browser is started if it is not already running, the
  URL is written to the browser's socket and a newline-terminated response
  is read back. A socket timeout is reported as "<url>,hang", any other
  socket error (including the peer closing the connection) as
  "<url>,crash"; in both cases, and when the browser itself answers with a
  response ending in ",timeout", the browser is shut down so that the next
  URL starts a fresh one.

  The current socket, process and window are kept in the function
  attributes Iterate.s, Iterate.proc and Iterate.wnd.

  Args:
    command: the command line containing the iteration flags
    iteration_func: called for each URL with (url, proc, wnd, response)

  Raises:
    ValueError: if the browser could not be started or its socket could
      not be connected.
  """

  # Retrieve the browser scraper to use to invoke the browser
  scraper = scrapers.GetScraper((command["--browser"], command["--browserver"]))

  def AttachToBrowser(path, timeout):
    """Invoke the browser process and connect to the socket."""
    (proc, frame, wnd) = scraper.GetBrowser(path)

    if not wnd: raise ValueError("Could not invoke browser.")

    # Try to connect the socket. If it fails, wait and try
    # again. Do this for ten seconds. A fresh socket is used for every
    # attempt because the state of a socket after a failed connect() is
    # not something to rely on, and failed sockets are closed right away.
    s = None
    for attempt in range(10):
      candidate = socket.socket(
        socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP)
      try:
        candidate.connect(("localhost", PORT))
      except socket.error:
        candidate.close()
        time.sleep(1)
        continue
      s = candidate
      break

    if s is None:
      raise ValueError("Could not connect to browser")

    if command["--size"]:
      # Resize and reposition the frame
      windowing.MoveAndSizeWindow(frame, (0, 0), command["--size"], wnd)

    s.settimeout(timeout)

    Iterate.proc = proc
    Iterate.wnd = wnd
    Iterate.s = s

  def DetachFromBrowser():
    """Close the socket and kill the process if necessary."""
    if Iterate.s:
      Iterate.s.close()
      Iterate.s = None

    if Iterate.proc:
      if not windowing.WaitForProcessExit(Iterate.proc, 0):
        # Imported here because the module never imported pywintypes at
        # the top; without it the except clause below raised NameError.
        import pywintypes
        try:
          windowing.EndProcess(Iterate.proc)
          windowing.WaitForProcessExit(Iterate.proc, 0)
        except pywintypes.error:
          # Exception here most likely means the process died on its own
          pass
      Iterate.proc = None

  browser = command["--browserpath"] or None

  # Read the URLs from the file
  if command["--url"]:
    url_list = [command["--url"]]
  else:
    startline = command["--startline"]
    if startline is None:
      # NOTE(review): assumes an omitted flag reads as None -- confirm
      # against command_line. Without --startline, begin at line 1.
      startline = 1

    if command["--count"]:
      endline = startline + command["--count"]
    else:
      # May be None, in which case the file is read to its end.
      endline = command["--endline"]

    url_list = _ReadUrlList(command["--list"], startline, endline)

  timeout = command["--timeout"]

  # Loop through the URLs and send them through the socket
  Iterate.s = None
  Iterate.proc = None
  Iterate.wnd = None

  try:
    for url in url_list:
      # Invoke the browser if necessary
      if not Iterate.proc:
        AttachToBrowser(browser, timeout)

      response = ""

      try:
        # Send the URL and wait for a response. sendall() keeps writing
        # until the whole line is out; it is inside the try so that a
        # browser that died between URLs is reported as a crash.
        Iterate.s.sendall(url + "\n")

        while "\n" not in response:
          recv = Iterate.s.recv(MAX_URL)

          # Workaround for an oddity: when Firefox closes
          # gracefully, somehow Python doesn't detect it.
          # (Telnet does)
          if not recv:
            raise socket.error

          response += recv

      # socket.timeout must be listed first: it is a subclass of
      # socket.error.
      except socket.timeout:
        response = url + ",hang\n"
        DetachFromBrowser()
      except socket.error:
        # If there was a socket error, it's probably a crash
        response = url + ",crash\n"
        DetachFromBrowser()

      # If we received a timeout response, restart the browser
      if response.endswith(",timeout\n"):
        DetachFromBrowser()

      # Invoke the iteration function
      iteration_func(url, Iterate.proc, Iterate.wnd, response)
  finally:
    # We're done (or iteration_func raised): never leave the browser
    # process or the socket behind.
    DetachFromBrowser()