import os, select
import virt_utils, virt_vm, aexpect


class scheduler:
    """
    A scheduler that manages several parallel test execution pipelines on a
    single host.

    The scheduler process and each worker process communicate over a pair of
    unbuffered pipes using newline-terminated text commands:

      scheduler -> worker:  "run <test_index>", "cleanup", "terminate"
      worker -> scheduler:  "ready", "done <test_index> <status>",
                            "cleanup_done"
    """

    def __init__(self, tests, num_workers, total_cpus, total_mem, bindir):
        """
        Initialize the class.

        @param tests: A list of test dictionaries.
        @param num_workers: The number of workers (pipelines).
        @param total_cpus: The total number of CPUs to dedicate to tests.
        @param total_mem: The total amount of memory to dedicate to tests.
        @param bindir: The directory where environment files reside.
        """
        self.tests = tests
        self.num_workers = num_workers
        self.total_cpus = total_cpus
        self.total_mem = total_mem
        self.bindir = bindir
        # Pipes -- s stands for scheduler, w stands for worker.
        # The file objects are unbuffered (buffering=0) so that every
        # newline-terminated message is immediately visible to select() and
        # readline() on the other end of the pipe.
        self.s2w = [os.pipe() for i in range(num_workers)]
        self.w2s = [os.pipe() for i in range(num_workers)]
        self.s2w_r = [os.fdopen(r, "r", 0) for r, w in self.s2w]
        self.s2w_w = [os.fdopen(w, "w", 0) for r, w in self.s2w]
        self.w2s_r = [os.fdopen(r, "r", 0) for r, w in self.w2s]
        self.w2s_w = [os.fdopen(w, "w", 0) for r, w in self.w2s]
        # "Personal" worker dicts contain modifications that are applied
        # specifically to each worker.  For example, each worker must use a
        # different environment file and a different MAC address pool.
        self.worker_dicts = [{"env": "env%d" % i} for i in range(num_workers)]


    def worker(self, index, run_test_func):
        """
        The worker function.

        Waits for commands from the scheduler and processes them.

        @param index: The index of this worker (in the range 0..num_workers-1).
        @param run_test_func: A function to be called to run a test
                (e.g. job.run_test).
        """
        r = self.s2w_r[index]
        w = self.w2s_w[index]
        self_dict = self.worker_dicts[index]

        # Inform the scheduler this worker is ready
        w.write("ready\n")

        while True:
            cmd = r.readline().split()
            if not cmd:
                continue

            # The scheduler wants this worker to run a test
            if cmd[0] == "run":
                test_index = int(cmd[1])
                # Copy so the per-worker overrides don't mutate the shared
                # test dictionary
                test = self.tests[test_index].copy()
                test.update(self_dict)
                test_iterations = int(test.get("iterations", 1))
                status = run_test_func("kvm", params=test,
                                       tag=test.get("shortname"),
                                       iterations=test_iterations)
                w.write("done %s %s\n" % (test_index, status))
                w.write("ready\n")

            # The scheduler wants this worker to free its used resources
            elif cmd[0] == "cleanup":
                env_filename = os.path.join(self.bindir, self_dict["env"])
                env = virt_utils.Env(env_filename)
                for obj in env.values():
                    if isinstance(obj, virt_vm.VM):
                        obj.destroy()
                    elif isinstance(obj, aexpect.Spawn):
                        obj.close()
                env.save()
                w.write("cleanup_done\n")
                w.write("ready\n")

            # There's no more work for this worker
            elif cmd[0] == "terminate":
                break


    def scheduler(self):
        """
        The scheduler function.

        Sends commands to workers, telling them to run tests, clean up or
        terminate execution.
        """
        idle_workers = []
        closing_workers = []
        test_status = ["waiting"] * len(self.tests)
        test_worker = [None] * len(self.tests)
        used_cpus = [0] * self.num_workers
        used_mem = [0] * self.num_workers

        while True:
            # Wait for a message from a worker
            r, w, x = select.select(self.w2s_r, [], [])

            someone_is_ready = False

            for pipe in r:
                worker_index = self.w2s_r.index(pipe)
                msg = pipe.readline().split()
                if not msg:
                    continue

                # A worker is ready -- add it to the idle_workers list
                if msg[0] == "ready":
                    idle_workers.append(worker_index)
                    someone_is_ready = True

                # A worker completed a test
                elif msg[0] == "done":
                    test_index = int(msg[1])
                    test = self.tests[test_index]
                    # The worker reports the boolean result of run_test_func
                    # as the string "True" or "False"; compare against it
                    # directly instead of eval()ing data read from a pipe.
                    status = (msg[2] == "True")
                    test_status[test_index] = ("fail", "pass")[status]
                    # If the test failed, mark all dependent tests as "failed" too
                    if not status:
                        for i, other_test in enumerate(self.tests):
                            for dep in other_test.get("dep", []):
                                if dep in test["name"]:
                                    test_status[i] = "fail"

                # A worker is done shutting down its VMs and other processes
                elif msg[0] == "cleanup_done":
                    used_cpus[worker_index] = 0
                    used_mem[worker_index] = 0
                    closing_workers.remove(worker_index)

            if not someone_is_ready:
                continue

            for worker in idle_workers[:]:
                # Find a test for this worker
                test_found = False
                for i, test in enumerate(self.tests):
                    # We only want "waiting" tests
                    if test_status[i] != "waiting":
                        continue
                    # Make sure the test isn't assigned to another worker
                    if test_worker[i] is not None and test_worker[i] != worker:
                        continue
                    # Make sure the test's dependencies are satisfied
                    # (use .get() -- a test may have no "dep" key at all)
                    dependencies_satisfied = True
                    for dep in test.get("dep", []):
                        dependencies = [j for j, t in enumerate(self.tests)
                                        if dep in t["name"]]
                        bad_status_deps = [j for j in dependencies
                                           if test_status[j] != "pass"]
                        if bad_status_deps:
                            dependencies_satisfied = False
                            break
                    if not dependencies_satisfied:
                        continue
                    # Make sure we have enough resources to run the test
                    test_used_cpus = int(test.get("used_cpus", 1))
                    test_used_mem = int(test.get("used_mem", 128))
                    # First make sure the other workers aren't using too many
                    # CPUs (not including the workers currently shutting down)
                    uc = (sum(used_cpus) - used_cpus[worker] -
                          sum(used_cpus[i] for i in closing_workers))
                    if uc and uc + test_used_cpus > self.total_cpus:
                        continue
                    # ... or too much memory
                    um = (sum(used_mem) - used_mem[worker] -
                          sum(used_mem[i] for i in closing_workers))
                    if um and um + test_used_mem > self.total_mem:
                        continue
                    # If we reached this point it means there are, or will
                    # soon be, enough resources to run the test
                    test_found = True
                    # Now check if the test can be run right now, i.e. if the
                    # other workers, including the ones currently shutting
                    # down, aren't using too many CPUs
                    uc = (sum(used_cpus) - used_cpus[worker])
                    if uc and uc + test_used_cpus > self.total_cpus:
                        continue
                    # ... or too much memory
                    um = (sum(used_mem) - used_mem[worker])
                    if um and um + test_used_mem > self.total_mem:
                        continue
                    # Everything is OK -- run the test
                    test_status[i] = "running"
                    test_worker[i] = worker
                    idle_workers.remove(worker)
                    # Update used_cpus and used_mem
                    used_cpus[worker] = test_used_cpus
                    used_mem[worker] = test_used_mem
                    # Assign all related tests to this worker
                    for j, other_test in enumerate(self.tests):
                        for other_dep in other_test.get("dep", []):
                            # All tests that depend on this test
                            if other_dep in test["name"]:
                                test_worker[j] = worker
                                break
                            # ... and all tests that share a dependency
                            # with this test
                            for dep in test.get("dep", []):
                                if dep in other_dep or other_dep in dep:
                                    test_worker[j] = worker
                                    break
                    # Tell the worker to run the test
                    self.s2w_w[worker].write("run %s\n" % i)
                    break

                # If there won't be any tests for this worker to run soon, tell
                # the worker to free its used resources
                if not test_found and (used_cpus[worker] or used_mem[worker]):
                    self.s2w_w[worker].write("cleanup\n")
                    idle_workers.remove(worker)
                    closing_workers.append(worker)

            # If there are no more new tests to run, terminate the workers and
            # the scheduler
            if len(idle_workers) == self.num_workers:
                for worker in idle_workers:
                    self.s2w_w[worker].write("terminate\n")
                break