1#!/usr/bin/env python 2# 3# Copyright 2016 - The Android Open Source Project 4# 5# Licensed under the Apache License, Version 2.0 (the "License"); 6# you may not use this file except in compliance with the License. 7# You may obtain a copy of the License at 8# 9# http://www.apache.org/licenses/LICENSE-2.0 10# 11# Unless required by applicable law or agreed to in writing, software 12# distributed under the License is distributed on an "AS IS" BASIS, 13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14# See the License for the specific language governing permissions and 15# limitations under the License. 16"""A client that manages Google Compute Engine. 17 18** ComputeClient ** 19 20ComputeClient is a wrapper around Google Compute Engine APIs. 21It provides a set of methods for managing a google compute engine project, 22such as creating images, creating instances, etc. 23 24Design philosophy: We tried to make ComputeClient as stateless as possible, 25and it only keeps states about authentication. ComputeClient should be very 26generic, and only knows how to talk to Compute Engine APIs. 
27""" 28# pylint: disable=too-many-lines 29import copy 30import functools 31import getpass 32import logging 33import os 34import re 35 36from acloud import errors 37from acloud.internal import constants 38from acloud.internal.lib import base_cloud_client 39from acloud.internal.lib import utils 40from acloud.internal.lib.ssh import IP 41 42 43logger = logging.getLogger(__name__) 44 45_MAX_RETRIES_ON_FINGERPRINT_CONFLICT = 10 46_METADATA_KEY = "key" 47_METADATA_KEY_VALUE = "value" 48_SSH_KEYS_NAME = "sshKeys" 49_ITEMS = "items" 50_METADATA = "metadata" 51_ZONE_RE = re.compile(r"^zones/(?P<zone>.+)") 52# Quota metrics 53_METRIC_CPUS = "CPUS" 54_METRIC_DISKS_GB = "DISKS_TOTAL_GB" 55_METRIC_USE_ADDRESSES = "IN_USE_ADDRESSES" 56_METRICS = [_METRIC_CPUS, _METRIC_DISKS_GB, _METRIC_USE_ADDRESSES] 57_USAGE = "usage" 58_LIMIT = "limit" 59# The minimum requirement to create an instance. 60_REQUIRE_METRICS = {_METRIC_CPUS: 8, 61 _METRIC_DISKS_GB: 1000, 62 _METRIC_USE_ADDRESSES: 1} 63 64BASE_DISK_ARGS = { 65 "type": "PERSISTENT", 66 "boot": True, 67 "mode": "READ_WRITE", 68 "autoDelete": True, 69 "initializeParams": {}, 70} 71 72 73class OperationScope(): 74 """Represents operation scope enum.""" 75 ZONE = "zone" 76 REGION = "region" 77 GLOBAL = "global" 78 79 80class PersistentDiskType(): 81 """Represents different persistent disk types. 82 83 pd-standard for regular hard disk. 84 pd-ssd for solid state disk. 85 """ 86 STANDARD = "pd-standard" 87 SSD = "pd-ssd" 88 89 90class ImageStatus(): 91 """Represents the status of an image.""" 92 PENDING = "PENDING" 93 READY = "READY" 94 FAILED = "FAILED" 95 96 97def _IsFingerPrintError(exc): 98 """Determine if the exception is a HTTP error with code 412. 99 100 Args: 101 exc: Exception instance. 102 103 Returns: 104 Boolean. True if the exception is a "Precondition Failed" error. 
105 """ 106 return isinstance(exc, errors.HttpError) and exc.code == 412 107 108 109# pylint: disable=too-many-public-methods 110class ComputeClient(base_cloud_client.BaseCloudApiClient): 111 """Client that manages GCE.""" 112 113 # API settings, used by BaseCloudApiClient. 114 API_NAME = "compute" 115 API_VERSION = "v1" 116 SCOPE = " ".join([ 117 "https://www.googleapis.com/auth/compute", 118 "https://www.googleapis.com/auth/devstorage.read_write" 119 ]) 120 # Default settings for gce operations 121 DEFAULT_INSTANCE_SCOPE = [ 122 "https://www.googleapis.com/auth/androidbuild.internal", 123 "https://www.googleapis.com/auth/devstorage.read_only", 124 "https://www.googleapis.com/auth/logging.write" 125 ] 126 OPERATION_TIMEOUT_SECS = 30 * 60 # 30 mins 127 OPERATION_POLL_INTERVAL_SECS = 20 128 MACHINE_SIZE_METRICS = ["guestCpus", "memoryMb"] 129 ACCESS_DENIED_CODE = 403 130 131 def __init__(self, acloud_config, oauth2_credentials): 132 """Initialize. 133 134 Args: 135 acloud_config: An AcloudConfig object. 136 oauth2_credentials: An oauth2client.OAuth2Credentials instance. 137 """ 138 super().__init__(oauth2_credentials) 139 self._project = acloud_config.project 140 141 def _GetOperationStatus(self, operation, operation_scope, scope_name=None): 142 """Get status of an operation. 143 144 Args: 145 operation: An Operation resource in the format of json. 146 operation_scope: A value from OperationScope, "zone", "region", 147 or "global". 148 scope_name: If operation_scope is "zone" or "region", this should be 149 the name of the zone or region, e.g. "us-central1-f". 150 151 Returns: 152 Status of the operation, one of "DONE", "PENDING", "RUNNING". 153 154 Raises: 155 errors.DriverError: if the operation fails. 
156 """ 157 operation_name = operation["name"] 158 if operation_scope == OperationScope.GLOBAL: 159 api = self.service.globalOperations().get( 160 project=self._project, operation=operation_name) 161 result = self.Execute(api) 162 elif operation_scope == OperationScope.ZONE: 163 api = self.service.zoneOperations().get( 164 project=self._project, 165 operation=operation_name, 166 zone=scope_name) 167 result = self.Execute(api) 168 elif operation_scope == OperationScope.REGION: 169 api = self.service.regionOperations().get( 170 project=self._project, 171 operation=operation_name, 172 region=scope_name) 173 result = self.Execute(api) 174 175 if result.get("error"): 176 errors_list = result["error"]["errors"] 177 raise errors.DriverError( 178 "Get operation state failed, errors: %s" % str(errors_list)) 179 return result["status"] 180 181 def WaitOnOperation(self, operation, operation_scope, scope_name=None): 182 """Wait for an operation to finish. 183 184 Args: 185 operation: An Operation resource in the format of json. 186 operation_scope: A value from OperationScope, "zone", "region", 187 or "global". 188 scope_name: If operation_scope is "zone" or "region", this should be 189 the name of the zone or region, e.g. "us-central1-f". 190 """ 191 timeout_exception = errors.GceOperationTimeoutError( 192 "Operation hits timeout, did not complete within %d secs." % 193 self.OPERATION_TIMEOUT_SECS) 194 utils.PollAndWait( 195 func=self._GetOperationStatus, 196 expected_return="DONE", 197 timeout_exception=timeout_exception, 198 timeout_secs=self.OPERATION_TIMEOUT_SECS, 199 sleep_interval_secs=self.OPERATION_POLL_INTERVAL_SECS, 200 operation=operation, 201 operation_scope=operation_scope, 202 scope_name=scope_name) 203 204 def GetProject(self): 205 """Get project information. 206 207 Returns: 208 A project resource in json. 
209 """ 210 api = self.service.projects().get(project=self._project) 211 return self.Execute(api) 212 213 def GetRegionInfo(self): 214 """Get region information that includes all quotas limit. 215 216 The region info example: 217 {"items": 218 [{"status": "UP", 219 "name": "asia-east1", 220 "quotas": 221 [{"usage": 92, "metric": "CPUS", "limit": 100}, 222 {"usage": 640, "metric": "DISKS_TOTAL_GB", "limit": 10240}, 223 ...]]} 224 } 225 226 Returns: 227 A region resource in json. 228 """ 229 api = self.service.regions().list(project=self._project) 230 return self.Execute(api) 231 232 @staticmethod 233 def GetMetricQuota(regions_info, zone, metric): 234 """Get CPU quota limit in specific zone and project. 235 236 Args: 237 regions_info: Dict, regions resource in json. 238 zone: String, name of zone. 239 metric: String, name of metric, e.g. "CPUS". 240 241 Returns: 242 A dict of quota information. Such as 243 {"usage": 100, "metric": "CPUS", "limit": 200} 244 """ 245 for region_info in regions_info["items"]: 246 if region_info["name"] in zone: 247 for quota in region_info["quotas"]: 248 if quota["metric"] == metric: 249 return quota 250 logger.info("Can't get %s quota info from zone(%s)", metric, zone) 251 return None 252 253 def EnoughMetricsInZone(self, zone): 254 """Check the zone have enough metrics to create instance. 255 256 The metrics include CPUS and DISKS. 257 258 Args: 259 zone: String, name of zone. 260 261 Returns: 262 Boolean. True if zone have enough quota. 263 """ 264 regions_info = self.GetRegionInfo() 265 for metric in _METRICS: 266 quota = self.GetMetricQuota(regions_info, zone, metric) 267 if not quota: 268 logger.debug( 269 "Can't query the metric(%s) in zone(%s)", metric, zone) 270 return False 271 if quota[_LIMIT] - quota[_USAGE] < _REQUIRE_METRICS[metric]: 272 logger.debug( 273 "The metric(%s) is over limit in zone(%s)", metric, zone) 274 return False 275 return True 276 277 def GetDisk(self, disk_name, zone): 278 """Get disk information. 
279 280 Args: 281 disk_name: A string. 282 zone: String, name of zone. 283 284 Returns: 285 An disk resource in json. 286 https://cloud.google.com/compute/docs/reference/latest/disks#resource 287 """ 288 api = self.service.disks().get( 289 project=self._project, zone=zone, disk=disk_name) 290 return self.Execute(api) 291 292 def CheckDiskExists(self, disk_name, zone): 293 """Check if disk exists. 294 295 Args: 296 disk_name: A string 297 zone: String, name of zone. 298 299 Returns: 300 True if disk exists, otherwise False. 301 """ 302 try: 303 self.GetDisk(disk_name, zone) 304 exists = True 305 except errors.ResourceNotFoundError: 306 exists = False 307 logger.debug("CheckDiskExists: disk_name: %s, result: %s", disk_name, 308 exists) 309 return exists 310 311 def CreateDisk(self, 312 disk_name, 313 source_image, 314 size_gb, 315 zone, 316 source_project=None, 317 disk_type=PersistentDiskType.STANDARD): 318 """Create a gce disk. 319 320 Args: 321 disk_name: String 322 source_image: String, name of the image. 323 size_gb: Integer, size in gb. 324 zone: String, name of the zone, e.g. us-central1-b. 325 source_project: String, required if the image is located in a different 326 project. 327 disk_type: String, a value from PersistentDiskType, STANDARD 328 for regular hard disk or SSD for solid state disk. 
329 """ 330 source_project = source_project or self._project 331 source_image = "projects/%s/global/images/%s" % ( 332 source_project, source_image) if source_image else None 333 logger.info("Creating disk %s, size_gb: %d, source_image: %s", 334 disk_name, size_gb, str(source_image)) 335 body = { 336 "name": disk_name, 337 "sizeGb": size_gb, 338 "type": "projects/%s/zones/%s/diskTypes/%s" % (self._project, zone, 339 disk_type), 340 } 341 api = self.service.disks().insert( 342 project=self._project, 343 sourceImage=source_image, 344 zone=zone, 345 body=body) 346 operation = self.Execute(api) 347 try: 348 self.WaitOnOperation( 349 operation=operation, 350 operation_scope=OperationScope.ZONE, 351 scope_name=zone) 352 except errors.DriverError: 353 logger.error("Creating disk failed, cleaning up: %s", disk_name) 354 if self.CheckDiskExists(disk_name, zone): 355 self.DeleteDisk(disk_name, zone) 356 raise 357 logger.info("Disk %s has been created.", disk_name) 358 359 def DeleteDisk(self, disk_name, zone): 360 """Delete a gce disk. 361 362 Args: 363 disk_name: A string, name of disk. 364 zone: A string, name of zone. 365 """ 366 logger.info("Deleting disk %s", disk_name) 367 api = self.service.disks().delete( 368 project=self._project, zone=zone, disk=disk_name) 369 operation = self.Execute(api) 370 self.WaitOnOperation( 371 operation=operation, 372 operation_scope=OperationScope.ZONE, 373 scope_name=zone) 374 logger.info("Deleted disk %s", disk_name) 375 376 def DeleteDisks(self, disk_names, zone): 377 """Delete multiple disks. 378 379 Args: 380 disk_names: A list of disk names. 381 zone: A string, name of zone. 382 383 Returns: 384 A tuple, (deleted, failed, error_msgs) 385 deleted: A list of names of disks that have been deleted. 386 failed: A list of names of disks that we fail to delete. 387 error_msgs: A list of failure messages. 388 """ 389 if not disk_names: 390 logger.warning("Nothing to delete. 
Arg disk_names is not provided.") 391 return [], [], [] 392 # Batch send deletion requests. 393 logger.info("Deleting disks: %s", disk_names) 394 delete_requests = {} 395 for disk_name in set(disk_names): 396 request = self.service.disks().delete( 397 project=self._project, disk=disk_name, zone=zone) 398 delete_requests[disk_name] = request 399 return self._BatchExecuteAndWait( 400 delete_requests, OperationScope.ZONE, scope_name=zone) 401 402 def ListDisks(self, zone, disk_filter=None): 403 """List disks. 404 405 Args: 406 zone: A string, representing zone name. e.g. "us-central1-f" 407 disk_filter: A string representing a filter in format of 408 FIELD_NAME COMPARISON_STRING LITERAL_STRING 409 e.g. "name ne example-instance" 410 e.g. "name eq "example-instance-[0-9]+"" 411 412 Returns: 413 A list of disks. 414 """ 415 return self.ListWithMultiPages( 416 api_resource=self.service.disks().list, 417 project=self._project, 418 zone=zone, 419 filter=disk_filter) 420 421 def CreateImage(self, 422 image_name, 423 source_uri=None, 424 source_disk=None, 425 labels=None): 426 """Create a Gce image. 427 428 Args: 429 image_name: String, name of image 430 source_uri: Full Google Cloud Storage URL where the disk image is 431 stored. e.g. "https://storage.googleapis.com/my-bucket/ 432 avd-system-2243663.tar.gz" 433 source_disk: String, this should be the disk's selfLink value 434 (including zone and project), rather than the disk_name 435 e.g. https://www.googleapis.com/compute/v1/projects/ 436 google.com:android-builds-project/zones/ 437 us-east1-d/disks/<disk_name> 438 labels: Dict, will be added to the image's labels. 439 440 Raises: 441 errors.DriverError: For malformed request or response. 442 errors.GceOperationTimeoutError: Operation takes too long to finish. 
443 """ 444 if self.CheckImageExists(image_name): 445 return 446 if (source_uri and source_disk) or (not source_uri 447 and not source_disk): 448 raise errors.DriverError( 449 "Creating image %s requires either source_uri %s or " 450 "source_disk %s but not both" % (image_name, source_uri, 451 source_disk)) 452 if source_uri: 453 logger.info("Creating image %s, source_uri %s", image_name, 454 source_uri) 455 body = { 456 "name": image_name, 457 "rawDisk": { 458 "source": source_uri, 459 }, 460 } 461 else: 462 logger.info("Creating image %s, source_disk %s", image_name, 463 source_disk) 464 body = { 465 "name": image_name, 466 "sourceDisk": source_disk, 467 } 468 if labels is not None: 469 body["labels"] = labels 470 api = self.service.images().insert(project=self._project, body=body) 471 operation = self.Execute(api) 472 try: 473 self.WaitOnOperation( 474 operation=operation, operation_scope=OperationScope.GLOBAL) 475 except errors.DriverError: 476 logger.error("Creating image failed, cleaning up: %s", image_name) 477 if self.CheckImageExists(image_name): 478 self.DeleteImage(image_name) 479 raise 480 logger.info("Image %s has been created.", image_name) 481 482 @utils.RetryOnException(_IsFingerPrintError, 483 _MAX_RETRIES_ON_FINGERPRINT_CONFLICT) 484 def SetImageLabels(self, image_name, new_labels): 485 """Update image's labels. Retry for finger print conflict. 486 487 Note: Decorator RetryOnException will retry the call for FingerPrint 488 conflict (HTTP error code 412). The fingerprint is used to detect 489 conflicts of GCE resource updates. The fingerprint is initially generated 490 by Compute Engine and changes after every request to modify or update 491 resources (e.g. GCE "image" resource has "fingerPrint" for "labels" 492 updates). 493 494 Args: 495 image_name: A string, the image name. 496 new_labels: Dict, will be added to the image's labels. 497 498 Returns: 499 A GlobalOperation resouce. 
500 https://cloud.google.com/compute/docs/reference/latest/globalOperations 501 """ 502 image = self.GetImage(image_name) 503 labels = image.get("labels", {}) 504 labels.update(new_labels) 505 body = { 506 "labels": labels, 507 "labelFingerprint": image["labelFingerprint"] 508 } 509 api = self.service.images().setLabels( 510 project=self._project, resource=image_name, body=body) 511 return self.Execute(api) 512 513 def CheckImageExists(self, image_name): 514 """Check if image exists. 515 516 Args: 517 image_name: A string 518 519 Returns: 520 True if image exists, otherwise False. 521 """ 522 try: 523 self.GetImage(image_name) 524 exists = True 525 except errors.ResourceNotFoundError: 526 exists = False 527 logger.debug("CheckImageExists: image_name: %s, result: %s", 528 image_name, exists) 529 return exists 530 531 def GetImage(self, image_name, image_project=None): 532 """Get image information. 533 534 Args: 535 image_name: A string 536 image_project: A string 537 538 Returns: 539 An image resource in json. 540 https://cloud.google.com/compute/docs/reference/latest/images#resource 541 """ 542 api = self.service.images().get( 543 project=image_project or self._project, image=image_name) 544 return self.Execute(api) 545 546 def GetImageFromFamily(self, image_family, image_project=None): 547 """Get image information from image_family. 548 549 Args: 550 image_family: String of image family. 551 image_project: String of image project. 552 553 Returns: 554 An image resource in json. 555 https://cloud.google.com/compute/docs/reference/latest/images#resource 556 """ 557 api = self.service.images().getFromFamily( 558 project=image_project or self._project, family=image_family) 559 return self.Execute(api) 560 561 def DeleteImage(self, image_name): 562 """Delete an image. 
563 564 Args: 565 image_name: A string 566 """ 567 logger.info("Deleting image %s", image_name) 568 api = self.service.images().delete( 569 project=self._project, image=image_name) 570 operation = self.Execute(api) 571 self.WaitOnOperation( 572 operation=operation, operation_scope=OperationScope.GLOBAL) 573 logger.info("Deleted image %s", image_name) 574 575 def DeleteImages(self, image_names): 576 """Delete multiple images. 577 578 Args: 579 image_names: A list of image names. 580 581 Returns: 582 A tuple, (deleted, failed, error_msgs) 583 deleted: A list of names of images that have been deleted. 584 failed: A list of names of images that we fail to delete. 585 error_msgs: A list of failure messages. 586 """ 587 if not image_names: 588 return [], [], [] 589 # Batch send deletion requests. 590 logger.info("Deleting images: %s", image_names) 591 delete_requests = {} 592 for image_name in set(image_names): 593 request = self.service.images().delete( 594 project=self._project, image=image_name) 595 delete_requests[image_name] = request 596 return self._BatchExecuteAndWait(delete_requests, 597 OperationScope.GLOBAL) 598 599 def ListImages(self, image_filter=None, image_project=None): 600 """List images. 601 602 Args: 603 image_filter: A string representing a filter in format of 604 FIELD_NAME COMPARISON_STRING LITERAL_STRING 605 e.g. "name ne example-image" 606 e.g. "name eq "example-image-[0-9]+"" 607 image_project: String. If not provided, will list images from the default 608 project. Otherwise, will list images from the given 609 project, which can be any arbitrary project where the 610 account has read access 611 (i.e. has the role "roles/compute.imageUser") 612 613 Read more about image sharing across project: 614 https://cloud.google.com/compute/docs/images/sharing-images-across-projects 615 616 Returns: 617 A list of images. 
618 """ 619 return self.ListWithMultiPages( 620 api_resource=self.service.images().list, 621 project=image_project or self._project, 622 filter=image_filter) 623 624 def GetInstance(self, instance, zone): 625 """Get information about an instance. 626 627 Args: 628 instance: A string, representing instance name. 629 zone: A string, representing zone name. e.g. "us-central1-f" 630 631 Returns: 632 An instance resource in json. 633 https://cloud.google.com/compute/docs/reference/latest/instances#resource 634 """ 635 api = self.service.instances().get( 636 project=self._project, zone=zone, instance=instance) 637 return self.Execute(api) 638 639 def AttachAccelerator(self, instance, zone, accelerator_count, 640 accelerator_type): 641 """Attach a GPU accelerator to the instance. 642 643 Note: In order for this to succeed the following must hold: 644 - The machine schedule must be set to "terminate" i.e: 645 SetScheduling(self, instance, zone, on_host_maintenance="terminate") 646 must have been called. 647 - The machine is not starting or running. i.e. 648 StopInstance(self, instance) must have been called. 649 650 Args: 651 instance: A string, representing instance name. 652 zone: String, name of zone. 653 accelerator_count: The number accelerators to be attached to the instance. 654 a value of 0 will detach all accelerators. 655 accelerator_type: The type of accelerator to attach. e.g. 
656 "nvidia-tesla-k80" 657 """ 658 body = { 659 "guestAccelerators": [{ 660 "acceleratorType": 661 self.GetAcceleratorUrl(accelerator_type, zone), 662 "acceleratorCount": 663 accelerator_count 664 }] 665 } 666 api = self.service.instances().setMachineResources( 667 project=self._project, zone=zone, instance=instance, body=body) 668 operation = self.Execute(api) 669 try: 670 self.WaitOnOperation( 671 operation=operation, 672 operation_scope=OperationScope.ZONE, 673 scope_name=zone) 674 except errors.GceOperationTimeoutError: 675 logger.error("Attach instance failed: %s", instance) 676 raise 677 logger.info("%d x %s have been attached to instance %s.", 678 accelerator_count, accelerator_type, instance) 679 680 def AttachDisk(self, instance, zone, **kwargs): 681 """Attach the external disk to the instance. 682 683 Args: 684 instance: A string, representing instance name. 685 zone: String, name of zone. 686 **kwargs: The attachDisk request body. See "https://cloud.google.com/ 687 compute/docs/reference/latest/instances/attachDisk" for detail. 688 { 689 "kind": "compute#attachedDisk", 690 "type": string, 691 "mode": string, 692 "source": string, 693 "deviceName": string, 694 "index": integer, 695 "boot": boolean, 696 "initializeParams": { 697 "diskName": string, 698 "sourceImage": string, 699 "diskSizeGb": long, 700 "diskType": string, 701 "sourceImageEncryptionKey": { 702 "rawKey": string, 703 "sha256": string 704 } 705 }, 706 "autoDelete": boolean, 707 "licenses": [ 708 string 709 ], 710 "interface": string, 711 "diskEncryptionKey": { 712 "rawKey": string, 713 "sha256": string 714 } 715 } 716 717 Returns: 718 An disk resource in json. 719 https://cloud.google.com/compute/docs/reference/latest/disks#resource 720 721 722 Raises: 723 errors.GceOperationTimeoutError: Operation takes too long to finish. 
724 """ 725 api = self.service.instances().attachDisk( 726 project=self._project, zone=zone, instance=instance, body=kwargs) 727 operation = self.Execute(api) 728 try: 729 self.WaitOnOperation( 730 operation=operation, 731 operation_scope=OperationScope.ZONE, 732 scope_name=zone) 733 except errors.GceOperationTimeoutError: 734 logger.error("Attach instance failed: %s", instance) 735 raise 736 logger.info("Disk has been attached to instance %s.", instance) 737 738 def DetachDisk(self, instance, zone, disk_name): 739 """Attach the external disk to the instance. 740 741 Args: 742 instance: A string, representing instance name. 743 zone: String, name of zone. 744 disk_name: A string, the name of the detach disk. 745 746 Returns: 747 A ZoneOperation resource. 748 See https://cloud.google.com/compute/docs/reference/latest/zoneOperations 749 750 Raises: 751 errors.GceOperationTimeoutError: Operation takes too long to finish. 752 """ 753 api = self.service.instances().detachDisk( 754 project=self._project, 755 zone=zone, 756 instance=instance, 757 deviceName=disk_name) 758 operation = self.Execute(api) 759 try: 760 self.WaitOnOperation( 761 operation=operation, 762 operation_scope=OperationScope.ZONE, 763 scope_name=zone) 764 except errors.GceOperationTimeoutError: 765 logger.error("Detach instance failed: %s", instance) 766 raise 767 logger.info("Disk has been detached to instance %s.", instance) 768 769 def StartInstance(self, instance, zone): 770 """Start |instance| in |zone|. 771 772 Args: 773 instance: A string, representing instance name. 774 zone: A string, representing zone name. e.g. "us-central1-f" 775 776 Raises: 777 errors.GceOperationTimeoutError: Operation takes too long to finish. 
778 """ 779 api = self.service.instances().start( 780 project=self._project, zone=zone, instance=instance) 781 operation = self.Execute(api) 782 try: 783 self.WaitOnOperation( 784 operation=operation, 785 operation_scope=OperationScope.ZONE, 786 scope_name=zone) 787 except errors.GceOperationTimeoutError: 788 logger.error("Start instance failed: %s", instance) 789 raise 790 logger.info("Instance %s has been started.", instance) 791 792 def StartInstances(self, instances, zone): 793 """Start |instances| in |zone|. 794 795 Args: 796 instances: A list of strings, representing instance names's list. 797 zone: A string, representing zone name. e.g. "us-central1-f" 798 799 Returns: 800 A tuple, (done, failed, error_msgs) 801 done: A list of string, representing the names of instances that 802 have been executed. 803 failed: A list of string, representing the names of instances that 804 we failed to execute. 805 error_msgs: A list of string, representing the failure messages. 806 """ 807 action = functools.partial( 808 self.service.instances().start, project=self._project, zone=zone) 809 return self._BatchExecuteOnInstances(instances, zone, action) 810 811 def StopInstance(self, instance, zone): 812 """Stop |instance| in |zone|. 813 814 Args: 815 instance: A string, representing instance name. 816 zone: A string, representing zone name. e.g. "us-central1-f" 817 818 Raises: 819 errors.GceOperationTimeoutError: Operation takes too long to finish. 820 """ 821 api = self.service.instances().stop( 822 project=self._project, zone=zone, instance=instance) 823 operation = self.Execute(api) 824 try: 825 self.WaitOnOperation( 826 operation=operation, 827 operation_scope=OperationScope.ZONE, 828 scope_name=zone) 829 except errors.GceOperationTimeoutError: 830 logger.error("Stop instance failed: %s", instance) 831 raise 832 logger.info("Instance %s has been terminated.", instance) 833 834 def StopInstances(self, instances, zone): 835 """Stop |instances| in |zone|. 
836 837 Args: 838 instances: A list of strings, representing instance names's list. 839 zone: A string, representing zone name. e.g. "us-central1-f" 840 841 Returns: 842 A tuple, (done, failed, error_msgs) 843 done: A list of string, representing the names of instances that 844 have been executed. 845 failed: A list of string, representing the names of instances that 846 we failed to execute. 847 error_msgs: A list of string, representing the failure messages. 848 """ 849 action = functools.partial( 850 self.service.instances().stop, project=self._project, zone=zone) 851 return self._BatchExecuteOnInstances(instances, zone, action) 852 853 def SetScheduling(self, 854 instance, 855 zone, 856 automatic_restart=True, 857 on_host_maintenance="MIGRATE"): 858 """Update scheduling config |automatic_restart| and |on_host_maintenance|. 859 860 Args: 861 instance: A string, representing instance name. 862 zone: A string, representing zone name. e.g. "us-central1-f". 863 automatic_restart: Boolean, determine whether the instance will 864 automatically restart if it crashes or not, 865 default to True. 866 on_host_maintenance: enum["MIGRATE", "TERMINATE"] 867 The instance's maintenance behavior, which 868 determines whether the instance is live 869 "MIGRATE" or "TERMINATE" when there is 870 a maintenance event. 871 872 Raises: 873 errors.GceOperationTimeoutError: Operation takes too long to finish. 
874 """ 875 body = { 876 "automaticRestart": automatic_restart, 877 "onHostMaintenance": on_host_maintenance 878 } 879 api = self.service.instances().setScheduling( 880 project=self._project, zone=zone, instance=instance, body=body) 881 operation = self.Execute(api) 882 try: 883 self.WaitOnOperation( 884 operation=operation, 885 operation_scope=OperationScope.ZONE, 886 scope_name=zone) 887 except errors.GceOperationTimeoutError: 888 logger.error("Set instance scheduling failed: %s", instance) 889 raise 890 logger.info( 891 "Instance scheduling changed:\n" 892 " automaticRestart: %s\n" 893 " onHostMaintenance: %s\n", 894 str(automatic_restart).lower(), on_host_maintenance) 895 896 def ListInstances(self, instance_filter=None): 897 """List instances cross all zones. 898 899 Gcompute response instance. For example: 900 { 901 'items': 902 { 903 'zones/europe-west3-b': 904 { 905 'warning': 906 { 907 'message': "There are no results for scope 908 'zones/europe-west3-b' on this page.", 909 'code': 'NO_RESULTS_ON_PAGE', 910 'data': [{'value': u'zones/europe-west3-b', 911 'key': u'scope'}] 912 } 913 }, 914 'zones/asia-east1-b': 915 { 916 'instances': [ 917 { 918 'name': 'ins-bc212dc8-userbuild-aosp-cf-x86-64-phone' 919 'status': 'RUNNING', 920 'cpuPlatform': 'Intel Broadwell', 921 'startRestricted': False, 922 'labels': {u'created_by': u'herbertxue'}, 923 'name': 'ins-bc212dc8-userbuild-aosp-cf-x86-64-phone', 924 ... 925 }] 926 } 927 } 928 } 929 930 Args: 931 instance_filter: A string representing a filter in format of 932 FIELD_NAME COMPARISON_STRING LITERAL_STRING 933 e.g. "name ne example-instance" 934 e.g. "name eq "example-instance-[0-9]+"" 935 936 Returns: 937 A list of instances. 938 """ 939 # aggregatedList will only return 500 results max, so if there are more, 940 # we need to send in the next page token to get the next 500 (and so on 941 # and so forth. 
942 get_more_instances = True 943 page_token = None 944 instances_list = [] 945 while get_more_instances: 946 api = self.service.instances().aggregatedList( 947 project=self._project, 948 filter=instance_filter, 949 pageToken=page_token) 950 response = self.Execute(api) 951 page_token = response.get("nextPageToken") 952 get_more_instances = page_token is not None 953 for instances_data in response["items"].values(): 954 if "instances" in instances_data: 955 for instance in instances_data.get("instances"): 956 instances_list.append(instance) 957 958 return instances_list 959 960 def SetSchedulingInstances(self, 961 instances, 962 zone, 963 automatic_restart=True, 964 on_host_maintenance="MIGRATE"): 965 """Update scheduling config |automatic_restart| and |on_host_maintenance|. 966 967 See //cloud/cluster/api/mixer_instances.proto Scheduling for config option. 968 969 Args: 970 instances: A list of string, representing instance names. 971 zone: A string, representing zone name. e.g. "us-central1-f". 972 automatic_restart: Boolean, determine whether the instance will 973 automatically restart if it crashes or not, 974 default to True. 975 on_host_maintenance: enum["MIGRATE", "TERMINATE"] 976 The instance's maintenance behavior, which 977 determines whether the instance is live 978 migrated or terminated when there is 979 a maintenance event. 980 981 Returns: 982 A tuple, (done, failed, error_msgs) 983 done: A list of string, representing the names of instances that 984 have been executed. 985 failed: A list of string, representing the names of instances that 986 we failed to execute. 987 error_msgs: A list of string, representing the failure messages. 
988 """ 989 body = { 990 "automaticRestart": automatic_restart, 991 "OnHostMaintenance": on_host_maintenance 992 } 993 action = functools.partial( 994 self.service.instances().setScheduling, 995 project=self._project, 996 zone=zone, 997 body=body) 998 return self._BatchExecuteOnInstances(instances, zone, action) 999 1000 def _BatchExecuteOnInstances(self, instances, zone, action): 1001 """Batch processing operations requiring computing time. 1002 1003 Args: 1004 instances: A list of instance names. 1005 zone: A string, e.g. "us-central1-f". 1006 action: partial func, all kwargs for this gcloud action has been 1007 defined in the caller function (e.g. See "StartInstances") 1008 except 'instance' which will be defined by iterating the 1009 |instances|. 1010 1011 Returns: 1012 A tuple, (done, failed, error_msgs) 1013 done: A list of string, representing the names of instances that 1014 have been executed. 1015 failed: A list of string, representing the names of instances that 1016 we failed to execute. 1017 error_msgs: A list of string, representing the failure messages. 1018 """ 1019 if not instances: 1020 return [], [], [] 1021 # Batch send requests. 1022 logger.info("Batch executing instances: %s", instances) 1023 requests = {} 1024 for instance_name in set(instances): 1025 requests[instance_name] = action(instance=instance_name) 1026 return self._BatchExecuteAndWait( 1027 requests, operation_scope=OperationScope.ZONE, scope_name=zone) 1028 1029 def _BatchExecuteAndWait(self, requests, operation_scope, scope_name=None): 1030 """Batch processing requests and wait on the operation. 1031 1032 Args: 1033 requests: A dictionary. The key is a string representing the resource 1034 name. For example, an instance name, or an image name. 1035 operation_scope: A value from OperationScope, "zone", "region", 1036 or "global". 1037 scope_name: If operation_scope is "zone" or "region", this should be 1038 the name of the zone or region, e.g. "us-central1-f". 
def ListZones(self):
    """List all zones in the project.

    Returns:
        Gcompute response instance. For example:
        {
            "id": "projects/google.com%3Aandroid-build-staging/zones",
            "kind": "compute#zoneList",
            "selfLink": "https://www.googleapis.com/compute/v1/projects/"
                        "google.com:android-build-staging/zones",
            "items": [
                {
                    'creationTimestamp': '2014-07-15T10:44:08.663-07:00',
                    'description': 'asia-east1-c',
                    'id': '2222',
                    'kind': 'compute#zone',
                    'name': 'asia-east1-c',
                    'region': 'https://www.googleapis.com/compute/v1/projects/'
                              'google.com:android-build-staging/regions/asia-east1',
                    'selfLink': 'https://www.googleapis.com/compute/v1/projects/'
                                'google.com:android-build-staging/zones/asia-east1-c',
                    'status': 'UP'
                },
                ...
            ]
        }
        See cloud cluster's api/mixer_zones.proto
    """
    zones_api = self.service.zones()
    list_request = zones_api.list(project=self._project)
    return self.Execute(list_request)
def ListRegions(self):
    """List all the regions for a project.

    Returns:
        A dictionary containing all the regions and additional data. See
        this link for the detailed response:
        https://cloud.google.com/compute/docs/reference/latest/regions/list.
        Example:
        {
            'items': [{
                'name':
                    'us-central1',
                'quotas': [{
                    'usage': 2.0,
                    'limit': 24.0,
                    'metric': 'CPUS'
                }, {
                    'usage': 1.0,
                    'limit': 23.0,
                    'metric': 'IN_USE_ADDRESSES'
                }, {
                    'usage': 209.0,
                    'limit': 10240.0,
                    'metric': 'DISKS_TOTAL_GB'
                }, {
                    'usage': 1000.0,
                    'limit': 20000.0,
                    'metric': 'INSTANCES'
                }]
            },..]
        }
    """
    regions_api = self.service.regions()
    list_request = regions_api.list(project=self._project)
    return self.Execute(list_request)
1117 Example: 1118 { 1119 'items': [{ 1120 'name': 1121 'us-central1', 1122 'quotas': [{ 1123 'usage': 2.0, 1124 'limit': 24.0, 1125 'metric': 'CPUS' 1126 }, { 1127 'usage': 1.0, 1128 'limit': 23.0, 1129 'metric': 'IN_USE_ADDRESSES' 1130 }, { 1131 'usage': 209.0, 1132 'limit': 10240.0, 1133 'metric': 'DISKS_TOTAL_GB' 1134 }, { 1135 'usage': 1000.0, 1136 'limit': 20000.0, 1137 'metric': 'INSTANCES' 1138 }] 1139 },..] 1140 } 1141 """ 1142 api = self.service.regions().list(project=self._project) 1143 return self.Execute(api) 1144 1145 def _GetNetworkArgs(self, network, zone, disable_external_ip): 1146 """Helper to generate network args that is used to create an instance. 1147 1148 See: https://cloud.google.com/compute/docs/reference/rest/v1/instances 1149 for more information on the specific fields. 1150 1151 Args: 1152 network: A string, e.g. "default". 1153 zone: String, representing zone name, e.g. "us-central1-f" 1154 disable_external_ip: Boolean, true if the external ip should be 1155 disabled. 1156 1157 Returns: 1158 A dictionary representing network args. 1159 """ 1160 network_args = { 1161 "network": self.GetNetworkUrl(network), 1162 "accessConfigs": [] if disable_external_ip else [{ 1163 "name": "External NAT", 1164 "type": "ONE_TO_ONE_NAT" 1165 }] 1166 } 1167 # default network can be blank or set to default, we don't need to 1168 # specify the subnetwork for that. 1169 if network and network != "default": 1170 network_args["subnetwork"] = self.GetSubnetworkUrl(network, zone) 1171 return network_args 1172 1173 def _GetDiskArgs(self, 1174 disk_name, 1175 image_name, 1176 image_project=None, 1177 disk_size_gb=None): 1178 """Helper to generate disk args that is used to create an instance. 1179 1180 Args: 1181 disk_name: A string 1182 image_name: A string 1183 image_project: A string 1184 disk_size_gb: An integer 1185 1186 Returns: 1187 List holding dict of disk args. 
def _GetExtraDiskArgs(self, extra_disk_name, zone):
    """Get extra disk arg for given disk.

    Args:
        extra_disk_name: String, name of the disk.
        zone: String, representing zone name, e.g. "us-central1-f"

    Returns:
        A list holding one dictionary of disk args. The disk is attached
        through its "source" path, as a non-boot SCSI disk.
    """
    return [{
        "type": "PERSISTENT",
        "mode": "READ_WRITE",
        "source": "projects/%s/zones/%s/disks/%s" % (self._project, zone,
                                                     extra_disk_name),
        "autoDelete": True,
        "boot": False,
        "interface": "SCSI",
        "deviceName": extra_disk_name,
    }]


# pylint: disable=too-many-locals
def CreateInstance(self,
                   instance,
                   image_name,
                   machine_type,
                   metadata,
                   network,
                   zone,
                   disk_args=None,
                   image_project=None,
                   gpu=None,
                   disk_type=None,
                   extra_disk_name=None,
                   extra_scopes=None,
                   tags=None,
                   disable_external_ip=False):
    """Create a gce instance with a gce image.

    Args:
        instance: String, instance name.
        image_name: String, source image used to create this disk.
        machine_type: String, representing machine_type,
                      e.g. "n1-standard-1"
        metadata: Dict, maps a metadata name to its value.
        network: String, representing network name, e.g. "default"
        zone: String, representing zone name, e.g. "us-central1-f"
        disk_args: A list of disk arg dicts, see _GetDiskArgs for the
                   format. If None (or empty), a boot disk is created from
                   the given image. The list passed in is not modified.
        image_project: String, name of the project where the image
                       belongs. Assume the default project if None.
        gpu: String, type of gpu to attach. e.g. "nvidia-tesla-k80", if
             None no gpus will be attached. For more details see:
             https://cloud.google.com/compute/docs/gpus/add-gpus
        disk_type: String, type of GCE instance disk type. Applied to every
                   disk that is created with the instance.
        extra_disk_name: String, the name of the extra disk to attach.
        extra_scopes: A list of extra scopes to be provided to the instance.
        tags: A list of tags to associate with the instance. e.g.
              ["http-server", "https-server"]
        disable_external_ip: Boolean, true if instance external ip should be
                             disabled.
    """
    if disk_args:
        # Work on a private copy: the code below appends to the list and
        # edits the dicts, which must not leak back to the caller.
        disks = copy.deepcopy(list(disk_args))
    else:
        disks = self._GetDiskArgs(instance, image_name, image_project)
    if extra_disk_name:
        disks.extend(self._GetExtraDiskArgs(extra_disk_name, zone))

    if disk_type:
        disk_type_url = "projects/%s/zones/%s/diskTypes/%s" % (
            self._project, zone, disk_type)
        for disk in disks:
            # "initializeParams" describes a disk created alongside the
            # instance and is mutually exclusive with "source", so disks
            # that attach an existing disk (e.g. the extra disk) are
            # skipped.
            if "source" in disk:
                continue
            disk.setdefault("initializeParams", {})["diskType"] = (
                disk_type_url)

    scopes = list(self.DEFAULT_INSTANCE_SCOPE)
    if extra_scopes:
        scopes.extend(extra_scopes)

    # Add labels for giving the instances ability to be filter for
    # acloud list/delete cmds.
    body = {
        "machineType": self.GetMachineType(machine_type, zone)["selfLink"],
        "name": instance,
        "networkInterfaces": [
            self._GetNetworkArgs(network, zone, disable_external_ip)
        ],
        "disks": disks,
        "labels": {constants.LABEL_CREATE_BY: getpass.getuser()},
        "serviceAccounts": [{
            "email": "default",
            "scopes": scopes,
        }],
        "enableVtpm": True,
    }

    if tags:
        body["tags"] = {"items": tags}
    if gpu:
        body["guestAccelerators"] = [{
            "acceleratorType": self.GetAcceleratorUrl(gpu, zone),
            "acceleratorCount": 1
        }]
        # Instances with GPUs cannot live migrate because they are assigned
        # to specific hardware devices.
        body["scheduling"] = {"onHostMaintenance": "terminate"}
    if metadata:
        metadata_list = [{
            _METADATA_KEY: key,
            _METADATA_KEY_VALUE: val
        } for key, val in metadata.items()]
        body[_METADATA] = {_ITEMS: metadata_list}
    logger.info("Creating instance: project %s, zone %s, body:%s",
                self._project, zone, body)
    api = self.service.instances().insert(
        project=self._project, zone=zone, body=body)
    operation = self.Execute(api)
    self.WaitOnOperation(
        operation, operation_scope=OperationScope.ZONE, scope_name=zone)
    logger.info("Instance %s has been created.", instance)
def DeleteInstance(self, instance, zone):
    """Delete a gce instance and wait for the deletion to finish.

    Args:
        instance: A string, instance name.
        zone: A string, e.g. "us-central1-f"
    """
    logger.info("Deleting instance: %s", instance)
    delete_request = self.service.instances().delete(
        project=self._project, zone=zone, instance=instance)
    pending_operation = self.Execute(delete_request)
    self.WaitOnOperation(
        pending_operation,
        operation_scope=OperationScope.ZONE,
        scope_name=zone)
    logger.info("Deleted instance: %s", instance)


def DeleteInstances(self, instances, zone):
    """Delete multiple instances.

    Args:
        instances: A list of instance names.
        zone: A string, e.g. "us-central1-f".

    Returns:
        A tuple, (deleted, failed, error_msgs)
        deleted: A list of names of instances that have been deleted.
        failed: A list of names of instances that we fail to delete.
        error_msgs: A list of failure messages.
    """
    instances_api = self.service.instances()
    # Project and zone are bound here; the batch helper adds 'instance'.
    delete_in_zone = functools.partial(
        instances_api.delete, project=self._project, zone=zone)
    return self._BatchExecuteOnInstances(instances, zone, delete_in_zone)
def ResetInstance(self, instance, zone):
    """Reset the gce instance and wait for the reset to finish.

    Args:
        instance: A string, instance name.
        zone: A string, e.g. "us-central1-f".
    """
    logger.info("Resetting instance: %s", instance)
    reset_request = self.service.instances().reset(
        project=self._project, zone=zone, instance=instance)
    pending_operation = self.Execute(reset_request)
    self.WaitOnOperation(
        pending_operation,
        operation_scope=OperationScope.ZONE,
        scope_name=zone)
    logger.info("Instance has been reset: %s", instance)


def GetMachineType(self, machine_type, zone):
    """Get the resource for a given machine type.

    Args:
        machine_type: A string, name of the machine type.
        zone: A string, e.g. "us-central1-f"

    Returns:
        A machine type resource in json.
        https://cloud.google.com/compute/docs/reference/latest/
        machineTypes#resource
    """
    machine_types_api = self.service.machineTypes()
    get_request = machine_types_api.get(
        project=self._project, zone=zone, machineType=machine_type)
    return self.Execute(get_request)


def GetAcceleratorUrl(self, accelerator_type, zone):
    """Get URL for a given type of accelerator.

    Args:
        accelerator_type: A string, representing the accelerator, e.g
                          "nvidia-tesla-k80"
        zone: A string representing a zone, e.g. "us-west1-b"

    Returns:
        A URL that points to the accelerator resource, e.g.
        https://www.googleapis.com/compute/v1/projects/PROJECT_ID/zones/
        us-west1-b/acceleratorTypes/nvidia-tesla-k80
    """
    accelerator_types_api = self.service.acceleratorTypes()
    get_request = accelerator_types_api.get(
        project=self._project, zone=zone, acceleratorType=accelerator_type)
    accelerator = self.Execute(get_request)
    return accelerator["selfLink"]
def GetNetworkUrl(self, network):
    """Get URL for a given network.

    Args:
        network: A string, representing network name, e.g "default"

    Returns:
        A URL that points to the network resource, e.g.
        https://www.googleapis.com/compute/v1/projects/PROJECT_ID/
        global/networks/default
    """
    api = self.service.networks().get(
        project=self._project, network=network)
    result = self.Execute(api)
    return result["selfLink"]


def GetSubnetworkUrl(self, network, zone):
    """Get the subnetwork URL for a given network and zone.

    Return the subnetwork for the network in the specified region that the
    specified zone resides in. If there is no subnetwork for the specified
    zone, raise an exception.

    Args:
        network: A string, representing network name, e.g "default"
        zone: String, representing zone name, e.g. "us-central1-f"

    Returns:
        A URL that points to the subnetwork resource, e.g.
        https://www.googleapis.com/compute/v1/projects/PROJECT_ID/
        regions/us-central1/subnetworks/SUBNETWORK_NAME

    Raises:
        errors.NoSubnetwork: When no subnetwork exists for the zone
        specified.
    """
    api = self.service.networks().get(
        project=self._project, network=network)
    result = self.Execute(api)
    # The region is the zone name without its trailing "-<letter>" part.
    region = zone.rsplit("-", 1)[0]
    # Compare against the whole "/regions/<region>/" path segment. A bare
    # substring test would let "europe-west1" match a subnetwork in
    # "europe-west10", or one whose own name contains the region.
    region_segment = "/regions/%s/" % region
    for subnetwork in result.get("subnetworks", []):
        if region_segment in subnetwork:
            return subnetwork
    raise errors.NoSubnetwork("No subnetwork for network %s in region %s" %
                              (network, region))


def CompareMachineSize(self, machine_type_1, machine_type_2, zone):
    """Compare the size of two machine types.

    Metrics are compared in the order of self.MACHINE_SIZE_METRICS, and the
    comparison stops at the first metric where the first type is smaller.

    Args:
        machine_type_1: A string representing a machine type, e.g. n1-standard-1
        machine_type_2: A string representing a machine type, e.g. n1-standard-1
        zone: A string representing a zone, e.g. "us-central1-f"

    Returns:
        -1 if any metric of machine size of the first type is smaller than
           the second type.
        0 if all metrics of machine size are equal.
        1 if at least one metric of machine size of the first type is
           greater than the second type and all metrics of first type are
           greater or equal to the second type.

    Raises:
        errors.DriverError: For malformed response.
    """
    machine_info_1 = self.GetMachineType(machine_type_1, zone)
    machine_info_2 = self.GetMachineType(machine_type_2, zone)
    result = 0
    for metric in self.MACHINE_SIZE_METRICS:
        if metric not in machine_info_1 or metric not in machine_info_2:
            raise errors.DriverError(
                "Malformed machine size record: Can't find '%s' in %s or %s"
                % (metric, machine_info_1, machine_info_2))
        cmp_result = machine_info_1[metric] - machine_info_2[metric]
        if cmp_result < 0:
            return -1
        if cmp_result > 0:
            # Remember the first type is larger, but keep scanning: a later
            # smaller metric still makes the overall answer -1.
            result = 1
    return result
def GetSerialPortOutput(self, instance, zone, port=1):
    """Get serial port output.

    Args:
        instance: string, instance name.
        zone: string, zone name.
        port: int, which COM port to read from, 1-4, default to 1.

    Returns:
        String, contents of the output.

    Raises:
        errors.DriverError: For malformed response.
    """
    serial_request = self.service.instances().getSerialPortOutput(
        project=self._project, zone=zone, instance=instance, port=port)
    response = self.Execute(serial_request)
    if "contents" in response:
        return response["contents"]
    raise errors.DriverError(
        "Malformed response for GetSerialPortOutput: %s" % response)


def GetInstanceNamesByIPs(self, ips):
    """Get Instance names by IPs.

    This function will go through all instances, which
    could be slow if there are too many instances. However, currently
    GCE doesn't support search for instance by IP.

    Args:
        ips: A set of IPs.

    Returns:
        A dictionary where key is IP and value is instance name or None
        if instance is not found for the given IP.
    """
    # Every requested IP starts out mapped to None.
    names_by_ip = dict.fromkeys(ips)
    for gce_instance in self.ListInstances():
        try:
            addresses = GetInstanceIP(gce_instance)
            # Prefer the external address; fall back to the internal one.
            address = addresses.external or addresses.internal
            if address not in ips:
                continue
            names_by_ip[address] = gce_instance["name"]
        except (IndexError, KeyError) as e:
            # An instance missing the expected fields is logged and skipped.
            logger.error("Could not get instance names by ips: %s", str(e))
    return names_by_ip
def GetInstanceIP(self, instance, zone):
    """Get Instance IP given instance name.

    Args:
        instance: String, representing instance name.
        zone: String, name of the zone.

    Returns:
        ssh.IP object, that stores internal and external ip of the instance.
    """
    gce_instance = self.GetInstance(instance, zone)
    # This call targets the module-level GetInstanceIP() helper, which
    # shares this method's name and takes the instance dict.
    return GetInstanceIP(gce_instance)


@utils.TimeExecute(function_description="Updating instance metadata: ")
def SetInstanceMetadata(self, zone, instance, body):
    """Set instance metadata and wait for the update to finish.

    Args:
        zone: String, name of zone.
        instance: String, representing instance name.
        body: Dict, Metadata body.
              metadata is in the following format.
              {
                "kind": "compute#metadata",
                "fingerprint": "a-23icsyx4E=",
                "items": [
                  {
                    "key": "sshKeys",
                    "value": "key"
                  }, ...
                ]
              }
    """
    set_request = self.service.instances().setMetadata(
        project=self._project, zone=zone, instance=instance, body=body)
    pending_operation = self.Execute(set_request)
    self.WaitOnOperation(
        pending_operation,
        operation_scope=OperationScope.ZONE,
        scope_name=zone)
def AddSshRsaInstanceMetadata(self, user, ssh_rsa_path, instance):
    """Add the public rsa key to the instance's metadata.

    Confirm that the instance has this public key in the instance's
    metadata, if not we will add this public key.

    Args:
        user: String, name of the user which the key belongs to.
        ssh_rsa_path: String, The absolute path to public rsa key.
        instance: String, representing instance name.
    """
    key_path = os.path.expanduser(ssh_rsa_path)
    public_key = GetRsaKey(key_path)
    entry = "%s:%s" % (user, public_key)
    logger.debug("New RSA entry: %s", entry)

    zone = self.GetZoneByInstance(instance)
    metadata = self.GetInstance(instance, zone).get(_METADATA)
    if not RsaNotInMetadata(metadata, entry):
        # The key is already registered; nothing to update.
        return
    self.UpdateRsaInMetadata(zone, instance, metadata, entry)


def GetZoneByInstance(self, instance):
    """Get the zone from instance name.

    The instance is looked up with an aggregated list filtered by name.
    Gcompute response instance. For example:
    {
        'items':
        {
            'zones/europe-west3-b':
            {
                'warning':
                {
                    'message': "There are no results for scope
                    'zones/europe-west3-b' on this page.",
                    'code': 'NO_RESULTS_ON_PAGE',
                    'data': [{'value': u'zones/europe-west3-b',
                              'key': u'scope'}]
                }
            },
            'zones/asia-east1-b':
            {
                'instances': [
                {
                    'name': 'ins-bc212dc8-userbuild-aosp-cf-x86-64-phone',
                    'status': 'RUNNING',
                    'cpuPlatform': 'Intel Broadwell',
                    'startRestricted': False,
                    'labels': {u'created_by': u'herbertxue'},
                    ...
                }]
            }
        }
    }

    Args:
        instance: String, representing instance name.

    Raises:
        errors.GetGceZoneError: Can't get zone from instance name.

    Returns:
        String of zone name.
    """
    list_request = self.service.instances().aggregatedList(
        project=self._project,
        filter="name=%s" % instance)
    scopes = self.Execute(list_request)["items"]
    for scope_name, scope_data in scopes.items():
        # Scopes without a match carry a warning instead of "instances".
        if "instances" not in scope_data:
            continue
        matched = _ZONE_RE.match(scope_name)
        if matched:
            return matched.group("zone")
    raise errors.GetGceZoneError("Can't get zone from the instance name %s"
                                 % instance)
def GetZonesByInstances(self, instances):
    """Group instance names by the zone each instance lives in.

    Args:
        instances: List of strings, representing instance names.

    Returns:
        A dictionary that contains the name of all instances in the zone.
        The key is the name of the zone, and the value is a list contains
        the name of the instances.
    """
    instances_by_zone = {}
    for name in instances:
        zone = self.GetZoneByInstance(name)
        instances_by_zone.setdefault(zone, []).append(name)
    return instances_by_zone


def CheckAccess(self):
    """Check if the user has read access to the cloud project.

    Returns:
        True if the user has at least read access to the project.
        False otherwise.

    Raises:
        errors.HttpError if other unexpected error happens when
        accessing the project.
    """
    list_request = self.service.zones().list(project=self._project)
    # Retry on the usual codes except "access denied", which is the answer
    # this check is looking for.
    retry_http_codes = copy.copy(self.RETRY_HTTP_CODES)
    retry_http_codes.remove(self.ACCESS_DENIED_CODE)
    try:
        self.Execute(list_request, retry_http_codes=retry_http_codes)
    except errors.HttpError as e:
        if e.code != self.ACCESS_DENIED_CODE:
            raise
        return False
    return True
def UpdateRsaInMetadata(self, zone, instance, metadata, entry):
    """Update ssh public key to sshKeys's value in this metadata.

    Args:
        zone: String, name of zone.
        instance: String, representing instance name.
        metadata: Dict, maps a metadata name to its value.
        entry: String, ssh public key.
    """
    existing_item = GetSshKeyFromMetadata(metadata)
    if not existing_item:
        # Since there is no ssh key item in the metadata, we need to add it in.
        metadata[_ITEMS].append({_METADATA_KEY: _SSH_KEYS_NAME,
                                 _METADATA_KEY_VALUE: entry})
    else:
        # The ssh key item exists, so append the new entry to its value.
        # The current value may be empty; dropping falsy parts avoids an
        # empty line in front of the new entry.
        key_lines = [
            line for line in (existing_item[_METADATA_KEY_VALUE], entry)
            if line
        ]
        existing_item[_METADATA_KEY_VALUE] = "\n".join(key_lines)
    utils.PrintColorString(
        "Ssh public key doesn't exist in the instance(%s), adding it."
        % instance, utils.TextColors.WARNING)
    self.SetInstanceMetadata(zone, instance, metadata)


def RsaNotInMetadata(metadata, entry):
    """Check ssh public key exist in sshKeys's value.

    Note that an "items" list is added to |metadata| when it has none.

    Args:
        metadata: Dict, maps a metadata name to its value.
        entry: String, ssh public key.

    Returns:
        Boolean. True if ssh public key doesn't exist in metadata.
    """
    items = metadata.setdefault(_ITEMS, [])
    already_present = any(
        item[_METADATA_KEY] == _SSH_KEYS_NAME
        and entry in item[_METADATA_KEY_VALUE]
        for item in items)
    return not already_present
def GetSshKeyFromMetadata(metadata):
    """Get ssh key item from metadata.

    Note that an "items" list is added to |metadata| when it has none.

    Args:
        metadata: Dict, maps a metadata name to its value.

    Returns:
        Dict of ssh_key_item in metadata, None if can't find the ssh key item
        in metadata.
    """
    for item in metadata.setdefault(_ITEMS, []):
        if item.get(_METADATA_KEY, '') == _SSH_KEYS_NAME:
            return item
    return None


def GetRsaKey(ssh_rsa_path):
    """Get rsa key from rsa path.

    Args:
        ssh_rsa_path: String, The absolute path to public rsa key.

    Returns:
        String, rsa key.

    Raises:
        errors.DriverError: RSA file does not exist.
    """
    ssh_rsa_path = os.path.expanduser(ssh_rsa_path)
    if not os.path.exists(ssh_rsa_path):
        raise errors.DriverError(
            "RSA file %s does not exist." % ssh_rsa_path)

    with open(ssh_rsa_path) as f:
        rsa = f.read()
        # Surrounding whitespace must be removed before the key is
        # verified; an empty read is passed through unchanged.
        rsa = rsa.strip() if rsa else rsa
        utils.VerifyRsaPubKey(rsa)
    return rsa


def GetInstanceIP(instance):
    """Get the internal and external IP for a given instance

    Args:
        instance: A dict, representing a gce instance

    Returns:
        A populated IP object. An address that the instance does not
        report is returned as an empty string.
    """
    network_interface = instance["networkInterfaces"][0]
    # "accessConfigs" may be missing or an empty list (this client creates
    # instances with an empty list when the external IP is disabled).
    # Treat both as "no external address" instead of raising IndexError.
    access_configs = network_interface.get("accessConfigs") or [{}]
    external_ip = access_configs[0].get("natIP", "")
    internal_ip = network_interface.get("networkIP", "")
    return IP(internal=internal_ip, external=external_ip)


def GetGCEHostName(gce_project, instance, zone):
    """Get the GCE host name with specific rule.

    A domain-scoped project such as "DOMAIN:PROJECT" is written as
    "PROJECT.DOMAIN" inside the host name.

    Args:
        gce_project: String, GCE project name.
        instance: String, GCE instance name.
        zone: String, Instance zone name.

    Returns:
        One host name converted by instance name, project name, and zone.
    """
    project = gce_project
    if ":" in gce_project:
        parts = gce_project.split(":")
        domain, project_no_domain = parts[0], parts[1]
        project = f"{project_no_domain}.{domain}"
    return f"nic0.{instance}.{zone}.c.{project}.internal.gcpnode.com"