1#!/usr/bin/env python 2# 3# Copyright 2016 - The Android Open Source Project 4# 5# Licensed under the Apache License, Version 2.0 (the "License"); 6# you may not use this file except in compliance with the License. 7# You may obtain a copy of the License at 8# 9# http://www.apache.org/licenses/LICENSE-2.0 10# 11# Unless required by applicable law or agreed to in writing, software 12# distributed under the License is distributed on an "AS IS" BASIS, 13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14# See the License for the specific language governing permissions and 15# limitations under the License. 16"""A client that manages Google Compute Engine. 17 18** ComputeClient ** 19 20ComputeClient is a wrapper around Google Compute Engine APIs. 21It provides a set of methods for managing a google compute engine project, 22such as creating images, creating instances, etc. 23 24Design philosophy: We tried to make ComputeClient as stateless as possible, 25and it only keeps states about authentication. ComputeClient should be very 26generic, and only knows how to talk to Compute Engine APIs. 27""" 28# pylint: disable=too-many-lines 29import copy 30import functools 31import getpass 32import logging 33import os 34import re 35 36import six 37 38from acloud import errors 39from acloud.internal import constants 40from acloud.internal.lib import base_cloud_client 41from acloud.internal.lib import utils 42from acloud.internal.lib.ssh import IP 43 44 45logger = logging.getLogger(__name__) 46 47_MAX_RETRIES_ON_FINGERPRINT_CONFLICT = 10 48_METADATA_KEY = "key" 49_METADATA_KEY_VALUE = "value" 50_SSH_KEYS_NAME = "sshKeys" 51_ITEMS = "items" 52_METADATA = "metadata" 53_ZONE_RE = re.compile(r"^zones/(?P<zone>.+)") 54# Quota metrics 55_METRIC_CPUS = "CPUS" 56_METRIC_DISKS_GB = "DISKS_TOTAL_GB" 57_METRIC_USE_ADDRESSES = "IN_USE_ADDRESSES" 58_METRICS = [_METRIC_CPUS, _METRIC_DISKS_GB, _METRIC_USE_ADDRESSES] 59_USAGE = "usage" 60_LIMIT = "limit" 61# The minimum requirement to create an instance. 62_REQUIRE_METRICS = {_METRIC_CPUS: 8, 63 _METRIC_DISKS_GB: 1000, 64 _METRIC_USE_ADDRESSES: 1} 65 66BASE_DISK_ARGS = { 67 "type": "PERSISTENT", 68 "boot": True, 69 "mode": "READ_WRITE", 70 "autoDelete": True, 71 "initializeParams": {}, 72} 73 74 75class OperationScope(object): 76 """Represents operation scope enum.""" 77 ZONE = "zone" 78 REGION = "region" 79 GLOBAL = "global" 80 81 82class PersistentDiskType(object): 83 """Represents different persistent disk types. 84 85 pd-standard for regular hard disk. 86 pd-ssd for solid state disk. 87 """ 88 STANDARD = "pd-standard" 89 SSD = "pd-ssd" 90 91 92class ImageStatus(object): 93 """Represents the status of an image.""" 94 PENDING = "PENDING" 95 READY = "READY" 96 FAILED = "FAILED" 97 98 99def _IsFingerPrintError(exc): 100 """Determine if the exception is a HTTP error with code 412. 101 102 Args: 103 exc: Exception instance. 104 105 Returns: 106 Boolean. True if the exception is a "Precondition Failed" error. 107 """ 108 return isinstance(exc, errors.HttpError) and exc.code == 412 109 110 111# pylint: disable=too-many-public-methods 112class ComputeClient(base_cloud_client.BaseCloudApiClient): 113 """Client that manages GCE.""" 114 115 # API settings, used by BaseCloudApiClient. 116 API_NAME = "compute" 117 API_VERSION = "v1" 118 SCOPE = " ".join([ 119 "https://www.googleapis.com/auth/compute", 120 "https://www.googleapis.com/auth/devstorage.read_write" 121 ]) 122 # Default settings for gce operations 123 DEFAULT_INSTANCE_SCOPE = [ 124 "https://www.googleapis.com/auth/androidbuild.internal", 125 "https://www.googleapis.com/auth/devstorage.read_only", 126 "https://www.googleapis.com/auth/logging.write" 127 ] 128 OPERATION_TIMEOUT_SECS = 30 * 60 # 30 mins 129 OPERATION_POLL_INTERVAL_SECS = 20 130 MACHINE_SIZE_METRICS = ["guestCpus", "memoryMb"] 131 ACCESS_DENIED_CODE = 403 132 133 def __init__(self, acloud_config, oauth2_credentials): 134 """Initialize. 135 136 Args: 137 acloud_config: An AcloudConfig object. 138 oauth2_credentials: An oauth2client.OAuth2Credentials instance. 139 """ 140 super(ComputeClient, self).__init__(oauth2_credentials) 141 self._project = acloud_config.project 142 143 def _GetOperationStatus(self, operation, operation_scope, scope_name=None): 144 """Get status of an operation. 145 146 Args: 147 operation: An Operation resource in the format of json. 148 operation_scope: A value from OperationScope, "zone", "region", 149 or "global". 150 scope_name: If operation_scope is "zone" or "region", this should be 151 the name of the zone or region, e.g. "us-central1-f". 152 153 Returns: 154 Status of the operation, one of "DONE", "PENDING", "RUNNING". 155 156 Raises: 157 errors.DriverError: if the operation fails. 158 """ 159 operation_name = operation["name"] 160 if operation_scope == OperationScope.GLOBAL: 161 api = self.service.globalOperations().get( 162 project=self._project, operation=operation_name) 163 result = self.Execute(api) 164 elif operation_scope == OperationScope.ZONE: 165 api = self.service.zoneOperations().get( 166 project=self._project, 167 operation=operation_name, 168 zone=scope_name) 169 result = self.Execute(api) 170 elif operation_scope == OperationScope.REGION: 171 api = self.service.regionOperations().get( 172 project=self._project, 173 operation=operation_name, 174 region=scope_name) 175 result = self.Execute(api) 176 177 if result.get("error"): 178 errors_list = result["error"]["errors"] 179 raise errors.DriverError( 180 "Get operation state failed, errors: %s" % str(errors_list)) 181 return result["status"] 182 183 def WaitOnOperation(self, operation, operation_scope, scope_name=None): 184 """Wait for an operation to finish. 185 186 Args: 187 operation: An Operation resource in the format of json. 188 operation_scope: A value from OperationScope, "zone", "region", 189 or "global". 190 scope_name: If operation_scope is "zone" or "region", this should be 191 the name of the zone or region, e.g. "us-central1-f". 192 """ 193 timeout_exception = errors.GceOperationTimeoutError( 194 "Operation hits timeout, did not complete within %d secs." % 195 self.OPERATION_TIMEOUT_SECS) 196 utils.PollAndWait( 197 func=self._GetOperationStatus, 198 expected_return="DONE", 199 timeout_exception=timeout_exception, 200 timeout_secs=self.OPERATION_TIMEOUT_SECS, 201 sleep_interval_secs=self.OPERATION_POLL_INTERVAL_SECS, 202 operation=operation, 203 operation_scope=operation_scope, 204 scope_name=scope_name) 205 206 def GetProject(self): 207 """Get project information. 208 209 Returns: 210 A project resource in json. 211 """ 212 api = self.service.projects().get(project=self._project) 213 return self.Execute(api) 214 215 def GetRegionInfo(self): 216 """Get region information that includes all quotas limit. 217 218 The region info example: 219 {"items": 220 [{"status": "UP", 221 "name": "asia-east1", 222 "quotas": 223 [{"usage": 92, "metric": "CPUS", "limit": 100}, 224 {"usage": 640, "metric": "DISKS_TOTAL_GB", "limit": 10240}, 225 ...]]} 226 } 227 228 Returns: 229 A region resource in json. 230 """ 231 api = self.service.regions().list(project=self._project) 232 return self.Execute(api) 233 234 @staticmethod 235 def GetMetricQuota(regions_info, zone, metric): 236 """Get CPU quota limit in specific zone and project. 237 238 Args: 239 regions_info: Dict, regions resource in json. 240 zone: String, name of zone. 241 metric: String, name of metric, e.g. "CPUS". 242 243 Returns: 244 A dict of quota information. Such as 245 {"usage": 100, "metric": "CPUS", "limit": 200} 246 """ 247 for region_info in regions_info["items"]: 248 if region_info["name"] in zone: 249 for quota in region_info["quotas"]: 250 if quota["metric"] == metric: 251 return quota 252 logger.info("Can't get %s quota info from zone(%s)", metric, zone) 253 return None 254 255 def EnoughMetricsInZone(self, zone): 256 """Check the zone have enough metrics to create instance. 257 258 The metrics include CPUS and DISKS. 259 260 Args: 261 zone: String, name of zone. 262 263 Returns: 264 Boolean. True if zone have enough quota. 265 """ 266 regions_info = self.GetRegionInfo() 267 for metric in _METRICS: 268 quota = self.GetMetricQuota(regions_info, zone, metric) 269 if not quota: 270 logger.debug( 271 "Can't query the metric(%s) in zone(%s)", metric, zone) 272 return False 273 if quota[_LIMIT] - quota[_USAGE] < _REQUIRE_METRICS[metric]: 274 logger.debug( 275 "The metric(%s) is over limit in zone(%s)", metric, zone) 276 return False 277 return True 278 279 def GetDisk(self, disk_name, zone): 280 """Get disk information. 281 282 Args: 283 disk_name: A string. 284 zone: String, name of zone. 285 286 Returns: 287 An disk resource in json. 288 https://cloud.google.com/compute/docs/reference/latest/disks#resource 289 """ 290 api = self.service.disks().get( 291 project=self._project, zone=zone, disk=disk_name) 292 return self.Execute(api) 293 294 def CheckDiskExists(self, disk_name, zone): 295 """Check if disk exists. 296 297 Args: 298 disk_name: A string 299 zone: String, name of zone. 300 301 Returns: 302 True if disk exists, otherwise False. 303 """ 304 try: 305 self.GetDisk(disk_name, zone) 306 exists = True 307 except errors.ResourceNotFoundError: 308 exists = False 309 logger.debug("CheckDiskExists: disk_name: %s, result: %s", disk_name, 310 exists) 311 return exists 312 313 def CreateDisk(self, 314 disk_name, 315 source_image, 316 size_gb, 317 zone, 318 source_project=None, 319 disk_type=PersistentDiskType.STANDARD): 320 """Create a gce disk. 321 322 Args: 323 disk_name: String 324 source_image: String, name of the image. 325 size_gb: Integer, size in gb. 326 zone: String, name of the zone, e.g. us-central1-b. 327 source_project: String, required if the image is located in a different 328 project. 329 disk_type: String, a value from PersistentDiskType, STANDARD 330 for regular hard disk or SSD for solid state disk. 331 """ 332 source_project = source_project or self._project 333 source_image = "projects/%s/global/images/%s" % ( 334 source_project, source_image) if source_image else None 335 logger.info("Creating disk %s, size_gb: %d, source_image: %s", 336 disk_name, size_gb, str(source_image)) 337 body = { 338 "name": disk_name, 339 "sizeGb": size_gb, 340 "type": "projects/%s/zones/%s/diskTypes/%s" % (self._project, zone, 341 disk_type), 342 } 343 api = self.service.disks().insert( 344 project=self._project, 345 sourceImage=source_image, 346 zone=zone, 347 body=body) 348 operation = self.Execute(api) 349 try: 350 self.WaitOnOperation( 351 operation=operation, 352 operation_scope=OperationScope.ZONE, 353 scope_name=zone) 354 except errors.DriverError: 355 logger.error("Creating disk failed, cleaning up: %s", disk_name) 356 if self.CheckDiskExists(disk_name, zone): 357 self.DeleteDisk(disk_name, zone) 358 raise 359 logger.info("Disk %s has been created.", disk_name) 360 361 def DeleteDisk(self, disk_name, zone): 362 """Delete a gce disk. 363 364 Args: 365 disk_name: A string, name of disk. 366 zone: A string, name of zone. 367 """ 368 logger.info("Deleting disk %s", disk_name) 369 api = self.service.disks().delete( 370 project=self._project, zone=zone, disk=disk_name) 371 operation = self.Execute(api) 372 self.WaitOnOperation( 373 operation=operation, 374 operation_scope=OperationScope.ZONE, 375 scope_name=zone) 376 logger.info("Deleted disk %s", disk_name) 377 378 def DeleteDisks(self, disk_names, zone): 379 """Delete multiple disks. 380 381 Args: 382 disk_names: A list of disk names. 383 zone: A string, name of zone. 384 385 Returns: 386 A tuple, (deleted, failed, error_msgs) 387 deleted: A list of names of disks that have been deleted. 388 failed: A list of names of disks that we fail to delete. 389 error_msgs: A list of failure messages. 390 """ 391 if not disk_names: 392 logger.warning("Nothing to delete. Arg disk_names is not provided.") 393 return [], [], [] 394 # Batch send deletion requests. 395 logger.info("Deleting disks: %s", disk_names) 396 delete_requests = {} 397 for disk_name in set(disk_names): 398 request = self.service.disks().delete( 399 project=self._project, disk=disk_name, zone=zone) 400 delete_requests[disk_name] = request 401 return self._BatchExecuteAndWait( 402 delete_requests, OperationScope.ZONE, scope_name=zone) 403 404 def ListDisks(self, zone, disk_filter=None): 405 """List disks. 406 407 Args: 408 zone: A string, representing zone name. e.g. "us-central1-f" 409 disk_filter: A string representing a filter in format of 410 FIELD_NAME COMPARISON_STRING LITERAL_STRING 411 e.g. "name ne example-instance" 412 e.g. "name eq "example-instance-[0-9]+"" 413 414 Returns: 415 A list of disks. 416 """ 417 return self.ListWithMultiPages( 418 api_resource=self.service.disks().list, 419 project=self._project, 420 zone=zone, 421 filter=disk_filter) 422 423 def CreateImage(self, 424 image_name, 425 source_uri=None, 426 source_disk=None, 427 labels=None): 428 """Create a Gce image. 429 430 Args: 431 image_name: String, name of image 432 source_uri: Full Google Cloud Storage URL where the disk image is 433 stored. e.g. "https://storage.googleapis.com/my-bucket/ 434 avd-system-2243663.tar.gz" 435 source_disk: String, this should be the disk's selfLink value 436 (including zone and project), rather than the disk_name 437 e.g. https://www.googleapis.com/compute/v1/projects/ 438 google.com:android-builds-project/zones/ 439 us-east1-d/disks/<disk_name> 440 labels: Dict, will be added to the image's labels. 441 442 Raises: 443 errors.DriverError: For malformed request or response. 444 errors.GceOperationTimeoutError: Operation takes too long to finish. 445 """ 446 if self.CheckImageExists(image_name): 447 return 448 if (source_uri and source_disk) or (not source_uri 449 and not source_disk): 450 raise errors.DriverError( 451 "Creating image %s requires either source_uri %s or " 452 "source_disk %s but not both" % (image_name, source_uri, 453 source_disk)) 454 elif source_uri: 455 logger.info("Creating image %s, source_uri %s", image_name, 456 source_uri) 457 body = { 458 "name": image_name, 459 "rawDisk": { 460 "source": source_uri, 461 }, 462 } 463 else: 464 logger.info("Creating image %s, source_disk %s", image_name, 465 source_disk) 466 body = { 467 "name": image_name, 468 "sourceDisk": source_disk, 469 } 470 if labels is not None: 471 body["labels"] = labels 472 api = self.service.images().insert(project=self._project, body=body) 473 operation = self.Execute(api) 474 try: 475 self.WaitOnOperation( 476 operation=operation, operation_scope=OperationScope.GLOBAL) 477 except errors.DriverError: 478 logger.error("Creating image failed, cleaning up: %s", image_name) 479 if self.CheckImageExists(image_name): 480 self.DeleteImage(image_name) 481 raise 482 logger.info("Image %s has been created.", image_name) 483 484 @utils.RetryOnException(_IsFingerPrintError, 485 _MAX_RETRIES_ON_FINGERPRINT_CONFLICT) 486 def SetImageLabels(self, image_name, new_labels): 487 """Update image's labels. Retry for finger print conflict. 488 489 Note: Decorator RetryOnException will retry the call for FingerPrint 490 conflict (HTTP error code 412). The fingerprint is used to detect 491 conflicts of GCE resource updates. The fingerprint is initially generated 492 by Compute Engine and changes after every request to modify or update 493 resources (e.g. GCE "image" resource has "fingerPrint" for "labels" 494 updates). 495 496 Args: 497 image_name: A string, the image name. 498 new_labels: Dict, will be added to the image's labels. 499 500 Returns: 501 A GlobalOperation resouce. 502 https://cloud.google.com/compute/docs/reference/latest/globalOperations 503 """ 504 image = self.GetImage(image_name) 505 labels = image.get("labels", {}) 506 labels.update(new_labels) 507 body = { 508 "labels": labels, 509 "labelFingerprint": image["labelFingerprint"] 510 } 511 api = self.service.images().setLabels( 512 project=self._project, resource=image_name, body=body) 513 return self.Execute(api) 514 515 def CheckImageExists(self, image_name): 516 """Check if image exists. 517 518 Args: 519 image_name: A string 520 521 Returns: 522 True if image exists, otherwise False. 523 """ 524 try: 525 self.GetImage(image_name) 526 exists = True 527 except errors.ResourceNotFoundError: 528 exists = False 529 logger.debug("CheckImageExists: image_name: %s, result: %s", 530 image_name, exists) 531 return exists 532 533 def GetImage(self, image_name, image_project=None): 534 """Get image information. 535 536 Args: 537 image_name: A string 538 image_project: A string 539 540 Returns: 541 An image resource in json. 542 https://cloud.google.com/compute/docs/reference/latest/images#resource 543 """ 544 api = self.service.images().get( 545 project=image_project or self._project, image=image_name) 546 return self.Execute(api) 547 548 def GetImageFromFamily(self, image_family, image_project=None): 549 """Get image information from image_family. 550 551 Args: 552 image_family: String of image family. 553 image_project: String of image project. 554 555 Returns: 556 An image resource in json. 557 https://cloud.google.com/compute/docs/reference/latest/images#resource 558 """ 559 api = self.service.images().getFromFamily( 560 project=image_project or self._project, family=image_family) 561 return self.Execute(api) 562 563 def DeleteImage(self, image_name): 564 """Delete an image. 565 566 Args: 567 image_name: A string 568 """ 569 logger.info("Deleting image %s", image_name) 570 api = self.service.images().delete( 571 project=self._project, image=image_name) 572 operation = self.Execute(api) 573 self.WaitOnOperation( 574 operation=operation, operation_scope=OperationScope.GLOBAL) 575 logger.info("Deleted image %s", image_name) 576 577 def DeleteImages(self, image_names): 578 """Delete multiple images. 579 580 Args: 581 image_names: A list of image names. 582 583 Returns: 584 A tuple, (deleted, failed, error_msgs) 585 deleted: A list of names of images that have been deleted. 586 failed: A list of names of images that we fail to delete. 587 error_msgs: A list of failure messages. 588 """ 589 if not image_names: 590 return [], [], [] 591 # Batch send deletion requests. 592 logger.info("Deleting images: %s", image_names) 593 delete_requests = {} 594 for image_name in set(image_names): 595 request = self.service.images().delete( 596 project=self._project, image=image_name) 597 delete_requests[image_name] = request 598 return self._BatchExecuteAndWait(delete_requests, 599 OperationScope.GLOBAL) 600 601 def ListImages(self, image_filter=None, image_project=None): 602 """List images. 603 604 Args: 605 image_filter: A string representing a filter in format of 606 FIELD_NAME COMPARISON_STRING LITERAL_STRING 607 e.g. "name ne example-image" 608 e.g. "name eq "example-image-[0-9]+"" 609 image_project: String. If not provided, will list images from the default 610 project. Otherwise, will list images from the given 611 project, which can be any arbitrary project where the 612 account has read access 613 (i.e. has the role "roles/compute.imageUser") 614 615 Read more about image sharing across project: 616 https://cloud.google.com/compute/docs/images/sharing-images-across-projects 617 618 Returns: 619 A list of images. 620 """ 621 return self.ListWithMultiPages( 622 api_resource=self.service.images().list, 623 project=image_project or self._project, 624 filter=image_filter) 625 626 def GetInstance(self, instance, zone): 627 """Get information about an instance. 628 629 Args: 630 instance: A string, representing instance name. 631 zone: A string, representing zone name. e.g. "us-central1-f" 632 633 Returns: 634 An instance resource in json. 635 https://cloud.google.com/compute/docs/reference/latest/instances#resource 636 """ 637 api = self.service.instances().get( 638 project=self._project, zone=zone, instance=instance) 639 return self.Execute(api) 640 641 def AttachAccelerator(self, instance, zone, accelerator_count, 642 accelerator_type): 643 """Attach a GPU accelerator to the instance. 644 645 Note: In order for this to succeed the following must hold: 646 - The machine schedule must be set to "terminate" i.e: 647 SetScheduling(self, instance, zone, on_host_maintenance="terminate") 648 must have been called. 649 - The machine is not starting or running. i.e. 650 StopInstance(self, instance) must have been called. 651 652 Args: 653 instance: A string, representing instance name. 654 zone: String, name of zone. 655 accelerator_count: The number accelerators to be attached to the instance. 656 a value of 0 will detach all accelerators. 657 accelerator_type: The type of accelerator to attach. e.g. 658 "nvidia-tesla-k80" 659 """ 660 body = { 661 "guestAccelerators": [{ 662 "acceleratorType": 663 self.GetAcceleratorUrl(accelerator_type, zone), 664 "acceleratorCount": 665 accelerator_count 666 }] 667 } 668 api = self.service.instances().setMachineResources( 669 project=self._project, zone=zone, instance=instance, body=body) 670 operation = self.Execute(api) 671 try: 672 self.WaitOnOperation( 673 operation=operation, 674 operation_scope=OperationScope.ZONE, 675 scope_name=zone) 676 except errors.GceOperationTimeoutError: 677 logger.error("Attach instance failed: %s", instance) 678 raise 679 logger.info("%d x %s have been attached to instance %s.", 680 accelerator_count, accelerator_type, instance) 681 682 def AttachDisk(self, instance, zone, **kwargs): 683 """Attach the external disk to the instance. 684 685 Args: 686 instance: A string, representing instance name. 687 zone: String, name of zone. 688 **kwargs: The attachDisk request body. See "https://cloud.google.com/ 689 compute/docs/reference/latest/instances/attachDisk" for detail. 690 { 691 "kind": "compute#attachedDisk", 692 "type": string, 693 "mode": string, 694 "source": string, 695 "deviceName": string, 696 "index": integer, 697 "boot": boolean, 698 "initializeParams": { 699 "diskName": string, 700 "sourceImage": string, 701 "diskSizeGb": long, 702 "diskType": string, 703 "sourceImageEncryptionKey": { 704 "rawKey": string, 705 "sha256": string 706 } 707 }, 708 "autoDelete": boolean, 709 "licenses": [ 710 string 711 ], 712 "interface": string, 713 "diskEncryptionKey": { 714 "rawKey": string, 715 "sha256": string 716 } 717 } 718 719 Returns: 720 An disk resource in json. 721 https://cloud.google.com/compute/docs/reference/latest/disks#resource 722 723 724 Raises: 725 errors.GceOperationTimeoutError: Operation takes too long to finish. 726 """ 727 api = self.service.instances().attachDisk( 728 project=self._project, zone=zone, instance=instance, body=kwargs) 729 operation = self.Execute(api) 730 try: 731 self.WaitOnOperation( 732 operation=operation, 733 operation_scope=OperationScope.ZONE, 734 scope_name=zone) 735 except errors.GceOperationTimeoutError: 736 logger.error("Attach instance failed: %s", instance) 737 raise 738 logger.info("Disk has been attached to instance %s.", instance) 739 740 def DetachDisk(self, instance, zone, disk_name): 741 """Attach the external disk to the instance. 742 743 Args: 744 instance: A string, representing instance name. 745 zone: String, name of zone. 746 disk_name: A string, the name of the detach disk. 747 748 Returns: 749 A ZoneOperation resource. 750 See https://cloud.google.com/compute/docs/reference/latest/zoneOperations 751 752 Raises: 753 errors.GceOperationTimeoutError: Operation takes too long to finish. 754 """ 755 api = self.service.instances().detachDisk( 756 project=self._project, 757 zone=zone, 758 instance=instance, 759 deviceName=disk_name) 760 operation = self.Execute(api) 761 try: 762 self.WaitOnOperation( 763 operation=operation, 764 operation_scope=OperationScope.ZONE, 765 scope_name=zone) 766 except errors.GceOperationTimeoutError: 767 logger.error("Detach instance failed: %s", instance) 768 raise 769 logger.info("Disk has been detached to instance %s.", instance) 770 771 def StartInstance(self, instance, zone): 772 """Start |instance| in |zone|. 773 774 Args: 775 instance: A string, representing instance name. 776 zone: A string, representing zone name. e.g. "us-central1-f" 777 778 Raises: 779 errors.GceOperationTimeoutError: Operation takes too long to finish. 780 """ 781 api = self.service.instances().start( 782 project=self._project, zone=zone, instance=instance) 783 operation = self.Execute(api) 784 try: 785 self.WaitOnOperation( 786 operation=operation, 787 operation_scope=OperationScope.ZONE, 788 scope_name=zone) 789 except errors.GceOperationTimeoutError: 790 logger.error("Start instance failed: %s", instance) 791 raise 792 logger.info("Instance %s has been started.", instance) 793 794 def StartInstances(self, instances, zone): 795 """Start |instances| in |zone|. 796 797 Args: 798 instances: A list of strings, representing instance names's list. 799 zone: A string, representing zone name. e.g. "us-central1-f" 800 801 Returns: 802 A tuple, (done, failed, error_msgs) 803 done: A list of string, representing the names of instances that 804 have been executed. 805 failed: A list of string, representing the names of instances that 806 we failed to execute. 807 error_msgs: A list of string, representing the failure messages. 808 """ 809 action = functools.partial( 810 self.service.instances().start, project=self._project, zone=zone) 811 return self._BatchExecuteOnInstances(instances, zone, action) 812 813 def StopInstance(self, instance, zone): 814 """Stop |instance| in |zone|. 815 816 Args: 817 instance: A string, representing instance name. 818 zone: A string, representing zone name. e.g. "us-central1-f" 819 820 Raises: 821 errors.GceOperationTimeoutError: Operation takes too long to finish. 822 """ 823 api = self.service.instances().stop( 824 project=self._project, zone=zone, instance=instance) 825 operation = self.Execute(api) 826 try: 827 self.WaitOnOperation( 828 operation=operation, 829 operation_scope=OperationScope.ZONE, 830 scope_name=zone) 831 except errors.GceOperationTimeoutError: 832 logger.error("Stop instance failed: %s", instance) 833 raise 834 logger.info("Instance %s has been terminated.", instance) 835 836 def StopInstances(self, instances, zone): 837 """Stop |instances| in |zone|. 838 839 Args: 840 instances: A list of strings, representing instance names's list. 841 zone: A string, representing zone name. e.g. "us-central1-f" 842 843 Returns: 844 A tuple, (done, failed, error_msgs) 845 done: A list of string, representing the names of instances that 846 have been executed. 847 failed: A list of string, representing the names of instances that 848 we failed to execute. 849 error_msgs: A list of string, representing the failure messages. 850 """ 851 action = functools.partial( 852 self.service.instances().stop, project=self._project, zone=zone) 853 return self._BatchExecuteOnInstances(instances, zone, action) 854 855 def SetScheduling(self, 856 instance, 857 zone, 858 automatic_restart=True, 859 on_host_maintenance="MIGRATE"): 860 """Update scheduling config |automatic_restart| and |on_host_maintenance|. 861 862 Args: 863 instance: A string, representing instance name. 864 zone: A string, representing zone name. e.g. "us-central1-f". 865 automatic_restart: Boolean, determine whether the instance will 866 automatically restart if it crashes or not, 867 default to True. 868 on_host_maintenance: enum["MIGRATE", "TERMINATE"] 869 The instance's maintenance behavior, which 870 determines whether the instance is live 871 "MIGRATE" or "TERMINATE" when there is 872 a maintenance event. 873 874 Raises: 875 errors.GceOperationTimeoutError: Operation takes too long to finish. 876 """ 877 body = { 878 "automaticRestart": automatic_restart, 879 "onHostMaintenance": on_host_maintenance 880 } 881 api = self.service.instances().setScheduling( 882 project=self._project, zone=zone, instance=instance, body=body) 883 operation = self.Execute(api) 884 try: 885 self.WaitOnOperation( 886 operation=operation, 887 operation_scope=OperationScope.ZONE, 888 scope_name=zone) 889 except errors.GceOperationTimeoutError: 890 logger.error("Set instance scheduling failed: %s", instance) 891 raise 892 logger.info( 893 "Instance scheduling changed:\n" 894 " automaticRestart: %s\n" 895 " onHostMaintenance: %s\n", 896 str(automatic_restart).lower(), on_host_maintenance) 897 898 def ListInstances(self, instance_filter=None): 899 """List instances cross all zones. 900 901 Gcompute response instance. For example: 902 { 903 'items': 904 { 905 'zones/europe-west3-b': 906 { 907 'warning': 908 { 909 'message': "There are no results for scope 910 'zones/europe-west3-b' on this page.", 911 'code': 'NO_RESULTS_ON_PAGE', 912 'data': [{'value': u'zones/europe-west3-b', 913 'key': u'scope'}] 914 } 915 }, 916 'zones/asia-east1-b': 917 { 918 'instances': [ 919 { 920 'name': 'ins-bc212dc8-userbuild-aosp-cf-x86-64-phone' 921 'status': 'RUNNING', 922 'cpuPlatform': 'Intel Broadwell', 923 'startRestricted': False, 924 'labels': {u'created_by': u'herbertxue'}, 925 'name': 'ins-bc212dc8-userbuild-aosp-cf-x86-64-phone', 926 ... 927 }] 928 } 929 } 930 } 931 932 Args: 933 instance_filter: A string representing a filter in format of 934 FIELD_NAME COMPARISON_STRING LITERAL_STRING 935 e.g. "name ne example-instance" 936 e.g. "name eq "example-instance-[0-9]+"" 937 938 Returns: 939 A list of instances. 940 """ 941 # aggregatedList will only return 500 results max, so if there are more, 942 # we need to send in the next page token to get the next 500 (and so on 943 # and so forth. 944 get_more_instances = True 945 page_token = None 946 instances_list = [] 947 while get_more_instances: 948 api = self.service.instances().aggregatedList( 949 project=self._project, 950 filter=instance_filter, 951 pageToken=page_token) 952 response = self.Execute(api) 953 page_token = response.get("nextPageToken") 954 get_more_instances = page_token is not None 955 for instances_data in response["items"].values(): 956 if "instances" in instances_data: 957 for instance in instances_data.get("instances"): 958 instances_list.append(instance) 959 960 return instances_list 961 962 def SetSchedulingInstances(self, 963 instances, 964 zone, 965 automatic_restart=True, 966 on_host_maintenance="MIGRATE"): 967 """Update scheduling config |automatic_restart| and |on_host_maintenance|. 968 969 See //cloud/cluster/api/mixer_instances.proto Scheduling for config option. 970 971 Args: 972 instances: A list of string, representing instance names. 973 zone: A string, representing zone name. e.g. "us-central1-f". 974 automatic_restart: Boolean, determine whether the instance will 975 automatically restart if it crashes or not, 976 default to True. 977 on_host_maintenance: enum["MIGRATE", "TERMINATE"] 978 The instance's maintenance behavior, which 979 determines whether the instance is live 980 migrated or terminated when there is 981 a maintenance event. 982 983 Returns: 984 A tuple, (done, failed, error_msgs) 985 done: A list of string, representing the names of instances that 986 have been executed. 987 failed: A list of string, representing the names of instances that 988 we failed to execute. 989 error_msgs: A list of string, representing the failure messages. 990 """ 991 body = { 992 "automaticRestart": automatic_restart, 993 "OnHostMaintenance": on_host_maintenance 994 } 995 action = functools.partial( 996 self.service.instances().setScheduling, 997 project=self._project, 998 zone=zone, 999 body=body) 1000 return self._BatchExecuteOnInstances(instances, zone, action) 1001 1002 def _BatchExecuteOnInstances(self, instances, zone, action): 1003 """Batch processing operations requiring computing time. 1004 1005 Args: 1006 instances: A list of instance names. 1007 zone: A string, e.g. "us-central1-f". 1008 action: partial func, all kwargs for this gcloud action has been 1009 defined in the caller function (e.g. See "StartInstances") 1010 except 'instance' which will be defined by iterating the 1011 |instances|. 1012 1013 Returns: 1014 A tuple, (done, failed, error_msgs) 1015 done: A list of string, representing the names of instances that 1016 have been executed. 1017 failed: A list of string, representing the names of instances that 1018 we failed to execute. 1019 error_msgs: A list of string, representing the failure messages. 1020 """ 1021 if not instances: 1022 return [], [], [] 1023 # Batch send requests. 1024 logger.info("Batch executing instances: %s", instances) 1025 requests = {} 1026 for instance_name in set(instances): 1027 requests[instance_name] = action(instance=instance_name) 1028 return self._BatchExecuteAndWait( 1029 requests, operation_scope=OperationScope.ZONE, scope_name=zone) 1030 1031 def _BatchExecuteAndWait(self, requests, operation_scope, scope_name=None): 1032 """Batch processing requests and wait on the operation. 1033 1034 Args: 1035 requests: A dictionary. The key is a string representing the resource 1036 name. For example, an instance name, or an image name. 1037 operation_scope: A value from OperationScope, "zone", "region", 1038 or "global". 1039 scope_name: If operation_scope is "zone" or "region", this should be 1040 the name of the zone or region, e.g. "us-central1-f". 1041 Returns: 1042 A tuple, (done, failed, error_msgs) 1043 done: A list of string, representing the resource names that have 1044 been executed. 1045 failed: A list of string, representing resource names that 1046 we failed to execute. 1047 error_msgs: A list of string, representing the failure messages. 1048 """ 1049 results = self.BatchExecute(requests) 1050 # Initialize return values 1051 failed = [] 1052 error_msgs = [] 1053 for resource_name, (_, error) in six.iteritems(results): 1054 if error is not None: 1055 failed.append(resource_name) 1056 error_msgs.append(str(error)) 1057 done = [] 1058 # Wait for the executing operations to finish. 1059 logger.info("Waiting for executing operations") 1060 for resource_name in six.iterkeys(requests): 1061 operation, _ = results[resource_name] 1062 if operation: 1063 try: 1064 self.WaitOnOperation(operation, operation_scope, 1065 scope_name) 1066 done.append(resource_name) 1067 except errors.DriverError as exc: 1068 failed.append(resource_name) 1069 error_msgs.append(str(exc)) 1070 return done, failed, error_msgs 1071 1072 def ListZones(self): 1073 """List all zone instances in the project. 1074 1075 Returns: 1076 Gcompute response instance. For example: 1077 { 1078 "id": "projects/google.com%3Aandroid-build-staging/zones", 1079 "kind": "compute#zoneList", 1080 "selfLink": "https://www.googleapis.com/compute/v1/projects/" 1081 "google.com:android-build-staging/zones" 1082 "items": [ 1083 { 1084 'creationTimestamp': '2014-07-15T10:44:08.663-07:00', 1085 'description': 'asia-east1-c', 1086 'id': '2222', 1087 'kind': 'compute#zone', 1088 'name': 'asia-east1-c', 1089 'region': 'https://www.googleapis.com/compute/v1/projects/' 1090 'google.com:android-build-staging/regions/asia-east1', 1091 'selfLink': 'https://www.googleapis.com/compute/v1/projects/' 1092 'google.com:android-build-staging/zones/asia-east1-c', 1093 'status': 'UP' 1094 }, { 1095 'creationTimestamp': '2014-05-30T18:35:16.575-07:00', 1096 'description': 'asia-east1-b', 1097 'id': '2221', 1098 'kind': 'compute#zone', 1099 'name': 'asia-east1-b', 1100 'region': 'https://www.googleapis.com/compute/v1/projects/' 1101 'google.com:android-build-staging/regions/asia-east1', 1102 'selfLink': 'https://www.googleapis.com/compute/v1/projects' 1103 '/google.com:android-build-staging/zones/asia-east1-b', 1104 'status': 'UP' 1105 }] 1106 } 1107 See cloud cluster's api/mixer_zones.proto 1108 """ 1109 api = self.service.zones().list(project=self._project) 1110 return self.Execute(api) 1111 1112 def ListRegions(self): 1113 """List all the regions for a project. 1114 1115 Returns: 1116 A dictionary containing all the zones and additional data. See this link 1117 for the detailed response: 1118 https://cloud.google.com/compute/docs/reference/latest/regions/list. 1119 Example: 1120 { 1121 'items': [{ 1122 'name': 1123 'us-central1', 1124 'quotas': [{ 1125 'usage': 2.0, 1126 'limit': 24.0, 1127 'metric': 'CPUS' 1128 }, { 1129 'usage': 1.0, 1130 'limit': 23.0, 1131 'metric': 'IN_USE_ADDRESSES' 1132 }, { 1133 'usage': 209.0, 1134 'limit': 10240.0, 1135 'metric': 'DISKS_TOTAL_GB' 1136 }, { 1137 'usage': 1000.0, 1138 'limit': 20000.0, 1139 'metric': 'INSTANCES' 1140 }] 1141 },..] 1142 } 1143 """ 1144 api = self.service.regions().list(project=self._project) 1145 return self.Execute(api) 1146 1147 def _GetNetworkArgs(self, network, zone): 1148 """Helper to generate network args that is used to create an instance. 1149 1150 Args: 1151 network: A string, e.g. "default". 1152 zone: String, representing zone name, e.g. "us-central1-f" 1153 1154 Returns: 1155 A dictionary representing network args. 1156 """ 1157 network_args = { 1158 "network": self.GetNetworkUrl(network), 1159 "accessConfigs": [{ 1160 "name": "External NAT", 1161 "type": "ONE_TO_ONE_NAT" 1162 }] 1163 } 1164 # default network can be blank or set to default, we don't need to 1165 # specify the subnetwork for that. 1166 if network and network != "default": 1167 network_args["subnetwork"] = self.GetSubnetworkUrl(network, zone) 1168 return network_args 1169 1170 def _GetDiskArgs(self, 1171 disk_name, 1172 image_name, 1173 image_project=None, 1174 disk_size_gb=None): 1175 """Helper to generate disk args that is used to create an instance. 1176 1177 Args: 1178 disk_name: A string 1179 image_name: A string 1180 image_project: A string 1181 disk_size_gb: An integer 1182 1183 Returns: 1184 List holding dict of disk args. 1185 """ 1186 args = copy.deepcopy(BASE_DISK_ARGS) 1187 args["initializeParams"] = { 1188 "diskName": disk_name, 1189 "sourceImage": self.GetImage(image_name, 1190 image_project)["selfLink"], 1191 } 1192 # TODO: Remove this check once it's validated that we can either pass in 1193 # a None diskSizeGb or we find an appropriate default val. 1194 if disk_size_gb: 1195 args["diskSizeGb"] = disk_size_gb 1196 return [args] 1197 1198 def _GetExtraDiskArgs(self, extra_disk_name, zone): 1199 """Get extra disk arg for given disk. 1200 1201 Args: 1202 extra_disk_name: String, name of the disk. 1203 zone: String, representing zone name, e.g. "us-central1-f" 1204 1205 Returns: 1206 A dictionary of disk args. 1207 """ 1208 return [{ 1209 "type": "PERSISTENT", 1210 "mode": "READ_WRITE", 1211 "source": "projects/%s/zones/%s/disks/%s" % (self._project, zone, 1212 extra_disk_name), 1213 "autoDelete": True, 1214 "boot": False, 1215 "interface": "SCSI", 1216 "deviceName": extra_disk_name, 1217 }] 1218 1219 # pylint: disable=too-many-locals 1220 def CreateInstance(self, 1221 instance, 1222 image_name, 1223 machine_type, 1224 metadata, 1225 network, 1226 zone, 1227 disk_args=None, 1228 image_project=None, 1229 gpu=None, 1230 extra_disk_name=None, 1231 extra_scopes=None, 1232 tags=None): 1233 """Create a gce instance with a gce image. 1234 1235 Args: 1236 instance: String, instance name. 1237 image_name: String, source image used to create this disk. 1238 machine_type: String, representing machine_type, 1239 e.g. "n1-standard-1" 1240 metadata: Dict, maps a metadata name to its value. 1241 network: String, representing network name, e.g. "default" 1242 zone: String, representing zone name, e.g. "us-central1-f" 1243 disk_args: A list of extra disk args (strings), see _GetDiskArgs 1244 for example, if None, will create a disk using the given 1245 image. 1246 image_project: String, name of the project where the image 1247 belongs. Assume the default project if None. 1248 gpu: String, type of gpu to attach. e.g. "nvidia-tesla-k80", if 1249 None no gpus will be attached. For more details see: 1250 https://cloud.google.com/compute/docs/gpus/add-gpus 1251 extra_disk_name: String,the name of the extra disk to attach. 1252 extra_scopes: A list of extra scopes to be provided to the instance. 1253 tags: A list of tags to associate with the instance. e.g. 1254 ["http-server", "https-server"] 1255 """ 1256 disk_args = (disk_args 1257 or self._GetDiskArgs(instance, image_name, image_project)) 1258 if extra_disk_name: 1259 disk_args.extend(self._GetExtraDiskArgs(extra_disk_name, zone)) 1260 1261 scopes = [] 1262 scopes.extend(self.DEFAULT_INSTANCE_SCOPE) 1263 if extra_scopes: 1264 scopes.extend(extra_scopes) 1265 1266 # Add labels for giving the instances ability to be filter for 1267 # acloud list/delete cmds. 1268 body = { 1269 "machineType": self.GetMachineType(machine_type, zone)["selfLink"], 1270 "name": instance, 1271 "networkInterfaces": [self._GetNetworkArgs(network, zone)], 1272 "disks": disk_args, 1273 "labels": {constants.LABEL_CREATE_BY: getpass.getuser()}, 1274 "serviceAccounts": [{ 1275 "email": "default", 1276 "scopes": scopes, 1277 }], 1278 "enableVtpm": True, 1279 } 1280 1281 if tags: 1282 body["tags"] = {"items": tags} 1283 if gpu: 1284 body["guestAccelerators"] = [{ 1285 "acceleratorType": self.GetAcceleratorUrl(gpu, zone), 1286 "acceleratorCount": 1 1287 }] 1288 # Instances with GPUs cannot live migrate because they are assigned 1289 # to specific hardware devices. 1290 body["scheduling"] = {"onHostMaintenance": "terminate"} 1291 if metadata: 1292 metadata_list = [{ 1293 _METADATA_KEY: key, 1294 _METADATA_KEY_VALUE: val 1295 } for key, val in six.iteritems(metadata)] 1296 body[_METADATA] = {_ITEMS: metadata_list} 1297 logger.info("Creating instance: project %s, zone %s, body:%s", 1298 self._project, zone, body) 1299 api = self.service.instances().insert( 1300 project=self._project, zone=zone, body=body) 1301 operation = self.Execute(api) 1302 self.WaitOnOperation( 1303 operation, operation_scope=OperationScope.ZONE, scope_name=zone) 1304 logger.info("Instance %s has been created.", instance) 1305 1306 def DeleteInstance(self, instance, zone): 1307 """Delete a gce instance. 1308 1309 Args: 1310 instance: A string, instance name. 1311 zone: A string, e.g. "us-central1-f" 1312 """ 1313 logger.info("Deleting instance: %s", instance) 1314 api = self.service.instances().delete( 1315 project=self._project, zone=zone, instance=instance) 1316 operation = self.Execute(api) 1317 self.WaitOnOperation( 1318 operation, operation_scope=OperationScope.ZONE, scope_name=zone) 1319 logger.info("Deleted instance: %s", instance) 1320 1321 def DeleteInstances(self, instances, zone): 1322 """Delete multiple instances. 1323 1324 Args: 1325 instances: A list of instance names. 1326 zone: A string, e.g. "us-central1-f". 1327 1328 Returns: 1329 A tuple, (deleted, failed, error_msgs) 1330 deleted: A list of names of instances that have been deleted. 1331 failed: A list of names of instances that we fail to delete. 1332 error_msgs: A list of failure messages. 1333 """ 1334 action = functools.partial( 1335 self.service.instances().delete, project=self._project, zone=zone) 1336 return self._BatchExecuteOnInstances(instances, zone, action) 1337 1338 def ResetInstance(self, instance, zone): 1339 """Reset the gce instance. 1340 1341 Args: 1342 instance: A string, instance name. 1343 zone: A string, e.g. "us-central1-f". 1344 """ 1345 logger.info("Resetting instance: %s", instance) 1346 api = self.service.instances().reset( 1347 project=self._project, zone=zone, instance=instance) 1348 operation = self.Execute(api) 1349 self.WaitOnOperation( 1350 operation, operation_scope=OperationScope.ZONE, scope_name=zone) 1351 logger.info("Instance has been reset: %s", instance) 1352 1353 def GetMachineType(self, machine_type, zone): 1354 """Get URL for a given machine typle. 1355 1356 Args: 1357 machine_type: A string, name of the machine type. 1358 zone: A string, e.g. "us-central1-f" 1359 1360 Returns: 1361 A machine type resource in json. 1362 https://cloud.google.com/compute/docs/reference/latest/ 1363 machineTypes#resource 1364 """ 1365 api = self.service.machineTypes().get( 1366 project=self._project, zone=zone, machineType=machine_type) 1367 return self.Execute(api) 1368 1369 def GetAcceleratorUrl(self, accelerator_type, zone): 1370 """Get URL for a given type of accelator. 1371 1372 Args: 1373 accelerator_type: A string, representing the accelerator, e.g 1374 "nvidia-tesla-k80" 1375 zone: A string representing a zone, e.g. "us-west1-b" 1376 1377 Returns: 1378 A URL that points to the accelerator resource, e.g. 1379 https://www.googleapis.com/compute/v1/projects/<project id>/zones/ 1380 us-west1-b/acceleratorTypes/nvidia-tesla-k80 1381 """ 1382 api = self.service.acceleratorTypes().get( 1383 project=self._project, zone=zone, acceleratorType=accelerator_type) 1384 result = self.Execute(api) 1385 return result["selfLink"] 1386 1387 def GetNetworkUrl(self, network): 1388 """Get URL for a given network. 1389 1390 Args: 1391 network: A string, representing network name, e.g "default" 1392 1393 Returns: 1394 A URL that points to the network resource, e.g. 1395 https://www.googleapis.com/compute/v1/projects/<project id>/ 1396 global/networks/default 1397 """ 1398 api = self.service.networks().get( 1399 project=self._project, network=network) 1400 result = self.Execute(api) 1401 return result["selfLink"] 1402 1403 def GetSubnetworkUrl(self, network, zone): 1404 """Get URL for a given network and zone. 1405 1406 Return the subnetwork for the network in the specified region that the 1407 specified zone resides in. If there is no subnetwork for the specified 1408 zone, raise an exception. 1409 1410 Args: 1411 network: A string, representing network name, e.g "default" 1412 zone: String, representing zone name, e.g. "us-central1-f" 1413 1414 Returns: 1415 A URL that points to the network resource, e.g. 1416 https://www.googleapis.com/compute/v1/projects/<project id>/ 1417 global/networks/default 1418 1419 Raises: 1420 errors.NoSubnetwork: When no subnetwork exists for the zone 1421 specified. 1422 """ 1423 api = self.service.networks().get( 1424 project=self._project, network=network) 1425 result = self.Execute(api) 1426 region = zone.rsplit("-", 1)[0] 1427 for subnetwork in result.get("subnetworks", []): 1428 if region in subnetwork: 1429 return subnetwork 1430 raise errors.NoSubnetwork("No subnetwork for network %s in region %s" % 1431 (network, region)) 1432 1433 def CompareMachineSize(self, machine_type_1, machine_type_2, zone): 1434 """Compare the size of two machine types. 1435 1436 Args: 1437 machine_type_1: A string representing a machine type, e.g. n1-standard-1 1438 machine_type_2: A string representing a machine type, e.g. n1-standard-1 1439 zone: A string representing a zone, e.g. "us-central1-f" 1440 1441 Returns: 1442 -1 if any metric of machine size of the first type is smaller than 1443 the second type. 1444 0 if all metrics of machine size are equal. 1445 1 if at least one metric of machine size of the first type is 1446 greater than the second type and all metrics of first type are 1447 greater or equal to the second type. 1448 1449 Raises: 1450 errors.DriverError: For malformed response. 1451 """ 1452 machine_info_1 = self.GetMachineType(machine_type_1, zone) 1453 machine_info_2 = self.GetMachineType(machine_type_2, zone) 1454 result = 0 1455 for metric in self.MACHINE_SIZE_METRICS: 1456 if metric not in machine_info_1 or metric not in machine_info_2: 1457 raise errors.DriverError( 1458 "Malformed machine size record: Can't find '%s' in %s or %s" 1459 % (metric, machine_info_1, machine_info_2)) 1460 cmp_result = machine_info_1[metric] - machine_info_2[metric] 1461 if cmp_result < 0: 1462 return -1 1463 if cmp_result > 0: 1464 result = 1 1465 return result 1466 1467 def GetSerialPortOutput(self, instance, zone, port=1): 1468 """Get serial port output. 1469 1470 Args: 1471 instance: string, instance name. 1472 zone: string, zone name. 1473 port: int, which COM port to read from, 1-4, default to 1. 1474 1475 Returns: 1476 String, contents of the output. 1477 1478 Raises: 1479 errors.DriverError: For malformed response. 1480 """ 1481 api = self.service.instances().getSerialPortOutput( 1482 project=self._project, zone=zone, instance=instance, port=port) 1483 result = self.Execute(api) 1484 if "contents" not in result: 1485 raise errors.DriverError( 1486 "Malformed response for GetSerialPortOutput: %s" % result) 1487 return result["contents"] 1488 1489 def GetInstanceNamesByIPs(self, ips): 1490 """Get Instance names by IPs. 1491 1492 This function will go through all instances, which 1493 could be slow if there are too many instances. However, currently 1494 GCE doesn't support search for instance by IP. 1495 1496 Args: 1497 ips: A set of IPs. 1498 1499 Returns: 1500 A dictionary where key is IP and value is instance name or None 1501 if instance is not found for the given IP. 1502 """ 1503 ip_name_map = dict.fromkeys(ips) 1504 for instance in self.ListInstances(): 1505 try: 1506 ip = instance["networkInterfaces"][0]["accessConfigs"][0][ 1507 "natIP"] 1508 if ip in ips: 1509 ip_name_map[ip] = instance["name"] 1510 except (IndexError, KeyError) as e: 1511 logger.error("Could not get instance names by ips: %s", str(e)) 1512 return ip_name_map 1513 1514 def GetInstanceIP(self, instance, zone): 1515 """Get Instance IP given instance name. 1516 1517 Args: 1518 instance: String, representing instance name. 1519 zone: String, name of the zone. 1520 1521 Returns: 1522 ssh.IP object, that stores internal and external ip of the instance. 1523 """ 1524 instance = self.GetInstance(instance, zone) 1525 internal_ip = instance["networkInterfaces"][0]["networkIP"] 1526 external_ip = instance["networkInterfaces"][0]["accessConfigs"][0]["natIP"] 1527 return IP(internal=internal_ip, external=external_ip) 1528 1529 @utils.TimeExecute(function_description="Updating instance metadata: ") 1530 def SetInstanceMetadata(self, zone, instance, body): 1531 """Set instance metadata. 1532 1533 Args: 1534 zone: String, name of zone. 1535 instance: String, representing instance name. 1536 body: Dict, Metadata body. 1537 metdata is in the following format. 1538 { 1539 "kind": "compute#metadata", 1540 "fingerprint": "a-23icsyx4E=", 1541 "items": [ 1542 { 1543 "key": "sshKeys", 1544 "value": "key" 1545 }, ... 1546 ] 1547 } 1548 """ 1549 api = self.service.instances().setMetadata( 1550 project=self._project, zone=zone, instance=instance, body=body) 1551 operation = self.Execute(api) 1552 self.WaitOnOperation( 1553 operation, operation_scope=OperationScope.ZONE, scope_name=zone) 1554 1555 def AddSshRsaInstanceMetadata(self, user, ssh_rsa_path, instance): 1556 """Add the public rsa key to the instance's metadata. 1557 1558 Confirm that the instance has this public key in the instance's 1559 metadata, if not we will add this public key. 1560 1561 Args: 1562 user: String, name of the user which the key belongs to. 1563 ssh_rsa_path: String, The absolute path to public rsa key. 1564 instance: String, representing instance name. 1565 """ 1566 ssh_rsa_path = os.path.expanduser(ssh_rsa_path) 1567 rsa = GetRsaKey(ssh_rsa_path) 1568 entry = "%s:%s" % (user, rsa) 1569 logger.debug("New RSA entry: %s", entry) 1570 1571 zone = self.GetZoneByInstance(instance) 1572 gce_instance = self.GetInstance(instance, zone) 1573 metadata = gce_instance.get(_METADATA) 1574 if RsaNotInMetadata(metadata, entry): 1575 self.UpdateRsaInMetadata(zone, instance, metadata, entry) 1576 1577 def GetZoneByInstance(self, instance): 1578 """Get the zone from instance name. 1579 1580 Gcompute response instance. For example: 1581 { 1582 'items': 1583 { 1584 'zones/europe-west3-b': 1585 { 1586 'warning': 1587 { 1588 'message': "There are no results for scope 1589 'zones/europe-west3-b' on this page.", 1590 'code': 'NO_RESULTS_ON_PAGE', 1591 'data': [{'value': u'zones/europe-west3-b', 1592 'key': u'scope'}] 1593 } 1594 }, 1595 'zones/asia-east1-b': 1596 { 1597 'instances': [ 1598 { 1599 'name': 'ins-bc212dc8-userbuild-aosp-cf-x86-64-phone' 1600 'status': 'RUNNING', 1601 'cpuPlatform': 'Intel Broadwell', 1602 'startRestricted': False, 1603 'labels': {u'created_by': u'herbertxue'}, 1604 'name': 'ins-bc212dc8-userbuild-aosp-cf-x86-64-phone', 1605 ... 1606 }] 1607 } 1608 } 1609 } 1610 1611 Args: 1612 instance: String, representing instance name. 1613 1614 Raises: 1615 errors.GetGceZoneError: Can't get zone from instance name. 1616 1617 Returns: 1618 String of zone name. 1619 """ 1620 api = self.service.instances().aggregatedList( 1621 project=self._project, 1622 filter="name=%s" % instance) 1623 response = self.Execute(api) 1624 for zone, instance_data in response["items"].items(): 1625 if "instances" in instance_data: 1626 zone_match = _ZONE_RE.match(zone) 1627 if zone_match: 1628 return zone_match.group("zone") 1629 raise errors.GetGceZoneError("Can't get zone from the instance name %s" 1630 % instance) 1631 1632 def GetZonesByInstances(self, instances): 1633 """Get the zone from instance name. 1634 1635 Args: 1636 instances: List of strings, representing instance names. 1637 1638 Returns: 1639 A dictionary that contains the name of all instances in the zone. 1640 The key is the name of the zone, and the value is a list contains 1641 the name of the instances. 1642 """ 1643 zone_instances = {} 1644 for instance in instances: 1645 zone = self.GetZoneByInstance(instance) 1646 if zone in zone_instances: 1647 zone_instances[zone].append(instance) 1648 else: 1649 zone_instances[zone] = [instance] 1650 return zone_instances 1651 1652 def CheckAccess(self): 1653 """Check if the user has read access to the cloud project. 1654 1655 Returns: 1656 True if the user has at least read access to the project. 1657 False otherwise. 1658 1659 Raises: 1660 errors.HttpError if other unexpected error happens when 1661 accessing the project. 1662 """ 1663 api = self.service.zones().list(project=self._project) 1664 retry_http_codes = copy.copy(self.RETRY_HTTP_CODES) 1665 retry_http_codes.remove(self.ACCESS_DENIED_CODE) 1666 try: 1667 self.Execute(api, retry_http_codes=retry_http_codes) 1668 except errors.HttpError as e: 1669 if e.code == self.ACCESS_DENIED_CODE: 1670 return False 1671 raise 1672 return True 1673 1674 def UpdateRsaInMetadata(self, zone, instance, metadata, entry): 1675 """Update ssh public key to sshKeys's value in this metadata. 1676 1677 Args: 1678 zone: String, name of zone. 1679 instance: String, representing instance name. 1680 metadata: Dict, maps a metadata name to its value. 1681 entry: String, ssh public key. 1682 """ 1683 ssh_key_item = GetSshKeyFromMetadata(metadata) 1684 if ssh_key_item: 1685 # The ssh key exists in the metadata so update the reference to it 1686 # in the metadata. There may not be an actual ssh key value so 1687 # that's why we filter for None to avoid an empty line in front. 1688 ssh_key_item[_METADATA_KEY_VALUE] = "\n".join( 1689 list(filter(None, [ssh_key_item[_METADATA_KEY_VALUE], entry]))) 1690 else: 1691 # Since there is no ssh key item in the metadata, we need to add it in. 1692 ssh_key_item = {_METADATA_KEY: _SSH_KEYS_NAME, 1693 _METADATA_KEY_VALUE: entry} 1694 metadata[_ITEMS].append(ssh_key_item) 1695 utils.PrintColorString( 1696 "Ssh public key doesn't exist in the instance(%s), adding it." 1697 % instance, utils.TextColors.WARNING) 1698 self.SetInstanceMetadata(zone, instance, metadata) 1699 1700 1701def RsaNotInMetadata(metadata, entry): 1702 """Check ssh public key exist in sshKeys's value. 1703 1704 Args: 1705 metadata: Dict, maps a metadata name to its value. 1706 entry: String, ssh public key. 1707 1708 Returns: 1709 Boolean. True if ssh public key doesn't exist in metadata. 1710 """ 1711 for item in metadata.setdefault(_ITEMS, []): 1712 if item[_METADATA_KEY] == _SSH_KEYS_NAME: 1713 if entry in item[_METADATA_KEY_VALUE]: 1714 return False 1715 return True 1716 1717 1718def GetSshKeyFromMetadata(metadata): 1719 """Get ssh key item from metadata. 1720 1721 Args: 1722 metadata: Dict, maps a metadata name to its value. 1723 1724 Returns: 1725 Dict of ssk_key_item in metadata, None if can't find the ssh key item 1726 in metadata. 1727 """ 1728 for item in metadata.setdefault(_ITEMS, []): 1729 if item.get(_METADATA_KEY, '') == _SSH_KEYS_NAME: 1730 return item 1731 return None 1732 1733 1734def GetRsaKey(ssh_rsa_path): 1735 """Get rsa key from rsa path. 1736 1737 Args: 1738 ssh_rsa_path: String, The absolute path to public rsa key. 1739 1740 Returns: 1741 String, rsa key. 1742 1743 Raises: 1744 errors.DriverError: RSA file does not exist. 1745 """ 1746 ssh_rsa_path = os.path.expanduser(ssh_rsa_path) 1747 if not os.path.exists(ssh_rsa_path): 1748 raise errors.DriverError( 1749 "RSA file %s does not exist." % ssh_rsa_path) 1750 1751 with open(ssh_rsa_path) as f: 1752 rsa = f.read() 1753 # The space must be removed here for string processing, 1754 # if it is not string, it doesn't have a strip function. 1755 rsa = rsa.strip() if rsa else rsa 1756 utils.VerifyRsaPubKey(rsa) 1757 return rsa 1758