• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python
2#
3# Copyright 2016 - The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#     http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16"""A client that manages Google Compute Engine.
17
18** ComputeClient **
19
20ComputeClient is a wrapper around Google Compute Engine APIs.
21It provides a set of methods for managing a google compute engine project,
22such as creating images, creating instances, etc.
23
24Design philosophy: We tried to make ComputeClient as stateless as possible,
25and it only keeps states about authentication. ComputeClient should be very
26generic, and only knows how to talk to Compute Engine APIs.
27"""
28# pylint: disable=too-many-lines
29import copy
30import functools
31import getpass
32import logging
33import os
34import re
35
36import six
37
38from acloud import errors
39from acloud.internal import constants
40from acloud.internal.lib import base_cloud_client
41from acloud.internal.lib import utils
42from acloud.internal.lib.ssh import IP
43
44
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Retry budget handed to utils.RetryOnException for HTTP 412 conflicts
# (see SetImageLabels).
_MAX_RETRIES_ON_FINGERPRINT_CONFLICT = 10
# String keys for metadata handling; not referenced in the part of the file
# reviewed here -- presumably used by metadata/ssh-key helpers further down.
_METADATA_KEY = "key"
_METADATA_KEY_VALUE = "value"
_SSH_KEYS_NAME = "sshKeys"
_ITEMS = "items"
_METADATA = "metadata"
# Matches strings beginning with "zones/" and captures the rest as "zone".
_ZONE_RE = re.compile(r"^zones/(?P<zone>.+)")
# Quota metrics
_METRIC_CPUS = "CPUS"
_METRIC_DISKS_GB = "DISKS_TOTAL_GB"
_METRIC_USE_ADDRESSES = "IN_USE_ADDRESSES"
# Metrics inspected by ComputeClient.EnoughMetricsInZone, in this order.
_METRICS = [_METRIC_CPUS, _METRIC_DISKS_GB, _METRIC_USE_ADDRESSES]
# Keys of a quota entry, e.g. {"usage": 92, "metric": "CPUS", "limit": 100}.
_USAGE = "usage"
_LIMIT = "limit"
# The minimum requirement to create an instance.
# Compared against (limit - usage) for each metric.
_REQUIRE_METRICS = {_METRIC_CPUS: 8,
                    _METRIC_DISKS_GB: 1000,
                    _METRIC_USE_ADDRESSES: 1}

# Template for a boot disk description; "initializeParams" starts empty.
# NOTE(review): not used in the reviewed part of the file -- presumably
# copied and filled in by instance-creation code; confirm against callers.
BASE_DISK_ARGS = {
    "type": "PERSISTENT",
    "boot": True,
    "mode": "READ_WRITE",
    "autoDelete": True,
    "initializeParams": {},
}
73
74
class OperationScope(object):
    """Enum-like holder for the scopes an operation can belong to.

    The values are compared against the operation_scope argument of
    ComputeClient._GetOperationStatus.
    """
    ZONE, REGION, GLOBAL = "zone", "region", "global"
80
81
class PersistentDiskType(object):
    """Enum-like holder for the supported persistent disk types.

    STANDARD ("pd-standard") is a regular hard disk.
    SSD ("pd-ssd") is a solid state disk.
    """
    STANDARD, SSD = "pd-standard", "pd-ssd"
90
91
class ImageStatus(object):
    """Enum-like holder for the status values of an image."""
    PENDING, READY, FAILED = "PENDING", "READY", "FAILED"
97
98
def _IsFingerPrintError(exc):
    """Tell whether |exc| is an HTTP 412 ("Precondition Failed") error.

    Args:
        exc: Exception instance.

    Returns:
        Boolean. True only if exc is an errors.HttpError whose code is 412.
    """
    if not isinstance(exc, errors.HttpError):
        return False
    return exc.code == 412
109
110
111# pylint: disable=too-many-public-methods
class ComputeClient(base_cloud_client.BaseCloudApiClient):
    """Client that manages GCE.

    Wraps the Compute Engine API exposed through self.service and scopes
    every request to the project taken from the acloud config.
    """

    # API settings, used by BaseCloudApiClient.
    API_NAME = "compute"
    API_VERSION = "v1"
    # OAuth scopes joined into one space-separated string.
    SCOPE = " ".join([
        "https://www.googleapis.com/auth/compute",
        "https://www.googleapis.com/auth/devstorage.read_write"
    ])
    # Default settings for gce operations
    # NOTE(review): DEFAULT_INSTANCE_SCOPE, MACHINE_SIZE_METRICS and
    # ACCESS_DENIED_CODE are not referenced in the reviewed part of the
    # file; their consumers are presumably further down.
    DEFAULT_INSTANCE_SCOPE = [
        "https://www.googleapis.com/auth/androidbuild.internal",
        "https://www.googleapis.com/auth/devstorage.read_only",
        "https://www.googleapis.com/auth/logging.write"
    ]
    # Timeout and poll interval handed to utils.PollAndWait by
    # WaitOnOperation.
    OPERATION_TIMEOUT_SECS = 30 * 60  # 30 mins
    OPERATION_POLL_INTERVAL_SECS = 20
    MACHINE_SIZE_METRICS = ["guestCpus", "memoryMb"]
    ACCESS_DENIED_CODE = 403
132
133    def __init__(self, acloud_config, oauth2_credentials):
134        """Initialize.
135
136        Args:
137            acloud_config: An AcloudConfig object.
138            oauth2_credentials: An oauth2client.OAuth2Credentials instance.
139        """
140        super(ComputeClient, self).__init__(oauth2_credentials)
141        self._project = acloud_config.project
142
143    def _GetOperationStatus(self, operation, operation_scope, scope_name=None):
144        """Get status of an operation.
145
146        Args:
147            operation: An Operation resource in the format of json.
148            operation_scope: A value from OperationScope, "zone", "region",
149                             or "global".
150            scope_name: If operation_scope is "zone" or "region", this should be
151                        the name of the zone or region, e.g. "us-central1-f".
152
153        Returns:
154            Status of the operation, one of "DONE", "PENDING", "RUNNING".
155
156        Raises:
157            errors.DriverError: if the operation fails.
158        """
159        operation_name = operation["name"]
160        if operation_scope == OperationScope.GLOBAL:
161            api = self.service.globalOperations().get(
162                project=self._project, operation=operation_name)
163            result = self.Execute(api)
164        elif operation_scope == OperationScope.ZONE:
165            api = self.service.zoneOperations().get(
166                project=self._project,
167                operation=operation_name,
168                zone=scope_name)
169            result = self.Execute(api)
170        elif operation_scope == OperationScope.REGION:
171            api = self.service.regionOperations().get(
172                project=self._project,
173                operation=operation_name,
174                region=scope_name)
175            result = self.Execute(api)
176
177        if result.get("error"):
178            errors_list = result["error"]["errors"]
179            raise errors.DriverError(
180                "Get operation state failed, errors: %s" % str(errors_list))
181        return result["status"]
182
183    def WaitOnOperation(self, operation, operation_scope, scope_name=None):
184        """Wait for an operation to finish.
185
186        Args:
187            operation: An Operation resource in the format of json.
188            operation_scope: A value from OperationScope, "zone", "region",
189                             or "global".
190            scope_name: If operation_scope is "zone" or "region", this should be
191                        the name of the zone or region, e.g. "us-central1-f".
192        """
193        timeout_exception = errors.GceOperationTimeoutError(
194            "Operation hits timeout, did not complete within %d secs." %
195            self.OPERATION_TIMEOUT_SECS)
196        utils.PollAndWait(
197            func=self._GetOperationStatus,
198            expected_return="DONE",
199            timeout_exception=timeout_exception,
200            timeout_secs=self.OPERATION_TIMEOUT_SECS,
201            sleep_interval_secs=self.OPERATION_POLL_INTERVAL_SECS,
202            operation=operation,
203            operation_scope=operation_scope,
204            scope_name=scope_name)
205
206    def GetProject(self):
207        """Get project information.
208
209        Returns:
210            A project resource in json.
211        """
212        api = self.service.projects().get(project=self._project)
213        return self.Execute(api)
214
215    def GetRegionInfo(self):
216        """Get region information that includes all quotas limit.
217
218        The region info example:
219        {"items":
220            [{"status": "UP",
221              "name": "asia-east1",
222              "quotas":
223                [{"usage": 92, "metric": "CPUS", "limit": 100},
224                 {"usage": 640, "metric": "DISKS_TOTAL_GB", "limit": 10240},
225              ...]]}
226        }
227
228        Returns:
229            A region resource in json.
230        """
231        api = self.service.regions().list(project=self._project)
232        return self.Execute(api)
233
234    @staticmethod
235    def GetMetricQuota(regions_info, zone, metric):
236        """Get CPU quota limit in specific zone and project.
237
238        Args:
239            regions_info: Dict, regions resource in json.
240            zone: String, name of zone.
241            metric: String, name of metric, e.g. "CPUS".
242
243        Returns:
244            A dict of quota information. Such as
245            {"usage": 100, "metric": "CPUS", "limit": 200}
246        """
247        for region_info in regions_info["items"]:
248            if region_info["name"] in zone:
249                for quota in region_info["quotas"]:
250                    if quota["metric"] == metric:
251                        return quota
252        logger.info("Can't get %s quota info from zone(%s)", metric, zone)
253        return None
254
255    def EnoughMetricsInZone(self, zone):
256        """Check the zone have enough metrics to create instance.
257
258        The metrics include CPUS and DISKS.
259
260        Args:
261            zone: String, name of zone.
262
263        Returns:
264            Boolean. True if zone have enough quota.
265        """
266        regions_info = self.GetRegionInfo()
267        for metric in _METRICS:
268            quota = self.GetMetricQuota(regions_info, zone, metric)
269            if not quota:
270                logger.debug(
271                    "Can't query the metric(%s) in zone(%s)", metric, zone)
272                return False
273            if quota[_LIMIT] - quota[_USAGE] < _REQUIRE_METRICS[metric]:
274                logger.debug(
275                    "The metric(%s) is over limit in zone(%s)", metric, zone)
276                return False
277        return True
278
279    def GetDisk(self, disk_name, zone):
280        """Get disk information.
281
282        Args:
283          disk_name: A string.
284          zone: String, name of zone.
285
286        Returns:
287          An disk resource in json.
288          https://cloud.google.com/compute/docs/reference/latest/disks#resource
289        """
290        api = self.service.disks().get(
291            project=self._project, zone=zone, disk=disk_name)
292        return self.Execute(api)
293
294    def CheckDiskExists(self, disk_name, zone):
295        """Check if disk exists.
296
297        Args:
298          disk_name: A string
299          zone: String, name of zone.
300
301        Returns:
302          True if disk exists, otherwise False.
303        """
304        try:
305            self.GetDisk(disk_name, zone)
306            exists = True
307        except errors.ResourceNotFoundError:
308            exists = False
309        logger.debug("CheckDiskExists: disk_name: %s, result: %s", disk_name,
310                     exists)
311        return exists
312
313    def CreateDisk(self,
314                   disk_name,
315                   source_image,
316                   size_gb,
317                   zone,
318                   source_project=None,
319                   disk_type=PersistentDiskType.STANDARD):
320        """Create a gce disk.
321
322        Args:
323            disk_name: String
324            source_image: String, name of the image.
325            size_gb: Integer, size in gb.
326            zone: String, name of the zone, e.g. us-central1-b.
327            source_project: String, required if the image is located in a different
328                            project.
329            disk_type: String, a value from PersistentDiskType, STANDARD
330                       for regular hard disk or SSD for solid state disk.
331        """
332        source_project = source_project or self._project
333        source_image = "projects/%s/global/images/%s" % (
334            source_project, source_image) if source_image else None
335        logger.info("Creating disk %s, size_gb: %d, source_image: %s",
336                    disk_name, size_gb, str(source_image))
337        body = {
338            "name": disk_name,
339            "sizeGb": size_gb,
340            "type": "projects/%s/zones/%s/diskTypes/%s" % (self._project, zone,
341                                                           disk_type),
342        }
343        api = self.service.disks().insert(
344            project=self._project,
345            sourceImage=source_image,
346            zone=zone,
347            body=body)
348        operation = self.Execute(api)
349        try:
350            self.WaitOnOperation(
351                operation=operation,
352                operation_scope=OperationScope.ZONE,
353                scope_name=zone)
354        except errors.DriverError:
355            logger.error("Creating disk failed, cleaning up: %s", disk_name)
356            if self.CheckDiskExists(disk_name, zone):
357                self.DeleteDisk(disk_name, zone)
358            raise
359        logger.info("Disk %s has been created.", disk_name)
360
361    def DeleteDisk(self, disk_name, zone):
362        """Delete a gce disk.
363
364        Args:
365            disk_name: A string, name of disk.
366            zone: A string, name of zone.
367        """
368        logger.info("Deleting disk %s", disk_name)
369        api = self.service.disks().delete(
370            project=self._project, zone=zone, disk=disk_name)
371        operation = self.Execute(api)
372        self.WaitOnOperation(
373            operation=operation,
374            operation_scope=OperationScope.ZONE,
375            scope_name=zone)
376        logger.info("Deleted disk %s", disk_name)
377
378    def DeleteDisks(self, disk_names, zone):
379        """Delete multiple disks.
380
381        Args:
382            disk_names: A list of disk names.
383            zone: A string, name of zone.
384
385        Returns:
386            A tuple, (deleted, failed, error_msgs)
387            deleted: A list of names of disks that have been deleted.
388            failed: A list of names of disks that we fail to delete.
389            error_msgs: A list of failure messages.
390        """
391        if not disk_names:
392            logger.warning("Nothing to delete. Arg disk_names is not provided.")
393            return [], [], []
394        # Batch send deletion requests.
395        logger.info("Deleting disks: %s", disk_names)
396        delete_requests = {}
397        for disk_name in set(disk_names):
398            request = self.service.disks().delete(
399                project=self._project, disk=disk_name, zone=zone)
400            delete_requests[disk_name] = request
401        return self._BatchExecuteAndWait(
402            delete_requests, OperationScope.ZONE, scope_name=zone)
403
404    def ListDisks(self, zone, disk_filter=None):
405        """List disks.
406
407        Args:
408            zone: A string, representing zone name. e.g. "us-central1-f"
409            disk_filter: A string representing a filter in format of
410                             FIELD_NAME COMPARISON_STRING LITERAL_STRING
411                             e.g. "name ne example-instance"
412                             e.g. "name eq "example-instance-[0-9]+""
413
414        Returns:
415            A list of disks.
416        """
417        return self.ListWithMultiPages(
418            api_resource=self.service.disks().list,
419            project=self._project,
420            zone=zone,
421            filter=disk_filter)
422
423    def CreateImage(self,
424                    image_name,
425                    source_uri=None,
426                    source_disk=None,
427                    labels=None):
428        """Create a Gce image.
429
430        Args:
431            image_name: String, name of image
432            source_uri: Full Google Cloud Storage URL where the disk image is
433                        stored.  e.g. "https://storage.googleapis.com/my-bucket/
434                        avd-system-2243663.tar.gz"
435            source_disk: String, this should be the disk's selfLink value
436                         (including zone and project), rather than the disk_name
437                         e.g. https://www.googleapis.com/compute/v1/projects/
438                              google.com:android-builds-project/zones/
439                              us-east1-d/disks/<disk_name>
440            labels: Dict, will be added to the image's labels.
441
442        Raises:
443            errors.DriverError: For malformed request or response.
444            errors.GceOperationTimeoutError: Operation takes too long to finish.
445        """
446        if self.CheckImageExists(image_name):
447            return
448        if (source_uri and source_disk) or (not source_uri
449                                            and not source_disk):
450            raise errors.DriverError(
451                "Creating image %s requires either source_uri %s or "
452                "source_disk %s but not both" % (image_name, source_uri,
453                                                 source_disk))
454        elif source_uri:
455            logger.info("Creating image %s, source_uri %s", image_name,
456                        source_uri)
457            body = {
458                "name": image_name,
459                "rawDisk": {
460                    "source": source_uri,
461                },
462            }
463        else:
464            logger.info("Creating image %s, source_disk %s", image_name,
465                        source_disk)
466            body = {
467                "name": image_name,
468                "sourceDisk": source_disk,
469            }
470        if labels is not None:
471            body["labels"] = labels
472        api = self.service.images().insert(project=self._project, body=body)
473        operation = self.Execute(api)
474        try:
475            self.WaitOnOperation(
476                operation=operation, operation_scope=OperationScope.GLOBAL)
477        except errors.DriverError:
478            logger.error("Creating image failed, cleaning up: %s", image_name)
479            if self.CheckImageExists(image_name):
480                self.DeleteImage(image_name)
481            raise
482        logger.info("Image %s has been created.", image_name)
483
484    @utils.RetryOnException(_IsFingerPrintError,
485                            _MAX_RETRIES_ON_FINGERPRINT_CONFLICT)
486    def SetImageLabels(self, image_name, new_labels):
487        """Update image's labels. Retry for finger print conflict.
488
489        Note: Decorator RetryOnException will retry the call for FingerPrint
490          conflict (HTTP error code 412). The fingerprint is used to detect
491          conflicts of GCE resource updates. The fingerprint is initially generated
492          by Compute Engine and changes after every request to modify or update
493          resources (e.g. GCE "image" resource has "fingerPrint" for "labels"
494          updates).
495
496        Args:
497            image_name: A string, the image name.
498            new_labels: Dict, will be added to the image's labels.
499
500        Returns:
501            A GlobalOperation resouce.
502            https://cloud.google.com/compute/docs/reference/latest/globalOperations
503        """
504        image = self.GetImage(image_name)
505        labels = image.get("labels", {})
506        labels.update(new_labels)
507        body = {
508            "labels": labels,
509            "labelFingerprint": image["labelFingerprint"]
510        }
511        api = self.service.images().setLabels(
512            project=self._project, resource=image_name, body=body)
513        return self.Execute(api)
514
515    def CheckImageExists(self, image_name):
516        """Check if image exists.
517
518        Args:
519            image_name: A string
520
521        Returns:
522            True if image exists, otherwise False.
523        """
524        try:
525            self.GetImage(image_name)
526            exists = True
527        except errors.ResourceNotFoundError:
528            exists = False
529        logger.debug("CheckImageExists: image_name: %s, result: %s",
530                     image_name, exists)
531        return exists
532
533    def GetImage(self, image_name, image_project=None):
534        """Get image information.
535
536        Args:
537            image_name: A string
538            image_project: A string
539
540        Returns:
541            An image resource in json.
542            https://cloud.google.com/compute/docs/reference/latest/images#resource
543        """
544        api = self.service.images().get(
545            project=image_project or self._project, image=image_name)
546        return self.Execute(api)
547
548    def GetImageFromFamily(self, image_family, image_project=None):
549        """Get image information from image_family.
550
551        Args:
552            image_family: String of image family.
553            image_project: String of image project.
554
555        Returns:
556            An image resource in json.
557            https://cloud.google.com/compute/docs/reference/latest/images#resource
558        """
559        api = self.service.images().getFromFamily(
560            project=image_project or self._project, family=image_family)
561        return self.Execute(api)
562
563    def DeleteImage(self, image_name):
564        """Delete an image.
565
566        Args:
567            image_name: A string
568        """
569        logger.info("Deleting image %s", image_name)
570        api = self.service.images().delete(
571            project=self._project, image=image_name)
572        operation = self.Execute(api)
573        self.WaitOnOperation(
574            operation=operation, operation_scope=OperationScope.GLOBAL)
575        logger.info("Deleted image %s", image_name)
576
577    def DeleteImages(self, image_names):
578        """Delete multiple images.
579
580        Args:
581            image_names: A list of image names.
582
583        Returns:
584            A tuple, (deleted, failed, error_msgs)
585            deleted: A list of names of images that have been deleted.
586            failed: A list of names of images that we fail to delete.
587            error_msgs: A list of failure messages.
588        """
589        if not image_names:
590            return [], [], []
591        # Batch send deletion requests.
592        logger.info("Deleting images: %s", image_names)
593        delete_requests = {}
594        for image_name in set(image_names):
595            request = self.service.images().delete(
596                project=self._project, image=image_name)
597            delete_requests[image_name] = request
598        return self._BatchExecuteAndWait(delete_requests,
599                                         OperationScope.GLOBAL)
600
601    def ListImages(self, image_filter=None, image_project=None):
602        """List images.
603
604        Args:
605            image_filter: A string representing a filter in format of
606                          FIELD_NAME COMPARISON_STRING LITERAL_STRING
607                          e.g. "name ne example-image"
608                          e.g. "name eq "example-image-[0-9]+""
609            image_project: String. If not provided, will list images from the default
610                           project. Otherwise, will list images from the given
611                           project, which can be any arbitrary project where the
612                           account has read access
613                           (i.e. has the role "roles/compute.imageUser")
614
615        Read more about image sharing across project:
616        https://cloud.google.com/compute/docs/images/sharing-images-across-projects
617
618        Returns:
619            A list of images.
620        """
621        return self.ListWithMultiPages(
622            api_resource=self.service.images().list,
623            project=image_project or self._project,
624            filter=image_filter)
625
626    def GetInstance(self, instance, zone):
627        """Get information about an instance.
628
629        Args:
630            instance: A string, representing instance name.
631            zone: A string, representing zone name. e.g. "us-central1-f"
632
633        Returns:
634            An instance resource in json.
635            https://cloud.google.com/compute/docs/reference/latest/instances#resource
636        """
637        api = self.service.instances().get(
638            project=self._project, zone=zone, instance=instance)
639        return self.Execute(api)
640
641    def AttachAccelerator(self, instance, zone, accelerator_count,
642                          accelerator_type):
643        """Attach a GPU accelerator to the instance.
644
645        Note: In order for this to succeed the following must hold:
646        - The machine schedule must be set to "terminate" i.e:
647          SetScheduling(self, instance, zone, on_host_maintenance="terminate")
648          must have been called.
649        - The machine is not starting or running. i.e.
650          StopInstance(self, instance) must have been called.
651
652        Args:
653            instance: A string, representing instance name.
654            zone: String, name of zone.
655            accelerator_count: The number accelerators to be attached to the instance.
656             a value of 0 will detach all accelerators.
657            accelerator_type: The type of accelerator to attach. e.g.
658              "nvidia-tesla-k80"
659        """
660        body = {
661            "guestAccelerators": [{
662                "acceleratorType":
663                self.GetAcceleratorUrl(accelerator_type, zone),
664                "acceleratorCount":
665                accelerator_count
666            }]
667        }
668        api = self.service.instances().setMachineResources(
669            project=self._project, zone=zone, instance=instance, body=body)
670        operation = self.Execute(api)
671        try:
672            self.WaitOnOperation(
673                operation=operation,
674                operation_scope=OperationScope.ZONE,
675                scope_name=zone)
676        except errors.GceOperationTimeoutError:
677            logger.error("Attach instance failed: %s", instance)
678            raise
679        logger.info("%d x %s have been attached to instance %s.",
680                    accelerator_count, accelerator_type, instance)
681
682    def AttachDisk(self, instance, zone, **kwargs):
683        """Attach the external disk to the instance.
684
685        Args:
686            instance: A string, representing instance name.
687            zone: String, name of zone.
688            **kwargs: The attachDisk request body. See "https://cloud.google.com/
689              compute/docs/reference/latest/instances/attachDisk" for detail.
690              {
691                "kind": "compute#attachedDisk",
692                "type": string,
693                "mode": string,
694                "source": string,
695                "deviceName": string,
696                "index": integer,
697                "boot": boolean,
698                "initializeParams": {
699                  "diskName": string,
700                  "sourceImage": string,
701                  "diskSizeGb": long,
702                  "diskType": string,
703                  "sourceImageEncryptionKey": {
704                    "rawKey": string,
705                    "sha256": string
706                  }
707                },
708                "autoDelete": boolean,
709                "licenses": [
710                  string
711                ],
712                "interface": string,
713                "diskEncryptionKey": {
714                  "rawKey": string,
715                  "sha256": string
716                }
717              }
718
719        Returns:
720            An disk resource in json.
721            https://cloud.google.com/compute/docs/reference/latest/disks#resource
722
723
724        Raises:
725            errors.GceOperationTimeoutError: Operation takes too long to finish.
726        """
727        api = self.service.instances().attachDisk(
728            project=self._project, zone=zone, instance=instance, body=kwargs)
729        operation = self.Execute(api)
730        try:
731            self.WaitOnOperation(
732                operation=operation,
733                operation_scope=OperationScope.ZONE,
734                scope_name=zone)
735        except errors.GceOperationTimeoutError:
736            logger.error("Attach instance failed: %s", instance)
737            raise
738        logger.info("Disk has been attached to instance %s.", instance)
739
740    def DetachDisk(self, instance, zone, disk_name):
741        """Attach the external disk to the instance.
742
743        Args:
744            instance: A string, representing instance name.
745            zone: String, name of zone.
746            disk_name: A string, the name of the detach disk.
747
748        Returns:
749            A ZoneOperation resource.
750            See https://cloud.google.com/compute/docs/reference/latest/zoneOperations
751
752        Raises:
753            errors.GceOperationTimeoutError: Operation takes too long to finish.
754        """
755        api = self.service.instances().detachDisk(
756            project=self._project,
757            zone=zone,
758            instance=instance,
759            deviceName=disk_name)
760        operation = self.Execute(api)
761        try:
762            self.WaitOnOperation(
763                operation=operation,
764                operation_scope=OperationScope.ZONE,
765                scope_name=zone)
766        except errors.GceOperationTimeoutError:
767            logger.error("Detach instance failed: %s", instance)
768            raise
769        logger.info("Disk has been detached to instance %s.", instance)
770
771    def StartInstance(self, instance, zone):
772        """Start |instance| in |zone|.
773
774        Args:
775            instance: A string, representing instance name.
776            zone: A string, representing zone name. e.g. "us-central1-f"
777
778        Raises:
779            errors.GceOperationTimeoutError: Operation takes too long to finish.
780        """
781        api = self.service.instances().start(
782            project=self._project, zone=zone, instance=instance)
783        operation = self.Execute(api)
784        try:
785            self.WaitOnOperation(
786                operation=operation,
787                operation_scope=OperationScope.ZONE,
788                scope_name=zone)
789        except errors.GceOperationTimeoutError:
790            logger.error("Start instance failed: %s", instance)
791            raise
792        logger.info("Instance %s has been started.", instance)
793
794    def StartInstances(self, instances, zone):
795        """Start |instances| in |zone|.
796
797        Args:
798            instances: A list of strings, representing instance names's list.
799            zone: A string, representing zone name. e.g. "us-central1-f"
800
801        Returns:
802            A tuple, (done, failed, error_msgs)
803            done: A list of string, representing the names of instances that
804              have been executed.
805            failed: A list of string, representing the names of instances that
806              we failed to execute.
807            error_msgs: A list of string, representing the failure messages.
808        """
809        action = functools.partial(
810            self.service.instances().start, project=self._project, zone=zone)
811        return self._BatchExecuteOnInstances(instances, zone, action)
812
813    def StopInstance(self, instance, zone):
814        """Stop |instance| in |zone|.
815
816        Args:
817            instance: A string, representing instance name.
818            zone: A string, representing zone name. e.g. "us-central1-f"
819
820        Raises:
821            errors.GceOperationTimeoutError: Operation takes too long to finish.
822        """
823        api = self.service.instances().stop(
824            project=self._project, zone=zone, instance=instance)
825        operation = self.Execute(api)
826        try:
827            self.WaitOnOperation(
828                operation=operation,
829                operation_scope=OperationScope.ZONE,
830                scope_name=zone)
831        except errors.GceOperationTimeoutError:
832            logger.error("Stop instance failed: %s", instance)
833            raise
834        logger.info("Instance %s has been terminated.", instance)
835
836    def StopInstances(self, instances, zone):
837        """Stop |instances| in |zone|.
838
839        Args:
840            instances: A list of strings, representing instance names's list.
841            zone: A string, representing zone name. e.g. "us-central1-f"
842
843        Returns:
844            A tuple, (done, failed, error_msgs)
845            done: A list of string, representing the names of instances that
846                  have been executed.
847            failed: A list of string, representing the names of instances that
848                    we failed to execute.
849            error_msgs: A list of string, representing the failure messages.
850        """
851        action = functools.partial(
852            self.service.instances().stop, project=self._project, zone=zone)
853        return self._BatchExecuteOnInstances(instances, zone, action)
854
855    def SetScheduling(self,
856                      instance,
857                      zone,
858                      automatic_restart=True,
859                      on_host_maintenance="MIGRATE"):
860        """Update scheduling config |automatic_restart| and |on_host_maintenance|.
861
862        Args:
863            instance: A string, representing instance name.
864            zone: A string, representing zone name. e.g. "us-central1-f".
865            automatic_restart: Boolean, determine whether the instance will
866                               automatically restart if it crashes or not,
867                               default to True.
868            on_host_maintenance: enum["MIGRATE", "TERMINATE"]
869                                 The instance's maintenance behavior, which
870                                 determines whether the instance is live
871                                 "MIGRATE" or "TERMINATE" when there is
872                                 a maintenance event.
873
874        Raises:
875            errors.GceOperationTimeoutError: Operation takes too long to finish.
876        """
877        body = {
878            "automaticRestart": automatic_restart,
879            "onHostMaintenance": on_host_maintenance
880        }
881        api = self.service.instances().setScheduling(
882            project=self._project, zone=zone, instance=instance, body=body)
883        operation = self.Execute(api)
884        try:
885            self.WaitOnOperation(
886                operation=operation,
887                operation_scope=OperationScope.ZONE,
888                scope_name=zone)
889        except errors.GceOperationTimeoutError:
890            logger.error("Set instance scheduling failed: %s", instance)
891            raise
892        logger.info(
893            "Instance scheduling changed:\n"
894            "    automaticRestart: %s\n"
895            "    onHostMaintenance: %s\n",
896            str(automatic_restart).lower(), on_host_maintenance)
897
898    def ListInstances(self, instance_filter=None):
899        """List instances cross all zones.
900
901        Gcompute response instance. For example:
902        {
903            'items':
904            {
905                'zones/europe-west3-b':
906                {
907                    'warning':
908                    {
909                        'message': "There are no results for scope
910                        'zones/europe-west3-b' on this page.",
911                        'code': 'NO_RESULTS_ON_PAGE',
912                        'data': [{'value': u'zones/europe-west3-b',
913                                  'key': u'scope'}]
914                    }
915                },
916                'zones/asia-east1-b':
917                {
918                    'instances': [
919                    {
920                        'name': 'ins-bc212dc8-userbuild-aosp-cf-x86-64-phone'
921                        'status': 'RUNNING',
922                        'cpuPlatform': 'Intel Broadwell',
923                        'startRestricted': False,
924                        'labels': {u'created_by': u'herbertxue'},
925                        'name': 'ins-bc212dc8-userbuild-aosp-cf-x86-64-phone',
926                        ...
927                    }]
928                }
929            }
930        }
931
932        Args:
933            instance_filter: A string representing a filter in format of
934                             FIELD_NAME COMPARISON_STRING LITERAL_STRING
935                             e.g. "name ne example-instance"
936                             e.g. "name eq "example-instance-[0-9]+""
937
938        Returns:
939            A list of instances.
940        """
941        # aggregatedList will only return 500 results max, so if there are more,
942        # we need to send in the next page token to get the next 500 (and so on
943        # and so forth.
944        get_more_instances = True
945        page_token = None
946        instances_list = []
947        while get_more_instances:
948            api = self.service.instances().aggregatedList(
949                project=self._project,
950                filter=instance_filter,
951                pageToken=page_token)
952            response = self.Execute(api)
953            page_token = response.get("nextPageToken")
954            get_more_instances = page_token is not None
955            for instances_data in response["items"].values():
956                if "instances" in instances_data:
957                    for instance in instances_data.get("instances"):
958                        instances_list.append(instance)
959
960        return instances_list
961
962    def SetSchedulingInstances(self,
963                               instances,
964                               zone,
965                               automatic_restart=True,
966                               on_host_maintenance="MIGRATE"):
967        """Update scheduling config |automatic_restart| and |on_host_maintenance|.
968
969        See //cloud/cluster/api/mixer_instances.proto Scheduling for config option.
970
971        Args:
972            instances: A list of string, representing instance names.
973            zone: A string, representing zone name. e.g. "us-central1-f".
974            automatic_restart: Boolean, determine whether the instance will
975                               automatically restart if it crashes or not,
976                               default to True.
977            on_host_maintenance: enum["MIGRATE", "TERMINATE"]
978                                 The instance's maintenance behavior, which
979                                 determines whether the instance is live
980                                 migrated or terminated when there is
981                                 a maintenance event.
982
983        Returns:
984            A tuple, (done, failed, error_msgs)
985            done: A list of string, representing the names of instances that
986                  have been executed.
987            failed: A list of string, representing the names of instances that
988                    we failed to execute.
989            error_msgs: A list of string, representing the failure messages.
990        """
991        body = {
992            "automaticRestart": automatic_restart,
993            "OnHostMaintenance": on_host_maintenance
994        }
995        action = functools.partial(
996            self.service.instances().setScheduling,
997            project=self._project,
998            zone=zone,
999            body=body)
1000        return self._BatchExecuteOnInstances(instances, zone, action)
1001
1002    def _BatchExecuteOnInstances(self, instances, zone, action):
1003        """Batch processing operations requiring computing time.
1004
1005        Args:
1006            instances: A list of instance names.
1007            zone: A string, e.g. "us-central1-f".
1008            action: partial func, all kwargs for this gcloud action has been
1009                    defined in the caller function (e.g. See "StartInstances")
1010                    except 'instance' which will be defined by iterating the
1011                    |instances|.
1012
1013        Returns:
1014            A tuple, (done, failed, error_msgs)
1015            done: A list of string, representing the names of instances that
1016                  have been executed.
1017            failed: A list of string, representing the names of instances that
1018                    we failed to execute.
1019            error_msgs: A list of string, representing the failure messages.
1020        """
1021        if not instances:
1022            return [], [], []
1023        # Batch send requests.
1024        logger.info("Batch executing instances: %s", instances)
1025        requests = {}
1026        for instance_name in set(instances):
1027            requests[instance_name] = action(instance=instance_name)
1028        return self._BatchExecuteAndWait(
1029            requests, operation_scope=OperationScope.ZONE, scope_name=zone)
1030
1031    def _BatchExecuteAndWait(self, requests, operation_scope, scope_name=None):
1032        """Batch processing requests and wait on the operation.
1033
1034        Args:
1035            requests: A dictionary. The key is a string representing the resource
1036                      name. For example, an instance name, or an image name.
1037            operation_scope: A value from OperationScope, "zone", "region",
1038                             or "global".
1039            scope_name: If operation_scope is "zone" or "region", this should be
1040                        the name of the zone or region, e.g. "us-central1-f".
1041        Returns:
1042            A tuple, (done, failed, error_msgs)
1043            done: A list of string, representing the resource names that have
1044                  been executed.
1045            failed: A list of string, representing resource names that
1046                    we failed to execute.
1047            error_msgs: A list of string, representing the failure messages.
1048        """
1049        results = self.BatchExecute(requests)
1050        # Initialize return values
1051        failed = []
1052        error_msgs = []
1053        for resource_name, (_, error) in six.iteritems(results):
1054            if error is not None:
1055                failed.append(resource_name)
1056                error_msgs.append(str(error))
1057        done = []
1058        # Wait for the executing operations to finish.
1059        logger.info("Waiting for executing operations")
1060        for resource_name in six.iterkeys(requests):
1061            operation, _ = results[resource_name]
1062            if operation:
1063                try:
1064                    self.WaitOnOperation(operation, operation_scope,
1065                                         scope_name)
1066                    done.append(resource_name)
1067                except errors.DriverError as exc:
1068                    failed.append(resource_name)
1069                    error_msgs.append(str(exc))
1070        return done, failed, error_msgs
1071
1072    def ListZones(self):
1073        """List all zone instances in the project.
1074
1075        Returns:
1076            Gcompute response instance. For example:
1077            {
1078              "id": "projects/google.com%3Aandroid-build-staging/zones",
1079              "kind": "compute#zoneList",
1080              "selfLink": "https://www.googleapis.com/compute/v1/projects/"
1081                  "google.com:android-build-staging/zones"
1082              "items": [
1083                {
1084                  'creationTimestamp': '2014-07-15T10:44:08.663-07:00',
1085                  'description': 'asia-east1-c',
1086                  'id': '2222',
1087                  'kind': 'compute#zone',
1088                  'name': 'asia-east1-c',
1089                  'region': 'https://www.googleapis.com/compute/v1/projects/'
1090                      'google.com:android-build-staging/regions/asia-east1',
1091                  'selfLink': 'https://www.googleapis.com/compute/v1/projects/'
1092                      'google.com:android-build-staging/zones/asia-east1-c',
1093                  'status': 'UP'
1094                }, {
1095                  'creationTimestamp': '2014-05-30T18:35:16.575-07:00',
1096                  'description': 'asia-east1-b',
1097                  'id': '2221',
1098                  'kind': 'compute#zone',
1099                  'name': 'asia-east1-b',
1100                  'region': 'https://www.googleapis.com/compute/v1/projects/'
1101                      'google.com:android-build-staging/regions/asia-east1',
1102                  'selfLink': 'https://www.googleapis.com/compute/v1/projects'
1103                      '/google.com:android-build-staging/zones/asia-east1-b',
1104                  'status': 'UP'
1105                }]
1106            }
1107            See cloud cluster's api/mixer_zones.proto
1108        """
1109        api = self.service.zones().list(project=self._project)
1110        return self.Execute(api)
1111
1112    def ListRegions(self):
1113        """List all the regions for a project.
1114
1115        Returns:
1116            A dictionary containing all the zones and additional data. See this link
1117            for the detailed response:
1118            https://cloud.google.com/compute/docs/reference/latest/regions/list.
1119            Example:
1120            {
1121              'items': [{
1122                  'name':
1123                      'us-central1',
1124                  'quotas': [{
1125                      'usage': 2.0,
1126                      'limit': 24.0,
1127                      'metric': 'CPUS'
1128                  }, {
1129                      'usage': 1.0,
1130                      'limit': 23.0,
1131                      'metric': 'IN_USE_ADDRESSES'
1132                  }, {
1133                      'usage': 209.0,
1134                      'limit': 10240.0,
1135                      'metric': 'DISKS_TOTAL_GB'
1136                  }, {
1137                      'usage': 1000.0,
1138                      'limit': 20000.0,
1139                      'metric': 'INSTANCES'
1140                  }]
1141              },..]
1142            }
1143        """
1144        api = self.service.regions().list(project=self._project)
1145        return self.Execute(api)
1146
1147    def _GetNetworkArgs(self, network, zone):
1148        """Helper to generate network args that is used to create an instance.
1149
1150        Args:
1151            network: A string, e.g. "default".
1152            zone: String, representing zone name, e.g. "us-central1-f"
1153
1154        Returns:
1155            A dictionary representing network args.
1156        """
1157        network_args = {
1158            "network": self.GetNetworkUrl(network),
1159            "accessConfigs": [{
1160                "name": "External NAT",
1161                "type": "ONE_TO_ONE_NAT"
1162            }]
1163        }
1164        # default network can be blank or set to default, we don't need to
1165        # specify the subnetwork for that.
1166        if network and network != "default":
1167            network_args["subnetwork"] = self.GetSubnetworkUrl(network, zone)
1168        return network_args
1169
1170    def _GetDiskArgs(self,
1171                     disk_name,
1172                     image_name,
1173                     image_project=None,
1174                     disk_size_gb=None):
1175        """Helper to generate disk args that is used to create an instance.
1176
1177        Args:
1178            disk_name: A string
1179            image_name: A string
1180            image_project: A string
1181            disk_size_gb: An integer
1182
1183        Returns:
1184            List holding dict of disk args.
1185        """
1186        args = copy.deepcopy(BASE_DISK_ARGS)
1187        args["initializeParams"] = {
1188            "diskName": disk_name,
1189            "sourceImage": self.GetImage(image_name,
1190                                         image_project)["selfLink"],
1191        }
1192        # TODO: Remove this check once it's validated that we can either pass in
1193        # a None diskSizeGb or we find an appropriate default val.
1194        if disk_size_gb:
1195            args["diskSizeGb"] = disk_size_gb
1196        return [args]
1197
1198    def _GetExtraDiskArgs(self, extra_disk_name, zone):
1199        """Get extra disk arg for given disk.
1200
1201        Args:
1202            extra_disk_name: String, name of the disk.
1203            zone: String, representing zone name, e.g. "us-central1-f"
1204
1205        Returns:
1206            A dictionary of disk args.
1207        """
1208        return [{
1209            "type": "PERSISTENT",
1210            "mode": "READ_WRITE",
1211            "source": "projects/%s/zones/%s/disks/%s" % (self._project, zone,
1212                                                         extra_disk_name),
1213            "autoDelete": True,
1214            "boot": False,
1215            "interface": "SCSI",
1216            "deviceName": extra_disk_name,
1217        }]
1218
1219    # pylint: disable=too-many-locals
1220    def CreateInstance(self,
1221                       instance,
1222                       image_name,
1223                       machine_type,
1224                       metadata,
1225                       network,
1226                       zone,
1227                       disk_args=None,
1228                       image_project=None,
1229                       gpu=None,
1230                       extra_disk_name=None,
1231                       extra_scopes=None,
1232                       tags=None):
1233        """Create a gce instance with a gce image.
1234
1235        Args:
1236            instance: String, instance name.
1237            image_name: String, source image used to create this disk.
1238            machine_type: String, representing machine_type,
1239                          e.g. "n1-standard-1"
1240            metadata: Dict, maps a metadata name to its value.
1241            network: String, representing network name, e.g. "default"
1242            zone: String, representing zone name, e.g. "us-central1-f"
1243            disk_args: A list of extra disk args (strings), see _GetDiskArgs
1244                       for example, if None, will create a disk using the given
1245                       image.
1246            image_project: String, name of the project where the image
1247                           belongs. Assume the default project if None.
1248            gpu: String, type of gpu to attach. e.g. "nvidia-tesla-k80", if
1249                 None no gpus will be attached. For more details see:
1250                 https://cloud.google.com/compute/docs/gpus/add-gpus
1251            extra_disk_name: String,the name of the extra disk to attach.
1252            extra_scopes: A list of extra scopes to be provided to the instance.
1253            tags: A list of tags to associate with the instance. e.g.
1254                  ["http-server", "https-server"]
1255        """
1256        disk_args = (disk_args
1257                     or self._GetDiskArgs(instance, image_name, image_project))
1258        if extra_disk_name:
1259            disk_args.extend(self._GetExtraDiskArgs(extra_disk_name, zone))
1260
1261        scopes = []
1262        scopes.extend(self.DEFAULT_INSTANCE_SCOPE)
1263        if extra_scopes:
1264            scopes.extend(extra_scopes)
1265
1266        # Add labels for giving the instances ability to be filter for
1267        # acloud list/delete cmds.
1268        body = {
1269            "machineType": self.GetMachineType(machine_type, zone)["selfLink"],
1270            "name": instance,
1271            "networkInterfaces": [self._GetNetworkArgs(network, zone)],
1272            "disks": disk_args,
1273            "labels": {constants.LABEL_CREATE_BY: getpass.getuser()},
1274            "serviceAccounts": [{
1275                "email": "default",
1276                "scopes": scopes,
1277            }],
1278            "enableVtpm": True,
1279        }
1280
1281        if tags:
1282            body["tags"] = {"items": tags}
1283        if gpu:
1284            body["guestAccelerators"] = [{
1285                "acceleratorType": self.GetAcceleratorUrl(gpu, zone),
1286                "acceleratorCount": 1
1287            }]
1288            # Instances with GPUs cannot live migrate because they are assigned
1289            # to specific hardware devices.
1290            body["scheduling"] = {"onHostMaintenance": "terminate"}
1291        if metadata:
1292            metadata_list = [{
1293                _METADATA_KEY: key,
1294                _METADATA_KEY_VALUE: val
1295            } for key, val in six.iteritems(metadata)]
1296            body[_METADATA] = {_ITEMS: metadata_list}
1297        logger.info("Creating instance: project %s, zone %s, body:%s",
1298                    self._project, zone, body)
1299        api = self.service.instances().insert(
1300            project=self._project, zone=zone, body=body)
1301        operation = self.Execute(api)
1302        self.WaitOnOperation(
1303            operation, operation_scope=OperationScope.ZONE, scope_name=zone)
1304        logger.info("Instance %s has been created.", instance)
1305
1306    def DeleteInstance(self, instance, zone):
1307        """Delete a gce instance.
1308
1309        Args:
1310            instance: A string, instance name.
1311            zone: A string, e.g. "us-central1-f"
1312        """
1313        logger.info("Deleting instance: %s", instance)
1314        api = self.service.instances().delete(
1315            project=self._project, zone=zone, instance=instance)
1316        operation = self.Execute(api)
1317        self.WaitOnOperation(
1318            operation, operation_scope=OperationScope.ZONE, scope_name=zone)
1319        logger.info("Deleted instance: %s", instance)
1320
1321    def DeleteInstances(self, instances, zone):
1322        """Delete multiple instances.
1323
1324        Args:
1325            instances: A list of instance names.
1326            zone: A string, e.g. "us-central1-f".
1327
1328        Returns:
1329            A tuple, (deleted, failed, error_msgs)
1330            deleted: A list of names of instances that have been deleted.
1331            failed: A list of names of instances that we fail to delete.
1332            error_msgs: A list of failure messages.
1333        """
1334        action = functools.partial(
1335            self.service.instances().delete, project=self._project, zone=zone)
1336        return self._BatchExecuteOnInstances(instances, zone, action)
1337
1338    def ResetInstance(self, instance, zone):
1339        """Reset the gce instance.
1340
1341        Args:
1342            instance: A string, instance name.
1343            zone: A string, e.g. "us-central1-f".
1344        """
1345        logger.info("Resetting instance: %s", instance)
1346        api = self.service.instances().reset(
1347            project=self._project, zone=zone, instance=instance)
1348        operation = self.Execute(api)
1349        self.WaitOnOperation(
1350            operation, operation_scope=OperationScope.ZONE, scope_name=zone)
1351        logger.info("Instance has been reset: %s", instance)
1352
1353    def GetMachineType(self, machine_type, zone):
1354        """Get URL for a given machine typle.
1355
1356        Args:
1357            machine_type: A string, name of the machine type.
1358            zone: A string, e.g. "us-central1-f"
1359
1360        Returns:
1361            A machine type resource in json.
1362            https://cloud.google.com/compute/docs/reference/latest/
1363            machineTypes#resource
1364        """
1365        api = self.service.machineTypes().get(
1366            project=self._project, zone=zone, machineType=machine_type)
1367        return self.Execute(api)
1368
1369    def GetAcceleratorUrl(self, accelerator_type, zone):
1370        """Get URL for a given type of accelator.
1371
1372        Args:
1373            accelerator_type: A string, representing the accelerator, e.g
1374              "nvidia-tesla-k80"
1375            zone: A string representing a zone, e.g. "us-west1-b"
1376
1377        Returns:
1378            A URL that points to the accelerator resource, e.g.
1379            https://www.googleapis.com/compute/v1/projects/<project id>/zones/
1380            us-west1-b/acceleratorTypes/nvidia-tesla-k80
1381        """
1382        api = self.service.acceleratorTypes().get(
1383            project=self._project, zone=zone, acceleratorType=accelerator_type)
1384        result = self.Execute(api)
1385        return result["selfLink"]
1386
1387    def GetNetworkUrl(self, network):
1388        """Get URL for a given network.
1389
1390        Args:
1391            network: A string, representing network name, e.g "default"
1392
1393        Returns:
1394            A URL that points to the network resource, e.g.
1395            https://www.googleapis.com/compute/v1/projects/<project id>/
1396            global/networks/default
1397        """
1398        api = self.service.networks().get(
1399            project=self._project, network=network)
1400        result = self.Execute(api)
1401        return result["selfLink"]
1402
1403    def GetSubnetworkUrl(self, network, zone):
1404        """Get URL for a given network and zone.
1405
1406        Return the subnetwork for the network in the specified region that the
1407        specified zone resides in. If there is no subnetwork for the specified
1408        zone, raise an exception.
1409
1410        Args:
1411            network: A string, representing network name, e.g "default"
1412            zone: String, representing zone name, e.g. "us-central1-f"
1413
1414        Returns:
1415            A URL that points to the network resource, e.g.
1416            https://www.googleapis.com/compute/v1/projects/<project id>/
1417            global/networks/default
1418
1419        Raises:
1420            errors.NoSubnetwork: When no subnetwork exists for the zone
1421            specified.
1422        """
1423        api = self.service.networks().get(
1424            project=self._project, network=network)
1425        result = self.Execute(api)
1426        region = zone.rsplit("-", 1)[0]
1427        for subnetwork in result.get("subnetworks", []):
1428            if region in subnetwork:
1429                return subnetwork
1430        raise errors.NoSubnetwork("No subnetwork for network %s in region %s" %
1431                                  (network, region))
1432
1433    def CompareMachineSize(self, machine_type_1, machine_type_2, zone):
1434        """Compare the size of two machine types.
1435
1436        Args:
1437            machine_type_1: A string representing a machine type, e.g. n1-standard-1
1438            machine_type_2: A string representing a machine type, e.g. n1-standard-1
1439            zone: A string representing a zone, e.g. "us-central1-f"
1440
1441        Returns:
1442            -1 if any metric of machine size of the first type is smaller than
1443                the second type.
1444            0 if all metrics of machine size are equal.
1445            1 if at least one metric of machine size of the first type is
1446                greater than the second type and all metrics of first type are
1447                greater or equal to the second type.
1448
1449        Raises:
1450            errors.DriverError: For malformed response.
1451        """
1452        machine_info_1 = self.GetMachineType(machine_type_1, zone)
1453        machine_info_2 = self.GetMachineType(machine_type_2, zone)
1454        result = 0
1455        for metric in self.MACHINE_SIZE_METRICS:
1456            if metric not in machine_info_1 or metric not in machine_info_2:
1457                raise errors.DriverError(
1458                    "Malformed machine size record: Can't find '%s' in %s or %s"
1459                    % (metric, machine_info_1, machine_info_2))
1460            cmp_result = machine_info_1[metric] - machine_info_2[metric]
1461            if cmp_result < 0:
1462                return -1
1463            if cmp_result > 0:
1464                result = 1
1465        return result
1466
1467    def GetSerialPortOutput(self, instance, zone, port=1):
1468        """Get serial port output.
1469
1470        Args:
1471            instance: string, instance name.
1472            zone: string, zone name.
1473            port: int, which COM port to read from, 1-4, default to 1.
1474
1475        Returns:
1476            String, contents of the output.
1477
1478        Raises:
1479            errors.DriverError: For malformed response.
1480        """
1481        api = self.service.instances().getSerialPortOutput(
1482            project=self._project, zone=zone, instance=instance, port=port)
1483        result = self.Execute(api)
1484        if "contents" not in result:
1485            raise errors.DriverError(
1486                "Malformed response for GetSerialPortOutput: %s" % result)
1487        return result["contents"]
1488
1489    def GetInstanceNamesByIPs(self, ips):
1490        """Get Instance names by IPs.
1491
1492        This function will go through all instances, which
1493        could be slow if there are too many instances.  However, currently
1494        GCE doesn't support search for instance by IP.
1495
1496        Args:
1497            ips: A set of IPs.
1498
1499        Returns:
1500            A dictionary where key is IP and value is instance name or None
1501            if instance is not found for the given IP.
1502        """
1503        ip_name_map = dict.fromkeys(ips)
1504        for instance in self.ListInstances():
1505            try:
1506                ip = instance["networkInterfaces"][0]["accessConfigs"][0][
1507                    "natIP"]
1508                if ip in ips:
1509                    ip_name_map[ip] = instance["name"]
1510            except (IndexError, KeyError) as e:
1511                logger.error("Could not get instance names by ips: %s", str(e))
1512        return ip_name_map
1513
1514    def GetInstanceIP(self, instance, zone):
1515        """Get Instance IP given instance name.
1516
1517        Args:
1518            instance: String, representing instance name.
1519            zone: String, name of the zone.
1520
1521        Returns:
1522            ssh.IP object, that stores internal and external ip of the instance.
1523        """
1524        instance = self.GetInstance(instance, zone)
1525        internal_ip = instance["networkInterfaces"][0]["networkIP"]
1526        external_ip = instance["networkInterfaces"][0]["accessConfigs"][0]["natIP"]
1527        return IP(internal=internal_ip, external=external_ip)
1528
1529    @utils.TimeExecute(function_description="Updating instance metadata: ")
1530    def SetInstanceMetadata(self, zone, instance, body):
1531        """Set instance metadata.
1532
1533        Args:
1534            zone: String, name of zone.
1535            instance: String, representing instance name.
1536            body: Dict, Metadata body.
1537                  metdata is in the following format.
1538                  {
1539                    "kind": "compute#metadata",
1540                    "fingerprint": "a-23icsyx4E=",
1541                    "items": [
1542                      {
1543                        "key": "sshKeys",
1544                        "value": "key"
1545                      }, ...
1546                    ]
1547                  }
1548        """
1549        api = self.service.instances().setMetadata(
1550            project=self._project, zone=zone, instance=instance, body=body)
1551        operation = self.Execute(api)
1552        self.WaitOnOperation(
1553            operation, operation_scope=OperationScope.ZONE, scope_name=zone)
1554
1555    def AddSshRsaInstanceMetadata(self, user, ssh_rsa_path, instance):
1556        """Add the public rsa key to the instance's metadata.
1557
1558        Confirm that the instance has this public key in the instance's
1559        metadata, if not we will add this public key.
1560
1561        Args:
1562            user: String, name of the user which the key belongs to.
1563            ssh_rsa_path: String, The absolute path to public rsa key.
1564            instance: String, representing instance name.
1565        """
1566        ssh_rsa_path = os.path.expanduser(ssh_rsa_path)
1567        rsa = GetRsaKey(ssh_rsa_path)
1568        entry = "%s:%s" % (user, rsa)
1569        logger.debug("New RSA entry: %s", entry)
1570
1571        zone = self.GetZoneByInstance(instance)
1572        gce_instance = self.GetInstance(instance, zone)
1573        metadata = gce_instance.get(_METADATA)
1574        if RsaNotInMetadata(metadata, entry):
1575            self.UpdateRsaInMetadata(zone, instance, metadata, entry)
1576
1577    def GetZoneByInstance(self, instance):
1578        """Get the zone from instance name.
1579
1580        Gcompute response instance. For example:
1581        {
1582            'items':
1583            {
1584                'zones/europe-west3-b':
1585                {
1586                    'warning':
1587                    {
1588                        'message': "There are no results for scope
1589                        'zones/europe-west3-b' on this page.",
1590                        'code': 'NO_RESULTS_ON_PAGE',
1591                        'data': [{'value': u'zones/europe-west3-b',
1592                                  'key': u'scope'}]
1593                    }
1594                },
1595                'zones/asia-east1-b':
1596                {
1597                    'instances': [
1598                    {
1599                        'name': 'ins-bc212dc8-userbuild-aosp-cf-x86-64-phone'
1600                        'status': 'RUNNING',
1601                        'cpuPlatform': 'Intel Broadwell',
1602                        'startRestricted': False,
1603                        'labels': {u'created_by': u'herbertxue'},
1604                        'name': 'ins-bc212dc8-userbuild-aosp-cf-x86-64-phone',
1605                        ...
1606                    }]
1607                }
1608            }
1609        }
1610
1611        Args:
1612            instance: String, representing instance name.
1613
1614        Raises:
1615            errors.GetGceZoneError: Can't get zone from instance name.
1616
1617        Returns:
1618            String of zone name.
1619        """
1620        api = self.service.instances().aggregatedList(
1621            project=self._project,
1622            filter="name=%s" % instance)
1623        response = self.Execute(api)
1624        for zone, instance_data in response["items"].items():
1625            if "instances" in instance_data:
1626                zone_match = _ZONE_RE.match(zone)
1627                if zone_match:
1628                    return zone_match.group("zone")
1629        raise errors.GetGceZoneError("Can't get zone from the instance name %s"
1630                                     % instance)
1631
1632    def GetZonesByInstances(self, instances):
1633        """Get the zone from instance name.
1634
1635        Args:
1636            instances: List of strings, representing instance names.
1637
1638        Returns:
1639            A dictionary that contains the name of all instances in the zone.
1640            The key is the name of the zone, and the value is a list contains
1641            the name of the instances.
1642        """
1643        zone_instances = {}
1644        for instance in instances:
1645            zone = self.GetZoneByInstance(instance)
1646            if zone in zone_instances:
1647                zone_instances[zone].append(instance)
1648            else:
1649                zone_instances[zone] = [instance]
1650        return zone_instances
1651
1652    def CheckAccess(self):
1653        """Check if the user has read access to the cloud project.
1654
1655        Returns:
1656            True if the user has at least read access to the project.
1657            False otherwise.
1658
1659        Raises:
1660            errors.HttpError if other unexpected error happens when
1661            accessing the project.
1662        """
1663        api = self.service.zones().list(project=self._project)
1664        retry_http_codes = copy.copy(self.RETRY_HTTP_CODES)
1665        retry_http_codes.remove(self.ACCESS_DENIED_CODE)
1666        try:
1667            self.Execute(api, retry_http_codes=retry_http_codes)
1668        except errors.HttpError as e:
1669            if e.code == self.ACCESS_DENIED_CODE:
1670                return False
1671            raise
1672        return True
1673
1674    def UpdateRsaInMetadata(self, zone, instance, metadata, entry):
1675        """Update ssh public key to sshKeys's value in this metadata.
1676
1677        Args:
1678            zone: String, name of zone.
1679            instance: String, representing instance name.
1680            metadata: Dict, maps a metadata name to its value.
1681            entry: String, ssh public key.
1682        """
1683        ssh_key_item = GetSshKeyFromMetadata(metadata)
1684        if ssh_key_item:
1685            # The ssh key exists in the metadata so update the reference to it
1686            # in the metadata. There may not be an actual ssh key value so
1687            # that's why we filter for None to avoid an empty line in front.
1688            ssh_key_item[_METADATA_KEY_VALUE] = "\n".join(
1689                list(filter(None, [ssh_key_item[_METADATA_KEY_VALUE], entry])))
1690        else:
1691            # Since there is no ssh key item in the metadata, we need to add it in.
1692            ssh_key_item = {_METADATA_KEY: _SSH_KEYS_NAME,
1693                            _METADATA_KEY_VALUE: entry}
1694            metadata[_ITEMS].append(ssh_key_item)
1695        utils.PrintColorString(
1696            "Ssh public key doesn't exist in the instance(%s), adding it."
1697            % instance, utils.TextColors.WARNING)
1698        self.SetInstanceMetadata(zone, instance, metadata)
1699
1700
def RsaNotInMetadata(metadata, entry):
    """Check whether the ssh public key is absent from sshKeys's value.

    The entry is searched as a substring of the value of every sshKeys item.
    Items without a "key" field are skipped, and an sshKeys item whose value
    is missing or None is treated as holding no keys, matching the tolerant
    lookup done by GetSshKeyFromMetadata.

    Note: an empty "items" list is added to the metadata if it has none.

    Args:
        metadata: Dict, maps a metadata name to its value.
        entry: String, ssh public key.

    Returns:
        Boolean. True if ssh public key doesn't exist in metadata.
    """
    for item in metadata.setdefault(_ITEMS, []):
        if item.get(_METADATA_KEY) != _SSH_KEYS_NAME:
            continue
        # "or ''" covers both a missing value and an explicit None.
        if entry in (item.get(_METADATA_KEY_VALUE) or ""):
            return False
    return True
1716
1717
def GetSshKeyFromMetadata(metadata):
    """Find the sshKeys item inside the metadata.

    Note: an empty "items" list is added to the metadata if it has none.

    Args:
        metadata: Dict, maps a metadata name to its value.

    Returns:
        Dict of the first ssh key item in metadata, None if can't find the
        ssh key item in metadata.
    """
    candidates = metadata.setdefault(_ITEMS, [])
    return next(
        (candidate for candidate in candidates
         if candidate.get(_METADATA_KEY, '') == _SSH_KEYS_NAME),
        None)
1732
1733
def GetRsaKey(ssh_rsa_path):
    """Read the public rsa key stored at the given path.

    A leading "~" in the path is expanded. The file content is stripped of
    surrounding whitespace and passed to utils.VerifyRsaPubKey before it is
    returned.

    Args:
        ssh_rsa_path: String, The absolute path to public rsa key.

    Returns:
        String, rsa key.

    Raises:
        errors.DriverError: RSA file does not exist.
    """
    key_path = os.path.expanduser(ssh_rsa_path)
    if not os.path.exists(key_path):
        raise errors.DriverError(
            "RSA file %s does not exist." % key_path)

    with open(key_path) as key_file:
        # Surrounding whitespace (e.g. the trailing newline) must be removed
        # before the key is verified and embedded in the metadata entry.
        rsa = key_file.read().strip()
    utils.VerifyRsaPubKey(rsa)
    return rsa
1758