• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2015 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15"""Iterators for paging through paged API methods.
16
17These iterators simplify the process of paging through API responses
18where the request takes a page token and the response is a list of results with
19a token for the next page. See `list pagination`_ in the Google API Style Guide
20for more details.
21
22.. _list pagination:
23    https://cloud.google.com/apis/design/design_patterns#list_pagination
24
25API clients that have methods that follow the list pagination pattern can
26return an :class:`.Iterator`. You can use this iterator to get **all** of
27the results across all pages::
28
29    >>> results_iterator = client.list_resources()
30    >>> list(results_iterator)  # Convert to a list (consumes all values).
31
32Or you can walk your way through items and call off the search early if
33you find what you're looking for (resulting in possibly fewer requests)::
34
35    >>> for resource in results_iterator:
36    ...     print(resource.name)
37    ...     if not resource.is_valid:
38    ...         break
39
40At any point, you may check the number of items consumed by referencing the
41``num_results`` property of the iterator::
42
43    >>> for my_item in results_iterator:
44    ...     if results_iterator.num_results >= 10:
45    ...         break
46
47When iterating, not every new item will send a request to the server.
48To iterate based on each page of items (where a page corresponds to
49a request)::
50
51    >>> for page in results_iterator.pages:
52    ...     print('=' * 20)
53    ...     print('    Page number: {:d}'.format(results_iterator.page_number))
54    ...     print('  Items in page: {:d}'.format(page.num_items))
55    ...     print('     First item: {!r}'.format(next(page)))
56    ...     print('Items remaining: {:d}'.format(page.remaining))
57    ...     print('Next page token: {}'.format(results_iterator.next_page_token))
58    ====================
59        Page number: 1
60      Items in page: 1
61         First item: <MyItemClass at 0x7f1d3cccf690>
62    Items remaining: 0
63    Next page token: eav1OzQB0OM8rLdGXOEsyQWSG
64    ====================
65        Page number: 2
66      Items in page: 19
67         First item: <MyItemClass at 0x7f1d3cccffd0>
68    Items remaining: 18
69    Next page token: None
70
71Then, for each page you can get all the resources on that page by iterating
72through it or using :func:`list`::
73
74    >>> list(page)
75    [
76        <MyItemClass at 0x7fd64a098ad0>,
77        <MyItemClass at 0x7fd64a098ed0>,
78        <MyItemClass at 0x7fd64a098e90>,
79    ]
80"""
81
82import abc
83
84
class Page(object):
    """One page of results belonging to a paged iterator.

    A page is itself an iterator: each call to :func:`next` pulls one raw
    item, converts it with ``item_to_value`` and decrements the count of
    items still remaining in the page.

    Args:
        parent (google.api_core.page_iterator.Iterator): The iterator that
            owns this page.
        items (Sequence[Any]): The raw items from an API response. Must
            support both ``len()`` and iteration.
        item_to_value (Callable[google.api_core.page_iterator.Iterator, Any]):
            Converter from a raw API item to the native object. It is
            called with the parent iterator and a single item.
        raw_page (Optional[google.protobuf.message.Message]):
            The raw page response.
    """

    def __init__(self, parent, items, item_to_value, raw_page=None):
        # Measure the page up front; the count is needed for both the
        # fixed total and the running "remaining" counter.
        count = len(items)

        self._parent = parent
        self._item_to_value = item_to_value
        self._raw_page = raw_page
        self._item_iter = iter(items)
        self._num_items = count
        self._remaining = count

    @property
    def raw_page(self):
        """Optional[google.protobuf.message.Message]: The raw page response
        given at construction time (:data:`None` if none was given)."""
        return self._raw_page

    @property
    def num_items(self):
        """int: Number of items the page held when it was created."""
        return self._num_items

    @property
    def remaining(self):
        """int: Number of items in the page not yet consumed."""
        return self._remaining

    def __iter__(self):
        """Return the page itself; a :class:`Page` is its own iterator."""
        return self

    def __next__(self):
        """Return the next converted value from the page.

        Raises:
            StopIteration: When the page has no items left.
        """
        value = self._item_to_value(self._parent, next(self._item_iter))
        # Only count the item as consumed once it has been both fetched
        # and converted without error.
        self._remaining -= 1
        return value
136
137
138def _item_to_value_identity(iterator, item):
139    """An item to value transformer that returns the item un-changed."""
140    # pylint: disable=unused-argument
141    # We are conforming to the interface defined by Iterator.
142    return item
143
144
class Iterator(object, metaclass=abc.ABCMeta):
    """Abstract base class for iterating through API list responses.

    Subclasses implement :meth:`_next_page`; this class builds item-wise
    iteration, page-wise iteration (:attr:`pages`) and result counting on
    top of it. An instance can be started only once, either by iterating
    it directly or through :attr:`pages`.

    Args:
        client(google.cloud.client.Client): The API client.
        item_to_value (Callable[google.api_core.page_iterator.Iterator, Any]):
            Converter from a raw API item to the native object. It is
            called with the iterator and a single item.
        page_token (str): A token identifying the page in the result set
            at which fetching should start.
        max_results (int): The maximum number of results to fetch.
    """

    def __init__(
        self,
        client,
        item_to_value=_item_to_value_identity,
        page_token=None,
        max_results=None,
    ):
        # Bookkeeping: whether iteration has begun, and the generator that
        # backs ``__next__`` once it has been called.
        self._started = False
        self.__active_iterator = None

        self.client = client
        """Optional[Any]: The client that created this iterator."""
        self.item_to_value = item_to_value
        """Callable[Iterator, Any]: Converter from a raw API item to the
            native object. Called with the iterator and a single item.
        """
        self.max_results = max_results
        """int: The maximum number of results to fetch"""

        # Everything below is mutated as iteration progresses.
        self.page_number = 0
        """int: The current page of results."""
        self.next_page_token = page_token
        """str: The token for the next page of results. Setting it before
            the iterator starts effectively offsets the iterator to a
            specific starting point."""
        self.num_results = 0
        """int: The total number of results fetched so far."""

    @property
    def pages(self):
        """Iterator of pages in the response.

        Accessing this property starts the iterator; ``num_results`` is then
        advanced by a whole page's item count each time a page is produced.

        Returns:
            types.GeneratorType[google.api_core.page_iterator.Page]: A
                generator of page instances.

        Raises:
            ValueError: If the iterator has already been started.
        """
        if self._started:
            raise ValueError("Iterator has already started", self)
        self._started = True
        return self._page_iter(increment=True)

    def _items_iter(self):
        """Yield every item of every page, counting each one as it goes."""
        for current_page in self._page_iter(increment=False):
            for value in current_page:
                self.num_results += 1
                yield value

    def __iter__(self):
        """Start the iterator and return a generator over its items.

        Returns:
            types.GeneratorType[Any]: A generator of items from the API.

        Raises:
            ValueError: If the iterator has already been started.
        """
        if self._started:
            raise ValueError("Iterator has already started", self)
        self._started = True
        return self._items_iter()

    def __next__(self):
        """Return the next item, starting item iteration on first use."""
        active = self.__active_iterator
        if active is None:
            # First call: start iterating and remember the generator so
            # later calls resume where this one stopped.
            active = self.__active_iterator = iter(self)
        return next(active)

    def _page_iter(self, increment):
        """Generator of pages of API responses.

        Args:
            increment (bool): Whether ``num_results`` should grow by the
                page's item count for each page produced. The page
                iterator counts by page; the item iterator passes
                :data:`False` because it counts item by item itself.

        Yields:
            Page: each page of items from the API.
        """
        while True:
            page = self._next_page()
            if page is None:
                return
            self.page_number += 1
            if increment:
                self.num_results += page.num_items
            yield page

    @abc.abstractmethod
    def _next_page(self):
        """Get the next page in the iterator.

        Abstract hook: subclasses override it to return the next
        :class:`Page`. :meth:`_page_iter` stops when it returns
        :data:`None`.

        Raises:
            NotImplementedError: Always, this method is abstract.
        """
        raise NotImplementedError
263
264
265def _do_nothing_page_start(iterator, page, response):
266    """Helper to provide custom behavior after a :class:`Page` is started.
267
268    This is a do-nothing stand-in as the default value.
269
270    Args:
271        iterator (Iterator): An iterator that holds some request info.
272        page (Page): The page that was just created.
273        response (Any): The API response for a page.
274    """
275    # pylint: disable=unused-argument
276    pass
277
278
class HTTPIterator(Iterator):
    """Iterator over the pages of an HTTP/JSON API list response.

    To use it, supply ``item_to_value`` to turn a JSON item from the API
    into the object of your choice. A custom ``items_key`` may also be
    needed so that a response (holding one page of results) can be parsed
    into an iterable page of the actual objects you want.

    Args:
        client (google.cloud.client.Client): The API client.
        api_request (Callable): The function to use to make API requests.
            Generally, this will be
            :meth:`google.cloud._http.JSONConnection.api_request`.
        path (str): The method path to query for the list of items.
        item_to_value (Callable[google.api_core.page_iterator.Iterator, Any]):
            Converter from a JSON item to a native object. It is called
            with the iterator and a single item.
        items_key (str): The key in the API response under which the list
            of items is found.
        page_token (str): A token identifying the page in the result set
            at which fetching should start.
        page_size (int): The maximum number of results to fetch per page
        max_results (int): The maximum number of results to fetch
        extra_params (dict): Extra query string parameters for the
            API call.
        page_start (Callable[
            google.api_core.page_iterator.Iterator,
            google.api_core.page_iterator.Page, dict]): Hook invoked after
            each new page is created. It receives the :class:`.Iterator`
            that started the page, the :class:`.Page` itself and the
            dictionary containing the page response.
        next_token (str): The name of the field used in the response for page
            tokens.

    .. autoattribute:: pages
    """

    _DEFAULT_ITEMS_KEY = "items"
    _PAGE_TOKEN = "pageToken"
    _MAX_RESULTS = "maxResults"
    _NEXT_TOKEN = "nextPageToken"
    _RESERVED_PARAMS = frozenset([_PAGE_TOKEN])
    _HTTP_METHOD = "GET"

    def __init__(
        self,
        client,
        api_request,
        path,
        item_to_value,
        items_key=_DEFAULT_ITEMS_KEY,
        page_token=None,
        page_size=None,
        max_results=None,
        extra_params=None,
        page_start=_do_nothing_page_start,
        next_token=_NEXT_TOKEN,
    ):
        super().__init__(
            client, item_to_value, page_token=page_token, max_results=max_results
        )
        self.api_request = api_request
        self.path = path
        self._items_key = items_key
        self._page_size = page_size
        self._page_start = page_start
        self._next_token = next_token
        # Default to a fresh dict so instances never share one.
        self.extra_params = {} if extra_params is None else extra_params
        self._verify_params()

    def _verify_params(self):
        """Reject ``extra_params`` that use a reserved parameter name.

        Raises:
            ValueError: If a reserved parameter is used.
        """
        clashes = self._RESERVED_PARAMS.intersection(self.extra_params)
        if clashes:
            raise ValueError("Using a reserved parameter", clashes)

    def _next_page(self):
        """Get the next page in the iterator.

        Returns:
            Optional[Page]: The next page in the iterator or :data:`None` if
                there are no pages left.
        """
        if not self._has_next_page():
            return None

        response = self._get_next_page_response()
        raw_items = response.get(self._items_key, ())
        page = Page(self, raw_items, self.item_to_value, raw_page=response)
        # The hook runs before the token is advanced, so it still sees the
        # token that was used to request this page.
        self._page_start(self, page, response)
        self.next_page_token = response.get(self._next_token)
        return page

    def _has_next_page(self):
        """Determines whether or not there are more pages with results.

        Returns:
            bool: Whether the iterator has more pages.
        """
        # Nothing has been fetched yet, so there is always a first page.
        if self.page_number == 0:
            return True

        limit = self.max_results
        if limit is not None and self.num_results >= limit:
            return False

        return self.next_page_token is not None

    def _get_query_params(self):
        """Build the query parameters for the next request.

        The page-size parameter is the smaller of the results still allowed
        by ``max_results`` and the configured ``page_size``, using whichever
        of the two are set. ``extra_params`` are applied last.

        Returns:
            dict: A dictionary of query parameters.
        """
        params = {}
        token = self.next_page_token
        if token is not None:
            params[self._PAGE_TOKEN] = token

        # Collect every applicable cap on this request's size.
        caps = []
        if self.max_results is not None:
            caps.append(self.max_results - self.num_results)
        if self._page_size is not None:
            caps.append(self._page_size)
        if caps:
            params[self._MAX_RESULTS] = min(caps)

        params.update(self.extra_params)
        return params

    def _get_next_page_response(self):
        """Requests the next page from the path provided.

        For ``GET`` the parameters are passed to ``api_request`` as
        ``query_params``; for ``POST`` they are passed as ``data``.

        Returns:
            dict: The parsed JSON response of the next page's contents.

        Raises:
            ValueError: If the HTTP method is not ``GET`` or ``POST``.
        """
        params = self._get_query_params()
        method = self._HTTP_METHOD
        if method == "GET":
            payload = {"query_params": params}
        elif method == "POST":
            payload = {"data": params}
        else:
            raise ValueError("Unexpected HTTP method", method)
        return self.api_request(method=method, path=self.path, **payload)
441
442
class _GAXIterator(Iterator):
    """A generic class for iterating through Cloud gRPC APIs list responses.

    Args:
        client (google.cloud.client.Client): The API client.
        page_iter (google.gax.PageIterator): A GAX page iterator to be wrapped
            to conform to the :class:`Iterator` interface.
        item_to_value (Callable[Iterator, Any]): Callable to convert an item
            from the protobuf response into a native object. Will
            be called with the iterator and a single item.
        max_results (int): The maximum number of results to fetch.

    .. autoattribute:: pages
    """

    def __init__(self, client, page_iter, item_to_value, max_results=None):
        # The starting page token is taken from the wrapped GAX iterator.
        super(_GAXIterator, self).__init__(
            client,
            item_to_value,
            page_token=page_iter.page_token,
            max_results=max_results,
        )
        self._gax_page_iter = page_iter

    def _next_page(self):
        """Get the next page in the iterator.

        Wraps the response from the :class:`~google.gax.PageIterator` in a
        :class:`Page` instance and captures some state at each page.

        Returns:
            Optional[Page]: The next page in the iterator or :data:`None` if
                there are no pages left.
        """
        # Keep the ``try`` limited to the call that signals exhaustion, so
        # a ``StopIteration`` leaking from anywhere else is not mistaken
        # for "no more pages".
        try:
            items = next(self._gax_page_iter)
        except StopIteration:
            return None

        page = Page(self, items, self.item_to_value)
        # Normalize a falsy token (e.g. an empty string) to ``None``.
        self.next_page_token = self._gax_page_iter.page_token or None
        return page
484
485
class GRPCIterator(Iterator):
    """Iterator over the pages of a gRPC list response.

    .. note:: The class does not take a ``page_token`` argument because it can
        just be specified in the ``request``.

    Args:
        client (google.cloud.client.Client): The API client. This is unused
            by this class, but kept to satisfy the :class:`Iterator`
            interface.
        method (Callable[protobuf.Message]): A bound gRPC method that should
            take a single message for the request.
        request (protobuf.Message): The request message.
        items_field (str): The field in the response message that has the
            items for the page.
        item_to_value (Callable[GRPCIterator, Any]): Converter from an item
            in the response message to a native object. It is called with
            the iterator and a single item.
        request_token_field (str): The field in the request message used to
            specify the page token.
        response_token_field (str): The field in the response message that has
            the token for the next page.
        max_results (int): The maximum number of results to fetch.

    .. autoattribute:: pages
    """

    _DEFAULT_REQUEST_TOKEN_FIELD = "page_token"
    _DEFAULT_RESPONSE_TOKEN_FIELD = "next_page_token"

    def __init__(
        self,
        client,
        method,
        request,
        items_field,
        item_to_value=_item_to_value_identity,
        request_token_field=_DEFAULT_REQUEST_TOKEN_FIELD,
        response_token_field=_DEFAULT_RESPONSE_TOKEN_FIELD,
        max_results=None,
    ):
        super().__init__(client, item_to_value, max_results=max_results)
        self._method = method
        self._request = request
        self._items_field = items_field
        self._request_token_field = request_token_field
        self._response_token_field = response_token_field

    def _next_page(self):
        """Get the next page in the iterator.

        Returns:
            Optional[Page]: The next page in the iterator or :data:`None` if
                there are no pages left.
        """
        if not self._has_next_page():
            return None

        # Carry the token from the previous response into the request;
        # before the first call there is none and the request is sent as is.
        token = self.next_page_token
        if token is not None:
            setattr(self._request, self._request_token_field, token)

        response = self._method(self._request)
        self.next_page_token = getattr(response, self._response_token_field)

        return Page(
            self,
            getattr(response, self._items_field),
            self.item_to_value,
            raw_page=response,
        )

    def _has_next_page(self):
        """Determines whether or not there are more pages with results.

        Returns:
            bool: Whether the iterator has more pages.
        """
        # Nothing has been fetched yet, so there is always a first page.
        if self.page_number == 0:
            return True

        limit = self.max_results
        if limit is not None and self.num_results >= limit:
            return False

        # Truthiness rather than a ``None`` test, on purpose: the RPC can
        # return an empty string to indicate there are no more pages.
        return bool(self.next_page_token)
572