# Copyright 2015 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Iterators for paging through paged API methods.

These iterators simplify the process of paging through API responses
where the request takes a page token and the response is a list of results with
a token for the next page. See `list pagination`_ in the Google API Style Guide
for more details.

.. _list pagination:
    https://cloud.google.com/apis/design/design_patterns#list_pagination

API clients that have methods that follow the list pagination pattern can
return an :class:`.Iterator`. You can use this iterator to get **all** of
the results across all pages::

    >>> results_iterator = client.list_resources()
    >>> list(results_iterator)  # Convert to a list (consumes all values).

Or you can walk your way through items and call off the search early if
you find what you're looking for (resulting in possibly fewer requests)::

    >>> for resource in results_iterator:
    ...     print(resource.name)
    ...     if not resource.is_valid:
    ...         break

At any point, you may check the number of items consumed by referencing the
``num_results`` property of the iterator::

    >>> for my_item in results_iterator:
    ...     if results_iterator.num_results >= 10:
    ...         break

When iterating, not every new item will send a request to the server.
To iterate based on each page of items (where a page corresponds to
a request)::

    >>> for page in results_iterator.pages:
    ...     print('=' * 20)
    ...     print('    Page number: {:d}'.format(results_iterator.page_number))
    ...     print('  Items in page: {:d}'.format(page.num_items))
    ...     print('     First item: {!r}'.format(next(page)))
    ...     print('Items remaining: {:d}'.format(page.remaining))
    ...     print('Next page token: {}'.format(results_iterator.next_page_token))
    ====================
        Page number: 1
      Items in page: 1
         First item: <MyItemClass at 0x7f1d3cccf690>
    Items remaining: 0
    Next page token: eav1OzQB0OM8rLdGXOEsyQWSG
    ====================
        Page number: 2
      Items in page: 19
         First item: <MyItemClass at 0x7f1d3cccffd0>
    Items remaining: 18
    Next page token: None

Then, for each page you can get all the resources on that page by iterating
through it or using :func:`list`::

    >>> list(page)
    [
        <MyItemClass at 0x7fd64a098ad0>,
        <MyItemClass at 0x7fd64a098ed0>,
        <MyItemClass at 0x7fd64a098e90>,
    ]
"""

import abc


class Page(object):
    """Single page of results in an iterator.

    Args:
        parent (google.api_core.page_iterator.Iterator): The iterator that owns
            the current page.
        items (Sequence[Any]): An iterable (that also defines __len__) of items
            from a raw API response.
        item_to_value (Callable[google.api_core.page_iterator.Iterator, Any]):
            Callable to convert an item from the type in the raw API response
            into the native object. Will be called with the iterator and a
            single item.
        raw_page (Optional[google.protobuf.message.Message]):
            The raw page response.
    """

    def __init__(self, parent, items, item_to_value, raw_page=None):
        self._parent = parent
        # ``len`` is taken up front, so ``items`` must be sized, not merely
        # iterable.
        self._num_items = len(items)
        self._remaining = self._num_items
        self._item_iter = iter(items)
        self._item_to_value = item_to_value
        self._raw_page = raw_page

    @property
    def raw_page(self):
        """google.protobuf.message.Message"""
        return self._raw_page

    @property
    def num_items(self):
        """int: Total items in the page."""
        return self._num_items

    @property
    def remaining(self):
        """int: Remaining items in the page."""
        return self._remaining

    def __iter__(self):
        """The :class:`Page` is an iterator of items."""
        return self

    def __next__(self):
        """Get the next value in the page.

        Raises:
            StopIteration: When the underlying items are exhausted.
        """
        item = next(self._item_iter)
        result = self._item_to_value(self._parent, item)
        # Only count the item as consumed once both the fetch and the
        # conversion have succeeded.
        self._remaining -= 1
        return result


def _item_to_value_identity(iterator, item):
    """An item to value transformer that returns the item un-changed."""
    # pylint: disable=unused-argument
    # We are conforming to the interface defined by Iterator.
    return item


class Iterator(object, metaclass=abc.ABCMeta):
    """A generic class for iterating through API list responses.

    Args:
        client(google.cloud.client.Client): The API client.
        item_to_value (Callable[google.api_core.page_iterator.Iterator, Any]):
            Callable to convert an item from the type in the raw API response
            into the native object. Will be called with the iterator and a
            single item.
        page_token (str): A token identifying a page in a result set to start
            fetching results from.
        max_results (int): The maximum number of results to fetch.
    """

    def __init__(
        self,
        client,
        item_to_value=_item_to_value_identity,
        page_token=None,
        max_results=None,
    ):
        self._started = False
        # Lazily created by ``__next__`` so that ``next(iterator)`` works
        # without an explicit ``iter()`` call.
        self.__active_iterator = None

        self.client = client
        """Optional[Any]: The client that created this iterator."""
        self.item_to_value = item_to_value
        """Callable[Iterator, Any]: Callable to convert an item from the type
            in the raw API response into the native object. Will be called with
            the iterator and a
            single item.
        """
        self.max_results = max_results
        """int: The maximum number of results to fetch"""

        # The attributes below will change over the life of the iterator.
        self.page_number = 0
        """int: The current page of results."""
        self.next_page_token = page_token
        """str: The token for the next page of results. If this is set before
            the iterator starts, it effectively offsets the iterator to a
            specific starting point."""
        self.num_results = 0
        """int: The total number of results fetched so far."""

    @property
    def pages(self):
        """Iterator of pages in the response.

        returns:
            types.GeneratorType[google.api_core.page_iterator.Page]: A
                generator of page instances.

        raises:
            ValueError: If the iterator has already been started.
        """
        if self._started:
            raise ValueError("Iterator has already started", self)
        self._started = True
        return self._page_iter(increment=True)

    def _items_iter(self):
        """Iterator for each item returned."""
        for page in self._page_iter(increment=False):
            for item in page:
                self.num_results += 1
                yield item

    def __iter__(self):
        """Iterator for each item returned.

        Returns:
            types.GeneratorType[Any]: A generator of items from the API.

        Raises:
            ValueError: If the iterator has already been started.
        """
        if self._started:
            raise ValueError("Iterator has already started", self)
        self._started = True
        return self._items_iter()

    def __next__(self):
        """Return the next item, starting item iteration on first use."""
        if self.__active_iterator is None:
            self.__active_iterator = iter(self)
        return next(self.__active_iterator)

    def _page_iter(self, increment):
        """Generator of pages of API responses.

        Args:
            increment (bool): Flag indicating if the total number of results
                should be incremented on each page. This is useful since a page
                iterator will want to increment by results per page while an
                items iterator will want to increment per item.

        Yields:
            Page: each page of items from the API.
        """
        page = self._next_page()
        while page is not None:
            self.page_number += 1
            if increment:
                self.num_results += page.num_items
            yield page
            page = self._next_page()

    @abc.abstractmethod
    def _next_page(self):
        """Get the next page in the iterator.

        This is intended to be over-ridden by subclasses to return the next
        :class:`Page`, or :data:`None` when there are no pages left.

        Raises:
            NotImplementedError: Always, this method is abstract.
        """
        raise NotImplementedError


def _do_nothing_page_start(iterator, page, response):
    """Helper to provide custom behavior after a :class:`Page` is started.

    This is a do-nothing stand-in as the default value.

    Args:
        iterator (Iterator): An iterator that holds some request info.
        page (Page): The page that was just created.
        response (Any): The API response for a page.
    """
    # pylint: disable=unused-argument
    pass


class HTTPIterator(Iterator):
    """A generic class for iterating through HTTP/JSON API list responses.

    To make an iterator work, you'll need to provide a way to convert a JSON
    item returned from the API into the object of your choice (via
    ``item_to_value``). You also may need to specify a custom ``items_key`` so
    that a given response (containing a page of results) can be parsed into an
    iterable page of the actual objects you want.

    Args:
        client (google.cloud.client.Client): The API client.
        api_request (Callable): The function to use to make API requests.
            Generally, this will be
            :meth:`google.cloud._http.JSONConnection.api_request`.
        path (str): The method path to query for the list of items.
        item_to_value (Callable[google.api_core.page_iterator.Iterator, Any]):
            Callable to convert an item from the type in the JSON response into
            a native object. Will be called with the iterator and a single
            item.
        items_key (str): The key in the API response where the list of items
            can be found.
        page_token (str): A token identifying a page in a result set to start
            fetching results from.
        page_size (int): The maximum number of results to fetch per page
        max_results (int): The maximum number of results to fetch
        extra_params (dict): Extra query string parameters for the
            API call.
        page_start (Callable[
            google.api_core.page_iterator.Iterator,
            google.api_core.page_iterator.Page, dict]): Callable to provide
            any special behavior after a new page has been created. Assumed
            signature takes the :class:`.Iterator` that started the page,
            the :class:`.Page` that was started and the dictionary containing
            the page response.
        next_token (str): The name of the field used in the response for page
            tokens.

    .. autoattribute:: pages
    """

    _DEFAULT_ITEMS_KEY = "items"
    _PAGE_TOKEN = "pageToken"
    _MAX_RESULTS = "maxResults"
    _NEXT_TOKEN = "nextPageToken"
    _RESERVED_PARAMS = frozenset([_PAGE_TOKEN])
    _HTTP_METHOD = "GET"

    def __init__(
        self,
        client,
        api_request,
        path,
        item_to_value,
        items_key=_DEFAULT_ITEMS_KEY,
        page_token=None,
        page_size=None,
        max_results=None,
        extra_params=None,
        page_start=_do_nothing_page_start,
        next_token=_NEXT_TOKEN,
    ):
        super().__init__(
            client, item_to_value, page_token=page_token, max_results=max_results
        )
        self.api_request = api_request
        self.path = path
        self._items_key = items_key
        # Default to a fresh dict per instance rather than a shared mutable
        # default argument.
        self.extra_params = {} if extra_params is None else extra_params
        self._page_size = page_size
        self._page_start = page_start
        self._next_token = next_token
        self._verify_params()

    def _verify_params(self):
        """Verifies the parameters don't use any reserved parameter.

        Raises:
            ValueError: If a reserved parameter is used.
        """
        reserved_in_use = self._RESERVED_PARAMS.intersection(self.extra_params)
        if reserved_in_use:
            raise ValueError("Using a reserved parameter", reserved_in_use)

    def _next_page(self):
        """Get the next page in the iterator.

        Returns:
            Optional[Page]: The next page in the iterator or :data:`None` if
                there are no pages left.
        """
        if not self._has_next_page():
            return None

        response = self._get_next_page_response()
        items = response.get(self._items_key, ())
        page = Page(self, items, self.item_to_value, raw_page=response)
        # The hook runs before the token is advanced, so it still sees the
        # token that was used to request this page.
        self._page_start(self, page, response)
        self.next_page_token = response.get(self._next_token)
        return page

    def _has_next_page(self):
        """Determines whether or not there are more pages with results.

        Returns:
            bool: Whether the iterator has more pages.
        """
        # The first page is always requested, whatever the token or limit.
        if self.page_number == 0:
            return True

        if self.max_results is not None and self.num_results >= self.max_results:
            return False

        return self.next_page_token is not None

    def _get_query_params(self):
        """Getter for query parameters for the next request.

        Returns:
            dict: A dictionary of query parameters.
        """
        result = {}
        if self.next_page_token is not None:
            result[self._PAGE_TOKEN] = self.next_page_token

        # Request no more than what is still allowed by ``max_results``,
        # further capped by the per-page size when one was given.
        page_size = self._page_size
        if self.max_results is not None:
            remaining = self.max_results - self.num_results
            page_size = remaining if page_size is None else min(remaining, page_size)

        if page_size is not None:
            result[self._MAX_RESULTS] = page_size

        # Applied last, so extra parameters may override ``maxResults``
        # (``pageToken`` is rejected up front by ``_verify_params``).
        result.update(self.extra_params)
        return result

    def _get_next_page_response(self):
        """Requests the next page from the path provided.

        Returns:
            dict: The parsed JSON response of the next page's contents.

        Raises:
            ValueError: If the HTTP method is not ``GET`` or ``POST``.
        """
        params = self._get_query_params()
        if self._HTTP_METHOD == "GET":
            return self.api_request(
                method=self._HTTP_METHOD, path=self.path, query_params=params
            )
        elif self._HTTP_METHOD == "POST":
            return self.api_request(
                method=self._HTTP_METHOD, path=self.path, data=params
            )
        else:
            raise ValueError("Unexpected HTTP method", self._HTTP_METHOD)


class _GAXIterator(Iterator):
    """A generic class for iterating through Cloud gRPC APIs list responses.

    Args:
        client (google.cloud.client.Client): The API client.
        page_iter (google.gax.PageIterator): A GAX page iterator to be wrapped
            to conform to the :class:`Iterator` interface.
        item_to_value (Callable[Iterator, Any]): Callable to convert an item
            from the protobuf response into a native object. Will
            be called with the iterator and a single item.
        max_results (int): The maximum number of results to fetch.

    .. autoattribute:: pages
    """

    def __init__(self, client, page_iter, item_to_value, max_results=None):
        super().__init__(
            client,
            item_to_value,
            page_token=page_iter.page_token,
            max_results=max_results,
        )
        self._gax_page_iter = page_iter

    def _next_page(self):
        """Get the next page in the iterator.

        Wraps the response from the :class:`~google.gax.PageIterator` in a
        :class:`Page` instance and captures some state at each page.

        Returns:
            Optional[Page]: The next page in the iterator or :data:`None` if
                there are no pages left.
        """
        try:
            items = next(self._gax_page_iter)
            page = Page(self, items, self.item_to_value)
            # Normalise any falsy token (e.g. an empty string) to ``None``.
            self.next_page_token = self._gax_page_iter.page_token or None
            return page
        except StopIteration:
            return None


class GRPCIterator(Iterator):
    """A generic class for iterating through gRPC list responses.

    .. note:: The class does not take a ``page_token`` argument because it can
        just be specified in the ``request``.

    Args:
        client (google.cloud.client.Client): The API client. This is unused by
            this class, but kept to satisfy the :class:`Iterator` interface.
        method (Callable[protobuf.Message]): A bound gRPC method that should
            take a single message for the request.
        request (protobuf.Message): The request message.
        items_field (str): The field in the response message that has the
            items for the page.
        item_to_value (Callable[GRPCIterator, Any]): Callable to convert an
            item from the type in the response message into a native object.
            Will be called with the iterator and a single item.
        request_token_field (str): The field in the request message used to
            specify the page token.
        response_token_field (str): The field in the response message that has
            the token for the next page.
        max_results (int): The maximum number of results to fetch.

    .. autoattribute:: pages
    """

    _DEFAULT_REQUEST_TOKEN_FIELD = "page_token"
    _DEFAULT_RESPONSE_TOKEN_FIELD = "next_page_token"

    def __init__(
        self,
        client,
        method,
        request,
        items_field,
        item_to_value=_item_to_value_identity,
        request_token_field=_DEFAULT_REQUEST_TOKEN_FIELD,
        response_token_field=_DEFAULT_RESPONSE_TOKEN_FIELD,
        max_results=None,
    ):
        super().__init__(client, item_to_value, max_results=max_results)
        self._method = method
        self._request = request
        self._items_field = items_field
        self._request_token_field = request_token_field
        self._response_token_field = response_token_field

    def _next_page(self):
        """Get the next page in the iterator.

        Returns:
            Optional[Page]: The next page in the iterator or :data:`None` if
                there are no pages left.
        """
        if not self._has_next_page():
            return None

        # The request message is mutated in place to carry the next token;
        # on the first call the token already present in the request is used.
        if self.next_page_token is not None:
            setattr(self._request, self._request_token_field, self.next_page_token)

        response = self._method(self._request)

        self.next_page_token = getattr(response, self._response_token_field)
        items = getattr(response, self._items_field)
        return Page(self, items, self.item_to_value, raw_page=response)

    def _has_next_page(self):
        """Determines whether or not there are more pages with results.

        Returns:
            bool: Whether the iterator has more pages.
        """
        # The first page is always requested, whatever the token or limit.
        if self.page_number == 0:
            return True

        if self.max_results is not None and self.num_results >= self.max_results:
            return False

        # Note: intentionally a falsy check instead of a None check. The RPC
        # can return an empty string indicating no more pages.
        return bool(self.next_page_token)