1# Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# ============================================================================== 15"""Code for creating a dataset out of a NumPy array.""" 16 17from __future__ import absolute_import 18from __future__ import division 19from __future__ import print_function 20 21import numpy as np 22 23from tensorflow.python.data.ops import dataset_ops 24from tensorflow.python.eager import context 25from tensorflow.python.framework import dtypes 26from tensorflow.python.framework import ops 27from tensorflow.python.ops import array_ops 28from tensorflow.python.ops import variable_scope 29from tensorflow.python.util import nest 30 31 32def init_var_from_numpy(input_var, numpy_input, session): 33 """Initialize `input_var` to `numpy_input` using `session` in graph mode.""" 34 with ops.init_scope(): 35 if context.executing_eagerly(): 36 input_var.assign(numpy_input) 37 return 38 39 assert session is not None 40 session.run(input_var.initializer) 41 42 start_placeholder = array_ops.placeholder(dtypes.int64, ()) 43 end_placeholder = array_ops.placeholder(dtypes.int64, ()) 44 slice_placeholder = array_ops.placeholder(input_var.dtype) 45 assign_slice_op = input_var[start_placeholder:end_placeholder].assign( 46 slice_placeholder) 47 48 # If each batch element is > 64 MB, then we copy each batch element 49 # individually. Otherwise, the slices will be < 128 MB. There might be 50 # padding which might mean that the slices are 128 MB even if the size of 51 # the tensor allocated is less than 128 MB. This formula gives slices with 52 # size: ceil(64 MB / byte size per batch element) bytes. Using ceil() 53 # guarantees we get a number >= 1. 54 55 # Calculate the size of each batch element. 56 byte_size_per_batch_element = ( 57 np.prod(numpy_input.shape[1:]) * input_var.dtype.size) 58 59 # Calculate number of elements we want to copy per slice. 60 batch_size_per_slice = int( 61 np.ceil((64 << 20) / byte_size_per_batch_element)) 62 63 # Copy slices of the above size starting at 0, except the last slice will be 64 # smaller. 65 start = 0 66 limit = numpy_input.shape[0] 67 while start < limit: 68 end = min(start + batch_size_per_slice, limit) 69 session.run(assign_slice_op, feed_dict={ 70 start_placeholder: start, 71 end_placeholder: end, 72 slice_placeholder: numpy_input[start:end]}) 73 start = end 74 75 76def one_host_numpy_dataset(numpy_input, colocate_with, session): 77 """Create a dataset on `colocate_with` from `numpy_input`.""" 78 def create_colocated_variable(next_creator, *args, **kwargs): 79 kwargs["colocate_with"] = colocate_with 80 return next_creator(*args, **kwargs) 81 82 numpy_flat = nest.flatten(numpy_input) 83 with variable_scope.variable_creator_scope(create_colocated_variable): 84 vars_flat = tuple(variable_scope.variable(array_ops.zeros(i.shape, i.dtype), 85 trainable=False) 86 for i in numpy_flat) 87 for v, i in zip(vars_flat, numpy_flat): 88 init_var_from_numpy(v, i, session) 89 vars_nested = nest.pack_sequence_as(numpy_input, vars_flat) 90 return dataset_ops.Dataset.from_tensor_slices(vars_nested) 91 92 93class SingleDevice(object): 94 """Used with `colocate_with` to create a non-mirrored variable.""" 95 96 def __init__(self, device): 97 self.device = device 98