# Copyright 2015 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Operations often used for initializing tensors. All variable initializers returned by functions in this file should have the following signature: def _initializer(shape, dtype=dtypes.float32): Args: shape: List of `int` representing the shape of the output `Tensor`. Some initializers may also be able to accept a `Tensor`. dtype: (Optional) Type of the output `Tensor`. Returns: A `Tensor` of type `dtype` and `shape`. """ from __future__ import absolute_import from __future__ import division from __future__ import print_function import math import numpy as np from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_linalg_ops from tensorflow.python.ops import linalg_ops_impl from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import stateless_random_ops from tensorflow.python.util.tf_export import tf_export class Initializer(object): """Initializer base class: all initializers inherit from this class. """ def __call__(self, shape, dtype=None): """Returns a tensor object initialized as specified by the initializer. Args: shape: Shape of the tensor. dtype: Optional dtype of the tensor. If not provided will return tensor of `tf.float32`. """ raise NotImplementedError def get_config(self): """Returns the configuration of the initializer as a JSON-serializable dict. Returns: A JSON-serializable Python dict. """ return {} @classmethod def from_config(cls, config): """Instantiates an initializer from a configuration dictionary. Example: ```python initializer = RandomUniform(-1, 1) config = initializer.get_config() initializer = RandomUniform.from_config(config) ``` Args: config: A Python dictionary. It will typically be the output of `get_config`. Returns: An Initializer instance. """ config.pop("dtype", None) return cls(**config) @tf_export("zeros_initializer", v1=[]) class Zeros(Initializer): """Initializer that generates tensors initialized to 0. Initializers allow you to pre-specify an initialization strategy, encoded in the Initializer object, without knowing the shape and dtype of the variable being initialized. Examples: >>> def make_variables(k, initializer): ... return (tf.Variable(initializer(shape=[k], dtype=tf.float32)), ... tf.Variable(initializer(shape=[k, k], dtype=tf.float32))) >>> v1, v2 = make_variables(3, tf.zeros_initializer()) >>> v1 >>> v2 >>> make_variables(4, tf.random_uniform_initializer(minval=-1., maxval=1.)) (, >> def make_variables(k, initializer): ... return (tf.Variable(initializer(shape=[k], dtype=tf.float32)), ... tf.Variable(initializer(shape=[k, k], dtype=tf.float32))) >>> v1, v2 = make_variables(3, tf.ones_initializer()) >>> v1 >>> v2 >>> make_variables(4, tf.random_uniform_initializer(minval=-1., maxval=1.)) (, >> def make_variables(k, initializer): ... return (tf.Variable(initializer(shape=[k], dtype=tf.float32)), ... tf.Variable(initializer(shape=[k, k], dtype=tf.float32))) >>> v1, v2 = make_variables(3, tf.constant_initializer(2.)) >>> v1 >>> v2 >>> make_variables(4, tf.random_uniform_initializer(minval=-1., maxval=1.)) (, >> value = [0, 1, 2, 3, 4, 5, 6, 7] >>> init = tf.constant_initializer(value) >>> # Fitting shape >>> tf.Variable(init(shape=[2, 4], dtype=tf.float32)) >>> # Larger shape >>> tf.Variable(init(shape=[3, 4], dtype=tf.float32)) Traceback (most recent call last): ... TypeError: ...value has 8 elements, shape is (3, 4) with 12 elements... >>> # Smaller shape >>> tf.Variable(init(shape=[2, 3], dtype=tf.float32)) Traceback (most recent call last): ... TypeError: ...value has 8 elements, shape is (2, 3) with 6 elements... Args: value: A Python scalar, list or tuple of values, or a N-dimensional numpy array. All elements of the initialized variable will be set to the corresponding value in the `value` argument. Raises: TypeError: If the input `value` is not one of the expected types. """ def __init__(self, value=0): if not (np.isscalar(value) or isinstance(value, (list, tuple, np.ndarray))): raise TypeError( "Invalid type for initial value: %s (expected Python scalar, list or " "tuple of values, or numpy.ndarray)." % type(value)) self.value = value def __call__(self, shape, dtype=None): """Returns a tensor object initialized as specified by the initializer. Args: shape: Shape of the tensor. dtype: Optional dtype of the tensor. If not provided the dtype of the tensor created will be the type of the inital value. Raises: TypeError: If the initializer cannot create a tensor of the requested dtype. """ if dtype is not None: dtype = dtypes.as_dtype(dtype) return constant_op.constant( self.value, dtype=dtype, shape=shape) def get_config(self): return {"value": self.value} @tf_export("random_uniform_initializer", v1=[]) class RandomUniform(Initializer): """Initializer that generates tensors with a uniform distribution. Initializers allow you to pre-specify an initialization strategy, encoded in the Initializer object, without knowing the shape and dtype of the variable being initialized. Examples: >>> def make_variables(k, initializer): ... return (tf.Variable(initializer(shape=[k], dtype=tf.float32)), ... tf.Variable(initializer(shape=[k, k], dtype=tf.float32))) >>> v1, v2 = make_variables(3, tf.ones_initializer()) >>> v1 >>> v2 >>> make_variables(4, tf.random_uniform_initializer(minval=-1., maxval=1.)) (, >> def make_variables(k, initializer): ... return (tf.Variable(initializer(shape=[k], dtype=tf.float32)), ... tf.Variable(initializer(shape=[k, k], dtype=tf.float32))) >>> v1, v2 = make_variables(3, ... tf.random_normal_initializer(mean=1., stddev=2.)) >>> v1 >>> v2 >> make_variables(4, tf.random_uniform_initializer(minval=-1., maxval=1.)) (, >> def make_variables(k, initializer): ... return (tf.Variable(initializer(shape=[k], dtype=tf.float32)), ... tf.Variable(initializer(shape=[k, k], dtype=tf.float32))) >>> v1, v2 = make_variables( ... 3, tf.initializers.TruncatedNormal(mean=1., stddev=2.)) >>> v1 >>> v2 >> make_variables(4, tf.initializers.RandomUniform(minval=-1., maxval=1.)) (, >> def make_variables(k, initializer): ... return (tf.Variable(initializer(shape=[k], dtype=tf.float32)), ... tf.Variable(initializer(shape=[k, k], dtype=tf.float32))) >>> v1, v2 = make_variables(3, tf.initializers.VarianceScaling(scale=1.)) >>> v1 >>> v2 >> make_variables(4, tf.initializers.VarianceScaling(distribution='uniform')) (, >> def make_variables(k, initializer): ... return (tf.Variable(initializer(shape=[k, k], dtype=tf.float32)), ... tf.Variable(initializer(shape=[k, k, k], dtype=tf.float32))) >>> v1, v2 = make_variables(3, tf.initializers.Orthogonal()) >>> v1 >> v2 >> make_variables(4, tf.initializers.Orthogonal(gain=0.5)) (>> def make_variable(k, initializer): ... return tf.Variable(initializer(shape=[k, k], dtype=tf.float32)) >>> make_variable(2, tf.initializers.Identity()) >>> make_variable(3, tf.initializers.Identity(gain=0.5)) Args: gain: Multiplicative factor to apply to the identity matrix. """ def __init__(self, gain=1.0): self.gain = gain def __call__(self, shape, dtype=dtypes.float32): """Returns a tensor object initialized as specified by the initializer. Args: shape: Shape of the tensor. dtype: Optional dtype of the tensor. Only floating point types are supported. Raises: ValueError: If the dtype is not floating point ValueError: If the requested shape does not have exactly two axes. """ partition_info = None # Keeps logic so can be readded later if necessary dtype = _assert_float_dtype(dtype) full_shape = shape if partition_info is None else partition_info.full_shape if len(full_shape) != 2: raise ValueError( "Identity matrix initializer can only be used for 2D matrices.") initializer = linalg_ops_impl.eye(*full_shape, dtype=dtype) if partition_info is not None: initializer = array_ops.slice(initializer, partition_info.var_offset, shape) return self.gain * initializer def get_config(self): return {"gain": self.gain} class GlorotUniform(VarianceScaling): """The Glorot uniform initializer, also called Xavier uniform initializer. Initializers allow you to pre-specify an initialization strategy, encoded in the Initializer object, without knowing the shape and dtype of the variable being initialized. Draws samples from a uniform distribution within [-limit, limit] where `limit` is `sqrt(6 / (fan_in + fan_out))` where `fan_in` is the number of input units in the weight tensor and `fan_out` is the number of output units in the weight tensor. Examples: >>> def make_variables(k, initializer): ... return (tf.Variable(initializer(shape=[k, k], dtype=tf.float32)), ... tf.Variable(initializer(shape=[k, k, k], dtype=tf.float32))) >>> v1, v2 = make_variables(3, tf.initializers.GlorotUniform()) >>> v1 >> v2 >> make_variables(4, tf.initializers.RandomNormal()) (>> def make_variables(k, initializer): ... return (tf.Variable(initializer(shape=[k, k], dtype=tf.float32)), ... tf.Variable(initializer(shape=[k, k, k], dtype=tf.float32))) >>> v1, v2 = make_variables(3, tf.initializers.GlorotNormal()) >>> v1 >> v2 >> make_variables(4, tf.initializers.RandomNormal()) (>> def make_variables(k, initializer): ... return (tf.Variable(initializer(shape=[k, k], dtype=tf.float32)), ... tf.Variable(initializer(shape=[k, k, k], dtype=tf.float32))) >>> v1, v2 = make_variables(3, tf.initializers.lecun_normal()) >>> v1 >> v2 >> make_variables(4, tf.initializers.RandomNormal()) (>> def make_variables(k, initializer): ... return (tf.Variable(initializer(shape=[k, k], dtype=tf.float32)), ... tf.Variable(initializer(shape=[k, k, k], dtype=tf.float32))) >>> v1, v2 = make_variables(3, tf.initializers.lecun_uniform()) >>> v1 >> v2 >> make_variables(4, tf.initializers.RandomNormal()) (>> def make_variables(k, initializer): ... return (tf.Variable(initializer(shape=[k, k], dtype=tf.float32)), ... tf.Variable(initializer(shape=[k, k, k], dtype=tf.float32))) >>> v1, v2 = make_variables(3, tf.initializers.he_normal()) >>> v1 >> v2 >> make_variables(4, tf.initializers.RandomNormal()) (>> def make_variables(k, initializer): ... return (tf.Variable(initializer(shape=[k, k], dtype=tf.float32)), ... tf.Variable(initializer(shape=[k, k, k], dtype=tf.float32))) >>> v1, v2 = make_variables(3, tf.initializers.he_uniform()) >>> v1 >> v2 >> make_variables(4, tf.initializers.RandomNormal()) (