• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
15"""Student's t distribution class."""
16
17from __future__ import absolute_import
18from __future__ import division
19from __future__ import print_function
20
21import numpy as np
22
23from tensorflow.python.framework import constant_op
24from tensorflow.python.framework import dtypes
25from tensorflow.python.framework import ops
26from tensorflow.python.framework import tensor_shape
27from tensorflow.python.ops import array_ops
28from tensorflow.python.ops import check_ops
29from tensorflow.python.ops import control_flow_ops
30from tensorflow.python.ops import math_ops
31from tensorflow.python.ops import nn
32from tensorflow.python.ops import random_ops
33from tensorflow.python.ops import special_math_ops
34from tensorflow.python.ops.distributions import distribution
35from tensorflow.python.ops.distributions import util as distribution_util
36from tensorflow.python.util import deprecation
37from tensorflow.python.util.tf_export import tf_export
38
39
# Names exported by `from <this module> import *`: the two distribution
# classes defined in this file.
__all__ = [
    "StudentT",
    "StudentTWithAbsDfSoftplusScale",
]
44
45
@tf_export(v1=["distributions.StudentT"])
class StudentT(distribution.Distribution):
  """Student's t-distribution.

  This distribution has parameters: degree of freedom `df`, location `loc`,
  and `scale`.

  #### Mathematical details

  The probability density function (pdf) is,

  ```none
  pdf(x; df, mu, sigma) = (1 + y**2 / df)**(-0.5 (df + 1)) / Z
  where,
  y = (x - mu) / sigma
  Z = abs(sigma) sqrt(df pi) Gamma(0.5 df) / Gamma(0.5 (df + 1))
  ```

  where:
  * `loc = mu`,
  * `scale = sigma`, and,
  * `Z` is the normalization constant, and,
  * `Gamma` is the [gamma function](
    https://en.wikipedia.org/wiki/Gamma_function).

  The StudentT distribution is a member of the [location-scale family](
  https://en.wikipedia.org/wiki/Location-scale_family), i.e., it can be
  constructed as,

  ```none
  X ~ StudentT(df, loc=0, scale=1)
  Y = loc + scale * X
  ```

  Notice that `scale` has semantics more similar to standard deviation than
  variance. However it is not actually the std. deviation; the Student's
  t-distribution std. dev. is `scale sqrt(df / (df - 2))` when `df > 2`.

  Samples of this distribution are reparameterized (pathwise differentiable).
  The derivatives are computed using the approach described in
  (Figurnov et al., 2018).

  #### Examples

  Examples of initialization of one or a batch of distributions. Note that
  `df`, `loc` and `scale` are all required and must share one floating-point
  dtype.

  ```python
  import tensorflow_probability as tfp
  tfd = tfp.distributions

  # Define a single scalar Student t distribution.
  single_dist = tfd.StudentT(df=3., loc=0., scale=1.)

  # Evaluate the pdf at 1, returning a scalar Tensor.
  single_dist.prob(1.)

  # Define a batch of two scalar valued Student t's.
  # The first has degrees of freedom 2, mean 1, and scale 11.
  # The second 3, 2 and 22.
  multi_dist = tfd.StudentT(df=[2., 3.], loc=[1., 2.], scale=[11., 22.])

  # Evaluate the pdf of the first distribution on 0, and the second on 1.5,
  # returning a length two tensor.
  multi_dist.prob([0, 1.5])

  # Get 3 samples, returning a 3 x 2 tensor.
  multi_dist.sample(3)
  ```

  Arguments are broadcast when possible.

  ```python
  # Define a batch of two Student's t distributions.
  # Both have df 2 and mean 1, but different scales.
  dist = tfd.StudentT(df=2., loc=1., scale=[11., 22.])

  # Evaluate the pdf of both distributions on the same point, 3.0,
  # returning a length 2 tensor.
  dist.prob(3.0)
  ```

  Compute the gradients of samples w.r.t. the parameters:

  ```python
  df = tf.constant(2.0)
  loc = tf.constant(2.0)
  scale = tf.constant(11.0)
  dist = tfd.StudentT(df=df, loc=loc, scale=scale)
  samples = dist.sample(5)  # Shape [5]
  loss = tf.reduce_mean(tf.square(samples))  # Arbitrary loss function
  # Unbiased stochastic gradients of the loss function
  grads = tf.gradients(loss, [df, loc, scale])
  ```

  References:
    Implicit Reparameterization Gradients:
      [Figurnov et al., 2018]
      (http://papers.nips.cc/paper/7326-implicit-reparameterization-gradients)
      ([pdf](http://papers.nips.cc/paper/7326-implicit-reparameterization-gradients.pdf))
  """

  @deprecation.deprecated(
      "2019-01-01",
      "The TensorFlow Distributions library has moved to "
      "TensorFlow Probability "
      "(https://github.com/tensorflow/probability). You "
      "should update all references to use `tfp.distributions` "
      "instead of `tf.distributions`.",
      warn_once=True)
  def __init__(self,
               df,
               loc,
               scale,
               validate_args=False,
               allow_nan_stats=True,
               name="StudentT"):
    """Construct Student's t distributions.

    The distributions have degree of freedom `df`, mean `loc`, and scale
    `scale`.

    The parameters `df`, `loc`, and `scale` must be shaped in a way that
    supports broadcasting (e.g. `df + loc + scale` is a valid operation).

    Args:
      df: Floating-point `Tensor`. The degrees of freedom of the
        distribution(s). `df` must contain only positive values.
      loc: Floating-point `Tensor`. The mean(s) of the distribution(s).
      scale: Floating-point `Tensor`. The scaling factor(s) for the
        distribution(s). Note that `scale` is not technically the standard
        deviation of this distribution but has semantics more similar to
        standard deviation than variance.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`,
        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.

    Raises:
      TypeError: if loc and scale are different dtypes.
    """
    # Must stay the first statement: it snapshots exactly the constructor
    # arguments (plus `self`) before any other local is bound.
    parameters = dict(locals())
    with ops.name_scope(name, values=[df, loc, scale]) as name:
      # The positivity check on `df` is only attached when validation is
      # requested.
      with ops.control_dependencies([check_ops.assert_positive(df)]
                                    if validate_args else []):
        self._df = array_ops.identity(df, name="df")
        self._loc = array_ops.identity(loc, name="loc")
        self._scale = array_ops.identity(scale, name="scale")
        check_ops.assert_same_float_dtype(
            (self._df, self._loc, self._scale))
    super(StudentT, self).__init__(
        dtype=self._scale.dtype,
        reparameterization_type=distribution.FULLY_REPARAMETERIZED,
        validate_args=validate_args,
        allow_nan_stats=allow_nan_stats,
        parameters=parameters,
        graph_parents=[self._df, self._loc, self._scale],
        name=name)

  @staticmethod
  def _param_shapes(sample_shape):
    """Map each of `df`, `loc`, `scale` to `sample_shape` as an int32 tensor."""
    return dict(
        zip(("df", "loc", "scale"), (
            [ops.convert_to_tensor(
                sample_shape, dtype=dtypes.int32)] * 3)))

  @property
  def df(self):
    """Degrees of freedom in these Student's t distribution(s)."""
    return self._df

  @property
  def loc(self):
    """Locations of these Student's t distribution(s)."""
    return self._loc

  @property
  def scale(self):
    """Scaling factors of these Student's t distribution(s)."""
    return self._scale

  def _batch_shape_tensor(self):
    """Dynamic broadcast of the shapes of `df`, `loc` and `scale`."""
    return array_ops.broadcast_dynamic_shape(
        array_ops.shape(self.df),
        array_ops.broadcast_dynamic_shape(
            array_ops.shape(self.loc), array_ops.shape(self.scale)))

  def _batch_shape(self):
    """Static broadcast of the shapes of `df`, `loc` and `scale`."""
    return array_ops.broadcast_static_shape(
        array_ops.broadcast_static_shape(self.df.get_shape(),
                                         self.loc.get_shape()),
        self.scale.get_shape())

  def _event_shape_tensor(self):
    """Empty int32 shape tensor: events are scalars."""
    # Use the `dtypes` module (as `_param_shapes` does) instead of relying on
    # `int32` happening to be reachable through `math_ops`.
    return constant_op.constant([], dtype=dtypes.int32)

  def _event_shape(self):
    """Static scalar event shape."""
    return tensor_shape.TensorShape([])

  def _sample_n(self, n, seed=None):
    """Draw `n` samples per batch member; result has shape `[n] + batch`."""
    # The sampling method comes from the fact that if:
    #   X ~ Normal(0, 1)
    #   Z ~ Chi2(df)
    #   Y = X / sqrt(Z / df)
    # then:
    #   Y ~ StudentT(df).
    shape = array_ops.concat([[n], self.batch_shape_tensor()], 0)
    normal_sample = random_ops.random_normal(shape, dtype=self.dtype, seed=seed)
    # Broadcast `df` to the full batch shape so the gamma draw below has the
    # same trailing shape as `normal_sample`.
    df = self.df * array_ops.ones(self.batch_shape_tensor(), dtype=self.dtype)
    # Chi2(df) is drawn as Gamma(alpha=df / 2, beta=1 / 2); a salted seed keeps
    # it distinct from the seed used for the normal draw.
    gamma_sample = random_ops.random_gamma(
        [n],
        0.5 * df,
        beta=0.5,
        dtype=self.dtype,
        seed=distribution_util.gen_new_seed(seed, salt="student_t"))
    samples = normal_sample * math_ops.rsqrt(gamma_sample / df)
    return samples * self.scale + self.loc  # Abs(scale) not wanted.

  def _log_prob(self, x):
    """Log density: unnormalized log-probability minus log-normalizer."""
    return self._log_unnormalized_prob(x) - self._log_normalization()

  def _log_unnormalized_prob(self, x):
    """Return `-0.5 (df + 1) log1p(y**2 / df)` with `y = (x - loc) / scale`."""
    y = (x - self.loc) / self.scale  # Abs(scale) superfluous.
    return -0.5 * (self.df + 1.) * math_ops.log1p(y**2. / self.df)

  def _log_normalization(self):
    """Log of `Z = abs(scale) sqrt(df pi) Gamma(df/2) / Gamma((df+1)/2)`."""
    return (math_ops.log(math_ops.abs(self.scale)) +
            0.5 * math_ops.log(self.df) +
            0.5 * np.log(np.pi) +
            math_ops.lgamma(0.5 * self.df) -
            math_ops.lgamma(0.5 * (self.df + 1.)))

  def _cdf(self, x):
    """CDF computed via `betainc`, mirrored around `loc` for `y >= 0`."""
    # Take Abs(scale) to make subsequent where work correctly.
    y = (x - self.loc) / math_ops.abs(self.scale)
    x_t = self.df / (y**2. + self.df)
    # `neg_cdf` is the value used directly when y < 0; for y >= 0 its
    # complement is returned.
    neg_cdf = 0.5 * math_ops.betainc(0.5 * self.df, 0.5, x_t)
    return array_ops.where_v2(math_ops.less(y, 0.), neg_cdf, 1. - neg_cdf)

  def _entropy(self):
    """Entropy of the distribution(s), one value per batch member."""
    # Build a trailing axis of size 2 holding (df / 2, 1 / 2) for `lbeta`.
    v = array_ops.ones(self.batch_shape_tensor(),
                       dtype=self.dtype)[..., array_ops.newaxis]
    u = v * self.df[..., array_ops.newaxis]
    beta_arg = array_ops.concat([u, v], -1) / 2.
    return (math_ops.log(math_ops.abs(self.scale)) +
            0.5 * math_ops.log(self.df) +
            special_math_ops.lbeta(beta_arg) +
            0.5 * (self.df + 1.) *
            (math_ops.digamma(0.5 * (self.df + 1.)) -
             math_ops.digamma(0.5 * self.df)))

  @distribution_util.AppendDocstring(
      """The mean of Student's T equals `loc` if `df > 1`, otherwise it is
      `NaN`. If `self.allow_nan_stats=False`, then an exception will be raised
      rather than returning `NaN`.""")
  def _mean(self):
    # `loc` broadcast to the full batch shape.
    mean = self.loc * array_ops.ones(self.batch_shape_tensor(),
                                     dtype=self.dtype)
    if self.allow_nan_stats:
      nan = np.array(np.nan, dtype=self.dtype.as_numpy_dtype())
      return array_ops.where_v2(
          math_ops.greater(
              self.df,
              array_ops.ones(self.batch_shape_tensor(), dtype=self.dtype)),
          mean, array_ops.fill(self.batch_shape_tensor(), nan, name="nan"))
    else:
      # NaN results are not allowed: fail at run time if any df <= 1.
      return control_flow_ops.with_dependencies(
          [
              check_ops.assert_less(
                  array_ops.ones([], dtype=self.dtype),
                  self.df,
                  message="mean not defined for components of df <= 1"),
          ],
          mean)

  @distribution_util.AppendDocstring("""
      The variance for Student's T equals

      ```
      scale**2 * df / (df - 2), when df > 2
      infinity, when 1 < df <= 2
      NaN, when df <= 1
      ```
      """)
  def _variance(self):
    # Comparing against the Python scalar lets it adopt `df`'s dtype; filling
    # a tensor with `2.` would produce float32 and clash with a float64 `df`.
    df_gt_2 = math_ops.greater(self.df, 2.)
    # We need to put the tf.where inside the outer tf.where to ensure we never
    # hit a NaN in the gradient.
    denom = array_ops.where_v2(
        df_gt_2, self.df - 2.,
        array_ops.ones_like(self.df))
    # Abs(scale) superfluous.
    var = (array_ops.ones(self.batch_shape_tensor(), dtype=self.dtype) *
           math_ops.square(self.scale) * self.df / denom)
    # When 1 < df <= 2, variance is infinite.
    inf = np.array(np.inf, dtype=self.dtype.as_numpy_dtype())
    result_where_defined = array_ops.where_v2(
        df_gt_2, var,
        array_ops.fill(self.batch_shape_tensor(), inf, name="inf"))

    if self.allow_nan_stats:
      nan = np.array(np.nan, dtype=self.dtype.as_numpy_dtype())
      return array_ops.where_v2(
          math_ops.greater(
              self.df,
              array_ops.ones(self.batch_shape_tensor(), dtype=self.dtype)),
          result_where_defined,
          array_ops.fill(self.batch_shape_tensor(), nan, name="nan"))
    else:
      # NaN results are not allowed: fail at run time if any df <= 1.
      return control_flow_ops.with_dependencies(
          [
              check_ops.assert_less(
                  array_ops.ones([], dtype=self.dtype),
                  self.df,
                  message="variance not defined for components of df <= 1"),
          ],
          result_where_defined)

  def _mode(self):
    """The mode is `loc` (returned through an identity op)."""
    return array_ops.identity(self.loc)
369
370
class StudentTWithAbsDfSoftplusScale(StudentT):
  """StudentT with `df = floor(abs(df))` and `scale = softplus(scale)`."""

  @deprecation.deprecated(
      "2019-01-01",
      "Use `tfd.StudentT(tf.floor(tf.abs(df)), loc, "
      "tf.nn.softplus(scale)) instead.",
      warn_once=True)
  def __init__(self,
               df,
               loc,
               scale,
               validate_args=False,
               allow_nan_stats=True,
               name="StudentTWithAbsDfSoftplusScale"):
    """Build a `StudentT` from transformed `df` and `scale`.

    `df` is mapped through `floor(abs(.))` and `scale` through `softplus`
    before being handed to the `StudentT` constructor; `loc` and the
    remaining arguments are forwarded unchanged.
    """
    # Snapshot the caller-supplied arguments first, so the recorded
    # parameters are the untransformed values.
    parameters = dict(locals())
    with ops.name_scope(name, values=[df, scale]) as name:
      # Create the transformed tensors in the same order as the keyword
      # arguments below, inside this name scope.
      floored_abs_df = math_ops.floor(math_ops.abs(df))
      softplus_scale = nn.softplus(scale, name="softplus_scale")
      super(StudentTWithAbsDfSoftplusScale, self).__init__(
          df=floored_abs_df,
          loc=loc,
          scale=softplus_scale,
          validate_args=validate_args,
          allow_nan_stats=allow_nan_stats,
          name=name)
    # Overwrite what the parent constructor stored with the original,
    # untransformed arguments.
    self._parameters = parameters
396