1# Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# ============================================================================== 15"""Student's t distribution class.""" 16 17from __future__ import absolute_import 18from __future__ import division 19from __future__ import print_function 20 21import numpy as np 22 23from tensorflow.python.framework import constant_op 24from tensorflow.python.framework import dtypes 25from tensorflow.python.framework import ops 26from tensorflow.python.framework import tensor_shape 27from tensorflow.python.ops import array_ops 28from tensorflow.python.ops import check_ops 29from tensorflow.python.ops import control_flow_ops 30from tensorflow.python.ops import math_ops 31from tensorflow.python.ops import nn 32from tensorflow.python.ops import random_ops 33from tensorflow.python.ops import special_math_ops 34from tensorflow.python.ops.distributions import distribution 35from tensorflow.python.ops.distributions import util as distribution_util 36from tensorflow.python.util import deprecation 37from tensorflow.python.util.tf_export import tf_export 38 39 40__all__ = [ 41 "StudentT", 42 "StudentTWithAbsDfSoftplusScale", 43] 44 45 46@tf_export(v1=["distributions.StudentT"]) 47class StudentT(distribution.Distribution): 48 """Student's t-distribution. 49 50 This distribution has parameters: degree of freedom `df`, location `loc`, 51 and `scale`. 52 53 #### Mathematical details 54 55 The probability density function (pdf) is, 56 57 ```none 58 pdf(x; df, mu, sigma) = (1 + y**2 / df)**(-0.5 (df + 1)) / Z 59 where, 60 y = (x - mu) / sigma 61 Z = abs(sigma) sqrt(df pi) Gamma(0.5 df) / Gamma(0.5 (df + 1)) 62 ``` 63 64 where: 65 * `loc = mu`, 66 * `scale = sigma`, and, 67 * `Z` is the normalization constant, and, 68 * `Gamma` is the [gamma function]( 69 https://en.wikipedia.org/wiki/Gamma_function). 70 71 The StudentT distribution is a member of the [location-scale family]( 72 https://en.wikipedia.org/wiki/Location-scale_family), i.e., it can be 73 constructed as, 74 75 ```none 76 X ~ StudentT(df, loc=0, scale=1) 77 Y = loc + scale * X 78 ``` 79 80 Notice that `scale` has semantics more similar to standard deviation than 81 variance. However it is not actually the std. deviation; the Student's 82 t-distribution std. dev. is `scale sqrt(df / (df - 2))` when `df > 2`. 83 84 Samples of this distribution are reparameterized (pathwise differentiable). 85 The derivatives are computed using the approach described in 86 (Figurnov et al., 2018). 87 88 #### Examples 89 90 Examples of initialization of one or a batch of distributions. 91 92 ```python 93 import tensorflow_probability as tfp 94 tfd = tfp.distributions 95 96 # Define a single scalar Student t distribution. 97 single_dist = tfd.StudentT(df=3) 98 99 # Evaluate the pdf at 1, returning a scalar Tensor. 100 single_dist.prob(1.) 101 102 # Define a batch of two scalar valued Student t's. 103 # The first has degrees of freedom 2, mean 1, and scale 11. 104 # The second 3, 2 and 22. 105 multi_dist = tfd.StudentT(df=[2, 3], loc=[1, 2.], scale=[11, 22.]) 106 107 # Evaluate the pdf of the first distribution on 0, and the second on 1.5, 108 # returning a length two tensor. 109 multi_dist.prob([0, 1.5]) 110 111 # Get 3 samples, returning a 3 x 2 tensor. 112 multi_dist.sample(3) 113 ``` 114 115 Arguments are broadcast when possible. 116 117 ```python 118 # Define a batch of two Student's t distributions. 119 # Both have df 2 and mean 1, but different scales. 120 dist = tfd.StudentT(df=2, loc=1, scale=[11, 22.]) 121 122 # Evaluate the pdf of both distributions on the same point, 3.0, 123 # returning a length 2 tensor. 124 dist.prob(3.0) 125 ``` 126 127 Compute the gradients of samples w.r.t. the parameters: 128 129 ```python 130 df = tf.constant(2.0) 131 loc = tf.constant(2.0) 132 scale = tf.constant(11.0) 133 dist = tfd.StudentT(df=df, loc=loc, scale=scale) 134 samples = dist.sample(5) # Shape [5] 135 loss = tf.reduce_mean(tf.square(samples)) # Arbitrary loss function 136 # Unbiased stochastic gradients of the loss function 137 grads = tf.gradients(loss, [df, loc, scale]) 138 ``` 139 140 References: 141 Implicit Reparameterization Gradients: 142 [Figurnov et al., 2018] 143 (http://papers.nips.cc/paper/7326-implicit-reparameterization-gradients) 144 ([pdf](http://papers.nips.cc/paper/7326-implicit-reparameterization-gradients.pdf)) 145 """ 146 147 @deprecation.deprecated( 148 "2019-01-01", 149 "The TensorFlow Distributions library has moved to " 150 "TensorFlow Probability " 151 "(https://github.com/tensorflow/probability). You " 152 "should update all references to use `tfp.distributions` " 153 "instead of `tf.distributions`.", 154 warn_once=True) 155 def __init__(self, 156 df, 157 loc, 158 scale, 159 validate_args=False, 160 allow_nan_stats=True, 161 name="StudentT"): 162 """Construct Student's t distributions. 163 164 The distributions have degree of freedom `df`, mean `loc`, and scale 165 `scale`. 166 167 The parameters `df`, `loc`, and `scale` must be shaped in a way that 168 supports broadcasting (e.g. `df + loc + scale` is a valid operation). 169 170 Args: 171 df: Floating-point `Tensor`. The degrees of freedom of the 172 distribution(s). `df` must contain only positive values. 173 loc: Floating-point `Tensor`. The mean(s) of the distribution(s). 174 scale: Floating-point `Tensor`. The scaling factor(s) for the 175 distribution(s). Note that `scale` is not technically the standard 176 deviation of this distribution but has semantics more similar to 177 standard deviation than variance. 178 validate_args: Python `bool`, default `False`. When `True` distribution 179 parameters are checked for validity despite possibly degrading runtime 180 performance. When `False` invalid inputs may silently render incorrect 181 outputs. 182 allow_nan_stats: Python `bool`, default `True`. When `True`, 183 statistics (e.g., mean, mode, variance) use the value "`NaN`" to 184 indicate the result is undefined. When `False`, an exception is raised 185 if one or more of the statistic's batch members are undefined. 186 name: Python `str` name prefixed to Ops created by this class. 187 188 Raises: 189 TypeError: if loc and scale are different dtypes. 190 """ 191 parameters = dict(locals()) 192 with ops.name_scope(name, values=[df, loc, scale]) as name: 193 with ops.control_dependencies([check_ops.assert_positive(df)] 194 if validate_args else []): 195 self._df = array_ops.identity(df, name="df") 196 self._loc = array_ops.identity(loc, name="loc") 197 self._scale = array_ops.identity(scale, name="scale") 198 check_ops.assert_same_float_dtype( 199 (self._df, self._loc, self._scale)) 200 super(StudentT, self).__init__( 201 dtype=self._scale.dtype, 202 reparameterization_type=distribution.FULLY_REPARAMETERIZED, 203 validate_args=validate_args, 204 allow_nan_stats=allow_nan_stats, 205 parameters=parameters, 206 graph_parents=[self._df, self._loc, self._scale], 207 name=name) 208 209 @staticmethod 210 def _param_shapes(sample_shape): 211 return dict( 212 zip(("df", "loc", "scale"), ( 213 [ops.convert_to_tensor( 214 sample_shape, dtype=dtypes.int32)] * 3))) 215 216 @property 217 def df(self): 218 """Degrees of freedom in these Student's t distribution(s).""" 219 return self._df 220 221 @property 222 def loc(self): 223 """Locations of these Student's t distribution(s).""" 224 return self._loc 225 226 @property 227 def scale(self): 228 """Scaling factors of these Student's t distribution(s).""" 229 return self._scale 230 231 def _batch_shape_tensor(self): 232 return array_ops.broadcast_dynamic_shape( 233 array_ops.shape(self.df), 234 array_ops.broadcast_dynamic_shape( 235 array_ops.shape(self.loc), array_ops.shape(self.scale))) 236 237 def _batch_shape(self): 238 return array_ops.broadcast_static_shape( 239 array_ops.broadcast_static_shape(self.df.get_shape(), 240 self.loc.get_shape()), 241 self.scale.get_shape()) 242 243 def _event_shape_tensor(self): 244 return constant_op.constant([], dtype=math_ops.int32) 245 246 def _event_shape(self): 247 return tensor_shape.TensorShape([]) 248 249 def _sample_n(self, n, seed=None): 250 # The sampling method comes from the fact that if: 251 # X ~ Normal(0, 1) 252 # Z ~ Chi2(df) 253 # Y = X / sqrt(Z / df) 254 # then: 255 # Y ~ StudentT(df). 256 shape = array_ops.concat([[n], self.batch_shape_tensor()], 0) 257 normal_sample = random_ops.random_normal(shape, dtype=self.dtype, seed=seed) 258 df = self.df * array_ops.ones(self.batch_shape_tensor(), dtype=self.dtype) 259 gamma_sample = random_ops.random_gamma( 260 [n], 261 0.5 * df, 262 beta=0.5, 263 dtype=self.dtype, 264 seed=distribution_util.gen_new_seed(seed, salt="student_t")) 265 samples = normal_sample * math_ops.rsqrt(gamma_sample / df) 266 return samples * self.scale + self.loc # Abs(scale) not wanted. 267 268 def _log_prob(self, x): 269 return self._log_unnormalized_prob(x) - self._log_normalization() 270 271 def _log_unnormalized_prob(self, x): 272 y = (x - self.loc) / self.scale # Abs(scale) superfluous. 273 return -0.5 * (self.df + 1.) * math_ops.log1p(y**2. / self.df) 274 275 def _log_normalization(self): 276 return (math_ops.log(math_ops.abs(self.scale)) + 277 0.5 * math_ops.log(self.df) + 278 0.5 * np.log(np.pi) + 279 math_ops.lgamma(0.5 * self.df) - 280 math_ops.lgamma(0.5 * (self.df + 1.))) 281 282 def _cdf(self, x): 283 # Take Abs(scale) to make subsequent where work correctly. 284 y = (x - self.loc) / math_ops.abs(self.scale) 285 x_t = self.df / (y**2. + self.df) 286 neg_cdf = 0.5 * math_ops.betainc(0.5 * self.df, 0.5, x_t) 287 return array_ops.where_v2(math_ops.less(y, 0.), neg_cdf, 1. - neg_cdf) 288 289 def _entropy(self): 290 v = array_ops.ones(self.batch_shape_tensor(), 291 dtype=self.dtype)[..., array_ops.newaxis] 292 u = v * self.df[..., array_ops.newaxis] 293 beta_arg = array_ops.concat([u, v], -1) / 2. 294 return (math_ops.log(math_ops.abs(self.scale)) + 295 0.5 * math_ops.log(self.df) + 296 special_math_ops.lbeta(beta_arg) + 297 0.5 * (self.df + 1.) * 298 (math_ops.digamma(0.5 * (self.df + 1.)) - 299 math_ops.digamma(0.5 * self.df))) 300 301 @distribution_util.AppendDocstring( 302 """The mean of Student's T equals `loc` if `df > 1`, otherwise it is 303 `NaN`. If `self.allow_nan_stats=True`, then an exception will be raised 304 rather than returning `NaN`.""") 305 def _mean(self): 306 mean = self.loc * array_ops.ones(self.batch_shape_tensor(), 307 dtype=self.dtype) 308 if self.allow_nan_stats: 309 nan = np.array(np.nan, dtype=self.dtype.as_numpy_dtype()) 310 return array_ops.where_v2( 311 math_ops.greater( 312 self.df, 313 array_ops.ones(self.batch_shape_tensor(), dtype=self.dtype)), 314 mean, array_ops.fill(self.batch_shape_tensor(), nan, name="nan")) 315 else: 316 return control_flow_ops.with_dependencies( 317 [ 318 check_ops.assert_less( 319 array_ops.ones([], dtype=self.dtype), 320 self.df, 321 message="mean not defined for components of df <= 1"), 322 ], 323 mean) 324 325 @distribution_util.AppendDocstring(""" 326 The variance for Student's T equals 327 328 ``` 329 df / (df - 2), when df > 2 330 infinity, when 1 < df <= 2 331 NaN, when df <= 1 332 ``` 333 """) 334 def _variance(self): 335 # We need to put the tf.where inside the outer tf.where to ensure we never 336 # hit a NaN in the gradient. 337 denom = array_ops.where_v2( 338 math_ops.greater(self.df, 2.), self.df - 2., 339 array_ops.ones_like(self.df)) 340 # Abs(scale) superfluous. 341 var = (array_ops.ones(self.batch_shape_tensor(), dtype=self.dtype) * 342 math_ops.square(self.scale) * self.df / denom) 343 # When 1 < df <= 2, variance is infinite. 344 inf = np.array(np.inf, dtype=self.dtype.as_numpy_dtype()) 345 result_where_defined = array_ops.where_v2( 346 self.df > array_ops.fill(self.batch_shape_tensor(), 2.), var, 347 array_ops.fill(self.batch_shape_tensor(), inf, name="inf")) 348 349 if self.allow_nan_stats: 350 nan = np.array(np.nan, dtype=self.dtype.as_numpy_dtype()) 351 return array_ops.where_v2( 352 math_ops.greater( 353 self.df, 354 array_ops.ones(self.batch_shape_tensor(), dtype=self.dtype)), 355 result_where_defined, 356 array_ops.fill(self.batch_shape_tensor(), nan, name="nan")) 357 else: 358 return control_flow_ops.with_dependencies( 359 [ 360 check_ops.assert_less( 361 array_ops.ones([], dtype=self.dtype), 362 self.df, 363 message="variance not defined for components of df <= 1"), 364 ], 365 result_where_defined) 366 367 def _mode(self): 368 return array_ops.identity(self.loc) 369 370 371class StudentTWithAbsDfSoftplusScale(StudentT): 372 """StudentT with `df = floor(abs(df))` and `scale = softplus(scale)`.""" 373 374 @deprecation.deprecated( 375 "2019-01-01", 376 "Use `tfd.StudentT(tf.floor(tf.abs(df)), loc, " 377 "tf.nn.softplus(scale)) instead.", 378 warn_once=True) 379 def __init__(self, 380 df, 381 loc, 382 scale, 383 validate_args=False, 384 allow_nan_stats=True, 385 name="StudentTWithAbsDfSoftplusScale"): 386 parameters = dict(locals()) 387 with ops.name_scope(name, values=[df, scale]) as name: 388 super(StudentTWithAbsDfSoftplusScale, self).__init__( 389 df=math_ops.floor(math_ops.abs(df)), 390 loc=loc, 391 scale=nn.softplus(scale, name="softplus_scale"), 392 validate_args=validate_args, 393 allow_nan_stats=allow_nan_stats, 394 name=name) 395 self._parameters = parameters 396