# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Student's t distribution class."""

import numpy as np

from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import special_math_ops
from tensorflow.python.ops.distributions import distribution
from tensorflow.python.ops.distributions import util as distribution_util
from tensorflow.python.util import deprecation
from tensorflow.python.util.tf_export import tf_export


__all__ = [
    "StudentT",
    "StudentTWithAbsDfSoftplusScale",
]


@tf_export(v1=["distributions.StudentT"])
class StudentT(distribution.Distribution):
  """Student's t-distribution.

  This distribution has parameters: degree of freedom `df`, location `loc`,
  and `scale`.

  #### Mathematical details

  The probability density function (pdf) is,

  ```none
  pdf(x; df, mu, sigma) = (1 + y**2 / df)**(-0.5 (df + 1)) / Z
  where,
  y = (x - mu) / sigma
  Z = abs(sigma) sqrt(df pi) Gamma(0.5 df) / Gamma(0.5 (df + 1))
  ```

  where:
  * `loc = mu`,
  * `scale = sigma`, and,
  * `Z` is the normalization constant, and,
  * `Gamma` is the [gamma function](
    https://en.wikipedia.org/wiki/Gamma_function).

  The StudentT distribution is a member of the [location-scale family](
  https://en.wikipedia.org/wiki/Location-scale_family), i.e., it can be
  constructed as,

  ```none
  X ~ StudentT(df, loc=0, scale=1)
  Y = loc + scale * X
  ```

  Notice that `scale` has semantics more similar to standard deviation than
  variance. However it is not actually the std. deviation; the Student's
  t-distribution std. dev. is `scale sqrt(df / (df - 2))` when `df > 2`.

  Samples of this distribution are reparameterized (pathwise differentiable).
  The derivatives are computed using the approach described in
  (Figurnov et al., 2018).

  #### Examples

  Examples of initialization of one or a batch of distributions.

  ```python
  import tensorflow_probability as tfp
  tfd = tfp.distributions

  # Define a single scalar Student t distribution.
  single_dist = tfd.StudentT(df=3)

  # Evaluate the pdf at 1, returning a scalar Tensor.
  single_dist.prob(1.)

  # Define a batch of two scalar valued Student t's.
  # The first has degrees of freedom 2, mean 1, and scale 11.
  # The second 3, 2 and 22.
  multi_dist = tfd.StudentT(df=[2, 3], loc=[1, 2.], scale=[11, 22.])

  # Evaluate the pdf of the first distribution on 0, and the second on 1.5,
  # returning a length two tensor.
  multi_dist.prob([0, 1.5])

  # Get 3 samples, returning a 3 x 2 tensor.
  multi_dist.sample(3)
  ```

  Arguments are broadcast when possible.

  ```python
  # Define a batch of two Student's t distributions.
  # Both have df 2 and mean 1, but different scales.
  dist = tfd.StudentT(df=2, loc=1, scale=[11, 22.])

  # Evaluate the pdf of both distributions on the same point, 3.0,
  # returning a length 2 tensor.
  dist.prob(3.0)
  ```

  Compute the gradients of samples w.r.t. the parameters:

  ```python
  df = tf.constant(2.0)
  loc = tf.constant(2.0)
  scale = tf.constant(11.0)
  dist = tfd.StudentT(df=df, loc=loc, scale=scale)
  samples = dist.sample(5)  # Shape [5]
  loss = tf.reduce_mean(tf.square(samples))  # Arbitrary loss function
  # Unbiased stochastic gradients of the loss function
  grads = tf.gradients(loss, [df, loc, scale])
  ```

  References:
    Implicit Reparameterization Gradients:
      [Figurnov et al., 2018]
      (http://papers.nips.cc/paper/7326-implicit-reparameterization-gradients)
      ([pdf](http://papers.nips.cc/paper/7326-implicit-reparameterization-gradients.pdf))
  """

  @deprecation.deprecated(
      "2019-01-01",
      "The TensorFlow Distributions library has moved to "
      "TensorFlow Probability "
      "(https://github.com/tensorflow/probability). You "
      "should update all references to use `tfp.distributions` "
      "instead of `tf.distributions`.",
      warn_once=True)
  def __init__(self,
               df,
               loc,
               scale,
               validate_args=False,
               allow_nan_stats=True,
               name="StudentT"):
    """Construct Student's t distributions.

    The distributions have degree of freedom `df`, mean `loc`, and scale
    `scale`.

    The parameters `df`, `loc`, and `scale` must be shaped in a way that
    supports broadcasting (e.g. `df + loc + scale` is a valid operation).

    Args:
      df: Floating-point `Tensor`. The degrees of freedom of the
        distribution(s). `df` must contain only positive values.
      loc: Floating-point `Tensor`. The mean(s) of the distribution(s).
      scale: Floating-point `Tensor`. The scaling factor(s) for the
        distribution(s). Note that `scale` is not technically the standard
        deviation of this distribution but has semantics more similar to
        standard deviation than variance.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`,
        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.

    Raises:
      TypeError: if loc and scale are different dtypes.
    """
    # Must stay the first statement so only the constructor arguments (and
    # `self`) are captured.
    parameters = dict(locals())
    with ops.name_scope(name, values=[df, loc, scale]) as name:
      # The positivity check on `df` is only attached when validation is on.
      with ops.control_dependencies([check_ops.assert_positive(df)]
                                    if validate_args else []):
        self._df = array_ops.identity(df, name="df")
        self._loc = array_ops.identity(loc, name="loc")
        self._scale = array_ops.identity(scale, name="scale")
        check_ops.assert_same_float_dtype(
            (self._df, self._loc, self._scale))
    super(StudentT, self).__init__(
        dtype=self._scale.dtype,
        reparameterization_type=distribution.FULLY_REPARAMETERIZED,
        validate_args=validate_args,
        allow_nan_stats=allow_nan_stats,
        parameters=parameters,
        graph_parents=[self._df, self._loc, self._scale],
        name=name)

  @staticmethod
  def _param_shapes(sample_shape):
    # All three parameters share the requested sample shape.
    return dict(
        zip(("df", "loc", "scale"), (
            [ops.convert_to_tensor(
                sample_shape, dtype=dtypes.int32)] * 3)))

  @property
  def df(self):
    """Degrees of freedom in these Student's t distribution(s)."""
    return self._df

  @property
  def loc(self):
    """Locations of these Student's t distribution(s)."""
    return self._loc

  @property
  def scale(self):
    """Scaling factors of these Student's t distribution(s)."""
    return self._scale

  def _batch_shape_tensor(self):
    # Dynamic broadcast of the shapes of `df`, `loc` and `scale`.
    return array_ops.broadcast_dynamic_shape(
        array_ops.shape(self.df),
        array_ops.broadcast_dynamic_shape(
            array_ops.shape(self.loc), array_ops.shape(self.scale)))

  def _batch_shape(self):
    # Static counterpart of `_batch_shape_tensor`.
    return array_ops.broadcast_static_shape(
        array_ops.broadcast_static_shape(self.df.get_shape(),
                                         self.loc.get_shape()),
        self.scale.get_shape())

  def _event_shape_tensor(self):
    # Scalar events. Uses `dtypes.int32`, as `_param_shapes` does, rather than
    # reaching for the dtype through `math_ops`.
    return constant_op.constant([], dtype=dtypes.int32)

  def _event_shape(self):
    return tensor_shape.TensorShape([])

  def _sample_n(self, n, seed=None):
    # The sampling method comes from the fact that if:
    #   X ~ Normal(0, 1)
    #   Z ~ Chi2(df)
    #   Y = X / sqrt(Z / df)
    # then:
    #   Y ~ StudentT(df).
    shape = array_ops.concat([[n], self.batch_shape_tensor()], 0)
    normal_sample = random_ops.random_normal(shape, dtype=self.dtype, seed=seed)
    # Broadcast `df` to the full batch shape so the gamma draw matches
    # `normal_sample`.
    df = self.df * array_ops.ones(self.batch_shape_tensor(), dtype=self.dtype)
    gamma_sample = random_ops.random_gamma(
        [n],
        0.5 * df,
        beta=0.5,
        dtype=self.dtype,
        seed=distribution_util.gen_new_seed(seed, salt="student_t"))
    samples = normal_sample * math_ops.rsqrt(gamma_sample / df)
    return samples * self.scale + self.loc  # Abs(scale) not wanted.

  def _log_prob(self, x):
    return self._log_unnormalized_prob(x) - self._log_normalization()

  def _log_unnormalized_prob(self, x):
    y = (x - self.loc) / self.scale  # Abs(scale) superfluous.
    return -0.5 * (self.df + 1.) * math_ops.log1p(y**2. / self.df)

  def _log_normalization(self):
    # log(Z) with Z = abs(scale) sqrt(df pi) Gamma(0.5 df) / Gamma(0.5 (df + 1))
    return (math_ops.log(math_ops.abs(self.scale)) +
            0.5 * math_ops.log(self.df) +
            0.5 * np.log(np.pi) +
            math_ops.lgamma(0.5 * self.df) -
            math_ops.lgamma(0.5 * (self.df + 1.)))

  def _cdf(self, x):
    # Take Abs(scale) to make subsequent where work correctly.
    y = (x - self.loc) / math_ops.abs(self.scale)
    x_t = self.df / (y**2. + self.df)
    neg_cdf = 0.5 * math_ops.betainc(0.5 * self.df, 0.5, x_t)
    return array_ops.where_v2(math_ops.less(y, 0.), neg_cdf, 1. - neg_cdf)

  def _entropy(self):
    # Build the [df / 2, 1 / 2] pairs along a new trailing axis for `lbeta`.
    v = array_ops.ones(self.batch_shape_tensor(),
                       dtype=self.dtype)[..., array_ops.newaxis]
    u = v * self.df[..., array_ops.newaxis]
    beta_arg = array_ops.concat([u, v], -1) / 2.
    return (math_ops.log(math_ops.abs(self.scale)) +
            0.5 * math_ops.log(self.df) +
            special_math_ops.lbeta(beta_arg) +
            0.5 * (self.df + 1.) *
            (math_ops.digamma(0.5 * (self.df + 1.)) -
             math_ops.digamma(0.5 * self.df)))

  @distribution_util.AppendDocstring(
      """The mean of Student's T equals `loc` if `df > 1`, otherwise it is
      `NaN`. If `self.allow_nan_stats=False`, then an exception will be raised
      rather than returning `NaN`.""")
  def _mean(self):
    mean = self.loc * array_ops.ones(self.batch_shape_tensor(),
                                     dtype=self.dtype)
    if self.allow_nan_stats:
      # Undefined batch members (df <= 1) are reported as NaN.
      nan = np.array(np.nan, dtype=self.dtype.as_numpy_dtype())
      return array_ops.where_v2(
          math_ops.greater(
              self.df,
              array_ops.ones(self.batch_shape_tensor(), dtype=self.dtype)),
          mean, array_ops.fill(self.batch_shape_tensor(), nan, name="nan"))
    else:
      # Undefined batch members (df <= 1) trip an assertion instead.
      return control_flow_ops.with_dependencies(
          [
              check_ops.assert_less(
                  array_ops.ones([], dtype=self.dtype),
                  self.df,
                  message="mean not defined for components of df <= 1"),
          ],
          mean)

  @distribution_util.AppendDocstring("""
      The variance for Student's T equals

      ```
      scale**2 * df / (df - 2), when df > 2
      infinity,                 when 1 < df <= 2
      NaN,                      when df <= 1
      ```
      """)
  def _variance(self):
    # We need to put the tf.where inside the outer tf.where to ensure we never
    # hit a NaN in the gradient.
    denom = array_ops.where_v2(
        math_ops.greater(self.df, 2.), self.df - 2.,
        array_ops.ones_like(self.df))
    # Abs(scale) superfluous.
    var = (array_ops.ones(self.batch_shape_tensor(), dtype=self.dtype) *
           math_ops.square(self.scale) * self.df / denom)
    # When 1 < df <= 2, variance is infinite.
    inf = np.array(np.inf, dtype=self.dtype.as_numpy_dtype())
    # Compare against a Python scalar (as for `denom` above) so the threshold
    # takes on `df`'s dtype; `fill(shape, 2.)` would build a float32 tensor
    # and mismatch a float64 `df`.
    result_where_defined = array_ops.where_v2(
        math_ops.greater(self.df, 2.), var,
        array_ops.fill(self.batch_shape_tensor(), inf, name="inf"))

    if self.allow_nan_stats:
      # Undefined batch members (df <= 1) are reported as NaN.
      nan = np.array(np.nan, dtype=self.dtype.as_numpy_dtype())
      return array_ops.where_v2(
          math_ops.greater(
              self.df,
              array_ops.ones(self.batch_shape_tensor(), dtype=self.dtype)),
          result_where_defined,
          array_ops.fill(self.batch_shape_tensor(), nan, name="nan"))
    else:
      # Undefined batch members (df <= 1) trip an assertion instead.
      return control_flow_ops.with_dependencies(
          [
              check_ops.assert_less(
                  array_ops.ones([], dtype=self.dtype),
                  self.df,
                  message="variance not defined for components of df <= 1"),
          ],
          result_where_defined)

  def _mode(self):
    return array_ops.identity(self.loc)


class StudentTWithAbsDfSoftplusScale(StudentT):
  """StudentT with `df = floor(abs(df))` and `scale = softplus(scale)`."""

  @deprecation.deprecated(
      "2019-01-01",
      "Use `tfd.StudentT(tf.floor(tf.abs(df)), loc, "
      "tf.nn.softplus(scale)) instead.",
      warn_once=True)
  def __init__(self,
               df,
               loc,
               scale,
               validate_args=False,
               allow_nan_stats=True,
               name="StudentTWithAbsDfSoftplusScale"):
    # Must stay the first statement so only the constructor arguments (and
    # `self`) are captured.
    parameters = dict(locals())
    # `loc` is listed alongside `df` and `scale`, matching the parent class.
    with ops.name_scope(name, values=[df, loc, scale]) as name:
      super(StudentTWithAbsDfSoftplusScale, self).__init__(
          df=math_ops.floor(math_ops.abs(df)),
          loc=loc,
          scale=nn.softplus(scale, name="softplus_scale"),
          validate_args=validate_args,
          allow_nan_stats=allow_nan_stats,
          name=name)
    # Record the untransformed arguments rather than the ones the parent saw.
    self._parameters = parameters