# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A SessionRunHook extends `session.run()` calls for the `MonitoredSession`.

SessionRunHooks are useful to track training, report progress, request early
stopping and more. SessionRunHooks use the observer pattern and notify at the
following points:
 - when a session starts being used
 - before a call to the `session.run()`
 - after a call to the `session.run()`
 - when the session is closed

A SessionRunHook encapsulates a piece of reusable/composable computation that
can piggyback a call to `MonitoredSession.run()`. A hook can add any
ops-or-tensor/feeds to the run call, and when the run call finishes with success
gets the outputs it requested. Hooks are allowed to add ops to the graph in
`hook.begin()`. The graph is finalized after the `begin()` method is called.

There are a few pre-defined hooks:
 - StopAtStepHook: Request stop based on global_step
 - CheckpointSaverHook: saves checkpoint
 - LoggingTensorHook: outputs one or more tensor values to log
 - NanTensorHook: Request stop if given `Tensor` contains Nans.
 - SummarySaverHook: saves summaries to a summary writer

For more specific needs, you can create custom hooks:
  class ExampleHook(SessionRunHook):
    def begin(self):
      # You can add ops to the graph here.
      print('Starting the session.')
      self.your_tensor = ...

    def after_create_session(self, session, coord):
      # When this is called, the graph is finalized and
      # ops can no longer be added to the graph.
      print('Session created.')

    def before_run(self, run_context):
      print('Before calling session.run().')
      return SessionRunArgs(self.your_tensor)

    def after_run(self, run_context, run_values):
      print('Done running one step. The value of my tensor:',
            run_values.results)
      if you-need-to-stop-loop:
        run_context.request_stop()

    def end(self, session):
      print('Done with the session.')

To understand how hooks interact with calls to `MonitoredSession.run()`,
look at the following code:
  with MonitoredTrainingSession(hooks=your_hooks, ...) as sess:
    while not sess.should_stop():
      sess.run(your_fetches)

The above user code leads to the following execution:
  call hooks.begin()
  sess = tf.compat.v1.Session()
  call hooks.after_create_session()
  while stop is not requested:
    call hooks.before_run()
    try:
      results = sess.run(merged_fetches, feed_dict=merged_feeds)
    except (errors.OutOfRangeError, StopIteration):
      break
    call hooks.after_run()
  call hooks.end()
  sess.close()

Note that if sess.run() raises OutOfRangeError or StopIteration then
hooks.after_run() will not be called but hooks.end() will still be called.
If sess.run() raises any other exception then neither hooks.after_run() nor
hooks.end() will be called.
"""

import collections
from tensorflow.python.util.tf_export import tf_export


@tf_export(v1=["train.SessionRunHook"])
class SessionRunHook:
  """Observer interface for extending calls to `MonitoredSession.run()`.

  Every callback defined here is a no-op by default; subclasses override only
  the ones they need.
  """

  def begin(self):
    """Runs once, before the session is used.

    At this point the default graph is the graph that will be launched in the
    session, and the hook may add new operations to it. Once `begin()` has
    been called the graph is finalized and none of the other callbacks can
    modify it. Calling `begin()` a second time on the same graph should leave
    the graph unchanged.
    """

  def after_create_session(self, session, coord):  # pylint: disable=unused-argument
    """Signals the hook that a new TensorFlow session has been created.

    This differs from `begin()` in two essential ways:

    * The graph is already finalized when this is called, so ops can no
      longer be added to it.
    * It is called not only at the start of the overall session but also
      whenever a wrapped session is recovered.

    Args:
      session: The TensorFlow Session that has been created.
      coord: A Coordinator object which keeps track of all threads.
    """

  def before_run(self, run_context):  # pylint: disable=unused-argument
    """Runs immediately before each call to `run()`.

    A hook may return a `SessionRunArgs` object naming extra ops or tensors
    for the upcoming `run()` call; they are run together with the ops/tensors
    passed to the original `run()` call. The returned run args may also carry
    feeds to be added to that call.

    `run_context` is a `SessionRunContext` describing the upcoming `run()`
    call: the originally requested ops/tensors and the TensorFlow Session.

    The graph is already finalized at this point, so no ops can be added.

    Args:
      run_context: A `SessionRunContext` object.

    Returns:
      None or a `SessionRunArgs` object.
    """
    return None

  def after_run(self, run_context, run_values):  # pylint: disable=unused-argument
    """Runs after each call to `run()` that did not raise.

    `run_values` holds the results of the ops/tensors requested by
    `before_run()`.

    `run_context` is the same object that was sent to the `before_run()` call.
    Call `run_context.request_stop()` to stop the iteration.

    If `session.run()` raises any exception, `after_run()` is not called.

    Args:
      run_context: A `SessionRunContext` object.
      run_values: A `SessionRunValues` object.
    """

  def end(self, session):  # pylint: disable=unused-argument
    """Runs at the end of the session.

    The hook can use `session` to run final ops, such as saving a last
    checkpoint.

    `end()` is not called if `session.run()` raises an exception other than
    OutOfRangeError or StopIteration. Note how this differs from
    `after_run()`: when `session.run()` raises OutOfRangeError or
    StopIteration, `end()` is still called but `after_run()` is not.

    Args:
      session: A TensorFlow Session that will soon be closed.
    """


@tf_export(v1=["train.SessionRunArgs"])
class SessionRunArgs(
    collections.namedtuple(
        "SessionRunArgs", ["fetches", "feed_dict", "options"])):
  """Arguments that a hook wants added to a `Session.run()` call.

  Args:
    fetches: Exactly like the `fetches` argument to `Session.run()`.
      Can be a single tensor or op, a list of fetches or a dictionary
      of fetches. For example:
        fetches = global_step_tensor
        fetches = [train_op, summary_op, global_step_tensor]
        fetches = {'step': global_step_tensor, 'summ': summary_op}
      Note that this can recurse as expected:
        fetches = {'step': global_step_tensor,
                   'ops': [train_op, check_nan_op]}
    feed_dict: Exactly like the `feed_dict` argument to `Session.run()`.
    options: Exactly like the `options` argument to `Session.run()`, i.e., a
      config_pb2.RunOptions proto.
  """

  def __new__(cls, fetches, feed_dict=None, options=None):
    # Only `fetches` is mandatory; the remaining fields default to None.
    return super().__new__(cls, fetches, feed_dict, options)


@tf_export(v1=["train.SessionRunContext"])
class SessionRunContext:
  """Describes the `session.run()` call that is being made.

  Carries the original request passed to `Session.run()`. SessionRunHook
  objects can stop the loop by calling `request_stop()` on the `run_context`.
  In the future this object may be used to expose more information about the
  run without changing the Hook API.
  """

  def __init__(self, original_args, session):
    """Initializes SessionRunContext."""
    # No stop has been requested until a hook calls `request_stop()`.
    self._stop_requested = False
    self._session = session
    self._original_args = original_args

  @property
  def original_args(self):
    """A `SessionRunArgs` object holding the original arguments of `run()`.

    If the user called `MonitoredSession.run(fetches=a, feed_dict=b)`, then
    this field is equal to SessionRunArgs(a, b).

    Returns:
      A `SessionRunArgs` object
    """
    return self._original_args

  @property
  def session(self):
    """A TensorFlow session object which will execute the `run`."""
    return self._session

  @property
  def stop_requested(self):
    """Whether a stop has been requested.

    If true, `MonitoredSession` stops iterations.

    Returns:
      A `bool`
    """
    return self._stop_requested

  def request_stop(self):
    """Marks the stop-requested field as set.

    Hooks call this to request that iterations stop. `MonitoredSession`
    checks whether it has been called.
    """
    self._stop_requested = True


@tf_export(v1=["train.SessionRunValues"])
class SessionRunValues(
    collections.namedtuple(
        "SessionRunValues", ["results", "options", "run_metadata"])):
  """Holds the results of a `Session.run()` call.

  In the future this object may be used to expose more information about the
  result of a run without changing the Hook API.

  Args:
    results: The return values from `Session.run()` corresponding to the
      fetches attribute returned in the RunArgs. Note that this has the same
      shape as the RunArgs fetches. For example:
        fetches = global_step_tensor
        => results = nparray(int)
        fetches = [train_op, summary_op, global_step_tensor]
        => results = [None, nparray(string), nparray(int)]
        fetches = {'step': global_step_tensor, 'summ': summary_op}
        => results = {'step': nparray(int), 'summ': nparray(string)}
    options: `RunOptions` from the `Session.run()` call.
    run_metadata: `RunMetadata` from the `Session.run()` call.
  """