• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Protocol messages for describing input data Examples for machine learning
2// model training or inference.
3syntax = "proto3";
4
5package tensorflow;
6
7import "tensorflow/core/example/feature.proto";
8
9option cc_enable_arenas = true;
10option java_outer_classname = "ExampleProtos";
11option java_multiple_files = true;
12option java_package = "org.tensorflow.example";
13option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/example/example_protos_go_proto";
14
15// LINT.IfChange
16// An Example is a mostly-normalized data format for storing data for
17// training and inference.  It contains a key-value store (features), where
18// each key (string) maps to a Feature message (a oneof of packed BytesList,
19// FloatList, or Int64List).  This flexible and compact format allows the
20// storage of large amounts of typed data, but requires that the data shape
21// and use be determined by the configuration files and parsers that are used to
22// read and write this format.  That is, the Example is mostly *not* a
23// self-describing format.  In TensorFlow, Examples are read in row-major
24// format, so any configuration that describes data with rank-2 or above
25// should keep this in mind.  For example, to store an M x N matrix of Bytes,
26// the BytesList must contain M*N bytes, with M rows of N contiguous values
27// each.  That is, the BytesList value must store the matrix as:
28//     .... row 0 .... .... row 1 .... ........... .... row M-1 ....
29//
30// An Example for a movie recommendation application:
31//   features {
32//     feature {
33//       key: "age"
34//       value { float_list {
35//         value: 29.0
36//       }}
37//     }
38//     feature {
39//       key: "movie"
40//       value { bytes_list {
41//         value: "The Shawshank Redemption"
42//         value: "Fight Club"
43//       }}
44//     }
45//     feature {
46//       key: "movie_ratings"
47//       value { float_list {
48//         value: 9.0
49//         value: 9.7
50//       }}
51//     }
52//     feature {
53//       key: "suggestion"
54//       value { bytes_list {
55//         value: "Inception"
56//       }}
57//     }
58//     # Note that this feature exists to be used as a label in training.
59//     # E.g., if training a logistic regression model to predict purchase
60//     # probability in our learning tool we would set the label feature to
61//     # "suggestion_purchased".
62//     feature {
63//       key: "suggestion_purchased"
64//       value { float_list {
65//         value: 1.0
66//       }}
67//     }
68//     # Similar to "suggestion_purchased" above, this feature exists to be used
69//     # as a label in training.
70//     # E.g., if training a linear regression model to predict purchase
71//     # price in our learning tool we would set the label feature to
72//     # "purchase_price".
73//     feature {
74//       key: "purchase_price"
75//       value { float_list {
76//         value: 9.99
77//       }}
78//     }
79//   }
80//
81// A conformant Example data set obeys the following conventions:
82//   - If a Feature K exists in one example with data type T, it must be of
83//       type T in all other examples when present. It may be omitted.
84//   - The number of instances of Feature K list data may vary across examples,
85//       depending on the requirements of the model.
86//   - If a Feature K doesn't exist in an example, a K-specific default will be
87//       used, if configured.
88//   - If a Feature K exists in an example but contains no items, the intent
89//       is considered to be an empty tensor and no default will be used.
90
// One input record for model training or inference: a key-value store of
// typed features. The full format description, a sample record, and the
// conformance conventions are given in the comment block preceding this
// message.
91message Example {
  // Maps each feature key (string) to its Feature value (a BytesList,
  // FloatList, or Int64List). The Features type is not declared in this file;
  // it comes from the imported tensorflow/core/example/feature.proto.
92  Features features = 1;
93}
94
95// A SequenceExample is an Example representing one or more sequences, and
96// some context.  The context contains features which apply to the entire
97// example. The feature_lists contain a key-value map where each key is
98// associated with a repeated set of Feature messages (a FeatureList).
99// A FeatureList thus represents the values of a feature identified by its key
100// over time / frames.
101//
102// Below is a SequenceExample for a movie recommendation application recording a
103// sequence of ratings by a user. The time-independent features ("locale",
104// "age", "favorites") describing the user are part of the context. The sequence
105// of movies the user rated are part of the feature_lists. For each movie in the
106// sequence we have information on its name and actors and the user's rating.
107// This information is recorded in three separate feature_list(s).
108// In the example below there are only two movies. All three feature_list(s),
109// namely "movie_ratings", "movie_names", and "actors" have a feature value for
110// both movies. Note that "actors" is itself a bytes_list with multiple
111// strings per movie.
112//
113// context: {
114//   feature: {
115//     key  : "locale"
116//     value: {
117//       bytes_list: {
118//         value: [ "pt_BR" ]
119//       }
120//     }
121//   }
122//   feature: {
123//     key  : "age"
124//     value: {
125//       float_list: {
126//         value: [ 19.0 ]
127//       }
128//     }
129//   }
130//   feature: {
131//     key  : "favorites"
132//     value: {
133//       bytes_list: {
134//         value: [ "Majesty Rose", "Savannah Outen", "One Direction" ]
135//       }
136//     }
137//   }
138// }
139// feature_lists: {
140//   feature_list: {
141//     key  : "movie_ratings"
142//     value: {
143//       feature: {
144//         float_list: {
145//           value: [ 4.5 ]
146//         }
147//       }
148//       feature: {
149//         float_list: {
150//           value: [ 5.0 ]
151//         }
152//       }
153//     }
154//   }
155//   feature_list: {
156//     key  : "movie_names"
157//     value: {
158//       feature: {
159//         bytes_list: {
160//           value: [ "The Shawshank Redemption" ]
161//         }
162//       }
163//       feature: {
164//         bytes_list: {
165//           value: [ "Fight Club" ]
166//         }
167//       }
168//     }
169//   }
170//   feature_list: {
171//     key  : "actors"
172//     value: {
173//       feature: {
174//         bytes_list: {
175//           value: [ "Tim Robbins", "Morgan Freeman" ]
176//         }
177//       }
178//       feature: {
179//         bytes_list: {
180//           value: [ "Brad Pitt", "Edward Norton", "Helena Bonham Carter" ]
181//         }
182//       }
183//     }
184//   }
185// }
186//
187// A conformant SequenceExample data set obeys the following conventions:
188//
189// Context:
190//   - All conformant context features K must obey the same conventions as
191//     a conformant Example's features (see above).
192// Feature lists:
193//   - A FeatureList L may be missing in an example; it is up to the
194//     parser configuration to determine if this is allowed or considered
195//     an empty list (zero length).
196//   - If a FeatureList L exists, it may be empty (zero length).
197//   - If a FeatureList L is non-empty, all features within the FeatureList
198//     must have the same data type T. Even across SequenceExamples, the type T
199//     of the FeatureList identified by the same key must be the same. An entry
200//     without any values may serve as an empty feature.
201//   - If a FeatureList L is non-empty, it is up to the parser configuration
202//     to determine if all features within the FeatureList must
203//     have the same size.  The same holds for this FeatureList across multiple
204//     examples.
205//   - For sequence modeling, e.g.:
206//        http://colah.github.io/posts/2015-08-Understanding-LSTMs/
207//        https://github.com/tensorflow/nmt
208//     the feature lists represent a sequence of frames.
209//     In this scenario, all FeatureLists in a SequenceExample have the same
210//     number of Feature messages, so that the ith element in each FeatureList
211//     is part of the ith frame (or time step).
212// Examples of conformant and non-conformant examples' FeatureLists:
213//
214// Conformant FeatureLists:
215//    feature_lists: { feature_list: {
216//      key: "movie_ratings"
217//      value: { feature: { float_list: { value: [ 4.5 ] } }
218//               feature: { float_list: { value: [ 5.0 ] } } }
219//    } }
220//
221// Non-conformant FeatureLists (mismatched types):
222//    feature_lists: { feature_list: {
223//      key: "movie_ratings"
224//      value: { feature: { float_list: { value: [ 4.5 ] } }
225//               feature: { int64_list: { value: [ 5 ] } } }
226//    } }
227//
228// Conditionally conformant FeatureLists; the parser configuration determines
229// if the feature sizes must match:
230//    feature_lists: { feature_list: {
231//      key: "movie_ratings"
232//      value: { feature: { float_list: { value: [ 4.5 ] } }
233//               feature: { float_list: { value: [ 5.0, 6.0 ] } } }
234//    } }
235//
236// Conformant pair of SequenceExamples:
237//    feature_lists: { feature_list: {
238//      key: "movie_ratings"
239//      value: { feature: { float_list: { value: [ 4.5 ] } }
240//               feature: { float_list: { value: [ 5.0 ] } } }
241//    } }
242// and:
243//    feature_lists: { feature_list: {
244//      key: "movie_ratings"
245//      value: { feature: { float_list: { value: [ 4.5 ] } }
246//               feature: { float_list: { value: [ 5.0 ] } }
247//               feature: { float_list: { value: [ 2.0 ] } } }
248//    } }
249//
250// Conformant pair of SequenceExamples:
251//    feature_lists: { feature_list: {
252//      key: "movie_ratings"
253//      value: { feature: { float_list: { value: [ 4.5 ] } }
254//               feature: { float_list: { value: [ 5.0 ] } } }
255//    } }
256// and:
257//    feature_lists: { feature_list: {
258//      key: "movie_ratings"
259//      value: { }
260//    } }
261//
262// Conditionally conformant pair of SequenceExamples; the parser configuration
263// determines if the second feature_lists is consistent (zero-length) or
264// invalid (missing "movie_ratings"):
265//    feature_lists: { feature_list: {
266//      key: "movie_ratings"
267//      value: { feature: { float_list: { value: [ 4.5 ] } }
268//               feature: { float_list: { value: [ 5.0 ] } } }
269//    } }
270// and:
271//    feature_lists: { }
272//
273// Non-conformant pair of SequenceExamples (mismatched types):
274//    feature_lists: { feature_list: {
275//      key: "movie_ratings"
276//      value: { feature: { float_list: { value: [ 4.5 ] } }
277//               feature: { float_list: { value: [ 5.0 ] } } }
278//    } }
279// and:
280//    feature_lists: { feature_list: {
281//      key: "movie_ratings"
282//      value: { feature: { int64_list: { value: [ 4 ] } }
283//               feature: { int64_list: { value: [ 5 ] } }
284//               feature: { int64_list: { value: [ 2 ] } } }
285//    } }
286//
287// Conditionally conformant pair of SequenceExamples; the parser configuration
288// determines if the feature sizes must match:
289//    feature_lists: { feature_list: {
290//      key: "movie_ratings"
291//      value: { feature: { float_list: { value: [ 4.5 ] } }
292//               feature: { float_list: { value: [ 5.0 ] } } }
293//    } }
294// and:
295//    feature_lists: { feature_list: {
296//      key: "movie_ratings"
297//      value: { feature: { float_list: { value: [ 4.0 ] } }
298//               feature: { float_list: { value: [ 5.0, 3.0 ] } } }
299//    } }
300
// An Example that represents one or more sequences plus some context. The
// full description, a sample record, and the conformance conventions are
// given in the comment block preceding this message.
301message SequenceExample {
  // Features that apply to the entire example, i.e. the time-independent
  // ones (such as "locale" or "age" in the sample record).
302  Features context = 1;
  // Key-value map in which each key is associated with a FeatureList: the
  // values of that feature over time / frames.
303  FeatureLists feature_lists = 2;
304}
305// LINT.ThenChange(
306//     https://www.tensorflow.org/code/tensorflow/python/training/training.py)
307