• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Protocol messages for describing input data Examples for machine learning
2// model training or inference.
3syntax = "proto3";
4
5package tensorflow;
6
7import "tensorflow/core/example/feature.proto";
8
9option cc_enable_arenas = true;
10option java_outer_classname = "ExampleProtos";
11option java_multiple_files = true;
12option java_package = "org.tensorflow.example";
13option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/example/example_protos_go_proto";
14
15// LINT.IfChange
16// An Example is a mostly-normalized data format for storing data for
17// training and inference.  It contains a key-value store (features); where
18// each key (string) maps to a Feature message (a oneof of packed BytesList,
19// FloatList, or Int64List).  This flexible and compact format allows the
20// storage of large amounts of typed data, but requires that the data shape
21// and use be determined by the configuration files and parsers that are used to
22// read and write this format.  That is, the Example is mostly *not* a
23// self-describing format.  In TensorFlow, Examples are read in row-major
24// format, so any configuration that describes data with rank-2 or above
25// should keep this in mind. If you flatten a matrix into a FloatList it should
26// be stored as [ row 0 ... row 1 ... row M-1 ]
27//
28// An Example for a movie recommendation application:
29//   features {
30//     feature {
31//       key: "age"
32//       value { float_list {
33//         value: 29.0
34//       }}
35//     }
36//     feature {
37//       key: "movie"
38//       value { bytes_list {
39//         value: "The Shawshank Redemption"
40//         value: "Fight Club"
41//       }}
42//     }
43//     feature {
44//       key: "movie_ratings"
45//       value { float_list {
46//         value: 9.0
47//         value: 9.7
48//       }}
49//     }
50//     feature {
51//       key: "suggestion"
52//       value { bytes_list {
53//         value: "Inception"
54//       }}
55//     }
56//     # Note that this feature exists to be used as a label in training.
57//     # E.g., if training a logistic regression model to predict purchase
58//     # probability in our learning tool we would set the label feature to
59//     # "suggestion_purchased".
60//     feature {
61//       key: "suggestion_purchased"
62//       value { float_list {
63//         value: 1.0
64//       }}
65//     }
66//     # Similar to "suggestion_purchased" above this feature exists to be used
67//     # as a label in training.
68//     # E.g., if training a linear regression model to predict purchase
69//     # price in our learning tool we would set the label feature to
70//     # "purchase_price".
71//     feature {
72//       key: "purchase_price"
73//       value { float_list {
74//         value: 9.99
75//       }}
76//     }
77//   }
78//
79// A conformant Example data set obeys the following conventions:
80//   - If a Feature K exists in one example with data type T, it must be of
81//       type T in all other examples when present. It may be omitted.
82//   - The number of instances of Feature K list data may vary across examples,
83//       depending on the requirements of the model.
84//   - If a Feature K doesn't exist in an example, a K-specific default will be
85//       used, if configured.
86//   - If a Feature K exists in an example but contains no items, the intent
87//       is considered to be an empty tensor and no default will be used.
88
// A single record of input data: one key-value store of named features.
89message Example {
  // Key-value store in which each string key maps to a Feature (a BytesList,
  // FloatList, or Int64List). The Features type is not declared in this file;
  // presumably it comes from the imported feature.proto -- confirm there.
90  Features features = 1;
91}
92
93// A SequenceExample is an Example representing one or more sequences, and
94// some context.  The context contains features which apply to the entire
95// example. The feature_lists contain a key-value map where each key is
96// associated with a repeated set of Feature messages (a FeatureList).
97// A FeatureList thus represents the values of a feature identified by its key
98// over time / frames.
99//
100// Below is a SequenceExample for a movie recommendation application recording a
101// sequence of ratings by a user. The time-independent features ("locale",
102// "age", "favorites") describing the user are part of the context. The sequence
103// of movies the user rated is part of the feature_lists. For each movie in the
104// sequence we have information on its name and actors and the user's rating.
105// This information is recorded in three separate feature_list(s).
106// In the example below there are only two movies. All three feature_list(s),
107// namely "movie_ratings", "movie_names", and "actors" have a feature value for
108// both movies. Note that "actors" is itself a bytes_list with multiple
109// strings per movie.
110//
111// context: {
112//   feature: {
113//     key  : "locale"
114//     value: {
115//       bytes_list: {
116//         value: [ "pt_BR" ]
117//       }
118//     }
119//   }
120//   feature: {
121//     key  : "age"
122//     value: {
123//       float_list: {
124//         value: [ 19.0 ]
125//       }
126//     }
127//   }
128//   feature: {
129//     key  : "favorites"
130//     value: {
131//       bytes_list: {
132//         value: [ "Majesty Rose", "Savannah Outen", "One Direction" ]
133//       }
134//     }
135//   }
136// }
137// feature_lists: {
138//   feature_list: {
139//     key  : "movie_ratings"
140//     value: {
141//       feature: {
142//         float_list: {
143//           value: [ 4.5 ]
144//         }
145//       }
146//       feature: {
147//         float_list: {
148//           value: [ 5.0 ]
149//         }
150//       }
151//     }
152//   }
153//   feature_list: {
154//     key  : "movie_names"
155//     value: {
156//       feature: {
157//         bytes_list: {
158//           value: [ "The Shawshank Redemption" ]
159//         }
160//       }
161//       feature: {
162//         bytes_list: {
163//           value: [ "Fight Club" ]
164//         }
165//       }
166//     }
167//   }
168//   feature_list: {
169//     key  : "actors"
170//     value: {
171//       feature: {
172//         bytes_list: {
173//           value: [ "Tim Robbins", "Morgan Freeman" ]
174//         }
175//       }
176//       feature: {
177//         bytes_list: {
178//           value: [ "Brad Pitt", "Edward Norton", "Helena Bonham Carter" ]
179//         }
180//       }
181//     }
182//   }
183// }
184//
185// A conformant SequenceExample data set obeys the following conventions:
186//
187// Context:
188//   - All conformant context features K must obey the same conventions as
189//     a conformant Example's features (see above).
190// Feature lists:
191//   - A FeatureList L may be missing in an example; it is up to the
192//     parser configuration to determine if this is allowed or considered
193//     an empty list (zero length).
194//   - If a FeatureList L exists, it may be empty (zero length).
195//   - If a FeatureList L is non-empty, all features within the FeatureList
196//     must have the same data type T. Even across SequenceExamples, the type T
197//     of the FeatureList identified by the same key must be the same. An entry
198//     without any values may serve as an empty feature.
199//   - If a FeatureList L is non-empty, it is up to the parser configuration
200//     to determine if all features within the FeatureList must
201//     have the same size.  The same holds for this FeatureList across multiple
202//     examples.
203//   - For sequence modeling, e.g.:
204//        http://colah.github.io/posts/2015-08-Understanding-LSTMs/
205//        https://github.com/tensorflow/nmt
206//     the feature lists represent a sequence of frames.
207//     In this scenario, all FeatureLists in a SequenceExample have the same
208//     number of Feature messages, so that the ith element in each FeatureList
209//     is part of the ith frame (or time step).
210// Examples of conformant and non-conformant examples' FeatureLists:
211//
212// Conformant FeatureLists:
213//    feature_lists: { feature_list: {
214//      key: "movie_ratings"
215//      value: { feature: { float_list: { value: [ 4.5 ] } }
216//               feature: { float_list: { value: [ 5.0 ] } } }
217//    } }
218//
219// Non-conformant FeatureLists (mismatched types):
220//    feature_lists: { feature_list: {
221//      key: "movie_ratings"
222//      value: { feature: { float_list: { value: [ 4.5 ] } }
223//               feature: { int64_list: { value: [ 5 ] } } }
224//    } }
225//
226// Conditionally conformant FeatureLists, the parser configuration determines
227// if the feature sizes must match:
228//    feature_lists: { feature_list: {
229//      key: "movie_ratings"
230//      value: { feature: { float_list: { value: [ 4.5 ] } }
231//               feature: { float_list: { value: [ 5.0, 6.0 ] } } }
232//    } }
233//
234// Conformant pair of SequenceExample (different FeatureList lengths)
235//    feature_lists: { feature_list: {
236//      key: "movie_ratings"
237//      value: { feature: { float_list: { value: [ 4.5 ] } }
238//               feature: { float_list: { value: [ 5.0 ] } } }
239//    } }
240// and:
241//    feature_lists: { feature_list: {
242//      key: "movie_ratings"
243//      value: { feature: { float_list: { value: [ 4.5 ] } }
244//               feature: { float_list: { value: [ 5.0 ] } }
245//               feature: { float_list: { value: [ 2.0 ] } } }
246//    } }
247//
248// Conformant pair of SequenceExample (one empty FeatureList)
249//    feature_lists: { feature_list: {
250//      key: "movie_ratings"
251//      value: { feature: { float_list: { value: [ 4.5 ] } }
252//               feature: { float_list: { value: [ 5.0 ] } } }
253//    } }
254// and:
255//    feature_lists: { feature_list: {
256//      key: "movie_ratings"
257//      value: { }
258//    } }
259//
260// Conditionally conformant pair of SequenceExample, the parser configuration
261// determines if the second feature_lists is consistent (zero-length) or
262// invalid (missing "movie_ratings"):
263//    feature_lists: { feature_list: {
264//      key: "movie_ratings"
265//      value: { feature: { float_list: { value: [ 4.5 ] } }
266//               feature: { float_list: { value: [ 5.0 ] } } }
267//    } }
268// and:
269//    feature_lists: { }
270//
271// Non-conformant pair of SequenceExample (mismatched types)
272//    feature_lists: { feature_list: {
273//      key: "movie_ratings"
274//      value: { feature: { float_list: { value: [ 4.5 ] } }
275//               feature: { float_list: { value: [ 5.0 ] } } }
276//    } }
277// and:
278//    feature_lists: { feature_list: {
279//      key: "movie_ratings"
280//      value: { feature: { int64_list: { value: [ 4 ] } }
281//               feature: { int64_list: { value: [ 5 ] } }
282//               feature: { int64_list: { value: [ 2 ] } } }
283//    } }
284//
285// Conditionally conformant pair of SequenceExample; the parser configuration
286// determines if the feature sizes must match:
287//    feature_lists: { feature_list: {
288//      key: "movie_ratings"
289//      value: { feature: { float_list: { value: [ 4.5 ] } }
290//               feature: { float_list: { value: [ 5.0 ] } } }
291//    } }
292// and:
293//    feature_lists: { feature_list: {
294//      key: "movie_ratings"
295//      value: { feature: { float_list: { value: [ 4.0 ] } }
296//               feature: { float_list: { value: [ 5.0, 3.0 ] } } }
297//    } }
298
// An example carrying one or more sequences plus example-wide context.
299message SequenceExample {
  // Features that apply to the entire example (the time-independent part).
300  Features context = 1;
  // Key-value map of FeatureLists; each FeatureList represents the values of
  // the feature identified by its key over time / frames.
301  FeatureLists feature_lists = 2;
302}
303// LINT.ThenChange(
304//     https://www.tensorflow.org/code/tensorflow/python/training/training.py)
305