• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Protocol messages for describing input data Examples for machine learning
2// model training or inference.
3syntax = "proto3";
4
5import "tensorflow/core/example/feature.proto";  // presumably declares Features / FeatureLists used below
6option cc_enable_arenas = true;  // C++ codegen: enable arena allocation for these messages
7option java_outer_classname = "ExampleProtos";  // Java codegen: wrapper class name
8option java_multiple_files = true;  // Java codegen: one .java file per top-level type
9option java_package = "org.tensorflow.example";
10option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/example";
11package tensorflow;  // proto package (namespace) for the messages in this file
12
13// An Example is a mostly-normalized data format for storing data for
14// training and inference.  It contains a key-value store (features), where
15// each key (string) maps to a Feature message (a oneof of a packed BytesList,
16// FloatList, or Int64List).  This flexible and compact format allows the
17// storage of large amounts of typed data, but requires that the data shape
18// and use be determined by the configuration files and parsers that are used to
19// read and write this format.  That is, the Example is mostly *not* a
20// self-describing format.  In TensorFlow, Examples are read in row-major
21// format, so any configuration that describes data with rank-2 or above
22// should keep this in mind.  For example, to store an M x N matrix of Bytes,
23// the BytesList must contain M*N bytes, with M rows of N contiguous values
24// each.  That is, the BytesList value must store the matrix as:
25//     .... row 0 .... .... row 1 .... // ...........  // ... row M-1 ....
26//
27// An Example for a movie recommendation application:
28//   features {
29//     feature {
30//       key: "age"
31//       value { float_list {
32//         value: 29.0
33//       }}
34//     }
35//     feature {
36//       key: "movie"
37//       value { bytes_list {
38//         value: "The Shawshank Redemption"
39//         value: "Fight Club"
40//       }}
41//     }
42//     feature {
43//       key: "movie_ratings"
44//       value { float_list {
45//         value: 9.0
46//         value: 9.7
47//       }}
48//     }
49//     feature {
50//       key: "suggestion"
51//       value { bytes_list {
52//         value: "Inception"
53//       }}
54//     }
55//     # Note that this feature exists to be used as a label in training.
56//     # E.g., if training a logistic regression model to predict purchase
57//     # probability in our learning tool we would set the label feature to
58//     # "suggestion_purchased".
59//     feature {
60//       key: "suggestion_purchased"
61//       value { float_list {
62//         value: 1.0
63//       }}
64//     }
65//     # Similar to "suggestion_purchased" above this feature exists to be used
66//     # as a label in training.
67//     # E.g., if training a linear regression model to predict purchase
68//     # price in our learning tool we would set the label feature to
69//     # "purchase_price".
70//     feature {
71//       key: "purchase_price"
72//       value { float_list {
73//         value: 9.99
74//       }}
75//     }
76//   }
77//
78// A conformant Example data set obeys the following conventions:
79//   - If a Feature K exists in one example with data type T, it must be of
80//       type T in all other examples when present. It may be omitted.
81//   - The number of instances of Feature K list data may vary across examples,
82//       depending on the requirements of the model.
83//   - If a Feature K doesn't exist in an example, a K-specific default will be
84//       used, if configured.
85//   - If a Feature K exists in an example but contains no items, the intent
86//       is considered to be an empty tensor and no default will be used.
87
88message Example {  // One training/inference record; format conventions are described above.
89  Features features = 1;  // Key-value store: feature key (string) -> Feature.
90}
91
92// A SequenceExample is an Example representing one or more sequences, and
93// some context.  The context contains features which apply to the entire
94// example. The feature_lists contain a key-value map where each key is
95// associated with a repeated set of Feature messages (a FeatureList).
96// A FeatureList thus represents the values of a feature identified by its key
97// over time / frames.
98//
99// Below is a SequenceExample for a movie recommendation application recording a
100// sequence of ratings by a user. The time-independent features ("locale",
101// "age", "favorites") describing the user are part of the context. The sequence
102// of movies the user rated are part of the feature_lists. For each movie in the
103// sequence we have information on its name and actors and the user's rating.
104// This information is recorded in three separate feature_list(s).
105// In the example below there are only two movies. All three feature_list(s),
106// namely "movie_ratings", "movie_names", and "actors" have a feature value for
107// both movies. Note that "actors" is itself a bytes_list with multiple
108// strings per movie.
109//
110// context: {
111//   feature: {
112//     key  : "locale"
113//     value: {
114//       bytes_list: {
115//         value: [ "pt_BR" ]
116//       }
117//     }
118//   }
119//   feature: {
120//     key  : "age"
121//     value: {
122//       float_list: {
123//         value: [ 19.0 ]
124//       }
125//     }
126//   }
127//   feature: {
128//     key  : "favorites"
129//     value: {
130//       bytes_list: {
131//         value: [ "Majesty Rose", "Savannah Outen", "One Direction" ]
132//       }
133//     }
134//   }
135// }
136// feature_lists: {
137//   feature_list: {
138//     key  : "movie_ratings"
139//     value: {
140//       feature: {
141//         float_list: {
142//           value: [ 4.5 ]
143//         }
144//       }
145//       feature: {
146//         float_list: {
147//           value: [ 5.0 ]
148//         }
149//       }
150//     }
151//   }
152//   feature_list: {
153//     key  : "movie_names"
154//     value: {
155//       feature: {
156//         bytes_list: {
157//           value: [ "The Shawshank Redemption" ]
158//         }
159//       }
160//       feature: {
161//         bytes_list: {
162//           value: [ "Fight Club" ]
163//         }
164//       }
165//     }
166//   }
167//   feature_list: {
168//     key  : "actors"
169//     value: {
170//       feature: {
171//         bytes_list: {
172//           value: [ "Tim Robbins", "Morgan Freeman" ]
173//         }
174//       }
175//       feature: {
176//         bytes_list: {
177//           value: [ "Brad Pitt", "Edward Norton", "Helena Bonham Carter" ]
178//         }
179//       }
180//     }
181//   }
182// }
183//
184// A conformant SequenceExample data set obeys the following conventions:
185//
186// Context:
187//   - All conformant context features K must obey the same conventions as
188//     a conformant Example's features (see above).
189// Feature lists:
190//   - A FeatureList L may be missing in an example; it is up to the
191//     parser configuration to determine if this is allowed or considered
192//     an empty list (zero length).
193//   - If a FeatureList L exists, it may be empty (zero length).
194//   - If a FeatureList L is non-empty, all features within the FeatureList
195//     must have the same data type T. Even across SequenceExamples, the type T
196//     of the FeatureList identified by the same key must be the same. An entry
197//     without any values may serve as an empty feature.
198//   - If a FeatureList L is non-empty, it is up to the parser configuration
199//     to determine if all features within the FeatureList must
200//     have the same size.  The same holds for this FeatureList across multiple
201//     examples.
202//   - For sequence modeling, e.g.:
203//        http://colah.github.io/posts/2015-08-Understanding-LSTMs/
204//        https://github.com/tensorflow/nmt
205//     the feature lists represent a sequence of frames.
206//     In this scenario, all FeatureLists in a SequenceExample have the same
207//     number of Feature messages, so that the ith element in each FeatureList
208//     is part of the ith frame (or time step).
209// Examples of conformant and non-conformant examples' FeatureLists:
210//
211// Conformant FeatureLists:
212//    feature_lists: { feature_list: {
213//      key: "movie_ratings"
214//      value: { feature: { float_list: { value: [ 4.5 ] } }
215//               feature: { float_list: { value: [ 5.0 ] } } }
216//    } }
217//
218// Non-conformant FeatureLists (mismatched types):
219//    feature_lists: { feature_list: {
220//      key: "movie_ratings"
221//      value: { feature: { float_list: { value: [ 4.5 ] } }
222//               feature: { int64_list: { value: [ 5 ] } } }
223//    } }
224//
225// Conditionally conformant FeatureLists; the parser configuration determines
226// if the feature sizes must match:
227//    feature_lists: { feature_list: {
228//      key: "movie_ratings"
229//      value: { feature: { float_list: { value: [ 4.5 ] } }
230//               feature: { float_list: { value: [ 5.0, 6.0 ] } } }
231//    } }
232//
233// Conformant pair of SequenceExamples (feature lists of different lengths):
234//    feature_lists: { feature_list: {
235//      key: "movie_ratings"
236//      value: { feature: { float_list: { value: [ 4.5 ] } }
237//               feature: { float_list: { value: [ 5.0 ] } } }
238//    } }
239// and:
240//    feature_lists: { feature_list: {
241//      key: "movie_ratings"
242//      value: { feature: { float_list: { value: [ 4.5 ] } }
243//               feature: { float_list: { value: [ 5.0 ] } }
244//               feature: { float_list: { value: [ 2.0 ] } } }
245//    } }
246//
247// Conformant pair of SequenceExamples (the second feature list is empty):
248//    feature_lists: { feature_list: {
249//      key: "movie_ratings"
250//      value: { feature: { float_list: { value: [ 4.5 ] } }
251//               feature: { float_list: { value: [ 5.0 ] } } }
252//    } }
253// and:
254//    feature_lists: { feature_list: {
255//      key: "movie_ratings"
256//      value: { }
257//    } }
258//
259// Conditionally conformant pair of SequenceExamples; the parser configuration
260// determines if the second feature_lists is consistent (zero-length) or
261// invalid (missing "movie_ratings"):
262//    feature_lists: { feature_list: {
263//      key: "movie_ratings"
264//      value: { feature: { float_list: { value: [ 4.5 ] } }
265//               feature: { float_list: { value: [ 5.0 ] } } }
266//    } }
267// and:
268//    feature_lists: { }
269//
270// Non-conformant pair of SequenceExample (mismatched types)
271//    feature_lists: { feature_list: {
272//      key: "movie_ratings"
273//      value: { feature: { float_list: { value: [ 4.5 ] } }
274//               feature: { float_list: { value: [ 5.0 ] } } }
275//    } }
276// and:
277//    feature_lists: { feature_list: {
278//      key: "movie_ratings"
279//      value: { feature: { int64_list: { value: [ 4 ] } }
280//               feature: { int64_list: { value: [ 5 ] } }
281//               feature: { int64_list: { value: [ 2 ] } } }
282//    } }
283//
284// Conditionally conformant pair of SequenceExample; the parser configuration
285// determines if the feature sizes must match:
286//    feature_lists: { feature_list: {
287//      key: "movie_ratings"
288//      value: { feature: { float_list: { value: [ 4.5 ] } }
289//               feature: { float_list: { value: [ 5.0 ] } } }
290//    } }
291// and:
292//    feature_lists: { feature_list: {
293//      key: "movie_ratings"
294//      value: { feature: { float_list: { value: [ 4.0 ] } }
295//               feature: { float_list: { value: [ 5.0, 3.0 ] } } }
296//    } }
297
298message SequenceExample {  // One or more sequences plus context; conventions are described above.
299  Features context = 1;  // Features that apply to the entire example.
300  FeatureLists feature_lists = 2;  // Key -> FeatureList (a feature's values over time / frames).
301}
302