// Protocol messages for describing input data Examples for machine learning
// model training or inference.
syntax = "proto3";

package tensorflow;

import "tensorflow/core/example/feature.proto";

option cc_enable_arenas = true;
option java_outer_classname = "ExampleProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.example";
option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/example";

// An Example is a mostly-normalized data format for storing data for
// training and inference. It contains a key-value store (features), where
// each key (string) maps to a Feature message (which is oneof packed BytesList,
// FloatList, or Int64List). This flexible and compact format allows the
// storage of large amounts of typed data, but requires that the data shape
// and use be determined by the configuration files and parsers that are used to
// read and write this format. That is, the Example is mostly *not* a
// self-describing format. In TensorFlow, Examples are read in row-major
// format, so any configuration that describes data with rank-2 or above
// should keep this in mind. For example, to store an M x N matrix of Bytes,
// the BytesList must contain M*N bytes, with M rows of N contiguous values
// each. That is, the BytesList value must store the matrix as:
//     .... row 0 .... .... row 1 .... // ........... // ... row M-1 ....
//
// An Example for a movie recommendation application:
//   features {
//     feature {
//       key: "age"
//       value { float_list {
//         value: 29.0
//       }}
//     }
//     feature {
//       key: "movie"
//       value { bytes_list {
//         value: "The Shawshank Redemption"
//         value: "Fight Club"
//       }}
//     }
//     feature {
//       key: "movie_ratings"
//       value { float_list {
//         value: 9.0
//         value: 9.7
//       }}
//     }
//     feature {
//       key: "suggestion"
//       value { bytes_list {
//         value: "Inception"
//       }}
//     }
//     # Note that this feature exists to be used as a label in training.
//     # E.g., if training a logistic regression model to predict purchase
//     # probability in our learning tool we would set the label feature to
//     # "suggestion_purchased".
//     feature {
//       key: "suggestion_purchased"
//       value { float_list {
//         value: 1.0
//       }}
//     }
//     # Similar to "suggestion_purchased" above, this feature exists to be
//     # used as a label in training.
//     # E.g., if training a linear regression model to predict purchase
//     # price in our learning tool we would set the label feature to
//     # "purchase_price".
//     feature {
//       key: "purchase_price"
//       value { float_list {
//         value: 9.99
//       }}
//     }
//   }
//
// A conformant Example data set obeys the following conventions:
//   - If a Feature K exists in one example with data type T, it must be of
//     type T in all other examples when present. It may be omitted.
//   - The number of instances of Feature K list data may vary across examples,
//     depending on the requirements of the model.
//   - If a Feature K doesn't exist in an example, a K-specific default will be
//     used, if configured.
//   - If a Feature K exists in an example but contains no items, the intent
//     is considered to be an empty tensor and no default will be used.

// Example stores data for training and inference as a key-value store of
// features.
message Example {
  // Key-value store in which each string key maps to a Feature message.
  Features features = 1;
}

// A SequenceExample is an Example representing one or more sequences, and
// some context. The context contains features which apply to the entire
// example. The feature_lists contain a key-value map where each key is
// associated with a repeated set of Features (a FeatureList).
// A FeatureList thus represents the values of a feature identified by its key
// over time / frames.
//
// Below is a SequenceExample for a movie recommendation application recording a
// sequence of ratings by a user. The time-independent features ("locale",
// "age", "favorites") describing the user are part of the context. The sequence
// of movies the user rated are part of the feature_lists. For each movie in the
// sequence we have information on its name and actors and the user's rating.
// This information is recorded in three separate feature_list(s).
// In the example below there are only two movies. All three feature_list(s),
// namely "movie_ratings", "movie_names", and "actors" have a feature value for
// both movies. Note that "actors" is itself a bytes_list with multiple
// strings per movie.
//
// context: {
//   feature: {
//     key  : "locale"
//     value: {
//       bytes_list: {
//         value: [ "pt_BR" ]
//       }
//     }
//   }
//   feature: {
//     key  : "age"
//     value: {
//       float_list: {
//         value: [ 19.0 ]
//       }
//     }
//   }
//   feature: {
//     key  : "favorites"
//     value: {
//       bytes_list: {
//         value: [ "Majesty Rose", "Savannah Outen", "One Direction" ]
//       }
//     }
//   }
// }
// feature_lists: {
//   feature_list: {
//     key  : "movie_ratings"
//     value: {
//       feature: {
//         float_list: {
//           value: [ 4.5 ]
//         }
//       }
//       feature: {
//         float_list: {
//           value: [ 5.0 ]
//         }
//       }
//     }
//   }
//   feature_list: {
//     key  : "movie_names"
//     value: {
//       feature: {
//         bytes_list: {
//           value: [ "The Shawshank Redemption" ]
//         }
//       }
//       feature: {
//         bytes_list: {
//           value: [ "Fight Club" ]
//         }
//       }
//     }
//   }
//   feature_list: {
//     key  : "actors"
//     value: {
//       feature: {
//         bytes_list: {
//           value: [ "Tim Robbins", "Morgan Freeman" ]
//         }
//       }
//       feature: {
//         bytes_list: {
//           value: [ "Brad Pitt", "Edward Norton", "Helena Bonham Carter" ]
//         }
//       }
//     }
//   }
// }
//
// A conformant SequenceExample data set obeys the following conventions:
//
// Context:
//   - All conformant context features K must obey the same conventions as
//     a conformant Example's features (see above).
// Feature lists:
//   - A FeatureList L may be missing in an example; it is up to the
//     parser configuration to determine if this is allowed or considered
//     an empty list (zero length).
//   - If a FeatureList L exists, it may be empty (zero length).
//   - If a FeatureList L is non-empty, all features within the FeatureList
//     must have the same data type T. Even across SequenceExamples, the type T
//     of the FeatureList identified by the same key must be the same.
//     An entry without any values may serve as an empty feature.
//   - If a FeatureList L is non-empty, it is up to the parser configuration
//     to determine if all features within the FeatureList must
//     have the same size. The same holds for this FeatureList across multiple
//     examples.
//   - For sequence modeling, e.g.:
//        http://colah.github.io/posts/2015-08-Understanding-LSTMs/
//        https://github.com/tensorflow/nmt
//     the feature lists represent a sequence of frames.
//     In this scenario, all FeatureLists in a SequenceExample have the same
//     number of Feature messages, so that the ith element in each FeatureList
//     is part of the ith frame (or time step).
// Examples of conformant and non-conformant examples' FeatureLists:
//
// Conformant FeatureLists:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0 ] } } }
//    } }
//
// Non-conformant FeatureLists (mismatched types):
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { int64_list: { value: [ 5 ] } } }
//    } }
//
// Conditionally conformant FeatureLists, the parser configuration determines
// if the feature sizes must match:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0, 6.0 ] } } }
//    } }
//
// Conformant pair of SequenceExample
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0 ] } } }
//    } }
// and:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0 ] } }
//               feature: {
//                 float_list: { value: [ 2.0 ] } } }
//    } }
//
// Conformant pair of SequenceExample
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0 ] } } }
//    } }
// and:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { }
//    } }
//
// Conditionally conformant pair of SequenceExample, the parser configuration
// determines if the second feature_lists is consistent (zero-length) or
// invalid (missing "movie_ratings"):
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0 ] } } }
//    } }
// and:
//    feature_lists: { }
//
// Non-conformant pair of SequenceExample (mismatched types)
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0 ] } } }
//    } }
// and:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { int64_list: { value: [ 4 ] } }
//               feature: { int64_list: { value: [ 5 ] } }
//               feature: { int64_list: { value: [ 2 ] } } }
//    } }
//
// Conditionally conformant pair of SequenceExample; the parser configuration
// determines if the feature sizes must match:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0 ] } } }
//    } }
// and:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.0 ] } }
//               feature: { float_list: { value: [ 5.0, 3.0 ] } } }
//    } }

// SequenceExample pairs context features that apply to the entire example
// with keyed FeatureLists that hold a feature's values over time / frames.
message SequenceExample {
  // Features which apply to the entire example.
  Features context = 1;

  // Key-value map in which each key is associated with a FeatureList.
  FeatureLists feature_lists = 2;
}