// Protocol messages for describing input data Examples for machine learning
// model training or inference.
syntax = "proto3";

package tensorflow;

import "tensorflow/core/example/feature.proto";

option cc_enable_arenas = true;
option java_outer_classname = "ExampleProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.example";
option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/example/example_protos_go_proto";

// LINT.IfChange
// An Example is a mostly-normalized data format for storing data for
// training and inference.  It contains a key-value store (features), where
// each key (string) maps to a Feature message (which is oneof packed BytesList,
// FloatList, or Int64List).  This flexible and compact format allows the
// storage of large amounts of typed data, but requires that the data shape
// and use be determined by the configuration files and parsers that are used to
// read and write this format.  That is, the Example is mostly *not* a
// self-describing format.  In TensorFlow, Examples are read in row-major
// format, so any configuration that describes data with rank-2 or above
// should keep this in mind. For example, to store an M x N matrix of Bytes,
// the BytesList must contain M*N bytes, with M rows of N contiguous values
// each.  That is, the BytesList value must store the matrix as:
//     .... row 0 .... .... row 1 .... // ...........  // ... row M-1 ....
//
// An Example for a movie recommendation application:
//   features {
//     feature {
//       key: "age"
//       value { float_list {
//         value: 29.0
//       }}
//     }
//     feature {
//       key: "movie"
//       value { bytes_list {
//         value: "The Shawshank Redemption"
//         value: "Fight Club"
//       }}
//     }
//     feature {
//       key: "movie_ratings"
//       value { float_list {
//         value: 9.0
//         value: 9.7
//       }}
//     }
//     feature {
//       key: "suggestion"
//       value { bytes_list {
//         value: "Inception"
//       }}
//     }
//     # Note that this feature exists to be used as a label in training.
//     # E.g., if training a logistic regression model to predict purchase
//     # probability in our learning tool we would set the label feature to
//     # "suggestion_purchased".
//     feature {
//       key: "suggestion_purchased"
//       value { float_list {
//         value: 1.0
//       }}
//     }
//     # Similar to "suggestion_purchased" above this feature exists to be used
//     # as a label in training.
//     # E.g., if training a linear regression model to predict purchase
//     # price in our learning tool we would set the label feature to
//     # "purchase_price".
//     feature {
//       key: "purchase_price"
//       value { float_list {
//         value: 9.99
//       }}
//     }
//   }
//
// A conformant Example data set obeys the following conventions:
//   - If a Feature K exists in one example with data type T, it must be of
//       type T in all other examples when present. It may be omitted.
//   - The number of instances of Feature K list data may vary across examples,
//       depending on the requirements of the model.
//   - If a Feature K doesn't exist in an example, a K-specific default will be
//       used, if configured.
//   - If a Feature K exists in an example but contains no items, the intent
//       is considered to be an empty tensor and no default will be used.

// A single Example: a key-value store of named Features.
message Example {
  // The key-value store; each string key maps to one Feature message.
  Features features = 1;
}

// A SequenceExample is an Example representing one or more sequences, and
// some context.  The context contains features which apply to the entire
// example. The feature_lists contain a key, value map where each key is
// associated with a repeated set of Features (a FeatureList).
// A FeatureList thus represents the values of a feature identified by its key
// over time / frames.
//
// Below is a SequenceExample for a movie recommendation application recording a
// sequence of ratings by a user. The time-independent features ("locale",
// "age", "favorites") describing the user are part of the context. The sequence
// of movies the user rated are part of the feature_lists. For each movie in the
// sequence we have information on its name and actors and the user's rating.
// This information is recorded in three separate feature_list(s).
// In the example below there are only two movies. All three feature_list(s),
// namely "movie_ratings", "movie_names", and "actors" have a feature value for
// both movies. Note that "actors" is itself a bytes_list with multiple
// strings per movie.
//
// context: {
//   feature: {
//     key  : "locale"
//     value: {
//       bytes_list: {
//         value: [ "pt_BR" ]
//       }
//     }
//   }
//   feature: {
//     key  : "age"
//     value: {
//       float_list: {
//         value: [ 19.0 ]
//       }
//     }
//   }
//   feature: {
//     key  : "favorites"
//     value: {
//       bytes_list: {
//         value: [ "Majesty Rose", "Savannah Outen", "One Direction" ]
//       }
//     }
//   }
// }
// feature_lists: {
//   feature_list: {
//     key  : "movie_ratings"
//     value: {
//       feature: {
//         float_list: {
//           value: [ 4.5 ]
//         }
//       }
//       feature: {
//         float_list: {
//           value: [ 5.0 ]
//         }
//       }
//     }
//   }
//   feature_list: {
//     key  : "movie_names"
//     value: {
//       feature: {
//         bytes_list: {
//           value: [ "The Shawshank Redemption" ]
//         }
//       }
//       feature: {
//         bytes_list: {
//           value: [ "Fight Club" ]
//         }
//       }
//     }
//   }
//   feature_list: {
//     key  : "actors"
//     value: {
//       feature: {
//         bytes_list: {
//           value: [ "Tim Robbins", "Morgan Freeman" ]
//         }
//       }
//       feature: {
//         bytes_list: {
//           value: [ "Brad Pitt", "Edward Norton", "Helena Bonham Carter" ]
//         }
//       }
//     }
//   }
// }
//
// A conformant SequenceExample data set obeys the following conventions:
//
// Context:
//   - All conformant context features K must obey the same conventions as
//     a conformant Example's features (see above).
// Feature lists:
//   - A FeatureList L may be missing in an example; it is up to the
//     parser configuration to determine if this is allowed or considered
//     an empty list (zero length).
//   - If a FeatureList L exists, it may be empty (zero length).
//   - If a FeatureList L is non-empty, all features within the FeatureList
//     must have the same data type T. Even across SequenceExamples, the type T
//     of the FeatureList identified by the same key must be the same. An entry
//     without any values may serve as an empty feature.
//   - If a FeatureList L is non-empty, it is up to the parser configuration
//     to determine if all features within the FeatureList must
//     have the same size.  The same holds for this FeatureList across multiple
//     examples.
//   - For sequence modeling, e.g.:
//        http://colah.github.io/posts/2015-08-Understanding-LSTMs/
//        https://github.com/tensorflow/nmt
//     the feature lists represent a sequence of frames.
//     In this scenario, all FeatureLists in a SequenceExample have the same
//     number of Feature messages, so that the ith element in each FeatureList
//     is part of the ith frame (or time step).
// Examples of conformant and non-conformant examples' FeatureLists:
//
// Conformant FeatureLists:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0 ] } } }
//    } }
//
// Non-conformant FeatureLists (mismatched types):
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { int64_list: { value: [ 5 ] } } }
//    } }
//
// Conditionally conformant FeatureLists; the parser configuration determines
// if the feature sizes must match:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0, 6.0 ] } } }
//    } }
//
// Conformant pair of SequenceExample:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0 ] } } }
//    } }
// and:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0 ] } }
//               feature: { float_list: { value: [ 2.0 ] } } }
//    } }
//
// Conformant pair of SequenceExample:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0 ] } } }
//    } }
// and:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { }
//    } }
//
// Conditionally conformant pair of SequenceExample, the parser configuration
// determines if the second feature_lists is consistent (zero-length) or
// invalid (missing "movie_ratings"):
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0 ] } } }
//    } }
// and:
//    feature_lists: { }
//
// Non-conformant pair of SequenceExample (mismatched types):
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0 ] } } }
//    } }
// and:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { int64_list: { value: [ 4 ] } }
//               feature: { int64_list: { value: [ 5 ] } }
//               feature: { int64_list: { value: [ 2 ] } } }
//    } }
//
// Conditionally conformant pair of SequenceExample; the parser configuration
// determines if the feature sizes must match:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0 ] } } }
//    } }
// and:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.0 ] } }
//               feature: { float_list: { value: [ 5.0, 3.0 ] } } }
//    } }

// An Example made of whole-example context plus one or more keyed sequences.
message SequenceExample {
  // Features which apply to the entire example (e.g. time-independent data).
  Features context = 1;
  // Keyed FeatureLists; each holds the values of one feature over time /
  // frames.
  FeatureLists feature_lists = 2;
}
// LINT.ThenChange(
//     https://www.tensorflow.org/code/tensorflow/python/training/training.py)