// Protocol messages for describing input data Examples for machine learning
// model training or inference.
syntax = "proto3";

package tensorflow;

import "tensorflow/core/example/feature.proto";

option cc_enable_arenas = true;
option java_outer_classname = "ExampleProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.example";
option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/example/example_protos_go_proto";

// LINT.IfChange
// An Example is a mostly-normalized data format for storing data for training
// and inference. It holds a key-value store (features) in which each key (a
// string) maps to a Feature message, which is a oneof of a packed BytesList,
// FloatList, or Int64List. The format is flexible and compact and allows
// large amounts of typed data to be stored, but the shape and use of that data
// must be determined by the configuration files and parsers used to read and
// write it. In other words, the Example is mostly *not* a self-describing
// format. In TensorFlow, Examples are read in row-major format, so any
// configuration that describes data of rank 2 or above should keep this in
// mind. A matrix flattened into a FloatList should be stored as
// [ row 0 ... row 1 ... row M-1 ].
//
// An Example for a movie recommendation application:
//   features {
//     feature {
//       key: "age"
//       value { float_list {
//         value: 29.0
//       }}
//     }
//     feature {
//       key: "movie"
//       value { bytes_list {
//         value: "The Shawshank Redemption"
//         value: "Fight Club"
//       }}
//     }
//     feature {
//       key: "movie_ratings"
//       value { float_list {
//         value: 9.0
//         value: 9.7
//       }}
//     }
//     feature {
//       key: "suggestion"
//       value { bytes_list {
//         value: "Inception"
//       }}
//     }
//     # Note that this feature exists to be used as a label in training.
//     # E.g., if training a logistic regression model to predict purchase
//     # probability in our learning tool we would set the label feature to
//     # "suggestion_purchased".
//     feature {
//       key: "suggestion_purchased"
//       value { float_list {
//         value: 1.0
//       }}
//     }
//     # Similar to "suggestion_purchased" above this feature exists to be used
//     # as a label in training.
//     # E.g., if training a linear regression model to predict purchase
//     # price in our learning tool we would set the label feature to
//     # "purchase_price".
//     feature {
//       key: "purchase_price"
//       value { float_list {
//         value: 9.99
//       }}
//     }
//   }
//
// A conformant Example data set obeys the following conventions:
//   - If a Feature K exists in one example with data type T, it must be of
//     type T in all other examples when present. It may be omitted.
//   - The number of instances of Feature K list data may vary across examples,
//     depending on the requirements of the model.
//   - If a Feature K doesn't exist in an example, a K-specific default will be
//     used, if configured.
//   - If a Feature K exists in an example but contains no items, the intent
//     is considered to be an empty tensor and no default will be used.

message Example {
  // Key-value store of the named features that make up this example.
  Features features = 1;
}

// A SequenceExample is an Example representing one or more sequences, and
// some context. The context contains features which apply to the entire
// example. The feature_lists contain a key, value map where each key is
// associated with a repeated set of Features (a FeatureList).
// A FeatureList thus represents the values of a feature identified by its key
// over time / frames.
//
// Below is a SequenceExample for a movie recommendation application recording a
// sequence of ratings by a user. The time-independent features ("locale",
// "age", "favorites") describing the user are part of the context.
// The sequence of movies the user rated is part of the feature_lists. For
// each movie in the sequence we have information on its name and actors and
// the user's rating. This information is recorded in three separate
// feature_list(s). In the example below there are only two movies. All three
// feature_list(s), namely "movie_ratings", "movie_names", and "actors" have a
// feature value for both movies. Note that "actors" is itself a bytes_list
// with multiple strings per movie.
//
// context: {
//   feature: {
//     key  : "locale"
//     value: {
//       bytes_list: {
//         value: [ "pt_BR" ]
//       }
//     }
//   }
//   feature: {
//     key  : "age"
//     value: {
//       float_list: {
//         value: [ 19.0 ]
//       }
//     }
//   }
//   feature: {
//     key  : "favorites"
//     value: {
//       bytes_list: {
//         value: [ "Majesty Rose", "Savannah Outen", "One Direction" ]
//       }
//     }
//   }
// }
// feature_lists: {
//   feature_list: {
//     key  : "movie_ratings"
//     value: {
//       feature: {
//         float_list: {
//           value: [ 4.5 ]
//         }
//       }
//       feature: {
//         float_list: {
//           value: [ 5.0 ]
//         }
//       }
//     }
//   }
//   feature_list: {
//     key  : "movie_names"
//     value: {
//       feature: {
//         bytes_list: {
//           value: [ "The Shawshank Redemption" ]
//         }
//       }
//       feature: {
//         bytes_list: {
//           value: [ "Fight Club" ]
//         }
//       }
//     }
//   }
//   feature_list: {
//     key  : "actors"
//     value: {
//       feature: {
//         bytes_list: {
//           value: [ "Tim Robbins", "Morgan Freeman" ]
//         }
//       }
//       feature: {
//         bytes_list: {
//           value: [ "Brad Pitt", "Edward Norton", "Helena Bonham Carter" ]
//         }
//       }
//     }
//   }
// }
//
// A conformant SequenceExample data set obeys the following conventions:
//
// Context:
//   - All conformant context features K must obey the same conventions as
//     a conformant Example's features (see above).
// Feature lists:
//   - A FeatureList L may be missing in an example; it is up to the
//     parser configuration to determine if this is allowed or considered
//     an empty list (zero length).
//   - If a FeatureList L exists, it may be empty (zero length).
//   - If a FeatureList L is non-empty, all features within the FeatureList
//     must have the same data type T. Even across SequenceExamples, the type T
//     of the FeatureList identified by the same key must be the same. An entry
//     without any values may serve as an empty feature.
//   - If a FeatureList L is non-empty, it is up to the parser configuration
//     to determine if all features within the FeatureList must
//     have the same size. The same holds for this FeatureList across multiple
//     examples.
//   - For sequence modeling, e.g.:
//        http://colah.github.io/posts/2015-08-Understanding-LSTMs/
//        https://github.com/tensorflow/nmt
//     the feature lists represent a sequence of frames.
//     In this scenario, all FeatureLists in a SequenceExample have the same
//     number of Feature messages, so that the ith element in each FeatureList
//     is part of the ith frame (or time step).
// Examples of conformant and non-conformant examples' FeatureLists:
//
// Conformant FeatureLists:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0 ] } } }
//    } }
//
// Non-conformant FeatureLists (mismatched types):
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { int64_list: { value: [ 5 ] } } }
//    } }
//
// Conditionally conformant FeatureLists, the parser configuration determines
// if the feature sizes must match:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0, 6.0 ] } } }
//    } }
//
// Conformant pair of SequenceExample:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0 ] } } }
//    } }
// and:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0 ] } }
//               feature: { float_list: { value: [ 2.0 ] } } }
//    } }
//
// Conformant pair of SequenceExample:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0 ] } } }
//    } }
// and:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { }
//    } }
//
// Conditionally conformant pair of SequenceExample, the parser configuration
// determines if the second feature_lists is consistent (zero-length) or
// invalid (missing "movie_ratings"):
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0 ] } } }
//    } }
// and:
//    feature_lists: { }
//
// Non-conformant pair of SequenceExample (mismatched types):
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0 ] } } }
//    } }
// and:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { int64_list: { value: [ 4 ] } }
//               feature: { int64_list: { value: [ 5 ] } }
//               feature: { int64_list: { value: [ 2 ] } } }
//    } }
//
// Conditionally conformant pair of SequenceExample; the parser configuration
// determines if the feature sizes must match:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0 ] } } }
//    } }
// and:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.0 ] } }
//               feature: { float_list: { value: [ 5.0, 3.0 ] } } }
//    } }

message SequenceExample {
  // Features that apply to the entire example (the time-independent part).
  Features context = 1;

  // Keyed FeatureLists; each FeatureList holds the values of one feature over
  // time / frames.
  FeatureLists feature_lists = 2;
}
// LINT.ThenChange(
//     https://www.tensorflow.org/code/tensorflow/python/training/training.py)