1syntax = "proto2"; 2 3package caffe; 4 5// Specifies the shape (dimensions) of a Blob. 6message BlobShape { 7 repeated int64 dim = 1 [packed = true]; 8} 9 10message BlobProto { 11 optional BlobShape shape = 7; 12 repeated float data = 5 [packed = true]; 13 repeated float diff = 6 [packed = true]; 14 repeated double double_data = 8 [packed = true]; 15 repeated double double_diff = 9 [packed = true]; 16 17 // 4D dimensions -- deprecated. Use "shape" instead. 18 optional int32 num = 1 [default = 0]; 19 optional int32 channels = 2 [default = 0]; 20 optional int32 height = 3 [default = 0]; 21 optional int32 width = 4 [default = 0]; 22} 23 24// The BlobProtoVector is simply a way to pass multiple blobproto instances 25// around. 26message BlobProtoVector { 27 repeated BlobProto blobs = 1; 28} 29 30message Datum { 31 optional int32 channels = 1; 32 optional int32 height = 2; 33 optional int32 width = 3; 34 // the actual image data, in bytes 35 optional bytes data = 4; 36 optional int32 label = 5; 37 // Optionally, the datum could also hold float data. 38 repeated float float_data = 6; 39 // If true data contains an encoded image that need to be decoded 40 optional bool encoded = 7 [default = false]; 41} 42 43message FillerParameter { 44 // The filler type. 45 optional string type = 1 [default = 'constant']; 46 optional float value = 2 [default = 0]; // the value in constant filler 47 optional float min = 3 [default = 0]; // the min value in uniform filler 48 optional float max = 4 [default = 1]; // the max value in uniform filler 49 optional float mean = 5 [default = 0]; // the mean value in Gaussian filler 50 optional float std = 6 [default = 1]; // the std value in Gaussian filler 51 // The expected number of non-zero output weights for a given input in 52 // Gaussian filler -- the default -1 means don't perform sparsification. 53 optional int32 sparse = 7 [default = -1]; 54 // Normalize the filler variance by fan_in, fan_out, or their average. 55 // Applies to 'xavier' and 'msra' fillers. 56 enum VarianceNorm { 57 FAN_IN = 0; 58 FAN_OUT = 1; 59 AVERAGE = 2; 60 } 61 optional VarianceNorm variance_norm = 8 [default = FAN_IN]; 62} 63 64message NetParameter { 65 optional string name = 1; // consider giving the network a name 66 // DEPRECATED. See InputParameter. The input blobs to the network. 67 repeated string input = 3; 68 // DEPRECATED. See InputParameter. The shape of the input blobs. 69 repeated BlobShape input_shape = 8; 70 71 // 4D input dimensions -- deprecated. Use "input_shape" instead. 72 // If specified, for each input blob there should be four 73 // values specifying the num, channels, height and width of the input blob. 74 // Thus, there should be a total of (4 * #input) numbers. 75 repeated int32 input_dim = 4; 76 77 // Whether the network will force every layer to carry out backward operation. 78 // If set False, then whether to carry out backward is determined 79 // automatically according to the net structure and learning rates. 80 optional bool force_backward = 5 [default = false]; 81 // The current "state" of the network, including the phase, level, and stage. 82 // Some layers may be included/excluded depending on this state and the states 83 // specified in the layers' include and exclude fields. 84 optional NetState state = 6; 85 86 // Print debugging information about results while running Net::Forward, 87 // Net::Backward, and Net::Update. 88 optional bool debug_info = 7 [default = false]; 89 90 // The layers that make up the net. Each of their configurations, including 91 // connectivity and behavior, is specified as a LayerParameter. 92 repeated LayerParameter layer = 100; // ID 100 so layers are printed last. 93 94 // DEPRECATED: use 'layer' instead. 95 repeated V1LayerParameter layers = 2; 96} 97 98// NOTE 99// Update the next available ID when you add a new SolverParameter field. 100// 101// SolverParameter next available ID: 42 (last added: layer_wise_reduce) 102message SolverParameter { 103 ////////////////////////////////////////////////////////////////////////////// 104 // Specifying the train and test networks 105 // 106 // Exactly one train net must be specified using one of the following fields: 107 // train_net_param, train_net, net_param, net 108 // One or more test nets may be specified using any of the following fields: 109 // test_net_param, test_net, net_param, net 110 // If more than one test net field is specified (e.g., both net and 111 // test_net are specified), they will be evaluated in the field order given 112 // above: (1) test_net_param, (2) test_net, (3) net_param/net. 113 // A test_iter must be specified for each test_net. 114 // A test_level and/or a test_stage may also be specified for each test_net. 115 ////////////////////////////////////////////////////////////////////////////// 116 117 // Proto filename for the train net, possibly combined with one or more 118 // test nets. 119 optional string net = 24; 120 // Inline train net param, possibly combined with one or more test nets. 121 optional NetParameter net_param = 25; 122 123 optional string train_net = 1; // Proto filename for the train net. 124 repeated string test_net = 2; // Proto filenames for the test nets. 125 optional NetParameter train_net_param = 21; // Inline train net params. 126 repeated NetParameter test_net_param = 22; // Inline test net params. 127 128 // The states for the train/test nets. Must be unspecified or 129 // specified once per net. 130 // 131 // By default, all states will have solver = true; 132 // train_state will have phase = TRAIN, 133 // and all test_state's will have phase = TEST. 134 // Other defaults are set according to the NetState defaults. 135 optional NetState train_state = 26; 136 repeated NetState test_state = 27; 137 138 // The number of iterations for each test net. 139 repeated int32 test_iter = 3; 140 141 // The number of iterations between two testing phases. 142 optional int32 test_interval = 4 [default = 0]; 143 optional bool test_compute_loss = 19 [default = false]; 144 // If true, run an initial test pass before the first iteration, 145 // ensuring memory availability and printing the starting value of the loss. 146 optional bool test_initialization = 32 [default = true]; 147 optional float base_lr = 5; // The base learning rate 148 // the number of iterations between displaying info. If display = 0, no info 149 // will be displayed. 150 optional int32 display = 6; 151 // Display the loss averaged over the last average_loss iterations 152 optional int32 average_loss = 33 [default = 1]; 153 optional int32 max_iter = 7; // the maximum number of iterations 154 // accumulate gradients over `iter_size` x `batch_size` instances 155 optional int32 iter_size = 36 [default = 1]; 156 157 // The learning rate decay policy. The currently implemented learning rate 158 // policies are as follows: 159 // - fixed: always return base_lr. 160 // - step: return base_lr * gamma ^ (floor(iter / step)) 161 // - exp: return base_lr * gamma ^ iter 162 // - inv: return base_lr * (1 + gamma * iter) ^ (- power) 163 // - multistep: similar to step but it allows non uniform steps defined by 164 // stepvalue 165 // - poly: the effective learning rate follows a polynomial decay, to be 166 // zero by the max_iter. return base_lr (1 - iter/max_iter) ^ (power) 167 // - sigmoid: the effective learning rate follows a sigmod decay 168 // return base_lr ( 1/(1 + exp(-gamma * (iter - stepsize)))) 169 // 170 // where base_lr, max_iter, gamma, step, stepvalue and power are defined 171 // in the solver parameter protocol buffer, and iter is the current iteration. 172 optional string lr_policy = 8; 173 optional float gamma = 9; // The parameter to compute the learning rate. 174 optional float power = 10; // The parameter to compute the learning rate. 175 optional float momentum = 11; // The momentum value. 176 optional float weight_decay = 12; // The weight decay. 177 // regularization types supported: L1 and L2 178 // controlled by weight_decay 179 optional string regularization_type = 29 [default = "L2"]; 180 // the stepsize for learning rate policy "step" 181 optional int32 stepsize = 13; 182 // the stepsize for learning rate policy "multistep" 183 repeated int32 stepvalue = 34; 184 185 // Set clip_gradients to >= 0 to clip parameter gradients to that L2 norm, 186 // whenever their actual L2 norm is larger. 187 optional float clip_gradients = 35 [default = -1]; 188 189 optional int32 snapshot = 14 [default = 0]; // The snapshot interval 190 optional string snapshot_prefix = 15; // The prefix for the snapshot. 191 // whether to snapshot diff in the results or not. Snapshotting diff will help 192 // debugging but the final protocol buffer size will be much larger. 193 optional bool snapshot_diff = 16 [default = false]; 194 enum SnapshotFormat { 195 HDF5 = 0; 196 BINARYPROTO = 1; 197 } 198 optional SnapshotFormat snapshot_format = 37 [default = BINARYPROTO]; 199 // the mode solver will use: 0 for CPU and 1 for GPU. Use GPU in default. 200 enum SolverMode { 201 CPU = 0; 202 GPU = 1; 203 } 204 optional SolverMode solver_mode = 17 [default = GPU]; 205 // the device_id will that be used in GPU mode. Use device_id = 0 in default. 206 optional int32 device_id = 18 [default = 0]; 207 // If non-negative, the seed with which the Solver will initialize the Caffe 208 // random number generator -- useful for reproducible results. Otherwise, 209 // (and by default) initialize using a seed derived from the system clock. 210 optional int64 random_seed = 20 [default = -1]; 211 212 // type of the solver 213 optional string type = 40 [default = "SGD"]; 214 215 // numerical stability for RMSProp, AdaGrad and AdaDelta and Adam 216 optional float delta = 31 [default = 1e-8]; 217 // parameters for the Adam solver 218 optional float momentum2 = 39 [default = 0.999]; 219 220 // RMSProp decay value 221 // MeanSquare(t) = rms_decay*MeanSquare(t-1) + (1-rms_decay)*SquareGradient(t) 222 optional float rms_decay = 38 [default = 0.99]; 223 224 // If true, print information about the state of the net that may help with 225 // debugging learning problems. 226 optional bool debug_info = 23 [default = false]; 227 228 // If false, don't save a snapshot after training finishes. 229 optional bool snapshot_after_train = 28 [default = true]; 230 231 // DEPRECATED: old solver enum types, use string instead 232 enum SolverType { 233 SGD = 0; 234 NESTEROV = 1; 235 ADAGRAD = 2; 236 RMSPROP = 3; 237 ADADELTA = 4; 238 ADAM = 5; 239 } 240 // DEPRECATED: use type instead of solver_type 241 optional SolverType solver_type = 30 [default = SGD]; 242 243 // Overlap compute and communication for data parallel training 244 optional bool layer_wise_reduce = 41 [default = true]; 245} 246 247// A message that stores the solver snapshots 248message SolverState { 249 optional int32 iter = 1; // The current iteration 250 optional string learned_net = 2; // The file that stores the learned net. 251 repeated BlobProto history = 3; // The history for sgd solvers 252 optional int32 current_step = 4 [default = 0]; // The current step for learning rate 253} 254 255enum Phase { 256 TRAIN = 0; 257 TEST = 1; 258} 259 260message NetState { 261 optional Phase phase = 1 [default = TEST]; 262 optional int32 level = 2 [default = 0]; 263 repeated string stage = 3; 264} 265 266message NetStateRule { 267 // Set phase to require the NetState have a particular phase (TRAIN or TEST) 268 // to meet this rule. 269 optional Phase phase = 1; 270 271 // Set the minimum and/or maximum levels in which the layer should be used. 272 // Leave undefined to meet the rule regardless of level. 273 optional int32 min_level = 2; 274 optional int32 max_level = 3; 275 276 // Customizable sets of stages to include or exclude. 277 // The net must have ALL of the specified stages and NONE of the specified 278 // "not_stage"s to meet the rule. 279 // (Use multiple NetStateRules to specify conjunctions of stages.) 280 repeated string stage = 4; 281 repeated string not_stage = 5; 282} 283 284// Specifies training parameters (multipliers on global learning constants, 285// and the name and other settings used for weight sharing). 286message ParamSpec { 287 // The names of the parameter blobs -- useful for sharing parameters among 288 // layers, but never required otherwise. To share a parameter between two 289 // layers, give it a (non-empty) name. 290 optional string name = 1; 291 292 // Whether to require shared weights to have the same shape, or just the same 293 // count -- defaults to STRICT if unspecified. 294 optional DimCheckMode share_mode = 2; 295 enum DimCheckMode { 296 // STRICT (default) requires that num, channels, height, width each match. 297 STRICT = 0; 298 // PERMISSIVE requires only the count (num*channels*height*width) to match. 299 PERMISSIVE = 1; 300 } 301 302 // The multiplier on the global learning rate for this parameter. 303 optional float lr_mult = 3 [default = 1.0]; 304 305 // The multiplier on the global weight decay for this parameter. 306 optional float decay_mult = 4 [default = 1.0]; 307} 308 309// NOTE 310// Update the next available ID when you add a new LayerParameter field. 311// 312// LayerParameter next available layer-specific ID: 151 (last added: smooth_l1_loss_param) 313message LayerParameter { 314 optional string name = 1; // the layer name 315 optional string type = 2; // the layer type 316 repeated string bottom = 3; // the name of each bottom blob 317 repeated string top = 4; // the name of each top blob 318 319 // The train / test phase for computation. 320 optional Phase phase = 10; 321 322 // The amount of weight to assign each top blob in the objective. 323 // Each layer assigns a default value, usually of either 0 or 1, 324 // to each top blob. 325 repeated float loss_weight = 5; 326 327 // Specifies training parameters (multipliers on global learning constants, 328 // and the name and other settings used for weight sharing). 329 repeated ParamSpec param = 6; 330 331 // The blobs containing the numeric parameters of the layer. 332 repeated BlobProto blobs = 7; 333 334 // Specifies whether to backpropagate to each bottom. If unspecified, 335 // Caffe will automatically infer whether each input needs backpropagation 336 // to compute parameter gradients. If set to true for some inputs, 337 // backpropagation to those inputs is forced; if set false for some inputs, 338 // backpropagation to those inputs is skipped. 339 // 340 // The size must be either 0 or equal to the number of bottoms. 341 repeated bool propagate_down = 11; 342 343 // Rules controlling whether and when a layer is included in the network, 344 // based on the current NetState. You may specify a non-zero number of rules 345 // to include OR exclude, but not both. If no include or exclude rules are 346 // specified, the layer is always included. If the current NetState meets 347 // ANY (i.e., one or more) of the specified rules, the layer is 348 // included/excluded. 349 repeated NetStateRule include = 8; 350 repeated NetStateRule exclude = 9; 351 352 // Parameters for data pre-processing. 353 optional TransformationParameter transform_param = 100; 354 355 // Parameters shared by loss layers. 356 optional LossParameter loss_param = 101; 357 358 // Layer type-specific parameters. 359 // 360 // Note: certain layers may have more than one computational engine 361 // for their implementation. These layers include an Engine type and 362 // engine parameter for selecting the implementation. 363 // The default for the engine is set by the ENGINE switch at compile-time. 364 optional AccuracyParameter accuracy_param = 102; 365 optional ArgMaxParameter argmax_param = 103; 366 optional BatchNormParameter batch_norm_param = 139; 367 optional BiasParameter bias_param = 141; 368 optional ConcatParameter concat_param = 104; 369 optional ContrastiveLossParameter contrastive_loss_param = 105; 370 optional ConvolutionParameter convolution_param = 106; 371 optional CropParameter crop_param = 144; 372 optional DataParameter data_param = 107; 373 optional DetectionOutputParameter detection_output_param = 150; 374 optional DropoutParameter dropout_param = 108; 375 optional DummyDataParameter dummy_data_param = 109; 376 optional EltwiseParameter eltwise_param = 110; 377 optional ELUParameter elu_param = 140; 378 optional EmbedParameter embed_param = 137; 379 optional ExpParameter exp_param = 111; 380 optional FlattenParameter flatten_param = 135; 381 optional HDF5DataParameter hdf5_data_param = 112; 382 optional HDF5OutputParameter hdf5_output_param = 113; 383 optional HingeLossParameter hinge_loss_param = 114; 384 optional ImageDataParameter image_data_param = 115; 385 optional InfogainLossParameter infogain_loss_param = 116; 386 optional InnerProductParameter inner_product_param = 117; 387 optional InputParameter input_param = 143; 388 optional LogParameter log_param = 134; 389 optional LRNParameter lrn_param = 118; 390 optional MemoryDataParameter memory_data_param = 119; 391 optional MVNParameter mvn_param = 120; 392 optional ParameterParameter parameter_param = 145; 393 optional PoolingParameter pooling_param = 121; 394 optional PowerParameter power_param = 122; 395 optional PReLUParameter prelu_param = 131; 396 optional ProposalParameter proposal_param = 900; 397 optional PythonParameter python_param = 130; 398 optional RecurrentParameter recurrent_param = 146; 399 optional ReductionParameter reduction_param = 136; 400 optional ReLUParameter relu_param = 123; 401 optional ReshapeParameter reshape_param = 133; 402 optional ROIPoolingParameter roi_pooling_param = 147; 403 optional ScaleParameter scale_param = 142; 404 optional SigmoidParameter sigmoid_param = 124; 405 optional SmoothL1LossParameter smooth_l1_loss_param = 148; 406 optional SoftmaxParameter softmax_param = 125; 407 optional SPPParameter spp_param = 132; 408 optional SliceParameter slice_param = 126; 409 optional TanHParameter tanh_param = 127; 410 optional ThresholdParameter threshold_param = 128; 411 optional TileParameter tile_param = 138; 412 optional WindowDataParameter window_data_param = 129; 413 optional PermuteParameter permute_param = 202; 414 optional PriorBoxParameter prior_box_param = 203; 415 optional NormalizeParameter norm_param = 206; 416 optional PSROIPoolingParameter psroi_pooling_param = 207; 417 optional FreespaceExtractParameter freespace_extract_param = 151; 418 optional PostprocessParameter postprocess_param = 152; 419 optional SpatialTransformParameter spatial_transform_param = 153; 420 optional ROIAlignParameter roi_align_param = 154; 421 optional ReorgParameter reorg_param = 155; 422 optional RegionParameter region_param = 156; 423 optional ReverseParameter reverse_param = 157; 424 optional InterpParameter interp_param = 158; 425 optional ShuffleChannelParameter shuffle_channel_param = 159; 426 optional UpsampleParameter upsample_param = 160; 427} 428 429// Message that stores parameters used to apply transformation 430// to the data layer's data 431message TransformationParameter { 432 // For data pre-processing, we can do simple scaling and subtracting the 433 // data mean, if provided. Note that the mean subtraction is always carried 434 // out before scaling. 435 optional float scale = 1 [default = 1]; 436 // Specify if we want to randomly mirror data. 437 optional bool mirror = 2 [default = false]; 438 // Specify if we would like to randomly crop an image. 439 optional uint32 crop_size = 3 [default = 0]; 440 // mean_file and mean_value cannot be specified at the same time 441 optional string mean_file = 4; 442 // if specified can be repeated once (would subtract it from all the channels) 443 // or can be repeated the same number of times as channels 444 // (would subtract them from the corresponding channel) 445 repeated float mean_value = 5; 446 // Force the decoded image to have 3 color channels. 447 optional bool force_color = 6 [default = false]; 448 // Force the decoded image to have 1 color channels. 449 optional bool force_gray = 7 [default = false]; 450} 451 452// Message that stores parameters shared by loss layers 453message LossParameter { 454 // If specified, ignore instances with the given label. 455 optional int32 ignore_label = 1; 456 // How to normalize the loss for loss layers that aggregate across batches, 457 // spatial dimensions, or other dimensions. Currently only implemented in 458 // SoftmaxWithLoss and SigmoidCrossEntropyLoss layers. 459 enum NormalizationMode { 460 // Divide by the number of examples in the batch times spatial dimensions. 461 // Outputs that receive the ignore label will NOT be ignored in computing 462 // the normalization factor. 463 FULL = 0; 464 // Divide by the total number of output locations that do not take the 465 // ignore_label. If ignore_label is not set, this behaves like FULL. 466 VALID = 1; 467 // Divide by the batch size. 468 BATCH_SIZE = 2; 469 // Do not normalize the loss. 470 NONE = 3; 471 } 472 // For historical reasons, the default normalization for 473 // SigmoidCrossEntropyLoss is BATCH_SIZE and *not* VALID. 474 optional NormalizationMode normalization = 3 [default = VALID]; 475 // Deprecated. Ignored if normalization is specified. If normalization 476 // is not specified, then setting this to false will be equivalent to 477 // normalization = BATCH_SIZE to be consistent with previous behavior. 478 optional bool normalize = 2; 479} 480 481// Messages that store parameters used by individual layer types follow, in 482// alphabetical order. 483 484message AccuracyParameter { 485 // When computing accuracy, count as correct by comparing the true label to 486 // the top k scoring classes. By default, only compare to the top scoring 487 // class (i.e. argmax). 488 optional uint32 top_k = 1 [default = 1]; 489 490 // The "label" axis of the prediction blob, whose argmax corresponds to the 491 // predicted label -- may be negative to index from the end (e.g., -1 for the 492 // last axis). For example, if axis == 1 and the predictions are 493 // (N x C x H x W), the label blob is expected to contain N*H*W ground truth 494 // labels with integer values in {0, 1, ..., C-1}. 495 optional int32 axis = 2 [default = 1]; 496 497 // If specified, ignore instances with the given label. 498 optional int32 ignore_label = 3; 499} 500 501message ArgMaxParameter { 502 // If true produce pairs (argmax, maxval) 503 optional bool out_max_val = 1 [default = false]; 504 optional uint32 top_k = 2 [default = 1]; 505 // The axis along which to maximise -- may be negative to index from the 506 // end (e.g., -1 for the last axis). 507 // By default ArgMaxLayer maximizes over the flattened trailing dimensions 508 // for each index of the first / num dimension. 509 optional int32 axis = 3; 510} 511 512message ConcatParameter { 513 // The axis along which to concatenate -- may be negative to index from the 514 // end (e.g., -1 for the last axis). Other axes must have the 515 // same dimension for all the bottom blobs. 516 // By default, ConcatLayer concatenates blobs along the "channels" axis (1). 517 optional int32 axis = 2 [default = 1]; 518 519 // DEPRECATED: alias for "axis" -- does not support negative indexing. 520 optional uint32 concat_dim = 1 [default = 1]; 521} 522 523message BatchNormParameter { 524 // If false, normalization is performed over the current mini-batch 525 // and global statistics are accumulated (but not yet used) by a moving 526 // average. 527 // If true, those accumulated mean and variance values are used for the 528 // normalization. 529 // By default, it is set to false when the network is in the training 530 // phase and true when the network is in the testing phase. 531 optional bool use_global_stats = 1; 532 // What fraction of the moving average remains each iteration? 533 // Smaller values make the moving average decay faster, giving more 534 // weight to the recent values. 535 // Each iteration updates the moving average @f$S_{t-1}@f$ with the 536 // current mean @f$ Y_t @f$ by 537 // @f$ S_t = (1-\beta)Y_t + \beta \cdot S_{t-1} @f$, where @f$ \beta @f$ 538 // is the moving_average_fraction parameter. 539 optional float moving_average_fraction = 2 [default = .999]; 540 // Small value to add to the variance estimate so that we don't divide by 541 // zero. 542 optional float eps = 3 [default = 1e-5]; 543} 544 545message BiasParameter { 546 // The first axis of bottom[0] (the first input Blob) along which to apply 547 // bottom[1] (the second input Blob). May be negative to index from the end 548 // (e.g., -1 for the last axis). 549 // 550 // For example, if bottom[0] is 4D with shape 100x3x40x60, the output 551 // top[0] will have the same shape, and bottom[1] may have any of the 552 // following shapes (for the given value of axis): 553 // (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60 554 // (axis == 1 == -3) 3; 3x40; 3x40x60 555 // (axis == 2 == -2) 40; 40x60 556 // (axis == 3 == -1) 60 557 // Furthermore, bottom[1] may have the empty shape (regardless of the value of 558 // "axis") -- a scalar bias. 559 optional int32 axis = 1 [default = 1]; 560 561 // (num_axes is ignored unless just one bottom is given and the bias is 562 // a learned parameter of the layer. Otherwise, num_axes is determined by the 563 // number of axes by the second bottom.) 564 // The number of axes of the input (bottom[0]) covered by the bias 565 // parameter, or -1 to cover all axes of bottom[0] starting from `axis`. 566 // Set num_axes := 0, to add a zero-axis Blob: a scalar. 567 optional int32 num_axes = 2 [default = 1]; 568 569 // (filler is ignored unless just one bottom is given and the bias is 570 // a learned parameter of the layer.) 571 // The initialization for the learned bias parameter. 572 // Default is the zero (0) initialization, resulting in the BiasLayer 573 // initially performing the identity operation. 574 optional FillerParameter filler = 3; 575} 576 577message ContrastiveLossParameter { 578 // margin for dissimilar pair 579 optional float margin = 1 [default = 1.0]; 580 // The first implementation of this cost did not exactly match the cost of 581 // Hadsell et al 2006 -- using (margin - d^2) instead of (margin - d)^2. 582 // legacy_version = false (the default) uses (margin - d)^2 as proposed in the 583 // Hadsell paper. New models should probably use this version. 584 // legacy_version = true uses (margin - d^2). This is kept to support / 585 // reproduce existing models and results 586 optional bool legacy_version = 2 [default = false]; 587} 588 589message ConvolutionParameter { 590 optional uint32 num_output = 1; // The number of outputs for the layer 591 optional bool bias_term = 2 [default = true]; // whether to have bias terms 592 593 // Pad, kernel size, and stride are all given as a single value for equal 594 // dimensions in all spatial dimensions, or once per spatial dimension. 595 repeated uint32 pad = 3; // The padding size; defaults to 0 596 repeated uint32 kernel_size = 4; // The kernel size 597 repeated uint32 stride = 6; // The stride; defaults to 1 598 // Factor used to dilate the kernel, (implicitly) zero-filling the resulting 599 // holes. (Kernel dilation is sometimes referred to by its use in the 600 // algorithme à trous from Holschneider et al. 1987.) 601 repeated uint32 dilation = 18; // The dilation; defaults to 1 602 603 // For 2D convolution only, the *_h and *_w versions may also be used to 604 // specify both spatial dimensions. 605 optional uint32 pad_h = 9 [default = 0]; // The padding height (2D only) 606 optional uint32 pad_w = 10 [default = 0]; // The padding width (2D only) 607 optional uint32 kernel_h = 11; // The kernel height (2D only) 608 optional uint32 kernel_w = 12; // The kernel width (2D only) 609 optional uint32 stride_h = 13; // The stride height (2D only) 610 optional uint32 stride_w = 14; // The stride width (2D only) 611 612 optional uint32 group = 5 [default = 1]; // The group size for group conv 613 614 optional FillerParameter weight_filler = 7; // The filler for the weight 615 optional FillerParameter bias_filler = 8; // The filler for the bias 616 enum Engine { 617 DEFAULT = 0; 618 CAFFE = 1; 619 CUDNN = 2; 620 } 621 optional Engine engine = 15 [default = DEFAULT]; 622 623 // The axis to interpret as "channels" when performing convolution. 624 // Preceding dimensions are treated as independent inputs; 625 // succeeding dimensions are treated as "spatial". 626 // With (N, C, H, W) inputs, and axis == 1 (the default), we perform 627 // N independent 2D convolutions, sliding C-channel (or (C/g)-channels, for 628 // groups g>1) filters across the spatial axes (H, W) of the input. 629 // With (N, C, D, H, W) inputs, and axis == 1, we perform 630 // N independent 3D convolutions, sliding (C/g)-channels 631 // filters across the spatial axes (D, H, W) of the input. 632 optional int32 axis = 16 [default = 1]; 633 634 // Whether to force use of the general ND convolution, even if a specific 635 // implementation for blobs of the appropriate number of spatial dimensions 636 // is available. (Currently, there is only a 2D-specific convolution 637 // implementation; for input blobs with num_axes != 2, this option is 638 // ignored and the ND implementation will be used.) 639 optional bool force_nd_im2col = 17 [default = false]; 640} 641 642message CropParameter { 643 // To crop, elements of the first bottom are selected to fit the dimensions 644 // of the second, reference bottom. The crop is configured by 645 // - the crop `axis` to pick the dimensions for cropping 646 // - the crop `offset` to set the shift for all/each dimension 647 // to align the cropped bottom with the reference bottom. 648 // All dimensions up to but excluding `axis` are preserved, while 649 // the dimensions including and trailing `axis` are cropped. 650 // If only one `offset` is set, then all dimensions are offset by this amount. 651 // Otherwise, the number of offsets must equal the number of cropped axes to 652 // shift the crop in each dimension accordingly. 653 // Note: standard dimensions are N,C,H,W so the default is a spatial crop, 654 // and `axis` may be negative to index from the end (e.g., -1 for the last 655 // axis). 656 optional int32 axis = 1 [default = 2]; 657 repeated uint32 offset = 2; 658} 659 660message DataParameter { 661 enum DB { 662 LEVELDB = 0; 663 LMDB = 1; 664 } 665 // Specify the data source. 666 optional string source = 1; 667 // Specify the batch size. 668 optional uint32 batch_size = 4; 669 // The rand_skip variable is for the data layer to skip a few data points 670 // to avoid all asynchronous sgd clients to start at the same point. The skip 671 // point would be set as rand_skip * rand(0,1). Note that rand_skip should not 672 // be larger than the number of keys in the database. 673 // DEPRECATED. Each solver accesses a different subset of the database. 674 optional uint32 rand_skip = 7 [default = 0]; 675 optional DB backend = 8 [default = LEVELDB]; 676 // DEPRECATED. See TransformationParameter. For data pre-processing, we can do 677 // simple scaling and subtracting the data mean, if provided. Note that the 678 // mean subtraction is always carried out before scaling. 679 optional float scale = 2 [default = 1]; 680 optional string mean_file = 3; 681 // DEPRECATED. See TransformationParameter. Specify if we would like to randomly 682 // crop an image. 683 optional uint32 crop_size = 5 [default = 0]; 684 // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror 685 // data. 686 optional bool mirror = 6 [default = false]; 687 // Force the encoded image to have 3 color channels 688 optional bool force_encoded_color = 9 [default = false]; 689 // Prefetch queue (Increase if data feeding bandwidth varies, within the 690 // limit of device memory for GPU training) 691 optional uint32 prefetch = 10 [default = 4]; 692} 693 694message DropoutParameter { 695 optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio 696 optional bool scale_train = 2 [default = true]; // scale train or test phase 697} 698 699// DummyDataLayer fills any number of arbitrarily shaped blobs with random 700// (or constant) data generated by "Fillers" (see "message FillerParameter"). 701message DummyDataParameter { 702 // This layer produces N >= 1 top blobs. DummyDataParameter must specify 1 or N 703 // shape fields, and 0, 1 or N data_fillers. 704 // 705 // If 0 data_fillers are specified, ConstantFiller with a value of 0 is used. 706 // If 1 data_filler is specified, it is applied to all top blobs. If N are 707 // specified, the ith is applied to the ith top blob. 708 repeated FillerParameter data_filler = 1; 709 repeated BlobShape shape = 6; 710 711 // 4D dimensions -- deprecated. Use "shape" instead. 712 repeated uint32 num = 2; 713 repeated uint32 channels = 3; 714 repeated uint32 height = 4; 715 repeated uint32 width = 5; 716} 717 718message EltwiseParameter { 719 enum EltwiseOp { 720 PROD = 0; 721 SUM = 1; 722 MAX = 2; 723 } 724 optional EltwiseOp operation = 1 [default = SUM]; // element-wise operation 725 repeated float coeff = 2; // blob-wise coefficient for SUM operation 726 727 // Whether to use an asymptotically slower (for >2 inputs) but stabler method 728 // of computing the gradient for the PROD operation. (No effect for SUM op.) 729 optional bool stable_prod_grad = 3 [default = true]; 730} 731 732// Message that stores parameters used by ELULayer 733message ELUParameter { 734 // Described in: 735 // Clevert, D.-A., Unterthiner, T., & Hochreiter, S. (2015). Fast and Accurate 736 // Deep Network Learning by Exponential Linear Units (ELUs). arXiv 737 optional float alpha = 1 [default = 1]; 738} 739 740// Message that stores parameters used by EmbedLayer 741message EmbedParameter { 742 optional uint32 num_output = 1; // The number of outputs for the layer 743 // The input is given as integers to be interpreted as one-hot 744 // vector indices with dimension num_input. Hence num_input should be 745 // 1 greater than the maximum possible input value. 746 optional uint32 input_dim = 2; 747 748 optional bool bias_term = 3 [default = true]; // Whether to use a bias term 749 optional FillerParameter weight_filler = 4; // The filler for the weight 750 optional FillerParameter bias_filler = 5; // The filler for the bias 751 752} 753 754// Message that stores parameters used by ExpLayer 755message ExpParameter { 756 // ExpLayer computes outputs y = base ^ (shift + scale * x), for base > 0. 757 // Or if base is set to the default (-1), base is set to e, 758 // so y = exp(shift + scale * x). 759 optional float base = 1 [default = -1.0]; 760 optional float scale = 2 [default = 1.0]; 761 optional float shift = 3 [default = 0.0]; 762} 763 764/// Message that stores parameters used by FlattenLayer 765message FlattenParameter { 766 // The first axis to flatten: all preceding axes are retained in the output. 767 // May be negative to index from the end (e.g., -1 for the last axis). 768 optional int32 axis = 1 [default = 1]; 769 770 // The last axis to flatten: all following axes are retained in the output. 771 // May be negative to index from the end (e.g., the default -1 for the last 772 // axis). 773 optional int32 end_axis = 2 [default = -1]; 774} 775 776// Message that stores parameters used by HDF5DataLayer 777message HDF5DataParameter { 778 // Specify the data source. 779 optional string source = 1; 780 // Specify the batch size. 781 optional uint32 batch_size = 2; 782 783 // Specify whether to shuffle the data. 784 // If shuffle == true, the ordering of the HDF5 files is shuffled, 785 // and the ordering of data within any given HDF5 file is shuffled, 786 // but data between different files are not interleaved; all of a file's 787 // data are output (in a random order) before moving onto another file. 788 optional bool shuffle = 3 [default = false]; 789} 790 791message HDF5OutputParameter { 792 optional string file_name = 1; 793} 794 795message HingeLossParameter { 796 enum Norm { 797 L1 = 1; 798 L2 = 2; 799 } 800 // Specify the Norm to use L1 or L2 801 optional Norm norm = 1 [default = L1]; 802} 803 804message ImageDataParameter { 805 // Specify the data source. 806 optional string source = 1; 807 // Specify the batch size. 808 optional uint32 batch_size = 4 [default = 1]; 809 // The rand_skip variable is for the data layer to skip a few data points 810 // to avoid all asynchronous sgd clients to start at the same point. The skip 811 // point would be set as rand_skip * rand(0,1). Note that rand_skip should not 812 // be larger than the number of keys in the database. 813 optional uint32 rand_skip = 7 [default = 0]; 814 // Whether or not ImageLayer should shuffle the list of files at every epoch. 815 optional bool shuffle = 8 [default = false]; 816 // It will also resize images if new_height or new_width are not zero. 817 optional uint32 new_height = 9 [default = 0]; 818 optional uint32 new_width = 10 [default = 0]; 819 // Specify if the images are color or gray 820 optional bool is_color = 11 [default = true]; 821 // DEPRECATED. See TransformationParameter. For data pre-processing, we can do 822 // simple scaling and subtracting the data mean, if provided. Note that the 823 // mean subtraction is always carried out before scaling. 824 optional float scale = 2 [default = 1]; 825 optional string mean_file = 3; 826 // DEPRECATED. See TransformationParameter. Specify if we would like to randomly 827 // crop an image. 828 optional uint32 crop_size = 5 [default = 0]; 829 // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror 830 // data. 831 optional bool mirror = 6 [default = false]; 832 optional string root_folder = 12 [default = ""]; 833} 834 835message InfogainLossParameter { 836 // Specify the infogain matrix source. 837 optional string source = 1; 838 optional int32 axis = 2 [default = 1]; // axis of prob 839} 840 841message InnerProductParameter { 842 optional uint32 num_output = 1; // The number of outputs for the layer 843 optional bool bias_term = 2 [default = true]; // whether to have bias terms 844 optional FillerParameter weight_filler = 3; // The filler for the weight 845 optional FillerParameter bias_filler = 4; // The filler for the bias 846 847 // The first axis to be lumped into a single inner product computation; 848 // all preceding axes are retained in the output. 849 // May be negative to index from the end (e.g., -1 for the last axis). 850 optional int32 axis = 5 [default = 1]; 851 // Specify whether to transpose the weight matrix or not. 852 // If transpose == true, any operations will be performed on the transpose 853 // of the weight matrix. The weight matrix itself is not going to be transposed 854 // but rather the transfer flag of operations will be toggled accordingly. 855 optional bool transpose = 6 [default = false]; 856} 857 858message InputParameter { 859 // This layer produces N >= 1 top blob(s) to be assigned manually. 860 // Define N shapes to set a shape for each top. 861 // Define 1 shape to set the same shape for every top. 862 // Define no shape to defer to reshaping manually. 863 repeated BlobShape shape = 1; 864} 865 866// Message that stores parameters used by LogLayer 867message LogParameter { 868 // LogLayer computes outputs y = log_base(shift + scale * x), for base > 0. 869 // Or if base is set to the default (-1), base is set to e, 870 // so y = ln(shift + scale * x) = log_e(shift + scale * x) 871 optional float base = 1 [default = -1.0]; 872 optional float scale = 2 [default = 1.0]; 873 optional float shift = 3 [default = 0.0]; 874} 875 876// Message that stores parameters used by LRNLayer 877message LRNParameter { 878 optional uint32 local_size = 1 [default = 5]; 879 optional float alpha = 2 [default = 1.]; 880 optional float beta = 3 [default = 0.75]; 881 enum NormRegion { 882 ACROSS_CHANNELS = 0; 883 WITHIN_CHANNEL = 1; 884 } 885 optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS]; 886 optional float k = 5 [default = 1.]; 887 enum Engine { 888 DEFAULT = 0; 889 CAFFE = 1; 890 CUDNN = 2; 891 } 892 optional Engine engine = 6 [default = DEFAULT]; 893} 894 895message MemoryDataParameter { 896 optional uint32 batch_size = 1; 897 optional uint32 channels = 2; 898 optional uint32 height = 3; 899 optional uint32 width = 4; 900} 901 902message MVNParameter { 903 // This parameter can be set to false to normalize mean only 904 optional bool normalize_variance = 1 [default = true]; 905 906 // This parameter can be set to true to perform DNN-like MVN 907 optional bool across_channels = 2 [default = false]; 908 909 // Epsilon for not dividing by zero while normalizing variance 910 optional float eps = 3 [default = 1e-9]; 911} 912 913message ParameterParameter { 914 optional BlobShape shape = 1; 915} 916 917message PoolingParameter { 918 enum PoolMethod { 919 MAX = 0; 920 AVE = 1; 921 STOCHASTIC = 2; 922 } 923 optional PoolMethod pool = 1 [default = MAX]; // The pooling method 924 // Pad, kernel size, and stride are all given as a single value for equal 925 // dimensions in height and width or as Y, X pairs. 926 optional uint32 pad = 4 [default = 0]; // The padding size (equal in Y, X) 927 optional uint32 pad_h = 9 [default = 0]; // The padding height 928 optional uint32 pad_w = 10 [default = 0]; // The padding width 929 optional uint32 kernel_size = 2; // The kernel size (square) 930 optional uint32 kernel_h = 5; // The kernel height 931 optional uint32 kernel_w = 6; // The kernel width 932 optional uint32 stride = 3 [default = 1]; // The stride (equal in Y, X) 933 optional uint32 stride_h = 7; // The stride height 934 optional uint32 stride_w = 8; // The stride width 935 enum Engine { 936 DEFAULT = 0; 937 CAFFE = 1; 938 CUDNN = 2; 939 } 940 optional Engine engine = 11 [default = DEFAULT]; 941 // If global_pooling then it will pool over the size of the bottom by doing 942 // kernel_h = bottom->height and kernel_w = bottom->width 943 optional bool global_pooling = 12 [default = false]; 944 optional bool ceil_mode = 13 [default = true]; 945 // How to calculate the output size - using ceil (default) or floor rounding. 946 enum RoundMode { 947 CEIL = 0; 948 FLOOR = 1; 949 } 950 optional RoundMode round_mode = 14 [default = CEIL]; 951} 952 953message PowerParameter { 954 // PowerLayer computes outputs y = (shift + scale * x) ^ power. 955 optional float power = 1 [default = 1.0]; 956 optional float scale = 2 [default = 1.0]; 957 optional float shift = 3 [default = 0.0]; 958} 959 960message PythonParameter { 961 optional string module = 1; 962 optional string layer = 2; 963 // This value is set to the attribute `param_str` of the `PythonLayer` object 964 // in Python before calling the `setup()` method. This could be a number, 965 // string, dictionary in Python dict format, JSON, etc. You may parse this 966 // string in `setup` method and use it in `forward` and `backward`. 967 optional string param_str = 3 [default = '']; 968 // Whether this PythonLayer is shared among worker solvers during data parallelism. 969 // If true, each worker solver sequentially run forward from this layer. 970 // This value should be set true if you are using it as a data layer. 971 optional bool share_in_parallel = 4 [default = false]; 972} 973 974// Message that stores parameters used by RecurrentLayer 975message RecurrentParameter { 976 // The dimension of the output (and usually hidden state) representation -- 977 // must be explicitly set to non-zero. 978 optional uint32 num_output = 1 [default = 0]; 979 980 optional FillerParameter weight_filler = 2; // The filler for the weight 981 optional FillerParameter bias_filler = 3; // The filler for the bias 982 983 // Whether to enable displaying debug_info in the unrolled recurrent net. 984 optional bool debug_info = 4 [default = false]; 985 986 // Whether to add as additional inputs (bottoms) the initial hidden state 987 // blobs, and add as additional outputs (tops) the final timestep hidden state 988 // blobs. The number of additional bottom/top blobs required depends on the 989 // recurrent architecture -- e.g., 1 for RNNs, 2 for LSTMs. 990 optional bool expose_hidden = 5 [default = false]; 991} 992 993// Message that stores parameters used by ReductionLayer 994message ReductionParameter { 995 enum ReductionOp { 996 SUM = 1; 997 ASUM = 2; 998 SUMSQ = 3; 999 MEAN = 4; 1000 } 1001 1002 optional ReductionOp operation = 1 [default = SUM]; // reduction operation 1003 1004 // The first axis to reduce to a scalar -- may be negative to index from the 1005 // end (e.g., -1 for the last axis). 1006 // (Currently, only reduction along ALL "tail" axes is supported; reduction 1007 // of axis M through N, where N < num_axes - 1, is unsupported.) 1008 // Suppose we have an n-axis bottom Blob with shape: 1009 // (d0, d1, d2, ..., d(m-1), dm, d(m+1), ..., d(n-1)). 1010 // If axis == m, the output Blob will have shape 1011 // (d0, d1, d2, ..., d(m-1)), 1012 // and the ReductionOp operation is performed (d0 * d1 * d2 * ... * d(m-1)) 1013 // times, each including (dm * d(m+1) * ... * d(n-1)) individual data. 1014 // If axis == 0 (the default), the output Blob always has the empty shape 1015 // (count 1), performing reduction across the entire input -- 1016 // often useful for creating new loss functions. 1017 optional int32 axis = 2 [default = 0]; 1018 1019 optional float coeff = 3 [default = 1.0]; // coefficient for output 1020} 1021 1022// Message that stores parameters used by ReLULayer 1023message ReLUParameter { 1024 // Allow non-zero slope for negative inputs to speed up optimization 1025 // Described in: 1026 // Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013). Rectifier nonlinearities 1027 // improve neural network acoustic models. In ICML Workshop on Deep Learning 1028 // for Audio, Speech, and Language Processing. 1029 optional float negative_slope = 1 [default = 0]; 1030 enum Engine { 1031 DEFAULT = 0; 1032 CAFFE = 1; 1033 CUDNN = 2; 1034 } 1035 optional Engine engine = 2 [default = DEFAULT]; 1036} 1037 1038message ReshapeParameter { 1039 // Specify the output dimensions. If some of the dimensions are set to 0, 1040 // the corresponding dimension from the bottom layer is used (unchanged). 1041 // Exactly one dimension may be set to -1, in which case its value is 1042 // inferred from the count of the bottom blob and the remaining dimensions. 1043 // For example, suppose we want to reshape a 2D blob "input" with shape 2 x 8: 1044 // 1045 // layer { 1046 // type: "Reshape" bottom: "input" top: "output" 1047 // reshape_param { ... } 1048 // } 1049 // 1050 // If "input" is 2D with shape 2 x 8, then the following reshape_param 1051 // specifications are all equivalent, producing a 3D blob "output" with shape 1052 // 2 x 2 x 4: 1053 // 1054 // reshape_param { shape { dim: 2 dim: 2 dim: 4 } } 1055 // reshape_param { shape { dim: 0 dim: 2 dim: 4 } } 1056 // reshape_param { shape { dim: 0 dim: 2 dim: -1 } } 1057 // reshape_param { shape { dim: 0 dim:-1 dim: 4 } } 1058 // 1059 optional BlobShape shape = 1; 1060 1061 // axis and num_axes control the portion of the bottom blob's shape that are 1062 // replaced by (included in) the reshape. By default (axis == 0 and 1063 // num_axes == -1), the entire bottom blob shape is included in the reshape, 1064 // and hence the shape field must specify the entire output shape. 1065 // 1066 // axis may be non-zero to retain some portion of the beginning of the input 1067 // shape (and may be negative to index from the end; e.g., -1 to begin the 1068 // reshape after the last axis, including nothing in the reshape, 1069 // -2 to include only the last axis, etc.). 1070 // 1071 // For example, suppose "input" is a 2D blob with shape 2 x 8. 1072 // Then the following ReshapeLayer specifications are all equivalent, 1073 // producing a blob "output" with shape 2 x 2 x 4: 1074 // 1075 // reshape_param { shape { dim: 2 dim: 2 dim: 4 } } 1076 // reshape_param { shape { dim: 2 dim: 4 } axis: 1 } 1077 // reshape_param { shape { dim: 2 dim: 4 } axis: -3 } 1078 // 1079 // num_axes specifies the extent of the reshape. 1080 // If num_axes >= 0 (and axis >= 0), the reshape will be performed only on 1081 // input axes in the range [axis, axis+num_axes]. 1082 // num_axes may also be -1, the default, to include all remaining axes 1083 // (starting from axis). 1084 // 1085 // For example, suppose "input" is a 2D blob with shape 2 x 8. 1086 // Then the following ReshapeLayer specifications are equivalent, 1087 // producing a blob "output" with shape 1 x 2 x 8. 1088 // 1089 // reshape_param { shape { dim: 1 dim: 2 dim: 8 } } 1090 // reshape_param { shape { dim: 1 dim: 2 } num_axes: 1 } 1091 // reshape_param { shape { dim: 1 } num_axes: 0 } 1092 // 1093 // On the other hand, these would produce output blob shape 2 x 1 x 8: 1094 // 1095 // reshape_param { shape { dim: 2 dim: 1 dim: 8 } } 1096 // reshape_param { shape { dim: 1 } axis: 1 num_axes: 0 } 1097 // 1098 optional int32 axis = 2 [default = 0]; 1099 optional int32 num_axes = 3 [default = -1]; 1100} 1101 1102// Message that stores parameters used by ROIPoolingLayer 1103message ROIPoolingParameter { 1104 // Pad, kernel size, and stride are all given as a single value for equal 1105 // dimensions in height and width or as Y, X pairs. 1106 optional uint32 pooled_h = 1 [default = 0]; // The pooled output height 1107 optional uint32 pooled_w = 2 [default = 0]; // The pooled output width 1108 // Multiplicative spatial scale factor to translate ROI coords from their 1109 // input scale to the scale used when pooling 1110 optional float spatial_scale = 3 [default = 1]; 1111} 1112 1113message ScaleParameter { 1114 // The first axis of bottom[0] (the first input Blob) along which to apply 1115 // bottom[1] (the second input Blob). May be negative to index from the end 1116 // (e.g., -1 for the last axis). 1117 // 1118 // For example, if bottom[0] is 4D with shape 100x3x40x60, the output 1119 // top[0] will have the same shape, and bottom[1] may have any of the 1120 // following shapes (for the given value of axis): 1121 // (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60 1122 // (axis == 1 == -3) 3; 3x40; 3x40x60 1123 // (axis == 2 == -2) 40; 40x60 1124 // (axis == 3 == -1) 60 1125 // Furthermore, bottom[1] may have the empty shape (regardless of the value of 1126 // "axis") -- a scalar multiplier. 1127 optional int32 axis = 1 [default = 1]; 1128 1129 // (num_axes is ignored unless just one bottom is given and the scale is 1130 // a learned parameter of the layer. Otherwise, num_axes is determined by the 1131 // number of axes by the second bottom.) 1132 // The number of axes of the input (bottom[0]) covered by the scale 1133 // parameter, or -1 to cover all axes of bottom[0] starting from `axis`. 1134 // Set num_axes := 0, to multiply with a zero-axis Blob: a scalar. 1135 optional int32 num_axes = 2 [default = 1]; 1136 1137 // (filler is ignored unless just one bottom is given and the scale is 1138 // a learned parameter of the layer.) 1139 // The initialization for the learned scale parameter. 1140 // Default is the unit (1) initialization, resulting in the ScaleLayer 1141 // initially performing the identity operation. 1142 optional FillerParameter filler = 3; 1143 1144 // Whether to also learn a bias (equivalent to a ScaleLayer+BiasLayer, but 1145 // may be more efficient). Initialized with bias_filler (defaults to 0). 1146 optional bool bias_term = 4 [default = false]; 1147 optional FillerParameter bias_filler = 5; 1148} 1149 1150message SigmoidParameter { 1151 enum Engine { 1152 DEFAULT = 0; 1153 CAFFE = 1; 1154 CUDNN = 2; 1155 } 1156 optional Engine engine = 1 [default = DEFAULT]; 1157} 1158 1159message SliceParameter { 1160 // The axis along which to slice -- may be negative to index from the end 1161 // (e.g., -1 for the last axis). 1162 // By default, SliceLayer concatenates blobs along the "channels" axis (1). 1163 optional int32 axis = 3 [default = 1]; 1164 repeated uint32 slice_point = 2; 1165 1166 // DEPRECATED: alias for "axis" -- does not support negative indexing. 1167 optional uint32 slice_dim = 1 [default = 1]; 1168} 1169 1170message SmoothL1LossParameter { 1171 // SmoothL1Loss(x) = 1172 // 0.5 * (sigma * x) ** 2 -- if x < 1.0 / sigma / sigma 1173 // |x| - 0.5 / sigma / sigma -- otherwise 1174 optional float sigma = 1 [default = 1]; 1175} 1176 1177// Message that stores parameters used by SoftmaxLayer, SoftmaxWithLossLayer 1178message SoftmaxParameter { 1179 enum Engine { 1180 DEFAULT = 0; 1181 CAFFE = 1; 1182 CUDNN = 2; 1183 } 1184 optional Engine engine = 1 [default = DEFAULT]; 1185 1186 // The axis along which to perform the softmax -- may be negative to index 1187 // from the end (e.g., -1 for the last axis). 1188 // Any other axes will be evaluated as independent softmaxes. 1189 optional int32 axis = 2 [default = 1]; 1190} 1191 1192message TanHParameter { 1193 enum Engine { 1194 DEFAULT = 0; 1195 CAFFE = 1; 1196 CUDNN = 2; 1197 } 1198 optional Engine engine = 1 [default = DEFAULT]; 1199} 1200 1201// Message that stores parameters used by TileLayer 1202message TileParameter { 1203 // The index of the axis to tile. 1204 optional int32 axis = 1 [default = 1]; 1205 1206 // The number of copies (tiles) of the blob to output. 1207 optional int32 tiles = 2; 1208} 1209 1210// Message that stores parameters used by ThresholdLayer 1211message ThresholdParameter { 1212 optional float threshold = 1 [default = 0]; // Strictly positive values 1213} 1214 1215message WindowDataParameter { 1216 // Specify the data source. 1217 optional string source = 1; 1218 // For data pre-processing, we can do simple scaling and subtracting the 1219 // data mean, if provided. Note that the mean subtraction is always carried 1220 // out before scaling. 1221 optional float scale = 2 [default = 1]; 1222 optional string mean_file = 3; 1223 // Specify the batch size. 1224 optional uint32 batch_size = 4; 1225 // Specify if we would like to randomly crop an image. 1226 optional uint32 crop_size = 5 [default = 0]; 1227 // Specify if we want to randomly mirror data. 1228 optional bool mirror = 6 [default = false]; 1229 // Foreground (object) overlap threshold 1230 optional float fg_threshold = 7 [default = 0.5]; 1231 // Background (non-object) overlap threshold 1232 optional float bg_threshold = 8 [default = 0.5]; 1233 // Fraction of batch that should be foreground objects 1234 optional float fg_fraction = 9 [default = 0.25]; 1235 // Amount of contextual padding to add around a window 1236 // (used only by the window_data_layer) 1237 optional uint32 context_pad = 10 [default = 0]; 1238 // Mode for cropping out a detection window 1239 // warp: cropped window is warped to a fixed size and aspect ratio 1240 // square: the tightest square around the window is cropped 1241 optional string crop_mode = 11 [default = "warp"]; 1242 // cache_images: will load all images in memory for faster access 1243 optional bool cache_images = 12 [default = false]; 1244 // append root_folder to locate images 1245 optional string root_folder = 13 [default = ""]; 1246} 1247 1248message SPPParameter { 1249 enum PoolMethod { 1250 MAX = 0; 1251 AVE = 1; 1252 STOCHASTIC = 2; 1253 } 1254 optional uint32 pyramid_height = 1; 1255 optional PoolMethod pool = 2 [default = MAX]; // The pooling method 1256 enum Engine { 1257 DEFAULT = 0; 1258 CAFFE = 1; 1259 CUDNN = 2; 1260 } 1261 optional Engine engine = 6 [default = DEFAULT]; 1262} 1263 1264// DEPRECATED: use LayerParameter. 1265message V1LayerParameter { 1266 repeated string bottom = 2; 1267 repeated string top = 3; 1268 optional string name = 4; 1269 repeated NetStateRule include = 32; 1270 repeated NetStateRule exclude = 33; 1271 enum LayerType { 1272 NONE = 0; 1273 ABSVAL = 35; 1274 ACCURACY = 1; 1275 ARGMAX = 30; 1276 BNLL = 2; 1277 CONCAT = 3; 1278 CONTRASTIVE_LOSS = 37; 1279 CONVOLUTION = 4; 1280 DATA = 5; 1281 DECONVOLUTION = 39; 1282 DROPOUT = 6; 1283 DUMMY_DATA = 32; 1284 EUCLIDEAN_LOSS = 7; 1285 ELTWISE = 25; 1286 EXP = 38; 1287 FLATTEN = 8; 1288 HDF5_DATA = 9; 1289 HDF5_OUTPUT = 10; 1290 HINGE_LOSS = 28; 1291 IM2COL = 11; 1292 IMAGE_DATA = 12; 1293 INFOGAIN_LOSS = 13; 1294 INNER_PRODUCT = 14; 1295 LRN = 15; 1296 MEMORY_DATA = 29; 1297 MULTINOMIAL_LOGISTIC_LOSS = 16; 1298 MVN = 34; 1299 POOLING = 17; 1300 POWER = 26; 1301 RELU = 18; 1302 SIGMOID = 19; 1303 SIGMOID_CROSS_ENTROPY_LOSS = 27; 1304 SILENCE = 36; 1305 SOFTMAX = 20; 1306 SOFTMAX_LOSS = 21; 1307 SPLIT = 22; 1308 SLICE = 33; 1309 TANH = 23; 1310 WINDOW_DATA = 24; 1311 THRESHOLD = 31; 1312 } 1313 optional LayerType type = 5; 1314 repeated BlobProto blobs = 6; 1315 repeated string param = 1001; 1316 repeated DimCheckMode blob_share_mode = 1002; 1317 enum DimCheckMode { 1318 STRICT = 0; 1319 PERMISSIVE = 1; 1320 } 1321 repeated float blobs_lr = 7; 1322 repeated float weight_decay = 8; 1323 repeated float loss_weight = 35; 1324 optional AccuracyParameter accuracy_param = 27; 1325 optional ArgMaxParameter argmax_param = 23; 1326 optional ConcatParameter concat_param = 9; 1327 optional ContrastiveLossParameter contrastive_loss_param = 40; 1328 optional ConvolutionParameter convolution_param = 10; 1329 optional DataParameter data_param = 11; 1330 optional DropoutParameter dropout_param = 12; 1331 optional DummyDataParameter dummy_data_param = 26; 1332 optional EltwiseParameter eltwise_param = 24; 1333 optional ExpParameter exp_param = 41; 1334 optional HDF5DataParameter hdf5_data_param = 13; 1335 optional HDF5OutputParameter hdf5_output_param = 14; 1336 optional HingeLossParameter hinge_loss_param = 29; 1337 optional ImageDataParameter image_data_param = 15; 1338 optional InfogainLossParameter infogain_loss_param = 16; 1339 optional InnerProductParameter inner_product_param = 17; 1340 optional LRNParameter lrn_param = 18; 1341 optional MemoryDataParameter memory_data_param = 22; 1342 optional MVNParameter mvn_param = 34; 1343 optional PoolingParameter pooling_param = 19; 1344 optional PowerParameter power_param = 21; 1345 optional ReLUParameter relu_param = 30; 1346 optional SigmoidParameter sigmoid_param = 38; 1347 optional SoftmaxParameter softmax_param = 39; 1348 optional SliceParameter slice_param = 31; 1349 optional TanHParameter tanh_param = 37; 1350 optional ThresholdParameter threshold_param = 25; 1351 optional WindowDataParameter window_data_param = 20; 1352 optional TransformationParameter transform_param = 36; 1353 optional LossParameter loss_param = 42; 1354 optional V0LayerParameter layer = 1; 1355} 1356 1357// DEPRECATED: V0LayerParameter is the old way of specifying layer parameters 1358// in Caffe. We keep this message type around for legacy support. 1359message V0LayerParameter { 1360 optional string name = 1; // the layer name 1361 optional string type = 2; // the string to specify the layer type 1362 1363 // Parameters to specify layers with inner products. 1364 optional uint32 num_output = 3; // The number of outputs for the layer 1365 optional bool biasterm = 4 [default = true]; // whether to have bias terms 1366 optional FillerParameter weight_filler = 5; // The filler for the weight 1367 optional FillerParameter bias_filler = 6; // The filler for the bias 1368 1369 optional uint32 pad = 7 [default = 0]; // The padding size 1370 optional uint32 kernelsize = 8; // The kernel size 1371 optional uint32 group = 9 [default = 1]; // The group size for group conv 1372 optional uint32 stride = 10 [default = 1]; // The stride 1373 enum PoolMethod { 1374 MAX = 0; 1375 AVE = 1; 1376 STOCHASTIC = 2; 1377 } 1378 optional PoolMethod pool = 11 [default = MAX]; // The pooling method 1379 optional float dropout_ratio = 12 [default = 0.5]; // dropout ratio 1380 1381 optional uint32 local_size = 13 [default = 5]; // for local response norm 1382 optional float alpha = 14 [default = 1.]; // for local response norm 1383 optional float beta = 15 [default = 0.75]; // for local response norm 1384 optional float k = 22 [default = 1.]; 1385 1386 // For data layers, specify the data source 1387 optional string source = 16; 1388 // For data pre-processing, we can do simple scaling and subtracting the 1389 // data mean, if provided. Note that the mean subtraction is always carried 1390 // out before scaling. 1391 optional float scale = 17 [default = 1]; 1392 optional string meanfile = 18; 1393 // For data layers, specify the batch size. 1394 optional uint32 batchsize = 19; 1395 // For data layers, specify if we would like to randomly crop an image. 1396 optional uint32 cropsize = 20 [default = 0]; 1397 // For data layers, specify if we want to randomly mirror data. 1398 optional bool mirror = 21 [default = false]; 1399 1400 // The blobs containing the numeric parameters of the layer 1401 repeated BlobProto blobs = 50; 1402 // The ratio that is multiplied on the global learning rate. If you want to 1403 // set the learning ratio for one blob, you need to set it for all blobs. 1404 repeated float blobs_lr = 51; 1405 // The weight decay that is multiplied on the global weight decay. 1406 repeated float weight_decay = 52; 1407 1408 // The rand_skip variable is for the data layer to skip a few data points 1409 // to avoid all asynchronous sgd clients to start at the same point. The skip 1410 // point would be set as rand_skip * rand(0,1). Note that rand_skip should not 1411 // be larger than the number of keys in the database. 1412 optional uint32 rand_skip = 53 [default = 0]; 1413 1414 // Fields related to detection (det_*) 1415 // foreground (object) overlap threshold 1416 optional float det_fg_threshold = 54 [default = 0.5]; 1417 // background (non-object) overlap threshold 1418 optional float det_bg_threshold = 55 [default = 0.5]; 1419 // Fraction of batch that should be foreground objects 1420 optional float det_fg_fraction = 56 [default = 0.25]; 1421 1422 // optional bool OBSOLETE_can_clobber = 57 [default = true]; 1423 1424 // Amount of contextual padding to add around a window 1425 // (used only by the window_data_layer) 1426 optional uint32 det_context_pad = 58 [default = 0]; 1427 1428 // Mode for cropping out a detection window 1429 // warp: cropped window is warped to a fixed size and aspect ratio 1430 // square: the tightest square around the window is cropped 1431 optional string det_crop_mode = 59 [default = "warp"]; 1432 1433 // For ReshapeLayer, one needs to specify the new dimensions. 1434 optional int32 new_num = 60 [default = 0]; 1435 optional int32 new_channels = 61 [default = 0]; 1436 optional int32 new_height = 62 [default = 0]; 1437 optional int32 new_width = 63 [default = 0]; 1438 1439 // Whether or not ImageLayer should shuffle the list of files at every epoch. 1440 // It will also resize images if new_height or new_width are not zero. 1441 optional bool shuffle_images = 64 [default = false]; 1442 1443 // For ConcatLayer, one needs to specify the dimension for concatenation, and 1444 // the other dimensions must be the same for all the bottom blobs. 1445 // By default it will concatenate blobs along the channels dimension. 1446 optional uint32 concat_dim = 65 [default = 1]; 1447 1448 optional HDF5OutputParameter hdf5_output_param = 1001; 1449} 1450 1451message PReLUParameter { 1452 // Parametric ReLU described in K. He et al, Delving Deep into Rectifiers: 1453 // Surpassing Human-Level Performance on ImageNet Classification, 2015. 1454 1455 // Initial value of a_i. Default is a_i=0.25 for all i. 1456 optional FillerParameter filler = 1; 1457 // Whether or not slope parameters are shared across channels. 1458 optional bool channel_shared = 2 [default = false]; 1459} 1460 1461// Message that stores parameters used by ProposalLayer 1462message ProposalParameter { 1463 optional float feat_stride = 1; 1464 optional float base_size = 2; 1465 optional float min_size = 3; 1466 repeated float ratio = 4; 1467 repeated float scale = 5; 1468 optional int32 pre_nms_topn = 6; 1469 optional int32 post_nms_topn = 7; 1470 optional float nms_thresh = 8; 1471} 1472 1473// Message that stores parameters used by DetectionOutputLayer 1474//message DetectionOutputParameter { 1475// optional int32 num_classes = 1 [default = 21]; 1476// optional float nms_threshold = 2 [default = 0.3]; 1477// optional int32 top_k = 3; 1478// optional float confidence_threshold = 4 [default = 0.8]; 1479//} 1480 1481// Message that store parameters used by PriorBoxLayer 1482message PriorBoxParameter { 1483 // Encode/decode type. 1484 enum CodeType { 1485 CORNER = 1; 1486 CENTER_SIZE = 2; 1487 CORNER_SIZE = 3; 1488 } 1489 // Minimum box size (in pixels). Required! 1490 repeated float min_size = 1; 1491 // Maximum box size (in pixels). Required! 1492 repeated float max_size = 2; 1493 // Various of aspect ratios. Duplicate ratios will be ignored. 1494 // If none is provided, we use default ratio 1. 1495 repeated float aspect_ratio = 3; 1496 // If true, will flip each aspect ratio. 1497 // For example, if there is aspect ratio "r", 1498 // we will generate aspect ratio "1.0/r" as well. 1499 optional bool flip = 4 [default = true]; 1500 // If true, will clip the prior so that it is within [0, 1] 1501 optional bool clip = 5 [default = false]; 1502 // Variance for adjusting the prior boxes. 1503 repeated float variance = 6; 1504 // By default, we calculate img_height, img_width, step_x, step_y based on 1505 // bottom[0] (feat) and bottom[1] (img). Unless these values are explicitly 1506 // provided. 1507 // Explicitly provide the img_size. 1508 optional uint32 img_size = 7; 1509 // Either img_size or img_h/img_w should be specified; not both. 1510 optional uint32 img_h = 8; 1511 optional uint32 img_w = 9; 1512 1513 // Explicitly provide the step size. 1514 optional float step = 10; 1515 // Either step or step_h/step_w should be specified; not both. 1516 optional float step_h = 11; 1517 optional float step_w = 12; 1518 1519 // Offset to the top left corner of each cell. 1520 optional float offset = 13 [default = 0.5]; 1521} 1522 1523// Message that stores parameters used by PermutetLayer 1524message PermuteParameter { 1525 // The new orders of the axes of data. Notice it should be with 1526 // in the same range as the input data, and it starts from 0. 1527 // Do not provide repeated order. 1528 repeated uint32 order = 1; 1529} 1530 1531message NormalizeParameter { 1532 optional bool across_spatial = 1 [default = true]; 1533 // Initial value of scale. Default is 1.0 for all 1534 optional FillerParameter scale_filler = 2; 1535 // Whether or not scale parameters are shared across channels. 1536 optional bool channel_shared = 3 [default = true]; 1537 // Epsilon for not dividing by zero while normalizing variance 1538 optional float eps = 4 [default = 1e-10]; 1539} 1540 1541// needed by ssd 1542message SaveOutputParameter { 1543 // Output directory. If not empty, we will save the results. 1544 optional string output_directory = 1; 1545 // Output name prefix. 1546 optional string output_name_prefix = 2; 1547 // Output format. 1548 // VOC - PASCAL VOC output format. 1549 // COCO - MS COCO output format. 1550 optional string output_format = 3; 1551 // If you want to output results, must also provide the following two files. 1552 // Otherwise, we will ignore saving results. 1553 // label map file. 1554 optional string label_map_file = 4; 1555 // A file which contains a list of names and sizes with same order 1556 // of the input DB. The file is in the following format: 1557 // name height width 1558 // ... 1559 optional string name_size_file = 5; 1560 // Number of test images. It can be less than the lines specified in 1561 // name_size_file. For example, when we only want to evaluate on part 1562 // of the test images. 1563 optional uint32 num_test_image = 6; 1564 // The resize parameter used in saving the data. 1565 // optional ResizeParameter resize_param = 7; 1566} 1567 1568message NonMaximumSuppressionParameter { 1569 // Threshold to be used in nms. 1570 optional float nms_threshold = 1 [default = 0.3]; 1571 // Maximum number of results to be kept. 1572 optional int32 top_k = 2; 1573 // Parameter for adaptive nms. 1574 optional float eta = 3 [default = 1.0]; 1575} 1576 1577message GeneralNmsParameter { 1578 optional int32 post_top_k = 1 ; 1579 optional float nms_threshold = 2 [default = 0]; 1580 optional float iou_threshold_decay = 3 [default = 1.0]; 1581 optional float coor_scale_factor = 4 [default = 1.0]; 1582} 1583 1584// Message that store parameters used by DetectionOutputLayer, ssd/fasterRcnn 1585message DetectionOutputParameter { 1586 optional int32 num_classes = 1; 1587 optional bool share_location = 2 [default = true]; 1588 optional int32 background_label_id = 3 [default = 0]; 1589 optional NonMaximumSuppressionParameter nms_param = 4; 1590 optional SaveOutputParameter save_output_param = 5; 1591 optional PriorBoxParameter.CodeType code_type = 6 [default = CENTER_SIZE]; 1592 optional bool variance_encoded_in_target = 8 [default = true]; 1593 optional int32 keep_top_k = 7; 1594 optional float confidence_threshold = 9; 1595 optional float nms_threshold = 13; 1596 optional int32 top_k = 14; 1597 optional int32 boxes = 15 [default = 1]; 1598 optional bool relative = 17 [default = true]; 1599 optional float objectness_threshold = 18 [default = 0.5]; 1600 optional float class_threshold = 19 [default = 0.5]; 1601 repeated float biases = 20; 1602 optional GeneralNmsParameter general_nms_param = 21; 1603} 1604message PSROIPoolingParameter { 1605 required float spatial_scale = 1; 1606 required int32 output_dim = 2; // output channel number 1607 required int32 group_size = 3; // number of groups to encode position-sensitive score maps 1608} 1609// Message that stores parameters used by FreespaceExtractLayer 1610message FreespaceExtractParameter { 1611 optional float org_height = 1; 1612} 1613 1614// Message that stores parameters used by DetectpostprocessLayer 1615message PostprocessParameter { 1616 optional float nms_thresh = 1 [default = 0.3]; 1617 optional float conf_thresh = 2 [default = 0.5]; 1618 optional uint32 post_nms_topn = 3 [default = 100]; 1619 optional uint32 cls_num = 4 [default = 12]; 1620 repeated float bbox_reg_weights = 5; 1621} 1622 1623// Message that stores parameters used by SpatialTransformLayer 1624message SpatialTransformParameter { 1625 optional uint32 output_h = 1 [default = 0]; 1626 optional uint32 output_w = 2 [default = 0]; 1627 optional float border_value = 3 [default = 0]; 1628 repeated float affine_transform = 4; 1629 enum Engine { 1630 DEFAULT = 0; 1631 CAFFE = 1; 1632 CUDNN = 2; 1633 } 1634 optional Engine engine = 15 [default = DEFAULT]; 1635} 1636message ROIAlignParameter { 1637 // Pad, kernel size, and stride are all given as a single value for equal 1638 // dimensions in height and width or as Y, X pairs. 1639 optional uint32 pooled_h = 1 [default = 0]; // The pooled output height 1640 optional uint32 pooled_w = 2 [default = 0]; // The pooled output width 1641 // Multiplicative spatial scale factor to translate ROI coords from their 1642 // input scale to the scale used when pooling 1643 optional float spatial_scale = 3 [default = 1]; 1644 optional int32 sampling_ratio = 4 [default = -1]; 1645} 1646 1647message RegionParameter { 1648 optional uint32 classes = 1 [default = 20]; //分类的种类 1649 optional uint32 coords = 2 [default = 4]; //box的坐标数 1650 optional uint32 boxes = 3 [default = 1]; //每个grid预测的boxes数 1651 optional uint32 softmax = 4 [default = 0]; 1652 optional string softmax_tree = 5 [default = ""]; 1653 optional uint32 background = 6 [default = 0]; 1654} 1655message ReorgParameter{ 1656 optional uint32 stride = 2 [default = 2]; 1657 optional bool reverse = 1 [default = false]; 1658} 1659message ReverseParameter{ 1660 optional int32 axis = 1 [default = 1]; 1661} 1662message InterpParameter{ 1663 optional int32 height = 1 [default = 0];//Height of output 1664 optional int32 width = 2 [default = 0];//Width of output 1665 optional int32 zoom_factor = 3 [default = 1];//zoom factor 1666 optional int32 shrink_factor = 4 [default = 1];//shrink factor 1667 optional int32 pad_beg = 5 [default = 0];//padding at begin of input 1668 optional int32 pad_end = 6 [default = 0];//padding at end of input 1669} 1670message ShuffleChannelParameter{ 1671 optional uint32 group = 1[default = 1]; // The number of group 1672} 1673message UpsampleParameter{ 1674 optional int32 scale = 1[default = 1]; 1675} 1676