1// Copyright (c) 2017-2019, Apple Inc. All rights reserved. 2// 3// Use of this source code is governed by a BSD-3-clause license that can be 4// found in LICENSE.txt or at https://opensource.org/licenses/BSD-3-Clause 5 6/** 7 * A neural network is defined through a collection of layers 8 * and represents a directed acyclic graph (DAG). 9 * Each layer has a name, a layer type, 10 * a list of input names, a list of output names, 11 * and a collection of parameters specific to the layer type. 12 * 13 * The graph structure and connectivity of the neural network 14 * is inferred from the input and output names. 15 * A neural network starts with the layer 16 * whose input name is equal to the value specified in 17 * ``Model.description.input.name``, 18 * and ends with the layer 19 * whose output name is equal to the value specified in 20 * ``Model.description.output.name``. 21 * Layers must have unique input and output names, 22 * and a layer may not have input or output names that 23 * refer to layers that are not yet defined. 24 * 25 * For Core ML specification version <=3, 26 * all inputs are mapped to static rank 5 tensors, with axis notations 27 * [Sequence, Batch, Channel, Height, Width]. 28 * 29 * From specification version 4 onwards (iOS >= 13, macOS >= 10.15), more options are available 30 * (see enums ``NeuralNetworkMultiArrayShapeMapping``, ``NeuralNetworkImageShapeMapping``) 31 * to map inputs to generic N-Dimensional (or N rank) tensors, where N >= 1. 32 * 33 * Each layer type may have specific constraints on the ranks of its inputs and outputs. 34 * 35 * Some of the layers (such as softmax, reduce, etc) have parameters that have been described in 36 * terms of notational axis "Channel", "Height", "Width" or "Sequence". They can be re-interpreted easily in 37 * the general ND setting by using the following rule: 38 * "width" is same as axis = -1 (i.e. the last axis from the end) 39 * "height" is same as axis = -2 (i.e. the second last axis from the end) 40 * "channel" is same as axis = -3 (i.e. the third last axis from the end) 41 * "sequence" is same as axis = -5 (i.e. the fifth last axis from the end) 42 * 43 * Several layers are available in 3 different variations, with the names ending 44 * in identifiers: ``like``, ``static`` and ``dynamic``. For instance, ``FillLike``, 45 * ``FillStatic`` and ``FillDynamic``. The ``static`` variation generally will have 46 * a property corresponding to the shape of the output. For instance, if the 47 * output of the ``FillStatic`` layer is desired to be of shape (10, 4), the 48 * property ``targetShape`` will have to be set to [10, 4]. In the ``dynamic`` case, 49 * the shape is an input, hence it can be changed at runtime. For instance, for 50 * a ``FillDynamic`` layer, the input would have to be an array containing the 51 * values 10 and 4, if the desired output is of shape (10, 4). Whereas in the 52 * ``like`` case, the additional input's shape is used as the output shape, ignoring 53 * its values. For instance, for a ``FillLike`` layer, for an input with shape 54 * (10, 4), the output generated will also be of shape (10, 4), values of the 55 * input will be ignored. 56 */ 57 58syntax = "proto3"; 59option optimize_for = LITE_RUNTIME; 60 61import public "DataStructures.proto"; 62import public "Parameters.proto"; 63 64package CoreML.Specification; 65 66 67enum NeuralNetworkMultiArrayShapeMapping { 68 69 /* 70 * Describes how the MultiArray shape for the inputs, 71 * provided in Features Types proto via model description, 72 * is mapped to construct tensors that are fed into the Neural Network layers. 73 */ 74 75 /* 76 * Default legacy value. Only supported for Core ML Specification version <= 3. 77 * 78 * The default legacy shape mapping resolves all input shapes to a rank 5 equivalent 79 * with axis notation of [Seq, Batch, Channel, Height, Width]. 80 * 81 * When this enum value is selected, 82 * the repeated shape field in the message "ArrayFeatureType" in feature types proto, 83 * must be either length 1 or length 3. 84 * 85 * The following rule is used to map the values in the shape field to the actual tensor shape: 86 * rank 1 shape is mapped to shape [1,1,C,1,1] 87 * rank 3 shape is mapped to shape [1,1,C,H,W] 88 * At runtime, the first two dimensions (Seq or Batch) can be presented as well, with non-1 values. 89 * 90 * It is invalid to use this enum value if any of the layers added 91 * Specification version 4 (iOS >= 13, macOS >= 10.15) onwards are used in the network. 92 * Validator will raise an error in that case. 93 */ 94 RANK5_ARRAY_MAPPING = 0; 95 96 /* 97 * The exact shape and rank (i.e. number of dimensions in the shape) of the input, 98 * as specified in the message "ArrayFeatureType", is passed through to the layers. 99 * Supported only for Specification version >= 4 (iOS >= 13, macOS >= 10.15). 100 */ 101 EXACT_ARRAY_MAPPING = 1; 102 103} 104 105enum NeuralNetworkImageShapeMapping { 106 107 /* 108 * Describes how the shape of the input tensors is constructed from image inputs. 109 */ 110 111 /* 112 * In this case, image input is mapped to a rank 5 tensor. 113 * For Color images, input tensor is shaped as [1,1,3,H,W]. 114 * For Gray images, input tensor is shaped as [1,1,1,H,W]. 115 */ 116 RANK5_IMAGE_MAPPING = 0; 117 118 /* 119 * For Color images, input tensor is shaped as [1,3,H,W]. 120 * For Gray images, input tensor is shaped as [1,1,H,W]. 121 * Supported only for Specification version >= 4 (iOS >= 13, macOS >= 10.15). 122 */ 123 RANK4_IMAGE_MAPPING = 1; 124 125} 126 127/** 128 A neural network. 129 */ 130message NeuralNetwork { 131 132 repeated NeuralNetworkLayer layers = 1; 133 repeated NeuralNetworkPreprocessing preprocessing = 2; 134 135 // use this enum value to determine the input tensor shapes to the neural network, for multiarray inputs 136 NeuralNetworkMultiArrayShapeMapping arrayInputShapeMapping = 5; 137 138 // use this enum value to determine the input tensor shapes to the neural network, for image inputs 139 NeuralNetworkImageShapeMapping imageInputShapeMapping = 6; 140 141 142 NetworkUpdateParameters updateParams = 10; 143 144} 145 146/// Preprocessing 147/// ------------- 148 149/** 150 * A neural network preprocessor that 151 * performs a scalar multiplication of an image 152 * followed by addition of scalar biases to the channels. 153 * 154 * Input: X 155 * An image in BGR or RGB format with shape ``[3, H, W]`` 156 * or in grayscale format with shape ``[1, H, W]``. 157 * Output: Y 158 * An image with format and shape corresponding to the input. 159 * 160 * If the input image is in BGR format: 161 * 162 * .. code:: 163 * 164 * Y[0, :, :] = channelScale * X[0, :, :] + blueBias 165 * Y[1, :, :] = channelScale * X[1, :, :] + greenBias 166 * Y[2, :, :] = channelScale * X[2, :, :] + redBias 167 * 168 * If the input image is in RGB format: 169 * 170 * .. code:: 171 * 172 * Y[0, :, :] = channelScale * X[0, :, :] + redBias 173 * Y[1, :, :] = channelScale * X[1, :, :] + greenBias 174 * Y[2, :, :] = channelScale * X[2, :, :] + blueBias 175 * 176 * If the input image is in grayscale format: 177 * 178 * .. code:: 179 * 180 * Y[0, :, :] = channelScale * X[0, :, :] + grayBias 181 */ 182message NeuralNetworkImageScaler { 183 184 float channelScale = 10; ///Scalar to be multiplied. 185 float blueBias = 20; ///Scalar blue bias to be added. 186 float greenBias = 21; ///Scalar green bias to be added. 187 float redBias = 22; ///Scalar red bias to be added. 188 float grayBias = 30; ///Scalar bias to be added for grayscale images. 189 190} 191 192/** 193 * A neural network preprocessor that 194 * subtracts the provided mean image from the input image. 195 * The mean image is subtracted from the input named 196 * ``NeuralNetworkPreprocessing.featureName``. 197 */ 198message NeuralNetworkMeanImage { 199 200 /** 201 * Mean image stored as a flattened array of floats, 202 * representing shape [Channel,Height,Width]. 203 */ 204 repeated float meanImage = 1; 205 206} 207 208/// Preprocessing parameters for image inputs. 209message NeuralNetworkPreprocessing { 210 211 string featureName = 1; /// must be equal to the input name to which the preprocessing is applied 212 oneof preprocessor { 213 NeuralNetworkImageScaler scaler = 10; 214 NeuralNetworkMeanImage meanImage = 11; 215 } 216 217} 218 219/// Activation Functions 220/// -------------------- 221 222/** 223 * A rectified linear unit (ReLU) activation function. 224 * 225 * This function has the following formula: 226 * 227 * .. math:: 228 * f(x) = \text{max}(0, x) 229 */ 230message ActivationReLU { 231 232} 233 234/** 235 * A leaky rectified linear unit (ReLU) activation function. 236 * 237 * This function has the following formula: 238 * 239 * .. math:: 240 * f(x) = \begin{cases} 241 * x & \text{if } x \geq 0 \\ 242 * \alpha x & \text{if } x < 0 243 * \end{cases} 244 */ 245message ActivationLeakyReLU { 246 247 float alpha = 1; //negative slope value for leakyReLU 248 249} 250 251/** 252 * A hyperbolic tangent activation function. 253 * 254 * This function has the following formula: 255 * 256 * .. math:: 257 * f(x) = \dfrac{1 - e^{-2x}}{1 + e^{-2x}} 258 */ 259message ActivationTanh { 260 261} 262 263/** 264 * A scaled hyperbolic tangent activation function. 265 * 266 * This function has the following formula: 267 * 268 * .. math:: 269 * f(x) = \alpha \tanh(\beta x) 270 */ 271message ActivationScaledTanh { 272 273 float alpha = 1; 274 float beta = 2; 275 276} 277 278/** 279 * A sigmoid activation function. 280 * 281 * This function has the following formula: 282 * 283 * .. math:: 284 * f(x) = \dfrac{1}{1 + e^{-x}} 285 */ 286message ActivationSigmoid { 287 288} 289 290/** 291 * A linear activation function. 292 * 293 * This function has the following formula: 294 * 295 * .. math:: 296 * f(x) = \alpha x + \beta 297 */ 298message ActivationLinear { 299 300 float alpha = 1; 301 float beta = 2; 302 303} 304 305/** 306 * A hard sigmoid activation function. 307 * 308 * This function has the following formula: 309 * 310 * .. math:: 311 * f(x) = \text{min}(\text{max}(\alpha x + \beta, 0), 1) 312 */ 313message ActivationSigmoidHard { 314 315 float alpha = 1; 316 float beta = 2; 317 318} 319 320/** 321 * A parameterized rectified linear unit (PReLU) activation function. 322 * Input must be at least rank 3. Axis = -3 is denoted by "C", or channels. 323 * "alpha" parameter can be a vector of length C. 324 * 325 * This function has the following formula: 326 * 327 * .. math:: 328 * f(x_i) = \begin{cases} 329 * x_i & \text{if } x_i \geq 0 \\ 330 * \alpha_i x_i & \text{if } x_i < 0 331 * \end{cases} \;,\;i=1,...,C 332 */ 333message ActivationPReLU { 334 335 // parameter of length C or 1. 336 // If length is 1, same value is used for all channels 337 WeightParams alpha = 1; 338 339} 340 341/** 342 * An exponential linear unit (ELU) activation function. 343 * 344 * This function has the following formula: 345 * 346 * .. math:: 347 * f(x) = \begin{cases} 348 * x & \text{if } x \geq 0 \\ 349 * \alpha (e^x - 1) & \text{if } x < 0 350 * \end{cases} 351 */ 352message ActivationELU { 353 354 float alpha = 1; 355 356} 357 358/** 359 * A thresholded rectified linear unit (ReLU) activation function. 360 * 361 * This function has the following formula: 362 * 363 * .. math:: 364 * f(x) = \begin{cases} 365 * x & \text{if } x \geq \alpha \\ 366 * 0 & \text{if } x < \alpha 367 * \end{cases} 368 */ 369message ActivationThresholdedReLU { 370 371 float alpha = 1; 372 373} 374 375/** 376 * A softsign activation function. 377 * 378 * This function has the following formula: 379 * 380 * .. math:: 381 * f(x) = \dfrac{x}{1 + |x|} 382 */ 383message ActivationSoftsign { 384 385} 386 387/** 388 * A softplus activation function. 389 * 390 * This function has the following formula: 391 * 392 * .. math:: 393 * f(x) = \text{log}(1 + e^x) 394 */ 395message ActivationSoftplus { 396 397} 398 399/** 400 * A parametric softplus activation function. 401 * Input must be at least rank 3. axis = -3 is denoted by "C", or channels. 402 * "alpha"/"beta" parameter can be a vector of length C. 403 * 404 * This function has the following formula: 405 * 406 * .. math:: 407 * f(x_i) = \alpha_i \text{log}(1 + e^{\beta_i x_i}) \;,\;i=1,...,C 408 */ 409message ActivationParametricSoftplus { 410 411 // If length is 1, same value is used for all channels 412 WeightParams alpha = 1; //parameter of length C or 1 413 WeightParams beta = 2; //parameter of length C or 1 414 415} 416 417message ActivationParams { 418 419 oneof NonlinearityType { 420 ActivationLinear linear = 5; 421 422 ActivationReLU ReLU = 10; 423 ActivationLeakyReLU leakyReLU = 15; 424 ActivationThresholdedReLU thresholdedReLU = 20; 425 ActivationPReLU PReLU = 25; 426 427 ActivationTanh tanh = 30; 428 ActivationScaledTanh scaledTanh = 31; 429 430 ActivationSigmoid sigmoid = 40; 431 ActivationSigmoidHard sigmoidHard = 41; 432 433 ActivationELU ELU = 50; 434 435 ActivationSoftsign softsign = 60; 436 ActivationSoftplus softplus = 70; 437 ActivationParametricSoftplus parametricSoftplus = 71; 438 } 439 440} 441 442/** 443 * Representation of the intermediate tensors 444 */ 445message Tensor { 446 447 // Number of dimensions in the tensor shape 448 uint32 rank = 1; 449 // actual value of the tensor shape. 450 // must be of length "rank". Can contain -1s for unknown dimensions. 451 repeated int64 dimValue = 2; 452 453} 454 455/** 456 * A single neural network layer. 457 */ 458message NeuralNetworkLayer { 459 460 string name = 1; //descriptive name of the layer 461 repeated string input = 2; 462 repeated string output = 3; 463 464 repeated Tensor inputTensor = 4; // must be the same length as the "input" field 465 repeated Tensor outputTensor = 5; // must be the same length as the "output" field 466 467 // Must be set to true to mark the layer as updatable. 468 // If true, the weightParams in the layer's properties must also be set to updatable 469 // If false, the value of the isUpdatable parameter within the layer's weights are ignored 470 bool isUpdatable = 10; 471 472 oneof layer { 473 474 // Start at 100 here 475 ConvolutionLayerParams convolution = 100; 476 477 PoolingLayerParams pooling = 120; 478 479 ActivationParams activation = 130; 480 481 InnerProductLayerParams innerProduct = 140; 482 EmbeddingLayerParams embedding = 150; 483 484 // Normalization-related Layers 485 BatchnormLayerParams batchnorm = 160; 486 MeanVarianceNormalizeLayerParams mvn = 165; 487 L2NormalizeLayerParams l2normalize = 170; 488 SoftmaxLayerParams softmax = 175; 489 LRNLayerParams lrn = 180; 490 491 CropLayerParams crop = 190; 492 PaddingLayerParams padding = 200; 493 UpsampleLayerParams upsample = 210; 494 495 ResizeBilinearLayerParams resizeBilinear = 211; 496 CropResizeLayerParams cropResize = 212; 497 498 UnaryFunctionLayerParams unary = 220; 499 500 // Element-wise Operations 501 AddLayerParams add = 230; 502 MultiplyLayerParams multiply = 231; 503 504 AverageLayerParams average = 240; 505 ScaleLayerParams scale = 245; 506 507 BiasLayerParams bias = 250; 508 MaxLayerParams max = 260; 509 MinLayerParams min = 261; 510 511 DotProductLayerParams dot = 270; 512 ReduceLayerParams reduce = 280; 513 LoadConstantLayerParams loadConstant = 290; 514 515 // Data Reorganization 516 ReshapeLayerParams reshape = 300; 517 FlattenLayerParams flatten = 301; 518 PermuteLayerParams permute = 310; 519 ConcatLayerParams concat = 320; 520 SplitLayerParams split = 330; 521 SequenceRepeatLayerParams sequenceRepeat = 340; 522 523 ReorganizeDataLayerParams reorganizeData = 345; 524 SliceLayerParams slice = 350; 525 526 // Recurrent Layers 527 SimpleRecurrentLayerParams simpleRecurrent = 400; 528 GRULayerParams gru = 410; 529 UniDirectionalLSTMLayerParams uniDirectionalLSTM = 420; 530 BiDirectionalLSTMLayerParams biDirectionalLSTM = 430; 531 532 // Custom (user-implemented) Layer 533 CustomLayerParams custom = 500; 534 535 // Following layers are available only after Core ML Specification 536 // version >= 4 (iOS >= 13, macOS >= 10.15) 537 538 // Control Flow related Layers 539 CopyLayerParams copy = 600; 540 BranchLayerParams branch = 605; 541 542 LoopLayerParams loop = 615; 543 LoopBreakLayerParams loopBreak = 620; 544 LoopContinueLayerParams loopContinue = 625; 545 546 RangeStaticLayerParams rangeStatic = 635; 547 RangeDynamicLayerParams rangeDynamic = 640; 548 549 // Element-wise Unary Layers 550 ClipLayerParams clip = 660; 551 CeilLayerParams ceil = 665; 552 FloorLayerParams floor = 670; 553 554 SignLayerParams sign = 680; 555 RoundLayerParams round = 685; 556 557 Exp2LayerParams exp2 = 700; 558 559 SinLayerParams sin = 710; 560 CosLayerParams cos = 715; 561 TanLayerParams tan = 720; 562 563 AsinLayerParams asin = 730; 564 AcosLayerParams acos = 735; 565 AtanLayerParams atan = 740; 566 567 SinhLayerParams sinh = 750; 568 CoshLayerParams cosh = 755; 569 TanhLayerParams tanh = 760; 570 571 AsinhLayerParams asinh = 770; 572 AcoshLayerParams acosh = 775; 573 AtanhLayerParams atanh = 780; 574 575 ErfLayerParams erf = 790; 576 GeluLayerParams gelu = 795; 577 578 // Element-wise Binary with Broadcasting Support 579 EqualLayerParams equal = 815; 580 NotEqualLayerParams notEqual = 820; 581 LessThanLayerParams lessThan = 825; 582 LessEqualLayerParams lessEqual = 827; 583 GreaterThanLayerParams greaterThan = 830; 584 GreaterEqualLayerParams greaterEqual = 832; 585 586 LogicalOrLayerParams logicalOr = 840; 587 LogicalXorLayerParams logicalXor = 845; 588 LogicalNotLayerParams logicalNot = 850; 589 LogicalAndLayerParams logicalAnd = 855; 590 591 ModBroadcastableLayerParams modBroadcastable = 865; 592 MinBroadcastableLayerParams minBroadcastable = 870; 593 MaxBroadcastableLayerParams maxBroadcastable = 875; 594 AddBroadcastableLayerParams addBroadcastable = 880; 595 PowBroadcastableLayerParams powBroadcastable = 885; 596 DivideBroadcastableLayerParams divideBroadcastable = 890; 597 FloorDivBroadcastableLayerParams floorDivBroadcastable = 895; 598 MultiplyBroadcastableLayerParams multiplyBroadcastable = 900; 599 SubtractBroadcastableLayerParams subtractBroadcastable = 905; 600 601 // Tensor Manipulations 602 TileLayerParams tile = 920; 603 StackLayerParams stack = 925; 604 GatherLayerParams gather = 930; 605 ScatterLayerParams scatter = 935; 606 GatherNDLayerParams gatherND = 940; 607 ScatterNDLayerParams scatterND = 945; 608 SoftmaxNDLayerParams softmaxND = 950; 609 GatherAlongAxisLayerParams gatherAlongAxis = 952; 610 ScatterAlongAxisLayerParams scatterAlongAxis = 954; 611 612 ReverseLayerParams reverse = 960; 613 ReverseSeqLayerParams reverseSeq = 965; 614 615 SplitNDLayerParams splitND = 975; 616 ConcatNDLayerParams concatND = 980; 617 TransposeLayerParams transpose = 985; 618 619 SliceStaticLayerParams sliceStatic = 995; 620 SliceDynamicLayerParams sliceDynamic = 1000; 621 SlidingWindowsLayerParams slidingWindows = 1005; 622 623 TopKLayerParams topK = 1015; 624 ArgMinLayerParams argMin = 1020; 625 ArgMaxLayerParams argMax = 1025; 626 627 EmbeddingNDLayerParams embeddingND = 1040; 628 BatchedMatMulLayerParams batchedMatmul = 1045; 629 630 // Tensor Allocation / Reshape-related Operations 631 GetShapeLayerParams getShape = 1065; 632 LoadConstantNDLayerParams loadConstantND = 1070; 633 634 FillLikeLayerParams fillLike = 1080; 635 FillStaticLayerParams fillStatic = 1085; 636 FillDynamicLayerParams fillDynamic = 1090; 637 638 BroadcastToLikeLayerParams broadcastToLike = 1100; 639 BroadcastToStaticLayerParams broadcastToStatic = 1105; 640 BroadcastToDynamicLayerParams broadcastToDynamic = 1110; 641 642 SqueezeLayerParams squeeze = 1120; 643 ExpandDimsLayerParams expandDims = 1125; 644 FlattenTo2DLayerParams flattenTo2D = 1130; 645 ReshapeLikeLayerParams reshapeLike = 1135; 646 ReshapeStaticLayerParams reshapeStatic = 1140; 647 ReshapeDynamicLayerParams reshapeDynamic = 1145; 648 RankPreservingReshapeLayerParams rankPreservingReshape = 1150; 649 650 ConstantPaddingLayerParams constantPad = 1155; 651 652 // Random Distributions 653 RandomNormalLikeLayerParams randomNormalLike = 1170; 654 RandomNormalStaticLayerParams randomNormalStatic = 1175; 655 RandomNormalDynamicLayerParams randomNormalDynamic = 1180; 656 657 RandomUniformLikeLayerParams randomUniformLike = 1190; 658 RandomUniformStaticLayerParams randomUniformStatic = 1195; 659 RandomUniformDynamicLayerParams randomUniformDynamic = 1200; 660 661 RandomBernoulliLikeLayerParams randomBernoulliLike = 1210; 662 RandomBernoulliStaticLayerParams randomBernoulliStatic = 1215; 663 RandomBernoulliDynamicLayerParams randomBernoulliDynamic = 1220; 664 665 CategoricalDistributionLayerParams categoricalDistribution = 1230; 666 667 // Reduction-related Layers: 668 ReduceL1LayerParams reduceL1 = 1250; 669 ReduceL2LayerParams reduceL2 = 1255; 670 ReduceMaxLayerParams reduceMax = 1260; 671 ReduceMinLayerParams reduceMin = 1265; 672 ReduceSumLayerParams reduceSum = 1270; 673 ReduceProdLayerParams reduceProd = 1275; 674 ReduceMeanLayerParams reduceMean = 1280; 675 ReduceLogSumLayerParams reduceLogSum = 1285; 676 ReduceSumSquareLayerParams reduceSumSquare = 1290; 677 ReduceLogSumExpLayerParams reduceLogSumExp = 1295; 678 679 // Masking / Selection Layers 680 WhereNonZeroLayerParams whereNonZero = 1313; 681 MatrixBandPartLayerParams matrixBandPart = 1315; 682 LowerTriangularLayerParams lowerTriangular = 1320; 683 UpperTriangularLayerParams upperTriangular = 1325; 684 WhereBroadcastableLayerParams whereBroadcastable = 1330; 685 686 // Normalization Layers 687 LayerNormalizationLayerParams layerNormalization = 1350; 688 689 NonMaximumSuppressionLayerParams NonMaximumSuppression = 1400; 690 691 // Following layers are available only after Core ML Specification 692 // version >= 5 (iOS >= 14, macOS >= 11.0) 693 OneHotLayerParams oneHot = 1450; 694 CumSumLayerParams cumSum = 1455; 695 ClampedReLULayerParams clampedReLU = 1460; 696 ArgSortLayerParams argSort = 1461; 697 Pooling3DLayerParams pooling3d = 1465; 698 GlobalPooling3DLayerParams globalPooling3d = 1466; 699 SliceBySizeLayerParams sliceBySize = 1470; 700 Convolution3DLayerParams convolution3d = 1471; 701 702 } 703 704} 705 706/** 707 * Branching Layer 708 * 709 * A layer that provides the functionality of branching or an If-Else block. 710 * 711 * Must have 1 input. There are no outputs as the execution is transferred to either the 712 * if or the else branch based on the value of the input. 713 * 714 * Input is the condition predicate. Must be a scalar (length 1 tensor). 715 * 716 */ 717message BranchLayerParams { 718 719 /** 720 * execute this graph if the absolute value of the input Tensor is greater than 1e-6 721 * This must be present. 722 */ 723 NeuralNetwork ifBranch = 1; 724 /** 725 * execute this graph if the absolute value of the input Tensor is less than 1e-6 726 * This is optional. 727 */ 728 NeuralNetwork elseBranch = 2; 729 730} 731 732/** 733 * Loop Layer 734 * 735 * A layer that provides the functionality of a "for" loop or a "while" loop. 736 * 737 * There are either no inputs or 1 input. When an input is present, it corresponds to the maximum loop count, 738 * in that case the value of the "maxLoopIterations" field is ignored. Input must be a scalar. 739 * (For description below, maxLoopIterations is assumed to be the value of the input, when its present) 740 * 741 * No outputs are produced. Blobs produced by the condition or the body network are visible in the scope of the overall network. 742 * 743 * "conditionNetwork" must produce a tensor with the name specified in the "conditionVar" field. 744 * 745 * There are 3 possible cases for determining the termination condition: 746 * 747 * Case 1: 748 * 749 * If there is no "conditionNetwork", in this case the layer corresponds to a pure for loop, which is run "maxLoopIterations" number of times. 750 * Equivalent pseudo-code: 751 * 752 * for loopIterator = 0 : maxLoopIterations 753 * bodyNetwork() 754 * 755 * 756 * Case 2: 757 * 758 * "conditionNetwork" is present, and "maxLoopIterations" is 0 and there is no input, 759 * in this case the layer corresponds to a while loop. Equivalent pseudo-code: 760 * 761 * conditionVar = conditionNetwork() 762 * while conditionVar: 763 * bodyNetwork() 764 * conditionVar = conditionNetwork() 765 * 766 * 767 * Case 3: 768 * 769 * "conditionNetwork" is provided, and "maxLoopIterations" is positive or there is an input, 770 * in this case the layer corresponds to a while loop with a joint condition. Equivalent pseudo-code: 771 * 772 * loopIterator = 0 773 * conditionVar = conditionNetwork() 774 * while (conditionVar and loopIterator < maxLoopIterations): 775 * bodyNetwork() 776 * loopIterator = loopIterator + 1 777 * conditionVar = conditionNetwork() 778 * 779 */ 780message LoopLayerParams { 781 782 /** 783 * maximum number of iterations. Ignored if input is present. 784 */ 785 uint64 maxLoopIterations = 1; 786 /** 787 * This field provides the name of the tensor which is produced by the conditionNetwork 788 * and whose value is checked to start/continue/terminate the loop. Value close to 0.0f is treated as False. 789 * This field is optional. 790 * Must be a non empty string if and only if "conditionNetwork" is present. 791 */ 792 string conditionVar = 2; 793 /** 794 * Must generate a tensor with the name provided in the "conditionVar" field. 795 * This field is optional. 796 * Must be present if and only if "conditionVar" field is a non empty string. 797 */ 798 NeuralNetwork conditionNetwork = 3; 799 /** 800 * Body of the loop. 801 * This field must be present. 802 */ 803 NeuralNetwork bodyNetwork = 4; 804 805} 806 807/** 808 * Loop break Layer 809 * 810 * Terminate the loop that has this layer. 811 * If present, it should always reside in the "bodyNetwork" of the loop layer 812 * 813 * No inputs/outputs 814 * 815 */ 816message LoopBreakLayerParams { 817 818} 819 820/** 821 * Loop Continue Layer 822 * 823 * Stop the current loop iteration and continue on the next iteration. 824 * If present, it should always reside in the "bodyNetwork" of the loop layer 825 * 826 * No inputs/outputs 827 * 828 */ 829message LoopContinueLayerParams { 830 831} 832 833/** 834 * Copy Layer 835 * 836 * A layer that copies its input tensor to the output tensor. 837 * Must have 1 input and 1 output, with distinct names. 838 * This is the only layer that is allowed to re-generate an output that is already present in the neural network prior to this layer, 839 * in which case it will overwrite the output tensor. 840 * 841 */ 842message CopyLayerParams { 843 844} 845 846/** 847 * GreaterThan Layer 848 * 849 * Either 1 or 2 inputs. 850 * Produces 1 output. 851 * Perform elementwise greater than operation. 852 * 853 * Output is 1.0f if the condition is true otherwise 0.0f. 854 * 855 * .. code:: 856 * 857 * y = x1 > x2 858 * or 859 * y = x1 > alpha, if only one input is provided 860 * 861 * Broadcasting is supported. 862 * 863 */ 864message GreaterThanLayerParams { 865 866 /** 867 * Compare to the scalar value provided here if there is 1 input 868 */ 869 float alpha = 2; 870 871} 872 873/** 874 * GreaterEqual Layer 875 * 876 * Either 1 or 2 inputs. 877 * Produces 1 output. 878 * Perform elementwise greater equal operation. 879 * 880 * Output is 1.0f if the condition is true otherwise 0.0f. 881 * 882 * .. code:: 883 * 884 * y = x1 >= x2 885 * or 886 * y = x1 >= alpha, if only one input is provided 887 * 888 * Broadcasting is supported. 889 * 890 */ 891message GreaterEqualLayerParams { 892 893 /** 894 * Compare to the scalar value provided here if there is 1 input 895 */ 896 float alpha = 2; 897 898} 899 900/** 901 * LessThan Layer 902 * 903 * Either 1 or 2 inputs. 904 * Produces 1 output. 905 * Perform elementwise less than operation. 906 * 907 * Output is 1.0f if the condition is true otherwise 0.0f. 908 * 909 * .. code:: 910 * 911 * y = x1 < x2 912 * or 913 * y = x1 < alpha, if only one input is provided 914 * 915 * Broadcasting is supported. 916 * 917 */ 918message LessThanLayerParams { 919 920 /** 921 * Compare to the scalar value provided here if there is 1 input 922 */ 923 float alpha = 2; 924 925} 926 927/** 928 * LessEqual Layer 929 * 930 * Either 1 or 2 inputs. 931 * Produces 1 output. 932 * Perform elementwise less equal operation. 933 * 934 * Output is 1.0f if the condition is true otherwise 0.0f. 935 * 936 * .. code:: 937 * 938 * y = x1 <= x2 939 * or 940 * y = x1 <= alpha, if only one input is provided 941 * 942 * Broadcasting is supported. 943 * 944 */ 945message LessEqualLayerParams { 946 947 /** 948 * Compare to the scalar value provided here if there is 1 input 949 */ 950 float alpha = 2; 951 952} 953 954/** 955 * Equal Layer 956 * 957 * Either 1 or 2 inputs. 958 * Produces 1 output. 959 * Perform elementwise equal operation. 960 * 961 * Output is 1.0f if the condition is true otherwise 0.0f. 962 * 963 * .. code:: 964 * 965 * y = x1 == x2 966 * or 967 * y = x1 == alpha, if only one input is provided 968 * 969 * Broadcasting is supported. 970 * 971 */ 972message EqualLayerParams { 973 974 /** 975 * Compare to the scalar value provided here if there is 1 input 976 */ 977 float alpha = 1; 978 979} 980 981/** 982 * NotEqual Layer 983 * 984 * Either 1 or 2 inputs. 985 * Produces 1 output. 986 * Perform elementwise not equal operation. 987 * 988 * Output is 1.0f if the condition is true otherwise 0.0f. 989 * 990 * .. code:: 991 * 992 * y = x1 != x2 993 * or 994 * y = x1 != alpha, if only one input is provided 995 * 996 * Broadcasting is supported. 997 * 998 */ 999message NotEqualLayerParams { 1000 1001 /** 1002 * Compare to the scalar value provided here if there is 1 input 1003 */ 1004 float alpha = 1; 1005 1006} 1007 1008/** 1009 * LogicalAnd Layer 1010 * 1011 * Must have 2 inputs, produces 1 output. 1012 * Perform elementwise logical AND operation. 1013 * 1014 * Input is considered False if equal to 0.0f otherwise True. 1015 * Output is 1.0f if the condition is true otherwise 0.0f. 1016 * 1017 * .. code:: 1018 * 1019 * y = AND(x1, x2) 1020 * 1021 * Broadcasting is supported. 1022 * 1023 */ 1024message LogicalAndLayerParams { 1025 1026} 1027 1028/** 1029 * LogicalOr Layer 1030 * 1031 * Must have 2 inputs, produces 1 output. 1032 * Perform elementwise logical OR operation. 1033 * 1034 * Input is considered False if equal to 0.0f otherwise True. 1035 * Output is 1.0f if the condition is true otherwise 0.0f. 1036 * 1037 * .. code:: 1038 * 1039 * y = OR(x1, x2) 1040 * 1041 * Broadcasting is supported. 1042 * 1043 */ 1044message LogicalOrLayerParams { 1045 1046} 1047 1048/** 1049 * LogicalXor Layer 1050 * 1051 * Must have 2 inputs, produces 1 output. 1052 * Perform elementwise logical XOR operation. 1053 * 1054 * Input is considered False if equal to 0.0f otherwise True. 1055 * Output is 1.0f if the condition is true otherwise 0.0f. 1056 * 1057 * .. code:: 1058 * 1059 * y = XOR(x1, x2) 1060 * 1061 * Broadcasting is supported. 1062 * 1063 */ 1064message LogicalXorLayerParams { 1065 1066} 1067 1068/** 1069 * LogicalNot Layer 1070 * 1071 * Must have 1 input, produces 1 output. 1072 * Perform elementwise logical NOT operation. 1073 * 1074 * Input is considered False if equal to 0.0f otherwise True. 1075 * Output is 1.0f if the condition is true otherwise 0.0f. 1076 * 1077 * .. code:: 1078 * 1079 * y = NOT(x) 1080 * 1081 * 1082 */ 1083message LogicalNotLayerParams { 1084 1085} 1086 1087/// Border Amounts 1088/// -------------- 1089 1090/** 1091 * Specifies the amount of spatial border to be either padded or cropped. 1092 * 1093 * For padding: 1094 * 1095 * .. code:: 1096 * 1097 * H_out = borderAmounts[0].startEdgeSize + H_in + borderAmounts[0].endEdgeSize 1098 * W_out = borderAmounts[1].startEdgeSize + W_in + borderAmounts[1].endEdgeSize 1099 * 1100 * topPaddingAmount == Height startEdgeSize 1101 * bottomPaddingAmount == Height endEdgeSize 1102 * leftPaddingAmount == Width startEdgeSize 1103 * rightPaddingAmount == Width endEdgeSize 1104 * 1105 * For cropping: 1106 * 1107 * .. code:: 1108 * 1109 * H_out = (-borderAmounts[0].startEdgeSize) + H_in + (-borderAmounts[0].endEdgeSize) 1110 * W_out = (-borderAmounts[1].startEdgeSize) + W_in + (-borderAmounts[1].endEdgeSize) 1111 * 1112 * topCropAmount == Height startEdgeSize 1113 * bottomCropAmount == Height endEdgeSize 1114 * leftCropAmount == Width startEdgeSize 1115 * rightCropAmount == Width endEdgeSize 1116 */ 1117message BorderAmounts { 1118 1119 message EdgeSizes { 1120 /** 1121 * The amount to be padded or cropped from the beginning. 1122 */ 1123 uint64 startEdgeSize = 1; 1124 1125 /** 1126 * The amount to be padded or cropped from the end. 1127 */ 1128 uint64 endEdgeSize = 2; 1129 } 1130 1131 /** 1132 * The border amounts. 1133 * This must be length 2 in the order ``[H, W]``. 1134 */ 1135 repeated EdgeSizes borderAmounts = 10; 1136 1137} 1138 1139/** 1140 * Specifies the type of padding to be used with Convolution/Deconvolution and Pooling layers. 1141 * After padding, input spatial shape: ``[H_in, W_in]``, gets modified to the 1142 * output spatial shape ``[H_out, W_out]``. 1143 * 1144 * .. code:: 1145 * 1146 * topPaddingAmount == Height startEdgeSize == borderAmounts[0].startEdgeSize 1147 * bottomPaddingAmount == Height endEdgeSize == borderAmounts[0].endEdgeSize 1148 * leftPaddingAmount == Width startEdgeSize == borderAmounts[1].startEdgeSize 1149 * rightPaddingAmount == Width endEdgeSize == borderAmounts[1].endEdgeSize 1150 * 1151 * With Convolution or Pooling: 1152 * 1153 * .. code:: 1154 * 1155 * H_out = int_division_round_down((H_in + topPaddingAmount + bottomPaddingAmount - KernelSize[0]),stride[0]) + 1 1156 * 1157 * which is same as: 1158 * 1159 * .. code:: 1160 * 1161 * H_out = int_division_round_up((H_in + topPaddingAmount + bottomPaddingAmount - KernelSize[0] + 1),stride[0]) 1162 * 1163 * With Deconvolution: 1164 * 1165 * .. code:: 1166 * 1167 * H_out = (H_in-1) * stride[0] + kernelSize[0] - (topPaddingAmount + bottomPaddingAmount) 1168 * 1169 * 1170 * The equivalent expressions hold true for ``W_out`` as well. 1171 * 1172 * 1173 * By default, the values of ``paddingAmounts`` are set to ``0``, 1174 * which results in a "true" valid padding. 1175 * If non-zero values are provided for ``paddingAmounts``, 1176 * "valid" convolution/pooling is performed within the spatially expanded input. 1177 * 1178 */ 1179message ValidPadding { 1180 1181 BorderAmounts paddingAmounts = 1; 1182 1183} 1184 1185/** 1186 * Specifies the type of padding to be used with Convolution/Deconvolution and pooling layers. 1187 * After padding, input spatial shape: ``[H_in, W_in]``, gets modified to the 1188 * output spatial shape ``[H_out, W_out]``. 1189 * With Convolution or pooling: 1190 * 1191 * .. code:: 1192 * 1193 * H_out = int_division_round_up(H_in,stride[0]) 1194 * W_out = int_division_round_up(W_in,stride[1]) 1195 * 1196 * This is achieved by using the following padding amounts: 1197 * 1198 * .. code:: 1199 * 1200 * totalPaddingHeight = max(0,(H_out-1) * stride[0] + KernelSize[0] - Hin) 1201 * totalPaddingWidth = max(0,(W_out-1) * stride[1] + KernelSize[1] - Win) 1202 * 1203 * There are two modes of asymmetry: 1204 * ``BOTTOM_RIGHT_HEAVY``, and ``TOP_LEFT_HEAVY``. 1205 * 1206 * If the mode is ``BOTTOM_RIGHT_HEAVY``: 1207 * 1208 * .. code:: 1209 * 1210 * topPaddingAmount = floor(totalPaddingHeight / 2) 1211 * bottomPaddingAmount = totalPaddingHeight - topPaddingAmount 1212 * leftPaddingAmount = floor(totalPaddingWidth / 2) 1213 * rightPaddingAmount = totalPaddingWidth - leftPaddingAmount 1214 * 1215 * If the mode is ``TOP_LEFT_HEAVY``: 1216 * 1217 * .. code:: 1218 * 1219 * bottomPaddingAmount = floor(totalPaddingHeight / 2) 1220 * topPaddingAmount = totalPaddingHeight - bottomPaddingAmount 1221 * rightPaddingAmount = floor(totalPaddingWidth / 2) 1222 * leftPaddingAmount = totalPaddingWidth - rightPaddingAmount 1223 * 1224 * 1225 * With Deconvolution: 1226 * 1227 * .. code:: 1228 * 1229 * H_out = H_in * stride[0] 1230 * W_out = W_in * stride[1] 1231 */ 1232message SamePadding { 1233 1234 enum SamePaddingMode { 1235 1236 BOTTOM_RIGHT_HEAVY = 0; 1237 TOP_LEFT_HEAVY = 1; 1238 1239 } 1240 SamePaddingMode asymmetryMode = 1; 1241 1242} 1243 1244/** 1245 * Specifies how grid points are sampled from an interval. 1246 * Without the loss of generality, assume the interval to be [0, X-1] from which N points are to be sampled. 1247 * Here X may correspond to an input image's height or width. 1248 * All the methods can be expressed in terms of numpy's linspace function, along with the constraint that grid points have to lie in the interval [0, X-1]. 1249 * Note: numpy.linspace(start = start, end = end, num = N, endpoint = True) corresponds to sampling 1250 * N points uniformly from the interval [start, end], endpoints included. 1251 * The methods vary in how the ``start`` and ``end`` values are computed. 1252 */ 1253message SamplingMode { 1254 1255 enum Method { 1256 1257 /** 1258 * start = 0, end = X-1 1259 * grid points = numpy.linspace(start, end) 1260 */ 1261 STRICT_ALIGN_ENDPOINTS_MODE = 0; 1262 1263 /** 1264 * if N == 1: start = end = (X-1)/2 1265 * otherwise, start = 0, end = X-1 1266 * grid points = numpy.linspace(start, end) 1267 */ 1268 ALIGN_ENDPOINTS_MODE = 1; 1269 1270 /** 1271 * start = 0, end = X - X/N 1272 * grid points = min(X-1, numpy.linspace(start, end)) 1273 * This is same as the mode used in the upsample layer in this specification, when used with bilinear interpolation. In that case N/X = upsample ratio. 1274 */ 1275 UPSAMPLE_MODE = 2; 1276 1277 /** 1278 * spacing = max(1, X-1)/N 1279 * start = 0.5 * spacing 1280 * end = start + (N-1) * spacing 1281 * grid points = min(X-1, numpy.linspace(start, end)) 1282 */ 1283 ROI_ALIGN_MODE = 3; 1284 1285 } 1286 1287 Method samplingMethod = 1; 1288 1289} 1290 1291/** 1292 * Specifies the convention used to specify four bounding box coordinates for an image of size (Height, Width). 1293 * The (0,0) coordinate corresponds to the top-left corner of the image. 1294 */ 1295message BoxCoordinatesMode { 1296 1297 enum Coordinates { 1298 1299 /** 1300 * [h_start, w_start, h_end, w_end] 1301 */ 1302 CORNERS_HEIGHT_FIRST = 0; 1303 1304 /** 1305 * [w_start, h_start, w_end, h_end] 1306 */ 1307 CORNERS_WIDTH_FIRST = 1; 1308 1309 /** 1310 * [h_center, w_center, box_height, box_width] 1311 */ 1312 CENTER_SIZE_HEIGHT_FIRST = 2; 1313 1314 /** 1315 * [w_center, h_center, box_width, box_height] 1316 */ 1317 CENTER_SIZE_WIDTH_FIRST = 3; 1318 1319 } 1320 1321 Coordinates boxMode = 1; 1322 1323} 1324 1325/** 1326 * Weights for layer parameters. 1327 * Weights are stored as repeated floating point numbers 1328 * using row-major ordering 1329 * and can represent 1-, 2-, 3-, or 4-dimensional data. 1330 */ 1331message WeightParams { 1332 1333 /** 1334 * Values specified in single / float / FP32 precision. 1335 */ 1336 repeated float floatValue = 1; 1337 1338 /** 1339 * Values in 16-bit half precision floating point. 1340 */ 1341 bytes float16Value = 2; 1342 1343 /** 1344 * Raw value specification for quantized lower precisions. 1345 * 1346 * This field is interpreted as uintN, where N is the number of bits in quantization. 1347 * E.g. if n=8, the field is interpreted as an array of UINT8. 1348 * Use this field for quantized parameters unless specifically noted to use 1349 * int8RawValue. 1350 */ 1351 bytes rawValue = 30; 1352 1353 /** 1354 * Field to be used if int8DynamicQuantize is set in the parent layer. 1355 * Cannot be set if rawValue is also set. 1356 * The values in this field are interpreted as INT8. 1357 * 1358 * If this field is set, following conditions must hold true: 1359 * * QuantizationType == LinearQuantizationParams, such that 1360 * * size of the "scale" field is 1 and "bias" field is empty in "LinearQuantizationParams" 1361 */ 1362 bytes int8RawValue = 31; 1363 1364 /** 1365 * Quantization related parameters. 1366 */ 1367 QuantizationParams quantization = 40; 1368 1369 bool isUpdatable = 50; 1370 1371} 1372 1373/** 1374 * Quantization parameters. 1375 */ 1376message QuantizationParams { 1377 1378 uint64 numberOfBits = 1; 1379 oneof QuantizationType { 1380 LinearQuantizationParams linearQuantization = 101; 1381 LookUpTableQuantizationParams lookupTableQuantization = 102; 1382 } 1383 1384} 1385 1386message LinearQuantizationParams { 1387 1388 /** 1389 * Stores scale and bias values corresponding to the quantized weights. 1390 * Must be an array of 1 element, or an array of C elements, where C 1391 * is number of output channels. For recurrent layers it is equal to 1392 * the output vector size. 1393 * 1394 * Relationship between quantized weights, unquantized weights, scale and bias: 1395 * 1396 * W_unquantized = W_quantized * scale + bias 1397 * 1398 */ 1399 repeated float scale = 1; 1400 repeated float bias = 2; 1401 1402} 1403 1404message LookUpTableQuantizationParams { 1405 1406 /* Stores look-up table quantization values. Must be an array of 1407 (2^numberOfBits) Elements. 1408 */ 1409 repeated float floatValue = 1; 1410 1411} 1412 1413/// Layers 1414/// ------ 1415 1416/** 1417 * A layer that performs spatial convolution or deconvolution. 1418 * 1419 * .. code:: 1420 * 1421 * y = ConvolutionLayer(x) 1422 * 1423 * Requires 1 or 2 inputs and produces 1 output. 1424 * 1425 * Input 1426 * First Input: 1427 * A blob with rank greater than or equal to 4. 1428 * Rank 4 blob represents [Batch, channels, height, width]. 1429 * For ranks greater than 4, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch. 1430 * 1431 * From Core ML specification version 4 onwards (iOS >= 13, macOS >= 10.15). 1432 * convolution layer can have 2 inputs, in which case the second input is 1433 * the blob representing the weights. This is allowed when "isDeconvolution" = False. 1434 * The weight blob should have shape 1435 * ``[outputChannels, kernelChannels, kernelHeight, kernelWidth]``, 1436 * where kernelChannels == inputChannels / nGroups. 1437 * 1438 * Output 1439 * Rank is same as the input. e.g.: for rank 4 input, output shape is [B, C_out, H_out, W_out] 1440 * 1441 * 1442 * If ``dilationFactor`` is not 1, effective kernel size is 1443 * modified as follows: 1444 * 1445 * .. code:: 1446 * 1447 * KernelSize[0] <-- (kernelSize[0]-1) * dilationFactor[0] + 1 1448 * KernelSize[1] <-- (kernelSize[1]-1) * dilationFactor[1] + 1 1449 * 1450 * Type of padding can be ``valid`` or ``same``. Output spatial dimensions depend on the 1451 * the type of padding. For details, refer to the descriptions of the messages "ValidPadding" 1452 * and "SamePadding". Padded values are all zeros. 1453 * 1454 * For Deconvolution, ``ConvolutionPaddingType`` (``valid`` or ``same``) is ignored when ``outputShape`` is set. 1455 * 1456 * 1457 */ 1458message ConvolutionLayerParams { 1459 1460 /** 1461 * The number of kernels. 1462 * Same as ``C_out`` used in the layer description. 1463 */ 1464 uint64 outputChannels = 1; 1465 1466 /** 1467 * Channel dimension of the kernels. 1468 * Must be equal to ``inputChannels / nGroups``, if isDeconvolution == False 1469 * Must be equal to ``inputChannels``, if isDeconvolution == True 1470 */ 1471 uint64 kernelChannels = 2; 1472 1473 /** 1474 * Group convolution, i.e. weight reuse along channel axis. 1475 * Input and kernels are divided into g groups 1476 * and convolution / deconvolution is applied within the groups independently. 1477 * If not set or 0, it is set to the default value 1. 1478 */ 1479 uint64 nGroups = 10; 1480 1481 /** 1482 * Must be length 2 in the order ``[H, W]``. 1483 * If not set, default value ``[3, 3]`` is used. 1484 */ 1485 repeated uint64 kernelSize = 20; 1486 1487 /** 1488 * Must be length 2 in the order ``[H, W]``. 1489 * If not set, default value ``[1, 1]`` is used. 1490 */ 1491 repeated uint64 stride = 30; 1492 1493 /** 1494 * Must be length 2 in order ``[H, W]``. 1495 * If not set, default value ``[1, 1]`` is used. 1496 * It is ignored if ``isDeconvolution == true``. 1497 */ 1498 repeated uint64 dilationFactor = 40; 1499 1500 /** 1501 * The type of padding. 1502 */ 1503 oneof ConvolutionPaddingType { 1504 ValidPadding valid = 50; 1505 SamePadding same = 51; 1506 } 1507 1508 /** 1509 * Flag to specify whether it is a deconvolution layer. 1510 */ 1511 bool isDeconvolution = 60; 1512 1513 /** 1514 * Flag to specify whether a bias is to be added or not. 1515 */ 1516 bool hasBias = 70; 1517 1518 /** 1519 * Weights associated with this layer. 1520 * If convolution (``isDeconvolution == false``), weights have the shape 1521 * ``[outputChannels, kernelChannels, kernelHeight, kernelWidth]``, where kernelChannels == inputChannels / nGroups 1522 * If deconvolution (``isDeconvolution == true``) weights have the shape 1523 * ``[kernelChannels, outputChannels / nGroups, kernelHeight, kernelWidth]``, where kernelChannels == inputChannels 1524 */ 1525 WeightParams weights = 90; 1526 WeightParams bias = 91; /// Must be of size [outputChannels]. 1527 1528 /** 1529 * The output shape, which has length 2 ``[H_out, W_out]``. 1530 * This is used only for deconvolution (``isDeconvolution == true``). 1531 * If not set, the deconvolution output shape is calculated 1532 * based on ``ConvolutionPaddingType``. 1533 */ 1534 repeated uint64 outputShape = 100; 1535 1536} 1537 1538/** 1539 * A layer that performs a 3-dimensional convolution. 1540 * 1541 * .. code:: 1542 * 1543 * y = Convolution3DLayer(x) 1544 * 1545 * Input 1546 * A blob of rank 5. 1547 * The input blob's shape should be ``[batch, channels, depth, height, width]``. 1548 * 1549 * Fields 1550 * The bias field, if set, should have shape of ``[channelsOut]``. 1551 * 1552 * Output 1553 * A blob of rank 5. 1554 * The output blob's shape is ``[batch, channelsOut, depthOut, heightOut, widthOut]``. 1555 * 1556 * Type of padding can be ``custom``, ``valid``, or ``same``. Padded values are all zeros. 1557 * Output spatial dimensions depend on the the type of padding. For details, refer to the 1558 * descriptions of the ``PaddingType`` field of this ``Convolution3DLayerParams`` message. 1559 * 1560 * Example 1561 * For example, given an input of size ``[1, 3, 3, 8, 8]``, a stride of 2 in each dimension, 1562 * a kernel of 3 in each dimension, 2 output channels, and ``same`` padding, this layer will 1563 * compute the total padding applied in the depth, height, and width dimensions to be 2, 1, and 1, 1564 * respectively. The depth padding is even and will be applied equally to both sides of the depth 1565 * dimension. Since the height and width padding values are odd, they'll be applied to the 1566 * bottom/right of the height/width dimensions. Thus, the padding applied to the input will be 1567 * ``[1, 1, 0, 1, 0, 1]`` (front, back, top, bottom, left, right). Finally, the output produced 1568 * will have size ``[1, 2, 2, 4, 4]``. 1569 * 1570 */ 1571message Convolution3DLayerParams { 1572 1573 /** 1574 * The number of channels in the output (channelsOut). Must be a positive integer. 1575 */ 1576 int32 outputChannels = 1; 1577 1578 /** 1579 * The number of channels in the input (channels). Must be a positive integer. 1580 */ 1581 int32 inputChannels = 2; 1582 1583 /** 1584 * Group convolution, i.e., weight reuse along the channel axis. 1585 * It must evenly divide both the number of input and output channels and be at most the number 1586 * of input channels (a depthwise convolution). 1587 * Input and kernels are divided into g groups and convolution is applied within the groups 1588 * independently. 1589 */ 1590 int32 nGroups = 10; 1591 1592 /* Depth of the convolution kernel. Must be a positive integer. 1593 */ 1594 int32 kernelDepth = 20; 1595 1596 /* Height of the convolution kernel. Must be a positive integer. 1597 */ 1598 int32 kernelHeight = 21; 1599 1600 /* Width of the convolution kernel. Must be a positive integer. 1601 */ 1602 int32 kernelWidth = 22; 1603 1604 /* Stride along the depth direction. Must be a positive integer. 1605 */ 1606 int32 strideDepth = 31; 1607 1608 /* Stride along the height direction. Must be a positive integer. 1609 */ 1610 int32 strideHeight = 32; 1611 1612 /* Stride along the width direction. Must be a positive integer. 1613 */ 1614 int32 strideWidth = 33; 1615 1616 /* Dilation along the depth direction. Must be a positive integer. 1617 */ 1618 int32 dilationDepth = 40; 1619 1620 /* Dilation along the height direction. Must be a positive integer. 1621 */ 1622 int32 dilationHeight = 41; 1623 1624 /* Dilation along the width direction. Must be a positive integer. 1625 */ 1626 int32 dilationWidth = 42; 1627 1628 /** 1629 * Flag to specify whether a bias is to be added or not. 1630 * If false, then no bias is added. 1631 */ 1632 bool hasBias = 50; 1633 1634 /** 1635 * Weights associated with this layer. 1636 * Weights have the shape 1637 * if deconvolution == False 1638 * ``[outputChannels, kernelChannels, kernelDepth, kernelHeight, kernelWidth]``, where 1639 * kernelChannels == inputChannels / nGroups 1640 * else if deconvolution == True 1641 * ``[outputChannels / nGroups, kernelChannels, kernelDepth, kernelHeight, kernelWidth]``, where 1642 */ 1643 WeightParams weights = 60; 1644 1645 /** 1646 * Must be of size ``[outputChannels]``. 1647 */ 1648 WeightParams bias = 61; 1649 1650 1651 /** 1652 * The type of padding. 1653 * All padding types pad the input shape with zeros. 1654 * CUSTOM padding will add the custom padding values specified below to their respective 1655 * dimensions, e.g., `customPaddingFront` number of zeros will be added to one side of the 1656 * input's depth dimension and `customPaddingBack` number of zeros will be added to the other 1657 * side of the input's depth dimension. 1658 * VALID padding adds no padding to any dimension. In this case, the last convolution along 1659 * each dimension will be dropped if the input dimension and the kernel size, stride, and 1660 * dilation do not match. 1661 * SAME padding adds enough padding to each dimension such that the output of the convolution 1662 * has size ``Ceiling(inputShape / stride)``. Padding is added evenly to both sides of each 1663 * dimension unless the total padding to add is odd, in which case it is added to the 1664 * back/bottom/right side of the respective dimension. For example, if the total padding needed 1665 * in the depth dimension is 3, 1 zero will be added to the front side of the depth dimension 1666 * and 2 zeros will be added to the back side. 1667 */ 1668 enum PaddingType { 1669 CUSTOM = 0; 1670 VALID = 1; 1671 SAME = 2; 1672 } 1673 PaddingType paddingType = 70; 1674 1675 /* Padding before the input in the depth direction. Must be zero or a positive integer. 1676 * Used when the `PaddingType` is `CustomPadding`, otherwise ignored by other padding types. 1677 */ 1678 int32 customPaddingFront = 80; 1679 1680 /* Padding after the input in the depth direction. Must be zero or a positive integer. 1681 * Used when the `PaddingType` is `CustomPadding`, otherwise ignored by other padding types. 1682 */ 1683 int32 customPaddingBack = 81; 1684 1685 /* Padding before the input in the height direction. Must be zero or a positive integer. 1686 * Used when the `PaddingType` is `CustomPadding`, otherwise ignored by other padding types. 1687 */ 1688 int32 customPaddingTop = 82; 1689 1690 /* Padding after the input in the height direction. Must be zero or a positive integer. 1691 * Used when the `PaddingType` is `CustomPadding`, otherwise ignored by other padding types. 1692 */ 1693 int32 customPaddingBottom = 83; 1694 1695 /* Padding before the input in the width direction. Must be zero or a positive integer. 1696 * Used when the `PaddingType` is `CustomPadding`, otherwise ignored by other padding types. 1697 */ 1698 int32 customPaddingLeft = 84; 1699 1700 /* Padding after the input in the width direction. Must be zero or a positive integer. 1701 * Used when the `PaddingType` is `CustomPadding`, otherwise ignored by other padding types. 1702 */ 1703 int32 customPaddingRight = 85; 1704 1705 /* Flag to specify if this is Convolution Transpose or not. 1706 */ 1707 bool isDeconvolution = 86; 1708 1709 /* 1710 * The output shape, which has length 3 ``[D_out, H_out, W_out]``. 1711 * This is used only for deconvolution (``isDeconvolution == true``). 1712 * If not set, the deconvolution output shape is calculated 1713 * based on ``PaddingType``. 1714 */ 1715 repeated uint64 outputShape = 87; 1716 1717} 1718 1719/** 1720 * A layer that performs a matrix-vector or matrix-matrix product. 1721 * This is equivalent to a fully-connected, or dense layer. 1722 * The weight parameters correspond to a matrix of dimensions (inputChannels, outputChannels) i.e. (C_in, C_out) 1723 * 1724 * .. code:: 1725 * 1726 * y = InnerProductLayer(x) 1727 * 1728 * Requires 1 input and produces 1 output. 1729 * 1730 * Input 1731 * Input can have rank 1 to rank 5. This is how it is reshaped in to the matrix (for rank > 1): 1732 * rank 1 (x1) : in this case, the layer corresponds to a matrix-vector product. x1 must be equal to C_in 1733 * rank 2 (x1, x2): x2 must be equal to C_in 1734 * rank 3 (x1, x2, x3) --> (x1 * x2, x3). x3 must be equal to C_in 1735 * rank 4 (x1, x2, x3, x4) ---> (x1, x2 * x3 * x4). x2 * x3 * x4 must be equal to C_in 1736 * rank 5 (x1, x2, x3, x4, x5) ---> (x1 * x2, x3 * x4 * x5). x3 * x4 * x5 must be equal to C_in 1737 * 1738 * Output 1739 * Output rank is same as the input rank 1740 * rank 1: (C_out) 1741 * rank 2: (x1, C_out) 1742 * rank 3: (x1, x2, C_out) 1743 * rank 4: (x1, C_out, 1, 1) 1744 * rank 5: (x1, x2, C_out, 1, 1) 1745 * 1746 */ 1747message InnerProductLayerParams { 1748 1749 uint64 inputChannels = 1; /// Input size: C_in. 1750 uint64 outputChannels = 2; /// Output size: C_out. 1751 1752 bool hasBias = 10; /// Whether a bias is added or not. 1753 1754 WeightParams weights = 20; /// Weight matrix [C_out, C_in]. 1755 WeightParams bias = 21; /// Bias vector [C_out]. 1756 1757 /** 1758 * If set, this layer, at runtime, quantizes the floating point input blob to int8 before applying an 1759 * inner product using INT8 weight matrix parameters, as provided in weights->int8RawValue. The 1760 * result is then dequantized. 1761 * Requires: 1762 * * hasBias == false 1763 * * QuantizationType == LinearQuantizationParams, such that 1764 * * size of the "scale" field is 1 and "bias" field is empty in "LinearQuantizationParams" 1765 * * numberOfBits == 8 1766 * * weights->rawValue_size to be empty 1767 */ 1768 bool int8DynamicQuantize = 22; 1769 1770} 1771 1772/** 1773 * A layer that performs a matrix lookup and optionally adds a bias. 1774 * The weights matrix is stored with dimensions [outputChannels, inputDim]. 1775 * 1776 * .. code:: 1777 * 1778 * y = EmbeddingLayer(x) 1779 * 1780 * Requires 1 input and produces 1 output. 1781 * 1782 * Input 1783 * Input values must be in the range ``[0, inputDim - 1]``. 1784 * 1785 * Input must have rank equal to 4 or 5, such that the last 3 dimensions are all 1. 1786 * rank 4: shape (x1, 1, 1, 1). x1 is effectively the batch/sequence length. 1787 * rank 5: shape (x1, x2 , 1, 1, 1). x1 * x2 is effectively the combined batch/sequence length. 1788 * 1789 * Output 1790 * Output rank is same as the input rank. Please see input description above. 1791 * rank 4: shape (x1, outputChannels, 1, 1) 1792 * rank 5: shape (x1, x2, outputChannels, 1, 1) 1793 * 1794 */ 1795message EmbeddingLayerParams { 1796 1797 uint64 inputDim = 1; /// Size of the input dictionary. 1798 uint64 outputChannels = 2; /// Size of the output vectors. 1799 1800 bool hasBias = 10; /// Whether a bias is added or not. 1801 1802 WeightParams weights = 20; /// 2-D weights of dimensions [outputChannels, inputDim]. 1803 WeightParams bias = 21; /// Bias of size [outputChannels]. 1804 1805} 1806 1807/** 1808 * A layer that performs a matrix lookup and optionally adds a bias. 1809 * The weights matrix is stored with dimensions [embeddingSize, vocabSize]. 1810 * 1811 * .. code:: 1812 * 1813 * y = EmbeddingNDLayer(x) 1814 * 1815 * Requires 1 input and produces 1 output. 1816 * 1817 * Input 1818 * Input values must be in the range ``[0, vocabSize - 1]``. 1819 * Input must have rank at least 2. The last dimension must always be 1. 1820 * rank 2: shape (x1, 1). x1 is the batch/sequence length. 1821 * rank 3: shape (x1, x2, 1). x1 * x2 is effectively the combined batch/sequence length. 1822 * rank 4: shape (x1, x2, x3, 1). x1 * x2 * x2 is effectively the combined batch/sequence length. 1823 * rank 5: shape (x1, x2 , x3, x4, 1). x1 * x2 * x3 * x4 is effectively the combined batch/sequence length. 1824 * 1825 * Output 1826 * Output rank is same as the input rank. Please see input description above. 1827 * rank 2: shape (x1, embeddingSize) 1828 * rank 3: shape (x1, x2, embeddingSize) 1829 * rank 4: shape (x1, x2, x3, embeddingSize) 1830 * rank 5: shape (x1, x2, x3, x4, embeddingSize) 1831 * 1832 */ 1833message EmbeddingNDLayerParams { 1834 1835 uint64 vocabSize = 1; /// Size of the input dictionary. 1836 uint64 embeddingSize = 2; /// Size of the output vectors. 1837 bool hasBias = 3; /// Whether a bias is added or not. 1838 WeightParams weights = 20; /// 2-D weights of dimensions [embeddingSize, vocabSize]. 1839 WeightParams bias = 21; /// Bias of size [embeddingSize]. 1840 1841} 1842 1843/** 1844 * A layer that performs batch normalization, 1845 * which is performed along axis = -3, 1846 * and repeated along the other axes, if present. 1847 * 1848 * .. code:: 1849 * 1850 * y = BatchnormLayer(x) 1851 * 1852 * Requires 1 input and produces 1 output. 1853 * 1854 * This operation is described by the following formula: 1855 * 1856 * .. math:: 1857 * y_i = \gamma_i \dfrac{ (x_i - \mu_i)}{\sqrt{\sigma_i^2 + \epsilon}} + \beta_i \;,\;i=1,....,C 1858 * 1859 * Input 1860 * A blob with rank greater than equal to 3. 1861 * Example: Rank 4 blob represents [Batch, channels, height, width] 1862 * For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch. 1863 * 1864 * Output 1865 * A blob with the same shape as the input. 1866 */ 1867message BatchnormLayerParams { 1868 1869 uint64 channels = 1; /// Size of the channel dimension in the input. 1870 1871 /** 1872 * If ``computeMeanVar == true``, 1873 * the mean and variance are calculated from either 1874 * the single input instance, if ``instanceNormalization == true``, 1875 * or the whole batch, if ``instanceNormalization = false``. 1876 * and the values provided in parameters "mean" and "variance" are ignored. 1877 */ 1878 bool computeMeanVar = 5; 1879 bool instanceNormalization = 6; 1880 1881 /** 1882 * A small constant to avoid division by 0 while normalizing by variance. 1883 * Defaults to ``1e-5`` if not set or set to ``0``. 1884 */ 1885 float epsilon = 10; 1886 1887 WeightParams gamma = 15; /// Parameter of length [channels] 1888 WeightParams beta = 16; /// Parameter of length [channels] 1889 WeightParams mean = 17; /// Parameter of length [channels] 1890 WeightParams variance = 18; /// Parameter of length [channels] 1891 1892} 1893 1894/** 1895 * A spatial pooling layer. 1896 * 1897 * .. code:: 1898 * 1899 * y = PoolingLayer(x) 1900 * 1901 * Requires 1 input and produces 1 output. 1902 * 1903 * Input 1904 * A blob with rank greater than equal to 4. 1905 * Rank 4 blob represents [Batch, channels, height, width] 1906 * For ranks greater than 4, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch. 1907 * 1908 * Output 1909 * Rank is same as the input. e.g.: for rank 4 input, output shape is [B, C, H_out, W_out] 1910 * 1911 * Padding options are similar to ``ConvolutionLayerParams`` 1912 * with the additional option of ``ValidCompletePadding`` (``includeLastPixel``), 1913 * which ensures that the last application of the kernel 1914 * always includes the last pixel of the input image, if there is padding. 1915 * 1916 * .. code:: 1917 * 1918 * H_out = ceil(float(H_in + 2 * paddingAmounts[0] - kernelSize[0])/float(Stride[0])) + 1 1919 * if (paddingAmounts[0] > 0 or paddingAmounts[1] > 0) 1920 * if ((H_out - 1) * Stride >= H_in + paddingAmounts[0]) { 1921 * H_out = H_out - 1 1922 * } 1923 * } 1924 * 1925 * The equivalent expressions hold true for ``W_out`` as well. 1926 * Only symmetric padding is supported with this option. 1927 */ 1928message PoolingLayerParams { 1929 1930 enum PoolingType { 1931 1932 MAX = 0; 1933 AVERAGE = 1; 1934 L2 = 2; 1935 1936 } 1937 PoolingType type = 1; /// Type of pooling operation. 1938 1939 /** 1940 * Must be length 2 in the order ``[H, W]``. 1941 * If not set, default value ``[3, 3]`` is used. 1942 */ 1943 repeated uint64 kernelSize = 10; 1944 1945 /** 1946 * Must be length 2 in the order ``[H, W]``. 1947 * If not set, default value ``[1, 1]`` is used. 1948 */ 1949 repeated uint64 stride = 20; 1950 1951 message ValidCompletePadding { 1952 1953 /** 1954 * Must be length 2 in order ``[H, W]``. 1955 * If not set, value ``[0, 0]`` is used. 1956 */ 1957 repeated uint64 paddingAmounts = 10; 1958 1959 } 1960 1961 oneof PoolingPaddingType { 1962 ValidPadding valid = 30; 1963 SamePadding same = 31; 1964 ValidCompletePadding includeLastPixel = 32; 1965 } 1966 1967 /** 1968 * If true, padded values are excluded from the count (denominator) 1969 * when computing average pooling. 1970 */ 1971 bool avgPoolExcludePadding = 50; 1972 1973 /** 1974 * If true, global pooling is performed. 1975 * Kernel size is inferred from the input data spatial dimensions. 1976 */ 1977 bool globalPooling = 60; 1978 1979} 1980 1981/* 1982 * A layer to pool three spatial dimensions 1983 * 1984 * Input 1985 * A blob with rank equal to 5, representing [Batch, channels, depth, height, width]. 1986 * 1987 * Output 1988 * Rank is same as the input: A blob with rank equal to 5, representing [Batch, channels, depth, height, width]. 1989 * 1990 * Requires 1 input and produces 1 output. 1991 * 1992 * For example, given an input of shape (1,1,2,3,3): 1993 * +----+----+----+ 1994 * / | 10 | 11 | 12 | 1995 * / +----+----+----+ 1996 * / | 13 | 14 | 15 | 1997 * / +----+----+----+ 1998 * / | 16 | 17 | 18 | 1999 * / +----+----+----+ 2000 * +----+----+----+ / 2001 * | 1 | 2 | 3 | / 2002 * +----+----+----+ / 2003 * | 4 | 5 | 6 | / 2004 * +----+----+----+ / 2005 * | 7 | 8 | 9 | / 2006 * +----+----+----+ 2007 * 2008 * And applying MAX pooling using: 2009 * Kernel: 2x2x2 2010 * Stride: 1x1x1 2011 * Valid Padding 2012 * We expect to get an output with shape: (1,1,1,2,2) and value: 2013 * +----+----+ 2014 * | 14 | 15 | 2015 * +----+----+ 2016 * | 17 | 18 | 2017 * +----+----+ 2018 */ 2019message Pooling3DLayerParams { 2020 2021 enum PoolingType3D { 2022 MAX = 0; 2023 AVERAGE = 1; 2024 } 2025 2026 // Whether to use Max or Average 2027 PoolingType3D type = 1; 2028 2029 // Depth of the pooling region. 2030 int32 kernelDepth = 2; 2031 2032 // Height of the pooling region. 2033 int32 kernelHeight = 3; 2034 2035 // Width of the pooling region. 2036 int32 kernelWidth = 4; 2037 2038 // Stride along the depth direction 2039 int32 strideDepth = 5; 2040 2041 // Stride along the height direction 2042 int32 strideHeight = 6; 2043 2044 // Stride along the width direction 2045 int32 strideWidth = 7; 2046 2047 /** 2048 * The type of padding. 2049 * All padding types pad the input shape with zeros. 2050 * CUSTOM padding will add the custom padding values specified below to their respective 2051 * dimensions, e.g., `customPaddingFront` number of zeros will be added to one side of the 2052 * input's depth dimension and `customPaddingBack` number of zeros will be added to the other 2053 * side of the input's depth dimension. 2054 * VALID padding adds no padding to any dimension. In this case, the last pool along 2055 * each dimension will be dropped if the input dimension and the kernel size, and stride do not match. 2056 * SAME padding adds enough padding to each dimension such that the output 2057 * has the same spatial dimensions as the input. Padding is added evenly to both 2058 * sides of each dimension unless the total padding to add is odd, in which case the extra padding 2059 * is added to the back/bottom/right side of the respective dimension. For example, if the the 2060 * total horizontal padding is 3, then there will be 1 padding on the left, and 2 padding on the right. 2061 */ 2062 enum Pooling3DPaddingType { 2063 CUSTOM = 0; 2064 VALID = 1; 2065 SAME = 2; 2066 } 2067 Pooling3DPaddingType paddingType = 15; 2068 2069 // Padding before the input in the depth direction. 2070 int32 customPaddingFront = 8; 2071 2072 // Padding after the input in the depth direction. 2073 int32 customPaddingBack = 9; 2074 2075 // Padding before the input in the height direction. 2076 int32 customPaddingTop = 10; 2077 2078 // Padding after the input in the height direction. 2079 int32 customPaddingBottom = 11; 2080 2081 // Padding before the input in the width direction. 2082 int32 customPaddingLeft = 12; 2083 2084 // Padding after the input in the width direction. 2085 int32 customPaddingRight = 13; 2086 2087 // If true, exclude zeros from padding in Average pooling. Meaningless in Max Pooling. 2088 bool countExcludePadding = 14; 2089} 2090 2091/* 2092 * A layer to pool three spatial dimensions down to one value. 2093 * This behaves like a special case of Pooling3DLayerParams in which 2094 * the Kernel is the size of the input and there is no padding. 2095 * 2096 * Input 2097 * A blob with rank equal to 5, representing [Batch, channels, depth, height, width]. 2098 * 2099 * Output 2100 * Rank is same as the input: A blob with rank equal to 5, representing [Batch, channels, depth, height, width]. 2101 * Depth, height, and width of the output will always be 1. 2102 * 2103 * Requires 1 input and produces 1 output. 2104 * 2105 * For example, given an input of shape (1,1,2,3,3): 2106 * +----+----+----+ 2107 * / | 10 | 11 | 12 | 2108 * / +----+----+----+ 2109 * / | 13 | 14 | 15 | 2110 * / +----+----+----+ 2111 * / | 16 | 17 | 18 | 2112 * / +----+----+----+ 2113 * +----+----+----+ / 2114 * | 1 | 2 | 3 | / 2115 * +----+----+----+ / 2116 * | 4 | 5 | 6 | / 2117 * +----+----+----+ / 2118 * | 7 | 8 | 9 | / 2119 * +----+----+----+ 2120 * 2121 * And applying MAX global 3d pooling, we expect to get an output with shape: (1,1,1,1,1) and value: 2122 * +----+ 2123 * | 18 | 2124 * +----+ 2125 */ 2126message GlobalPooling3DLayerParams { 2127 2128 enum GlobalPoolingType3D { 2129 MAX = 0; 2130 AVERAGE = 1; 2131 } 2132 2133 // Whether to use Max or Average 2134 GlobalPoolingType3D type = 1; 2135} 2136 2137/** 2138 * A layer that performs padding along spatial dimensions. 2139 * 2140 * .. code:: 2141 * 2142 * y = PaddingLayer(x) 2143 * 2144 * Requires 1 input and produces 1 output. 2145 * 2146 * Input 2147 * A blob with rank at least 2. 2148 * e.g.: blob with shape ``[H_in, W_in]``. 2149 * For ranks greater than 2, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch 2150 * i.e. Padding is applied on last two dimensions. 2151 * 2152 * Output 2153 * Same rank as the input. 2154 * e.g.: blob with shape ``[H_out, W_out]``. 2155 * 2156 * Output dimensions are calculated as follows: 2157 * 2158 * .. code:: 2159 * 2160 * H_out = H_in + topPaddingAmount + bottomPaddingAmount 2161 * W_out = W_in + leftPaddingAmount + rightPaddingAmount 2162 * 2163 * topPaddingAmount == Height startEdgeSize == borderAmounts[0].startEdgeSize 2164 * bottomPaddingAmount == Height endEdgeSize == borderAmounts[0].endEdgeSize 2165 * leftPaddingAmount == Width startEdgeSize == borderAmounts[1].startEdgeSize 2166 * rightPaddingAmount == Width endEdgeSize == borderAmounts[1].endEdgeSize 2167 * 2168 * There are three types of padding: 2169 * 2170 * - ``PaddingConstant``, which fills a constant value at the border. 2171 * - ``PaddingReflection``, which reflects the values at the border. 2172 * - ``PaddingReplication``, which replicates the values at the border. 2173 * 2174 * Given the following input: 2175 * 2176 * .. code:: 2177 * 2178 * [1, 3, 4] : 1 2 3 4 2179 * 5 6 7 8 2180 * 9 10 11 12 2181 * 2182 * Here is the output of applying the padding 2183 * ``(top=2, left=2, bottom=0, right=0)`` 2184 * with each of the supported types: 2185 * 2186 * - ``PaddingConstant`` (``value = 0``): 2187 * .. code:: 2188 * 2189 * [1, 5, 6] : 0 0 0 0 0 0 2190 * 0 0 0 0 0 0 2191 * 0 0 1 2 3 4 2192 * 0 0 5 6 7 8 2193 * 0 0 9 10 11 12 2194 * 2195 * - ``PaddingReflection``: 2196 * .. code:: 2197 * 2198 * [1, 5, 6] : 11 10 9 10 11 12 2199 * 7 6 5 6 7 8 2200 * 3 2 1 2 3 4 2201 * 7 6 5 6 7 8 2202 * 11 10 9 10 11 12 2203 * 2204 * - ``PaddingReplication``: 2205 * .. code:: 2206 * 2207 * [1, 5, 6] : 1 1 1 2 3 4 2208 * 1 1 1 2 3 4 2209 * 1 1 1 2 3 4 2210 * 5 5 5 6 7 8 2211 * 9 9 9 10 11 12 2212 */ 2213message PaddingLayerParams { 2214 2215 /** 2216 * Fill a constant value in the padded region. 2217 */ 2218 message PaddingConstant { 2219 float value = 1; 2220 } 2221 2222 /** 2223 * Reflect the values at the border for padding. 2224 */ 2225 message PaddingReflection { 2226 } 2227 2228 /** 2229 * Replicate the values at the border for padding. 2230 */ 2231 message PaddingReplication { 2232 } 2233 2234 oneof PaddingType { 2235 PaddingConstant constant = 1; 2236 PaddingReflection reflection = 2; 2237 PaddingReplication replication = 3; 2238 } 2239 2240 BorderAmounts paddingAmounts = 10; /// Amounts to be padded to the input. 2241 2242} 2243 2244/** 2245 * A layer that concatenates along the axis = -3 or -5. 2246 * For general concatenation along any axis, see ConcatNDLayer. 2247 * 2248 * .. code:: 2249 * 2250 * y = ConcatLayer(x1,x2,....) 2251 * 2252 * Requires more than 1 input and produces 1 output. 2253 * 2254 * Input 2255 * All input blobs must have same rank. 2256 * If "sequenceConcat" = False, rank must be greater than equal to 3. In this case concatenation is along axis = -3 2257 * If "sequenceConcat" = True, rank must be greater than equal to 5. In this case concatenation is along axis = -5 2258 * 2259 * Output 2260 * Same rank as the input. 2261 * 2262 */ 2263message ConcatLayerParams { 2264 2265 /** 2266 * If true, concatenate along the axis = -5 instead of axis = -3. 2267 */ 2268 bool sequenceConcat = 100; 2269 2270} 2271 2272/** 2273 * A layer that performs local response normalization (LRN). 2274 * 2275 * .. code:: 2276 * 2277 * y = LRNLayer(x) 2278 * 2279 * Requires 1 input and produces 1 output. 2280 * 2281 * Input 2282 * A blob with rank greater than equal to 3. 2283 * Example: Rank 4 blob represents [Batch, channels, height, width] 2284 * For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch. 2285 * Output 2286 * A blob with the same shape as the input. 2287 * 2288 * This layer is described by the following formula: 2289 * 2290 * .. math:: 2291 * x_i \leftarrow \dfrac{x_i}{\left ( k + \dfrac{\alpha}{\text{localSize}} \sum_j x_j^2 \right )^\beta} 2292 * 2293 * where the summation is done over a ``(localSize, 1, 1)`` neighborhood --- 2294 * that is, over a window "across" channels in 1x1 spatial neighborhoods. 2295 */ 2296message LRNLayerParams { 2297 2298 float alpha = 1; 2299 float beta = 2; 2300 uint64 localSize = 3; /// Number of channels in the normalization window. 2301 float k = 4; /// Defaults to 1 if not set or 0. Must be strictly positive. 2302 2303} 2304 2305/** 2306 * Softmax Normalization Layer 2307 * 2308 * A layer that performs softmax normalization. 2309 * Normalization is applied along axis = -3 or N-3 (where N is the rank of the input) 2310 * For softmax layer that can operate on any axis, see SoftmaxNDLayer. 2311 * 2312 * 2313 * .. code:: 2314 * 2315 * y = SoftmaxLayer(x) 2316 * 2317 * Requires 1 input and produces 1 output. 2318 * 2319 * Input 2320 * Must be a blob with rank >= 3. 2321 * Output 2322 * A blob with the same shape as the input. 2323 * 2324 * This layer is described by the following formula: 2325 * 2326 * .. math:: 2327 * x_i \leftarrow \dfrac{e^{x_i}}{\sum_i{e^{x_i}}} 2328 */ 2329message SoftmaxLayerParams { 2330 2331} 2332 2333/** 2334 * A layer that uniformly splits across axis = -3 to produce a specified number of outputs. 2335 * For general split operation along any axis, see SplitNDLayer. 2336 * 2337 * .. code:: 2338 * 2339 * (y1,y2,...yN) = SplitLayer(x), where N = nOutputs 2340 * 2341 * Requires 1 input and produces multiple outputs. 2342 * 2343 * Input 2344 * A blob with rank at least 3. 2345 * e.g.: blob with shape ``[C, H, W]`` 2346 * Output 2347 * ``nOutputs`` blobs each with same rank as the input. 2348 * e.g.: For input that is of shape ``[C, H, W]``, output shapes will be ``[C/nOutputs, H, W]`` 2349 */ 2350message SplitLayerParams { 2351 2352 uint64 nOutputs = 1; /// The number of outputs. 2353 2354} 2355 2356/** 2357 * A layer that performs elementwise addition. 2358 * This layer has limited broadcasting support. For general broadcasting see AddBroadcastableLayer. 2359 * 2360 * .. code:: 2361 * 2362 * y = AddLayer(x1,x2,...) 2363 * 2364 * Requires 1 or more than 1 input and produces 1 output. 2365 * 2366 * Input 2367 * In general, there are no rank constraints. 2368 * However, only certain set of shapes are broadcastable. For example: 2369 * [B, 1, 1, 1], [B, C, 1, 1], [B, 1, H, W], [B, C, H, W] 2370 * Output 2371 * A blob with shape equal to the input blob. 2372 * 2373 * If only one input is provided, scalar addition is performed: 2374 * 2375 * .. math:: 2376 * y = x + \alpha 2377 * 2378 */ 2379message AddLayerParams { 2380 2381 /** 2382 * Scalar to be added to the input. 2383 * Only used if there is a single input. 2384 */ 2385 float alpha = 1; 2386 2387} 2388 2389/** 2390 * A layer that performs elementwise multiplication. 2391 * This layer has limited broadcasting support. For general broadcasting see MultiplyBroadcastableLayer. 2392 * 2393 * .. code:: 2394 * 2395 * y = MultiplyLayer(x1,x2,...) 2396 * 2397 * Requires 1 or more than 1 input and produces 1 output. 2398 * 2399 * Input 2400 * In general, there are no rank constraints. 2401 * However, only certain set of shapes are broadcastable. For example: 2402 * [B, 1, 1, 1], [B, C, 1, 1], [B, 1, H, W], [B, C, H, W] 2403 * Output 2404 * A blob with shape equal to the first input blob. 2405 * 2406 * If only one input is provided, scalar multiplication is performed: 2407 * 2408 * .. math:: 2409 * y = \alpha x 2410 * 2411 */ 2412message MultiplyLayerParams { 2413 2414 /** 2415 * Scalar to be multiplied with the input. 2416 * Only used if there is a single input. 2417 */ 2418 float alpha = 1; 2419 2420} 2421 2422/** 2423 * A layer that applies a unary function. 2424 * 2425 * .. code:: 2426 * 2427 * y = UnaryFunctionLayer(x) 2428 * 2429 * Requires 1 input and produces 1 output. 2430 * 2431 * Input 2432 * A blob with no rank constraints. 2433 * Output 2434 * A blob with the same shape as the input. 2435 * 2436 * The input is first modified by shifting and scaling: 2437 * 2438 * .. math:: 2439 * x \leftarrow \text{scale} \cdot x + \text{shift} 2440 */ 2441message UnaryFunctionLayerParams { 2442 2443 /** 2444 * A unary operator. 2445 * 2446 * The following functions are supported: 2447 * 2448 * ``SQRT`` 2449 * .. math:: f(x) = \sqrt{x} 2450 * 2451 * ``RSQRT`` 2452 * .. math:: f(x) = \dfrac{1}{\sqrt{x + \epsilon}} 2453 * 2454 * ``INVERSE`` 2455 * .. math:: f(x) = \dfrac{1}{x + \epsilon} 2456 * 2457 * ``POWER`` 2458 * .. math:: f(x) = x^\alpha 2459 * 2460 * ``EXP`` 2461 * .. math:: f(x) = e^x 2462 * 2463 * ``LOG`` 2464 * .. math:: f(x) = \log x 2465 * 2466 * ``ABS`` 2467 * .. math:: f(x) = |x| 2468 * 2469 * ``THRESHOLD`` 2470 * .. math:: f(x) = \text{max}(\alpha, x) 2471 */ 2472 enum Operation { 2473 SQRT = 0; 2474 RSQRT = 1; 2475 INVERSE = 2; 2476 POWER = 3; 2477 EXP = 4; 2478 LOG = 5; 2479 ABS = 6; 2480 THRESHOLD = 7; 2481 } 2482 Operation type = 1; /// The type of unary function. 2483 2484 /** 2485 * A constant used in ``POWER`` and ``THRESHOLD`` functions. 2486 */ 2487 float alpha = 2; 2488 2489 /** 2490 * A small constant to avoid division by 0 while normalizing variance. 2491 * Defaults to ``1e-6`` if not set or set to ``0``. 2492 */ 2493 float epsilon = 3; 2494 2495 /** 2496 * Input is shifted by this amount 2497 * before the unary function is applied. 2498 * Defaults to ``0.0`` if not set. 2499 */ 2500 float shift = 4; 2501 2502 /** 2503 * Input is scaled by this amount 2504 * before the unary function is applied. 2505 * Defaults to ``1.0`` if not set or set to ``0``. 2506 */ 2507 float scale = 5; 2508 2509} 2510 2511/** 2512 * A layer that scales up spatial dimensions. 2513 * It supports two modes: nearest neighbour (default) and bilinear. 2514 * 2515 * .. code:: 2516 * 2517 * y = UpsampleLayer(x) 2518 * 2519 * Requires 1 input and produces 1 output. 2520 * 2521 * Input 2522 * A blob with rank at least 3. 2523 * e.g.: blob with shape ``[C, H, W]``. 2524 * For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch. 2525 * 2526 * Output 2527 * Same rank as the input. 2528 * e.g.: blob with shape ``[C, scalingFactor[0] * H, scalingFactor[1] * W]`` 2529 */ 2530message UpsampleLayerParams { 2531 2532 /** 2533 * Scaling Factor. Mutually exclusive with fractionalScalingFactor. 2534 * Must be length 2 in order ``[H, W]``. 2535 * If not set, default value ``[1, 1]`` is used. 2536 */ 2537 repeated uint64 scalingFactor = 1; 2538 2539 /** 2540 * Fractional scaling factor. Mutually exclusive with scalingFactor. 2541 * Must be length 2 in order ``[H, W]``. 2542 * If not set, default value ``[1.0, 1.0]`` is used. 2543 */ 2544 repeated float fractionalScalingFactor = 7; 2545 2546 /* 2547 * Overall mode for interpolating new elements when upsampling. 2548 * NN - Nearest Neighbors - simply pick the nearest true value for interpolated values. 2549 * BILINEAR - Use bilinear interpolation. See LinearUpsamplingMode for behavior. 2550 */ 2551 enum InterpolationMode { 2552 2553 NN = 0; /// Nearest Neighbour 2554 BILINEAR = 1; /// Bilinear 2555 2556 } 2557 2558 InterpolationMode mode = 5; 2559 2560 /** 2561 * LinearUpsampleMode specifies the behavior for linear upsampling. Only valid when Interpolation Mode is BILINEAR. 2562 * If input grid is [0, Xin-1] (corresponding to an input size of Xin), and if the output size is Xout, 2563 * then the grid points are sampled in the following manner: 2564 * DEFAULT: 2565 * spacing = (Xin-Xin/Xout) / (Xout-1) 2566 * grid_point[i] = min(Xin-1, max(0, i * spacing)), for i = 0,1,2,….,Xout-1 2567 * ALIGN_CORNERS_TRUE: 2568 * spacing = (Xin-1) / (Xout-1) 2569 * grid_point[i] = min(Xin-1, max(0, i * spacing)), for i = 0,1,2,….,Xout-1 2570 * ALIGN_CORNERS_FALSE: 2571 * spacing = Xin / Xout 2572 * grid_point[i] = min(Xin-1, max(0, i * spacing + 0.5 * spacing - 0.5)), for i = 0,1,2,….,Xout-1 2573 */ 2574 enum LinearUpsampleMode { 2575 2576 DEFAULT = 0; 2577 ALIGN_CORNERS_TRUE = 1; 2578 ALIGN_CORNERS_FALSE = 2; 2579 2580 } 2581 2582 LinearUpsampleMode linearUpsampleMode = 6; 2583 2584} 2585 2586/** 2587* A layer that resizes the input to a pre-specified spatial size using bilinear interpolation. 2588* 2589* .. code:: 2590* 2591* y = ResizeBilinearLayer(x) 2592* 2593* Requires 1 input and produces 1 output. 2594* 2595* Input 2596* A blob with rank at least 3. 2597* e.g.: blob with shape ``[C, H_in, W_in]``. 2598* For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch. 2599* 2600* Output 2601* Same rank as the input. 2602* e.g.: blob with shape ``[C, H_out, W_out]``. 2603* 2604*/ 2605message ResizeBilinearLayerParams { 2606 2607 /** 2608 * Target Spatial Size. 2609 * Must be length 2 in order ``[Height, Width]``, i.e. ``[H_out, W_out]``. 2610 * If not set, default value ``[1, 1]`` is used. 2611 */ 2612 repeated uint64 targetSize = 1; 2613 2614 /** 2615 * Mode used to compute the grid on which the spatial output values are evaluated. 2616 * Same mode is applied to both the height and width axes. 2617 */ 2618 SamplingMode mode = 2; 2619 2620} 2621 2622/** 2623* A layer that extracts cropped spatial patches or RoIs (regions of interest) from the input and resizes them to a pre-specified size using 2624* bilinear interpolation. 2625* Note that RoI Align layer can be implemented with this layer followed by a pooling layer. 2626* 2627* .. code:: 2628* 2629* y = CropResizeLayer(x) 2630* 2631* Requires 2 inputs and produces 1 output. 2632* 2633* Input 2634* There are two inputs. 2635* First input represents an image feature map. 2636* Second input represents the bounding box coordinates for N patches or RoIs (region of interest). 2637* 2638* First input is rank 5: [1, Batch, C, H_in, W_in]. 2639* Second input is rank 5. Its shape can be either [N, 1, 4, 1, 1] or [N, 1, 5, 1, 1]. 2640* 2641* N: number of patches/RoIs to be extracted 2642* 2643* If RoI shape = ``[N, 1, 4, 1, 1]`` 2644* The axis=-3 corresponds to the four coordinates specifying the bounding box. 2645* All the N RoIs are extracted from all the batches of the input. 2646* 2647* If RoI shape = ``[N, 1, 5, 1, 1]`` 2648* The first element of the axis=-3 specifies the input batch id from which to extract the RoI and 2649* must be in the interval ``[0, Batch - 1]``. That is, n-th RoI is extracted from the RoI[n,0,0,0,0]-th 2650* input batch id. The last four elements of the axis=-3 specify the bounding box coordinates. 2651* 2652* Output 2653* A blob with rank 5. 2654* - Shape is [N, Batch, C, H_out, W_out] if input RoI shape is [N, 1, 4, 1, 1] 2655* - Shape is [N, 1, C, H_out, W_out] if input RoI shape is [N, 1, 5, 1, 1] 2656* 2657*/ 2658message CropResizeLayerParams { 2659 2660 /** 2661 * Target Spatial Size. 2662 * Must be length 2 in order ``[Height, Width]``, i.e. ``[H_out, W_out]``. 2663 * If not set, default value ``[1, 1]`` is used. 2664 */ 2665 repeated uint64 targetSize = 1; 2666 2667 /** 2668 * If true the bounding box coordinates must be in the interval [0, 1]. 2669 * They are scaled by (H_in - 1), (W_in - 1), i.e. based on the input spatial dimensions. 2670 * If false the bounding box coordinates must be in the interval 2671 * [0, H_in -1] and [0, W_in - 1], respectively for height and width dimensions. 2672 */ 2673 bool normalizedCoordinates = 2; 2674 2675 /** 2676 * Mode used to compute the grid on which the spatial output values are evaluated. 2677 * Same mode is applied to both the height and width axes. 2678 */ 2679 SamplingMode mode = 3; 2680 2681 /** 2682 * Representation used to express the bounding box coordinates. 2683 * It determines how the values of the second input are interpreted. 2684 */ 2685 BoxCoordinatesMode boxIndicesMode = 4; 2686 2687 /** 2688 * Additional spatial scale that multiplies the bounding box coordinates. 2689 * Generally used while implementing the RoI Align layer, 2690 * which uses unnormalized RoI coordinates along with a spatial scale less than or equal to 1. 2691 */ 2692 float spatialScale = 5; 2693 2694} 2695 2696/** 2697 * A layer that performs elementwise addition of a bias, 2698 * which is broadcasted to match the input shape. 2699 * 2700 * .. code:: 2701 * 2702 * y = BiasLayer(x) 2703 * 2704 * Requires 1 input and produces 1 output. 2705 * 2706 * Input 2707 * A blob with rank at least 3. 2708 * e.g.: blob with shape ``[C, H, W]``. 2709 * For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch. 2710 * Output 2711 * A blob with the same shape as the input. 2712 */ 2713message BiasLayerParams { 2714 2715 /** 2716 * The shape of the bias. 2717 * Must be one of the following: 2718 * ``[1]``, ``[C]``, ``[1, H, W]`` or ``[C, H, W]``. 2719 */ 2720 repeated uint64 shape = 1; 2721 2722 /** 2723 * The bias values. 2724 * The size must be equal to the product of the ``shape`` dimensions. 2725 */ 2726 WeightParams bias = 2; 2727 2728} 2729 2730/** 2731 * A layer that performs elmentwise multiplication by a scale factor 2732 * and optionally adds a bias; 2733 * both the scale and bias are broadcasted to match the input shape. 2734 * 2735 * .. code:: 2736 * 2737 * y = ScaleLayer(x) 2738 * 2739 * Requires 1 input and produces 1 output. 2740 * 2741 * Input 2742 * A blob with rank at least 3. 2743 * e.g.: blob with shape ``[C, H, W]``. 2744 * For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch. 2745 * Output 2746 * A blob with the same shape as the input. 2747 */ 2748message ScaleLayerParams { 2749 2750 /** 2751 * The shape of the scale. 2752 * Must be one of the following: 2753 * ``[1]``, ``[C]``, ``[1, H, W]`` or ``[C, H, W]``. 2754 */ 2755 repeated uint64 shapeScale = 1; 2756 2757 /** 2758 * The scale values. 2759 * The size must be equal to the product of the ``shape`` dimensions. 2760 */ 2761 WeightParams scale = 2; /// Scale values. Size must be equal to the product of dimensions specified in shapeScale. 2762 2763 bool hasBias = 3; /// If true, a bias is added after scaling. 2764 2765 /** 2766 * The shape of the bias. 2767 * Must be one of the following: 2768 * ``[1]``, ``[C]``, ``[1, H, W]`` or ``[C, H, W]``. 2769 */ 2770 repeated uint64 shapeBias = 4; 2771 2772 /** 2773 * The bias values. 2774 * The size must be equal to the product of the ``shape`` dimensions. 2775 */ 2776 WeightParams bias = 5; 2777 2778} 2779 2780/** 2781 * A layer that loads data as a parameter and provides it as an output. 2782 * The output is rank 5. For general rank, see LoadConstantNDLayer. 2783 * 2784 * .. code:: 2785 * 2786 * y = LoadConstantLayer() 2787 * 2788 * Requires no input and produces 1 output. 2789 * 2790 * Output: 2791 * A blob with rank 5 and shape ``[1, 1, C, H, W]`` 2792 */ 2793message LoadConstantLayerParams { 2794 2795 /** 2796 * The shape of the constant to be loaded, 2797 * which must be``[C, H, W]``, that is length 3. 2798 */ 2799 repeated uint64 shape = 1; 2800 2801 /** 2802 * The data values, 2803 * of size ``C * H * W``. 2804 */ 2805 WeightParams data = 2; 2806 2807} 2808 2809/** 2810 * A layer that performs L2 normalization, i.e. divides by the 2811 * the square root of the sum of squares of all elements of input. 2812 * 2813 * .. code:: 2814 * 2815 * y = L2NormalizeLayer(x) 2816 * 2817 * Requires 1 input and produces 1 output. 2818 * 2819 * Input 2820 * A blob with rank greater than equal to 3. 2821 * For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch. 2822 * Output 2823 * A blob with the same shape as the input. 2824 * 2825 * This layer is described by the following formula: 2826 * 2827 * .. math:: 2828 * x_i \leftarrow \dfrac{x_i}{\sqrt{\sum{x_i^2} + \epsilon}} 2829 */ 2830message L2NormalizeLayerParams { 2831 2832 /** 2833 * A small constant to avoid division by 0 while normalizing variance. 2834 * Defaults to ``1e-6`` if not set or set to ``0``. 2835 */ 2836 float epsilon = 1; 2837 2838} 2839 2840/// Data Reorganization Layers 2841/// -------------------------- 2842 2843/** 2844 * A layer that flattens the input. 2845 * 2846 * .. code:: 2847 * 2848 * y = FlattenLayer(x) 2849 * 2850 * Requires 1 input and produces 1 output. 2851 * 2852 * Input 2853 * A blob with rank greater than equal to 3. 2854 * e.g.: Rank 4 blob represents [Batch, C, H, W] 2855 * For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch. 2856 * Output 2857 * Same rank as the input, such that last two dimensions are both 1. 2858 * e.g.: For rank 4 input, output shape is ``[Batch, C * H * W, 1, 1]`` 2859 * 2860 * There are two X orders: ``CHANNEL_FIRST`` and ``CHANNEL_LAST``. 2861 * ``CHANNEL_FIRST`` does not require data to be rearranged, 2862 * because row major ordering is used by internal storage. 2863 * ``CHANNEL_LAST`` requires data to be rearranged. 2864 */ 2865message FlattenLayerParams { 2866 2867 enum FlattenOrder { 2868 2869 CHANNEL_FIRST = 0; 2870 CHANNEL_LAST = 1; 2871 2872 } 2873 FlattenOrder mode = 1; 2874 2875} 2876 2877/** 2878 * A layer that recasts the input into a new shape. 2879 * 2880 * .. code:: 2881 * 2882 * y = ReshapeLayer(x) 2883 * 2884 * Requires 1 input and produces 1 output. 2885 * 2886 * Input 2887 * A blob with rank 5. 2888 * e.g.: ``[1, 1, C, H, W]`` or ``[Seq, 1, C, H, W]``. 2889 * Output 2890 * A blob with rank 5. 2891 * e.g.: ``[1, 1, C_out, H_out, W_out]`` or ``[Seq_out, 1, C_out, H_out, W_out]``. 2892 * 2893 * There are two reshape orders: ``CHANNEL_FIRST`` and ``CHANNEL_LAST``. 2894 * ``CHANNEL_FIRST`` is equivalent to 2895 * flattening the input to ``[Seq, 1, C * H * W, 1, 1]`` in channel first order 2896 * and then reshaping it to the target shape; 2897 * no data rearrangement is required. 2898 * ``CHANNEL_LAST`` is equivalent to 2899 * flattening the input to ``[Seq, 1, H * W * C, 1, 1]`` in channel last order, 2900 * reshaping it to ``[Seq_out, 1, H_out, W_out, C_out]`` (it is now in "H_out-major"" order), 2901 * and then permuting it to ``[C_out, H_out, W_out]``; 2902 * both the flattening and permuting requires the data to be rearranged. 2903 */ 2904message ReshapeLayerParams { 2905 2906 /** 2907 * The shape of the output. 2908 * Must be of length 3 or 4. 2909 * If set to 3, ``targetShape`` is interpreted as 2910 * ``[1, 1, C_out, H_out, W_out]``, and sequence length of the input is preserved. 2911 * If set to 4, ``targetShape`` is interpreted as 2912 * ``[Seq_out, 1, C_out, H_out, W_out]``, 2913 * where ``Seq_out`` is the new sequence length. 2914 */ 2915 repeated int64 targetShape = 1; 2916 2917 enum ReshapeOrder { 2918 2919 CHANNEL_FIRST = 0; 2920 CHANNEL_LAST = 1; 2921 2922 } 2923 ReshapeOrder mode = 2; 2924 2925} 2926 2927/** 2928 * A layer that rearranges the dimensions and data of an input. 2929 * For generic transpose/permute operation see TransposeLayer. 2930 * 2931 * .. code:: 2932 * 2933 * y = PermuteLayer(x) 2934 * 2935 * Requires 1 input and produces 1 output. 2936 * 2937 * Input 2938 * Must be a rank 5 blob. 2939 * e.g.: shape ``[Seq, B, C, H, W]``. 2940 * Output 2941 * Rank 5 blob. Transposed version of the input, such that dimensions at axis=1 or axis=-4 is unchanged. 2942 * 2943 * 2944 * Examples: 2945 * 2946 * Assume input shape is [Seq, B, C, H, W] 2947 * 2948 * - If ``axis`` is set to ``[0, 3, 1, 2]``, 2949 * then the output has shape ``[Seq, B, W, C, H]`` 2950 * 2951 * - If ``axis`` is set to ``[3, 1, 2, 0]``, 2952 * then the output has shape ``[W, B, C, H, Seq]`` 2953 * 2954 * - If ``axis`` is set to ``[0, 3, 2, 1]``, 2955 * then the output has shape ``[Seq, B, W, H, C]`` 2956 * 2957 * - If ``axis`` is not set, or is set to ``[0, 1, 2, 3]``, 2958 * the output is the same as the input. 2959 */ 2960message PermuteLayerParams { 2961 2962 /** 2963 * The order in which to permute the dimensions. 2964 * Must have length 4 and a permutation of ``[0, 1, 2, 3]``. 2965 */ 2966 repeated uint64 axis = 1; 2967 2968} 2969 2970/** 2971 * A layer that reorganizes data in the input in specific ways. 2972 * 2973 * .. code:: 2974 * 2975 * y = ReorganizeDataLayer(x) 2976 * 2977 * Requires 1 input and produces 1 output. 2978 * 2979 * Input 2980 * A blob with rank at least 3. 2981 * e.g.: blob with shape ``[C, H, W]``. 2982 * For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch. 2983 * Output 2984 * Same rank as the input. 2985 * e.g.: blob with shape ``[C_out, H_out, W_out]``. 2986 * 2987 * mode == SPACE_TO_DEPTH 2988 * ``[C_out, H_out, W_out]`` : ``[C * blockSize * blockSize, H/blockSize, W/blockSize]``. 2989 * blockSize must divide H and W. 2990 * Data is moved from the spatial dimensions to the channel dimension. Input is spatially divided into 2991 * non-overlapping blocks of size blockSize X blockSize and data from each block is moved into the 2992 * channel dimension. 2993 * 2994 * mode == DEPTH_TO_SPACE 2995 * ``[C_out, H_out, W_out]`` : ``[C/(blockSize * blockSize), H * blockSize, W * blockSize]``. 2996 * Square of blockSize must divide C. 2997 * Reverse of SPACE_TO_DEPTH. Data is moved from the channel dimension to the spatial dimensions. 2998 * 2999 * mode == PIXEL_SHUFFLE 3000 * ``[C_out, H_out, W_out]`` : ``[C/(blockSize * blockSize), H * blockSize, W * blockSize]``. 3001 * Square of blockSize must divide C. 3002 * Similar to DEPTH_TO_SPACE, but using the pixel-shuffle semantics for channel order in the output space. 3003 * In both modes, elements along the channel dimension are collapsed into 3004 * blocks in the spatial dimensions. The difference is in the arrangement of 3005 * the input-channels' data in the output space. See below example for more 3006 * detail. 3007 * (Only available in Core ML Specification >= 5 (iOS >= 14, macOS >= 11.0) 3008 * 3009 * 3010 * Examples: 3011 * 3012 * Assume input is the following [C = 8, H = 1, W = 2] tensor: 3013 * 3014 * .. code:: 3015 * 3016 * [[[1 2]] [[3 4]] [[5 6]] [[7 8]] [[9 10]] [[11 12]] [[13 14]] [[15 16]]] 3017 * 3018 * If block_size == 2 and mode == DEPTH_TO_SPACE, output will be the following 3019 * [C = 2, H = 2, W = 4] tensor: 3020 * 3021 * .. code:: 3022 * 3023 * [[[ 1 5 2 6] 3024 * [ 9 13 10 14]] 3025 * 3026 * [[ 3 7 4 8] 3027 * [11 15 12 16]]] 3028 * 3029 * For mode == SPACE_TO_DEPTH, the behavior is the same as mode == 3030 * DEPTH_TO_SPACE, but with the input and output swapped. 3031 * 3032 * If block_size == 2 and mode == PIXEL_SHUFFLE, output will be the following 3033 * [C = 2, H = 2, W = 4] tensor: 3034 * 3035 * .. code:: 3036 * 3037 * [[[ 1 3 2 4] 3038 * [ 5 7 6 8]] 3039 * 3040 * [[ 9 11 10 12] 3041 * [13 15 14 16]]] 3042 * 3043 */ 3044message ReorganizeDataLayerParams { 3045 3046 enum ReorganizationType { 3047 3048 SPACE_TO_DEPTH = 0; 3049 DEPTH_TO_SPACE = 1; 3050 PIXEL_SHUFFLE = 2; 3051 3052 } 3053 ReorganizationType mode = 1; 3054 uint64 blockSize = 2; /// must be greater than 1 3055 3056} 3057 3058/** 3059 * A layer that slices the input data along axis = -1 or -2 or -3. 3060 * For general slice along any axis, please see SliceStaticLayer/SliceDynamicLayer. 3061 * 3062 * .. code:: 3063 * 3064 * y = SliceLayer(x) 3065 * 3066 * Requires 1 input and produces 1 output. 3067 * 3068 * Input 3069 * A blob that can, in general, have any rank. However, depending on the value of "axis" , 3070 * there may be additional rank constraints. 3071 * Output 3072 * A blob with the same rank as the input. 3073 * 3074 * Sliced section is taken from the interval ``[startIndex, endIndex)``, i.e. 3075 * startIndex is inclusive while endIndex is exclusive. 3076 * stride must be positive and represents the step size for slicing. 3077 * Negative indexing is supported for startIndex and endIndex. 3078 * -1 denotes N-1, -2 denotes N-2 and so on, where N is the length of the dimension to be sliced. 3079 * 3080 */ 3081message SliceLayerParams { 3082 3083 int64 startIndex = 1; /// start of the sliced section. Inclusive. 3084 int64 endIndex = 2; /// end of sliced section. Exclusive. 3085 uint64 stride = 3; /// The step size. Must be positive. 3086 3087 enum SliceAxis { 3088 3089 CHANNEL_AXIS = 0; 3090 HEIGHT_AXIS = 1; 3091 WIDTH_AXIS = 2; 3092 3093 } 3094 // The following mapping is used for interpreting this parameter: 3095 // CHANNEL_AXIS => axis = -3, input must have rank at least 3. 3096 // HEIGHT_AXIS => axis = -2, input must have rank at least 2. 3097 // WIDTH_AXIS => axis = -1 3098 SliceAxis axis = 4; 3099 3100} 3101 3102/** 3103 * A layer that reduces the input using a specified operation. 3104 * 3105 * .. code:: 3106 * 3107 * y = ReduceLayer(x) 3108 * 3109 * Requires 1 input and produces 1 output. 3110 * 3111 * Input 3112 * A blob that can, in general, have any rank. However, depending on the value of "axis" , 3113 * there may be additional rank constraints. 3114 * Output 3115 * A blob with the same rank as the input, which has 1s on the dimensions specified in the parameter "axis" 3116 * 3117 * Values supported for axis are [-1], [-2], [-3], [-2,-1], [-3,-2,-1] 3118 * and the equivalent positive values (depending on the rank of the input) 3119 * For mode == 'ArgMax', axis must be [-1] or [-2] or [-3]. 3120 */ 3121message ReduceLayerParams { 3122 3123 /* 3124 * The following reduction operations are supported 3125 * and are applied on the specified axis of the input array: 3126 * 3127 * ``SUM`` 3128 * Sum of all elements 3129 * 3130 * .. math:: \sum{x_i} 3131 * 3132 * ``AVG`` 3133 * Sum of all elements divided by the number of elements 3134 * 3135 * .. math:: \dfrac{\sum^n{x_i}}{n} 3136 * 3137 * ``PROD`` 3138 * Product of all elements 3139 * 3140 * .. math:: \prod{x_i} 3141 * 3142 * ``LOGSUM`` 3143 * Sum of the natural logarithm of all elements 3144 * 3145 * .. math:: \sum{\ln{(x_i + \epsilon)}} 3146 * 3147 * ``SUMSQUARE`` 3148 * Sum of squares of all elements 3149 * 3150 * .. math:: \sum{x^2} 3151 * 3152 * ``L1`` 3153 * L1 normalization of all elements 3154 * 3155 * .. math:: ||x||_1 = \sum{|x_i|} 3156 * 3157 * ``L2`` 3158 * L2 normalization of all elements 3159 * 3160 * .. math:: ||x||_2 = \sqrt{\sum{x_i^2}} 3161 * 3162 * ``MAX`` 3163 * Maximum of all elements 3164 * 3165 * .. math:: \text{max}(x_i) 3166 * 3167 * ``MIN`` 3168 * Minumum of all elements 3169 * 3170 * .. math:: \text{min}(x_i) 3171 * 3172 * ``ARGMAX`` 3173 * Argument of the maximum of all elements 3174 * 3175 * .. math:: \text{argmax}(x_i) 3176 * 3177 */ 3178 enum ReduceOperation { 3179 3180 SUM = 0; 3181 AVG = 1; 3182 PROD = 2; 3183 LOGSUM = 3; 3184 SUMSQUARE = 4; 3185 L1 = 5; 3186 L2 = 6; 3187 MAX = 7; 3188 MIN = 8; 3189 ARGMAX = 9; /// only supported with axis = C, H or W. 3190 3191 } 3192 ReduceOperation mode = 1; /// Specifies function used to reduce. 3193 3194 /** 3195 * Used if mode is ``LOGSUM``. 3196 * Defaults to ``1e-6`` if not set or is set to ``0``. 3197 */ 3198 float epsilon = 2; 3199 3200 enum ReduceAxis { 3201 3202 CHW = 0; 3203 HW = 1; 3204 C = 2; 3205 H = 3; 3206 W = 4; 3207 3208 } 3209 3210 // The following mapping is used for interpreting this parameter: 3211 // CHW = axis [-3, -2, -1], input must have rank at least 3. 3212 // HW = axis [-2, -1], input must have rank at least 2. 3213 // C = axis [-3] 3214 // H = axis [-2] 3215 // W = axis [-1] 3216 ReduceAxis axis = 3; 3217 3218} 3219 3220/** 3221 * A layer that crops the spatial dimensions of an input. 3222 * If two inputs are provided, the shape of the second input is used as the reference shape. 3223 * 3224 * .. code:: 3225 * 3226 * y = CropLayer(x1) or y = CropLayer(x1,x2) 3227 * 3228 * Requires 1 or 2 inputs and produces 1 output. 3229 * 3230 * Input 3231 * 1 or 2 tensors, each with rank at least 3, both inputs must have equal rank. 3232 * Example: 3233 * - 1 input case: A blob with shape ``[C, H_in, W_in]``. 3234 * - 2 input case: 1st blob with shape ``[C, H_in, W_in]``, 2nd blob with shape ``[C, H_out, W_out]``. 3235 * 3236 * For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch. 3237 * 3238 * Output 3239 * Same rank as the inputs. 3240 * e.g.: A blob with shape ``[C, H_out, W_out]``. 3241 * 3242 * If one input is used, output is computed as follows: 3243 * 3244 * .. code:: 3245 * 3246 * y = x1[:, topCropAmount:H_in - bottomCropAmount, leftCropAmount:W_in - rightCropAmount] 3247 * 3248 * topCropAmount == Height startEdgeSize == borderAmounts[0].startEdgeSize 3249 * bottomCropAmount == Height endEdgeSize == borderAmounts[0].endEdgeSize 3250 * leftCropAmount == Width startEdgeSize == borderAmounts[1].startEdgeSize 3251 * rightCropAmount == Width endEdgeSize == borderAmounts[1].endEdgeSize 3252 * 3253 * H_out = H_in - topCropAmount - bottomCropAmount 3254 * W_out = W_in - leftCropAmount - rightCropAmount 3255 * 3256 * If two inputs are used, output is computed as follows: 3257 * 3258 * .. code:: 3259 * 3260 * y = x1[:, offset[0]:offset[0] + H_out, offset[1]:offset[1] + W_out] 3261 */ 3262message CropLayerParams { 3263 3264 /** 3265 * The amounts to be cropped from the input. 3266 * Used only if a single input is provided. 3267 */ 3268 BorderAmounts cropAmounts = 1; 3269 3270 /** 3271 * The offset amounts. 3272 * Used only if two inputs are provided. 3273 * Must be of length 2, in order ``[H, W]``. 3274 */ 3275 repeated uint64 offset = 5; 3276 3277} 3278 3279/** 3280 * A layer that computes the elementwise average of the inputs. 3281 * This layer has limited broadcasting support. For general broadcasting see AddBroadcastableLayer. 3282 * 3283 * .. code:: 3284 * 3285 * y = AverageLayer(x1,x2,...) 3286 * 3287 * Requires multiple inputs and produces 1 output. 3288 * 3289 * Input 3290 * In general, there are no rank constraints. 3291 * However, only certain set of shapes are broadcastable. For example: 3292 * [B, 1, 1, 1], [B, C, 1, 1], [B, 1, H, W], [B, C, H, W] 3293 * Output 3294 * A blob with the same shape as each input. 3295 */ 3296message AverageLayerParams { 3297 3298} 3299 3300/** 3301 * A layer that computes the elementwise maximum over the inputs. 3302 * 3303 * .. code:: 3304 * 3305 * y = MaxLayer(x1,x2,...) 3306 * 3307 * Requires multiple inputs and produces 1 output. 3308 * 3309 * Input 3310 * In general, there are no rank constraints. 3311 * However, only certain set of shapes are broadcastable. For example: 3312 * [B, C, 1, 1], [B, C, H, W] 3313 * Output 3314 * A blob with the same shape as each input. 3315 */ 3316message MaxLayerParams { 3317 3318} 3319 3320/** 3321 * A layer that computes the elementwise minimum over the inputs. 3322 * 3323 * .. code:: 3324 * 3325 * y = MinLayer(x1,x2,...) 3326 * 3327 * Requires multiple inputs and produces 1 output. 3328 * 3329 * Input 3330 * In general, there are no rank constraints. 3331 * However, only certain set of shapes are broadcastable. For example: 3332 * [B, C, 1, 1], [B, C, H, W] 3333 * Output 3334 * A blob with the same shape as each input. 3335 */ 3336message MinLayerParams { 3337 3338} 3339 3340/** 3341 * A layer that computes the dot product of two vectors. 3342 * 3343 * .. code:: 3344 * 3345 * y = DotProductLayer(x1,x2) 3346 * 3347 * Requires 2 inputs and produces 1 output. 3348 * 3349 * Input 3350 * Two blobs with rank at least 3, such that the last two dimensions must be 1. 3351 * e.g.: blobs with shape ``[B, C, 1, 1]``. 3352 * For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch. 3353 * 3354 * Output 3355 * Same rank as the input. 3356 * e.g. for rank 4 inputs, output shape: [B, 1, 1, 1] 3357 */ 3358message DotProductLayerParams { 3359 3360 /** 3361 * If true, inputs are normalized first, 3362 * thereby computing the cosine similarity. 3363 */ 3364 bool cosineSimilarity = 1; 3365 3366} 3367 3368/** 3369 * A layer that performs mean variance normalization, along axis = -3. 3370 * 3371 * .. code:: 3372 * 3373 * y = MeanVarianceNormalizeLayer(x) 3374 * 3375 * Requires 1 input and produces 1 output. 3376 * 3377 * Input 3378 * A blob with rank greater than equal to 3. 3379 * Example: Rank 4 blob represents [Batch, channels, height, width] 3380 * For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch. 3381 * 3382 * Output 3383 * A blob with the same shape as the input. 3384 * 3385 * If ``acrossChannels == true`` 3386 * normalization is performed on flattened input, i.e. the input is reshaped to (Batch,C), where "Batch" contains 3387 * all dimensions from 0 to -4 (inclusive), and C contains dimensions -1, -2, -3. 3388 * 3389 * If ``acrossChannels == false`` 3390 * normalization is performed within a channel, 3391 * across spatial dimensions (i.e. last two dimensions). 3392 */ 3393message MeanVarianceNormalizeLayerParams { 3394 3395 /** 3396 * If true, mean and variance are computed across channels. 3397 */ 3398 bool acrossChannels = 1; 3399 3400 /** 3401 * If false, only mean is subtracted. 3402 */ 3403 bool normalizeVariance = 2; 3404 3405 /** 3406 * A small constant to avoid division by 0 while normalizing variance. 3407 * Defaults to ``1e-6`` if not set or set to ``0``. 3408 */ 3409 float epsilon = 3; 3410 3411} 3412 3413/** 3414 * A layer that repeats a sequence or the dimension sitting at axis = -5 3415 * 3416 * .. code:: 3417 * 3418 * y = SequenceRepeatLayer(x) 3419 * 3420 * Requires 1 input and produces 1 output. 3421 * 3422 * Input 3423 * A blob with rank at least 5. 3424 * e.g: shape ``[Seq, B, C, H, W]`` 3425 * Output 3426 * A blob with the same rank as the input. 3427 * e.g.: for input shape ``[Seq, B, C, H, W]``, output shape is ``[nRepetitions * Seq, B, C, H, W]``. 3428 */ 3429message SequenceRepeatLayerParams { 3430 3431 /** 3432 * Number of repetitions. 3433 * Defaults to ``1`` if not set or set to ``0``. 3434 */ 3435 uint64 nRepetitions = 1; 3436 3437} 3438 3439/// Recurrent Layers 3440/// ---------------- 3441 3442/* 3443 * The following activations are supported with recurrent layers: 3444 * - Linear 3445 * - Sigmoid 3446 * - Tanh 3447 * - ReLU 3448 * - Scaled Hyperbolic Tangent: alpha * tanh(beta * x), currently only supported for alpha = 1.7159, beta = 2/3 3449 * - Hard Sigmoid: min(max(alpha * x + beta, 0), 1), currently only supported for alpha = 0.2, beta = 0.5 3450 */ 3451 3452/** 3453 * A simple recurrent layer. 3454 * 3455 * .. code:: 3456 * 3457 * y_t = SimpleRecurrentLayer(x_t, y_{t-1}) 3458 * 3459 * Input 3460 * A blob of rank 5, with shape `[Seq, Batch, inputVectorSize, 1, 1]``. 3461 * This represents a sequence of vectors of size ``inputVectorSize``. 3462 * Output 3463 * Same rank as the input. 3464 * Represents a vector of size ``outputVectorSize``. It is either the final output or a sequence of outputs at all time steps. 3465 * 3466 * - Output Shape: ``[1, Batch, outputVectorSize, 1, 1]`` , if ``sequenceOutput == false`` 3467 * - Output Shape: ``[Seq, Batch, outputVectorSize, 1, 1]`` , if ``sequenceOutput == true`` 3468 * 3469 * This layer is described by the following equation: 3470 * 3471 * .. math:: 3472 * \boldsymbol{y_t} = f(\mathrm{clip}(W \boldsymbol{x_t} + \ 3473 * R \boldsymbol{y_{t-1}} + b)) 3474 * 3475 * - ``W`` is a 2-dimensional weight matrix 3476 * (``[outputVectorSize, inputVectorSize]``, row-major) 3477 * - ``R`` is a 2-dimensional recursion matrix 3478 * (``[outputVectorSize, outputVectorSize]``, row-major) 3479 * - ``b`` is a 1-dimensional bias vector (``[outputVectorSize]``) 3480 * - ``f()`` is an activation 3481 * - ``clip()`` is a function that constrains values between ``[-50.0, 50.0]`` 3482 */ 3483message SimpleRecurrentLayerParams { 3484 3485 uint64 inputVectorSize = 1; /// The size of the input vectors. 3486 uint64 outputVectorSize = 2; /// The size of the output vectors. 3487 3488 /** 3489 * Activations supported are Linear, Sigmoid, Tanh, ReLU, Scaled Tanh (alpha = 1.71, beta = 2/3), Hard sigmoid (alpha = 0.2, beta = 0.5) 3490 */ 3491 ActivationParams activation = 10; /// The activation function. 3492 3493 /** 3494 If false output is just the result after final state update. 3495 If true, output is a sequence, containing outputs at all time steps. 3496 */ 3497 bool sequenceOutput = 15; 3498 3499 bool hasBiasVector = 20; /// If false, no bias is added. 3500 3501 WeightParams weightMatrix = 30; /// Weight matrix W. 3502 WeightParams recursionMatrix = 31; /// Recursion Weight matrix R. 3503 WeightParams biasVector = 32; /// Bias vector b. 3504 3505 bool reverseInput = 100; 3506 // If true, then the node processes the input sequence from right to left 3507 3508} 3509 3510/** 3511 * Gated-Recurrent Unit (GRU) Layer 3512 * 3513 * .. code:: 3514 * 3515 * y_t = GRULayer(x_t, y_{t-1}) 3516 * 3517 * Input 3518 * A blob of rank 5, with shape `[Seq, Batch, inputVectorSize, 1, 1]``. 3519 * This represents a sequence of vectors of size ``inputVectorSize``. 3520 * Output 3521 * Same rank as the input. 3522 * Represents a vector of size ``outputVectorSize``. It is either the final output or a sequence of outputs at all time steps. 3523 * 3524 * - Output Shape: ``[1, Batch, outputVectorSize, 1, 1]`` , if ``sequenceOutput == false`` 3525 * - Output Shape: ``[Seq, Batch, outputVectorSize, 1, 1]`` , if ``sequenceOutput == true`` 3526 * 3527 * This layer is described by the following equations: 3528 * 3529 * Update Gate 3530 * .. math:: 3531 * \boldsymbol{z_t} = \ 3532 * f(\mathrm{clip}(W_z \boldsymbol{x_t} + \ 3533 * R_z \boldsymbol{y_{t-1}} + b_z) 3534 * 3535 * Reset Gate 3536 * .. math:: 3537 * \boldsymbol{r_t} = \ 3538 * f(\mathrm{clip}(W_r \boldsymbol{x_t} + \ 3539 * R_r \boldsymbol{y_{t-1}} + b_r)) 3540 * 3541 * Cell Memory State 3542 * .. math:: 3543 * \boldsymbol{c_t} = \ 3544 * \boldsymbol{y_{t-1}} \odot \boldsymbol{r_t} 3545 * 3546 * Output Gate 3547 * .. math:: 3548 * \boldsymbol{o_t} = \ 3549 * g(\mathrm{clip}(W_o \boldsymbol{x_t} + \ 3550 * R_o \boldsymbol{c_t} + b_o)) 3551 * 3552 * Output 3553 * .. math:: 3554 * \boldsymbol{y_t} = \ 3555 * (1 - \boldsymbol{z_t}) \odot \boldsymbol{o_t} + \ 3556 * \boldsymbol{z_t} \odot \boldsymbol{y_{t-1}} 3557 * 3558 * - ``W_z``, ``W_r``, ``W_o`` are 2-dimensional input weight matrices 3559 * (``[outputVectorSize, inputVectorSize]``, row-major) 3560 * - ``R_z``, ``R_r``, ``R_o`` are 2-dimensional recursion matrices 3561 * (``[outputVectorSize, outputVectorSize]``, row-major) 3562 * - ``b_z``, ``b_r``, ``b_o`` are 1-dimensional bias vectors 3563 * (``[outputVectorSize]``) 3564 * - ``f()``, ``g()`` are activations 3565 * - ``clip()`` is a function that constrains values between ``[-50.0, 50.0]`` 3566 * - ``⊙`` denotes the elementwise product of matrices 3567 */ 3568message GRULayerParams { 3569 3570 uint64 inputVectorSize = 1; /// Size of the input vectors. 3571 uint64 outputVectorSize = 2; /// Size of the output vectors. 3572 3573 /** 3574 * 2 element array representing activations [f(), g()] in that order. 3575 * Typical values used = [sigmoid, tanh]. 3576 * Activations supported are Linear, Sigmoid, Tanh, ReLU, Scaled Tanh (alpha = 1.71, beta = 2/3), Hard sigmoid (alpha = 0.2, beta = 0.5) 3577 */ 3578 repeated ActivationParams activations = 10; 3579 3580 /** 3581 * If false output is just the result after final state update. 3582 * If true, output is a sequence, containing outputs at all time steps. 3583 */ 3584 bool sequenceOutput = 15; 3585 3586 /** 3587 * If false, no biases (``b_z``, ``b_r``, ``b_o``) are added. 3588 */ 3589 bool hasBiasVectors = 20; 3590 3591 WeightParams updateGateWeightMatrix = 30; /// Weight Matrix W_z. 3592 WeightParams resetGateWeightMatrix = 31; /// Weight Matrix W_r. 3593 WeightParams outputGateWeightMatrix = 32; /// Weight Matrix W_o. 3594 3595 WeightParams updateGateRecursionMatrix = 50; /// Recursion Weight Matrix R_z. 3596 WeightParams resetGateRecursionMatrix = 51; /// Recursion Weight Matrix R_r. 3597 WeightParams outputGateRecursionMatrix = 52; /// Recursion Weight Matrix R_o. 3598 3599 WeightParams updateGateBiasVector = 70; /// Bias vector b_z. 3600 WeightParams resetGateBiasVector = 71; /// Bias vector b_r. 3601 WeightParams outputGateBiasVector = 72; /// Bias vector b_o. 3602 3603 /// If true, then the node processes the input sequence from right to left 3604 bool reverseInput = 100; 3605 3606} 3607 3608/** 3609 * Long short-term memory (LSTM) parameters. 3610 * 3611 * This is described by the following equations: 3612 * 3613 * Input Gate 3614 * .. math:: 3615 * \boldsymbol{i_t} = \ 3616 * f(\mathrm{clip}(W_i \boldsymbol{x_t} + \ 3617 * R_i \boldsymbol{y_{t-1}} + \ 3618 * p_i \odot c_{t-1} + b_i)) 3619 * 3620 * Forget Gate 3621 * .. math:: 3622 * \boldsymbol{f_t} = \ 3623 * f(\mathrm{clip}(W_f \boldsymbol{x_t} + \ 3624 * R_f \boldsymbol{y_{t-1}} + \ 3625 * p_f \odot c_{t-1} + b_f)) 3626 * 3627 * Block Input 3628 * .. math:: 3629 * \boldsymbol{z_t} = \ 3630 * g(\mathrm{clip}(W_z \boldsymbol{x_t} + \ 3631 * R_z \boldsymbol{y_{t-1}} + b_z)) 3632 * 3633 * Cell Memory State 3634 * .. math:: 3635 * \boldsymbol{c_t} = \ 3636 * \boldsymbol{c_{t-1}} \odot \boldsymbol{f_t} + \ 3637 * \boldsymbol{i_t} \odot \boldsymbol{z_t} 3638 * 3639 * Output Gate 3640 * .. math:: 3641 * \boldsymbol{o_t} = \ 3642 * f(\mathrm{clip}(W_o \boldsymbol{x_t} + \ 3643 * R_o \boldsymbol{y_{t-1}} + \ 3644 * p_o \odot c_t + b_o)) 3645 * 3646 * Output 3647 * .. math:: 3648 * \boldsymbol{y_t} = \ 3649 * h(\boldsymbol{c_t}) \odot \boldsymbol{o_t} 3650 * 3651 * - ``W_i``, ``W_f``, ``W_z``, ``W_o`` are 2-dimensional input weight matrices 3652 * (``[outputVectorSize, inputVectorSize]``, row-major) 3653 * - ``R_i``, ``R_f``, ``R_z``, ``R_o`` are 2-dimensional recursion matrices 3654 * (``[outputVectorSize, outputVectorSize]``, row-major) 3655 * - ``b_i``, ``b_f``, ``b_z``, ``b_o`` are 1-dimensional bias vectors 3656 * (``[outputVectorSize]``) 3657 * - ``p_``, ``p_f``, ``p_o`` are 1-dimensional peephole vectors 3658 * (``[outputVectorSize]``) 3659 * - ``f()``, ``g()``, ``h()`` are activations 3660 * - ``clip()`` is a function that constrains values between ``[-50.0, 50.0]`` 3661 * - ``⊙`` denotes the elementwise product of matrices 3662 */ 3663message LSTMParams { 3664 3665 /** 3666 * If true, output is a sequence, containing outputs at all time steps. 3667 * If false, output is just the result after final state update. 3668 */ 3669 bool sequenceOutput = 10; 3670 3671 /** 3672 * If false, no biases (``b_i``, ``b_f``, ``b_z``, ``b_o``) are added. 3673 */ 3674 bool hasBiasVectors = 20; 3675 3676 /** 3677 * If true, a vector of ``1`` values is added to ``b_f``. 3678 */ 3679 bool forgetBias = 30; 3680 3681 /** 3682 * If true, peephole vectors are included. 3683 */ 3684 bool hasPeepholeVectors = 40; 3685 3686 /** 3687 * If the coupled Input and Forget flag is on, the behaviour of 3688 * ``c_t`` is changed to the following (i.e. forget gate is not used): 3689 * 3690 * .. math:: 3691 * \boldsymbol{c_t} = \ 3692 * \boldsymbol{c_{t-1}} \odot (1 - \boldsymbol{i_t}) + \ 3693 * \boldsymbol{i_t} \odot \boldsymbol{z_t} 3694 * 3695 */ 3696 bool coupledInputAndForgetGate = 50; 3697 3698 /** 3699 * Places a limit on the maximum and minimum values of ``c_t``. 3700 * c_t = min(c_t, cellClipThreshold) 3701 * c_t = max(c_t, -cellClipThreshold) 3702 * If 0, it is set to its default value = 50.0. 3703 */ 3704 float cellClipThreshold = 60; 3705 3706} 3707 3708/** 3709 * Weights for long short-term memory (LSTM) layers 3710 */ 3711message LSTMWeightParams { 3712 3713 WeightParams inputGateWeightMatrix = 1; /// Weight Matrix W_i. 3714 WeightParams forgetGateWeightMatrix = 2; /// Weight Matrix W_f. 3715 WeightParams blockInputWeightMatrix = 3; /// Weight Matrix W_z. 3716 WeightParams outputGateWeightMatrix = 4; /// Weight Matrix W_o. 3717 3718 WeightParams inputGateRecursionMatrix = 20; /// Recursion Weight Matrix R_i. 3719 WeightParams forgetGateRecursionMatrix = 21; /// Recursion Weight Matrix R_f. 3720 WeightParams blockInputRecursionMatrix = 22; /// Recursion Weight Matrix R_z. 3721 WeightParams outputGateRecursionMatrix = 23; /// Recursion Weight Matrix R_o. 3722 3723 //biases: 3724 WeightParams inputGateBiasVector = 40; /// Bias vector b_i. 3725 WeightParams forgetGateBiasVector = 41; /// Bias vector b_f. 3726 WeightParams blockInputBiasVector = 42; /// Bias vector b_z. 3727 WeightParams outputGateBiasVector = 43; /// Bias vector b_o. 3728 3729 //peepholes: 3730 WeightParams inputGatePeepholeVector = 60; /// Peephole vector p_i. 3731 WeightParams forgetGatePeepholeVector = 61; /// Peephole vector p_f. 3732 WeightParams outputGatePeepholeVector = 62; /// Peephole vector p_o. 3733 3734} 3735 3736/** 3737 * A unidirectional long short-term memory (LSTM) layer. 3738 * 3739 * .. code:: 3740 * 3741 * (y_t, c_t) = UniDirectionalLSTMLayer(x_t, y_{t-1}, c_{t-1}) 3742 * 3743 * Input 3744 * A blob of rank 5, with shape `[Seq, Batch, inputVectorSize, 1, 1]``. 3745 * This represents a sequence of vectors of size ``inputVectorSize``. 3746 * Output 3747 * Same rank as the input. 3748 * Represents a vector of size ``outputVectorSize``. It is either the final output or a sequence of outputs at all time steps. 3749 * 3750 * - Output Shape: ``[1, Batch, outputVectorSize, 1, 1]`` , if ``sequenceOutput == false`` 3751 * - Output Shape: ``[Seq, Batch, outputVectorSize, 1, 1]`` , if ``sequenceOutput == true`` 3752 * 3753 */ 3754message UniDirectionalLSTMLayerParams { 3755 3756 uint64 inputVectorSize = 1; /// Size of the input vectors. 3757 uint64 outputVectorSize = 2; /// Size of the output vectors. 3758 3759 /** 3760 * 3 element array representing activations [f(),g(),h()] in that order. 3761 * Typical values used = [sigmoid, tanh, tanh]. 3762 * Activations supported are Linear, Sigmoid, Tanh, ReLU, Scaled Tanh (alpha = 1.71, beta = 2/3), Hard sigmoid (alpha = 0.2, beta = 0.5) 3763 */ 3764 repeated ActivationParams activations = 10; 3765 3766 LSTMParams params = 15; 3767 3768 LSTMWeightParams weightParams = 20; /// Weights, biases and peepholes. 3769 3770 /// If true, then the node processes the input sequence from right to left 3771 bool reverseInput = 100; 3772 3773} 3774 3775/** 3776 * Bidirectional long short-term memory (LSTM) layer 3777 * 3778 * .. code:: 3779 * 3780 * (y_t, c_t, y_t_reverse, c_t_reverse) = BiDirectionalLSTMLayer(x_t, y_{t-1}, c_{t-1}, y_{t-1}_reverse, c_{t-1}_reverse) 3781 * 3782 * Input 3783 * A blob of rank 5, with shape `[Seq, Batch, inputVectorSize, 1, 1]``. 3784 * This represents a sequence of vectors of size ``inputVectorSize``. 3785 * Output 3786 * Same rank as the input. 3787 * Represents a vector of size ``2 * outputVectorSize``. It is either the final output or a sequence of outputs at all time steps. 3788 * 3789 * - Output Shape: ``[1, Batch, 2 * outputVectorSize, 1, 1]`` , if ``sequenceOutput == false`` 3790 * - Output Shape: ``[Seq, Batch, 2 * outputVectorSize, 1, 1]`` , if ``sequenceOutput == true`` 3791 * 3792 * 3793 * The first LSTM operates on the input sequence in the forward direction. 3794 * The second LSTM operates on the input sequence in the reverse direction. 3795 * 3796 * Example: given the input sequence ``[x_1, x_2, x_3]``, 3797 * where ``x_i`` are vectors at time index ``i``: 3798 * 3799 * The forward LSTM output is ``[yf_1, yf_2, yf_3]``, 3800 * 3801 * where ``yf_i`` are vectors of size ``outputVectorSize``: 3802 * 3803 * - ``yf_1`` is the output at the end of sequence {``x_1``} 3804 * - ``yf_2`` is the output at the end of sequence {``x_1``, ``x_2``} 3805 * - ``yf_3`` is the output at the end of sequence {``x_1``, ``x_2``, ``x_3``} 3806 * 3807 * The backward LSTM output: ``[yb_1, yb_2, yb_3]``, 3808 * 3809 * where ``yb_i`` are vectors of size ``outputVectorSize``: 3810 * 3811 * - ``yb_1`` is the output at the end of sequence {``x_3``} 3812 * - ``yb_2`` is the output at the end of sequence {``x_3``, ``x_2``} 3813 * - ``yb_3`` is the output at the end of sequence {``x_3``, ``x_2``, ``x_1``} 3814 * 3815 * Output of the bi-dir layer: 3816 * 3817 * - if ``sequenceOutput = True`` : { ``[yf_1, yb_3]``, ``[yf_2, yb_2]``, ``[yf_3, yb_1]`` } 3818 * - if ``sequenceOutput = False`` : { ``[yf_3, yb_3]`` } 3819 */ 3820message BiDirectionalLSTMLayerParams { 3821 3822 /** 3823 * Size of the input vectors. 3824 */ 3825 uint64 inputVectorSize = 1; 3826 /** 3827 * Size of the outputs vectors. 3828 * It is same for both forward and backward LSTMs. 3829 */ 3830 uint64 outputVectorSize = 2; 3831 3832 /** 3833 * 3 element array representing activations [f(),g(),h()] in that order. 3834 * Typical values used = [sigmoid, tanh, tanh]. 3835 * Activations supported are Linear, Sigmoid, Tanh, ReLU, Scaled Tanh (alpha = 1.71, beta = 2/3), Hard sigmoid (alpha = 0.2, beta = 0.5) 3836 */ 3837 repeated ActivationParams activationsForwardLSTM = 10; 3838 /** 3839 * Currently, backward LSTM activations 3840 * must be same as the ones for the forward LSTM. 3841 */ 3842 repeated ActivationParams activationsBackwardLSTM = 11; 3843 3844 /** 3845 * Common parameters shared by the forward and backward LSTMs. 3846 */ 3847 LSTMParams params = 15; 3848 3849 /** 3850 * Weights and biases. 3851 * Must be a length 2 message, 3852 * for the forward and backward LSTM respectively. 3853 */ 3854 repeated LSTMWeightParams weightParams = 20; 3855 3856} 3857 3858message CustomLayerParams { 3859 3860 message CustomLayerParamValue { 3861 oneof value { 3862 double doubleValue = 10; 3863 string stringValue = 20; 3864 int32 intValue = 30; 3865 int64 longValue = 40; 3866 bool boolValue = 50; 3867 } 3868 } 3869 3870 string className = 10; // The name of the class (conforming to MLCustomLayer) corresponding to this layer 3871 repeated WeightParams weights = 20; // Any weights -- these are serialized in binary format and memmapped at runtime 3872 map<string, CustomLayerParamValue> parameters = 30; // these may be handled as strings, so this should not be large 3873 string description = 40; // An (optional) description of the layer provided by the model creator. This information is displayed when viewing the model, but does not affect the model's execution on device. 3874 3875} 3876 3877/** 3878 * A layer that rearranges the dimensions and data of an input. 3879 * 3880 * .. code:: 3881 * 3882 * y = TransposeLayer(x) 3883 * 3884 * Requires 1 input and produces 1 output. 3885 * 3886 * Input 3887 * A N-Dimensional tensor. 3888 * Output 3889 * A N-Dimensional tensor of the same rank but with dimensions and data permuted according to axes. 3890 * Shape: ``[InputShape[axis[0]], InputShape[axis[1]], ... , InputShape[axis[N-1]]]`` 3891 * 3892 * Examples: 3893 * 3894 * - If ``axes`` is set to ``[3, 1, 2, 0]`` and the input shape is ``[6,7,8,9]``, 3895 * then the output has shape ``[9,7,8,6]`` 3896 */ 3897 3898message TransposeLayerParams { 3899 3900 /** 3901 * Length of "axes" should match the rank of input & output tensor 3902 * "axes" should be a permutation of "[0,1,2,...,N-1]" where N is the rank. 3903 */ 3904 repeated uint64 axes = 1; // 3905 3906} 3907 3908/** 3909 * A layer that computes the matrix multiplication of two tensors with numpy-like broadcasting 3910 * where the matrices reside in the last two indices of the tensor. 3911 * 3912 * .. code:: 3913 * 3914 * y = BatchedMatMul(a,b) 3915 * 3916 * Requires 1 or 2 inputs and produces 1 output. 3917 * 3918 * The first tensor, "a", must be provided as an input. The second tensor can either be an input or provided as a weight matrix parameter. 3919 * 3920 * Input 3921 * - a: First N-Dimensional tensor 3922 * - b: Second N-Dimensional tensor (either a rank-N input or a matrix, i.e. N=2, provided as a layer parameter) 3923 * 3924 * Output 3925 * A tensor containing the matrix product of two tensors. 3926 * When there are two inputs: rank is max(2, rank(a), rank(b)) 3927 * When there is one input: rank is same as that of the input. 3928 * 3929 * This operation behaves as following: 3930 * 3931 * When there are two inputs: 3932 * - If N >= 2 for both tensors, it is treated as a batch of matrices residing in the last two indices. 3933 * All the indices, except for the last two, are broadcasted using conventional rules. 3934 * - If the first tensor is 1-D, it is converted to a 2-D tensor by prepending a 1 to its shape. Eg. (D) -> (1,D) 3935 * - If the second tensor is 1-D, it is converted to a 2-D tensor by appending a 1 to its shape. Eg. (D) -> (D,1) 3936 * 3937 * When there is one input: 3938 * - The weight matrix corresponds to a matrix, of shape (X1, X2). Values of X1, X2 must be provided as layer parameters. 3939 * - The input, "a", is reshaped into a matrix by combining all the leading dimensions, except the last, into a batch dimension. eg: 3940 * - if "a" is rank 1 (X1,) --> (1, X1). Output shape will be (X2,) 3941 * - if "a" is rank 2 (B1, X1) --> no need to reshape. Output shape will be (B1, X2) 3942 * - if "a" is rank 3 (B1, B2, X1) --> (B1 * B2, X1). Output shape will be (B1, B2, X2) 3943 * - etc 3944 */ 3945message BatchedMatMulLayerParams { 3946 3947 /** 3948 * If transposeA is true, it transposes the left matrix on the fly before matrix multiplication. 3949 * (is ignored when there is one input) 3950 */ 3951 bool transposeA = 1; 3952 /** 3953 * If transposeB is true, it transposes the right matrix on the fly before matrix multiplication. 3954 * (is ignored when there is one input) 3955 */ 3956 bool transposeB = 2; 3957 3958 /* 3959 * Following parameters are ignored when there are two inputs. 3960 */ 3961 3962 uint64 weightMatrixFirstDimension = 5; /// X1: same as the last dimension of the input tensor 3963 uint64 weightMatrixSecondDimension = 6; /// X2: same as the last dimension of the output tensor 3964 3965 bool hasBias = 7; /// Whether a bias is added or not. Supported only when there is one input. 3966 3967 /* 3968 * Weight matrix representing shape [X1, X2]. 3969 * Values are however stored in column major order, 3970 * in the "repeated float" or "bytes" fields of the message "WeightParams" 3971 */ 3972 WeightParams weights = 8; 3973 WeightParams bias = 9; /// Bias vector [X2]. Supported only when there is one input. 3974 3975 /** 3976 * If set, this layer, at runtime, quantizes the floating point input blob to int8 before applying the 3977 * matrix multiplication using the INT8 weight parameters provided in weights->int8RawValue. The 3978 * result is then dequantized. 3979 * Requires: 3980 * * number of inputs to be 1 3981 * * hasBias == false 3982 * * QuantizationType == LinearQuantizationParams, such that 3983 * * size of the "scale" field is 1 and "bias" field is empty in "LinearQuantizationParams" 3984 * * numberOfBits == 8 3985 * * weights->rawValue_size to be empty 3986 */ 3987 bool int8DynamicQuantize = 10; 3988 3989} 3990 3991/** 3992 * A layer that concatenates a list of tensors along a specified axis. 3993 * 3994 * .. code:: 3995 * 3996 * y = ConcatNDLayer(x1,x2,....) 3997 * 3998 * Requires at least 2 input and produces 1 output. 3999 * 4000 * Input 4001 * The rank of the input tensors must match and all dimensions also must match, except for the dimension 'axis'. 4002 * 4003 * 4004 * Output 4005 * Same rank as the input. The dimension along "axis", is the sum of the dimensions of the inputs. 4006 * 4007 * example: 4008 * 4009 * in1 : shape (3, 2), value = [[1, 2], [3, 4], [5, 6]] 4010 * in2 : shape (3, 2), value = [[7, 8], [9, 10], [11, 12]] 4011 * axis = 0 4012 * 4013 * if interleave = False (default) 4014 * output : shape (6, 2) 4015 * output[0:3, :] = in1 4016 * output[3:6, :] = in2 4017 * value = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12]] 4018 * 4019 * if interleave = True 4020 * output : shape (6, 2) 4021 * output[0::2, :] = in1 4022 * output[1::2, :] = in2 4023 * value = [[1, 2], [7, 8], [3, 4], [9, 10], [5, 6], [11, 12]] 4024 * 4025 */ 4026message ConcatNDLayerParams { 4027 4028 /** 4029 * Dimension along which to concatenate. Supports negative values of the parameter 'axis'. 4030 */ 4031 int64 axis = 1; 4032 4033 /** 4034 * (Only available in Core ML Specification >= 5 (iOS >= 14, macOS >= 11.0) 4035 * Interleave option. If True, concatenation is done via interleaving the inputs. 4036 * This requires all inputs to have the exact same shape. 4037 */ 4038 bool interleave = 2; 4039 4040 4041} 4042 4043/** 4044 * A layer that performs softmax normalization along a specified axis. 4045 * 4046 * .. code:: 4047 * 4048 * y = SoftmaxNDLayer(x) 4049 * 4050 * Requires 1 input and produces 1 output. 4051 * 4052 * Output shape is same as the input. 4053 */ 4054message SoftmaxNDLayerParams { 4055 4056 /** 4057 * Dimension on which the softmax would be performed. Supports negative values of the parameter 'axis'. 4058 */ 4059 int64 axis = 1; 4060 4061} 4062 4063/** 4064 * A layer that reverses specific dimensions of the input tensor. 4065 * It is similar in functionality to the numpy.flip method. 4066 * 4067 * Requires 1 input and produces 1 output. 4068 * Output shape is same as the input. 4069 */ 4070message ReverseLayerParams { 4071 4072 /** 4073 * Reverses each dimension of the input tensor for which corresponding reverseDim is set to True. 4074 * Requires len(reverseDim) == rank(inputTensor) 4075 */ 4076 repeated bool reverseDim = 1; 4077 4078} 4079 4080/** 4081 * A layer that reverses variable length slices. 4082 * 4083 * Requires 2 inputs and produces 1 output. 4084 * 4085 * 2 inputs, in order are denoted by "data", "seq_lengths". 4086 * "seq_lenghts" must be a rank 1 tensor, i.e. seq_lengths.shape = (B,) 4087 * which contains the lengths of the amount of sequence to be reversed, for each element of the batch. 4088 * Dimension "batchAxis" in "data" must be equal to B, i.e, 4089 * data.shape[batchAxis] = B. 4090 * 4091 * According to the batch axis, input "data" is first divided into a batch of B inputs, 4092 * each of which is flipped along the dimension "sequenceAxis", by the amount specified in 4093 * "seq_lengths", the second input. 4094 * 4095 * e.g.: 4096 * 4097 * data [shape = (2,4)]: 4098 * [0 1 2 3] 4099 * [4 5 6 7] 4100 * seq_lengths [shape = (2,)]: 4101 * [3, 0] 4102 * batchAxis = 0 4103 * sequenceAxis = 1 4104 * 4105 * output [shape = (2,4)]: 4106 * [2 1 0 3] 4107 * [4 5 6 7] 4108 * 4109 * 4110 * data [shape = (2,3,2)]: 4111 * [0 1] 4112 * [2 3] 4113 * [4 5] (slice = 0) 4114 * [6 7] 4115 * [8 9] 4116 * [10 11] (slice = 1) 4117 * seq_lengths [shape = (2,)]: 4118 * [2, 3] 4119 * batchAxis = 0 4120 * sequenceAxis = 1 4121 * 4122 * output [shape = (2,3,2)]: 4123 * [2 3] 4124 * [0 1] 4125 * [4 5] (slice = 0) 4126 * [10 11] 4127 * [8 9] 4128 * [6 7] (slice = 1) 4129 * 4130 * Output shape is same as the input. 4131 */ 4132message ReverseSeqLayerParams { 4133 4134 int64 batchAxis = 1; // batch axis has to be strictly less than seq_axis 4135 int64 sequenceAxis = 2; 4136 4137} 4138 4139/** 4140 * A layer that loads data as a parameter and provides it as an output. 4141 * 4142 * .. code:: 4143 * 4144 * y = LoadConstantNDLayer() 4145 * 4146 * Requires no input and produces 1 output. 4147 * 4148 * Output: A tensor with shape as provided in the parameter "shape" 4149 */ 4150message LoadConstantNDLayerParams { 4151 4152 /** 4153 * The shape of the constant to be loaded. 4154 */ 4155 repeated uint64 shape = 1; 4156 WeightParams data = 2; 4157 4158} 4159 4160/** 4161 * A layer that generates an output tensor with a constant value. 4162 * Input is only used to determine the shape of the output. 4163 * This layer is used to allocate a tensor with a dynamic shape (that of the input) and constant value. 4164 * 4165 * Requires 1 input and produces 1 output. 4166 * 4167 * .. code:: 4168 * 4169 * y = FillLikeLayer(x) 4170 * 4171 * Input 4172 * A N-Dimensional tensor, whose values are ignored. Only the shape is used to 4173 * infer the shape of the output. 4174 * 4175 * Output 4176 * A N-Dimensional tensor with the same shape as the input tensor. 4177 * 4178 */ 4179message FillLikeLayerParams { 4180 4181 float value = 1; 4182 4183} 4184 4185/** 4186 * A layer that generates an output tensor with a constant value. 4187 * This layer is used to allocate a tensor with a static shape and constant value. 4188 * 4189 * Requires no input and produces 1 output. 4190 * 4191 * .. code:: 4192 * 4193 * y = FillStaticLayer(x) 4194 * 4195 * Output 4196 * A N-Dimensional tensor of shape "targetShape". 4197 * 4198 */ 4199message FillStaticLayerParams { 4200 4201 float value = 1; 4202 repeated uint64 targetShape = 2; 4203 4204} 4205 4206/** 4207 * A layer that generates an output tensor with a constant value. 4208 * This layer is used to allocate a tensor with a dynamic shape (as specified by the input) and constant value. 4209 * 4210 * Requires 1 input and produces 1 output. 4211 * 4212 * .. code:: 4213 * 4214 * y = FillDynamicLayer(x) 4215 * 4216 * Input 4217 * A rank 1 tensor specifying the shape of the output 4218 * 4219 * Output 4220 * An N-Dimensional tensor with the shape specified by the values in the input tensor. 4221 * 4222 */ 4223message FillDynamicLayerParams { 4224 4225 float value = 1; 4226 4227} 4228 4229/** 4230 * A layer that returns the elements either from tensor x or tensor y, 4231 * depending on the value in the condition tensor. 4232 * It is similar in functionality to the numpy.where method with 3 inputs. 4233 * 4234 * Requires 3 inputs and produces 1 output. 4235 * Inputs, in order, are the condition tensor, x and y. 4236 * 4237 * for each vector index (i,...,j): 4238 * output[i,...,j] = x[i,...,j] if condition[i,...,j] = True 4239 * y[i,...,j] if condition[i,...,j] = False 4240 * 4241 * All the 3 inputs are first broadcasted to a common shape. 4242 * (the shapes must be broadcastable) 4243 * 4244 * output.rank = max(input[0].rank, input[1].rank, input[2].rank) 4245 * 4246 */ 4247message WhereBroadcastableLayerParams { 4248 4249} 4250 4251/** 4252 * A layer that computes elementwise trigonometric sine function. 4253 * 4254 * 4255 * .. code:: 4256 * 4257 * y = SinLayer(x) 4258 * 4259 * Requires 1 input and produces 1 output. 4260 * Output shape is same as the input. 4261 * 4262 */ 4263message SinLayerParams { 4264 4265} 4266 4267/** 4268 * A layer that computes elementwise trigonometric cosine function. 4269 * 4270 * 4271 * .. code:: 4272 * 4273 * y = CosLayer(x) 4274 * 4275 * Requires 1 input and produces 1 output. 4276 * Output shape is same as the input. 4277 * 4278 */ 4279message CosLayerParams { 4280 4281} 4282 4283/** 4284 * A layer that computes elementwise trigonometric tangent function. 4285 * 4286 * 4287 * .. code:: 4288 * 4289 * y = TanLayer(x) 4290 * 4291 * Requires 1 input and produces 1 output. 4292 * Output shape is same as the input. 4293 * 4294 */ 4295message TanLayerParams { 4296 4297} 4298 4299/** 4300 * A layer that computes elementwise trigonometric arcsine function. 4301 * 4302 * 4303 * .. code:: 4304 * 4305 * y = AsinLayer(x) 4306 * 4307 * Requires 1 input and produces 1 output. 4308 * Output shape is same as the input. 4309 * 4310 */ 4311message AsinLayerParams { 4312 4313} 4314 4315/** 4316 * A layer that computes elementwise trigonometric arccosine function. 4317 * 4318 * 4319 * .. code:: 4320 * 4321 * y = AcosLayer(x) 4322 * 4323 * Requires 1 input and produces 1 output. 4324 * Output shape is same as the input. 4325 * 4326 */ 4327message AcosLayerParams { 4328 4329} 4330 4331/** 4332 * A layer that computes elementwise trigonometric arctangent function. 4333 * 4334 * 4335 * .. code:: 4336 * 4337 * y = AtanLayer(x) 4338 * 4339 * Requires 1 input and produces 1 output. 4340 * Output shape is same as the input. 4341 * 4342 */ 4343message AtanLayerParams { 4344 4345} 4346 4347/** 4348 * A layer that computes elementwise trigonometric hyperbolic sine function. 4349 * 4350 * 4351 * .. code:: 4352 * 4353 * y = SinhLayer(x) 4354 * 4355 * Requires 1 input and produces 1 output. 4356 * Output shape is same as the input. 4357 * 4358 */ 4359message SinhLayerParams { 4360 4361} 4362 4363/** 4364 * A layer that computes elementwise trigonometric hyperbolic cosine function. 4365 * 4366 * 4367 * .. code:: 4368 * 4369 * y = CoshLayer(x) 4370 * 4371 * Requires 1 input and produces 1 output. 4372 * Output shape is same as the input. 4373 * 4374 */ 4375message CoshLayerParams { 4376 4377} 4378 4379/** 4380 * A layer that computes elementwise trigonometric hyperbolic tangent function. 4381 * 4382 * 4383 * .. code:: 4384 * 4385 * y = TanhLayer(x) 4386 * 4387 * Requires 1 input and produces 1 output. 4388 * Output shape is same as the input. 4389 * 4390 */ 4391message TanhLayerParams { 4392 4393} 4394 4395/** 4396 * A layer that computes elementwise trigonometric hyperbolic arcsine function. 4397 * 4398 * 4399 * .. code:: 4400 * 4401 * y = AsinhLayer(x) 4402 * 4403 * Requires 1 input and produces 1 output. 4404 * Output shape is same as the input. 4405 * 4406 */ 4407message AsinhLayerParams { 4408 4409} 4410 4411/** 4412 * A layer that computes elementwise trigonometric hyperbolic arccosine function. 4413 * 4414 * 4415 * .. code:: 4416 * 4417 * y = AcoshLayer(x) 4418 * 4419 * Requires 1 input and produces 1 output. 4420 * Output shape is same as the input. 4421 * 4422 */ 4423message AcoshLayerParams { 4424 4425} 4426 4427/** 4428 * A layer that computes elementwise trigonometric hyperbolic arctangent function. 4429 * 4430 * 4431 * .. code:: 4432 * 4433 * y = AtanhLayer(x) 4434 * 4435 * Requires 1 input and produces 1 output. 4436 * Output shape is same as the input. 4437 * 4438 */ 4439message AtanhLayerParams { 4440 4441} 4442/** 4443 * A layer that raises each element in first tensor to the power of 4444 * corresponding element in the second tensor. 4445 * Supports conventional numpy-like broadcasting. 4446 * 4447 * .. code:: 4448 * 4449 * y = PowBroadcastableLayer(x) 4450 * 4451 * Requires 2 inputs and produces 1 output. 4452 * 4453 * Input 4454 * - First N-Dimensional tensor 4455 * - Second N-Dimensional tensor 4456 * 4457 * Output 4458 * An N-Dimensional tensor with the broadcast shape. 4459 * 4460 */ 4461message PowBroadcastableLayerParams { 4462 4463} 4464 4465/** 4466 * A layer that computes the exponential of all elements in the input tensor, with the base 2. 4467 * 4468 * 4469 * .. code:: 4470 * 4471 * y = Exp2Layer(x) 4472 * 4473 * Requires 1 input and produces 1 output. 4474 * Output shape is same as the input. 4475 * 4476 */ 4477message Exp2LayerParams { 4478 4479} 4480 4481/** 4482 * A layer that returns a tensor containing the indices of all non-zero 4483 * elements of input tensor. 4484 * It is similar in functionality to the numpy.where method with 1 input. 4485 * 4486 * Requires 1 input and produces 1 output. 4487 * Output is of rank 2, of shape (N,R), 4488 * where N is the number of non-zero elements in the input and R is the rank of the input. 4489 * 4490 * Output contains indices represented in the multi-index form 4491 * 4492 * e.g.: 4493 * input {shape = (4,)}: 4494 * [0 1 0 2] 4495 * output {shape = (2,1)}: 4496 * [1] 4497 * [3] 4498 * 4499 * 4500 * input {shape = (3, 3)}: 4501 * [1 2 1] 4502 * [0 2 2] 4503 * [2 1 0] 4504 * output {shape = (7,1)}: 4505 * [0. 0.] 4506 * [0. 1.] 4507 * [0. 2.] 4508 * [1. 1.] 4509 * [1. 2.] 4510 * [2. 0.] 4511 * [2. 1.] 4512 * 4513 */ 4514message WhereNonZeroLayerParams { 4515 4516} 4517 4518/** 4519 * A layer that copies a tensor setting everything outside a central band in 4520 * each inner-most matrix to zero. 4521 * 4522 * Requires 1 input and produces 1 output. 4523 * 4524 * Parameters for matrix_band_part layer 4525 * band(m, n) = (num_lower < 0 || (m-n) <= num_lower) && (num_upper < 0 || (n-m) <= num_upper). 4526 * output[i, j, k, ..., m, n] = band(m, n) * input[i, j, k, ..., m, n] 4527 * 4528 * 4529 * Output shape is same as the input shape. 4530 * Rank of the input must be at least 2. 4531 * For rank higher than 2, the last 2 dimensions are treated as the matrix, while the rest are treated as batch. 4532 */ 4533message MatrixBandPartLayerParams { 4534 4535 int64 numLower = 1; 4536 int64 numUpper = 2; 4537 4538} 4539 4540/** 4541 * A layer that copies a tensor setting everything outside upper triangular to zero. 4542 * 4543 * Requires 1 input and produces 1 output. 4544 * 4545 * Output shape is same as the input shape. 4546 * Rank of the input must be at least 2. 4547 * For rank higher than 2, the last 2 dimensions are treated as the matrix, while the rest are treated as batch. 4548 */ 4549message UpperTriangularLayerParams { 4550 4551 int64 k = 1; // Diagonal below which to zero elements. k = 0 (the default) is the main diagonal, k < 0 is below it and k > 0 is above 4552 4553} 4554 4555/** 4556 * A layer that copies a tensor setting everything outside lower triangular to zero. 4557 * 4558 * Requires 1 input and produces 1 output. 4559 * 4560 * Output shape is same as the input shape. 4561 * Rank of the input must be at least 2. 4562 * For rank higher than 2, the last 2 dimensions are treated as the matrix, while the rest are treated as batch. 4563 */ 4564message LowerTriangularLayerParams { 4565 4566 int64 k = 1; // Diagonal above which to zero elements. k = 0 (the default) is the main diagonal, k < 0 is below it and k > 0 is above 4567 4568} 4569 4570/** 4571 * 4572 * A layer that broadcasts a tensor to a new shape. 4573 * 4574 * Requires 2 inputs and produces 1 output. 4575 * 4576 * First input is broadcast to produce the output, while the second input is only 4577 * used to determine the shape of the output. Values of second input are not used. 4578 * 4579 * Output is a tensor with the same shape as the second input. 4580 * 4581 */ 4582message BroadcastToLikeLayerParams { 4583 4584} 4585 4586/** 4587 * 4588 * A layer that broadcasts a tensor to a new shape. 4589 * 4590 * Requires 1 input and produces 1 output. 4591 * 4592 * Output tensor is the broadcasted version of the input and has shape as specified in the 4593 * parameter "targetShape". 4594 */ 4595message BroadcastToStaticLayerParams { 4596 4597 repeated uint64 targetShape = 1; 4598 4599} 4600 4601/** 4602 * 4603 * A layer that broadcasts a tensor to a new shape. 4604 * 4605 * Requires 2 inputs and produces 1 output. 4606 * 4607 * First input is the one that is broadcasted to produce the output. 4608 * Second input is a rank 1 tensor specifying the shape of the output. 4609 * Output tensor has shape as specified by the values in the 2nd input tensor. 4610 */ 4611message BroadcastToDynamicLayerParams { 4612 4613} 4614 4615/** 4616 * A layer that performs element-wise addition operation with broadcast support. 4617 * 4618 * Requires 2 inputs and produces 1 output. 4619 */ 4620message AddBroadcastableLayerParams { 4621 4622} 4623 4624/** 4625 * A layer that performs element-wise maximum operation with broadcast support. 4626 * 4627 * Requires 2 inputs and produces 1 output. 4628 */ 4629message MaxBroadcastableLayerParams { 4630 4631} 4632 4633/** 4634 * A layer that performs element-wise minimum operation with broadcast support. 4635 * 4636 * Requires 2 inputs and produces 1 output. 4637 */ 4638message MinBroadcastableLayerParams { 4639 4640} 4641 4642/** 4643 * A layer that performs element-wise modular operation with broadcast support. 4644 * 4645 * Requires 2 inputs and produces 1 output. 4646 */ 4647message ModBroadcastableLayerParams { 4648 4649} 4650 4651/** 4652 * A layer that performs element-wise floor division operation with broadcast support. 4653 * 4654 * Requires 2 inputs and produces 1 output. 4655 */ 4656message FloorDivBroadcastableLayerParams { 4657 4658} 4659 4660/** 4661 * A layer that performs element-wise subtract operation with broadcast support. 4662 * 4663 * Requires 2 inputs and produces 1 output. 4664 */ 4665message SubtractBroadcastableLayerParams { 4666 4667} 4668 4669/** 4670 * A layer that performs element-wise multiply operation with broadcast support. 4671 * 4672 * Requires 2 inputs and produces 1 output. 4673 */ 4674message MultiplyBroadcastableLayerParams { 4675 4676} 4677 4678/** 4679 * A layer that performs element-wise division operation with broadcast support. 4680 * 4681 * Requires 2 inputs and produces 1 output. 4682 */ 4683message DivideBroadcastableLayerParams { 4684 4685} 4686 4687/** 4688 * Gather layer that gathers elements from the first input, along a specified axis, 4689 * at indices specified in the second input. 4690 * It is similar in functionality to the numpy.take method. 4691 * 4692 * Requires 2 inputs and produces 1 output. 4693 * 4694 * Given two inputs, 'data' and 'indices', gather the slices of 'data' 4695 * and store into output. 4696 * e.g. 4697 * for i in [0, length(indices) - 1] 4698 * output[i] = data[indices[i]] (1-D case, axis=0) 4699 * 4700 * if axis = 0: 4701 * for each vector index (i,...,j) 4702 * output[i,...,j,:,..,:] = data[indices[i,...,j],:,..,:] 4703 * 4704 * output.rank = (data.rank - 1) + indices.rank 4705 * 4706 * Negative indices and negative axis are supported. 4707 * 4708 * e.g: 4709 * 4710 * data shape = (2, 3) 4711 * indices shape = (6, 8) 4712 * axis = 0 4713 * output shape = (6, 8) + (3,) = (6, 8, 3) 4714 * 4715 * data shape = (2, 3, 5) 4716 * indices shape = (6, 8) 4717 * axis = 1 4718 * output shape = (2,) + (6, 8) + (5,) = (2, 6, 8, 5) 4719 * 4720 */ 4721message GatherLayerParams { 4722 4723 int64 axis = 1; 4724 4725} 4726 4727/* 4728 * Scatter accumulation mode. 4729 */ 4730enum ScatterMode { 4731 4732 SCATTER_UPDATE = 0; 4733 SCATTER_ADD = 1; /// add 4734 SCATTER_SUB = 2; /// subtract 4735 SCATTER_MUL = 3; /// multiply 4736 SCATTER_DIV = 4; /// divide 4737 SCATTER_MAX = 5; /// maximum 4738 SCATTER_MIN = 6; /// minimum 4739 4740} 4741 4742/* 4743 * A layer that scatters data into a new tensor according to indices from the input. 4744 * This is the inverse operation of Gather. 4745 * 4746 * Requires 3 inputs and produces 1 output. 4747 * 4748 * Output is initialized with the first input. 4749 * Then updated with the values in the third input, at indices specified by the second input. 4750 * 4751 * An example when axis=0: 4752 * Given three inputs, in order, "container", "indices", "updates", where 4753 * 4754 * - "container" is a rank R+1 tensor of shape [D_0, D_1, ..., D_R], which 4755 * contains D_0 number of tensors, each with shape [D_1, ..., D_R]. 4756 * 4757 * - "indices" is a rank 1 tensor with shape [N], where N is the number of updates. 4758 * The values in this tensor must be in the range [0, D_0 - 1]. (negative indexing is supported) 4759 * 4760 * - "updates" is a rank R+1 tensor with shape [N, D_1, ..., D_R], which represents 4761 * a total number of N tensors, each of shape [D_1, ..., D_R]. 4762 * 4763 * The effect of this operation is as follows: 4764 * 4765 * output = container; 4766 * For each i in 0, ..., N - 1 4767 * output[indices[i], :, ..., :] = updates[i, :, ..., :] // if mode == "SCATTER_UPDATE" 4768 * 4769 * or 4770 * For each i in 0, ..., N - 1 4771 * output[indices[i], :, ..., :] += updates[i, :, ..., :] // if mode == "SCATTER_ADD" 4772 * 4773 * etc 4774 * 4775 * When "indices" is a tensor of rank greater than 1, the equation becomes (for axis=0): 4776 * For each vector index (i,...,j) 4777 * output[indices[i,...,j],...] -= updates[i,...,j,...] // if mode == "SCATTER_SUB" 4778 * 4779 * 4780 * The output has the same shape as the first input. 4781 * "indices" input must have rank less than or equal to the "updates" input and its shape 4782 * must be a subset of the the shape of the "updates" input. 4783 * 4784 * e.g: 4785 * 4786 * container shape = (4, 3) 4787 * indices shape = (5, 2, 3) 4788 * updates shape = (4, 5, 2, 3) 4789 * axis = 1 4790 * output shape = (4, 3) 4791 * 4792 * container shape = (4, 4, 3) 4793 * indices shape = (6,) 4794 * updates shape = (4, 6, 3) 4795 * axis = -2 4796 * output shape = (4, 4, 3) 4797 * 4798 * container shape = (5,) 4799 * indices shape = (5, 7, 5, 6) 4800 * updates shape = (5, 7, 5, 6) 4801 * axis = -1 4802 * output shape = (5,) 4803 */ 4804 4805message ScatterLayerParams { 4806 4807 int64 axis = 1; 4808 ScatterMode mode = 2; /// mode of accumulation. 4809 4810} 4811 4812/** 4813 * A layer that gathers elements from the first input, 'params', at the multi-indices specified 4814 * by the second input, 'indices'. 4815 * 4816 * Requires 2 inputs and produces 1 output. 4817 * 4818 * 'params' = input[0], 'indices' = input[1] 4819 * 4820 * 'indices' is a rank K+1 tensor of shape [I_0, I_1, .., I_(K-1), I_K] which is viewed as a collection of 4821 * indices of (I_0 * I_1 * ... * I_(K-1)) points in the I_K dimensional space. For instance, the multi-index of the first point 4822 * is indices[0,0,...,0,:]. 4823 * 4824 * Here is how the output is constructed: 4825 * 4826 * for i = 0,1,...,(I_0-1) 4827 * ... 4828 * for j = 0,1,....,(I_(K-1)-1) 4829 * output[i,....,j,:,:,..,:] = params[indices[i,...,j,:], :,:,..,:] 4830 * 4831 * Hence, output shape is [I_0, I_1,...,I(K-1)] + params.shape[I_K:] 4832 * 4833 * output.rank = indices.rank - 1 + params.rank - indices.shape[-1] 4834 * 4835 * e.g: 4836 * 4837 * input[0] shape = (4, 2, 3, 4) 4838 * input[1] shape = (6, 2) 4839 * output shape = (6,) + (3, 4) = (6, 3, 4) 4840 * 4841 * input[0] shape = (3, 3, 3, 4, 7) 4842 * input[1] shape = (3, 5) 4843 * output shape = (3,) + () = (3,) 4844 * 4845 * input[0] shape = (5, 3, 2, 5) 4846 * input[1] shape = (2, 7, 3, 2) 4847 * output shape = (2, 7, 3) + (2, 5) = (2, 7, 3, 2, 5) 4848 * 4849 */ 4850message GatherNDLayerParams { 4851 4852} 4853 4854/* 4855 * A layer that scatters data into a new tensor according to multi-indices from the input. 4856 * This is the inverse operation of GatherND. 4857 * 4858 * Requires 3 inputs and produces 1 output. 4859 * 3 inputs, in order are denoted as "container", "indices", "updates". 4860 * 4861 * 'indices' is a rank K+1 tensor of shape [I_0, I_1, .., I_(K-1), I_K] which is viewed as a collection of 4862 * indices of (I_0 * I_1 * ... * I_(K-1)) points in the I_K dimensional space. For instance, the multi-index of the first point 4863 * is indices[0,0,...,0,:]. 4864 * 4865 * container.rank >= I_K 4866 * updates.rank = K + (container.rank - I_K) 4867 * shape of 'updates' = [I_0, I_1,...,I(K-1)] + container.shape[I_K:] 4868 * 4869 * output = container 4870 * For each vector index (i,...,j) s.t. 0<=i<I_0,..., 0<=j<I_K 4871 * output[indices[i,...,j,:], :,:,..,:] = updates[i,....,j,:,:,..,:] // if mode == "SCATTER_UPDATE" 4872 * 4873 * The output has the same shape as the first input. 4874 * 4875 * e.g: 4876 * 4877 * container shape = (3, 2) 4878 * indices shape = (4, 2) 4879 * updates shape = (4,) 4880 * output shape = (3, 2) 4881 * 4882 * container shape = (7, 6) 4883 * indices shape = (4, 7, 2, 5, 1) 4884 * updates shape = (4, 7, 2, 5, 6) 4885 * output shape = (7, 6) 4886 * 4887 */ 4888message ScatterNDLayerParams { 4889 4890 ScatterMode mode = 1; /// mode of accumulation. 4891 4892} 4893 4894/** 4895 * Gather layer that gathers elements from the first input, along a specified axis, 4896 * at indices specified in the second input. 4897 * It is similar in functionality to the numpy.take_along_axis method. 4898 * 4899 * Requires 2 inputs and produces 1 output. 4900 * 4901 * Given two inputs, 'data' and 'indices', gather the slices of 'data' 4902 * and store into output. 4903 * 4904 * Both inputs and output have the same rank. 4905 * Output shape is same as the shape of 'indices' 4906 * Shapes of 'indices' and 'data' match, except at the 'axis' dimension. 4907 * 4908 * This operation performs the following operation for axis=0: 4909 * for each vector index (i,j,....,k) 4910 * output[i,j,....,k] = data[index[i,j,....,k],j,....,k] 4911 * 4912 * Negative indices and negative axis are supported. 4913 * 4914 * e.g: 4915 * 4916 * data shape = (4, 4, 7) 4917 * indices shape = (4, 5, 7) 4918 * axis = 1 4919 * output shape = (4, 5, 7) 4920 * 4921 */ 4922message GatherAlongAxisLayerParams { 4923 4924 int64 axis = 1; 4925 4926} 4927 4928/** 4929 * A layer that scatters data into a new tensor according to indices from 4930 * the input along the given axis into the output tensor. 4931 * This is the inverse operation of GatherAlongAxis. 4932 * It is similar in functionality to the numpy.put_along_axis method. 4933 * 4934 * Requires 3 inputs and produces 1 output. 4935 * 3 inputs, in order are denoted as "container", "indices", "updates". 4936 * 4937 * All inputs and output have the same rank. 4938 * Output shape is same as the shape of 'container' 4939 * Shapes of 'indices' and 'updates' match, which is same as the shape of 'container' except at the 'axis' dimension. 4940 * 4941 * Negative indices and negative axis are supported. 4942 * 4943 * This operation performs the following operation for axis=0: 4944 * output = container 4945 * for each vector index (i,j,....,k) 4946 * output[index[i,j,....,k],j,....,k] = updates[i,j,....,k] 4947 * 4948 * e.g.: 4949 * 4950 * container shape = (2, 5, 6) 4951 * indices shape = (2, 2, 6) 4952 * updates shape = (2, 2, 6) 4953 * axis = -2 4954 * output shape = (2, 5, 6) 4955 * 4956 */ 4957message ScatterAlongAxisLayerParams { 4958 4959 int64 axis = 1; 4960 ScatterMode mode = 2; /// mode of accumulation. 4961 4962} 4963 4964/** 4965 * A layer that stacks the input tensors along the given axis. 4966 * It is similar in functionality to the numpy.stack method. 4967 * 4968 * Requires at least 2 inputs and produces 1 output. 4969 * All inputs must have the same shape. 4970 * Rank of the output is 1 greater than the rank of the inputs. 4971 * 4972 * Negative indexing is supported for the "axis" parameter. 4973 * 4974 * e.g.: 4975 * 4976 * input shape = (2, 4, 2) 4977 * number of inputs = 5 4978 * axis = 3 4979 * output shape = (2, 4, 2, 5) 4980 * 4981 * input shape = (2, 4, 2) 4982 * number of inputs = 5 4983 * axis = -2 4984 * output shape = (2, 4, 5, 2) 4985 */ 4986message StackLayerParams { 4987 4988 int64 axis = 1; 4989 4990} 4991 4992/** 4993 * A layer that reshapes a tensor that does not alter the rank of the input. 4994 * Order of the data is left unchanged. 4995 * 4996 * Requires 1 input and produces 1 output. 4997 * 4998 * e.g: 4999 * 5000 * input shape = (20,10) 5001 * targetShape = (5,-1) 5002 * output shape = (5,40) 5003 * 5004 * input shape = (20,10,5) 5005 * targetShape = (0,2,25) 5006 * output shape = (20,2,25) 5007 * 5008 * input shape = (10,3,5) 5009 * targetShape = (25,0,-1) 5010 * output shape = (25,3,2) 5011 */ 5012message RankPreservingReshapeLayerParams { 5013 5014 /** 5015 * Length of this field must be same as the input/output rank. 5016 * It can have 0's, in which case the corresponding input dimension is kept intact. 5017 * At most one element can be -1, in which case the output dimension is calculated from rest of the shape. 5018 */ 5019 repeated int64 targetShape = 1; 5020 5021} 5022 5023/** 5024 * Constant padding layer. 5025 * Pad the input array with a constant value, either along a single given axis or along a set of axes. 5026 * 5027 * Requires 1 or 2 inputs and produces 1 output. 5028 * The amount of padding can be either set as a parameter ("padAmounts") or provided as a second input. 5029 * 5030 * Output rank is same as the rank of the first input. 5031 * 5032 * when "padToGivenOutputSizeMode" is False: 5033 * 5034 * output_shape[i] = input_shape[i] + padAmounts[2*i] + padAmounts[2*i+1], i=0,...,rank-1 5035 * 5036 * Examples: 5037 * 5038 * input shape = (20,10) 5039 * padAmounts = [0,1,4,0] 5040 * output shape = (21,14) 5041 * 5042 * input shape = (20,10,5) 5043 * padAmounts = [0,0,3,4,0,9] 5044 * output shape = (20,17,14) 5045 * 5046 * 5047 * when "padToGivenOutputSizeMode" is True 5048 * 5049 * output_shape[i] = max(input_shape[i], max(padAmounts[2*i] + padAmounts[2*i+1])), i=0,...,rank-1 5050 * 5051 * input shape = (20,10) 5052 * padAmounts = [0,21,14,0] 5053 * output shape = (21,14) 5054 * 5055 * input shape = (20,10,5) 5056 * padAmounts = [0,0,17,0,0,14] 5057 * output shape = (20,17,14) 5058 */ 5059message ConstantPaddingLayerParams { 5060 /** 5061 * The value to be used for padding. 5062 */ 5063 float value = 1; 5064 5065 /** 5066 * Length of this repeated field must be twice the rank of the first input. 5067 * 2*i-th and (2*i+1)-th values represent the amount of padding to be applied to the the i-th input 5068 * dimension, "before" and "after" the input values, respectively. 5069 */ 5070 repeated uint64 padAmounts = 2; 5071 5072 /** 5073 * When this is True, positive values in "padAmounts" are equivalent to the output shape. 5074 * In that case only one of padAmounts[2*i] and padAmounts[2*i+1] can be non zero, for i=0,..,rank-1. 5075 */ 5076 bool padToGivenOutputSizeMode = 3; 5077} 5078 5079/** 5080 * A layer that returns a tensor filled with values from the normal distribution. 5081 * 5082 * Requires 1 input and produces 1 output. 5083 * 5084 * Parameters 5085 * seed: seed used for the normal distribution. 5086 * mean: mean of the normal distribution. 5087 * stdDev: standard deviation of the normal distribution. 5088 * 5089 * Input 5090 * An N-Dimensional tensor, whose values are ignored. Only the shape is used to 5091 * infer the shape of the output. 5092 * 5093 * Output 5094 * An N-Dimensional tensor with the same shape as the input tensor. 5095 * 5096 */ 5097message RandomNormalLikeLayerParams { 5098 5099 int64 seed = 1; 5100 float mean = 2; 5101 float stdDev = 3; 5102 5103} 5104 5105/** 5106 * A layer that returns a tensor filled with values from the normal distribution. 5107 * 5108 * Requires no input and produces 1 output. 5109 * 5110 * Parameters 5111 * seed: seed used for the normal distribution. 5112 * mean: mean of the normal distribution. 5113 * stdDev: standard deviation of the normal distribution. 5114 * outputShape: shape of the output tensor. 5115 * 5116 * Output 5117 * An N-Dimensional tensor of shape "outputShape". 5118 * 5119 */ 5120message RandomNormalStaticLayerParams { 5121 5122 int64 seed = 1; 5123 float mean = 2; 5124 float stdDev = 3; 5125 repeated uint64 outputShape = 4; 5126 5127} 5128 5129/** 5130 * A layer that returns a tensor filled with values from the normal distribution. 5131 * 5132 * Requires 1 input and produces 1 output. 5133 * 5134 * Parameters: 5135 * seed: seed used for the normal distribution. 5136 * mean: mean of the normal distribution. 5137 * stdDev: standard deviation of the normal distribution. 5138 * 5139 * Input 5140 * A rank 1 tensor specifying the shape of the output 5141 * 5142 * Output 5143 * An N-Dimensional tensor with the shape specified by the values in the input tensor. 5144 */ 5145message RandomNormalDynamicLayerParams { 5146 5147 int64 seed = 1; 5148 float mean = 2; 5149 float stdDev = 3; 5150 5151} 5152 5153/** 5154 * A layer that returns a tensor filled with values from the uniform distribution. 5155 * 5156 * Requires 1 input and produces 1 output. 5157 * 5158 * Parameters 5159 * seed: seed used for the uniform distribution. 5160 * minVal: lower bound on the range of random values for the uniform distribution. 5161 * maxVal: upper bound on the range of random values for the uniform distribution. 5162 * 5163 * Input 5164 * An N-Dimensional tensor, whose values are ignored. Only the shape is used to 5165 * infer the shape of the output. 5166 * 5167 * Output 5168 * An N-Dimensional tensor with the same shape as the input tensor. 5169 * 5170 */ 5171message RandomUniformLikeLayerParams { 5172 5173 int64 seed = 1; 5174 float minVal = 2; 5175 float maxVal = 3; 5176 5177} 5178 5179/** 5180 * A layer that returns a tensor filled with values from the uniform distribution. 5181 * 5182 * Requires no input and produces 1 output. 5183 * 5184 * Parameters 5185 * seed: seed used for the uniform distribution. 5186 * minVal: lower bound on the range of random values for the uniform distribution. 5187 * maxVal: upper bound on the range of random values for the uniform distribution. 5188 * outputShape: shape of the output tensor. 5189 * 5190 * Output 5191 * An N-Dimensional tensor of shape "outputShape". 5192 * 5193 */ 5194message RandomUniformStaticLayerParams { 5195 5196 int64 seed = 1; 5197 float minVal = 2; 5198 float maxVal = 3; 5199 repeated uint64 outputShape = 4; 5200 5201} 5202 5203/** 5204 * A layer that returns a tensor filled with values from the uniform distribution. 5205 * 5206 * Requires 1 input and produces 1 output. 5207 * 5208 * Parameters: 5209 * seed: seed used for the uniform distribution. 5210 * minVal: lower bound on the range of random values for the uniform distribution. 5211 * maxVal: upper bound on the range of random values for the uniform distribution. 5212 * 5213 * Input 5214 * A rank 1 tensor specifying the shape of the output 5215 * 5216 * Output 5217 * An N-Dimensional tensor with the shape specified by the values in the input tensor. 5218 * 5219 */ 5220message RandomUniformDynamicLayerParams { 5221 5222 int64 seed = 1; 5223 float minVal = 2; 5224 float maxVal = 3; 5225 5226} 5227 5228/** 5229 * A layer that returns a tensor filled with values from the Bernoulli distribution. 5230 * 5231 * Requires 1 input and produces 1 output. 5232 * 5233 * Parameters 5234 * seed: seed used for the Bernoulli distribution. 5235 * prob: probability of a 1 event. 5236 * 5237 * Input 5238 * An N-Dimensional tensor, whose values are ignored. Only the shape is used to 5239 * infer the shape of the output. 5240 * 5241 * Output 5242 * An N-Dimensional tensor with the same shape as the input tensor. 5243 * 5244 */ 5245message RandomBernoulliLikeLayerParams { 5246 5247 int64 seed = 1; 5248 float prob = 2; 5249 5250} 5251 5252/** 5253 * A layer that returns a tensor filled with values from the Bernoulli distribution. 5254 * 5255 * Requires no input and produces 1 output. 5256 * 5257 * Parameters 5258 * seed: seed used for the Bernoulli distribution. 5259 * prob: probability of a 1 event. 5260 * outputShape: shape of the output tensor. 5261 * 5262 * Output 5263 * An N-Dimensional tensor of shape "outputShape". 5264 */ 5265message RandomBernoulliStaticLayerParams { 5266 5267 int64 seed = 1; 5268 float prob = 2; 5269 repeated uint64 outputShape = 3; 5270 5271} 5272 5273/** 5274 * A layer that returns a tensor filled with values from the Bernoulli distribution. 5275 * 5276 * Requires 1 input and produces 1 output. 5277 * 5278 * Parameters: 5279 * seed: seed used for the Bernoulli distribution. 5280 * prob: probability of a 1 event. 5281 * 5282 * Input 5283 * A rank 1 tensor specifying the shape of the output 5284 * 5285 * Output 5286 * An N-Dimensional tensor with the shape specified by the values in the input tensor. 5287 */ 5288message RandomBernoulliDynamicLayerParams { 5289 5290 int64 seed = 1; 5291 float prob = 2; 5292 5293} 5294 5295/** 5296 * A layer that returns a tensor of the specified shape filled with values from the categorical distribution. 5297 * 5298 * Requires 1 input and produces 1 output. 5299 * 5300 * Parameter: 5301 * seed: seed used for the categorical distribution. 5302 * numSamples: number of samples to draw. 5303 * isLogits: true if the inputs are logits, false if the inputs are probabilities. 5304 * eps: default value is 1e-10. 5305 * temperature: default value is 1.0. 5306 * 5307 * Input tensor shape = [D_1, D_2, ... , D_(R-1), D_R] (Rank = R) 5308 * Then the shape of the output is [D_1, D_2, ... , D_(R-1), numSamples] (Rank = R) 5309 * 5310 */ 5311message CategoricalDistributionLayerParams { 5312 5313 int64 seed = 1; 5314 int64 numSamples = 2; 5315 bool isLogits = 3; 5316 float eps = 4; 5317 float temperature = 5; 5318} 5319 5320/** 5321 * A layer that performs reduction with L1 normalization operation. 5322 * 5323 * Negative indexing is supported. 5324 * Requires 1 input and produces 1 output. 5325 * 5326 * Parameters: 5327 * axes: dimensions along which to perform reduction 5328 * keepDims: if True, keep the reduced dimensions (value will be 1), otherwise, reduced dimensions are squeezed 5329 * reduceAll: ignore the "axes" parameter, perform reduction along all axes 5330 * 5331 */ 5332message ReduceL1LayerParams { 5333 5334 repeated int64 axes = 1; 5335 bool keepDims = 2; 5336 bool reduceAll = 3; 5337 5338} 5339 5340/** 5341 * A layer that performs reduction with L2 normalization operation. 5342 * 5343 * Negative indexing is supported. 5344 * Requires 1 input and produces 1 output. 5345 * 5346 * Parameters: 5347 * axes: dimensions along which to perform reduction 5348 * keepDims: if True, keep the reduced dimensions (value will be 1), otherwise, reduced dimensions are squeezed 5349 * reduceAll: ignore the "axes" parameter, perform reduction along all axes 5350 * 5351 */ 5352message ReduceL2LayerParams { 5353 5354 repeated int64 axes = 1; 5355 bool keepDims = 2; 5356 bool reduceAll = 3; 5357 5358} 5359 5360/** 5361 * A layer that performs reduction with max operation. 5362 * 5363 * Negative indexing is supported. 5364 * Requires 1 input and produces 1 output. 5365 * 5366 * Parameters: 5367 * axes: dimensions along which to perform reduction 5368 * keepDims: if True, keep the reduced dimensions (value will be 1), otherwise, reduced dimensions are squeezed 5369 * reduceAll: ignore the "axes" parameter, perform reduction along all axes 5370 * 5371 */ 5372message ReduceMaxLayerParams { 5373 5374 repeated int64 axes = 1; 5375 bool keepDims = 2; 5376 bool reduceAll = 3; 5377 5378} 5379 5380/** 5381 * A layer that performs reduction with min operation. 5382 * 5383 * Negative indexing is supported. 5384 * Requires 1 input and produces 1 output. 5385 * 5386 * Parameters: 5387 * axes: dimensions along which to perform reduction 5388 * keepDims: if True, keep the reduced dimensions (value will be 1), otherwise, reduced dimensions are squeezed 5389 * reduceAll: ignore the "axes" parameter, perform reduction along all axes 5390 * 5391 */ 5392message ReduceMinLayerParams { 5393 5394 repeated int64 axes = 1; 5395 bool keepDims = 2; 5396 bool reduceAll = 3; 5397 5398} 5399 5400/** 5401 * A layer that performs reduction with sum operation. 5402 * 5403 * Negative indexing is supported. 5404 * Requires 1 input and produces 1 output. 5405 * 5406 * Parameters: 5407 * axes: dimensions along which to perform reduction 5408 * keepDims: if True, keep the reduced dimensions (value will be 1), otherwise, reduced dimensions are squeezed 5409 * reduceAll: ignore the "axes" parameter, perform reduction along all axes 5410 * 5411 */ 5412message ReduceSumLayerParams { 5413 5414 repeated int64 axes = 1; 5415 bool keepDims = 2; 5416 bool reduceAll = 3; 5417 5418} 5419 5420/** 5421 * A layer that performs reduction with prod operation. 5422 * 5423 * Negative indexing is supported. 5424 * Requires 1 input and produces 1 output. 5425 * 5426 * Parameters: 5427 * axes: dimensions along which to perform reduction 5428 * keepDims: if True, keep the reduced dimensions (value will be 1), otherwise, reduced dimensions are squeezed 5429 * reduceAll: ignore the "axes" parameter, perform reduction along all axes 5430 * 5431 */ 5432message ReduceProdLayerParams { 5433 5434 repeated int64 axes = 1; 5435 bool keepDims = 2; 5436 bool reduceAll = 3; 5437 5438} 5439 5440/** 5441 * A layer that performs reduction with mean operation. 5442 * 5443 * Negative indexing is supported. 5444 * Requires 1 input and produces 1 output. 5445 * 5446 * Parameters: 5447 * axes: dimensions along which to perform reduction 5448 * keepDims: if True, keep the reduced dimensions (value will be 1), otherwise, reduced dimensions are squeezed 5449 * reduceAll: ignore the "axes" parameter, perform reduction along all axes 5450 * 5451 */ 5452message ReduceMeanLayerParams { 5453 5454 repeated int64 axes = 1; 5455 bool keepDims = 2; 5456 bool reduceAll = 3; 5457 5458} 5459 5460/** 5461 * A layer that performs reduction with logSum operation. 5462 * 5463 * Negative indexing is supported. 5464 * Requires 1 input and produces 1 output. 5465 * 5466 * Parameters: 5467 * axes: dimensions along which to perform reduction 5468 * keepDims: if True, keep the reduced dimensions (value will be 1), otherwise, reduced dimensions are squeezed 5469 * reduceAll: ignore the "axes" parameter, perform reduction along all axes 5470 * 5471 */ 5472message ReduceLogSumLayerParams { 5473 5474 repeated int64 axes = 1; 5475 bool keepDims = 2; 5476 bool reduceAll = 3; 5477 5478} 5479 5480/** 5481 * A layer that performs reduction with logSumExp operation. 5482 * 5483 * Negative indexing is supported. 5484 * Requires 1 input and produces 1 output. 5485 * 5486 * Parameters: 5487 * axes: dimensions along which to perform reduction 5488 * keepDims: if True, keep the reduced dimensions (value will be 1), otherwise, reduced dimensions are squeezed 5489 * reduceAll: ignore the "axes" parameter, perform reduction along all axes 5490 * 5491 */ 5492message ReduceSumSquareLayerParams { 5493 5494 repeated int64 axes = 1; 5495 bool keepDims = 2; 5496 bool reduceAll = 3; 5497 5498} 5499 5500/** 5501 * A layer that performs reduction with logSumExp operation. 5502 * 5503 * Negative indexing is supported. 5504 * Requires 1 input and produces 1 output. 5505 * 5506 * Parameters: 5507 * axes: dimensions along which to perform reduction 5508 * keepDims: if True, keep the reduced dimensions (value will be 1), otherwise, reduced dimensions are squeezed 5509 * reduceAll: ignore the "axes" parameter, perform reduction along all axes 5510 * 5511 */ 5512message ReduceLogSumExpLayerParams { 5513 5514 repeated int64 axes = 1; 5515 bool keepDims = 2; 5516 bool reduceAll = 3; 5517 5518} 5519 5520/** 5521 * A layer that increases the rank of the input tensor by adding unit dimensions. 5522 * 5523 * Requires 1 input and produces 1 output. 5524 * 5525 * e.g.: 5526 * 5527 * input shape = (10,5) 5528 * axes = (0,1) 5529 * output shape = (1,1,10,5) 5530 * 5531 * input shape = (10,5) 5532 * axes = (0,2) 5533 * output shape = (1,10,1,5) 5534 * 5535 * input shape = (10,5) 5536 * axes = (-2,-1) 5537 * output shape = (10,5,1,1) 5538 * 5539 */ 5540message ExpandDimsLayerParams { 5541 5542 /** 5543 * Axis values provided here get dimension 1 in the output tensor. 5544 * Negative indexing is supported. 5545 */ 5546 repeated int64 axes = 1; 5547 5548} 5549 5550/** 5551 * A layer that flattens the input tensor into a 2-dimensional matrix. 5552 * 5553 * Requires 1 input and produces 1 output. 5554 * Output tensor is always rank 2. 5555 * 5556 * First dimension of output is the product of all the dimensions in input[:axis] ("axis" is exclusive) 5557 * Second dimension of output is the product of all the dimensions in input[axis:] ("axis" is inclusive) 5558 * 5559 * e.g.: 5560 * input shape: (3,) 5561 * axis: -1 5562 * output shape: (1, 3) 5563 * 5564 * input shape: (3,) 5565 * axis: 1 5566 * output shape: (3, 1) 5567 * 5568 * input shape: (4, 3) 5569 * axis: -1 5570 * output shape: (4, 3) 5571 * 5572 * input shape: (5, 2) 5573 * axis: 0 5574 * output shape: (1, 10) 5575 * 5576 * input shape: (5, 5, 3) 5577 * axis: -2 5578 * output shape: (5, 15) 5579 * 5580 * input shape: (2, 3, 2) 5581 * axis: -1 5582 * output shape: (6, 2) 5583 * 5584 */ 5585message FlattenTo2DLayerParams { 5586 5587 int64 axis = 1; 5588 5589} 5590 5591/** 5592 * A layer that reshapes a tensor. 5593 * 5594 * Requires 1 input and produces 1 output. 5595 * 5596 * Output tensor is the reshaped version of the input and has shape as specified in the 5597 * parameter "targetShape". 5598 * 5599 */ 5600message ReshapeStaticLayerParams { 5601 5602 repeated int64 targetShape = 1; 5603 5604} 5605 5606/** 5607 * A layer that reshapes a tensor. 5608 * 5609 * Requires 2 inputs and produces 1 output. 5610 * 5611 * First input is reshaped to produce the output, while the second input is only 5612 * used to determine the shape of the output. Values of the second input are not used. 5613 * 5614 * Output is a tensor with the same shape as the second input. 5615 * 5616 */ 5617message ReshapeLikeLayerParams { 5618 5619} 5620 5621/** 5622 * A layer that reshapes a tensor. 5623 * 5624 * Requires 2 inputs and produces 1 output. 5625 * 5626 * First input is the one that is reshaped to produce the output. 5627 * Second input is a rank 1 tensor specifying the shape of the output. 5628 * Output tensor has shape as specified by the values in the 2nd input tensor. 5629 */ 5630message ReshapeDynamicLayerParams { 5631 5632} 5633 5634/** 5635 * A layer that decreases the rank of the input tensor by removing unit dimensions. 5636 * 5637 * Requires 1 input and produces 1 output. 5638 * 5639 * Output rank is one less than input rank, if input rank is more than 1. 5640 * If input rank is 1, output rank is also 1. 5641 * 5642 * e.g.: 5643 * 5644 * input shape = (1,1,10,5) 5645 * axes = (0,1) 5646 * output shape = (10,5) 5647 * 5648 * input shape = (1,10,5,1) 5649 * axes = (0,3) 5650 * output shape = (10,5) 5651 * 5652 * input shape = (10,5,1,1) 5653 * axes = (-2,-1) 5654 * output shape = (10,5) 5655 * 5656 * input shape = (1,) 5657 * axes = (0) 5658 * output shape = (1,) 5659 * 5660 */ 5661message SqueezeLayerParams { 5662 5663 /** 5664 * Axis values provided here get removed from the input tensor. 5665 * Negative indexing is supported. 5666 */ 5667 repeated int64 axes = 1; 5668 bool squeezeAll = 2; // if true squeeze all dimensions that are 1. 5669 5670} 5671 5672/** 5673 * A layer that returns top K (or bottom K) values and the corresponding indices 5674 * of the input along a given axis. 5675 * 5676 * Requires 1 or 2 inputs and produces 2 outputs. 5677 * 5678 * The second input is the value of the K, and is optional. 5679 * If there is only one input, value of K that is specified in the layer parameter is used. 5680 * 5681 * Both outputs have the same rank as the first input. 5682 * Second input must correspond to a scalar tensor. 5683 * 5684 * e.g.: 5685 * 5686 * first input's shape = (45, 34, 10, 5) 5687 * axis = 1 5688 * output shape, for both outputs = (45, K, 10, 5) 5689 * 5690 */ 5691message TopKLayerParams { 5692 5693 int64 axis = 1; /// negative indexing is supported 5694 uint64 K = 2; /// is ignored if a second input is present. 5695 bool useBottomK = 3; /// if true, bottom K (values, indices) are returned instead 5696 5697} 5698 5699/** 5700 * A layer that returns the indices of the maximum value along a specified axis in a tensor. 5701 * 5702 * Requires 1 input and produces 1 output. Negative indexing is supported. 5703 * 5704 * Output has the same rank as the input if "removeDim" is False (default). 5705 * Output has rank one less than the input if "removeDim" is True and input rank is more than 1. 5706 * 5707 * e.g.: 5708 * 5709 * input shape = (45, 34, 10, 5) 5710 * axis = -2 5711 * output shape = (45, 1, 10, 5), if removeDim = False (default) 5712 * output shape = (45, 10, 5), if removeDim = True 5713 * 5714 * input shape = (5,) 5715 * axis = 0 5716 * output shape = (1,), if removeDim = False or True 5717 * 5718 */ 5719message ArgMaxLayerParams { 5720 5721 int64 axis = 1; 5722 bool removeDim = 2; 5723 5724} 5725 5726/** 5727* A layer that returns the indices of the minimum value along a specified axis in a tensor. 5728* 5729* Requires 1 input and produces 1 output. Negative indexing is supported. 5730* 5731* Output has the same rank as the input if "removeDim" is False (default). 5732* Output has rank one less than the input if "removeDim" is True and input rank is more than 1. 5733* 5734* e.g.: 5735* 5736* input shape = (45, 34, 10, 5) 5737* axis = -2 5738* output shape = (45, 1, 10, 5), if removeDim = False (default) 5739* output shape = (45, 10, 5), if removeDim = True 5740* 5741* input shape = (5,) 5742* axis = 0 5743* output shape = (1,), if removeDim = False or True 5744* 5745*/ 5746message ArgMinLayerParams { 5747 5748 int64 axis = 1; 5749 bool removeDim = 2; 5750 5751} 5752 5753/** 5754 * A layer layer that splits the input tensor into multiple output tensors, 5755 * along the specified axis. 5756 * 5757 * The layer either uniformly splits the input tensor into ``num_splits`` tensors, or 5758 * splits according to the given split sizes in ``split_sizes``. 5759 * Supports unequal splits and negative indexing. 5760 * 5761 * Requires 1 input and produces at least 2 outputs. 5762 * Rank of all the outputs is same as that of the input. 5763 * 5764 * If parameter "splitSizes" is provided, value of the parameter "numSplits" is ignored, since in that case 5765 * "numSplits" is automatically inferred to be the length of "splitSizes". 5766 * 5767 * 5768 * e.g.: 5769 * input shape: (5, 3, 4) 5770 * axis = -3, split_sizes = [3, 2] 5771 * output shape: (3, 3, 4) 5772 * output shape: (2, 3, 4) 5773 */ 5774message SplitNDLayerParams { 5775 5776 int64 axis = 1; 5777 uint64 numSplits = 2; 5778 repeated uint64 splitSizes = 3; 5779 5780} 5781 5782/** 5783 * A layer that performs element-wise ceil operation on the input tensor that 5784 * rounds the value to the smallest integer not less than x. 5785 * 5786 * Requires 1 input and produces 1 output. 5787 * Output shape is same as the input. 5788 * 5789 */ 5790message CeilLayerParams { 5791 5792} 5793 5794/** 5795 * A layer that performs element-wise round operation on the input tensor 5796 * that rounds the value to the nearest integer. 5797 * 5798 * Requires 1 input and produces 1 output. 5799 * Output shape is same as the input. 5800 * 5801 */ 5802message RoundLayerParams { 5803 5804} 5805 5806/** 5807 * A layer that performs element-wise floor operation on the input tensor 5808 * that rounds the value to the largest integer not greater than x. 5809 * 5810 * Requires 1 input and produces 1 output. 5811 * Output shape is same as the input. 5812 * 5813 */ 5814message FloorLayerParams { 5815 5816} 5817 5818/** 5819 * A layer that performs element-wise sign operation (+1 for positive values, 5820 * -1 for negative values, 0 for zeros). 5821 * 5822 * Requires 1 input and produces 1 output. 5823 * Output shape is same as the input. 5824 * 5825 */ 5826message SignLayerParams { 5827 5828} 5829 5830/** 5831 * A layer that performs element-wise clip operation. Clip the values in the 5832 * input tensor to the threshold values [min_value, max_value]. 5833 * 5834 * Requires 1 input and produces 1 output. 5835 * 5836 * Parameter minVal: the minimum threshold. 5837 * Parameter maxVal: the maximum threshold. 5838 * 5839 * output = min(max(input, minVal), maxVal) 5840 * 5841 * Output shape is same as the input. 5842 */ 5843message ClipLayerParams { 5844 5845 float minVal = 1; 5846 float maxVal = 2; 5847 5848} 5849 5850/** 5851 * A layer that extracts a slice of size ``(end - begin) / stride`` 5852 * from the given input tensor. 5853 * Support negative indexing and negative strides. 5854 * 5855 * Requires 1 input and produces 1 output. 5856 * Output rank is same as the input rank. 5857 * 5858 * Value of beginIds, beginMasks, endIds, endMasks, strides are required parameters. 5859 * Lengths of all the parameters must equal the rank of the input. 5860 * 5861 * i-th element of "beginIds" is ignored and assumed to be 0 if the i-th element of 5862 * "beginMasks" is True 5863 * 5864 * i-th element of "endIds" is ignored and assumed to be -1 if the i-th element of 5865 * "endMasks" is True 5866 * 5867 * e.g.: 5868 * if i-th element of "squeezeMasks" is set to True, only beginIds[i] would be sliced 5869 * out, and all other masks and inputs are ignored. 5870 * 5871 * e.g. (without squeezeMasks): 5872 * input shape: (5, 5, 5) 5873 * beginIds: [1, 2, 3] 5874 * beginMasks: [True, False, True] 5875 * endIds: [3, -3, 2] 5876 * endMasks: [False, True, True] 5877 * strides: [2, 2, 2] 5878 * SqueezeMasks: [False, False, False] 5879 * output shape: (2, 2, 3) 5880 * This is equivalent to input[:3:2, 2::2, ::2] 5881 * 5882 * e.g. (with squeezeMasks): 5883 * input shape: (5, 5, 5) 5884 * beginIds: [1, 2, 3] 5885 * beginMasks: [True, False, True] 5886 * endIds: [3, -3, 2] 5887 * endMasks: [False, True, True] 5888 * strides: [2, 2, 2] 5889 * SqueezeMasks: [False, True, False] 5890 * output shape: (2, 3) 5891 * This is equivalent to input[:3:2, 2, ::2] 5892 * 5893 */ 5894message SliceStaticLayerParams { 5895 5896 repeated int64 beginIds = 1; 5897 repeated bool beginMasks = 2; 5898 repeated int64 endIds = 3; 5899 repeated bool endMasks = 4; 5900 repeated int64 strides = 5; 5901 repeated bool squeezeMasks = 6; 5902 5903 5904} 5905 5906/** 5907 * A layer that extracts a slice of size ``(end - begin) / stride`` 5908 * from the given input tensor. 5909 * Support negative indexing and negative strides. 5910 * See "SliceStaticLayerParams" for the description and an example of the functionality of the layer. 5911 * 5912 * Requires 2 to 7 inputs and produces 1 output. 5913 * Rank of the output is same as the rank of the first input unless squeezeMask is set. 5914 * 5915 * Value of beginIds, beginMasks, endIds, endMasks, strides can be passed in either 5916 * as dynamic inputs or as static parameters. 5917 * Lengths of all the parameters or inputs from 2-6 must equal the rank of the first input. 5918 * 5919 * The 2nd input represents the "beginIds". 5920 * The 3rd input, if present, corresponds to "endIds". In this case the value of the "endIds" parameter is ignored. 5921 * The 4th input, if present, corresponds to "strides". In this case the value of the "strides" parameter is ignored. 5922 * The 5th input, if present, corresponds to "beginMasks". In this case the value of the "beginMasks" parameter is ignored. 5923 * The 6th input, if present, corresponds to "endMasks". In this case the value of the "endMasks" parameter is ignored. 5924 * The 7th input, if present, corresponds to "squeezeMasks". In this case the value of the "squeezeMasks" parameter is ignored. 5925 * 5926 */ 5927message SliceDynamicLayerParams { 5928 5929 repeated bool beginMasks = 2; 5930 repeated int64 endIds = 3; 5931 repeated bool endMasks = 4; 5932 repeated int64 strides = 5; 5933 repeated bool squeezeMasks = 6; 5934 5935} 5936 5937/** 5938 * A layer that constructs a tensor by repeating the input tensor multiple 5939 * number of times. 5940 * 5941 * Requires 1 or 2 inputs and produces 1 output. 5942 * Output rank is same as the input rank. 5943 * 5944 * If two inputs are provided, second input is used as "reps" 5945 * and "reps" parameter is ignored. 5946 * 5947 * If only one input is provided, 5948 * length of the "reps" parameter must be at least 1 and 5949 * not greater than the rank of the input. 5950 * If it is less than the input rank, it is made equal to the input rank by prepending 1's to it. 5951 * 5952 * e.g.: 5953 * 5954 * input shape = (2, 4, 2) 5955 * reps = (1, 2, 6) 5956 * output shape = (2, 8, 12) 5957 * 5958 * input shape = (2, 4, 2) 5959 * reps = (6) 5960 * reps after prepending ones = (1, 1, 6) 5961 * output shape = (2, 4, 12) 5962 * 5963 * input shape = (2, 4, 2) 5964 * second input = [1, 2, 6] -> shape: (3,) 5965 * reps = N/A [Ignored] 5966 * output shape = (2, 8, 12) 5967 * 5968 */ 5969message TileLayerParams { 5970 5971 repeated uint64 reps = 1; 5972 5973} 5974 5975/** 5976 * A layer that returns the shape of an input tensor. 5977 * 5978 * Requires 1 input and produces 1 output. 5979 * 5980 * Input: a tensor. 5981 * Output: a vector of length R, where R is the rank of the input tensor 5982 * Output is always a rank 1 tensor. 5983 */ 5984message GetShapeLayerParams { 5985 5986} 5987 5988/** 5989 * A layer that computes the Gauss error function, 5990 * which is defined as: 5991 * 5992 * .. math:: 5993 * f(x) = \dfrac{1}{\sqrt{\pi}}\int_{-x}^{x}{e^{-t^2}dt} 5994 * 5995 * Requires 1 input and produces 1 output. 5996 * Output shape is same as the input. 5997 */ 5998message ErfLayerParams { 5999 6000} 6001 6002/** 6003 * A layer that evaluates the Gaussian Error Linear Unit (GELU) activation. 6004 * Following equations are used to compute the activation based on the value of the "mode" parameter: 6005 * 6006 * mode == 'EXACT': 6007 * .. math:: 6008 * f(x) = 0.5x\left ( 1+\rm{erf}\left ( \frac{x}{\sqrt{2}} \right ) \right ) 6009 * 6010 * mode == 'TANH_APPROXIMATION': 6011 * .. math:: 6012 * f(x) = 0.5x\left ( 1+\rm{tanh}\left ( \sqrt{2/\pi}\left ( x + 0.044715x^3 \right ) \right ) \right ) 6013 * 6014 * mode == 'SIGMOID_APPROXIMATION': 6015 * .. math:: 6016 * f(x) = x*\rm{sigmoid}(1.702x) 6017 * 6018 * Requires 1 input and produces 1 output. 6019 * Output shape is same as the input. 6020 * 6021 */ 6022message GeluLayerParams { 6023 6024 enum GeluMode { 6025 6026 EXACT = 0; 6027 TANH_APPROXIMATION = 1; 6028 SIGMOID_APPROXIMATION = 2; 6029 6030 } 6031 6032 GeluMode mode = 1; /// mode of GELU operation. 6033 6034} 6035 6036/** 6037 * RangeStatic layer that returns a tensor that contains evenly spaced values. 6038 * It is similar in functionality to the numpy.arange method. 6039 * 6040 * Requires no input and produces 1 output. 6041 * Output is a rank 1 tensor. 6042 */ 6043message RangeStaticLayerParams { 6044 6045 float endValue = 1; 6046 float startValue = 2; 6047 float stepSizeValue = 3; 6048 6049} 6050 6051/** 6052 * A layer that returns a tensor that contains evenly spaced values. 6053 * Its functionality is similar to the numpy.arange method. 6054 * 6055 * Requires at least 1 input, up to a maximum of 3 inputs. 6056 * Produces 1 output, which is a rank 1 tensor. 6057 * 6058 * Each input must be a scalar, or rank 1 and shape (1,). 6059 * 6060 * The first input represents the "endValue". 6061 * The second input, if present, corresponds to "startValue". In this case the value of the "startValue" parameter is ignored. 6062 * The third input, if present, corresponds to "stepSizeValue". In this case the value of the "stepSizeValue" parameter is ignored. 6063 * 6064 */ 6065message RangeDynamicLayerParams { 6066 6067 float startValue = 2; 6068 float stepSizeValue = 3; 6069 6070} 6071 6072/** 6073 * A layer that returns a tensor containing all windows of size ``windowSize`` 6074 * separated by ``step`` along the dimension ``axis``. 6075 * 6076 * .. code:: 6077 * 6078 * y = SlidingWindows(x) 6079 * 6080 * Requires 1 input and produces 1 output. 6081 * 6082 * Input 6083 * An N-Dimensional tensor. 6084 * 6085 * Output 6086 * An (N+1)-Dimensional tensor. 6087 * 6088 * This operation behaves as following: 6089 * - if axis = 0 & input is rank 1 (L,). Output shape will be (M, W). 6090 * - if axis = 1 & input is rank 3 (B1, L, C1). Output shape will be (B1, M, W, C1) 6091 * - if axis = 2 & input is rank 5 (B1, B2, L, C1, C2) --> (B1 * B2, L, C1 * C2) --> (B1 * B2, M, W, C1 * C2). Output shape will be (B1, B2, M, W, C1, C2) 6092 * - etc. 6093 * where 6094 * - L, C, B refer to input length, feature dimension length & batch size respectively 6095 * - W is the window size. 6096 * - M is the number of windows/slices calculated as M = (L - W) / step + 1 6097 */ 6098message SlidingWindowsLayerParams { 6099 6100 int64 axis = 1; 6101 uint64 windowSize = 2; 6102 uint64 step = 3; 6103 6104} 6105 6106/** 6107 * A layer that applies layer normalization over the input tensor. 6108 * 6109 * Requires 1 input and produces 1 output. 6110 * 6111 * output = gamma * (input - computed_mean) / (sqrt(computed_variance + eps)) + beta 6112 * 6113 * Parameters 6114 * normalizedShape: subset of the input shape, along with layer norm is performed, rest of the input shape is treated as the batch dimension. The mean and variance are computed for the input, over the last few dimensions as specified by the normalizedShape parameter. 6115 * gamma: must have shape = "normalizedShape" 6116 * beta: must have shape = "normalizedShape" 6117 * eps: small constant to avoid division by 0 6118 * 6119 * Output shape is same as the input. 6120 * 6121 * e.g.: 6122 * input shape = (10,5) 6123 * normalized shape = (5,) or (10,5) 6124 * 6125 * input shape = (10,5,6,7) 6126 * normalized shape = (7,) or (6,7) or (5,6,7) or (10,5,6,7) 6127 */ 6128message LayerNormalizationLayerParams { 6129 6130 repeated int64 normalizedShape = 1; 6131 float eps = 2; 6132 WeightParams gamma = 3; 6133 WeightParams beta = 4; 6134 6135} 6136 6137/** 6138 * Non maximum suppression (NMS) layer. 6139 * Applies the non maximum suppression algorithm to input bounding box coordinates. 6140 * The effect of this layer is similar to the functionality of the "NonMaximumSuppression" 6141 * model type (for details please see NonMaximumSuppression.proto) with a couple of differences. 6142 * One, this is a layer in a neural network model, whereas that is a different model type. Second, 6143 * this layer supports a batch of bounding boxes. 6144 * 6145 * The NMS layer requires at least 2 inputs, and up to a maximum of 5 inputs. It produces 4 outputs. 6146 * Following is the description of inputs and outputs: 6147 * 6148 * input 1, shape (B,N,4): coordinates of N boxes, for a batch size B. 6149 * input 2, shape (B,N,C): class scores for each box. C can be 1 when there is only 1 score per box, i.e., no class specific score. 6150 * 6151 * input 3, optional, shape (1,): IoU threshold. When present, it overwrites the value provided in layer parameter "iouThreshold". 6152 * input 4, optional, shape (1,): Score threshold. When present, it overwrites the value provided in layer parameter "scoreThreshold". 6153 * input 5, optional, shape (1,): Maximum number of boxes. When present, it overwrites the value provided in layer parameter "maxBoxes". 6154 * 6155 * output 1, shape (B,maxBoxes,4): box coordinates, corresponding to the surviving boxes. 6156 * output 2, shape (B,maxBoxes,C): box scores, corresponding to the surviving boxes. 6157 * output 3, shape (B,maxBoxes): indices of the surviving boxes. Hence it will have values in the range [0,N-1], except for padding. 6158 * output 4, shape (B,): number of boxes selected after the NMS algorithm, for each batch. 6159 * 6160 * When surviving boxes are less than "maxBoxes", the first 3 outputs are padded. 6161 * For the first two outputs, the padding is done using values 0, whereas for the third output the 6162 * padding value used is -1, since the output values represent indices. 6163 * 6164 * If no box survives, that is, all the scores are below the "scoreThreshold", 6165 * then for that batch, number of boxes (value of the fourth output) will be 1. The first 3 outputs will 6166 * correspond to the box with the highest score. This is to avoid generating an "empty" output. 6167 * 6168 * The four values that describe the box dimensions are (in order): 6169 * 6170 * - x (center location of the box along the horizontal axis) 6171 * - y (center location of the box along the vertical axis) 6172 * - width (size of box along the horizontal axis) 6173 * - height (size of box on along the vertical axis) 6174 * 6175 * In each batch, 6176 * the N scores for N boxes, used for suppression, are generated by taking the max of the matrix (N,C) 6177 * along the columns. 6178 * If "perClassSuppression" flag is false, suppression happens across all classes. 6179 * If "perClassSuppression" flag is true, each box is assigned to the class with the highest 6180 * score and then the suppression happens separately for boxes within the same class. 6181 * 6182 * Note that the 4th output can be used to dynamically slice the first 3 outputs, in case 6183 * the padded outputs are not required. 6184 * 6185 */ 6186message NonMaximumSuppressionLayerParams { 6187 /** 6188 * The intersection over union (IoU) threshold over which boxes are suppressed. 6189 */ 6190 float iouThreshold = 1; 6191 6192 /** 6193 * Before IoU suppression is performed, boxes with class scores below this threshold are rejected. 6194 */ 6195 float scoreThreshold = 2; 6196 6197 /** 6198 * The maximum number of boxes to be given out as output. 6199 * If the number of surviving boxes are less, output is padded up to this number. 6200 */ 6201 uint64 maxBoxes = 3; 6202 6203 /** 6204 * If true, suppression is performed independently within boxes of each class. 6205 */ 6206 bool perClassSuppression = 4; 6207} 6208 6209/** 6210 * A layer that performs element-wise clamped ReLU operation. 6211 * 6212 * Requires 1 input and produces 1 output. 6213 * 6214 * This function has the following formula: 6215 * 6216 * .. math:: 6217 * f(x) = \begin{cases} 6218 * \text{min}(\text{beta},x) \;\; \text{if} \;\; x \geq 0\\ 6219 * \text{min}(\text{beta} ,\text{alpha}\cdot x) \;\; \text{if} \;\; x<0 6220 * \end{cases} 6221 * 6222 * Output shape is same as the input. 6223 * 6224 * Available (iOS >= 14, macOS >= 11.0, watchOS >= 7) 6225 */ 6226message ClampedReLULayerParams { 6227 6228 float alpha = 1; 6229 float beta = 2; 6230 6231} 6232 6233/** 6234* A layer that returns the indices that would sort the input tensor, along a specified axis. 6235* 6236* Requires 1 input and produces 1 output. 6237* 6238* Output has the same rank and shape as the input. 6239* 6240* Value of "axis" must be positive and less than the rank of the input. 6241* 6242* e.g.: 6243* 6244* input shape = (5,) 6245* axis = 0 6246* input values = [3.1, 5.4, 32.9, 3.2, 77.0] 6247* output shape = (5,) 6248* output values = [0, 3, 1, 2, 4], descending = False 6249* output values = [4, 2, 1, 3, 0], descending = True 6250* 6251* input shape = (2,3) 6252* axis = 1 6253* input values = [[3, 5, 32], [3, 77, 6]] 6254* output shape = (2,3) 6255* output values = [[0, 1, 2], [0, 2, 1]], descending = False 6256* output values = [[2, 1, 0], [1, 2, 0]], descending = True 6257* 6258*/ 6259message ArgSortLayerParams { 6260 6261 int64 axis = 1; /// must be between [0, input_rank - 1] 6262 bool descending = 2; 6263 6264} 6265 6266/** 6267 * A layer that does slice operation by providing size to be extracted 6268 * from the given input tensor. 6269 * 6270 * Requires 2 inputs and produces 1 output. 6271 * Rank of the output is same as the rank of the first input. 6272 * 6273 * The 1st input represents the tensor to be sliced. 6274 * The 2nd input represents the beginning index to be sliced from. 6275 * 6276 * Example: 6277 * Input 1: x (x.shape = (2, 3, 4)) 6278 * Input 2: begin 6279 * size: 2 6280 * axis: 1 6281 * 6282 * Output: x[:, begin:begin+2, :] 6283 * 6284 */ 6285message SliceBySizeLayerParams { 6286 6287 int64 size = 2; 6288 int64 axis = 3; 6289 6290} 6291 6292 6293/// Neural Network Specializations 6294/// ------------------------------ 6295 6296/** 6297 * A neural network specialized as a classifier. 6298 */ 6299message NeuralNetworkClassifier { 6300 6301 repeated NeuralNetworkLayer layers = 1; 6302 repeated NeuralNetworkPreprocessing preprocessing = 2; 6303 6304 // use this enum value to determine the input tensor shapes to the neural network, for multiarray inputs 6305 NeuralNetworkMultiArrayShapeMapping arrayInputShapeMapping = 5; 6306 6307 // use this enum value to determine the input tensor shapes to the neural network, for image inputs 6308 NeuralNetworkImageShapeMapping imageInputShapeMapping = 6; 6309 6310 NetworkUpdateParameters updateParams = 10; 6311 6312 // The set of labels for every possible class. 6313 oneof ClassLabels { 6314 StringVector stringClassLabels = 100; 6315 Int64Vector int64ClassLabels = 101; 6316 } 6317 6318 // The name of the output blob containing the probability of each class. 6319 // In other words, the score vector. Must be a 1-D tensor with the same 6320 // number and order of elements as ClassLabels. 6321 string labelProbabilityLayerName = 200; 6322} 6323 6324 6325/** 6326 * A layer that computes the one hot representation of the input. 6327 * 6328 * Requires 1 or 2 inputs and produces 1 output. 6329 * Rank of the output is one more than the first input. 6330 * If the second input is present, it is used to determine the value of "oneHotVectorSize" and the parameter "oneHotVectorSize" is ignored. 6331 * 6332 * Input values correspond to indices and should typically be in the range [0,"oneHotVectorSize" -1]. If it is outside this range, a vector of all "offValue" will be chosen. 6333 * 6334 * Typically one hot vectors contain 0s everywhere, except 1 at the index that the input corresponds to. 6335 * However, instead of 0, any float value could be generated by using the "offValue" parameter. 6336 * Similarly, instead of 1, any other value can be used by employing the "onValue" parameter. 6337 * 6338 * e.g.: 6339 * input shape: (10,), "oneHotVectorSize" : 32, axis=-1, then output shape will be (10,32) 6340 * input shape: (10,23), "oneHotVectorSize" : 32, axis=1, then output shape will be (10,32,23) 6341 * input shape: (10,), "oneHotVectorSize" : 32, axis=0, then output shape will be (32,10) 6342 * 6343 * input shape: (2,), "oneHotVectorSize" : 4, axis=-1, then output shape will be (2,4) 6344 * say input values = [2, 0], and "onValue" = 5, and "offValue" = -1, then output will be: 6345 * [-1, -1, 5, -1 6346 * 5, -1, -1, -1] 6347 * 6348 * say input values = [2, -1], and "onValue" = 5, and "offValue" = -1, then output will be: 6349 * [-1, -1, 5, -1 6350 * -1, -1, -1, -1] 6351 * 6352 * Available (iOS >= 14, macOS >= 11.0, watchOS >= 7) 6353 */ 6354 6355message OneHotLayerParams { 6356 6357 uint64 oneHotVectorSize = 1; /// size of the one hot vector 6358 int64 axis = 2; /// negative indexing is supported. It refers to the axis in the output tensor. 6359 float onValue = 3; 6360 float offValue = 4; 6361} 6362 6363 6364/** 6365 * A layer that computes the cumsum values of the input along a given axis. 6366 * 6367 * Requires 1 or 2 inputs and produces 1 output. 6368 * 6369 * Output shape and rank is same as the first input. 6370 * If the second input is present, it is used to determine the value of "axis" and the parameter "axis" is ignored. 6371 * 6372 * e.g.: 6373 * Input shape = (3,), values it has: [4, 6, 7] 6374 * 6375 * Then output values will be: 6376 * 6377 * if "excludeFinalSum" = False and "reverse" = False: 6378 * output values : [4, 10, 17] 6379 * 6380 * if "excludeFinalSum" = True and "reverse" = False: 6381 * output values : [0, 4, 10] 6382 * 6383 * if "excludeFinalSum" = False and "reverse" = True: 6384 * output values : [17, 13, 7] 6385 * 6386 * if "excludeFinalSum" = True and "reverse" = True: 6387 * output values : [13, 7, 0] 6388 * 6389 * 6390 * Available (iOS >= 14, macOS >= 11.0, watchOS >= 7) 6391 */ 6392 6393 6394message CumSumLayerParams { 6395 6396 int64 axis = 1; /// negative indexing is supported 6397 6398 /// if true, the first element of the output is 0, and the last element contains the sum of the input up to the penultimate value 6399 /// if false, the first element of the output is same as the input and the last element is the sum of all the input values 6400 /// (this behavior is reversed when "reverse" flag is True) 6401 bool excludeFinalSum = 2; 6402 6403 bool reverse = 3; /// if true, cumsum is performed in the opposite direction 6404} 6405 6406 6407/** 6408 * A neural network specialized as a regressor. 6409 */ 6410message NeuralNetworkRegressor { 6411 6412 repeated NeuralNetworkLayer layers = 1; 6413 repeated NeuralNetworkPreprocessing preprocessing = 2; 6414 6415 // use this enum value to determine the input tensor shapes to the neural network, for multiarray inputs 6416 NeuralNetworkMultiArrayShapeMapping arrayInputShapeMapping = 5; 6417 6418 // use this enum value to determine the input tensor shapes to the neural network, for image inputs 6419 NeuralNetworkImageShapeMapping imageInputShapeMapping = 6; 6420 6421 NetworkUpdateParameters updateParams = 10; 6422 6423} 6424 6425/// --------------------------------------------------------- 6426/// On-device Training related messages 6427/// --------------------------------------------------------- 6428 6429/** 6430 * Details on how the network will be updated 6431 */ 6432message NetworkUpdateParameters { 6433 6434 repeated LossLayer lossLayers = 1; 6435 Optimizer optimizer = 2; 6436 Int64Parameter epochs = 3; 6437 6438 /** 6439 * Describes whether to shuffle the batch of data between epochs. 6440 */ 6441 BoolParameter shuffle = 10; 6442 6443 /** 6444 * The seed to be used in an associated random number generator. 6445 */ 6446 Int64Parameter seed = 20; 6447} 6448 6449/** 6450 * Loss layer - categorical cross entropy and mean squared error are the only supported loss functions currently 6451 */ 6452message LossLayer { 6453 6454 string name = 1; 6455 oneof LossLayerType { 6456 6457 CategoricalCrossEntropyLossLayer categoricalCrossEntropyLossLayer = 10; 6458 MeanSquaredErrorLossLayer meanSquaredErrorLossLayer = 11; 6459 6460 } 6461 6462} 6463 6464/** 6465 * Categorical cross entropy loss layer 6466 * Categorical cross entropy is used for single label categorization (only one category is applicable for each data point). 6467 * 6468 * The input is a vector of length N representing the distribution over N categories. It must be the output of a softmax. 6469 * 6470 * The target is a single value representing the true category or class label. If the target is the predictedFeatureName of a neural network classifier it will be inverse mapped to the corresponding categorical index for you. 6471 * 6472 * math: 6473 * Loss_{CCE}(input, target) = -\sum_{i=1}^{N} (target == i) log( input[i] ) = - log (input[target]) 6474 */ 6475message CategoricalCrossEntropyLossLayer { 6476 6477 string input = 1; 6478 string target = 2; 6479 6480} 6481 6482/** 6483 * Mean squared error loss layer, 6484 * specifying input and target 6485 */ 6486message MeanSquaredErrorLossLayer { 6487 6488 string input = 1; 6489 string target = 2; 6490 6491} 6492 6493/** 6494 * Optimizer - stochastic gradient descent and adam are the only supported optimizers currently 6495 */ 6496message Optimizer { 6497 6498 oneof OptimizerType { 6499 6500 SGDOptimizer sgdOptimizer = 10; 6501 AdamOptimizer adamOptimizer = 11; 6502 6503 } 6504 6505} 6506 6507/** 6508 * Stochastic gradient descent optimizer, 6509 * specifying configurable learning rate, mini batch size, and momentum 6510 */ 6511message SGDOptimizer { 6512 6513 DoubleParameter learningRate = 1; 6514 Int64Parameter miniBatchSize = 2; 6515 DoubleParameter momentum = 3; 6516 6517} 6518 6519/** 6520 * Adam optimizer, 6521 * specifying configurable learning rate, mini batch size, betas, and eps 6522 */ 6523message AdamOptimizer { 6524 6525 DoubleParameter learningRate = 1; 6526 Int64Parameter miniBatchSize = 2; 6527 DoubleParameter beta1 = 3; 6528 DoubleParameter beta2 = 4; 6529 DoubleParameter eps = 5; 6530 6531} 6532