1// Copyright (c) 2017-2019, Apple Inc. All rights reserved.
2//
3// Use of this source code is governed by a BSD-3-clause license that can be
4// found in LICENSE.txt or at https://opensource.org/licenses/BSD-3-Clause
5
6/**
7 * A neural network is defined through a collection of layers
8 * and represents a directed acyclic graph (DAG).
9 * Each layer has a name, a layer type,
10 * a list of input names, a list of output names,
11 * and a collection of parameters specific to the layer type.
12 *
13 * The graph structure and connectivity of the neural network
14 * are inferred from the input and output names.
15 * A neural network starts with the layer
16 * whose input name is equal to the value specified in
17 * ``Model.description.input.name``,
18 * and ends with the layer
19 * whose output name is equal to the value specified in
20 * ``Model.description.output.name``.
21 * Layers must have unique input and output names,
22 * and a layer may not have input or output names that
23 * refer to layers that are not yet defined.
24 *
25 * For Core ML specification version <=3,
26 * all inputs are mapped to static rank 5 tensors, with axis notations
27 * [Sequence, Batch, Channel, Height, Width].
28 *
29 * From specification version 4 onwards (iOS >= 13, macOS >= 10.15), more options are available
30 * (see enums ``NeuralNetworkMultiArrayShapeMapping``, ``NeuralNetworkImageShapeMapping``)
31 * to map inputs to generic N-Dimensional (or N rank) tensors, where N >= 1.
32 *
33 * Each layer type may have specific constraints on the ranks of its inputs and outputs.
34 *
35 * Some of the layers (such as softmax, reduce, etc.) have parameters that are described in
36 * terms of the notational axes "Channel", "Height", "Width", or "Sequence". They can be re-interpreted easily in
37 * the general ND setting by using the following rules:
38 * "width" is the same as axis = -1 (i.e. the last axis)
39 * "height" is the same as axis = -2 (i.e. the second axis from the end)
40 * "channel" is the same as axis = -3 (i.e. the third axis from the end)
41 * "sequence" is the same as axis = -5 (i.e. the fifth axis from the end)
42 *
43 * Several layers are available in 3 different variations, with the names ending
44 * in identifiers: ``like``, ``static`` and ``dynamic``. For instance, ``FillLike``,
45 * ``FillStatic`` and ``FillDynamic``. The ``static`` variation generally will have
46 * a property corresponding to the shape of the output. For instance, if the
47 * output of the ``FillStatic`` layer is desired to be of shape (10, 4), the
48 * property ``targetShape`` will have to be set to [10, 4]. In the ``dynamic`` case,
49 * the shape is an input, hence it can be changed at runtime. For instance, for
50 * a ``FillDynamic`` layer, the input would have to be an array containing the
51 * values 10 and 4, if the desired output is of shape (10, 4). Whereas in the
52 * ``like`` case, the additional input's shape is used as the output shape, ignoring
53 * its values. For instance, for a ``FillLike`` layer, for an input with shape
54 * (10, 4), the output generated will also be of shape (10, 4); the values of the
55 * input will be ignored.
56 */
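/**
 * To make the ``like`` / ``static`` / ``dynamic`` distinction concrete, here is a
 * small numpy sketch of the three Fill variants described above. The helper
 * functions are illustrative stand-ins, not part of the Core ML API.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def fill_static(target_shape, value=0.0):
 *         # "static": the output shape is a layer property fixed at build time
 *         return np.full(tuple(target_shape), value)
 *
 *     def fill_dynamic(shape_input, value=0.0):
 *         # "dynamic": the output shape arrives as an input tensor at runtime
 *         return np.full(tuple(int(d) for d in shape_input), value)
 *
 *     def fill_like(reference, value=0.0):
 *         # "like": only the extra input's shape is used; its values are ignored
 *         return np.full(reference.shape, value)
 *
 *     print(fill_static([10, 4]).shape)               # (10, 4)
 *     print(fill_dynamic(np.array([10, 4])).shape)    # (10, 4)
 *     print(fill_like(np.zeros((10, 4))).shape)       # (10, 4)
 */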
57
58syntax = "proto3";
59option optimize_for = LITE_RUNTIME;
60
61import public "DataStructures.proto";
62import public "Parameters.proto";
63
64package CoreML.Specification;
65
66
67enum NeuralNetworkMultiArrayShapeMapping {
68
69    /*
70     * Describes how the MultiArray shape for the inputs,
71     * provided in the FeatureTypes proto via the model description,
72     * is mapped to construct tensors that are fed into the Neural Network layers.
73     */
74
75    /*
76     * Default legacy value. Only supported for Core ML Specification version <= 3.
77     *
78     * The default legacy shape mapping resolves all input shapes to a rank 5 equivalent
79     * with axis notation of [Seq, Batch, Channel, Height, Width].
80     *
81     * When this enum value is selected,
82     * the repeated shape field in the message "ArrayFeatureType" in the feature types proto
83     * must be of length 1 or length 3.
84     *
85     * The following rule is used to map the values in the shape field to the actual tensor shape:
86     * rank 1 shape is mapped to shape [1,1,C,1,1]
87     * rank 3 shape is mapped to shape [1,1,C,H,W]
88     * At runtime, the first two dimensions (Seq and Batch) can also be present, with values greater than 1.
89     *
90     * It is invalid to use this enum value if any of the layers added in
91     * Specification version 4 (iOS >= 13, macOS >= 10.15) or later are used in the network.
92     * The validator will raise an error in that case.
93     */
94    RANK5_ARRAY_MAPPING = 0;
95
96    /*
97     * The exact shape and rank (i.e. number of dimensions in the shape) of the input,
98     * as specified in the message "ArrayFeatureType", is passed through to the layers.
99     * Supported only for Specification version >= 4 (iOS >= 13, macOS >= 10.15).
100     */
101    EXACT_ARRAY_MAPPING = 1;
102
103}
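/**
 * A minimal sketch (plain Python, for illustration only) of the legacy
 * RANK5_ARRAY_MAPPING rule described above, which pads a declared MultiArray
 * shape out to the rank-5 layout [Seq, Batch, Channel, Height, Width].
 *
 * .. code::
 *
 *     def to_rank5(shape):
 *         if len(shape) == 1:            # (C,)      -> [1, 1, C, 1, 1]
 *             return [1, 1, shape[0], 1, 1]
 *         if len(shape) == 3:            # (C, H, W) -> [1, 1, C, H, W]
 *             return [1, 1, *shape]
 *         raise ValueError("legacy mapping expects a length-1 or length-3 shape")
 *
 *     print(to_rank5([128]))          # [1, 1, 128, 1, 1]
 *     print(to_rank5([3, 224, 224]))  # [1, 1, 3, 224, 224]
 */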
104
105enum NeuralNetworkImageShapeMapping {
106
107    /*
108     * Describes how the shape of the input tensors is constructed from image inputs.
109     */
110
111    /*
112     * In this case, image input is mapped to a rank 5 tensor.
113     * For Color images, input tensor is shaped as [1,1,3,H,W].
114     * For Gray images, input tensor is shaped as [1,1,1,H,W].
115     */
116    RANK5_IMAGE_MAPPING = 0;
117
118    /*
119     * For Color images, input tensor is shaped as [1,3,H,W].
120     * For Gray images, input tensor is shaped as [1,1,H,W].
121     * Supported only for Specification version >= 4 (iOS >= 13, macOS >= 10.15).
122     */
123    RANK4_IMAGE_MAPPING = 1;
124
125}
126
127/**
128 A neural network.
129 */
130message NeuralNetwork {
131
132    repeated NeuralNetworkLayer layers = 1;
133    repeated NeuralNetworkPreprocessing preprocessing = 2;
134
135    // use this enum value to determine the input tensor shapes to the neural network, for multiarray inputs
136    NeuralNetworkMultiArrayShapeMapping arrayInputShapeMapping = 5;
137
138    // use this enum value to determine the input tensor shapes to the neural network, for image inputs
139    NeuralNetworkImageShapeMapping imageInputShapeMapping = 6;
140
141
142    NetworkUpdateParameters updateParams = 10;
143
144}
145
146/// Preprocessing
147/// -------------
148
149/**
150 * A neural network preprocessor that
151 * performs a scalar multiplication of an image
152 * followed by addition of scalar biases to the channels.
153 *
154 * Input: X
155 *    An image in BGR or RGB format with shape ``[3, H, W]``
156 *    or in grayscale format with shape ``[1, H, W]``.
157 * Output: Y
158 *    An image with format and shape corresponding to the input.
159 *
160 * If the input image is in BGR format:
161 *
162 * .. code::
163 *
164 *     Y[0, :, :] = channelScale * X[0, :, :] + blueBias
165 *     Y[1, :, :] = channelScale * X[1, :, :] + greenBias
166 *     Y[2, :, :] = channelScale * X[2, :, :] + redBias
167 *
168 * If the input image is in RGB format:
169 *
170 * .. code::
171 *
172 *     Y[0, :, :] = channelScale * X[0, :, :] + redBias
173 *     Y[1, :, :] = channelScale * X[1, :, :] + greenBias
174 *     Y[2, :, :] = channelScale * X[2, :, :] + blueBias
175 *
176 * If the input image is in grayscale format:
177 *
178 * .. code::
179 *
180 *     Y[0, :, :] = channelScale * X[0, :, :] + grayBias
181 */
182message NeuralNetworkImageScaler {
183
184    float channelScale = 10; /// Scalar multiplier applied to all channels.
185    float blueBias = 20; /// Scalar blue bias to be added.
186    float greenBias = 21; /// Scalar green bias to be added.
187    float redBias = 22; /// Scalar red bias to be added.
188    float grayBias = 30; /// Scalar bias to be added for grayscale images.
189
190}
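/**
 * A numpy sketch of the scaler preprocessing above for an RGB input; the
 * scale and bias values are arbitrary illustrative numbers, not defaults.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     channel_scale = 1.0 / 255.0
 *     red_bias, green_bias, blue_bias = -0.485, -0.456, -0.406   # made-up values
 *
 *     X = np.random.randint(0, 256, size=(3, 224, 224)).astype(np.float32)  # RGB, [3, H, W]
 *     Y = np.empty_like(X)
 *     Y[0] = channel_scale * X[0] + red_bias
 *     Y[1] = channel_scale * X[1] + green_bias
 *     Y[2] = channel_scale * X[2] + blue_bias
 */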
191
192/**
193 * A neural network preprocessor that
194 * subtracts the provided mean image from the input image.
195 * The mean image is subtracted from the input named
196 * ``NeuralNetworkPreprocessing.featureName``.
197 */
198message NeuralNetworkMeanImage {
199
200    /**
201     * Mean image stored as a flattened array of floats,
202     * representing shape [Channel,Height,Width].
203     */
204    repeated float meanImage = 1;
205
206}
207
208/// Preprocessing parameters for image inputs.
209message NeuralNetworkPreprocessing {
210
211    string featureName = 1; /// must be equal to the input name to which the preprocessing is applied
212    oneof preprocessor {
213        NeuralNetworkImageScaler scaler = 10;
214        NeuralNetworkMeanImage meanImage = 11;
215    }
216
217}
218
219/// Activation Functions
220/// --------------------
221
222/**
223 * A rectified linear unit (ReLU) activation function.
224 *
225 * This function has the following formula:
226 *
227 * .. math::
228 *     f(x) = \text{max}(0, x)
229 */
230message ActivationReLU {
231
232}
233
234/**
235 * A leaky rectified linear unit (ReLU) activation function.
236 *
237 * This function has the following formula:
238 *
239 * .. math::
240 *     f(x) = \begin{cases}
241 *             x      & \text{if } x \geq 0 \\
242 *             \alpha x & \text{if } x < 0
243 *            \end{cases}
244 */
245message ActivationLeakyReLU {
246
247    float alpha = 1; //negative slope value for leakyReLU
248
249}
250
251/**
252 * A hyperbolic tangent activation function.
253 *
254 * This function has the following formula:
255 *
256 * .. math::
257 *     f(x) = \dfrac{1 - e^{-2x}}{1 + e^{-2x}}
258 */
259message ActivationTanh {
260
261}
262
263/**
264 * A scaled hyperbolic tangent activation function.
265 *
266 * This function has the following formula:
267 *
268 * .. math::
269 *     f(x) = \alpha \tanh(\beta x)
270 */
271message ActivationScaledTanh {
272
273    float alpha = 1;
274    float beta = 2;
275
276}
277
278/**
279 * A sigmoid activation function.
280 *
281 * This function has the following formula:
282 *
283 * .. math::
284 *     f(x) = \dfrac{1}{1 + e^{-x}}
285 */
286message ActivationSigmoid {
287
288}
289
290/**
291 * A linear activation function.
292 *
293 * This function has the following formula:
294 *
295 * .. math::
296 *     f(x) = \alpha x + \beta
297 */
298message ActivationLinear {
299
300    float alpha = 1;
301    float beta = 2;
302
303}
304
305/**
306 * A hard sigmoid activation function.
307 *
308 * This function has the following formula:
309 *
310 * .. math::
311 *     f(x) = \text{min}(\text{max}(\alpha x + \beta, 0), 1)
312 */
313message ActivationSigmoidHard {
314
315    float alpha = 1;
316    float beta = 2;
317
318}
319
320/**
321 * A parameterized rectified linear unit (PReLU) activation function.
322 * Input must be at least rank 3. Axis = -3 is denoted by "C", or channels.
323 * "alpha" parameter can be a vector of length C.
324 *
325 * This function has the following formula:
326 *
327 * .. math::
328 *    f(x_i) = \begin{cases}
329 *                 x_i          & \text{if } x_i \geq 0 \\
330 *                 \alpha_i x_i & \text{if } x_i < 0
331 *             \end{cases} \;,\;i=1,...,C
332 */
333message ActivationPReLU {
334
335    // parameter of length C or 1.
336    // If length is 1, same value is used for all channels
337    WeightParams alpha = 1;
338
339}
340
341/**
342 * An exponential linear unit (ELU) activation function.
343 *
344 * This function has the following formula:
345 *
346 * .. math::
347 *     f(x) = \begin{cases}
348 *             x              & \text{if } x \geq 0 \\
349 *             \alpha (e^x - 1) & \text{if } x < 0
350 *            \end{cases}
351 */
352message ActivationELU {
353
354    float alpha = 1;
355
356}
357
358/**
359 * A thresholded rectified linear unit (ReLU) activation function.
360 *
361 * This function has the following formula:
362 *
363 * .. math::
364 *     f(x) = \begin{cases}
365 *             x & \text{if } x \geq \alpha \\
366 *             0 & \text{if } x < \alpha
367 *            \end{cases}
368 */
369message ActivationThresholdedReLU {
370
371    float alpha = 1;
372
373}
374
375/**
376 * A softsign activation function.
377 *
378 * This function has the following formula:
379 *
380 * .. math::
381 *     f(x) = \dfrac{x}{1 + |x|}
382 */
383message ActivationSoftsign {
384
385}
386
387/**
388 * A softplus activation function.
389 *
390 * This function has the following formula:
391 *
392 * .. math::
393 *     f(x) = \text{log}(1 + e^x)
394 */
395message ActivationSoftplus {
396
397}
398
399/**
400 * A parametric softplus activation function.
401 * Input must be at least rank 3. axis = -3 is denoted by "C", or channels.
402 * "alpha"/"beta" parameter can be a vector of length C.
403 *
404 * This function has the following formula:
405 *
406 * .. math::
407 *     f(x_i) = \alpha_i \text{log}(1 + e^{\beta_i x_i}) \;,\;i=1,...,C
408 */
409message ActivationParametricSoftplus {
410
411    // If length is 1, same value is used for all channels
412    WeightParams alpha = 1; //parameter of length C or 1
413    WeightParams beta = 2; //parameter of length C or 1
414
415}
416
417message ActivationParams {
418
419    oneof NonlinearityType {
420        ActivationLinear linear = 5;
421
422        ActivationReLU ReLU = 10;
423        ActivationLeakyReLU leakyReLU = 15;
424        ActivationThresholdedReLU thresholdedReLU = 20;
425        ActivationPReLU PReLU = 25;
426
427        ActivationTanh tanh = 30;
428        ActivationScaledTanh scaledTanh = 31;
429
430        ActivationSigmoid sigmoid = 40;
431        ActivationSigmoidHard sigmoidHard = 41;
432
433        ActivationELU ELU = 50;
434
435        ActivationSoftsign softsign = 60;
436        ActivationSoftplus softplus = 70;
437        ActivationParametricSoftplus parametricSoftplus = 71;
438    }
439
440}
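/**
 * For reference, a few of the activation formulas above written out with
 * numpy (illustrative only, not the Core ML runtime; parameter values are
 * arbitrary).
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def leaky_relu(x, alpha=0.01):
 *         return np.where(x >= 0, x, alpha * x)
 *
 *     def sigmoid_hard(x, alpha=0.2, beta=0.5):
 *         return np.minimum(np.maximum(alpha * x + beta, 0.0), 1.0)
 *
 *     def parametric_softplus(x, alpha, beta):
 *         # alpha and beta broadcast along the channel axis (axis = -3)
 *         return alpha * np.log1p(np.exp(beta * x))
 *
 *     x = np.linspace(-3.0, 3.0, 7)
 *     print(leaky_relu(x))
 *     print(sigmoid_hard(x))
 */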
441
442/**
443 * Representation of the intermediate tensors
444 */
445message Tensor {
446
447    // Number of dimensions in the tensor shape
448    uint32 rank = 1;
449    // actual value of the tensor shape.
450    // must be of length "rank". Can contain -1s for unknown dimensions.
451    repeated int64 dimValue = 2;
452
453}
454
455/**
456 * A single neural network layer.
457 */
458message NeuralNetworkLayer {
459
460    string name = 1; //descriptive name of the layer
461    repeated string input = 2;
462    repeated string output = 3;
463
464    repeated Tensor inputTensor = 4; // must be the same length as the "input" field
465    repeated Tensor outputTensor = 5; // must be the same length as the "output" field
466
467    // Must be set to true to mark the layer as updatable.
468    // If true, the weightParams in the layer's properties must also be set to updatable.
469    // If false, the value of the isUpdatable parameter within the layer's weights is ignored.
470    bool isUpdatable = 10;
471
472    oneof layer {
473
474        // Start at 100 here
475        ConvolutionLayerParams convolution = 100;
476
477        PoolingLayerParams pooling = 120;
478
479        ActivationParams activation = 130;
480
481        InnerProductLayerParams innerProduct = 140;
482        EmbeddingLayerParams embedding = 150;
483
484        // Normalization-related Layers
485        BatchnormLayerParams batchnorm = 160;
486        MeanVarianceNormalizeLayerParams mvn = 165;
487        L2NormalizeLayerParams l2normalize = 170;
488        SoftmaxLayerParams softmax = 175;
489        LRNLayerParams lrn = 180;
490
491        CropLayerParams crop = 190;
492        PaddingLayerParams padding = 200;
493        UpsampleLayerParams upsample = 210;
494
495        ResizeBilinearLayerParams resizeBilinear = 211;
496        CropResizeLayerParams cropResize = 212;
497
498        UnaryFunctionLayerParams unary = 220;
499
500        // Element-wise Operations
501        AddLayerParams add = 230;
502        MultiplyLayerParams multiply = 231;
503
504        AverageLayerParams average = 240;
505        ScaleLayerParams scale = 245;
506
507        BiasLayerParams bias = 250;
508        MaxLayerParams max = 260;
509        MinLayerParams min = 261;
510
511        DotProductLayerParams dot = 270;
512        ReduceLayerParams reduce = 280;
513        LoadConstantLayerParams loadConstant = 290;
514
515        // Data Reorganization
516        ReshapeLayerParams reshape = 300;
517        FlattenLayerParams flatten = 301;
518        PermuteLayerParams permute = 310;
519        ConcatLayerParams concat = 320;
520        SplitLayerParams split = 330;
521        SequenceRepeatLayerParams sequenceRepeat = 340;
522
523        ReorganizeDataLayerParams reorganizeData = 345;
524        SliceLayerParams slice = 350;
525
526        // Recurrent Layers
527        SimpleRecurrentLayerParams simpleRecurrent = 400;
528        GRULayerParams gru = 410;
529        UniDirectionalLSTMLayerParams uniDirectionalLSTM = 420;
530        BiDirectionalLSTMLayerParams biDirectionalLSTM = 430;
531
532        // Custom (user-implemented) Layer
533        CustomLayerParams custom = 500;
534
535        // Following layers are available only after Core ML Specification
536        // version >= 4 (iOS >= 13, macOS >= 10.15)
537
538        // Control Flow related Layers
539        CopyLayerParams copy = 600;
540        BranchLayerParams branch = 605;
541
542        LoopLayerParams loop = 615;
543        LoopBreakLayerParams loopBreak = 620;
544        LoopContinueLayerParams loopContinue = 625;
545
546        RangeStaticLayerParams rangeStatic = 635;
547        RangeDynamicLayerParams rangeDynamic = 640;
548
549        // Element-wise Unary Layers
550        ClipLayerParams clip = 660;
551        CeilLayerParams ceil = 665;
552        FloorLayerParams floor = 670;
553
554        SignLayerParams sign = 680;
555        RoundLayerParams round = 685;
556
557        Exp2LayerParams exp2 = 700;
558
559        SinLayerParams sin = 710;
560        CosLayerParams cos = 715;
561        TanLayerParams tan = 720;
562
563        AsinLayerParams asin = 730;
564        AcosLayerParams acos = 735;
565        AtanLayerParams atan = 740;
566
567        SinhLayerParams sinh = 750;
568        CoshLayerParams cosh = 755;
569        TanhLayerParams tanh = 760;
570
571        AsinhLayerParams asinh = 770;
572        AcoshLayerParams acosh = 775;
573        AtanhLayerParams atanh = 780;
574
575        ErfLayerParams erf = 790;
576        GeluLayerParams gelu = 795;
577
578        // Element-wise Binary with Broadcasting Support
579        EqualLayerParams equal = 815;
580        NotEqualLayerParams notEqual = 820;
581        LessThanLayerParams lessThan = 825;
582        LessEqualLayerParams lessEqual = 827;
583        GreaterThanLayerParams greaterThan = 830;
584        GreaterEqualLayerParams greaterEqual = 832;
585
586        LogicalOrLayerParams logicalOr = 840;
587        LogicalXorLayerParams logicalXor = 845;
588        LogicalNotLayerParams logicalNot = 850;
589        LogicalAndLayerParams logicalAnd = 855;
590
591        ModBroadcastableLayerParams modBroadcastable = 865;
592        MinBroadcastableLayerParams minBroadcastable = 870;
593        MaxBroadcastableLayerParams maxBroadcastable = 875;
594        AddBroadcastableLayerParams addBroadcastable = 880;
595        PowBroadcastableLayerParams powBroadcastable = 885;
596        DivideBroadcastableLayerParams divideBroadcastable = 890;
597        FloorDivBroadcastableLayerParams floorDivBroadcastable = 895;
598        MultiplyBroadcastableLayerParams multiplyBroadcastable = 900;
599        SubtractBroadcastableLayerParams subtractBroadcastable = 905;
600
601        // Tensor Manipulations
602        TileLayerParams tile = 920;
603        StackLayerParams stack = 925;
604        GatherLayerParams gather = 930;
605        ScatterLayerParams scatter = 935;
606        GatherNDLayerParams gatherND = 940;
607        ScatterNDLayerParams scatterND = 945;
608        SoftmaxNDLayerParams softmaxND = 950;
609        GatherAlongAxisLayerParams gatherAlongAxis = 952;
610        ScatterAlongAxisLayerParams scatterAlongAxis = 954;
611
612        ReverseLayerParams reverse = 960;
613        ReverseSeqLayerParams reverseSeq = 965;
614
615        SplitNDLayerParams splitND = 975;
616        ConcatNDLayerParams concatND = 980;
617        TransposeLayerParams transpose = 985;
618
619        SliceStaticLayerParams sliceStatic = 995;
620        SliceDynamicLayerParams sliceDynamic = 1000;
621        SlidingWindowsLayerParams slidingWindows = 1005;
622
623        TopKLayerParams topK = 1015;
624        ArgMinLayerParams argMin = 1020;
625        ArgMaxLayerParams argMax = 1025;
626
627        EmbeddingNDLayerParams embeddingND = 1040;
628        BatchedMatMulLayerParams batchedMatmul = 1045;
629
630        // Tensor Allocation / Reshape-related Operations
631        GetShapeLayerParams getShape = 1065;
632        LoadConstantNDLayerParams loadConstantND = 1070;
633
634        FillLikeLayerParams fillLike = 1080;
635        FillStaticLayerParams fillStatic = 1085;
636        FillDynamicLayerParams fillDynamic = 1090;
637
638        BroadcastToLikeLayerParams broadcastToLike = 1100;
639        BroadcastToStaticLayerParams broadcastToStatic = 1105;
640        BroadcastToDynamicLayerParams broadcastToDynamic = 1110;
641
642        SqueezeLayerParams squeeze = 1120;
643        ExpandDimsLayerParams expandDims = 1125;
644        FlattenTo2DLayerParams flattenTo2D = 1130;
645        ReshapeLikeLayerParams reshapeLike = 1135;
646        ReshapeStaticLayerParams reshapeStatic = 1140;
647        ReshapeDynamicLayerParams reshapeDynamic = 1145;
648        RankPreservingReshapeLayerParams rankPreservingReshape = 1150;
649
650        ConstantPaddingLayerParams constantPad = 1155;
651
652        // Random Distributions
653        RandomNormalLikeLayerParams randomNormalLike = 1170;
654        RandomNormalStaticLayerParams randomNormalStatic = 1175;
655        RandomNormalDynamicLayerParams randomNormalDynamic = 1180;
656
657        RandomUniformLikeLayerParams randomUniformLike = 1190;
658        RandomUniformStaticLayerParams randomUniformStatic = 1195;
659        RandomUniformDynamicLayerParams randomUniformDynamic = 1200;
660
661        RandomBernoulliLikeLayerParams randomBernoulliLike = 1210;
662        RandomBernoulliStaticLayerParams randomBernoulliStatic = 1215;
663        RandomBernoulliDynamicLayerParams randomBernoulliDynamic = 1220;
664
665        CategoricalDistributionLayerParams categoricalDistribution = 1230;
666
667        // Reduction-related Layers:
668        ReduceL1LayerParams reduceL1 = 1250;
669        ReduceL2LayerParams reduceL2 = 1255;
670        ReduceMaxLayerParams reduceMax = 1260;
671        ReduceMinLayerParams reduceMin = 1265;
672        ReduceSumLayerParams reduceSum = 1270;
673        ReduceProdLayerParams reduceProd = 1275;
674        ReduceMeanLayerParams reduceMean = 1280;
675        ReduceLogSumLayerParams reduceLogSum = 1285;
676        ReduceSumSquareLayerParams reduceSumSquare = 1290;
677        ReduceLogSumExpLayerParams reduceLogSumExp = 1295;
678
679        // Masking / Selection Layers
680        WhereNonZeroLayerParams whereNonZero = 1313;
681        MatrixBandPartLayerParams matrixBandPart = 1315;
682        LowerTriangularLayerParams lowerTriangular = 1320;
683        UpperTriangularLayerParams upperTriangular = 1325;
684        WhereBroadcastableLayerParams whereBroadcastable = 1330;
685
686        // Normalization Layers
687        LayerNormalizationLayerParams layerNormalization = 1350;
688
689        NonMaximumSuppressionLayerParams NonMaximumSuppression = 1400;
690
691        // Following layers are available only after Core ML Specification
692        // version >= 5 (iOS >= 14, macOS >= 11.0)
693        OneHotLayerParams oneHot = 1450;
694        CumSumLayerParams cumSum = 1455;
695        ClampedReLULayerParams clampedReLU = 1460;
696        ArgSortLayerParams argSort = 1461;
697        Pooling3DLayerParams pooling3d = 1465;
698        GlobalPooling3DLayerParams globalPooling3d = 1466;
699        SliceBySizeLayerParams sliceBySize = 1470;
700        Convolution3DLayerParams convolution3d = 1471;
701
702    }
703
704}
705
706/**
707 * Branching Layer
708 *
709 * A layer that provides the functionality of branching or an If-Else block.
710 *
711 * Must have 1 input. There are no outputs as the execution is transferred to either the
712 * if or the else branch based on the value of the input.
713 *
714 * Input is the condition predicate. Must be a scalar (length 1 tensor).
715 *
716 */
717message BranchLayerParams {
718
719    /**
720     * execute this graph if the absolute value of the input tensor is greater than 1e-6.
721     * This field must be present.
722     */
723    NeuralNetwork ifBranch = 1;
724    /**
725     * execute this graph if the absolute value of the input tensor is less than 1e-6.
726     * This field is optional.
727     */
728    NeuralNetwork elseBranch = 2;
729
730}
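/**
 * A plain-Python sketch of the branching rule above; ``if_branch`` and
 * ``else_branch`` are stand-in callables, not part of the Core ML API.
 *
 * .. code::
 *
 *     def run_branch(condition_value, if_branch, else_branch=None):
 *         if abs(condition_value) > 1e-6:      # condition treated as True
 *             if_branch()
 *         elif else_branch is not None:        # condition treated as False
 *             else_branch()
 *
 *     run_branch(1.0, lambda: print("if branch"), lambda: print("else branch"))
 */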
731
732/**
733 * Loop Layer
734 *
735 * A layer that provides the functionality of a "for" loop or a "while" loop.
736 *
737 * There are either no inputs or 1 input. When an input is present, it corresponds to the maximum loop count,
738 * in which case the value of the "maxLoopIterations" field is ignored. The input must be a scalar.
739 * (In the description below, maxLoopIterations is assumed to be the value of the input, when it is present.)
740 *
741 * No outputs are produced. Blobs produced by the condition or the body network are visible in the scope of the overall network.
742 *
743 * "conditionNetwork" must produce a tensor with the name specified in the "conditionVar" field.
744 *
745 * There are 3 possible cases for determining the termination condition:
746 *
747 * Case 1:
748 *
749 * If there is no "conditionNetwork", the layer corresponds to a pure for loop, which runs "maxLoopIterations" times.
750 * Equivalent pseudo-code:
751 *
752 * for loopIterator = 0 : maxLoopIterations
753 *      bodyNetwork()
754 *
755 *
756 * Case 2:
757 *
758 * "conditionNetwork" is present, and "maxLoopIterations" is 0 and there is no input,
759 * in this case the layer corresponds to a while loop. Equivalent pseudo-code:
760 *
761 * conditionVar = conditionNetwork()
762 * while conditionVar:
763 *      bodyNetwork()
764 *      conditionVar = conditionNetwork()
765 *
766 *
767 * Case 3:
768 *
769 * "conditionNetwork" is provided, and "maxLoopIterations" is positive or there is an input,
770 * in this case the layer corresponds to a while loop with a joint condition. Equivalent pseudo-code:
771 *
772 * loopIterator = 0
773 * conditionVar = conditionNetwork()
774 * while (conditionVar and loopIterator < maxLoopIterations):
775 *      bodyNetwork()
776 *      loopIterator = loopIterator + 1
777 *      conditionVar = conditionNetwork()
778 *
779 */
780message LoopLayerParams {
781
782    /**
783     * maximum number of iterations. Ignored if input is present.
784     */
785    uint64 maxLoopIterations = 1;
786    /**
787     * This field provides the name of the tensor that is produced by the conditionNetwork
788     * and whose value is checked to start/continue/terminate the loop. A value close to 0.0f is treated as False.
789     * This field is optional.
790     * Must be a non-empty string if and only if "conditionNetwork" is present.
791     */
792    string conditionVar = 2;
793    /**
794     * Must generate a tensor with the name provided in the "conditionVar" field.
795     * This field is optional.
796     * Must be present if and only if the "conditionVar" field is a non-empty string.
797     */
798    NeuralNetwork conditionNetwork = 3;
799    /**
800     * Body of the loop.
801     * This field must be present.
802     */
803    NeuralNetwork bodyNetwork = 4;
804
805}
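/**
 * A compact Python sketch that unifies the three loop cases above;
 * ``body_network`` and ``condition_network`` are stand-in callables.
 *
 * .. code::
 *
 *     def run_loop(body_network, condition_network=None, max_loop_iterations=0):
 *         # Case 1: no conditionNetwork         -> for loop over maxLoopIterations
 *         # Case 2: conditionNetwork, max == 0  -> plain while loop
 *         # Case 3: conditionNetwork, max > 0   -> while loop with joint condition
 *         iterator = 0
 *         cond = condition_network() if condition_network else True
 *         while cond and (max_loop_iterations == 0 or iterator < max_loop_iterations):
 *             body_network()
 *             iterator += 1
 *             if condition_network:
 *                 cond = condition_network()
 *
 *     counter = {"i": 0}
 *     run_loop(lambda: counter.update(i=counter["i"] + 1), max_loop_iterations=5)
 *     print(counter["i"])   # 5
 */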
806
807/**
808 * Loop break Layer
809 *
810 * Terminate the loop that contains this layer.
811 * If present, it should always reside in the "bodyNetwork" of the loop layer.
812 *
813 * No inputs/outputs
814 *
815 */
816message LoopBreakLayerParams {
817
818}
819
820/**
821 * Loop Continue Layer
822 *
823 * Stop the current loop iteration and continue on the next iteration.
824 * If present, it should always reside in the "bodyNetwork" of the loop layer.
825 *
826 * No inputs/outputs
827 *
828 */
829message LoopContinueLayerParams {
830
831}
832
833/**
834 * Copy Layer
835 *
836 * A layer that copies its input tensor to the output tensor.
837 * Must have 1 input and 1 output, with distinct names.
838 * This is the only layer that is allowed to re-generate an output that is already present in the neural network prior to this layer,
839 * in which case it will overwrite the output tensor.
840 *
841 */
842message CopyLayerParams {
843
844}
845
846/**
847 * GreaterThan Layer
848 *
849 * Either 1 or 2 inputs.
850 * Produces 1 output.
851 * Perform elementwise greater than operation.
852 *
853 * Output is 1.0f if the condition is true otherwise 0.0f.
854 *
855 * .. code::
856 *
857 *      y = x1 > x2
858 *          or
859 *      y = x1 > alpha, if only one input is provided
860 *
861 * Broadcasting is supported.
862 *
863 */
864message GreaterThanLayerParams {
865
866    /**
867     * Compare to the scalar value provided here if there is 1 input
868     */
869    float alpha = 2;
870
871}
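/**
 * The comparison layers (GreaterThan, GreaterEqual, LessThan, etc.) all follow
 * the same pattern; a short numpy sketch of the two forms described above
 * (values chosen arbitrarily for illustration):
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     x1 = np.array([[1.0, 2.0, 3.0]])
 *     x2 = np.array([[2.0], [0.5]])
 *
 *     y_two_inputs = (x1 > x2).astype(np.float32)   # broadcast to shape (2, 3)
 *     y_one_input  = (x1 > 1.5).astype(np.float32)  # compare against alpha = 1.5
 */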
872
873/**
874 * GreaterEqual Layer
875 *
876 * Either 1 or 2 inputs.
877 * Produces 1 output.
878 * Perform elementwise greater equal operation.
879 *
880 * Output is 1.0f if the condition is true otherwise 0.0f.
881 *
882 * .. code::
883 *
884 *      y = x1 >= x2
885 *          or
886 *      y = x1 >= alpha, if only one input is provided
887 *
888 * Broadcasting is supported.
889 *
890 */
891message GreaterEqualLayerParams {
892
893    /**
894     * Compare to the scalar value provided here if there is 1 input
895     */
896    float alpha = 2;
897
898}
899
900/**
901 * LessThan Layer
902 *
903 * Either 1 or 2 inputs.
904 * Produces 1 output.
905 * Perform elementwise less than operation.
906 *
907 * Output is 1.0f if the condition is true otherwise 0.0f.
908 *
909 * .. code::
910 *
911 *      y = x1 < x2
912 *          or
913 *      y = x1 < alpha, if only one input is provided
914 *
915 * Broadcasting is supported.
916 *
917 */
918message LessThanLayerParams {
919
920    /**
921     * Compare to the scalar value provided here if there is 1 input
922     */
923    float alpha = 2;
924
925}
926
927/**
928 * LessEqual Layer
929 *
930 * Either 1 or 2 inputs.
931 * Produces 1 output.
932 * Perform elementwise less equal operation.
933 *
934 * Output is 1.0f if the condition is true otherwise 0.0f.
935 *
936 * .. code::
937 *
938 *      y = x1 <= x2
939 *          or
940 *      y = x1 <= alpha, if only one input is provided
941 *
942 * Broadcasting is supported.
943 *
944 */
945message LessEqualLayerParams {
946
947    /**
948     * Compare to the scalar value provided here if there is 1 input
949     */
950    float alpha = 2;
951
952}
953
954/**
955 * Equal Layer
956 *
957 * Either 1 or 2 inputs.
958 * Produces 1 output.
959 * Perform elementwise equal operation.
960 *
961 * Output is 1.0f if the condition is true otherwise 0.0f.
962 *
963 * .. code::
964 *
965 *      y = x1 == x2
966 *          or
967 *      y = x1 == alpha, if only one input is provided
968 *
969 * Broadcasting is supported.
970 *
971 */
972message EqualLayerParams {
973
974    /**
975     * Compare to the scalar value provided here if there is 1 input
976     */
977    float alpha = 1;
978
979}
980
981/**
982 * NotEqual Layer
983 *
984 * Either 1 or 2 inputs.
985 * Produces 1 output.
986 * Perform elementwise not equal operation.
987 *
988 * Output is 1.0f if the condition is true otherwise 0.0f.
989 *
990 * .. code::
991 *
992 *      y = x1 != x2
993 *          or
994 *      y = x1 != alpha, if only one input is provided
995 *
996 * Broadcasting is supported.
997 *
998 */
999message NotEqualLayerParams {
1000
1001    /**
1002     * Compare to the scalar value provided here if there is 1 input
1003     */
1004    float alpha = 1;
1005
1006}
1007
1008/**
1009 * LogicalAnd Layer
1010 *
1011 * Must have 2 inputs, produces 1 output.
1012 * Perform elementwise logical AND operation.
1013 *
1014 * Input is considered False if equal to 0.0f otherwise True.
1015 * Output is 1.0f if the condition is true otherwise 0.0f.
1016 *
1017 * .. code::
1018 *
1019 *      y = AND(x1, x2)
1020 *
1021 * Broadcasting is supported.
1022 *
1023 */
1024message LogicalAndLayerParams {
1025
1026}
1027
1028/**
1029 * LogicalOr Layer
1030 *
1031 * Must have 2 inputs, produces 1 output.
1032 * Perform elementwise logical OR operation.
1033 *
1034 * Input is considered False if equal to 0.0f otherwise True.
1035 * Output is 1.0f if the condition is true otherwise 0.0f.
1036 *
1037 * .. code::
1038 *
1039 *      y = OR(x1, x2)
1040 *
1041 * Broadcasting is supported.
1042 *
1043 */
1044message LogicalOrLayerParams {
1045
1046}
1047
1048/**
1049 * LogicalXor Layer
1050 *
1051 * Must have 2 inputs, produces 1 output.
1052 * Perform elementwise logical XOR operation.
1053 *
1054 * Input is considered False if equal to 0.0f otherwise True.
1055 * Output is 1.0f if the condition is true otherwise 0.0f.
1056 *
1057 * .. code::
1058 *
1059 *      y = XOR(x1, x2)
1060 *
1061 * Broadcasting is supported.
1062 *
1063 */
1064message LogicalXorLayerParams {
1065
1066}
1067
1068/**
1069 * LogicalNot Layer
1070 *
1071 * Must have 1 input, produces 1 output.
1072 * Perform elementwise logical NOT operation.
1073 *
1074 * Input is considered False if equal to 0.0f otherwise True.
1075 * Output is 1.0f if the condition is true otherwise 0.0f.
1076 *
1077 * .. code::
1078 *
1079 *      y = NOT(x)
1080 *
1081 *
1082 */
1083message LogicalNotLayerParams {
1084
1085}
1086
1087/// Border Amounts
1088/// --------------
1089
1090/**
1091 * Specifies the amount of spatial border to be either padded or cropped.
1092 *
1093 * For padding:
1094 *
1095 * .. code::
1096 *
1097 *     H_out = borderAmounts[0].startEdgeSize + H_in + borderAmounts[0].endEdgeSize
1098 *     W_out = borderAmounts[1].startEdgeSize + W_in + borderAmounts[1].endEdgeSize
1099 *
1100 *     topPaddingAmount == Height startEdgeSize
1101 *     bottomPaddingAmount == Height endEdgeSize
1102 *     leftPaddingAmount == Width startEdgeSize
1103 *     rightPaddingAmount == Width endEdgeSize
1104 *
1105 * For cropping:
1106 *
1107 * .. code::
1108 *
1109 *     H_out = (-borderAmounts[0].startEdgeSize) + H_in + (-borderAmounts[0].endEdgeSize)
1110 *     W_out = (-borderAmounts[1].startEdgeSize) + W_in + (-borderAmounts[1].endEdgeSize)
1111 *
1112 *     topCropAmount == Height startEdgeSize
1113 *     bottomCropAmount == Height endEdgeSize
1114 *     leftCropAmount == Width startEdgeSize
1115 *     rightCropAmount == Width endEdgeSize
1116 */
1117message BorderAmounts {
1118
1119    message EdgeSizes {
1120        /**
1121         * The amount to be padded or cropped from the beginning.
1122         */
1123        uint64 startEdgeSize = 1;
1124
1125        /**
1126         * The amount to be padded or cropped from the end.
1127         */
1128        uint64 endEdgeSize = 2;
1129    }
1130
1131    /**
1132     * The border amounts.
1133     * This must be length 2 in the order ``[H, W]``.
1134     */
1135    repeated EdgeSizes borderAmounts = 10;
1136
1137}
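/**
 * A worked example of the padding arithmetic above, with illustrative numbers.
 *
 * .. code::
 *
 *     h_in, w_in = 32, 32
 *     border = [(2, 3), (1, 1)]   # [(H start, H end), (W start, W end)] edge sizes
 *
 *     h_out = border[0][0] + h_in + border[0][1]   # 2 + 32 + 3 = 37
 *     w_out = border[1][0] + w_in + border[1][1]   # 1 + 32 + 1 = 34
 */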
1138
1139/**
1140 * Specifies the type of padding to be used with Convolution/Deconvolution and Pooling layers.
1141 * After padding, input spatial shape: ``[H_in, W_in]``, gets modified to the
1142 * output spatial shape ``[H_out, W_out]``.
1143 *
1144 * .. code::
1145 *
1146 *      topPaddingAmount == Height startEdgeSize == borderAmounts[0].startEdgeSize
1147 *      bottomPaddingAmount == Height endEdgeSize == borderAmounts[0].endEdgeSize
1148 *      leftPaddingAmount == Width startEdgeSize == borderAmounts[1].startEdgeSize
1149 *      rightPaddingAmount == Width endEdgeSize == borderAmounts[1].endEdgeSize
1150 *
1151 * With Convolution or Pooling:
1152 *
1153 * .. code::
1154 *
1155 *    H_out = int_division_round_down((H_in + topPaddingAmount + bottomPaddingAmount - KernelSize[0]),stride[0]) + 1
1156 *
1157 * which is the same as:
1158 *
1159 * .. code::
1160 *
1161 *    H_out = int_division_round_up((H_in + topPaddingAmount + bottomPaddingAmount - KernelSize[0] + 1),stride[0])
1162 *
1163 * With Deconvolution:
1164 *
1165 * .. code::
1166 *
1167 *    H_out = (H_in-1) * stride[0] + kernelSize[0] - (topPaddingAmount + bottomPaddingAmount)
1168 *
1169 *
1170 * The equivalent expressions hold true for ``W_out`` as well.
1171 *
1172 *
1173 * By default, the values of ``paddingAmounts`` are set to ``0``,
1174 * which results in a "true" valid padding.
1175 * If non-zero values are provided for ``paddingAmounts``,
1176 * "valid" convolution/pooling is performed within the spatially expanded input.
1177 *
1178 */
1179message ValidPadding {
1180
1181    BorderAmounts paddingAmounts = 1;
1182
1183}
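/**
 * The two equivalent ValidPadding output-size expressions above, checked
 * numerically for one spatial dimension (illustrative values).
 *
 * .. code::
 *
 *     import math
 *
 *     h_in, top_pad, bottom_pad, kernel, stride = 11, 0, 0, 3, 2
 *
 *     h_out_a = (h_in + top_pad + bottom_pad - kernel) // stride + 1
 *     h_out_b = math.ceil((h_in + top_pad + bottom_pad - kernel + 1) / stride)
 *     assert h_out_a == h_out_b == 5
 */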
1184
1185/**
1186 * Specifies the type of padding to be used with Convolution/Deconvolution and pooling layers.
1187 * After padding, input spatial shape: ``[H_in, W_in]``, gets modified to the
1188 * output spatial shape ``[H_out, W_out]``.
1189 * With Convolution or pooling:
1190 *
1191 * .. code::
1192 *
1193 *      H_out = int_division_round_up(H_in,stride[0])
1194 *      W_out = int_division_round_up(W_in,stride[1])
1195 *
1196 * This is achieved by using the following padding amounts:
1197 *
1198 * .. code::
1199 *
1200 *     totalPaddingHeight = max(0,(H_out-1) * stride[0] + KernelSize[0] - Hin)
1201 *     totalPaddingWidth = max(0,(W_out-1) * stride[1] + KernelSize[1] - Win)
1202 *
1203 * There are two modes of asymmetry:
1204 * ``BOTTOM_RIGHT_HEAVY``, and ``TOP_LEFT_HEAVY``.
1205 *
1206 * If the mode is ``BOTTOM_RIGHT_HEAVY``:
1207 *
1208 * .. code::
1209 *
1210 *     topPaddingAmount = floor(totalPaddingHeight / 2)
1211 *     bottomPaddingAmount = totalPaddingHeight - topPaddingAmount
1212 *     leftPaddingAmount = floor(totalPaddingWidth / 2)
1213 *     rightPaddingAmount = totalPaddingWidth - leftPaddingAmount
1214 *
1215 * If the mode is ``TOP_LEFT_HEAVY``:
1216 *
1217 * .. code::
1218 *
1219 *     bottomPaddingAmount = floor(totalPaddingHeight / 2)
1220 *     topPaddingAmount = totalPaddingHeight - bottomPaddingAmount
1221 *     rightPaddingAmount = floor(totalPaddingWidth / 2)
1222 *     leftPaddingAmount = totalPaddingWidth - rightPaddingAmount
1223 *
1224 *
1225 * With Deconvolution:
1226 *
1227 * .. code::
1228 *
1229 *    H_out = H_in * stride[0]
1230 *    W_out = W_in * stride[1]
1231 */
1232message SamePadding {
1233
1234    enum SamePaddingMode {
1235
1236        BOTTOM_RIGHT_HEAVY = 0;
1237        TOP_LEFT_HEAVY = 1;
1238
1239    }
1240    SamePaddingMode asymmetryMode = 1;
1241
1242}
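/**
 * A sketch of the SamePadding arithmetic above for one spatial dimension
 * (illustrative values, ``BOTTOM_RIGHT_HEAVY`` mode).
 *
 * .. code::
 *
 *     import math
 *
 *     h_in, kernel, stride = 13, 3, 2
 *
 *     h_out = math.ceil(h_in / stride)                          # 7
 *     total_pad = max(0, (h_out - 1) * stride + kernel - h_in)  # 2
 *     top_pad = total_pad // 2                                  # 1
 *     bottom_pad = total_pad - top_pad                          # 1
 */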
1243
1244/**
1245 * Specifies how grid points are sampled from an interval.
1246 * Without loss of generality, assume the interval to be [0, X-1], from which N points are to be sampled.
1247 * Here X may correspond to an input image's height or width.
1248 * All the methods can be expressed in terms of numpy's linspace function, along with the constraint that grid points have to lie in the interval [0, X-1].
1249 * Note: numpy.linspace(start = start, stop = end, num = N, endpoint = True) corresponds to sampling
1250 * N points uniformly from the interval [start, end], endpoints included.
1251 * The methods vary in how the ``start`` and ``end`` values are computed.
1252 */
1253message SamplingMode {
1254
1255    enum Method {
1256
1257        /**
1258         * start = 0, end = X-1
1259         * grid points = numpy.linspace(start, end)
1260         */
1261        STRICT_ALIGN_ENDPOINTS_MODE = 0;
1262
1263        /**
1264         * if N == 1: start = end = (X-1)/2
1265         * otherwise, start = 0, end = X-1
1266         * grid points = numpy.linspace(start, end)
1267         */
1268        ALIGN_ENDPOINTS_MODE = 1;
1269
1270        /**
1271         * start = 0, end = X - X/N
1272         * grid points = min(X-1, numpy.linspace(start, end))
1273         * This is the same as the mode used in the upsample layer in this specification, when used with bilinear interpolation. In that case N/X is the upsample ratio.
1274         */
1275        UPSAMPLE_MODE = 2;
1276
1277        /**
1278         * spacing = max(1, X-1)/N
1279         * start = 0.5 * spacing
1280         * end = start + (N-1) * spacing
1281         * grid points = min(X-1, numpy.linspace(start, end))
1282         */
1283        ROI_ALIGN_MODE = 3;
1284
1285    }
1286
1287    Method samplingMethod = 1;
1288
1289}
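/**
 * The sampling modes above expressed directly with numpy.linspace
 * (X = interval size, N = number of grid points; illustrative values).
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     X, N = 10, 4
 *
 *     strict_align = np.linspace(0, X - 1, num=N)                      # STRICT_ALIGN_ENDPOINTS_MODE
 *     upsample = np.minimum(X - 1, np.linspace(0, X - X / N, num=N))   # UPSAMPLE_MODE
 *     spacing = max(1, X - 1) / N
 *     roi_align = np.minimum(X - 1, np.linspace(0.5 * spacing, 0.5 * spacing + (N - 1) * spacing, num=N))  # ROI_ALIGN_MODE
 */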
1290
1291/**
1292 * Specifies the convention used to specify four bounding box coordinates for an image of size (Height, Width).
1293 * The (0,0) coordinate corresponds to the top-left corner of the image.
1294 */
1295message BoxCoordinatesMode {
1296
1297    enum Coordinates {
1298
1299        /**
1300         * [h_start, w_start, h_end, w_end]
1301         */
1302        CORNERS_HEIGHT_FIRST = 0;
1303
1304        /**
1305         * [w_start, h_start, w_end, h_end]
1306         */
1307        CORNERS_WIDTH_FIRST = 1;
1308
1309        /**
1310         * [h_center, w_center, box_height, box_width]
1311         */
1312        CENTER_SIZE_HEIGHT_FIRST = 2;
1313
1314        /**
1315         * [w_center, h_center, box_width, box_height]
1316         */
1317        CENTER_SIZE_WIDTH_FIRST = 3;
1318
1319    }
1320
1321    Coordinates boxMode = 1;
1322
1323}
1324
1325/**
1326 * Weights for layer parameters.
1327 * Weights are stored as repeated floating point numbers
1328 * using row-major ordering
1329 * and can represent 1-, 2-, 3-, or 4-dimensional data.
1330 */
1331message WeightParams {
1332
1333    /**
1334     * Values specified in single / float / FP32 precision.
1335     */
1336    repeated float floatValue = 1;
1337
1338    /**
1339     * Values in 16-bit half precision floating point.
1340     */
1341    bytes float16Value = 2;
1342
1343    /**
1344     * Raw value specification for quantized lower precisions.
1345     *
1346     * This field is interpreted as uintN, where N is the number of bits in quantization.
1347     * E.g. if N=8, the field is interpreted as an array of UINT8.
1348     * Use this field for quantized parameters unless specifically noted to use
1349     * int8RawValue.
1350     */
1351    bytes rawValue = 30;
1352
1353    /**
1354     * Field to be used if int8DynamicQuantize is set in the parent layer.
1355     * Cannot be set if rawValue is also set.
1356     * The values in this field are interpreted as INT8.
1357     *
1358     * If this field is set, following conditions must hold true:
1359     * * QuantizationType == LinearQuantizationParams, such that
1360     *   * size of the "scale" field is 1 and "bias" field is empty in "LinearQuantizationParams"
1361     */
1362    bytes int8RawValue = 31;
1363
1364    /**
1365     * Quantization related parameters.
1366     */
1367    QuantizationParams quantization = 40;
1368
1369    bool isUpdatable = 50;
1370
1371}
1372
1373/**
1374 * Quantization parameters.
1375 */
1376message QuantizationParams {
1377
1378    uint64 numberOfBits = 1;
1379    oneof QuantizationType {
1380        LinearQuantizationParams linearQuantization = 101;
1381        LookUpTableQuantizationParams lookupTableQuantization = 102;
1382    }
1383
1384}
1385
1386message LinearQuantizationParams {
1387
1388    /**
1389     * Stores scale and bias values corresponding to the quantized weights.
1390     * Must be an array of 1 element, or an array of C elements, where C
1391     * is the number of output channels. For recurrent layers it is equal to
1392     * the output vector size.
1393     *
1394     * Relationship between quantized weights, unquantized weights, scale and bias:
1395     *
1396     * W_unquantized = W_quantized * scale + bias
1397     *
1398     */
1399    repeated float scale = 1;
1400    repeated float bias = 2;
1401
1402}
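/**
 * The dequantization relationship above, sketched with numpy for an 8-bit,
 * per-tensor case (made-up values).
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     w_quantized = np.array([0, 64, 128, 255], dtype=np.uint8)
 *     scale, bias = 0.02, -2.5          # length-1 "scale" and "bias"
 *
 *     w_unquantized = w_quantized.astype(np.float32) * scale + bias
 */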
1403
1404message LookUpTableQuantizationParams {
1405
1406    /* Stores look-up table quantization values. Must be an array of
1407    (2^numberOfBits) elements.
1408    */
1409    repeated float floatValue = 1;
1410
1411}
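/**
 * Lookup-table dequantization sketch: each n-bit code indexes into the
 * (2^numberOfBits)-entry float table (2-bit example with made-up values).
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     table = np.array([-1.0, -0.25, 0.25, 1.0], dtype=np.float32)   # 2^2 entries
 *     codes = np.array([0, 3, 2, 1, 3])
 *     weights = table[codes]
 */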
1412
1413/// Layers
1414/// ------
1415
1416/**
1417 * A layer that performs spatial convolution or deconvolution.
1418 *
1419 * .. code::
1420 *
1421 *      y = ConvolutionLayer(x)
1422 *
1423 * Requires 1 or 2 inputs and produces 1 output.
1424 *
1425 * Input
1426 *    First Input:
1427 *      A blob with rank greater than or equal to 4.
1428 *      Rank 4 blob represents [Batch, channels, height, width].
1429 *      For ranks greater than 4, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
1430 *
1431 *     From Core ML specification version 4 onwards (iOS >= 13, macOS >= 10.15),
1432 *     the convolution layer can have 2 inputs, in which case the second input is
1433 *     the blob representing the weights. This is allowed when "isDeconvolution" = False.
1434 *     The weight blob should have shape
1435 *     ``[outputChannels, kernelChannels, kernelHeight, kernelWidth]``,
1436 *     where kernelChannels == inputChannels / nGroups.
1437 *
1438 * Output
1439 *   Rank is same as the input. e.g.: for rank 4 input, output shape is [B, C_out, H_out, W_out]
1440 *
1441 *
1442 * If ``dilationFactor`` is not 1, effective kernel size is
1443 * modified as follows:
1444 *
1445 * .. code::
1446 *
1447 *      KernelSize[0] <-- (kernelSize[0]-1) * dilationFactor[0] + 1
1448 *      KernelSize[1] <-- (kernelSize[1]-1) * dilationFactor[1] + 1
1449 *
1450 * Type of padding can be ``valid`` or ``same``. Output spatial dimensions depend on
1451 * the type of padding. For details, refer to the descriptions of the messages "ValidPadding"
1452 * and "SamePadding". Padded values are all zeros.
1453 *
1454 * For Deconvolution, ``ConvolutionPaddingType`` (``valid`` or ``same``) is ignored when ``outputShape`` is set.
1455 *
1456 *
1457 */
1458message ConvolutionLayerParams {
1459
1460    /**
1461     * The number of kernels.
1462     * Same as ``C_out`` used in the layer description.
1463     */
1464    uint64 outputChannels = 1;
1465
1466    /**
1467     * Channel dimension of the kernels.
1468     * Must be equal to ``inputChannels / nGroups``, if isDeconvolution == False
1469     * Must be equal to ``inputChannels``, if isDeconvolution == True
1470     */
1471    uint64 kernelChannels = 2;
1472
1473    /**
1474     * Group convolution, i.e. weight reuse along channel axis.
1475     * Input and kernels are divided into g groups
1476     * and convolution / deconvolution is applied within the groups independently.
1477     * If not set or 0, it is set to the default value 1.
1478     */
1479    uint64 nGroups = 10;
1480
1481    /**
1482     * Must be length 2 in the order ``[H, W]``.
1483     * If not set, default value ``[3, 3]`` is used.
1484     */
1485    repeated uint64 kernelSize = 20;
1486
1487    /**
1488     * Must be length 2 in the order ``[H, W]``.
1489     * If not set, default value ``[1, 1]`` is used.
1490     */
1491    repeated uint64 stride = 30;
1492
1493    /**
1494     * Must be length 2 in order ``[H, W]``.
1495     * If not set, default value ``[1, 1]`` is used.
1496     * It is ignored if ``isDeconvolution == true``.
1497     */
1498    repeated uint64 dilationFactor = 40;
1499
1500    /**
1501     * The type of padding.
1502     */
1503    oneof ConvolutionPaddingType {
1504        ValidPadding valid = 50;
1505        SamePadding same = 51;
1506    }
1507
1508    /**
1509     * Flag to specify whether it is a deconvolution layer.
1510     */
1511    bool isDeconvolution = 60;
1512
1513    /**
1514     * Flag to specify whether a bias is to be added or not.
1515     */
1516    bool hasBias = 70;
1517
1518    /**
1519     * Weights associated with this layer.
1520     * If convolution (``isDeconvolution == false``), weights have the shape
1521     * ``[outputChannels, kernelChannels, kernelHeight, kernelWidth]``, where kernelChannels == inputChannels / nGroups
1522     * If deconvolution (``isDeconvolution == true``) weights have the shape
1523     * ``[kernelChannels, outputChannels / nGroups, kernelHeight, kernelWidth]``, where kernelChannels == inputChannels
1524     */
1525    WeightParams weights = 90;
1526    WeightParams bias = 91; /// Must be of size [outputChannels].
1527
1528    /**
1529     * The output shape, which has length 2 ``[H_out, W_out]``.
1530     * This is used only for deconvolution (``isDeconvolution == true``).
1531     * If not set, the deconvolution output shape is calculated
1532     * based on ``ConvolutionPaddingType``.
1533     */
1534    repeated uint64 outputShape = 100;
1535
1536}
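/**
 * A small numeric sketch of the dilation rule and the ``valid`` output size
 * from the convolution description above (illustrative values).
 *
 * .. code::
 *
 *     kernel_size = [3, 3]
 *     dilation = [2, 2]
 *     effective_kernel = [(k - 1) * d + 1 for k, d in zip(kernel_size, dilation)]  # [5, 5]
 *
 *     h_in, stride = 28, 1
 *     h_out = (h_in - effective_kernel[0]) // stride + 1   # valid padding: 24
 */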
1537
1538/**
1539 * A layer that performs a 3-dimensional convolution.
1540 *
1541 * .. code::
1542 *
1543 *      y = Convolution3DLayer(x)
1544 *
1545 * Input
1546 *    A blob of rank 5.
1547 *    The input blob's shape should be ``[batch, channels, depth, height, width]``.
1548 *
1549 * Fields
1550 *   The bias field, if set, should have shape of ``[channelsOut]``.
1551 *
1552 * Output
1553 *   A blob of rank 5.
1554 *   The output blob's shape is ``[batch, channelsOut, depthOut, heightOut, widthOut]``.
1555 *
1556 * Type of padding can be ``custom``, ``valid``, or ``same``. Padded values are all zeros.
1557 * Output spatial dimensions depend on the type of padding. For details, refer to the
1558 * description of the ``PaddingType`` field of this ``Convolution3DLayerParams`` message.
1559 *
1560 * Example
1561 *   For example, given an input of size ``[1, 3, 3, 8, 8]``, a stride of 2 in each dimension,
1562 *   a kernel of 3 in each dimension, 2 output channels, and ``same`` padding, this layer will
1563 *   compute the total padding applied in the depth, height, and width dimensions to be 2, 1, and 1,
1564 *   respectively. The depth padding is even and will be applied equally to both sides of the depth
1565 *   dimension. Since the height and width padding values are odd, they'll be applied to the
1566 *   bottom/right of the height/width dimensions. Thus, the padding applied to the input will be
1567 *   ``[1, 1, 0, 1, 0, 1]`` (front, back, top, bottom, left, right). Finally, the output produced
1568 *   will have size ``[1, 2, 2, 4, 4]``.
1569 *
1570 */
1571message Convolution3DLayerParams {
1572
1573    /**
1574     * The number of channels in the output (channelsOut). Must be a positive integer.
1575     */
1576    int32 outputChannels = 1;
1577
1578    /**
1579     * The number of channels in the input (channels). Must be a positive integer.
1580     */
1581    int32 inputChannels = 2;
1582
1583    /**
1584    * Group convolution, i.e., weight reuse along the channel axis.
1585    * It must evenly divide both the number of input and output channels and be at most the number
1586    * of input channels (a depthwise convolution).
1587    * Input and kernels are divided into g groups and convolution is applied within the groups
1588    * independently.
1589    */
1590    int32 nGroups = 10;
1591
1592    /* Depth of the convolution kernel. Must be a positive integer.
1593     */
1594    int32 kernelDepth = 20;
1595
1596    /* Height of the convolution kernel. Must be a positive integer.
1597     */
1598    int32 kernelHeight = 21;
1599
1600    /* Width of the convolution kernel. Must be a positive integer.
1601     */
1602    int32 kernelWidth = 22;
1603
1604    /* Stride along the depth direction. Must be a positive integer.
1605     */
1606    int32 strideDepth = 31;
1607
1608    /* Stride along the height direction. Must be a positive integer.
1609     */
1610    int32 strideHeight = 32;
1611
1612    /* Stride along the width direction. Must be a positive integer.
1613     */
1614    int32 strideWidth = 33;
1615
1616    /* Dilation along the depth direction. Must be a positive integer.
1617     */
1618    int32 dilationDepth = 40;
1619
1620    /* Dilation along the height direction. Must be a positive integer.
1621     */
1622    int32 dilationHeight = 41;
1623
1624    /* Dilation along the width direction. Must be a positive integer.
1625     */
1626    int32 dilationWidth = 42;
1627
1628    /**
1629     * Flag to specify whether a bias is to be added or not.
1630     * If false, then no bias is added.
1631     */
1632    bool hasBias = 50;
1633
1634    /**
1635     * Weights associated with this layer.
1636     * Weights have the shape:
1637     * if deconvolution == False,
1638     * ``[outputChannels, kernelChannels, kernelDepth, kernelHeight, kernelWidth]``, where
1639     * kernelChannels == inputChannels / nGroups;
1640     * else if deconvolution == True,
1641     * ``[outputChannels / nGroups, kernelChannels, kernelDepth, kernelHeight, kernelWidth]``, where kernelChannels == inputChannels.
1642     */
1643    WeightParams weights = 60;
1644
1645    /**
1646     * Must be of size ``[outputChannels]``.
1647     */
1648    WeightParams bias = 61;
1649
1650
1651    /**
1652     * The type of padding.
1653     * All padding types pad the input shape with zeros.
1654     * CUSTOM padding will add the custom padding values specified below to their respective
1655     * dimensions, e.g., `customPaddingFront` number of zeros will be added to one side of the
1656     * input's depth dimension and `customPaddingBack` number of zeros will be added to the other
1657     * side of the input's depth dimension.
1658     * VALID padding adds no padding to any dimension. In this case, the last convolution along
1659     * each dimension will be dropped if the input dimension and the kernel size, stride, and
1660     * dilation do not match.
1661     * SAME padding adds enough padding to each dimension such that the output of the convolution
1662     * has size ``Ceiling(inputShape / stride)``. Padding is added evenly to both sides of each
1663     * dimension unless the total padding to add is odd, in which case it is added to the
1664     * back/bottom/right side of the respective dimension. For example, if the total padding needed
1665     * in the depth dimension is 3, 1 zero will be added to the front side of the depth dimension
1666     * and 2 zeros will be added to the back side.
1667     */
1668    enum PaddingType {
1669        CUSTOM = 0;
1670        VALID = 1;
1671        SAME = 2;
1672    }
1673    PaddingType paddingType = 70;
1674
1675    /* Padding before the input in the depth direction. Must be zero or a positive integer.
1676     * Used when the `PaddingType` is `CustomPadding`, otherwise ignored by other padding types.
1677     */
1678    int32 customPaddingFront = 80;
1679
1680    /* Padding after the input in the depth direction. Must be zero or a positive integer.
1681     * Used when the `PaddingType` is `CustomPadding`, otherwise ignored by other padding types.
1682     */
1683    int32 customPaddingBack = 81;
1684
1685    /* Padding before the input in the height direction. Must be zero or a positive integer.
1686     * Used when the `PaddingType` is `CustomPadding`, otherwise ignored by other padding types.
1687     */
1688    int32 customPaddingTop = 82;
1689
1690    /* Padding after the input in the height direction. Must be zero or a positive integer.
1691     * Used when the `PaddingType` is `CustomPadding`, otherwise ignored by other padding types.
1692     */
1693    int32 customPaddingBottom = 83;
1694
1695    /* Padding before the input in the width direction. Must be zero or a positive integer.
1696     * Used when the `PaddingType` is `CustomPadding`, otherwise ignored by other padding types.
1697     */
1698    int32 customPaddingLeft = 84;
1699
1700    /* Padding after the input in the width direction. Must be zero or a positive integer.
1701     * Used when the `PaddingType` is `CustomPadding`, otherwise ignored by other padding types.
1702     */
1703    int32 customPaddingRight = 85;
1704
1705    /* Flag to specify if this is Convolution Transpose or not.
1706     */
1707    bool isDeconvolution = 86;
1708
1709    /*
1710     * The output shape, which has length 3 ``[D_out, H_out, W_out]``.
1711     * This is used only for deconvolution (``isDeconvolution == true``).
1712     * If not set, the deconvolution output shape is calculated
1713     * based on ``PaddingType``.
1714     */
1715    repeated uint64 outputShape = 87;
1716
1717}
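/**
 * Numerical check of the SAME-padding example in the Convolution3D
 * description above: input ``[1, 3, 3, 8, 8]``, kernel 3 and stride 2 in
 * every spatial dimension.
 *
 * .. code::
 *
 *     import math
 *
 *     for size, kernel, stride in [(3, 3, 2), (8, 3, 2), (8, 3, 2)]:   # depth, height, width
 *         out = math.ceil(size / stride)
 *         total_pad = max(0, (out - 1) * stride + kernel - size)
 *         print(out, total_pad)    # -> (2, 2), (4, 1), (4, 1)
 */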
1718
1719/**
1720 * A layer that performs a matrix-vector or matrix-matrix product.
1721 * This is equivalent to a fully-connected, or dense layer.
1722 * The weight parameters correspond to a matrix of dimensions (inputChannels, outputChannels) i.e. (C_in, C_out)
1723 *
1724 * .. code::
1725 *
1726 *      y = InnerProductLayer(x)
1727 *
1728 * Requires 1 input and produces 1 output.
1729 *
1730 * Input
1731 *      Input can have rank 1 to rank 5. This is how it is reshaped into the matrix (for rank > 1):
1732 *      rank 1 (x1) : in this case, the layer corresponds to a matrix-vector product. x1 must be equal to C_in
1733 *      rank 2 (x1, x2): x2 must be equal to C_in
1734 *      rank 3 (x1, x2, x3) --> (x1 * x2, x3). x3 must be equal to C_in
1735 *      rank 4 (x1, x2, x3, x4) --> (x1, x2 * x3 * x4). x2 * x3 * x4 must be equal to C_in
1736 *      rank 5 (x1, x2, x3, x4, x5) --> (x1 * x2, x3 * x4 * x5). x3 * x4 * x5 must be equal to C_in
1737 *
1738 * Output
1739 *      Output rank is same as the input rank
1740 *      rank 1: (C_out)
1741 *      rank 2: (x1, C_out)
1742 *      rank 3: (x1, x2, C_out)
1743 *      rank 4: (x1, C_out, 1, 1)
1744 *      rank 5: (x1, x2, C_out, 1, 1)
1745 *
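 *
 * An illustrative NumPy sketch (not part of the specification) of the rank-4 reshaping rule,
 * assuming the weights are stored as ``[C_out, C_in]``:
 *
 * .. code::
 *
 *      import numpy as np
 *
 *      x = np.random.rand(2, 3, 4, 5)       # rank 4 input: (x1, x2, x3, x4)
 *      W = np.random.rand(8, 3 * 4 * 5)     # [C_out, C_in], with C_in = x2 * x3 * x4
 *      y = x.reshape(2, -1) @ W.T           # (x1, C_out)
 *      y = y.reshape(2, 8, 1, 1)            # rank 4 output: (x1, C_out, 1, 1)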
1746 */
1747message InnerProductLayerParams {
1748
1749    uint64 inputChannels = 1; /// Input size: C_in.
1750    uint64 outputChannels = 2; /// Output size: C_out.
1751
1752    bool hasBias = 10; /// Whether a bias is added or not.
1753
1754    WeightParams weights = 20; /// Weight matrix [C_out, C_in].
1755    WeightParams bias = 21; /// Bias vector [C_out].
1756
1757    /**
1758     * If set, this layer, at runtime, quantizes the floating point input blob to int8 before applying an
1759     * inner product using INT8 weight matrix parameters, as provided in weights->int8RawValue. The
1760     * result is then dequantized.
1761     * Requires:
1762     * * hasBias == false
1763     * * QuantizationType == LinearQuantizationParams, such that
1764     *   * size of the "scale" field is 1 and "bias" field is empty in "LinearQuantizationParams"
1765     * * numberOfBits == 8
1766     * * the ``rawValue`` field of ``weights`` must be empty
1767     */
1768    bool int8DynamicQuantize = 22;
1769
1770}
1771
1772/**
1773 * A layer that performs a matrix lookup and optionally adds a bias.
1774 * The weights matrix is stored with dimensions [outputChannels, inputDim].
1775 *
1776 * .. code::
1777 *
1778 *      y = EmbeddingLayer(x)
1779 *
1780 * Requires 1 input and produces 1 output.
1781 *
1782 * Input
1783 *     Input values must be in the range ``[0, inputDim - 1]``.
1784 *
1785 *     Input must have rank equal to 4 or 5, such that the last 3 dimensions are all 1.
1786 *     rank 4: shape (x1, 1, 1, 1). x1 is effectively the batch/sequence length.
1787 *     rank 5: shape (x1, x2 , 1, 1, 1). x1 * x2 is effectively the combined batch/sequence length.
1788 *
1789 * Output
1790 *      Output rank is same as the input rank. Please see input description above.
1791 *      rank 4: shape (x1, outputChannels, 1, 1)
1792 *      rank 5: shape (x1, x2, outputChannels, 1, 1)
1793 *
1794 */
1795message EmbeddingLayerParams {
1796
1797    uint64 inputDim = 1; /// Size of the input dictionary.
1798    uint64 outputChannels = 2; /// Size of the output vectors.
1799
1800    bool hasBias = 10; /// Whether a bias is added or not.
1801
1802    WeightParams weights = 20; /// 2-D weights of dimensions [outputChannels, inputDim].
1803    WeightParams bias = 21; /// Bias of size [outputChannels].
1804
1805}
1806
1807/**
1808 * A layer that performs a matrix lookup and optionally adds a bias.
1809 * The weights matrix is stored with dimensions [embeddingSize, vocabSize].
1810 *
1811 * .. code::
1812 *
1813 *      y = EmbeddingNDLayer(x)
1814 *
1815 * Requires 1 input and produces 1 output.
1816 *
1817 * Input
1818 *     Input values must be in the range ``[0, vocabSize - 1]``.
1819 *     Input must have rank at least 2. The last dimension must always be 1.
1820 *     rank 2: shape (x1, 1). x1 is the batch/sequence length.
1821 *     rank 3: shape (x1, x2, 1). x1 * x2 is effectively the combined batch/sequence length.
1822 *     rank 4: shape (x1, x2, x3, 1). x1 * x2 * x3 is effectively the combined batch/sequence length.
1823 *     rank 5: shape (x1, x2 , x3, x4, 1). x1 * x2 * x3 * x4 is effectively the combined batch/sequence length.
1824 *
1825 * Output
1826 *      Output rank is same as the input rank. Please see input description above.
1827 *      rank 2: shape (x1, embeddingSize)
1828 *      rank 3: shape (x1, x2, embeddingSize)
1829 *      rank 4: shape (x1, x2, x3, embeddingSize)
1830 *      rank 5: shape (x1, x2, x3, x4, embeddingSize)
1831 *
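 *
 * An illustrative NumPy sketch (not part of the specification) of the lookup, assuming the
 * weights are stored as ``[embeddingSize, vocabSize]`` and no bias is used:
 *
 * .. code::
 *
 *      import numpy as np
 *
 *      vocab_size, embedding_size = 100, 16
 *      W = np.random.rand(embedding_size, vocab_size)
 *      x = np.array([[3], [7], [42]])        # rank 2 input: (x1, 1), values in [0, vocabSize - 1]
 *      y = W[:, x[:, 0]].T                   # (x1, embeddingSize)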
1832 */
1833message EmbeddingNDLayerParams {
1834
1835    uint64 vocabSize = 1; /// Size of the input dictionary.
1836    uint64 embeddingSize = 2; /// Size of the output vectors.
1837    bool hasBias = 3; /// Whether a bias is added or not.
1838    WeightParams weights = 20; /// 2-D weights of dimensions [embeddingSize, vocabSize].
1839    WeightParams bias = 21; /// Bias of size [embeddingSize].
1840
1841}
1842
1843/**
1844 * A layer that performs batch normalization,
1845 * which is performed along axis = -3,
1846 * and repeated along the other axes, if present.
1847 *
1848 * .. code::
1849 *
1850 *      y = BatchnormLayer(x)
1851 *
1852 * Requires 1 input and produces 1 output.
1853 *
1854 * This operation is described by the following formula:
1855 *
1856 * .. math::
1857 *     y_i = \gamma_i \dfrac{ (x_i - \mu_i)}{\sqrt{\sigma_i^2 + \epsilon}} + \beta_i \;,\;i=1,\ldots,C
1858 *
1859 * Input
1860 *     A blob with rank greater than or equal to 3.
1861 *     Example: Rank 4 blob represents [Batch, channels, height, width]
1862 *     For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
1863 *
1864 * Output
1865 *     A blob with the same shape as the input.
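 *
 * An illustrative NumPy sketch (not part of the specification) of the formula above, using
 * precomputed ``mean`` and ``variance`` parameters of length ``channels``:
 *
 * .. code::
 *
 *      import numpy as np
 *
 *      def batchnorm(x, gamma, beta, mean, variance, eps=1e-5):
 *          # x has shape (..., C, H, W); normalization is along axis = -3
 *          s = (-1, 1, 1)
 *          x_hat = (x - mean.reshape(s)) / np.sqrt(variance.reshape(s) + eps)
 *          return gamma.reshape(s) * x_hat + beta.reshape(s)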
1866 */
1867message BatchnormLayerParams {
1868
1869    uint64 channels = 1; /// Size of the channel dimension in the input.
1870
1871    /**
1872     * If ``computeMeanVar == true``,
1873     * the mean and variance are calculated from either
1874     * the single input instance, if ``instanceNormalization == true``,
1875     * or the whole batch, if ``instanceNormalization == false``,
1876     * and the values provided in the parameters ``mean`` and ``variance`` are ignored.
1877     */
1878    bool computeMeanVar = 5;
1879    bool instanceNormalization = 6;
1880
1881    /**
1882     * A small constant to avoid division by 0 while normalizing by variance.
1883     * Defaults to ``1e-5`` if not set or set to ``0``.
1884     */
1885    float epsilon = 10;
1886
1887    WeightParams gamma = 15; /// Parameter of length [channels]
1888    WeightParams beta = 16; /// Parameter of length [channels]
1889    WeightParams mean = 17; /// Parameter of length [channels]
1890    WeightParams variance = 18; /// Parameter of length [channels]
1891
1892}
1893
1894/**
1895 * A spatial pooling layer.
1896 *
1897 * .. code::
1898 *
1899 *      y = PoolingLayer(x)
1900 *
1901 * Requires 1 input and produces 1 output.
1902 *
1903 * Input
1904 *     A blob with rank greater than or equal to 4.
1905 *     Rank 4 blob represents [Batch, channels, height, width]
1906 *     For ranks greater than 4, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
1907 *
1908 * Output
1909 *     Rank is same as the input. e.g.: for rank 4 input, output shape is [B, C, H_out, W_out]
1910 *
1911 * Padding options are similar to ``ConvolutionLayerParams``
1912 * with the additional option of ``ValidCompletePadding`` (``includeLastPixel``),
1913 * which ensures that the last application of the kernel
1914 * always includes the last pixel of the input image, if there is padding.
1915 *
1916 * .. code::
1917 *
1918 *     H_out = ceil(float(H_in + 2 * paddingAmounts[0] - kernelSize[0]) / float(stride[0])) + 1
1919 *     if (paddingAmounts[0] > 0 or paddingAmounts[1] > 0) {
1920 *          if ((H_out - 1) * stride[0] >= H_in + paddingAmounts[0]) {
1921 *              H_out = H_out - 1
1922 *          }
1923 *     }
1924 *
1925 * The equivalent expressions hold true for ``W_out`` as well.
1926 * Only symmetric padding is supported with this option.
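 *
 * An illustrative sketch (not part of the specification; the helper name is hypothetical) of the
 * output-size rule above, for a single spatial dimension:
 *
 * .. code::
 *
 *     import math
 *
 *     def include_last_pixel_out(h_in, pad, kernel, stride):
 *         h_out = math.ceil((h_in + 2 * pad - kernel) / stride) + 1
 *         if pad > 0 and (h_out - 1) * stride >= h_in + pad:
 *             h_out -= 1
 *         return h_out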
1927 */
1928message PoolingLayerParams {
1929
1930    enum PoolingType {
1931
1932        MAX = 0;
1933        AVERAGE = 1;
1934        L2 = 2;
1935
1936    }
1937    PoolingType type = 1; /// Type of pooling operation.
1938
1939    /**
1940     * Must be length 2 in the order ``[H, W]``.
1941     * If not set, default value ``[3, 3]`` is used.
1942     */
1943    repeated uint64 kernelSize = 10;
1944
1945    /**
1946     * Must be length 2 in the order ``[H, W]``.
1947     * If not set, default value ``[1, 1]`` is used.
1948     */
1949    repeated uint64 stride = 20;
1950
1951    message ValidCompletePadding {
1952
1953        /**
1954         * Must be length 2 in order ``[H, W]``.
1955         * If not set, value ``[0, 0]`` is used.
1956         */
1957        repeated uint64 paddingAmounts = 10;
1958
1959    }
1960
1961    oneof PoolingPaddingType {
1962        ValidPadding valid = 30;
1963        SamePadding same = 31;
1964        ValidCompletePadding includeLastPixel = 32;
1965    }
1966
1967    /**
1968     * If true, padded values are excluded from the count (denominator)
1969     * when computing average pooling.
1970     */
1971    bool avgPoolExcludePadding = 50;
1972
1973    /**
1974     * If true, global pooling is performed.
1975     * Kernel size is inferred from the input data spatial dimensions.
1976     */
1977    bool globalPooling = 60;
1978
1979}
1980
1981/*
1982 * A layer to pool three spatial dimensions
1983 *
1984 * Input
1985 *      A blob with rank equal to 5, representing [Batch, channels, depth, height, width].
1986 *
1987 * Output
1988 *      Rank is same as the input: A blob with rank equal to 5, representing [Batch, channels, depth, height, width].
1989 *
1990 * Requires 1 input and produces 1 output.
1991 *
1992 * For example, given an input of shape (1,1,2,3,3):
1993 *        +----+----+----+
1994 *      / | 10 | 11 | 12 |
1995 *     /  +----+----+----+
1996 *    /   | 13 | 14 | 15 |
1997 *   /    +----+----+----+
1998 *  /     | 16 | 17 | 18 |
1999 * /      +----+----+----+
2000 * +----+----+----+      /
2001 * |  1 |  2 |  3 |     /
2002 * +----+----+----+    /
2003 * |  4 |  5 |  6 |   /
2004 * +----+----+----+  /
2005 * |  7 |  8 |  9 | /
2006 * +----+----+----+
2007 *
2008 * And applying MAX pooling using:
2009 *      Kernel: 2x2x2
2010 *      Stride: 1x1x1
2011 *      Valid Padding
2012 * We expect to get an output with shape: (1,1,1,2,2) and value:
2013 * +----+----+
2014 * | 14 | 15 |
2015 * +----+----+
2016 * | 17 | 18 |
2017 * +----+----+
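 *
 * An illustrative NumPy sketch (not part of the specification) reproducing the example above:
 *
 * .. code::
 *
 *      import numpy as np
 *
 *      x = np.arange(1, 19).reshape(1, 1, 2, 3, 3)    # (Batch, channels, depth, height, width)
 *      out = np.empty((1, 1, 1, 2, 2))
 *      for i in range(2):                             # MAX, kernel 2x2x2, stride 1, VALID padding
 *          for j in range(2):
 *              out[0, 0, 0, i, j] = x[0, 0, :, i:i + 2, j:j + 2].max()
 *      # out[0, 0, 0] == [[14, 15], [17, 18]]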
2018 */
2019message Pooling3DLayerParams {
2020
2021    enum PoolingType3D {
2022        MAX = 0;
2023        AVERAGE = 1;
2024    }
2025
2026    // Whether to use Max or Average
2027    PoolingType3D type = 1;
2028
2029    // Depth of the pooling region.
2030    int32 kernelDepth = 2;
2031
2032    // Height of the pooling region.
2033    int32 kernelHeight = 3;
2034
2035    // Width of the pooling region.
2036    int32 kernelWidth = 4;
2037
2038    // Stride along the depth direction
2039    int32 strideDepth = 5;
2040
2041    // Stride along the height direction
2042    int32 strideHeight = 6;
2043
2044    // Stride along the width direction
2045    int32 strideWidth = 7;
2046
2047    /**
2048     * The type of padding.
2049     * All padding types pad the input shape with zeros.
2050     * CUSTOM padding will add the custom padding values specified below to their respective
2051     * dimensions, e.g., `customPaddingFront` number of zeros will be added to one side of the
2052     * input's depth dimension and `customPaddingBack` number of zeros will be added to the other
2053     * side of the input's depth dimension.
2054     * VALID padding adds no padding to any dimension. In this case, the last pool along
2055     * each dimension will be dropped if the input dimension is not evenly covered by the kernel size and stride.
2056     * SAME padding adds enough padding to each dimension such that the output
2057     * has the same spatial dimensions as the input. Padding is added evenly to both
2058     * sides of each dimension unless the total padding to add is odd, in which case the extra padding
2059     * is added to the back/bottom/right side of the respective dimension. For example, if the
2060     * total horizontal padding is 3, there will be 1 padding on the left and 2 on the right.
2061     */
2062    enum Pooling3DPaddingType {
2063        CUSTOM = 0;
2064        VALID = 1;
2065        SAME = 2;
2066    }
2067    Pooling3DPaddingType paddingType = 15;
2068
2069    // Padding before the input in the depth direction.
2070    int32 customPaddingFront = 8;
2071
2072    // Padding after the input in the depth direction.
2073    int32 customPaddingBack = 9;
2074
2075    // Padding before the input in the height direction.
2076    int32 customPaddingTop = 10;
2077
2078    // Padding after the input in the height direction.
2079    int32 customPaddingBottom = 11;
2080
2081    // Padding before the input in the width direction.
2082    int32 customPaddingLeft = 12;
2083
2084    // Padding after the input in the width direction.
2085    int32 customPaddingRight = 13;
2086
2087    // If true, padded values are excluded from the count (denominator) in AVERAGE pooling. Has no effect for MAX pooling.
2088    bool countExcludePadding = 14;
2089}
2090
2091/*
2092 * A layer to pool three spatial dimensions down to one value.
2093 * This behaves like a special case of Pooling3DLayerParams in which
2094 * the Kernel is the size of the input and there is no padding.
2095 *
2096 * Input
2097 *      A blob with rank equal to 5, representing [Batch, channels, depth, height, width].
2098 *
2099 * Output
2100 *      Rank is same as the input: A blob with rank equal to 5, representing [Batch, channels, depth, height, width].
2101 *      Depth, height, and width of the output will always be 1.
2102 *
2103 * Requires 1 input and produces 1 output.
2104 *
2105 * For example, given an input of shape (1,1,2,3,3):
2106 *        +----+----+----+
2107 *      / | 10 | 11 | 12 |
2108 *     /  +----+----+----+
2109 *    /   | 13 | 14 | 15 |
2110 *   /    +----+----+----+
2111 *  /     | 16 | 17 | 18 |
2112 * /      +----+----+----+
2113 * +----+----+----+      /
2114 * |  1 |  2 |  3 |     /
2115 * +----+----+----+    /
2116 * |  4 |  5 |  6 |   /
2117 * +----+----+----+  /
2118 * |  7 |  8 |  9 | /
2119 * +----+----+----+
2120 *
2121 * And applying MAX global 3d pooling, we expect to get an output with shape: (1,1,1,1,1) and value:
2122 * +----+
2123 * | 18 |
2124 * +----+
2125 */
2126message GlobalPooling3DLayerParams {
2127
2128    enum GlobalPoolingType3D {
2129        MAX = 0;
2130        AVERAGE = 1;
2131    }
2132
2133    // Whether to use Max or Average
2134    GlobalPoolingType3D type = 1;
2135}
2136
2137/**
2138 * A layer that performs padding along spatial dimensions.
2139 *
2140 * .. code::
2141 *
2142 *      y = PaddingLayer(x)
2143 *
2144 * Requires 1 input and produces 1 output.
2145 *
2146 * Input
2147 *     A blob with rank at least 2.
2148 *     e.g.: blob with shape ``[H_in, W_in]``.
2149 *     For ranks greater than 2, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch,
2150 *     i.e. padding is applied to the last two dimensions.
2151 *
2152 * Output
2153 *     Same rank as the input.
2154 *     e.g.: blob with shape ``[H_out, W_out]``.
2155 *
2156 * Output dimensions are calculated as follows:
2157 *
2158 * .. code::
2159 *
2160 *     H_out = H_in + topPaddingAmount + bottomPaddingAmount
2161 *     W_out = W_in + leftPaddingAmount + rightPaddingAmount
2162 *
2163 *     topPaddingAmount == Height startEdgeSize == borderAmounts[0].startEdgeSize
2164 *     bottomPaddingAmount == Height endEdgeSize == borderAmounts[0].endEdgeSize
2165 *     leftPaddingAmount == Width startEdgeSize == borderAmounts[1].startEdgeSize
2166 *     rightPaddingAmount == Width endEdgeSize == borderAmounts[1].endEdgeSize
2167 *
2168 * There are three types of padding:
2169 *
2170 * - ``PaddingConstant``, which fills a constant value at the border.
2171 * - ``PaddingReflection``, which reflects the values at the border.
2172 * - ``PaddingReplication``, which replicates the values at the border.
2173 *
2174 * Given the following input:
2175 *
2176 * .. code::
2177 *
2178 *     [1, 3, 4]  :  1   2   3   4
2179 *                   5   6   7   8
2180 *                   9   10  11  12
2181 *
2182 * Here is the output of applying the padding
2183 * ``(top=2, left=2, bottom=0, right=0)``
2184 * with each of the supported types:
2185 *
2186 * - ``PaddingConstant`` (``value = 0``):
2187 *   .. code::
2188 *
2189 *       [1, 5, 6]  :  0   0   0  0   0   0
2190 *                     0   0   0  0   0   0
2191 *                     0   0   1  2   3   4
2192 *                     0   0   5  6   7   8
2193 *                     0   0   9  10  11  12
2194 *
2195 * - ``PaddingReflection``:
2196 *   .. code::
2197 *
2198 *       [1, 5, 6]  :  11  10  9  10  11  12
2199 *                     7   6   5  6   7   8
2200 *                     3   2   1  2   3   4
2201 *                     7   6   5  6   7   8
2202 *                     11  10  9  10  11  12
2203 *
2204 * - ``PaddingReplication``:
2205 *   .. code::
2206 *
2207 *       [1, 5, 6]  :  1   1   1  2   3   4
2208 *                     1   1   1  2   3   4
2209 *                     1   1   1  2   3   4
2210 *                     5   5   5  6   7   8
2211 *                     9   9   9  10  11  12
2212 */
2213message PaddingLayerParams {
2214
2215    /**
2216     * Fill a constant value in the padded region.
2217     */
2218    message PaddingConstant {
2219        float value = 1;
2220    }
2221
2222    /**
2223     * Reflect the values at the border for padding.
2224     */
2225    message PaddingReflection {
2226    }
2227
2228    /**
2229     * Replicate the values at the border for padding.
2230     */
2231    message PaddingReplication {
2232    }
2233
2234    oneof PaddingType {
2235        PaddingConstant constant = 1;
2236        PaddingReflection reflection = 2;
2237        PaddingReplication replication = 3;
2238    }
2239
2240    BorderAmounts paddingAmounts = 10; /// Amounts to be padded to the input.
2241
2242}
2243
2244/**
2245 * A layer that concatenates along the axis = -3 or -5.
2246 * For general concatenation along any axis, see ConcatNDLayer.
2247 *
2248 * .. code::
2249 *
2250 *      y = ConcatLayer(x1,x2,....)
2251 *
2252 * Requires more than 1 input and produces 1 output.
2253 *
2254 * Input
2255 *   All input blobs must have same rank.
2256 *   If ``sequenceConcat == false``, rank must be greater than or equal to 3. In this case concatenation is along axis = -3
2257 *   If ``sequenceConcat == true``, rank must be greater than or equal to 5. In this case concatenation is along axis = -5
2258 *
2259 * Output
2260 *   Same rank as the input.
2261 *
2262 */
2263message ConcatLayerParams {
2264
2265    /**
2266     * If true, concatenate along the axis = -5 instead of axis = -3.
2267     */
2268    bool sequenceConcat = 100;
2269
2270}
2271
2272/**
2273 * A layer that performs local response normalization (LRN).
2274 *
2275 * .. code::
2276 *
2277 *      y = LRNLayer(x)
2278 *
2279 * Requires 1 input and produces 1 output.
2280 *
2281 * Input
2282 *     A blob with rank greater than or equal to 3.
2283 *     Example: Rank 4 blob represents [Batch, channels, height, width]
2284 *     For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
2285 * Output
2286 *     A blob with the same shape as the input.
2287 *
2288 * This layer is described by the following formula:
2289 *
2290 * .. math::
2291 *     x_i \leftarrow  \dfrac{x_i}{\left ( k + \dfrac{\alpha}{\text{localSize}} \sum_j x_j^2 \right )^\beta}
2292 *
2293 * where the summation is done over a ``(localSize, 1, 1)`` neighborhood ---
2294 * that is, over a window "across" channels in 1x1 spatial neighborhoods.
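 *
 * An illustrative NumPy sketch (not part of the specification) of the formula above for a
 * rank-3 input ``[C, H, W]``:
 *
 * .. code::
 *
 *      import numpy as np
 *
 *      def lrn(x, alpha, beta, local_size, k=1.0):
 *          c = x.shape[0]
 *          pad = local_size // 2
 *          sq = np.pad(x ** 2, ((pad, local_size - 1 - pad), (0, 0), (0, 0)))
 *          window = sum(sq[i:i + c] for i in range(local_size))   # windowed sum of squares across channels
 *          return x / (k + alpha / local_size * window) ** beta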
2295 */
2296message LRNLayerParams {
2297
2298    float alpha = 1;
2299    float beta = 2;
2300    uint64 localSize = 3; /// Number of channels in the normalization window.
2301    float k = 4; /// Defaults to 1 if not set or 0. Must be strictly positive.
2302
2303}
2304
2305/**
2306 * Softmax Normalization Layer
2307 *
2308 * A layer that performs softmax normalization.
2309 * Normalization is applied along axis = -3 or N-3 (where N is the rank of the input)
2310 * For a softmax layer that can operate along any axis, see SoftmaxNDLayer.
2311 *
2312 *
2313 * .. code::
2314 *
2315 *      y = SoftmaxLayer(x)
2316 *
2317 * Requires 1 input and produces 1 output.
2318 *
2319 * Input
2320 *     Must be a blob with rank >= 3.
2321 * Output
2322 *     A blob with the same shape as the input.
2323 *
2324 * This layer is described by the following formula:
2325 *
2326 * .. math::
2327 *     x_i \leftarrow \dfrac{e^{x_i}}{\sum_i{e^{x_i}}}
2328 */
2329message SoftmaxLayerParams {
2330
2331}
2332
2333/**
2334 * A layer that uniformly splits across axis = -3 to produce a specified number of outputs.
2335 * For general split operation along any axis, see SplitNDLayer.
2336 *
2337 * .. code::
2338 *
2339 *      (y1,y2,...yN) = SplitLayer(x), where N = nOutputs
2340 *
2341 * Requires 1 input and produces multiple outputs.
2342 *
2343 * Input
2344 *     A blob with rank at least 3.
2345 *     e.g.: blob with shape ``[C, H, W]``
2346 * Output
2347 *     ``nOutputs`` blobs each with same rank as the input.
2348 *     e.g.: For input that is of shape ``[C, H, W]``, output shapes will be ``[C/nOutputs, H, W]``
2349 */
2350message SplitLayerParams {
2351
2352    uint64 nOutputs = 1; /// The number of outputs.
2353
2354}
2355
2356/**
2357 * A layer that performs elementwise addition.
2358 * This layer has limited broadcasting support. For general broadcasting see AddBroadcastableLayer.
2359 *
2360 * .. code::
2361 *
2362 *      y = AddLayer(x1,x2,...)
2363 *
2364 * Requires one or more inputs and produces 1 output.
2365 *
2366 * Input
2367 *     In general, there are no rank constraints.
2368 *     However, only certain sets of shapes are broadcastable. For example:
2369 *     [B, 1, 1, 1], [B, C, 1, 1], [B, 1, H, W], [B, C, H, W]
2370 * Output
2371 *     A blob with shape equal to the input blob.
2372 *
2373 * If only one input is provided, scalar addition is performed:
2374 *
2375 * .. math::
2376 *     y = x + \alpha
2377 *
2378 */
2379message AddLayerParams {
2380
2381    /**
2382     * Scalar to be added to the input.
2383     * Only used if there is a single input.
2384     */
2385    float alpha = 1;
2386
2387}
2388
2389/**
2390 * A layer that performs elementwise multiplication.
2391 * This layer has limited broadcasting support. For general broadcasting see MultiplyBroadcastableLayer.
2392 *
2393 * .. code::
2394 *
2395 *      y = MultiplyLayer(x1,x2,...)
2396 *
2397 * Requires one or more inputs and produces 1 output.
2398 *
2399 * Input
2400 *     In general, there are no rank constraints.
2401 *     However, only certain sets of shapes are broadcastable. For example:
2402 *     [B, 1, 1, 1], [B, C, 1, 1], [B, 1, H, W], [B, C, H, W]
2403 * Output
2404 *     A blob with shape equal to the first input blob.
2405 *
2406 * If only one input is provided, scalar multiplication is performed:
2407 *
2408 * .. math::
2409 *     y = \alpha x
2410 *
2411 */
2412message MultiplyLayerParams {
2413
2414    /**
2415     * Scalar to be multiplied with the input.
2416     * Only used if there is a single input.
2417     */
2418    float alpha = 1;
2419
2420}
2421
2422/**
2423 * A layer that applies a unary function.
2424 *
2425 * .. code::
2426 *
2427 *      y = UnaryFunctionLayer(x)
2428 *
2429 * Requires 1 input and produces 1 output.
2430 *
2431 * Input
2432 *     A blob with no rank constraints.
2433 * Output
2434 *     A blob with the same shape as the input.
2435 *
2436 * The input is first modified by shifting and scaling:
2437 *
2438 * .. math::
2439 *     x \leftarrow \text{scale} \cdot x + \text{shift}
2440 */
2441message UnaryFunctionLayerParams {
2442
2443    /**
2444     * A unary operator.
2445     *
2446     * The following functions are supported:
2447     *
2448     * ``SQRT``
2449     *     .. math:: f(x) = \sqrt{x}
2450     *
2451     * ``RSQRT``
2452     *     .. math:: f(x) = \dfrac{1}{\sqrt{x + \epsilon}}
2453     *
2454     * ``INVERSE``
2455     *     .. math:: f(x) = \dfrac{1}{x + \epsilon}
2456     *
2457     * ``POWER``
2458     *     .. math:: f(x) = x^\alpha
2459     *
2460     * ``EXP``
2461     *     .. math:: f(x) = e^x
2462     *
2463     * ``LOG``
2464     *     .. math:: f(x) = \log x
2465     *
2466     * ``ABS``
2467     *     .. math:: f(x) = |x|
2468     *
2469     * ``THRESHOLD``
2470     *     .. math:: f(x) = \text{max}(\alpha, x)
2471     */
2472    enum Operation {
2473        SQRT = 0;
2474        RSQRT = 1;
2475        INVERSE = 2;
2476        POWER = 3;
2477        EXP = 4;
2478        LOG = 5;
2479        ABS = 6;
2480        THRESHOLD = 7;
2481    }
2482    Operation type = 1; /// The type of unary function.
2483
2484    /**
2485     * A constant used in ``POWER`` and ``THRESHOLD`` functions.
2486     */
2487    float alpha = 2;
2488
2489    /**
2490     * A small constant added to the denominator in the ``RSQRT`` and ``INVERSE`` functions to avoid division by 0.
2491     * Defaults to ``1e-6`` if not set or set to ``0``.
2492     */
2493    float epsilon = 3;
2494
2495    /**
2496     * Input is shifted by this amount
2497     * before the unary function is applied.
2498     * Defaults to ``0.0`` if not set.
2499     */
2500    float shift = 4;
2501
2502    /**
2503     * Input is scaled by this amount
2504     * before the unary function is applied.
2505     * Defaults to ``1.0`` if not set or set to ``0``.
2506     */
2507    float scale = 5;
2508
2509}
2510
2511/**
2512 * A layer that scales up spatial dimensions.
2513 * It supports two modes: nearest neighbour (default) and bilinear.
2514 *
2515 * .. code::
2516 *
2517 *      y = UpsampleLayer(x)
2518 *
2519 * Requires 1 input and produces 1 output.
2520 *
2521 * Input
2522 *     A blob with rank at least 3.
2523 *     e.g.: blob with shape ``[C, H, W]``.
2524 *     For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
2525 *
2526 * Output
2527 *     Same rank as the input.
2528 *     e.g.: blob with shape ``[C, scalingFactor[0] * H, scalingFactor[1] * W]``
2529 */
2530message UpsampleLayerParams {
2531
2532    /**
2533     * Scaling Factor. Mutually exclusive with fractionalScalingFactor.
2534     * Must be length 2 in order ``[H, W]``.
2535     * If not set, default value ``[1, 1]`` is used.
2536     */
2537    repeated uint64 scalingFactor = 1;
2538
2539    /**
2540     * Fractional scaling factor. Mutually exclusive with scalingFactor.
2541     * Must be length 2 in order ``[H, W]``.
2542     * If not set, default value ``[1.0, 1.0]`` is used.
2543     */
2544    repeated float fractionalScalingFactor = 7;
2545
2546    /*
2547     * Overall mode for interpolating new elements when upsampling.
2548     * NN - Nearest Neighbors - simply pick the nearest true value for interpolated values.
2549     * BILINEAR - Use bilinear interpolation. See LinearUpsamplingMode for behavior.
2550     */
2551    enum InterpolationMode {
2552
2553        NN = 0; /// Nearest Neighbour
2554        BILINEAR = 1; /// Bilinear
2555
2556    }
2557
2558    InterpolationMode mode = 5;
2559
2560    /**
2561     * LinearUpsampleMode specifies the behavior for linear upsampling. Only valid when Interpolation Mode is BILINEAR.
2562     * If input grid is [0, Xin-1] (corresponding to an input size of Xin), and if the output size is Xout,
2563     * then the grid points are sampled in the following manner:
2564     * DEFAULT:
2565     *   spacing = (Xin-Xin/Xout) / (Xout-1)
2566     *   grid_point[i] = min(Xin-1, max(0, i * spacing)), for i = 0, 1, 2, ..., Xout-1
2567     * ALIGN_CORNERS_TRUE:
2568     *   spacing = (Xin-1) / (Xout-1)
2569     *   grid_point[i] = min(Xin-1, max(0, i * spacing)), for i = 0, 1, 2, ..., Xout-1
2570     * ALIGN_CORNERS_FALSE:
2571     *   spacing = Xin / Xout
2572     *   grid_point[i] = min(Xin-1, max(0, i * spacing + 0.5 * spacing - 0.5)), for i = 0, 1, 2, ..., Xout-1
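     *
     * An illustrative sketch (not part of the specification; the helper name is hypothetical)
     * of the grid computation above:
     *
     * .. code::
     *
     *     def grid_points(x_in, x_out, mode):
     *         if mode == "DEFAULT":
     *             spacing = (x_in - x_in / x_out) / (x_out - 1)
     *             offset = 0.0
     *         elif mode == "ALIGN_CORNERS_TRUE":
     *             spacing = (x_in - 1) / (x_out - 1)
     *             offset = 0.0
     *         else:                                  # ALIGN_CORNERS_FALSE
     *             spacing = x_in / x_out
     *             offset = 0.5 * spacing - 0.5
     *         return [min(x_in - 1, max(0, i * spacing + offset)) for i in range(x_out)]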
2573     */
2574    enum LinearUpsampleMode {
2575
2576        DEFAULT = 0;
2577        ALIGN_CORNERS_TRUE = 1;
2578        ALIGN_CORNERS_FALSE = 2;
2579
2580    }
2581
2582    LinearUpsampleMode linearUpsampleMode = 6;
2583
2584}
2585
2586/**
2587* A layer that resizes the input to a pre-specified spatial size using bilinear interpolation.
2588*
2589* .. code::
2590*
2591*      y = ResizeBilinearLayer(x)
2592*
2593* Requires 1 input and produces 1 output.
2594*
2595* Input
2596*     A blob with rank at least 3.
2597*     e.g.: blob with shape ``[C, H_in, W_in]``.
2598*     For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
2599*
2600* Output
2601*     Same rank as the input.
2602*     e.g.: blob with shape ``[C, H_out, W_out]``.
2603*
2604*/
2605message ResizeBilinearLayerParams {
2606
2607    /**
2608     * Target Spatial Size.
2609     * Must be length 2 in order ``[Height, Width]``, i.e. ``[H_out, W_out]``.
2610     * If not set, default value ``[1, 1]`` is used.
2611     */
2612    repeated uint64 targetSize = 1;
2613
2614    /**
2615     * Mode used to compute the grid on which the spatial output values are evaluated.
2616     * Same mode is applied to both the height and width axes.
2617     */
2618    SamplingMode mode = 2;
2619
2620}
2621
2622/**
2623* A layer that extracts cropped spatial patches or RoIs (regions of interest) from the input and resizes them to a pre-specified size using
2624* bilinear interpolation.
2625* Note that RoI Align layer can be implemented with this layer followed by a pooling layer.
2626*
2627* .. code::
2628*
2629*      y = CropResizeLayer(x)
2630*
2631* Requires 2 inputs and produces 1 output.
2632*
2633* Input
2634*     There are two inputs.
2635*     First input represents an image feature map.
2636*     Second input represents the bounding box coordinates for N patches or RoIs (region of interest).
2637*
2638*     First input is rank 5: [1, Batch, C, H_in, W_in].
2639*     Second input is rank 5. Its shape can be either [N, 1, 4, 1, 1] or [N, 1, 5, 1, 1].
2640*
2641*     N: number of patches/RoIs to be extracted
2642*
2643*     If RoI shape = ``[N, 1, 4, 1, 1]``
2644*                    The axis=-3 corresponds to the four coordinates specifying the bounding box.
2645*                    All the N RoIs are extracted from all the batches of the input.
2646*
2647*     If RoI shape = ``[N, 1, 5, 1, 1]``
2648*                     The first element along axis=-3 specifies the index of the input batch from which to extract the RoI;
2649*                     it must be in the interval ``[0, Batch - 1]``. That is, the n-th RoI is extracted from the input batch
2650*                     with index RoI[n,0,0,0,0]. The last four elements along axis=-3 specify the bounding box coordinates.
2651*
2652* Output
2653*     A blob with rank 5.
2654*           - Shape is [N, Batch, C, H_out, W_out] if input RoI shape is [N, 1, 4, 1, 1]
2655*           - Shape is [N, 1, C, H_out, W_out] if input RoI shape is [N, 1, 5, 1, 1]
2656*
2657*/
2658message CropResizeLayerParams {
2659
2660    /**
2661     * Target Spatial Size.
2662     * Must be length 2 in order ``[Height, Width]``, i.e. ``[H_out, W_out]``.
2663     * If not set, default value ``[1, 1]`` is used.
2664     */
2665    repeated uint64 targetSize = 1;
2666
2667    /**
2668     * If true, the bounding box coordinates must be in the interval [0, 1].
2669     * They are scaled by (H_in - 1), (W_in - 1), i.e. based on the input spatial dimensions.
2670     * If false, the bounding box coordinates must be in the intervals
2671     * [0, H_in - 1] and [0, W_in - 1], respectively, for the height and width dimensions.
2672     */
2673    bool normalizedCoordinates = 2;
2674
2675    /**
2676     * Mode used to compute the grid on which the spatial output values are evaluated.
2677     * Same mode is applied to both the height and width axes.
2678     */
2679    SamplingMode mode = 3;
2680
2681    /**
2682     * Representation used to express the bounding box coordinates.
2683     * It determines how the values of the second input are interpreted.
2684     */
2685    BoxCoordinatesMode boxIndicesMode = 4;
2686
2687    /**
2688     * Additional spatial scale that multiplies the bounding box coordinates.
2689     * Generally used while implementing the RoI Align layer,
2690     * which uses unnormalized RoI coordinates along with a spatial scale less than or equal to 1.
2691     */
2692    float spatialScale = 5;
2693
2694}
2695
2696/**
2697 * A layer that performs elementwise addition of a bias,
2698 * which is broadcasted to match the input shape.
2699 *
2700 * .. code::
2701 *
2702 *      y = BiasLayer(x)
2703 *
2704 * Requires 1 input and produces 1 output.
2705 *
2706 * Input
2707 *     A blob with rank at least 3.
2708 *     e.g.: blob with shape ``[C, H, W]``.
2709 *     For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
2710 * Output
2711 *     A blob with the same shape as the input.
2712 */
2713message BiasLayerParams {
2714
2715    /**
2716     * The shape of the bias.
2717     * Must be one of the following:
2718     * ``[1]``, ``[C]``, ``[1, H, W]`` or ``[C, H, W]``.
2719     */
2720    repeated uint64 shape = 1;
2721
2722    /**
2723     * The bias values.
2724     * The size must be equal to the product of the ``shape`` dimensions.
2725     */
2726    WeightParams bias = 2;
2727
2728}
2729
2730/**
2731 * A layer that performs elementwise multiplication by a scale factor
2732 * and optionally adds a bias;
2733 * both the scale and bias are broadcasted to match the input shape.
2734 *
2735 * .. code::
2736 *
2737 *      y = ScaleLayer(x)
2738 *
2739 * Requires 1 input and produces 1 output.
2740 *
2741 * Input
2742 *     A blob with rank at least 3.
2743 *     e.g.: blob with shape ``[C, H, W]``.
2744 *     For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
2745 * Output
2746 *     A blob with the same shape as the input.
2747 */
2748message ScaleLayerParams {
2749
2750    /**
2751     * The shape of the scale.
2752     * Must be one of the following:
2753     * ``[1]``, ``[C]``, ``[1, H, W]`` or ``[C, H, W]``.
2754     */
2755    repeated uint64 shapeScale = 1;
2756
2757    /**
2758     * The scale values.
2759     * The size must be equal to the product of the ``shapeScale`` dimensions.
2760     */
2761    WeightParams scale = 2; /// Scale values. Size must be equal to the product of dimensions specified in shapeScale.
2762
2763    bool hasBias = 3; /// If true, a bias is added after scaling.
2764
2765    /**
2766     * The shape of the bias.
2767     * Must be one of the following:
2768     * ``[1]``, ``[C]``, ``[1, H, W]`` or ``[C, H, W]``.
2769     */
2770    repeated uint64 shapeBias = 4;
2771
2772    /**
2773     * The bias values.
2774     * The size must be equal to the product of the ``shapeBias`` dimensions.
2775     */
2776    WeightParams bias = 5;
2777
2778}
2779
2780/**
2781 * A layer that loads data as a parameter and provides it as an output.
2782 * The output is rank 5. For general rank, see LoadConstantNDLayer.
2783 *
2784 * .. code::
2785 *
2786 *      y = LoadConstantLayer()
2787 *
2788 * Requires no input and produces 1 output.
2789 *
2790 * Output:
2791 *     A blob with rank 5 and shape ``[1, 1, C, H, W]``
2792 */
2793message LoadConstantLayerParams {
2794
2795    /**
2796     * The shape of the constant to be loaded,
2797     * which must be ``[C, H, W]``, that is, of length 3.
2798     */
2799    repeated uint64 shape = 1;
2800
2801    /**
2802     * The data values,
2803     * of size ``C * H * W``.
2804     */
2805    WeightParams data = 2;
2806
2807}
2808
2809/**
2810 * A layer that performs L2 normalization, i.e. divides by the
2811 * square root of the sum of squares of all elements of the input.
2812 *
2813 * .. code::
2814 *
2815 *      y = L2NormalizeLayer(x)
2816 *
2817 * Requires 1 input and produces 1 output.
2818 *
2819 * Input
2820 *     A blob with rank greater than or equal to 3.
2821 *     For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
2822 * Output
2823 *     A blob with the same shape as the input.
2824 *
2825 * This layer is described by the following formula:
2826 *
2827 * .. math::
2828 *     x_i \leftarrow \dfrac{x_i}{\sqrt{\sum{x_i^2} + \epsilon}}
2829 */
2830message L2NormalizeLayerParams {
2831
2832    /**
2833     * A small constant to avoid division by 0 during normalization.
2834     * Defaults to ``1e-6`` if not set or set to ``0``.
2835     */
2836    float epsilon = 1;
2837
2838}
2839
2840/// Data Reorganization Layers
2841/// --------------------------
2842
2843/**
2844 * A layer that flattens the input.
2845 *
2846 * .. code::
2847 *
2848 *      y = FlattenLayer(x)
2849 *
2850 * Requires 1 input and produces 1 output.
2851 *
2852 * Input
2853 *     A blob with rank greater than or equal to 3.
2854 *     e.g.: Rank 4 blob represents [Batch, C, H, W]
2855 *     For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
2856 * Output
2857 *     Same rank as the input, such that last two dimensions are both 1.
2858 *     e.g.: For rank 4 input, output shape is ``[Batch, C * H * W, 1, 1]``
2859 *
2860 * There are two flatten orders: ``CHANNEL_FIRST`` and ``CHANNEL_LAST``.
2861 * ``CHANNEL_FIRST`` does not require data to be rearranged,
2862 * because row major ordering is used by internal storage.
2863 * ``CHANNEL_LAST`` requires data to be rearranged.
2864 */
2865message FlattenLayerParams {
2866
2867    enum FlattenOrder {
2868
2869        CHANNEL_FIRST = 0;
2870        CHANNEL_LAST = 1;
2871
2872    }
2873    FlattenOrder mode = 1;
2874
2875}
2876
2877/**
2878 * A layer that recasts the input into a new shape.
2879 *
2880 * .. code::
2881 *
2882 *      y = ReshapeLayer(x)
2883 *
2884 * Requires 1 input and produces 1 output.
2885 *
2886 * Input
2887 *     A blob with rank 5.
2888 *     e.g.: ``[1, 1, C, H, W]`` or ``[Seq, 1, C, H, W]``.
2889 * Output
2890 *     A blob with rank 5.
2891 *     e.g.: ``[1, 1, C_out, H_out, W_out]`` or ``[Seq_out, 1, C_out, H_out, W_out]``.
2892 *
2893 * There are two reshape orders: ``CHANNEL_FIRST`` and ``CHANNEL_LAST``.
2894 * ``CHANNEL_FIRST`` is equivalent to
2895 * flattening the input to ``[Seq, 1, C * H * W, 1, 1]`` in channel first order
2896 * and then reshaping it to the target shape;
2897 * no data rearrangement is required.
2898 * ``CHANNEL_LAST`` is equivalent to
2899 * flattening the input to ``[Seq, 1, H * W * C, 1, 1]`` in channel last order,
2900 * reshaping it to ``[Seq_out, 1, H_out, W_out, C_out]`` (it is now in "H_out-major" order),
2901 * and then permuting it to ``[C_out, H_out, W_out]``;
2902 * both the flattening and the permuting require the data to be rearranged.
2903 */
2904message ReshapeLayerParams {
2905
2906    /**
2907     * The shape of the output.
2908     * Must be of length 3 or 4.
2909     * If set to 3, ``targetShape`` is interpreted as
2910     * ``[1, 1, C_out, H_out, W_out]``, and sequence length of the input is preserved.
2911     * If set to 4, ``targetShape`` is interpreted as
2912     * ``[Seq_out, 1, C_out, H_out, W_out]``,
2913     * where ``Seq_out`` is the new sequence length.
2914     */
2915    repeated int64 targetShape = 1;
2916
2917    enum ReshapeOrder {
2918
2919        CHANNEL_FIRST = 0;
2920        CHANNEL_LAST = 1;
2921
2922    }
2923    ReshapeOrder mode = 2;
2924
2925}
2926
2927/**
2928 * A layer that rearranges the dimensions and data of an input.
2929 * For generic transpose/permute operation see TransposeLayer.
2930 *
2931 * .. code::
2932 *
2933 *      y = PermuteLayer(x)
2934 *
2935 * Requires 1 input and produces 1 output.
2936 *
2937 * Input
2938 *     Must be a rank 5 blob.
2939 *     e.g.: shape ``[Seq, B, C, H, W]``.
2940 * Output
2941 *     Rank 5 blob. Transposed version of the input, such that the dimension at axis=1 (i.e. axis=-4) is unchanged.
2942 *
2943 *
2944 * Examples:
2945 *
2946 *  Assume input shape is [Seq, B, C, H, W]
2947 *
2948 * - If ``axis`` is set to ``[0, 3, 1, 2]``,
2949 *   then the output has shape ``[Seq, B, W, C, H]``
2950 *
2951 * - If ``axis`` is set to ``[3, 1, 2, 0]``,
2952 *   then the output has shape ``[W, B, C, H, Seq]``
2953 *
2954 * - If ``axis`` is set to ``[0, 3, 2, 1]``,
2955 *   then the output has shape ``[Seq, B, W, H, C]``
2956 *
2957 * - If ``axis`` is not set, or is set to ``[0, 1, 2, 3]``,
2958 *   the output is the same as the input.
2959 */
2960message PermuteLayerParams {
2961
2962    /**
2963     * The order in which to permute the dimensions.
2964     * Must have length 4 and be a permutation of ``[0, 1, 2, 3]``.
2965     */
2966    repeated uint64 axis = 1;
2967
2968}
2969
2970/**
2971 * A layer that reorganizes data in the input in specific ways.
2972 *
2973 * .. code::
2974 *
2975 *      y = ReorganizeDataLayer(x)
2976 *
2977 * Requires 1 input and produces 1 output.
2978 *
2979 * Input
2980 *     A blob with rank at least 3.
2981 *     e.g.: blob with shape ``[C, H, W]``.
2982 *     For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
2983 * Output
2984 *     Same rank as the input.
2985 *     e.g.: blob with shape ``[C_out, H_out, W_out]``.
2986 *
2987 * mode == SPACE_TO_DEPTH
2988 *  ``[C_out, H_out, W_out]`` : ``[C * blockSize * blockSize, H/blockSize, W/blockSize]``.
2989 *  blockSize must divide H and W.
2990 *  Data is moved from the spatial dimensions to the channel dimension. Input is spatially divided into
2991 *  non-overlapping blocks of size blockSize X blockSize and data from each block is moved into the
2992 *  channel dimension.
2993 *
2994 * mode == DEPTH_TO_SPACE
2995 *  ``[C_out, H_out, W_out]`` : ``[C/(blockSize * blockSize), H * blockSize, W * blockSize]``.
2996 *  Square of blockSize must divide C.
2997 *  Reverse of SPACE_TO_DEPTH. Data is moved from the channel dimension to the spatial dimensions.
2998 *
2999 * mode == PIXEL_SHUFFLE
3000 *  ``[C_out, H_out, W_out]`` : ``[C/(blockSize * blockSize), H * blockSize, W *  blockSize]``.
3001 *  Square of blockSize must divide C.
3002 *  Similar to DEPTH_TO_SPACE, but using the pixel-shuffle semantics for channel order in the output space.
3003 *  In both modes, elements along the channel dimension are collapsed into
3004 *  blocks in the spatial dimensions. The difference is in the arrangement of
3005 *  the input-channels' data in the output space. See below example for more
3006 *  detail.
3007 *  Only available in Core ML Specification >= 5 (iOS >= 14, macOS >= 11.0).
3008 *
3009 *
3010 * Examples:
3011 *
3012 * Assume input is the following [C = 8, H = 1, W = 2] tensor:
3013 *
3014 * .. code::
3015 *
3016 *    [[[1 2]] [[3 4]] [[5 6]] [[7 8]] [[9 10]] [[11 12]] [[13 14]] [[15 16]]]
3017 *
3018 * If block_size == 2 and mode == DEPTH_TO_SPACE, output will be the following
3019 * [C = 2, H = 2, W = 4] tensor:
3020 *
3021 * .. code::
3022 *
3023 *    [[[ 1  5  2  6]
3024 *      [ 9 13 10 14]]
3025 *
3026 *     [[ 3  7  4  8]
3027 *      [11 15 12 16]]]
3028 *
3029 * For mode == SPACE_TO_DEPTH, the behavior is the same as mode ==
3030 * DEPTH_TO_SPACE, but with the input and output swapped.
3031 *
3032 * If block_size == 2 and mode == PIXEL_SHUFFLE, output will be the following
3033 * [C = 2, H = 2, W = 4] tensor:
3034 *
3035 * .. code::
3036 *
3037 *    [[[ 1  3  2  4]
3038 *      [ 5  7  6  8]]
3039 *
3040 *     [[ 9 11 10 12]
3041 *      [13 15 14 16]]]
3042 *
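 *
 * An illustrative NumPy sketch (not part of the specification) of the two block modes for a
 * rank-3 input ``[C, H, W]``:
 *
 * .. code::
 *
 *    import numpy as np
 *
 *    def depth_to_space(x, b):
 *        c, h, w = x.shape
 *        y = x.reshape(b, b, c // (b * b), h, w).transpose(2, 3, 0, 4, 1)
 *        return y.reshape(c // (b * b), h * b, w * b)
 *
 *    def pixel_shuffle(x, b):
 *        c, h, w = x.shape
 *        y = x.reshape(c // (b * b), b, b, h, w).transpose(0, 3, 1, 4, 2)
 *        return y.reshape(c // (b * b), h * b, w * b)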
3043 */
3044message ReorganizeDataLayerParams {
3045
3046    enum ReorganizationType {
3047
3048        SPACE_TO_DEPTH = 0;
3049        DEPTH_TO_SPACE = 1;
3050        PIXEL_SHUFFLE = 2;
3051
3052    }
3053    ReorganizationType mode = 1;
3054    uint64 blockSize = 2; /// must be greater than 1
3055
3056}
3057
3058/**
3059 * A layer that slices the input data along axis = -1 or -2 or -3.
3060 * For general slice along any axis, please see SliceStaticLayer/SliceDynamicLayer.
3061 *
3062 * .. code::
3063 *
3064 *      y = SliceLayer(x)
3065 *
3066 * Requires 1 input and produces 1 output.
3067 *
3068 * Input
3069 *     A blob that can, in general, have any rank. However, depending on the value of "axis",
3070 *     there may be additional rank constraints.
3071 * Output
3072 *     A blob with the same rank as the input.
3073 *
3074 * Sliced section is taken from the interval ``[startIndex, endIndex)``, i.e.
3075 * startIndex is inclusive while endIndex is exclusive.
3076 * stride must be positive and represents the step size for slicing.
3077 * Negative indexing is supported for startIndex and endIndex.
3078 * -1 denotes N-1, -2 denotes N-2 and so on, where N is the length of the dimension to be sliced.
3079 *
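 *
 * An illustrative NumPy sketch (not part of the specification), slicing along the width axis:
 *
 * .. code::
 *
 *      import numpy as np
 *
 *      x = np.arange(12).reshape(3, 4)    # e.g. an [H, W] input
 *      # startIndex = 1, endIndex = -1, stride = 2, axis = WIDTH_AXIS
 *      y = x[:, 1:-1:2]                   # interval [startIndex, endIndex), step size = stride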
3080 */
3081message SliceLayerParams {
3082
3083    int64 startIndex = 1; /// start of the sliced section. Inclusive.
3084    int64 endIndex = 2; /// end of sliced section. Exclusive.
3085    uint64 stride = 3; /// The step size. Must be positive.
3086
3087    enum SliceAxis {
3088
3089        CHANNEL_AXIS = 0;
3090        HEIGHT_AXIS = 1;
3091        WIDTH_AXIS = 2;
3092
3093    }
3094    // The following mapping is used for interpreting this parameter:
3095    // CHANNEL_AXIS => axis = -3, input must have rank at least 3.
3096    // HEIGHT_AXIS => axis = -2, input must have rank at least 2.
3097    // WIDTH_AXIS => axis = -1
3098    SliceAxis axis = 4;
3099
3100}
3101
3102/**
3103 * A layer that reduces the input using a specified operation.
3104 *
3105 * .. code::
3106 *
3107 *      y = ReduceLayer(x)
3108 *
3109 * Requires 1 input and produces 1 output.
3110 *
3111 * Input
3112 *     A blob that can, in general, have any rank. However, depending on the value of "axis",
3113 *     there may be additional rank constraints.
3114 * Output
3115 *     A blob with the same rank as the input, which has 1s on the dimensions specified in the parameter "axis"
3116 *
3117 *     Values supported for axis are [-1], [-2], [-3], [-2,-1], [-3,-2,-1]
3118 *     and the equivalent positive values (depending on the rank of the input)
3119 *     For mode == 'ArgMax', axis must be [-1] or [-2] or [-3].
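 *
 *     An illustrative NumPy sketch (not part of the specification) of ``mode == SUM`` with ``axis == HW``:
 *
 *     .. code::
 *
 *         import numpy as np
 *
 *         x = np.random.rand(2, 3, 4, 5)                  # e.g. [B, C, H, W]
 *         y = x.sum(axis=(-2, -1), keepdims=True)         # shape (2, 3, 1, 1)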
3120 */
3121message ReduceLayerParams {
3122
3123    /*
3124     * The following reduction operations are supported
3125     * and are applied on the specified axis of the input array:
3126     *
3127     * ``SUM``
3128     *     Sum of all elements
3129     *
3130     *     .. math:: \sum{x_i}
3131     *
3132     * ``AVG``
3133     *     Sum of all elements divided by the number of elements
3134     *
3135     *     .. math:: \dfrac{\sum^n{x_i}}{n}
3136     *
3137     * ``PROD``
3138     *     Product of all elements
3139     *
3140     *     .. math:: \prod{x_i}
3141     *
3142     * ``LOGSUM``
3143     *     Sum of the natural logarithm of all elements
3144     *
3145     *     .. math:: \sum{\ln{(x_i + \epsilon)}}
3146     *
3147     * ``SUMSQUARE``
3148     *     Sum of squares of all elements
3149     *
3150     *     .. math:: \sum{x^2}
3151     *
3152     * ``L1``
3153     *     L1 normalization of all elements
3154     *
3155     *     .. math:: ||x||_1 = \sum{|x_i|}
3156     *
3157     * ``L2``
3158     *     L2 normalization of all elements
3159     *
3160     *     .. math:: ||x||_2 = \sqrt{\sum{x_i^2}}
3161     *
3162     * ``MAX``
3163     *     Maximum of all elements
3164     *
3165     *     .. math:: \text{max}(x_i)
3166     *
3167     * ``MIN``
3168     *     Minimum of all elements
3169     *
3170     *     .. math:: \text{min}(x_i)
3171     *
3172     * ``ARGMAX``
3173     *     Argument of the maximum of all elements
3174     *
3175     *     .. math:: \text{argmax}(x_i)
3176     *
3177     */
3178    enum ReduceOperation {
3179
3180        SUM = 0;
3181        AVG = 1;
3182        PROD = 2;
3183        LOGSUM = 3;
3184        SUMSQUARE = 4;
3185        L1 = 5;
3186        L2 = 6;
3187        MAX = 7;
3188        MIN = 8;
3189        ARGMAX = 9; /// only supported with axis = C, H or W.
3190
3191    }
3192    ReduceOperation mode = 1; /// Specifies function used to reduce.
3193
3194    /**
3195     * Used if mode is ``LOGSUM``.
3196     * Defaults to ``1e-6`` if not set or is set to ``0``.
3197     */
3198    float epsilon = 2;
3199
3200    enum ReduceAxis {
3201
3202        CHW = 0;
3203        HW = 1;
3204        C = 2;
3205        H = 3;
3206        W = 4;
3207
3208    }
3209
3210    // The following mapping is used for interpreting this parameter:
3211    // CHW = axis [-3, -2, -1], input must have rank at least 3.
3212    // HW = axis [-2, -1], input must have rank at least 2.
3213    // C = axis [-3]
3214    // H = axis [-2]
3215    // W = axis [-1]
3216    ReduceAxis axis = 3;
3217
3218}
3219
3220/**
3221 * A layer that crops the spatial dimensions of an input.
3222 * If two inputs are provided, the shape of the second input is used as the reference shape.
3223 *
3224 * .. code::
3225 *
3226 *      y = CropLayer(x1) or y = CropLayer(x1,x2)
3227 *
3228 * Requires 1 or 2 inputs and produces 1 output.
3229 *
3230 * Input
3231 *    1 or 2 tensors, each with rank at least 3, both inputs must have equal rank.
3232 *    Example:
3233 *     - 1 input case: A blob with shape ``[C, H_in, W_in]``.
3234 *     - 2 input case: 1st blob with shape ``[C, H_in, W_in]``, 2nd blob with shape ``[C, H_out, W_out]``.
3235 *
3236 *     For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
3237 *
3238 * Output
3239 *     Same rank as the inputs.
3240 *     e.g.: A blob with shape ``[C, H_out, W_out]``.
3241 *
3242 * If one input is used, output is computed as follows:
3243 *
3244 * .. code::
3245 *
3246 *      y = x1[:, topCropAmount:H_in - bottomCropAmount, leftCropAmount:W_in - rightCropAmount]
3247 *
3248 *      topCropAmount == Height startEdgeSize == borderAmounts[0].startEdgeSize
3249 *      bottomCropAmount == Height endEdgeSize == borderAmounts[0].endEdgeSize
3250 *      leftCropAmount == Width startEdgeSize == borderAmounts[1].startEdgeSize
3251 *      rightCropAmount == Width endEdgeSize == borderAmounts[1].endEdgeSize
3252 *
3253 *      H_out = H_in - topCropAmount - bottomCropAmount
3254 *      W_out = W_in - leftCropAmount - rightCropAmount
3255 *
3256 * If two inputs are used, output is computed as follows:
3257 *
3258 * .. code::
3259 *
3260 *      y = x1[:, offset[0]:offset[0] + H_out, offset[1]:offset[1] + W_out]
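 *
 * An illustrative NumPy sketch (not part of the specification) of the single-input case:
 *
 * .. code::
 *
 *      import numpy as np
 *
 *      x1 = np.random.rand(3, 10, 12)                 # [C, H_in, W_in]
 *      top, bottom, left, right = 2, 1, 3, 0          # borderAmounts
 *      y = x1[:, top:10 - bottom, left:12 - right]    # [C, 7, 9]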
3261 */
3262message CropLayerParams {
3263
3264    /**
3265     * The amounts to be cropped from the input.
3266     * Used only if a single input is provided.
3267     */
3268    BorderAmounts cropAmounts = 1;
3269
3270    /**
3271     * The offset amounts.
3272     * Used only if two inputs are provided.
3273     * Must be of length 2, in order ``[H, W]``.
3274     */
3275    repeated uint64 offset = 5;
3276
3277}
3278
3279/**
3280 * A layer that computes the elementwise average of the inputs.
3281 * This layer has limited broadcasting support. For general broadcasting see AddBroadcastableLayer.
3282 *
3283 * .. code::
3284 *
3285 *      y = AverageLayer(x1,x2,...)
3286 *
3287 * Requires multiple inputs and produces 1 output.
3288 *
3289 * Input
3290 *     In general, there are no rank constraints.
3291 *     However, only certain sets of shapes are broadcastable. For example:
3292 *     [B, 1, 1, 1], [B, C, 1, 1], [B, 1, H, W], [B, C, H, W]
3293 * Output
3294 *     A blob with the same shape as each input.
3295 */
3296message AverageLayerParams {
3297
3298}
3299
3300/**
3301 * A layer that computes the elementwise maximum over the inputs.
3302 *
3303 * .. code::
3304 *
3305 *      y = MaxLayer(x1,x2,...)
3306 *
3307 * Requires multiple inputs and produces 1 output.
3308 *
3309 * Input
3310 *     In general, there are no rank constraints.
3311 *     However, only certain sets of shapes are broadcastable. For example:
3312 *     [B, C, 1, 1], [B, C, H, W]
3313 * Output
3314 *     A blob with the same shape as each input.
3315 */
3316message MaxLayerParams {
3317
3318}
3319
3320/**
3321 * A layer that computes the elementwise minimum over the inputs.
3322 *
3323 * .. code::
3324 *
3325 *      y = MinLayer(x1,x2,...)
3326 *
3327 * Requires multiple inputs and produces 1 output.
3328 *
3329 * Input
3330 *     In general, there are no rank constraints.
3331 *     However, only certain sets of shapes are broadcastable. For example:
3332 *     [B, C, 1, 1], [B, C, H, W]
3333 * Output
3334 *     A blob with the same shape as each input.
3335 */
3336message MinLayerParams {
3337
3338}
3339
3340/**
3341 * A layer that computes the dot product of two vectors.
3342 *
3343 * .. code::
3344 *
3345 *      y = DotProductLayer(x1,x2)
3346 *
3347 * Requires 2 inputs and produces 1 output.
3348 *
3349 * Input
3350 *     Two blobs with rank at least 3, such that the last two dimensions are both 1.
3351 *     e.g.: blobs with shape ``[B, C, 1, 1]``.
3352 *     For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
3353 *
3354 * Output
3355 *     Same rank as the input.
3356 *     e.g. for rank 4 inputs, output shape: [B, 1, 1, 1]
3357 */
3358message DotProductLayerParams {
3359
3360    /**
3361     * If true, inputs are normalized first,
3362     * thereby computing the cosine similarity.
3363     */
3364    bool cosineSimilarity = 1;
3365
3366}
3367
3368/**
3369 * A layer that performs mean variance normalization, along axis = -3.
3370 *
3371 * .. code::
3372 *
3373 *      y = MeanVarianceNormalizeLayer(x)
3374 *
3375 * Requires 1 input and produces 1 output.
3376 *
3377 * Input
3378 *     A blob with rank greater than or equal to 3.
3379 *     Example: Rank 4 blob represents [Batch, channels, height, width]
3380 *     For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
3381 *
3382 * Output
3383 *     A blob with the same shape as the input.
3384 *
3385 * If ``acrossChannels == true``
3386 * normalization is performed on flattened input, i.e. the input is reshaped to (Batch,C), where "Batch" contains
3387 * all dimensions from 0 to -4 (inclusive), and C contains dimensions -1, -2, -3.
3388 *
3389 * If ``acrossChannels == false``
3390 * normalization is performed within a channel,
3391 * across spatial dimensions (i.e. last two dimensions).
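 *
 * An illustrative NumPy sketch (not part of the specification) of the
 * ``acrossChannels == false`` case:
 *
 * .. code::
 *
 *      import numpy as np
 *
 *      def mvn_within_channel(x, normalize_variance=True, eps=1e-6):
 *          # x has shape (..., C, H, W); statistics are computed per channel over H, W
 *          y = x - x.mean(axis=(-2, -1), keepdims=True)
 *          if normalize_variance:
 *              y = y / np.sqrt(x.var(axis=(-2, -1), keepdims=True) + eps)
 *          return y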
3392 */
3393message MeanVarianceNormalizeLayerParams {
3394
3395    /**
3396     * If true, mean and variance are computed across channels.
3397     */
3398    bool acrossChannels = 1;
3399
3400    /**
3401     * If false, only mean is subtracted.
3402     */
3403    bool normalizeVariance = 2;
3404
3405    /**
3406     * A small constant to avoid division by 0 while normalizing variance.
3407     * Defaults to ``1e-6`` if not set or set to ``0``.
3408     */
3409    float epsilon = 3;
3410
3411}
3412
3413/**
3414 * A layer that repeats a sequence or the dimension at axis = -5.
3415 *
3416 * .. code::
3417 *
3418 *      y = SequenceRepeatLayer(x)
3419 *
3420 * Requires 1 input and produces 1 output.
3421 *
3422 * Input
3423 *     A blob with rank at least 5.
3424 *     e.g: shape ``[Seq, B, C, H, W]``
3425 * Output
3426 *     A blob with the same rank as the input.
3427 *     e.g.: for input shape ``[Seq, B, C, H, W]``, output shape is ``[nRepetitions * Seq, B, C, H, W]``.
3428 */
3429message SequenceRepeatLayerParams {
3430
3431    /**
3432     * Number of repetitions.
3433     * Defaults to ``1`` if not set or set to ``0``.
3434     */
3435    uint64 nRepetitions = 1;
3436
3437}
3438
3439/// Recurrent Layers
3440/// ----------------
3441
3442/*
3443 * The following activations are supported with recurrent layers:
3444 * - Linear
3445 * - Sigmoid
3446 * - Tanh
3447 * - ReLU
3448 * - Scaled Hyperbolic Tangent: alpha * tanh(beta * x), currently only supported for alpha = 1.7159, beta = 2/3
3449 * - Hard Sigmoid: min(max(alpha * x + beta, 0), 1), currently only supported for alpha = 0.2, beta = 0.5
3450 */
3451
3452/**
3453 * A simple recurrent layer.
3454 *
3455 * .. code::
3456 *
3457 *      y_t = SimpleRecurrentLayer(x_t, y_{t-1})
3458 *
3459 * Input
3460 *    A blob of rank 5, with shape ``[Seq, Batch, inputVectorSize, 1, 1]``.
3461 *    This represents a sequence of vectors of size ``inputVectorSize``.
3462 * Output
3463 *    Same rank as the input.
3464 *    Represents a vector of size ``outputVectorSize``. It is either the final output or a sequence of outputs at all time steps.
3465 *
3466 * - Output Shape: ``[1, Batch, outputVectorSize, 1, 1]`` , if ``sequenceOutput == false``
3467 * - Output Shape: ``[Seq, Batch, outputVectorSize, 1, 1]`` , if ``sequenceOutput == true``
3468 *
3469 * This layer is described by the following equation:
3470 *
3471 * .. math::
3472 *     \boldsymbol{y_t} = f(\mathrm{clip}(W \boldsymbol{x_t} + \
3473 *                                        R \boldsymbol{y_{t-1}} + b))
3474 *
3475 * - ``W`` is a 2-dimensional weight matrix
3476 *   (``[outputVectorSize, inputVectorSize]``, row-major)
3477 * - ``R`` is a 2-dimensional recursion matrix
3478 *   (``[outputVectorSize, outputVectorSize]``, row-major)
3479 * - ``b`` is a 1-dimensional bias vector (``[outputVectorSize]``)
3480 * - ``f()`` is an activation
3481 * - ``clip()`` is a function that constrains values between ``[-50.0, 50.0]``
3482 */
3483message SimpleRecurrentLayerParams {
3484
3485    uint64 inputVectorSize = 1; /// The size of the input vectors.
3486    uint64 outputVectorSize = 2; /// The size of the output vectors.
3487
3488    /**
3489     * Activations supported are Linear, Sigmoid, Tanh, ReLU, Scaled Tanh (alpha = 1.71, beta = 2/3), Hard sigmoid (alpha = 0.2, beta = 0.5)
3490     */
3491    ActivationParams activation = 10; /// The activation function.
3492
3493    /**
3494     * If false, output is just the result after the final state update.
3495     * If true, output is a sequence, containing outputs at all time steps.
3496     */
3497    bool sequenceOutput = 15;
3498
3499    bool hasBiasVector = 20; /// If false, no bias is added.
3500
3501    WeightParams weightMatrix = 30; /// Weight matrix W.
3502    WeightParams recursionMatrix = 31; /// Recursion Weight matrix R.
3503    WeightParams biasVector = 32; /// Bias vector b.
3504
3505    /// If true, then the node processes the input sequence from right to left
3506    bool reverseInput = 100;
3507
3508}
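
/*
 * Illustrative sketch (not part of the specification): one possible NumPy reading of the
 * recurrence above, dropping the trailing (1, 1) axes and assuming tanh for the activation
 * f(). The names used here are assumptions for this example only.
 *
 * .. code::
 *
 *      import numpy as np
 *
 *      def simple_rnn(x, W, R, b, f=np.tanh, sequence_output=False):
 *          # x: [Seq, Batch, inputVectorSize]; W: [out, in]; R: [out, out]; b: [out]
 *          seq, batch = x.shape[0], x.shape[1]
 *          y = np.zeros((batch, W.shape[0]))
 *          outputs = []
 *          for t in range(seq):
 *              pre = x[t] @ W.T + y @ R.T + b
 *              y = f(np.clip(pre, -50.0, 50.0))   # clip() constrains values to [-50, 50]
 *              outputs.append(y)
 *          return np.stack(outputs) if sequence_output else y[None]
 */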
3509
3510/**
3511 * Gated-Recurrent Unit (GRU) Layer
3512 *
3513 * .. code::
3514 *
3515 *      y_t = GRULayer(x_t, y_{t-1})
3516 *
3517 * Input
3518 *    A blob of rank 5, with shape ``[Seq, Batch, inputVectorSize, 1, 1]``.
3519 *    This represents a sequence of vectors of size ``inputVectorSize``.
3520 * Output
3521 *    Same rank as the input.
3522 *    Represents a vector of size ``outputVectorSize``. It is either the final output or a sequence of outputs at all time steps.
3523 *
3524 * - Output Shape: ``[1, Batch, outputVectorSize, 1, 1]`` , if ``sequenceOutput == false``
3525 * - Output Shape: ``[Seq, Batch, outputVectorSize, 1, 1]`` , if ``sequenceOutput == true``
3526 *
3527 * This layer is described by the following equations:
3528 *
3529 * Update Gate
3530 *     .. math::
3531 *         \boldsymbol{z_t} = \
3532 *             f(\mathrm{clip}(W_z \boldsymbol{x_t} + \
3533 *                             R_z \boldsymbol{y_{t-1}} + b_z))
3534 *
3535 * Reset Gate
3536 *     .. math::
3537 *         \boldsymbol{r_t} = \
3538 *             f(\mathrm{clip}(W_r \boldsymbol{x_t} + \
3539 *                             R_r \boldsymbol{y_{t-1}} + b_r))
3540 *
3541 * Cell Memory State
3542 *     .. math::
3543 *         \boldsymbol{c_t} = \
3544 *             \boldsymbol{y_{t-1}} \odot \boldsymbol{r_t}
3545 *
3546 * Output Gate
3547 *     .. math::
3548 *         \boldsymbol{o_t} = \
3549 *             g(\mathrm{clip}(W_o \boldsymbol{x_t} + \
3550 *                             R_o \boldsymbol{c_t} + b_o))
3551 *
3552 * Output
3553 *     .. math::
3554 *         \boldsymbol{y_t} = \
3555 *             (1 - \boldsymbol{z_t}) \odot \boldsymbol{o_t} + \
3556 *              \boldsymbol{z_t} \odot \boldsymbol{y_{t-1}}
3557 *
3558 * - ``W_z``, ``W_r``, ``W_o`` are 2-dimensional input weight matrices
3559 *   (``[outputVectorSize, inputVectorSize]``, row-major)
3560 * - ``R_z``, ``R_r``, ``R_o`` are 2-dimensional recursion matrices
3561 *   (``[outputVectorSize, outputVectorSize]``, row-major)
3562 * - ``b_z``, ``b_r``, ``b_o`` are 1-dimensional bias vectors
3563 *   (``[outputVectorSize]``)
3564 * - ``f()``, ``g()`` are activations
3565 * - ``clip()`` is a function that constrains values between ``[-50.0, 50.0]``
3566 * - ``⊙`` denotes the elementwise product of matrices
3567 */
3568message GRULayerParams {
3569
3570    uint64 inputVectorSize = 1; /// Size of the input vectors.
3571    uint64 outputVectorSize = 2; /// Size of the output vectors.
3572
3573    /**
3574     * 2 element array representing activations [f(), g()] in that order.
3575     * Typical values used = [sigmoid, tanh].
3576     * Activations supported are Linear, Sigmoid, Tanh, ReLU, Scaled Tanh (alpha = 1.71, beta = 2/3), Hard sigmoid (alpha = 0.2, beta = 0.5)
3577     */
3578    repeated ActivationParams activations = 10;
3579
3580    /**
3581     * If false output is just the result after final state update.
3582     * If true, output is a sequence, containing outputs at all time steps.
3583     */
3584    bool sequenceOutput = 15;
3585
3586    /**
3587     * If false, no biases (``b_z``, ``b_r``, ``b_o``) are added.
3588     */
3589    bool hasBiasVectors = 20;
3590
3591    WeightParams updateGateWeightMatrix = 30; /// Weight Matrix W_z.
3592    WeightParams resetGateWeightMatrix = 31; /// Weight Matrix W_r.
3593    WeightParams outputGateWeightMatrix = 32; /// Weight Matrix W_o.
3594
3595    WeightParams updateGateRecursionMatrix = 50; /// Recursion Weight Matrix R_z.
3596    WeightParams resetGateRecursionMatrix = 51; /// Recursion Weight Matrix R_r.
3597    WeightParams outputGateRecursionMatrix = 52; /// Recursion Weight Matrix R_o.
3598
3599    WeightParams updateGateBiasVector = 70; /// Bias vector b_z.
3600    WeightParams resetGateBiasVector = 71; /// Bias vector b_r.
3601    WeightParams outputGateBiasVector = 72; /// Bias vector b_o.
3602
3603    /// If true, then the node processes the input sequence from right to left
3604    bool reverseInput = 100;
3605
3606}
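
/*
 * Illustrative sketch (not part of the specification): a single time step of the equations
 * above in NumPy, assuming sigmoid for f() and tanh for g(). The weight, recursion and bias
 * argument names follow the fields of this message; the function name is an assumption.
 *
 * .. code::
 *
 *      import numpy as np
 *
 *      def sigmoid(v):
 *          return 1.0 / (1.0 + np.exp(-v))
 *
 *      def gru_step(x, y_prev, Wz, Wr, Wo, Rz, Rr, Ro, bz, br, bo,
 *                   f=sigmoid, g=np.tanh):
 *          clip = lambda v: np.clip(v, -50.0, 50.0)
 *          z = f(clip(x @ Wz.T + y_prev @ Rz.T + bz))   # update gate
 *          r = f(clip(x @ Wr.T + y_prev @ Rr.T + br))   # reset gate
 *          c = y_prev * r                               # cell memory state
 *          o = g(clip(x @ Wo.T + c @ Ro.T + bo))        # output gate
 *          return (1.0 - z) * o + z * y_prev            # y_t
 */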
3607
3608/**
3609 * Long short-term memory (LSTM) parameters.
3610 *
3611 * This is described by the following equations:
3612 *
3613 * Input Gate
3614 *     .. math::
3615 *         \boldsymbol{i_t} = \
3616 *             f(\mathrm{clip}(W_i \boldsymbol{x_t} + \
3617 *                             R_i \boldsymbol{y_{t-1}} + \
3618 *                             p_i \odot c_{t-1} + b_i))
3619 *
3620 * Forget Gate
3621 *     .. math::
3622 *         \boldsymbol{f_t} = \
3623 *             f(\mathrm{clip}(W_f \boldsymbol{x_t} + \
3624 *                             R_f \boldsymbol{y_{t-1}} + \
3625 *                             p_f \odot c_{t-1} + b_f))
3626 *
3627 * Block Input
3628 *     .. math::
3629 *         \boldsymbol{z_t} = \
3630 *             g(\mathrm{clip}(W_z \boldsymbol{x_t} + \
3631 *                             R_z \boldsymbol{y_{t-1}} + b_z))
3632 *
3633 * Cell Memory State
3634 *     .. math::
3635 *         \boldsymbol{c_t} = \
3636 *             \boldsymbol{c_{t-1}} \odot \boldsymbol{f_t} + \
3637 *             \boldsymbol{i_t} \odot \boldsymbol{z_t}
3638 *
3639 * Output Gate
3640 *     .. math::
3641 *         \boldsymbol{o_t} = \
3642 *             f(\mathrm{clip}(W_o \boldsymbol{x_t} + \
3643 *                             R_o \boldsymbol{y_{t-1}} + \
3644 *                             p_o \odot c_t + b_o))
3645 *
3646 * Output
3647 *     .. math::
3648 *         \boldsymbol{y_t} = \
3649 *             h(\boldsymbol{c_t}) \odot \boldsymbol{o_t}
3650 *
3651 * - ``W_i``, ``W_f``, ``W_z``, ``W_o`` are 2-dimensional input weight matrices
3652 *   (``[outputVectorSize, inputVectorSize]``, row-major)
3653 * - ``R_i``, ``R_f``, ``R_z``, ``R_o`` are 2-dimensional recursion matrices
3654 *   (``[outputVectorSize, outputVectorSize]``, row-major)
3655 * - ``b_i``, ``b_f``, ``b_z``, ``b_o`` are 1-dimensional bias vectors
3656 *   (``[outputVectorSize]``)
3657 * - ``p_i``, ``p_f``, ``p_o`` are 1-dimensional peephole vectors
3658 *   (``[outputVectorSize]``)
3659 * - ``f()``, ``g()``, ``h()`` are activations
3660 * - ``clip()`` is a function that constrains values between ``[-50.0, 50.0]``
3661 * - ``⊙`` denotes the elementwise product of matrices
3662 */
3663message LSTMParams {
3664
3665    /**
3666     * If true, output is a sequence, containing outputs at all time steps.
3667     * If false, output is just the result after final state update.
3668     */
3669    bool sequenceOutput = 10;
3670
3671    /**
3672     * If false, no biases (``b_i``, ``b_f``, ``b_z``, ``b_o``) are added.
3673     */
3674    bool hasBiasVectors = 20;
3675
3676    /**
3677     * If true, a vector of ``1`` values is added to ``b_f``.
3678     */
3679    bool forgetBias = 30;
3680
3681    /**
3682     * If true, peephole vectors are included.
3683     */
3684    bool hasPeepholeVectors = 40;
3685
3686    /**
3687     * If the coupled Input and Forget flag is on, the behaviour of
3688     * ``c_t`` is changed to the following (i.e. forget gate is not used):
3689     *
3690     * .. math::
3691     *     \boldsymbol{c_t} = \
3692     *         \boldsymbol{c_{t-1}} \odot (1 - \boldsymbol{i_t}) + \
3693     *         \boldsymbol{i_t} \odot \boldsymbol{z_t}
3694     *
3695     */
3696    bool coupledInputAndForgetGate = 50;
3697
3698    /**
3699     * Places a limit on the maximum and minimum values of ``c_t``.
3700     * c_t = min(c_t, cellClipThreshold)
3701     * c_t = max(c_t, -cellClipThreshold)
3702     * If 0, it is set to its default value = 50.0.
3703     */
3704    float cellClipThreshold = 60;
3705
3706}
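
/*
 * Illustrative sketch (not part of the specification): one LSTM time step following the
 * equations above, in NumPy, assuming sigmoid for f(), tanh for g() and h(), peepholes
 * enabled and the coupled input-forget option off. W, R, b, p are dicts keyed by gate
 * ('i', 'f', 'z', 'o'); this packaging is an assumption for this example only.
 *
 * .. code::
 *
 *      import numpy as np
 *
 *      def sigmoid(v):
 *          return 1.0 / (1.0 + np.exp(-v))
 *
 *      def lstm_step(x, y_prev, c_prev, W, R, b, p,
 *                    f=sigmoid, g=np.tanh, h=np.tanh, cell_clip=50.0):
 *          clip = lambda v: np.clip(v, -50.0, 50.0)
 *          i  = f(clip(x @ W['i'].T + y_prev @ R['i'].T + p['i'] * c_prev + b['i']))
 *          fg = f(clip(x @ W['f'].T + y_prev @ R['f'].T + p['f'] * c_prev + b['f']))
 *          z  = g(clip(x @ W['z'].T + y_prev @ R['z'].T + b['z']))
 *          c  = np.clip(c_prev * fg + i * z, -cell_clip, cell_clip)  # cellClipThreshold
 *          o  = f(clip(x @ W['o'].T + y_prev @ R['o'].T + p['o'] * c + b['o']))
 *          return h(c) * o, c                                        # (y_t, c_t)
 */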
3707
3708/**
3709 * Weights for long short-term memory (LSTM) layers
3710 */
3711message LSTMWeightParams {
3712
3713    WeightParams inputGateWeightMatrix = 1; /// Weight Matrix W_i.
3714    WeightParams forgetGateWeightMatrix = 2; /// Weight Matrix W_f.
3715    WeightParams blockInputWeightMatrix = 3; /// Weight Matrix W_z.
3716    WeightParams outputGateWeightMatrix = 4; /// Weight Matrix W_o.
3717
3718    WeightParams inputGateRecursionMatrix = 20; /// Recursion Weight Matrix R_i.
3719    WeightParams forgetGateRecursionMatrix = 21; /// Recursion Weight Matrix R_f.
3720    WeightParams blockInputRecursionMatrix = 22; /// Recursion Weight Matrix R_z.
3721    WeightParams outputGateRecursionMatrix = 23; /// Recursion Weight Matrix R_o.
3722
3723    //biases:
3724    WeightParams inputGateBiasVector = 40; /// Bias vector b_i.
3725    WeightParams forgetGateBiasVector = 41; /// Bias vector b_f.
3726    WeightParams blockInputBiasVector = 42; /// Bias vector b_z.
3727    WeightParams outputGateBiasVector = 43; /// Bias vector b_o.
3728
3729    //peepholes:
3730    WeightParams inputGatePeepholeVector = 60; /// Peephole vector p_i.
3731    WeightParams forgetGatePeepholeVector = 61; /// Peephole vector p_f.
3732    WeightParams outputGatePeepholeVector = 62; /// Peephole vector p_o.
3733
3734}
3735
3736/**
3737 * A unidirectional long short-term memory (LSTM) layer.
3738 *
3739 * .. code::
3740 *
3741 *      (y_t, c_t) = UniDirectionalLSTMLayer(x_t, y_{t-1}, c_{t-1})
3742 *
3743 * Input
3744 *    A blob of rank 5, with shape ``[Seq, Batch, inputVectorSize, 1, 1]``.
3745 *    This represents a sequence of vectors of size ``inputVectorSize``.
3746 * Output
3747 *    Same rank as the input.
3748 *    Represents a vector of size ``outputVectorSize``. It is either the final output or a sequence of outputs at all time steps.
3749 *
3750 * - Output Shape: ``[1, Batch, outputVectorSize, 1, 1]`` , if ``sequenceOutput == false``
3751 * - Output Shape: ``[Seq, Batch, outputVectorSize, 1, 1]`` , if ``sequenceOutput == true``
3752 *
3753 */
3754message UniDirectionalLSTMLayerParams {
3755
3756    uint64 inputVectorSize = 1; /// Size of the input vectors.
3757    uint64 outputVectorSize = 2; /// Size of the output vectors.
3758
3759    /**
3760     * 3 element array representing activations [f(),g(),h()] in that order.
3761     * Typical values used = [sigmoid, tanh, tanh].
3762     * Activations supported are Linear, Sigmoid, Tanh, ReLU, Scaled Tanh (alpha = 1.71, beta = 2/3), Hard sigmoid (alpha = 0.2, beta = 0.5)
3763     */
3764    repeated ActivationParams activations = 10;
3765
3766    LSTMParams params = 15;
3767
3768    LSTMWeightParams weightParams = 20; /// Weights, biases and peepholes.
3769
3770    /// If true, then the node processes the input sequence from right to left
3771    bool reverseInput = 100;
3772
3773}
3774
3775/**
3776 * Bidirectional long short-term memory (LSTM) layer
3777 *
3778 * .. code::
3779 *
3780 *      (y_t, c_t, y_t_reverse, c_t_reverse) = BiDirectionalLSTMLayer(x_t, y_{t-1}, c_{t-1}, y_{t-1}_reverse, c_{t-1}_reverse)
3781 *
3782 * Input
3783 *    A blob of rank 5, with shape ``[Seq, Batch, inputVectorSize, 1, 1]``.
3784 *    This represents a sequence of vectors of size ``inputVectorSize``.
3785 * Output
3786 *    Same rank as the input.
3787 *    Represents a vector of size ``2 * outputVectorSize``. It is either the final output or a sequence of outputs at all time steps.
3788 *
3789 * - Output Shape: ``[1, Batch, 2 * outputVectorSize, 1, 1]`` , if ``sequenceOutput == false``
3790 * - Output Shape: ``[Seq, Batch, 2 * outputVectorSize, 1, 1]`` , if ``sequenceOutput == true``
3791 *
3792 *
3793 * The first LSTM operates on the input sequence in the forward direction.
3794 * The second LSTM operates on the input sequence in the reverse direction.
3795 *
3796 * Example: given the input sequence ``[x_1, x_2, x_3]``,
3797 * where ``x_i`` are vectors at time index ``i``:
3798 *
3799 * The forward LSTM output is ``[yf_1, yf_2, yf_3]``,
3800 *
3801 * where ``yf_i`` are vectors of size ``outputVectorSize``:
3802 *
3803 * - ``yf_1`` is the output at the end of sequence {``x_1``}
3804 * - ``yf_2`` is the output at the end of sequence {``x_1``, ``x_2``}
3805 * - ``yf_3`` is the output at the end of sequence {``x_1``, ``x_2``, ``x_3``}
3806 *
3807 * The backward LSTM output: ``[yb_1, yb_2, yb_3]``,
3808 *
3809 * where ``yb_i`` are vectors of size ``outputVectorSize``:
3810 *
3811 * - ``yb_1`` is the output at the end of sequence {``x_3``}
3812 * - ``yb_2`` is the output at the end of sequence {``x_3``, ``x_2``}
3813 * - ``yb_3`` is the output at the end of sequence {``x_3``, ``x_2``, ``x_1``}
3814 *
3815 * Output of the bi-dir layer:
3816 *
3817 * - if ``sequenceOutput = True`` : { ``[yf_1, yb_3]``,  ``[yf_2, yb_2]``,  ``[yf_3, yb_1]`` }
3818 * - if ``sequenceOutput = False`` : { ``[yf_3, yb_3]`` }
3819 */
3820message BiDirectionalLSTMLayerParams {
3821
3822    /**
3823     * Size of the input vectors.
3824     */
3825    uint64 inputVectorSize = 1;
3826    /**
3827     * Size of the output vectors.
3828     * It is same for both forward and backward LSTMs.
3829     */
3830    uint64 outputVectorSize = 2;
3831
3832    /**
3833     * 3 element array representing activations [f(),g(),h()] in that order.
3834     * Typical values used = [sigmoid, tanh, tanh].
3835     * Activations supported are Linear, Sigmoid, Tanh, ReLU, Scaled Tanh (alpha = 1.71, beta = 2/3), Hard sigmoid (alpha = 0.2, beta = 0.5)
3836     */
3837    repeated ActivationParams activationsForwardLSTM = 10;
3838    /**
3839     * Currently, backward LSTM activations
3840     * must be same as the ones for the forward LSTM.
3841     */
3842    repeated ActivationParams activationsBackwardLSTM = 11;
3843
3844    /**
3845     * Common parameters shared by the forward and backward LSTMs.
3846     */
3847    LSTMParams params = 15;
3848
3849    /**
3850     * Weights and biases.
3851     * Must contain exactly 2 elements,
3852     * for the forward and backward LSTM respectively.
3853     */
3854    repeated LSTMWeightParams weightParams = 20;
3855
3856}
3857
3858message CustomLayerParams {
3859
3860    message CustomLayerParamValue {
3861        oneof value {
3862            double doubleValue = 10;
3863            string stringValue = 20;
3864            int32 intValue = 30;
3865            int64 longValue = 40;
3866            bool boolValue = 50;
3867        }
3868    }
3869
3870    string className = 10; // The name of the class (conforming to MLCustomLayer) corresponding to this layer
3871    repeated WeightParams weights = 20; // Any weights -- these are serialized in binary format and memmapped at runtime
3872    map<string, CustomLayerParamValue> parameters = 30; // these may be handled as strings, so this should not be large
3873    string description = 40; // An (optional) description of the layer provided by the model creator. This information is displayed when viewing the model, but does not affect the model's execution on device.
3874
3875}
3876
3877/**
3878 * A layer that rearranges the dimensions and data of an input.
3879 *
3880 * .. code::
3881 *
3882 *      y = TransposeLayer(x)
3883 *
3884 * Requires 1 input and produces 1 output.
3885 *
3886 * Input
3887 *     An N-Dimensional tensor.
3888 * Output
3889 *     An N-Dimensional tensor of the same rank but with dimensions and data permuted according to axes.
3890 *     Shape: ``[InputShape[axis[0]], InputShape[axis[1]], ... , InputShape[axis[N-1]]]``
3891 *
3892 * Examples:
3893 *
3894 * - If ``axes`` is set to ``[3, 1, 2, 0]`` and the input shape is ``[6,7,8,9]``,
3895 *   then the output has shape ``[9,7,8,6]``
3896 */
3897
3898message TransposeLayerParams {
3899
3900    /**
3901     * Length of "axes" should match the rank of the input and output tensors.
3902     * "axes" should be a permutation of "[0,1,2,...,N-1]" where N is the rank.
3903     */
3904    repeated uint64 axes = 1;
3905
3906}
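
/*
 * Illustrative sketch (not part of the specification): the example above expressed with
 * NumPy, whose transpose has the same axis semantics.
 *
 * .. code::
 *
 *      import numpy as np
 *
 *      x = np.arange(6 * 7 * 8 * 9).reshape(6, 7, 8, 9)
 *      y = np.transpose(x, axes=(3, 1, 2, 0))
 *      assert y.shape == (9, 7, 8, 6)
 */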
3907
3908/**
3909 * A layer that computes the matrix multiplication of two tensors with numpy-like broadcasting
3910 * where the matrices reside in the last two indices of the tensor.
3911 *
3912 * .. code::
3913 *
3914 *      y = BatchedMatMul(a,b)
3915 *
3916 * Requires 1 or 2 inputs and produces 1 output.
3917 *
3918 * The first tensor, "a", must be provided as an input. The second tensor can either be an input or provided as a weight matrix parameter.
3919 *
3920 * Input
3921 *     - a: First N-Dimensional tensor
3922 *     - b: Second N-Dimensional tensor (either a rank-N input or a matrix, i.e. N=2, provided as a layer parameter)
3923 *
3924 * Output
3925 *     A tensor containing the matrix product of two tensors.
3926 *     When there are two inputs: rank is max(2, rank(a), rank(b))
3927 *     When there is one input: rank is same as that of the input.
3928 *
3929 * This operation behaves as follows:
3930 *
3931 *  When there are two inputs:
3932 *      - If N >= 2 for both tensors, it is treated as a batch of matrices residing in the last two indices.
3933 *        All the indices, except for the last two, are broadcasted using conventional rules.
3934 *      - If the first tensor is 1-D, it is converted to a 2-D tensor by prepending a 1 to its shape. Eg. (D) -> (1,D)
3935 *      - If the second tensor is 1-D, it is converted to a 2-D tensor by appending a 1 to its shape. Eg. (D) -> (D,1)
3936 *
3937 *  When there is one input:
3938 *      - The weight matrix corresponds to a matrix, of shape (X1, X2). Values of X1, X2 must be provided as layer parameters.
3939 *      - The input, "a", is reshaped into a matrix by combining all the leading dimensions, except the last, into a batch dimension. eg:
3940 *             - if "a" is rank 1 (X1,) -->  (1, X1). Output shape will be (X2,)
3941 *             - if "a" is rank 2 (B1, X1) --> no need to reshape. Output shape will be (B1, X2)
3942 *             - if "a" is rank 3 (B1, B2, X1) --> (B1 * B2, X1). Output shape will be (B1, B2, X2)
3943 *             - etc
3944 */
3945message BatchedMatMulLayerParams {
3946
3947    /**
3948     * If transposeA is true, it transposes the left matrix on the fly before matrix multiplication.
3949     * (is ignored when there is one input)
3950     */
3951    bool transposeA = 1;
3952    /**
3953     * If transposeB is true, it transposes the right matrix on the fly before matrix multiplication.
3954     * (is ignored when there is one input)
3955     */
3956    bool transposeB = 2;
3957
3958    /*
3959     * The following parameters are ignored when there are two inputs.
3960     */
3961
3962    uint64 weightMatrixFirstDimension = 5; /// X1: same as the last dimension of the input tensor
3963    uint64 weightMatrixSecondDimension = 6; /// X2: same as the last dimension of the output tensor
3964
3965    bool hasBias = 7; /// Whether a bias is added or not. Supported only when there is one input.
3966
3967    /*
3968     * Weight matrix representing shape [X1, X2].
3969     * Values are however stored in column major order,
3970     * in the "repeated float" or "bytes" fields of the message "WeightParams"
3971     */
3972    WeightParams weights = 8;
3973    WeightParams bias = 9; /// Bias vector [X2]. Supported only when there is one input.
3974
3975    /**
3976     * If set, this layer, at runtime, quantizes the floating point input blob to int8 before applying the
3977     * matrix multiplication using the INT8 weight parameters provided in weights->int8RawValue. The
3978     * result is then dequantized.
3979     * Requires:
3980     * * number of inputs to be 1
3981     * * hasBias == false
3982     * * QuantizationType == LinearQuantizationParams, such that
3983     *   * size of the "scale" field is 1 and "bias" field is empty in "LinearQuantizationParams"
3984     * * numberOfBits == 8
3985     * * weights->rawValue_size to be empty
3986     */
3987    bool int8DynamicQuantize = 10;
3988
3989}
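
/*
 * Illustrative sketch (not part of the specification): the two modes above in NumPy,
 * restricted to inputs of rank >= 2 in the two-input case (np.matmul then broadcasts the
 * leading dimensions in the same way). Argument names are assumptions for this example.
 *
 * .. code::
 *
 *      import numpy as np
 *
 *      def batched_matmul(a, b=None, W=None, bias=None,
 *                         transpose_a=False, transpose_b=False):
 *          if b is not None:
 *              # two-input case: matrices live in the last two axes
 *              if transpose_a:
 *                  a = np.swapaxes(a, -1, -2)
 *              if transpose_b:
 *                  b = np.swapaxes(b, -1, -2)
 *              return np.matmul(a, b)
 *          # one-input case: W has shape (X1, X2); leading axes of "a" act as the batch
 *          y = a @ W
 *          return y + bias if bias is not None else y
 */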
3990
3991/**
3992 * A layer that concatenates a list of tensors along a specified axis.
3993 *
3994 * .. code::
3995 *
3996 *      y = ConcatNDLayer(x1,x2,....)
3997 *
3998 * Requires at least 2 inputs and produces 1 output.
3999 *
4000 * Input
4001 *     The ranks of the input tensors must match, and all dimensions must also match, except along the dimension 'axis'.
4002 *
4003 *
4004 * Output
4005 *     Same rank as the input. The dimension along "axis", is the sum of the dimensions of the inputs.
4006 *
4007 * example:
4008 *
4009 * in1 : shape (3, 2), value = [[1, 2], [3, 4], [5, 6]]
4010 * in2 : shape (3, 2), value = [[7, 8], [9, 10], [11, 12]]
4011 * axis = 0
4012 *
4013 * if interleave = False (default)
4014 * output : shape (6, 2)
4015 * output[0:3, :] = in1
4016 * output[3:6, :] = in2
4017 * value = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12]]
4018 *
4019 * if interleave = True
4020 * output : shape (6, 2)
4021 * output[0::2, :] = in1
4022 * output[1::2, :] = in2
4023 * value = [[1, 2], [7, 8], [3, 4], [9, 10], [5, 6], [11, 12]]
4024 *
4025 */
4026message ConcatNDLayerParams {
4027
4028    /**
4029     * Dimension along which to concatenate. Supports negative values of the parameter 'axis'.
4030     */
4031    int64 axis = 1;
4032
4033    /**
4034     * (Only available in Core ML Specification >= 5 (iOS >= 14, macOS >= 11.0))
4035     * Interleave option. If True, concatenation is done via interleaving the inputs.
4036     * This requires all inputs to have the exact same shape.
4037     */
4038    bool interleave = 2;
4039
4040
4041}
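
/*
 * Illustrative sketch (not part of the specification): the non-interleaved and interleaved
 * behavior above in NumPy; reproduces the example values.
 *
 * .. code::
 *
 *      import numpy as np
 *
 *      def concat_nd(inputs, axis, interleave=False):
 *          if not interleave:
 *              return np.concatenate(inputs, axis=axis)
 *          # interleave: stack along a new axis just after "axis", then merge the two
 *          axis = axis % inputs[0].ndim
 *          stacked = np.stack(inputs, axis=axis + 1)
 *          shape = list(inputs[0].shape)
 *          shape[axis] *= len(inputs)
 *          return stacked.reshape(shape)
 *
 *      in1 = np.array([[1, 2], [3, 4], [5, 6]])
 *      in2 = np.array([[7, 8], [9, 10], [11, 12]])
 *      out = concat_nd([in1, in2], axis=0, interleave=True)
 *      # out == [[1, 2], [7, 8], [3, 4], [9, 10], [5, 6], [11, 12]]
 */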
4042
4043/**
4044 * A layer that performs softmax normalization along a specified axis.
4045 *
4046 * .. code::
4047 *
4048 *      y = SoftmaxNDLayer(x)
4049 *
4050 * Requires 1 input and produces 1 output.
4051 *
4052 * Output shape is same as the input.
4053 */
4054message SoftmaxNDLayerParams {
4055
4056    /**
4057     * Dimension on which the softmax would be performed. Supports negative values of the parameter 'axis'.
4058     */
4059    int64 axis = 1;
4060
4061}
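
/*
 * Illustrative sketch (not part of the specification): softmax along an arbitrary axis in
 * NumPy; subtracting the max is only for numerical stability and does not change the result.
 *
 * .. code::
 *
 *      import numpy as np
 *
 *      def softmax_nd(x, axis):
 *          e = np.exp(x - x.max(axis=axis, keepdims=True))
 *          return e / e.sum(axis=axis, keepdims=True)
 */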
4062
4063/**
4064 * A layer that reverses specific dimensions of the input tensor.
4065 * It is similar in functionality to the numpy.flip method.
4066 *
4067 * Requires 1 input and produces 1 output.
4068 * Output shape is same as the input.
4069 */
4070message ReverseLayerParams {
4071
4072    /**
4073     * Reverses each dimension of the input tensor for which corresponding reverseDim is set to True.
4074     * Requires len(reverseDim) == rank(inputTensor)
4075     */
4076    repeated bool reverseDim = 1;
4077
4078}
4079
4080/**
4081 * A layer that reverses variable length slices.
4082 *
4083 * Requires 2 inputs and produces 1 output.
4084 *
4085 * 2 inputs, in order are denoted by "data", "seq_lengths".
4086 * "seq_lengths" must be a rank 1 tensor, i.e. seq_lengths.shape = (B,),
4087 * which contains, for each element of the batch, the length of the sequence prefix to be reversed.
4088 * Dimension "batchAxis" in "data" must be equal to B, i.e,
4089 * data.shape[batchAxis] = B.
4090 *
4091 * According to the batch axis, input "data" is first divided into a batch of B inputs,
4092 * each of which is flipped along the dimension "sequenceAxis", by the amount specified in
4093 * "seq_lengths", the second input.
4094 *
4095 * e.g.:
4096 *
4097 * data [shape = (2,4)]:
4098 * [0 1 2 3]
4099 * [4 5 6 7]
4100 * seq_lengths [shape = (2,)]:
4101 * [3, 0]
4102 * batchAxis = 0
4103 * sequenceAxis = 1
4104 *
4105 * output [shape = (2,4)]:
4106 * [2 1 0 3]
4107 * [4 5 6 7]
4108 *
4109 *
4110 * data [shape = (2,3,2)]:
4111 * [0 1]
4112 * [2 3]
4113 * [4 5] (slice = 0)
4114 * [6 7]
4115 * [8 9]
4116 * [10 11] (slice = 1)
4117 * seq_lengths [shape = (2,)]:
4118 * [2, 3]
4119 * batchAxis = 0
4120 * sequenceAxis = 1
4121 *
4122 * output [shape = (2,3,2)]:
4123 * [2 3]
4124 * [0 1]
4125 * [4 5] (slice = 0)
4126 * [10 11]
4127 * [8 9]
4128 * [6 7] (slice = 1)
4129 *
4130 * Output shape is same as the input.
4131 */
4132message ReverseSeqLayerParams {
4133
4134    int64 batchAxis = 1; // batch axis has to be strictly less than seq_axis
4135    int64 sequenceAxis = 2;
4136
4137}
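
/*
 * Illustrative sketch (not part of the specification): the per-batch partial flip described
 * above in NumPy; reproduces the first example.
 *
 * .. code::
 *
 *      import numpy as np
 *
 *      def reverse_seq(data, seq_lengths, batch_axis=0, sequence_axis=1):
 *          out = np.moveaxis(data.copy(), (batch_axis, sequence_axis), (0, 1))
 *          for b, n in enumerate(seq_lengths):
 *              out[b, :n] = np.flip(out[b, :n], axis=0).copy()
 *          return np.moveaxis(out, (0, 1), (batch_axis, sequence_axis))
 *
 *      data = np.array([[0, 1, 2, 3], [4, 5, 6, 7]])
 *      out = reverse_seq(data, seq_lengths=[3, 0])
 *      # out == [[2, 1, 0, 3], [4, 5, 6, 7]]
 */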
4138
4139/**
4140 * A layer that loads data as a parameter and provides it as an output.
4141 *
4142 * .. code::
4143 *
4144 *      y = LoadConstantNDLayer()
4145 *
4146 * Requires no input and produces 1 output.
4147 *
4148 * Output: A tensor with shape as provided in the parameter "shape"
4149 */
4150message LoadConstantNDLayerParams {
4151
4152    /**
4153     * The shape of the constant to be loaded.
4154     */
4155    repeated uint64 shape = 1;
4156    WeightParams data = 2;
4157
4158}
4159
4160/**
4161 * A layer that generates an output tensor with a constant value.
4162 * Input is only used to determine the shape of the output.
4163 * This layer is used to allocate a tensor with a dynamic shape (that of the input) and constant value.
4164 *
4165 * Requires 1 input and produces 1 output.
4166 *
4167 * .. code::
4168 *
4169 *      y = FillLikeLayer(x)
4170 *
4171 * Input
4172 *     A N-Dimensional tensor, whose values are ignored. Only the shape is used to
4173 *     infer the shape of the output.
4174 *
4175 * Output
4176 *     A N-Dimensional tensor with the same shape as the input tensor.
4177 *
4178 */
4179message FillLikeLayerParams {
4180
4181    float value = 1;
4182
4183}
4184
4185/**
4186 * A layer that generates an output tensor with a constant value.
4187 * This layer is used to allocate a tensor with a static shape and constant value.
4188 *
4189 * Requires no input and produces 1 output.
4190 *
4191 * .. code::
4192 *
4193 *      y = FillStaticLayer(x)
4194 *
4195 * Output
4196 *     A N-Dimensional tensor of shape "targetShape".
4197 *
4198 */
4199message FillStaticLayerParams {
4200
4201    float value = 1;
4202    repeated uint64 targetShape = 2;
4203
4204}
4205
4206/**
4207 * A layer that generates an output tensor with a constant value.
4208 * This layer is used to allocate a tensor with a dynamic shape (as specified by the input) and constant value.
4209 *
4210 * Requires 1 input and produces 1 output.
4211 *
4212 * .. code::
4213 *
4214 *      y = FillDynamicLayer(x)
4215 *
4216 * Input
4217 *     A rank 1 tensor specifying the shape of the output
4218 *
4219 * Output
4220 *     An N-Dimensional tensor with the shape specified by the values in the input tensor.
4221 *
4222 */
4223message FillDynamicLayerParams {
4224
4225    float value = 1;
4226
4227}
4228
4229/**
4230 * A layer that returns the elements either from tensor x or tensor y,
4231 * depending on the value in the condition tensor.
4232 * It is similar in functionality to the numpy.where method with 3 inputs.
4233 *
4234 * Requires 3 inputs and produces 1 output.
4235 * Inputs, in order, are the condition tensor, x and y.
4236 *
4237 * for each vector index (i,...,j):
4238 *    output[i,...,j] = x[i,...,j] if condition[i,...,j] = True
4239 *                      y[i,...,j] if condition[i,...,j] = False
4240 *
4241 * All the 3 inputs are first broadcasted to a common shape.
4242 * (the shapes must be broadcastable)
4243 *
4244 * output.rank = max(input[0].rank, input[1].rank, input[2].rank)
4245 *
4246 */
4247message WhereBroadcastableLayerParams {
4248
4249}
4250
4251/**
4252 * A layer that computes the elementwise trigonometric sine function.
4253 *
4254 *
4255 * .. code::
4256 *
4257 *      y = SinLayer(x)
4258 *
4259 * Requires 1 input and produces 1 output.
4260 * Output shape is same as the input.
4261 *
4262 */
4263message SinLayerParams {
4264
4265}
4266
4267/**
4268 * A layer that computes the elementwise trigonometric cosine function.
4269 *
4270 *
4271 * .. code::
4272 *
4273 *      y = CosLayer(x)
4274 *
4275 * Requires 1 input and produces 1 output.
4276 * Output shape is same as the input.
4277 *
4278 */
4279message CosLayerParams {
4280
4281}
4282
4283/**
4284 * A layer that computes the elementwise trigonometric tangent function.
4285 *
4286 *
4287 * .. code::
4288 *
4289 *      y = TanLayer(x)
4290 *
4291 * Requires 1 input and produces 1 output.
4292 * Output shape is same as the input.
4293 *
4294 */
4295message TanLayerParams {
4296
4297}
4298
4299/**
4300 * A layer that computes the elementwise trigonometric arcsine function.
4301 *
4302 *
4303 * .. code::
4304 *
4305 *      y = AsinLayer(x)
4306 *
4307 * Requires 1 input and produces 1 output.
4308 * Output shape is same as the input.
4309 *
4310 */
4311message AsinLayerParams {
4312
4313}
4314
4315/**
4316 * A layer that computes the elementwise trigonometric arccosine function.
4317 *
4318 *
4319 * .. code::
4320 *
4321 *      y = AcosLayer(x)
4322 *
4323 * Requires 1 input and produces 1 output.
4324 * Output shape is same as the input.
4325 *
4326 */
4327message AcosLayerParams {
4328
4329}
4330
4331/**
4332 * A layer that computes the elementwise trigonometric arctangent function.
4333 *
4334 *
4335 * .. code::
4336 *
4337 *      y = AtanLayer(x)
4338 *
4339 * Requires 1 input and produces 1 output.
4340 * Output shape is same as the input.
4341 *
4342 */
4343message AtanLayerParams {
4344
4345}
4346
4347/**
4348 * A layer that computes the elementwise hyperbolic sine function.
4349 *
4350 *
4351 * .. code::
4352 *
4353 *      y = SinhLayer(x)
4354 *
4355 * Requires 1 input and produces 1 output.
4356 * Output shape is same as the input.
4357 *
4358 */
4359message SinhLayerParams {
4360
4361}
4362
4363/**
4364 * A layer that computes the elementwise hyperbolic cosine function.
4365 *
4366 *
4367 * .. code::
4368 *
4369 *      y = CoshLayer(x)
4370 *
4371 * Requires 1 input and produces 1 output.
4372 * Output shape is same as the input.
4373 *
4374 */
4375message CoshLayerParams {
4376
4377}
4378
4379/**
4380 * A layer that computes the elementwise hyperbolic tangent function.
4381 *
4382 *
4383 * .. code::
4384 *
4385 *      y = TanhLayer(x)
4386 *
4387 * Requires 1 input and produces 1 output.
4388 * Output shape is same as the input.
4389 *
4390 */
4391message TanhLayerParams {
4392
4393}
4394
4395/**
4396 * A layer that computes the elementwise hyperbolic arcsine function.
4397 *
4398 *
4399 * .. code::
4400 *
4401 *      y = AsinhLayer(x)
4402 *
4403 * Requires 1 input and produces 1 output.
4404 * Output shape is same as the input.
4405 *
4406 */
4407message AsinhLayerParams {
4408
4409}
4410
4411/**
4412 * A layer that computes the elementwise hyperbolic arccosine function.
4413 *
4414 *
4415 * .. code::
4416 *
4417 *      y = AcoshLayer(x)
4418 *
4419 * Requires 1 input and produces 1 output.
4420 * Output shape is same as the input.
4421 *
4422 */
4423message AcoshLayerParams {
4424
4425}
4426
4427/**
4428 * A layer that computes the elementwise hyperbolic arctangent function.
4429 *
4430 *
4431 * .. code::
4432 *
4433 *      y = AtanhLayer(x)
4434 *
4435 * Requires 1 input and produces 1 output.
4436 * Output shape is same as the input.
4437 *
4438 */
4439message AtanhLayerParams {
4440
4441}

4442/**
4443 * A layer that raises each element in first tensor to the power of
4444 * corresponding element in the second tensor.
4445 * Supports conventional numpy-like broadcasting.
4446 *
4447 * .. code::
4448 *
4449 *      y = PowBroadcastableLayer(x1, x2)
4450 *
4451 * Requires 2 inputs and produces 1 output.
4452 *
4453 * Input
4454 *     - First N-Dimensional tensor
4455 *     - Second N-Dimensional tensor
4456 *
4457 * Output
4458 *     An N-Dimensional tensor with the broadcast shape.
4459 *
4460 */
4461message PowBroadcastableLayerParams {
4462
4463}
4464
4465/**
4466 * A layer that computes the exponential of all elements in the input tensor, with the base 2.
4467 *
4468 *
4469 * .. code::
4470 *
4471 *      y = Exp2Layer(x)
4472 *
4473 * Requires 1 input and produces 1 output.
4474 * Output shape is same as the input.
4475 *
4476 */
4477message Exp2LayerParams {
4478
4479}
4480
4481/**
4482 * A layer that returns a tensor containing the indices of all non-zero
4483 * elements of input tensor.
4484 * It is similar in functionality to the numpy.where method with 1 input.
4485 *
4486 * Requires 1 input and produces 1 output.
4487 * Output is of rank 2, of shape (N,R),
4488 * where N is the number of non-zero elements in the input and R is the rank of the input.
4489 *
4490 * Output contains indices represented in the multi-index form
4491 *
4492 * e.g.:
4493 * input {shape = (4,)}:
4494 * [0 1 0 2]
4495 * output {shape = (2,1)}:
4496 * [1]
4497 * [3]
4498 *
4499 *
4500 * input {shape = (3, 3)}:
4501 * [1 2 1]
4502 * [0 2 2]
4503 * [2 1 0]
4504 * output {shape = (7,2)}:
4505 * [0. 0.]
4506 * [0. 1.]
4507 * [0. 2.]
4508 * [1. 1.]
4509 * [1. 2.]
4510 * [2. 0.]
4511 * [2. 1.]
4512 *
4513 */
4514message WhereNonZeroLayerParams {
4515
4516}
4517
4518/**
4519 * A layer that copies a tensor setting everything outside a central band in
4520 * each inner-most matrix to zero.
4521 *
4522 * Requires 1 input and produces 1 output.
4523 *
4524 * Parameters for matrix_band_part layer
4525 * band(m, n) = (num_lower < 0 || (m-n) <= num_lower) && (num_upper < 0 || (n-m) <= num_upper).
4526 * output[i, j, k, ..., m, n] = band(m, n) * input[i, j, k, ..., m, n]
4527 *
4528 *
4529 * Output shape is same as the input shape.
4530 * Rank of the input must be at least 2.
4531 * For rank higher than 2, the last 2 dimensions are treated as the matrix, while the rest are treated as batch.
4532 */
4533message MatrixBandPartLayerParams {
4534
4535    int64 numLower = 1;
4536    int64 numUpper = 2;
4537
4538}
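
/*
 * Illustrative sketch (not part of the specification): the band(m, n) mask above applied to
 * the last two axes via NumPy broadcasting.
 *
 * .. code::
 *
 *      import numpy as np
 *
 *      def matrix_band_part(x, num_lower, num_upper):
 *          rows, cols = x.shape[-2], x.shape[-1]
 *          m = np.arange(rows)[:, None]
 *          n = np.arange(cols)[None, :]
 *          in_band = ((num_lower < 0) | ((m - n) <= num_lower)) & \
 *                    ((num_upper < 0) | ((n - m) <= num_upper))
 *          return x * in_band
 */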
4539
4540/**
4541 * A layer that copies a tensor setting everything outside upper triangular to zero.
4542 *
4543 * Requires 1 input and produces 1 output.
4544 *
4545 * Output shape is same as the input shape.
4546 * Rank of the input must be at least 2.
4547 * For rank higher than 2, the last 2 dimensions are treated as the matrix, while the rest are treated as batch.
4548 */
4549message UpperTriangularLayerParams {
4550
4551    int64 k = 1; // Diagonal below which to zero elements. k = 0 (the default) is the main diagonal, k < 0 is below it and k > 0 is above
4552
4553}
4554
4555/**
4556 * A layer that copies a tensor setting everything outside lower triangular to zero.
4557 *
4558 * Requires 1 input and produces 1 output.
4559 *
4560 * Output shape is same as the input shape.
4561 * Rank of the input must be at least 2.
4562 * For rank higher than 2, the last 2 dimensions are treated as the matrix, while the rest are treated as batch.
4563 */
4564message LowerTriangularLayerParams {
4565
4566    int64 k = 1; // Diagonal above which to zero elements. k = 0 (the default) is the main diagonal, k < 0 is below it and k > 0 is above
4567
4568}
4569
4570/**
4571 *
4572 * A layer that broadcasts a tensor to a new shape.
4573 *
4574 * Requires 2 inputs and produces 1 output.
4575 *
4576 * First input is broadcast to produce the output, while the second input is only
4577 * used to determine the shape of the output. Values of second input are not used.
4578 *
4579 * Output is a tensor with the same shape as the second input.
4580 *
4581 */
4582message BroadcastToLikeLayerParams {
4583
4584}
4585
4586/**
4587 *
4588 * A layer that broadcasts a tensor to a new shape.
4589 *
4590 * Requires 1 input and produces 1 output.
4591 *
4592 * Output tensor is the broadcasted version of the input and has shape as specified in the
4593 * parameter "targetShape".
4594 */
4595message BroadcastToStaticLayerParams {
4596
4597    repeated uint64 targetShape = 1;
4598
4599}
4600
4601/**
4602 *
4603 * A layer that broadcasts a tensor to a new shape.
4604 *
4605 * Requires 2 inputs and produces 1 output.
4606 *
4607 * First input is the one that is broadcasted to produce the output.
4608 * Second input is a rank 1 tensor specifying the shape of the output.
4609 * Output tensor has shape as specified by the values in the 2nd input tensor.
4610 */
4611message BroadcastToDynamicLayerParams {
4612
4613}
4614
4615/**
4616 * A layer that performs element-wise addition operation with broadcast support.
4617 *
4618 * Requires 2 inputs and produces 1 output.
4619 */
4620message AddBroadcastableLayerParams {
4621
4622}
4623
4624/**
4625 * A layer that performs element-wise maximum operation with broadcast support.
4626 *
4627 * Requires 2 inputs and produces 1 output.
4628 */
4629message MaxBroadcastableLayerParams {
4630
4631}
4632
4633/**
4634 * A layer that performs element-wise minimum operation with broadcast support.
4635 *
4636 * Requires 2 inputs and produces 1 output.
4637 */
4638message MinBroadcastableLayerParams {
4639
4640}
4641
4642/**
4643 * A layer that performs element-wise modular operation with broadcast support.
4644 *
4645 * Requires 2 inputs and produces 1 output.
4646 */
4647message ModBroadcastableLayerParams {
4648
4649}
4650
4651/**
4652 * A layer that performs element-wise floor division operation with broadcast support.
4653 *
4654 * Requires 2 inputs and produces 1 output.
4655 */
4656message FloorDivBroadcastableLayerParams {
4657
4658}
4659
4660/**
4661 * A layer that performs element-wise subtract operation with broadcast support.
4662 *
4663 * Requires 2 inputs and produces 1 output.
4664 */
4665message SubtractBroadcastableLayerParams {
4666
4667}
4668
4669/**
4670 * A layer that performs element-wise multiply operation with broadcast support.
4671 *
4672 * Requires 2 inputs and produces 1 output.
4673 */
4674message MultiplyBroadcastableLayerParams {
4675
4676}
4677
4678/**
4679 * A layer that performs element-wise division operation with broadcast support.
4680 *
4681 * Requires 2 inputs and produces 1 output.
4682 */
4683message DivideBroadcastableLayerParams {
4684
4685}
4686
4687/**
4688 * Gather layer that gathers elements from the first input, along a specified axis,
4689 * at indices specified in the second input.
4690 * It is similar in functionality to the numpy.take method.
4691 *
4692 * Requires 2 inputs and produces 1 output.
4693 *
4694 * Given two inputs, 'data' and 'indices', gather the slices of 'data'
4695 * and store into output.
4696 * e.g.
4697 * for i in [0, length(indices) - 1]
4698 *    output[i] = data[indices[i]]  (1-D case, axis=0)
4699 *
4700 * if axis = 0:
4701 * for each vector index (i,...,j)
4702 *    output[i,...,j,:,..,:] = data[indices[i,...,j],:,..,:]
4703 *
4704 * output.rank = (data.rank - 1) + indices.rank
4705 *
4706 * Negative indices and negative axis are supported.
4707 *
4708 * e.g:
4709 *
4710 * data shape = (2, 3)
4711 * indices shape = (6, 8)
4712 * axis = 0
4713 * output shape = (6, 8) + (3,) = (6, 8, 3)
4714 *
4715 * data shape = (2, 3, 5)
4716 * indices shape = (6, 8)
4717 * axis = 1
4718 * output shape = (2,) + (6, 8) + (5,) =  (2, 6, 8, 5)
4719 *
4720 */
4721message GatherLayerParams {
4722
4723    int64 axis = 1;
4724
4725}
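
/*
 * Illustrative sketch (not part of the specification): the first example above, expressed
 * with numpy.take, which has the same axis semantics. The index values are arbitrary.
 *
 * .. code::
 *
 *      import numpy as np
 *
 *      data = np.arange(2 * 3).reshape(2, 3)
 *      indices = np.arange(6 * 8).reshape(6, 8) % 2   # values must lie in [0, 1]
 *      out = np.take(data, indices, axis=0)
 *      assert out.shape == (6, 8, 3)
 */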
4726
4727/*
4728 * Scatter accumulation mode.
4729 */
4730enum ScatterMode {
4731
4732    SCATTER_UPDATE = 0;
4733    SCATTER_ADD = 1; /// add
4734    SCATTER_SUB = 2; /// subtract
4735    SCATTER_MUL = 3; /// multiply
4736    SCATTER_DIV = 4; /// divide
4737    SCATTER_MAX = 5; /// maximum
4738    SCATTER_MIN = 6; /// minimum
4739
4740}
4741
4742/*
4743 * A layer that scatters data into a new tensor according to indices from the input.
4744 * This is the inverse operation of Gather.
4745 *
4746 * Requires 3 inputs and produces 1 output.
4747 *
4748 * Output is initialized with the first input.
4749 * Then updated with the values in the third input, at indices specified by the second input.
4750 *
4751 * An example when axis=0:
4752 * Given three inputs, in order, "container", "indices", "updates", where
4753 *
4754 * - "container" is a rank R+1 tensor of shape [D_0, D_1, ..., D_R], which
4755 *   contains D_0 number of tensors, each with shape [D_1, ..., D_R].
4756 *
4757 * - "indices" is a rank 1 tensor with shape [N], where N is the number of updates.
4758 *   The values in this tensor must be in the range [0, D_0 - 1]. (negative indexing is supported)
4759 *
4760 * - "updates" is a rank R+1 tensor with shape [N, D_1, ..., D_R], which represents
4761 *   a total number of N tensors, each of shape [D_1, ..., D_R].
4762 *
4763 * The effect of this operation is as follows:
4764 *
4765 * output = container;
4766 * For each i in 0, ..., N - 1
4767 *    output[indices[i], :, ..., :] = updates[i, :, ..., :] // if mode == "SCATTER_UPDATE"
4768 *
4769 * or
4770 * For each i in 0, ..., N - 1
4771 *    output[indices[i], :, ..., :] += updates[i, :, ..., :] // if mode == "SCATTER_ADD"
4772 *
4773 * etc
4774 *
4775 * When "indices" is a tensor of rank greater than 1, the equation becomes (for axis=0):
4776 * For each vector index (i,...,j)
4777 *   output[indices[i,...,j],...] -= updates[i,...,j,...] // if mode == "SCATTER_SUB"
4778 *
4779 *
4780 * The output has the same shape as the first input.
4781 * "indices" input must have rank less than or equal to the "updates" input and its shape
4782 * must be a subset of the shape of the "updates" input.
4783 *
4784 * e.g:
4785 *
4786 * container shape = (4, 3)
4787 * indices shape = (5, 2, 3)
4788 * updates shape = (4, 5, 2, 3)
4789 * axis = 1
4790 * output shape = (4, 3)
4791 *
4792 * container shape = (4, 4, 3)
4793 * indices shape = (6,)
4794 * updates shape = (4, 6, 3)
4795 * axis = -2
4796 * output shape = (4, 4, 3)
4797 *
4798 * container shape = (5,)
4799 * indices shape = (5, 7, 5, 6)
4800 * updates shape = (5, 7, 5, 6)
4801 * axis = -1
4802 * output shape = (5,)
4803 */
4804
4805message ScatterLayerParams {
4806
4807    int64 axis = 1;
4808    ScatterMode mode = 2; /// mode of accumulation.
4809
4810}
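
/*
 * Illustrative sketch (not part of the specification): the axis = 0, rank 1 "indices" case
 * above in NumPy, covering only the SCATTER_UPDATE and SCATTER_ADD modes.
 *
 * .. code::
 *
 *      import numpy as np
 *
 *      def scatter_axis0(container, indices, updates, mode="SCATTER_UPDATE"):
 *          out = container.copy()
 *          for i, idx in enumerate(indices):
 *              if mode == "SCATTER_UPDATE":
 *                  out[idx] = updates[i]
 *              elif mode == "SCATTER_ADD":
 *                  out[idx] += updates[i]
 *          return out
 */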
4811
4812/**
4813 * A layer that gathers elements from the first input, 'params', at the multi-indices specified
4814 * by the second input, 'indices'.
4815 *
4816 * Requires 2 inputs and produces 1 output.
4817 *
4818 * 'params' = input[0], 'indices' = input[1]
4819 *
4820 * 'indices' is a rank K+1 tensor of shape [I_0, I_1, .., I_(K-1), I_K] which is viewed as a collection of
4821 * indices of (I_0 * I_1 * ... * I_(K-1)) points in the I_K dimensional space. For instance, the multi-index of the first point
4822 * is indices[0,0,...,0,:].
4823 *
4824 * Here is how the output is constructed:
4825 *
4826 * for i = 0,1,...,(I_0-1)
4827 *   ...
4828 *     for j = 0,1,....,(I_(K-1)-1)
4829 *          output[i,....,j,:,:,..,:] = params[indices[i,...,j,:], :,:,..,:]
4830 *
4831 * Hence, output shape is [I_0, I_1,...,I_(K-1)] + params.shape[I_K:]
4832 *
4833 * output.rank = indices.rank - 1 + params.rank - indices.shape[-1]
4834 *
4835 * e.g:
4836 *
4837 * input[0] shape = (4, 2, 3, 4)
4838 * input[1] shape = (6, 2)
4839 * output shape = (6,) + (3, 4) = (6, 3, 4)
4840 *
4841 * input[0] shape = (3, 3, 3, 4, 7)
4842 * input[1] shape = (3, 5)
4843 * output shape = (3,) + () = (3,)
4844 *
4845 * input[0] shape = (5, 3, 2, 5)
4846 * input[1] shape = (2, 7, 3, 2)
4847 * output shape = (2, 7, 3) + (2, 5) = (2, 7, 3, 2, 5)
4848 *
4849 */
4850message GatherNDLayerParams {
4851
4852}
4853
4854/*
4855 * A layer that scatters data into a new tensor according to multi-indices from the input.
4856 * This is the inverse operation of GatherND.
4857 *
4858 * Requires 3 inputs and produces 1 output.
4859 * 3 inputs, in order are denoted as "container", "indices", "updates".
4860 *
4861 * 'indices' is a rank K+1 tensor of shape [I_0, I_1, .., I_(K-1), I_K] which is viewed as a collection of
4862 * indices of (I_0 * I_1 * ... * I_(K-1)) points in the I_K dimensional space. For instance, the multi-index of the first point
4863 * is indices[0,0,...,0,:].
4864 *
4865 * container.rank >= I_K
4866 * updates.rank = K + (container.rank - I_K)
4867 * shape of 'updates' = [I_0, I_1,...,I_(K-1)] + container.shape[I_K:]
4868 *
4869 * output = container
4870 * For each vector index (i,...,j) s.t. 0<=i<I_0,..., 0<=j<I_(K-1)
4871 *   output[indices[i,...,j,:], :,:,..,:] = updates[i,....,j,:,:,..,:] // if mode == "SCATTER_UPDATE"
4872 *
4873 * The output has the same shape as the first input.
4874 *
4875 * e.g:
4876 *
4877 * container shape = (3, 2)
4878 * indices shape = (4, 2)
4879 * updates shape = (4,)
4880 * output shape = (3, 2)
4881 *
4882 * container shape = (7, 6)
4883 * indices shape = (4, 7, 2, 5, 1)
4884 * updates shape = (4, 7, 2, 5, 6)
4885 * output shape = (7, 6)
4886 *
4887 */
4888message ScatterNDLayerParams {
4889
4890    ScatterMode mode = 1; /// mode of accumulation.
4891
4892}
4893
4894/**
4895 * Gather layer that gathers elements from the first input, along a specified axis,
4896 * at indices specified in the second input.
4897 * It is similar in functionality to the numpy.take_along_axis method.
4898 *
4899 * Requires 2 inputs and produces 1 output.
4900 *
4901 * Given two inputs, 'data' and 'indices', gather the slices of 'data'
4902 * and store into output.
4903 *
4904 * Both inputs and output have the same rank.
4905 * Output shape is same as the shape of 'indices'
4906 * Shapes of 'indices' and 'data' match, except at the 'axis' dimension.
4907 *
4908 * This operation performs the following operation for axis=0:
4909 * for each vector index (i,j,....,k)
4910 *    output[i,j,....,k] = data[index[i,j,....,k],j,....,k]
4911 *
4912 * Negative indices and negative axis are supported.
4913 *
4914 * e.g:
4915 *
4916 * data shape = (4, 4, 7)
4917 * indices shape = (4, 5, 7)
4918 * axis = 1
4919 * output shape = (4, 5, 7)
4920 *
4921 */
4922message GatherAlongAxisLayerParams {
4923
4924    int64 axis = 1;
4925
4926}
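
/*
 * Illustrative sketch (not part of the specification): the example above, expressed with
 * numpy.take_along_axis, which this layer is described as mirroring.
 *
 * .. code::
 *
 *      import numpy as np
 *
 *      data = np.arange(4 * 4 * 7).reshape(4, 4, 7)
 *      indices = np.zeros((4, 5, 7), dtype=np.int64)   # index values must lie in [0, 3]
 *      out = np.take_along_axis(data, indices, axis=1)
 *      assert out.shape == (4, 5, 7)
 */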
4927
4928/**
4929 * A layer that scatters data into a new tensor according to indices from
4930 * the input along the given axis into the output tensor.
4931 * This is the inverse operation of GatherAlongAxis.
4932 * It is similar in functionality to the numpy.put_along_axis method.
4933 *
4934 * Requires 3 inputs and produces 1 output.
4935 * 3 inputs, in order are denoted as "container", "indices", "updates".
4936 *
4937 * All inputs and output have the same rank.
4938 * Output shape is same as the shape of 'container'
4939 * Shapes of 'indices' and 'updates' match, which is same as the shape of 'container' except at the 'axis' dimension.
4940 *
4941 * Negative indices and negative axis are supported.
4942 *
4943 * This operation performs the following operation for axis=0:
4944 * output = container
4945 * for each vector index (i,j,....,k)
4946 *    output[index[i,j,....,k],j,....,k] = updates[i,j,....,k]
4947 *
4948 * e.g.:
4949 *
4950 * container shape = (2, 5, 6)
4951 * indices shape = (2, 2, 6)
4952 * updates shape = (2, 2, 6)
4953 * axis = -2
4954 * output shape = (2, 5, 6)
4955 *
4956 */
4957message ScatterAlongAxisLayerParams {
4958
4959    int64 axis = 1;
4960    ScatterMode mode = 2; /// mode of accumulation.
4961
4962}
4963
4964/**
4965 * A layer that stacks the input tensors along the given axis.
4966 * It is similar in functionality to the numpy.stack method.
4967 *
4968 * Requires at least 2 inputs and produces 1 output.
4969 * All inputs must have the same shape.
4970 * Rank of the output is 1 greater than the rank of the inputs.
4971 *
4972 * Negative indexing is supported for the "axis" parameter.
4973 *
4974 * e.g.:
4975 *
4976 * input shape = (2, 4, 2)
4977 * number of inputs = 5
4978 * axis = 3
4979 * output shape = (2, 4, 2, 5)
4980 *
4981 * input shape = (2, 4, 2)
4982 * number of inputs = 5
4983 * axis = -2
4984 * output shape = (2, 4, 5, 2)
4985 */
4986message StackLayerParams {
4987
4988    int64 axis = 1;
4989
4990}
4991
4992/**
4993 * A layer that reshapes a tensor without altering the rank of the input.
4994 * Order of the data is left unchanged.
4995 *
4996 * Requires 1 input and produces 1 output.
4997 *
4998 * e.g:
4999 *
5000 * input shape = (20,10)
5001 * targetShape = (5,-1)
5002 * output shape = (5,40)
5003 *
5004 * input shape = (20,10,5)
5005 * targetShape = (0,2,25)
5006 * output shape = (20,2,25)
5007 *
5008 * input shape = (10,3,5)
5009 * targetShape = (25,0,-1)
5010 * output shape = (25,3,2)
5011 */
5012message RankPreservingReshapeLayerParams {
5013
5014    /**
5015     * Length of this field must be same as the input/output rank.
5016     * It can have 0's, in which case the corresponding input dimension is kept intact.
5017     * At most one element can be -1, in which case the output dimension is calculated from rest of the shape.
5018     */
5019    repeated int64 targetShape = 1;
5020
5021}
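
/*
 * Illustrative sketch (not part of the specification): resolving the 0 and -1 entries of
 * "targetShape" in NumPy; reproduces the third example above.
 *
 * .. code::
 *
 *      import numpy as np
 *
 *      def rank_preserving_reshape(x, target_shape):
 *          shape = [d if t == 0 else t for d, t in zip(x.shape, target_shape)]
 *          if -1 in shape:
 *              known = int(np.prod([d for d in shape if d != -1]))
 *              shape[shape.index(-1)] = x.size // known
 *          return x.reshape(shape)
 *
 *      x = np.zeros((10, 3, 5))
 *      assert rank_preserving_reshape(x, (25, 0, -1)).shape == (25, 3, 2)
 */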
5022
5023/**
5024 * Constant padding layer.
5025 * Pad the input array with a constant value, either along a single given axis or along a set of axes.
5026 *
5027 * Requires 1 or 2 inputs and produces 1 output.
5028 * The amount of padding can be either set as a parameter ("padAmounts") or provided as a second input.
5029 *
5030 * Output rank is same as the rank of the first input.
5031 *
5032 * when "padToGivenOutputSizeMode" is False:
5033 *
5034 * output_shape[i] = input_shape[i] + padAmounts[2*i] + padAmounts[2*i+1], i=0,...,rank-1
5035 *
5036 * Examples:
5037 *
5038 * input shape = (20,10)
5039 * padAmounts = [0,1,4,0]
5040 * output shape = (21,14)
5041 *
5042 * input shape = (20,10,5)
5043 * padAmounts = [0,0,3,4,0,9]
5044 * output shape = (20,17,14)
5045 *
5046 *
5047 * when "padToGivenOutputSizeMode" is True
5048 *
5049 * output_shape[i] = max(input_shape[i], max(padAmounts[2*i], padAmounts[2*i+1])), i=0,...,rank-1
5050 *
5051 * input shape = (20,10)
5052 * padAmounts = [0,21,14,0]
5053 * output shape = (21,14)
5054 *
5055 * input shape = (20,10,5)
5056 * padAmounts = [0,0,17,0,0,14]
5057 * output shape = (20,17,14)
5058 */
5059message ConstantPaddingLayerParams {
5060    /**
5061     * The value to be used for padding.
5062     */
5063    float value = 1;
5064
5065    /**
5066     * Length of this repeated field must be twice the rank of the first input.
5067     * 2*i-th and (2*i+1)-th values represent the amount of padding to be applied to the i-th input
5068     * dimension, "before" and "after" the input values, respectively.
5069     */
5070    repeated uint64 padAmounts = 2;
5071
5072    /**
5073     * When this is True, the non-zero values in "padAmounts" specify the desired output size along the corresponding dimensions.
5074     * In that case only one of padAmounts[2*i] and padAmounts[2*i+1] can be non-zero, for i=0,...,rank-1.
5075     */
5076    bool padToGivenOutputSizeMode = 3;
5077}
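
/*
 * Illustrative sketch (not part of the specification): both padding modes above in NumPy;
 * reproduces the first "padToGivenOutputSizeMode" example. The helper name and argument
 * names are assumptions for this example only.
 *
 * .. code::
 *
 *      import numpy as np
 *
 *      def constant_pad(x, pad_amounts, value=0.0, pad_to_given_output_size_mode=False):
 *          pads = np.asarray(pad_amounts, dtype=np.int64).reshape(-1, 2)  # (before, after) per axis
 *          if pad_to_given_output_size_mode:
 *              # a non-zero entry gives the target size along that axis, padded on that side
 *              pads = np.array([[max(b - s, 0) if b else 0, max(a - s, 0) if a else 0]
 *                               for (b, a), s in zip(pads, x.shape)])
 *          return np.pad(x, pads, mode="constant", constant_values=value)
 *
 *      x = np.zeros((20, 10))
 *      y = constant_pad(x, [0, 21, 14, 0], pad_to_given_output_size_mode=True)
 *      assert y.shape == (21, 14)
 */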
5078
5079/**
5080 * A layer that returns a tensor filled with values from the normal distribution.
5081 *
5082 * Requires 1 input and produces 1 output.
5083 *
5084 * Parameters
5085 *     seed: seed used for the normal distribution.
5086 *     mean: mean of the normal distribution.
5087 *     stdDev: standard deviation of the normal distribution.
5088 *
5089 * Input
5090 *     An N-Dimensional tensor, whose values are ignored. Only the shape is used to
5091 *     infer the shape of the output.
5092 *
5093 * Output
5094 *     An N-Dimensional tensor with the same shape as the input tensor.
5095 *
5096 */
5097message RandomNormalLikeLayerParams {
5098
5099    int64 seed = 1;
5100    float mean = 2;
5101    float stdDev = 3;
5102
5103}
5104
5105/**
5106 * A layer that returns a tensor filled with values from the normal distribution.
5107 *
5108 * Requires no input and produces 1 output.
5109 *
5110 * Parameters
5111 *     seed: seed used for the normal distribution.
5112 *     mean: mean of the normal distribution.
5113 *     stdDev: standard deviation of the normal distribution.
5114 *     outputShape: shape of the output tensor.
5115 *
5116 * Output
5117 *     An N-Dimensional tensor of shape "outputShape".
5118 *
5119 */
5120message RandomNormalStaticLayerParams {
5121
5122    int64 seed = 1;
5123    float mean = 2;
5124    float stdDev = 3;
5125    repeated uint64 outputShape = 4;
5126
5127}
5128
5129/**
5130 * A layer that returns a tensor filled with values from the normal distribution.
5131 *
5132 * Requires 1 input and produces 1 output.
5133 *
5134 * Parameters:
5135 *     seed: seed used for the normal distribution.
5136 *     mean: mean of the normal distribution.
5137 *     stdDev: standard deviation of the normal distribution.
5138 *
5139 * Input
5140 *     A rank 1 tensor specifying the shape of the output
5141 *
5142 * Output
5143 *     An N-Dimensional tensor with the shape specified by the values in the input tensor.
5144 */
5145message RandomNormalDynamicLayerParams {
5146
5147    int64 seed = 1;
5148    float mean = 2;
5149    float stdDev = 3;
5150
5151}
5152
5153/**
5154 * A layer that returns a tensor filled with values from the uniform distribution.
5155 *
5156 * Requires 1 input and produces 1 output.
5157 *
5158 * Parameters
5159 *     seed: seed used for the uniform distribution.
5160 *     minVal: lower bound on the range of random values for the uniform distribution.
5161 *     maxVal: upper bound on the range of random values for the uniform distribution.
5162 *
5163 * Input
5164 *     An N-Dimensional tensor, whose values are ignored. Only the shape is used to
5165 *     infer the shape of the output.
5166 *
5167 * Output
5168 *     An N-Dimensional tensor with the same shape as the input tensor.
5169 *
5170 */
5171message RandomUniformLikeLayerParams {
5172
5173    int64 seed = 1;
5174    float minVal = 2;
5175    float maxVal = 3;
5176
5177}
5178
5179/**
5180 * A layer that returns a tensor filled with values from the uniform distribution.
5181 *
5182 * Requires no input and produces 1 output.
5183 *
5184 * Parameters
5185 *     seed: seed used for the uniform distribution.
5186 *     minVal: lower bound on the range of random values for the uniform distribution.
5187 *     maxVal: upper bound on the range of random values for the uniform distribution.
5188 *     outputShape: shape of the output tensor.
5189 *
5190 * Output
5191 *     An N-Dimensional tensor of shape "outputShape".
5192 *
5193 */
5194message RandomUniformStaticLayerParams {
5195
5196    int64 seed = 1;
5197    float minVal = 2;
5198    float maxVal = 3;
5199    repeated uint64 outputShape = 4;
5200
5201}
5202
5203/**
5204 * A layer that returns a tensor filled with values from the uniform distribution.
5205 *
5206 * Requires 1 input and produces 1 output.
5207 *
5208 * Parameters:
5209 *     seed: seed used for the uniform distribution.
5210 *     minVal: lower bound on the range of random values for the uniform distribution.
5211 *     maxVal: upper bound on the range of random values for the uniform distribution.
5212 *
5213 * Input
5214 *     A rank 1 tensor specifying the shape of the output
5215 *
5216 * Output
5217 *     An N-Dimensional tensor with the shape specified by the values in the input tensor.
5218 *
5219 */
5220message RandomUniformDynamicLayerParams {
5221
5222    int64 seed = 1;
5223    float minVal = 2;
5224    float maxVal = 3;
5225
5226}
5227
5228/**
5229 * A layer that returns a tensor filled with values from the Bernoulli distribution.
5230 *
5231 * Requires 1 input and produces 1 output.
5232 *
5233 * Parameters
5234 *     seed: seed used for the Bernoulli distribution.
5235 *     prob: probability of a 1 event.
5236 *
5237 * Input
5238 *     An N-Dimensional tensor, whose values are ignored. Only the shape is used to
5239 *     infer the shape of the output.
5240 *
5241 * Output
5242 *     An N-Dimensional tensor with the same shape as the input tensor.
5243 *
5244 */
5245message RandomBernoulliLikeLayerParams {
5246
5247    int64 seed = 1;
5248    float prob = 2;
5249
5250}
5251
5252/**
5253 * A layer that returns a tensor filled with values from the Bernoulli distribution.
5254 *
5255 * Requires no input and produces 1 output.
5256 *
5257 * Parameters
5258 *     seed: seed used for the Bernoulli distribution.
5259 *     prob: probability of a 1 event.
5260 *     outputShape: shape of the output tensor.
5261 *
5262 * Output
5263 *     An N-Dimensional tensor of shape "outputShape".
5264 */
5265message RandomBernoulliStaticLayerParams {
5266
5267    int64 seed = 1;
5268    float prob = 2;
5269    repeated uint64 outputShape = 3;
5270
5271}
5272
5273/**
5274 * A layer that returns a tensor filled with values from the Bernoulli distribution.
5275 *
5276 * Requires 1 input and produces 1 output.
5277 *
5278 * Parameters:
5279 *     seed: seed used for the Bernoulli distribution.
5280 *     prob: probability of a 1 event.
5281 *
5282 * Input
5283 *     A rank 1 tensor specifying the shape of the output
5284 *
5285 * Output
5286 *     An N-Dimensional tensor with the shape specified by the values in the input tensor.
5287 */
5288message RandomBernoulliDynamicLayerParams {
5289
5290    int64 seed = 1;
5291    float prob = 2;
5292
5293}
5294
5295/**
5296 * A layer that returns a tensor of the specified shape filled with values from the categorical distribution.
5297 *
5298 * Requires 1 input and produces 1 output.
5299 *
5300 * Parameters:
5301 *     seed: seed used for the categorical distribution.
5302 *     numSamples: number of samples to draw.
5303 *     isLogits: true if the inputs are logits, false if the inputs are probabilities.
5304 *     eps: default value is 1e-10.
5305 *     temperature: default value is 1.0.
5306 *
5307 * If the input tensor shape is [D_1, D_2, ... , D_(R-1), D_R] (rank R),
5308 * then the output shape is [D_1, D_2, ... , D_(R-1), numSamples] (rank R).
5309 *
5310 */
5311message CategoricalDistributionLayerParams {
5312
5313    int64 seed = 1;
5314    int64 numSamples = 2;
5315    bool isLogits = 3;
5316    float eps = 4;
5317    float temperature = 5;
5318}
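
/*
 * Editor's note (illustrative, not part of the specification): one plausible NumPy
 * sketch of the sampling behavior described above; how "temperature" and "eps" are
 * applied is an assumption, since the specification only lists their default values.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def categorical_sample(x, num_samples, is_logits=True, temperature=1.0, eps=1e-10, seed=0):
 *         rng = np.random.default_rng(seed)
 *         if is_logits:
 *             z = x / temperature
 *             z = z - z.max(axis=-1, keepdims=True)          # numerical stability
 *             probs = np.exp(z) / np.exp(z).sum(axis=-1, keepdims=True)
 *         else:
 *             probs = (x + eps) / (x + eps).sum(axis=-1, keepdims=True)
 *         flat = probs.reshape(-1, probs.shape[-1])
 *         samples = np.stack([rng.choice(len(p), size=num_samples, p=p / p.sum()) for p in flat])
 *         return samples.reshape(x.shape[:-1] + (num_samples,))
 */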
5319
5320/**
5321 * A layer that performs reduction with L1 normalization operation.
5322 *
5323 * Negative indexing is supported.
5324 * Requires 1 input and produces 1 output.
5325 *
5326 * Parameters:
5327 *    axes: dimensions along which to perform reduction
5328 *    keepDims: if True, keep the reduced dimensions (value will be 1), otherwise, reduced dimensions are squeezed
5329 *    reduceAll: ignore the "axes" parameter, perform reduction along all axes
5330 *
5331 */
5332message ReduceL1LayerParams {
5333
5334    repeated int64 axes = 1;
5335    bool keepDims = 2;
5336    bool reduceAll = 3;
5337
5338}
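
/*
 * Editor's note (illustrative, not part of the specification): a minimal NumPy
 * sketch of the reduction semantics shared by ReduceL1 above and the other Reduce*
 * layers that follow; the remaining reductions swap in a different aggregate
 * (max, min, sum, prod, mean, logSum, sumSquare, logSumExp).
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def reduce_l1(x, axes, keep_dims=False, reduce_all=False):
 *         axis = None if reduce_all else tuple(int(a) for a in axes)
 *         return np.sum(np.abs(x), axis=axis, keepdims=keep_dims)
 *
 *     def reduce_l2(x, axes, keep_dims=False, reduce_all=False):
 *         axis = None if reduce_all else tuple(int(a) for a in axes)
 *         return np.sqrt(np.sum(np.square(x), axis=axis, keepdims=keep_dims))
 */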
5339
5340/**
5341 * A layer that performs reduction with L2 normalization operation.
5342 *
5343 * Negative indexing is supported.
5344 * Requires 1 input and produces 1 output.
5345 *
5346 * Parameters:
5347 *    axes: dimensions along which to perform reduction
5348 *    keepDims: if True, keep the reduced dimensions (value will be 1), otherwise, reduced dimensions are squeezed
5349 *    reduceAll: ignore the "axes" parameter, perform reduction along all axes
5350 *
5351 */
5352message ReduceL2LayerParams {
5353
5354    repeated int64 axes = 1;
5355    bool keepDims = 2;
5356    bool reduceAll = 3;
5357
5358}
5359
5360/**
5361 * A layer that performs reduction with max operation.
5362 *
5363 * Negative indexing is supported.
5364 * Requires 1 input and produces 1 output.
5365 *
5366 * Parameters:
5367 *    axes: dimensions along which to perform reduction
5368 *    keepDims: if True, keep the reduced dimensions (value will be 1), otherwise, reduced dimensions are squeezed
5369 *    reduceAll: ignore the "axes" parameter, perform reduction along all axes
5370 *
5371 */
5372message ReduceMaxLayerParams {
5373
5374    repeated int64 axes = 1;
5375    bool keepDims = 2;
5376    bool reduceAll = 3;
5377
5378}
5379
5380/**
5381 * A layer that performs reduction with min operation.
5382 *
5383 * Negative indexing is supported.
5384 * Requires 1 input and produces 1 output.
5385 *
5386 * Parameters:
5387 *    axes: dimensions along which to perform reduction
5388 *    keepDims: if True, keep the reduced dimensions (value will be 1), otherwise, reduced dimensions are squeezed
5389 *    reduceAll: ignore the "axes" parameter, perform reduction along all axes
5390 *
5391 */
5392message ReduceMinLayerParams {
5393
5394    repeated int64 axes = 1;
5395    bool keepDims = 2;
5396    bool reduceAll = 3;
5397
5398}
5399
5400/**
5401 * A layer that performs reduction with sum operation.
5402 *
5403 * Negative indexing is supported.
5404 * Requires 1 input and produces 1 output.
5405 *
5406 * Parameters:
5407 *    axes: dimensions along which to perform reduction
5408 *    keepDims: if True, keep the reduced dimensions (value will be 1), otherwise, reduced dimensions are squeezed
5409 *    reduceAll: ignore the "axes" parameter, perform reduction along all axes
5410 *
5411 */
5412message ReduceSumLayerParams {
5413
5414    repeated int64 axes = 1;
5415    bool keepDims = 2;
5416    bool reduceAll = 3;
5417
5418}
5419
5420/**
5421 * A layer that performs reduction with prod operation.
5422 *
5423 * Negative indexing is supported.
5424 * Requires 1 input and produces 1 output.
5425 *
5426 * Parameters:
5427 *    axes: dimensions along which to perform reduction
5428 *    keepDims: if True, keep the reduced dimensions (value will be 1), otherwise, reduced dimensions are squeezed
5429 *    reduceAll: ignore the "axes" parameter, perform reduction along all axes
5430 *
5431 */
5432message ReduceProdLayerParams {
5433
5434    repeated int64 axes = 1;
5435    bool keepDims = 2;
5436    bool reduceAll = 3;
5437
5438}
5439
5440/**
5441 * A layer that performs reduction with mean operation.
5442 *
5443 * Negative indexing is supported.
5444 * Requires 1 input and produces 1 output.
5445 *
5446 * Parameters:
5447 *    axes: dimensions along which to perform reduction
5448 *    keepDims: if True, keep the reduced dimensions (value will be 1), otherwise, reduced dimensions are squeezed
5449 *    reduceAll: ignore the "axes" parameter, perform reduction along all axes
5450 *
5451 */
5452message ReduceMeanLayerParams {
5453
5454    repeated int64 axes = 1;
5455    bool keepDims = 2;
5456    bool reduceAll = 3;
5457
5458}
5459
5460/**
5461 * A layer that performs reduction with logSum operation.
5462 *
5463 * Negative indexing is supported.
5464 * Requires 1 input and produces 1 output.
5465 *
5466 * Parameters:
5467 *    axes: dimensions along which to perform reduction
5468 *    keepDims: if True, keep the reduced dimensions (value will be 1), otherwise, reduced dimensions are squeezed
5469 *    reduceAll: ignore the "axes" parameter, perform reduction along all axes
5470 *
5471 */
5472message ReduceLogSumLayerParams {
5473
5474    repeated int64 axes = 1;
5475    bool keepDims = 2;
5476    bool reduceAll = 3;
5477
5478}
5479
5480/**
5481 * A layer that performs reduction with the sumSquare (sum of squares) operation.
5482 *
5483 * Negative indexing is supported.
5484 * Requires 1 input and produces 1 output.
5485 *
5486 * Parameters:
5487 *    axes: dimensions along which to perform reduction
5488 *    keepDims: if True, keep the reduced dimensions (value will be 1), otherwise, reduced dimensions are squeezed
5489 *    reduceAll: ignore the "axes" parameter, perform reduction along all axes
5490 *
5491 */
5492message ReduceSumSquareLayerParams {
5493
5494    repeated int64 axes = 1;
5495    bool keepDims = 2;
5496    bool reduceAll = 3;
5497
5498}
5499
5500/**
5501 * A layer that performs reduction with logSumExp operation.
5502 *
5503 * Negative indexing is supported.
5504 * Requires 1 input and produces 1 output.
5505 *
5506 * Parameters:
5507 *    axes: dimensions along which to perform reduction
5508 *    keepDims: if True, keep the reduced dimensions (value will be 1), otherwise, reduced dimensions are squeezed
5509 *    reduceAll: ignore the "axes" parameter, perform reduction along all axes
5510 *
5511 */
5512message ReduceLogSumExpLayerParams {
5513
5514    repeated int64 axes = 1;
5515    bool keepDims = 2;
5516    bool reduceAll = 3;
5517
5518}
5519
5520/**
5521 * A layer that increases the rank of the input tensor by adding unit dimensions.
5522 *
5523 * Requires 1 input and produces 1 output.
5524 *
5525 * e.g.:
5526 *
5527 * input shape = (10,5)
5528 * axes = (0,1)
5529 * output shape = (1,1,10,5)
5530 *
5531 * input shape = (10,5)
5532 * axes = (0,2)
5533 * output shape = (1,10,1,5)
5534 *
5535 * input shape = (10,5)
5536 * axes = (-2,-1)
5537 * output shape = (10,5,1,1)
5538 *
5539 */
5540message ExpandDimsLayerParams {
5541
5542    /**
5543     * Axis values provided here get dimension 1 in the output tensor.
5544     * Negative indexing is supported.
5545     */
5546    repeated int64 axes = 1;
5547
5548}
5549
5550/**
5551 * A layer that flattens the input tensor into a 2-dimensional matrix.
5552 *
5553 * Requires 1 input and produces 1 output.
5554 * Output tensor is always rank 2.
5555 *
5556 * First dimension of output is the product of all the dimensions in input[:axis] ("axis" is exclusive)
5557 * Second dimension of output is the product of all the dimensions in input[axis:] ("axis" is inclusive)
5558 *
5559 * e.g.:
5560 * input shape:  (3,)
5561 * axis:  -1
5562 * output shape:  (1, 3)
5563 *
5564 * input shape:  (3,)
5565 * axis:  1
5566 * output shape:  (3, 1)
5567 *
5568 * input shape:  (4, 3)
5569 * axis:  -1
5570 * output shape:  (4, 3)
5571 *
5572 * input shape:  (5, 2)
5573 * axis:  0
5574 * output shape:  (1, 10)
5575 *
5576 * input shape:  (5, 5, 3)
5577 * axis:  -2
5578 * output shape:  (5, 15)
5579 *
5580 * input shape:  (2, 3, 2)
5581 * axis:  -1
5582 * output shape:  (6, 2)
5583 *
5584 */
5585message FlattenTo2DLayerParams {
5586
5587    int64 axis = 1;
5588
5589}
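
/*
 * Editor's note (illustrative, not part of the specification): a minimal NumPy
 * sketch of the FlattenTo2D behavior described above; the function name is
 * illustrative only.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def flatten_to_2d(x, axis=-1):
 *         if axis < 0:
 *             axis += x.ndim                                       # normalize negative axis
 *         d0 = int(np.prod(x.shape[:axis], dtype=np.int64))        # product of dims before "axis"
 *         d1 = int(np.prod(x.shape[axis:], dtype=np.int64))        # product of dims from "axis" on
 *         return x.reshape(d0, d1)
 *
 *     # e.g. flatten_to_2d(np.zeros((5, 5, 3)), axis=-2).shape == (5, 15)
 */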
5590
5591/**
5592 * A layer that reshapes a tensor.
5593 *
5594 * Requires 1 input and produces 1 output.
5595 *
5596 * Output tensor is the reshaped version of the input and has shape as specified in the
5597 * parameter "targetShape".
5598 *
5599 */
5600message ReshapeStaticLayerParams {
5601
5602    repeated int64 targetShape = 1;
5603
5604}
5605
5606/**
5607 * A layer that reshapes a tensor.
5608 *
5609 * Requires 2 inputs and produces 1 output.
5610 *
5611 * First input is reshaped to produce the output, while the second input is only
5612 * used to determine the shape of the output. Values of the second input are not used.
5613 *
5614 * Output is a tensor with the same shape as the second input.
5615 *
5616 */
5617message ReshapeLikeLayerParams {
5618
5619}
5620
5621/**
5622 * A layer that reshapes a tensor.
5623 *
5624 * Requires 2 inputs and produces 1 output.
5625 *
5626 * First input is the one that is reshaped to produce the output.
5627 * Second input is a rank 1 tensor specifying the shape of the output.
5628 * Output tensor has shape as specified by the values in the 2nd input tensor.
5629 */
5630message ReshapeDynamicLayerParams {
5631
5632}
5633
5634/**
5635 * A layer that decreases the rank of the input tensor by removing unit dimensions.
5636 *
5637 * Requires 1 input and produces 1 output.
5638 *
5639 * Output rank is the input rank minus the number of squeezed dimensions,
5640 * except that a rank 1 input always produces a rank 1 output.
5641 *
5642 * e.g.:
5643 *
5644 * input shape = (1,1,10,5)
5645 * axes = (0,1)
5646 * output shape = (10,5)
5647 *
5648 * input shape = (1,10,5,1)
5649 * axes = (0,3)
5650 * output shape = (10,5)
5651 *
5652 * input shape = (10,5,1,1)
5653 * axes = (-2,-1)
5654 * output shape = (10,5)
5655 *
5656 * input shape = (1,)
5657 * axes = (0)
5658 * output shape = (1,)
5659 *
5660 */
5661message SqueezeLayerParams {
5662
5663    /**
5664     * Axis values provided here get removed from the input tensor.
5665     * Negative indexing is supported.
5666     */
5667    repeated int64 axes = 1;
5668    bool squeezeAll = 2; // if true squeeze all dimensions that are 1.
5669
5670}
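
/*
 * Editor's note (illustrative, not part of the specification): a minimal NumPy
 * sketch of the Squeeze behavior described above, including the rule that a
 * rank 1 input stays rank 1.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def squeeze(x, axes=(), squeeze_all=False):
 *         if squeeze_all:
 *             out = np.squeeze(x)
 *         else:
 *             out = np.squeeze(x, axis=tuple(a % x.ndim for a in axes))
 *         # Core ML never produces a rank 0 tensor here; NumPy may squeeze to a scalar
 *         return out.reshape(1) if out.ndim == 0 else out
 */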
5671
5672/**
5673 * A layer that returns top K (or bottom K) values and the corresponding indices
5674 * of the input along a given axis.
5675 *
5676 * Requires 1 or 2 inputs and produces 2 outputs.
5677 *
5678 * The second input, which is optional, is the value of K.
5679 * If there is only one input, the value of K specified in the layer parameter is used.
5680 *
5681 * Both outputs have the same rank as the first input.
5682 * Second input must correspond to a scalar tensor.
5683 *
5684 * e.g.:
5685 *
5686 * first input's shape = (45, 34, 10, 5)
5687 * axis = 1
5688 * output shape, for both outputs = (45, K, 10, 5)
5689 *
5690 */
5691message TopKLayerParams {
5692
5693    int64 axis = 1; ///  negative indexing is supported
5694    uint64 K = 2; /// is ignored if a second input is present.
5695    bool useBottomK = 3; /// if true, bottom K (values, indices) are returned instead
5696
5697}
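
/*
 * Editor's note (illustrative, not part of the specification): a minimal NumPy
 * sketch of the TopK behavior described above; tie-breaking order is an assumption.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def top_k(x, k, axis=-1, use_bottom_k=False):
 *         order = np.argsort(x if use_bottom_k else -x, axis=axis, kind="stable")
 *         indices = np.take(order, np.arange(k), axis=axis)    # first K along "axis"
 *         values = np.take_along_axis(x, indices, axis=axis)
 *         return values, indices
 */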
5698
5699/**
5700 * A layer that returns the indices of the maximum value along a specified axis in a tensor.
5701 *
5702 * Requires 1 input and produces 1 output. Negative indexing is supported.
5703 *
5704 * Output has the same rank as the input if "removeDim" is False (default).
5705 * Output has rank one less than the input if "removeDim" is True and input rank is more than 1.
5706 *
5707 * e.g.:
5708 *
5709 * input shape = (45, 34, 10, 5)
5710 * axis = -2
5711 * output shape = (45, 1, 10, 5), if removeDim = False (default)
5712 * output shape = (45, 10, 5), if removeDim = True
5713 *
5714 * input shape = (5,)
5715 * axis = 0
5716 * output shape = (1,), if removeDim = False or True
5717 *
5718 */
5719message ArgMaxLayerParams {
5720
5721    int64 axis = 1;
5722    bool removeDim = 2;
5723
5724}
5725
5726/**
5727* A layer that returns the indices of the minimum value along a specified axis in a tensor.
5728*
5729* Requires 1 input and produces 1 output. Negative indexing is supported.
5730*
5731* Output has the same rank as the input if "removeDim" is False (default).
5732* Output has rank one less than the input if "removeDim" is True and input rank is more than 1.
5733*
5734* e.g.:
5735*
5736* input shape = (45, 34, 10, 5)
5737* axis = -2
5738* output shape = (45, 1, 10, 5), if removeDim = False (default)
5739* output shape = (45, 10, 5), if removeDim = True
5740*
5741* input shape = (5,)
5742* axis = 0
5743* output shape = (1,), if removeDim = False or True
5744*
5745*/
5746message ArgMinLayerParams {
5747
5748    int64 axis = 1;
5749    bool removeDim = 2;
5750
5751}
5752
5753/**
5754 * A layer that splits the input tensor into multiple output tensors,
5755 * along the specified axis.
5756 *
5757 * The layer either uniformly splits the input tensor into ``num_splits`` tensors, or
5758 * splits according to the given split sizes in ``split_sizes``.
5759 * Supports unequal splits and negative indexing.
5760 *
5761 * Requires 1 input and produces at least 2 outputs.
5762 * Rank of all the outputs is same as that of the input.
5763 *
5764 * If parameter "splitSizes" is provided, value of the parameter "numSplits" is ignored, since in that case
5765 * "numSplits" is automatically inferred to be the length of "splitSizes".
5766 *
5767 *
5768 * e.g.:
5769 * input shape:  (5, 3, 4)
5770 * axis = -3, split_sizes = [3, 2]
5771 * output shape:  (3, 3, 4)
5772 * output shape:  (2, 3, 4)
5773 */
5774message SplitNDLayerParams {
5775
5776    int64 axis = 1;
5777    uint64 numSplits = 2;
5778    repeated uint64 splitSizes = 3;
5779
5780}
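
/*
 * Editor's note (illustrative, not part of the specification): a minimal NumPy
 * sketch of the SplitND behavior described above.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def split_nd(x, axis, num_splits=0, split_sizes=()):
 *         if split_sizes:
 *             boundaries = np.cumsum(split_sizes)[:-1]         # unequal splits
 *             return np.split(x, boundaries, axis=axis)
 *         return np.split(x, num_splits, axis=axis)            # uniform split; must divide evenly
 *
 *     # e.g. [y.shape for y in split_nd(np.zeros((5, 3, 4)), axis=-3, split_sizes=[3, 2])]
 *     #      == [(3, 3, 4), (2, 3, 4)]
 */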
5781
5782/**
5783 * A layer that performs an element-wise ceil operation on the input tensor,
5784 * rounding each value to the smallest integer not less than that value.
5785 *
5786 * Requires 1 input and produces 1 output.
5787 * Output shape is same as the input.
5788 *
5789 */
5790message CeilLayerParams {
5791
5792}
5793
5794/**
5795 * A layer that performs element-wise round operation on the input tensor
5796 * that rounds the value to the nearest integer.
5797 *
5798 * Requires 1 input and produces 1 output.
5799 * Output shape is same as the input.
5800 *
5801 */
5802message RoundLayerParams {
5803
5804}
5805
5806/**
5807 * A layer that performs an element-wise floor operation on the input tensor,
5808 * rounding each value to the largest integer not greater than that value.
5809 *
5810 * Requires 1 input and produces 1 output.
5811 * Output shape is same as the input.
5812 *
5813 */
5814message FloorLayerParams {
5815
5816}
5817
5818/**
5819 * A layer that performs element-wise sign operation (+1 for positive values,
5820 * -1 for negative values, 0 for zeros).
5821 *
5822 * Requires 1 input and produces 1 output.
5823 * Output shape is same as the input.
5824 *
5825 */
5826message SignLayerParams {
5827
5828}
5829
5830/**
5831 * A layer that performs element-wise clip operation. Clip the values in the
5832 * input tensor to the threshold values [min_value, max_value].
5833 *
5834 * Requires 1 input and produces 1 output.
5835 *
5836 * Parameter minVal: the minimum threshold.
5837 * Parameter maxVal: the maximum threshold.
5838 *
5839 * output =  min(max(input, minVal), maxVal)
5840 *
5841 * Output shape is same as the input.
5842 */
5843message ClipLayerParams {
5844
5845    float minVal = 1;
5846    float maxVal = 2;
5847
5848}
5849
5850/**
5851 * A layer that extracts a slice of size ``(end - begin) / stride``
5852 * from the given input tensor.
5853 * Supports negative indexing and negative strides.
5854 *
5855 * Requires 1 input and produces 1 output.
5856 * Output rank is same as the input rank.
5857 *
5858 * The parameters beginIds, beginMasks, endIds, endMasks, and strides are all required.
5859 * Lengths of all the parameters must equal the rank of the input.
5860 *
5861 * i-th element of "beginIds" is ignored and assumed to be 0 if the i-th element of
5862 * "beginMasks" is True
5863 *
5864 * i-th element of "endIds" is ignored and assumed to be -1 if the i-th element of
5865 * "endMasks" is True
5866 *
5867 * Note:
5868 * if the i-th element of "squeezeMasks" is set to True, only the element at index beginIds[i]
5869 * is kept along that dimension (its other masks and strides are ignored) and that dimension is removed from the output.
5870 *
5871 * e.g. (without squeezeMasks):
5872 * input shape:  (5, 5, 5)
5873 * beginIds:  [1, 2, 3]
5874 * beginMasks:  [True, False, True]
5875 * endIds:  [3, -3, 2]
5876 * endMasks:  [False, True, True]
5877 * strides:  [2, 2, 2]
5878 * SqueezeMasks:  [False, False, False]
5879 * output shape:  (2, 2, 3)
5880 * This is equivalent to input[:3:2, 2::2, ::2]
5881 *
5882 * e.g. (with squeezeMasks):
5883 * input shape:  (5, 5, 5)
5884 * beginIds:  [1, 2, 3]
5885 * beginMasks:  [True, False, True]
5886 * endIds:  [3, -3, 2]
5887 * endMasks:  [False, True, True]
5888 * strides:  [2, 2, 2]
5889 * SqueezeMasks:  [False, True, False]
5890 * output shape:  (2, 3)
5891 * This is equivalent to input[:3:2, 2, ::2]
5892 *
5893 */
5894message SliceStaticLayerParams {
5895
5896    repeated int64 beginIds = 1;
5897    repeated bool beginMasks = 2;
5898    repeated int64 endIds = 3;
5899    repeated bool endMasks = 4;
5900    repeated int64 strides = 5;
5901    repeated bool squeezeMasks = 6;
5902
5903
5904}
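
/*
 * Editor's note (illustrative, not part of the specification): a minimal NumPy
 * sketch of the SliceStatic behavior described above, matching the two examples;
 * the handling of "squeezeMasks" follows the interpretation given in the comment.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def slice_static(x, begin_ids, begin_masks, end_ids, end_masks, strides, squeeze_masks):
 *         index = []
 *         for i in range(x.ndim):
 *             if squeeze_masks[i]:
 *                 index.append(begin_ids[i])       # keep a single element, drop the dimension
 *                 continue
 *             start = None if begin_masks[i] else begin_ids[i]
 *             stop = None if end_masks[i] else end_ids[i]
 *             index.append(slice(start, stop, strides[i]))
 *         return x[tuple(index)]
 *
 *     # e.g. slice_static(np.zeros((5, 5, 5)), [1, 2, 3], [True, False, True],
 *     #                   [3, -3, 2], [False, True, True], [2, 2, 2],
 *     #                   [False, False, False]).shape == (2, 2, 3)
 */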
5905
5906/**
5907 * A layer that extracts a slice of size ``(end - begin) / stride``
5908 * from the given input tensor.
5909 * Supports negative indexing and negative strides.
5910 * See "SliceStaticLayerParams" for the description and an example of the functionality of the layer.
5911 *
5912 * Requires 2 to 7 inputs and produces 1 output.
5913 * Rank of the output is same as the rank of the first input unless squeezeMasks is set.
5914 *
5915 * Value of beginIds, beginMasks, endIds, endMasks, strides can be passed in either
5916 * as dynamic inputs or as static parameters.
5917 * Lengths of all the parameters, or of inputs 2 through 7 when provided, must equal the rank of the first input.
5918 *
5919 * The 2nd input represents the "beginIds".
5920 * The 3rd input, if present, corresponds to "endIds". In this case the value of the "endIds" parameter is ignored.
5921 * The 4th input, if present, corresponds to "strides". In this case the value of the "strides" parameter is ignored.
5922 * The 5th input, if present, corresponds to "beginMasks". In this case the value of the "beginMasks" parameter is ignored.
5923 * The 6th input, if present, corresponds to "endMasks". In this case the value of the "endMasks" parameter is ignored.
5924 * The 7th input, if present, corresponds to "squeezeMasks". In this case the value of the "squeezeMasks" parameter is ignored.
5925 *
5926 */
5927message SliceDynamicLayerParams {
5928
5929    repeated bool beginMasks = 2;
5930    repeated int64 endIds = 3;
5931    repeated bool endMasks = 4;
5932    repeated int64 strides = 5;
5933    repeated bool squeezeMasks = 6;
5934
5935}
5936
5937/**
5938 * A layer that constructs a tensor by repeating the input tensor multiple
5939 * number of times.
5940 *
5941 * Requires 1 or 2 inputs and produces 1 output.
5942 * Output rank is same as the input rank.
5943 *
5944 * If two inputs are provided, second input is used as "reps"
5945 * and "reps" parameter is ignored.
5946 *
5947 * If only one input is provided,
5948 * length of the "reps" parameter must be at least 1 and
5949 * not greater than the rank of the input.
5950 * If it is less than the input rank, it is made equal to the input rank by prepending 1's to it.
5951 *
5952 * e.g.:
5953 *
5954 * input shape = (2, 4, 2)
5955 * reps = (1, 2, 6)
5956 * output shape = (2, 8, 12)
5957 *
5958 * input shape = (2, 4, 2)
5959 * reps = (6)
5960 * reps after prepending ones = (1, 1, 6)
5961 * output shape = (2, 4, 12)
5962 *
5963 * input shape = (2, 4, 2)
5964 * second input = [1, 2, 6] -> shape: (3,)
5965 * reps = N/A [Ignored]
5966 * output shape = (2, 8, 12)
5967 *
5968 */
5969message TileLayerParams {
5970
5971    repeated uint64 reps = 1;
5972
5973}
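
/*
 * Editor's note (illustrative, not part of the specification): a minimal NumPy
 * sketch of the Tile behavior described above, including the rule that a short
 * "reps" vector is padded with leading 1s.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def tile(x, reps):
 *         reps = [1] * (x.ndim - len(reps)) + [int(r) for r in reps]   # prepend 1s
 *         return np.tile(x, reps)
 *
 *     # e.g. tile(np.zeros((2, 4, 2)), [6]).shape == (2, 4, 12)
 */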
5974
5975/**
5976 * A layer that returns the shape of an input tensor.
5977 *
5978 * Requires 1 input and produces 1 output.
5979 *
5980 * Input: a tensor.
5981 * Output: a vector of length R, where R is the rank of the input tensor
5982 * Output is always a rank 1 tensor.
5983 */
5984message GetShapeLayerParams {
5985
5986}
5987
5988/**
5989 * A layer that computes the Gauss error function,
5990 * which is defined as:
5991 *
5992 * .. math::
5993 *     f(x) = \dfrac{1}{\sqrt{\pi}}\int_{-x}^{x}{e^{-t^2}dt}
5994 *
5995 * Requires 1 input and produces 1 output.
5996 * Output shape is same as the input.
5997 */
5998message ErfLayerParams {
5999
6000}
6001
6002/**
6003 * A layer that evaluates the Gaussian Error Linear Unit (GELU) activation.
6004 * Following equations are used to compute the activation based on the value of the "mode" parameter:
6005 *
6006 * mode == 'EXACT':
6007 * .. math::
6008 *     f(x) = 0.5x\left ( 1+\rm{erf}\left ( \frac{x}{\sqrt{2}} \right ) \right )
6009 *
6010 * mode == 'TANH_APPROXIMATION':
6011 * .. math::
6012 *     f(x) = 0.5x\left ( 1+\rm{tanh}\left ( \sqrt{2/\pi}\left ( x + 0.044715x^3 \right ) \right ) \right )
6013 *
6014 * mode == 'SIGMOID_APPROXIMATION':
6015 * .. math::
6016 *     f(x) = x*\rm{sigmoid}(1.702x)
6017 *
6018 * Requires 1 input and produces 1 output.
6019 * Output shape is same as the input.
6020 *
6021 */
6022message GeluLayerParams {
6023
6024    enum GeluMode {
6025
6026        EXACT = 0;
6027        TANH_APPROXIMATION = 1;
6028        SIGMOID_APPROXIMATION = 2;
6029
6030    }
6031
6032    GeluMode mode = 1; /// mode of GELU operation.
6033
6034}
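
/*
 * Editor's note (illustrative, not part of the specification): a minimal NumPy/SciPy
 * sketch of the three GELU modes defined above.
 *
 * .. code::
 *
 *     import numpy as np
 *     from scipy.special import erf       # used only by the EXACT mode
 *
 *     def gelu(x, mode="EXACT"):
 *         if mode == "EXACT":
 *             return 0.5 * x * (1.0 + erf(x / np.sqrt(2.0)))
 *         if mode == "TANH_APPROXIMATION":
 *             return 0.5 * x * (1.0 + np.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * x ** 3)))
 *         if mode == "SIGMOID_APPROXIMATION":
 *             return x / (1.0 + np.exp(-1.702 * x))
 *         raise ValueError("unknown GELU mode: " + mode)
 */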
6035
6036/**
6037 * RangeStatic layer that returns a tensor that contains evenly spaced values.
6038 * It is similar in functionality to the numpy.arange method.
6039 *
6040 * Requires no input and produces 1 output.
6041 * Output is a rank 1 tensor.
6042 */
6043message RangeStaticLayerParams {
6044
6045    float endValue = 1;
6046    float startValue = 2;
6047    float stepSizeValue = 3;
6048
6049}
6050
6051/**
6052 * A layer that returns a tensor that contains evenly spaced values.
6053 * Its functionality is similar to the numpy.arange method.
6054 *
6055 * Requires at least 1 input, up to a maximum of 3 inputs.
6056 * Produces 1 output, which is a rank 1 tensor.
6057 *
6058 * Each input must be a scalar, or rank 1 and shape (1,).
6059 *
6060 * The first input represents the "endValue".
6061 * The second input, if present, corresponds to "startValue". In this case the value of the "startValue" parameter is ignored.
6062 * The third input, if present, corresponds to "stepSizeValue". In this case the value of the "stepSizeValue" parameter is ignored.
6063 *
6064 */
6065message RangeDynamicLayerParams {
6066
6067    float startValue = 2;
6068    float stepSizeValue = 3;
6069
6070}
6071
6072/**
6073 * A layer that returns a tensor containing all windows of size ``windowSize``
6074 * separated by ``step`` along the dimension ``axis``.
6075 *
6076 * .. code::
6077 *
6078 *      y = SlidingWindows(x)
6079 *
6080 * Requires 1 input and produces 1 output.
6081 *
6082 * Input
6083 *     An N-Dimensional tensor.
6084 *
6085 * Output
6086 *     An (N+1)-Dimensional tensor.
6087 *
6088 * This operation behaves as follows:
6089 *      - if axis = 0 & input is rank 1 (L,). Output shape will be (M, W).
6090 *      - if axis = 1 & input is rank 3 (B1, L, C1). Output shape will be (B1, M, W, C1)
6091 *      - if axis = 2 & input is rank 5 (B1, B2, L, C1, C2) --> (B1 * B2, L, C1 * C2) --> (B1 * B2, M, W, C1 * C2). Output shape will be (B1, B2, M, W, C1, C2)
6092 *      - etc.
6093 * where
6094 *      - L, C, B refer to input length, feature dimension length & batch size respectively
6095 *      - W is the window size.
6096 *      - M is the number of windows/slices calculated as M = (L - W) / step + 1
6097 */
6098message SlidingWindowsLayerParams {
6099
6100    int64 axis = 1;
6101    uint64 windowSize = 2;
6102    uint64 step = 3;
6103
6104}
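
/*
 * Editor's note (illustrative, not part of the specification): a minimal NumPy
 * sketch of the SlidingWindows behavior described above; the new "M" (number of
 * windows) axis is inserted immediately before the window axis, and "axis" is
 * assumed non-negative in this sketch.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def sliding_windows(x, axis, window_size, step):
 *         length = x.shape[axis]
 *         num_windows = (length - window_size) // step + 1      # M = (L - W) / step + 1
 *         windows = [np.take(x, range(s, s + window_size), axis=axis)
 *                    for s in range(0, num_windows * step, step)]
 *         return np.stack(windows, axis=axis)
 *
 *     # e.g. sliding_windows(np.zeros((3, 10, 2)), axis=1, window_size=4, step=2).shape
 *     #      == (3, 4, 4, 2)        # (B1, M, W, C1) with M = 4
 */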
6105
6106/**
6107 * A layer that applies layer normalization over the input tensor.
6108 *
6109 * Requires 1 input and produces 1 output.
6110 *
6111 * output = gamma * (input - computed_mean) / (sqrt(computed_variance + eps)) + beta
6112 *
6113 * Parameters
6114 *     normalizedShape: the trailing subset of the input shape along which layer norm is performed; the rest of the input shape is treated as the batch dimensions. The mean and variance are computed over the input's last few dimensions, as specified by the normalizedShape parameter.
6115 *     gamma: must have shape = "normalizedShape"
6116 *     beta: must have shape = "normalizedShape"
6117 *     eps: small constant to avoid division by 0
6118 *
6119 * Output shape is same as the input.
6120 *
6121 * e.g.:
6122 * input shape = (10,5)
6123 * normalized shape = (5,) or (10,5)
6124 *
6125 * input shape = (10,5,6,7)
6126 * normalized shape = (7,) or (6,7) or (5,6,7) or (10,5,6,7)
6127 */
6128message LayerNormalizationLayerParams {
6129
6130    repeated int64 normalizedShape = 1;
6131    float eps = 2;
6132    WeightParams gamma = 3;
6133    WeightParams beta = 4;
6134
6135}
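
/*
 * Editor's note (illustrative, not part of the specification): a minimal NumPy
 * sketch of the layer normalization formula given above; the "eps" default and
 * the function name are illustrative.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def layer_norm(x, normalized_shape, gamma, beta, eps=1e-5):
 *         # mean and variance are computed over the trailing len(normalized_shape) axes
 *         axes = tuple(range(x.ndim - len(normalized_shape), x.ndim))
 *         mean = x.mean(axis=axes, keepdims=True)
 *         var = x.var(axis=axes, keepdims=True)
 *         return gamma * (x - mean) / np.sqrt(var + eps) + beta
 */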
6136
6137/**
6138 * Non maximum suppression (NMS) layer.
6139 * Applies the non maximum suppression algorithm to input bounding box coordinates.
6140 * The effect of this layer is similar to the functionality of the "NonMaximumSuppression"
6141 * model type (for details please see NonMaximumSuppression.proto) with a couple of differences.
6142 * One, this is a layer in a neural network model, whereas that is a different model type. Second,
6143 * this layer supports a batch of bounding boxes.
6144 *
6145 * The NMS layer requires at least 2 inputs, and up to a maximum of 5 inputs. It produces 4 outputs.
6146 * Following is the description of inputs and outputs:
6147 *
6148 * input 1, shape (B,N,4): coordinates of N boxes, for a batch size B.
6149 * input 2, shape (B,N,C): class scores for each box. C can be 1 when there is only 1 score per box, i.e., no class specific score.
6150 *
6151 * input 3, optional, shape (1,): IoU threshold. When present, it overwrites the value provided in layer parameter "iouThreshold".
6152 * input 4, optional, shape (1,): Score threshold. When present, it overwrites the value provided in layer parameter "scoreThreshold".
6153 * input 5, optional, shape (1,): Maximum number of boxes. When present, it overwrites the value provided in layer parameter "maxBoxes".
6154 *
6155 * output 1, shape (B,maxBoxes,4): box coordinates, corresponding to the surviving boxes.
6156 * output 2, shape (B,maxBoxes,C): box scores, corresponding to the surviving boxes.
6157 * output 3, shape (B,maxBoxes): indices of the surviving boxes. Hence it will have values in the range [0,N-1], except for padding.
6158 * output 4, shape (B,): number of boxes selected after the NMS algorithm, for each batch.
6159 *
6160 * When fewer boxes than "maxBoxes" survive, the first 3 outputs are padded.
6161 * For the first two outputs, the padding is done using values 0, whereas for the third output the
6162 * padding value used is -1, since the output values represent indices.
6163 *
6164 * If no box survives, that is, all the scores are below the "scoreThreshold",
6165 * then for that batch, number of boxes (value of the fourth output) will be 1. The first 3 outputs will
6166 * correspond to the box with the highest score. This is to avoid generating an "empty" output.
6167 *
6168 * The four values that describe the box dimensions are (in order):
6169 *
6170 *  - x (center location of the box along the horizontal axis)
6171 *  - y (center location of the box along the vertical axis)
6172 *  - width (size of box along the horizontal axis)
6173 *  - height (size of box along the vertical axis)
6174 *
6175 * In each batch,
6176 * the N scores for N boxes, used for suppression, are generated by taking the max of the matrix (N,C)
6177 * along the columns.
6178 * If "perClassSuppression" flag is false, suppression happens across all classes.
6179 * If "perClassSuppression" flag is true, each box is assigned to the class with the highest
6180 * score and then the suppression happens separately for boxes within the same class.
6181 *
6182 * Note that the 4th output can be used to dynamically slice the first 3 outputs, in case
6183 * the padded outputs are not required.
6184 *
6185 */
6186message NonMaximumSuppressionLayerParams {
6187    /**
6188     * The intersection over union (IoU) threshold over which boxes are suppressed.
6189     */
6190    float iouThreshold = 1;
6191
6192    /**
6193     * Before IoU suppression is performed, boxes with class scores below this threshold are rejected.
6194     */
6195    float scoreThreshold = 2;
6196
6197    /**
6198     * The maximum number of boxes to be given out as output.
6199 *     If the number of surviving boxes is less, the output is padded up to this number.
6200     */
6201    uint64 maxBoxes = 3;
6202
6203    /**
6204     * If true, suppression is performed independently within boxes of each class.
6205     */
6206    bool perClassSuppression = 4;
6207}
6208
6209/**
6210 * A layer that performs element-wise clamped ReLU operation.
6211 *
6212 * Requires 1 input and produces 1 output.
6213 *
6214 * This function has the following formula:
6215 *
6216 * .. math::
6217 *     f(x) = \begin{cases}
6218 *               \text{min}(\text{beta},x) \;\; \text{if} \;\; x \geq 0\\
6219 *               \text{min}(\text{beta} ,\text{alpha}\cdot x) \;\; \text{if} \;\; x<0
6220 *            \end{cases}
6221 *
6222 * Output shape is same as the input.
6223 *
6224 * Available (iOS >= 14, macOS >= 11.0, watchOS >= 7)
6225 */
6226message ClampedReLULayerParams {
6227
6228    float alpha = 1;
6229    float beta = 2;
6230
6231}
6232
6233/**
6234* A layer that returns the indices that would sort the input tensor, along a specified axis.
6235*
6236* Requires 1 input and produces 1 output.
6237*
6238* Output has the same rank and shape as the input.
6239*
6240* Value of "axis" must be non-negative and less than the rank of the input.
6241*
6242* e.g.:
6243*
6244* input shape = (5,)
6245* axis = 0
6246* input values = [3.1, 5.4, 32.9, 3.2, 77.0]
6247* output shape = (5,)
6248* output values = [0, 3, 1, 2, 4], descending = False
6249* output values = [4, 2, 1, 3, 0], descending = True
6250*
6251* input shape = (2,3)
6252* axis = 1
6253* input values = [[3, 5, 32], [3, 77, 6]]
6254* output shape = (2,3)
6255* output values = [[0, 1, 2], [0, 2, 1]], descending = False
6256* output values = [[2, 1, 0], [1, 2, 0]], descending = True
6257*
6258*/
6259message ArgSortLayerParams {
6260
6261    int64 axis = 1; /// must be between [0, input_rank - 1]
6262    bool descending = 2;
6263
6264}
6265
6266/**
6267 * A layer that does slice operation by providing size to be extracted
6268 * from the given input tensor.
6269 *
6270 * Requires 2 inputs and produces 1 output.
6271 * Rank of the output is same as the rank of the first input.
6272 *
6273 * The 1st input represents the tensor to be sliced.
6274 * The 2nd input represents the beginning index to be sliced from.
6275 *
6276 * Example:
6277 * Input 1: x (x.shape = (2, 3, 4))
6278 * Input 2: begin
6279 * size: 2
6280 * axis: 1
6281 *
6282 * Output: x[:, begin:begin+2, :]
6283 *
6284 */
6285message SliceBySizeLayerParams {
6286
6287    int64 size = 2;
6288    int64 axis = 3;
6289
6290}
6291
6292
6293/// Neural Network Specializations
6294/// ------------------------------
6295
6296/**
6297 * A neural network specialized as a classifier.
6298 */
6299message NeuralNetworkClassifier {
6300
6301    repeated NeuralNetworkLayer layers = 1;
6302    repeated NeuralNetworkPreprocessing preprocessing = 2;
6303
6304    // use this enum value to determine the input tensor shapes to the neural network, for multiarray inputs
6305    NeuralNetworkMultiArrayShapeMapping arrayInputShapeMapping = 5;
6306
6307    // use this enum value to determine the input tensor shapes to the neural network, for image inputs
6308    NeuralNetworkImageShapeMapping imageInputShapeMapping = 6;
6309
6310    NetworkUpdateParameters updateParams = 10;
6311
6312    // The set of labels for every possible class.
6313    oneof ClassLabels {
6314        StringVector stringClassLabels = 100;
6315        Int64Vector int64ClassLabels = 101;
6316    }
6317
6318    // The name of the output blob containing the probability of each class.
6319    // In other words, the score vector. Must be a 1-D tensor with the same
6320    // number and order of elements as ClassLabels.
6321    string labelProbabilityLayerName = 200;
6322}
6323
6324
6325/**
6326 * A layer that computes the one hot representation of the input.
6327 *
6328 * Requires 1 or 2 inputs and produces 1 output.
6329 * Rank of the output is one more than the first input.
6330 * If the second input is present, it is used to determine the value of "oneHotVectorSize" and the parameter "oneHotVectorSize" is ignored.
6331 *
6332 * Input values correspond to indices and should typically be in the range [0, "oneHotVectorSize" - 1]. If an index is outside this range, the corresponding output vector consists entirely of "offValue".
6333 *
6334 * Typically one hot vectors contain 0s everywhere, except 1 at the index that the input corresponds to.
6335 * However, instead of 0, any float value could be generated by using the "offValue" parameter.
6336 * Similarly, instead of 1, any other value can be used by employing the "onValue" parameter.
6337 *
6338 * e.g.:
6339 * input shape: (10,), "oneHotVectorSize" : 32, axis=-1, then output shape will be (10,32)
6340 * input shape: (10,23), "oneHotVectorSize" : 32, axis=1, then output shape will be (10,32,23)
6341 * input shape: (10,), "oneHotVectorSize" : 32, axis=0, then output shape will be (32,10)
6342 *
6343 * input shape: (2,), "oneHotVectorSize" : 4, axis=-1, then output shape will be (2,4)
6344 * say input values = [2, 0], and "onValue" = 5, and "offValue" = -1, then output will be:
6345 * [-1, -1, 5, -1
6346 *  5, -1, -1, -1]
6347 *
6348 *  say input values = [2, -1], and "onValue" = 5, and "offValue" = -1, then output will be:
6349 * [-1, -1, 5, -1
6350 *  -1, -1, -1, -1]
6351 *
6352 * Available (iOS >= 14, macOS >= 11.0, watchOS >= 7)
6353 */
6354
6355message OneHotLayerParams {
6356
6357    uint64 oneHotVectorSize = 1; /// size of the one hot vector
6358    int64 axis = 2; ///  negative indexing is supported. It refers to the axis in the output tensor.
6359    float onValue = 3;
6360    float offValue = 4;
6361}
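
/*
 * Editor's note (illustrative, not part of the specification): a minimal NumPy
 * sketch of the OneHot behavior described above, matching the worked examples.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def one_hot(indices, size, axis=-1, on_value=1.0, off_value=0.0):
 *         indices = np.asarray(indices)
 *         flat = indices.reshape(-1)
 *         out = np.full((flat.size, size), off_value, dtype=np.float32)
 *         valid = (flat >= 0) & (flat < size)                   # out-of-range -> all offValue
 *         out[np.arange(flat.size)[valid], flat[valid]] = on_value
 *         out = out.reshape(indices.shape + (size,))            # one-hot axis is last here
 *         return out if axis in (-1, out.ndim - 1) else np.moveaxis(out, -1, axis)
 *
 *     # e.g. one_hot([2, 0], size=4, on_value=5, off_value=-1)
 *     #      == [[-1, -1, 5, -1], [5, -1, -1, -1]]
 */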
6362
6363
6364/**
6365 * A layer that computes the cumsum values of the input along a given axis.
6366 *
6367 * Requires 1 or 2 inputs and produces 1 output.
6368 *
6369 * Output shape and rank is same as the first input.
6370 * If the second input is present, it is used to determine the value of "axis" and the parameter "axis" is ignored.
6371 *
6372 * e.g.:
6373 * Input shape = (3,), values it has:  [4, 6, 7]
6374 *
6375 * Then output values will be:
6376 *
6377 * if "excludeFinalSum" = False and "reverse" = False:
6378 * output values : [4, 10, 17]
6379 *
6380 * if "excludeFinalSum" = True and "reverse" = False:
6381 * output values : [0, 4, 10]
6382 *
6383 * if "excludeFinalSum" = False and "reverse" = True:
6384 * output values : [17, 13, 7]
6385 *
6386 * if "excludeFinalSum" = True and "reverse" = True:
6387 * output values : [13, 7, 0]
6388 *
6389 *
6390 * Available (iOS >= 14, macOS >= 11.0, watchOS >= 7)
6391 */
6392
6393
6394message CumSumLayerParams {
6395
6396    int64 axis = 1; ///  negative indexing is supported
6397
6398    /// if true, the first element of the output is 0, and the last element contains the sum of the input up to the penultimate value
6399    /// if false, the first element of the output is same as the input and the last element is the sum of all the input values
6400    /// (this behavior is reversed when "reverse" flag is True)
6401    bool excludeFinalSum = 2;
6402
6403    bool reverse = 3; /// if true, cumsum is performed in the opposite direction
6404}
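
/*
 * Editor's note (illustrative, not part of the specification): a minimal NumPy
 * sketch of the CumSum behavior described above, matching the four worked examples.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def cum_sum(x, axis=0, exclude_final_sum=False, reverse=False):
 *         v = np.flip(x, axis=axis) if reverse else x
 *         out = np.cumsum(v, axis=axis)
 *         if exclude_final_sum:                    # exclusive scan: shift right, insert 0
 *             out = np.roll(out, 1, axis=axis)
 *             first = [slice(None)] * out.ndim
 *             first[axis] = 0
 *             out[tuple(first)] = 0
 *         return np.flip(out, axis=axis) if reverse else out
 *
 *     # e.g. cum_sum(np.array([4, 6, 7]), exclude_final_sum=True, reverse=True)
 *     #      == [13, 7, 0]
 */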
6405
6406
6407/**
6408 * A neural network specialized as a regressor.
6409 */
6410message NeuralNetworkRegressor {
6411
6412    repeated NeuralNetworkLayer layers = 1;
6413    repeated NeuralNetworkPreprocessing preprocessing = 2;
6414
6415    // use this enum value to determine the input tensor shapes to the neural network, for multiarray inputs
6416    NeuralNetworkMultiArrayShapeMapping arrayInputShapeMapping = 5;
6417
6418    // use this enum value to determine the input tensor shapes to the neural network, for image inputs
6419    NeuralNetworkImageShapeMapping imageInputShapeMapping = 6;
6420
6421    NetworkUpdateParameters updateParams = 10;
6422
6423}
6424
6425/// ---------------------------------------------------------
6426/// On-device Training related messages
6427/// ---------------------------------------------------------
6428
6429/**
6430 * Details on how the network will be updated
6431 */
6432message NetworkUpdateParameters {
6433
6434    repeated LossLayer lossLayers = 1;
6435    Optimizer optimizer = 2;
6436    Int64Parameter epochs = 3;
6437
6438    /**
6439     * Describes whether to shuffle the batch of data between epochs.
6440     */
6441    BoolParameter shuffle = 10;
6442
6443    /**
6444     * The seed to be used in an associated random number generator.
6445     */
6446    Int64Parameter seed = 20;
6447}
6448
6449/**
6450 * Loss layer - categorical cross entropy and mean squared error are the only supported loss functions currently
6451 */
6452message LossLayer {
6453
6454    string name = 1;
6455    oneof LossLayerType {
6456
6457        CategoricalCrossEntropyLossLayer categoricalCrossEntropyLossLayer = 10;
6458        MeanSquaredErrorLossLayer meanSquaredErrorLossLayer = 11;
6459
6460    }
6461
6462}
6463
6464/**
6465 * Categorical cross entropy loss layer
6466 * Categorical cross entropy is used for single label categorization (only one category is applicable for each data point).
6467 *
6468 * The input is a vector of length N representing the distribution over N categories.  It must be the output of a softmax.
6469 *
6470 * The target is a single value representing the true category or class label. If the target is the predictedFeatureName of a neural network classifier it will be inverse mapped to the corresponding categorical index for you.
6471 *
6472 * .. math::
6473 *     \text{Loss}_{CCE}(\text{input}, \text{target}) = -\sum_{i=1}^{N} (\text{target} == i) \log(\text{input}[i]) = -\log(\text{input}[\text{target}])
6474 */
6475message CategoricalCrossEntropyLossLayer {
6476
6477    string input = 1;
6478    string target = 2;
6479
6480}
6481
6482/**
6483 * Mean squared error loss layer,
6484 * specifying input and target
6485 */
6486message MeanSquaredErrorLossLayer {
6487
6488    string input = 1;
6489    string target = 2;
6490
6491}
6492
6493/**
6494 * Optimizer - stochastic gradient descent and adam are the only supported optimizers currently
6495 */
6496message Optimizer {
6497
6498    oneof OptimizerType {
6499
6500        SGDOptimizer sgdOptimizer = 10;
6501        AdamOptimizer adamOptimizer = 11;
6502
6503    }
6504
6505}
6506
6507/**
6508 * Stochastic gradient descent optimizer,
6509 * specifying configurable learning rate, mini batch size, and momentum
6510 */
6511message SGDOptimizer {
6512
6513    DoubleParameter learningRate = 1;
6514    Int64Parameter miniBatchSize = 2;
6515    DoubleParameter momentum = 3;
6516
6517}
6518
6519/**
6520 * Adam optimizer,
6521 * specifying configurable learning rate, mini batch size, betas, and eps
6522 */
6523message AdamOptimizer {
6524
6525    DoubleParameter learningRate = 1;
6526    Int64Parameter miniBatchSize = 2;
6527    DoubleParameter beta1 = 3;
6528    DoubleParameter beta2 = 4;
6529    DoubleParameter eps = 5;
6530
6531}
6532