1// Copyright 2019 Google LLC 2// 3// This source code is licensed under the BSD-style license found in the 4// LICENSE file in the root directory of this source tree. 5 6$assert BATCH_TILE >= 1 7$ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" 8$assert OP in ["ADD", "DIV", "RDIV", "MAX", "MIN", "MUL", "SUB", "RSUB", "SQRDIFF"] 9$assert ACTIVATION in ["LINEAR", "MINMAX", "RELU"] 10#include <assert.h> 11 12#include <xnnpack/common.h> 13#include <xnnpack/math.h> 14#include <xnnpack/vbinary.h> 15 16 17$MIN_F32 = "__builtin_wasm_min_f32" if WASM else "math_min_f32" 18$MAX_F32 = "__builtin_wasm_max_f32" if WASM else "math_max_f32" 19$OP_FUNC = { 20$ "ADD": lambda x: "%s + vb" % x, 21$ "DIV": lambda x: "%s / vb" % x, 22$ "RDIV": lambda x: "vb / %s" % x, 23$ "MAX": lambda x: "%s(%s, vb)" % (MAX_F32, x), 24$ "MIN": lambda x: "%s(%s, vb)" % (MIN_F32, x), 25$ "MUL": lambda x: "%s * vb" % x, 26$ "SUB": lambda x: "%s - vb" % x, 27$ "RSUB": lambda x: "vb - %s" % x, 28$ "SQRDIFF": lambda x: "%s - vb" % x, 29$}[OP] 30$SUFFIX = {"LINEAR": "", "RELU": "_relu", "MINMAX": "_minmax"}[ACTIVATION] 31$PARAMS = {"LINEAR": "xnn_f32_default_params", "RELU": "xnn_f32_relu_params", "MINMAX": "xnn_f32_minmax_params"}[ACTIVATION] 32void xnn_f32_v${OP.lower()}c${SUFFIX}_ukernel__${"wasm" if WASM else "scalar"}_x${BATCH_TILE}( 33 size_t n, 34 const float* a, 35 const float* b, 36 float* y, 37 const union ${PARAMS} params[restrict XNN_MIN_ELEMENTS(1)]) 38{ 39 assert(n != 0); 40 assert(n % sizeof(float) == 0); 41 assert(a != NULL); 42 assert(b != NULL); 43 assert(y != NULL); 44 45 $if ACTIVATION == "MINMAX": 46 const float vy_min = params->scalar.min; 47 const float vy_max = params->scalar.max; 48 49 const float vb = *b; 50 $if BATCH_TILE > 1: 51 for (; n >= ${BATCH_TILE} * sizeof(float); n -= ${BATCH_TILE} * sizeof(float)) { 52 $for N in range(BATCH_TILE): 53 const float va${ABC[N]} = a[${N}]; 54 a += ${BATCH_TILE}; 55 56 $for N in range(BATCH_TILE): 57 float vy${ABC[N]} = ${OP_FUNC("va" + ABC[N])}; 58 59 $if OP == "SQRDIFF": 60 $for N in range(BATCH_TILE): 61 vy${ABC[N]} = vy${ABC[N]} * vy${ABC[N]}; 62 63 $if ACTIVATION == "MINMAX": 64 $for N in range(BATCH_TILE): 65 vy${ABC[N]} = ${MAX_F32}(vy${ABC[N]}, vy_min); 66 67 $for N in range(BATCH_TILE): 68 vy${ABC[N]} = ${MIN_F32}(vy${ABC[N]}, vy_max); 69 $elif ACTIVATION == "RELU": 70 $for N in range(BATCH_TILE): 71 vy${ABC[N]} = ${MAX_F32}(vy${ABC[N]}, 0.0f); 72 73 $for N in range(BATCH_TILE): 74 y[${N}] = vy${ABC[N]}; 75 y += ${BATCH_TILE}; 76 } 77 if XNN_UNLIKELY(n != 0) { 78 $if BATCH_TILE > 2: 79 do { 80 const float va = *a++; 81 float vy = ${OP_FUNC("va")}; 82 $if OP == "SQRDIFF": 83 vy = vy * vy; 84 $if ACTIVATION == "MINMAX": 85 vy = ${MAX_F32}(vy, vy_min); 86 vy = ${MIN_F32}(vy, vy_max); 87 $elif ACTIVATION == "RELU": 88 vy = ${MAX_F32}(vy, 0.0f); 89 *y++ = vy; 90 n -= sizeof(float); 91 } while (n != 0); 92 $else: 93 const float va = *a; 94 float vy = ${OP_FUNC("va")}; 95 $if OP == "SQRDIFF": 96 vy = vy * vy; 97 $if ACTIVATION == "MINMAX": 98 vy = ${MAX_F32}(vy, vy_min); 99 vy = ${MIN_F32}(vy, vy_max); 100 $elif ACTIVATION == "RELU": 101 vy = ${MAX_F32}(vy, 0.0f); 102 *y = vy; 103 } 104 $else: 105 for (; n >= sizeof(float); n -= sizeof(float)) { 106 const float va = *a++; 107 float vy = ${OP_FUNC("va")}; 108 $if OP == "SQRDIFF": 109 vy = vy * vy; 110 $if ACTIVATION == "MINMAX": 111 vy = ${MAX_F32}(vy, vy_min); 112 vy = ${MIN_F32}(vy, vy_max); 113 $elif ACTIVATION == "RELU": 114 vy = ${MAX_F32}(vy, 0.0f); 115 *y++ = vy; 116 } 117} 118