// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

$assert BATCH_TILE >= 1
$ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
$assert OP in ["ADD", "DIV", "MAX", "MIN", "MUL", "SUB", "SQRDIFF"]
$assert ACTIVATION in ["LINEAR", "MINMAX", "RELU"]
#include <assert.h>

#include <xnnpack/common.h>
#include <xnnpack/math.h>
#include <xnnpack/vbinary.h>


$MIN_F32 = "__builtin_wasm_min_f32" if WASM else "math_min_f32"
$MAX_F32 = "__builtin_wasm_max_f32" if WASM else "math_max_f32"
$OP_FUNC = {
$  "ADD": lambda x, y: "%s + %s" % (x, y),
$  "DIV": lambda x, y: "%s / %s" % (x, y),
$  "MAX": lambda x, y: "%s(%s, %s)" % (MAX_F32, x, y),
$  "MIN": lambda x, y: "%s(%s, %s)" % (MIN_F32, x, y),
$  "MUL": lambda x, y: "%s * %s" % (x, y),
$  "SUB": lambda x, y: "%s - %s" % (x, y),
$  "SQRDIFF": lambda x, y: "%s - %s" % (x, y),
$}[OP]
$SUFFIX = {"LINEAR": "", "RELU": "_relu", "MINMAX": "_minmax"}[ACTIVATION]
$PARAMS = {"LINEAR": "xnn_f32_default_params", "RELU": "xnn_f32_relu_params", "MINMAX": "xnn_f32_minmax_params"}[ACTIVATION]
void xnn_f32_v${OP.lower()}${SUFFIX}_ukernel__${"wasm" if WASM else "scalar"}_x${BATCH_TILE}(
    size_t n,
    const float* a,
    const float* b,
    float* y,
    const union ${PARAMS} params[restrict XNN_MIN_ELEMENTS(1)])
{
  assert(n != 0);
  assert(n % sizeof(float) == 0);
  assert(a != NULL);
  assert(b != NULL);
  assert(y != NULL);

  $if ACTIVATION == "MINMAX":
    const float vy_min = params->scalar.min;
    const float vy_max = params->scalar.max;

  $if BATCH_TILE > 1:
    for (; n >= ${BATCH_TILE} * sizeof(float); n -= ${BATCH_TILE} * sizeof(float)) {
      $for N in range(BATCH_TILE):
        const float va${ABC[N]} = a[${N}];
      a += ${BATCH_TILE};

      $for N in range(BATCH_TILE):
        const float vb${ABC[N]} = b[${N}];
      b += ${BATCH_TILE};

      $for N in range(BATCH_TILE):
        float vy${ABC[N]} = ${OP_FUNC("va" + ABC[N], "vb" + ABC[N])};

      $if OP == "SQRDIFF":
        $for N in range(BATCH_TILE):
          vy${ABC[N]} = vy${ABC[N]} * vy${ABC[N]};

      $if ACTIVATION == "MINMAX":
        $for N in range(BATCH_TILE):
          vy${ABC[N]} = ${MAX_F32}(vy${ABC[N]}, vy_min);

        $for N in range(BATCH_TILE):
          vy${ABC[N]} = ${MIN_F32}(vy${ABC[N]}, vy_max);
      $elif ACTIVATION == "RELU":
        $for N in range(BATCH_TILE):
          vy${ABC[N]} = ${MAX_F32}(vy${ABC[N]}, 0.0f);

      $for N in range(BATCH_TILE):
        y[${N}] = vy${ABC[N]};
      y += ${BATCH_TILE};
    }
    if XNN_UNLIKELY(n != 0) {
      $if BATCH_TILE > 2:
        do {
          const float va = *a++;
          const float vb = *b++;
          float vy = ${OP_FUNC("va", "vb")};
          $if OP == "SQRDIFF":
            vy = vy * vy;
          $if ACTIVATION == "MINMAX":
            vy = ${MAX_F32}(vy, vy_min);
            vy = ${MIN_F32}(vy, vy_max);
          $elif ACTIVATION == "RELU":
            vy = ${MAX_F32}(vy, 0.0f);
          *y++ = vy;
          n -= sizeof(float);
        } while (n != 0);
      $else:
        const float va = *a;
        const float vb = *b;
        float vy = ${OP_FUNC("va", "vb")};
        $if OP == "SQRDIFF":
          vy = vy * vy;
        $if ACTIVATION == "MINMAX":
          vy = ${MAX_F32}(vy, vy_min);
          vy = ${MIN_F32}(vy, vy_max);
        $elif ACTIVATION == "RELU":
          vy = ${MAX_F32}(vy, 0.0f);
        *y = vy;
    }
  $else:
    for (; n >= sizeof(float); n -= sizeof(float)) {
      const float va = *a++;
      const float vb = *b++;
      float vy = ${OP_FUNC("va", "vb")};
      $if OP == "SQRDIFF":
        vy = vy * vy;
      $if ACTIVATION == "MINMAX":
        vy = ${MAX_F32}(vy, vy_min);
        vy = ${MIN_F32}(vy, vy_max);
      $elif ACTIVATION == "RELU":
        vy = ${MAX_F32}(vy, 0.0f);
      *y++ = vy;
    }
}