// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

$assert BATCH_TILE >= 1
$ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
$assert OP in ["ADD", "DIV", "MAX", "MIN", "MUL", "SUB", "SQRDIFF"]
$assert ACTIVATION in ["LINEAR", "MINMAX", "RELU"]
#include <assert.h>

#include <xnnpack/common.h>
#include <xnnpack/math.h>
#include <xnnpack/vbinary.h>


$MIN_F32 = "__builtin_wasm_min_f32" if WASM else "math_min_f32"
$MAX_F32 = "__builtin_wasm_max_f32" if WASM else "math_max_f32"
$OP_FUNC = {
$  "ADD": lambda x, y: "%s + %s" % (x, y),
$  "DIV": lambda x, y: "%s / %s" % (x, y),
$  "MAX": lambda x, y: "%s(%s, %s)" % (MAX_F32, x, y),
$  "MIN": lambda x, y: "%s(%s, %s)" % (MIN_F32, x, y),
$  "MUL": lambda x, y: "%s * %s" % (x, y),
$  "SUB": lambda x, y: "%s - %s" % (x, y),
$  "SQRDIFF": lambda x, y: "%s - %s" % (x, y),
$}[OP]
$SUFFIX = {"LINEAR": "", "RELU": "_relu", "MINMAX": "_minmax"}[ACTIVATION]
$PARAMS = {"LINEAR": "xnn_f32_default_params", "RELU": "xnn_f32_relu_params", "MINMAX": "xnn_f32_minmax_params"}[ACTIVATION]
void xnn_f32_v${OP.lower()}${SUFFIX}_ukernel__${"wasm" if WASM else "scalar"}_x${BATCH_TILE}(
    size_t n,
    const float* a,
    const float* b,
    float* y,
    const union ${PARAMS} params[restrict XNN_MIN_ELEMENTS(1)])
{
  assert(n != 0);
  assert(n % sizeof(float) == 0);
  assert(a != NULL);
  assert(b != NULL);
  assert(y != NULL);

  $if ACTIVATION == "MINMAX":
    const float vy_min = params->scalar.min;
    const float vy_max = params->scalar.max;

  $if BATCH_TILE > 1:
    // Main loop: process full tiles of elements per iteration.
    for (; n >= ${BATCH_TILE} * sizeof(float); n -= ${BATCH_TILE} * sizeof(float)) {
      $for N in range(BATCH_TILE):
        const float va${ABC[N]} = a[${N}];
      a += ${BATCH_TILE};

      $for N in range(BATCH_TILE):
        const float vb${ABC[N]} = b[${N}];
      b += ${BATCH_TILE};

      $for N in range(BATCH_TILE):
        float vy${ABC[N]} = ${OP_FUNC("va" + ABC[N], "vb" + ABC[N])};

      $if OP == "SQRDIFF":
        // SQRDIFF: the difference computed above is squared here.
        $for N in range(BATCH_TILE):
          vy${ABC[N]} = vy${ABC[N]} * vy${ABC[N]};

      $if ACTIVATION == "MINMAX":
        $for N in range(BATCH_TILE):
          vy${ABC[N]} = ${MAX_F32}(vy${ABC[N]}, vy_min);

        $for N in range(BATCH_TILE):
          vy${ABC[N]} = ${MIN_F32}(vy${ABC[N]}, vy_max);
      $elif ACTIVATION == "RELU":
        $for N in range(BATCH_TILE):
          vy${ABC[N]} = ${MAX_F32}(vy${ABC[N]}, 0.0f);

      $for N in range(BATCH_TILE):
        y[${N}] = vy${ABC[N]};
      y += ${BATCH_TILE};
    }
    // Remainder: handle any leftover elements one at a time.
    if XNN_UNLIKELY(n != 0) {
      $if BATCH_TILE > 2:
        do {
          const float va = *a++;
          const float vb = *b++;
          float vy = ${OP_FUNC("va", "vb")};
          $if OP == "SQRDIFF":
            vy = vy * vy;
          $if ACTIVATION == "MINMAX":
            vy = ${MAX_F32}(vy, vy_min);
            vy = ${MIN_F32}(vy, vy_max);
          $elif ACTIVATION == "RELU":
            vy = ${MAX_F32}(vy, 0.0f);
          *y++ = vy;
          n -= sizeof(float);
        } while (n != 0);
      $else:
        const float va = *a;
        const float vb = *b;
        float vy = ${OP_FUNC("va", "vb")};
        $if OP == "SQRDIFF":
          vy = vy * vy;
        $if ACTIVATION == "MINMAX":
          vy = ${MAX_F32}(vy, vy_min);
          vy = ${MIN_F32}(vy, vy_max);
        $elif ACTIVATION == "RELU":
          vy = ${MAX_F32}(vy, 0.0f);
        *y = vy;
    }
  $else:
    // BATCH_TILE == 1: process elements one at a time.
    for (; n >= sizeof(float); n -= sizeof(float)) {
      const float va = *a++;
      const float vb = *b++;
      float vy = ${OP_FUNC("va", "vb")};
      $if OP == "SQRDIFF":
        vy = vy * vy;
      $if ACTIVATION == "MINMAX":
        vy = ${MAX_F32}(vy, vy_min);
        vy = ${MIN_F32}(vy, vy_max);
      $elif ACTIVATION == "RELU":
        vy = ${MAX_F32}(vy, 0.0f);
      *y++ = vy;
    }
}
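
// A minimal sketch of the kernel this template is expected to expand to for
// one instantiation (assuming OP=ADD, ACTIVATION=LINEAR, BATCH_TILE=1, and
// WASM unset); shown for illustration only, not generated output:
//
//   void xnn_f32_vadd_ukernel__scalar_x1(
//       size_t n,
//       const float* a,
//       const float* b,
//       float* y,
//       const union xnn_f32_default_params params[restrict XNN_MIN_ELEMENTS(1)])
//   {
//     assert(n != 0);
//     assert(n % sizeof(float) == 0);
//     assert(a != NULL);
//     assert(b != NULL);
//     assert(y != NULL);
//
//     for (; n >= sizeof(float); n -= sizeof(float)) {
//       const float va = *a++;
//       const float vb = *b++;
//       float vy = va + vb;
//       *y++ = vy;
//     }
//   }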