ComputeLibrary/support/Bfloat16.h

/*
 * Copyright (c) 2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef ARM_COMPUTE_BFLOAT16_H
#define ARM_COMPUTE_BFLOAT16_H

#include <cstdint>

namespace arm_compute
{
namespace
{
/** Convert float to bfloat16
 *
 * @param[in] v Floating-point value to convert to bfloat
 *
 * @return Converted value
 */
inline uint16_t float_to_bf16(const float v)
{
    const uint32_t *fromptr = reinterpret_cast<const uint32_t *>(&v);
#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16)
    uint16_t res;

    __asm __volatile(
        "ldr    s0, [%[fromptr]]\n"
        ".inst    0x1e634000\n" // BFCVT h0, s0
        "str    h0, [%[toptr]]\n"
        :
        : [fromptr] "r"(fromptr), [toptr] "r"(&res)
        : "v0", "memory");
#else  /* defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) */
    uint16_t       res   = (*fromptr >> 16);
    const uint16_t error = (*fromptr & 0x0000ffff);
    uint16_t       bf_l  = res & 0x0001;
    if((error > 0x8000) || ((error == 0x8000) && (bf_l != 0)))
    {
        res += 1;
    }
#endif /* defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) */
    return res;
}

/** Convert bfloat16 to float
 *
 * @param[in] v Bfloat16 value to convert to float
 *
 * @return Converted value
 */
inline float bf16_to_float(const uint16_t &v)
{
    const uint32_t lv = (v << 16);
    const float   *fp = reinterpret_cast<const float *>(&lv);

    return *fp;
}
}

/** Brain floating point representation class */
class bfloat16 final
{
public:
    /** Default Constructor */
    bfloat16()
        : value(0)
    {
    }
    /** Constructor
     *
     * @param[in] v Floating-point value
     */
    explicit bfloat16(float v)
        : value(float_to_bf16(v))
    {
    }
    /** Assignment operator
     *
     * @param[in] v Floating point value to assign
     *
     * @return The updated object
     */
    bfloat16 &operator=(float v)
    {
        value = float_to_bf16(v);
        return *this;
    }
    /** Floating point conversion operator
     *
     * @return Floating point representation of the value
     */
    operator float() const
    {
        return bf16_to_float(value);
    }
    /** Lowest representative value
     *
     * @return Returns the lowest finite value representable by bfloat16
     */
    static bfloat16 lowest()
    {
        bfloat16 val;
        val.value = 0xFF7F;
        return val;
    }
    /** Largest representative value
     *
     * @return Returns the largest finite value representable by bfloat16
     */
    static bfloat16 max()
    {
        bfloat16 val;
        val.value = 0x7F7F;
        return val;
    }

private:
    uint16_t value;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_BFLOAT16_H */