• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include <armnnUtils/FloatingPointConverter.hpp>
7 
8 #include <BFloat16.hpp>
9 #include <Half.hpp>
10 
11 #include <boost/test/unit_test.hpp>
12 
13 BOOST_AUTO_TEST_SUITE(TestFPConversion)
14 
// Converts a fixed table of float values to armnn::Half through
// FloatingPointConverter::ConvertFloat32To16 and checks every element twice:
// once for exact equality with a Half constructed directly from the same float,
// and once that the Half read back as float is close to the original value.
BOOST_AUTO_TEST_CASE(TestConvertFp32ToFp16)
{
    using namespace half_float::literal;

    float floatArray[] = { 1.0f, 2.0f, 0.5f, 3.1f, 2.4f,
                           5.666f, 6.444f, 7.1f, 432.121f, 12.22f };
    const size_t elementCount = sizeof(floatArray) / sizeof(*floatArray);

    // Destination buffer, pre-filled with half-precision zeros.
    std::vector<armnn::Half> halfBuffer(elementCount, 0.0_h);

    armnnUtils::FloatingPointConverter::ConvertFloat32To16(floatArray, elementCount, halfBuffer.data());

    for (size_t i = 0; i < elementCount; ++i)
    {
        // The converter must agree with direct construction of a Half from the float.
        armnn::Half expected(floatArray[i]);
        armnn::Half actual = halfBuffer[i];
        BOOST_CHECK_EQUAL(expected, actual);

        // Widen back to float; BOOST_CHECK_CLOSE takes its tolerance as a percentage.
        float convertedHalf = actual;
        BOOST_CHECK_CLOSE(floatArray[i], convertedHalf, 0.07);
    }
}
36 
// Converts a fixed table of armnn::Half values to float through
// FloatingPointConverter::ConvertFloat16To32 and checks that every output
// equals the float obtained by converting the same Half directly.
BOOST_AUTO_TEST_CASE(TestConvertFp16ToFp32)
{
    using namespace half_float::literal;

    armnn::Half halfArray[] = { 1.0_h, 2.0_h, 0.5_h, 3.1_h, 2.4_h,
                                5.666_h, 6.444_h, 7.1_h, 432.121_h, 12.22_h };
    const size_t elementCount = sizeof(halfArray) / sizeof(*halfArray);

    // Destination buffer, pre-filled with zeros.
    std::vector<float> floatBuffer(elementCount, 0.0f);

    armnnUtils::FloatingPointConverter::ConvertFloat16To32(halfArray, elementCount, floatBuffer.data());

    for (size_t i = 0; i < elementCount; ++i)
    {
        // Reference value: the Half converted directly to float.
        float expected(halfArray[i]);
        float actual = floatBuffer[i];
        BOOST_CHECK_EQUAL(expected, actual);
    }
}
55 
// Converts a table of float values to armnn::BFloat16 through
// FloatingPointConverter::ConvertFloat32ToBFloat16 and compares the raw 16-bit
// value of every result (BFloat16::Val()) with a hand-written expectation.
//
// Each input is annotated with its 32-bit pattern and the expected rounding
// direction. The expected values are consistent with round-to-nearest,
// ties-to-even on the upper 16 bits (e.g. 0x00008000 -> 0x0000 but
// 0xFE018000 -> 0xFE02) -- NOTE(review): confirm against the converter's
// documented rounding mode.
BOOST_AUTO_TEST_CASE(TestConvertFloat32ToBFloat16)
{
    float floatArray[] = { 1.704735E38f,   // 0x7F004000 round down
                           0.0f,           // 0x00000000 round down
                           2.2959E-41f,    // 0x00004000 round down
                           1.7180272E38f,  // 0x7F014000 round down
                           9.18355E-41f,   // 0x00010000 round down
                           1.14794E-40f,   // 0x00014000 round down
                           4.5918E-41f,    // 0x00008000 round down
                           -1.708058E38f,  // 0xFF008000 round down
                           -4.3033756E37f, // 0xFE018000 round up
                           1.60712E-40f,   // 0x0001C000 round up
                           -2.0234377f,    // 0xC0018001 round up
                           -1.1800863E-38f,// 0x80808001 round up
                           4.843037E-35f,  // 0x0680C000 round up
                           3.9999998f,     // 0x407FFFFF round up
                           std::numeric_limits<float>::max(),    // 0x7F7FFFFF max positive value
                           std::numeric_limits<float>::lowest(), // 0xFF7FFFFF max negative value
                           1.1754942E-38f, // 0x007FFFFF min positive value
                           -1.1754942E-38f // 0x807FFFFF min negative value
                          };
    // Expected raw BFloat16 bit patterns, in the same order as floatArray.
    uint16_t expectedResult[] = { 0x7F00, // from 0x7F004000
                                  0x0000, // from 0x00000000
                                  0x0000, // from 0x00004000
                                  0x7F01, // from 0x7F014000
                                  0x0001, // from 0x00010000
                                  0x0001, // from 0x00014000
                                  0x0000, // from 0x00008000
                                  0xFF00, // from 0xFF008000
                                  0xFE02, // from 0xFE018000
                                  0x0002, // from 0x0001C000
                                  0xC002, // from 0xC0018001
                                  0x8081, // from 0x80808001
                                  0x0681, // from 0x0680C000
                                  0x4080, // from 0x407FFFFF
                                  0x7F80, // from 0x7F7FFFFF
                                  0xFF80, // from 0xFF7FFFFF
                                  0x0080, // from 0x007FFFFF
                                  0x8080  // from 0x807FFFFF
                                 };

    // The check loop indexes expectedResult with a count derived from floatArray;
    // fail the build if the two tables ever get out of step, instead of reading
    // past the end of expectedResult or silently skipping inputs.
    static_assert(sizeof(expectedResult) / sizeof(expectedResult[0]) ==
                  sizeof(floatArray) / sizeof(floatArray[0]),
                  "floatArray and expectedResult must have the same number of elements");

    size_t numFloats = sizeof(floatArray) / sizeof(floatArray[0]);

    std::vector<armnn::BFloat16> convertedBuffer(numFloats);

    armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16(floatArray, numFloats, convertedBuffer.data());

    for (size_t i = 0; i < numFloats; i++)
    {
        armnn::BFloat16 actual = convertedBuffer[i];
        // Compare the raw 16-bit encodings rather than the decoded values.
        BOOST_CHECK_EQUAL(expectedResult[i], actual.Val());
    }
}
108 
// Feeds raw 16-bit BFloat16 patterns to
// FloatingPointConverter::ConvertBFloat16ToFloat32 and checks that every
// resulting float exactly equals the hand-written expected value.
BOOST_AUTO_TEST_CASE(TestConvertBFloat16ToFloat32)
{
    // Raw 16-bit inputs; the hex form of each decimal literal is given alongside.
    uint16_t bf16Array[] = { 16256,   // 0x3F80
                             16320,   // 0x3FC0
                             38699,   // 0x972B
                             16384,   // 0x4000
                             49156,   // 0xC004
                             32639 }; // 0x7F7F
    size_t numFloats = sizeof(bf16Array) / sizeof(bf16Array[0]);
    // Expected float values, in the same order as bf16Array.
    float expectedResult[] = { 1.0f, 1.5f, -5.525308E-25f, 2.0f, -2.0625f, 3.3895314E38f };

    // The check loop indexes expectedResult with a count derived from bf16Array;
    // fail the build if the two tables ever get out of step, instead of reading
    // past the end of expectedResult or silently skipping inputs.
    static_assert(sizeof(expectedResult) / sizeof(expectedResult[0]) ==
                  sizeof(bf16Array) / sizeof(bf16Array[0]),
                  "bf16Array and expectedResult must have the same number of elements");

    std::vector<float> convertedBuffer(numFloats, 0.0f);

    armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32(bf16Array, numFloats, convertedBuffer.data());

    for (size_t i = 0; i < numFloats; i++)
    {
        float actual = convertedBuffer[i];
        // Exact comparison: the test expects the conversion to reproduce these values bit-for-bit.
        BOOST_CHECK_EQUAL(expectedResult[i], actual);
    }
}
124 
125 BOOST_AUTO_TEST_SUITE_END()
126