• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2016-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef __UTILS_UTILS_H__
25 #define __UTILS_UTILS_H__
26 
27 /** @dir .
28  *  @brief Boilerplate code used by examples. Various utilities to print types, load / store assets, etc.
29  */
30 
31 #include "arm_compute/core/Helpers.h"
32 #include "arm_compute/core/ITensor.h"
33 #include "arm_compute/core/Types.h"
34 #include "arm_compute/core/Window.h"
35 #include "arm_compute/runtime/Tensor.h"
36 #pragma GCC diagnostic push
37 #pragma GCC diagnostic ignored "-Wunused-parameter"
38 #pragma GCC diagnostic ignored "-Wstrict-overflow"
39 #include "libnpy/npy.hpp"
40 #pragma GCC diagnostic pop
41 #include "support/MemorySupport.h"
42 #include "support/StringSupport.h"
43 
44 #ifdef ARM_COMPUTE_CL
45 #include "arm_compute/core/CL/OpenCL.h"
46 #include "arm_compute/runtime/CL/CLDistribution1D.h"
47 #include "arm_compute/runtime/CL/CLTensor.h"
48 #endif /* ARM_COMPUTE_CL */
49 #ifdef ARM_COMPUTE_GC
50 #include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
51 #endif /* ARM_COMPUTE_GC */
52 
53 #include <cstdlib>
54 #include <cstring>
55 #include <fstream>
56 #include <iostream>
57 #include <random>
58 #include <string>
59 #include <tuple>
60 #include <vector>
61 
62 namespace arm_compute
63 {
64 namespace utils
65 {
/** Image types handled by the utilities */
enum class ImageType
{
    UNKNOWN, /**< Unrecognised image type */
    PPM,     /**< PPM image */
    JPEG     /**< JPEG image */
};
73 
74 /** Abstract Example class.
75  *
76  * All examples have to inherit from this class.
77  */
78 class Example
79 {
80 public:
81     /** Setup the example.
82      *
83      * @param[in] argc Argument count.
84      * @param[in] argv Argument values.
85      *
86      * @return True in case of no errors in setup else false
87      */
do_setup(int argc,char ** argv)88     virtual bool do_setup(int argc, char **argv)
89     {
90         ARM_COMPUTE_UNUSED(argc, argv);
91         return true;
92     };
93     /** Run the example. */
do_run()94     virtual void do_run() {};
95     /** Teardown the example. */
do_teardown()96     virtual void do_teardown() {};
97 
98     /** Default destructor. */
99     virtual ~Example() = default;
100 };
101 
102 /** Run an example and handle the potential exceptions it throws
103  *
104  * @param[in] argc    Number of command line arguments
105  * @param[in] argv    Command line arguments
106  * @param[in] example Example to run
107  */
108 int run_example(int argc, char **argv, std::unique_ptr<Example> example);
109 
110 template <typename T>
run_example(int argc,char ** argv)111 int run_example(int argc, char **argv)
112 {
113     return run_example(argc, argv, support::cpp14::make_unique<T>());
114 }
115 
116 /** Draw a RGB rectangular window for the detected object
117  *
118  * @param[in, out] tensor Input tensor where the rectangle will be drawn on. Format supported: RGB888
119  * @param[in]      rect   Geometry of the rectangular window
120  * @param[in]      r      Red colour to use
121  * @param[in]      g      Green colour to use
122  * @param[in]      b      Blue colour to use
123  */
124 void draw_detection_rectangle(arm_compute::ITensor *tensor, const arm_compute::DetectionWindow &rect, uint8_t r, uint8_t g, uint8_t b);
125 
126 /** Gets image type given a file
127  *
128  * @param[in] filename File to identify its image type
129  *
130  * @return Image type
131  */
132 ImageType get_image_type_from_file(const std::string &filename);
133 
134 /** Parse the ppm header from an input file stream. At the end of the execution,
135  *  the file position pointer will be located at the first pixel stored in the ppm file
136  *
137  * @param[in] fs Input file stream to parse
138  *
139  * @return The width, height and max value stored in the header of the PPM file
140  */
141 std::tuple<unsigned int, unsigned int, int> parse_ppm_header(std::ifstream &fs);
142 
143 /** Parse the npy header from an input file stream. At the end of the execution,
144  *  the file position pointer will be located at the first element stored in the npy file
145  *
146  * @param[in] fs Input file stream to parse
147  *
148  * @return A tuple holding the shape, the fortran-order flag and the type string stored in the header of the NPY file
149  */
150 std::tuple<std::vector<unsigned long>, bool, std::string> parse_npy_header(std::ifstream &fs);
151 
152 /** Obtain numpy type string from DataType.
153  *
154  * @param[in] data_type Data type.
155  *
156  * @return numpy type string.
157  */
get_typestring(DataType data_type)158 inline std::string get_typestring(DataType data_type)
159 {
160     // Check endianness
161     const unsigned int i = 1;
162     const char        *c = reinterpret_cast<const char *>(&i);
163     std::string        endianness;
164     if(*c == 1)
165     {
166         endianness = std::string("<");
167     }
168     else
169     {
170         endianness = std::string(">");
171     }
172     const std::string no_endianness("|");
173 
174     switch(data_type)
175     {
176         case DataType::U8:
177         case DataType::QASYMM8:
178             return no_endianness + "u" + support::cpp11::to_string(sizeof(uint8_t));
179         case DataType::S8:
180         case DataType::QSYMM8:
181         case DataType::QSYMM8_PER_CHANNEL:
182             return no_endianness + "i" + support::cpp11::to_string(sizeof(int8_t));
183         case DataType::U16:
184         case DataType::QASYMM16:
185             return endianness + "u" + support::cpp11::to_string(sizeof(uint16_t));
186         case DataType::S16:
187         case DataType::QSYMM16:
188             return endianness + "i" + support::cpp11::to_string(sizeof(int16_t));
189         case DataType::U32:
190             return endianness + "u" + support::cpp11::to_string(sizeof(uint32_t));
191         case DataType::S32:
192             return endianness + "i" + support::cpp11::to_string(sizeof(int32_t));
193         case DataType::U64:
194             return endianness + "u" + support::cpp11::to_string(sizeof(uint64_t));
195         case DataType::S64:
196             return endianness + "i" + support::cpp11::to_string(sizeof(int64_t));
197         case DataType::F16:
198             return endianness + "f" + support::cpp11::to_string(sizeof(half));
199         case DataType::F32:
200             return endianness + "f" + support::cpp11::to_string(sizeof(float));
201         case DataType::F64:
202             return endianness + "f" + support::cpp11::to_string(sizeof(double));
203         case DataType::SIZET:
204             return endianness + "u" + support::cpp11::to_string(sizeof(size_t));
205         default:
206             ARM_COMPUTE_ERROR("Data type not supported");
207     }
208 }
209 
210 /** Maps a tensor if needed. This generic overload ignores both arguments and maps nothing.
211  *
212  * @param[in] tensor   Tensor to be mapped (unused by this overload)
213  * @param[in] blocking Specifies if map is blocking or not (unused by this overload)
214  */
215 template <typename T>
map(T & tensor,bool blocking)216 inline void map(T &tensor, bool blocking)
217 {
218     ARM_COMPUTE_UNUSED(tensor);
219     ARM_COMPUTE_UNUSED(blocking);
220 }
221 
222 /** Unmaps a tensor if needed. This generic overload ignores its argument and unmaps nothing.
223  *
224  * @param[in] tensor Tensor to be unmapped (unused by this overload)
225  */
226 template <typename T>
unmap(T & tensor)227 inline void unmap(T &tensor)
228 {
229     ARM_COMPUTE_UNUSED(tensor);
230 }
231 
232 #ifdef ARM_COMPUTE_CL
233 /** Maps a CLTensor by forwarding to CLTensor::map()
234  *
235  * @param[in, out] tensor   Tensor to be mapped
236  * @param[in]      blocking Specifies if map is blocking or not
237  */
map(CLTensor & tensor,bool blocking)238 inline void map(CLTensor &tensor, bool blocking)
239 {
240     tensor.map(blocking);
241 }
242 
243 /** Unmaps a CLTensor by forwarding to CLTensor::unmap()
244  *
245  * @param[in, out] tensor Tensor to be unmapped
246  */
unmap(CLTensor & tensor)247 inline void unmap(CLTensor &tensor)
248 {
249     tensor.unmap();
250 }
251 
252 /** Maps a CLDistribution1D by forwarding to CLDistribution1D::map()
253  *
254  * @param[in, out] distribution Distribution to be mapped
255  * @param[in]      blocking     Specifies if map is blocking or not
256  */
map(CLDistribution1D & distribution,bool blocking)257 inline void map(CLDistribution1D &distribution, bool blocking)
258 {
259     distribution.map(blocking);
260 }
261 
262 /** Unmaps a CLDistribution1D by forwarding to CLDistribution1D::unmap()
263  *
264  * @param[in, out] distribution Distribution to be unmapped
265  */
unmap(CLDistribution1D & distribution)266 inline void unmap(CLDistribution1D &distribution)
267 {
268     distribution.unmap();
269 }
270 #endif /* ARM_COMPUTE_CL */
271 
272 #ifdef ARM_COMPUTE_GC
273 /** Maps a GCTensor by forwarding to GCTensor::map()
274  *
275  * @param[in, out] tensor   Tensor to be mapped
276  * @param[in]      blocking Specifies if map is blocking or not
277  */
map(GCTensor & tensor,bool blocking)278 inline void map(GCTensor &tensor, bool blocking)
279 {
280     tensor.map(blocking);
281 }
282 
283 /** Unmaps a GCTensor by forwarding to GCTensor::unmap()
284  *
285  * @param[in, out] tensor Tensor to be unmapped
286  */
unmap(GCTensor & tensor)287 inline void unmap(GCTensor &tensor)
288 {
289     tensor.unmap();
290 }
291 #endif /* ARM_COMPUTE_GC */
292 
293 /** Specialized class to generate random non-zero FP16 values.
294  *  uniform_real_distribution<half> generates values that get rounded off to zero, causing
295  *  differences between ACL and reference implementation
296 */
297 class uniform_real_distribution_fp16
298 {
299     half                                   min{ 0.0f }, max{ 0.0f };
300     std::uniform_real_distribution<float>  neg{ min, -0.3f };
301     std::uniform_real_distribution<float>  pos{ 0.3f, max };
302     std::uniform_int_distribution<uint8_t> sign_picker{ 0, 1 };
303 
304 public:
305     using result_type = half;
306     /** Constructor
307      *
308      * @param[in] a Minimum value of the distribution
309      * @param[in] b Maximum value of the distribution
310      */
311     explicit uniform_real_distribution_fp16(half a = half(0.0), half b = half(1.0))
min(a)312         : min(a), max(b)
313     {
314     }
315 
316     /** () operator to generate next value
317      *
318      * @param[in] gen an uniform random bit generator object
319      */
operator()320     half operator()(std::mt19937 &gen)
321     {
322         if(sign_picker(gen))
323         {
324             return (half)neg(gen);
325         }
326         return (half)pos(gen);
327     }
328 };
329 
330 /** Numpy data loader */
331 class NPYLoader
332 {
333 public:
334     /** Default constructor */
NPYLoader()335     NPYLoader()
336         : _fs(), _shape(), _fortran_order(false), _typestring(), _file_layout(DataLayout::NCHW)
337     {
338     }
339 
340     /** Open a NPY file and reads its metadata
341      *
342      * @param[in] npy_filename File to open
343      * @param[in] file_layout  (Optional) Layout in which the weights are stored in the file.
344      */
345     void open(const std::string &npy_filename, DataLayout file_layout = DataLayout::NCHW)
346     {
347         ARM_COMPUTE_ERROR_ON(is_open());
348         try
349         {
350             _fs.open(npy_filename, std::ios::in | std::ios::binary);
351             ARM_COMPUTE_EXIT_ON_MSG_VAR(!_fs.good(), "Failed to load binary data from %s", npy_filename.c_str());
352             _fs.exceptions(std::ifstream::failbit | std::ifstream::badbit);
353             _file_layout = file_layout;
354 
355             std::tie(_shape, _fortran_order, _typestring) = parse_npy_header(_fs);
356         }
catch(const std::ifstream::failure & e)357         catch(const std::ifstream::failure &e)
358         {
359             ARM_COMPUTE_ERROR_VAR("Accessing %s: %s", npy_filename.c_str(), e.what());
360         }
361     }
362     /** Return true if a NPY file is currently open */
is_open()363     bool is_open()
364     {
365         return _fs.is_open();
366     }
367 
368     /** Return true if a NPY file is in fortran order */
is_fortran()369     bool is_fortran()
370     {
371         return _fortran_order;
372     }
373 
374     /** Initialise the tensor's metadata with the dimensions of the NPY file currently open
375      *
376      * @param[out] tensor Tensor to initialise
377      * @param[in]  dt     Data type to use for the tensor
378      */
379     template <typename T>
init_tensor(T & tensor,arm_compute::DataType dt)380     void init_tensor(T &tensor, arm_compute::DataType dt)
381     {
382         ARM_COMPUTE_ERROR_ON(!is_open());
383         ARM_COMPUTE_ERROR_ON(dt != arm_compute::DataType::F32);
384 
385         // Use the size of the input NPY tensor
386         TensorShape shape;
387         shape.set_num_dimensions(_shape.size());
388         for(size_t i = 0; i < _shape.size(); ++i)
389         {
390             size_t src = i;
391             if(_fortran_order)
392             {
393                 src = _shape.size() - 1 - i;
394             }
395             shape.set(i, _shape.at(src));
396         }
397 
398         arm_compute::TensorInfo tensor_info(shape, 1, dt);
399         tensor.allocator()->init(tensor_info);
400     }
401 
402     /** Fill a tensor with the content of the currently open NPY file.
403      *
404      * @note If the tensor is a CLTensor, the function maps and unmaps the tensor
405      *
406      * @param[in,out] tensor Tensor to fill (Must be allocated, and of matching dimensions with the opened NPY).
407      */
408     template <typename T>
fill_tensor(T & tensor)409     void fill_tensor(T &tensor)
410     {
411         ARM_COMPUTE_ERROR_ON(!is_open());
412         ARM_COMPUTE_ERROR_ON_DATA_TYPE_NOT_IN(&tensor, arm_compute::DataType::QASYMM8, arm_compute::DataType::S32, arm_compute::DataType::F32, arm_compute::DataType::F16);
413         try
414         {
415             // Map buffer if creating a CLTensor
416             map(tensor, true);
417 
418             // Check if the file is large enough to fill the tensor
419             const size_t current_position = _fs.tellg();
420             _fs.seekg(0, std::ios_base::end);
421             const size_t end_position = _fs.tellg();
422             _fs.seekg(current_position, std::ios_base::beg);
423 
424             ARM_COMPUTE_ERROR_ON_MSG((end_position - current_position) < tensor.info()->tensor_shape().total_size() * tensor.info()->element_size(),
425                                      "Not enough data in file");
426             ARM_COMPUTE_UNUSED(end_position);
427 
428             // Check if the typestring matches the given one
429             std::string expect_typestr = get_typestring(tensor.info()->data_type());
430             ARM_COMPUTE_ERROR_ON_MSG(_typestring != expect_typestr, "Typestrings mismatch");
431 
432             bool are_layouts_different = (_file_layout != tensor.info()->data_layout());
433             // Correct dimensions (Needs to match TensorShape dimension corrections)
434             if(_shape.size() != tensor.info()->tensor_shape().num_dimensions())
435             {
436                 for(int i = static_cast<int>(_shape.size()) - 1; i > 0; --i)
437                 {
438                     if(_shape[i] == 1)
439                     {
440                         _shape.pop_back();
441                     }
442                     else
443                     {
444                         break;
445                     }
446                 }
447             }
448 
449             TensorShape                    permuted_shape = tensor.info()->tensor_shape();
450             arm_compute::PermutationVector perm;
451             if(are_layouts_different && tensor.info()->tensor_shape().num_dimensions() > 2)
452             {
453                 perm                                    = (tensor.info()->data_layout() == arm_compute::DataLayout::NHWC) ? arm_compute::PermutationVector(2U, 0U, 1U) : arm_compute::PermutationVector(1U, 2U, 0U);
454                 arm_compute::PermutationVector perm_vec = (tensor.info()->data_layout() == arm_compute::DataLayout::NCHW) ? arm_compute::PermutationVector(2U, 0U, 1U) : arm_compute::PermutationVector(1U, 2U, 0U);
455 
456                 arm_compute::permute(permuted_shape, perm_vec);
457             }
458 
459             // Validate tensor shape
460             ARM_COMPUTE_ERROR_ON_MSG(_shape.size() != tensor.info()->tensor_shape().num_dimensions(), "Tensor ranks mismatch");
461             for(size_t i = 0; i < _shape.size(); ++i)
462             {
463                 ARM_COMPUTE_ERROR_ON_MSG(permuted_shape[i] != _shape[i], "Tensor dimensions mismatch");
464             }
465 
466             switch(tensor.info()->data_type())
467             {
468                 case arm_compute::DataType::QASYMM8:
469                 case arm_compute::DataType::S32:
470                 case arm_compute::DataType::F32:
471                 case arm_compute::DataType::F16:
472                 {
473                     // Read data
474                     if(!are_layouts_different && !_fortran_order && tensor.info()->padding().empty())
475                     {
476                         // If tensor has no padding read directly from stream.
477                         _fs.read(reinterpret_cast<char *>(tensor.buffer()), tensor.info()->total_size());
478                     }
479                     else
480                     {
481                         // If tensor has padding or is in fortran order accessing tensor elements through execution window.
482                         Window             window;
483                         const unsigned int num_dims = _shape.size();
484                         if(_fortran_order)
485                         {
486                             for(unsigned int dim = 0; dim < num_dims; dim++)
487                             {
488                                 permuted_shape.set(dim, _shape[num_dims - dim - 1]);
489                                 perm.set(dim, num_dims - dim - 1);
490                             }
491                             if(are_layouts_different)
492                             {
493                                 // Permute only if num_dimensions greater than 2
494                                 if(num_dims > 2)
495                                 {
496                                     if(_file_layout == DataLayout::NHWC) // i.e destination is NCHW --> permute(1,2,0)
497                                     {
498                                         arm_compute::permute(perm, arm_compute::PermutationVector(1U, 2U, 0U));
499                                     }
500                                     else
501                                     {
502                                         arm_compute::permute(perm, arm_compute::PermutationVector(2U, 0U, 1U));
503                                     }
504                                 }
505                             }
506                         }
507                         window.use_tensor_dimensions(permuted_shape);
508 
509                         execute_window_loop(window, [&](const Coordinates & id)
510                         {
511                             Coordinates dst(id);
512                             arm_compute::permute(dst, perm);
513                             _fs.read(reinterpret_cast<char *>(tensor.ptr_to_element(dst)), tensor.info()->element_size());
514                         });
515                     }
516 
517                     break;
518                 }
519                 default:
520                     ARM_COMPUTE_ERROR("Unsupported data type");
521             }
522 
523             // Unmap buffer if creating a CLTensor
524             unmap(tensor);
525         }
526         catch(const std::ifstream::failure &e)
527         {
528             ARM_COMPUTE_ERROR_VAR("Loading NPY file: %s", e.what());
529         }
530     }
531 
532 private:
533     std::ifstream              _fs;
534     std::vector<unsigned long> _shape;
535     bool                       _fortran_order;
536     std::string                _typestring;
537     DataLayout                 _file_layout;
538 };
539 
540 /** Template helper function to save a tensor image to a PPM file.
541  *
542  * @note Only U8 and RGB888 formats supported.
543  * @note Only works with 2D tensors.
544  * @note If the input tensor is a CLTensor, the function maps and unmaps the image
545  *
546  * @param[in] tensor       The tensor to save as PPM file
547  * @param[in] ppm_filename Filename of the file to create.
548  */
549 template <typename T>
save_to_ppm(T & tensor,const std::string & ppm_filename)550 void save_to_ppm(T &tensor, const std::string &ppm_filename)
551 {
552     ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(&tensor, arm_compute::Format::RGB888, arm_compute::Format::U8);
553     ARM_COMPUTE_ERROR_ON(tensor.info()->num_dimensions() > 2);
554 
555     std::ofstream fs;
556 
557     try
558     {
559         fs.exceptions(std::ofstream::failbit | std::ofstream::badbit | std::ofstream::eofbit);
560         fs.open(ppm_filename, std::ios::out | std::ios::binary);
561 
562         const unsigned int width  = tensor.info()->tensor_shape()[0];
563         const unsigned int height = tensor.info()->tensor_shape()[1];
564 
565         fs << "P6\n"
566            << width << " " << height << " 255\n";
567 
568         // Map buffer if creating a CLTensor/GCTensor
569         map(tensor, true);
570 
571         switch(tensor.info()->format())
572         {
573             case arm_compute::Format::U8:
574             {
575                 arm_compute::Window window;
576                 window.set(arm_compute::Window::DimX, arm_compute::Window::Dimension(0, width, 1));
577                 window.set(arm_compute::Window::DimY, arm_compute::Window::Dimension(0, height, 1));
578 
579                 arm_compute::Iterator in(&tensor, window);
580 
581                 arm_compute::execute_window_loop(window, [&](const arm_compute::Coordinates &)
582                 {
583                     const unsigned char value = *in.ptr();
584 
585                     fs << value << value << value;
586                 },
587                 in);
588 
589                 break;
590             }
591             case arm_compute::Format::RGB888:
592             {
593                 arm_compute::Window window;
594                 window.set(arm_compute::Window::DimX, arm_compute::Window::Dimension(0, width, width));
595                 window.set(arm_compute::Window::DimY, arm_compute::Window::Dimension(0, height, 1));
596 
597                 arm_compute::Iterator in(&tensor, window);
598 
599                 arm_compute::execute_window_loop(window, [&](const arm_compute::Coordinates &)
600                 {
601                     fs.write(reinterpret_cast<std::fstream::char_type *>(in.ptr()), width * tensor.info()->element_size());
602                 },
603                 in);
604 
605                 break;
606             }
607             default:
608                 ARM_COMPUTE_ERROR("Unsupported format");
609         }
610 
611         // Unmap buffer if creating a CLTensor/GCTensor
612         unmap(tensor);
613     }
614     catch(const std::ofstream::failure &e)
615     {
616         ARM_COMPUTE_ERROR_VAR("Writing %s: (%s)", ppm_filename.c_str(), e.what());
617     }
618 }
619 
620 /** Template helper function to save a tensor image to a NPY file.
621  *
622  * @note Only F32 data type supported.
623  * @note If the input tensor is a CLTensor, the function maps and unmaps the image
624  *
625  * @param[in] tensor        The tensor to save as NPY file
626  * @param[in] npy_filename  Filename of the file to create.
627  * @param[in] fortran_order If true, save matrix in fortran order.
628  */
629 template <typename T, typename U = float>
save_to_npy(T & tensor,const std::string & npy_filename,bool fortran_order)630 void save_to_npy(T &tensor, const std::string &npy_filename, bool fortran_order)
631 {
632     ARM_COMPUTE_ERROR_ON_DATA_TYPE_NOT_IN(&tensor, arm_compute::DataType::F32, arm_compute::DataType::QASYMM8);
633 
634     std::ofstream fs;
635     try
636     {
637         fs.exceptions(std::ofstream::failbit | std::ofstream::badbit | std::ofstream::eofbit);
638         fs.open(npy_filename, std::ios::out | std::ios::binary);
639 
640         std::vector<npy::ndarray_len_t> shape(tensor.info()->num_dimensions());
641 
642         for(unsigned int i = 0, j = tensor.info()->num_dimensions() - 1; i < tensor.info()->num_dimensions(); ++i, --j)
643         {
644             shape[i] = tensor.info()->tensor_shape()[!fortran_order ? j : i];
645         }
646 
647         // Map buffer if creating a CLTensor
648         map(tensor, true);
649 
650         using typestring_type = typename std::conditional<std::is_floating_point<U>::value, float, qasymm8_t>::type;
651 
652         std::vector<typestring_type> tmp; /* Used only to get the typestring */
653         npy::Typestring              typestring_o{ tmp };
654         std::string                  typestring = typestring_o.str();
655 
656         std::ofstream stream(npy_filename, std::ofstream::binary);
657         npy::write_header(stream, typestring, fortran_order, shape);
658 
659         arm_compute::Window window;
660         window.use_tensor_dimensions(tensor.info()->tensor_shape());
661 
662         arm_compute::Iterator in(&tensor, window);
663 
664         arm_compute::execute_window_loop(window, [&](const arm_compute::Coordinates &)
665         {
666             stream.write(reinterpret_cast<const char *>(in.ptr()), sizeof(typestring_type));
667         },
668         in);
669 
670         // Unmap buffer if creating a CLTensor
671         unmap(tensor);
672     }
673     catch(const std::ofstream::failure &e)
674     {
675         ARM_COMPUTE_ERROR_VAR("Writing %s: (%s)", npy_filename.c_str(), e.what());
676     }
677 }
678 
679 /** Load the tensor with pre-trained data from a binary file
680  *
681  * @param[in] tensor   The tensor to be filled. Data type supported: F32.
682  * @param[in] filename Filename of the binary file to load from.
683  */
684 template <typename T>
load_trained_data(T & tensor,const std::string & filename)685 void load_trained_data(T &tensor, const std::string &filename)
686 {
687     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&tensor, 1, DataType::F32);
688 
689     std::ifstream fs;
690 
691     try
692     {
693         fs.exceptions(std::ofstream::failbit | std::ofstream::badbit | std::ofstream::eofbit);
694         // Open file
695         fs.open(filename, std::ios::in | std::ios::binary);
696 
697         if(!fs.good())
698         {
699             throw std::runtime_error("Could not load binary data: " + filename);
700         }
701 
702         // Map buffer if creating a CLTensor/GCTensor
703         map(tensor, true);
704 
705         Window window;
706 
707         window.set(arm_compute::Window::DimX, arm_compute::Window::Dimension(0, 1, 1));
708 
709         for(unsigned int d = 1; d < tensor.info()->num_dimensions(); ++d)
710         {
711             window.set(d, Window::Dimension(0, tensor.info()->tensor_shape()[d], 1));
712         }
713 
714         arm_compute::Iterator in(&tensor, window);
715 
716         execute_window_loop(window, [&](const Coordinates &)
717         {
718             fs.read(reinterpret_cast<std::fstream::char_type *>(in.ptr()), tensor.info()->tensor_shape()[0] * tensor.info()->element_size());
719         },
720         in);
721 
722         // Unmap buffer if creating a CLTensor/GCTensor
723         unmap(tensor);
724     }
725     catch(const std::ofstream::failure &e)
726     {
727         ARM_COMPUTE_ERROR_VAR("Writing %s: (%s)", filename.c_str(), e.what());
728     }
729 }
730 
731 template <typename T>
fill_random_tensor(T & tensor,float lower_bound,float upper_bound)732 void fill_random_tensor(T &tensor, float lower_bound, float upper_bound)
733 {
734     std::random_device rd;
735     std::mt19937       gen(rd());
736 
737     Window window;
738     window.use_tensor_dimensions(tensor.info()->tensor_shape());
739 
740     map(tensor, true);
741 
742     Iterator it(&tensor, window);
743 
744     switch(tensor.info()->data_type())
745     {
746         case arm_compute::DataType::F16:
747         {
748             std::uniform_real_distribution<float> dist(lower_bound, upper_bound);
749 
750             execute_window_loop(window, [&](const Coordinates &)
751             {
752                 *reinterpret_cast<half *>(it.ptr()) = (half)dist(gen);
753             },
754             it);
755 
756             break;
757         }
758         case arm_compute::DataType::F32:
759         {
760             std::uniform_real_distribution<float> dist(lower_bound, upper_bound);
761 
762             execute_window_loop(window, [&](const Coordinates &)
763             {
764                 *reinterpret_cast<float *>(it.ptr()) = dist(gen);
765             },
766             it);
767 
768             break;
769         }
770         default:
771         {
772             ARM_COMPUTE_ERROR("Unsupported format");
773         }
774     }
775 
776     unmap(tensor);
777 }
778 
779 template <typename T>
init_sgemm_output(T & dst,T & src0,T & src1,arm_compute::DataType dt)780 void init_sgemm_output(T &dst, T &src0, T &src1, arm_compute::DataType dt)
781 {
782     dst.allocator()->init(TensorInfo(TensorShape(src1.info()->dimension(0), src0.info()->dimension(1), src0.info()->dimension(2)), 1, dt));
783 }
784 /** This function returns the amount of memory free reading from /proc/meminfo
785  *
786  * @return The free memory in kB
787  */
788 uint64_t get_mem_free_from_meminfo();
789 
790 /** Compare two tensors
791  *
792  * @param[in] tensor1   First tensor to be compared.
793  * @param[in] tensor2   Second tensor to be compared.
794  * @param[in] tolerance Tolerance used for the comparison.
795  *
796  * @return The number of mismatches
797  */
798 template <typename T>
compare_tensor(ITensor & tensor1,ITensor & tensor2,T tolerance)799 int compare_tensor(ITensor &tensor1, ITensor &tensor2, T tolerance)
800 {
801     ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(&tensor1, &tensor2);
802     ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(&tensor1, &tensor2);
803 
804     int    num_mismatches = 0;
805     Window window;
806     window.use_tensor_dimensions(tensor1.info()->tensor_shape());
807 
808     map(tensor1, true);
809     map(tensor2, true);
810 
811     Iterator itensor1(&tensor1, window);
812     Iterator itensor2(&tensor2, window);
813 
814     execute_window_loop(window, [&](const Coordinates &)
815     {
816         if(std::abs(*reinterpret_cast<T *>(itensor1.ptr()) - *reinterpret_cast<T *>(itensor2.ptr())) > tolerance)
817         {
818             ++num_mismatches;
819         }
820     },
821     itensor1, itensor2);
822 
823     unmap(itensor1);
824     unmap(itensor2);
825 
826     return num_mismatches;
827 }
828 } // namespace utils
829 } // namespace arm_compute
830 #endif /* __UTILS_UTILS_H__*/
831