• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2023 Huawei Technologies Co., Ltd
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ============================================================================
15"""
16functions for cross framework model infer result accuracy compare and summary
17"""
18import os
19import os.path
20import stat
21import csv
22
23import numpy as np
24
25from mslite_bench.infer_base.infer_session_factory import InferSessionFactory
26from mslite_bench.utils.infer_log import InferLogger
27from mslite_bench.common.task_common_func import CommonFunc
28from mslite_bench.common.enum_class import (
29    NumpyDtype
30)
31from mslite_bench.common.model_info_enum import ErrorAlgType
32
33_logger = InferLogger().logger
34
35
class CrossFrameworkAccSummary:
    """
    Functions for cross framework model infer result accuracy compare and summary.
    """
    @classmethod
    def acc_infos_between_features(cls,
                                   standard_feature,
                                   compare_feature):
        """
        Get accuracy info between features, including mean relative error
        and cosine similarity.
        params:
        standard_feature(Dict[str, numpy.ndarray]): standard features to be compared
        compare_feature(Dict[str, numpy.ndarray]): compare features to compare
        return:
        A dict, including mean_relative_error and cosine similarity.
        """
        mean_relative_error = cls.get_mean_relative_error_between_features(compare_feature,
                                                                           standard_feature)

        cosine_similarity = cls.get_cosine_distance_between_features(compare_feature,
                                                                     standard_feature)
        return {
            ErrorAlgType.MEAN_RELATIVE_ERROR.value: mean_relative_error,
            ErrorAlgType.COSINE_SIMILARITY.value: cosine_similarity
        }

    @classmethod
    def accuracy_compare_func(cls,
                              args,
                              logger=None):
        """
        Get outputs accuracy compare info between two different frameworks
        using the same model.
        params:
        args: input arguments
        logger: logger to record logs; the module logger is used when None
        return:
        A dict, including mean_relative_error, cosine similarity and per-layer
        'is_ok' verdict; None when session creation fails.
        """
        # Fix: the original dereferenced `logger` unconditionally and crashed
        # with AttributeError when called with the default logger=None.
        if logger is None:
            logger = _logger
        cmp_result = None

        src_file_path = args.model_file
        if not src_file_path.endswith('ms') and \
                not src_file_path.endswith('mindir'):
            raise ValueError(f'{src_file_path} is not a valid mslite model')

        dst_file_path = args.cmp_model_file

        input_data_map = CommonFunc.create_numpy_data_map(args)
        ms_config = CommonFunc.get_framework_config(src_file_path,
                                                    args)
        try:
            ms_session = InferSessionFactory.create_infer_session(src_file_path,
                                                                  ms_config)
        except ValueError as e:
            logger.error('[Accuracy Compare] Create ms session failed: %s', e)
            return cmp_result

        # the compare session runs on the compare device
        args.device = args.cmp_device
        cmp_cfg = CommonFunc.get_framework_config(dst_file_path,
                                                  args)
        try:
            cmp_session = InferSessionFactory.create_infer_session(dst_file_path,
                                                                   cmp_cfg,
                                                                   args.params_file)
        except ValueError as e:
            # lazy %-style logging; the original wrapped these templates in a
            # pointless f-prefix with no placeholders
            logger.error('Create dst session failed %s', e)
            return cmp_result

        try:
            cmp_result = cls.real_accuracy_compare(ms_session,
                                                   cmp_session,
                                                   input_data_map)
        except (NotImplementedError, ValueError) as e:
            logger.error('Accuracy test failed, get accuracy failed %s', e)
            raise
        cmp_result = cls.is_acc_ok(cmp_result)
        for key, val in cmp_result.items():
            logger.debug('%s: %s', key, val)

        if not args.cmp_result_file:
            csv_path = os.path.join(os.path.dirname(src_file_path), 'accuracy_infos.csv')
        else:
            csv_path = f'{args.cmp_result_file}.csv'
            csv_dir = os.path.dirname(csv_path)
            # Fix: os.makedirs('') raises FileNotFoundError when the result
            # file is a bare filename with no directory component.
            if csv_dir:
                os.makedirs(csv_dir, exist_ok=True)
        logger.info('Accuracy compare done, save accuracy info in %s', csv_path)
        cls.write_csv(cmp_result, csv_path)
        return cmp_result

    @classmethod
    def real_accuracy_compare(cls,
                              src_session,
                              dst_session,
                              input_tensor_map):
        """
        Get accuracy compare info between two different sessions fed with the
        same input tensor map.
        params:
        src_session: session to be compared
        dst_session: session to compare
        input_tensor_map: tensor name and value dict for session input.
        return:
        A dict, including mean_relative_error and cosine similarity.
        """
        src_output = src_session(input_tensor_map)
        dst_output = dst_session(input_tensor_map)

        result = {
            ErrorAlgType.MEAN_RELATIVE_ERROR.value: cls.get_mean_relative_error_between_features(dst_output,
                                                                                                 src_output),
            ErrorAlgType.COSINE_SIMILARITY.value: cls.get_cosine_distance_between_features(dst_output,
                                                                                           src_output)
        }
        return result

    @classmethod
    def specific_accuracy_compare(cls,
                                  src_session,
                                  dst_session,
                                  args):
        """
        Get accuracy compare info between two different sessions with
        specific input loading from files.
        params:
        src_session: session to be compared
        dst_session: session to compare
        args: input arguments.
        return:
        A dict, including mean_relative_error and cosine similarity.
        """
        # the .npy file is expected to hold a pickled dict of
        # {tensor_name: ndarray} — hence .item() after load
        input_tensor_map = np.load(args.input_data_file,
                                   allow_pickle=True).item()
        result = cls.real_accuracy_compare(src_session,
                                           dst_session,
                                           input_tensor_map)
        return result

    @classmethod
    def random_accuracy_compare(cls,
                                src_session,
                                dst_session,
                                args):
        """
        Get accuracy compare info between two different sessions with random inputs.
        params:
        src_session: session to be compared
        dst_session: session to compare
        args: input arguments.
        return:
        A dict, including mean_relative_error and cosine similarity.
        """
        input_tensor_dtypes = CommonFunc.parse_dtype_infos(args.input_tensor_dtypes)
        input_tensor_shapes = CommonFunc.get_tensor_shapes(args.input_tensor_shapes)
        input_tensor_infos = {
            key: (shape, input_tensor_dtypes.get(key))
            for key, shape in input_tensor_shapes.items()
        }
        try:
            input_tensor_map = CommonFunc.create_numpy_data_map(input_tensor_infos)
        except ValueError as e:
            _logger.error('Random accuracy compare failed: %s', e)
            raise
        result = cls.real_accuracy_compare(src_session,
                                           dst_session,
                                           input_tensor_map)

        return result

    @classmethod
    def get_cosine_distance_between_features(cls,
                                             calibrate_feature,
                                             cmp_feature):
        """
        Calculate cosine similarity between features, formatted as percent strings.
        params:
        calibrate_feature: feature to be calibrated.
        cmp_feature: feature to compare.
        return:
        A dict of cosine similarity values (percent strings) per tensor name.
        """
        cosine_similarity = {}
        for key, dst_feature in calibrate_feature.items():
            src_feature = cmp_feature.get(key)
            abs_eps = cls.absolute_tolerance()
            dst_sum = np.sum(dst_feature * dst_feature)
            src_sum = np.sum(src_feature * src_feature)
            dot_sum = np.sum(dst_feature * src_feature)

            if dst_sum < abs_eps and src_sum < abs_eps:
                # both vectors are numerically zero: treat as identical
                value = 1.0
            elif dst_sum * src_sum < abs_eps:
                # NOTE(review): when exactly one side is near zero this reports
                # 1.0 (perfect similarity) — looks intentional in the original
                # but worth confirming against the calibration spec
                if dst_sum < abs_eps or src_sum < abs_eps:
                    value = 1.0
                else:
                    value = 0.0
            else:
                # abs_eps in the denominator guards against division underflow
                value = dot_sum / (np.sqrt(dst_sum) * np.sqrt(src_sum) + abs_eps)

            cosine_similarity[key] = cls.error_format(value)

        return cosine_similarity

    @classmethod
    def get_mean_relative_error_between_features(cls,
                                                 dst_feature,
                                                 src_feature):
        """
        Calculate mean relative error between features.
        params:
        dst_feature: feature to be calibrated.
        src_feature: feature to compare.
        return:
        A dict of mean relative error values per tensor name; '0.0' for empty,
        dtype-mismatched, or within-tolerance tensors, percent strings otherwise.
        """
        mean_relative_error_info = {}
        # NOTE: global numpy error-state change, kept from the original
        np.seterr(divide='ignore', invalid='ignore')

        for key in dst_feature.keys():
            feat_a = dst_feature.get(key, None)
            feat_b = src_feature.get(key, None)
            if feat_b is None:
                raise ValueError(f'Model Inference feature '
                                 f'is not consistent in tensor: {key}')
            if feat_a.size == 0:
                mean_relative_error_info[key] = '0.0'
                continue
            if feat_a.dtype != feat_b.dtype:
                _logger.warning('layer %s : different dtypes between onnx out: %s '
                                'with mslite out: %s ',
                                key,
                                feat_a.dtype,
                                feat_b.dtype)
                mean_relative_error_info[key] = '0.0'
                continue
            diff = np.abs(feat_b - feat_a)
            abs_feat_a = np.abs(feat_a)
            relative_index = diff > cls.relative_tolerance()
            # Fix: the original tested `relative_index.size == 0`, but a boolean
            # mask's size equals the input size, so the guard never fired and
            # identical features fell through to np.average([]) -> 'nan%'.
            # No element above tolerance means zero error.
            if not np.any(relative_index):
                mean_relative_error_info[key] = '0.0'
                continue
            diff = diff[relative_index]
            abs_feat_a = abs_feat_a[relative_index]
            abs_index = abs_feat_a > cls.absolute_tolerance()
            # Same .size-vs-any fix; all reference values ~0 means relative
            # error is undefined, so report the plain mean absolute diff
            # (value-identical to the original's general path in this case).
            if not np.any(abs_index):
                mean_relative_error_info[key] = cls.error_format(np.average(diff))
                continue
            # relative error where the reference is non-zero,
            # absolute error where it is ~zero
            abs_feat_a = abs_feat_a[abs_index]
            relative_diff = diff[abs_index]
            abs_diff = diff[~abs_index]
            relative_error = np.divide(relative_diff, abs_feat_a)
            mean_relative_error_info[key] = (np.sum(relative_error) + np.sum(abs_diff)) \
                                            / (relative_error.size + abs_diff.size)
            if np.isnan(mean_relative_error_info.get(key, None)):
                _logger.warning('layer: %s has nan value, '
                                '%s do not work',
                                key,
                                ErrorAlgType.MEAN_RELATIVE_ERROR.value)
            mean_relative_error_info[key] = cls.error_format(mean_relative_error_info.get(key, None))

        return mean_relative_error_info

    @classmethod
    def get_mean_error_between_features(cls,
                                        dst_feature,
                                        src_feature):
        """
        Calculate mean error between features.
        params:
        dst_feature: feature to be calibrated.
        src_feature: feature to compare.
        return:
        A dict of mean error values (percent strings) per tensor name.
        """
        absolute_tolerance = cls.absolute_tolerance()
        relative_tolerance = cls.relative_tolerance()
        mean_error_info = {}

        for key in dst_feature.keys():
            feat_a = dst_feature.get(key, None)
            feat_b = src_feature.get(key, None)
            if feat_b is None:
                raise ValueError(f'Model Inference feature '
                                 f'is not consistent in tensor: {key}')
            diff = abs(feat_a - feat_b)
            # elements whose deviation exceeds the combined tolerance band
            gt_tolerance_index = diff > (absolute_tolerance + relative_tolerance * abs(feat_a))
            # lt: reference large enough for a relative error; gt: absolute error
            lt_tolerance_index = np.logical_and(gt_tolerance_index, abs(feat_a) > absolute_tolerance)
            gt_tolerance_index = np.logical_and(gt_tolerance_index, abs(feat_a) < absolute_tolerance)
            gt_tolerance_index = np.logical_and(gt_tolerance_index, diff > relative_tolerance)
            gt_error = diff[gt_tolerance_index]
            lt_error = diff / (abs(feat_a) + absolute_tolerance)
            lt_error = lt_error[lt_tolerance_index]
            if gt_error.size + lt_error.size == 0:
                mean_error = 0.0
            else:
                # the +1 and +abs_tolerance terms keep the denominator from
                # dominating tiny counts, kept from the original
                mean_error = (np.sum(gt_error) + np.sum(lt_error)) / \
                             (gt_error.size + lt_error.size + 1 + cls.absolute_tolerance())
            mean_error_info[key] = cls.error_format(mean_error)
        return mean_error_info

    @staticmethod
    def check_np_dtype_with_model_input_dtype(tensor_map,
                                              session):
        """
        Check input numpy data dtype against the model input dtype, casting
        mismatched tensors to the model dtype.
        params:
        tensor_map: a dict with key tensor name and value numpy data.
        session: model infer session
        return:
        A dict with key tensor name and value revised numpy data.
        """
        ret_map = tensor_map
        input_tensor_infos = session.input_infos
        dtype_class = session.dtype_class

        for key, np_data in tensor_map.items():
            np_dtype = np_data.dtype
            np_dtype_name = NumpyDtype(np_dtype).name
            session_dtype = input_tensor_infos.get(key, None)

            if session_dtype is None:
                raise ValueError('Input tensor name is not consistent with model inputs')
            # input_infos values are (shape, dtype) pairs; take the dtype
            session_dtype = session_dtype[1]

            session_dtype_name = dtype_class(session_dtype).name
            if session_dtype_name != np_dtype_name:
                _logger.warning('input tensor %s input dtype %s '
                                'is not consistent with model dtype(%s)',
                                key,
                                np_dtype_name,
                                session_dtype_name)
                new_data = np_data.astype(getattr(NumpyDtype, session_dtype_name).value)
                ret_map[key] = new_data

        return ret_map

    @staticmethod
    def absolute_tolerance():
        """for const absolute tolerance"""
        return 1e-4

    @staticmethod
    def relative_tolerance():
        """for const relative tolerance"""
        return 1e-4

    @staticmethod
    def error_format(error):
        """Format an error ratio as a percent string with 4 decimals."""
        return f'{error * 100:.4f}%'

    @staticmethod
    def write_csv(contents, csv_file):
        """
        Write accuracy infos to a csv file, one row per layer with one
        column per error metric.
        params:
        contents: dict mapping error metric name -> {layer_name: value}.
        csv_file: output csv path.
        """
        contents_to_write = []
        error_names = []
        error_infos = []
        for key, value in contents.items():
            error_names.append(key)
            error_infos.append(value)

        # layer order follows the first metric's dict
        for layer_name in list(error_infos[0].keys()):
            tmp_dict = {'layer_name': layer_name}
            for error_name in error_names:
                tmp_dict[error_name] = contents.get(error_name).get(layer_name)
            contents_to_write.append(tmp_dict)

        fieldnames = ['layer_name'] + error_names
        # Fix: the original opened with bare os.O_WRONLY, which fails with
        # FileNotFoundError unless the file already exists, and left stale
        # bytes when overwriting a longer file. O_CREAT|O_TRUNC restores
        # normal "create or overwrite" semantics; the 0600 mode is kept.
        flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
        mode = stat.S_IWUSR | stat.S_IRUSR
        # newline='' is the documented way to hand a text file to csv.writer
        with os.fdopen(os.open(csv_file, flags, mode), 'w', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(contents_to_write)

    @staticmethod
    def is_acc_ok(acc_info):
        """
        Add an 'is_ok' verdict ('Good'/'Bad'/'Invalid') per layer based on
        mean relative error and cosine similarity thresholds.
        params:
        acc_info: dict holding the MRE and cosine similarity metric dicts.
        return:
        The same dict with an 'is_ok' entry added.
        Raises ValueError when either metric is missing.
        """
        mre = acc_info.get(ErrorAlgType.MEAN_RELATIVE_ERROR.value, None)
        cos = acc_info.get(ErrorAlgType.COSINE_SIMILARITY.value, None)
        is_ok = {}
        if mre is None or cos is None:
            raise ValueError('MRE or cosine similarity is None')
        mre_thred = 0.05
        cos_thred = 0.99
        cos_bad_thred = 0.9
        def error_format_to_float(num):
            # inverse of error_format: '12.3456%' -> 0.123456
            return float(num.strip('%')) / 100

        nan_set = {'nan', 'nan%'}
        for key, mre_val in mre.items():
            cos_val = cos.get(key, None)
            if mre_val in nan_set or cos_val in nan_set:
                is_ok[key] = 'Invalid'
            elif error_format_to_float(cos_val) < cos_bad_thred:
                is_ok[key] = 'Bad'
            elif error_format_to_float(mre_val) > mre_thred \
                    and error_format_to_float(cos_val) < cos_thred:
                is_ok[key] = 'Bad'
            else:
                is_ok[key] = 'Good'

        acc_info['is_ok'] = is_ok
        return acc_info
439