# Copyright 2023 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
functions for cross framework model infer result accuracy compare and summary
"""
import os
import os.path
import stat
import csv

import numpy as np

from mslite_bench.infer_base.infer_session_factory import InferSessionFactory
from mslite_bench.utils.infer_log import InferLogger
from mslite_bench.common.task_common_func import CommonFunc
from mslite_bench.common.enum_class import (
    NumpyDtype
)
from mslite_bench.common.model_info_enum import ErrorAlgType

_logger = InferLogger().logger


class CrossFrameworkAccSummary:
    """
    functions for cross framework model infer result accuracy compare and summary
    """
    @classmethod
    def acc_infos_between_features(cls,
                                   standard_feature,
                                   compare_feature):
        """
        get accuracy info between features, including mean error ratio
        and cosine similarity.
        params:
            standard_feature(Dict[str, numpy.ndarray]): standard features to be compared
            compare_feature(Dict[str, numpy.ndarray]): compare features to compare
        return:
            A dict, including mean_error_ratio and cosine similarity.
        """
        mean_relative_error = cls.get_mean_relative_error_between_features(compare_feature,
                                                                           standard_feature)

        cosine_similarity = cls.get_cosine_distance_between_features(compare_feature,
                                                                     standard_feature)
        return {
            ErrorAlgType.MEAN_RELATIVE_ERROR.value: mean_relative_error,
            ErrorAlgType.COSINE_SIMILARITY.value: cosine_similarity
        }

    @classmethod
    def accuracy_compare_func(cls,
                              args,
                              logger=None):
        """
        get outputs accuracy compare info between two different framework using same model.
        params:
            args: input arguments
            logger: logger to recorder logs; falls back to module logger when None
        return:
            A dict, including mean_error_ratio and cosine similarity,
            or None if session creation failed.
        """
        # Fix: original dereferenced `logger` unconditionally although the
        # default is None, which raised AttributeError for callers that
        # omitted it.
        if logger is None:
            logger = _logger
        cmp_result = None

        src_file_path = args.model_file
        # Fix: endswith('ms') matched any path merely ending in the letters
        # "ms" (e.g. 'foo.params'); require the full extension.
        if not src_file_path.endswith(('.ms', '.mindir')):
            raise ValueError(f'{src_file_path} is not a valid mslite model')

        dst_file_path = args.cmp_model_file

        input_data_map = CommonFunc.create_numpy_data_map(args)
        ms_config = CommonFunc.get_framework_config(src_file_path,
                                                    args)
        try:
            ms_session = InferSessionFactory.create_infer_session(src_file_path,
                                                                  ms_config)
        except ValueError as e:
            logger.error('[Accuracy Compare] Create ms session failed: %s', e)
            return cmp_result

        args.device = args.cmp_device
        cmp_cfg = CommonFunc.get_framework_config(dst_file_path,
                                                  args)
        try:
            cmp_session = InferSessionFactory.create_infer_session(dst_file_path,
                                                                   cmp_cfg,
                                                                   args.params_file)
        except ValueError as e:
            logger.error('Create dst session failed %s', e)
            return cmp_result

        try:
            cmp_result = cls.real_accuracy_compare(ms_session,
                                                   cmp_session,
                                                   input_data_map)
        except (NotImplementedError, ValueError) as e:
            logger.error('Accuracy test failed, get accuracy failed %s', e)
            raise
        cmp_result = cls.is_acc_ok(cmp_result)
        for key, val in cmp_result.items():
            logger.debug(f'{key}: {val}')

        if not args.cmp_result_file:
            csv_path = os.path.join(os.path.dirname(src_file_path), 'accuracy_infos.csv')
        else:
            csv_path = f'{args.cmp_result_file}.csv'
        csv_dir = os.path.dirname(csv_path)
        # Fix: dirname is '' when cmp_result_file is a bare filename;
        # os.makedirs('') raises FileNotFoundError.
        if csv_dir:
            os.makedirs(csv_dir, exist_ok=True)
        logger.info('Accuracy compare done, save accuracy info in %s', csv_path)
        cls.write_csv(cmp_result, csv_path)
        return cmp_result

    @classmethod
    def real_accuracy_compare(cls,
                              src_session,
                              dst_session,
                              input_tensor_map):
        """
        get accuracy compare info between two different sessions with same input tensor map.
        params:
            src_session: session to be compared
            dst_session: session to compare
            input_tensor_map: tensor name and value dict for session input.
        return:
            A dict, including mean_error_ratio and cosine similarity.
        """
        src_output = src_session(input_tensor_map)
        dst_output = dst_session(input_tensor_map)

        result = {
            ErrorAlgType.MEAN_RELATIVE_ERROR.value:
                cls.get_mean_relative_error_between_features(dst_output, src_output),
            ErrorAlgType.COSINE_SIMILARITY.value:
                cls.get_cosine_distance_between_features(dst_output, src_output)
        }
        return result

    @classmethod
    def specific_accuracy_compare(cls,
                                  src_session,
                                  dst_session,
                                  args):
        """
        get accuracy compare info between two different sessions with
        specific input loading from files.
        params:
            src_session: session to be compared
            dst_session: session to compare
            args: input arguments.
        return:
            A dict, including mean_error_ratio and cosine similarity.
        """
        # NOTE(review): np.load with allow_pickle=True executes arbitrary
        # code on untrusted files — only feed trusted input_data_file here.
        input_tensor_map = np.load(args.input_data_file,
                                   allow_pickle=True).item()
        result = cls.real_accuracy_compare(src_session,
                                           dst_session,
                                           input_tensor_map)
        return result

    @classmethod
    def random_accuracy_compare(cls,
                                src_session,
                                dst_session,
                                args):
        """
        get accuracy compare info between two different sessions with random inputs.
        params:
            src_session: session to be compared
            dst_session: session to compare
            args: input arguments.
        return:
            A dict, including mean_error_ratio and cosine similarity.
        """
        input_tensor_dtypes = CommonFunc.parse_dtype_infos(args.input_tensor_dtypes)
        input_tensor_shapes = CommonFunc.get_tensor_shapes(args.input_tensor_shapes)
        input_tensor_infos = {
            key: (shape, input_tensor_dtypes.get(key))
            for key, shape in input_tensor_shapes.items()
        }
        try:
            input_tensor_map = CommonFunc.create_numpy_data_map(input_tensor_infos)
        except ValueError as e:
            _logger.error('Random accuracy compare failed: %s', e)
            raise
        result = cls.real_accuracy_compare(src_session,
                                           dst_session,
                                           input_tensor_map)

        return result

    @classmethod
    def get_cosine_distance_between_features(cls,
                                             calibrate_feature,
                                             cmp_feature):
        """
        calculate cosine distance between features.
        params:
            calibrate_feature: feature to be calibrated.
            cmp_feature: feature to compare.
        return:
            cosine similarity values between features, formatted as
            percentage strings per tensor name.
        """
        cosine_similarity = {}
        for key, dst_feature in calibrate_feature.items():
            src_feature = cmp_feature.get(key)
            abs_eps = cls.absolute_tolerance()
            dst_sum = np.sum(dst_feature * dst_feature)
            src_sum = np.sum(src_feature * src_feature)
            dot_sum = np.sum(dst_feature * src_feature)

            if dst_sum < abs_eps and src_sum < abs_eps:
                # both vectors effectively zero: treat as identical
                value = 1.0
            elif dst_sum * src_sum < abs_eps:
                # one side effectively zero: identical only if the other is too
                if dst_sum < abs_eps or src_sum < abs_eps:
                    value = 1.0
                else:
                    value = 0.0
            else:
                value = dot_sum / (np.sqrt(dst_sum) * np.sqrt(src_sum) + abs_eps)

            cosine_similarity[key] = cls.error_format(value)

        return cosine_similarity

    @classmethod
    def get_mean_relative_error_between_features(cls,
                                                 dst_feature,
                                                 src_feature):
        """
        calculate mean relative error between features.
        params:
            dst_feature: feature to be calibrated.
            src_feature: feature to compare.
        return:
            mean relative error values between features, formatted as
            percentage strings per tensor name.
        """
        mean_relative_error_info = {}
        # silence divide-by-zero / invalid warnings for the element-wise math
        np.seterr(divide='ignore', invalid='ignore')

        for key in dst_feature.keys():
            feat_a = dst_feature.get(key, None)
            feat_b = src_feature.get(key, None)
            if feat_b is None:
                raise ValueError(f'Model Inference feature '
                                 f'is not consistent in tensor: {key}')
            if feat_a.size == 0:
                mean_relative_error_info[key] = '0.0'
                continue
            if feat_a.dtype != feat_b.dtype:
                _logger.warning('layer %s : different dtypes between onnx out: %s '
                                'with mslite out: %s ',
                                key,
                                feat_a.dtype,
                                feat_b.dtype)
                mean_relative_error_info[key] = '0.0'
                continue
            diff = np.abs(feat_b - feat_a)
            abs_feat_a = np.abs(feat_a)
            relative_index = diff > cls.relative_tolerance()
            # Fix: original tested `relative_index.size == 0`, but a boolean
            # mask always has the same size as its data, so the check never
            # fired and a fully-filtered tensor produced NaN further down.
            if not relative_index.any():
                mean_relative_error_info[key] = '0.0'
                continue
            diff = diff[relative_index]
            abs_feat_a = abs_feat_a[relative_index]
            abs_index = abs_feat_a > cls.absolute_tolerance()
            # Fix: same dead size-check as above; use any() so an
            # all-small-magnitude tensor takes the plain-average branch.
            if not abs_index.any():
                mean_relative_error_info[key] = cls.error_format(np.average(diff))
                continue
            abs_feat_a = abs_feat_a[abs_index]
            relative_diff = diff[abs_index]
            abs_diff = diff[~abs_index]
            # relative error where |feat_a| is significant, absolute otherwise
            relative_error = np.divide(relative_diff, abs_feat_a)
            mean_relative_error_info[key] = (np.sum(relative_error) + np.sum(abs_diff)) \
                / (relative_error.size + abs_diff.size)
            if np.isnan(mean_relative_error_info.get(key, None)):
                _logger.warning('layer: %s has nan value, '
                                '%s do not work',
                                key,
                                ErrorAlgType.MEAN_RELATIVE_ERROR.value)
            mean_relative_error_info[key] = cls.error_format(mean_relative_error_info.get(key, None))

        return mean_relative_error_info

    @classmethod
    def get_mean_error_between_features(cls,
                                        dst_feature,
                                        src_feature):
        """
        calculate mean error between features.
        params:
            dst_feature: feature to be calibrated.
            src_feature: feature to compare.
        return:
            mean error values between features, formatted as percentage
            strings per tensor name.
        """
        absolute_tolerance = cls.absolute_tolerance()
        relative_tolerance = cls.relative_tolerance()
        mean_error_info = {}

        for key in dst_feature.keys():
            feat_a = dst_feature.get(key, None)
            feat_b = src_feature.get(key, None)
            if feat_b is None:
                raise ValueError(f'Model Inference feature '
                                 f'is not consistent in tensor: {key}')
            diff = abs(feat_a - feat_b)
            gt_tolerance_index = diff > (absolute_tolerance + relative_tolerance * abs(feat_a))
            lt_tolerance_index = np.logical_and(gt_tolerance_index, abs(feat_a) > absolute_tolerance)
            gt_tolerance_index = np.logical_and(gt_tolerance_index, abs(feat_a) < absolute_tolerance)
            gt_tolerance_index = np.logical_and(gt_tolerance_index, diff > relative_tolerance)
            gt_error = diff[gt_tolerance_index]
            lt_error = diff / (abs(feat_a) + absolute_tolerance)
            lt_error = lt_error[lt_tolerance_index]
            if gt_error.size + lt_error.size == 0:
                mean_error = 0.0
            else:
                # NOTE(review): the `+ 1 + absolute_tolerance` in the
                # denominator biases the mean low; looks intentional as a
                # damping term but should be confirmed with the author.
                mean_error = (np.sum(gt_error) + np.sum(lt_error)) / \
                             (gt_error.size + lt_error.size + 1 + cls.absolute_tolerance())
            mean_error_info[key] = cls.error_format(mean_error)
        return mean_error_info

    @staticmethod
    def check_np_dtype_with_model_input_dtype(tensor_map,
                                              session):
        """
        check input numpy data dtype with model input dtype
        params:
            tensor_map: a dict with key tensor name and value numpy data.
                NOTE: entries are revised in place (the returned dict is the
                same object as tensor_map).
            session: model infer session
        return:
            a dict with key tensor name and value revised numpy data.
        raises:
            ValueError: if a tensor name is absent from the model inputs.
        """
        ret_map = tensor_map
        input_tensor_infos = session.input_infos
        dtype_class = session.dtype_class

        for key, np_data in tensor_map.items():
            np_dtype = np_data.dtype
            np_dtype_name = NumpyDtype(np_dtype).name
            session_dtype = input_tensor_infos.get(key, None)

            if session_dtype is None:
                raise ValueError('Input tensor name is not consistent with model inputs')
            # input_infos values are (shape, dtype) pairs; take the dtype
            session_dtype = session_dtype[1]

            session_dtype_name = dtype_class(session_dtype).name
            if session_dtype_name != np_dtype_name:
                _logger.warning('input tensor %s input dtype %s '
                                'is not consistent with model dtype(%s)',
                                key,
                                np_dtype_name,
                                session_dtype_name)
                new_data = np_data.astype(getattr(NumpyDtype, session_dtype_name).value)
                ret_map[key] = new_data

        return ret_map

    @staticmethod
    def absolute_tolerance():
        """for const absolute tolerance"""
        return 1e-4

    @staticmethod
    def relative_tolerance():
        """for const relative tolerance"""
        return 1e-4

    @staticmethod
    def error_format(error):
        """format an error/similarity value as a percentage string"""
        return f'{error * 100:.4f}%'

    @staticmethod
    def write_csv(contents, csv_file):
        """
        write accuracy infos to csv, one row per layer with one column
        per error metric.
        params:
            contents: dict mapping error-metric name -> {layer_name: value}
            csv_file: output csv path
        """
        contents_to_write = []
        error_names = []
        error_infos = []
        for key, value in contents.items():
            error_names.append(key)
            error_infos.append(value)

        for layer_name in list(error_infos[0].keys()):
            tmp_dict = {'layer_name': layer_name}
            for error_name in error_names:
                tmp_dict[error_name] = contents.get(error_name).get(layer_name)
            contents_to_write.append(tmp_dict)

        fieldnames = ['layer_name'] + error_names
        # Fix: O_WRONLY alone fails when the file does not exist and leaves
        # stale trailing bytes when overwriting a longer file; create and
        # truncate explicitly. newline='' is required by the csv module.
        flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
        mode = stat.S_IWUSR | stat.S_IRUSR
        with os.fdopen(os.open(csv_file, flags, mode), 'w', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(contents_to_write)

    @staticmethod
    def is_acc_ok(acc_info):
        """
        add is ok check for accuracy result: tags each layer Good/Bad/Invalid
        based on mean relative error and cosine similarity thresholds, and
        stores the verdicts under the 'is_ok' key of acc_info.
        """
        mre = acc_info.get(ErrorAlgType.MEAN_RELATIVE_ERROR.value, None)
        cos = acc_info.get(ErrorAlgType.COSINE_SIMILARITY.value, None)
        is_ok = {}
        if mre is None or cos is None:
            raise ValueError('MRE or cosine similarity is None')
        mre_thred = 0.05
        cos_thred = 0.99
        cos_bad_thred = 0.9

        def error_format_to_float(num):
            # invert error_format: '12.3456%' -> 0.123456
            return float(num.strip('%')) / 100

        nan_set = {'nan', 'nan%'}
        for key, mre_val in mre.items():
            cos_val = cos.get(key, None)
            # Fix: a layer missing from cos gave cos_val=None and crashed in
            # error_format_to_float; treat it as Invalid alongside NaN.
            if cos_val is None or mre_val in nan_set or cos_val in nan_set:
                is_ok[key] = 'Invalid'
            elif error_format_to_float(cos_val) < cos_bad_thred:
                is_ok[key] = 'Bad'
            elif error_format_to_float(mre_val) > mre_thred \
                    and error_format_to_float(cos_val) < cos_thred:
                is_ok[key] = 'Bad'
            else:
                is_ok[key] = 'Good'

        acc_info['is_ok'] = is_ok
        return acc_info