# Speech Recognition Using MindSpore Lite (C/C++)

<!--Kit: MindSpore Lite Kit-->
<!--Subsystem: AI-->
<!--Owner: @zhuguodong8-->
<!--Designer: @zhuguodong8; @jjfeing-->
<!--Tester: @principal87-->
<!--Adviser: @ge-yafang-->

## Scenario

You can use the [MindSpore](../../reference/apis-mindspore-lite-kit/capi-mindspore.md) APIs to integrate MindSpore Lite capabilities into UI code, quickly deploy AI algorithms, and run on-device model inference to build a speech recognition application.

Speech recognition converts a segment of audio into text. It is widely used in intelligent voice assistants, voice input, voice search, and similar scenarios.

## Basic Concepts

- N-API: a set of interfaces for building native ArkTS components. N-API can be used to wrap libraries developed in C/C++ into ArkTS modules.

## Development Process

1. Select a speech recognition model.
2. Use MindSpore Lite to run model inference on the device and recognize speech from an audio file.

## Environment Setup

Install DevEco Studio 5.0.2 or later, and update the SDK to API version 14 or later.

## How to Develop

Taking inference on a speech recognition model as an example, this section describes how to implement a speech recognition application with MindSpore Lite.

### Selecting a Model

This sample uses the speech recognition model files tiny-encoder.ms, tiny-decoder-main.ms, and tiny-decoder-loop.ms, which are placed in the entry/src/main/resources/rawfile directory of the project.

### Writing the Audio Playback Code

1. Call [@ohos.multimedia.media](../../reference/apis-media-kit/arkts-apis-media.md) and [@ohos.multimedia.audio](../../reference/apis-audio-kit/arkts-apis-audio.md) to implement audio playback.

    ```ts
    // player.ets
    import { media } from '@kit.MediaKit';
    import { common } from '@kit.AbilityKit';
    import { BusinessError } from '@kit.BasicServicesKit';
    import { audio } from '@kit.AudioKit';
    import { UIContext } from '@kit.ArkUI';

    export default class AVPlayerDemo {
      private isSeek: boolean = false; // Indicates whether the current mode supports seek.
      // Register the AVPlayer callbacks.
      setAVPlayerCallback(avPlayer: media.AVPlayer) {
        // Callback for the result of a seek operation.
        avPlayer.on('seekDone', (seekDoneTime: number) => {
          console.info(`MS_LITE_LOG: AVPlayer seek succeeded, seek time is ${seekDoneTime}`);
        });
        // Error callback. If an error occurs during operation, call reset() to trigger the reset process.
        avPlayer.on('error', (err: BusinessError) => {
          console.error(`MS_LITE_LOG: Invoke avPlayer failed, code is ${err.code}, message is ${err.message}`);
          avPlayer.reset(); // Reset the resources, which moves the player to the idle state.
        });
        // State machine callback.
        avPlayer.on('stateChange', async (state: string, reason: media.StateChangeReason) => {
          switch (state) {
            case 'idle': // Reported after reset() is called successfully.
              console.info('MS_LITE_LOG: AVPlayer state idle called.');
              avPlayer.release(); // Call release() to destroy the instance.
              break;
            case 'initialized': // Reported after the playback source is set.
              console.info('MS_LITE_LOG: AVPlayer state initialized called.');
              avPlayer.audioRendererInfo = {
                usage: audio.StreamUsage.STREAM_USAGE_MUSIC, // Audio stream usage: music. Configure this for your scenario.
                rendererFlags: 0 // Audio renderer flags.
              };
              avPlayer.prepare();
              break;
            case 'prepared': // Reported after prepare() succeeds.
              console.info('MS_LITE_LOG: AVPlayer state prepared called.');
              avPlayer.play(); // Start playback.
              break;
            case 'playing': // Reported after play() succeeds.
              console.info('MS_LITE_LOG: AVPlayer state playing called.');
              if (this.isSeek) {
                console.info('MS_LITE_LOG: AVPlayer start to seek.');
                avPlayer.seek(0); // Move the playback position to the beginning of the audio.
              } else {
                // If the playback mode does not support seek, keep playing to the end.
                console.info('MS_LITE_LOG: AVPlayer wait to play end.');
              }
              break;
            case 'paused': // Reported after pause() succeeds.
              console.info('MS_LITE_LOG: AVPlayer state paused called.');
              setTimeout(() => {
                console.info('MS_LITE_LOG: AVPlayer paused wait to play again');
                avPlayer.play(); // Resume playback after a 3-second pause.
              }, 3000);
              break;
            case 'completed': // Reported when playback is complete.
              console.info('MS_LITE_LOG: AVPlayer state completed called.');
              avPlayer.stop(); // Stop playback.
              break;
            case 'stopped': // Reported after stop() succeeds.
              console.info('MS_LITE_LOG: AVPlayer state stopped called.');
              avPlayer.reset(); // Call reset() to reinitialize the AVPlayer state.
              break;
            case 'released':
              console.info('MS_LITE_LOG: AVPlayer state released called.');
              break;
            default:
              console.info('MS_LITE_LOG: AVPlayer state unknown called.');
              break;
          }
        });
      }

      // Obtain the audio file through the resource management API and play it via the fdSrc property.
      async avPlayerFdSrcDemo() {
        // Create an AVPlayer instance.
        let avPlayer: media.AVPlayer = await media.createAVPlayer();
        // Register the state machine callbacks.
        this.setAVPlayerCallback(avPlayer);
        // Obtain the media resource descriptor through the getRawFd API of the resourceManager in UIAbilityContext.
        // The return value is {fd, offset, length}: fd is the file descriptor of the HAP package, offset is the
        // offset of the media resource, and length is the playback length.
        let context = new UIContext().getHostContext() as common.UIAbilityContext;
        let fileDescriptor = await context.resourceManager.getRawFd('zh.wav');
        let avFileDescriptor: media.AVFileDescriptor =
          { fd: fileDescriptor.fd, offset: fileDescriptor.offset, length: fileDescriptor.length };
        this.isSeek = true; // Seek is supported.
        // Setting fdSrc triggers the initialized state to be reported.
        avPlayer.fdSrc = avFileDescriptor;
      }
    }
    ```

### Writing the Speech Recognition Code

Call the [MindSpore](../../reference/apis-mindspore-lite-kit/capi-mindspore.md) APIs to run inference on the three models in sequence. The inference code flow is as follows.

1. Include the required header files. The third-party library librosa comes from [LibrosaCpp](https://github.com/ewan-xu/LibrosaCpp), libsamplerate comes from [libsamplerate](https://github.com/libsndfile/libsamplerate), and AudioFile.h and base64.h come from [whisper.axera](https://github.com/ml-inory/whisper.axera/tree/main/cpp/src).

    ```c++
    #include "AudioFile.h"
    #include "base64.h"
    #include "napi/native_api.h"
    #include "utils.h"
    #include <algorithm>
    #include <cstdlib>
    #include <cstring>
    #include <fstream>
    #include <hilog/log.h>
    #include <iostream>
    #include <librosa/librosa.h>
    #include <mindspore/context.h>
    #include <mindspore/model.h>
    #include <mindspore/status.h>
    #include <mindspore/tensor.h>
    #include <mindspore/types.h>
    #include <numeric>
    #include <rawfile/raw_file_manager.h>
    #include <sstream>
    #include <vector>
    ```

2. Read the audio file, model files, and other resources, and convert them into buffer data.

    ```c++
    #define LOGI(...) ((void)OH_LOG_Print(LOG_APP, LOG_INFO, LOG_DOMAIN, "[MSLiteNapi]", __VA_ARGS__))
    #define LOGD(...) ((void)OH_LOG_Print(LOG_APP, LOG_DEBUG, LOG_DOMAIN, "[MSLiteNapi]", __VA_ARGS__))
    #define LOGW(...) ((void)OH_LOG_Print(LOG_APP, LOG_WARN, LOG_DOMAIN, "[MSLiteNapi]", __VA_ARGS__))
    #define LOGE(...) ((void)OH_LOG_Print(LOG_APP, LOG_ERROR, LOG_DOMAIN, "[MSLiteNapi]", __VA_ARGS__))

    using BinBuffer = std::pair<void *, size_t>;

    BinBuffer ReadBinFile(NativeResourceManager *nativeResourceManager, const std::string &fileName)
    {
        auto rawFile = OH_ResourceManager_OpenRawFile(nativeResourceManager, fileName.c_str());
        if (rawFile == nullptr) {
            LOGE("MS_LITE_ERR: Open file failed");
            return BinBuffer(nullptr, 0);
        }
        long fileSize = OH_ResourceManager_GetRawFileSize(rawFile);
        if (fileSize <= 0) {
            LOGE("MS_LITE_ERR: FileSize not correct");
            OH_ResourceManager_CloseRawFile(rawFile);
            return BinBuffer(nullptr, 0);
        }
        void *buffer = malloc(fileSize);
        if (buffer == nullptr) {
            LOGE("MS_LITE_ERR: malloc failed");
            OH_ResourceManager_CloseRawFile(rawFile);
            return BinBuffer(nullptr, 0);
        }
        int ret = OH_ResourceManager_ReadRawFile(rawFile, buffer, fileSize);
        if (ret == 0) {
            LOGE("MS_LITE_ERR: OH_ResourceManager_ReadRawFile failed");
            free(buffer);
            OH_ResourceManager_CloseRawFile(rawFile);
            return BinBuffer(nullptr, 0);
        }
        OH_ResourceManager_CloseRawFile(rawFile);
        return BinBuffer(buffer, fileSize);
    }

    BinBuffer ReadTokens(NativeResourceManager *nativeResourceManager, const std::string &tokenFileName)
    {
        // The token file is read exactly like a binary model file, so reuse ReadBinFile.
        return ReadBinFile(nativeResourceManager, tokenFileName);
    }
    ```
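    The buffers returned by `ReadBinFile` and `ReadTokens` are heap allocations that the caller must free. As a convenience, the lifetime of such a buffer can be tied to a scope with a small RAII guard; the following `BinBufferGuard` is an illustrative sketch and is not part of the sample. Note that `CreateMSLiteModel` in the next step already frees the model buffer itself, so a guard like this only suits buffers the inference code does not free, such as the audio or token data.

    ```c++
    // Illustrative helper (not in the sample): frees the heap memory owned by a
    // BinBuffer when the guard leaves scope, preventing leaks on early returns.
    struct BinBufferGuard {
        explicit BinBufferGuard(BinBuffer &buf) : buf_(buf) {}
        ~BinBufferGuard()
        {
            free(buf_.first);
            buf_.first = nullptr;
            buf_.second = 0;
        }
        BinBufferGuard(const BinBufferGuard &) = delete;
        BinBufferGuard &operator=(const BinBufferGuard &) = delete;

    private:
        BinBuffer &buf_;
    };
    ```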
3. Create a context, set the device type, and load the model.

    ```c++
    void DestroyModelBuffer(void **buffer)
    {
        if (buffer == nullptr) {
            return;
        }
        free(*buffer);
        *buffer = nullptr;
    }

    OH_AI_ModelHandle CreateMSLiteModel(BinBuffer &bin)
    {
        // Create and configure the context.
        auto context = OH_AI_ContextCreate();
        if (context == nullptr) {
            DestroyModelBuffer(&bin.first);
            LOGE("MS_LITE_ERR: Create MSLite context failed.\n");
            return nullptr;
        }
        auto cpu_device_info = OH_AI_DeviceInfoCreate(OH_AI_DEVICETYPE_CPU);
        if (cpu_device_info == nullptr) {
            DestroyModelBuffer(&bin.first);
            OH_AI_ContextDestroy(&context);
            LOGE("MS_LITE_ERR: Create CPU device info failed.\n");
            return nullptr;
        }
        OH_AI_DeviceInfoSetEnableFP16(cpu_device_info, false);
        OH_AI_ContextAddDeviceInfo(context, cpu_device_info);

        // Create the model.
        auto model = OH_AI_ModelCreate();
        if (model == nullptr) {
            DestroyModelBuffer(&bin.first);
            OH_AI_ContextDestroy(&context);
            LOGE("MS_LITE_ERR: Allocate MSLite Model failed.\n");
            return nullptr;
        }

        // Load and build the model. The model type is OH_AI_MODELTYPE_MINDIR.
        auto build_ret = OH_AI_ModelBuild(model, bin.first, bin.second, OH_AI_MODELTYPE_MINDIR, context);
        DestroyModelBuffer(&bin.first);
        if (build_ret != OH_AI_STATUS_SUCCESS) {
            OH_AI_ModelDestroy(&model);
            LOGE("MS_LITE_ERR: Build MSLite model failed.\n");
            return nullptr;
        }
        LOGI("MS_LITE_LOG: Build MSLite model success.\n");
        return model;
    }
    ```
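    The context above runs inference on the CPU with FP16 disabled. If needed, the thread count and core affinity can also be tuned on the context before `OH_AI_ContextAddDeviceInfo` is called; the values below are illustrative, not a recommendation.

    ```c++
    // Optional tuning (illustrative values), placed before OH_AI_ContextAddDeviceInfo:
    OH_AI_ContextSetThreadNum(context, 2);          // run inference with 2 threads
    OH_AI_ContextSetThreadAffinityMode(context, 1); // 1: prefer the big cores
    ```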
4. Set the model input data and perform model inference.

    ```c++
    constexpr int K_NUM_PRINT_OF_OUT_DATA = 20;

    int FillInputTensor(OH_AI_TensorHandle input, const BinBuffer &bin)
    {
        if (OH_AI_TensorGetDataSize(input) != bin.second) {
            return OH_AI_STATUS_LITE_INPUT_PARAM_INVALID;
        }
        char *data = (char *)OH_AI_TensorGetMutableData(input);
        if (data == nullptr) {
            return OH_AI_STATUS_LITE_ERROR;
        }
        memcpy(data, (const char *)bin.first, OH_AI_TensorGetDataSize(input));
        return OH_AI_STATUS_SUCCESS;
    }

    // Perform model inference. The caller owns the model handle and destroys it on failure.
    int RunMSLiteModel(OH_AI_ModelHandle model, const std::vector<BinBuffer> &inputBins)
    {
        // Set the model input data.
        auto inputs = OH_AI_ModelGetInputs(model);
        for (size_t i = 0; i < inputBins.size(); i++) {
            auto ret = FillInputTensor(inputs.handle_list[i], inputBins[i]);
            if (ret != OH_AI_STATUS_SUCCESS) {
                LOGE("MS_LITE_ERR: set input %{public}d error.\n", static_cast<int>(i));
                return OH_AI_STATUS_LITE_ERROR;
            }
        }

        // Obtain the model output tensors.
        auto outputs = OH_AI_ModelGetOutputs(model);

        // Run inference.
        auto predict_ret = OH_AI_ModelPredict(model, inputs, &outputs, nullptr, nullptr);
        if (predict_ret != OH_AI_STATUS_SUCCESS) {
            LOGE("MS_LITE_ERR: MSLite Predict error.\n");
            return OH_AI_STATUS_LITE_ERROR;
        }
        LOGD("MS_LITE_LOG: Run MSLite model Predict success.\n");

        // Print the output data.
        LOGD("MS_LITE_LOG: Get model outputs:\n");
        for (size_t i = 0; i < outputs.handle_num; i++) {
            auto tensor = outputs.handle_list[i];
            LOGD("MS_LITE_LOG: - Tensor %{public}d name is: %{public}s.\n", static_cast<int>(i),
                 OH_AI_TensorGetName(tensor));
            LOGD("MS_LITE_LOG: - Tensor %{public}d size is: %{public}d.\n", static_cast<int>(i),
                 (int)OH_AI_TensorGetDataSize(tensor));
            LOGD("MS_LITE_LOG: - Tensor data is:\n");
            auto out_data = reinterpret_cast<const float *>(OH_AI_TensorGetData(tensor));
            std::stringstream outStr;
            for (int j = 0; (j < OH_AI_TensorGetElementNum(tensor)) && (j <= K_NUM_PRINT_OF_OUT_DATA); j++) {
                outStr << out_data[j] << " ";
            }
            LOGD("MS_LITE_LOG: %{public}s", outStr.str().c_str());
        }
        return OH_AI_STATUS_SUCCESS;
    }
    ```
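    If `FillInputTensor` reports a size mismatch, the preprocessed data does not match what the model expects. A small debugging aid can print each input's name, shape, and byte size; the following `PrintModelInputs` is a sketch built from the tensor query APIs used above and is not part of the sample.

    ```c++
    // Debugging aid (sketch): print the name, shape, and byte size of every model input.
    void PrintModelInputs(OH_AI_ModelHandle model)
    {
        auto inputs = OH_AI_ModelGetInputs(model);
        for (size_t i = 0; i < inputs.handle_num; i++) {
            auto tensor = inputs.handle_list[i];
            size_t shape_num = 0;
            const int64_t *shape = OH_AI_TensorGetShape(tensor, &shape_num);
            std::stringstream shapeStr;
            for (size_t j = 0; j < shape_num; j++) {
                shapeStr << shape[j] << ((j + 1 < shape_num) ? "," : "");
            }
            LOGI("MS_LITE_LOG: input %{public}d name=%{public}s shape=[%{public}s] bytes=%{public}d",
                 static_cast<int>(i), OH_AI_TensorGetName(tensor), shapeStr.str().c_str(),
                 static_cast<int>(OH_AI_TensorGetDataSize(tensor)));
        }
    }
    ```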
5. Call the methods above to implement the inference flow across the three models.

    ```c++
    const float NEG_INF = -std::numeric_limits<float>::infinity();
    const int WHISPER_SOT = 50258;
    const int WHISPER_TRANSCRIBE = 50359;
    const int WHISPER_TRANSLATE = 50358;
    const int WHISPER_NO_TIMESTAMPS = 50363;
    const int WHISPER_EOT = 50257;
    const int WHISPER_BLANK = 220;
    const int WHISPER_NO_SPEECH = 50362;
    const int WHISPER_N_TEXT_CTX = 448;
    const int WHISPER_N_TEXT_STATE = 384;
    const int WHISPER_N_VOCAB = 51865;
    constexpr int WHISPER_SAMPLE_RATE = 16000;

    BinBuffer GetMSOutput(OH_AI_TensorHandle output)
    {
        float *outputData = reinterpret_cast<float *>(OH_AI_TensorGetMutableData(output));
        size_t size = OH_AI_TensorGetDataSize(output);
        return {outputData, size};
    }

    void SuppressTokens(BinBuffer &logits, bool is_initial)
    {
        auto logits_data = static_cast<float *>(logits.first);
        if (is_initial) {
            logits_data[WHISPER_EOT] = NEG_INF;
            logits_data[WHISPER_BLANK] = NEG_INF;
        }

        // Suppress the other special tokens.
        logits_data[WHISPER_NO_TIMESTAMPS] = NEG_INF;
        logits_data[WHISPER_SOT] = NEG_INF;
        logits_data[WHISPER_NO_SPEECH] = NEG_INF;
        logits_data[WHISPER_TRANSLATE] = NEG_INF;
    }

    std::vector<int> LoopPredict(const OH_AI_ModelHandle model, const BinBuffer &n_layer_cross_k,
                                 const BinBuffer &n_layer_cross_v, const BinBuffer &logits_init,
                                 BinBuffer &out_n_layer_self_k_cache, BinBuffer &out_n_layer_self_v_cache,
                                 const BinBuffer &data_embedding, const int loop, const int offset_init)
    {
        BinBuffer logits{nullptr, WHISPER_N_VOCAB * sizeof(float)};
        logits.first = malloc(logits.second);
        if (!logits.first) {
            LOGE("MS_LITE_ERR: Fail to malloc!\n");
            return {};
        }
        // The decoder-main logits cover 4 tokens; take the logits of the last one.
        void *logits_init_src = static_cast<char *>(logits_init.first) + WHISPER_N_VOCAB * 3 * sizeof(float);
        memcpy(logits.first, logits_init_src, logits.second);
        SuppressTokens(logits, true);

        std::vector<int> output_token;
        float *logits_data = static_cast<float *>(logits.first);
        int max_token_id = 0;
        float max_token = logits_data[0];
        for (int i = 0; i < WHISPER_N_VOCAB; i++) {
            if (logits_data[i] > max_token) {
                max_token_id = i;
                max_token = logits_data[i];
            }
        }
        // The initial logits buffer is no longer needed; later iterations read the
        // logits directly from the model output tensor.
        free(logits.first);
        logits.first = nullptr;

        int offset = offset_init;
        BinBuffer slice{nullptr, 0};
        slice.second = WHISPER_N_TEXT_STATE * sizeof(float);
        slice.first = malloc(slice.second);
        if (!slice.first) {
            LOGE("MS_LITE_ERR: Fail to malloc!\n");
            return {};
        }

        auto out_n_layer_self_k_cache_new = out_n_layer_self_k_cache;
        auto out_n_layer_self_v_cache_new = out_n_layer_self_v_cache;

        for (int i = 0; i < loop; i++) {
            if (max_token_id == WHISPER_EOT) {
                break;
            }
            output_token.push_back(max_token_id);
            std::vector<float> mask(WHISPER_N_TEXT_CTX, 0.0f);
            for (int j = 0; j < WHISPER_N_TEXT_CTX - offset - 1; j++) {
                mask[j] = NEG_INF;
            }
            BinBuffer tokens{&max_token_id, sizeof(int)};

            void *data_embedding_src =
                static_cast<char *>(data_embedding.first) + offset * WHISPER_N_TEXT_STATE * sizeof(float);
            memcpy(slice.first, data_embedding_src, slice.second);
            BinBuffer mask_bin(mask.data(), mask.size() * sizeof(float));
            int ret = RunMSLiteModel(model, {tokens, out_n_layer_self_k_cache_new, out_n_layer_self_v_cache_new,
                                             n_layer_cross_k, n_layer_cross_v, slice, mask_bin});
            if (ret != OH_AI_STATUS_SUCCESS) {
                break;
            }
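            // Each iteration feeds the refreshed self-attention key/value caches back
            // into the decoder, so only the newly generated token requires attention
            // computation. Output 0 holds the next-token logits; outputs 1 and 2 are
            // the updated caches consumed by the next iteration.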
            auto outputs = OH_AI_ModelGetOutputs(model);
            logits = GetMSOutput(outputs.handle_list[0]);
            out_n_layer_self_k_cache_new = GetMSOutput(outputs.handle_list[1]);
            out_n_layer_self_v_cache_new = GetMSOutput(outputs.handle_list[2]);
            offset++;
            SuppressTokens(logits, false);
            logits_data = static_cast<float *>(logits.first);
            max_token_id = 0;
            max_token = logits_data[0];

            for (int j = 0; j < static_cast<int>(logits.second / sizeof(float)); j++) {
                if (logits_data[j] > max_token) {
                    max_token_id = j;
                    max_token = logits_data[j];
                }
            }
            LOGI("MS_LITE_LOG: run decoder loop %{public}d ok!\n token = %{public}d", i, max_token_id);
        }
        free(slice.first);
        return output_token;
    }

    std::vector<std::string> ProcessDataLines(const BinBuffer &token_txt)
    {
        void *data_ptr = token_txt.first;
        size_t data_size = token_txt.second;
        std::vector<std::string> tokens;

        const char *char_data = static_cast<const char *>(data_ptr);
        std::stringstream ss(std::string(char_data, char_data + data_size));
        std::string line;
        while (std::getline(ss, line)) {
            // Each line is "<base64-token> <id>"; keep only the base64 part.
            size_t space_pos = line.find(' ');
            tokens.push_back(line.substr(0, space_pos));
        }
        return tokens;
    }

    static napi_value RunDemo(napi_env env, napi_callback_info info)
    {
        // Run the sample inference.
        napi_value error_ret;
        napi_create_int32(env, -1, &error_ret);
        size_t argc = 1;
        napi_value argv[1] = {nullptr};
        napi_get_cb_info(env, info, &argc, argv, nullptr, nullptr);
        auto resourcesManager = OH_ResourceManager_InitNativeResourceManager(env, argv[0]);

        // Preprocess the data.
        AudioFile<float> audioFile;
        std::string filePath = "zh.wav";
        auto audioBin = ReadBinFile(resourcesManager, filePath);
        if (audioBin.first == nullptr) {
            LOGE("MS_LITE_ERR: Fail to read %{public}s!", filePath.c_str());
            return error_ret;
        }
        size_t dataSize = audioBin.second;
        uint8_t *dataBuffer = (uint8_t *)audioBin.first;
        bool ok = audioFile.loadFromMemory(std::vector<uint8_t>(dataBuffer, dataBuffer + dataSize));
        free(dataBuffer); // AudioFile keeps its own copy of the samples.
        dataBuffer = nullptr;
        if (!ok) {
            LOGE("MS_LITE_ERR: Fail to parse %{public}s!", filePath.c_str());
            return error_ret;
        }
        std::vector<float> data(audioFile.samples[0]);
        ResampleAudio(data, audioFile.getSampleRate(), WHISPER_SAMPLE_RATE, 1, SRC_SINC_BEST_QUALITY);
        std::vector<float> audio(data);

        int padding = 480000; // append 30 s of silence at 16 kHz
        int sr = WHISPER_SAMPLE_RATE;
        int n_fft = 480;
        int n_hop = 160;
        int n_mel = 80;
        int fmin = 0;      // Minimum frequency; defaults to 0.0 Hz.
        int fmax = sr / 2; // Maximum frequency; defaults to half the sample rate (sr / 2.0).
        audio.insert(audio.end(), padding, 0.0f);
        std::vector<std::vector<float>> mels_T =
            librosa::Feature::melspectrogram(audio, sr, n_fft, n_hop, "hann", true, "reflect", 2.f, n_mel, fmin, fmax);

        std::vector<std::vector<float>> mels = TransposeMel(mels_T);
        ProcessMelSpectrogram(mels);

        std::vector<float> inputMels(mels.size() * mels[0].size(), 0);
        for (size_t i = 0; i < mels.size(); i++) {
            std::copy(mels[i].begin(), mels[i].end(), inputMels.begin() + i * mels[0].size());
        }

        BinBuffer inputMelsBin(inputMels.data(), inputMels.size() * sizeof(float));

        // Inference with the tiny-encoder.ms model.
        auto encoderBin = ReadBinFile(resourcesManager, "tiny-encoder.ms");
        if (encoderBin.first == nullptr) {
            return error_ret;
        }

        auto encoder = CreateMSLiteModel(encoderBin);
        if (encoder == nullptr) {
            return error_ret;
        }

        int ret = RunMSLiteModel(encoder, {inputMelsBin});
        if (ret != OH_AI_STATUS_SUCCESS) {
            OH_AI_ModelDestroy(&encoder);
            return error_ret;
        }
        LOGI("MS_LITE_LOG: run encoder ok!\n");

        auto outputs = OH_AI_ModelGetOutputs(encoder);
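        // The encoder runs once per utterance. Its two outputs are the cross-attention
        // key/value tensors, which both decoder models reuse unchanged at every step.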
        auto n_layer_cross_k = GetMSOutput(outputs.handle_list[0]);
        auto n_layer_cross_v = GetMSOutput(outputs.handle_list[1]);

        // Inference with the tiny-decoder-main.ms model.
        std::vector<int> SOT_SEQUENCE = {WHISPER_SOT,
                                         WHISPER_SOT + 2, // language token <|zh|>
                                         WHISPER_TRANSCRIBE, WHISPER_NO_TIMESTAMPS};
        BinBuffer sotSequence(SOT_SEQUENCE.data(), SOT_SEQUENCE.size() * sizeof(int));

        const std::string decoder_main_path = "tiny-decoder-main.ms";
        auto decoderMainBin = ReadBinFile(resourcesManager, decoder_main_path);
        if (decoderMainBin.first == nullptr) {
            OH_AI_ModelDestroy(&encoder);
            return error_ret;
        }
        auto decoder_main = CreateMSLiteModel(decoderMainBin);
        if (decoder_main == nullptr) {
            OH_AI_ModelDestroy(&encoder);
            return error_ret;
        }
        int ret2 = RunMSLiteModel(decoder_main, {sotSequence, n_layer_cross_k, n_layer_cross_v});
        if (ret2 != OH_AI_STATUS_SUCCESS) {
            OH_AI_ModelDestroy(&encoder);
            OH_AI_ModelDestroy(&decoder_main);
            return error_ret;
        }
        LOGI("MS_LITE_LOG: run decoder_main ok!\n");

        auto decoderMainOut = OH_AI_ModelGetOutputs(decoder_main);
        auto logitsBin = GetMSOutput(decoderMainOut.handle_list[0]);
        auto out_n_layer_self_k_cache_Bin = GetMSOutput(decoderMainOut.handle_list[1]);
        auto out_n_layer_self_v_cache_Bin = GetMSOutput(decoderMainOut.handle_list[2]);

        // Inference with the tiny-decoder-loop.ms model.
        const std::string modelName3 = "tiny-decoder-loop.ms";
        auto modelBuffer3 = ReadBinFile(resourcesManager, modelName3);
        if (modelBuffer3.first == nullptr) {
            OH_AI_ModelDestroy(&encoder);
            OH_AI_ModelDestroy(&decoder_main);
            return error_ret;
        }
        auto decoder_loop = CreateMSLiteModel(modelBuffer3);
        if (decoder_loop == nullptr) {
            OH_AI_ModelDestroy(&encoder);
            OH_AI_ModelDestroy(&decoder_main);
            return error_ret;
        }

        const std::string dataName_embedding = "tiny-positional_embedding.bin"; // positional embedding input data
        auto data_embedding = ReadBinFile(resourcesManager, dataName_embedding);
        if (data_embedding.first == nullptr) {
            OH_AI_ModelDestroy(&encoder);
            OH_AI_ModelDestroy(&decoder_main);
            OH_AI_ModelDestroy(&decoder_loop);
            return error_ret;
        }

        int loop_times = WHISPER_N_TEXT_CTX - static_cast<int>(SOT_SEQUENCE.size());
        int offset_init = static_cast<int>(SOT_SEQUENCE.size());
        auto output_tokens =
            LoopPredict(decoder_loop, n_layer_cross_k, n_layer_cross_v, logitsBin, out_n_layer_self_k_cache_Bin,
                        out_n_layer_self_v_cache_Bin, data_embedding, loop_times, offset_init);
        free(data_embedding.first);

        auto tokenBin = ReadTokens(resourcesManager, "tiny-tokens.txt");
        if (tokenBin.first == nullptr) {
            OH_AI_ModelDestroy(&encoder);
            OH_AI_ModelDestroy(&decoder_main);
            OH_AI_ModelDestroy(&decoder_loop);
            return error_ret;
        }
        std::vector<std::string> token_tables = ProcessDataLines(tokenBin);
        free(tokenBin.first);
        std::string result;
        for (const auto i : output_tokens) {
            char str[1024] = {0};
            base64_decode((const uint8 *)token_tables[i].c_str(), (uint32)token_tables[i].size(), str);
            result += str;
        }
        LOGI("MS_LITE_LOG: result is -> %{public}s", result.c_str());

        OH_AI_ModelDestroy(&encoder);
        OH_AI_ModelDestroy(&decoder_main);
        OH_AI_ModelDestroy(&decoder_loop);

        napi_value out_data;
        napi_create_string_utf8(env, result.c_str(), result.length(), &out_data);
        return out_data;
    }
    ```
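6. Write the N-API module registration code so that `RunDemo` is exported to ArkTS as `runDemo`. The snippet below is a minimal sketch of the standard registration boilerplate; it assumes the module name `entry` and the export name `runDemo`, which must match the CMake target and the `Index.d.ts` declaration in the following sections.

    ```c++
    // Standard N-API module registration (sketch): exports RunDemo as "runDemo".
    EXTERN_C_START
    static napi_value Init(napi_env env, napi_value exports)
    {
        napi_property_descriptor desc[] = {
            {"runDemo", nullptr, RunDemo, nullptr, nullptr, nullptr, napi_default, nullptr}
        };
        napi_define_properties(env, exports, sizeof(desc) / sizeof(desc[0]), desc);
        return exports;
    }
    EXTERN_C_END

    static napi_module demoModule = {
        .nm_version = 1,
        .nm_flags = 0,
        .nm_filename = nullptr,
        .nm_register_func = Init,
        .nm_modname = "entry", // must match the library name libentry.so
        .nm_priv = nullptr,
        .reserved = {0},
    };

    extern "C" __attribute__((constructor)) void RegisterEntryModule(void)
    {
        napi_module_register(&demoModule);
    }
    ```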
7. Write the CMake script and link the MindSpore Lite dynamic library.

    ```cmake
    # The minimum version of CMake.
    cmake_minimum_required(VERSION 3.5.0)
    project(test)
    # AudioFile.h requires C++17.
    set(CMAKE_CXX_STANDARD 17)
    set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
    set(NATIVERENDER_PATH ${CMAKE_CURRENT_SOURCE_DIR})

    if(DEFINED PACKAGE_FIND_FILE)
        include(${PACKAGE_FIND_FILE})
    endif()

    include_directories(${NATIVERENDER_PATH}
                        ${NATIVERENDER_PATH}/include)

    # libsamplerate
    set(LIBSAMPLERATE_DIR ${NATIVERENDER_PATH}/third_party/libsamplerate)
    include_directories(${LIBSAMPLERATE_DIR}/include)
    add_subdirectory(${LIBSAMPLERATE_DIR})

    include_directories(${NATIVERENDER_PATH}/third_party/opencc/include/opencc)
    # src
    aux_source_directory(src SRC_DIR)
    include_directories(${NATIVERENDER_PATH}/src)

    include_directories(${CMAKE_SOURCE_DIR}/third_party)

    file(GLOB SRC src/*.cc)

    add_library(entry SHARED mslite_napi.cpp ${SRC})
    target_link_libraries(entry PUBLIC samplerate)
    target_link_libraries(entry PUBLIC mindspore_lite_ndk)
    target_link_libraries(entry PUBLIC hilog_ndk.z)
    target_link_libraries(entry PUBLIC rawfile.z)
    target_link_libraries(entry PUBLIC ace_napi.z)
    ```

### Wrapping the C++ Dynamic Library into an ArkTS Module with N-API

1. In entry/src/main/cpp/types/libentry/Index.d.ts, define the ArkTS API `runDemo()`:

    ```ts
    export const runDemo: (a: Object) => string;
    ```

2. In the oh-package.json5 file, associate the API with the .so file to form a complete ArkTS module:

    ```json
    {
      "name": "entry",
      "version": "1.0.0",
      "description": "MindSpore Lite inference module",
      "main": "",
      "author": "",
      "license": "",
      "dependencies": {
        "libentry.so": "file:./src/main/cpp/types/libentry"
      }
    }
    ```

### Calling the Wrapped ArkTS Module to Run Inference and Output the Result

In entry/src/main/ets/pages/Index.ets, call the wrapped ArkTS module and then process the inference result.

```ts
// Index.ets

import msliteNapi from 'libentry.so';
import AVPlayerDemo from './player';
import { transverter, TransverterType, TransverterLanguage } from '@nutpi/chinese_transverter';

@Entry
@Component
struct Index {
  @State message: string = 'MSLite Whisper Demo';
  @State wavName: string = 'zh.wav';
  @State content: string = '';

  build() {
    Row() {
      Column() {
        Text(this.message)
          .fontSize(30)
          .fontWeight(FontWeight.Bold);
        Button() {
          Text('播放示例音频') // "Play sample audio"
            .fontSize(20)
            .fontWeight(FontWeight.Medium)
        }
        .type(ButtonType.Capsule)
        .margin({
          top: 20
        })
        .backgroundColor('#0D9FFB')
        .width('40%')
        .height('5%')
        .onClick(async () => {
          // Call the playback function through a class instance.
          console.info('MS_LITE_LOG: begin to play wav.');
          let myClass = new AVPlayerDemo();
          myClass.avPlayerFdSrcDemo();
        })
        Button() {
          Text('识别示例音频') // "Recognize sample audio"
            .fontSize(20)
            .fontWeight(FontWeight.Medium)
        }
        .type(ButtonType.Capsule)
        .margin({
          top: 20
        })
        .backgroundColor('#0D9FFB')
        .width('40%')
        .height('5%')
        .onClick(() => {
          let resMgr = this.getUIContext()?.getHostContext()?.getApplicationContext().resourceManager;
          if (resMgr === undefined || resMgr === null) {
            console.error('MS_LITE_ERR: get resourceManager failed.');
            return;
          }
          // Call the wrapped runDemo function.
          console.info('MS_LITE_LOG: *** Start MSLite Demo ***');
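          // Note: runDemo performs the whole inference synchronously on the UI thread.
          // A production app may want to offload this call to keep the UI responsive.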
          let output = msliteNapi.runDemo(resMgr);
          if (output === null || output.length === 0) {
            console.error('MS_LITE_ERR: runDemo failed.');
            return;
          }
          console.info('MS_LITE_LOG: output length = ', output.length, ';value = ', output.slice(0, 20));
          this.content = output;
          console.info('MS_LITE_LOG: *** Finished MSLite Demo ***');
        })

        // Display the recognized text (converted to Simplified Chinese).
        if (this.content) {
          Text('识别内容: \n' + transverter({
            type: TransverterType.SIMPLIFIED,
            str: this.content,
            language: TransverterLanguage.ZH_CN
          }) + '\n').focusable(true).fontSize(20).height('20%')
        }
      }.width('100%')
    }
    .height('100%')
  }
}
```

### Debugging and Verification

1. Connect the device in DevEco Studio and click Run entry to build and install the HAP. Output similar to the following is displayed:

    ```shell
    Launching com.samples.mindsporelitecdemoasr
    $ hdc shell aa force-stop com.samples.mindsporelitecdemoasr
    $ hdc shell mkdir data/local/tmp/xxx
    $ hdc file send E:\xxx\entry\build\default\outputs\default\entry-default-signed.hap "data/local/tmp/xxx"
    $ hdc shell bm install -p data/local/tmp/xxx
    $ hdc shell rm -rf data/local/tmp/xxx
    $ hdc shell aa start -a EntryAbility -b com.samples.mindsporelitecdemoasr
    com.samples.mindsporelitecdemoasr successfully launched...
    ```

2. On the device screen, tap the **播放示例音频** (play sample audio) button to play the sample audio file. Tap the **识别示例音频** (recognize sample audio) button, and the Chinese transcription of the sample audio is displayed on the screen. Filter the log output by the keyword "MS_LITE_LOG" to obtain results like the following:

    ```verilog
    05-16 14:53:44.200 1679-1679 A03d00/JSAPP com.sampl...cdemoasr I MS_LITE_LOG: begin to play wav.
    05-16 14:53:44.210 1679-1679 A03d00/JSAPP com.sampl...cdemoasr I [a92ab1e0f831191, 0, 0] MS_LITE_LOG: AVPlayer state initialized called.
    05-16 14:53:44.228 1679-1679 A03d00/JSAPP com.sampl...cdemoasr I [a92ab1e0f831191, 0, 0] MS_LITE_LOG: AVPlayer state prepared called.
    05-16 14:53:44.242 1679-1679 A03d00/JSAPP com.sampl...cdemoasr I MS_LITE_LOG: AVPlayer state playing called.
    05-16 14:53:44.242 1679-1679 A03d00/JSAPP com.sampl...cdemoasr I MS_LITE_LOG: AVPlayer start to seek.
    05-16 14:53:44.372 1679-1679 A03d00/JSAPP com.sampl...cdemoasr I MS_LITE_LOG: AVPlayer seek succeeded, seek time is 0
    05-16 14:53:49.621 1679-1679 A03d00/JSAPP com.sampl...cdemoasr I MS_LITE_LOG: AVPlayer state completed called.
    05-16 14:53:49.646 1679-1679 A03d00/JSAPP com.sampl...cdemoasr I MS_LITE_LOG: AVPlayer state stopped called.
    05-16 14:53:49.647 1679-1679 A03d00/JSAPP com.sampl...cdemoasr I MS_LITE_LOG: AVPlayer state idle called.
    05-16 14:53:49.649 1679-1679 A03d00/JSAPP com.sampl...cdemoasr I MS_LITE_LOG: AVPlayer state released called.
    05-16 14:53:53.282 1679-1679 A03d00/JSAPP com.sampl...cdemoasr I MS_LITE_LOG: *** Start MSLite Demo ***
    05-16 14:53:53.926 1679-1679 A00000/[MSLiteNapi] com.sampl...cdemoasr I MS_LITE_LOG: Build MSLite model success.
    05-16 14:53:54.260 1679-1679 A00000/[MSLiteNapi] com.sampl...cdemoasr D MS_LITE_LOG: Run MSLite model Predict success.
    05-16 14:53:54.260 1679-1679 A00000/[MSLiteNapi] com.sampl...cdemoasr D MS_LITE_LOG: Get model outputs:
    05-16 14:53:54.260 1679-1679 A00000/[MSLiteNapi] com.sampl...cdemoasr D MS_LITE_LOG: - Tensor 0 name is: n_layer_cross_k.
    05-16 14:53:54.260 1679-1679 A00000/[MSLiteNapi] com.sampl...cdemoasr D MS_LITE_LOG: - Tensor 0 size is: 9216000.
    05-16 14:53:54.260 1679-1679 A00000/[MSLiteNapi] com.sampl...cdemoasr D MS_LITE_LOG: - Tensor data is:
    05-16 14:53:54.260 1679-1679 A00000/[MSLiteNapi] com.sampl...cdemoasr D MS_LITE_LOG: -1.14678 -2.30223 0.868679 0.284441 1.03233 -2.02062 0.688163 -0.732034 -1.10553 1.43459 0.083885 -0.116173 -0.772636 1.5466 -0.631993 -0.897929 -0.0501685 -1.62517 0.375988 -1.77772 -0.432178
    05-16 14:53:54.260 1679-1679 A00000/[MSLiteNapi] com.sampl...cdemoasr D MS_LITE_LOG: - Tensor 1 name is: n_layer_cross_v.
    05-16 14:53:54.260 1679-1679 A00000/[MSLiteNapi] com.sampl...cdemoasr D MS_LITE_LOG: - Tensor 1 size is: 9216000.
    05-16 14:53:54.260 1679-1679 A00000/[MSLiteNapi] com.sampl...cdemoasr D MS_LITE_LOG: - Tensor data is:
    05-16 14:53:54.260 1679-1679 A00000/[MSLiteNapi] com.sampl...cdemoasr D MS_LITE_LOG: 0.0876085 -0.560317 -0.652518 -0.116969 -0.182608 -9.40531e-05 0.186293 0.123206 0.0127445 0.0708352 -0.489624 -0.226322 -0.0686949 -0.0341293 -0.0719619 0.103588 0.398025 -0.444261 0.396124 -0.347295 0.00541205
    05-16 14:53:54.430 1679-1679 A00000/[MSLiteNapi] com.sampl...cdemoasr I MS_LITE_LOG: Build MSLite model success.
    05-16 14:53:54.462 1679-1679 A00000/[MSLiteNapi] com.sampl...cdemoasr D MS_LITE_LOG: Run MSLite model Predict success.
    ......
    05-16 14:53:55.272 1679-1679 A00000/[MSLiteNapi] com.sampl...cdemoasr I MS_LITE_LOG: run decoder loop 16 ok!
    token = 50257
    05-16 14:53:55.307 1679-1679 A00000/[MSLiteNapi] com.sampl...cdemoasr I MS_LITE_LOG: result is -> 我認為跑步最重要的就是給我帶來了身體健康
    05-16 14:53:55.334 1679-1679 A03d00/JSAPP com.sampl...cdemoasr I MS_LITE_LOG: output length = 20 ;value = 我認為跑步最重要的就是給我帶來了身體健康
    05-16 14:53:55.334 1679-1679 A03d00/JSAPP com.sampl...cdemoasr I MS_LITE_LOG: *** Finished MSLite Demo ***
    ```

### Effect

On the device, tap the **播放示例音频** (play sample audio) button to play the sample audio file. Tap the **识别示例音频** (recognize sample audio) button, and the Chinese transcription of the sample audio is displayed on the screen.

| Initial page | After tapping the recognize sample audio button |
| :-----------------------: | :-----------------------: |
|  |  |

## Related Samples

For developing speech recognition applications with MindSpore Lite, see the following sample:

- [MindSpore Lite ASR Application Development Based on Native APIs (C/C++) (API 14)](https://gitcode.com/openharmony/applications_app_samples/tree/master/code/AI/MindSporeLiteCDemoASR)