# Copyright 2022-2023 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""quick_start_cloud_infer_parallel_python."""

import time
from threading import Thread
import numpy as np
import mindspore_lite as mslite

# Use case: serving inference.
# Precondition 1: Download MindSpore Lite serving package or building MindSpore Lite serving package by
# export MSLITE_ENABLE_SERVER_INFERENCE=on.
# Precondition 2: Install wheel package of MindSpore Lite built by precondition 1.
# The result can be find in the tutorial of runtime_parallel_python.
# the number of threads of one worker.
# WORKERS_NUM * THREAD_NUM should not exceed the number of cores of the machine.
THREAD_NUM = 1
# In parallel inference, the number of workers in one `ModelParallelRunner` in server.
# If you prepare to compare the time difference between parallel inference and serial inference,
# you can set WORKERS_NUM = 1 as serial inference.
WORKERS_NUM = 3
# Simulate 5 clients, and each client sends 2 inference tasks to the server at the same time.
PARALLEL_NUM = 5
TASK_NUM = 2


def parallel_runner_predict(parallel_runner, parallel_id):
    """
    One Runner with 3 workers, set model input, execute inference and get output.

    Args:
        parallel_runner (mindspore_lite.ModelParallelRunner): Actuator supporting parallel inference.
        parallel_id (int): Simulate which client's task to process.
    """
    # The input binary is identical for every task, so read it from disk once
    # instead of re-reading it on every loop iteration.
    in_data = np.fromfile("./model/input.bin", dtype=np.float32)
    # Each client submits TASK_NUM inference tasks, numbered from 1.
    for task_index in range(1, TASK_NUM + 1):
        # Set model input
        inputs = parallel_runner.get_inputs()
        inputs[0].set_data_from_numpy(in_data)
        once_start_time = time.time()
        # Execute inference
        outputs = parallel_runner.predict(inputs)
        once_end_time = time.time()
        print("parallel id: ", parallel_id, " | task index: ", task_index, " | run once time: ",
              once_end_time - once_start_time, " s")
        # Get output
        for output in outputs:
            tensor_name = output.name.rstrip()
            data_size = output.data_size
            element_num = output.element_num
            print("tensor name is:%s tensor size is:%s tensor elements num is:%s" % (tensor_name,
                                                                                     data_size,
                                                                                     element_num))
            data = output.get_data_to_numpy().flatten()
            print("output data is:", end=" ")
            # Slicing guards against tensors with fewer than 5 elements
            # (indexing data[j] for a fixed range(5) could raise IndexError).
            for value in data[:5]:
                print(value, end=" ")
            print("")


# Init RunnerConfig and context, and add CPU device info
context = mslite.Context()
context.target = ["cpu"]
context.cpu.thread_num = THREAD_NUM
context.cpu.inter_op_parallel_num = THREAD_NUM
context.parallel.workers_num = WORKERS_NUM
# Build ModelParallelRunner from file
model_parallel_runner = mslite.ModelParallelRunner()
model_parallel_runner.build_from_file(model_path="./model/mobilenetv2.mindir", context=context)
# The server creates 5 threads to store the inference tasks of 5 clients.
threads = []
total_start_time = time.time()
for i in range(PARALLEL_NUM):
    threads.append(Thread(target=parallel_runner_predict, args=(model_parallel_runner, i,)))
# Start threads to perform parallel inference.
for th in threads:
    th.start()
# Wait for every client's tasks to finish before reporting total time.
for th in threads:
    th.join()
total_end_time = time.time()
print("total run time: ", total_end_time - total_start_time, " s")