• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2022-2023 Huawei Technologies Co., Ltd
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ============================================================================
15"""quick_start_cloud_infer_parallel_python."""
16
17import time
18from threading import Thread
19import numpy as np
20import mindspore_lite as mslite
21
22# Use case: serving inference.
23# Precondition 1: Download MindSpore Lite serving package or building MindSpore Lite serving package by
24#                 export MSLITE_ENABLE_SERVER_INFERENCE=on.
25# Precondition 2: Install wheel package of MindSpore Lite built by precondition 1.
# The result can be found in the tutorial of runtime_parallel_python.
# Number of threads used by one worker.
# WORKERS_NUM * THREAD_NUM should not exceed the number of cores of the machine.
THREAD_NUM = 1
# In parallel inference, the number of workers in one `ModelParallelRunner` on the server.
# To compare the time difference between parallel inference and serial inference,
# set WORKERS_NUM = 1 to run as serial inference.
WORKERS_NUM = 3
# Simulate 5 clients; each client sends TASK_NUM inference tasks to the server at the same time.
PARALLEL_NUM = 5
# Number of inference tasks each simulated client submits.
TASK_NUM = 2
37
38
def parallel_runner_predict(parallel_runner, parallel_id):
    """
    Run TASK_NUM inference tasks on one shared runner and print per-task timing and outputs.

    One Runner with 3 workers: set model input, execute inference and get output.

    Args:
        parallel_runner (mindspore_lite.ModelParallelRunner): Actuator Supporting Parallel inference.
        parallel_id (int): Simulate which client's task to process.
    """
    # The input file never changes between tasks, so read it once instead of
    # re-reading it on every iteration.
    in_data = np.fromfile("./model/input.bin", dtype=np.float32)
    for task_index in range(1, TASK_NUM + 1):
        # Set model input. Fetch fresh input tensors each iteration so tasks
        # do not reuse a stale tensor buffer.
        inputs = parallel_runner.get_inputs()
        inputs[0].set_data_from_numpy(in_data)
        once_start_time = time.time()
        # Execute inference
        outputs = parallel_runner.predict(inputs)
        once_end_time = time.time()
        print("parallel id: ", parallel_id, " | task index: ", task_index, " | run once time: ",
              once_end_time - once_start_time, " s")
        # Get output
        for output in outputs:
            tensor_name = output.name.rstrip()
            data_size = output.data_size
            element_num = output.element_num
            print("tensor name is:%s tensor size is:%s tensor elements num is:%s" % (tensor_name,
                                                                                     data_size,
                                                                                     element_num))
            data = output.get_data_to_numpy().flatten()
            print("output data is:", end=" ")
            # Show at most the first 5 elements; the slice guards against
            # outputs with fewer than 5 elements (the old range(5) would raise).
            for value in data[:5]:
                print(value, end=" ")
            print("")
77
78
# Build the CPU inference context: per-worker threading plus the number of
# parallel workers served by one ModelParallelRunner.
context = mslite.Context()
context.target = ["cpu"]
context.cpu.thread_num = THREAD_NUM
context.cpu.inter_op_parallel_num = THREAD_NUM
context.parallel.workers_num = WORKERS_NUM
# Load the MindIR model into a parallel runner shared by all client threads.
model_parallel_runner = mslite.ModelParallelRunner()
model_parallel_runner.build_from_file(model_path="./model/mobilenetv2.mindir", context=context)
total_start_time = time.time()
# One thread per simulated client; each submits its tasks to the shared runner.
threads = [
    Thread(target=parallel_runner_predict, args=(model_parallel_runner, client_id))
    for client_id in range(PARALLEL_NUM)
]
# Launch every client thread first, then wait for all of them to finish,
# so the inference tasks actually overlap.
for worker in threads:
    worker.start()
for worker in threads:
    worker.join()
total_end_time = time.time()
print("total run time: ", total_end_time - total_start_time, " s")
100