# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Generate the summary event which conforms to the proto format."""
import io
import platform
import time

import numpy as np
from PIL import Image

from mindspore import log as logger
from mindspore import context
from mindspore.communication.management import get_rank
from mindspore.communication.management import GlobalComm

from ..._checkparam import Validator
from ..anf_ir_pb2 import DataType, ModelProto
from ..summary_pb2 import Event

# define the MindSpore image format
MS_IMAGE_TENSOR_FORMAT = 'NCHW'
# Set the Event mark
EVENT_FILE_NAME_MARK = ".out.events.summary."
# Set the version mark and version number of the init event
EVENT_FILE_INIT_VERSION_MARK = "MindSpore.Event:"
EVENT_FILE_INIT_VERSION = 1

F32_MIN, F32_MAX = np.finfo(np.float32).min, np.finfo(np.float32).max


def get_event_file_name(prefix, suffix, time_second):
    """
    Create the file name: prefix + EVENT_FILE_NAME_MARK + time(seconds) + "." + device_id + "." + hostname + suffix.

    Args:
        prefix (str): The prefix of the file name.
        suffix (str): The suffix of the file name.
        time_second (str): The timestamp (in seconds) used in the file name.

    Returns:
        String, the name of the event log file.
    """
    Validator.check_str_by_regular(prefix)
    Validator.check_str_by_regular(suffix)
    file_name = ""
    hostname = platform.node()

    device_num = context.get_auto_parallel_context('device_num')
    device_id = context.get_context('device_id')
    if device_num > 1 or GlobalComm.WORLD_COMM_GROUP == 'nccl_world_group':
        # Notice:
        # In GPU distributed training scenarios, get_context('device_id') does not work,
        # so we use get_rank instead of get_context.
        device_id = get_rank()

    file_name = f'{file_name}{EVENT_FILE_NAME_MARK}{time_second}.{device_id}.{hostname}'

    if prefix is not None:
        file_name = prefix + file_name

    if suffix is not None:
        file_name = file_name + suffix

    return file_name
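
# Usage sketch (illustrative only): with prefix='train', suffix='_lineage' and a
# timestamp of '1622448000', a single-device run on a host named 'my-host' would
# yield 'train.out.events.summary.1622448000.0.my-host_lineage'; the actual
# device_id and hostname depend on the runtime environment.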


def package_init_event():
    """Package the summary init event."""
    init_event = Event()
    init_event.wall_time = time.time()
    version = EVENT_FILE_INIT_VERSION_MARK + str(EVENT_FILE_INIT_VERSION)
    init_event.version = version
    return init_event


def package_graph_event(data):
    """
    Package the summary graph event.

    Args:
        data (Bytes): Graph bytes string.

    Returns:
        Event, event log object.
    """
    graph_event = Event()
    graph_event.wall_time = time.time()
    modelp = ModelProto()
    modelp.ParseFromString(data)
    graph_event.graph_def.CopyFrom(modelp.graph)
    return graph_event


def package_summary_event(data_list, step, wall_time):
    """
    Package the summary into an event protobuf.

    Args:
        data_list (list): Summary data list.
        step (Number): The record step index.
        wall_time (float): The wall time.

    Returns:
        Event, the summary event.
    """
    # create the event of summary
    summary_event = Event()
    summary = summary_event.summary
    summary_event.wall_time = wall_time
    summary_event.step = int(step)

    for value in data_list:
        summary_type = value["_type"]
        data = value["data"]
        tag = value["name"]

        logger.debug(f"Now process {summary_type} summary, tag = {tag}")

        summary_value = summary.value.add()
        summary_value.tag = tag
        # get the summary type and parse the tag
        if summary_type == 'Scalar':
            if not _fill_scalar_summary(tag, data, summary_value):
                del summary.value[-1]
        elif summary_type == 'Tensor':
            _fill_tensor_summary(tag, data, summary_value.tensor)
        elif summary_type == 'Image':
            if not _fill_image_summary(tag, data, summary_value.image, MS_IMAGE_TENSOR_FORMAT):
                del summary.value[-1]
        elif summary_type == 'Histogram':
            _fill_histogram_summary(tag, data, summary_value.histogram)
        else:
            # The data is invalid, skip it
            logger.error(f"Summary type({summary_type}) is invalid, tag = {tag}")
            del summary.value[-1]

    return summary_event
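
# A minimal sketch of the expected ``data_list`` layout (the keys '_type', 'data'
# and 'name' are the ones read in the loop above; the values are illustrative):
#   data_list = [
#       {'_type': 'Scalar', 'name': 'loss', 'data': np.array(0.98)},
#       {'_type': 'Image', 'name': 'input', 'data': np.zeros((1, 3, 24, 24), np.uint8)},
#   ]
#   event = package_summary_event(data_list, step=10, wall_time=time.time())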


def _nptype_to_prototype(np_value):
    """
    Transform the numpy dtype to the proto data type.

    Args:
        np_value (np.ndarray): Numpy data.

    Returns:
        Type, proto data type.
    """
    np2pt_tbl = {
        np.bool_: 'DT_BOOL',
        np.int8: 'DT_INT8',
        np.int16: 'DT_INT16',
        np.int32: 'DT_INT32',
        np.int64: 'DT_INT64',
        np.uint8: 'DT_UINT8',
        np.uint16: 'DT_UINT16',
        np.uint32: 'DT_UINT32',
        np.uint64: 'DT_UINT64',
        np.float16: 'DT_FLOAT16',
        float: 'DT_FLOAT64',
        np.float32: 'DT_FLOAT32',
        np.float64: 'DT_FLOAT64',
        None: 'DT_UNDEFINED'
    }
    np_type = None
    if np_value is None:
        logger.error("The numpy value is None")
    else:
        np_type = np_value.dtype.type

    proto = np2pt_tbl.get(np_type, None)
    if proto is None:
        raise TypeError("No match for proto data type.")

    return proto


def _fill_scalar_summary(tag: str, np_value, summary):
    """
    Package the scalar summary.

    Args:
        tag (str): Summary tag description.
        np_value (Object): Scalar object.
        summary (summary_pb2.Summary.Value): The summary value to fill.

    Returns:
        bool, whether the scalar summary is filled successfully.
    """
    logger.debug(f"Set({tag}) the scalar summary value")
    if np_value.size == 1:
        # is scalar
        summary.scalar_value = np_value.item()
        return True
    if np_value.size > 1:
        logger.warning(
            f"The tensor is not a single scalar, tag = {tag}, ndim = {np_value.ndim}, shape = {np_value.shape}")
        summary.scalar_value = next(np_value.flat).item()
        return True
    logger.error(f"There are no values inside the tensor, tag = {tag}, size = {np_value.size}")
    return False


def _fill_tensor_summary(tag: str, np_value, summary_tensor):
    """
    Package the tensor summary.

    Args:
        tag (str): Summary tag description.
        np_value (np.ndarray): Summary data.
        summary_tensor (Tensor): The tensor of summary.

    Returns:
        Tensor, the filled summary tensor.
    """
    logger.debug(f"Set({tag}) the tensor summary value")
    # get tensor dtype
    tensor_dtype = _nptype_to_prototype(np_value)
    summary_tensor.data_type = DataType.Value(tensor_dtype)

    # get the value list
    tensor_value_list = np_value.reshape(-1).tolist()
    summary_tensor.float_data.extend(tensor_value_list)

    # get the tensor dim
    for v in np_value.shape:
        summary_tensor.dims.append(v)

    return summary_tensor


def _calc_histogram_bins(count):
    """
    Calculate an experience-based optimal number of bins for the histogram.

    There should be enough values in each bin, so the number of bins is derived from the count. For a very small
    count (1-10), a carefully chosen number is assigned. For a large count, we try to make sure there are 9-10
    values in each bucket on average. Too many bins would slow down performance, so the maximum is capped at 90.

    Args:
        count (int): Valid number count for the tensor.

    Returns:
        int, number of histogram bins.
    """
    max_bins, max_per_bin = 90, 10

    if not count:
        return 1
    if count <= 5:
        return 2
    if count <= 10:
        return 3
    if count <= 880:
        # note that math.ceil(881/10) + 1 equals 90
        return count // max_per_bin + 1

    return max_bins
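
# Worked examples of the rule above (derived directly from the branches):
#   count = 0    -> 1 bin
#   count = 7    -> 3 bins
#   count = 100  -> 100 // 10 + 1 = 11 bins
#   count = 5000 -> capped at max_bins = 90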


def _fill_histogram_summary(tag: str, np_value: np.ndarray, summary) -> None:
    """
    Package the histogram summary.

    Args:
        tag (str): Summary tag description.
        np_value (np.ndarray): Summary data.
        summary (summary_pb2.Summary.Histogram): Summary histogram data.
    """
    logger.debug(f"Set({tag}) the histogram summary value")
    # Default bucket for tensor with no valid data.
    ma_value = np.ma.masked_invalid(np_value)
    total, valid = np_value.size, ma_value.count()
    invalids = []
    for isfn in np.isnan, np.isposinf, np.isneginf:
        if total - valid > sum(invalids):
            invalids.append(np.count_nonzero(isfn(np_value)))
        else:
            invalids.append(0)

    summary.count = total
    summary.nan_count, summary.pos_inf_count, summary.neg_inf_count = invalids
    if not valid:
        logger.warning(f'There are no valid values in the ndarray(size={total}, shape={np_value.shape})')
        # summary.{min, max, sum} are 0s by default, no need to explicitly set
    else:
        # BUG: max of a masked array with dtype np.float16 returns inf
        # See numpy issue#15077
        if issubclass(np_value.dtype.type, np.floating):
            summary.min = ma_value.min(fill_value=np.inf)
            summary.max = ma_value.max(fill_value=-np.inf)
            if summary.min < F32_MIN or summary.max > F32_MAX:
                logger.warning(f'Values({summary.min}, {summary.max}) are too large, '
                               f'you may encounter some undefined behaviours hereafter.')
        else:
            summary.min = ma_value.min()
            summary.max = ma_value.max()
        summary.sum = ma_value.sum(dtype=np.float64)
        _fill_bucket(valid, np_value, summary)


def _fill_bucket(valid, np_value, summary):
    """
    Fill the bucket.

    Args:
        valid (int): The count of valid data.
        np_value (np.ndarray): Summary data.
        summary (summary_pb2.Summary.Histogram): Summary histogram data.
    """
    bins = _calc_histogram_bins(valid)
    first_edge, last_edge = summary.min, summary.max

    if not first_edge < last_edge:
        first_edge -= 0.5
        last_edge += 0.5

    bins = np.linspace(first_edge, last_edge, bins + 1, dtype=np_value.dtype)
    hists, edges = np.histogram(np_value, bins=bins)

    for hist, edge1, edge2 in zip(hists, edges, edges[1:]):
        bucket = summary.buckets.add()
        bucket.width = edge2 - edge1
        bucket.count = hist
        bucket.left = edge1
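
# Bucket sketch (illustrative): if summary.min=0.0, summary.max=1.0 and the bin
# count were 4, np.linspace would yield edges [0.0, 0.25, 0.5, 0.75, 1.0], giving
# 4 buckets of width 0.25 whose 'left' fields are 0.0, 0.25, 0.5 and 0.75.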


def _fill_image_summary(tag: str, np_value, summary_image, input_format='NCHW'):
    """
    Package the image summary.

    Args:
        tag (str): Summary tag description.
        np_value (np.ndarray): Summary data.
        summary_image (Summary.Image): The image summary to fill.
        input_format (str): The input data format. Default: 'NCHW'.

    Returns:
        bool, whether the image summary is filled successfully.
    """
    logger.debug(f"Set({tag}) the image summary value")
    if np_value.ndim != 4 or np_value.shape[1] not in (1, 3):
        logger.error(f"The value is not an image, tag = {tag}, ndim = {np_value.ndim}, shape={np_value.shape}")
        return False

    if np_value.ndim != len(input_format):
        logger.error(
            f"The tensor with dim({np_value.ndim}) can't be converted to format({input_format}): dims do not match")
        return False

    if 0 in np_value.shape:
        logger.error(
            f"The tensor with shape({np_value.shape}) is not a valid image because the shape contains zero.")
        return False

    # convert the tensor format
    tensor = _convert_image_format(np_value, input_format)

    # convert the tensor dtype
    # Do not assume that user passes in values in [0, 255], use data type to detect
    scale_factor = 1
    if tensor.dtype == np.uint8:
        scale_factor = 1
    elif np.max(tensor) <= 1 and np.min(tensor) >= 0:
        scale_factor = 255
    tensor = tensor.astype(np.float32)
    tensor = (tensor * scale_factor).astype(np.uint8)

    # create the image summary
    height, width, channel, image_string = _make_image(tensor)
    summary_image.height = height
    summary_image.width = width
    summary_image.colorspace = channel
    summary_image.encoded_image = image_string
    return True
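
# Illustrative call (assuming a float image already normalized to [0, 1]; the code
# above scales it back to the uint8 range before PNG encoding). ``summary_value``
# is a hypothetical value obtained from ``summary.value.add()`` as in
# package_summary_event:
#   img = np.random.rand(1, 3, 32, 32).astype(np.float32)
#   _fill_image_summary('input', img, summary_value.image, MS_IMAGE_TENSOR_FORMAT)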


def _make_image(tensor, rescale=1):
    """
    Convert a numpy representation of an image to Image protobuf.

    Args:
        tensor (Tensor): The image data.
        rescale (Number): The rescale value. Default: 1.

    Returns:
        (Number, Number, Number, Bytes), return the height, width, channel and image string.
    """
    height, width, channel = tensor.shape
    scaled_height = int(height * rescale)
    scaled_width = int(width * rescale)
    image = Image.fromarray(tensor)
    image = image.resize((scaled_width, scaled_height), Image.LANCZOS)
    output = io.BytesIO()
    image.save(output, format='PNG')
    image_string = output.getvalue()
    output.close()
    return height, width, channel, image_string


def _convert_image_format(np_tensor, input_format, out_format='HWC'):
    """
    Convert the image format.

    Args:
        np_tensor (Tensor): The image data.
        input_format (str): Input data format.
        out_format (str): The output data format. Default: 'HWC'.

    Returns:
        Tensor, the image in the output format.
    """
    input_format = input_format.upper()

    # convert to NCHW
    if input_format != 'NCHW':
        index = [input_format.find(c) for c in 'NCHW']
        tensor_nchw = np_tensor.transpose(index)
    else:
        tensor_nchw = np_tensor

    # make grid to expand N
    tensor_chw = _make_canvas_for_imgs(tensor_nchw)

    # convert to out format
    out_index = ['CHW'.find(c) for c in out_format]
    out_tensor = tensor_chw.transpose(out_index)
    return out_tensor
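
# Example of the permutation above (illustrative): for input_format='NHWC',
# [input_format.find(c) for c in 'NCHW'] gives [0, 3, 1, 2], so an (N, H, W, C)
# tensor is transposed to (N, C, H, W) before the canvas is built, and the result
# is finally emitted in the 'HWC' layout.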


def _make_canvas_for_imgs(tensor, col_imgs=8):
    """
    Expand the N dimension and show the images on a canvas.

    Args:
        tensor (Tensor): The image tensor in NCHW format.
        col_imgs (Number): The number of image columns. Default: 8.

    Returns:
        Tensor, the canvas of images.
    """
    # expand the N1HW to N3HW
    if tensor.shape[1] == 1:
        tensor = np.concatenate([tensor, tensor, tensor], 1)

    # expand the N
    n = tensor.shape[0]
    h = tensor.shape[2]
    w = tensor.shape[3]
    cols = min(n, col_imgs)
    rows = int(np.ceil(float(n) / cols))

    # create the canvas: expand the n
    out_canvas = np.zeros((3, h * rows, w * cols))
    i = 0
    for y in range(rows):
        for x in range(cols):
            if i >= n:
                break
            out_canvas[:, y * h:(y + 1) * h, x * w:(x + 1) * w] = tensor[i]
            i = i + 1
    return out_canvas
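
# Shape sketch (illustrative): with n=10 images of size h x w and col_imgs=8,
# cols=8 and rows=ceil(10/8)=2, so the canvas above has shape (3, 2*h, 8*w);
# the last 6 cells of the second row stay zero (black).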