1 /**
2 * Copyright 2021 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include "nnacl/fp32/log_softmax_fp32.h"
17 #include <math.h>
18 #include "nnacl/fp32/softmax_fp32.h"
19 #include "nnacl/fp32/exp_fp32.h"
20
// Log-softmax over the last axis of a buffer laid out as `batch` consecutive rows of
// `channel` floats.
//
// Steps:
//   1. SoftmaxNorm(src, dst, batch, channel) fills dst (helper defined elsewhere; presumably
//      it writes each row of src shifted by that row's maximum -- confirm against the helper).
//   2. ExpFp32(dst, exp_data, batch * channel) fills exp_data (helper defined elsewhere;
//      presumably the element-wise exponential of dst -- confirm against the helper).
//   3. For every row, the elements of exp_data are summed and logf(sum) is subtracted from
//      each element of the matching dst row.
//
// src:      input, batch * channel floats.
// dst:      output, batch * channel floats.
// exp_data: scratch buffer, batch * channel floats; overwritten.
// batch:    number of rows.
// channel:  number of elements per row (the reduced axis).
void LogSoftmaxLastAxis(const float *src, float *dst, float *exp_data, int batch, int channel) {
  SoftmaxNorm(src, dst, batch, channel);
  ExpFp32(dst, exp_data, batch * channel);
  int cur_batch_offset = 0;
  for (int i = 0; i < batch; i++, cur_batch_offset += channel) {
    const float *exp_row = exp_data + cur_batch_offset;
    float *dst_row = dst + cur_batch_offset;
    float sum = 0;
    int j = 0;
#ifdef ENABLE_NEON
    // Accumulate four lanes at a time; the scalar loop below picks up the remainder.
    float32x4_t sum4 = vdupq_n_f32(0);
    int count = (channel / C4NUM) * C4NUM;
    for (; j < count; j += C4NUM) {
      sum4 = vaddq_f32(sum4, vld1q_f32(exp_row + j));
    }
    sum = sum4[0] + sum4[1] + sum4[2] + sum4[3];
#endif
    for (; j < channel; j++) {
      sum += exp_row[j];
    }
    // The log of the row sum is the same for every element of the row: compute it once.
    const float log_sum = logf(sum);
    for (int k = 0; k < channel; k++) {
      dst_row[k] = dst_row[k] - log_sum;
    }
  }
}
44
45 // output = (input - reduce_max(input, axis)) - log(reduce_sum(exp(input - reduce_max(input, axis)), axis))
LogSoftmax(const float * input_ptr,float * output_ptr,float * sum_data,int32_t * input_shape,int n_dim,int axis)46 void LogSoftmax(const float *input_ptr, float *output_ptr, float *sum_data, int32_t *input_shape, int n_dim, int axis) {
47 int inner_size = 1;
48 int outter_size = 1;
49
50 for (int i = 0; i < axis; i++) {
51 outter_size *= input_shape[i];
52 }
53 for (int i = axis + 1; i < n_dim; i++) {
54 inner_size *= input_shape[i];
55 }
56 for (int i = 0; i < outter_size; i++) {
57 int outter_offset = i * input_shape[axis] * inner_size;
58 int sum_outter_offset = i * inner_size;
59 for (int k = 0; k < inner_size; k++) {
60 int inner_offset = outter_offset + k;
61 float max_data = input_ptr[inner_offset];
62 sum_data[k + sum_outter_offset] = 0;
63 for (int j = 0; j < input_shape[axis]; j++) {
64 int axis_offset = inner_offset + j * inner_size;
65 max_data = max_data > input_ptr[axis_offset] ? max_data : input_ptr[axis_offset];
66 }
67 for (int j = 0; j < input_shape[axis]; j++) {
68 int axis_offset = inner_offset + j * inner_size;
69 output_ptr[axis_offset] = input_ptr[axis_offset] - max_data;
70 sum_data[k + sum_outter_offset] += expf(output_ptr[axis_offset]);
71 }
72 }
73 }
74 for (int i = 0; i < outter_size; i++) {
75 int outter_offset = i * input_shape[axis] * inner_size;
76 int sum_outter_offset = i * inner_size;
77 for (int j = 0; j < input_shape[axis]; j++) {
78 int axis_offset = outter_offset + j * inner_size;
79 for (int k = 0; k < inner_size; k++) {
80 int inner_offset = axis_offset + k;
81 output_ptr[inner_offset] = output_ptr[inner_offset] - logf(sum_data[k + sum_outter_offset]);
82 }
83 }
84 }
85 }
86