android-14.0.0_r21/s

/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/gl/workgroups/ideal_workgroup_picker.h"

#include <map>
#include <vector>

#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
#include "tensorflow/lite/delegates/gpu/common/operations.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/types.h"
#include "tensorflow/lite/delegates/gpu/gl/workgroups/calculator.h"

namespace tflite {
namespace gpu {
namespace gl {
namespace {

// This code employs the results of the workgroup performance research
// (b/117291356).

// Ideal workgroup recorded for one specific operation case. A "case" is a
// particular "kernel + strides" combination of a particular operation type;
// the sizes of the input and output tensors are not part of it and can be
// anything.
struct IdealByCase {
  OperationType operation_type;  // Operation type this entry was tuned for.
  HW kernel;                     // Kernel the entry applies to.
  HW strides;                    // Strides the entry applies to.
  uint3 ideal_workgroup;         // Workgroup to use when the case matches.

  // Returns true only when the operation type, the kernel and the strides all
  // equal the ones stored in this entry.
  bool ParamsAccepted(OperationType in_op_type, HW in_kernel,
                      HW in_strides) const {
    if (!(operation_type == in_op_type)) return false;
    if (!(kernel == in_kernel)) return false;
    return strides == in_strides;
  }
};

// Ideal workgroup recorded for a whole operation type. Such an entry means
// that any configuration of an operation of this type is expected to run
// within the top 10% of performance on the particular GPU when this workgroup
// is used.
struct IdealByType {
  OperationType operation_type;  // Operation type this entry covers.
  uint3 ideal_workgroup;         // Workgroup to use for that operation type.

  // Returns true when this entry was recorded for `in_op_type`.
  bool ParamsAccepted(OperationType in_op_type) const {
    const bool same_type = (in_op_type == operation_type);
    return same_type;
  }
};

// Describes ideal workgroups for the particular GPU model.
//
// The struct is brace-initialized positionally ({by_type, by_case}) when the
// per-GPU map is built, so the order of the two fields must not change.
struct IdealWorkgroups {
  // Entries that apply to every configuration of an operation type.
  std::vector<IdealByType> by_type;
  // Entries that apply only to an exact operation type + kernel + strides
  // combination.
  std::vector<IdealByCase> by_case;
};

// Lists of ideal workgroups obtained from the research mentioned above.

// Ideal workgroups for Adreno 630.

// Per-operation-type entries: {operation type, ideal workgroup}.
std::vector<IdealByType>* kIdealByTypeAdreno630Ptr =
    new std::vector<IdealByType>{
        {OperationType::CONVOLUTION_2D, uint3(4, 8, 4)},
        {OperationType::DEPTHWISE_CONVOLUTION, uint3(4, 4, 8)},
    };

// Per-case entries: {operation type, kernel, strides, ideal workgroup}.
std::vector<IdealByCase>* kIdealByCaseAdreno630Ptr =
    new std::vector<IdealByCase>{
        {OperationType::CONVOLUTION_2D, HW(1, 1), HW(1, 1), uint3(4, 8, 4)},
        {OperationType::CONVOLUTION_2D, HW(3, 3), HW(2, 2), uint3(8, 4, 4)},
        {OperationType::DEPTHWISE_CONVOLUTION, HW(1, 1), HW(1, 1),
         uint3(8, 4, 4)},
        {OperationType::DEPTHWISE_CONVOLUTION, HW(3, 3), HW(2, 2),
         uint3(4, 4, 4)},
    };

// Ideal workgroups for Adreno 540.

// Per-operation-type entries: {operation type, ideal workgroup}.
std::vector<IdealByType>* kIdealByTypeAdreno540Ptr =
    new std::vector<IdealByType>{
        {OperationType::CONVOLUTION_2D, uint3(8, 2, 2)},
        {OperationType::DEPTHWISE_CONVOLUTION, uint3(8, 8, 2)},
    };

// Per-case entries: {operation type, kernel, strides, ideal workgroup}.
// NOTE(review): the Adreno 540 entry of kIdealAdrenoWorkgroupsInfoPtr is built
// with an empty by-case list, so this table is not consulted by anything
// visible in this file - confirm whether that is intentional.
std::vector<IdealByCase>* kIdealByCaseAdreno540Ptr =
    new std::vector<IdealByCase>{
        {OperationType::CONVOLUTION_2D, HW(1, 1), HW(1, 1), uint3(4, 2, 8)},
        {OperationType::CONVOLUTION_2D, HW(3, 3), HW(2, 2), uint3(8, 2, 8)},
        {OperationType::DEPTHWISE_CONVOLUTION, HW(1, 1), HW(1, 1),
         uint3(8, 4, 8)},
        {OperationType::DEPTHWISE_CONVOLUTION, HW(3, 3), HW(2, 2),
         uint3(4, 4, 8)},
    };

// Ideal workgroups for Adreno 510.

// Per-operation-type entries: {operation type, ideal workgroup}.
std::vector<IdealByType>* kIdealByTypeAdreno510Ptr =
    new std::vector<IdealByType>{
        {OperationType::CONVOLUTION_2D, uint3(8, 4, 4)},
        {OperationType::DEPTHWISE_CONVOLUTION, uint3(8, 4, 4)},
    };

// Per-case entries: {operation type, kernel, strides, ideal workgroup}.
// NOTE(review): every value below is identical to the corresponding entry of
// kIdealByCaseAdreno540Ptr - confirm that this is the measured result for
// Adreno 510 and not a copy of the 540 table.
std::vector<IdealByCase>* kIdealByCaseAdreno510Ptr =
    new std::vector<IdealByCase>{
        {OperationType::CONVOLUTION_2D, HW(1, 1), HW(1, 1), uint3(4, 2, 8)},
        {OperationType::CONVOLUTION_2D, HW(3, 3), HW(2, 2), uint3(8, 2, 8)},
        {OperationType::DEPTHWISE_CONVOLUTION, HW(1, 1), HW(1, 1),
         uint3(8, 4, 8)},
        {OperationType::DEPTHWISE_CONVOLUTION, HW(3, 3), HW(2, 2),
         uint3(4, 4, 8)},
    };

// Ideal workgroups for Adreno 509.
// Only per-operation-type entries ({operation type, ideal workgroup}) are
// listed for this GPU; there is no per-case table.
std::vector<IdealByType>* kIdealByTypeAdreno509Ptr =
    new std::vector<IdealByType>{
        {OperationType::CONVOLUTION_2D, uint3(8, 4, 8)},
        {OperationType::DEPTHWISE_CONVOLUTION, uint3(8, 8, 2)},
    };

// Ideal workgroups for Adreno 508, 506, 505, 418, 405
// Only per-operation-type entries ({operation type, ideal workgroup}) are
// listed; there is no per-case table for these GPUs.
std::vector<IdealByType>* kIdealByTypeAdreno508Ptr =
    new std::vector<IdealByType>{
        {OperationType::CONVOLUTION_2D, uint3(8, 4, 8)},
        {OperationType::DEPTHWISE_CONVOLUTION, uint3(8, 4, 8)},
    };
// The remaining models reuse the Adreno 508 table: each pointer below is a
// copy of kIdealByTypeAdreno508Ptr and therefore refers to the same vector.
std::vector<IdealByType>* kIdealByTypeAdreno506Ptr = kIdealByTypeAdreno508Ptr;
std::vector<IdealByType>* kIdealByTypeAdreno505Ptr = kIdealByTypeAdreno508Ptr;
std::vector<IdealByType>* kIdealByTypeAdreno418Ptr = kIdealByTypeAdreno508Ptr;
std::vector<IdealByType>* kIdealByTypeAdreno405Ptr = kIdealByTypeAdreno508Ptr;

// Put all ideal workgroups from the list together.
//
// Maps an Adreno GPU model to its IdealWorkgroups, given positionally as
// {by_type, by_case}; `{}` means the model has no by-case entries. The tables
// are dereferenced here and their contents are copied into the map, so this
// definition has to stay below the table definitions it reads.
//
// NOTE(review): kIdealByCaseAdreno540Ptr is defined in this file, yet the
// Adreno 540 entry below passes `{}` for by_case - confirm whether the by-case
// table was left out on purpose.
const std::map<AdrenoGpu, IdealWorkgroups>* kIdealAdrenoWorkgroupsInfoPtr =
    new std::map<AdrenoGpu, IdealWorkgroups>{
        {AdrenoGpu::kAdreno630,
         {*kIdealByTypeAdreno630Ptr, *kIdealByCaseAdreno630Ptr}},
        {AdrenoGpu::kAdreno540, {*kIdealByTypeAdreno540Ptr, {}}},
        {AdrenoGpu::kAdreno510,
         {*kIdealByTypeAdreno510Ptr, *kIdealByCaseAdreno510Ptr}},
        {AdrenoGpu::kAdreno509, {*kIdealByTypeAdreno509Ptr, {}}},
        {AdrenoGpu::kAdreno508, {*kIdealByTypeAdreno508Ptr, {}}},
        {AdrenoGpu::kAdreno506, {*kIdealByTypeAdreno506Ptr, {}}},
        {AdrenoGpu::kAdreno505, {*kIdealByTypeAdreno505Ptr, {}}},
        {AdrenoGpu::kAdreno418, {*kIdealByTypeAdreno418Ptr, {}}},
        {AdrenoGpu::kAdreno405, {*kIdealByTypeAdreno405Ptr, {}}},
    };

}  // namespace

// Returns the pre-tuned ("ideal") workgroup for the given operation on the
// given GPU, or `default_wg` when no ideal workgroup applies.
//
// `default_wg` is returned unchanged when:
//   * the workload has fewer than 32 output channels, or its height or width
//     is 5 or less;
//   * the GPU is not an Adreno;
//   * the Adreno model has no entry in the table of researched GPUs;
//   * the model has an entry, but neither a by-case nor a by-type record
//     matches the operation.
//
// Otherwise a by-case record (exact operation type + kernel + strides) takes
// precedence over a by-type record (operation type only).
uint3 GetIdealWorkgroupIfPossible(const GpuInfo& gpu_info,
                                  OperationType op_type, HW kernel, HW strides,
                                  uint3 default_wg, OHWI workload) {
  // Research showed that ideal workgroup approach doesn't work well with
  // convolutions, which have small amount of output channels or output
  // height/width dimensions
  if (workload.o < 32 || workload.h <= 5 || workload.w <= 5) return default_wg;

  if (!gpu_info.IsAdreno()) {
    return default_wg;
  }

  // Look the GPU model up once and reuse the result for both searches below.
  // GPUs that were not investigated have no entry and keep the default.
  const auto ideal_it =
      kIdealAdrenoWorkgroupsInfoPtr->find(gpu_info.adreno_info.adreno_gpu);
  if (ideal_it == kIdealAdrenoWorkgroupsInfoPtr->end()) {
    return default_wg;
  }
  const IdealWorkgroups& ideal_workgroups = ideal_it->second;

  // Try to find the ideal workgroup by the specific operation case first,
  // because those are expected to be better tuned than the "by type" ones.
  for (const auto& specific_case : ideal_workgroups.by_case) {
    if (specific_case.ParamsAccepted(op_type, kernel, strides)) {
      return specific_case.ideal_workgroup;
    }
  }

  // Then try to find the ideal workgroup by the operation type alone.
  for (const auto& default_case : ideal_workgroups.by_type) {
    if (default_case.ParamsAccepted(op_type)) {
      return default_case.ideal_workgroup;
    }
  }

  // If no ideal workgroup is found, use the default workgroup suggested by each
  // operation.
  return default_wg;
}

// Convenience overload without a caller-supplied default: forwards to the
// six-argument version with kEmptyWorkgroupSize as the default workgroup, so
// kEmptyWorkgroupSize is what comes back when no ideal workgroup applies.
uint3 GetIdealWorkgroupIfPossible(const GpuInfo& gpu_info,
                                  OperationType op_type, HW kernel, HW strides,
                                  OHWI workload) {
  const uint3 fallback_wg = kEmptyWorkgroupSize;
  return GetIdealWorkgroupIfPossible(gpu_info, op_type, kernel, strides,
                                     fallback_wg, workload);
}

}  // namespace gl
}  // namespace gpu
}  // namespace tflite