1 /** 2 * Copyright 2021-2023 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_ASCEND_H_ 18 #define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_ASCEND_H_ 19 20 #include <map> 21 #include <memory> 22 #include <string> 23 #include <utility> 24 #include <vector> 25 26 #include "include/api/status.h" 27 #include "include/dataset/constants.h" 28 #include "include/dataset/transforms.h" 29 30 namespace mindspore { 31 namespace dataset { 32 // Transform operations for performing computer vision. 33 namespace vision { 34 /* ##################################### API class ###########################################*/ 35 36 /// \brief Decode and resize JPEG image using the hardware algorithm of 37 /// Ascend series chip DVPP module. 38 class DATASET_API DvppDecodeResizeJpeg final : public TensorTransform { 39 public: 40 /// \brief Constructor. 41 /// \param[in] resize Parameter vector of two integers for each dimension, with respect to H,W order. 42 /// \par Example 43 /// \code 44 /// /* Define operations */ 45 /// auto dvpp_op = vision::DvppDecodeResizeJpeg({255, 255}); 46 /// 47 /// /* dataset is an instance of Dataset object */ 48 /// dataset = dataset->Map({dvpp_op}, // operations 49 /// {"image"}); // input columns 50 /// \endcode 51 explicit DvppDecodeResizeJpeg(const std::vector<uint32_t> &resize); 52 53 /// \brief Destructor. 54 ~DvppDecodeResizeJpeg() override = default; 55 56 protected: 57 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 58 /// \return Shared pointer to TensorOperation object. 59 std::shared_ptr<TensorOperation> Parse() override; 60 61 std::shared_ptr<TensorOperation> Parse(const MapTargetDevice &env) override; 62 63 private: 64 struct Data; 65 std::shared_ptr<Data> data_; 66 }; 67 68 /// \brief Decode, resize and crop JPEG image using the hardware algorithm of 69 /// Ascend series chip DVPP module. 70 class DATASET_API DvppDecodeResizeCropJpeg final : public TensorTransform { 71 public: 72 /// \brief Constructor. 73 /// \param[in] crop Parameter vector of two integers for each dimension after final crop, with respect to H,W order. 74 /// \param[in] resize Parameter vector of two integers for each dimension after resize, with respect to H,W order. 75 /// \par Example 76 /// \code 77 /// /* Define operations */ 78 /// auto dvpp_op = vision::DvppDecodeResizeCropJpeg({50, 50}, {100, 100}); 79 /// 80 /// /* dataset is an instance of Dataset object */ 81 /// dataset = dataset->Map({dvpp_op}, // operations 82 /// {"image"}); // input columns 83 /// \endcode 84 DvppDecodeResizeCropJpeg(const std::vector<uint32_t> &crop, const std::vector<uint32_t> &resize); 85 86 /// \brief Destructor. 87 ~DvppDecodeResizeCropJpeg() override = default; 88 89 protected: 90 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 91 /// \return Shared pointer to TensorOperation object. 92 std::shared_ptr<TensorOperation> Parse() override; 93 94 std::shared_ptr<TensorOperation> Parse(const MapTargetDevice &env) override; 95 96 private: 97 struct Data; 98 std::shared_ptr<Data> data_; 99 }; 100 101 /// \brief Decode H264/H265 video using the hardware algorithm of 102 /// DVPP module on Ascend series chip. 103 class DATASET_API DvppDecodeVideo final : public TensorTransform { 104 public: 105 /// \brief Constructor. 106 /// \param[in] size Parameter vector of two integers for each dimension of input video frames, with respect to H,W 107 /// order. 108 /// \param[in] type An enum for the coding protocol of video. 109 /// - VdecStreamFormat::kH265MainLevel, video coding protocol is H265-main level. 110 /// - VdecStreamFormat::kH264BaselineLevel, video coding protocol is H264-baseline level. 111 /// - VdecStreamFormat::kH264MainLevel, video coding protocol is H264-main level. 112 /// - VdecStreamFormat::kH264HighLevel, video coding protocol is H264-high level. 113 /// \param[in] out_format An enum for the format of output image (default=VdecOutputFormat::kYUV_SEMIPLANAR_420). 114 /// - VdecOutputFormat::kYUV_SEMIPLANAR_420, format of output image is YUV420SP NV12 8bit. 115 /// - VdecOutputFormat::kYVU_SEMIPLANAR_420, format of output image is YUV420SP NV21 8bit. 116 /// \param[in] output The output path of the decoded images corresponds to video frames. 117 /// \par Example 118 /// \code 119 /// namespace ds = mindspore::dataset; 120 /// 121 /// /* Define operations */ 122 /// std::shared_ptr<ds::TensorTransform> dvpp_decode(new ds::vision::DvppDecodeVideo({1080, 1920}, 123 /// ds::VdecStreamFormat::kH265MainLevel)); 124 /// 125 /// /* define preprocessor */ 126 /// ds::Execute preprocessor({dvpp_decode}, ds::MapTargetDevice::kCpu, 0); 127 /// 128 /// \endcode 129 130 DvppDecodeVideo(const std::vector<uint32_t> &size, VdecStreamFormat type, 131 VdecOutputFormat out_format = VdecOutputFormat::kYuvSemiplanar420, 132 const std::string &output = "./output") DvppDecodeVideo(size,type,out_format,StringToChar (output))133 : DvppDecodeVideo(size, type, out_format, StringToChar(output)) {} 134 135 /// \brief Constructor. 136 /// \param[in] size Parameter vector of two integers for each dimension of input video frames, with respect to H,W 137 /// order. 138 /// \param[in] type An enum for the coding protocol of video. 139 /// - VdecStreamFormat::kH265MainLevel, video coding protocol is H265-main level. 140 /// - VdecStreamFormat::kH264BaselineLevel, video coding protocol is H264-baseline level. 141 /// - VdecStreamFormat::kH264MainLevel, video coding protocol is H264-main level. 142 /// - VdecStreamFormat::kH264HighLevel, video coding protocol is H264-high level. 143 /// \param[in] output The output path of the decoded images corresponds to video frames. 144 /// \par Example 145 /// \code 146 /// namespace ds = mindspore::dataset; 147 /// 148 /// /* Define operations */ 149 /// std::shared_ptr<ds::TensorTransform> dvpp_decode(new ds::vision::DvppDecodeVideo({1080, 1920})); 150 /// 151 /// /* define preprocessor */ 152 /// ds::Execute preprocessor({dvpp_decode}, ds::MapTargetDevice::kCpu, 0); 153 /// 154 /// \endcode 155 156 DvppDecodeVideo(const std::vector<uint32_t> &size, VdecStreamFormat type, const std::string &output = "./output") DvppDecodeVideo(size,type,VdecOutputFormat::kYuvSemiplanar420,StringToChar (output))157 : DvppDecodeVideo(size, type, VdecOutputFormat::kYuvSemiplanar420, StringToChar(output)) {} 158 159 DvppDecodeVideo(const std::vector<uint32_t> &size, VdecStreamFormat type, VdecOutputFormat out_format, 160 const std::vector<char> &output); 161 162 /// \brief Destructor. 163 ~DvppDecodeVideo() override = default; 164 165 protected: 166 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 167 /// \return Shared pointer to TensorOperation object. 168 std::shared_ptr<TensorOperation> Parse() override; 169 170 std::shared_ptr<TensorOperation> Parse(const MapTargetDevice &env) override; 171 172 private: 173 struct Data; 174 std::shared_ptr<Data> data_; 175 }; 176 177 /// \brief Decode PNG image using the hardware algorithm of 178 /// Ascend series chip DVPP module. 179 class DATASET_API DvppDecodePng final : public TensorTransform { 180 public: 181 /// \brief Constructor. 182 /// \par Example 183 /// \code 184 /// /* Define operations */ 185 /// auto dvpp_op = vision::DvppDecodePng(); 186 /// 187 /// /* dataset is an instance of Dataset object */ 188 /// dataset = dataset->Map({dvpp_op}, // operations 189 /// {"image"}); // input columns 190 /// \endcode 191 DvppDecodePng(); 192 193 /// \brief Destructor. 194 ~DvppDecodePng() override = default; 195 196 protected: 197 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 198 /// \return Shared pointer to TensorOperation object. 199 std::shared_ptr<TensorOperation> Parse() override; 200 201 std::shared_ptr<TensorOperation> Parse(const MapTargetDevice &env) override; 202 }; 203 } // namespace vision 204 } // namespace dataset 205 } // namespace mindspore 206 #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_ASCEND_H_ 207