1 /** 2 * Copyright 2024 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_IMAGE_VIDEO_UTILS_H_ 17 #define MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_IMAGE_VIDEO_UTILS_H_ 18 19 #include <map> 20 #include <memory> 21 #include <string> 22 #include <vector> 23 24 #include "minddata/dataset/core/tensor.h" 25 #include "minddata/dataset/core/tensor_row.h" 26 #include "minddata/dataset/util/status.h" 27 28 namespace mindspore { 29 namespace dataset { 30 /// \brief Decode the raw input video bytes. Supported video formats are AVI, H264, H265, MOV, MP4 and WMV. 31 /// \param input: CVTensor containing the not decoded video 1D bytes. 32 /// \param output: Decoded visual Tensor and audio Tensor. For visual tensor, the shape is <T,H,W,C>, the type is 33 /// DE_UINT8. Pixel order is RGB. For audio tensor, the shape is <C, L>. 34 Status DecodeVideo(const TensorRow &input, TensorRow *output); 35 36 /// \brief Read the video, audio, metadata from a video file. It supports AVI, H264, H265, MOV, MP4, WMV files. 37 /// \param[in] filename The path to the videoe file to be read. 38 /// \param[out] video_output The video frames of the video file. 39 /// \param[out] audio_output The audio frames of the video file. 40 /// \param[out] metadata_output The metadata contains video_fps, audio_fps. 41 /// \param[in] start_pts The start presentation timestamp of the video. 42 /// \param[in] end_pts The end presentation timestamp of the video. 43 /// \param[in] pts_unit The unit for the timestamps, can be one of ["pts", "sec"]. 44 /// \return The status code. 45 Status ReadVideo(const std::string &filename, std::shared_ptr<Tensor> *video_output, 46 std::shared_ptr<Tensor> *audio_output, std::map<std::string, std::string> *metadata_output, 47 float start_pts, float end_pts, const std::string &pts_unit); 48 49 /// \brief Read the timestamps and frame rate of a video file. It supports AVI, H264, H265, MOV, MP4, WMV files. 50 /// \param[in] filename The path to the video file to be read. 51 /// \param[out] pts_int64_vector The pts vector of the video file. 52 /// \param[out] video_fps The video frame rate of the video file. 53 /// \param[out] time_base The time base for the pts_int64_vector. 54 /// \param[in] pts_unit The unit for the timestamps, can be one of ["pts", "sec"]. 55 /// \return The status code. 56 Status ReadVideoTimestamps(const std::string &filename, std::vector<int64_t> *pts_int64_vector, float *video_fps, 57 float *time_base, const std::string &pts_unit); 58 } // namespace dataset 59 } // namespace mindspore 60 #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_IMAGE_VIDEO_UTILS_H_ 61