• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2024 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_IMAGE_VIDEO_UTILS_H_
17 #define MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_IMAGE_VIDEO_UTILS_H_
18 
19 #include <map>
20 #include <memory>
21 #include <string>
22 #include <vector>
23 
24 #include "minddata/dataset/core/tensor.h"
25 #include "minddata/dataset/core/tensor_row.h"
26 #include "minddata/dataset/util/status.h"
27 
28 namespace mindspore {
29 namespace dataset {
30 /// \brief Decode the raw input video bytes. Supported video formats are AVI, H264, H265, MOV, MP4 and WMV.
31 /// \param[in] input TensorRow containing the undecoded video as 1D bytes.
32 /// \param[out] output Decoded visual Tensor and audio Tensor. For the visual tensor, the shape is <T,H,W,C> and the
33 ///     type is DE_UINT8. Pixel order is RGB. For the audio tensor, the shape is <C, L>.
34 Status DecodeVideo(const TensorRow &input, TensorRow *output);
35 
36 /// \brief Read the video, audio and metadata from a video file. It supports AVI, H264, H265, MOV, MP4, WMV files.
37 /// \param[in] filename The path to the video file to be read.
38 /// \param[out] video_output The video frames of the video file.
39 /// \param[out] audio_output The audio frames of the video file.
40 /// \param[out] metadata_output The metadata, which contains video_fps and audio_fps.
41 /// \param[in] start_pts The start presentation timestamp of the video.
42 /// \param[in] end_pts The end presentation timestamp of the video.
43 /// \param[in] pts_unit The unit for the timestamps, can be one of ["pts", "sec"].
44 /// \return The status code.
45 Status ReadVideo(const std::string &filename, std::shared_ptr<Tensor> *video_output,
46                  std::shared_ptr<Tensor> *audio_output, std::map<std::string, std::string> *metadata_output,
47                  float start_pts, float end_pts, const std::string &pts_unit);
48 
49 /// \brief Read the timestamps and frame rate of a video file. It supports AVI, H264, H265, MOV, MP4, WMV files.
50 /// \param[in] filename The path to the video file to be read.
51 /// \param[out] pts_int64_vector The presentation timestamps (pts) of the video file.
52 /// \param[out] video_fps The video frame rate of the video file.
53 /// \param[out] time_base The time base of the values in pts_int64_vector.
54 /// \param[in] pts_unit The unit for the timestamps, can be one of ["pts", "sec"].
55 /// \return The status code.
56 Status ReadVideoTimestamps(const std::string &filename, std::vector<int64_t> *pts_int64_vector, float *video_fps,
57                            float *time_base, const std::string &pts_unit);
58 }  // namespace dataset
59 }  // namespace mindspore
60 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_IMAGE_VIDEO_UTILS_H_
61