• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_SPEECH_COMMANDS_OP_H_
17 #define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_SPEECH_COMMANDS_OP_H_
18 
#include <map>
#include <memory>
#include <mutex>
#include <set>
#include <string>
#include <utility>
#include <vector>

#include "minddata/dataset/core/tensor.h"
#include "minddata/dataset/engine/data_schema.h"
#include "minddata/dataset/engine/datasetops/parallel_op.h"
#include "minddata/dataset/engine/datasetops/source/mappable_leaf_op.h"
#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
#include "minddata/dataset/util/services.h"
#include "minddata/dataset/util/status.h"
#include "minddata/dataset/util/wait_post.h"
34 
35 namespace mindspore {
36 namespace dataset {
37 class SpeechCommandsOp : public MappableLeafOp {
38  public:
39   /// Constructor.
40   /// \param[in] std::string - dataset_dir - directory of SpeechCommands dataset.
41   /// \param[in] std::string - usage - directory of SpeechCommands dataset.
42   /// \param[in] uint32_t - num_workers - Num of workers reading audios in parallel.
43   /// \param[in] uint32_t - queue_size - connector queue size.
44   /// \param[in] std::unique_ptr<DataSchema> - data_schema - data schema of SpeechCommands dataset.
45   /// \param[in] std::unique_ptr<Sampler> - sampler - sampler tells SpeechCommands what to read.
46   SpeechCommandsOp(const std::string &dataset_dir, const std::string &usage, int32_t num_workers, int32_t queue_size,
47                    std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler);
48 
49   /// Destructor.
50   ~SpeechCommandsOp() override = default;
51 
52   /// A print method typically used for debugging.
53   /// \param[out] out - out stream.
54   /// \param[in] show_all - whether to show all information.
55   void Print(std::ostream &out, bool show_all) const override;
56 
57   /// Function to count the number of samples in the SpeechCommands dataset.
58   /// \param[in] num_rows output arg that will hold the actual dataset size.
59   /// \return Status - The status code returned.
60   Status CountTotalRows(int64_t *num_rows);
61 
62   /// Op name getter.
63   /// \return Name of the current Op.
Name()64   std::string Name() const override { return "SpeechCommandsOp"; }
65 
66  private:
67   /// Load a tensor row.
68   /// \param[in] row_id - row id.
69   /// \param[in] trow - waveform & sample_rate & label & speaker_id & utterance_number
70   ///     read into this tensor row.
71   /// \return Status - The status code returned.
72   Status LoadTensorRow(row_id_type row_id, TensorRow *trow) override;
73 
74   /// \param[in] pf_path - the real path of root directory.
75   /// \param[in] pf_usage - usage.
76   /// \return Status - The status code returned.
77   Status ParseFileList(const std::string &pf_path, const std::string &pf_usage);
78 
79   /// Called first when function is called.
80   /// \return Status - The status code returned.
81   Status PrepareData();
82 
83   /// Walk all folders to read all ".wav" files.
84   /// \param[in] walk_path - real path to traverse.
85   /// \return Status - The status code returned.
86   Status WalkAllFiles(const std::string &walk_path);
87 
88   /// Get detail info of wave filename by regex.
89   /// \param[in] file_path - wave file path.
90   /// \param[out] label - label.
91   /// \param[out] speaker_id - speaker id.
92   /// \param[out] utterance_number - utterance number.
93   /// \return Status - The status code returned.
94   Status GetFileInfo(const std::string &file_path, std::string *label, std::string *speaker_id,
95                      int32_t *utterance_number);
96 
97   // Private function for computing the assignment of the column name map.
98   /// \return Status - The status code returned.
99   Status ComputeColMap() override;
100 
101   std::string dataset_dir_;
102   std::string usage_;  // can only be "test", "train", "valid" or "all".
103   std::unique_ptr<DataSchema> data_schema_;
104 
105   std::set<std::string> all_wave_files;         // all wave files in dataset_dir.
106   std::set<std::string> loaded_names;           // loaded file names from txt files.
107   std::vector<std::string> selected_files_vec;  // vector of filenames for sequential loading.
108 
109   std::mutex mux_;
110 };
111 }  // namespace dataset
112 }  // namespace mindspore
113 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_SPEECH_COMMANDS_OP_H_
114