1 /** 2 * Copyright 2021 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_SPEECH_COMMANDS_OP_H_ 17 #define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_SPEECH_COMMANDS_OP_H_ 18 19 #include <map> 20 #include <memory> 21 #include <set> 22 #include <string> 23 #include <utility> 24 #include <vector> 25 26 #include "minddata/dataset/core/tensor.h" 27 #include "minddata/dataset/engine/data_schema.h" 28 #include "minddata/dataset/engine/datasetops/parallel_op.h" 29 #include "minddata/dataset/engine/datasetops/source/mappable_leaf_op.h" 30 #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" 31 #include "minddata/dataset/util/services.h" 32 #include "minddata/dataset/util/status.h" 33 #include "minddata/dataset/util/wait_post.h" 34 35 namespace mindspore { 36 namespace dataset { 37 class SpeechCommandsOp : public MappableLeafOp { 38 public: 39 /// Constructor. 40 /// \param[in] std::string - dataset_dir - directory of SpeechCommands dataset. 41 /// \param[in] std::string - usage - directory of SpeechCommands dataset. 42 /// \param[in] uint32_t - num_workers - Num of workers reading audios in parallel. 43 /// \param[in] uint32_t - queue_size - connector queue size. 44 /// \param[in] std::unique_ptr<DataSchema> - data_schema - data schema of SpeechCommands dataset. 45 /// \param[in] std::unique_ptr<Sampler> - sampler - sampler tells SpeechCommands what to read. 46 SpeechCommandsOp(const std::string &dataset_dir, const std::string &usage, int32_t num_workers, int32_t queue_size, 47 std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler); 48 49 /// Destructor. 50 ~SpeechCommandsOp() override = default; 51 52 /// A print method typically used for debugging. 53 /// \param[out] out - out stream. 54 /// \param[in] show_all - whether to show all information. 55 void Print(std::ostream &out, bool show_all) const override; 56 57 /// Function to count the number of samples in the SpeechCommands dataset. 58 /// \param[in] num_rows output arg that will hold the actual dataset size. 59 /// \return Status - The status code returned. 60 Status CountTotalRows(int64_t *num_rows); 61 62 /// Op name getter. 63 /// \return Name of the current Op. Name()64 std::string Name() const override { return "SpeechCommandsOp"; } 65 66 private: 67 /// Load a tensor row. 68 /// \param[in] row_id - row id. 69 /// \param[in] trow - waveform & sample_rate & label & speaker_id & utterance_number 70 /// read into this tensor row. 71 /// \return Status - The status code returned. 72 Status LoadTensorRow(row_id_type row_id, TensorRow *trow) override; 73 74 /// \param[in] pf_path - the real path of root directory. 75 /// \param[in] pf_usage - usage. 76 /// \return Status - The status code returned. 77 Status ParseFileList(const std::string &pf_path, const std::string &pf_usage); 78 79 /// Called first when function is called. 80 /// \return Status - The status code returned. 81 Status PrepareData(); 82 83 /// Walk all folders to read all ".wav" files. 84 /// \param[in] walk_path - real path to traverse. 85 /// \return Status - The status code returned. 86 Status WalkAllFiles(const std::string &walk_path); 87 88 /// Get detail info of wave filename by regex. 89 /// \param[in] file_path - wave file path. 90 /// \param[out] label - label. 91 /// \param[out] speaker_id - speaker id. 92 /// \param[out] utterance_number - utterance number. 93 /// \return Status - The status code returned. 94 Status GetFileInfo(const std::string &file_path, std::string *label, std::string *speaker_id, 95 int32_t *utterance_number); 96 97 // Private function for computing the assignment of the column name map. 98 /// \return Status - The status code returned. 99 Status ComputeColMap() override; 100 101 std::string dataset_dir_; 102 std::string usage_; // can only be "test", "train", "valid" or "all". 103 std::unique_ptr<DataSchema> data_schema_; 104 105 std::set<std::string> all_wave_files; // all wave files in dataset_dir. 106 std::set<std::string> loaded_names; // loaded file names from txt files. 107 std::vector<std::string> selected_files_vec; // vector of filenames for sequential loading. 108 109 std::mutex mux_; 110 }; 111 } // namespace dataset 112 } // namespace mindspore 113 #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_SPEECH_COMMANDS_OP_H_ 114