1 /** 2 * Copyright 2019-2021 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_RANDOM_DATA_OP_ 17 #define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_RANDOM_DATA_OP_ 18 19 #include <atomic> 20 #include <memory> 21 #include <mutex> 22 #include <random> 23 #include <string> 24 #include <vector> 25 #include <utility> 26 #include "minddata/dataset/util/status.h" 27 #include "minddata/dataset/core/tensor.h" 28 #include "minddata/dataset/core/data_type.h" 29 #include "minddata/dataset/engine/data_schema.h" 30 #include "minddata/dataset/engine/datasetops/source/mappable_leaf_op.h" 31 #include "minddata/dataset/util/wait_post.h" 32 33 namespace mindspore { 34 namespace dataset { 35 // The RandomDataOp is a leaf node storage operator that generates random data based 36 // on the schema specifications. Typically, it's used for testing and demonstrating 37 // various dataset operator pipelines. It is not "real" data to train with. 38 // The data that is random created is just random and repeated bytes, there is no 39 // "meaning" behind what these bytes are. 40 class RandomDataOp : public MappableLeafOp { 41 public: 42 // Some constants to provide limits to random generation. 43 static constexpr int32_t kMaxNumColumns = 4; 44 static constexpr int32_t kMaxRank = 4; 45 static constexpr int32_t kMaxDimValue = 32; 46 static constexpr int32_t kMaxTotalRows = 1024; 47 48 /** 49 * Constructor for RandomDataOp 50 * @note Private constructor. Must use builder to construct. 51 * @param num_workers - The number of workers 52 * @param op_connector_size - The size of the output connector 53 * @param data_schema - A user-provided schema 54 * @param total_rows - The total number of rows in the dataset 55 * @return Builder - The modified builder by reference 56 */ 57 RandomDataOp(int32_t num_workers, int32_t op_connector_size, int64_t total_rows, 58 std::unique_ptr<DataSchema> data_schema); 59 60 protected: 61 Status PrepareData() override; 62 63 public: 64 /** 65 * Destructor 66 */ 67 ~RandomDataOp() = default; 68 69 /** 70 * A print method typically used for debugging 71 * @param out - The output stream to write output to 72 * @param show_all - A bool to control if you want to show all info or just a summary 73 */ 74 void Print(std::ostream &out, bool show_all) const override; 75 76 /** 77 * << Stream output operator overload 78 * @notes This allows you to write the debug print info using stream operators 79 * @param out - reference to the output stream being overloaded 80 * @param so - reference to the ShuffleOp to display 81 * @return - the output stream must be returned 82 */ 83 friend std::ostream &operator<<(std::ostream &out, const RandomDataOp &op) { 84 op.Print(out, false); 85 return out; 86 } 87 88 // Op name getter 89 // @return Name of the current Op Name()90 std::string Name() const override { return "RandomDataOp"; } 91 92 protected: 93 Status LoadTensorRow(row_id_type row_id, TensorRow *row) override; 94 95 private: 96 /** 97 * Helper function to produce a default/random schema if one didn't exist 98 */ 99 void GenerateSchema(); 100 101 /** 102 * A helper function to create random data for the row 103 * @param new_row - The output row to produce 104 * @return Status The status code returned 105 */ 106 Status CreateRandomRow(TensorRow *new_row); 107 108 /** 109 * A quick inline for producing a random number between (and including) min/max 110 * @param min - minimum number that can be generated 111 * @param max - maximum number that can be generated 112 * @return - The generated random number 113 */ GenRandomInt(int32_t min,int32_t max)114 inline int32_t GenRandomInt(int32_t min, int32_t max) { 115 std::uniform_int_distribution<int32_t> uniDist(min, max); 116 return uniDist(rand_gen_); 117 } 118 119 // Private function for computing the assignment of the column name map. 120 // @return - Status 121 Status ComputeColMap() override; 122 int64_t total_rows_; 123 std::unique_ptr<DataSchema> data_schema_; 124 std::mt19937 rand_gen_; 125 std::vector<TensorRow> rows_; 126 }; // class RandomDataOp 127 } // namespace dataset 128 } // namespace mindspore 129 130 #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_RANDOM_DATA_OP_ 131