• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2019-2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_RANDOM_DATA_OP_
17 #define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_RANDOM_DATA_OP_
18 
19 #include <atomic>
20 #include <memory>
21 #include <mutex>
22 #include <random>
23 #include <string>
24 #include <vector>
25 #include <utility>
26 #include "minddata/dataset/util/status.h"
27 #include "minddata/dataset/core/tensor.h"
28 #include "minddata/dataset/core/data_type.h"
29 #include "minddata/dataset/engine/data_schema.h"
30 #include "minddata/dataset/engine/datasetops/source/mappable_leaf_op.h"
31 #include "minddata/dataset/util/wait_post.h"
32 
33 namespace mindspore {
34 namespace dataset {
35 // The RandomDataOp is a leaf node storage operator that generates random data based
36 // on the schema specifications.  Typically, it's used for testing and demonstrating
37 // various dataset operator pipelines.  It is not "real" data to train with.
38 // The data that is randomly created is just random and repeated bytes; there is no
39 // "meaning" behind what these bytes are.
40 class RandomDataOp : public MappableLeafOp {
41  public:
42   // Upper limits applied to random generation (columns, tensor rank, dimension size, row count).
43   static constexpr int32_t kMaxNumColumns = 4;
44   static constexpr int32_t kMaxRank = 4;
45   static constexpr int32_t kMaxDimValue = 32;
46   static constexpr int32_t kMaxTotalRows = 1024;
47 
48   /**
49    * Constructor for RandomDataOp
50    * @note This constructor is public; no builder class is declared in this header.
51    * @param num_workers - The number of workers
52    * @param op_connector_size - The size of the output connector
53    * @param total_rows - The total number of rows in the dataset
54    * @param data_schema - A user-provided schema
55    * @note data_schema is taken by value as a std::unique_ptr, so the caller gives up ownership.
56    */
57   RandomDataOp(int32_t num_workers, int32_t op_connector_size, int64_t total_rows,
58                std::unique_ptr<DataSchema> data_schema);
59 
60  protected:
61   Status PrepareData() override;  // overrides a base-class virtual; behavior is defined in the .cc file
62 
63  public:
64   /**
65    * Destructor (compiler-generated; members clean up after themselves)
66    */
67   ~RandomDataOp() = default;
68 
69   /**
70    * A print method typically used for debugging
71    * @param out - The output stream to write output to
72    * @param show_all - A bool to control if you want to show all info or just a summary
73    */
74   void Print(std::ostream &out, bool show_all) const override;
75 
76   /**
77    * << Stream output operator overload
78    * @note This allows you to write the debug print info using stream operators
79    * @param out - reference to the output stream being overloaded
80    * @param op - reference to the RandomDataOp to display
81    * @return - the same output stream, after calling op.Print(out, false) on it
82    */
83   friend std::ostream &operator<<(std::ostream &out, const RandomDataOp &op) {
84     op.Print(out, false);
85     return out;
86   }
87 
88   // Op name getter
89   // @return Name of the current Op (the fixed string "RandomDataOp")
Name()90   std::string Name() const override { return "RandomDataOp"; }
91 
92  protected:
93   Status LoadTensorRow(row_id_type row_id, TensorRow *row) override;  // presumably fills *row for row_id - TODO confirm in .cc
94 
95  private:
96   /**
97    * Helper function to produce a default/random schema if one didn't exist
98    */
99   void GenerateSchema();
100 
101   /**
102    * A helper function to create random data for the row
103    * @param new_row - The output row to produce
104    * @return Status The status code returned
105    */
106   Status CreateRandomRow(TensorRow *new_row);
107 
108   /**
109    * Draws a uniformly distributed integer from the closed range [min, max] using rand_gen_
110    * @param min - minimum number that can be generated (must not exceed max)
111    * @param max - maximum number that can be generated
112    * @return - The generated random number
113    */
GenRandomInt(int32_t min,int32_t max)114   inline int32_t GenRandomInt(int32_t min, int32_t max) {
115     std::uniform_int_distribution<int32_t> uniDist(min, max);
116     return uniDist(rand_gen_);  // advances the shared engine state, hence this method is non-const
117   }
118 
119   // Private function for computing the assignment of the column name map.
120   // @return - Status
121   Status ComputeColMap() override;
122   int64_t total_rows_;  // presumably the total_rows constructor argument - TODO confirm in .cc
123   std::unique_ptr<DataSchema> data_schema_;  // presumably the schema passed to the constructor - TODO confirm in .cc
124   std::mt19937 rand_gen_;  // random engine consumed by GenRandomInt()
125   std::vector<TensorRow> rows_;  // NOTE(review): how this is populated and read is not visible in this header
126 };  // class RandomDataOp
127 }  // namespace dataset
128 }  // namespace mindspore
129 
130 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_RANDOM_DATA_OP_
131