1 /** 2 * Copyright 2020 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_BATCH_NODE_H_ 18 #define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_BATCH_NODE_H_ 19 20 #include <map> 21 #include <memory> 22 #include <string> 23 #include <utility> 24 #include <vector> 25 26 #include "minddata/dataset/engine/ir/datasetops/dataset_node.h" 27 #include "minddata/dataset/engine/opt/pass.h" 28 29 namespace mindspore { 30 namespace dataset { 31 32 class BatchNode : public DatasetNode { 33 public: 34 #ifdef ENABLE_PYTHON 35 /// \brief Constructor #1, for Python API to create a BatchNode 36 BatchNode(std::shared_ptr<DatasetNode> child, int32_t batch_size, bool drop_remainder, bool pad, 37 const std::vector<std::string> &in_col_names, const std::vector<std::string> &out_col_names, 38 const std::vector<std::string> &col_order, py::function batch_size_func, py::function batch_map_func, 39 std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> pad_map); 40 #endif 41 42 /// \brief Constructor #2 for C++ API to create a BatchNode 43 BatchNode(std::shared_ptr<DatasetNode> child, int32_t batch_size, bool drop_remainder); 44 45 /// \brief Destructor 46 ~BatchNode() = default; 47 48 /// \brief Node name getter 49 /// \return Name of the current node Name()50 std::string Name() const override { return kBatchNode; } 51 52 /// \brief Print the description 53 /// \param out - The output stream to write output to 54 void Print(std::ostream &out) const override; 55 56 /// \brief Copy the node to a new object 57 /// \return A shared pointer to the new copy 58 std::shared_ptr<DatasetNode> Copy() override; 59 60 /// \brief a base class override function to create the required runtime dataset op objects for this class 61 /// \param node_ops - A vector containing shared pointer to the Dataset Ops that this object will create 62 /// \return Status Status::OK() if build successfully 63 Status Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) override; 64 65 /// \brief Parameters validation 66 /// \return Status Status::OK() if all the parameters are valid 67 Status ValidateParams() override; 68 69 /// \brief Base-class override for GetDatasetSize 70 /// \param[in] size_getter Shared pointer to DatasetSizeGetter 71 /// \param[in] estimate This is only supported by some of the ops and it's used to speed up the process of getting 72 /// dataset size at the expense of accuracy. 73 /// \param[out] dataset_size the size of the dataset 74 /// \return Status of the function 75 Status GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &size_getter, bool estimate, 76 int64_t *dataset_size) override; 77 78 /// \brief Base-class override for accepting IRNodePass visitor 79 /// \param[in] p The node to visit 80 /// \param[out] modified Indicator if the node was modified 81 /// \return Status of the node visit 82 Status Accept(IRNodePass *const p, bool *const modified) override; 83 84 /// \brief Base-class override for accepting IRNodePass visitor 85 /// \param[in] p The node to visit 86 /// \param[out] modified Indicator if the node was modified 87 /// \return Status of the node visit 88 Status AcceptAfter(IRNodePass *const p, bool *const modified) override; 89 90 /// \brief Getter functions BatchSize()91 int32_t BatchSize() const { return batch_size_; } DropRemainder()92 bool DropRemainder() const { return drop_remainder_; } 93 #ifdef ENABLE_PYTHON Pad()94 bool Pad() const { return pad_; } InColNames()95 const std::vector<std::string> &InColNames() const { return in_col_names_; } OutColNames()96 const std::vector<std::string> &OutColNames() const { return out_col_names_; } ColOrder()97 const std::vector<std::string> &ColOrder() const { return col_order_; } BatchSizeFunc()98 const py::function &BatchSizeFunc() const { return batch_size_func_; } BatchMapFunc()99 const py::function &BatchMapFunc() const { return batch_map_func_; } PadMap()100 const std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> &PadMap() const { return pad_map_; } 101 #endif 102 103 /// \brief Get the arguments of node 104 /// \param[out] out_json JSON string of all attributes 105 /// \return Status of the function 106 Status to_json(nlohmann::json *out_json) override; 107 108 /// \brief Function for read dataset operation from json 109 /// \param[in] json_obj The JSON object to be deserialized 110 /// \param[in] ds dataset node constructed 111 /// \param[out] result Deserialized dataset after the operation 112 /// \return Status The status code returned 113 static Status from_json(nlohmann::json json_obj, std::shared_ptr<DatasetNode> ds, 114 std::shared_ptr<DatasetNode> *result); 115 116 private: 117 int32_t batch_size_; 118 bool drop_remainder_; 119 bool pad_; 120 std::vector<std::string> in_col_names_; 121 std::vector<std::string> out_col_names_; 122 std::vector<std::string> col_order_; 123 #ifdef ENABLE_PYTHON 124 py::function batch_size_func_; 125 py::function batch_map_func_; 126 #endif 127 std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> pad_map_; 128 }; 129 130 } // namespace dataset 131 } // namespace mindspore 132 #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_BATCH_NODE_H_ 133