1 /** 2 * Copyright 2019 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINDSPORE_CCSRC_MINDDATA_MINDRECORD_INCLUDE_SHARD_PAGE_H_ 18 #define MINDSPORE_CCSRC_MINDDATA_MINDRECORD_INCLUDE_SHARD_PAGE_H_ 19 20 #include <iostream> 21 #include <memory> 22 #include <string> 23 #include <utility> 24 #include <vector> 25 26 #include "minddata/mindrecord/include/common/log_adapter.h" 27 #include "minddata/mindrecord/include/common/shard_utils.h" 28 #include "minddata/mindrecord/include/mindrecord_macro.h" 29 #include "pybind11/pybind11.h" 30 31 namespace mindspore { 32 namespace mindrecord { 33 const std::string kPageTypeRaw = "RAW_DATA"; 34 const std::string kPageTypeBlob = "BLOB_DATA"; 35 const std::string kPageTypeNewColumn = "NEW_COLUMN_DATA"; 36 37 class MINDRECORD_API Page { 38 public: Page(const int & page_id,const int & shard_id,const std::string & page_type,const int & page_type_id,const uint64_t & start_row_id,const uint64_t end_row_id,const std::vector<std::pair<int,uint64_t>> & row_group_ids,const uint64_t page_size)39 Page(const int &page_id, const int &shard_id, const std::string &page_type, const int &page_type_id, 40 const uint64_t &start_row_id, const uint64_t end_row_id, 41 const std::vector<std::pair<int, uint64_t>> &row_group_ids, const uint64_t page_size) 42 : page_id_(page_id), 43 shard_id_(shard_id), 44 page_type_(page_type), 45 page_type_id_(page_type_id), 46 start_row_id_(start_row_id), 47 end_row_id_(end_row_id), 48 row_group_ids_(row_group_ids), 49 page_size_(page_size) {} 50 51 ~Page() = default; 52 53 /// \brief get the page and its description 54 /// \return the json format of the page and its description 55 json GetPage() const; 56 GetPageID()57 int GetPageID() const { return page_id_; } 58 GetShardID()59 int GetShardID() const { return shard_id_; } 60 GetPageTypeID()61 int GetPageTypeID() const { return page_type_id_; } 62 GetPageType()63 std::string GetPageType() const { return page_type_; } 64 GetPageSize()65 uint64_t GetPageSize() const { return page_size_; } 66 GetStartRowID()67 uint64_t GetStartRowID() const { return start_row_id_; } 68 GetEndRowID()69 uint64_t GetEndRowID() const { return end_row_id_; } 70 SetEndRowID(const uint64_t & end_row_id)71 void SetEndRowID(const uint64_t &end_row_id) { end_row_id_ = end_row_id; } 72 SetPageSize(const uint64_t & page_size)73 void SetPageSize(const uint64_t &page_size) { page_size_ = page_size; } 74 GetLastRowGroupID()75 std::pair<int, uint64_t> GetLastRowGroupID() const { return row_group_ids_.back(); } 76 GetRowGroupIds()77 std::vector<std::pair<int, uint64_t>> GetRowGroupIds() const { return row_group_ids_; } 78 SetRowGroupIds(const std::vector<std::pair<int,uint64_t>> & last_row_group_ids)79 void SetRowGroupIds(const std::vector<std::pair<int, uint64_t>> &last_row_group_ids) { 80 row_group_ids_ = last_row_group_ids; 81 } 82 83 void DeleteLastGroupId(); 84 85 private: 86 int page_id_; 87 int shard_id_; 88 std::string page_type_; 89 int page_type_id_; 90 uint64_t start_row_id_; 91 uint64_t end_row_id_; 92 std::vector<std::pair<int, uint64_t>> row_group_ids_; 93 uint64_t page_size_; 94 // JSON page: { 95 // "page_id":X, 96 // "shard_id":X, 97 // "page_type":"XXX", (enum "raw_data", "blob_data", "new_column") 98 // "page_type_id":X, 99 // "start_row_id":X, 100 // "end_row_id":X, 101 // "row_group_ids":[{"id":X, "offset":X}], 102 // "page_size":X, 103 }; 104 } // namespace mindrecord 105 } // namespace mindspore 106 107 #endif // MINDSPORE_CCSRC_MINDDATA_MINDRECORD_INCLUDE_SHARD_PAGE_H_ 108