1 /** 2 * Copyright 2019 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINDSPORE_CCSRC_MINDDATA_MINDRECORD_INCLUDE_SHARD_PAGE_H_ 18 #define MINDSPORE_CCSRC_MINDDATA_MINDRECORD_INCLUDE_SHARD_PAGE_H_ 19 20 #include <fstream> 21 #include <iostream> 22 #include <memory> 23 #include <string> 24 #include <utility> 25 #include <vector> 26 #include "minddata/mindrecord/include/common/shard_utils.h" 27 #include "pybind11/pybind11.h" 28 #include "utils/log_adapter.h" 29 30 namespace mindspore { 31 namespace mindrecord { 32 const std::string kPageTypeRaw = "RAW_DATA"; 33 const std::string kPageTypeBlob = "BLOB_DATA"; 34 const std::string kPageTypeNewColumn = "NEW_COLUMN_DATA"; 35 36 class __attribute__((visibility("default"))) Page { 37 public: Page(const int & page_id,const int & shard_id,const std::string & page_type,const int & page_type_id,const uint64_t & start_row_id,const uint64_t end_row_id,const std::vector<std::pair<int,uint64_t>> & row_group_ids,const uint64_t page_size)38 Page(const int &page_id, const int &shard_id, const std::string &page_type, const int &page_type_id, 39 const uint64_t &start_row_id, const uint64_t end_row_id, 40 const std::vector<std::pair<int, uint64_t>> &row_group_ids, const uint64_t page_size) 41 : page_id_(page_id), 42 shard_id_(shard_id), 43 page_type_(page_type), 44 page_type_id_(page_type_id), 45 start_row_id_(start_row_id), 46 end_row_id_(end_row_id), 47 row_group_ids_(row_group_ids), 48 page_size_(page_size) {} 49 50 ~Page() = default; 51 52 /// \brief get the page and its description 53 /// \return the json format of the page and its description 54 json GetPage() const; 55 GetPageID()56 int GetPageID() const { return page_id_; } 57 GetShardID()58 int GetShardID() const { return shard_id_; } 59 GetPageTypeID()60 int GetPageTypeID() const { return page_type_id_; } 61 GetPageType()62 std::string GetPageType() const { return page_type_; } 63 GetPageSize()64 uint64_t GetPageSize() const { return page_size_; } 65 GetStartRowID()66 uint64_t GetStartRowID() const { return start_row_id_; } 67 GetEndRowID()68 uint64_t GetEndRowID() const { return end_row_id_; } 69 SetEndRowID(const uint64_t & end_row_id)70 void SetEndRowID(const uint64_t &end_row_id) { end_row_id_ = end_row_id; } 71 SetPageSize(const uint64_t & page_size)72 void SetPageSize(const uint64_t &page_size) { page_size_ = page_size; } 73 GetLastRowGroupID()74 std::pair<int, uint64_t> GetLastRowGroupID() const { return row_group_ids_.back(); } 75 GetRowGroupIds()76 std::vector<std::pair<int, uint64_t>> GetRowGroupIds() const { return row_group_ids_; } 77 SetRowGroupIds(const std::vector<std::pair<int,uint64_t>> & last_row_group_ids)78 void SetRowGroupIds(const std::vector<std::pair<int, uint64_t>> &last_row_group_ids) { 79 row_group_ids_ = last_row_group_ids; 80 } 81 82 void DeleteLastGroupId(); 83 84 private: 85 int page_id_; 86 int shard_id_; 87 std::string page_type_; 88 int page_type_id_; 89 uint64_t start_row_id_; 90 uint64_t end_row_id_; 91 std::vector<std::pair<int, uint64_t>> row_group_ids_; 92 uint64_t page_size_; 93 // JSON page: { 94 // "page_id":X, 95 // "shard_id":X, 96 // "page_type":"XXX", (enum "raw_data", "blob_data", "new_column") 97 // "page_type_id":X, 98 // "start_row_id":X, 99 // "end_row_id":X, 100 // "row_group_ids":[{"id":X, "offset":X}], 101 // "page_size":X, 102 }; 103 } // namespace mindrecord 104 } // namespace mindspore 105 106 #endif // MINDSPORE_CCSRC_MINDDATA_MINDRECORD_INCLUDE_SHARD_PAGE_H_ 107