• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2019 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_CCSRC_MINDDATA_MINDRECORD_INCLUDE_SHARD_PAGE_H_
18 #define MINDSPORE_CCSRC_MINDDATA_MINDRECORD_INCLUDE_SHARD_PAGE_H_
19 
20 #include <fstream>
21 #include <iostream>
22 #include <memory>
23 #include <string>
24 #include <utility>
25 #include <vector>
26 #include "minddata/mindrecord/include/common/shard_utils.h"
27 #include "pybind11/pybind11.h"
28 #include "utils/log_adapter.h"
29 
30 namespace mindspore {
31 namespace mindrecord {
/// Page type identifier string for raw-data pages.
const std::string kPageTypeRaw{"RAW_DATA"};
/// Page type identifier string for blob-data pages.
const std::string kPageTypeBlob{"BLOB_DATA"};
/// Page type identifier string for new-column-data pages.
const std::string kPageTypeNewColumn{"NEW_COLUMN_DATA"};
35 
36 class __attribute__((visibility("default"))) Page {
37  public:
Page(const int & page_id,const int & shard_id,const std::string & page_type,const int & page_type_id,const uint64_t & start_row_id,const uint64_t end_row_id,const std::vector<std::pair<int,uint64_t>> & row_group_ids,const uint64_t page_size)38   Page(const int &page_id, const int &shard_id, const std::string &page_type, const int &page_type_id,
39        const uint64_t &start_row_id, const uint64_t end_row_id,
40        const std::vector<std::pair<int, uint64_t>> &row_group_ids, const uint64_t page_size)
41       : page_id_(page_id),
42         shard_id_(shard_id),
43         page_type_(page_type),
44         page_type_id_(page_type_id),
45         start_row_id_(start_row_id),
46         end_row_id_(end_row_id),
47         row_group_ids_(row_group_ids),
48         page_size_(page_size) {}
49 
50   ~Page() = default;
51 
52   /// \brief get the page and its description
53   /// \return the json format of the page and its description
54   json GetPage() const;
55 
GetPageID()56   int GetPageID() const { return page_id_; }
57 
GetShardID()58   int GetShardID() const { return shard_id_; }
59 
GetPageTypeID()60   int GetPageTypeID() const { return page_type_id_; }
61 
GetPageType()62   std::string GetPageType() const { return page_type_; }
63 
GetPageSize()64   uint64_t GetPageSize() const { return page_size_; }
65 
GetStartRowID()66   uint64_t GetStartRowID() const { return start_row_id_; }
67 
GetEndRowID()68   uint64_t GetEndRowID() const { return end_row_id_; }
69 
SetEndRowID(const uint64_t & end_row_id)70   void SetEndRowID(const uint64_t &end_row_id) { end_row_id_ = end_row_id; }
71 
SetPageSize(const uint64_t & page_size)72   void SetPageSize(const uint64_t &page_size) { page_size_ = page_size; }
73 
GetLastRowGroupID()74   std::pair<int, uint64_t> GetLastRowGroupID() const { return row_group_ids_.back(); }
75 
GetRowGroupIds()76   std::vector<std::pair<int, uint64_t>> GetRowGroupIds() const { return row_group_ids_; }
77 
SetRowGroupIds(const std::vector<std::pair<int,uint64_t>> & last_row_group_ids)78   void SetRowGroupIds(const std::vector<std::pair<int, uint64_t>> &last_row_group_ids) {
79     row_group_ids_ = last_row_group_ids;
80   }
81 
82   void DeleteLastGroupId();
83 
84  private:
85   int page_id_;
86   int shard_id_;
87   std::string page_type_;
88   int page_type_id_;
89   uint64_t start_row_id_;
90   uint64_t end_row_id_;
91   std::vector<std::pair<int, uint64_t>> row_group_ids_;
92   uint64_t page_size_;
93   // JSON page: {
94   //            "page_id":X,
95   //            "shard_id":X,
96   //            "page_type":"XXX", (enum "raw_data", "blob_data", "new_column")
97   //            "page_type_id":X,
98   //            "start_row_id":X,
99   //            "end_row_id":X,
100   //            "row_group_ids":[{"id":X, "offset":X}],
101   //            "page_size":X,
102 };
103 }  // namespace mindrecord
104 }  // namespace mindspore
105 
106 #endif  // MINDSPORE_CCSRC_MINDDATA_MINDRECORD_INCLUDE_SHARD_PAGE_H_
107