1 /* 2 * Copyright (c) 2025 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef DISTRIBUTED_RDB_KNOWLEDGE_TYPES_H 17 #define DISTRIBUTED_RDB_KNOWLEDGE_TYPES_H 18 19 #include <string> 20 #include <vector> 21 #include <unordered_map> 22 #include "rdb_store_config.h" 23 24 namespace OHOS::DistributedRdb { 25 26 constexpr int DEFAULT_CHUNK_SIZE = 3072; 27 constexpr int DEFAULT_SEGMENT_SIZE = 300; 28 constexpr double DEFAULT_OVERLAP_RATIO = 0.1; 29 constexpr int DEFAULT_TEXT_EMBEDDING_MAX_CNT = 50; 30 constexpr int DEFAULT_IMAGE_EMBEDDING_MAX_CNT = 10; 31 constexpr int DEFAULT_PARSE_FILE_MAX_CNT = 10; 32 33 struct RdbKnowledgeParser { 34 std::string type; 35 std::string path; 36 }; 37 38 struct RdbKnowledgeField { 39 std::string columnName; 40 std::vector<std::string> type; 41 std::vector<RdbKnowledgeParser> parser; 42 std::string description; 43 }; 44 45 struct RdbKnowledgeTable { 46 std::string tableName; 47 std::string tokenizer; 48 std::vector<std::string> referenceFields; 49 std::vector<RdbKnowledgeField> knowledgeFields; 50 std::unordered_map<std::string, std::vector<std::string>> pipelineHandlers; 51 }; 52 53 struct RdbKnowledgeProcess { 54 struct { 55 std::string modelVersion; 56 } embeddingModelCfgs; 57 struct { 58 int chunkSize{DEFAULT_CHUNK_SIZE}; 59 int segmentSize{DEFAULT_SEGMENT_SIZE}; 60 double overlapRatio{DEFAULT_OVERLAP_RATIO}; 61 } chunkSplitter; 62 struct { 63 int textEmbeddingMaxCnt{DEFAULT_TEXT_EMBEDDING_MAX_CNT}; 64 int imageEmbeddingMaxCnt{DEFAULT_IMAGE_EMBEDDING_MAX_CNT}; 65 int parseFileMaxCnt{DEFAULT_PARSE_FILE_MAX_CNT}; 66 } perRecordLimit; 67 }; 68 69 struct RdbKnowledgeSchema { 70 int64_t version = 0; 71 std::string dbName; 72 std::vector<RdbKnowledgeTable> tables; 73 RdbKnowledgeProcess knowledgeProcess; 74 }; 75 76 class API_EXPORT IKnowledgeSchemaManager { 77 public: 78 API_EXPORT virtual ~IKnowledgeSchemaManager() = default; 79 80 /** 81 * @brief Init with database config and schema. 82 */ 83 API_EXPORT virtual void Init(const NativeRdb::RdbStoreConfig &config, 84 const DistributedRdb::RdbKnowledgeSchema &schema) = 0; 85 86 /** 87 * @brief Start build knowledge data task. 88 */ 89 API_EXPORT virtual void StartTask(const std::string &dbName) = 0; 90 91 /** 92 * @brief Stop build knowledge data task. 93 */ 94 API_EXPORT virtual void StopTask(const std::string &dbName) = 0; 95 96 /** 97 * @brief Get rdb knowledge schema. 98 */ 99 API_EXPORT virtual std::shared_ptr<RdbKnowledgeSchema> GetRdbKnowledgeSchema(const std::string &dbName) = 0; 100 }; 101 } 102 #endif // DISTRIBUTED_RDB_KNOWLEDGE_TYPES_H