1 // Copyright (C) 2022 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_INDEX_DATA_INDEXING_HANDLER_H_ 16 #define ICING_INDEX_DATA_INDEXING_HANDLER_H_ 17 18 #include "icing/text_classifier/lib3/utils/base/status.h" 19 #include "icing/proto/logging.pb.h" 20 #include "icing/store/document-id.h" 21 #include "icing/util/clock.h" 22 #include "icing/util/tokenized-document.h" 23 24 namespace icing { 25 namespace lib { 26 27 // Parent class for indexing different types of data in TokenizedDocument. 28 class DataIndexingHandler { 29 public: DataIndexingHandler(const Clock * clock)30 explicit DataIndexingHandler(const Clock* clock) : clock_(*clock) {} 31 32 virtual ~DataIndexingHandler() = default; 33 34 // Handles the indexing process: add data into the specific type index (e.g. 35 // term index, integer index, qualified id type joinable index) for all 36 // contents in the corresponding type of data in tokenized_document. 37 // For example, IntegerSectionIndexingHandler::Handle should add data into 38 // integer index for all contents in tokenized_document.integer_sections. 39 // 40 // Also it should handle last added DocumentId properly (based on 41 // recovery_mode_) to avoid adding previously indexed documents. 42 // 43 // tokenized_document: document object with different types of tokenized data. 44 // document_id: id of the document. 45 // recovery_mode: decides how to handle document_id <= 46 // last_added_document_id. If in recovery_mode, then 47 // Handle() will simply return OK immediately. Otherwise, 48 // returns INVALID_ARGUMENT_ERROR. 49 // put_document_stats: object for collecting stats during indexing. It can be 50 // nullptr. 51 // 52 /// Returns: 53 // - OK on success. 54 // - INVALID_ARGUMENT_ERROR if document_id is invalid OR document_id is less 55 // than or equal to the document_id of a previously indexed document in 56 // non recovery mode. 57 // - Any other errors. It depends on each implementation. 58 virtual libtextclassifier3::Status Handle( 59 const TokenizedDocument& tokenized_document, DocumentId document_id, 60 bool recovery_mode, PutDocumentStatsProto* put_document_stats) = 0; 61 62 protected: 63 const Clock& clock_; // Does not own. 64 }; 65 66 } // namespace lib 67 } // namespace icing 68 69 #endif // ICING_INDEX_DATA_INDEXING_HANDLER_H_ 70