/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef LIBTEXTCLASSIFIER_SMARTSELECT_TYPES_H_
#define LIBTEXTCLASSIFIER_SMARTSELECT_TYPES_H_

#include <ostream>
#include <string>
#include <utility>

namespace libtextclassifier {

// Sentinel index value. A default-constructed (padding) Token uses it for both
// its start and its end.
constexpr int kInvalidIndex = -1;

// Index for a 0-based array of tokens.
using TokenIndex = int;

// Index for a 0-based array of codepoints.
using CodepointIndex = int;

// Marks a span in a sequence of codepoints. The first element is the index of
// the first codepoint of the span, and the second element is the index of the
// codepoint one past the end of the span.
using CodepointSpan = std::pair<CodepointIndex, CodepointIndex>;

// Marks a span in a sequence of tokens. The first element is the index of the
// first token in the span, and the second element is the index of the token one
// past the end of the span.
using TokenSpan = std::pair<TokenIndex, TokenIndex>;

// Token holds a token's text, its codepoint position (start and end) and
// whether it is a padding token.
struct Token {
  // Text of the token; empty for the padding token.
  std::string value;

  // Codepoint indices of the token. IsContainedInSpan() compares them against
  // a CodepointSpan, whose second element is one past the end.
  CodepointIndex start;
  CodepointIndex end;

  // Whether the token is a padding token.
  bool is_padding;

  // Default constructor constructs the padding-token: empty value, both
  // indices set to kInvalidIndex, is_padding == true.
  Token()
      : value(), start(kInvalidIndex), end(kInvalidIndex), is_padding(true) {}

  // Constructs a regular (non-padding) token with the given text and
  // codepoint indices.
  Token(const std::string& arg_value, CodepointIndex arg_start,
        CodepointIndex arg_end)
      : value(arg_value), start(arg_start), end(arg_end), is_padding(false) {}

  // Two tokens are equal when all four fields match.
  bool operator==(const Token& other) const {
    return value == other.value && start == other.start && end == other.end &&
           is_padding == other.is_padding;
  }

  // Returns true when the token lies entirely inside `span`, i.e. when
  // span.first <= start and end <= span.second. Both bounds are inclusive
  // comparisons, so a token that exactly fills the span is contained.
  bool IsContainedInSpan(CodepointSpan span) const {
    return start >= span.first && end <= span.second;
  }
};

// Pretty-printing function for Token. Writes
//   Token("<value>", <start>, <end>, is_padding=<is_padding>)
// to `os` and returns `os`. is_padding is streamed as a bool, so it prints as
// 1/0 unless std::boolalpha is set on the stream.
inline std::ostream& operator<<(std::ostream& os, const Token& token) {
  return os << "Token(\"" << token.value << "\", " << token.start << ", "
            << token.end << ", is_padding=" << token.is_padding << ")";
}

}  // namespace libtextclassifier

#endif  // LIBTEXTCLASSIFIER_SMARTSELECT_TYPES_H_