• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_TRANSFORM_MAP_MAP_NORMALIZER_H_
16 #define ICING_TRANSFORM_MAP_MAP_NORMALIZER_H_
17 
18 #include <string>
19 #include <string_view>
20 
21 #include "icing/transform/normalizer.h"
22 #include "icing/util/character-iterator.h"
23 
24 namespace icing {
25 namespace lib {
26 
27 class MapNormalizer : public Normalizer {
28  public:
MapNormalizer(int max_term_byte_size)29   explicit MapNormalizer(int max_term_byte_size)
30       : max_term_byte_size_(max_term_byte_size){};
31 
32   // Normalizes the input term based on character mappings. The mappings
33   // contain the following categories:
34   //   - Uppercase -> lowercase
35   //   - Hiragana -> Katakana
36   //   - Common full-width characters -> ASCII
37   //   - Common ideographic punctuation marks -> ASCII
38   //   - Common diacritic Latin characters -> ASCII
39   //
40   // Read more mapping details in normalization-map.cc
41   std::string NormalizeTerm(std::string_view term) const override;
42 
43   // Returns a CharacterIterator pointing to one past the end of the segment of
44   // term that (once normalized) matches with normalized_term.
45   //
46   // Ex. FindNormalizedMatchEndPosition("YELLOW", "yell") will return
47   // CharacterIterator(u8:4, u16:4, u32:4).
48   //
49   // Ex. FindNormalizedMatchEndPosition("YELLOW", "red") will return
50   // CharacterIterator(u8:0, u16:0, u32:0).
51   CharacterIterator FindNormalizedMatchEndPosition(
52       std::string_view term, std::string_view normalized_term) const override;
53 
54  private:
55   // The maximum term length allowed after normalization.
56   int max_term_byte_size_;
57 };
58 
59 }  // namespace lib
60 }  // namespace icing
61 
62 #endif  // ICING_TRANSFORM_MAP_MAP_NORMALIZER_H_
63