• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 // Common feature types for parser components.
18 
19 #ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEATURE_TYPES_H_
20 #define NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEATURE_TYPES_H_
21 
22 #include <algorithm>
23 #include <map>
24 #include <string>
25 #include <utility>
26 
27 #include "lang_id/common/lite_base/integral-types.h"
28 #include "lang_id/common/lite_base/logging.h"
29 #include "lang_id/common/lite_strings/str-cat.h"
30 
31 namespace libtextclassifier3 {
32 namespace mobile {
33 
34 // TODO(djweiss) Clean this up as well.
35 // Use the same type for feature values as is used for predicated.
36 typedef int64 Predicate;
37 typedef Predicate FeatureValue;
38 
39 // Each feature value in a feature vector has a feature type. The feature type
40 // is used for converting feature type and value pairs to predicate values. The
41 // feature type can also return names for feature values and calculate the size
42 // of the feature value domain. The FeatureType class is abstract and must be
43 // specialized for the concrete feature types.
44 class FeatureType {
45  public:
46   // Initializes a feature type.
FeatureType(const std::string & name)47   explicit FeatureType(const std::string &name)
48       : name_(name),
49         base_(0),
50         is_continuous_(name.find("continuous") != std::string::npos) {}
51 
~FeatureType()52   virtual ~FeatureType() {}
53 
54   // Converts a feature value to a name.
55   virtual std::string GetFeatureValueName(FeatureValue value) const = 0;
56 
57   // Returns the size of the feature values domain.
58   virtual int64 GetDomainSize() const = 0;
59 
60   // Returns the feature type name.
name()61   const std::string &name() const { return name_; }
62 
base()63   Predicate base() const { return base_; }
set_base(Predicate base)64   void set_base(Predicate base) { base_ = base; }
65 
66   // Returns true iff this feature is continuous; see FloatFeatureValue.
is_continuous()67   bool is_continuous() const { return is_continuous_; }
68 
69  private:
70   // Feature type name.
71   std::string name_;
72 
73   // "Base" feature value: i.e. a "slot" in a global ordering of features.
74   Predicate base_;
75 
76   // See doc for is_continuous().
77   bool is_continuous_;
78 };
79 
80 // Feature type that is defined using an explicit map from FeatureValue to
81 // string values.  This can reduce some of the boilerplate when defining
82 // features that generate enum values.  Example usage:
83 //
//   class BeverageSizeFeature : public FeatureFunction<Beverage> {
//     enum FeatureValue { SMALL, MEDIUM, LARGE };  // values for this feature
//     void Init(TaskContext *context) override {
//       set_feature_type(new EnumFeatureType("beverage_size",
//           {{SMALL, "SMALL"}, {MEDIUM, "MEDIUM"}, {LARGE, "LARGE"}}));
89 //     }
90 //     [...]
91 //   };
92 class EnumFeatureType : public FeatureType {
93  public:
EnumFeatureType(const std::string & name,const std::map<FeatureValue,std::string> & value_names)94   EnumFeatureType(const std::string &name,
95                   const std::map<FeatureValue, std::string> &value_names)
96       : FeatureType(name), value_names_(value_names) {
97     for (const auto &pair : value_names) {
98       SAFTM_CHECK_GE(pair.first, 0)
99           << "Invalid feature value: " << pair.first << ", " << pair.second;
100       domain_size_ = std::max(domain_size_, pair.first + 1);
101     }
102   }
103 
104   // Returns the feature name for a given feature value.
GetFeatureValueName(FeatureValue value)105   std::string GetFeatureValueName(FeatureValue value) const override {
106     auto it = value_names_.find(value);
107     if (it == value_names_.end()) {
108       SAFTM_LOG(ERROR) << "Invalid feature value " << value << " for "
109                        << name();
110       return "<INVALID>";
111     }
112     return it->second;
113   }
114 
115   // Returns the number of possible values for this feature type. This is one
116   // greater than the largest value in the value_names map.
GetDomainSize()117   FeatureValue GetDomainSize() const override { return domain_size_; }
118 
119  protected:
120   // Maximum possible value this feature could take.
121   FeatureValue domain_size_ = 0;
122 
123   // Names of feature values.
124   std::map<FeatureValue, std::string> value_names_;
125 };
126 
127 // Feature type for binary features.
128 class BinaryFeatureType : public FeatureType {
129  public:
BinaryFeatureType(const std::string & name,const std::string & off,const std::string & on)130   BinaryFeatureType(const std::string &name, const std::string &off,
131                     const std::string &on)
132       : FeatureType(name), off_(off), on_(on) {}
133 
134   // Returns the feature name for a given feature value.
GetFeatureValueName(FeatureValue value)135   std::string GetFeatureValueName(FeatureValue value) const override {
136     if (value == 0) return off_;
137     if (value == 1) return on_;
138     return "";
139   }
140 
141   // Binary features always have two feature values.
GetDomainSize()142   FeatureValue GetDomainSize() const override { return 2; }
143 
144  private:
145   // Feature value names for on and off.
146   std::string off_;
147   std::string on_;
148 };
149 
150 // Feature type for numeric features.
151 class NumericFeatureType : public FeatureType {
152  public:
153   // Initializes numeric feature.
NumericFeatureType(const std::string & name,FeatureValue size)154   NumericFeatureType(const std::string &name, FeatureValue size)
155       : FeatureType(name), size_(size) {}
156 
157   // Returns numeric feature value.
GetFeatureValueName(FeatureValue value)158   std::string GetFeatureValueName(FeatureValue value) const override {
159     if (value < 0) return "";
160     return LiteStrCat(value);
161   }
162 
163   // Returns the number of feature values.
GetDomainSize()164   FeatureValue GetDomainSize() const override { return size_; }
165 
166  private:
167   // The underlying size of the numeric feature.
168   FeatureValue size_;
169 };
170 
171 // Feature type for byte features, including an "outside" value.
172 class ByteFeatureType : public NumericFeatureType {
173  public:
ByteFeatureType(const std::string & name)174   explicit ByteFeatureType(const std::string &name)
175       : NumericFeatureType(name, 257) {}
176 
GetFeatureValueName(FeatureValue value)177   std::string GetFeatureValueName(FeatureValue value) const override {
178     if (value == 256) {
179       return "<NULL>";
180     }
181     std::string result;
182     result += static_cast<char>(value);
183     return result;
184   }
185 };
186 
187 }  // namespace mobile
}  // namespace libtextclassifier3
189 
190 #endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEATURE_TYPES_H_
191