1 /* 2 * Copyright (C) 2018 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // Common feature types for parser components. 18 19 #ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEATURE_TYPES_H_ 20 #define NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEATURE_TYPES_H_ 21 22 #include <algorithm> 23 #include <map> 24 #include <string> 25 #include <utility> 26 27 #include "lang_id/common/lite_base/integral-types.h" 28 #include "lang_id/common/lite_base/logging.h" 29 #include "lang_id/common/lite_strings/str-cat.h" 30 31 namespace libtextclassifier3 { 32 namespace mobile { 33 34 // TODO(djweiss) Clean this up as well. 35 // Use the same type for feature values as is used for predicated. 36 typedef int64 Predicate; 37 typedef Predicate FeatureValue; 38 39 // Each feature value in a feature vector has a feature type. The feature type 40 // is used for converting feature type and value pairs to predicate values. The 41 // feature type can also return names for feature values and calculate the size 42 // of the feature value domain. The FeatureType class is abstract and must be 43 // specialized for the concrete feature types. 44 class FeatureType { 45 public: 46 // Initializes a feature type. FeatureType(const string & name)47 explicit FeatureType(const string &name) 48 : name_(name), base_(0), 49 is_continuous_(name.find("continuous") != string::npos) { 50 } 51 ~FeatureType()52 virtual ~FeatureType() {} 53 54 // Converts a feature value to a name. 55 virtual string GetFeatureValueName(FeatureValue value) const = 0; 56 57 // Returns the size of the feature values domain. 58 virtual int64 GetDomainSize() const = 0; 59 60 // Returns the feature type name. name()61 const string &name() const { return name_; } 62 base()63 Predicate base() const { return base_; } set_base(Predicate base)64 void set_base(Predicate base) { base_ = base; } 65 66 // Returns true iff this feature is continuous; see FloatFeatureValue. is_continuous()67 bool is_continuous() const { return is_continuous_; } 68 69 private: 70 // Feature type name. 71 string name_; 72 73 // "Base" feature value: i.e. a "slot" in a global ordering of features. 74 Predicate base_; 75 76 // See doc for is_continuous(). 77 bool is_continuous_; 78 }; 79 80 // Feature type that is defined using an explicit map from FeatureValue to 81 // string values. This can reduce some of the boilerplate when defining 82 // features that generate enum values. Example usage: 83 // 84 // class BeverageSizeFeature : public FeatureFunction<Beverage> 85 // enum FeatureValue { SMALL, MEDIUM, LARGE }; // values for this feature 86 // void Init(TaskContext *context) override { 87 // set_feature_type(new EnumFeatureType("beverage_size", 88 // {{SMALL, "SMALL"}, {MEDIUM, "MEDIUM"}, {LARGE, "LARGE"}}); 89 // } 90 // [...] 91 // }; 92 class EnumFeatureType : public FeatureType { 93 public: EnumFeatureType(const string & name,const std::map<FeatureValue,string> & value_names)94 EnumFeatureType(const string &name, 95 const std::map<FeatureValue, string> &value_names) 96 : FeatureType(name), value_names_(value_names) { 97 for (const auto &pair : value_names) { 98 SAFTM_CHECK_GE(pair.first, 0) 99 << "Invalid feature value: " << pair.first << ", " << pair.second; 100 domain_size_ = std::max(domain_size_, pair.first + 1); 101 } 102 } 103 104 // Returns the feature name for a given feature value. GetFeatureValueName(FeatureValue value)105 string GetFeatureValueName(FeatureValue value) const override { 106 auto it = value_names_.find(value); 107 if (it == value_names_.end()) { 108 SAFTM_LOG(ERROR) << "Invalid feature value " << value << " for " 109 << name(); 110 return "<INVALID>"; 111 } 112 return it->second; 113 } 114 115 // Returns the number of possible values for this feature type. This is one 116 // greater than the largest value in the value_names map. GetDomainSize()117 FeatureValue GetDomainSize() const override { return domain_size_; } 118 119 protected: 120 // Maximum possible value this feature could take. 121 FeatureValue domain_size_ = 0; 122 123 // Names of feature values. 124 std::map<FeatureValue, string> value_names_; 125 }; 126 127 // Feature type for binary features. 128 class BinaryFeatureType : public FeatureType { 129 public: BinaryFeatureType(const string & name,const string & off,const string & on)130 BinaryFeatureType(const string &name, const string &off, const string &on) 131 : FeatureType(name), off_(off), on_(on) {} 132 133 // Returns the feature name for a given feature value. GetFeatureValueName(FeatureValue value)134 string GetFeatureValueName(FeatureValue value) const override { 135 if (value == 0) return off_; 136 if (value == 1) return on_; 137 return ""; 138 } 139 140 // Binary features always have two feature values. GetDomainSize()141 FeatureValue GetDomainSize() const override { return 2; } 142 143 private: 144 // Feature value names for on and off. 145 string off_; 146 string on_; 147 }; 148 149 // Feature type for numeric features. 150 class NumericFeatureType : public FeatureType { 151 public: 152 // Initializes numeric feature. NumericFeatureType(const string & name,FeatureValue size)153 NumericFeatureType(const string &name, FeatureValue size) 154 : FeatureType(name), size_(size) {} 155 156 // Returns numeric feature value. GetFeatureValueName(FeatureValue value)157 string GetFeatureValueName(FeatureValue value) const override { 158 if (value < 0) return ""; 159 return LiteStrCat(value); 160 } 161 162 // Returns the number of feature values. GetDomainSize()163 FeatureValue GetDomainSize() const override { return size_; } 164 165 private: 166 // The underlying size of the numeric feature. 167 FeatureValue size_; 168 }; 169 170 // Feature type for byte features, including an "outside" value. 171 class ByteFeatureType : public NumericFeatureType { 172 public: ByteFeatureType(const string & name)173 explicit ByteFeatureType(const string &name) 174 : NumericFeatureType(name, 257) {} 175 GetFeatureValueName(FeatureValue value)176 string GetFeatureValueName(FeatureValue value) const override { 177 if (value == 256) { 178 return "<NULL>"; 179 } 180 string result; 181 result += static_cast<char>(value); 182 return result; 183 } 184 }; 185 186 } // namespace mobile 187 } // namespace nlp_saft 188 189 #endif // NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEATURE_TYPES_H_ 190