1 /* 2 * Copyright (C) 2018 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // Common feature types for parser components. 18 19 #ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEATURE_TYPES_H_ 20 #define NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEATURE_TYPES_H_ 21 22 #include <algorithm> 23 #include <map> 24 #include <string> 25 #include <utility> 26 27 #include "lang_id/common/lite_base/integral-types.h" 28 #include "lang_id/common/lite_base/logging.h" 29 #include "lang_id/common/lite_strings/str-cat.h" 30 31 namespace libtextclassifier3 { 32 namespace mobile { 33 34 // TODO(djweiss) Clean this up as well. 35 // Use the same type for feature values as is used for predicated. 36 typedef int64 Predicate; 37 typedef Predicate FeatureValue; 38 39 // Each feature value in a feature vector has a feature type. The feature type 40 // is used for converting feature type and value pairs to predicate values. The 41 // feature type can also return names for feature values and calculate the size 42 // of the feature value domain. The FeatureType class is abstract and must be 43 // specialized for the concrete feature types. 44 class FeatureType { 45 public: 46 // Initializes a feature type. FeatureType(const std::string & name)47 explicit FeatureType(const std::string &name) 48 : name_(name), 49 base_(0), 50 is_continuous_(name.find("continuous") != std::string::npos) {} 51 ~FeatureType()52 virtual ~FeatureType() {} 53 54 // Converts a feature value to a name. 55 virtual std::string GetFeatureValueName(FeatureValue value) const = 0; 56 57 // Returns the size of the feature values domain. 58 virtual int64 GetDomainSize() const = 0; 59 60 // Returns the feature type name. name()61 const std::string &name() const { return name_; } 62 base()63 Predicate base() const { return base_; } set_base(Predicate base)64 void set_base(Predicate base) { base_ = base; } 65 66 // Returns true iff this feature is continuous; see FloatFeatureValue. is_continuous()67 bool is_continuous() const { return is_continuous_; } 68 69 private: 70 // Feature type name. 71 std::string name_; 72 73 // "Base" feature value: i.e. a "slot" in a global ordering of features. 74 Predicate base_; 75 76 // See doc for is_continuous(). 77 bool is_continuous_; 78 }; 79 80 // Feature type that is defined using an explicit map from FeatureValue to 81 // string values. This can reduce some of the boilerplate when defining 82 // features that generate enum values. Example usage: 83 // 84 // class BeverageSizeFeature : public FeatureFunction<Beverage> 85 // enum FeatureValue { SMALL, MEDIUM, LARGE }; // values for this feature 86 // void Init(TaskContext *context) override { 87 // set_feature_type(new EnumFeatureType("beverage_size", 88 // {{SMALL, "SMALL"}, {MEDIUM, "MEDIUM"}, {LARGE, "LARGE"}}); 89 // } 90 // [...] 91 // }; 92 class EnumFeatureType : public FeatureType { 93 public: EnumFeatureType(const std::string & name,const std::map<FeatureValue,std::string> & value_names)94 EnumFeatureType(const std::string &name, 95 const std::map<FeatureValue, std::string> &value_names) 96 : FeatureType(name), value_names_(value_names) { 97 for (const auto &pair : value_names) { 98 SAFTM_CHECK_GE(pair.first, 0) 99 << "Invalid feature value: " << pair.first << ", " << pair.second; 100 domain_size_ = std::max(domain_size_, pair.first + 1); 101 } 102 } 103 104 // Returns the feature name for a given feature value. GetFeatureValueName(FeatureValue value)105 std::string GetFeatureValueName(FeatureValue value) const override { 106 auto it = value_names_.find(value); 107 if (it == value_names_.end()) { 108 SAFTM_LOG(ERROR) << "Invalid feature value " << value << " for " 109 << name(); 110 return "<INVALID>"; 111 } 112 return it->second; 113 } 114 115 // Returns the number of possible values for this feature type. This is one 116 // greater than the largest value in the value_names map. GetDomainSize()117 FeatureValue GetDomainSize() const override { return domain_size_; } 118 119 protected: 120 // Maximum possible value this feature could take. 121 FeatureValue domain_size_ = 0; 122 123 // Names of feature values. 124 std::map<FeatureValue, std::string> value_names_; 125 }; 126 127 // Feature type for binary features. 128 class BinaryFeatureType : public FeatureType { 129 public: BinaryFeatureType(const std::string & name,const std::string & off,const std::string & on)130 BinaryFeatureType(const std::string &name, const std::string &off, 131 const std::string &on) 132 : FeatureType(name), off_(off), on_(on) {} 133 134 // Returns the feature name for a given feature value. GetFeatureValueName(FeatureValue value)135 std::string GetFeatureValueName(FeatureValue value) const override { 136 if (value == 0) return off_; 137 if (value == 1) return on_; 138 return ""; 139 } 140 141 // Binary features always have two feature values. GetDomainSize()142 FeatureValue GetDomainSize() const override { return 2; } 143 144 private: 145 // Feature value names for on and off. 146 std::string off_; 147 std::string on_; 148 }; 149 150 // Feature type for numeric features. 151 class NumericFeatureType : public FeatureType { 152 public: 153 // Initializes numeric feature. NumericFeatureType(const std::string & name,FeatureValue size)154 NumericFeatureType(const std::string &name, FeatureValue size) 155 : FeatureType(name), size_(size) {} 156 157 // Returns numeric feature value. GetFeatureValueName(FeatureValue value)158 std::string GetFeatureValueName(FeatureValue value) const override { 159 if (value < 0) return ""; 160 return LiteStrCat(value); 161 } 162 163 // Returns the number of feature values. GetDomainSize()164 FeatureValue GetDomainSize() const override { return size_; } 165 166 private: 167 // The underlying size of the numeric feature. 168 FeatureValue size_; 169 }; 170 171 // Feature type for byte features, including an "outside" value. 172 class ByteFeatureType : public NumericFeatureType { 173 public: ByteFeatureType(const std::string & name)174 explicit ByteFeatureType(const std::string &name) 175 : NumericFeatureType(name, 257) {} 176 GetFeatureValueName(FeatureValue value)177 std::string GetFeatureValueName(FeatureValue value) const override { 178 if (value == 256) { 179 return "<NULL>"; 180 } 181 std::string result; 182 result += static_cast<char>(value); 183 return result; 184 } 185 }; 186 187 } // namespace mobile 188 } // namespace nlp_saft 189 190 #endif // NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEATURE_TYPES_H_ 191