/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "sparse_weight_vector.h"

#include <algorithm>
#include <cmath>
#include <list>
#include <string>
#include <unordered_map>
#include <vector>

using std::vector;
using std::list;
using std::max;

namespace learning_stochastic_linear {

// Max/Min permitted values of normalizer_ for preventing under/overflows.
static const double kNormalizerMin = 1e-20;
static const double kNormalizerMax = 1e20;

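// Returns true iff the vector is numerically sane, i.e. neither the
// normalizer nor any stored weight is NaN or infinite.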
template<class Key, class Hash>
bool SparseWeightVector<Key, Hash>::IsValid() const {
  if (std::isnan(normalizer_) || std::isinf(normalizer_))
    return false;
  for (Witer_const iter = w_.begin();
       iter != w_.end();
       ++iter) {
    if (std::isnan(iter->second) || std::isinf(iter->second))
      return false;
  }
  return true;
}

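// Element-wise over the support of w1, performs
//   w[i] += multiplier * w1[i] + additive_const,
// where w[i] denotes the logical weight, i.e. the stored value divided by the
// vector's normalizer_; the scaling below converts between representations.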
template<class Key, class Hash>
void SparseWeightVector<Key, Hash>::AdditiveWeightUpdate(
    const double multiplier,
    const SparseWeightVector<Key, Hash> &w1,
    const double additive_const) {
  for (Witer_const iter = w1.w_.begin();
       iter != w1.w_.end();
       ++iter) {
    w_[iter->first] += ((multiplier * iter->second) / w1.normalizer_
                        + additive_const) * normalizer_;
  }
}

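// Element-wise over the support of w1, performs
//   w[i] += multiplier * w1[i]^2 + additive_const
// on the logical (normalizer-adjusted) weights.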
template<class Key, class Hash>
void SparseWeightVector<Key, Hash>::AdditiveSquaredWeightUpdate(
    const double multiplier,
    const SparseWeightVector<Key, Hash> &w1,
    const double additive_const) {
  for (Witer_const iter = w1.w_.begin();
       iter != w1.w_.end();
       ++iter) {
    w_[iter->first] += ((multiplier * iter->second * iter->second) /
                        (w1.normalizer_ * w1.normalizer_)
                        + additive_const) * normalizer_;
  }
}

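// Element-wise over the support of w1, performs
//   w[i] += multiplier / sqrt(w1[i]) + additive_const.
// Entries of w1 that are not strictly positive are skipped, since the inverse
// square root is undefined for them.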
template<class Key, class Hash>
void SparseWeightVector<Key, Hash>::AdditiveInvSqrtWeightUpdate(
    const double multiplier,
    const SparseWeightVector<Key, Hash> &w1,
    const double additive_const) {
  for (Witer_const iter = w1.w_.begin();
       iter != w1.w_.end();
       ++iter) {
    if (iter->second > 0.0) {
      w_[iter->first] += ((multiplier * sqrt(w1.normalizer_)) /
                          sqrt(iter->second)
                          + additive_const) * normalizer_;
    }
  }
}

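// Same additive update as AdditiveWeightUpdate, but each touched weight is
// then clamped into the per-key bounds [wmin_, wmax_] where such bounds have
// been registered.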
template<class Key, class Hash>
void SparseWeightVector<Key, Hash>::AdditiveWeightUpdateBounded(
    const double multiplier,
    const SparseWeightVector<Key, Hash> &w1,
    const double additive_const) {
  double min_bound = 0;
  double max_bound = 0;
  for (Witer_const iter = w1.w_.begin();
       iter != w1.w_.end();
       ++iter) {
    w_[iter->first] += ((multiplier * iter->second) / w1.normalizer_
                        + additive_const) * normalizer_;
    bool is_min_bounded = GetValue(wmin_, iter->first, &min_bound);
    if (is_min_bounded) {
      if ((w_[iter->first] / normalizer_) < min_bound) {
        w_[iter->first] = min_bound * normalizer_;
        continue;
      }
    }
    bool is_max_bounded = GetValue(wmax_, iter->first, &max_bound);
    if (is_max_bounded) {
      if ((w_[iter->first] / normalizer_) > max_bound)
        w_[iter->first] = max_bound * normalizer_;
    }
  }
}

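// Element-wise (Hadamard) product: w[i] *= w1[i] for every key present in
// this vector. Keys present only in w1 are ignored, consistent with their
// implicit value of zero here.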
template<class Key, class Hash>
void SparseWeightVector<Key, Hash>::MultWeightUpdate(
    const SparseWeightVector<Key, Hash> &w1) {
  for (Witer iter = w_.begin();
       iter != w_.end();
       ++iter) {
    iter->second *= w1.GetElement(iter->first);
  }
  normalizer_ *= w1.normalizer_;
}

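// Hadamard product followed by clamping into the per-key [wmin_, wmax_]
// bounds, mirroring AdditiveWeightUpdateBounded.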
template<class Key, class Hash>
void SparseWeightVector<Key, Hash>::MultWeightUpdateBounded(
    const SparseWeightVector<Key, Hash> &w1) {
  double min_bound = 0;
  double max_bound = 0;

  normalizer_ *= w1.normalizer_;
  for (Witer iter = w_.begin();
       iter != w_.end();
       ++iter) {
    iter->second *= w1.GetElement(iter->first);
    bool is_min_bounded = GetValue(wmin_, iter->first, &min_bound);
    if (is_min_bounded) {
      if ((iter->second / normalizer_) < min_bound) {
        iter->second = min_bound * normalizer_;
        continue;
      }
    }
    bool is_max_bounded = GetValue(wmax_, iter->first, &max_bound);
    if (is_max_bounded) {
      if ((iter->second / normalizer_) > max_bound)
        iter->second = max_bound * normalizer_;
    }
  }
}

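// Folds the current normalizer into the stored weights and resets it to 1.0,
// so that each stored value equals its logical weight again. Used to recover
// from a normalizer drifting toward under- or overflow.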
template<class Key, class Hash>
void SparseWeightVector<Key, Hash>::ResetNormalizer() {
  for (Witer iter = w_.begin();
       iter != w_.end();
       ++iter) {
    iter->second /= normalizer_;
  }
  normalizer_ = 1.0;
}

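// Clamps every stored weight into its per-key [wmin_, wmax_] bounds, where
// present, without applying any other update.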
template<class Key, class Hash>
void SparseWeightVector<Key, Hash>::ReprojectToBounds() {
  double min_bound = 0;
  double max_bound = 0;

  for (Witer iter = w_.begin();
       iter != w_.end();
       ++iter) {
    bool is_min_bounded = GetValue(wmin_, iter->first, &min_bound);
    if (is_min_bounded) {
      if ((iter->second / normalizer_) < min_bound) {
        iter->second = min_bound * normalizer_;
        continue;
      }
    }
    bool is_max_bounded = GetValue(wmax_, iter->first, &max_bound);
    if (is_max_bounded) {
      if ((iter->second / normalizer_) > max_bound)
        iter->second = max_bound * normalizer_;
    }
  }
}

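// Dot product of the two logical vectors. The sum runs over the smaller of
// the two maps, since keys missing from either vector contribute zero.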
template<class Key, class Hash>
double SparseWeightVector<Key, Hash>::DotProduct(
    const SparseWeightVector<Key, Hash> &w1) const {
  double result = 0;
  if (w_.size() > w1.w_.size()) {
    for (Witer_const iter = w1.w_.begin();
         iter != w1.w_.end();
         ++iter) {
      result += iter->second * GetElement(iter->first);
    }
  } else {
    for (Witer_const iter = w_.begin();
         iter != w_.end();
         ++iter) {
      result += iter->second * w1.GetElement(iter->first);
    }
  }
  return result / (normalizer_ * w1.normalizer_);
}

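// Computes the general Lx norm, (sum_i |w[i]|^x)^(1/x), of the logical vector
// for any x > 0.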
template<class Key, class Hash>
double SparseWeightVector<Key, Hash>::LxNorm(const double x) const {
  double result = 0;
  CHECK_GT(x, 0);
  for (Witer_const iter = w_.begin();
       iter != w_.end();
       ++iter) {
    // Use the absolute value: pow() with a negative base and a non-integer
    // exponent would return NaN.
    result += pow(fabs(iter->second), x);
  }
  return pow(result, 1.0 / x) / normalizer_;
}

template<class Key, class Hash>
double SparseWeightVector<Key, Hash>::L2Norm() const {
  double result = 0;
  for (Witer_const iter = w_.begin();
       iter != w_.end();
       ++iter) {
    result += iter->second * iter->second;
  }
  return sqrt(result) / normalizer_;
}

template<class Key, class Hash>
double SparseWeightVector<Key, Hash>::L1Norm() const {
  double result = 0;
  for (Witer_const iter = w_.begin();
       iter != w_.end();
       ++iter) {
    result += fabs(iter->second);
  }
  return result / normalizer_;
}

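// Counts the entries whose logical magnitude exceeds epsilon. The threshold
// matters because reprojection can leave explicit zero or near-zero entries
// in the map.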
template<class Key, class Hash>
double SparseWeightVector<Key, Hash>::L0Norm(
    const double epsilon) const {
  double result = 0;
  for (Witer_const iter = w_.begin();
       iter != w_.end();
       ++iter) {
    if (fabs(iter->second / normalizer_) > epsilon)
      ++result;
  }
  return result;
}

// Algorithm for L0 projection which takes O(n) time on average (via
// std::nth_element), where n is the number of non-zero elements in the
// vector.
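// Note that entries whose magnitude exactly ties the selection threshold are
// kept, so in case of ties the result can retain slightly more than l0_norm
// non-zeros.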
template<class Key, class Hash>
void SparseWeightVector<Key, Hash>::ReprojectL0(const double l0_norm) {
  // First calculates the order statistics of the sparse vector
  // and then reprojects to the L0 orthant with the requested norm.
  CHECK_GT(l0_norm, 0);
  uint64 req_l0_norm = static_cast<uint64>(l0_norm);
  // Compute order statistics and the current L0 norm.
  vector<double> abs_val_vec;
  uint64 curr_l0_norm = 0;
  const double epsilon = 1e-05;
  for (Witer iter = w_.begin();
       iter != w_.end();
       ++iter) {
    if (fabs(iter->second / normalizer_) > epsilon) {
      abs_val_vec.push_back(fabs(iter->second / normalizer_));
      ++curr_l0_norm;
    }
  }
  // Check if a projection is necessary.
  if (curr_l0_norm < req_l0_norm) {
    return;
  }
  // Partition so that the (curr_l0_norm - req_l0_norm)-th smallest magnitude
  // is in its sorted position; it becomes the zeroing threshold theta.
  std::nth_element(abs_val_vec.begin(),
                   abs_val_vec.begin() + (curr_l0_norm - req_l0_norm),
                   abs_val_vec.end());
  const double theta = abs_val_vec[curr_l0_norm - req_l0_norm];
  // Compute the final projection: zero out all magnitudes strictly below
  // theta.
  for (Witer iter = w_.begin();
       iter != w_.end();
       ++iter) {
    if ((fabs(iter->second / normalizer_) - theta) < 0) {
      iter->second = 0;
    }
  }
}

// Slow algorithm for accurate L1 projection which takes O(n log(n)), where n
// is the number of non-zero elements in the vector.
template<class Key, class Hash>
void SparseWeightVector<Key, Hash>::ReprojectL1(const double l1_norm) {
  // First calculates the order statistics of the sparse vector,
  // applies a probability-simplex projection to abs(vector),
  // and reprojects back to the original with the appropriate sign.
  // For reference, see "Efficient Projections onto the l1-Ball for Learning
  // in High Dimensions" (Duchi et al., ICML 2008).
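  // Worked example (assuming normalizer_ == 1): projecting w = (0.5, 0.3, 0.2)
  // onto the l1-ball of radius 0.8 yields theta = (0.5 + 0.3 + 0.2 - 0.8) / 3
  // ~= 0.0667 below, hence the projection (0.4333, 0.2333, 0.1333), whose L1
  // norm is exactly 0.8.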
  CHECK_GT(l1_norm, 0);
  // Compute order statistics and the current L1 norm.
  list<double> abs_val_list;
  double curr_l1_norm = 0;
  for (Witer iter = w_.begin();
       iter != w_.end();
       ++iter) {
    abs_val_list.push_back(fabs(iter->second / normalizer_));
    curr_l1_norm += fabs(iter->second / normalizer_);
  }
  // Check if a projection is necessary.
  if (curr_l1_norm < l1_norm) {
    return;
  }
  // Sort the magnitudes in decreasing order.
  abs_val_list.sort();
  abs_val_list.reverse();
  // Compute the projection onto the probability simplex: theta comes from the
  // largest index k (1-based; rho in the paper) whose k-th largest magnitude
  // still exceeds (cumulative sum of the top k magnitudes - l1_norm) / k.
  double curr_index = 1;
  double theta = 0;
  double cum_sum = 0;
  for (list<double>::iterator val_iter = abs_val_list.begin();
       val_iter != abs_val_list.end();
       ++val_iter) {
    cum_sum += *val_iter;
    const double candidate = (cum_sum - l1_norm) / curr_index;
    if (((*val_iter) - candidate) <= 0) {
      break;
    }
    theta = candidate;
    ++curr_index;
  }
  // Compute the final projection: shrink each magnitude by theta, clamping
  // at zero, and restore the original sign.
  for (Witer iter = w_.begin();
       iter != w_.end();
       ++iter) {
    const double sign_mul = (iter->second > 0) ? 1.0 : -1.0;
    iter->second = sign_mul *
        max(normalizer_ * (fabs(iter->second / normalizer_) - theta), 0.0);
  }
}

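// L2 projection is cheap: when the current norm exceeds the target, only the
// shared normalizer is scaled, leaving every stored weight untouched.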
template<class Key, class Hash>
void SparseWeightVector<Key, Hash>::ReprojectL2(const double l2_norm) {
  CHECK_GT(l2_norm, 0);
  double curr_l2_norm = L2Norm();
  // Check if a projection is necessary.
  if (curr_l2_norm > l2_norm) {
    normalizer_ *= curr_l2_norm / l2_norm;
  }
}

template<class Key, class Hash>
int32 SparseWeightVector<Key, Hash>::Reproject(const double norm,
                                               const RegularizationType r) {
  CHECK_GT(norm, 0);
  if (r == L0) {
    ReprojectL0(norm);
  } else if (r == L1) {
    ReprojectL1(norm);
  } else if (r == L2) {
    ReprojectL2(norm);
  } else {
    // This else branch ensures that if RegularizationTypes requiring
    // manipulations unrelated to SparseWeightVector are added to the enum
    // later, the accidental argument is caught here.
    ALOGE("Unsupported regularization type requested");
    return -1;
  }
  // If the normalizer gets dangerously large or small, normalize the
  // entire vector. This stops projections from sending the vector
  // weights and the normalizer simultaneously all very small or
  // large, causing under/overflows. If this triggers often, it is a
  // sign that a bad lambda was chosen.
  if (normalizer_ < kNormalizerMin) {
    ALOGE("Resetting normalizer to 1.0 to prevent underflow. "
          "Is lambda too large?");
    ResetNormalizer();
  }
  if (normalizer_ > kNormalizerMax) {
    ALOGE("Resetting normalizer to 1.0 to prevent overflow. "
          "Is lambda too small?");
    ResetNormalizer();
  }
  return 0;
}

template class SparseWeightVector<std::string,
                                  std::unordered_map<std::string, double> >;
template class SparseWeightVector<int, std::unordered_map<int, double> >;
template class SparseWeightVector<uint64, std::unordered_map<uint64, double> >;
}  // namespace learning_stochastic_linear