• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2014 Google Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <libaddressinput/address_input_helper.h>
16 
17 #include <libaddressinput/address_data.h>
18 #include <libaddressinput/address_field.h>
19 #include <libaddressinput/preload_supplier.h>
20 #include <libaddressinput/util/basictypes.h>
21 
22 #include <cassert>
23 #include <cstddef>
24 #include <string>
25 #include <vector>
26 
27 #include <re2/re2.h>
28 
29 #include "language.h"
30 #include "lookup_key.h"
31 #include "region_data_constants.h"
32 #include "rule.h"
33 #include "util/re2ptr.h"
34 
35 namespace i18n {
36 namespace addressinput {
37 
38 // Used for building a hierarchy of rules, each one connected to its parent.
39 struct Node {
40   const Node* parent;
41   const Rule* rule;
42 };
43 
44 namespace {
45 
46 const char kLookupKeySeparator = '/';
47 
48 const size_t kHierarchyDepth = arraysize(LookupKey::kHierarchy);
49 
50 // Gets the best name for the entity represented by the current rule, using the
51 // language provided. The language is currently used to distinguish whether a
52 // Latin-script name should be fetched; if it is not explicitly Latin-script, we
53 // prefer IDs over names (so return CA instead of California for an English
54 // user.) If there is no Latin-script name, we fall back to the ID.
GetBestName(const Language & language,const Rule & rule)55 std::string GetBestName(const Language& language, const Rule& rule) {
56   if (language.has_latin_script) {
57     const std::string& name = rule.GetLatinName();
58     if (!name.empty()) {
59       return name;
60     }
61   }
62   // The ID is stored as data/US/CA for "CA", for example, and we only want the
63   // last part.
64   const std::string& id = rule.GetId();
65   std::string::size_type pos = id.rfind(kLookupKeySeparator);
66   assert(pos != std::string::npos);
67   return id.substr(pos + 1);
68 }
69 
FillAddressFromMatchedRules(const std::vector<Node> * hierarchy,AddressData * address)70 void FillAddressFromMatchedRules(
71     const std::vector<Node>* hierarchy,
72     AddressData* address) {
73   assert(hierarchy != NULL);
74   assert(address != NULL);
75   // We skip region code, because we never try and fill that in if it isn't
76   // already set.
77   Language language(address->language_code);
78   for (size_t depth = kHierarchyDepth - 1; depth > 0; --depth) {
79     // If there is only one match at this depth, then we should populate the
80     // address, using this rule and its parents.
81     if (hierarchy[depth].size() == 1) {
82       for (const Node* node = &hierarchy[depth].front();
83            node != NULL; node = node->parent, --depth) {
84         const Rule* rule = node->rule;
85         assert(rule != NULL);
86 
87         AddressField field = LookupKey::kHierarchy[depth];
88         // Note only empty fields are permitted to be overwritten.
89         if (address->IsFieldEmpty(field)) {
90           address->SetFieldValue(field, GetBestName(language, *rule));
91         }
92       }
93       break;
94     }
95   }
96 }
97 
}  // namespace
99 
AddressInputHelper(PreloadSupplier * supplier)100 AddressInputHelper::AddressInputHelper(PreloadSupplier* supplier)
101     : supplier_(supplier) {
102   assert(supplier_ != NULL);
103 }
104 
~AddressInputHelper()105 AddressInputHelper::~AddressInputHelper() {
106 }
107 
FillAddress(AddressData * address) const108 void AddressInputHelper::FillAddress(AddressData* address) const {
109   assert(address != NULL);
110   const std::string& region_code = address->region_code;
111   if (!RegionDataConstants::IsSupported(region_code)) {
112     // If we don't have a region code, we can't do anything reliably to fill
113     // this address.
114     return;
115   }
116 
117   AddressData lookup_key_address;
118   lookup_key_address.region_code = region_code;
119   // First try and fill in the postal code if it is missing.
120   LookupKey lookup_key;
121   lookup_key.FromAddress(lookup_key_address);
122   const Rule* region_rule = supplier_->GetRule(lookup_key);
123   // We have already checked that the region is supported; and users of this
124   // method must have called LoadRules() first, so we check this here.
125   assert(region_rule != NULL);
126 
127   const RE2ptr* postal_code_reg_exp = region_rule->GetPostalCodeMatcher();
128   if (postal_code_reg_exp != NULL) {
129     if (address->postal_code.empty()) {
130       address->postal_code = region_rule->GetSolePostalCode();
131     }
132 
133     // If we have a valid postal code, try and work out the most specific
134     // hierarchy that matches the postal code. Note that the postal code might
135     // have been added in the previous check.
136     if (!address->postal_code.empty() &&
137         RE2::FullMatch(address->postal_code, *postal_code_reg_exp->ptr)) {
138 
139       // This hierarchy is used to store rules that represent possible matches
140       // at each level of the hierarchy.
141       std::vector<Node> hierarchy[kHierarchyDepth];
142       CheckChildrenForPostCodeMatches(*address, lookup_key, NULL, hierarchy);
143 
144       FillAddressFromMatchedRules(hierarchy, address);
145     }
146   }
147 
148   // TODO: When we have the data, we should fill in the state for countries with
149   // state required and only one possible value, e.g. American Samoa.
150 }
151 
CheckChildrenForPostCodeMatches(const AddressData & address,const LookupKey & lookup_key,const Node * parent,std::vector<Node> * hierarchy) const152 void AddressInputHelper::CheckChildrenForPostCodeMatches(
153     const AddressData& address,
154     const LookupKey& lookup_key,
155     const Node* parent,
156     // An array of vectors.
157     std::vector<Node>* hierarchy) const {
158   const Rule* rule = supplier_->GetRule(lookup_key);
159   assert(rule != NULL);
160 
161   const RE2ptr* postal_code_prefix = rule->GetPostalCodeMatcher();
162   if (postal_code_prefix == NULL ||
163       RE2::PartialMatch(address.postal_code, *postal_code_prefix->ptr)) {
164     // This was a match, so store it and its parent in the hierarchy.
165     hierarchy[lookup_key.GetDepth()].push_back(Node());
166     Node* node = &hierarchy[lookup_key.GetDepth()].back();
167     node->parent = parent;
168     node->rule = rule;
169 
170     // If there are children, check them too.
171     for (std::vector<std::string>::const_iterator child_it =
172              rule->GetSubKeys().begin();
173          child_it != rule->GetSubKeys().end(); ++child_it) {
174       LookupKey child_key;
175       child_key.FromLookupKey(lookup_key, *child_it);
176       CheckChildrenForPostCodeMatches(address, child_key, node, hierarchy);
177     }
178   }
179 }
180 
181 }  // namespace addressinput
182 }  // namespace i18n
183