1 // Copyright (C) 2014 Google Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include <libaddressinput/address_input_helper.h>
16
17 #include <libaddressinput/address_data.h>
18 #include <libaddressinput/address_field.h>
19 #include <libaddressinput/preload_supplier.h>
20 #include <libaddressinput/util/basictypes.h>
21
22 #include <cassert>
23 #include <cstddef>
24 #include <string>
25 #include <vector>
26
27 #include <re2/re2.h>
28
29 #include "language.h"
30 #include "lookup_key.h"
31 #include "region_data_constants.h"
32 #include "rule.h"
33 #include "util/re2ptr.h"
34
35 namespace i18n {
36 namespace addressinput {
37
38 // Used for building a hierarchy of rules, each one connected to its parent.
39 struct Node {
40 const Node* parent;
41 const Rule* rule;
42 };
43
44 namespace {
45
46 const char kLookupKeySeparator = '/';
47
48 const size_t kHierarchyDepth = arraysize(LookupKey::kHierarchy);
49
50 // Gets the best name for the entity represented by the current rule, using the
51 // language provided. The language is currently used to distinguish whether a
52 // Latin-script name should be fetched; if it is not explicitly Latin-script, we
53 // prefer IDs over names (so return CA instead of California for an English
54 // user.) If there is no Latin-script name, we fall back to the ID.
GetBestName(const Language & language,const Rule & rule)55 std::string GetBestName(const Language& language, const Rule& rule) {
56 if (language.has_latin_script) {
57 const std::string& name = rule.GetLatinName();
58 if (!name.empty()) {
59 return name;
60 }
61 }
62 // The ID is stored as data/US/CA for "CA", for example, and we only want the
63 // last part.
64 const std::string& id = rule.GetId();
65 std::string::size_type pos = id.rfind(kLookupKeySeparator);
66 assert(pos != std::string::npos);
67 return id.substr(pos + 1);
68 }
69
FillAddressFromMatchedRules(const std::vector<Node> * hierarchy,AddressData * address)70 void FillAddressFromMatchedRules(
71 const std::vector<Node>* hierarchy,
72 AddressData* address) {
73 assert(hierarchy != NULL);
74 assert(address != NULL);
75 // We skip region code, because we never try and fill that in if it isn't
76 // already set.
77 Language language(address->language_code);
78 for (size_t depth = kHierarchyDepth - 1; depth > 0; --depth) {
79 // If there is only one match at this depth, then we should populate the
80 // address, using this rule and its parents.
81 if (hierarchy[depth].size() == 1) {
82 for (const Node* node = &hierarchy[depth].front();
83 node != NULL; node = node->parent, --depth) {
84 const Rule* rule = node->rule;
85 assert(rule != NULL);
86
87 AddressField field = LookupKey::kHierarchy[depth];
88 // Note only empty fields are permitted to be overwritten.
89 if (address->IsFieldEmpty(field)) {
90 address->SetFieldValue(field, GetBestName(language, *rule));
91 }
92 }
93 break;
94 }
95 }
96 }
97
98 } // namespace;
99
AddressInputHelper(PreloadSupplier * supplier)100 AddressInputHelper::AddressInputHelper(PreloadSupplier* supplier)
101 : supplier_(supplier) {
102 assert(supplier_ != NULL);
103 }
104
~AddressInputHelper()105 AddressInputHelper::~AddressInputHelper() {
106 }
107
FillAddress(AddressData * address) const108 void AddressInputHelper::FillAddress(AddressData* address) const {
109 assert(address != NULL);
110 const std::string& region_code = address->region_code;
111 if (!RegionDataConstants::IsSupported(region_code)) {
112 // If we don't have a region code, we can't do anything reliably to fill
113 // this address.
114 return;
115 }
116
117 AddressData lookup_key_address;
118 lookup_key_address.region_code = region_code;
119 // First try and fill in the postal code if it is missing.
120 LookupKey lookup_key;
121 lookup_key.FromAddress(lookup_key_address);
122 const Rule* region_rule = supplier_->GetRule(lookup_key);
123 // We have already checked that the region is supported; and users of this
124 // method must have called LoadRules() first, so we check this here.
125 assert(region_rule != NULL);
126
127 const RE2ptr* postal_code_reg_exp = region_rule->GetPostalCodeMatcher();
128 if (postal_code_reg_exp != NULL) {
129 if (address->postal_code.empty()) {
130 address->postal_code = region_rule->GetSolePostalCode();
131 }
132
133 // If we have a valid postal code, try and work out the most specific
134 // hierarchy that matches the postal code. Note that the postal code might
135 // have been added in the previous check.
136 if (!address->postal_code.empty() &&
137 RE2::FullMatch(address->postal_code, *postal_code_reg_exp->ptr)) {
138
139 // This hierarchy is used to store rules that represent possible matches
140 // at each level of the hierarchy.
141 std::vector<Node> hierarchy[kHierarchyDepth];
142 CheckChildrenForPostCodeMatches(*address, lookup_key, NULL, hierarchy);
143
144 FillAddressFromMatchedRules(hierarchy, address);
145 }
146 }
147
148 // TODO: When we have the data, we should fill in the state for countries with
149 // state required and only one possible value, e.g. American Samoa.
150 }
151
CheckChildrenForPostCodeMatches(const AddressData & address,const LookupKey & lookup_key,const Node * parent,std::vector<Node> * hierarchy) const152 void AddressInputHelper::CheckChildrenForPostCodeMatches(
153 const AddressData& address,
154 const LookupKey& lookup_key,
155 const Node* parent,
156 // An array of vectors.
157 std::vector<Node>* hierarchy) const {
158 const Rule* rule = supplier_->GetRule(lookup_key);
159 assert(rule != NULL);
160
161 const RE2ptr* postal_code_prefix = rule->GetPostalCodeMatcher();
162 if (postal_code_prefix == NULL ||
163 RE2::PartialMatch(address.postal_code, *postal_code_prefix->ptr)) {
164 // This was a match, so store it and its parent in the hierarchy.
165 hierarchy[lookup_key.GetDepth()].push_back(Node());
166 Node* node = &hierarchy[lookup_key.GetDepth()].back();
167 node->parent = parent;
168 node->rule = rule;
169
170 // If there are children, check them too.
171 for (std::vector<std::string>::const_iterator child_it =
172 rule->GetSubKeys().begin();
173 child_it != rule->GetSubKeys().end(); ++child_it) {
174 LookupKey child_key;
175 child_key.FromLookupKey(lookup_key, *child_it);
176 CheckChildrenForPostCodeMatches(address, child_key, node, hierarchy);
177 }
178 }
179 }
180
181 } // namespace addressinput
182 } // namespace i18n
183