• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
#include "js_uri.h"

#include <limits>

#include "utils/log.h"
18 namespace OHOS::Uri {
// Character-class lookup tables, indexed by character code. They start out empty and are
// filled in by Uri::PreliminaryWork(); the parsing steps consult them, mostly through
// Uri::CheckCharacter().
19     std::bitset<MAX_BIT_SIZE> g_ruleAlpha; // ASCII letters a-z and A-Z
20     std::bitset<MAX_BIT_SIZE> g_ruleScheme; // digits, letters and "+-."
21     std::bitset<MAX_BIT_SIZE> g_ruleUrlc; // scheme characters plus ";/?:@&=$,[]_!~*'()%" (query and fragment)
22     std::bitset<MAX_BIT_SIZE> g_rulePath; // scheme characters plus ";/:@&=$,_!~*'()%"
23     std::bitset<MAX_BIT_SIZE> g_ruleUserInfo; // scheme characters plus ";:&=$,_!~*'()%"
24     std::bitset<MAX_BIT_SIZE> g_ruleDigit; // decimal digits 0-9
25     std::bitset<MAX_BIT_SIZE> g_rulePort; // digits, letters and ".:@-;&=+$,-_!~*'()"
PreliminaryWork() const26     void Uri::PreliminaryWork() const
27     {
28         std::string digitAggregate = "0123456789";
29         for (size_t i = 0; i < digitAggregate.size(); ++i) {
30             g_ruleDigit.set(digitAggregate[i]);
31         }
32 
33         std::string alphasAggregate = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
34         for (size_t i = 0; i < alphasAggregate.size(); ++i) {
35             g_ruleAlpha.set(alphasAggregate[i]);
36         }
37 
38         std::string schemeAggregate = digitAggregate + alphasAggregate + "+-.";
39         for (size_t i = 0; i < schemeAggregate.size(); ++i) {
40             g_ruleScheme.set(schemeAggregate[i]);
41         }
42 
43         std::string uricAggregate = schemeAggregate + ";/?:@&=$,[]_!~*'()%";
44         for (size_t i = 0; i < uricAggregate.size(); ++i) {
45             g_ruleUrlc.set(uricAggregate[i]);
46         }
47 
48         std::string pathAggregate = schemeAggregate + ";/:@&=$,_!~*'()%";
49         for (size_t i = 0; i < pathAggregate.size(); ++i) {
50             g_rulePath.set(pathAggregate[i]);
51         }
52 
53         std::string userInfoAggregate = schemeAggregate + ";:&=$,_!~*'()%";
54         for (size_t i = 0; i < userInfoAggregate.size(); ++i) {
55             g_ruleUserInfo.set(userInfoAggregate[i]);
56         }
57 
58         std::string portAggregate = digitAggregate + alphasAggregate + ".:@-;&=+$,-_!~*'()";
59         for (size_t i = 0; i < portAggregate.size(); ++i) {
60             g_rulePort.set(portAggregate[i]);
61         }
62     }
63 
Uri(const std::string input)64     Uri::Uri(const std::string input)
65     {
66         PreliminaryWork();
67         errStr_ = "";
68         if (input.empty()) {
69             errStr_ = "uri is empty";
70             return;
71         }
72         inputUri_ = input;
73         AnalysisUri();
74     }
75 
AssignSchemeSpecificPart()76     void Uri::AssignSchemeSpecificPart()
77     {
78         uriData_.SchemeSpecificPart.reserve(data_.length() + uriData_.query.length() + 1);
79         uriData_.SchemeSpecificPart.append(data_);
80         uriData_.SchemeSpecificPart.append("?");
81         uriData_.SchemeSpecificPart.append(uriData_.query);
82     }
83 
AnalysisUri()84     void Uri::AnalysisUri()
85     {
86         data_ = inputUri_;
87         size_t pos = data_.find('#'); // Fragment
88         if (pos != std::string::npos) {
89             AnalysisFragment(pos);
90             if (!errStr_.empty()) {
91                 return;
92             }
93         }
94         pos = data_.find('?'); // Query
95         if (pos != std::string::npos) {
96             AnalysisQuery(pos);
97             if (!errStr_.empty()) {
98                 return;
99             }
100         }
101         pos = data_.find(':'); // Scheme
102         if (pos != std::string::npos) {
103             AnalysisScheme(pos);
104             if (!errStr_.empty()) {
105                 return;
106             }
107         } else {
108             SpecialPath();
109             if (!errStr_.empty()) {
110                 return;
111             }
112             AssignSchemeSpecificPart();
113             return;
114         }
115         pos = data_.find("//"); // userInfo path host port ipv4 or ipv6
116         if (pos != std::string::npos && pos == 0) {
117             AssignSchemeSpecificPart();
118             data_ = data_.substr(2); // 2:Intercept the string from the second subscript
119             AnalysisHostAndPath();
120             if (!errStr_.empty()) {
121                 return;
122             }
123         } else if (data_[0] == '/') {
124             uriData_.path = data_;
125             AssignSchemeSpecificPart();
126             data_ = "";
127         } else {
128             AssignSchemeSpecificPart();
129             uriData_.query = "";
130             data_ = "";
131         }
132     }
133 
CheckCharacter(std::string data,std::bitset<MAX_BIT_SIZE> rule,bool flag) const134     bool Uri::CheckCharacter(std::string data, std::bitset<MAX_BIT_SIZE> rule, bool flag) const
135     {
136         size_t dataLen = data.size();
137         for (size_t i = 0; i < dataLen; ++i) {
138             if (static_cast<int>(data[i]) >= 0 && static_cast<int>(data[i]) < 128) { // 128:ASCII Max Number
139                 bool isLegal = rule.test(data[i]);
140                 if (!isLegal) {
141                     return false;
142                 }
143             } else if (!flag) {
144                 return false;
145             }
146         }
147         return true;
148     }
149 
SpecialPath()150     void Uri::SpecialPath()
151     {
152         if (!CheckCharacter(data_, g_rulePath, true)) {
153             errStr_ = "SpecialPath does not conform to the rule";
154             return;
155         }
156         uriData_.path = data_;
157         data_ = "";
158     }
159 
AnalysisFragment(size_t pos)160     void Uri::AnalysisFragment(size_t pos)
161     {
162         if (pos == 0) {
163             errStr_ = "#It can't be the first";
164             return;
165         }
166         std::string fragment = data_.substr(pos + 1);
167         if (!CheckCharacter(fragment, g_ruleUrlc, true)) {
168             errStr_ = "Fragment does not conform to the rule";
169             return;
170         }
171         uriData_.fragment = fragment;
172         data_ = data_.substr(0, pos);
173     }
174 
AnalysisQuery(size_t pos)175     void Uri::AnalysisQuery(size_t pos)
176     {
177         std::string query = data_.substr(pos + 1);
178         if (!CheckCharacter(query, g_ruleUrlc, true)) {
179             errStr_ = "Query does not conform to the rule";
180             return;
181         }
182         uriData_.query = query;
183         data_ = data_.substr(0, pos);
184     }
185 
AnalysisScheme(size_t pos)186     void Uri::AnalysisScheme(size_t pos)
187     {
188         size_t slashPos = data_.find('/');
189         if (slashPos != std::string::npos && slashPos < pos) {
190             SpecialPath();
191             uriData_.SchemeSpecificPart.reserve(uriData_.path.length() + uriData_.query.length() + 1);
192             uriData_.SchemeSpecificPart.append(uriData_.path);
193             uriData_.SchemeSpecificPart.append("?");
194             uriData_.SchemeSpecificPart.append(uriData_.query);
195             data_ = "";
196         } else {
197             if ((static_cast<int>(data_[0]) >= 0 && static_cast<int>(data_[0]) < MAX_BIT_SIZE) &&
198                 !g_ruleAlpha.test(data_[0])) {
199                 errStr_ = "Scheme the first character must be a letter";
200                 return;
201             }
202             std::string scheme = data_.substr(0, pos);
203             if (!CheckCharacter(scheme, g_ruleScheme, false)) {
204                 errStr_ = "scheme does not conform to the rule";
205                 return;
206             }
207             uriData_.scheme = scheme;
208             data_ = data_.substr(pos + 1);
209         }
210     }
211 
AnalysisHost(bool isLawfulProt)212     void Uri::AnalysisHost(bool isLawfulProt)
213     {
214         // find ipv4 or ipv6 or host
215         if (data_[0] == '[') {
216             if (data_[data_.size() - 1] == ']') {
217                 // IPV6
218                 if (!isLawfulProt) {
219                     errStr_ = "Prot does not conform to the rule";
220                     return;
221                 }
222                 AnalysisIPV6();
223             } else {
224                 errStr_ = "IPv6 is missing a closing bracket";
225                 return;
226             }
227         } else {
228             if (data_.find('[') != std::string::npos || data_.find(']') != std::string::npos) {
229                 errStr_ = "host does not conform to the rule";
230                 return;
231             }
232             // ipv4
233             if (!isLawfulProt || !AnalysisIPV4()) {
234                 uriData_.port = -1;
235                 uriData_.host = "";
236                 uriData_.userInfo = "";
237             }
238         }
239     }
240 
AnalysisHostAndPath()241     void Uri::AnalysisHostAndPath()
242     {
243         if (data_.empty()) {
244             return;
245         }
246         // find path
247         size_t pos = data_.find('/');
248         if (pos != std::string::npos) {
249             AnalysisPath(pos);
250             if (!errStr_.empty()) {
251                 return;
252             }
253         }
254         uriData_.authority = data_;
255         // find UserInfo
256         pos = data_.find('@');
257         if (pos != std::string::npos) {
258             AnalysisUserInfo(pos);
259             if (!errStr_.empty()) {
260                 return;
261             }
262         }
263         bool isLawfulProt = true;
264         // find port
265         pos = data_.rfind(':');
266         if (pos != std::string::npos) {
267             size_t pos1 = data_.rfind(']');
268             if (pos1 == std::string::npos || pos > pos1) {
269                 isLawfulProt = AnalysisPort(pos);
270             }
271             if (!errStr_.empty()) {
272             return;
273             }
274         }
275         AnalysisHost(isLawfulProt);
276     }
277 
AnalysisPath(size_t pos)278     void Uri::AnalysisPath(size_t pos)
279     {
280         std::string path = data_.substr(pos);
281         if (!CheckCharacter(path, g_rulePath, true)) {
282             errStr_ = "path does not conform to the rule";
283             return;
284         }
285         uriData_.path = path;
286         data_ = data_.substr(0, pos);
287     }
288 
AnalysisUserInfo(size_t pos)289     void Uri::AnalysisUserInfo(size_t pos)
290     {
291         std::string userInfo = data_.substr(0, pos);
292         if (!CheckCharacter(userInfo, g_ruleUserInfo, true)) {
293             errStr_ = "userInfo does not conform to the rule";
294             return;
295         }
296         uriData_.userInfo = userInfo;
297         data_ = data_.substr(pos + 1);
298     }
299 
AnalysisPort(size_t pos)300     bool Uri::AnalysisPort(size_t pos)
301     {
302         std::string port = data_.substr(pos + 1);
303         if (!CheckCharacter(port, g_rulePort, true)) {
304             errStr_ = "port does not conform to the rule";
305             return false;
306         } else if (CheckCharacter(port, g_ruleDigit, false)) {
307             if (port.size() == 0) {
308                 return false;
309             }
310             uriData_.port = std::stoi(port);
311             data_ = data_.substr(0, pos);
312             return true;
313         } else {
314             data_ = data_.substr(0, pos);
315             return false;
316         }
317         return false;
318     }
319 
AnalysisIPV4()320     bool Uri::AnalysisIPV4()
321     {
322         std::regex ipv4("((25[0-5]|2[0-4]\\d|1\\d{2}|[1-9]\\d|\\d)\\.){3}(25[0-5]|2[0-4]\\d|1\\d{2}|[1-9]\\d|\\d)");
323         std::regex hostname("(([a-zA-Z0-9]([a-zA-Z0-9\\-]*[a-zA-Z0-9])?\\.)+([a-zA-Z]([a-zA-Z0-9\\-]*[a-zA-Z0-9])?))|"
324                             "([a-zA-Z0-9]([a-zA-Z0-9\\-]*[a-zA-Z0-9])?)");
325         bool isIpv4 = std::regex_match(data_, ipv4);
326         bool isHosName = std::regex_match(data_, hostname);
327         if (!isIpv4 && !isHosName) {
328             return false;
329         } else {
330             uriData_.host = data_;
331             data_ = "";
332             return true;
333         }
334     }
335 
AnalysisIPV6()336     void Uri::AnalysisIPV6()
337     {
338         std::string str = data_.substr(1, data_.size() - 2); // 2:Intercept the string from the second subscript
339         std::regex ipv6("(::|(:((:[0-9A-Fa-f]{1,4}){1,7}))|(([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|"
340                         "(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|:))|(([0-9A-Fa-f]{1,4}:){2}"
341                         "(((:[0-9A-Fa-f]{1,4}){1,5})|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})"
342                         "|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|:))|(([0-9A-Fa-f]{1,4}:){5}"
343                         "(((:[0-9A-Fa-f]{1,4}){1,2})|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|:))|"
344                         "(((:(:[0-9A-Fa-f]{1,4}){0,5}:)|(([0-9A-Fa-f]{1,4}:){1}(:[0-9A-Fa-f]{1,4}){0,4}:)"
345                         "|(([0-9A-Fa-f]{1,4}:){2}(:[0-9A-Fa-f]{1,4}){0,3}:)|(([0-9A-Fa-f]{1,4}:){3}"
346                         "(:[0-9A-Fa-f]{1,4}){0,2}:)|(([0-9A-Fa-f]{1,4}:){4}(:[0-9A-Fa-f]{1,4})?:)|"
347                         "(([0-9A-Fa-f]{1,4}:){5}:)|(([0-9A-Fa-f]{1,4}:){6}))((25[0-5]|2[0-4]\\d|1\\d{2}|"
348                         "[1-9]\\d|\\d)\\.){3}(25[0-5]|2[0-4]\\d|1\\d{2}|[1-9]\\d|\\d)))(%[a-zA-Z0-9._]+)?");
349         if (!std::regex_match(str, ipv6)) {
350             errStr_ = "ipv6 does not conform to the rule";
351             return;
352         }
353         uriData_.host = data_;
354         data_ = "";
355     }
356 
Equals(const Uri other) const357     bool Uri::Equals(const Uri other) const
358     {
359         if (uriData_.port != other.uriData_.port) {
360             return false;
361         }
362         if (uriData_.scheme != other.uriData_.scheme) {
363             return false;
364         }
365         if (uriData_.userInfo != other.uriData_.userInfo) {
366             return false;
367         }
368         if (uriData_.host != other.uriData_.host) {
369             return false;
370         }
371         if (uriData_.query != other.uriData_.query) {
372             return false;
373         }
374         if (uriData_.fragment != other.uriData_.fragment) {
375             return false;
376         }
377         if (uriData_.path != other.uriData_.path) {
378             return false;
379         }
380         if (uriData_.authority != other.uriData_.authority) {
381             return false;
382         }
383         if (uriData_.SchemeSpecificPart != other.uriData_.SchemeSpecificPart) {
384             return false;
385         }
386         return true;
387     }
388 
ToString() const389     std::string Uri::ToString() const
390     {
391         return inputUri_;
392     }
393 
IsAbsolute() const394     bool Uri::IsAbsolute() const
395     {
396         return !uriData_.scheme.empty();
397     }
398 
IsFailed() const399     std::string Uri::IsFailed() const
400     {
401         return errStr_;
402     }
403 
Normalize() const404     std::string Uri::Normalize() const
405     {
406         std::vector<std::string> temp;
407         size_t pathLen = uriData_.path.size();
408         if (pathLen == 0) {
409             return this->inputUri_;
410         }
411         size_t pos = 0;
412         size_t left = 0;
413         while ((pos = uriData_.path.find('/', left)) != std::string::npos) {
414             temp.push_back(uriData_.path.substr(left, pos - left));
415             left = pos + 1;
416         }
417         if (left != pathLen) {
418             temp.push_back(uriData_.path.substr(left));
419         }
420         size_t tempLen = temp.size();
421         std::vector<std::string> normalizeTemp;
422         for (size_t i = 0; i < tempLen; ++i) {
423             if (!temp[i].empty() && !(temp[i] == ".") && !(temp[i] == "..")) {
424                 normalizeTemp.push_back(temp[i]);
425             }
426             if (temp[i] == "..") {
427                 if (!normalizeTemp.empty() && normalizeTemp.back() != "..") {
428                     normalizeTemp.pop_back();
429                 } else {
430                     normalizeTemp.push_back(temp[i]);
431                 }
432             }
433         }
434         std::string normalizePath = "";
435         tempLen = normalizeTemp.size();
436         if (tempLen == 0) {
437             normalizePath = "/";
438         } else {
439             for (size_t i = 0; i < tempLen; ++i) {
440                 normalizePath += "/" + normalizeTemp[i];
441             }
442         }
443         return Split(normalizePath);
444     }
445 
446 
Split(const std::string & path) const447     std::string Uri::Split(const std::string &path) const
448     {
449         std::string normalizeUri = "";
450         if (!uriData_.scheme.empty()) {
451             normalizeUri += uriData_.scheme + ":";
452         }
453         if (uriData_.path.empty()) {
454             normalizeUri += uriData_.SchemeSpecificPart;
455         } else {
456             if (!uriData_.host.empty()) {
457                 normalizeUri += "//";
458                 if (!uriData_.userInfo.empty()) {
459                     normalizeUri += uriData_.userInfo + "@";
460                 }
461                 normalizeUri += uriData_.host;
462                 if (uriData_.port != -1) {
463                     normalizeUri += ":" + std::to_string(uriData_.port);
464                 }
465             } else if (!uriData_.authority.empty()) {
466                 normalizeUri += "//" + uriData_.authority;
467             }
468             normalizeUri += path;
469         }
470         if (!uriData_.query.empty()) {
471             normalizeUri += "?" + uriData_.query;
472         }
473         if (!uriData_.fragment.empty()) {
474             normalizeUri += "#" + uriData_.fragment;
475         }
476         return normalizeUri;
477     }
478 
GetScheme() const479     std::string Uri::GetScheme() const
480     {
481         return uriData_.scheme;
482     }
483 
GetAuthority() const484     std::string Uri::GetAuthority() const
485     {
486         return uriData_.authority;
487     }
488 
GetSsp() const489     std::string Uri::GetSsp() const
490     {
491         return uriData_.SchemeSpecificPart;
492     }
493 
GetUserinfo() const494     std::string Uri::GetUserinfo() const
495     {
496         return uriData_.userInfo;
497     }
498 
GetHost() const499     std::string Uri::GetHost() const
500     {
501         return uriData_.host;
502     }
503 
GetPort() const504     std::string Uri::GetPort() const
505     {
506         return std::to_string(uriData_.port);
507     }
508 
GetPath() const509     std::string Uri::GetPath() const
510     {
511         return uriData_.path;
512     }
513 
GetQuery() const514     std::string Uri::GetQuery() const
515     {
516         return uriData_.query;
517     }
518 
GetFragment() const519     std::string Uri::GetFragment() const
520     {
521         return uriData_.fragment;
522     }
523 } // namespace OHOS::Uri
524