1 /* 2 * Copyright (c) 2022 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #include "js_uri.h" 17 #include "utils/log.h" 18 namespace OHOS::Uri { 19 std::bitset<MAX_BIT_SIZE> g_ruleAlpha; 20 std::bitset<MAX_BIT_SIZE> g_ruleScheme; 21 std::bitset<MAX_BIT_SIZE> g_ruleUrlc; 22 std::bitset<MAX_BIT_SIZE> g_rulePath; 23 std::bitset<MAX_BIT_SIZE> g_ruleUserInfo; 24 std::bitset<MAX_BIT_SIZE> g_ruleDigit; 25 std::bitset<MAX_BIT_SIZE> g_rulePort; PreliminaryWork() const26 void Uri::PreliminaryWork() const 27 { 28 std::string digitAggregate = "0123456789"; 29 for (size_t i = 0; i < digitAggregate.size(); ++i) { 30 g_ruleDigit.set(digitAggregate[i]); 31 } 32 33 std::string alphasAggregate = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; 34 for (size_t i = 0; i < alphasAggregate.size(); ++i) { 35 g_ruleAlpha.set(alphasAggregate[i]); 36 } 37 38 std::string schemeAggregate = digitAggregate + alphasAggregate + "+-."; 39 for (size_t i = 0; i < schemeAggregate.size(); ++i) { 40 g_ruleScheme.set(schemeAggregate[i]); 41 } 42 43 std::string uricAggregate = schemeAggregate + ";/?:@&=$,[]_!~*'()%"; 44 for (size_t i = 0; i < uricAggregate.size(); ++i) { 45 g_ruleUrlc.set(uricAggregate[i]); 46 } 47 48 std::string pathAggregate = schemeAggregate + ";/:@&=$,_!~*'()%"; 49 for (size_t i = 0; i < pathAggregate.size(); ++i) { 50 g_rulePath.set(pathAggregate[i]); 51 } 52 53 std::string userInfoAggregate = schemeAggregate + ";:&=$,_!~*'()%"; 54 for (size_t i = 0; i < userInfoAggregate.size(); ++i) { 55 g_ruleUserInfo.set(userInfoAggregate[i]); 56 } 57 58 std::string portAggregate = digitAggregate + alphasAggregate + ".:@-;&=+$,-_!~*'()"; 59 for (size_t i = 0; i < portAggregate.size(); ++i) { 60 g_rulePort.set(portAggregate[i]); 61 } 62 } 63 Uri(const std::string input)64 Uri::Uri(const std::string input) 65 { 66 PreliminaryWork(); 67 errStr_ = ""; 68 if (input.empty()) { 69 errStr_ = "uri is empty"; 70 return; 71 } 72 inputUri_ = input; 73 AnalysisUri(); 74 } 75 AnalysisUri()76 void Uri::AnalysisUri() 77 { 78 data_ = inputUri_; 79 size_t pos = data_.find('#'); // Fragment 80 if (pos != std::string::npos) { 81 AnalysisFragment(pos); 82 if (!errStr_.empty()) { 83 return; 84 } 85 } 86 pos = data_.find('?'); // Query 87 if (pos != std::string::npos) { 88 AnalysisQuery(pos); 89 if (!errStr_.empty()) { 90 return; 91 } 92 } 93 pos = data_.find(':'); // Scheme 94 if (pos != std::string::npos) { 95 AnalysisScheme(pos); 96 if (!errStr_.empty()) { 97 return; 98 } 99 } else { 100 SpecialPath(); 101 if (!errStr_.empty()) { 102 return; 103 } 104 uriData_.SchemeSpecificPart = data_ + "?" + uriData_.query; 105 return; 106 } 107 pos = data_.find("//"); // userInfo path host port ipv4 or ipv6 108 if (pos != std::string::npos && pos == 0) { 109 uriData_.SchemeSpecificPart = data_ + "?" + uriData_.query; 110 data_ = data_.substr(2); // 2:Intercept the string from the second subscript 111 AnalysisHostAndPath(); 112 if (!errStr_.empty()) { 113 return; 114 } 115 } else if (data_[0] == '/') { 116 uriData_.path = data_; 117 uriData_.SchemeSpecificPart = data_ + uriData_.query; 118 data_ = ""; 119 } else if (!data_.empty()) { 120 uriData_.SchemeSpecificPart = data_ + uriData_.query; 121 uriData_.query = ""; 122 data_ = ""; 123 } 124 } 125 CheckCharacter(std::string data,std::bitset<MAX_BIT_SIZE> rule,bool flag) const126 bool Uri::CheckCharacter(std::string data, std::bitset<MAX_BIT_SIZE> rule, bool flag) const 127 { 128 size_t dataLen = data.size(); 129 for (size_t i = 0; i < dataLen; ++i) { 130 if (static_cast<int>(data[i]) >= 0 && static_cast<int>(data[i]) < 128) { // 128:ASCII Max Number 131 bool isLegal = rule.test(data[i]); 132 if (!isLegal) { 133 return false; 134 } 135 } else if (!flag) { 136 return false; 137 } 138 } 139 return true; 140 } 141 SpecialPath()142 void Uri::SpecialPath() 143 { 144 if (!CheckCharacter(data_, g_rulePath, true)) { 145 errStr_ = "SpecialPath does not conform to the rule"; 146 return; 147 } 148 uriData_.path = data_; 149 data_ = ""; 150 } 151 AnalysisFragment(size_t pos)152 void Uri::AnalysisFragment(size_t pos) 153 { 154 if (pos == 0) { 155 errStr_ = "#It can't be the first"; 156 return; 157 } 158 std::string fragment = data_.substr(pos + 1); 159 if (!CheckCharacter(fragment, g_ruleUrlc, true)) { 160 errStr_ = "Fragment does not conform to the rule"; 161 return; 162 } 163 uriData_.fragment = fragment; 164 data_ = data_.substr(0, pos); 165 } 166 AnalysisQuery(size_t pos)167 void Uri::AnalysisQuery(size_t pos) 168 { 169 std::string query = data_.substr(pos + 1); 170 if (!CheckCharacter(query, g_ruleUrlc, true)) { 171 errStr_ = "Query does not conform to the rule"; 172 return; 173 } 174 uriData_.query = query; 175 data_ = data_.substr(0, pos); 176 } 177 AnalysisScheme(size_t pos)178 void Uri::AnalysisScheme(size_t pos) 179 { 180 size_t slashPos = data_.find('/'); 181 if (slashPos != std::string::npos && slashPos < pos) { 182 SpecialPath(); 183 uriData_.SchemeSpecificPart = uriData_.path + "?" + uriData_.query; 184 data_ = ""; 185 } else { 186 if (!g_ruleAlpha.test(data_[0])) { 187 errStr_ = "Scheme the first character must be a letter"; 188 return; 189 } 190 std::string scheme = data_.substr(0, pos); 191 if (!CheckCharacter(scheme, g_ruleScheme, false)) { 192 errStr_ = "scheme does not conform to the rule"; 193 return; 194 } 195 uriData_.scheme = scheme; 196 data_ = data_.substr(pos + 1); 197 } 198 } 199 AnalysisHostAndPath()200 void Uri::AnalysisHostAndPath() 201 { 202 if (data_.empty()) { 203 return; 204 } 205 // find path 206 size_t pos = data_.find('/'); 207 if (pos != std::string::npos) { 208 AnalysisPath(pos); 209 if (!errStr_.empty()) { 210 return; 211 } 212 } 213 214 uriData_.authority = data_; 215 216 // find UserInfo 217 pos = data_.find('@'); 218 if (pos != std::string::npos) { 219 AnalysisUserInfo(pos); 220 if (!errStr_.empty()) { 221 return; 222 } 223 } 224 bool isLawfulProt = true; 225 // find port 226 pos = data_.rfind(':'); 227 if (pos != std::string::npos) { 228 size_t pos1 = data_.rfind(']'); 229 if (pos1 == std::string::npos || pos > pos1) { 230 isLawfulProt = AnalysisPort(pos); 231 } 232 if (!errStr_.empty()) { 233 return; 234 } 235 } 236 237 // find ipv4 or ipv6 or host 238 if (data_[0] == '[') { 239 if (data_[data_.size() - 1] == ']') { 240 // IPV6 241 if (!isLawfulProt) { 242 errStr_ = "Prot does not conform to the rule"; 243 return; 244 } 245 AnalysisIPV6(); 246 } else { 247 errStr_ = "IPv6 is missing a closing bracket"; 248 return; 249 } 250 } else { 251 // ipv4 252 if (!isLawfulProt || !AnalysisIPV4()) { 253 uriData_.port = -1; 254 uriData_.host = ""; 255 uriData_.userInfo = ""; 256 } 257 } 258 } 259 AnalysisPath(size_t pos)260 void Uri::AnalysisPath(size_t pos) 261 { 262 std::string path = data_.substr(pos); 263 if (!CheckCharacter(path, g_rulePath, true)) { 264 errStr_ = "path does not conform to the rule"; 265 return; 266 } 267 uriData_.path = path; 268 data_ = data_.substr(0, pos); 269 } 270 AnalysisUserInfo(size_t pos)271 void Uri::AnalysisUserInfo(size_t pos) 272 { 273 std::string userInfo = data_.substr(0, pos); 274 if (!CheckCharacter(userInfo, g_ruleUserInfo, true)) { 275 errStr_ = "userInfo does not conform to the rule"; 276 return; 277 } 278 uriData_.userInfo = userInfo; 279 data_ = data_.substr(pos + 1); 280 } 281 AnalysisPort(size_t pos)282 bool Uri::AnalysisPort(size_t pos) 283 { 284 std::string port = data_.substr(pos + 1); 285 if (!CheckCharacter(port, g_rulePort, true)) { 286 errStr_ = "port does not conform to the rule"; 287 return false; 288 } else if (CheckCharacter(port, g_ruleDigit, false)) { 289 uriData_.port = std::stoi(port); 290 data_ = data_.substr(0, pos); 291 return true; 292 } else { 293 data_ = data_.substr(0, pos); 294 return false; 295 } 296 return false; 297 } 298 AnalysisIPV4()299 bool Uri::AnalysisIPV4() 300 { 301 std::regex ipv4("((25[0-5]|2[0-4]\\d|1\\d{2}|[1-9]\\d|\\d)\\.){3}(25[0-5]|2[0-4]\\d|1\\d{2}|[1-9]\\d|\\d)"); 302 std::regex hostname("(([a-zA-Z0-9]([a-zA-Z0-9\\-]*[a-zA-Z0-9])?\\.)+([a-zA-Z]([a-zA-Z0-9\\-]*[a-zA-Z0-9])?))|" 303 "([a-zA-Z0-9]([a-zA-Z0-9\\-]*[a-zA-Z0-9])?)"); 304 bool isIpv4 = std::regex_match(data_, ipv4); 305 bool isHosName = std::regex_match(data_, hostname); 306 if (!isIpv4 && !isHosName) { 307 return false; 308 } else { 309 uriData_.host = data_; 310 data_ = ""; 311 return true; 312 } 313 } 314 AnalysisIPV6()315 void Uri::AnalysisIPV6() 316 { 317 std::string str = data_.substr(1, data_.size() - 2); // 2:Intercept the string from the second subscript 318 std::regex ipv6("(::|(:((:[0-9A-Fa-f]{1,4}){1,7}))|(([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|" 319 "(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|:))|(([0-9A-Fa-f]{1,4}:){2}" 320 "(((:[0-9A-Fa-f]{1,4}){1,5})|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})" 321 "|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|:))|(([0-9A-Fa-f]{1,4}:){5}" 322 "(((:[0-9A-Fa-f]{1,4}){1,2})|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|:))|" 323 "(((:(:[0-9A-Fa-f]{1,4}){0,5}:)|(([0-9A-Fa-f]{1,4}:){1}(:[0-9A-Fa-f]{1,4}){0,4}:)" 324 "|(([0-9A-Fa-f]{1,4}:){2}(:[0-9A-Fa-f]{1,4}){0,3}:)|(([0-9A-Fa-f]{1,4}:){3}" 325 "(:[0-9A-Fa-f]{1,4}){0,2}:)|(([0-9A-Fa-f]{1,4}:){4}(:[0-9A-Fa-f]{1,4})?:)|" 326 "(([0-9A-Fa-f]{1,4}:){5}:)|(([0-9A-Fa-f]{1,4}:){6}))((25[0-5]|2[0-4]\\d|1\\d{2}|" 327 "[1-9]\\d|\\d)\\.){3}(25[0-5]|2[0-4]\\d|1\\d{2}|[1-9]\\d|\\d)))(%[a-zA-Z0-9._]+)?"); 328 if (!std::regex_match(str, ipv6)) { 329 errStr_ = "ipv6 does not conform to the rule"; 330 return; 331 } 332 uriData_.host = data_; 333 data_ = ""; 334 } 335 Equals(const Uri other) const336 bool Uri::Equals(const Uri other) const 337 { 338 if (uriData_.port != other.uriData_.port) { 339 return false; 340 } 341 if (uriData_.scheme != other.uriData_.scheme) { 342 return false; 343 } 344 if (uriData_.userInfo != other.uriData_.userInfo) { 345 return false; 346 } 347 if (uriData_.host != other.uriData_.host) { 348 return false; 349 } 350 if (uriData_.query != other.uriData_.query) { 351 return false; 352 } 353 if (uriData_.fragment != other.uriData_.fragment) { 354 return false; 355 } 356 if (uriData_.path != other.uriData_.path) { 357 return false; 358 } 359 if (uriData_.authority != other.uriData_.authority) { 360 return false; 361 } 362 if (uriData_.SchemeSpecificPart != other.uriData_.SchemeSpecificPart) { 363 return false; 364 } 365 return true; 366 } 367 ToString() const368 std::string Uri::ToString() const 369 { 370 return inputUri_; 371 } 372 IsAbsolute() const373 bool Uri::IsAbsolute() const 374 { 375 return !uriData_.scheme.empty(); 376 } 377 IsFailed() const378 std::string Uri::IsFailed() const 379 { 380 return errStr_; 381 } 382 Normalize() const383 std::string Uri::Normalize() const 384 { 385 std::vector<std::string> temp; 386 size_t pathLen = uriData_.path.size(); 387 if (pathLen == 0) { 388 return this->inputUri_; 389 } 390 size_t pos = 0; 391 size_t left = 0; 392 while ((pos = uriData_.path.find('/', left)) != std::string::npos) { 393 temp.push_back(uriData_.path.substr(left, pos - left)); 394 left = pos + 1; 395 } 396 if (left != pathLen) { 397 temp.push_back(uriData_.path.substr(left)); 398 } 399 size_t tempLen = temp.size(); 400 std::vector<std::string> normalizeTemp; 401 for (size_t i = 0; i < tempLen; ++i) { 402 if (!temp[i].empty() && !(temp[i] == ".") && !(temp[i] == "..")) { 403 normalizeTemp.push_back(temp[i]); 404 } 405 if (temp[i] == "..") { 406 if (!normalizeTemp.empty() && normalizeTemp.back() != "..") { 407 normalizeTemp.pop_back(); 408 } else { 409 normalizeTemp.push_back(temp[i]); 410 } 411 } 412 } 413 std::string normalizePath = ""; 414 tempLen = normalizeTemp.size(); 415 if (tempLen == 0) { 416 normalizePath = "/"; 417 } else { 418 for (size_t i = 0; i < tempLen; ++i) { 419 normalizePath += "/" + normalizeTemp[i]; 420 } 421 } 422 return Split(normalizePath); 423 } 424 425 Split(const std::string & path) const426 std::string Uri::Split(const std::string &path) const 427 { 428 std::string normalizeUri = ""; 429 if (!uriData_.scheme.empty()) { 430 normalizeUri += uriData_.scheme + ":"; 431 } 432 if (uriData_.path.empty()) { 433 normalizeUri += uriData_.SchemeSpecificPart; 434 } else { 435 if (!uriData_.host.empty()) { 436 normalizeUri += "//"; 437 if (!uriData_.userInfo.empty()) { 438 normalizeUri += uriData_.userInfo + "@"; 439 } 440 normalizeUri += uriData_.host; 441 if (uriData_.port != -1) { 442 normalizeUri += ":" + std::to_string(uriData_.port); 443 } 444 } else if (!uriData_.authority.empty()) { 445 normalizeUri += "//" + uriData_.authority; 446 } 447 normalizeUri += path; 448 } 449 if (!uriData_.query.empty()) { 450 normalizeUri += "?" + uriData_.query; 451 } 452 if (!uriData_.fragment.empty()) { 453 normalizeUri += "#" + uriData_.fragment; 454 } 455 return normalizeUri; 456 } 457 458 GetScheme() const459 std::string Uri::GetScheme() const 460 { 461 if (uriData_.scheme.empty()) { 462 return "null"; 463 } 464 return uriData_.scheme; 465 } 466 GetAuthority() const467 std::string Uri::GetAuthority() const 468 { 469 if (uriData_.authority.empty()) { 470 return "null"; 471 } 472 return uriData_.authority; 473 } 474 GetSsp() const475 std::string Uri::GetSsp() const 476 { 477 if (uriData_.SchemeSpecificPart.empty()) { 478 return "null"; 479 } 480 return uriData_.SchemeSpecificPart; 481 } 482 GetUserinfo() const483 std::string Uri::GetUserinfo() const 484 { 485 if (uriData_.userInfo.empty()) { 486 return "null"; 487 } 488 return uriData_.userInfo; 489 } 490 GetHost() const491 std::string Uri::GetHost() const 492 { 493 if (uriData_.host.empty()) { 494 return "null"; 495 } 496 return uriData_.host; 497 } 498 GetPort() const499 std::string Uri::GetPort() const 500 { 501 return std::to_string(uriData_.port); 502 } 503 GetPath() const504 std::string Uri::GetPath() const 505 { 506 if (uriData_.path.empty()) { 507 return "null"; 508 } 509 return uriData_.path; 510 } 511 GetQuery() const512 std::string Uri::GetQuery() const 513 { 514 if (uriData_.query.empty()) { 515 return "null"; 516 } 517 return uriData_.query; 518 } 519 GetFragment() const520 std::string Uri::GetFragment() const 521 { 522 if (uriData_.fragment.empty()) { 523 return "null"; 524 } 525 return uriData_.fragment; 526 } 527 } // namespace OHOS::Uri 528