1 /* 2 * Copyright (c) 2022 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #include "js_uri.h" 17 #include "utils/log.h" 18 namespace OHOS::Uri { 19 std::bitset<MAX_BIT_SIZE> g_ruleAlpha; 20 std::bitset<MAX_BIT_SIZE> g_ruleScheme; 21 std::bitset<MAX_BIT_SIZE> g_ruleUrlc; 22 std::bitset<MAX_BIT_SIZE> g_rulePath; 23 std::bitset<MAX_BIT_SIZE> g_ruleUserInfo; 24 std::bitset<MAX_BIT_SIZE> g_ruleDigit; 25 std::bitset<MAX_BIT_SIZE> g_rulePort; PreliminaryWork() const26 void Uri::PreliminaryWork() const 27 { 28 std::string digitAggregate = "0123456789"; 29 for (size_t i = 0; i < digitAggregate.size(); ++i) { 30 g_ruleDigit.set(digitAggregate[i]); 31 } 32 33 std::string alphasAggregate = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; 34 for (size_t i = 0; i < alphasAggregate.size(); ++i) { 35 g_ruleAlpha.set(alphasAggregate[i]); 36 } 37 38 std::string schemeAggregate = digitAggregate + alphasAggregate + "+-."; 39 for (size_t i = 0; i < schemeAggregate.size(); ++i) { 40 g_ruleScheme.set(schemeAggregate[i]); 41 } 42 43 std::string uricAggregate = schemeAggregate + ";/?:@&=$,[]_!~*'()%"; 44 for (size_t i = 0; i < uricAggregate.size(); ++i) { 45 g_ruleUrlc.set(uricAggregate[i]); 46 } 47 48 std::string pathAggregate = schemeAggregate + ";/:@&=$,_!~*'()%"; 49 for (size_t i = 0; i < pathAggregate.size(); ++i) { 50 g_rulePath.set(pathAggregate[i]); 51 } 52 53 std::string userInfoAggregate = schemeAggregate + ";:&=$,_!~*'()%"; 54 for (size_t i = 0; i < userInfoAggregate.size(); ++i) { 55 g_ruleUserInfo.set(userInfoAggregate[i]); 56 } 57 58 std::string portAggregate = digitAggregate + alphasAggregate + ".:@-;&=+$,-_!~*'()"; 59 for (size_t i = 0; i < portAggregate.size(); ++i) { 60 g_rulePort.set(portAggregate[i]); 61 } 62 } 63 Uri(const std::string input)64 Uri::Uri(const std::string input) 65 { 66 PreliminaryWork(); 67 errStr_ = ""; 68 if (input.empty()) { 69 errStr_ = "uri is empty"; 70 return; 71 } 72 inputUri_ = input; 73 AnalysisUri(); 74 } 75 AnalysisUri()76 void Uri::AnalysisUri() 77 { 78 data_ = inputUri_; 79 size_t pos = data_.find('#'); // Fragment 80 if (pos != std::string::npos) { 81 AnalysisFragment(pos); 82 if (!errStr_.empty()) { 83 return; 84 } 85 } 86 pos = data_.find('?'); // Query 87 if (pos != std::string::npos) { 88 AnalysisQuery(pos); 89 if (!errStr_.empty()) { 90 return; 91 } 92 } 93 pos = data_.find(':'); // Scheme 94 if (pos != std::string::npos) { 95 AnalysisScheme(pos); 96 if (!errStr_.empty()) { 97 return; 98 } 99 } else { 100 SpecialPath(); 101 if (!errStr_.empty()) { 102 return; 103 } 104 uriData_.SchemeSpecificPart = data_ + "?" + uriData_.query; 105 return; 106 } 107 pos = data_.find("//"); // userInfo path host port ipv4 or ipv6 108 if (pos != std::string::npos && pos == 0) { 109 uriData_.SchemeSpecificPart = data_ + "?" + uriData_.query; 110 data_ = data_.substr(2); // 2:Intercept the string from the second subscript 111 AnalysisHostAndPath(); 112 if (!errStr_.empty()) { 113 return; 114 } 115 } else if (data_[0] == '/') { 116 uriData_.path = data_; 117 uriData_.SchemeSpecificPart = data_ + uriData_.query; 118 data_ = ""; 119 } else if (!data_.empty()) { 120 uriData_.SchemeSpecificPart = data_ + uriData_.query; 121 uriData_.query = ""; 122 data_ = ""; 123 } 124 } 125 CheckCharacter(std::string data,std::bitset<MAX_BIT_SIZE> rule,bool flag) const126 bool Uri::CheckCharacter(std::string data, std::bitset<MAX_BIT_SIZE> rule, bool flag) const 127 { 128 size_t dataLen = data.size(); 129 for (size_t i = 0; i < dataLen; ++i) { 130 if (static_cast<int>(data[i]) >= 0 && static_cast<int>(data[i]) < 128) { // 128:ASCII Max Number 131 bool isLegal = rule.test(data[i]); 132 if (!isLegal) { 133 return false; 134 } 135 } else if (!flag) { 136 return false; 137 } 138 } 139 return true; 140 } 141 SpecialPath()142 void Uri::SpecialPath() 143 { 144 if (!CheckCharacter(data_, g_rulePath, true)) { 145 errStr_ = "SpecialPath does not conform to the rule"; 146 return; 147 } 148 uriData_.path = data_; 149 data_ = ""; 150 } 151 AnalysisFragment(size_t pos)152 void Uri::AnalysisFragment(size_t pos) 153 { 154 if (pos == 0) { 155 errStr_ = "#It can't be the first"; 156 return; 157 } 158 std::string fragment = data_.substr(pos + 1); 159 if (!CheckCharacter(fragment, g_ruleUrlc, true)) { 160 errStr_ = "Fragment does not conform to the rule"; 161 return; 162 } 163 uriData_.fragment = fragment; 164 data_ = data_.substr(0, pos); 165 } 166 AnalysisQuery(size_t pos)167 void Uri::AnalysisQuery(size_t pos) 168 { 169 std::string query = data_.substr(pos + 1); 170 if (!CheckCharacter(query, g_ruleUrlc, true)) { 171 errStr_ = "Query does not conform to the rule"; 172 return; 173 } 174 uriData_.query = query; 175 data_ = data_.substr(0, pos); 176 } 177 AnalysisScheme(size_t pos)178 void Uri::AnalysisScheme(size_t pos) 179 { 180 size_t slashPos = data_.find('/'); 181 if (slashPos != std::string::npos && slashPos < pos) { 182 SpecialPath(); 183 uriData_.SchemeSpecificPart = uriData_.path + "?" + uriData_.query; 184 data_ = ""; 185 } else { 186 if (!g_ruleAlpha.test(data_[0])) { 187 errStr_ = "Scheme the first character must be a letter"; 188 return; 189 } 190 std::string scheme = data_.substr(0, pos); 191 if (!CheckCharacter(scheme, g_ruleScheme, false)) { 192 errStr_ = "scheme does not conform to the rule"; 193 return; 194 } 195 uriData_.scheme = scheme; 196 data_ = data_.substr(pos + 1); 197 } 198 } 199 AnalysisHostAndPath()200 void Uri::AnalysisHostAndPath() 201 { 202 if (data_.empty()) { 203 return; 204 } 205 // find path 206 size_t pos = data_.find('/'); 207 if (pos != std::string::npos) { 208 AnalysisPath(pos); 209 if (!errStr_.empty()) { 210 return; 211 } 212 } 213 214 uriData_.authority = data_; 215 216 // find UserInfo 217 pos = data_.find('@'); 218 if (pos != std::string::npos) { 219 AnalysisUserInfo(pos); 220 if (!errStr_.empty()) { 221 return; 222 } 223 } 224 bool isLawfulProt = true; 225 // find port 226 pos = data_.rfind(':'); 227 if (pos != std::string::npos) { 228 size_t pos1 = data_.rfind(']'); 229 if (pos1 == std::string::npos || pos > pos1) { 230 isLawfulProt = AnalysisPort(pos); 231 } 232 if (!errStr_.empty()) { 233 return; 234 } 235 } 236 237 // find ipv4 or ipv6 or host 238 if (data_[0] == '[') { 239 if (data_[data_.size() - 1] == ']') { 240 // IPV6 241 if (!isLawfulProt) { 242 errStr_ = "Prot does not conform to the rule"; 243 return; 244 } 245 AnalysisIPV6(); 246 } else { 247 errStr_ = "IPv6 is missing a closing bracket"; 248 return; 249 } 250 } else { 251 // ipv4 252 if (!isLawfulProt || !AnalysisIPV4()) { 253 uriData_.port = -1; 254 uriData_.host = ""; 255 uriData_.userInfo = ""; 256 } 257 } 258 } 259 AnalysisPath(size_t pos)260 void Uri::AnalysisPath(size_t pos) 261 { 262 std::string path = data_.substr(pos); 263 if (!CheckCharacter(path, g_rulePath, true)) { 264 errStr_ = "path does not conform to the rule"; 265 return; 266 } 267 uriData_.path = path; 268 data_ = data_.substr(0, pos); 269 } 270 AnalysisUserInfo(size_t pos)271 void Uri::AnalysisUserInfo(size_t pos) 272 { 273 std::string userInfo = data_.substr(0, pos); 274 if (!CheckCharacter(userInfo, g_ruleUserInfo, true)) { 275 errStr_ = "userInfo does not conform to the rule"; 276 return; 277 } 278 uriData_.userInfo = userInfo; 279 data_ = data_.substr(pos + 1); 280 } 281 AnalysisPort(size_t pos)282 bool Uri::AnalysisPort(size_t pos) 283 { 284 std::string port = data_.substr(pos + 1); 285 if (!CheckCharacter(port, g_rulePort, true)) { 286 errStr_ = "port does not conform to the rule"; 287 return false; 288 } else if (CheckCharacter(port, g_ruleDigit, false)) { 289 if (port.size() == 0) { 290 return false; 291 } 292 uriData_.port = std::stoi(port); 293 data_ = data_.substr(0, pos); 294 return true; 295 } else { 296 data_ = data_.substr(0, pos); 297 return false; 298 } 299 return false; 300 } 301 AnalysisIPV4()302 bool Uri::AnalysisIPV4() 303 { 304 std::regex ipv4("((25[0-5]|2[0-4]\\d|1\\d{2}|[1-9]\\d|\\d)\\.){3}(25[0-5]|2[0-4]\\d|1\\d{2}|[1-9]\\d|\\d)"); 305 std::regex hostname("(([a-zA-Z0-9]([a-zA-Z0-9\\-]*[a-zA-Z0-9])?\\.)+([a-zA-Z]([a-zA-Z0-9\\-]*[a-zA-Z0-9])?))|" 306 "([a-zA-Z0-9]([a-zA-Z0-9\\-]*[a-zA-Z0-9])?)"); 307 bool isIpv4 = std::regex_match(data_, ipv4); 308 bool isHosName = std::regex_match(data_, hostname); 309 if (!isIpv4 && !isHosName) { 310 return false; 311 } else { 312 uriData_.host = data_; 313 data_ = ""; 314 return true; 315 } 316 } 317 AnalysisIPV6()318 void Uri::AnalysisIPV6() 319 { 320 std::string str = data_.substr(1, data_.size() - 2); // 2:Intercept the string from the second subscript 321 std::regex ipv6("(::|(:((:[0-9A-Fa-f]{1,4}){1,7}))|(([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|" 322 "(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|:))|(([0-9A-Fa-f]{1,4}:){2}" 323 "(((:[0-9A-Fa-f]{1,4}){1,5})|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})" 324 "|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|:))|(([0-9A-Fa-f]{1,4}:){5}" 325 "(((:[0-9A-Fa-f]{1,4}){1,2})|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|:))|" 326 "(((:(:[0-9A-Fa-f]{1,4}){0,5}:)|(([0-9A-Fa-f]{1,4}:){1}(:[0-9A-Fa-f]{1,4}){0,4}:)" 327 "|(([0-9A-Fa-f]{1,4}:){2}(:[0-9A-Fa-f]{1,4}){0,3}:)|(([0-9A-Fa-f]{1,4}:){3}" 328 "(:[0-9A-Fa-f]{1,4}){0,2}:)|(([0-9A-Fa-f]{1,4}:){4}(:[0-9A-Fa-f]{1,4})?:)|" 329 "(([0-9A-Fa-f]{1,4}:){5}:)|(([0-9A-Fa-f]{1,4}:){6}))((25[0-5]|2[0-4]\\d|1\\d{2}|" 330 "[1-9]\\d|\\d)\\.){3}(25[0-5]|2[0-4]\\d|1\\d{2}|[1-9]\\d|\\d)))(%[a-zA-Z0-9._]+)?"); 331 if (!std::regex_match(str, ipv6)) { 332 errStr_ = "ipv6 does not conform to the rule"; 333 return; 334 } 335 uriData_.host = data_; 336 data_ = ""; 337 } 338 Equals(const Uri other) const339 bool Uri::Equals(const Uri other) const 340 { 341 if (uriData_.port != other.uriData_.port) { 342 return false; 343 } 344 if (uriData_.scheme != other.uriData_.scheme) { 345 return false; 346 } 347 if (uriData_.userInfo != other.uriData_.userInfo) { 348 return false; 349 } 350 if (uriData_.host != other.uriData_.host) { 351 return false; 352 } 353 if (uriData_.query != other.uriData_.query) { 354 return false; 355 } 356 if (uriData_.fragment != other.uriData_.fragment) { 357 return false; 358 } 359 if (uriData_.path != other.uriData_.path) { 360 return false; 361 } 362 if (uriData_.authority != other.uriData_.authority) { 363 return false; 364 } 365 if (uriData_.SchemeSpecificPart != other.uriData_.SchemeSpecificPart) { 366 return false; 367 } 368 return true; 369 } 370 ToString() const371 std::string Uri::ToString() const 372 { 373 return inputUri_; 374 } 375 IsAbsolute() const376 bool Uri::IsAbsolute() const 377 { 378 return !uriData_.scheme.empty(); 379 } 380 IsFailed() const381 std::string Uri::IsFailed() const 382 { 383 return errStr_; 384 } 385 Normalize() const386 std::string Uri::Normalize() const 387 { 388 std::vector<std::string> temp; 389 size_t pathLen = uriData_.path.size(); 390 if (pathLen == 0) { 391 return this->inputUri_; 392 } 393 size_t pos = 0; 394 size_t left = 0; 395 while ((pos = uriData_.path.find('/', left)) != std::string::npos) { 396 temp.push_back(uriData_.path.substr(left, pos - left)); 397 left = pos + 1; 398 } 399 if (left != pathLen) { 400 temp.push_back(uriData_.path.substr(left)); 401 } 402 size_t tempLen = temp.size(); 403 std::vector<std::string> normalizeTemp; 404 for (size_t i = 0; i < tempLen; ++i) { 405 if (!temp[i].empty() && !(temp[i] == ".") && !(temp[i] == "..")) { 406 normalizeTemp.push_back(temp[i]); 407 } 408 if (temp[i] == "..") { 409 if (!normalizeTemp.empty() && normalizeTemp.back() != "..") { 410 normalizeTemp.pop_back(); 411 } else { 412 normalizeTemp.push_back(temp[i]); 413 } 414 } 415 } 416 std::string normalizePath = ""; 417 tempLen = normalizeTemp.size(); 418 if (tempLen == 0) { 419 normalizePath = "/"; 420 } else { 421 for (size_t i = 0; i < tempLen; ++i) { 422 normalizePath += "/" + normalizeTemp[i]; 423 } 424 } 425 return Split(normalizePath); 426 } 427 428 Split(const std::string & path) const429 std::string Uri::Split(const std::string &path) const 430 { 431 std::string normalizeUri = ""; 432 if (!uriData_.scheme.empty()) { 433 normalizeUri += uriData_.scheme + ":"; 434 } 435 if (uriData_.path.empty()) { 436 normalizeUri += uriData_.SchemeSpecificPart; 437 } else { 438 if (!uriData_.host.empty()) { 439 normalizeUri += "//"; 440 if (!uriData_.userInfo.empty()) { 441 normalizeUri += uriData_.userInfo + "@"; 442 } 443 normalizeUri += uriData_.host; 444 if (uriData_.port != -1) { 445 normalizeUri += ":" + std::to_string(uriData_.port); 446 } 447 } else if (!uriData_.authority.empty()) { 448 normalizeUri += "//" + uriData_.authority; 449 } 450 normalizeUri += path; 451 } 452 if (!uriData_.query.empty()) { 453 normalizeUri += "?" + uriData_.query; 454 } 455 if (!uriData_.fragment.empty()) { 456 normalizeUri += "#" + uriData_.fragment; 457 } 458 return normalizeUri; 459 } 460 461 GetScheme() const462 std::string Uri::GetScheme() const 463 { 464 if (uriData_.scheme.empty()) { 465 return "null"; 466 } 467 return uriData_.scheme; 468 } 469 GetAuthority() const470 std::string Uri::GetAuthority() const 471 { 472 if (uriData_.authority.empty()) { 473 return "null"; 474 } 475 return uriData_.authority; 476 } 477 GetSsp() const478 std::string Uri::GetSsp() const 479 { 480 if (uriData_.SchemeSpecificPart.empty()) { 481 return "null"; 482 } 483 return uriData_.SchemeSpecificPart; 484 } 485 GetUserinfo() const486 std::string Uri::GetUserinfo() const 487 { 488 if (uriData_.userInfo.empty()) { 489 return "null"; 490 } 491 return uriData_.userInfo; 492 } 493 GetHost() const494 std::string Uri::GetHost() const 495 { 496 if (uriData_.host.empty()) { 497 return "null"; 498 } 499 return uriData_.host; 500 } 501 GetPort() const502 std::string Uri::GetPort() const 503 { 504 return std::to_string(uriData_.port); 505 } 506 GetPath() const507 std::string Uri::GetPath() const 508 { 509 if (uriData_.path.empty()) { 510 return "null"; 511 } 512 return uriData_.path; 513 } 514 GetQuery() const515 std::string Uri::GetQuery() const 516 { 517 if (uriData_.query.empty()) { 518 return "null"; 519 } 520 return uriData_.query; 521 } 522 GetFragment() const523 std::string Uri::GetFragment() const 524 { 525 if (uriData_.fragment.empty()) { 526 return "null"; 527 } 528 return uriData_.fragment; 529 } 530 } // namespace OHOS::Uri 531