1 /* 2 * Copyright (c) 2022 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #include "js_uri.h" 17 #include "utils/log.h" 18 namespace OHOS::Uri { 19 std::bitset<MAX_BIT_SIZE> g_ruleAlpha; 20 std::bitset<MAX_BIT_SIZE> g_ruleScheme; 21 std::bitset<MAX_BIT_SIZE> g_ruleUrlc; 22 std::bitset<MAX_BIT_SIZE> g_rulePath; 23 std::bitset<MAX_BIT_SIZE> g_ruleUserInfo; 24 std::bitset<MAX_BIT_SIZE> g_ruleDigit; 25 std::bitset<MAX_BIT_SIZE> g_rulePort; PreliminaryWork() const26 void Uri::PreliminaryWork() const 27 { 28 std::string digitAggregate = "0123456789"; 29 for (size_t i = 0; i < digitAggregate.size(); ++i) { 30 g_ruleDigit.set(digitAggregate[i]); 31 } 32 33 std::string alphasAggregate = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; 34 for (size_t i = 0; i < alphasAggregate.size(); ++i) { 35 g_ruleAlpha.set(alphasAggregate[i]); 36 } 37 38 std::string schemeAggregate = digitAggregate + alphasAggregate + "+-."; 39 for (size_t i = 0; i < schemeAggregate.size(); ++i) { 40 g_ruleScheme.set(schemeAggregate[i]); 41 } 42 43 std::string uricAggregate = schemeAggregate + ";/?:@&=$,[]_!~*'()%"; 44 for (size_t i = 0; i < uricAggregate.size(); ++i) { 45 g_ruleUrlc.set(uricAggregate[i]); 46 } 47 48 std::string pathAggregate = schemeAggregate + ";/:@&=$,_!~*'()%"; 49 for (size_t i = 0; i < pathAggregate.size(); ++i) { 50 g_rulePath.set(pathAggregate[i]); 51 } 52 53 std::string userInfoAggregate = schemeAggregate + ";:&=$,_!~*'()%"; 54 for (size_t i = 0; i < userInfoAggregate.size(); ++i) { 55 g_ruleUserInfo.set(userInfoAggregate[i]); 56 } 57 58 std::string portAggregate = digitAggregate + alphasAggregate + ".:@-;&=+$,-_!~*'()"; 59 for (size_t i = 0; i < portAggregate.size(); ++i) { 60 g_rulePort.set(portAggregate[i]); 61 } 62 } 63 Uri(const std::string input)64 Uri::Uri(const std::string input) 65 { 66 PreliminaryWork(); 67 errStr_ = ""; 68 if (input.empty()) { 69 errStr_ = "uri is empty"; 70 return; 71 } 72 inputUri_ = input; 73 AnalysisUri(); 74 } 75 AssignSchemeSpecificPart()76 void Uri::AssignSchemeSpecificPart() 77 { 78 uriData_.SchemeSpecificPart.reserve(data_.length() + uriData_.query.length() + 1); 79 uriData_.SchemeSpecificPart.append(data_); 80 uriData_.SchemeSpecificPart.append("?"); 81 uriData_.SchemeSpecificPart.append(uriData_.query); 82 } 83 AnalysisUri()84 void Uri::AnalysisUri() 85 { 86 data_ = inputUri_; 87 size_t pos = data_.find('#'); // Fragment 88 if (pos != std::string::npos) { 89 AnalysisFragment(pos); 90 if (!errStr_.empty()) { 91 return; 92 } 93 } 94 pos = data_.find('?'); // Query 95 if (pos != std::string::npos) { 96 AnalysisQuery(pos); 97 if (!errStr_.empty()) { 98 return; 99 } 100 } 101 pos = data_.find(':'); // Scheme 102 if (pos != std::string::npos) { 103 AnalysisScheme(pos); 104 if (!errStr_.empty()) { 105 return; 106 } 107 } else { 108 SpecialPath(); 109 if (!errStr_.empty()) { 110 return; 111 } 112 AssignSchemeSpecificPart(); 113 return; 114 } 115 pos = data_.find("//"); // userInfo path host port ipv4 or ipv6 116 if (pos != std::string::npos && pos == 0) { 117 AssignSchemeSpecificPart(); 118 data_ = data_.substr(2); // 2:Intercept the string from the second subscript 119 AnalysisHostAndPath(); 120 if (!errStr_.empty()) { 121 return; 122 } 123 } else if (data_[0] == '/') { 124 uriData_.path = data_; 125 AssignSchemeSpecificPart(); 126 data_ = ""; 127 } else { 128 AssignSchemeSpecificPart(); 129 uriData_.query = ""; 130 data_ = ""; 131 } 132 } 133 CheckCharacter(std::string data,std::bitset<MAX_BIT_SIZE> rule,bool flag) const134 bool Uri::CheckCharacter(std::string data, std::bitset<MAX_BIT_SIZE> rule, bool flag) const 135 { 136 size_t dataLen = data.size(); 137 for (size_t i = 0; i < dataLen; ++i) { 138 if (static_cast<int>(data[i]) >= 0 && static_cast<int>(data[i]) < 128) { // 128:ASCII Max Number 139 bool isLegal = rule.test(data[i]); 140 if (!isLegal) { 141 return false; 142 } 143 } else if (!flag) { 144 return false; 145 } 146 } 147 return true; 148 } 149 SpecialPath()150 void Uri::SpecialPath() 151 { 152 if (!CheckCharacter(data_, g_rulePath, true)) { 153 errStr_ = "SpecialPath does not conform to the rule"; 154 return; 155 } 156 uriData_.path = data_; 157 data_ = ""; 158 } 159 AnalysisFragment(size_t pos)160 void Uri::AnalysisFragment(size_t pos) 161 { 162 if (pos == 0) { 163 errStr_ = "#It can't be the first"; 164 return; 165 } 166 std::string fragment = data_.substr(pos + 1); 167 if (!CheckCharacter(fragment, g_ruleUrlc, true)) { 168 errStr_ = "Fragment does not conform to the rule"; 169 return; 170 } 171 uriData_.fragment = fragment; 172 data_ = data_.substr(0, pos); 173 } 174 AnalysisQuery(size_t pos)175 void Uri::AnalysisQuery(size_t pos) 176 { 177 std::string query = data_.substr(pos + 1); 178 if (!CheckCharacter(query, g_ruleUrlc, true)) { 179 errStr_ = "Query does not conform to the rule"; 180 return; 181 } 182 uriData_.query = query; 183 data_ = data_.substr(0, pos); 184 } 185 AnalysisScheme(size_t pos)186 void Uri::AnalysisScheme(size_t pos) 187 { 188 size_t slashPos = data_.find('/'); 189 if (slashPos != std::string::npos && slashPos < pos) { 190 SpecialPath(); 191 uriData_.SchemeSpecificPart.reserve(uriData_.path.length() + uriData_.query.length() + 1); 192 uriData_.SchemeSpecificPart.append(uriData_.path); 193 uriData_.SchemeSpecificPart.append("?"); 194 uriData_.SchemeSpecificPart.append(uriData_.query); 195 data_ = ""; 196 } else { 197 if ((static_cast<int>(data_[0]) >= 0 && static_cast<int>(data_[0]) < MAX_BIT_SIZE) && 198 !g_ruleAlpha.test(data_[0])) { 199 errStr_ = "Scheme the first character must be a letter"; 200 return; 201 } 202 std::string scheme = data_.substr(0, pos); 203 if (!CheckCharacter(scheme, g_ruleScheme, false)) { 204 errStr_ = "scheme does not conform to the rule"; 205 return; 206 } 207 uriData_.scheme = scheme; 208 data_ = data_.substr(pos + 1); 209 } 210 } 211 AnalysisHost(bool isLawfulProt)212 void Uri::AnalysisHost(bool isLawfulProt) 213 { 214 // find ipv4 or ipv6 or host 215 if (data_[0] == '[') { 216 if (data_[data_.size() - 1] == ']') { 217 // IPV6 218 if (!isLawfulProt) { 219 errStr_ = "Prot does not conform to the rule"; 220 return; 221 } 222 AnalysisIPV6(); 223 } else { 224 errStr_ = "IPv6 is missing a closing bracket"; 225 return; 226 } 227 } else { 228 if (data_.find('[') != std::string::npos || data_.find(']') != std::string::npos) { 229 errStr_ = "host does not conform to the rule"; 230 return; 231 } 232 // ipv4 233 if (!isLawfulProt || !AnalysisIPV4()) { 234 uriData_.port = -1; 235 uriData_.host = ""; 236 uriData_.userInfo = ""; 237 } 238 } 239 } 240 AnalysisHostAndPath()241 void Uri::AnalysisHostAndPath() 242 { 243 if (data_.empty()) { 244 return; 245 } 246 // find path 247 size_t pos = data_.find('/'); 248 if (pos != std::string::npos) { 249 AnalysisPath(pos); 250 if (!errStr_.empty()) { 251 return; 252 } 253 } 254 uriData_.authority = data_; 255 // find UserInfo 256 pos = data_.find('@'); 257 if (pos != std::string::npos) { 258 AnalysisUserInfo(pos); 259 if (!errStr_.empty()) { 260 return; 261 } 262 } 263 bool isLawfulProt = true; 264 // find port 265 pos = data_.rfind(':'); 266 if (pos != std::string::npos) { 267 size_t pos1 = data_.rfind(']'); 268 if (pos1 == std::string::npos || pos > pos1) { 269 isLawfulProt = AnalysisPort(pos); 270 } 271 if (!errStr_.empty()) { 272 return; 273 } 274 } 275 AnalysisHost(isLawfulProt); 276 } 277 AnalysisPath(size_t pos)278 void Uri::AnalysisPath(size_t pos) 279 { 280 std::string path = data_.substr(pos); 281 if (!CheckCharacter(path, g_rulePath, true)) { 282 errStr_ = "path does not conform to the rule"; 283 return; 284 } 285 uriData_.path = path; 286 data_ = data_.substr(0, pos); 287 } 288 AnalysisUserInfo(size_t pos)289 void Uri::AnalysisUserInfo(size_t pos) 290 { 291 std::string userInfo = data_.substr(0, pos); 292 if (!CheckCharacter(userInfo, g_ruleUserInfo, true)) { 293 errStr_ = "userInfo does not conform to the rule"; 294 return; 295 } 296 uriData_.userInfo = userInfo; 297 data_ = data_.substr(pos + 1); 298 } 299 AnalysisPort(size_t pos)300 bool Uri::AnalysisPort(size_t pos) 301 { 302 std::string port = data_.substr(pos + 1); 303 if (!CheckCharacter(port, g_rulePort, true)) { 304 errStr_ = "port does not conform to the rule"; 305 return false; 306 } else if (CheckCharacter(port, g_ruleDigit, false)) { 307 if (port.size() == 0) { 308 return false; 309 } 310 uriData_.port = std::stoi(port); 311 data_ = data_.substr(0, pos); 312 return true; 313 } else { 314 data_ = data_.substr(0, pos); 315 return false; 316 } 317 return false; 318 } 319 AnalysisIPV4()320 bool Uri::AnalysisIPV4() 321 { 322 std::regex ipv4("((25[0-5]|2[0-4]\\d|1\\d{2}|[1-9]\\d|\\d)\\.){3}(25[0-5]|2[0-4]\\d|1\\d{2}|[1-9]\\d|\\d)"); 323 std::regex hostname("(([a-zA-Z0-9]([a-zA-Z0-9\\-]*[a-zA-Z0-9])?\\.)+([a-zA-Z]([a-zA-Z0-9\\-]*[a-zA-Z0-9])?))|" 324 "([a-zA-Z0-9]([a-zA-Z0-9\\-]*[a-zA-Z0-9])?)"); 325 bool isIpv4 = std::regex_match(data_, ipv4); 326 bool isHosName = std::regex_match(data_, hostname); 327 if (!isIpv4 && !isHosName) { 328 return false; 329 } else { 330 uriData_.host = data_; 331 data_ = ""; 332 return true; 333 } 334 } 335 AnalysisIPV6()336 void Uri::AnalysisIPV6() 337 { 338 std::string str = data_.substr(1, data_.size() - 2); // 2:Intercept the string from the second subscript 339 std::regex ipv6("(::|(:((:[0-9A-Fa-f]{1,4}){1,7}))|(([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|" 340 "(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|:))|(([0-9A-Fa-f]{1,4}:){2}" 341 "(((:[0-9A-Fa-f]{1,4}){1,5})|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})" 342 "|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|:))|(([0-9A-Fa-f]{1,4}:){5}" 343 "(((:[0-9A-Fa-f]{1,4}){1,2})|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|:))|" 344 "(((:(:[0-9A-Fa-f]{1,4}){0,5}:)|(([0-9A-Fa-f]{1,4}:){1}(:[0-9A-Fa-f]{1,4}){0,4}:)" 345 "|(([0-9A-Fa-f]{1,4}:){2}(:[0-9A-Fa-f]{1,4}){0,3}:)|(([0-9A-Fa-f]{1,4}:){3}" 346 "(:[0-9A-Fa-f]{1,4}){0,2}:)|(([0-9A-Fa-f]{1,4}:){4}(:[0-9A-Fa-f]{1,4})?:)|" 347 "(([0-9A-Fa-f]{1,4}:){5}:)|(([0-9A-Fa-f]{1,4}:){6}))((25[0-5]|2[0-4]\\d|1\\d{2}|" 348 "[1-9]\\d|\\d)\\.){3}(25[0-5]|2[0-4]\\d|1\\d{2}|[1-9]\\d|\\d)))(%[a-zA-Z0-9._]+)?"); 349 if (!std::regex_match(str, ipv6)) { 350 errStr_ = "ipv6 does not conform to the rule"; 351 return; 352 } 353 uriData_.host = data_; 354 data_ = ""; 355 } 356 Equals(const Uri other) const357 bool Uri::Equals(const Uri other) const 358 { 359 if (uriData_.port != other.uriData_.port) { 360 return false; 361 } 362 if (uriData_.scheme != other.uriData_.scheme) { 363 return false; 364 } 365 if (uriData_.userInfo != other.uriData_.userInfo) { 366 return false; 367 } 368 if (uriData_.host != other.uriData_.host) { 369 return false; 370 } 371 if (uriData_.query != other.uriData_.query) { 372 return false; 373 } 374 if (uriData_.fragment != other.uriData_.fragment) { 375 return false; 376 } 377 if (uriData_.path != other.uriData_.path) { 378 return false; 379 } 380 if (uriData_.authority != other.uriData_.authority) { 381 return false; 382 } 383 if (uriData_.SchemeSpecificPart != other.uriData_.SchemeSpecificPart) { 384 return false; 385 } 386 return true; 387 } 388 ToString() const389 std::string Uri::ToString() const 390 { 391 return inputUri_; 392 } 393 IsAbsolute() const394 bool Uri::IsAbsolute() const 395 { 396 return !uriData_.scheme.empty(); 397 } 398 IsFailed() const399 std::string Uri::IsFailed() const 400 { 401 return errStr_; 402 } 403 Normalize() const404 std::string Uri::Normalize() const 405 { 406 std::vector<std::string> temp; 407 size_t pathLen = uriData_.path.size(); 408 if (pathLen == 0) { 409 return this->inputUri_; 410 } 411 size_t pos = 0; 412 size_t left = 0; 413 while ((pos = uriData_.path.find('/', left)) != std::string::npos) { 414 temp.push_back(uriData_.path.substr(left, pos - left)); 415 left = pos + 1; 416 } 417 if (left != pathLen) { 418 temp.push_back(uriData_.path.substr(left)); 419 } 420 size_t tempLen = temp.size(); 421 std::vector<std::string> normalizeTemp; 422 for (size_t i = 0; i < tempLen; ++i) { 423 if (!temp[i].empty() && !(temp[i] == ".") && !(temp[i] == "..")) { 424 normalizeTemp.push_back(temp[i]); 425 } 426 if (temp[i] == "..") { 427 if (!normalizeTemp.empty() && normalizeTemp.back() != "..") { 428 normalizeTemp.pop_back(); 429 } else { 430 normalizeTemp.push_back(temp[i]); 431 } 432 } 433 } 434 std::string normalizePath = ""; 435 tempLen = normalizeTemp.size(); 436 if (tempLen == 0) { 437 normalizePath = "/"; 438 } else { 439 for (size_t i = 0; i < tempLen; ++i) { 440 normalizePath += "/" + normalizeTemp[i]; 441 } 442 } 443 return Split(normalizePath); 444 } 445 446 Split(const std::string & path) const447 std::string Uri::Split(const std::string &path) const 448 { 449 std::string normalizeUri = ""; 450 if (!uriData_.scheme.empty()) { 451 normalizeUri += uriData_.scheme + ":"; 452 } 453 if (uriData_.path.empty()) { 454 normalizeUri += uriData_.SchemeSpecificPart; 455 } else { 456 if (!uriData_.host.empty()) { 457 normalizeUri += "//"; 458 if (!uriData_.userInfo.empty()) { 459 normalizeUri += uriData_.userInfo + "@"; 460 } 461 normalizeUri += uriData_.host; 462 if (uriData_.port != -1) { 463 normalizeUri += ":" + std::to_string(uriData_.port); 464 } 465 } else if (!uriData_.authority.empty()) { 466 normalizeUri += "//" + uriData_.authority; 467 } 468 normalizeUri += path; 469 } 470 if (!uriData_.query.empty()) { 471 normalizeUri += "?" + uriData_.query; 472 } 473 if (!uriData_.fragment.empty()) { 474 normalizeUri += "#" + uriData_.fragment; 475 } 476 return normalizeUri; 477 } 478 GetScheme() const479 std::string Uri::GetScheme() const 480 { 481 return uriData_.scheme; 482 } 483 GetAuthority() const484 std::string Uri::GetAuthority() const 485 { 486 return uriData_.authority; 487 } 488 GetSsp() const489 std::string Uri::GetSsp() const 490 { 491 return uriData_.SchemeSpecificPart; 492 } 493 GetUserinfo() const494 std::string Uri::GetUserinfo() const 495 { 496 return uriData_.userInfo; 497 } 498 GetHost() const499 std::string Uri::GetHost() const 500 { 501 return uriData_.host; 502 } 503 GetPort() const504 std::string Uri::GetPort() const 505 { 506 return std::to_string(uriData_.port); 507 } 508 GetPath() const509 std::string Uri::GetPath() const 510 { 511 return uriData_.path; 512 } 513 GetQuery() const514 std::string Uri::GetQuery() const 515 { 516 return uriData_.query; 517 } 518 GetFragment() const519 std::string Uri::GetFragment() const 520 { 521 return uriData_.fragment; 522 } 523 } // namespace OHOS::Uri 524