Lucas Teske a révisé ce gist 3 years ago. Aller à la révision
Aucun changement
Lucas Teske a révisé ce gist 3 years ago. Aller à la révision
Aucun changement
Lucas Teske a révisé ce gist 3 years ago. Aller à la révision
Aucun changement
Lucas Teske a révisé ce gist 3 years ago. Aller à la révision
2 files changed, 571 insertions
url.cpp(fichier créé)
| @@ -0,0 +1,515 @@ | |||
| 1 | + | #include "common/url.h" | |
| 2 | + | ||
| 3 | + | #include <fmt/format.h> | |
| 4 | + | ||
| 5 | + | #include <cctype> | |
| 6 | + | ||
| 7 | + | using namespace ProtoRock::Http; | |
| 8 | + | ||
// Selects which URL component a string belongs to, so that escape(),
// unescape() and shouldEscape() can apply the rules for that component.
enum EncodingMode {
    encodePath = 1,            // whole path
    encodePathSegment = 2,     // single path segment
    encodeHost = 3,            // host (including :port and [ipv6])
    encodeZone = 4,            // IPv6 zone identifier
    encodeUserPassword = 5,    // userinfo (user / password)
    encodeQueryComponent = 6,  // query key or value
    encodeFragment = 7,        // fragment
};
| 18 | + | ||
// Converts a single hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F') to its
// numeric value 0..15. Any other character yields 0.
char unhex(char c) {
    int value = 0;
    if (c >= '0' && c <= '9') {
        value = c - '0';
    } else if (c >= 'a' && c <= 'f') {
        value = 10 + (c - 'a');
    } else if (c >= 'A' && c <= 'F') {
        value = 10 + (c - 'A');
    }
    return static_cast<char>(value);
}
| 31 | + | ||
// Upper-case hexadecimal digits, indexed by nibble value (0..15).
const char *upperhex =
    "0123456789"
    "ABCDEF";
| 33 | + | ||
| 34 | + | // Return true if the specified character should be escaped when | |
| 35 | + | // appearing in a URL string, according to RFC 3986. | |
| 36 | + | // | |
| 37 | + | // Please be informed that for now shouldEscape does not check all | |
| 38 | + | // reserved characters correctly. See golang.org/issue/5684. | |
| 39 | + | bool shouldEscape(char c, EncodingMode mode) { | |
| 40 | + | // §2.3 Unreserved characters (alphanum) | |
| 41 | + | if ('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9') { | |
| 42 | + | return false; | |
| 43 | + | } | |
| 44 | + | if (mode == encodeHost || mode == encodeZone) { | |
| 45 | + | // §3.2.2 Host allows | |
| 46 | + | // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" | |
| 47 | + | // as part of reg-name. | |
| 48 | + | // We add : because we include :port as part of host. | |
| 49 | + | // We add [ ] because we include [ipv6]:port as part of host. | |
| 50 | + | // We add < > because they're the only characters left that | |
| 51 | + | // we could possibly allow, and Parse will reject them if we | |
| 52 | + | // escape them (because hosts can't use %-encoding for | |
| 53 | + | // ASCII bytes). | |
| 54 | + | switch (c) { | |
| 55 | + | case '!': | |
| 56 | + | case '$': | |
| 57 | + | case '&': | |
| 58 | + | case '\'': | |
| 59 | + | case '(': | |
| 60 | + | case ')': | |
| 61 | + | case '*': | |
| 62 | + | case '+': | |
| 63 | + | case ',': | |
| 64 | + | case ';': | |
| 65 | + | case '=': | |
| 66 | + | case ':': | |
| 67 | + | case '[': | |
| 68 | + | case ']': | |
| 69 | + | case '<': | |
| 70 | + | case '>': | |
| 71 | + | case '"': | |
| 72 | + | return false; | |
| 73 | + | } | |
| 74 | + | } | |
| 75 | + | ||
| 76 | + | switch (c) { | |
| 77 | + | // §2.3 Unreserved characters (mark) | |
| 78 | + | case '-': | |
| 79 | + | case '_': | |
| 80 | + | case '.': | |
| 81 | + | case '~': | |
| 82 | + | return false; | |
| 83 | + | ||
| 84 | + | // §2.2 Reserved characters (reserved) | |
| 85 | + | case '$': | |
| 86 | + | case '&': | |
| 87 | + | case '+': | |
| 88 | + | case ',': | |
| 89 | + | case '/': | |
| 90 | + | case ':': | |
| 91 | + | case ';': | |
| 92 | + | case '=': | |
| 93 | + | case '?': | |
| 94 | + | case '@': | |
| 95 | + | // Different sections of the URL allow a few of | |
| 96 | + | // the reserved characters to appear unescaped. | |
| 97 | + | switch (mode) { | |
| 98 | + | case encodePath: // §3.3 | |
| 99 | + | // The RFC allows : @ & = + $ but saves / ; , for assigning | |
| 100 | + | // meaning to individual path segments. This package | |
| 101 | + | // only manipulates the path as a whole, so we allow those | |
| 102 | + | // last three as well. That leaves only ? to escape. | |
| 103 | + | return c == '?'; | |
| 104 | + | ||
| 105 | + | case encodePathSegment: // §3.3 | |
| 106 | + | // The RFC allows : @ & = + $ but saves / ; , for assigning | |
| 107 | + | // meaning to individual path segments. | |
| 108 | + | return c == '/' || c == ';' || c == ',' || c == '?'; | |
| 109 | + | ||
| 110 | + | case encodeUserPassword: // §3.2.1 | |
| 111 | + | // The RFC allows ';', ':', '&', '=', '+', '$', and ',' in | |
| 112 | + | // userinfo, so we must escape only '@', '/', and '?'. | |
| 113 | + | // The parsing of userinfo treats ':' as special so we must escape | |
| 114 | + | // that too. | |
| 115 | + | return c == '@' || c == '/' || c == '?' || c == ':'; | |
| 116 | + | ||
| 117 | + | case encodeQueryComponent: // §3.4 | |
| 118 | + | // The RFC reserves (so we must escape) everything. | |
| 119 | + | return true; | |
| 120 | + | ||
| 121 | + | case encodeFragment: // §4.1 | |
| 122 | + | // The RFC text is silent but the grammar allows | |
| 123 | + | // everything: case so escape nothing. | |
| 124 | + | return false; | |
| 125 | + | } | |
| 126 | + | } | |
| 127 | + | ||
| 128 | + | if (mode == encodeFragment) { | |
| 129 | + | // RFC 3986 §2.2 allows not escaping sub-delims. A subset of sub-delims are | |
| 130 | + | // included in reserved from RFC 2396 §2.2. The remaining sub-delims do not | |
| 131 | + | // need to be escaped. To minimize potential breakage, we apply two restrictions: | |
| 132 | + | // (1) we always escape sub-delims outside of the fragment, and (2) we always | |
| 133 | + | // escape single quote to avoid breaking callers that had previously assumed that | |
| 134 | + | // single quotes would be escaped. See issue #19917. | |
| 135 | + | switch (c) { | |
| 136 | + | case '!': | |
| 137 | + | case '(': | |
| 138 | + | case ')': | |
| 139 | + | case '*': | |
| 140 | + | return false; | |
| 141 | + | } | |
| 142 | + | } | |
| 143 | + | ||
| 144 | + | return true; | |
| 145 | + | } | |
| 146 | + | ||
| 147 | + | std::string escape(const std::string &s, EncodingMode mode) { | |
| 148 | + | auto spaceCount = 0; | |
| 149 | + | auto hexCount = 0; | |
| 150 | + | ||
| 151 | + | for (auto i = 0; i < s.size(); i++) { | |
| 152 | + | auto c = s[i]; | |
| 153 | + | if (shouldEscape(c, mode)) { | |
| 154 | + | if (c == ' ' && mode == encodeQueryComponent) { | |
| 155 | + | spaceCount++; | |
| 156 | + | } else { | |
| 157 | + | hexCount++; | |
| 158 | + | } | |
| 159 | + | } | |
| 160 | + | } | |
| 161 | + | ||
| 162 | + | if (spaceCount == 0 && hexCount == 0) { | |
| 163 | + | return s; | |
| 164 | + | } | |
| 165 | + | ||
| 166 | + | auto required = s.size() + 2 * hexCount; | |
| 167 | + | auto t = std::vector<char>(); | |
| 168 | + | t.reserve(required); | |
| 169 | + | ||
| 170 | + | if (hexCount == 0) { | |
| 171 | + | t.insert(t.begin(), s.begin(), s.end()); | |
| 172 | + | for (auto i = 0; i < s.size(); i++) { | |
| 173 | + | if (s[i] == ' ') { | |
| 174 | + | t[i] = '+'; | |
| 175 | + | } | |
| 176 | + | } | |
| 177 | + | return std::string(t.begin(), t.end()); | |
| 178 | + | } | |
| 179 | + | ||
| 180 | + | auto j = 0; | |
| 181 | + | auto c = 0; | |
| 182 | + | for (auto i = 0; i < s.size(); i++) { | |
| 183 | + | auto c = s[i]; | |
| 184 | + | if (c == ' ' && mode == encodeQueryComponent) { | |
| 185 | + | t[j] = '+'; | |
| 186 | + | j++; | |
| 187 | + | } else if (shouldEscape(c, mode)) { | |
| 188 | + | t[j] = '%'; | |
| 189 | + | t[j + 1] = upperhex[c >> 4]; | |
| 190 | + | t[j + 2] = upperhex[c & 15]; | |
| 191 | + | j += 3; | |
| 192 | + | } else { | |
| 193 | + | t[j] = s[i]; | |
| 194 | + | j++; | |
| 195 | + | } | |
| 196 | + | } | |
| 197 | + | ||
| 198 | + | return std::string(t.begin(), t.end()); | |
| 199 | + | } | |
| 200 | + | ||
| 201 | + | std::string unescape(std::string s, EncodingMode mode) { | |
| 202 | + | // Count %, check that they're well-formed. | |
| 203 | + | auto n = 0; | |
| 204 | + | auto hasPlus = false; | |
| 205 | + | auto tmp = std::string(); | |
| 206 | + | auto v = 0; | |
| 207 | + | for (int i = 0; i < s.size();) { | |
| 208 | + | switch (s[i]) { | |
| 209 | + | case '%': | |
| 210 | + | n++; | |
| 211 | + | if (i + 2 >= s.size() || !std::isxdigit(s[i + 1]) || !std::isxdigit(s[i + 2])) { | |
| 212 | + | s = std::string(s.begin() + 1, s.end()); | |
| 213 | + | if (s.size() > 3) { | |
| 214 | + | s = std::string(s.begin(), s.begin() + 3); | |
| 215 | + | } | |
| 216 | + | throw std::invalid_argument("escape error: " + s); | |
| 217 | + | } | |
| 218 | + | // Per https://tools.ietf.org/html/rfc3986#page-21 | |
| 219 | + | // in the host component %-encoding can only be used | |
| 220 | + | // for non-ASCII bytes. | |
| 221 | + | // But https://tools.ietf.org/html/rfc6874#section-2 | |
| 222 | + | // introduces %25 being allowed to escape a percent sign | |
| 223 | + | // in IPv6 scoped-address literals. Yay. | |
| 224 | + | tmp = std::string(s.begin() + i, s.begin() + i + 3); | |
| 225 | + | if (mode == encodeHost && unhex(s[i + 1]) < 8 && tmp != "%25") { | |
| 226 | + | throw std::invalid_argument("escape error: " + tmp); | |
| 227 | + | } | |
| 228 | + | ||
| 229 | + | if (mode == encodeZone) { | |
| 230 | + | // RFC 6874 says basically "anything goes" for zone identifiers | |
| 231 | + | // and that even non-ASCII can be redundantly escaped, | |
| 232 | + | // but it seems prudent to restrict %-escaped bytes here to those | |
| 233 | + | // that are valid host name bytes in their unescaped form. | |
| 234 | + | // That is, you can use escaping in the zone identifier but not | |
| 235 | + | // to introduce bytes you couldn't just write directly. | |
| 236 | + | // But Windows puts spaces here! Yay. | |
| 237 | + | v = unhex(s[i + 1]) << 4 | unhex(s[i + 2]); | |
| 238 | + | tmp = std::string(s.begin() + i, s.begin() + i + 3); | |
| 239 | + | if (tmp != "%25" && v != ' ' && shouldEscape(v, encodeHost)) { | |
| 240 | + | throw std::invalid_argument("escape error: " + tmp); | |
| 241 | + | } | |
| 242 | + | } | |
| 243 | + | i += 3; | |
| 244 | + | break; | |
| 245 | + | case '+': | |
| 246 | + | hasPlus = mode == encodeQueryComponent; | |
| 247 | + | i++; | |
| 248 | + | break; | |
| 249 | + | default: | |
| 250 | + | if ((mode == encodeHost || mode == encodeZone) && (uint8_t)s[i] < 0x80 && shouldEscape(s[i], mode)) { | |
| 251 | + | tmp = std::string(s.begin() + i, s.begin() + i + 1); | |
| 252 | + | throw std::invalid_argument("invalid host: " + tmp); | |
| 253 | + | } | |
| 254 | + | i++; | |
| 255 | + | } | |
| 256 | + | } | |
| 257 | + | ||
| 258 | + | if (n == 0 && !hasPlus) { | |
| 259 | + | return s; | |
| 260 | + | } | |
| 261 | + | ||
| 262 | + | auto ss = std::stringstream(); | |
| 263 | + | for (int i = 0; i < s.size(); i++) { | |
| 264 | + | switch (s[i]) { | |
| 265 | + | case '%': | |
| 266 | + | ss << (char)(unhex(s[i + 1]) << 4 | unhex(s[i + 2])); | |
| 267 | + | i += 2; | |
| 268 | + | break; | |
| 269 | + | case '+': | |
| 270 | + | ss << ((mode == encodeQueryComponent) ? ' ' : '+'); | |
| 271 | + | break; | |
| 272 | + | default: | |
| 273 | + | ss << s[i]; | |
| 274 | + | } | |
| 275 | + | } | |
| 276 | + | return ss.str(); | |
| 277 | + | } | |
| 278 | + | ||
// Reports whether s contains any ASCII control byte (0x00-0x1F or 0x7F).
//
// Bytes are compared as unsigned values so that non-ASCII bytes (>= 0x80,
// e.g. UTF-8 sequences) are not mistaken for control characters on platforms
// where plain char is signed.
bool stringContainsCTLByte(const std::string &s) {
    for (const char c : s) {
        const unsigned char byte = static_cast<unsigned char>(c);
        if (byte < 0x20 || byte == 0x7f) {
            return true;
        }
    }
    return false;
}
| 287 | + | ||
// Splits a leading "scheme:" off url.
//
// A scheme starts with a letter and continues with letters, digits, '+', '-'
// or '.', and is terminated by ':'. When one is found it is returned and url
// is replaced by the text after the colon. Otherwise an empty string is
// returned and url is left untouched.
//
// Throws std::invalid_argument("missing protocol scheme") when url starts
// with ':' (a colon with nothing in front of it).
std::string getScheme(std::string &url) {
    for (std::string::size_type i = 0; i < url.size(); ++i) {
        const char c = url[i];
        if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')) {
            continue;
        }
        if (('0' <= c && c <= '9') || c == '+' || c == '-' || c == '.') {
            if (i == 0) {
                // A scheme must start with a letter.
                return {};
            }
            continue;
        }
        if (c == ':') {
            if (i == 0) {
                throw std::invalid_argument("missing protocol scheme");
            }
            std::string scheme = url.substr(0, i);
            url.erase(0, i + 1);
            return scheme;
        }
        // We have encountered an invalid character, so there is no valid
        // scheme.
        return {};
    }
    return {};
}
| 311 | + | ||
// validUserinfo reports whether s only contains bytes that may appear in a
// userinfo string per RFC 3986 Section 3.2.1:
//    userinfo    = *( unreserved / pct-encoded / sub-delims / ":" )
//    unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
//    sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
//                  / "*" / "+" / "," / ";" / "="
//
// '%' and '@' are accepted as well. Percent-escapes themselves are not
// validated here; that is left to unescape.
bool validUserinfo(const std::string &s) {
    static const std::string allowedPunctuation = "-._:~!$&'()*+,;=%@";

    for (const char ch : s) {
        const bool isAlnum =
            ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z') || ('0' <= ch && ch <= '9');
        if (!isAlnum && allowedPunctuation.find(ch) == std::string::npos) {
            return false;
        }
    }
    return true;
}
| 357 | + | ||
// validOptionalPort reports whether port is either empty or a ':' followed
// by zero or more decimal digits (i.e. it matches /^:\d*$/).
bool validOptionalPort(const std::string &port) {
    if (port.empty()) {
        return true;
    }
    if (port.front() != ':') {
        return false;
    }
    // Everything after the colon has to be a digit.
    for (std::string::size_type i = 1; i < port.size(); ++i) {
        const char digit = port[i];
        if (!('0' <= digit && digit <= '9')) {
            return false;
        }
    }
    return true;
}
| 374 | + | ||
| 375 | + | // parseHost parses host as an authority without user | |
| 376 | + | // information. That is, as host[:port]. | |
| 377 | + | std::string parseHost(const std::string &host) { | |
| 378 | + | int idx; | |
| 379 | + | if (!host.empty() && host[0] == '[') { | |
| 380 | + | // Parse an IP-Literal in RFC 3986 and RFC 6874. | |
| 381 | + | // E.g., "[fe80::1]", "[fe80::1%25en0]", "[fe80::1]:80". | |
| 382 | + | idx = host.find_last_of(']'); | |
| 383 | + | if (idx >= host.size()) { | |
| 384 | + | throw std::invalid_argument("cannot find ']' in host"); | |
| 385 | + | } | |
| 386 | + | auto colonPort = std::string(host.begin() + idx + 1, host.end()); | |
| 387 | + | if (!validOptionalPort(colonPort)) { | |
| 388 | + | throw std::invalid_argument(fmt::format("invalid port {} after host", colonPort)); | |
| 389 | + | } | |
| 390 | + | // RFC 6874 defines that %25 (%-encoded percent) introduces | |
| 391 | + | // the zone identifier, and the zone identifier can use basically | |
| 392 | + | // any %-encoding it likes. That's different from the host, which | |
| 393 | + | // can only %-encode non-ASCII bytes. | |
| 394 | + | // We do impose some restrictions on the zone, to avoid stupidity | |
| 395 | + | // like newlines. | |
| 396 | + | auto zone = host.find("%25"); | |
| 397 | + | if (idx != std::string::npos) { | |
| 398 | + | auto host1 = unescape(std::string(host.begin(), host.begin() + zone), encodeHost); | |
| 399 | + | auto host2 = unescape(std::string(host.begin() + zone, host.begin() + idx), encodeHost); | |
| 400 | + | auto host3 = unescape(std::string(host.begin() + idx, host.end()), encodeZone); | |
| 401 | + | return host1 + host2 + host3; | |
| 402 | + | } | |
| 403 | + | } else if ((idx = host.find_last_of(':')) < host.size()) { | |
| 404 | + | auto colonPort = std::string(host.begin() + idx, host.end()); | |
| 405 | + | if (!validOptionalPort(colonPort)) { | |
| 406 | + | throw std::invalid_argument(fmt::format("invalid port {} after host", colonPort)); | |
| 407 | + | } | |
| 408 | + | } | |
| 409 | + | ||
| 410 | + | return unescape(host, encodeHost); | |
| 411 | + | } | |
| 412 | + | ||
| 413 | + | void parseAuthority(const std::string &authority, UserInfo &ui, std::string &host) { | |
| 414 | + | auto i = authority.find_last_of('@'); | |
| 415 | + | if (i > authority.size()) { | |
| 416 | + | host = parseHost(authority); | |
| 417 | + | } else { | |
| 418 | + | host = parseHost(std::string(authority.begin() + i + 1, authority.end())); | |
| 419 | + | } | |
| 420 | + | ||
| 421 | + | if (i > authority.size()) { | |
| 422 | + | return; | |
| 423 | + | } | |
| 424 | + | ||
| 425 | + | auto userInfo = std::string(authority.begin(), authority.begin() + i); | |
| 426 | + | if (!validUserinfo(userInfo)) { | |
| 427 | + | throw std::invalid_argument("invalid userinfo"); | |
| 428 | + | } | |
| 429 | + | auto idx = userInfo.find(':'); | |
| 430 | + | if (idx == std::string::npos) { | |
| 431 | + | userInfo = unescape(userInfo, encodeUserPassword); | |
| 432 | + | ui = UserInfo(userInfo); | |
| 433 | + | } else { | |
| 434 | + | auto username = std::string(userInfo.begin(), userInfo.begin() + idx); | |
| 435 | + | auto password = std::string(userInfo.begin() + idx, userInfo.end()); | |
| 436 | + | ui.Username = unescape(username, encodeUserPassword); | |
| 437 | + | ui.Password = unescape(username, encodeUserPassword); | |
| 438 | + | } | |
| 439 | + | } | |
| 440 | + | ||
| 441 | + | URL URL::Parse(std::string url) { | |
| 442 | + | URL u; | |
| 443 | + | std::string frag; | |
| 444 | + | auto hashIndex = url.find("#"); | |
| 445 | + | if (hashIndex != std::string::npos) { | |
| 446 | + | frag = std::string(url.begin() + hashIndex, url.end()); | |
| 447 | + | url = std::string(url.begin(), url.begin() + hashIndex); | |
| 448 | + | } | |
| 449 | + | ||
| 450 | + | u.setFragment(frag); | |
| 451 | + | ||
| 452 | + | if (stringContainsCTLByte(url)) { | |
| 453 | + | throw std::invalid_argument("invalid url: string contains control bytes"); | |
| 454 | + | } | |
| 455 | + | ||
| 456 | + | if (url == "*") { | |
| 457 | + | u.Path = "*"; | |
| 458 | + | return u; | |
| 459 | + | } | |
| 460 | + | auto rest = url; | |
| 461 | + | ||
| 462 | + | u.Scheme = getScheme(rest); | |
| 463 | + | std::transform(u.Scheme.begin(), u.Scheme.end(), u.Scheme.begin(), [](unsigned char c) -> unsigned char { return std::tolower(c); }); | |
| 464 | + | ||
| 465 | + | if (!rest.empty() > 0 && rest[rest.size() - 1] == '?') { | |
| 466 | + | u.ForceQuery = true; | |
| 467 | + | rest.pop_back(); | |
| 468 | + | } else { | |
| 469 | + | auto idx = rest.find("?"); | |
| 470 | + | if (idx != std::string::npos) { | |
| 471 | + | u.RawQuery = std::string(rest.begin() + idx, rest.end()); | |
| 472 | + | rest = std::string(rest.begin(), rest.begin() + idx); | |
| 473 | + | } | |
| 474 | + | } | |
| 475 | + | ||
| 476 | + | if (!rest.empty() && rest[0] != '/') { | |
| 477 | + | if (!u.Scheme.empty()) { | |
| 478 | + | // We consider rootless paths per RFC 3986 as opaque. | |
| 479 | + | u.Opaque = rest; | |
| 480 | + | return u; | |
| 481 | + | } | |
| 482 | + | } | |
| 483 | + | ||
| 484 | + | if (!u.Scheme.empty() || (rest.find("///") != 0 && rest.find("//") == 0)) { | |
| 485 | + | auto authority = std::string(rest.begin() + 2, rest.end()); | |
| 486 | + | rest = ""; | |
| 487 | + | int i = authority.find("/"); | |
| 488 | + | if (i != std::string::npos) { | |
| 489 | + | rest = std::string(authority.begin() + i, authority.end()); | |
| 490 | + | authority = std::string(authority.begin(), authority.begin() + i); | |
| 491 | + | } | |
| 492 | + | parseAuthority(authority, u.User, u.Host); | |
| 493 | + | } | |
| 494 | + | ||
| 495 | + | u.setPath(rest); | |
| 496 | + | ||
| 497 | + | return u; | |
| 498 | + | } | |
| 499 | + | ||
| 500 | + | void URL::setFragment(const std::string &f) { | |
| 501 | + | Fragment = unescape(f, encodeFragment); | |
| 502 | + | auto escf = escape(Fragment, encodeFragment); | |
| 503 | + | RawFragment = (escf == f) ? "" : f; | |
| 504 | + | } | |
| 505 | + | ||
| 506 | + | void URL::setPath(const std::string &p) { | |
| 507 | + | Path = unescape(p, encodePath); | |
| 508 | + | auto escp = escape(Path, encodePath); | |
| 509 | + | RawPath = (escp == p) ? "" : p; | |
| 510 | + | } | |
| 511 | + | ||
| 512 | + | std::string URL::PathEscape(const std::string &path) { return escape(path, encodePath); } | |
| 513 | + | std::string URL::PathUnescape(const std::string &path) { return unescape(path, encodePath); } | |
| 514 | + | std::string URL::QueryEscape(const std::string &query) { return escape(query, encodeQueryComponent); } | |
| 515 | + | std::string URL::QueryUnescape(const std::string &query) { return unescape(query, encodeQueryComponent); } | |
url.h(fichier créé)
| @@ -0,0 +1,56 @@ | |||
| 1 | + | #pragma once | |
| 2 | + | ||
| 3 | + | #include <algorithm> | |
| 4 | + | #include <map> | |
| 5 | + | #include <stdexcept> | |
| 6 | + | ||
| 7 | + | #include "common/common.h" | |
| 8 | + | ||
| 9 | + | // Based on Golang Implementation | |
| 10 | + | // MIT | |
| 11 | + | ||
| 12 | + | namespace ProtoRock { | |
| 13 | + | namespace Http { | |
// User name and password taken from the userinfo part of a URL.
struct UserInfo {
    std::string Username;
    std::string Password;

    // Creates an entry with an empty user name and password.
    UserInfo() = default;

    // Creates an entry with the given user name and an empty password.
    UserInfo(const std::string &u) : Username(u) {}
};
| 21 | + | ||
| 22 | + | struct URL { | |
| 23 | + | private: | |
| 24 | + | void setFragment(const std::string &); | |
| 25 | + | void setPath(const std::string &); | |
| 26 | + | ||
| 27 | + | public: | |
| 28 | + | std::string Scheme; | |
| 29 | + | // encoded opaque data | |
| 30 | + | std::string Opaque; | |
| 31 | + | // username and password information | |
| 32 | + | UserInfo User; | |
| 33 | + | // host or host:port | |
| 34 | + | std::string Host; | |
| 35 | + | // path (relative paths may omit leading slash) | |
| 36 | + | std::string Path; | |
| 37 | + | // encoded path hint (see EscapedPath method) | |
| 38 | + | std::string RawPath; | |
| 39 | + | // append a query ('?') even if RawQuery is empty | |
| 40 | + | bool ForceQuery = false; | |
| 41 | + | // encoded query values, without '?' | |
| 42 | + | std::string RawQuery; | |
| 43 | + | // fragment for references, without '#' | |
| 44 | + | std::string Fragment; | |
| 45 | + | // encoded fragment hint (see EscapedFragment method) | |
| 46 | + | std::string RawFragment; | |
| 47 | + | ||
| 48 | + | static URL Parse(std::string url); | |
| 49 | + | static std::string PathEscape(const std::string &path); | |
| 50 | + | static std::string PathUnescape(const std::string &path); | |
| 51 | + | static std::string QueryEscape(const std::string &query); | |
| 52 | + | static std::string QueryUnescape(const std::string &query); | |
| 53 | + | }; | |
| 54 | + | ||
| 55 | + | } // namespace Http | |
| 56 | + | } // namespace ProtoRock | |