From 2f1feb99a5499510183f398730cddc2a7e7df863 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sat, 7 Jan 2012 23:58:54 +0100 Subject: [PATCH 137/139] DOC: add some documentation from RFC3986 about URI format --- doc/internals/http-parsing.txt | 96 ++++++++++++++++++++++++++++++++++++++++ 1 files changed, 96 insertions(+), 0 deletions(-) diff --git a/doc/internals/http-parsing.txt b/doc/internals/http-parsing.txt index fd41b6e..494558b 100644 --- a/doc/internals/http-parsing.txt +++ b/doc/internals/http-parsing.txt @@ -212,6 +212,102 @@ defined without changing the protocol, but these fields cannot be assumed to be recognizable by the recipient. Unrecognized header fields SHOULD be ignored by the recipient and MUST be forwarded by transparent proxies. +---------------------------------- + +The format of Request-URI is defined by RFC3986 : + + URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] + + hier-part = "//" authority path-abempty + / path-absolute + / path-rootless + / path-empty + + URI-reference = URI / relative-ref + + absolute-URI = scheme ":" hier-part [ "?" query ] + + relative-ref = relative-part [ "?" query ] [ "#" fragment ] + + relative-part = "//" authority path-abempty + / path-absolute + / path-noscheme + / path-empty + + scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + + authority = [ userinfo "@" ] host [ ":" port ] + userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) + host = IP-literal / IPv4address / reg-name + port = *DIGIT + + IP-literal = "[" ( IPv6address / IPvFuture ) "]" + + IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) + + IPv6address = 6( h16 ":" ) ls32 + / "::" 5( h16 ":" ) ls32 + / [ h16 ] "::" 4( h16 ":" ) ls32 + / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 + / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 + / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 + / [ *4( h16 ":" ) h16 ] "::" ls32 + / [ *5( h16 ":" ) h16 ] "::" h16 + / [ *6( h16 ":" ) h16 ] "::" + + h16 = 1*4HEXDIG + ls32 = ( h16 ":" h16 ) / IPv4address + IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet + dec-octet = DIGIT ; 0-9 + / %x31-39 DIGIT ; 10-99 + / "1" 2DIGIT ; 100-199 + / "2" %x30-34 DIGIT ; 200-249 + / "25" %x30-35 ; 250-255 + + reg-name = *( unreserved / pct-encoded / sub-delims ) + + path = path-abempty ; begins with "/" or is empty + / path-absolute ; begins with "/" but not "//" + / path-noscheme ; begins with a non-colon segment + / path-rootless ; begins with a segment + / path-empty ; zero characters + + path-abempty = *( "/" segment ) + path-absolute = "/" [ segment-nz *( "/" segment ) ] + path-noscheme = segment-nz-nc *( "/" segment ) + path-rootless = segment-nz *( "/" segment ) + path-empty = 0 + + segment = *pchar + segment-nz = 1*pchar + segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) + ; non-zero-length segment without any colon ":" + + pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + + query = *( pchar / "/" / "?" ) + + fragment = *( pchar / "/" / "?" ) + + pct-encoded = "%" HEXDIG HEXDIG + + unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + reserved = gen-delims / sub-delims + gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" + sub-delims = "!" / "$" / "&" / "'" / "(" / ")" + / "*" / "+" / "," / ";" / "=" + +=> so the list of allowed characters in a URI is : + + uri-char = unreserved / gen-delims / sub-delims / "%" + = ALPHA / DIGIT / "-" / "." / "_" / "~" + / ":" / "/" / "?" / "#" / "[" / "]" / "@" + / "!" / "$" / "&" / "'" / "(" / ")" / + / "*" / "+" / "," / ";" / "=" / "%" + +Note that non-ascii characters are forbidden ! Spaces and CTL are forbidden. +Unfortunately, some products such as Apache allow such characters :-/ + ---- The correct way to do it ---- - one http_session -- 1.7.2.3