mirror of https://github.com/rwf2/Rocket.git
Accept browser-sent unencoded query characters.
Closes #941. Co-authored-by: Vladimir Ignatev <ya.na.pochte@gmail.com>
This commit is contained in:
parent
949bb01e2d
commit
b5e4dded8a
|
@ -3,7 +3,7 @@ use pear::input::{Extent, Rewind};
|
|||
use pear::macros::{parser, switch, parse_current_marker, parse_error, parse_try};
|
||||
|
||||
use crate::uri::{Uri, Origin, Authority, Absolute, Host};
|
||||
use crate::parse::uri::tables::{is_reg_name_char, is_pchar, is_pchar_or_rchar};
|
||||
use crate::parse::uri::tables::{is_reg_name_char, is_pchar, is_qchar, is_rchar};
|
||||
use crate::parse::uri::RawInput;
|
||||
|
||||
type Result<'a, T> = pear::input::Result<T, RawInput<'a>>;
|
||||
|
@ -15,12 +15,14 @@ pub fn uri<'a>(input: &mut RawInput<'a>) -> Result<'a, Uri<'a>> {
|
|||
1 => switch! {
|
||||
eat(b'*') => Uri::Asterisk,
|
||||
eat(b'/') => Uri::Origin(Origin::new::<_, &str>("/", None)),
|
||||
eat(b'%') => parse_error!("'%' is not a valid URI")?,
|
||||
_ => unsafe {
|
||||
// `is_reg_name_char` only accepts ASCII bytes, which guarantees ASCII here
|
||||
let host = Host::Raw(take_n_if(1, is_reg_name_char)?);
|
||||
Uri::Authority(Authority::raw(input.start.into(), None, host, None))
|
||||
}
|
||||
},
|
||||
// NOTE: We accept '%' even when it isn't followed by two hex digits.
|
||||
_ => switch! {
|
||||
peek(b'/') => Uri::Origin(origin()?),
|
||||
_ => absolute_or_authority()?
|
||||
|
@ -30,30 +32,31 @@ pub fn uri<'a>(input: &mut RawInput<'a>) -> Result<'a, Uri<'a>> {
|
|||
|
||||
#[parser]
|
||||
pub fn origin<'a>(input: &mut RawInput<'a>) -> Result<'a, Origin<'a>> {
|
||||
(peek(b'/')?, path_and_query(is_pchar)?).1
|
||||
(peek(b'/')?, path_and_query(is_pchar, is_qchar)?).1
|
||||
}
|
||||
|
||||
#[parser]
|
||||
pub fn rocket_route_origin<'a>(input: &mut RawInput<'a>) -> Result<'a, Origin<'a>> {
|
||||
(peek(b'/')?, path_and_query(is_pchar_or_rchar)?).1
|
||||
fn is_pchar_or_rchar(c: &u8) -> bool { is_pchar(c) || is_rchar(c) }
|
||||
fn is_qchar_or_rchar(c: &u8) -> bool { is_qchar(c) || is_rchar(c) }
|
||||
(peek(b'/')?, path_and_query(is_pchar_or_rchar, is_qchar_or_rchar)?).1
|
||||
}
|
||||
|
||||
#[parser]
|
||||
fn path_and_query<'a, F>(input: &mut RawInput<'a>, is_good_char: F) -> Result<'a, Origin<'a>>
|
||||
where F: Fn(&u8) -> bool + Copy
|
||||
fn path_and_query<'a, F, Q>(
|
||||
input: &mut RawInput<'a>,
|
||||
is_path_char: F,
|
||||
is_query_char: Q
|
||||
) -> Result<'a, Origin<'a>>
|
||||
where F: Fn(&u8) -> bool + Copy, Q: Fn(&u8) -> bool + Copy
|
||||
{
|
||||
let path = take_while(is_good_char)?;
|
||||
// FIXME: this works on nightly but not stable! `Span` issues?
|
||||
// let query = parse_try!(eat(b'?') => take_while(|c| is_good_char(c) || *c == b'?')?);
|
||||
let query = switch! {
|
||||
eat(b'?') => Some(take_while(|c| is_good_char(c) || *c == b'?')?),
|
||||
_ => None
|
||||
};
|
||||
let path = take_while(is_path_char)?;
|
||||
let query = parse_try!(eat(b'?') => take_while(is_query_char)?);
|
||||
|
||||
if path.is_empty() && query.is_none() {
|
||||
parse_error!("expected path or query, found neither")?
|
||||
} else {
|
||||
// We know the string is ASCII because of the `is_good_char` checks above.
|
||||
// We know the string is ASCII because of the `is_path_char` and `is_query_char` checks above.
|
||||
Ok(unsafe {Origin::raw(input.start.into(), path.into(), query.map(|q| q.into())) })
|
||||
}
|
||||
}
|
||||
|
@ -115,10 +118,10 @@ fn absolute<'a>(
|
|||
}
|
||||
};
|
||||
|
||||
let path_and_query = parse_try!(path_and_query(is_pchar));
|
||||
let path_and_query = parse_try!(path_and_query(is_pchar, is_qchar));
|
||||
(Some(authority), path_and_query)
|
||||
},
|
||||
eat(b':') => (None, Some(path_and_query(is_pchar)?)),
|
||||
eat(b':') => (None, Some(path_and_query(is_pchar, is_qchar)?)),
|
||||
_ => parse_error!("expected ':' but none was found")?
|
||||
};
|
||||
|
||||
|
|
|
@ -1,118 +1,78 @@
|
|||
pub(crate) const PATH_CHARS: [u8; 256] = [
|
||||
// 0 1 2 3 4 5 6 7 8 9
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
|
||||
0, 0, 0, b'!', 0, 0, b'$', b'%', b'&', b'\'', // 3x
|
||||
b'(', b')', b'*', b'+', b',', b'-', b'.', b'/', b'0', b'1', // 4x
|
||||
b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b':', b';', // 5x
|
||||
0, b'=', 0, 0, b'@', b'A', b'B', b'C', b'D', b'E', // 6x
|
||||
b'F', b'G', b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O', // 7x
|
||||
b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W', b'X', b'Y', // 8x
|
||||
b'Z', 0, 0, 0, 0, b'_', 0, b'a', b'b', b'c', // 9x
|
||||
b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', // 10x
|
||||
b'n', b'o', b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', // 11x
|
||||
b'x', b'y', b'z', 0, 0, 0, b'~', 0, 0, 0, // 12x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 13x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 14x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 15x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 17x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 18x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 19x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 21x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 22x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 23x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 24x
|
||||
0, 0, 0, 0, 0, 0, // 25x
|
||||
const fn char_table(sets: &[&[u8]]) -> [u8; 256] {
|
||||
let mut table = [0u8; 256];
|
||||
|
||||
let mut i = 0;
|
||||
while i < sets.len() {
|
||||
let set: &[u8] = sets[i];
|
||||
|
||||
let mut j = 0;
|
||||
while j < set.len() {
|
||||
let c: u8 = set[j];
|
||||
table[c as usize] = c;
|
||||
j += 1;
|
||||
}
|
||||
|
||||
i += 1;
|
||||
}
|
||||
|
||||
table
|
||||
}
|
||||
|
||||
const UNRESERVED: &[u8] = &[
|
||||
b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'J', b'K', b'L',
|
||||
b'M', b'N', b'O', b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W', b'X',
|
||||
b'Y', b'Z', b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j',
|
||||
b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't', b'u', b'v',
|
||||
b'w', b'x', b'y', b'z', b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7',
|
||||
b'8', b'9', b'-', b'.', b'_', b'~',
|
||||
];
|
||||
|
||||
#[inline(always)]
|
||||
pub fn is_pchar(&c: &u8) -> bool {
|
||||
PATH_CHARS[c as usize] != 0
|
||||
}
|
||||
|
||||
pub(crate) const ROUTE_CHARS: [u8; 256] = [
|
||||
// 0 1 2 3 4 5 6 7 8 9
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 3x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 4x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 5x
|
||||
b'<', 0, b'>', 0, 0, 0, 0, 0, 0, 0, // 6x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 7x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 10x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 11x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 12x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 13x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 14x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 15x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 17x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 18x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 19x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 21x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 22x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 23x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 24x
|
||||
0, 0, 0, 0, 0, 0, // 25x
|
||||
const PCT_ENCODED: &[u8] = &[
|
||||
b'%', b'A', b'B', b'C', b'D', b'E', b'F', b'a', b'b', b'c', b'd', b'e',
|
||||
b'f', b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9'
|
||||
];
|
||||
|
||||
#[inline(always)]
|
||||
pub fn is_rchar(&c: &u8) -> bool {
|
||||
ROUTE_CHARS[c as usize] != 0
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn is_pchar_or_rchar(c: &u8) -> bool {
|
||||
is_pchar(c) || is_rchar(c)
|
||||
}
|
||||
|
||||
const REG_CHARS: [u8; 256] = [
|
||||
// 0 1 2 3 4 5 6 7 8 9
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
|
||||
0, 0, 0, b'!', 0, 0, b'$', 0, b'&', b'\'', // 3x
|
||||
b'(', b')', b'*', b'+', b',', b'-', b'.', 0, b'0', b'1', // 4x
|
||||
b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', 0, b';', // 5x
|
||||
0, b'=', 0, 0, 0, b'A', b'B', b'C', b'D', b'E', // 6x
|
||||
b'F', b'G', b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O', // 7x
|
||||
b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W', b'X', b'Y', // 8x
|
||||
b'Z', 0, 0, 0, 0, b'_', 0, b'a', b'b', b'c', // 9x
|
||||
b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', // 10x
|
||||
b'n', b'o', b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', // 11x
|
||||
b'x', b'y', b'z', 0, 0, 0, b'~', 0, 0, 0, // 12x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 13x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 14x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 15x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 17x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 18x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 19x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 21x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 22x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 23x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 24x
|
||||
0, 0, 0, 0, 0, 0 // 25x
|
||||
const SUB_DELIMS: &[u8] = &[
|
||||
b'!', b'$', b'&', b'\'', b'(', b')', b'*', b'+', b',', b';', b'='
|
||||
];
|
||||
|
||||
pub const PATH_CHARS: [u8; 256] = char_table(&[
|
||||
UNRESERVED, PCT_ENCODED, SUB_DELIMS, &[b':', b'@', b'/']
|
||||
]);
|
||||
|
||||
const ROUTE_CHARS: [u8; 256] = char_table(&[&[
|
||||
b'<', b'>'
|
||||
]]);
|
||||
|
||||
const QUERY_CHARS: [u8; 256] = char_table(&[
|
||||
&PATH_CHARS, &[b'/', b'?'],
|
||||
|
||||
// NOTE: these are _not_ accepted in RFC 7230/3986. However, browsers
|
||||
// routinely send these unencoded, so allow them to support real-world usage.
|
||||
&[b'{', b'}', b'[', b']', b'\\', b'^', b'`', b'|'],
|
||||
]);
|
||||
|
||||
const REG_NAME_CHARS: [u8; 256] = char_table(&[
|
||||
UNRESERVED, PCT_ENCODED, SUB_DELIMS
|
||||
]);
|
||||
|
||||
#[inline(always)]
|
||||
pub fn is_reg_name_char(&c: &u8) -> bool {
|
||||
REG_CHARS[c as usize] != 0
|
||||
}
|
||||
pub const fn is_pchar(&c: &u8) -> bool { PATH_CHARS[c as usize] != 0 }
|
||||
|
||||
#[inline(always)]
|
||||
pub const fn is_rchar(&c: &u8) -> bool { ROUTE_CHARS[c as usize] != 0 }
|
||||
|
||||
#[inline(always)]
|
||||
pub const fn is_qchar(&c: &u8) -> bool { QUERY_CHARS[c as usize] != 0 }
|
||||
|
||||
#[inline(always)]
|
||||
pub const fn is_reg_name_char(&c: &u8) -> bool { REG_NAME_CHARS[c as usize] != 0 }
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
fn test_char_table(table: &[u8]) {
|
||||
for (i, &v) in table.iter().enumerate() {
|
||||
if v != 0 && v != 1 {
|
||||
if v != 0 {
|
||||
assert_eq!(i, v as usize);
|
||||
}
|
||||
}
|
||||
|
@ -121,6 +81,8 @@ mod tests {
|
|||
#[test]
|
||||
fn check_tables() {
|
||||
test_char_table(&super::PATH_CHARS[..]);
|
||||
test_char_table(&super::REG_CHARS[..]);
|
||||
test_char_table(&super::QUERY_CHARS[..]);
|
||||
test_char_table(&super::ROUTE_CHARS[..]);
|
||||
test_char_table(&super::REG_NAME_CHARS[..]);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -37,6 +37,19 @@ macro_rules! assert_no_parse {
|
|||
($($from:expr),+,) => (assert_no_parse!($($from),+))
|
||||
}
|
||||
|
||||
macro_rules! assert_parse {
|
||||
($($from:expr),+) => (
|
||||
$(
|
||||
if let Err(e) = from_str($from) {
|
||||
println!("{:?} failed to parse", $from);
|
||||
panic!("{}", e);
|
||||
}
|
||||
)+
|
||||
);
|
||||
|
||||
($($from:expr),+,) => (assert_parse!($($from),+))
|
||||
}
|
||||
|
||||
macro_rules! assert_displays_eq {
|
||||
($($string:expr),+) => (
|
||||
$(
|
||||
|
@ -89,6 +102,25 @@ fn bad_parses() {
|
|||
assert_no_parse!("://z7:77777777777777777777777777777`77777777777");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_issue_924_samples() {
|
||||
assert_parse!("/path?param={value}",
|
||||
"/path/?param={value}",
|
||||
"/some/path/?param={forgot-to-replace-placeholder}",
|
||||
"/path?param={value}&onemore={value}",
|
||||
"/some/path/?tags=[]", "/some/path/?tags=[rocket,is,perfect]",
|
||||
"/some/path/?tags=[rocket|is\\perfect^`]&users={arenot}",
|
||||
"/rocket/@user/",
|
||||
"/rocket/@user/?tags=[rocket,%F0%9F%98%8B]",
|
||||
"/rocket/?username=@sergio&tags=[rocket,%F0%9F%98%8B]",
|
||||
"/rocket/?Key+With+Spaces=value+too",
|
||||
"/rocket/?Key+With+\'",
|
||||
"/rocket/?query=%3E5",
|
||||
);
|
||||
|
||||
assert_no_parse!("/rocket/?query=>5", "/?#foo");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn single_byte() {
|
||||
assert_parse_eq!(
|
||||
|
@ -116,6 +148,7 @@ fn origin() {
|
|||
"/hi%20there?a=b&c=d" => uri_origin("/hi%20there", Some("a=b&c=d")),
|
||||
"/c/d/fa/b/c?abc" => uri_origin("/c/d/fa/b/c", Some("abc")),
|
||||
"/xn--ls8h?emoji=poop" => uri_origin("/xn--ls8h", Some("emoji=poop")),
|
||||
"/?t=[rocket|is\\here^`]&{ok}" => uri_origin("/", Some("t=[rocket|is\\here^`]&{ok}")),
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
@ -28,10 +28,21 @@ use crate::uri::encoding::{percent_encode, DEFAULT_ENCODE_SET};
|
|||
/// ## Parsing
|
||||
///
|
||||
/// The `Uri` type implements a full, zero-allocation, zero-copy [RFC 7230]
|
||||
/// compliant parser. To parse an `&str` into a `Uri`, use the [`Uri::parse()`]
|
||||
/// method. Alternatively, you may also use the `TryFrom<&str>` and
|
||||
/// `TryFrom<String>` trait implementation. To inspect the parsed type, match on
|
||||
/// the resulting `enum` and use the methods of the internal structure.
|
||||
/// compliant "request target" parser with limited liberties for real-world
|
||||
/// deviations. In particular, the parser deviates as follows:
|
||||
///
|
||||
/// * It accepts a `%` character without two trailing hex-digits unless it is
|
||||
/// the only character in the URI.
|
||||
///
|
||||
/// * It accepts the following additional unencoded characters in query parts,
|
||||
/// to match real-world browser behavior:
|
||||
///
|
||||
/// `{`, `}`, `[`, `]`, `\`, `^`, <code>`</code>, `|`
|
||||
///
|
||||
/// To parse an `&str` into a `Uri`, use [`Uri::parse()`]. Alternatively, you
|
||||
/// may also use the `TryFrom<&str>` and `TryFrom<String>` trait implementation.
|
||||
/// To inspect the parsed type, match on the resulting `enum` and use the
|
||||
/// methods of the internal structure.
|
||||
///
|
||||
/// [RFC 7230]: https://tools.ietf.org/html/rfc7230
|
||||
///
|
||||
|
|
Loading…
Reference in New Issue