diff --git a/core/http/src/parse/uri/mod.rs b/core/http/src/parse/uri/mod.rs index 01a3cc1b..a28e7203 100644 --- a/core/http/src/parse/uri/mod.rs +++ b/core/http/src/parse/uri/mod.rs @@ -1,6 +1,6 @@ mod parser; mod error; -mod tables; +pub(crate) mod tables; #[cfg(test)] mod tests; @@ -8,7 +8,6 @@ use crate::uri::{Uri, Origin, Absolute, Authority}; use self::parser::{uri, origin, authority_only, absolute_only, rocket_route_origin}; -pub use self::tables::{is_pchar, PATH_SET}; pub use self::error::Error; type RawInput<'a> = pear::input::Pear>; diff --git a/core/http/src/parse/uri/tables.rs b/core/http/src/parse/uri/tables.rs index 3b339853..0808130b 100644 --- a/core/http/src/parse/uri/tables.rs +++ b/core/http/src/parse/uri/tables.rs @@ -1,40 +1,4 @@ -use percent_encoding::AsciiSet; - -// Generates an AsciiSet from a full character table: -// -// table_to_ascci_set!( ExistingAsciiSet, 0, [ 0, 1, b'x', ... ] ); -// -// A 0 or 1 token in the ith position indicates that the ASCII character i -// should be included in the set. Any other literal in the ith position -// indicates that the ASCII character with value i should not be included in the -// set. -// -// The table's last index must be 127 or earlier. All values in the original -// set, up to the end of the passed-in table, are overwritten. -macro_rules! table_to_ascii_set { - ($base:expr, $i:expr, [ 0, $($rest:tt,)* ]) => { table_to_ascii_set!($base.add($i), $i+1, [ $($rest,)* ]); }; - ($base:expr, $i:expr, [ 1, $($rest:tt,)* ]) => { table_to_ascii_set!($base.add($i), $i+1, [ $($rest,)* ]); }; - ($base:expr, $i:expr, [ $ch:literal, $($rest:tt,)* ]) => { table_to_ascii_set!($base.remove($i), $i+1, [ $($rest,)* ]); }; - ($base:expr, $i:expr, [ ]) => { $base }; -} - -// Generates an AsciiSet accompanying a character table. This is -// used to keep `PATH_CHARS` in sync with `PATH_SET`. -// -// The first block is limited to 128 entries, since it is passed -// to table_to_ascii_set! -macro_rules! table_and_asciiset { - ( - const $name:ident: [u8; $size:expr] = [ $($block1:tt)* ] [ $($block2:tt)* ]; - pub const $setname:ident: AsciiSet; - ) => { - const $name: [u8; $size] = [ $($block1)* $($block2)* ] ; - pub const $setname: AsciiSet = table_to_ascii_set!(percent_encoding::CONTROLS, 0, [ $($block1)* ]); - }; -} - -table_and_asciiset! { -const PATH_CHARS: [u8; 256] = [ +pub(crate) const PATH_CHARS: [u8; 256] = [ // 0 1 2 3 4 5 6 7 8 9 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // x 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x @@ -42,14 +6,13 @@ const PATH_CHARS: [u8; 256] = [ 0, 0, 0, b'!', 0, 0, b'$', b'%', b'&', b'\'', // 3x b'(', b')', b'*', b'+', b',', b'-', b'.', b'/', b'0', b'1', // 4x b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b':', b';', // 5x - // < > (1 used to indicate these are valid in route URIs only) - 1, b'=', 1, 0, b'@', b'A', b'B', b'C', b'D', b'E', // 6x + 0, b'=', 0, 0, b'@', b'A', b'B', b'C', b'D', b'E', // 6x b'F', b'G', b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O', // 7x b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W', b'X', b'Y', // 8x b'Z', 0, 0, 0, 0, b'_', 0, b'a', b'b', b'c', // 9x b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', // 10x b'n', b'o', b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', // 11x - b'x', b'y', b'z', 0, 0, 0, b'~', 0, ] [ 0, 0, // 12x + b'x', b'y', b'z', 0, 0, 0, b'~', 0, 0, 0, // 12x 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 13x 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 14x 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 15x @@ -64,17 +27,50 @@ const PATH_CHARS: [u8; 256] = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 24x 0, 0, 0, 0, 0, 0, // 25x ]; -pub const PATH_SET: AsciiSet; -} #[inline(always)] pub fn is_pchar(&c: &u8) -> bool { - PATH_CHARS[c as usize] == c + PATH_CHARS[c as usize] != 0 +} + +pub(crate) const ROUTE_CHARS: [u8; 256] = [ + // 0 1 2 3 4 5 6 7 8 9 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 3x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 4x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 5x + b'<', 0, b'>', 0, 0, 0, 0, 0, 0, 0, // 6x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 7x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 10x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 11x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 12x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 13x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 14x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 15x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 17x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 18x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 19x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 21x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 22x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 23x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 24x + 0, 0, 0, 0, 0, 0, // 25x +]; + +#[inline(always)] +pub fn is_rchar(&c: &u8) -> bool { + ROUTE_CHARS[c as usize] != 0 } #[inline(always)] -pub fn is_pchar_or_rchar(&c: &u8) -> bool { - PATH_CHARS[c as usize] != 0 +pub fn is_pchar_or_rchar(c: &u8) -> bool { + is_pchar(c) || is_rchar(c) } const REG_CHARS: [u8; 256] = [ diff --git a/core/http/src/uri/encoding.rs b/core/http/src/uri/encoding.rs index 268fbfcd..1c4d9f22 100644 --- a/core/http/src/uri/encoding.rs +++ b/core/http/src/uri/encoding.rs @@ -4,7 +4,7 @@ use std::borrow::Cow; use percent_encoding::{AsciiSet, utf8_percent_encode}; use crate::uri::{UriPart, Path, Query}; -use crate::parse::uri::PATH_SET; +use crate::parse::uri::tables::PATH_CHARS; #[derive(Clone, Copy)] #[allow(non_camel_case_types)] @@ -13,6 +13,24 @@ pub trait EncodeSet { const SET: AsciiSet; } +const fn set_from_table(table: &'static [u8; 256]) -> AsciiSet { + const ASCII_RANGE_LEN: u8 = 0x80; + + let mut set = percent_encoding::CONTROLS.add(0); + let mut i: u8 = 0; + while i < ASCII_RANGE_LEN { + if table[i as usize] == 0 { + set = set.add(i); + } + + i += 1; + } + + set +} + +const PATH_SET: AsciiSet = set_from_table(&PATH_CHARS); + impl Default for UNSAFE_ENCODE_SET

{ #[inline(always)] fn default() -> Self { UNSAFE_ENCODE_SET(PhantomData) } diff --git a/core/http/src/uri/origin.rs b/core/http/src/uri/origin.rs index 4f54a275..915d05e6 100644 --- a/core/http/src/uri/origin.rs +++ b/core/http/src/uri/origin.rs @@ -343,7 +343,9 @@ impl<'a> Origin<'a> { #[inline] pub fn map_path String>(&self, f: F) -> Option { let path = f(self.path()); - if !path.starts_with('/') || !path.bytes().all(|b| crate::parse::uri::is_pchar(&b)) { + if !path.starts_with('/') + || !path.bytes().all(|b| crate::parse::uri::tables::is_pchar(&b)) + { return None; }