Generate path encoding set using 'const fn'.

Co-authored-by: Jakub Wieczorek <jakub.adam.wieczorek@gmail.com>
This commit is contained in:
Sergio Benitez 2020-11-01 20:57:10 -08:00
parent 337e8843a4
commit 949bb01e2d
4 changed files with 64 additions and 49 deletions

View File

@ -1,6 +1,6 @@
mod parser; mod parser;
mod error; mod error;
mod tables; pub(crate) mod tables;
#[cfg(test)] mod tests; #[cfg(test)] mod tests;
@ -8,7 +8,6 @@ use crate::uri::{Uri, Origin, Absolute, Authority};
use self::parser::{uri, origin, authority_only, absolute_only, rocket_route_origin}; use self::parser::{uri, origin, authority_only, absolute_only, rocket_route_origin};
pub use self::tables::{is_pchar, PATH_SET};
pub use self::error::Error; pub use self::error::Error;
type RawInput<'a> = pear::input::Pear<pear::input::Cursor<&'a [u8]>>; type RawInput<'a> = pear::input::Pear<pear::input::Cursor<&'a [u8]>>;

View File

@ -1,40 +1,4 @@
use percent_encoding::AsciiSet; pub(crate) const PATH_CHARS: [u8; 256] = [
// Generates an `AsciiSet` from a full character table:
//
//   table_to_ascii_set!(ExistingAsciiSet, 0, [ 0, 1, b'x', ... ]);
//
// The macro consumes the table one token at a time, recursing on the
// remainder with `$i + 1`, so `$i` is always the index of the token at the
// head of the list. Every entry, including the last one, must be followed by
// a comma.
//
// A 0 or 1 token in the ith position indicates that the ASCII character i
// should be included in the set. Any other literal in the ith position
// indicates that the ASCII character with value i should not be included in
// the set.
//
// The table's last index must be 127 or earlier. All values in the original
// set, up to the end of the passed-in table, are overwritten.
macro_rules! table_to_ascii_set {
// Head is `0`: add character `$i` to the set and continue with the rest.
($base:expr, $i:expr, [ 0, $($rest:tt,)* ]) => { table_to_ascii_set!($base.add($i), $i+1, [ $($rest,)* ]); };
// Head is `1`: handled exactly like `0` (character `$i` is added).
($base:expr, $i:expr, [ 1, $($rest:tt,)* ]) => { table_to_ascii_set!($base.add($i), $i+1, [ $($rest,)* ]); };
// Head is any other literal: remove character `$i` from the set and continue.
($base:expr, $i:expr, [ $ch:literal, $($rest:tt,)* ]) => { table_to_ascii_set!($base.remove($i), $i+1, [ $($rest,)* ]); };
// Table exhausted: the accumulated expression is the resulting set.
($base:expr, $i:expr, [ ]) => { $base };
}
// Generates an `AsciiSet` accompanying a character table. This is
// used to keep `PATH_CHARS` in sync with `PATH_SET`: both items are produced
// from the same token list, so they cannot drift apart.
//
// The table is written as two adjacent bracketed blocks. The first block is
// limited to 128 entries, since it is passed to `table_to_ascii_set!`; the
// second block holds the remaining entries and only contributes to the array.
macro_rules! table_and_asciiset {
(
const $name:ident: [u8; $size:expr] = [ $($block1:tt)* ] [ $($block2:tt)* ];
pub const $setname:ident: AsciiSet;
) => {
// The array is the concatenation of both blocks.
const $name: [u8; $size] = [ $($block1)* $($block2)* ] ;
// The set starts from `percent_encoding::CONTROLS` and is rewritten, from
// index 0, according to the first block only.
pub const $setname: AsciiSet = table_to_ascii_set!(percent_encoding::CONTROLS, 0, [ $($block1)* ]);
};
}
table_and_asciiset! {
const PATH_CHARS: [u8; 256] = [
// 0 1 2 3 4 5 6 7 8 9 // 0 1 2 3 4 5 6 7 8 9
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // x 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -42,14 +6,13 @@ const PATH_CHARS: [u8; 256] = [
0, 0, 0, b'!', 0, 0, b'$', b'%', b'&', b'\'', // 3x 0, 0, 0, b'!', 0, 0, b'$', b'%', b'&', b'\'', // 3x
b'(', b')', b'*', b'+', b',', b'-', b'.', b'/', b'0', b'1', // 4x b'(', b')', b'*', b'+', b',', b'-', b'.', b'/', b'0', b'1', // 4x
b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b':', b';', // 5x b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b':', b';', // 5x
// < > (1 used to indicate these are valid in route URIs only) 0, b'=', 0, 0, b'@', b'A', b'B', b'C', b'D', b'E', // 6x
1, b'=', 1, 0, b'@', b'A', b'B', b'C', b'D', b'E', // 6x
b'F', b'G', b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O', // 7x b'F', b'G', b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O', // 7x
b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W', b'X', b'Y', // 8x b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W', b'X', b'Y', // 8x
b'Z', 0, 0, 0, 0, b'_', 0, b'a', b'b', b'c', // 9x b'Z', 0, 0, 0, 0, b'_', 0, b'a', b'b', b'c', // 9x
b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', // 10x b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', // 10x
b'n', b'o', b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', // 11x b'n', b'o', b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', // 11x
b'x', b'y', b'z', 0, 0, 0, b'~', 0, ] [ 0, 0, // 12x b'x', b'y', b'z', 0, 0, 0, b'~', 0, 0, 0, // 12x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 13x 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 13x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 14x 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 14x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 15x 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 15x
@ -64,17 +27,50 @@ const PATH_CHARS: [u8; 256] = [
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 24x 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 24x
0, 0, 0, 0, 0, 0, // 25x 0, 0, 0, 0, 0, 0, // 25x
]; ];
pub const PATH_SET: AsciiSet;
}
#[inline(always)] #[inline(always)]
pub fn is_pchar(&c: &u8) -> bool { pub fn is_pchar(&c: &u8) -> bool {
PATH_CHARS[c as usize] == c PATH_CHARS[c as usize] != 0
}
/// Lookup table, indexed by byte value, of the extra bytes accepted in route
/// URIs on top of the regular path characters.
///
/// An entry is non-zero (and equal to its own index) when the byte is a route
/// character, and `0` otherwise. Only `<` (60) and `>` (62) are marked.
pub(crate) const ROUTE_CHARS: [u8; 256] = {
    let mut table = [0u8; 256];
    table[b'<' as usize] = b'<';
    table[b'>' as usize] = b'>';
    table
};
#[inline(always)]
pub fn is_rchar(&c: &u8) -> bool {
ROUTE_CHARS[c as usize] != 0
} }
#[inline(always)] #[inline(always)]
pub fn is_pchar_or_rchar(&c: &u8) -> bool { pub fn is_pchar_or_rchar(c: &u8) -> bool {
PATH_CHARS[c as usize] != 0 is_pchar(c) || is_rchar(c)
} }
const REG_CHARS: [u8; 256] = [ const REG_CHARS: [u8; 256] = [

View File

@ -4,7 +4,7 @@ use std::borrow::Cow;
use percent_encoding::{AsciiSet, utf8_percent_encode}; use percent_encoding::{AsciiSet, utf8_percent_encode};
use crate::uri::{UriPart, Path, Query}; use crate::uri::{UriPart, Path, Query};
use crate::parse::uri::PATH_SET; use crate::parse::uri::tables::PATH_CHARS;
#[derive(Clone, Copy)] #[derive(Clone, Copy)]
#[allow(non_camel_case_types)] #[allow(non_camel_case_types)]
@ -13,6 +13,24 @@ pub trait EncodeSet {
const SET: AsciiSet; const SET: AsciiSet;
} }
/// Builds, at compile time, the percent-encoding set that mirrors a byte
/// lookup `table`.
///
/// Only the ASCII range (indices `0..128`) of `table` is consulted. For each
/// index `i` in that range:
///
///   * `table[i] == 0` means the byte is not allowed verbatim, so `i` is
///     added to the returned set (it will be percent-encoded);
///   * any other value means the byte is allowed, so `i` is removed from the
///     returned set.
///
/// The set is seeded from `percent_encoding::CONTROLS`, but every ASCII entry
/// is then overwritten according to `table`, so the result depends on the
/// table alone.
const fn set_from_table(table: &'static [u8; 256]) -> AsciiSet {
    const ASCII_RANGE_LEN: u8 = 0x80;

    // `add` returns a new set by value, which gives us an owned set to
    // mutate. The entry for byte 0 is overwritten by the loop below like
    // every other entry.
    let mut set = percent_encoding::CONTROLS.add(0);

    let mut i: u8 = 0;
    while i < ASCII_RANGE_LEN {
        set = if table[i as usize] == 0 {
            set.add(i)
        } else {
            // Without this, a byte present in the seed set could never be
            // marked as allowed by the table.
            set.remove(i)
        };

        i += 1;
    }

    set
}
/// Percent-encoding set for URI paths, computed at compile time from
/// `PATH_CHARS` via `set_from_table` so the two cannot fall out of sync.
const PATH_SET: AsciiSet = set_from_table(&PATH_CHARS);
impl<P: UriPart> Default for UNSAFE_ENCODE_SET<P> { impl<P: UriPart> Default for UNSAFE_ENCODE_SET<P> {
#[inline(always)] #[inline(always)]
fn default() -> Self { UNSAFE_ENCODE_SET(PhantomData) } fn default() -> Self { UNSAFE_ENCODE_SET(PhantomData) }

View File

@ -343,7 +343,9 @@ impl<'a> Origin<'a> {
#[inline] #[inline]
pub fn map_path<F: FnOnce(&str) -> String>(&self, f: F) -> Option<Self> { pub fn map_path<F: FnOnce(&str) -> String>(&self, f: F) -> Option<Self> {
let path = f(self.path()); let path = f(self.path());
if !path.starts_with('/') || !path.bytes().all(|b| crate::parse::uri::is_pchar(&b)) { if !path.starts_with('/')
|| !path.bytes().all(|b| crate::parse::uri::tables::is_pchar(&b))
{
return None; return None;
} }