diff --git a/lib/Cargo.toml b/lib/Cargo.toml
index 95360614..39489ef7 100644
--- a/lib/Cargo.toml
+++ b/lib/Cargo.toml
@@ -28,6 +28,9 @@ state = "0.2"
 time = "0.1"
 memchr = "1"
 base64 = "0.4"
+smallvec = "0.3"
+pear = "0.0.2"
+pear_codegen = "0.0.2"
 
 [dependencies.cookie]
 version = "0.7.2"
diff --git a/lib/src/config/mod.rs b/lib/src/config/mod.rs
index 6849f33e..d7caf835 100644
--- a/lib/src/config/mod.rs
+++ b/lib/src/config/mod.rs
@@ -498,6 +498,7 @@ mod test {
 
     const TEST_CONFIG_FILENAME: &'static str = "/tmp/testing/Rocket.toml";
 
+    // TODO: It's a shame we have to depend on lazy_static just for this.
     lazy_static! {
         static ref ENV_LOCK: Mutex<usize> = Mutex::new(0);
     }
diff --git a/lib/src/http/ascii.rs b/lib/src/http/ascii.rs
index 64509dc4..8f71f8c2 100644
--- a/lib/src/http/ascii.rs
+++ b/lib/src/http/ascii.rs
@@ -90,6 +90,13 @@ impl Ord for UncasedAsciiRef {
     }
 }
 
+impl fmt::Display for UncasedAsciiRef {
+    #[inline(always)]
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        self.0.fmt(f)
+    }
+}
+
 /// An uncased (case-preserving) ASCII string.
 #[derive(Clone, Debug)]
 pub struct UncasedAscii<'s> {
diff --git a/lib/src/http/known_media_types.rs b/lib/src/http/known_media_types.rs
new file mode 100644
index 00000000..8731e1c0
--- /dev/null
+++ b/lib/src/http/known_media_types.rs
@@ -0,0 +1,56 @@
+// The table of known media types, handed as-is to the `$cont!` macro.
+//
+// Each entry reads `Name (check_fn): "description", "top", "sub"`, optionally
+// followed by `; "key" => "value"` parameters.
+macro_rules! known_media_types {
+    ($cont:ident) => ($cont! {
+        Any (is_any): "any Content-Type", "*", "*",
+        HTML (is_html): "HTML", "text", "html" ; "charset" => "utf-8",
+        Plain (is_plain): "plaintext", "text", "plain" ; "charset" => "utf-8",
+        JSON (is_json): "JSON", "application", "json",
+        Form (is_form): "forms", "application", "x-www-form-urlencoded",
+        JavaScript (is_javascript): "JavaScript", "application", "javascript",
+        CSS (is_css): "CSS", "text", "css" ; "charset" => "utf-8",
+        FormData (is_form_data): "multipart form data", "multipart", "form-data",
+        XML (is_xml): "XML", "text", "xml" ; "charset" => "utf-8",
+        CSV (is_csv): "CSV", "text", "csv" ; "charset" => "utf-8",
+        PNG (is_png): "PNG", "image", "png",
+        GIF (is_gif): "GIF", "image", "gif",
+        BMP (is_bmp): "BMP", "image", "bmp",
+        JPEG (is_jpeg): "JPEG", "image", "jpeg",
+        WEBP (is_webp): "WEBP", "image", "webp",
+        SVG (is_svg): "SVG", "image", "svg+xml",
+        PDF (is_pdf): "PDF", "application", "pdf",
+        TTF (is_ttf): "TTF", "application", "font-sfnt",
+        OTF (is_otf): "OTF", "application", "font-sfnt",
+        WOFF (is_woff): "WOFF", "application", "font-woff",
+        WOFF2 (is_woff2): "WOFF2", "font", "woff2"
+    })
+}
+
+// The table mapping file extensions to entries of `known_media_types!`,
+// handed to the `$cont!` macro as `"ext" => Name` pairs.
+macro_rules! known_extensions {
+    ($cont:ident) => ($cont! {
+        "txt" => Plain,
+        "html" => HTML,
+        "htm" => HTML,
+        "xml" => XML,
+        "csv" => CSV,
+        "js" => JavaScript,
+        "css" => CSS,
+        "json" => JSON,
+        "png" => PNG,
+        "gif" => GIF,
+        "bmp" => BMP,
+        "jpeg" => JPEG,
+        "jpg" => JPEG,
+        "webp" => WEBP,
+        "svg" => SVG,
+        "pdf" => PDF,
+        "ttf" => TTF,
+        "otf" => OTF,
+        "woff" => WOFF,
+        "woff2" => WOFF2
+    })
+}
diff --git a/lib/src/http/media_type.rs b/lib/src/http/media_type.rs
new file mode 100644
index 00000000..38fc9985
--- /dev/null
+++ b/lib/src/http/media_type.rs
@@ -0,0 +1,409 @@
+use std::borrow::Cow;
+use std::str::FromStr;
+use std::fmt;
+use std::hash::{Hash, Hasher};
+
+use http::ascii::{uncased_eq, UncasedAsciiRef};
+use http::parse::{IndexedStr, parse_media_type};
+
+use smallvec::SmallVec;
+
+/// The parameters of a `MediaType`, stored as `(key, value)` pairs.
+// FIXME: `Static` is needed for `const` items. Need `const SmallVec::new`.
+#[derive(Debug, Clone)]
+pub enum MediaParams {
+    /// No parameters.
+    Empty,
+    /// Parameters held in a `'static` slice, as used by the `const` items.
+    Static(&'static [(IndexedStr, IndexedStr)]),
+    /// Parameters collected at runtime.
+    Dynamic(SmallVec<[(IndexedStr, IndexedStr); 2]>)
+}
+
+/// An HTTP media type: a top-level type, a subtype, and optional parameters.
+///
+/// `PartialEq` and `Hash` are implemented by hand below; both consider only
+/// the top-level type and subtype, never the parameters.
+#[derive(Debug, Clone)]
+pub struct MediaType {
+    /// Storage for the entire media type string. This will be `Some` when the
+    /// media type was parsed from a string and `None` when it was created
+    /// manually.
+    #[doc(hidden)]
+    pub source: Option<Cow<'static, str>>,
+    /// The top-level type.
+    #[doc(hidden)]
+    pub top: IndexedStr,
+    /// The subtype.
+    #[doc(hidden)]
+    pub sub: IndexedStr,
+    /// The parameters, if any.
+    #[doc(hidden)]
+    pub params: MediaParams
+}
+
+// Wraps a string in a concrete (non-indexed) `IndexedStr`.
+macro_rules! media_str {
+    ($string:expr) => (IndexedStr::Concrete(Cow::Borrowed($string)))
+}
+
+// Expands the `known_media_types!` table into one associated constant and one
+// `is_*` method per entry, plus a single `is_known` method.
+macro_rules! media_types {
+    ($($name:ident ($check:ident): $str:expr, $t:expr,
+        $s:expr $(; $k:expr => $v:expr)*),+) => {
+        $(
+            #[doc="[MediaType](struct.MediaType.html) for "]
+            #[doc=$str]
+            #[doc=": "] #[doc=$t] #[doc="/"] #[doc=$s] #[doc=""]
+            #[allow(non_upper_case_globals)]
+            pub const $name: MediaType = MediaType {
+                source: None,
+                top: media_str!($t),
+                sub: media_str!($s),
+                params: MediaParams::Static(&[$((media_str!($k), media_str!($v))),*])
+            };
+
+            /// Returns `true` if `self` equals this constant, that is, if the
+            /// top-level type and subtype match (parameters are ignored).
+            #[inline(always)]
+            pub fn $check(&self) -> bool {
+                *self == MediaType::$name
+            }
+         )+
+
+        /// Returns `true` if this MediaType is known to Rocket, that is,
+        /// there is an associated constant for `self`.
+        pub fn is_known(&self) -> bool {
+            $(if self.$check() { return true })+
+            false
+        }
+    };
+}
+
+// Generates `MediaType::from_extension` from the `known_extensions!` table.
+macro_rules! from_extension {
+    ($($ext:expr => $name:ident),*) => (
+        /// Returns the known media type for the file extension `ext` (for
+        /// example `"html"`), matched with `uncased_eq`, or `None`.
+        pub fn from_extension(ext: &str) -> Option<MediaType> {
+            match ext {
+                $(x if uncased_eq(x, $ext) => Some(MediaType::$name)),*,
+                _ => None
+            }
+        }
+    )
+}
+
+impl MediaType {
+    /// Creates a media type `top/sub` with no parameters.
+    #[inline]
+    pub fn new<T, S>(top: T, sub: S) -> MediaType
+        where T: Into<Cow<'static, str>>, S: Into<Cow<'static, str>>
+    {
+        MediaType {
+            source: None,
+            top: IndexedStr::Concrete(top.into()),
+            sub: IndexedStr::Concrete(sub.into()),
+            params: MediaParams::Empty,
+        }
+    }
+
+    /// Creates a media type `top/sub` carrying the `(key, value)` parameters
+    /// yielded by `ps`, in iteration order.
+    #[inline]
+    pub fn with_params<T, S, K, V, P>(top: T, sub: S, ps: P) -> MediaType
+        where T: Into<Cow<'static, str>>, S: Into<Cow<'static, str>>,
+              K: Into<Cow<'static, str>>, V: Into<Cow<'static, str>>,
+              P: IntoIterator<Item=(K, V)>
+    {
+        let mut params = SmallVec::new();
+        for (key, val) in ps {
+            params.push((
+                IndexedStr::Concrete(key.into()),
+                IndexedStr::Concrete(val.into())
+            ))
+        }
+
+        MediaType {
+            source: None,
+            top: IndexedStr::Concrete(top.into()),
+            sub: IndexedStr::Concrete(sub.into()),
+            params: MediaParams::Dynamic(params)
+        }
+    }
+
+    known_extensions!(from_extension);
+
+    /// Returns the top-level type.
+    #[inline]
+    pub fn top(&self) -> &UncasedAsciiRef {
+        self.top.to_str(self.source.as_ref()).into()
+    }
+
+    /// Returns the subtype.
+    #[inline]
+    pub fn sub(&self) -> &UncasedAsciiRef {
+        self.sub.to_str(self.source.as_ref()).into()
+    }
+
+    /// Returns an iterator over the `(key, value)` parameters, in order.
+    #[inline]
+    pub fn params<'a>(&'a self) -> impl Iterator<Item=(&'a str, &'a str)> + 'a {
+        let param_slice = match self.params {
+            MediaParams::Static(slice) => slice,
+            MediaParams::Dynamic(ref vec) => &vec[..],
+            MediaParams::Empty => &[]
+        };
+
+        param_slice.iter()
+            .map(move |&(ref key, ref val)| {
+                let source_str = self.source.as_ref();
+                (key.to_str(source_str), val.to_str(source_str))
+            })
+    }
+
+    /// Consumes `self`, returning a `MediaType` whose `source`, if any, is owned.
+    #[inline(always)]
+    pub fn into_owned(self) -> MediaType {
+        MediaType {
+            source: self.source.map(|c| c.into_owned().into()),
+            top: self.top,
+            sub: self.sub,
+            params: self.params
+        }
+    }
+
+    known_media_types!(media_types);
+}
+
+impl FromStr for MediaType {
+    // Ideally we'd return a `ParseError`, but that required a lifetime.
+    type Err = String;
+
+    #[inline]
+    fn from_str(raw: &str) -> Result<MediaType, String> {
+        parse_media_type(raw)
+            .map(|mt| mt.into_owned())
+            .map_err(|e| e.to_string())
+    }
+}
+
+impl PartialEq for MediaType {
+    /// Two media types are equal when their top-level types and subtypes are
+    /// equal; parameters are not compared.
+    fn eq(&self, other: &MediaType) -> bool {
+        self.top() == other.top() && self.sub() == other.sub()
+    }
+}
+
+impl Hash for MediaType {
+    /// Hashes exactly what `eq` compares. Parameters are deliberately left
+    /// out: `text/html` and `text/html; charset=utf-8` compare equal, so they
+    /// must also hash identically (`a == b` implies `hash(a) == hash(b)`).
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        self.top().hash(state);
+        self.sub().hash(state);
+    }
+}
+
+impl fmt::Display for MediaType {
+    /// Writes `top/sub` followed by `; key=value` for each parameter.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}/{}", self.top(), self.sub())?;
+        for (key, val) in self.params() {
+            write!(f, "; {}={}", key, val)?;
+        }
+
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use std::collections::hash_map::DefaultHasher;
+    use std::hash::{Hash, Hasher};
+    use std::str::FromStr;
+    use super::MediaType;
+
+    macro_rules! assert_no_parse {
+        ($string:expr) => ({
+            let result = MediaType::from_str($string);
+            if result.is_ok() {
+                panic!("{:?} parsed unexpectedly.", $string)
+            }
+        });
+    }
+
+    macro_rules! assert_parse {
+        ($string:expr) => ({
+            let result = MediaType::from_str($string);
+            match result {
+                Ok(media_type) => media_type,
+                Err(e) => panic!("{:?} failed to parse: {}", $string, e)
+            }
+        });
+    }
+
+    macro_rules! assert_parse_eq {
+        (@full $string:expr, $result:expr, $(($k:expr, $v:expr)),*) => ({
+            let result = assert_parse!($string);
+            assert_eq!(result, $result);
+
+            // Equality ignores parameters, so compare them explicitly, in
+            // order, whenever the caller listed any.
+            let expected_params: Vec<(&str, &str)> = vec![$(($k, $v)),*];
+            if !expected_params.is_empty() {
+                assert_eq!(result.params().count(), expected_params.len());
+                let all_params = result.params().zip(expected_params.iter());
+                for ((key, val), &(ekey, eval)) in all_params {
+                    assert_eq!(key, ekey);
+                    assert_eq!(val, eval);
+                }
+            }
+        });
+
+        (from: $string:expr, into: $result:expr)
+            => (assert_parse_eq!(@full $string, $result, ));
+        (from: $string:expr, into: $result:expr, params: $(($key:expr, $val:expr)),*)
+            => (assert_parse_eq!(@full $string, $result, $(($key, $val)),*));
+    }
+
+    fn hash_of(media_type: &MediaType) -> u64 {
+        let mut hasher = DefaultHasher::new();
+        media_type.hash(&mut hasher);
+        hasher.finish()
+    }
+
+    #[test]
+    fn check_does_parse() {
+        assert_parse!("text/html");
+        assert_parse!("a/b");
+        assert_parse!("*/*");
+    }
+
+    #[test]
+    fn check_parse_eq() {
+        assert_parse_eq!(from: "text/html", into: MediaType::HTML);
+        assert_parse_eq!(from: "text/html; charset=utf-8", into: MediaType::HTML);
+        assert_parse_eq!(from: "text/html", into: MediaType::new("text", "html"));
+
+        assert_parse_eq!(from: "a/b", into: MediaType::new("a", "b"));
+        assert_parse_eq!(from: "*/*", into: MediaType::Any);
+        assert_parse_eq!(from: "application/pdf", into: MediaType::PDF);
+        assert_parse_eq!(from: "application/json", into: MediaType::JSON);
+        assert_parse_eq!(from: "image/svg+xml", into: MediaType::SVG);
+
+        assert_parse_eq!(from: "*/json", into: MediaType::new("*", "json"));
+        assert_parse_eq! {
+            from: "application/*; param=1",
+            into: MediaType::new("application", "*")
+        };
+    }
+
+    #[test]
+    fn check_hash_agrees_with_eq() {
+        let bare = assert_parse!("text/html");
+        let with_params = assert_parse!("text/html; charset=utf-8");
+        assert_eq!(bare, with_params);
+        assert_eq!(hash_of(&bare), hash_of(&with_params));
+        assert_eq!(hash_of(&bare), hash_of(&MediaType::HTML));
+    }
+
+    #[test]
+    fn check_param_eq() {
+        assert_parse_eq! {
+            from: "text/html; a=b; b=c; c=d",
+            into: MediaType::new("text", "html"),
+            params: ("a", "b"), ("b", "c"), ("c", "d")
+        };
+
+        assert_parse_eq! {
+            from: "text/html;a=b;b=c; c=d; d=e",
+            into: MediaType::new("text", "html"),
+            params: ("a", "b"), ("b", "c"), ("c", "d"), ("d", "e")
+        };
+
+        assert_parse_eq! {
+            from: "text/html; charset=utf-8",
+            into: MediaType::new("text", "html"),
+            params: ("charset", "utf-8")
+        };
+
+        assert_parse_eq! {
+            from: "application/*; param=1",
+            into: MediaType::new("application", "*"),
+            params: ("param", "1")
+        };
+
+        assert_parse_eq! {
+            from: "*/*;q=0.5;b=c;c=d",
+            into: MediaType::Any,
+            params: ("q", "0.5"), ("b", "c"), ("c", "d")
+        };
+
+        assert_parse_eq! {
+            from: "multipart/form-data; boundary=----WebKitFormBoundarypRshfItmvaC3aEuq",
+            into: MediaType::FormData,
+            params: ("boundary", "----WebKitFormBoundarypRshfItmvaC3aEuq")
+        };
+
+        assert_parse_eq! {
+            from: r#"*/*; a="hello, world!@#$%^&*();;hi""#,
+            into: MediaType::Any,
+            params: ("a", "hello, world!@#$%^&*();;hi")
+        };
+
+        assert_parse_eq! {
+            from: r#"application/json; a=";,;""#,
+            into: MediaType::JSON,
+            params: ("a", ";,;")
+        };
+
+        assert_parse_eq! {
+            from: r#"application/json; a=";,;"; b=c"#,
+            into: MediaType::JSON,
+            params: ("a", ";,;"), ("b", "c")
+        };
+
+        assert_parse_eq! {
+            from: r#"application/json; b=c; a=";.,.;""#,
+            into: MediaType::JSON,
+            params: ("b", "c"), ("a", ";.,.;")
+        };
+
+        assert_parse_eq! {
+            from: r#"*/*; a="a"; b="b"; a=a; b=b; c=c"#,
+            into: MediaType::Any,
+            params: ("a", "a"), ("b", "b"), ("a", "a"), ("b", "b"), ("c", "c")
+        };
+    }
+
+    #[test]
+    fn check_params_do_parse() {
+        assert_parse!("*/*; q=1; q=2");
+        assert_parse!("*/*; q=1;q=2;q=3;a=v;c=1;da=1;sdlkldsadasd=uhisdcb89");
+        assert_parse!("*/*; q=1; q=2");
+        assert_parse!("*/*; q=1; q=2; a=b;c=d; e=f; a=s;a=e");
+        assert_parse!("*/*; q=1; q=2 ; a=b");
+        assert_parse!("*/*; q=1; q=2; hello=\"world !\"");
+    }
+
+    #[test]
+    fn test_bad_parses() {
+        assert_no_parse!("application//json");
+        assert_no_parse!("application///json");
+        assert_no_parse!("a/b;");
+        assert_no_parse!("*/*; a=b;;");
+        assert_no_parse!("*/*; a=b;a");
+        assert_no_parse!("*/*; a=b; ");
+        assert_no_parse!("*/*; a=b;");
+        assert_no_parse!("*/*; a = b");
+        assert_no_parse!("*/*; a= b");
+        assert_no_parse!("*/*; a =b");
+        assert_no_parse!(r#"*/*; a="b"#);
+        assert_no_parse!(r#"*/*; a="b; c=d"#);
+        assert_no_parse!(r#"*/*; a="b; c=d"#);
+        assert_no_parse!("text/{html}");
+    }
+}
diff --git a/lib/src/http/mod.rs b/lib/src/http/mod.rs
index c015e328..ee11c776 100644
--- a/lib/src/http/mod.rs
+++ b/lib/src/http/mod.rs
@@ -8,12 +8,16 @@
 pub mod hyper;
 pub mod uri;
 
+#[macro_use]
+mod known_media_types;
 mod cookies;
 mod session;
 mod method;
+mod media_type;
 mod content_type;
 mod status;
 mod header;
+mod parse;
 
 // We need to export this for codegen, but otherwise it's unnecessary.
 // TODO: Expose a `const fn` from ContentType when possible. (see RFC#1817)
@@ -24,5 +28,6 @@
 pub use self::content_type::ContentType;
 pub use self::status::{Status, StatusClass};
 pub use self::header::{Header, HeaderMap};
+pub use self::media_type::*;
 pub use self::cookies::*;
 pub use self::session::*;
diff --git a/lib/src/http/parse/checkers.rs b/lib/src/http/parse/checkers.rs
new file mode 100644
index 00000000..26585529
--- /dev/null
+++ b/lib/src/http/parse/checkers.rs
@@ -0,0 +1,18 @@
+/// Returns `true` if `byte` is a space or a horizontal tab.
+#[inline(always)]
+pub fn is_whitespace(byte: char) -> bool {
+    byte == ' ' || byte == '\t'
+}
+
+/// Returns `true` if `c` is allowed in an HTTP `token` (RFC 7230 `tchar`):
+/// an ASCII letter or digit, or one of ``!#$%&'*+-.^_`|~``.
+///
+/// Delimiters such as `/`, `;`, `=`, `"`, `{` and `}` are rejected.
+#[inline]
+pub fn is_valid_token(c: char) -> bool {
+    match c {
+        '0'...'9' | 'A'...'Z' | 'a'...'z' | '#'...'\'' | '^'...'`'
+            | '!' | '*' | '+' | '-' | '.' | '|' | '~' => true,
+        _ => false
+    }
+}
diff --git a/lib/src/http/parse/indexed_str.rs b/lib/src/http/parse/indexed_str.rs
new file mode 100644
index 00000000..892675b7
--- /dev/null
+++ b/lib/src/http/parse/indexed_str.rs
@@ -0,0 +1,54 @@
+use std::borrow::Cow;
+
+type Index = u32;
+
+#[derive(Debug, Clone)]
+pub enum IndexedStr {
+    Indexed(Index, Index),
+    Concrete(Cow<'static, str>)
+}
+
+impl IndexedStr {
+    /// Whether this string is derived from indexes or not.
+    pub fn is_indexed(&self) -> bool {
+        match *self {
+            IndexedStr::Indexed(..) => true,
+            IndexedStr::Concrete(..) => false,
+        }
+    }
+
+    /// Retrieves the string `self` corresponds to. If `self` is derived from
+    /// indexes, the corresponding subslice of `string` is returned. Otherwise,
+    /// the concrete string is returned.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `self` is an indexed string and `string` is None.
+    pub fn to_str<'a>(&'a self, string: Option<&'a Cow<str>>) -> &'a str {
+        match *self {
+            IndexedStr::Indexed(i, j) => {
+                // Indexes are only meaningful relative to the base string.
+                let base = string.expect("Cannot convert indexed str to str without base string!");
+                &base[(i as usize)..(j as usize)]
+            }
+            IndexedStr::Concrete(ref mstr) => &*mstr,
+        }
+    }
+
+    /// Locates `needle` within `haystack` by address. Returns `None` if it is
+    /// not a subslice of `haystack` or its offsets do not fit in an `Index`.
+    pub fn from(needle: &str, haystack: &str) -> Option<IndexedStr> {
+        let haystack_start = haystack.as_ptr() as usize;
+        let needle_start = needle.as_ptr() as usize;
+        let haystack_end = haystack_start + haystack.len();
+        if needle_start < haystack_start
+            || (needle_start + needle.len()) > haystack_end {
+            return None;
+        }
+
+        let start = needle_start - haystack_start;
+        let end = start + needle.len();
+        if end > (Index::max_value() as usize) { return None; }
+        Some(IndexedStr::Indexed(start as Index, end as Index))
+    }
+}
diff --git a/lib/src/http/parse/media_type.rs b/lib/src/http/parse/media_type.rs
new file mode 100644
index 00000000..6f300d36
--- /dev/null
+++ b/lib/src/http/parse/media_type.rs
@@ -0,0 +1,77 @@
+use std::borrow::Cow;
+
+use pear::ParseResult;
+use pear::parsers::*;
+use pear::combinators::*;
+use smallvec::SmallVec;
+
+use http::{MediaType, MediaParams};
+use http::parse::checkers::{is_whitespace, is_valid_token};
+use http::parse::IndexedStr;
+
+/// Parses a double-quoted string and returns the text between the quotes.
+///
+/// A backslash escapes the character that follows it, so an escaped `"` does
+/// not end the string. The returned slice is not unescaped.
+#[parser]
+fn quoted_string<'a>(input: &mut &'a str) -> ParseResult<&'a str, &'a str> {
+    eat('"');
+
+    let mut is_escaped = false;
+    let inner = take_while(|c| {
+        if is_escaped { is_escaped = false; return true; }
+        if c == '\\' { is_escaped = true; return true; }
+        c != '"'
+    });
+
+    eat('"');
+    inner
+}
+
+// Repeatedly applies `switch!` with the given cases to `$input`.
+macro_rules! switch_repeat {
+    ($input:expr, $($cases:tt)*) => (repeat!($input, switch!($($cases)*)))
+}
+
+/// Parses `top/sub` followed by any number of `; key=value` parameters.
+///
+/// `source` must be the string `input` points into: the type, subtype, keys
+/// and values are stored as indexes into an owned copy of `source`.
+#[parser]
+fn media_type<'a>(input: &mut &'a str,
+                  source: &'a str) -> ParseResult<&'a str, MediaType> {
+    let top = take_some_while(|c| is_valid_token(c) && c != '/');
+    eat('/');
+    let sub = take_some_while(is_valid_token);
+
+    let mut params = SmallVec::new();
+    switch_repeat! {
+        surrounded(|i| eat(i, ';'), is_whitespace) => {
+            skip_while(is_whitespace);
+            let key = take_some_while(|c| is_valid_token(c) && c != '=');
+            eat('=');
+
+            let value = switch! {
+                peek('"') => quoted_string(),
+                _ => take_some_while(|c| is_valid_token(c) && c != ';')
+            };
+
+            let indexed_key = IndexedStr::from(key, source).expect("key");
+            let indexed_val = IndexedStr::from(value, source).expect("val");
+            params.push((indexed_key, indexed_val))
+        },
+        _ => break
+    }
+
+    MediaType {
+        source: Some(Cow::Owned(source.to_string())),
+        top: IndexedStr::from(top, source).expect("top in source"),
+        sub: IndexedStr::from(sub, source).expect("sub in source"),
+        params: MediaParams::Dynamic(params)
+    }
+}
+
+/// Parses `input` as a media type that must be followed by `eof()`.
+pub fn parse_media_type(mut input: &str) -> ParseResult<&str, MediaType> {
+    parse!(&mut input, (media_type(input), eof()).0)
+}
diff --git a/lib/src/http/parse/mod.rs b/lib/src/http/parse/mod.rs
new file mode 100644
index 00000000..f1913347
--- /dev/null
+++ b/lib/src/http/parse/mod.rs
@@ -0,0 +1,6 @@
+mod media_type;
+mod indexed_str;
+mod checkers;
+
+pub use self::indexed_str::*;
+pub use self::media_type::*;
diff --git a/lib/src/lib.rs b/lib/src/lib.rs
index 77240bca..6ed3917a 100644
--- a/lib/src/lib.rs
+++ b/lib/src/lib.rs
@@ -6,6 +6,9 @@
 #![feature(type_ascription)]
 #![feature(pub_restricted)]
 #![feature(lookup_host)]
+#![feature(plugin)]
+
+#![plugin(pear_codegen)]
 
 //! # Rocket - Core API Documentation
 //!
@@ -95,6 +98,7 @@
 //!
 
 #[macro_use] extern crate log;
+#[macro_use] extern crate pear;
 extern crate term_painter;
 extern crate hyper;
 extern crate url;
@@ -105,6 +109,7 @@ extern crate cookie;
 extern crate time;
 extern crate memchr;
 extern crate base64;
+extern crate smallvec;
 
 #[cfg(test)] #[macro_use] extern crate lazy_static;