use std::cell::UnsafeCell; use multer::Multipart; use parking_lot::{RawMutex, lock_api::RawMutex as _}; use either::Either; use crate::request::{Request, local_cache}; use crate::data::{Data, Limits, Outcome}; use crate::form::prelude::*; use crate::http::RawStr; type Result<'r, T> = std::result::Result>; type Field<'r, 'i> = Either, DataField<'r, 'i>>; pub struct Buffer { strings: UnsafeCell>, mutex: RawMutex, } pub struct MultipartParser<'r, 'i> { request: &'r Request<'i>, buffer: &'r Buffer, source: Multipart, done: bool, } pub struct RawStrParser<'r> { buffer: &'r Buffer, source: &'r RawStr, } pub enum Parser<'r, 'i> { Multipart(MultipartParser<'r, 'i>), RawStr(RawStrParser<'r>), } impl<'r, 'i> Parser<'r, 'i> { pub async fn new(req: &'r Request<'i>, data: Data) -> Outcome, Errors<'r>> { let parser = match req.content_type() { Some(c) if c.is_form() => Self::from_form(req, data).await, Some(c) if c.is_form_data() => Self::from_multipart(req, data).await, _ => return Outcome::Forward(data), }; match parser { Ok(storage) => Outcome::Success(storage), Err(e) => Outcome::Failure((e.status(), e.into())) } } async fn from_form(req: &'r Request<'i>, data: Data) -> Result<'r, Parser<'r, 'i>> { let limit = req.limits().get("form").unwrap_or(Limits::FORM); let string = data.open(limit).into_string().await?; if !string.is_complete() { Err((None, Some(limit.as_u64())))? } Ok(Parser::RawStr(RawStrParser { buffer: local_cache!(req, Buffer::new()), source: RawStr::new(local_cache!(req, string.into_inner())), })) } async fn from_multipart(req: &'r Request<'i>, data: Data) -> Result<'r, Parser<'r, 'i>> { let boundary = req.content_type() .ok_or(multer::Error::NoMultipart)? .param("boundary") .ok_or(multer::Error::NoBoundary)?; let form_limit = req.limits() .get("data-form") .unwrap_or(Limits::DATA_FORM); Ok(Parser::Multipart(MultipartParser { request: req, buffer: local_cache!(req, Buffer::new()), source: Multipart::with_reader(data.open(form_limit), boundary), done: false, })) } pub async fn next(&mut self) -> Option>> { match self { Parser::Multipart(ref mut p) => p.next().await, Parser::RawStr(ref mut p) => p.next().map(|f| Ok(Either::Left(f))) } } } impl<'r> RawStrParser<'r> { pub fn new(buffer: &'r Buffer, source: &'r RawStr) -> Self { RawStrParser { buffer, source } } } impl<'r> Iterator for RawStrParser<'r> { type Item = ValueField<'r>; fn next(&mut self) -> Option { use std::borrow::Cow::*; let (name, value) = loop { if self.source.is_empty() { return None; } let (field_str, rest) = self.source.split_at_byte(b'&'); self.source = rest; if !field_str.is_empty() { break field_str.split_at_byte(b'='); } }; let name_val = match (name.url_decode_lossy(), value.url_decode_lossy()) { (Borrowed(name), Borrowed(val)) => (name, val), (Borrowed(name), Owned(v)) => (name, self.buffer.push_one(v)), (Owned(name), Borrowed(val)) => (self.buffer.push_one(name), val), (Owned(mut name), Owned(val)) => { let len = name.len(); name.push_str(&val); self.buffer.push_split(name, len) } }; Some(ValueField::from(name_val)) } } #[cfg(test)] mod raw_str_parse_tests { use crate::form::ValueField as Field; #[test] fn test_skips_empty() { let buffer = super::Buffer::new(); let fields: Vec<_> = super::RawStrParser::new(&buffer, "a&b=c&&&c".into()).collect(); assert_eq!(fields, &[Field::parse("a"), Field::parse("b=c"), Field::parse("c")]); } #[test] fn test_decodes() { let buffer = super::Buffer::new(); let fields: Vec<_> = super::RawStrParser::new(&buffer, "a+b=c%20d&%26".into()).collect(); assert_eq!(fields, &[Field::parse("a b=c d"), Field::parse("&")]); } } impl<'r, 'i> MultipartParser<'r, 'i> { async fn next(&mut self) -> Option>> { if self.done { return None; } let field = match self.source.next_field().await { Ok(Some(field)) => field, Ok(None) => return None, Err(e) => { self.done = true; return Some(Err(e.into())); } }; // A field with a content-type is data; one without is "value". trace_!("multipart field: {:?}", field.name()); let content_type = field.content_type().and_then(|m| m.as_ref().parse().ok()); let field = if let Some(content_type) = content_type { let (name, file_name) = match (field.name(), field.file_name()) { (None, None) => ("", None), (None, Some(file_name)) => ("", Some(self.buffer.push_one(file_name))), (Some(name), None) => (self.buffer.push_one(name), None), (Some(a), Some(b)) => { let (field_name, file_name) = self.buffer.push_two(a, b); (field_name, Some(file_name)) } }; Either::Right(DataField { content_type, request: self.request, name: NameView::new(name), file_name: file_name.and_then(sanitize), data: Data::from(field), }) } else { let (mut buf, len) = match field.name() { Some(s) => (s.to_string(), s.len()), None => (String::new(), 0) }; match field.text().await { Ok(text) => buf.push_str(&text), Err(e) => return Some(Err(e.into())), }; let name_val = self.buffer.push_split(buf, len); Either::Left(ValueField::from(name_val)) }; Some(Ok(field)) } } fn sanitize(file_name: &str) -> Option<&str> { let file_name = std::path::Path::new(file_name) .file_name() .and_then(|n| n.to_str()) .map(|n| n.find('.').map(|i| n.split_at(i).0).unwrap_or(n))?; if file_name.is_empty() || file_name.starts_with(|c| c == '.' || c == '*') || file_name.ends_with(|c| c == ':' || c == '>' || c == '<') || file_name.contains(|c| c == '/' || c == '\\') { return None } Some(file_name) } impl Buffer { pub fn new() -> Self { Buffer { strings: UnsafeCell::new(vec![]), mutex: RawMutex::INIT, } } pub fn push_one<'a, S: Into>(&'a self, string: S) -> &'a str { // SAFETY: // * Aliasing: We retrieve a mutable reference to the last slot (via // `push()`) and then return said reference as immutable; these // occur in serial, so they don't alias. This method accesses a // unique slot each call: the last slot, subsequently replaced by // `push()` each next call. No other method accesses the internal // buffer directly. Thus, the outstanding reference to the last slot // is never accessed again mutably, preserving aliasing guarantees. // * Liveness: The returned reference is to a `String`; we must ensure // that the `String` is never dropped while `self` lives. This is // guaranteed by returning a reference with the same lifetime as // `self`, so `self` can't be dropped while the string is live, and // by never removing elements from the internal `Vec` thus not // dropping `String` itself: `push()` is the only mutating operation // called on `Vec`, which preserves all previous elements; the // stability of `String` itself means that the returned address // remains valid even after internal realloc of `Vec`. // * Thread-Safety: Parallel calls to `push_one` without exclusion // would result in a race to `vec.push()`; `RawMutex` ensures that // this doesn't occur. unsafe { self.mutex.lock(); let vec: &mut Vec = &mut *self.strings.get(); vec.push(string.into()); let last = vec.last().expect("push() => non-empty"); self.mutex.unlock(); last } } pub fn push_split(&self, string: String, len: usize) -> (&str, &str) { let buffered = self.push_one(string); let a = &buffered[..len]; let b = &buffered[len..]; (a, b) } pub fn push_two<'a>(&'a self, a: &str, b: &str) -> (&'a str, &'a str) { let mut buffer = String::new(); buffer.push_str(a); buffer.push_str(b); self.push_split(buffer, a.len()) } } unsafe impl Sync for Buffer {}