Improve documentation on unsafe usage.

All uses of `unsafe` are now thoroughly documented with arguments and
informal proofs of correctness as well as conditions that must hold for
these arguments to pass.

This commit also reduces the number of `unsafe` uses by 7, bringing the
count to ~10 non-test uses of `unsafe`.
This commit is contained in:
Sergio Benitez 2018-06-20 14:02:12 +02:00
parent df7111143e
commit 64bbed1422
7 changed files with 186 additions and 68 deletions

View File

@ -24,15 +24,18 @@ pub struct Parser {
impl Parser {
pub fn new(tokens: TokenStream) -> Parser {
let buffer = Box::new(TokenBuffer::new(tokens.into()));
// Our `Parser` is self-referential. We cast a pointer to the heap
// allocation as `&'static` to allow the storage of the reference
// along-side the allocation. This is safe as long as `buffer` is never
// dropped while `self` lives and an instance or reference to `cursor`
// is never allowed to escape. Both of these properties can be
// confirmed with a cursor look over the method signatures of `Parser`.
let cursor = unsafe {
let buffer: &'static TokenBuffer = ::std::mem::transmute(&*buffer);
buffer.begin()
};
Parser {
buffer: buffer,
cursor: cursor,
}
Parser { buffer, cursor }
}
pub fn current_span(&self) -> Span {
@ -46,6 +49,9 @@ impl Parser {
.map_err(|e| {
let expected = match T::description() {
Some(desc) => desc,
// We're just grabbing the type's name here. This is totally
// unnecessary. There's nothing potentially memory-unsafe
// about this. It's simply unsafe because it's an intrinsic.
None => unsafe { ::std::intrinsics::type_name::<T>() }
};

View File

@ -10,7 +10,6 @@ pub type IndexedString = Indexed<'static, str>;
pub trait AsPtr {
fn as_ptr(&self) -> *const u8;
// unsafe fn from_raw<'a>(raw: *const u8, length: usize) -> &T;
}
impl AsPtr for str {
@ -43,7 +42,7 @@ impl<'a, T: ?Sized + ToOwned + 'a, C: Into<Cow<'a, T>>> From<C> for Indexed<'a,
impl<'a, T: ?Sized + ToOwned + 'a> Indexed<'a, T> {
#[inline(always)]
pub unsafe fn coerce<U: ?Sized + ToOwned>(self) -> Indexed<'a, U> {
pub fn coerce<U: ?Sized + ToOwned>(self) -> Indexed<'a, U> {
match self {
Indexed::Indexed(a, b) => Indexed::Indexed(a, b),
_ => panic!("cannot convert indexed T to U unless indexed")
@ -53,7 +52,7 @@ impl<'a, T: ?Sized + ToOwned + 'a> Indexed<'a, T> {
impl<'a, T: ?Sized + ToOwned + 'a> Indexed<'a, T> {
#[inline(always)]
pub unsafe fn coerce_lifetime<'b>(self) -> Indexed<'b, T> {
pub fn coerce_lifetime<'b>(self) -> Indexed<'b, T> {
match self {
Indexed::Indexed(a, b) => Indexed::Indexed(a, b),
_ => panic!("cannot coerce lifetime unless indexed")
@ -217,9 +216,8 @@ impl<'a> IndexedInput<'a, [u8]> {
let source_addr = self.source.as_ptr() as usize;
let current_addr = self.current.as_ptr() as usize;
if current_addr > n && (current_addr - n) >= source_addr {
let size = self.current.len() + n;
let addr = (current_addr - n) as *const u8;
self.current = unsafe { ::std::slice::from_raw_parts(addr, size) };
let forward = (current_addr - n) - source_addr;
self.current = &self.source[forward..];
Ok(())
} else {
let diag = format!("({}, {:x} in {:x})", n, current_addr, source_addr);

View File

@ -35,8 +35,7 @@ fn media_param<'a>(input: &mut Input<'a>) -> Result<'a, (Slice<'a>, Slice<'a>)>
#[parser]
pub fn media_type<'a>(input: &mut Input<'a>) -> Result<'a, MediaType> {
// FIXME: Explain the coerce safety.
let (top, sub, params) = unsafe {
let (top, sub, params) = {
let top = (take_some_while_until(is_valid_token, '/')?, eat('/')?).0;
let sub = take_some_while_until(is_valid_token, ';')?;
let params = series(true, ';', is_whitespace, |i| {

View File

@ -233,6 +233,33 @@ impl RawStr {
}
if escaped {
// This use of `unsafe` is only correct if the bytes in `allocated`
// form a valid UTF-8 string. We prove this by cases:
//
// 1. In the `!escaped` branch, capacity for the vector is first
// allocated. No characters are pushed to `allocated` prior to
// this branch running since the `escaped` flag isn't set. To
// enter this branch, the current byte must be a valid ASCII
// character. This implies that every byte preceeding forms a
// valid UTF-8 string since ASCII characters in UTF-8 are never
// part of a multi-byte sequence. Thus, extending the `allocated`
// vector with these bytes results in a valid UTF-8 string in
// `allocated`.
//
// 2. After the `!escaped` branch, `allocated` is extended with a
// byte string of valid ASCII characters. Thus, `allocated` is
// still a valid UTF-8 string.
//
// 3. In the `_ if escaped` branch, the byte is simply pushed into
// the `allocated` vector. At this point, `allocated` may contain
// an invalid UTF-8 string as we are not a known boundary.
// However, note that this byte is part of a known valid string
// (`self`). If the byte is not part of a multi-byte sequence, it
// is ASCII, and no further justification is needed. If the byte
// _is_ part of a multi-byte sequence, it is _not_ ASCII, and
// thus will not fall into the escaped character set and it will
// be pushed into `allocated` subsequently, resulting in a valid
// UTF-8 string in `allocated`.
unsafe { Cow::Owned(String::from_utf8_unchecked(allocated)) }
} else {
Cow::Borrowed(self.as_str())
@ -281,6 +308,9 @@ impl RawStr {
impl<'a> From<&'a str> for &'a RawStr {
#[inline(always)]
fn from(string: &'a str) -> &'a RawStr {
// This is simply a `newtype`-like transformation. The `repr(C)` ensures
// that this is safe and correct. Note this exact pattern appears often
// in the standard library.
unsafe { &*(string as *const str as *const RawStr) }
}
}

View File

@ -39,6 +39,9 @@ impl UncasedStr {
/// ```
#[inline(always)]
pub fn new(string: &str) -> &UncasedStr {
// This is simply a `newtype`-like transformation. The `repr(C)` ensures
// that this is safe and correct. Note this exact pattern appears often
// in the standard library.
unsafe { &*(string as *const str as *const UncasedStr) }
}
@ -73,6 +76,9 @@ impl UncasedStr {
/// ```
#[inline(always)]
pub fn into_uncased(self: Box<UncasedStr>) -> Uncased<'static> {
// This is the inverse of a `newtype`-like transformation. The `repr(C)`
// ensures that this is safe and correct. Note this exact pattern
// appears often in the standard library.
unsafe {
let raw_str = Box::into_raw(self) as *mut str;
Uncased::from(Box::from_raw(raw_str).into_string())
@ -210,6 +216,9 @@ impl<'s> Uncased<'s> {
/// ```
#[inline(always)]
pub fn into_boxed_uncased(self) -> Box<UncasedStr> {
// This is simply a `newtype`-like transformation. The `repr(C)` ensures
// that this is safe and correct. Note this exact pattern appears often
// in the standard library.
unsafe {
let raw_str = Box::into_raw(self.string.into_owned().into_boxed_str());
Box::from_raw(raw_str as *mut UncasedStr)

View File

@ -90,6 +90,10 @@ impl Data {
pub(crate) fn from_hyp(mut body: HyperBodyReader) -> Result<Data, &'static str> {
// Steal the internal, undecoded data buffer and net stream from Hyper.
let (mut hyper_buf, pos, cap) = body.get_mut().take_buf();
// This is only valid because we know that hyper's `cap` represents the
// actual length of the vector. As such, this is really only setting
// things up correctly for future use. See
// https://github.com/hyperium/hyper/commit/bbbce5f for confirmation.
unsafe { hyper_buf.set_len(cap); }
let hyper_net_stream = body.get_ref().get_ref();
@ -233,7 +237,7 @@ impl Data {
#[inline(always)]
pub(crate) fn new(mut stream: BodyReader) -> Data {
trace_!("Date::new({:?})", stream);
let mut peek_buf = vec![0; PEEK_BYTES];
let mut peek_buf: Vec<u8> = vec![0; PEEK_BYTES];
// Fill the buffer with as many bytes as possible. If we read less than
// that buffer's length, we know we reached the EOF. Otherwise, it's
@ -241,13 +245,16 @@ impl Data {
let eof = match stream.read_max(&mut peek_buf[..]) {
Ok(n) => {
trace_!("Filled peek buf with {} bytes.", n);
// TODO: Explain this.
unsafe { peek_buf.set_len(n); }
// We can use `set_len` here instead of `truncate`, but we'll
// take the performance hit to avoid `unsafe`. All of this code
// should go away when we migrate away from hyper 0.10.x.
peek_buf.truncate(n);
n < PEEK_BYTES
}
Err(e) => {
error_!("Failed to read into peek buffer: {:?}.", e);
unsafe { peek_buf.set_len(0); }
// Likewise here as above.
peek_buf.truncate(0);
false
},
};

View File

@ -1,6 +1,5 @@
use std::fmt;
use std::rc::Rc;
use std::mem::transmute;
use std::net::SocketAddr;
use std::ops::{Deref, DerefMut};
@ -70,7 +69,33 @@ use http::{Header, Cookie};
/// [`cloned_dispatch`]: #method.cloned_dispatch
pub struct LocalRequest<'c> {
client: &'c Client,
// This pointer exists to access the `Rc<Request>` mutably inside of
// `LocalRequest`. This is the only place that a `Request` can be accessed
// mutably. This is accomplished via the private `request_mut()` method.
ptr: *mut Request<'c>,
// This `Rc` exists so that we can transfer ownership to the `LocalResponse`
// selectively on dispatch. This is necessary because responses may point
// into the request, and thus the request and all of its data needs to be
// alive while the response is accessible.
//
// Because both a `LocalRequest` and a `LocalResponse` can hold an `Rc` to
// the same `Request`, _and_ the `LocalRequest` can mutate the request, we
// must ensure that 1) neither `LocalRequest` not `LocalResponse` are `Sync`
// or `Send` and 2) mutatations carried out in `LocalRequest` are _stable_:
// they never _remove_ data, and any reallocations (say, for vectors or
// hashmaps) result in object pointers remaining the same. This means that
// even if the `Request` is mutated by a `LocalRequest`, those mutations are
// not observeable by `LocalResponse`.
//
// The first is ensured by the embedding of the `Rc` type which is neither
// `Send` nor `Sync`. The second is more difficult to argue. First, observe
// that any methods of `LocalRequest` that _remove_ values from `Request`
// only remove _Copy_ values, in particular, `SocketAddr`. Second, the
// lifetime of the `Request` object is tied tot he lifetime of the
// `LocalResponse`, so references from `Request` cannot be dangling in
// `Response`. And finally, observe how all of the data stored in `Request`
// is convered into its owned counterpart before insertion, ensuring stable
// addresses. Together, these properties guarantee the second condition.
request: Rc<Request<'c>>,
data: Vec<u8>
}
@ -100,7 +125,18 @@ impl<'c> LocalRequest<'c> {
}
#[inline(always)]
fn request(&mut self) -> &mut Request<'c> {
fn request_mut(&mut self) -> &mut Request<'c> {
// See the comments in the structure for the argument of correctness.
unsafe { &mut *self.ptr }
}
// This method should _never_ be publically exposed!
#[inline(always)]
fn long_lived_request<'a>(&mut self) -> &'a mut Request<'c> {
// See the comments in the structure for the argument of correctness.
// Additionally, the caller must ensure that the owned instance of
// `Request` itself remains valid as long as the returned reference can
// be accessed.
unsafe { &mut *self.ptr }
}
@ -126,7 +162,7 @@ impl<'c> LocalRequest<'c> {
/// ```
#[inline]
pub fn header<H: Into<Header<'static>>>(mut self, header: H) -> Self {
self.request().add_header(header.into());
self.request_mut().add_header(header.into());
self
}
@ -146,7 +182,7 @@ impl<'c> LocalRequest<'c> {
/// ```
#[inline]
pub fn add_header<H: Into<Header<'static>>>(&mut self, header: H) {
self.request().add_header(header.into());
self.request_mut().add_header(header.into());
}
/// Set the remote address of this request.
@ -164,7 +200,7 @@ impl<'c> LocalRequest<'c> {
/// ```
#[inline]
pub fn remote(mut self, address: SocketAddr) -> Self {
self.request().set_remote(address);
self.request_mut().set_remote(address);
self
}
@ -297,7 +333,7 @@ impl<'c> LocalRequest<'c> {
/// ```
#[inline(always)]
pub fn dispatch(mut self) -> LocalResponse<'c> {
let req = unsafe { transmute(self.request()) };
let req = self.long_lived_request();
let response = self.client.rocket().dispatch(req, Data::local(self.data));
self.client.update_cookies(&response);
@ -361,7 +397,7 @@ impl<'c> LocalRequest<'c> {
#[inline(always)]
pub fn mut_dispatch(&mut self) -> LocalResponse<'c> {
let data = ::std::mem::replace(&mut self.data, vec![]);
let req = unsafe { transmute(self.request()) };
let req = self.long_lived_request();
let response = self.client.rocket().dispatch(req, Data::local(data));
self.client.update_cookies(&response);
@ -414,6 +450,38 @@ impl<'c> fmt::Debug for LocalResponse<'c> {
}
}
#[cfg(test)]
mod tests {
// Someday...
// #[test]
// #[compile_fail]
// fn local_req_not_sync() {
// fn is_sync<T: Sync>() { }
// is_sync::<::local::LocalRequest>();
// }
// #[test]
// #[compile_fail]
// fn local_req_not_send() {
// fn is_send<T: Send>() { }
// is_send::<::local::LocalRequest>();
// }
// #[test]
// #[compile_fail]
// fn local_req_not_sync() {
// fn is_sync<T: Sync>() { }
// is_sync::<::local::LocalResponse>();
// }
// #[test]
// #[compile_fail]
// fn local_req_not_send() {
// fn is_send<T: Send>() { }
// is_send::<::local::LocalResponse>();
// }
// fn test() {
// use local::Client;
@ -473,3 +541,4 @@ impl<'c> fmt::Debug for LocalResponse<'c> {
// drop(client1);
// }
}