commit f3768f132194ec89744e62c4c5edd85630af6877 Author: Damian Poddebniak Date: Thu Aug 6 16:25:02 2020 +0200 "Initial" commit. diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..80cb495 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +Cargo.lock +/target +.idea diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..241f99e --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "smtp-proto" +version = "0.1.0" +authors = ["Damian Poddebniak "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +nom = "5.1" +abnf-core = "0.3.0" \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..24f5b3e --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +# smtp-proto diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..597653d --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,24 @@ +pub mod parse; +pub mod types; + +pub fn escape(bytes: &[u8]) -> String { + bytes + .iter() + .map(|byte| match byte { + 0x00..=0x08 => format!("\\x{:02x}", byte), + 0x09 => String::from("\\t"), + 0x0A => String::from("\\n\n"), + 0x0B => format!("\\x{:02x}", byte), + 0x0C => format!("\\x{:02x}", byte), + 0x0D => String::from("\\r"), + 0x0e..=0x1f => format!("\\x{:02x}", byte), + 0x20..=0x22 => format!("{}", *byte as char), + 0x23..=0x5B => format!("{}", *byte as char), + 0x5C => String::from("\\\\"), + 0x5D..=0x7E => format!("{}", *byte as char), + 0x7f => format!("\\x{:02x}", byte), + 0x80..=0xff => format!("\\x{:02x}", byte), + }) + .collect::<Vec<String>>() + .join("") +} diff --git a/src/parse/address.rs b/src/parse/address.rs new file mode 100644 index 0000000..a2b2628 --- /dev/null +++ b/src/parse/address.rs @@ -0,0 +1,161 @@ +//! 4.1.3. 
Address Literals (RFC 5321) + +use crate::parse::command::Ldh_str; +use abnf_core::streaming::is_DIGIT; +use nom::{ + branch::alt, + bytes::streaming::{tag, tag_no_case, take_while1, take_while_m_n}, + character::is_hex_digit, + combinator::{opt, recognize}, + multi::{count, many_m_n}, + sequence::tuple, + IResult, +}; + +/// IPv4-address-literal = Snum 3("." Snum) +pub fn IPv4_address_literal(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple((Snum, count(tuple((tag(b"."), Snum)), 3))); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// IPv6-address-literal = "IPv6:" IPv6-addr +pub fn IPv6_address_literal(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple((tag_no_case(b"IPv6:"), IPv6_addr)); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// General-address-literal = Standardized-tag ":" 1*dcontent +pub fn General_address_literal(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple((Standardized_tag, tag(b":"), take_while1(is_dcontent))); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// Standardized-tag MUST be specified in a Standards-Track RFC and registered with IANA +/// +/// Standardized-tag = Ldh-str +pub fn Standardized_tag(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = Ldh_str; + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// Printable US-ASCII excl. 
"[", "\", "]" +/// +/// dcontent = %d33-90 / %d94-126 +pub fn is_dcontent(byte: u8) -> bool { + match byte { + 33..=90 | 94..=126 => true, + _ => false, + } +} + +/// Representing a decimal integer value in the range 0 through 255 +/// +/// Snum = 1*3DIGIT +pub fn Snum(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = take_while_m_n(1, 3, is_DIGIT); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// IPv6-addr = IPv6-full / IPv6-comp / IPv6v4-full / IPv6v4-comp +pub fn IPv6_addr(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = alt((IPv6_full, IPv6_comp, IPv6v4_full, IPv6v4_comp)); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// IPv6-hex = 1*4HEXDIG +pub fn IPv6_hex(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = take_while_m_n(1, 4, is_hex_digit); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// IPv6-full = IPv6-hex 7(":" IPv6-hex) +pub fn IPv6_full(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple((IPv6_hex, count(tuple((tag(b":"), IPv6_hex)), 7))); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// The "::" represents at least 2 16-bit groups of zeros. +/// No more than 6 groups in addition to the "::" may be present. 
+/// +/// IPv6-comp = [IPv6-hex *5(":" IPv6-hex)] "::" [IPv6-hex *5(":" IPv6-hex)] +pub fn IPv6_comp(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple(( + opt(tuple(( + IPv6_hex, + many_m_n(0, 5, tuple((tag(b":"), IPv6_hex))), + ))), + tag(b"::"), + opt(tuple(( + IPv6_hex, + many_m_n(0, 5, tuple((tag(b":"), IPv6_hex))), + ))), + )); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// IPv6v4-full = IPv6-hex 5(":" IPv6-hex) ":" IPv4-address-literal +pub fn IPv6v4_full(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple(( + IPv6_hex, + count(tuple((tag(b":"), IPv6_hex)), 5), + tag(b":"), + IPv4_address_literal, + )); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// The "::" represents at least 2 16-bit groups of zeros. +/// No more than 4 groups in addition to the "::" and IPv4-address-literal may be present. +/// +/// IPv6v4-comp = [IPv6-hex *3(":" IPv6-hex)] "::" +/// [IPv6-hex *3(":" IPv6-hex) ":"] +/// IPv4-address-literal +pub fn IPv6v4_comp(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple(( + opt(tuple(( + IPv6_hex, + many_m_n(0, 3, tuple((tag(b":"), IPv6_hex))), + ))), + tag(b"::"), + opt(tuple(( + IPv6_hex, + many_m_n(0, 3, tuple((tag(b":"), IPv6_hex))), + tag(b":"), + ))), + IPv4_address_literal, + )); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} diff --git a/src/parse/command.rs b/src/parse/command.rs new file mode 100644 index 0000000..907e801 --- /dev/null +++ b/src/parse/command.rs @@ -0,0 +1,495 @@ +use crate::{ + parse::{ + address::{General_address_literal, IPv4_address_literal, IPv6_address_literal}, + base64, + imf::atom::is_atext, + }, + types::Command, +}; +use abnf_core::streaming::{is_ALPHA, is_DIGIT, CRLF, DQUOTE, SP}; +use nom::{ + branch::alt, + bytes::streaming::{tag, tag_no_case, take_while, take_while1, take_while_m_n}, + combinator::{opt, recognize}, + multi::many0, + 
sequence::{delimited, preceded, tuple}, + IResult, +}; + +pub fn command(input: &[u8]) -> IResult<&[u8], Command> { + let parser = alt(( + helo, ehlo, mail, rcpt, data, rset, vrfy, expn, help, noop, quit, + starttls, // Extensions + auth_login, // https://interoperability.blob.core.windows.net/files/MS-XLOGIN/[MS-XLOGIN].pdf + auth_plain, // RFC 4616 + )); + + let (remaining, parsed) = parser(input)?; + + Ok((remaining, parsed)) +} + +/// helo = "HELO" SP Domain CRLF +pub fn helo(input: &[u8]) -> IResult<&[u8], Command> { + let parser = tuple(( + tag_no_case(b"HELO"), + SP, + alt((Domain, address_literal)), // address_literal alternative for Geary + CRLF, + )); + + let (remaining, (_, _, data, _)) = parser(input)?; + + Ok((remaining, Command::Helo(data.into()))) +} + +/// ehlo = "EHLO" SP ( Domain / address-literal ) CRLF +pub fn ehlo(input: &[u8]) -> IResult<&[u8], Command> { + let parser = tuple(( + tag_no_case(b"EHLO"), + SP, + alt((Domain, address_literal)), + CRLF, + )); + + let (remaining, (_, _, data, _)) = parser(input)?; + + Ok((remaining, Command::Ehlo(data.into()))) +} + +/// mail = "MAIL FROM:" Reverse-path [SP Mail-parameters] CRLF +pub fn mail(input: &[u8]) -> IResult<&[u8], Command> { + let parser = tuple(( + tag_no_case(b"MAIL FROM:"), + opt(SP), // Out-of-Spec, but Outlook does it ... + Reverse_path, + opt(preceded(SP, Mail_parameters)), + CRLF, + )); + + let (remaining, (_, _, data, maybe_params, _)) = parser(input)?; + + Ok(( + remaining, + Command::Mail { + data: data.into(), + params: maybe_params.map(|params| params.into()), + }, + )) +} + +/// rcpt = "RCPT TO:" ( "<Postmaster@" Domain ">" / "<Postmaster>" / Forward-path ) [SP Rcpt-parameters] CRLF +/// +/// Note that, in a departure from the usual rules for +/// local-parts, the "Postmaster" string shown above is +/// treated as case-insensitive. +pub fn rcpt(input: &[u8]) -> IResult<&[u8], Command> { + let parser = tuple(( + tag_no_case(b"RCPT TO:"), + opt(SP), // Out-of-Spec, but Outlook does it ... 
+ alt(( + recognize(tuple((tag_no_case(b"<Postmaster@"), Domain, tag(b">")))), + tag_no_case(b"<Postmaster>"), + Forward_path, + )), + opt(preceded(SP, Rcpt_parameters)), + CRLF, + )); + + let (remaining, (_, _, data, maybe_params, _)) = parser(input)?; + + Ok(( + remaining, + Command::Rcpt { + data: data.into(), + params: maybe_params.map(|params| params.into()), + }, + )) +} + +/// data = "DATA" CRLF +pub fn data(input: &[u8]) -> IResult<&[u8], Command> { + let parser = tuple((tag_no_case(b"DATA"), CRLF)); + + let (remaining, _) = parser(input)?; + + Ok((remaining, Command::Data)) +} + +/// rset = "RSET" CRLF +pub fn rset(input: &[u8]) -> IResult<&[u8], Command> { + let parser = tuple((tag_no_case(b"RSET"), CRLF)); + + let (remaining, _) = parser(input)?; + + Ok((remaining, Command::Rset)) +} + +/// vrfy = "VRFY" SP String CRLF +pub fn vrfy(input: &[u8]) -> IResult<&[u8], Command> { + let parser = tuple((tag_no_case(b"VRFY"), SP, String, CRLF)); + + let (remaining, (_, _, data, _)) = parser(input)?; + + Ok((remaining, Command::Vrfy(data.into()))) +} + +/// expn = "EXPN" SP String CRLF +pub fn expn(input: &[u8]) -> IResult<&[u8], Command> { + let parser = tuple((tag_no_case(b"EXPN"), SP, String, CRLF)); + + let (remaining, (_, _, data, _)) = parser(input)?; + + Ok((remaining, Command::Expn(data.into()))) +} + +/// help = "HELP" [ SP String ] CRLF +pub fn help(input: &[u8]) -> IResult<&[u8], Command> { + let parser = tuple((tag_no_case(b"HELP"), opt(preceded(SP, String)), CRLF)); + + let (remaining, (_, maybe_data, _)) = parser(input)?; + + Ok((remaining, Command::Help(maybe_data.map(|data| data.into())))) +} + +/// noop = "NOOP" [ SP String ] CRLF +pub fn noop(input: &[u8]) -> IResult<&[u8], Command> { + let parser = tuple((tag_no_case(b"NOOP"), opt(preceded(SP, String)), CRLF)); + + let (remaining, (_, maybe_data, _)) = parser(input)?; + + Ok((remaining, Command::Noop(maybe_data.map(|data| data.into())))) +} + +/// quit = "QUIT" CRLF +pub fn quit(input: &[u8]) -> IResult<&[u8], Command> { + let parser = 
tuple((tag_no_case(b"QUIT"), CRLF)); + + let (remaining, _) = parser(input)?; + + Ok((remaining, Command::Quit)) +} + +pub fn starttls(input: &[u8]) -> IResult<&[u8], Command> { + let parser = tuple((tag_no_case(b"STARTTLS"), CRLF)); + + let (remaining, _) = parser(input)?; + + Ok((remaining, Command::StartTLS)) +} + +/// https://interoperability.blob.core.windows.net/files/MS-XLOGIN/[MS-XLOGIN].pdf +/// +/// username = 1*CHAR ; Base64-encoded username +/// password = 1*CHAR ; Base64-encoded password +/// +/// auth_login_command = "AUTH LOGIN" [SP username] CRLF +/// +/// auth_login_username_challenge = "334 VXNlcm5hbWU6" CRLF +/// auth_login_username_response = username CRLF +/// auth_login_password_challenge = "334 UGFzc3dvcmQ6" CRLF +/// auth_login_password_response = password CRLF +pub fn auth_login(input: &[u8]) -> IResult<&[u8], Command> { + let parser = tuple(( + tag_no_case(b"AUTH"), + SP, + tag_no_case("LOGIN"), + opt(preceded(SP, base64)), + CRLF, + )); + + let (remaining, (_, _, _, maybe_username_b64, _)) = parser(input)?; + + Ok(( + remaining, + Command::AuthLogin(maybe_username_b64.map(|i| i.to_owned())), + )) +} + +pub fn auth_plain(input: &[u8]) -> IResult<&[u8], Command> { + let parser = tuple(( + tag_no_case(b"AUTH"), + SP, + tag_no_case("PLAIN"), + opt(preceded(SP, base64)), + CRLF, + )); + + let (remaining, (_, _, _, maybe_credentials_b64, _)) = parser(input)?; + + Ok(( + remaining, + Command::AuthPlain(maybe_credentials_b64.map(|i| i.to_owned())), + )) +} + +// ----- 4.1.2. 
Command Argument Syntax (RFC 5321) ----- + +/// Reverse-path = Path / "<>" +pub fn Reverse_path(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = alt((Path, tag(b"<>"))); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// Forward-path = Path +pub fn Forward_path(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = Path; + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +// Path = "<" [ A-d-l ":" ] Mailbox ">" +pub fn Path(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple(( + tag(b"<"), + opt(tuple((A_d_l, tag(b":")))), + Mailbox, + tag(b">"), + )); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// A-d-l = At-domain *( "," At-domain ) +/// ; Note that this form, the so-called "source +/// ; route", MUST BE accepted, SHOULD NOT be +/// ; generated, and SHOULD be ignored. +pub fn A_d_l(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple((At_domain, many0(tuple((tag(b","), At_domain))))); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// At-domain = "@" Domain +pub fn At_domain(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple((tag(b"@"), Domain)); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// Mail-parameters = esmtp-param *(SP esmtp-param) +pub fn Mail_parameters(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple((esmtp_param, many0(tuple((SP, esmtp_param))))); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// Rcpt-parameters = esmtp-param *(SP esmtp-param) +pub fn Rcpt_parameters(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple((esmtp_param, many0(tuple((SP, esmtp_param))))); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// esmtp-param = esmtp-keyword ["=" esmtp-value] +pub fn 
esmtp_param(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple((esmtp_keyword, opt(tuple((tag(b"="), esmtp_value))))); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// esmtp-keyword = (ALPHA / DIGIT) *(ALPHA / DIGIT / "-") +pub fn esmtp_keyword(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple(( + take_while_m_n(1, 1, |byte| is_ALPHA(byte) || is_DIGIT(byte)), + take_while(|byte| is_ALPHA(byte) || is_DIGIT(byte) || byte == b'-'), + )); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// Any CHAR excluding "=", SP, and control characters. +/// If this string is an email address, i.e., a Mailbox, +/// then the "xtext" syntax [32] SHOULD be used. +/// +/// esmtp-value = 1*(%d33-60 / %d62-126) +pub fn esmtp_value(input: &[u8]) -> IResult<&[u8], &[u8]> { + fn is_value_character(byte: u8) -> bool { + match byte { + 33..=60 | 62..=126 => true, + _ => false, + } + } + + take_while1(is_value_character)(input) +} + +/// Keyword = Ldh-str +pub fn Keyword(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = Ldh_str; + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// Argument = Atom +pub fn Argument(input: &[u8]) -> IResult<&[u8], &[u8]> { + Atom(input) +} + +/// Domain = sub-domain *("." 
sub-domain) +pub fn Domain(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple((sub_domain, many0(tuple((tag(b"."), sub_domain))))); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// sub-domain = Let-dig [Ldh-str] +pub fn sub_domain(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple((take_while_m_n(1, 1, is_Let_dig), opt(Ldh_str))); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// Let-dig = ALPHA / DIGIT +pub fn is_Let_dig(byte: u8) -> bool { + is_ALPHA(byte) || is_DIGIT(byte) +} + +/// Ldh-str = *( ALPHA / DIGIT / "-" ) Let-dig +pub fn Ldh_str(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = many0(alt(( + take_while_m_n(1, 1, is_ALPHA), + take_while_m_n(1, 1, is_DIGIT), + recognize(tuple((tag(b"-"), take_while_m_n(1, 1, is_Let_dig)))), + ))); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// address-literal = "[" ( +/// IPv4-address-literal / +/// IPv6-address-literal / +/// General-address-literal +/// ) "]" +/// ; See Section 4.1.3 +pub fn address_literal(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = delimited( + tag(b"["), + alt(( + IPv4_address_literal, + IPv6_address_literal, + General_address_literal, + )), + tag(b"]"), + ); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// Mailbox = Local-part "@" ( Domain / address-literal ) +pub fn Mailbox(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple((Local_part, tag(b"@"), alt((Domain, address_literal)))); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// Local-part = Dot-string / Quoted-string +/// ; MAY be case-sensitive +pub fn Local_part(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = alt((Dot_string, Quoted_string)); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// Dot-string = Atom 
*("." Atom) +pub fn Dot_string(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple((Atom, many0(tuple((tag(b"."), Atom))))); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// Atom = 1*atext +pub fn Atom(input: &[u8]) -> IResult<&[u8], &[u8]> { + take_while1(is_atext)(input) +} + +/// Quoted-string = DQUOTE *QcontentSMTP DQUOTE +pub fn Quoted_string(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = delimited(DQUOTE, many0(QcontentSMTP), DQUOTE); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// QcontentSMTP = qtextSMTP / quoted-pairSMTP +pub fn QcontentSMTP(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = alt((take_while_m_n(1, 1, is_qtextSMTP), quoted_pairSMTP)); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// Backslash followed by any ASCII graphic (including itself) or SPace +/// +/// quoted-pairSMTP = %d92 %d32-126 +pub fn quoted_pairSMTP(input: &[u8]) -> IResult<&[u8], &[u8]> { + fn is_ascii_bs_or_sp(byte: u8) -> bool { + match byte { + 32..=126 => true, + _ => false, + } + } + + let parser = tuple((tag("\\"), take_while_m_n(1, 1, is_ascii_bs_or_sp))); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// Within a quoted string, any ASCII graphic or space is permitted +/// without backslash-quoting except double-quote and the backslash itself. 
+/// +/// qtextSMTP = %d32-33 / %d35-91 / %d93-126 +pub fn is_qtextSMTP(byte: u8) -> bool { + match byte { + 32..=33 | 35..=91 | 93..=126 => true, + _ => false, + } +} + +/// String = Atom / Quoted-string +pub fn String(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = alt((Atom, Quoted_string)); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} diff --git a/src/parse/imf.rs b/src/parse/imf.rs new file mode 100644 index 0000000..3d417c8 --- /dev/null +++ b/src/parse/imf.rs @@ -0,0 +1,611 @@ +//! Internet Message Format (RFC 5322) +//! +//! TODO: replace this with an IMF library, e.g. rustyknife? + +/// 3.2.1. Quoted characters +pub mod quoted_characters { + use super::obsolete::obs_qp; + use abnf_core::streaming::{is_VCHAR, WSP}; + use nom::{ + branch::alt, + bytes::streaming::{tag, take_while_m_n}, + combinator::recognize, + sequence::tuple, + IResult, + }; + + /// quoted-pair = ("\" (VCHAR / WSP)) / obs-qp + pub fn quoted_pair(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = alt(( + recognize(tuple(( + tag(b"\\"), + alt((take_while_m_n(1, 1, is_VCHAR), WSP)), + ))), + obs_qp, + )); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) + } +} + +/// 3.2.2. Folding White Space and Comments +pub mod folding_ws_and_comment { + use nom::IResult; + + /// Folding white space + /// + /// FWS = ([*WSP CRLF] 1*WSP) / obs-FWS + pub fn FWS(_input: &[u8]) -> IResult<&[u8], &[u8]> { + unimplemented!() + } + + // Printable US-ASCII characters not including "(", ")", or "\" + // + // ctext = %d33-39 / %d42-91 / %d93-126 / obs-ctext + + // ccontent = ctext / quoted-pair / comment + + // comment = "(" *([FWS] ccontent) [FWS] ")" + + /// CFWS = (1*([FWS] comment) [FWS]) / FWS + pub fn CFWS(_input: &[u8]) -> IResult<&[u8], &[u8]> { + unimplemented!() + } +} + +/// 3.2.3. 
Atom +pub mod atom { + use super::folding_ws_and_comment::CFWS; + use abnf_core::streaming::{is_ALPHA, is_DIGIT}; + use nom::{ + bytes::streaming::{tag, take_while1}, + combinator::{opt, recognize}, + multi::many0, + sequence::tuple, + IResult, + }; + + /// Printable US-ASCII characters not including specials. + /// Used for atoms. + /// + /// atext = ALPHA / DIGIT / + /// "!" / "#" / + /// "$" / "%" / + /// "&" / "'" / + /// "*" / "+" / + /// "-" / "/" / + /// "=" / "?" / + /// "^" / "_" / + /// "`" / "{" / + /// "|" / "}" / + /// "~" + pub fn is_atext(byte: u8) -> bool { + let allowed = b"!#$%&'*+-/=?^_`{|}~"; + + is_ALPHA(byte) || is_DIGIT(byte) || allowed.contains(&byte) + } + + /// atom = [CFWS] 1*atext [CFWS] + pub fn atom(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple((opt(CFWS), take_while1(is_atext), opt(CFWS))); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) + } + + /// dot-atom-text = 1*atext *("." 1*atext) + pub fn dot_atom_text(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple(( + take_while1(is_atext), + many0(tuple((tag(b"."), take_while1(is_atext)))), + )); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) + } + + // dot-atom = [CFWS] dot-atom-text [CFWS] + pub fn dot_atom(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple((opt(CFWS), dot_atom_text, opt(CFWS))); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) + } + + // Special characters that do not appear in atext. + // + // specials = "(" / ")" / + // "<" / ">" / + // "[" / "]" / + // ":" / ";" / + // "@" / "\" / + // "," / "." / + // DQUOTE + // ... +} + +/// 3.2.4. 
Quoted Strings +pub mod quoted_strings { + use super::{ + folding_ws_and_comment::{CFWS, FWS}, + obsolete::is_obs_qtext, + quoted_characters::quoted_pair, + }; + use abnf_core::streaming::DQUOTE; + use nom::{ + branch::alt, + bytes::streaming::take_while_m_n, + combinator::{opt, recognize}, + multi::many0, + sequence::tuple, + IResult, + }; + + /// Printable US-ASCII characters not including "\" or the quote character. + /// + /// qtext = %d33 / %d35-91 / %d93-126 / obs-qtext + pub fn is_qtext(byte: u8) -> bool { + match byte { + 33 | 35..=91 | 93..=126 => true, + _ if is_obs_qtext(byte) => true, + _ => false, + } + } + + /// qcontent = qtext / quoted-pair + pub fn qcontent(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = alt((take_while_m_n(1, 1, is_qtext), quoted_pair)); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) + } + + /// quoted-string = [CFWS] DQUOTE *([FWS] qcontent) [FWS] DQUOTE [CFWS] + pub fn quoted_string(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple(( + opt(CFWS), + DQUOTE, + many0(tuple((opt(FWS), qcontent))), + opt(FWS), + DQUOTE, + opt(CFWS), + )); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) + } +} + +/// 3.2.5. Miscellaneous Tokens +pub mod miscellaneous { + use super::{atom::atom, quoted_strings::quoted_string}; + use nom::{branch::alt, IResult}; + + /// word = atom / quoted-string + pub fn word(input: &[u8]) -> IResult<&[u8], &[u8]> { + alt((atom, quoted_string))(input) + } + + // phrase = 1*word / obs-phrase + // ... + + // unstructured = (*([FWS] VCHAR) *WSP) / obs-unstruct + // ... +} + +/// 3.3. 
Date and Time Specification +pub mod datetime { + use super::folding_ws_and_comment::{CFWS, FWS}; + use abnf_core::streaming::is_DIGIT; + use nom::{ + branch::alt, + bytes::streaming::{tag, tag_no_case, take_while_m_n}, + combinator::{opt, recognize}, + sequence::tuple, + IResult, + }; + + // date-time = [ day-of-week "," ] date time [CFWS] + pub fn date_time(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple((opt(tuple((day_of_week, tag(b",")))), date, time, opt(CFWS))); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) + } + + // day-of-week = ([FWS] day-name) / obs-day-of-week + pub fn day_of_week(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple((opt(FWS), day_name)); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) + } + + // day-name = "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun" + pub fn day_name(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = alt(( + tag_no_case(b"Mon"), + tag_no_case(b"Tue"), + tag_no_case(b"Wed"), + tag_no_case(b"Thu"), + tag_no_case(b"Fri"), + tag_no_case(b"Sat"), + tag_no_case(b"Sun"), + )); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) + } + + // date = day month year + pub fn date(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple((day, month, year)); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) + } + + // day = ([FWS] 1*2DIGIT FWS) / obs-day + pub fn day(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple((opt(FWS), take_while_m_n(1, 2, is_DIGIT), FWS)); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) + } + + // month = "Jan" / "Feb" / "Mar" / "Apr" / "May" / "Jun" / "Jul" / "Aug" / "Sep" / "Oct" / "Nov" / "Dec" + pub fn month(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = alt(( + tag_no_case(b"Jan"), + tag_no_case(b"Feb"), + tag_no_case(b"Mar"), + tag_no_case(b"Apr"), + 
tag_no_case(b"May"), + tag_no_case(b"Jun"), + tag_no_case(b"Jul"), + tag_no_case(b"Aug"), + tag_no_case(b"Sep"), + tag_no_case(b"Oct"), + tag_no_case(b"Nov"), + tag_no_case(b"Dec"), + )); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) + } + + // year = (FWS 4*DIGIT FWS) / obs-year + pub fn year(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple((FWS, take_while_m_n(4, 8, is_DIGIT), FWS)); // FIXME: 4*?! + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) + } + + // time = time-of-day zone + pub fn time(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple((time_of_day, zone)); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) + } + + // time-of-day = hour ":" minute [ ":" second ] + pub fn time_of_day(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple((hour, tag(b":"), minute, opt(tuple((tag(b":"), second))))); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) + } + + // hour = 2DIGIT / obs-hour + pub fn hour(input: &[u8]) -> IResult<&[u8], &[u8]> { + // FIXME: obs- forms must not be used in SMTP. Never? + + let parser = take_while_m_n(2, 2, is_DIGIT); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) + } + + // minute = 2DIGIT / obs-minute + pub fn minute(input: &[u8]) -> IResult<&[u8], &[u8]> { + // FIXME: obs- forms must not be used in SMTP. Never? + + let parser = take_while_m_n(2, 2, is_DIGIT); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) + } + + // second = 2DIGIT / obs-second + pub fn second(input: &[u8]) -> IResult<&[u8], &[u8]> { + // FIXME: obs- forms must not be used in SMTP. Never? 
+ + let parser = take_while_m_n(2, 2, is_DIGIT); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) + } + + // zone = (FWS ( "+" / "-" ) 4DIGIT) / obs-zone + pub fn zone(input: &[u8]) -> IResult<&[u8], &[u8]> { + // FIXME: obs- forms must not be used in SMTP. Never? + + let parser = tuple(( + FWS, + alt((tag(b"+"), tag(b"-"))), + take_while_m_n(4, 4, is_DIGIT), + )); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) + } +} + +/// 3.4.1. Addr-Spec Specification +pub mod addr_spec { + use super::{ + atom::dot_atom, + folding_ws_and_comment::{CFWS, FWS}, + obsolete::{obs_domain, obs_dtext, obs_local_part}, + quoted_strings::quoted_string, + }; + use nom::{ + branch::alt, + bytes::streaming::{tag, take_while_m_n}, + combinator::{opt, recognize}, + multi::many0, + sequence::tuple, + IResult, + }; + + // addr-spec = local-part "@" domain + + /// local-part = dot-atom / quoted-string / obs-local-part + pub fn local_part(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = alt((dot_atom, quoted_string, obs_local_part)); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) + } + + /// domain = dot-atom / domain-literal / obs-domain + pub fn domain(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = alt((dot_atom, domain_literal, obs_domain)); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) + } + + /// domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS] + pub fn domain_literal(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple(( + opt(CFWS), + tag(b"["), + many0(tuple((opt(FWS), dtext))), + opt(FWS), + tag(b"]"), + opt(CFWS), + )); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) + } + + /// Printable US-ASCII characters not including "[", "]", or "\". 
+ /// + /// dtext = %d33-90 / %d94-126 / obs-dtext + pub fn dtext(input: &[u8]) -> IResult<&[u8], &[u8]> { + fn is_a(byte: u8) -> bool { + match byte { + 33..=90 => true, + _ => false, + } + } + + fn is_b(byte: u8) -> bool { + match byte { + 94..=126 => true, + _ => false, + } + } + + let parser = alt(( + take_while_m_n(1, 1, is_a), + take_while_m_n(1, 1, is_b), + obs_dtext, + )); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) + } +} + +/// 3.6.4. Identification Fields +pub mod identification { + use super::{ + addr_spec::dtext, + atom::dot_atom_text, + folding_ws_and_comment::CFWS, + obsolete::{obs_id_left, obs_id_right}, + }; + use nom::{ + branch::alt, + bytes::streaming::tag, + combinator::{opt, recognize}, + multi::many0, + sequence::{delimited, tuple}, + IResult, + }; + + // message-id = "Message-ID:" msg-id CRLF + // ... + + // in-reply-to = "In-Reply-To:" 1*msg-id CRLF + // ... + + // references = "References:" 1*msg-id CRLF + // ... + + /// msg-id = [CFWS] "<" id-left "@" id-right ">" [CFWS] + pub fn msg_id(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple(( + opt(CFWS), + tag(b"<"), + id_left, + tag(b"@"), + id_right, + tag(b">"), + opt(CFWS), + )); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) + } + + /// id-left = dot-atom-text / obs-id-left + pub fn id_left(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = alt((dot_atom_text, obs_id_left)); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) + } + + /// id-right = dot-atom-text / no-fold-literal / obs-id-right + pub fn id_right(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = alt((dot_atom_text, no_fold_literal, obs_id_right)); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) + } + + // no-fold-literal = "[" *dtext "]" + pub fn no_fold_literal(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = delimited(tag(b"["), many0(dtext), 
tag(b"]")); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) + } +} + +/// 4.1. Miscellaneous Obsolete Tokens +pub mod obsolete { + use super::{ + addr_spec::{domain, local_part}, + atom::atom, + miscellaneous::word, + quoted_characters::quoted_pair, + }; + use abnf_core::streaming::{CR, LF}; + use nom::{ + branch::alt, + bytes::streaming::{tag, take_while_m_n}, + combinator::recognize, + multi::many0, + sequence::tuple, + IResult, + }; + + /// US-ASCII control characters that do not include the carriage + /// return, line feed, and white space characters + /// + /// obs-NO-WS-CTL = %d1-8 / %d11 / %d12 / %d14-31 / %d127 + pub fn is_obs_NO_WS_CTL(byte: u8) -> bool { + match byte { + 1..=8 | 11 | 12 | 14..=31 | 127 => true, + _ => false, + } + } + + /// obs-qtext = obs-NO-WS-CTL + pub fn is_obs_qtext(byte: u8) -> bool { + is_obs_NO_WS_CTL(byte) + } + + /// obs-qp = "\" (%d0 / obs-NO-WS-CTL / LF / CR) + pub fn obs_qp(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple(( + tag(b"\\"), + alt(( + take_while_m_n(1, 1, |x| x == 0x00), + take_while_m_n(1, 1, is_obs_NO_WS_CTL), + LF, + CR, + )), + )); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) + } + + // 4.4. Obsolete Addressing (RFC 5322) + + /// obs-local-part = word *("." word) + pub fn obs_local_part(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple((word, many0(tuple((tag(b"."), word))))); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) + } + + /// obs-domain = atom *("." 
atom) + pub fn obs_domain(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple((atom, many0(tuple((tag(b"."), atom))))); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) + } + + /// obs-dtext = obs-NO-WS-CTL / quoted-pair + pub fn obs_dtext(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = alt((take_while_m_n(1, 1, is_obs_NO_WS_CTL), quoted_pair)); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) + } + + // 4.5.4. Obsolete Identification Fields (RFC 5322) + + /// obs-id-left = local-part + pub fn obs_id_left(input: &[u8]) -> IResult<&[u8], &[u8]> { + local_part(input) + } + + /// obs-id-right = domain + pub fn obs_id_right(input: &[u8]) -> IResult<&[u8], &[u8]> { + domain(input) + } +} diff --git a/src/parse/mod.rs b/src/parse/mod.rs new file mode 100644 index 0000000..0e95968 --- /dev/null +++ b/src/parse/mod.rs @@ -0,0 +1,37 @@ +#![allow(non_snake_case)] + +use abnf_core::streaming::{is_ALPHA, is_DIGIT}; +use nom::{ + branch::alt, + bytes::streaming::{tag, take_while}, + combinator::{map_res, opt, recognize}, + sequence::tuple, + IResult, +}; +use std::str::from_utf8; + +pub mod address; +pub mod command; +pub mod imf; +pub mod replies; +pub mod response; +pub mod trace; +pub mod utils; + +fn is_base64_char(i: u8) -> bool { + is_ALPHA(i) || is_DIGIT(i) || i == b'+' || i == b'/' +} + +pub fn base64(input: &[u8]) -> IResult<&[u8], &str> { + let parser = map_res( + recognize(tuple(( + take_while(is_base64_char), + opt(alt((tag("=="), tag("=")))), + ))), + from_utf8, + ); + + let (remaining, base64) = parser(input)?; + + Ok((remaining, base64)) +} diff --git a/src/parse/replies.rs b/src/parse/replies.rs new file mode 100644 index 0000000..7fffa64 --- /dev/null +++ b/src/parse/replies.rs @@ -0,0 +1,80 @@ +//! 4.2. 
SMTP Replies (RFC 5321) + +use crate::parse::command::{address_literal, Domain}; +use abnf_core::streaming::{CRLF, SP}; +use nom::{ + branch::alt, + bytes::streaming::{tag, take_while1, take_while_m_n}, + combinator::{opt, recognize}, + multi::many0, + sequence::tuple, + IResult, +}; + +/// Greeting = ( "220 " (Domain / address-literal) [ SP textstring ] CRLF ) / +/// ( "220-" (Domain / address-literal) [ SP textstring ] CRLF +/// *( "220-" [ textstring ] CRLF ) +/// "220" [ SP textstring ] CRLF ) +pub fn Greeting(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = alt(( + recognize(tuple(( + tag(b"220 "), + alt((Domain, address_literal)), + opt(tuple((SP, textstring))), + CRLF, + ))), + recognize(tuple(( + tag(b"220-"), + alt((Domain, address_literal)), + opt(tuple((SP, textstring))), + CRLF, + many0(tuple((tag(b"220-"), opt(textstring), CRLF))), + tag(b"220"), + opt(tuple((SP, textstring))), + CRLF, + ))), + )); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// HT, SP, Printable US-ASCII +/// +/// textstring = 1*(%d09 / %d32-126) +pub fn textstring(input: &[u8]) -> IResult<&[u8], &[u8]> { + fn is_value(byte: u8) -> bool { + match byte { + 9 | 32..=126 => true, + _ => false, + } + } + + take_while1(is_value)(input) +} + +/// Reply-line = *( Reply-code "-" [ textstring ] CRLF ) +/// Reply-code [ SP textstring ] CRLF +pub fn Reply_line(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple(( + many0(tuple((Reply_code, tag(b"-"), opt(textstring), CRLF))), + Reply_code, + opt(tuple((SP, textstring))), + CRLF, + )); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// Reply-code = %x32-35 %x30-35 %x30-39 +/// +/// 2345 +/// 012345 +/// 0123456789 +pub fn Reply_code(input: &[u8]) -> IResult<&[u8], &[u8]> { + // FIXME: do not accept all codes. 
+ take_while_m_n(3, 3, nom::character::is_digit)(input) +} diff --git a/src/parse/response.rs b/src/parse/response.rs new file mode 100644 index 0000000..7f32e5e --- /dev/null +++ b/src/parse/response.rs @@ -0,0 +1,93 @@ +use crate::parse::command::Domain; +use abnf_core::streaming::{is_ALPHA, is_DIGIT, CRLF, SP}; +use nom::{ + branch::alt, + bytes::streaming::{tag, take_while, take_while1, take_while_m_n}, + combinator::{opt, recognize}, + multi::many0, + sequence::tuple, + IResult, +}; + +/// ehlo-ok-rsp = ( "250" SP Domain [ SP ehlo-greet ] CRLF ) / +/// ( "250-" Domain [ SP ehlo-greet ] CRLF +/// *( "250-" ehlo-line CRLF ) +/// "250" SP ehlo-line CRLF ) +pub fn ehlo_ok_rsp(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = alt(( + recognize(tuple(( + tag(b"250"), + SP, + Domain, + opt(tuple((SP, ehlo_greet))), + CRLF, + ))), + recognize(tuple(( + tag(b"250-"), + Domain, + opt(tuple((SP, ehlo_greet))), + CRLF, + many0(tuple((tag(b"250-"), ehlo_line, CRLF))), + tag(b"250"), + SP, + ehlo_line, + CRLF, + ))), + )); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// String of any characters other than CR or LF. 
+/// +/// ehlo-greet = 1*(%d0-9 / %d11-12 / %d14-127) +pub fn ehlo_greet(input: &[u8]) -> IResult<&[u8], &[u8]> { + fn is_valid_character(byte: u8) -> bool { + match byte { + 0..=9 | 11..=12 | 14..=127 => true, + _ => false, + } + } + + take_while1(is_valid_character)(input) +} + +/// ehlo-line = ehlo-keyword *( SP ehlo-param ) +pub fn ehlo_line(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple((ehlo_keyword, many0(tuple((SP, ehlo_param))))); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// Additional syntax of ehlo-params depends on ehlo-keyword +/// +/// ehlo-keyword = (ALPHA / DIGIT) *(ALPHA / DIGIT / "-") +pub fn ehlo_keyword(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple(( + take_while_m_n(1, 1, |byte| is_ALPHA(byte) || is_DIGIT(byte)), + take_while(|byte| is_ALPHA(byte) || is_DIGIT(byte) || byte == b'-'), + )); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// Any CHAR excluding and all control characters +/// (US-ASCII 0-31 and 127 inclusive) +/// +/// ehlo-param = 1*(%d33-126) +pub fn ehlo_param(input: &[u8]) -> IResult<&[u8], &[u8]> { + fn is_valid_character(byte: u8) -> bool { + match byte { + 33..=126 => true, + _ => false, + } + } + + take_while1(is_valid_character)(input) +} diff --git a/src/parse/trace.rs b/src/parse/trace.rs new file mode 100644 index 0000000..d4a0ec4 --- /dev/null +++ b/src/parse/trace.rs @@ -0,0 +1,218 @@ +use crate::parse::{ + command::{address_literal, Atom, Domain, Mailbox, Path, Reverse_path, String}, + imf::{ + datetime::date_time, + folding_ws_and_comment::{CFWS, FWS}, + identification::msg_id, + }, +}; +/// 4.4. 
Trace Information (RFC 5321) +use abnf_core::streaming::CRLF; +use nom::{ + branch::alt, + bytes::streaming::{tag, tag_no_case}, + combinator::{opt, recognize}, + multi::many1, + sequence::tuple, + IResult, +}; + +/// Return-path-line = "Return-Path:" FWS Reverse-path +pub fn Return_path_line(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple((tag_no_case(b"Return-Path:"), FWS, Reverse_path, CRLF)); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// Time-stamp-line = "Received:" FWS Stamp +pub fn Time_stamp_line(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple((tag_no_case(b"Received:"), FWS, Stamp, CRLF)); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// Stamp = From-domain By-domain Opt-info [CFWS] ";" FWS date-time +/// +/// Caution: Where "date-time" is as defined in RFC 5322 [4] +/// but the "obs-" forms, especially two-digit +/// years, are prohibited in SMTP and MUST NOT be used. 
+pub fn Stamp(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple(( + From_domain, + By_domain, + Opt_info, + opt(CFWS), + tag(b";"), + FWS, + date_time, + )); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// From-domain = "FROM" FWS Extended-Domain +pub fn From_domain(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple((tag_no_case(b"FROM"), FWS, Extended_Domain)); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// By-domain = CFWS "BY" FWS Extended-Domain +pub fn By_domain(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple((CFWS, tag_no_case(b"BY"), FWS, Extended_Domain)); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// Extended-Domain = Domain / +/// ( Domain FWS "(" TCP-info ")" ) / +/// ( address-literal FWS "(" TCP-info ")" ) +pub fn Extended_Domain(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = alt(( + Domain, + recognize(tuple((Domain, FWS, tag(b"("), TCP_info, tag(b")")))), + recognize(tuple(( + address_literal, + FWS, + tag(b"("), + TCP_info, + tag(b")"), + ))), + )); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// Information derived by server from TCP connection not client EHLO. 
+/// +/// TCP-info = address-literal / ( Domain FWS address-literal ) +pub fn TCP_info(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = alt(( + address_literal, + recognize(tuple((Domain, FWS, address_literal))), + )); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// Opt-info = [Via] [With] [ID] [For] [Additional-Registered-Clauses] +pub fn Opt_info(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple(( + opt(Via), + opt(With), + opt(ID), + opt(For), + opt(Additional_Registered_Clauses), + )); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// Via = CFWS "VIA" FWS Link +pub fn Via(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple((CFWS, tag_no_case(b"VIA"), FWS, Link)); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// With = CFWS "WITH" FWS Protocol +pub fn With(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple((CFWS, tag_no_case(b"WITH"), FWS, Protocol)); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// ID = CFWS "ID" FWS ( Atom / msg-id ) +/// ; msg-id is defined in RFC 5322 [4] +pub fn ID(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple((CFWS, tag_no_case(b"ID"), FWS, alt((Atom, msg_id)))); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// For = CFWS "FOR" FWS ( Path / Mailbox ) +pub fn For(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = tuple((CFWS, tag_no_case(b"FOR"), FWS, alt((Path, Mailbox)))); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// Additional standard clauses may be added in this location by future standards and registration with +/// IANA. SMTP servers SHOULD NOT use unregistered names. See Section 8. 
+/// +/// Additional-Registered-Clauses = CFWS Atom FWS String +pub fn Additional_Registered_Clauses(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = many1(tuple((CFWS, Atom, FWS, String))); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// Link = "TCP" / Addtl-Link +pub fn Link(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = alt((tag_no_case(b"TCP"), Addtl_Link)); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// Additional standard names for links are registered with the Internet Assigned Numbers +/// Authority (IANA). "Via" is primarily of value with non-Internet transports. SMTP servers +/// SHOULD NOT use unregistered names. +/// +/// Addtl-Link = Atom +pub fn Addtl_Link(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = Atom; + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// Protocol = "ESMTP" / "SMTP" / Attdl-Protocol +pub fn Protocol(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = alt((tag_no_case(b"ESMTP"), tag_no_case(b"SMTP"), Attdl_Protocol)); + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} + +/// Additional standard names for protocols are registered with the Internet Assigned Numbers +/// Authority (IANA) in the "mail parameters" registry [9]. SMTP servers SHOULD NOT +/// use unregistered names. 
+/// +/// Attdl-Protocol = Atom +pub fn Attdl_Protocol(input: &[u8]) -> IResult<&[u8], &[u8]> { + let parser = Atom; + + let (remaining, parsed) = recognize(parser)(input)?; + + Ok((remaining, parsed)) +} diff --git a/src/parse/utils.rs b/src/parse/utils.rs new file mode 100644 index 0000000..99410e9 --- /dev/null +++ b/src/parse/utils.rs @@ -0,0 +1,10 @@ +use nom::{ + character::streaming::{line_ending, not_line_ending}, + IResult, +}; + +pub fn single_line(input: &[u8]) -> IResult<&[u8], String> { + let (rem, (line, _)) = nom::sequence::tuple((not_line_ending, line_ending))(input)?; + + Ok((rem, String::from_utf8(line.to_vec()).unwrap())) +} diff --git a/src/types.rs b/src/types.rs new file mode 100644 index 0000000..ef3f162 --- /dev/null +++ b/src/types.rs @@ -0,0 +1,132 @@ +use crate::escape; + +#[derive(Clone, PartialEq, Eq)] +pub enum Command { + Ehlo(Vec), + Helo(Vec), + Mail { + data: Vec, + params: Option>, + }, + Rcpt { + data: Vec, + params: Option>, + }, + Data, + Rset, + Vrfy(Vec), + Expn(Vec), + Help(Option>), + Noop(Option>), + Quit, + // Extensions + StartTLS, + // AUTH LOGIN + AuthLogin(Option), + // AUTH PLAIN + AuthPlain(Option), +} + +impl Command { + pub fn name(&self) -> &'static str { + match self { + Command::Ehlo(_) => "EHLO", + Command::Helo(_) => "HELO", + Command::Mail { .. } => "MAIL", + Command::Rcpt { .. 
} => "RCPT", + Command::Data => "DATA", + Command::Rset => "RSET", + Command::Vrfy(_) => "VRFY", + Command::Expn(_) => "EXPN", + Command::Help(_) => "HELP", + Command::Noop(_) => "NOOP", + Command::Quit => "QUIT", + // Extensions + Command::StartTLS => "STARTTLS", + // TODO: SMTP AUTH LOGIN + Command::AuthLogin(_) => "AUTHLOGIN", + // TODO: SMTP AUTH PLAIN + Command::AuthPlain(_) => "AUTHPLAIN", + } + } +} + +impl std::fmt::Debug for Command { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + use Command::*; + + match self { + Ehlo(data) => write!(f, "Ehlo({})", escape(data)), + Helo(data) => write!(f, "Helo({})", escape(data)), + Mail { + data: path, + params: None, + } => write!(f, "Mail({})", escape(path)), + Mail { + data: path, + params: Some(params), + } => write!(f, "Mail({}, {})", escape(path), escape(params)), + Rcpt { data, params: None } => write!(f, "Rcpt({})", escape(data)), + Rcpt { + data, + params: Some(params), + } => write!(f, "Rcpt({}, {})", escape(data), escape(params)), + Data => write!(f, "Data"), + Rset => write!(f, "Rset"), + Vrfy(data) => write!(f, "Vrfy({})", escape(data)), + Expn(data) => write!(f, "Expn({})", escape(data)), + Help(None) => write!(f, "Help"), + Help(Some(data)) => write!(f, "Help({})", escape(data)), + Noop(None) => write!(f, "Noop"), + Noop(Some(data)) => write!(f, "Noop({})", escape(data)), + Quit => write!(f, "Quit"), + // Extensions + StartTLS => write!(f, "StartTLS"), + // TODO: SMTP Auth + AuthLogin(data) => write!(f, "AuthLogin({:?})", data), + // TODO: SMTP Auth + AuthPlain(data) => write!(f, "AuthPlain({:?})", data), + } + } +} + +pub type EhloLine = (String, Option); + +#[cfg(test)] +mod test { + use crate::{parse::command::*, types::*}; + + #[test] + fn test_subdomain() { + let (rem, parsed) = sub_domain(b"example???").unwrap(); + assert_eq!(parsed, b"example"); + assert_eq!(rem, b"???"); + } + + #[test] + fn test_ehlo() { + let (rem, parsed) = ehlo(b"EHLO 
[123.123.123.123]\r\n???").unwrap(); + assert_eq!(parsed, Command::Ehlo(b"[123.123.123.123]".to_vec())); + assert_eq!(rem, b"???"); + } + + #[test] + fn test_helo() { + let (rem, parsed) = helo(b"HELO example.com\r\n???").unwrap(); + assert_eq!(parsed, Command::Helo(b"example.com".to_vec())); + assert_eq!(rem, b"???"); + } + + #[test] + fn test_mail() { + let (rem, parsed) = mail(b"MAIL FROM:\r\n???").unwrap(); + assert_eq!( + parsed, + Command::Mail { + data: b"".to_vec(), + params: None + } + ); + assert_eq!(rem, b"???"); + } +}