From f7bbc00172cd41a516c036cde51fed5345fe480f Mon Sep 17 00:00:00 2001 From: Dirkjan Ochtman Date: Thu, 1 Sep 2022 13:39:11 +0200 Subject: [PATCH] Move deserializer code into de module --- instant-xml-macros/src/de.rs | 4 +- instant-xml/src/de.rs | 376 +++++++++++++++++++++++++++++++++++ instant-xml/src/impls.rs | 5 +- instant-xml/src/lib.rs | 250 +---------------------- instant-xml/src/parse.rs | 134 ------------- 5 files changed, 383 insertions(+), 386 deletions(-) create mode 100644 instant-xml/src/de.rs delete mode 100644 instant-xml/src/parse.rs diff --git a/instant-xml-macros/src/de.rs b/instant-xml-macros/src/de.rs index 2328594..e343b69 100644 --- a/instant-xml-macros/src/de.rs +++ b/instant-xml-macros/src/de.rs @@ -133,8 +133,8 @@ impl Deserializer { out.extend(quote!( fn deserialize(deserializer: &mut ::instant_xml::Deserializer<'xml>) -> Result { - use ::instant_xml::parse::XmlRecord; - use ::instant_xml::{Error, Deserializer, Visitor} ; + use ::instant_xml::de::{XmlRecord, Deserializer, Visitor}; + use ::instant_xml::Error; enum __Elements { #elements_enum diff --git a/instant-xml/src/de.rs b/instant-xml/src/de.rs new file mode 100644 index 0000000..0780e33 --- /dev/null +++ b/instant-xml/src/de.rs @@ -0,0 +1,376 @@ +use std::collections::HashMap; +use std::iter::Peekable; + +use super::Error; +use xmlparser::{ElementEnd, Token, Tokenizer}; + +pub struct Deserializer<'xml> { + parser: XmlParser<'xml>, + def_namespaces: HashMap<&'xml str, &'xml str>, + parser_namespaces: HashMap<&'xml str, &'xml str>, + def_default_namespace: &'xml str, + parser_default_namespace: &'xml str, + tag_attributes: Vec<(&'xml str, &'xml str)>, + next_type: EntityType, + next_def_namespace: Option<&'xml str>, +} + +impl<'xml> Deserializer<'xml> { + pub fn new(input: &'xml str) -> Self { + Self { + parser: XmlParser::new(input), + def_namespaces: std::collections::HashMap::new(), + parser_namespaces: std::collections::HashMap::new(), + def_default_namespace: "", + parser_default_namespace: "", + tag_attributes: Vec::new(), + next_type: EntityType::Element, + next_def_namespace: None, + } + } + + pub fn peek_next_tag(&mut self) -> Result>, Error> { + self.parser.peek_next_tag() + } + + // Check if defined and gotten namespaces equals for each field + pub fn compare_namespace( + &self, + expected: &Option<&str>, + actual: Option<&str>, + ) -> Result<(), Error> { + match (expected, actual) { + (Some(expected), Some(actual)) => { + match self.parser_namespaces.get(expected) == self.def_namespaces.get(actual) { + true => Ok(()), + false => Err(Error::WrongNamespace), + } + } + (Some(_), None) | (None, Some(_)) => Err(Error::WrongNamespace), + (None, None) => Ok(()), + } + } + + pub fn compare_parser_and_def_default_namespaces(&self) -> bool { + self.parser_default_namespace == self.def_default_namespace + } + + pub fn peek_next_attribute(&self) -> Option<&(&'xml str, &'xml str)> { + self.tag_attributes.last() + } + + pub fn deserialize_struct( + &mut self, + visitor: V, + name: &str, + def_default_namespace: &'xml str, + def_namespaces: &HashMap<&'xml str, &'xml str>, + ) -> Result + where + V: Visitor<'xml>, + { + // Saveing current defined default namespace + let def_default_namespace_to_revert = self.def_default_namespace; + self.def_default_namespace = def_default_namespace; + + // Adding struct defined namespaces + let new_def_namespaces = def_namespaces + .iter() + .filter(|(k, v)| self.def_namespaces.insert(k, v).is_none()) + .collect::>(); + + // Process open tag + let tag_data = match self.parser.next() { + Some(Ok(XmlRecord::Open(item))) if item.key == name => item, + _ => return Err(Error::UnexpectedValue), + }; + + // Set current attributes + self.tag_attributes = tag_data.attributes; + + // Saveing current parser default namespace + let parser_default_namespace_to_revert = self.parser_default_namespace; + + // Set parser default namespace + match tag_data.default_namespace { + Some(namespace) => { + self.parser_default_namespace = namespace; + } + None => { + // If there is no default namespace in the tag, check if parent default namespace equals the current one + if def_default_namespace_to_revert != self.def_default_namespace { + return Err(Error::WrongNamespace); + } + } + } + + // Compare parser namespace with defined one + if !self.compare_parser_and_def_default_namespaces() { + return Err(Error::WrongNamespace); + } + + // Adding parser namespaces + let new_parser_namespaces = tag_data + .namespaces + .iter() + .filter(|(k, v)| self.parser_namespaces.insert(k, v).is_none()) + .collect::>(); + + let ret = visitor.visit_struct(self)?; + + // Process close tag + self.check_close_tag(name)?; + + // Removing parser namespaces + let _ = new_parser_namespaces + .iter() + .map(|(k, _)| self.parser_namespaces.remove(*k)); + + // Removing struct defined namespaces + let _ = new_def_namespaces + .iter() + .map(|(k, _)| self.def_namespaces.remove(*k)); + + // Retriving old defined namespace + self.def_default_namespace = def_default_namespace_to_revert; + + // Retriving old parser namespace + self.parser_default_namespace = parser_default_namespace_to_revert; + Ok(ret) + } + + pub fn set_next_type_as_attribute(&mut self) -> Result<(), Error> { + if self.next_type == EntityType::Attribute { + return Err(Error::UnexpectedState); + } + + self.next_type = EntityType::Attribute; + Ok(()) + } + + pub fn consume_next_type(&mut self) -> EntityType { + let ret = self.next_type.clone(); + self.next_type = EntityType::Element; + ret + } + + pub fn set_next_def_namespace(&mut self, namespace: Option<&'xml str>) -> Result<(), Error> { + if self.next_def_namespace.is_some() { + return Err(Error::UnexpectedState); + } + + self.next_def_namespace = namespace; + Ok(()) + } + + pub fn consume_next_def_namespace(&mut self) -> Option<&'xml str> { + let ret = self.next_def_namespace; + self.next_def_namespace = None; + ret + } + + pub(crate) fn deserialize_element(&mut self, visitor: V) -> Result + where + V: Visitor<'xml>, + { + // Process open tag + let tag_data = match self.parser.next() { + Some(Ok(XmlRecord::Open(item))) => item, + _ => return Err(Error::UnexpectedValue), + }; + + if tag_data.default_namespace != self.consume_next_def_namespace() { + return Err(Error::WrongNamespace); + } + + match self.parser.next() { + Some(Ok(XmlRecord::Element(v))) => { + let ret = visitor.visit_str(v); + self.parser.next(); + ret + } + _ => Err(Error::UnexpectedValue), + } + } + + pub(crate) fn deserialize_attribute(&mut self, visitor: V) -> Result + where + V: Visitor<'xml>, + { + match self.tag_attributes.pop() { + Some((_, value)) => visitor.visit_str(value), + None => Err(Error::UnexpectedEndOfStream), + } + } + + fn check_close_tag(&mut self, name: &str) -> Result<(), Error> { + let item = match self.parser.next() { + Some(item) => item?, + None => return Err(Error::MissingTag), + }; + + match item { + XmlRecord::Close(v) if v == name => Ok(()), + _ => Err(Error::UnexpectedTag), + } + } +} + +pub struct XmlParser<'xml> { + stack: Vec<&'xml str>, + iter: Peekable>, +} + +impl<'a> XmlParser<'a> { + pub fn new(input: &'a str) -> XmlParser<'a> { + XmlParser { + stack: Vec::new(), + iter: Tokenizer::from(input).peekable(), + } + } + + pub fn peek_next_tag(&mut self) -> Result>, Error> { + let item = match self.iter.peek() { + Some(v) => v, + None => return Ok(None), + }; + + match item { + Ok(Token::ElementStart { prefix, local, .. }) => { + let prefix = match prefix.is_empty() { + true => None, + false => Some(prefix.as_str()), + }; + + Ok(Some(XmlRecord::Open(TagData { + key: local.as_str(), + attributes: Vec::new(), + default_namespace: Some(""), + namespaces: HashMap::new(), + prefix, + }))) + } + Ok(Token::ElementEnd { + end: ElementEnd::Close(..), + .. + }) => { + if self.stack.is_empty() { + return Err(Error::UnexpectedEndOfStream); + } + + return Ok(Some(XmlRecord::Close(self.stack.last().unwrap()))); + } + Ok(_) => Err(Error::UnexpectedToken), + Err(e) => Err(Error::Parse(*e)), + } + } +} + +impl<'xml> Iterator for XmlParser<'xml> { + type Item = Result, Error>; + + #[inline] + fn next(&mut self) -> Option { + let mut key: Option<&str> = None; + let mut prefix_ret: Option<&str> = None; + let mut default_namespace = None; + let mut namespaces = HashMap::new(); + let mut attributes = Vec::new(); + + loop { + let item = match self.iter.next() { + Some(v) => v, + None => return None, + }; + + match item { + Ok(Token::ElementStart { prefix, local, .. }) => { + key = Some(local.as_str()); + prefix_ret = match prefix.is_empty() { + true => None, + false => Some(prefix.as_str()), + }; + } + Ok(Token::ElementEnd { end, .. }) => match end { + ElementEnd::Open => { + self.stack.push(key.unwrap()); + + return Some(Ok(XmlRecord::Open(TagData { + key: key.unwrap(), + attributes, + default_namespace, + namespaces, + prefix: prefix_ret, + }))); + } + ElementEnd::Close(_, v) => match self.stack.pop() { + Some(last) if last == v.as_str() => { + return Some(Ok(XmlRecord::Close(last))); + } + _ => return Some(Err(Error::UnexpectedValue)), + }, + ElementEnd::Empty => { + todo!(); + } + }, + Ok(Token::Attribute { + prefix, + local, + value, + .. + }) => { + if prefix.is_empty() && local.as_str() == "xmlns" { + // Default namespace + default_namespace = Some(value.as_str()); + } else if prefix.as_str() == "xmlns" { + // Namespaces + namespaces.insert(local.as_str(), value.as_str()); + } else if prefix.is_empty() { + // Other attributes + attributes.push((local.as_str(), value.as_str())); + } else { + // TODO: Can the attributes have the prefix? + todo!(); + } + } + Ok(Token::Text { text }) => { + return Some(Ok(XmlRecord::Element(text.as_str()))); + } + Ok(_) => return Some(Err(Error::UnexpectedToken)), + Err(e) => return Some(Err(Error::Parse(e))), + } + } + } +} + +pub trait Visitor<'xml>: Sized { + type Value; + + fn visit_str(self, _value: &'xml str) -> Result { + unimplemented!(); + } + + fn visit_struct(&self, _deserializer: &mut Deserializer<'xml>) -> Result { + unimplemented!(); + } +} + +pub enum XmlRecord<'xml> { + Open(TagData<'xml>), + Element(&'xml str), + Close(&'xml str), +} + +pub struct TagData<'xml> { + pub key: &'xml str, + pub attributes: Vec<(&'xml str, &'xml str)>, + pub default_namespace: Option<&'xml str>, + pub namespaces: HashMap<&'xml str, &'xml str>, + pub prefix: Option<&'xml str>, +} + +#[derive(Clone, PartialEq, Eq)] +pub enum EntityType { + Element, + Attribute, +} diff --git a/instant-xml/src/impls.rs b/instant-xml/src/impls.rs index f0bf7af..1fda514 100644 --- a/instant-xml/src/impls.rs +++ b/instant-xml/src/impls.rs @@ -3,9 +3,8 @@ use std::fmt; use std::marker::PhantomData; use std::str::FromStr; -use crate::{ - Deserializer, EntityType, Error, FieldAttribute, FromXml, Serializer, TagName, ToXml, Visitor, -}; +use crate::de::{EntityType, Visitor}; +use crate::{Deserializer, Error, FieldAttribute, FromXml, Serializer, TagName, ToXml}; // Deserializer struct FromStrToVisitor(PhantomData) diff --git a/instant-xml/src/lib.rs b/instant-xml/src/lib.rs index 43c0fe8..cc16c71 100644 --- a/instant-xml/src/lib.rs +++ b/instant-xml/src/lib.rs @@ -6,25 +6,11 @@ use thiserror::Error; pub use xmlparser; pub use macros::{FromXml, ToXml}; -use parse::XmlParser; -pub mod impls; +mod impls; #[doc(hidden)] -pub mod parse; - -pub struct TagData<'xml> { - pub key: &'xml str, - pub attributes: Vec<(&'xml str, &'xml str)>, - pub default_namespace: Option<&'xml str>, - pub namespaces: HashMap<&'xml str, &'xml str>, - pub prefix: Option<&'xml str>, -} - -pub enum XmlRecord<'xml> { - Open(TagData<'xml>), - Element(&'xml str), - Close(&'xml str), -} +pub mod de; +pub use de::Deserializer; pub trait ToXml { fn to_xml(&self) -> Result { @@ -173,12 +159,6 @@ pub struct FieldContext<'xml> { pub attribute: Option>, } -#[derive(Clone, PartialEq, Eq)] -pub enum EntityType { - Element, - Attribute, -} - pub enum TagName { FieldName, Custom(&'static str), @@ -201,230 +181,6 @@ pub trait FromXml<'xml>: Sized { } } -pub trait Visitor<'xml>: Sized { - type Value; - - fn visit_str(self, _value: &'xml str) -> Result { - unimplemented!(); - } - - fn visit_struct(&self, _deserializer: &mut Deserializer<'xml>) -> Result { - unimplemented!(); - } -} - -pub struct Deserializer<'xml> { - parser: XmlParser<'xml>, - def_namespaces: HashMap<&'xml str, &'xml str>, - parser_namespaces: HashMap<&'xml str, &'xml str>, - def_default_namespace: &'xml str, - parser_default_namespace: &'xml str, - tag_attributes: Vec<(&'xml str, &'xml str)>, - next_type: EntityType, - next_def_namespace: Option<&'xml str>, -} - -impl<'xml> Deserializer<'xml> { - pub fn new(input: &'xml str) -> Self { - Self { - parser: XmlParser::new(input), - def_namespaces: std::collections::HashMap::new(), - parser_namespaces: std::collections::HashMap::new(), - def_default_namespace: "", - parser_default_namespace: "", - tag_attributes: Vec::new(), - next_type: EntityType::Element, - next_def_namespace: None, - } - } - - pub fn peek_next_tag(&mut self) -> Result>, Error> { - self.parser.peek_next_tag() - } - - // Check if defined and gotten namespaces equals for each field - pub fn compare_namespace( - &self, - expected: &Option<&str>, - actual: Option<&str>, - ) -> Result<(), Error> { - match (expected, actual) { - (Some(expected), Some(actual)) => { - match self.parser_namespaces.get(expected) == self.def_namespaces.get(actual) { - true => Ok(()), - false => Err(Error::WrongNamespace), - } - } - (Some(_), None) | (None, Some(_)) => Err(Error::WrongNamespace), - (None, None) => Ok(()), - } - } - - pub fn compare_parser_and_def_default_namespaces(&self) -> bool { - self.parser_default_namespace == self.def_default_namespace - } - - pub fn peek_next_attribute(&self) -> Option<&(&'xml str, &'xml str)> { - self.tag_attributes.last() - } - - pub fn deserialize_struct( - &mut self, - visitor: V, - name: &str, - def_default_namespace: &'xml str, - def_namespaces: &HashMap<&'xml str, &'xml str>, - ) -> Result - where - V: Visitor<'xml>, - { - // Saveing current defined default namespace - let def_default_namespace_to_revert = self.def_default_namespace; - self.def_default_namespace = def_default_namespace; - - // Adding struct defined namespaces - let new_def_namespaces = def_namespaces - .iter() - .filter(|(k, v)| self.def_namespaces.insert(k, v).is_none()) - .collect::>(); - - // Process open tag - let tag_data = match self.parser.next() { - Some(Ok(XmlRecord::Open(item))) if item.key == name => item, - _ => return Err(Error::UnexpectedValue), - }; - - // Set current attributes - self.tag_attributes = tag_data.attributes; - - // Saveing current parser default namespace - let parser_default_namespace_to_revert = self.parser_default_namespace; - - // Set parser default namespace - match tag_data.default_namespace { - Some(namespace) => { - self.parser_default_namespace = namespace; - } - None => { - // If there is no default namespace in the tag, check if parent default namespace equals the current one - if def_default_namespace_to_revert != self.def_default_namespace { - return Err(Error::WrongNamespace); - } - } - } - - // Compare parser namespace with defined one - if !self.compare_parser_and_def_default_namespaces() { - return Err(Error::WrongNamespace); - } - - // Adding parser namespaces - let new_parser_namespaces = tag_data - .namespaces - .iter() - .filter(|(k, v)| self.parser_namespaces.insert(k, v).is_none()) - .collect::>(); - - let ret = visitor.visit_struct(self)?; - - // Process close tag - self.check_close_tag(name)?; - - // Removing parser namespaces - let _ = new_parser_namespaces - .iter() - .map(|(k, _)| self.parser_namespaces.remove(*k)); - - // Removing struct defined namespaces - let _ = new_def_namespaces - .iter() - .map(|(k, _)| self.def_namespaces.remove(*k)); - - // Retriving old defined namespace - self.def_default_namespace = def_default_namespace_to_revert; - - // Retriving old parser namespace - self.parser_default_namespace = parser_default_namespace_to_revert; - Ok(ret) - } - - pub fn set_next_type_as_attribute(&mut self) -> Result<(), Error> { - if self.next_type == EntityType::Attribute { - return Err(Error::UnexpectedState); - } - - self.next_type = EntityType::Attribute; - Ok(()) - } - - pub fn consume_next_type(&mut self) -> EntityType { - let ret = self.next_type.clone(); - self.next_type = EntityType::Element; - ret - } - - pub fn set_next_def_namespace(&mut self, namespace: Option<&'xml str>) -> Result<(), Error> { - if self.next_def_namespace.is_some() { - return Err(Error::UnexpectedState); - } - - self.next_def_namespace = namespace; - Ok(()) - } - - pub fn consume_next_def_namespace(&mut self) -> Option<&'xml str> { - let ret = self.next_def_namespace; - self.next_def_namespace = None; - ret - } - - fn deserialize_element(&mut self, visitor: V) -> Result - where - V: Visitor<'xml>, - { - // Process open tag - let tag_data = match self.parser.next() { - Some(Ok(XmlRecord::Open(item))) => item, - _ => return Err(Error::UnexpectedValue), - }; - - if tag_data.default_namespace != self.consume_next_def_namespace() { - return Err(Error::WrongNamespace); - } - - match self.parser.next() { - Some(Ok(XmlRecord::Element(v))) => { - let ret = visitor.visit_str(v); - self.parser.next(); - ret - } - _ => Err(Error::UnexpectedValue), - } - } - - fn deserialize_attribute(&mut self, visitor: V) -> Result - where - V: Visitor<'xml>, - { - match self.tag_attributes.pop() { - Some((_, value)) => visitor.visit_str(value), - None => Err(Error::UnexpectedEndOfStream), - } - } - - fn check_close_tag(&mut self, name: &str) -> Result<(), Error> { - let item = match self.parser.next() { - Some(item) => item?, - None => return Err(Error::MissingTag), - }; - - match item { - XmlRecord::Close(v) if v == name => Ok(()), - _ => Err(Error::UnexpectedTag), - } - } -} - pub trait FromXmlOwned: for<'xml> FromXml<'xml> {} #[derive(Debug, Error, PartialEq, Eq)] diff --git a/instant-xml/src/parse.rs b/instant-xml/src/parse.rs deleted file mode 100644 index 53d030b..0000000 --- a/instant-xml/src/parse.rs +++ /dev/null @@ -1,134 +0,0 @@ -use std::collections::HashMap; -use std::iter::Peekable; - -use xmlparser::{ElementEnd, Token, Tokenizer}; - -use crate::Error; -pub use crate::{TagData, XmlRecord}; - -pub struct XmlParser<'xml> { - stack: Vec<&'xml str>, - iter: Peekable>, -} - -impl<'a> XmlParser<'a> { - pub fn new(input: &'a str) -> XmlParser<'a> { - XmlParser { - stack: Vec::new(), - iter: Tokenizer::from(input).peekable(), - } - } - - pub fn peek_next_tag(&mut self) -> Result>, Error> { - let item = match self.iter.peek() { - Some(v) => v, - None => return Ok(None), - }; - - match item { - Ok(Token::ElementStart { prefix, local, .. }) => { - let prefix = match prefix.is_empty() { - true => None, - false => Some(prefix.as_str()), - }; - - Ok(Some(XmlRecord::Open(TagData { - key: local.as_str(), - attributes: Vec::new(), - default_namespace: Some(""), - namespaces: HashMap::new(), - prefix, - }))) - } - Ok(Token::ElementEnd { - end: ElementEnd::Close(..), - .. - }) => { - if self.stack.is_empty() { - return Err(Error::UnexpectedEndOfStream); - } - - return Ok(Some(XmlRecord::Close(self.stack.last().unwrap()))); - } - Ok(_) => Err(Error::UnexpectedToken), - Err(e) => Err(Error::Parse(*e)), - } - } -} - -impl<'xml> Iterator for XmlParser<'xml> { - type Item = Result, Error>; - - #[inline] - fn next(&mut self) -> Option { - let mut key: Option<&str> = None; - let mut prefix_ret: Option<&str> = None; - let mut default_namespace = None; - let mut namespaces = HashMap::new(); - let mut attributes = Vec::new(); - - loop { - let item = match self.iter.next() { - Some(v) => v, - None => return None, - }; - - match item { - Ok(Token::ElementStart { prefix, local, .. }) => { - key = Some(local.as_str()); - prefix_ret = match prefix.is_empty() { - true => None, - false => Some(prefix.as_str()), - }; - } - Ok(Token::ElementEnd { end, .. }) => match end { - ElementEnd::Open => { - self.stack.push(key.unwrap()); - - return Some(Ok(XmlRecord::Open(TagData { - key: key.unwrap(), - attributes, - default_namespace, - namespaces, - prefix: prefix_ret, - }))); - } - ElementEnd::Close(_, v) => match self.stack.pop() { - Some(last) if last == v.as_str() => { - return Some(Ok(XmlRecord::Close(last))); - } - _ => return Some(Err(Error::UnexpectedValue)), - }, - ElementEnd::Empty => { - todo!(); - } - }, - Ok(Token::Attribute { - prefix, - local, - value, - .. - }) => { - if prefix.is_empty() && local.as_str() == "xmlns" { - // Default namespace - default_namespace = Some(value.as_str()); - } else if prefix.as_str() == "xmlns" { - // Namespaces - namespaces.insert(local.as_str(), value.as_str()); - } else if prefix.is_empty() { - // Other attributes - attributes.push((local.as_str(), value.as_str())); - } else { - // TODO: Can the attributes have the prefix? - todo!(); - } - } - Ok(Token::Text { text }) => { - return Some(Ok(XmlRecord::Element(text.as_str()))); - } - Ok(_) => return Some(Err(Error::UnexpectedToken)), - Err(e) => return Some(Err(Error::Parse(e))), - } - } - } -}