From 9641055d281030ae24de5dcca9012351acac4a20 Mon Sep 17 00:00:00 2001 From: Mingun Date: Fri, 8 Sep 2023 22:33:20 +0500 Subject: [PATCH] =?UTF-8?q?=D0=9F=D0=BE=D0=BF=D1=8B=D1=82=D0=BA=D0=B8=20?= =?UTF-8?q?=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2=D0=B8=D1=82=D1=8C=20DOM=20=D1=83?= =?UTF-8?q?=D0=B7=D0=B5=D0=BB,=20=D0=B8=D0=B7=20=D0=BA=D0=BE=D1=82=D0=BE?= =?UTF-8?q?=D1=80=D0=BE=D0=B3=D0=BE=20=D0=BC=D0=BE=D0=B6=D0=BD=D0=BE=20?= =?UTF-8?q?=D0=B4=D0=B5=D1=81=D0=B5=D1=80=D0=B8=D0=B0=D0=BB=D0=B8=D0=B7?= =?UTF-8?q?=D0=BE=D0=B2=D1=8B=D0=B2=D0=B0=D1=82=D1=8C=20=D0=B4=D0=B0=D0=BD?= =?UTF-8?q?=D0=BD=D1=8B=D0=B5=20(=D1=82.=D0=B5.=20=D1=87=D1=82=D0=BE=D0=B1?= =?UTF-8?q?=D1=8B=20=D0=BE=D0=BD=20=D1=81=D0=BB=D1=83=D0=B6=D0=B8=D0=BB=20?= =?UTF-8?q?=D0=B0=D0=BD=D0=B0=D0=BB=D0=BE=D0=B3=D0=BE=D0=BC=20Content=20?= =?UTF-8?q?=D0=B4=D0=BB=D1=8F=20serde,=20=D1=82=D0=BE=D0=BB=D1=8C=D0=BA?= =?UTF-8?q?=D0=BE=20=D1=81=D0=BF=D0=B5=D1=86=D0=B8=D1=84=D0=B8=D1=87=D0=BD?= =?UTF-8?q?=D1=8B=D0=B9=20=D0=B4=D0=BB=D1=8F=20XML)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Лучше начать с того, чтобы BytesStart можно было превратить в MapAccess --- src/de/dom.rs | 425 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/de/mod.rs | 1 + src/dom.rs | 4 +- 3 files changed, 428 insertions(+), 2 deletions(-) create mode 100644 src/de/dom.rs diff --git a/src/de/dom.rs b/src/de/dom.rs new file mode 100644 index 00000000..d25df9b7 --- /dev/null +++ b/src/de/dom.rs @@ -0,0 +1,425 @@ +//! Serde deserialization support for a DOM tree. 
+ +use super::str2bool; +use crate::de::{Text, TEXT_KEY, VALUE_KEY}; +use crate::de::key::QNameDeserializer; +use crate::de::simple_type::SimpleTypeDeserializer; +use crate::dom::{Element, Node}; +use crate::events::BytesStart; +use crate::errors::serialize::DeError; +use serde::de::{Deserializer, DeserializeSeed, MapAccess, Visitor}; +use serde::de::value::BorrowedStrDeserializer; +use serde::{forward_to_deserialize_any, serde_if_integer128}; +use std::borrow::Cow; +use std::ops::Range; +use std::vec::IntoIter; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +macro_rules! deserialize_num { + ($name:ident => $visit:ident) => { + fn $name(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self { + Self::Element(_) => self.deserialize_map(visitor), + Self::Text(text) => visitor.$visit(text.parse()?), + } + } + }; +} + +impl<'de> Deserializer<'de> for Node<'de> { + type Error = DeError; + + forward_to_deserialize_any! { char str string bytes byte_buf identifier } + + #[inline] + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self { + Self::Element(_) => self.deserialize_map(visitor), + Self::Text(Cow::Borrowed(text)) => visitor.visit_borrowed_str(text), + Self::Text(Cow::Owned(text)) => visitor.visit_string(text), + } + } + + fn deserialize_bool(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self { + Self::Element(_) => self.deserialize_map(visitor), + Self::Text(text) => str2bool(&text, visitor), + } + } + + deserialize_num!(deserialize_i8 => visit_i8); + deserialize_num!(deserialize_u8 => visit_u8); + deserialize_num!(deserialize_i16 => visit_i16); + deserialize_num!(deserialize_u16 => visit_u16); + deserialize_num!(deserialize_i32 => visit_i32); + deserialize_num!(deserialize_u32 => visit_u32); + deserialize_num!(deserialize_i64 => visit_i64); + deserialize_num!(deserialize_u64 => visit_u64); + + serde_if_integer128! 
{ + deserialize_num!(deserialize_i128 => visit_i128); + deserialize_num!(deserialize_u128 => visit_u128); + } + + deserialize_num!(deserialize_f32 => visit_f32); + deserialize_num!(deserialize_f64 => visit_f64); + + fn deserialize_option(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_some(self) + } + + #[inline] + fn deserialize_unit(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_unit() + } + + #[inline] + fn deserialize_unit_struct( + self, + _name: &'static str, + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + visitor.visit_unit() + } + + fn deserialize_newtype_struct( + self, + _name: &'static str, + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + visitor.visit_newtype_struct(self) + } + + fn deserialize_seq(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self { + Self::Element(e) => e.deserialize_seq(visitor), + Self::Text(text) => SimpleTypeDeserializer::from_text(text).deserialize_seq(visitor), + } + } + + fn deserialize_tuple(self, len: usize, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self { + Self::Element(e) => e.deserialize_tuple(len, visitor), + Self::Text(text) => SimpleTypeDeserializer::from_text(text).deserialize_tuple(len, visitor), + } + } + + fn deserialize_tuple_struct( + self, + name: &'static str, + len: usize, + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + match self { + Self::Element(e) => e.deserialize_tuple_struct(name, len, visitor), + Self::Text(text) => SimpleTypeDeserializer::from_text(text).deserialize_tuple_struct(name, len, visitor), + } + } + + #[inline] + fn deserialize_map(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self { + Self::Element(e) => e.deserialize_map(visitor), + Self::Text(_) => self.deserialize_str(visitor), + } + } + + fn deserialize_struct( + self, + name: &'static str, + fields: &'static [&'static str], + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + 
match self { + Self::Element(e) => e.deserialize_struct(name, fields, visitor), + Self::Text(_) => self.deserialize_str(visitor), + } + } + + fn deserialize_enum( + self, + name: &'static str, + variants: &'static [&'static str], + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + match self { + Self::Element(e) => e.deserialize_enum(name, variants, visitor), + Self::Text(text) => SimpleTypeDeserializer::from_text(text).deserialize_enum(name, variants, visitor), + } + } + + #[inline] + fn deserialize_ignored_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_unit() + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug, PartialEq, Eq)] +enum Value<'i> { + Unknown, + Attribute(Range), + Text(Text<'i>), + Value(Node<'i>), + Field(Element<'i>), +} + +impl<'de> Deserializer<'de> for Element<'de> { + type Error = DeError; + + #[inline] + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_map(visitor) + } + + forward_to_deserialize_any! 
{ + bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string + bytes byte_buf identifier + } + + fn deserialize_option(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_some(self) + } + + #[inline] + fn deserialize_unit(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_unit() + } + + fn deserialize_unit_struct( + self, + _name: &'static str, + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + visitor.visit_unit() + } + + fn deserialize_newtype_struct( + self, + _name: &'static str, + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + visitor.visit_newtype_struct(self) + } + + fn deserialize_seq(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + todo!() + } + + #[inline] + fn deserialize_tuple(self, _len: usize, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_seq(visitor) + } + + #[inline] + fn deserialize_tuple_struct( + self, + _name: &'static str, + _len: usize, + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + self.deserialize_seq(visitor) + } + + #[inline] + fn deserialize_map(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_struct("", &[], visitor) + } + + fn deserialize_struct( + self, + _name: &'static str, + fields: &'static [&'static str], + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + visitor.visit_map(ElementMapAccess::new(self, fields)) + } + + fn deserialize_enum( + self, + name: &'static str, + variants: &'static [&'static str], + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + todo!() + } + + #[inline] + fn deserialize_ignored_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_unit() + } +} + +#[derive(Debug)] +struct ElementMapAccess<'de> { + attributes: BytesStart<'de>, + children: IntoIter>, + pending_value: Value<'de>, + /// If `true`, then the deserialized struct has a field with a special name: + /// [`TEXT_KEY`]. 
That field should be deserialized from the whole content + /// of a text child (`Node::Text`) of an XML node, without any tag name: + /// + /// ```xml + /// value for TEXT_KEY field + /// ``` + has_text_field: bool, + /// If `true`, then the deserialized struct has a field with a special name: + /// [`VALUE_KEY`]. That field should be deserialized from the whole content + /// of an XML node, including tag name: + /// + /// ```xml + /// value for VALUE_KEY field + /// ``` + has_value_field: bool, + /// List of field names of the struct. It is empty for maps + fields: &'static [&'static str], +} + +impl<'de> ElementMapAccess<'de> { + /// Takes `element` apart: its start tag is kept in `attributes`, its + /// children become the `children` iterator, and the two flags record + /// whether `fields` contains the special [`TEXT_KEY`] / [`VALUE_KEY`] names. + /// No value is pending initially (`Value::Unknown`). + fn new(element: Element<'de>, fields: &'static [&'static str]) -> Self { + Self { + attributes: element.start, + children: element.children.into_iter(), + pending_value: Value::Unknown, + has_text_field: fields.contains(&TEXT_KEY), + has_value_field: fields.contains(&VALUE_KEY), + fields, + } + } +} + +impl<'de> MapAccess<'de> for ElementMapAccess<'de> { + type Error = DeError; + + /// Produces the key for the next child of the element, or `None` when the + /// children are exhausted. Text children are keyed by the special + /// [`TEXT_KEY`] / [`VALUE_KEY`] names, element children by a key built + /// from `e.start.raw_name()`. + /// + /// NOTE(review): work in progress. `todo!("decoder")` below panics before + /// any child is examined, and no branch stores the consumed child in + /// `pending_value`, so `next_value_seed` could only ever reach its + /// `Value::Unknown => panic!()` arm. Attributes are not iterated here. + fn next_key_seed>( + &mut self, + seed: K, + ) -> Result, Self::Error> { + debug_assert_eq!(self.pending_value, Value::Unknown); + + let decoder = todo!("decoder"); + match self.children.next() { + Some(Node::Text(_)) if self.has_value_field && !self.has_text_field => { + // Text child, and the struct has a `VALUE_KEY` field but no + // `TEXT_KEY` field: the key is the special `VALUE_KEY` name + let de = BorrowedStrDeserializer::::new(VALUE_KEY); + seed.deserialize(de).map(Some) + } + Some(Node::Text(_)) => { + // Any other text child: the key is the special `TEXT_KEY` name + let de = BorrowedStrDeserializer::::new(TEXT_KEY); + seed.deserialize(de).map(Some) + } + /*Some(Node::Element(e)) if self.has_value_field && not_in(self.fields, e, decoder)? 
=> { + let de = BorrowedStrDeserializer::::new(VALUE_KEY); + seed.deserialize(de).map(Some) + }*/ + Some(Node::Element(e)) => { + let de = QNameDeserializer::from_elem(e.start.raw_name(), decoder)?; + seed.deserialize(de).map(Some) + } + None => Ok(None), + } + } + + fn next_value_seed>( + &mut self, + seed: K, + ) -> Result { + match std::mem::replace(&mut self.pending_value, Value::Unknown) { + Value::Attribute(value) => seed.deserialize(SimpleTypeDeserializer::from_part( + &self.attributes.buf, + value, + true, + todo!("decoder"), + )), + Value::Text(text) => seed.deserialize(SimpleTypeDeserializer::from_text_content(text)), + Value::Value(node) => seed.deserialize(node), + Value::Field(elem) => seed.deserialize(elem), + Value::Unknown => panic!(), + } + } +} diff --git a/src/de/mod.rs b/src/de/mod.rs index cea5e95f..8aedf2da 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -1989,6 +1989,7 @@ macro_rules! deserialize_primitives { }; } +pub mod dom; mod key; mod map; mod resolver; diff --git a/src/dom.rs b/src/dom.rs index 73aa6f0b..cbcc3c53 100644 --- a/src/dom.rs +++ b/src/dom.rs @@ -58,8 +58,8 @@ enum Unprocessed<'a> { /// A struct representing a DOM Element. #[derive(Clone, Debug, PartialEq, Eq)] pub struct Element<'a> { - start: BytesStart<'a>, - children: Vec>, + pub(crate) start: BytesStart<'a>, + pub(crate) children: Vec>, } impl<'a> Element<'a> { /// Creates a DOM Element from XML text, borrowing the input.