From 8b6e8c6b22a115bbfc3567d8c1e40fd6b93028b4 Mon Sep 17 00:00:00 2001 From: Mingun Date: Wed, 16 Oct 2024 00:13:12 +0500 Subject: [PATCH] Do not allow serialization of consequent primitives in `$value` fields and top-level --- Changelog.md | 3 ++ src/de/mod.rs | 5 +- src/se/content.rs | 124 ++++++++++++++++++++++++++++++++-------------- src/se/element.rs | 95 +++++++++++++++++++++++++---------- src/se/mod.rs | 12 ++++- 5 files changed, 170 insertions(+), 69 deletions(-) diff --git a/Changelog.md b/Changelog.md index a0bfeeeb..b9a3ed4f 100644 --- a/Changelog.md +++ b/Changelog.md @@ -30,12 +30,15 @@ - [#811]: Narrow down error return type from `Error` where only one variant is ever returned: attribute related methods on `BytesStart` and `BytesDecl` returns `AttrError` - [#820]: Classify output of the `Serializer` by returning an enumeration with kind of written data +- [#823]: Do not allow serialization of consequent primitives, for example `Vec` or + `Vec` in `$value` fields. They cannot be deserialized back with the same result [#227]: https://github.com/tafia/quick-xml/issues/227 [#655]: https://github.com/tafia/quick-xml/issues/655 [#810]: https://github.com/tafia/quick-xml/pull/810 [#811]: https://github.com/tafia/quick-xml/pull/811 [#820]: https://github.com/tafia/quick-xml/pull/820 +[#823]: https://github.com/tafia/quick-xml/pull/823 ## 0.36.2 -- 2024-09-20 diff --git a/src/de/mod.rs b/src/de/mod.rs index e150e78e..3a181609 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -1568,9 +1568,8 @@ //! } //! //! let obj = AnyName { field: vec![1, 2, 3] }; -//! let xml = to_string(&obj).unwrap(); -//! // Note, that types that does not produce their own tag are written as is! -//! assert_eq!(xml, "123"); +//! // If this object were serialized, it would be represented as "123" +//! to_string(&obj).unwrap_err(); //! //! let object: AnyName = from_str("123").unwrap(); //! 
assert_eq!(object, AnyName { field: vec![123] }); diff --git a/src/se/content.rs b/src/se/content.rs index c07a2d9d..46c6170f 100644 --- a/src/se/content.rs +++ b/src/se/content.rs @@ -14,7 +14,7 @@ macro_rules! write_primitive { ($method:ident ( $ty:ty )) => { #[inline] fn $method(self, value: $ty) -> Result { - self.into_simple_type_serializer().$method(value)?; + self.into_simple_type_serializer()?.$method(value)?; Ok(WriteResult::Text) } }; @@ -71,16 +71,23 @@ pub struct ContentSerializer<'w, 'i, W: Write> { /// If `true`, then current indent will be written before writing the content, /// but only if content is not empty. This flag is reset after writing indent. pub write_indent: bool, + /// If `true`, then primitive types that serialize to text content without a + /// surrounding tag are allowed; otherwise [`SeError::Unsupported`] + /// is returned. + /// + /// This flag protects against the situation where two consecutive values are + /// serialized as text, which makes it impossible to distinguish between them during + /// deserialization. Instead of an ambiguous serialization, an error is returned. + pub allow_primitive: bool, // If `true`, then empty elements will be serialized as `` // instead of ``. 
pub expand_empty_elements: bool, - //TODO: add settings to disallow consequent serialization of primitives } impl<'w, 'i, W: Write> ContentSerializer<'w, 'i, W> { /// Turns this serializer into serializer of a text content #[inline] - pub fn into_simple_type_serializer(self) -> SimpleTypeSerializer<&'w mut W> { + pub fn into_simple_type_serializer_impl(self) -> SimpleTypeSerializer<&'w mut W> { //TODO: Customization point: choose between CDATA and Text representation SimpleTypeSerializer { writer: self.writer, @@ -89,15 +96,27 @@ impl<'w, 'i, W: Write> ContentSerializer<'w, 'i, W> { } } + /// Turns this serializer into serializer of a text content if that is allowed, + /// otherwise error is returned + #[inline] + pub fn into_simple_type_serializer(self) -> Result, SeError> { + if self.allow_primitive { + Ok(self.into_simple_type_serializer_impl()) + } else { + Err(SeError::Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back".into())) + } + } + /// Creates new serializer that shares state with this serializer and /// writes to the same underlying writer #[inline] - pub fn new_seq_element_serializer(&mut self) -> ContentSerializer { + pub fn new_seq_element_serializer(&mut self, allow_primitive: bool) -> ContentSerializer { ContentSerializer { writer: self.writer, level: self.level, indent: self.indent.borrow(), write_indent: self.write_indent, + allow_primitive, expand_empty_elements: self.expand_empty_elements, } } @@ -134,7 +153,7 @@ impl<'w, 'i, W: Write> ContentSerializer<'w, 'i, W> { self.writer.write_str(name.0)?; self.writer.write_char('>')?; - let writer = serialize(self.into_simple_type_serializer())?; + let writer = serialize(self.into_simple_type_serializer_impl())?; writer.write_str(" Serializer for ContentSerializer<'w, 'i, W> { #[inline] fn serialize_char(self, value: char) -> Result { - self.into_simple_type_serializer().serialize_char(value)?; + 
self.into_simple_type_serializer()?.serialize_char(value)?; Ok(WriteResult::SensitiveText) } #[inline] fn serialize_str(self, value: &str) -> Result { if !value.is_empty() { - self.into_simple_type_serializer().serialize_str(value)?; + self.into_simple_type_serializer()?.serialize_str(value)?; } Ok(WriteResult::SensitiveText) } @@ -259,7 +278,7 @@ impl<'w, 'i, W: Write> Serializer for ContentSerializer<'w, 'i, W> { value: &T, ) -> Result { if variant == TEXT_KEY { - value.serialize(self.into_simple_type_serializer())?; + value.serialize(self.into_simple_type_serializer()?)?; Ok(WriteResult::SensitiveText) } else { value.serialize(ElementSerializer { @@ -311,7 +330,7 @@ impl<'w, 'i, W: Write> Serializer for ContentSerializer<'w, 'i, W> { len: usize, ) -> Result { if variant == TEXT_KEY { - self.into_simple_type_serializer() + self.into_simple_type_serializer()? .serialize_tuple_struct(name, len) .map(Tuple::Text) } else { @@ -390,7 +409,7 @@ impl<'w, 'i, W: Write> SerializeSeq for Seq<'w, 'i, W> { where T: ?Sized + Serialize, { - self.last = value.serialize(self.ser.new_seq_element_serializer())?; + self.last = value.serialize(self.ser.new_seq_element_serializer(self.last.is_text()))?; // Write indent for next element if indents are used self.ser.write_indent = self.last.allow_indent(); Ok(()) @@ -576,6 +595,7 @@ pub(super) mod tests { level: QuoteLevel::Full, indent: Indent::None, write_indent: false, + allow_primitive: true, expand_empty_elements: false, }; @@ -598,6 +618,7 @@ pub(super) mod tests { level: QuoteLevel::Full, indent: Indent::None, write_indent: false, + allow_primitive: true, expand_empty_elements: false, }; @@ -671,15 +692,13 @@ pub(super) mod tests { serialize_as!(enum_newtype: Enum::Newtype(42) => "42"); // Note that sequences of primitives serialized without delimiters! 
- serialize_as!(seq: vec![1, 2, 3] => "123", Text); + err!(seq: vec![1, 2, 3] + => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); serialize_as!(seq_empty: Vec::::new() => "", SensitiveNothing); - serialize_as!(tuple: ("<\"&'>", "with\t\r\n spaces", 3usize) - => "<"&'>\ - with\t\r\n spaces\ - 3", Text); - serialize_as!(tuple_struct: Tuple("first", 42) - => "first\ - 42", Text); + err!(tuple: ("<\"&'>", "with\t\r\n spaces", 3usize) + => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); + err!(tuple_struct: Tuple("first", 42) + => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); serialize_as!(enum_tuple: Enum::Tuple("first", 42) => "first\ 42"); @@ -933,13 +952,28 @@ pub(super) mod tests { value!(enum_newtype: Enum::Newtype(42) => "42"); // Note that sequences of primitives serialized without delimiters! - value!(seq: vec![1, 2, 3] => "123"); + err!(seq: + SpecialEnum::Value { + before: "answer", + content: vec![1, 2, 3], + after: "answer", + } + => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); value!(seq_empty: Vec::::new() => ""); - value!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) - => "<"&'>\ - with\t\n\r spaces\ - 3"); - value!(tuple_struct: Tuple("first", 42) => "first42"); + err!(tuple: + SpecialEnum::Value { + before: "answer", + content: ("<\"&'>", "with\t\n\r spaces", 3usize), + after: "answer", + } + => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); + err!(tuple_struct: + SpecialEnum::Value { + before: "answer", + content: Tuple("first", 42), + after: "answer", + } + => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); value!(enum_tuple: Enum::Tuple("first", 42) => "first\ 42"); @@ -1014,6 
+1048,7 @@ pub(super) mod tests { level: QuoteLevel::Full, indent: Indent::Owned(Indentation::new(b' ', 2)), write_indent: false, + allow_primitive: true, expand_empty_elements: false, }; @@ -1036,6 +1071,7 @@ pub(super) mod tests { level: QuoteLevel::Full, indent: Indent::Owned(Indentation::new(b' ', 2)), write_indent: false, + allow_primitive: true, expand_empty_elements: false, }; @@ -1106,14 +1142,13 @@ pub(super) mod tests { serialize_as!(newtype: Newtype(42) => "42", Text); serialize_as!(enum_newtype: Enum::Newtype(42) => "42"); - // Note that sequences of primitives serialized without delimiters! - serialize_as!(seq: vec![1, 2, 3] => "123", Text); + err!(seq: vec![1, 2, 3] + => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); serialize_as!(seq_empty: Vec::::new() => "", SensitiveNothing); - serialize_as!(tuple: ("<\"&'>", "with\t\r\n spaces", 3usize) - => "<"&'>\ - with\t\r\n spaces\ - 3", Text); - serialize_as!(tuple_struct: Tuple("first", 42) => "first42", Text); + err!(tuple: ("<\"&'>", "with\t\r\n spaces", 3usize) + => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); + err!(tuple_struct: Tuple("first", 42) + => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); serialize_as!(enum_tuple: Enum::Tuple("first", 42) => "first\n\ 42"); @@ -1365,13 +1400,28 @@ pub(super) mod tests { value!(enum_newtype: Enum::Newtype(42) => "\n 42\n "); // Note that sequences of primitives serialized without delimiters! 
- value!(seq: vec![1, 2, 3] => "123"); + err!(seq: + SpecialEnum::Value { + before: "answer", + content: vec![1, 2, 3], + after: "answer", + } + => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); value!(seq_empty: Vec::::new() => ""); - value!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) - => "<"&'>\ - with\t\n\r spaces\ - 3"); - value!(tuple_struct: Tuple("first", 42) => "first42"); + err!(tuple: + SpecialEnum::Value { + before: "answer", + content: ("<\"&'>", "with\t\n\r spaces", 3usize), + after: "answer", + } + => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); + err!(tuple_struct: + SpecialEnum::Value { + before: "answer", + content: Tuple("first", 42), + after: "answer", + } + => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); value!(enum_tuple: Enum::Tuple("first", 42) => "\n \ first\n \ diff --git a/src/se/element.rs b/src/se/element.rs index 5f9d3cf7..2f6d5f30 100644 --- a/src/se/element.rs +++ b/src/se/element.rs @@ -274,7 +274,7 @@ impl<'w, 'k, W: Write> SerializeSeq for ElementSerializer<'w, 'k, W> { T: ?Sized + Serialize, { value.serialize(ElementSerializer { - ser: self.ser.new_seq_element_serializer(), + ser: self.ser.new_seq_element_serializer(true), key: self.key, })?; // Write indent for the next element @@ -443,11 +443,12 @@ impl<'w, 'k, W: Write> Struct<'w, 'k, W> { indent: self.ser.ser.indent.borrow(), // If previous field does not require indent, do not write it write_indent: self.write_indent, + allow_primitive: true, expand_empty_elements: self.ser.ser.expand_empty_elements, }; if key == TEXT_KEY { - value.serialize(TextSerializer(ser.into_simple_type_serializer()))?; + value.serialize(TextSerializer(ser.into_simple_type_serializer()?))?; // Text was written so we don't need to indent next field self.write_indent = false; } else if key == VALUE_KEY { @@ -634,6 
+635,7 @@ mod tests { level: QuoteLevel::Full, indent: Indent::None, write_indent: false, + allow_primitive: true, expand_empty_elements: false, }, key: XmlName("root"), @@ -659,6 +661,7 @@ mod tests { level: QuoteLevel::Full, indent: Indent::None, write_indent: false, + allow_primitive: true, expand_empty_elements: false, }, key: XmlName("root"), @@ -1090,13 +1093,16 @@ mod tests { value!(enum_newtype: Enum::Newtype(42) => "42"); // Note that sequences of primitives serialized without delimiters! - value!(seq: vec![1, 2, 3] => "123"); + err!(seq: + BTreeMap::from([("$value", vec![1, 2, 3])]) + => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); value!(seq_empty: Vec::::new()); - value!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) - => "<"&'>\ - with\t\n\r spaces\ - 3"); - value!(tuple_struct: Tuple("first", 42) => "first42"); + err!(tuple: + BTreeMap::from([("$value", ("<\"&'>", "with\t\n\r spaces", 3usize))]) + => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); + err!(tuple_struct: + BTreeMap::from([("$value", Tuple("first", 42))]) + => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); value!(enum_tuple: Enum::Tuple("first", 42) => "first\ 42"); @@ -1202,13 +1208,28 @@ mod tests { value!(enum_newtype: Enum::Newtype(42) => "42"); // Note that sequences of primitives serialized without delimiters! 
- value!(seq: vec![1, 2, 3] => "123"); + err!(seq: + Value { + before: "answer", + content: vec![1, 2, 3], + after: "answer", + } + => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); value!(seq_empty: Vec::::new() => ""); - value!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) - => "<"&'>\ - with\t\n\r spaces\ - 3"); - value!(tuple_struct: Tuple("first", 42) => "first42"); + err!(tuple: + Value { + before: "answer", + content: ("<\"&'>", "with\t\n\r spaces", 3usize), + after: "answer", + } + => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); + err!(tuple_struct: + Value { + before: "answer", + content: Tuple("first", 42), + after: "answer", + } + => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); value!(enum_tuple: Enum::Tuple("first", 42) => "first\ 42"); @@ -1331,6 +1352,7 @@ mod tests { level: QuoteLevel::Full, indent: Indent::Owned(Indentation::new(b' ', 2)), write_indent: false, + allow_primitive: true, expand_empty_elements: false, }, key: XmlName("root"), @@ -1356,6 +1378,7 @@ mod tests { level: QuoteLevel::Full, indent: Indent::Owned(Indentation::new(b' ', 2)), write_indent: false, + allow_primitive: true, expand_empty_elements: false, }, key: XmlName("root"), @@ -1792,13 +1815,16 @@ mod tests { value!(newtype: Newtype(42) => "42"); value!(enum_newtype: Enum::Newtype(42) => "\n 42\n"); - value!(seq: vec![1, 2, 3] => "123"); + err!(seq: + BTreeMap::from([("$value", vec![1, 2, 3])]) + => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); value!(seq_empty: Vec::::new()); - value!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) - => "<"&'>\ - with\t\n\r spaces\ - 3"); - value!(tuple_struct: Tuple("first", 42) => "first42"); + err!(tuple: + BTreeMap::from([("$value", ("<\"&'>", "with\t\n\r spaces", 3usize))]) + => 
Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); + err!(tuple_struct: + BTreeMap::from([("$value", Tuple("first", 42))]) + => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); value!(enum_tuple: Enum::Tuple("first", 42) => "\n \ first\n \ @@ -1906,14 +1932,28 @@ mod tests { value!(newtype: Newtype(42) => "42"); value!(enum_newtype: Enum::Newtype(42) => "\n 42\n "); - // Note that sequences of primitives serialized without delimiters! - value!(seq: vec![1, 2, 3] => "123"); + err!(seq: + Value { + before: "answer", + content: vec![1, 2, 3], + after: "answer", + } + => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); value!(seq_empty: Vec::::new() => ""); - value!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) - => "<"&'>\ - with\t\n\r spaces\ - 3"); - value!(tuple_struct: Tuple("first", 42) => "first42"); + err!(tuple: + Value { + before: "answer", + content: ("<\"&'>", "with\t\n\r spaces", 3usize), + after: "answer", + } + => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); + err!(tuple_struct: + Value { + before: "answer", + content: Tuple("first", 42), + after: "answer", + } + => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); value!(enum_tuple: Enum::Tuple("first", 42) => "\n \ first\n \ @@ -2041,6 +2081,7 @@ mod tests { level: QuoteLevel::Full, indent: Indent::None, write_indent: false, + allow_primitive: true, expand_empty_elements: true, }, key: XmlName("root"), diff --git a/src/se/mod.rs b/src/se/mod.rs index db2f8bbb..e9eaa0fc 100644 --- a/src/se/mod.rs +++ b/src/se/mod.rs @@ -343,6 +343,12 @@ impl WriteResult { pub fn allow_indent(&self) -> bool { matches!(self, Self::Element | Self::Nothing) } + + /// Returns `true` if self is `Text` or `SensitiveText`. 
+ #[inline] + pub fn is_text(&self) -> bool { + matches!(self, Self::Text | Self::SensitiveText) + } } //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -502,6 +508,7 @@ impl<'w, 'r, W: Write> Serializer<'w, 'r, W> { level: QuoteLevel::Partial, indent: Indent::None, write_indent: false, + allow_primitive: true, expand_empty_elements: false, }, root_tag: None, @@ -567,6 +574,7 @@ impl<'w, 'r, W: Write> Serializer<'w, 'r, W> { level: QuoteLevel::Partial, indent: Indent::None, write_indent: false, + allow_primitive: true, expand_empty_elements: false, }, root_tag: root_tag.map(|tag| XmlName::try_from(tag)).transpose()?, @@ -745,7 +753,7 @@ impl<'w, 'r, W: Write> ser::Serializer for Serializer<'w, 'r, W> { value: &T, ) -> Result { if variant == TEXT_KEY { - value.serialize(self.ser.into_simple_type_serializer())?; + value.serialize(self.ser.into_simple_type_serializer()?)?; // Do not write indent after `$text` variant because it may be interpreted as // part of content when deserialize Ok(WriteResult::SensitiveText) @@ -783,7 +791,7 @@ impl<'w, 'r, W: Write> ser::Serializer for Serializer<'w, 'r, W> { ) -> Result { if variant == TEXT_KEY { self.ser - .into_simple_type_serializer() + .into_simple_type_serializer()? .serialize_tuple_struct(name, len) .map(Tuple::Text) } else {