From 7f338afa2394175bae8f0e42cbe6b256e4d16021 Mon Sep 17 00:00:00 2001 From: Mingun Date: Sun, 19 Nov 2023 01:24:44 +0500 Subject: [PATCH] Introduce dedicated type for processing instruction --- Changelog.md | 4 + fuzz/fuzz_targets/fuzz_target_1.rs | 4 +- fuzz/fuzz_targets/structured_roundtrip.rs | 4 +- src/events/mod.rs | 117 +++++++++++++++++++++- src/reader/mod.rs | 8 +- src/reader/state.rs | 4 +- src/writer.rs | 6 +- src/writer/async_tokio.rs | 12 +-- tests/reader-config.rs | 14 +-- tests/reader-errors.rs | 6 +- 10 files changed, 148 insertions(+), 31 deletions(-) diff --git a/Changelog.md b/Changelog.md index 7b91ec59..0ba5a368 100644 --- a/Changelog.md +++ b/Changelog.md @@ -16,6 +16,10 @@ ### Misc Changes +- [#650]: Change the type of `Event::PI` to a new dedicated `BytesPI` type. + +[#650]: https://github.com/tafia/quick-xml/issues/650 + ## 0.32.0 -- 2024-06-10 diff --git a/fuzz/fuzz_targets/fuzz_target_1.rs b/fuzz/fuzz_targets/fuzz_target_1.rs index bfac5e13..9809526f 100644 --- a/fuzz/fuzz_targets/fuzz_target_1.rs +++ b/fuzz/fuzz_targets/fuzz_target_1.rs @@ -41,7 +41,6 @@ where } Ok(Event::Text(ref e)) | Ok(Event::Comment(ref e)) - | Ok(Event::PI(ref e)) | Ok(Event::DocType(ref e)) => { debug_format!(e); if let Err(err) = e.unescape() { @@ -56,6 +55,9 @@ where break; } } + Ok(Event::PI(ref e)) => { + debug_format!(e); + } Ok(Event::Decl(ref e)) => { debug_format!(e); let _ = black_box(e.version()); diff --git a/fuzz/fuzz_targets/structured_roundtrip.rs b/fuzz/fuzz_targets/structured_roundtrip.rs index f0cc6ee9..2eb6962e 100644 --- a/fuzz/fuzz_targets/structured_roundtrip.rs +++ b/fuzz/fuzz_targets/structured_roundtrip.rs @@ -2,7 +2,7 @@ use arbitrary::{Arbitrary, Unstructured}; use libfuzzer_sys::fuzz_target; -use quick_xml::events::{BytesCData, BytesText, Event}; +use quick_xml::events::{BytesCData, BytesPI, BytesText, Event}; use quick_xml::reader::{Config, NsReader, Reader}; use quick_xml::writer::Writer; use std::{hint::black_box, io::Cursor}; @@ -71,7 +71,7 @@ fn fuzz_round_trip(driver: Driver) -> quick_xml::Result<()> { _ = element_writer.write_cdata_content(BytesCData::new(*text))?; } WritePiContent(text) => { - _ = element_writer.write_pi_content(BytesText::from_escaped(*text))?; + _ = element_writer.write_pi_content(BytesPI::new(*text))?; } WriteEmpty => { _ = element_writer.write_empty()?; diff --git a/src/events/mod.rs b/src/events/mod.rs index 25304152..08d0dd7d 100644 --- a/src/events/mod.rs +++ b/src/events/mod.rs @@ -50,7 +50,7 @@ use crate::escape::{ escape, minimal_escape, partial_escape, resolve_predefined_entity, unescape_with, }; use crate::name::{LocalName, QName}; -use crate::reader::is_whitespace; +use crate::reader::{is_whitespace, name_len}; use crate::utils::write_cow_string; #[cfg(feature = "serialize")] use crate::utils::CowRef; @@ -992,6 +992,119 @@ impl<'a> arbitrary::Arbitrary<'a> for BytesCData<'a> { //////////////////////////////////////////////////////////////////////////////////////////////////// +/// [Processing instructions][PI] (PIs) allow documents to contain instructions for applications. +/// +/// [PI]: https://www.w3.org/TR/xml11/#sec-pi +#[derive(Clone, Eq, PartialEq)] +pub struct BytesPI<'a> { + content: BytesStart<'a>, +} + +impl<'a> BytesPI<'a> { + /// Creates a new `BytesPI` from a byte sequence in the specified encoding. + #[inline] + pub(crate) fn wrap(content: &'a [u8], target_len: usize) -> Self { + Self { + content: BytesStart::wrap(content, target_len), + } + } + + /// Creates a new `BytesPI` from a string. + /// + /// # Warning + /// + /// `content` must not contain the `?>` sequence. + #[inline] + pub fn new>>(content: C) -> Self { + let buf = str_cow_to_bytes(content); + let name_len = name_len(&buf); + Self { + content: BytesStart { buf, name_len }, + } + } + + /// Ensures that all data is owned to extend the object's lifetime if + /// necessary. + #[inline] + pub fn into_owned(self) -> BytesPI<'static> { + BytesPI { + content: self.content.into_owned().into(), + } + } + + /// Extracts the inner `Cow` from the `BytesPI` event container. + #[inline] + pub fn into_inner(self) -> Cow<'a, [u8]> { + self.content.buf + } + + /// Converts the event into a borrowed event. + #[inline] + pub fn borrow(&self) -> BytesPI { + BytesPI { + content: self.content.borrow(), + } + } + + /// A target used to identify the application to which the instruction is directed. + #[inline] + pub fn target(&self) -> &[u8] { + self.content.name().0 + } + + /// Content of the processing instruction. Contains everything between target + /// name and the end of the instruction. A direct consequence is that the first + /// character is always a space character. + #[inline] + pub fn content(&self) -> &[u8] { + self.content.attributes_raw() + } + + /// A view of the processing instructions' content as a list of key-value pairs. + /// + /// Key-value pairs are used in some processing instructions, for example in + /// ``. + /// + /// Returned iterator does not validate attribute values as may required by + /// target's rules. For example, it doesn't check that substring `?>` is not + /// present in the attribute value. That shouldn't be the problem when event + /// is produced by the reader, because reader detects end of processing instruction + /// by the first `?>` sequence, as required by the specification, and therefore + /// this sequence cannot appear inside it. + #[inline] + pub fn attributes(&self) -> Attributes { + self.content.attributes() + } +} + +impl<'a> Debug for BytesPI<'a> { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "BytesPI {{ content: ")?; + write_cow_string(f, &self.content.buf)?; + write!(f, " }}") + } +} + +impl<'a> Deref for BytesPI<'a> { + type Target = [u8]; + + fn deref(&self) -> &[u8] { + &self.content + } +} + +#[cfg(feature = "arbitrary")] +impl<'a> arbitrary::Arbitrary<'a> for BytesPI<'a> { + fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result { + Ok(Self::new(<&str>::arbitrary(u)?)) + } + fn size_hint(depth: usize) -> (usize, Option) { + return <&str as arbitrary::Arbitrary>::size_hint(depth); + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + /// Event emitted by [`Reader::read_event_into`]. /// /// [`Reader::read_event_into`]: crate::reader::Reader::read_event_into @@ -1013,7 +1126,7 @@ pub enum Event<'a> { /// XML declaration ``. Decl(BytesDecl<'a>), /// Processing instruction ``. - PI(BytesText<'a>), + PI(BytesPI<'a>), /// Document type definition data (DTD) stored in ``. DocType(BytesText<'a>), /// End of XML document. diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 4cab549b..29df75cb 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -1697,7 +1697,7 @@ mod test { /// Ensures, that no empty `Text` events are generated mod $read_event { - use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event}; + use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesPI, BytesStart, BytesText, Event}; use crate::reader::Reader; use pretty_assertions::assert_eq; @@ -1767,7 +1767,7 @@ mod test { assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), - Event::PI(BytesText::from_escaped("xml-stylesheet '? >\" ")) + Event::PI(BytesPI::new("xml-stylesheet '? >\" ")) ); } @@ -1848,7 +1848,7 @@ mod test { $(, $async:ident, $await:ident)? ) => { mod small_buffers { - use crate::events::{BytesCData, BytesDecl, BytesStart, BytesText, Event}; + use crate::events::{BytesCData, BytesDecl, BytesPI, BytesStart, BytesText, Event}; use crate::reader::Reader; use pretty_assertions::assert_eq; @@ -1882,7 +1882,7 @@ mod test { assert_eq!( reader.$read_event(&mut buf) $(.$await)? .unwrap(), - Event::PI(BytesText::new("pi")) + Event::PI(BytesPI::new("pi")) ); assert_eq!( reader.$read_event(&mut buf) $(.$await)? .unwrap(), diff --git a/src/reader/state.rs b/src/reader/state.rs index ca6a5840..ffe4814a 100644 --- a/src/reader/state.rs +++ b/src/reader/state.rs @@ -3,7 +3,7 @@ use encoding_rs::UTF_8; use crate::encoding::Decoder; use crate::errors::{Error, IllFormedError, Result, SyntaxError}; -use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event}; +use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesPI, BytesStart, BytesText, Event}; #[cfg(feature = "encoding")] use crate::reader::EncodingRef; use crate::reader::{is_whitespace, name_len, BangType, Config, ParseState}; @@ -242,7 +242,7 @@ impl ReaderState { Ok(Event::Decl(event)) } else { - Ok(Event::PI(BytesText::wrap(content, self.decoder()))) + Ok(Event::PI(BytesPI::wrap(content, name_len(content)))) } } else { // ElementWriter<'a, W> { } /// Write a processing instruction `` inside the current element. - pub fn write_pi_content(self, text: BytesText) -> Result<&'a mut Writer> { + pub fn write_pi_content(self, pi: BytesPI) -> Result<&'a mut Writer> { self.writer .write_event(Event::Start(self.start_tag.borrow()))?; - self.writer.write_event(Event::PI(text))?; + self.writer.write_event(Event::PI(pi))?; self.writer .write_event(Event::End(self.start_tag.to_end()))?; Ok(self.writer) diff --git a/src/writer/async_tokio.rs b/src/writer/async_tokio.rs index 390ad927..dfecbe4a 100644 --- a/src/writer/async_tokio.rs +++ b/src/writer/async_tokio.rs @@ -4,7 +4,7 @@ use std::result::Result as StdResult; use tokio::io::{AsyncWrite, AsyncWriteExt}; use crate::errors::{Error, Result}; -use crate::events::{BytesCData, BytesText, Event}; +use crate::events::{BytesCData, BytesPI, BytesText, Event}; use crate::{ElementWriter, Writer}; impl Writer { @@ -173,7 +173,7 @@ impl<'a, W: AsyncWrite + Unpin> ElementWriter<'a, W> { /// /// ``` /// # use quick_xml::writer::Writer; - /// # use quick_xml::events::BytesText; + /// # use quick_xml::events::BytesPI; /// # use tokio::io::AsyncWriteExt; /// # #[tokio::main(flavor = "current_thread")] async fn main() { /// let mut buffer = Vec::new(); @@ -184,9 +184,7 @@ impl<'a, W: AsyncWrite + Unpin> ElementWriter<'a, W> { /// .create_element("paired") /// .with_attribute(("attr1", "value1")) /// .with_attribute(("attr2", "value2")) - /// // NOTE: We cannot use BytesText::new here, because it escapes strings, - /// // but processing instruction content should not be escaped - /// .write_pi_content_async(BytesText::from_escaped(r#"xml-stylesheet href="style.css""#)) + /// .write_pi_content_async(BytesPI::new(r#"xml-stylesheet href="style.css""#)) /// .await /// .expect("cannot write content"); /// @@ -199,7 +197,7 @@ impl<'a, W: AsyncWrite + Unpin> ElementWriter<'a, W> { /// "# /// ); /// # } - pub async fn write_pi_content_async(self, text: BytesText<'_>) -> Result<&'a mut Writer> { + pub async fn write_pi_content_async(self, text: BytesPI<'_>) -> Result<&'a mut Writer> { self.writer .write_event_async(Event::Start(self.start_tag.borrow())) .await?; @@ -363,7 +361,7 @@ mod tests { test!( pi, - Event::PI(BytesText::new("this is a processing instruction")), + Event::PI(BytesPI::new("this is a processing instruction")), r#""# ); diff --git a/tests/reader-config.rs b/tests/reader-config.rs index d8724670..fd968ce3 100644 --- a/tests/reader-config.rs +++ b/tests/reader-config.rs @@ -6,7 +6,7 @@ //! Please keep tests sorted (exceptions are allowed if options are tightly related). use quick_xml::errors::{Error, IllFormedError}; -use quick_xml::events::{BytesCData, BytesEnd, BytesStart, BytesText, Event}; +use quick_xml::events::{BytesCData, BytesEnd, BytesPI, BytesStart, BytesText, Event}; use quick_xml::reader::Reader; mod check_comments { @@ -520,7 +520,7 @@ mod trim_text { assert_eq!( reader.read_event().unwrap(), - Event::PI(BytesText::new("pi \t\r\n")) + Event::PI(BytesPI::new("pi \t\r\n")) ); assert_eq!( reader.read_event().unwrap(), @@ -570,7 +570,7 @@ mod trim_text { ); assert_eq!( reader.read_event().unwrap(), - Event::PI(BytesText::new("pi \t\r\n")) + Event::PI(BytesPI::new("pi \t\r\n")) ); assert_eq!( reader.read_event().unwrap(), @@ -641,7 +641,7 @@ mod trim_text_start { assert_eq!( reader.read_event().unwrap(), - Event::PI(BytesText::new("pi \t\r\n")) + Event::PI(BytesPI::new("pi \t\r\n")) ); assert_eq!( reader.read_event().unwrap(), @@ -691,7 +691,7 @@ mod trim_text_start { ); assert_eq!( reader.read_event().unwrap(), - Event::PI(BytesText::new("pi \t\r\n")) + Event::PI(BytesPI::new("pi \t\r\n")) ); assert_eq!( reader.read_event().unwrap(), @@ -762,7 +762,7 @@ mod trim_text_end { assert_eq!( reader.read_event().unwrap(), - Event::PI(BytesText::new("pi \t\r\n")) + Event::PI(BytesPI::new("pi \t\r\n")) ); assert_eq!( reader.read_event().unwrap(), @@ -814,7 +814,7 @@ mod trim_text_end { ); assert_eq!( reader.read_event().unwrap(), - Event::PI(BytesText::new("pi \t\r\n")) + Event::PI(BytesPI::new("pi \t\r\n")) ); assert_eq!( reader.read_event().unwrap(), diff --git a/tests/reader-errors.rs b/tests/reader-errors.rs index 3727d63d..24a61c32 100644 --- a/tests/reader-errors.rs +++ b/tests/reader-errors.rs @@ -1,7 +1,7 @@ //! Contains tests that produces errors during parsing XML. use quick_xml::errors::{Error, SyntaxError}; -use quick_xml::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event}; +use quick_xml::events::{BytesCData, BytesDecl, BytesEnd, BytesPI, BytesStart, BytesText, Event}; use quick_xml::reader::{NsReader, Reader}; macro_rules! ok { @@ -391,8 +391,8 @@ mod syntax { // According to the grammar, processing instruction MUST contain a non-empty // target name, but we do not consider this as a _syntax_ error. - ok!(normal_empty("") => Event::PI(BytesText::new(""))); - ok!(normal_xmlx("") => Event::PI(BytesText::new("xmlx"))); + ok!(normal_empty("") => Event::PI(BytesPI::new(""))); + ok!(normal_xmlx("") => Event::PI(BytesPI::new("xmlx"))); } /// https://www.w3.org/TR/xml11/#NT-prolog