Skip to content

Commit

Permalink
Introduce dedicated type for processing instruction
Browse files Browse the repository at this point in the history
  • Loading branch information
Mingun committed Jun 14, 2024
1 parent 0f9444a commit 7f338af
Show file tree
Hide file tree
Showing 10 changed files with 148 additions and 31 deletions.
4 changes: 4 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@

### Misc Changes

- [#650]: Change the type of `Event::PI` to a new dedicated `BytesPI` type.

[#650]: https://github.com/tafia/quick-xml/issues/650


## 0.32.0 -- 2024-06-10

Expand Down
4 changes: 3 additions & 1 deletion fuzz/fuzz_targets/fuzz_target_1.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ where
}
Ok(Event::Text(ref e))
| Ok(Event::Comment(ref e))
| Ok(Event::PI(ref e))
| Ok(Event::DocType(ref e)) => {
debug_format!(e);
if let Err(err) = e.unescape() {
Expand All @@ -56,6 +55,9 @@ where
break;
}
}
Ok(Event::PI(ref e)) => {
debug_format!(e);
}
Ok(Event::Decl(ref e)) => {
debug_format!(e);
let _ = black_box(e.version());
Expand Down
4 changes: 2 additions & 2 deletions fuzz/fuzz_targets/structured_roundtrip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

use arbitrary::{Arbitrary, Unstructured};
use libfuzzer_sys::fuzz_target;
use quick_xml::events::{BytesCData, BytesText, Event};
use quick_xml::events::{BytesCData, BytesPI, BytesText, Event};
use quick_xml::reader::{Config, NsReader, Reader};
use quick_xml::writer::Writer;
use std::{hint::black_box, io::Cursor};
Expand Down Expand Up @@ -71,7 +71,7 @@ fn fuzz_round_trip(driver: Driver) -> quick_xml::Result<()> {
_ = element_writer.write_cdata_content(BytesCData::new(*text))?;
}
WritePiContent(text) => {
_ = element_writer.write_pi_content(BytesText::from_escaped(*text))?;
_ = element_writer.write_pi_content(BytesPI::new(*text))?;
}
WriteEmpty => {
_ = element_writer.write_empty()?;
Expand Down
117 changes: 115 additions & 2 deletions src/events/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ use crate::escape::{
escape, minimal_escape, partial_escape, resolve_predefined_entity, unescape_with,
};
use crate::name::{LocalName, QName};
use crate::reader::is_whitespace;
use crate::reader::{is_whitespace, name_len};
use crate::utils::write_cow_string;
#[cfg(feature = "serialize")]
use crate::utils::CowRef;
Expand Down Expand Up @@ -992,6 +992,119 @@ impl<'a> arbitrary::Arbitrary<'a> for BytesCData<'a> {

////////////////////////////////////////////////////////////////////////////////////////////////////

/// [Processing instructions][PI] (PIs) allow documents to contain instructions for applications.
///
/// [PI]: https://www.w3.org/TR/xml11/#sec-pi
#[derive(Clone, Eq, PartialEq)]
pub struct BytesPI<'a> {
content: BytesStart<'a>,
}

impl<'a> BytesPI<'a> {
/// Creates a new `BytesPI` from a byte sequence in the specified encoding.
#[inline]
pub(crate) fn wrap(content: &'a [u8], target_len: usize) -> Self {
Self {
content: BytesStart::wrap(content, target_len),
}
}

/// Creates a new `BytesPI` from a string.
///
/// # Warning
///
/// `content` must not contain the `?>` sequence.
#[inline]
pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
let buf = str_cow_to_bytes(content);
let name_len = name_len(&buf);
Self {
content: BytesStart { buf, name_len },
}
}

/// Ensures that all data is owned to extend the object's lifetime if
/// necessary.
#[inline]
pub fn into_owned(self) -> BytesPI<'static> {
BytesPI {
content: self.content.into_owned().into(),
}
}

/// Extracts the inner `Cow` from the `BytesPI` event container.
#[inline]
pub fn into_inner(self) -> Cow<'a, [u8]> {
self.content.buf
}

/// Converts the event into a borrowed event.
#[inline]
pub fn borrow(&self) -> BytesPI {
BytesPI {
content: self.content.borrow(),
}
}

/// A target used to identify the application to which the instruction is directed.
#[inline]
pub fn target(&self) -> &[u8] {
self.content.name().0
}

/// Content of the processing instruction. Contains everything between target
/// name and the end of the instruction. A direct consequence is that the first
/// character is always a space character.
#[inline]
pub fn content(&self) -> &[u8] {
self.content.attributes_raw()
}

/// A view of the processing instructions' content as a list of key-value pairs.
///
/// Key-value pairs are used in some processing instructions, for example in
/// `<?xml-stylesheet?>`.
///
/// Returned iterator does not validate attribute values as may required by
/// target's rules. For example, it doesn't check that substring `?>` is not
/// present in the attribute value. That shouldn't be the problem when event
/// is produced by the reader, because reader detects end of processing instruction
/// by the first `?>` sequence, as required by the specification, and therefore
/// this sequence cannot appear inside it.
#[inline]
pub fn attributes(&self) -> Attributes {
self.content.attributes()
}
}

impl<'a> Debug for BytesPI<'a> {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
write!(f, "BytesPI {{ content: ")?;
write_cow_string(f, &self.content.buf)?;
write!(f, " }}")
}
}

impl<'a> Deref for BytesPI<'a> {
type Target = [u8];

fn deref(&self) -> &[u8] {
&self.content
}
}

#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesPI<'a> {
fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
Ok(Self::new(<&str>::arbitrary(u)?))
}
fn size_hint(depth: usize) -> (usize, Option<usize>) {
return <&str as arbitrary::Arbitrary>::size_hint(depth);
}
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// Event emitted by [`Reader::read_event_into`].
///
/// [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
Expand All @@ -1013,7 +1126,7 @@ pub enum Event<'a> {
/// XML declaration `<?xml ...?>`.
Decl(BytesDecl<'a>),
/// Processing instruction `<?...?>`.
PI(BytesText<'a>),
PI(BytesPI<'a>),
/// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
DocType(BytesText<'a>),
/// End of XML document.
Expand Down
8 changes: 4 additions & 4 deletions src/reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1697,7 +1697,7 @@ mod test {

/// Ensures, that no empty `Text` events are generated
mod $read_event {
use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event};
use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesPI, BytesStart, BytesText, Event};
use crate::reader::Reader;
use pretty_assertions::assert_eq;

Expand Down Expand Up @@ -1767,7 +1767,7 @@ mod test {

assert_eq!(
reader.$read_event($buf) $(.$await)? .unwrap(),
Event::PI(BytesText::from_escaped("xml-stylesheet '? >\" "))
Event::PI(BytesPI::new("xml-stylesheet '? >\" "))
);
}

Expand Down Expand Up @@ -1848,7 +1848,7 @@ mod test {
$(, $async:ident, $await:ident)?
) => {
mod small_buffers {
use crate::events::{BytesCData, BytesDecl, BytesStart, BytesText, Event};
use crate::events::{BytesCData, BytesDecl, BytesPI, BytesStart, BytesText, Event};
use crate::reader::Reader;
use pretty_assertions::assert_eq;

Expand Down Expand Up @@ -1882,7 +1882,7 @@ mod test {

assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::PI(BytesText::new("pi"))
Event::PI(BytesPI::new("pi"))
);
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Expand Down
4 changes: 2 additions & 2 deletions src/reader/state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use encoding_rs::UTF_8;

use crate::encoding::Decoder;
use crate::errors::{Error, IllFormedError, Result, SyntaxError};
use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event};
use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesPI, BytesStart, BytesText, Event};
#[cfg(feature = "encoding")]
use crate::reader::EncodingRef;
use crate::reader::{is_whitespace, name_len, BangType, Config, ParseState};
Expand Down Expand Up @@ -242,7 +242,7 @@ impl ReaderState {

Ok(Event::Decl(event))
} else {
Ok(Event::PI(BytesText::wrap(content, self.decoder())))
Ok(Event::PI(BytesPI::wrap(content, name_len(content))))
}
} else {
// <?....EOF
Expand Down
6 changes: 3 additions & 3 deletions src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use std::result::Result as StdResult;

use crate::encoding::UTF8_BOM;
use crate::errors::{Error, Result};
use crate::events::{attributes::Attribute, BytesCData, BytesStart, BytesText, Event};
use crate::events::{attributes::Attribute, BytesCData, BytesPI, BytesStart, BytesText, Event};

#[cfg(feature = "async-tokio")]
mod async_tokio;
Expand Down Expand Up @@ -551,10 +551,10 @@ impl<'a, W: Write> ElementWriter<'a, W> {
}

/// Write a processing instruction `<?...?>` inside the current element.
pub fn write_pi_content(self, text: BytesText) -> Result<&'a mut Writer<W>> {
pub fn write_pi_content(self, pi: BytesPI) -> Result<&'a mut Writer<W>> {
self.writer
.write_event(Event::Start(self.start_tag.borrow()))?;
self.writer.write_event(Event::PI(text))?;
self.writer.write_event(Event::PI(pi))?;
self.writer
.write_event(Event::End(self.start_tag.to_end()))?;
Ok(self.writer)
Expand Down
12 changes: 5 additions & 7 deletions src/writer/async_tokio.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::result::Result as StdResult;
use tokio::io::{AsyncWrite, AsyncWriteExt};

use crate::errors::{Error, Result};
use crate::events::{BytesCData, BytesText, Event};
use crate::events::{BytesCData, BytesPI, BytesText, Event};
use crate::{ElementWriter, Writer};

impl<W: AsyncWrite + Unpin> Writer<W> {
Expand Down Expand Up @@ -173,7 +173,7 @@ impl<'a, W: AsyncWrite + Unpin> ElementWriter<'a, W> {
///
/// ```
/// # use quick_xml::writer::Writer;
/// # use quick_xml::events::BytesText;
/// # use quick_xml::events::BytesPI;
/// # use tokio::io::AsyncWriteExt;
/// # #[tokio::main(flavor = "current_thread")] async fn main() {
/// let mut buffer = Vec::new();
Expand All @@ -184,9 +184,7 @@ impl<'a, W: AsyncWrite + Unpin> ElementWriter<'a, W> {
/// .create_element("paired")
/// .with_attribute(("attr1", "value1"))
/// .with_attribute(("attr2", "value2"))
/// // NOTE: We cannot use BytesText::new here, because it escapes strings,
/// // but processing instruction content should not be escaped
/// .write_pi_content_async(BytesText::from_escaped(r#"xml-stylesheet href="style.css""#))
/// .write_pi_content_async(BytesPI::new(r#"xml-stylesheet href="style.css""#))
/// .await
/// .expect("cannot write content");
///
Expand All @@ -199,7 +197,7 @@ impl<'a, W: AsyncWrite + Unpin> ElementWriter<'a, W> {
/// </paired>"#
/// );
/// # }
pub async fn write_pi_content_async(self, text: BytesText<'_>) -> Result<&'a mut Writer<W>> {
pub async fn write_pi_content_async(self, text: BytesPI<'_>) -> Result<&'a mut Writer<W>> {
self.writer
.write_event_async(Event::Start(self.start_tag.borrow()))
.await?;
Expand Down Expand Up @@ -363,7 +361,7 @@ mod tests {

test!(
pi,
Event::PI(BytesText::new("this is a processing instruction")),
Event::PI(BytesPI::new("this is a processing instruction")),
r#"<?this is a processing instruction?>"#
);

Expand Down
14 changes: 7 additions & 7 deletions tests/reader-config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
//! Please keep tests sorted (exceptions are allowed if options are tightly related).
use quick_xml::errors::{Error, IllFormedError};
use quick_xml::events::{BytesCData, BytesEnd, BytesStart, BytesText, Event};
use quick_xml::events::{BytesCData, BytesEnd, BytesPI, BytesStart, BytesText, Event};
use quick_xml::reader::Reader;

mod check_comments {
Expand Down Expand Up @@ -520,7 +520,7 @@ mod trim_text {

assert_eq!(
reader.read_event().unwrap(),
Event::PI(BytesText::new("pi \t\r\n"))
Event::PI(BytesPI::new("pi \t\r\n"))
);
assert_eq!(
reader.read_event().unwrap(),
Expand Down Expand Up @@ -570,7 +570,7 @@ mod trim_text {
);
assert_eq!(
reader.read_event().unwrap(),
Event::PI(BytesText::new("pi \t\r\n"))
Event::PI(BytesPI::new("pi \t\r\n"))
);
assert_eq!(
reader.read_event().unwrap(),
Expand Down Expand Up @@ -641,7 +641,7 @@ mod trim_text_start {

assert_eq!(
reader.read_event().unwrap(),
Event::PI(BytesText::new("pi \t\r\n"))
Event::PI(BytesPI::new("pi \t\r\n"))
);
assert_eq!(
reader.read_event().unwrap(),
Expand Down Expand Up @@ -691,7 +691,7 @@ mod trim_text_start {
);
assert_eq!(
reader.read_event().unwrap(),
Event::PI(BytesText::new("pi \t\r\n"))
Event::PI(BytesPI::new("pi \t\r\n"))
);
assert_eq!(
reader.read_event().unwrap(),
Expand Down Expand Up @@ -762,7 +762,7 @@ mod trim_text_end {

assert_eq!(
reader.read_event().unwrap(),
Event::PI(BytesText::new("pi \t\r\n"))
Event::PI(BytesPI::new("pi \t\r\n"))
);
assert_eq!(
reader.read_event().unwrap(),
Expand Down Expand Up @@ -814,7 +814,7 @@ mod trim_text_end {
);
assert_eq!(
reader.read_event().unwrap(),
Event::PI(BytesText::new("pi \t\r\n"))
Event::PI(BytesPI::new("pi \t\r\n"))
);
assert_eq!(
reader.read_event().unwrap(),
Expand Down
6 changes: 3 additions & 3 deletions tests/reader-errors.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! Contains tests that produces errors during parsing XML.
use quick_xml::errors::{Error, SyntaxError};
use quick_xml::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event};
use quick_xml::events::{BytesCData, BytesDecl, BytesEnd, BytesPI, BytesStart, BytesText, Event};
use quick_xml::reader::{NsReader, Reader};

macro_rules! ok {
Expand Down Expand Up @@ -391,8 +391,8 @@ mod syntax {

// According to the grammar, processing instruction MUST contain a non-empty
// target name, but we do not consider this as a _syntax_ error.
ok!(normal_empty("<??>") => Event::PI(BytesText::new("")));
ok!(normal_xmlx("<?xmlx?>") => Event::PI(BytesText::new("xmlx")));
ok!(normal_empty("<??>") => Event::PI(BytesPI::new("")));
ok!(normal_xmlx("<?xmlx?>") => Event::PI(BytesPI::new("xmlx")));
}

/// https://www.w3.org/TR/xml11/#NT-prolog
Expand Down

0 comments on commit 7f338af

Please sign in to comment.