Skip to content

Commit

Permalink
Replace BytesText::unescape and unescape_with by decode
Browse files Browse the repository at this point in the history
Text events produces by the Reader can not contain escaped data anymore,
all such data is represented by the Event::GeneralRef
  • Loading branch information
Mingun committed Oct 20, 2024
1 parent 4c1bba1 commit dbfa0bd
Show file tree
Hide file tree
Showing 15 changed files with 28 additions and 48 deletions.
2 changes: 2 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ XML specification. See the updated `custom_entities` example!
- [#826]: Removed `DeError::InvalidInt`, `DeError::InvalidFloat` and `DeError::InvalidBoolean`.
Now the responsibility for returning the error lies with the visitor of the type.
See rationale in https://github.com/serde-rs/serde/pull/2811
- [#766]: `BytesText::unescape` and `BytesText::unescape_with` replaced by `BytesText::decode`.
Now Text events does not contain escaped parts which are reported as `Event::GeneralRef`.

[#227]: https://github.com/tafia/quick-xml/issues/227
[#655]: https://github.com/tafia/quick-xml/issues/655
Expand Down
8 changes: 4 additions & 4 deletions benches/macrobenches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ fn parse_document_from_str(doc: &str) -> XmlResult<()> {
}
}
Event::Text(e) => {
criterion::black_box(e.unescape()?);
criterion::black_box(e.decode()?);
}
Event::CData(e) => {
criterion::black_box(e.into_inner());
Expand All @@ -79,7 +79,7 @@ fn parse_document_from_bytes(doc: &[u8]) -> XmlResult<()> {
}
}
Event::Text(e) => {
criterion::black_box(e.unescape()?);
criterion::black_box(e.decode()?);
}
Event::CData(e) => {
criterion::black_box(e.into_inner());
Expand All @@ -105,7 +105,7 @@ fn parse_document_from_str_with_namespaces(doc: &str) -> XmlResult<()> {
}
}
(resolved_ns, Event::Text(e)) => {
criterion::black_box(e.unescape()?);
criterion::black_box(e.decode()?);
criterion::black_box(resolved_ns);
}
(resolved_ns, Event::CData(e)) => {
Expand Down Expand Up @@ -133,7 +133,7 @@ fn parse_document_from_bytes_with_namespaces(doc: &[u8]) -> XmlResult<()> {
}
}
(resolved_ns, Event::Text(e)) => {
criterion::black_box(e.unescape()?);
criterion::black_box(e.decode()?);
criterion::black_box(resolved_ns);
}
(resolved_ns, Event::CData(e)) => {
Expand Down
2 changes: 1 addition & 1 deletion benches/microbenches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ fn one_event(c: &mut Criterion) {
config.trim_text(true);
config.check_end_names = false;
match r.read_event() {
Ok(Event::Comment(e)) => nbtxt += e.unescape().unwrap().len(),
Ok(Event::Comment(e)) => nbtxt += e.decode().unwrap().len(),
something_else => panic!("Did not expect {:?}", something_else),
};

Expand Down
2 changes: 1 addition & 1 deletion fuzz/fuzz_targets/fuzz_target_1.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ where
| Ok(Event::Comment(ref e))
| Ok(Event::DocType(ref e)) => {
debug_format!(e);
if let Err(err) = e.unescape() {
if let Err(err) = e.decode() {
debug_format!(err);
break;
}
Expand Down
6 changes: 2 additions & 4 deletions src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2223,9 +2223,7 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
// FIXME: Actually, we should trim after decoding text, but now we trim before
e.inplace_trim_end();
}
result
.to_mut()
.push_str(&e.unescape_with(|entity| self.entity_resolver.resolve(entity))?);
result.to_mut().push_str(&e.decode()?);
}
PayloadEvent::CData(e) => result.to_mut().push_str(&e.decode()?),

Expand All @@ -2247,7 +2245,7 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
// FIXME: Actually, we should trim after decoding text, but now we trim before
continue;
}
self.drain_text(e.unescape_with(|entity| self.entity_resolver.resolve(entity))?)
self.drain_text(e.decode()?)
}
PayloadEvent::CData(e) => self.drain_text(e.decode()?),
PayloadEvent::DocType(e) => {
Expand Down
28 changes: 4 additions & 24 deletions src/events/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,7 @@ use std::str::from_utf8;

use crate::encoding::{Decoder, EncodingError};
use crate::errors::{Error, IllFormedError};
use crate::escape::{
escape, minimal_escape, parse_number, partial_escape, resolve_predefined_entity, unescape_with,
EscapeError,
};
use crate::escape::{escape, minimal_escape, parse_number, partial_escape, EscapeError};
use crate::name::{LocalName, QName};
#[cfg(feature = "serialize")]
use crate::utils::CowRef;
Expand Down Expand Up @@ -580,29 +577,12 @@ impl<'a> BytesText<'a> {
}
}

/// Decodes then unescapes the content of the event.
///
/// This will allocate if the value contains any escape sequences or in
/// non-UTF-8 encoding.
pub fn unescape(&self) -> Result<Cow<'a, str>, Error> {
self.unescape_with(resolve_predefined_entity)
}

/// Decodes then unescapes the content of the event with custom entities.
/// Decodes the content of the event.
///
/// This will allocate if the value contains any escape sequences or in
/// non-UTF-8 encoding.
pub fn unescape_with<'entity>(
&self,
resolve_entity: impl FnMut(&str) -> Option<&'entity str>,
) -> Result<Cow<'a, str>, Error> {
let decoded = self.decoder.decode_cow(&self.content)?;

match unescape_with(&decoded, resolve_entity)? {
// Because result is borrowed, no replacements was done and we can use original string
Cow::Borrowed(_) => Ok(decoded),
Cow::Owned(s) => Ok(s.into()),
}
pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
self.decoder.decode_cow(&self.content)
}

/// Removes leading XML whitespace bytes from text content.
Expand Down
6 changes: 3 additions & 3 deletions src/reader/async_tokio.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ impl<R: AsyncBufRead + Unpin> Reader<R> {
/// loop {
/// match reader.read_event_into_async(&mut buf).await {
/// Ok(Event::Start(_)) => count += 1,
/// Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()),
/// Ok(Event::Text(e)) => txt.push(e.decode().unwrap().into_owned()),
/// Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
/// Ok(Event::Eof) => break,
/// _ => (),
Expand Down Expand Up @@ -237,7 +237,7 @@ impl<R: AsyncBufRead + Unpin> NsReader<R> {
/// }
/// }
/// Event::Text(e) => {
/// txt.push(e.unescape().unwrap().into_owned())
/// txt.push(e.decode().unwrap().into_owned())
/// }
/// Event::Eof => break,
/// _ => (),
Expand Down Expand Up @@ -373,7 +373,7 @@ impl<R: AsyncBufRead + Unpin> NsReader<R> {
/// (_, Event::Start(_)) => unreachable!(),
///
/// (_, Event::Text(e)) => {
/// txt.push(e.unescape().unwrap().into_owned())
/// txt.push(e.decode().unwrap().into_owned())
/// }
/// (_, Event::Eof) => break,
/// _ => (),
Expand Down
2 changes: 1 addition & 1 deletion src/reader/buffered_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,7 @@ impl<R: BufRead> Reader<R> {
/// loop {
/// match reader.read_event_into(&mut buf) {
/// Ok(Event::Start(_)) => count += 1,
/// Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()),
/// Ok(Event::Text(e)) => txt.push(e.decode().unwrap().into_owned()),
/// Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
/// Ok(Event::Eof) => break,
/// _ => (),
Expand Down
2 changes: 1 addition & 1 deletion src/reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -718,7 +718,7 @@ where
/// _ => (),
/// }
/// }
/// Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()),
/// Ok(Event::Text(e)) => txt.push(e.decode().unwrap().into_owned()),
///
/// // There are several other `Event`s we do not consider here
/// _ => (),
Expand Down
8 changes: 4 additions & 4 deletions src/reader/ns_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ impl<R: BufRead> NsReader<R> {
/// }
/// }
/// Event::Text(e) => {
/// txt.push(e.unescape().unwrap().into_owned())
/// txt.push(e.decode().unwrap().into_owned())
/// }
/// Event::Eof => break,
/// _ => (),
Expand Down Expand Up @@ -478,7 +478,7 @@ impl<R: BufRead> NsReader<R> {
/// (_, Event::Start(_)) => unreachable!(),
///
/// (_, Event::Text(e)) => {
/// txt.push(e.unescape().unwrap().into_owned())
/// txt.push(e.decode().unwrap().into_owned())
/// }
/// (_, Event::Eof) => break,
/// _ => (),
Expand Down Expand Up @@ -664,7 +664,7 @@ impl<'i> NsReader<&'i [u8]> {
/// }
/// }
/// Event::Text(e) => {
/// txt.push(e.unescape().unwrap().into_owned())
/// txt.push(e.decode().unwrap().into_owned())
/// }
/// Event::Eof => break,
/// _ => (),
Expand Down Expand Up @@ -726,7 +726,7 @@ impl<'i> NsReader<&'i [u8]> {
/// (_, Event::Start(_)) => unreachable!(),
///
/// (_, Event::Text(e)) => {
/// txt.push(e.unescape().unwrap().into_owned())
/// txt.push(e.decode().unwrap().into_owned())
/// }
/// (_, Event::Eof) => break,
/// _ => (),
Expand Down
2 changes: 1 addition & 1 deletion src/reader/slice_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ impl<'a> Reader<&'a [u8]> {
/// loop {
/// match reader.read_event().unwrap() {
/// Event::Start(e) => count += 1,
/// Event::Text(e) => txt.push(e.unescape().unwrap().into_owned()),
/// Event::Text(e) => txt.push(e.decode().unwrap().into_owned()),
/// Event::Eof => break,
/// _ => (),
/// }
Expand Down
2 changes: 1 addition & 1 deletion tests/encodings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ fn test_koi8_r_encoding() {
loop {
match r.read_event_into(&mut buf) {
Ok(Text(e)) => {
e.unescape().unwrap();
e.decode().unwrap();
}
Ok(Eof) => break,
_ => (),
Expand Down
2 changes: 1 addition & 1 deletion tests/fuzzing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ fn fuzz_101() {
}
}
Ok(Event::Text(e)) => {
if e.unescape().is_err() {
if e.decode().is_err() {
break;
}
}
Expand Down
2 changes: 1 addition & 1 deletion tests/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ fn test_escaped_content() {
"content unexpected: expecting 'test', got '{:?}'",
from_utf8(&e)
);
match e.unescape() {
match e.decode() {
Ok(c) => assert_eq!(c, "test"),
Err(e) => panic!(
"cannot escape content at position {}: {:?}",
Expand Down
2 changes: 1 addition & 1 deletion tests/roundtrip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ fn reescape_text() {
match reader.read_event().unwrap() {
Eof => break,
Text(e) => {
let t = e.unescape().unwrap();
let t = e.decode().unwrap();
assert!(writer.write_event(Text(BytesText::new(&t))).is_ok());
}
e => assert!(writer.write_event(e).is_ok()),
Expand Down

0 comments on commit dbfa0bd

Please sign in to comment.