diff --git a/Cargo.toml b/Cargo.toml index 39efdfdf..eeca7a9d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -84,7 +84,7 @@ async-tokio = ["tokio"] ## let mut buf = Vec::new(); ## let mut unsupported = false; ## loop { -## if !reader.decoder().encoding().is_ascii_compatible() { +## if !reader.encoding().is_ascii_compatible() { ## unsupported = true; ## break; ## } diff --git a/Changelog.md b/Changelog.md index e84c625e..761f9633 100644 --- a/Changelog.md +++ b/Changelog.md @@ -16,6 +16,12 @@ ### Misc Changes +- [#441]: `Reader::decoder()` and the public `Decoder` type are removed as they are no longer + necessary (`Reader` already decodes everything for you). `Reader::encoding()` is provided to + make the current encoding accessible as it was before. + +[#441]: https://github.com/tafia/quick-xml/pull/441 + ## 0.24.0 -- 2022-08-28 ### New Features diff --git a/src/encoding.rs b/src/encoding.rs index 022abfa7..3a920e62 100644 --- a/src/encoding.rs +++ b/src/encoding.rs @@ -75,74 +75,6 @@ impl io::BufRead for Utf8BytesReader { } } -/// Decoder of byte slices into strings. -/// -/// If feature `encoding` is enabled, this encoding taken from the `"encoding"` -/// XML declaration or assumes UTF-8, if XML has no declaration, encoding -/// key is not defined or contains unknown encoding. -/// -/// The library supports any UTF-8 compatible encodings that crate `encoding_rs` -/// is supported. [*UTF-16 and ISO-2022-JP are not supported at the present*][utf16]. -/// -/// If feature `encoding` is disabled, the decoder is always UTF-8 decoder: -/// any XML declarations are ignored. 
-/// -/// [utf16]: https://github.com/tafia/quick-xml/issues/158 -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub struct Decoder { - #[cfg(feature = "encoding")] - pub(crate) encoding: &'static Encoding, -} - -impl Decoder { - pub(crate) fn utf8() -> Self { - Decoder { - #[cfg(feature = "encoding")] - encoding: UTF_8, - } - } - - #[cfg(all(test, feature = "encoding", feature = "serialize"))] - pub(crate) fn utf16() -> Self { - Decoder { encoding: UTF_16LE } - } -} - -impl Decoder { - /// Returns the `Reader`s encoding. - /// - /// This encoding will be used by [`decode`]. - /// - /// [`decode`]: Self::decode - #[cfg(feature = "encoding")] - pub const fn encoding(&self) -> &'static Encoding { - self.encoding - } - - /// ## Without `encoding` feature - /// - /// Decodes an UTF-8 slice regardless of XML declaration and ignoring BOM - /// if it is present in the `bytes`. - /// - /// ## With `encoding` feature - /// - /// Decodes specified bytes using encoding, declared in the XML, if it was - /// declared there, or UTF-8 otherwise, and ignoring BOM if it is present - /// in the `bytes`. - /// - /// ---- - /// Returns an error in case of malformed sequences in the `bytes`. - pub fn decode<'b>(&self, bytes: &'b [u8]) -> Result> { - #[cfg(not(feature = "encoding"))] - let decoded = Ok(Cow::Borrowed(std::str::from_utf8(bytes)?)); - - #[cfg(feature = "encoding")] - let decoded = decode(bytes, self.encoding); - - decoded - } -} - /// Decodes the provided bytes using the specified encoding. /// /// Returns an error in case of malformed or non-representable sequences in the `bytes`. 
diff --git a/src/lib.rs b/src/lib.rs index 5e8a20c2..85fbf73e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -65,7 +65,6 @@ pub mod utils; pub mod writer; // reexports -pub use crate::encoding::Decoder; #[cfg(feature = "serialize")] pub use crate::errors::serialize::DeError; pub use crate::errors::{Error, Result}; diff --git a/src/reader/buffered_reader.rs b/src/reader/buffered_reader.rs index de12f2d0..004b8de1 100644 --- a/src/reader/buffered_reader.rs +++ b/src/reader/buffered_reader.rs @@ -437,9 +437,9 @@ mod test { Reader::from_reader(b"\xFF\xFE".as_ref()); let mut buf = Vec::new(); - assert_eq!(reader.decoder().encoding(), UTF_8); + assert_eq!(reader.encoding(), UTF_8); reader.read_event_into(&mut buf).unwrap(); - assert_eq!(reader.decoder().encoding(), WINDOWS_1251); + assert_eq!(reader.encoding(), WINDOWS_1251); assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof); } @@ -452,12 +452,12 @@ mod test { ); let mut buf = Vec::new(); - assert_eq!(reader.decoder().encoding(), UTF_8); + assert_eq!(reader.encoding(), UTF_8); reader.read_event_into(&mut buf).unwrap(); - assert_eq!(reader.decoder().encoding(), UTF_16LE); + assert_eq!(reader.encoding(), UTF_16LE); reader.read_event_into(&mut buf).unwrap(); - assert_eq!(reader.decoder().encoding(), UTF_16LE); + assert_eq!(reader.encoding(), UTF_16LE); assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof); } diff --git a/src/reader/mod.rs b/src/reader/mod.rs index d9cfbaf8..d19959d6 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -6,7 +6,7 @@ use std::ops::Range; #[cfg(feature = "encoding")] use encoding_rs::{Encoding, UTF_8}; -use crate::encoding::{Decoder, Utf8BytesReader}; +use crate::encoding::Utf8BytesReader; use crate::errors::{Error, Result}; use crate::events::Event; use crate::reader::parser::Parser; @@ -283,8 +283,7 @@ macro_rules! 
read_to_end { depth -= 1; } Ok(Event::Eof) => { - let name = $self.decoder().decode($end.as_ref().as_bytes()); - return Err(Error::UnexpectedEof(format!("", name))); + return Err(Error::UnexpectedEof(format!("", $end.as_ref()))); } _ => (), } @@ -529,16 +528,17 @@ impl Reader { } } - /// Get the decoder, used to decode bytes, read by this reader, to the strings. + /// Get the encoding this reader is currently using to decode strings. /// /// If `encoding` feature is enabled, the used encoding may change after /// parsing the XML declaration, otherwise encoding is fixed to UTF-8. /// /// If `encoding` feature is enabled and no encoding is specified in declaration, /// defaults to UTF-8. + #[cfg(feature = "encoding")] #[inline] - pub const fn decoder(&self) -> Decoder { - self.parser.decoder() + pub const fn encoding(&self) -> &'static Encoding { + self.parser.encoding.encoding() } } diff --git a/src/reader/ns_reader.rs b/src/reader/ns_reader.rs index 20790038..63865516 100644 --- a/src/reader/ns_reader.rs +++ b/src/reader/ns_reader.rs @@ -780,7 +780,7 @@ impl<'i> NsReader<&'i [u8]> { /// /// ```ignore /// let span = reader.read_to_end(end)?; - /// let text = reader.decoder().decode(&reader.inner_slice[span]); + /// let text = std::str::from_utf8(&reader.inner_slice[span]); /// ``` /// /// # Examples diff --git a/src/reader/parser.rs b/src/reader/parser.rs index f4cda865..7b4eed42 100644 --- a/src/reader/parser.rs +++ b/src/reader/parser.rs @@ -1,7 +1,6 @@ #[cfg(feature = "encoding")] use encoding_rs::UTF_8; -use crate::encoding::Decoder; use crate::errors::{Error, Result}; use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event}; #[cfg(feature = "encoding")] @@ -55,6 +54,16 @@ pub(super) struct Parser { #[cfg(feature = "encoding")] /// Reference to the encoding used to read an XML + /// + /// If feature `encoding` is enabled, this encoding is taken from the `"encoding"` + /// XML declaration or assumes UTF-8, if XML has no declaration, 
encoding + /// key is not defined or contains unknown encoding. + /// + /// The library supports any UTF-8 compatible encoding that the `encoding_rs` crate + /// supports. [*UTF-16 and ISO-2022-JP are not supported at present*](https://github.com/tafia/quick-xml/issues/158). + /// + /// If feature `encoding` is disabled, this field does not exist and input is always + /// treated as UTF-8: any XML declarations are ignored. pub encoding: EncodingRef, } @@ -226,20 +235,6 @@ impl Parser { .split_off(self.opened_starts.pop().unwrap()); Ok(Event::End(BytesEnd::new(name))) } - - /// Get the decoder, used to decode bytes, read by this reader, to the strings. - /// - /// If `encoding` feature is enabled, the used encoding may change after - /// parsing the XML declaration, otherwise encoding is fixed to UTF-8. - /// - /// If `encoding` feature is enabled and no encoding is specified in declaration, - /// defaults to UTF-8. - pub const fn decoder(&self) -> Decoder { - Decoder { - #[cfg(feature = "encoding")] - encoding: self.encoding.encoding(), - } - } } impl Default for Parser { diff --git a/src/reader/slice_reader.rs b/src/reader/slice_reader.rs index 04d1a8f6..f8b1c205 100644 --- a/src/reader/slice_reader.rs +++ b/src/reader/slice_reader.rs @@ -158,7 +158,7 @@ impl<'a> Reader<&'a [u8]> { /// /// ```ignore /// let span = reader.read_to_end(end)?; - /// let text = reader.decoder().decode(&reader.inner_slice[span]); + /// let text = std::str::from_utf8(&reader.inner_slice[span]); /// ``` /// /// # Examples @@ -211,7 +211,7 @@ impl<'a> Reader<&'a [u8]> { let buffer = self.reader; let span = self.read_to_end(end)?; - self.decoder().decode(&buffer[0..span.len()]) + Ok(Cow::Borrowed(std::str::from_utf8(&buffer[0..span.len()])?)) } } @@ -362,9 +362,9 @@ mod test { fn str_always_has_utf8() { let mut reader = Reader::from_str(""); - assert_eq!(reader.decoder().encoding(), UTF_8); + assert_eq!(reader.encoding(), UTF_8); reader.read_event().unwrap(); - assert_eq!(reader.decoder().encoding(), UTF_8); + assert_eq!(reader.encoding(), UTF_8); 
assert_eq!(reader.read_event().unwrap(), Event::Eof); } diff --git a/tests/encodings.rs b/tests/encodings.rs index 6e2bcf95..fe692898 100644 --- a/tests/encodings.rs +++ b/tests/encodings.rs @@ -59,7 +59,7 @@ mod detect { let mut r = Reader::from_reader( include_bytes!(concat!("documents/encoding/", $file, ".xml")).as_ref(), ); - assert_eq!(r.decoder().encoding(), UTF_8); + assert_eq!(r.encoding(), UTF_8); let mut buf = Vec::new(); loop { @@ -67,7 +67,7 @@ mod detect { Event::Eof => break, _ => {} } - assert_eq!(r.decoder().encoding(), $enc); + assert_eq!(r.encoding(), $enc); buf.clear(); $($break)? } diff --git a/tests/xmlrs_reader_tests.rs b/tests/xmlrs_reader_tests.rs index fbb769b2..918eed45 100644 --- a/tests/xmlrs_reader_tests.rs +++ b/tests/xmlrs_reader_tests.rs @@ -394,7 +394,7 @@ fn test_bytes(input: &[u8], output: &[u8], trim: bool) { loop { let line = match reader.read_resolved_event_into(&mut Vec::new()) { Ok((_, Event::Decl(e))) => { - // Declaration could change decoder + // Declaration could change encoding let version = e.version().unwrap(); let encoding = e.encoding().unwrap().unwrap(); format!("StartDocument({}, {})", version, encoding)