Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[work-in-progress] Decoding BufReader implementation #441

Draft
wants to merge 8 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ include = ["src/*", "LICENSE-MIT.md", "README.md"]
[dependencies]
document-features = { version = "0.2", optional = true }
encoding_rs = { version = "0.8", optional = true }
encoding_rs_io = { version = "0.1", optional = true }
serde = { version = "1.0.100", optional = true }
tokio = { version = "1.10", optional = true, default-features = false, features = ["io-util"] }
memchr = "2.1"
Expand Down Expand Up @@ -90,7 +91,7 @@ async-tokio = ["tokio"]
## let mut buf = Vec::new();
## let mut unsupported = false;
## loop {
## if !reader.decoder().encoding().is_ascii_compatible() {
## if !reader.encoding().is_ascii_compatible() {
## unsupported = true;
## break;
## }
Expand All @@ -109,7 +110,7 @@ async-tokio = ["tokio"]
## [UTF-16LE]: encoding_rs::UTF_16LE
## [ISO-2022-JP]: encoding_rs::ISO_2022_JP
## [#158]: https://github.com/tafia/quick-xml/issues/158
encoding = ["encoding_rs"]
encoding = ["encoding_rs", "encoding_rs_io"]

## Enables support for recognizing all [HTML 5 entities] in [`unescape`] and
## [`unescape_with`] functions. The full list of entities also can be found in
Expand Down
4 changes: 3 additions & 1 deletion Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,9 @@

### Misc Changes

- [#441]: `Reader::decoder()` removed as it is no longer necessary (`Reader` already
decodes everything for you). `Reader::encoding()` is provided to make the current
encoding accessible as it was before.
- [#481]: Removed the uses of `const fn` added in version 0.24 in favor of a lower minimum
supported Rust version (1.46.0). Minimum supported Rust version is now verified in the CI.
- [#489]: Reduced the size of the package uploaded to crates.io by excluding
Expand Down Expand Up @@ -415,7 +418,6 @@
- [#416]: `BytesStart::to_borrowed` renamed to `BytesStart::borrow`, the same method
added to all events

- [#421]: `decode_and_unescape*` methods now do one fewer allocation if unescaping is not required
dralley marked this conversation as resolved.
Show resolved Hide resolved
- [#421]: Removed the ability to deserialize byte arrays from the serde deserializer.
XML is not able to store binary data directly; you should always use some encoding
scheme, for example, HEX or Base64
Expand Down
8 changes: 4 additions & 4 deletions benches/macrobenches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ fn parse_document_from_str(doc: &str) -> XmlResult<()> {
match criterion::black_box(r.read_event()?) {
Event::Start(e) | Event::Empty(e) => {
for attr in e.attributes() {
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
criterion::black_box(attr?.unescape_value()?);
}
}
Event::Text(e) => {
Expand All @@ -75,7 +75,7 @@ fn parse_document_from_bytes(doc: &[u8]) -> XmlResult<()> {
match criterion::black_box(r.read_event_into(&mut buf)?) {
Event::Start(e) | Event::Empty(e) => {
for attr in e.attributes() {
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
criterion::black_box(attr?.unescape_value()?);
}
}
Event::Text(e) => {
Expand All @@ -101,7 +101,7 @@ fn parse_document_from_str_with_namespaces(doc: &str) -> XmlResult<()> {
(resolved_ns, Event::Start(e) | Event::Empty(e)) => {
criterion::black_box(resolved_ns);
for attr in e.attributes() {
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
criterion::black_box(attr?.unescape_value()?);
}
}
(resolved_ns, Event::Text(e)) => {
Expand Down Expand Up @@ -129,7 +129,7 @@ fn parse_document_from_bytes_with_namespaces(doc: &[u8]) -> XmlResult<()> {
(resolved_ns, Event::Start(e) | Event::Empty(e)) => {
criterion::black_box(resolved_ns);
for attr in e.attributes() {
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
criterion::black_box(attr?.unescape_value()?);
}
}
(resolved_ns, Event::Text(e)) => {
Expand Down
2 changes: 1 addition & 1 deletion benches/microbenches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ fn attributes(c: &mut Criterion) {
let mut count = criterion::black_box(0);
loop {
match r.read_event() {
Ok(Event::Empty(e)) if e.name() == QName(b"player") => {
Ok(Event::Empty(e)) if e.name() == QName("player") => {
for name in ["num", "status", "avg"] {
if let Some(_attr) = e.try_get_attribute(name).unwrap() {
count += 1
Expand Down
10 changes: 5 additions & 5 deletions examples/custom_entities.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,20 +33,20 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
loop {
match reader.read_event() {
Ok(Event::DocType(ref e)) => {
for cap in entity_re.captures_iter(e) {
for cap in entity_re.captures_iter(e.as_bytes()) {
custom_entities.insert(
reader.decoder().decode(&cap[1])?.into_owned(),
reader.decoder().decode(&cap[2])?.into_owned(),
String::from_utf8(cap[1].to_owned())?,
String::from_utf8(cap[2].to_owned())?,
);
}
}
Ok(Event::Start(ref e)) => {
if let b"test" = e.name().as_ref() {
if let "test" = e.name().as_ref() {
let attributes = e
.attributes()
.map(|a| {
a.unwrap()
.decode_and_unescape_value_with(&reader, |ent| {
.unescape_value_with(|ent| {
custom_entities.get(ent).map(|s| s.as_str())
})
.unwrap()
Expand Down
13 changes: 5 additions & 8 deletions examples/nested_readers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ fn main() -> Result<(), quick_xml::Error> {
loop {
match reader.read_event_into(&mut buf)? {
Event::Start(element) => {
if let b"w:tbl" = element.name().as_ref() {
if let "w:tbl" = element.name().as_ref() {
count += 1;
let mut stats = TableStat {
index: count,
Expand All @@ -35,20 +35,17 @@ fn main() -> Result<(), quick_xml::Error> {
skip_buf.clear();
match reader.read_event_into(&mut skip_buf)? {
Event::Start(element) => match element.name().as_ref() {
b"w:tr" => {
"w:tr" => {
stats.rows.push(vec![]);
row_index = stats.rows.len() - 1;
}
b"w:tc" => {
stats.rows[row_index].push(
String::from_utf8(element.name().as_ref().to_vec())
.unwrap(),
);
"w:tc" => {
stats.rows[row_index].push(element.name().as_ref().to_owned());
}
_ => {}
},
Event::End(element) => {
if element.name().as_ref() == b"w:tbl" {
if element.name().as_ref() == "w:tbl" {
found_tables.push(stats);
break;
}
Expand Down
4 changes: 1 addition & 3 deletions examples/read_buffered.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,7 @@ fn main() -> Result<(), quick_xml::Error> {
loop {
match reader.read_event_into(&mut buf) {
Ok(Event::Start(ref e)) => {
let name = e.name();
let name = reader.decoder().decode(name.as_ref())?;
println!("read start event {:?}", name.as_ref());
println!("read start event {:?}", e.name().as_ref());
count += 1;
}
Ok(Event::Eof) => break, // exits the loop when reaching end of file
Expand Down
26 changes: 8 additions & 18 deletions examples/read_nodes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ use quick_xml::name::QName;
use quick_xml::reader::Reader;
use std::borrow::Cow;
use std::collections::HashMap;
use std::convert::Infallible;
use std::str;

const XML: &str = r#"
Expand Down Expand Up @@ -47,8 +46,8 @@ impl Translation {
for attr_result in element.attributes() {
let a = attr_result?;
match a.key.as_ref() {
b"Language" => lang = a.decode_and_unescape_value(reader)?,
b"Tag" => tag = a.decode_and_unescape_value(reader)?,
"Language" => lang = Cow::Owned(a.unescape_value()?.to_string()),
"Tag" => tag = Cow::Owned(a.unescape_value()?.to_string()),
_ => (),
}
}
Expand All @@ -57,7 +56,7 @@ impl Translation {

if let Event::Start(ref e) = event {
let name = e.name();
if name == QName(b"Text") {
if name == QName("Text") {
// note: `read_text` does not support content as CDATA
let text_content = reader.read_text(e.name())?;
Ok(Translation {
Expand All @@ -67,8 +66,7 @@ impl Translation {
})
} else {
dbg!("Expected Event::Start for Text, got: {:?}", &event);
let name_string = reader.decoder().decode(name.as_ref())?;
Err(quick_xml::Error::UnexpectedToken(name_string.into()))
Err(quick_xml::Error::UnexpectedToken(name.as_ref().to_owned()))
}
} else {
let event_string = format!("{:?}", event);
Expand Down Expand Up @@ -99,24 +97,16 @@ fn main() -> Result<(), quick_xml::Error> {

match event {
Event::Start(element) => match element.name().as_ref() {
b"DefaultSettings" => {
"DefaultSettings" => {
// Note: real app would handle errors with good defaults or halt program with nice message
// This illustrates decoding an attribute's key and value with error handling
settings = element
.attributes()
.map(|attr_result| {
match attr_result {
Ok(a) => {
let key = reader.decoder().decode(a.key.local_name().as_ref())
.or_else(|err| {
dbg!("unable to read key in DefaultSettings attribute {:?}, utf8 error {:?}", &a, err);
Ok::<Cow<'_, str>, Infallible>(std::borrow::Cow::from(""))
})
.unwrap().to_string();
let value = a.decode_and_unescape_value(&reader).or_else(|err| {
dbg!("unable to read key in DefaultSettings attribute {:?}, utf8 error {:?}", &a, err);
Ok::<Cow<'_, str>, Infallible>(std::borrow::Cow::from(""))
}).unwrap().to_string();
let key = a.key.local_name().as_ref().to_string();
let value = a.unescape_value().expect("failure to unescape").to_string();
(key, value)
},
Err(err) => {
Expand All @@ -130,7 +120,7 @@ fn main() -> Result<(), quick_xml::Error> {
assert_eq!(settings["Greeting"], "HELLO");
reader.read_to_end(element.name())?;
}
b"Translation" => {
"Translation" => {
translations.push(Translation::new_from_element(&mut reader, element)?);
}
_ => (),
Expand Down
2 changes: 1 addition & 1 deletion examples/read_texts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ fn main() {

loop {
match reader.read_event() {
Ok(Event::Start(e)) if e.name().as_ref() == b"tag2" => {
Ok(Event::Start(e)) if e.name().as_ref() == "tag2" => {
// read_text_into for buffered readers not implemented
let txt = reader
.read_text(e.name())
Expand Down
4 changes: 2 additions & 2 deletions src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1931,7 +1931,7 @@ pub use crate::errors::serialize::DeError;
pub use resolver::{EntityResolver, NoEntityResolver};

use crate::{
encoding::Decoder,
encoding::Utf8BytesReader,
errors::Error,
events::{BytesCData, BytesEnd, BytesStart, BytesText, Event},
name::QName,
Expand Down Expand Up @@ -2677,7 +2677,7 @@ where
}
}

impl<'de, R> Deserializer<'de, IoReader<R>>
impl<'de, R> Deserializer<'de, IoReader<Utf8BytesReader<R>>>
where
R: BufRead,
{
Expand Down
Loading