Skip to content

Commit

Permalink
Add benchmarks for escaping / unescaping text
Browse files Browse the repository at this point in the history
closes tafia#404
  • Loading branch information
dralley committed Jun 29, 2022
1 parent 7770b76 commit b9bed6f
Show file tree
Hide file tree
Showing 4 changed files with 154 additions and 15 deletions.
2 changes: 2 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@
- [#393]: Added more tests for namespace resolver
- [#393]: Added tests for reserved names (started with "xml"i) -- see <https://www.w3.org/TR/xml-names11/#xmlReserved>
- [#363]: Add tests for `Reader::read_event_buffered` to ensure that proper events are generated for corresponding inputs
- [#407]: Improved benchmark suite to cover whole-document parsing, escaping and unescaping text

[#8]: https://github.com/Mingun/fast-xml/pull/8
[#9]: https://github.com/Mingun/fast-xml/pull/9
Expand All @@ -115,6 +116,7 @@
[#393]: https://github.com/tafia/quick-xml/pull/393
[#395]: https://github.com/tafia/quick-xml/pull/395
[#403]: https://github.com/tafia/quick-xml/pull/403
[#407]: https://github.com/tafia/quick-xml/pull/407

## 0.23.0 -- 2022-05-08

Expand Down
11 changes: 4 additions & 7 deletions benches/macrobenches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,13 @@ fn parse_document(doc: &[u8]) -> XmlResult<()> {
for attr in e.attributes() {
criterion::black_box(attr?.unescaped_value()?);
}
},
}
Event::Text(e) => {
criterion::black_box(e.unescaped()?);
},
}
Event::CData(e) => {
criterion::black_box(e.into_inner());
},
}
Event::End(_) => (),
Event::Eof => break,
_ => (),
Expand Down Expand Up @@ -131,7 +131,4 @@ pub fn bench_fully_parse_document(c: &mut Criterion) {
group.finish();
}

criterion_group!(
benches,
bench_fully_parse_document,
);
criterion_group!(benches, bench_fully_parse_document,);
153 changes: 146 additions & 7 deletions benches/microbenches.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,28 @@
use criterion::{self, criterion_group, Criterion};
use pretty_assertions::assert_eq;
use quick_xml::escape::{escape, unescape};
use quick_xml::events::Event;
use quick_xml::name::QName;
use quick_xml::Reader;

static SAMPLE: &[u8] = include_bytes!("../tests/documents/sample_rss.xml");
static PLAYERS: &[u8] = include_bytes!("../tests/documents/players.xml");

/// Multi-line "Lorem ipsum" filler text used as the long benchmark input.
///
/// The text contains none of the characters `<`, `>`, `&`, `'` or `"`, so it
/// serves as the "nothing to escape / nothing to unescape" case. The line
/// breaks inside the literal are part of the data.
static LOREM_IPSUM_TEXT: &[u8] =
b"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt
ut labore et dolore magna aliqua. Hac habitasse platea dictumst vestibulum rhoncus est pellentesque.
Risus ultricies tristique nulla aliquet enim tortor at. Fermentum odio eu feugiat pretium nibh ipsum.
Volutpat sed cras ornare arcu dui. Scelerisque fermentum dui faucibus in ornare quam. Arcu cursus
euismod quis viverra nibh cras pulvinar mattis. Sed viverra tellus in hac habitasse platea. Quis
commodo odio aenean sed. Cursus in hac habitasse platea dictumst quisque sagittis purus.
Neque convallis a cras semper auctor. Sit amet mauris commodo quis imperdiet massa. Ac ut consequat
semper viverra nam libero justo laoreet sit. Adipiscing commodo elit at imperdiet dui accumsan.
Enim lobortis scelerisque fermentum dui faucibus in ornare. Natoque penatibus et magnis dis parturient
montes nascetur ridiculus mus. At lectus urna duis convallis convallis tellus id interdum. Libero
volutpat sed cras ornare arcu dui vivamus arcu. Cursus in hac habitasse platea dictumst quisque sagittis
purus. Consequat id porta nibh venenatis cras sed felis.";

/// Benchmarks the `Reader::read_event` function with all XML well-formedness
/// checks disabled (with and without trimming content of #text nodes)
fn read_event(c: &mut Criterion) {
Expand All @@ -25,7 +41,10 @@ fn read_event(c: &mut Criterion) {
}
buf.clear();
}
assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml");
assert_eq!(
count, 1550,
"Overall tag count in ./tests/documents/sample_rss.xml"
);
})
});

Expand All @@ -45,7 +64,10 @@ fn read_event(c: &mut Criterion) {
}
buf.clear();
}
assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml");
assert_eq!(
count, 1550,
"Overall tag count in ./tests/documents/sample_rss.xml"
);
});
});
group.finish();
Expand All @@ -70,7 +92,10 @@ fn read_namespaced_event(c: &mut Criterion) {
}
buf.clear();
}
assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml");
assert_eq!(
count, 1550,
"Overall tag count in ./tests/documents/sample_rss.xml"
);
});
});

Expand All @@ -91,7 +116,10 @@ fn read_namespaced_event(c: &mut Criterion) {
}
buf.clear();
}
assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml");
assert_eq!(
count, 1550,
"Overall tag count in ./tests/documents/sample_rss.xml"
);
});
});
group.finish();
Expand All @@ -117,7 +145,10 @@ fn bytes_text_unescaped(c: &mut Criterion) {
}
buf.clear();
}
assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml");
assert_eq!(
count, 1550,
"Overall tag count in ./tests/documents/sample_rss.xml"
);

// Windows has \r\n instead of \n
#[cfg(windows)]
Expand Down Expand Up @@ -152,7 +183,10 @@ fn bytes_text_unescaped(c: &mut Criterion) {
}
buf.clear();
}
assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml");
assert_eq!(
count, 1550,
"Overall tag count in ./tests/documents/sample_rss.xml"
);

// Windows has \r\n instead of \n
#[cfg(windows)]
Expand Down Expand Up @@ -333,11 +367,116 @@ fn attributes(c: &mut Criterion) {
group.finish();
}

/// Benchmarks escaping text using XML rules.
///
/// Registers four cases in the `escape_text` benchmark group: a long and a
/// short input, each with and without characters that need escaping.
///
/// Every input is passed through `criterion::black_box` before it reaches
/// `escape`, so the optimizer cannot treat the input as a compile-time
/// constant. Every result is black-boxed as well, so the call cannot be
/// discarded as unused.
fn escaping(c: &mut Criterion) {
    let mut group = c.benchmark_group("escape_text");

    group.bench_function("no_chars_to_escape_long", |b| {
        b.iter(|| {
            criterion::black_box(escape(criterion::black_box(LOREM_IPSUM_TEXT)));
        })
    });

    group.bench_function("no_chars_to_escape_short", |b| {
        let text: &[u8] = b"just bit of text";

        b.iter(|| {
            criterion::black_box(escape(criterion::black_box(text)));
        })
    });

    group.bench_function("escaped_chars_short", |b| {
        let comparison: &[u8] = b"age > 72 && age < 21";
        let quoted: &[u8] = b"\"what's that?\"";

        // Each iteration measures two calls: one input with `<`, `>` and `&`,
        // one with single and double quotes.
        b.iter(|| {
            criterion::black_box(escape(criterion::black_box(comparison)));
            criterion::black_box(escape(criterion::black_box(quoted)));
        })
    });

    group.bench_function("escaped_chars_long", |b| {
        // The continuation lines start at column 0 on purpose: leading
        // whitespace would become part of the benchmark input.
        let lorem_ipsum_with_escape_chars: &[u8] =
b"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt
ut labore et dolore magna aliqua. & Hac habitasse platea dictumst vestibulum rhoncus est pellentesque.
Risus ultricies tristique nulla aliquet enim tortor at. Fermentum odio eu feugiat pretium nibh ipsum.
Volutpat sed cras ornare arcu dui. Scelerisque fermentum dui faucibus in ornare quam. Arcu cursus
euismod quis< viverra nibh cras pulvinar mattis. Sed viverra tellus in hac habitasse platea. Quis
commodo odio aenean sed. Cursus in hac habitasse platea dictumst quisque sagittis purus.
Neque convallis >a cras semper auctor. Sit amet mauris commodo quis imperdiet massa. Ac ut consequat
semper viverra nam libero justo laoreet sit. 'Adipiscing' commodo elit at imperdiet dui accumsan.
Enim lobortis scelerisque fermentum dui faucibus in ornare. Natoque penatibus et magnis dis parturient
montes nascetur ridiculus mus. At lectus urna duis convallis convallis tellus id interdum. Libero
volutpat sed cras ornare arcu dui vivamus arcu. Cursus in hac habitasse platea dictumst quisque sagittis
purus. Consequat id porta nibh venenatis cras sed felis.";

        b.iter(|| {
            criterion::black_box(escape(criterion::black_box(lorem_ipsum_with_escape_chars)));
        })
    });
    group.finish();
}

/// Benchmarks unescaping text encoded using XML rules.
///
/// Registers five cases in the `unescape_text` benchmark group: long and
/// short inputs without any references, numeric character references,
/// named entity references, and a long text that mixes both kinds.
///
/// Every input is passed through `criterion::black_box` before it reaches
/// `unescape`, so the optimizer cannot treat the input as a compile-time
/// constant. Every result is black-boxed before it is unwrapped.
///
/// # Panics
///
/// Panics if `unescape` returns an error for any of the fixed inputs below.
fn unescaping(c: &mut Criterion) {
    let mut group = c.benchmark_group("unescape_text");

    group.bench_function("no_chars_to_unescape_long", |b| {
        b.iter(|| {
            criterion::black_box(unescape(criterion::black_box(LOREM_IPSUM_TEXT))).unwrap();
        })
    });

    group.bench_function("no_chars_to_unescape_short", |b| {
        let text: &[u8] = b"just a bit of text";

        b.iter(|| {
            criterion::black_box(unescape(criterion::black_box(text))).unwrap();
        })
    });

    group.bench_function("char_reference", |b| {
        // Decimal (`&#34;`) and hexadecimal (`&#x22;`) character references.
        let quoted: &[u8] = b"prefix &#34;some stuff&#34;,&#x22;more stuff&#x22;";
        let adjacent: &[u8] = b"&#38;&#60;";

        // Each iteration measures two calls, one per input.
        b.iter(|| {
            criterion::black_box(unescape(criterion::black_box(quoted))).unwrap();
            criterion::black_box(unescape(criterion::black_box(adjacent))).unwrap();
        })
    });

    group.bench_function("entity_reference", |b| {
        // Named references: `&gt;`, `&amp;`, `&lt;`, `&quot;` and `&apos;`.
        let comparison: &[u8] = b"age &gt; 72 &amp;&amp; age &lt; 21";
        let quoted: &[u8] = b"&quot;what&apos;s that?&quot;";

        // Each iteration measures two calls, one per input.
        b.iter(|| {
            criterion::black_box(unescape(criterion::black_box(comparison))).unwrap();
            criterion::black_box(unescape(criterion::black_box(quoted))).unwrap();
        })
    });

    group.bench_function("mixed", |b| {
        // The continuation lines start at column 0 on purpose: leading
        // whitespace would become part of the benchmark input.
        let text: &[u8] =
b"Lorem ipsum dolor sit amet, &amp;consectetur adipiscing elit, sed do eiusmod tempor incididunt
ut labore et dolore magna aliqua. Hac habitasse platea dictumst vestibulum rhoncus est pellentesque.
Risus ultricies &quot;tristique nulla aliquet enim tortor&quot; at. Fermentum odio eu feugiat pretium
nibh ipsum. Volutpat sed cras ornare arcu dui. Scelerisque fermentum dui faucibus in ornare quam. Arcu
cursus euismod quis &#60;viverra nibh cras pulvinar mattis. Sed viverra tellus in hac habitasse platea.
Quis commodo odio aenean sed. Cursus in hac habitasse platea dictumst quisque sagittis purus.
Neque convallis a cras semper auctor. Sit amet mauris commodo quis imperdiet massa. Ac ut consequat
semper viverra nam libero justo &#35; laoreet sit. Adipiscing commodo elit at imperdiet dui accumsan.
Enim lobortis scelerisque fermentum dui faucibus in ornare. Natoque penatibus et magnis dis parturient
montes nascetur ridiculus mus. At lectus urna &#33;duis convallis convallis tellus id interdum. Libero
volutpat sed cras ornare arcu dui vivamus arcu. Cursus in hac habitasse platea dictumst quisque sagittis
purus. Consequat id porta nibh venenatis cras sed felis.";

        b.iter(|| {
            criterion::black_box(unescape(criterion::black_box(text))).unwrap();
        })
    });
    group.finish();
}

criterion_group!(
benches,
read_event,
bytes_text_unescaped,
read_namespaced_event,
one_event,
attributes
attributes,
escaping,
unescaping,
);
3 changes: 2 additions & 1 deletion tests/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,8 @@ fn line_score() {
inning: String,
}

let res: LineScoreData = quick_xml::de::from_str(include_str!("documents/linescore.xml")).unwrap();
let res: LineScoreData =
quick_xml::de::from_str(include_str!("documents/linescore.xml")).unwrap();

let expected = LineScoreData {
game_pk: 239575,
Expand Down

0 comments on commit b9bed6f

Please sign in to comment.