From 941d606c3bfd25e5b0b178df8c18d825237468f0 Mon Sep 17 00:00:00 2001 From: Daniel Alley Date: Tue, 28 Jun 2022 20:58:14 -0400 Subject: [PATCH 1/4] Move all XML documents to the same directory --- benches/bench.rs | 26 +++++++++++++------------- compare/benches/bench.rs | 16 ++++++++-------- tests/{ => documents}/linescore.xml | 0 tests/{ => documents}/players.xml | 0 tests/{ => documents}/sample_rss.xml | 0 tests/test.rs | 6 +++--- 6 files changed, 24 insertions(+), 24 deletions(-) rename tests/{ => documents}/linescore.xml (100%) rename tests/{ => documents}/players.xml (100%) rename tests/{ => documents}/sample_rss.xml (100%) diff --git a/benches/bench.rs b/benches/bench.rs index 4f4b7c2d..9732a06d 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -4,8 +4,8 @@ use quick_xml::events::Event; use quick_xml::name::QName; use quick_xml::Reader; -static SAMPLE: &[u8] = include_bytes!("../tests/sample_rss.xml"); -static PLAYERS: &[u8] = include_bytes!("../tests/players.xml"); +static SAMPLE: &[u8] = include_bytes!("../tests/documents/sample_rss.xml"); +static PLAYERS: &[u8] = include_bytes!("../tests/documents/players.xml"); /// Benchmarks the `Reader::read_event` function with all XML well-formless /// checks disabled (with and without trimming content of #text nodes) @@ -25,7 +25,7 @@ fn read_event(c: &mut Criterion) { } buf.clear(); } - assert_eq!(count, 1550, "Overall tag count in ./tests/sample_rss.xml"); + assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml"); }) }); @@ -45,7 +45,7 @@ fn read_event(c: &mut Criterion) { } buf.clear(); } - assert_eq!(count, 1550, "Overall tag count in ./tests/sample_rss.xml"); + assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml"); }); }); group.finish(); @@ -70,7 +70,7 @@ fn read_namespaced_event(c: &mut Criterion) { } buf.clear(); } - assert_eq!(count, 1550, "Overall tag count in ./tests/sample_rss.xml"); + assert_eq!(count, 1550, "Overall tag count in 
./tests/documents/sample_rss.xml"); }); }); @@ -91,7 +91,7 @@ fn read_namespaced_event(c: &mut Criterion) { } buf.clear(); } - assert_eq!(count, 1550, "Overall tag count in ./tests/sample_rss.xml"); + assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml"); }); }); group.finish(); @@ -117,19 +117,19 @@ fn bytes_text_unescaped(c: &mut Criterion) { } buf.clear(); } - assert_eq!(count, 1550, "Overall tag count in ./tests/sample_rss.xml"); + assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml"); // Windows has \r\n instead of \n #[cfg(windows)] assert_eq!( nbtxt, 67661, - "Overall length (in bytes) of all text contents of ./tests/sample_rss.xml" + "Overall length (in bytes) of all text contents of ./tests/documents/sample_rss.xml" ); #[cfg(not(windows))] assert_eq!( nbtxt, 66277, - "Overall length (in bytes) of all text contents of ./tests/sample_rss.xml" + "Overall length (in bytes) of all text contents of ./tests/documents/sample_rss.xml" ); }); }); @@ -152,19 +152,19 @@ fn bytes_text_unescaped(c: &mut Criterion) { } buf.clear(); } - assert_eq!(count, 1550, "Overall tag count in ./tests/sample_rss.xml"); + assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml"); // Windows has \r\n instead of \n #[cfg(windows)] assert_eq!( nbtxt, 50334, - "Overall length (in bytes) of all text contents of ./tests/sample_rss.xml" + "Overall length (in bytes) of all text contents of ./tests/documents/sample_rss.xml" ); #[cfg(not(windows))] assert_eq!( nbtxt, 50261, - "Overall length (in bytes) of all text contents of ./tests/sample_rss.xml" + "Overall length (in bytes) of all text contents of ./tests/documents/sample_rss.xml" ); }); }); @@ -182,7 +182,7 @@ fn one_event(c: &mut Criterion) { let mut nbtxt = criterion::black_box(0); r.check_end_names(false).check_comments(false); match r.read_event(&mut buf) { - Ok(Event::StartText(e)) => nbtxt += e.unescaped().unwrap().len(), + Ok(Event::StartText(e)) => nbtxt 
+= e.len(), something_else => panic!("Did not expect {:?}", something_else), }; diff --git a/compare/benches/bench.rs b/compare/benches/bench.rs index fbb98a72..4a398636 100644 --- a/compare/benches/bench.rs +++ b/compare/benches/bench.rs @@ -5,7 +5,7 @@ use serde::Deserialize; use serde_xml_rs; use xml::reader::{EventReader, XmlEvent}; -static SOURCE: &str = include_str!("../../tests/sample_rss.xml"); +static SOURCE: &str = include_str!("../../tests/documents/sample_rss.xml"); /// Runs benchmarks for several XML libraries using low-level API fn low_level_comparison(c: &mut Criterion) { @@ -25,7 +25,7 @@ fn low_level_comparison(c: &mut Criterion) { } buf.clear(); } - assert_eq!(count, 1550, "Overall tag count in ./tests/sample_rss.xml"); + assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml"); }) }); @@ -49,7 +49,7 @@ fn low_level_comparison(c: &mut Criterion) { } input = &input[consumed..]; } - assert_eq!(count, 1550, "Overall tag count in ./tests/sample_rss.xml"); + assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml"); }) }); @@ -68,7 +68,7 @@ fn low_level_comparison(c: &mut Criterion) { _ => (), } } - assert_eq!(count, 1550, "Overall tag count in ./tests/sample_rss.xml"); + assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml"); }) }); @@ -83,7 +83,7 @@ fn low_level_comparison(c: &mut Criterion) { _ => (), } } - assert_eq!(count, 1550, "Overall tag count in ./tests/sample_rss.xml"); + assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml"); }) }); @@ -101,7 +101,7 @@ fn low_level_comparison(c: &mut Criterion) { _ => (), } } - assert_eq!(count, 1550, "Overall tag count in ./tests/sample_rss.xml"); + assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml"); }) }); @@ -152,7 +152,7 @@ fn low_level_comparison(c: &mut Criterion) { assert_eq!( tok.sink.0, 1550, - "Overall tag count in ./tests/sample_rss.xml" + "Overall tag count in 
./tests/documents/sample_rss.xml" ); }) }); @@ -166,7 +166,7 @@ fn low_level_comparison(c: &mut Criterion) { count += 1; } } - assert_eq!(count, 1550, "Overall tag count in ./tests/sample_rss.xml"); + assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml"); }) }); group.finish(); diff --git a/tests/linescore.xml b/tests/documents/linescore.xml similarity index 100% rename from tests/linescore.xml rename to tests/documents/linescore.xml diff --git a/tests/players.xml b/tests/documents/players.xml similarity index 100% rename from tests/players.xml rename to tests/documents/players.xml diff --git a/tests/sample_rss.xml b/tests/documents/sample_rss.xml similarity index 100% rename from tests/sample_rss.xml rename to tests/documents/sample_rss.xml diff --git a/tests/test.rs b/tests/test.rs index a47d3015..45cc618c 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -9,7 +9,7 @@ use pretty_assertions::assert_eq; #[test] fn test_sample() { - let src: &[u8] = include_bytes!("sample_rss.xml"); + let src: &[u8] = include_bytes!("documents/sample_rss.xml"); let mut buf = Vec::new(); let mut r = Reader::from_reader(src); let mut count = 0; @@ -264,7 +264,7 @@ fn line_score() { inning: String, } - let res: LineScoreData = quick_xml::de::from_str(include_str!("linescore.xml")).unwrap(); + let res: LineScoreData = quick_xml::de::from_str(include_str!("documents/linescore.xml")).unwrap(); let expected = LineScoreData { game_pk: 239575, @@ -376,7 +376,7 @@ fn players() { id: u32, } - let res: Game = quick_xml::de::from_str(include_str!("players.xml")).unwrap(); + let res: Game = quick_xml::de::from_str(include_str!("documents/players.xml")).unwrap(); let expected = Game { teams: vec![ From 7770b767045e004edf90361b86a82e114d8286b6 Mon Sep 17 00:00:00 2001 From: Daniel Alley Date: Tue, 28 Jun 2022 20:58:56 -0400 Subject: [PATCH 2/4] Add macrobenchmarks and new documents --- benches/bench.rs | 346 +--------------- benches/macrobenches.rs | 137 ++++++ 
benches/microbenches.rs | 343 +++++++++++++++ tests/README.md | 48 +++ tests/documents/libreoffice_document.fodt | 481 ++++++++++++++++++++++ tests/documents/rpm_filelists.xml | 198 +++++++++ tests/documents/rpm_other.xml | 189 +++++++++ tests/documents/rpm_primary.xml | 437 ++++++++++++++++++++ tests/documents/rpm_primary2.xml | 152 +++++++ 9 files changed, 1989 insertions(+), 342 deletions(-) create mode 100644 benches/macrobenches.rs create mode 100644 benches/microbenches.rs create mode 100644 tests/README.md create mode 100644 tests/documents/libreoffice_document.fodt create mode 100644 tests/documents/rpm_filelists.xml create mode 100644 tests/documents/rpm_other.xml create mode 100644 tests/documents/rpm_primary.xml create mode 100644 tests/documents/rpm_primary2.xml diff --git a/benches/bench.rs b/benches/bench.rs index 9732a06d..e4168cef 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -1,344 +1,6 @@ -use criterion::{self, criterion_group, criterion_main, Criterion}; -use pretty_assertions::assert_eq; -use quick_xml::events::Event; -use quick_xml::name::QName; -use quick_xml::Reader; +use criterion::criterion_main; -static SAMPLE: &[u8] = include_bytes!("../tests/documents/sample_rss.xml"); -static PLAYERS: &[u8] = include_bytes!("../tests/documents/players.xml"); +mod macrobenches; +mod microbenches; -/// Benchmarks the `Reader::read_event` function with all XML well-formless -/// checks disabled (with and without trimming content of #text nodes) -fn read_event(c: &mut Criterion) { - let mut group = c.benchmark_group("read_event"); - group.bench_function("trim_text = false", |b| { - b.iter(|| { - let mut r = Reader::from_reader(SAMPLE); - r.check_end_names(false).check_comments(false); - let mut count = criterion::black_box(0); - let mut buf = Vec::new(); - loop { - match r.read_event(&mut buf) { - Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1, - Ok(Event::Eof) => break, - _ => (), - } - buf.clear(); - } - assert_eq!(count, 1550, "Overall 
tag count in ./tests/documents/sample_rss.xml"); - }) - }); - - group.bench_function("trim_text = true", |b| { - b.iter(|| { - let mut r = Reader::from_reader(SAMPLE); - r.check_end_names(false) - .check_comments(false) - .trim_text(true); - let mut count = criterion::black_box(0); - let mut buf = Vec::new(); - loop { - match r.read_event(&mut buf) { - Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1, - Ok(Event::Eof) => break, - _ => (), - } - buf.clear(); - } - assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml"); - }); - }); - group.finish(); -} - -/// Benchmarks the `Reader::read_namespaced_event` function with all XML well-formless -/// checks disabled (with and without trimming content of #text nodes) -fn read_namespaced_event(c: &mut Criterion) { - let mut group = c.benchmark_group("read_namespaced_event"); - group.bench_function("trim_text = false", |b| { - b.iter(|| { - let mut r = Reader::from_reader(SAMPLE); - r.check_end_names(false).check_comments(false); - let mut count = criterion::black_box(0); - let mut buf = Vec::new(); - let mut ns_buf = Vec::new(); - loop { - match r.read_namespaced_event(&mut buf, &mut ns_buf) { - Ok((_, Event::Start(_))) | Ok((_, Event::Empty(_))) => count += 1, - Ok((_, Event::Eof)) => break, - _ => (), - } - buf.clear(); - } - assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml"); - }); - }); - - group.bench_function("trim_text = true", |b| { - b.iter(|| { - let mut r = Reader::from_reader(SAMPLE); - r.check_end_names(false) - .check_comments(false) - .trim_text(true); - let mut count = criterion::black_box(0); - let mut buf = Vec::new(); - let mut ns_buf = Vec::new(); - loop { - match r.read_namespaced_event(&mut buf, &mut ns_buf) { - Ok((_, Event::Start(_))) | Ok((_, Event::Empty(_))) => count += 1, - Ok((_, Event::Eof)) => break, - _ => (), - } - buf.clear(); - } - assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml"); - }); - }); - 
group.finish(); -} - -/// Benchmarks the `BytesText::unescaped()` method (includes time of `read_event` -/// benchmark) -fn bytes_text_unescaped(c: &mut Criterion) { - let mut group = c.benchmark_group("BytesText::unescaped"); - group.bench_function("trim_text = false", |b| { - b.iter(|| { - let mut buf = Vec::new(); - let mut r = Reader::from_reader(SAMPLE); - r.check_end_names(false).check_comments(false); - let mut count = criterion::black_box(0); - let mut nbtxt = criterion::black_box(0); - loop { - match r.read_event(&mut buf) { - Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1, - Ok(Event::Text(ref e)) => nbtxt += e.unescaped().unwrap().len(), - Ok(Event::Eof) => break, - _ => (), - } - buf.clear(); - } - assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml"); - - // Windows has \r\n instead of \n - #[cfg(windows)] - assert_eq!( - nbtxt, 67661, - "Overall length (in bytes) of all text contents of ./tests/documents/sample_rss.xml" - ); - - #[cfg(not(windows))] - assert_eq!( - nbtxt, 66277, - "Overall length (in bytes) of all text contents of ./tests/documents/sample_rss.xml" - ); - }); - }); - - group.bench_function("trim_text = true", |b| { - b.iter(|| { - let mut buf = Vec::new(); - let mut r = Reader::from_reader(SAMPLE); - r.check_end_names(false) - .check_comments(false) - .trim_text(true); - let mut count = criterion::black_box(0); - let mut nbtxt = criterion::black_box(0); - loop { - match r.read_event(&mut buf) { - Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1, - Ok(Event::Text(ref e)) => nbtxt += e.unescaped().unwrap().len(), - Ok(Event::Eof) => break, - _ => (), - } - buf.clear(); - } - assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml"); - - // Windows has \r\n instead of \n - #[cfg(windows)] - assert_eq!( - nbtxt, 50334, - "Overall length (in bytes) of all text contents of ./tests/documents/sample_rss.xml" - ); - - #[cfg(not(windows))] - assert_eq!( - nbtxt, 50261, - "Overall 
length (in bytes) of all text contents of ./tests/documents/sample_rss.xml" - ); - }); - }); - group.finish(); -} - -/// Benchmarks, how fast individual event parsed -fn one_event(c: &mut Criterion) { - let mut group = c.benchmark_group("One event"); - group.bench_function("StartText", |b| { - let src = "Hello world!".repeat(512 / 12).into_bytes(); - let mut buf = Vec::with_capacity(1024); - b.iter(|| { - let mut r = Reader::from_reader(src.as_ref()); - let mut nbtxt = criterion::black_box(0); - r.check_end_names(false).check_comments(false); - match r.read_event(&mut buf) { - Ok(Event::StartText(e)) => nbtxt += e.len(), - something_else => panic!("Did not expect {:?}", something_else), - }; - - buf.clear(); - - assert_eq!(nbtxt, 504); - }) - }); - - group.bench_function("Start", |b| { - let src = format!(r#""#, "world".repeat(512 / 5)).into_bytes(); - let mut buf = Vec::with_capacity(1024); - b.iter(|| { - let mut r = Reader::from_reader(src.as_ref()); - let mut nbtxt = criterion::black_box(0); - r.check_end_names(false) - .check_comments(false) - .trim_text(true); - match r.read_event(&mut buf) { - Ok(Event::Start(ref e)) => nbtxt += e.len(), - something_else => panic!("Did not expect {:?}", something_else), - }; - - buf.clear(); - - assert_eq!(nbtxt, 525); - }) - }); - - group.bench_function("Comment", |b| { - let src = format!(r#""#, "world".repeat(512 / 5)).into_bytes(); - let mut buf = Vec::with_capacity(1024); - b.iter(|| { - let mut r = Reader::from_reader(src.as_ref()); - let mut nbtxt = criterion::black_box(0); - r.check_end_names(false) - .check_comments(false) - .trim_text(true); - match r.read_event(&mut buf) { - Ok(Event::Comment(ref e)) => nbtxt += e.unescaped().unwrap().len(), - something_else => panic!("Did not expect {:?}", something_else), - }; - - buf.clear(); - - assert_eq!(nbtxt, 520); - }) - }); - - group.bench_function("CData", |b| { - let src = format!(r#""#, "world".repeat(512 / 5)).into_bytes(); - let mut buf = Vec::with_capacity(1024); - 
b.iter(|| { - let mut r = Reader::from_reader(src.as_ref()); - let mut nbtxt = criterion::black_box(0); - r.check_end_names(false) - .check_comments(false) - .trim_text(true); - match r.read_event(&mut buf) { - Ok(Event::CData(ref e)) => nbtxt += e.len(), - something_else => panic!("Did not expect {:?}", something_else), - }; - - buf.clear(); - - assert_eq!(nbtxt, 518); - }) - }); - group.finish(); -} - -/// Benchmarks parsing attributes from events -fn attributes(c: &mut Criterion) { - let mut group = c.benchmark_group("attributes"); - group.bench_function("with_checks = true", |b| { - b.iter(|| { - let mut r = Reader::from_reader(PLAYERS); - r.check_end_names(false).check_comments(false); - let mut count = criterion::black_box(0); - let mut buf = Vec::new(); - loop { - match r.read_event(&mut buf) { - Ok(Event::Empty(e)) => { - for attr in e.attributes() { - let _attr = attr.unwrap(); - count += 1 - } - } - Ok(Event::Eof) => break, - _ => (), - } - buf.clear(); - } - assert_eq!(count, 1041); - }) - }); - - group.bench_function("with_checks = false", |b| { - b.iter(|| { - let mut r = Reader::from_reader(PLAYERS); - r.check_end_names(false).check_comments(false); - let mut count = criterion::black_box(0); - let mut buf = Vec::new(); - loop { - match r.read_event(&mut buf) { - Ok(Event::Empty(e)) => { - for attr in e.attributes().with_checks(false) { - let _attr = attr.unwrap(); - count += 1 - } - } - Ok(Event::Eof) => break, - _ => (), - } - buf.clear(); - } - assert_eq!(count, 1041); - }) - }); - - group.bench_function("try_get_attribute", |b| { - b.iter(|| { - let mut r = Reader::from_reader(PLAYERS); - r.check_end_names(false).check_comments(false); - let mut count = criterion::black_box(0); - let mut buf = Vec::new(); - loop { - match r.read_event(&mut buf) { - Ok(Event::Empty(e)) if e.name() == QName(b"player") => { - for name in ["num", "status", "avg"] { - if let Some(_attr) = e.try_get_attribute(name).unwrap() { - count += 1 - } - } - assert!(e - 
.try_get_attribute("attribute-that-doesn't-exist") - .unwrap() - .is_none()); - } - Ok(Event::Eof) => break, - _ => (), - } - buf.clear(); - } - assert_eq!(count, 150); - }) - }); - group.finish(); -} - -criterion_group!( - benches, - read_event, - bytes_text_unescaped, - read_namespaced_event, - one_event, - attributes -); -criterion_main!(benches); +criterion_main!(macrobenches::benches, microbenches::benches); diff --git a/benches/macrobenches.rs b/benches/macrobenches.rs new file mode 100644 index 00000000..876fc7ae --- /dev/null +++ b/benches/macrobenches.rs @@ -0,0 +1,137 @@ +use criterion::{self, criterion_group, Criterion}; +use quick_xml::events::Event; +use quick_xml::Reader; +use quick_xml::Result as XmlResult; + +static RPM_PRIMARY: &[u8] = include_bytes!("../tests/documents/rpm_primary.xml"); +static RPM_PRIMARY2: &[u8] = include_bytes!("../tests/documents/rpm_primary2.xml"); +static RPM_FILELISTS: &[u8] = include_bytes!("../tests/documents/rpm_filelists.xml"); +static RPM_OTHER: &[u8] = include_bytes!("../tests/documents/rpm_other.xml"); +static LIBREOFFICE_DOCUMENT: &[u8] = include_bytes!("../tests/documents/libreoffice_document.fodt"); +static DOCUMENT: &[u8] = include_bytes!("../tests/documents/document.xml"); +static TEST_WRITER_INDENT: &[u8] = include_bytes!("../tests/documents/test_writer_indent.xml"); +static SAMPLE_1: &[u8] = include_bytes!("../tests/documents/sample_1.xml"); +static LINESCORE: &[u8] = include_bytes!("../tests/documents/linescore.xml"); +static SAMPLE_RSS: &[u8] = include_bytes!("../tests/documents/sample_rss.xml"); +static SAMPLE_NS: &[u8] = include_bytes!("../tests/documents/sample_ns.xml"); +static PLAYERS: &[u8] = include_bytes!("../tests/documents/players.xml"); + +// TODO: read the namespaces too +// TODO: use fully normalized attribute values +fn parse_document(doc: &[u8]) -> XmlResult<()> { + let mut r = Reader::from_reader(doc); + loop { + match r.read_event_unbuffered()? 
{ + Event::Start(e) | Event::Empty(e) => { + for attr in e.attributes() { + criterion::black_box(attr?.unescaped_value()?); + } + }, + Event::Text(e) => { + criterion::black_box(e.unescaped()?); + }, + Event::CData(e) => { + criterion::black_box(e.into_inner()); + }, + Event::End(_) => (), + Event::Eof => break, + _ => (), + } + } + Ok(()) +} + +pub fn bench_fully_parse_document(c: &mut Criterion) { + let mut group = c.benchmark_group("fully_parse_document"); + + // long, mix of attributes and text, not much escaping, mix of attribute lengths, some namespaces + group.bench_function("rpm_primary.xml", |b| { + b.iter(|| { + parse_document(RPM_PRIMARY).unwrap(); + }) + }); + + // long, mix of attributes and text, not much escaping, mix of attribute lengths, some namespaces + group.bench_function("rpm_primary2.xml", |b| { + b.iter(|| { + parse_document(RPM_PRIMARY2).unwrap(); + }) + }); + + // long, mostly medium-length text elements, not much escaping + group.bench_function("rpm_filelists.xml", |b| { + b.iter(|| { + parse_document(RPM_FILELISTS).unwrap(); + }) + }); + + // long, mix of attributes and text, lots of escaping (both entity and char literal), long attributes + group.bench_function("rpm_other.xml", |b| { + b.iter(|| { + parse_document(RPM_OTHER).unwrap(); + }) + }); + + // long, mix of attributes and text, not much escaping, lots of non-ascii characters, lots of namespaces + group.bench_function("libreoffice_document.fodt", |b| { + b.iter(|| { + parse_document(LIBREOFFICE_DOCUMENT).unwrap(); + }) + }); + + // medium length, mostly empty tags, a few short attributes per element, no escaping + group.bench_function("document.xml", |b| { + b.iter(|| { + parse_document(DOCUMENT).unwrap(); + }) + }); + + // medium length, lots of namespaces, no escaping + group.bench_function("test_writer_ident.xml", |b| { + b.iter(|| { + parse_document(TEST_WRITER_INDENT).unwrap(); + }) + }); + + // short, mix of attributes and text, lots of escapes + 
group.bench_function("sample_1.xml", |b| { + b.iter(|| { + parse_document(SAMPLE_1).unwrap(); + }) + }); + + // medium length, lots of attributes, short attributes, few escapes + group.bench_function("linescore.xml", |b| { + b.iter(|| { + parse_document(LINESCORE).unwrap(); + }) + }); + + // short, lots of namespaces, no escapes + group.bench_function("sample_ns.xml", |b| { + b.iter(|| { + parse_document(SAMPLE_NS).unwrap(); + }) + }); + + // long, few attributes, mix of attribute lengths, escapes in text content + group.bench_function("sample_rss.xml", |b| { + b.iter(|| { + parse_document(SAMPLE_RSS).unwrap(); + }) + }); + + // long, lots of attributes, short attributes, no text, no escapes + group.bench_function("players.xml", |b| { + b.iter(|| { + parse_document(PLAYERS).unwrap(); + }) + }); + + group.finish(); +} + +criterion_group!( + benches, + bench_fully_parse_document, +); diff --git a/benches/microbenches.rs b/benches/microbenches.rs new file mode 100644 index 00000000..1c8defc7 --- /dev/null +++ b/benches/microbenches.rs @@ -0,0 +1,343 @@ +use criterion::{self, criterion_group, Criterion}; +use pretty_assertions::assert_eq; +use quick_xml::events::Event; +use quick_xml::name::QName; +use quick_xml::Reader; + +static SAMPLE: &[u8] = include_bytes!("../tests/documents/sample_rss.xml"); +static PLAYERS: &[u8] = include_bytes!("../tests/documents/players.xml"); + +/// Benchmarks the `Reader::read_event` function with all XML well-formless +/// checks disabled (with and without trimming content of #text nodes) +fn read_event(c: &mut Criterion) { + let mut group = c.benchmark_group("read_event"); + group.bench_function("trim_text = false", |b| { + b.iter(|| { + let mut r = Reader::from_reader(SAMPLE); + r.check_end_names(false).check_comments(false); + let mut count = criterion::black_box(0); + let mut buf = Vec::new(); + loop { + match r.read_event(&mut buf) { + Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1, + Ok(Event::Eof) => break, + _ => (), + } 
+ buf.clear(); + } + assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml"); + }) + }); + + group.bench_function("trim_text = true", |b| { + b.iter(|| { + let mut r = Reader::from_reader(SAMPLE); + r.check_end_names(false) + .check_comments(false) + .trim_text(true); + let mut count = criterion::black_box(0); + let mut buf = Vec::new(); + loop { + match r.read_event(&mut buf) { + Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1, + Ok(Event::Eof) => break, + _ => (), + } + buf.clear(); + } + assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml"); + }); + }); + group.finish(); +} + +/// Benchmarks the `Reader::read_namespaced_event` function with all XML well-formless +/// checks disabled (with and without trimming content of #text nodes) +fn read_namespaced_event(c: &mut Criterion) { + let mut group = c.benchmark_group("read_namespaced_event"); + group.bench_function("trim_text = false", |b| { + b.iter(|| { + let mut r = Reader::from_reader(SAMPLE); + r.check_end_names(false).check_comments(false); + let mut count = criterion::black_box(0); + let mut buf = Vec::new(); + let mut ns_buf = Vec::new(); + loop { + match r.read_namespaced_event(&mut buf, &mut ns_buf) { + Ok((_, Event::Start(_))) | Ok((_, Event::Empty(_))) => count += 1, + Ok((_, Event::Eof)) => break, + _ => (), + } + buf.clear(); + } + assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml"); + }); + }); + + group.bench_function("trim_text = true", |b| { + b.iter(|| { + let mut r = Reader::from_reader(SAMPLE); + r.check_end_names(false) + .check_comments(false) + .trim_text(true); + let mut count = criterion::black_box(0); + let mut buf = Vec::new(); + let mut ns_buf = Vec::new(); + loop { + match r.read_namespaced_event(&mut buf, &mut ns_buf) { + Ok((_, Event::Start(_))) | Ok((_, Event::Empty(_))) => count += 1, + Ok((_, Event::Eof)) => break, + _ => (), + } + buf.clear(); + } + assert_eq!(count, 1550, "Overall tag count 
in ./tests/documents/sample_rss.xml"); + }); + }); + group.finish(); +} + +/// Benchmarks the `BytesText::unescaped()` method (includes time of `read_event` +/// benchmark) +fn bytes_text_unescaped(c: &mut Criterion) { + let mut group = c.benchmark_group("BytesText::unescaped"); + group.bench_function("trim_text = false", |b| { + b.iter(|| { + let mut buf = Vec::new(); + let mut r = Reader::from_reader(SAMPLE); + r.check_end_names(false).check_comments(false); + let mut count = criterion::black_box(0); + let mut nbtxt = criterion::black_box(0); + loop { + match r.read_event(&mut buf) { + Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1, + Ok(Event::Text(ref e)) => nbtxt += e.unescaped().unwrap().len(), + Ok(Event::Eof) => break, + _ => (), + } + buf.clear(); + } + assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml"); + + // Windows has \r\n instead of \n + #[cfg(windows)] + assert_eq!( + nbtxt, 67661, + "Overall length (in bytes) of all text contents of ./tests/documents/sample_rss.xml" + ); + + #[cfg(not(windows))] + assert_eq!( + nbtxt, 66277, + "Overall length (in bytes) of all text contents of ./tests/documents/sample_rss.xml" + ); + }); + }); + + group.bench_function("trim_text = true", |b| { + b.iter(|| { + let mut buf = Vec::new(); + let mut r = Reader::from_reader(SAMPLE); + r.check_end_names(false) + .check_comments(false) + .trim_text(true); + let mut count = criterion::black_box(0); + let mut nbtxt = criterion::black_box(0); + loop { + match r.read_event(&mut buf) { + Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1, + Ok(Event::Text(ref e)) => nbtxt += e.unescaped().unwrap().len(), + Ok(Event::Eof) => break, + _ => (), + } + buf.clear(); + } + assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml"); + + // Windows has \r\n instead of \n + #[cfg(windows)] + assert_eq!( + nbtxt, 50334, + "Overall length (in bytes) of all text contents of ./tests/documents/sample_rss.xml" + ); + + 
#[cfg(not(windows))] + assert_eq!( + nbtxt, 50261, + "Overall length (in bytes) of all text contents of ./tests/documents/sample_rss.xml" + ); + }); + }); + group.finish(); +} + +/// Benchmarks, how fast individual event parsed +fn one_event(c: &mut Criterion) { + let mut group = c.benchmark_group("One event"); + group.bench_function("StartText", |b| { + let src = "Hello world!".repeat(512 / 12).into_bytes(); + let mut buf = Vec::with_capacity(1024); + b.iter(|| { + let mut r = Reader::from_reader(src.as_ref()); + let mut nbtxt = criterion::black_box(0); + r.check_end_names(false).check_comments(false); + match r.read_event(&mut buf) { + Ok(Event::StartText(e)) => nbtxt += e.len(), + something_else => panic!("Did not expect {:?}", something_else), + }; + + buf.clear(); + + assert_eq!(nbtxt, 504); + }) + }); + + group.bench_function("Start", |b| { + let src = format!(r#""#, "world".repeat(512 / 5)).into_bytes(); + let mut buf = Vec::with_capacity(1024); + b.iter(|| { + let mut r = Reader::from_reader(src.as_ref()); + let mut nbtxt = criterion::black_box(0); + r.check_end_names(false) + .check_comments(false) + .trim_text(true); + match r.read_event(&mut buf) { + Ok(Event::Start(ref e)) => nbtxt += e.len(), + something_else => panic!("Did not expect {:?}", something_else), + }; + + buf.clear(); + + assert_eq!(nbtxt, 525); + }) + }); + + group.bench_function("Comment", |b| { + let src = format!(r#""#, "world".repeat(512 / 5)).into_bytes(); + let mut buf = Vec::with_capacity(1024); + b.iter(|| { + let mut r = Reader::from_reader(src.as_ref()); + let mut nbtxt = criterion::black_box(0); + r.check_end_names(false) + .check_comments(false) + .trim_text(true); + match r.read_event(&mut buf) { + Ok(Event::Comment(ref e)) => nbtxt += e.unescaped().unwrap().len(), + something_else => panic!("Did not expect {:?}", something_else), + }; + + buf.clear(); + + assert_eq!(nbtxt, 520); + }) + }); + + group.bench_function("CData", |b| { + let src = format!(r#""#, "world".repeat(512 / 
5)).into_bytes(); + let mut buf = Vec::with_capacity(1024); + b.iter(|| { + let mut r = Reader::from_reader(src.as_ref()); + let mut nbtxt = criterion::black_box(0); + r.check_end_names(false) + .check_comments(false) + .trim_text(true); + match r.read_event(&mut buf) { + Ok(Event::CData(ref e)) => nbtxt += e.len(), + something_else => panic!("Did not expect {:?}", something_else), + }; + + buf.clear(); + + assert_eq!(nbtxt, 518); + }) + }); + group.finish(); +} + +/// Benchmarks parsing attributes from events +fn attributes(c: &mut Criterion) { + let mut group = c.benchmark_group("attributes"); + group.bench_function("with_checks = true", |b| { + b.iter(|| { + let mut r = Reader::from_reader(PLAYERS); + r.check_end_names(false).check_comments(false); + let mut count = criterion::black_box(0); + let mut buf = Vec::new(); + loop { + match r.read_event(&mut buf) { + Ok(Event::Empty(e)) => { + for attr in e.attributes() { + let _attr = attr.unwrap(); + count += 1 + } + } + Ok(Event::Eof) => break, + _ => (), + } + buf.clear(); + } + assert_eq!(count, 1041); + }) + }); + + group.bench_function("with_checks = false", |b| { + b.iter(|| { + let mut r = Reader::from_reader(PLAYERS); + r.check_end_names(false).check_comments(false); + let mut count = criterion::black_box(0); + let mut buf = Vec::new(); + loop { + match r.read_event(&mut buf) { + Ok(Event::Empty(e)) => { + for attr in e.attributes().with_checks(false) { + let _attr = attr.unwrap(); + count += 1 + } + } + Ok(Event::Eof) => break, + _ => (), + } + buf.clear(); + } + assert_eq!(count, 1041); + }) + }); + + group.bench_function("try_get_attribute", |b| { + b.iter(|| { + let mut r = Reader::from_reader(PLAYERS); + r.check_end_names(false).check_comments(false); + let mut count = criterion::black_box(0); + let mut buf = Vec::new(); + loop { + match r.read_event(&mut buf) { + Ok(Event::Empty(e)) if e.name() == QName(b"player") => { + for name in ["num", "status", "avg"] { + if let Some(_attr) = 
e.try_get_attribute(name).unwrap() { + count += 1 + } + } + assert!(e + .try_get_attribute("attribute-that-doesn't-exist") + .unwrap() + .is_none()); + } + Ok(Event::Eof) => break, + _ => (), + } + buf.clear(); + } + assert_eq!(count, 150); + }) + }); + group.finish(); +} + +criterion_group!( + benches, + read_event, + bytes_text_unescaped, + read_namespaced_event, + one_event, + attributes +); diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 00000000..316ee2d5 --- /dev/null +++ b/tests/README.md @@ -0,0 +1,48 @@ +# Document descriptions + +document.xml + medium length, mostly empty tags, a few short attributes per element, no escaping +html5.html +html5.txt +libreoffice_document.fodt + long, mix of attributes and text, not much escaping, lots of non-ascii characters, lots of namespaces +linescore.xml + medium length, lots of attributes, short attributes, few escapes +opennews_all.rss +players.xml + long, lots of attributes, short attributes, no text, no escapes +rpm_filelists.xml + long, mostly medium-length text elements, not much escaping +rpm_other.xml + long, mix of attributes and text, lots of escaping (both entity and char literal), long attributes +rpm_primary.xml + long, mix of attributes and text, not much escaping, mix of attribute lengths, some namespaces +rpm_primary2.xml + long, mix of attributes and text, not much escaping, mix of attribute lengths, some namespaces +sample_1.xml + short, mix of attributes and text, lots of escapes +sample_1_short.txt +sample_1_full.txt +sample_2.xml +sample_2_short.txt +sample_3.xml +sample_2_full.txt +sample_3_short.txt +sample_3_full.txt +sample_4.xml +sample_4_short.txt +sample_4_full.txt +sample_5_short.txt +sample_5_utf16bom.xml +sample_5_full.txt +sample_ns_short.txt +sample_ns.xml + short, lots of namespaces, no escapes +sample_rss.xml + long, few attributes, mix of attribute lengths, escapes in text content +test_writer_indent_cdata.xml +test_writer_indent.xml + medium length, lots of
namespaces, no escaping +test_writer.xml +utf16be.xml +utf16le.xml diff --git a/tests/documents/libreoffice_document.fodt b/tests/documents/libreoffice_document.fodt new file mode 100644 index 00000000..e0b78cfd --- /dev/null +++ b/tests/documents/libreoffice_document.fodt @@ -0,0 +1,481 @@ + + + + 2022-06-28T20:26:31.0481761152022-06-28T20:27:13.444218412PT43S1LibreOffice/7.2.7.2$Linux_X86_64 LibreOffice_project/20$Build-2 + + + 0 + 0 + 49214 + 22915 + true + false + + + view2 + 15812 + 2501 + 0 + 0 + 49213 + 22913 + 0 + 1 + false + 100 + false + false + false + + + + + false + false + false + true + true + true + true + true + false + 0 + false + false + false + true + false + false + true + false + false + false + false + true + true + true + false + false + false + false + false + false + false + true + false + false + true + false + false + false + true + 0 + 1 + true + false + + high-resolution + true + + + false + false + true + false + true + true + false + true + + true + 426418 + + true + false + true + 0 + + false + false + false + true + false + true + false + false + false + false + true + false + false + + false + false + true + false + false + false + false + false + false + false + false + false + 340756 + false + false + false + false + false + true + false + true + true + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Original by Markus Kuhn, adapted for HTML by Martin Dürst. + UTF-8 encoded sample plain-text file + ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + + Markus Kuhn [ˈmaʳkʊs kuːn] <mkuhn@acm.org> — 1999-08-20 + + + The ASCII compatible UTF-8 encoding of ISO 10646 and Unicode + plain-text files is defined in RFC 2279 and in ISO 10646-1 Annex R. 
+ + + Using Unicode/UTF-8, you can write in emails and source code things such as + + Mathematics and Sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), + + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (A ⇔ B), + + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm + + Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + + APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + + Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + + • ‘single’ and “double” quotes + + • Curly apostrophes: “We’ve been here” ║ + + • Latin-1 apostrophe and accents: '´` + + • ‚deutsche‘ „Anführungszeichen“ + + • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … + + • ASCII safety test: 1lI|, 0OD, 8B + ╭─────────╮ + • the euro symbol: │ 14.95 € │ + ╰─────────╯ + ╚══════════════════════════════════════════╝ + + Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. 
ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + + Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + + Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + + Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) 
+ + Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + + Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + + Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + + Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + + Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + + Box drawing alignment tests: + + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ 
│╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + + + + + \ No newline at end of file diff --git a/tests/documents/rpm_filelists.xml b/tests/documents/rpm_filelists.xml new file mode 100644 index 00000000..7b3a7ae3 --- /dev/null +++ b/tests/documents/rpm_filelists.xml @@ -0,0 +1,198 @@ + + + + + /usr/share/aclocal/find_apu.m4 + /usr/lib64/pkgconfig/apr-util-1.pc + /usr/lib64/libaprutil-1.so + /usr/lib64/libaprutil-1.la + /usr/include/apr-1/apu_want.h + /usr/include/apr-1/apu_version.h + /usr/include/apr-1/apu_errno.h + /usr/include/apr-1/apu.h + /usr/include/apr-1/apr_xml.h + /usr/include/apr-1/apr_xlate.h + /usr/include/apr-1/apr_uuid.h + /usr/include/apr-1/apr_uri.h + /usr/include/apr-1/apr_thread_pool.h + /usr/include/apr-1/apr_strmatch.h + /usr/include/apr-1/apr_siphash.h + /usr/include/apr-1/apr_sha1.h + /usr/include/apr-1/apr_sdbm.h + /usr/include/apr-1/apr_rmm.h + /usr/include/apr-1/apr_reslist.h + /usr/include/apr-1/apr_redis.h + /usr/include/apr-1/apr_queue.h + /usr/include/apr-1/apr_optional_hooks.h + /usr/include/apr-1/apr_optional.h + /usr/include/apr-1/apr_memcache.h + /usr/include/apr-1/apr_md5.h + /usr/include/apr-1/apr_md4.h + /usr/include/apr-1/apr_ldap_url.h + /usr/include/apr-1/apr_ldap_rebind.h + /usr/include/apr-1/apr_ldap_option.h + /usr/include/apr-1/apr_ldap_init.h + /usr/include/apr-1/apr_ldap.h + /usr/include/apr-1/apr_hooks.h + /usr/include/apr-1/apr_dbm.h + /usr/include/apr-1/apr_dbd.h + /usr/include/apr-1/apr_date.h + /usr/include/apr-1/apr_crypto.h + /usr/include/apr-1/apr_buckets.h + /usr/include/apr-1/apr_base64.h + /usr/include/apr-1/apr_anylock.h + /usr/bin/apu-1-config + + + + 
/usr/lib64/apr-util-1/apr_dbm_db.so + /usr/lib64/apr-util-1/apr_dbm_db-1.so + /usr/lib/.build-id/1c/aa4768d439e50d084a67ec81fbcd3df9c058fc + /usr/lib/.build-id/1c + /usr/lib/.build-id + + + + /usr/lib64/apr-util-1/apr_dbd_pgsql.so + /usr/lib64/apr-util-1/apr_dbd_pgsql-1.so + /usr/lib/.build-id/31/ce21d792563c765e73a8aba098540365708ac6 + /usr/lib/.build-id/31 + /usr/lib/.build-id + + + + /usr/lib64/apr-util-1/apr_crypto_openssl.so + /usr/lib64/apr-util-1/apr_crypto_openssl-1.so + /usr/lib/.build-id/e1/aaeed41b3467d28383f986e36cb17fca822321 + /usr/lib/.build-id/e1 + /usr/lib/.build-id + + + + /usr/lib64/libacl.so.1.1.2253 + /usr/lib64/libacl.so.1 + /usr/lib/.build-id/e3/4e4bf1c9f6060fdf5d1da6f04247ee6d3e56de + /usr/lib/.build-id/e3 + /usr/lib/.build-id + + + + /usr/share/man/man3/acl_valid.3.gz + /usr/share/man/man3/acl_to_text.3.gz + /usr/share/man/man3/acl_to_any_text.3.gz + /usr/share/man/man3/acl_size.3.gz + /usr/share/man/man3/acl_set_tag_type.3.gz + /usr/share/man/man3/acl_set_qualifier.3.gz + /usr/share/man/man3/acl_set_permset.3.gz + /usr/share/man/man3/acl_set_file.3.gz + /usr/share/man/man3/acl_set_fd.3.gz + /usr/share/man/man3/acl_init.3.gz + /usr/share/man/man3/acl_get_tag_type.3.gz + /usr/share/man/man3/acl_get_qualifier.3.gz + /usr/share/man/man3/acl_get_permset.3.gz + /usr/share/man/man3/acl_get_perm.3.gz + /usr/share/man/man3/acl_get_file.3.gz + /usr/share/man/man3/acl_get_fd.3.gz + /usr/share/man/man3/acl_get_entry.3.gz + /usr/share/man/man3/acl_from_text.3.gz + /usr/share/man/man3/acl_from_mode.3.gz + /usr/share/man/man3/acl_free.3.gz + /usr/share/man/man3/acl_extended_file_nofollow.3.gz + /usr/share/man/man3/acl_extended_file.3.gz + /usr/share/man/man3/acl_extended_fd.3.gz + /usr/share/man/man3/acl_error.3.gz + /usr/share/man/man3/acl_equiv_mode.3.gz + /usr/share/man/man3/acl_entries.3.gz + /usr/share/man/man3/acl_dup.3.gz + /usr/share/man/man3/acl_delete_perm.3.gz + /usr/share/man/man3/acl_delete_entry.3.gz + 
/usr/share/man/man3/acl_delete_def_file.3.gz + /usr/share/man/man3/acl_create_entry.3.gz + /usr/share/man/man3/acl_copy_int.3.gz + /usr/share/man/man3/acl_copy_ext.3.gz + /usr/share/man/man3/acl_copy_entry.3.gz + /usr/share/man/man3/acl_cmp.3.gz + /usr/share/man/man3/acl_clear_perms.3.gz + /usr/share/man/man3/acl_check.3.gz + /usr/share/man/man3/acl_calc_mask.3.gz + /usr/share/man/man3/acl_add_perm.3.gz + /usr/lib64/pkgconfig/libacl.pc + /usr/lib64/libacl.so + /usr/include/sys/acl.h + /usr/include/acl/libacl.h + /usr/include/acl + + + + /usr/share/man/man5/acl.5.gz + /usr/share/man/man1/setfacl.1.gz + /usr/share/man/man1/getfacl.1.gz + /usr/share/man/man1/chacl.1.gz + /usr/share/locale/sv/LC_MESSAGES/acl.mo + /usr/share/locale/pl/LC_MESSAGES/acl.mo + /usr/share/locale/gl/LC_MESSAGES/acl.mo + /usr/share/locale/fr/LC_MESSAGES/acl.mo + /usr/share/locale/es/LC_MESSAGES/acl.mo + /usr/share/locale/en@quot/LC_MESSAGES/acl.mo + /usr/share/locale/en@boldquot/LC_MESSAGES/acl.mo + /usr/share/locale/de/LC_MESSAGES/acl.mo + /usr/share/licenses/acl/COPYING.LGPL + /usr/share/licenses/acl/COPYING + /usr/share/licenses/acl + /usr/lib/.build-id/ea/bd0b51e30f5b8f7a60e8a713ea3e8e87da158a + /usr/lib/.build-id/ea + /usr/lib/.build-id/8c/f58e99260c995bf9a178a1eec3f0b6718420cb + /usr/lib/.build-id/8c + /usr/lib/.build-id/68/0459a093ce0f41f4da38f101a65134b6239d7d + /usr/lib/.build-id/68 + /usr/lib/.build-id + /usr/bin/setfacl + /usr/bin/getfacl + /usr/bin/chacl + + + + /usr/share/doc/apr-util/NOTICE + /usr/share/doc/apr-util/LICENSE + /usr/share/doc/apr-util/CHANGES + /usr/share/doc/apr-util + /usr/lib64/libaprutil-1.so.0.6.1 + /usr/lib64/libaprutil-1.so.0 + /usr/lib64/apr-util-1 + /usr/lib/.build-id/9a/1329e49ef7c955cbb975d72208c04f3a43df21 + /usr/lib/.build-id/9a + /usr/lib/.build-id + + + + /usr/lib64/apr-util-1/apr_ldap.so + /usr/lib64/apr-util-1/apr_ldap-1.so + /usr/lib/.build-id/de/6f295ab2082ad9db5dc6e4f89f15310f7476b5 + /usr/lib/.build-id/de + /usr/lib/.build-id + + + + 
/usr/lib64/apr-util-1/apr_dbd_mysql.so + /usr/lib64/apr-util-1/apr_dbd_mysql-1.so + /usr/lib/.build-id/12/5b8186db5c0fd8df6f367914de98afde368164 + /usr/lib/.build-id/12 + /usr/lib/.build-id + + + + /usr/lib64/apr-util-1/apr_dbd_sqlite3.so + /usr/lib64/apr-util-1/apr_dbd_sqlite3-1.so + /usr/lib/.build-id/6a/11b542c4986631e17444858fc7f09a645c4e08 + /usr/lib/.build-id/6a + /usr/lib/.build-id + + + + /usr/lib64/apr-util-1/apr_dbd_odbc.so + /usr/lib64/apr-util-1/apr_dbd_odbc-1.so + /usr/lib/.build-id/bb/857d0925e0fc4182a542db4338cc99abd269c4 + /usr/lib/.build-id/bb + /usr/lib/.build-id + + diff --git a/tests/documents/rpm_other.xml b/tests/documents/rpm_other.xml new file mode 100644 index 00000000..fb2c8672 --- /dev/null +++ b/tests/documents/rpm_other.xml @@ -0,0 +1,189 @@ + + + + + - Recommends: apr-util-openssl, apr-util-bdb (#1633973) + - Rebuilt for https://fedoraproject.org/wiki/Fedora_28_Mass_Rebuild + - Rebuild with new build flags embedded in apr + - Rebuilt for switch to libxcrypt + - Build with mariadb-connector-c + - new version 1.6.1 + - update to 1.6.0 (#1460831) +- move bdb support to loadable DSO in apr-util-dbd subpackage +- drop NSS, freetds support + - Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Binutils_Mass_Rebuild + - Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Mass_Rebuild + - Rebuilt for https://fedoraproject.org/wiki/Fedora_26_Mass_Rebuild + + + + - Recommends: apr-util-openssl, apr-util-bdb (#1633973) + - Rebuilt for https://fedoraproject.org/wiki/Fedora_28_Mass_Rebuild + - Rebuild with new build flags embedded in apr + - Rebuilt for switch to libxcrypt + - Build with mariadb-connector-c + - new version 1.6.1 + - update to 1.6.0 (#1460831) +- move bdb support to loadable DSO in apr-util-dbd subpackage +- drop NSS, freetds support + - Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Binutils_Mass_Rebuild + - Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Mass_Rebuild + - Rebuilt for 
https://fedoraproject.org/wiki/Fedora_26_Mass_Rebuild + + + + - Recommends: apr-util-openssl, apr-util-bdb (#1633973) + - Rebuilt for https://fedoraproject.org/wiki/Fedora_28_Mass_Rebuild + - Rebuild with new build flags embedded in apr + - Rebuilt for switch to libxcrypt + - Build with mariadb-connector-c + - new version 1.6.1 + - update to 1.6.0 (#1460831) +- move bdb support to loadable DSO in apr-util-dbd subpackage +- drop NSS, freetds support + - Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Binutils_Mass_Rebuild + - Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Mass_Rebuild + - Rebuilt for https://fedoraproject.org/wiki/Fedora_26_Mass_Rebuild + + + + - Recommends: apr-util-openssl, apr-util-bdb (#1633973) + - Rebuilt for https://fedoraproject.org/wiki/Fedora_28_Mass_Rebuild + - Rebuild with new build flags embedded in apr + - Rebuilt for switch to libxcrypt + - Build with mariadb-connector-c + - new version 1.6.1 + - update to 1.6.0 (#1460831) +- move bdb support to loadable DSO in apr-util-dbd subpackage +- drop NSS, freetds support + - Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Binutils_Mass_Rebuild + - Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Mass_Rebuild + - Rebuilt for https://fedoraproject.org/wiki/Fedora_26_Mass_Rebuild + + + + - new upstream release + - update link to POSIX.1e draft in acl(5) man page (#1510527) + - Rebuilt for https://fedoraproject.org/wiki/Fedora_28_Mass_Rebuild + - Switch to %ldconfig_scriptlets + - Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Binutils_Mass_Rebuild + - Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Mass_Rebuild + - fix test-suite failure with perl-5.26.0 (#1473845) +- update URL of the upstream source tarball + - setfacl.1: document the meaning of '-' in perms (#1337039) +- avoid failure of %check when building as root (#1085389) +- apply patches automatically to ease maintenance + - drop obsolete BuildRoot and Group tags +- fix spurious acl_check() failure on 
setfacl --restore (#1451826) + - Rebuilt for https://fedoraproject.org/wiki/Fedora_26_Mass_Rebuild + + + + - new upstream release + - update link to POSIX.1e draft in acl(5) man page (#1510527) + - Rebuilt for https://fedoraproject.org/wiki/Fedora_28_Mass_Rebuild + - Switch to %ldconfig_scriptlets + - Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Binutils_Mass_Rebuild + - Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Mass_Rebuild + - fix test-suite failure with perl-5.26.0 (#1473845) +- update URL of the upstream source tarball + - setfacl.1: document the meaning of '-' in perms (#1337039) +- avoid failure of %check when building as root (#1085389) +- apply patches automatically to ease maintenance + - drop obsolete BuildRoot and Group tags +- fix spurious acl_check() failure on setfacl --restore (#1451826) + - Rebuilt for https://fedoraproject.org/wiki/Fedora_26_Mass_Rebuild + + + + - new upstream release + - update link to POSIX.1e draft in acl(5) man page (#1510527) + - Rebuilt for https://fedoraproject.org/wiki/Fedora_28_Mass_Rebuild + - Switch to %ldconfig_scriptlets + - Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Binutils_Mass_Rebuild + - Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Mass_Rebuild + - fix test-suite failure with perl-5.26.0 (#1473845) +- update URL of the upstream source tarball + - setfacl.1: document the meaning of '-' in perms (#1337039) +- avoid failure of %check when building as root (#1085389) +- apply patches automatically to ease maintenance + - drop obsolete BuildRoot and Group tags +- fix spurious acl_check() failure on setfacl --restore (#1451826) + - Rebuilt for https://fedoraproject.org/wiki/Fedora_26_Mass_Rebuild + + + + - Recommends: apr-util-openssl, apr-util-bdb (#1633973) + - Rebuilt for https://fedoraproject.org/wiki/Fedora_28_Mass_Rebuild + - Rebuild with new build flags embedded in apr + - Rebuilt for switch to libxcrypt + - Build with mariadb-connector-c + - new version 1.6.1 + - update to 
1.6.0 (#1460831) +- move bdb support to loadable DSO in apr-util-dbd subpackage +- drop NSS, freetds support + - Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Binutils_Mass_Rebuild + - Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Mass_Rebuild + - Rebuilt for https://fedoraproject.org/wiki/Fedora_26_Mass_Rebuild + + + + - Recommends: apr-util-openssl, apr-util-bdb (#1633973) + - Rebuilt for https://fedoraproject.org/wiki/Fedora_28_Mass_Rebuild + - Rebuild with new build flags embedded in apr + - Rebuilt for switch to libxcrypt + - Build with mariadb-connector-c + - new version 1.6.1 + - update to 1.6.0 (#1460831) +- move bdb support to loadable DSO in apr-util-dbd subpackage +- drop NSS, freetds support + - Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Binutils_Mass_Rebuild + - Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Mass_Rebuild + - Rebuilt for https://fedoraproject.org/wiki/Fedora_26_Mass_Rebuild + + + + - Recommends: apr-util-openssl, apr-util-bdb (#1633973) + - Rebuilt for https://fedoraproject.org/wiki/Fedora_28_Mass_Rebuild + - Rebuild with new build flags embedded in apr + - Rebuilt for switch to libxcrypt + - Build with mariadb-connector-c + - new version 1.6.1 + - update to 1.6.0 (#1460831) +- move bdb support to loadable DSO in apr-util-dbd subpackage +- drop NSS, freetds support + - Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Binutils_Mass_Rebuild + - Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Mass_Rebuild + - Rebuilt for https://fedoraproject.org/wiki/Fedora_26_Mass_Rebuild + + + + - Recommends: apr-util-openssl, apr-util-bdb (#1633973) + - Rebuilt for https://fedoraproject.org/wiki/Fedora_28_Mass_Rebuild + - Rebuild with new build flags embedded in apr + - Rebuilt for switch to libxcrypt + - Build with mariadb-connector-c + - new version 1.6.1 + - update to 1.6.0 (#1460831) +- move bdb support to loadable DSO in apr-util-dbd subpackage +- drop NSS, freetds support + - Rebuilt for 
https://fedoraproject.org/wiki/Fedora_27_Binutils_Mass_Rebuild + - Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Mass_Rebuild + - Rebuilt for https://fedoraproject.org/wiki/Fedora_26_Mass_Rebuild + + + + - Recommends: apr-util-openssl, apr-util-bdb (#1633973) + - Rebuilt for https://fedoraproject.org/wiki/Fedora_28_Mass_Rebuild + - Rebuild with new build flags embedded in apr + - Rebuilt for switch to libxcrypt + - Build with mariadb-connector-c + - new version 1.6.1 + - update to 1.6.0 (#1460831) +- move bdb support to loadable DSO in apr-util-dbd subpackage +- drop NSS, freetds support + - Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Binutils_Mass_Rebuild + - Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Mass_Rebuild + - Rebuilt for https://fedoraproject.org/wiki/Fedora_26_Mass_Rebuild + + diff --git a/tests/documents/rpm_primary.xml b/tests/documents/rpm_primary.xml new file mode 100644 index 00000000..f3264ef3 --- /dev/null +++ b/tests/documents/rpm_primary.xml @@ -0,0 +1,437 @@ + + + + apr-util-devel + x86_64 + + 328abd9aab5206b5ea74fdbcec38d146851a2fe831b3e5dcebdb23c31c4bdb15 + APR utility library development kit + This package provides the support files which can be used to +build applications using the APR utility library. The mission +of the Apache Portable Runtime (APR) is to provide a free +library of C data structures and routines. + AlmaLinux Packaging Team <packager@almalinux.org> + http://apr.apache.org/ + + + apr-util-bdb + x86_64 + + bba430bebeec583e817372badbc957259d210e3081f5913a831594eb48f48f64 + APR utility library Berkeley DB driver + This package provides the Berkeley DB driver for the apr-util +DBM (database abstraction) interface. 
+ AlmaLinux Packaging Team <packager@almalinux.org> + http://apr.apache.org/ + + + apr-util-pgsql + x86_64 + + 403ef93e442ecdb6ea4846e620ffec3c40f7d5bc9d43a4a9055a9540b98f180a + APR utility library PostgreSQL DBD driver + This package provides the PostgreSQL driver for the apr-util +DBD (database abstraction) interface. + AlmaLinux Packaging Team <packager@almalinux.org> + http://apr.apache.org/ + + + apr-util-openssl + x86_64 + + 7bdd897b6a875ea0ea2d0269338be903b69c5f394bb9a682800ccd8f466ab186 + APR utility library OpenSSL crypto support + This package provides the OpenSSL crypto support for the apr-util. + AlmaLinux Packaging Team <packager@almalinux.org> + http://apr.apache.org/ + + + libacl + x86_64 + + 2e7e855181b7e3053fbe067aa6d34e75b73e403215390852666de7be254ca607 + Dynamic library for access control list support + This package contains the libacl.so dynamic library which contains +the POSIX 1003.1e draft standard 17 functions for manipulating access +control lists. + AlmaLinux Packaging Team <packager@almalinux.org> + https://savannah.nongnu.org/projects/acl + + + libacl-devel + x86_64 + + 4d5eae10d5108a02e293a033b622544a18d200733ad9f0727973d579b20c1fb3 + Files needed for building programs with libacl + This package contains header files and documentation needed to develop +programs which make use of the access control list programming interface +defined in POSIX 1003.1e draft standard 17. + AlmaLinux Packaging Team <packager@almalinux.org> + https://savannah.nongnu.org/projects/acl + + + acl + x86_64 + + 51e80e35ebaa69f7e72c1d9788e569107aee3b4414347324a8983911a2af46d6 + Access control list utilities + This package contains the getfacl and setfacl utilities needed for +manipulating access control lists. 
+ AlmaLinux Packaging Team <packager@almalinux.org> + https://savannah.nongnu.org/projects/acl + + + apr-util + x86_64 + + 56fbbf08e857582e22bb95d252a3e1735e4476ac64cdd4d7e607974876ccbb61 + Apache Portable Runtime Utility library + The mission of the Apache Portable Runtime (APR) is to provide a +free library of C data structures and routines. This library +contains additional utility interfaces for APR; including support +for XML, LDAP, database interfaces, URI parsing and more. + AlmaLinux Packaging Team <packager@almalinux.org> + http://apr.apache.org/ + + + apr-util-ldap + x86_64 + + 901e85b18bba94983531accfe46d9649cb23e50222adf5dc15657576ab7d55e4 + APR utility library LDAP support + This package provides the LDAP support for the apr-util. + AlmaLinux Packaging Team <packager@almalinux.org> + http://apr.apache.org/ + + + apr-util-mysql + x86_64 + + 6600d5b2801be6e85d5ed0f69b82b0f0ac9b2c9cf89b4d6a130d87f4bee575f6 + APR utility library MySQL DBD driver + This package provides the MySQL driver for the apr-util DBD +(database abstraction) interface. + AlmaLinux Packaging Team <packager@almalinux.org> + http://apr.apache.org/ + + + apr-util-sqlite + x86_64 + + b97be76fe95186f4c3ec3db709793fcab944eabf091614f46ee8b4271097b6ff + APR utility library SQLite DBD driver + This package provides the SQLite driver for the apr-util DBD +(database abstraction) interface. + AlmaLinux Packaging Team <packager@almalinux.org> + http://apr.apache.org/ + + + apr-util-odbc + x86_64 + + 72f87880ca38933b9158e3993ffff9f33267f3a0e32e9791c390190587f40cc5 + APR utility library ODBC DBD driver + This package provides the ODBC driver for the apr-util DBD +(database abstraction) interface. 
+ AlmaLinux Packaging Team <packager@almalinux.org> + http://apr.apache.org/ + + diff --git a/tests/documents/rpm_primary2.xml b/tests/documents/rpm_primary2.xml new file mode 100644 index 00000000..fa9ce6cf --- /dev/null +++ b/tests/documents/rpm_primary2.xml @@ -0,0 +1,152 @@ + + + + complex-package + x86_64 + + bbb7b0e9350a0f75b923bdd0ef4f9af39765c668a3e70bfd3486ea9f0f618aaf + A package for exercising many different features of RPM metadata + Complex package + Michael Bluth + http://bobloblaw.com + + + rpm-empty + x86_64 + + 90fbba546300f507473547f33e229ee7bad94bbbe6e84b21d485e8e43b5f1132 + "" + + + + + + rpm-with-invalid-chars + noarch + + 64f1444f8e86a9ae6accdc2c4b12cb4a87fb2414c0998df461a8623a52eb3cc4 + An RPM file with invalid characters in its description. + This RPM that contains XML-illegal characters such as ampersand & and less-than < greater-than > in its </description>. +These must be escaped in the final XML metadata. The XML spec does not strictly require escaping 'single' or "double" quotes +within text content, and not all XML libraries do so. However, it is generally recommended. + + https://github.com/dralley/rpmrepo_rs/ + + + rpm-with-non-ascii + noarch + + 957de8a966af8fe8e55102489099d8b20bbecc23954c8c2bd88fb59625260393 + An RPM file with non-ascii characters in its metadata. + This file contains unicode characters and should be encoded as UTF-8. 
The +following code points are all outside the "Basic Latin (ASCII)" code point +block: + +* U+0080: € +* U+0100: Ā +* U+0180: ƀ +* U+0250: ɐ +* U+02B0: ʰ +* U+0041 0x0300: À +* U+0370: Ͱ + +See: http://www.unicode.org/charts/ + + https://github.com/dralley/rpmrepo_rs/ + + From b9bed6fd89dc8ce96bcb4b69e9f849cc5f02b37a Mon Sep 17 00:00:00 2001 From: Daniel Alley Date: Tue, 28 Jun 2022 21:06:39 -0400 Subject: [PATCH 3/4] Add benchmarks for escaping / unescaping text closes #404 --- Changelog.md | 2 + benches/macrobenches.rs | 11 ++- benches/microbenches.rs | 153 ++++++++++++++++++++++++++++++++++++++-- tests/test.rs | 3 +- 4 files changed, 154 insertions(+), 15 deletions(-) diff --git a/Changelog.md b/Changelog.md index 1e89efe7..60b2b965 100644 --- a/Changelog.md +++ b/Changelog.md @@ -103,6 +103,7 @@ - [#393]: Added more tests for namespace resolver - [#393]: Added tests for reserved names (started with "xml"i) -- see - [#363]: Add tests for `Reader::read_event_buffered` to ensure that proper events generated for corresponding inputs +- [#407]: Improved benchmark suite to cover whole-document parsing, escaping and unescaping text [#8]: https://github.com/Mingun/fast-xml/pull/8 [#9]: https://github.com/Mingun/fast-xml/pull/9 @@ -115,6 +116,7 @@ [#393]: https://github.com/tafia/quick-xml/pull/393 [#395]: https://github.com/tafia/quick-xml/pull/395 [#403]: https://github.com/tafia/quick-xml/pull/403 +[#407]: https://github.com/tafia/quick-xml/pull/407 ## 0.23.0 -- 2022-05-08 diff --git a/benches/macrobenches.rs b/benches/macrobenches.rs index 876fc7ae..14f28cb9 100644 --- a/benches/macrobenches.rs +++ b/benches/macrobenches.rs @@ -26,13 +26,13 @@ fn parse_document(doc: &[u8]) -> XmlResult<()> { for attr in e.attributes() { criterion::black_box(attr?.unescaped_value()?); } - }, + } Event::Text(e) => { criterion::black_box(e.unescaped()?); - }, + } Event::CData(e) => { criterion::black_box(e.into_inner()); - }, + } Event::End(_) => (), Event::Eof => break, _ => (), @@ 
-131,7 +131,4 @@ pub fn bench_fully_parse_document(c: &mut Criterion) { group.finish(); } -criterion_group!( - benches, - bench_fully_parse_document, -); +criterion_group!(benches, bench_fully_parse_document,); diff --git a/benches/microbenches.rs b/benches/microbenches.rs index 1c8defc7..9d701c05 100644 --- a/benches/microbenches.rs +++ b/benches/microbenches.rs @@ -1,5 +1,6 @@ use criterion::{self, criterion_group, Criterion}; use pretty_assertions::assert_eq; +use quick_xml::escape::{escape, unescape}; use quick_xml::events::Event; use quick_xml::name::QName; use quick_xml::Reader; @@ -7,6 +8,21 @@ use quick_xml::Reader; static SAMPLE: &[u8] = include_bytes!("../tests/documents/sample_rss.xml"); static PLAYERS: &[u8] = include_bytes!("../tests/documents/players.xml"); +static LOREM_IPSUM_TEXT: &[u8] = +b"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt +ut labore et dolore magna aliqua. Hac habitasse platea dictumst vestibulum rhoncus est pellentesque. +Risus ultricies tristique nulla aliquet enim tortor at. Fermentum odio eu feugiat pretium nibh ipsum. +Volutpat sed cras ornare arcu dui. Scelerisque fermentum dui faucibus in ornare quam. Arcu cursus +euismod quis viverra nibh cras pulvinar mattis. Sed viverra tellus in hac habitasse platea. Quis +commodo odio aenean sed. Cursus in hac habitasse platea dictumst quisque sagittis purus. + +Neque convallis a cras semper auctor. Sit amet mauris commodo quis imperdiet massa. Ac ut consequat +semper viverra nam libero justo laoreet sit. Adipiscing commodo elit at imperdiet dui accumsan. +Enim lobortis scelerisque fermentum dui faucibus in ornare. Natoque penatibus et magnis dis parturient +montes nascetur ridiculus mus. At lectus urna duis convallis convallis tellus id interdum. Libero +volutpat sed cras ornare arcu dui vivamus arcu. Cursus in hac habitasse platea dictumst quisque sagittis +purus. 
Consequat id porta nibh venenatis cras sed felis."; + /// Benchmarks the `Reader::read_event` function with all XML well-formless /// checks disabled (with and without trimming content of #text nodes) fn read_event(c: &mut Criterion) { @@ -25,7 +41,10 @@ fn read_event(c: &mut Criterion) { } buf.clear(); } - assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml"); + assert_eq!( + count, 1550, + "Overall tag count in ./tests/documents/sample_rss.xml" + ); }) }); @@ -45,7 +64,10 @@ fn read_event(c: &mut Criterion) { } buf.clear(); } - assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml"); + assert_eq!( + count, 1550, + "Overall tag count in ./tests/documents/sample_rss.xml" + ); }); }); group.finish(); @@ -70,7 +92,10 @@ fn read_namespaced_event(c: &mut Criterion) { } buf.clear(); } - assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml"); + assert_eq!( + count, 1550, + "Overall tag count in ./tests/documents/sample_rss.xml" + ); }); }); @@ -91,7 +116,10 @@ fn read_namespaced_event(c: &mut Criterion) { } buf.clear(); } - assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml"); + assert_eq!( + count, 1550, + "Overall tag count in ./tests/documents/sample_rss.xml" + ); }); }); group.finish(); @@ -117,7 +145,10 @@ fn bytes_text_unescaped(c: &mut Criterion) { } buf.clear(); } - assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml"); + assert_eq!( + count, 1550, + "Overall tag count in ./tests/documents/sample_rss.xml" + ); // Windows has \r\n instead of \n #[cfg(windows)] @@ -152,7 +183,10 @@ fn bytes_text_unescaped(c: &mut Criterion) { } buf.clear(); } - assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml"); + assert_eq!( + count, 1550, + "Overall tag count in ./tests/documents/sample_rss.xml" + ); // Windows has \r\n instead of \n #[cfg(windows)] @@ -333,11 +367,116 @@ fn attributes(c: &mut Criterion) { 
group.finish(); } +/// Benchmarks escaping text using XML rules +fn escaping(c: &mut Criterion) { + let mut group = c.benchmark_group("escape_text"); + + group.bench_function("no_chars_to_escape_long", |b| { + b.iter(|| { + criterion::black_box(escape(LOREM_IPSUM_TEXT)); + }) + }); + + group.bench_function("no_chars_to_escape_short", |b| { + b.iter(|| { + criterion::black_box(escape(b"just bit of text")); + }) + }); + + group.bench_function("escaped_chars_short", |b| { + b.iter(|| { + criterion::black_box(escape(b"age > 72 && age < 21")); + criterion::black_box(escape(b"\"what's that?\"")); + }) + }); + + group.bench_function("escaped_chars_long", |b| { + let lorem_ipsum_with_escape_chars = +b"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt +ut labore et dolore magna aliqua. & Hac habitasse platea dictumst vestibulum rhoncus est pellentesque. +Risus ultricies tristique nulla aliquet enim tortor at. Fermentum odio eu feugiat pretium nibh ipsum. +Volutpat sed cras ornare arcu dui. Scelerisque fermentum dui faucibus in ornare quam. Arcu cursus +euismod quis< viverra nibh cras pulvinar mattis. Sed viverra tellus in hac habitasse platea. Quis +commodo odio aenean sed. Cursus in hac habitasse platea dictumst quisque sagittis purus. + +Neque convallis >a cras semper auctor. Sit amet mauris commodo quis imperdiet massa. Ac ut consequat +semper viverra nam libero justo laoreet sit. 'Adipiscing' commodo elit at imperdiet dui accumsan. +Enim lobortis scelerisque fermentum dui faucibus in ornare. Natoque penatibus et magnis dis parturient +montes nascetur ridiculus mus. At lectus urna duis convallis convallis tellus id interdum. Libero +volutpat sed cras ornare arcu dui vivamus arcu. Cursus in hac habitasse platea dictumst quisque sagittis +purus. 
Consequat id porta nibh venenatis cras sed felis."; + + b.iter(|| { + criterion::black_box(escape(lorem_ipsum_with_escape_chars)); + }) + }); + group.finish(); +} + +/// Benchmarks unescaping text encoded using XML rules +fn unescaping(c: &mut Criterion) { + let mut group = c.benchmark_group("unescape_text"); + + group.bench_function("no_chars_to_unescape_long", |b| { + b.iter(|| { + criterion::black_box(unescape(LOREM_IPSUM_TEXT)).unwrap(); + }) + }); + + group.bench_function("no_chars_to_unescape_short", |b| { + b.iter(|| { + criterion::black_box(unescape(b"just a bit of text")).unwrap(); + }) + }); + + group.bench_function("char_reference", |b| { + b.iter(|| { + let text = b"prefix "some stuff","more stuff""; + criterion::black_box(unescape(text)).unwrap(); + let text = b"&<"; + criterion::black_box(unescape(text)).unwrap(); + }) + }); + + group.bench_function("entity_reference", |b| { + b.iter(|| { + let text = b"age > 72 && age < 21"; + criterion::black_box(unescape(text)).unwrap(); + let text = b""what's that?""; + criterion::black_box(unescape(text)).unwrap(); + }) + }); + + group.bench_function("mixed", |b| { + let text = +b"Lorem ipsum dolor sit amet, &consectetur adipiscing elit, sed do eiusmod tempor incididunt +ut labore et dolore magna aliqua. Hac habitasse platea dictumst vestibulum rhoncus est pellentesque. +Risus ultricies "tristique nulla aliquet enim tortor" at. Fermentum odio eu feugiat pretium +nibh ipsum. Volutpat sed cras ornare arcu dui. Scelerisque fermentum dui faucibus in ornare quam. Arcu +cursus euismod quis <viverra nibh cras pulvinar mattis. Sed viverra tellus in hac habitasse platea. +Quis commodo odio aenean sed. Cursus in hac habitasse platea dictumst quisque sagittis purus. + +Neque convallis a cras semper auctor. Sit amet mauris commodo quis imperdiet massa. Ac ut consequat +semper viverra nam libero justo # laoreet sit. Adipiscing commodo elit at imperdiet dui accumsan. +Enim lobortis scelerisque fermentum dui faucibus in ornare. 
Natoque penatibus et magnis dis parturient +montes nascetur ridiculus mus. At lectus urna !duis convallis convallis tellus id interdum. Libero +volutpat sed cras ornare arcu dui vivamus arcu. Cursus in hac habitasse platea dictumst quisque sagittis +purus. Consequat id porta nibh venenatis cras sed felis."; + + b.iter(|| { + criterion::black_box(unescape(text)).unwrap(); + }) + }); + group.finish(); +} + criterion_group!( benches, read_event, bytes_text_unescaped, read_namespaced_event, one_event, - attributes + attributes, + escaping, + unescaping, ); diff --git a/tests/test.rs b/tests/test.rs index 45cc618c..36ed5df9 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -264,7 +264,8 @@ fn line_score() { inning: String, } - let res: LineScoreData = quick_xml::de::from_str(include_str!("documents/linescore.xml")).unwrap(); + let res: LineScoreData = + quick_xml::de::from_str(include_str!("documents/linescore.xml")).unwrap(); let expected = LineScoreData { game_pk: 239575, From 84b2f3dc89f29b6141b21b942aea8dd2dad47e0b Mon Sep 17 00:00:00 2001 From: Daniel Alley Date: Wed, 29 Jun 2022 00:35:31 -0400 Subject: [PATCH 4/4] Run benchmarks as tests in CI --- .github/workflows/rust.yml | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index e8f1d13e..1f4869b6 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -3,7 +3,14 @@ name: Rust on: [push, pull_request] jobs: - build: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Check fmt + run: cargo fmt -- --check + + test: strategy: matrix: platform: [ubuntu-latest, windows-latest] @@ -15,13 +22,14 @@ jobs: RUSTFLAGS: -C instrument-coverage steps: + - uses: actions/checkout@v3 - name: Install coverage reporter (llvm-tools-preview) if: runner.os == 'Linux' run: rustup component add llvm-tools-preview - name: Install coverage reporter (grcov) if: runner.os == 'Linux' run: cargo install 
grcov - - uses: actions/checkout@v1 + - name: Build run: cargo build - name: Build benchmarks @@ -29,6 +37,9 @@ jobs: - name: Build benchmarks (compare) working-directory: compare run: cargo bench --no-run + - name: Run tests + benchmarks + run: cargo test --all-features --benches --tests + - name: Run tests (no features) env: LLVM_PROFILE_FILE: coverage/no-features-%p-%m.profraw @@ -67,6 +78,4 @@ jobs: flags: unittests verbose: true continue-on-error: true - - name: Check fmt - run: cargo fmt -- --check