From adf873e46af4ce8df3340662ef11de1639eaea98 Mon Sep 17 00:00:00 2001 From: Tpt Date: Sun, 14 Jan 2024 02:05:51 +0500 Subject: [PATCH] Adds NsReader::prefixes to iterate on all the prefixes currently declared This is useful for some use cases like RDF/XML where prefixes might be interesting to extract --- Changelog.md | 2 + src/name.rs | 59 +++++++++++++++++++++ src/reader/ns_reader.rs | 86 +++++++++++++++++++++++++++++- tests/namespaces.rs | 113 +++++++++++++++++++++++++++++++++++++++- 4 files changed, 258 insertions(+), 2 deletions(-) diff --git a/Changelog.md b/Changelog.md index f652b3d6..1ee04a0a 100644 --- a/Changelog.md +++ b/Changelog.md @@ -29,6 +29,7 @@ to get an offset of the error position. For `SyntaxError`s the range - [#362]: Added `escape::minimal_escape()` which escapes only `&` and `<`. - [#362]: Added `BytesCData::minimal_escape()` which escapes only `&` and `<`. - [#362]: Added `Serializer::set_quote_level()` which allow to set desired level of escaping. +- [#705]: Added `NsReader::prefixes()` to list all the prefixes currently declared. ### Bug Fixes @@ -68,6 +69,7 @@ to get an offset of the error position. For `SyntaxError`s the range [#684]: https://github.com/tafia/quick-xml/pull/684 [#689]: https://github.com/tafia/quick-xml/pull/689 [#704]: https://github.com/tafia/quick-xml/pull/704 +[#705]: https://github.com/tafia/quick-xml/pull/705 ## 0.31.0 -- 2023-10-22 diff --git a/src/name.rs b/src/name.rs index 7ac207c1..a2fe50b1 100644 --- a/src/name.rs +++ b/src/name.rs @@ -617,6 +617,65 @@ impl NamespaceResolver { None => ResolveResult::Unbound, } } + + #[inline] + pub fn iter(&self) -> PrefixIter { + PrefixIter { + resolver: self, + // We initialize the cursor to 2 to skip the two default namespaces xml: and xmlns: + bindings_cursor: 2, + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Iterator on the current declared prefixes. 
+/// +/// See [`NsReader::prefixes`](crate::NsReader::prefixes) for documentation. +#[derive(Debug, Clone)] +pub struct PrefixIter<'a> { + resolver: &'a NamespaceResolver, + bindings_cursor: usize, +} + +impl<'a> Iterator for PrefixIter<'a> { + type Item = (PrefixDeclaration<'a>, Namespace<'a>); + + fn next(&mut self) -> Option<(PrefixDeclaration<'a>, Namespace<'a>)> { + while let Some(namespace_entry) = self.resolver.bindings.get(self.bindings_cursor) { + self.bindings_cursor += 1; // We increment for next read + + // We check if the key has not been overridden by having a look + // at the namespaces declared after in the array + let prefix = namespace_entry.prefix(&self.resolver.buffer); + if self.resolver.bindings[self.bindings_cursor..] + .iter() + .any(|ne| prefix == ne.prefix(&self.resolver.buffer)) + { + continue; // Overridden + } + let namespace = if let ResolveResult::Bound(namespace) = + namespace_entry.namespace(&self.resolver.buffer) + { + namespace + } else { + continue; // We don't return unbound namespaces + }; + let prefix = if let Some(Prefix(prefix)) = prefix { + PrefixDeclaration::Named(prefix) + } else { + PrefixDeclaration::Default + }; + return Some((prefix, namespace)); + } + None // We have exhausted the array + } + + fn size_hint(&self) -> (usize, Option<usize>) { + // Real count could be less if some namespaces was overridden + (0, Some(self.resolver.bindings.len() - self.bindings_cursor)) + } } #[cfg(test)] diff --git a/src/reader/ns_reader.rs b/src/reader/ns_reader.rs index d5b79e78..028f1364 100644 --- a/src/reader/ns_reader.rs +++ b/src/reader/ns_reader.rs @@ -12,7 +12,7 @@ use std::path::Path; use crate::errors::Result; use crate::events::Event; -use crate::name::{LocalName, NamespaceResolver, QName, ResolveResult}; +use crate::name::{LocalName, NamespaceResolver, PrefixIter, QName, ResolveResult}; use crate::reader::{Config, Reader, Span, XmlSource}; /// A low level encoding-agnostic XML event reader that performs namespace resolution. 
@@ -48,6 +48,90 @@ impl<R> NsReader<R> { pub fn config_mut(&mut self) -> &mut Config { self.reader.config_mut() } + + /// Returns all the prefixes currently declared except the default `xml` and `xmlns` namespaces. + /// + /// # Examples + /// + /// This example shows what results the returned iterator would return after + /// reading each event of a simple XML. + /// + /// ``` + /// # use pretty_assertions::assert_eq; + /// use quick_xml::name::{Namespace, PrefixDeclaration}; + /// use quick_xml::NsReader; + /// + /// let src = "<root> + /// <a xmlns=\"a1\" xmlns:a=\"a2\"> + /// <b xmlns=\"b1\" xmlns:b=\"b2\"> + /// <c/> + /// </b> + /// <d/> + /// </a> + /// </root>"; + /// let mut reader = NsReader::from_str(src); + /// reader.config_mut().trim_text(true); + /// // No prefixes at the beginning + /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![]); + /// + /// reader.read_resolved_event()?; // <root> + /// // No prefixes declared on root + /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![]); + /// + /// reader.read_resolved_event()?; // <a> + /// // Two prefixes declared on "a" + /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![ + /// (PrefixDeclaration::Default, Namespace(b"a1")), + /// (PrefixDeclaration::Named(b"a"), Namespace(b"a2")) + /// ]); + /// + /// reader.read_resolved_event()?; // <b> + /// // The default prefix got overridden and new "b" prefix + /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![ + /// (PrefixDeclaration::Named(b"a"), Namespace(b"a2")), + /// (PrefixDeclaration::Default, Namespace(b"b1")), + /// (PrefixDeclaration::Named(b"b"), Namespace(b"b2")) + /// ]); + /// + /// reader.read_resolved_event()?; // <c/> + /// // Still the same + /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![ + /// (PrefixDeclaration::Named(b"a"), Namespace(b"a2")), + /// (PrefixDeclaration::Default, Namespace(b"b1")), + /// (PrefixDeclaration::Named(b"b"), Namespace(b"b2")) + /// ]); + /// + /// reader.read_resolved_event()?; // </b> + /// // Still the same + /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![ + /// (PrefixDeclaration::Named(b"a"), Namespace(b"a2")), + /// 
(PrefixDeclaration::Default, Namespace(b"b1")), + /// (PrefixDeclaration::Named(b"b"), Namespace(b"b2")) + /// ]); + /// + /// reader.read_resolved_event()?; // <d/> + /// // </b> got closed so back to the prefixes declared on <a> + /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![ + /// (PrefixDeclaration::Default, Namespace(b"a1")), + /// (PrefixDeclaration::Named(b"a"), Namespace(b"a2")) + /// ]); + /// + /// reader.read_resolved_event()?; // </a> + /// // Still the same + /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![ + /// (PrefixDeclaration::Default, Namespace(b"a1")), + /// (PrefixDeclaration::Named(b"a"), Namespace(b"a2")) + /// ]); + /// + /// reader.read_resolved_event()?; // </root> + /// // <a> got closed + /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![]); + /// # quick_xml::Result::Ok(()) + /// ``` + #[inline] + pub fn prefixes(&self) -> PrefixIter { + self.ns_resolver.iter() + } } /// Private methods diff --git a/tests/namespaces.rs b/tests/namespaces.rs index 749aed7a..a50e2306 100644 --- a/tests/namespaces.rs +++ b/tests/namespaces.rs @@ -2,7 +2,7 @@ use pretty_assertions::assert_eq; use quick_xml::events::attributes::Attribute; use quick_xml::events::Event::*; use quick_xml::name::ResolveResult::*; -use quick_xml::name::{Namespace, QName}; +use quick_xml::name::{Namespace, PrefixDeclaration, QName}; use quick_xml::reader::NsReader; use std::borrow::Cow; @@ -19,6 +19,19 @@ fn namespace() { e ), } + let it1 = r.prefixes(); + let it2 = it1.clone(); + assert_eq!(it1.size_hint(), (0, Some(1))); + assert_eq!( + it1.collect::<Vec<_>>(), + vec![(PrefixDeclaration::Named(b"myns"), Namespace(b"www1"))] + ); + + assert_eq!(it2.size_hint(), (0, Some(1))); + assert_eq!( + it2.collect::<Vec<_>>(), + vec![(PrefixDeclaration::Named(b"myns"), Namespace(b"www1"))] + ); // match r.read_resolved_event() { @@ -28,11 +41,25 @@ fn namespace() { e ), } + let it = r.prefixes(); + assert_eq!(it.size_hint(), (0, Some(1))); + assert_eq!( + it.collect::<Vec<_>>(), + vec![(PrefixDeclaration::Named(b"myns"), 
Namespace(b"www1"))] + ); + // "in namespace!" match r.read_resolved_event() { Ok((ns, Text(_))) => assert_eq!(ns, Unbound), e => panic!("expecting text content with no namespace, got {:?}", e), } + let it = r.prefixes(); + assert_eq!(it.size_hint(), (0, Some(1))); + assert_eq!( + it.collect::<Vec<_>>(), + vec![(PrefixDeclaration::Named(b"myns"), Namespace(b"www1"))] + ); + // match r.read_resolved_event() { Ok((ns, End(_))) => assert_eq!(ns, Bound(Namespace(b"www1"))), @@ -41,12 +68,24 @@ fn namespace() { e ), } + let it = r.prefixes(); + assert_eq!(it.size_hint(), (0, Some(1))); + assert_eq!( + it.collect::<Vec<_>>(), + vec![(PrefixDeclaration::Named(b"myns"), Namespace(b"www1"))] + ); // match r.read_resolved_event() { Ok((ns, End(_))) => assert_eq!(ns, Unbound), e => panic!("expecting outer end element with no namespace, got {:?}", e), } + let it = r.prefixes(); + assert_eq!(it.size_hint(), (0, Some(1))); + assert_eq!( + it.collect::<Vec<_>>(), + vec![(PrefixDeclaration::Named(b"myns"), Namespace(b"www1"))] + ); } #[test] @@ -62,6 +101,9 @@ fn default_namespace() { e ), } + let it = r.prefixes(); + assert_eq!(it.size_hint(), (0, Some(0))); + assert_eq!(it.collect::<Vec<_>>(), vec![]); // match r.read_resolved_event() { @@ -71,6 +113,13 @@ fn default_namespace() { e ), } + let it = r.prefixes(); + assert_eq!(it.size_hint(), (0, Some(1))); + assert_eq!( + it.collect::<Vec<_>>(), + vec![(PrefixDeclaration::Default, Namespace(b"www1"))] + ); + // match r.read_resolved_event() { Ok((ns, End(_))) => assert_eq!(ns, Bound(Namespace(b"www1"))), @@ -79,6 +128,12 @@ fn default_namespace() { e ), } + let it = r.prefixes(); + assert_eq!(it.size_hint(), (0, Some(1))); + assert_eq!( + it.collect::<Vec<_>>(), + vec![(PrefixDeclaration::Default, Namespace(b"www1"))] + ); // very important: a should not be in any namespace. The default namespace only applies to // the sub-document it is defined on. 
@@ -86,6 +141,9 @@ fn default_namespace() { Ok((ns, End(_))) => assert_eq!(ns, Unbound), e => panic!("expecting outer end element with no namespace, got {:?}", e), } + let it = r.prefixes(); + assert_eq!(it.size_hint(), (0, Some(0))); + assert_eq!(it.collect::<Vec<_>>(), vec![]); } #[test] @@ -101,6 +159,12 @@ fn default_namespace_reset() { e ), } + let it = r.prefixes(); + assert_eq!(it.size_hint(), (0, Some(1))); + assert_eq!( + it.collect::<Vec<_>>(), + vec![(PrefixDeclaration::Default, Namespace(b"www1"))] + ); // match r.read_resolved_event() { @@ -110,11 +174,18 @@ fn default_namespace_reset() { e ), } + let it = r.prefixes(); + assert_eq!(it.size_hint(), (0, Some(2))); + assert_eq!(it.collect::<Vec<_>>(), vec![]); + // match r.read_resolved_event() { Ok((ns, End(_))) => assert_eq!(ns, Unbound), e => panic!("expecting inner end element with no namespace, got {:?}", e), } + let it = r.prefixes(); + assert_eq!(it.size_hint(), (0, Some(2))); + assert_eq!(it.collect::<Vec<_>>(), vec![]); // match r.read_resolved_event() { @@ -124,6 +195,12 @@ fn default_namespace_reset() { e ), } + let it = r.prefixes(); + assert_eq!(it.size_hint(), (0, Some(1))); + assert_eq!( + it.collect::<Vec<_>>(), + vec![(PrefixDeclaration::Default, Namespace(b"www1"))] + ); } /// Single empty element with qualified attributes. @@ -163,6 +240,13 @@ fn attributes_empty_ns() { )) ); assert_eq!(attrs.next(), None); + + let it = r.prefixes(); + assert_eq!(it.size_hint(), (0, Some(1))); + assert_eq!( + it.collect::<Vec<_>>(), + vec![(PrefixDeclaration::Named(b"r"), Namespace(b"urn:example:r"))] + ); } /// Single empty element with qualified attributes. 
@@ -204,6 +288,13 @@ fn attributes_empty_ns_expanded() { )) ); assert_eq!(attrs.next(), None); + + let it = r.prefixes(); + assert_eq!(it.size_hint(), (0, Some(1))); + assert_eq!( + it.collect::<Vec<_>>(), + vec![(PrefixDeclaration::Named(b"r"), Namespace(b"urn:example:r"))] + ); } match r.read_resolved_event() { @@ -228,6 +319,13 @@ fn default_ns_shadowing_empty() { } e => panic!("Expected Start event (), got {:?}", e), } + + let it = r.prefixes(); + assert_eq!(it.size_hint(), (0, Some(1))); + assert_eq!( + it.collect::<Vec<_>>(), + vec![(PrefixDeclaration::Default, Namespace(b"urn:example:o"))] + ); } // @@ -257,6 +355,13 @@ fn default_ns_shadowing_empty() { Some((Unbound, &b"att1"[..], Cow::Borrowed(&b"a"[..]))) ); assert_eq!(attrs.next(), None); + + let it = r.prefixes(); + assert_eq!(it.size_hint(), (0, Some(2))); + assert_eq!( + it.collect::<Vec<_>>(), + vec![(PrefixDeclaration::Default, Namespace(b"urn:example:i")),] + ); } // @@ -267,6 +372,12 @@ fn default_ns_shadowing_empty() { } e => panic!("Expected End event (), got {:?}", e), } + let it = r.prefixes(); + assert_eq!(it.size_hint(), (0, Some(1))); + assert_eq!( + it.collect::<Vec<_>>(), + vec![(PrefixDeclaration::Default, Namespace(b"urn:example:o"))] + ); } #[test]