Vendor dependencies for 0.3.0 release

This commit is contained in:
2025-09-27 10:29:08 -05:00
parent 0c8d39d483
commit 82ab7f317b
26803 changed files with 16134934 additions and 0 deletions

1
vendor/xml-rs/.cargo-checksum.json vendored Normal file
View File

@@ -0,0 +1 @@
{"files":{"Cargo.lock":"f7b7d1c4c17ab67546d530874d1bbc12f7dd6cc673f8cf1aa8529a38e20dca9c","Cargo.toml":"398ea1a1be4800e60d7abfedb25da60383b815d539a4bf2b107bb9ed152ee4c7","LICENSE":"0dc18d924dc0a5f41172a393012843a5eaaef338e795b3645da9cc3b6068220b","README.md":"b5a3a4d18969fa9173cd89ace4ad3fb74a9d5405a940dd38a1ec62e524c8e26e","src/analyze.rs":"ad2d8cc726cc077c288caf9ac8089dac2598daa3eb54336b4509d0459b1ba417","src/attribute.rs":"1c411c711834944f9a47c554489baebe13133a222dc09d1348c625afd1444612","src/common.rs":"02ac663167f9f3b7db0e282403a290d2c2c019f47435bf720362f0ef4cb18341","src/escape.rs":"1d782f3d5a28ea0f8350e650d0eae91f34b94e6cd32ab4dc9801af6944c850e6","src/lib.rs":"9f68b90c472b7bc35c74d8b22425669b434f12bdd86bf39442cf8b2624907b9d","src/macros.rs":"12adbcd7aba7036cd3dffc9ea5fbde1e2b52a0c20a06e4c342077be483e4125c","src/name.rs":"a233b8e295d242428b22175f4935932d696d52a30402e06d91f3340d2ade7771","src/namespace.rs":"e88b49557530a3e21b358c82bb0ec5f9a9efe811a2e98bac92ac2fac13e19e61","src/reader.rs":"680caf174492f281e0ee2d95d349c55258a82d11bfdc029006c3e232c2e9b3c5","src/reader/config.rs":"b7d277d12dbb05f50e0a26c0fff3b9273dbf2217512e3aa0107cccd2c9bedfb6","src/reader/error.rs":"77b9da83b56e50fd4f3834339edbd73c9553934c1ab9e57ea9520fe21b9e5166","src/reader/events.rs":"bbd661b973fc7458d08fbe7b67756423786b6e0f9a7f2285c6c9fe7c71135296","src/reader/indexset.rs":"c646edf037efdc7160105393fd683b2d851afbada98b1de4aeacd0a514699182","src/reader/lexer.rs":"e6e1d1c62ca62b6159491a63e4bdf77bf4a0ea7e7dfef853c31f690854c3634d","src/reader/parser.rs":"de3f50fc9af6ee509ba48d31f64a0e515147ca49bf89c4f5c7ae0489c764ba51","src/reader/parser/inside_cdata.rs":"bc451ca411d8be8a64b1445dcbbcd05cba6251472888c891468c908d2de31a53","src/reader/parser/inside_closing_tag_name.rs":"84c0dcab91ffc8fc4bb172bbf3c1022c18d44ad19ca1cff5f24fc5eb94caaedf","src/reader/parser/inside_comment.rs":"85d934a45db05fcce04a2ae79d9f78225c026860363bf14045ec15c78c44b6b7","src/reader/parser/inside_declaration.rs":"c3b567a699bcdaf5c67c
43dfb65251d05b4168eb9052f30aa98baaad81e5ca7a","src/reader/parser/inside_doctype.rs":"3cba178f7e8dedf76a4edc091eb77ae82581a145fd52d09486cbacac0228c816","src/reader/parser/inside_opening_tag.rs":"f4e910b30c9337412c19904da11601f8c552b172f68329d3f728abd3d03cf9f3","src/reader/parser/inside_processing_instruction.rs":"63d057c721668e01e6f5a7f6f6b54849ffc20d9cca08667b7776a5f5b5bcde15","src/reader/parser/inside_reference.rs":"a9511a1ad09b77e41b905227fffa53eece402f0eefc8e4f22211723d86c03e8e","src/reader/parser/outside_tag.rs":"b433f49dde157be672cfe85399f2a26e1757fa199b833aacab7a6b5d5b969a5c","src/util.rs":"93dbb17773d66405c985ba5e1db2d57a72f8b7c88246e74b05b8d20946307636","src/writer.rs":"8a12052563fbd5e28fe9d345ae0cf94b2b24a8a6123e27ce263e28e3355b7307","src/writer/config.rs":"edcd7a68c33454bb7dfcd176bbc860bbb03b5125a45db0188d6d6d4db1c8f6bd","src/writer/emitter.rs":"78d14363a0a0048389300b296ef4398902ba04abd90021aeddca5ea578c403f5","src/writer/events.rs":"f3f97140a2fad700f8a972698140d93ea31603760f986ede18b2e22c9cab286b"},"package":"6fd8403733700263c6eb89f192880191f1b83e332f7a20371ddcf421c4a337c7"}

7
vendor/xml-rs/Cargo.lock generated vendored Normal file
View File

@@ -0,0 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "xml-rs"
version = "0.8.27"

61
vendor/xml-rs/Cargo.toml vendored Normal file
View File

@@ -0,0 +1,61 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.
[package]
edition = "2021"
rust-version = "1.61"
name = "xml-rs"
version = "0.8.27"
authors = ["Vladimir Matveev <vmatveev@citrine.cc>"]
build = false
include = [
"src/**",
"LICENSE",
"README.md",
]
autolib = false
autobins = false
autoexamples = false
autotests = false
autobenches = false
description = "An XML library in pure Rust"
homepage = "https://lib.rs/crates/xml-rs"
documentation = "https://docs.rs/xml-rs/"
readme = "README.md"
keywords = [
"xml",
"parser",
"sax",
"parsing",
"writer",
]
categories = ["parser-implementations"]
license = "MIT"
repository = "https://github.com/kornelski/xml-rs"
[package.metadata.docs.rs]
rustdoc-args = ["--generate-link-to-definition"]
targets = ["x86_64-unknown-linux-gnu"]
[package.metadata.release]
tag-message = ""
tag-name = "{{version}}"
[badges.maintenance]
status = "actively-developed"
[lib]
name = "xml"
path = "src/lib.rs"
[[bin]]
name = "xml-analyze"
path = "src/analyze.rs"

21
vendor/xml-rs/LICENSE vendored Normal file
View File

@@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2014 Vladimir Matveev
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

216
vendor/xml-rs/README.md vendored Normal file
View File

@@ -0,0 +1,216 @@
xml-rs, an XML library for Rust
===============================
[![CI](https://github.com/kornelski/xml-rs/actions/workflows/main.yml/badge.svg)](https://github.com/kornelski/xml-rs/actions/workflows/main.yml)
[![crates.io][crates-io-img]](https://lib.rs/crates/xml-rs)
[![docs][docs-img]](https://docs.rs/xml-rs/)
[Documentation](https://docs.rs/xml-rs/)
[crates-io-img]: https://img.shields.io/crates/v/xml-rs.svg
[docs-img]: https://img.shields.io/badge/docs-latest%20release-6495ed.svg
xml-rs is an XML library for the [Rust](https://www.rust-lang.org/) programming language.
It supports reading and writing of XML documents in a streaming fashion (without DOM).
### Features
* XML spec conformance better than other pure-Rust libraries.
* Easy to use API based on `Iterator`s and regular `String`s without tricky lifetimes.
* Support for UTF-16, UTF-8, ISO-8859-1, and ASCII encodings.
* Written entirely in the safe Rust subset. Designed to safely handle untrusted input.
The API is heavily inspired by Java Streaming API for XML ([StAX][stax]). It contains a pull parser much like StAX event reader. It provides an iterator API, so you can leverage Rust's existing iterators library features.
[stax]: https://en.wikipedia.org/wiki/StAX
It also provides a streaming document writer much like StAX event writer.
This writer consumes its own set of events, but reader events can be converted to
writer events easily, and so it is possible to write XML transformation chains in a pretty
clean manner.
This parser is mostly full-featured, however, there are limitations:
* Legacy code pages and non-Unicode encodings are not supported;
* DTD validation is not supported (but entities defined in the internal subset are supported);
* attribute value normalization is not performed, and end-of-line characters are not normalized either.
Other than that the parser tries to be mostly XML-1.1-compliant.
Writer is also mostly full-featured with the following limitations:
* no support for encodings other than UTF-8,
* no support for emitting `<!DOCTYPE>` declarations;
* more validations of input are needed, for example, checking that namespace prefixes are bound
or that comments are well-formed.
Building and using
------------------
xml-rs uses [Cargo](https://crates.io), so add it with `cargo add xml` or modify `Cargo.toml`:
```toml
[dependencies]
xml = "0.8.20"
```
The package exposes a single crate called `xml`.
Reading XML documents
---------------------
[`xml::reader::EventReader`][EventReader] requires a [`Read`][stdread] instance to read from. It can be a `File` wrapped in `BufReader`, or a `Vec<u8>`, or a `&[u8]` slice.
[EventReader]: https://docs.rs/xml-rs/latest/xml/reader/struct.EventReader.html
[stdread]: https://doc.rust-lang.org/stable/std/io/trait.Read.html
`EventReader` implements `IntoIterator` trait, so you can use it in a `for` loop directly:
```rust,no_run
use std::fs::File;
use std::io::BufReader;
use xml::reader::{EventReader, XmlEvent};
fn main() -> std::io::Result<()> {
let file = File::open("file.xml")?;
let file = BufReader::new(file); // Buffering is important for performance
let parser = EventReader::new(file);
let mut depth = 0;
for e in parser {
match e {
Ok(XmlEvent::StartElement { name, .. }) => {
println!("{:spaces$}+{name}", "", spaces = depth * 2);
depth += 1;
}
Ok(XmlEvent::EndElement { name }) => {
depth -= 1;
println!("{:spaces$}-{name}", "", spaces = depth * 2);
}
Err(e) => {
eprintln!("Error: {e}");
break;
}
// There's more: https://docs.rs/xml-rs/latest/xml/reader/enum.XmlEvent.html
_ => {}
}
}
Ok(())
}
```
Document parsing can end normally or with an error. Regardless of exact cause, the parsing
process will be stopped, and the iterator will terminate normally.
You can also have finer control over when to pull the next event from the parser using its own
`next()` method:
```rust,ignore
match parser.next() {
...
}
```
Upon the end of the document or an error, the parser will remember the last event and will always
return it in the result of the `next()` call afterwards. If the iterator is used, then it will yield
the error or end-of-document event once and will produce `None` afterwards.
It is also possible to tweak parsing process a little using [`xml::reader::ParserConfig`][ParserConfig] structure.
See its documentation for more information and examples.
[ParserConfig]: https://docs.rs/xml-rs/latest/xml/reader/struct.ParserConfig.html
You can find a more extensive example of using `EventReader` in `src/analyze.rs`, which is a
small program (BTW, it is built with `cargo build` and can be run after that) which shows various
statistics about specified XML document. It can also be used to check for well-formedness of
XML documents - if a document is not well-formed, this program will exit with an error.
## Parsing untrusted inputs
The parser is written in safe Rust subset, so by Rust's guarantees the worst that it can do is to cause a panic.
You can use `ParserConfig` to set limits on maximum lengths of names, attributes, text, entities, etc.
You should also set a maximum document size via `io::Read`'s [`take(max)`](https://doc.rust-lang.org/stable/std/io/trait.Read.html#method.take) method.
Writing XML documents
---------------------
xml-rs also provides a streaming writer much like StAX event writer. With it you can write an
XML document to any `Write` implementor.
```rust,no_run
use std::io;
use xml::writer::{EmitterConfig, XmlEvent};
/// A simple demo syntax where "+foo" makes `<foo>`, "-foo" makes `</foo>`
fn make_event_from_line(line: &str) -> XmlEvent {
let line = line.trim();
if let Some(name) = line.strip_prefix("+") {
XmlEvent::start_element(name).into()
} else if line.starts_with("-") {
XmlEvent::end_element().into()
} else {
XmlEvent::characters(line).into()
}
}
fn main() -> io::Result<()> {
let input = io::stdin();
let output = io::stdout();
let mut writer = EmitterConfig::new()
.perform_indent(true)
.create_writer(output);
let mut line = String::new();
loop {
line.clear();
let bytes_read = input.read_line(&mut line)?;
if bytes_read == 0 {
break; // EOF
}
let event = make_event_from_line(&line);
if let Err(e) = writer.write(event) {
panic!("Write error: {e}")
}
}
Ok(())
}
```
The code example above also demonstrates how to create a writer out of its configuration.
Similar thing also works with `EventReader`.
The library provides an XML event building DSL which helps to construct complex events,
e.g. ones having namespace definitions. Some examples:
```rust,ignore
// <a:hello a:param="value" xmlns:a="urn:some:document">
XmlEvent::start_element("a:hello").attr("a:param", "value").ns("a", "urn:some:document")
// <hello b:config="name" xmlns="urn:default:uri">
XmlEvent::start_element("hello").attr("b:config", "name").default_ns("urn:default:uri")
// <![CDATA[some unescaped text]]>
XmlEvent::cdata("some unescaped text")
```
Of course, one can create `XmlEvent` enum variants directly instead of using the builder DSL.
There are more examples in [`xml::writer::XmlEvent`][XmlEvent] documentation.
[XmlEvent]: https://docs.rs/xml-rs/latest/xml/writer/enum.XmlEvent.html
The writer has multiple configuration options; see the [`EmitterConfig`][EmitterConfig] documentation for more
information.
[EmitterConfig]: https://docs.rs/xml-rs/latest/xml/writer/struct.EmitterConfig.html
Bug reports
------------
Please report issues at: <https://github.com/kornelski/xml-rs/issues>.

84
vendor/xml-rs/src/analyze.rs vendored Normal file
View File

@@ -0,0 +1,84 @@
#![forbid(unsafe_code)]
use std::collections::HashSet;
use std::fs::File;
use std::io::{self, BufReader, Read};
use std::{cmp, env};
use xml::reader::XmlEvent;
use xml::ParserConfig;
/// Prints summary statistics about an XML document.
///
/// The document is read from the file named by the first command-line
/// argument, or from standard input when no argument is given. The first
/// I/O or parsing error aborts the run and is returned to the caller.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Both candidate inputs are declared up front so that whichever one is
    // initialized outlives the `&mut dyn Read` borrow taken from it.
    let mut file;
    let mut stdin;
    let source: &mut dyn Read = match env::args().nth(1) {
        Some(file_name) => {
            file = File::open(file_name).map_err(|e| format!("Cannot open input file: {e}"))?;
            &mut file
        },
        None => {
            stdin = io::stdin();
            &mut stdin
        },
    };

    let config = ParserConfig::new()
        .whitespace_to_characters(true)
        .ignore_comments(false);
    let reader = config.create_reader(BufReader::new(source));

    // Element statistics.
    let mut elements = 0;
    let mut depth = 0;
    let mut max_depth = 0;
    let mut namespaces = HashSet::new();
    // Text statistics.
    let mut characters = 0;
    let mut character_blocks = 0;
    let mut cdata_blocks = 0;
    // Comment and processing instruction statistics.
    let mut comment_blocks = 0;
    let mut comment_characters = 0;
    let mut processing_instructions = 0;

    for event in reader {
        match event.map_err(|e| format!("Error parsing XML document: {e}"))? {
            XmlEvent::StartDocument { version, encoding, standalone } => {
                let negation = if standalone.unwrap_or(false) { "" } else { "not " };
                println!(
                    "XML document version {}, encoded in {}, {}standalone",
                    version, encoding, negation
                );
            },
            XmlEvent::EndDocument => println!("Document finished"),
            XmlEvent::ProcessingInstruction { .. } => processing_instructions += 1,
            // Not expected: the reader is configured to report whitespace as characters.
            XmlEvent::Whitespace(_) => {},
            XmlEvent::Characters(s) => {
                characters += s.len();
                character_blocks += 1;
            },
            XmlEvent::CData(s) => {
                characters += s.len();
                cdata_blocks += 1;
            },
            XmlEvent::Comment(s) => {
                comment_characters += s.len();
                comment_blocks += 1;
            },
            XmlEvent::StartElement { namespace, .. } => {
                elements += 1;
                depth += 1;
                max_depth = cmp::max(max_depth, depth);
                namespaces.extend(namespace.0.into_values());
            },
            XmlEvent::EndElement { .. } => depth -= 1,
        }
    }

    // The built-in namespace URIs are excluded from the reported count.
    for builtin in [
        xml::namespace::NS_EMPTY_URI,
        xml::namespace::NS_XMLNS_URI,
        xml::namespace::NS_XML_URI,
    ] {
        namespaces.remove(builtin);
    }

    println!("Elements: {elements}, maximum depth: {max_depth}");
    println!("Namespaces (excluding built-in): {}", namespaces.len());
    println!("Characters: {characters}, characters blocks: {character_blocks}, CDATA blocks: {cdata_blocks}");
    println!("Comment blocks: {comment_blocks}, comment characters: {comment_characters}");
    println!("Processing instructions (excluding built-in): {processing_instructions}");
    Ok(())
}

99
vendor/xml-rs/src/attribute.rs vendored Normal file
View File

@@ -0,0 +1,99 @@
//! Contains XML attributes manipulation types and functions.
use std::fmt;
use crate::escape::{AttributeEscapes, Escaped};
use crate::name::{Name, OwnedName};
/// A borrowed version of an XML attribute.
///
/// Consists of a borrowed qualified name and a borrowed string value.
/// Both parts borrow for the same lifetime `'a`, and the type is `Copy`,
/// so it can be passed around by value.
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
pub struct Attribute<'a> {
/// Attribute name.
pub name: Name<'a>,
/// Attribute value, stored as given; the `Display` implementation escapes it on output.
pub value: &'a str,
}
impl fmt::Display for Attribute<'_> {
    /// Writes the attribute as `name="value"`, with the value passed through
    /// the attribute escaping rules.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let escaped_value = Escaped::<AttributeEscapes>::new(self.value);
        write!(f, "{}=\"{}\"", self.name, escaped_value)
    }
}
impl<'a> Attribute<'a> {
    /// Converts this borrowed attribute into an [`OwnedAttribute`].
    #[inline]
    #[must_use]
    pub fn to_owned(&self) -> OwnedAttribute {
        // `Attribute` is `Copy`, so the fields can be taken out of `*self` directly.
        let Attribute { name, value } = *self;
        OwnedAttribute {
            name: name.into(),
            value: value.into(),
        }
    }

    /// Builds a borrowed attribute from a borrowed name and a borrowed string value.
    #[inline]
    #[must_use]
    pub const fn new(name: Name<'a>, value: &'a str) -> Self {
        Self { name, value }
    }
}
/// An owned version of an XML attribute.
///
/// Consists of an owned qualified name and an owned string value.
/// Use [`OwnedAttribute::borrow`] to view it as an [`Attribute`] without copying.
#[derive(Clone, Eq, PartialEq, Hash, Debug)]
pub struct OwnedAttribute {
/// Attribute name.
pub name: OwnedName,
/// Attribute value, stored as given; the `Display` implementation escapes it on output.
pub value: String,
}
impl OwnedAttribute {
/// Returns a borrowed `Attribute` out of this owned one.
#[must_use]
#[inline]
pub fn borrow(&self) -> Attribute<'_> {
Attribute {
name: self.name.borrow(),
value: &self.value,
}
}
/// Creates a new owned attribute using the provided owned name and an owned string value.
#[inline]
pub fn new<S: Into<String>>(name: OwnedName, value: S) -> Self {
Self { name, value: value.into() }
}
}
impl fmt::Display for OwnedAttribute {
    /// Writes the attribute as `name="value"`, with the value passed through
    /// the attribute escaping rules.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let escaped_value = Escaped::<AttributeEscapes>::new(&self.value);
        write!(f, "{}=\"{}\"", self.name, escaped_value)
    }
}
#[cfg(test)]
mod tests {
use super::Attribute;
use crate::name::Name;
// Checks the `Display` output of a fully qualified attribute: the namespace
// URI in braces, then `prefix:local`, then the quoted value with the five
// markup characters replaced by entity references.
#[test]
fn attribute_display() {
let attr = Attribute::new(
Name::qualified("attribute", "urn:namespace", Some("n")),
"its value with > & \" ' < weird symbols",
);
assert_eq!(
&*attr.to_string(),
"{urn:namespace}n:attribute=\"its value with &gt; &amp; &quot; &apos; &lt; weird symbols\""
);
}
}

164
vendor/xml-rs/src/common.rs vendored Normal file
View File

@@ -0,0 +1,164 @@
//! Contains common types and functions used throughout the library.
use std::fmt;
/// Represents a position inside some textual document.
///
/// Both coordinates are stored zero-based.
#[derive(Copy, Clone, PartialEq, Eq)]
pub struct TextPosition {
    /// Row, counting from 0
    pub row: u64,
    /// Column, counting from 0
    pub column: u64,
}

impl TextPosition {
    /// Creates a new position initialized to the beginning of the document
    #[inline]
    #[must_use]
    pub const fn new() -> Self {
        Self { row: 0, column: 0 }
    }

    /// Advances the position in a line by `count` columns
    #[inline]
    pub fn advance(&mut self, count: u8) {
        self.column += u64::from(count);
    }

    /// Advances the position in a line to the next tab position
    ///
    /// Tab stops are the columns that are multiples of `width`. A position
    /// that is already on a tab stop moves forward by a full `width`.
    /// A `width` of zero defines no tab stops, so the position is left unchanged.
    #[inline]
    pub fn advance_to_tab(&mut self, width: u8) {
        // Without this guard the remainder below would panic on a zero divisor.
        if width == 0 {
            return;
        }
        let width = u64::from(width);
        self.column += width - self.column % width;
    }

    /// Advances the position to the beginning of the next line
    #[inline]
    pub fn new_line(&mut self) {
        self.column = 0;
        self.row += 1;
    }
}
impl fmt::Debug for TextPosition {
    /// Formats the position as `row:column`, with both numbers shown one-based.
    #[cold]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let (line, col) = (self.row + 1, self.column + 1);
        write!(f, "{}:{}", line, col)
    }
}
impl fmt::Display for TextPosition {
    /// Formats the position as `row:column`, with both numbers shown one-based.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let one_based_row = self.row + 1;
        let one_based_column = self.column + 1;
        write!(f, "{}:{}", one_based_row, one_based_column)
    }
}
/// Get the position in the document corresponding to the object
///
/// This trait is implemented by parsers, lexers and errors.
/// [`TextPosition`] implements it as well, returning itself.
pub trait Position {
/// Returns the current position or a position corresponding to the object.
fn position(&self) -> TextPosition;
}
impl Position for TextPosition {
/// Returns a copy of this position.
#[inline]
fn position(&self) -> TextPosition {
*self
}
}
/// XML version enumeration.
///
/// Both `Display` and `Debug` render a version as its number, `1.0` or `1.1`.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub enum XmlVersion {
    /// XML version 1.0.
    Version10,
    /// XML version 1.1.
    Version11,
}

impl fmt::Display for XmlVersion {
    /// Writes the version number, honouring the formatter's width and alignment flags.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let number = match self {
            Self::Version10 => "1.0",
            Self::Version11 => "1.1",
        };
        fmt::Display::fmt(number, f)
    }
}

impl fmt::Debug for XmlVersion {
    /// Produces the same text as the `Display` implementation.
    #[cold]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        <Self as fmt::Display>::fmt(self, f)
    }
}
/// Tells whether `c` is a white space character (`S`) as defined by the
/// XML 1.1 specification, [section 2.3][1]: space, tab, line feed or carriage return.
///
/// [1]: http://www.w3.org/TR/2006/REC-xml11-20060816/#sec-common-syn
#[must_use]
#[inline]
pub const fn is_whitespace_char(c: char) -> bool {
    match c {
        ' ' | '\t' | '\n' | '\r' => true,
        _ => false,
    }
}
/// Checks whether the given string is composed only of white space
/// characters (`S`), using [`is_whitespace_char`] to check every
/// character of the string.
///
/// An empty string has no non-whitespace characters, so it yields `true`.
#[must_use]
pub fn is_whitespace_str(s: &str) -> bool {
    s.chars().all(is_whitespace_char)
}
/// Tells whether `c` is a valid character in XML 1.0
///
/// Accepted are tab, line feed, carriage return, and everything from U+0020
/// upwards except U+FFFE and U+FFFF.
#[must_use]
pub const fn is_xml10_char(c: char) -> bool {
    match c {
        '\u{09}' | '\u{0A}' | '\u{0D}' => true,
        '\u{20}'..='\u{D7FF}' => true,
        '\u{E000}'..='\u{FFFD}' => true,
        '\u{10000}'..='\u{10FFFF}' => true,
        _ => false,
    }
}
/// Tells whether `c` is a valid character in XML 1.1
///
/// Accepted is every character except U+0000, U+FFFE and U+FFFF.
#[must_use]
pub const fn is_xml11_char(c: char) -> bool {
    match c {
        '\u{01}'..='\u{D7FF}' => true,
        '\u{E000}'..='\u{FFFD}' => true,
        '\u{10000}'..='\u{10FFFF}' => true,
        _ => false,
    }
}
/// Tells whether `c` is a valid XML 1.1 character that is not part of the
/// restricted character set
///
/// The restricted set consists of the control characters other than tab,
/// line feed, carriage return and U+0085.
#[must_use]
pub const fn is_xml11_char_not_restricted(c: char) -> bool {
    if !is_xml11_char(c) {
        return false;
    }
    match c {
        '\u{01}'..='\u{08}' | '\u{0B}'..='\u{0C}' | '\u{0E}'..='\u{1F}' => false,
        '\u{7F}'..='\u{84}' | '\u{86}'..='\u{9F}' => false,
        _ => true,
    }
}
/// Tells whether `c` may begin an XML name (`NameStartChar`) as defined by
/// the XML 1.1 specification, [section 2.3][1].
///
/// [1]: http://www.w3.org/TR/2006/REC-xml11-20060816/#sec-common-syn
#[must_use]
pub const fn is_name_start_char(c: char) -> bool {
    match c {
        // ASCII: colon, underscore and letters.
        ':' | '_' | 'A'..='Z' | 'a'..='z' => true,
        // U+00D7 and U+00F7 are deliberately left out of these ranges.
        '\u{C0}'..='\u{D6}' | '\u{D8}'..='\u{F6}' | '\u{F8}'..='\u{2FF}' => true,
        '\u{370}'..='\u{37D}' | '\u{37F}'..='\u{1FFF}' => true,
        '\u{200C}'..='\u{200D}' | '\u{2070}'..='\u{218F}' => true,
        '\u{2C00}'..='\u{2FEF}' | '\u{3001}'..='\u{D7FF}' => true,
        '\u{F900}'..='\u{FDCF}' | '\u{FDF0}'..='\u{FFFD}' => true,
        '\u{10000}'..='\u{EFFFF}' => true,
        _ => false,
    }
}
/// Tells whether `c` may appear inside an XML name (`NameChar`) as defined by
/// the XML 1.1 specification, [section 2.3][1].
///
/// Every name start character qualifies, plus `-`, `.`, the ASCII digits,
/// U+00B7 and two ranges of combining and connector characters.
///
/// [1]: http://www.w3.org/TR/2006/REC-xml11-20060816/#sec-common-syn
#[must_use]
pub const fn is_name_char(c: char) -> bool {
    is_name_start_char(c)
        || match c {
            '-' | '.' | '\u{B7}' => true,
            '0'..='9' => true,
            '\u{300}'..='\u{36F}' | '\u{203F}'..='\u{2040}' => true,
            _ => false,
        }
}

162
vendor/xml-rs/src/escape.rs vendored Normal file
View File

@@ -0,0 +1,162 @@
//! Contains functions for performing XML special characters escaping.
use std::borrow::Cow;
use std::fmt::{Display, Formatter, Result};
use std::marker::PhantomData;
/// A set of byte-level escaping rules.
pub(crate) trait Escapes {
    /// Returns the replacement text for the byte `c`, or `None` when the byte
    /// is written out unchanged.
    fn escape(c: u8) -> Option<&'static str>;

    /// Tells whether the byte `c` has a replacement.
    fn byte_needs_escaping(c: u8) -> bool {
        matches!(Self::escape(c), Some(_))
    }

    /// Tells whether at least one byte of `s` has a replacement.
    fn str_needs_escaping(s: &str) -> bool {
        s.as_bytes().iter().any(|&byte| matches!(Self::escape(byte), Some(_)))
    }
}
/// A string slice paired with the escaping rules `E` to apply when it is displayed.
pub(crate) struct Escaped<'a, E: Escapes> {
    /// Ties the value to the rule set `E` without storing an instance of it.
    _escape_phantom: PhantomData<E>,
    /// The text exactly as supplied by the caller.
    to_escape: &'a str,
}

impl<'a, E: Escapes> Escaped<'a, E> {
    /// Wraps `s` without copying or inspecting it.
    pub const fn new(s: &'a str) -> Self {
        Self {
            to_escape: s,
            _escape_phantom: PhantomData,
        }
    }
}
impl<E: Escapes> Display for Escaped<'_, E> {
    /// Writes the wrapped text, substituting every byte that `E` marks as
    /// needing escaping with its replacement.
    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
        let mut pending = self.to_escape;
        loop {
            // Locate the next byte that has to be replaced; without one the
            // rest of the text is written verbatim and formatting is done.
            let idx = match pending.bytes().position(E::byte_needs_escaping) {
                Some(idx) => idx,
                None => return f.write_str(pending),
            };
            let (clean, tail) = pending.split_at(idx);
            f.write_str(clean)?;
            // `tail` starts with the byte found above, so it cannot be empty.
            let special = tail.as_bytes()[0];
            f.write_str(E::escape(special).unwrap_or("unexpected token"))?;
            pending = &tail[1..];
        }
    }
}
/// Escapes `s` with the rule set `E`, borrowing the input when no byte needs
/// escaping and allocating a new string otherwise.
fn escape_str<E: Escapes>(s: &str) -> Cow<'_, str> {
    if !E::str_needs_escaping(s) {
        return Cow::Borrowed(s);
    }
    let escaped = Escaped::<E>::new(s).to_string();
    Cow::Owned(escaped)
}
// Declares a unit struct `$name` and implements `Escapes` for it.
//
// Each `$k => $v` pair becomes one arm of the generated `escape` function:
// a byte matching `$k` yields `Some($v)`, every other byte yields `None`.
// A trailing comma after the last pair is accepted.
macro_rules! escapes {
{
$name: ident,
$($k: expr => $v: expr),* $(,)?
} => {
pub(crate) struct $name;
impl Escapes for $name {
fn escape(c: u8) -> Option<&'static str> {
match c {
$( $k => Some($v),)*
_ => None
}
}
}
};
}
// Escaping rules for attribute values: the five markup characters plus
// line feed and carriage return, which are written as character references.
escapes!(
AttributeEscapes,
b'<' => "&lt;",
b'>' => "&gt;",
b'"' => "&quot;",
b'\'' => "&apos;",
b'&' => "&amp;",
b'\n' => "&#xA;",
b'\r' => "&#xD;",
);
// Escaping rules for character data: only `<`, `>` and `&` are replaced;
// quotes and line breaks are left as they are.
escapes!(
PcDataEscapes,
b'<' => "&lt;",
b'>' => "&gt;",
b'&' => "&amp;",
);
/// Performs escaping of common XML characters inside an attribute value.
///
/// This function replaces several important markup characters with their
/// entity equivalents:
///
/// * `<` → `&lt;`
/// * `>` → `&gt;`
/// * `"` → `&quot;`
/// * `'` → `&apos;`
/// * `&` → `&amp;`
///
/// The following characters are escaped so that attributes are printed on
/// a single line:
/// * `\n` → `&#xA;`
/// * `\r` → `&#xD;`
///
/// The resulting string is safe to use inside XML attribute values or in PCDATA sections.
///
/// Does not perform allocations if the given string does not contain escapable characters;
/// in that case the input is returned borrowed.
#[inline]
#[must_use]
pub fn escape_str_attribute(s: &str) -> Cow<'_, str> {
escape_str::<AttributeEscapes>(s)
}
/// Performs escaping of common XML characters inside PCDATA.
///
/// This function replaces several important markup characters with their
/// entity equivalents:
///
/// * `<` → `&lt;`
/// * `>` → `&gt;`
/// * `&` → `&amp;`
///
/// The resulting string is safe to use inside PCDATA sections but NOT inside attribute values.
///
/// Does not perform allocations if the given string does not contain escapable characters;
/// in that case the input is returned borrowed.
#[inline]
#[must_use]
pub fn escape_str_pcdata(s: &str) -> Cow<'_, str> {
escape_str::<PcDataEscapes>(s)
}
#[cfg(test)]
mod tests {
use super::{escape_str_attribute, escape_str_pcdata};
// Every attribute rule is applied, and text without special characters is returned unchanged.
#[test]
fn test_escape_str_attribute() {
assert_eq!(escape_str_attribute("<>'\"&\n\r"), "&lt;&gt;&apos;&quot;&amp;&#xA;&#xD;");
assert_eq!(escape_str_attribute("no_escapes"), "no_escapes");
}
// Every PCDATA rule is applied, and text without special characters is returned unchanged.
#[test]
fn test_escape_str_pcdata() {
assert_eq!(escape_str_pcdata("<>&"), "&lt;&gt;&amp;");
assert_eq!(escape_str_pcdata("no_escapes"), "no_escapes");
}
// A multi-byte character in front of an escaped one must come through intact.
#[test]
fn test_escape_multibyte_code_points() {
assert_eq!(escape_str_attribute("☃<"), "☃&lt;");
assert_eq!(escape_str_pcdata("☃<"), "☃&lt;");
}
}

27
vendor/xml-rs/src/lib.rs vendored Normal file
View File

@@ -0,0 +1,27 @@
#![warn(missing_docs)]
#![forbid(non_camel_case_types)]
#![forbid(unsafe_code)]
#![allow(clippy::redundant_closure_for_method_calls)]
#![allow(clippy::module_name_repetitions)]
//! This crate currently provides an almost XML 1.0/1.1-compliant pull parser.
//!
//! Please note that functions of this parser may panic.
//! If a panic could cause a Denial Of Service in your codebase, *you're* responsible for wrapping access to this library in `catch_unwind`.
#![cfg_attr(doctest, doc = include_str!("../README.md"))]
pub use crate::reader::{EventReader, ParserConfig};
pub use crate::util::Encoding;
pub use crate::writer::{EmitterConfig, EventWriter};
pub mod attribute;
pub mod common;
pub mod escape;
#[doc(hidden)] // FIXME: not supposed to be public
pub mod macros;
pub mod name;
pub mod namespace;
pub mod reader;
mod util;
pub mod writer;

63
vendor/xml-rs/src/macros.rs vendored Normal file
View File

@@ -0,0 +1,63 @@
#![macro_use]
//! Contains several macros used in this crate.
// Generates one builder-style setter method named after `$field`.
//
// The keyword after the colon selects the flavour of setter; any attributes
// (including doc comments) written before the field name are copied onto the
// generated method, followed by a fixed "see ... for details" doc line.
macro_rules! gen_setter {
// `into`: generic setter that accepts anything convertible into `$t` and
// stores the converted value in `self.$field`.
($(#[$comments:meta])* $field:ident : into $t:ty) => {
$(#[$comments])*
///
/// <small>See [`ParserConfig`][crate::ParserConfig] fields docs for details</small>
#[inline]
#[must_use]
pub fn $field<T: Into<$t>>(mut self, value: T) -> Self {
self.$field = value.into();
self
}
};
// `val`: `const` setter that stores a `$t` value in `self.$field` as is.
($(#[$comments:meta])* $field:ident : val $t:ty) => {
$(#[$comments])*
///
/// <small>See [`ParserConfig`][crate::ParserConfig] fields docs for details</small>
#[inline]
#[must_use]
pub const fn $field(mut self, value: $t) -> Self {
self.$field = value;
self
}
};
// `delegate`: `const` setter that stores the value in the nested `self.c.$field`.
($(#[$comments:meta])* $field:ident : delegate $t:ty) => {
$(#[$comments])*
///
/// <small>See [`ParserConfig`][crate::ParserConfig] fields docs for details</small>
#[inline]
#[must_use]
pub const fn $field(mut self, value: $t) -> Self {
self.c.$field = value;
self
}
};
// `c2`: wraps `self` in a `ParserConfig2` whose other fields are defaulted,
// then forwards to the `ParserConfig2` setter of the same name.
($(#[$comments:meta])* $field:ident : c2 $t:ty) => {
$(#[$comments])*
///
/// <small>See [`ParserConfig2`][crate::reader::ParserConfig2] fields docs for details</small>
#[inline]
#[must_use]
pub fn $field(self, value: $t) -> ParserConfig2 {
ParserConfig2 {
c: self,
..Default::default()
}
.$field(value)
}
};
}
// Emits a single `impl $target` block containing one `gen_setter!` expansion
// for each comma-separated `field : kind type` entry; at least one entry is required.
macro_rules! gen_setters {
($target:ident, $($(#[$comments:meta])* $field:ident : $k:tt $tpe:ty),+) => (
impl $target {$(
gen_setter! { $(#[$comments])* $field : $k $tpe }
)+
})
}

310
vendor/xml-rs/src/name.rs vendored Normal file
View File

@@ -0,0 +1,310 @@
//! Contains XML qualified names manipulation types and functions.
use std::fmt;
use std::str::FromStr;
use crate::namespace::NS_NO_PREFIX;
/// Represents a qualified XML name.
///
/// A qualified name always consists at least of a local name. It can optionally contain
/// a prefix; when reading an XML document, if it contains a prefix, it must also contain a
/// namespace URI, but this is not enforced statically; see below. The name can contain a
/// namespace without a prefix; in that case a default, empty prefix is assumed.
///
/// When writing XML documents, it is possible to omit the namespace URI, leaving only
/// the prefix. In this case the writer will check that the specified prefix is bound to some
/// URI in the current namespace context. If both prefix and namespace URI are specified,
/// it is checked that the current namespace context contains this exact correspondence
/// between prefix and namespace URI.
///
/// # Prefixes and URIs
///
/// A qualified name with a prefix must always contain a proper namespace URI --- names with
/// a prefix but without a namespace associated with that prefix are meaningless. However,
/// it is impossible to obtain a proper namespace URI from a prefix without a context, and such
/// a context is only available when parsing a document (or it can be constructed manually
/// when writing a document). Tying a name to a context statically seems impractical. This
/// may change in the future, though.
///
/// # Conversions
///
/// `Name` implements some `From` instances for conversion from strings and tuples. For example:
///
/// ```rust
/// # use xml::name::Name;
/// let n1: Name = "p:some-name".into();
/// let n2: Name = ("p", "some-name").into();
///
/// assert_eq!(n1, n2);
/// assert_eq!(n1.local_name, "some-name");
/// assert_eq!(n1.prefix, Some("p"));
/// assert!(n1.namespace.is_none());
/// ```
///
/// This is added to support easy specification of XML elements when writing XML documents.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
pub struct Name<'a> {
/// A local name, e.g. `string` in `xsi:string`.
pub local_name: &'a str,
/// A namespace URI, e.g. `http://www.w3.org/2000/xmlns/`.
pub namespace: Option<&'a str>,
/// A name prefix, e.g. `xsi` in `xsi:string`.
pub prefix: Option<&'a str>,
}
impl<'a> From<&'a str> for Name<'a> {
    /// Splits `s` at its first colon into a prefix and a local name; a string
    /// without a colon becomes a plain local name.
    fn from(s: &'a str) -> Self {
        match s.split_once(':') {
            Some((prefix, name)) => Name::prefixed(name, prefix),
            None => Name::local(s),
        }
    }
}
impl<'a> From<(&'a str, &'a str)> for Name<'a> {
    /// Builds a prefixed name from a `(prefix, local_name)` pair.
    fn from(pair: (&'a str, &'a str)) -> Self {
        let (prefix, name) = pair;
        Name::prefixed(name, prefix)
    }
}
impl fmt::Display for Name<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if let Some(namespace) = self.namespace {
write!(f, "{{{namespace}}}")?;
}
if let Some(prefix) = self.prefix {
write!(f, "{prefix}:")?;
}
f.write_str(self.local_name)
}
}
impl<'a> Name<'a> {
    /// Converts this borrowed name into an [`OwnedName`], converting each component.
    #[must_use]
    pub fn to_owned(&self) -> OwnedName {
        let Name { local_name, namespace, prefix } = *self;
        OwnedName {
            local_name: local_name.into(),
            namespace: namespace.map(|uri| uri.into()),
            prefix: prefix.map(|p| p.into()),
        }
    }

    /// Builds a `Name` that has only a local part, with no prefix and no namespace.
    #[inline]
    #[must_use]
    pub const fn local(local_name: &str) -> Name<'_> {
        Name { local_name, namespace: None, prefix: None }
    }

    /// Builds a `Name` from a local name and a prefix, leaving the namespace unset.
    #[inline]
    #[must_use]
    pub const fn prefixed(local_name: &'a str, prefix: &'a str) -> Self {
        Self { local_name, namespace: None, prefix: Some(prefix) }
    }

    /// Builds a qualified `Name` with a namespace URI and an optional prefix.
    #[inline]
    #[must_use]
    pub const fn qualified(local_name: &'a str, namespace: &'a str, prefix: Option<&'a str>) -> Self {
        Self { local_name, namespace: Some(namespace), prefix }
    }

    /// Returns the XML representation of this name's prefix and local name.
    ///
    /// Unlike `to_string()`, which comes from the `Display` implementation, the
    /// result does not include the namespace URI.
    #[must_use]
    pub fn to_repr(&self) -> String {
        ReprDisplay(self).to_string()
    }

    /// Returns a value that can be formatted with `std::fmt` machinery to obtain
    /// this name's prefix and local name.
    ///
    /// Formatting through the returned value avoids building an intermediate `String`.
    #[inline]
    #[must_use]
    pub const fn repr_display(&self) -> ReprDisplay<'_, '_> {
        ReprDisplay(self)
    }

    /// Returns the prefix of this name, or the `namespace::NS_NO_PREFIX` constant
    /// when the name has no prefix.
    #[inline]
    #[must_use]
    pub fn prefix_repr(&self) -> &str {
        match self.prefix {
            Some(prefix) => prefix,
            None => NS_NO_PREFIX,
        }
    }
}
/// A wrapper around `Name` whose `Display` implementation prints the wrapped name as it is
/// displayed in an XML document: `prefix:local_name` when the name has a prefix and just
/// `local_name` otherwise. The namespace URI is never printed.
pub struct ReprDisplay<'a, 'b>(&'a Name<'b>);
impl<'a, 'b: 'a> fmt::Display for ReprDisplay<'a, 'b> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self.0.prefix {
Some(prefix) => write!(f, "{}:{}", prefix, self.0.local_name),
None => self.0.local_name.fmt(f),
}
}
}
/// An owned variant of `Name`.
///
/// Everything about `Name` applies to this structure as well; the only difference is that
/// the parts of the name are stored as `String`s instead of borrowed string slices.
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
pub struct OwnedName {
/// The local part of the name, e.g. `string` in `xsi:string`.
pub local_name: String,
/// The namespace URI of the name, e.g. `http://www.w3.org/2000/xmlns/`, or `None` when no
/// namespace has been associated with the name.
pub namespace: Option<String>,
/// The name prefix, e.g. `xsi` in `xsi:string`, or `None` for an unprefixed name.
pub prefix: Option<String>,
}
/// Formats the name exactly like the borrowed `Name` obtained from [`OwnedName::borrow`].
impl fmt::Display for OwnedName {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let borrowed = self.borrow();
        fmt::Display::fmt(&borrowed, f)
    }
}
impl OwnedName {
    /// Returns a borrowed `Name` whose parts point into this owned name.
    #[must_use]
    #[inline]
    pub fn borrow(&self) -> Name<'_> {
        Name {
            local_name: self.local_name.as_str(),
            namespace: self.namespace_ref(),
            prefix: self.prefix_ref(),
        }
    }

    /// Creates an `OwnedName` consisting of a local name only, with neither prefix nor
    /// namespace.
    #[inline]
    pub fn local<S>(local_name: S) -> Self where S: Into<String> {
        let local_name = local_name.into();
        Self { local_name, namespace: None, prefix: None }
    }

    /// Creates a qualified `OwnedName`: a local name together with a namespace URI and an
    /// optional prefix.
    #[inline]
    pub fn qualified<S1, S2, S3>(local_name: S1, namespace: S2, prefix: Option<S3>) -> Self
        where S1: Into<String>, S2: Into<String>, S3: Into<String>
    {
        let local_name = local_name.into();
        let namespace = Some(namespace.into());
        let prefix = prefix.map(Into::into);
        Self { local_name, namespace, prefix }
    }

    /// Returns the optional prefix by reference; equivalent to `self.borrow().prefix`
    /// but avoids extra work.
    #[inline]
    #[must_use]
    pub fn prefix_ref(&self) -> Option<&str> {
        self.prefix.as_ref().map(String::as_str)
    }

    /// Returns the optional namespace by reference; equivalent to `self.borrow().namespace`
    /// but avoids extra work.
    #[inline]
    #[must_use]
    pub fn namespace_ref(&self) -> Option<&str> {
        self.namespace.as_ref().map(String::as_str)
    }
}
impl<'a> From<Name<'a>> for OwnedName {
#[inline]
fn from(n: Name<'a>) -> Self {
n.to_owned()
}
}
impl FromStr for OwnedName {
    type Err = ();

    /// Parses the given string slice into a qualified name.
    ///
    /// When this function finishes successfully, it always returns a qualified
    /// name without a namespace (`name.namespace == None`). The namespace should be
    /// filled in later using a proper `NamespaceStack`.
    ///
    /// Accepted forms are `local_name` and `prefix:local_name`; an empty prefix, an empty
    /// local name, or more than one `:` yields `Err(())`.
    ///
    /// It is assumed that all characters in the argument string are valid
    /// as defined by the XML specification. No checks other than the ones above are done.
    fn from_str(s: &str) -> Result<Self, ()> {
        let (prefix, local_name) = match s.split_once(':') {
            Some((prefix, local_name)) => (Some(prefix), local_name),
            None => (None, s),
        };

        let prefix_is_empty = prefix.map_or(false, str::is_empty);
        // A colon left in the local part means the input had more than one separator.
        if prefix_is_empty || local_name.is_empty() || local_name.contains(':') {
            return Err(());
        }

        Ok(Self {
            local_name: local_name.into(),
            namespace: None,
            prefix: prefix.map(Into::into),
        })
    }
}
#[cfg(test)]
mod tests {
    use super::OwnedName;

    /// Builds the name a successful parse is expected to produce: no namespace, the given
    /// optional prefix and the given local name.
    fn parsed(prefix: Option<&str>, local_name: &str) -> OwnedName {
        OwnedName {
            local_name: local_name.into(),
            namespace: None,
            prefix: prefix.map(Into::into),
        }
    }

    #[test]
    fn test_owned_name_from_str() {
        // Well-formed names.
        assert_eq!("prefix:name".parse(), Ok(parsed(Some("prefix"), "name")));
        assert_eq!("name".parse(), Ok(parsed(None, "name")));

        // Empty parts and more than one separator are rejected.
        for malformed in ["", ":", ":a", "a:", "a:b:c"] {
            assert_eq!(malformed.parse::<OwnedName>(), Err(()));
        }
    }
}

516
vendor/xml-rs/src/namespace.rs vendored Normal file
View File

@@ -0,0 +1,516 @@
//! Contains namespace manipulation types and functions.
use std::borrow::Cow;
use std::collections::btree_map::Iter as Entries;
use std::collections::btree_map::{BTreeMap, Entry};
use std::collections::HashSet;
use std::iter::{Map, Rev};
use std::slice::Iter;
/// Designates prefix for namespace definitions.
///
/// See [Namespaces in XML][namespace] spec for more information.
///
/// [namespace]: http://www.w3.org/TR/xml-names/#ns-decl
pub const NS_XMLNS_PREFIX: &str = "xmlns";
/// Designates the standard URI for `xmlns` prefix.
///
/// See [A Namespace Name for xmlns Attributes][namespace] for more information.
///
/// [namespace]: http://www.w3.org/2000/xmlns/
pub const NS_XMLNS_URI: &str = "http://www.w3.org/2000/xmlns/";
/// Designates prefix for a namespace containing several special predefined attributes.
///
/// See [2.10 White Space handling][1], [2.1 Language Identification][2],
/// [XML Base specification][3] and [xml:id specification][4] for more information.
///
/// [1]: http://www.w3.org/TR/REC-xml/#sec-white-space
/// [2]: http://www.w3.org/TR/REC-xml/#sec-lang-tag
/// [3]: http://www.w3.org/TR/xmlbase/
/// [4]: http://www.w3.org/TR/xml-id/
pub const NS_XML_PREFIX: &str = "xml";
/// Designates the standard URI for `xml` prefix.
///
/// See `NS_XML_PREFIX` documentation for more information.
pub const NS_XML_URI: &str = "http://www.w3.org/XML/1998/namespace";
/// Designates the absence of prefix in a qualified name.
///
/// This constant should be used to define or query the default namespace, which is used
/// for element or attribute names without a prefix. For example, if a namespace mapping
/// at a particular point in the document contains a correspondence like
///
/// ```none
/// NS_NO_PREFIX --> urn:some:namespace
/// ```
///
/// then for all names declared without an explicit prefix, `urn:some:namespace` is assumed
/// to be the namespace URI.
///
/// By default empty prefix corresponds to absence of namespace, but this can change either
/// when writing an XML document (manually) or when reading an XML document (based on namespace
/// declarations).
pub const NS_NO_PREFIX: &str = "";
/// Designates an empty namespace URI, which is equivalent to absence of namespace.
///
/// This constant should not usually be used directly; it is used to designate that
/// empty prefix corresponds to absent namespace in `NamespaceStack` instances created with
/// `NamespaceStack::default()`. Therefore, it can be used to restore `NS_NO_PREFIX` mapping
/// in a namespace back to its default value.
pub const NS_EMPTY_URI: &str = "";
/// Namespace is a map from prefixes to namespace URIs.
///
/// No prefix (i.e. default namespace) is designated by `NS_NO_PREFIX` constant.
///
/// The mappings are kept in a `BTreeMap` keyed by prefix, so iterating over a namespace
/// yields its mappings ordered by prefix.
#[derive(PartialEq, Eq, Clone, Debug)]
pub struct Namespace(pub BTreeMap<String, String>);
impl Namespace {
/// Returns an empty namespace.
#[inline]
#[must_use]
pub fn empty() -> Self {
Self(BTreeMap::new())
}
/// Checks whether this namespace is empty.
#[inline]
#[must_use]
pub fn is_empty(&self) -> bool {
self.0.is_empty()
}
/// Checks whether this namespace is essentially empty, that is, it does not contain
/// anything but default mappings.
#[must_use]
pub fn is_essentially_empty(&self) -> bool {
// a shortcut for a namespace which is definitely not empty
if self.0.len() > 3 { return false; }
self.0.iter().all(|(k, v)| matches!((&**k, &**v),
(NS_NO_PREFIX, NS_EMPTY_URI) |
(NS_XMLNS_PREFIX, NS_XMLNS_URI) |
(NS_XML_PREFIX, NS_XML_URI))
)
}
/// Checks whether this namespace mapping contains the given prefix.
///
/// # Parameters
/// * `prefix` --- namespace prefix.
///
/// # Return value
/// `true` if this namespace contains the given prefix, `false` otherwise.
#[inline]
pub fn contains<P: ?Sized + AsRef<str>>(&self, prefix: &P) -> bool {
self.0.contains_key(prefix.as_ref())
}
/// Puts a mapping into this namespace.
///
/// This method does not override any already existing mappings.
///
/// Returns a boolean flag indicating whether the map already contained
/// the given prefix.
///
/// # Parameters
/// * `prefix` --- namespace prefix;
/// * `uri` --- namespace URI.
///
/// # Return value
/// `true` if `prefix` has been inserted successfully; `false` if the `prefix`
/// was already present in the namespace.
pub fn put<P, U>(&mut self, prefix: P, uri: U) -> bool
where P: Into<String>, U: Into<String>
{
match self.0.entry(prefix.into()) {
Entry::Occupied(_) => false,
Entry::Vacant(ve) => {
ve.insert(uri.into());
true
},
}
}
/// Puts a mapping into this namespace forcefully.
///
/// This method, unlike `put()`, does replace an already existing mapping.
///
/// Returns previous URI which was assigned to the given prefix, if it is present.
///
/// # Parameters
/// * `prefix` --- namespace prefix;
/// * `uri` --- namespace URI.
///
/// # Return value
/// `Some(uri)` with `uri` being a previous URI assigned to the `prefix`, or
/// `None` if such prefix was not present in the namespace before.
pub fn force_put<P, U>(&mut self, prefix: P, uri: U) -> Option<String>
where P: Into<String>, U: Into<String>
{
self.0.insert(prefix.into(), uri.into())
}
/// Queries the namespace for the given prefix.
///
/// # Parameters
/// * `prefix` --- namespace prefix.
///
/// # Return value
/// Namespace URI corresponding to the given prefix, if it is present.
pub fn get<'a, P: ?Sized + AsRef<str>>(&'a self, prefix: &P) -> Option<&'a str> {
self.0.get(prefix.as_ref()).map(|s| &**s)
}
/// Borrowed namespace for the writer
#[must_use]
pub const fn borrow(&self) -> Cow<'_, Self> {
Cow::Borrowed(self)
}
/// Namespace mappings contained in a namespace.
pub fn iter(&self) -> NamespaceMappings<'_> {
self.into_iter()
}
}
/// An alias for iterator type for namespace mappings contained in a namespace.
///
/// It is the underlying `BTreeMap` entry iterator adapted with a plain function pointer that
/// turns each `(&String, &String)` entry into a `UriMapping`.
pub type NamespaceMappings<'a> = Map<
Entries<'a, String, String>,
for<'b> fn((&'b String, &'b String)) -> UriMapping<'b>
>;
/// Iterates over the `(prefix, uri)` mappings of a borrowed namespace.
impl<'a> IntoIterator for &'a Namespace {
    type Item = UriMapping<'a>;
    type IntoIter = NamespaceMappings<'a>;

    fn into_iter(self) -> Self::IntoIter {
        /// Views a map entry as a pair of string slices.
        fn as_str_pair<'m>(entry: (&'m String, &'m String)) -> UriMapping<'m> {
            (entry.0.as_str(), entry.1.as_str())
        }

        // `NamespaceMappings` names a function pointer type, so coerce the fn item explicitly.
        let to_pair: for<'m> fn((&'m String, &'m String)) -> UriMapping<'m> = as_str_pair;
        self.0.iter().map(to_pair)
    }
}
/// Namespace stack is a sequence of namespaces.
///
/// Namespace stack is used to represent cumulative namespace consisting of
/// combined namespaces from nested elements.
///
/// The last element of the wrapped vector is the top of the stack.
#[derive(Clone, Eq, PartialEq, Debug)]
pub struct NamespaceStack(pub Vec<Namespace>);
impl NamespaceStack {
/// Returns an empty namespace stack.
#[inline]
#[must_use]
pub fn empty() -> Self {
Self(Vec::with_capacity(2))
}
/// Returns a namespace stack with default items in it.
///
/// Default items are the following:
///
/// * `xml` → `http://www.w3.org/XML/1998/namespace`;
/// * `xmlns` → `http://www.w3.org/2000/xmlns/`.
#[inline]
#[must_use]
#[allow(clippy::should_implement_trait)]
pub fn default() -> Self {
let mut nst = Self::empty();
nst.push_empty();
// xml namespace
nst.put(NS_XML_PREFIX, NS_XML_URI);
// xmlns namespace
nst.put(NS_XMLNS_PREFIX, NS_XMLNS_URI);
// empty namespace
nst.put(NS_NO_PREFIX, NS_EMPTY_URI);
nst
}
/// Adds an empty namespace to the top of this stack.
#[inline]
pub fn push_empty(&mut self) -> &mut Self {
self.0.push(Namespace::empty());
self
}
/// Removes the topmost namespace in this stack.
///
/// Panics if the stack is empty.
#[inline]
#[track_caller]
pub fn pop(&mut self) -> Namespace {
self.0.pop().unwrap()
}
/// Removes the topmost namespace in this stack.
///
/// Returns `Some(namespace)` if this stack is not empty and `None` otherwise.
#[inline]
pub fn try_pop(&mut self) -> Option<Namespace> {
self.0.pop()
}
/// Borrows the topmost namespace mutably, leaving the stack intact.
///
/// Panics if the stack is empty.
#[inline]
#[track_caller]
pub fn peek_mut(&mut self) -> &mut Namespace {
self.0.last_mut().unwrap()
}
/// Borrows the topmost namespace immutably, leaving the stack intact.
///
/// Panics if the stack is empty.
#[inline]
#[must_use]
#[track_caller]
pub fn peek(&self) -> &Namespace {
self.0.last().unwrap()
}
/// Puts a mapping into the topmost namespace if this stack does not already contain one.
///
/// Returns a boolean flag indicating whether the insertion has completed successfully.
/// Note that both key and value are matched and the mapping is inserted if either
/// namespace prefix is not already mapped, or if it is mapped, but to a different URI.
///
/// # Parameters
/// * `prefix` --- namespace prefix;
/// * `uri` --- namespace URI.
///
/// # Return value
/// `true` if `prefix` has been inserted successfully; `false` if the `prefix`
/// was already present in the namespace stack.
pub fn put_checked<P, U>(&mut self, prefix: P, uri: U) -> bool
where P: Into<String> + AsRef<str>,
U: Into<String> + AsRef<str>
{
if self.0.iter().any(|ns| ns.get(&prefix) == Some(uri.as_ref())) {
false
} else {
self.put(prefix, uri);
true
}
}
/// Puts a mapping into the topmost namespace in this stack.
///
/// This method does not override a mapping in the topmost namespace if it is
/// already present, however, it does not depend on other namespaces in the stack,
/// so it is possible to put a mapping which is present in lower namespaces.
///
/// Returns a boolean flag indicating whether the insertion has completed successfully.
///
/// # Parameters
/// * `prefix` --- namespace prefix;
/// * `uri` --- namespace URI.
///
/// # Return value
/// `true` if `prefix` has been inserted successfully; `false` if the `prefix`
/// was already present in the namespace.
#[inline]
pub fn put<P, U>(&mut self, prefix: P, uri: U) -> bool
where P: Into<String>, U: Into<String>
{
if let Some(ns) = self.0.last_mut() {
ns.put(prefix, uri)
} else {
false
}
}
/// Performs a search for the given prefix in the whole stack.
///
/// This method walks the stack from top to bottom, querying each namespace
/// in order for the given prefix. If none of the namespaces contains the prefix,
/// `None` is returned.
///
/// # Parameters
/// * `prefix` --- namespace prefix.
#[inline]
pub fn get<'a, P: ?Sized + AsRef<str>>(&'a self, prefix: &P) -> Option<&'a str> {
let prefix = prefix.as_ref();
for ns in self.0.iter().rev() {
match ns.get(prefix) {
None => {},
r => return r,
}
}
None
}
/// Combines this stack of namespaces into a single namespace.
///
/// Namespaces are combined in left-to-right order, that is, rightmost namespace
/// elements take priority over leftmost ones.
#[must_use]
pub fn squash(&self) -> Namespace {
let mut result = BTreeMap::new();
for ns in &self.0 {
result.extend(ns.0.iter().map(|(k, v)| (k.clone(), v.clone())));
}
Namespace(result)
}
/// Returns an object which implements `Extend` using `put_checked()` instead of `put()`.
///
/// See `CheckedTarget` for more information.
#[inline]
pub fn checked_target(&mut self) -> CheckedTarget<'_> {
CheckedTarget(self)
}
/// Returns an iterator over all mappings in this namespace stack.
#[inline]
#[must_use]
pub fn iter(&self) -> NamespaceStackMappings<'_> {
self.into_iter()
}
}
/// An iterator over mappings from prefixes to URIs in a namespace stack.
///
/// Namespaces are visited from the top of the stack to the bottom, and a prefix which has
/// already been yielded is not yielded again when it occurs in a lower namespace.
///
/// # Example
/// ```
/// # use xml::namespace::NamespaceStack;
/// let mut nst = NamespaceStack::empty();
/// nst.push_empty();
/// nst.put("a", "urn:A");
/// nst.put("b", "urn:B");
/// nst.push_empty();
/// nst.put("c", "urn:C");
///
/// assert_eq!(vec![("c", "urn:C"), ("a", "urn:A"), ("b", "urn:B")], nst.iter().collect::<Vec<_>>());
/// ```
pub struct NamespaceStackMappings<'a> {
// Namespaces which have not been visited yet, walked from the top of the stack downwards.
namespaces: Rev<Iter<'a, Namespace>>,
// Mappings of the namespace which is currently being traversed, if any.
current_namespace: Option<NamespaceMappings<'a>>,
// Prefixes which have already been yielded; their later occurrences are skipped.
used_keys: HashSet<&'a str>,
}
impl NamespaceStackMappings<'_> {
    /// Makes the next not yet visited namespace the current one.
    ///
    /// Returns `true` if there was such a namespace; otherwise the current namespace is
    /// cleared and `false` is returned.
    fn go_to_next_namespace(&mut self) -> bool {
        match self.namespaces.next() {
            Some(ns) => {
                self.current_namespace = Some(ns.into_iter());
                true
            },
            None => {
                self.current_namespace = None;
                false
            },
        }
    }
}
impl<'a> Iterator for NamespaceStackMappings<'a> {
    type Item = UriMapping<'a>;

    /// Returns the next mapping whose prefix has not been yielded yet.
    ///
    /// Namespaces are consumed one after another; prefixes which have already been
    /// yielded are skipped. Returns `None` once every namespace has been exhausted.
    ///
    /// The search is a loop rather than a recursive call, so its stack usage does not grow
    /// with the number of skipped prefixes or exhausted (e.g. empty) namespaces.
    fn next(&mut self) -> Option<UriMapping<'a>> {
        loop {
            // If there is no current namespace and no next namespace, we're finished
            if self.current_namespace.is_none() && !self.go_to_next_namespace() {
                return None;
            }

            match self.current_namespace.as_mut()?.next() {
                Some((prefix, uri)) => {
                    // `insert` returns `false` for a prefix which has already been
                    // yielded; such a mapping is skipped.
                    if self.used_keys.insert(prefix) {
                        return Some((prefix, uri));
                    }
                },
                // Current namespace is exhausted: continue from the next one, if any
                None => {
                    if !self.go_to_next_namespace() {
                        return None;
                    }
                },
            }
        }
    }
}
impl<'a> IntoIterator for &'a NamespaceStack {
type IntoIter = NamespaceStackMappings<'a>;
type Item = UriMapping<'a>;
fn into_iter(self) -> Self::IntoIter {
NamespaceStackMappings {
namespaces: self.0.iter().rev(),
current_namespace: None,
used_keys: HashSet::new(),
}
}
}
/// A type alias for a pair of `(prefix, uri)` values returned by namespace iterators and
/// accepted by the `Extend` implementations of the namespace types.
pub type UriMapping<'a> = (&'a str, &'a str);
/// Adds every mapping of the iterable with `Namespace::put()`, so prefixes which are
/// already present keep their current URI.
impl<'a> Extend<UriMapping<'a>> for Namespace {
    fn extend<T>(&mut self, iterable: T) where T: IntoIterator<Item=UriMapping<'a>> {
        iterable.into_iter().for_each(|(prefix, uri)| {
            self.put(prefix, uri);
        });
    }
}
/// Adds every mapping of the iterable with `NamespaceStack::put()`, i.e. to the topmost
/// namespace of the stack.
impl<'a> Extend<UriMapping<'a>> for NamespaceStack {
    fn extend<T>(&mut self, iterable: T) where T: IntoIterator<Item=UriMapping<'a>> {
        iterable.into_iter().for_each(|(prefix, uri)| {
            self.put(prefix, uri);
        });
    }
}
/// A wrapper around `NamespaceStack` which implements `Extend` using `put_checked()`.
///
/// With `put_checked()`, a mapping is skipped when exactly the same prefix/URI pair is
/// already present anywhere in the stack, whereas the plain `Extend` implementation of
/// `NamespaceStack` only looks at the topmost namespace.
///
/// # Example
///
/// ```
/// # use xml::namespace::NamespaceStack;
///
/// let mut nst = NamespaceStack::empty();
/// nst.push_empty();
/// nst.put("a", "urn:A");
/// nst.put("b", "urn:B");
/// nst.push_empty();
/// nst.put("c", "urn:C");
///
/// nst.checked_target().extend(vec![("a", "urn:Z"), ("b", "urn:B"), ("c", "urn:Y"), ("d", "urn:D")]);
/// assert_eq!(
/// vec![("a", "urn:Z"), ("c", "urn:C"), ("d", "urn:D"), ("b", "urn:B")],
/// nst.iter().collect::<Vec<_>>()
/// );
/// ```
///
/// Compare:
///
/// ```
/// # use xml::namespace::NamespaceStack;
/// # let mut nst = NamespaceStack::empty();
/// # nst.push_empty();
/// # nst.put("a", "urn:A");
/// # nst.put("b", "urn:B");
/// # nst.push_empty();
/// # nst.put("c", "urn:C");
///
/// nst.extend(vec![("a", "urn:Z"), ("b", "urn:B"), ("c", "urn:Y"), ("d", "urn:D")]);
/// assert_eq!(
/// vec![("a", "urn:Z"), ("b", "urn:B"), ("c", "urn:C"), ("d", "urn:D")],
/// nst.iter().collect::<Vec<_>>()
/// );
/// ```
pub struct CheckedTarget<'a>(&'a mut NamespaceStack);
/// Adds every mapping of the iterable to the wrapped stack with
/// `NamespaceStack::put_checked()`.
impl<'b> Extend<UriMapping<'b>> for CheckedTarget<'_> {
    fn extend<T>(&mut self, iterable: T) where T: IntoIterator<Item=UriMapping<'b>> {
        iterable.into_iter().for_each(|(prefix, uri)| {
            self.0.put_checked(prefix, uri);
        });
    }
}

181
vendor/xml-rs/src/reader.rs vendored Normal file
View File

@@ -0,0 +1,181 @@
//! Contains high-level interface for a pull-based XML parser.
//!
//! The most important type in this module is `EventReader`, which provides an iterator
//! view for events in XML document.
use std::io::Read;
use std::iter::FusedIterator;
use std::result;
use crate::common::{Position, TextPosition};
pub use self::config::{ParserConfig, ParserConfig2};
pub use self::error::{Error, ErrorKind};
pub use self::events::XmlEvent;
use self::parser::PullParser;
mod config;
mod error;
mod events;
mod indexset;
mod lexer;
mod parser;
/// A result type yielded by `EventReader`; the error type defaults to the reader's `Error`.
pub type Result<T, E = Error> = result::Result<T, E>;
/// A wrapper around an `std::io::Read` instance which provides pull-based XML parsing.
///
/// The reader should be wrapped in a `BufReader`, otherwise parsing may be very slow.
pub struct EventReader<R: Read> {
// The underlying stream the XML document is read from.
source: R,
// The pull parser which turns the bytes of `source` into XML events.
parser: PullParser,
}
impl<R: Read> EventReader<R> {
/// Creates a new reader, consuming the given stream. The reader should be wrapped in a `BufReader`, otherwise parsing may be very slow.
#[inline]
pub fn new(source: R) -> Self {
Self::new_with_config(source, ParserConfig2::new())
}
/// Creates a new reader with the provded configuration, consuming the given stream. The reader should be wrapped in a `BufReader`, otherwise parsing may be very slow.
#[inline]
pub fn new_with_config(source: R, config: impl Into<ParserConfig2>) -> Self {
Self { source, parser: PullParser::new(config) }
}
/// Pulls and returns next XML event from the stream.
///
/// If this returns [Err] or [`XmlEvent::EndDocument`] then further calls to
/// this method will return this event again.
#[inline]
pub fn next(&mut self) -> Result<XmlEvent> {
self.parser.next(&mut self.source)
}
/// Skips all XML events until the next end tag at the current level.
///
/// Convenience function that is useful for the case where you have
/// encountered a start tag that is of no interest and want to
/// skip the entire XML subtree until the corresponding end tag.
#[inline]
pub fn skip(&mut self) -> Result<()> {
let mut depth = 1;
while depth > 0 {
match self.next()? {
XmlEvent::StartElement { .. } => depth += 1,
XmlEvent::EndElement { .. } => depth -= 1,
XmlEvent::EndDocument => return Err(Error {
kind: ErrorKind::UnexpectedEof,
pos: self.parser.position(),
}),
_ => {},
}
}
Ok(())
}
/// Access underlying reader
///
/// Using it directly while the event reader is parsing is not recommended
pub fn source(&self) -> &R { &self.source }
/// Access underlying reader
///
/// Using it directly while the event reader is parsing is not recommended
pub fn source_mut(&mut self) -> &mut R { &mut self.source }
/// Unwraps this `EventReader`, returning the underlying reader.
///
/// Note that this operation is destructive; unwrapping the reader and wrapping it
/// again with `EventReader::new()` will create a fresh reader which will attempt
/// to parse an XML document from the beginning.
pub fn into_inner(self) -> R {
self.source
}
/// Returns the DOCTYPE of the document if it has already been seen
///
/// Available only after the root `StartElement` event
#[inline]
pub fn doctype(&self) -> Option<&str> {
self.parser.doctype()
}
}
impl<R: Read> Position for EventReader<R> {
    /// Returns the position of the last event produced by the reader, as reported by the
    /// underlying parser.
    #[inline]
    fn position(&self) -> TextPosition {
        let parser = &self.parser;
        parser.position()
    }
}
/// Turns the reader into an iterator over its XML events.
impl<R: Read> IntoIterator for EventReader<R> {
    type Item = Result<XmlEvent>;
    type IntoIter = Events<R>;

    fn into_iter(self) -> Self::IntoIter {
        Events {
            finished: false,
            reader: self,
        }
    }
}
/// An iterator over XML events created from some type implementing `Read`.
///
/// When the next event is an error or `XmlEvent::EndDocument`, then
/// it will be returned by the iterator once, and then it will stop producing events
/// (unless the parser is configured to ignore the end of stream).
pub struct Events<R: Read> {
// The reader the events are pulled from.
reader: EventReader<R>,
// Set once an error or `XmlEvent::EndDocument` has been returned.
finished: bool,
}
impl<R: Read> Events<R> {
    /// Unwraps the iterator, returning the internal `EventReader`.
    #[inline]
    pub fn into_inner(self) -> EventReader<R> {
        let Self { reader, .. } = self;
        reader
    }

    /// Gives shared access to the underlying reader.
    ///
    /// It's not recommended to use it while the events are still being parsed.
    pub fn source(&self) -> &R {
        self.reader.source()
    }

    /// Gives mutable access to the underlying reader.
    ///
    /// It's not recommended to use it while the events are still being parsed.
    pub fn source_mut(&mut self) -> &mut R {
        self.reader.source_mut()
    }
}
/// Marker implementation declaring that `Events` keeps returning `None` once it has
/// returned `None`.
impl<R: Read> FusedIterator for Events<R> {}
impl<R: Read> Iterator for Events<R> {
    type Item = Result<XmlEvent>;

    /// Pulls the next event from the reader.
    ///
    /// After an error or `XmlEvent::EndDocument` has been returned, `None` is returned,
    /// unless the parser is ignoring the end of stream, in which case the reader keeps
    /// being polled.
    #[inline]
    fn next(&mut self) -> Option<Result<XmlEvent>> {
        if self.finished && !self.reader.parser.is_ignoring_end_of_stream() {
            return None;
        }

        let ev = self.reader.next();
        // Errors and the end of the document both terminate the iteration.
        if matches!(&ev, Ok(XmlEvent::EndDocument) | Err(_)) {
            self.finished = true;
        }
        Some(ev)
    }
}
impl<'r> EventReader<&'r [u8]> {
/// A convenience method to create an `XmlReader` from a string slice.
#[inline]
#[must_use]
pub fn from_str(source: &'r str) -> Self {
EventReader::new(source.as_bytes())
}
}

365
vendor/xml-rs/src/reader/config.rs vendored Normal file
View File

@@ -0,0 +1,365 @@
//! Contains parser configuration structure.
use std::collections::HashMap;
use std::io::Read;
use crate::reader::EventReader;
use crate::util::Encoding;
/// Limits to defend from billion laughs attack
///
/// Default for `ParserConfig2::max_entity_expansion_length`.
const DEFAULT_MAX_ENTITY_EXPANSION_LENGTH: usize = 1_000_000;
/// Default for `ParserConfig2::max_entity_expansion_depth`; also part of the defence
/// against the billion laughs attack.
const DEFAULT_MAX_ENTITY_EXPANSION_DEPTH: u8 = 10;
/// Parser configuration structure. **There are more config methods than public fields — see methods below**.
///
/// This structure contains various configuration options which affect
/// behavior of the parser.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct ParserConfig {
/// Whether or not whitespace in textual events should be removed. Default is false.
///
/// When true, all standalone whitespace will be removed (this means no
/// `Whitespace` events will be emitted), and leading and trailing whitespace
/// from `Characters` events will be deleted. If after trimming a `Characters`
/// event will be empty, it will also be omitted from output stream. This is
/// possible, however, only if `whitespace_to_characters` or
/// `cdata_to_characters` options are set.
///
/// This option does not affect CDATA events, unless `cdata_to_characters`
/// option is also set. In that case CDATA content will also be trimmed.
pub trim_whitespace: bool,
/// Whether or not whitespace should be converted to characters.
/// Default is false.
///
/// If true, instead of `Whitespace` events `Characters` events with the
/// same content will be emitted. If `trim_whitespace` is also true, these
/// events will be trimmed to nothing and, consequently, not emitted.
pub whitespace_to_characters: bool,
/// Whether or not CDATA should be converted to characters.
/// Default is false.
///
/// If true, instead of `CData` events `Characters` events with the same
/// content will be emitted. If `trim_whitespace` is also true, these events
/// will be trimmed. If corresponding CDATA contained nothing but whitespace,
/// this event will be omitted from the stream.
pub cdata_to_characters: bool,
/// Whether or not comments should be omitted. Default is true.
///
/// If true, `Comment` events will not be emitted at all.
pub ignore_comments: bool,
/// Whether or not sequential `Characters` events should be merged.
/// Default is true.
///
/// If true, multiple sequential `Characters` events will be merged into
/// a single event, that is, their data will be concatenated.
///
/// Multiple sequential `Characters` events are only possible if either
/// `cdata_to_characters` or `ignore_comments` are set. Otherwise character
/// events will always be separated by other events.
pub coalesce_characters: bool,
/// A map of extra entities recognized by the parser. Default is an empty map.
///
/// By default the XML parser recognizes the entities defined in the XML spec. Sometimes,
/// however, it is convenient to make the parser recognize additional entities which
/// are also not available through the DTD definitions (especially given that at the moment
/// DTD parsing is not supported).
pub extra_entities: HashMap<String, String>,
/// Whether or not the parser should ignore the end of stream. Default is false.
///
/// By default the parser will either error out when it encounters a premature end of
/// stream or complete normally if the end of stream was expected. If you want to continue
/// reading from a stream whose input is supplied progressively, you can set this option to true.
/// In this case the parser will allow you to invoke the `next()` method even if a supposed end
/// of stream has happened.
///
/// Note that support for this functionality is incomplete; for example, the parser will fail if
/// the premature end of stream happens inside PCDATA. Therefore, use this option at your own risk.
pub ignore_end_of_stream: bool,
/// Whether or not non-unicode entity references get replaced with the replacement character.
/// Default is false.
///
/// When true, any decimal or hexadecimal character reference that cannot be converted from a
/// u32 to a char using [std::char::from_u32](https://doc.rust-lang.org/std/char/fn.from_u32.html)
/// will be converted into the unicode REPLACEMENT CHARACTER (U+FFFD).
pub replace_unknown_entity_references: bool,
/// Whether or not whitespace at the root level of the document is ignored. Default is true.
///
/// By default any whitespace that is not enclosed within at least one level of elements will be
/// ignored. Setting this value to false will cause root level whitespace events to be emitted.
///
/// **There are more configuration options — see methods below**
pub ignore_root_level_whitespace: bool,
}
impl ParserConfig {
    /// Creates a config in which every option has its default value.
    ///
    /// The defaults can be adjusted afterwards using a builder-like pattern:
    ///
    /// ```rust
    /// use xml::reader::ParserConfig;
    ///
    /// let config = ParserConfig::new()
    ///     .trim_whitespace(true)
    ///     .ignore_comments(true)
    ///     .coalesce_characters(false);
    /// ```
    #[must_use]
    #[inline]
    pub fn new() -> Self {
        Self {
            // Options which are enabled by default.
            ignore_comments: true,
            coalesce_characters: true,
            ignore_root_level_whitespace: true,
            // Options which are disabled by default.
            trim_whitespace: false,
            whitespace_to_characters: false,
            cdata_to_characters: false,
            ignore_end_of_stream: false,
            replace_unknown_entity_references: false,
            // No additional entities are known by default.
            extra_entities: HashMap::new(),
        }
    }

    /// Creates an XML reader with this configuration. The reader should be wrapped in a
    /// `BufReader`, otherwise parsing may be very slow.
    ///
    /// This is a convenience method for configuring and creating a reader at the same time:
    ///
    /// ```rust
    /// use xml::reader::ParserConfig;
    ///
    /// let mut source: &[u8] = b"...";
    ///
    /// let reader = ParserConfig::new()
    ///     .trim_whitespace(true)
    ///     .ignore_comments(true)
    ///     .coalesce_characters(false)
    ///     .create_reader(&mut source);
    /// ```
    ///
    /// This method is exactly equivalent to calling `EventReader::new_with_config()` with
    /// this configuration object.
    #[inline]
    pub fn create_reader<R: Read>(self, source: R) -> EventReader<R> {
        let config = self;
        EventReader::new_with_config(source, config)
    }

    /// Registers an additional entity and returns the updated config object.
    ///
    /// This is a convenience method for adding external entities mappings to the XML parser.
    /// An entity which has been registered before under the same name is replaced.
    /// An example:
    ///
    /// ```rust
    /// use xml::reader::ParserConfig;
    ///
    /// let mut source: &[u8] = b"...";
    ///
    /// let reader = ParserConfig::new()
    ///     .add_entity("nbsp", "\u{a0}")
    ///     .add_entity("copy", "©")
    ///     .add_entity("reg", "®")
    ///     .create_reader(&mut source);
    /// ```
    #[must_use]
    pub fn add_entity<S: Into<String>, T: Into<String>>(mut self, entity: S, value: T) -> Self {
        let (name, replacement) = (entity.into(), value.into());
        self.extra_entities.insert(name, replacement);
        self
    }
}
impl Default for ParserConfig {
#[inline]
fn default() -> Self {
Self::new()
}
}
// Generates the builder-style setters for the boolean options of `ParserConfig`
// (e.g. `.trim_whitespace(true)`), one per listed field.
// NOTE(review): `val` presumably selects the variant of `gen_setters!` that stores the
// argument directly in the field of the same name — confirm against the macro definition.
gen_setters! { ParserConfig,
trim_whitespace: val bool,
whitespace_to_characters: val bool,
cdata_to_characters: val bool,
ignore_comments: val bool,
coalesce_characters: val bool,
ignore_end_of_stream: val bool,
replace_unknown_entity_references: val bool,
ignore_root_level_whitespace: val bool
}
/// Backwards-compatible extension of `ParserConfig`, which will eventually be merged into the original `ParserConfig` struct
#[derive(Clone, PartialEq, Eq, Debug)]
#[non_exhaustive]
pub struct ParserConfig2 {
/// The original `ParserConfig` options that this struct extends.
pub(crate) c: ParserConfig,
/// Use this encoding as the default. Necessary for UTF-16 files without BOM.
pub override_encoding: Option<Encoding>,
/// Allow `<?xml encoding="…">` to contain unsupported encoding names,
/// and interpret them as Latin1 instead. This will mangle non-ASCII characters, but usually it won't fail parsing.
pub ignore_invalid_encoding_declarations: bool,
/// Accept documents with more than one root element.
/// Documents with multiple root elements are ill-formed.
pub allow_multiple_root_elements: bool,
/// Abort if custom entities create a string longer than this
pub max_entity_expansion_length: usize,
/// Entities can expand into other entities this many times (be careful about exponential cost!)
pub max_entity_expansion_depth: u8,
/// Maximum length of tag name or attribute name
pub max_name_length: usize,
/// Max number of attributes per element
pub max_attributes: usize,
/// Max number of bytes in each attribute
pub max_attribute_length: usize,
/// Maximum length of strings representing characters, comments, and processing instructions
pub max_data_length: usize,
}
/// Defaults: the legacy options at their own defaults, no encoding override,
/// multiple root elements allowed, and generous size limits.
impl Default for ParserConfig2 {
    fn default() -> Self {
        let c = ParserConfig::default();
        Self {
            c,
            override_encoding: None,
            ignore_invalid_encoding_declarations: false,
            allow_multiple_root_elements: true,
            // Limits on entity expansion.
            max_entity_expansion_length: DEFAULT_MAX_ENTITY_EXPANSION_LENGTH,
            max_entity_expansion_depth: DEFAULT_MAX_ENTITY_EXPANSION_DEPTH,
            // Limits on names, attributes and text.
            max_name_length: 1 << 18,
            max_attributes: 1 << 16,
            max_attribute_length: 1 << 30,
            max_data_length: 1 << 30,
        }
    }
}
impl ParserConfig2 {
    /// Create extended configuration struct
    #[inline]
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Read character encoding from `Content-Type` header.
    /// Set this when parsing XML documents fetched over HTTP.
    ///
    /// `text/*` MIME types do *not* imply latin1. UTF-8 is always the default fallback.
    ///
    /// The `charset` parameter name is matched case-insensitively, and any parameters
    /// that follow it (for example `text/xml; charset=utf-8; version=1.0`) are ignored.
    /// If there is no `charset` parameter, or its value is not a recognized encoding
    /// name, the configuration is returned unchanged.
    #[must_use]
    pub fn content_type(mut self, mime_type: &str) -> Self {
        const CHARSET: &str = "charset";

        let raw_value = mime_type.split_once(';').and_then(|(_, params)| {
            // Parameter names are case-insensitive. ASCII lowercasing never changes byte
            // lengths, so an offset found in the lowered copy is valid in `params` too.
            let lowered = params.to_ascii_lowercase();
            let after_name = lowered.find(CHARSET)? + CHARSET.len();
            params.get(after_name..)?.split_once('=').map(|(_, value)| value)
        });

        if let Some(raw_value) = raw_value {
            // Stop at the next `;` so a trailing parameter is not mistaken for part of the name.
            let name = raw_value
                .split(';')
                .next()
                .unwrap_or(raw_value)
                .trim()
                .trim_matches('"');
            if let Ok(enc) = name.parse() {
                self.override_encoding = Some(enc);
            }
        }
        self
    }

    /// Creates an XML reader with this configuration. The reader should be wrapped in a `BufReader`, otherwise parsing may be very slow.
    ///
    /// This is a convenience method for configuring and creating a reader at the same time:
    ///
    /// ```rust
    /// use xml::reader::ParserConfig;
    ///
    /// let mut source: &[u8] = b"...";
    ///
    /// let reader = ParserConfig::new()
    ///     .trim_whitespace(true)
    ///     .ignore_comments(true)
    ///     .coalesce_characters(false)
    ///     .create_reader(&mut source);
    /// ```
    ///
    /// This method is exactly equivalent to calling `EventReader::new_with_config()` with
    /// this configuration object.
    #[inline]
    pub fn create_reader<R: Read>(self, source: R) -> EventReader<R> {
        EventReader::new_with_config(source, self)
    }
}
/// Wraps a legacy `ParserConfig`, leaving every extended option at its default.
impl From<ParserConfig> for ParserConfig2 {
    #[inline]
    fn from(c: ParserConfig) -> Self {
        let mut extended = Self::default();
        extended.c = c;
        extended
    }
}
// Builder-style setters for the options that exist only on `ParserConfig2`.
gen_setters! { ParserConfig2,
/// Set if you got one in the HTTP header
override_encoding: val Option<Encoding>,
/// Allows invalid documents. There should be only a single root element in XML.
allow_multiple_root_elements: val bool,
/// Abort if custom entities create a string longer than this
max_entity_expansion_length: val usize,
/// Entities can expand into other entities this many times (be careful about exponential cost!)
max_entity_expansion_depth: val u8,
/// Max number of attributes per element
max_attributes: val usize,
/// Maximum length of tag name or attribute name
max_name_length: val usize,
/// Max number of bytes in each attribute
max_attribute_length: val usize,
/// Maximum length of strings representing characters, comments, and processing instructions
max_data_length: val usize,
/// Allow `<?xml encoding="bogus"?>`
ignore_invalid_encoding_declarations: val bool
}
// The same option names, made available on the legacy `ParserConfig`, which has none of
// these fields itself.
// NOTE(review): `c2` presumably makes each setter convert the `ParserConfig` into a
// `ParserConfig2` first and then apply the option there — confirm against `gen_setters!`.
gen_setters! { ParserConfig,
/// Set if you got one in the HTTP header (see `content_type`)
override_encoding: c2 Option<Encoding>,
/// Allow `<?xml encoding="bogus"?>`
ignore_invalid_encoding_declarations: c2 bool,
/// Allows invalid documents. There should be only a single root element in XML.
allow_multiple_root_elements: c2 bool,
/// Abort if custom entities create a string longer than this
max_entity_expansion_length: c2 usize,
/// Entities can expand into other entities this many times (be careful about exponential cost!)
max_entity_expansion_depth: c2 u8,
/// Max number of attributes per element
max_attributes: c2 usize,
/// Maximum length of tag name or attribute name
max_name_length: c2 usize,
/// Max number of bytes in each attribute
max_attribute_length: c2 usize,
/// Maximum length of strings representing characters, comments, and processing instructions
max_data_length: c2 usize,
/// Set encoding from the MIME type. Important for HTTP compatibility.
content_type: c2 &str
}
// Setters on `ParserConfig2` for the legacy options, which live in the wrapped
// `ParserConfig` (field `c`).
// NOTE(review): `delegate` presumably forwards each call to the setter of `self.c` — confirm
// against `gen_setters!`.
gen_setters! { ParserConfig2,
trim_whitespace: delegate bool,
whitespace_to_characters: delegate bool,
cdata_to_characters: delegate bool,
ignore_comments: delegate bool,
coalesce_characters: delegate bool,
ignore_end_of_stream: delegate bool,
replace_unknown_entity_references: delegate bool,
/// Whether or not whitespace at the root level of the document is ignored. Default is true.
ignore_root_level_whitespace: delegate bool
}
/// `content_type` must pick the charset out of a MIME type regardless of letter case,
/// surrounding spaces or quotes, and must compose with the other setters in any order.
#[test]
fn mime_parse() {
    let ascii = ParserConfig2::new()
        .content_type("text/xml;charset=Us-AScii")
        .max_entity_expansion_length(1000);
    assert_eq!(ascii.override_encoding, Some(Encoding::Ascii));

    let utf16 = ParserConfig2::new()
        .max_entity_expansion_depth(3)
        .content_type("text/xml;charset = \"UTF-16\"");
    assert_eq!(utf16.override_encoding, Some(Encoding::Utf16));
}

255
vendor/xml-rs/src/reader/error.rs vendored Normal file
View File

@@ -0,0 +1,255 @@
use crate::reader::lexer::Token;
use crate::Encoding;
use std::borrow::Cow;
use std::error::Error as _;
use std::{error, fmt, io, str};
use crate::common::{Position, TextPosition};
use crate::util;
/// Failure reason
#[derive(Debug)]
pub enum ErrorKind {
/// This is an ill-formed XML document
Syntax(Cow<'static, str>),
/// Reader/writer reported an error
Io(io::Error),
/// The document contains bytes that are not allowed in UTF-8 strings
Utf8(str::Utf8Error),
/// The document ended while there were elements/comments/etc. still open
UnexpectedEof,
}
/// Crate-internal catalogue of the syntax problems the parser can report.
///
/// Each variant carries just the data needed to build its message; `to_cow` (and
/// therefore `Display`) renders the human-readable text.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub(crate) enum SyntaxError {
CannotRedefineXmlnsPrefix,
CannotRedefineXmlPrefix,
/// Recursive custom entity expanded to too many chars, it could be DoS
EntityTooBig,
EmptyEntity,
NoRootElement,
ProcessingInstructionWithoutName,
UnbalancedRootElement,
UnexpectedEof,
UnexpectedOpeningTag,
/// Missing `]]>`
UnclosedCdata,
UnexpectedQualifiedName(Token),
UnexpectedTokenOutsideRoot(Token),
UnexpectedToken(Token),
UnexpectedTokenInEntity(Token),
UnexpectedTokenInClosingTag(Token),
UnexpectedTokenInOpeningTag(Token),
InvalidQualifiedName(Box<str>),
UnboundAttribute(Box<str>),
UnboundElementPrefix(Box<str>),
UnexpectedClosingTag(Box<str>),
UnexpectedName(Box<str>),
/// Found <?xml-like PI not at the beginning of a document,
/// which is an error, see section 2.6 of XML 1.1 spec
UnexpectedProcessingInstruction(Box<str>, Token),
CannotUndefinePrefix(Box<str>),
/// The payload is the offending code point; it is printed as `U+XXXX`.
InvalidCharacterEntity(u32),
InvalidDefaultNamespace(Box<str>),
InvalidNamePrefix(Box<str>),
InvalidNumericEntity(Box<str>),
InvalidStandaloneDeclaration(Box<str>),
InvalidXmlProcessingInstruction(Box<str>),
RedefinedAttribute(Box<str>),
UndefinedEntity(Box<str>),
UnexpectedEntity(Box<str>),
UnexpectedNameInsideXml(Box<str>),
UnsupportedEncoding(Box<str>),
/// In DTD
UnknownMarkupDeclaration(Box<str>),
UnexpectedXmlVersion(Box<str>),
/// The declared encoding (first) differs from the one in use (second).
ConflictingEncoding(Encoding, Encoding),
UnexpectedTokenBefore(&'static str, char),
/// Document has more stuff than `ParserConfig` allows
ExceededConfiguredLimit,
}
/// Displays the same text that `to_cow` produces.
impl fmt::Display for SyntaxError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let message = self.to_cow();
        fmt::Display::fmt(&*message, f)
    }
}
impl SyntaxError {
    /// Renders the error as a human-readable message.
    ///
    /// Variants without a payload yield a borrowed static string; the others allocate
    /// a formatted one. Marked cold and never inlined because it runs on error paths only.
    #[inline(never)]
    #[cold]
    pub(crate) fn to_cow(&self) -> Cow<'static, str> {
        match self {
            // Fixed messages
            Self::CannotRedefineXmlnsPrefix => Cow::Borrowed("Cannot redefine XMLNS prefix"),
            Self::CannotRedefineXmlPrefix => Cow::Borrowed("Default XMLNS prefix cannot be rebound to another value"),
            Self::EmptyEntity => Cow::Borrowed("Encountered empty entity"),
            Self::EntityTooBig => Cow::Borrowed("Entity too big"),
            Self::NoRootElement => Cow::Borrowed("Unexpected end of stream: no root element found"),
            Self::ProcessingInstructionWithoutName => Cow::Borrowed("Encountered processing instruction without a name"),
            Self::UnbalancedRootElement => Cow::Borrowed("Unexpected end of stream: still inside the root element"),
            Self::UnclosedCdata => Cow::Borrowed("Unclosed <![CDATA["),
            Self::UnexpectedEof => Cow::Borrowed("Unexpected end of stream"),
            Self::UnexpectedOpeningTag => Cow::Borrowed("'<' is not allowed in attributes"),
            // Messages built from the variant's payload
            Self::CannotUndefinePrefix(prefix) => Cow::Owned(format!("Cannot undefine prefix '{prefix}'")),
            Self::ConflictingEncoding(declared, used) => Cow::Owned(format!("Declared encoding {declared}, but uses {used}")),
            Self::InvalidCharacterEntity(code) => Cow::Owned(format!("Invalid character U+{code:04X}")),
            Self::InvalidDefaultNamespace(ns) => Cow::Owned(format!("Namespace '{ns}' cannot be default")),
            Self::InvalidNamePrefix(prefix) => Cow::Owned(format!("'{prefix}' cannot be an element name prefix")),
            Self::InvalidNumericEntity(entity) => Cow::Owned(format!("Invalid numeric entity: {entity}")),
            Self::InvalidQualifiedName(detail) => Cow::Owned(format!("Qualified name is invalid: {detail}")),
            Self::InvalidStandaloneDeclaration(value) => Cow::Owned(format!("Invalid standalone declaration value: {value}")),
            Self::InvalidXmlProcessingInstruction(target) => Cow::Owned(format!("Invalid processing instruction: <?{target}\nThe XML spec only allows \"<?xml\" at the very beginning of the file, with no whitespace, comments, or any elements before it")),
            Self::RedefinedAttribute(attr) => Cow::Owned(format!("Attribute '{attr}' is redefined")),
            Self::UnboundAttribute(attr) => Cow::Owned(format!("Attribute {attr} prefix is unbound")),
            Self::UnboundElementPrefix(element) => Cow::Owned(format!("Element {element} prefix is unbound")),
            Self::UndefinedEntity(entity) => Cow::Owned(format!("Undefined entity: {entity}")),
            Self::UnexpectedClosingTag(expected_got) => Cow::Owned(format!("Unexpected closing tag: {expected_got}")),
            Self::UnexpectedEntity(entity) => Cow::Owned(format!("Unexpected entity: {entity}")),
            Self::UnexpectedName(found) => Cow::Owned(format!("Unexpected name: {found}")),
            Self::UnexpectedNameInsideXml(found) => Cow::Owned(format!("Unexpected name inside XML declaration: {found}")),
            Self::UnexpectedProcessingInstruction(buf, token) => Cow::Owned(format!("Unexpected token inside processing instruction: <?{buf}{token}")),
            Self::UnexpectedQualifiedName(token) => Cow::Owned(format!("Unexpected token inside qualified name: {token}")),
            Self::UnexpectedToken(token) => Cow::Owned(format!("Unexpected token: {token}")),
            Self::UnexpectedTokenBefore(before, c) => Cow::Owned(format!("Unexpected token '{before}' before '{c}'")),
            Self::UnexpectedTokenInClosingTag(token) => Cow::Owned(format!("Unexpected token inside closing tag: {token}")),
            Self::UnexpectedTokenInEntity(token) => Cow::Owned(format!("Unexpected token inside entity: {token}")),
            Self::UnexpectedTokenInOpeningTag(token) => Cow::Owned(format!("Unexpected token inside opening tag: {token}")),
            Self::UnexpectedTokenOutsideRoot(token) => Cow::Owned(format!("Unexpected characters outside the root element: {token}")),
            Self::UnexpectedXmlVersion(version) => Cow::Owned(format!("Invalid XML version: {version}")),
            Self::UnknownMarkupDeclaration(decl) => Cow::Owned(format!("Unknown markup declaration: {decl}")),
            Self::UnsupportedEncoding(encoding) => Cow::Owned(format!("Unsupported encoding: {encoding}")),
            // Fixed message for exceeded limits
            Self::ExceededConfiguredLimit => Cow::Borrowed("This document is larger/more complex than allowed by the parser's configuration"),
        }
    }
}
/// An XML parsing error.
///
/// Consists of a 2D position in a document and a textual message describing the error.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct Error {
/// Position in the document that the error is attributed to.
pub(crate) pos: TextPosition,
/// What went wrong; exposed through [`Error::kind`].
pub(crate) kind: ErrorKind,
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
use self::ErrorKind::{Io, Syntax, UnexpectedEof, Utf8};
write!(f, "{} ", self.pos)?;
match &self.kind {
Io(io_error) => io_error.fmt(f),
Utf8(reason) => reason.fmt(f),
Syntax(msg) => f.write_str(msg),
UnexpectedEof => f.write_str("Unexpected EOF"),
}
}
}
impl Position for Error {
    /// Position in the document recorded for this error.
    #[inline]
    fn position(&self) -> TextPosition {
        let Self { pos, .. } = self;
        *pos
    }
}
impl Error {
/// Returns a reference to a message which is contained inside this error.
#[cold]
#[doc(hidden)]
#[allow(deprecated)]
#[must_use]
pub fn msg(&self) -> &str {
use self::ErrorKind::{Io, Syntax, UnexpectedEof, Utf8};
match &self.kind {
Io(io_error) => io_error.description(),
Utf8(reason) => reason.description(),
Syntax(msg) => msg.as_ref(),
UnexpectedEof => "Unexpected EOF",
}
}
/// Failure reason
#[must_use]
#[inline]
pub fn kind(&self) -> &ErrorKind {
&self.kind
}
}
impl error::Error for Error {
#[allow(deprecated)]
#[cold]
fn description(&self) -> &str { self.msg() }
}
impl<'a, P, M> From<(&'a P, M)> for Error where P: Position, M: Into<Cow<'static, str>> {
#[cold]
fn from(orig: (&'a P, M)) -> Self {
Self {
pos: orig.0.position(),
kind: ErrorKind::Syntax(orig.1.into()),
}
}
}
/// Maps a character-reading failure onto the matching `ErrorKind`.
/// The position is a fresh `TextPosition::new()`, because the source error carries none.
impl From<util::CharReadError> for Error {
    #[cold]
    fn from(e: util::CharReadError) -> Self {
        let pos = TextPosition::new();
        let kind = match e {
            util::CharReadError::Io(io_error) => ErrorKind::Io(io_error),
            util::CharReadError::Utf8(reason) => ErrorKind::Utf8(reason),
            util::CharReadError::UnexpectedEof => ErrorKind::UnexpectedEof,
        };
        Self { pos, kind }
    }
}
/// Wraps an I/O error. The position is a fresh `TextPosition::new()`.
impl From<io::Error> for Error {
    #[cold]
    fn from(e: io::Error) -> Self {
        let pos = TextPosition::new();
        let kind = ErrorKind::Io(e);
        Self { pos, kind }
    }
}
impl Clone for ErrorKind {
#[cold]
fn clone(&self) -> Self {
use self::ErrorKind::{Io, Syntax, UnexpectedEof, Utf8};
match self {
UnexpectedEof => UnexpectedEof,
Utf8(reason) => Utf8(*reason),
Io(io_error) => Io(io::Error::new(io_error.kind(), io_error.to_string())),
Syntax(msg) => Syntax(msg.clone()),
}
}
}
impl PartialEq for ErrorKind {
#[allow(deprecated)]
fn eq(&self, other: &Self) -> bool {
use self::ErrorKind::{Io, Syntax, UnexpectedEof, Utf8};
match (self, other) {
(UnexpectedEof, UnexpectedEof) => true,
(Utf8(left), Utf8(right)) => left == right,
(Io(left), Io(right)) =>
left.kind() == right.kind() &&
left.description() == right.description(),
(Syntax(left), Syntax(right)) =>
left == right,
(_, _) => false,
}
}
}
// Marker impl to go with the hand-written `PartialEq` for `ErrorKind`;
// `Error` derives `Eq` and therefore needs its `kind` field to be `Eq`.
impl Eq for ErrorKind {}
/// Guards against `SyntaxError` growing beyond 24 bytes.
#[test]
fn err_size() {
    let actual = std::mem::size_of::<SyntaxError>();
    assert!(actual <= 24);
}

219
vendor/xml-rs/src/reader/events.rs vendored Normal file
View File

@@ -0,0 +1,219 @@
//! Contains `XmlEvent` datatype, instances of which are emitted by the parser.
use crate::attribute::OwnedAttribute;
use crate::common::XmlVersion;
use crate::name::OwnedName;
use crate::namespace::Namespace;
use std::fmt;
/// An element of an XML input stream.
///
/// Items of this enum are emitted by `reader::EventReader`. They correspond to different
/// elements of an XML document.
#[derive(PartialEq, Clone)]
pub enum XmlEvent {
/// Corresponds to XML document declaration.
///
/// This event is always emitted before any other event. It is emitted
/// even if the actual declaration is not present in the document.
StartDocument {
/// XML version.
///
/// If XML declaration is not present, defaults to `Version10`.
version: XmlVersion,
/// XML document encoding.
///
/// If XML declaration is not present or does not contain `encoding` attribute,
/// defaults to `"UTF-8"`. This field is currently used for no other purpose than
/// informational.
encoding: String,
/// XML standalone declaration.
///
/// If XML declaration is not present or does not contain `standalone` attribute,
/// defaults to `None`. This field is currently used for no other purpose than
/// informational.
standalone: Option<bool>,
},
/// Denotes the end of the document stream.
///
/// This event is always emitted after any other event (except `Error`). After it
/// is emitted for the first time, it will always be emitted on next event pull attempts.
EndDocument,
/// Denotes an XML processing instruction.
///
/// This event contains a processing instruction target (`name`) and opaque `data`. It
/// is up to the application to process them.
ProcessingInstruction {
/// Processing instruction target.
name: String,
/// Processing instruction content.
data: Option<String>,
},
/// Denotes a beginning of an XML element.
///
/// This event is emitted after parsing opening tags or after parsing bodiless tags. In the
/// latter case `EndElement` event immediately follows.
StartElement {
/// Qualified name of the element.
name: OwnedName,
/// A list of attributes associated with the element.
///
/// Currently attributes are not checked for duplicates (TODO)
// NOTE(review): the parser defines a `RedefinedAttribute` syntax error, so the TODO
// above may be stale — confirm before relying on it.
attributes: Vec<OwnedAttribute>,
/// Contents of the namespace mapping at this point of the document.
namespace: Namespace,
},
/// Denotes an end of an XML element.
///
/// This event is emitted after parsing closing tags or after parsing bodiless tags. In the
/// latter case it is emitted immediately after corresponding `StartElement` event.
EndElement {
/// Qualified name of the element.
name: OwnedName,
},
/// Denotes CDATA content.
///
/// This event contains unparsed data. No unescaping will be performed.
///
/// It is possible to configure a parser to emit `Characters` event instead of `CData`. See
/// `reader::ParserConfig` structure for more information.
CData(String),
/// Denotes a comment.
///
/// It is possible to configure a parser to ignore comments, so this event will never be emitted.
/// See `reader::ParserConfig` structure for more information.
Comment(String),
/// Denotes character data outside of tags.
///
/// Contents of this event will always be unescaped, so no entities like `&lt;` or `&amp;` or `&#123;`
/// will appear in it.
///
/// It is possible to configure a parser to trim leading and trailing whitespace for this event.
/// See `reader::ParserConfig` structure for more information.
Characters(String),
/// Denotes a chunk of whitespace outside of tags.
///
/// It is possible to configure a parser to emit `Characters` event instead of `Whitespace`.
/// See `reader::ParserConfig` structure for more information. When combined with whitespace
/// trimming, it will eliminate standalone whitespace from the event stream completely.
Whitespace(String),
}
/// Compact, single-line rendering of an event, e.g. `Characters(text)` or
/// `StartElement(name, {namespaces}, [attr -> value, ...])`.
impl fmt::Debug for XmlEvent {
    #[cold]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::StartDocument { version, encoding, standalone } => {
                write!(f, "StartDocument({version}, {encoding}, {standalone:?})")
            },
            Self::EndDocument => f.write_str("EndDocument"),
            Self::ProcessingInstruction { name, data } => {
                write!(f, "ProcessingInstruction({name}")?;
                // The data part is printed only when the instruction has any.
                if let Some(data) = data {
                    write!(f, ", {data}")?;
                }
                f.write_str(")")
            },
            Self::StartElement { name, attributes, namespace: Namespace(namespace) } => {
                write!(f, "StartElement({name}, {namespace:?}")?;
                // The attribute list is omitted entirely when it is empty.
                if !attributes.is_empty() {
                    f.write_str(", [")?;
                    for (index, attr) in attributes.iter().enumerate() {
                        if index > 0 {
                            f.write_str(", ")?;
                        }
                        write!(f, "{} -> {}", attr.name, attr.value)?;
                    }
                    f.write_str("]")?;
                }
                f.write_str(")")
            },
            Self::EndElement { name } => write!(f, "EndElement({name})"),
            Self::Comment(text) => write!(f, "Comment({text})"),
            Self::CData(text) => write!(f, "CData({text})"),
            Self::Characters(text) => write!(f, "Characters({text})"),
            Self::Whitespace(text) => write!(f, "Whitespace({text})"),
        }
    }
}
impl XmlEvent {
    /// Produces the writer-side event that corresponds to this reader event.
    ///
    /// The result borrows from `self`. `EndDocument` has no writer counterpart and yields
    /// `None`; `Whitespace` is mapped to the writer's `Characters` event.
    ///
    /// This is useful for streaming processing of XML documents where the output
    /// is also an XML document: some events can be processed while the others are
    /// passed through to the writer unchanged:
    ///
    /// ```rust
    /// use std::str;
    ///
    /// use xml::reader::XmlEvent as ReaderEvent;
    /// use xml::writer::XmlEvent as WriterEvent;
    /// use xml::{EventReader, EventWriter};
    ///
    /// let mut input: &[u8] = b"<hello>world</hello>";
    /// let mut output: Vec<u8> = Vec::new();
    ///
    /// {
    ///     let mut reader = EventReader::new(&mut input);
    ///     let mut writer = EventWriter::new(&mut output);
    ///
    ///     for e in reader {
    ///         match e.unwrap() {
    ///             ReaderEvent::Characters(s) =>
    ///                 writer.write(WriterEvent::characters(&s.to_uppercase())).unwrap(),
    ///             e => if let Some(e) = e.as_writer_event() {
    ///                 writer.write(e).unwrap()
    ///             }
    ///         }
    ///     }
    /// }
    ///
    /// assert_eq!(
    ///     str::from_utf8(&output).unwrap(),
    ///     r#"<?xml version="1.0" encoding="UTF-8"?><hello>WORLD</hello>"#
    /// );
    /// ```
    ///
    /// Note that this API may change or get additions in future to improve its ergonomics.
    #[must_use]
    pub fn as_writer_event(&self) -> Option<crate::writer::events::XmlEvent<'_>> {
        use crate::writer::events::XmlEvent as WriterEvent;

        let converted = match self {
            // Nothing to write for the end of the document.
            Self::EndDocument => return None,
            Self::StartDocument { version, encoding, standalone } => WriterEvent::StartDocument {
                version: *version,
                encoding: Some(encoding),
                standalone: *standalone,
            },
            Self::ProcessingInstruction { name, data } => WriterEvent::ProcessingInstruction {
                name,
                data: data.as_deref(),
            },
            Self::StartElement { name, attributes, namespace } => WriterEvent::StartElement {
                name: name.borrow(),
                attributes: attributes.iter().map(|attr| attr.borrow()).collect(),
                namespace: namespace.borrow(),
            },
            Self::EndElement { name } => WriterEvent::EndElement { name: Some(name.borrow()) },
            Self::Comment(text) => WriterEvent::Comment(text),
            Self::CData(text) => WriterEvent::CData(text),
            Self::Characters(text) | Self::Whitespace(text) => WriterEvent::Characters(text),
        };
        Some(converted)
    }
}

114
vendor/xml-rs/src/reader/indexset.rs vendored Normal file
View File

@@ -0,0 +1,114 @@
use crate::attribute::OwnedAttribute;
use crate::name::OwnedName;
use std::collections::hash_map::RandomState;
use std::collections::HashSet;
use std::hash::{BuildHasher, Hash, Hasher};
/// An ordered set
///
/// Attributes are kept in insertion order in a `Vec`. Once the set grows past
/// `HASH_THRESHOLD`, hashes of the attribute names are also recorded so that
/// `contains()` can usually rule a name out without scanning the `Vec`.
pub struct AttributesSet {
/// The attributes, in the order they were pushed
vec: Vec<OwnedAttribute>,
/// Uses a no-op hasher, because these u64s are hashes already
may_contain: HashSet<u64, U64HasherBuilder>,
/// This is real hasher for the `OwnedName`
hasher: RandomState,
}
/// Use linear search and don't allocate `HashSet` if there are few attributes,
/// because allocation costs more than a few comparisons.
///
/// Hashing starts with the push that finds exactly this many attributes already stored.
const HASH_THRESHOLD: usize = 8;
impl AttributesSet {
    /// Creates an empty set with a freshly seeded name hasher.
    pub fn new() -> Self {
        let hasher = RandomState::new();
        Self {
            vec: Vec::new(),
            hasher,
            may_contain: HashSet::default(),
        }
    }

    /// Hashes an attribute name with this set's own hasher.
    fn hash(&self, val: &OwnedName) -> u64 {
        let mut state = self.hasher.build_hasher();
        val.hash(&mut state);
        state.finish()
    }

    /// Number of attributes stored.
    pub fn len(&self) -> usize {
        self.vec.len()
    }

    /// Tells whether an attribute with this name has already been pushed.
    pub fn contains(&self, name: &OwnedName) -> bool {
        // Small sets are always scanned. Larger ones are scanned only when the name's hash
        // has been seen, i.e. on a real duplicate or a hash collision.
        let worth_scanning = if self.vec.len() < HASH_THRESHOLD {
            true
        } else {
            self.may_contain.contains(&self.hash(name))
        };
        worth_scanning && self.vec.iter().any(|attr| attr.name == *name)
    }

    /// Appends an attribute. Duplicates are not checked here; use `contains()` first.
    pub fn push(&mut self, attr: OwnedAttribute) {
        let stored = self.vec.len();
        if stored == HASH_THRESHOLD {
            // Crossing the threshold: index everything pushed so far.
            self.may_contain.reserve(HASH_THRESHOLD * 2);
            for existing in &self.vec {
                let digest = self.hash(&existing.name);
                self.may_contain.insert(digest);
            }
        }
        if stored >= HASH_THRESHOLD {
            let digest = self.hash(&attr.name);
            self.may_contain.insert(digest);
        }
        self.vec.push(attr);
    }

    /// Consumes the set and returns the attributes in insertion order.
    pub fn into_vec(self) -> Vec<OwnedAttribute> {
        let Self { vec, .. } = self;
        vec
    }
}
/// Pushes many distinct attributes and checks membership before and after each push.
#[test]
fn indexset() {
    // Builds a name with no namespace and no prefix.
    let plain = |local: &str| OwnedName {
        local_name: local.into(),
        namespace: None,
        prefix: None,
    };

    let mut set = AttributesSet::new();
    // Same local name as one of the pushed attributes, but in a namespace, so never present.
    let not_here = OwnedName {
        local_name: "attr1000".into(),
        namespace: Some("test".into()),
        prefix: None,
    };

    // this test will take a lot of time if the `contains()` is linear, and the loop is quadratic
    for i in 0..50000 {
        let name = plain(&format!("attr{i}"));
        assert!(!set.contains(&name));
        set.push(OwnedAttribute { name, value: String::new() });
        assert!(!set.contains(&not_here));
    }

    for present in ["attr1234", "attr0", "attr49999"] {
        assert!(set.contains(&plain(present)));
    }
}
/// Hasher that does nothing except pass a `u64` through.
///
/// Intended for keys that are hashes already, so hashing them again would be wasted work.
struct U64Hasher(u64);

impl Hasher for U64Hasher {
    fn finish(&self) -> u64 {
        let Self(state) = self;
        *state
    }

    fn write(&mut self, slice: &[u8]) {
        // unused in practice
        self.0 = slice.iter().fold(self.0, |acc, &byte| acc ^ u64::from(byte));
    }

    fn write_u64(&mut self, i: u64) {
        self.0 ^= i;
    }
}

/// Builds [`U64Hasher`]s that start from zero.
#[derive(Default)]
struct U64HasherBuilder;

impl BuildHasher for U64HasherBuilder {
    type Hasher = U64Hasher;

    fn build_hasher(&self) -> U64Hasher {
        U64Hasher(0)
    }
}

1155
vendor/xml-rs/src/reader/lexer.rs vendored Normal file

File diff suppressed because it is too large Load Diff

829
vendor/xml-rs/src/reader/parser.rs vendored Normal file
View File

@@ -0,0 +1,829 @@
//! Contains an implementation of pull-based XML parser.
use crate::common::{is_xml10_char, is_xml11_char, is_xml11_char_not_restricted, is_name_char, is_name_start_char, is_whitespace_char};
use crate::common::{Position, TextPosition, XmlVersion};
use crate::name::OwnedName;
use crate::namespace::NamespaceStack;
use crate::reader::config::ParserConfig2;
use crate::reader::error::SyntaxError;
use crate::reader::events::XmlEvent;
use crate::reader::indexset::AttributesSet;
use crate::reader::lexer::{Lexer, Token};
use super::{Error, ErrorKind};
use std::collections::HashMap;
use std::io::Read;
// Generates "take" helpers on `MarkupData`.
//
// For every `field -> method, Type, default` entry, this emits a method that moves the
// current value out of `self.field` and leaves `default` in its place (`std::mem::replace`).
// The clippy lints are silenced because the same expansion serves both `Option` fields
// (default `None`) and fields whose default is an empty value.
macro_rules! gen_takes(
($($field:ident -> $method:ident, $t:ty, $def:expr);+) => (
$(
impl MarkupData {
#[inline]
#[allow(clippy::mem_replace_option_with_none)]
#[allow(clippy::mem_replace_with_default)]
fn $method(&mut self) -> $t {
std::mem::replace(&mut self.$field, $def)
}
}
)+
)
);
// The take helpers the parser uses, and what each one leaves behind.
gen_takes!(
name -> take_name, String, String::new();
ref_data -> take_ref_data, String, String::new();
encoding -> take_encoding, Option<String>, None;
element_name -> take_element_name, Option<OwnedName>, None;
attr_name -> take_attr_name, Option<OwnedName>, None;
attributes -> take_attributes, AttributesSet, AttributesSet::new()
);
mod inside_cdata;
mod inside_closing_tag_name;
mod inside_comment;
mod inside_declaration;
mod inside_doctype;
mod inside_opening_tag;
mod inside_processing_instruction;
mod inside_reference;
mod outside_tag;
/// Version reported in the `StartDocument` event that the parser synthesizes
/// when the document has no XML declaration.
static DEFAULT_VERSION: XmlVersion = XmlVersion::Version10;
/// Standalone value reported in the synthesized `StartDocument` event.
static DEFAULT_STANDALONE: Option<bool> = None;
/// Stack of element names.
// NOTE(review): presumably the names of the currently open elements — confirm against its users.
type ElementStack = Vec<OwnedName>;
/// Outcome of pulling one event from the parser.
pub type Result = super::Result<XmlEvent>;
/// Pull-based XML parser.
///
/// Each call to `next()` pulls tokens from the lexer until a complete event (or an error)
/// is available.
pub(crate) struct PullParser {
/// Configuration this parser was created with
config: ParserConfig2,
/// Token source
lexer: Lexer,
/// Current state of the state machine
st: State,
// NOTE(review): presumably the state to return to once a reference has been parsed — confirm
state_after_reference: State,
// NOTE(review): looks like a general-purpose text accumulator — confirm against the state handlers
buf: String,
/// From DTD internal subset
entities: HashMap<String, String>,
/// Namespace stack; `next()` pops it when `pop_namespace` is set
nst: NamespaceStack,
/// Pieces of the markup that is currently being assembled
data: MarkupData,
/// Once set, every later call to `next()` returns a clone of this result
final_result: Option<Result>,
/// Event queued for the following call to `next()`
next_event: Option<Result>,
/// Element name stack (see `ElementStack`)
est: ElementStack,
/// Recorded positions; `position()` reports the first one
pos: Vec<TextPosition>,
/// The furthest kind of content seen so far (see `Encountered`)
encountered: Encountered,
// NOTE(review): presumably true while only whitespace has been seen in the current text run — confirm
inside_whitespace: bool,
// NOTE(review): presumably set once the `:` of a qualified name has been read — confirm
read_prefix_separator: bool,
/// When set, the next call to `next()` pops the namespace stack before reading tokens
pop_namespace: bool,
}
// Keeps track of when the XML declaration can happen.
//
// The variants are declared in ascending order on purpose: `set_encountered` compares
// them with the derived ordering and only ever moves forward.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
enum Encountered {
None = 0,
AnyChars, // whitespace before <?xml is not allowed
Declaration,
Comment,
Doctype,
Element,
}
impl PullParser {
    /// Creates a parser from anything that converts into a `ParserConfig2`.
    #[inline]
    pub fn new(config: impl Into<ParserConfig2>) -> Self {
        Self::new_with_config2(config.into())
    }

    /// Creates a parser in its initial state for the given extended configuration.
    #[inline]
    fn new_with_config2(config: ParserConfig2) -> Self {
        let mut lexer = Lexer::new(&config);
        // An explicitly configured encoding is handed to the lexer right away.
        if let Some(enc) = config.override_encoding {
            lexer.set_encoding(enc);
        }

        // The position stack starts out with a single default position.
        let mut pos = Vec::with_capacity(16);
        pos.push(TextPosition::new());

        // Nothing has been collected yet.
        let data = MarkupData {
            name: String::new(),
            doctype: None,
            version: None,
            encoding: None,
            standalone: None,
            ref_data: String::new(),
            element_name: None,
            quote: None,
            attr_name: None,
            attributes: AttributesSet::new(),
        };

        Self {
            config,
            lexer,
            st: State::DocumentStart,
            state_after_reference: State::OutsideTag,
            buf: String::new(),
            entities: HashMap::new(),
            nst: NamespaceStack::default(),
            data,
            final_result: None,
            next_event: None,
            est: Vec::new(),
            pos,
            encountered: Encountered::None,
            inside_whitespace: true,
            read_prefix_separator: false,
            pop_namespace: false,
        }
    }

    /// Checks if this parser ignores the end of stream errors.
    pub fn is_ignoring_end_of_stream(&self) -> bool {
        self.config.c.ignore_end_of_stream
    }

    /// Retrieves the Doctype from the document if any
    #[inline]
    pub fn doctype(&self) -> Option<&str> {
        self.data.doctype.as_deref()
    }

    /// Records that content of kind `new_encounter` has been reached.
    ///
    /// The marker only ever advances. When it advances from `Encountered::None`, no XML
    /// declaration was parsed, so a default `StartDocument` event is returned for the
    /// caller to emit. In every other case the result is `None`.
    #[inline(never)]
    fn set_encountered(&mut self, new_encounter: Encountered) -> Option<Result> {
        if new_encounter <= self.encountered {
            return None;
        }
        let previous = std::mem::replace(&mut self.encountered, new_encounter);
        if previous != Encountered::None {
            return None;
        }

        // Nothing was seen before this: synthesize the declaration event.
        self.push_pos();
        let encoding = self.lexer.encoding().to_string();
        Some(Ok(XmlEvent::StartDocument {
            version: DEFAULT_VERSION,
            encoding,
            standalone: DEFAULT_STANDALONE,
        }))
    }
}
impl Position for PullParser {
    /// Returns the position of the last event produced by the parser
    ///
    /// This is the first recorded position, or a default position if none is recorded.
    #[inline]
    fn position(&self) -> TextPosition {
        match self.pos.first() {
            Some(&recorded) => recorded,
            None => TextPosition::new(),
        }
    }
}
/// Top-level state of the parser's state machine.
///
/// The parser starts in `DocumentStart`. Variants with a payload carry a finer-grained substate.
#[derive(Copy, Clone, PartialEq)]
pub enum State {
OutsideTag,
InsideOpeningTag(OpeningTagSubstate),
InsideClosingTag(ClosingTagSubstate),
InsideProcessingInstruction(ProcessingInstructionSubstate),
InsideComment,
InsideCData,
InsideDeclaration(DeclarationSubstate),
InsideDoctype(DoctypeSubstate),
InsideReference,
DocumentStart,
}
/// Substate carried by `State::InsideDoctype`.
#[derive(Copy, Clone, PartialEq)]
pub enum DoctypeSubstate {
Outside,
String,
InsideName,
BeforeEntityName,
EntityName,
BeforeEntityValue,
EntityValue,
NumericReferenceStart,
NumericReference,
/// expansion
PEReferenceInValue,
PEReferenceInDtd,
/// name definition
PEReferenceDefinitionStart,
PEReferenceDefinition,
SkipDeclaration,
Comment,
}
/// Substate carried by `State::InsideOpeningTag`.
#[derive(Copy, Clone, PartialEq)]
pub enum OpeningTagSubstate {
InsideName,
InsideTag,
InsideAttributeName,
AfterAttributeName,
InsideAttributeValue,
AfterAttributeValue,
}
/// Substate carried by `State::InsideClosingTag`.
#[derive(Copy, Clone, PartialEq)]
pub enum ClosingTagSubstate {
CTInsideName,
CTAfterName,
}
/// Substate carried by `State::InsideProcessingInstruction`.
#[derive(Copy, Clone, PartialEq)]
pub enum ProcessingInstructionSubstate {
PIInsideName,
PIInsideData,
}
/// Substate carried by `State::InsideDeclaration`.
///
/// The variants cover the `version`, `encoding` and standalone parts of the declaration,
/// each with before/inside/after steps for the name and for its value.
#[derive(Copy, Clone, PartialEq)]
pub enum DeclarationSubstate {
BeforeVersion,
InsideVersion,
AfterVersion,
InsideVersionValue,
AfterVersionValue,
BeforeEncoding,
InsideEncoding,
AfterEncoding,
InsideEncodingValue,
AfterEncodingValue,
BeforeStandaloneDecl,
InsideStandaloneDecl,
AfterStandaloneDecl,
InsideStandaloneDeclValue,
AfterStandaloneDeclValue,
}
// NOTE(review): presumably tells the qualified-name reader what the name belongs to
// (an attribute, an opening tag or a closing tag) — confirm against its users.
#[derive(Copy, Clone, PartialEq)]
enum QualifiedNameTarget {
AttributeNameTarget,
OpeningTagNameTarget,
ClosingTagNameTarget,
}
/// The kind of quote character, mirroring `Token::SingleQuote` and `Token::DoubleQuote`.
#[derive(Copy, Clone, PartialEq, Eq)]
enum QuoteToken {
SingleQuoteToken,
DoubleQuoteToken,
}
impl QuoteToken {
    /// Converts a lexer token into the matching quote kind.
    ///
    /// Callers are expected to pass quote tokens only: any other token trips a debug
    /// assertion, and yields `None` in builds without debug assertions.
    #[inline]
    fn from_token(t: Token) -> Option<Self> {
        let quote = match t {
            Token::DoubleQuote => Self::DoubleQuoteToken,
            Token::SingleQuote => Self::SingleQuoteToken,
            _ => {
                debug_assert!(false);
                return None;
            },
        };
        Some(quote)
    }

    /// Converts the quote kind back into the lexer token it came from.
    const fn as_token(self) -> Token {
        match self {
            Self::DoubleQuoteToken => Token::DoubleQuote,
            Self::SingleQuoteToken => Token::SingleQuote,
        }
    }
}
/// Intermediate values the parser collects while a piece of markup is being read;
/// the per-field notes say which construct each field belongs to.
struct MarkupData {
name: String, // used for processing instruction name
ref_data: String, // used for reference content
doctype: Option<String>, // keeps a copy of the original doctype
version: Option<XmlVersion>, // used for XML declaration version
encoding: Option<String>, // used for XML declaration encoding
standalone: Option<bool>, // used for XML declaration standalone parameter
element_name: Option<OwnedName>, // used for element name
quote: Option<QuoteToken>, // used to hold opening quote for attribute value
attr_name: Option<OwnedName>, // used to hold attribute name
attributes: AttributesSet, // used to hold all accumulated attributes
}
impl PullParser {
/// Pulls the next event out of the reader `r`.
///
/// Always pass the same reader: the parser keeps lexer state between calls, so the
/// result is undefined if a different reader is supplied each time.
///
/// Once a terminal result has been stored, every later call returns a clone of it.
pub fn next<R: Read>(&mut self, r: &mut R) -> Result {
    // A stored terminal result is handed out again and again.
    if let Some(finished) = self.final_result.as_ref() {
        return finished.clone();
    }
    // An event queued by an earlier call goes out before any new input is read.
    if let Some(queued) = self.next_event.take() {
        return queued;
    }
    // Namespace scope whose removal was postponed until after the previous event.
    if self.pop_namespace {
        self.pop_namespace = false;
        self.nst.pop();
    }

    loop {
        debug_assert!(self.next_event.is_none());
        debug_assert!(!self.pop_namespace);

        // Keep feeding tokens to the state machine until it yields an event or fails.
        let token = match self.lexer.next_token(r) {
            Ok(Token::Eof) => {
                // Forward pos to the lexer head
                self.next_pos();
                return self.handle_eof();
            },
            Ok(token) => token,
            Err(lexer_error) => {
                self.next_pos();
                return self.set_final_result(Err(lexer_error));
            },
        };

        match self.dispatch_token(token) {
            // The token was consumed without completing an event.
            None => {},
            Some(outcome) => {
                self.next_pos();
                return match outcome {
                    Ok(xml_event) => Ok(xml_event),
                    // Errors are terminal.
                    Err(xml_error) => self.set_final_result(Err(xml_error)),
                };
            },
        }
    }
}
/// Handle end of stream
#[cold]
fn handle_eof(&mut self) -> std::result::Result<XmlEvent, super::Error> {
let ev = if self.depth() == 0 {
if self.encountered == Encountered::Element && self.st == State::OutsideTag { // all is ok
Ok(XmlEvent::EndDocument)
} else if self.encountered < Encountered::Element {
self.error(SyntaxError::NoRootElement)
} else { // self.st != State::OutsideTag
self.error(SyntaxError::UnexpectedEof) // TODO: add expected hint?
}
} else if self.config.c.ignore_end_of_stream {
self.final_result = None;
self.lexer.reset_eof_handled();
return self.error(SyntaxError::UnbalancedRootElement);
} else {
self.error(SyntaxError::UnbalancedRootElement)
};
self.set_final_result(ev)
}
// Call this when a terminal event is reached: a copy of `result` is stored in
// `self.final_result` and `result` itself is handed back to the caller.
#[inline]
fn set_final_result(&mut self, result: Result) -> Result {
    let stored = result.clone();
    self.final_result = Some(stored);
    result
}
#[cold]
fn error(&self, e: SyntaxError) -> Result {
Err(Error {
pos: self.lexer.position(),
kind: ErrorKind::Syntax(e.to_cow()),
})
}
/// Advances the stored positions: drops the oldest entry when several are stored, or
/// refreshes the only entry from the lexer. Does nothing when none are stored.
#[inline]
fn next_pos(&mut self) {
    // unfortunately calls to next_pos will never be perfectly balanced with push_pos,
    // at very least because parse errors and EOF can happen unexpectedly without a prior push.
    match self.pos.len() {
        0 => {},
        1 => self.pos[0] = self.lexer.position(),
        _ => {
            self.pos.remove(0);
        },
    }
}
/// Appends the lexer's current position to `self.pos`, as long as the vector still has
/// spare capacity.
///
/// A full vector trips the debug assertion below. Without debug assertions nothing is
/// pushed; instead the oldest entry is dropped when more than one is stored.
#[inline]
#[track_caller]
fn push_pos(&mut self) {
debug_assert!(self.pos.len() != self.pos.capacity(), "You've found a bug in xml-rs, caused by calls to push_pos() in states that don't end up emitting events.
This case is ignored in release mode, and merely causes document positions to be out of sync.
Please file a bug and include the XML document that triggers this assert.");
// it has capacity preallocated for more than it ever needs, so this reduces code size
if self.pos.len() != self.pos.capacity() {
self.pos.push(self.lexer.position());
} else if self.pos.len() > 1 {
self.pos.remove(0); // this mitigates the excessive push_pos() call
}
}
/// Routes the token `t` to the handler that belongs to the parser's current state.
#[inline(never)]
fn dispatch_token(&mut self, t: Token) -> Option<Result> {
    let current = self.st;
    match current {
        State::DocumentStart => self.document_start(t),
        State::OutsideTag => self.outside_tag(t),
        State::InsideOpeningTag(sub) => self.inside_opening_tag(t, sub),
        State::InsideClosingTag(sub) => self.inside_closing_tag_name(t, sub),
        State::InsideProcessingInstruction(sub) => self.inside_processing_instruction(t, sub),
        State::InsideDeclaration(sub) => self.inside_declaration(t, sub),
        State::InsideDoctype(sub) => self.inside_doctype(t, sub),
        State::InsideComment => self.inside_comment(t),
        State::InsideCData => self.inside_cdata(t),
        State::InsideReference => self.inside_reference(t),
    }
}
/// Number of names currently on the element stack `est`.
#[inline]
fn depth(&self) -> usize {
    let open_elements = &self.est;
    open_elements.len()
}
/// Whether the scratch buffer currently holds any text.
#[inline]
fn buf_has_data(&self) -> bool {
    self.buf.len() > 0
}
/// Moves the scratch buffer's contents out, leaving an empty string behind.
#[inline]
fn take_buf(&mut self) -> String {
    std::mem::replace(&mut self.buf, String::new())
}
/// Switches the parser to state `st` and passes `ev` through unchanged.
#[inline]
fn into_state(&mut self, st: State, ev: Option<Result>) -> Option<Result> {
    let passthrough = ev;
    self.st = st;
    passthrough
}
/// Switches the parser to state `st` without producing an event.
#[inline]
fn into_state_continue(&mut self, st: State) -> Option<Result> {
    self.st = st;
    None
}
/// Switches the parser to state `st` and produces `ev`.
#[inline]
fn into_state_emit(&mut self, st: State, ev: Result) -> Option<Result> {
    self.st = st;
    Some(ev)
}
/// Dispatches tokens in order to process qualified name. If qualified name cannot be parsed,
/// an error is returned.
///
/// Name characters are accumulated in `self.buf`; at most one `:` is accepted, and not as
/// the first character.
///
/// # Parameters
/// * `t` --- next token;
/// * `target` --- the kind of name being read; it decides which tokens may end the name:
///   `=` for attribute names, `/>` for opening tag names, `>` for opening and closing tag
///   names, and whitespace for all of them;
/// * `on_name` --- a callback which is executed with the terminating token and the parsed
///   name once such a terminating token is encountered.
///
/// Returns `None` while the name is still being accumulated, an error for a token that
/// fits nowhere, for an over-long name or for a name that fails to parse, and otherwise
/// whatever `on_name` returns.
fn read_qualified_name<F>(&mut self, t: Token, target: QualifiedNameTarget, on_name: F) -> Option<Result>
where F: Fn(&mut Self, Token, OwnedName) -> Option<Result> {
// We can get here for the first time only when self.buf contains zero or one character,
// but first character cannot be a colon anyway
if self.buf.len() <= 1 {
self.read_prefix_separator = false;
}
// Takes the accumulated text out of the buffer, parses it as a qualified name and hands
// it to `on_name`; text that does not parse becomes an `InvalidQualifiedName` error.
let invoke_callback = move |this: &mut Self, t| {
let name = this.take_buf();
match name.parse() {
Ok(name) => on_name(this, t, name),
Err(()) => Some(this.error(SyntaxError::InvalidQualifiedName(name.into()))),
}
};
match t {
// There can be only one colon, and not as the first character
Token::Character(':') if self.buf_has_data() && !self.read_prefix_separator => {
self.buf.push(':');
self.read_prefix_separator = true;
None
},
// The first character must be a name-start character, later ones any name character
Token::Character(c) if c != ':' && (self.buf.is_empty() && is_name_start_char(c) ||
self.buf_has_data() && is_name_char(c)) => {
if self.buf.len() > self.config.max_name_length {
return Some(self.error(SyntaxError::ExceededConfiguredLimit));
}
self.buf.push(c);
None
},
// Tokens that end the name, depending on what kind of name is being read
Token::EqualsSign if target == QualifiedNameTarget::AttributeNameTarget => invoke_callback(self, t),
Token::EmptyTagEnd if target == QualifiedNameTarget::OpeningTagNameTarget => invoke_callback(self, t),
Token::TagEnd if target == QualifiedNameTarget::OpeningTagNameTarget ||
target == QualifiedNameTarget::ClosingTagNameTarget => invoke_callback(self, t),
Token::Character(c) if is_whitespace_char(c) => invoke_callback(self, t),
_ => Some(self.error(SyntaxError::UnexpectedQualifiedName(t))),
}
}
/// Dispatches tokens in order to process attribute value.
///
/// # Parameters
/// * `t` --- next token;
/// * `on_value` --- a callback which is called when terminating quote is encountered.
fn read_attribute_value<F>(&mut self, t: Token, on_value: F) -> Option<Result>
where F: Fn(&mut Self, String) -> Option<Result> {
match t {
Token::Character(c) if self.data.quote.is_none() && is_whitespace_char(c) => None, // skip leading whitespace
Token::DoubleQuote | Token::SingleQuote => match self.data.quote {
None => { // Entered attribute value
self.data.quote = QuoteToken::from_token(t);
None
},
Some(q) if q.as_token() == t => {
self.data.quote = None;
let value = self.take_buf();
on_value(self, value)
},
_ => {
if let Token::Character(c) = t {
if !self.is_valid_xml_char_not_restricted(c) {
return Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)));
}
}
if self.buf.len() > self.config.max_attribute_length {
return Some(self.error(SyntaxError::ExceededConfiguredLimit));
}
t.push_to_string(&mut self.buf);
None
},
},
Token::ReferenceStart if self.data.quote.is_some() => {
self.state_after_reference = self.st;
self.into_state_continue(State::InsideReference)
},
Token::OpeningTagStart | Token::ProcessingInstructionStart => Some(self.error(SyntaxError::UnexpectedOpeningTag)),
Token::Character(c) if !self.is_valid_xml_char_not_restricted(c) => {
Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)))
},
// Every character except " and ' and < is okay
_ if self.data.quote.is_some() => {
if self.buf.len() > self.config.max_attribute_length {
return Some(self.error(SyntaxError::ExceededConfiguredLimit));
}
t.push_to_string(&mut self.buf);
None
},
_ => Some(self.error(SyntaxError::UnexpectedToken(t))),
}
}
/// Finishes an opening tag: resolves the namespaces of the element and attribute names
/// and emits `StartElement`.
///
/// When `emit_end_element` is `true` the matching `EndElement` is queued as the next event
/// and a namespace pop is scheduled; otherwise the element name is pushed on the element
/// stack. Returns `None` if no element name has been collected.
fn emit_start_element(&mut self, emit_end_element: bool) -> Option<Result> {
    let mut name = self.data.take_element_name()?;
    let mut attributes = self.data.take_attributes().into_vec();

    // check whether the name prefix is bound and fix its namespace
    let element_ns = match self.nst.get(name.borrow().prefix_repr()) {
        Some("") => None, // default namespace
        Some(ns) => Some(ns.into()),
        None => return Some(self.error(SyntaxError::UnboundElementPrefix(name.to_string().into()))),
    };
    name.namespace = element_ns;

    // check and fix accumulated attributes prefixes
    for attr in attributes.iter_mut() {
        let resolved = match attr.name.prefix {
            // unprefixed attributes are left untouched
            None => continue,
            Some(ref pfx) => match self.nst.get(pfx) {
                Some("") => None, // default namespace
                Some(ns) => Some(ns.into()),
                None => return Some(self.error(SyntaxError::UnboundAttribute(attr.name.to_string().into()))),
            },
        };
        attr.name.namespace = resolved;
    }

    let name_copy = name.clone();
    if emit_end_element {
        self.pop_namespace = true;
        self.next_event = Some(Ok(XmlEvent::EndElement { name: name_copy }));
    } else {
        self.est.push(name_copy);
    }

    let namespace = self.nst.squash();
    let event = XmlEvent::StartElement { name, attributes, namespace };
    self.into_state_emit(State::OutsideTag, Ok(event))
}
fn emit_end_element(&mut self) -> Option<Result> {
let mut name = self.data.take_element_name()?;
// check whether the name prefix is bound and fix its namespace
match self.nst.get(name.borrow().prefix_repr()) {
Some("") => name.namespace = None, // default namespace
Some(ns) => name.namespace = Some(ns.into()),
None => return Some(self.error(SyntaxError::UnboundElementPrefix(name.to_string().into()))),
}
let op_name = self.est.pop()?;
if name == op_name {
self.pop_namespace = true;
self.into_state_emit(State::OutsideTag, Ok(XmlEvent::EndElement { name }))
} else {
Some(self.error(SyntaxError::UnexpectedClosingTag(format!("{name} != {op_name}").into())))
}
}
/// Checks `c` with `is_xml11_char` when the declared version is 1.1, and with
/// `is_xml10_char` otherwise (including when no version has been declared).
#[inline]
fn is_valid_xml_char(&self, c: char) -> bool {
    match self.data.version {
        Some(XmlVersion::Version11) => is_xml11_char(c),
        _ => is_xml10_char(c),
    }
}
/// Checks `c` with `is_xml11_char_not_restricted` when the declared version is 1.1, and
/// with `is_xml10_char` otherwise (including when no version has been declared).
#[inline]
fn is_valid_xml_char_not_restricted(&self, c: char) -> bool {
    match self.data.version {
        Some(XmlVersion::Version11) => is_xml11_char_not_restricted(c),
        _ => is_xml10_char(c),
    }
}
}
// Regression tests that drive `PullParser` directly over in-memory documents.
#[cfg(test)]
mod tests {
use crate::attribute::OwnedAttribute;
use crate::common::TextPosition;
use crate::name::OwnedName;
use crate::reader::events::XmlEvent;
use crate::reader::parser::PullParser;
use crate::reader::ParserConfig;
use std::io::BufReader;
// Parser with the default configuration.
fn new_parser() -> PullParser {
PullParser::new(ParserConfig::new())
}
// Pulls one event from parser `$p` over reader `$r` and panics unless it matches the
// pattern `$t` (and, in the second form, also satisfies the condition `$c`).
macro_rules! expect_event(
($r:expr, $p:expr, $t:pat) => (
match $p.next(&mut $r) {
$t => {}
e => panic!("Unexpected event: {e:?}\nExpected: {}", stringify!($t))
}
);
($r:expr, $p:expr, $t:pat => $c:expr ) => (
match $p.next(&mut $r) {
$t if $c => {}
e => panic!("Unexpected event: {e:?}\nExpected: {} if {}", stringify!($t), stringify!($c))
}
)
);
// Builds a `(reader, parser)` pair over the given document text.
macro_rules! test_data(
($d:expr) => ({
static DATA: &'static str = $d;
let r = BufReader::new(DATA.as_bytes());
let p = new_parser();
(r, p)
})
);
// A `;` inside an attribute value is kept as ordinary text.
#[test]
fn issue_3_semicolon_in_attribute_value() {
let (mut r, mut p) = test_data!(r#"
<a attr="zzz;zzz" />
"#);
expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
expect_event!(r, p, Ok(XmlEvent::StartElement { ref name, ref attributes, ref namespace }) =>
*name == OwnedName::local("a") &&
attributes.len() == 1 &&
attributes[0] == OwnedAttribute::new(OwnedName::local("attr"), "zzz;zzz") &&
namespace.is_essentially_empty()
);
expect_event!(r, p, Ok(XmlEvent::EndElement { ref name }) => *name == OwnedName::local("a"));
expect_event!(r, p, Ok(XmlEvent::EndDocument));
}
// A numeric character reference in element content is expanded to the character.
#[test]
fn issue_140_entity_reference_inside_tag() {
let (mut r, mut p) = test_data!(r"
<bla>&#9835;</bla>
");
expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
expect_event!(r, p, Ok(XmlEvent::StartElement { ref name, .. }) => *name == OwnedName::local("bla"));
expect_event!(r, p, Ok(XmlEvent::Characters(ref s)) => s == "\u{266b}");
expect_event!(r, p, Ok(XmlEvent::EndElement { ref name, .. }) => *name == OwnedName::local("bla"));
expect_event!(r, p, Ok(XmlEvent::EndDocument));
}
// Comment edge cases: `<!--` inside a comment, the forbidden `--->` ending, and markup-like
// text being reported verbatim when comments are not ignored.
#[test]
fn issue_220_comment() {
let (mut r, mut p) = test_data!(r"<x><!-- <!--></x>");
expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
expect_event!(r, p, Ok(XmlEvent::StartElement { .. }));
expect_event!(r, p, Ok(XmlEvent::EndElement { .. }));
expect_event!(r, p, Ok(XmlEvent::EndDocument));
let (mut r, mut p) = test_data!(r"<x><!-- <!---></x>");
expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
expect_event!(r, p, Ok(XmlEvent::StartElement { .. }));
expect_event!(r, p, Err(_)); // ---> is forbidden in comments
let (mut r, mut p) = test_data!(r"<x><!--<text&x;> <!--></x>");
p.config.c.ignore_comments = false;
expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
expect_event!(r, p, Ok(XmlEvent::StartElement { .. }));
expect_event!(r, p, Ok(XmlEvent::Comment(s)) => s == "<text&x;> <!");
expect_event!(r, p, Ok(XmlEvent::EndElement { .. }));
expect_event!(r, p, Ok(XmlEvent::EndDocument));
}
// Malformed XML declarations (stray name, repeated attribute, missing whitespace between
// attributes) are rejected.
#[test]
fn malformed_declaration_attrs() {
let (mut r, mut p) = test_data!(r#"<?xml version x="1.0"?>"#);
expect_event!(r, p, Err(_));
let (mut r, mut p) = test_data!(r#"<?xml version="1.0" version="1.0"?>"#);
expect_event!(r, p, Err(_));
let (mut r, mut p) = test_data!(r#"<?xml version="1.0"encoding="utf-8"?>"#);
expect_event!(r, p, Err(_));
let (mut r, mut p) = test_data!(r#"<?xml version="1.0"standalone="yes"?>"#);
expect_event!(r, p, Err(_));
let (mut r, mut p) = test_data!(r#"<?xml version="1.0" encoding="utf-8"standalone="yes"?>"#);
expect_event!(r, p, Err(_));
}
// `<` inside an attribute value is reported as `UnexpectedOpeningTag` at its position.
#[test]
fn opening_tag_in_attribute_value() {
use crate::reader::error::{SyntaxError, Error, ErrorKind};
let (mut r, mut p) = test_data!(r#"
<a attr="zzz<zzz" />
"#);
expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
expect_event!(r, p, Err(ref e) =>
*e == Error {
kind: ErrorKind::Syntax(SyntaxError::UnexpectedOpeningTag.to_cow()),
pos: TextPosition { row: 1, column: 24 }
}
);
}
// `<?` inside an attribute value is reported as `UnexpectedOpeningTag` as well.
#[test]
fn processing_instruction_in_attribute_value() {
use crate::reader::error::{SyntaxError, Error, ErrorKind};
let (mut r, mut p) = test_data!(r#"
<y F="<?abc"><x G="/">
"#);
expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
expect_event!(r, p, Err(ref e) =>
*e == Error {
kind: ErrorKind::Syntax(SyntaxError::UnexpectedOpeningTag.to_cow()),
pos: TextPosition { row: 1, column: 18 }
}
);
}
// A bare `&` directly followed by another reference is an error.
#[test]
fn reference_err() {
let (mut r, mut p) = test_data!(r"
<a>&&amp;</a>
");
expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
expect_event!(r, p, Ok(XmlEvent::StartElement { .. }));
expect_event!(r, p, Err(_));
}
// Guards the memory footprint of the state enums.
#[test]
fn state_size() {
assert_eq!(2, std::mem::size_of::<super::State>());
assert_eq!(1, std::mem::size_of::<super::DoctypeSubstate>());
}
}

View File

@@ -0,0 +1,41 @@
use crate::common::is_whitespace_char;
use crate::reader::error::SyntaxError;
use crate::reader::events::XmlEvent;
use crate::reader::lexer::Token;
use super::{PullParser, Result, State};
impl PullParser {
pub fn inside_cdata(&mut self, t: Token) -> Option<Result> {
match t {
Token::CDataEnd => {
let event = if self.config.c.cdata_to_characters {
// start called push_pos, but there will be no event to pop it
if self.buf.is_empty() {
self.next_pos();
}
None
} else {
let data = self.take_buf();
Some(Ok(XmlEvent::CData(data)))
};
self.into_state(State::OutsideTag, event)
},
Token::Character(c) if !self.is_valid_xml_char(c) => {
Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)))
},
Token::Character(c) => {
if !is_whitespace_char(c) {
self.inside_whitespace = false;
}
self.buf.push(c);
None
},
_ => {
debug_assert!(false, "unreachable");
None
},
}
}
}

View File

@@ -0,0 +1,32 @@
use super::{ClosingTagSubstate, PullParser, QualifiedNameTarget, Result, State};
use crate::common::is_whitespace_char;
use crate::namespace;
use crate::reader::error::SyntaxError;
use crate::reader::lexer::Token;
impl PullParser {
pub fn inside_closing_tag_name(&mut self, t: Token, s: ClosingTagSubstate) -> Option<Result> {
match s {
ClosingTagSubstate::CTInsideName => self.read_qualified_name(t, QualifiedNameTarget::ClosingTagNameTarget, |this, token, name| {
match name.prefix_ref() {
Some(prefix) if prefix == namespace::NS_XML_PREFIX ||
prefix == namespace::NS_XMLNS_PREFIX =>
Some(this.error(SyntaxError::InvalidNamePrefix(prefix.into()))),
_ => {
this.data.element_name = Some(name.clone());
match token {
Token::TagEnd => this.emit_end_element(),
Token::Character(c) if is_whitespace_char(c) => this.into_state_continue(State::InsideClosingTag(ClosingTagSubstate::CTAfterName)),
_ => Some(this.error(SyntaxError::UnexpectedTokenInClosingTag(token))),
}
}
}
}),
ClosingTagSubstate::CTAfterName => match t {
Token::TagEnd => self.emit_end_element(),
Token::Character(c) if is_whitespace_char(c) => None, // Skip whitespace
_ => Some(self.error(SyntaxError::UnexpectedTokenInClosingTag(t))),
},
}
}
}

View File

@@ -0,0 +1,34 @@
use crate::reader::error::SyntaxError;
use crate::reader::events::XmlEvent;
use crate::reader::lexer::Token;
use super::{PullParser, Result, State};
impl PullParser {
    /// Handles one token while inside a comment.
    ///
    /// With `ignore_comments` set, the content is validated but not stored and no event is
    /// emitted. Otherwise the content is collected (subject to `max_data_length`) and
    /// emitted as a `Comment` event when the comment ends.
    pub fn inside_comment(&mut self, t: Token) -> Option<Result> {
        let skip_comments = self.config.c.ignore_comments;
        match t {
            Token::CommentEnd => {
                if skip_comments {
                    return self.into_state_continue(State::OutsideTag);
                }
                let text = self.take_buf();
                self.into_state_emit(State::OutsideTag, Ok(XmlEvent::Comment(text)))
            },
            Token::Character(c) if !self.is_valid_xml_char(c) => {
                Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)))
            },
            other => {
                // Do not modify buffer if ignoring the comment
                if skip_comments {
                    return None;
                }
                if self.buf.len() > self.config.max_data_length {
                    return Some(self.error(SyntaxError::ExceededConfiguredLimit));
                }
                other.push_to_string(&mut self.buf);
                None
            },
        }
    }
}

View File

@@ -0,0 +1,180 @@
use crate::common::{is_whitespace_char, XmlVersion};
use crate::reader::error::SyntaxError;
use crate::reader::events::XmlEvent;
use crate::reader::lexer::Token;
use crate::util::Encoding;
use super::{
DeclarationSubstate, Encountered, PullParser, QualifiedNameTarget, Result, State,
DEFAULT_VERSION,
};
impl PullParser {
#[inline(never)]
fn emit_start_document(&mut self) -> Option<Result> {
debug_assert!(self.encountered == Encountered::None);
self.encountered = Encountered::Declaration;
let version = self.data.version;
let encoding = self.data.take_encoding();
let standalone = self.data.standalone;
if let Some(new_encoding) = encoding.as_deref() {
let new_encoding = match new_encoding.parse() {
Ok(e) => e,
Err(_) if self.config.ignore_invalid_encoding_declarations => Encoding::Latin1,
Err(_) => return Some(self.error(SyntaxError::UnsupportedEncoding(new_encoding.into()))),
};
let current_encoding = self.lexer.encoding();
if current_encoding != new_encoding {
let set = match (current_encoding, new_encoding) {
(Encoding::Unknown | Encoding::Default, new) if new != Encoding::Utf16 => new,
(Encoding::Utf16Be | Encoding::Utf16Le, Encoding::Utf16) => current_encoding,
_ if self.config.ignore_invalid_encoding_declarations => current_encoding,
_ => return Some(self.error(SyntaxError::ConflictingEncoding(new_encoding, current_encoding))),
};
self.lexer.set_encoding(set);
}
}
let current_encoding = self.lexer.encoding();
self.into_state_emit(State::OutsideTag, Ok(XmlEvent::StartDocument {
version: version.unwrap_or(DEFAULT_VERSION),
encoding: encoding.unwrap_or_else(move || current_encoding.to_string()),
standalone
}))
}
// TODO: remove redundancy via macros or extra methods
/// Handles one token while inside the XML declaration.
///
/// * `t` --- next token;
/// * `s` --- current position within the declaration.
///
/// The pseudo-attributes are accepted in the order `version`, `encoding`, `standalone`.
/// The first letter of each name is consumed by the corresponding `Before...` state, so the
/// name callbacks compare against the remainder (`ersion`, `ncoding`, `tandalone`).
/// When the declaration ends, `emit_start_document` produces the event.
pub fn inside_declaration(&mut self, t: Token, s: DeclarationSubstate) -> Option<Result> {
match s {
// --- version ---
DeclarationSubstate::BeforeVersion => match t {
Token::Character('v') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideVersion)),
Token::Character(c) if is_whitespace_char(c) => None, // continue
_ => Some(self.error(SyntaxError::UnexpectedToken(t))),
},
DeclarationSubstate::InsideVersion => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| {
match &*name.local_name {
"ersion" if name.namespace.is_none() =>
this.into_state_continue(State::InsideDeclaration(
// `=` directly after the name skips the "after name" state
if token == Token::EqualsSign {
DeclarationSubstate::InsideVersionValue
} else {
DeclarationSubstate::AfterVersion
}
)),
_ => Some(this.error(SyntaxError::UnexpectedNameInsideXml(name.to_string().into()))),
}
}),
DeclarationSubstate::AfterVersion => match t {
Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideVersionValue)),
Token::Character(c) if is_whitespace_char(c) => None,
_ => Some(self.error(SyntaxError::UnexpectedToken(t))),
},
DeclarationSubstate::InsideVersionValue => self.read_attribute_value(t, |this, value| {
// only versions 1.0 and 1.1 are recognized
this.data.version = match &*value {
"1.0" => Some(XmlVersion::Version10),
"1.1" => Some(XmlVersion::Version11),
_ => None
};
if this.data.version.is_some() {
this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterVersionValue))
} else {
Some(this.error(SyntaxError::UnexpectedXmlVersion(value.into())))
}
}),
// whitespace is mandatory before the next pseudo-attribute
DeclarationSubstate::AfterVersionValue => match t {
Token::Character(c) if is_whitespace_char(c) => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeEncoding)),
Token::ProcessingInstructionEnd => self.emit_start_document(),
_ => Some(self.error(SyntaxError::UnexpectedToken(t))),
},
// --- encoding (optional; `standalone` may follow the version directly) ---
DeclarationSubstate::BeforeEncoding => match t {
Token::Character('e') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideEncoding)),
Token::Character('s') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDecl)),
Token::ProcessingInstructionEnd => self.emit_start_document(),
Token::Character(c) if is_whitespace_char(c) => None, // skip whitespace
_ => Some(self.error(SyntaxError::UnexpectedToken(t))),
},
DeclarationSubstate::InsideEncoding => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| {
match &*name.local_name {
"ncoding" if name.namespace.is_none() =>
this.into_state_continue(State::InsideDeclaration(
if token == Token::EqualsSign { DeclarationSubstate::InsideEncodingValue } else { DeclarationSubstate::AfterEncoding }
)),
_ => Some(this.error(SyntaxError::UnexpectedName(name.to_string().into())))
}
}),
DeclarationSubstate::AfterEncoding => match t {
Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideEncodingValue)),
Token::Character(c) if is_whitespace_char(c) => None,
_ => Some(self.error(SyntaxError::UnexpectedToken(t))),
},
// the encoding name is stored as written; it is interpreted in `emit_start_document`
DeclarationSubstate::InsideEncodingValue => self.read_attribute_value(t, |this, value| {
this.data.encoding = Some(value);
this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterEncodingValue))
}),
DeclarationSubstate::AfterEncodingValue => match t {
Token::Character(c) if is_whitespace_char(c) => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeStandaloneDecl)),
Token::ProcessingInstructionEnd => self.emit_start_document(),
_ => Some(self.error(SyntaxError::UnexpectedToken(t))),
},
// --- standalone (optional) ---
DeclarationSubstate::BeforeStandaloneDecl => match t {
Token::Character('s') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDecl)),
Token::ProcessingInstructionEnd => self.emit_start_document(),
Token::Character(c) if is_whitespace_char(c) => None, // skip whitespace
_ => Some(self.error(SyntaxError::UnexpectedToken(t))),
},
DeclarationSubstate::InsideStandaloneDecl => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| {
match &*name.local_name {
"tandalone" if name.namespace.is_none() =>
this.into_state_continue(State::InsideDeclaration(
if token == Token::EqualsSign {
DeclarationSubstate::InsideStandaloneDeclValue
} else {
DeclarationSubstate::AfterStandaloneDecl
}
)),
_ => Some(this.error(SyntaxError::UnexpectedName(name.to_string().into()))),
}
}),
DeclarationSubstate::AfterStandaloneDecl => match t {
Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDeclValue)),
Token::Character(c) if is_whitespace_char(c) => None,
_ => Some(self.error(SyntaxError::UnexpectedToken(t))),
},
DeclarationSubstate::InsideStandaloneDeclValue => self.read_attribute_value(t, |this, value| {
// only `yes` and `no` are valid values
let standalone = match &*value {
"yes" => Some(true),
"no" => Some(false),
_ => None
};
if standalone.is_some() {
this.data.standalone = standalone;
this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterStandaloneDeclValue))
} else {
Some(this.error(SyntaxError::InvalidStandaloneDeclaration(value.into())))
}
}),
// nothing but whitespace may follow the last pseudo-attribute
DeclarationSubstate::AfterStandaloneDeclValue => match t {
Token::ProcessingInstructionEnd => self.emit_start_document(),
Token::Character(c) if is_whitespace_char(c) => None, // skip whitespace
_ => Some(self.error(SyntaxError::UnexpectedToken(t))),
},
}
}
}

View File

@@ -0,0 +1,251 @@
use std::fmt::Write;
use crate::common::{is_name_char, is_name_start_char, is_whitespace_char};
use crate::reader::error::SyntaxError;
use crate::reader::lexer::Token;
use super::{DoctypeSubstate, PullParser, QuoteToken, Result, State};
impl PullParser {
pub fn inside_doctype(&mut self, t: Token, substate: DoctypeSubstate) -> Option<Result> {
if let Some(ref mut doctype) = self.data.doctype {
write!(doctype, "{t}").ok()?;
if doctype.len() > self.config.max_data_length {
return Some(self.error(SyntaxError::ExceededConfiguredLimit));
}
}
match substate {
DoctypeSubstate::Outside => match t {
Token::TagEnd => self.into_state_continue(State::OutsideTag),
Token::MarkupDeclarationStart => {
self.buf.clear();
self.into_state_continue(State::InsideDoctype(DoctypeSubstate::InsideName))
},
Token::Character('%') => {
self.data.ref_data.clear();
self.data.ref_data.push('%');
self.into_state_continue(State::InsideDoctype(DoctypeSubstate::PEReferenceInDtd))
},
Token::CommentStart => {
self.into_state_continue(State::InsideDoctype(DoctypeSubstate::Comment))
},
Token::SingleQuote | Token::DoubleQuote => {
// just discard string literals
self.data.quote = super::QuoteToken::from_token(t);
self.into_state_continue(State::InsideDoctype(DoctypeSubstate::String))
},
Token::CDataEnd | Token::CDataStart => Some(self.error(SyntaxError::UnexpectedToken(t))),
// TODO: parse SYSTEM, and [
_ => None,
},
DoctypeSubstate::String => match t {
Token::SingleQuote if self.data.quote != Some(QuoteToken::SingleQuoteToken) => None,
Token::DoubleQuote if self.data.quote != Some(QuoteToken::DoubleQuoteToken) => None,
Token::SingleQuote | Token::DoubleQuote => {
self.data.quote = None;
self.into_state_continue(State::InsideDoctype(DoctypeSubstate::Outside))
},
_ => None,
},
DoctypeSubstate::Comment => match t {
Token::CommentEnd => {
self.into_state_continue(State::InsideDoctype(DoctypeSubstate::Outside))
},
_ => None,
},
DoctypeSubstate::InsideName => match t {
Token::Character(c @ 'A'..='Z') => {
self.buf.push(c);
None
},
Token::Character(c) if is_whitespace_char(c) => {
let buf = self.take_buf();
match buf.as_str() {
"ENTITY" => self.into_state_continue(State::InsideDoctype(DoctypeSubstate::BeforeEntityName)),
"NOTATION" | "ELEMENT" | "ATTLIST" => self.into_state_continue(State::InsideDoctype(DoctypeSubstate::SkipDeclaration)),
_ => Some(self.error(SyntaxError::UnknownMarkupDeclaration(buf.into()))),
}
},
_ => Some(self.error(SyntaxError::UnexpectedToken(t))),
},
DoctypeSubstate::BeforeEntityName => {
self.data.name.clear();
match t {
Token::Character(c) if is_whitespace_char(c) => None,
Token::Character('%') => { // % is for PEDecl
self.data.name.push('%');
self.into_state_continue(State::InsideDoctype(DoctypeSubstate::PEReferenceDefinitionStart))
},
Token::Character(c) if is_name_start_char(c) => {
if self.data.name.len() > self.config.max_name_length {
return Some(self.error(SyntaxError::ExceededConfiguredLimit));
}
self.data.name.push(c);
self.into_state_continue(State::InsideDoctype(DoctypeSubstate::EntityName))
},
_ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
}
},
DoctypeSubstate::EntityName => match t {
Token::Character(c) if is_whitespace_char(c) => {
self.into_state_continue(State::InsideDoctype(DoctypeSubstate::BeforeEntityValue))
},
Token::Character(c) if is_name_char(c) => {
if self.data.name.len() > self.config.max_name_length {
return Some(self.error(SyntaxError::ExceededConfiguredLimit));
}
self.data.name.push(c);
None
},
_ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
},
DoctypeSubstate::BeforeEntityValue => {
self.buf.clear();
match t {
Token::Character(c) if is_whitespace_char(c) => None,
// SYSTEM/PUBLIC not supported
Token::Character('S' | 'P') => {
let name = self.data.take_name();
self.entities.entry(name).or_default(); // Dummy value, but at least the name is recognized
self.into_state_continue(State::InsideDoctype(DoctypeSubstate::SkipDeclaration))
},
Token::SingleQuote | Token::DoubleQuote => {
self.data.quote = super::QuoteToken::from_token(t);
self.into_state_continue(State::InsideDoctype(DoctypeSubstate::EntityValue))
},
_ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
}
},
DoctypeSubstate::EntityValue => match t {
Token::SingleQuote if self.data.quote != Some(QuoteToken::SingleQuoteToken) => { self.buf.push('\''); None },
Token::DoubleQuote if self.data.quote != Some(QuoteToken::DoubleQuoteToken) => { self.buf.push('"'); None },
Token::SingleQuote | Token::DoubleQuote => {
self.data.quote = None;
let name = self.data.take_name();
let val = self.take_buf();
self.entities.entry(name).or_insert(val); // First wins
self.into_state_continue(State::InsideDoctype(DoctypeSubstate::SkipDeclaration)) // FIXME
},
Token::ReferenceStart | Token::Character('&') => {
self.data.ref_data.clear();
self.into_state_continue(State::InsideDoctype(DoctypeSubstate::NumericReferenceStart))
},
Token::Character('%') => {
self.data.ref_data.clear();
self.data.ref_data.push('%'); // include literal % in the name to distinguish from regular entities
self.into_state_continue(State::InsideDoctype(DoctypeSubstate::PEReferenceInValue))
},
Token::Character(c) if !self.is_valid_xml_char(c) => {
Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)))
},
Token::Character(c) => {
self.buf.push(c);
None
},
_ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
},
DoctypeSubstate::PEReferenceDefinitionStart => match t {
Token::Character(c) if is_whitespace_char(c) => None,
Token::Character(c) if is_name_start_char(c) => {
debug_assert_eq!(self.data.name, "%");
self.data.name.push(c);
self.into_state_continue(State::InsideDoctype(DoctypeSubstate::PEReferenceDefinition))
},
_ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
},
DoctypeSubstate::PEReferenceDefinition => match t {
Token::Character(c) if is_name_char(c) => {
if self.data.name.len() > self.config.max_name_length {
return Some(self.error(SyntaxError::ExceededConfiguredLimit));
}
self.data.name.push(c);
None
},
Token::Character(c) if is_whitespace_char(c) => {
self.into_state_continue(State::InsideDoctype(DoctypeSubstate::BeforeEntityValue))
},
_ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
},
DoctypeSubstate::PEReferenceInDtd => match t {
Token::Character(c) if is_name_char(c) => {
self.data.ref_data.push(c);
None
},
Token::ReferenceEnd | Token::Character(';') => {
let name = self.data.take_ref_data();
match self.entities.get(&name) {
Some(ent) => {
if let Err(e) = self.lexer.reparse(ent) {
return Some(Err(e));
}
self.into_state_continue(State::InsideDoctype(DoctypeSubstate::Outside))
},
None => Some(self.error(SyntaxError::UndefinedEntity(name.into()))),
}
},
_ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
},
DoctypeSubstate::PEReferenceInValue => match t {
Token::Character(c) if is_name_char(c) => {
self.data.ref_data.push(c);
None
},
Token::ReferenceEnd | Token::Character(';') => {
let name = self.data.take_ref_data();
match self.entities.get(&name) {
Some(ent) => {
self.buf.push_str(ent);
self.into_state_continue(State::InsideDoctype(DoctypeSubstate::EntityValue))
},
None => Some(self.error(SyntaxError::UndefinedEntity(name.into()))),
}
},
_ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
},
DoctypeSubstate::NumericReferenceStart => match t {
Token::Character('#') => {
self.into_state_continue(State::InsideDoctype(DoctypeSubstate::NumericReference))
},
Token::Character(c) if !self.is_valid_xml_char(c) => {
Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)))
},
Token::Character(c) => {
self.buf.push('&');
self.buf.push(c);
// named entities are not expanded inside doctype
self.into_state_continue(State::InsideDoctype(DoctypeSubstate::EntityValue))
},
_ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
},
DoctypeSubstate::NumericReference => match t {
Token::ReferenceEnd | Token::Character(';') => {
let r = self.data.take_ref_data();
// https://www.w3.org/TR/xml/#sec-entexpand
match self.numeric_reference_from_str(&r) {
Ok(c) => {
self.buf.push(c);
self.into_state_continue(State::InsideDoctype(DoctypeSubstate::EntityValue))
},
Err(e) => Some(self.error(e)),
}
},
Token::Character(c) if !self.is_valid_xml_char(c) => {
Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)))
},
Token::Character(c) => {
self.data.ref_data.push(c);
None
},
_ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
},
DoctypeSubstate::SkipDeclaration => match t {
Token::TagEnd => {
self.into_state_continue(State::InsideDoctype(DoctypeSubstate::Outside))
},
_ => None,
},
}
}
}

View File

@@ -0,0 +1,120 @@
use crate::attribute::OwnedAttribute;
use crate::common::{is_name_start_char, is_whitespace_char};
use crate::namespace;
use crate::reader::error::SyntaxError;
use crate::reader::lexer::Token;
use super::{OpeningTagSubstate, PullParser, QualifiedNameTarget, Result, State};
impl PullParser {
/// Processes one token while the parser is inside an opening tag (`<name attr="value" ...`).
///
/// `s` selects which part of the tag is being read: the element name, the space between
/// attributes, an attribute name, the gap between an attribute name and `=`, an attribute
/// value, or the position right after a value.
///
/// Returns `None` when the token was consumed and there is nothing to report (for example
/// skipped whitespace). Otherwise returns `Some(..)` with either a syntax error or whatever
/// the state-transition helpers (`into_state_continue`, `emit_start_element`) produce.
pub fn inside_opening_tag(&mut self, t: Token, s: OpeningTagSubstate) -> Option<Result> {
// Read the limit once here; it is used inside the attribute-value closure further down.
let max_attrs = self.config.max_attributes;
match s {
// Element name. `read_qualified_name` calls the closure with the parsed name and a token;
// NOTE(review): presumably the token that terminated the name - confirm in `read_qualified_name`.
OpeningTagSubstate::InsideName => self.read_qualified_name(t, QualifiedNameTarget::OpeningTagNameTarget, |this, token, name| {
match name.prefix_ref() {
// Element names must not use the `NS_XML_PREFIX` / `NS_XMLNS_PREFIX` prefixes.
Some(prefix) if prefix == namespace::NS_XML_PREFIX ||
prefix == namespace::NS_XMLNS_PREFIX =>
Some(this.error(SyntaxError::InvalidNamePrefix(prefix.into()))),
_ => {
this.data.element_name = Some(name.clone());
match token {
Token::TagEnd => this.emit_start_element(false),
Token::EmptyTagEnd => this.emit_start_element(true),
Token::Character(c) if is_whitespace_char(c) => this.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::InsideTag)),
_ => {
// Only asserted in debug builds; release builds silently ignore the token.
debug_assert!(false, "unreachable");
None
},
}
}
}
}),
// Between attributes: the tag either ends here or a new attribute name begins.
OpeningTagSubstate::InsideTag => match t {
Token::TagEnd => self.emit_start_element(false),
Token::EmptyTagEnd => self.emit_start_element(true),
Token::Character(c) if is_whitespace_char(c) => None, // skip whitespace
Token::Character(c) if is_name_start_char(c) => {
// The limit is checked against the buffer contents before this character is added.
if self.buf.len() > self.config.max_name_length {
return Some(self.error(SyntaxError::ExceededConfiguredLimit));
}
self.buf.push(c);
self.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::InsideAttributeName))
},
_ => Some(self.error(SyntaxError::UnexpectedTokenInOpeningTag(t))),
},
// Attribute name. The closure runs once the qualified name has been read.
OpeningTagSubstate::InsideAttributeName => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| {
// check that no attribute with such name is already present
// if there is one, XML is not well-formed
if this.data.attributes.contains(&name) {
return Some(this.error(SyntaxError::RedefinedAttribute(name.to_string().into())))
}
this.data.attr_name = Some(name);
match token {
Token::EqualsSign => this.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::InsideAttributeValue)),
Token::Character(c) if is_whitespace_char(c) => this.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::AfterAttributeName)),
// Note: the error reports the outer token `t`, not the closure's `token`.
_ => Some(this.error(SyntaxError::UnexpectedTokenInOpeningTag(t))) // likely unreachable
}
}),
// Whitespace between an attribute name and `=`; only more whitespace or `=` is accepted.
OpeningTagSubstate::AfterAttributeName => match t {
Token::EqualsSign => self.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::InsideAttributeValue)),
Token::Character(c) if is_whitespace_char(c) => None,
_ => Some(self.error(SyntaxError::UnexpectedTokenInOpeningTag(t)))
},
// Attribute value. The closure receives the completed value and files it either as a
// namespace declaration (stored in `nst`) or as a regular attribute.
OpeningTagSubstate::InsideAttributeValue => self.read_attribute_value(t, |this, value| {
let name = this.data.take_attr_name()?; // will always succeed here
match name.prefix_ref() {
// declaring a new prefix; it is sufficient to check prefix only
// because "xmlns" prefix is reserved
Some(namespace::NS_XMLNS_PREFIX) => {
let ln = &*name.local_name;
if ln == namespace::NS_XMLNS_PREFIX {
Some(this.error(SyntaxError::CannotRedefineXmlnsPrefix))
} else if ln == namespace::NS_XML_PREFIX && &*value != namespace::NS_XML_URI {
Some(this.error(SyntaxError::CannotRedefineXmlPrefix))
} else if value.is_empty() {
Some(this.error(SyntaxError::CannotUndefinePrefix(ln.into())))
} else {
this.nst.put(name.local_name.clone(), value);
this.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::AfterAttributeValue))
}
},
// declaring default namespace
None if &*name.local_name == namespace::NS_XMLNS_PREFIX =>
match &*value {
// The default namespace cannot be bound to the reserved prefixes or URIs.
namespace::NS_XMLNS_PREFIX | namespace::NS_XML_PREFIX | namespace::NS_XML_URI | namespace::NS_XMLNS_URI =>
Some(this.error(SyntaxError::InvalidDefaultNamespace(value.into()))),
_ => {
this.nst.put(namespace::NS_NO_PREFIX, value.clone());
this.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::AfterAttributeValue))
}
},
// regular attribute
_ => {
// Only regular attributes count toward `max_attributes`; the namespace
// declarations handled above are not pushed to `attributes`.
if this.data.attributes.len() >= max_attrs {
return Some(this.error(SyntaxError::ExceededConfiguredLimit));
}
this.data.attributes.push(OwnedAttribute { name, value });
this.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::AfterAttributeValue))
},
}
}),
// Right after a value: whitespace leads back to `InsideTag`, otherwise the tag must end.
OpeningTagSubstate::AfterAttributeValue => match t {
Token::Character(c) if is_whitespace_char(c) => {
self.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::InsideTag))
},
Token::TagEnd => self.emit_start_element(false),
Token::EmptyTagEnd => self.emit_start_element(true),
_ => Some(self.error(SyntaxError::UnexpectedTokenInOpeningTag(t))),
},
}
}
}

View File

@@ -0,0 +1,113 @@
use crate::common::{is_name_char, is_name_start_char, is_whitespace_char};
use crate::reader::error::SyntaxError;
use crate::reader::events::XmlEvent;
use crate::reader::lexer::Token;
use super::{DeclarationSubstate, Encountered, ProcessingInstructionSubstate, PullParser, Result, State};
impl PullParser {
pub fn inside_processing_instruction(&mut self, t: Token, s: ProcessingInstructionSubstate) -> Option<Result> {
match s {
ProcessingInstructionSubstate::PIInsideName => match t {
Token::Character(c) if self.buf.is_empty() && is_name_start_char(c) ||
self.buf_has_data() && is_name_char(c) => {
if self.buf.len() > self.config.max_name_length {
return Some(self.error(SyntaxError::ExceededConfiguredLimit));
}
self.buf.push(c);
None
},
Token::ProcessingInstructionEnd => {
// self.buf contains PI name
let name = self.take_buf();
// Don't need to check for declaration because it has mandatory attributes
// but there is none
match &*name {
// Name is empty, it is an error
"" => Some(self.error(SyntaxError::ProcessingInstructionWithoutName)),
// Found <?xml-like PI not at the beginning of a document,
// it is an error - see section 2.6 of XML 1.1 spec
n if "xml".eq_ignore_ascii_case(n) =>
Some(self.error(SyntaxError::InvalidXmlProcessingInstruction(name.into()))),
// All is ok, emitting event
_ => {
debug_assert!(self.next_event.is_none(), "{:?}", self.next_event);
// can't have a PI before `<?xml`
let event1 = self.set_encountered(Encountered::Declaration);
let event2 = Some(Ok(XmlEvent::ProcessingInstruction {
name,
data: None
}));
// emitting two events at once is cumbersome
let event1 = if event1.is_some() {
self.next_event = event2;
event1
} else {
event2
};
self.into_state(State::OutsideTag, event1)
},
}
},
Token::Character(c) if is_whitespace_char(c) => {
// self.buf contains PI name
let name = self.take_buf();
match &*name {
// We have not ever encountered an element and have not parsed XML declaration
"xml" if self.encountered == Encountered::None =>
self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeVersion)),
// Found <?xml-like PI after the beginning of a document,
// it is an error - see section 2.6 of XML 1.1 spec
n if "xml".eq_ignore_ascii_case(n) =>
Some(self.error(SyntaxError::InvalidXmlProcessingInstruction(name.into()))),
// All is ok, starting parsing PI data
_ => {
self.data.name = name;
// can't have a PI before `<?xml`
let next_event = self.set_encountered(Encountered::Declaration);
self.into_state(State::InsideProcessingInstruction(ProcessingInstructionSubstate::PIInsideData), next_event)
},
}
},
_ => {
let buf = self.take_buf();
Some(self.error(SyntaxError::UnexpectedProcessingInstruction(buf.into(), t)))
},
},
ProcessingInstructionSubstate::PIInsideData => match t {
Token::ProcessingInstructionEnd => {
let name = self.data.take_name();
let data = self.take_buf();
self.into_state_emit(
State::OutsideTag,
Ok(XmlEvent::ProcessingInstruction { name, data: Some(data) }),
)
},
Token::Character(c) if !self.is_valid_xml_char(c) => {
Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)))
},
// Any other token should be treated as plain characters
_ => {
if self.buf.len() > self.config.max_data_length {
return Some(self.error(SyntaxError::ExceededConfiguredLimit));
}
t.push_to_string(&mut self.buf);
None
},
},
}
}
}

View File

@@ -0,0 +1,76 @@
use super::{PullParser, Result, State};
use crate::common::{is_name_char, is_name_start_char, is_whitespace_char};
use crate::reader::error::SyntaxError;
use crate::reader::lexer::Token;
use std::char;
impl PullParser {
    /// Processes one token while the parser is inside an entity or character reference,
    /// i.e. between `&` and `;`.
    ///
    /// Characters of the reference are accumulated in `self.data.ref_data`. When
    /// `Token::ReferenceEnd` arrives the reference is resolved in this order:
    /// 1. the five predefined entities (`lt`, `gt`, `amp`, `apos`, `quot`);
    /// 2. numeric character references (`#...`), via [`Self::numeric_reference_from_str`];
    /// 3. `config.c.extra_entities`;
    /// 4. entities stored in `self.entities`.
    ///
    /// The parser then returns to `self.state_after_reference`. An unknown name yields
    /// `SyntaxError::UnexpectedEntity`, an empty one `SyntaxError::EmptyEntity`.
    pub fn inside_reference(&mut self, t: Token) -> Option<Result> {
        match t {
            // The first character must start a name or be `#`; later ones must be name characters.
            Token::Character(c) if !self.data.ref_data.is_empty() && is_name_char(c) ||
                             self.data.ref_data.is_empty() && (is_name_start_char(c) || c == '#') => {
                self.data.ref_data.push(c);
                None
            },

            Token::ReferenceEnd => {
                let name = self.data.take_ref_data();
                if name.is_empty() {
                    return Some(self.error(SyntaxError::EmptyEntity));
                }

                let c = match &*name {
                    "lt" => Some('<'),
                    "gt" => Some('>'),
                    "amp" => Some('&'),
                    "apos" => Some('\''),
                    "quot" => Some('"'),
                    _ if name.starts_with('#') => match self.numeric_reference_from_str(&name[1..]) {
                        Ok(c) => Some(c),
                        Err(e) => return Some(self.error(e)),
                    },
                    _ => None,
                };

                if let Some(c) = c {
                    self.buf.push(c);
                } else if let Some(v) = self.config.c.extra_entities.get(&name) {
                    self.buf.push_str(v);
                } else if let Some(v) = self.entities.get(&name) {
                    if self.state_after_reference == State::OutsideTag {
                        // an entity can expand to *elements*, so outside of a tag it needs a full reparse
                        if let Err(e) = self.lexer.reparse(v) {
                            return Some(Err(e));
                        }
                    } else {
                        // however, inside attributes it's not allowed to affect attribute quoting,
                        // so it can't be fed to the lexer
                        self.buf.push_str(v);
                    }
                } else {
                    return Some(self.error(SyntaxError::UnexpectedEntity(name.into())));
                }

                let prev_st = self.state_after_reference;
                // If the text buffer now ends in a non-whitespace character (an empty buffer
                // counts as such via the `'\0'` fallback), the current text run is no longer
                // whitespace-only.
                if prev_st == State::OutsideTag && !is_whitespace_char(self.buf.chars().last().unwrap_or('\0')) {
                    self.inside_whitespace = false;
                }
                self.into_state_continue(prev_st)
            },

            _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
        }
    }

    /// Converts the body of a numeric character reference into a character.
    ///
    /// `num_str` is the text after `&#`, without the trailing `;`: decimal digits
    /// (`65`) or `x` followed by hexadecimal digits (`x41`).
    ///
    /// # Errors
    ///
    /// * `SyntaxError::InvalidNumericEntity` if `num_str` is not made of digits of the
    ///   expected radix (this includes an empty string and a leading sign) or does not fit
    ///   in `u32`;
    /// * `SyntaxError::InvalidCharacterEntity` if the number is not a character permitted by
    ///   `is_valid_xml_char`, unless `config.c.replace_unknown_entity_references` is set, in
    ///   which case U+FFFD is returned instead.
    pub(crate) fn numeric_reference_from_str(&self, num_str: &str) -> std::result::Result<char, SyntaxError> {
        let (digits, radix) = match num_str.strip_prefix('x') {
            Some(hex) => (hex, 16),
            None => (num_str, 10),
        };
        // `u32::from_str_radix` tolerates a leading `+`, which the XML `CharRef` production
        // does not allow, so insist on digits only. An empty string passes this check and
        // is then rejected by `from_str_radix` itself.
        if !digits.chars().all(|d| d.is_digit(radix)) {
            return Err(SyntaxError::InvalidNumericEntity(num_str.into()));
        }
        let val = u32::from_str_radix(digits, radix)
            .map_err(|_| SyntaxError::InvalidNumericEntity(num_str.into()))?;
        match char::from_u32(val) {
            Some(c) if self.is_valid_xml_char(c) => Ok(c),
            Some(_) if self.config.c.replace_unknown_entity_references => Ok('\u{fffd}'),
            None if self.config.c.replace_unknown_entity_references => Ok('\u{fffd}'),
            _ => Err(SyntaxError::InvalidCharacterEntity(val)),
        }
    }
}

View File

@@ -0,0 +1,211 @@
use crate::common::is_whitespace_char;
use crate::reader::error::SyntaxError;
use crate::reader::events::XmlEvent;
use crate::reader::lexer::Token;
use super::{
ClosingTagSubstate, DoctypeSubstate, Encountered, OpeningTagSubstate,
ProcessingInstructionSubstate, PullParser, Result, State,
};
impl PullParser {
/// Processes one token while the parser is outside of any markup: in character data
/// between tags, or at the root level of the document.
///
/// Text-like tokens are accumulated in `self.buf`. A token that starts markup flushes the
/// buffer as a `Characters` or `Whitespace` event (or drops it, depending on the
/// configuration) and switches to the state that parses that markup.
///
/// Returns `None` when the token was consumed and there is nothing to report. Otherwise
/// returns `Some(..)` with a syntax error or the result of a state transition.
pub fn outside_tag(&mut self, t: Token) -> Option<Result> {
match t {
Token::Character(c) => {
if is_whitespace_char(c) {
// skip whitespace outside of the root element
if (self.config.c.trim_whitespace && self.buf.is_empty()) ||
(self.depth() == 0 && self.config.c.ignore_root_level_whitespace) {
return None;
}
} else {
self.inside_whitespace = false;
// Non-whitespace text is only allowed inside an element.
if self.depth() == 0 {
return Some(self.error(SyntaxError::UnexpectedTokenOutsideRoot(t)));
}
}
if !self.is_valid_xml_char_not_restricted(c) {
return Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)));
}
// Starting a new text run pushes a position for the event it will produce;
// otherwise enforce the data-length limit on the text buffered so far.
if self.buf.is_empty() {
self.push_pos();
} else if self.buf.len() > self.config.max_data_length {
return Some(self.error(SyntaxError::ExceededConfiguredLimit));
}
self.buf.push(c);
None
},
// These tokens are markup only in other contexts; inside an element they are
// appended to the text buffer.
Token::CommentEnd | Token::TagEnd | Token::EqualsSign |
Token::DoubleQuote | Token::SingleQuote |
Token::ProcessingInstructionEnd | Token::EmptyTagEnd => {
if self.depth() == 0 {
return Some(self.error(SyntaxError::UnexpectedTokenOutsideRoot(t)));
}
self.inside_whitespace = false;
if let Some(s) = t.as_static_str() {
if self.buf.is_empty() {
self.push_pos();
} else if self.buf.len() > self.config.max_data_length {
return Some(self.error(SyntaxError::ExceededConfiguredLimit));
}
self.buf.push_str(s);
}
None
},
// `&` inside an element starts a reference; remember which state to return to.
Token::ReferenceStart if self.depth() > 0 => {
self.state_after_reference = State::OutsideTag;
self.into_state_continue(State::InsideReference)
},
Token::ReferenceEnd if self.depth() > 0 => { // Semi-colon in a text outside an entity
self.inside_whitespace = false;
if self.buf.len() > self.config.max_data_length {
return Some(self.error(SyntaxError::ExceededConfiguredLimit));
}
Token::ReferenceEnd.push_to_string(&mut self.buf);
None
},
// With both `coalesce_characters` and `ignore_comments` enabled the comment is entered
// without flushing `self.buf` (unlike the generic markup arm below).
Token::CommentStart if self.config.c.coalesce_characters && self.config.c.ignore_comments => {
let next_event = self.set_encountered(Encountered::Comment);
// We need to switch the lexer into a comment mode inside comments
self.into_state(State::InsideComment, next_event)
}
// Likewise, CDATA that is converted to characters and coalesced keeps the buffer intact.
Token::CDataStart if self.depth() > 0 && self.config.c.coalesce_characters && self.config.c.cdata_to_characters => {
if self.buf.is_empty() {
self.push_pos(); // CDataEnd will pop pos if the buffer remains empty
}
// if coalescing chars, continue without event
self.into_state_continue(State::InsideCData)
},
_ => {
// Encountered some markup event, flush the buffer as characters
// or a whitespace
let mut next_event = if self.buf_has_data() {
let buf = self.take_buf();
if self.inside_whitespace && self.config.c.trim_whitespace {
// there will be no event emitted for this, but start of buffering has pushed a pos
self.next_pos();
None
} else if self.inside_whitespace && !self.config.c.whitespace_to_characters {
debug_assert!(buf.chars().all(|ch| ch.is_whitespace()), "ws={buf:?}");
Some(Ok(XmlEvent::Whitespace(buf)))
} else if self.config.c.trim_whitespace {
Some(Ok(XmlEvent::Characters(buf.trim_matches(is_whitespace_char).into())))
} else {
Some(Ok(XmlEvent::Characters(buf)))
}
} else { None };
self.inside_whitespace = true; // Reset inside_whitespace flag
// pos is popped whenever an event is emitted, so pushes must happen only if there will be an event to balance it
// and ignored comments don't pop
if t != Token::CommentStart || !self.config.c.ignore_comments {
self.push_pos();
}
match t {
// A new element may start inside another element, before any element has been seen,
// or at any time when multiple root elements are allowed.
Token::OpeningTagStart if self.depth() > 0 || self.encountered < Encountered::Element || self.config.allow_multiple_root_elements => {
// An event returned by `set_encountered` replaces the flushed text event, if any.
if let Some(e) = self.set_encountered(Encountered::Element) {
next_event = Some(e);
}
self.nst.push_empty();
self.into_state(State::InsideOpeningTag(OpeningTagSubstate::InsideName), next_event)
},
Token::ClosingTagStart if self.depth() > 0 =>
self.into_state(State::InsideClosingTag(ClosingTagSubstate::CTInsideName), next_event),
Token::CommentStart => {
if let Some(e) = self.set_encountered(Encountered::Comment) {
next_event = Some(e);
}
// We need to switch the lexer into a comment mode inside comments
self.into_state(State::InsideComment, next_event)
},
// A doctype is accepted only while nothing at or past `Encountered::Doctype` has been seen.
Token::DoctypeStart if self.encountered < Encountered::Doctype => {
if let Some(e) = self.set_encountered(Encountered::Doctype) {
next_event = Some(e);
}
self.data.doctype = Some(Token::DoctypeStart.to_string());
// We don't have a doctype event so skip this position
// FIXME: update when we have a doctype event
self.next_pos();
self.into_state(State::InsideDoctype(DoctypeSubstate::Outside), next_event)
},
Token::ProcessingInstructionStart =>
self.into_state(State::InsideProcessingInstruction(ProcessingInstructionSubstate::PIInsideName), next_event),
Token::CDataStart if self.depth() > 0 => {
self.into_state(State::InsideCData, next_event)
},
_ => Some(self.error(SyntaxError::UnexpectedToken(t))),
}
},
}
}
/// Processes a token at the very start of the document, while
/// `self.encountered < Encountered::Declaration` (asserted in debug builds).
///
/// Leading whitespace, a comment, the root element, a doctype or a processing instruction
/// (which may turn out to be the XML declaration) are accepted; any other token is an error.
pub fn document_start(&mut self, t: Token) -> Option<Result> {
debug_assert!(self.encountered < Encountered::Declaration);
match t {
Token::Character(c) => {
let next_event = self.set_encountered(Encountered::AnyChars);
// Only whitespace may precede the root element.
if !is_whitespace_char(c) {
return Some(self.error(SyntaxError::UnexpectedTokenOutsideRoot(t)));
}
self.inside_whitespace = true;
// skip whitespace outside of the root element
if (self.config.c.trim_whitespace && self.buf.is_empty()) ||
(self.depth() == 0 && self.config.c.ignore_root_level_whitespace) {
return self.into_state(State::OutsideTag, next_event);
}
self.push_pos();
self.buf.push(c);
self.into_state(State::OutsideTag, next_event)
},
Token::CommentStart => {
let next_event = self.set_encountered(Encountered::Comment);
self.into_state(State::InsideComment, next_event)
},
Token::OpeningTagStart => {
let next_event = self.set_encountered(Encountered::Element);
self.nst.push_empty();
self.into_state(State::InsideOpeningTag(OpeningTagSubstate::InsideName), next_event)
},
Token::DoctypeStart => {
let next_event = self.set_encountered(Encountered::Doctype);
self.data.doctype = Some(Token::DoctypeStart.to_string());
// We don't have a doctype event so skip this position
// FIXME: update when we have a doctype event
self.next_pos();
self.into_state(State::InsideDoctype(DoctypeSubstate::Outside), next_event)
},
// `set_encountered` is not called here; the processing-instruction handler calls it
// once the target name is known.
Token::ProcessingInstructionStart => {
self.push_pos();
self.into_state_continue(State::InsideProcessingInstruction(ProcessingInstructionSubstate::PIInsideName))
},
_ => Some(self.error(SyntaxError::UnexpectedToken(t))),
}
}
}

315
vendor/xml-rs/src/util.rs vendored Normal file
View File

@@ -0,0 +1,315 @@
use std::fmt;
use std::io::{self, Read};
use std::str::{self, FromStr};
/// Error produced by `CharReader::next_char_from` when the next character cannot be decoded.
#[derive(Debug)]
pub enum CharReadError {
/// The input ended after some, but not all, bytes of a character had been read.
UnexpectedEof,
/// The bytes read are not valid UTF-8.
Utf8(str::Utf8Error),
/// The underlying reader failed, or the data is invalid for a non-UTF-8 encoding
/// (reported with `io::ErrorKind::InvalidData`).
Io(io::Error),
}
impl From<str::Utf8Error> for CharReadError {
#[cold]
fn from(e: str::Utf8Error) -> Self {
Self::Utf8(e)
}
}
impl From<io::Error> for CharReadError {
#[cold]
fn from(e: io::Error) -> Self {
Self::Io(e)
}
}
impl fmt::Display for CharReadError {
#[cold]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
use self::CharReadError::{Io, UnexpectedEof, Utf8};
match *self {
UnexpectedEof => write!(f, "unexpected end of stream"),
Utf8(ref e) => write!(f, "UTF-8 decoding error: {e}"),
Io(ref e) => write!(f, "I/O error: {e}"),
}
}
}
/// Character encoding used for parsing.
///
/// The enum is `#[non_exhaustive]`, so matches outside this crate need a wildcard arm.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
#[non_exhaustive]
pub enum Encoding {
/// Explicitly UTF-8 only
Utf8,
/// UTF-8 fallback, but can be any 8-bit encoding
Default,
/// ISO-8859-1; every byte is mapped directly to the code point with the same value
Latin1,
/// US-ASCII; bytes outside the ASCII range are rejected
Ascii,
/// UTF-16, big-endian byte order
Utf16Be,
/// UTF-16, little-endian byte order
Utf16Le,
/// UTF-16 whose endianness is not known yet and will be sniffed
Utf16,
/// Not determined yet, may be sniffed to be anything
Unknown,
}
/// Returns `true` when `varcase` equals `lower`, ignoring ASCII case.
///
/// `lower` is expected to be written in lowercase already; only the bytes of `varcase`
/// are lowercased before comparing.
// Rustc inlines eq_ignore_ascii_case and creates kilobytes of code!
#[inline(never)]
fn icmp(lower: &str, varcase: &str) -> bool {
    // `zip` stops at the shorter input, so without the length check any prefix of a label
    // (including the empty string) and any string that merely starts with a label would
    // compare equal.
    lower.len() == varcase.len()
        && lower.bytes().zip(varcase.bytes()).all(|(l, v)| l == v.to_ascii_lowercase())
}
impl FromStr for Encoding {
    type Err = &'static str;

    /// Parses an encoding label such as the one found in an XML declaration.
    ///
    /// Labels are compared with `icmp`. Recognized labels, checked in this order:
    /// `utf-8`/`utf8`, `iso-8859-1`/`latin1`, `utf-16`/`utf16`, `ascii`/`us-ascii`.
    /// Anything else yields `Err("unknown encoding name")`.
    fn from_str(val: &str) -> Result<Self, Self::Err> {
        let is_one_of = |labels: [&str; 2]| labels.into_iter().any(|label| icmp(label, val));

        if is_one_of(["utf-8", "utf8"]) {
            return Ok(Self::Utf8);
        }
        if is_one_of(["iso-8859-1", "latin1"]) {
            return Ok(Self::Latin1);
        }
        if is_one_of(["utf-16", "utf16"]) {
            return Ok(Self::Utf16);
        }
        if is_one_of(["ascii", "us-ascii"]) {
            return Ok(Self::Ascii);
        }
        Err("unknown encoding name")
    }
}
impl fmt::Display for Encoding {
    /// Writes the conventional label of the encoding. `Default` is shown as `UTF-8` and
    /// all UTF-16 flavours as `UTF-16`.
    #[cold]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match *self {
            Self::Utf8 | Self::Default => "UTF-8",
            Self::Latin1 => "ISO-8859-1",
            Self::Ascii => "US-ASCII",
            Self::Utf16Be | Self::Utf16Le | Self::Utf16 => "UTF-16",
            Self::Unknown => "(unknown)",
        };
        f.write_str(label)
    }
}
/// Decodes a byte stream into characters, one character per call to `next_char_from`.
pub(crate) struct CharReader {
/// Encoding used to decode the next character. While it is `Encoding::Unknown` or
/// `Encoding::Utf16`, reading sniffs the input and replaces it with a concrete encoding.
pub encoding: Encoding,
}
impl CharReader {
    /// Creates a reader whose encoding is `Encoding::Unknown`, to be determined by sniffing
    /// the first bytes of the input.
    pub const fn new() -> Self {
        Self { encoding: Encoding::Unknown }
    }

    /// Reads and decodes the next character from `source` using `self.encoding`.
    ///
    /// Bytes are pulled one at a time through `Read::bytes`. While the encoding is
    /// `Unknown` or `Utf16`, the bytes are first passed to `sniff_bom`, which may consume a
    /// byte order mark and switch `self.encoding`.
    ///
    /// Returns `Ok(None)` when the input ends on a character boundary.
    ///
    /// # Errors
    ///
    /// * `CharReadError::UnexpectedEof` if the input ends in the middle of a character;
    /// * `CharReadError::Utf8` if four bytes have been read without forming valid UTF-8;
    /// * `CharReadError::Io` for reader failures, non-ASCII bytes in `Ascii` mode, invalid
    ///   UTF-16 surrogate pairs, or when four bytes were consumed without producing a character.
    pub fn next_char_from<R: Read>(&mut self, source: &mut R) -> Result<Option<char>, CharReadError> {
        let mut bytes = source.bytes();
        const MAX_CODEPOINT_LEN: usize = 4;

        let mut buf = [0u8; MAX_CODEPOINT_LEN];
        let mut pos = 0;
        while pos < MAX_CODEPOINT_LEN {
            let next = match bytes.next() {
                Some(Ok(b)) => b,
                Some(Err(e)) => return Err(e.into()),
                None if pos == 0 => return Ok(None),
                None => return Err(CharReadError::UnexpectedEof),
            };

            match self.encoding {
                Encoding::Utf8 | Encoding::Default => {
                    // fast path for ASCII subset
                    if pos == 0 && next.is_ascii() {
                        return Ok(Some(next.into()));
                    }

                    buf[pos] = next;
                    pos += 1;

                    match str::from_utf8(&buf[..pos]) {
                        Ok(s) => return Ok(s.chars().next()), // always Some(..)
                        Err(_) if pos < MAX_CODEPOINT_LEN => continue,
                        Err(e) => return Err(e.into()),
                    }
                },
                Encoding::Latin1 => {
                    return Ok(Some(next.into()));
                },
                Encoding::Ascii => {
                    return if next.is_ascii() {
                        Ok(Some(next.into()))
                    } else {
                        Err(CharReadError::Io(io::Error::new(io::ErrorKind::InvalidData, "char is not ASCII")))
                    };
                },
                Encoding::Unknown | Encoding::Utf16 => {
                    buf[pos] = next;
                    pos += 1;
                    if let Some(value) = self.sniff_bom(&buf[..pos], &mut pos) {
                        return value;
                    }
                },
                Encoding::Utf16Be => {
                    buf[pos] = next;
                    pos += 1;
                    if pos == 2 {
                        // A lone code unit that is not a surrogate is a complete character;
                        // otherwise keep reading the second unit of the pair.
                        if let Some(Ok(c)) = char::decode_utf16([u16::from_be_bytes(buf[..2].try_into().unwrap())]).next() {
                            return Ok(Some(c));
                        }
                    } else if pos == 4 {
                        return Self::surrogate([u16::from_be_bytes(buf[..2].try_into().unwrap()), u16::from_be_bytes(buf[2..4].try_into().unwrap())]);
                    }
                },
                Encoding::Utf16Le => {
                    buf[pos] = next;
                    pos += 1;
                    if pos == 2 {
                        if let Some(Ok(c)) = char::decode_utf16([u16::from_le_bytes(buf[..2].try_into().unwrap())]).next() {
                            return Ok(Some(c));
                        }
                    } else if pos == 4 {
                        return Self::surrogate([u16::from_le_bytes(buf[..2].try_into().unwrap()), u16::from_le_bytes(buf[2..4].try_into().unwrap())]);
                    }
                },
            }
        }
        Err(CharReadError::Io(io::ErrorKind::InvalidData.into()))
    }

    /// Inspects the bytes buffered so far (`buf`) for a byte order mark and updates
    /// `self.encoding` accordingly.
    ///
    /// When a complete BOM is recognized, `*pos` is reset to 0 so the BOM bytes are dropped.
    /// Returns `Some(result)` when a character can be returned to the caller right away and
    /// `None` when more bytes are needed or decoding should continue in the new encoding.
    #[cold]
    fn sniff_bom(&mut self, buf: &[u8], pos: &mut usize) -> Option<Result<Option<char>, CharReadError>> {
        // sniff BOM
        if buf.len() <= 3 && [0xEF, 0xBB, 0xBF].starts_with(buf) {
            if buf.len() == 3 && self.encoding != Encoding::Utf16 {
                *pos = 0;
                self.encoding = Encoding::Utf8;
            }
        } else if buf.len() <= 2 && [0xFE, 0xFF].starts_with(buf) {
            if buf.len() == 2 {
                *pos = 0;
                self.encoding = Encoding::Utf16Be;
            }
        } else if buf.len() <= 2 && [0xFF, 0xFE].starts_with(buf) {
            if buf.len() == 2 {
                *pos = 0;
                self.encoding = Encoding::Utf16Le;
            }
        } else if buf.len() == 1 && self.encoding == Encoding::Utf16 {
            // sniff ASCII char in UTF-16
            self.encoding = if buf[0] == 0 { Encoding::Utf16Be } else { Encoding::Utf16Le };
        } else {
            // UTF-8 is the default, but XML decl can change it to other 8-bit encoding
            self.encoding = Encoding::Default;
            // The buffered bytes may already be exactly one character: a single ASCII byte,
            // or a multi-byte sequence such as EF BB 80 that only looked like the start of
            // a BOM. Return it now. Otherwise the caller would read one more byte into the
            // same buffer and drop it, or report a spurious `UnexpectedEof` at end of input.
            // A buffer holding more than one character is left to the caller as before.
            if let Ok(s) = str::from_utf8(buf) {
                let mut chars = s.chars();
                if let (Some(c), None) = (chars.next(), chars.next()) {
                    return Some(Ok(Some(c)));
                }
            }
        }
        None
    }

    /// Decodes a pair of UTF-16 code units into one character, mapping a decoding failure
    /// to `CharReadError::Io` with `io::ErrorKind::InvalidData`.
    fn surrogate(buf: [u16; 2]) -> Result<Option<char>, CharReadError> {
        char::decode_utf16(buf).next().transpose()
            .map_err(|e| CharReadError::Io(io::Error::new(io::ErrorKind::InvalidData, e)))
    }
}
#[cfg(test)]
mod tests {
    use super::{CharReadError, CharReader, Encoding};

    /// Decodes the first character of `input` with the given reader.
    fn first_char(mut reader: CharReader, mut input: &[u8]) -> Result<Option<char>, CharReadError> {
        reader.next_char_from(&mut input)
    }

    /// Decodes the first character of `input`, letting the reader sniff the encoding.
    fn sniffed(input: &[u8]) -> Result<Option<char>, CharReadError> {
        first_char(CharReader::new(), input)
    }

    /// Decodes the first character of `input` with a preset encoding.
    fn decoded_as(encoding: Encoding, input: &[u8]) -> Result<Option<char>, CharReadError> {
        first_char(CharReader { encoding }, input)
    }

    #[test]
    fn test_next_char_from() {
        use std::io;

        // plain ASCII
        assert_eq!(sniffed(b"correct").unwrap(), Some('c'));

        // UTF-8 BOM followed by a multi-byte or an ASCII character
        assert_eq!(sniffed(b"\xEF\xBB\xBF\xE2\x80\xA2!").unwrap(), Some('•'));
        assert_eq!(sniffed(b"\xEF\xBB\xBFx123").unwrap(), Some('x'));

        // nothing after the BOM
        assert_eq!(sniffed(b"\xEF\xBB\xBF").unwrap(), None);

        // input ends inside the BOM
        assert!(matches!(sniffed(b"\xEF\xBB"), Err(CharReadError::UnexpectedEof)));

        // partial BOM followed by a byte that does not complete it
        assert!(sniffed(b"\xEF\xBB\x42").is_err());

        // UTF-16 BOMs, big-endian then little-endian
        assert_eq!(sniffed(b"\xFE\xFF\x00\x42").unwrap(), Some('B'));
        assert_eq!(sniffed(b"\xFF\xFE\x42\x00").unwrap(), Some('B'));

        // UTF-16 BOM with nothing, or only half a code unit, after it
        assert_eq!(sniffed(b"\xFF\xFE").unwrap(), None);
        assert!(matches!(sniffed(b"\xFF\xFE\x00"), Err(CharReadError::UnexpectedEof)));

        // correct BMP text, as UTF-8 and when forced to be read as UTF-16
        let cyrillic = "правильно".as_bytes();
        assert_eq!(sniffed(cyrillic).unwrap(), Some('п'));
        assert_eq!(decoded_as(Encoding::Utf16Be, cyrillic).unwrap(), Some('킿'));
        assert_eq!(decoded_as(Encoding::Utf16Le, cyrillic).unwrap(), Some('뿐'));

        // UTF-16 with unknown endianness
        assert!(decoded_as(Encoding::Utf16, b"\xD8\xD8\x80").is_err());
        assert_eq!(decoded_as(Encoding::Utf16, b"\x00\x42").unwrap(), Some('B'));
        assert_eq!(decoded_as(Encoding::Utf16, b"\x42\x00").unwrap(), Some('B'));
        assert!(decoded_as(Encoding::Utf16, &[0xEF, 0xBB, 0xBF, 0xFF, 0xFF]).is_err());

        // half a code unit
        assert!(decoded_as(Encoding::Utf16Be, b"\x00").is_err());

        // correct non-BMP
        assert_eq!(sniffed("😊".as_bytes()).unwrap(), Some('😊'));

        // empty
        assert_eq!(sniffed(b"").unwrap(), None);

        // incomplete code point
        match sniffed(b"\xf0\x9f\x98").unwrap_err() {
            super::CharReadError::UnexpectedEof => {},
            e => panic!("Unexpected result: {e:?}")
        };

        // invalid code point
        match sniffed(b"\xff\x9f\x98\x32").unwrap_err() {
            super::CharReadError::Utf8(_) => {},
            e => panic!("Unexpected result: {e:?}")
        };

        // error during read
        struct ErrorReader;
        impl io::Read for ErrorReader {
            fn read(&mut self, _: &mut [u8]) -> io::Result<usize> {
                Err(io::Error::new(io::ErrorKind::Other, "test error"))
            }
        }

        let mut failing = ErrorReader;
        match CharReader::new().next_char_from(&mut failing).unwrap_err() {
            super::CharReadError::Io(ref e) if e.kind() == io::ErrorKind::Other &&
                                               e.to_string().contains("test error") => {},
            e => panic!("Unexpected result: {e:?}")
        }
    }
}

95
vendor/xml-rs/src/writer.rs vendored Normal file
View File

@@ -0,0 +1,95 @@
//! Contains high-level interface for an events-based XML emitter.
//!
//! The most important type in this module is `EventWriter` which allows writing an XML document
//! to some output stream.
pub use self::config::EmitterConfig;
pub use self::emitter::EmitterError as Error;
pub use self::emitter::Result;
pub use self::events::XmlEvent;
use self::emitter::Emitter;
use std::io::prelude::*;
mod config;
mod emitter;
pub mod events;
/// A wrapper around an `std::io::Write` instance which emits an XML document according to
/// the events it is given.
pub struct EventWriter<W> {
// Output stream the document is written to; handed to the emitter on every `write` call.
sink: W,
// Emitter built from the `EmitterConfig` supplied at construction.
emitter: Emitter,
}
impl<W: Write> EventWriter<W> {
    /// Wraps `sink` in an `EventWriter` that uses the default configuration
    /// (`EmitterConfig::new()`).
    #[inline]
    pub fn new(sink: W) -> Self {
        let config = EmitterConfig::new();
        Self::new_with_config(sink, config)
    }

    /// Wraps `sink` in an `EventWriter` that uses the given configuration.
    #[inline]
    pub fn new_with_config(sink: W, config: EmitterConfig) -> Self {
        let emitter = Emitter::new(config);
        Self { sink, emitter }
    }

    /// Writes the piece of the XML document described by `event`.
    ///
    /// The bytes written may not correspond one-to-one to the event, because the
    /// configuration influences the output. For example, `XmlEvent::EndElement` may produce
    /// a separate closing tag or turn the element into an empty element, and
    /// `XmlEvent::CData` may be written as plain characters.
    ///
    /// A start-document event without an explicit encoding is written with `"UTF-8"`.
    pub fn write<'a, E>(&mut self, event: E) -> Result<()> where E: Into<XmlEvent<'a>> {
        let Self { sink, emitter } = self;
        match event.into() {
            XmlEvent::StartDocument { version, encoding, standalone } => {
                let encoding = encoding.unwrap_or("UTF-8");
                emitter.emit_start_document(sink, version, encoding, standalone)
            },
            XmlEvent::ProcessingInstruction { name, data } => {
                emitter.emit_processing_instruction(sink, name, data)
            },
            XmlEvent::StartElement { name, attributes, namespace } => {
                // Open a new namespace scope holding this element's declarations
                // before the start tag is emitted.
                emitter
                    .namespace_stack_mut()
                    .push_empty()
                    .checked_target()
                    .extend(namespace.as_ref());
                emitter.emit_start_element(sink, name, &attributes)
            },
            XmlEvent::EndElement { name } => {
                // The scope is popped whether or not emitting succeeded.
                let outcome = emitter.emit_end_element(sink, name);
                emitter.namespace_stack_mut().try_pop();
                outcome
            },
            XmlEvent::Comment(content) => emitter.emit_comment(sink, content),
            XmlEvent::CData(content) => emitter.emit_cdata(sink, content),
            XmlEvent::Characters(content) => emitter.emit_characters(sink, content),
        }
    }

    /// Returns a mutable reference to the underlying writer.
    ///
    /// Writing to the sink directly makes it very easy to produce an invalid XML document,
    /// so use this with care. Reasonable uses are calls such as `Write::flush`, which change
    /// the state of the stream without emitting new data.
    pub fn inner_mut(&mut self) -> &mut W {
        let Self { sink, .. } = self;
        sink
    }

    /// Returns a shared reference to the underlying writer.
    pub fn inner_ref(&self) -> &W {
        let Self { sink, .. } = self;
        sink
    }

    /// Consumes this `EventWriter` and returns the underlying writer.
    ///
    /// This is destructive: wrapping the returned writer again with `EventWriter::new()`
    /// creates a fresh writer with blank state; for example, accumulated namespaces are reset.
    pub fn into_inner(self) -> W {
        let Self { sink, .. } = self;
        sink
    }
}

157
vendor/xml-rs/src/writer/config.rs vendored Normal file
View File

@@ -0,0 +1,157 @@
//! Contains emitter configuration structure.
use crate::writer::EventWriter;
use std::borrow::Cow;
use std::io::Write;
/// Emitter configuration structure.
///
/// This structure contains various options which control XML document emitter behavior.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct EmitterConfig {
/// Line separator used to separate lines in formatted output. Default is `"\n"`.
pub line_separator: Cow<'static, str>,
/// A string which will be used for a single level of indentation. Default is `"  "`
/// (two spaces).
pub indent_string: Cow<'static, str>,
/// Whether or not the emitted document should be indented. Default is false.
///
/// The emitter is capable of performing automatic indentation of the emitted XML document.
/// It is done in a stream-like fashion and does not require knowledge of the whole
/// document in advance.
///
/// Sometimes, however, automatic indentation is undesirable, e.g. when you want to keep
/// the existing layout when processing an existing XML document. Also, the indentation
/// algorithm is not thoroughly tested. Hence it is disabled by default.
pub perform_indent: bool,
/// Whether or not characters in output events will be escaped. Default is true.
///
/// The emitter can automatically escape characters which can't appear in PCDATA sections
/// or element attributes of an XML document, like `<` or `"` (in attributes). This may
/// introduce some overhead because every corresponding piece of character data
/// has to be scanned for invalid characters.
///
/// If this option is disabled, the XML writer may produce non-well-formed documents, so
/// use the `false` value for this option with care.
pub perform_escaping: bool,
/// Whether or not to write XML document declaration at the beginning of a document.
/// Default is true.
///
/// This option controls whether the document declaration should be emitted automatically
/// before a root element is written if it was not emitted explicitly by the user.
pub write_document_declaration: bool,
/// Whether or not to convert elements with empty content to empty elements. Default is true.
///
/// This option allows turning elements like `<a></a>` (an element with empty content)
/// into `<a />` (an empty element).
pub normalize_empty_elements: bool,
/// Whether or not to emit CDATA events as plain characters. Default is false.
///
/// This option forces the emitter to convert CDATA events into regular character events,
/// performing all the necessary escaping beforehand. This may occasionally be useful
/// for feeding the document into incorrect parsers which do not support CDATA.
pub cdata_to_characters: bool,
/// Whether or not to keep element names to support `EndElement` events without explicit names.
/// Default is true.
///
/// This option makes the emitter keep the names of written elements in order to allow
/// omitting names when writing closing element tags. This could incur some memory overhead.
pub keep_element_names_stack: bool,
/// Whether or not to automatically insert leading and trailing spaces in emitted comments,
/// if necessary. Default is true.
///
/// This is a convenience option so that the user does not have to add spaces before and
/// after the comment text to get prettier comments: `<!-- something -->` instead of
/// `<!--something-->`.
pub autopad_comments: bool,
/// Whether or not to automatically insert spaces before the trailing `/>` in self-closing
/// elements. Default is true.
///
/// This option is only meaningful if `normalize_empty_elements` is true. For example, the
/// element `<a></a>` would be unaffected. When `normalize_empty_elements` is true, then when
/// this option is also true, the same element would appear `<a />`. If this option is false,
/// then the same element would appear `<a/>`.
pub pad_self_closing: bool,
}
impl EmitterConfig {
    /// Creates an emitter configuration with default values.
    ///
    /// The defaults are: `"\n"` line separator, two-space indentation string, no automatic
    /// indentation, and every other boolean option enabled except `cdata_to_characters`.
    ///
    /// You can tweak default options with builder-like pattern:
    ///
    /// ```rust
    /// use xml::writer::EmitterConfig;
    ///
    /// let config = EmitterConfig::new()
    ///     .line_separator("\r\n")
    ///     .perform_indent(true)
    ///     .normalize_empty_elements(false);
    /// ```
    #[inline]
    #[must_use]
    pub fn new() -> Self {
        Self {
            line_separator: "\n".into(),
            // Two spaces per indentation level, matching the documented default of
            // `indent_string`.
            indent_string: "  ".into(),
            perform_indent: false,
            perform_escaping: true,
            write_document_declaration: true,
            normalize_empty_elements: true,
            cdata_to_characters: false,
            keep_element_names_stack: true,
            autopad_comments: true,
            pad_self_closing: true,
        }
    }

    /// Creates an XML writer with this configuration.
    ///
    /// This is a convenience method for configuring and creating a writer at the same time:
    ///
    /// ```rust
    /// use xml::writer::EmitterConfig;
    ///
    /// let mut target: Vec<u8> = Vec::new();
    ///
    /// let writer = EmitterConfig::new()
    ///     .line_separator("\r\n")
    ///     .perform_indent(true)
    ///     .normalize_empty_elements(false)
    ///     .create_writer(&mut target);
    /// ```
    ///
    /// This method is exactly equivalent to calling `EventWriter::new_with_config()` with
    /// this configuration object; the configuration is consumed.
    #[inline]
    pub fn create_writer<W: Write>(self, sink: W) -> EventWriter<W> {
        EventWriter::new_with_config(sink, self)
    }
}
impl Default for EmitterConfig {
#[inline]
fn default() -> Self {
Self::new()
}
}
// Builder-style setters, one per public configuration option.
// `into` setters accept anything convertible into the field type; `val` setters take the
// value as is. `perform_escaping` is included so that every option of `EmitterConfig`
// can be set through the builder chain.
gen_setters!(EmitterConfig,
    line_separator: into Cow<'static, str>,
    indent_string: into Cow<'static, str>,
    perform_indent: val bool,
    perform_escaping: val bool,
    write_document_declaration: val bool,
    normalize_empty_elements: val bool,
    cdata_to_characters: val bool,
    keep_element_names_stack: val bool,
    autopad_comments: val bool,
    pad_self_closing: val bool
);

438
vendor/xml-rs/src/writer/emitter.rs vendored Normal file
View File

@@ -0,0 +1,438 @@
use std::error::Error;
use std::io::prelude::*;
use std::{fmt, io, result};
use crate::attribute::Attribute;
use crate::common;
use crate::common::XmlVersion;
use crate::escape::{AttributeEscapes, Escaped, PcDataEscapes};
use crate::name::{Name, OwnedName};
use crate::namespace::{NamespaceStack, NS_EMPTY_URI, NS_NO_PREFIX, NS_XMLNS_PREFIX, NS_XML_PREFIX};
use crate::writer::config::EmitterConfig;
/// An error which may be returned by the emitter (and therefore by `EventWriter`) when
/// writing XML events.
#[derive(Debug)]
pub enum EmitterError {
/// An I/O error occurred in the underlying `Write` instance.
Io(io::Error),
/// Document declaration has already been written to the output stream.
DocumentStartAlreadyEmitted,
/// The name of the last opening element is not available.
LastElementNameNotAvailable,
/// The name of the last opening element is not equal to the name of the provided
/// closing element.
EndElementNameIsNotEqualToLastStartElementName,
/// End element name is not specified when it is needed, for example, when automatic
/// closing is not enabled in configuration.
EndElementNameIsNotSpecified,
}
impl From<io::Error> for EmitterError {
#[cold]
fn from(err: io::Error) -> Self {
Self::Io(err)
}
}
/// Human-readable rendering: a fixed `emitter error: ` prefix followed by the description
/// of the specific variant.
impl fmt::Display for EmitterError {
    #[cold]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("emitter error: ")?;
        // Only the I/O variant carries data; every other variant maps to a fixed message.
        let description = match self {
            Self::Io(io_error) => return write!(f, "I/O error: {io_error}"),
            Self::DocumentStartAlreadyEmitted => "document start event has already been emitted",
            Self::LastElementNameNotAvailable => "last element name is not available",
            Self::EndElementNameIsNotEqualToLastStartElementName => {
                "end element name is not equal to last start element name"
            }
            Self::EndElementNameIsNotSpecified => {
                "end element name is not specified and can't be inferred"
            }
        };
        f.write_str(description)
    }
}
// No methods are overridden here, so the `Error` trait's default implementations apply.
impl Error for EmitterError {
}
/// A result type yielded by the emitter's operations.
///
/// The error type defaults to `EmitterError`, so `Result<T>` can be written for the common case.
pub type Result<T, E = EmitterError> = result::Result<T, E>;
// TODO: split into a low-level fast writer without any checks and formatting logic and a
// high-level indenting validating writer
/// Stateful XML emitter: tracks indentation, open elements and namespaces while events are
/// written to a caller-supplied `Write` target.
pub struct Emitter {
/// Options controlling indentation, escaping and the other emitter behaviors.
config: EmitterConfig,
/// Namespace stack; exposed mutably through `namespace_stack_mut()`, and its top frame is
/// written out as `xmlns` attributes when a start tag is emitted.
nst: NamespaceStack,
/// Current nesting depth: incremented after a start element, decremented after an end
/// element. Used as the number of indent strings written after a line separator.
indent_level: usize,
/// What was last written at each nesting level; a base entry is pushed in `new()` and one
/// more entry is pushed for every start element.
indent_stack: Vec<IndentFlags>,
/// Names of the currently open elements; only filled when `keep_element_names_stack` is set.
element_names: Vec<OwnedName>,
/// Set once the document declaration has been emitted (explicitly or automatically).
start_document_emitted: bool,
/// Set right after a start tag is written; with `normalize_empty_elements` the closing `>`
/// of that tag is deferred until the next event decides between `>` and `/>`.
just_wrote_start_element: bool,
}
impl Emitter {
    /// Creates an emitter in its initial state for the given configuration: nothing written
    /// yet, no open elements, an empty namespace stack.
    pub fn new(config: EmitterConfig) -> Self {
        // The indentation stack always starts with one entry describing the document level.
        let mut document_level_flags = Vec::with_capacity(16);
        document_level_flags.push(IndentFlags::WroteNothing);

        Self {
            config,
            nst: NamespaceStack::empty(),
            indent_level: 0,
            indent_stack: document_level_flags,
            element_names: Vec::new(),
            start_document_emitted: false,
            just_wrote_start_element: false,
        }
    }
}
/// Records what kind of content was written last at one nesting level; consulted by the
/// emitter when deciding whether to insert a line break and indentation.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
enum IndentFlags {
/// Nothing has been written at this level yet.
WroteNothing,
/// The last thing written at this level was markup (a tag, comment, PI or declaration).
WroteMarkup,
/// The last thing written at this level was text (character data or CDATA).
WroteText,
}
impl Emitter {
    /// Returns a mutable reference to the emitter's namespace stack.
    #[inline]
    pub fn namespace_stack_mut(&mut self) -> &mut NamespaceStack {
        &mut self.nst
    }

    /// True when the last thing written at the current nesting level was text.
    #[inline]
    fn wrote_text(&self) -> bool {
        matches!(self.indent_stack.last(), Some(IndentFlags::WroteText))
    }

    /// True when the last thing written at the current nesting level was markup.
    #[inline]
    fn wrote_markup(&self) -> bool {
        matches!(self.indent_stack.last(), Some(IndentFlags::WroteMarkup))
    }

    /// Marks the current nesting level as having just received text.
    #[inline]
    fn set_wrote_text(&mut self) {
        if let Some(e) = self.indent_stack.last_mut() {
            *e = IndentFlags::WroteText;
        }
    }

    /// Marks the current nesting level as having just received markup.
    #[inline]
    fn set_wrote_markup(&mut self) {
        if let Some(e) = self.indent_stack.last_mut() {
            *e = IndentFlags::WroteMarkup;
        }
    }

    /// Writes the configured line separator followed by `level` copies of the indent string.
    fn write_newline<W: Write>(&self, target: &mut W, level: usize) -> Result<()> {
        target.write_all(self.config.line_separator.as_bytes())?;
        for _ in 0..level {
            target.write_all(self.config.indent_string.as_bytes())?;
        }
        Ok(())
    }

    /// Inserts a line break plus indentation before a piece of markup when indentation is
    /// enabled, the previous output at this level was not text, and there is something to
    /// separate from (we are nested, or markup was already written at this level).
    fn before_markup<W: Write>(&mut self, target: &mut W) -> Result<()> {
        if self.config.perform_indent && !self.wrote_text() &&
            (self.indent_level > 0 || self.wrote_markup()) {
            let indent_level = self.indent_level;
            self.write_newline(target, indent_level)?;
            if self.indent_level > 0 && !self.config.indent_string.is_empty() {
                self.after_markup();
            }
        }
        Ok(())
    }

    /// Records that markup was just written at the current level.
    fn after_markup(&mut self) {
        self.set_wrote_markup();
    }

    /// Performs pre-markup indentation and opens a new nesting level on the indent stack.
    fn before_start_element<W: Write>(&mut self, target: &mut W) -> Result<()> {
        self.before_markup(target)?;
        self.indent_stack.push(IndentFlags::WroteNothing);
        Ok(())
    }

    /// Records the start tag as markup and increases the nesting depth.
    fn after_start_element(&mut self) {
        self.after_markup();
        self.indent_level += 1;
    }

    /// Puts a closing tag on its own line, indented one level less than the element's
    /// content, when indentation is enabled and the element's last child was markup.
    fn before_end_element<W: Write>(&mut self, target: &mut W) -> Result<()> {
        if self.config.perform_indent && self.indent_level > 0 && self.wrote_markup() &&
            !self.wrote_text() {
            let indent_level = self.indent_level;
            self.write_newline(target, indent_level - 1)
        } else {
            Ok(())
        }
    }

    /// Leaves the current nesting level (if any) and records the end tag as markup in the
    /// parent level.
    fn after_end_element(&mut self) {
        if self.indent_level > 0 {
            self.indent_level -= 1;
            self.indent_stack.pop();
        }
        self.set_wrote_markup();
    }

    /// Records that text was just written at the current level.
    fn after_text(&mut self) {
        self.set_wrote_text();
    }

    /// Writes the XML declaration `<?xml version=".." encoding=".." [standalone=".."]?>`.
    ///
    /// # Errors
    ///
    /// Returns `EmitterError::DocumentStartAlreadyEmitted` if a declaration was already
    /// written (explicitly or automatically), or an I/O error from `target`.
    pub fn emit_start_document<W: Write>(&mut self, target: &mut W,
                                         version: XmlVersion,
                                         encoding: &str,
                                         standalone: Option<bool>) -> Result<()> {
        if self.start_document_emitted {
            return Err(EmitterError::DocumentStartAlreadyEmitted);
        }
        self.start_document_emitted = true;

        self.before_markup(target)?;
        // The write is wrapped in a closure so that `after_markup()` runs even when it fails.
        let result = {
            let mut write = move || -> Result<()> {
                write!(target, "<?xml version=\"{version}\" encoding=\"{encoding}\"")?;

                if let Some(standalone) = standalone {
                    write!(target, " standalone=\"{}\"", if standalone { "yes" } else { "no" })?;
                }

                write!(target, "?>")?;

                Ok(())
            };
            write()
        };
        self.after_markup();

        result
    }

    /// Emits a default declaration (XML 1.0, UTF-8, no standalone flag) if none was written
    /// yet and `write_document_declaration` is enabled; otherwise does nothing.
    fn check_document_started<W: Write>(&mut self, target: &mut W) -> Result<()> {
        if !self.start_document_emitted && self.config.write_document_declaration {
            self.emit_start_document(target, common::XmlVersion::Version10, "UTF-8", None)
        } else {
            Ok(())
        }
    }

    /// Completes a start tag whose closing `>` was deferred (see `emit_start_element`),
    /// because the element turns out to have content.
    fn fix_non_empty_element<W: Write>(&mut self, target: &mut W) -> Result<()> {
        if self.config.normalize_empty_elements && self.just_wrote_start_element {
            self.just_wrote_start_element = false;
            target.write_all(b">").map_err(From::from)
        } else {
            Ok(())
        }
    }

    /// Writes a processing instruction `<?name data?>`; `data` is written verbatim.
    pub fn emit_processing_instruction<W: Write>(&mut self,
                                                 target: &mut W,
                                                 name: &str,
                                                 data: Option<&str>) -> Result<()> {
        self.check_document_started(target)?;
        self.fix_non_empty_element(target)?;

        self.before_markup(target)?;

        // Wrapped in a closure so that `after_markup()` runs even when the write fails.
        let result = {
            let mut write = move || -> Result<()> {
                write!(target, "<?{name}")?;

                if let Some(data) = data {
                    write!(target, " {data}")?;
                }

                write!(target, "?>")?;

                Ok(())
            };
            write()
        };

        self.after_markup();

        result
    }

    /// Writes `<name`, the namespace declarations of the top namespace frame and the
    /// attributes, but not the closing `>` of the start tag.
    #[track_caller]
    fn emit_start_element_initial<W>(&mut self, target: &mut W,
                                     name: Name<'_>,
                                     attributes: &[Attribute<'_>]) -> Result<()>
        where W: Write
    {
        self.check_document_started(target)?;
        self.fix_non_empty_element(target)?;
        self.before_start_element(target)?;
        write!(target, "<{}", name.repr_display())?;
        self.emit_current_namespace_attributes(target)?;
        self.emit_attributes(target, attributes)?;
        self.after_start_element();
        Ok(())
    }

    /// Writes a start tag for `name` with the given attributes.
    ///
    /// When `normalize_empty_elements` is enabled the closing `>` is deferred until the next
    /// event, so that an immediately following end element can be written as `/>`.
    #[track_caller]
    pub fn emit_start_element<W>(&mut self, target: &mut W,
                                 name: Name<'_>,
                                 attributes: &[Attribute<'_>]) -> Result<()>
        where W: Write
    {
        if self.config.keep_element_names_stack {
            self.element_names.push(name.to_owned());
        }

        self.emit_start_element_initial(target, name, attributes)?;
        self.just_wrote_start_element = true;

        if !self.config.normalize_empty_elements {
            write!(target, ">")?;
        }

        Ok(())
    }

    /// Writes the bindings of the top namespace frame as `xmlns`/`xmlns:prefix` attributes.
    #[track_caller]
    pub fn emit_current_namespace_attributes<W>(&self, target: &mut W) -> Result<()>
        where W: Write
    {
        for (prefix, uri) in self.nst.peek() {
            match prefix {
                // internal namespaces are not emitted
                NS_XMLNS_PREFIX | NS_XML_PREFIX => Ok(()),
                //// there is already a namespace binding with this prefix in scope
                //prefix if self.nst.get(prefix) == Some(uri) => Ok(()),
                // emit xmlns only if it is overridden
                NS_NO_PREFIX => if uri != NS_EMPTY_URI {
                    write!(target, " xmlns=\"{uri}\"")
                } else { Ok(()) },
                // everything else
                prefix => write!(target, " xmlns:{prefix}=\"{uri}\""),
            }?;
        }
        Ok(())
    }

    /// Writes each attribute as ` name="value"`, escaping the value when `perform_escaping`
    /// is enabled.
    pub fn emit_attributes<W: Write>(&self, target: &mut W,
                                     attributes: &[Attribute<'_>]) -> Result<()> {
        for attr in attributes {
            write!(target, " {}=\"", attr.name.repr_display())?;
            if self.config.perform_escaping {
                write!(target, "{}", Escaped::<AttributeEscapes>::new(attr.value))?;
            } else {
                write!(target, "{}", attr.value)?;
            }
            write!(target, "\"")?;
        }
        Ok(())
    }

    /// Closes the most recently opened element.
    ///
    /// With `keep_element_names_stack` enabled the remembered name is used, and a provided
    /// `name` is only checked against it. Otherwise `name` must be provided.
    ///
    /// # Errors
    ///
    /// * `LastElementNameNotAvailable` if names are tracked but no element is open;
    /// * `EndElementNameIsNotEqualToLastStartElementName` if `name` differs from the tracked one;
    /// * `EndElementNameIsNotSpecified` if no name is tracked and none was provided;
    /// * an I/O error from `target`.
    pub fn emit_end_element<W: Write>(&mut self, target: &mut W,
                                      name: Option<Name<'_>>) -> Result<()> {
        let owned_name = if self.config.keep_element_names_stack {
            Some(self.element_names.pop().ok_or(EmitterError::LastElementNameNotAvailable)?)
        } else {
            None
        };

        // Check that last started element name equals to the provided name, if there are both
        if let Some(ref last_name) = owned_name {
            if let Some(ref name) = name {
                if last_name.borrow() != *name {
                    return Err(EmitterError::EndElementNameIsNotEqualToLastStartElementName);
                }
            }
        }

        if let Some(name) = owned_name.as_ref().map(|n| n.borrow()).or(name) {
            if self.config.normalize_empty_elements && self.just_wrote_start_element {
                // The start tag is still open: finish it as a self-closing tag.
                self.just_wrote_start_element = false;
                let termination = if self.config.pad_self_closing { " />" } else { "/>" };
                let result = target.write_all(termination.as_bytes()).map_err(From::from);
                self.after_end_element();
                result
            } else {
                self.just_wrote_start_element = false;

                self.before_end_element(target)?;
                let result = write!(target, "</{}>", name.repr_display()).map_err(From::from);
                self.after_end_element();

                result
            }
        } else {
            Err(EmitterError::EndElementNameIsNotSpecified)
        }
    }

    /// Writes `content` as a CDATA section, or as escaped character data when
    /// `cdata_to_characters` is enabled.
    ///
    /// The content itself is not entity-escaped. Because a CDATA section cannot contain the
    /// terminator `]]>`, every occurrence of it is split across two adjacent sections
    /// (`]]` ends one section, `>` starts the next), which a parser reads back as the
    /// original text. Content without `]]>` is written as a single section.
    pub fn emit_cdata<W: Write>(&mut self, target: &mut W, content: &str) -> Result<()> {
        self.fix_non_empty_element(target)?;
        if self.config.cdata_to_characters {
            self.emit_characters(target, content)
        } else {
            target.write_all(b"<![CDATA[")?;
            // `split` always yields at least one (possibly empty) piece.
            let mut pieces = content.split("]]>");
            if let Some(first) = pieces.next() {
                target.write_all(first.as_bytes())?;
            }
            for piece in pieces {
                // "]]" + end of section + start of next section + ">"
                target.write_all(b"]]]]><![CDATA[>")?;
                target.write_all(piece.as_bytes())?;
            }
            target.write_all(b"]]>")?;

            self.after_text();

            Ok(())
        }
    }

    /// Writes character data, escaped for PCDATA when `perform_escaping` is enabled.
    pub fn emit_characters<W: Write>(&mut self, target: &mut W, content: &str) -> Result<()> {
        self.check_document_started(target)?;
        self.fix_non_empty_element(target)?;

        if self.config.perform_escaping {
            write!(target, "{}", Escaped::<PcDataEscapes>::new(content))?;
        } else {
            target.write_all(content.as_bytes())?;
        }

        self.after_text();
        Ok(())
    }

    /// Writes a comment `<!--content-->`; the content is written verbatim.
    ///
    /// With `autopad_comments` a single space is added at either end of the content unless
    /// that end already starts/ends with whitespace.
    pub fn emit_comment<W: Write>(&mut self, target: &mut W, content: &str) -> Result<()> {
        self.fix_non_empty_element(target)?;

        // TODO: add escaping dashes at the end of the comment

        let autopad_comments = self.config.autopad_comments;
        let write = move |target: &mut W| -> Result<()> {
            target.write_all(b"<!--")?;

            if autopad_comments && !content.starts_with(char::is_whitespace) {
                target.write_all(b" ")?;
            }

            target.write_all(content.as_bytes())?;

            if autopad_comments && !content.ends_with(char::is_whitespace) {
                target.write_all(b" ")?;
            }

            target.write_all(b"-->")?;

            Ok(())
        };

        self.before_markup(target)?;
        let result = write(target);
        self.after_markup();

        result
    }
}

268
vendor/xml-rs/src/writer/events.rs vendored Normal file
View File

@@ -0,0 +1,268 @@
//! Contains `XmlEvent` datatype, instances of which are consumed by the writer.
use std::borrow::Cow;
use crate::attribute::Attribute;
use crate::common::XmlVersion;
use crate::name::Name;
use crate::namespace::{Namespace, NS_NO_PREFIX};
/// A part of an XML output stream.
///
/// Objects of this enum are consumed by `EventWriter`. They correspond to different parts of
/// an XML document.
#[derive(Debug, Clone)]
pub enum XmlEvent<'a> {
/// Corresponds to XML document declaration.
///
/// This event should always be written before any other event. If it is not written
/// at all, a default XML declaration will be output if the corresponding option
/// is set in the configuration. Otherwise no declaration is written.
StartDocument {
/// XML version.
///
/// Defaults to `XmlVersion::Version10`.
version: XmlVersion,
/// XML document encoding.
///
/// Defaults to `Some("UTF-8")`; `None` is also written as `UTF-8`.
encoding: Option<&'a str>,
/// XML standalone declaration.
///
/// Defaults to `None`.
standalone: Option<bool>,
},
/// Denotes an XML processing instruction.
ProcessingInstruction {
/// Processing instruction target.
name: &'a str,
/// Processing instruction content.
data: Option<&'a str>,
},
/// Denotes a beginning of an XML element.
StartElement {
/// Qualified name of the element.
name: Name<'a>,
/// A list of attributes associated with the element.
///
/// Currently attributes are not checked for duplicates (TODO). Attribute values
/// will be escaped (when escaping is enabled), and all characters invalid for attribute
/// values like `"` or `<` will be changed into character entities.
attributes: Cow<'a, [Attribute<'a>]>,
/// Contents of the namespace mapping at this point of the document.
///
/// This mapping will be inspected for "new" entries, and if at this point of the document
/// a particular pair of prefix and namespace URI is already defined, no namespace
/// attributes will be emitted.
namespace: Cow<'a, Namespace>,
},
/// Denotes an end of an XML element.
EndElement {
/// Optional qualified name of the element.
///
/// If `None`, then it is assumed that the element name should be the last valid one.
/// If `Some` and element names tracking is enabled, then the writer will check it for
/// correctness.
name: Option<Name<'a>>,
},
/// Denotes CDATA content.
///
/// This event contains unparsed data, and no escaping will be performed when writing it
/// to the output stream.
CData(&'a str),
/// Denotes a comment.
///
/// NOTE(review): this documentation used to promise that the string is checked for invalid
/// sequences and that the write operation returns an error for them; the emitter's
/// `emit_comment` appears to write the text as is (plus optional padding) — confirm before
/// relying on validation.
Comment(&'a str),
/// Denotes character data outside of tags.
///
/// Contents of this event will be escaped if `perform_escaping` option is enabled,
/// that is, every character invalid for PCDATA will appear as a character entity.
Characters(&'a str),
}
impl<'a> XmlEvent<'a> {
    /// Returns a writer event for a processing instruction with the given target `name`
    /// and optional `data`.
    #[inline]
    #[must_use]
    pub const fn processing_instruction(name: &'a str, data: Option<&'a str>) -> Self {
        Self::ProcessingInstruction { name, data }
    }

    /// Returns a builder for a starting element.
    ///
    /// The builder starts with no attributes and an empty namespace mapping; it can then be
    /// used to add attributes and namespace bindings that begin at this element.
    #[inline]
    pub fn start_element<S>(name: S) -> StartElementBuilder<'a>
    where
        S: Into<Name<'a>>,
    {
        let name = name.into();
        StartElementBuilder {
            name,
            attributes: Vec::new(),
            namespace: Namespace::empty(),
        }
    }

    /// Returns a builder for a closing element.
    ///
    /// This method, unlike `start_element()`, does not accept a name because by default
    /// the writer is able to determine it automatically. However, when this functionality
    /// is disabled, it is possible to specify the name with the `name()` method on the builder.
    #[inline]
    #[must_use]
    pub const fn end_element() -> EndElementBuilder<'a> {
        let name = None;
        EndElementBuilder { name }
    }

    /// Returns a CDATA event.
    ///
    /// The provided text is written inside a CDATA section without entity escaping, unless
    /// the `cdata_to_characters` option makes the emitter write it as regular character data.
    #[inline]
    #[must_use]
    pub const fn cdata(data: &'a str) -> Self {
        Self::CData(data)
    }

    /// Returns a regular characters (PCDATA) event.
    ///
    /// Offending symbols, in particular `&` and `<`, are escaped by the writer when the
    /// `perform_escaping` option is enabled.
    #[inline]
    #[must_use]
    pub const fn characters(data: &'a str) -> Self {
        Self::Characters(data)
    }

    /// Returns a comment event carrying the given text.
    #[inline]
    #[must_use]
    pub const fn comment(data: &'a str) -> Self {
        Self::Comment(data)
    }
}
impl<'a> From<&'a str> for XmlEvent<'a> {
#[inline]
fn from(s: &'a str) -> Self {
XmlEvent::Characters(s)
}
}
/// A builder for a closing element event.
///
/// Created by `XmlEvent::end_element()` and converted into an `XmlEvent::EndElement`.
pub struct EndElementBuilder<'a> {
/// Explicit element name, if one was set with `name()`; `None` otherwise.
name: Option<Name<'a>>,
}
/// Configuration methods of the closing element builder.
impl<'a> EndElementBuilder<'a> {
    /// Sets the name of this closing element, replacing any previously set name.
    ///
    /// Usually the writer is able to determine closing element names automatically. If
    /// this functionality is enabled (by default it is), then this name is checked for
    /// correctness. It is possible, however, to disable such behavior; then the user must
    /// ensure manually that the closing element name is correct.
    #[inline]
    #[must_use]
    pub fn name<N>(mut self, name: N) -> Self
    where
        N: Into<Name<'a>>,
    {
        let element_name = name.into();
        self.name = Some(element_name);
        self
    }
}
impl<'a> From<EndElementBuilder<'a>> for XmlEvent<'a> {
fn from(b: EndElementBuilder<'a>) -> Self {
XmlEvent::EndElement { name: b.name }
}
}
/// A builder for a starting element event.
///
/// Created by `XmlEvent::start_element()` and converted into an `XmlEvent::StartElement`.
pub struct StartElementBuilder<'a> {
/// Qualified name of the element being started.
name: Name<'a>,
/// Attributes added so far, in insertion order.
attributes: Vec<Attribute<'a>>,
/// Namespace bindings added so far for this element.
namespace: Namespace,
}
impl<'a> StartElementBuilder<'a> {
    /// Adds an attribute with the given qualified name and value to this element.
    ///
    /// Attributes are appended in call order and are not checked for duplicates. Note that
    /// duplicate attributes are a violation of XML document well-formedness.
    ///
    /// NOTE(review): earlier documentation stated that the attribute prefix is validated
    /// against the namespace context and that reserved prefixes such as `xmlns` are rejected
    /// by the writer; no such check happens in this builder — confirm where it is enforced.
    #[inline]
    #[must_use]
    pub fn attr<N>(mut self, name: N, value: &'a str) -> Self
    where
        N: Into<Name<'a>>,
    {
        let attribute = Attribute::new(name.into(), value);
        self.attributes.push(attribute);
        self
    }

    /// Adds a namespace binding (prefix to URI) to the namespace context of this element.
    ///
    /// The intended emission rules are:
    ///
    /// * if no namespace URI was bound to the prefix at this point of the document, the
    ///   binding is written as part of this element's attribute set;
    /// * if the same URI was already bound to the prefix, no namespace attribute is emitted;
    /// * if a different URI was bound to the prefix, a new binding is written on this
    ///   element, shadowing the outer one.
    #[inline]
    #[must_use]
    pub fn ns<S1, S2>(mut self, prefix: S1, uri: S2) -> Self
    where
        S1: Into<String>,
        S2: Into<String>,
    {
        let bindings = &mut self.namespace;
        bindings.put(prefix, uri);
        self
    }

    /// Adds a default namespace binding (the one without a prefix) to the namespace context
    /// of this element.
    ///
    /// The same rules as for `ns()` apply to the default namespace mapping.
    #[inline]
    #[must_use]
    pub fn default_ns<S>(mut self, uri: S) -> Self
    where
        S: Into<String>,
    {
        let bindings = &mut self.namespace;
        bindings.put(NS_NO_PREFIX, uri);
        self
    }
}
impl<'a> From<StartElementBuilder<'a>> for XmlEvent<'a> {
#[inline]
fn from(b: StartElementBuilder<'a>) -> Self {
XmlEvent::StartElement {
name: b.name,
attributes: Cow::Owned(b.attributes),
namespace: Cow::Owned(b.namespace),
}
}
}
impl<'a> TryFrom<&'a crate::reader::XmlEvent> for XmlEvent<'a> {
type Error = crate::reader::Error;
fn try_from(event: &crate::reader::XmlEvent) -> Result<XmlEvent<'_>, Self::Error> {
event.as_writer_event().ok_or(crate::reader::Error {
pos: crate::common::TextPosition::new(),
kind: crate::reader::ErrorKind::UnexpectedEof,
})
}
}