diff --git a/xml5ever/src/tokenizer/states.rs b/xml5ever/src/tokenizer/states.rs index 001f5cdf..5e9a19a4 100644 --- a/xml5ever/src/tokenizer/states.rs +++ b/xml5ever/src/tokenizer/states.rs @@ -8,73 +8,168 @@ // except according to those terms. //! Tokenizer states. -//! -//! This is public for use by the tokenizer tests. Other library -//! users should not have to care about this. - -#![allow(missing_docs)] // FIXME pub use AttrValueKind::*; pub use DoctypeKind::*; pub use XmlState::*; +/// Specifies either the public or system identifier from a [Document Type Declaration] (DTD). +/// +/// [Document Type Declaration]: https://en.wikipedia.org/wiki/Document_type_declaration #[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, Debug)] pub enum DoctypeKind { + /// The public identifier. Public, + /// The system identifier. System, } +/// Specifies the different states an XML tokenizer will assume during parsing. #[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, Debug)] pub enum XmlState { + /// The initial state of the parser. + /// + /// It is equivalent to the [`Data`](https://html.spec.whatwg.org/#data-state) state of the html parser, + /// except null codepoints do not cause errors. Data, + /// Indicates that the parser has found a `<` character and will try to parse a tag. TagState, + /// Indicates that the parser has consumed the `/` of a closing tag, like ``. EndTagState, + /// Indicates that the parser is currently parsing the name of a closing tag, like the `foo` of ``. EndTagName, + /// Indicates that the parser has finished parsing the name of a closing tag and expects a `>` to follow. EndTagNameAfter, + /// Indicates that the parser has started parsing a [processing instruction] (PI). + /// + /// This state is reached after the initial `?` character has been consumed. 
+ /// + /// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction Pi, + /// Indicates that the parser is currently parsing the target of a [processing instruction]. + /// + /// For example, the target of `` is `xml-stylesheet`. + /// + /// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction PiTarget, + /// Indicates that the parser has finished parsing the target of a [processing instruction]. + /// + /// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction PiTargetAfter, + /// Indicates that the parser is currently parsing the data of a [processing instruction]. + /// + /// The "data" refers to everything between the target and the closing `?` character. + /// + /// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction PiData, + /// Indicates that the parser has parsed the closing `?` of a [processing instruction]. + /// + /// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction PiAfter, + /// Indicates that the parser has parsed the initial `!` of a markup declaration. + /// + /// Examples of such declarations include `` or ``. MarkupDecl, + /// Indicates that the parser has parsed the start of a comment (``). CommentEnd, + /// Indicates that the parser has parsed a `-` character within a comment which may or may not + /// be the beginning of the comment end (`-->`). CommentEndDash, + /// Indicates that the parser has parsed `--!` within a comment which may or may not be part of the + /// end of the comment. Comments in XML can be closed with `--!>`. CommentEndBang, + /// Indicates that the parser has parsed the beginning of a CDATA section (``). CdataBracket, + /// Indicates that the parser has parsed two `]` characters within a CDATA section, which may be part of + /// the end of the section (`]]>`). CdataEnd, + /// Indicates that the parser is currently parsing the name of a tag, such as `foo` in ``. 
TagName, + /// Indicates that the parser has parsed the `/` of a self-closing tag, such as ``. TagEmpty, + /// Indicates that the parser has finished parsing the name of a tag and is now expecting either attributes or + /// a `>`. TagAttrNameBefore, + /// Indicates that the parser is currently parsing the name of an attribute within a tag, such as + /// `bar` in ``. TagAttrName, + /// Indicates that the parser has finished parsing the name of an attribute. TagAttrNameAfter, + /// Indicates that the parser is about to parse the value of an attribute. TagAttrValueBefore, + /// Indicates that the parser is currently parsing the value of an attribute, such as `baz` in + /// ``. + /// + /// Includes information about how the value is quoted, because the quotes before and after the attribute + /// value need to match. TagAttrValue(AttrValueKind), + /// Indicates that the parser has parsed the beginning of a document type definition (``. DoctypeName, + /// Indicates that the parser has finished parsing the name of the document type definition and now optionally + /// expects either a public or a system identifier. AfterDoctypeName, + /// Indicates that the parser has parsed a keyword for either a public or system identifier (`PUBLIC` or `SYSTEM`). AfterDoctypeKeyword(DoctypeKind), + /// Indicates that the parser is about to parse the value of a public or system identifier within + /// a document type definition, such as `foo` in + /// ``. BeforeDoctypeIdentifier(DoctypeKind), + /// Indicates that the parser is currently parsing the value of a public or system identifier + /// that is surrounded by double quotes, such as `foo` in + /// ``. DoctypeIdentifierDoubleQuoted(DoctypeKind), + /// Indicates that the parser is currently parsing the value of a public or system identifier + /// that is surrounded by single quotes, such as `foo` in + /// ``. 
DoctypeIdentifierSingleQuoted(DoctypeKind), + /// Indicates that the parser has finished parsing either a public or system identifier within a + /// document type definition. AfterDoctypeIdentifier(DoctypeKind), + /// Indicates that the parser has finished parsing a public identifier and now expects + /// a system identifier. BetweenDoctypePublicAndSystemIdentifiers, + /// Indicates that the parser is currently parsing an ill-formed document type definition, such as + /// ``. BogusDoctype, + /// Indicates that the parser is currently parsing an ill-formed comment, such as + /// ``. BogusComment, + /// Interrupts the tokenizer for one single call to `step`. + /// + /// It is unclear whether this is still necessary ([#649](https://github.com/servo/html5ever/issues/649)). Quiescent, } +/// Specifies how an attribute value is quoted, if at all. #[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, Debug)] pub enum AttrValueKind { + /// An attribute value that is not surrounded by quotes, like `bar` in `foo=bar`. Unquoted, + /// An attribute value that is surrounded by single quotes, like `bar` in `foo='bar'`. SingleQuoted, + /// An attribute value that is surrounded by double quotes, like `bar` in `foo="bar"`. DoubleQuoted, }