Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion html5ever/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ trace_tokenizer = []

[dependencies]
log = "0.4"
mac = "0.1"
markup5ever = { version = "0.17", path = "../markup5ever" }
match_token = { workspace = true }

Expand Down
3 changes: 0 additions & 3 deletions html5ever/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,6 @@ pub use markup5ever::*;

pub use serialize::serialize;

#[macro_use]
mod macros;

mod util {
pub(crate) mod str;
}
Expand Down
28 changes: 14 additions & 14 deletions html5ever/src/tokenizer/char_ref/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@ use crate::data;
use crate::tendril::StrTendril;

use log::debug;
use mac::format_if;
use std::borrow::Cow::Borrowed;
use std::borrow::Cow::{self, Borrowed};
use std::char::from_u32;

use self::State::*;
Expand Down Expand Up @@ -257,12 +256,14 @@ impl CharRefTokenizer {
};

if error {
let msg = format_if!(
tokenizer.opts.exact_errors,
"Invalid numeric character reference",
"Invalid numeric character reference value 0x{:06X}",
self.num
);
let msg = if tokenizer.opts.exact_errors {
Cow::from(format!(
"Invalid numeric character reference value 0x{:06X}",
self.num
))
} else {
Cow::from("Invalid numeric character reference")
};
tokenizer.emit_error(msg);
}

Expand Down Expand Up @@ -299,12 +300,11 @@ impl CharRefTokenizer {
}

/// Emits the "Invalid character reference" error through `tokenizer`.
///
/// With `exact_errors` enabled the message also includes the buffered name;
/// otherwise the fixed string is used and nothing is formatted.
fn emit_name_error<Sink: TokenSink>(&mut self, tokenizer: &Tokenizer<Sink>) {
    let msg = if tokenizer.opts.exact_errors {
        Cow::from(format!("Invalid character reference &{}", self.name_buf()))
    } else {
        Cow::from("Invalid character reference")
    };
    tokenizer.emit_error(msg);
}

Expand Down
29 changes: 14 additions & 15 deletions html5ever/src/tokenizer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@ use self::char_ref::{CharRef, CharRefTokenizer};
use crate::util::str::lower_ascii_letter;

use log::{debug, trace};
use mac::format_if;
use markup5ever::{ns, small_char_set, TokenizerResult};
use markup5ever::{ns, small_char_set, time, unwrap_or_return, TokenizerResult};
use std::borrow::Cow::{self, Borrowed};
use std::cell::{Cell, RefCell, RefMut};
use std::collections::BTreeMap;
Expand Down Expand Up @@ -376,13 +375,13 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
#[cfg(feature = "trace_tokenizer")]
trace!(" error");

// Build the detailed message only when `exact_errors` is set; the default
// path uses the fixed string and skips the formatting work.
let msg = if self.opts.exact_errors {
    let c = self.current_char.get();
    let state = self.state.get();
    Cow::from(format!("Saw {c} in state {state:?}"))
} else {
    Cow::from("Bad character")
};
self.emit_error(msg);
}

Expand All @@ -391,12 +390,12 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
#[cfg(feature = "trace_tokenizer")]
trace!(" error_eof");

// Build the detailed message only when `exact_errors` is set; the default
// path uses the fixed string and skips the formatting work.
let msg = if self.opts.exact_errors {
    let state = self.state.get();
    Cow::from(format!("Saw EOF in state {state:?}"))
} else {
    Cow::from("Unexpected EOF")
};
self.emit_error(msg);
}

Expand Down
97 changes: 46 additions & 51 deletions html5ever/src/tree_builder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use crate::tokenizer;
use crate::tokenizer::states as tok_state;
use crate::tokenizer::{Doctype, EndTag, StartTag, Tag, TokenSink, TokenSinkResult};

use std::borrow::Cow::Borrowed;
use std::borrow::Cow::{self, Borrowed};
use std::cell::{Cell, Ref, RefCell};
use std::collections::VecDeque;
use std::iter::{Enumerate, Rev};
Expand All @@ -32,7 +32,6 @@ use crate::tokenizer::states::RawKind;
use crate::tree_builder::tag_sets::*;
use crate::util::str::to_escaped_string;
use log::{debug, log_enabled, warn, Level};
use mac::format_if;
use markup5ever::{expanded_name, local_name, namespace_prefix, ns};

#[macro_use]
Expand Down Expand Up @@ -488,12 +487,11 @@ where
if self.mode.get() == InsertionMode::Initial {
let (err, quirk) = data::doctype_error_and_quirks(&dt, self.opts.iframe_srcdoc);
if err {
self.sink.parse_error(format_if!(
self.opts.exact_errors,
"Bad DOCTYPE",
"Bad DOCTYPE: {:?}",
dt
));
self.sink.parse_error(if self.opts.exact_errors {
Cow::from(format!("Bad DOCTYPE: {dt:?}"))
} else {
Cow::from("Bad DOCTYPE")
});
}
let Doctype {
name,
Expand All @@ -513,12 +511,11 @@ where
self.mode.set(InsertionMode::BeforeHtml);
return tokenizer::TokenSinkResult::Continue;
} else {
self.sink.parse_error(format_if!(
self.opts.exact_errors,
"DOCTYPE in body",
"DOCTYPE in insertion mode {:?}",
self.mode.get()
));
self.sink.parse_error(if self.opts.exact_errors {
Cow::from(format!("DOCTYPE in insertion mode {:?}", self.mode.get()))
} else {
Cow::from("DOCTYPE in body")
});
return tokenizer::TokenSinkResult::Continue;
}
},
Expand Down Expand Up @@ -618,13 +615,15 @@ where
Sink: TreeSink<Handle = Handle>,
{
/// Reports an "Unexpected token" parse error to the sink and returns
/// `ProcessResult::Done`.
///
/// With `exact_errors` enabled the message includes the escaped token and the
/// current insertion mode; otherwise the fixed string is reported.
fn unexpected<T: fmt::Debug>(&self, _thing: &T) -> ProcessResult<Handle> {
    let msg = if self.opts.exact_errors {
        Cow::from(format!(
            "Unexpected token {} in insertion mode {:?}",
            to_escaped_string(_thing),
            self.mode.get()
        ))
    } else {
        Cow::from("Unexpected token")
    };
    // Report the error exactly once per call.
    self.sink.parse_error(msg);
    ProcessResult::Done
}

Expand Down Expand Up @@ -1053,20 +1052,19 @@ where
"thead" "tr" "body" "html");

for elem in self.open_elems.borrow().iter() {
let error;
{
let error = {
let elem_name = self.sink.elem_name(elem);
let name = elem_name.expanded();
if body_end_ok(name) {
continue;
}
error = format_if!(
self.opts.exact_errors,
"Unexpected open tag at end of body",
"Unexpected open tag {:?} at end of body",
name
);
}

if self.opts.exact_errors {
Cow::from(format!("Unexpected open tag {name:?} at end of body"))
} else {
Cow::from("Unexpected open tag at end of body")
}
};
self.sink.parse_error(error);
// FIXME: Do we keep checking after finding one bad tag?
// The spec suggests not.
Expand Down Expand Up @@ -1193,12 +1191,11 @@ where
/// Signal an error if it was not the first one.
fn expect_to_close(&self, name: LocalName) {
    // A result of exactly 1 from `pop_until_named` is the non-error case.
    if self.pop_until_named(name.clone()) == 1 {
        return;
    }

    // Report once; only the `exact_errors` message names the element.
    self.sink.parse_error(if self.opts.exact_errors {
        Cow::from(format!("Unexpected open element while closing {name:?}"))
    } else {
        Cow::from("Unexpected open element")
    });
}

Expand Down Expand Up @@ -1242,12 +1239,14 @@ where
self.orig_mode.set(Some(self.mode.get()));
ProcessResult::Reprocess(InsertionMode::InTableText, token)
} else {
self.sink.parse_error(format_if!(
self.opts.exact_errors,
"Unexpected characters in table",
"Unexpected characters {} in table",
to_escaped_string(&token)
));
self.sink.parse_error(if self.opts.exact_errors {
Cow::from(format!(
"Unexpected characters {} in table",
to_escaped_string(&token)
))
} else {
Cow::from("Unexpected characters in table")
});
self.foster_parent_in_body(token)
}
}
Expand Down Expand Up @@ -1553,15 +1552,11 @@ where
}
}

// Can't use unwrap_or_return!() due to rust-lang/rust#16617.
let match_idx = match match_idx {
None => {
// I believe this is impossible, because the root
// <html> element is in special_tag.
self.unexpected(&tag);
return;
},
Some(x) => x,
let Some(match_idx) = match_idx else {
// I believe this is impossible, because the root
// <html> element is in special_tag.
self.unexpected(&tag);
return;
};

self.generate_implied_end_except(tag.name.clone());
Expand Down
10 changes: 3 additions & 7 deletions html5ever/src/tree_builder/rules.rs
Original file line number Diff line number Diff line change
Expand Up @@ -491,13 +491,9 @@ where

</form> => {
if !self.in_html_elem_named(local_name!("template")) {
// Can't use unwrap_or_return!() due to rust-lang/rust#16617.
let node = match self.form_elem.take() {
None => {
self.sink.parse_error(Borrowed("Null form element pointer on </form>"));
return ProcessResult::Done;
}
Some(x) => x,
let Some(node) = self.form_elem.take() else {
self.sink.parse_error(Borrowed("Null form element pointer on </form>"));
return ProcessResult::Done;
};
if !self.in_scope(default_scope, |n| self.sink.same_node(&node, &n)) {
self.sink.parse_error(Borrowed("Form element not in scope on </form>"));
Expand Down
30 changes: 20 additions & 10 deletions html5ever/src/util/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,24 @@ pub(crate) fn lower_ascii_letter(c: char) -> Option<char> {
#[allow(non_snake_case)]
mod test {
use super::lower_ascii_letter;
use mac::test_eq;

test_eq!(lower_letter_a_is_a, lower_ascii_letter('a'), Some('a'));
test_eq!(lower_letter_A_is_a, lower_ascii_letter('A'), Some('a'));
test_eq!(lower_letter_symbol_is_None, lower_ascii_letter('!'), None);
test_eq!(
lower_letter_nonascii_is_None,
lower_ascii_letter('\u{a66e}'),
None
);

/// A lowercase ASCII letter maps to itself.
#[test]
fn lower_letter_a_is_a() {
    let lowered = lower_ascii_letter('a');
    assert_eq!(lowered, Some('a'));
}

/// An uppercase ASCII letter maps to its lowercase form.
#[test]
fn lower_letter_A_is_a() {
    let lowered = lower_ascii_letter('A');
    assert_eq!(lowered, Some('a'));
}

/// An ASCII symbol yields `None`.
#[test]
fn lower_letter_symbol_is_None() {
    let lowered = lower_ascii_letter('!');
    assert_eq!(lowered, None);
}

/// A non-ASCII character yields `None`.
#[test]
fn lower_letter_nonascii_is_None() {
    let lowered = lower_ascii_letter('\u{a66e}');
    assert_eq!(lowered, None);
}
}
1 change: 1 addition & 0 deletions markup5ever/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ pub mod interface;
pub mod serialize;
mod util {
pub mod buffer_queue;
mod macros;
pub mod smallcharset;
}

Expand Down
7 changes: 4 additions & 3 deletions html5ever/src/macros.rs → markup5ever/util/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

#[macro_export]
macro_rules! unwrap_or_return {
($opt:expr) => {{
let Some(x) = $opt else {
Expand All @@ -22,12 +23,12 @@ macro_rules! unwrap_or_return {
}};
}

/// Evaluates `$e` and measures how long the evaluation took.
///
/// Expands to a tuple `(result, dt)`, where `result` is the value of `$e` and
/// `dt` is the elapsed time in nanoseconds as a `u64`.
#[macro_export]
macro_rules! time {
    ($e:expr) => {{
        let t0 = ::std::time::Instant::now();
        let result = $e;
        // `as_nanos` returns `u128`; the cast truncates, which only matters
        // for durations longer than roughly 584 years.
        let dt = t0.elapsed().as_nanos() as u64;
        (result, dt)
    }};
}
1 change: 0 additions & 1 deletion xml5ever/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ trace_tokenizer = []

[dependencies]
log = "0.4"
mac = "0.1"
markup5ever = { version = "0.17", path = "../markup5ever" }

[dev-dependencies]
Expand Down
9 changes: 0 additions & 9 deletions xml5ever/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,6 @@

pub use markup5ever::*;

/// Runs `$e` and yields `(value, nanos)`, where `nanos` is the time the
/// evaluation took in nanoseconds, cast to `u64`.
macro_rules! time {
    ($e:expr) => {{
        let started = ::std::time::Instant::now();
        let value = $e;
        let nanos = started.elapsed().as_nanos() as u64;
        (value, nanos)
    }};
}

/// Driver
pub mod driver;
/// Serializer for XML5.
Expand Down
Loading