Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion html5ever/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ trace_tokenizer = []

[dependencies]
log = "0.4"
mac = "0.1"
markup5ever = { version = "0.17", path = "../markup5ever" }
match_token = { workspace = true }

Expand Down
5 changes: 2 additions & 3 deletions html5ever/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,12 @@ pub use markup5ever::*;

pub use serialize::serialize;

#[macro_use]
mod macros;

// Private namespace for crate-internal helpers; the nested `str` module is
// visible crate-wide, while `util` itself stays private to the crate root.
mod util {
pub(crate) mod str;
}

pub(crate) mod macros;

pub mod driver;
pub mod serialize;
pub mod tokenizer;
Expand Down
7 changes: 4 additions & 3 deletions html5ever/src/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,14 @@ macro_rules! unwrap_or_return {
x
}};
}
pub(crate) use unwrap_or_return;

/// Evaluates `$e` and measures how long the evaluation took.
///
/// Expands to a tuple `(result, dt)`, where `result` is the value of `$e`
/// and `dt` is the elapsed time in nanoseconds as a `u64`.
macro_rules! time {
    ($e:expr) => {{
        let t0 = ::std::time::Instant::now();
        let result = $e;
        // `as_nanos` yields a `u128`; it is narrowed to the `u64` this macro
        // has always produced.
        let dt = t0.elapsed().as_nanos() as u64;
        (result, dt)
    }};
}
pub(crate) use time;
28 changes: 14 additions & 14 deletions html5ever/src/tokenizer/char_ref/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@ use crate::data;
use crate::tendril::StrTendril;

use log::debug;
use mac::format_if;
use std::borrow::Cow::Borrowed;
use std::borrow::Cow::{self, Borrowed};
use std::char::from_u32;

use self::State::*;
Expand Down Expand Up @@ -257,12 +256,14 @@ impl CharRefTokenizer {
};

if error {
let msg = format_if!(
tokenizer.opts.exact_errors,
"Invalid numeric character reference",
"Invalid numeric character reference value 0x{:06X}",
self.num
);
let msg = if tokenizer.opts.exact_errors {
Cow::from(format!(
"Invalid numeric character reference value 0x{:06X}",
self.num
))
} else {
Cow::from("Invalid numeric character reference")
};
tokenizer.emit_error(msg);
}

Expand Down Expand Up @@ -299,12 +300,11 @@ impl CharRefTokenizer {
}

/// Reports a parse error for an invalid named character reference.
///
/// When `exact_errors` is enabled the message includes the offending name
/// taken from `name_buf()`; otherwise a static message is borrowed and no
/// string is allocated.
fn emit_name_error<Sink: TokenSink>(&mut self, tokenizer: &Tokenizer<Sink>) {
    let msg = if tokenizer.opts.exact_errors {
        Cow::from(format!("Invalid character reference &{}", self.name_buf()))
    } else {
        Cow::from("Invalid character reference")
    };
    tokenizer.emit_error(msg);
}

Expand Down
28 changes: 14 additions & 14 deletions html5ever/src/tokenizer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,14 @@ use self::char_ref::{CharRef, CharRefTokenizer};
use crate::util::str::lower_ascii_letter;

use log::{debug, trace};
use mac::format_if;
use markup5ever::{ns, small_char_set, TokenizerResult};
use std::borrow::Cow::{self, Borrowed};
use std::cell::{Cell, RefCell, RefMut};
use std::collections::BTreeMap;
use std::mem;

pub use crate::buffer_queue::{BufferQueue, FromSet, NotFromSet, SetResult};
use crate::macros::{time, unwrap_or_return};
use crate::tendril::StrTendril;
use crate::{Attribute, LocalName, QualName, SmallCharSet};

Expand Down Expand Up @@ -376,13 +376,13 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
#[cfg(feature = "trace_tokenizer")]
trace!(" error");

// `exact_errors` selects the detailed (allocating) message that names the
// offending character and tokenizer state; without it a static string is
// borrowed. This mirrors the `format_if!` semantics used previously, where
// the formatted variant is produced only when the flag is set.
let msg = if self.opts.exact_errors {
    let c = self.current_char.get();
    let state = self.state.get();
    Cow::from(format!("Saw {c} in state {state:?}"))
} else {
    Cow::from("Bad character")
};
self.emit_error(msg);
}

Expand All @@ -391,12 +391,12 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
#[cfg(feature = "trace_tokenizer")]
trace!(" error_eof");

// `exact_errors` selects the detailed (allocating) message that names the
// tokenizer state; without it a static string is borrowed. This mirrors the
// `format_if!` semantics used previously, where the formatted variant is
// produced only when the flag is set.
let msg = if self.opts.exact_errors {
    let state = self.state.get();
    Cow::from(format!("Saw EOF in state {state:?}"))
} else {
    Cow::from("Unexpected EOF")
};
self.emit_error(msg);
}

Expand Down
97 changes: 46 additions & 51 deletions html5ever/src/tree_builder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use crate::tokenizer;
use crate::tokenizer::states as tok_state;
use crate::tokenizer::{Doctype, EndTag, StartTag, Tag, TokenSink, TokenSinkResult};

use std::borrow::Cow::Borrowed;
use std::borrow::Cow::{self, Borrowed};
use std::cell::{Cell, Ref, RefCell};
use std::collections::VecDeque;
use std::iter::{Enumerate, Rev};
Expand All @@ -32,7 +32,6 @@ use crate::tokenizer::states::RawKind;
use crate::tree_builder::tag_sets::*;
use crate::util::str::to_escaped_string;
use log::{debug, log_enabled, warn, Level};
use mac::format_if;
use markup5ever::{expanded_name, local_name, namespace_prefix, ns};

#[macro_use]
Expand Down Expand Up @@ -488,12 +487,11 @@ where
if self.mode.get() == InsertionMode::Initial {
let (err, quirk) = data::doctype_error_and_quirks(&dt, self.opts.iframe_srcdoc);
if err {
self.sink.parse_error(format_if!(
self.opts.exact_errors,
"Bad DOCTYPE",
"Bad DOCTYPE: {:?}",
dt
));
self.sink.parse_error(if self.opts.exact_errors {
Cow::from(format!("Bad DOCTYPE: {dt:?}"))
} else {
Cow::from("Bad DOCTYPE")
});
}
let Doctype {
name,
Expand All @@ -513,12 +511,11 @@ where
self.mode.set(InsertionMode::BeforeHtml);
return tokenizer::TokenSinkResult::Continue;
} else {
self.sink.parse_error(format_if!(
self.opts.exact_errors,
"DOCTYPE in body",
"DOCTYPE in insertion mode {:?}",
self.mode.get()
));
self.sink.parse_error(if self.opts.exact_errors {
Cow::from(format!("DOCTYPE in insertion mode {:?}", self.mode.get()))
} else {
Cow::from("DOCTYPE in body")
});
return tokenizer::TokenSinkResult::Continue;
}
},
Expand Down Expand Up @@ -618,13 +615,15 @@ where
Sink: TreeSink<Handle = Handle>,
{
/// Reports `_thing` as an unexpected token and finishes processing it.
///
/// The parse error is sent to the sink exactly once. With `exact_errors`
/// enabled the message includes the escaped token and the current insertion
/// mode; otherwise a static message is borrowed.
///
/// Always returns `ProcessResult::Done`.
fn unexpected<T: fmt::Debug>(&self, _thing: &T) -> ProcessResult<Handle> {
    let msg = if self.opts.exact_errors {
        Cow::from(format!(
            "Unexpected token {} in insertion mode {:?}",
            to_escaped_string(_thing),
            self.mode.get()
        ))
    } else {
        Cow::from("Unexpected token")
    };
    self.sink.parse_error(msg);
    ProcessResult::Done
}

Expand Down Expand Up @@ -1053,20 +1052,19 @@ where
"thead" "tr" "body" "html");

for elem in self.open_elems.borrow().iter() {
let error;
{
let error = {
let elem_name = self.sink.elem_name(elem);
let name = elem_name.expanded();
if body_end_ok(name) {
continue;
}
error = format_if!(
self.opts.exact_errors,
"Unexpected open tag at end of body",
"Unexpected open tag {:?} at end of body",
name
);
}

if self.opts.exact_errors {
Cow::from(format!("Unexpected open tag {name:?} at end of body"))
} else {
Cow::from("Unexpected open tag at end of body")
}
};
self.sink.parse_error(error);
// FIXME: Do we keep checking after finding one bad tag?
// The spec suggests not.
Expand Down Expand Up @@ -1193,12 +1191,11 @@ where
/// Signal an error if it was not the first one.
fn expect_to_close(&self, name: LocalName) {
    // `pop_until_named` returns how many elements were popped; anything other
    // than exactly one means other elements were still open above `name`.
    if self.pop_until_named(name.clone()) != 1 {
        // Report the condition once. The detailed message is only built when
        // `exact_errors` is enabled; otherwise a static string is borrowed.
        let msg = if self.opts.exact_errors {
            Cow::from(format!("Unexpected open element while closing {name:?}"))
        } else {
            Cow::from("Unexpected open element")
        };
        self.sink.parse_error(msg);
    }
}

Expand Down Expand Up @@ -1242,12 +1239,14 @@ where
self.orig_mode.set(Some(self.mode.get()));
ProcessResult::Reprocess(InsertionMode::InTableText, token)
} else {
self.sink.parse_error(format_if!(
self.opts.exact_errors,
"Unexpected characters in table",
"Unexpected characters {} in table",
to_escaped_string(&token)
));
self.sink.parse_error(if self.opts.exact_errors {
Cow::from(format!(
"Unexpected characters {} in table",
to_escaped_string(&token)
))
} else {
Cow::from("Unexpected characters in table")
});
self.foster_parent_in_body(token)
}
}
Expand Down Expand Up @@ -1553,15 +1552,11 @@ where
}
}

// Can't use unwrap_or_return!() due to rust-lang/rust#16617.
let match_idx = match match_idx {
None => {
// I believe this is impossible, because the root
// <html> element is in special_tag.
self.unexpected(&tag);
return;
},
Some(x) => x,
let Some(match_idx) = match_idx else {
// I believe this is impossible, because the root
// <html> element is in special_tag.
self.unexpected(&tag);
return;
};

self.generate_implied_end_except(tag.name.clone());
Expand Down
10 changes: 3 additions & 7 deletions html5ever/src/tree_builder/rules.rs
Original file line number Diff line number Diff line change
Expand Up @@ -491,13 +491,9 @@ where

</form> => {
if !self.in_html_elem_named(local_name!("template")) {
// Can't use unwrap_or_return!() due to rust-lang/rust#16617.
let node = match self.form_elem.take() {
None => {
self.sink.parse_error(Borrowed("Null form element pointer on </form>"));
return ProcessResult::Done;
}
Some(x) => x,
let Some(node) = self.form_elem.take() else {
self.sink.parse_error(Borrowed("Null form element pointer on </form>"));
return ProcessResult::Done;
};
if !self.in_scope(default_scope, |n| self.sink.same_node(&node, &n)) {
self.sink.parse_error(Borrowed("Form element not in scope on </form>"));
Expand Down
30 changes: 20 additions & 10 deletions html5ever/src/util/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,24 @@ pub(crate) fn lower_ascii_letter(c: char) -> Option<char> {
#[allow(non_snake_case)]
mod test {
use super::lower_ascii_letter;
use mac::test_eq;

test_eq!(lower_letter_a_is_a, lower_ascii_letter('a'), Some('a'));
test_eq!(lower_letter_A_is_a, lower_ascii_letter('A'), Some('a'));
test_eq!(lower_letter_symbol_is_None, lower_ascii_letter('!'), None);
test_eq!(
lower_letter_nonascii_is_None,
lower_ascii_letter('\u{a66e}'),
None
);

#[test]
fn lower_letter_a_is_a() {
assert_eq!(lower_ascii_letter('a'), Some('a'));
}

#[test]
fn lower_letter_A_is_a() {
assert_eq!(lower_ascii_letter('A'), Some('a'));
}

#[test]
fn lower_letter_symbol_is_None() {
assert_eq!(lower_ascii_letter('!'), None);
}

#[test]
fn lower_letter_nonascii_is_None() {
assert_eq!(lower_ascii_letter('\u{a66e}'), None);
}
}
1 change: 0 additions & 1 deletion xml5ever/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ trace_tokenizer = []

[dependencies]
log = "0.4"
mac = "0.1"
markup5ever = { version = "0.17", path = "../markup5ever" }

[dev-dependencies]
Expand Down
9 changes: 1 addition & 8 deletions xml5ever/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,7 @@

pub use markup5ever::*;

/// Runs the expression `$e` and reports how long it took.
///
/// The expansion is a `(value, nanos)` tuple: the expression's value paired
/// with the elapsed time in nanoseconds, narrowed to `u64`.
macro_rules! time {
    ($e:expr) => {{
        let started = ::std::time::Instant::now();
        let value = $e;
        let nanos = started.elapsed().as_nanos() as u64;
        (value, nanos)
    }};
}
pub(crate) mod macros;

/// Driver
pub mod driver;
Expand Down
Loading