From 250c05e2c5765fe816deb891acafd6ec867229e7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Simon=20W=C3=BClker?=
Date: Thu, 26 Jun 2025 18:21:51 +0200
Subject: [PATCH 1/2] Impl additional traits for BufferQueue/TokenizerResult
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Simon Wülker
---
 markup5ever/interface/mod.rs     | 2 +-
 markup5ever/util/buffer_queue.rs | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/markup5ever/interface/mod.rs b/markup5ever/interface/mod.rs
index 366df1f6..0430a8e2 100644
--- a/markup5ever/interface/mod.rs
+++ b/markup5ever/interface/mod.rs
@@ -61,7 +61,7 @@ impl fmt::Debug for ExpandedName<'_> {
 }
 
 #[must_use]
-#[derive(Debug)]
+#[derive(Debug, PartialEq)]
 pub enum TokenizerResult<Handle> {
     Done,
     Script(Handle),
diff --git a/markup5ever/util/buffer_queue.rs b/markup5ever/util/buffer_queue.rs
index 5ff0486f..d5e6864f 100644
--- a/markup5ever/util/buffer_queue.rs
+++ b/markup5ever/util/buffer_queue.rs
@@ -47,7 +47,7 @@ pub enum SetResult {
 /// Internally it uses [`VecDeque`] and has the same complexity properties.
 ///
 /// [`VecDeque`]: https://doc.rust-lang.org/std/collections/struct.VecDeque.html
-#[derive(Debug)]
+#[derive(Clone, Debug)]
 pub struct BufferQueue {
     /// Buffers to process.
     buffers: RefCell<VecDeque<StrTendril>>,

From 623910018854293d69930215beb3d0d8a67ee94c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Simon=20W=C3=BClker?=
Date: Thu, 26 Jun 2025 18:22:27 +0200
Subject: [PATCH 2/2] Don't clone input inside html5ever benchmark loop
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Simon Wülker
---
 html5ever/benches/html5ever.rs | 34 ++++++++++++++++++++--------------
 1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/html5ever/benches/html5ever.rs b/html5ever/benches/html5ever.rs
index c8114fb6..f74b13a7 100644
--- a/html5ever/benches/html5ever.rs
+++ b/html5ever/benches/html5ever.rs
@@ -5,10 +5,10 @@ extern crate html5ever;
 use std::fs;
 use std::path::PathBuf;
 
-use criterion::Criterion;
+use criterion::{BatchSize, Criterion};
 
-use html5ever::tendril::*;
 use html5ever::tokenizer::{BufferQueue, Token, TokenSink, TokenSinkResult, Tokenizer};
+use html5ever::{tendril::*, TokenizerResult};
 
 struct Sink;
 
@@ -51,19 +51,25 @@ fn run_bench(c: &mut Criterion, name: &str) {
 
     let test_name = format!("html tokenizing {name}");
 
+    // Construct a buffer queue to feed to the tokenizer
+    let buffer_queue = BufferQueue::default();
+    for buf in input.into_iter() {
+        buffer_queue.push_back(buf);
+    }
+
     c.bench_function(&test_name, move |b| {
-        b.iter(|| {
-            let tok = Tokenizer::new(Sink, Default::default());
-            let buffer = BufferQueue::default();
-            // We are doing clone inside the bench function, this is not ideal, but possibly
-            // necessary since our iterator consumes the underlying buffer.
-            for buf in input.clone().into_iter() {
-                buffer.push_back(buf);
-                let _ = tok.feed(&buffer);
-            }
-            let _ = tok.feed(&buffer);
-            tok.end();
-        })
+        b.iter_batched(
+            || buffer_queue.clone(),
+            |buffer_queue| {
+                let tok = Tokenizer::new(Sink, Default::default());
+
+                // Tokenize the entire input, ignoring any
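
Note on the Criterion API used in PATCH 2/2: `Bencher::iter_batched` takes a setup closure and a routine closure, and only the routine is timed, so the per-iteration clone of the prepared `BufferQueue` no longer counts against the measurement. Below is a minimal standalone sketch of that pattern; the benchmark name and sorting workload are purely illustrative and are not part of this patch series.

use criterion::{criterion_group, criterion_main, BatchSize, Criterion};

// Illustrative Criterion benchmark (not from the patch): demonstrates moving
// expensive per-iteration setup into iter_batched's setup closure.
fn bench_sort(c: &mut Criterion) {
    // Expensive setup performed once, outside the measured closure.
    let input: Vec<u64> = (0..10_000).rev().collect();

    c.bench_function("sort 10k reversed u64s", move |b| {
        b.iter_batched(
            // Setup closure: clones the prepared input; its cost is excluded
            // from the reported timings.
            || input.clone(),
            // Routine closure: only this part is measured.
            |mut data| data.sort(),
            BatchSize::SmallInput,
        )
    });
}

criterion_group!(benches, bench_sort);
criterion_main!(benches);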