Skip to content

Commit ce45a10

Browse files
committed
wc: optimize the wc -cl case
1 parent 5287738 commit ce45a10

File tree

4 files changed

+60
-0
lines changed

4 files changed

+60
-0
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

fuzz/Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/uu/wc/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@ path = "src/wc.rs"
2121
clap = { workspace = true }
2222
uucore = { workspace = true, features = ["parser", "pipes", "quoting-style"] }
2323
bytecount = { workspace = true, features = ["runtime-dispatch-simd"] }
24+
memchr = { workspace = true }
2425
thiserror = { workspace = true }
2526
unicode-width = { workspace = true }
2627
fluent = { workspace = true }

src/uu/wc/src/count_fast.rs

Lines changed: 57 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -229,6 +229,63 @@ pub(crate) fn count_bytes_chars_and_lines_fast<
229229
const COUNT_LINES: bool,
230230
>(
231231
handle: &mut R,
232+
) -> (WordCount, Option<io::Error>) {
233+
// Use specialized implementations for common cases
234+
match (COUNT_BYTES, COUNT_CHARS, COUNT_LINES) {
235+
// Lines only - use memchr for fastest line counting
236+
(false, false, true) => count_lines_only_fast(handle),
237+
// Bytes + Lines - optimize using bytecount and avoid double counting
238+
(true, false, true) => count_bytes_and_lines_fast(handle),
239+
// Default implementation for other cases
240+
_ => {
241+
count_bytes_chars_and_lines_generic::<R, COUNT_BYTES, COUNT_CHARS, COUNT_LINES>(handle)
242+
}
243+
}
244+
}
245+
246+
/// Specialized fast line counting using memchr
247+
fn count_lines_only_fast<R: Read>(handle: &mut R) -> (WordCount, Option<io::Error>) {
248+
let mut total = WordCount::default();
249+
let buf: &mut [u8] = &mut AlignedBuffer::default().data;
250+
251+
loop {
252+
match handle.read(buf) {
253+
Ok(0) => return (total, None),
254+
Ok(n) => {
255+
total.lines += memchr::memchr_iter(b'\n', &buf[..n]).count();
256+
}
257+
Err(ref e) if e.kind() == ErrorKind::Interrupted => (),
258+
Err(e) => return (total, Some(e)),
259+
}
260+
}
261+
}
262+
263+
/// Specialized fast byte and line counting using bytecount
264+
fn count_bytes_and_lines_fast<R: Read>(handle: &mut R) -> (WordCount, Option<io::Error>) {
265+
let mut total = WordCount::default();
266+
let buf: &mut [u8] = &mut AlignedBuffer::default().data;
267+
268+
loop {
269+
match handle.read(buf) {
270+
Ok(0) => return (total, None),
271+
Ok(n) => {
272+
total.bytes += n;
273+
total.lines += bytecount::count(&buf[..n], b'\n');
274+
}
275+
Err(ref e) if e.kind() == ErrorKind::Interrupted => (),
276+
Err(e) => return (total, Some(e)),
277+
}
278+
}
279+
}
280+
281+
/// Generic implementation for mixed counting
282+
fn count_bytes_chars_and_lines_generic<
283+
R: Read,
284+
const COUNT_BYTES: bool,
285+
const COUNT_CHARS: bool,
286+
const COUNT_LINES: bool,
287+
>(
288+
handle: &mut R,
232289
) -> (WordCount, Option<io::Error>) {
233290
let mut total = WordCount::default();
234291
let buf: &mut [u8] = &mut AlignedBuffer::default().data;

0 commit comments

Comments
 (0)