Skip to content

Commit 24a7181

Browse files
authored
Use biome_line_index to avoid biome_lsp_converters (#354)
Avoid the tower-lsp dependency entirely in favor of "just" the `LineIndex`. We handle our own converter utilities, which are very easy to write on top of the LSP-agnostic `LineIndex`.
1 parent 9fc4f63 commit 24a7181

File tree

19 files changed

+458
-73
lines changed

19 files changed

+458
-73
lines changed

Cargo.lock

Lines changed: 6 additions & 43 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ air_r_factory = { path = "./crates/air_r_factory" }
2626
air_r_formatter = { path = "./crates/air_r_formatter" }
2727
air_r_parser = { path = "./crates/air_r_parser" }
2828
air_r_syntax = { path = "./crates/air_r_syntax" }
29+
biome_line_index = { path = "./crates/biome_line_index" }
2930
biome_ungrammar = { path = "./crates/biome_ungrammar" }
3031
comments = { path = "./crates/comments" }
3132
crates = { path = "./crates/crates" }
@@ -42,7 +43,6 @@ assert_matches = "1.5.0"
4243
biome_console = { git = "https://github.com/biomejs/biome", rev = "2648fa4201be4afd26f44eca1a4e77aac0a67272" }
4344
biome_diagnostics = { git = "https://github.com/biomejs/biome", rev = "2648fa4201be4afd26f44eca1a4e77aac0a67272" }
4445
biome_formatter = { git = "https://github.com/biomejs/biome", rev = "2648fa4201be4afd26f44eca1a4e77aac0a67272" }
45-
biome_lsp_converters = { git = "https://github.com/biomejs/biome", rev = "2648fa4201be4afd26f44eca1a4e77aac0a67272" }
4646
biome_parser = { git = "https://github.com/biomejs/biome", rev = "2648fa4201be4afd26f44eca1a4e77aac0a67272" }
4747
biome_rowan = { git = "https://github.com/biomejs/biome", rev = "2648fa4201be4afd26f44eca1a4e77aac0a67272" }
4848
biome_string_case = { git = "https://github.com/biomejs/biome", rev = "2648fa4201be4afd26f44eca1a4e77aac0a67272" }

crates/biome_line_index/Cargo.toml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
[package]
2+
authors.workspace = true
3+
categories.workspace = true
4+
description = "Biome's tools for converting between byte offsets and line / column positions"
5+
edition.workspace = true
6+
homepage.workspace = true
7+
keywords.workspace = true
8+
license.workspace = true
9+
name = "biome_line_index"
10+
repository.workspace = true
11+
version = "0.1.0"
12+
13+
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
14+
15+
[dependencies]
16+
biome_text_size = { workspace = true }
17+
rustc-hash = { workspace = true }
18+
19+
[lints]
20+
workspace = true

crates/biome_line_index/src/lib.rs

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
//! The crate contains tools for converting between byte offsets and line / column positions.
2+
3+
#![deny(clippy::use_self)]
4+
5+
use biome_text_size::TextSize;
6+
7+
mod line_index;
8+
9+
pub use line_index::LineIndex;
10+
11+
/// Text encodings whose code units are wider than one byte.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum WideEncoding {
    /// Lengths and columns are counted in UTF-16 code units.
    Utf16,
    /// Lengths and columns are counted in UTF-32 code units.
    Utf32,
}
16+
17+
/// A position in a text, given as a line and a UTF-8 column.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct LineCol {
    /// Line number, zero-based.
    pub line: u32,
    /// Column as a UTF-8 offset, zero-based.
    pub col: u32,
}
24+
25+
/// A line / column position used together with a [`WideEncoding`].
///
/// This is intentionally its own concrete type: it is neither generic nor
/// interchangeable with `LineCol`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct WideLineCol {
    /// Line number, zero-based.
    pub line: u32,
    /// Column, zero-based.
    pub col: u32,
}
33+
34+
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
35+
pub struct WideChar {
36+
/// Start offset of a character inside a line, zero-based
37+
pub start: TextSize,
38+
/// End offset of a character inside a line, zero-based
39+
pub end: TextSize,
40+
}
41+
42+
impl WideChar {
43+
/// Returns the length in 8-bit UTF-8 code units.
44+
fn len(&self) -> TextSize {
45+
self.end - self.start
46+
}
47+
48+
/// Returns the length in UTF-16 or UTF-32 code units.
49+
fn wide_len(&self, enc: WideEncoding) -> usize {
50+
match enc {
51+
WideEncoding::Utf16 => {
52+
if self.len() == TextSize::from(4) {
53+
2
54+
} else {
55+
1
56+
}
57+
}
58+
59+
WideEncoding::Utf32 => 1,
60+
}
61+
}
62+
}
63+
64+
#[cfg(test)]
mod tests {
    use crate::WideEncoding::{Utf16, Utf32};
    use crate::line_index::LineIndex;
    use crate::{LineCol, WideEncoding, WideLineCol};
    use biome_text_size::TextSize;

    /// Asserts that a UTF-16 position and a byte offset convert into each other.
    ///
    /// Forward: `wide` goes through `to_utf8` and then `offset`, and must yield `expected`.
    /// Backward: that offset goes through `line_col` and then `to_wide`, and must yield
    /// `wide` again.
    #[track_caller]
    fn check_conversion(line_index: &LineIndex, wide: WideLineCol, expected: TextSize) {
        let encoding = WideEncoding::Utf16;

        let line_col = line_index.to_utf8(encoding, wide);
        let offset = line_index.offset(line_col);
        assert_eq!(offset, Some(expected));

        let line_col = line_index.line_col(offset.unwrap());
        let round_tripped = line_index.to_wide(encoding, line_col.unwrap());
        assert_eq!(round_tripped, Some(wide));
    }

    #[test]
    fn empty_string() {
        let line_index = LineIndex::new("");
        check_conversion(&line_index, WideLineCol { line: 0, col: 0 }, TextSize::from(0));
    }

    #[test]
    fn empty_line() {
        let line_index = LineIndex::new("\n\n");
        check_conversion(&line_index, WideLineCol { line: 1, col: 0 }, TextSize::from(1));
    }

    #[test]
    fn line_end() {
        let line_index = LineIndex::new("abc\ndef\nghi");
        check_conversion(&line_index, WideLineCol { line: 1, col: 3 }, TextSize::from(7));
    }

    #[test]
    fn out_of_bounds_line() {
        let line_index = LineIndex::new("abcde\nfghij\n");

        // The text has no line 5, so no offset can be produced for it.
        let offset = line_index.offset(LineCol { line: 5, col: 0 });
        assert!(offset.is_none());
    }

    #[test]
    fn unicode() {
        // The separator between the two dates is three characters that each take 3 bytes in
        // UTF-8 but a single UTF-16 code unit; that is why columns and byte offsets diverge
        // after column 12. The spaces are written as escapes so that they stay visible and
        // cannot be silently normalised to ASCII spaces, which would break the offsets below.
        // NOTE(review): U+2009 (thin space) is assumed. The asserted offsets only require a
        // 3-byte, single-UTF-16-unit character on each side of the en dash; confirm the
        // exact code point against the repository.
        let line_index = LineIndex::new("'Jan 1, 2018\u{2009}–\u{2009}Jan 1, 2019'");

        // (UTF-16 column, expected byte offset), all on line 0.
        let cases: [(u32, u32); 8] = [
            (0, 0),
            (1, 1),
            (12, 12),
            (13, 15),
            (14, 18),
            (15, 21),
            (26, 32),
            (27, 33),
        ];
        for (col, offset) in cases {
            check_conversion(&line_index, WideLineCol { line: 0, col }, TextSize::from(offset));
        }
    }

    /// Walks a text made of every `char` below `char::MAX`, followed by a run of newlines,
    /// and checks all conversions at each character boundary against hand-tracked positions.
    #[ignore]
    #[test]
    fn test_every_chars() {
        let text: String = {
            let mut chars: Vec<char> = ('\0'..char::MAX).collect();
            let newline_count = chars.len() / 16;
            chars.extend(std::iter::repeat('\n').take(newline_count));
            chars.into_iter().collect()
        };

        let line_index = LineIndex::new(&text);

        // Expected position of the current character, tracked independently of `LineIndex`.
        let mut lin_col = LineCol { line: 0, col: 0 };
        let mut col_utf16 = 0;
        let mut col_utf32 = 0;
        for (offset, ch) in text.char_indices() {
            let got_offset = line_index.offset(lin_col).unwrap();
            assert_eq!(usize::from(got_offset), offset);

            let got_lin_col = line_index.line_col(got_offset).unwrap();
            assert_eq!(got_lin_col, lin_col);

            for enc in [Utf16, Utf32] {
                let wide_lin_col = line_index.to_wide(enc, lin_col).unwrap();
                let got_lin_col = line_index.to_utf8(enc, wide_lin_col);
                assert_eq!(got_lin_col, lin_col);

                let want_col = match enc {
                    Utf16 => col_utf16,
                    Utf32 => col_utf32,
                };
                assert_eq!(wide_lin_col.col, want_col);
            }

            if ch == '\n' {
                // A newline starts the next line and resets every column counter.
                lin_col.line += 1;
                lin_col.col = 0;
                col_utf16 = 0;
                col_utf32 = 0;
            } else {
                // `len_utf8` is at most 4 and `len_utf16` at most 2, so the casts are lossless.
                lin_col.col += ch.len_utf8() as u32;
                col_utf16 += ch.len_utf16() as u32;
                col_utf32 += 1;
            }
        }
    }
}

0 commit comments

Comments
 (0)