Skip to content

Commit c06e50d

Browse files
committed
feat: init Wtf8Atom
1 parent c659ccc commit c06e50d

File tree

6 files changed

+2005
-49
lines changed

6 files changed

+2005
-49
lines changed

crates/hstr/src/dynamic.rs

Lines changed: 36 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@ use triomphe::ThinArc;
1414

1515
use crate::{
1616
tagged_value::{TaggedValue, MAX_INLINE_LEN},
17-
Atom, INLINE_TAG, INLINE_TAG_INIT, LEN_OFFSET, TAG_MASK,
17+
wtf8::Wtf8,
18+
Atom, Wtf8Atom, INLINE_TAG, INLINE_TAG_INIT, LEN_OFFSET, TAG_MASK,
1819
};
1920

2021
#[derive(PartialEq, Eq)]
@@ -73,6 +74,11 @@ impl AtomStore {
7374
atom_in(self, &text.into())
7475
}
7576

77+
#[inline(always)]
78+
pub fn wtf8_atom<'a>(&mut self, text: impl Into<Cow<'a, Wtf8>>) -> Wtf8Atom {
79+
wtf8_atom_in(self, text.into().as_bytes())
80+
}
81+
7682
fn gc(&mut self) {
7783
self.data.retain(|item, _| {
7884
let count = ThinArc::strong_count(&item.0);
@@ -94,6 +100,14 @@ pub fn global_atom_store_gc() {
94100
});
95101
}
96102

103+
pub(crate) fn global_wtf8_atom(text: &[u8]) -> Wtf8Atom {
104+
GLOBAL_DATA.with(|global| {
105+
let mut store = global.borrow_mut();
106+
107+
wtf8_atom_in(&mut *store, text)
108+
})
109+
}
110+
97111
pub(crate) fn global_atom(text: &str) -> Atom {
98112
GLOBAL_DATA.with(|global| {
99113
let mut store = global.borrow_mut();
@@ -102,9 +116,7 @@ pub(crate) fn global_atom(text: &str) -> Atom {
102116
})
103117
}
104118

105-
/// This can create any kind of [Atom], although this lives in the `dynamic`
106-
/// module.
107-
fn atom_in<S>(storage: S, text: &str) -> Atom
119+
fn wtf8_atom_in<S>(storage: S, text: &[u8]) -> Wtf8Atom
108120
where
109121
S: Storage,
110122
{
@@ -115,9 +127,9 @@ where
115127
let tag = INLINE_TAG_INIT | ((len as u8) << LEN_OFFSET);
116128
let mut unsafe_data = TaggedValue::new_tag(tag);
117129
unsafe {
118-
unsafe_data.data_mut()[..len].copy_from_slice(text.as_bytes());
130+
unsafe_data.data_mut()[..len].copy_from_slice(text);
119131
}
120-
return Atom { unsafe_data };
132+
return Wtf8Atom { unsafe_data };
121133
}
122134

123135
let hash = calc_hash(text);
@@ -129,12 +141,22 @@ where
129141
NonNull::new_unchecked(entry)
130142
};
131143
debug_assert!(0 == ptr.as_ptr() as u8 & TAG_MASK);
132-
Atom {
144+
Wtf8Atom {
133145
unsafe_data: TaggedValue::new_ptr(ptr),
134146
}
135147
}
136148

137-
/// Attempts to construct an Atom but only if it can be constructed inline.
149+
/// This can create any kind of [Atom], although this lives in the `dynamic`
150+
/// module.
151+
fn atom_in<S>(storage: S, text: &str) -> Atom
152+
where
153+
S: Storage,
154+
{
155+
// SAFETY: `text` is valid UTF-8
156+
unsafe { Atom::from_wtf8_unchecked(wtf8_atom_in(storage, text.as_bytes())) }
157+
}
158+
159+
/// Attempts to construct an [Atom] but only if it can be constructed inline.
138160
/// This is primarily useful in constant contexts.
139161
pub(crate) const fn inline_atom(text: &str) -> Option<Atom> {
140162
let len = text.len();
@@ -159,31 +181,25 @@ pub(crate) const fn inline_atom(text: &str) -> Option<Atom> {
159181
}
160182

161183
trait Storage {
162-
fn insert_entry(self, text: &str, hash: u64) -> Item;
184+
fn insert_entry(self, text: &[u8], hash: u64) -> Item;
163185
}
164186

165187
impl Storage for &'_ mut AtomStore {
166-
fn insert_entry(self, text: &str, hash: u64) -> Item {
188+
fn insert_entry(self, text: &[u8], hash: u64) -> Item {
167189
// If the text is too long, interning is not worth it.
168190
if text.len() > 512 {
169-
return Item(ThinArc::from_header_and_slice(
170-
Metadata { hash },
171-
text.as_bytes(),
172-
));
191+
return Item(ThinArc::from_header_and_slice(Metadata { hash }, text));
173192
}
174193

175194
let (entry, _) = self
176195
.data
177196
.raw_entry_mut()
178197
.from_hash(hash, |key| {
179-
key.header.header.hash == hash && key.slice.eq(text.as_bytes())
198+
key.header.header.hash == hash && key.slice.eq(text)
180199
})
181200
.or_insert_with(move || {
182201
(
183-
Item(ThinArc::from_header_and_slice(
184-
Metadata { hash },
185-
text.as_bytes(),
186-
)),
202+
Item(ThinArc::from_header_and_slice(Metadata { hash }, text)),
187203
(),
188204
)
189205
});
@@ -192,7 +208,7 @@ impl Storage for &'_ mut AtomStore {
192208
}
193209

194210
#[inline(always)]
195-
fn calc_hash(text: &str) -> u64 {
211+
fn calc_hash(text: &[u8]) -> u64 {
196212
let mut hasher = FxHasher::default();
197213
text.hash(&mut hasher);
198214
hasher.finish()

crates/hstr/src/global_store.rs

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
1-
use std::borrow::Cow;
1+
use std::{
2+
borrow::Cow,
3+
mem::{forget, ManuallyDrop},
4+
};
25

3-
use crate::{dynamic::global_atom, Atom};
6+
use crate::{
7+
dynamic::{global_atom, global_wtf8_atom},
8+
wtf8::{Wtf8, Wtf8Buf},
9+
Atom, Wtf8Atom,
10+
};
411

512
macro_rules! direct_from_impl {
613
($T:ty) => {
@@ -21,3 +28,43 @@ impl From<Box<str>> for crate::Atom {
2128
global_atom(&s)
2229
}
2330
}
31+
32+
macro_rules! direct_from_impl_wtf8 {
33+
($T:ty) => {
34+
impl From<$T> for Wtf8Atom {
35+
fn from(s: $T) -> Self {
36+
global_wtf8_atom(s.as_bytes())
37+
}
38+
}
39+
};
40+
}
41+
42+
direct_from_impl_wtf8!(&'_ str);
43+
direct_from_impl_wtf8!(Cow<'_, str>);
44+
direct_from_impl_wtf8!(String);
45+
direct_from_impl_wtf8!(&'_ Wtf8);
46+
direct_from_impl_wtf8!(Wtf8Buf);
47+
48+
impl From<&Atom> for crate::Wtf8Atom {
49+
fn from(s: &Atom) -> Self {
50+
forget(s.clone());
51+
Wtf8Atom {
52+
unsafe_data: s.unsafe_data,
53+
}
54+
}
55+
}
56+
57+
impl From<Atom> for crate::Wtf8Atom {
58+
fn from(s: Atom) -> Self {
59+
let s = ManuallyDrop::new(s);
60+
Wtf8Atom {
61+
unsafe_data: s.unsafe_data,
62+
}
63+
}
64+
}
65+
66+
impl From<Box<str>> for crate::Wtf8Atom {
67+
fn from(s: Box<str>) -> Self {
68+
global_wtf8_atom(s.as_bytes())
69+
}
70+
}

0 commit comments

Comments
 (0)