Skip to content

Commit 2edfbbe

Browse files
authored
Merge pull request #5803 from unisonweb/topic/serial-perf
Performance and other improvements to serialization
2 parents 9dec1b4 + aa6f35d commit 2edfbbe

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+2988
-1230
lines changed

lib/unison-util-bytes/src/Unison/Util/Bytes.hs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ module Unison.Util.Bytes
2525
toArray,
2626
fromArray,
2727
toLazyByteString,
28+
fromLazyByteString,
2829
flatten,
2930
at,
3031
take,

parser-typechecker/src/Unison/Builtin.hs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -948,6 +948,7 @@ codeBuiltins =
948948
[ ("Code.dependencies", code --> list termLink),
949949
("Code.isMissing", termLink --> io boolean),
950950
("Code.serialize", code --> bytes),
951+
("Code.serialize.versioned", nat --> code --> bytes),
951952
("Code.deserialize", bytes --> eithert text code),
952953
("Code.cache_", list (tuple [termLink, code]) --> io (list termLink)),
953954
("Code.validate", list (tuple [termLink, code]) --> io (optionalt failure)),
@@ -962,6 +963,7 @@ codeBuiltins =
962963
),
963964
("Value.dependencies", value --> list termLink),
964965
("Value.serialize", value --> bytes),
966+
("Value.serialize.versioned", nat --> value --> bytes),
965967
("Value.deserialize", bytes --> eithert text value),
966968
("Value.value", forall1 "a" $ \a -> a --> value),
967969
( "Value.load",

unison-runtime/package.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ library:
100100
- unison-util-bytes
101101
- unison-util-recursion
102102
- unliftio
103+
- unordered-containers
103104
- vector
104105
- crypton-x509
105106
- crypton-x509-store

unison-runtime/src/Unison/Runtime/ANF.hs

Lines changed: 165 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ module Unison.Runtime.ANF
6464
Code (..),
6565
ValList,
6666
Value (..),
67+
Referenced (..),
68+
dereference,
6769
Cont (..),
6870
BLit (..),
6971
packTags,
@@ -77,8 +79,11 @@ module Unison.Runtime.ANF
7779
superNormalize,
7880
anfTerm,
7981
codeGroup,
82+
traverseCodeRefs,
8083
valueTermLinks,
8184
valueLinks,
85+
overValueRefs,
86+
traverseValueRefs,
8287
groupTermLinks,
8388
replaceConstructors,
8489
replaceFunctions,
@@ -1604,6 +1609,28 @@ type ANFD v = Compose (ANFM v) (Directed ())
16041609
data GroupRef = GR Reference Word64
16051610
deriving (Show, Eq)
16061611

1612+
-- A value with optional optimization information for serialization.
1613+
-- The references are required for serialization V5, and are assumed
1614+
-- to be the only references used in the value _up to in-memory
1615+
-- uniqueness_.
1616+
--
1617+
-- This is parameterized so that it can be used with both Value and
1618+
-- Code.
1619+
--
1620+
-- Also note, the stored referenced might not be 'tight' in the sense
1621+
-- that they all actually occur in the value. Maintaining this
1622+
-- invariant together with actual canonicalization would be onerous
1623+
-- and isn't done at this time.
1624+
data Referenced a
1625+
= -- types, terms
1626+
WithRefs [Reference] [Reference] a
1627+
| Plain a
1628+
deriving (Show, Eq)
1629+
1630+
dereference :: Referenced a -> a
1631+
dereference (WithRefs _ _ x) = x
1632+
dereference (Plain x) = x
1633+
16071634
-- | A list of either unboxed or boxed values.
16081635
-- Each slot is one of unboxed or boxed but not both.
16091636
type ValList = [Value]
@@ -1641,12 +1668,20 @@ traverseGroup ::
16411668
f Code
16421669
traverseGroup f (CodeRep sg ch) = flip CodeRep ch <$> f sg
16431670

1671+
traverseCodeRefs ::
1672+
(Applicative f) =>
1673+
(Bool -> Reference -> f Reference) ->
1674+
Code ->
1675+
f Code
1676+
traverseCodeRefs h (CodeRep sg ch) =
1677+
flip CodeRep ch <$> traverseGroupLinks h sg
1678+
16441679
data Cont
16451680
= KE
16461681
| Mark
16471682
Word64 -- pending args
16481683
[Reference]
1649-
(Map Reference Value)
1684+
[(Reference, Value)]
16501685
Cont
16511686
| Push
16521687
Word64 -- Frame size
@@ -1670,6 +1705,8 @@ data BLit
16701705
| Neg Word64
16711706
| Char Char
16721707
| Float Double
1708+
| -- special cases for newer formats
1709+
Map [(Value, Value)]
16731710
deriving (Show, Eq)
16741711

16751712
groupVars :: ANFM v (Set v)
@@ -2210,19 +2247,143 @@ valueLinks f (Cont vs k) =
22102247
foldMap (valueLinks f) vs <> contLinks f k
22112248
valueLinks f (BLit l) = blitLinks f l
22122249

2250+
-- Maps over the references in a `Value`, with the boolean indicating
2251+
-- whether or not the reference is for a type.
2252+
--
2253+
-- This traverses _all_ references in the value, not just the ones
2254+
-- necessary to load it.
2255+
overValueRefs :: (Bool -> Reference -> Reference) -> Value -> Value
2256+
overValueRefs h = \case
2257+
Partial (GR r i) vs ->
2258+
Partial (GR (h False r) i) (fmap (overValueRefs h) vs)
2259+
Data r t vs ->
2260+
Data (h True r) t (fmap (overValueRefs h) vs)
2261+
Cont vs k ->
2262+
Cont (fmap (overValueRefs h) vs) (overContRefs h k)
2263+
BLit l -> BLit (overBLitRefs h l)
2264+
2265+
-- Traverses the references in a `Value`, with the boolean indicating
2266+
-- whether or not the reference is for a type.
2267+
--
2268+
-- Unlike the "Links" functions, this traverses _all_ references in a
2269+
-- Value, not just the ones necessary to load the value. So, this will
2270+
-- traverse inside quotes and code.
2271+
traverseValueRefs ::
2272+
(Applicative f) =>
2273+
(Bool -> Reference -> f Reference) ->
2274+
Value ->
2275+
f Value
2276+
traverseValueRefs h = \case
2277+
Partial (GR r i) vs ->
2278+
Partial . flip GR i
2279+
<$> h False r
2280+
<*> traverse (traverseValueRefs h) vs
2281+
Data r t vs ->
2282+
flip Data t
2283+
<$> h True r
2284+
<*> traverse (traverseValueRefs h) vs
2285+
Cont vs k ->
2286+
Cont
2287+
<$> traverse (traverseValueRefs h) vs
2288+
<*> traverseContRefs h k
2289+
BLit l -> BLit <$> traverseBLitRefs h l
2290+
22132291
contLinks :: (Monoid a) => (Bool -> Reference -> a) -> Cont -> a
22142292
contLinks f (Push _ _ (GR cr _) k) =
22152293
f False cr <> contLinks f k
22162294
contLinks f (Mark _ ps de k) =
22172295
foldMap (f True) ps
2218-
<> Map.foldMapWithKey (\k c -> f True k <> valueLinks f c) de
2296+
<> foldMap (\(k, c) -> f True k <> valueLinks f c) de
22192297
<> contLinks f k
22202298
contLinks _ KE = mempty
22212299

2300+
-- Maps over the references in a `Cont`, with the boolean indicating
2301+
-- whether or not the reference is for a type.
2302+
--
2303+
-- This traverses _all_ references in the continuation, not just the
2304+
-- ones necessary to load it.
2305+
overContRefs :: (Bool -> Reference -> Reference) -> Cont -> Cont
2306+
overContRefs h = \case
2307+
KE -> KE
2308+
Mark asz rs env k ->
2309+
Mark
2310+
asz
2311+
(fmap (h True) rs)
2312+
(fmap (bimap (h True) (overValueRefs h)) env)
2313+
(overContRefs h k)
2314+
Push fsz asz (GR r i) k ->
2315+
Push fsz asz (GR (h False r) i) (overContRefs h k)
2316+
2317+
-- Traverses the references in a `Cont`, with the boolean indicating
2318+
-- whether or not the reference is for a type.
2319+
--
2320+
-- Unlike the "Links" functions, this traverses _all_ references in a
2321+
-- continuation, not just the ones necessary to load it. So, this will
2322+
-- traverse inside quotes and code.
2323+
traverseContRefs ::
2324+
(Applicative f) =>
2325+
(Bool -> Reference -> f Reference) ->
2326+
Cont ->
2327+
f Cont
2328+
traverseContRefs h = \case
2329+
KE -> pure KE
2330+
Mark asz rs env k ->
2331+
Mark asz
2332+
<$> traverse (h True) rs
2333+
<*> traverse (bitraverse (h True) (traverseValueRefs h)) env
2334+
<*> traverseContRefs h k
2335+
Push fsz asz (GR r i) k ->
2336+
Push fsz asz . flip GR i
2337+
<$> h False r
2338+
<*> traverseContRefs h k
2339+
22222340
blitLinks :: (Monoid a) => (Bool -> Reference -> a) -> BLit -> a
22232341
blitLinks f (List s) = foldMap (valueLinks f) s
22242342
blitLinks _ _ = mempty
22252343

2344+
overBLitRefs :: (Bool -> Reference -> Reference) -> BLit -> BLit
2345+
overBLitRefs h = \case
2346+
List vs -> List (fmap oval vs)
2347+
TmLink rn
2348+
| Con (ConstructorReference r j) i <- rn ->
2349+
TmLink $ Con (ConstructorReference (h True r) j) i
2350+
| Ref r <- rn -> TmLink . Ref $ h False r
2351+
TyLink r -> TyLink $ h True r
2352+
Quote v -> Quote $ oval v
2353+
Code (CodeRep sg ch) -> Code $ CodeRep (overGroupLinks h sg) ch
2354+
Arr a -> Arr $ fmap oval a
2355+
Map kvs -> Map $ fmap (bimap oval oval) kvs
2356+
l -> l
2357+
where
2358+
oval v = overValueRefs h v
2359+
2360+
-- Traverses the references in a `BLit`, with the boolean indicating
2361+
-- whether or not the reference is for a type.
2362+
--
2363+
-- Unlike the "Links" functions, this traverses _all_ references in a
2364+
-- literal, not just the ones necessary to load it. So, this will
2365+
-- traverse inside quotes and code.
2366+
traverseBLitRefs ::
2367+
(Applicative f) =>
2368+
(Bool -> Reference -> f Reference) ->
2369+
BLit ->
2370+
f BLit
2371+
traverseBLitRefs h = \case
2372+
List vs -> List <$> traverse tval vs
2373+
TmLink rn
2374+
| Con (ConstructorReference r j) i <- rn ->
2375+
TmLink . flip Con i . flip ConstructorReference j <$> h True r
2376+
| Ref r <- rn -> TmLink . Ref <$> h False r
2377+
TyLink r -> TyLink <$> h True r
2378+
Quote v -> Quote <$> tval v
2379+
Code (CodeRep sg ch) ->
2380+
Code . flip CodeRep ch <$> traverseGroupLinks h sg
2381+
Arr a -> Arr <$> traverse tval a
2382+
Map kvs -> Map <$> traverse (bitraverse tval tval) kvs
2383+
l -> pure l
2384+
where
2385+
tval v = traverseValueRefs h v
2386+
22262387
groupTermLinks :: (Var v) => SuperGroup v -> [Reference]
22272388
groupTermLinks = Set.toList . foldGroupLinks f
22282389
where
@@ -2315,8 +2476,8 @@ branchLinks _ g (MatchText m e) =
23152476
MatchText <$> traverse g m <*> traverse g e
23162477
branchLinks _ g (MatchIntegral m e) =
23172478
MatchIntegral <$> traverse g m <*> traverse g e
2318-
branchLinks _ g (MatchNumeric r m e) =
2319-
MatchNumeric r <$> traverse g m <*> traverse g e
2479+
branchLinks f g (MatchNumeric r m e) =
2480+
MatchNumeric <$> f r <*> traverse g m <*> traverse g e
23202481
branchLinks _ g (MatchSum m) =
23212482
MatchSum <$> (traverse . traverse) g m
23222483
branchLinks _ _ MatchEmpty = pure MatchEmpty

0 commit comments

Comments
 (0)