Skip to content

Commit 110ada3

Browse files
committed
feat(Cache): make use of Lake cache
1 parent ad22e74 commit 110ada3

File tree

2 files changed

+104
-35
lines changed

2 files changed

+104
-35
lines changed

Cache/IO.lean

Lines changed: 103 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ Authors: Arthur Paulino, Jon Eugster
55
-/
66

77
import Cache.Lean
8+
import Lake.Build.Common
89

910
variable {α : Type}
1011

@@ -70,7 +71,7 @@ def CURLBIN :=
7071

7172
/-- leantar version at https://github.com/digama0/leangz -/
7273
def LEANTARVERSION :=
73-
"0.1.15"
74+
"0.1.16-pre"
7475

7576
def EXE := if System.Platform.isWindows then ".exe" else ""
7677

@@ -111,6 +112,8 @@ structure CacheM.Context where
111112
srcSearchPath : SearchPath
112113
/-- build directory for proofwidgets -/
113114
proofWidgetsBuildDir : FilePath
115+
/-- directory for the Lake artifact cache -/
116+
lakeArtifactDir? : Option FilePath
114117

115118
@[inherit_doc CacheM.Context]
116119
abbrev CacheM := ReaderT CacheM.Context IO
@@ -133,13 +136,31 @@ def _root_.Lean.SearchPath.relativize (sp : SearchPath) : IO SearchPath := do
133136
let pwd' := pwd.toString ++ System.FilePath.pathSeparator.toString
134137
return sp.map fun x => ⟨if x = pwd then "." else x.toString.stripPrefix pwd'⟩
135138

139+
/--
140+
Detects the directory Lake uses to cache artifacts.
141+
This is very sensitive to changes in `Lake.Env.compute`.
142+
-/
143+
def getLakeArtifactDir? : IO (Option FilePath) := do
144+
let elan? ← Lake.findElanInstall?
145+
let toolchain := (← IO.getEnv "ELAN_TOOLCHAIN").getD Lean.toolchain
146+
let .ok cache ← Lake.Env.compute.getCache? elan? toolchain |>.toBaseIO
147+
| return none
148+
let artifactDir := cache.artifactDir
149+
if (← artifactDir.pathExists) then
150+
return artifactDir
151+
else
152+
IO.eprintln <| s!"Warning: Lake's cache directory '{artifactDir}' seems not to exist, \
153+
most likely `cache` will not work as expected!"
154+
return none
155+
136156
private def CacheM.getContext : IO CacheM.Context := do
137157
let sp ← (← getSrcSearchPath).relativize
138158
let mathlibSource ← CacheM.mathlibDepPath sp
139159
return {
140160
mathlibDepPath := mathlibSource,
141161
srcSearchPath := sp,
142-
proofWidgetsBuildDir := LAKEPACKAGESDIR / "proofwidgets" / ".lake" / "build"}
162+
proofWidgetsBuildDir := LAKEPACKAGESDIR / "proofwidgets" / ".lake" / "build"
163+
lakeArtifactDir? := ← getLakeArtifactDir?}
143164

144165
/-- Run a `CacheM` in `IO` by loading the context from `LEAN_SRC_PATH`. -/
145166
def CacheM.run (f : CacheM α) : IO α := do ReaderT.run f (← getContext)
@@ -286,11 +307,35 @@ def hashes (hashMap : ModuleHashMap) : Lean.RBTree UInt64 compare :=
286307

287308
end ModuleHashMap
288309

310+
structure BuildPaths where
311+
trace : FilePath
312+
artifactDir : FilePath := "."
313+
localPaths : Array FilePath := #[]
314+
cachePaths : Array FilePath := #[]
315+
316+
/--
317+
Extracts the Lake module build output hashes from a trace file.
318+
This is sensitive to changes in `Lake.readTraceFile`.
319+
-/
320+
def readTraceOutputs (path : FilePath) : BaseIO (Option Lake.ModuleOutputHashes) := do
321+
let .ok contents ← IO.FS.readFile path |>.toBaseIO
322+
| return none
323+
if Lake.Hash.ofString? contents.trim |>.isSome then
324+
return none
325+
let .ok (data : Lake.BuildMetadata) := Json.parse contents >>= fromJson?
326+
| return none
327+
let some outs := data.outputs?
328+
| return none
329+
let .ok hashes := fromJson? outs
330+
| return none
331+
return hashes
332+
333+
289334
/--
290335
Given a module name, concatenates the paths to its build files.
291336
Each build file also has a `Bool` indicating whether that file is required for caching to proceed.
292337
-/
293-
def mkBuildPaths (mod : Name) : CacheM <| List (FilePath × Bool) := do
338+
def mkBuildPaths (mod : Name) : CacheM (Option BuildPaths) := do
294339
/-
295340
TODO: if `srcDir` or other custom lake layout options are set in the `lean_lib`,
296341
`packageSrcDir / LIBDIR` might be the wrong path!
@@ -307,48 +352,71 @@ def mkBuildPaths (mod : Name) : CacheM <| List (FilePath × Bool) := do
307352
if !(← (packageDir / ".lake").isDir) then
308353
IO.eprintln <| s!"Warning: {packageDir / ".lake"} seems not to exist, most likely `cache` \
309354
will not work as expected!"
310-
311-
return [
312-
-- Note that `packCache` below requires that the `.trace` file is first in this list.
313-
(packageDir / LIBDIR / path.withExtension "trace", true),
314-
(packageDir / LIBDIR / path.withExtension "olean", true),
315-
(packageDir / LIBDIR / path.withExtension "olean.hash", true),
316-
(packageDir / LIBDIR / path.withExtension "ilean", true),
317-
(packageDir / LIBDIR / path.withExtension "ilean.hash", true),
318-
(packageDir / IRDIR / path.withExtension "c", true),
319-
(packageDir / IRDIR / path.withExtension "c.hash", true),
320-
(packageDir / LIBDIR / path.withExtension "extra", false)]
321-
322-
/-- Check that all required build files exist. -/
323-
def allExist (paths : List (FilePath × Bool)) : IO Bool := do
324-
for (path, required) in paths do
325-
if required then if !(← path.pathExists) then return false
326-
pure true
355+
let trace := packageDir / LIBDIR / path.withExtension "trace"
356+
if let some artifactDir := (← read).lakeArtifactDir? then
357+
if let some outs ← readTraceOutputs trace then
358+
let cachePaths : Array FilePath := #[
359+
s!"{outs.olean}.olean",
360+
s!"{outs.ilean}.ilean",
361+
s!"{outs.c}.c"
362+
]
363+
unless ← allExists cachePaths artifactDir do
364+
return none
365+
return some {trace, artifactDir, cachePaths}
366+
else
367+
let some olean ← Lake.Hash.load? (packageDir / LIBDIR / path.withExtension "olean.hash")
368+
| return none
369+
let some ilean ← Lake.Hash.load? (packageDir / LIBDIR / path.withExtension "ilean.hash")
370+
| return none
371+
let some c ← Lake.Hash.load? (packageDir / IRDIR / path.withExtension "c.hash")
372+
| return none
373+
let cachePaths : Array FilePath := #[
374+
s!"{olean}.olean",
375+
s!"{ilean}.ilean",
376+
s!"{c}.c"
377+
]
378+
unless ← allExists cachePaths artifactDir do
379+
return none
380+
return some {trace, artifactDir, cachePaths}
381+
let localPaths := #[
382+
packageDir / LIBDIR / path.withExtension "olean",
383+
packageDir / LIBDIR / path.withExtension "olean.hash",
384+
packageDir / LIBDIR / path.withExtension "ilean",
385+
packageDir / LIBDIR / path.withExtension "ilean.hash",
386+
packageDir / IRDIR / path.withExtension "c",
387+
packageDir / IRDIR / path.withExtension "c.hash"
388+
]
389+
unless ← allExists localPaths do
390+
return none
391+
return some {trace, localPaths}
392+
where
393+
allExists paths (root : FilePath := ".") := paths.allM (root / · |>.pathExists)
327394

328395
/-- Compresses build files into the local cache and returns an array with the compressed files -/
329396
def packCache (hashMap : ModuleHashMap) (overwrite verbose unpackedOnly : Bool)
330-
(comment : Option String := none) :
397+
(commit? : Option String := none) :
331398
CacheM <| Array String := do
332399
IO.FS.createDirAll CACHEDIR
333400
IO.println "Compressing cache"
334401
let sp := (← read).srcSearchPath
335402
let mut acc := #[]
336403
let mut tasks := #[]
404+
let initComment := if let some c := commit? then s!"git=mathlib4@{c};" else ""
337405
for (mod, hash) in hashMap.toList do
338406
let sourceFile ← Lean.findLean sp mod
339407
let zip := hash.asLTar
340408
let zipPath := CACHEDIR / zip
341-
let buildPaths ← mkBuildPaths mod
342-
if ← allExist buildPaths then
409+
if let some buildPaths ← mkBuildPaths mod then
343410
if overwrite || !(← zipPath.pathExists) then
344411
acc := acc.push (sourceFile, zip)
345412
tasks := tasks.push <| ← IO.asTask do
346-
-- Note here we require that the `.trace` file is first
347-
-- in the list generated by `mkBuildPaths`.
348-
let trace :: args := (← buildPaths.filterM (·.1.pathExists)) |>.map (·.1.toString)
349-
| unreachable!
350-
runCmd (← getLeanTar) <| #[zipPath.toString, trace] ++
351-
(if let some c := comment then #["-c", s!"git=mathlib4@{c}"] else #[]) ++ args
413+
let usesLakeCache := !buildPaths.cachePaths.isEmpty
414+
let comment := s!"{initComment}lakeCache={usesLakeCache}"
415+
let opts := #["-C", "."] ++ if usesLakeCache then #["-C", buildPaths.artifactDir.toString] else #[]
416+
let mut args := opts ++ #[zipPath.toString, buildPaths.trace.toString, "-c", comment]
417+
args := buildPaths.localPaths.foldl (· ++ #["-i", "0", toString ·]) args
418+
args := buildPaths.cachePaths.foldl (· ++ #["-i", "1", toString ·]) args
419+
runCmd (← getLeanTar) args
352420
else if !unpackedOnly then
353421
acc := acc.push (sourceFile, zip)
354422
for task in tasks do
@@ -394,13 +462,13 @@ def unpackCache (hashMap : ModuleHashMap) (force : Bool) : CacheM Unit := do
394462
-/
395463
let isMathlibRoot ← isMathlibRoot
396464
let mathlibDepPath := (← read).mathlibDepPath.toString
465+
let artifactDir? := (← read).lakeArtifactDir?
397466
let config : Array Lean.Json := hashMap.fold (init := #[]) fun config mod hash =>
398467
let pathStr := s!"{CACHEDIR / hash.asLTar}"
399-
if isMathlibRoot || !isFromMathlib mod then
400-
config.push <| .str pathStr
401-
else
402-
-- only mathlib files, when not in the mathlib4 repo, need to be redirected
403-
config.push <| .mkObj [("file", pathStr), ("base", mathlibDepPath)]
468+
-- only mathlib files, when not in the mathlib4 repo, need to be redirected
469+
let localDir := if isMathlibRoot || !isFromMathlib mod then "." else mathlibDepPath
470+
let baseDirs := if let some dir := artifactDir? then #[localDir, dir.toString] else #[localDir]
471+
config.push <| .mkObj [("file", pathStr), ("base", toJson baseDirs)]
404472
stdin.putStr <| Lean.Json.compress <| .arr config
405473
let exitCode ← child.wait
406474
if exitCode != 0 then throw <| IO.userError s!"leantar failed with error code {exitCode}"
@@ -485,7 +553,7 @@ def leanModulesFromSpec (sp : SearchPath) (argₛ : String) :
485553
(i.e. `Mathlib.Data` when only the folder `Mathlib/Data/` but no \
486554
file `Mathlib/Data.lean` exists) is not supported yet!"
487555
else
488-
return .error "Invalid argument: non-existing module {mod}"
556+
return .error s!"Invalid argument: non-existing module {mod}"
489557

490558
/--
491559
Parse command line arguments.

lakefile.lean

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ abbrev mathlibLeanOptions := #[
4242

4343
package mathlib where
4444
testDriver := "MathlibTest"
45+
enableArtifactCache := true
4546
-- These are additional settings which do not affect the lake hash,
4647
-- so they can be enabled in CI and disabled locally or vice versa.
4748
-- Warning: Do not put any options here that actually change the olean files,

0 commit comments

Comments
 (0)