diff --git a/src/libstore/build/derivation-building-goal.cc b/src/libstore/build/derivation-building-goal.cc index 53343ce84c8..072bbfa93c3 100644 --- a/src/libstore/build/derivation-building-goal.cc +++ b/src/libstore/build/derivation-building-goal.cc @@ -127,31 +127,6 @@ static void runPostBuildHook( produced using a substitute. So we have to build instead. */ Goal::Co DerivationBuildingGoal::gaveUpOnSubstitution() { - /* Recheck at goal start. In particular, whereas before we were - given this information by the downstream goal, that cannot happen - anymore if the downstream goal only cares about one output, but - we care about all outputs. */ - auto outputHashes = staticOutputHashes(worker.evalStore, *drv); - for (auto & [outputName, outputHash] : outputHashes) { - InitialOutput v{.outputHash = outputHash}; - - /* TODO we might want to also allow randomizing the paths - for regular CA derivations, e.g. for sake of checking - determinism. */ - if (drv->type().isImpure()) { - v.known = InitialOutputStatus{ - .path = StorePath::random(outputPathName(drv->name, outputName)), - .status = PathStatus::Absent, - }; - } - - initialOutputs.insert({ - outputName, - std::move(v), - }); - } - checkPathValidity(); - Goals waitees; std::map, GoalPtr, value_comparison> inputGoals; @@ -334,14 +309,15 @@ Goal::Co DerivationBuildingGoal::gaveUpOnSubstitution() if (resolvedResult.success()) { SingleDrvOutputs builtOutputs; + auto outputHashes = staticOutputHashes(worker.evalStore, *drv); auto resolvedHashes = staticOutputHashes(worker.store, drvResolved); StorePathSet outputPaths; for (auto & outputName : drvResolved.outputNames()) { - auto initialOutput = get(initialOutputs, outputName); + auto outputHash = get(outputHashes, outputName); auto resolvedHash = get(resolvedHashes, outputName); - if ((!initialOutput) || (!resolvedHash)) + if ((!outputHash) || (!resolvedHash)) throw Error( "derivation '%s' doesn't have expected output '%s' (derivation-goal.cc/resolve)", worker.store.printStorePath(drvPath), @@ -368,7 +344,7 @@ Goal::Co DerivationBuildingGoal::gaveUpOnSubstitution() if (!drv->type().isImpure()) { auto newRealisation = realisation; - newRealisation.id = DrvOutput{initialOutput->outputHash, outputName}; + newRealisation.id = DrvOutput{*outputHash, outputName}; newRealisation.signatures.clear(); if (!drv->type().isFixed()) { auto & drvStore = worker.evalStore.isValidPath(drvPath) ? worker.evalStore : worker.store; @@ -439,136 +415,263 @@ Goal::Co DerivationBuildingGoal::gaveUpOnSubstitution() co_return tryToBuild(); } -void DerivationBuildingGoal::started() +Goal::Co DerivationBuildingGoal::tryToBuild() { - auto msg = - fmt(buildMode == bmRepair ? "repairing outputs of '%s'" - : buildMode == bmCheck ? "checking outputs of '%s'" - : "building '%s'", - worker.store.printStorePath(drvPath)); - fmt("building '%s'", worker.store.printStorePath(drvPath)); + std::map initialOutputs; + + /* Recheck at this point. In particular, whereas before we were + given this information by the downstream goal, that cannot happen + anymore if the downstream goal only cares about one output, but + we care about all outputs. */ + auto outputHashes = staticOutputHashes(worker.evalStore, *drv); + for (auto & [outputName, outputHash] : outputHashes) { + InitialOutput v{.outputHash = outputHash}; + + /* TODO we might want to also allow randomizing the paths + for regular CA derivations, e.g. for sake of checking + determinism. */ + if (drv->type().isImpure()) { + v.known = InitialOutputStatus{ + .path = StorePath::random(outputPathName(drv->name, outputName)), + .status = PathStatus::Absent, + }; + } + + initialOutputs.insert({ + outputName, + std::move(v), + }); + } + checkPathValidity(initialOutputs); + + auto started = [&]() { + auto msg = + fmt(buildMode == bmRepair ? "repairing outputs of '%s'" + : buildMode == bmCheck ? "checking outputs of '%s'" + : "building '%s'", + worker.store.printStorePath(drvPath)); + fmt("building '%s'", worker.store.printStorePath(drvPath)); #ifndef _WIN32 // TODO enable build hook on Windows - if (hook) - msg += fmt(" on '%s'", machineName); + if (hook) + msg += fmt(" on '%s'", hook->machineName); #endif - act = std::make_unique( - *logger, - lvlInfo, - actBuild, - msg, - Logger::Fields{ - worker.store.printStorePath(drvPath), + act = std::make_unique( + *logger, + lvlInfo, + actBuild, + msg, + Logger::Fields{ + worker.store.printStorePath(drvPath), #ifndef _WIN32 // TODO enable build hook on Windows - hook ? machineName : + hook ? hook->machineName : #endif - "", - 1, - 1}); - mcRunningBuilds = std::make_unique>(worker.runningBuilds); - worker.updateProgress(); -} + "", + 1, + 1}); + mcRunningBuilds = std::make_unique>(worker.runningBuilds); + worker.updateProgress(); + }; -Goal::Co DerivationBuildingGoal::tryToBuild() -{ - trace("trying to build"); - - /* Obtain locks on all output paths, if the paths are known a priori. - - The locks are automatically released when we exit this function or Nix - crashes. If we can't acquire the lock, then continue; hopefully some - other goal can start a build, and if not, the main loop will sleep a few - seconds and then retry this goal. */ - PathSet lockFiles; - /* FIXME: Should lock something like the drv itself so we don't build same - CA drv concurrently */ - if (dynamic_cast(&worker.store)) { - /* If we aren't a local store, we might need to use the local store as - a build remote, but that would cause a deadlock. */ - /* FIXME: Make it so we can use ourselves as a build remote even if we - are the local store (separate locking for building vs scheduling? */ - /* FIXME: find some way to lock for scheduling for the other stores so - a forking daemon with --store still won't farm out redundant builds. - */ - for (auto & i : drv->outputsAndOptPaths(worker.store)) { - if (i.second.second) - lockFiles.insert(worker.store.Store::toRealPath(*i.second.second)); - else - lockFiles.insert(worker.store.Store::toRealPath(drvPath) + "." + i.first); - } - } + /** + * Activity that denotes waiting for a lock. + */ + std::unique_ptr actLock; - if (!outputLocks.lockPaths(lockFiles, "", false)) { - Activity act(*logger, lvlWarn, actBuildWaiting, fmt("waiting for lock on %s", Magenta(showPaths(lockFiles)))); + /** + * Locks on (fixed) output paths. + */ + PathLocks outputLocks; - /* Wait then try locking again, repeat until success (returned - boolean is true). */ - do { - co_await waitForAWhile(); - } while (!outputLocks.lockPaths(lockFiles, "", false)); - } + bool useHook; - /* Now check again whether the outputs are valid. This is because - another process may have started building in parallel. After - it has finished and released the locks, we can (and should) - reuse its results. (Strictly speaking the first check can be - omitted, but that would be less efficient.) Note that since we - now hold the locks on the output paths, no other process can - build this derivation, so no further checks are necessary. */ - auto [allValid, validOutputs] = checkPathValidity(); - - if (buildMode != bmCheck && allValid) { - debug("skipping build of derivation '%s', someone beat us to it", worker.store.printStorePath(drvPath)); - outputLocks.setDeletion(true); - outputLocks.unlock(); - co_return doneSuccess(BuildResult::AlreadyValid, std::move(validOutputs)); - } + while (true) { + trace("trying to build"); + + /* Obtain locks on all output paths, if the paths are known a priori. + + The locks are automatically released when we exit this function or Nix + crashes. If we can't acquire the lock, then continue; hopefully some + other goal can start a build, and if not, the main loop will sleep a few + seconds and then retry this goal. */ + PathSet lockFiles; + /* FIXME: Should lock something like the drv itself so we don't build same + CA drv concurrently */ + if (dynamic_cast(&worker.store)) { + /* If we aren't a local store, we might need to use the local store as + a build remote, but that would cause a deadlock. */ + /* FIXME: Make it so we can use ourselves as a build remote even if we + are the local store (separate locking for building vs scheduling? */ + /* FIXME: find some way to lock for scheduling for the other stores so + a forking daemon with --store still won't farm out redundant builds. + */ + for (auto & i : drv->outputsAndOptPaths(worker.store)) { + if (i.second.second) + lockFiles.insert(worker.store.Store::toRealPath(*i.second.second)); + else + lockFiles.insert(worker.store.Store::toRealPath(drvPath) + "." + i.first); + } + } - /* If any of the outputs already exist but are not valid, delete - them. */ - for (auto & [_, status] : initialOutputs) { - if (!status.known || status.known->isValid()) - continue; - auto storePath = status.known->path; - debug("removing invalid path '%s'", worker.store.printStorePath(status.known->path)); - deletePath(worker.store.Store::toRealPath(storePath)); - } + if (!outputLocks.lockPaths(lockFiles, "", false)) { + Activity act( + *logger, lvlWarn, actBuildWaiting, fmt("waiting for lock on %s", Magenta(showPaths(lockFiles)))); - /* Don't do a remote build if the derivation has the attribute - `preferLocalBuild' set. Also, check and repair modes are only - supported for local builds. */ - bool buildLocally = - (buildMode != bmNormal || drvOptions->willBuildLocally(worker.store, *drv)) && settings.maxBuildJobs.get() != 0; - - if (!buildLocally) { - switch (tryBuildHook()) { - case rpAccept: - /* Yes, it has started doing so. Wait until we get - EOF from the hook. */ - actLock.reset(); - buildResult.startTime = time(0); // inexact - started(); - co_await Suspend{}; - co_return hookDone(); - case rpPostpone: - /* Not now; wait until at least one child finishes or - the wake-up timeout expires. */ - if (!actLock) - actLock = std::make_unique( - *logger, - lvlWarn, - actBuildWaiting, - fmt("waiting for a machine to build '%s'", Magenta(worker.store.printStorePath(drvPath)))); + /* Wait then try locking again, repeat until success (returned + boolean is true). */ + do { + co_await waitForAWhile(); + } while (!outputLocks.lockPaths(lockFiles, "", false)); + } + + /* Now check again whether the outputs are valid. This is because + another process may have started building in parallel. After + it has finished and released the locks, we can (and should) + reuse its results. (Strictly speaking the first check can be + omitted, but that would be less efficient.) Note that since we + now hold the locks on the output paths, no other process can + build this derivation, so no further checks are necessary. */ + auto [allValid, validOutputs] = checkPathValidity(initialOutputs); + + if (buildMode != bmCheck && allValid) { + debug("skipping build of derivation '%s', someone beat us to it", worker.store.printStorePath(drvPath)); + outputLocks.setDeletion(true); outputLocks.unlock(); - co_await waitForAWhile(); - co_return tryToBuild(); - case rpDecline: - /* We should do it ourselves. */ - break; + co_return doneSuccess(BuildResult::AlreadyValid, std::move(validOutputs)); + } + + /* If any of the outputs already exist but are not valid, delete + them. */ + for (auto & [_, status] : initialOutputs) { + if (!status.known || status.known->isValid()) + continue; + auto storePath = status.known->path; + debug("removing invalid path '%s'", worker.store.printStorePath(status.known->path)); + deletePath(worker.store.Store::toRealPath(storePath)); } + + /* Don't do a remote build if the derivation has the attribute + `preferLocalBuild' set. Also, check and repair modes are only + supported for local builds. */ + bool buildLocally = (buildMode != bmNormal || drvOptions->willBuildLocally(worker.store, *drv)) + && settings.maxBuildJobs.get() != 0; + + if (buildLocally) { + useHook = false; + } else { + switch (tryBuildHook(initialOutputs)) { + case rpAccept: + /* Yes, it has started doing so. Wait until we get + EOF from the hook. */ + useHook = true; + break; + case rpPostpone: + /* Not now; wait until at least one child finishes or + the wake-up timeout expires. */ + if (!actLock) + actLock = std::make_unique( + *logger, + lvlWarn, + actBuildWaiting, + fmt("waiting for a machine to build '%s'", Magenta(worker.store.printStorePath(drvPath)))); + outputLocks.unlock(); + co_await waitForAWhile(); + continue; + case rpDecline: + /* We should do it ourselves. */ + useHook = false; + break; + } + } + break; } actLock.reset(); + if (useHook) { + buildResult.startTime = time(0); // inexact + started(); + co_await Suspend{}; + +#ifndef _WIN32 + assert(hook); +#endif + + trace("hook build done"); + + /* Since we got an EOF on the logger pipe, the builder is presumed + to have terminated. In fact, the builder could also have + simply have closed its end of the pipe, so just to be sure, + kill it. */ + int status = +#ifndef _WIN32 // TODO enable build hook on Windows + hook->pid.kill(); +#else + 0; +#endif + + debug("build hook for '%s' finished", worker.store.printStorePath(drvPath)); + + buildResult.timesBuilt++; + buildResult.stopTime = time(0); + + /* So the child is gone now. */ + worker.childTerminated(this); + + /* Close the read side of the logger pipe. */ +#ifndef _WIN32 // TODO enable build hook on Windows + hook->builderOut.readSide.close(); + hook->fromHook.readSide.close(); +#endif + + /* Close the log file. */ + closeLogFile(); + + /* Check the exit status. */ + if (!statusOk(status)) { + auto e = fixupBuilderFailureErrorMessage({BuildResult::MiscFailure, status, ""}); + + outputLocks.unlock(); + + /* TODO (once again) support fine-grained error codes, see issue #12641. */ + + co_return doneFailure(std::move(e)); + } + + /* Compute the FS closure of the outputs and register them as + being valid. */ + auto builtOutputs = + /* When using a build hook, the build hook can register the output + as valid (by doing `nix-store --import'). If so we don't have + to do anything here. + + We can only early return when the outputs are known a priori. For + floating content-addressing derivations this isn't the case. + + Aborts if any output is not valid or corrupt, and otherwise + returns a 'SingleDrvOutputs' structure containing all outputs. + */ + [&] { + auto [allValid, validOutputs] = checkPathValidity(initialOutputs); + if (!allValid) + throw Error("some outputs are unexpectedly invalid"); + return validOutputs; + }(); + + StorePathSet outputPaths; + for (auto & [_, output] : builtOutputs) + outputPaths.insert(output.outPath); + runPostBuildHook(worker.store, *logger, drvPath, outputPaths); + + /* It is now safe to delete the lock files, since all future + lockers will see that the output paths are valid; they will + not create new lock files with the same names as the old + (unlinked) lock files. */ + outputLocks.setDeletion(true); + outputLocks.unlock(); + + co_return doneSuccess(BuildResult::Built, std::move(builtOutputs)); + } + co_await yield(); if (!dynamic_cast(&worker.store)) { @@ -584,12 +687,11 @@ Goal::Co DerivationBuildingGoal::tryToBuild() #ifdef _WIN32 // TODO enable `DerivationBuilder` on Windows throw UnimplementedError("building derivations is not yet implemented on Windows"); #else + assert(!hook); // Will continue here while waiting for a build user below while (true) { - assert(!hook); - unsigned int curBuilds = worker.getNrLocalBuilds(); if (curBuilds >= settings.maxBuildJobs) { outputLocks.unlock(); @@ -868,81 +970,7 @@ BuildError DerivationBuildingGoal::fixupBuilderFailureErrorMessage(BuilderFailur return BuildError{e.status, msg}; } -Goal::Co DerivationBuildingGoal::hookDone() -{ -#ifndef _WIN32 - assert(hook); -#endif - - trace("hook build done"); - - /* Since we got an EOF on the logger pipe, the builder is presumed - to have terminated. In fact, the builder could also have - simply have closed its end of the pipe, so just to be sure, - kill it. */ - int status = -#ifndef _WIN32 // TODO enable build hook on Windows - hook->pid.kill(); -#else - 0; -#endif - - debug("build hook for '%s' finished", worker.store.printStorePath(drvPath)); - - buildResult.timesBuilt++; - buildResult.stopTime = time(0); - - /* So the child is gone now. */ - worker.childTerminated(this); - - /* Close the read side of the logger pipe. */ -#ifndef _WIN32 // TODO enable build hook on Windows - hook->builderOut.readSide.close(); - hook->fromHook.readSide.close(); -#endif - - /* Close the log file. */ - closeLogFile(); - - /* Check the exit status. */ - if (!statusOk(status)) { - auto e = fixupBuilderFailureErrorMessage({BuildResult::MiscFailure, status, ""}); - - outputLocks.unlock(); - - /* TODO (once again) support fine-grained error codes, see issue #12641. */ - - co_return doneFailure(std::move(e)); - } - - /* Compute the FS closure of the outputs and register them as - being valid. */ - auto builtOutputs = - /* When using a build hook, the build hook can register the output - as valid (by doing `nix-store --import'). If so we don't have - to do anything here. - - We can only early return when the outputs are known a priori. For - floating content-addressing derivations this isn't the case. - */ - assertPathValidity(); - - StorePathSet outputPaths; - for (auto & [_, output] : builtOutputs) - outputPaths.insert(output.outPath); - runPostBuildHook(worker.store, *logger, drvPath, outputPaths); - - /* It is now safe to delete the lock files, since all future - lockers will see that the output paths are valid; they will - not create new lock files with the same names as the old - (unlinked) lock files. */ - outputLocks.setDeletion(true); - outputLocks.unlock(); - - co_return doneSuccess(BuildResult::Built, std::move(builtOutputs)); -} - -HookReply DerivationBuildingGoal::tryBuildHook() +HookReply DerivationBuildingGoal::tryBuildHook(const std::map & initialOutputs) { #ifdef _WIN32 // TODO enable build hook on Windows return rpDecline; @@ -1010,7 +1038,7 @@ HookReply DerivationBuildingGoal::tryBuildHook() hook = std::move(worker.hook); try { - machineName = readLine(hook->fromHook.readSide.get()); + hook->machineName = readLine(hook->fromHook.readSide.get()); } catch (Error & e) { e.addTrace({}, "while reading the machine name from the build hook"); throw; @@ -1221,7 +1249,8 @@ std::map> DerivationBuildingGoal::queryPar return res; } -std::pair DerivationBuildingGoal::checkPathValidity() +std::pair +DerivationBuildingGoal::checkPathValidity(std::map & initialOutputs) { if (drv->type().isImpure()) return {false, {}}; @@ -1278,17 +1307,8 @@ std::pair DerivationBuildingGoal::checkPathValidity() return {allValid, validOutputs}; } -SingleDrvOutputs DerivationBuildingGoal::assertPathValidity() -{ - auto [allValid, validOutputs] = checkPathValidity(); - if (!allValid) - throw Error("some outputs are unexpectedly invalid"); - return validOutputs; -} - Goal::Done DerivationBuildingGoal::doneSuccess(BuildResult::Status status, SingleDrvOutputs builtOutputs) { - outputLocks.unlock(); buildResult.status = status; assert(buildResult.success()); @@ -1306,7 +1326,6 @@ Goal::Done DerivationBuildingGoal::doneSuccess(BuildResult::Status status, Singl Goal::Done DerivationBuildingGoal::doneFailure(BuildError ex) { - outputLocks.unlock(); buildResult.status = ex.status; buildResult.errorMsg = fmt("%s", Uncolored(ex.info().msg)); if (buildResult.status == BuildResult::TimedOut) diff --git a/src/libstore/include/nix/store/build/derivation-building-goal.hh b/src/libstore/include/nix/store/build/derivation-building-goal.hh index 162cf14ad86..d394eb3c9c3 100644 --- a/src/libstore/include/nix/store/build/derivation-building-goal.hh +++ b/src/libstore/include/nix/store/build/derivation-building-goal.hh @@ -29,6 +29,12 @@ typedef enum { rpAccept, rpDecline, rpPostpone } HookReply; */ struct DerivationBuildingGoal : public Goal { + DerivationBuildingGoal( + const StorePath & drvPath, const Derivation & drv, Worker & worker, BuildMode buildMode = bmNormal); + ~DerivationBuildingGoal(); + +private: + /** The path of the derivation. */ StorePath drvPath; @@ -43,19 +49,12 @@ struct DerivationBuildingGoal : public Goal * The remainder is state held during the build. */ - /** - * Locks on (fixed) output paths. - */ - PathLocks outputLocks; - /** * All input paths (that is, the union of FS closures of the * immediate input paths). */ StorePathSet inputPaths; - std::map initialOutputs; - /** * File descriptor for the log file. */ @@ -92,22 +91,8 @@ struct DerivationBuildingGoal : public Goal std::unique_ptr act; - /** - * Activity that denotes waiting for a lock. - */ - std::unique_ptr actLock; - std::map builderActivities; - /** - * The remote machine on which we're building. - */ - std::string machineName; - - DerivationBuildingGoal( - const StorePath & drvPath, const Derivation & drv, Worker & worker, BuildMode buildMode = bmNormal); - ~DerivationBuildingGoal(); - void timedOut(Error && ex) override; std::string key() override; @@ -117,12 +102,11 @@ struct DerivationBuildingGoal : public Goal */ Co gaveUpOnSubstitution(); Co tryToBuild(); - Co hookDone(); /** * Is the build hook willing to perform the build? */ - HookReply tryBuildHook(); + HookReply tryBuildHook(const std::map & initialOutputs); /** * Open a log file and a pipe to it. @@ -156,21 +140,13 @@ struct DerivationBuildingGoal : public Goal * whether all outputs are valid and non-corrupt, and a * 'SingleDrvOutputs' structure containing the valid outputs. */ - std::pair checkPathValidity(); - - /** - * Aborts if any output is not valid or corrupt, and otherwise - * returns a 'SingleDrvOutputs' structure containing all outputs. - */ - SingleDrvOutputs assertPathValidity(); + std::pair checkPathValidity(std::map & initialOutputs); /** * Forcibly kill the child process, if any. */ void killChild(); - void started(); - Done doneSuccess(BuildResult::Status status, SingleDrvOutputs builtOutputs); Done doneFailure(BuildError ex); diff --git a/src/libstore/unix/include/nix/store/build/hook-instance.hh b/src/libstore/unix/include/nix/store/build/hook-instance.hh index 87e03665c72..7657d5dbd08 100644 --- a/src/libstore/unix/include/nix/store/build/hook-instance.hh +++ b/src/libstore/unix/include/nix/store/build/hook-instance.hh @@ -7,6 +7,14 @@ namespace nix { +/** + * @note Sometimes this is owned by the `Worker`, and sometimes it is + * owned by a `Goal`. This is for efficiency: rather than starting the + * hook every time we want to ask whether we can run a remote build + * (which can be very often), we reuse a hook process for answering + * those queries until it accepts a build. So if there are N + * derivations to be built, at most N hooks will be started. + */ struct HookInstance { /** @@ -29,6 +37,15 @@ struct HookInstance */ Pid pid; + /** + * The remote machine on which we're building. + * + * @Invariant When the hook instance is owned by the `Worker`, this + * is the empty string. When it is owned by a `Goal`, this should be + * set. + */ + std::string machineName; + FdSink sink; std::map activities;