Skip to content

Commit d106076

Browse files
committed
libexpr: Add recoverable errors
This provides an explicit API for call-fail-retry-succeed evaluation flows, such as currently used in NixOps4. An alternative design would simply reset the `Value` to the original thunk instead of `tFailed` under the condition of catching a `RecoverableEvalError`. That is somewhat simpler, but I believe the presence of `tFailed` is beneficial for possible use in the repl; being able to show the error sooner, without re-evaluation. The hasPos method and isEmpty function are required in order to avoid an include loop.
1 parent 34280ce commit d106076

File tree

11 files changed

+221
-11
lines changed

11 files changed

+221
-11
lines changed

src/libexpr-c/nix_api_value.cc

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include "nix/expr/attr-set.hh"
2+
#include "nix/expr/eval-error.hh"
23
#include "nix/util/configuration.hh"
34
#include "nix/expr/eval.hh"
45
#include "nix/store/globals.hh"
@@ -89,8 +90,13 @@ static void nix_c_primop_wrapper(
8990
f(userdata, &ctx, (EvalState *) &state, (nix_value **) args, (nix_value *) &vTmp);
9091

9192
if (ctx.last_err_code != NIX_OK) {
92-
/* TODO: Throw different errors depending on the error code */
93-
state.error<nix::EvalError>("Error from custom function: %s", *ctx.last_err).atPos(pos).debugThrow();
93+
if (ctx.last_err_code == NIX_ERR_RECOVERABLE) {
94+
state.error<nix::RecoverableEvalError>("Recoverable error from custom function: %s", *ctx.last_err)
95+
.atPos(pos)
96+
.debugThrow();
97+
} else {
98+
state.error<nix::EvalError>("Error from custom function: %s", *ctx.last_err).atPos(pos).debugThrow();
99+
}
94100
}
95101

96102
if (!vTmp.isValid()) {

src/libexpr-tests/nix_api_expr.cc

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -437,4 +437,105 @@ TEST_F(nix_api_expr_test, nix_value_call_multi_no_args)
437437
assert_ctx_ok();
438438
ASSERT_EQ(3, rInt);
439439
}
440+
441+
// The following is a test case for retryable thunks.
442+
// This is a requirement for the current way in which NixOps4 evaluates its deployment expressions.
443+
// An alternative strategy could be implemented, but unwinding the stack may be a more efficient way to deal with many
444+
// suspensions/resumptions, compared to e.g. using a thread or coroutine stack for each suspended dependency. This test
445+
// models the essential bits of a deployment tool that uses such a strategy.
446+
447+
// State for the retryable primop - simulates deployment resource availability
448+
struct DeploymentResourceState
449+
{
450+
bool vm_created = false;
451+
};
452+
453+
static void primop_load_resource_input(
454+
void * user_data, nix_c_context * context, EvalState * state, nix_value ** args, nix_value * ret)
455+
{
456+
assert(context);
457+
assert(state);
458+
auto * resource_state = static_cast<DeploymentResourceState *>(user_data);
459+
460+
// Get the resource input name argument
461+
std::string input_name;
462+
if (nix_get_string(context, args[0], OBSERVE_STRING(input_name)) != NIX_OK)
463+
return;
464+
465+
// Only handle "vm_id" input - throw for anything else
466+
if (input_name != "vm_id") {
467+
std::string error_msg = "unknown resource input: " + input_name;
468+
nix_set_err_msg(context, NIX_ERR_NIX_ERROR, error_msg.c_str());
469+
return;
470+
}
471+
472+
if (resource_state->vm_created) {
473+
// VM has been created, return the ID
474+
nix_init_string(context, ret, "vm-12345");
475+
} else {
476+
// VM not created yet, fail with dependency error
477+
nix_set_err_msg(context, NIX_ERR_RECOVERABLE, "VM not yet created");
478+
}
479+
}
480+
481+
TEST_F(nix_api_expr_test, nix_expr_thunk_re_evaluation_after_deployment)
482+
{
483+
// This test demonstrates NixOps4's requirement: a thunk calling a primop should be
484+
// re-evaluable when deployment resources become available that were not available initially.
485+
486+
DeploymentResourceState resource_state;
487+
488+
PrimOp * primop = nix_alloc_primop(
489+
ctx,
490+
primop_load_resource_input,
491+
1,
492+
"loadResourceInput",
493+
nullptr,
494+
"load a deployment resource input",
495+
&resource_state);
496+
assert_ctx_ok();
497+
498+
nix_value * primopValue = nix_alloc_value(ctx, state);
499+
assert_ctx_ok();
500+
nix_init_primop(ctx, primopValue, primop);
501+
assert_ctx_ok();
502+
503+
nix_value * inputName = nix_alloc_value(ctx, state);
504+
assert_ctx_ok();
505+
nix_init_string(ctx, inputName, "vm_id");
506+
assert_ctx_ok();
507+
508+
// Create a single thunk by using nix_init_apply instead of nix_value_call
509+
// This creates a lazy application that can be forced multiple times
510+
nix_value * thunk = nix_alloc_value(ctx, state);
511+
assert_ctx_ok();
512+
nix_init_apply(ctx, thunk, primopValue, inputName);
513+
assert_ctx_ok();
514+
515+
// First force: VM not created yet, should fail
516+
nix_value_force(ctx, state, thunk);
517+
ASSERT_EQ(NIX_ERR_NIX_ERROR, nix_err_code(ctx));
518+
ASSERT_THAT(nix_err_msg(nullptr, ctx, nullptr), testing::HasSubstr("VM not yet created"));
519+
520+
// Clear the error context for the next attempt
521+
nix_c_context_free(ctx);
522+
ctx = nix_c_context_create();
523+
524+
// Simulate deployment process: VM gets created
525+
resource_state.vm_created = true;
526+
527+
// Second force of the SAME thunk: this is where the "failed" value issue appears
528+
// With failed value caching, this should fail because the thunk is marked as permanently failed
529+
// Without failed value caching (or with retryable failures), this should succeed
530+
nix_value_force(ctx, state, thunk);
531+
532+
// If we get here without error, the thunk was successfully re-evaluated
533+
assert_ctx_ok();
534+
535+
std::string result;
536+
nix_get_string(ctx, thunk, OBSERVE_STRING(result));
537+
assert_ctx_ok();
538+
ASSERT_STREQ("vm-12345", result.c_str());
539+
}
540+
440541
} // namespace nixC

src/libexpr/eval-error.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,5 +113,6 @@ template class EvalErrorBuilder<MissingArgumentError>;
113113
template class EvalErrorBuilder<InfiniteRecursionError>;
114114
template class EvalErrorBuilder<InvalidPathError>;
115115
template class EvalErrorBuilder<IFDError>;
116+
template class EvalErrorBuilder<RecoverableEvalError>;
116117

117118
} // namespace nix

src/libexpr/eval.cc

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include "nix/expr/eval.hh"
2+
#include "nix/expr/eval-error.hh"
23
#include "nix/expr/eval-settings.hh"
34
#include "nix/expr/primops.hh"
45
#include "nix/expr/print-options.hh"
@@ -27,6 +28,7 @@
2728
#include "parser-tab.hh"
2829

2930
#include <algorithm>
31+
#include <exception>
3032
#include <iostream>
3133
#include <sstream>
3234
#include <cstring>
@@ -2062,6 +2064,51 @@ void ExprBlackHole::eval(EvalState & state, [[maybe_unused]] Env & env, Value &
20622064
// always force this to be separate, otherwise forceValue may inline it and take
20632065
// a massive perf hit
20642066
[[gnu::noinline]]
2067+
void EvalState::handleEvalExceptionForThunk(Env * env, Expr * expr, Value & v, const PosIdx pos)
2068+
{
2069+
tryFixupBlackHolePos(v, pos);
2070+
2071+
auto e = std::current_exception();
2072+
Value * recovery = nullptr;
2073+
try {
2074+
std::rethrow_exception(e);
2075+
} catch (const RecoverableEvalError & e) {
2076+
recovery = allocValue();
2077+
}
2078+
if (recovery) {
2079+
recovery->mkThunk(env, expr);
2080+
}
2081+
v.mkFailed(e, recovery);
2082+
}
2083+
2084+
[[gnu::noinline]]
2085+
void EvalState::handleEvalExceptionForApp(Value & v)
2086+
{
2087+
auto e = std::current_exception();
2088+
Value * recovery = nullptr;
2089+
try {
2090+
std::rethrow_exception(e);
2091+
} catch (const RecoverableEvalError & e) {
2092+
recovery = allocValue();
2093+
}
2094+
if (recovery) {
2095+
*recovery = v;
2096+
}
2097+
v.mkFailed(e, recovery);
2098+
}
2099+
2100+
[[gnu::noinline]]
2101+
void EvalState::handleEvalFailed(Value & v, const PosIdx pos)
2102+
{
2103+
assert(v.isFailed());
2104+
if (auto recoveryValue = v.failed()->recoveryValue) {
2105+
v = *recoveryValue;
2106+
forceValue(v, pos);
2107+
} else {
2108+
std::rethrow_exception(v.failed()->ex);
2109+
}
2110+
}
2111+
20652112
void EvalState::tryFixupBlackHolePos(Value & v, PosIdx pos)
20662113
{
20672114
if (!v.isBlackhole())
@@ -2070,7 +2117,8 @@ void EvalState::tryFixupBlackHolePos(Value & v, PosIdx pos)
20702117
try {
20712118
std::rethrow_exception(e);
20722119
} catch (InfiniteRecursionError & e) {
2073-
e.atPos(positions[pos]);
2120+
if (!e.hasPos())
2121+
e.atPos(positions[pos]);
20742122
} catch (...) {
20752123
}
20762124
}

src/libexpr/include/nix/expr/eval-error.hh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,14 @@ MakeError(MissingArgumentError, EvalError);
5656
MakeError(InfiniteRecursionError, EvalError);
5757
MakeError(IFDError, EvalBaseError);
5858

59+
/**
60+
* An evaluation error which should be retried instead of rethrown
61+
*
62+
* A RecoverableEvalError is not an EvalError, because we shouldn't cache it in the eval cache, as it should be retried
63+
* anyway.
64+
*/
65+
MakeError(RecoverableEvalError, EvalBaseError);
66+
5967
struct InvalidPathError : public EvalError
6068
{
6169
public:

src/libexpr/include/nix/expr/eval-inline.hh

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include "nix/expr/eval.hh"
66
#include "nix/expr/eval-error.hh"
77
#include "nix/expr/eval-settings.hh"
8+
#include <exception>
89

910
namespace nix {
1011

@@ -97,19 +98,19 @@ void EvalState::forceValue(Value & v, const PosIdx pos)
9798
else
9899
ExprBlackHole::throwInfiniteRecursionError(*this, v);
99100
} catch (...) {
100-
tryFixupBlackHolePos(v, pos);
101-
v.mkFailed();
101+
handleEvalExceptionForThunk(env, expr, v, pos);
102102
throw;
103103
}
104104
} else if (v.isApp()) {
105105
try {
106106
callFunction(*v.app().left, *v.app().right, v, pos);
107107
} catch (...) {
108-
v.mkFailed();
108+
handleEvalExceptionForApp(v);
109109
throw;
110110
}
111-
} else if (v.isFailed())
112-
std::rethrow_exception(v.failed()->ex);
111+
} else if (v.isFailed()) {
112+
handleEvalFailed(v, pos);
113+
}
113114
}
114115

115116
[[gnu::always_inline]]

src/libexpr/include/nix/expr/eval.hh

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -610,8 +610,28 @@ public:
610610
*/
611611
inline void forceValue(Value & v, const PosIdx pos);
612612

613+
private:
614+
615+
/**
616+
* Internal support function for forceValue
617+
*
618+
* This code is factored out so that it's not in the heavily inlined hot path.
619+
*/
620+
void handleEvalExceptionForThunk(Env * env, Expr * expr, Value & v, const PosIdx pos);
621+
622+
/**
623+
* Internal support function for forceValue
624+
*
625+
* This code is factored out so that it's not in the heavily inlined hot path.
626+
*/
627+
void handleEvalExceptionForApp(Value & v);
628+
629+
void handleEvalFailed(Value & v, PosIdx pos);
630+
613631
void tryFixupBlackHolePos(Value & v, PosIdx pos);
614632

633+
public:
634+
615635
/**
616636
* Force a value, then recursively force list elements and
617637
* attributes.

src/libexpr/include/nix/expr/value.hh

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
///@file
33

44
#include <cassert>
5+
#include <exception>
56
#include <span>
67
#include <type_traits>
78
#include <concepts>
@@ -279,9 +280,15 @@ struct ValueBase
279280
{
280281
public:
281282
std::exception_ptr ex;
283+
/**
284+
* Optional value for recovering `RecoverableEvalError`
285+
* Must be set iff `ex` is an instance of `RecoverableEvalError`.
286+
*/
287+
Value * recoveryValue;
282288

283-
Failed(std::exception_ptr && ex)
289+
Failed(std::exception_ptr ex, Value * recoveryValue)
284290
: ex(ex)
291+
, recoveryValue(recoveryValue)
285292
{
286293
}
287294
};
@@ -1077,9 +1084,9 @@ public:
10771084
setStorage(n);
10781085
}
10791086

1080-
inline void mkFailed() noexcept
1087+
inline void mkFailed(std::exception_ptr e, Value * recovery) noexcept
10811088
{
1082-
setStorage(new Value::Failed(std::current_exception()));
1089+
setStorage(new Value::Failed(e, recovery));
10831090
}
10841091

10851092
bool isList() const noexcept

src/libutil-c/nix_api_util.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,13 @@ enum nix_err {
9898
*/
9999
NIX_ERR_NIX_ERROR = -4,
100100

101+
/**
102+
* @brief A recoverable error occurred.
103+
*
104+
* This is used primarily by C API *consumers* to communicate that a failed
105+
* primop call should be retried on the next evaluation attempt.
106+
*/
107+
NIX_ERR_RECOVERABLE = -5,
101108
};
102109

103110
typedef enum nix_err nix_err;

src/libutil/include/nix/util/error.hh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ struct LinesOfCode
5151
FIXME: Untangle this mess. Should there be AbstractPos as there used to be before
5252
4feb7d9f71? */
5353
struct Pos;
54+
bool isEmpty(const Pos & pos);
5455

5556
void printCodeLines(std::ostream & out, const std::string & prefix, const Pos & errPos, const LinesOfCode & loc);
5657

@@ -187,6 +188,11 @@ public:
187188
err.pos = pos;
188189
}
189190

191+
bool hasPos()
192+
{
193+
return err.pos.get() && !isEmpty(*err.pos.get());
194+
}
195+
190196
void pushTrace(Trace trace)
191197
{
192198
err.traces.push_front(trace);

0 commit comments

Comments
 (0)