Skip to content

Commit c9598f5

Browse files
committed
itest: add tranche-based parallel runner and clearer logs
- add tranche splitting/shuffling flags to the itest harness
- add itest-parallel target and scripts to run tranches concurrently
- write per-tranche logs under .logs/trancheN and tail failures for clarity
1 parent a1ef642 commit c9598f5

File tree

5 files changed

+229
-6
lines changed

5 files changed

+229
-6
lines changed

Makefile

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,10 @@ build-itest:
239239
CGO_ENABLED=0 $(GOBUILD) -tags="$(ITEST_TAGS)" -o itest/btcd-itest -ldflags "$(ITEST_LDFLAGS)" $(BTCD_PKG)
240240
CGO_ENABLED=0 $(GOBUILD) -tags="$(ITEST_TAGS)" -o itest/lnd-itest -ldflags "$(ITEST_LDFLAGS)" $(LND_PKG)/cmd/lnd
241241

242+
build-itest-binary:
243+
@$(call print, "Building itest binary.")
244+
CGO_ENABLED=0 $(GOTEST) -v ./itest -tags="$(DEV_TAGS) $(ITEST_TAGS)" -c -o itest/itest.test
245+
242246
install-backward-compat-versions:
243247
@$(call print, "Installing old versions of litd for backward compatibility tests.")
244248
scripts/install-backward-compat-versions.sh '$(LITD_COMPAT_VERSIONS)'
@@ -258,6 +262,16 @@ itest: app-build build-itest itest-only
258262

259263
# Build the app and the itest binaries, then hand off to run-itest-only.
itest-no-backward-compat: app-build build-itest run-itest-only
260264

265+
itest-parallel: app-build build-itest install-backward-compat-versions build-itest-binary
266+
@$(call print, "Running integration tests in parallel.")
267+
rm -rf itest/*.log itest/.logs*; date
268+
scripts/itest_parallel.sh $(ITEST_PARALLELISM) $(NUM_ITEST_TRANCHES) $(SHUFFLE_SEED) $(TEST_FLAGS) $(ITEST_FLAGS)
269+
270+
itest-parallel-no-backward-compat: app-build build-itest build-itest-binary
271+
@$(call print, "Running integration tests in parallel (no backward compat binaries).")
272+
rm -rf itest/*.log itest/.logs*; date
273+
scripts/itest_parallel.sh $(ITEST_PARALLELISM) $(NUM_ITEST_TRANCHES) $(SHUFFLE_SEED) $(TEST_FLAGS) $(ITEST_FLAGS)
274+
261275
# =============
262276
# FLAKE HUNTING
263277
# =============
@@ -349,5 +363,6 @@ flakehunter-unit:
349363
.PHONY: default all yarn-install build install go-build go-build-noui \
350364
go-install go-install-noui go-install-cli app-build release go-release \
351365
docker-release docker-tools scratch check unit unit-cover unit-race \
352-
clean-itest build-itest itest-only itest flake-unit fmt lint mod mod-check \
353-
list rpc protos protos-check rpc-js-compile clean
366+
clean-itest build-itest build-itest-binary itest-only itest \
367+
itest-parallel itest-parallel-no-backward-compat flake-unit fmt lint \
368+
mod mod-check list rpc protos protos-check rpc-js-compile clean

itest/litd_test.go

Lines changed: 121 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
package itest
22

33
import (
4+
"flag"
5+
"fmt"
6+
"math/rand"
47
"os"
58
"strings"
69
"testing"
@@ -12,6 +15,40 @@ import (
1215
"github.com/stretchr/testify/require"
1316
)
1417

18+
const (
19+
// defaultSplitTranches is the default number of tranches to divide the
20+
// test suite into when no override is provided.
21+
defaultSplitTranches uint = 1
22+
23+
// defaultRunTranche is the default tranche index to execute when no
24+
// explicit tranche is selected.
25+
defaultRunTranche uint = 0
26+
)
27+
28+
var (
29+
// testCasesSplitTranches controls how many tranches the full itest list
30+
// is divided into for parallel execution.
31+
testCasesSplitTranches = flag.Uint(
32+
"splittranches", defaultSplitTranches,
33+
"split the test cases in this many tranches and run the tranche "+
34+
"at 0-based index specified by the -runtranche flag",
35+
)
36+
37+
// shuffleSeedFlag enables deterministic shuffling of test cases to
38+
// balance workload across tranches.
39+
shuffleSeedFlag = flag.Uint64(
40+
"shuffleseed", 0, "if set, shuffles the test cases using this "+
41+
"as the source of randomness",
42+
)
43+
44+
// testCasesRunTranche selects which tranche (0-based) to execute.
45+
testCasesRunTranche = flag.Uint(
46+
"runtranche", defaultRunTranche,
47+
"run the tranche of the split test cases with the given (0-based) "+
48+
"index",
49+
)
50+
)
51+
1552
// TestLightningTerminal performs a series of integration tests amongst a
1653
// programmatically driven network of lnd nodes.
1754
func TestLightningTerminal(t *testing.T) {
@@ -39,9 +76,18 @@ func TestLightningTerminal(t *testing.T) {
3976
"--rpcmiddleware.enable",
4077
}
4178

79+
testCases, trancheIndex, trancheOffset := selectTestTranche()
80+
totalTestCases := len(allTestCases)
81+
4282
// Run the subset of the test cases selected in this tranche.
43-
for _, testCase := range allTestCases {
44-
success := t.Run(testCase.name, func(t1 *testing.T) {
83+
for idx, testCase := range testCases {
84+
testOrdinal := int(trancheOffset) + idx + 1
85+
testName := fmt.Sprintf(
86+
"tranche%02d/%02d-of-%d/%s", int(trancheIndex),
87+
testOrdinal, totalTestCases, testCase.name,
88+
)
89+
90+
success := t.Run(testName, func(t1 *testing.T) {
4591
cleanTestCaseName := strings.ReplaceAll(
4692
testCase.name, " ", "_",
4793
)
@@ -107,6 +153,79 @@ func TestLightningTerminal(t *testing.T) {
107153
}
108154
}
109155

156+
// maybeShuffleTestCases shuffles the test cases if the flag `shuffleseed` is
157+
// set and not 0. This is used by parallel test runs to even out the work
158+
// across tranches.
159+
func maybeShuffleTestCases() {
160+
// Exit if not set or set to 0.
161+
if shuffleSeedFlag == nil || *shuffleSeedFlag == 0 {
162+
return
163+
}
164+
165+
// Init the seed and shuffle the test cases.
166+
// #nosec G404 -- This is not for cryptographic purposes.
167+
r := rand.New(rand.NewSource(int64(*shuffleSeedFlag)))
168+
r.Shuffle(len(allTestCases), func(i, j int) {
169+
allTestCases[i], allTestCases[j] =
170+
allTestCases[j], allTestCases[i]
171+
})
172+
}
173+
174+
// createIndices divides numCases test cases into numTranches contiguous
// tranches and returns one [start, end) index pair per tranche. The tranche
// sizes differ by at most one: the first numCases % numTranches tranches each
// receive one extra case. When there are fewer cases than tranches, the
// trailing tranches are empty (start == end).
//
// A numTranches of zero yields an empty result rather than a division-by-zero
// panic, so callers must check the length of the result before indexing it.
func createIndices(numCases, numTranches uint) [][2]uint {
	// With zero tranches there is nothing to split into, and the division
	// below would panic.
	if numTranches == 0 {
		return nil
	}

	base := numCases / numTranches
	remainder := numCases % numTranches

	indices := make([][2]uint, numTranches)
	start := uint(0)

	for i := uint(0); i < numTranches; i++ {
		// The first `remainder` tranches absorb the cases that do not
		// divide evenly.
		end := start + base
		if i < remainder {
			end++
		}
		indices[i] = [2]uint{start, end}
		start = end
	}

	return indices
}
194+
195+
// selectTestTranche returns the sub slice of the test cases that should be run
196+
// as the current split tranche as well as the index and slice offset of the
197+
// tranche.
198+
func selectTestTranche() ([]*testCase, uint, uint) {
199+
numTranches := defaultSplitTranches
200+
if testCasesSplitTranches != nil {
201+
numTranches = *testCasesSplitTranches
202+
}
203+
runTranche := defaultRunTranche
204+
if testCasesRunTranche != nil {
205+
runTranche = *testCasesRunTranche
206+
}
207+
208+
// There's a special flake-hunt mode where we run the same test multiple
209+
// times in parallel. In that case the tranche index is equal to the
210+
// thread ID, but we need to actually run all tests for the regex
211+
// selection to work.
212+
threadID := runTranche
213+
if numTranches == 1 {
214+
runTranche = 0
215+
}
216+
217+
// Shuffle the test cases if the `shuffleseed` flag is set.
218+
maybeShuffleTestCases()
219+
220+
numCases := uint(len(allTestCases))
221+
indices := createIndices(numCases, numTranches)
222+
index := indices[runTranche]
223+
trancheOffset, trancheEnd := index[0], index[1]
224+
225+
return allTestCases[trancheOffset:trancheEnd], threadID,
226+
trancheOffset
227+
}
228+
110229
func init() {
111230
logger := btclog.NewSLogger(btclog.NewDefaultHandler(os.Stdout))
112231
UseLogger(logger.SubSystem(Subsystem))

make/testing_flags.mk

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,26 @@ include make/compile_flags.mk
33
TEST_FLAGS =
44
DEV_TAGS = dev
55

6+
NUM_ITEST_TRANCHES = 8
7+
ITEST_PARALLELISM = $(NUM_ITEST_TRANCHES)
8+
SHUFFLE_SEED = 0
9+
10+
# Scale the number of parallel running itest tranches.
11+
ifneq ($(tranches),)
12+
NUM_ITEST_TRANCHES = $(tranches)
13+
ITEST_PARALLELISM = $(NUM_ITEST_TRANCHES)
14+
endif
15+
16+
# Give the ability to run the same tranche multiple times at the same time.
17+
ifneq ($(parallel),)
18+
ITEST_PARALLELISM = $(parallel)
19+
endif
20+
21+
# Set the seed for shuffling the test cases.
22+
ifneq ($(shuffleseed),)
23+
SHUFFLE_SEED = $(shuffleseed)
24+
endif
25+
626
# Define the integration test.run filter if the icase argument was provided.
727
ifneq ($(icase),)
828
ITEST_FLAGS += -test.run="TestLightningTerminal/$(icase)"

scripts/itest_parallel.sh

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
#!/bin/bash

# Starts several scripts/itest_part.sh processes in the background, waits for
# all of them and exits with the exit code of the first one (in start order)
# that failed, or 0 if none failed.
#
# Usage: itest_parallel.sh <processes> <tranches> <shuffle_seed> [flags...]
#
# Process i (0-based) is invoked as:
#   scripts/itest_part.sh <i> <tranches> <shuffle_seed> [flags...]

# Refuse to run without the three positional arguments. Otherwise the loop
# below would start no process at all and the script would report success
# without having run a single test.
if [[ $# -lt 3 ]]; then
  echo "Usage: $0 <processes> <tranches> <shuffle_seed> [flags...]" >&2
  exit 1
fi

# Get all the variables.
PROCESSES=$1
TRANCHES=$2
SHUFFLE_SEED=$3

# The process count drives the arithmetic loop below, so it has to be a
# positive integer. Zero would again mean "run nothing and succeed".
if ! [[ "$PROCESSES" =~ ^[1-9][0-9]*$ ]]; then
  echo "Invalid number of processes: '$PROCESSES'" >&2
  exit 1
fi

# Here we also shift 3 times and get the rest of our flags to pass on in $@.
shift 3

# Create a variable to hold the final exit code.
exit_code=0

# Run commands in parallel and track their PIDs.
pids=()
for ((i=0; i<PROCESSES; i++)); do
  scripts/itest_part.sh "$i" "$TRANCHES" "$SHUFFLE_SEED" "$@" &
  pids+=("$!")
done

# Wait for every background process started above to finish.
for pid in "${pids[@]}"; do
  wait "$pid"

  # Once finished, grab its exit code.
  current_exit_code=$?

  # Only record the exit code of the first failing itest.
  if [ "$current_exit_code" -ne 0 ] && [ "$exit_code" -eq 0 ]; then
    exit_code=$current_exit_code
  fi
done

# Exit with the exit code of the first failing itest or 0.
exit "$exit_code"

scripts/itest_part.sh

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,45 @@
33
# Let's work with absolute paths only, we run in the itest directory itself.
44
WORKDIR=$(pwd)/itest
55

6+
TRANCHE=0
7+
NUM_TRANCHES=1
8+
SHUFFLE_SEED=0
9+
10+
# If the first three arguments are integers, treat them as tranche settings.
11+
if [[ $# -ge 3 && "$1" =~ ^[0-9]+$ && "$2" =~ ^[0-9]+$ && "$3" =~ ^[0-9]+$ ]]; then
12+
TRANCHE=$1
13+
NUM_TRANCHES=$2
14+
SHUFFLE_SEED=$3
15+
shift 3
16+
fi
17+
618
# Windows insists on having the .exe suffix for an executable, we need to add
719
# that here if necessary.
820
EXEC="$WORKDIR"/itest.test
921
LITD_EXEC="$WORKDIR"/litd-itest
1022
BTCD_EXEC="$WORKDIR"/btcd-itest
11-
echo $EXEC -test.v "$@" -logoutput -logdir=.logs -litdexec=$LITD_EXEC -btcdexec=$BTCD_EXEC
23+
LOG_DIR="$WORKDIR/.logs"
24+
if [[ $NUM_TRANCHES -gt 1 ]]; then
25+
LOG_DIR="$WORKDIR/.logs/tranche$TRANCHE"
26+
fi
27+
28+
mkdir -p "$LOG_DIR"
29+
LOG_FILE="$LOG_DIR/output.log"
30+
31+
TRANCHE_FLAGS=(-splittranches="$NUM_TRANCHES" -runtranche="$TRANCHE" -shuffleseed="$SHUFFLE_SEED")
32+
33+
echo "$EXEC" -test.v "${TRANCHE_FLAGS[@]}" "$@" -logoutput -logdir="$LOG_DIR" -litdexec=$LITD_EXEC -btcdexec=$BTCD_EXEC
1234

1335
# Exit code 255 causes the parallel jobs to abort, so if one part fails the
1436
# other is aborted too.
1537
cd "$WORKDIR" || exit 255
16-
$EXEC -test.v "$@" -logoutput -logdir=.logs -litdexec=$LITD_EXEC -btcdexec=$BTCD_EXEC || exit 255
38+
$EXEC -test.v "${TRANCHE_FLAGS[@]}" "$@" -logoutput -logdir="$LOG_DIR" -litdexec=$LITD_EXEC -btcdexec=$BTCD_EXEC >"$LOG_FILE" 2>&1
39+
40+
exit_code=$?
41+
if [ $exit_code -ne 0 ]; then
42+
echo "Tranche $TRANCHE failed with exit code $exit_code"
43+
tail -n 100 "$LOG_FILE"
44+
exit 255
45+
else
46+
echo "Tranche $TRANCHE completed successfully"
47+
fi

0 commit comments

Comments
 (0)