Skip to content

Commit d5edeff

Browse files
AnilAltinay authored and gvisor-bot committed
GKE tests for Triton+TensorRT
Test: http://sponge2/271fb572-247f-43a4-9d5a-f7b66122bd4a PiperOrigin-RevId: 834505553
1 parent cc1d2c2 commit d5edeff

File tree

4 files changed

+510
-1
lines changed

4 files changed

+510
-1
lines changed

test/gpu/triton/triton.go

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -504,14 +504,44 @@ func (r *Response) NumTokens() int {
504504
return len(r.data)
505505
}
506506

507+
// TimeToFirstToken returns the time between the request starting and the
508+
// first token reaching the client, reported as r.metrics.TimeToFirstByte().
509+
func (r *Response) TimeToFirstToken() time.Duration {
510+
return r.metrics.TimeToFirstByte()
511+
}
512+
513+
// TimeToLastToken returns the time between the request starting and the
514+
// last token reaching the client, reported as r.metrics.TimeToLastByte().
515+
func (r *Response) TimeToLastToken() time.Duration {
516+
return r.metrics.TimeToLastByte()
517+
}
518+
519+
// OutputTokensPerSecond computes the average number of output tokens
520+
// generated per second: NumTokens() divided by E2ELatency() (in seconds).
//
// NOTE(review): E2ELatency() returns 0 when either timestamp is unset, so the
// division below is then a floating-point division by zero (+Inf, or NaN when
// NumTokens() is also 0, under IEEE 754). Consider returning 0 in that case.
521+
func (r *Response) OutputTokensPerSecond() float64 {
522+
return float64(r.NumTokens()) / r.E2ELatency()
523+
}
524+
525+
// E2ELatency returns the end-to-end latency of the request in seconds: the
526+
// time from r.metrics.RequestSent to r.metrics.LastByteRead.
// It returns 0 if either of those timestamps is unset (zero).
527+
func (r *Response) E2ELatency() float64 {
528+
if r.metrics.LastByteRead.IsZero() || r.metrics.RequestSent.IsZero() {
529+
return 0
530+
}
531+
return r.metrics.LastByteRead.Sub(r.metrics.RequestSent).Seconds()
532+
}
533+
507534
// String returns the response text built from the Text of every token in
// r.data, or "<EMPTY>" when r.data holds no tokens. As of this change the
// token texts are joined with a single space; previously they were
// concatenated directly. It does not itself check whether the response is
// complete.
508535
func (r *Response) String() string {
509536
if len(r.data) == 0 {
510537
return "<EMPTY>"
511538
}
512539
var fullResponse strings.Builder
513-
for _, token := range r.data {
540+
for i, token := range r.data {
514541
fullResponse.WriteString(token.Text)
// Add a separator between tokens, but not after the last one.
542+
if i < len(r.data)-1 {
543+
fullResponse.WriteString(" ")
544+
}
515545
}
516546
return fullResponse.String()
517547
}

test/kubernetes/benchmarks/BUILD

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,46 @@ go_test(
397397
],
398398
)
399399

400+
# Test-only Go library built from triton.go. It embeds the shared benchmark
# resource files and depends on the //test/gpu/triton package and the
# Kubernetes test helpers listed below.
go_library(
401+
name = "triton",
402+
testonly = True,
403+
srcs = ["triton.go"],
404+
embedsrcs = [
405+
"//test/kubernetes/benchmarks/resources:files", # keep
406+
],
407+
nogo = False,
408+
deps = [
409+
"//pkg/sync",
410+
"//test/gpu/triton",
411+
"//test/kubernetes",
412+
"//test/kubernetes/benchmarks/profiling",
413+
"//test/kubernetes/benchmetric",
414+
"//test/kubernetes/k8sctx",
415+
"//test/kubernetes/testcluster",
416+
"@io_k8s_api//core/v1:go_default_library",
417+
"@io_k8s_apimachinery//pkg/api/resource:go_default_library",
418+
"@io_k8s_apimachinery//pkg/apis/meta/v1:go_default_library",
419+
"@io_k8s_apimachinery//pkg/util/intstr:go_default_library",
420+
],
421+
)
422+
423+
# Go test target for the :triton library, built from triton_test.go.
# NOTE(review): the "local", "noguitar" and "notap" tags presumably keep this
# test out of sandboxed/automated test runs — confirm against the repo's
# tagging conventions.
go_test(
424+
name = "triton_test",
425+
srcs = ["triton_test.go"],
426+
library = ":triton",
427+
nogo = False,
428+
tags = [
429+
"local",
430+
"noguitar",
431+
"notap",
432+
],
433+
deps = [
434+
"//test/kubernetes/k8sctx",
435+
"//test/kubernetes/k8sctx/kubectlctx",
436+
"//test/kubernetes/testcluster",
437+
],
438+
)
439+
400440
go_library(
401441
name = "sglang",
402442
testonly = True,

0 commit comments

Comments
 (0)