Skip to content

Commit 7022a71

Browse files
committed
feat: add average score to total progress bar
Makes the total progress bar a bit more dynamic by adding information about the average score so far.
1 parent: fa629ce · commit: 7022a71

File tree

6 files changed, with 59 additions and 39 deletions.

runner/orchestration/generate.ts

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -155,9 +155,10 @@ export async function generateCodeAndAssess(options: {
155155
appConcurrencyQueue.add(
156156
async () => {
157157
const evalID = await env.gateway.initializeEval();
158+
let results: AssessmentResult[] | undefined;
158159

159160
try {
160-
return await callWithTimeout(
161+
results = await callWithTimeout(
161162
`Evaluation of ${rootPromptDef.name}`,
162163
async abortSignal =>
163164
startEvaluationTask(
@@ -183,6 +184,7 @@ export async function generateCodeAndAssess(options: {
183184
// 10min max per app evaluation. We just want to make sure it never gets stuck.
184185
10,
185186
);
187+
return results;
186188
} catch (e: unknown) {
187189
failedPrompts.push({
188190
promptName: rootPromptDef.name,
@@ -198,8 +200,7 @@ export async function generateCodeAndAssess(options: {
198200
progress.log(rootPromptDef, 'error', 'Failed to evaluate code', details);
199201
return [] satisfies AssessmentResult[];
200202
} finally {
201-
progress.log(rootPromptDef, 'done', 'Done');
202-
203+
progress.evalFinished(rootPromptDef, results || []);
203204
await env.gateway.finalizeEval(evalID);
204205
}
205206
},

runner/progress/dynamic-progress-logger.ts

Lines changed: 39 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
import {MultiBar, SingleBar, Presets} from 'cli-progress';
22
import chalk from 'chalk';
3-
import {RootPromptDefinition} from '../shared-interfaces.js';
3+
import {AssessmentResult, RootPromptDefinition} from '../shared-interfaces.js';
44
import {ProgressLogger, ProgressType, progressTypeToIcon} from './progress-logger.js';
55
import {redX} from '../reporting/format.js';
66

@@ -13,6 +13,8 @@ export class DynamicProgressLogger implements ProgressLogger {
1313
private pendingBars = new Map<RootPromptDefinition, SingleBar>();
1414
private spinnerFrames = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
1515
private currentSpinnerFrame = 0;
16+
private completedEvals = 0;
17+
private totalScore = 0;
1618
private spinnerInterval: ReturnType<typeof setInterval> | undefined;
1719
private errors: {
1820
prompt: RootPromptDefinition;
@@ -46,10 +48,17 @@ export class DynamicProgressLogger implements ProgressLogger {
4648
);
4749

4850
// Bar that tracks how many prompts are completed in total.
49-
this.totalBar = this.wrapper.create(total, 0, undefined, {
50-
format: '{bar} {spinner} {value}/{total} prompts completed',
51-
barsize: PREFIX_WIDTH,
52-
});
51+
this.totalBar = this.wrapper.create(
52+
total,
53+
0,
54+
{
55+
additionalInfo: '',
56+
},
57+
{
58+
format: '{bar} {spinner} {value}/{total} prompts completed{additionalInfo}',
59+
barsize: PREFIX_WIDTH,
60+
},
61+
);
5362

5463
// Interval to update the spinner.
5564
this.spinnerInterval = setInterval(() => {
@@ -74,6 +83,7 @@ export class DynamicProgressLogger implements ProgressLogger {
7483
this.wrapper?.stop();
7584
this.pendingBars.clear();
7685
this.wrapper = this.totalBar = this.spinnerInterval = undefined;
86+
this.completedEvals = this.totalScore = 0;
7787

7888
for (const error of this.errors) {
7989
let message = `${redX()} [${error.prompt.name}] ${error.message}`;
@@ -91,17 +101,6 @@ export class DynamicProgressLogger implements ProgressLogger {
91101

92102
let bar = this.pendingBars.get(prompt);
93103

94-
// Drop the bar from the screen if it's complete.
95-
if (type === 'done') {
96-
this.pendingBars.delete(prompt);
97-
98-
if (bar) {
99-
this.totalBar.increment();
100-
this.wrapper.remove(bar);
101-
}
102-
return;
103-
}
104-
105104
// Capture errors for static printing once the dynamic progress is hidden.
106105
if (type === 'error') {
107106
this.errors.push({prompt, message, details});
@@ -117,14 +116,36 @@ export class DynamicProgressLogger implements ProgressLogger {
117116
if (bar) {
118117
bar.update(0, payload);
119118
} else {
120-
const bar = this.wrapper.create(1, 0, payload);
119+
bar = this.wrapper.create(1, 0, payload);
121120
this.pendingBars.set(prompt, bar);
122121
}
123122
}
124123

124+
evalFinished(prompt: RootPromptDefinition, results: AssessmentResult[]): void {
125+
const bar = this.pendingBars.get(prompt);
126+
this.pendingBars.delete(prompt);
127+
128+
for (const result of results) {
129+
this.completedEvals++;
130+
this.totalScore += (result.score.totalPoints / result.score.maxOverallPoints) * 100;
131+
}
132+
133+
if (this.completedEvals > 0) {
134+
this.totalBar?.increment(1, {
135+
additionalInfo: `, ${Math.round(this.totalScore / this.completedEvals)}% score on average`,
136+
});
137+
} else {
138+
this.totalBar?.increment();
139+
}
140+
141+
// Drop the bar from the screen if it's complete.
142+
if (bar) {
143+
this.wrapper?.remove(bar);
144+
}
145+
}
146+
125147
private getColorFunction(type: ProgressType): (value: string) => string {
126148
switch (type) {
127-
case 'done':
128149
case 'success':
129150
case 'serve-testing':
130151
case 'build':

runner/progress/noop-progress-logger.ts

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,4 +5,5 @@ export class NoopProgressLogger implements ProgressLogger {
55
initialize(): void {}
66
finalize(): void {}
77
log(): void {}
8+
evalFinished(): void {}
89
}

runner/progress/progress-logger.ts

Lines changed: 9 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,8 @@
11
import {greenCheckmark, redX} from '../reporting/format.js';
2-
import {RootPromptDefinition} from '../shared-interfaces.js';
2+
import {AssessmentResult, RootPromptDefinition} from '../shared-interfaces.js';
33

44
/** Possible progress event types. */
5-
export type ProgressType =
6-
| 'codegen'
7-
| 'build'
8-
| 'serve-testing'
9-
| 'success'
10-
| 'error'
11-
| 'eval'
12-
| 'done';
5+
export type ProgressType = 'codegen' | 'build' | 'serve-testing' | 'success' | 'error' | 'eval';
136

147
/** Maps a ProgressType to an icon that can represent it. */
158
export function progressTypeToIcon(type: ProgressType): string {
@@ -27,8 +20,6 @@ export function progressTypeToIcon(type: ProgressType): string {
2720
return redX();
2821
case 'eval':
2922
return '🔎';
30-
case 'done':
31-
return '🏁';
3223
}
3324
}
3425

@@ -43,6 +34,13 @@ export interface ProgressLogger {
4334
/** Stops the logging process. */
4435
finalize(): void;
4536

37+
/**
38+
* Logs when an individual eval has finished.
39+
* @param prompt Prompt associated with the event.
40+
* @param results Assessment results for the prompt.
41+
*/
42+
evalFinished(prompt: RootPromptDefinition, results: AssessmentResult[]): void;
43+
4644
/**
4745
* Logs a progress event to the logger.
4846
* @param prompt Prompt associated with the event.

runner/progress/text-progress-logger.ts

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -17,13 +17,11 @@ export class TextProgressLogger implements ProgressLogger {
1717

1818
log(prompt: RootPromptDefinition, type: ProgressType, message: string, details?: string): void {
1919
const icon = progressTypeToIcon(type);
20-
21-
if (type === 'done') {
22-
// It's handy to know how many apps are done when one completes.
23-
const suffix = `(${++this.done}/${this.total})`;
24-
details = details ? `${details} ${suffix}` : suffix;
25-
}
26-
2720
console.log(`[${prompt.name}] ${icon} ${message} ${details || ''}`.trim());
2821
}
22+
23+
evalFinished(prompt: RootPromptDefinition): void {
24+
// It's handy to know how many apps are done when one completes.
25+
console.log(`[${prompt.name}] 🏁 Done (${++this.done}/${this.total})`.trim());
26+
}
2927
}

runner/run-cli.ts

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -178,6 +178,7 @@ async function getPossiblePrompts(environmentDir: string): Promise<string[]> {
178178
class ErrorOnlyProgressLogger implements ProgressLogger {
179179
initialize(): void {}
180180
finalize(): void {}
181+
evalFinished(): void {}
181182

182183
log(_: unknown, type: ProgressType, message: string, details?: string) {
183184
if (type === 'error') {

0 commit comments

Comments
 (0)