@@ -141,7 +141,7 @@ export interface EvaluationRetrieveResponse {
141141 /**
142142 * The parameters used for this evaluation
143143 */
144- parameters ?: unknown ;
144+ parameters ?: { [ key : string ] : unknown } ;
145145
146146 /**
147147 * Results of the evaluation (when completed)
@@ -213,10 +213,7 @@ export namespace EvaluationRetrieveResponse {
213213 }
214214
215215 export interface EvaluationScoreResults {
216- /**
217- * Aggregated score statistics
218- */
219- aggregated_scores ?: unknown ;
216+ aggregated_scores ?: EvaluationScoreResults . AggregatedScores ;
220217
221218 /**
222219 * number of failed samples generated from model
@@ -244,6 +241,16 @@ export namespace EvaluationRetrieveResponse {
244241 result_file_id ?: string ;
245242 }
246243
244+ export namespace EvaluationScoreResults {
245+ export interface AggregatedScores {
246+ mean_score ?: number ;
247+
248+ pass_percentage ?: number ;
249+
250+ std_score ?: number ;
251+ }
252+ }
253+
247254 export interface EvaluationCompareResults {
248255 /**
249256 * Number of times model A won
@@ -348,10 +355,7 @@ export namespace EvaluationGetStatusResponse {
348355 }
349356
350357 export interface EvaluationScoreResults {
351- /**
352- * Aggregated score statistics
353- */
354- aggregated_scores ?: unknown ;
358+ aggregated_scores ?: EvaluationScoreResults . AggregatedScores ;
355359
356360 /**
357361 * number of failed samples generated from model
@@ -379,6 +383,16 @@ export namespace EvaluationGetStatusResponse {
379383 result_file_id ?: string ;
380384 }
381385
386+ export namespace EvaluationScoreResults {
387+ export interface AggregatedScores {
388+ mean_score ?: number ;
389+
390+ pass_percentage ?: number ;
391+
392+ std_score ?: number ;
393+ }
394+ }
395+
382396 export interface EvaluationCompareResults {
383397 /**
384398 * Number of times model A won
@@ -527,10 +541,120 @@ export interface EvaluationUpdateStatusParams {
527541 */
528542 error ?: string ;
529543
530- /**
531- * Job results (required when status is 'completed')
532- */
533- results ?: unknown ;
544+ results ?:
545+ | EvaluationUpdateStatusParams . EvaluationClassifyResults
546+ | EvaluationUpdateStatusParams . EvaluationScoreResults
547+ | EvaluationUpdateStatusParams . EvaluationCompareResults ;
548+ }
549+
550+ export namespace EvaluationUpdateStatusParams {
551+ export interface EvaluationClassifyResults {
552+ /**
553+ * Number of failed generations.
554+ */
555+ generation_fail_count ?: number | null ;
556+
557+ /**
558+ * Number of invalid labels
559+ */
560+ invalid_label_count ?: number | null ;
561+
562+ /**
563+ * Number of failed judge generations
564+ */
565+ judge_fail_count ?: number | null ;
566+
567+ /**
568+ * JSON string representing label counts
569+ */
570+ label_counts ?: string ;
571+
572+ /**
573+ * Percentage of pass labels.
574+ */
575+ pass_percentage ?: number | null ;
576+
577+ /**
578+ * Data File ID
579+ */
580+ result_file_id ?: string ;
581+ }
582+
583+ export interface EvaluationScoreResults {
584+ aggregated_scores ?: EvaluationScoreResults . AggregatedScores ;
585+
586+ /**
587+ * number of failed samples generated from model
588+ */
589+ failed_samples ?: number ;
590+
591+ /**
592+ * Number of failed generations.
593+ */
594+ generation_fail_count ?: number | null ;
595+
596+ /**
597+ * number of invalid scores generated from model
598+ */
599+ invalid_score_count ?: number ;
600+
601+ /**
602+ * Number of failed judge generations
603+ */
604+ judge_fail_count ?: number | null ;
605+
606+ /**
607+ * Data File ID
608+ */
609+ result_file_id ?: string ;
610+ }
611+
612+ export namespace EvaluationScoreResults {
613+ export interface AggregatedScores {
614+ mean_score ?: number ;
615+
616+ pass_percentage ?: number ;
617+
618+ std_score ?: number ;
619+ }
620+ }
621+
622+ export interface EvaluationCompareResults {
623+ /**
624+ * Number of times model A won
625+ */
626+ A_wins ?: number ;
627+
628+ /**
629+ * Number of times model B won
630+ */
631+ B_wins ?: number ;
632+
633+ /**
634+ * Number of failed generations.
635+ */
636+ generation_fail_count ?: number | null ;
637+
638+ /**
639+ * Number of failed judge generations
640+ */
641+ judge_fail_count ?: number | null ;
642+
643+ /**
644+ * Total number of samples compared
645+ */
646+ num_samples ?: number ;
647+
648+ /**
649+ * Data File ID
650+ */
651+ result_file_id ?: string ;
652+
653+ /**
654+ * Number of ties
655+ */
656+ Ties ?: number ;
657+ }
534658}
535659
536660export declare namespace Evaluation {
0 commit comments