33# @author Zeref996
44# encoding=utf-8 vi:ts=4:sw=4:expandtab:ft=python
55"""
6- slice 测试启动
6+ slice 测试启动
77"""
88
99import os
1010import json
1111import shutil
12- import yaml
1312from datetime import datetime
13+ import yaml
1414from time_count_engine import SliceBenchMark
1515from db .mysql_helper import SliceBenchmarkDB
1616from db .snapshot import Snapshot
17+ from slicebm_utils .threshold import perf_compare
1718
1819
1920class SliceTestRun (object ):
@@ -102,7 +103,7 @@ def run_test_and_insert_data(self, comment, base):
102103 "update_time" : test_complete_time ,
103104 }
104105 db .update (table = "slice_job" , data = update_data , condition = f"id = { task_id } " )
105- return res_dict , fail_cases_list
106+ return res_dict , fail_cases_list , task_id
106107
107108 def get_baseline (self , framework = "paddle" ):
108109 """
@@ -130,7 +131,7 @@ def ci_test(self):
130131 """
131132 ci 测试
132133 """
133- latest_res_dict , fail_cases_list = self .run_test_and_insert_data (comment = "slice测试CI任务" , base = 0 )
134+ latest_res_dict , fail_cases_list , task_id = self .run_test_and_insert_data (comment = "slice测试CI任务" , base = 0 )
134135 baseline_res_dict = self .get_baseline (framework = "paddle" )
135136 print ("开始使用本次CI测试结果, 与paddle基线进行性能对比 =============================>" )
136137 perf_compare_res_dict , fail_perf_dict = self .res_dict_compare (baseline_res_dict , latest_res_dict )
@@ -150,50 +151,23 @@ def ci_test(self):
150151 print (f"slice测试失败, 存在功能失败case, 失败case有: { fail_cases_list } " )
151152 if len (fail_perf_dict ) > 0 :
152153 print (f"slice测试失败, 存在性能下降case, 失败case性能变化: { fail_perf_dict } " )
154+
155+ with open (self .db_config , encoding = "utf-8" ) as f :
156+ db_config = yaml .load (f , Loader = yaml .FullLoader )
157+ db = SliceBenchmarkDB (** db_config ["Config" ]["slice_benchmark" ]["MYSQL" ])
153158 if len (fail_cases_list ) + len (fail_perf_dict ) > 0 :
159+ update_data = {
160+ "result" : "失败" ,
161+ "update_time" : self .timestamp (),
162+ }
163+ db .update (table = "slice_job" , data = update_data , condition = f"id = { task_id } " )
154164 raise Exception ("slice测试失败" )
155-
def perf_grade(self, res):
    """
    Map a performance comparison result onto a grade label.

    :param res: comparison result; either a number or a string
    :return: the string itself when ``res`` is a string, otherwise
        "worse" (res <= -0.2), "doubt" (-0.2 < res <= -0.1),
        "equal" (-0.1 < res <= 0.1) or "better" (res > 0.1)
    """
    # A string result is passed through unchanged as its own grade.
    if isinstance(res, str):
        return res

    # Thresholds are checked in ascending order, so each test only needs
    # the upper bound of its band.
    if res <= -0.2:
        return "worse"
    if res <= -0.1:
        return "doubt"
    if res <= 0.1:
        return "equal"
    if res > 0.1:
        return "better"

    # Only reached when every comparison above is false (e.g. NaN).
    return ""
175-
def perf_compare(self, baseline, latest):
    """
    Compare a measured value against its baseline.

    The result is a signed ratio: negative when ``latest`` is larger than
    ``baseline`` (relative to ``baseline``), non-negative otherwise
    (relative to ``latest``). It is 0 when either operand is 0, which also
    avoids a division by zero.

    :param baseline: baseline value
    :param latest: value under test
    :return: tuple ``(res, grade)``; ``res`` is the ratio, or "error" when
        either operand is a string, and ``grade`` is ``self.perf_grade(res)``
    """
    # Check BOTH operands: a string on either side cannot be compared
    # numerically. (The earlier version tested ``baseline`` twice, so a
    # string ``latest`` fell through and raised TypeError.)
    if isinstance(baseline, str) or isinstance(latest, str):
        res = "error"
    elif baseline == 0 or latest == 0:
        res = 0
    elif latest > baseline:
        res = (latest - baseline) / baseline * -1
    else:
        res = (baseline - latest) / latest

    # Always return a 2-tuple so callers that unpack ``res, grade`` also
    # work on the error path.
    return res, self.perf_grade(res)
166+ update_data = {
167+ "result" : "成功" ,
168+ "update_time" : self .timestamp (),
169+ }
170+ db .update (table = "slice_job" , data = update_data , condition = f"id = { task_id } " )
197171
198172 def res_dict_compare (self , baseline_res_dict , latest_res_dict ):
199173 """
@@ -203,7 +177,7 @@ def res_dict_compare(self, baseline_res_dict, latest_res_dict):
203177 perf_compare_res_dict = {}
204178 for case_name , perf_value in latest_res_dict .items ():
205179 if case_name in baseline_res_dict :
206- perf_compare_res , grade = self . perf_compare (baseline_res_dict [case_name ], perf_value )
180+ perf_compare_res , grade = perf_compare (baseline_res_dict [case_name ], perf_value , case_name )
207181 if grade == "worse" or grade == "doubt" :
208182 fail_perf_dict [case_name ] = perf_compare_res
209183 perf_compare_res_dict [case_name ] = perf_compare_res
@@ -215,7 +189,7 @@ def res_dict_compare(self, baseline_res_dict, latest_res_dict):
215189 print (f"{ case_name } : 基线数据不存在, 本次测试数据{ perf_value } , 无对比值" )
216190
217191 return perf_compare_res_dict , fail_perf_dict
218-
192+
219193 def torch_res_dict_compare (self , torch_res_dict , latest_res_dict ):
220194 """
221195 性能字典数据对比
@@ -225,7 +199,7 @@ def torch_res_dict_compare(self, torch_res_dict, latest_res_dict):
225199 for case_name_origin , perf_value in latest_res_dict .items ():
226200 case_name = case_name_origin .replace ("paddle" , "torch" )
227201 if case_name in torch_res_dict :
228- perf_compare_res , grade = self . perf_compare (torch_res_dict [case_name ], perf_value )
202+ perf_compare_res , grade = perf_compare (torch_res_dict [case_name ], perf_value , case_name )
229203 if grade == "worse" or grade == "doubt" :
230204 fail_perf_dict [case_name ] = perf_compare_res
231205 perf_compare_res_dict [case_name ] = perf_compare_res
0 commit comments