Skip to content

Commit 5d72e06

Browse files
authored
Add slice bm 04 (#3125)
* add slice bm * fix slice bm * fix slice run * add slice insert * fix slice run * add slice bm case threshold * add slice bm case threshold
1 parent c65561b commit 5d72e06

File tree

2 files changed

+82
-48
lines changed

2 files changed

+82
-48
lines changed

framework/slice_benchmark/run.py

Lines changed: 22 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,18 @@
33
# @author Zeref996
44
# encoding=utf-8 vi:ts=4:sw=4:expandtab:ft=python
55
"""
6-
slice 测试启动
6+
slice 测试启动
77
"""
88

99
import os
1010
import json
1111
import shutil
12-
import yaml
1312
from datetime import datetime
13+
import yaml
1414
from time_count_engine import SliceBenchMark
1515
from db.mysql_helper import SliceBenchmarkDB
1616
from db.snapshot import Snapshot
17+
from slicebm_utils.threshold import perf_compare
1718

1819

1920
class SliceTestRun(object):
@@ -102,7 +103,7 @@ def run_test_and_insert_data(self, comment, base):
102103
"update_time": test_complete_time,
103104
}
104105
db.update(table="slice_job", data=update_data, condition=f"id = {task_id}")
105-
return res_dict, fail_cases_list
106+
return res_dict, fail_cases_list, task_id
106107

107108
def get_baseline(self, framework="paddle"):
108109
"""
@@ -130,7 +131,7 @@ def ci_test(self):
130131
"""
131132
ci 测试
132133
"""
133-
latest_res_dict, fail_cases_list = self.run_test_and_insert_data(comment="slice测试CI任务", base=0)
134+
latest_res_dict, fail_cases_list, task_id = self.run_test_and_insert_data(comment="slice测试CI任务", base=0)
134135
baseline_res_dict = self.get_baseline(framework="paddle")
135136
print("开始使用本次CI测试结果, 与paddle基线进行性能对比 =============================>")
136137
perf_compare_res_dict, fail_perf_dict = self.res_dict_compare(baseline_res_dict, latest_res_dict)
@@ -150,50 +151,23 @@ def ci_test(self):
150151
print(f"slice测试失败, 存在功能失败case, 失败case有: {fail_cases_list}")
151152
if len(fail_perf_dict) > 0:
152153
print(f"slice测试失败, 存在性能下降case, 失败case性能变化: {fail_perf_dict}")
154+
155+
with open(self.db_config, encoding="utf-8") as f:
156+
db_config = yaml.load(f, Loader=yaml.FullLoader)
157+
db = SliceBenchmarkDB(**db_config["Config"]["slice_benchmark"]["MYSQL"])
153158
if len(fail_cases_list) + len(fail_perf_dict) > 0:
159+
update_data = {
160+
"result": "失败",
161+
"update_time": self.timestamp(),
162+
}
163+
db.update(table="slice_job", data=update_data, condition=f"id = {task_id}")
154164
raise Exception("slice测试失败")
155-
156-
def perf_grade(self, res):
157-
"""
158-
评分标准
159-
:param res: 性能对比结果
160-
:return:
161-
"""
162-
grade = ""
163-
if isinstance(res, str):
164-
grade = res
165-
else:
166-
if res <= -0.2:
167-
grade = "worse"
168-
elif -0.2 < res <= -0.1:
169-
grade = "doubt"
170-
elif -0.1 < res <= 0.1:
171-
grade = "equal"
172-
elif res > 0.1:
173-
grade = "better"
174-
return grade
175-
176-
def perf_compare(self, baseline, latest):
177-
"""
178-
比较函数
179-
:param latest: 待测值
180-
:param baseline: 基线值
181-
:return: 比例值
182-
"""
183-
if isinstance(baseline, str) or isinstance(baseline, str):
184-
res = "error"
185-
return res
186165
else:
187-
if baseline == 0 or latest == 0:
188-
res = 0
189-
else:
190-
if latest > baseline:
191-
res = (latest - baseline) / baseline * -1
192-
else:
193-
res = (baseline - latest) / latest
194-
grade = self.perf_grade(res)
195-
return res, grade
196-
# return "{:.2f}%".format(res * 100)
166+
update_data = {
167+
"result": "成功",
168+
"update_time": self.timestamp(),
169+
}
170+
db.update(table="slice_job", data=update_data, condition=f"id = {task_id}")
197171

198172
def res_dict_compare(self, baseline_res_dict, latest_res_dict):
199173
"""
@@ -203,7 +177,7 @@ def res_dict_compare(self, baseline_res_dict, latest_res_dict):
203177
perf_compare_res_dict = {}
204178
for case_name, perf_value in latest_res_dict.items():
205179
if case_name in baseline_res_dict:
206-
perf_compare_res, grade = self.perf_compare(baseline_res_dict[case_name], perf_value)
180+
perf_compare_res, grade = perf_compare(baseline_res_dict[case_name], perf_value, case_name)
207181
if grade == "worse" or grade == "doubt":
208182
fail_perf_dict[case_name] = perf_compare_res
209183
perf_compare_res_dict[case_name] = perf_compare_res
@@ -215,7 +189,7 @@ def res_dict_compare(self, baseline_res_dict, latest_res_dict):
215189
print(f"{case_name}: 基线数据不存在, 本次测试数据{perf_value}, 无对比值")
216190

217191
return perf_compare_res_dict, fail_perf_dict
218-
192+
219193
def torch_res_dict_compare(self, torch_res_dict, latest_res_dict):
220194
"""
221195
性能字典数据对比
@@ -225,7 +199,7 @@ def torch_res_dict_compare(self, torch_res_dict, latest_res_dict):
225199
for case_name_origin, perf_value in latest_res_dict.items():
226200
case_name = case_name_origin.replace("paddle", "torch")
227201
if case_name in torch_res_dict:
228-
perf_compare_res, grade = self.perf_compare(torch_res_dict[case_name], perf_value)
202+
perf_compare_res, grade = perf_compare(torch_res_dict[case_name], perf_value, case_name)
229203
if grade == "worse" or grade == "doubt":
230204
fail_perf_dict[case_name] = perf_compare_res
231205
perf_compare_res_dict[case_name] = perf_compare_res
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
#!/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
# @author Zeref996
4+
# encoding=utf-8 vi:ts=4:sw=4:expandtab:ft=python
5+
"""
6+
case性能阈值
7+
"""
8+
9+
# Per-case grading thresholds, as [worse_upper, doubt_upper, equal_upper].
# A relative change `res` is graded:
#   res <= worse_upper                -> "worse"
#   worse_upper < res <= doubt_upper  -> "doubt"
#   doubt_upper < res <= equal_upper  -> "equal"
#   res > equal_upper                 -> "better"
# Cases not listed here fall back to the 'basic_threshold' entry.
threshold_map = {
    'Getitem - forward - Scalar - Integer - float16 - paddle': [-0.25, -0.15, 0.15],
    'Getitem - forward - Scalar - Tuple of Integers - float16 - paddle': [-0.25, -0.15, 0.15],
    'basic_threshold': [-0.2, -0.1, 0.1],
}


def perf_grade(res, threshold):
    """
    Map a performance comparison result to a grade.

    :param res: comparison result; either a relative-change number or a
        string marker (e.g. "error"), which is passed through unchanged
    :param threshold: three ascending bounds
        [worse_upper, doubt_upper, equal_upper]
    :return: "worse", "doubt", "equal" or "better" for numeric input;
        the input itself when it is a string; "" when no range matches
        (e.g. NaN)
    """
    if isinstance(res, str):
        return res

    worse_upper, doubt_upper, equal_upper = threshold
    grade = ""
    if res <= worse_upper:
        grade = "worse"
    elif res <= doubt_upper:
        grade = "doubt"
    elif res <= equal_upper:
        grade = "equal"
    elif res > equal_upper:
        # Explicit comparison (not a bare else) so NaN keeps the "" grade.
        grade = "better"
    return grade


def perf_compare(baseline, latest, case_name):
    """
    Compare a measured value against its baseline and grade the change.

    The relative change is negative when ``latest`` is larger than
    ``baseline`` and positive when it is smaller; it is 0 when either
    value is 0.

    :param baseline: baseline value
    :param latest: value under test
    :param case_name: case name used to look up a case-specific threshold
        in ``threshold_map``; unknown names use 'basic_threshold'
    :return: tuple ``(res, grade)``. When either input is a string, both
        elements are "error" so callers can always unpack two values.
    """
    threshold = threshold_map.get(case_name, threshold_map['basic_threshold'])

    # Either side may carry a string marker instead of a number; the
    # original only checked `baseline` (twice) and returned a bare string.
    if isinstance(baseline, str) or isinstance(latest, str):
        res = "error"
        return res, perf_grade(res, threshold)

    if baseline == 0 or latest == 0:
        # Avoid division by zero; treated as "no measurable change".
        res = 0
    elif latest > baseline:
        res = (latest - baseline) / baseline * -1
    else:
        res = (baseline - latest) / latest
    return res, perf_grade(res, threshold)

0 commit comments

Comments
 (0)