16
16
"""Script for running a benchmark to pick a hashing algorithm."""
17
17
18
18
import argparse
19
- import pathlib
20
19
import timeit
20
+ from typing import Final
21
21
22
+ import numpy as np
22
23
import serialize
23
24
24
25
26
# Binary (power-of-two) size units, each expressed as a number of bytes.
KB: Final[int] = 2**10
MB: Final[int] = KB * 1024
GB: Final[int] = MB * 1024
29
+
30
+
25
31
def build_parser () -> argparse .ArgumentParser :
26
32
"""Builds the command line parser for the hash experiment."""
27
33
parser = argparse .ArgumentParser (
28
34
description = "hash algorithm benchmark data for model signing"
29
35
)
30
- parser .add_argument ("path" , help = "path to model" , type = pathlib .Path )
31
36
32
37
parser .add_argument (
33
38
"--repeat" ,
@@ -44,28 +49,51 @@ def build_parser() -> argparse.ArgumentParser:
44
49
default = ["sha256" , "blake2" ],
45
50
)
46
51
52
# Sizes are byte counts; the default sweep goes from 1 KB up to 32 GB.
# The help text previously repeated the "--methods" description.
parser.add_argument(
    "--data-sizes",
    help="data sizes to benchmark, in bytes",
    nargs="+",
    type=int,
    default=[KB, MB, 512 * MB, GB, 4 * GB, 16 * GB, 32 * GB],
)
59
+
47
60
return parser
48
61
49
62
63
def _human_size(size: int) -> str:
    """Render a byte count using the largest unit (GB, MB, KB, B) it reaches.

    Values of at least one KB are divided by the unit with true division, so
    the number is shown as a float (e.g. ``1024`` gives ``"1.0 KB"``). Smaller
    values are printed unchanged with a ``" B"`` suffix.

    Args:
        size: Number of bytes.

    Returns:
        The formatted size followed by a space and the unit name.
    """
    if size < KB:
        return f"{size} B"
    if size < MB:
        return f"{size / KB} KB"
    if size < GB:
        return f"{size / MB} MB"
    return f"{size / GB} GB"
71
+
72
+
73
+ def _generate_data (size : int ) -> bytes :
74
+ if size < 0 :
75
+ raise ValueError ("Cannot generate negative bytes" )
76
+ return np .random .randint (0 , 256 , size , dtype = np .uint8 ).tobytes ()
77
+
78
+
50
79
if __name__ == "__main__":
    # Seed numpy's global RNG so every run hashes the same bytes.
    np.random.seed(42)
    args = build_parser().parse_args()

    # Generate the largest buffer once; every smaller input is a prefix of it.
    data = _generate_data(max(args.data_sizes))

    for size in args.data_sizes:
        # Slice once per size, outside the timed callable: slicing `bytes`
        # copies the data, and that copy must not be billed to the hasher.
        payload = data[:size]

        for algorithm in args.methods:
            hasher = serialize.get_hash_engine_factory(algorithm)()

            # Bind the loop variables as defaults so the callable does not
            # depend on late-bound closure state. Named so that it does not
            # shadow the builtin `hash`.
            # NOTE(review): the same hasher instance is reused for every
            # repeat; whether `compute()` resets its state depends on the
            # engine in `serialize` -- confirm.
            def _hash_payload(hasher=hasher, payload=payload):
                hasher.update(payload)
                return hasher.compute()

            times = timeit.repeat(_hash_payload, number=1, repeat=args.repeat)

            # Grab the min time, as suggested by the docs
            # https://docs.python.org/3/library/timeit.html#timeit.Timer.repeat
            print(
                f"algorithm: {algorithm}, "
                f"size: {_human_size(size)}, "
                f"best time: {min(times)}s"
            )
0 commit comments