 import numpy as np
 import numpy.testing as npt
-from bulk_data import BulkData
 import logging
 import time
 import random
+import unittest
+from bulk_data import BulkData
 
 logger = logging.getLogger(__name__)
 slog = logging.LoggerAdapter(logger, {
@@ -15,22 +16,6 @@
 })
 
 
-def test_bulk_data_test_vector():
-    data = np.array([
-        bytes([0x01, 0x02, 0x03]),
-        bytes([0x04, 0x05, 0x06]),
-        bytes([0x07] * 10)
-    ])
-    bulk_data = BulkData(data)
-    serialized = bulk_data.serialize()
-    assert list(serialized) == [
-        0x87, 0x87, 0x03, 0x03, 0x01, 0x02, 0x03, 0x03, 0x04, 0x05, 0x06, 0x0A, 0x07, 0x07,
-        0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07
-    ]
-    deserialized = BulkData.deserialize(serialized)
-    npt.assert_array_equal(data, deserialized.data)
-
-
 def benchmark_bulk_data(bulk_data) -> BulkData:
     t_start = time.perf_counter()
     serialized = bulk_data.serialize()
@@ -48,23 +33,76 @@ def random_bytes() -> bytes:
     return bytes(random.getrandbits(8) for _ in range(64))
 
 
-def test_bulk_data_benchmark():
-    num_samples = 5000000
-    slog.info(f"Testing performance with bulk data of {num_samples} samples")
-    t_start = time.perf_counter()
-    data = np.array([
-        random.randbytes(64) for _ in range(num_samples)
-    ], dtype=np.object_)
-    # check all samples have 64 bytes
-    for item in data:
-        assert len(item) == 64
-    bulk_data = BulkData(data)
-    t_generate = time.perf_counter() - t_start
-    slog.info(f"Generate: {t_generate} s")
+class TestBulkDataDeserialize(unittest.TestCase):
+    def test_valid_serialization_one_item(self):
+        serialized = b'\x87\x87\x01\x03abc'
+        expected = BulkData([b'abc'])
+        self.assertEqual(BulkData.deserialize(serialized), expected)
+
+    def test_valid_serialization_multiple_items(self):
+        serialized = b'\x87\x87\x02\x03abc\x03def'
+        expected = BulkData([b'abc', b'def'])
+        self.assertEqual(BulkData.deserialize(serialized), expected)
+
+    def test_invalid_serialization_incorrect_header(self):
+        serialized = b'\x88\x87\x01\x03abc'
+        with self.assertRaises(AssertionError):
+            BulkData.deserialize(serialized)
+
+    def test_invalid_serialization_incorrect_item_length(self):
+        serialized = b'\x87\x87\x01\x04abc'
+        with self.assertRaises(AssertionError):
+            BulkData.deserialize(serialized)
+
+    def test_invalid_serialization_truncated_data(self):
+        serialized = b'\x87\x87\x01\x03ab'
+        with self.assertRaises(AssertionError):
+            BulkData.deserialize(serialized)
+
+    def test_invalid_serialization_empty_data(self):
+        serialized = b''
+        with self.assertRaises(AssertionError):
+            BulkData.deserialize(serialized)
+
+    def test_bulk_data_test_vector(self):
+        data = np.array([
+            bytes([0x01, 0x02, 0x03]),
+            bytes([0x04, 0x05, 0x06]),
+            bytes([0x07] * 10)
+        ])
+        bulk_data = BulkData(data.tolist())
+        serialized = bulk_data.serialize()
+        assert list(serialized) == [
+            0x87, 0x87, 0x03, 0x03, 0x01, 0x02, 0x03, 0x03, 0x04, 0x05, 0x06, 0x0A, 0x07, 0x07,
+            0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07
+        ]
+        deserialized = BulkData.deserialize(serialized)
+        npt.assert_array_equal(data, deserialized.data)
+
+    def test_bulk_data_benchmark(self):
+        num_samples = 1000000
+        slog.info(f"Testing performance with bulk data of {num_samples} samples")
+        t_start = time.perf_counter()
+        data = np.array([
+            random.randbytes(64) for _ in range(num_samples)
+        ], dtype=np.object_)
+        # check all samples have 64 bytes
+        for item in data:
+            assert len(item) == 64
+        bulk_data = BulkData(data.tolist())
+        t_generate = time.perf_counter() - t_start
+        slog.info(f"Generate: {t_generate} s")
+        # serialize+deserialize
+        t_start = time.perf_counter()
+        recovered = benchmark_bulk_data(bulk_data)
+        t_all = time.perf_counter() - t_start
+        slog.info(f"serialize+deserialize: {t_all} s, i.e. {t_all / num_samples * 1000000:.6f} µs per item")
+        self.assertEqual(len(bulk_data.data), len(recovered.data))
+        # sample 100 random items from both arrays and check they are equal
+        for _ in range(100):
+            i = random.randint(0, len(bulk_data.data) - 1)
+            assert np.array_equal(bulk_data.data[i], recovered.data[i])
 
-    t_start = time.perf_counter()
-    recovered = benchmark_bulk_data(bulk_data)
-    t_all = time.perf_counter() - t_start
-    slog.info(f"serialize+deserialize: {t_all} s, i.e. {t_all / num_samples * 1000000:.6f} µs per item")
 
-    assert np.array_equal(bulk_data.data, recovered.data)
+if __name__ == '__main__':
+    unittest.main()
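
For reference (not part of the diff above): a minimal sketch of what the imported bulk_data.BulkData could look like, inferred purely from the test vectors in this file. The framing assumed here is a two-byte 0x87 0x87 header, a one-byte item count, then a one-byte length followed by the payload for each item. The class name and the data / serialize() / deserialize() interface come from the tests; everything else is an assumption, and the real module presumably uses a wider count field, since the benchmark packs 1,000,000 items, which does not fit in a single count byte.

# Hypothetical sketch only, inferred from the test vectors; not the real bulk_data module.
HEADER = b'\x87\x87'  # two-byte magic prefix seen in the serialized test data


class BulkData:
    def __init__(self, data):
        # data: a sequence of bytes objects
        self.data = list(data)

    def __eq__(self, other):
        return isinstance(other, BulkData) and self.data == other.data

    def serialize(self) -> bytes:
        # header, item count, then (length, payload) per item
        out = bytearray(HEADER)
        out.append(len(self.data))   # single count byte: only valid for < 256 items
        for item in self.data:
            out.append(len(item))    # single length byte: only valid for items < 256 bytes
            out += item
        return bytes(out)

    @classmethod
    def deserialize(cls, serialized: bytes) -> 'BulkData':
        assert len(serialized) >= 3, 'serialized data too short'
        assert serialized[:2] == HEADER, 'bad header'
        count = serialized[2]
        items = []
        pos = 3
        for _ in range(count):
            assert pos < len(serialized), 'missing item length'
            length = serialized[pos]
            pos += 1
            assert pos + length <= len(serialized), 'truncated item'
            items.append(bytes(serialized[pos:pos + length]))
            pos += length
        assert pos == len(serialized), 'unexpected trailing bytes'
        return cls(items)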