Skip to content

Commit 6085a74

Browse files
committed
[Benchmark] Include exposed memory variants in index scan experiments
1 parent a05743c commit 6085a74

File tree

2 files changed: 68 additions (+68) and 60 deletions (-60).

2 files changed: 68 additions (+68) and 60 deletions (-60).

benchmark/phys-cost-models/gen.py

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -136,33 +136,33 @@
136136
],
137137

138138
'Selectivity-i32-simple': [
139-
( 'id', 'i32', ['NOT NULL'], {'num_tuples': 1_000_000, 'num_distinct_values': 1_000_000} ),
140-
( 'i32', 'i32', ['NOT NULL'], {'num_tuples': 1_000_000,
141-
'num_distinct_values': 1_000_000,
139+
( 'id', 'i32', ['NOT NULL'], {'num_tuples': 10_000_000} ),
140+
( 'i32', 'i32', ['NOT NULL'], {'num_tuples': 10_000_000,
141+
'num_distinct_values': 10_000_000,
142142
'min_value': TYPE_TO_DOMAIN['i32'][0] // 2,
143143
'max_value': TYPE_TO_DOMAIN['i32'][1] // 2
144144
}
145145
),
146146
],
147147

148148
'Selectivity-i64-simple': [
149-
( 'id', 'i32', ['NOT NULL'], {'num_tuples': 1_000_000, 'num_distinct_values': 1_000_000} ),
150-
( 'i64', 'i64', ['NOT NULL'], {'num_tuples': 1_000_000,
151-
'num_distinct_values': 1_000_000,
149+
( 'id', 'i32', ['NOT NULL'], {'num_tuples': 10_000_000} ),
150+
( 'i64', 'i64', ['NOT NULL'], {'num_tuples': 10_000_000,
151+
'num_distinct_values': 10_000_000,
152152
'min_value': TYPE_TO_DOMAIN['i64'][0] // 2,
153153
'max_value': TYPE_TO_DOMAIN['i64'][1] // 2
154154
}
155155
),
156156
],
157157

158158
'Selectivity-f-simple': [
159-
( 'id', 'i32', ['NOT NULL'], {'num_tuples': 1_000_000, 'num_distinct_values': 1_000_000} ),
160-
( 'f', 'f', ['NOT NULL'], {'num_tuples': 1_000_000, 'num_distinct_values': 1_000_000} ),
159+
( 'id', 'i32', ['NOT NULL'], {'num_tuples': 10_000_000} ),
160+
( 'f', 'f', ['NOT NULL'], {'num_tuples': 10_000_000, 'num_distinct_values': 10_000_000} ),
161161
],
162162

163163
'Selectivity-d-simple': [
164-
( 'id', 'i32', ['NOT NULL'], {'num_tuples': 1_000_000, 'num_distinct_values': 1_000_000} ),
165-
( 'd', 'd', ['NOT NULL'], {'num_tuples': 1_000_000, 'num_distinct_values': 1_000_000} ),
164+
( 'id', 'i32', ['NOT NULL'], {'num_tuples': 10_000_000} ),
165+
( 'd', 'd', ['NOT NULL'], {'num_tuples': 10_000_000, 'num_distinct_values': 10_000_000} ),
166166
],
167167

168168
'Distinct_multi_i32': [

benchmark/phys-cost-models/index-scan/benchmark-gen.py

Lines changed: 58 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -46,33 +46,33 @@
4646
('size_machindecode', '\'^Machine code size:/*\'', 'Machine code size'),
4747
]
4848

49-
MATERIALIZATIONS: list[tuple[str, str, str]] = [
50-
# ('wo_materialization', '', 'without materialization'),
51-
('w_materialization', '--soft-pipeline-breaker AfterFilter,AfterIndexScan', 'with materialization'),
49+
MATERIALIZATIONS: list[tuple[bool, str, str, str]] = [
50+
# (False, 'wo_materialization', '', 'without materialization'),
51+
(True, 'w_materialization', '--soft-pipeline-breaker AfterFilter,AfterIndexScan', 'with materialization'),
5252
]
53-
CACHING_CONFIGURATIONS: list[tuple[str, str, str]] = [
54-
('wo_caching', '--no-wasm-compilation-cache', 'without code caching'),
55-
('w_caching', '' , 'with code caching'),
53+
CACHING_CONFIGURATIONS: list[tuple[bool, str, str, str]] = [
54+
(False, 'wo_caching', '--no-wasm-compilation-cache', 'without code caching'),
55+
(True, 'w_caching', '' , 'with code caching'),
5656
]
57-
SCAN_CONFIGURATIONS: list[tuple[str, str]] = [
58-
('table scan + filter, branching', '--scan-implementations Scan --filter-selection-strategy Branching'),
59-
('table scan + filter, predicated', '--scan-implementations Scan --filter-selection-strategy Predicated'),
57+
SCAN_CONFIGURATIONS: list[tuple[str, str, str]] = [
58+
('table_scan+filter', 'branching', '--scan-implementations Scan --filter-selection-strategy Branching'),
59+
('table_scan+filter', 'predicated', '--scan-implementations Scan --filter-selection-strategy Predicated'),
6060
]
61-
INDEX_CONFIGURATIONS: list[tuple[str, str]] = [
62-
('index scan (compiled), inline/hostcall', '--scan-implementations IndexScan --index-scan-strategy Compilation --index-scan-materialization-strategy Inline --index-scan-compilation-strategy Callback'),
63-
('index scan (compiled), memory/hostcall', '--scan-implementations IndexScan --index-scan-strategy Compilation --index-scan-materialization-strategy Memory --index-scan-compilation-strategy Callback'),
64-
# ('index scan (compiled), inline/exposed memory', '--scan-implementations IndexScan --index-scan-strategy Compilation --index-scan-materialization-strategy Inline --index-scan-compilation-strategy ExposedMemory'),
65-
# ('index scan (compiled), memory/exposed memory', '--scan-implementations IndexScan --index-scan-strategy Compilation --index-scan-materialization-strategy Memory --index-scan-compilation-strategy ExposedMemory'),
66-
('index scan (hybrid), inline/hostcall', '--scan-implementations IndexScan --index-scan-strategy Hybrid --index-scan-materialization-strategy Inline --index-scan-compilation-strategy Callback'),
67-
('index scan (hybrid), memory/hostcall', '--scan-implementations IndexScan --index-scan-strategy Hybrid --index-scan-materialization-strategy Memory --index-scan-compilation-strategy Callback'),
68-
# ('index scan (hybrid), inline/exposed memory', '--scan-implementations IndexScan --index-scan-strategy Hybrid --index-scan-materialization-strategy Inline --index-scan-compilation-strategy ExposedMemory'),
69-
# ('index scan (hybrid), memory/exposed memory', '--scan-implementations IndexScan --index-scan-strategy Hybrid --index-scan-materialization-strategy Memory --index-scan-compilation-strategy ExposedMemory'),
70-
('index scan (interpreted), inline', '--scan-implementations IndexScan --index-scan-strategy Interpretation --index-scan-materialization-strategy Inline'),
71-
('index scan (interpreted), memory', '--scan-implementations IndexScan --index-scan-strategy Interpretation --index-scan-materialization-strategy Memory'),
61+
INDEX_CONFIGURATIONS: list[tuple[str, str, str, str, str]] = [
62+
('index_scan', 'compiled', 'host_call', 'inline', '--scan-implementations IndexScan --index-scan-strategy Compilation --index-scan-materialization-strategy Inline --index-scan-compilation-strategy Callback'),
63+
('index_scan', 'compiled', 'host_call', 'memory', '--scan-implementations IndexScan --index-scan-strategy Compilation --index-scan-materialization-strategy Memory --index-scan-compilation-strategy Callback'),
64+
('index_scan', 'compiled', 'exposed_memory', 'inline', '--scan-implementations IndexScan --index-scan-strategy Compilation --index-scan-materialization-strategy Inline --index-scan-compilation-strategy ExposedMemory'),
65+
('index_scan', 'compiled', 'exposed_memory', 'memory', '--scan-implementations IndexScan --index-scan-strategy Compilation --index-scan-materialization-strategy Memory --index-scan-compilation-strategy ExposedMemory'),
66+
('index_scan', 'hybrid', 'host_call', 'inline', '--scan-implementations IndexScan --index-scan-strategy Hybrid --index-scan-materialization-strategy Inline --index-scan-compilation-strategy Callback'),
67+
('index_scan', 'hybrid', 'host_call', 'memory', '--scan-implementations IndexScan --index-scan-strategy Hybrid --index-scan-materialization-strategy Memory --index-scan-compilation-strategy Callback'),
68+
('index_scan', 'hybrid', 'exposed_memory', 'inline', '--scan-implementations IndexScan --index-scan-strategy Hybrid --index-scan-materialization-strategy Inline --index-scan-compilation-strategy ExposedMemory'),
69+
('index_scan', 'hybrid', 'exposed_memory', 'memory', '--scan-implementations IndexScan --index-scan-strategy Hybrid --index-scan-materialization-strategy Memory --index-scan-compilation-strategy ExposedMemory'),
70+
('index_scan', 'interpreted', 'N/A', 'inline', '--scan-implementations IndexScan --index-scan-strategy Interpretation --index-scan-materialization-strategy Inline'),
71+
('index_scan', 'interpreted', 'N/A', 'memory', '--scan-implementations IndexScan --index-scan-strategy Interpretation --index-scan-materialization-strategy Memory'),
7272
]
73-
METHODS: list[tuple[str, str]] = [
73+
INDEXES: list[tuple[str, str]] = [
7474
('array', '--index-implementations Array'),
75-
('rmi', '--index-implementations Rmi'),
75+
# ('rmi', '--index-implementations Rmi'),
7676
]
7777
LAYOUTS: list[tuple[str, str, str]] = [
7878
('row', '--data-layout Row', 'Row layout'),
@@ -87,18 +87,26 @@
8787
]
8888

8989
SELECTIVITIES: list[float] = [
90-
10**0, 0.5 * 10**0,
91-
10**-1, 0.5 * 10**-1,
92-
10**-2, 0.5 * 10**-2,
93-
10**-3, 0.5 * 10**-3,
94-
10**-4, 0.5 * 10**-4,
95-
10**-5, 0.5 * 10**-5,
96-
10**-6
90+
10**0,
91+
0.5 * 10**0,
92+
10**-1,
93+
0.5 * 10**-1,
94+
10**-2,
95+
0.5 * 10**-2,
96+
10**-3,
97+
0.5 * 10**-3,
98+
10**-4,
99+
0.5 * 10**-4,
100+
10**-5,
101+
0.5 * 10**-5,
102+
10**-6,
103+
0.5 * 10**-6,
104+
10**-7,
97105
]
98106

99107
BATCH_SIZE_SELECTIVITY: float = 0.1
100108
BATCH_SIZE_ARG: str = '--index-sequential-scan-batch-size'
101-
BATCH_SIZES: list[int] = [10**0, 10**1, 10**2, 10**3, 10**4, 10**5, 10**6]
109+
BATCH_SIZES: list[int] = [10**0, 10**1, 10**2, 10**3, 10**4, 10**5, 10**6, 10**7]
102110

103111
MUTABLE_ARGS = '--backend WasmV8 --no-simd --statistics'
104112

@@ -217,11 +225,11 @@ def generate_point_query_str(table: str, column: str, val) -> str:
217225
# Generates the indexes section of the benchmark file.
218226
def generate_indexes_str(column: str) -> str:
219227
indexes_str: str = ' indexes:\n'
220-
for method, _ in METHODS:
228+
for index, _ in INDEXES:
221229
indexes_str += (
222-
f' \'{column}_{method}_idx\':\n'
230+
f' \'{column}_{index}_idx\':\n'
223231
f' attributes: \'{column}\'\n'
224-
f' method: \'{method}\'\n'
232+
f' method: \'{index}\'\n'
225233
)
226234
return indexes_str
227235

@@ -265,9 +273,9 @@ def generate_pattern_str(pattern: tuple[str, str, str] | None = None) -> str:
265273
# Generates the configurations section concerned with scans of the benchmark file.
266274
def generate_scan_configurations_str(pattern: tuple[str, str, str] | None = None) -> str:
267275
config_str: str = ''
268-
for config, config_args in SCAN_CONFIGURATIONS:
276+
for operators, strategy, config_args in SCAN_CONFIGURATIONS:
269277
config_str += (
270-
f' \'{config}, 0, no index\':\n' # scan config, index, batch size, pattern
278+
f' \'operators={operators};strategy={strategy};index_access=N/A;materialization=N/A;batch_size=N/A;index_type=N/A\':\n' # scan config, index, batch size, pattern
271279
f' args: {config_args}\n'
272280
f'{generate_pattern_str(pattern)}\n'
273281
)
@@ -277,19 +285,19 @@ def generate_scan_configurations_str(pattern: tuple[str, str, str] | None = None
277285
# Generates the configurations section concerned with indexes of the benchmark file.
278286
def generate_index_configurations_str(pattern: tuple[str, str, str] | None = None) -> str:
279287
config_str: str = ''
280-
for config, config_args in INDEX_CONFIGURATIONS:
281-
for method, method_args in METHODS:
282-
if 'hostcall' in config:
288+
for operators, strategy, index_access, materialization, config_args in INDEX_CONFIGURATIONS:
289+
for index_type, index_args in INDEXES:
290+
if index_access == 'host_call':
283291
for batch_size in BATCH_SIZES:
284292
config_str += (
285-
f' \'{config}, {batch_size}, {method}\':\n' # scan config, index, batch size, pattern
286-
f' args: {config_args} {method_args} {BATCH_SIZE_ARG} {batch_size}\n'
293+
f' \'operators={operators};strategy={strategy};index_access={index_access};materialization={materialization};batch_size={batch_size};index_type={index_type}\':\n' # scan config, index, batch size, pattern
294+
f' args: {config_args} {index_args} {BATCH_SIZE_ARG} {batch_size}\n'
287295
f'{generate_pattern_str(pattern)}\n'
288296
)
289297
else:
290298
config_str += (
291-
f' \'{config}, 0, {method}\':\n' # scan config, index, batch size, pattern
292-
f' args: {config_args} {method_args}\n'
299+
f' \'operators={operators};strategy={strategy};index_access={index_access};materialization={materialization};batch_size=N/A;index_type={index_type}\':\n' # scan config, index, batch size, pattern
300+
f' args: {config_args} {index_args}\n'
293301
f'{generate_pattern_str(pattern)}\n'
294302
)
295303
return config_str
@@ -339,13 +347,13 @@ def generate_performance_benchmark(data: pd.core.frame.DataFrame, table: str, co
339347
if verbose:
340348
print(f' + Generated case with selectivity {format_float(actual/n_rows)}.')
341349

342-
for materialization_abbrev, materialization_args, materialization_description in MATERIALIZATIONS: # Materialization
350+
for results_materialized, materialization_abbrev, materialization_args, materialization_description in MATERIALIZATIONS: # Materialization
343351
for layout_abbrev, layout_args, layout_description in LAYOUTS:
344352
blueprint: str = (
345353
f'description: Comparing table scan and index scan on {"ordered" if is_ordered else "unordered"} {get_type_str_from_column(column)} data in {layout_description} {materialization_description} for varying selectivities\n'
346354
f'suite: phys-cost-models\n'
347-
f'benchmark: index-scan\n'
348-
f'name: {get_type_str_from_column(column)},{"ordered" if is_ordered else "unordered"},{layout_abbrev},{materialization_abbrev}\n'
355+
f'benchmark: index-scan;performance\n'
356+
f'name: data_type={get_type_str_from_column(column)};data_layout={layout_abbrev};ordered={is_ordered};results_materialized={results_materialized}\n'
349357
f'readonly: true\n'
350358
f'{generate_data_str(table, data.columns, datafile, column)}'
351359
f'systems:\n'
@@ -398,14 +406,14 @@ def generate_caching_benchmark(data: pd.core.frame.DataFrame, table: str, column
398406
if verbose:
399407
print(f' + Generated case with selectivity {format_float(actual/n_rows)}.')
400408

401-
for materialization_abbrev, materialization_args, materialization_description in MATERIALIZATIONS: # Materialization
409+
for results_materialized, materialization_abbrev, materialization_args, materialization_description in MATERIALIZATIONS: # Materialization
402410
for layout_abbrev, layout_args, layout_description in LAYOUTS:
403-
for caching_abbrev, caching_args, caching_description in CACHING_CONFIGURATIONS:
411+
for plan_caching, caching_abbrev, caching_args, caching_description in CACHING_CONFIGURATIONS:
404412
blueprint: str = (
405413
f'description: Comparing table scan and index scan on {"ordered" if is_ordered else "unordered"} {get_type_str_from_column(column)} data in {layout_description} {materialization_description} and {caching_description} for varying selectivities\n'
406414
f'suite: phys-cost-models\n'
407-
f'benchmark: index-scan-caching\n'
408-
f'name: {get_type_str_from_column(column)},{"ordered" if is_ordered else "unordered"},{layout_abbrev},{materialization_abbrev},{caching_abbrev}\n'
415+
f'benchmark: index-scan;caching\n'
416+
f'name: data_type={get_type_str_from_column(column)};data_layout={layout_abbrev};ordered={is_ordered};results_materialized={results_materialized};plan_caching={plan_caching}\n'
409417
f'readonly: true\n'
410418
f'{generate_data_str(table, data.columns, datafile, column)}'
411419
f'systems:\n'
@@ -451,4 +459,4 @@ def generate_caching_benchmark(data: pd.core.frame.DataFrame, table: str, column
451459
generate_performance_benchmark(df, table_name, column_name, SELECTIVITIES, csv_file, outdir, is_ordered=is_ordered, verbose=args.verbose)
452460

453461
outdir = os.path.join(OUTPUT_DIR, 'caching')
454-
generate_caching_benchmark(df, table_name, column_name, 5, csv_file, outdir, is_ordered=is_ordered, verbose=args.verbose)
462+
generate_caching_benchmark(df, table_name, column_name, 5, csv_file, outdir, is_ordered=is_ordered, verbose=args.verbose)

0 commit comments

Comments
 (0)