Skip to content

Commit 2aaeb49

Browse files
add unit tests
1 parent 6b28a5c commit 2aaeb49

File tree

2 files changed

+231
-6
lines changed

2 files changed

+231
-6
lines changed

unit_tests/sources/file_based/scenarios/csv_scenarios.py

Lines changed: 225 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2120,12 +2120,14 @@
21202120
}
21212121
)
21222122
.set_expected_check_status("FAILED")
2123-
.set_expected_check_error(None, FileBasedSourceError.CONFIG_VALIDATION_ERROR.value)
2123+
.set_expected_check_error(
2124+
None, FileBasedSourceError.ERROR_VALIDATION_STREAM_DISCOVERY_OPTIONS.value
2125+
)
21242126
.set_expected_discover_error(
2125-
ConfigValidationError, FileBasedSourceError.CONFIG_VALIDATION_ERROR.value
2127+
ConfigValidationError, FileBasedSourceError.ERROR_VALIDATION_STREAM_DISCOVERY_OPTIONS.value
21262128
)
21272129
.set_expected_read_error(
2128-
ConfigValidationError, FileBasedSourceError.CONFIG_VALIDATION_ERROR.value
2130+
ConfigValidationError, FileBasedSourceError.ERROR_VALIDATION_STREAM_DISCOVERY_OPTIONS.value
21292131
)
21302132
).build()
21312133

@@ -2223,12 +2225,229 @@
22232225
}
22242226
)
22252227
.set_expected_check_status("FAILED")
2226-
.set_expected_check_error(None, FileBasedSourceError.CONFIG_VALIDATION_ERROR.value)
2228+
.set_expected_check_error(
2229+
None, FileBasedSourceError.ERROR_VALIDATION_STREAM_DISCOVERY_OPTIONS.value
2230+
)
2231+
.set_expected_discover_error(
2232+
ConfigValidationError, FileBasedSourceError.ERROR_VALIDATION_STREAM_DISCOVERY_OPTIONS.value
2233+
)
2234+
.set_expected_read_error(
2235+
ConfigValidationError, FileBasedSourceError.ERROR_VALIDATION_STREAM_DISCOVERY_OPTIONS.value
2236+
)
2237+
).build()
2238+
2239+
recent_n_files_to_read_for_schema_discovery_with_use_first_found_file_for_schema_discovery_fails_connection_check_multi_stream_scenario: TestScenario[
2240+
InMemoryFilesSource
2241+
] = (
2242+
TestScenarioBuilder[InMemoryFilesSource]()
2243+
.set_name(
2244+
"recent_n_files_to_read_for_schema_discovery_with_use_first_found_file_for_schema_discovery_fails_connection_check_multi_stream_scenario"
2245+
)
2246+
.set_config(
2247+
{
2248+
"streams": [
2249+
{
2250+
"name": "stream1",
2251+
"format": {"filetype": "csv"},
2252+
"globs": ["a.csv"],
2253+
"validation_policy": "Skip Record",
2254+
"recent_n_files_to_read_for_schema_discovery": 5,
2255+
"use_first_found_file_for_schema_discovery": True,
2256+
},
2257+
{
2258+
"name": "stream2",
2259+
"format": {"filetype": "csv"},
2260+
"globs": ["b.csv"],
2261+
"validation_policy": "Skip Record",
2262+
},
2263+
]
2264+
}
2265+
)
2266+
.set_source_builder(
2267+
FileBasedSourceBuilder()
2268+
.set_files(
2269+
{
2270+
"a.csv": {
2271+
"contents": [
2272+
("col1", "col2"),
2273+
("val11a", "val12a"),
2274+
("val21a", "val22a"),
2275+
],
2276+
"last_modified": "2023-06-05T03:54:07.000Z",
2277+
},
2278+
"b.csv": {
2279+
"contents": [
2280+
("col3",),
2281+
("val13b",),
2282+
("val23b",),
2283+
],
2284+
"last_modified": "2023-06-05T03:54:07.000Z",
2285+
},
2286+
}
2287+
)
2288+
.set_file_type("csv")
2289+
)
2290+
.set_catalog(
2291+
CatalogBuilder()
2292+
.with_stream("stream1", SyncMode.full_refresh)
2293+
.with_stream("stream2", SyncMode.full_refresh)
2294+
.build()
2295+
)
2296+
.set_expected_catalog(
2297+
{
2298+
"streams": [
2299+
{
2300+
"json_schema": {
2301+
"type": "object",
2302+
"properties": {
2303+
"data": {"type": "object"},
2304+
"_ab_source_file_last_modified": {"type": "string"},
2305+
"_ab_source_file_url": {"type": "string"},
2306+
},
2307+
},
2308+
"name": "stream1",
2309+
"supported_sync_modes": ["full_refresh", "incremental"],
2310+
"is_resumable": True,
2311+
"is_file_based": False,
2312+
"source_defined_cursor": True,
2313+
"default_cursor_field": ["_ab_source_file_last_modified"],
2314+
},
2315+
{
2316+
"json_schema": {
2317+
"type": "object",
2318+
"properties": {
2319+
"col3": {"type": ["null", "string"]},
2320+
"_ab_source_file_last_modified": {"type": "string"},
2321+
"_ab_source_file_url": {"type": "string"},
2322+
},
2323+
},
2324+
"name": "stream2",
2325+
"source_defined_cursor": True,
2326+
"default_cursor_field": ["_ab_source_file_last_modified"],
2327+
"supported_sync_modes": ["full_refresh", "incremental"],
2328+
"is_resumable": True,
2329+
"is_file_based": False,
2330+
},
2331+
]
2332+
}
2333+
)
2334+
.set_expected_check_status("FAILED")
2335+
.set_expected_check_error(
2336+
None, FileBasedSourceError.ERROR_VALIDATION_STREAM_DISCOVERY_OPTIONS.value
2337+
)
2338+
.set_expected_discover_error(
2339+
ConfigValidationError, FileBasedSourceError.ERROR_VALIDATION_STREAM_DISCOVERY_OPTIONS.value
2340+
)
2341+
.set_expected_read_error(
2342+
ConfigValidationError, FileBasedSourceError.ERROR_VALIDATION_STREAM_DISCOVERY_OPTIONS.value
2343+
)
2344+
).build()
2345+
2346+
2347+
schemaless_with_use_first_found_file_for_schema_discovery_fails_connection_check_multi_stream_scenario: TestScenario[
2348+
InMemoryFilesSource
2349+
] = (
2350+
TestScenarioBuilder[InMemoryFilesSource]()
2351+
.set_name(
2352+
"schemaless_with_use_first_found_file_for_schema_discovery_fails_connection_check_multi_stream_scenario"
2353+
)
2354+
.set_config(
2355+
{
2356+
"streams": [
2357+
{
2358+
"name": "stream1",
2359+
"format": {"filetype": "csv"},
2360+
"globs": ["a.csv"],
2361+
"validation_policy": "Skip Record",
2362+
"schemaless": True,
2363+
"use_first_found_file_for_schema_discovery": True,
2364+
},
2365+
{
2366+
"name": "stream2",
2367+
"format": {"filetype": "csv"},
2368+
"globs": ["b.csv"],
2369+
"validation_policy": "Skip Record",
2370+
},
2371+
]
2372+
}
2373+
)
2374+
.set_source_builder(
2375+
FileBasedSourceBuilder()
2376+
.set_files(
2377+
{
2378+
"a.csv": {
2379+
"contents": [
2380+
("col1", "col2"),
2381+
("val11a", "val12a"),
2382+
("val21a", "val22a"),
2383+
],
2384+
"last_modified": "2023-06-05T03:54:07.000Z",
2385+
},
2386+
"b.csv": {
2387+
"contents": [
2388+
("col3",),
2389+
("val13b",),
2390+
("val23b",),
2391+
],
2392+
"last_modified": "2023-06-05T03:54:07.000Z",
2393+
},
2394+
}
2395+
)
2396+
.set_file_type("csv")
2397+
)
2398+
.set_catalog(
2399+
CatalogBuilder()
2400+
.with_stream("stream1", SyncMode.full_refresh)
2401+
.with_stream("stream2", SyncMode.full_refresh)
2402+
.build()
2403+
)
2404+
.set_expected_catalog(
2405+
{
2406+
"streams": [
2407+
{
2408+
"json_schema": {
2409+
"type": "object",
2410+
"properties": {
2411+
"data": {"type": "object"},
2412+
"_ab_source_file_last_modified": {"type": "string"},
2413+
"_ab_source_file_url": {"type": "string"},
2414+
},
2415+
},
2416+
"name": "stream1",
2417+
"supported_sync_modes": ["full_refresh", "incremental"],
2418+
"is_resumable": True,
2419+
"is_file_based": False,
2420+
"source_defined_cursor": True,
2421+
"default_cursor_field": ["_ab_source_file_last_modified"],
2422+
},
2423+
{
2424+
"json_schema": {
2425+
"type": "object",
2426+
"properties": {
2427+
"col3": {"type": ["null", "string"]},
2428+
"_ab_source_file_last_modified": {"type": "string"},
2429+
"_ab_source_file_url": {"type": "string"},
2430+
},
2431+
},
2432+
"name": "stream2",
2433+
"source_defined_cursor": True,
2434+
"default_cursor_field": ["_ab_source_file_last_modified"],
2435+
"supported_sync_modes": ["full_refresh", "incremental"],
2436+
"is_resumable": True,
2437+
"is_file_based": False,
2438+
},
2439+
]
2440+
}
2441+
)
2442+
.set_expected_check_status("FAILED")
2443+
.set_expected_check_error(
2444+
None, FileBasedSourceError.ERROR_VALIDATION_STREAM_DISCOVERY_OPTIONS.value
2445+
)
22272446
.set_expected_discover_error(
2228-
ConfigValidationError, FileBasedSourceError.CONFIG_VALIDATION_ERROR.value
2447+
ConfigValidationError, FileBasedSourceError.ERROR_VALIDATION_STREAM_DISCOVERY_OPTIONS.value
22292448
)
22302449
.set_expected_read_error(
2231-
ConfigValidationError, FileBasedSourceError.CONFIG_VALIDATION_ERROR.value
2450+
ConfigValidationError, FileBasedSourceError.ERROR_VALIDATION_STREAM_DISCOVERY_OPTIONS.value
22322451
)
22332452
).build()
22342453

unit_tests/sources/file_based/test_file_based_scenarios.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -84,6 +84,8 @@
8484
schemaless_csv_multi_stream_scenario,
8585
schemaless_csv_scenario,
8686
schemaless_with_user_input_schema_fails_connection_check_multi_stream_scenario,
87+
recent_n_files_to_read_for_schema_discovery_with_use_first_found_file_for_schema_discovery_fails_connection_check_multi_stream_scenario,
88+
schemaless_with_use_first_found_file_for_schema_discovery_fails_connection_check_multi_stream_scenario,
8789
schemaless_with_user_input_schema_fails_connection_check_scenario,
8890
single_csv_scenario,
8991
)
@@ -207,6 +209,8 @@
207209
schemaless_csv_scenario,
208210
schemaless_csv_multi_stream_scenario,
209211
schemaless_with_user_input_schema_fails_connection_check_multi_stream_scenario,
212+
recent_n_files_to_read_for_schema_discovery_with_use_first_found_file_for_schema_discovery_fails_connection_check_multi_stream_scenario,
213+
schemaless_with_use_first_found_file_for_schema_discovery_fails_connection_check_multi_stream_scenario,
210214
schemaless_with_user_input_schema_fails_connection_check_scenario,
211215
single_stream_user_input_schema_scenario_schema_is_invalid,
212216
single_stream_user_input_schema_scenario_emit_nonconforming_records,
@@ -312,6 +316,8 @@
312316
success_multi_stream_scenario,
313317
success_user_provided_schema_scenario,
314318
schemaless_with_user_input_schema_fails_connection_check_multi_stream_scenario,
319+
recent_n_files_to_read_for_schema_discovery_with_use_first_found_file_for_schema_discovery_fails_connection_check_multi_stream_scenario,
320+
schemaless_with_use_first_found_file_for_schema_discovery_fails_connection_check_multi_stream_scenario,
315321
schemaless_with_user_input_schema_fails_connection_check_scenario,
316322
valid_single_stream_user_input_schema_scenario,
317323
single_avro_scenario,

0 commit comments

Comments
 (0)