-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_pipeline.py
More file actions
271 lines (218 loc) · 9.89 KB
/
test_pipeline.py
File metadata and controls
271 lines (218 loc) · 9.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
#!/usr/bin/env python3
"""
Comprehensive pipeline test for CSV to LaTeX converter.
This script tests the complete pipeline including:
1. CSV loading and data processing
2. Generalized configuration features
3. Row filtering and sorting
4. Value replacements
5. Pattern formatting
6. Column formatting
7. LaTeX generation with underlines
8. Backward compatibility
Usage:
python test_pipeline.py
Files used:
- test_data.csv: Sample data with year, month, cluster, generative, scaled columns
- test_config.yaml: Comprehensive configuration demonstrating all features
"""
import pandas as pd
import sys
import os
# Add src to path for imports
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
from csv2latex.config import ConfigManager
from csv2latex.utils import DataProcessor
from csv2latex.latex import LatexFormatter
def test_complete_pipeline():
    """Run the complete CSV-to-LaTeX conversion pipeline end to end.

    Exercises data/config loading, generalized row filtering and sorting,
    value replacements, pattern and column formatting, LaTeX generation,
    table styles, and backward compatibility with legacy configurations.

    Returns:
        bool: True when every check passes, False otherwise.
    """
    print("=" * 80)
    print("COMPREHENSIVE CSV TO LATEX PIPELINE TEST")
    print("=" * 80)

    # Step 1: Load data and configuration.
    # Fix: section headers used a doubled backslash ("\\n") and printed the
    # two characters "\n" verbatim instead of a blank line; use a real
    # newline escape throughout.
    print("\n1. LOADING DATA AND CONFIGURATION")
    print("-" * 40)
    try:
        df = pd.read_csv("test_data.csv")
        config = ConfigManager()
        config.load_config_file("test_config.yaml")
        print(f"✅ Data loaded: {df.shape[0]} rows, {df.shape[1]} columns")
        print(f"✅ Configuration loaded: {config.config_path}")
        print(f" Columns: {list(df.columns)}")
        print(f" Data types: {dict(df.dtypes)}")
    except Exception as e:
        # Any failure here (missing file, bad YAML, ...) aborts the test run.
        print(f"❌ Error loading data/config: {e}")
        return False

    # Step 2: Generalized row filtering — the config is expected to drop
    # clusters 4 and 5.
    print("\n2. TESTING GENERALIZED ROW FILTERING")
    print("-" * 40)
    original_shape = df.shape
    filtered_df = DataProcessor.filter_excluded_values(df, config)
    excluded_clusters = set(df['cluster'].unique()) - set(filtered_df['cluster'].unique())
    print(f"✅ Original data: {original_shape[0]} rows")
    print(f"✅ After filtering: {filtered_df.shape[0]} rows")
    print(f"✅ Excluded clusters: {sorted(excluded_clusters)}")
    if excluded_clusters != {4, 5}:
        print(f"❌ Expected to exclude clusters 4,5 but excluded {excluded_clusters}")
        return False

    # Step 3: Generalized row sorting (custom order comes from the config).
    print("\n3. TESTING GENERALIZED ROW SORTING")
    print("-" * 40)
    sorted_df = DataProcessor.sort_by_custom_order(filtered_df, config)
    print("✅ Data sorted by custom order")
    print(" First 10 rows (year, month, cluster):")
    for i, (_, row) in enumerate(sorted_df[['year', 'month', 'cluster']].head(10).iterrows()):
        print(f" {i+1:2d}. {row['year']}-{row['month']:2d}-{row['cluster']}")

    # Step 4: Value replacements for arbitrary columns.
    print("\n4. TESTING VALUE REPLACEMENTS")
    print("-" * 40)
    test_replacements = [
        ("year", 2022, "Year 1"),
        ("year", 2023, "Year 2"),
        ("month", 0, "Jan"),
        ("month", 6, "Jul"),
        ("cluster", 0, "0"),  # No replacement configured
    ]
    replacement_success = True
    for col, value, expected in test_replacements:
        result = config.get_value_replacement(col, value)
        status = "✅" if result == expected else "❌"
        print(f" {status} {col}[{value}] -> '{result}' (expected: '{expected}')")
        if result != expected:
            replacement_success = False
    if not replacement_success:
        print("❌ Some value replacements failed")
        return False

    # Step 5: Pattern formatting (symbols attached to matching values).
    print("\n5. TESTING PATTERN FORMATTING")
    print("-" * 40)
    generative_patterns = config.get_column_patterns("generative")
    scaled_patterns = config.get_column_patterns("scaled")
    print(f"✅ Generative patterns: {generative_patterns}")
    print(f"✅ Scaled patterns: {scaled_patterns}")

    # Step 6: Per-column numeric format strings.
    # Fix: mismatches were printed with "❌" but never failed the run; track
    # them in format_success and include it in the final verdict.
    print("\n6. TESTING COLUMN FORMATTING")
    print("-" * 40)
    format_tests = [
        ("year", 2022.0, "d"),
        ("month", 0.0, "d"),
        ("generative", 0.0092, ".4f"),
        ("scaled", 0.0180, ".4f"),
    ]
    format_success = True
    for col, value, expected_format in format_tests:
        actual_format = config.get_column_format(col)
        ok = actual_format == expected_format
        status = "✅" if ok else "❌"
        print(f" {status} {col} format: '{actual_format}' (expected: '{expected_format}')")
        if not ok:
            format_success = False

    # Step 7: LaTeX generation for a small sample of rows.
    print("\n7. TESTING LATEX GENERATION")
    print("-" * 40)
    latex_formatter = LatexFormatter(config)
    selected_columns = {col: config.get_pretty_column_name(col) for col in sorted_df.columns}
    latex_output = latex_formatter.generate_latex_table(
        sorted_df.head(5), selected_columns, decimal_places=4
    )
    print("✅ LaTeX table generated successfully")
    print(f" Table style: {config.table_style}")
    print(" Sample output (first data row):")
    # Fix: split on a real newline; splitting on the two-character sequence
    # "\n" never matched, so the sample data row was never displayed.
    latex_lines = latex_output.split('\n')
    for line in latex_lines:
        if '&' in line and 'textbf' not in line and 'hline' not in line and 'rule' not in line:
            print(f" {line}")
            break

    # Step 8: Both supported table styles.
    # Fix: the "✅" markers were printed unconditionally even when the check
    # was False, and a failing style check never failed the run; the markers
    # and style_success now reflect the actual booleans. The configured style
    # is restored via try/finally so an exception cannot leave it overridden.
    print("\n8. TESTING TABLE STYLES")
    print("-" * 40)
    style_success = True
    for style in ['hline', 'booktabs']:
        print(f" Testing {style} style:")
        original_style = config._config.get('table_style', 'hline')
        config._config['table_style'] = style
        try:
            style_formatter = LatexFormatter(config)
            style_output = style_formatter.generate_latex_table(
                sorted_df.head(2), selected_columns, decimal_places=4
            )
        finally:
            config._config['table_style'] = original_style
        has_hline = '\\hline' in style_output
        has_booktabs = ('\\toprule' in style_output or '\\midrule' in style_output
                        or '\\bottomrule' in style_output)
        if style == 'hline':
            print(f" {'✅' if has_hline else '❌'} Uses \\hline: {has_hline}")
            print(f" {'✅' if not has_booktabs else '❌'} No booktabs rules: {not has_booktabs}")
            if not has_hline or has_booktabs:
                style_success = False
        else:
            has_toprule = '\\toprule' in style_output
            has_midrule = '\\midrule' in style_output
            has_bottomrule = '\\bottomrule' in style_output
            print(f" {'✅' if has_toprule else '❌'} Uses \\toprule: {has_toprule}")
            print(f" {'✅' if has_midrule else '❌'} Uses \\midrule: {has_midrule}")
            print(f" {'✅' if has_bottomrule else '❌'} Uses \\bottomrule: {has_bottomrule}")
            print(f" {'✅' if not has_hline else '❌'} No \\hline: {not has_hline}")
            if not (has_toprule and has_midrule and has_bottomrule) or has_hline:
                style_success = False

    # Step 9: Spot-check feature markers in the generated LaTeX output.
    print("\n9. TESTING LATEX OUTPUT FEATURES")
    print("-" * 40)
    features_found = {
        "Year replacements": any(year_name in latex_output for year_name in ["Year 1", "Year 2", "Year 3"]),
        "Month replacements": any(month_name in latex_output for month_name in ["Jan", "Feb", "Jul"]),
        "Underlined values": "\\underline{" in latex_output,
        "Pattern symbols": any(symbol in latex_output for symbol in ["\\dagger", "\\ddagger", "\\ast"]),
        "Extra columns": "\\checkmark" in latex_output,
        "Proper formatting": "$" in latex_output,
        "Booktabs style": "\\toprule" in latex_output and "\\midrule" in latex_output and "\\bottomrule" in latex_output,
    }
    all_features_work = True
    for feature, found in features_found.items():
        status = "✅" if found else "❌"
        print(f" {status} {feature}: {'Found' if found else 'Not found'}")
        if not found and feature != "Pattern symbols":  # Pattern symbols depend on data
            all_features_work = False

    # Step 10: Backward compatibility with the legacy model-based config keys.
    print("\n10. TESTING BACKWARD COMPATIBILITY")
    print("-" * 40)
    legacy_config = ConfigManager()
    legacy_config._config = {
        'model_order': {'test-model-1': 1, 'test-model-2': 2},
        'latex_model_names': {'test-model-1': 'Test Model 1'},
        'ignored_models': ['debug-model']
    }
    legacy_tests = [
        ("get_sort_order", lambda: legacy_config.get_sort_order('model', 'test-model-1') == 1),
        ("get_value_replacement", lambda: legacy_config.get_value_replacement('model', 'test-model-1') == 'Test Model 1'),
        ("should_exclude_value", lambda: legacy_config.should_exclude_value('model', 'debug-model')),
    ]
    for test_name, test_func in legacy_tests:
        try:
            result = test_func()
            status = "✅" if result else "❌"
            print(f" {status} Legacy {test_name}: {'Passed' if result else 'Failed'}")
            # Fix: a legacy check that returns False (not just one that
            # raises) now fails the overall run.
            if not result:
                all_features_work = False
        except Exception as e:
            print(f" ❌ Legacy {test_name}: Error - {e}")
            all_features_work = False

    # Final summary — every tracked flag must be True for an overall pass.
    print("\n" + "=" * 80)
    print("PIPELINE TEST SUMMARY")
    print("=" * 80)
    if all_features_work and replacement_success and format_success and style_success:
        print("🎉 ALL TESTS PASSED! The complete pipeline is working correctly.")
        print("\n✅ Key features verified:")
        print(" • Generalized row filtering and sorting")
        print(" • Value replacements for all columns")
        print(" • Pattern formatting and custom column formats")
        print(" • LaTeX generation with underlines and symbols")
        print(" • Configurable table styles (hline vs booktabs)")
        print(" • Backward compatibility with legacy configurations")
        print(" • Extra columns and comprehensive formatting")
        return True
    else:
        print("❌ SOME TESTS FAILED! Please review the errors above.")
        return False
def main():
    """Script entry point: run the full pipeline test and exit with a
    conventional status code (0 on success, 1 on failure)."""
    passed = test_complete_pipeline()
    sys.exit(0 if passed else 1)
# Run the test suite only when executed directly as a script.
if __name__ == "__main__":
    main()