|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Validation script for organization YAML files in GitHub Actions. |
| 4 | +Validates organization files for schema compliance and slug conflicts. |
| 5 | +""" |
| 6 | + |
| 7 | +import sys |
| 8 | +import re |
| 9 | +from pathlib import Path |
| 10 | +from typing import Dict, List, Tuple |
| 11 | + |
| 12 | +import click |
| 13 | +import yaml |
| 14 | +import requests |
| 15 | +from krs_puller import KRSDataPuller, KRSMaintenanceError |
| 16 | + |
| 17 | + |
class OrganizationValidator:
    """Validate organization YAML files for GitHub Actions.

    The validator checks that each requested file has the required fields in
    the expected formats, that its KRS number resolves through
    ``KRSDataPuller``, and that no slug is duplicated across the directory or
    collides with a reserved slug. User-facing messages are in Polish.
    """

    def __init__(self, organizations_dir: str, slug_field: str):
        """Store the configuration.

        Args:
            organizations_dir: Directory containing the organization files.
            slug_field: Name of the YAML key holding the organization slug.
        """
        self.organizations_dir = Path(organizations_dir)
        self.slug_field = slug_field
        # Slugs that organizations are not allowed to use.
        self.reserved_slugs = {
            "info",
            "organizacje",
            "404",
        }

    def load_all_organizations(self) -> Tuple[Dict[str, str], List[str]]:
        """Load every ``*.yaml``/``*.yml`` file and map slugs to file names.

        Returns:
            A ``(slug_to_file, errors)`` tuple. ``errors`` lists duplicate
            slugs and files that could not be loaded. Both are empty when the
            organizations directory does not exist.
        """
        slug_to_file: Dict[str, str] = {}
        errors: List[str] = []

        if not self.organizations_dir.exists():
            return slug_to_file, errors

        # Sorted so that duplicate-slug messages are reproducible between runs
        # (glob order depends on the file system).
        yaml_files = sorted(
            list(self.organizations_dir.glob("*.yaml"))
            + list(self.organizations_dir.glob("*.yml"))
        )
        for yaml_file in yaml_files:
            try:
                with open(yaml_file, 'r', encoding='utf-8') as f:
                    data = yaml.safe_load(f)
                if data and self.slug_field in data:
                    slug = data[self.slug_field]
                    if slug in slug_to_file:
                        errors.append(f"Znaleziono duplikaty pola {self.slug_field} o wartości '{slug}' w plikach: {yaml_file.name} i {slug_to_file[slug]}")
                    else:
                        slug_to_file[slug] = yaml_file.name
            except Exception as e:
                # Deliberately broad: any unreadable or malformed file is
                # reported as a load error instead of aborting the scan.
                errors.append(f"Błąd wczytywania pliku {yaml_file.name}: {e}")

        return slug_to_file, errors

    def _validate_krs_data(self, krs: str, data: dict, errors: List[str]) -> None:
        """Look the KRS number up and compare the registered name.

        Args:
            krs: KRS number that already passed the 10-digit format check.
            data: Parsed organization document.
            errors: List that validation messages are appended to.
        """
        try:
            krs_data = KRSDataPuller(krs)

            if not krs_data.name:
                errors.append(f"KRS {krs} istnieje, ale dane organizacji są niekompletne")
                return

            # ``nazwa`` may be missing, null or a non-string scalar; normalise
            # it to text so the comparison cannot raise AttributeError.
            yaml_name = str(data.get('nazwa') or '').strip()
            krs_name = krs_data.name.strip()

            if yaml_name.lower() != krs_name.lower():
                errors.append(f"Niezgodność nazwy organizacji: w YAML jest '{yaml_name}', ale w KRS jest '{krs_name}'")

        except KRSMaintenanceError as e:
            # Only a warning is printed; no validation error is recorded.
            print(f" ⚠️ {e}")

        except requests.RequestException:
            # RequestException is the base of HTTPError, ConnectionError and
            # Timeout, so the "network error" part of the message is covered.
            errors.append(f"KRS {krs} nie zostało znalezione w rejestrze lub wystąpił błąd sieci")

    def _validate_delivery(self, delivery, errors: List[str]) -> None:
        """Check the ``dostawa`` mapping: required keys, postal code, phone."""
        if not isinstance(delivery, dict):
            errors.append("dostawa musi być obiektem")
            return

        for field in ('ulica', 'kod', 'miasto', 'telefon'):
            if field not in delivery:
                errors.append(f"Brakuje wymaganego pola dostawy: dostawa.{field}")

        # Postal code must look like 00-000.
        if 'kod' in delivery:
            postal_code = str(delivery['kod'])
            if not re.fullmatch(r"\d{2}-\d{3}", postal_code):
                errors.append(f"Nieprawidłowy format kodu pocztowego: {postal_code} (oczekiwany format: 00-000)")

        # Phone: nine digits with an optional 48 / +48 / 0048 prefix, after
        # whitespace and hyphens are removed.
        if 'telefon' in delivery:
            phone = re.sub(r"[\s-]", "", str(delivery['telefon']))
            if not re.fullmatch(r"(\+?48|0048)?\d{9}", phone):
                errors.append(f"Nieprawidłowy format numeru telefonu: {delivery['telefon']}")

    def _validate_products(self, products, errors: List[str]) -> None:
        """Check that ``produkty`` is a list of mappings with nazwa and link."""
        if not isinstance(products, list):
            errors.append("produkty musi być listą")
            return

        for i, product in enumerate(products):
            if not isinstance(product, dict):
                errors.append(f"produkty[{i}] musi być obiektem")
                continue

            if 'nazwa' not in product:
                errors.append(f"produkty[{i}] brakuje wymaganego pola: nazwa")
            if 'link' not in product:
                errors.append(f"produkty[{i}] brakuje wymaganego pola: link")

    def validate_yaml_structure(self, file_path: Path) -> Tuple[bool, List[str]]:
        """Validate one file's YAML syntax, required fields and field formats.

        Args:
            file_path: Path of the file to validate.

        Returns:
            ``(is_valid, errors)`` where ``is_valid`` is True only when
            ``errors`` is empty.
        """
        errors: List[str] = []

        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = yaml.safe_load(f)
        except yaml.YAMLError as e:
            errors.append(f"Nieprawidłowa składnia YAML: {e}")
            return False, errors
        except Exception as e:
            errors.append(f"Błąd odczytu pliku: {e}")
            return False, errors

        if not data:
            errors.append("Pusty plik YAML")
            return False, errors

        # A top-level list or scalar would make the key lookups below raise
        # TypeError or report misleading "missing field" errors.
        if not isinstance(data, dict):
            errors.append("Plik YAML musi zawierać obiekt (mapowanie) na najwyższym poziomie")
            return False, errors

        required_fields = [
            'nazwa',          # organization name
            self.slug_field,  # URL slug
            'strona',         # website
            'krs',            # KRS number
            'dostawa',        # delivery info
            'produkty',       # products list
        ]

        for field in required_fields:
            if field not in data:
                errors.append(f"Brakuje wymaganego pola: {field}")

        if 'krs' in data:
            krs = str(data['krs'])
            if not re.fullmatch(r"\d{10}", krs):
                errors.append(f"Nieprawidłowy format KRS: {krs} (oczekiwano 10 cyfr)")
            else:
                # Only well-formed numbers are looked up in the registry.
                self._validate_krs_data(krs, data, errors)

        if self.slug_field in data:
            slug = data[self.slug_field]
            if not isinstance(slug, str) or not slug.strip():
                errors.append(f"Nieprawidłowy {self.slug_field}: musi być niepustym ciągiem znaków")
            elif not re.fullmatch(r"[a-z0-9-]+", slug):
                errors.append(f"Nieprawidłowy format {self.slug_field}: {slug} (dozwolone tylko małe litery, cyfry i myślniki)")

        # Empty or null sections are skipped here; only their presence is
        # enforced by the required-field check above.
        delivery = data.get('dostawa')
        if delivery:
            self._validate_delivery(delivery, errors)

        products = data.get('produkty')
        if products:
            self._validate_products(products, errors)

        return len(errors) == 0, errors

    def validate_slug_conflicts(self, files_to_check: List[str], all_organizations: Dict[str, str]) -> Tuple[bool, List[str]]:
        """Report files under check whose slug is one of the reserved slugs.

        Args:
            files_to_check: File names being validated in this run.
            all_organizations: Mapping of slug to file name.

        Returns:
            ``(is_valid, errors)`` where ``is_valid`` is True only when
            ``errors`` is empty.
        """
        errors = []

        for slug, filename in all_organizations.items():
            if filename in files_to_check and slug in self.reserved_slugs:
                errors.append(f"Zarezerwowany {self.slug_field} '{slug}' używany w pliku {filename}")

        return len(errors) == 0, errors

    def validate_files(self, files_to_check: List[str]) -> bool:
        """Validate the given files and print a report.

        Args:
            files_to_check: File names relative to the organizations directory.

        Returns:
            True when every file passes the structure checks and no reserved
            slug is used; False otherwise, including when any organization
            file fails to load or slugs are duplicated.
        """
        print("=================================================")
        print("🚀 Rozpoczynam walidację organizacji...")

        # Loading also detects duplicate slugs; any load error is fatal.
        all_organizations, load_errors = self.load_all_organizations()

        if load_errors:
            print("❌ Krytyczne błędy wczytywania organizacji:")
            for error in load_errors:
                print(f" - {error}")
            print("💥 Walidacja nie powiodła się!")
            return False

        print(f"Walidacja {len(files_to_check)} pliku/ów organizacji...")
        print(f"Katalog organizacji: {self.organizations_dir}")
        print(f"Pole {self.slug_field}: {self.slug_field}")
        print(f"Zarezerwowane {self.slug_field}: {', '.join(sorted(self.reserved_slugs))}")
        print()

        # Must start as True: the checks below only ever set it to False, so
        # without this a fully successful run has no value to return.
        all_valid = True

        for file_path in files_to_check:
            print(f"Walidacja {file_path}...")
            full_path = self.organizations_dir / file_path

            if not full_path.exists():
                print(f" ❌ Plik nie znaleziony: {file_path}")
                all_valid = False
                continue

            is_valid, errors = self.validate_yaml_structure(full_path)

            if is_valid:
                print(" ✅ Walidacja struktury zakończona pomyślnie")
            else:
                print(" ❌ Walidacja struktury nie powiodła się:")
                for error in errors:
                    print(f" - {error}")
                all_valid = False

            print()

        print("Sprawdzanie konfliktów adresów...")
        is_valid, errors = self.validate_slug_conflicts(files_to_check, all_organizations)

        if is_valid:
            print(" ✅ Nie znaleziono konfliktów adresów")
        else:
            print(" ❌ Znaleziono konflikty adresów:")
            for error in errors:
                print(f" - {error}")
            all_valid = False

        print()

        if all_valid:
            print("🎉 Wszystkie walidacje zakończone pomyślnie!")
        else:
            print("💥 Walidacja nie powiodła się!")

        print("=================================================")

        return all_valid
| 238 | + |
| 239 | + |
@click.command()
@click.option('--files', required=True, help='Space-separated list of organization YAML files to validate')
@click.option('--organizations-dir', default='organizations', help='Directory containing organization YAML files')
@click.option('--slug-field', default='adres', help='YAML field name for organization slug')
def main(files: str, organizations_dir: str, slug_field: str):
    """Validate organization YAML files."""
    # NOTE: the docstring above is shown by click as the command's help text.

    # str.split() with no separator already drops empty items and the
    # whitespace around each entry.
    requested = files.split()

    if not requested:
        print("Brak plików do walidacji")
        sys.exit(0)

    validator = OrganizationValidator(organizations_dir, slug_field)
    passed = validator.validate_files(requested)

    # Exit status 0 on success, 1 on any validation failure.
    sys.exit(0 if passed else 1)


if __name__ == "__main__":
    main()
0 commit comments