diff --git a/.github/workflows/full-test.yml b/.github/workflows/full-test.yml
index 8629b28..50fa2f4 100644
--- a/.github/workflows/full-test.yml
+++ b/.github/workflows/full-test.yml
@@ -4,6 +4,7 @@ on:
   pull_request:
     branches:
       - main
+      - dev
     paths:
       - "Dockerfile"
      - "src/**"
diff --git a/.last_release b/.last_release
index 0a1ffad..3eefcb9 100644
--- a/.last_release
+++ b/.last_release
@@ -1 +1 @@
-0.7.4
+1.0.0
diff --git a/Dockerfile b/Dockerfile
index e6e6bfe..73b1053 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -23,7 +23,7 @@ WORKDIR /photon
 
 RUN mkdir -p /photon/data/
 
-ADD https://github.com/komoot/photon/releases/download/${PHOTON_VERSION}/photon-opensearch-${PHOTON_VERSION}.jar /photon/photon.jar
+ADD https://github.com/komoot/photon/releases/download/${PHOTON_VERSION}/photon-${PHOTON_VERSION}.jar /photon/photon.jar
 
 COPY src/ ./src/
 COPY entrypoint.sh .
diff --git a/README.md b/README.md
index ca54344..86790e6 100644
--- a/README.md
+++ b/README.md
@@ -69,11 +69,14 @@ The container can be configured using the following environment variables:
 | `INITIAL_DOWNLOAD` | `TRUE`, `FALSE` | `TRUE` | Controls whether the container performs the initial index download when the Photon data directory is empty. Useful for manual imports. |
 | `BASE_URL` | Valid URL | `https://r2.koalasec.org/public` | Custom base URL for index data downloads. Should point to the parent directory of index files. The default has been changed to a community mirror to reduce load on the GraphHopper servers. |
 | `SKIP_MD5_CHECK` | `TRUE`, `FALSE` | `FALSE` | Optionally skip MD5 verification of downloaded index files. |
-| `FILE_URL` | URL to a .tar.bz2 file | - | Set a custom URL for the index file to be downloaded (e.g., "https://download1.graphhopper.com/public/experimental/photon-db-latest.tar.bz2"). This must be a tar.bz2 format. Make sure to set the `UPDATE_STRATEGY` to `DISABLED` when using this option. |
+| `SKIP_SPACE_CHECK` | `TRUE`, `FALSE` | `FALSE` | Skip disk space verification before downloading. |
+| `FILE_URL` | URL to a .tar.bz2 file | - | Set a custom URL for the index file to be downloaded (e.g., "https://download1.graphhopper.com/public/experimental/photon-db-latest.tar.bz2"). The file must be in tar.bz2 format. Setting this forces `UPDATE_STRATEGY` to `DISABLED` and, unless `MD5_URL` is set, `SKIP_MD5_CHECK` to `TRUE`. |
+| `MD5_URL` | URL to the MD5 file to use | - | Set a custom URL for the MD5 file to be downloaded (e.g., "https://download1.graphhopper.com/public/experimental/photon-db-latest.tar.bz2.md5"). |
 | `PHOTON_PARAMS` | Photon executable parameters | - | See `https://github.com/komoot/photon#running-photon.` |
 | `APPRISE_URLS` | Comma-separated Apprise URLs | - | Optional notification URLs for [Apprise](https://github.com/caronc/apprise) to send status updates (e.g., download completion, errors). Supports multiple services like Pushover, Slack, email, etc. Example: `pover://user@token,mailto://user:pass@gmail.com` |
 | `PUID` | User ID | 9011 | The User ID for the photon process. Set this to your host user's ID (`id -u`) to prevent permission errors when using bind mounts. |
 | `PGID` | Group ID | 9011 | The Group ID for the photon process. Set this to your host group's ID (`id -g`) to prevent permission errors when using bind mounts. |
+| `ENABLE_METRICS` | `TRUE`, `FALSE` | `FALSE` | Enables the Prometheus metrics endpoint at `/metrics`. |
 
 ## Available Regions
diff --git a/renovate.json b/renovate.json
index 7190a60..c7ed646 100644
--- a/renovate.json
+++ b/renovate.json
@@ -1,3 +1,6 @@
 {
-  "$schema": "https://docs.renovatebot.com/renovate-schema.json"
+  "$schema": "https://docs.renovatebot.com/renovate-schema.json",
+  "baseBranchPatterns": [
+    "dev"
+  ]
 }
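Reviewer note: the `FILE_URL`/`MD5_URL` precedence documented in the README table above is enforced at import time in `src/utils/config.py` further down in this diff. A standalone sketch of the same rule (the `UPDATE_STRATEGY` default shown here is assumed for illustration, not taken from the codebase):

```python
def resolve_overrides(env: dict[str, str]) -> dict[str, object]:
    # Mirrors the module-level override logic added to src/utils/config.py.
    cfg: dict[str, object] = {
        "FILE_URL": env.get("FILE_URL"),
        "MD5_URL": env.get("MD5_URL"),
        "UPDATE_STRATEGY": env.get("UPDATE_STRATEGY", "PARALLEL"),  # default assumed
        "SKIP_MD5_CHECK": env.get("SKIP_MD5_CHECK", "False").lower() in ("true", "1", "t"),
    }
    if cfg["FILE_URL"]:
        # A custom index URL bypasses the mirror layout, so scheduled
        # updates are forced off ...
        cfg["UPDATE_STRATEGY"] = "DISABLED"
        if not cfg["MD5_URL"]:
            # ... and with no companion MD5 file there is nothing to verify against.
            cfg["SKIP_MD5_CHECK"] = True
    return cfg


assert resolve_overrides({"FILE_URL": "https://example.com/db.tar.bz2"})["SKIP_MD5_CHECK"] is True
```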
diff --git a/src/check_remote.py b/src/check_remote.py
index fbcd336..8440001 100644
--- a/src/check_remote.py
+++ b/src/check_remote.py
@@ -7,11 +7,15 @@
 from src.utils import config
 from src.utils.logger import get_logger
-from src.utils.regions import get_region_info, normalize_region
+from src.utils.regions import get_index_url_path
 
 logging = get_logger()
 
 
+class RemoteFileSizeError(Exception):
+    pass
+
+
 def get_remote_file_size(url: str) -> int:
     try:
         response = requests.head(url, allow_redirects=True, timeout=15)
@@ -30,10 +34,12 @@ def get_remote_file_size(url: str) -> int:
         if total_size.isdigit():
             return int(total_size)
 
-    except Exception as e:
-        logging.warning(f"Could not determine remote file size for {url}: {e}")
+        raise RemoteFileSizeError(f"Server did not return file size for {url}")
 
-    return 0
+    except RemoteFileSizeError:
+        raise
+    except Exception as e:
+        raise RemoteFileSizeError(f"Could not determine remote file size for {url}: {e}") from e
 
 
 def get_remote_time(remote_url: str):
@@ -59,29 +65,13 @@ def get_local_time(local_path: str):
 
 
 def compare_mtime() -> bool:
-    if config.REGION:
-        normalized = normalize_region(config.REGION)
-        region_info = get_region_info(config.REGION)
-        if not region_info:
-            logging.error(f"Unknown region: {config.REGION}")
-            return False
-
-        region_type = region_info["type"]
-
-        if region_type == "planet":
-            index_file = "/photon-db-planet-0.7OS-latest.tar.bz2"
-        elif region_type == "continent":
-            index_file = f"/{normalized}/photon-db-{normalized}-0.7OS-latest.tar.bz2"
-        elif region_type == "sub-region":
-            continent = region_info["continent"]
-            index_file = f"/{continent}/{normalized}/photon-db-{normalized}-0.7OS-latest.tar.bz2"
-        else:
-            logging.error(f"Invalid region type: {region_type}")
-            return False
-    else:
-        index_file = "/photon-db-planet-0.7OS-latest.tar.bz2"
+    try:
+        index_path = get_index_url_path(config.REGION, config.INDEX_DB_VERSION, config.INDEX_FILE_EXTENSION)
+    except ValueError as e:
+        logging.error(str(e))
+        return False
 
-    remote_url = config.BASE_URL + index_file
+    remote_url = config.BASE_URL + index_path
 
     remote_dt = get_remote_time(remote_url)
@@ -105,3 +95,31 @@ def compare_mtime() -> bool:
         logging.debug("Using directory timestamp - applying 144-hour grace period")
         grace_period = datetime.timedelta(hours=144)
         return remote_dt > (local_dt + grace_period)
+
+
+def check_index_age() -> bool:
+    if not config.MIN_INDEX_DATE:
+        return True
+
+    try:
+        min_date = datetime.datetime.strptime(config.MIN_INDEX_DATE, "%d.%m.%y").replace(tzinfo=datetime.UTC)
+    except ValueError:
+        logging.warning(f"Invalid MIN_INDEX_DATE format: {config.MIN_INDEX_DATE}. Expected DD.MM.YY")
+        return True
+
+    local_timestamp = get_local_time(config.OS_NODE_DIR)
+    if local_timestamp == 0.0:
+        logging.info("No local index found, update required")
+        return True
+
+    local_dt = datetime.datetime.fromtimestamp(local_timestamp, tz=datetime.UTC)
+
+    logging.debug(f"Local index date: {local_dt.date()}")
+    logging.debug(f"Minimum required date: {min_date.date()}")
+
+    if local_dt < min_date:
+        logging.info(f"Local index ({local_dt.date()}) is older than minimum required ({min_date.date()})")
+        return True
+
+    logging.info(f"Local index ({local_dt.date()}) meets minimum date requirement ({min_date.date()})")
+    return False
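Reviewer note: `check_index_age` above leans on the `%d.%m.%y` format and UTC-aware datetimes (`datetime.UTC` requires Python 3.11+). A quick sanity check with made-up dates:

```python
import datetime

# "10.02.26" parses as 2026-02-10; tzinfo is attached so the value can be
# compared against the UTC mtime of the local index directory.
min_date = datetime.datetime.strptime("10.02.26", "%d.%m.%y").replace(tzinfo=datetime.UTC)

local_dt = datetime.datetime(2026, 1, 1, tzinfo=datetime.UTC)  # hypothetical index mtime
assert local_dt < min_date  # older than the floor, so check_index_age() would return True
```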
diff --git a/src/downloader.py b/src/downloader.py
index bc1485e..24cc01d 100644
--- a/src/downloader.py
+++ b/src/downloader.py
@@ -8,11 +8,12 @@
 from requests.exceptions import RequestException
 from tqdm import tqdm
 
-from src.check_remote import get_local_time, get_remote_file_size
+from src.check_remote import RemoteFileSizeError, get_local_time, get_remote_file_size
 from src.filesystem import clear_temp_dir, extract_index, move_index, verify_checksum
 from src.utils import config
 from src.utils.logger import get_logger
-from src.utils.regions import get_region_info, normalize_region
+from src.utils.regions import get_index_url_path
+from src.utils.sanitize import sanitize_url
 
 
 class InsufficientSpaceError(Exception):
@@ -152,29 +153,13 @@ def supports_range_requests(url: str) -> bool:
 
 def get_download_url() -> str:
     if config.FILE_URL:
+        logging.info("Using custom FILE_URL for download: %s", sanitize_url(config.FILE_URL))
         return config.FILE_URL
 
-    if config.REGION:
-        normalized = normalize_region(config.REGION)
-        region_info = get_region_info(config.REGION)
-        if not region_info:
-            raise ValueError(f"Unknown region: {config.REGION}")
-
-        region_type = region_info["type"]
-
-        if region_type == "planet":
-            index_url = "/photon-db-planet-0.7OS-latest.tar.bz2"
-        elif region_type == "continent":
-            index_url = f"/{normalized}/photon-db-{normalized}-0.7OS-latest.tar.bz2"
-        elif region_type == "sub-region":
-            continent = region_info["continent"]
-            index_url = f"/{continent}/{normalized}/photon-db-{normalized}-0.7OS-latest.tar.bz2"
-        else:
-            raise ValueError(f"Invalid region type: {region_type}")
-    else:
-        index_url = "/photon-db-planet-0.7OS-latest.tar.bz2"
-
-    return config.BASE_URL + index_url
+    index_path = get_index_url_path(config.REGION, config.INDEX_DB_VERSION, config.INDEX_FILE_EXTENSION)
+    download_url = config.BASE_URL + index_path
+    logging.info("Using constructed URL for download: %s", download_url)
+    return download_url
 
 
 def parallel_update():
@@ -194,14 +179,23 @@ def parallel_update():
     os.makedirs(config.TEMP_DIR, exist_ok=True)
 
     download_url = get_download_url()
-    file_size = get_remote_file_size(download_url)
 
-    if file_size > 0:
+    try:
+        file_size = get_remote_file_size(download_url)
         if not check_disk_space_requirements(file_size, is_parallel=True):
             logging.error("Insufficient disk space for parallel update")
             raise InsufficientSpaceError("Insufficient disk space for parallel update")
-    else:
-        logging.warning("Could not determine download size, proceeding without space check")
+    except RemoteFileSizeError as e:
+        if config.SKIP_SPACE_CHECK:
+            logging.warning(f"{e}")
+            logging.warning("SKIP_SPACE_CHECK is enabled, proceeding without space check")
+        else:
+            logging.error(f"{e}")
+            logging.error(
+                "Cannot proceed without verifying disk space. "
+                "Set SKIP_SPACE_CHECK=true to bypass this check (not recommended)."
+            )
+            raise
 
     logging.info("Downloading index")
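Reviewer note: `check_disk_space_requirements` is not part of this diff; the pattern it implies is a `shutil.disk_usage` comparison against the archive plus its extracted size. A rough sketch under assumed numbers (the 2x extraction multiplier is a guess, not a value from the codebase):

```python
import shutil


def has_enough_space(target_dir: str, download_bytes: int, extract_multiplier: float = 2.0) -> bool:
    """Rough free-space check: the archive and its extracted copy must both fit.

    The multiplier is an assumption about extraction overhead; the real
    check_disk_space_requirements() may weigh parallel vs. sequential updates
    differently (note its is_parallel flag in the calls above).
    """
    free = shutil.disk_usage(target_dir).free
    return free >= int(download_bytes * (1 + extract_multiplier))
```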
@@ -246,14 +240,23 @@ def sequential_update():
     os.makedirs(config.TEMP_DIR, exist_ok=True)
 
     download_url = get_download_url()
-    file_size = get_remote_file_size(download_url)
 
-    if file_size > 0:
+    try:
+        file_size = get_remote_file_size(download_url)
         if not check_disk_space_requirements(file_size, is_parallel=False):
             logging.error("Insufficient disk space for sequential update")
             raise InsufficientSpaceError("Insufficient disk space for sequential update")
-    else:
-        logging.warning("Could not determine download size, proceeding without space check")
+    except RemoteFileSizeError as e:
+        if config.SKIP_SPACE_CHECK:
+            logging.warning(f"{e}")
+            logging.warning("SKIP_SPACE_CHECK is enabled, proceeding without space check")
+        else:
+            logging.error(f"{e}")
+            logging.error(
+                "Cannot proceed without verifying disk space. "
+                "Set SKIP_SPACE_CHECK=true to bypass this check (not recommended)."
+            )
+            raise
 
     logging.info("Downloading new index and MD5 checksum...")
     index_file = download_index()
@@ -281,7 +284,7 @@ def sequential_update():
 
 def download_index() -> str:
-    output_file = "photon-db-latest.tar.bz2"
+    output_file = f"photon-db-latest.{config.INDEX_FILE_EXTENSION}"
     download_url = get_download_url()
 
     output = os.path.join(config.TEMP_DIR, output_file)
@@ -296,33 +299,20 @@
 
 def download_md5():
-    if config.REGION:
-        normalized = normalize_region(config.REGION)
-        region_info = get_region_info(config.REGION)
-        if not region_info:
-            raise ValueError(f"Unknown region: {config.REGION}")
-
-        region_type = region_info["type"]
-
-        if region_type == "planet":
-            md5_url = "/photon-db-planet-0.7OS-latest.tar.bz2.md5"
-        elif region_type == "continent":
-            md5_url = f"/{normalized}/photon-db-{normalized}-0.7OS-latest.tar.bz2.md5"
-        elif region_type == "sub-region":
-            continent = region_info["continent"]
-            md5_url = f"/{continent}/{normalized}/photon-db-{normalized}-0.7OS-latest.tar.bz2.md5"
-        else:
-            raise ValueError(f"Invalid region type: {region_type}")
+    if config.MD5_URL:
+        # MD5 URL provided, use it directly.
+        logging.info("Using custom MD5_URL for checksum: %s", sanitize_url(config.MD5_URL))
+        download_url = config.MD5_URL
     else:
-        md5_url = "/photon-db-planet-0.7OS-latest.tar.bz2.md5"
-
-    download_url = config.BASE_URL + md5_url
+        md5_path = get_index_url_path(config.REGION, config.INDEX_DB_VERSION, config.INDEX_FILE_EXTENSION) + ".md5"
+        download_url = config.BASE_URL + md5_path
+        logging.info("Using constructed URL for checksum: %s", download_url)
 
-    output_file = "photon-db-latest.tar.bz2.md5"
+    output_file = f"photon-db-latest.{config.INDEX_FILE_EXTENSION}.md5"
     output = os.path.join(config.TEMP_DIR, output_file)
 
     if not download_file(download_url, output):
-        raise Exception(f"Failed to download MD5 checksum from {download_url}")
+        raise Exception(f"Failed to download MD5 checksum from {sanitize_url(download_url)}")
 
     return output
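Reviewer note: `verify_checksum` is also outside this diff. For reference, checking a downloaded archive against its `.md5` companion typically looks like this (a sketch, assuming the first whitespace-separated token of the `.md5` file is the hex digest):

```python
import hashlib


def md5_matches(archive_path: str, md5_path: str) -> bool:
    # .md5 files are usually of the form "<hexdigest>  <filename>".
    with open(md5_path, encoding="utf-8") as f:
        expected = f.read().split()[0].lower()

    digest = hashlib.md5()
    with open(archive_path, "rb") as f:
        # Stream in 1 MiB chunks; index archives can be tens of gigabytes.
        for chunk in iter(lambda: f.read(1024 * 1024), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected
```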
diff --git a/src/entrypoint.py b/src/entrypoint.py
index 3f14a69..ac6e930 100644
--- a/src/entrypoint.py
+++ b/src/entrypoint.py
@@ -1,10 +1,12 @@
 import os
 import sys
 
+from src.check_remote import check_index_age
 from src.downloader import InsufficientSpaceError, parallel_update, sequential_update
 from src.utils import config
 from src.utils.logger import get_logger, setup_logging
 from src.utils.notify import send_notification
+from src.utils.sanitize import sanitize_url
 from src.utils.validate_config import validate_config
 
 logger = get_logger()
@@ -20,14 +22,20 @@ def main():
     logger.info(f"REGION: {config.REGION}")
     logger.info(f"FORCE_UPDATE: {config.FORCE_UPDATE}")
     logger.info(f"DOWNLOAD_MAX_RETRIES: {config.DOWNLOAD_MAX_RETRIES}")
-    # TODO ## some people may use HTTP Basic Auth in URL. Only debug log for now, possbily think of solution later. Same goes for BASE_URL, though less likely
-    logger.debug(f"FILE_URL: {config.FILE_URL}")
+    logger.info(f"FILE_URL (sanitized): {sanitize_url(config.FILE_URL)}")
+    logger.info(f"MD5_URL (sanitized): {sanitize_url(config.MD5_URL)}")
     logger.info(f"PHOTON_PARAMS: {config.PHOTON_PARAMS}")
+    logger.info(f"ENABLE_METRICS: {config.ENABLE_METRICS}")
     logger.info(f"JAVA_PARAMS: {config.JAVA_PARAMS}")
     logger.info(f"LOG_LEVEL: {config.LOG_LEVEL}")
     logger.info(f"BASE_URL: {config.BASE_URL}")
     logger.info(f"SKIP_MD5_CHECK: {config.SKIP_MD5_CHECK}")
     logger.info(f"INITIAL_DOWNLOAD: {config.INITIAL_DOWNLOAD}")
+    logger.info(f"SKIP_SPACE_CHECK: {config.SKIP_SPACE_CHECK}")
+    if config.APPRISE_URLS:
+        logger.info("APPRISE_URLS: REDACTED")
+    else:
+        logger.info("APPRISE_URLS: UNSET")
     logger.info("=== END CONFIG VARIABLES ===")
@@ -37,6 +45,9 @@ def main():
         logger.error(f"Stopping due to invalid configuration.\n{e}")
         sys.exit(1)
 
+    if config.MIN_INDEX_DATE:
+        logger.info(f"MIN_INDEX_DATE: {config.MIN_INDEX_DATE}")
+
     if config.FORCE_UPDATE:
         logger.info("Starting forced update")
         try:
@@ -66,6 +77,18 @@ def main():
     else:
         logger.info("Existing index found, skipping download")
 
+    if config.MIN_INDEX_DATE and check_index_age():
+        logger.info("Index is older than minimum required date, starting sequential update")
+        try:
+            sequential_update()
+        except InsufficientSpaceError as e:
+            logger.error(f"Cannot proceed with minimum date update: {e}")
+            send_notification(f"Photon-Docker minimum date update failed: {e}")
+            sys.exit(75)
+        except Exception:
+            logger.error("Minimum date update failed")
+            raise
+
 
 if __name__ == "__main__":
     setup_logging()
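Reviewer note: the `src/process_manager.py` hunk below switches to Photon 1.0's `serve` subcommand and optionally appends the Prometheus flag. The assembled invocation ends up roughly like this (a sketch assuming a plain `java` launcher and empty `JAVA_PARAMS`/`PHOTON_PARAMS`):

```python
import shlex

# Hypothetical final command with ENABLE_METRICS=TRUE:
cmd = ["java", "-jar", "/photon/photon.jar", "serve", "-data-dir", "/photon/data"]
cmd.extend(["-metrics-enable", "prometheus"])

print(shlex.join(cmd))
# java -jar /photon/photon.jar serve -data-dir /photon/data -metrics-enable prometheus
```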
diff --git a/src/process_manager.py b/src/process_manager.py
index 97ab5a5..f6a811e 100644
--- a/src/process_manager.py
+++ b/src/process_manager.py
@@ -90,6 +90,7 @@ def start_photon(self, max_startup_retries=3):
             logger.info(f"Starting Photon (attempt {attempt + 1}/{max_startup_retries})...")
             self.state = AppState.RUNNING
 
+            enable_metrics = config.ENABLE_METRICS
             java_params = config.JAVA_PARAMS or ""
             photon_params = config.PHOTON_PARAMS or ""
@@ -106,7 +107,10 @@ def start_photon(self, max_startup_retries=3):
             if java_params:
                 cmd.extend(shlex.split(java_params))
 
-            cmd.extend(["-jar", "/photon/photon.jar", "-data-dir", config.DATA_DIR])
+            cmd.extend(["-jar", "/photon/photon.jar", "serve", "-data-dir", config.DATA_DIR])
+
+            if enable_metrics:
+                cmd.extend(["-metrics-enable", "prometheus"])
 
             if photon_params:
                 cmd.extend(shlex.split(photon_params))
diff --git a/src/utils/config.py b/src/utils/config.py
index 1cc0d28..9271357 100644
--- a/src/utils/config.py
+++ b/src/utils/config.py
@@ -7,17 +7,29 @@
 FORCE_UPDATE = os.getenv("FORCE_UPDATE", "False").lower() in ("true", "1", "t")
 DOWNLOAD_MAX_RETRIES = os.getenv("DOWNLOAD_MAX_RETRIES", "3")
 FILE_URL = os.getenv("FILE_URL")
+MD5_URL = os.getenv("MD5_URL")
 PHOTON_PARAMS = os.getenv("PHOTON_PARAMS")
+ENABLE_METRICS = os.getenv("ENABLE_METRICS", "False").lower() in ("true", "1", "t")
 JAVA_PARAMS = os.getenv("JAVA_PARAMS")
 LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
 BASE_URL = os.getenv("BASE_URL", "https://r2.koalasec.org/public").rstrip("/")
 SKIP_MD5_CHECK = os.getenv("SKIP_MD5_CHECK", "False").lower() in ("true", "1", "t")
 INITIAL_DOWNLOAD = os.getenv("INITIAL_DOWNLOAD", "True").lower() in ("true", "1", "t")
+SKIP_SPACE_CHECK = os.getenv("SKIP_SPACE_CHECK", "False").lower() in ("true", "1", "t")
 APPRISE_URLS = os.getenv("APPRISE_URLS")
+MIN_INDEX_DATE = os.getenv("MIN_INDEX_DATE", "10.02.26")
 
 # APP CONFIG
+INDEX_DB_VERSION = "1.0"
+INDEX_FILE_EXTENSION = "tar.bz2"
+
 PHOTON_DIR = "/photon"
 DATA_DIR = "/photon/data"
 PHOTON_DATA_DIR = os.path.join(DATA_DIR, "photon_data")
 TEMP_DIR = os.path.join(DATA_DIR, "temp")
 OS_NODE_DIR = os.path.join(PHOTON_DATA_DIR, "node_1")
+
+if FILE_URL:
+    UPDATE_STRATEGY = "DISABLED"
+    if not MD5_URL:
+        SKIP_MD5_CHECK = True
diff --git a/src/utils/regions.py b/src/utils/regions.py
index 8879761..0cc75f0 100644
--- a/src/utils/regions.py
+++ b/src/utils/regions.py
@@ -104,3 +104,33 @@ def get_region_info(region: str) -> dict | None:
 
 def is_valid_region(region: str) -> bool:
     return get_region_info(region) is not None
+
+
+def get_index_filename(region_name: str, db_version: str, extension: str) -> str:
+    return f"photon-db-{region_name}-{db_version}-latest.{extension}"
+
+
+def get_index_url_path(region: str | None, db_version: str, extension: str) -> str:
+    if region:
+        normalized = normalize_region(region)
+        if normalized is None:
+            raise ValueError(f"Unknown region: {region}")
+
+        region_info = get_region_info(region)
+        if not region_info:
+            raise ValueError(f"Unknown region: {region}")
+
+        region_type = region_info["type"]
+        filename = get_index_filename(normalized, db_version, extension)
+
+        if region_type == "planet":
+            return f"/{filename}"
+        if region_type == "continent":
+            return f"/{normalized}/{filename}"
+        if region_type == "sub-region":
+            continent = region_info["continent"]
+            return f"/{continent}/{normalized}/{filename}"
+
+        raise ValueError(f"Invalid region type: {region_type}")
+
+    return f"/{get_index_filename('planet', db_version, extension)}"
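Reviewer note: with the DB version and extension lifted into config, the mirror layout produced by `get_index_url_path` is easy to spot-check. Illustrative expectations, assuming the region table classifies `europe` as a continent and `germany` as a sub-region of `europe`:

```python
from src.utils.regions import get_index_url_path

assert get_index_url_path(None, "1.0", "tar.bz2") == "/photon-db-planet-1.0-latest.tar.bz2"
assert get_index_url_path("europe", "1.0", "tar.bz2") == "/europe/photon-db-europe-1.0-latest.tar.bz2"
assert get_index_url_path("germany", "1.0", "tar.bz2") == (
    "/europe/germany/photon-db-germany-1.0-latest.tar.bz2"
)
```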
diff --git a/src/utils/sanitize.py b/src/utils/sanitize.py
new file mode 100644
index 0000000..f2d9d99
--- /dev/null
+++ b/src/utils/sanitize.py
@@ -0,0 +1,12 @@
+from urllib.parse import urlparse
+
+
+def sanitize_url(url: str | None) -> str | None:
+    if not url:
+        return url
+    parsed = urlparse(url)
+    if parsed.username or parsed.password:
+        host = parsed.hostname or ""
+        port = f":{parsed.port}" if parsed.port else ""
+        return parsed._replace(netloc=f"***@{host}{port}").geturl()
+    return url
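Reviewer note: `sanitize_url` masks embedded credentials while leaving the rest of the URL intact. Example behavior (URLs are made up):

```python
from src.utils.sanitize import sanitize_url

print(sanitize_url("https://user:secret@mirror.example.com:8443/db.tar.bz2"))
# https://***@mirror.example.com:8443/db.tar.bz2

print(sanitize_url("https://mirror.example.com/db.tar.bz2"))  # unchanged
print(sanitize_url(None))  # None passes through
```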