diff --git a/CHANGELOG.md b/CHANGELOG.md index 954eae53..52a7495d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,11 @@ The format is (loosely) based on [Keep a Changelog](http://keepachangelog.com/) ### Added +- If a validation error occurs in recursive mode only show the invalid items unless verbose mode is on. [#243](https://github.com/stac-utils/stac-validator/pull/243) +- Added ability to validate extensions of Collections [#243](https://github.com/stac-utils/stac-validator/pull/243) +- Improve error reporting through use of [best_match](https://python-jsonschema.readthedocs.io/en/stable/errors/#best-match-and-relevance) [#243](https://github.com/stac-utils/stac-validator/pull/243) +- Add `schema-map` option similar to [stac-node-validator SchemaMap](https://github.com/stac-utils/stac-node-validator?tab=readme-ov-file#usage) to allow validation against local copies of schemas. [#243](https://github.com/stac-utils/stac-validator/pull/243) + ## [v3.5.0] - 2025-01-10 ### Added diff --git a/README.md b/README.md index 761d5eda..11a11434 100644 --- a/README.md +++ b/README.md @@ -91,34 +91,38 @@ stac-validator --help Usage: stac-validator [OPTIONS] STAC_FILE Options: - --core Validate core stac object only without extensions. - --extensions Validate extensions only. - --links Additionally validate links. Only works with - default mode. - --assets Additionally validate assets. Only works with - default mode. - -c, --custom TEXT Validate against a custom schema (local filepath or - remote schema). - -r, --recursive Recursively validate all related stac objects. - -m, --max-depth INTEGER Maximum depth to traverse when recursing. Omit this - argument to get full recursion. Ignored if - `recursive == False`. - --collections Validate /collections response. - --item-collection Validate item collection response. Can be combined - with --pages. Defaults to one page. - --no-assets-urls Disables the opening of href links when validating - assets (enabled by default). 
- --header KEY VALUE HTTP header to include in the requests. Can be used - multiple times. - -p, --pages INTEGER Maximum number of pages to validate via --item- - collection. Defaults to one page. - -v, --verbose Enables verbose output for recursive mode. - --no_output Do not print output to console. - --log_file TEXT Save full recursive output to log file (local - filepath). - --version Show the version and exit. - --help Show this message and exit. -``` + --core Validate core stac object only without + extensions. + --extensions Validate extensions only. + --links Additionally validate links. Only works with + default mode. + --assets Additionally validate assets. Only works + with default mode. + -c, --custom TEXT Validate against a custom schema (local + filepath or remote schema). + -s, --schema-map <TEXT TEXT>... + Schema path to replaced by (local) schema + path during validation. Can be used multiple + times. + -r, --recursive Recursively validate all related stac + objects. + -m, --max-depth INTEGER Maximum depth to traverse when recursing. + Omit this argument to get full recursion. + Ignored if `recursive == False`. + --collections Validate /collections response. + --item-collection Validate item collection response. Can be + combined with --pages. Defaults to one page. + --no-assets-urls Disables the opening of href links when + validating assets (enabled by default). + --header <TEXT TEXT>... HTTP header to include in the requests. Can + be used multiple times. + -p, --pages INTEGER Maximum number of pages to validate via + --item-collection. Defaults to one page. + -v, --verbose Enables verbose output for recursive mode. + --no_output Do not print output to console. + --log_file TEXT Save full recursive output to log file + (local filepath).
+ --help Show this message and exit. +``` --- @@ -340,3 +344,47 @@ stac-validator https://earth-search.aws.element84.com/v0/collections/sentinel-s2 ```bash stac-validator https://stac-catalog.eu/collections/sentinel-s2-l2a/items --header x-api-key $MY_API_KEY --header foo bar ``` + +**--schema-map** +Schema map allows stac-validator to replace a schema referenced in a STAC JSON document with a schema from another URL or a local schema file. +This is especially useful when developing a schema and testing validation against your local copy of the schema. + +```bash +stac-validator https://raw.githubusercontent.com/radiantearth/stac-spec/v1.0.0/examples/extended-item.json --extensions --schema-map https://stac-extensions.github.io/projection/v1.0.0/schema.json "tests/test_data/schema/v1.0.0/projection.json" +[ + { + "version": "1.0.0", + "path": "https://raw.githubusercontent.com/radiantearth/stac-spec/v1.0.0/examples/extended-item.json", + "schema": [ + "https://stac-extensions.github.io/eo/v1.0.0/schema.json", + "tests/test_data/schema/v1.0.0/projection.json", + "https://stac-extensions.github.io/scientific/v1.0.0/schema.json", + "https://stac-extensions.github.io/view/v1.0.0/schema.json", + "https://stac-extensions.github.io/remote-data/v1.0.0/schema.json" + ], + "valid_stac": true, + "asset_type": "ITEM", + "validation_method": "extensions" + } +] +``` + +This option is also capable of replacing URLs to subschemas: + +```bash +stac-validator tests/test_data/v100/extended-item-local.json --custom tests/test_data/schema/v1.0.0/item_with_unreachable_url.json --schema-map https://geojson-wrong-url.org/schema/Feature.json https://geojson.org/schema/Feature.json --schema-map https://geojson-wrong-url.org/schema/Geometry.json https://geojson.org/schema/Geometry.json +[ + { + "version": "1.0.0", +
"path": "tests/test_data/v100/extended-item-local.json", + "schema": [ + "tests/test_data/schema/v1.0.0/item_with_unreachable_url.json" + ], + "valid_stac": true, + "asset_type": "ITEM", + "validation_method": "custom" + } +] +``` + + diff --git a/stac_validator/stac_validator.py b/stac_validator/stac_validator.py index 48c69be9..2384d106 100644 --- a/stac_validator/stac_validator.py +++ b/stac_validator/stac_validator.py @@ -1,6 +1,6 @@ import json import sys -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional, Tuple import click # type: ignore @@ -87,6 +87,13 @@ def collections_summary(message: List[Dict[str, Any]]) -> None: default="", help="Validate against a custom schema (local filepath or remote schema).", ) +@click.option( + "--schema-map", + "-s", + type=(str, str), + multiple=True, + help="Schema path to replaced by (local) schema path during validation. Can be used multiple times.", +) @click.option( "--recursive", "-r", @@ -149,6 +156,7 @@ def main( links: bool, assets: bool, custom: str, + schema_map: List[Tuple], verbose: bool, no_output: bool, log_file: str, @@ -170,6 +178,7 @@ def main( links (bool): Whether to additionally validate links. Only works with default mode. assets (bool): Whether to additionally validate assets. Only works with default mode. custom (str): Path to a custom schema file to validate against. + schema_map (list(tuple)): List of tuples each having two elememts. First element is the schema path to be replaced by the path in the second element. verbose (bool): Whether to enable verbose output for recursive mode. no_output (bool): Whether to print output to console. log_file (str): Path to a log file to save full recursive output. @@ -182,6 +191,10 @@ def main( or 1 if it is invalid. 
""" valid = True + if schema_map == (): + schema_map_dict: Optional[Dict[str, str]] = None + else: + schema_map_dict = dict(schema_map) stac = StacValidate( stac_file=stac_file, collections=collections, @@ -196,6 +209,7 @@ def main( headers=dict(header), extensions=extensions, custom=custom, + schema_map=schema_map_dict, verbose=verbose, log=log_file, ) diff --git a/stac_validator/utilities.py b/stac_validator/utilities.py index a6b24e65..e07aeb84 100644 --- a/stac_validator/utilities.py +++ b/stac_validator/utilities.py @@ -5,12 +5,10 @@ from urllib.parse import urlparse from urllib.request import Request, urlopen -import jsonschema import requests # type: ignore from jsonschema import Draft202012Validator from referencing import Registry, Resource from referencing.jsonschema import DRAFT202012 -from referencing.retrieval import to_cached_resource from referencing.typing import URI NEW_VERSIONS = [ @@ -192,60 +190,58 @@ def link_request( initial_message["format_invalid"].append(link["href"]) -def fetch_remote_schema(uri: str) -> dict: +def cached_retrieve(uri: URI, schema_map: Optional[Dict] = None) -> Resource[Dict]: """ - Fetch a remote schema from a URI. + Retrieve and cache a remote schema. Args: - uri (str): The URI of the schema to fetch. + uri (str): The URI of the schema. + schema_map_keys: Override schema location to validate against local versions of a schema Returns: - dict: The fetched schema content as a dictionary. + dict: The parsed JSON dict of the schema. Raises: requests.RequestException: If the request to fetch the schema fails. + Exception: For any other unexpected errors. 
""" - response = requests.get(uri) - response.raise_for_status() - return response.json() + return Resource.from_contents( + fetch_schema_with_override(uri, schema_map=schema_map) + ) -@to_cached_resource() # type: ignore -def cached_retrieve(uri: URI) -> str: +def fetch_schema_with_override( + schema_path: str, schema_map: Optional[Dict] = None +) -> Dict: """ Retrieve and cache a remote schema. Args: - uri (str): The URI of the schema. + schema_path (str): Path or URI of the schema. + schema_map (dict): Override schema location to validate against local versions of a schema Returns: - str: The raw JSON string of the schema. - - Raises: - requests.RequestException: If the request to fetch the schema fails. - Exception: For any other unexpected errors. + dict: The parsed JSON dict of the schema. """ - try: - response = requests.get(uri, timeout=10) # Set a timeout for robustness - response.raise_for_status() # Raise an error for HTTP response codes >= 400 - return response.text - except requests.exceptions.RequestException as e: - raise requests.RequestException( - f"Failed to fetch schema from {uri}: {str(e)}" - ) from e - except Exception as e: - raise Exception( - f"Unexpected error while retrieving schema from {uri}: {str(e)}" - ) from e - - -def validate_with_ref_resolver(schema_path: str, content: dict) -> None: + + if schema_map: + if schema_path in schema_map: + schema_path = schema_map[schema_path] + + # Load the schema + return fetch_and_parse_schema(schema_path) + + +def validate_with_ref_resolver( + schema_path: str, content: Dict, schema_map: Optional[Dict] = None +) -> None: """ Validate a JSON document against a JSON Schema with dynamic reference resolution. Args: schema_path (str): Path or URI of the JSON Schema. content (dict): JSON content to validate. + schema_map (dict): Override schema location to validate against local versions of a schema Raises: jsonschema.exceptions.ValidationError: If validation fails. 
@@ -253,27 +249,16 @@ def validate_with_ref_resolver(schema_path: str, content: dict) -> None: FileNotFoundError: If a local schema file is not found. Exception: If any other error occurs during validation. """ - # Load the schema - if schema_path.startswith("http"): - schema = fetch_remote_schema(schema_path) - else: - try: - with open(schema_path, "r") as f: - schema = json.load(f) - except FileNotFoundError as e: - raise FileNotFoundError(f"Schema file not found: {schema_path}") from e - + schema = fetch_schema_with_override(schema_path, schema_map=schema_map) # Set up the resource and registry for schema resolution + cached_retrieve_with_schema_map = functools.partial( + cached_retrieve, schema_map=schema_map + ) resource: Resource = Resource(contents=schema, specification=DRAFT202012) # type: ignore - registry: Registry = Registry(retrieve=cached_retrieve).with_resource( # type: ignore + registry: Registry = Registry(retrieve=cached_retrieve_with_schema_map).with_resource( # type: ignore uri=schema_path, resource=resource ) # type: ignore # Validate the content against the schema - try: - validator = Draft202012Validator(schema, registry=registry) - validator.validate(content) - except jsonschema.exceptions.ValidationError as e: - raise jsonschema.exceptions.ValidationError(f"{e.message}") from e - except Exception as e: - raise Exception(f"Unexpected error during validation: {str(e)}") from e + validator = Draft202012Validator(schema, registry=registry) + validator.validate(content) diff --git a/stac_validator/validate.py b/stac_validator/validate.py index ba71a7f6..abf948e0 100644 --- a/stac_validator/validate.py +++ b/stac_validator/validate.py @@ -6,6 +6,7 @@ import click # type: ignore import jsonschema # type: ignore +from jsonschema.exceptions import best_match from requests import exceptions # type: ignore from .utilities import ( @@ -60,6 +61,7 @@ def __init__( headers: dict = {}, extensions: bool = False, custom: str = "", + schema_map: 
Optional[Dict[str, str]] = None, verbose: bool = False, log: str = "", ): @@ -68,7 +70,8 @@ def __init__( self.item_collection = item_collection self.pages = pages self.message: List = [] - self.schema = custom + self._schema = custom + self.schema_map = schema_map self.links = links self.assets = assets self.assets_open_urls = assets_open_urls @@ -85,6 +88,17 @@ def __init__( self.valid = False self.log = log + @property + def schema(self) -> str: + return self._schema + + @schema.setter + def schema(self, schema_path: str): + if self.schema_map: + if schema_path in self.schema_map: + schema_path = self.schema_map[schema_path] + self._schema = schema_path + def create_err_msg(self, err_type: str, err_msg: str) -> Dict: """ Create a standardized error message dictionary and mark validation as failed. @@ -197,14 +211,20 @@ def custom_validator(self) -> None: None """ if is_valid_url(self.schema): - validate_with_ref_resolver(self.schema, self.stac_content) + validate_with_ref_resolver( + self.schema, self.stac_content, schema_map=self.schema_map + ) elif os.path.exists(self.schema): - validate_with_ref_resolver(self.schema, self.stac_content) + validate_with_ref_resolver( + self.schema, self.stac_content, schema_map=self.schema_map + ) else: file_directory = os.path.dirname(os.path.abspath(str(self.stac_file))) - self.schema = os.path.join(file_directory, self.schema) - self.schema = os.path.abspath(os.path.realpath(self.schema)) - validate_with_ref_resolver(self.schema, self.stac_content) + schema = os.path.join(file_directory, self.schema) + schema = os.path.abspath(os.path.realpath(schema)) + validate_with_ref_resolver( + schema, self.stac_content, schema_map=self.schema_map + ) def core_validator(self, stac_type: str) -> None: """ @@ -215,7 +235,9 @@ def core_validator(self, stac_type: str) -> None: """ stac_type = stac_type.lower() self.schema = set_schema_addr(self.version, stac_type) - validate_with_ref_resolver(self.schema, self.stac_content) + 
validate_with_ref_resolver( + self.schema, self.stac_content, schema_map=self.schema_map + ) def extensions_validator(self, stac_type: str) -> Dict: """ @@ -231,49 +253,58 @@ def extensions_validator(self, stac_type: str) -> Dict: message["schema"] = [] valid = True - if stac_type == "ITEM": - try: - if "stac_extensions" in self.stac_content: - # Handle legacy "proj" to "projection" mapping - if "proj" in self.stac_content["stac_extensions"]: - index = self.stac_content["stac_extensions"].index("proj") - self.stac_content["stac_extensions"][index] = "projection" - - schemas = self.stac_content["stac_extensions"] - for extension in schemas: - if not (is_valid_url(extension) or extension.endswith(".json")): - if self.version == "1.0.0-beta.2": - self.stac_content["stac_version"] = "1.0.0-beta.1" - self.version = self.stac_content["stac_version"] - extension = ( - f"https://cdn.staclint.com/v{self.version}/extension/" - f"{extension}.json" - ) - self.schema = extension - self.custom_validator() - message["schema"].append(extension) + try: + if ( + "stac_extensions" in self.stac_content + and len(self.stac_content["stac_extensions"]) > 0 + ): + # Handle legacy "proj" to "projection" mapping + if "proj" in self.stac_content["stac_extensions"]: + index = self.stac_content["stac_extensions"].index("proj") + self.stac_content["stac_extensions"][index] = "projection" + + schemas = self.stac_content["stac_extensions"] + for extension in schemas: + if not (is_valid_url(extension) or extension.endswith(".json")): + if self.version == "1.0.0-beta.2": + self.stac_content["stac_version"] = "1.0.0-beta.1" + self.version = self.stac_content["stac_version"] + extension = ( + f"https://cdn.staclint.com/v{self.version}/extension/" + f"{extension}.json" + ) + self.schema = extension + self.custom_validator() + message["schema"].append(self.schema) + else: + self.core_validator(stac_type) + message["schema"] = [self.schema] - except jsonschema.exceptions.ValidationError as e: - valid = 
False - if e.absolute_path: - err_msg = ( - f"{e.message}. Error is in " - f"{' -> '.join(map(str, e.absolute_path))}" - ) - else: - err_msg = f"{e.message}" - message = self.create_err_msg("JSONSchemaValidationError", err_msg) - return message + except jsonschema.exceptions.ValidationError as e: + if self.recursive: + raise + if e.context: + e = best_match(e.context) # type: ignore + valid = False + if e.absolute_path: + err_msg = ( + f"{e.message}. Error is in " + f"{' -> '.join(map(str, e.absolute_path))}" + ) + else: + err_msg = f"{e.message}" + message = self.create_err_msg("JSONSchemaValidationError", err_msg) + return message - except Exception as e: - valid = False - err_msg = f"{e}. Error in Extensions." - return self.create_err_msg("Exception", err_msg) - else: - self.core_validator(stac_type) - message["schema"] = [self.schema] + except Exception as e: + if self.recursive: + raise + valid = False + err_msg = f"{e}. Error in Extensions." + return self.create_err_msg("Exception", err_msg) self.valid = valid + message["valid_stac"] = valid return message def default_validator(self, stac_type: str) -> Dict: @@ -292,14 +323,16 @@ def default_validator(self, stac_type: str) -> Dict: # Validate core self.core_validator(stac_type) core_schema = self.schema - message["schema"].append(core_schema) + if core_schema not in message["schema"]: + message["schema"].append(core_schema) stac_upper = stac_type.upper() # Validate extensions if ITEM - if stac_upper == "ITEM": + if stac_upper == "ITEM" or stac_upper == "COLLECTION": message = self.extensions_validator(stac_upper) message["validation_method"] = "default" - message["schema"].append(core_schema) + if core_schema not in message["schema"]: + message["schema"].append(core_schema) # Optionally validate links if self.links: @@ -323,14 +356,19 @@ def recursive_validator(self, stac_type: str) -> bool: Returns: bool: True if all validations are successful, False otherwise. 
""" + valid = False if not self.skip_val: self.schema = set_schema_addr(self.version, stac_type.lower()) message = self.create_message(stac_type, "recursive") message["valid_stac"] = False try: - _ = self.default_validator(stac_type) + msg = self.default_validator(stac_type) + message["schema"] = msg["schema"] + except jsonschema.exceptions.ValidationError as e: + if e.context: + e = best_match(e.context) # type: ignore if e.absolute_path: err_msg = ( f"{e.message}. Error is in " @@ -344,9 +382,10 @@ def recursive_validator(self, stac_type: str) -> bool: self.message.append(message) if self.verbose: click.echo(json.dumps(message, indent=4)) - return False + return valid - message["valid_stac"] = True + valid = True + message["valid_stac"] = valid self.message.append(message) if self.verbose: click.echo(json.dumps(message, indent=4)) @@ -357,6 +396,7 @@ def recursive_validator(self, stac_type: str) -> bool: base_url = self.stac_file + child_validity = [] for link in self.stac_content["links"]: if link["rel"] in ("child", "item"): address = link["href"] @@ -377,7 +417,9 @@ def recursive_validator(self, stac_type: str) -> bool: stac_type = get_stac_type(self.stac_content).lower() if link["rel"] == "child": - self.recursive_validator(stac_type) + if not self.skip_val: + valid_child = self.recursive_validator(stac_type) + child_validity.append(valid_child) if link["rel"] == "item": self.schema = set_schema_addr(self.version, stac_type.lower()) @@ -396,8 +438,11 @@ def recursive_validator(self, stac_type: str) -> bool: self.message.append(message) if not self.max_depth or self.max_depth < 5: self.message.append(message) - - return True + if all(child_validity): + valid = True + else: + valid = False + return valid def validate_dict(self, stac_content: Dict) -> bool: """ @@ -558,4 +603,12 @@ def run(self) -> bool: with open(self.log, "w") as f: f.write(json.dumps(self.message, indent=4)) + # filter message to only show errors if valid is False unless verbose mode is on + 
if self.recursive and not self.valid and not self.verbose: + filtered_messages = [] + for message in self.message: + if not message["valid_stac"]: + filtered_messages.append(message) + self.message = filtered_messages + return self.valid diff --git a/tests/test_assets.py b/tests/test_assets.py index d8c47003..8ac2f82e 100644 --- a/tests/test_assets.py +++ b/tests/test_assets.py @@ -22,7 +22,7 @@ def test_assets_v090(): ], "valid_stac": False, "error_type": "JSONSchemaValidationError", - "error_message": "-0.00751271 is less than the minimum of 0", + "error_message": "-0.00751271 is less than the minimum of 0. Error is in properties -> view:off_nadir", "validation_method": "default", "assets_validated": { "format_valid": [ diff --git a/tests/test_data/1rc2/extensions-collection/collection.json b/tests/test_data/1rc2/extensions-collection/collection.json index a79fabe3..9b187535 100644 --- a/tests/test_data/1rc2/extensions-collection/collection.json +++ b/tests/test_data/1rc2/extensions-collection/collection.json @@ -25,7 +25,9 @@ "type": "application/json" } ], - "stac_extensions": [], + "stac_extensions": [ + "https://stac-extensions.github.io/projection/v1.1.0/schema.json" + ], "title": "Collection of Extension Items", "keywords": [ "examples", diff --git a/tests/test_data/schema/v1.0.0/item_with_unreachable_url.json b/tests/test_data/schema/v1.0.0/item_with_unreachable_url.json new file mode 100644 index 00000000..2ed7e46c --- /dev/null +++ b/tests/test_data/schema/v1.0.0/item_with_unreachable_url.json @@ -0,0 +1,272 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/item.json#", + "title": "STAC Item", + "type": "object", + "description": "This object represents the metadata for an item in a SpatioTemporal Asset Catalog.", + "allOf": [ + { + "$ref": "#/definitions/core" + } + ], + "definitions": { + "common_metadata": { + "allOf": [ + { + "$ref": "basics.json" + }, + { + "$ref": 
"datetime.json" + }, + { + "$ref": "instrument.json" + }, + { + "$ref": "licensing.json" + }, + { + "$ref": "provider.json" + } + ] + }, + "core": { + "allOf": [ + { + "$ref": "https://geojson-wrong-url.org/schema/Feature.json" + }, + { + "oneOf": [ + { + "type": "object", + "required": [ + "geometry", + "bbox" + ], + "properties": { + "geometry": { + "$ref": "https://geojson-wrong-url.org/schema/Geometry.json" + }, + "bbox": { + "type": "array", + "oneOf": [ + { + "minItems": 4, + "maxItems": 4 + }, + { + "minItems": 6, + "maxItems": 6 + } + ], + "items": { + "type": "number" + } + } + } + }, + { + "type": "object", + "required": [ + "geometry" + ], + "properties": { + "geometry": { + "type": "null" + }, + "bbox": { + "not": {} + } + } + } + ] + }, + { + "type": "object", + "required": [ + "stac_version", + "id", + "links", + "assets", + "properties" + ], + "properties": { + "stac_version": { + "title": "STAC version", + "type": "string", + "const": "1.0.0" + }, + "stac_extensions": { + "title": "STAC extensions", + "type": "array", + "uniqueItems": true, + "items": { + "title": "Reference to a JSON Schema", + "type": "string", + "format": "iri" + } + }, + "id": { + "title": "Provider ID", + "description": "Provider item ID", + "type": "string", + "minLength": 1 + }, + "links": { + "title": "Item links", + "description": "Links to item relations", + "type": "array", + "items": { + "$ref": "#/definitions/link" + } + }, + "assets": { + "$ref": "#/definitions/assets" + }, + "properties": { + "allOf": [ + { + "$ref": "#/definitions/common_metadata" + }, + { + "anyOf": [ + { + "required": [ + "datetime" + ], + "properties": { + "datetime": { + "not": { + "type": "null" + } + } + } + }, + { + "required": [ + "datetime", + "start_datetime", + "end_datetime" + ] + } + ] + } + ] + } + }, + "if": { + "properties": { + "links": { + "contains": { + "required": [ + "rel" + ], + "properties": { + "rel": { + "const": "collection" + } + } + } + } + } + }, + "then": { + 
"required": [ + "collection" + ], + "properties": { + "collection": { + "title": "Collection ID", + "description": "The ID of the STAC Collection this Item references to.", + "type": "string", + "minLength": 1 + } + } + }, + "else": { + "properties": { + "collection": { + "not": {} + } + } + } + } + ] + }, + "link": { + "type": "object", + "required": [ + "rel", + "href" + ], + "properties": { + "href": { + "title": "Link reference", + "type": "string", + "format": "iri-reference", + "minLength": 1 + }, + "rel": { + "title": "Link relation type", + "type": "string", + "minLength": 1 + }, + "type": { + "title": "Link type", + "type": "string" + }, + "title": { + "title": "Link title", + "type": "string" + } + } + }, + "assets": { + "title": "Asset links", + "description": "Links to assets", + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/asset" + } + }, + "asset": { + "allOf": [ + { + "type": "object", + "required": [ + "href" + ], + "properties": { + "href": { + "title": "Asset reference", + "type": "string", + "format": "iri-reference", + "minLength": 1 + }, + "title": { + "title": "Asset title", + "type": "string" + }, + "description": { + "title": "Asset description", + "type": "string" + }, + "type": { + "title": "Asset type", + "type": "string" + }, + "roles": { + "title": "Asset roles", + "type": "array", + "items": { + "type": "string" + } + } + } + }, + { + "$ref": "#/definitions/common_metadata" + } + ] + } + } + } diff --git a/tests/test_data/v100/catalog-with-bad-child-collection.json b/tests/test_data/v100/catalog-with-bad-child-collection.json new file mode 100644 index 00000000..843f02cf --- /dev/null +++ b/tests/test_data/v100/catalog-with-bad-child-collection.json @@ -0,0 +1,37 @@ +{ + "id": "examples", + "type": "Catalog", + "title": "Example Catalog", + "stac_version": "1.0.0", + "description": "This catalog is a simple demonstration of an example catalog that is used to organize a hierarchy of collections and their items. 
It contains one bad child collection", + "links": [ + { + "rel": "root", + "href": "./catalog.json", + "type": "application/json" + }, + { + "rel": "child", + "href": "./collection-only/bad-collection.json", + "type": "application/json", + "title": "Collection with no items with missing id" + }, + { + "rel": "child", + "href": "./collection-only/collection.json", + "type": "application/json", + "title": "Collection with no items (standalone)" + }, + { + "rel": "child", + "href": "./collection-only/collection-with-schemas.json", + "type": "application/json", + "title": "Collection with no items (standalone with JSON Schemas)" + }, + { + "rel": "self", + "href": "https://raw.githubusercontent.com/radiantearth/stac-spec/v1.0.0/examples/catalog.json", + "type": "application/json" + } + ] +} diff --git a/tests/test_data/v100/collection-only/bad-collection.json b/tests/test_data/v100/collection-only/bad-collection.json new file mode 100644 index 00000000..78bc3d33 --- /dev/null +++ b/tests/test_data/v100/collection-only/bad-collection.json @@ -0,0 +1,232 @@ +{ + "type": "Collection", + "stac_version": "1.0.0", + "stac_extensions": [ + "https://stac-extensions.github.io/eo/v1.0.0/schema.json", + "https://stac-extensions.github.io/projection/v1.0.0/schema.json", + "https://stac-extensions.github.io/view/v1.0.0/schema.json" + ], + "title": "Sentinel-2 MSI: MultiSpectral Instrument, Level-1C", + "description": "Sentinel-2 is a wide-swath, high-resolution, multi-spectral\nimaging mission supporting Copernicus Land Monitoring studies,\nincluding the monitoring of vegetation, soil and water cover,\nas well as observation of inland waterways and coastal areas.\n\nThe Sentinel-2 data contain 13 UINT16 spectral bands representing\nTOA reflectance scaled by 10000. See the [Sentinel-2 User Handbook](https://sentinel.esa.int/documents/247904/685211/Sentinel-2_User_Handbook)\nfor details. 
In addition, three QA bands are present where one\n(QA60) is a bitmask band with cloud mask information. For more\ndetails, [see the full explanation of how cloud masks are computed.](https://sentinel.esa.int/web/sentinel/technical-guides/sentinel-2-msi/level-1c/cloud-masks)\n\nEach Sentinel-2 product (zip archive) may contain multiple\ngranules. Each granule becomes a separate Earth Engine asset.\nEE asset ids for Sentinel-2 assets have the following format:\nCOPERNICUS/S2/20151128T002653_20151128T102149_T56MNN. Here the\nfirst numeric part represents the sensing date and time, the\nsecond numeric part represents the product generation date and\ntime, and the final 6-character string is a unique granule identifier\nindicating its UTM grid reference (see [MGRS](https://en.wikipedia.org/wiki/Military_Grid_Reference_System)).\n\nFor more details on Sentinel-2 radiometric resoltuon, [see this page](https://earth.esa.int/web/sentinel/user-guides/sentinel-2-msi/resolutions/radiometric).\n", + "license": "proprietary", + "keywords": [ + "copernicus", + "esa", + "eu", + "msi", + "radiance", + "sentinel" + ], + "providers": [ + { + "name": "European Union/ESA/Copernicus", + "roles": [ + "producer", + "licensor" + ], + "url": "https://sentinel.esa.int/web/sentinel/user-guides/sentinel-2-msi" + } + ], + "extent": { + "spatial": { + "bbox": [ + [ + -180, + -56, + 180, + 83 + ] + ] + }, + "temporal": { + "interval": [ + [ + "2015-06-23T00:00:00Z", + null + ] + ] + } + }, + "assets": { + "metadata_iso_19139": { + "roles": [ + "metadata", + "iso-19139" + ], + "href": "https://storage.googleapis.com/open-cogs/stac-examples/sentinel-2-iso-19139.xml", + "title": "ISO 19139 metadata", + "type": "application/vnd.iso.19139+xml" + } + }, + "summaries": { + "datetime": { + "minimum": "2015-06-23T00:00:00Z", + "maximum": "2019-07-10T13:44:56Z" + }, + "platform": [ + "sentinel-2a", + "sentinel-2b" + ], + "constellation": [ + "sentinel-2" + ], + "instruments": [ + "msi" + ], + 
"view:off_nadir": { + "minimum": 0, + "maximum": 100 + }, + "view:sun_elevation": { + "minimum": 6.78, + "maximum": 89.9 + }, + "gsd": [ + 10, + 30, + 60 + ], + "proj:epsg": [ + 32601, + 32602, + 32603, + 32604, + 32605, + 32606, + 32607, + 32608, + 32609, + 32610, + 32611, + 32612, + 32613, + 32614, + 32615, + 32616, + 32617, + 32618, + 32619, + 32620, + 32621, + 32622, + 32623, + 32624, + 32625, + 32626, + 32627, + 32628, + 32629, + 32630, + 32631, + 32632, + 32633, + 32634, + 32635, + 32636, + 32637, + 32638, + 32639, + 32640, + 32641, + 32642, + 32643, + 32644, + 32645, + 32646, + 32647, + 32648, + 32649, + 32650, + 32651, + 32652, + 32653, + 32654, + 32655, + 32656, + 32657, + 32658, + 32659, + 32660 + ], + "eo:bands": [ + { + "name": "B1", + "common_name": "coastal", + "center_wavelength": 4.439 + }, + { + "name": "B2", + "common_name": "blue", + "center_wavelength": 4.966 + }, + { + "name": "B3", + "common_name": "green", + "center_wavelength": 5.6 + }, + { + "name": "B4", + "common_name": "red", + "center_wavelength": 6.645 + }, + { + "name": "B5", + "center_wavelength": 7.039 + }, + { + "name": "B6", + "center_wavelength": 7.402 + }, + { + "name": "B7", + "center_wavelength": 7.825 + }, + { + "name": "B8", + "common_name": "nir", + "center_wavelength": 8.351 + }, + { + "name": "B8A", + "center_wavelength": 8.648 + }, + { + "name": "B9", + "center_wavelength": 9.45 + }, + { + "name": "B10", + "center_wavelength": 1.3735 + }, + { + "name": "B11", + "common_name": "swir16", + "center_wavelength": 1.6137 + }, + { + "name": "B12", + "common_name": "swir22", + "center_wavelength": 2.2024 + } + ] + }, + "links": [ + { + "rel": "parent", + "href": "../catalog.json", + "type": "application/json", + "title": "Example Catalog" + }, + { + "rel": "root", + "href": "../catalog.json", + "type": "application/json", + "title": "Example Catalog" + }, + { + "rel": "license", + "href": 
"https://scihub.copernicus.eu/twiki/pub/SciHubWebPortal/TermsConditions/Sentinel_Data_Terms_and_Conditions.pdf", + "title": "Legal notice on the use of Copernicus Sentinel Data and Service Information" + } + ] +} diff --git a/tests/test_default.py b/tests/test_default.py index 92075576..4b7316a7 100644 --- a/tests/test_default.py +++ b/tests/test_default.py @@ -158,3 +158,24 @@ def test_default_catalog_v1rc2(): "valid_stac": True, } ] + + +def test_default_collection_validates_extensions(): + stac_file = "tests/test_data/v100/collection.json" + stac = stac_validator.StacValidate(stac_file) + stac.run() + assert stac.message == [ + { + "version": "1.0.0", + "path": "tests/test_data/v100/collection.json", + "schema": [ + "https://stac-extensions.github.io/eo/v1.0.0/schema.json", + "https://stac-extensions.github.io/projection/v1.0.0/schema.json", + "https://stac-extensions.github.io/view/v1.0.0/schema.json", + "https://schemas.stacspec.org/v1.0.0/collection-spec/json-schema/collection.json", + ], + "valid_stac": True, + "asset_type": "COLLECTION", + "validation_method": "default", + } + ] diff --git a/tests/test_extensions.py b/tests/test_extensions.py index 3ee27acc..4440cfd8 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -52,9 +52,7 @@ def test_v1beta1(): { "version": "1.0.0-beta.1", "path": "tests/test_data/1beta1/sentinel2.json", - "schema": [ - "https://cdn.staclint.com/v1.0.0-beta.1/collection.json", - ], + "schema": ["https://cdn.staclint.com/v1.0.0-beta.1/collection.json"], "asset_type": "COLLECTION", "validation_method": "extensions", "valid_stac": True, @@ -72,7 +70,9 @@ def test_no_extensions_v1beta2(): "asset_type": "ITEM", "version": "1.0.0-beta.2", "validation_method": "extensions", - "schema": [], + "schema": [ + "https://schemas.stacspec.org/v1.0.0-beta.2/item-spec/json-schema/item.json" + ], "valid_stac": True, } ] @@ -151,7 +151,9 @@ def test_local_v1rc2(): { "version": "1.0.0-rc.2", "path": 
"tests/test_data/1rc2/extensions-collection/./proj-example/proj-example.json", - "schema": [], + "schema": [ + "https://schemas.stacspec.org/v1.0.0-rc.2/item-spec/json-schema/item.json" + ], "valid_stac": True, "asset_type": "ITEM", "validation_method": "extensions", @@ -213,3 +215,58 @@ def test_item_v100_local_schema(): "validation_method": "extensions", } ] + + +def test_item_v100_override_schema_with_schema_map(): + stac_file = "tests/test_data/v100/extended-item.json" + stac = stac_validator.StacValidate( + stac_file, + extensions=True, + schema_map={ + "https://stac-extensions.github.io/projection/v1.0.0/schema.json": "../schema/v1.0.0/projection.json" + }, + ) + stac.run() + assert stac.message == [ + { + "version": "1.0.0", + "path": "tests/test_data/v100/extended-item.json", + "schema": [ + "https://stac-extensions.github.io/eo/v1.0.0/schema.json", + "../schema/v1.0.0/projection.json", + "https://stac-extensions.github.io/scientific/v1.0.0/schema.json", + "https://stac-extensions.github.io/view/v1.0.0/schema.json", + "https://stac-extensions.github.io/remote-data/v1.0.0/schema.json", + ], + "valid_stac": True, + "asset_type": "ITEM", + "validation_method": "extensions", + } + ] + + +def test_item_v100_local_schema_unreachable_url_schema_map_override(): + """ + This tests that references in schemas are also replaced by the schema_map + """ + stac_file = "tests/test_data/v100/extended-item-local.json" + schema = "tests/test_data/schema/v1.0.0/item_with_unreachable_url.json" + stac = stac_validator.StacValidate( + stac_file, + custom=schema, + schema_map={ + "https://geojson-wrong-url.org/schema/Feature.json": "https://geojson.org/schema/Feature.json", + "https://geojson-wrong-url.org/schema/Geometry.json": "https://geojson.org/schema/Geometry.json", + }, + ) + stac.run() + assert stac.message == [ + { + "version": "1.0.0", + "path": "tests/test_data/v100/extended-item-local.json", + "schema": ["tests/test_data/schema/v1.0.0/item_with_unreachable_url.json"], 
+ "valid_stac": True, + "asset_type": "ITEM", + "validation_method": "custom", + } + ] diff --git a/tests/test_links.py b/tests/test_links.py index 6e97e2c1..1ca99fcd 100644 --- a/tests/test_links.py +++ b/tests/test_links.py @@ -20,7 +20,7 @@ def test_poorly_formatted_v090(): ], "valid_stac": False, "error_type": "JSONSchemaValidationError", - "error_message": "-0.00751271 is less than the minimum of 0", + "error_message": "-0.00751271 is less than the minimum of 0. Error is in properties -> view:off_nadir", "validation_method": "default", "links_validated": { "format_valid": [ diff --git a/tests/test_recursion.py b/tests/test_recursion.py index ba49c4a1..d4f6810f 100644 --- a/tests/test_recursion.py +++ b/tests/test_recursion.py @@ -10,6 +10,7 @@ def test_recursive_lvl_3_v070(): stac_file = "https://radarstac.s3.amazonaws.com/stac/catalog.json" stac = stac_validator.StacValidate(stac_file, recursive=True, max_depth=4) stac.run() + assert stac.valid assert stac.message == [ { "version": "0.7.0", @@ -278,7 +279,8 @@ def test_recursion_collection_local_2_v1rc2(): "version": "1.0.0-rc.2", "path": "tests/test_data/1rc2/extensions-collection/collection.json", "schema": [ - "https://schemas.stacspec.org/v1.0.0-rc.2/collection-spec/json-schema/collection.json" + "https://stac-extensions.github.io/projection/v1.1.0/schema.json", + "https://schemas.stacspec.org/v1.0.0-rc.2/collection-spec/json-schema/collection.json", ], "asset_type": "COLLECTION", "validation_method": "recursive", @@ -308,6 +310,27 @@ def test_recursion_with_bad_item(): stac_file = "tests/test_data/v100/catalog-with-bad-item.json" stac = stac_validator.StacValidate(stac_file, recursive=True) stac.run() + assert not stac.valid + assert len(stac.message) == 1 + assert stac.message == [ + { + "version": "1.0.0", + "path": "tests/test_data/v100/./bad-item.json", + "schema": [ + "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/item.json" + ], + "valid_stac": False, + "error_type": 
"JSONSchemaValidationError", + "error_message": "'id' is a required property", + }, + ] + + +def test_recursion_with_bad_item_verbose(): + stac_file = "tests/test_data/v100/catalog-with-bad-item.json" + stac = stac_validator.StacValidate(stac_file, recursive=True, verbose=True) + stac.run() + assert not stac.valid assert len(stac.message) == 2 assert stac.message == [ { @@ -333,6 +356,31 @@ def test_recursion_with_bad_item(): ] +def test_recursion_with_bad_child_collection(): + # It is important here that there is a second good child in the collection + # since a previous bug did not correctly set the valid variable if the last + # child passed validation + stac_file = "tests/test_data/v100/catalog-with-bad-child-collection.json" + stac = stac_validator.StacValidate(stac_file, recursive=True) + stac.run() + assert not stac.valid + assert len(stac.message) == 1 + assert stac.message == [ + { + "version": "1.0.0", + "path": "tests/test_data/v100/./collection-only/bad-collection.json", + "schema": [ + "https://schemas.stacspec.org/v1.0.0/collection-spec/json-schema/collection.json" + ], + "valid_stac": False, + "asset_type": "COLLECTION", + "validation_method": "recursive", + "error_type": "JSONSchemaValidationError", + "error_message": "'id' is a required property", + } + ] + + def test_recursion_with_missing_collection_link(): stac_file = "tests/test_data/v100/item-without-collection-link.json" stac = stac_validator.StacValidate(stac_file, recursive=True) @@ -350,6 +398,6 @@ def test_recursion_with_missing_collection_link(): "valid_stac": False, "validation_method": "recursive", "error_type": "JSONSchemaValidationError", - "error_message": "'simple-collection' should not be valid under {}", + "error_message": "'simple-collection' should not be valid under {}. 
Error is in collection", }, ] diff --git a/tests/test_validate_collections.py b/tests/test_validate_collections.py index fe7ba19e..20b7f756 100644 --- a/tests/test_validate_collections.py +++ b/tests/test_validate_collections.py @@ -16,7 +16,8 @@ def test_validate_collections_remote(): "version": "1.0.0-beta.2", "path": "https://earth-search.aws.element84.com/v0/collections", "schema": [ - "https://schemas.stacspec.org/v1.0.0-beta.2/collection-spec/json-schema/collection.json" + "https://cdn.staclint.com/v1.0.0-beta.1/extension/item-assets.json", + "https://schemas.stacspec.org/v1.0.0-beta.2/collection-spec/json-schema/collection.json", ], "valid_stac": True, "asset_type": "COLLECTION", @@ -26,7 +27,8 @@ def test_validate_collections_remote(): "version": "1.0.0-beta.2", "path": "https://earth-search.aws.element84.com/v0/collections", "schema": [ - "https://schemas.stacspec.org/v1.0.0-beta.2/collection-spec/json-schema/collection.json" + "https://cdn.staclint.com/v1.0.0-beta.1/extension/item-assets.json", + "https://schemas.stacspec.org/v1.0.0-beta.2/collection-spec/json-schema/collection.json", ], "valid_stac": True, "asset_type": "COLLECTION", @@ -36,7 +38,8 @@ def test_validate_collections_remote(): "version": "1.0.0-beta.2", "path": "https://earth-search.aws.element84.com/v0/collections", "schema": [ - "https://schemas.stacspec.org/v1.0.0-beta.2/collection-spec/json-schema/collection.json" + "https://cdn.staclint.com/v1.0.0-beta.1/extension/item-assets.json", + "https://schemas.stacspec.org/v1.0.0-beta.2/collection-spec/json-schema/collection.json", ], "valid_stac": True, "asset_type": "COLLECTION",