Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,5 @@ src/pypgstac/target
src/pypgstac/python/pypgstac/*.so
.vscode
.ipynb_checkpoints
.venv
.pytest_cache
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/)
and this project adheres to [Semantic Versioning](http://semver.org/).

## [Unreleased]

### Added

- Add `load_queryables` function to pypgstac for loading queryables from a JSON file
- Add support for specifying collection IDs when loading queryables

## [v0.9.5]

### Changed
Expand Down
48 changes: 48 additions & 0 deletions docs/src/pypgstac.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,54 @@ To upsert any records, adding anything new and replacing anything with the same
pypgstac load items --method upsert
```

### Loading Queryables

Queryables are a mechanism that allows clients to discover what terms are available for use when writing filter expressions in a STAC API. The Filter Extension enables clients to filter collections and items based on their properties using the Common Query Language (CQL2).

To load queryables from a JSON file:

```
pypgstac load_queryables queryables.json
```

To load queryables for specific collections:

```
pypgstac load_queryables queryables.json --collection_ids collection1,collection2
```

The JSON file should follow the queryables schema as described in the [STAC API - Filter Extension](https://github.com/stac-api-extensions/filter#queryables). Here's an example:

```json
{
"$schema": "https://json-schema.org/draft/2019-09/schema",
"$id": "https://example.com/stac/queryables",
"type": "object",
"title": "Queryables for Example STAC API",
"description": "Queryable names for the Example STAC API",
"properties": {
"id": {
"description": "Item identifier",
"type": "string"
},
"datetime": {
"description": "Datetime",
"type": "string",
"format": "date-time"
},
"eo:cloud_cover": {
"description": "Cloud cover percentage",
"type": "number",
"minimum": 0,
"maximum": 100
}
},
"additionalProperties": true
}
```

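The command reads the `properties` object from the JSON file and creates one queryable in the database for each property. Core fields that are already indexed (`id`, `geometry`, `datetime`, `end_datetime`, and `collection`) are skipped, and an existing queryable with the same name is replaced if it applies to all collections or to the same list of collection IDs. The command also determines the appropriate property wrapper from the `type` and `format` of each property (for example, `number` uses `to_float` and `date-time` uses `to_tstz`) and triggers creation of the necessary indexes.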

### Automated Collection Extent Updates

By setting `pgstac.update_collection_extent` to `true`, a trigger is enabled to automatically adjust the spatial and temporal extents in collections when new items are ingested. This feature, while helpful, may increase overhead within data load transactions. To alleviate performance impact, combining this setting with `pgstac.use_queue` is beneficial. This approach necessitates a separate process, such as a scheduled task via the `pg_cron` extension, to periodically invoke `CALL run_queued_queries();`. Such asynchronous processing ensures efficient transactional performance and updated collection extents.
Expand Down
47 changes: 47 additions & 0 deletions src/pypgstac/examples/load_queryables_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/usr/bin/env python
"""
Example script demonstrating how to load queryables into PgSTAC.

This script shows how to use the load_queryables function both from the command line
and programmatically.
"""

import sys
from pathlib import Path

# Add the parent directory to the path so we can import pypgstac
sys.path.append(str(Path(__file__).parent.parent))

from pypgstac.pypgstac import PgstacCLI


def load_for_specific_collections(cli, sample_file, collection_ids):
    """Load the queryables in ``sample_file`` for the given collections only.

    Args:
        cli: Object exposing ``load_queryables`` (a ``PgstacCLI`` in this
            example).
        sample_file: Path to the queryables JSON file; converted to ``str``
            before it is handed to ``cli``.
        collection_ids: Collection IDs forwarded unchanged as the
            ``collection_ids`` keyword argument.
    """
    queryables_path = str(sample_file)
    cli.load_queryables(queryables_path, collection_ids=collection_ids)


def main():
    """Demonstrate loading queryables into PgSTAC.

    Loads ``sample_queryables.json`` (expected next to this script) first
    for all collections and then for two specific collections.

    Raises:
        SystemExit: If the sample queryables file does not exist.
    """
    # The sample queryables file is expected to live next to this script.
    sample_file = Path(__file__).parent / "sample_queryables.json"

    # Fail loudly instead of returning silently, so a missing sample file is
    # not mistaken for a successful run.
    if not sample_file.exists():
        sys.exit(f"Sample queryables file not found: {sample_file}")

    # Create a PgstacCLI instance.
    # This will use the standard PostgreSQL environment variables for
    # connection.
    cli = PgstacCLI()

    # Load queryables for all collections
    cli.load_queryables(str(sample_file))

    # Then load the same queryables for specific collections only.
    # Comment out the following line to keep just the load above.
    load_for_specific_collections(cli, sample_file, "landsat-8,sentinel-2")


if __name__ == "__main__":
    main()
79 changes: 79 additions & 0 deletions src/pypgstac/examples/sample_queryables.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
{
"$schema": "https://json-schema.org/draft/2019-09/schema",
"$id": "https://example.com/stac/queryables",
"type": "object",
"title": "Queryables for Example STAC API",
"description": "Queryable names for the Example STAC API",
"properties": {
"id": {
"description": "Item identifier",
"type": "string"
},
"collection": {
"description": "Collection identifier",
"type": "string"
},
"datetime": {
"description": "Datetime",
"type": "string",
"format": "date-time"
},
"geometry": {
"description": "Geometry",
"type": "object"
},
"eo:cloud_cover": {
"description": "Cloud cover percentage",
"type": "number",
"minimum": 0,
"maximum": 100
},
"platform": {
"description": "Platform name",
"type": "string",
"enum": ["landsat-8", "sentinel-2"]
},
"instrument": {
"description": "Instrument name",
"type": "string"
},
"gsd": {
"description": "Ground sample distance in meters",
"type": "number"
},
"view:off_nadir": {
"description": "Off-nadir angle in degrees",
"type": "number"
},
"view:sun_azimuth": {
"description": "Sun azimuth angle in degrees",
"type": "number"
},
"view:sun_elevation": {
"description": "Sun elevation angle in degrees",
"type": "number"
},
"sci:doi": {
"description": "Digital Object Identifier",
"type": "string"
},
"created": {
"description": "Date and time the item was created",
"type": "string",
"format": "date-time"
},
"updated": {
"description": "Date and time the item was last updated",
"type": "string",
"format": "date-time"
},
"landcover:classes": {
"description": "Land cover classes",
"type": "array",
"items": {
"type": "string"
}
}
},
"additionalProperties": true
}
115 changes: 115 additions & 0 deletions src/pypgstac/src/pypgstac/pypgstac.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,121 @@ def loadextensions(self) -> None:
except Exception:
pass

def load_queryables(
    self,
    file: str,
    collection_ids: Optional[str] = None,
) -> None:
    """Load queryables from a JSON file into the ``queryables`` table.

    The first JSON document read from ``file`` is treated as a queryables
    definition. Every entry of its ``properties`` object, except the core
    fields, is inserted as one queryable. Before each insert, existing rows
    with the same name are deleted when their ``collection_ids`` is NULL or
    equals the requested list. All statements run in one transaction, which
    ends with a call to ``maintain_partitions()`` to trigger index creation.

    Args:
        file: Path to the JSON file containing the queryables definition.
        collection_ids: Comma-separated list of collection IDs to apply the
            queryables to. A list or tuple of IDs (or a single non-string
            value) is accepted as well, because the command-line argument
            parser may convert values such as ``a,b`` or ``123`` before
            they reach this method. Blank entries are ignored. When omitted
            or empty, the queryables are stored with NULL
            ``collection_ids``.

    Raises:
        ValueError: If ``collection_ids`` is given but contains no usable
            ID, if no JSON data is found in ``file``, or if the definition
            has no ``properties``.
    """
    # Validate the collection filter first so that bad input fails before
    # any file or database access happens.
    coll_ids_array = None
    if collection_ids:
        if isinstance(collection_ids, str):
            raw_ids = collection_ids.split(",")
        elif isinstance(collection_ids, (list, tuple)):
            raw_ids = list(collection_ids)
        else:
            # e.g. a purely numeric ID that was parsed into an int
            raw_ids = [collection_ids]
        coll_ids_array = [
            cid for cid in (str(raw).strip() for raw in raw_ids) if cid
        ]
        if not coll_ids_array:
            # Falling back to None here would silently apply the queryables
            # to every collection, so reject the input instead.
            raise ValueError(
                f"No valid collection IDs found in {collection_ids!r}",
            )

    import orjson

    from pypgstac.load import read_json

    # Only the first JSON document in the file is used.
    queryables_data = next(iter(read_json(file)), None)
    if not queryables_data:
        raise ValueError(f"No valid JSON data found in {file}")

    # Extract properties from the queryables definition
    properties = queryables_data.get("properties", {})
    if not properties:
        raise ValueError("No properties found in queryables definition")

    # Core fields that are already indexed and therefore skipped.
    core_fields = ("id", "geometry", "datetime", "end_datetime", "collection")
    # Index type used for every queryable (default to BTREE).
    property_index_type = "BTREE"

    conn = self._db.connect()
    with conn.cursor() as cur:
        with conn.transaction():
            for name, definition in properties.items():
                if name in core_fields:
                    continue

                # Determine property wrapper based on type. The numeric
                # types win over "format", which in turn wins over "array".
                prop_type = definition.get("type")
                if prop_type == "number":
                    property_wrapper = "to_float"
                elif prop_type == "integer":
                    property_wrapper = "to_int"
                elif definition.get("format") == "date-time":
                    property_wrapper = "to_tstz"
                elif prop_type == "array":
                    property_wrapper = "to_text_array"
                else:
                    property_wrapper = "to_text"

                # First delete any existing queryable with the same name
                # that targets exactly the requested collections ...
                if coll_ids_array is not None:
                    cur.execute(
                        """
                        DELETE FROM queryables
                        WHERE name = %s AND collection_ids = %s::text[]
                        """,
                        [name, coll_ids_array],
                    )

                # ... and, in every case, the one with NULL collection_ids.
                cur.execute(
                    """
                    DELETE FROM queryables
                    WHERE name = %s AND collection_ids IS NULL
                    """,
                    [name],
                )

                # Then insert the new queryable
                cur.execute(
                    """
                    INSERT INTO queryables
                    (name, collection_ids, definition, property_wrapper,
                    property_index_type)
                    VALUES (%s, %s, %s, %s, %s)
                    """,
                    [
                        name,
                        coll_ids_array,
                        orjson.dumps(definition).decode(),
                        property_wrapper,
                        property_index_type,
                    ],
                )

            # Trigger index creation
            cur.execute("SELECT maintain_partitions();")


def cli() -> fire.Fire:
"""Wrap fire call for CLI."""
Expand Down
53 changes: 53 additions & 0 deletions src/pypgstac/tests/data-files/queryables/test_queryables.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
{
"$schema": "https://json-schema.org/draft/2019-09/schema",
"$id": "https://example.com/stac/queryables",
"type": "object",
"title": "Test Queryables for PgSTAC",
"description": "Test queryable names for PgSTAC",
"properties": {
"id": {
"description": "Item identifier",
"type": "string"
},
"collection": {
"description": "Collection identifier",
"type": "string"
},
"datetime": {
"description": "Datetime",
"type": "string",
"format": "date-time"
},
"geometry": {
"description": "Geometry",
"type": "object"
},
"test:string_prop": {
"description": "Test string property",
"type": "string"
},
"test:number_prop": {
"description": "Test number property",
"type": "number",
"minimum": 0,
"maximum": 100
},
"test:integer_prop": {
"description": "Test integer property",
"type": "integer"
},
"test:datetime_prop": {
"description": "Test datetime property",
"type": "string",
"format": "date-time"
},
"test:array_prop": {
"description": "Test array property",
"type": "array",
"items": {
"type": "string"
}
}
},
"additionalProperties": true
}
Loading
Loading