Skip to content

Commit

Permalink
feat: keep asset created date when resupplying
Browse files Browse the repository at this point in the history
  • Loading branch information
l0b0 committed Oct 24, 2024
1 parent b1490d1 commit e00ccb1
Show file tree
Hide file tree
Showing 9 changed files with 228 additions and 90 deletions.
2 changes: 2 additions & 0 deletions scripts/gdal/gdal_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
import subprocess
from enum import Enum
from functools import lru_cache
from shutil import rmtree
from tempfile import mkdtemp
from typing import cast
Expand Down Expand Up @@ -53,6 +54,7 @@ def command_to_string(command: list[str]) -> str:
return " ".join(command)


@lru_cache(maxsize=1)
def get_gdal_version() -> str:
"""Return the GDAL version assuming all GDAL commands are in the same version of gdalinfo.
Expand Down
70 changes: 48 additions & 22 deletions scripts/stac/imagery/create_stac.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,22 @@
import json
from os import path
from typing import Any
from typing import Any, cast

from linz_logger import get_log
from shapely.geometry.base import BaseGeometry

from scripts.datetimes import utc_now
from scripts.datetimes import format_rfc_3339_datetime_string, utc_now
from scripts.files.files_helper import get_file_name_from_path
from scripts.files.fs import NoSuchFileError, read
from scripts.files.fs import NoSuchFileError, modified, read
from scripts.files.geotiff import get_extents
from scripts.gdal.gdal_helper import gdal_info
from scripts.gdal.gdalinfo import GdalInfo
from scripts.stac.imagery.collection import ImageryCollection
from scripts.stac.imagery.item import ImageryItem
from scripts.stac.imagery.item import ImageryItem, STACAsset
from scripts.stac.imagery.metadata_constants import CollectionMetadata
from scripts.stac.imagery.provider import Provider, ProviderRole
from scripts.stac.link import Link, Relation
from scripts.stac.util.checksum import multihash_as_hex
from scripts.stac.util.media_type import StacMediaType


Expand Down Expand Up @@ -77,12 +78,15 @@ def create_collection(
return collection


def get_item_created_datetime(existing_item: dict[str, Any], current_datetime: str) -> str:
    """Return the `properties.created` value of an existing Item, or a fallback.

    Args:
        existing_item: STAC Item document as a dictionary. May be empty when no Item was found.
        current_datetime: value returned when the Item has no usable `properties.created`.

    Returns:
        the stored `properties.created` string if present, otherwise `current_datetime`.
    """
    properties = existing_item.get("properties")
    # A JSON `null` (or any non-object) under "properties" must fall back rather than raise AttributeError.
    if not isinstance(properties, dict):
        return current_datetime

    created = properties.get("created")
    # Only a string honours the declared return type; `null` or other values fall back as well.
    return created if isinstance(created, str) else current_datetime


def create_item(
file: str,
asset_path: str,
start_datetime: str,
end_datetime: str,
collection_id: str,
gdal_version: str,
current_datetime: str,
gdalinfo_result: GdalInfo | None = None,
derived_from: list[str] | None = None,
Expand All @@ -91,11 +95,10 @@ def create_item(
"""Create an ImageryItem (STAC) to be linked to a Collection.
Args:
file: asset tiff file
asset_path: asset tiff file
start_datetime: start date of the survey
end_datetime: end date of the survey
collection_id: collection id to link to the Item
gdal_version: GDAL version
current_datetime: datetime string that represents the current time when the item is created.
gdalinfo_result: result of the gdalinfo command. Defaults to None.
derived_from: list of STAC Items from where this Item is derived. Defaults to None.
Expand All @@ -104,25 +107,48 @@ def create_item(
Returns:
a STAC Item wrapped in ImageryItem
"""
id_ = get_file_name_from_path(file)
item_id = get_file_name_from_path(asset_path)

if not gdalinfo_result:
gdalinfo_result = gdal_info(file)
gdalinfo_result = gdal_info(asset_path)

file_content = read(asset_path)
multihash = multihash_as_hex(file_content)

geometry, bbox = get_extents(gdalinfo_result)
created_datetime = current_datetime
existing_item = {}
if published_path:
# FIXME: make this try/catch nicer
try:
existing_item_content = read(path.join(published_path, f"{id_}.json"))
existing_item = json.loads(existing_item_content.decode("UTF-8"))
created_datetime = existing_item["properties"]["created"]
existing_item = json.loads(read(path.join(published_path, f"{item_id}.json")).decode("UTF-8"))
except NoSuchFileError:
get_log().info(f"No Item is published for ID: {id_}")
except KeyError:
get_log().info(f"Existing Item {id_} does not have 'properties.created' attribute")

item = ImageryItem(id_, file, gdal_version, created_datetime, current_datetime)
get_log().info(f"No Item is published for ID: {item_id}")

file_created_datetime = file_updated_datetime = format_rfc_3339_datetime_string(modified(asset_path))

try:
file_created_datetime = existing_item["assets"]["visual"]["created"]
except KeyError:
get_log().info(f"Existing Item for {item_id} does not have 'assets.visual.created' attribute")

try:
if multihash == existing_item["assets"]["visual"]["file:checksum"]:
file_updated_datetime = existing_item["assets"]["visual"]["updated"]
except KeyError:
get_log().info(f"Existing Item for {item_id} does not have 'assets.visual' attributes")

item = ImageryItem(
item_id,
STACAsset(
**{
"href": asset_path,
"file:checksum": multihash,
"created": file_created_datetime,
"updated": file_updated_datetime,
}
),
get_item_created_datetime(existing_item, current_datetime),
current_datetime,
)

if derived_from is not None:
for derived in derived_from:
Expand All @@ -142,8 +168,8 @@ def create_item(
)

item.update_datetime(start_datetime, end_datetime)
item.update_spatial(geometry, bbox)
item.update_spatial(*get_extents(gdalinfo_result))
item.add_collection(collection_id)

get_log().info("ImageryItem created", path=file)
get_log().info("ImageryItem created", path=asset_path)
return item
30 changes: 14 additions & 16 deletions scripts/stac/imagery/item.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,25 @@
import os
from os import environ
from typing import Any
from typing import Any, TypedDict

from scripts.datetimes import format_rfc_3339_datetime_string
from scripts.files import fs
from scripts.files.fs import modified
from scripts.gdal.gdal_helper import get_gdal_version
from scripts.stac.link import Link, Relation
from scripts.stac.util import checksum
from scripts.stac.util.STAC_VERSION import STAC_VERSION
from scripts.stac.util.media_type import StacMediaType
from scripts.stac.util.stac_extensions import StacExtensions

# Typed shape of the asset dictionary passed to `ImageryItem` and merged into `assets.visual`.
# The functional `TypedDict(...)` form is required here: "file:checksum" contains a colon and
# therefore cannot be declared as an attribute in the class-based syntax.
STACAsset = TypedDict("STACAsset", {"href": str, "file:checksum": str, "created": str, "updated": str})


class ImageryItem:
stac: dict[str, Any]

def __init__(self, id_: str, file: str, gdal_version: str, created_datetime: str, updated_datetime: str) -> None:
file_content = fs.read(file)
file_modified_datetime = format_rfc_3339_datetime_string(modified(file))

def __init__(
self,
id_: str,
stac_asset: STACAsset,
created_datetime: str,
updated_datetime: str,
) -> None:
if (topo_imagery_hash := environ.get("GIT_HASH")) is not None:
commit_url = f"https://github.com/linz/topo-imagery/commit/{topo_imagery_hash}"
else:
Expand All @@ -30,20 +31,17 @@ def __init__(self, id_: str, file: str, gdal_version: str, created_datetime: str
"id": id_,
"links": [Link(path=f"./{id_}.json", rel=Relation.SELF, media_type=StacMediaType.GEOJSON).stac],
"assets": {
"visual": {
"href": os.path.join(".", os.path.basename(file)),
"visual": stac_asset
| {
"type": "image/tiff; application=geotiff; profile=cloud-optimized",
"file:checksum": checksum.multihash_as_hex(file_content),
"created": file_modified_datetime,
"updated": file_modified_datetime,
}
},
"stac_extensions": [StacExtensions.file.value, StacExtensions.processing.value],
"properties": {
"created": created_datetime,
"updated": updated_datetime,
"processing:datetime": updated_datetime,
"processing:software": {"gdal": gdal_version, "linz/topo-imagery": commit_url},
"processing:software": {"gdal": get_gdal_version(), "linz/topo-imagery": commit_url},
"processing:version": environ.get("GIT_VERSION", "GIT_VERSION not specified"),
},
}
Expand Down
35 changes: 25 additions & 10 deletions scripts/stac/imagery/tests/collection_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@
from scripts.files.fs import read
from scripts.files.fs_s3 import write
from scripts.stac.imagery.collection import ImageryCollection
from scripts.stac.imagery.item import ImageryItem
from scripts.stac.imagery.item import ImageryItem, STACAsset
from scripts.stac.imagery.metadata_constants import CollectionMetadata
from scripts.stac.imagery.provider import Provider, ProviderRole
from scripts.stac.util.checksum import multihash_as_hex
from scripts.stac.util.stac_extensions import StacExtensions
from scripts.tests.datetimes_test import any_epoch_datetime

Expand Down Expand Up @@ -110,15 +111,35 @@ def func() -> datetime:
return func


def any_multihash_as_hex() -> str:
    """Return `multihash_as_hex` applied to 64 random bytes, for tests needing an arbitrary checksum."""
    random_content = any_bytes(64)
    return multihash_as_hex(random_content)


def any_bytes(byte_count: int) -> bytes:
    """Return `byte_count` bytes obtained from `os.urandom`."""
    random_bytes = os.urandom(byte_count)
    return random_bytes


def test_add_item(fake_collection_metadata: CollectionMetadata, subtests: SubTests) -> None:
now = any_epoch_datetime()
now_function = fixed_now_function(now)
current_datetime = format_rfc_3339_datetime_string(now)
created_datetime = format_rfc_3339_datetime_string(any_epoch_datetime())
collection = ImageryCollection(fake_collection_metadata, now_function)
item_file_path = "./scripts/tests/data/empty.tiff"
modified_datetime = datetime(2001, 2, 3, hour=4, minute=5, second=6, tzinfo=timezone.utc)
os.utime(item_file_path, times=(any_epoch_datetime().timestamp(), modified_datetime.timestamp()))
item = ImageryItem("BR34_5000_0304", item_file_path, "any GDAL version", current_datetime, current_datetime)
item = ImageryItem(
"BR34_5000_0304",
STACAsset(
**{
"href": item_file_path,
"file:checksum": any_multihash_as_hex(),
"created": format_rfc_3339_datetime_string(any_epoch_datetime()),
"updated": format_rfc_3339_datetime_string(any_epoch_datetime()),
}
),
created_datetime,
format_rfc_3339_datetime_string(any_epoch_datetime()),
)
geometry = {
"type": "Polygon",
"coordinates": [[1799667.5, 5815977.0], [1800422.5, 5815977.0], [1800422.5, 5814986.0], [1799667.5, 5814986.0]],
Expand Down Expand Up @@ -151,13 +172,7 @@ def test_add_item(fake_collection_metadata: CollectionMetadata, subtests: SubTes

for property_name in ["created", "updated"]:
with subtests.test(msg=f"collection {property_name}"):
assert collection.stac[property_name] == current_datetime

with subtests.test(msg=f"item properties.{property_name}"):
assert item.stac["properties"][property_name] == current_datetime

with subtests.test(msg=f"item assets.visual.{property_name}"):
assert item.stac["assets"]["visual"][property_name] == "2001-02-03T04:05:06Z"
assert collection.stac[property_name] == format_rfc_3339_datetime_string(now)


def test_write_collection(fake_collection_metadata: CollectionMetadata) -> None:
Expand Down
Loading

0 comments on commit e00ccb1

Please sign in to comment.