Skip to content

Commit

Permalink
feat: timestamps when created TDE-1147 (#956)
Browse files Browse the repository at this point in the history
#### Motivation

Give dataset users more detailed info about the datasets' life cycle.

#### Modification

Set the item asset's `created` and `updated` properties to the source
file's last-modified datetime when processing

#### Checklist

- [x] Tests updated
- [ ] Docs updated (N/A)
- [x] Issue linked in Title

---------

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
l0b0 and github-actions[bot] authored May 9, 2024
1 parent 759b32d commit 2828f14
Show file tree
Hide file tree
Showing 9 changed files with 109 additions and 13 deletions.
16 changes: 15 additions & 1 deletion scripts/files/fs.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,20 @@
import os
from concurrent.futures import Future, ThreadPoolExecutor, as_completed
from typing import List, Optional
from datetime import datetime
from pathlib import Path
from typing import TYPE_CHECKING, List, Optional

from boto3 import resource
from linz_logger import get_log

from scripts.aws.aws_helper import is_s3
from scripts.files import fs_local, fs_s3

if TYPE_CHECKING:
from mypy_boto3_s3 import S3Client
else:
S3Client = dict


def write(destination: str, source: bytes, content_type: Optional[str] = None) -> str:
"""Write a file from its source to a destination path.
Expand Down Expand Up @@ -79,6 +86,13 @@ def exists(path: str) -> bool:
return fs_local.exists(path)


def modified(path: str, s3_client: Optional[S3Client] = None) -> datetime:
    """Get the modified datetime of a file stored on S3 or locally.

    Args:
        path: an S3 URL or a local file path
        s3_client: optional client handed on to the S3 lookup; not used for local paths

    Returns:
        the modified datetime reported by `fs_s3.modified` or `fs_local.modified`
    """
    # Local paths need no bucket/key parsing, so deal with them first.
    if not is_s3(path):
        return fs_local.modified(Path(path))

    bucket_name = fs_s3.bucket_name_from_path(path)
    object_key = fs_s3.prefix_from_path(path)
    return fs_s3.modified(bucket_name, object_key, s3_client)


def write_all(inputs: List[str], target: str, concurrency: Optional[int] = 4) -> List[str]:
"""Writes list of files to target destination using multithreading.
Expand Down
8 changes: 8 additions & 0 deletions scripts/files/fs_local.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import os
from datetime import datetime, timezone
from pathlib import Path


def write(destination: str, source: bytes) -> None:
Expand Down Expand Up @@ -36,3 +38,9 @@ def exists(path: str) -> bool:
True if the path exists
"""
return os.path.exists(path)


def modified(path: Path) -> datetime:
    """Get the modified datetime of a local path.

    Args:
        path: location of the file on the local file system

    Returns:
        the file's modification time as a timezone-aware datetime in UTC
    """
    # `st_mtime` is the same value `os.path.getmtime` reports.
    seconds_since_epoch = os.stat(path).st_mtime
    return datetime.fromtimestamp(seconds_since_epoch, tz=timezone.utc)
6 changes: 6 additions & 0 deletions scripts/files/fs_s3.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from concurrent import futures
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from typing import TYPE_CHECKING, Any, Generator, List, Optional, Union

from boto3 import client, resource
Expand Down Expand Up @@ -237,3 +238,8 @@ def get_object_parallel_multithreading(
yield key, future.result()
else:
yield key, exception


def modified(bucket_name: str, key: str, s3_client: Optional[S3Client]) -> datetime:
    """Get the last modified datetime of an S3 object.

    A HEAD request is enough here: `LastModified` is part of the object's
    metadata, so there is no need to open a download stream for the object
    body only to leave it unread.

    Args:
        bucket_name: name of the bucket holding the object
        key: key of the object within the bucket
        s3_client: client to use; a default `s3` client is created when None

    Returns:
        the `LastModified` value of the HEAD response

    Errors raised by the client call are not handled here and propagate to
    the caller.
    """
    if s3_client is None:
        s3_client = client("s3")
    return s3_client.head_object(Bucket=bucket_name, Key=key)["LastModified"]
12 changes: 11 additions & 1 deletion scripts/files/tests/fs_local_test.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import os
from pathlib import Path

import pytest

from scripts.files.fs_local import exists, read, write
from scripts.files.fs_local import exists, modified, read, write
from scripts.tests.datetimes_test import any_epoch_datetime


@pytest.mark.dependency(name="write")
Expand Down Expand Up @@ -43,3 +45,11 @@ def test_exists(setup: str) -> None:
def test_exists_file_not_found() -> None:
    """`exists` must return False for a path that is expected to be absent."""
    assert exists("/tmp/test.file") is False


def test_should_get_modified_datetime(setup: str) -> None:
    """`modified` returns the modification time that was set on a local file."""
    target = Path(setup) / "modified.file"
    target.touch()

    expected = any_epoch_datetime()
    # The access time is irrelevant to the assertion; any value will do.
    access_timestamp = any_epoch_datetime().timestamp()
    os.utime(target, times=(access_timestamp, expected.timestamp()))

    assert modified(target) == expected
21 changes: 20 additions & 1 deletion scripts/files/tests/fs_s3_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,17 @@
from boto3 import client, resource
from botocore.exceptions import ClientError
from moto import mock_aws
from moto.core.models import DEFAULT_ACCOUNT_ID
from moto.s3.models import s3_backends
from moto.s3.responses import DEFAULT_REGION_NAME
from moto.wafv2.models import GLOBAL_REGION
from mypy_boto3_s3 import S3Client
from pytest import CaptureFixture, raises
from pytest_subtests import SubTests

from scripts.files.files_helper import ContentType
from scripts.files.fs_s3 import exists, list_files_in_uri, read, write
from scripts.files.fs_s3 import exists, list_files_in_uri, modified, read, write
from scripts.tests.datetimes_test import any_epoch_datetime


@mock_aws
Expand Down Expand Up @@ -156,3 +161,17 @@ def test_list_files_in_uri(subtests: SubTests) -> None:

with subtests.test():
assert "data/image.tiff" not in files


@mock_aws
def test_should_get_modified_datetime() -> None:
    """`modified` returns the last-modified datetime stored for an S3 object."""
    bucket_name, key = "any-bucket-name", "any-key"
    expected = any_epoch_datetime()

    s3_client: S3Client = client("s3", region_name=DEFAULT_REGION_NAME)
    s3_client.create_bucket(Bucket=bucket_name)
    s3_client.put_object(Bucket=bucket_name, Key=key, Body=b"any body")

    # Overwrite the timestamp directly on the object held by the moto backend.
    stored_object = s3_backends[DEFAULT_ACCOUNT_ID][GLOBAL_REGION].buckets[bucket_name].keys[key]
    stored_object.last_modified = expected

    assert modified(bucket_name, key, s3_client) == expected
32 changes: 30 additions & 2 deletions scripts/files/tests/fs_test.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,20 @@
import os
from pathlib import Path
from shutil import rmtree
from tempfile import mkdtemp

from boto3 import resource
from boto3 import client, resource
from moto import mock_aws
from moto.core.models import DEFAULT_ACCOUNT_ID
from moto.s3.models import s3_backends
from moto.s3.responses import DEFAULT_REGION_NAME
from moto.wafv2.models import GLOBAL_REGION
from mypy_boto3_s3 import S3Client
from pytest import CaptureFixture, raises
from pytest_subtests import SubTests

from scripts.files.fs import NoSuchFileError, read, write, write_all, write_sidecars
from scripts.files.fs import NoSuchFileError, modified, read, write, write_all, write_sidecars
from scripts.tests.datetimes_test import any_epoch_datetime


def test_read_key_not_found_local() -> None:
Expand Down Expand Up @@ -81,3 +87,25 @@ def test_write_sidecars_one_found(capsys: CaptureFixture[str], subtests: SubTest
assert "wrote_sidecar_file" in logs

rmtree(target)


@mock_aws
def test_should_get_s3_object_modified_datetime() -> None:
    """`modified` resolves an `s3://` URL to the object's last-modified datetime."""
    bucket_name, key = "any-bucket-name", "any-key"
    expected = any_epoch_datetime()

    s3_client: S3Client = client("s3", region_name=DEFAULT_REGION_NAME)
    s3_client.create_bucket(Bucket=bucket_name)
    s3_client.put_object(Bucket=bucket_name, Key=key, Body=b"any body")

    # Overwrite the timestamp directly on the object held by the moto backend.
    stored_object = s3_backends[DEFAULT_ACCOUNT_ID][GLOBAL_REGION].buckets[bucket_name].keys[key]
    stored_object.last_modified = expected

    s3_url = f"s3://{bucket_name}/{key}"
    assert modified(s3_url, s3_client) == expected


def test_should_get_local_file_modified_datetime(setup: str) -> None:
    """`modified` returns the modification time that was set on a local file path."""
    file_path = os.path.join(setup, "modified.file")
    Path(file_path).touch()

    expected = any_epoch_datetime()
    # The access time is irrelevant to the assertion; any value will do.
    access_timestamp = any_epoch_datetime().timestamp()
    os.utime(file_path, times=(access_timestamp, expected.timestamp()))

    assert modified(file_path) == expected
5 changes: 5 additions & 0 deletions scripts/stac/imagery/item.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import os
from typing import Any, Dict, Tuple

from scripts.datetimes import format_rfc_3339_datetime_string
from scripts.files import fs
from scripts.files.fs import modified
from scripts.stac.util import checksum
from scripts.stac.util.STAC_VERSION import STAC_VERSION
from scripts.stac.util.stac_extensions import StacExtensions
Expand All @@ -12,6 +14,7 @@ class ImageryItem:

def __init__(self, id_: str, file: str) -> None:
file_content = fs.read(file)
file_modified_datetime = format_rfc_3339_datetime_string(modified(file))
self.stac = {
"type": "Feature",
"stac_version": STAC_VERSION,
Expand All @@ -24,6 +27,8 @@ def __init__(self, id_: str, file: str) -> None:
"href": os.path.join(".", os.path.basename(file)),
"type": "image/tiff; application=geotiff; profile=cloud-optimized",
"file:checksum": checksum.multihash_as_hex(file_content),
"created": file_modified_datetime,
"updated": file_modified_datetime,
}
},
"stac_extensions": [StacExtensions.file.value],
Expand Down
18 changes: 12 additions & 6 deletions scripts/stac/imagery/tests/collection_test.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
import json
import os
import tempfile
from datetime import datetime
from datetime import datetime, timezone
from shutil import rmtree
from tempfile import mkdtemp
from typing import Generator

import pytest
import shapely.geometry
from pytest_mock import MockerFixture
from pytest_subtests import SubTests

from scripts.files.fs import read
Expand All @@ -17,6 +16,7 @@
from scripts.stac.imagery.metadata_constants import CollectionMetadata
from scripts.stac.imagery.provider import Provider, ProviderRole
from scripts.stac.util.stac_extensions import StacExtensions
from scripts.tests.datetimes_test import any_epoch_datetime


# pylint: disable=duplicate-code
Expand Down Expand Up @@ -113,10 +113,12 @@ def test_interval_updated_from_existing(metadata: CollectionMetadata) -> None:
assert collection.stac["extent"]["temporal"]["interval"] == [["2021-01-27T00:00:00Z", "2021-02-20T00:00:00Z"]]


def test_add_item(mocker: MockerFixture, metadata: CollectionMetadata, subtests: SubTests) -> None:
def test_add_item(metadata: CollectionMetadata, subtests: SubTests) -> None:
collection = ImageryCollection(metadata)
mocker.patch("scripts.files.fs.read", return_value=b"")
item = ImageryItem("BR34_5000_0304", "./test/BR34_5000_0304.tiff")
item_file_path = "./scripts/tests/data/empty.tiff"
modified_datetime = datetime(2001, 2, 3, hour=4, minute=5, second=6, tzinfo=timezone.utc)
os.utime(item_file_path, times=(any_epoch_datetime().timestamp(), modified_datetime.timestamp()))
item = ImageryItem("BR34_5000_0304", item_file_path)
geometry = {
"type": "Polygon",
"coordinates": [[1799667.5, 5815977.0], [1800422.5, 5815977.0], [1800422.5, 5814986.0], [1799667.5, 5814986.0]],
Expand All @@ -131,7 +133,7 @@ def test_add_item(mocker: MockerFixture, metadata: CollectionMetadata, subtests:

with subtests.test():
assert {
"file:checksum": "1220a049888b3971d9ed3fd52b830cfeb379d7069d6b7a927456bcf1fabab0ec4f46",
"file:checksum": "122097b5d2b049c6ffdf608af28c4ba2744fad7f03046d1f58b2523402f30577f618",
"rel": "item",
"href": "./BR34_5000_0304.json",
"type": "application/json",
Expand All @@ -143,6 +145,10 @@ def test_add_item(mocker: MockerFixture, metadata: CollectionMetadata, subtests:
with subtests.test():
assert collection.stac["extent"]["spatial"]["bbox"] == [bbox]

for property_name in ["created", "updated"]:
with subtests.test(msg=f"{property_name} property"):
assert item.stac["assets"]["visual"][property_name] == "2001-02-03T04:05:06Z"


def test_write_collection(metadata: CollectionMetadata) -> None:
target = mkdtemp()
Expand Down
4 changes: 2 additions & 2 deletions scripts/stac/imagery/tests/item_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def test_imagery_stac_item(mocker: MockerFixture, subtests: SubTests) -> None:
bbox = (1799667.5, 5815977.0, 1800422.5, 5814986.0)
mocker.patch("scripts.files.fs.read", return_value=b"")

path = "./test/BR34_5000_0302.tiff"
path = "./scripts/tests/data/empty.tiff"
id_ = get_file_name_from_path(path)
start_datetime = "2021-01-27T00:00:00Z"
end_datetime = "2021-01-27T00:00:00Z"
Expand Down Expand Up @@ -74,7 +74,7 @@ def test_imagery_add_collection(mocker: MockerFixture, subtests: SubTests) -> No
ulid = "fake_ulid"
collection = ImageryCollection(metadata=metadata, collection_id=ulid)

path = "./test/BR34_5000_0302.tiff"
path = "./scripts/tests/data/empty.tiff"
id_ = get_file_name_from_path(path)
mocker.patch("scripts.files.fs.read", return_value=b"")
item = ImageryItem(id_, path)
Expand Down

0 comments on commit 2828f14

Please sign in to comment.