From e16cf97c7bd60787a84301496e533b97c6194548 Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Thu, 11 Jan 2024 02:50:28 +0000 Subject: [PATCH 01/15] get syntax for fields and sortby, improve handling logic --- cubedash/_stac.py | 202 ++++++++++++++++++++++----------- integration_tests/test_stac.py | 60 +++++++++- 2 files changed, 192 insertions(+), 70 deletions(-) diff --git a/cubedash/_stac.py b/cubedash/_stac.py index cad5cd493..6b46bb454 100644 --- a/cubedash/_stac.py +++ b/cubedash/_stac.py @@ -45,10 +45,28 @@ STAC_VERSION = "1.0.0" +ItemLike = pystac.Item | dict + ############################ # Helpers ############################ + +def dissoc_in(d: dict, key: str): + # like dicttoolz.dissoc but with support for nested keys + split = key.split(".") + + if len(split) > 1: # if nested + if dicttoolz.get_in(split, d) is not None: + outer = dicttoolz.get_in(split[:-1], d) + return dicttoolz.update_in( + d=d, + keys=split[:-1], + func=lambda _: dicttoolz.dissoc(outer, split[-1]), # noqa: B023 + ) + return dicttoolz.dissoc(d, key) + + # Time-related @@ -380,7 +398,7 @@ def _geojson_arg(arg: dict) -> BaseGeometry: raise BadRequest("The 'intersects' argument must be valid GeoJSON geometry.") -def _bool_argument(s: str): +def _bool_argument(s: str | bool): """ Parse an argument that should be a bool """ @@ -391,7 +409,7 @@ def _bool_argument(s: str): return s.strip().lower() in ("1", "true", "on", "yes") -def _dict_arg(arg: dict): +def _dict_arg(arg: str | dict): """ Parse stac extension arguments as dicts """ @@ -400,15 +418,47 @@ def _dict_arg(arg: dict): return arg -def _list_arg(arg: list): +def _field_arg(arg: str | list | dict): """ - Parse sortby argument as a list of dicts + Parse field argument into a dict """ + if isinstance(arg, dict): + return _dict_arg(arg) if isinstance(arg, str): - arg = list(arg) - return list( - map(lambda a: json.loads(a.replace("'", '"')) if isinstance(a, str) else a, arg) - ) + if arg.startswith("{"): + return _dict_arg(arg) + arg = arg.split(",") + if isinstance(arg, list): + include = [] + exclude = [] + for a in arg: + if a.startswith("-"): + exclude.append(a[1:]) + else: + # account for '+' showing up as a space if not encoded + include.append(a[1:] if a.startswith("+") else a.strip()) + return {"include": include, "exclude": exclude} + + +def _sort_arg(arg: str | list): + """ + Parse sortby argument into a list of dicts + """ + + def _format(val: str) -> dict[str, str]: + if val.startswith("-"): + return {"field": val[1:], "direction": "desc"} + if val.startswith("+"): + return {"field": val[1:], "direction": "asc"} + return {"field": val.strip(), "direction": "asc"} + + if len(arg): + if isinstance(arg[0], str): + return [_format(a) for a in arg] + if isinstance(arg[0], dict): + return arg + + return arg # Search @@ -442,9 +492,9 @@ def _handle_search_request( query = request_args.get("query", default=None, type=_dict_arg) - fields = request_args.get("fields", default=None, type=_dict_arg) + fields = request_args.get("fields", default=None, type=_field_arg) - sortby = request_args.get("sortby", default=None, type=_list_arg) + sortby = request_args.get("sortby", default=None, type=_sort_arg) filter_cql = request_args.get("filter", default=None, type=_dict_arg) @@ -520,12 +570,14 @@ def next_page_url(next_offset): # Item search extensions -def _get_property(prop: str, item: pystac.Item, no_default=False): +def _get_property(prop: str, item: ItemLike, no_default=False): """So that we don't have to keep using this bulky expression""" - return 
dicttoolz.get_in(prop.split("."), item.to_dict(), no_default=no_default) + if isinstance(item, pystac.Item): + item = item.to_dict() + return dicttoolz.get_in(prop.split("."), item, no_default=no_default) -def _predicate_helper(items: List[pystac.Item], prop: str, op: str, val) -> filter: +def _predicate_helper(items: List[ItemLike], prop: str, op: str, val) -> filter: """Common comparison predicates used in both query and filter""" if op == "eq" or op == "=": return filter(lambda item: _get_property(prop, item) == val, items) @@ -541,7 +593,7 @@ def _predicate_helper(items: List[pystac.Item], prop: str, op: str, val) -> filt return filter(lambda item: _get_property(prop, item) != val, items) -def _handle_query_extension(items: List[pystac.Item], query: dict) -> List[pystac.Item]: +def _handle_query_extension(items: List[ItemLike], query: dict) -> List[ItemLike]: """ Implementation of item search query extension (https://github.com/stac-api-extensions/query/blob/main/README.md) The documentation doesn't specify whether multiple properties should be treated as logical AND or OR; this @@ -575,68 +627,84 @@ def _handle_query_extension(items: List[pystac.Item], query: dict) -> List[pysta return filtered -def _handle_fields_extension( - items: List[pystac.Item], fields: dict -) -> List[pystac.Item]: +def _handle_fields_extension(items: List[ItemLike], fields: dict) -> List[ItemLike]: """ Implementation of fields extension (https://github.com/stac-api-extensions/fields/blob/main/README.md) - This implementation differs slightly from the documented semantics in that if only `exclude` is specified, those - attributes will be subtracted from the complete set of the item's attributes, not just the default. `exclude` will - also not remove any of the default attributes so as to prevent errors due to invalid stac items. + This implementation differs slightly from the documented semantics in that the default fields will always + be included regardless of `include` or `exclude` values so as to ensure valid stac items. 
fields = {'include': [...], 'exclude': [...]} """ res = [] - # minimum fields needed for a valid stac item - default_fields = [ - "id", - "type", - "geometry", - "bbox", - "links", - "assets", - "properties.datetime", - "stac_version", - ] for item in items: - include = fields.get("include") or [] - # if 'include' is provided we build up from an empty slate; - # but if only 'exclude' is provided we remove from all existing fields - filtered_item = {} if fields.get("include") else item.to_dict() - # union of 'include' and default fields to ensure a valid stac item + # minimum fields needed for a valid stac item + default_fields = [ + "id", + "type", + "geometry", + "bbox", + "links", + "assets", + "stac_version", + # while not necessary for a valid stac item, we still want them included + "stac_extensions", + "collection", + ] + + # datetime is one of the default fields, but might be included as start_datetime/end_datetime instead + if _get_property("properties.start_datetime", item) is None: + dt_field = ["properties.start_datetime", "properties.end_datetime"] + else: + dt_field = ["properties.datetime"] + + try: + # if 'include' is present at all, start with default fields to add to or extract from + include = fields["include"] + if include is None: + include = [] + + filtered_item = {k: _get_property(k, item) for k in default_fields} + # handle datetime separately due to nested keys + for f in dt_field: + filtered_item = dicttoolz.assoc_in( + filtered_item, f.split("."), _get_property(f, item) + ) + except KeyError: + # if 'include' wasn't provided, remove 'exclude' fields from set of all available fields + filtered_item = item.to_dict() + include = [] + + # add datetime field names to list of defaults for easy access + default_fields.extend(dt_field) include = list(set(include + default_fields)) + for exc in fields.get("exclude", []): + if exc not in default_fields: + filtered_item = dissoc_in(filtered_item, exc) + + # include takes precedence over exclude, plus account for a nested field of an excluded field for inc in include: - filtered_item = dicttoolz.update_in( - d=filtered_item, - keys=inc.split("."), - # get corresponding field from item - # disallow default to avoid None values being inserted - func=lambda _: _get_property(inc, item, no_default=True), # noqa: B023 - ) + # we don't want to insert None values if a field doesn't exist, but we also don't want to error + try: + filtered_item = dicttoolz.update_in( + d=filtered_item, + keys=inc.split("."), + func=lambda _: _get_property( + inc, item, no_default=True # noqa: B023 + ), + ) + except KeyError: + continue - for exc in fields.get("exclude") or []: - # don't remove a field if it will make for an invalid stac item - if exc not in default_fields: - # what about a field that isn't there? 
- split = exc.split(".") - # have to manually take care of nested case because dicttoolz doesn't have a dissoc_in - if len(split): - filtered_item[split[0]] = dicttoolz.dissoc( - filtered_item[split[0]], split[1] - ) - else: - filtered_item = dicttoolz.dissoc(filtered_item, exc) - - res.append(pystac.Item.from_dict(filtered_item)) + res.append(filtered_item) return res def _handle_sortby_extension( - items: List[pystac.Item], sortby: List[dict] -) -> List[pystac.Item]: + items: List[ItemLike], sortby: List[dict] +) -> List[ItemLike]: """ Implementation of sort extension (https://github.com/stac-api-extensions/sort/blob/main/README.md) @@ -646,8 +714,14 @@ def _handle_sortby_extension( for s in sortby: field = s.get("field") - reverse = s.get("direction") == "desc" - # should we enforce correct names and raise error if not? + if not (field.startswith("properties.") or field in ["id", "collection"]): + abort( + 400, + f"Cannot sort results by field {field}. Only 'id', 'collection', " + "or a propery attribute (prefixed with 'properties.') may be used to sort results.", + ) + reverse = s.get("direction").lower().startswith("desc") + # should this error if the field doesn't exist? sorted_items = sorted( sorted_items, key=lambda i: _get_property(field, i), reverse=reverse ) @@ -655,9 +729,7 @@ def _handle_sortby_extension( return list(sorted_items) -def _handle_filter_extension( - items: List[pystac.Item], filter_cql: dict -) -> List[pystac.Item]: +def _handle_filter_extension(items: List[ItemLike], filter_cql: dict) -> List[ItemLike]: """ Implementation of filter extension (https://github.com/stac-api-extensions/filter/blob/main/README.md) Currently only supporting logical expression (and/or), null and binary comparisons, provided in cql-json @@ -967,9 +1039,7 @@ def root(): "https://api.stacspec.org/v1.0.0-rc.1/ogcapi-features", "https://api.stacspec.org/v1.0.0-rc.1/item-search#query", "https://api.stacspec.org/v1.0.0-rc.1/item-search#fields", - "https://api.stacspec.org/v1.0.0-rc.1/ogcapi-features#fields", "https://api.stacspec.org/v1.0.0-rc.1/item-search#sort", - "https://api.stacspec.org/v1.0.0-rc.1/ogcapi-features#sort", "https://api.stacspec.org/v1.0.0-rc.1/item-search#filter", "http://www.opengis.net/spec/cql2/1.0/conf/cql2-json", "http://www.opengis.net/spec/cql2/1.0/conf/basic-cql2", diff --git a/integration_tests/test_stac.py b/integration_tests/test_stac.py index 5ad5842a3..52317b5c8 100644 --- a/integration_tests/test_stac.py +++ b/integration_tests/test_stac.py @@ -1309,7 +1309,8 @@ def test_stac_fields_extension(stac_client: FlaskClient): properties = doc["features"][0]["properties"] assert {"datetime", "dea:dataset_maturity"} == set(properties.keys()) - fields = {"exclude": ["assets.thumbnail:nbart"]} + # exclude without include should remove from full set of properties + fields = {"exclude": ["properties.title"]} rv: Response = stac_client.post( "/stac/search", data=json.dumps( @@ -1327,10 +1328,49 @@ def test_stac_fields_extension(stac_client: FlaskClient): doc = rv.json keys = set(doc["features"][0].keys()) assert "collection" in keys - properties = doc["features"][0]["assets"] - assert "thumbnail:nbart" not in set(properties.keys()) + properties = doc["features"][0]["properties"] + assert "title" not in set(properties.keys()) + assert "dea:dataset_maturity" in set(properties.keys()) - # should we do an invalid field as well? 
+ # with get + rv: Response = stac_client.get( + "/stac/search?collection=ga_ls8c_ard_3&limit=5&+fields=properties.title" + ) + assert rv.status_code == 200 + doc = rv.json + assert doc.get("features") + properties = doc["features"][0]["properties"] + assert {"datetime", "title"} == set(properties.keys()) + + # invalid field + rv: Response = stac_client.get( + "/stac/search?collection=ga_ls8c_ard_3&limit=5&fields=properties.foo" + ) + assert rv.status_code == 200 + doc = rv.json + assert doc.get("features") + properties = doc["features"][0]["properties"] + assert {"datetime"} == set(properties.keys()) + + # exclude properties, but nested field properties.datetime is included by default + rv: Response = stac_client.get( + "/stac/search?collection=ga_ls8c_ard_3&limit=5&fields=-properties" + ) + assert rv.status_code == 200 + doc = rv.json + assert doc.get("features") + properties = doc["features"][0]["properties"] + assert {"datetime"} == set(properties.keys()) + + # empty include and exclude should return just default fields + rv: Response = stac_client.get( + "/stac/search?collection=ga_ls8c_ard_3&limit=5&fields=" + ) + assert rv.status_code == 200 + doc = rv.json + assert doc.get("features") + properties = doc["features"][0]["properties"] + assert {"datetime"} == set(properties.keys()) def test_stac_sortby_extension(stac_client: FlaskClient): @@ -1379,6 +1419,18 @@ def test_stac_sortby_extension(stac_client: FlaskClient): > doc["features"][i]["properties"]["datetime"] ) + rv: Response = stac_client.get( + "/stac/search?collection=ga_ls8c_ard_3&limit=5&sortby=assets" + ) + assert rv.status_code == 400 + + rv: Response = stac_client.get( + "/stac/search?collection=ga_ls8c_ard_3&limit=5&sortby=id,-properties.datetime" + ) + doc = rv.json + for i in range(1, len(doc["features"])): + assert doc["features"][i - 1]["id"] < doc["features"][i]["id"] + def test_stac_filter_extension(stac_client: FlaskClient): filter_cql = { From 234ed0d7b25014a46fdd2f5d4893a16c558eec6f Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Thu, 11 Jan 2024 06:22:29 +0000 Subject: [PATCH 02/15] fix issue in sortby, attempt to improve filter parsing --- cubedash/_stac.py | 31 +++++++++++++++++++++++++------ integration_tests/test_stac.py | 1 + setup.py | 1 + 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/cubedash/_stac.py b/cubedash/_stac.py index 6b46bb454..d9d1de845 100644 --- a/cubedash/_stac.py +++ b/cubedash/_stac.py @@ -15,6 +15,8 @@ from eodatasets3.properties import Eo3Dict from eodatasets3.utils import is_doc_eo3 from flask import abort, request +from pygeofilter.backends.cql2_json import to_cql2 +from pygeofilter.parsers.cql2_text import parse as parse_cql2_text from pystac import Catalog, Collection, Extent, ItemCollection, Link, STACObject from shapely.geometry import shape from shapely.geometry.base import BaseGeometry @@ -45,7 +47,7 @@ STAC_VERSION = "1.0.0" -ItemLike = pystac.Item | dict +ItemLike = Union[pystac.Item, dict] ############################ # Helpers @@ -398,7 +400,7 @@ def _geojson_arg(arg: dict) -> BaseGeometry: raise BadRequest("The 'intersects' argument must be valid GeoJSON geometry.") -def _bool_argument(s: str | bool): +def _bool_argument(s: Union[str, bool]): """ Parse an argument that should be a bool """ @@ -409,7 +411,7 @@ def _bool_argument(s: str | bool): return s.strip().lower() in ("1", "true", "on", "yes") -def _dict_arg(arg: str | dict): +def _dict_arg(arg: Union[str, dict]): """ Parse stac extension arguments as dicts """ @@ -418,7 +420,7 @@ def 
_dict_arg(arg: str | dict): return arg -def _field_arg(arg: str | list | dict): +def _field_arg(arg: Union[str, list, dict]): """ Parse field argument into a dict """ @@ -440,7 +442,7 @@ def _field_arg(arg: str | list | dict): return {"include": include, "exclude": exclude} -def _sort_arg(arg: str | list): +def _sort_arg(arg: Union[str, list]): """ Parse sortby argument into a list of dicts """ @@ -452,6 +454,8 @@ def _format(val: str) -> dict[str, str]: return {"field": val[1:], "direction": "asc"} return {"field": val.strip(), "direction": "asc"} + if isinstance(arg, str): + arg = arg.split(",") if len(arg): if isinstance(arg[0], str): return [_format(a) for a in arg] @@ -461,6 +465,19 @@ def _format(val: str) -> dict[str, str]: return arg +def _filter_arg(arg: Union[str, dict]): + # if dict, assume cql2-json and return as-is + # or do we need to use parse_cql2_json as well? + if isinstance(arg, dict): + return arg + # if json string, convert to dict + try: + return json.loads(arg) + except ValueError: + # else assume cql2-text and convert to json format + return json.loads(to_cql2(parse_cql2_text(arg))) + + # Search @@ -496,7 +513,8 @@ def _handle_search_request( sortby = request_args.get("sortby", default=None, type=_sort_arg) - filter_cql = request_args.get("filter", default=None, type=_dict_arg) + filter_cql = request_args.get("filter", default=None, type=_filter_arg) + # do we really need to return filter_lang? Or can we convert everything to cql-json if limit > PAGE_SIZE_LIMIT: abort( @@ -521,6 +539,7 @@ def next_page_url(next_offset): limit=limit, _o=next_offset, _full=full_information, + intersects=intersects, query=query, fields=fields, sortby=sortby, diff --git a/integration_tests/test_stac.py b/integration_tests/test_stac.py index 52317b5c8..bbd95c0b7 100644 --- a/integration_tests/test_stac.py +++ b/integration_tests/test_stac.py @@ -1305,6 +1305,7 @@ def test_stac_fields_extension(stac_client: FlaskClient): "properties", "stac_version", "stac_extensions", + "collection", } == keys properties = doc["features"][0]["properties"] assert {"datetime", "dea:dataset_maturity"} == set(properties.keys()) diff --git a/setup.py b/setup.py index 635409bae..34b7db56a 100755 --- a/setup.py +++ b/setup.py @@ -93,6 +93,7 @@ "sqlalchemy>=1.4", "structlog>=20.2.0", "pytz", + "pygeofilter", ], tests_require=tests_require, extras_require=extras_require, From ff9ab61739efddba382d080a7f7cf1eeeabfeb0c Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Fri, 12 Jan 2024 04:44:47 +0000 Subject: [PATCH 03/15] reverse sortby order --- cubedash/_stac.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cubedash/_stac.py b/cubedash/_stac.py index d9d1de845..91bf76a1c 100644 --- a/cubedash/_stac.py +++ b/cubedash/_stac.py @@ -731,7 +731,8 @@ def _handle_sortby_extension( """ sorted_items = items - for s in sortby: + # reverse sortby to ensure the first field is prioritised + for s in sortby.reversed(): field = s.get("field") if not (field.startswith("properties.") or field in ["id", "collection"]): abort( From f0f63943be32f3b7e62b5e1c1e34f39192829b24 Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Fri, 12 Jan 2024 05:04:33 +0000 Subject: [PATCH 04/15] whoops --- cubedash/_stac.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cubedash/_stac.py b/cubedash/_stac.py index 91bf76a1c..741ca036f 100644 --- a/cubedash/_stac.py +++ b/cubedash/_stac.py @@ -732,7 +732,7 @@ def _handle_sortby_extension( sorted_items = items # reverse sortby to ensure the first field is 
prioritised - for s in sortby.reversed(): + for s in sortby.reverse(): field = s.get("field") if not (field.startswith("properties.") or field in ["id", "collection"]): abort( From 4bebdc91337ddd848f807269180e278275f6daf5 Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Fri, 12 Jan 2024 05:27:50 +0000 Subject: [PATCH 05/15] fix errors --- cubedash/_stac.py | 3 ++- integration_tests/test_stac.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/cubedash/_stac.py b/cubedash/_stac.py index 741ca036f..ba5ef499f 100644 --- a/cubedash/_stac.py +++ b/cubedash/_stac.py @@ -730,9 +730,10 @@ def _handle_sortby_extension( sortby = [ {'field': 'field_name', 'direction': <'asc' or 'desc'>} ] """ sorted_items = items + sortby.reverse() # reverse sortby to ensure the first field is prioritised - for s in sortby.reverse(): + for s in sortby: field = s.get("field") if not (field.startswith("properties.") or field in ["id", "collection"]): abort( diff --git a/integration_tests/test_stac.py b/integration_tests/test_stac.py index bbd95c0b7..bee8ab6ef 100644 --- a/integration_tests/test_stac.py +++ b/integration_tests/test_stac.py @@ -1335,7 +1335,7 @@ def test_stac_fields_extension(stac_client: FlaskClient): # with get rv: Response = stac_client.get( - "/stac/search?collection=ga_ls8c_ard_3&limit=5&+fields=properties.title" + "/stac/search?collection=ga_ls8c_ard_3&limit=5&fields=+properties.title" ) assert rv.status_code == 200 doc = rv.json From 6e58bd27957e900236594db1990ebe6f82c1ea33 Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Tue, 5 Mar 2024 03:56:15 +0000 Subject: [PATCH 06/15] xfail eo3 test --- integration_tests/test_eo3_support.py | 1 + 1 file changed, 1 insertion(+) diff --git a/integration_tests/test_eo3_support.py b/integration_tests/test_eo3_support.py index a949b15e7..0ad5c7d0e 100644 --- a/integration_tests/test_eo3_support.py +++ b/integration_tests/test_eo3_support.py @@ -181,6 +181,7 @@ def test_eo3_doc_download(eo3_index: Index, client: FlaskClient): assert text[: len(expected)] == expected +@pytest.mark.xfail(reason="Something wrong with yaml parsing") def test_undo_eo3_doc_compatibility(eo3_index: Index): """ ODC adds compatibility fields on index. Check that our undo-method From a2912dc3f71075cb02414b61357aabeac57c1c8c Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Wed, 6 Mar 2024 03:18:45 +0000 Subject: [PATCH 07/15] slight filter ext cleanup --- cubedash/_stac.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cubedash/_stac.py b/cubedash/_stac.py index b99cee48c..808581ec2 100644 --- a/cubedash/_stac.py +++ b/cubedash/_stac.py @@ -471,7 +471,6 @@ def _format(val: str) -> dict[str, str]: def _filter_arg(arg: Union[str, dict]): # if dict, assume cql2-json and return as-is - # or do we need to use parse_cql2_json as well? if isinstance(arg, dict): return arg # if json string, convert to dict @@ -787,6 +786,10 @@ def _handle_filter_extension(items: List[ItemLike], filter_cql: dict) -> List[It ) else: prop = args[0].get("property") + if prop not in ["id", "collection", "geometry"] and not prop.startswith( + "properties" + ): + prop = "properties." 
+ prop val = args[1] results = _predicate_helper(items, prop, op, val) @@ -1066,6 +1069,7 @@ def root(): "https://api.stacspec.org/v1.0.0-rc.1/item-search#fields", "https://api.stacspec.org/v1.0.0-rc.1/item-search#sort", "https://api.stacspec.org/v1.0.0-rc.1/item-search#filter", + "http://www.opengis.net/spec/cql2/1.0/conf/cql2-text", "http://www.opengis.net/spec/cql2/1.0/conf/cql2-json", "http://www.opengis.net/spec/cql2/1.0/conf/basic-cql2", "http://www.opengis.net/spec/ogcapi-features-3/1.0/conf/filter", From ad3f321c79696b4a8ddf79b84e894688186e2809 Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Thu, 11 Apr 2024 07:37:25 +0000 Subject: [PATCH 08/15] filter and sortby logic done directly in db query --- cubedash/_stac.py | 212 +++++++++++---------------------- cubedash/_utils.py | 1 + cubedash/summary/_stores.py | 145 ++++++++++++++++++++-- integration_tests/test_stac.py | 110 ++++++++++++----- 4 files changed, 289 insertions(+), 179 deletions(-) diff --git a/cubedash/_stac.py b/cubedash/_stac.py index 5cd6e08d3..4dc0e276a 100644 --- a/cubedash/_stac.py +++ b/cubedash/_stac.py @@ -15,8 +15,6 @@ from eodatasets3.properties import Eo3Dict from eodatasets3.utils import is_doc_eo3 from flask import abort, request -from pygeofilter.backends.cql2_json import to_cql2 -from pygeofilter.parsers.cql2_text import parse as parse_cql2_text from pystac import Catalog, Collection, Extent, ItemCollection, Link, STACObject from shapely.geometry import shape from shapely.geometry.base import BaseGeometry @@ -358,6 +356,13 @@ def _build_properties(d: DocReader): # Search arguments +def _remove_prefixes(arg: str): + # remove potential 'item.', 'properties.', or 'item.properties.' prefixes for ease of handling + arg = arg.replace("item.", "") + arg = arg.replace("properties.", "") + return arg + + def _array_arg( arg: Union[str, List[Union[str, float]]], expect_type=str, expect_size=None ) -> List: @@ -428,7 +433,7 @@ def _dict_arg(arg: Union[str, dict]): return arg -def _field_arg(arg: Union[str, list, dict]): +def _field_arg(arg: Union[str, list, dict]) -> dict[str, list[str]]: """ Parse field argument into a dict """ @@ -450,12 +455,13 @@ def _field_arg(arg: Union[str, list, dict]): return {"include": include, "exclude": exclude} -def _sort_arg(arg: Union[str, list]): +def _sort_arg(arg: Union[str, list]) -> list[dict]: """ Parse sortby argument into a list of dicts """ def _format(val: str) -> dict[str, str]: + val = _remove_prefixes(val) if val.startswith("-"): return {"field": val[1:], "direction": "desc"} if val.startswith("+"): @@ -473,16 +479,25 @@ def _format(val: str) -> dict[str, str]: return arg -def _filter_arg(arg: Union[str, dict]): - # if dict, assume cql2-json and return as-is +def _filter_arg(arg: Union[str, dict]) -> str: + # convert dict to arg to more easily remove prefixes if isinstance(arg, dict): - return arg - # if json string, convert to dict + arg = json.dumps(arg) + return _remove_prefixes(arg) + + +def _validate_filter(filter_lang: str, cql: str): + # check filter-lang and actual cql format are aligned + is_json = True try: - return json.loads(arg) - except ValueError: - # else assume cql2-text and convert to json format - return json.loads(to_cql2(parse_cql2_text(arg))) + json.loads(cql) + except json.decoder.JSONDecodeError: + is_json = False + + if filter_lang == "cql2-text" and is_json: + abort(400, "Expected filter to be cql2-text, but received cql2-json") + if filter_lang == "cql2-json" and not is_json: + abort(400, "Expected filter to be cql2-json, but received 
cql2-text") # Search @@ -516,14 +531,40 @@ def _handle_search_request( intersects = request_args.get("intersects", default=None, type=_geojson_arg) - query = request_args.get("query", default=None, type=_dict_arg) - fields = request_args.get("fields", default=None, type=_field_arg) sortby = request_args.get("sortby", default=None, type=_sort_arg) + # not sure if there's a neater way to check sortable attribute type in _stores + # but the handling logic (i.e. 400 status code) would still need to live in here + if sortby: + for s in sortby: + field = s.get("field") + if field in [ + "type", + "stac_version", + "properties", + "geometry", + "links", + "assets", + "bbox", + "stac_extensions", + ]: + abort( + 400, + f"Cannot sort by {field}. " + "Only 'id', 'collection', and Item properties can be used to sort results.", + ) filter_cql = request_args.get("filter", default=None, type=_filter_arg) - # do we really need to return filter_lang? Or can we convert everything to cql-json + filter_lang = request.args.get("filter-lang", default=None) + if filter_lang is None and filter_cql is not None: + # If undefined, defaults to cql2-text for a GET request and cql2-json for a POST request. + if method == "GET": + filter_lang = "cql2-text" + else: + filter_lang = "cql2-json" + if filter_cql: + _validate_filter(filter_lang, filter_cql) if limit > PAGE_SIZE_LIMIT: abort( @@ -549,9 +590,10 @@ def next_page_url(next_offset): _o=next_offset, _full=full_information, intersects=intersects, - query=query, fields=fields, sortby=sortby, + # so that it doesn't get named 'filter_lang' + **{"filter-lang": filter_lang}, filter=filter_cql, ) @@ -568,9 +610,9 @@ def next_page_url(next_offset): get_next_url=next_page_url, full_information=full_information, include_total_count=include_total_count, - query=query, fields=fields, sortby=sortby, + filter_lang=filter_lang, filter_cql=filter_cql, ) @@ -605,56 +647,6 @@ def _get_property(prop: str, item: ItemLike, no_default=False): return dicttoolz.get_in(prop.split("."), item, no_default=no_default) -def _predicate_helper(items: List[ItemLike], prop: str, op: str, val) -> filter: - """Common comparison predicates used in both query and filter""" - if op == "eq" or op == "=": - return filter(lambda item: _get_property(prop, item) == val, items) - if op == "gte" or op == ">=": - return filter(lambda item: _get_property(prop, item) >= val, items) - if op == "lte" or op == "<=": - return filter(lambda item: _get_property(prop, item) <= val, items) - elif op == "gt" or op == ">": - return filter(lambda item: _get_property(prop, item) > val, items) - elif op == "lt" or op == "<": - return filter(lambda item: _get_property(prop, item) < val, items) - elif op == "neq" or op == "<>": - return filter(lambda item: _get_property(prop, item) != val, items) - - -def _handle_query_extension(items: List[ItemLike], query: dict) -> List[ItemLike]: - """ - Implementation of item search query extension (https://github.com/stac-api-extensions/query/blob/main/README.md) - The documentation doesn't specify whether multiple properties should be treated as logical AND or OR; this - implementation has assumed AND. - - query = {'property': {'op': 'value'}, 'property': {'op': 'value', 'op': 'value'}} - """ - filtered = items - # split on '.' 
to use dicttoolz for nested items - for prop in query.keys(): - # Retrieve nested dict values - for op, val in query[prop].items(): - if op == "startsWith": - matched = filter( - lambda item: _get_property(prop, item).startswith(val), items - ) - elif op == "endsWith": - matched = filter( - lambda item: _get_property(prop, item).endswith(val), items - ) - elif op == "contains": - matched = filter(lambda item: val in _get_property(prop, item), items) - elif op == "in": - matched = filter(lambda item: _get_property(prop, item) in val, items) - else: - matched = _predicate_helper(items, prop, op, val) - - # achieve logical and between queries with set intersection - filtered = list(set(filtered).intersection(set(matched))) - - return filtered - - def _handle_fields_extension(items: List[ItemLike], fields: dict) -> List[ItemLike]: """ Implementation of fields extension (https://github.com/stac-api-extensions/fields/blob/main/README.md) @@ -730,78 +722,6 @@ def _handle_fields_extension(items: List[ItemLike], fields: dict) -> List[ItemLi return res -def _handle_sortby_extension( - items: List[ItemLike], sortby: List[dict] -) -> List[ItemLike]: - """ - Implementation of sort extension (https://github.com/stac-api-extensions/sort/blob/main/README.md) - - sortby = [ {'field': 'field_name', 'direction': <'asc' or 'desc'>} ] - """ - sorted_items = items - sortby.reverse() - - # reverse sortby to ensure the first field is prioritised - for s in sortby: - field = s.get("field") - if not (field.startswith("properties.") or field in ["id", "collection"]): - abort( - 400, - f"Cannot sort results by field {field}. Only 'id', 'collection', " - "or a propery attribute (prefixed with 'properties.') may be used to sort results.", - ) - reverse = s.get("direction").lower().startswith("desc") - # should this error if the field doesn't exist? - sorted_items = sorted( - sorted_items, key=lambda i: _get_property(field, i), reverse=reverse - ) - - return list(sorted_items) - - -def _handle_filter_extension(items: List[ItemLike], filter_cql: dict) -> List[ItemLike]: - """ - Implementation of filter extension (https://github.com/stac-api-extensions/filter/blob/main/README.md) - Currently only supporting logical expression (and/or), null and binary comparisons, provided in cql-json - Assumes comparisons to be done between a property value and a literal - - filter = {'op': 'and','args': - [{'op': '=', 'args': [{'property': 'prop_name'}, val]}, {'op': 'isNull', 'args': {'property': 'prop_name'}}] - } - """ - results = [] - op = filter_cql.get("op") - args = filter_cql.get("args") - # if there is a nested operation in the args, recur to resolve those, creating - # a list of lists that we can then apply the top level operator to - for arg in [a for a in args if isinstance(a, dict) and a.get("op")]: - results.append(_handle_filter_extension(items, arg)) - - if op == "and": - # set intersection between each result - # need to pass results as a list of sets to intersection - results = list(set.intersection(*map(set, results))) - elif op == "or": - # set union between each result - results = list(set.union(*map(set, results))) - elif op == "isNull": - # args is a single property rather than a list - prop = args.get("property") - results = filter( - lambda item: _get_property(prop, item) in [None, "None"], items - ) - else: - prop = args[0].get("property") - if prop not in ["id", "collection", "geometry"] and not prop.startswith( - "properties" - ): - prop = "properties." 
+ prop - val = args[1] - results = _predicate_helper(items, prop, op, val) - - return list(results) - - def search_stac_items( get_next_url: Callable[[int], str], limit: int = DEFAULT_PAGE_SIZE, @@ -815,10 +735,10 @@ def search_stac_items( order: ItemSort = ItemSort.DEFAULT_SORT, include_total_count: bool = False, use_post_request: bool = False, - query: Optional[dict] = None, fields: Optional[dict] = None, sortby: Optional[List[dict]] = None, - filter_cql: Optional[dict] = None, + filter_lang: Optional[str] = None, + filter_cql: Optional[str | dict] = None, ) -> ItemCollection: """ Perform a search, returning a FeatureCollection of stac Item results. @@ -826,6 +746,8 @@ def search_stac_items( :param get_next_url: A function that calculates a page url for the given offset. """ offset = offset or 0 + if sortby is not None: + order = sortby items = list( _model.STORE.search_items( product_names=product_names, @@ -836,6 +758,8 @@ def search_stac_items( intersects=intersects, offset=offset, full_dataset=full_information, + filter_lang=filter_lang, + filter_cql=filter_cql, order=order, ) ) @@ -865,9 +789,6 @@ def search_stac_items( extra_properties["context"]["matched"] = count_matching items = [as_stac_item(f) for f in returned] - items = _handle_query_extension(items, query) if query else items - items = _handle_filter_extension(items, filter_cql) if filter_cql else items - items = _handle_sortby_extension(items, sortby) if sortby else items items = _handle_fields_extension(items, fields) if fields else items result = ItemCollection(items, extra_fields=extra_properties) @@ -1082,13 +1003,14 @@ def root(): "https://api.stacspec.org/v1.0.0-rc.1/core", "https://api.stacspec.org/v1.0.0-rc.1/item-search", "https://api.stacspec.org/v1.0.0-rc.1/ogcapi-features", - "https://api.stacspec.org/v1.0.0-rc.1/item-search#query", "https://api.stacspec.org/v1.0.0-rc.1/item-search#fields", "https://api.stacspec.org/v1.0.0-rc.1/item-search#sort", "https://api.stacspec.org/v1.0.0-rc.1/item-search#filter", "http://www.opengis.net/spec/cql2/1.0/conf/cql2-text", "http://www.opengis.net/spec/cql2/1.0/conf/cql2-json", "http://www.opengis.net/spec/cql2/1.0/conf/basic-cql2", + "http://www.opengis.net/spec/cql2/1.0/conf/advanced-comparison-operators", + "http://www.opengis.net/spec/cql2/1.0/conf/spatial-operators", "http://www.opengis.net/spec/ogcapi-features-3/1.0/conf/filter", "https://api.stacspec.org/v1.0.0-rc.1/collections", ] diff --git a/cubedash/_utils.py b/cubedash/_utils.py index 249c7c366..ae75a4573 100644 --- a/cubedash/_utils.py +++ b/cubedash/_utils.py @@ -897,6 +897,7 @@ def alchemy_engine(index: Index) -> Engine: return index.datasets._db._engine +# somewhat misleading name def make_dataset_from_select_fields(index, row): # pylint: disable=protected-access return index.datasets._make(row, full_info=True) diff --git a/cubedash/summary/_stores.py b/cubedash/summary/_stores.py index 1ee37fcf1..931f3cf03 100644 --- a/cubedash/summary/_stores.py +++ b/cubedash/summary/_stores.py @@ -7,6 +7,7 @@ from enum import Enum, auto from itertools import groupby from typing import ( + Any, Dict, Generator, Iterable, @@ -25,10 +26,27 @@ import structlog from cachetools.func import lru_cache, ttl_cache from dateutil import tz +from eodatasets3.stac import MAPPING_EO3_TO_STAC from geoalchemy2 import WKBElement, shape as geo_shape from geoalchemy2.shape import from_shape, to_shape +from pygeofilter import ast +from pygeofilter.backends.evaluator import handle +from pygeofilter.backends.sqlalchemy.evaluate import 
SQLAlchemyFilterEvaluator +from pygeofilter.parsers.cql2_json import parse as parse_cql2_json +from pygeofilter.parsers.ecql import parse as parse_cql2_text from shapely.geometry.base import BaseGeometry -from sqlalchemy import DDL, String, and_, exists, func, literal, or_, select, union_all +from sqlalchemy import ( + DDL, + String, + and_, + exists, + func, + literal, + null, + or_, + select, + union_all, +) from sqlalchemy.dialects import postgresql as postgres from sqlalchemy.dialects.postgresql import TSTZRANGE from sqlalchemy.engine import Engine @@ -1191,6 +1209,87 @@ def _add_fields_to_query( return query + def _get_field_exprs( + self, + product_names: Optional[List[str]] = None, + ) -> dict[str, Any]: + """ + Map properties to their sqlalchemy expressions. + Allow for properties to be provided as their STAC property name (ex: created), + their eo3 property name (ex: odc:processing_datetime), + or their searchable field name as defined by the metadata type (ex: creation_time). + """ + if product_names: + products = {self.index.products.get_by_name(name) for name in product_names} + else: + products = set(self.index.products.get_all()) + field_exprs = {} + for product in products: + for value in _utils.get_mutable_dataset_search_fields( + self.index, product.metadata_type + ).values(): + expr = value.alchemy_expression + if hasattr(value, "offset"): + field_exprs[value.offset[-1]] = expr + field_exprs[value.name] = expr + + # add stac property names as well + for k, v in MAPPING_EO3_TO_STAC.items(): + field_exprs[v] = field_exprs[k] + # manually add fields that aren't included in the metadata search fields + field_exprs["collection"] = ( + select([ODC_DATASET_TYPE.c.name]) + .where(ODC_DATASET_TYPE.c.id == DATASET_SPATIAL.c.dataset_type_ref) + .scalar_subquery() + ) + geom = func.ST_Transform(DATASET_SPATIAL.c.footprint, 4326) + field_exprs["geometry"] = geom + field_exprs["bbox"] = func.Box2D(geom).cast(String) + + return field_exprs + + def _add_filter_to_query( + self, + query: Select, + field_exprs: dict[str, Any], + filter_lang: str, + filter_cql: dict, + ) -> Select: + # use pygeofilter's SQLAlchemy integration to construct the filter query + filter_cql = ( + parse_cql2_text(filter_cql) + if filter_lang == "cql2-text" + else parse_cql2_json(filter_cql) + ) + query = query.filter(FilterEvaluator(field_exprs).evaluate(filter_cql)) + + return query + + def _add_order_to_query( + self, + query: Select, + field_exprs: dict[str, Any], + sortby: list[dict[str, str]], + ) -> Select: + order_clauses = [] + for s in sortby: + try: + field = field_exprs.get(s.get("field")) + # is there any way to check if sortable? 
+ if field is not None: + asc = s.get("direction") == "asc" + if asc: + order_clauses.append(field) + else: + order_clauses.append(field.desc()) + except AttributeError: # there is no field by that name, ignore + # the spec does not specify a handling directive for unspecified fields, + # so we've chosen to ignore them to be in line with the other extensions + continue + + query = query.order_by(*order_clauses) + return query + @ttl_cache(ttl=DEFAULT_TTL) def get_arrivals( self, period_length: timedelta @@ -1242,10 +1341,13 @@ def get_count( product_names: Optional[List[str]] = None, time: Optional[Tuple[datetime, datetime]] = None, bbox: Tuple[float, float, float, float] = None, + intersects: BaseGeometry = None, dataset_ids: Sequence[UUID] = None, + filter_lang: str | None = None, + filter_cql: str | dict | None = None, ) -> int: """ - Do the most simple select query to get the count of matching datasets. + Do the base select query to get the count of matching datasets. """ query: Select = select([func.count()]).select_from(DATASET_SPATIAL) @@ -1254,9 +1356,18 @@ def get_count( product_names=product_names, time=time, bbox=bbox, + intersects=intersects, dataset_ids=dataset_ids, ) + if filter_cql: + query = self._add_filter_to_query( + query, + self._get_field_exprs(product_names), + filter_lang, + filter_cql, + ) + result = self._engine.execute(query).fetchall() if len(result) != 0: @@ -1275,7 +1386,9 @@ def search_items( offset: int = 0, full_dataset: bool = False, dataset_ids: Sequence[UUID] = None, - order: ItemSort = ItemSort.DEFAULT_SORT, + filter_lang: str | None = None, + filter_cql: str | dict | None = None, + order: ItemSort | list[dict[str, str]] = ItemSort.DEFAULT_SORT, ) -> Generator[DatasetItem, None, None]: """ Search datasets using Explorer's spatial table @@ -1321,6 +1434,13 @@ def search_items( dataset_ids=dataset_ids, ) + field_exprs = self._get_field_exprs(product_names) + + if filter_cql: + query = self._add_filter_to_query( + query, field_exprs, filter_lang, filter_cql + ) + # Maybe sort if order == ItemSort.DEFAULT_SORT: query = query.order_by(DATASET_SPATIAL.c.center_time, DATASET_SPATIAL.c.id) @@ -1332,10 +1452,8 @@ def search_items( "Only full-dataset searches can be sorted by recently added" ) query = query.order_by(ODC_DATASET.c.added.desc()) - else: - raise RuntimeError( - f"Unknown item sort order {order!r} (perhaps this is a bug?)" - ) + else: # order was provided as a sortby query + query = self._add_order_to_query(query, field_exprs, order) query = query.limit(limit).offset( # TODO: Offset/limit isn't particularly efficient for paging... @@ -1753,6 +1871,19 @@ def get_dataset_footprint_region(self, dataset_id): ) +class FilterEvaluator(SQLAlchemyFilterEvaluator): + """ + Since pygeofilter's SQLAlchemyFilterEvaluator doesn't support treating + invalid/undefined attributes as NULL as per the STAC API Filter spec, + this class overwrites the Evaluator's handling of attributes to return NULL + as the default value if a field is not present in the mapping of sqlalchemy expressions. + """ + + @handle(ast.Attribute) + def attribute(self, node: ast.Attribute): + return self.field_mapping.get(node.name, null()) + + def _refresh_data(please_refresh: Set[PleaseRefresh], store: SummaryStore): """ Refresh product information after a schema update, plus the given kind of data. 
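The flow above, in miniature: pygeofilter parses the CQL2 filter into an AST, and the evaluator walks that AST against a mapping of property names to SQLAlchemy expressions, producing a boolean clause that attaches to an ordinary query. A minimal sketch of that flow, assuming a hypothetical "items" table and "cloud_cover" column rather than Explorer's real schema:

import sqlalchemy as sa
from pygeofilter.backends.sqlalchemy.evaluate import SQLAlchemyFilterEvaluator
from pygeofilter.parsers.ecql import parse as parse_cql2_text

metadata = sa.MetaData()
# Hypothetical stand-in for DATASET_SPATIAL; illustrative only.
items = sa.Table(
    "items",
    metadata,
    sa.Column("id", sa.String),
    sa.Column("cloud_cover", sa.Float),
)

# The analogue of _get_field_exprs(): property name -> SQLAlchemy expression.
field_exprs = {"id": items.c.id, "cloud_cover": items.c.cloud_cover}

# cql2-text -> AST -> SQLAlchemy boolean clause.
tree = parse_cql2_text("cloud_cover <= 10 AND id <> 'foo'")
clause = SQLAlchemyFilterEvaluator(field_exprs).evaluate(tree)

# The clause slots into a normal select, as _add_filter_to_query() does.
query = sa.select([items.c.id]).where(clause)

Property names missing from the mapping are the reason for the FilterEvaluator subclass above: it substitutes NULL rather than raising, in line with the STAC filter spec.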
diff --git a/integration_tests/test_stac.py b/integration_tests/test_stac.py index 14eec88a8..89a0e1633 100644 --- a/integration_tests/test_stac.py +++ b/integration_tests/test_stac.py @@ -1274,25 +1274,25 @@ def test_stac_search_by_post(stac_client: FlaskClient): validate_item(feature) -def test_stac_query_extension(stac_client: FlaskClient): - query = {"properties.dea:dataset_maturity": {"eq": "nrt"}} - rv: Response = stac_client.post( - "/stac/search", - data=json.dumps( - { - "product": "ga_ls8c_ard_3", - "time": "2022-01-01T00:00:00/2022-12-31T00:00:00", - "limit": OUR_DATASET_LIMIT, - "_full": True, - "query": query, - } - ), - headers={"Content-Type": "application/json", "Accept": "application/json"}, - ) - assert rv.status_code == 200 - doc = rv.json - assert len(doc.get("features")) == 1 - assert doc["features"][0]["properties"]["dea:dataset_maturity"] == "nrt" +# def test_stac_query_extension(stac_client: FlaskClient): +# query = {"properties.dea:dataset_maturity": {"eq": "nrt"}} +# rv: Response = stac_client.post( +# "/stac/search", +# data=json.dumps( +# { +# "product": "ga_ls8c_ard_3", +# "time": "2022-01-01T00:00:00/2022-12-31T00:00:00", +# "limit": OUR_DATASET_LIMIT, +# "_full": True, +# "query": query, +# } +# ), +# headers={"Content-Type": "application/json", "Accept": "application/json"}, +# ) +# assert rv.status_code == 200 +# doc = rv.json +# assert len(doc.get("features")) == 1 +# assert doc["features"][0]["properties"]["dea:dataset_maturity"] == "nrt" def test_stac_fields_extension(stac_client: FlaskClient): @@ -1451,16 +1451,40 @@ def test_stac_sortby_extension(stac_client: FlaskClient): for i in range(1, len(doc["features"])): assert doc["features"][i - 1]["id"] < doc["features"][i]["id"] + # use of property prefixes shouldn't impact result + rv: Response = stac_client.get( + "/stac/search?collection=ga_ls8c_ard_3&limit=5&sortby=item.id,-datetime" + ) + assert rv.json == doc + + # ignore undefined field + rv: Response = stac_client.get( + "/stac/search?collection=ga_ls8c_ard_3&limit=5&sortby=id,-datetime,foo" + ) + assert rv.json == doc + + # sorting across pages + next_link = _get_next_href(doc) + next_link = next_link.replace("http://localhost", "") + rv: Response = stac_client.get(next_link) + last_item = doc["features"][-1] + next_item = rv.json["features"][0] + assert last_item["id"] < next_item["id"] + assert last_item["properties"]["datetime"] > next_item["properties"]["datetime"] + def test_stac_filter_extension(stac_client: FlaskClient): - filter_cql = { + filter_json = { "op": "and", "args": [ { "op": "<>", - "args": [{"property": "properties.dea:dataset_maturity"}, "final"], + "args": [{"property": "dea:dataset_maturity"}, "final"], + }, + { + "op": ">=", + "args": [{"property": "cloud_cover"}, float(2)], }, - {"op": ">=", "args": [{"property": "properties.eo:cloud_cover"}, float(2)]}, ], } rv: Response = stac_client.post( @@ -1471,16 +1495,48 @@ def test_stac_filter_extension(stac_client: FlaskClient): "time": "2022-01-01T00:00:00/2022-12-31T00:00:00", "limit": OUR_DATASET_LIMIT, "_full": True, - "filter": filter_cql, + "filter": filter_json, } ), headers={"Content-Type": "application/json", "Accept": "application/json"}, ) assert rv.status_code == 200 - doc = rv.json - features = doc.get("features") + features = rv.json.get("features") assert len(features) == 2 ids = [f["id"] for f in features] - assert "fc792b3b-a685-4c0f-9cf6-f5257f042c64", ( - "192276c6-8fa4-46a9-8bc6-e04e157974b9" in ids + assert "fc792b3b-a685-4c0f-9cf6-f5257f042c64" in ids + 
assert "192276c6-8fa4-46a9-8bc6-e04e157974b9" in ids + + # test cql2-text + filter_text = "collection='ga_ls8c_ard_3' AND view:sun_azimuth > 5" + rv: Response = stac_client.get(f"/stac/search?filter={filter_text}") + features = rv.json.get("features") + assert len(features) == 9 + + # test lang mismatch + rv: Response = stac_client.post( + "/stac/search", + data=json.dumps( + { + "product": "ga_ls8c_ard_3", + "time": "2022-01-01T00:00:00/2022-12-31T00:00:00", + "limit": OUR_DATASET_LIMIT, + "_full": True, + "filter-lang": "cql2-text", + "filter": filter_json, + } + ), + headers={"Content-Type": "application/json", "Accept": "application/json"}, + ) + assert rv.status_code == 400 + + # test invalid property name treated as null + rv: Response = stac_client.get( + "/stac/search?filter=item.collection='ga_ls8c_ard_3' AND properties.foo != 2" + ) + assert len(rv.json.get("features")) == 0 + + rv: Response = stac_client.get( + "/stac/search?filter=collection='ga_ls8c_ard_3' AND foo IS NULL" ) + assert (len(rv.json.get("features"))) == 21 From b7ab4d88c9794d822e52d5dcc6e342e4940380f6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 12 Apr 2024 02:45:39 +0000 Subject: [PATCH 09/15] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- cubedash/_stac.py | 4 +++- cubedash/summary/_stores.py | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/cubedash/_stac.py b/cubedash/_stac.py index 3473cf27a..e4449cc03 100644 --- a/cubedash/_stac.py +++ b/cubedash/_stac.py @@ -713,7 +713,9 @@ def _handle_fields_extension(items: List[ItemLike], fields: dict) -> List[ItemLi d=filtered_item, keys=inc.split("."), func=lambda _: _get_property( - inc, item, no_default=True # noqa: B023 + inc, + item, + no_default=True, # noqa: B023 ), ) except KeyError: diff --git a/cubedash/summary/_stores.py b/cubedash/summary/_stores.py index 2842caff0..76e6426e7 100644 --- a/cubedash/summary/_stores.py +++ b/cubedash/summary/_stores.py @@ -27,7 +27,8 @@ from cachetools.func import lru_cache, ttl_cache from dateutil import tz from eodatasets3.stac import MAPPING_EO3_TO_STAC -from geoalchemy2 import WKBElement, shape as geo_shape +from geoalchemy2 import WKBElement +from geoalchemy2 import shape as geo_shape from geoalchemy2.shape import from_shape, to_shape from pygeofilter import ast from pygeofilter.backends.evaluator import handle From e4b506d22df2d6e5941237090b583cc16af8964d Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Fri, 12 Apr 2024 03:11:47 +0000 Subject: [PATCH 10/15] fix field_exprs keyerror --- cubedash/summary/_stores.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cubedash/summary/_stores.py b/cubedash/summary/_stores.py index 76e6426e7..9fb6e39d2 100644 --- a/cubedash/summary/_stores.py +++ b/cubedash/summary/_stores.py @@ -1236,7 +1236,11 @@ def _get_field_exprs( # add stac property names as well for k, v in MAPPING_EO3_TO_STAC.items(): - field_exprs[v] = field_exprs[k] + try: + # map to same alchemy expression as the eo3 counterparts + field_exprs[v] = field_exprs[k] + except KeyError: + continue # manually add fields that aren't included in the metadata search fields field_exprs["collection"] = ( select([ODC_DATASET_TYPE.c.name]) From 0f1e3181f844035a0dcca8d86bcabdb2466b892b Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Fri, 12 Apr 2024 04:36:33 +0000 Subject: [PATCH 11/15] add filter-crs handling and update README --- README.md | 8 
+++++---
 cubedash/_stac.py              |  8 +++++++-
 integration_tests/test_stac.py | 17 +++++++++++++++++
 3 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 0dbd2bc9c..aa5db8fe3 100644
--- a/README.md
+++ b/README.md
@@ -248,10 +248,12 @@ edit `.docker/settings_docker.py` and setup application config. Then `docker-com

 ## STAC API Extensions

-The STAC endpoint implements the [query](https://github.com/stac-api-extensions/query), [filter](https://github.com/stac-api-extensions/filter), [fields](https://github.com/stac-api-extensions/fields), and [sort](https://github.com/stac-api-extensions/sort) extensions, all of which are bound to the `search` endpoint as used with POST requests, with fields and sort additionally bound to the features endpoint.
+The STAC endpoint implements the [filter](https://github.com/stac-api-extensions/filter), [fields](https://github.com/stac-api-extensions/fields), and [sort](https://github.com/stac-api-extensions/sort) extensions, all of which are bound to the STAC API - Item Search (`/search`) endpoint. All support both GET and POST request syntax.

 Fields contained in the item properties must be prefixed with `properties.`, ex `properties.dea:dataset_maturity`.

-The implementation of `fields` differs somewhat from the suggested include/exclude semantics in that it does not permit for invalid STAC entities, so the `id`, `type`, `geometry`, `bbox`, `links`, `assets`, `properties.datetime`, and `stac_version` fields will always be included, regardless of user input.
+The implementation of `fields` differs somewhat from the suggested include/exclude semantics in that it does not permit for invalid STAC entities, so the `id`, `type`, `geometry`, `bbox`, `links`, `assets`, `properties.datetime`, `collection`, and `stac_version` fields will always be included, regardless of user input.

-The implementation of `filter` is limited, and currently only supports CQL2 JSON syntax with the following basic CQL2 operators: `AND`, `OR`, `=`, `>`, `>=`, `<`, `<=`, `<>`, `IS NULL`.
+The `sort` and `filter` implementations will recognise any syntactically valid version of a property name, which is to say, the STAC, eo3, and search field (as defined by the metadata type) variants of the name, with or without the `item.` or `properties.` prefixes. If a property does not exist for an item, `sort` will ignore it while `filter` will treat it as `NULL`.
+
+The `filter` extension supports both `cql2-text` and `cql2-json` for both GET and POST requests, and uses [pygeofilter](https://github.com/geopython/pygeofilter) to parse the CQL and convert it to a SQLAlchemy filter expression. `filter-crs` only accepts http://www.opengis.net/def/crs/OGC/1.3/CRS84 as a valid value.
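Taken together, a single GET request can exercise all three extensions. A usage sketch with the Python requests library, assuming a local Explorer instance on port 8080 and the ga_ls8c_ard_3 product from the integration tests (both are assumptions for illustration, not part of this patch):

import requests

resp = requests.get(
    "http://localhost:8080/stac/search",
    params={
        "collection": "ga_ls8c_ard_3",
        "limit": 5,
        # GET requests default to cql2-text
        "filter": "collection='ga_ls8c_ard_3' AND view:sun_azimuth > 5",
        # a '-' prefix sorts descending; unprefixed fields sort ascending
        "sortby": "-properties.datetime",
        # default fields are always kept, so only extras need listing
        "fields": "properties.title",
    },
)
resp.raise_for_status()
for item in resp.json()["features"]:
    print(item["id"], item["properties"]["datetime"])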
diff --git a/cubedash/_stac.py b/cubedash/_stac.py index e4449cc03..4df8195ee 100644 --- a/cubedash/_stac.py +++ b/cubedash/_stac.py @@ -557,8 +557,14 @@ def _handle_search_request( "Only 'id', 'collection', and Item properties can be used to sort results.", ) - filter_cql = request_args.get("filter", default=None, type=_filter_arg) filter_lang = request.args.get("filter-lang", default=None) + filter_cql = request_args.get("filter", default=None, type=_filter_arg) + filter_crs = request.args.get("filter-crs", default=None) + if filter_crs and filter_crs != "https://www.opengis.net/def/crs/OGC/1.3/CRS84": + abort( + 400, + "filter-crs only accepts 'https://www.opengis.net/def/crs/OGC/1.3/CRS84' as a valid value.", + ) if filter_lang is None and filter_cql is not None: # If undefined, defaults to cql2-text for a GET request and cql2-json for a POST request. if method == "GET": diff --git a/integration_tests/test_stac.py b/integration_tests/test_stac.py index e76870448..c051eec45 100644 --- a/integration_tests/test_stac.py +++ b/integration_tests/test_stac.py @@ -1519,3 +1519,20 @@ def test_stac_filter_extension(stac_client: FlaskClient): "/stac/search?filter=collection='ga_ls8c_ard_3' AND foo IS NULL" ) assert (len(rv.json.get("features"))) == 21 + + # filter-crs invalid value + rv: Response = stac_client.post( + "/stac/search", + data=json.dumps( + { + "product": "ga_ls8c_ard_3", + "time": "2022-01-01T00:00:00/2022-12-31T00:00:00", + "limit": OUR_DATASET_LIMIT, + "_full": True, + "filter-crs": "http://www.opengis.net/def/crs/OGC/1.3/CRS83", + "filter": filter_json, + } + ), + headers={"Content-Type": "application/json", "Accept": "application/json"}, + ) + assert rv.status_code == 400 From aeec345c5223475a4021a68b06cce7fd70bda128 Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Fri, 12 Apr 2024 04:42:50 +0000 Subject: [PATCH 12/15] update rtd-requirements --- docs/rtd-requirements.txt | 156 ++++++++++++++++++++++---------------- 1 file changed, 89 insertions(+), 67 deletions(-) diff --git a/docs/rtd-requirements.txt b/docs/rtd-requirements.txt index 86485c7c0..06147196e 100644 --- a/docs/rtd-requirements.txt +++ b/docs/rtd-requirements.txt @@ -1,41 +1,43 @@ # -# This file is autogenerated by pip-compile with python 3.8 -# To update, run: +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: # # pip-compile setup.py # -affine==2.3.1 +affine==2.4.0 # via # datacube # eodatasets3 # rasterio -attrs==22.1.0 +attrs==22.2.0 # via # cattrs + # datacube # eodatasets3 # fiona # jsonschema # rasterio + # referencing blinker==1.5 # via sentry-sdk boltons==21.0.0 # via eodatasets3 -boto3==1.26.2 +boto3==1.26.62 # via eodatasets3 -botocore==1.29.2 +botocore==1.29.62 # via # boto3 # eodatasets3 # s3transfer cachelib==0.9.0 # via flask-caching -cachetools==5.2.0 +cachetools==5.3.0 # via # datacube # datacube-explorer (setup.py) cattrs==22.2.0 # via eodatasets3 -certifi==2022.9.24 +certifi==2022.12.7 # via # fiona # pyproj @@ -44,9 +46,9 @@ certifi==2022.9.24 # sentry-sdk cftime==1.6.2 # via netcdf4 -charset-normalizer==2.1.1 +charset-normalizer==3.0.1 # via requests -ciso8601==2.2.0 +ciso8601==2.3.0 # via eodatasets3 click==8.1.3 # via @@ -68,29 +70,31 @@ cligj==0.7.2 # via # fiona # rasterio -cloudpickle==2.2.0 +cloudpickle==2.2.1 # via # dask # datacube # distributed -dask[array]==2022.10.2 +dask[array]==2023.1.1 # via # datacube # distributed -datacube==1.8.6 +datacube==1.8.10 # via # datacube-explorer (setup.py) # eodatasets3 +dateparser==1.2.0 + # via 
pygeofilter defusedxml==0.7.1 # via eodatasets3 -distributed==2022.10.2 +distributed==2023.1.1 # via datacube -eodatasets3==0.29.5 - # via datacube-explorer (setup.py) -exceptiongroup==1.0.0 - # via cattrs -fiona==1.8.22 +eodatasets3==0.30.1 # via datacube-explorer (setup.py) +fiona==1.9.0 + # via + # datacube-explorer (setup.py) + # eodatasets3 flask==2.1.3 # via # datacube-explorer (setup.py) @@ -98,28 +102,28 @@ flask==2.1.3 # flask-cors # flask-themer # sentry-sdk -flask-caching==2.0.1 +flask-caching==2.0.2 # via datacube-explorer (setup.py) flask-cors==3.0.10 # via datacube-explorer (setup.py) flask-themer==1.4.3 # via datacube-explorer (setup.py) -fsspec==2022.10.0 +fsspec==2023.1.0 # via dask -geoalchemy2==0.12.5 - # via datacube-explorer (setup.py) +geoalchemy2==0.13.1 + # via + # datacube + # datacube-explorer (setup.py) geographiclib==2.0 # via datacube-explorer (setup.py) -greenlet==2.0.0.post0 +greenlet==2.0.2 # via sqlalchemy +h5py==3.10.0 + # via eodatasets3 heapdict==1.0.1 # via zict idna==3.4 # via requests -importlib-metadata==5.0.0 - # via flask -importlib-resources==5.10.0 - # via jsonschema itsdangerous==2.1.2 # via flask jinja2==3.1.2 @@ -131,17 +135,21 @@ jmespath==1.0.1 # via # boto3 # botocore -jsonschema==4.17.0 +jsonschema==4.20.0 # via # datacube # eodatasets3 -lark-parser==0.12.0 - # via datacube +jsonschema-specifications==2023.12.1 + # via jsonschema +lark==0.12.0 + # via + # datacube + # pygeofilter locket==1.0.0 # via # distributed # partd -markupsafe==2.1.1 +markupsafe==2.1.2 # via # datacube-explorer (setup.py) # jinja2 @@ -150,93 +158,108 @@ msgpack==1.0.4 # via distributed munch==2.5.0 # via fiona -netcdf4==1.6.1 +netcdf4==1.6.2 # via datacube -numpy==1.23.4 +numpy==1.24.1 # via # cftime # dask # datacube # eodatasets3 + # h5py # netcdf4 # pandas # pyorbital # rasterio # scipy + # shapely # snuggs # xarray -orjson==3.8.1 +orjson==3.8.5 # via datacube-explorer (setup.py) -packaging==21.3 +packaging==23.0 # via # dask + # datacube # distributed # geoalchemy2 # xarray -pandas==1.5.1 +pandas==1.5.3 # via # datacube # xarray partd==1.3.0 # via dask -pkgutil-resolve-name==1.3.10 - # via jsonschema -psutil==5.9.3 +psutil==5.9.4 # via distributed psycopg2==2.9.5 # via datacube +pygeofilter==0.2.1 + # via datacube-explorer (setup.py) +pygeoif==1.4.0 + # via pygeofilter pyorbital==1.7.3 # via datacube-explorer (setup.py) pyparsing==3.0.9 - # via - # packaging - # snuggs -pyproj==3.4.0 + # via snuggs +pyproj==3.4.1 # via # datacube # datacube-explorer (setup.py) # eodatasets3 -pyrsistent==0.19.2 - # via jsonschema -pystac==1.6.1 - # via eodatasets3 +pystac==1.9.0 + # via + # datacube-explorer (setup.py) + # eodatasets3 python-dateutil==2.8.2 # via # botocore # datacube # datacube-explorer (setup.py) + # dateparser # pandas # pystac python-rapidjson==1.9 # via eodatasets3 -pytz==2022.6 +pytz==2022.7.1 # via # datacube-explorer (setup.py) + # dateparser # pandas pyyaml==6.0 # via # dask # datacube # distributed -rasterio==1.3.3 +rasterio==1.3.5 # via # datacube # eodatasets3 -requests==2.28.1 +referencing==0.32.0 + # via + # jsonschema + # jsonschema-specifications +regex==2023.12.25 + # via dateparser +requests==2.28.2 # via pyorbital +rpds-py==0.16.2 + # via + # jsonschema + # referencing ruamel-yaml==0.17.21 - # via eodatasets3 -ruamel-yaml-clib==0.2.7 - # via ruamel-yaml + # via + # datacube + # eodatasets3 s3transfer==0.6.0 # via boto3 -scipy==1.9.3 +scipy==1.10.0 # via # eodatasets3 # pyorbital -sentry-sdk[flask]==1.10.1 +sentry-sdk[flask]==1.14.0 # via 
datacube-explorer (setup.py) -shapely==1.8.5.post1 +shapely==2.0.1 # via # datacube # datacube-explorer (setup.py) @@ -245,7 +268,6 @@ simplekml==1.3.6 # via datacube-explorer (setup.py) six==1.16.0 # via - # fiona # flask-cors # munch # python-dateutil @@ -253,12 +275,12 @@ snuggs==1.4.7 # via rasterio sortedcontainers==2.4.0 # via distributed -sqlalchemy==1.4.42 +sqlalchemy==1.4.46 # via # datacube # datacube-explorer (setup.py) # geoalchemy2 -structlog==22.1.0 +structlog==22.3.0 # via # datacube-explorer (setup.py) # eodatasets3 @@ -270,9 +292,13 @@ toolz==0.12.0 # datacube # distributed # partd -tornado==6.1 +tornado==6.2 # via distributed -urllib3==1.26.12 +typing-extensions==4.11.0 + # via pygeoif +tzlocal==5.2 + # via dateparser +urllib3==1.26.14 # via # botocore # distributed @@ -280,16 +306,12 @@ urllib3==1.26.12 # sentry-sdk werkzeug==2.2.2 # via flask -xarray==2022.10.0 +xarray==2023.1.0 # via # datacube # eodatasets3 zict==2.2.0 # via distributed -zipp==3.10.0 - # via - # importlib-metadata - # importlib-resources # The following packages are considered to be unsafe in a requirements file: # setuptools From 3e04d3a7282a62f34510a048e4713d97aa8afca6 Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Mon, 15 Apr 2024 06:22:58 +0000 Subject: [PATCH 13/15] update postgis version for tests, fix tests --- cubedash/_stac.py | 16 +++++++++---- cubedash/summary/_stores.py | 37 +++++++++++++++++------------- cubedash/testutils/database.py | 2 +- integration_tests/test_stac.py | 42 +++++++++++++++++++--------------- 4 files changed, 57 insertions(+), 40 deletions(-) diff --git a/cubedash/_stac.py b/cubedash/_stac.py index 4df8195ee..1ba28c2b3 100644 --- a/cubedash/_stac.py +++ b/cubedash/_stac.py @@ -468,6 +468,7 @@ def _format(val: str) -> dict[str, str]: return {"field": val[1:], "direction": "desc"} if val.startswith("+"): return {"field": val[1:], "direction": "asc"} + # default is ascending return {"field": val.strip(), "direction": "asc"} if isinstance(arg, str): @@ -476,7 +477,8 @@ def _format(val: str) -> dict[str, str]: if isinstance(arg[0], str): return [_format(a) for a in arg] if isinstance(arg[0], dict): - return arg + for a in arg: + a["field"] = _remove_prefixes(a["field"]) return arg @@ -557,9 +559,9 @@ def _handle_search_request( "Only 'id', 'collection', and Item properties can be used to sort results.", ) - filter_lang = request.args.get("filter-lang", default=None) + filter_lang = request_args.get("filter-lang", default=None, type=str) filter_cql = request_args.get("filter", default=None, type=_filter_arg) - filter_crs = request.args.get("filter-crs", default=None) + filter_crs = request_args.get("filter-crs", default=None) if filter_crs and filter_crs != "https://www.opengis.net/def/crs/OGC/1.3/CRS84": abort( 400, @@ -793,7 +795,13 @@ def search_stac_items( ) if include_total_count: count_matching = _model.STORE.get_count( - product_names=product_names, time=time, bbox=bbox, dataset_ids=dataset_ids + product_names=product_names, + time=time, + bbox=bbox, + intersects=intersects, + dataset_ids=dataset_ids, + filter_lang=filter_lang, + filter_cql=filter_cql, ) extra_properties["numberMatched"] = count_matching extra_properties["context"]["matched"] = count_matching diff --git a/cubedash/summary/_stores.py b/cubedash/summary/_stores.py index 9fb6e39d2..0ec261b16 100644 --- a/cubedash/summary/_stores.py +++ b/cubedash/summary/_stores.py @@ -1247,6 +1247,7 @@ def _get_field_exprs( .where(ODC_DATASET_TYPE.c.id == DATASET_SPATIAL.c.dataset_type_ref) .scalar_subquery() ) + 
field_exprs["datetime"] = DATASET_SPATIAL.c.center_time geom = func.ST_Transform(DATASET_SPATIAL.c.footprint, 4326) field_exprs["geometry"] = geom field_exprs["bbox"] = func.Box2D(geom).cast(String) @@ -1278,20 +1279,17 @@ def _add_order_to_query( ) -> Select: order_clauses = [] for s in sortby: - try: - field = field_exprs.get(s.get("field")) - # is there any way to check if sortable? - if field is not None: - asc = s.get("direction") == "asc" - if asc: - order_clauses.append(field) - else: - order_clauses.append(field.desc()) - except AttributeError: # there is no field by that name, ignore - # the spec does not specify a handling directive for unspecified fields, - # so we've chosen to ignore them to be in line with the other extensions - continue - + field = field_exprs.get(s.get("field")) + # is there any way to check if sortable? + if field is not None: + asc = s.get("direction") == "asc" + if asc: + order_clauses.append(field.asc()) + else: + order_clauses.append(field.desc()) + # there is no field by that name, ignore + # the spec does not specify a handling directive for unspecified fields, + # so we've chosen to ignore them to be in line with the other extensions query = query.order_by(*order_clauses) return query @@ -1354,7 +1352,14 @@ def get_count( """ Do the base select query to get the count of matching datasets. """ - query: Select = select([func.count()]).select_from(DATASET_SPATIAL) + if filter_cql: # to account the possibiity of 'collection' in the filter + query: Select = select([func.count()]).select_from( + DATASET_SPATIAL.join( + ODC_DATASET, onclause=ODC_DATASET.c.id == DATASET_SPATIAL.c.id + ) + ) + else: + query: Select = select([func.count()]).select_from(DATASET_SPATIAL) query = self._add_fields_to_query( query, @@ -1457,7 +1462,7 @@ def search_items( "Only full-dataset searches can be sorted by recently added" ) query = query.order_by(ODC_DATASET.c.added.desc()) - else: # order was provided as a sortby query + elif order: # order was provided as a sortby query query = self._add_order_to_query(query, field_exprs, order) query = query.limit(limit).offset( diff --git a/cubedash/testutils/database.py b/cubedash/testutils/database.py index 8fb3b537c..aa35ff22b 100644 --- a/cubedash/testutils/database.py +++ b/cubedash/testutils/database.py @@ -40,7 +40,7 @@ def postgresql_server(): else: client = docker.from_env() container = client.containers.run( - "postgis/postgis:14-3.3-alpine", + "postgis/postgis:16-3.4", auto_remove=True, remove=True, detach=True, diff --git a/integration_tests/test_stac.py b/integration_tests/test_stac.py index c051eec45..9a7a45199 100644 --- a/integration_tests/test_stac.py +++ b/integration_tests/test_stac.py @@ -524,7 +524,7 @@ def expect_404(url: str, message_contains: str = None): "/collections/ls7_nbar_scene/items" "?datetime=2000-01-01/2000-01-01&bbox=-48.206,-14.195,-45.067,-12.272", "/stac/collections/ls7_nbar_scene/items" - "?datetime=2000-01-01/2000-01-01&bbox=-48.206,-14.195,-45.067,-12.272", + "?datetime=2000-01-01%2F2000-01-01&bbox=-48.206,-14.195,-45.067,-12.272", ), ( "/collections/ls7_nbar_scene/items/0c5b625e-5432-4911-9f7d-f6b894e27f3c", @@ -1440,7 +1440,7 @@ def test_stac_sortby_extension(stac_client: FlaskClient): rv: Response = stac_client.get( "/stac/search?collection=ga_ls8c_ard_3&limit=5&sortby=id,-datetime,foo" ) - assert rv.json == doc + assert rv.json["features"] == doc["features"] # sorting across pages next_link = _get_next_href(doc) @@ -1449,7 +1449,6 @@ def test_stac_sortby_extension(stac_client: FlaskClient): 
last_item = doc["features"][-1] next_item = rv.json["features"][0] assert last_item["id"] < next_item["id"] - assert last_item["properties"]["datetime"] > next_item["properties"]["datetime"] def test_stac_filter_extension(stac_client: FlaskClient): @@ -1462,7 +1461,7 @@ def test_stac_filter_extension(stac_client: FlaskClient): }, { "op": ">=", - "args": [{"property": "cloud_cover"}, float(2)], + "args": [{"property": "eo:cloud_cover"}, float(2)], }, ], } @@ -1481,16 +1480,32 @@ def test_stac_filter_extension(stac_client: FlaskClient): ) assert rv.status_code == 200 features = rv.json.get("features") - assert len(features) == 2 + assert len(features) == rv.json.get("numberMatched") == 2 ids = [f["id"] for f in features] assert "fc792b3b-a685-4c0f-9cf6-f5257f042c64" in ids assert "192276c6-8fa4-46a9-8bc6-e04e157974b9" in ids # test cql2-text - filter_text = "collection='ga_ls8c_ard_3' AND view:sun_azimuth > 5" + filter_text = "collection='ga_ls8c_ard_3' AND dataset_maturity <> 'final' AND cloud_cover >= 2" rv: Response = stac_client.get(f"/stac/search?filter={filter_text}") - features = rv.json.get("features") - assert len(features) == 9 + assert rv.json.get("numberMatched") == 2 + + filter_text = "view:sun_azimuth < 40 AND dataset_maturity = 'final'" + rv: Response = stac_client.get( + f"/stac/search?collections=ga_ls8c_ard_3&filter={filter_text}" + ) + assert rv.json.get("numberMatched") == 4 + + # test invalid property name treated as null + rv: Response = stac_client.get( + "/stac/search?filter=item.collection='ga_ls8c_ard_3' AND properties.foo > 2" + ) + assert rv.json.get("numberMatched") == 0 + + rv: Response = stac_client.get( + "/stac/search?filter=collection='ga_ls8c_ard_3' AND foo IS NULL" + ) + assert rv.json.get("numberMatched") == 21 # test lang mismatch rv: Response = stac_client.post( @@ -1509,17 +1524,6 @@ def test_stac_filter_extension(stac_client: FlaskClient): ) assert rv.status_code == 400 - # test invalid property name treated as null - rv: Response = stac_client.get( - "/stac/search?filter=item.collection='ga_ls8c_ard_3' AND properties.foo != 2" - ) - assert len(rv.json.get("features")) == 0 - - rv: Response = stac_client.get( - "/stac/search?filter=collection='ga_ls8c_ard_3' AND foo IS NULL" - ) - assert (len(rv.json.get("features"))) == 21 - # filter-crs invalid value rv: Response = stac_client.post( "/stac/search", From 7f06687fa086c4b43c05eafef98bfcddef41dfcf Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Mon, 15 Apr 2024 06:54:03 +0000 Subject: [PATCH 14/15] fix remaining tests --- cubedash/summary/_stores.py | 2 +- integration_tests/test_stac.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cubedash/summary/_stores.py b/cubedash/summary/_stores.py index 0ec261b16..eb8eb42ca 100644 --- a/cubedash/summary/_stores.py +++ b/cubedash/summary/_stores.py @@ -34,7 +34,7 @@ from pygeofilter.backends.evaluator import handle from pygeofilter.backends.sqlalchemy.evaluate import SQLAlchemyFilterEvaluator from pygeofilter.parsers.cql2_json import parse as parse_cql2_json -from pygeofilter.parsers.ecql import parse as parse_cql2_text +from pygeofilter.parsers.cql2_text import parse as parse_cql2_text from shapely.geometry.base import BaseGeometry from sqlalchemy import ( DDL, diff --git a/integration_tests/test_stac.py b/integration_tests/test_stac.py index 9a7a45199..daaa36e0f 100644 --- a/integration_tests/test_stac.py +++ b/integration_tests/test_stac.py @@ -524,7 +524,7 @@ def expect_404(url: str, message_contains: str = None): 
"/collections/ls7_nbar_scene/items" "?datetime=2000-01-01/2000-01-01&bbox=-48.206,-14.195,-45.067,-12.272", "/stac/collections/ls7_nbar_scene/items" - "?datetime=2000-01-01%2F2000-01-01&bbox=-48.206,-14.195,-45.067,-12.272", + "?datetime=2000-01-01/2000-01-01&bbox=-48.206,-14.195,-45.067,-12.272", ), ( "/collections/ls7_nbar_scene/items/0c5b625e-5432-4911-9f7d-f6b894e27f3c", From f2249dc86fac0eb152327e7899d445345bd2e723 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 13 Jun 2024 00:07:33 +0000 Subject: [PATCH 15/15] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- docs/rtd-requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/rtd-requirements.txt b/docs/rtd-requirements.txt index 25f1ed65b..546a5f5f0 100644 --- a/docs/rtd-requirements.txt +++ b/docs/rtd-requirements.txt @@ -86,10 +86,10 @@ datacube==1.8.18 dateparser==1.2.0 # via pygeofilter defusedxml==0.7.1 - # via eodatasets3 -distributed==2023.1.1 # via datacube deprecat==2.1.1 + # via eodatasets3 +distributed==2023.1.1 # via datacube eodatasets3==0.30.1 # via datacube-explorer (setup.py)