Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: harmonized regex address retrieval mechanism #56

Merged
merged 1 commit into from
May 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 24 additions & 9 deletions eodag_cube/api/product/drivers/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
# limitations under the License.
from __future__ import annotations

import logging
import re
from pathlib import Path
from typing import TYPE_CHECKING

Expand All @@ -29,6 +31,8 @@
if TYPE_CHECKING:
from eodag.api.product._product import EOProduct

logger = logging.getLogger("eodag-cube.driver.generic")


class GenericDriver(DatasetDriver):
"""Generic Driver for products that need to be downloaded"""
Expand All @@ -48,16 +52,27 @@ def get_data_address(self, eo_product: EOProduct, band: str) -> str:
product_location_scheme = eo_product.location.split("://")[0]
if product_location_scheme == "file":

filenames = Path(uri_to_path(eo_product.location)).glob(f"**/*{band}*")
p = re.compile(rf"{band}", re.IGNORECASE)
matching_files = []
for f_path in Path(uri_to_path(eo_product.location)).glob("**/*"):
f_str = str(f_path.resolve())
if p.search(f_str):
try:
# files readable by rasterio
rasterio.drivers.driver_from_extension(f_path)
matching_files.append(f_str)
logger.debug(f"Matching band: {f_str}")
except ValueError:
pass

if len(matching_files) == 1:
return matching_files[0]

raise AddressNotFound(
rf"Please adapt given band parameter ('{band}') to match only file: "
rf"{len(matching_files)} files found matching {p}"
)

for filename in filenames:
try:
# return the first file readable by rasterio
rasterio.drivers.driver_from_extension(filename)
return str(filename.resolve())
except ValueError:
pass
raise AddressNotFound
raise UnsupportedDatasetAddressScheme(
"eo product {} is accessible through a location scheme that is not yet "
"supported by eodag: {}".format(eo_product, product_location_scheme)
Expand Down
49 changes: 14 additions & 35 deletions eodag_cube/api/product/drivers/stac_assets.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
# limitations under the License.
from __future__ import annotations

import logging
import re
from typing import TYPE_CHECKING

Expand All @@ -26,6 +27,8 @@
if TYPE_CHECKING:
from eodag.api.product._product import EOProduct

logger = logging.getLogger("eodag-cube.driver.stac_assets")


class StacAssets(DatasetDriver):
"""Driver for Stac Assets"""
Expand All @@ -42,47 +45,23 @@ def get_data_address(self, eo_product: EOProduct, band: str) -> str:
:raises: :class:`~eodag.utils.exceptions.AddressNotFound`
:raises: :class:`~eodag.utils.exceptions.UnsupportedDatasetAddressScheme`
"""
error_message = ""

# try using exact
p = re.compile(rf"^{band}$", re.IGNORECASE)
matching_keys = [
s
for s in eo_product.assets.keys()
p = re.compile(rf"{band}", re.IGNORECASE)
matching_keys = []
for s in eo_product.assets.keys():
if (
(
"roles" in eo_product.assets[s]
and "data" in eo_product.assets[s]["roles"]
)
or ("roles" not in eo_product.assets[s])
)
and p.match(s)
]
) and p.search(s):
matching_keys.append(s)
logger.debug(f"Matching asset key: {s}")

if len(matching_keys) == 1:
return str(eo_product.assets[matching_keys[0]]["href"])
else:
error_message += (
rf"{len(matching_keys)} assets keys found matching {p} AND "
)

# try to find keys containing given band
p = re.compile(rf"^.*{band}.*$", re.IGNORECASE)
matching_keys = [
s
for s in eo_product.assets.keys()
if (
(
"roles" in eo_product.assets[s]
and "data" in eo_product.assets[s]["roles"]
)
or ("roles" not in eo_product.assets[s])
)
and p.match(s)
]
if len(matching_keys) == 1:
return str(eo_product.assets[matching_keys[0]]["href"])
else:
raise AddressNotFound(
rf"Please adapt given band parameter ('{band}') to match only one asset: {error_message}"
rf"{len(matching_keys)} assets keys found matching {p}"
)
raise AddressNotFound(
rf"Please adapt given band parameter ('{band}') to match only one asset: "
rf"{len(matching_keys)} assets keys found matching {p}"
)
Loading