From 8d44566464ee26ba98c3e5d50c177cd3f2deb0b1 Mon Sep 17 00:00:00 2001 From: Sylvain Brunato Date: Wed, 22 May 2024 11:22:39 +0200 Subject: [PATCH] fix: harmonized regex address retrieval mechanism --- eodag_cube/api/product/drivers/generic.py | 33 +++++++++---- eodag_cube/api/product/drivers/stac_assets.py | 49 ++++++------------- 2 files changed, 38 insertions(+), 44 deletions(-) diff --git a/eodag_cube/api/product/drivers/generic.py b/eodag_cube/api/product/drivers/generic.py index 59e7156..c029e92 100644 --- a/eodag_cube/api/product/drivers/generic.py +++ b/eodag_cube/api/product/drivers/generic.py @@ -17,6 +17,8 @@ # limitations under the License. from __future__ import annotations +import logging +import re from pathlib import Path from typing import TYPE_CHECKING @@ -29,6 +31,8 @@ if TYPE_CHECKING: from eodag.api.product._product import EOProduct +logger = logging.getLogger("eodag-cube.driver.generic") + class GenericDriver(DatasetDriver): """Generic Driver for products that need to be downloaded""" @@ -48,16 +52,27 @@ def get_data_address(self, eo_product: EOProduct, band: str) -> str: product_location_scheme = eo_product.location.split("://")[0] if product_location_scheme == "file": - filenames = Path(uri_to_path(eo_product.location)).glob(f"**/*{band}*") + p = re.compile(rf"{band}", re.IGNORECASE) + matching_files = [] + for f_path in Path(uri_to_path(eo_product.location)).glob("**/*"): + f_str = str(f_path.resolve()) + if p.search(f_str): + try: + # files readable by rasterio + rasterio.drivers.driver_from_extension(f_path) + matching_files.append(f_str) + logger.debug(f"Matching band: {f_str}") + except ValueError: + pass + + if len(matching_files) == 1: + return matching_files[0] + + raise AddressNotFound( + rf"Please adapt given band parameter ('{band}') to match only file: " + rf"{len(matching_files)} files found matching {p}" + ) - for filename in filenames: - try: - # return the first file readable by rasterio - rasterio.drivers.driver_from_extension(filename) - return str(filename.resolve()) - except ValueError: - pass - raise AddressNotFound raise UnsupportedDatasetAddressScheme( "eo product {} is accessible through a location scheme that is not yet " "supported by eodag: {}".format(eo_product, product_location_scheme) diff --git a/eodag_cube/api/product/drivers/stac_assets.py b/eodag_cube/api/product/drivers/stac_assets.py index 2e63349..73543b0 100644 --- a/eodag_cube/api/product/drivers/stac_assets.py +++ b/eodag_cube/api/product/drivers/stac_assets.py @@ -17,6 +17,7 @@ # limitations under the License. from __future__ import annotations +import logging import re from typing import TYPE_CHECKING @@ -26,6 +27,8 @@ if TYPE_CHECKING: from eodag.api.product._product import EOProduct +logger = logging.getLogger("eodag-cube.driver.stac_assets") + class StacAssets(DatasetDriver): """Driver for Stac Assets""" @@ -42,47 +45,23 @@ def get_data_address(self, eo_product: EOProduct, band: str) -> str: :raises: :class:`~eodag.utils.exceptions.AddressNotFound` :raises: :class:`~eodag.utils.exceptions.UnsupportedDatasetAddressScheme` """ - error_message = "" - - # try using exact - p = re.compile(rf"^{band}$", re.IGNORECASE) - matching_keys = [ - s - for s in eo_product.assets.keys() + p = re.compile(rf"{band}", re.IGNORECASE) + matching_keys = [] + for s in eo_product.assets.keys(): if ( ( "roles" in eo_product.assets[s] and "data" in eo_product.assets[s]["roles"] ) or ("roles" not in eo_product.assets[s]) - ) - and p.match(s) - ] + ) and p.search(s): + matching_keys.append(s) + logger.debug(f"Matching asset key: {s}") + if len(matching_keys) == 1: return str(eo_product.assets[matching_keys[0]]["href"]) - else: - error_message += ( - rf"{len(matching_keys)} assets keys found matching {p} AND " - ) - # try to find keys containing given band - p = re.compile(rf"^.*{band}.*$", re.IGNORECASE) - matching_keys = [ - s - for s in eo_product.assets.keys() - if ( - ( - "roles" in eo_product.assets[s] - and "data" in eo_product.assets[s]["roles"] - ) - or ("roles" not in eo_product.assets[s]) - ) - and p.match(s) - ] - if len(matching_keys) == 1: - return str(eo_product.assets[matching_keys[0]]["href"]) - else: - raise AddressNotFound( - rf"Please adapt given band parameter ('{band}') to match only one asset: {error_message}" - rf"{len(matching_keys)} assets keys found matching {p}" - ) + raise AddressNotFound( + rf"Please adapt given band parameter ('{band}') to match only one asset: " + rf"{len(matching_keys)} assets keys found matching {p}" + )