
Commit

Merge branch 'main' into aws-check
JessicaS11 authored Feb 19, 2024
2 parents aaf884d + 8b00ea3 commit ca7e907
Showing 23 changed files with 1,492 additions and 1,301 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/integration-test.yml
@@ -21,7 +21,7 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Get full python version
2 changes: 1 addition & 1 deletion .github/workflows/static-analysis.yml
@@ -9,7 +9,7 @@ jobs:
- uses: actions/checkout@v4

- name: Install Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: 3.x

4 changes: 2 additions & 2 deletions .github/workflows/test-mindeps.yml
@@ -26,7 +26,7 @@ jobs:
- name: Checkout source
uses: actions/[email protected]
- name: Setup Conda Environment
uses: conda-incubator/setup-miniconda@v2.2.0
uses: conda-incubator/setup-miniconda@v3.0.1
with:
miniforge-variant: Mambaforge
miniforge-version: latest
@@ -46,4 +46,4 @@ jobs:
run: bash scripts/test.sh

- name: Upload coverage
uses: codecov/codecov-action@v1
uses: codecov/codecov-action@v3
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
@@ -16,7 +16,7 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Get full python version
10 changes: 9 additions & 1 deletion CHANGELOG.md
@@ -1,5 +1,13 @@
# Changelog

## [Unreleased]

* Bug fixes:
* fixed #439 by implementing more trusted domains in the SessionWithRedirection
* fixed #438 by using an authenticated session for hits()
* Enhancements:
* addressing #427 by adding parameters to collection query

## [v0.8.2] 2023-12-06
* Bug fixes:
* Enable AWS check with IMDSv2
@@ -167,7 +175,7 @@
- Add basic classes to interact with NASA CMR, EDL and cloud access.
- Basic object formatting.

[Unreleased]: https://github.com/nsidc/earthaccess/compare/v0.5.2...HEAD
[Unreleased]: https://github.com/nsidc/earthaccess/compare/v0.8.2...HEAD
[v0.5.2]: https://github.com/nsidc/earthaccess/releases/tag/v0.5.2
[v0.5.1]: https://github.com/nsidc/earthaccess/releases/tag/v0.5.1
[v0.5.0]: https://github.com/nsidc/earthaccess/releases/tag/v0.4.0
4 changes: 1 addition & 3 deletions README.md
@@ -65,8 +65,6 @@ With *earthaccess* we can login, search and download data with a few lines of code

The only requirement to use this library is to open a free account with NASA [EDL](https://urs.earthdata.nasa.gov).

<a href="https://urs.earthdata.nasa.gov"><img src="https://auth.ops.maap-project.org/cas/images/urs-logo.png" /></a>


### **Authentication**

@@ -99,7 +97,7 @@ Once you are authenticated with NASA EDL you can:
### **Searching for data**

Once we have selected our dataset we can search for the data granules using *doi*, *short_name* or *concept_id*.
If we are not sure or we don't know how to search for a particular dataset, we can start with the ["Introducing NASA earthaccess"](https://nsidc.github.io/earthaccess/tutorials/demo/#querying-for-datasets) tutorial or through the [NASA Earthdata Search portal](https://search.earthdata.nasa.gov/). For a complete list of search parameters we can use visit the extended [API documentation](https://nsidc.github.io/earthaccess/user-reference/api/api/).
If we are not sure or we don't know how to search for a particular dataset, we can start with the ["Introducing NASA earthaccess"](https://nsidc.github.io/earthaccess/tutorials/demo/#querying-for-datasets) tutorial or through the [NASA Earthdata Search portal](https://search.earthdata.nasa.gov/). For a complete list of search parameters we can use visit the extended [API documentation](https://earthaccess.readthedocs.io/en/latest/user-reference/api/api/).

```python

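The README's Python example is cut off in this diff view. As a hedged sketch of the login–search–download flow it introduces — the short name, dates, and bounding box below are illustrative, and `earthaccess` must be installed with EDL credentials in `~/.netrc` — the full flow might look like:

```python
# Sketch of the README's login/search/download flow.
# Assumptions: earthaccess is installed; ~/.netrc holds EDL credentials;
# the short_name, dates, and bounding box are illustrative only.

def build_search_kwargs(short_name, temporal, bounding_box):
    """Assemble the keyword arguments accepted by earthaccess.search_data()."""
    return {
        "short_name": short_name,
        "temporal": temporal,          # ("yyyy-mm-dd", "yyyy-mm-dd")
        "bounding_box": bounding_box,  # (ll_lon, ll_lat, ur_lon, ur_lat)
    }

def demo():
    """Run the full flow; needs network access and EDL credentials."""
    import earthaccess  # third-party; performs network calls

    earthaccess.login(strategy="netrc")
    kwargs = build_search_kwargs(
        "ATL08", ("2023-01-01", "2023-01-31"), (-180, -90, 180, 90)
    )
    granules = earthaccess.search_data(count=10, **kwargs)
    earthaccess.download(granules, local_path="./data")
```

`demo()` is not invoked here because it hits NASA CMR and EDL; call it in an environment with credentials.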
12 changes: 11 additions & 1 deletion binder/environment-dev.yml
@@ -4,12 +4,22 @@ channels:
dependencies:
# This environment bootstraps poetry, the actual dev environment
# is installed and managed with poetry
- python=3.9
- python=3.10
- jupyterlab=3
- xarray>=0.19
- ipyleaflet>=0.13
- h5netcdf>=0.11
- cartopy

- mkdocs>=1.2
- mkdocs-material>=7.1,<9.0
- markdown-include>=0.6
- mkdocstrings>=0.19.0
- mkdocstrings-python
- mkdocs-jupyter>=0.19.0
- pymdown-extensions>=9.2

- pip
- pip:
- poetry
- markdown-callouts>=0.2.0
145 changes: 68 additions & 77 deletions earthaccess/api.py
@@ -13,8 +13,8 @@
from .utils import _validation as validate


def _normalize_location(location: Union[str, None]) -> Union[str, None]:
"""Handle user-provided `daac` and `provider` values
def _normalize_location(location: Optional[str]) -> Optional[str]:
"""Handle user-provided `daac` and `provider` values.
These values must have a capital letter as the first character
followed by capital letters, numbers, or an underscore. Here we
@@ -31,32 +31,29 @@ def _normalize_location(location: Union[str, None]) -> Union[str, None]:
def search_datasets(
count: int = -1, **kwargs: Any
) -> List[earthaccess.results.DataCollection]:
"""Search datasets using NASA's CMR
"""Search datasets using NASA's CMR.
[https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html](https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html)
Parameters:
count: Number of records to get, -1 = all
kwargs (Dict):
arguments to CMR:
count (Integer): Number of records to get, -1 = all
kwargs (Dict): arguments to CMR:
* **keyword**: case insensitive and support wild cards ? and *,
* **keyword**: case-insensitive and supports wildcards ? and *
* **short_name**: e.g. ATL08
* **doi**: DOI for a dataset
* **daac**: e.g. NSIDC or PODAAC
* **provider**: particular to each DAAC, e.g. POCLOUD, LPDAAC etc.
* **temporal**: a tuple representing temporal bounds in the form
`("yyyy-mm-dd", "yyyy-mm-dd")`
* **bounding_box**: a tuple representing spatial bounds in the form
`(lower_left_lon, lower_left_lat, upper_right_lon, upper_right_lat)`
* **temporal**: ("yyyy-mm-dd", "yyyy-mm-dd")
* **bounding_box**: (lower_left_lon, lower_left_lat ,
upper_right_lon, upper_right_lat)
Returns:
an list of DataCollection results that can be used to get
information such as concept_id, doi, etc. about a dataset.
A list of DataCollection results that can be used to get information about a
dataset, e.g. concept_id, doi, etc.
Examples:
```python
datasets = earthaccess.search_datasets(
@@ -89,27 +86,24 @@ def search_data(
[https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html](https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html)
Parameters:
count: Number of records to get, -1 = all
kwargs (Dict):
arguments to CMR:
count (Integer): Number of records to get, -1 = all
kwargs (Dict): arguments to CMR:
* **short_name**: dataset short name e.g. ATL08
* **short_name**: dataset short name, e.g. ATL08
* **version**: dataset version
* **doi**: DOI for a dataset
* **daac**: e.g. NSIDC or PODAAC
* **provider**: particular to each DAAC, e.g. POCLOUD, LPDAAC etc.
* **temporal**: a tuple representing temporal bounds in the form
`("yyyy-mm-dd", "yyyy-mm-dd")`
* **bounding_box**: a tuple representing spatial bounds in the form
`(lower_left_lon, lower_left_lat, upper_right_lon, upper_right_lat)`
* **temporal**: ("yyyy-mm-dd", "yyyy-mm-dd")
* **bounding_box**: (lower_left_lon, lower_left_lat ,
upper_right_lon, upper_right_lat)
Returns:
Granules: a list of DataGranules that can be used to access
the granule files by using `download()` or `open()`.
a list of DataGranules that can be used to access the granule files by using
`download()` or `open()`.
Examples:
```python
datasets = earthaccess.search_data(
@@ -131,22 +125,20 @@ def search_data(
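The `search_datasets` / `search_data` examples in the docstrings above are truncated by the diff view. A hedged sketch of a complete dataset search — the keyword, DAAC, and date values are illustrative, and `concept_id()` is assumed from `DataCollection` — might be:

```python
# Completing the truncated search_datasets example; values are illustrative.

query = {
    "keyword": "sea surface temperature",  # case-insensitive, wildcards ? and *
    "daac": "PODAAC",
    "temporal": ("2020-01-01", "2020-12-31"),
}

def demo():
    """Run the dataset search; needs network access to NASA CMR."""
    import earthaccess  # third-party

    datasets = earthaccess.search_datasets(count=10, **query)
    for ds in datasets:
        # Each DataCollection result exposes metadata such as concept_id
        # and doi (method name assumed, not shown in this diff).
        print(ds.concept_id())
```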


def login(strategy: str = "all", persist: bool = False) -> Auth:
"""Authenticate with Earthdata login (https://urs.earthdata.nasa.gov/)
"""Authenticate with Earthdata login (https://urs.earthdata.nasa.gov/).
Parameters:
strategy:
An authentication method.
strategy (String): authentication method.
"all": (default) try all methods until one works
* **"all"**: (default) try all methods until one works
* **"interactive"**: enter username and password.
* **"netrc"**: retrieve username and password from ~/.netrc.
* **"environment"**: retrieve username and password from `$EARTHDATA_USERNAME` and `$EARTHDATA_PASSWORD`.
persist: will persist credentials in a .netrc file
"interactive": enter username and password.
"netrc": retrieve username and password from ~/.netrc.
"environment": retrieve username and password from $EARTHDATA_USERNAME and $EARTHDATA_PASSWORD.
persist (Boolean): will persist credentials in a .netrc file
Returns:
an instance of Auth.
An instance of Auth.
"""
if strategy == "all":
for strategy in ["environment", "netrc", "interactive"]:
@@ -168,19 +160,20 @@ def login(strategy: str = "all", persist: bool = False) -> Auth:
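The strategies listed in the `login()` docstring can be chosen programmatically. A minimal sketch, assuming credentials exist in the environment or `~/.netrc`:

```python
# Sketch of choosing a login() strategy, per the options documented above.
import os

def pick_strategy():
    """Prefer environment variables when both are set, else fall back to ~/.netrc."""
    if os.getenv("EARTHDATA_USERNAME") and os.getenv("EARTHDATA_PASSWORD"):
        return "environment"
    return "netrc"

def demo():
    """Authenticate; requires valid EDL credentials."""
    import earthaccess

    auth = earthaccess.login(strategy=pick_strategy(), persist=True)
    print(auth.authenticated)  # True on success
```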

def download(
granules: Union[DataGranule, List[DataGranule], str, List[str]],
local_path: Union[str, None],
local_path: Optional[str],
provider: Optional[str] = None,
threads: int = 8,
) -> List[str]:
"""Retrieves data granules from a remote storage system.
* If we run this in the cloud, we will be using S3 to move data to `local_path`
* If we run it outside AWS (us-west-2 region) and the dataset is cloud hostes we'll use HTTP links
* If we run this in the cloud, we will be using S3 to move data to `local_path`.
* If we run it outside AWS (us-west-2 region) and the dataset is cloud hosted,
we'll use HTTP links.
Parameters:
granules: a granule, list of granules, a granule link (HTTP), or a list of granule links (HTTP)
local_path: local directory to store the remote data granules
provider: if we download a list of URLs we need to specify the provider.
provider: if we download a list of URLs, we need to specify the provider.
threads: parallel number of threads to use to download the files, adjust as necessary, default = 8
Returns:
@@ -208,8 +201,10 @@ def open(
hosted on S3 or HTTPS by third party libraries like xarray.
Parameters:
granules: a list of granule instances **or** list of URLs, e.g. s3://some-granule,
if a list of URLs is passed we need to specify the data provider e.g. POCLOUD, NSIDC_CPRD etc.
granules: a list of granule instances **or** list of URLs, e.g. `s3://some-granule`.
If a list of URLs is passed, we need to specify the data provider.
provider: e.g. POCLOUD, NSIDC_CPRD, etc.
Returns:
a list of s3fs "file pointers" to s3 files.
"""
@@ -223,15 +218,16 @@
provider: Optional[str] = None,
results: Optional[List[earthaccess.results.DataGranule]] = None,
) -> Dict[str, Any]:
"""Returns temporary (1 hour) credentials for direct access to NASA S3 buckets, we can
use the daac name, the provider or a list of results from earthaccess.search_data()
if we use results earthaccess will use the metadata on the response to get the credentials,
this is useful for missions that do not use the same endpoint as their DAACs e.g. SWOT
"""Returns temporary (1 hour) credentials for direct access to NASA S3 buckets. We can
use the daac name, the provider, or a list of results from earthaccess.search_data().
If we use results, earthaccess will use the metadata on the response to get the credentials,
which is useful for missions that do not use the same endpoint as their DAACs, e.g. SWOT.
Parameters:
daac (String): a DAAC short_name like NSIDC or PODAAC etc
provider (String: if we know the provider for the DAAC e.g. POCLOUD, LPCLOUD etc.
results (list[earthaccess.results.DataGranule]): List of results from search_data()
daac: a DAAC short_name like NSIDC or PODAAC, etc.
provider: if we know the provider for the DAAC, e.g. POCLOUD, LPCLOUD etc.
results: List of results from search_data()
Returns:
a dictionary with S3 credentials for the DAAC or provider
"""
@@ -244,12 +240,10 @@
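A hedged sketch of requesting the temporary credentials described above. The response field names are typical of EDL S3-credential endpoints and are an assumption, not taken from this diff:

```python
# Expected response fields are an assumption about the EDL S3 endpoint.
EXPECTED_FIELDS = {"accessKeyId", "secretAccessKey", "sessionToken"}

def demo():
    """Requires EDL credentials; the result expires after one hour."""
    import earthaccess

    earthaccess.login()
    creds = earthaccess.get_s3_credentials(daac="NSIDC")
    missing = EXPECTED_FIELDS - set(creds)
    print("missing fields:", missing or "none")
```

Refresh credentials before long-running jobs, since they are only valid for one hour.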


def collection_query() -> Type[CollectionQuery]:
"""Returns a query builder instance for NASA collections (datasets)
"""Returns a query builder instance for NASA collections (datasets).
Parameters:
cloud_hosted (Boolean): initializes the query builder for cloud hosted collections.
Returns:
class earthaccess.DataCollections: a query builder instance for data collections.
a query builder instance for data collections.
"""
if earthaccess.__auth__.authenticated:
query_builder = DataCollections(earthaccess.__auth__)
@@ -261,11 +255,8 @@ class earthaccess.DataCollections: a query builder instance for data collections
def granule_query() -> Type[GranuleQuery]:
"""Returns a query builder instance for data granules
Parameters:
cloud_hosted (Boolean): initializes the query builder for a particular DOI
if we have it.
Returns:
class earthaccess.DataGranules: a query builder instance for data granules.
a query builder instance for data granules.
"""
if earthaccess.__auth__.authenticated:
query_builder = DataGranules(earthaccess.__auth__)
@@ -275,10 +266,10 @@ class earthaccess.DataGranules: a query builder instance for data granules.
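Both factories above return chainable query builders (`DataCollections` / `DataGranules`). The chaining pattern can be expressed generically; treat the specific method names in `demo()` (`keyword`, `short_name`, `get`) as assumptions about those classes, not something this diff shows:

```python
def apply_steps(builder, steps):
    """Apply (method_name, argument) pairs to a chainable query builder."""
    for name, arg in steps:
        builder = getattr(builder, name)(arg)
    return builder

def demo():
    """Requires network access; method names assumed from the builder classes."""
    import earthaccess

    earthaccess.login()
    cq = apply_steps(earthaccess.collection_query(), [("keyword", "elevation")])
    gq = apply_steps(earthaccess.granule_query(), [("short_name", "ATL08")])
    print(cq.get(5), gq.get(5))
```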


def get_fsspec_https_session() -> AbstractFileSystem:
"""Returns a fsspec session that can be used to access datafiles across many different DAACs
"""Returns a fsspec session that can be used to access datafiles across many different DAACs.
Returns:
class AbstractFileSystem: an fsspec instance able to access data across DAACs
An fsspec instance able to access data across DAACs.
Examples:
```python
@@ -289,19 +280,18 @@ class AbstractFileSystem: an fsspec instance able to access data across DAACs
with fs.open(DAAC_GRANULE) as f:
f.read(10)
```
"""
session = earthaccess.__store__.get_fsspec_session()
return session


def get_requests_https_session() -> requests.Session:
"""Returns a requests Session instance with an authorized bearer token
this is useful to make requests to restricted URLs like data granules or services that
"""Returns a requests Session instance with an authorized bearer token.
This is useful for making requests to restricted URLs, such as data granules or services that
require authentication with NASA EDL.
Returns:
class requests.Session: an authenticated requests Session instance.
An authenticated requests Session instance.
Examples:
```python
@@ -323,15 +313,17 @@ def get_s3fs_session(
provider: Optional[str] = None,
results: Optional[earthaccess.results.DataGranule] = None,
) -> s3fs.S3FileSystem:
"""Returns a fsspec s3fs file session for direct access when we are in us-west-2
"""Returns a fsspec s3fs file session for direct access when we are in us-west-2.
Parameters:
daac (String): Any DAAC short name e.g. NSIDC, GES_DISC
provider (String): Each DAAC can have a cloud provider, if the DAAC is specified, there is no need to use provider
results (list[class earthaccess.results.DataGranule]): A list of results from search_data(), earthaccess will use the metadata form CMR to obtain the S3 Endpoint
daac: Any DAAC short name e.g. NSIDC, GES_DISC
provider: Each DAAC can have a cloud provider.
If the DAAC is specified, there is no need to use provider.
results: A list of results from search_data().
`earthaccess` will use the metadata from CMR to obtain the S3 Endpoint.
Returns:
class s3fs.S3FileSystem: an authenticated s3fs session valid for 1 hour
An authenticated s3fs session valid for 1 hour.
"""
daac = _normalize_location(daac)
provider = _normalize_location(provider)
@@ -345,11 +337,10 @@ class s3fs.S3FileSystem: an authenticated s3fs session valid for 1 hour
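A hedged sketch of direct S3 access via `get_s3fs_session()`; the bucket/key below is a placeholder, and per the docstring this only works from us-west-2:

```python
def is_direct_access_region(region: str) -> bool:
    """Direct S3 access is only available in-region, per the docstring above."""
    return region == "us-west-2"

def demo():
    """Requires EDL credentials and an us-west-2 environment."""
    import earthaccess

    earthaccess.login()
    s3 = earthaccess.get_s3fs_session(daac="NSIDC")  # valid for 1 hour
    # Placeholder bucket/key; a real path comes from granule metadata.
    with s3.open("s3://some-bucket/some-granule.h5", "rb") as f:
        print(f.read(10))
```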


def get_edl_token() -> str:
"""Returns the current token used for EDL
"""Returns the current token used for EDL.
Returns:
str: EDL token
EDL token
"""
token = earthaccess.__auth__.token
return token
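The EDL token returned above can be used with plain `requests` outside earthaccess. A minimal sketch — the Bearer header scheme is a standard-HTTP assumption, and the URL is a placeholder:

```python
def bearer_header(token: str) -> dict:
    """Build an Authorization header from an EDL token string."""
    return {"Authorization": f"Bearer {token}"}

def demo():
    """Requires EDL credentials; the URL below is a placeholder."""
    import requests  # third-party
    import earthaccess

    earthaccess.login()
    token = earthaccess.get_edl_token()
    # The exact shape of the token value may vary; treat str as an assumption.
    resp = requests.get("https://example.com/protected",
                        headers=bearer_header(token))
    print(resp.status_code)
```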