NLCD2016 Tree Canopy #1243

Draft: wants to merge 11 commits into main
2 changes: 2 additions & 0 deletions torchgeo/datasets/__init__.py
@@ -79,6 +79,7 @@
from .nasa_marine_debris import NASAMarineDebris
from .nccm import NCCM
from .nlcd import NLCD
from .nlcd_tree_canopy_cover import NLCDTreeCanopyCover
from .openbuildings import OpenBuildings
from .oscd import OSCD
from .pastis import PASTIS
@@ -203,6 +204,7 @@
"MapInWild",
"MillionAID",
"NASAMarineDebris",
"NLCDTreeCanopyCover",
"OSCD",
"PASTIS",
"PatternNet",
254 changes: 254 additions & 0 deletions torchgeo/datasets/nlcd_tree_canopy_cover.py
@@ -0,0 +1,254 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

"""National Land Cover Database (NLCD) Tree Canopy Cover dataset."""

import glob
import os
from typing import Any, Callable, Dict, List, Optional

import matplotlib.pyplot as plt
from rasterio.crs import CRS

from .geo import RasterDataset
from .utils import download_url, extract_archive


class NLCDTreeCanopyCover(RasterDataset):
"""Abstract base class for all NLCD Tree Canopy Cover datasets.

The `Multi-Resolution Land Characteristics (MRLC)
Consortium <https://www.mrlc.gov/>`_ provides 30m tree canopy cover raster datasets
generated by the United States Forest Service (USFS). The data covers the
conterminous United States, coastal Alaska, Hawaii, and Puerto Rico and consists
of continuous percent tree canopy estimates for each pixel across all land covers.
The data is derived from multi-spectral Landsat imagery and other available ground
and ancillary information. The datasets can be manually downloaded from the
following `link
<https://www.mrlc.gov/data/nlcd-2016-usfs-tree-canopy-cover-conus>`_.

If you use this dataset in your research, please cite the following paper:

* https://doi.org/10.14358/PERS.78.7.715

.. versionadded:: 0.5
"""

is_image = False
filename_glob = "nlcd_*treecanopy*.img"
filename_regex = r"""
    (?P<name>nlcd)
    _(?P<date>\d{4})
"""
date_format = "%Y"
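# With re.VERBOSE, the regex matches extracted rasters such as
# "nlcd_2016_treecanopy_2019_08_31.img", capturing name="nlcd" and date="2016"
# (parsed with ``date_format`` above).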

urls = {
2011: [
"https://s3-us-west-2.amazonaws.com/mrlc/nlcd_2011_treecanopy_2019_08_31.zip", # noqa: E501
"https://s3-us-west-2.amazonaws.com/mrlc/nlcd_tcc_coastalAK_2011.zip",
"https://s3-us-west-2.amazonaws.com/mrlc/nlcd_tcc_hi_2011.zip",
"https://s3-us-west-2.amazonaws.com/mrlc/nlcd_tcc_prusvi_2011.zip",
],
2016: [
"https://s3-us-west-2.amazonaws.com/mrlc/nlcd_2016_treecanopy_2019_08_31.zip", # noqa: E501
"https://s3-us-west-2.amazonaws.com/mrlc/nlcd_tcc_coastalAK_2016.zip",
"https://s3-us-west-2.amazonaws.com/mrlc/nlcd_tcc_hi_2016.zip",
"https://s3-us-west-2.amazonaws.com/mrlc/nlcd_tcc_prusvi_2016.zip",
],
}
# Filenames of the zip archives as hosted on the MRLC S3 bucket. The 2011
# archives extract to nlcd_2011_treecanopy_2019_08_31.img,
# nlcd_2011_coastal_alaska_treecanopy.img, nlcd_2011_hi_treecanopy_20191018.img,
# and nlcd_2011_prusvi_treecanopy_20191017.img, all matching ``filename_glob``.
filenames = {
    2011: [
        "nlcd_2011_treecanopy_2019_08_31.zip",
        "nlcd_tcc_coastalAK_2011.zip",
        "nlcd_tcc_hi_2011.zip",
        "nlcd_tcc_prusvi_2011.zip",
    ],
    2016: [
        "nlcd_2016_treecanopy_2019_08_31.zip",
        "nlcd_tcc_coastalAK_2016.zip",
        "nlcd_tcc_hi_2016.zip",
        "nlcd_tcc_prusvi_2016.zip",
    ],
}
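# MD5 checksums of the zip archives, index-aligned with ``urls`` and ``filenames``.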
md5s = {
2011: [
"3af2024f6b9889e6cd4927f91ee2939f",
"0bef748400b8a0ef2f8d56dba61d75a0",
"93a74a5607c5c1f4e0d471487c04fada",
"654ba8c25889a85645dc02d7c8d8bdcd",
],
2016: [
"0d349626ea6b84b27589fe55ad906998",
"78e4738cd3c36775edddae899370699e",
"87e0bcdc0c9a0ee78ed27229ce863caf",
"c83246c76834f740cfd8f0a03f4a2577",
],
}

def __init__(
self,
root: str = "data",
crs: Optional[CRS] = None,
res: Optional[float] = None,
years: List[int] = [2016],
transforms: Optional[Callable[[Dict[str, Any]], Dict[str, Any]]] = None,
cache: bool = True,
download: bool = False,
checksum: bool = False,
) -> None:
"""Initialize a new Dataset instance.
Args:
root: root directory where dataset can be found
crs: :term:`coordinate reference system (CRS)` to warp to
(defaults to the CRS of the first file found)
res: resolution of the dataset in units of CRS
(defaults to the resolution of the first file found)
years: list of years for which to use the NLCD TCC layers
transforms: a function/transform that takes an input sample
and returns a transformed version
cache: if True, cache file handle to speed up repeated sampling
download: if True, download dataset and store it in the root directory
checksum: if True, check the MD5 after downloading files (may be slow)

Raises:
FileNotFoundError: if no files are found in ``root``
RuntimeError: if ``download=False`` but dataset is missing or checksum fails
AssertionError: if any of ``years`` is invalid
"""
assert set(years).issubset(self.md5s.keys()), (
    f"NLCD TCC data products only exist for the years {list(self.md5s.keys())}."
)
self.years = years
self.root = root
self.download = download
self.checksum = checksum

self._verify()

super().__init__(root, crs, res, transforms=transforms, cache=cache)

def _verify(self) -> None:
"""Verify the integrity of the dataset.
Raises:
RuntimeError: if ``download=False`` but dataset is missing or checksum fails
"""
# Check if the extracted files already exist. Insert the year into the
# first wildcard of ``filename_glob`` and search recursively, since the
# archives may extract into subdirectories.
exists = []
for year in self.years:
    filename_year = self.filename_glob.replace("*", f"*{year}*", 1)
    pathname = os.path.join(self.root, "**", filename_year)
    exists.append(bool(glob.glob(pathname, recursive=True)))

if all(exists):
    return

# Check if the zip files have already been downloaded
exists = []
for year in self.years:
    for filename in self.filenames[year]:
        pathname = os.path.join(self.root, filename)
        exists.append(os.path.exists(pathname))

if all(exists):
    self._extract()
    return

# Check if the user requested to download the dataset
if not self.download:
raise RuntimeError(
f"Dataset not found in `root={self.root}` and `download=False`, "
"either specify a different `root` directory or use `download=True` "
"to automatically download the dataset."
)

# Download the dataset
self._download()
self._extract()

def _download(self) -> None:
"""Download the dataset."""
for year in self.years:
for url, filename, md5 in zip(
self.urls[year], self.filenames[year], self.md5s[year]
):
path = os.path.join(self.root, filename)
if not os.path.exists(path):
download_url(
url=url,
root=self.root,
filename=filename,
md5=md5 if self.checksum else None,
)

def _extract(self) -> None:
"""Extract the dataset."""
for year in self.years:
for filename in self.filenames[year]:
path = os.path.join(self.root, filename)
if os.path.exists(path):
extract_archive(path)

def plot(
self,
sample: Dict[str, Any],
show_titles: bool = True,
suptitle: Optional[str] = None,
) -> plt.Figure:
"""Plot a sample from the dataset.

Args:
sample: a sample returned by :meth:`RasterDataset.__getitem__`
show_titles: flag indicating whether to show titles above each panel
suptitle: optional string to use as a suptitle

Returns:
a matplotlib Figure with the rendered sample
"""
mask = sample["mask"].squeeze().numpy()
ncols = 1

showing_predictions = "prediction" in sample
if showing_predictions:
pred = sample["prediction"].squeeze().numpy()
ncols = 2

fig, axs = plt.subplots(nrows=1, ncols=ncols, figsize=(ncols * 4, 4))

if showing_predictions:
axs[0].imshow(mask)
axs[0].axis("off")
axs[1].imshow(pred)
axs[1].axis("off")
if show_titles:
axs[0].set_title("Mask")
axs[1].set_title("Prediction")
else:
axs.imshow(mask)
axs.axis("off")
if show_titles:
axs.set_title("Mask")

if suptitle is not None:
plt.suptitle(suptitle)

return fig
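
For reviewers, a minimal usage sketch (the `root` path and sampler settings are illustrative; `RandomGeoSampler` and `stack_samples` are torchgeo's standard GeoDataset sampling utilities):

from torch.utils.data import DataLoader

from torchgeo.datasets import NLCDTreeCanopyCover, stack_samples
from torchgeo.samplers import RandomGeoSampler

# Download and index the 2016 TCC rasters.
ds = NLCDTreeCanopyCover(root="data", years=[2016], download=True, checksum=True)

# GeoDatasets are sampled by bounding box; size is in CRS units (~50 px at 30 m).
sampler = RandomGeoSampler(ds, size=1500, length=4)
loader = DataLoader(ds, sampler=sampler, collate_fn=stack_samples)

for batch in loader:
    print(batch["mask"].shape)  # percent tree canopy cover per pixel

# Plot a single sample with the new ``plot`` method.
fig = ds.plot(ds[next(iter(sampler))], suptitle="NLCD Tree Canopy Cover")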