-
Notifications
You must be signed in to change notification settings - Fork 86
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Use EarthEngine API for fetching data #107
Changes from all commits
da41b32
8267d0b
5dc5951
3f25aa7
ceddaa1
82898b1
1eee323
2209ac2
8791e7d
dc22d9a
95dfe7f
435188e
ddcb43f
c4fea32
f73714d
d1bfb9c
24c3bca
8fb5255
99721fc
b4a8c4b
c0e248b
8245784
a672693
5a2b0ce
1a609fc
1cf502a
b2bafb0
6a9917c
a0b9d0e
4fa8e40
d6bf537
c05cb5f
51b6234
f094c41
0a42ff3
4ba303c
851e2a3
7cecb09
f4d76ba
070249e
05783cc
b033c27
f6e90c0
b605b74
508291c
230c612
5f7a06c
ff9d447
1395cc9
b588a8b
8bfdae8
fddf213
b120d3a
d0ed10e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,8 @@ on: | |
branches: [ main ] | ||
pull_request: | ||
branches: [ main ] | ||
paths: | ||
- 'buildings-example/**' | ||
|
||
jobs: | ||
test: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,8 @@ on: | |
branches: [ main ] | ||
pull_request: | ||
branches: [ main ] | ||
paths: | ||
- 'crop-mask-example/**' | ||
|
||
jobs: | ||
test: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,8 @@ on: | |
branches: [ main ] | ||
pull_request: | ||
branches: [ main ] | ||
paths: | ||
- 'forest-example/**' | ||
|
||
jobs: | ||
test: | ||
|
@@ -18,19 +20,35 @@ jobs: | |
steps: | ||
- name: Clone repo | ||
uses: actions/checkout@v2 | ||
with: | ||
ref: ${{ github.event.pull_request.head.ref }} | ||
- name: Set up python | ||
uses: actions/setup-python@v2 | ||
uses: actions/setup-python@v4 | ||
with: | ||
python-version: 3.8 | ||
- name: Install dependencies | ||
run: pip install -r requirements.txt | ||
- run: pip install -r requirements.txt | ||
|
||
- name: dvc pull data | ||
- uses: google-github-actions/auth@v0 | ||
with: | ||
credentials_json: ${{ secrets.GCP_SA_KEY }} | ||
- name: Run data pipeline | ||
env: | ||
# https://dvc.org/doc/user-guide/setup-google-drive-remote#authorization | ||
GDRIVE_CREDENTIALS_DATA: ${{ secrets.GDRIVE_CREDENTIALS_DATA }} | ||
run: dvc pull -f | ||
|
||
GCP_SA_KEY: ${{ secrets.GCP_SA_KEY }} | ||
run: | | ||
dvc pull -f | ||
openmapflow create-datasets --non-interactive | ||
dvc commit -f | ||
dvc push | ||
- name: Push automated dataset updates | ||
run: | | ||
git config --global user.name 'Dataset bot' | ||
git config --global user.email '[email protected]' | ||
git pull | ||
git add data | ||
git commit -m "Automated dataset updates" || echo "No updates to commit" | ||
git push | ||
- name: Integration test - Project | ||
run: | | ||
openmapflow cp templates/integration_test_project.py . | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,8 @@ on: | |
branches: [ main ] | ||
pull_request: | ||
branches: [ main ] | ||
paths: | ||
- 'maize-example/**' | ||
|
||
jobs: | ||
test: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
outs: | ||
- md5: 718c5017dec70570f87d1ca1941db208.dir | ||
size: 5789085 | ||
- md5: 3d8ac3ef8c473bb3445b9b67a0fdbc33.dir | ||
size: 5403436 | ||
nfiles: 1 | ||
path: datasets |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
import json | ||
import os | ||
import warnings | ||
from datetime import date, timedelta | ||
from typing import Dict, List, Optional, Union | ||
|
@@ -104,6 +106,69 @@ def ee_safe_str(s: str): | |
return s.replace(".", "-").replace("=", "-").replace("/", "-")[:100] | ||
|
||
|
||
def create_ee_image(
    polygon: "ee.Geometry.Polygon",
    start_date: date,
    end_date: date,
    days_per_timestep: int = DAYS_PER_TIMESTEP,
):
    """Build a single multi-band Earth Engine image for a polygon and date range.

    The range is walked in consecutive windows of ``days_per_timestep`` days.
    For every window that ends on or before ``end_date``, one Sentinel-1 image
    and one image per entry of ``DYNAMIC_IMAGE_FUNCTIONS`` are concatenated.
    The per-window images are then folded into one image with the function
    returned by ``make_combine_bands_function(DYNAMIC_BANDS)``, and the images
    produced by ``STATIC_IMAGE_FUNCTIONS`` are concatenated onto the result.

    :param polygon: The region the imagery is requested for.
    :param start_date: Start of the first window.
    :param end_date: No window may end after this date.
    :param days_per_timestep: Length of each window in days. Must be positive.
    :return: The concatenation of the dynamic and static images.
    :raises ValueError: If ``days_per_timestep`` is not positive.
    """
    # A zero or negative step never moves the window past end_date, so the
    # loop below would never terminate.
    if days_per_timestep <= 0:
        raise ValueError(
            f"days_per_timestep must be a positive integer, got {days_per_timestep}"
        )
    step = timedelta(days=days_per_timestep)

    # first, we get all the S1 images in an exaggerated date range
    # (31 days of padding on either side of the requested range)
    vv_imcol, vh_imcol = get_s1_image_collection(
        polygon, start_date - timedelta(days=31), end_date + timedelta(days=31)
    )

    image_collection_list: List[ee.Image] = []
    cur_date = start_date
    cur_end_date = cur_date + step
    while cur_end_date <= end_date:
        # first, the S1 image which gets the entire s1 collection
        image_list: List[ee.Image] = [
            get_single_s1_image(
                region=polygon,
                start_date=cur_date,
                end_date=cur_end_date,
                vv_imcol=vv_imcol,
                vh_imcol=vh_imcol,
            )
        ]
        # then every other time-dependent source for the same window
        image_list.extend(
            image_function(region=polygon, start_date=cur_date, end_date=cur_end_date)
            for image_function in DYNAMIC_IMAGE_FUNCTIONS
        )
        image_collection_list.append(ee.Image.cat(image_list))

        cur_date += step
        cur_end_date += step

    # now, we want to take our image collection and append the bands into a single image
    imcoll = ee.ImageCollection(image_collection_list)
    combine_bands_function = make_combine_bands_function(DYNAMIC_BANDS)
    img = ee.Image(imcoll.iterate(combine_bands_function))

    # finally, we add the static images separately since they don't change in time
    total_image_list: List[ee.Image] = [img]
    total_image_list.extend(
        static_image_function(region=polygon)
        for static_image_function in STATIC_IMAGE_FUNCTIONS
    )

    return ee.Image.cat(total_image_list)
|
||
|
||
def get_ee_credentials() -> Union["ee.ServiceAccountCredentials", str]:
    """Return the credentials to pass to ``ee.Initialize``.

    If the ``GCP_SA_KEY`` environment variable holds a service-account key
    (a JSON document with a ``client_email`` field), service-account
    credentials built from it are returned. If the variable is unset or
    blank, the string ``"persistent"`` is returned instead.

    :return: ``ee.ServiceAccountCredentials`` or the string ``"persistent"``.
    """
    gcp_sa_key = os.environ.get("GCP_SA_KEY")
    # CI systems export an unavailable secret as an empty string rather than
    # leaving the variable unset; treat that the same as "not provided"
    # instead of failing inside json.loads.
    if gcp_sa_key is None or not gcp_sa_key.strip():
        print("Logging into EarthEngine with default credentials")
        return "persistent"

    gcp_sa_email = json.loads(gcp_sa_key)["client_email"]
    print(f"Logging into EarthEngine with {gcp_sa_email}")
    return ee.ServiceAccountCredentials(gcp_sa_email, key_data=gcp_sa_key)
|
||
|
||
class EarthEngineExporter: | ||
""" | ||
Export satellite data from Earth engine. It's called using the following | ||
|
@@ -121,24 +186,10 @@ class EarthEngineExporter: | |
""" | ||
|
||
def __init__( | ||
self, | ||
dest_bucket: str, | ||
check_ee: bool = False, | ||
check_gcp: bool = False, | ||
credentials: Optional[str] = None, | ||
days_per_timestep: int = DAYS_PER_TIMESTEP, | ||
self, dest_bucket: str, check_ee: bool = False, check_gcp: bool = False | ||
) -> None: | ||
self.dest_bucket = dest_bucket | ||
self.days_per_timestep = days_per_timestep | ||
try: | ||
if credentials: | ||
ee.Initialize(credentials=credentials) | ||
else: | ||
ee.Initialize() | ||
except Exception: | ||
print( | ||
"This code may not work if you have not authenticated your earthengine account" | ||
) | ||
ee.Initialize(get_ee_credentials()) | ||
self.check_ee = check_ee | ||
self.ee_task_list = get_ee_task_list() if self.check_ee else [] | ||
self.check_gcp = check_gcp | ||
|
@@ -172,50 +223,7 @@ def _export_for_polygon( | |
if len(self.ee_task_list) >= 3000: | ||
return False | ||
|
||
image_collection_list: List[ee.Image] = [] | ||
cur_date = start_date | ||
cur_end_date = cur_date + timedelta(days=self.days_per_timestep) | ||
|
||
# first, we get all the S1 images in an exaggerated date range | ||
vv_imcol, vh_imcol = get_s1_image_collection( | ||
polygon, start_date - timedelta(days=31), end_date + timedelta(days=31) | ||
) | ||
|
||
while cur_end_date <= end_date: | ||
image_list: List[ee.Image] = [] | ||
|
||
# first, the S1 image which gets the entire s1 collection | ||
image_list.append( | ||
get_single_s1_image( | ||
region=polygon, | ||
start_date=cur_date, | ||
end_date=cur_end_date, | ||
vv_imcol=vv_imcol, | ||
vh_imcol=vh_imcol, | ||
) | ||
) | ||
for image_function in DYNAMIC_IMAGE_FUNCTIONS: | ||
image_list.append( | ||
image_function( | ||
region=polygon, start_date=cur_date, end_date=cur_end_date | ||
) | ||
) | ||
image_collection_list.append(ee.Image.cat(image_list)) | ||
|
||
cur_date += timedelta(days=self.days_per_timestep) | ||
cur_end_date += timedelta(days=self.days_per_timestep) | ||
|
||
# now, we want to take our image collection and append the bands into a single image | ||
imcoll = ee.ImageCollection(image_collection_list) | ||
combine_bands_function = make_combine_bands_function(DYNAMIC_BANDS) | ||
img = ee.Image(imcoll.iterate(combine_bands_function)) | ||
|
||
# finally, we add the SRTM image seperately since its static in time | ||
total_image_list: List[ee.Image] = [img] | ||
for static_image_function in STATIC_IMAGE_FUNCTIONS: | ||
total_image_list.append(static_image_function(region=polygon)) | ||
|
||
img = ee.Image.cat(total_image_list) | ||
img = create_ee_image(polygon, start_date, end_date) | ||
|
||
# and finally, export the image | ||
if not test: | ||
|
@@ -281,6 +289,9 @@ def export_for_labels( | |
for expected_column in [START, END, LAT, LON]: | ||
assert expected_column in labels | ||
|
||
labels[START] = pd.to_datetime(labels[START]).dt.date | ||
labels[END] = pd.to_datetime(labels[END]).dt.date | ||
|
||
exports_started = 0 | ||
print(f"Exporting {len(labels)} labels: ") | ||
|
||
|
@@ -306,3 +317,33 @@ def export_for_labels( | |
): | ||
print(f"Started {exports_started} exports. Ending export") | ||
return None | ||
|
||
|
||
class EarthEngineAPI: | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this class necessary for now? It seems like
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It might make sense to move
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I lean towards keeping it as is:
|
||
""" | ||
Fetch satellite data from Earth engine by URL. | ||
:param credentials: The credentials to use for the export. If not specified, | ||
the default credentials will be used | ||
""" | ||
|
||
def __init__(self) -> None:
    """Initialize Earth Engine against the high-volume endpoint.

    Credentials come from ``get_ee_credentials()``.
    """
    credentials = get_ee_credentials()
    high_volume_url = "https://earthengine-highvolume.googleapis.com"
    ee.Initialize(credentials, opt_url=high_volume_url)
|
||
def get_ee_url(
    self,
    lat: float,
    lon: float,
    start_date: date,
    end_date: date,
    surrounding_metres: int = 80,
    scale: int = 10,
) -> str:
    """Return a GeoTIFF download URL for the imagery around a point.

    A bounding box centred on (``lat``, ``lon``) is built with
    ``EEBoundingBox.from_centre``, the image for that box and date range is
    assembled with ``create_ee_image``, and a download URL is requested from
    Earth Engine.

    :param lat: Latitude of the centre point.
    :param lon: Longitude of the centre point.
    :param start_date: Start of the date range passed to ``create_ee_image``.
    :param end_date: End of the date range passed to ``create_ee_image``.
    :param surrounding_metres: Passed to ``EEBoundingBox.from_centre``;
        defaults to the previously hard-coded 80.
    :param scale: ``scale`` download parameter; defaults to the previously
        hard-coded 10.
    :return: The value returned by ``ee.Image.getDownloadURL``.
    """
    ee_bbox = EEBoundingBox.from_centre(
        mid_lat=lat,
        mid_lon=lon,
        surrounding_metres=surrounding_metres,
    ).to_ee_polygon()
    img = create_ee_image(ee_bbox, start_date, end_date)
    download_params = {
        "region": ee_bbox,
        "scale": scale,
        # one multi-band file rather than one file per band
        "filePerBand": False,
        "format": "GEO_TIFF",
    }
    return img.getDownloadURL(download_params)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Curious as to why?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Because there are way too many points, so this was a way to test just a few.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
the change here is to the
random_state
- not super important, just curious why you changed it
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh! I think this was to force new points to be exported rather than the 1000 that already existed in Google Cloud Storage when I initially tested