Skip to content

Commit

Permalink
refactor(geopandas io): move geopandas utils, add _gis_enabled feature flag to fix gpd import
Browse files Browse the repository at this point in the history
  • Loading branch information
sqr00t committed Jan 4, 2024
1 parent 3deacdc commit bffb60f
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 43 deletions.
67 changes: 24 additions & 43 deletions nesta_ds_utils/loading_saving/S3.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import boto3
from fnmatch import fnmatch
import pandas as pd
import geopandas as gpd
import numpy as np
import pyarrow as pa
import pyarrow.parquet as pq
Expand All @@ -12,6 +11,14 @@
import warnings
from nesta_ds_utils.loading_saving import file_ops

from nesta_ds_utils.loading_saving.gis_interface import _gis_enabled

if _gis_enabled:
from nesta_ds_utils.loading_saving.gis_interface import (
_gdf_to_fileobj,
_fileobj_to_gdf,
)


def get_bucket_filenames_s3(bucket_name: str, dir_name: str = "") -> List[str]:
"""Get a list of all files in bucket directory.
Expand Down Expand Up @@ -58,27 +65,6 @@ def _df_to_fileobj(df_data: pd.DataFrame, path_to: str, **kwargs) -> io.BytesIO:
return buffer


def _gdf_to_fileobj(df_data: gpd.GeoDataFrame, path_to: str, **kwargs) -> io.BytesIO:
"""Convert GeoDataFrame into bytes file object.
Args:
df_data (gpd.DataFrame): Dataframe to convert.
path_to (str): Saving file name.
Returns:
io.BytesIO: Bytes file object.
"""
buffer = io.BytesIO()
if fnmatch(path_to, "*.geojson"):
df_data.to_file(buffer, driver="GeoJSON", **kwargs)
else:
raise NotImplementedError(
"Uploading geodataframe currently supported only for 'geojson'."
)
buffer.seek(0)
return buffer


def _dict_to_fileobj(dict_data: dict, path_to: str, **kwargs) -> io.BytesIO:
"""Convert dictionary into bytes file object.
Expand Down Expand Up @@ -229,10 +215,16 @@ def upload_obj(
kwargs_writing (dict, optional): Dictionary of kwargs for writing data.
"""
if isinstance(obj, gpd.base.GeoPandasBase):
obj = _gdf_to_fileobj(obj, path_to, **kwargs_writing)
elif isinstance(obj, pd.DataFrame):
obj = _df_to_fileobj(obj, path_to, **kwargs_writing)
if isinstance(obj, pd.DataFrame):
if type(obj).__name__ == "GeoDataFrame":
if _gis_enabled:
obj = _gdf_to_fileobj(obj, path_to, **kwargs_writing)
else:
raise ModuleNotFoundError(
"Please install 'gis' extra from nesta_ds_utils or 'geopandas' to upload geodataframes."
)
else:
obj = _df_to_fileobj(obj, path_to, **kwargs_writing)
elif isinstance(obj, dict):
obj = _dict_to_fileobj(obj, path_to, **kwargs_writing)
elif isinstance(obj, list):
Expand Down Expand Up @@ -272,22 +264,6 @@ def _fileobj_to_df(fileobj: io.BytesIO, path_from: str, **kwargs) -> pd.DataFram
return pd.read_excel(fileobj, **kwargs)


def _fileobj_to_gdf(fileobj: io.BytesIO, path_from: str, **kwargs) -> pd.DataFrame:
"""Convert bytes file object into geodataframe.
Args:
fileobj (io.BytesIO): Bytes file object.
path_from (str): Path of loaded data.
Returns:
gpd.DataFrame: Data as geodataframe.
"""
if fnmatch(path_from, "*.geojson"):
return gpd.GeoDataFrame.from_features(
json.loads(fileobj.getvalue().decode())["features"]
)


def _fileobj_to_dict(fileobj: io.BytesIO, path_from: str, **kwargs) -> dict:
"""Convert bytes file object into dictionary.
Expand Down Expand Up @@ -399,7 +375,12 @@ def download_obj(
)
elif download_as == "geodf":
if path_from.endswith(tuple([".geojson"])):
return _fileobj_to_gdf(fileobj, path_from, **kwargs_reading)
if _gis_enabled:
return _fileobj_to_gdf(fileobj, path_from, **kwargs_reading)
else:
raise ModuleNotFoundError(
"Please install 'gis' extra from nesta_ds_utils or 'geopandas' to download geodataframes."
)
else:
raise NotImplementedError(
"Download as geodataframe currently supported only " "for 'geojson'."
Expand Down
45 changes: 45 additions & 0 deletions nesta_ds_utils/loading_saving/gis_interface.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Optional GIS support.
#
# geopandas is an optional dependency, so everything that needs it lives in
# this module behind the `_gis_enabled` flag. Importers are expected to check
# `_gis_enabled` before importing `_gdf_to_fileobj` / `_fileobj_to_gdf`, which
# are only defined when geopandas could be imported.
from fnmatch import fnmatch
from io import BytesIO
from json import loads as load_json

try:
    # Keep the guarded region minimal: only the optional import can
    # legitimately fail here.
    from geopandas import GeoDataFrame
except ImportError:
    # geopandas could not be imported; the helpers below are not defined.
    _gis_enabled = False
else:
    _gis_enabled = True

    def _gdf_to_fileobj(df_data: GeoDataFrame, path_to: str, **kwargs) -> BytesIO:
        """Convert GeoDataFrame into bytes file object.

        Args:
            df_data (GeoDataFrame): Geodataframe to convert.
            path_to (str): Saving file name; its extension selects the
                output format (only '*.geojson' is supported).
            **kwargs: Forwarded to ``GeoDataFrame.to_file``.

        Returns:
            io.BytesIO: Bytes file object, rewound to position 0.

        Raises:
            NotImplementedError: If ``path_to`` does not match '*.geojson'.
        """
        buffer = BytesIO()
        if fnmatch(path_to, "*.geojson"):
            df_data.to_file(buffer, driver="GeoJSON", **kwargs)
        else:
            raise NotImplementedError(
                "Uploading geodataframe currently supported only for 'geojson'."
            )
        # Rewind so that the consumer reads from the start of the buffer.
        buffer.seek(0)
        return buffer

    def _fileobj_to_gdf(fileobj: BytesIO, path_from: str, **kwargs) -> GeoDataFrame:
        """Convert bytes file object into geodataframe.

        Args:
            fileobj (io.BytesIO): Bytes file object holding a GeoJSON document.
            path_from (str): Path of loaded data; its extension selects the
                parser (only '*.geojson' is supported).
            **kwargs: Accepted for signature parity with the other
                ``_fileobj_to_*`` helpers; currently unused.

        Returns:
            GeoDataFrame: Data built from the document's "features" list.

        Raises:
            NotImplementedError: If ``path_from`` does not match '*.geojson'.
        """
        if not fnmatch(path_from, "*.geojson"):
            # Fail loudly instead of implicitly returning None, mirroring
            # the behaviour of `_gdf_to_fileobj` for unsupported formats.
            raise NotImplementedError(
                "Download as geodataframe currently supported only for 'geojson'."
            )
        feature_collection = load_json(fileobj.getvalue().decode())
        return GeoDataFrame.from_features(feature_collection["features"])

0 comments on commit bffb60f

Please sign in to comment.