diff --git a/common/pom.xml b/common/pom.xml index 2f2a7211e4..265c56739b 100644 --- a/common/pom.xml +++ b/common/pom.xml @@ -118,6 +118,10 @@ ${janino-version} test + + edu.ucar + cdm-core + src/main/java diff --git a/common/src/main/java/org/apache/sedona/common/raster/RasterConstructors.java b/common/src/main/java/org/apache/sedona/common/raster/RasterConstructors.java index 0cc65b5d11..e19df957b9 100644 --- a/common/src/main/java/org/apache/sedona/common/raster/RasterConstructors.java +++ b/common/src/main/java/org/apache/sedona/common/raster/RasterConstructors.java @@ -15,6 +15,7 @@ import org.apache.sedona.common.FunctionsGeoTools; import org.apache.sedona.common.raster.inputstream.ByteArrayImageInputStream; +import org.apache.sedona.common.raster.netcdf.NetCdfReader; import org.apache.sedona.common.utils.RasterUtils; import org.geotools.coverage.grid.GridCoverage2D; import org.geotools.coverage.grid.GridEnvelope2D; @@ -28,6 +29,7 @@ import org.geotools.geometry.jts.JTS; import org.geotools.geometry.jts.ReferencedEnvelope; import org.geotools.process.vector.VectorToRasterProcess; +import org.geotools.referencing.CRS; import org.geotools.referencing.crs.DefaultEngineeringCRS; import org.geotools.referencing.operation.transform.AffineTransform2D; import org.geotools.util.factory.Hints; @@ -38,13 +40,15 @@ import org.opengis.referencing.crs.CoordinateReferenceSystem; import org.opengis.referencing.datum.PixelInCell; import org.opengis.referencing.operation.MathTransform; +import ucar.nc2.NetcdfFile; +import ucar.nc2.NetcdfFiles; import javax.media.jai.RasterFactory; import java.awt.image.WritableRaster; import java.io.IOException; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; +import java.util.Map; public class RasterConstructors { @@ -58,6 +62,25 @@ public static GridCoverage2D fromGeoTiff(byte[] bytes) throws IOException { return geoTiffReader.read(null); } + public static GridCoverage2D fromNetCDF(byte[] bytes, String variableName, String lonDimensionName, String latDimensionName) throws IOException, FactoryException { + NetcdfFile netcdfFile = openNetCdfBytes(bytes); + return NetCdfReader.getRaster(netcdfFile, variableName, latDimensionName, lonDimensionName); + } + + public static GridCoverage2D fromNetCDF(byte[] bytes, String recordVariableName) throws IOException, FactoryException { + NetcdfFile netcdfFile = openNetCdfBytes(bytes); + return NetCdfReader.getRaster(netcdfFile, recordVariableName); + } + + public static String getRecordInfo(byte[] bytes) throws IOException { + NetcdfFile netcdfFile = openNetCdfBytes(bytes); + return NetCdfReader.getRecordInfo(netcdfFile); + } + + private static NetcdfFile openNetCdfBytes(byte[] bytes) throws IOException { + return NetcdfFiles.openInMemory("", bytes); + } + /** * Returns a raster that is converted from the geometry provided. * @param geom The geometry to convert @@ -310,7 +333,31 @@ public static GridCoverage2D makeEmptyRaster(int numBand, String bandDataType, i new GridEnvelope2D(0, 0, widthInPixel, heightInPixel), PixelInCell.CELL_CORNER, transform, crs, null); - return RasterUtils.create(raster, gridGeometry, null); + return RasterUtils.create(raster, gridGeometry, null, null); + } + + public static GridCoverage2D makeNonEmptyRaster(int numBand, int widthInPixel, int heightInPixel, double upperLeftX, double upperLeftY, double scaleX, double scaleY, double skewX, double skewY, int srid, double[][] data, Map> properties, Double noDataValue, PixelInCell anchor) throws FactoryException { + CoordinateReferenceSystem crs; + if (srid == 0) { + crs = DefaultEngineeringCRS.GENERIC_2D; + } else { + // Create the CRS from the srid + // Longitude first, Latitude second + crs = CRS.decode("EPSG:" + srid, true); + } + + // Create a new empty raster + WritableRaster raster = RasterFactory.createBandedRaster(5, widthInPixel, heightInPixel, numBand, null); //create a raster with double values + for (int i = 0; i < numBand; i++) { + raster.setSamples(0, 0, widthInPixel, heightInPixel, i, data[i]); + } + //raster.setPixels(0, 0, widthInPixel, heightInPixel, data); + MathTransform transform = new AffineTransform2D(scaleX, skewY, skewX, scaleY, upperLeftX, upperLeftY); + GridGeometry2D gridGeometry = new GridGeometry2D( + new GridEnvelope2D(0, 0, widthInPixel, heightInPixel), + anchor, + transform, crs, null); + return RasterUtils.create(raster, gridGeometry, null, noDataValue, properties); } public static GridCoverage2D makeNonEmptyRaster(int numBands, String bandDataType, int widthInPixel, int heightInPixel, double upperLeftX, double upperLeftY, double scaleX, double scaleY, double skewX, double skewY, int srid, double[][] rasterValues) { diff --git a/common/src/main/java/org/apache/sedona/common/raster/netcdf/NetCdfConstants.java b/common/src/main/java/org/apache/sedona/common/raster/netcdf/NetCdfConstants.java new file mode 100644 index 0000000000..e05155352c --- /dev/null +++ b/common/src/main/java/org/apache/sedona/common/raster/netcdf/NetCdfConstants.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.sedona.common.raster.netcdf; + +public class NetCdfConstants { + public static final String VALID_MIN = "valid_min"; + public static final String VALID_MAX = "valid_max"; + public static final String MISSING_VALUE = "missing_value"; + public static final String VALID_RANGE = "valid_range"; + public static final String SCALE_FACTOR = "scale_factor"; + public static final String ADD_OFFSET = "add_offset"; + public static final String LONG_NAME = "long_name"; + public static final String UNITS = "units"; + public static final String GLOBAL_ATTR_PREFIX = "GLOBAL_ATTRIBUTES"; + public static final String VAR_ATTR_PREFIX = "VAR_"; + public static final String BAND_ATTR_PREFIX = "BAND_"; + + public static final String INSUFFICIENT_DIMENSIONS_VARIABLE = "Provided variable has less than two dimensions"; + + public static final String NON_NUMERIC_VALUE = "An attribute expected to have numeric values does not have numeric value"; + + public static final String INVALID_LON_NAME = "Provided longitude variable short name is invalid"; + + public static final String INVALID_LAT_NAME = "Provided latitude variable short name is invalid"; + + public static final String INVALID_LAT_SHAPE = "Shape of latitude variable is supposed to be 1"; + + public static final String INVALID_LON_SHAPE = "Shape of longitude variable is supposed to be 1"; + + public static final String INVALID_RECORD_NAME = "Provided record variable short name is invalid"; + + public static final String COORD_VARIABLE_NOT_FOUND = "Corresponding coordinate variable not found for dimension %s"; + + public static final String COORD_IDX_NOT_FOUND = "Given record variable does not contain one of the latitude or longitude dimensions as the participating dimensions"; +} diff --git a/common/src/main/java/org/apache/sedona/common/raster/netcdf/NetCdfReader.java b/common/src/main/java/org/apache/sedona/common/raster/netcdf/NetCdfReader.java new file mode 100644 index 0000000000..ad8f423701 --- /dev/null +++ b/common/src/main/java/org/apache/sedona/common/raster/netcdf/NetCdfReader.java @@ -0,0 +1,376 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.sedona.common.raster.netcdf; + +import com.google.common.collect.ImmutableList; +import org.apache.sedona.common.raster.RasterConstructors; +import org.geotools.coverage.grid.GridCoverage2D; +import org.opengis.referencing.FactoryException; +import org.opengis.referencing.datum.PixelInCell; +import ucar.ma2.Array; +import ucar.nc2.*; + +import java.io.IOException; +import java.util.*; + +public class NetCdfReader { + /* + ==================================== Sedona NetCDF Public APIS ==================================== + */ + + /** + * Return a string depicting information about record variables in the provided netcdf file along with its dimensions + * @param netcdfFile NetCDF file to retrieve info of + * @return a string containing information about all record variables present within the file + */ + public static String getRecordInfo(NetcdfFile netcdfFile) { + ImmutableList recordVariables = getRecordVariables(netcdfFile); + if (Objects.isNull(recordVariables) || recordVariables.isEmpty()) return ""; + int numRecordVariables = recordVariables.size(); + StringBuilder recordInfo = new StringBuilder(); + for (int i = 0; i < numRecordVariables; i++) { + Variable variable = recordVariables.get(i); + recordInfo.append(variable.getNameAndDimensions(false)); //make strict true to get short name + if (i != numRecordVariables - 1) recordInfo.append("\n\n"); + } + return recordInfo.toString(); + } + + /** + * Return a raster containing the record variable data, assuming the last two dimensions of the variable as Y and X respectively + * @param netcdfFile NetCDF file to read + * @param variableShortName short name of the variable to read + * @return GridCoverage2D object + * @throws FactoryException + * @throws IOException + */ + public static GridCoverage2D getRaster(NetcdfFile netcdfFile, String variableShortName) throws FactoryException, IOException { + Variable recordVariable = getRecordVariableFromShortName(netcdfFile, variableShortName); + ImmutableList variableDimensions = recordVariable.getDimensions(); + int numDimensions = variableDimensions.size(); + if (numDimensions < 2) { + throw new IllegalArgumentException(NetCdfConstants.INSUFFICIENT_DIMENSIONS_VARIABLE); + } + //Assume ... Y, X dimension list in the variable + String latDimensionShortName = variableDimensions.get(numDimensions - 2).getShortName(); + String lonDimensionShortName = variableDimensions.get(numDimensions - 1).getShortName(); + + Variable[] coordVariablePair = getCoordVariables(netcdfFile, latDimensionShortName, lonDimensionShortName); + Variable latVariable = coordVariablePair[1]; + Variable lonVariable = coordVariablePair[0]; + return NetCdfReader.getRasterHelper(netcdfFile, recordVariable, lonVariable, latVariable, false); + } + + /** + * + * @param netcdfFile NetCDF file to read + * @param variableShortName short name of the variable to read + * @param latDimShortName short name of the variable that serves as the lat dimension of the record variable + * @param lonDimShortName short name of the variable that serves as the lon dimension of the record variable + * @return GridCoverage2D object + * @throws FactoryException + * @throws IOException + */ + public static GridCoverage2D getRaster(NetcdfFile netcdfFile, String variableShortName, String latDimShortName, String lonDimShortName) throws FactoryException, IOException { + Variable recordVariable = getRecordVariableFromShortName(netcdfFile, variableShortName); + + Variable[] coordVariablePair = getCoordVariables(netcdfFile, latDimShortName, lonDimShortName); + Variable latVariable = coordVariablePair[1]; + Variable lonVariable = coordVariablePair[0]; + return NetCdfReader.getRasterHelper(netcdfFile, recordVariable, lonVariable, latVariable, true); + } + + /** + * Return the properties map of the raster which contains metadata information about a netCDF file + * @param raster GridCoverage2D raster created from a NetCDF file + * @return a map of containing metadata + */ + public static Map getRasterMetadata(GridCoverage2D raster) { + return raster.getProperties(); + } + + /* + ==================================== Sedona NetCDF Helper APIS ==================================== + */ + + private static Variable getRecordVariableFromShortName(NetcdfFile netcdfFile, String variableShortName) { + Variable recordVariable = findVariableRecursive(variableShortName, netcdfFile.getRootGroup()); + ensureRecordVar(recordVariable); + return recordVariable; + } + + private static Variable[] getCoordVariables(NetcdfFile netcdfFile, String latVariableShortName, String lonVariableShortName) { + Group rootGroup = netcdfFile.getRootGroup(); + + //TODO: optimize to get all variables in 1 tree traversal? + Variable lonVariable = findVariableRecursive(lonVariableShortName, rootGroup); + Variable latVariable = findVariableRecursive(latVariableShortName, rootGroup); + ensureCoordVar(lonVariable, latVariable); + return new Variable[]{lonVariable, latVariable}; + } + + private static ImmutableList getRecordVariables(NetcdfFile netcdfFile) { + ImmutableList variables = netcdfFile.getVariables(); + List recordVariables = null; + for (Variable variable: variables) { + ImmutableList variableDimensions = variable.getDimensions(); + if (variableDimensions.size() < 2) continue; //record variables have least 2 dimensions (lat and lon) + if (Objects.isNull(recordVariables)) recordVariables = new ArrayList<>(); + //ASSUMPTION: A record is not restricted to having an unlimited dimension + recordVariables.add(variable); + } + if (Objects.isNull(recordVariables)) return null; + return ImmutableList.copyOf(recordVariables); + } + private static GridCoverage2D getRasterHelper(NetcdfFile netcdfFile, Variable recordVariable, Variable lonVariable, Variable latVariable, boolean findCoordIndices) throws IOException, FactoryException { + //assert(netcdfFile.getFileTypeVersion().equalsIgnoreCase("1")); //CDF - 1 + ImmutableList globalAttributes = netcdfFile.getGlobalAttributes(); + Group rootGroup = netcdfFile.getRootGroup(); + + Array lonData, latData; + double minX = 0, maxY = 0, translateX, translateY, skewX = 0, skewY = 0; + int width, height; + boolean isIncreasing = false; + HashMap> varMetadata = new HashMap<>(); + + + //========================= Set Raster geo-reference (width, height, scaleX, scaleY ========================= + lonData = lonVariable.read(); + width = lonData.getShape()[0]; + double firstVal, lastVal; + firstVal = getElement(lonData, 0); + lastVal = getElement(lonData, width - 1); + isIncreasing = firstVal < lastVal; + minX = isIncreasing ? firstVal : lastVal; + translateX = Math.abs(lastVal - firstVal) / (width - 1); + + latData = latVariable.read(); + height = latData.getShape()[0]; + firstVal = getElement(latData, 0); + lastVal = getElement(latData, height - 1); + isIncreasing = firstVal < lastVal; + maxY = isIncreasing ? lastVal : firstVal; + translateY = Math.abs(lastVal - firstVal) / (height - 1); + //====================================================================================================== + + String lonVariableName = lonVariable.getShortName(); + String latVariableName = latVariable.getShortName(); + + List recordDimensions = recordVariable.getDimensions(); + int numRecordDimensions = recordDimensions.size(); + + //==================== Re-check existence of dimensions if necessary ==================== + int lonIndex = numRecordDimensions - 1; + int latIndex = numRecordDimensions - 2; + if (findCoordIndices) { + lonIndex = recordVariable.findDimensionIndex(lonVariableName); + latIndex = recordVariable.findDimensionIndex(latVariableName); + if (latIndex == -1 || lonIndex == -1) { + throw new IllegalArgumentException(NetCdfConstants.COORD_IDX_NOT_FOUND); + } + } + + //read recordVariable data + Array recordData = recordVariable.read(); + + // Set noDataValue if available, null otherwise + Double noDataValue = null; + if (recordVariable.attributes().hasAttribute(NetCdfConstants.MISSING_VALUE)) { + Attribute noDataAttribute = recordVariable.findAttribute(NetCdfConstants.MISSING_VALUE); + if (!Objects.isNull(noDataAttribute)) + noDataValue = getAttrDoubleValue(noDataAttribute); + } + int[] strides = recordData.getIndex().getShape(); + int[] pointers = recordData.getIndex().getCurrentCounter(); + int numBands = 1; + int numValuesPerBand = 1; + for (int i = 0; i < numRecordDimensions; i++) { + Dimension recordDimension = recordDimensions.get(i); + String dimensionShortName = recordDimension.getShortName(); + //TODO: Maybe replace this string comparison with index comparison since we already have lon and lat dimension indices stored? + if (dimensionShortName.equalsIgnoreCase(latVariableName) || dimensionShortName.equalsIgnoreCase(lonVariableName)) { + numValuesPerBand *= strides[i]; + }else { + numBands *= strides[i]; + } + } + Array[] dimensionVars = new Array[numRecordDimensions]; + double scaleFactor = 1, offset = 0; + for (int i = 0; i < numRecordDimensions; i++) { + if (i == lonIndex) dimensionVars[i] = lonData; + else if (i == latIndex) dimensionVars[i] = latData; + else { + Dimension recordDimension = recordDimensions.get(i); + String dimensionShortName = recordDimension.getShortName(); + //TODO: This leads to tree traversal for dimensions, can we do this in one go? + Variable recordDimVar = findVariableRecursive(dimensionShortName, rootGroup);//rootGroup.findVariableLocal(dimensionShortName); + if (Objects.isNull(recordDimVar)) throw new IllegalArgumentException(String.format(NetCdfConstants.COORD_VARIABLE_NOT_FOUND, dimensionShortName)); + AttributeContainer recordDimAttrs = recordDimVar.attributes(); + varMetadata.computeIfAbsent(dimensionShortName, k -> new ArrayList<>()); + for (Attribute attr: recordDimAttrs) { + varMetadata.get(dimensionShortName).add(attr.getStringValue()); + if (attr.getShortName().equalsIgnoreCase(NetCdfConstants.SCALE_FACTOR)) { + Array values = attr.getValues(); + if (!Objects.isNull(values)) + scaleFactor = getElement(attr.getValues(), 0); + } + if (attr.getShortName().equalsIgnoreCase(NetCdfConstants.ADD_OFFSET)) { + Array values = attr.getValues(); + if (!Objects.isNull(values)) + offset = getElement(attr.getValues(), 0); + } + } + dimensionVars[i] = recordDimVar.read(); + + } + } + double[][] allBandValues = new double[numBands][numValuesPerBand]; + //check for offset in the record variable + List> bandMetaData = new ArrayList<>(); + int currBandNum = 0; + while (currBandNum < numBands) { + if (dimensionVars.length > 2) { + //start from the bottom 3rd dimension going up to form metadata + List currBandMetadata = new ArrayList<>(); + for (int metadataDim = dimensionVars.length - 3; metadataDim >= 0; metadataDim--) { + double data = getElement(dimensionVars[metadataDim], pointers[metadataDim]); + String metadata = recordDimensions.get(metadataDim).getShortName() + " : " + data; + currBandMetadata.add(metadata); + } + bandMetaData.add(currBandMetadata); + } + //int dataIndex = currBandNum; + for (int j = height - 1; j >= 0; j--) { + for (int i = 0; i < width; i++) { + int index = (j * width + i); + int dataIndex = currBandNum * (width * height) + ((height - 1 - j) * width + i); + double currRecord = scaleFactor * getElement(recordData, dataIndex) + offset; + allBandValues[currBandNum][index] = currRecord; + } + } + boolean addCarry = true; + for (int currDim = dimensionVars.length - 3; currDim >= 0; currDim--) { + int maxIndex = strides[currDim]; + if (addCarry) { + pointers[currDim]++; + addCarry = false; + } + if (pointers[currDim] == maxIndex) { + pointers[currDim] = 0; + addCarry = true; + } + } + currBandNum++; + } + Map> rasterProperties = getRasterProperties(globalAttributes, varMetadata, bandMetaData); + return RasterConstructors.makeNonEmptyRaster(numBands, width, height, minX, maxY, translateX, -translateY, skewX, skewY, 0, allBandValues, rasterProperties, noDataValue, PixelInCell.CELL_CENTER); + } + + private static Variable findVariableRecursive(String shortName, Group currGroup) { + if (!Objects.isNull(currGroup.findVariableLocal(shortName))) { + return currGroup.findVariableLocal(shortName); + }else { + for (Group group: currGroup.getGroups()) { + Variable ret = findVariableRecursive(shortName, group); + if (!Objects.isNull(ret)) return ret; + } + return null; + } + } + + private static Map> getRasterProperties(ImmutableList globalAttrs, Map> varAttrs, List> bandAttrs) { + Map> res = new HashMap<>(varAttrs); + if (!Objects.isNull(globalAttrs) && !globalAttrs.isEmpty()) { + List globalAttrString = new ArrayList<>(); + for (Attribute globalAttr: globalAttrs) { + globalAttrString.add(globalAttr.getStringValue()); + } + res.put(NetCdfConstants.GLOBAL_ATTR_PREFIX, globalAttrString); + } + for(int band = 0; band < bandAttrs.size(); band++) { + int currBandNum = band + 1; + res.put(NetCdfConstants.BAND_ATTR_PREFIX + currBandNum, bandAttrs.get(band)); + } + return res; + } + + private static Double getElement(Array array, int index) { + double res; + switch (array.getDataType()) { + case INT: + res = array.getInt(index); + break; + case LONG: + res = array.getLong(index); + break; + case FLOAT: + res = array.getFloat(index); + break; + case DOUBLE: + res = array.getDouble(index); + break; + case BYTE: + res = array.getByte(index); + break; + case SHORT: + case USHORT: + res = array.getShort(index); + break; + default: + throw new IllegalArgumentException("Error casting dimension data to double"); + } + return res; + } + + private static Double getAttrDoubleValue(Attribute attr) { + Number numericValue = attr.getNumericValue(); + if (!Objects.isNull(numericValue)) { + return numericValue.doubleValue(); + }else { + throw new IllegalArgumentException(NetCdfConstants.NON_NUMERIC_VALUE); + } + } + + + private static void ensureCoordVar(Variable lonVar, Variable latVar) throws NullPointerException, IllegalArgumentException { + if (latVar == null) { + throw new NullPointerException(NetCdfConstants.INVALID_LAT_NAME); + } + + if (lonVar == null) { + throw new NullPointerException(NetCdfConstants.INVALID_LON_NAME); + } + + if (latVar.getShape().length > 1) { + throw new IllegalArgumentException(NetCdfConstants.INVALID_LAT_SHAPE); + } + + if (lonVar.getShape().length > 1) { + throw new IllegalArgumentException(NetCdfConstants.INVALID_LON_SHAPE); + } + } + + private static void ensureRecordVar(Variable recordVar) throws NullPointerException { + if (recordVar == null) { + throw new IllegalArgumentException(NetCdfConstants.INVALID_RECORD_NAME); + } + } + + +} diff --git a/common/src/main/java/org/apache/sedona/common/utils/RasterUtils.java b/common/src/main/java/org/apache/sedona/common/utils/RasterUtils.java index d5d4f1dbf5..db60411f94 100644 --- a/common/src/main/java/org/apache/sedona/common/utils/RasterUtils.java +++ b/common/src/main/java/org/apache/sedona/common/utils/RasterUtils.java @@ -195,6 +195,10 @@ public static GridCoverage2D clone(RenderedImage image, GridGeometry2D gridGeome * @return A new GridCoverage2D object. */ public static GridCoverage2D create(WritableRaster raster, GridGeometry2D gridGeometry, GridSampleDimension[] bands, Double noDataValue) { + return create(raster, gridGeometry, bands, noDataValue, null); + } + + public static GridCoverage2D create(WritableRaster raster, GridGeometry2D gridGeometry, GridSampleDimension[] bands, Double noDataValue, Map properties) { int numBand = raster.getNumBands(); int rasterDataType = raster.getDataBuffer().getDataType(); @@ -220,7 +224,7 @@ public static GridCoverage2D create(WritableRaster raster, GridGeometry2D gridGe } final RenderedImage image = new BufferedImage(colorModel, raster, false, null); - return gridCoverageFactory.create("genericCoverage", image, gridGeometry, bands, null, null); + return gridCoverageFactory.create("genericCoverage", image, gridGeometry, bands, null, properties); } public static GridCoverage2D create(RenderedImage image, GridGeometry2D gridGeometry, GridSampleDimension[] bands, Double noDataValue) { diff --git a/common/src/test/java/org/apache/sedona/common/raster/RasterAccessorsTest.java b/common/src/test/java/org/apache/sedona/common/raster/RasterAccessorsTest.java index 5dac999c9f..d8f52fca8c 100644 --- a/common/src/test/java/org/apache/sedona/common/raster/RasterAccessorsTest.java +++ b/common/src/test/java/org/apache/sedona/common/raster/RasterAccessorsTest.java @@ -18,10 +18,12 @@ */ package org.apache.sedona.common.raster; +import org.apache.sedona.common.utils.RasterUtils; import org.geotools.coverage.grid.GridCoordinates2D; import org.geotools.coverage.grid.GridCoverage2D; import org.geotools.geometry.jts.JTS; import org.geotools.referencing.CRS; +import org.geotools.referencing.operation.transform.AffineTransform2D; import org.junit.Test; import org.locationtech.jts.geom.*; import org.opengis.referencing.FactoryException; diff --git a/common/src/test/java/org/apache/sedona/common/raster/RasterConstructorsTest.java b/common/src/test/java/org/apache/sedona/common/raster/RasterConstructorsTest.java index 2487fa75a8..d49c7b3970 100644 --- a/common/src/test/java/org/apache/sedona/common/raster/RasterConstructorsTest.java +++ b/common/src/test/java/org/apache/sedona/common/raster/RasterConstructorsTest.java @@ -20,12 +20,14 @@ import org.locationtech.jts.geom.Geometry; import org.locationtech.jts.io.ParseException; import org.opengis.referencing.FactoryException; +import org.opengis.referencing.operation.TransformException; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.Arrays; -import static org.junit.Assert.*; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; public class RasterConstructorsTest extends RasterTestBase { @@ -236,4 +238,44 @@ public void makeEmptyRaster() throws FactoryException { assertEquals("SIGNED_32BITS", gridCoverage2D.getSampleDimension(0).getSampleDimensionType().name()); } + @Test + public void testNetCdfClassic() throws FactoryException, IOException, TransformException { + GridCoverage2D testRaster = RasterConstructors.fromNetCDF(testNc, "O3"); + double[] expectedMetadata = {4.9375, 50.9375, 80, 48, 0.125, -0.125, 0, 0, 0, 4}; + double[] actualMetadata = RasterAccessors.metadata(testRaster); + for (int i = 0; i < expectedMetadata.length; i++) { + assertEquals(expectedMetadata[i], actualMetadata[i], 1e-5); + } + + double actualFirstGridVal = PixelFunctions.value(testRaster, 0, 0, 1); + double expectedFirstGridVal = 60.95357131958008; + assertEquals(expectedFirstGridVal, actualFirstGridVal, 1e-6); + } + + @Test + public void testNetCdfClassicLongForm() throws FactoryException, IOException, TransformException { + GridCoverage2D testRaster = RasterConstructors.fromNetCDF(testNc, "O3", "lon", "lat"); + double[] expectedMetadata = {4.9375, 50.9375, 80, 48, 0.125, -0.125, 0, 0, 0, 4}; + double[] actualMetadata = RasterAccessors.metadata(testRaster); + for (int i = 0; i < expectedMetadata.length; i++) { + assertEquals(expectedMetadata[i], actualMetadata[i], 1e-5); + } + + double actualFirstGridVal = PixelFunctions.value(testRaster, 0, 0, 1); + double expectedFirstGridVal = 60.95357131958008; + assertEquals(expectedFirstGridVal, actualFirstGridVal, 1e-6); + } + + + @Test + public void testRecordInfo() throws IOException { + String actualRecordInfo = RasterConstructors.getRecordInfo(testNc); + String expectedRecordInfo = "O3(time=2, z=2, lat=48, lon=80)\n" + + "\n" + + "NO2(time=2, z=2, lat=48, lon=80)"; + assertEquals(expectedRecordInfo, actualRecordInfo); + } + + + } diff --git a/common/src/test/java/org/apache/sedona/common/raster/RasterTestBase.java b/common/src/test/java/org/apache/sedona/common/raster/RasterTestBase.java index 88eebd58c6..e1f1a9ee95 100644 --- a/common/src/test/java/org/apache/sedona/common/raster/RasterTestBase.java +++ b/common/src/test/java/org/apache/sedona/common/raster/RasterTestBase.java @@ -13,7 +13,7 @@ */ package org.apache.sedona.common.raster; -import org.geotools.coverage.grid.GridCoordinates2D; + import org.geotools.coverage.grid.GridCoordinates2D; import org.geotools.coverage.grid.GridCoverage2D; import org.geotools.coverage.grid.GridCoverageFactory; import org.geotools.gce.geotiff.GeoTiffReader; @@ -33,14 +33,14 @@ import org.opengis.referencing.operation.TransformException; import javax.media.jai.RasterFactory; -import java.awt.Color; +import java.awt.*; import java.awt.image.BufferedImage; import java.awt.image.WritableRaster; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; -import java.io.PrintStream; import java.nio.charset.StandardCharsets; +import java.nio.file.Files; public class RasterTestBase { String arc = "NCOLS 2\nNROWS 2\nXLLCORNER 378922\nYLLCORNER 4072345\nCELLSIZE 30\nNODATA_VALUE 0\n0 1 2 3\n"; @@ -52,6 +52,9 @@ public class RasterTestBase { GridCoverage2D oneBandRaster; GridCoverage2D multiBandRaster; byte[] geoTiff; + byte[] testNc; + String ncFile = resourceFolder + "raster/netcdf/test.nc"; + @Before public void setup() throws IOException { @@ -60,6 +63,8 @@ public void setup() throws IOException { ByteArrayOutputStream bos = new ByteArrayOutputStream(); new GeoTiffWriter(bos).write(multiBandRaster, new GeneralParameterValue[]{}); geoTiff = bos.toByteArray(); + File file = new File(ncFile); + testNc = Files.readAllBytes(file.toPath()); } GridCoverage2D createEmptyRaster(int numBands) diff --git a/core/src/test/resources/raster/netcdf/test.nc b/core/src/test/resources/raster/netcdf/test.nc new file mode 100644 index 0000000000..2f7b67d87f Binary files /dev/null and b/core/src/test/resources/raster/netcdf/test.nc differ diff --git a/docs/api/sql/Raster-loader.md b/docs/api/sql/Raster-loader.md index 595348c39e..0f9b8ff4aa 100644 --- a/docs/api/sql/Raster-loader.md +++ b/docs/api/sql/Raster-loader.md @@ -169,3 +169,63 @@ Output: | GridCoverage2D["g...| +------------------------------------------------------------------+ ``` + +### RS_FromNetCDF + + +Introduction: Returns a raster geometry representing the given record variable short name from a NetCDF file. +This API reads the array data of the record variable *in memory* along with all its dimensions +Since the netCDF format has many variants, the reader might not work for your test case, if that is so, please report this using the public forums. + +This API has been tested for netCDF classic (NetCDF 1, 2, 5) and netCDF4/HDF5 files. + +This API requires the name of the record variable. It is assumed that a variable of the given name exists, and its last 2 dimensions are 'lat' and 'lon' dimensions *respectively*. + +If this assumption does not hold true for your case, you can choose to pass the lonDimensionName and latDimensionName explicitly. + +You can use [RS_NetCDFInfo](./#rs_netcdfinfo) to get the details of the passed netCDF file (variables and its dimensions). + +Format 1: `RS_FromNetCDF(netCDF: ARRAY[Byte], recordVariableName: String)` + +Format 2: `RS_FromNetCDF(netCDF: ARRAY[Byte], recordVariableName: String, lonDimensionName: String, latDimensionName: String)` + +Since: `v1.5.1` + +Spark Example: + +```scala +val df = sedona.read.format("binaryFile").load("/some/path/test.nc") +df = df.withColumn("raster", f.expr("RS_FromNetCDF(content, 'O3')")) +``` + + +```scala +val df = sedona.read.format("binaryFile").load("/some/path/test.nc") +df = df.withColumn("raster", f.expr("RS_FromNetCDF(content, 'O3', 'lon', 'lat')")) +``` + + +### RS_NetCDFInfo + +Introduction: Returns a string containing names of the variables in a given netCDF file along with its dimensions. + +Format: `RS_NetCDFInfo(netCDF: ARRAY[Byte])` + +Since: `1.5.1` + +Spark Example: + +```scala +val df = sedona.read.format("binaryFile").load("/some/path/test.nc") +recordInfo = df.selectExpr("RS_NetCDFInfo(content) as record_info").first().getString(0) +print(recordInfo) +``` + + +Output: + +```text +O3(time=2, z=2, lat=48, lon=80) + +NO2(time=2, z=2, lat=48, lon=80) +``` diff --git a/pom.xml b/pom.xml index 72f4c9eb72..b17004f4d3 100644 --- a/pom.xml +++ b/pom.xml @@ -129,6 +129,10 @@ org.apache.logging.log4j log4j-core + + edu.ucar + cdm-core + org.apache.logging.log4j log4j-slf4j-impl diff --git a/spark/common/src/main/java/org/apache/sedona/core/formatMapper/netcdfParser/SerNetCDFUtils.java b/spark/common/src/main/java/org/apache/sedona/core/formatMapper/netcdfParser/SerNetCDFUtils.java index e5118ca73c..0c23fb39e1 100644 --- a/spark/common/src/main/java/org/apache/sedona/core/formatMapper/netcdfParser/SerNetCDFUtils.java +++ b/spark/common/src/main/java/org/apache/sedona/core/formatMapper/netcdfParser/SerNetCDFUtils.java @@ -32,7 +32,6 @@ package org.apache.sedona.core.formatMapper.netcdfParser; import ucar.ma2.Array; -import ucar.ma2.DataType; import ucar.ma2.Index; import ucar.nc2.Dimension; import ucar.nc2.NetcdfFile; @@ -201,4 +200,5 @@ public static Integer getDimensionSize(NetcdfDataset netcdfFile, String rowDim) if (dimSize < 0) {throw new IllegalStateException("Dimension does not exist!!!");} return dimSize; } + } diff --git a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala index b49706d700..0b2a069300 100644 --- a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala +++ b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala @@ -256,7 +256,9 @@ object Catalog { function[RS_AsImage](), function[RS_ZonalStats](), function[RS_ZonalStatsAll](), - function[RS_Resample]() + function[RS_Resample](), + function[RS_FromNetCDF](), + function[RS_NetCDFInfo]() ) val aggregateExpressions: Seq[Aggregator[Geometry, Geometry, Geometry]] = Seq( diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/raster/RasterConstructors.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/raster/RasterConstructors.scala index f7880aafe2..29c8ab1116 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/raster/RasterConstructors.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/raster/RasterConstructors.scala @@ -62,3 +62,22 @@ case class RS_MakeEmptyRaster(inputExpressions: Seq[Expression]) copy(inputExpressions = newChildren) } } + + +case class RS_FromNetCDF(inputExpressions: Seq[Expression]) + extends InferredExpression( + inferrableFunction2(RasterConstructors.fromNetCDF), inferrableFunction4(RasterConstructors.fromNetCDF)) { + + override def foldable: Boolean = false + protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = { + copy(inputExpressions = newChildren) + } +} + +case class RS_NetCDFInfo(inputExpressions: Seq[Expression]) + extends InferredExpression(RasterConstructors.getRecordInfo _) { + + protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = { + copy(inputExpressions = newChildren) + } +} diff --git a/spark/common/src/test/resources/raster/netcdf/test.nc b/spark/common/src/test/resources/raster/netcdf/test.nc new file mode 100644 index 0000000000..2f7b67d87f Binary files /dev/null and b/spark/common/src/test/resources/raster/netcdf/test.nc differ diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/rasteralgebraTest.scala b/spark/common/src/test/scala/org/apache/sedona/sql/rasteralgebraTest.scala index 61ebc13163..5776949024 100644 --- a/spark/common/src/test/scala/org/apache/sedona/sql/rasteralgebraTest.scala +++ b/spark/common/src/test/scala/org/apache/sedona/sql/rasteralgebraTest.scala @@ -1429,5 +1429,44 @@ class rasteralgebraTest extends TestBaseScala with BeforeAndAfter with GivenWhen assertEquals(expectedMetadata(i), rasterMetadata(i), 1e-6) } } + + it("Passed RS_FromNetCDF with NetCDF classic") { + val df = sparkSession.read.format("binaryFile").load(resourceFolder + "raster/netcdf/test.nc") + val rasterDf = df.selectExpr("RS_FromNetCDF(content, 'O3') as raster") + val expectedMetadata = Seq(4.9375, 50.9375, 80, 48, 0.125, -0.125, 0, 0, 0, 4) + val actualMetadata = rasterDf.selectExpr("RS_Metadata(raster) as metadata").first().getSeq[Double](0) + + for (i <- expectedMetadata.indices) { + assertEquals(expectedMetadata(i), actualMetadata(i), 1e-6) + } + + val expectedFirstVal = 60.95357131958008 + val actualFirstVal = rasterDf.selectExpr("RS_Value(raster, 0, 0, 1) as raster").first().getDouble(0) + assertEquals(expectedFirstVal, actualFirstVal, 1e-6) + } + + it("Passed RS_FromNetCDF with NetCDF classic long form") { + val df = sparkSession.read.format("binaryFile").load(resourceFolder + "raster/netcdf/test.nc") + val rasterDf = df.selectExpr("RS_FromNetCDF(content, 'O3', 'lon', 'lat') as raster") + val expectedMetadata = Seq(4.9375, 50.9375, 80, 48, 0.125, -0.125, 0, 0, 0, 4) + val actualMetadata = rasterDf.selectExpr("RS_Metadata(raster) as metadata").first().getSeq[Double](0) + + for (i <- expectedMetadata.indices) { + assertEquals(expectedMetadata(i), actualMetadata(i), 1e-6) + } + + val expectedFirstVal = 60.95357131958008 + val actualFirstVal = rasterDf.selectExpr("RS_Value(raster, 0, 0, 1) as raster").first().getDouble(0) + assertEquals(expectedFirstVal, actualFirstVal, 1e-6) + } + + it("Passed RS_NetCDFInfo") { + val df = sparkSession.read.format("binaryFile").load(resourceFolder + "raster/netcdf/test.nc") + val recordInfo = df.selectExpr("RS_NetCDFInfo(content) as record_info").first().getString(0) + val expectedRecordInfo = "O3(time=2, z=2, lat=48, lon=80)\n" + + "\n" + + "NO2(time=2, z=2, lat=48, lon=80)" + assertEquals(expectedRecordInfo, recordInfo) + } } }