Skip to content

Commit

Permalink
Merge branch 'main' into kyle/remove-arrow
Browse files Browse the repository at this point in the history
  • Loading branch information
kylebarron committed Feb 12, 2024
2 parents 1077589 + 625f07d commit 34db004
Show file tree
Hide file tree
Showing 8 changed files with 413 additions and 22 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/linux.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: CI-Linux

on: [push, pull_request]
on: [push, pull_request, merge_group]

env:
CARGO_TERM_COLOR: always
Expand Down
28 changes: 28 additions & 0 deletions geozero/src/feature_processor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,46 @@ use crate::property_processor::PropertyProcessor;
#[allow(unused_variables)]
pub trait FeatureProcessor: GeomProcessor + PropertyProcessor {
/// Begin of dataset processing
///
/// - `name`: optional string passed in by the caller (presumably the dataset's name).
///
/// The default implementation ignores `name`, does nothing and returns `Ok(())`.
///
/// ## Invariants
///
/// - `dataset_begin` is called _only once_ for an entire dataset.
/// - `dataset_begin` is called before all other methods, including `feature_begin`,
///   `properties_begin`, `geometry_begin`, and all methods from [`GeomProcessor`] and
///   [`PropertyProcessor`].
fn dataset_begin(&mut self, name: Option<&str>) -> Result<()> {
Ok(())
}
/// End of dataset processing
///
/// The default implementation does nothing and returns `Ok(())`.
///
/// ## Invariants
///
/// - `dataset_end` is called _only once_ for an entire dataset.
/// - No other methods may be called after `dataset_end`.
fn dataset_end(&mut self) -> Result<()> {
Ok(())
}
/// Begin of feature processing
///
/// - `idx`: the positional row index in the dataset. For the `n`th row, `idx` will be
///   `n`.
///
/// The default implementation ignores `idx`, does nothing and returns `Ok(())`.
///
/// ## Invariants
///
/// - `feature_begin` will be called before both `properties_begin` and `geometry_begin`.
fn feature_begin(&mut self, idx: u64) -> Result<()> {
Ok(())
}
/// End of feature processing
///
/// - `idx`: the positional row index in the dataset. For the `n`th row, `idx` will be
///   `n`.
///
/// The default implementation ignores `idx`, does nothing and returns `Ok(())`.
///
/// ## Invariants
///
/// - `feature_end` will be called after both `properties_end` and `geometry_end`.
fn feature_end(&mut self, idx: u64) -> Result<()> {
Ok(())
}
/// Begin of feature property processing
///
/// The default implementation does nothing and returns `Ok(())`.
///
/// ## Invariants
///
/// - `properties_begin` will not be called a second time before `properties_end` is called.
fn properties_begin(&mut self) -> Result<()> {
Ok(())
}
Expand All @@ -30,6 +54,10 @@ pub trait FeatureProcessor: GeomProcessor + PropertyProcessor {
Ok(())
}
/// Begin of feature geometry processing
///
/// The default implementation does nothing and returns `Ok(())`.
///
/// ## Following events
///
/// - Relevant methods from [`GeomProcessor`] will be called for each geometry.
fn geometry_begin(&mut self) -> Result<()> {
Ok(())
}
Expand Down
272 changes: 272 additions & 0 deletions geozero/src/geojson/geojson_line_writer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,272 @@
use std::io::Write;

use crate::error::Result;
use crate::geojson::GeoJsonWriter;
use crate::{FeatureProcessor, GeomProcessor, PropertyProcessor};

/// Line Delimited GeoJSON Writer: One feature per line.
///
/// Wraps a [`GeoJsonWriter`] for the JSON text itself and adds a `\n` after every top-level
/// object (feature or standalone geometry).
///
/// See <https://jsonlines.org>
pub struct GeoJsonLineWriter<W: Write> {
/// Number of feature/geometry contexts that have been begun but not yet ended.
///
/// We use a count of the number of contexts entered to decide when to add a newline character
/// and finish a line. The [newline-delimited GeoJSON
/// spec](https://datatracker.ietf.org/doc/html/rfc8142) defines that any type of GeoJSON
/// objects can be written as an object on a single line. Therefore, we can't solely add
/// newlines in `feature_end`. If the object on this line is a Point geometry, then we need to
/// add a newline character in `point_end`, because `feature_end` will never be called.
///
/// Note that this approach is not resilient to malformed input. If the number of begin and end
/// calls do not match, newline characters will not be correctly added.
open_contexts: usize,
/// The wrapped GeoJSON writer. All events are forwarded to it, and separators (newlines and
/// manually placed commas) are written straight to its output sink.
line_writer: GeoJsonWriter<W>,
}

impl<W: Write> GeoJsonLineWriter<W> {
    /// Creates a line-delimited GeoJSON writer that emits to `out`.
    ///
    /// The writer starts with zero open contexts, i.e. at the beginning of a line.
    pub fn new(out: W) -> Self {
        Self {
            open_contexts: 0,
            line_writer: GeoJsonWriter::new(out),
        }
    }

    /// Writes a single `\n` byte to the underlying output, terminating the current line.
    fn write_newline(&mut self) -> Result<()> {
        self.line_writer.out.write_all(b"\n")?;
        Ok(())
    }

    /// Records that a feature or geometry context has been entered.
    fn begin_context(&mut self) {
        self.open_contexts += 1;
    }

    /// Records that a feature or geometry context has been left.
    ///
    /// When the outermost context closes (the counter drops back to zero) the current line is
    /// complete and a newline is written.
    ///
    /// # Errors
    ///
    /// Returns an error if no context is open, i.e. an end event arrived without a matching
    /// begin event. Previously this subtraction was unchecked: it panicked in builds with
    /// overflow checks and wrapped to `usize::MAX` otherwise, which silently broke every
    /// subsequent newline/comma decision. Also propagates any error from writing the newline.
    fn end_context(&mut self) -> Result<()> {
        match self.open_contexts.checked_sub(1) {
            Some(remaining) => {
                self.open_contexts = remaining;
                if remaining == 0 {
                    self.write_newline()?;
                }
                Ok(())
            }
            // Reported through `std::io::Error`, which this module already converts into the
            // crate error type (see the `?` on `write_all`).
            None => Err(std::io::Error::new(
                std::io::ErrorKind::InvalidInput,
                "GeoJsonLineWriter: end event received without a matching begin event",
            )
            .into()),
        }
    }

    /// Manually add a comma to the writer.
    fn comma(&mut self) -> Result<()> {
        self.line_writer.out.write_all(b",")?;
        Ok(())
    }
}

impl<W: Write> FeatureProcessor for GeoJsonLineWriter<W> {
    /// Enters a new context for the feature and starts it on the wrapped writer.
    ///
    /// The caller's index is intentionally discarded: the wrapped writer is always given `0`
    /// so that it does not emit a leading comma, since every feature starts its own line.
    fn feature_begin(&mut self, _idx: u64) -> Result<()> {
        self.begin_context();
        self.line_writer.feature_begin(0)
    }

    /// Finishes the feature on the wrapped writer, then leaves its context (which emits the
    /// newline once no context remains open).
    fn feature_end(&mut self, idx: u64) -> Result<()> {
        self.line_writer
            .feature_end(idx)
            .and_then(|()| self.end_context())
    }

    /// Forwarded unchanged to the wrapped writer.
    fn properties_begin(&mut self) -> Result<()> {
        let inner = &mut self.line_writer;
        inner.properties_begin()
    }

    /// Forwarded unchanged to the wrapped writer.
    fn properties_end(&mut self) -> Result<()> {
        let inner = &mut self.line_writer;
        inner.properties_end()
    }

    /// Forwarded unchanged to the wrapped writer.
    fn geometry_begin(&mut self) -> Result<()> {
        let inner = &mut self.line_writer;
        inner.geometry_begin()
    }

    /// Forwarded unchanged to the wrapped writer.
    fn geometry_end(&mut self) -> Result<()> {
        let inner = &mut self.line_writer;
        inner.geometry_end()
    }
}

impl<W: Write> GeoJsonLineWriter<W> {
    /// Shared prologue for every geometry start event.
    ///
    /// Whenever `idx > 0`, the underlying `GeoJsonWriter` would automatically add a comma
    /// prefix. _Almost always_ that comma is unwanted, because each feature or geometry sits on
    /// its own line. The exception is a geometry nested inside an already open context that is
    /// not the first one there (`open_contexts > 0` and `idx > 0`): it still needs a separator,
    /// so the comma is written here by hand. Callers then always hand `0` to the underlying
    /// writer as the index.
    ///
    /// After the optional comma, a new context is entered for the geometry.
    fn enter_geometry(&mut self, idx: usize) -> Result<()> {
        let needs_separator = idx > 0 && self.open_contexts > 0;
        if needs_separator {
            self.comma()?;
        }
        self.begin_context();
        Ok(())
    }
}

impl<W: Write> GeomProcessor for GeoJsonLineWriter<W> {
    /// Reports the coordinate dimensions requested by the wrapped writer.
    fn dimensions(&self) -> crate::CoordDimensions {
        self.line_writer.dimensions()
    }

    /// Forwarded unchanged to the wrapped writer.
    fn xy(&mut self, x: f64, y: f64, idx: usize) -> Result<()> {
        self.line_writer.xy(x, y, idx)
    }

    /// Forwarded unchanged to the wrapped writer.
    fn coordinate(
        &mut self,
        x: f64,
        y: f64,
        z: Option<f64>,
        m: Option<f64>,
        t: Option<f64>,
        tm: Option<u64>,
        idx: usize,
    ) -> Result<()> {
        self.line_writer.coordinate(x, y, z, m, t, tm, idx)
    }

    /// Writes an empty point as a complete geometry: context is entered and left in one call.
    fn empty_point(&mut self, idx: usize) -> Result<()> {
        self.enter_geometry(idx)?;
        self.line_writer.empty_point(0)?;
        self.end_context()
    }

    /// Starts a point; the wrapped writer always receives index `0`.
    fn point_begin(&mut self, idx: usize) -> Result<()> {
        self.enter_geometry(idx)?;
        self.line_writer.point_begin(0)
    }

    /// Ends a point and leaves its context.
    fn point_end(&mut self, idx: usize) -> Result<()> {
        self.line_writer
            .point_end(idx)
            .and_then(|()| self.end_context())
    }

    /// Starts a multipoint; the wrapped writer always receives index `0`.
    fn multipoint_begin(&mut self, size: usize, idx: usize) -> Result<()> {
        self.enter_geometry(idx)?;
        self.line_writer.multipoint_begin(size, 0)
    }

    /// Ends a multipoint and leaves its context.
    fn multipoint_end(&mut self, idx: usize) -> Result<()> {
        self.line_writer
            .multipoint_end(idx)
            .and_then(|()| self.end_context())
    }

    /// Starts a linestring; the wrapped writer always receives index `0`.
    fn linestring_begin(&mut self, tagged: bool, size: usize, idx: usize) -> Result<()> {
        self.enter_geometry(idx)?;
        self.line_writer.linestring_begin(tagged, size, 0)
    }

    /// Ends a linestring and leaves its context.
    fn linestring_end(&mut self, tagged: bool, idx: usize) -> Result<()> {
        self.line_writer
            .linestring_end(tagged, idx)
            .and_then(|()| self.end_context())
    }

    /// Starts a multilinestring; the wrapped writer always receives index `0`.
    fn multilinestring_begin(&mut self, size: usize, idx: usize) -> Result<()> {
        self.enter_geometry(idx)?;
        self.line_writer.multilinestring_begin(size, 0)
    }

    /// Ends a multilinestring and leaves its context.
    fn multilinestring_end(&mut self, idx: usize) -> Result<()> {
        self.line_writer
            .multilinestring_end(idx)
            .and_then(|()| self.end_context())
    }

    /// Starts a polygon; the wrapped writer always receives index `0`.
    fn polygon_begin(&mut self, tagged: bool, size: usize, idx: usize) -> Result<()> {
        self.enter_geometry(idx)?;
        self.line_writer.polygon_begin(tagged, size, 0)
    }

    /// Ends a polygon and leaves its context.
    fn polygon_end(&mut self, tagged: bool, idx: usize) -> Result<()> {
        self.line_writer
            .polygon_end(tagged, idx)
            .and_then(|()| self.end_context())
    }

    /// Starts a multipolygon; the wrapped writer always receives index `0`.
    fn multipolygon_begin(&mut self, size: usize, idx: usize) -> Result<()> {
        self.enter_geometry(idx)?;
        self.line_writer.multipolygon_begin(size, 0)
    }

    /// Ends a multipolygon and leaves its context.
    fn multipolygon_end(&mut self, idx: usize) -> Result<()> {
        self.line_writer
            .multipolygon_end(idx)
            .and_then(|()| self.end_context())
    }

    /// Starts a geometry collection; the wrapped writer always receives index `0`.
    fn geometrycollection_begin(&mut self, size: usize, idx: usize) -> Result<()> {
        self.enter_geometry(idx)?;
        self.line_writer.geometrycollection_begin(size, 0)
    }

    /// Ends a geometry collection and leaves its context.
    fn geometrycollection_end(&mut self, idx: usize) -> Result<()> {
        self.line_writer
            .geometrycollection_end(idx)
            .and_then(|()| self.end_context())
    }
}

impl<W: Write> PropertyProcessor for GeoJsonLineWriter<W> {
/// Forwards the property unchanged to the wrapped [`GeoJsonWriter`] and returns its result.
///
/// Properties do not open or close a context, so no newline or comma handling happens here.
fn property(&mut self, idx: usize, name: &str, value: &crate::ColumnValue) -> Result<bool> {
self.line_writer.property(idx, name, value)
}
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::geojson::read_geojson_lines;

    /// Standalone geometries, one per line, must round-trip line by line.
    #[test]
    fn good_geometries() {
        let input = r#"{ "type": "Point", "coordinates": [1.1, 1.2] }
{ "type": "Point", "coordinates": [2.1, 2.2] }
{ "type": "Point", "coordinates": [3.1, 3.2] }
"#;
        let mut out: Vec<u8> = Vec::new();
        assert!(
            read_geojson_lines(input.as_bytes(), &mut GeoJsonLineWriter::new(&mut out)).is_ok()
        );
        assert_json_lines_eq(&out, input);
    }

    /// Features with geometry and properties, one per line, must round-trip line by line.
    #[test]
    fn good_features() {
        let input = r#"{ "type": "Feature", "geometry": { "type": "Point", "coordinates": [1.1, 1.2] }, "properties": { "name": "first" } }
{ "type": "Feature", "geometry": { "type": "Point", "coordinates": [2.1, 2.2] }, "properties": { "name": "second" } }
{ "type": "Feature", "geometry": { "type": "Point", "coordinates": [3.1, 3.3] }, "properties": { "name": "third" } }
"#;
        let mut out: Vec<u8> = Vec::new();
        assert!(
            read_geojson_lines(input.as_bytes(), &mut GeoJsonLineWriter::new(&mut out)).is_ok()
        );
        assert_json_lines_eq(&out, input);
    }

    /// Asserts that `a` and `b` contain the same number of lines and that corresponding lines
    /// parse to equal JSON values.
    ///
    /// The line counts are compared explicitly because `zip` stops at the shorter input: without
    /// the check, a writer that produced too few lines (or no output at all) would still pass.
    fn assert_json_lines_eq(a: &[u8], b: &str) {
        let a = std::str::from_utf8(a).unwrap();
        let a_lines: Vec<&str> = a.lines().collect();
        let b_lines: Vec<&str> = b.lines().collect();
        assert_eq!(
            a_lines.len(),
            b_lines.len(),
            "number of JSON lines differs"
        );
        for (a_line, b_line) in a_lines.iter().zip(&b_lines) {
            let a_val: serde_json::Value = serde_json::from_str(a_line).unwrap();
            let b_val: serde_json::Value = serde_json::from_str(b_line).unwrap();
            assert_eq!(a_val, b_val);
        }
    }
}
2 changes: 1 addition & 1 deletion geozero/src/geojson/geojson_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use std::io::Write;
/// GeoJSON writer.
pub struct GeoJsonWriter<W: Write> {
dims: CoordDimensions,
out: W,
pub(crate) out: W,
}

impl<W: Write> GeoJsonWriter<W> {
Expand Down
2 changes: 2 additions & 0 deletions geozero/src/geojson/mod.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
//! GeoJSON conversions.
pub(crate) mod geojson_line_reader;
pub(crate) mod geojson_line_writer;
pub(crate) mod geojson_reader;
pub(crate) mod geojson_writer;

pub use geojson_line_reader::*;
pub use geojson_line_writer::*;
pub use geojson_reader::*;
pub use geojson_writer::*;

Expand Down
Loading

0 comments on commit 34db004

Please sign in to comment.