From 85f160acea5b42c2352235c24b951521d736cbf4 Mon Sep 17 00:00:00 2001 From: Jacob Hall Date: Sat, 2 Nov 2024 11:42:59 -0400 Subject: [PATCH 1/2] initial rework of xml writer to manually build output text --- src/writers/xml.rs | 323 ++++++++++++++++++--------------------------- 1 file changed, 132 insertions(+), 191 deletions(-) diff --git a/src/writers/xml.rs b/src/writers/xml.rs index 07b47f6..b4e0397 100644 --- a/src/writers/xml.rs +++ b/src/writers/xml.rs @@ -1,227 +1,168 @@ -use quick_xml::se::to_writer_with_root; -use serde::{Serialize, Serializer}; -use std::collections::HashMap; -use std::fmt::Write; -use std::sync::mpsc::Receiver; +use quick_xml::escape::escape; +use rayon::prelude::*; +use std::fmt::{Error, Write}; +use std::sync::mpsc::{channel, Receiver}; use crate::elements::{Element, ElementType, Member, Metadata, SimpleElementType}; +use crate::threadpools::WRITER_THREAD_POOL; -fn serialize_simple_element_type( - value: &Option, - serializer: S, -) -> Result +// wrapper struct that implements std::fmt::Write for any type +// that implements std::io::Write +struct ToFmtWrite(pub T); + +impl Write for ToFmtWrite where - S: Serializer, + T: std::io::Write, { - match value { - Some(SimpleElementType::Node) => serializer.serialize_str("node"), - Some(SimpleElementType::Way) => serializer.serialize_str("way"), - Some(SimpleElementType::Relation) => serializer.serialize_str("relation"), - None => serializer.serialize_none(), + fn write_str(&mut self, s: &str) -> std::fmt::Result { + self.0.write_all(s.as_bytes()).map_err(|_| std::fmt::Error) } } -#[derive(Serialize)] -#[serde(remote = "Member")] -struct MemberDef { - #[serde(rename = "@type", serialize_with = "serialize_simple_element_type")] - t: Option, - #[serde(rename = "@ref")] - id: i64, - #[serde(rename = "@role")] - role: Option, +fn create_header(metadata: Metadata) -> String { + return "\n\n".to_string(); } -#[derive(Serialize)] -struct XmlTags { - #[serde(rename = "@k")] - k: String, - #[serde(rename = "@v")] - v: String, -} - -#[derive(Serialize)] -pub struct XmlElementMeta { - #[serde(rename = "@id")] - id: i64, - #[serde(rename = "@user")] - user: Option, - #[serde(rename = "@uid")] - uid: Option, - #[serde(rename = "@visible")] - visible: bool, - #[serde(rename = "@version")] - version: Option, - #[serde(rename = "@changeset")] - changeset: Option, - #[serde(rename = "@timestamp")] - timestamp: Option, -} +fn append_serialized_metadata(base: &mut String, element: &Element) { + base.push_str(" id=\""); + base.push_str(&lexical::to_string(element.id)); + base.push('\"'); -#[derive(Serialize)] -struct XmlNode { - #[serde(rename = "@lat")] - lat: f64, - #[serde(rename = "@lon")] - lon: f64, - #[serde(flatten)] - meta: XmlElementMeta, - #[serde(default, rename = "tag")] - tags: Vec, -} + if let Some(c) = element.changeset { + base.push_str(" changeset="); + base.push_str(&lexical::to_string(c)); + } -#[derive(Serialize)] -#[serde(rename = "nd")] -struct XmlWayNode { - #[serde(rename = "@ref")] - nd_ref: i64, -} + if let Some(t) = element.timestamp { + base.push_str(",\"timestamp\":"); + base.push_str(&stringify(t)); + } -#[derive(Serialize)] -struct XmlWay { - #[serde(flatten)] - meta: XmlElementMeta, - nd: Vec, - #[serde(default, rename = "tag")] - tags: Vec, -} + if let Some(u) = element.uid { + base.push_str(",\"uid\":"); + base.push_str(&lexical::to_string(u)); + } -fn serialize_member_vec(v: &[Member], serializer: S) -> Result { - #[derive(Serialize)] - struct Wrapper<'a>(#[serde(with = "MemberDef")] &'a Member); + if let Some(u) = element.user { + base.push_str(",\"user\":"); + base.push_str(&stringify(u)); + } - v.iter() - .map(Wrapper) - .collect::>() - .serialize(serializer) + // add visible field only if it is false + if element.visible == Some(false) { + base.push_str(",\"visible\":false"); + } } -#[derive(Serialize)] -struct XmlRelation { - #[serde(flatten)] - meta: XmlElementMeta, - #[serde(serialize_with = "serialize_member_vec")] - member: Vec, - #[serde(default, rename = "tag")] - tags: Vec, +fn append_serialized_tags(base: &mut String, element: &Element) { + for (k, v) in &element.tags { + base.push_str(" \n"); + } } -#[derive(Serialize)] -pub struct XmlMetadata { - #[serde(rename = "@version", skip_serializing_if = "Option::is_none")] - pub version: Option, - #[serde(rename = "@generator", skip_serializing_if = "Option::is_none")] - pub generator: Option, - #[serde(rename = "@copyright", skip_serializing_if = "Option::is_none")] - pub copyright: Option, - #[serde(rename = "@license", skip_serializing_if = "Option::is_none")] - pub license: Option, -} +fn append_serialized_element(base: &mut String, element: Element) { + match &element.element_type { + ElementType::Node { lat, lon } => { + base.push_str(" '); + append_serialized_tags(base, &element); + base.push_str(" \n"); + } else { + base.push_str("/>\n") + } + } + ElementType::Way { nodes } => { + // finish "type": "way", then start nodes dict + base.push_str(" \n"); -#[derive(Serialize)] -struct OsmXmlDocument { - #[serde(flatten)] - metadata: XmlMetadata, - #[serde(default)] - node: Vec, - #[serde(default)] - way: Vec, - #[serde(default)] - relation: Vec, -} + for n in nodes { + base.push_str(" \n") + } -struct ToFmtWrite(pub T); + append_serialized_tags(base, &element); -impl Write for ToFmtWrite -where - T: std::io::Write, -{ - fn write_str(&mut self, s: &str) -> std::fmt::Result { - self.0.write_all(s.as_bytes()).map_err(|_| std::fmt::Error) + base.push_str(" \n"); + } + ElementType::Relation { members } => { + base.push_str(" \n"); + + for m in members { + base.push_str(" base.push_str("type=\"node\""), + Some(SimpleElementType::Way) => base.push_str("type=\"way\""), + Some(SimpleElementType::Relation) => base.push_str("type=\"relation\""), + None => (), + } + + base.push_str(" ref=\""); + base.push_str(&lexical::to_string(m.id)); + base.push_str("\" role=\""); + if let Some(ref r) = m.role { + base.push_str(&escape(r.as_str())); + } + base.push_str("\"/>\n"); + } + base.push_str(" \n"); + } } } -fn convert_tags(element_tags: HashMap) -> Vec { - element_tags - .into_iter() - .map(|(k, v)| XmlTags { k, v }) - .collect() -} - -fn convert_nodes(way_nodes: Vec) -> Vec { - way_nodes - .into_iter() - .map(|nd_ref| XmlWayNode { nd_ref }) - .collect() -} - -fn split_and_convert_elements( - received_elements: I, -) -> (Vec, Vec, Vec) -where - I: Iterator, -{ - let mut nodes = Vec::new(); - let mut ways = Vec::new(); - let mut relations = Vec::new(); - for e in received_elements { - let meta = XmlElementMeta { - id: e.id, - user: e.user, - uid: e.uid, - visible: e.visible.unwrap_or(true), // TODO: better default behavior? - version: e.version, - changeset: e.changeset, - timestamp: e.timestamp, - }; - let tags = convert_tags(e.tags); - match e.element_type { - ElementType::Node { lat, lon } => nodes.push(XmlNode { - lat, - lon, - meta, - tags, - }), - ElementType::Way { nodes } => ways.push(XmlWay { - meta, - nd: convert_nodes(nodes), - tags, - }), - ElementType::Relation { members } => relations.push(XmlRelation { - meta, - member: members, - tags, - }), - } +fn serialize_chunk(chunk: Vec) -> Result { + let mut output = String::new(); + for element in chunk { + append_serialized_element(&mut output, element); } - (nodes, ways, relations) + Ok(output) } pub fn write_xml(receiver: Receiver>, metadata: Metadata, dest: D) { - let (node, way, relation) = split_and_convert_elements(receiver.iter().flatten()); - - let xml_osm_document = OsmXmlDocument { - metadata: XmlMetadata { - version: metadata.version, - generator: metadata.generator, - copyright: metadata.copyright, - license: metadata.license, - }, - node, - way, - relation, - }; - let mut writer = ToFmtWrite(dest); + let (output_sender, output_receiver) = channel(); + WRITER_THREAD_POOL.install(move || { + receiver + .into_iter() + .par_bridge() + .map(serialize_chunk) + .map(|result| result.expect("Failed to serialize chunk")) + .for_each(|s| match output_sender.clone().send(s) { + Ok(_) => (), + Err(e) => panic!("Error passing output chunk between threads: {e:?}"), + }); + }); + + let header = create_header(metadata); + writer - .write_str("") + .write_str(&header) .expect("Unable to write header to XML file!"); - match to_writer_with_root(writer, "osm", &xml_osm_document) { - Ok(_) => (), - Err(e) => { - panic!("XML serialization error: {e:?}"); - } + for output_string in output_receiver { + writer + .write_str(&output_string) + .expect("Failed to write to output"); } + + writer + .write_str("\n") + .expect("Couldn't write final closing curly brace to output."); } From 3858cdc1e2d7b5f7d26a36df82d6bb6257f983e5 Mon Sep 17 00:00:00 2001 From: Jacob Hall Date: Sun, 3 Nov 2024 11:06:32 -0500 Subject: [PATCH 2/2] finish implementing header, element metadata in new xml writer --- src/writers/xml.rs | 51 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 13 deletions(-) diff --git a/src/writers/xml.rs b/src/writers/xml.rs index b4e0397..c2911f8 100644 --- a/src/writers/xml.rs +++ b/src/writers/xml.rs @@ -3,7 +3,7 @@ use rayon::prelude::*; use std::fmt::{Error, Write}; use std::sync::mpsc::{channel, Receiver}; -use crate::elements::{Element, ElementType, Member, Metadata, SimpleElementType}; +use crate::elements::{Element, ElementType, Metadata, SimpleElementType}; use crate::threadpools::WRITER_THREAD_POOL; // wrapper struct that implements std::fmt::Write for any type @@ -20,7 +20,27 @@ where } fn create_header(metadata: Metadata) -> String { - return "\n\n".to_string(); + let mut header = String::new(); + header.push_str("\n { + if let Some($attr) = &metadata.$attr { + header.push_str(concat!(" ", stringify!($attr), "=\"")); + header.push_str($attr); + header.push('\"'); + } + }; + } + + append_attribute!(copyright); + append_attribute!(generator); + append_attribute!(license); + append_attribute!(timestamp); + append_attribute!(version); + + header.push_str(">\n"); + header } fn append_serialized_metadata(base: &mut String, element: &Element) { @@ -29,28 +49,33 @@ fn append_serialized_metadata(base: &mut String, element: &Element) { base.push('\"'); if let Some(c) = element.changeset { - base.push_str(" changeset="); + base.push_str(" changeset=\""); base.push_str(&lexical::to_string(c)); + base.push('\"'); } - if let Some(t) = element.timestamp { - base.push_str(",\"timestamp\":"); - base.push_str(&stringify(t)); + if let Some(t) = &element.timestamp { + base.push_str(" timestamp=\""); + base.push_str(t); + base.push('\"'); } if let Some(u) = element.uid { - base.push_str(",\"uid\":"); + base.push_str(" uid=\""); base.push_str(&lexical::to_string(u)); + base.push('\"'); } - if let Some(u) = element.user { - base.push_str(",\"user\":"); - base.push_str(&stringify(u)); + if let Some(u) = &element.user { + base.push_str(" user=\""); + base.push_str(u); + base.push('\"'); } - // add visible field only if it is false - if element.visible == Some(false) { - base.push_str(",\"visible\":false"); + if element.visible == Some(true) { + base.push_str(" visible=\"true\""); + } else if element.visible == Some(false) { + base.push_str(" visible=\"false\""); } }