Skip to content

Commit

Permalink
unique id for duplicate region names (#34)
Browse files Browse the repository at this point in the history
  • Loading branch information
AndGem authored Sep 10, 2022
1 parent 1dcbf50 commit 2e73d34
Show file tree
Hide file tree
Showing 7 changed files with 75 additions and 80 deletions.
14 changes: 7 additions & 7 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "osm_extract_polygon"
version = "0.4.2"
version = "0.4.3"
authors = ["Andreas <[email protected]>"]
edition = "2018"

Expand Down
9 changes: 8 additions & 1 deletion src/converter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use crate::osm_reader::RelationNodes;
/// A named administrative-boundary polygon extracted from OSM relation data.
pub struct Polygon {
// Display name of the region (falls back to "UNKNOWN_NAME" when the relation has no name tag — see convert_to_poly).
pub name: String,
// Outer rings of the polygon; each inner Vec is one ring of points.
// NOTE(review): whether holes/inner rings are included is not visible here — confirm against converter.rs.
pub points: Vec<Vec<Point>>,
// OSM relation id (taken from rn.relation.id.0); used to disambiguate
// output filenames when two regions share the same (case-insensitive) name.
pub relation_id: i64,
}

#[derive(Clone)]
Expand Down Expand Up @@ -112,6 +113,8 @@ fn convert_to_poly(rn: RelationNodes) -> Polygon {
let unknown_name = String::from("UNKNOWN_NAME");
let empty_string = String::from("");

let relation_id: i64 = rn.relation.id.0;

let name = rn
.relation
.tags
Expand All @@ -132,7 +135,11 @@ fn convert_to_poly(rn: RelationNodes) -> Polygon {
name
};

Polygon { name: fullname, points }
Polygon {
name: fullname,
points,
relation_id,
}
}

fn convert_nodes_to_points(nodes: &[Node]) -> Vec<Point> {
Expand Down
2 changes: 1 addition & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ fn main() {

let relations = osm_reader::read_osm(in_filename, &min_admin_level, &max_admin_level);
let polygons = converter::convert(relations);
let result = output::output_handler::write(&path, &polygons, output_handler_config);
let result = output::output_handler::write(path, &polygons, output_handler_config);

match result {
Ok(size) => println!("success! wrote {} files!", size),
Expand Down
33 changes: 26 additions & 7 deletions src/osm_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,11 @@ fn find_admin_boundary_relations(
.map(|relation| (relation.id, relation))
.collect();

println!("finished parsing {} relations! {}s", relation_id_to_relation.len(), now.elapsed().as_secs());
println!(
"finished parsing {} relations! {}s",
relation_id_to_relation.len(),
now.elapsed().as_secs()
);
relation_id_to_relation
}

Expand Down Expand Up @@ -155,8 +159,8 @@ fn find_nodes_for_node_ids(pbf: &mut OsmPbfReaderFile, node_ids: HashSet<NodeId>
#[cfg(test)]
mod tests {
use super::*;
use std::iter::FromIterator;
use osmpbfreader::Tags;
use std::iter::FromIterator;

use smartstring::alias::String;

Expand All @@ -169,14 +173,20 @@ mod tests {
#[test]
fn test_admin_level_too_high_is_not_valid() {
let max_admin_level = 8;
let relation = create_relation(vec![(String::from("admin_level"), String::from((max_admin_level + 1).to_string()))]);
let relation = create_relation(vec![(
String::from("admin_level"),
String::from((max_admin_level + 1).to_string()),
)]);
assert_eq!(has_proper_admin_level(&relation, &1, &max_admin_level), false);
}

#[test]
fn test_admin_level_is_max_level_is_valid() {
let max_admin_level = 8;
let relation = create_relation(vec![(String::from("admin_level"), String::from((max_admin_level).to_string()))]);
let relation = create_relation(vec![(
String::from("admin_level"),
String::from((max_admin_level).to_string()),
)]);
assert_eq!(has_proper_admin_level(&relation, &1, &max_admin_level), true);
}

Expand All @@ -192,9 +202,18 @@ mod tests {
let min_admin_level = 3;
let max_admin_level = min_admin_level;

let relation_too_little = create_relation(vec![(String::from("admin_level"), String::from((min_admin_level - 1).to_string()))]);
let relation_exact = create_relation(vec![(String::from("admin_level"), String::from((min_admin_level).to_string()))]);
let relation_too_big = create_relation(vec![(String::from("admin_level"), String::from((min_admin_level + 1).to_string()))]);
let relation_too_little = create_relation(vec![(
String::from("admin_level"),
String::from((min_admin_level - 1).to_string()),
)]);
let relation_exact = create_relation(vec![(
String::from("admin_level"),
String::from((min_admin_level).to_string()),
)]);
let relation_too_big = create_relation(vec![(
String::from("admin_level"),
String::from((min_admin_level + 1).to_string()),
)]);

assert_eq!(
has_proper_admin_level(&relation_too_little, &min_admin_level, &max_admin_level),
Expand Down
3 changes: 3 additions & 0 deletions src/output/file_writer_geojson.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ mod tests {
let single_polygon = Polygon {
name: "barfoo".to_string(),
points: vec![vec![p1.clone(), p2.clone(), p3.clone()]],
relation_id: 1,
};

let result = convert_polygon_to_geo_polygons(&single_polygon);
Expand Down Expand Up @@ -121,6 +122,7 @@ mod tests {
vec![p21.clone(), p22.clone(), p23.clone()],
vec![p31.clone(), p32.clone(), p33.clone()],
],
relation_id: 1,
};

let result = convert_polygon_to_geo_polygons(&poly);
Expand Down Expand Up @@ -210,6 +212,7 @@ mod tests {
let single_polygon = Polygon {
name: poly_name.to_string(),
points: vec![vec![]],
relation_id: 1,
};
let result = create_properties(&single_polygon);

Expand Down
92 changes: 29 additions & 63 deletions src/output/output_handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use crate::output::OverwriteConfiguration;
use std::fs::File;
use std::time::Instant;

use std::collections::HashMap;
use std::collections::HashSet;
use std::fs::create_dir_all;

pub trait FileWriter {
Expand Down Expand Up @@ -106,20 +106,27 @@ impl OutputHandler {
fn pair_safe_filenames_and_polygons(polygons: &[Polygon]) -> Vec<(String, &Polygon)> {
let safe_names: Vec<String> = polygons.iter().map(|p| make_safe(&p.name)).collect();

let mut duplicate_count: HashMap<String, usize> = count_duplicate_names(&safe_names);
let mut seen_names: HashSet<String> = HashSet::new();
let mut duplicate_names: HashSet<String> = HashSet::new();

safe_names.iter().for_each(|name| {
if seen_names.contains(&name.to_lowercase()) {
duplicate_names.insert(name.to_string().to_lowercase());
} else {
seen_names.insert(name.to_string().to_lowercase());
}
});

safe_names
.iter()
.zip(polygons.iter())
.map(|(name, p)| {
let out_name;
if duplicate_count.contains_key(&name.to_lowercase()) {
let val = duplicate_count.get_mut(&name.to_lowercase()).unwrap();
out_name = format!("{}_{}", name, val);
*val -= 1;
let out_name = if duplicate_names.contains(&name.to_lowercase()) {
format!("{}_{}", name, p.relation_id)
} else {
out_name = name.to_string();
}
name.to_string()
};

(out_name, p)
})
.collect()
Expand All @@ -131,15 +138,6 @@ fn make_safe(name: &str) -> String {
s
}

fn count_duplicate_names(safe_names: &[String]) -> HashMap<String, usize> {
let mut m: HashMap<String, usize> = HashMap::new();
for x in safe_names {
*m.entry(x.to_string().to_lowercase()).or_default() += 1;
}

m.into_iter().filter(|&(_, v)| v != 1).collect()
}

// ////////////////////////////////////
// ////////////////////////////////////
// UNIT TESTS
Expand Down Expand Up @@ -170,47 +168,6 @@ mod tests {
assert_eq!(result, expected);
}

#[test]
fn test_count_duplicates_when_input_is_unique_return_empty_hashmap() {
let p1_name = String::from("abc123");
let p2_name = String::from("defgh1");
let p3_name = String::from("aaaddd");

let input = [p1_name, p2_name, p3_name];
let result = count_duplicate_names(&input);

assert_eq!(result, HashMap::new());
}

#[test]
fn test_count_duplicates_when_input_contains_duplicates_then_have_them_in_hashmap() {
let p1_name = String::from("random_name");
let p1_name_copy = p1_name.clone();
let p2_name = String::from("random_name2");

let expected: HashMap<String, usize> = [(p1_name.clone(), 2)].iter().cloned().collect();

let input = [p1_name, p2_name, p1_name_copy];

let result = count_duplicate_names(&input);

assert_eq!(result, expected);
}

#[test]
fn test_count_duplicates_when_input_contains_duplicates_then_have_them_in_hashmap_and_ignores_case() {
let p1_name = String::from("random_name");
let p1_name_copy = String::from("RandOm_NAme");
let p2_name = String::from("random_name2");

let expected: HashMap<String, usize> = [(p1_name.clone(), 2)].iter().cloned().collect();

let input = [p1_name, p2_name, p1_name_copy];

let result = count_duplicate_names(&input);

assert_eq!(result, expected);
}
#[test]
fn test_create_filenames_add_extensions_to_duplicate_regions() {
let p1_name = String::from("spain_region");
Expand All @@ -219,31 +176,35 @@ mod tests {
let p2_name = String::from("french_region");

let expected = [
p1_name.clone() + "_3",
p1_name.clone() + "_100",
p2_name.clone(),
p1_name.clone() + "_2",
p1_name.clone() + "_1",
p1_name.clone() + "_300",
p1_name.clone() + "_400",
]
.to_vec();

let p1 = Polygon {
name: p1_name,
points: Vec::new(),
relation_id: 100,
};

let p2 = Polygon {
name: p2_name,
points: Vec::new(),
relation_id: 200,
};

let p3 = Polygon {
name: p1_name_clone,
points: Vec::new(),
relation_id: 300,
};

let p4 = Polygon {
name: p1_name_clone2,
points: Vec::new(),
relation_id: 400,
};

let input = [p1, p2, p3, p4];
Expand All @@ -266,16 +227,19 @@ mod tests {
let p1 = Polygon {
name: p1_name,
points: Vec::new(),
relation_id: 1,
};

let p2 = Polygon {
name: p2_name,
points: Vec::new(),
relation_id: 2,
};

let p3 = Polygon {
name: p3_name,
points: Vec::new(),
relation_id: 3,
};

let input = [p1, p2, p3];
Expand All @@ -292,16 +256,18 @@ mod tests {
let p1_name = String::from("spanish_region");
let p2_name = String::from("SPAniSh_RegION");

let expected = [p1_name.clone() + "_2", p2_name.clone() + "_1"];
let expected = [p1_name.clone() + "_123", p2_name.clone() + "_456"];

let p1 = Polygon {
name: p1_name,
points: Vec::new(),
relation_id: 123,
};

let p2 = Polygon {
name: p2_name,
points: Vec::new(),
relation_id: 456,
};

let input = [p1, p2];
Expand Down

0 comments on commit 2e73d34

Please sign in to comment.