From 8ebd5423b4ed28e42df8956593aa71165917d2b1 Mon Sep 17 00:00:00 2001 From: Frederik Ramm Date: Fri, 16 Nov 2012 21:21:53 +0100 Subject: [PATCH 1/2] Improve shapefile wrap-around when size limit reached --- include/osmium/export/shapefile.hpp | 96 +++++++++++++++++++++++++++-- 1 file changed, 90 insertions(+), 6 deletions(-) diff --git a/include/osmium/export/shapefile.hpp b/include/osmium/export/shapefile.hpp index 7586f8f..866a49e 100644 --- a/include/osmium/export/shapefile.hpp +++ b/include/osmium/export/shapefile.hpp @@ -45,6 +45,9 @@ namespace Osmium { // this limit has been arrived at experimentally static const unsigned int max_dbf_fields = 2047; + // shape files with more than 2 GB don't work + static const unsigned int max_file_size = INT_MAX; + private: class Field { @@ -115,6 +118,8 @@ namespace Osmium { throw std::runtime_error("Failed to add field:" + field.name()); } m_fields.push_back(field); + m_dbf_bytes += 32; + m_record_length += field.width(); } else { throw std::out_of_range("Too many fields in the shapefile."); } @@ -184,14 +189,18 @@ namespace Osmium { if (!shp_object || shp_object->nSHPType != m_shp_handle->nShapeType) { throw Osmium::Geometry::IllegalGeometry(); } - m_current_shape = SHPWriteObject(m_shp_handle, -1, shp_object); - if (m_current_shape == -1 && errno == EINVAL) { - // second chance if likely cause is having reached the 2GB limit + m_dbf_bytes += m_record_length; + m_shp_bytes += length_on_disk(shp_object); + + if (m_dbf_bytes > max_file_size || m_shp_bytes > max_file_size) { close(); m_sequence_number++; open(); - m_current_shape = SHPWriteObject(m_shp_handle, -1, shp_object); + m_dbf_bytes += m_record_length; + m_shp_bytes += length_on_disk(shp_object); } + + m_current_shape = SHPWriteObject(m_shp_handle, -1, shp_object); if (m_current_shape == -1) { throw std::runtime_error("error writing to shapefile"); } @@ -212,6 +221,13 @@ namespace Osmium { } } + void add_attribute(const int field, const double value) const { + int 
ok = DBFWriteDoubleAttribute(m_dbf_handle, m_current_shape, field, value); + if (!ok) { + throw std::runtime_error("Can't add double to field"); + } + } + void add_attribute(const int field, const std::string& value) const { int ok = DBFWriteStringAttribute(m_dbf_handle, m_current_shape, field, value.c_str()); if (!ok) { @@ -269,7 +285,10 @@ namespace Osmium { m_dbf_handle(NULL), m_current_shape(0), m_type(type), - m_sequence_number(0) { + m_sequence_number(0), + m_record_length(1), + m_shp_bytes(0), + m_dbf_bytes(0) { open(); } @@ -292,7 +311,16 @@ namespace Osmium { int m_type; /// shapefile sequence number for auto-overflow (0=first) - int m_sequence_number; + unsigned int m_sequence_number; + + /// number of bytes per DBF record + unsigned int m_record_length; + + /// number of bytes wriotten to SHP file + unsigned int m_shp_bytes; + + /// number of bytes written to DBF file + unsigned int m_dbf_bytes; /** * Open and initialize all files belonging to shapefile (.shp/shx/dbf/prj/cpg). @@ -334,8 +362,64 @@ namespace Osmium { for (std::vector::const_iterator it = m_fields.begin(); it != m_fields.end(); ++it) { DBFAddField(m_dbf_handle, it->name().c_str(), it->type(), it->width(), it->decimals()); } + + m_shp_bytes = 100; + m_dbf_bytes = 33 + 32 * (m_fields.size()); + } + + /** + * Computes the number of bytes a shape will take up when saved to the .shp file. + * + * @param shp_object A pointer to the shape object to be added. 
+ */ + size_t length_on_disk(SHPObject *s) { + #define RECORD_HEADER 8 + #define TYPE 4 + #define NUMPTS 4 + #define NUMPART 4 + #define BOX 32 + #define RANGE 32 + #define COORD 8 + #define PART 4 + switch(s->nSHPType) { + case SHPT_NULL: return RECORD_HEADER + TYPE; + case SHPT_POINT: return RECORD_HEADER + TYPE + 2 * COORD; + case SHPT_MULTIPOINT: return RECORD_HEADER + TYPE + BOX + NUMPTS + + s->nVertices * 2 * COORD; + case SHPT_ARC: // like polygon + case SHPT_POLYGON: return RECORD_HEADER + TYPE + BOX + NUMPART + + s->nParts * PART + NUMPTS + s->nVertices * 2 * COORD; + + case SHPT_POINTM: return RECORD_HEADER + TYPE + 3 * COORD; + case SHPT_MULTIPOINTM: return RECORD_HEADER + TYPE + BOX + RANGE + + NUMPTS + s->nVertices * 3 * COORD; + case SHPT_ARCM: // like polygon + case SHPT_POLYGONM: return RECORD_HEADER + TYPE + BOX + RANGE + + NUMPART + s->nParts * PART + NUMPTS + s->nVertices * 3 * COORD; + + case SHPT_POINTZ: return RECORD_HEADER + TYPE + 4 * COORD; + case SHPT_MULTIPOINTZ: return RECORD_HEADER + TYPE + BOX + 2 * RANGE + + NUMPTS + s->nVertices * 4 * COORD; + case SHPT_ARCZ: // like polygon + case SHPT_POLYGONZ: return RECORD_HEADER + TYPE + BOX + 2 * RANGE + NUMPART + + s->nParts * PART + NUMPTS + s->nVertices * 4 * COORD; + + case SHPT_MULTIPATCH: return RECORD_HEADER + TYPE + BOX + 2 * RANGE + NUMPART + + s->nParts * 2 * PART + NUMPTS + s->nVertices * 4 * COORD; + default: + throw std::runtime_error("unrecognized shape type"); + } + #undef RECORD_HEADER + #undef TYPE + #undef NUMPTS + #undef NUMPART + #undef BOX + #undef COORD + #undef RANGE + #undef PART } + }; // class Shapefile /** From 61ddcc1985ba790be8af3e166f2d1f9ccb7821a3 Mon Sep 17 00:00:00 2001 From: Frederik Ramm Date: Sun, 18 Nov 2012 15:35:20 +0100 Subject: [PATCH 2/2] cosmetic changes to placate maintainer --- include/osmium/export/shapefile.hpp | 95 ++++++++++++++++------------- 1 file changed, 53 insertions(+), 42 deletions(-) diff --git a/include/osmium/export/shapefile.hpp 
b/include/osmium/export/shapefile.hpp index 866a49e..d7f58be 100644 --- a/include/osmium/export/shapefile.hpp +++ b/include/osmium/export/shapefile.hpp @@ -14,7 +14,7 @@ version 3 of the Licenses, or (at your option) any later version. Osmium is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -PARTICULAR PURPOSE. See the GNU Lesser General Public License and the GNU +PARTICULAR PURPOSE. See the GNU Lesser General Public License and the GNU General Public License for more details. You should have received a copy of the Licenses along with Osmium. If not, see @@ -25,6 +25,8 @@ You should have received a copy of the Licenses along with Osmium. If not, see #include #include #include +#include +#include #include #include @@ -46,7 +48,16 @@ namespace Osmium { static const unsigned int max_dbf_fields = 2047; // shape files with more than 2 GB don't work - static const unsigned int max_file_size = INT_MAX; + static const unsigned int max_file_size = 2147483647; + + // size of a shape file header + static const size_t size_shapefile_header = 100; + + // size of a DBF file header + static const size_t size_dbf_header = 33; + + // size of a DBF field descriptor + static const size_t size_dbf_field_header = 32; private: @@ -118,7 +129,7 @@ namespace Osmium { throw std::runtime_error("Failed to add field:" + field.name()); } m_fields.push_back(field); - m_dbf_bytes += 32; + m_dbf_bytes += size_dbf_field_header; m_record_length += field.width(); } else { throw std::out_of_range("Too many fields in the shapefile."); @@ -316,7 +327,7 @@ namespace Osmium { /// number of bytes per DBF record unsigned int m_record_length; - /// number of bytes wriotten to SHP file + /// number of bytes written to SHP file unsigned int m_shp_bytes; /// number of bytes written to DBF file @@ -327,6 +338,7 @@ namespace Osmium { * Uses m_filename_base and m_sequence_number plus suffix to build 
filename. */ void open() { + std::ostringstream filename; filename << m_filename_base; if (m_sequence_number) { @@ -363,8 +375,8 @@ namespace Osmium { DBFAddField(m_dbf_handle, it->name().c_str(), it->type(), it->width(), it->decimals()); } - m_shp_bytes = 100; - m_dbf_bytes = 33 + 32 * (m_fields.size()); + m_shp_bytes = size_shapefile_header; + m_dbf_bytes = size_dbf_header + size_dbf_field_header * (m_fields.size()); } /** @@ -372,51 +384,50 @@ namespace Osmium { * * @param shp_object A pointer to the shape object to be added. */ - size_t length_on_disk(SHPObject *s) { - #define RECORD_HEADER 8 - #define TYPE 4 - #define NUMPTS 4 - #define NUMPART 4 - #define BOX 32 - #define RANGE 32 - #define COORD 8 - #define PART 4 - switch(s->nSHPType) { - case SHPT_NULL: return RECORD_HEADER + TYPE; - case SHPT_POINT: return RECORD_HEADER + TYPE + 2 * COORD; - case SHPT_MULTIPOINT: return RECORD_HEADER + TYPE + BOX + NUMPTS + - s->nVertices * 2 * COORD; + size_t length_on_disk(SHPObject *shp_object) { + + // sizes of various elements making up a shape record + const size_t size_record_header = 8; // record header + const size_t size_type_field = 4; // shape type identifier + const size_t size_num_points = 4; // number of points + const size_t size_num_parts = 4; // number of parts + const size_t size_box = 32; // bounding box + const size_t size_range = 16; // measurement or Z range + const size_t size_coordinate = 8; // one coordinate + const size_t size_part_index = 4; // pointer to shape part + + switch(shp_object->nSHPType) { + case SHPT_NULL: return size_record_header + size_type_field; + + // standard shapes have 2-dimensional coordinates + case SHPT_POINT: return size_record_header + size_type_field + 2 * size_coordinate; + case SHPT_MULTIPOINT: return size_record_header + size_type_field + size_box + size_num_points + + shp_object->nVertices * 2 * size_coordinate; case SHPT_ARC: // like polygon - case SHPT_POLYGON: return RECORD_HEADER + TYPE + BOX + NUMPART + - 
s->nParts * PART + NUMPTS + s->nVertices * 2 * COORD; + case SHPT_POLYGON: return size_record_header + size_type_field + size_box + size_num_parts + + shp_object->nParts * size_part_index + size_num_points + shp_object->nVertices * 2 * size_coordinate; - case SHPT_POINTM: return RECORD_HEADER + TYPE + 3 * COORD; - case SHPT_MULTIPOINTM: return RECORD_HEADER + TYPE + BOX + RANGE + - NUMPTS + s->nVertices * 3 * COORD; + // "M" shapes have 3-dimensional coordinates (x/y/measurement) + case SHPT_POINTM: return size_record_header + size_type_field + 3 * size_coordinate; + case SHPT_MULTIPOINTM: return size_record_header + size_type_field + size_box + size_range + + size_num_points + shp_object->nVertices * 3 * size_coordinate; case SHPT_ARCM: // like polygon - case SHPT_POLYGONM: return RECORD_HEADER + TYPE + BOX + RANGE + - NUMPART + s->nParts * PART + NUMPTS + s->nVertices * 3 * COORD; + case SHPT_POLYGONM: return size_record_header + size_type_field + size_box + size_range + + size_num_parts + shp_object->nParts * size_part_index + size_num_points + shp_object->nVertices * 3 * size_coordinate; - case SHPT_POINTZ: return RECORD_HEADER + TYPE + 4 * COORD; - case SHPT_MULTIPOINTZ: return RECORD_HEADER + TYPE + BOX + 2 * RANGE + - NUMPTS + s->nVertices * 4 * COORD; + // "Z" shapes have 4-dimensional coordinates (x/y/z/measurement) + case SHPT_POINTZ: return size_record_header + size_type_field + 4 * size_coordinate; + case SHPT_MULTIPOINTZ: return size_record_header + size_type_field + size_box + 2 * size_range + + size_num_points + shp_object->nVertices * 4 * size_coordinate; case SHPT_ARCZ: // like polygon - case SHPT_POLYGONZ: return RECORD_HEADER + TYPE + BOX + 2 * RANGE + NUMPART + - s->nParts * PART + NUMPTS + s->nVertices * 4 * COORD; + case SHPT_POLYGONZ: return size_record_header + size_type_field + size_box + 2 * size_range + size_num_parts + + shp_object->nParts * size_part_index + size_num_points + shp_object->nVertices * 4 * size_coordinate; - case 
SHPT_MULTIPATCH: return RECORD_HEADER + TYPE + BOX + 2 * RANGE + NUMPART + - s->nParts * 2 * PART + NUMPTS + s->nVertices * 4 * COORD; + case SHPT_MULTIPATCH: return size_record_header + size_type_field + size_box + 2 * size_range + size_num_parts + + shp_object->nParts * 2 * size_part_index + size_num_points + shp_object->nVertices * 4 * size_coordinate; default: throw std::runtime_error("unrecognized shape type"); } - #undef RECORD_HEADER - #undef TYPE - #undef NUMPTS - #undef NUMPART - #undef BOX - #undef COORD - #undef RANGE - #undef PART }