Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve diagnostic messages #1282

Merged
merged 4 commits into from
Oct 8, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ SRCS = include/nlohmann/json.hpp \
include/nlohmann/detail/input/json_sax.hpp \
include/nlohmann/detail/input/lexer.hpp \
include/nlohmann/detail/input/parser.hpp \
include/nlohmann/detail/input/position_t.hpp \
include/nlohmann/detail/iterators/internal_iterator.hpp \
include/nlohmann/detail/iterators/iter_impl.hpp \
include/nlohmann/detail/iterators/iteration_proxy.hpp \
Expand Down
2 changes: 1 addition & 1 deletion doc/examples/json_pointer.output
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
[json.exception.parse_error.107] parse error at 1: JSON pointer must be empty or begin with '/' - was: 'foo'
[json.exception.parse_error.107] parse error at byte 1: JSON pointer must be empty or begin with '/' - was: 'foo'
[json.exception.parse_error.108] parse error: escape character '~' must be followed with '0' or '1'
[json.exception.parse_error.108] parse error: escape character '~' must be followed with '0' or '1'
2 changes: 1 addition & 1 deletion doc/examples/parse_error.output
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
message: [json.exception.parse_error.101] parse error at 8: syntax error - unexpected ']'; expected '[', '{', or a literal
message: [json.exception.parse_error.101] parse error at line 1, column 8: syntax error while parsing value - unexpected ']'; expected '[', '{', or a literal
exception id: 101
byte position of error: 8
22 changes: 19 additions & 3 deletions include/nlohmann/detail/exceptions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#include <stdexcept> // runtime_error
#include <string> // to_string

#include <nlohmann/detail/input/position_t.hpp>

namespace nlohmann
{
namespace detail
Expand Down Expand Up @@ -114,15 +116,23 @@ class parse_error : public exception
/*!
@brief create a parse error exception
@param[in] id_ the id of the exception
@param[in] byte_ the byte index where the error occurred (or 0 if the
position cannot be determined)
@param[in] position the position where the error occurred (or with
chars_read_total=0 if the position cannot be
determined)
@param[in] what_arg the explanatory string
@return parse_error object
*/
static parse_error create(int id_, const position_t& pos, const std::string& what_arg)
{
std::string w = exception::name("parse_error", id_) + "parse error" +
position_string(pos) + ": " + what_arg;
return parse_error(id_, pos.chars_read_total, w.c_str());
}

static parse_error create(int id_, std::size_t byte_, const std::string& what_arg)
{
std::string w = exception::name("parse_error", id_) + "parse error" +
(byte_ != 0 ? (" at " + std::to_string(byte_)) : "") +
(byte_ != 0 ? (" at byte " + std::to_string(byte_)) : "") +
": " + what_arg;
return parse_error(id_, byte_, w.c_str());
}
Expand All @@ -141,6 +151,12 @@ class parse_error : public exception
private:
parse_error(int id_, std::size_t byte_, const char* what_arg)
: exception(id_, what_arg), byte(byte_) {}

static std::string position_string(const position_t& pos)
{
return " at line " + std::to_string(pos.lines_read + 1) +
", column " + std::to_string(pos.chars_read_current_line);
}
};

/*!
Expand Down
2 changes: 1 addition & 1 deletion include/nlohmann/detail/input/json_sax.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ struct json_sax
@brief a parse error occurred
@param[in] position the position in the input where the error occurs
@param[in] last_token the last read token
@param[in] error_msg a detailed error message
@param[in] ex an exception object describing the error
@return whether parsing should proceed (must return false)
*/
virtual bool parse_error(std::size_t position,
Expand Down
202 changes: 191 additions & 11 deletions include/nlohmann/detail/input/lexer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include <nlohmann/detail/macro_scope.hpp>
#include <nlohmann/detail/input/input_adapters.hpp>
#include <nlohmann/detail/input/position_t.hpp>

namespace nlohmann
{
Expand Down Expand Up @@ -393,39 +394,194 @@ class lexer

// invalid control characters
case 0x00:
{
error_message = "invalid string: control character U+0000 (NUL) must be escaped to \\u0000";
return token_type::parse_error;
}

case 0x01:
{
error_message = "invalid string: control character U+0001 (SOH) must be escaped to \\u0001";
return token_type::parse_error;
}

case 0x02:
{
error_message = "invalid string: control character U+0002 (STX) must be escaped to \\u0002";
return token_type::parse_error;
}

case 0x03:
{
error_message = "invalid string: control character U+0003 (ETX) must be escaped to \\u0003";
return token_type::parse_error;
}

case 0x04:
{
error_message = "invalid string: control character U+0004 (EOT) must be escaped to \\u0004";
return token_type::parse_error;
}

case 0x05:
{
error_message = "invalid string: control character U+0005 (ENQ) must be escaped to \\u0005";
return token_type::parse_error;
}

case 0x06:
{
error_message = "invalid string: control character U+0006 (ACK) must be escaped to \\u0006";
return token_type::parse_error;
}

case 0x07:
{
error_message = "invalid string: control character U+0007 (BEL) must be escaped to \\u0007";
return token_type::parse_error;
}

case 0x08:
{
error_message = "invalid string: control character U+0008 (BS) must be escaped to \\u0008 or \\b";
return token_type::parse_error;
}

case 0x09:
{
error_message = "invalid string: control character U+0009 (HT) must be escaped to \\u0009 or \\t";
return token_type::parse_error;
}

case 0x0A:
{
error_message = "invalid string: control character U+000A (LF) must be escaped to \\u000A or \\n";
return token_type::parse_error;
}

case 0x0B:
{
error_message = "invalid string: control character U+000B (VT) must be escaped to \\u000B";
return token_type::parse_error;
}

case 0x0C:
{
error_message = "invalid string: control character U+000C (FF) must be escaped to \\u000C or \\f";
return token_type::parse_error;
}

case 0x0D:
{
error_message = "invalid string: control character U+000D (CR) must be escaped to \\u000D or \\r";
return token_type::parse_error;
}

case 0x0E:
{
error_message = "invalid string: control character U+000E (SO) must be escaped to \\u000E";
return token_type::parse_error;
}

case 0x0F:
{
error_message = "invalid string: control character U+000F (SI) must be escaped to \\u000F";
return token_type::parse_error;
}

case 0x10:
{
error_message = "invalid string: control character U+0010 (DLE) must be escaped to \\u0010";
return token_type::parse_error;
}

case 0x11:
{
error_message = "invalid string: control character U+0011 (DC1) must be escaped to \\u0011";
return token_type::parse_error;
}

case 0x12:
{
error_message = "invalid string: control character U+0012 (DC2) must be escaped to \\u0012";
return token_type::parse_error;
}

case 0x13:
{
error_message = "invalid string: control character U+0013 (DC3) must be escaped to \\u0013";
return token_type::parse_error;
}

case 0x14:
{
error_message = "invalid string: control character U+0014 (DC4) must be escaped to \\u0014";
return token_type::parse_error;
}

case 0x15:
{
error_message = "invalid string: control character U+0015 (NAK) must be escaped to \\u0015";
return token_type::parse_error;
}

case 0x16:
{
error_message = "invalid string: control character U+0016 (SYN) must be escaped to \\u0016";
return token_type::parse_error;
}

case 0x17:
{
error_message = "invalid string: control character U+0017 (ETB) must be escaped to \\u0017";
return token_type::parse_error;
}

case 0x18:
{
error_message = "invalid string: control character U+0018 (CAN) must be escaped to \\u0018";
return token_type::parse_error;
}

case 0x19:
{
error_message = "invalid string: control character U+0019 (EM) must be escaped to \\u0019";
return token_type::parse_error;
}

case 0x1A:
{
error_message = "invalid string: control character U+001A (SUB) must be escaped to \\u001A";
return token_type::parse_error;
}

case 0x1B:
{
error_message = "invalid string: control character U+001B (ESC) must be escaped to \\u001B";
return token_type::parse_error;
}

case 0x1C:
{
error_message = "invalid string: control character U+001C (FS) must be escaped to \\u001C";
return token_type::parse_error;
}

case 0x1D:
{
error_message = "invalid string: control character U+001D (GS) must be escaped to \\u001D";
return token_type::parse_error;
}

case 0x1E:
{
error_message = "invalid string: control character U+001E (RS) must be escaped to \\u001E";
return token_type::parse_error;
}

case 0x1F:
{
error_message = "invalid string: control character must be escaped";
error_message = "invalid string: control character U+001F (US) must be escaped to \\u001F";
return token_type::parse_error;
}

Expand Down Expand Up @@ -1082,7 +1238,9 @@ class lexer
*/
std::char_traits<char>::int_type get()
{
++chars_read;
++position.chars_read_total;
++position.chars_read_current_line;

if (next_unget)
{
// just reset the next_unget variable and work with current
Expand All @@ -1097,21 +1255,43 @@ class lexer
{
token_string.push_back(std::char_traits<char>::to_char_type(current));
}

if (current == '\n')
{
++position.lines_read;
++position.chars_read_current_line = 0;
}

return current;
}

/*!
@brief unget current character (read it again on next get)

We implement unget by setting variable next_unget to true. The input is not
changed - we just simulate ungetting by modifying chars_read and
token_string. The next call to get() will behave as if the unget character
is read again.
changed - we just simulate ungetting by modifying chars_read_total,
chars_read_current_line, and token_string. The next call to get() will
behave as if the unget character is read again.
*/
void unget()
{
next_unget = true;
--chars_read;

--position.chars_read_total;

// in case we "unget" a newline, we have to also decrement the lines_read
if (position.chars_read_current_line == 0)
{
if (position.lines_read > 0)
{
--position.lines_read;
}
}
else
{
--position.chars_read_current_line;
}

if (JSON_LIKELY(current != std::char_traits<char>::eof()))
{
assert(token_string.size() != 0);
Expand Down Expand Up @@ -1159,9 +1339,9 @@ class lexer
/////////////////////

/// return position of last read token
constexpr std::size_t get_position() const noexcept
constexpr position_t get_position() const noexcept
{
return chars_read;
return position;
}

/// return the last read token (for errors only). Will never contain EOF
Expand Down Expand Up @@ -1231,7 +1411,7 @@ class lexer
token_type scan()
{
// initially, skip the BOM
if (chars_read == 0 and not skip_bom())
if (position.chars_read_total == 0 and not skip_bom())
{
error_message = "invalid BOM; must be 0xEF 0xBB 0xBF if given";
return token_type::parse_error;
Expand Down Expand Up @@ -1309,8 +1489,8 @@ class lexer
/// whether the next get() call should just return current
bool next_unget = false;

/// the number of characters read
std::size_t chars_read = 0;
/// the start position of the current token
position_t position;

/// raw input token string (for error messages)
std::vector<char> token_string {};
Expand Down
Loading