Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed up parsejson 3.25x (with a gcc-10.2 PGO build) on number heavy input #16055

Closed
wants to merge 16 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@
literals remain in the "raw" string form so that client code can easily treat
small and large numbers uniformly.

- The parsejson module now combines number validation & parsing to net a speed
up of over 2x on number heavy inputs. It also allows *not* retaining origin
strings for integers & floats for another 1.5x speed up (over 3X overall).
A couple convenience iterators were also added.

- Added `randState` template that exposes the default random number generator.
Useful for library authors.

Expand Down
18 changes: 4 additions & 14 deletions lib/pure/json.nim
Original file line number Diff line number Diff line change
Expand Up @@ -809,22 +809,12 @@ proc parseJson(p: var JsonParser; rawIntegers, rawFloats: bool): JsonNode =
p.a = ""
discard getTok(p)
of tkInt:
if rawIntegers:
result = newJRawNumber(p.a)
else:
try:
result = newJInt(parseBiggestInt(p.a))
except ValueError:
result = newJRawNumber(p.a)
result = if rawIntegers or p.isGiant: newJRawNumber(p.a)
else: newJInt(p.getInt)
discard getTok(p)
of tkFloat:
if rawFloats:
result = newJRawNumber(p.a)
else:
try:
result = newJFloat(parseFloat(p.a))
except ValueError:
result = newJRawNumber(p.a)
result = if rawFloats or p.isGiant: newJRawNumber(p.a)
else: newJFloat(p.getFloat)
discard getTok(p)
of tkTrue:
result = newJBool(true)
Expand Down
172 changes: 126 additions & 46 deletions lib/pure/parsejson.nim
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,16 @@ type
stateExpectObjectComma, stateExpectColon, stateExpectValue

JsonParser* = object of BaseLexer ## the parser object.
a*: string
tok*: TokKind
a*: string ## last valid string
i: int64 # last valid integer
f: float # last valid float
giant: bool # true if tkInt or tkFloat overflow native bounds
tok*: TokKind ## current token kind
kind: JsonEventKind
err: JsonError
state: seq[ParserState]
filename: string
rawStringLiterals: bool
rawStringLiterals, strIntegers, strFloats: bool

JsonKindError* = object of ValueError ## raised by the ``to`` macro if the
## JSON kind is incorrect.
Expand Down Expand Up @@ -102,17 +105,25 @@ const
]

proc open*(my: var JsonParser, input: Stream, filename: string;
rawStringLiterals = false) =
rawStringLiterals = false, strIntegers = true, strFloats = true) =
## initializes the parser with an input stream. `Filename` is only used
## for nice error messages. If `rawStringLiterals` is true, string literals
## are kept with their surrounding quotes and escape sequences in them are
## left untouched too.
## left untouched too. If `strIntegers` is true, the `a` field is set to the
## substring used to build the integer `i`. If `strFloats` is true, the `a`
## field is set to the substring used to build the float `f`. These are
## distinct from `rawFloats` & `rawIntegers` in `json` module, but must be
## true for those raw* forms to work correctly. Parsing is about 1.5x faster
## with all 3 flags false vs. all true, but false `str` defaults are needed
## for backward compatibility.
lexbase.open(my, input)
my.filename = filename
my.state = @[stateStart]
my.kind = jsonError
my.a = ""
my.rawStringLiterals = rawStringLiterals
my.strIntegers = strIntegers
my.strFloats = strFloats

proc close*(my: var JsonParser) {.inline.} =
## closes the parser `my` and its associated input stream.
Expand All @@ -124,15 +135,20 @@ proc str*(my: JsonParser): string {.inline.} =
assert(my.kind in {jsonInt, jsonFloat, jsonString})
return my.a

proc isGiant*(my: JsonParser): bool {.inline.} =
## returns whether the last ``tkInt|tkFloat`` token was not CPU native
assert(my.tok in {tkInt, tkFloat})
return my.giant

proc getInt*(my: JsonParser): BiggestInt {.inline.} =
## returns the number for the event: ``jsonInt``
assert(my.kind == jsonInt)
return parseBiggestInt(my.a)
## returns the number for the last ``tkInt`` token as a ``BiggestInt``
assert(my.tok == tkInt)
return cast[BiggestInt](my.i) # A no-op unless BiggestInt changes

proc getFloat*(my: JsonParser): float {.inline.} =
## returns the number for the event: ``jsonFloat``
assert(my.kind == jsonFloat)
return parseFloat(my.a)
## returns the number for the last ``tkFloat`` token as a ``float``.
assert(my.tok == tkFloat)
return my.f

proc kind*(my: JsonParser): JsonEventKind {.inline.} =
## returns the current event type for the JSON parser
Expand Down Expand Up @@ -317,35 +333,102 @@ proc skip(my: var JsonParser) =
break
my.bufpos = pos

proc parseNumber(my: var JsonParser) =
var pos = my.bufpos
if my.buf[pos] == '-':
add(my.a, '-')
inc(pos)
if my.buf[pos] == '.':
add(my.a, "0.")
inc(pos)
template jsOrVmBlock(caseJsOrVm, caseElse: untyped): untyped =
when nimvm:
block:
caseJsOrVm
else:
while my.buf[pos] in Digits:
add(my.a, my.buf[pos])
inc(pos)
if my.buf[pos] == '.':
add(my.a, '.')
inc(pos)
# digits after the dot:
while my.buf[pos] in Digits:
add(my.a, my.buf[pos])
inc(pos)
if my.buf[pos] in {'E', 'e'}:
add(my.a, my.buf[pos])
inc(pos)
if my.buf[pos] in {'+', '-'}:
add(my.a, my.buf[pos])
inc(pos)
while my.buf[pos] in Digits:
add(my.a, my.buf[pos])
inc(pos)
my.bufpos = pos
block:
when defined(js) or defined(nimscript):
# nimscript has to be here to avoid semantic checking of caseElse
caseJsOrVm
else:
caseElse

template doCopy(a, b, start, endp1: untyped): untyped =
jsOrVmBlock:
a = b[start ..< endp1]
do:
let n = endp1 - start
if n > 0:
a.setLen n
copyMem a[0].addr, b[start].addr, n

proc i64(c: char): int64 {.inline.} = int64(ord(c) - ord('0'))

proc pow10(e: int64): float {.inline.} =
const p10 = [1e-22, 1e-21, 1e-20, 1e-19, 1e-18, 1e-17, 1e-16, 1e-15, 1e-14,
1e-13, 1e-12, 1e-11, 1e-10, 1e-09, 1e-08, 1e-07, 1e-06, 1e-05,
1e-4, 1e-3, 1e-2, 1e-1, 1.0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7,
1e8, 1e9] # 4*64B cache lines = 32 slots
if -22 <= e and e <= 9:
return p10[e + 22] # common case=small table lookup
result = 1.0
var base = 10.0
var e = e
if e < 0:
e = -e
base = 0.1
while e != 0:
if (e and 1) != 0:
result *= base
e = e shr 1
base *= base

proc parseNumber(my: var JsonParser): TokKind {.inline.} =
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is not the proc kinda too big to be {.inline.} ?. 🤔

Copy link
Contributor Author

@c-blake c-blake Nov 20, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No. In the by far most common case it is just one line (two almost always perfectly predicted compare-jumps and one array reference).

It is only used just once, for human mental abstraction. So, there is no duplicated generated code.

Also, people are too sparing of {.inline.} in my view. Function call overhead is like 12..15 cycles * 3..6way superscalar =~ 36..90 dynamic instruction slots. So, if duplication is not a concern, that is the the rough amount of work to compare to.

If you were really going to browbeat me over inline then you should complaining about parseNumber being inline, not pow10. ;-) {EDIT: oh, wait, that's what you were doing! Sorry! } But many numbers to parse might be just single-digits which take only a few cycles. In fact, the more numbers you have the more likely many are single digits. So, the function call overhead would then be quite large compared to the work done.

Copy link
Contributor Author

@c-blake c-blake Nov 20, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another thing inline buys you here is putting parseNumber C code in the same translation unit as the parsing itself. So, the C compilers metrics/heuristics for inlining can at least be applied. Otherwise you rely on LTO which a lot of people do not enable. Given that this whole bit of work was to get really fast speed, it seems appropriate for that reason as well as the "many single digit" cases. If you do PGO builds the compiler will actually try to measure if inlining is good or not. According to the GCC guys that is the main benefit of PGO. I am always getting 1.5x to 2.0x speed ups from PGO with Nim which suggests to me that our {.inline.} choices are not generally so great (or at least a few critical choices are not...).

Copy link
Contributor Author

@c-blake c-blake Nov 20, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, and also parseNumber is also only called in one place in getTok. So, still no extra L1 I-cache style resource hit, and I also added {.inline.} on getTok to get the meat of all this logic into the translation unit of whatever is using the json parser.

Copy link
Contributor Author

@c-blake c-blake Nov 20, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, Nim's {.inline.} just marks it in the generated C as inline. So, it's still up to C/C++ compiler code metrics what ultimately happens. Those surely do take into account the statically observable number of expansions/substitutions and their sizes. So, the many calls to getTok from json are not a real problem. But if anyone really wants me to remove them I can. I always use (and recommend using) PGO when performance matters, myself.

# Parse/validate/classify all at once, both setting & returning token kind
# and, if not `tkError`, leaving the binary numeric answer in `my.[if]`.
const Sign = {'+', '-'} # NOTE: `parseFloat` can generalize this to INF/NAN.
var i = my.bufpos # NUL ('\0') terminated
var noDot = false
var exp = 0'i64
var p10 = 0
var pnt = -1 # find '.' (point); do digits
var nD = 0
my.giant = false
my.i = 0'i64 # build my.i up from zero..
if my.buf[i] in Sign:
i.inc # skip optional sign
while my.buf[i] != '\0': # ..and track scale/pow10.
if my.buf[i] notin Digits:
if my.buf[i] != '.' or pnt >= 0:
break # a second '.' is forbidden
pnt = nD # save location of '.' (point)
nD.dec # undo loop's nD.inc
elif nD < 18: # 2**63==9.2e18 => 18 digits ok
my.i = 10 * my.i + my.buf[i].i64 # core ASCII->binary transform
else: # 20+ digits before decimal
my.giant = true #XXX condition should be more precise than "18 digits"
p10.inc # any digit moves implicit '.'
i.inc
nD.inc
if my.buf[my.bufpos] == '-':
my.i = -my.i # adjust sign
if pnt < 0: # never saw '.'
pnt = nD; noDot = true # so set to number of digits
elif nD == 1:
return tkError # ONLY "[+-]*\.*"
if my.buf[i] in {'E', 'e'}: # optional exponent
i.inc
let i0 = i
if my.buf[i] in Sign:
i.inc # skip optional sign
while my.buf[i] in Digits: # build exponent
exp = 10 * exp + my.buf[i].i64
i.inc
if my.buf[i0] == '-':
exp = -exp # adjust sign
elif noDot: # and my.i < (1'i64 shl 53'i64) ? # No '.' & No [Ee]xponent
my.bufpos = i
if my.strIntegers or my.giant:
doCopy(my.a, my.buf, my.bufpos, i)
return tkInt # mark as integer
exp += pnt - nD + p10 # combine explicit&implicit exp
my.f = my.i.float * pow10(exp) # has round-off vs. 80-bit
if my.strFloats or my.giant:
doCopy(my.a, my.buf, my.bufpos, i)
my.bufpos = i
return tkFloat # mark as float

proc parseName(my: var JsonParser) =
var pos = my.bufpos
Expand All @@ -355,17 +438,13 @@ proc parseName(my: var JsonParser) =
inc(pos)
my.bufpos = pos

proc getTok*(my: var JsonParser): TokKind =
setLen(my.a, 0)
proc getTok*(my: var JsonParser): TokKind {.inline.} =
skip(my) # skip whitespace, comments
case my.buf[my.bufpos]
of '-', '.', '0'..'9':
parseNumber(my)
if {'.', 'e', 'E'} in my.a:
result = tkFloat
else:
result = tkInt
result = parseNumber(my)
of '"':
setLen(my.a, 0)
result = parseString(my)
of '[':
inc(my.bufpos)
Expand All @@ -388,6 +467,7 @@ proc getTok*(my: var JsonParser): TokKind =
of '\0':
result = tkEof
of 'a'..'z', 'A'..'Z', '_':
setLen(my.a, 0)
parseName(my)
case my.a
of "null": result = tkNull
Expand Down
9 changes: 5 additions & 4 deletions tests/stdlib/tjsonutils.nim
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ discard """

import std/jsonutils
import std/json
import std/math

proc testRoundtrip[T](t: T, expected: string) =
let j = t.toJson
Expand Down Expand Up @@ -238,7 +239,7 @@ template fn() =
doAssert not foo.b
doAssert foo.f == 0.0
doAssert foo.c == 1
doAssert foo.c1 == 3.14159
doAssert almostEqual(foo.c1, 3.14159, 1)

block testExceptionOnWrongDiscirminatBranchInJson:
var foo = Foo(b: false, f: 3.14159, c: 0, c0: 42)
Expand All @@ -247,7 +248,7 @@ template fn() =
fromJson(foo, json, Joptions(allowMissingKeys: true))
# Test that the original fields are not reset.
doAssert not foo.b
doAssert foo.f == 3.14159
doAssert almostEqual(foo.f, 3.14159, 1)
doAssert foo.c == 0
doAssert foo.c0 == 42

Expand All @@ -258,7 +259,7 @@ template fn() =
doAssert not foo.b
doAssert foo.f == 2.71828
doAssert foo.c == 1
doAssert foo.c1 == 3.14159
doAssert almostEqual(foo.c1, 3.14159, 1)

block testAllowExtraKeysInJsonOnWrongDisciriminatBranch:
var foo = Foo(b: false, f: 3.14159, c: 0, c0: 42)
Expand All @@ -267,7 +268,7 @@ template fn() =
allowExtraKeys: true))
# Test that the original fields are not reset.
doAssert not foo.b
doAssert foo.f == 3.14159
doAssert almostEqual(foo.f, 3.14159, 1)
doAssert foo.c == 0
doAssert foo.c0 == 42

Expand Down