From 048f8333c4f6f5ca1b5089bb51108c0d7c578541 Mon Sep 17 00:00:00 2001 From: Tommy Reilly Date: Mon, 21 Nov 2022 04:56:30 -0500 Subject: [PATCH] pgdate: optimize bulk date parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a "helper" to cache memory allocations to make repeated/bulk date parsing more efficient. ``` name old time/op new time/op delta ParseDate-10 1.76µs ±12% 1.49µs ± 6% -15.08% (p=0.000 n=9+10) name old alloc/op new alloc/op delta ParseDate-10 1.66kB ± 0% 0.00kB -100.00% (p=0.000 n=10+10) name old allocs/op new allocs/op delta ParseDate-10 8.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10) ``` Fixes: #91834 Release note: None --- pkg/sql/colexec/colexecbase/cast.eg.go | 12 ++++-- .../execgen/cmd/execgen/cast_gen_util.go | 3 +- pkg/sql/sem/eval/context.go | 8 ++++ pkg/sql/sem/tree/datum.go | 17 +++++++- pkg/util/timeutil/pgdate/field_extract.go | 20 ++++++---- .../timeutil/pgdate/field_extract_test.go | 31 +++++++------- pkg/util/timeutil/pgdate/parsing.go | 40 +++++++++++++------ pkg/util/timeutil/pgdate/parsing_test.go | 40 +++++++++++++------ pkg/util/timeutil/pgdate/pgdate_test.go | 35 ++++++++-------- 9 files changed, 137 insertions(+), 69 deletions(-) diff --git a/pkg/sql/colexec/colexecbase/cast.eg.go b/pkg/sql/colexec/colexecbase/cast.eg.go index a55fa27fb763..c552f023bb09 100644 --- a/pkg/sql/colexec/colexecbase/cast.eg.go +++ b/pkg/sql/colexec/colexecbase/cast.eg.go @@ -9427,7 +9427,8 @@ func (c *castStringDateOp) Next() coldata.Batch { _now := evalCtx.GetRelativeParseTime() _dateStyle := evalCtx.GetDateStyle() - _d, _, err := pgdate.ParseDate(_now, _dateStyle, string(v)) + _ph := &evalCtx.ParseHelper + _d, _, err := pgdate.ParseDate(_now, _dateStyle, string(v), _ph) if err != nil { colexecerror.ExpectedError(err) } @@ -9457,7 +9458,8 @@ func (c *castStringDateOp) Next() coldata.Batch { _now := evalCtx.GetRelativeParseTime() _dateStyle := evalCtx.GetDateStyle() - _d, _, err := pgdate.ParseDate(_now, 
_dateStyle, string(v)) + _ph := &evalCtx.ParseHelper + _d, _, err := pgdate.ParseDate(_now, _dateStyle, string(v), _ph) if err != nil { colexecerror.ExpectedError(err) } @@ -9489,7 +9491,8 @@ func (c *castStringDateOp) Next() coldata.Batch { _now := evalCtx.GetRelativeParseTime() _dateStyle := evalCtx.GetDateStyle() - _d, _, err := pgdate.ParseDate(_now, _dateStyle, string(v)) + _ph := &evalCtx.ParseHelper + _d, _, err := pgdate.ParseDate(_now, _dateStyle, string(v), _ph) if err != nil { colexecerror.ExpectedError(err) } @@ -9519,7 +9522,8 @@ func (c *castStringDateOp) Next() coldata.Batch { _now := evalCtx.GetRelativeParseTime() _dateStyle := evalCtx.GetDateStyle() - _d, _, err := pgdate.ParseDate(_now, _dateStyle, string(v)) + _ph := &evalCtx.ParseHelper + _d, _, err := pgdate.ParseDate(_now, _dateStyle, string(v), _ph) if err != nil { colexecerror.ExpectedError(err) } diff --git a/pkg/sql/colexec/execgen/cmd/execgen/cast_gen_util.go b/pkg/sql/colexec/execgen/cmd/execgen/cast_gen_util.go index 5ff53e380432..0fb9fe0ff571 100644 --- a/pkg/sql/colexec/execgen/cmd/execgen/cast_gen_util.go +++ b/pkg/sql/colexec/execgen/cmd/execgen/cast_gen_util.go @@ -395,7 +395,8 @@ func stringToDate(to, from, evalCtx, _, _ string) string { convStr := ` _now := %[3]s.GetRelativeParseTime() _dateStyle := %[3]s.GetDateStyle() - _d, _, err := pgdate.ParseDate(_now, _dateStyle, string(%[2]s)) + _ph := &%[3]s.ParseHelper + _d, _, err := pgdate.ParseDate(_now, _dateStyle, string(%[2]s), _ph) if err != nil { colexecerror.ExpectedError(err) } diff --git a/pkg/sql/sem/eval/context.go b/pkg/sql/sem/eval/context.go index 725ab64eff44..2b91d0eaac97 100644 --- a/pkg/sql/sem/eval/context.go +++ b/pkg/sql/sem/eval/context.go @@ -244,6 +244,9 @@ type Context struct { // ChangefeedState stores the state (progress) of core changefeeds. ChangefeedState ChangefeedState + + // ParseHelper makes date parsing more efficient. 
+ ParseHelper pgdate.ParseHelper } // DescIDGenerator generates unique descriptor IDs. @@ -564,6 +567,11 @@ func (ec *Context) GetRelativeParseTime() time.Time { return ret.In(ec.GetLocation()) } +// GetDateHelper implements ParseTimeContext. +func (ec *Context) GetDateHelper() *pgdate.ParseHelper { + return &ec.ParseHelper +} + // GetTxnTimestamp retrieves the current transaction timestamp as per // the evaluation context. The timestamp is guaranteed to be nonzero. func (ec *Context) GetTxnTimestamp(precision time.Duration) *tree.DTimestampTZ { diff --git a/pkg/sql/sem/tree/datum.go b/pkg/sql/sem/tree/datum.go index 1009fa932fdd..d73ebdf73878 100644 --- a/pkg/sql/sem/tree/datum.go +++ b/pkg/sql/sem/tree/datum.go @@ -2005,6 +2005,8 @@ type ParseTimeContext interface { GetIntervalStyle() duration.IntervalStyle // GetDateStyle returns the date style in the session. GetDateStyle() pgdate.DateStyle + // GetDateHelper returns a helper to optimize date parsing. + GetDateHelper() *pgdate.ParseHelper } var _ ParseTimeContext = &simpleParseTimeContext{} @@ -2037,6 +2039,7 @@ type simpleParseTimeContext struct { RelativeParseTime time.Time DateStyle pgdate.DateStyle IntervalStyle duration.IntervalStyle + dateHelper pgdate.ParseHelper } // GetRelativeParseTime implements ParseTimeContext. @@ -2054,6 +2057,11 @@ func (ctx simpleParseTimeContext) GetDateStyle() pgdate.DateStyle { return ctx.DateStyle } +// GetDateHelper implements ParseTimeContext. +func (ctx simpleParseTimeContext) GetDateHelper() *pgdate.ParseHelper { + return &ctx.dateHelper +} + // relativeParseTime chooses a reasonable "now" value for // performing date parsing. 
func relativeParseTime(ctx ParseTimeContext) time.Time { @@ -2077,6 +2085,13 @@ func intervalStyle(ctx ParseTimeContext) duration.IntervalStyle { return ctx.GetIntervalStyle() } +func dateParseHelper(ctx ParseTimeContext) *pgdate.ParseHelper { + if ctx == nil { + return nil + } + return ctx.GetDateHelper() +} + // ParseDDate parses and returns the *DDate Datum value represented by the provided // string in the provided location, or an error if parsing is unsuccessful. // @@ -2084,7 +2099,7 @@ func intervalStyle(ctx ParseTimeContext) duration.IntervalStyle { // ParseTimeContext (either for the time or the local timezone). func ParseDDate(ctx ParseTimeContext, s string) (_ *DDate, dependsOnContext bool, _ error) { now := relativeParseTime(ctx) - t, dependsOnContext, err := pgdate.ParseDate(now, dateStyle(ctx), s) + t, dependsOnContext, err := pgdate.ParseDate(now, dateStyle(ctx), s, dateParseHelper(ctx)) return NewDDate(t), dependsOnContext, err } diff --git a/pkg/util/timeutil/pgdate/field_extract.go b/pkg/util/timeutil/pgdate/field_extract.go index 3fe0afac37bb..289041620093 100644 --- a/pkg/util/timeutil/pgdate/field_extract.go +++ b/pkg/util/timeutil/pgdate/field_extract.go @@ -53,9 +53,6 @@ type fieldExtract struct { // Provides a time for evaluating relative dates as well as a // timezone. Should only be used via the now() and location() accessors. currentTime time.Time - // currentTimeUsed is set if we consulted currentTime (indicating if the - // result depends on the context). - currentTimeUsed bool // location is set to the timezone specified by the timestamp (if any). location *time.Location @@ -66,15 +63,22 @@ type fieldExtract struct { // Stores a reference to one of the sentinel values, to be returned // by the makeDateTime() functions sentinel *time.Time - // This indicates that the value in the year field was only - // two digits and should be adjusted to make it recent. - tweakYear bool // Tracks the sign of the timezone offset. 
We need to track // this separately from the sign of the tz1 value in case // we're trying to store a (nonsensical) value like -0030. tzSign int // Tracks the fields that we want to extract. wanted fieldSet + + textChunksScratch [fieldMaximum]stringChunk + numbersScratch [fieldMaximum]numberChunk + + // This indicates that the value in the year field was only + // two digits and should be adjusted to make it recent. + tweakYear bool + // currentTimeUsed is set if we consulted currentTime (indicating if the + // result depends on the context). + currentTimeUsed bool // Tracks whether the current timestamp is of db2 format. isDB2 bool } @@ -96,8 +100,8 @@ func (fe *fieldExtract) getLocation() *time.Location { // string into a collection of date/time fields in order to populate a // fieldExtract. func (fe *fieldExtract) Extract(s string) error { + textChunks := fe.textChunksScratch[:fieldMaximum] // Break the string into alphanumeric chunks. - textChunks := make([]stringChunk, fieldMaximum) count, _ := chunk(s, textChunks) if count < 0 { @@ -107,7 +111,7 @@ func (fe *fieldExtract) Extract(s string) error { } // Create a place to store extracted numeric info. 
- numbers := make([]numberChunk, 0, fieldMaximum) + numbers := fe.numbersScratch[:0] appendNumber := func(prefix, number string) error { v, err := strconv.Atoi(number) diff --git a/pkg/util/timeutil/pgdate/field_extract_test.go b/pkg/util/timeutil/pgdate/field_extract_test.go index ae31e6131945..0a616c23890f 100644 --- a/pkg/util/timeutil/pgdate/field_extract_test.go +++ b/pkg/util/timeutil/pgdate/field_extract_test.go @@ -21,6 +21,7 @@ import ( ) func TestExtractRelative(t *testing.T) { + var parseHelper ParseHelper tests := []struct { s string rel int @@ -42,20 +43,22 @@ func TestExtractRelative(t *testing.T) { now := time.Date(2018, 10, 17, 0, 0, 0, 0, time.UTC) for _, tc := range tests { t.Run(tc.s, func(t *testing.T) { - d, depOnCtx, err := ParseDate(now, DateStyle{Order: Order_YMD}, tc.s) - if err != nil { - t.Fatal(err) - } - if !depOnCtx { - t.Fatalf("relative dates should depend on context") - } - ts, err := d.ToTime() - if err != nil { - t.Fatal(err) - } - exp := now.AddDate(0, 0, tc.rel) - if ts != exp { - t.Fatalf("expected %v, got %v", exp, ts) + for _, ph := range []*ParseHelper{nil, &parseHelper} { + d, depOnCtx, err := ParseDate(now, DateStyle{Order: Order_YMD}, tc.s, ph) + if err != nil { + t.Fatal(err) + } + if !depOnCtx { + t.Fatalf("relative dates should depend on context") + } + ts, err := d.ToTime() + if err != nil { + t.Fatal(err) + } + exp := now.AddDate(0, 0, tc.rel) + if ts != exp { + t.Fatalf("expected %v, got %v", exp, ts) + } } }) } diff --git a/pkg/util/timeutil/pgdate/parsing.go b/pkg/util/timeutil/pgdate/parsing.go index 0a081b9f2555..0b10de4d30d8 100644 --- a/pkg/util/timeutil/pgdate/parsing.go +++ b/pkg/util/timeutil/pgdate/parsing.go @@ -82,6 +82,10 @@ var ( TimeNegativeInfinity = timeutil.Unix(-210866803200, 0) ) +type ParseHelper struct { + fe fieldExtract +} + // ParseDate converts a string into Date. // // Any specified timezone is inconsequential. 
Examples: @@ -91,10 +95,16 @@ var ( // // The dependsOnContext return value indicates if we had to consult the given // `now` value (either for the time or the local timezone). +// +// Memory allocations can be avoided by passing ParseHelper which can be re-used +// across calls for batch parsing purposes, otherwise it can be nil. func ParseDate( - now time.Time, dateStyle DateStyle, s string, + now time.Time, dateStyle DateStyle, s string, h *ParseHelper, ) (_ Date, dependsOnContext bool, _ error) { - fe := fieldExtract{ + if h == nil { + h = &ParseHelper{} + } + h.fe = fieldExtract{ currentTime: now, dateStyle: dateStyle, required: dateRequiredFields, @@ -104,42 +114,48 @@ func ParseDate( wanted: dateTimeFields, } - if err := fe.Extract(s); err != nil { + if err := h.fe.Extract(s); err != nil { return Date{}, false, parseError(err, "date", s) } - date, err := fe.MakeDate() - return date, fe.currentTimeUsed, err + date, err := h.fe.MakeDate() + return date, h.fe.currentTimeUsed, err } // ParseTime converts a string into a time value on the epoch day. // // The dependsOnContext return value indicates if we had to consult the given // `now` value (either for the time or the local timezone). +// +// Memory allocations can be avoided by passing ParseHelper which can be re-used +// across calls for batch parsing purposes, otherwise it can be nil. func ParseTime( - now time.Time, dateStyle DateStyle, s string, + now time.Time, dateStyle DateStyle, s string, h *ParseHelper, ) (_ time.Time, dependsOnContext bool, _ error) { - fe := fieldExtract{ + if h == nil { + h = &ParseHelper{} + } + h.fe = fieldExtract{ currentTime: now, required: timeRequiredFields, wanted: timeFields, } - if err := fe.Extract(s); err != nil { + if err := h.fe.Extract(s); err != nil { // It's possible that the user has given us a complete // timestamp string; let's try again, accepting more fields. 
- fe = fieldExtract{ + h.fe = fieldExtract{ currentTime: now, dateStyle: dateStyle, required: timeRequiredFields, wanted: dateTimeFields, } - if err := fe.Extract(s); err != nil { + if err := h.fe.Extract(s); err != nil { return TimeEpoch, false, parseError(err, "time", s) } } - res := fe.MakeTime() - return res, fe.currentTimeUsed, nil + res := h.fe.MakeTime() + return res, h.fe.currentTimeUsed, nil } // ParseTimeWithoutTimezone converts a string into a time value on the epoch diff --git a/pkg/util/timeutil/pgdate/parsing_test.go b/pkg/util/timeutil/pgdate/parsing_test.go index db7347bfb717..0a5c20b36ff0 100644 --- a/pkg/util/timeutil/pgdate/parsing_test.go +++ b/pkg/util/timeutil/pgdate/parsing_test.go @@ -116,10 +116,12 @@ func (td timeData) expected(order pgdate.Order) (time.Time, bool) { return td.exp, td.err } -func (td timeData) testParseDate(t *testing.T, info string, order pgdate.Order) { +func (td timeData) testParseDate( + t *testing.T, info string, order pgdate.Order, ph *pgdate.ParseHelper, +) { info = fmt.Sprintf("%s ParseDate", info) exp, expErr := td.expected(order) - dt, _, err := pgdate.ParseDate(time.Time{}, pgdate.DateStyle{Order: order}, td.s) + dt, _, err := pgdate.ParseDate(time.Time{}, pgdate.DateStyle{Order: order}, td.s, ph) res, _ := dt.ToTime() // HACK: This is a format that parses as a date and timestamp, @@ -138,15 +140,17 @@ func (td timeData) testParseDate(t *testing.T, info string, order pgdate.Order) td.crossCheck(t, info, "date", td.s, order, exp, expErr) } -func (td timeData) testParseTime(t *testing.T, info string, order pgdate.Order) { +func (td timeData) testParseTime( + t *testing.T, info string, order pgdate.Order, ph *pgdate.ParseHelper, +) { info = fmt.Sprintf("%s ParseTime", info) exp, expErr := td.expected(order) - res, _, err := pgdate.ParseTime(time.Time{}, pgdate.DateStyle{Order: order}, td.s) + res, _, err := pgdate.ParseTime(time.Time{}, pgdate.DateStyle{Order: order}, td.s, ph) // Weird times like 24:00:00 or 
23:59:60 aren't allowed, // unless there's also a date. if td.isRolloverTime { - _, _, err := pgdate.ParseDate(time.Time{}, pgdate.DateStyle{Order: order}, td.s) + _, _, err := pgdate.ParseDate(time.Time{}, pgdate.DateStyle{Order: order}, td.s, ph) expErr = err != nil } @@ -761,6 +765,7 @@ func TestMain(m *testing.M) { // * Pick an example time input: // - Test ParseTime() func TestParse(t *testing.T) { + var ph pgdate.ParseHelper for _, order := range []pgdate.Order{ pgdate.Order_YMD, pgdate.Order_DMY, @@ -768,14 +773,17 @@ func TestParse(t *testing.T) { } { t.Run(order.String(), func(t *testing.T) { for _, dtc := range dateTestData { - dtc.testParseDate(t, dtc.s, order) + dtc.testParseDate(t, dtc.s, order, nil) + dtc.testParseDate(t, dtc.s, order, &ph) // Combine times with dates to create timestamps. for _, ttc := range timeTestData { info := fmt.Sprintf("%s %s", dtc.s, ttc.s) tstc := dtc.concatTime(ttc) - tstc.testParseDate(t, info, order) - tstc.testParseTime(t, info, order) + tstc.testParseDate(t, info, order, nil) + tstc.testParseDate(t, info, order, &ph) + tstc.testParseTime(t, info, order, nil) + tstc.testParseTime(t, info, order, &ph) tstc.testParseTimestamp(t, info, order) tstc.testParseTimestampWithoutTimezone(t, info, order) } @@ -784,14 +792,16 @@ func TestParse(t *testing.T) { // Test some other timestamps formats we can't create // by just concatenating a date + time string. 
for _, ttc := range timestampTestData { - ttc.testParseTime(t, ttc.s, order) + ttc.testParseTime(t, ttc.s, order, nil) + ttc.testParseTime(t, ttc.s, order, &ph) } }) } t.Run("ParseTime", func(t *testing.T) { for _, ttc := range timeTestData { - ttc.testParseTime(t, ttc.s, 0 /* order */) + ttc.testParseTime(t, ttc.s, 0 /* order */, nil) + ttc.testParseTime(t, ttc.s, 0 /* order */, &ph) } }) } @@ -1040,6 +1050,7 @@ func TestDependsOnContext(t *testing.T) { now := time.Date(2001, time.February, 3, 4, 5, 6, 1000, time.FixedZone("foo", 18000)) order := pgdate.Order_YMD + var ph pgdate.ParseHelper for _, tc := range testCases { t.Run(tc.s, func(t *testing.T) { toStr := func(result interface{}, depOnCtx bool, err error) string { @@ -1058,8 +1069,10 @@ func TestDependsOnContext(t *testing.T) { t.Errorf("%s: expected '%s', got '%s'", what, expected, actual) } } - check("ParseDate", tc.date, toStr(pgdate.ParseDate(now, pgdate.DateStyle{Order: order}, tc.s))) - check("ParseTime", tc.time, toStr(pgdate.ParseTime(now, pgdate.DateStyle{Order: order}, tc.s))) + check("ParseDate", tc.date, toStr(pgdate.ParseDate(now, pgdate.DateStyle{Order: order}, tc.s, nil))) + check("ParseDate", tc.date, toStr(pgdate.ParseDate(now, pgdate.DateStyle{Order: order}, tc.s, &ph))) + check("ParseTime", tc.time, toStr(pgdate.ParseTime(now, pgdate.DateStyle{Order: order}, tc.s, nil))) + check("ParseTime", tc.time, toStr(pgdate.ParseTime(now, pgdate.DateStyle{Order: order}, tc.s, &ph))) check( "ParseTimeWithoutTimezone", tc.timeNoTZ, toStr(pgdate.ParseTimeWithoutTimezone(now, pgdate.DateStyle{Order: order}, tc.s)), @@ -1084,11 +1097,12 @@ var benchDates = [...]string{ } func BenchmarkParseDate(b *testing.B) { + var ph pgdate.ParseHelper now := timeutil.Now() ds := pgdate.DefaultDateStyle() for i := 0; i < b.N; i++ { for _, str := range benchDates { - _, _, err := pgdate.ParseDate(now, ds, str) + _, _, err := pgdate.ParseDate(now, ds, str, &ph) if err != nil { b.Fatal(err) } diff --git 
a/pkg/util/timeutil/pgdate/pgdate_test.go b/pkg/util/timeutil/pgdate/pgdate_test.go index 5147eb68f8a3..3fbee8107288 100644 --- a/pkg/util/timeutil/pgdate/pgdate_test.go +++ b/pkg/util/timeutil/pgdate/pgdate_test.go @@ -21,6 +21,7 @@ import ( ) func TestParseDate(t *testing.T) { + var parseHelper ParseHelper for _, tc := range []struct { s string err string @@ -72,23 +73,25 @@ func TestParseDate(t *testing.T) { }, } { t.Run(tc.s, func(t *testing.T) { - d, depOnCtx, err := ParseDate(time.Time{}, DateStyle{Order: Order_YMD}, tc.s) - if tc.err != "" { - if err == nil || !strings.Contains(err.Error(), tc.err) { - t.Fatalf("got %v, expected %v", err, tc.err) + for _, ph := range []*ParseHelper{nil, &parseHelper} { + d, depOnCtx, err := ParseDate(time.Time{}, DateStyle{Order: Order_YMD}, tc.s, ph) + if tc.err != "" { + if err == nil || !strings.Contains(err.Error(), tc.err) { + t.Fatalf("got %v, expected %v", err, tc.err) + } + return + } + if depOnCtx { + t.Fatalf("should not depend on context") + } + pg := d.PGEpochDays() + if pg != tc.pgdays { + t.Fatalf("%d != %d", pg, tc.pgdays) + } + s := d.String() + if s != tc.s { + t.Fatalf("%s != %s", s, tc.s) } - return - } - if depOnCtx { - t.Fatalf("should not depend on context") - } - pg := d.PGEpochDays() - if pg != tc.pgdays { - t.Fatalf("%d != %d", pg, tc.pgdays) - } - s := d.String() - if s != tc.s { - t.Fatalf("%s != %s", s, tc.s) } }) }