From 1d464102b340e6656662231db1889eea67b4bec8 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Mon, 24 Jun 2019 16:59:52 +0800 Subject: [PATCH] server: add http api to get some info of sub-optimal query (#10717) --- server/http_status.go | 42 +++-- server/sql_info_fetcher.go | 327 +++++++++++++++++++++++++++++++++++++ util/testkit/testkit.go | 25 ++- 3 files changed, 366 insertions(+), 28 deletions(-) create mode 100644 server/sql_info_fetcher.go diff --git a/server/http_status.go b/server/http_status.go index c7ca853b252de..2dff937958b5e 100644 --- a/server/http_status.go +++ b/server/http_status.go @@ -49,6 +49,22 @@ func (s *Server) startStatusHTTP() { go s.startHTTPServer() } +func serveError(w http.ResponseWriter, status int, txt string) { + w.Header().Set("Content-Type", "text/plain; charset=utf-8") + w.Header().Set("X-Go-Pprof", "1") + w.Header().Del("Content-Disposition") + w.WriteHeader(status) + _, err := fmt.Fprintln(w, txt) + terror.Log(err) +} + +func sleepWithCtx(ctx context.Context, d time.Duration) { + select { + case <-time.After(d): + case <-ctx.Done(): + } +} + func (s *Server) startHTTPServer() { router := mux.NewRouter() @@ -123,26 +139,6 @@ func (s *Server) startHTTPServer() { serverMux.HandleFunc("/debug/pprof/symbol", pprof.Symbol) serverMux.HandleFunc("/debug/pprof/trace", pprof.Trace) - serveError := func(w http.ResponseWriter, status int, txt string) { - w.Header().Set("Content-Type", "text/plain; charset=utf-8") - w.Header().Set("X-Go-Pprof", "1") - w.Header().Del("Content-Disposition") - w.WriteHeader(status) - _, err := fmt.Fprintln(w, txt) - terror.Log(err) - } - - sleep := func(w http.ResponseWriter, d time.Duration) { - var clientGone <-chan bool - if cn, ok := w.(http.CloseNotifier); ok { - clientGone = cn.CloseNotify() - } - select { - case <-time.After(d): - case <-clientGone: - } - } - serverMux.HandleFunc("/debug/zip", func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Disposition", fmt.Sprintf(`attachment; filename="tidb_debug"`+time.Now().Format("20060102150405")+".zip")) @@ -191,7 +187,7 @@ func (s *Server) startHTTPServer() { if sec <= 0 || err != nil { sec = 10 } - sleep(w, time.Duration(sec)*time.Second) + sleepWithCtx(r.Context(), time.Duration(sec)*time.Second) rpprof.StopCPUProfile() // dump config @@ -220,11 +216,13 @@ func (s *Server) startHTTPServer() { err = zw.Close() terror.Log(err) }) + fetcher := sqlInfoFetcher{store: tikvHandlerTool.Store} + serverMux.HandleFunc("/debug/sub-optimal-plan", fetcher.zipInfoForSQL) var ( - err error httpRouterPage bytes.Buffer pathTemplate string + err error ) httpRouterPage.WriteString("TiDB Status and Metrics Report

TiDB Status and Metrics Report

") err = router.Walk(func(route *mux.Route, router *mux.Router, ancestors []*mux.Route) error { diff --git a/server/sql_info_fetcher.go b/server/sql_info_fetcher.go new file mode 100644 index 0000000000000..8f07a340c13d6 --- /dev/null +++ b/server/sql_info_fetcher.go @@ -0,0 +1,327 @@ +// Copyright 2019 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package server + +import ( + "archive/zip" + "context" + "encoding/json" + "fmt" + "net/http" + "runtime/pprof" + "strconv" + "strings" + "time" + + "github.com/pingcap/errors" + "github.com/pingcap/parser" + "github.com/pingcap/parser/ast" + "github.com/pingcap/parser/model" + "github.com/pingcap/parser/terror" + "github.com/pingcap/tidb/domain" + "github.com/pingcap/tidb/session" + "github.com/pingcap/tidb/statistics/handle" + "github.com/pingcap/tidb/store/tikv" + "github.com/pingcap/tidb/util/sqlexec" + "github.com/pingcap/tidb/util/testkit" +) + +type sqlInfoFetcher struct { + store tikv.Storage + do *domain.Domain + s session.Session +} + +type tableNamePair struct { + DBName string + TableName string +} + +type tableNameExtractor struct { + curDB string + names map[tableNamePair]struct{} +} + +func (tne *tableNameExtractor) Enter(in ast.Node) (ast.Node, bool) { + if _, ok := in.(*ast.TableName); ok { + return in, true + } + return in, false +} + +func (tne *tableNameExtractor) Leave(in ast.Node) (ast.Node, bool) { + if t, ok := in.(*ast.TableName); ok { + tp := tableNamePair{DBName: t.Schema.L, TableName: t.Name.L} + if tp.DBName == "" { + tp.DBName = tne.curDB + } + if _, ok := tne.names[tp]; !ok { + tne.names[tp] = struct{}{} + } + } + return in, true +} + +func (sh *sqlInfoFetcher) zipInfoForSQL(w http.ResponseWriter, r *http.Request) { + var err error + sh.s, err = session.CreateSession(sh.store) + if err != nil { + serveError(w, http.StatusInternalServerError, fmt.Sprintf("create session failed, err: %v", err)) + return + } + defer sh.s.Close() + sh.do = domain.GetDomain(sh.s) + reqCtx := r.Context() + sql := r.FormValue("sql") + pprofTimeString := r.FormValue("pprof_time") + timeoutString := r.FormValue("timeout") + curDB := strings.ToLower(r.FormValue("current_db")) + if curDB != "" { + _, err = sh.s.Execute(reqCtx, "use %v"+curDB) + if err != nil { + serveError(w, http.StatusInternalServerError, fmt.Sprintf("use database %v failed, err: %v", curDB, err)) + return + } + } + var ( + pprofTime int + timeout int + ) + if pprofTimeString != "" { + pprofTime, err = strconv.Atoi(pprofTimeString) + if err != nil { + serveError(w, http.StatusBadRequest, "invalid value for pprof_time, please input a int value larger than 5") + return + } + } + if pprofTimeString != "" && pprofTime < 5 { + serveError(w, http.StatusBadRequest, "pprof time is too short, please input a int value larger than 5") + } + if timeoutString != "" { + timeout, err = strconv.Atoi(timeoutString) + if err != nil { + serveError(w, http.StatusBadRequest, "invalid value for timeout") + return + } + } + if timeout < pprofTime { + timeout = pprofTime + } + pairs, err := sh.extractTableNames(sql, curDB) + if err != nil { + serveError(w, http.StatusBadRequest, fmt.Sprintf("invalid SQL text, err: %v", err)) + return + } + zw := zip.NewWriter(w) + defer func() { + terror.Log(zw.Close()) + }() + for pair := range pairs { + jsonTbl, err := sh.getStatsForTable(pair) + if err != nil { + err = sh.writeErrFile(zw, fmt.Sprintf("%v.%v.stats.err.txt", pair.DBName, pair.TableName), err) + terror.Log(err) + continue + } + statsFw, err := zw.Create(fmt.Sprintf("%v.%v.json", pair.DBName, pair.TableName)) + if err != nil { + terror.Log(err) + continue + } + data, err := json.Marshal(jsonTbl) + if err != nil { + err = sh.writeErrFile(zw, fmt.Sprintf("%v.%v.stats.err.txt", pair.DBName, pair.TableName), err) + terror.Log(err) + continue + } + _, err = statsFw.Write(data) + if err != nil { + err = sh.writeErrFile(zw, fmt.Sprintf("%v.%v.stats.err.txt", pair.DBName, pair.TableName), err) + terror.Log(err) + continue + } + } + for pair := range pairs { + err = sh.getShowCreateTable(pair, zw) + if err != nil { + err = sh.writeErrFile(zw, fmt.Sprintf("%v.%v.schema.err.txt", pair.DBName, pair.TableName), err) + terror.Log(err) + return + } + } + // If we don't catch profile. We just get a explain result. + if pprofTime == 0 { + recordSets, err := sh.s.(sqlexec.SQLExecutor).Execute(reqCtx, fmt.Sprintf("explain %s", sql)) + if len(recordSets) > 0 { + defer terror.Call(recordSets[0].Close) + } + if err != nil { + err = sh.writeErrFile(zw, "explain.err.txt", err) + terror.Log(err) + return + } + sRows, err := testkit.ResultSetToStringSlice(reqCtx, sh.s, recordSets[0]) + if err != nil { + err = sh.writeErrFile(zw, "explain.err.txt", err) + terror.Log(err) + return + } + fw, err := zw.Create("explain.txt") + if err != nil { + terror.Log(err) + return + } + for _, row := range sRows { + fmt.Fprintf(fw, "%s\n", strings.Join(row, "\t")) + } + } else { + // Otherwise we catch a profile and run `EXPLAIN ANALYZE` result. + ctx, cancelFunc := context.WithCancel(reqCtx) + timer := time.NewTimer(time.Second * time.Duration(timeout)) + resultChan := make(chan *explainAnalyzeResult) + go sh.getExplainAnalyze(ctx, sql, resultChan) + errChan := make(chan error) + go sh.catchCPUProfile(reqCtx, pprofTime, zw, errChan) + select { + case result := <-resultChan: + timer.Stop() + cancelFunc() + if result.err != nil { + err = sh.writeErrFile(zw, "explain_analyze.err.txt", result.err) + terror.Log(err) + return + } + if len(result.rows) == 0 { + break + } + fw, err := zw.Create("explain_analyze.txt") + if err != nil { + terror.Log(err) + break + } + for _, row := range result.rows { + fmt.Fprintf(fw, "%s\n", strings.Join(row, "\t")) + } + case <-timer.C: + cancelFunc() + } + err = <-errChan + if err != nil { + err = sh.writeErrFile(zw, "profile.err.txt", err) + terror.Log(err) + return + } + } +} + +func (sh *sqlInfoFetcher) writeErrFile(zw *zip.Writer, name string, err error) error { + fw, err1 := zw.Create(name) + if err1 != nil { + return err1 + } + fmt.Fprintf(fw, "error: %v", err) + return nil +} + +type explainAnalyzeResult struct { + rows [][]string + err error +} + +func (sh *sqlInfoFetcher) getExplainAnalyze(ctx context.Context, sql string, resultChan chan<- *explainAnalyzeResult) { + recordSets, err := sh.s.(sqlexec.SQLExecutor).Execute(ctx, fmt.Sprintf("explain analyze %s", sql)) + if len(recordSets) > 0 { + defer terror.Call(recordSets[0].Close) + } + if err != nil { + resultChan <- &explainAnalyzeResult{err: err} + return + } + rows, err := testkit.ResultSetToStringSlice(ctx, sh.s, recordSets[0]) + if err != nil { + terror.Log(err) + rows = nil + return + } + resultChan <- &explainAnalyzeResult{rows: rows} +} + +func (sh *sqlInfoFetcher) catchCPUProfile(ctx context.Context, sec int, zw *zip.Writer, errChan chan<- error) { + // dump profile + fw, err := zw.Create("profile") + if err != nil { + errChan <- err + return + } + if err := pprof.StartCPUProfile(fw); err != nil { + errChan <- err + return + } + sleepWithCtx(ctx, time.Duration(sec)*time.Second) + pprof.StopCPUProfile() + errChan <- nil +} + +func (sh *sqlInfoFetcher) getStatsForTable(pair tableNamePair) (*handle.JSONTable, error) { + is := sh.do.InfoSchema() + h := sh.do.StatsHandle() + tbl, err := is.TableByName(model.NewCIStr(pair.DBName), model.NewCIStr(pair.TableName)) + if err != nil { + return nil, err + } + js, err := h.DumpStatsToJSON(pair.DBName, tbl.Meta(), nil) + return js, err +} + +func (sh *sqlInfoFetcher) getShowCreateTable(pair tableNamePair, zw *zip.Writer) error { + recordSets, err := sh.s.(sqlexec.SQLExecutor).Execute(context.TODO(), fmt.Sprintf("show create table `%v`.`%v`", pair.DBName, pair.TableName)) + if len(recordSets) > 0 { + defer terror.Call(recordSets[0].Close) + } + if err != nil { + return err + } + sRows, err := testkit.ResultSetToStringSlice(context.Background(), sh.s, recordSets[0]) + if err != nil { + terror.Log(err) + return nil + } + fw, err := zw.Create(fmt.Sprintf("%v.%v.schema.txt", pair.DBName, pair.TableName)) + if err != nil { + terror.Log(err) + return nil + } + for _, row := range sRows { + fmt.Fprintf(fw, "%s\n", strings.Join(row, "\t")) + } + return nil +} + +func (sh *sqlInfoFetcher) extractTableNames(sql, curDB string) (map[tableNamePair]struct{}, error) { + p := parser.New() + charset, collation := sh.s.GetSessionVars().GetCharsetInfo() + stmts, _, err := p.Parse(sql, charset, collation) + if err != nil { + return nil, err + } + if len(stmts) > 1 { + return nil, errors.Errorf("Only 1 statement is allowed") + } + extractor := &tableNameExtractor{ + curDB: curDB, + names: make(map[tableNamePair]struct{}), + } + stmts[0].Accept(extractor) + return extractor.names, nil +} diff --git a/util/testkit/testkit.go b/util/testkit/testkit.go index 6b93d900409f8..b11748dce64af 100644 --- a/util/testkit/testkit.go +++ b/util/testkit/testkit.go @@ -236,12 +236,16 @@ func (tk *TestKit) ResultSetToResult(rs sqlexec.RecordSet, comment check.Comment return tk.ResultSetToResultWithCtx(context.Background(), rs, comment) } -// ResultSetToResultWithCtx converts sqlexec.RecordSet to testkit.Result. -func (tk *TestKit) ResultSetToResultWithCtx(ctx context.Context, rs sqlexec.RecordSet, comment check.CommentInterface) *Result { - rows, err := session.GetRows4Test(ctx, tk.Se, rs) - tk.c.Assert(errors.ErrorStack(err), check.Equals, "", comment) +// ResultSetToStringSlice changes the RecordSet to [][]string. +func ResultSetToStringSlice(ctx context.Context, s session.Session, rs sqlexec.RecordSet) ([][]string, error) { + rows, err := session.GetRows4Test(ctx, s, rs) + if err != nil { + return nil, err + } err = rs.Close() - tk.c.Assert(errors.ErrorStack(err), check.Equals, "", comment) + if err != nil { + return nil, err + } sRows := make([][]string, len(rows)) for i := range rows { row := rows[i] @@ -252,11 +256,20 @@ func (tk *TestKit) ResultSetToResultWithCtx(ctx context.Context, rs sqlexec.Reco } else { d := row.GetDatum(j, &rs.Fields()[j].Column.FieldType) iRow[j], err = d.ToString() - tk.c.Assert(err, check.IsNil) + if err != nil { + return nil, err + } } } sRows[i] = iRow } + return sRows, nil +} + +// ResultSetToResultWithCtx converts sqlexec.RecordSet to testkit.Result. +func (tk *TestKit) ResultSetToResultWithCtx(ctx context.Context, rs sqlexec.RecordSet, comment check.CommentInterface) *Result { + sRows, err := ResultSetToStringSlice(ctx, tk.Se, rs) + tk.c.Check(err, check.IsNil, comment) return &Result{rows: sRows, c: tk.c, comment: comment} }