Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

server: add http api to get some info of sub-optimal query #10717

Merged
merged 12 commits into from
Jun 24, 2019
42 changes: 20 additions & 22 deletions server/http_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,22 @@ func (s *Server) startStatusHTTP() {
go s.startHTTPServer()
}

func serveError(w http.ResponseWriter, status int, txt string) {
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
w.Header().Set("X-Go-Pprof", "1")
w.Header().Del("Content-Disposition")
w.WriteHeader(status)
_, err := fmt.Fprintln(w, txt)
terror.Log(err)
}

func sleepWithCtx(ctx context.Context, d time.Duration) {
select {
case <-time.After(d):
case <-ctx.Done():
}
}

func (s *Server) startHTTPServer() {
router := mux.NewRouter()

Expand Down Expand Up @@ -123,26 +139,6 @@ func (s *Server) startHTTPServer() {
serverMux.HandleFunc("/debug/pprof/symbol", pprof.Symbol)
serverMux.HandleFunc("/debug/pprof/trace", pprof.Trace)

serveError := func(w http.ResponseWriter, status int, txt string) {
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
w.Header().Set("X-Go-Pprof", "1")
w.Header().Del("Content-Disposition")
w.WriteHeader(status)
_, err := fmt.Fprintln(w, txt)
terror.Log(err)
}

sleep := func(w http.ResponseWriter, d time.Duration) {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

// Deprecated: the CloseNotifier interface predates Go's context package.
// New code should use Request.Context instead.

var clientGone <-chan bool
if cn, ok := w.(http.CloseNotifier); ok {
clientGone = cn.CloseNotify()
}
select {
case <-time.After(d):
case <-clientGone:
}
}

serverMux.HandleFunc("/debug/zip", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Disposition", fmt.Sprintf(`attachment; filename="tidb_debug"`+time.Now().Format("20060102150405")+".zip"))

Expand Down Expand Up @@ -191,7 +187,7 @@ func (s *Server) startHTTPServer() {
if sec <= 0 || err != nil {
sec = 10
}
sleep(w, time.Duration(sec)*time.Second)
sleepWithCtx(r.Context(), time.Duration(sec)*time.Second)
rpprof.StopCPUProfile()

// dump config
Expand Down Expand Up @@ -220,11 +216,13 @@ func (s *Server) startHTTPServer() {
err = zw.Close()
terror.Log(err)
})
fetcher := sqlInfoFetcher{store: tikvHandlerTool.Store}
serverMux.HandleFunc("/debug/sub-optimal-plan", fetcher.zipInfoForSQL)

var (
err error
httpRouterPage bytes.Buffer
pathTemplate string
err error
)
httpRouterPage.WriteString("<html><head><title>TiDB Status and Metrics Report</title></head><body><h1>TiDB Status and Metrics Report</h1><table>")
err = router.Walk(func(route *mux.Route, router *mux.Router, ancestors []*mux.Route) error {
Expand Down
308 changes: 308 additions & 0 deletions server/sql_info_fetcher.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,308 @@
// Copyright 2019 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package server

import (
"archive/zip"
"context"
"encoding/json"
"fmt"
"net/http"
"runtime/pprof"
"strconv"
"strings"
"time"

"github.com/pingcap/parser"
"github.com/pingcap/parser/ast"
"github.com/pingcap/parser/model"
"github.com/pingcap/parser/terror"
"github.com/pingcap/tidb/domain"
"github.com/pingcap/tidb/session"
"github.com/pingcap/tidb/statistics/handle"
"github.com/pingcap/tidb/store/tikv"
"github.com/pingcap/tidb/util/sqlexec"
"github.com/pingcap/tidb/util/testkit"
)

type sqlInfoFetcher struct {
store tikv.Storage
do *domain.Domain
s session.Session
}

type tableNamePair struct {
DBName string
TableName string
}

type tableNameExtractor struct {
curDB string
names map[tableNamePair]struct{}
}

func (tne *tableNameExtractor) Enter(in ast.Node) (ast.Node, bool) {
if _, ok := in.(*ast.TableName); ok {
return in, true
}
return in, false
}

func (tne *tableNameExtractor) Leave(in ast.Node) (ast.Node, bool) {
if t, ok := in.(*ast.TableName); ok {
tp := tableNamePair{DBName: t.Schema.L, TableName: t.Name.L}
if tp.DBName == "" {
tp.DBName = tne.curDB
}
if _, ok := tne.names[tp]; !ok {
tne.names[tp] = struct{}{}
}
}
return in, true
}

func (sh *sqlInfoFetcher) zipInfoForSQL(w http.ResponseWriter, r *http.Request) {
var err error
sh.s, err = session.CreateSession(sh.store)
if err != nil {
serveError(w, http.StatusInternalServerError, fmt.Sprintf("create session failed, err: %v", err))
return
}
defer sh.s.Close()
sh.do = domain.GetDomain(sh.s)
reqCtx := r.Context()
sql := r.FormValue("sql")
alivxxx marked this conversation as resolved.
Show resolved Hide resolved
pprofTimeString := r.FormValue("pprof_time")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how about profile_sec?

timeoutString := r.FormValue("timeout")
curDB := strings.ToLower(r.FormValue("current_db"))
if curDB != "" {
_, err = sh.s.Execute(reqCtx, fmt.Sprintf("use %v", curDB))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
_, err = sh.s.Execute(reqCtx, fmt.Sprintf("use %v", curDB))
_, err = sh.s.Execute(reqCtx, "use " + curDB)

if err != nil {
serveError(w, http.StatusInternalServerError, fmt.Sprintf("use database failed, err: %v", err))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
serveError(w, http.StatusInternalServerError, fmt.Sprintf("use database failed, err: %v", err))
serveError(w, http.StatusInternalServerError, fmt.Sprintf("use database %v failed: %v", curDB , err))

return
}
}
var (
pprofTime int
timeout int
)
if pprofTimeString != "" {
pprofTime, err = strconv.Atoi(pprofTimeString)
qw4990 marked this conversation as resolved.
Show resolved Hide resolved
}
if err != nil {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

check err in the above if statement

serveError(w, http.StatusBadRequest, "invalid value for pprof_time")
return
}
if pprofTimeString != "" && pprofTime < 5 {
serveError(w, http.StatusBadRequest, "pprof time is too short")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think a best practice for this kind of error message is:

  1. point out the invalid value
  2. point out the valid value range

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we return here if it's an invalid value?

}
if timeoutString != "" {
timeout, err = strconv.Atoi(timeoutString)
}
if err != nil {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

serveError(w, http.StatusBadRequest, "invalid value for timeout")
return
}
if timeout < pprofTime {
timeout = pprofTime
}
pairs, err := sh.extractTableNames(sql, curDB)
if err != nil {
serveError(w, http.StatusBadRequest, fmt.Sprintf("invalid SQL text, err: %v", err))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how about "failed to extract table names, sql: %v"

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But the sql is something given by the user.

return
}
zw := zip.NewWriter(w)
defer func() {
terror.Log(zw.Close())
}()
for pair := range pairs {
jsonTbl, err := sh.getStatsForTable(pair)
if err != nil {
terror.Log(err)
continue
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we continue or just throw error out?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we return a message to indicate the failure of dumping stats for this table?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

write a file like dbname.tblname.stats.err?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

looks OK to me

}
statsFw, err := zw.Create(fmt.Sprintf("%v.%v.json", pair.DBName, pair.TableName))
if err != nil {
terror.Log(err)
continue
}
data, err := json.Marshal(jsonTbl)
if err != nil {
terror.Log(err)
continue
}
_, err = statsFw.Write(data)
if err != nil {
terror.Log(err)
continue
}
}
for pair := range pairs {
err = sh.getShowCreateTable(pair, zw)
if err != nil {
serveError(w, http.StatusInternalServerError, fmt.Sprintf("get schema for %v.%v failed", pair.DBName, pair.TableName))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it ok that we write error messages and zip formatted data to w at the same time?

Copy link
Member Author

@winoros winoros Jun 10, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In fact, it's not very ok. The following is the curl result when error arisen.

➜  tidb git:(http-fetch-sql-info) ✗ curl -X POST http://127.0.0.1:10080/debug/sub-optimal-plan -d "sql=select%20*%20from%20t&current_db=test"
execute SQL failed, err: [planner:1046]No database selected
PK
test.t.json��Aj�0���Z
                     ��i��� [iDe+X������"=C��a��[h�Ϋ?���� V4#��陉�6����Y��Ƣ�}��2
u�B��P�:��Ω��t�⧤���~���ܣ&�Ѭ��ɕ�A�E�t�wc�y�k�m�zl6��aw|�yC����H�؆"v�F�+����ן�7?j\���u�O��P� "f���Ӡa\�����T
XQtest.t.schema.txt*�tru
                        qqt�qUH(IP��RPH�LP(K,J�H,�02�Tpqus
                                                          �	Q�
                                                                  ����Tp�s��s�����wq�K:{8��ؖ��Y�&�(8���8�����I�y\���P(%��ip� "f
XQ
  test.t.json(%��ipCtest.t.schema.txtPKx�%

But the api debug/zip did the same thing. #9651
Or we may need to write to a BytesBuffer first then write to ResponseWriter

Copy link
Contributor

@qw4990 qw4990 Jun 10, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK... for simplifying, we can leave it as a further optimization and do it in another PR and please add a TODO here.

return
}
}
// If we don't catch profile. We just get a explain result.
if pprofTime == 0 {
recordSets, err := sh.s.(sqlexec.SQLExecutor).Execute(reqCtx, fmt.Sprintf("explain %s", sql))
if len(recordSets) > 0 {
defer terror.Call(recordSets[0].Close)
}
if err != nil {
serveError(w, http.StatusInternalServerError, fmt.Sprintf("execute SQL failed, err: %v", err))
return
}
sRows, err := testkit.ResultSetToStringSlice(reqCtx, sh.s, recordSets[0])
if err != nil {
terror.Log(err)
return
}
fw, err := zw.Create("explain.txt")
if err != nil {
terror.Log(err)
return
}
for _, row := range sRows {
fmt.Fprintf(fw, "%s\n", strings.Join(row, "\t"))
}
} else {
// Otherwise we catch a profile and run `EXPLAIN ANALYZE` result.
ctx, cancelFunc := context.WithCancel(reqCtx)
timer := time.NewTimer(time.Second * time.Duration(timeout))
resultChan := make(chan *explainAnalyzeResult)
go sh.getExplainAnalyze(ctx, sql, resultChan)
errChan := make(chan error)
go sh.catchCPUProfile(reqCtx, pprofTime, zw, errChan)
select {
case result := <-resultChan:
timer.Stop()
cancelFunc()
if result.err != nil {
serveError(w, http.StatusInternalServerError, fmt.Sprintf("explain analyze SQL failed, err: %v", err))
return
}
if len(result.rows) == 0 {
break
}
fw, err := zw.Create("explain_analyze.txt")
if err != nil {
terror.Log(err)
break
}
for _, row := range result.rows {
fmt.Fprintf(fw, "%s\n", strings.Join(row, "\t"))
}
case <-timer.C:
cancelFunc()
}
err = <-errChan
if err != nil {
serveError(w, http.StatusInternalServerError, fmt.Sprintf("catch cpu profile failed, error: %v", err))
return
}
}
}

type explainAnalyzeResult struct {
rows [][]string
err error
}

func (sh *sqlInfoFetcher) getExplainAnalyze(ctx context.Context, sql string, resultChan chan<- *explainAnalyzeResult) {
recordSets, err := sh.s.(sqlexec.SQLExecutor).Execute(ctx, fmt.Sprintf("explain analyze %s", sql))
if len(recordSets) > 0 {
defer terror.Call(recordSets[0].Close)
}
if err != nil {
resultChan <- &explainAnalyzeResult{err: err}
return
}
rows, err := testkit.ResultSetToStringSlice(ctx, sh.s, recordSets[0])
if err != nil {
terror.Log(err)
rows = nil
return
}
resultChan <- &explainAnalyzeResult{rows: rows}
}

func (sh *sqlInfoFetcher) catchCPUProfile(ctx context.Context, sec int, zw *zip.Writer, errChan chan<- error) {
// dump profile
fw, err := zw.Create("profile")
if err != nil {
errChan <- err
return
}
if err := pprof.StartCPUProfile(fw); err != nil {
errChan <- err
return
}
sleepWithCtx(ctx, time.Duration(sec)*time.Second)
pprof.StopCPUProfile()
errChan <- nil
}

func (sh *sqlInfoFetcher) getStatsForTable(pair tableNamePair) (*handle.JSONTable, error) {
is := sh.do.InfoSchema()
h := sh.do.StatsHandle()
tbl, err := is.TableByName(model.NewCIStr(pair.DBName), model.NewCIStr(pair.TableName))
if err != nil {
return nil, err
}
js, err := h.DumpStatsToJSON(pair.DBName, tbl.Meta(), nil)
return js, err
}

func (sh *sqlInfoFetcher) getShowCreateTable(pair tableNamePair, zw *zip.Writer) error {
recordSets, err := sh.s.(sqlexec.SQLExecutor).Execute(context.TODO(), fmt.Sprintf("show create table `%v`.`%v`", pair.DBName, pair.TableName))
if len(recordSets) > 0 {
defer terror.Call(recordSets[0].Close)
}
if err != nil {
return err
}
sRows, err := testkit.ResultSetToStringSlice(context.Background(), sh.s, recordSets[0])
if err != nil {
terror.Log(err)
return nil
}
fw, err := zw.Create(fmt.Sprintf("%v.%v.schema.txt", pair.DBName, pair.TableName))
if err != nil {
terror.Log(err)
return nil
}
for _, row := range sRows {
fmt.Fprintf(fw, "%s\n", strings.Join(row, "\t"))
}
return nil
}

func (sh *sqlInfoFetcher) extractTableNames(sql, curDB string) (map[tableNamePair]struct{}, error) {
p := parser.New()
charset, collation := sh.s.GetSessionVars().GetCharsetInfo()
stmts, _, err := p.Parse(sql, charset, collation)
if err != nil {
return nil, err
}
extractor := &tableNameExtractor{
curDB: curDB,
names: make(map[tableNamePair]struct{}),
}
for _, stmt := range stmts {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So we allow multiple statements? Will it cause problems if we use explain sql?

stmt.Accept(extractor)
}
return extractor.names, nil
}
Loading