Skip to content

Commit

Permalink
Add options to support Profile Guided Optimization
Browse files Browse the repository at this point in the history
  • Loading branch information
solotzg committed Jun 30, 2022
1 parent 5b61ae7 commit b35b586
Show file tree
Hide file tree
Showing 6 changed files with 285 additions and 3 deletions.
52 changes: 52 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,48 @@ if (COMPILER_CLANG)
endif ()
endif ()

# Profile Guided Optimization (PGO) switches.
#   ENABLE_LLVM_PROFILE_INSTR  - build instrumented code that collects execution counts (handled further below).
#   ENABLE_LLVM_PGO            - optimize with previously collected profile data.
#   ENABLE_LLVM_PGO_USE_SAMPLE - together with ENABLE_LLVM_PGO: use sampling-profiler data instead of
#                                instrumentation data.
# The profile data path is taken from the env var `TIFLASH_LLVM_PROFDATA`, which is read at configure time.
option (ENABLE_LLVM_PROFILE_INSTR "Generate instrumented code to collect execution counts" OFF)
option (ENABLE_LLVM_PGO "Enables flags for Profile Guided Optimization (PGO)" OFF)
option (ENABLE_LLVM_PGO_USE_SAMPLE "Enables flags for Profile Guided Optimization (PGO) and use sampling profilers" OFF)

# Derived value, not a user switch: it is reset on every configure and turned ON only when a sample profile
# is actually passed to the compiler. INTERNAL keeps it out of cache editors and always overwrites the entry.
set (USE_LLVM_FDO OFF CACHE INTERNAL "Sample profile data is applied to this build (derived, do not set manually)")

# The sampling mode is only acted upon inside the `ENABLE_LLVM_PGO` branch below. Without this guard the
# option would add no flags at all while still being reported as enabled by the generated config header.
if (ENABLE_LLVM_PGO_USE_SAMPLE AND NOT ENABLE_LLVM_PGO)
    message (WARNING "`ENABLE_LLVM_PGO_USE_SAMPLE` has no effect without `ENABLE_LLVM_PGO`, ignore it")
    # A normal variable shadows the cache entry for the rest of the configure run.
    set (ENABLE_LLVM_PGO_USE_SAMPLE OFF)
endif ()

if (ENABLE_LLVM_PGO)
    # Producing a profile and consuming one in the same build is rejected.
    if (ENABLE_LLVM_PROFILE_INSTR)
        message (FATAL_ERROR "`ENABLE_LLVM_PROFILE_INSTR` can not be used with `ENABLE_LLVM_PGO`")
    endif ()

    # Read the env var once so every flag and message below agrees on the same value.
    set (_LLVM_PROFDATA_PATH "$ENV{TIFLASH_LLVM_PROFDATA}")

    if (ENABLE_LLVM_PGO_USE_SAMPLE)

        # Follow https://clang.llvm.org/docs/UsersManual.html#using-sampling-profilers
        # Use https://github.com/google/autofdo

        set (_LLVM_PGO_USE_SAMPLE_FLAGS "-gline-tables-only -fdebug-info-for-profiling -funique-internal-linkage-names")

        # These flags are added even when no profile is supplied yet, so that this build can be the one
        # that gets sampled.
        string (APPEND CMAKE_CXX_FLAGS " ${_LLVM_PGO_USE_SAMPLE_FLAGS}")
        string (APPEND CMAKE_C_FLAGS " ${_LLVM_PGO_USE_SAMPLE_FLAGS}")
        string (APPEND CMAKE_EXE_LINKER_FLAGS " -Wl,--no-rosegment")
        message (STATUS "Add flags `${_LLVM_PGO_USE_SAMPLE_FLAGS}` for profiling")

        if (NOT "${_LLVM_PROFDATA_PATH}" STREQUAL "")
            string (APPEND CMAKE_CXX_FLAGS " -fprofile-sample-use=${_LLVM_PROFDATA_PATH}")
            string (APPEND CMAKE_C_FLAGS " -fprofile-sample-use=${_LLVM_PROFDATA_PATH}")
            message (STATUS "Use sample profile data `${_LLVM_PROFDATA_PATH}` for profile-guided optimization")
            set (USE_LLVM_FDO ON CACHE INTERNAL "Sample profile data is applied to this build (derived, do not set manually)")
        else ()
            message (STATUS "NOT use sample profile data")
        endif ()

        unset (_LLVM_PGO_USE_SAMPLE_FLAGS)
    else ()
        # Instrumentation based PGO can not do anything without profile data.
        if ("${_LLVM_PROFDATA_PATH}" STREQUAL "")
            message (FATAL_ERROR "Please set env var `TIFLASH_LLVM_PROFDATA`")
        endif ()

        string (APPEND CMAKE_CXX_FLAGS " -fprofile-instr-use=${_LLVM_PROFDATA_PATH} -Wno-profile-instr-unprofiled")
        string (APPEND CMAKE_C_FLAGS " -fprofile-instr-use=${_LLVM_PROFDATA_PATH} -Wno-profile-instr-unprofiled")
        message (STATUS "Use instrumentation data `${_LLVM_PROFDATA_PATH}` for profile-guided optimization")
    endif ()

    unset (_LLVM_PROFDATA_PATH)
endif ()

if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
# clang: warning: argument unused during compilation: '-stdlib=libc++'
# clang: warning: argument unused during compilation: '-specs=/usr/share/dpkg/no-pie-compile.specs' [-Wunused-command-line-argument]
Expand Down Expand Up @@ -448,6 +490,16 @@ if (TEST_LLVM_COVERAGE AND CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")
set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fprofile-instr-generate -fcoverage-mapping -DTIFLASH_LLVM_COVERAGE=1")
endif ()

# `ENABLE_LLVM_PROFILE_INSTR` builds instrumented code, so the resulting binaries write profile data when they run.
# It is mutually exclusive with `ENABLE_LLVM_PGO`.
# NOTE(review): the previous comment said this should only take effect for modules dbms and libs. The flag is
# appended to the global C/C++ flags here, so that scoping presumably relies on where this section sits relative
# to the `add_subdirectory` calls - confirm.
if (ENABLE_LLVM_PROFILE_INSTR)
    if (ENABLE_LLVM_PGO)
        message (FATAL_ERROR "`ENABLE_LLVM_PROFILE_INSTR` can not be used with `ENABLE_LLVM_PGO`")
    endif ()

    message (STATUS "Using flag `-fprofile-instr-generate`. Generate instrumented code to collect execution counts into default.profraw file(overridden by '=' form of option or `LLVM_PROFILE_FILE` env var). Follow https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization.")

    # The same flag goes to both the C++ and the C compiler.
    string (APPEND CMAKE_CXX_FLAGS " -fprofile-instr-generate")
    string (APPEND CMAKE_C_FLAGS " -fprofile-instr-generate")
endif ()

if (ARCH_AMD64)
include(CheckCXXCompilerFlag)
check_cxx_compiler_flag("-mvpclmulqdq -Werror -Wall -Wextra" TIFLASH_COMPILER_VPCLMULQDQ_SUPPORT)
Expand Down
17 changes: 17 additions & 0 deletions dbms/src/Common/TiFlashBuildInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,23 @@ std::string getEnabledFeatures()
#if ENABLE_THINLTO
"thinlto",
#endif

// Profile instrumentation
#if ENABLE_LLVM_PROFILE_INSTR
"profile-instr",
#endif

// PGO
#if ENABLE_LLVM_PGO_USE_SAMPLE
"pgo-sample",
#elif ENABLE_LLVM_PGO
"pgo-instr",
#endif

// FDO
#if USE_LLVM_FDO
"fdo",
#endif
};
return fmt::format("{}", fmt::join(features.begin(), features.end(), " "));
}
Expand Down
3 changes: 0 additions & 3 deletions format-diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,6 @@ def main():
else:
print("Format check passed")
else:
cmd = 'clang-format -i {}'.format(' '.join(files_to_format))
if subprocess.Popen(cmd, shell=True, cwd=tics_repo_path).wait():
exit(-1)
print("Finish code format")
else:
print('No file to format')
Expand Down
4 changes: 4 additions & 0 deletions libs/libcommon/include/common/config_common.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,7 @@
#cmakedefine01 USE_UNWIND
#cmakedefine01 USE_LLVM_LIBUNWIND
#cmakedefine01 ENABLE_THINLTO
#cmakedefine01 ENABLE_LLVM_PGO
#cmakedefine01 ENABLE_LLVM_PROFILE_INSTR
#cmakedefine01 ENABLE_LLVM_PGO_USE_SAMPLE
#cmakedefine01 USE_LLVM_FDO
1 change: 1 addition & 0 deletions release-centos7-llvm/env/prepare-sysroot.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ function install_llvm() {
mkdir -p llvm-project/build
cd llvm-project/build

# TODO: enable `bolt` for >= 14.0.0. https://github.com/llvm/llvm-project/tree/main/bolt
cmake -DCMAKE_BUILD_TYPE=Release \
-GNinja \
-DLLVM_ENABLE_PROJECTS="clang;lld;polly;clang-tools-extra" \
Expand Down
211 changes: 211 additions & 0 deletions release-centos7-llvm/scripts/perf-tpch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
#!/usr/bin/python3
# Copyright 2022 PingCAP, Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import signal
import sys
import time
import logging
import types
import subprocess

logger = None


def get_tz_offset():
    """Return the local timezone's current UTC offset formatted as ``+HH:MM`` or ``-HH:MM``.

    The offset is taken from a timezone-aware "now" instead of subtracting two
    naive datetimes, which avoids `datetime.utcfromtimestamp` (deprecated since
    Python 3.12).
    """
    import datetime

    # `astimezone()` without arguments attaches the system local timezone.
    local_now = datetime.datetime.now(datetime.timezone.utc).astimezone()
    total_seconds = int(local_now.utcoffset().total_seconds())

    sign = '+'
    if total_seconds < 0:
        sign = '-'
        total_seconds = -total_seconds

    # Sub-minute remainders are dropped, only hours and minutes are printed.
    hours, remainder = divmod(total_seconds, 3600)
    minutes = remainder // 60
    return "%s%02d:%02d" % (sign, hours, minutes)


def init_logger():
    """Configure the root logger and publish it through the module-level `logger`.

    Effects:
      * installs a log record factory whose records return an ANSI-coloured
        message from `getMessage()`, the colour depending on the record level;
      * sets the root logger to DEBUG and attaches a stdout handler whose
        format is `[<time>.<msecs> <tz offset>][<level>][<message>]`.

    Records with a level that has no colour assigned are emitted uncoloured
    instead of failing with `KeyError`.
    """
    global logger

    tz_offset = get_tz_offset()

    color_reset = "\033[0m"
    level_colors = {
        logging.DEBUG: "\033[1;34m",  # blue
        logging.INFO: "\033[1;32m",  # green
        logging.WARNING: "\033[1;35m",  # magenta
        logging.ERROR: "\033[1;31m",  # red
        logging.CRITICAL: "\033[1;41m",  # red background
    }
    base_record_factory = logging.getLogRecordFactory()

    def colored_message(record):
        # Same `msg % args` merge as the stock `LogRecord.getMessage`, then wrapped in the level colour.
        text = str(record.msg)
        if record.args:
            text = text % record.args
        color = level_colors.get(record.levelno)
        if color is None:
            # Custom / non-standard level: keep the text as is.
            return text
        return "{}{}{}".format(color, text, color_reset)

    def record_factory(*args, **kwargs):
        record = base_record_factory(*args, **kwargs)
        # Bind per record so every formatter using `%(message)s` gets the coloured text.
        record.getMessage = types.MethodType(colored_message, record)
        return record

    logging.setLogRecordFactory(record_factory)

    root = logging.getLogger()
    root.setLevel(logging.DEBUG)
    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(logging.DEBUG)
    handler.setFormatter(
        fmt=logging.Formatter('[%(asctime)s.%(msecs)03d {}][%(levelname)s][%(message)s]'.format(tz_offset),
                              datefmt='%Y/%m/%d %H:%M:%S'))
    root.addHandler(handler)
    logger = root


# Logging is set up at import time because the functions below use `logger` directly.
init_logger()


def wrap_run_time(func):
    """Decorator: log the wall-clock duration of each successful call to `func` at DEBUG level.

    The wrapped function's return value is passed through unchanged. Nothing is
    logged when `func` raises.
    """
    def timed_call(*args, **kwargs):
        started_at = time.time()
        result = func(*args, **kwargs)
        elapsed = time.time() - started_at
        logger.debug('Time cost {:.3f}s'.format(elapsed))
        return result

    return timed_call


@wrap_run_time
def run_cmd(cmd):
    """Run `cmd` (a list of program and arguments) and wait for it to finish.

    Returns a tuple `(stdout, stderr, returncode)`; both streams are captured
    completely and returned as undecoded bytes.
    """
    command_line = ' '.join(cmd)
    logger.debug("RUN CMD:\n{}\n".format(command_line))

    captured = subprocess.PIPE
    child = subprocess.Popen(cmd, stdout=captured, stderr=captured)
    out, err = child.communicate()
    return out, err, child.returncode


class Runner:
    """Command-line driver for collecting a sample profile of a running process.

    Two independent steps can be requested, alone or together:
      * `--perf`: run `perf record` against `--pid` while the `--workload`
        script executes, writing the perf data to `--output`;
      * `--convert-llvm`: call `--convert-tool` to turn perf data into LLVM
        profile data at `--output-llvm-prof`.

    When both are requested, the perf data recorded by the first step is used
    as the input of the second one.
    """

    def __init__(self):
        """Parse the command line (`sys.argv`) into `self.args`."""
        usage = """
1. compile TiFlash with cmake option `-DENABLE_LLVM_PGO=ON -DENABLE_LLVM_PGO_USE_SAMPLE=ON`
2. compile https://github.com/google/autofdo and get binary `create_llvm_prof` for converting perf data to llvm profile data
3. start TiFlash process and get `<pid>`
4. prepare workload scripts file
5. run `python3 perf-tpch.py --perf --pid <pid> --workload <workload-scripts-path> --convert-llvm --convert-tool <create_llvm_prof-path> --binary <tiflash-bianry-path>`
6. get llvm perf file(`tiflash.llvm.code.prof` by default)
7. compile TiFlash with env `TIFLASH_LLVM_PROFDATA=<output-llvm-prof>` and cmake option `-DENABLE_LLVM_PGO=ON -DENABLE_LLVM_PGO_USE_SAMPLE=ON`
8. re-run workload and compare result
"""
        parser = argparse.ArgumentParser(
            description="Auto FDO tools", formatter_class=argparse.ArgumentDefaultsHelpFormatter,
            usage=usage)
        parser.add_argument(
            '--perf', help='run perf with workload', action='store_true')
        parser.add_argument(
            '--convert-llvm', help='convert linux perf data to llvm profile data', action='store_true')

        parser.add_argument(
            '--workload', help='absolute path of workload script', required=False)
        parser.add_argument(
            '--pid', help='pid of TiFlash process', required=False)
        parser.add_argument(
            '--output', help='output file of perf data', required=False)
        parser.add_argument(
            '--convert-tool', help='tool to conver linux perf data to llvm profile data',)
        parser.add_argument(
            '--input-perf-file', help='input linux perf data file path')
        parser.add_argument(
            '--binary', help='binary to run workload')
        parser.add_argument(
            '--output-llvm-prof', help='output llvm profile data path', default='tiflash.llvm.code.prof')
        self.args = parser.parse_args()
        # Path of the perf data recorded by `run_perf` in this run, if any.
        self.linux_perf_data = None

    def run(self):
        """Execute the requested steps: perf recording first, then conversion."""
        if self.args.perf:
            self.run_perf()
        if self.args.convert_llvm:
            self.convert_llvm_perf()

    def convert_llvm_perf(self):
        """Convert linux perf data to LLVM profile data with the `--convert-tool` binary.

        The input is the perf data recorded earlier in this run when there is
        one, otherwise `--input-perf-file`. The result is written to
        `--output-llvm-prof`.

        Raises AssertionError when a required argument is missing or when the
        tool exits with a non-zero code.
        """
        assert self.args.convert_tool, '`--convert-tool` is required'
        if self.linux_perf_data is None:
            assert self.args.input_perf_file, '`--input-perf-file` is required'
        else:
            self.args.input_perf_file = self.linux_perf_data
        assert self.args.binary, '`--binary` is required'

        # `--output-llvm-prof` already carries its default from argparse, so it
        # must not be overwritten here; doing so would discard the user's choice.
        logger.info('start to convert linux perf data `{}` to llvm profile data `{}`'.format(
            self.args.input_perf_file, self.args.output_llvm_prof))
        stdout, stderr, e = run_cmd([self.args.convert_tool, '--profile', '{}'.format(self.args.input_perf_file),
                                     '--binary', "{}".format(self.args.binary),
                                     '--out', '{}'.format(self.args.output_llvm_prof)])
        logger.info(
            'finish convert. stdout `{}`, stderr `{}`'.format(stdout.decode('utf-8'), stderr.decode('utf-8')))
        assert e == 0, 'convert tool exited with code {}'.format(e)

    def run_perf(self):
        """Record perf data of process `--pid` while the `--workload` script runs.

        The perf data goes to `--output` (`tiflash.perf.data` when not given)
        and its path is remembered in `self.linux_perf_data`.

        Raises AssertionError when a required argument is missing or the
        workload fails, and OSError when the perf output file can not be opened
        afterwards.
        """
        assert self.args.pid, '`--pid` is required'
        assert self.args.workload, '`--workload` is required'

        pid = self.args.pid
        output = 'tiflash.perf.data' if self.args.output is None else self.args.output
        logger.info('using output file `{}`'.format(output))

        def workload():
            # git clone [email protected]:pingcap/go-tpc.git
            # cd go-tpc
            # make build
            # bin/go-tpc tpch run --queries q1 --host {} -P {} --db {} --count 1
            logger.info('start to run workload `{}`'.format(
                self.args.workload))
            stdout, stderr, err = run_cmd([self.args.workload])
            logger.info('finish workload `{}`. stdout `{}`, stderr `{}`'.format(
                self.args.workload, stdout.decode('utf-8'), stderr.decode('utf-8')))
            assert err == 0, 'workload exited with code {}'.format(err)

        perf_cmd = ["perf", "record", "-p", "{}".format(
            pid), "-e", "cycles:up", "-j", "any,u", "-a", "-o", "{}".format(output)]
        logger.info("start perf with cmd `{}`".format(' '.join(perf_cmd)))
        perf_proc = subprocess.Popen(
            perf_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        try:
            workload()
        finally:
            # Stop perf even when the workload fails, otherwise the recorder
            # would be left running after this script exits.
            perf_proc.send_signal(signal.SIGTERM)
            # `communicate` drains both pipes and waits for perf to exit.
            stdout, stderr = perf_proc.communicate()
            logger.info(
                "stop perf. stdout `{}`, stderr `{}`".format(stdout.decode('utf-8'), stderr.decode('utf-8')))

        # Check the file exists: opening raises if perf did not produce it.
        with open(output, 'rb'):
            pass
        self.linux_perf_data = output


def main():
    """Script entry point: build a `Runner` from the command line and execute it."""
    runner = Runner()
    runner.run()


if __name__ == '__main__':
    main()

0 comments on commit b35b586

Please sign in to comment.