From d19ccb1cde100ab4a5c8e6db9a0d69560cabbd04 Mon Sep 17 00:00:00 2001 From: Waldemar Kozaczuk Date: Fri, 11 Mar 2022 11:42:13 -0500 Subject: [PATCH] build: support app version script This patch introduces another new build mechanism that allows creating a custom kernel exporting only the symbols required by a specific application. Such a kernel benefits from smaller size and better security as all unneeded code is removed. This patch addresses the remaining part of the modularization/librarization functionality as explained in issue #1110 and this part of the roadmap - https://github.com/cloudius-systems/osv/wiki/Roadmap#modularizationlibrarization. This idea was also mentioned in the P99 OSv presentation - see slide 12. In essence, this patch adds two new scripts that analyse the build manifest, detect ELF files and identify symbols required from the OSv kernel and finally produce an application-specific version script under build/last/app_version_script: - scripts/list_manifest_files.py - reads build/last/usr.manifest and produces a list of file paths on the host filesystem - scripts/generate_app_version_script.sh - iterates over manifest files produced by list_manifest_files.py, identifies undefined symbols in the ELF files using objdump that are also exported by the OSv kernel and finally generates build/last/app_version_script This patch also makes some modest changes to the main makefile to support a new parameter - conf_version_script - intended to point to a custom version script. Please note that this new functionality only works when building the kernel with most symbols hidden (conf_hide_symbols=1). To take advantage of this new feature one would follow these steps: 1. Build the image for the given application. 2. Run scripts/generate_app_version_script.sh to produce app_version_script. 3. 
Re-build the image with the kernel exporting only the symbols needed by the app, like so: ./scripts/build fs=rofs conf_hide_symbols=1 image=golang-pie-example \ conf_version_script=build/last/app_version_script The version script generated for the golang ELF lists only 30 symbols. My experiments show that for many apps this can reduce kernel size by close to 0.5MB. For example the size of the kernel tailored to the golang app above is 3196K vs 3632K for the generic one. Obviously this feature can be used together with the driver profile to further reduce kernel size. The kernel produced with the build command below is only 2688K in size: ./scripts/build fs=rofs conf_hide_symbols=1 image=golang-pie-example \ drivers_profile=virtio-mmio conf_version_script=build/last/app_version_script Please note that some applications use dlsym() to dynamically resolve symbols which would be missed by this technique. In such scenarios those symbols would have to be manually added to app_version_script. Fixes #1110 Signed-off-by: Waldemar Kozaczuk --- Makefile | 31 +++++++--- scripts/generate_app_version_script.sh | 84 ++++++++++++++++++++++++++ scripts/generate_version_script.sh | 3 + scripts/list_manifest_files.py | 50 +++++++++++++++ 4 files changed, 160 insertions(+), 8 deletions(-) create mode 100755 scripts/generate_app_version_script.sh create mode 100755 scripts/list_manifest_files.py diff --git a/Makefile b/Makefile index c1c0eb844f..8288501694 100644 --- a/Makefile +++ b/Makefile @@ -2036,7 +2036,7 @@ $(out)/dummy-shlib.so: $(out)/dummy-shlib.o $(call quiet, $(CXX) -nodefaultlibs -shared $(gcc-sysroot) -o $@ $^, LINK $@) stage1_targets = $(out)/arch/$(arch)/boot.o $(out)/loader.o $(out)/runtime.o $(drivers:%=$(out)/%) $(objects:%=$(out)/%) $(out)/dummy-shlib.so -stage1: $(stage1_targets) links $(out)/version_script +stage1: $(stage1_targets) links $(out)/default_version_script .PHONY: stage1 loader_options_dep = $(out)/arch/$(arch)/loader_options.ld @@ -2047,20 +2047,35 @@ 
$(loader_options_dep): stage1 fi ifeq ($(conf_hide_symbols),1) +version_script_file:=$(out)/version_script +#Detect which version script to be used and copy to $(out)/version_script +#so that loader.elf/kernel.elf is rebuilt accordingly if version script has changed +ifdef conf_version_script +ifeq (,$(wildcard $(conf_version_script))) + $(error Missing version script: $(conf_version_script)) +endif +ifneq ($(shell cmp $(out)/version_script $(conf_version_script)),) +$(shell cp $(conf_version_script) $(out)/version_script) +endif +else +ifneq ($(shell cmp $(out)/version_script $(out)/default_version_script),) +$(shell cp $(out)/default_version_script $(out)/version_script) +endif +endif linker_archives_options = --no-whole-archive $(libstdc++.a) $(libgcc.a) $(libgcc_eh.a) $(boost-libs) \ - --exclude-libs libstdc++.a --gc-sections --version-script=$(out)/version_script + --exclude-libs libstdc++.a --gc-sections else linker_archives_options = --whole-archive $(libstdc++.a) $(libgcc_eh.a) $(boost-libs) --no-whole-archive $(libgcc.a) endif -$(out)/version_script: exported_symbols/*.symbols exported_symbols/$(arch)/*.symbols - $(call quiet, scripts/generate_version_script.sh $(out)/version_script, GEN version_script) +$(out)/default_version_script: exported_symbols/*.symbols exported_symbols/$(arch)/*.symbols + $(call quiet, scripts/generate_version_script.sh $(out)/default_version_script, GEN default_version_script) -$(out)/loader.elf: $(stage1_targets) arch/$(arch)/loader.ld $(out)/bootfs.o $(loader_options_dep) +$(out)/loader.elf: $(stage1_targets) arch/$(arch)/loader.ld $(out)/bootfs.o $(loader_options_dep) $(version_script_file) $(call quiet, $(LD) -o $@ --defsym=OSV_KERNEL_BASE=$(kernel_base) \ --defsym=OSV_KERNEL_VM_BASE=$(kernel_vm_base) --defsym=OSV_KERNEL_VM_SHIFT=$(kernel_vm_shift) \ -Bdynamic --export-dynamic --eh-frame-hdr --enable-new-dtags -L$(out)/arch/$(arch) \ - $(^:%.ld=-T %.ld) \ + $(patsubst 
%version_script,--version-script=%version_script,$(patsubst %.ld,-T %.ld,$^)) \ $(linker_archives_options) $(conf_linker_extra_options), \ LINK loader.elf) @# Build libosv.so matching this loader.elf. This is not a separate @@ -2069,11 +2084,11 @@ $(out)/loader.elf: $(stage1_targets) arch/$(arch)/loader.ld $(out)/bootfs.o $(lo @scripts/libosv.py $(out)/osv.syms $(out)/libosv.ld `scripts/osv-version.sh` | $(CC) -c -o $(out)/osv.o -x assembler - $(call quiet, $(CC) $(out)/osv.o -nostdlib -shared -o $(out)/libosv.so -T $(out)/libosv.ld, LIBOSV.SO) -$(out)/kernel.elf: $(stage1_targets) arch/$(arch)/loader.ld $(out)/empty_bootfs.o $(loader_options_dep) +$(out)/kernel.elf: $(stage1_targets) arch/$(arch)/loader.ld $(out)/empty_bootfs.o $(loader_options_dep) $(version_script_file) $(call quiet, $(LD) -o $@ --defsym=OSV_KERNEL_BASE=$(kernel_base) \ --defsym=OSV_KERNEL_VM_BASE=$(kernel_vm_base) --defsym=OSV_KERNEL_VM_SHIFT=$(kernel_vm_shift) \ -Bdynamic --export-dynamic --eh-frame-hdr --enable-new-dtags -L$(out)/arch/$(arch) \ - $(^:%.ld=-T %.ld) \ + $(patsubst %version_script,--version-script=%version_script,$(patsubst %.ld,-T %.ld,$^)) \ $(linker_archives_options) $(conf_linker_extra_options), \ LINK kernel.elf) $(call quiet, $(STRIP) $(out)/kernel.elf -o $(out)/kernel-stripped.elf, STRIP kernel.elf -> kernel-stripped.elf ) diff --git a/scripts/generate_app_version_script.sh b/scripts/generate_app_version_script.sh new file mode 100755 index 0000000000..b1cffe84ea --- /dev/null +++ b/scripts/generate_app_version_script.sh @@ -0,0 +1,84 @@ +#!/bin/bash + +if [[ "$1" == "--help" || "$1" == "-h" ]]; then + cat <<-EOF +Produce version script file under build/last/app_version_script intended +to build custom kernel exporting only symbols listed in this file. + +The script reads default user manifest file - build/last/usr.manifest +to identify all ELF files - executables and shared libraries - and +extract names of all symbols required to be exported by OSv kernel. 
+ +You can override location of the source manifest and pass its path +as 1st argument. + +Usage: ${0} [] + +NOTE: Given that some executables and libraries may dynamically resolve +symbols using dlsym(), this script would miss to identify those. In this +case one would have to manually add those symbols to build/last/app_version_script. +EOF + exit 0 +fi + +MACHINE=$(uname -m) +if [ "${MACHINE}" == "x86_64" ]; then + ARCH="x64" +else + ARCH="aarch64" +fi + +VERSION_SCRIPT_START=$(cat <<"EOF" +{ + global: +EOF +) + +VERSION_SCRIPT_END=$(cat <<"EOF" + local: + *; +}; +EOF +) + +BUILD_DIR=$(dirname $0)/../build/last +VERSION_SCRIPT_FILE=$(dirname $0)/../build/last/app_version_script + +ALL_SYMBOLS_FILE=$BUILD_DIR/all.symbols +if [[ ! -f $ALL_SYMBOLS_FILE ]]; then + echo "Could not find $ALL_SYMBOLS_FILE. Please run build first!" + exit 1 +fi + +USR_MANIFEST=$1 +if [[ "$USR_MANIFEST" == "" ]]; then + USR_MANIFEST=$BUILD_DIR/usr.manifest +fi +if [[ ! -f $USR_MANIFEST ]]; then + echo "Could not find $USR_MANIFEST. Please run build first!" + exit 1 +fi + +MANIFEST_FILES=$BUILD_DIR/usr.manifest.files +echo "Extracting list of files on host from $USR_MANIFEST" +scripts/list_manifest_files.py > $MANIFEST_FILES + +extract_symbols_from_elf() +{ + local ELF_PATH=$1 + echo "/*------- $ELF_PATH */" + objdump -wT ${ELF_PATH} | grep UND | cut -c 62- | \ + sort -d | uniq | comm - ${ALL_SYMBOLS_FILE} -12 | \ + awk '// { printf(" %s;\n", $0) }' | tee /tmp/generate_app_version_script_symbols + if [[ $(grep dlsym /tmp/generate_app_version_script_symbols) != "" ]]; then + echo "WARNING: the $ELF_PATH may use dlsym() to dynamically reference symbols!" 1>&2 + fi +} + +echo "Writing to $VERSION_SCRIPT_FILE ..." 
+echo "$VERSION_SCRIPT_START" > $VERSION_SCRIPT_FILE + +cat $MANIFEST_FILES | xargs file | grep "ELF 64-bit" | cut --delimiter=: -f 1 | \ +while read file; do extract_symbols_from_elf "$file"; done >> $VERSION_SCRIPT_FILE + +echo "$VERSION_SCRIPT_END" >> $VERSION_SCRIPT_FILE diff --git a/scripts/generate_version_script.sh b/scripts/generate_version_script.sh index 8072afa8ad..7653f6e4b9 100755 --- a/scripts/generate_version_script.sh +++ b/scripts/generate_version_script.sh @@ -22,6 +22,9 @@ VERSION_SCRIPT_END=$(cat <<"EOF" EOF ) +ALL_SYMBOLS_FILE=$(dirname $VERSION_SCRIPT_FILE)/all.symbols +cat exported_symbols/*.symbols exported_symbols/$ARCH/*.symbols | sort -d | uniq > $ALL_SYMBOLS_FILE + echo "$VERSION_SCRIPT_START" > $VERSION_SCRIPT_FILE #Firstly output list of symbols from files common to all architectures diff --git a/scripts/list_manifest_files.py b/scripts/list_manifest_files.py new file mode 100755 index 0000000000..683bdf1e5f --- /dev/null +++ b/scripts/list_manifest_files.py @@ -0,0 +1,50 @@ +#!/usr/bin/python3 + +import optparse, os, sys, subprocess +from manifest_common import add_var, expand, unsymlink, read_manifest, defines + +def list_files(manifest,manifest_dir): + manifest = [(x, y % defines) for (x, y) in manifest] + files = list(expand(manifest)) + files = [(x, unsymlink(y)) for (x, y) in files] + + for name, hostname in files: + if not hostname.startswith("->"): + if os.path.islink(hostname): + link = os.readlink(hostname) + print(link) + elif not os.path.isdir(hostname): + if not os.path.isabs(hostname): + hostname = os.path.join(manifest_dir,hostname) + print(hostname) + +def main(): + make_option = optparse.make_option + + opt = optparse.OptionParser(option_list=[ + make_option('-m', + dest='manifest', + help='read manifest from FILE', + metavar='FILE'), + make_option('-D', + type='string', + help='define VAR=DATA', + metavar='VAR=DATA', + action='callback', + callback=add_var) + ]) + + (options, args) = opt.parse_args() + + if not 
'libgcc_s_dir' in defines: + libgcc_s_path = subprocess.check_output(['gcc', '-print-file-name=libgcc_s.so.1']).decode('utf-8') + defines['libgcc_s_dir'] = os.path.dirname(libgcc_s_path) + + manifest_path = options.manifest or 'build/last/usr.manifest' + manifest_dir = os.path.abspath(os.path.dirname(manifest_path)) + + manifest = read_manifest(manifest_path) + list_files(manifest,manifest_dir) + +if __name__ == "__main__": + main()