From 18f9ceeb3df0db65977a4089ae158ef8ed15359f Mon Sep 17 00:00:00 2001 From: Rui Ueyama Date: Wed, 16 Jan 2019 23:46:28 +0000 Subject: [PATCH 001/125] Merging r351396: ------------------------------------------------------------------------ r351396 | lekensteyn | 2019-01-16 15:28:51 -0800 (Wed, 16 Jan 2019) | 17 lines [ELF][X86_64] Fix corrupted LD -> LE optimization for TLS without PLT The LD -> LE optimization for Thread-Local Storage without PLT requires an additional "66" prefix, otherwise the next instruction will be corrupted, causing runtime misbehavior (crashes) of the linked object. The other (GD -> IE/LD) optimizations are the same with or without PLT, but add tests for completeness. The instructions are copied from https://raw.githubusercontent.com/wiki/hjl-tools/x86-psABI/x86-64-psABI-1.0.pdf#subsection.11.1.2 This does not try to address ILP32 (x32) support. Fixes https://bugs.llvm.org/show_bug.cgi?id=37303 Reviewed By: ruiu Differential Revision: https://reviews.llvm.org/D56779 ------------------------------------------------------------------------ llvm-svn: 351401 --- lld/ELF/Arch/X86_64.cpp | 41 ++++++++++---- lld/test/ELF/tls-opt-x86_64-noplt.s | 88 +++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+), 10 deletions(-) create mode 100644 lld/test/ELF/tls-opt-x86_64-noplt.s diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp index 06314155dcc92f..a000eeb079d9ef 100644 --- a/lld/ELF/Arch/X86_64.cpp +++ b/lld/ELF/Arch/X86_64.cpp @@ -264,15 +264,6 @@ void X86_64::relaxTlsIeToLe(uint8_t *Loc, RelType Type, template void X86_64::relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const { - // Convert - // leaq bar@tlsld(%rip), %rdi - // callq __tls_get_addr@PLT - // leaq bar@dtpoff(%rax), %rcx - // to - // .word 0x6666 - // .byte 0x66 - // mov %fs:0,%rax - // leaq bar@tpoff(%rax), %rcx if (Type == R_X86_64_DTPOFF64) { write64le(Loc, Val); return; @@ -287,7 +278,37 @@ void X86_64::relaxTlsLdToLe(uint8_t *Loc, RelType Type, 0x66, // .byte 0x66 0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0,%rax }; - memcpy(Loc - 3, Inst, sizeof(Inst)); + + if (Loc[4] == 0xe8) { + // Convert + // leaq bar@tlsld(%rip), %rdi # 48 8d 3d + // callq __tls_get_addr@PLT # e8 + // leaq bar@dtpoff(%rax), %rcx + // to + // .word 0x6666 + // .byte 0x66 + // mov %fs:0,%rax + // leaq bar@tpoff(%rax), %rcx + memcpy(Loc - 3, Inst, sizeof(Inst)); + return; + } + + if (Loc[4] == 0xff && Loc[5] == 0x15) { + // Convert + // leaq x@tlsld(%rip),%rdi # 48 8d 3d + // call *__tls_get_addr@GOTPCREL(%rip) # ff 15 + // to + // .long 0x66666666 + // movq %fs:0,%rax + // See "Table 11.9: LD -> LE Code Transition (LP64)" in + // https://raw.githubusercontent.com/wiki/hjl-tools/x86-psABI/x86-64-psABI-1.0.pdf + Loc[-3] = 0x66; + memcpy(Loc - 2, Inst, sizeof(Inst)); + return; + } + + error(getErrorLocation(Loc - 3) + + "expected R_X86_64_PLT32 or R_X86_64_GOTPCRELX after R_X86_64_TLSLD"); } template diff --git a/lld/test/ELF/tls-opt-x86_64-noplt.s b/lld/test/ELF/tls-opt-x86_64-noplt.s new file mode 100644 index 00000000000000..69ec49871210e2 --- /dev/null +++ b/lld/test/ELF/tls-opt-x86_64-noplt.s @@ -0,0 +1,88 @@ +// REQUIRES: x86 + +// Checks whether the TLS optimizations match the cases in Chapter 11 of +// https://raw.githubusercontent.com/wiki/hjl-tools/x86-psABI/x86-64-psABI-1.0.pdf + +// RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o +// RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/tls-opt-gdie.s -o %tso.o +// RUN: ld.lld -shared %tso.o -o %t.so +// RUN: ld.lld %t.o %t.so -o %t1 +// RUN: llvm-readobj -r %t1 | FileCheck --check-prefix=RELOC %s +// RUN: llvm-objdump -d %t1 | FileCheck --check-prefix=DISASM %s + +// RELOC: Relocations [ +// RELOC-NEXT: Section {{.*}} .rela.dyn { +// RELOC-NEXT: 0x2020C0 R_X86_64_TPOFF64 tlsshared0 0x0 +// RELOC-NEXT: 0x2020C8 R_X86_64_TPOFF64 tlsshared1 0x0 +// RELOC-NEXT: } +// RELOC-NEXT: ] + +// DISASM: _start: + +// Table 11.5: GD -> IE Code Transition (LP64) +// DISASM-NEXT: 201000: 64 48 8b 04 25 00 00 00 00 movq %fs:0, %rax +// DISASM-NEXT: 201009: 48 03 05 b0 10 00 00 addq 4272(%rip), %rax +// DISASM-NEXT: 201010: 64 48 8b 04 25 00 00 00 00 movq %fs:0, %rax +// DISASM-NEXT: 201019: 48 03 05 a8 10 00 00 addq 4264(%rip), %rax + +// Table 11.7: GD -> LE Code Transition (LP64) +// DISASM-NEXT: 201020: 64 48 8b 04 25 00 00 00 00 movq %fs:0, %rax +// DISASM-NEXT: 201029: 48 8d 80 f8 ff ff ff leaq -8(%rax), %rax +// DISASM-NEXT: 201030: 64 48 8b 04 25 00 00 00 00 movq %fs:0, %rax +// DISASM-NEXT: 201039: 48 8d 80 fc ff ff ff leaq -4(%rax), %rax + + +// Table 11.9: LD -> LE Code Transition (LP64) +// DISASM-NEXT: 201040: 66 66 66 66 64 48 8b 04 25 00 00 00 00 movq %fs:0, %rax +// DISASM-NEXT: 20104d: 66 66 66 66 64 48 8b 04 25 00 00 00 00 movq %fs:0, %rax + +.type tls0,@object +.section .tbss,"awT",@nobits +.globl tls0 +.align 4 +tls0: + .long 0 + .size tls0, 4 + +.type tls1,@object +.globl tls1 +.align 4 +tls1: + .long 0 + .size tls1, 4 + +.section .text +.globl _start +_start: + // Table 11.5: GD -> IE Code Transition (LP64) + .byte 0x66 + leaq tlsshared0@tlsgd(%rip),%rdi + .byte 0x66 + rex64 + call *__tls_get_addr@GOTPCREL(%rip) + + .byte 0x66 + leaq tlsshared1@tlsgd(%rip),%rdi + .byte 0x66 + rex64 + call *__tls_get_addr@GOTPCREL(%rip) + + // Table 11.7: GD -> LE Code Transition (LP64) + .byte 0x66 + leaq tls0@tlsgd(%rip),%rdi + .byte 0x66 + rex64 + call *__tls_get_addr@GOTPCREL(%rip) + + .byte 0x66 + leaq tls1@tlsgd(%rip),%rdi + .byte 0x66 + rex64 + call *__tls_get_addr@GOTPCREL(%rip) + + // Table 11.9: LD -> LE Code Transition (LP64) + leaq tls0@tlsld(%rip),%rdi + call *__tls_get_addr@GOTPCREL(%rip) + + leaq tls1@tlsld(%rip),%rdi + call *__tls_get_addr@GOTPCREL(%rip) From cfa62417693e9f3d56e4309b9d27f405668618a1 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 17 Jan 2019 09:18:40 +0000 Subject: [PATCH 002/125] Merging r351360: ------------------------------------------------------------------------ r351360 | jmorse | 2019-01-16 18:41:29 +0100 (Wed, 16 Jan 2019) | 6 lines Add a REQUIRES: darwin line for a mac test. This test, apparently for macs, fails on Windows as lit can't emulate the shell subprocess $(which...) correctly. Some other netbsd and linux buildbots also fail here. Limit to macs as a temporary workaround. ------------------------------------------------------------------------ llvm-svn: 351419 --- .../Tooling/clang-check-mac-libcxx-fixed-compilation-db.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/Tooling/clang-check-mac-libcxx-fixed-compilation-db.cpp b/clang/test/Tooling/clang-check-mac-libcxx-fixed-compilation-db.cpp index fd0003e9b3828e..8c59a2f07f6984 100644 --- a/clang/test/Tooling/clang-check-mac-libcxx-fixed-compilation-db.cpp +++ b/clang/test/Tooling/clang-check-mac-libcxx-fixed-compilation-db.cpp @@ -11,6 +11,6 @@ // RUN: cp $(which clang-check) %t/mock-libcxx/bin/ // RUN: cp "%s" "%t/test.cpp" // RUN: %t/mock-libcxx/bin/clang-check -p "%t" "%t/test.cpp" -- -stdlib=libc++ -target x86_64-apple-darwin - +// REQUIRES: system-darwin #include vector v; From bcabeb421d61b479cf618270b238fab3c1c73769 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 17 Jan 2019 09:24:42 +0000 Subject: [PATCH 003/125] Merging r351340: ------------------------------------------------------------------------ r351340 | asl | 2019-01-16 14:28:30 +0100 (Wed, 16 Jan 2019) | 7 lines [MSP430] Fix msp430-toolchain.c on Windows (added in r351228) Patch by Kristina Bessonova! Differential Revision: https://reviews.llvm.org/D56776 ------------------------------------------------------------------------ llvm-svn: 351420 --- clang/test/Driver/msp430-toolchain.c | 32 ++++++++++++++-------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/clang/test/Driver/msp430-toolchain.c b/clang/test/Driver/msp430-toolchain.c index ae5ed9189c8281..62ef1c0c1f150f 100644 --- a/clang/test/Driver/msp430-toolchain.c +++ b/clang/test/Driver/msp430-toolchain.c @@ -8,44 +8,44 @@ // RUN: --gcc-toolchain=%S/Inputs/basic_msp430_tree 2>&1 \ // RUN: | FileCheck -check-prefix=MSP430 %s -// MSP430: "{{.*}}Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../../../bin/msp430-elf-ld" +// MSP430: "{{.*}}Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}bin{{/|\\\\}}msp430-elf-ld" // MSP430: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430" -// MSP430: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../../../msp430-elf/lib/430" -// MSP430: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../../../msp430-elf/lib/430/crt0.o" -// MSP430: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430/crtbegin.o" +// MSP430: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430" +// MSP430: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430{{/|\\\\}}crt0.o" +// MSP430: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430{{/|\\\\}}crtbegin.o" // MSP430: "--start-group" "-lmul_none" "-lgcc" "-lc" "-lcrt" "-lnosys" "--end-group" -// MSP430: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430/crtend.o" -// MSP430: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../../../msp430-elf/lib/430/crtn.o" +// MSP430: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430{{/|\\\\}}crtend.o" +// MSP430: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430{{/|\\\\}}crtn.o" // RUN: %clang %s -### -no-canonical-prefixes -target msp430 -nodefaultlibs \ // RUN: --gcc-toolchain=%S/Inputs/basic_msp430_tree 2>&1 \ // RUN: | FileCheck -check-prefix=MSP430-NO-DFT-LIB %s -// MSP430-NO-DFT-LIB: "{{.*}}Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../../../bin/msp430-elf-ld" +// MSP430-NO-DFT-LIB: "{{.*}}Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}bin{{/|\\\\}}msp430-elf-ld" // MSP430-NO-DFT-LIB: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430" -// MSP430-NO-DFT-LIB: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../../../msp430-elf/lib/430" -// MSP430-NO-DFT-LIB: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../../../msp430-elf/lib/430/crt0.o" -// MSP430-NO-DFT-LIB: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430/crtbegin.o" +// MSP430-NO-DFT-LIB: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430" +// MSP430-NO-DFT-LIB: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430{{/|\\\\}}crt0.o" +// MSP430-NO-DFT-LIB: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430{{/|\\\\}}crtbegin.o" // MSP430-NO-DFT-LIB: "--start-group" "-lmul_none" "-lgcc" "--end-group" -// MSP430-NO-DFT-LIB: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430/crtend.o" -// MSP430-NO-DFT-LIB: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../../../msp430-elf/lib/430/crtn.o" +// MSP430-NO-DFT-LIB: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430{{/|\\\\}}crtend.o" +// MSP430-NO-DFT-LIB: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430{{/|\\\\}}crtn.o" // RUN: %clang %s -### -no-canonical-prefixes -target msp430 -nostartfiles \ // RUN: --gcc-toolchain=%S/Inputs/basic_msp430_tree 2>&1 \ // RUN: | FileCheck -check-prefix=MSP430-NO-START %s -// MSP430-NO-START: "{{.*}}Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../../../bin/msp430-elf-ld" +// MSP430-NO-START: "{{.*}}Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}bin{{/|\\\\}}msp430-elf-ld" // MSP430-NO-START: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430" -// MSP430-NO-START: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../../../msp430-elf/lib/430" +// MSP430-NO-START: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430" // MSP430-NO-START: "--start-group" "-lmul_none" "-lgcc" "-lc" "-lcrt" "-lnosys" "--end-group" // RUN: %clang %s -### -no-canonical-prefixes -target msp430 -nostdlib \ // RUN: --gcc-toolchain=%S/Inputs/basic_msp430_tree 2>&1 \ // RUN: | FileCheck -check-prefix=MSP430-NO-STD-LIB %s -// MSP430-NO-STD-LIB: "{{.*}}Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../../../bin/msp430-elf-ld" +// MSP430-NO-STD-LIB: "{{.*}}Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}bin{{/|\\\\}}msp430-elf-ld" // MSP430-NO-STD-LIB: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430" -// MSP430-NO-STD-LIB: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../../../msp430-elf/lib/430" +// MSP430-NO-STD-LIB: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430" // MSP430-NO-STD-LIB: "--start-group" "-lmul_none" "-lgcc" "--end-group" // RUN: %clang %s -### -no-canonical-prefixes -target msp430 -mmcu=msp430f147 2>&1 \ From 66303bc3a295d19d9f594b92e43146edcd1ab1ab Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 17 Jan 2019 11:31:57 +0000 Subject: [PATCH 004/125] Merging r351431: ------------------------------------------------------------------------ r351431 | hans | 2019-01-17 12:31:03 +0100 (Thu, 17 Jan 2019) | 19 lines Revert r351311 "[OMPT] Make sure that OMPT is enabled when accessing internals of the runtime" and also the follow-up r351315. The new test is failing on the buildbots. > Make sure that OMPT is enabled in runtime entry points that access internals > of the runtime. Else, return an appropiate value indicating an error or that > the data is not available. > > Patch provided by @sconvent > > Reviewers: jlpeyton, omalyshe, hbae, Hahnfeld, joachim.protze > > Reviewed By: joachim.protze > > Tags: #openmp, #ompt > > Differential Revision: https://reviews.llvm.org/D47717 ------------------------------------------------------------------------ llvm-svn: 351432 --- openmp/runtime/src/ompt-general.cpp | 19 +-- .../test/ompt/misc/api_calls_without_ompt.c | 148 ------------------ 2 files changed, 4 insertions(+), 163 deletions(-) delete mode 100644 openmp/runtime/test/ompt/misc/api_calls_without_ompt.c diff --git a/openmp/runtime/src/ompt-general.cpp b/openmp/runtime/src/ompt-general.cpp index cea00fff07accd..80a859196e68df 100644 --- a/openmp/runtime/src/ompt-general.cpp +++ b/openmp/runtime/src/ompt-general.cpp @@ -450,9 +450,6 @@ OMPT_API_ROUTINE ompt_set_result_t ompt_set_callback(ompt_callbacks_t which, OMPT_API_ROUTINE int ompt_get_callback(ompt_callbacks_t which, ompt_callback_t *callback) { - if (!ompt_enabled.enabled) - return ompt_get_callback_failure; - switch (which) { #define ompt_event_macro(event_name, callback_type, event_id) \ @@ -460,7 +457,7 @@ OMPT_API_ROUTINE int ompt_get_callback(ompt_callbacks_t which, if (ompt_event_implementation_status(event_name)) { \ ompt_callback_t mycb = \ (ompt_callback_t)ompt_callbacks.ompt_callback(event_name); \ - if (ompt_enabled.event_name && mycb) { \ + if (mycb) { \ *callback = mycb; \ return ompt_get_callback_success; \ } \ @@ -483,15 +480,11 @@ OMPT_API_ROUTINE int ompt_get_callback(ompt_callbacks_t which, OMPT_API_ROUTINE int ompt_get_parallel_info(int ancestor_level, ompt_data_t **parallel_data, int *team_size) { - if (!ompt_enabled.enabled) - return 0; return __ompt_get_parallel_info_internal(ancestor_level, parallel_data, team_size); } OMPT_API_ROUTINE int ompt_get_state(ompt_wait_id_t *wait_id) { - if (!ompt_enabled.enabled) - return ompt_state_work_serial; int thread_state = __ompt_get_state_internal(wait_id); if (thread_state == ompt_state_undefined) { @@ -506,8 +499,6 @@ OMPT_API_ROUTINE int ompt_get_state(ompt_wait_id_t *wait_id) { ****************************************************************************/ OMPT_API_ROUTINE ompt_data_t *ompt_get_thread_data(void) { - if (!ompt_enabled.enabled) - return NULL; return __ompt_get_thread_data_internal(); } @@ -516,8 +507,6 @@ OMPT_API_ROUTINE int ompt_get_task_info(int ancestor_level, int *type, ompt_frame_t **task_frame, ompt_data_t **parallel_data, int *thread_num) { - if (!ompt_enabled.enabled) - return 0; return __ompt_get_task_info_internal(ancestor_level, type, task_data, task_frame, parallel_data, thread_num); } @@ -592,7 +581,7 @@ OMPT_API_ROUTINE int ompt_get_place_num(void) { #if !KMP_AFFINITY_SUPPORTED return -1; #else - if (!ompt_enabled.enabled || __kmp_get_gtid() < 0) + if (__kmp_get_gtid() < 0) return -1; int gtid; @@ -613,7 +602,7 @@ OMPT_API_ROUTINE int ompt_get_partition_place_nums(int place_nums_size, #if !KMP_AFFINITY_SUPPORTED return 0; #else - if (!ompt_enabled.enabled || __kmp_get_gtid() < 0) + if (__kmp_get_gtid() < 0) return 0; int i, gtid, place_num, first_place, last_place, start, end; @@ -648,7 +637,7 @@ OMPT_API_ROUTINE int ompt_get_partition_place_nums(int place_nums_size, ****************************************************************************/ OMPT_API_ROUTINE int ompt_get_proc_id(void) { - if (!ompt_enabled.enabled || __kmp_get_gtid() < 0) + if (__kmp_get_gtid() < 0) return -1; #if KMP_OS_LINUX return sched_getcpu(); diff --git a/openmp/runtime/test/ompt/misc/api_calls_without_ompt.c b/openmp/runtime/test/ompt/misc/api_calls_without_ompt.c deleted file mode 100644 index 976c20f47f1e22..00000000000000 --- a/openmp/runtime/test/ompt/misc/api_calls_without_ompt.c +++ /dev/null @@ -1,148 +0,0 @@ -// RUN: %libomp-compile-and-run | FileCheck %s -// REQUIRES: ompt - -#define _BSD_SOURCE -#define _DEFAULT_SOURCE - -#include -#include -#include -#include - -static ompt_set_callback_t ompt_set_callback; -static ompt_get_callback_t ompt_get_callback; -static ompt_get_state_t ompt_get_state; -static ompt_get_task_info_t ompt_get_task_info; -static ompt_get_thread_data_t ompt_get_thread_data; -static ompt_get_parallel_info_t ompt_get_parallel_info; -static ompt_get_unique_id_t ompt_get_unique_id; -static ompt_get_num_procs_t ompt_get_num_procs; -static ompt_get_num_places_t ompt_get_num_places; -static ompt_get_place_proc_ids_t ompt_get_place_proc_ids; -static ompt_get_place_num_t ompt_get_place_num; -static ompt_get_partition_place_nums_t ompt_get_partition_place_nums; -static ompt_get_proc_id_t ompt_get_proc_id; -static ompt_enumerate_states_t ompt_enumerate_states; -static ompt_enumerate_mutex_impls_t ompt_enumerate_mutex_impls; - -int main() { - // Call OpenMP API function to force initialization of OMPT. - // (omp_get_thread_num() does not work because it just returns 0 if the - // runtime isn't initialized yet...) - omp_get_num_threads(); - - ompt_data_t *tdata = ompt_get_thread_data(); - uint64_t tvalue = tdata ? tdata->value : 0; - - printf("%" PRIu64 ": ompt_get_num_places()=%d\n", tvalue, - ompt_get_num_places()); - - printf("%" PRIu64 ": ompt_get_place_proc_ids()=%d\n", tvalue, - ompt_get_place_proc_ids(0, 0, NULL)); - - printf("%" PRIu64 ": ompt_get_place_num()=%d\n", tvalue, - ompt_get_place_num()); - - printf("%" PRIu64 ": ompt_get_partition_place_nums()=%d\n", tvalue, - ompt_get_partition_place_nums(0, NULL)); - - printf("%" PRIu64 ": ompt_get_proc_id()=%d\n", tvalue, ompt_get_proc_id()); - - printf("%" PRIu64 ": ompt_get_num_procs()=%d\n", tvalue, - ompt_get_num_procs()); - - ompt_callback_t callback; - printf("%" PRIu64 ": ompt_get_callback()=%d\n", tvalue, - ompt_get_callback(ompt_callback_thread_begin, &callback)); - - printf("%" PRIu64 ": ompt_get_state()=%d\n", tvalue, ompt_get_state(NULL)); - - int state = omp_state_undefined; - const char *state_name; - printf("%" PRIu64 ": ompt_enumerate_states()=%d\n", tvalue, - ompt_enumerate_states(state, &state, &state_name)); - - int impl = ompt_mutex_impl_unknown; - const char *impl_name; - printf("%" PRIu64 ": ompt_enumerate_mutex_impls()=%d\n", tvalue, - ompt_enumerate_mutex_impls(impl, &impl, &impl_name)); - - printf("%" PRIu64 ": ompt_get_thread_data()=%p\n", tvalue, - ompt_get_thread_data()); - - printf("%" PRIu64 ": ompt_get_parallel_info()=%d\n", tvalue, - ompt_get_parallel_info(0, NULL, NULL)); - - printf("%" PRIu64 ": ompt_get_task_info()=%d\n", tvalue, - ompt_get_task_info(0, NULL, NULL, NULL, NULL, NULL)); - - // Check if libomp supports the callbacks for this test. - - // CHECK: 0: NULL_POINTER=[[NULL:.*$]] - - // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_get_num_places()={{[0-9]+}} - - // CHECK: {{^}}[[MASTER_ID]]: ompt_get_place_proc_ids()={{[0-9]+}} - - // CHECK: {{^}}[[MASTER_ID]]: ompt_get_place_num()=-1 - - // CHECK: {{^}}[[MASTER_ID]]: ompt_get_partition_place_nums()=0 - - // CHECK: {{^}}[[MASTER_ID]]: ompt_get_proc_id()=-1 - - // CHECK: {{^}}[[MASTER_ID]]: ompt_get_num_procs()={{[0-9]+}} - - // CHECK: {{^}}[[MASTER_ID]]: ompt_get_callback()=0 - - // CHECK: {{^}}[[MASTER_ID]]: ompt_get_state()=0 - - // CHECK: {{^}}[[MASTER_ID]]: ompt_enumerate_states()=1 - - // CHECK: {{^}}[[MASTER_ID]]: ompt_enumerate_mutex_impls()=1 - - // CHECK: {{^}}[[MASTER_ID]]: ompt_get_thread_data()=[[NULL]] - - // CHECK: {{^}}[[MASTER_ID]]: ompt_get_parallel_info()=0 - - // CHECK: {{^}}[[MASTER_ID]]: ompt_get_task_info()=0 - - return 0; -} - -int ompt_initialize(ompt_function_lookup_t lookup, ompt_data_t *tool_data) { - ompt_set_callback = (ompt_set_callback_t)lookup("ompt_set_callback"); - ompt_get_callback = (ompt_get_callback_t)lookup("ompt_get_callback"); - ompt_get_state = (ompt_get_state_t)lookup("ompt_get_state"); - ompt_get_task_info = (ompt_get_task_info_t)lookup("ompt_get_task_info"); - ompt_get_thread_data = (ompt_get_thread_data_t)lookup("ompt_get_thread_data"); - ompt_get_parallel_info = - (ompt_get_parallel_info_t)lookup("ompt_get_parallel_info"); - ompt_get_unique_id = (ompt_get_unique_id_t)lookup("ompt_get_unique_id"); - - ompt_get_num_procs = (ompt_get_num_procs_t)lookup("ompt_get_num_procs"); - ompt_get_num_places = (ompt_get_num_places_t)lookup("ompt_get_num_places"); - ompt_get_place_proc_ids = - (ompt_get_place_proc_ids_t)lookup("ompt_get_place_proc_ids"); - ompt_get_place_num = (ompt_get_place_num_t)lookup("ompt_get_place_num"); - ompt_get_partition_place_nums = - (ompt_get_partition_place_nums_t)lookup("ompt_get_partition_place_nums"); - ompt_get_proc_id = (ompt_get_proc_id_t)lookup("ompt_get_proc_id"); - ompt_enumerate_states = - (ompt_enumerate_states_t)lookup("ompt_enumerate_states"); - ompt_enumerate_mutex_impls = - (ompt_enumerate_mutex_impls_t)lookup("ompt_enumerate_mutex_impls"); - - printf("0: NULL_POINTER=%p\n", (void *)NULL); - return 0; // no success -> OMPT not enabled -} - -void ompt_finalize(ompt_data_t *tool_data) { - printf("0: ompt_event_runtime_shutdown\n"); -} - -ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version, - const char *runtime_version) { - static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize, - &ompt_finalize, 0}; - return &ompt_start_tool_result; -} From 9d7c27063f0a4494d8e3b9b3e9eb7cc411d8e454 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 17 Jan 2019 13:12:00 +0000 Subject: [PATCH 005/125] Merging r351436: ------------------------------------------------------------------------ r351436 | hans | 2019-01-17 14:11:15 +0100 (Thu, 17 Jan 2019) | 1 line build_llvm_package.bat: Run more tests ------------------------------------------------------------------------ llvm-svn: 351437 --- llvm/utils/release/build_llvm_package.bat | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/llvm/utils/release/build_llvm_package.bat b/llvm/utils/release/build_llvm_package.bat index 51f425633a3d69..6a0b19b719605c 100755 --- a/llvm/utils/release/build_llvm_package.bat +++ b/llvm/utils/release/build_llvm_package.bat @@ -54,7 +54,7 @@ svn.exe export -r %revision% http://llvm.org/svn/llvm-project/lldb/%branch% llvm REM Setting CMAKE_CL_SHOWINCLUDES_PREFIX to work around PR27226. set cmake_flags=-DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=ON -DLLVM_INSTALL_TOOLCHAIN_ONLY=ON -DCMAKE_INSTALL_UCRT_LIBRARIES=ON -DCLANG_FORMAT_VS_VERSION=%clang_format_vs_version% -DPACKAGE_VERSION=%package_version% -DLLDB_RELOCATABLE_PYTHON=1 -DLLDB_TEST_COMPILER=%cd%\build32_stage0\bin\clang.exe -DCMAKE_CL_SHOWINCLUDES_PREFIX="Note: including file: " -REM TODO: Run all tests, including lld and compiler-rt. +REM TODO: Run the "check-all" tests. set "VSCMD_START_DIR=%CD%" call "%vsdevcmd%" -arch=x86 @@ -66,7 +66,9 @@ REM Work around VS2017 bug by using MinSizeRel. cmake -GNinja %cmake_flags% -DPYTHON_HOME=%python32_dir% -DCMAKE_BUILD_TYPE=MinSizeRel ..\llvm || exit /b ninja all || ninja all || ninja all || exit /b ninja check || ninja check || ninja check || exit /b -ninja check-clang || ninja check-clang || ninja check-clang || exit /b +ninja check-clang || ninja check-clang || ninja check-clang || exit /b +ninja check-lld || ninja check-lld || ninja check-lld || exit /b +ninja check-sanitizer || ninja check-sanitizer || ninja check-sanitizer || exit /b cd.. mkdir build32 @@ -76,7 +78,9 @@ set CXX=..\build32_stage0\bin\clang-cl cmake -GNinja %cmake_flags% -DPYTHON_HOME=%python32_dir% ..\llvm || exit /b ninja all || ninja all || ninja all || exit /b ninja check || ninja check || ninja check || exit /b -ninja check-clang || ninja check-clang || ninja check-clang || exit /b +ninja check-clang || ninja check-clang || ninja check-clang || exit /b +ninja check-lld || ninja check-lld || ninja check-lld || exit /b +ninja check-sanitizer || ninja check-sanitizer || ninja check-sanitizer || exit /b ninja package || exit /b cd .. @@ -101,7 +105,9 @@ REM Work around VS2017 bug by using MinSizeRel. cmake -GNinja %cmake_flags% -DPYTHON_HOME=%python64_dir% -DCMAKE_BUILD_TYPE=MinSizeRel ..\llvm || exit /b ninja all || ninja all || ninja all || exit /b ninja check || ninja check || ninja check || exit /b -ninja check-clang || ninja check-clang || ninja check-clang || exit /b +ninja check-clang || ninja check-clang || ninja check-clang || exit /b +ninja check-lld || ninja check-lld || ninja check-lld || exit /b +ninja check-sanitizer || ninja check-sanitizer || ninja check-sanitizer || exit /b cd.. mkdir build64 @@ -111,6 +117,8 @@ set CXX=..\build64_stage0\bin\clang-cl cmake -GNinja %cmake_flags% -DPYTHON_HOME=%python64_dir% ..\llvm || exit /b ninja all || ninja all || ninja all || exit /b ninja check || ninja check || ninja check || exit /b -ninja check-clang || ninja check-clang || ninja check-clang || exit /b +ninja check-clang || ninja check-clang || ninja check-clang || exit /b +ninja check-lld || ninja check-lld || ninja check-lld || exit /b +ninja check-sanitizer || ninja check-sanitizer || ninja check-sanitizer || exit /b ninja package || exit /b cd .. From 2b727e7e3619782bb4f7a0a342f154d3418034eb Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 17 Jan 2019 13:26:58 +0000 Subject: [PATCH 006/125] Merging r351349: ------------------------------------------------------------------------ r351349 | abataev | 2019-01-16 16:39:52 +0100 (Wed, 16 Jan 2019) | 14 lines [SLP] Fix PR40310: The reduction nodes should stay scalar. Summary: Sometimes the SLP vectorizer tries to vectorize the horizontal reduction nodes during regular vectorization. This may happen inside of the loops, when there are some vectorizable PHIs. Patch fixes this by checking if the node is the reduction node and thus it must not be vectorized, it must be gathered. Reviewers: RKSimon, spatel, hfinkel, fedor.sergeev Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D56783 ------------------------------------------------------------------------ llvm-svn: 351440 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 3 +- .../Transforms/SLPVectorizer/X86/PR39774.ll | 266 ++++++++++++------ .../Transforms/SLPVectorizer/X86/PR40310.ll | 18 +- 3 files changed, 188 insertions(+), 99 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 2e856a7e6802ed..a07fffe9b98b9a 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1468,8 +1468,9 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // If any of the scalars is marked as a value that needs to stay scalar, then // we need to gather the scalars. + // The reduction nodes (stored in UserIgnoreList) also should stay scalar. for (unsigned i = 0, e = VL.size(); i != e; ++i) { - if (MustGather.count(VL[i])) { + if (MustGather.count(VL[i]) || is_contained(UserIgnoreList, VL[i])) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n"); newTreeEntry(VL, false, UserTreeIdx); return; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll index 8405117090b46f..67717a54659c36 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll @@ -3,93 +3,185 @@ ; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake -slp-threshold=-8 -slp-min-tree-size=6 | FileCheck %s --check-prefixes=ALL,FORCE_REDUCTION define void @Test(i32) { -; ALL-LABEL: @Test( -; ALL-NEXT: entry: -; ALL-NEXT: br label [[LOOP:%.*]] -; ALL: loop: -; ALL-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP11:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] -; ALL-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <8 x i32> -; ALL-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1 -; ALL-NEXT: [[TMP3:%.*]] = add <8 x i32> , [[SHUFFLE]] -; ALL-NEXT: [[VAL_1:%.*]] = and i32 [[TMP2]], undef -; ALL-NEXT: [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]] -; ALL-NEXT: [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]] -; ALL-NEXT: [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]] -; ALL-NEXT: [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]] -; ALL-NEXT: [[VAL_7:%.*]] = and i32 [[VAL_5]], undef -; ALL-NEXT: [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]] -; ALL-NEXT: [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]] -; ALL-NEXT: [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]] -; ALL-NEXT: [[VAL_12:%.*]] = and i32 [[VAL_10]], undef -; ALL-NEXT: [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]] -; ALL-NEXT: [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]] -; ALL-NEXT: [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]] -; ALL-NEXT: [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]] -; ALL-NEXT: [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]] -; ALL-NEXT: [[VAL_19:%.*]] = and i32 [[VAL_17]], undef -; ALL-NEXT: [[VAL_21:%.*]] = and i32 [[VAL_19]], undef -; ALL-NEXT: [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]] -; ALL-NEXT: [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]] -; ALL-NEXT: [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]] -; ALL-NEXT: [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]] -; ALL-NEXT: [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]] -; ALL-NEXT: [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]] -; ALL-NEXT: [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]] -; ALL-NEXT: [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]] -; ALL-NEXT: [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]] -; ALL-NEXT: [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]] -; ALL-NEXT: [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]] -; ALL-NEXT: [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]] -; ALL-NEXT: [[VAL_35:%.*]] = and i32 [[VAL_33]], undef -; ALL-NEXT: [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]] -; ALL-NEXT: [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]] -; ALL-NEXT: [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]] -; ALL-NEXT: [[VAL_40:%.*]] = and i32 [[VAL_38]], undef -; ALL-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> undef, i32 [[VAL_40]], i32 0 -; ALL-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[TMP2]], i32 1 -; ALL-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7 -; ALL-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP6]], i32 0 -; ALL-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 14910, i32 1 -; ALL-NEXT: [[TMP9:%.*]] = and <2 x i32> [[TMP5]], [[TMP8]] -; ALL-NEXT: [[TMP10:%.*]] = add <2 x i32> [[TMP5]], [[TMP8]] -; ALL-NEXT: [[TMP11]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> -; ALL-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> undef, <8 x i32> -; ALL-NEXT: [[BIN_RDX:%.*]] = and <8 x i32> [[TMP3]], [[RDX_SHUF]] -; ALL-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> -; ALL-NEXT: [[BIN_RDX2:%.*]] = and <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]] -; ALL-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> -; ALL-NEXT: [[BIN_RDX4:%.*]] = and <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] -; ALL-NEXT: [[TMP12:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 -; ALL-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP12]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA6:%.*]] = and i32 [[OP_EXTRA5]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA7:%.*]] = and i32 [[OP_EXTRA6]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA8:%.*]] = and i32 [[OP_EXTRA7]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA9:%.*]] = and i32 [[OP_EXTRA8]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA10:%.*]] = and i32 [[OP_EXTRA9]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA11:%.*]] = and i32 [[OP_EXTRA10]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA12:%.*]] = and i32 [[OP_EXTRA11]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA13:%.*]] = and i32 [[OP_EXTRA12]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA14:%.*]] = and i32 [[OP_EXTRA13]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA15:%.*]] = and i32 [[OP_EXTRA14]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA16:%.*]] = and i32 [[OP_EXTRA15]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA17:%.*]] = and i32 [[OP_EXTRA16]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA18:%.*]] = and i32 [[OP_EXTRA17]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA19:%.*]] = and i32 [[OP_EXTRA18]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA20:%.*]] = and i32 [[OP_EXTRA19]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA21:%.*]] = and i32 [[OP_EXTRA20]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA22:%.*]] = and i32 [[OP_EXTRA21]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA23:%.*]] = and i32 [[OP_EXTRA22]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA24:%.*]] = and i32 [[OP_EXTRA23]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA25:%.*]] = and i32 [[OP_EXTRA24]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA26:%.*]] = and i32 [[OP_EXTRA25]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA27:%.*]] = and i32 [[OP_EXTRA26]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA28:%.*]] = and i32 [[OP_EXTRA27]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA29:%.*]] = and i32 [[OP_EXTRA28]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA30:%.*]] = and i32 [[OP_EXTRA29]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA31:%.*]] = and i32 [[OP_EXTRA30]], [[TMP2]] -; ALL-NEXT: [[TMP13:%.*]] = extractelement <2 x i32> [[TMP11]], i32 0 -; ALL-NEXT: br label [[LOOP]] +; CHECK-LABEL: @Test( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP15:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> , [[SHUFFLE]] +; CHECK-NEXT: [[VAL_1:%.*]] = and i32 [[TMP2]], undef +; CHECK-NEXT: [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]] +; CHECK-NEXT: [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]] +; CHECK-NEXT: [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]] +; CHECK-NEXT: [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]] +; CHECK-NEXT: [[VAL_7:%.*]] = and i32 [[VAL_5]], undef +; CHECK-NEXT: [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]] +; CHECK-NEXT: [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]] +; CHECK-NEXT: [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]] +; CHECK-NEXT: [[VAL_12:%.*]] = and i32 [[VAL_10]], undef +; CHECK-NEXT: [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]] +; CHECK-NEXT: [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]] +; CHECK-NEXT: [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]] +; CHECK-NEXT: [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]] +; CHECK-NEXT: [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]] +; CHECK-NEXT: [[VAL_19:%.*]] = and i32 [[VAL_17]], undef +; CHECK-NEXT: [[VAL_21:%.*]] = and i32 [[VAL_19]], undef +; CHECK-NEXT: [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]] +; CHECK-NEXT: [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]] +; CHECK-NEXT: [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]] +; CHECK-NEXT: [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]] +; CHECK-NEXT: [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]] +; CHECK-NEXT: [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]] +; CHECK-NEXT: [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]] +; CHECK-NEXT: [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]] +; CHECK-NEXT: [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]] +; CHECK-NEXT: [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]] +; CHECK-NEXT: [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]] +; CHECK-NEXT: [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]] +; CHECK-NEXT: [[VAL_35:%.*]] = and i32 [[VAL_33]], undef +; CHECK-NEXT: [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]] +; CHECK-NEXT: [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]] +; CHECK-NEXT: [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]] +; CHECK-NEXT: [[VAL_40:%.*]] = and i32 [[VAL_38]], undef +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> undef, <8 x i32> +; CHECK-NEXT: [[BIN_RDX:%.*]] = and <8 x i32> [[TMP3]], [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> +; CHECK-NEXT: [[BIN_RDX2:%.*]] = and <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]] +; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> +; CHECK-NEXT: [[BIN_RDX4:%.*]] = and <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 +; CHECK-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP4]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA6:%.*]] = and i32 [[OP_EXTRA5]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA7:%.*]] = and i32 [[OP_EXTRA6]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA8:%.*]] = and i32 [[OP_EXTRA7]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA9:%.*]] = and i32 [[OP_EXTRA8]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA10:%.*]] = and i32 [[OP_EXTRA9]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA11:%.*]] = and i32 [[OP_EXTRA10]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA12:%.*]] = and i32 [[OP_EXTRA11]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA13:%.*]] = and i32 [[OP_EXTRA12]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA14:%.*]] = and i32 [[OP_EXTRA13]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA15:%.*]] = and i32 [[OP_EXTRA14]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA16:%.*]] = and i32 [[OP_EXTRA15]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA17:%.*]] = and i32 [[OP_EXTRA16]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA18:%.*]] = and i32 [[OP_EXTRA17]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA19:%.*]] = and i32 [[OP_EXTRA18]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA20:%.*]] = and i32 [[OP_EXTRA19]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA21:%.*]] = and i32 [[OP_EXTRA20]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA22:%.*]] = and i32 [[OP_EXTRA21]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA23:%.*]] = and i32 [[OP_EXTRA22]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA24:%.*]] = and i32 [[OP_EXTRA23]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA25:%.*]] = and i32 [[OP_EXTRA24]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA26:%.*]] = and i32 [[OP_EXTRA25]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA27:%.*]] = and i32 [[OP_EXTRA26]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA28:%.*]] = and i32 [[OP_EXTRA27]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA29:%.*]] = and i32 [[OP_EXTRA28]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA30:%.*]] = and i32 [[OP_EXTRA29]], [[TMP0]] +; CHECK-NEXT: [[VAL_42:%.*]] = and i32 [[VAL_40]], undef +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> undef, i32 [[OP_EXTRA30]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 14910, i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = and <2 x i32> [[TMP6]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = add <2 x i32> [[TMP6]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i32> [[TMP11]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> undef, i32 [[TMP12]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i32> [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP15]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP14]], i32 1 +; CHECK-NEXT: br label [[LOOP]] +; +; FORCE_REDUCTION-LABEL: @Test( +; FORCE_REDUCTION-NEXT: entry: +; FORCE_REDUCTION-NEXT: br label [[LOOP:%.*]] +; FORCE_REDUCTION: loop: +; FORCE_REDUCTION-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP13:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] +; FORCE_REDUCTION-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <4 x i32> +; FORCE_REDUCTION-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 1 +; FORCE_REDUCTION-NEXT: [[TMP3:%.*]] = add <4 x i32> , [[SHUFFLE]] +; FORCE_REDUCTION-NEXT: [[VAL_1:%.*]] = and i32 [[TMP2]], undef +; FORCE_REDUCTION-NEXT: [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]] +; FORCE_REDUCTION-NEXT: [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_7:%.*]] = and i32 [[VAL_5]], undef +; FORCE_REDUCTION-NEXT: [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_12:%.*]] = and i32 [[VAL_10]], undef +; FORCE_REDUCTION-NEXT: [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_19:%.*]] = and i32 [[VAL_17]], undef +; FORCE_REDUCTION-NEXT: [[VAL_20:%.*]] = add i32 [[TMP2]], 1496 +; FORCE_REDUCTION-NEXT: [[VAL_21:%.*]] = and i32 [[VAL_19]], [[VAL_20]] +; FORCE_REDUCTION-NEXT: [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_34:%.*]] = add i32 [[TMP2]], 8555 +; FORCE_REDUCTION-NEXT: [[VAL_35:%.*]] = and i32 [[VAL_33]], [[VAL_34]] +; FORCE_REDUCTION-NEXT: [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> undef, <4 x i32> +; FORCE_REDUCTION-NEXT: [[BIN_RDX:%.*]] = and <4 x i32> [[TMP3]], [[RDX_SHUF]] +; FORCE_REDUCTION-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> +; FORCE_REDUCTION-NEXT: [[BIN_RDX2:%.*]] = and <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]] +; FORCE_REDUCTION-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0 +; FORCE_REDUCTION-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], [[VAL_20]] +; FORCE_REDUCTION-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], [[VAL_34]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP6]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA3:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA4:%.*]] = and i32 [[OP_EXTRA3]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA4]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA6:%.*]] = and i32 [[OP_EXTRA5]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA7:%.*]] = and i32 [[OP_EXTRA6]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA8:%.*]] = and i32 [[OP_EXTRA7]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA9:%.*]] = and i32 [[OP_EXTRA8]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA10:%.*]] = and i32 [[OP_EXTRA9]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA11:%.*]] = and i32 [[OP_EXTRA10]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA12:%.*]] = and i32 [[OP_EXTRA11]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA13:%.*]] = and i32 [[OP_EXTRA12]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA14:%.*]] = and i32 [[OP_EXTRA13]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA15:%.*]] = and i32 [[OP_EXTRA14]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA16:%.*]] = and i32 [[OP_EXTRA15]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA17:%.*]] = and i32 [[OP_EXTRA16]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA18:%.*]] = and i32 [[OP_EXTRA17]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA19:%.*]] = and i32 [[OP_EXTRA18]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA20:%.*]] = and i32 [[OP_EXTRA19]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA21:%.*]] = and i32 [[OP_EXTRA20]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA22:%.*]] = and i32 [[OP_EXTRA21]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA23:%.*]] = and i32 [[OP_EXTRA22]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA24:%.*]] = and i32 [[OP_EXTRA23]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA25:%.*]] = and i32 [[OP_EXTRA24]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA26:%.*]] = and i32 [[OP_EXTRA25]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA27:%.*]] = and i32 [[OP_EXTRA26]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA28:%.*]] = and i32 [[OP_EXTRA27]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA29:%.*]] = and i32 [[OP_EXTRA28]], [[TMP2]] +; FORCE_REDUCTION-NEXT: [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_39:%.*]] = add i32 [[TMP2]], 12529 +; FORCE_REDUCTION-NEXT: [[VAL_40:%.*]] = and i32 [[OP_EXTRA29]], [[VAL_39]] +; FORCE_REDUCTION-NEXT: [[VAL_41:%.*]] = add i32 [[TMP2]], 13685 +; FORCE_REDUCTION-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[VAL_40]], i32 0 +; FORCE_REDUCTION-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP2]], i32 1 +; FORCE_REDUCTION-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> undef, i32 [[VAL_41]], i32 0 +; FORCE_REDUCTION-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> [[TMP9]], i32 14910, i32 1 +; FORCE_REDUCTION-NEXT: [[TMP11:%.*]] = and <2 x i32> [[TMP8]], [[TMP10]] +; FORCE_REDUCTION-NEXT: [[TMP12:%.*]] = add <2 x i32> [[TMP8]], [[TMP10]] +; FORCE_REDUCTION-NEXT: [[TMP13]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> +; FORCE_REDUCTION-NEXT: br label [[LOOP]] ; entry: br label %loop diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll index 74e62e0e4ba2da..ad1434146a5b36 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll @@ -7,7 +7,7 @@ define void @mainTest(i32 %param, i32 * %vals, i32 %len) { ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> , i32 [[PARAM:%.*]], i32 1 ; CHECK-NEXT: br label [[BCI_15:%.*]] ; CHECK: bci_15: -; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP11:%.*]], [[BCI_15]] ], [ [[TMP0]], [[BCI_15_PREHEADER:%.*]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP7:%.*]], [[BCI_15]] ], [ [[TMP0]], [[BCI_15_PREHEADER:%.*]] ] ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <16 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 15 @@ -28,13 +28,6 @@ define void @mainTest(i32 %param, i32 * %vals, i32 %len) { ; CHECK-NEXT: [[V38:%.*]] = and i32 undef, [[V36]] ; CHECK-NEXT: [[V40:%.*]] = and i32 undef, [[V38]] ; CHECK-NEXT: [[V42:%.*]] = and i32 undef, [[V40]] -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <16 x i32> [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP6]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> , i32 [[V42]], i32 1 -; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i32> [[TMP7]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = and <2 x i32> [[TMP7]], [[TMP8]] -; CHECK-NEXT: [[TMP11]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> undef, <16 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = and <16 x i32> [[TMP4]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x i32> [[BIN_RDX]], <16 x i32> undef, <16 x i32> @@ -43,9 +36,12 @@ define void @mainTest(i32 %param, i32 * %vals, i32 %len) { ; CHECK-NEXT: [[BIN_RDX4:%.*]] = and <16 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] ; CHECK-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <16 x i32> [[BIN_RDX4]], <16 x i32> undef, <16 x i32> ; CHECK-NEXT: [[BIN_RDX6:%.*]] = and <16 x i32> [[BIN_RDX4]], [[RDX_SHUF5]] -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i32> [[BIN_RDX6]], i32 0 -; CHECK-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP12]], [[TMP2]] -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i32> [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <16 x i32> [[BIN_RDX6]], i32 0 +; CHECK-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP5]], [[TMP2]] +; CHECK-NEXT: [[V43:%.*]] = and i32 undef, [[V42]] +; CHECK-NEXT: [[V44:%.*]] = add i32 [[TMP2]], 16 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> undef, i32 [[V44]], i32 0 +; CHECK-NEXT: [[TMP7]] = insertelement <2 x i32> [[TMP6]], i32 [[OP_EXTRA]], i32 1 ; CHECK-NEXT: br i1 true, label [[BCI_15]], label [[LOOPEXIT:%.*]] ; CHECK: loopexit: ; CHECK-NEXT: ret void From ec0bc4cc340139b8787236bc18daeebaa9701ded Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 17 Jan 2019 13:31:58 +0000 Subject: [PATCH 007/125] Merging r351344: ------------------------------------------------------------------------ r351344 | asl | 2019-01-16 14:44:01 +0100 (Wed, 16 Jan 2019) | 10 lines [MSP430] Improve support of 'interrupt' attribute * Accept as an argument constants in range 0..63 (aligned with TI headers and linker scripts provided with TI GCC toolchain). * Emit function attribute 'interrupt'='xx' instead of aliases (used in the backend to create a section for particular interrupt vector). * Add more diagnostics. Patch by Kristina Bessonova! Differential Revision: https://reviews.llvm.org/D56663 ------------------------------------------------------------------------ llvm-svn: 351441 --- .../clang/Basic/DiagnosticSemaKinds.td | 4 +++ clang/lib/CodeGen/TargetInfo.cpp | 22 +++++++-------- clang/lib/Sema/SemaDeclAttr.cpp | 27 ++++++++++++++++--- clang/test/CodeGen/attr-msp430.c | 10 +++++++ clang/test/Sema/attr-msp430.c | 11 ++++++-- 5 files changed, 56 insertions(+), 18 deletions(-) create mode 100644 clang/test/CodeGen/attr-msp430.c diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 5feb877e46c51d..b71f65d146cab5 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -274,6 +274,10 @@ def warn_riscv_interrupt_attribute : Warning< "RISC-V 'interrupt' attribute only applies to functions that have " "%select{no parameters|a 'void' return type}0">, InGroup; +def warn_msp430_interrupt_attribute : Warning< + "MSP430 'interrupt' attribute only applies to functions that have " + "%select{no parameters|a 'void' return type}0">, + InGroup; def warn_unused_parameter : Warning<"unused parameter %0">, InGroup, DefaultIgnore; def warn_unused_variable : Warning<"unused variable %0">, diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 89ec73670a7350..f5a770ed9d8444 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -6774,21 +6774,19 @@ void MSP430TargetCodeGenInfo::setTargetAttributes( if (GV->isDeclaration()) return; if (const FunctionDecl *FD = dyn_cast_or_null(D)) { - if (const MSP430InterruptAttr *attr = FD->getAttr()) { - // Handle 'interrupt' attribute: - llvm::Function *F = cast(GV); + const auto *InterruptAttr = FD->getAttr(); + if (!InterruptAttr) + return; - // Step 1: Set ISR calling convention. - F->setCallingConv(llvm::CallingConv::MSP430_INTR); + // Handle 'interrupt' attribute: + llvm::Function *F = cast(GV); - // Step 2: Add attributes goodness. - F->addFnAttr(llvm::Attribute::NoInline); + // Step 1: Set ISR calling convention. + F->setCallingConv(llvm::CallingConv::MSP430_INTR); - // Step 3: Emit ISR vector alias. - unsigned Num = attr->getNumber() / 2; - llvm::GlobalAlias::create(llvm::Function::ExternalLinkage, - "__isr_" + Twine(Num), F); - } + // Step 2: Add attributes goodness. + F->addFnAttr(llvm::Attribute::NoInline); + F->addFnAttr("interrupt", llvm::utostr(InterruptAttr->getNumber())); } } diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 0e10804a2ec761..eb95cc748b177a 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -5377,6 +5377,27 @@ static void handleARMInterruptAttr(Sema &S, Decl *D, const ParsedAttr &AL) { } static void handleMSP430InterruptAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + // MSP430 'interrupt' attribute is applied to + // a function with no parameters and void return type. + if (!isFunctionOrMethod(D)) { + S.Diag(D->getLocation(), diag::warn_attribute_wrong_decl_type) + << "'interrupt'" << ExpectedFunctionOrMethod; + return; + } + + if (hasFunctionProto(D) && getFunctionOrMethodNumParams(D) != 0) { + S.Diag(D->getLocation(), diag::warn_msp430_interrupt_attribute) + << 0; + return; + } + + if (!getFunctionOrMethodResultType(D)->isVoidType()) { + S.Diag(D->getLocation(), diag::warn_msp430_interrupt_attribute) + << 1; + return; + } + + // The attribute takes one integer argument. if (!checkAttributeNumArgs(S, AL, 1)) return; @@ -5386,8 +5407,6 @@ static void handleMSP430InterruptAttr(Sema &S, Decl *D, const ParsedAttr &AL) { return; } - // FIXME: Check for decl - it should be void ()(void). - Expr *NumParamsExpr = static_cast(AL.getArgAsExpr(0)); llvm::APSInt NumParams(32); if (!NumParamsExpr->isIntegerConstantExpr(NumParams, S.Context)) { @@ -5396,9 +5415,9 @@ static void handleMSP430InterruptAttr(Sema &S, Decl *D, const ParsedAttr &AL) { << NumParamsExpr->getSourceRange(); return; } - + // The argument should be in range 0..63. unsigned Num = NumParams.getLimitedValue(255); - if ((Num & 1) || Num > 30) { + if (Num > 63) { S.Diag(AL.getLoc(), diag::err_attribute_argument_out_of_bounds) << AL << (int)NumParams.getSExtValue() << NumParamsExpr->getSourceRange(); diff --git a/clang/test/CodeGen/attr-msp430.c b/clang/test/CodeGen/attr-msp430.c new file mode 100644 index 00000000000000..e8b6d0d0fa3ea7 --- /dev/null +++ b/clang/test/CodeGen/attr-msp430.c @@ -0,0 +1,10 @@ +// RUN: %clang_cc1 -triple msp430-unknown-unknown -emit-llvm < %s| FileCheck %s + +__attribute__((interrupt(1))) void foo(void) {} +// CHECK: @llvm.used +// CHECK-SAME: @foo + +// CHECK: define msp430_intrcc void @foo() #0 +// CHECK: attributes #0 +// CHECK-SAME: noinline +// CHECK-SAME: "interrupt"="1" diff --git a/clang/test/Sema/attr-msp430.c b/clang/test/Sema/attr-msp430.c index 26b2d8fcfd6533..4b38d09b86757c 100644 --- a/clang/test/Sema/attr-msp430.c +++ b/clang/test/Sema/attr-msp430.c @@ -1,6 +1,13 @@ // RUN: %clang_cc1 -triple msp430-unknown-unknown -fsyntax-only -verify %s +__attribute__((interrupt(1))) int t; // expected-warning {{'interrupt' attribute only applies to functions}} + int i; -void f(void) __attribute__((interrupt(i))); /* expected-error {{'interrupt' attribute requires an integer constant}} */ +__attribute__((interrupt(i))) void f(void); // expected-error {{'interrupt' attribute requires an integer constant}} +__attribute__((interrupt(1, 2))) void f2(void); // expected-error {{'interrupt' attribute takes one argument}} +__attribute__((interrupt(1))) int f3(void); // expected-warning {{MSP430 'interrupt' attribute only applies to functions that have a 'void' return type}} +__attribute__((interrupt(1))) void f4(int a); // expected-warning {{MSP430 'interrupt' attribute only applies to functions that have no parameters}} +__attribute__((interrupt(64))) void f5(void); // expected-error {{'interrupt' attribute parameter 64 is out of bounds}} -void f2(void) __attribute__((interrupt(12))); +__attribute__((interrupt(0))) void f6(void); +__attribute__((interrupt(63))) void f7(void); From edcbea9713087f92720cafba12a4a03b59833109 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 17 Jan 2019 13:34:38 +0000 Subject: [PATCH 008/125] Merging r351345: ------------------------------------------------------------------------ r351345 | asl | 2019-01-16 15:03:41 +0100 (Wed, 16 Jan 2019) | 23 lines [MSP430] Emit a separate section for every interrupt vector This is LLVM part of D56663 Linker scripts shipped by TI require to have every interrupt vector in a separate section with a specific name: SECTIONS { __interrupt_vector_XX : { KEEP (*(__interrupt_vector_XX )) } > VECTXX ... } Follow the requirement emit the section for every vector which contain address of interrupt handler: .section __interrupt_vector_XX,"ax",@progbits .word %isr% Patch by Kristina Bessonova! Differential Revision: https://reviews.llvm.org/D56664 ------------------------------------------------------------------------ llvm-svn: 351442 --- llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp | 32 +++++++++++++++++++ .../CodeGen/MSP430/2009-12-21-FrameAddr.ll | 4 ++- llvm/test/CodeGen/MSP430/fp.ll | 2 ++ llvm/test/CodeGen/MSP430/interrupt.ll | 4 +++ 4 files changed, 41 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp b/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp index f39c21fc8aa2b0..5e9b108c2de3a1 100644 --- a/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp +++ b/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp @@ -17,6 +17,7 @@ #include "MSP430InstrInfo.h" #include "MSP430MCInstLower.h" #include "MSP430TargetMachine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -28,6 +29,7 @@ #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/TargetRegistry.h" @@ -44,6 +46,8 @@ namespace { StringRef getPassName() const override { return "MSP430 Assembly Printer"; } + bool runOnMachineFunction(MachineFunction &MF) override; + void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O, const char* Modifier = nullptr); void printSrcMemOperand(const MachineInstr *MI, int OpNum, @@ -55,6 +59,8 @@ namespace { unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) override; void EmitInstruction(const MachineInstr *MI) override; + + void EmitInterruptVectorSection(MachineFunction &ISR); }; } // end of anonymous namespace @@ -153,6 +159,32 @@ void MSP430AsmPrinter::EmitInstruction(const MachineInstr *MI) { EmitToStreamer(*OutStreamer, TmpInst); } +void MSP430AsmPrinter::EmitInterruptVectorSection(MachineFunction &ISR) { + MCSection *Cur = OutStreamer->getCurrentSectionOnly(); + const auto *F = &ISR.getFunction(); + assert(F->hasFnAttribute("interrupt") && + "Functions with MSP430_INTR CC should have 'interrupt' attribute"); + StringRef IVIdx = F->getFnAttribute("interrupt").getValueAsString(); + MCSection *IV = OutStreamer->getContext().getELFSection( + "__interrupt_vector_" + IVIdx, + ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_EXECINSTR); + OutStreamer->SwitchSection(IV); + + const MCSymbol *FunctionSymbol = getSymbol(F); + OutStreamer->EmitSymbolValue(FunctionSymbol, TM.getProgramPointerSize()); + OutStreamer->SwitchSection(Cur); +} + +bool MSP430AsmPrinter::runOnMachineFunction(MachineFunction &MF) { + // Emit separate section for an interrupt vector if ISR + if (MF.getFunction().getCallingConv() == CallingConv::MSP430_INTR) + EmitInterruptVectorSection(MF); + + SetupMachineFunction(MF); + EmitFunctionBody(); + return false; +} + // Force static initialization. extern "C" void LLVMInitializeMSP430AsmPrinter() { RegisterAsmPrinter X(getTheMSP430Target()); diff --git a/llvm/test/CodeGen/MSP430/2009-12-21-FrameAddr.ll b/llvm/test/CodeGen/MSP430/2009-12-21-FrameAddr.ll index c3d69c7c0db5e2..be82e98dffe370 100644 --- a/llvm/test/CodeGen/MSP430/2009-12-21-FrameAddr.ll +++ b/llvm/test/CodeGen/MSP430/2009-12-21-FrameAddr.ll @@ -3,7 +3,7 @@ target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8" target triple = "msp430-unknown-linux-gnu" -define msp430_intrcc void @foo() nounwind { +define msp430_intrcc void @foo() nounwind #0 { entry: %fa = call i8* @llvm.frameaddress(i32 0) store i8 0, i8* %fa @@ -11,3 +11,5 @@ entry: } declare i8* @llvm.frameaddress(i32) + +attributes #0 = { noinline nounwind optnone "interrupt"="2" } diff --git a/llvm/test/CodeGen/MSP430/fp.ll b/llvm/test/CodeGen/MSP430/fp.ll index e7d7c519657eee..bf603704a91b68 100644 --- a/llvm/test/CodeGen/MSP430/fp.ll +++ b/llvm/test/CodeGen/MSP430/fp.ll @@ -27,3 +27,5 @@ define msp430_intrcc void @fpb_alloced() #0 { call void asm sideeffect "nop", "r"(i8 0) ret void } + +attributes #0 = { noinline nounwind optnone "interrupt"="2" } diff --git a/llvm/test/CodeGen/MSP430/interrupt.ll b/llvm/test/CodeGen/MSP430/interrupt.ll index 5fa0c849c2602a..94fb3bc457a356 100644 --- a/llvm/test/CodeGen/MSP430/interrupt.ll +++ b/llvm/test/CodeGen/MSP430/interrupt.ll @@ -13,6 +13,9 @@ target triple = "msp430-generic-generic" ; instruction RETI, which restores the SR register and branches to the PC where ; the interrupt occurred. +; CHECK: .section __interrupt_vector_2,"ax",@progbits +; CHECK-NEXT: .short ISR + @g = global float 0.0 define msp430_intrcc void @ISR() #0 { @@ -47,3 +50,4 @@ entry: ret void } +attributes #0 = { noinline nounwind optnone "interrupt"="2" } From f308f9833371625b7d5ee4b7c0a8c6ac22c932c2 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 17 Jan 2019 13:41:26 +0000 Subject: [PATCH 009/125] Merging r351351: ------------------------------------------------------------------------ r351351 | mareko | 2019-01-16 16:43:53 +0100 (Wed, 16 Jan 2019) | 7 lines AMDGPU: Add llvm.amdgcn.ds.ordered.add & swap Reviewers: arsenm, nhaehnle Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits Differential Revision: https://reviews.llvm.org/D52944 ------------------------------------------------------------------------ llvm-svn: 351443 --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 18 ++++ llvm/lib/Target/AMDGPU/AMDGPU.h | 2 +- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 1 + llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 1 + .../Target/AMDGPU/AMDGPUSearchableTables.td | 2 + .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 2 + llvm/lib/Target/AMDGPU/DSInstructions.td | 5 + .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 22 ++++- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 61 ++++++++++++ llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 14 ++- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 13 ++- llvm/lib/Target/AMDGPU/SIInstrInfo.h | 2 + llvm/lib/Target/AMDGPU/SIInstrInfo.td | 5 + .../AMDGPU/llvm.amdgcn.ds.ordered.add.ll | 96 +++++++++++++++++++ .../AMDGPU/llvm.amdgcn.ds.ordered.swap.ll | 45 +++++++++ 15 files changed, 278 insertions(+), 11 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 7913ce828fbc0f..6585cb71769ab0 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -392,6 +392,24 @@ class AMDGPULDSF32Intrin : [IntrArgMemOnly, NoCapture<0>] >; +class AMDGPUDSOrderedIntrinsic : Intrinsic< + [llvm_i32_ty], + // M0 = {hi16:address, lo16:waveID}. Allow passing M0 as a pointer, so that + // the bit packing can be optimized at the IR level. + [LLVMQualPointerType, // IntToPtr(M0) + llvm_i32_ty, // value to add or swap + llvm_i32_ty, // ordering + llvm_i32_ty, // scope + llvm_i1_ty, // isVolatile + llvm_i32_ty, // ordered count index (OA index), also added to the address + llvm_i1_ty, // wave release, usually set to 1 + llvm_i1_ty], // wave done, set to 1 for the last ordered instruction + [NoCapture<0>] +>; + +def int_amdgcn_ds_ordered_add : AMDGPUDSOrderedIntrinsic; +def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic; + def int_amdgcn_ds_fadd : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_faddf">; def int_amdgcn_ds_fmin : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fminf">; def int_amdgcn_ds_fmax : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fmaxf">; diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index bb7801c172f60b..55668867cc8e2c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -254,7 +254,7 @@ namespace AMDGPUAS { FLAT_ADDRESS = 0, ///< Address space for flat memory. GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0). - REGION_ADDRESS = 2, ///< Address space for region memory. + REGION_ADDRESS = 2, ///< Address space for region memory. (GDS) CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2) LOCAL_ADDRESS = 3, ///< Address space for local memory. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 6951c915b1772d..8d36511a28303f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -4192,6 +4192,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(TBUFFER_STORE_FORMAT_D16) NODE_NAME_CASE(TBUFFER_LOAD_FORMAT) NODE_NAME_CASE(TBUFFER_LOAD_FORMAT_D16) + NODE_NAME_CASE(DS_ORDERED_COUNT) NODE_NAME_CASE(ATOMIC_CMP_SWAP) NODE_NAME_CASE(ATOMIC_INC) NODE_NAME_CASE(ATOMIC_DEC) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 0d22cb2e3e20bc..d4a751d00a503d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -474,6 +474,7 @@ enum NodeType : unsigned { TBUFFER_STORE_FORMAT_D16, TBUFFER_LOAD_FORMAT, TBUFFER_LOAD_FORMAT_D16, + DS_ORDERED_COUNT, ATOMIC_CMP_SWAP, ATOMIC_INC, ATOMIC_DEC, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td index 9dbd7751b4d885..4d0962f65fdc1f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td @@ -72,6 +72,8 @@ def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; +def : SourceOfDivergence; +def : SourceOfDivergence; foreach intr = AMDGPUImageDimAtomicIntrinsics in def : SourceOfDivergence; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 11e4ba4b5010dc..62e7e44ddb80c6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -308,6 +308,8 @@ bool GCNTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, switch (Inst->getIntrinsicID()) { case Intrinsic::amdgcn_atomic_inc: case Intrinsic::amdgcn_atomic_dec: + case Intrinsic::amdgcn_ds_ordered_add: + case Intrinsic::amdgcn_ds_ordered_swap: case Intrinsic::amdgcn_ds_fadd: case Intrinsic::amdgcn_ds_fmin: case Intrinsic::amdgcn_ds_fmax: { diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index 31d2ebef481d20..9c7097e9a52066 100644 --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -817,6 +817,11 @@ defm : DSAtomicRetPat_mc; defm : DSAtomicCmpXChg_mc; +def : Pat < + (SIds_ordered_count i32:$value, i16:$offset), + (DS_ORDERED_COUNT $value, (as_i16imm $offset)) +>; + //===----------------------------------------------------------------------===// // Real instructions //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index c6396de89c4f6e..69ddbfb5395833 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -88,14 +88,28 @@ static bool isSMovRel(unsigned Opcode) { } } -static bool isSendMsgTraceDataOrGDS(const MachineInstr &MI) { +static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII, + const MachineInstr &MI) { + if (TII.isAlwaysGDS(MI.getOpcode())) + return true; + switch (MI.getOpcode()) { case AMDGPU::S_SENDMSG: case AMDGPU::S_SENDMSGHALT: case AMDGPU::S_TTRACEDATA: return true; + // These DS opcodes don't support GDS. + case AMDGPU::DS_NOP: + case AMDGPU::DS_PERMUTE_B32: + case AMDGPU::DS_BPERMUTE_B32: + return false; default: - // TODO: GDS + if (TII.isDS(MI.getOpcode())) { + int GDS = AMDGPU::getNamedOperandIdx(MI.getOpcode(), + AMDGPU::OpName::gds); + if (MI.getOperand(GDS).getImm()) + return true; + } return false; } } @@ -145,7 +159,7 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { checkReadM0Hazards(MI) > 0) return NoopHazard; - if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI) && + if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) && checkReadM0Hazards(MI) > 0) return NoopHazard; @@ -199,7 +213,7 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) { isSMovRel(MI->getOpcode()))) return std::max(WaitStates, checkReadM0Hazards(MI)); - if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI)) + if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI)) return std::max(WaitStates, checkReadM0Hazards(MI)); return WaitStates; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 0ba921647097d3..12113fcc1fcb8c 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -910,6 +910,8 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, switch (IntrID) { case Intrinsic::amdgcn_atomic_inc: case Intrinsic::amdgcn_atomic_dec: + case Intrinsic::amdgcn_ds_ordered_add: + case Intrinsic::amdgcn_ds_ordered_swap: case Intrinsic::amdgcn_ds_fadd: case Intrinsic::amdgcn_ds_fmin: case Intrinsic::amdgcn_ds_fmax: { @@ -937,6 +939,8 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II, switch (II->getIntrinsicID()) { case Intrinsic::amdgcn_atomic_inc: case Intrinsic::amdgcn_atomic_dec: + case Intrinsic::amdgcn_ds_ordered_add: + case Intrinsic::amdgcn_ds_ordered_swap: case Intrinsic::amdgcn_ds_fadd: case Intrinsic::amdgcn_ds_fmin: case Intrinsic::amdgcn_ds_fmax: { @@ -5438,6 +5442,63 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SDLoc DL(Op); switch (IntrID) { + case Intrinsic::amdgcn_ds_ordered_add: + case Intrinsic::amdgcn_ds_ordered_swap: { + MemSDNode *M = cast(Op); + SDValue Chain = M->getOperand(0); + SDValue M0 = M->getOperand(2); + SDValue Value = M->getOperand(3); + unsigned OrderedCountIndex = M->getConstantOperandVal(7); + unsigned WaveRelease = M->getConstantOperandVal(8); + unsigned WaveDone = M->getConstantOperandVal(9); + unsigned ShaderType; + unsigned Instruction; + + switch (IntrID) { + case Intrinsic::amdgcn_ds_ordered_add: + Instruction = 0; + break; + case Intrinsic::amdgcn_ds_ordered_swap: + Instruction = 1; + break; + } + + if (WaveDone && !WaveRelease) + report_fatal_error("ds_ordered_count: wave_done requires wave_release"); + + switch (DAG.getMachineFunction().getFunction().getCallingConv()) { + case CallingConv::AMDGPU_CS: + case CallingConv::AMDGPU_KERNEL: + ShaderType = 0; + break; + case CallingConv::AMDGPU_PS: + ShaderType = 1; + break; + case CallingConv::AMDGPU_VS: + ShaderType = 2; + break; + case CallingConv::AMDGPU_GS: + ShaderType = 3; + break; + default: + report_fatal_error("ds_ordered_count unsupported for this calling conv"); + } + + unsigned Offset0 = OrderedCountIndex << 2; + unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) | + (Instruction << 4); + unsigned Offset = Offset0 | (Offset1 << 8); + + SDValue Ops[] = { + Chain, + Value, + DAG.getTargetConstant(Offset, DL, MVT::i16), + copyToM0(DAG, Chain, DL, M0).getValue(1), // Glue + }; + return DAG.getMemIntrinsicNode(AMDGPUISD::DS_ORDERED_COUNT, DL, + M->getVTList(), Ops, M->getMemoryVT(), + M->getMemOperand()); + } case Intrinsic::amdgcn_atomic_inc: case Intrinsic::amdgcn_atomic_dec: case Intrinsic::amdgcn_ds_fadd: diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index afc0b446761095..3c13bccd94fa85 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -536,10 +536,13 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII, CurrScore); } if (Inst.mayStore()) { - setExpScore( - &Inst, TII, TRI, MRI, - AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0), - CurrScore); + if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), + AMDGPU::OpName::data0) != -1) { + setExpScore( + &Inst, TII, TRI, MRI, + AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0), + CurrScore); + } if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data1) != -1) { setExpScore(&Inst, TII, TRI, MRI, @@ -1093,7 +1096,8 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst, // bracket and the destination operand scores. // TODO: Use the (TSFlags & SIInstrFlags::LGKM_CNT) property everywhere. if (TII->isDS(Inst) && TII->usesLGKM_CNT(Inst)) { - if (TII->hasModifiersSet(Inst, AMDGPU::OpName::gds)) { + if (TII->isAlwaysGDS(Inst.getOpcode()) || + TII->hasModifiersSet(Inst, AMDGPU::OpName::gds)) { ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_ACCESS, Inst); ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_GPR_LOCK, Inst); } else { diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 2370d5fa7b27b3..7f7f1807987ab2 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2390,6 +2390,16 @@ bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI, changesVGPRIndexingMode(MI); } +bool SIInstrInfo::isAlwaysGDS(uint16_t Opcode) const { + return Opcode == AMDGPU::DS_ORDERED_COUNT || + Opcode == AMDGPU::DS_GWS_INIT || + Opcode == AMDGPU::DS_GWS_SEMA_V || + Opcode == AMDGPU::DS_GWS_SEMA_BR || + Opcode == AMDGPU::DS_GWS_SEMA_P || + Opcode == AMDGPU::DS_GWS_SEMA_RELEASE_ALL || + Opcode == AMDGPU::DS_GWS_BARRIER; +} + bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const { unsigned Opcode = MI.getOpcode(); @@ -2403,7 +2413,8 @@ bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const // EXEC = 0, but checking for that case here seems not worth it // given the typical code patterns. if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT || - Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE) + Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE || + Opcode == AMDGPU::DS_ORDERED_COUNT) return true; if (MI.isInlineAsm()) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 5b1a05f3785ec7..8847fd6babb362 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -450,6 +450,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { return get(Opcode).TSFlags & SIInstrFlags::DS; } + bool isAlwaysGDS(uint16_t Opcode) const; + static bool isMIMG(const MachineInstr &MI) { return MI.getDesc().TSFlags & SIInstrFlags::MIMG; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 13afa4d4974bf1..180a7b0601d74e 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -45,6 +45,11 @@ def SIsbuffer_load : SDNode<"AMDGPUISD::SBUFFER_LOAD", [SDNPMayLoad, SDNPMemOperand] >; +def SIds_ordered_count : SDNode<"AMDGPUISD::DS_ORDERED_COUNT", + SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i16>]>, + [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain, SDNPInGlue] +>; + def SIatomic_inc : SDNode<"AMDGPUISD::ATOMIC_INC", SDTAtomic2, [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain] >; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll new file mode 100644 index 00000000000000..ad489debc46cb1 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll @@ -0,0 +1,96 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s + +; FUNC-LABEL: {{^}}ds_ordered_add: +; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31 +; GCN-DAG: s_mov_b32 m0, +; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds +define amdgpu_kernel void @ds_ordered_add(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) { + %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true) + store i32 %val, i32 addrspace(1)* %out + ret void +} + +; Below are various modifications of input operands and shader types. + +; FUNC-LABEL: {{^}}ds_ordered_add_counter2: +; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31 +; GCN-DAG: s_mov_b32 m0, +; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:776 gds +define amdgpu_kernel void @ds_ordered_add_counter2(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) { + %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 2, i1 true, i1 true) + store i32 %val, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}ds_ordered_add_nodone: +; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31 +; GCN-DAG: s_mov_b32 m0, +; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:260 gds +define amdgpu_kernel void @ds_ordered_add_nodone(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) { + %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 false) + store i32 %val, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}ds_ordered_add_norelease: +; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31 +; GCN-DAG: s_mov_b32 m0, +; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:4 gds +define amdgpu_kernel void @ds_ordered_add_norelease(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) { + %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 false, i1 false) + store i32 %val, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}ds_ordered_add_cs: +; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31 +; GCN: s_mov_b32 m0, s0 +; VIGFX9-NEXT: s_nop 0 +; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds +; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0) +define amdgpu_cs float @ds_ordered_add_cs(i32 addrspace(2)* inreg %gds) { + %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true) + %r = bitcast i32 %val to float + ret float %r +} + +; FUNC-LABEL: {{^}}ds_ordered_add_ps: +; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31 +; GCN: s_mov_b32 m0, s0 +; VIGFX9-NEXT: s_nop 0 +; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:1796 gds +; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0) +define amdgpu_ps float @ds_ordered_add_ps(i32 addrspace(2)* inreg %gds) { + %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true) + %r = bitcast i32 %val to float + ret float %r +} + +; FUNC-LABEL: {{^}}ds_ordered_add_vs: +; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31 +; GCN: s_mov_b32 m0, s0 +; VIGFX9-NEXT: s_nop 0 +; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:2820 gds +; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0) +define amdgpu_vs float @ds_ordered_add_vs(i32 addrspace(2)* inreg %gds) { + %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true) + %r = bitcast i32 %val to float + ret float %r +} + +; FUNC-LABEL: {{^}}ds_ordered_add_gs: +; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31 +; GCN: s_mov_b32 m0, s0 +; VIGFX9-NEXT: s_nop 0 +; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:3844 gds +; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0) +define amdgpu_gs float @ds_ordered_add_gs(i32 addrspace(2)* inreg %gds) { + %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true) + %r = bitcast i32 %val to float + ret float %r +} + +declare i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* nocapture, i32, i32, i32, i1, i32, i1, i1) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll new file mode 100644 index 00000000000000..acb1133c6a0b8b --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll @@ -0,0 +1,45 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s + +; FUNC-LABEL: {{^}}ds_ordered_swap: +; GCN: s_mov_b32 m0, s0 +; VIGFX9-NEXT: s_nop 0 +; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v0 offset:4868 gds +; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0) +define amdgpu_cs float @ds_ordered_swap(i32 addrspace(2)* inreg %gds, i32 %value) { + %val = call i32@llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 %value, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true) + %r = bitcast i32 %val to float + ret float %r +} + +; FUNC-LABEL: {{^}}ds_ordered_swap_conditional: +; GCN: v_cmp_ne_u32_e32 vcc, 0, v0 +; GCN: s_and_saveexec_b64 s[[SAVED:\[[0-9]+:[0-9]+\]]], vcc +; // We have to use s_cbranch, because ds_ordered_count has side effects with EXEC=0 +; GCN: s_cbranch_execz [[BB:BB._.]] +; GCN: s_mov_b32 m0, s0 +; VIGFX9-NEXT: s_nop 0 +; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v0 offset:4868 gds +; GCN-NEXT: [[BB]]: +; // Wait for expcnt(0) before modifying EXEC +; GCN-NEXT: s_waitcnt expcnt(0) +; GCN-NEXT: s_or_b64 exec, exec, s[[SAVED]] +; GCN-NEXT: s_waitcnt lgkmcnt(0) +define amdgpu_cs float @ds_ordered_swap_conditional(i32 addrspace(2)* inreg %gds, i32 %value) { +entry: + %c = icmp ne i32 %value, 0 + br i1 %c, label %if-true, label %endif + +if-true: + %val = call i32@llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 %value, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true) + br label %endif + +endif: + %v = phi i32 [ %val, %if-true ], [ undef, %entry ] + %r = bitcast i32 %v to float + ret float %r +} + +declare i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* nocapture, i32, i32, i32, i1, i32, i1, i1) From 6f533811b5a4398b10eb0c97cd68d98947b1c9e8 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 17 Jan 2019 13:44:39 +0000 Subject: [PATCH 010/125] Merging r351381: ------------------------------------------------------------------------ r351381 | ctopper | 2019-01-16 22:46:32 +0100 (Wed, 16 Jan 2019) | 11 lines [X86] Add X86ISD::VSHLV and X86ISD::VSRLV nodes for psllv and psrlv Previously we used ISD::SHL and ISD::SRL to represent these in SelectionDAG. ISD::SHL/SRL interpret an out of range shift amount as undefined behavior and will constant fold to undef. While the intrinsics are defined to return 0 for out of range shift amounts. A previous patch added a special node for VPSRAV to produce all sign bits. This was previously believed safe because undefs frequently get turned into 0 either from the constant pool or a desire to not have a false register dependency. But undef is treated specially in some optimizations. For example, its ignored in detection of vector splats. So if the ISD::SHL/SRL can be constant folded and all of the elements with in bounds shift amounts are the same, we might fold it to single element broadcast from the constant pool. This would not put 0s in the elements with out of bounds shift amounts. We do have an existing InstCombine optimization to use shl/lshr when the shift amounts are all constant and in bounds. That should prevent some loss of constant folding from this change. Patch by zhutianyang and Craig Topper Differential Revision: https://reviews.llvm.org/D56695 ------------------------------------------------------------------------ llvm-svn: 351444 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 + llvm/lib/Target/X86/X86ISelLowering.h | 6 +- llvm/lib/Target/X86/X86InstrAVX512.td | 79 ++++-- llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 2 + llvm/lib/Target/X86/X86InstrSSE.td | 30 +-- llvm/lib/Target/X86/X86IntrinsicsInfo.h | 36 +-- llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll | 240 +++++++++++++----- llvm/test/CodeGen/X86/avx512-intrinsics.ll | 28 +- llvm/test/CodeGen/X86/avx512bw-intrinsics.ll | 24 +- .../test/CodeGen/X86/avx512bwvl-intrinsics.ll | 64 +++-- 10 files changed, 347 insertions(+), 164 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index b6a692ee187d83..3637562c8ec35e 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -27202,6 +27202,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::VSHLI: return "X86ISD::VSHLI"; case X86ISD::VSRLI: return "X86ISD::VSRLI"; case X86ISD::VSRAI: return "X86ISD::VSRAI"; + case X86ISD::VSHLV: return "X86ISD::VSHLV"; + case X86ISD::VSRLV: return "X86ISD::VSRLV"; case X86ISD::VSRAV: return "X86ISD::VSRAV"; case X86ISD::VROTLI: return "X86ISD::VROTLI"; case X86ISD::VROTRI: return "X86ISD::VROTRI"; diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 910acd80e8b8cb..66d5d43946a264 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -315,10 +315,8 @@ namespace llvm { // Vector shift elements VSHL, VSRL, VSRA, - // Vector variable shift right arithmetic. - // Unlike ISD::SRA, in case shift count greater then element size - // use sign bit to fill destination data element. - VSRAV, + // Vector variable shift + VSHLV, VSRLV, VSRAV, // Vector shift elements by immediate VSHLI, VSRLI, VSRAI, diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 7423cb85acd250..85676f102be0ac 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -6445,52 +6445,53 @@ defm : avx512_var_shift_lowering; // Special handing for handling VPSRAV intrinsics. -multiclass avx512_var_shift_int_lowering p> { +multiclass avx512_var_shift_int_lowering p> { let Predicates = p in { - def : Pat<(_.VT (X86vsrav _.RC:$src1, _.RC:$src2)), + def : Pat<(_.VT (OpNode _.RC:$src1, _.RC:$src2)), (!cast(InstrStr#_.ZSuffix#rr) _.RC:$src1, _.RC:$src2)>; - def : Pat<(_.VT (X86vsrav _.RC:$src1, (_.LdFrag addr:$src2))), + def : Pat<(_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2))), (!cast(InstrStr#_.ZSuffix##rm) _.RC:$src1, addr:$src2)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, - (X86vsrav _.RC:$src1, _.RC:$src2), _.RC:$src0)), + (OpNode _.RC:$src1, _.RC:$src2), _.RC:$src0)), (!cast(InstrStr#_.ZSuffix#rrk) _.RC:$src0, _.KRC:$mask, _.RC:$src1, _.RC:$src2)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, - (X86vsrav _.RC:$src1, (_.LdFrag addr:$src2)), + (OpNode _.RC:$src1, (_.LdFrag addr:$src2)), _.RC:$src0)), (!cast(InstrStr#_.ZSuffix##rmk) _.RC:$src0, _.KRC:$mask, _.RC:$src1, addr:$src2)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, - (X86vsrav _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)), + (OpNode _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)), (!cast(InstrStr#_.ZSuffix#rrkz) _.KRC:$mask, _.RC:$src1, _.RC:$src2)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, - (X86vsrav _.RC:$src1, (_.LdFrag addr:$src2)), + (OpNode _.RC:$src1, (_.LdFrag addr:$src2)), _.ImmAllZerosV)), (!cast(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask, _.RC:$src1, addr:$src2)>; } } -multiclass avx512_var_shift_int_lowering_mb p> : - avx512_var_shift_int_lowering { +multiclass avx512_var_shift_int_lowering_mb p> : + avx512_var_shift_int_lowering { let Predicates = p in { - def : Pat<(_.VT (X86vsrav _.RC:$src1, + def : Pat<(_.VT (OpNode _.RC:$src1, (X86VBroadcast (_.ScalarLdFrag addr:$src2)))), (!cast(InstrStr#_.ZSuffix##rmb) _.RC:$src1, addr:$src2)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, - (X86vsrav _.RC:$src1, + (OpNode _.RC:$src1, (X86VBroadcast (_.ScalarLdFrag addr:$src2))), _.RC:$src0)), (!cast(InstrStr#_.ZSuffix##rmbk) _.RC:$src0, _.KRC:$mask, _.RC:$src1, addr:$src2)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, - (X86vsrav _.RC:$src1, + (OpNode _.RC:$src1, (X86VBroadcast (_.ScalarLdFrag addr:$src2))), _.ImmAllZerosV)), (!cast(InstrStr#_.ZSuffix##rmbkz) _.KRC:$mask, @@ -6498,15 +6499,47 @@ multiclass avx512_var_shift_int_lowering_mb; -defm : avx512_var_shift_int_lowering<"VPSRAVW", v16i16x_info, [HasVLX, HasBWI]>; -defm : avx512_var_shift_int_lowering<"VPSRAVW", v32i16_info, [HasBWI]>; -defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v4i32x_info, [HasVLX]>; -defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v8i32x_info, [HasVLX]>; -defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v16i32_info, [HasAVX512]>; -defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>; -defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>; -defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>; +multiclass avx512_var_shift_int_lowering_vl { + defm : avx512_var_shift_int_lowering; + defm : avx512_var_shift_int_lowering; + defm : avx512_var_shift_int_lowering; +} + +multiclass avx512_var_shift_int_lowering_mb_vl { + defm : avx512_var_shift_int_lowering_mb; + defm : avx512_var_shift_int_lowering_mb; + defm : avx512_var_shift_int_lowering_mb; +} + +defm : avx512_var_shift_int_lowering_vl<"VPSRAVW", X86vsrav, avx512vl_i16_info, + HasBWI>; +defm : avx512_var_shift_int_lowering_mb_vl<"VPSRAVD", X86vsrav, + avx512vl_i32_info, HasAVX512>; +defm : avx512_var_shift_int_lowering_mb_vl<"VPSRAVQ", X86vsrav, + avx512vl_i64_info, HasAVX512>; + +defm : avx512_var_shift_int_lowering_vl<"VPSRLVW", X86vsrlv, avx512vl_i16_info, + HasBWI>; +defm : avx512_var_shift_int_lowering_mb_vl<"VPSRLVD", X86vsrlv, + avx512vl_i32_info, HasAVX512>; +defm : avx512_var_shift_int_lowering_mb_vl<"VPSRLVQ", X86vsrlv, + avx512vl_i64_info, HasAVX512>; + +defm : avx512_var_shift_int_lowering_vl<"VPSLLVW", X86vshlv, avx512vl_i16_info, + HasBWI>; +defm : avx512_var_shift_int_lowering_mb_vl<"VPSLLVD", X86vshlv, + avx512vl_i32_info, HasAVX512>; +defm : avx512_var_shift_int_lowering_mb_vl<"VPSLLVQ", X86vshlv, + avx512vl_i64_info, HasAVX512>; + // Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX. let Predicates = [HasAVX512, NoVLX] in { diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 11a27ba90586be..3d508e2c34f39a 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -198,6 +198,8 @@ def X86vsra : SDNode<"X86ISD::VSRA", X86vshiftuniform>; def X86vshiftvariable : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisInt<0>]>; +def X86vshlv : SDNode<"X86ISD::VSHLV", X86vshiftvariable>; +def X86vsrlv : SDNode<"X86ISD::VSRLV", X86vshiftvariable>; def X86vsrav : SDNode<"X86ISD::VSRAV", X86vshiftvariable>; def X86vshli : SDNode<"X86ISD::VSHLI", X86vshiftimm>; diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index e2bcd18ce66079..ddfc369b1180ec 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -8318,7 +8318,7 @@ def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128:$src))), // Variable Bit Shifts // multiclass avx2_var_shift opc, string OpcodeStr, SDNode OpNode, - ValueType vt128, ValueType vt256> { + SDNode IntrinNode, ValueType vt128, ValueType vt256> { def rr : AVX28I opc, string OpcodeStr, SDNode OpNode, (vt256 (load addr:$src2)))))]>, VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM.Folded, SchedWriteVarVecShift.YMM.ReadAfterFold]>; + + def : Pat<(vt128 (IntrinNode VR128:$src1, VR128:$src2)), + (!cast(NAME#"rr") VR128:$src1, VR128:$src2)>; + def : Pat<(vt128 (IntrinNode VR128:$src1, (load addr:$src2))), + (!cast(NAME#"rm") VR128:$src1, addr:$src2)>; + def : Pat<(vt256 (IntrinNode VR256:$src1, VR256:$src2)), + (!cast(NAME#"Yrr") VR256:$src1, VR256:$src2)>; + def : Pat<(vt256 (IntrinNode VR256:$src1, (load addr:$src2))), + (!cast(NAME#"Yrm") VR256:$src1, addr:$src2)>; } let Predicates = [HasAVX2, NoVLX] in { - defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, v4i32, v8i32>; - defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W; - defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>; - defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W; - defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>; - - def : Pat<(v4i32 (X86vsrav VR128:$src1, VR128:$src2)), - (VPSRAVDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86vsrav VR128:$src1, (load addr:$src2))), - (VPSRAVDrm VR128:$src1, addr:$src2)>; - def : Pat<(v8i32 (X86vsrav VR256:$src1, VR256:$src2)), - (VPSRAVDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86vsrav VR256:$src1, (load addr:$src2))), - (VPSRAVDYrm VR256:$src1, addr:$src2)>; + defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, X86vshlv, v4i32, v8i32>; + defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, X86vshlv, v2i64, v4i64>, VEX_W; + defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, X86vsrlv, v4i32, v8i32>; + defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, X86vsrlv, v2i64, v4i64>, VEX_W; + defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, X86vsrav, v4i32, v8i32>; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 151e1b9136c4a4..acb3d48463de68 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -389,10 +389,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx2_pslli_d, VSHIFT, X86ISD::VSHLI, 0), X86_INTRINSIC_DATA(avx2_pslli_q, VSHIFT, X86ISD::VSHLI, 0), X86_INTRINSIC_DATA(avx2_pslli_w, VSHIFT, X86ISD::VSHLI, 0), - X86_INTRINSIC_DATA(avx2_psllv_d, INTR_TYPE_2OP, ISD::SHL, 0), - X86_INTRINSIC_DATA(avx2_psllv_d_256, INTR_TYPE_2OP, ISD::SHL, 0), - X86_INTRINSIC_DATA(avx2_psllv_q, INTR_TYPE_2OP, ISD::SHL, 0), - X86_INTRINSIC_DATA(avx2_psllv_q_256, INTR_TYPE_2OP, ISD::SHL, 0), + X86_INTRINSIC_DATA(avx2_psllv_d, INTR_TYPE_2OP, X86ISD::VSHLV, 0), + X86_INTRINSIC_DATA(avx2_psllv_d_256, INTR_TYPE_2OP, X86ISD::VSHLV, 0), + X86_INTRINSIC_DATA(avx2_psllv_q, INTR_TYPE_2OP, X86ISD::VSHLV, 0), + X86_INTRINSIC_DATA(avx2_psllv_q_256, INTR_TYPE_2OP, X86ISD::VSHLV, 0), X86_INTRINSIC_DATA(avx2_psra_d, INTR_TYPE_2OP, X86ISD::VSRA, 0), X86_INTRINSIC_DATA(avx2_psra_w, INTR_TYPE_2OP, X86ISD::VSRA, 0), X86_INTRINSIC_DATA(avx2_psrai_d, VSHIFT, X86ISD::VSRAI, 0), @@ -405,10 +405,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx2_psrli_d, VSHIFT, X86ISD::VSRLI, 0), X86_INTRINSIC_DATA(avx2_psrli_q, VSHIFT, X86ISD::VSRLI, 0), X86_INTRINSIC_DATA(avx2_psrli_w, VSHIFT, X86ISD::VSRLI, 0), - X86_INTRINSIC_DATA(avx2_psrlv_d, INTR_TYPE_2OP, ISD::SRL, 0), - X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, ISD::SRL, 0), - X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, ISD::SRL, 0), - X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, ISD::SRL, 0), + X86_INTRINSIC_DATA(avx2_psrlv_d, INTR_TYPE_2OP, X86ISD::VSRLV, 0), + X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, X86ISD::VSRLV, 0), + X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, X86ISD::VSRLV, 0), + X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, X86ISD::VSRLV, 0), X86_INTRINSIC_DATA(avx512_add_pd_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND), X86_INTRINSIC_DATA(avx512_add_ps_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND), X86_INTRINSIC_DATA(avx512_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPM, 0), @@ -943,11 +943,11 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_pslli_d_512, VSHIFT, X86ISD::VSHLI, 0), X86_INTRINSIC_DATA(avx512_pslli_q_512, VSHIFT, X86ISD::VSHLI, 0), X86_INTRINSIC_DATA(avx512_pslli_w_512, VSHIFT, X86ISD::VSHLI, 0), - X86_INTRINSIC_DATA(avx512_psllv_d_512, INTR_TYPE_2OP, ISD::SHL, 0), - X86_INTRINSIC_DATA(avx512_psllv_q_512, INTR_TYPE_2OP, ISD::SHL, 0), - X86_INTRINSIC_DATA(avx512_psllv_w_128, INTR_TYPE_2OP, ISD::SHL, 0), - X86_INTRINSIC_DATA(avx512_psllv_w_256, INTR_TYPE_2OP, ISD::SHL, 0), - X86_INTRINSIC_DATA(avx512_psllv_w_512, INTR_TYPE_2OP, ISD::SHL, 0), + X86_INTRINSIC_DATA(avx512_psllv_d_512, INTR_TYPE_2OP, X86ISD::VSHLV, 0), + X86_INTRINSIC_DATA(avx512_psllv_q_512, INTR_TYPE_2OP, X86ISD::VSHLV, 0), + X86_INTRINSIC_DATA(avx512_psllv_w_128, INTR_TYPE_2OP, X86ISD::VSHLV, 0), + X86_INTRINSIC_DATA(avx512_psllv_w_256, INTR_TYPE_2OP, X86ISD::VSHLV, 0), + X86_INTRINSIC_DATA(avx512_psllv_w_512, INTR_TYPE_2OP, X86ISD::VSHLV, 0), X86_INTRINSIC_DATA(avx512_psra_d_512, INTR_TYPE_2OP, X86ISD::VSRA, 0), X86_INTRINSIC_DATA(avx512_psra_q_128, INTR_TYPE_2OP, X86ISD::VSRA, 0), X86_INTRINSIC_DATA(avx512_psra_q_256, INTR_TYPE_2OP, X86ISD::VSRA, 0), @@ -971,11 +971,11 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_psrli_d_512, VSHIFT, X86ISD::VSRLI, 0), X86_INTRINSIC_DATA(avx512_psrli_q_512, VSHIFT, X86ISD::VSRLI, 0), X86_INTRINSIC_DATA(avx512_psrli_w_512, VSHIFT, X86ISD::VSRLI, 0), - X86_INTRINSIC_DATA(avx512_psrlv_d_512, INTR_TYPE_2OP, ISD::SRL, 0), - X86_INTRINSIC_DATA(avx512_psrlv_q_512, INTR_TYPE_2OP, ISD::SRL, 0), - X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, ISD::SRL, 0), - X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, ISD::SRL, 0), - X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, ISD::SRL, 0), + X86_INTRINSIC_DATA(avx512_psrlv_d_512, INTR_TYPE_2OP, X86ISD::VSRLV, 0), + X86_INTRINSIC_DATA(avx512_psrlv_q_512, INTR_TYPE_2OP, X86ISD::VSRLV, 0), + X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, X86ISD::VSRLV, 0), + X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, X86ISD::VSRLV, 0), + X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, X86ISD::VSRLV, 0), X86_INTRINSIC_DATA(avx512_pternlog_d_128, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0), X86_INTRINSIC_DATA(avx512_pternlog_d_256, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0), X86_INTRINSIC_DATA(avx512_pternlog_d_512, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0), diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll index 617e198bce4c1b..de20c07d41399a 100644 --- a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll @@ -1183,38 +1183,58 @@ define <4 x i32> @test_x86_avx2_psllv_d(<4 x i32> %a0, <4 x i32> %a1) { define <4 x i32> @test_x86_avx2_psllv_d_const() { ; X86-AVX-LABEL: test_x86_avx2_psllv_d_const: ; X86-AVX: # %bb.0: -; X86-AVX-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2,2,2,2] -; X86-AVX-NEXT: # encoding: [0xc4,0xe2,0x79,0x58,0x05,A,A,A,A] +; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [2,9,0,4294967295] +; X86-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] +; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-AVX-NEXT: vpsllvd {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x47,0x05,A,A,A,A] ; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4 -; X86-AVX-NEXT: vpaddd {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0x05,A,A,A,A] +; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1,1,4294967295] +; X86-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A] ; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-AVX-NEXT: vpsllvd %xmm1, %xmm1, %xmm1 # encoding: [0xc4,0xe2,0x71,0x47,0xc9] +; X86-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1] ; X86-AVX-NEXT: retl # encoding: [0xc3] ; ; X86-AVX512VL-LABEL: test_x86_avx2_psllv_d_const: ; X86-AVX512VL: # %bb.0: -; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = <4,9,0,u> +; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [2,9,0,4294967295] ; X86-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] ; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4 -; X86-AVX512VL-NEXT: vpaddd {{\.LCPI.*}}{1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x05,A,A,A,A] -; X86-AVX512VL-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: vpsllvd {{\.LCPI.*}}, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x47,0x05,A,A,A,A] +; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %xmm1 # EVEX TO VEX Compression xmm1 = [1,1,1,4294967295] +; X86-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A] +; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: vpsllvd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x47,0xc9] +; X86-AVX512VL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1] ; X86-AVX512VL-NEXT: retl # encoding: [0xc3] ; ; X64-AVX-LABEL: test_x86_avx2_psllv_d_const: ; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2,2,2,2] -; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x79,0x58,0x05,A,A,A,A] +; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [2,9,0,4294967295] +; X64-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] +; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x47,0x05,A,A,A,A] ; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte -; X64-AVX-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0x05,A,A,A,A] +; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1,1,4294967295] +; X64-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A] ; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-AVX-NEXT: vpsllvd %xmm1, %xmm1, %xmm1 # encoding: [0xc4,0xe2,0x71,0x47,0xc9] +; X64-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1] ; X64-AVX-NEXT: retq # encoding: [0xc3] ; ; X64-AVX512VL-LABEL: test_x86_avx2_psllv_d_const: ; X64-AVX512VL: # %bb.0: -; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = <4,9,0,u> +; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [2,9,0,4294967295] ; X64-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] ; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte -; X64-AVX512VL-NEXT: vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x05,A,A,A,A] -; X64-AVX512VL-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x47,0x05,A,A,A,A] +; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %xmm1 # EVEX TO VEX Compression xmm1 = [1,1,1,4294967295] +; X64-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A] +; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vpsllvd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x47,0xc9] +; X64-AVX512VL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1] ; X64-AVX512VL-NEXT: retq # encoding: [0xc3] %res0 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> , <4 x i32> ) %res1 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> , <4 x i32> ) @@ -1241,38 +1261,62 @@ define <8 x i32> @test_x86_avx2_psllv_d_256(<8 x i32> %a0, <8 x i32> %a1) { define <8 x i32> @test_x86_avx2_psllv_d_256_const() { ; X86-AVX-LABEL: test_x86_avx2_psllv_d_256_const: ; X86-AVX: # %bb.0: -; X86-AVX-NEXT: vpbroadcastd {{.*#+}} ymm0 = [8,8,8,8,8,8,8,8] -; X86-AVX-NEXT: # encoding: [0xc4,0xe2,0x7d,0x58,0x05,A,A,A,A] +; X86-AVX-NEXT: vmovdqa {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0] +; X86-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A] +; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-AVX-NEXT: vpsllvd {{\.LCPI.*}}, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x47,0x05,A,A,A,A] ; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4 -; X86-AVX-NEXT: vpaddd {{\.LCPI.*}}, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0x05,A,A,A,A] +; X86-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295] +; X86-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A] ; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-AVX-NEXT: vpsllvd {{\.LCPI.*}}, %ymm1, %ymm1 # encoding: [0xc4,0xe2,0x75,0x47,0x0d,A,A,A,A] +; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0xc1] ; X86-AVX-NEXT: retl # encoding: [0xc3] ; ; X86-AVX512VL-LABEL: test_x86_avx2_psllv_d_256_const: ; X86-AVX512VL: # %bb.0: -; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = <4,9,0,u,12,7,u,0> +; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [2,9,0,4294967295,3,7,4294967295,0] ; X86-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A] ; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4 -; X86-AVX512VL-NEXT: vpaddd {{\.LCPI.*}}{1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x05,A,A,A,A] -; X86-AVX512VL-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: vpsllvd {{\.LCPI.*}}, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x47,0x05,A,A,A,A] +; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %ymm1 # EVEX TO VEX Compression ymm1 = [4,4,4,4,4,4,4,4294967295] +; X86-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A] +; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: vpsllvd {{\.LCPI.*}}, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x47,0x0d,A,A,A,A] +; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1] ; X86-AVX512VL-NEXT: retl # encoding: [0xc3] ; ; X64-AVX-LABEL: test_x86_avx2_psllv_d_256_const: ; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpbroadcastd {{.*#+}} ymm0 = [8,8,8,8,8,8,8,8] -; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x7d,0x58,0x05,A,A,A,A] +; X64-AVX-NEXT: vmovdqa {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0] +; X64-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A] +; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-AVX-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x47,0x05,A,A,A,A] ; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte -; X64-AVX-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0x05,A,A,A,A] +; X64-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295] +; X64-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A] ; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-AVX-NEXT: vpsllvd {{.*}}(%rip), %ymm1, %ymm1 # encoding: [0xc4,0xe2,0x75,0x47,0x0d,A,A,A,A] +; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0xc1] ; X64-AVX-NEXT: retq # encoding: [0xc3] ; ; X64-AVX512VL-LABEL: test_x86_avx2_psllv_d_256_const: ; X64-AVX512VL: # %bb.0: -; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = <4,9,0,u,12,7,u,0> +; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [2,9,0,4294967295,3,7,4294967295,0] ; X64-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A] ; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte -; X64-AVX512VL-NEXT: vpaddd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x05,A,A,A,A] -; X64-AVX512VL-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x47,0x05,A,A,A,A] +; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %ymm1 # EVEX TO VEX Compression ymm1 = [4,4,4,4,4,4,4,4294967295] +; X64-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A] +; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vpsllvd {{.*}}(%rip), %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x47,0x0d,A,A,A,A] +; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1] ; X64-AVX512VL-NEXT: retq # encoding: [0xc3] %res0 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> , <8 x i32> ) %res1 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> , <8 x i32> ) @@ -1316,14 +1360,20 @@ define <2 x i64> @test_x86_avx2_psllv_q_const() { ; ; X64-AVX-LABEL: test_x86_avx2_psllv_q_const: ; X64-AVX: # %bb.0: -; X64-AVX-NEXT: movl $8, %eax # encoding: [0xb8,0x08,0x00,0x00,0x00] -; X64-AVX-NEXT: vmovq %rax, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc0] +; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4,18446744073709551615] +; X64-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] +; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-AVX-NEXT: vpsllvq {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0x47,0x05,A,A,A,A] +; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte ; X64-AVX-NEXT: retq # encoding: [0xc3] ; ; X64-AVX512VL-LABEL: test_x86_avx2_psllv_q_const: ; X64-AVX512VL: # %bb.0: -; X64-AVX512VL-NEXT: movl $8, %eax # encoding: [0xb8,0x08,0x00,0x00,0x00] -; X64-AVX512VL-NEXT: vmovq %rax, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc0] +; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [4,18446744073709551615] +; X64-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] +; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vpsllvq {{.*}}(%rip), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x47,0x05,A,A,A,A] +; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte ; X64-AVX512VL-NEXT: retq # encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> , <2 x i64> ) ret <2 x i64> %res @@ -1366,15 +1416,19 @@ define <4 x i64> @test_x86_avx2_psllv_q_256_const() { ; ; X64-AVX-LABEL: test_x86_avx2_psllv_q_256_const: ; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vbroadcastsd {{.*#+}} ymm0 = [8,8,8,8] -; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x7d,0x19,0x05,A,A,A,A] +; X64-AVX-NEXT: vmovdqa {{.*#+}} ymm0 = [4,4,4,18446744073709551615] +; X64-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A] +; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-AVX-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0xfd,0x47,0x05,A,A,A,A] ; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte ; X64-AVX-NEXT: retq # encoding: [0xc3] ; ; X64-AVX512VL-LABEL: test_x86_avx2_psllv_q_256_const: ; X64-AVX512VL: # %bb.0: -; X64-AVX512VL-NEXT: vbroadcastsd {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [8,8,8,8] -; X64-AVX512VL-NEXT: # encoding: [0xc4,0xe2,0x7d,0x19,0x05,A,A,A,A] +; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,18446744073709551615] +; X64-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A] +; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x47,0x05,A,A,A,A] ; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte ; X64-AVX512VL-NEXT: retq # encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> , <4 x i64> ) @@ -1400,38 +1454,62 @@ define <4 x i32> @test_x86_avx2_psrlv_d(<4 x i32> %a0, <4 x i32> %a1) { define <4 x i32> @test_x86_avx2_psrlv_d_const() { ; X86-AVX-LABEL: test_x86_avx2_psrlv_d_const: ; X86-AVX: # %bb.0: -; X86-AVX-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2,2,2,2] -; X86-AVX-NEXT: # encoding: [0xc4,0xe2,0x79,0x58,0x05,A,A,A,A] +; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [2,9,0,4294967295] +; X86-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] +; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-AVX-NEXT: vpsrlvd {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x45,0x05,A,A,A,A] ; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4 -; X86-AVX-NEXT: vpaddd {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0x05,A,A,A,A] +; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4294967295] +; X86-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A] ; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-AVX-NEXT: vpsrlvd {{\.LCPI.*}}, %xmm1, %xmm1 # encoding: [0xc4,0xe2,0x71,0x45,0x0d,A,A,A,A] +; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1] ; X86-AVX-NEXT: retl # encoding: [0xc3] ; ; X86-AVX512VL-LABEL: test_x86_avx2_psrlv_d_const: ; X86-AVX512VL: # %bb.0: -; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = <1,9,0,u> +; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [2,9,0,4294967295] ; X86-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] ; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4 -; X86-AVX512VL-NEXT: vpaddd {{\.LCPI.*}}{1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x05,A,A,A,A] -; X86-AVX512VL-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: vpsrlvd {{\.LCPI.*}}, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x45,0x05,A,A,A,A] +; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %xmm1 # EVEX TO VEX Compression xmm1 = [4,4,4,4294967295] +; X86-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A] +; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: vpsrlvd {{\.LCPI.*}}, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x45,0x0d,A,A,A,A] +; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1] ; X86-AVX512VL-NEXT: retl # encoding: [0xc3] ; ; X64-AVX-LABEL: test_x86_avx2_psrlv_d_const: ; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2,2,2,2] -; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x79,0x58,0x05,A,A,A,A] +; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [2,9,0,4294967295] +; X64-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] +; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-AVX-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x45,0x05,A,A,A,A] ; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte -; X64-AVX-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0x05,A,A,A,A] +; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4294967295] +; X64-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A] ; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-AVX-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 # encoding: [0xc4,0xe2,0x71,0x45,0x0d,A,A,A,A] +; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1] ; X64-AVX-NEXT: retq # encoding: [0xc3] ; ; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_d_const: ; X64-AVX512VL: # %bb.0: -; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = <1,9,0,u> +; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [2,9,0,4294967295] ; X64-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] ; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte -; X64-AVX512VL-NEXT: vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x05,A,A,A,A] -; X64-AVX512VL-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x45,0x05,A,A,A,A] +; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %xmm1 # EVEX TO VEX Compression xmm1 = [4,4,4,4294967295] +; X64-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A] +; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x45,0x0d,A,A,A,A] +; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1] ; X64-AVX512VL-NEXT: retq # encoding: [0xc3] %res0 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> , <4 x i32> ) %res1 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> , <4 x i32> ) @@ -1458,38 +1536,62 @@ define <8 x i32> @test_x86_avx2_psrlv_d_256(<8 x i32> %a0, <8 x i32> %a1) { define <8 x i32> @test_x86_avx2_psrlv_d_256_const() { ; X86-AVX-LABEL: test_x86_avx2_psrlv_d_256_const: ; X86-AVX: # %bb.0: -; X86-AVX-NEXT: vpbroadcastd {{.*#+}} ymm0 = [2,2,2,2,2,2,2,2] -; X86-AVX-NEXT: # encoding: [0xc4,0xe2,0x7d,0x58,0x05,A,A,A,A] +; X86-AVX-NEXT: vmovdqa {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0] +; X86-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A] +; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-AVX-NEXT: vpsrlvd {{\.LCPI.*}}, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x45,0x05,A,A,A,A] ; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4 -; X86-AVX-NEXT: vpaddd {{\.LCPI.*}}, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0x05,A,A,A,A] +; X86-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295] +; X86-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A] ; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-AVX-NEXT: vpsrlvd {{\.LCPI.*}}, %ymm1, %ymm1 # encoding: [0xc4,0xe2,0x75,0x45,0x0d,A,A,A,A] +; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0xc1] ; X86-AVX-NEXT: retl # encoding: [0xc3] ; ; X86-AVX512VL-LABEL: test_x86_avx2_psrlv_d_256_const: ; X86-AVX512VL: # %bb.0: -; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = <1,9,0,u,0,7,u,0> +; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [2,9,0,4294967295,3,7,4294967295,0] ; X86-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A] ; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4 -; X86-AVX512VL-NEXT: vpaddd {{\.LCPI.*}}{1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x05,A,A,A,A] -; X86-AVX512VL-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: vpsrlvd {{\.LCPI.*}}, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x45,0x05,A,A,A,A] +; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %ymm1 # EVEX TO VEX Compression ymm1 = [4,4,4,4,4,4,4,4294967295] +; X86-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A] +; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: vpsrlvd {{\.LCPI.*}}, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x45,0x0d,A,A,A,A] +; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1] ; X86-AVX512VL-NEXT: retl # encoding: [0xc3] ; ; X64-AVX-LABEL: test_x86_avx2_psrlv_d_256_const: ; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpbroadcastd {{.*#+}} ymm0 = [2,2,2,2,2,2,2,2] -; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x7d,0x58,0x05,A,A,A,A] +; X64-AVX-NEXT: vmovdqa {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0] +; X64-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A] +; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-AVX-NEXT: vpsrlvd {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x45,0x05,A,A,A,A] ; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte -; X64-AVX-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0x05,A,A,A,A] +; X64-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295] +; X64-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A] ; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-AVX-NEXT: vpsrlvd {{.*}}(%rip), %ymm1, %ymm1 # encoding: [0xc4,0xe2,0x75,0x45,0x0d,A,A,A,A] +; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0xc1] ; X64-AVX-NEXT: retq # encoding: [0xc3] ; ; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_d_256_const: ; X64-AVX512VL: # %bb.0: -; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = <1,9,0,u,0,7,u,0> +; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [2,9,0,4294967295,3,7,4294967295,0] ; X64-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A] ; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte -; X64-AVX512VL-NEXT: vpaddd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x05,A,A,A,A] -; X64-AVX512VL-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x45,0x05,A,A,A,A] +; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %ymm1 # EVEX TO VEX Compression ymm1 = [4,4,4,4,4,4,4,4294967295] +; X64-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A] +; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x45,0x0d,A,A,A,A] +; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1] ; X64-AVX512VL-NEXT: retq # encoding: [0xc3] %res0 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> , <8 x i32> ) %res1 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> , <8 x i32> ) @@ -1534,14 +1636,20 @@ define <2 x i64> @test_x86_avx2_psrlv_q_const() { ; ; X64-AVX-LABEL: test_x86_avx2_psrlv_q_const: ; X64-AVX: # %bb.0: -; X64-AVX-NEXT: movl $2, %eax # encoding: [0xb8,0x02,0x00,0x00,0x00] -; X64-AVX-NEXT: vmovq %rax, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc0] +; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4,4] +; X64-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] +; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-AVX-NEXT: vpsrlvq {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0x45,0x05,A,A,A,A] +; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte ; X64-AVX-NEXT: retq # encoding: [0xc3] ; ; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_q_const: ; X64-AVX512VL: # %bb.0: -; X64-AVX512VL-NEXT: movl $2, %eax # encoding: [0xb8,0x02,0x00,0x00,0x00] -; X64-AVX512VL-NEXT: vmovq %rax, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc0] +; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [4,4] +; X64-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] +; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vpsrlvq {{.*}}(%rip), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x45,0x05,A,A,A,A] +; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte ; X64-AVX512VL-NEXT: retq # encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> , <2 x i64> ) ret <2 x i64> %res @@ -1585,15 +1693,19 @@ define <4 x i64> @test_x86_avx2_psrlv_q_256_const() { ; ; X64-AVX-LABEL: test_x86_avx2_psrlv_q_256_const: ; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vbroadcastsd {{.*#+}} ymm0 = [2,2,2,2] -; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x7d,0x19,0x05,A,A,A,A] +; X64-AVX-NEXT: vpbroadcastq {{.*#+}} ymm0 = [4,4,4,4] +; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x7d,0x59,0x05,A,A,A,A] +; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-AVX-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0xfd,0x45,0x05,A,A,A,A] ; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte ; X64-AVX-NEXT: retq # encoding: [0xc3] ; ; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_q_256_const: ; X64-AVX512VL: # %bb.0: -; X64-AVX512VL-NEXT: vbroadcastsd {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [2,2,2,2] -; X64-AVX512VL-NEXT: # encoding: [0xc4,0xe2,0x7d,0x19,0x05,A,A,A,A] +; X64-AVX512VL-NEXT: vpbroadcastq {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4] +; X64-AVX512VL-NEXT: # encoding: [0xc4,0xe2,0x7d,0x59,0x05,A,A,A,A] +; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x45,0x05,A,A,A,A] ; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte ; X64-AVX512VL-NEXT: retq # encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> , <4 x i64> ) diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll index a0e8393309de0b..6a1d9d3da91709 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll @@ -5229,8 +5229,11 @@ define <16 x i32> @test_x86_avx512_psllv_d_512(<16 x i32> %a0, <16 x i32> %a1) { define <16 x i32> @test_x86_avx512_psllv_d_512_const() { ; CHECK-LABEL: test_x86_avx512_psllv_d_512_const: ; CHECK: ## %bb.0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = <4,9,0,u,12,7,u,0,32,5,u,0,80,3,u,0> -; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,9,0,4294967295,3,7,4294967295,0,4,5,4294967294,0,5,3,4294967293,0] +; CHECK-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0 +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4294967295] +; CHECK-NEXT: vpsllvd {{.*}}(%rip), %zmm1, %zmm1 +; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; CHECK-NEXT: retq %res0 = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> , <16 x i32> ) %res1 = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> , <16 x i32> ) @@ -5277,8 +5280,11 @@ define <8 x i64> @test_x86_avx512_psllv_q_512(<8 x i64> %a0, <8 x i64> %a1) { define <8 x i64> @test_x86_avx512_psllv_q_512_const() { ; CHECK-LABEL: test_x86_avx512_psllv_q_512_const: ; CHECK: ## %bb.0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = <4,9,0,u,12,7,18446744056529682432,0> -; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,9,0,18446744073709551615,3,7,18446744073709551615,0] +; CHECK-NEXT: vpsllvq {{.*}}(%rip), %zmm0, %zmm0 +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [4,4,4,4,4,4,4,18446744073709551615] +; CHECK-NEXT: vpsllvq {{.*}}(%rip), %zmm1, %zmm1 +; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0 ; CHECK-NEXT: retq %res0 = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> , <8 x i64> ) %res1 = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> , <8 x i64> ) @@ -5397,8 +5403,11 @@ define <16 x i32> @test_x86_avx512_psrlv_d_512(<16 x i32> %a0, <16 x i32> %a1) { define <16 x i32> @test_x86_avx512_psrlv_d_512_const() { ; CHECK-LABEL: test_x86_avx512_psrlv_d_512_const: ; CHECK: ## %bb.0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = <1,9,0,u,0,7,u,0,0,5,u,0,0,3,u,0> -; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,9,0,4294967295,3,7,4294967295,0,4,5,4294967294,0,5,3,4294967293,0] +; CHECK-NEXT: vpsrlvd {{.*}}(%rip), %zmm0, %zmm0 +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4294967295] +; CHECK-NEXT: vpsrlvd {{.*}}(%rip), %zmm1, %zmm1 +; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; CHECK-NEXT: retq %res0 = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> , <16 x i32> ) %res1 = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> , <16 x i32> ) @@ -5445,8 +5454,11 @@ define <8 x i64> @test_x86_avx512_psrlv_q_512(<8 x i64> %a0, <8 x i64> %a1) { define <8 x i64> @test_x86_avx512_psrlv_q_512_const() { ; CHECK-LABEL: test_x86_avx512_psrlv_q_512_const: ; CHECK: ## %bb.0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = <1,9,0,u,0,7,1073741823,0> -; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,9,0,18446744073709551615,3,7,18446744073709551615,0] +; CHECK-NEXT: vpsrlvq {{.*}}(%rip), %zmm0, %zmm0 +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [4,4,4,4,4,4,4,18446744073709551615] +; CHECK-NEXT: vpsrlvq {{.*}}(%rip), %zmm1, %zmm1 +; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0 ; CHECK-NEXT: retq %res0 = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> , <8 x i64> ) %res1 = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> , <8 x i64> ) diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll index 8bcdc5d5c0298c..a220ab0ad73495 100644 --- a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll @@ -1158,15 +1158,19 @@ declare <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16>, <32 x i16>) nounwind define <32 x i16> @test_x86_avx512_psrlv_w_512_const() optsize { ; X86-LABEL: test_x86_avx512_psrlv_w_512_const: ; X86: # %bb.0: -; X86-NEXT: vpbroadcastw {{.*#+}} zmm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] -; X86-NEXT: # encoding: [0x62,0xf2,0x7d,0x48,0x79,0x05,A,A,A,A] +; X86-NEXT: vmovdqa64 {{.*#+}} zmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535] +; X86-NEXT: # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A] +; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-NEXT: vpsrlvw {{\.LCPI.*}}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0x05,A,A,A,A] ; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4 ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_x86_avx512_psrlv_w_512_const: ; X64: # %bb.0: -; X64-NEXT: vpbroadcastw {{.*#+}} zmm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] -; X64-NEXT: # encoding: [0x62,0xf2,0x7d,0x48,0x79,0x05,A,A,A,A] +; X64-NEXT: vmovdqa64 {{.*#+}} zmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535] +; X64-NEXT: # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A] +; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0x05,A,A,A,A] ; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte ; X64-NEXT: retq # encoding: [0xc3] %res1 = call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> , <32 x i16> ) @@ -1377,15 +1381,19 @@ declare <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16>, <8 x i16>) nounwind r define <32 x i16> @test_x86_avx512_psllv_w_512_const() optsize { ; X86-LABEL: test_x86_avx512_psllv_w_512_const: ; X86: # %bb.0: -; X86-NEXT: vpbroadcastw {{.*#+}} zmm0 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; X86-NEXT: # encoding: [0x62,0xf2,0x7d,0x48,0x79,0x05,A,A,A,A] +; X86-NEXT: vmovdqa64 {{.*#+}} zmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535] +; X86-NEXT: # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A] +; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-NEXT: vpsllvw {{\.LCPI.*}}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0x05,A,A,A,A] ; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4 ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_x86_avx512_psllv_w_512_const: ; X64: # %bb.0: -; X64-NEXT: vpbroadcastw {{.*#+}} zmm0 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; X64-NEXT: # encoding: [0x62,0xf2,0x7d,0x48,0x79,0x05,A,A,A,A] +; X64-NEXT: vmovdqa64 {{.*#+}} zmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535] +; X64-NEXT: # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A] +; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0x05,A,A,A,A] ; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte ; X64-NEXT: retq # encoding: [0xc3] %res1 = call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> , <32 x i16> ) diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll index 10ba0e646055f2..a01252f7d494b7 100644 --- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -2021,16 +2021,20 @@ define <8 x i16>@test_int_x86_avx512_mask_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1 define <8 x i16> @test_int_x86_avx512_psrlv_w_128_const() optsize { ; X86-LABEL: test_int_x86_avx512_psrlv_w_128_const: ; X86: # %bb.0: -; X86-NEXT: vpbroadcastw {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [2,2,2,2,2,2,2,2] -; X86-NEXT: # encoding: [0xc4,0xe2,0x79,0x79,0x05,A,A,A,A] -; X86-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-NEXT: vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535] +; X86-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] +; X86-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-NEXT: vpsrlvw {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x10,0x05,A,A,A,A] +; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4 ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_psrlv_w_128_const: ; X64: # %bb.0: -; X64-NEXT: vpbroadcastw {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [2,2,2,2,2,2,2,2] -; X64-NEXT: # encoding: [0xc4,0xe2,0x79,0x79,0x05,A,A,A,A] -; X64-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-NEXT: vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535] +; X64-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] +; X64-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-NEXT: vpsrlvw {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x10,0x05,A,A,A,A] +; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte ; X64-NEXT: retq # encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> , <8 x i16> ) ret <8 x i16> %res @@ -2041,16 +2045,20 @@ declare <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16>, <8 x i16>) define <16 x i16> @test_int_x86_avx512_psrlv_w_256_const() optsize { ; X86-LABEL: test_int_x86_avx512_psrlv_w_256_const: ; X86: # %bb.0: -; X86-NEXT: vpbroadcastw {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] -; X86-NEXT: # encoding: [0xc4,0xe2,0x7d,0x79,0x05,A,A,A,A] -; X86-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-NEXT: vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535] +; X86-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A] +; X86-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-NEXT: vpsrlvw {{\.LCPI.*}}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x10,0x05,A,A,A,A] +; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4 ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_psrlv_w_256_const: ; X64: # %bb.0: -; X64-NEXT: vpbroadcastw {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] -; X64-NEXT: # encoding: [0xc4,0xe2,0x7d,0x79,0x05,A,A,A,A] -; X64-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-NEXT: vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535] +; X64-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A] +; X64-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-NEXT: vpsrlvw {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x10,0x05,A,A,A,A] +; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte ; X64-NEXT: retq # encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> , <16 x i16> ) ret <16 x i16> %res @@ -2195,16 +2203,20 @@ define <8 x i16>@test_int_x86_avx512_mask_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1 define <8 x i16> @test_int_x86_avx512_psllv_w_128_const() optsize { ; X86-LABEL: test_int_x86_avx512_psllv_w_128_const: ; X86: # %bb.0: -; X86-NEXT: vpbroadcastw {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [8,8,8,8,8,8,8,8] -; X86-NEXT: # encoding: [0xc4,0xe2,0x79,0x79,0x05,A,A,A,A] -; X86-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-NEXT: vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535] +; X86-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] +; X86-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-NEXT: vpsllvw {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x12,0x05,A,A,A,A] +; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4 ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_psllv_w_128_const: ; X64: # %bb.0: -; X64-NEXT: vpbroadcastw {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [8,8,8,8,8,8,8,8] -; X64-NEXT: # encoding: [0xc4,0xe2,0x79,0x79,0x05,A,A,A,A] -; X64-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-NEXT: vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535] +; X64-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] +; X64-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-NEXT: vpsllvw {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x12,0x05,A,A,A,A] +; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte ; X64-NEXT: retq # encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> , <8 x i16> ) ret <8 x i16> %res @@ -2216,16 +2228,20 @@ declare <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16>, <8 x i16>) define <16 x i16> @test_int_x86_avx512_psllv_w_256_const() optsize { ; X86-LABEL: test_int_x86_avx512_psllv_w_256_const: ; X86: # %bb.0: -; X86-NEXT: vpbroadcastw {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; X86-NEXT: # encoding: [0xc4,0xe2,0x7d,0x79,0x05,A,A,A,A] -; X86-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-NEXT: vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535] +; X86-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A] +; X86-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-NEXT: vpsllvw {{\.LCPI.*}}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x12,0x05,A,A,A,A] +; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4 ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_psllv_w_256_const: ; X64: # %bb.0: -; X64-NEXT: vpbroadcastw {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; X64-NEXT: # encoding: [0xc4,0xe2,0x7d,0x79,0x05,A,A,A,A] -; X64-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-NEXT: vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535] +; X64-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A] +; X64-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-NEXT: vpsllvw {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x12,0x05,A,A,A,A] +; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte ; X64-NEXT: retq # encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> , <16 x i16> ) ret <16 x i16> %res From 37174c6e21dd381fab31e33877865648c8d4088f Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 17 Jan 2019 13:46:36 +0000 Subject: [PATCH 011/125] Merging r351326: ------------------------------------------------------------------------ r351326 | psmith | 2019-01-16 13:09:13 +0100 (Wed, 16 Jan 2019) | 15 lines [ELF] Implement option to force PIC compatible Thunks By default LLD will generate position independent Thunks when the --pie or --shared option is used. Reference to absolute addresses is permitted in other cases. For some embedded systems position independent thunks are needed for code that executes before the MMU has been set up. The option --pic-veneer is used by ld.bfd to force position independent thunks. The patch adds --pic-veneer as the option is needed for the Linux kernel on Arm. fixes pr39886 Differential Revision: https://reviews.llvm.org/D55505 ------------------------------------------------------------------------ llvm-svn: 351445 --- lld/ELF/Config.h | 1 + lld/ELF/Driver.cpp | 1 + lld/ELF/Options.td | 3 ++ lld/ELF/Thunks.cpp | 10 ++-- lld/docs/ld.lld.1 | 2 + lld/test/ELF/arm-force-pi-thunk.s | 87 +++++++++++++++++++++++++++++++ 6 files changed, 99 insertions(+), 5 deletions(-) create mode 100644 lld/test/ELF/arm-force-pi-thunk.s diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index 8fb760e592eb18..60555f188fed3e 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -159,6 +159,7 @@ struct Configuration { bool OFormatBinary; bool Omagic; bool OptRemarksWithHotness; + bool PicThunk; bool Pie; bool PrintGcSections; bool PrintIcfSections; diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 13b6119e2dc91c..2e2036310fb21b 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1006,6 +1006,7 @@ static void setConfigs(opt::InputArgList &Args) { Config->Endianness = Config->IsLE ? endianness::little : endianness::big; Config->IsMips64EL = (K == ELF64LEKind && M == EM_MIPS); Config->Pic = Config->Pie || Config->Shared; + Config->PicThunk = Args.hasArg(OPT_pic_veneer, Config->Pic); Config->Wordsize = Config->Is64 ? 8 : 4; // ELF defines two different ways to store relocation addends as shown below: diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index e43a21b923d331..bc203193661b44 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -255,6 +255,9 @@ defm use_android_relr_tags: B<"use-android-relr-tags", "Use SHT_ANDROID_RELR / DT_ANDROID_RELR* tags instead of SHT_RELR / DT_RELR*", "Use SHT_RELR / DT_RELR* tags (default)">; +def pic_veneer: F<"pic-veneer">, + HelpText<"Always generate position independent thunks (veneers)">; + defm pie: B<"pie", "Create a position independent executable", "Do not create a position independent executable (default)">; diff --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp index 95b57dc0db4260..7a31d36b0e9025 100644 --- a/lld/ELF/Thunks.cpp +++ b/lld/ELF/Thunks.cpp @@ -722,7 +722,7 @@ Thunk::~Thunk() = default; static Thunk *addThunkAArch64(RelType Type, Symbol &S) { if (Type != R_AARCH64_CALL26 && Type != R_AARCH64_JUMP26) fatal("unrecognized relocation type"); - if (Config->Pic) + if (Config->PicThunk) return make(S); return make(S); } @@ -739,7 +739,7 @@ static Thunk *addThunkPreArmv7(RelType Reloc, Symbol &S) { case R_ARM_JUMP24: case R_ARM_CALL: case R_ARM_THM_CALL: - if (Config->Pic) + if (Config->PicThunk) return make(S); return make(S); } @@ -794,13 +794,13 @@ static Thunk *addThunkArm(RelType Reloc, Symbol &S) { case R_ARM_PLT32: case R_ARM_JUMP24: case R_ARM_CALL: - if (Config->Pic) + if (Config->PicThunk) return make(S); return make(S); case R_ARM_THM_JUMP19: case R_ARM_THM_JUMP24: case R_ARM_THM_CALL: - if (Config->Pic) + if (Config->PicThunk) return make(S); return make(S); } @@ -820,7 +820,7 @@ static Thunk *addThunkPPC64(RelType Type, Symbol &S) { if (S.isInPlt()) return make(S); - if (Config->Pic) + if (Config->PicThunk) return make(S); return make(S); diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1 index d1ce4a3517f4d4..889d5feabe4d1e 100644 --- a/lld/docs/ld.lld.1 +++ b/lld/docs/ld.lld.1 @@ -311,6 +311,8 @@ Write optimization remarks in YAML format to .Ar file . .It Fl -opt-remarks-with-hotness Include hotness information in the optimization remarks file. +.It Fl -pic-veneer +Always generate position independent thunks. .It Fl -pie Create a position independent executable. .It Fl -print-gc-sections diff --git a/lld/test/ELF/arm-force-pi-thunk.s b/lld/test/ELF/arm-force-pi-thunk.s new file mode 100644 index 00000000000000..2c88de0424ee5c --- /dev/null +++ b/lld/test/ELF/arm-force-pi-thunk.s @@ -0,0 +1,87 @@ +// REQUIRES: arm +// RUN: llvm-mc -arm-add-build-attributes -filetype=obj -triple=armv7a-none-linux-gnueabi %s -o %t +// RUN: echo "SECTIONS { \ +// RUN: . = SIZEOF_HEADERS; \ +// RUN: .text_low : { *(.text_low) *(.text_low2) } \ +// RUN: .text_high 0x2000000 : { *(.text_high) *(.text_high2) } \ +// RUN: } " > %t.script +// RUN: ld.lld --pic-veneer --script %t.script %t -o %t2 2>&1 +// RUN: llvm-objdump -d -triple=thumbv7a-none-linux-gnueabi %t2 | FileCheck %s + +// Test that we can force generation of position independent thunks even when +// inputs are not pic. + + .syntax unified + .section .text_low, "ax", %progbits + .thumb + .globl _start +_start: bx lr + .globl low_target + .type low_target, %function +low_target: + bl high_target + bl high_target2 + + .section .text_low2, "ax", %progbits + .thumb + .globl low_target2 + .type low_target2, %function +low_target2: + bl high_target + bl high_target2 + +// CHECK: Disassembly of section .text_low: +// CHECK-NEXT: _start: +// CHECK-NEXT: 94: 70 47 bx lr +// CHECK: low_target: +// CHECK-NEXT: 96: 00 f0 03 f8 bl #6 +// CHECK-NEXT: 9a: 00 f0 07 f8 bl #14 +// CHECK-NEXT: 9e: d4 d4 bmi #-88 +// CHECK: __ThumbV7PILongThunk_high_target: +// CHECK-NEXT: a0: 4f f6 55 7c movw r12, #65365 +// CHECK-NEXT: a4: c0 f2 ff 1c movt r12, #511 +// CHECK-NEXT: a8: fc 44 add r12, pc +// CHECK-NEXT: aa: 60 47 bx r12 +// CHECK: __ThumbV7PILongThunk_high_target2: +// CHECK-NEXT: ac: 4f f6 69 7c movw r12, #65385 +// CHECK-NEXT: b0: c0 f2 ff 1c movt r12, #511 +// CHECK-NEXT: b4: fc 44 add r12, pc +// CHECK-NEXT: b6: 60 47 bx r12 +// CHECK: low_target2: +// CHECK-NEXT: b8: ff f7 f2 ff bl #-28 +// CHECK-NEXT: bc: ff f7 f6 ff bl #-20 + + + .section .text_high, "ax", %progbits + .thumb + .globl high_target + .type high_target, %function +high_target: + bl low_target + bl low_target2 + + .section .text_high2, "ax", %progbits + .thumb + .globl high_target2 + .type high_target2, %function +high_target2: + bl low_target + bl low_target2 + +// CHECK: Disassembly of section .text_high: +// CHECK-NEXT: high_target: +// CHECK-NEXT: 2000000: 00 f0 02 f8 bl #4 +// CHECK-NEXT: 2000004: 00 f0 06 f8 bl #12 +// CHECK: __ThumbV7PILongThunk_low_target: +// CHECK-NEXT: 2000008: 40 f2 83 0c movw r12, #131 +// CHECK-NEXT: 200000c: cf f6 00 6c movt r12, #65024 +// CHECK-NEXT: 2000010: fc 44 add r12, pc +// CHECK-NEXT: 2000012: 60 47 bx r12 +// CHECK: __ThumbV7PILongThunk_low_target2: +// CHECK-NEXT: 2000014: 40 f2 99 0c movw r12, #153 +// CHECK-NEXT: 2000018: cf f6 00 6c movt r12, #65024 +// CHECK-NEXT: 200001c: fc 44 add r12, pc +// CHECK-NEXT: 200001e: 60 47 bx r12 +// CHECK: high_target2: +// CHECK-NEXT: 2000020: ff f7 f2 ff bl #-28 +// CHECK-NEXT: 2000024: ff f7 f6 ff bl #-20 From 961b84b221558e07a055c38fa12f66ab45e577a5 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 17 Jan 2019 13:47:57 +0000 Subject: [PATCH 012/125] Merging r351335: ------------------------------------------------------------------------ r351335 | psmith | 2019-01-16 14:24:02 +0100 (Wed, 16 Jan 2019) | 17 lines [ELF][AArch64] Add R_AARCH64_PLT_PAGE_PC to isRelExpr As a follow on to D56666 (r351186) there is a case when taking the address of an ifunc when linking -pie that can generate a spurious can't create dynamic relocation R_AARCH64_ADR_PREL_PG_HI21 against symbol in readonly segment. Specifically the case is where the ifunc is in the same translation unit as the address taker, so given -fpie the compiler knows the ifunc is defined in the executable so it can use a non-got-generating relocation. The error message is due to R_AARCH64_PLT_PAGE_PC not being added to isRelExpr, its non PLT equivalent R_AARCH64_PAGE_PC is already in isRelExpr. Differential Revision: https://reviews.llvm.org/D56724 ------------------------------------------------------------------------ llvm-svn: 351446 --- lld/ELF/Relocations.cpp | 2 +- lld/test/ELF/aarch64-gnu-ifunc-address-pie.s | 44 ++++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 lld/test/ELF/aarch64-gnu-ifunc-address-pie.s diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 812468896f0d01..9ffe8a9cc72e77 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -356,7 +356,7 @@ static bool needsGot(RelExpr Expr) { static bool isRelExpr(RelExpr Expr) { return isRelExprOneOf(Expr); + R_AARCH64_PLT_PAGE_PC, R_RELAX_GOT_PC>(Expr); } // Returns true if a given relocation can be computed at link-time. diff --git a/lld/test/ELF/aarch64-gnu-ifunc-address-pie.s b/lld/test/ELF/aarch64-gnu-ifunc-address-pie.s new file mode 100644 index 00000000000000..3db9070dbd07bb --- /dev/null +++ b/lld/test/ELF/aarch64-gnu-ifunc-address-pie.s @@ -0,0 +1,44 @@ +# REQUIRES: aarch64 +# RUN: llvm-mc -filetype=obj -triple=aarch64-none-linux-gnu %s -o %t.o +# RUN: ld.lld -pie %t.o -o %tout +# RUN: llvm-objdump -D %tout | FileCheck %s +# RUN: llvm-readobj -r %tout | FileCheck %s -check-prefix=CHECK-RELOCS + +# Test that when we take the address of a preemptible ifunc using -fpie, we can +# handle the case when the ifunc is in the same translation unit as the address +# taker. In this case the compiler knows that ifunc is not defined in a shared +# library so it can use a non got generating relative reference. +.text +.globl myfunc +.type myfunc,@gnu_indirect_function +myfunc: + ret + +.text +.globl main +.type main,@function +main: + adrp x8, myfunc + add x8, x8, :lo12: myfunc + ret + +# CHECK: 0000000000010000 myfunc: +# CHECK-NEXT: 10000: c0 03 5f d6 ret +# CHECK: 0000000000010004 main: +# CHECK-NEXT: 10004: 08 00 00 90 adrp x8, #0 +# x8 = 0x10000 +# CHECK-NEXT: 10008: 08 41 00 91 add x8, x8, #16 +# x8 = 0x10010 = .plt for myfunc +# CHECK-NEXT: 1000c: c0 03 5f d6 ret +# CHECK-NEXT: Disassembly of section .plt: +# CHECK-NEXT: 0000000000010010 .plt: +# CHECK-NEXT: 10010: 90 00 00 90 adrp x16, #65536 +# CHECK-NEXT: 10014: 11 02 40 f9 ldr x17, [x16] +# CHECK-NEXT: 10018: 10 02 00 91 add x16, x16, #0 +# CHECK-NEXT: 1001c: 20 02 1f d6 br x17 + +# CHECK-RELOCS: Relocations [ +# CHECK-RELOCS-NEXT: Section {{.*}} .rela.plt { +# CHECK-RELOCS-NEXT: 0x20000 R_AARCH64_IRELATIVE - 0x10000 +# CHECK-RELOCS-NEXT: } +# CHECK-RELOCS-NEXT: ] From 3553f3e0aaa3e0208da72c9a8fed117c3ce48335 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 17 Jan 2019 15:37:17 +0000 Subject: [PATCH 013/125] Merging r351370: ------------------------------------------------------------------------ r351370 | mgrang | 2019-01-16 20:52:59 +0100 (Wed, 16 Jan 2019) | 14 lines [COFF, ARM64] Implement support for SEH extensions __try/__except/__finally Summary: This patch supports MS SEH extensions __try/__except/__finally. The intrinsics localescape and localrecover are responsible for communicating escaped static allocas from the try block to the handler. We need to preserve frame pointers for SEH. So we create a new function/property HasLocalEscape. Reviewers: rnk, compnerd, mstorsjo, TomTan, efriedma, ssijaric Reviewed By: rnk, efriedma Subscribers: smeenai, jrmuizel, alex, majnemer, ssijaric, ehsan, dmajor, kristina, javed.absar, kristof.beyls, chrib, llvm-commits Differential Revision: https://reviews.llvm.org/D53540 ------------------------------------------------------------------------ llvm-svn: 351451 --- llvm/include/llvm/CodeGen/MachineFunction.h | 4 ++ llvm/lib/CodeGen/AsmPrinter/WinException.cpp | 23 ++++--- .../SelectionDAG/SelectionDAGBuilder.cpp | 2 + llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 28 ++++++++ .../Target/AArch64/AArch64FrameLowering.cpp | 4 ++ .../Target/AArch64/AArch64ISelLowering.cpp | 28 ++++++++ llvm/lib/Target/AArch64/AArch64InstrInfo.td | 7 ++ .../Target/AArch64/AArch64RegisterInfo.cpp | 7 ++ llvm/test/CodeGen/AArch64/seh-finally.ll | 67 +++++++++++++++++++ llvm/test/CodeGen/AArch64/seh-localescape.ll | 30 +++++++++ 10 files changed, 191 insertions(+), 9 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/seh-finally.ll create mode 100644 llvm/test/CodeGen/AArch64/seh-localescape.ll diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index 25edf5bcce516a..6dbe1650adabff 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -329,6 +329,7 @@ class MachineFunction { bool CallsUnwindInit = false; bool HasEHScopes = false; bool HasEHFunclets = false; + bool HasLocalEscape = false; /// List of C++ TypeInfo used. std::vector TypeInfos; @@ -811,6 +812,9 @@ class MachineFunction { bool hasEHFunclets() const { return HasEHFunclets; } void setHasEHFunclets(bool V) { HasEHFunclets = V; } + bool hasLocalEscape() const { return HasLocalEscape; } + void setHasLocalEscape(bool V) { HasLocalEscape = V; } + /// Find or create an LandingPadInfo for the specified MachineBasicBlock. LandingPadInfo &getOrCreateLandingPadInfo(MachineBasicBlock *LandingPad); diff --git a/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp index cf8e8c69bc2a61..92df09b7d6a23a 100644 --- a/llvm/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp @@ -545,15 +545,17 @@ void WinException::emitCSpecificHandlerTable(const MachineFunction *MF) { OS.AddComment(Comment); }; - // Emit a label assignment with the SEH frame offset so we can use it for - // llvm.eh.recoverfp. - StringRef FLinkageName = - GlobalValue::dropLLVMManglingEscape(MF->getFunction().getName()); - MCSymbol *ParentFrameOffset = - Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName); - const MCExpr *MCOffset = - MCConstantExpr::create(FuncInfo.SEHSetFrameOffset, Ctx); - Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset); + if (!isAArch64) { + // Emit a label assignment with the SEH frame offset so we can use it for + // llvm.eh.recoverfp. + StringRef FLinkageName = + GlobalValue::dropLLVMManglingEscape(MF->getFunction().getName()); + MCSymbol *ParentFrameOffset = + Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName); + const MCExpr *MCOffset = + MCConstantExpr::create(FuncInfo.SEHSetFrameOffset, Ctx); + Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset); + } // Use the assembler to compute the number of table entries through label // difference and division. @@ -937,6 +939,9 @@ void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo, if (FI != INT_MAX) { const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering(); unsigned UnusedReg; + // FIXME: getFrameIndexReference needs to match the behavior of + // AArch64RegisterInfo::hasBasePointer in which one of the scenarios where + // SP is used is if frame size >= 256. Offset = TFI->getFrameIndexReference(*Asm->MF, FI, UnusedReg); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 871ab9b29881b2..bfeb3d1bc2b91f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6182,6 +6182,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { .addFrameIndex(FI); } + MF.setHasLocalEscape(true); + return nullptr; } diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index 0442076992e24e..0254a572434f46 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -694,6 +694,34 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) { switch (MI->getOpcode()) { default: break; + case AArch64::MOVMCSym: { + unsigned DestReg = MI->getOperand(0).getReg(); + const MachineOperand &MO_Sym = MI->getOperand(1); + MachineOperand Hi_MOSym(MO_Sym), Lo_MOSym(MO_Sym); + MCOperand Hi_MCSym, Lo_MCSym; + + Hi_MOSym.setTargetFlags(AArch64II::MO_G1 | AArch64II::MO_S); + Lo_MOSym.setTargetFlags(AArch64II::MO_G0 | AArch64II::MO_NC); + + MCInstLowering.lowerOperand(Hi_MOSym, Hi_MCSym); + MCInstLowering.lowerOperand(Lo_MOSym, Lo_MCSym); + + MCInst MovZ; + MovZ.setOpcode(AArch64::MOVZXi); + MovZ.addOperand(MCOperand::createReg(DestReg)); + MovZ.addOperand(Hi_MCSym); + MovZ.addOperand(MCOperand::createImm(16)); + EmitToStreamer(*OutStreamer, MovZ); + + MCInst MovK; + MovK.setOpcode(AArch64::MOVKXi); + MovK.addOperand(MCOperand::createReg(DestReg)); + MovK.addOperand(MCOperand::createReg(DestReg)); + MovK.addOperand(Lo_MCSym); + MovK.addOperand(MCOperand::createImm(0)); + EmitToStreamer(*OutStreamer, MovK); + return; + } case AArch64::MOVIv2d_ns: // If the target has , lower this // instruction to movi.16b instead. diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 538a8d7e8fbcf5..621aa8bc783a61 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -228,6 +228,10 @@ bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const { MFI.getMaxCallFrameSize() > DefaultSafeSPDisplacement) return true; + // Win64 SEH requires frame pointer if funclets are present. + if (MF.hasLocalEscape()) + return true; + return false; } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index e01ca14d7f63dd..762f4413d72b91 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2743,6 +2743,34 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::aarch64_neon_umin: return DAG.getNode(ISD::UMIN, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + + case Intrinsic::localaddress: { + // Returns one of the stack, base, or frame pointer registers, depending on + // which is used to reference local variables. + MachineFunction &MF = DAG.getMachineFunction(); + const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); + unsigned Reg; + if (RegInfo->hasBasePointer(MF)) + Reg = RegInfo->getBaseRegister(); + else // This function handles the SP or FP case. + Reg = RegInfo->getFrameRegister(MF); + return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, + Op.getSimpleValueType()); + } + + case Intrinsic::eh_recoverfp: { + // FIXME: This needs to be implemented to correctly handle highly aligned + // stack objects. For now we simply return the incoming FP. Refer D53541 + // for more details. + SDValue FnOp = Op.getOperand(1); + SDValue IncomingFPOp = Op.getOperand(2); + GlobalAddressSDNode *GSD = dyn_cast(FnOp); + auto *Fn = dyn_cast_or_null(GSD ? GSD->getGlobal() : nullptr); + if (!Fn) + report_fatal_error( + "llvm.eh.recoverfp must take a function as the first argument"); + return IncomingFPOp; + } } } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index c24b8b36441bbb..86a4119c45f580 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -133,6 +133,10 @@ def UseNegativeImmediates : Predicate<"false">, AssemblerPredicate<"!FeatureNoNegativeImmediates", "NegativeImmediates">; +def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER", + SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, + SDTCisInt<1>]>>; + //===----------------------------------------------------------------------===// // AArch64-specific DAG Nodes. @@ -6801,5 +6805,8 @@ def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)), def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)), (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>; +def MOVMCSym : Pseudo<(outs GPR64:$dst), (ins i64imm:$sym), []>, Sched<[]>; +def : Pat<(i64 (AArch64LocalRecover mcsym:$sym)), (MOVMCSym mcsym:$sym)>; + include "AArch64InstrAtomics.td" include "AArch64SVEInstrInfo.td" diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index 96ae45ae3d0dcd..3daac23592de7d 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -466,6 +466,13 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Modify MI as necessary to handle as much of 'Offset' as possible Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg); + + if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE) { + MachineOperand &FI = MI.getOperand(FIOperandNum); + FI.ChangeToImmediate(Offset); + return; + } + if (rewriteAArch64FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII)) return; diff --git a/llvm/test/CodeGen/AArch64/seh-finally.ll b/llvm/test/CodeGen/AArch64/seh-finally.ll new file mode 100644 index 00000000000000..3cbbd03385c5fb --- /dev/null +++ b/llvm/test/CodeGen/AArch64/seh-finally.ll @@ -0,0 +1,67 @@ +; RUN: llc -mtriple arm64-windows -o - %s | FileCheck %s + +; Function Attrs: noinline optnone uwtable +define dso_local i32 @foo() { +entry: +; CHECK-LABEL: foo +; CHECK: orr w8, wzr, #0x1 +; CHECK: mov w0, wzr +; CHECK: mov x1, x29 +; CHECK: .set .Lfoo$frame_escape_0, -4 +; CHECK: stur w8, [x29, #-4] +; CHECK: bl "?fin$0@0@foo@@" +; CHECK: ldur w0, [x29, #-4] + + %count = alloca i32, align 4 + call void (...) @llvm.localescape(i32* %count) + store i32 0, i32* %count, align 4 + %0 = load i32, i32* %count, align 4 + %add = add nsw i32 %0, 1 + store i32 %add, i32* %count, align 4 + %1 = call i8* @llvm.localaddress() + call void @"?fin$0@0@foo@@"(i8 0, i8* %1) + %2 = load i32, i32* %count, align 4 + ret i32 %2 +} + +define internal void @"?fin$0@0@foo@@"(i8 %abnormal_termination, i8* %frame_pointer) { +entry: +; CHECK-LABEL: @"?fin$0@0@foo@@" +; CHECK: sub sp, sp, #16 +; CHECK: str x1, [sp, #8] +; CHECK: strb w0, [sp, #7] +; CHECK: movz x8, #:abs_g1_s:.Lfoo$frame_escape_0 +; CHECK: movk x8, #:abs_g0_nc:.Lfoo$frame_escape_0 +; CHECK: add x8, x1, x8 +; CHECK: ldr w9, [x8] +; CHECK: add w9, w9, #1 +; CHECK: str w9, [x8] + + %frame_pointer.addr = alloca i8*, align 8 + %abnormal_termination.addr = alloca i8, align 1 + %0 = call i8* @llvm.localrecover(i8* bitcast (i32 ()* @foo to i8*), i8* %frame_pointer, i32 0) + %count = bitcast i8* %0 to i32* + store i8* %frame_pointer, i8** %frame_pointer.addr, align 8 + store i8 %abnormal_termination, i8* %abnormal_termination.addr, align 1 + %1 = zext i8 %abnormal_termination to i32 + %cmp = icmp eq i32 %1, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %2 = load i32, i32* %count, align 4 + %add = add nsw i32 %2, 1 + store i32 %add, i32* %count, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +; Function Attrs: nounwind readnone +declare i8* @llvm.localrecover(i8*, i8*, i32) + +; Function Attrs: nounwind readnone +declare i8* @llvm.localaddress() + +; Function Attrs: nounwind +declare void @llvm.localescape(...) diff --git a/llvm/test/CodeGen/AArch64/seh-localescape.ll b/llvm/test/CodeGen/AArch64/seh-localescape.ll new file mode 100644 index 00000000000000..0a1675014f62ff --- /dev/null +++ b/llvm/test/CodeGen/AArch64/seh-localescape.ll @@ -0,0 +1,30 @@ +; RUN: llc -mtriple arm64-windows %s -o - | FileCheck %s + +; Function Attrs: noinline nounwind optnone uwtable +define dso_local i32 @foo() { +entry: +; CHECK-LABEL: foo +; CHECK: .set .Lfoo$frame_escape_0, -4 + + %count = alloca i32, align 4 + call void (...) @llvm.localescape(i32* %count) + ret i32 0 +} + +define internal i32 @"?filt$0@0@foo@@"(i8* %exception_pointers, i8* %frame_pointer) { +entry: +; CHECK-LABEL: @"?filt$0@0@foo@@" +; CHECK: movz x8, #:abs_g1_s:.Lfoo$frame_escape_0 +; CHECK: movk x8, #:abs_g0_nc:.Lfoo$frame_escape_0 + + %0 = call i8* @llvm.localrecover(i8* bitcast (i32 ()* @foo to i8*), i8* %frame_pointer, i32 0) + %count = bitcast i8* %0 to i32* + %1 = load i32, i32* %count, align 4 + ret i32 %1 +} + +; Function Attrs: nounwind readnone +declare i8* @llvm.localrecover(i8*, i8*, i32) #2 + +; Function Attrs: nounwind +declare void @llvm.localescape(...) #3 From 2f1402b6cb9827593b9096ee1f4368baff9d6774 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 17 Jan 2019 15:39:31 +0000 Subject: [PATCH 014/125] Merging r351421: ------------------------------------------------------------------------ r351421 | ssijaric | 2019-01-17 10:45:17 +0100 (Thu, 17 Jan 2019) | 12 lines [ARM64][Windows] Share unwind codes between epilogues There are cases where we have multiple epilogues that have the exact same unwind code sequence. In that case, the epilogues can share the same unwind codes in the .xdata section. This should get us past the assert "SEH unwind data splitting not yet implemented" in many cases. We still need to add support for generating multiple .pdata/.xdata sections for those functions that need to be split into fragments. Differential Revision: https://reviews.llvm.org/D56813 ------------------------------------------------------------------------ llvm-svn: 351452 --- llvm/lib/MC/MCWin64EH.cpp | 53 ++++++- llvm/test/CodeGen/AArch64/wineh4.mir | 6 +- llvm/test/CodeGen/AArch64/wineh8.mir | 225 +++++++++++++++++++++++++++ 3 files changed, 279 insertions(+), 5 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/wineh8.mir diff --git a/llvm/lib/MC/MCWin64EH.cpp b/llvm/lib/MC/MCWin64EH.cpp index 0724b109e1a1da..2e4eea3411f4bd 100644 --- a/llvm/lib/MC/MCWin64EH.cpp +++ b/llvm/lib/MC/MCWin64EH.cpp @@ -453,6 +453,38 @@ static void ARM64EmitUnwindCode(MCStreamer &streamer, const MCSymbol *begin, } } +// Returns the epilog symbol of an epilog with the exact same unwind code +// sequence, if it exists. Otherwise, returns nulltpr. +// EpilogInstrs - Unwind codes for the current epilog. +// Epilogs - Epilogs that potentialy match the current epilog. +static MCSymbol* +FindMatchingEpilog(const std::vector& EpilogInstrs, + const std::vector& Epilogs, + const WinEH::FrameInfo *info) { + for (auto *EpilogStart : Epilogs) { + auto InstrsIter = info->EpilogMap.find(EpilogStart); + assert(InstrsIter != info->EpilogMap.end() && + "Epilog not found in EpilogMap"); + const auto &Instrs = InstrsIter->second; + + if (Instrs.size() != EpilogInstrs.size()) + continue; + + bool Match = true; + for (unsigned i = 0; i < Instrs.size(); ++i) + if (Instrs[i].Operation != EpilogInstrs[i].Operation || + Instrs[i].Offset != EpilogInstrs[i].Offset || + Instrs[i].Register != EpilogInstrs[i].Register) { + Match = false; + break; + } + + if (Match) + return EpilogStart; + } + return nullptr; +} + // Populate the .xdata section. The format of .xdata on ARM64 is documented at // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) { @@ -477,12 +509,29 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) { // Process epilogs. MapVector EpilogInfo; + // Epilogs processed so far. + std::vector AddedEpilogs; + for (auto &I : info->EpilogMap) { MCSymbol *EpilogStart = I.first; auto &EpilogInstrs = I.second; uint32_t CodeBytes = ARM64CountOfUnwindCodes(EpilogInstrs); - EpilogInfo[EpilogStart] = TotalCodeBytes; - TotalCodeBytes += CodeBytes; + + uint32_t NumUnwindCodes = EpilogInstrs.size(); + MCSymbol* MatchingEpilog = + FindMatchingEpilog(EpilogInstrs, AddedEpilogs, info); + if (MatchingEpilog) { + assert(EpilogInfo.find(MatchingEpilog) != EpilogInfo.end() && + "Duplicate epilog not found"); + EpilogInfo[EpilogStart] = EpilogInfo[MatchingEpilog]; + // Clear the unwind codes in the EpilogMap, so that they don't get output + // in the logic below. + EpilogInstrs.clear(); + } else { + EpilogInfo[EpilogStart] = TotalCodeBytes; + TotalCodeBytes += CodeBytes; + AddedEpilogs.push_back(EpilogStart); + } } // Code Words, Epilog count, E, X, Vers, Function Length diff --git a/llvm/test/CodeGen/AArch64/wineh4.mir b/llvm/test/CodeGen/AArch64/wineh4.mir index 4d4cc892c2e82f..3a324324431c7c 100644 --- a/llvm/test/CodeGen/AArch64/wineh4.mir +++ b/llvm/test/CodeGen/AArch64/wineh4.mir @@ -1,7 +1,7 @@ # RUN: llc -o - %s -mtriple=aarch64-windows -start-after=prologepilog \ # RUN: -disable-branch-fold -filetype=obj \ # RUN: | llvm-readobj -unwind | FileCheck %s -# Check that multiple epilgoues are correctly placed in .xdata. +# Check that identical multiple epilgoues are correctly shared in .xdata. # CHECK: ExceptionData { # CHECK-NEXT: FunctionLength: 164 @@ -9,7 +9,7 @@ # CHECK-NEXT: ExceptionData: No # CHECK-NEXT: EpiloguePacked: No # CHECK-NEXT: EpilogueScopes: 2 -# CHECK-NEXT: ByteCodeLength: 48 +# CHECK-NEXT: ByteCodeLength: 32 # CHECK-NEXT: Prologue [ # CHECK-NEXT: 0xc80c ; stp x19, x20, [sp, #96] # CHECK-NEXT: 0xc88a ; stp x21, x22, [sp, #80] @@ -37,7 +37,7 @@ # CHECK-NEXT: } # CHECK-NEXT: EpilogueScope { # CHECK-NEXT: StartOffset: 33 -# CHECK-NEXT: EpilogueStartIndex: 30 +# CHECK-NEXT: EpilogueStartIndex: 15 # CHECK-NEXT: Opcodes [ # CHECK-NEXT: 0xc80c ; ldp x19, x20, [sp, #96] # CHECK-NEXT: 0xc88a ; ldp x21, x22, [sp, #80] diff --git a/llvm/test/CodeGen/AArch64/wineh8.mir b/llvm/test/CodeGen/AArch64/wineh8.mir new file mode 100644 index 00000000000000..606bc140b232dc --- /dev/null +++ b/llvm/test/CodeGen/AArch64/wineh8.mir @@ -0,0 +1,225 @@ +# RUN: llc -o - %s -mtriple=aarch64-windows -start-after=prologepilog \ +# RUN: -disable-branch-fold -filetype=obj \ +# RUN: | llvm-readobj -unwind | FileCheck %s +# Check that non-identical multiple epilgoues are correctly shared in .xdata. + +# CHECK: ExceptionData { +# CHECK-NEXT: FunctionLength: 160 +# CHECK-NEXT: Version: 0 +# CHECK-NEXT: ExceptionData: No +# CHECK-NEXT: EpiloguePacked: No +# CHECK-NEXT: EpilogueScopes: 2 +# CHECK-NEXT: ByteCodeLength: 44 +# CHECK-NEXT: Prologue [ +# CHECK-NEXT: 0xc80c ; stp x19, x20, [sp, #96] +# CHECK-NEXT: 0xc88a ; stp x21, x22, [sp, #80] +# CHECK-NEXT: 0xc908 ; stp x23, x24, [sp, #64] +# CHECK-NEXT: 0xc986 ; stp x25, x26, [sp, #48] +# CHECK-NEXT: 0xca04 ; stp x27, x28, [sp, #32] +# CHECK-NEXT: 0xd802 ; stp d8, d9, [sp, #16] +# CHECK-NEXT: 0xda8d ; stp d10, d11, [sp, #-112]! +# CHECK-NEXT: 0xe4 ; end +# CHECK-NEXT: ] +# CHECK-NEXT: EpilogueScopes [ +# CHECK-NEXT: EpilogueScope { +# CHECK-NEXT: StartOffset: 16 +# CHECK-NEXT: EpilogueStartIndex: 15 +# CHECK-NEXT: Opcodes [ +# CHECK-NEXT: 0xc80c ; ldp x19, x20, [sp, #96] +# CHECK-NEXT: 0xc88a ; ldp x21, x22, [sp, #80] +# CHECK-NEXT: 0xc908 ; ldp x23, x24, [sp, #64] +# CHECK-NEXT: 0xc986 ; ldp x25, x26, [sp, #48] +# CHECK-NEXT: 0xd802 ; ldp d8, d9, [sp, #16] +# CHECK-NEXT: 0xda8d ; ldp d10, d11, [sp], #112 +# CHECK-NEXT: 0xe4 ; end +# CHECK-NEXT: ] +# CHECK-NEXT: } +# CHECK-NEXT: EpilogueScope { +# CHECK-NEXT: StartOffset: 32 +# CHECK-NEXT: EpilogueStartIndex: 28 +# CHECK-NEXT: Opcodes [ +# CHECK-NEXT: 0xc80c ; ldp x19, x20, [sp, #96] +# CHECK-NEXT: 0xc88a ; ldp x21, x22, [sp, #80] +# CHECK-NEXT: 0xc908 ; ldp x23, x24, [sp, #64] +# CHECK-NEXT: 0xc986 ; ldp x25, x26, [sp, #48] +# CHECK-NEXT: 0xca04 ; ldp x27, x28, [sp, #32] +# CHECK-NEXT: 0xd802 ; ldp d8, d9, [sp, #16] +# CHECK-NEXT: 0xda8d ; ldp d10, d11, [sp], #112 +# CHECK-NEXT: 0xe4 ; end +# CHECK-NEXT: ] +# CHECK-NEXT: } +# CHECK-NEXT: ] +# CHECK-NEXT: } +... +--- +name: test +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: true +registers: +liveins: + - { reg: '$w0', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 112 + offsetAdjustment: 0 + maxAlignment: 8 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: true + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: +stack: + - { id: 0, name: '', type: spill-slot, offset: -8, size: 8, alignment: 8, + stack-id: 0, callee-saved-register: '$x19', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -16, size: 8, alignment: 8, + stack-id: 0, callee-saved-register: '$x20', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, name: '', type: spill-slot, offset: -24, size: 8, alignment: 8, + stack-id: 0, callee-saved-register: '$x21', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 3, name: '', type: spill-slot, offset: -32, size: 8, alignment: 8, + stack-id: 0, callee-saved-register: '$x22', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 4, name: '', type: spill-slot, offset: -40, size: 8, alignment: 8, + stack-id: 0, callee-saved-register: '$x23', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 5, name: '', type: spill-slot, offset: -48, size: 8, alignment: 8, + stack-id: 0, callee-saved-register: '$x24', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 6, name: '', type: spill-slot, offset: -56, size: 8, alignment: 8, + stack-id: 0, callee-saved-register: '$x25', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 7, name: '', type: spill-slot, offset: -64, size: 8, alignment: 8, + stack-id: 0, callee-saved-register: '$x26', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 8, name: '', type: spill-slot, offset: -72, size: 8, alignment: 8, + stack-id: 0, callee-saved-register: '$x27', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 9, name: '', type: spill-slot, offset: -80, size: 8, alignment: 8, + stack-id: 0, callee-saved-register: '$x28', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 10, name: '', type: spill-slot, offset: -88, size: 8, alignment: 8, + stack-id: 0, callee-saved-register: '$d8', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 11, name: '', type: spill-slot, offset: -96, size: 8, alignment: 8, + stack-id: 0, callee-saved-register: '$d9', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 12, name: '', type: spill-slot, offset: -104, size: 8, alignment: 8, + stack-id: 0, callee-saved-register: '$d10', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 13, name: '', type: spill-slot, offset: -112, size: 8, alignment: 8, + stack-id: 0, callee-saved-register: '$d11', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +constants: +body: | + bb.0.entry: + successors: %bb.2(0x40000000), %bb.1(0x40000000) + liveins: $x0, $x1, $d0, $d1, $d10, $d11, $d8, $d9, $x27, $x28, $x25, $x26, $x23, $x24, $x21, $x22, $x19, $x20 + + early-clobber $sp = frame-setup STPDpre killed $d10, killed $d11, $sp, -14 :: (store 8 into %stack.12), (store 8 into %stack.13) + frame-setup SEH_SaveFRegP_X 10, 11, -112 + frame-setup STPDi killed $d8, killed $d9, $sp, 2 :: (store 8 into %stack.10), (store 8 into %stack.11) + frame-setup SEH_SaveFRegP 8, 9, 16 + frame-setup STPXi killed $x27, killed $x28, $sp, 4 :: (store 8 into %stack.8), (store 8 into %stack.9) + frame-setup SEH_SaveRegP 27, 28, 32 + frame-setup STPXi killed $x25, killed $x26, $sp, 6 :: (store 8 into %stack.6), (store 8 into %stack.7) + frame-setup SEH_SaveRegP 25, 26, 48 + frame-setup STPXi killed $x23, killed $x24, $sp, 8 :: (store 8 into %stack.4), (store 8 into %stack.5) + frame-setup SEH_SaveRegP 23, 24, 64 + frame-setup STPXi killed $x21, killed $x22, $sp, 10 :: (store 8 into %stack.2), (store 8 into %stack.3) + frame-setup SEH_SaveRegP 21, 22, 80 + frame-setup STPXi killed $x19, killed $x20, $sp, 12 :: (store 8 into %stack.0), (store 8 into %stack.1) + frame-setup SEH_SaveRegP 19, 20, 96 + frame-setup SEH_PrologEnd + frame-setup CFI_INSTRUCTION def_cfa_offset 112 + frame-setup CFI_INSTRUCTION offset $w19, -8 + frame-setup CFI_INSTRUCTION offset $w20, -16 + frame-setup CFI_INSTRUCTION offset $w21, -24 + frame-setup CFI_INSTRUCTION offset $w22, -32 + frame-setup CFI_INSTRUCTION offset $w23, -40 + frame-setup CFI_INSTRUCTION offset $w24, -48 + frame-setup CFI_INSTRUCTION offset $w25, -56 + frame-setup CFI_INSTRUCTION offset $w26, -64 + frame-setup CFI_INSTRUCTION offset $w27, -72 + frame-setup CFI_INSTRUCTION offset $w28, -80 + frame-setup CFI_INSTRUCTION offset $b8, -88 + frame-setup CFI_INSTRUCTION offset $b9, -96 + frame-setup CFI_INSTRUCTION offset $b10, -104 + frame-setup CFI_INSTRUCTION offset $b11, -112 + $x19 = ADDXrr $x0, killed $x1 + $d8 = FADDDrr killed $d0, $d1 + $d9 = FADDDrr $d8, $d1 + $d10 = FADDDrr $d9, $d8 + $d11 = FADDDrr killed $d9, $d10 + $x20 = SUBSXrr $x19, killed $x0, implicit-def $nzcv + Bcc 1, %bb.2, implicit killed $nzcv + B %bb.1 + + bb.1: + liveins: $x19, $x20 + + $x21 = ADDXrr $x20, killed $x19 + $x22 = ADDXrr $x21, killed $x20 + $x23 = ADDXrr $x22, killed $x21 + $x24 = ADDXrr $x23, killed $x22 + $x25 = ADDXrr $x24, killed $x23 + $x26 = ADDXrr $x25, killed $x24 + $x27 = ADDXrr $x26, killed $x25 + $x28 = ADDXrr $x27, killed $x26 + $x0 = COPY $x28 + frame-destroy SEH_EpilogStart + $x19, $x20 = frame-destroy LDPXi $sp, 12 :: (load 8 from %stack.0), (load 8 from %stack.1) + frame-destroy SEH_SaveRegP 19, 20, 96 + $x21, $x22 = frame-destroy LDPXi $sp, 10 :: (load 8 from %stack.2), (load 8 from %stack.3) + frame-destroy SEH_SaveRegP 21, 22, 80 + $x23, $x24 = frame-destroy LDPXi $sp, 8 :: (load 8 from %stack.4), (load 8 from %stack.5) + frame-destroy SEH_SaveRegP 23, 24, 64 + $x25, $x26 = frame-destroy LDPXi $sp, 6 :: (load 8 from %stack.6), (load 8 from %stack.7) + frame-destroy SEH_SaveRegP 25, 26, 48 + $x27, $x28 = frame-destroy LDPXi $sp, 4 :: (load 8 from %stack.8), (load 8 from %stack.9) + frame-destroy SEH_SaveRegP 27, 28, 32 + $d8, $d9 = frame-destroy LDPDi $sp, 2 :: (load 8 from %stack.10), (load 8 from %stack.11) + frame-destroy SEH_SaveFRegP 8, 9, 16 + early-clobber $sp, $d10, $d11 = frame-destroy LDPDpost $sp, 14 :: (load 8 from %stack.12), (load 8 from %stack.13) + frame-destroy SEH_SaveFRegP_X 10, 11, -112 + frame-destroy SEH_EpilogEnd + RET_ReallyLR implicit $x0 + + bb.2: + liveins: $x28, $d11 + + $x0 = COPY $d11 + $x0 = ADDXrr $x0, killed $x28 + frame-destroy SEH_EpilogStart + $x19, $x20 = frame-destroy LDPXi $sp, 12 :: (load 8 from %stack.0), (load 8 from %stack.1) + frame-destroy SEH_SaveRegP 19, 20, 96 + $x21, $x22 = frame-destroy LDPXi $sp, 10 :: (load 8 from %stack.2), (load 8 from %stack.3) + frame-destroy SEH_SaveRegP 21, 22, 80 + $x23, $x24 = frame-destroy LDPXi $sp, 8 :: (load 8 from %stack.4), (load 8 from %stack.5) + frame-destroy SEH_SaveRegP 23, 24, 64 + $x25, $x26 = frame-destroy LDPXi $sp, 6 :: (load 8 from %stack.6), (load 8 from %stack.7) + frame-destroy SEH_SaveRegP 25, 26, 48 + $d8, $d9 = frame-destroy LDPDi $sp, 2 :: (load 8 from %stack.10), (load 8 from %stack.11) + frame-destroy SEH_SaveFRegP 8, 9, 16 + early-clobber $sp, $d10, $d11 = frame-destroy LDPDpost $sp, 14 :: (load 8 from %stack.12), (load 8 from %stack.13) + frame-destroy SEH_SaveFRegP_X 10, 11, -112 + frame-destroy SEH_EpilogEnd + RET_ReallyLR implicit $x0 + +... From 8593d6061d1cf8b9485be00f7c07e67b7c02773d Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 18 Jan 2019 08:59:50 +0000 Subject: [PATCH 015/125] Merging r351475: ------------------------------------------------------------------------ r351475 | rnk | 2019-01-17 21:46:53 +0100 (Thu, 17 Jan 2019) | 16 lines [InstCombine] Don't sink dynamic allocas Summary: InstCombine's sinking algorithm only thinks about memory. It doesn't think about non-memory constraints like stack object lifetime. It can sink dynamic allocas across a stacksave call, which may be used with stackrestore, which can incorrectly reduce the lifetime of the dynamic alloca. Fixes PR40365 Reviewers: hfinkel, efriedma Subscribers: hiraditya, llvm-commits Differential Revision: https://reviews.llvm.org/D56872 ------------------------------------------------------------------------ llvm-svn: 351530 --- .../InstCombine/InstructionCombining.cpp | 8 +-- .../Transforms/InstCombine/sink-alloca.ll | 52 +++++++++++++++++++ 2 files changed, 57 insertions(+), 3 deletions(-) create mode 100644 llvm/test/Transforms/InstCombine/sink-alloca.ll diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index be7d43bbcf2c32..f530ee1246e8c2 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -3065,9 +3065,11 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { I->isTerminator()) return false; - // Do not sink alloca instructions out of the entry block. - if (isa(I) && I->getParent() == - &DestBlock->getParent()->getEntryBlock()) + // Do not sink static or dynamic alloca instructions. Static allocas must + // remain in the entry block, and dynamic allocas must not be sunk in between + // a stacksave / stackrestore pair, which would incorrectly shorten its + // lifetime. + if (isa(I)) return false; // Do not sink into catchswitch blocks. diff --git a/llvm/test/Transforms/InstCombine/sink-alloca.ll b/llvm/test/Transforms/InstCombine/sink-alloca.ll new file mode 100644 index 00000000000000..f2de74ff533ba2 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/sink-alloca.ll @@ -0,0 +1,52 @@ +; RUN: opt -instcombine -S < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" +target triple = "i686-unknown-linux-gnu" + +; Check that instcombine doesn't sink dynamic allocas across llvm.stacksave. + +; Helper to generate branch conditions. +declare i1 @cond() + +declare i32* @use_and_return(i32*) + +declare i8* @llvm.stacksave() #0 + +declare void @llvm.stackrestore(i8*) #0 + +define void @foo(i32 %x) { +entry: + %c1 = call i1 @cond() + br i1 %c1, label %ret, label %nonentry + +nonentry: ; preds = %entry + %argmem = alloca i32, i32 %x, align 4 + %sp = call i8* @llvm.stacksave() + %c2 = call i1 @cond() + br i1 %c2, label %ret, label %sinktarget + +sinktarget: ; preds = %nonentry + ; Arrange for there to be a single use of %argmem by returning it. + %p = call i32* @use_and_return(i32* nonnull %argmem) + store i32 13, i32* %p, align 4 + call void @llvm.stackrestore(i8* %sp) + %0 = call i32* @use_and_return(i32* %p) + br label %ret + +ret: ; preds = %sinktarget, %nonentry, %entry + ret void +} + +; CHECK-LABEL: define void @foo(i32 %x) +; CHECK: nonentry: +; CHECK: %argmem = alloca i32, i32 %x +; CHECK: %sp = call i8* @llvm.stacksave() +; CHECK: %c2 = call i1 @cond() +; CHECK: br i1 %c2, label %ret, label %sinktarget +; CHECK: sinktarget: +; CHECK: %p = call i32* @use_and_return(i32* nonnull %argmem) +; CHECK: store i32 13, i32* %p +; CHECK: call void @llvm.stackrestore(i8* %sp) +; CHECK: %0 = call i32* @use_and_return(i32* %p) + +attributes #0 = { nounwind } From 47149c4d12111e2db1072edf57ee669281b5089f Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 18 Jan 2019 09:02:40 +0000 Subject: [PATCH 016/125] Merging r351325: ------------------------------------------------------------------------ r351325 | gbuella | 2019-01-16 13:06:17 +0100 (Wed, 16 Jan 2019) | 26 lines Assertion in isAllocaPromotable due to extra bitcast goes into lifetime marker For the given test SROA detects possible replacement and creates a correct alloca. After that SROA is adding lifetime markers for this new alloca. The function getNewAllocaSlicePtr is trying to deduce the pointer type based on the original alloca, which is split, to use it later in lifetime intrinsic. For the test we ended up with such code (rA is initial alloca [10 x float], which is split, and rA.sroa.0.0 is a new split allocation) ``` %rA.sroa.0.0.rA.sroa_cast = bitcast i32* %rA.sroa.0 to [10 x float]* <----- this one causing the assertion and is an extra bitcast %5 = bitcast [10 x float]* %rA.sroa.0.0.rA.sroa_cast to i8* call void @llvm.lifetime.start.p0i8(i64 4, i8* %5) ``` isAllocaPromotable code assumes that a user of alloca may go into lifetime marker through bitcast but it must be the only one bitcast to i8* type. In the test it's not a i8* type, return false and throw the assertion. As we are creating a pointer, which will be used in lifetime markers only, the proposed fix is to create a bitcast to i8* immediately to avoid extra bitcast creation. The test is a greatly simplified to just reproduce the assertion. Author: Igor Tsimbalist Reviewers: chandlerc, craig.topper Reviewed By: chandlerc Differential Revision: https://reviews.llvm.org/D55934 ------------------------------------------------------------------------ llvm-svn: 351532 --- llvm/lib/Transforms/Scalar/SROA.cpp | 5 ++- llvm/test/Transforms/SROA/basictest.ll | 49 ++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index eab77cf4cda9b2..68ca6c47c8f1a4 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -3031,7 +3031,10 @@ class llvm::sroa::AllocaSliceRewriter ConstantInt *Size = ConstantInt::get(cast(II.getArgOperand(0)->getType()), NewEndOffset - NewBeginOffset); - Value *Ptr = getNewAllocaSlicePtr(IRB, OldPtr->getType()); + // Lifetime intrinsics always expect an i8* so directly get such a pointer + // for the new alloca slice. + Type *PointerTy = IRB.getInt8PtrTy(OldPtr->getType()->getPointerAddressSpace()); + Value *Ptr = getNewAllocaSlicePtr(IRB, PointerTy); Value *New; if (II.getIntrinsicID() == Intrinsic::lifetime_start) New = IRB.CreateLifetimeStart(Ptr, Size); diff --git a/llvm/test/Transforms/SROA/basictest.ll b/llvm/test/Transforms/SROA/basictest.ll index a72da6399f22c1..2c5829d6fcecbe 100644 --- a/llvm/test/Transforms/SROA/basictest.ll +++ b/llvm/test/Transforms/SROA/basictest.ll @@ -1745,6 +1745,55 @@ entry: ret void } +declare void @llvm.lifetime.start.isVoid.i64.p0i8(i64, [10 x float]* nocapture) +declare void @llvm.lifetime.end.isVoid.i64.p0i8(i64, [10 x float]* nocapture) +@array = dso_local global [10 x float] undef, align 4 + +define void @test29(i32 %num, i32 %tid) { +; CHECK-LABEL: @test29( +; CHECK-NOT: alloca [10 x float] +; CHECK: ret void + +entry: + %ra = alloca [10 x float], align 4 + call void @llvm.lifetime.start.isVoid.i64.p0i8(i64 40, [10 x float]* nonnull %ra) + + %cmp1 = icmp sgt i32 %num, 0 + br i1 %cmp1, label %bb1, label %bb7 + +bb1: + %tobool = icmp eq i32 %tid, 0 + %conv.i = zext i32 %tid to i64 + %0 = bitcast [10 x float]* %ra to i32* + %1 = load i32, i32* %0, align 4 + %arrayidx5 = getelementptr inbounds [10 x float], [10 x float]* @array, i64 0, i64 %conv.i + %2 = bitcast float* %arrayidx5 to i32* + br label %bb2 + +bb2: + %i.02 = phi i32 [ %num, %bb1 ], [ %sub, %bb5 ] + br i1 %tobool, label %bb3, label %bb4 + +bb3: + br label %bb5 + +bb4: + store i32 %1, i32* %2, align 4 + br label %bb5 + +bb5: + %sub = add i32 %i.02, -1 + %cmp = icmp sgt i32 %sub, 0 + br i1 %cmp, label %bb2, label %bb6 + +bb6: + br label %bb7 + +bb7: + call void @llvm.lifetime.end.isVoid.i64.p0i8(i64 40, [10 x float]* nonnull %ra) + ret void +} + !0 = !{!1, !1, i64 0, i64 1} !1 = !{!2, i64 1, !"type_0"} !2 = !{!"root"} From c4fa34c39aa7c083b598bb3a0551b74b3943789f Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 18 Jan 2019 09:32:52 +0000 Subject: [PATCH 017/125] Merging r351457: ------------------------------------------------------------------------ r351457 | vlad.tsyrklevich | 2019-01-17 18:53:45 +0100 (Thu, 17 Jan 2019) | 15 lines TLS: Respect visibility for thread_local variables on Darwin (PR40327) Summary: Teach clang to mark thread wrappers for thread_local variables with hidden visibility when the original variable is marked with hidden visibility. This is necessary on Darwin which exposes the thread wrapper instead of the thread variable. The thread wrapper would previously always be created with default visibility unless it had linkonce*/weak_odr linkage. Reviewers: rjmccall Reviewed By: rjmccall Differential Revision: https://reviews.llvm.org/D56818 ------------------------------------------------------------------------ llvm-svn: 351533 --- clang/lib/CodeGen/ItaniumCXXABI.cpp | 10 ++++++---- .../cxx11-thread-local-visibility.cpp | 17 +++++++++++++++++ clang/test/CodeGenCXX/cxx11-thread-local.cpp | 2 +- 3 files changed, 24 insertions(+), 5 deletions(-) create mode 100644 clang/test/CodeGenCXX/cxx11-thread-local-visibility.cpp diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index b53304528c3d82..c56875a0368099 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -2463,10 +2463,12 @@ ItaniumCXXABI::getOrCreateThreadLocalWrapper(const VarDecl *VD, CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Wrapper); // Always resolve references to the wrapper at link time. - if (!Wrapper->hasLocalLinkage() && !(isThreadWrapperReplaceable(VD, CGM) && - !llvm::GlobalVariable::isLinkOnceLinkage(Wrapper->getLinkage()) && - !llvm::GlobalVariable::isWeakODRLinkage(Wrapper->getLinkage()))) - Wrapper->setVisibility(llvm::GlobalValue::HiddenVisibility); + if (!Wrapper->hasLocalLinkage()) + if (!isThreadWrapperReplaceable(VD, CGM) || + llvm::GlobalVariable::isLinkOnceLinkage(Wrapper->getLinkage()) || + llvm::GlobalVariable::isWeakODRLinkage(Wrapper->getLinkage()) || + VD->getVisibility() == HiddenVisibility) + Wrapper->setVisibility(llvm::GlobalValue::HiddenVisibility); if (isThreadWrapperReplaceable(VD, CGM)) { Wrapper->setCallingConv(llvm::CallingConv::CXX_FAST_TLS); diff --git a/clang/test/CodeGenCXX/cxx11-thread-local-visibility.cpp b/clang/test/CodeGenCXX/cxx11-thread-local-visibility.cpp new file mode 100644 index 00000000000000..b46d41d7c96049 --- /dev/null +++ b/clang/test/CodeGenCXX/cxx11-thread-local-visibility.cpp @@ -0,0 +1,17 @@ +// RUN: %clang_cc1 -std=c++11 -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck --check-prefix=LINUX %s +// RUN: %clang_cc1 -std=c++11 -emit-llvm %s -o - -triple x86_64-apple-darwin12 | FileCheck --check-prefix=DARWIN %s + +// Regression test for PR40327 + +// LINUX: @default_tls = thread_local global i32 +// LINUX: @hidden_tls = hidden thread_local global i32 +// LINUX: define weak_odr hidden i32* @_ZTW11default_tls() +// LINUX: define weak_odr hidden i32* @_ZTW10hidden_tls() +// +// DARWIN: @default_tls = internal thread_local global i32 +// DARWIN: @hidden_tls = internal thread_local global i32 +// DARWIN: define cxx_fast_tlscc i32* @_ZTW11default_tls() +// DARWIN: define hidden cxx_fast_tlscc i32* @_ZTW10hidden_tls() + +__attribute__((visibility("default"))) thread_local int default_tls; +__attribute__((visibility("hidden"))) thread_local int hidden_tls; diff --git a/clang/test/CodeGenCXX/cxx11-thread-local.cpp b/clang/test/CodeGenCXX/cxx11-thread-local.cpp index 156c4f591908b1..de941af1afb87a 100644 --- a/clang/test/CodeGenCXX/cxx11-thread-local.cpp +++ b/clang/test/CodeGenCXX/cxx11-thread-local.cpp @@ -318,7 +318,7 @@ void set_anon_i() { // CHECK-NOT: call void @[[V_M_INIT]]() -// LIUNX: define weak_odr hidden i32* @_ZTW1a() { +// LINUX: define weak_odr hidden i32* @_ZTW1a() // DARWIN: define cxx_fast_tlscc i32* @_ZTW1a() // LINUX: call void @_ZTH1a() // DARWIN: call cxx_fast_tlscc void @_ZTH1a() From ad1624ff4fe7303d63dadf61f8ea8734c931a407 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 18 Jan 2019 09:52:52 +0000 Subject: [PATCH 018/125] Merging r351459: ------------------------------------------------------------------------ r351459 | arphaman | 2019-01-17 19:12:45 +0100 (Thu, 17 Jan 2019) | 13 lines [ObjC] Follow-up r350768 and allow the use of unavailable methods that are declared in a parent class from within the @implementation context This commit extends r350768 and allows the use of methods marked as unavailable that are declared in a parent class/category from within the @implementation of the class where the method is marked as unavailable. This allows users to call init that's marked as unavailable even if they don't define it. rdar://47134898 Differential Revision: https://reviews.llvm.org/D56816 ------------------------------------------------------------------------ llvm-svn: 351535 --- clang/lib/Sema/SemaDeclAttr.cpp | 8 +++---- .../SemaObjC/call-unavailable-init-in-self.m | 22 +++++++++++++++++-- .../SemaObjC/infer-availability-from-init.m | 4 ++-- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index eb95cc748b177a..139ac8aab4333c 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -7365,13 +7365,11 @@ ShouldDiagnoseAvailabilityInContext(Sema &S, AvailabilityResult K, return true; } else if (K == AR_Unavailable) { // It is perfectly fine to refer to an 'unavailable' Objective-C method - // when it's actually defined and is referenced from within the - // @implementation itself. In this context, we interpret unavailable as a - // form of access control. + // when it is referenced from within the @implementation itself. In this + // context, we interpret unavailable as a form of access control. if (const auto *MD = dyn_cast(OffendingDecl)) { if (const auto *Impl = dyn_cast(C)) { - if (MD->getClassInterface() == Impl->getClassInterface() && - MD->isDefined()) + if (MD->getClassInterface() == Impl->getClassInterface()) return true; } } diff --git a/clang/test/SemaObjC/call-unavailable-init-in-self.m b/clang/test/SemaObjC/call-unavailable-init-in-self.m index fa6f670cc997f4..48fc2326af457e 100644 --- a/clang/test/SemaObjC/call-unavailable-init-in-self.m +++ b/clang/test/SemaObjC/call-unavailable-init-in-self.m @@ -5,13 +5,24 @@ @interface NSObject + (instancetype)new; + (instancetype)alloc; +- (void)declaredInSuper; + +@end + +@interface NSObject (Category) + +- (void)declaredInSuperCategory; + @end @interface Sub: NSObject - (instancetype)init __attribute__((unavailable)); // expected-note 4 {{'init' has been explicitly marked unavailable here}} -- (void)notImplemented __attribute__((unavailable)); // expected-note {{'notImplemented' has been explicitly marked unavailable here}} +- (void)notImplemented __attribute__((unavailable)); + +- (void)declaredInSuper __attribute__((unavailable)); +- (void)declaredInSuperCategory __attribute__((unavailable)); @end @@ -34,7 +45,14 @@ - (instancetype) init { } - (void)reportUseOfUnimplemented { - [self notImplemented]; // expected-error {{'notImplemented' is unavailable}} + [self notImplemented]; +} + +- (void)allowSuperCallUsingSelf { + [self declaredInSuper]; + [[Sub alloc] declaredInSuper]; + [self declaredInSuperCategory]; + [[Sub alloc] declaredInSuperCategory]; } @end diff --git a/clang/test/SemaObjC/infer-availability-from-init.m b/clang/test/SemaObjC/infer-availability-from-init.m index f9996ec70877eb..7aa1e53c09109e 100644 --- a/clang/test/SemaObjC/infer-availability-from-init.m +++ b/clang/test/SemaObjC/infer-availability-from-init.m @@ -47,12 +47,12 @@ void usenotmyobject() { } @interface FromSelf : NSObject --(instancetype)init __attribute__((unavailable)); // expected-note {{'init' has been explicitly marked unavailable here}} +-(instancetype)init __attribute__((unavailable)); +(FromSelf*)another_one; @end @implementation FromSelf +(FromSelf*)another_one { - [self new]; // expected-error{{'new' is unavailable}} + [self new]; } @end From d4d3f779635f46bde171f9c6573e484c56201138 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 18 Jan 2019 09:57:06 +0000 Subject: [PATCH 019/125] Merging r351504: ------------------------------------------------------------------------ r351504 | brad | 2019-01-18 02:36:58 +0100 (Fri, 18 Jan 2019) | 2 lines Use llvm::VersionTuple instead of manual version marshalling ------------------------------------------------------------------------ llvm-svn: 351536 --- lldb/include/lldb/Host/openbsd/HostInfoOpenBSD.h | 3 ++- lldb/source/Host/openbsd/HostInfoOpenBSD.cpp | 11 ++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/lldb/include/lldb/Host/openbsd/HostInfoOpenBSD.h b/lldb/include/lldb/Host/openbsd/HostInfoOpenBSD.h index 5a0388ffdd9770..809a6f4461f51c 100644 --- a/lldb/include/lldb/Host/openbsd/HostInfoOpenBSD.h +++ b/lldb/include/lldb/Host/openbsd/HostInfoOpenBSD.h @@ -12,12 +12,13 @@ #include "lldb/Host/posix/HostInfoPosix.h" #include "lldb/Utility/FileSpec.h" +#include "llvm/Support/VersionTuple.h" namespace lldb_private { class HostInfoOpenBSD : public HostInfoPosix { public: - static bool GetOSVersion(uint32_t &major, uint32_t &minor, uint32_t &update); + static llvm::VersionTuple GetOSVersion(); static bool GetOSBuildString(std::string &s); static bool GetOSKernelDescription(std::string &s); static FileSpec GetProgramFileSpec(); diff --git a/lldb/source/Host/openbsd/HostInfoOpenBSD.cpp b/lldb/source/Host/openbsd/HostInfoOpenBSD.cpp index 548958899322c5..cf7acb79da00ed 100644 --- a/lldb/source/Host/openbsd/HostInfoOpenBSD.cpp +++ b/lldb/source/Host/openbsd/HostInfoOpenBSD.cpp @@ -17,16 +17,17 @@ using namespace lldb_private; -bool HostInfoOpenBSD::GetOSVersion(uint32_t &major, uint32_t &minor, - uint32_t &update) { +llvm::VersionTuple HostInfoOpenBSD::GetOSVersion() { struct utsname un; ::memset(&un, 0, sizeof(utsname)); if (uname(&un) < 0) - return false; + return llvm::VersionTuple(); - int status = sscanf(un.release, "%u.%u", &major, &minor); - return status == 2; + unsigned major, minor; + if (2 == sscanf(un.release, "%u.%u", &major, &minor)) + return llvm::VersionTuple(major, minor); + return llvm::VersionTuple(); } bool HostInfoOpenBSD::GetOSBuildString(std::string &s) { From 6e751e810a270593f3f2d2fd8767aaf73237d034 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 18 Jan 2019 10:00:51 +0000 Subject: [PATCH 020/125] Merging r351463, r351466, r351467, and r351468 ------------------------------------------------------------------------ r351463 | eugenezelenko | 2019-01-17 19:31:34 +0100 (Thu, 17 Jan 2019) | 6 lines [Documentation] Add a chapter about Clang-tidy integrations. Patch by Marina Kalashina. Differential Revision: https://reviews.llvm.org/D54945 ------------------------------------------------------------------------ ------------------------------------------------------------------------ r351466 | eugenezelenko | 2019-01-17 20:35:39 +0100 (Thu, 17 Jan 2019) | 2 lines [Documentation] Fix link in docs/clang-tidy/Contributing.rst. ------------------------------------------------------------------------ ------------------------------------------------------------------------ r351467 | eugenezelenko | 2019-01-17 20:47:44 +0100 (Thu, 17 Jan 2019) | 2 lines [Documentation] Another attempt to fix link in docs/clang-tidy/Contributing.rst. Use HTTPS for links. ------------------------------------------------------------------------ ------------------------------------------------------------------------ r351468 | eugenezelenko | 2019-01-17 21:00:23 +0100 (Thu, 17 Jan 2019) | 2 lines [Documentation] Fix another link in docs/clang-tidy/Contributing.rst. ------------------------------------------------------------------------ llvm-svn: 351538 --- .../docs/clang-tidy/Contributing.rst | 512 ++++++++++++++++++ .../docs/clang-tidy/Integrations.rst | 117 ++++ clang-tools-extra/docs/clang-tidy/index.rst | 510 +---------------- 3 files changed, 631 insertions(+), 508 deletions(-) create mode 100644 clang-tools-extra/docs/clang-tidy/Contributing.rst create mode 100644 clang-tools-extra/docs/clang-tidy/Integrations.rst diff --git a/clang-tools-extra/docs/clang-tidy/Contributing.rst b/clang-tools-extra/docs/clang-tidy/Contributing.rst new file mode 100644 index 00000000000000..6d61809ecb337a --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/Contributing.rst @@ -0,0 +1,512 @@ +================ +Getting Involved +================ + +:program:`clang-tidy` has several own checks and can run Clang static analyzer +checks, but its power is in the ability to easily write custom checks. + +Checks are organized in modules, which can be linked into :program:`clang-tidy` +with minimal or no code changes in :program:`clang-tidy`. + +Checks can plug into the analysis on the preprocessor level using `PPCallbacks`_ +or on the AST level using `AST Matchers`_. When an error is found, checks can +report them in a way similar to how Clang diagnostics work. A fix-it hint can be +attached to a diagnostic message. + +The interface provided by :program:`clang-tidy` makes it easy to write useful +and precise checks in just a few lines of code. If you have an idea for a good +check, the rest of this document explains how to do this. + +There are a few tools particularly useful when developing clang-tidy checks: + * ``add_new_check.py`` is a script to automate the process of adding a new + check, it will create the check, update the CMake file and create a test; + * ``rename_check.py`` does what the script name suggests, renames an existing + check; + * :program:`clang-query` is invaluable for interactive prototyping of AST + matchers and exploration of the Clang AST; + * `clang-check`_ with the ``-ast-dump`` (and optionally ``-ast-dump-filter``) + provides a convenient way to dump AST of a C++ program. + +If CMake is configured with ``CLANG_ENABLE_STATIC_ANALYZER``, +:program:`clang-tidy` will not be built with support for the +``clang-analyzer-*`` checks or the ``mpi-*`` checks. + + +.. _AST Matchers: https://clang.llvm.org/docs/LibASTMatchers.html +.. _PPCallbacks: https://clang.llvm.org/doxygen/classclang_1_1PPCallbacks.html +.. _clang-check: https://clang.llvm.org/docs/ClangCheck.html + + +Choosing the Right Place for your Check +--------------------------------------- + +If you have an idea of a check, you should decide whether it should be +implemented as a: + ++ *Clang diagnostic*: if the check is generic enough, targets code patterns that + most probably are bugs (rather than style or readability issues), can be + implemented effectively and with extremely low false positive rate, it may + make a good Clang diagnostic. + ++ *Clang static analyzer check*: if the check requires some sort of control flow + analysis, it should probably be implemented as a static analyzer check. + ++ *clang-tidy check* is a good choice for linter-style checks, checks that are + related to a certain coding style, checks that address code readability, etc. + + +Preparing your Workspace +------------------------ + +If you are new to LLVM development, you should read the `Getting Started with +the LLVM System`_, `Using Clang Tools`_ and `How To Setup Clang Tooling For +LLVM`_ documents to check out and build LLVM, Clang and Clang Extra Tools with +CMake. + +Once you are done, change to the ``llvm/tools/clang/tools/extra`` directory, and +let's start! + +.. _Getting Started with the LLVM System: https://llvm.org/docs/GettingStarted.html +.. _Using Clang Tools: https://clang.llvm.org/docs/ClangTools.html +.. _How To Setup Clang Tooling For LLVM: https://clang.llvm.org/docs/HowToSetupToolingForLLVM.html + + +The Directory Structure +----------------------- + +:program:`clang-tidy` source code resides in the +``llvm/tools/clang/tools/extra`` directory and is structured as follows: + +:: + + clang-tidy/ # Clang-tidy core. + |-- ClangTidy.h # Interfaces for users and checks. + |-- ClangTidyModule.h # Interface for clang-tidy modules. + |-- ClangTidyModuleRegistry.h # Interface for registering of modules. + ... + |-- google/ # Google clang-tidy module. + |-+ + |-- GoogleTidyModule.cpp + |-- GoogleTidyModule.h + ... + |-- llvm/ # LLVM clang-tidy module. + |-+ + |-- LLVMTidyModule.cpp + |-- LLVMTidyModule.h + ... + |-- objc/ # Objective-C clang-tidy module. + |-+ + |-- ObjCTidyModule.cpp + |-- ObjCTidyModule.h + ... + |-- tool/ # Sources of the clang-tidy binary. + ... + test/clang-tidy/ # Integration tests. + ... + unittests/clang-tidy/ # Unit tests. + |-- ClangTidyTest.h + |-- GoogleModuleTest.cpp + |-- LLVMModuleTest.cpp + |-- ObjCModuleTest.cpp + ... + + +Writing a clang-tidy Check +-------------------------- + +So you have an idea of a useful check for :program:`clang-tidy`. + +First, if you're not familiar with LLVM development, read through the `Getting +Started with LLVM`_ document for instructions on setting up your workflow and +the `LLVM Coding Standards`_ document to familiarize yourself with the coding +style used in the project. For code reviews we mostly use `LLVM Phabricator`_. + +.. _Getting Started with LLVM: https://llvm.org/docs/GettingStarted.html +.. _LLVM Coding Standards: https://llvm.org/docs/CodingStandards.html +.. _LLVM Phabricator: https://llvm.org/docs/Phabricator.html + +Next, you need to decide which module the check belongs to. Modules +are located in subdirectories of `clang-tidy/ +`_ +and contain checks targeting a certain aspect of code quality (performance, +readability, etc.), certain coding style or standard (Google, LLVM, CERT, etc.) +or a widely used API (e.g. MPI). Their names are same as user-facing check +groups names described :ref:`above `. + +After choosing the module and the name for the check, run the +``clang-tidy/add_new_check.py`` script to create the skeleton of the check and +plug it to :program:`clang-tidy`. It's the recommended way of adding new checks. + +If we want to create a `readability-awesome-function-names`, we would run: + +.. code-block:: console + + $ clang-tidy/add_new_check.py readability awesome-function-names + + +The ``add_new_check.py`` script will: + * create the class for your check inside the specified module's directory and + register it in the module and in the build system; + * create a lit test file in the ``test/clang-tidy/`` directory; + * create a documentation file and include it into the + ``docs/clang-tidy/checks/list.rst``. + +Let's see in more detail at the check class definition: + +.. code-block:: c++ + + ... + + #include "../ClangTidy.h" + + namespace clang { + namespace tidy { + namespace readability { + + ... + class AwesomeFunctionNamesCheck : public ClangTidyCheck { + public: + AwesomeFunctionNamesCheck(StringRef Name, ClangTidyContext *Context) + : ClangTidyCheck(Name, Context) {} + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + void check(const ast_matchers::MatchFinder::MatchResult &Result) override; + }; + + } // namespace readability + } // namespace tidy + } // namespace clang + + ... + +Constructor of the check receives the ``Name`` and ``Context`` parameters, and +must forward them to the ``ClangTidyCheck`` constructor. + +In our case the check needs to operate on the AST level and it overrides the +``registerMatchers`` and ``check`` methods. If we wanted to analyze code on the +preprocessor level, we'd need instead to override the ``registerPPCallbacks`` +method. + +In the ``registerMatchers`` method we create an AST Matcher (see `AST Matchers`_ +for more information) that will find the pattern in the AST that we want to +inspect. The results of the matching are passed to the ``check`` method, which +can further inspect them and report diagnostics. + +.. code-block:: c++ + + using namespace ast_matchers; + + void AwesomeFunctionNamesCheck::registerMatchers(MatchFinder *Finder) { + Finder->addMatcher(functionDecl().bind("x"), this); + } + + void AwesomeFunctionNamesCheck::check(const MatchFinder::MatchResult &Result) { + const auto *MatchedDecl = Result.Nodes.getNodeAs("x"); + if (MatchedDecl->getName().startswith("awesome_")) + return; + diag(MatchedDecl->getLocation(), "function %0 is insufficiently awesome") + << MatchedDecl + << FixItHint::CreateInsertion(MatchedDecl->getLocation(), "awesome_"); + } + +(If you want to see an example of a useful check, look at +`clang-tidy/google/ExplicitConstructorCheck.h +`_ +and `clang-tidy/google/ExplicitConstructorCheck.cpp +`_). + + +Registering your Check +---------------------- + +(The ``add_new_check.py`` takes care of registering the check in an existing +module. If you want to create a new module or know the details, read on.) + +The check should be registered in the corresponding module with a distinct name: + +.. code-block:: c++ + + class MyModule : public ClangTidyModule { + public: + void addCheckFactories(ClangTidyCheckFactories &CheckFactories) override { + CheckFactories.registerCheck( + "my-explicit-constructor"); + } + }; + +Now we need to register the module in the ``ClangTidyModuleRegistry`` using a +statically initialized variable: + +.. code-block:: c++ + + static ClangTidyModuleRegistry::Add X("my-module", + "Adds my lint checks."); + + +When using LLVM build system, we need to use the following hack to ensure the +module is linked into the :program:`clang-tidy` binary: + +Add this near the ``ClangTidyModuleRegistry::Add`` variable: + +.. code-block:: c++ + + // This anchor is used to force the linker to link in the generated object file + // and thus register the MyModule. + volatile int MyModuleAnchorSource = 0; + +And this to the main translation unit of the :program:`clang-tidy` binary (or +the binary you link the ``clang-tidy`` library in) +``clang-tidy/tool/ClangTidyMain.cpp``: + +.. code-block:: c++ + + // This anchor is used to force the linker to link the MyModule. + extern volatile int MyModuleAnchorSource; + static int MyModuleAnchorDestination = MyModuleAnchorSource; + + +Configuring Checks +------------------ + +If a check needs configuration options, it can access check-specific options +using the ``Options.get("SomeOption", DefaultValue)`` call in the check +constructor. In this case the check should also override the +``ClangTidyCheck::storeOptions`` method to make the options provided by the +check discoverable. This method lets :program:`clang-tidy` know which options +the check implements and what the current values are (e.g. for the +``-dump-config`` command line option). + +.. code-block:: c++ + + class MyCheck : public ClangTidyCheck { + const unsigned SomeOption1; + const std::string SomeOption2; + + public: + MyCheck(StringRef Name, ClangTidyContext *Context) + : ClangTidyCheck(Name, Context), + SomeOption(Options.get("SomeOption1", -1U)), + SomeOption(Options.get("SomeOption2", "some default")) {} + + void storeOptions(ClangTidyOptions::OptionMap &Opts) override { + Options.store(Opts, "SomeOption1", SomeOption1); + Options.store(Opts, "SomeOption2", SomeOption2); + } + ... + +Assuming the check is registered with the name "my-check", the option can then +be set in a ``.clang-tidy`` file in the following way: + +.. code-block:: yaml + + CheckOptions: + - key: my-check.SomeOption1 + value: 123 + - key: my-check.SomeOption2 + value: 'some other value' + +If you need to specify check options on a command line, you can use the inline +YAML format: + +.. code-block:: console + + $ clang-tidy -config="{CheckOptions: [{key: a, value: b}, {key: x, value: y}]}" ... + + +Testing Checks +-------------- + +To run tests for :program:`clang-tidy` use the command: + +.. code-block:: console + + $ ninja check-clang-tools + +:program:`clang-tidy` checks can be tested using either unit tests or +`lit`_ tests. Unit tests may be more convenient to test complex replacements +with strict checks. `Lit`_ tests allow using partial text matching and regular +expressions which makes them more suitable for writing compact tests for +diagnostic messages. + +The ``check_clang_tidy.py`` script provides an easy way to test both +diagnostic messages and fix-its. It filters out ``CHECK`` lines from the test +file, runs :program:`clang-tidy` and verifies messages and fixes with two +separate `FileCheck`_ invocations: once with FileCheck's directive +prefix set to ``CHECK-MESSAGES``, validating the diagnostic messages, +and once with the directive prefix set to ``CHECK-FIXES``, running +against the fixed code (i.e., the code after generated fix-its are +applied). In particular, ``CHECK-FIXES:`` can be used to check +that code was not modified by fix-its, by checking that it is present +unchanged in the fixed code. The full set of `FileCheck`_ directives +is available (e.g., ``CHECK-MESSAGES-SAME:``, ``CHECK-MESSAGES-NOT:``), though +typically the basic ``CHECK`` forms (``CHECK-MESSAGES`` and ``CHECK-FIXES``) +are sufficient for clang-tidy tests. Note that the `FileCheck`_ +documentation mostly assumes the default prefix (``CHECK``), and hence +describes the directive as ``CHECK:``, ``CHECK-SAME:``, ``CHECK-NOT:``, etc. +Replace ``CHECK`` by either ``CHECK-FIXES`` or ``CHECK-MESSAGES`` for +clang-tidy tests. + +An additional check enabled by ``check_clang_tidy.py`` ensures that +if `CHECK-MESSAGES:` is used in a file then every warning or error +must have an associated CHECK in that file. Or, you can use ``CHECK-NOTES:`` +instead, if you want to **also** ensure that all the notes are checked. + +To use the ``check_clang_tidy.py`` script, put a .cpp file with the +appropriate ``RUN`` line in the ``test/clang-tidy`` directory. Use +``CHECK-MESSAGES:`` and ``CHECK-FIXES:`` lines to write checks against +diagnostic messages and fixed code. + +It's advised to make the checks as specific as possible to avoid checks matching +to incorrect parts of the input. Use ``[[@LINE+X]]``/``[[@LINE-X]]`` +substitutions and distinct function and variable names in the test code. + +Here's an example of a test using the ``check_clang_tidy.py`` script (the full +source code is at `test/clang-tidy/google-readability-casting.cpp`_): + +.. code-block:: c++ + + // RUN: %check_clang_tidy %s google-readability-casting %t + + void f(int a) { + int b = (int)a; + // CHECK-MESSAGES: :[[@LINE-1]]:11: warning: redundant cast to the same type [google-readability-casting] + // CHECK-FIXES: int b = a; + } + +To check more than one scenario in the same test file use +``-check-suffix=SUFFIX-NAME`` on ``check_clang_tidy.py`` command line or +``-check-suffixes=SUFFIX-NAME-1,SUFFIX-NAME-2,...``. +With ``-check-suffix[es]=SUFFIX-NAME`` you need to replace your ``CHECK-*`` +directives with ``CHECK-MESSAGES-SUFFIX-NAME`` and ``CHECK-FIXES-SUFFIX-NAME``. + +Here's an example: + +.. code-block:: c++ + + // RUN: %check_clang_tidy -check-suffix=USING-A %s misc-unused-using-decls %t -- -- -DUSING_A + // RUN: %check_clang_tidy -check-suffix=USING-B %s misc-unused-using-decls %t -- -- -DUSING_B + // RUN: %check_clang_tidy %s misc-unused-using-decls %t + ... + // CHECK-MESSAGES-USING-A: :[[@LINE-8]]:10: warning: using decl 'A' {{.*}} + // CHECK-MESSAGES-USING-B: :[[@LINE-7]]:10: warning: using decl 'B' {{.*}} + // CHECK-MESSAGES: :[[@LINE-6]]:10: warning: using decl 'C' {{.*}} + // CHECK-FIXES-USING-A-NOT: using a::A;$ + // CHECK-FIXES-USING-B-NOT: using a::B;$ + // CHECK-FIXES-NOT: using a::C;$ + + +There are many dark corners in the C++ language, and it may be difficult to make +your check work perfectly in all cases, especially if it issues fix-it hints. The +most frequent pitfalls are macros and templates: + +1. code written in a macro body/template definition may have a different meaning + depending on the macro expansion/template instantiation; +2. multiple macro expansions/template instantiations may result in the same code + being inspected by the check multiple times (possibly, with different + meanings, see 1), and the same warning (or a slightly different one) may be + issued by the check multiple times; :program:`clang-tidy` will deduplicate + _identical_ warnings, but if the warnings are slightly different, all of them + will be shown to the user (and used for applying fixes, if any); +3. making replacements to a macro body/template definition may be fine for some + macro expansions/template instantiations, but easily break some other + expansions/instantiations. + +.. _lit: https://llvm.org/docs/CommandGuide/lit.html +.. _FileCheck: https://llvm.org/docs/CommandGuide/FileCheck.html +.. _test/clang-tidy/google-readability-casting.cpp: https://reviews.llvm.org/diffusion/L/browse/clang-tools-extra/trunk/test/clang-tidy/google-readability-casting.cpp + + +Running clang-tidy on LLVM +-------------------------- + +To test a check it's best to try it out on a larger code base. LLVM and Clang +are the natural targets as you already have the source code around. The most +convenient way to run :program:`clang-tidy` is with a compile command database; +CMake can automatically generate one, for a description of how to enable it see +`How To Setup Clang Tooling For LLVM`_. Once ``compile_commands.json`` is in +place and a working version of :program:`clang-tidy` is in ``PATH`` the entire +code base can be analyzed with ``clang-tidy/tool/run-clang-tidy.py``. The script +executes :program:`clang-tidy` with the default set of checks on every +translation unit in the compile command database and displays the resulting +warnings and errors. The script provides multiple configuration flags. + +.. _How To Setup Clang Tooling For LLVM: https://clang.llvm.org/docs/HowToSetupToolingForLLVM.html + + +* The default set of checks can be overridden using the ``-checks`` argument, + taking the identical format as :program:`clang-tidy` does. For example + ``-checks=-*,modernize-use-override`` will run the ``modernize-use-override`` + check only. + +* To restrict the files examined you can provide one or more regex arguments + that the file names are matched against. + ``run-clang-tidy.py clang-tidy/.*Check\.cpp`` will only analyze clang-tidy + checks. It may also be necessary to restrict the header files warnings are + displayed from using the ``-header-filter`` flag. It has the same behavior + as the corresponding :program:`clang-tidy` flag. + +* To apply suggested fixes ``-fix`` can be passed as an argument. This gathers + all changes in a temporary directory and applies them. Passing ``-format`` + will run clang-format over changed lines. + + +On checks profiling +------------------- + +:program:`clang-tidy` can collect per-check profiling info, and output it +for each processed source file (translation unit). + +To enable profiling info collection, use the ``-enable-check-profile`` argument. +The timings will be output to ``stderr`` as a table. Example output: + +.. code-block:: console + + $ clang-tidy -enable-check-profile -checks=-*,readability-function-size source.cpp + ===-------------------------------------------------------------------------=== + clang-tidy checks profiling + ===-------------------------------------------------------------------------=== + Total Execution Time: 1.0282 seconds (1.0258 wall clock) + + ---User Time--- --System Time-- --User+System-- ---Wall Time--- --- Name --- + 0.9136 (100.0%) 0.1146 (100.0%) 1.0282 (100.0%) 1.0258 (100.0%) readability-function-size + 0.9136 (100.0%) 0.1146 (100.0%) 1.0282 (100.0%) 1.0258 (100.0%) Total + +It can also store that data as JSON files for further processing. Example output: + +.. code-block:: console + + $ clang-tidy -enable-check-profile -store-check-profile=. -checks=-*,readability-function-size source.cpp + $ # Note that there won't be timings table printed to the console. + $ ls /tmp/out/ + 20180516161318717446360-source.cpp.json + $ cat 20180516161318717446360-source.cpp.json + { + "file": "/path/to/source.cpp", + "timestamp": "2018-05-16 16:13:18.717446360", + "profile": { + "time.clang-tidy.readability-function-size.wall": 1.0421266555786133e+00, + "time.clang-tidy.readability-function-size.user": 9.2088400000005421e-01, + "time.clang-tidy.readability-function-size.sys": 1.2418899999999974e-01 + } + } + +There is only one argument that controls profile storage: + +* ``-store-check-profile=`` + + By default reports are printed in tabulated format to stderr. When this option + is passed, these per-TU profiles are instead stored as JSON. + If the prefix is not an absolute path, it is considered to be relative to the + directory from where you have run :program:`clang-tidy`. All ``.`` and ``..`` + patterns in the path are collapsed, and symlinks are resolved. + + Example: + Let's suppose you have a source file named ``example.cpp``, located in the + ``/source`` directory. Only the input filename is used, not the full path + to the source file. Additionally, it is prefixed with the current timestamp. + + * If you specify ``-store-check-profile=/tmp``, then the profile will be saved + to ``/tmp/-example.cpp.json`` + + * If you run :program:`clang-tidy` from within ``/foo`` directory, and specify + ``-store-check-profile=.``, then the profile will still be saved to + ``/foo/-example.cpp.json`` diff --git a/clang-tools-extra/docs/clang-tidy/Integrations.rst b/clang-tools-extra/docs/clang-tidy/Integrations.rst new file mode 100644 index 00000000000000..2d1e195645f5df --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/Integrations.rst @@ -0,0 +1,117 @@ +================================== +Clang-tidy IDE/Editor Integrations +================================== + +.. _Clangd: https://clang.llvm.org/extra/clangd.html + +Apart from being a standalone tool, :program:`clang-tidy` is integrated into +various IDEs, code analyzers, and editors. Besides, it is currently being +integrated into Clangd_. The following table shows the most +well-known :program:`clang-tidy` integrations in detail. + ++--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+ +| | Feature | ++======================================+========================+=================================+==========================+=========================================+==========================+ +| **Tool** | On-the-fly inspection | Check list configuration (GUI) | Options to checks (GUI) | Configuration via ``.clang-tidy`` files | Custom clang-tidy binary | ++--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+ +|A.L.E. for Vim | \+\ | \-\ | \-\ | \-\ | \+\ | ++--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+ +|Clang Power Tools for Visual Studio | \-\ | \+\ | \-\ | \+\ | \-\ | ++--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+ +|Clangd | \+\ | \-\ | \-\ | \-\ | \-\ | ++--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+ +|CLion IDE | \+\ | \+\ | \+\ | \+\ | \+\ | ++--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+ +|CodeChecker | \-\ | \-\ | \-\ | \-\ | \+\ | ++--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+ +|CPPCheck | \-\ | \-\ | \-\ | \-\ | \-\ | ++--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+ +|CPPDepend | \-\ | \-\ | \-\ | \-\ | \-\ | ++--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+ +|Flycheck for Emacs | \+\ | \-\ | \-\ | \+\ | \+\ | ++--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+ +|KDevelop IDE | \-\ | \+\ | \+\ | \+\ | \+\ | ++--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+ +|Qt Creator IDE | \+\ | \+\ | \-\ | \-\ | \+\ | ++--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+ +|ReSharper C++ for Visual Studio | \+\ | \+\ | \-\ | \+\ | \-\ | ++--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+ +|Syntastic for Vim | \+\ | \-\ | \-\ | \-\ | \+\ | ++--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+ +|Visual Assist for Visual Studio | \+\ | \+\ | \-\ | \-\ | \-\ | ++--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+ + +**IDEs** + +.. _CLion: https://www.jetbrains.com/clion/ +.. _integrates clang-tidy: https://www.jetbrains.com/help/clion/clang-tidy-checks-support.html + +CLion_ 2017.2 and later `integrates clang-tidy`_ as an extension to the +built-in code analyzer. Starting from 2018.2 EAP, CLion allows using +:program:`clang-tidy` via Clangd. Inspections and applicable quick-fixes are +performed on the fly, and checks can be configured in standard command line +format. In this integration, you can switch to the :program:`clang-tidy` +binary different from the bundled one, pass the configuration in +``.clang-tidy`` files instead of using the IDE settings, and configure +options for particular checks. + +.. _KDevelop: https://www.kdevelop.org/ +.. _kdev-clang-tidy: https://github.com/KDE/kdev-clang-tidy/ + +KDevelop_ with the kdev-clang-tidy_ plugin, starting from version 5.1, performs +static analysis using :program:`clang-tidy`. The plugin launches the +:program:`clang-tidy` binary from the specified location and parses its +output to provide a list of issues. + +.. _QtCreator: https://www.qt.io/ +.. _Clang Code Model: http://doc.qt.io/qtcreator/creator-clang-codemodel.html + +QtCreator_ 4.6 integrates :program:`clang-tidy` warnings into the editor +diagnostics under the `Clang Code Model`_. To employ :program:`clang-tidy` +inspection in QtCreator, you need to create a copy of one of the presets and +choose the checks to be performed in the Clang Code Model Warnings menu. + +.. _MS Visual Studio: https://visualstudio.microsoft.com/ +.. _ReSharper C++: https://www.jetbrains.com/help/resharper/Clang_Tidy_Integration.html +.. _Visual Assist: https://docs.wholetomato.com/default.asp?W761 +.. _Clang Power Tools: https://marketplace.visualstudio.com/items?itemName=caphyon.ClangPowerTools +.. _clang-tidy-vs: https://github.com/llvm-mirror/clang-tools-extra/tree/master/clang-tidy-vs + +`MS Visual Studio`_ has a native clang-tidy-vs_ plugin and also can integrate +:program:`clang-tidy` by means of three other tools. The `ReSharper C++`_ +extension, version 2017.3 and later, provides seamless :program:`clang-tidy` +integration: checks and quick-fixes run alongside native inspections. Apart +from that, ReSharper C++ incorporates :program:`clang-tidy` as a separate +step of its code clean-up process. `Visual Assist`_ build 2210 includes a +subset of :program:`clang-tidy` checklist to inspect the code as you edit. +Another way to bring :program:`clang-tidy` functionality to Visual Studio is +the `Clang Power Tools`_ plugin, which includes most of the +:program:`clang-tidy` checks and runs them during compilation or as a separate +step of code analysis. + +**Editors** + +.. _Flycheck: https://github.com/ch1bo/flycheck-clang-tidy +.. _Syntastic: https://github.com/vim-syntastic/syntastic +.. _A.L.E.: https://github.com/w0rp/ale +.. _Emacs24: https://www.gnu.org/s/emacs/ +.. _Vim: https://www.vim.org/ + +Emacs24_, when expanded with the Flycheck_ plugin, incorporates the +:program:`clang-tidy` inspection into the syntax analyzer. For Vim_, you can +use Syntastic_, which includes :program:`clang-tidy`, or `A.L.E.`_, +a lint engine that applies :program:`clang-tidy` along with other linters. + +**Analyzers** + +.. _CPPDepend: https://www.cppdepend.com/cppdependv2018 +.. _CPPCheck: https://sourceforge.net/p/cppcheck/news/ +.. _CodeChecker: https://github.com/Ericsson/codechecker +.. _plugin: https://github.com/Ericsson/CodeCheckerEclipsePlugin + +:program:`clang-tidy` is integrated in CPPDepend_ starting from version 2018.1 +and CPPCheck_ 1.82. CPPCheck integration lets you import Visual Studio +solutions and run the :program:`clang-tidy` inspection on them. The +CodeChecker_ application of version 5.3 or later, which also comes as a plugin_ +for Eclipse, supports :program:`clang-tidy` as a static analysis instrument and +allows to use a custom :program:`clang-tidy` binary. diff --git a/clang-tools-extra/docs/clang-tidy/index.rst b/clang-tools-extra/docs/clang-tidy/index.rst index 20b18b4bf5989a..4172d13487c53c 100644 --- a/clang-tools-extra/docs/clang-tidy/index.rst +++ b/clang-tools-extra/docs/clang-tidy/index.rst @@ -10,6 +10,8 @@ See also: :maxdepth: 1 The list of clang-tidy checks + Clang-tidy IDE/Editor Integrations + Getting Involved :program:`clang-tidy` is a clang-based C++ "linter" tool. Its purpose is to provide an extensible framework for diagnosing and fixing typical programming @@ -310,511 +312,3 @@ the parenthesis) whitespaces can be used and will be ignored. .. _LibTooling: http://clang.llvm.org/docs/LibTooling.html .. _How To Setup Tooling For LLVM: http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html - - -Getting Involved -================ - -:program:`clang-tidy` has several own checks and can run Clang static analyzer -checks, but its power is in the ability to easily write custom checks. - -Checks are organized in modules, which can be linked into :program:`clang-tidy` -with minimal or no code changes in :program:`clang-tidy`. - -Checks can plug into the analysis on the preprocessor level using `PPCallbacks`_ -or on the AST level using `AST Matchers`_. When an error is found, checks can -report them in a way similar to how Clang diagnostics work. A fix-it hint can be -attached to a diagnostic message. - -The interface provided by :program:`clang-tidy` makes it easy to write useful -and precise checks in just a few lines of code. If you have an idea for a good -check, the rest of this document explains how to do this. - -There are a few tools particularly useful when developing clang-tidy checks: - * ``add_new_check.py`` is a script to automate the process of adding a new - check, it will create the check, update the CMake file and create a test; - * ``rename_check.py`` does what the script name suggests, renames an existing - check; - * :program:`clang-query` is invaluable for interactive prototyping of AST - matchers and exploration of the Clang AST; - * `clang-check`_ with the ``-ast-dump`` (and optionally ``-ast-dump-filter``) - provides a convenient way to dump AST of a C++ program. - -If CMake is configured with ``CLANG_ENABLE_STATIC_ANALYZER``, -:program:`clang-tidy` will not be built with support for the -``clang-analyzer-*`` checks or the ``mpi-*`` checks. - - -.. _AST Matchers: http://clang.llvm.org/docs/LibASTMatchers.html -.. _PPCallbacks: http://clang.llvm.org/doxygen/classclang_1_1PPCallbacks.html -.. _clang-check: http://clang.llvm.org/docs/ClangCheck.html - - -Choosing the Right Place for your Check ---------------------------------------- - -If you have an idea of a check, you should decide whether it should be -implemented as a: - -+ *Clang diagnostic*: if the check is generic enough, targets code patterns that - most probably are bugs (rather than style or readability issues), can be - implemented effectively and with extremely low false positive rate, it may - make a good Clang diagnostic. - -+ *Clang static analyzer check*: if the check requires some sort of control flow - analysis, it should probably be implemented as a static analyzer check. - -+ *clang-tidy check* is a good choice for linter-style checks, checks that are - related to a certain coding style, checks that address code readability, etc. - - -Preparing your Workspace ------------------------- - -If you are new to LLVM development, you should read the `Getting Started with -the LLVM System`_, `Using Clang Tools`_ and `How To Setup Tooling For LLVM`_ -documents to check out and build LLVM, Clang and Clang Extra Tools with CMake. - -Once you are done, change to the ``llvm/tools/clang/tools/extra`` directory, and -let's start! - -.. _Getting Started with the LLVM System: http://llvm.org/docs/GettingStarted.html -.. _Using Clang Tools: http://clang.llvm.org/docs/ClangTools.html - - -The Directory Structure ------------------------ - -:program:`clang-tidy` source code resides in the -``llvm/tools/clang/tools/extra`` directory and is structured as follows: - -:: - - clang-tidy/ # Clang-tidy core. - |-- ClangTidy.h # Interfaces for users and checks. - |-- ClangTidyModule.h # Interface for clang-tidy modules. - |-- ClangTidyModuleRegistry.h # Interface for registering of modules. - ... - |-- google/ # Google clang-tidy module. - |-+ - |-- GoogleTidyModule.cpp - |-- GoogleTidyModule.h - ... - |-- llvm/ # LLVM clang-tidy module. - |-+ - |-- LLVMTidyModule.cpp - |-- LLVMTidyModule.h - ... - |-- objc/ # Objective-C clang-tidy module. - |-+ - |-- ObjCTidyModule.cpp - |-- ObjCTidyModule.h - ... - |-- tool/ # Sources of the clang-tidy binary. - ... - test/clang-tidy/ # Integration tests. - ... - unittests/clang-tidy/ # Unit tests. - |-- ClangTidyTest.h - |-- GoogleModuleTest.cpp - |-- LLVMModuleTest.cpp - |-- ObjCModuleTest.cpp - ... - - -Writing a clang-tidy Check --------------------------- - -So you have an idea of a useful check for :program:`clang-tidy`. - -First, if you're not familiar with LLVM development, read through the `Getting -Started with LLVM`_ document for instructions on setting up your workflow and -the `LLVM Coding Standards`_ document to familiarize yourself with the coding -style used in the project. For code reviews we mostly use `LLVM Phabricator`_. - -.. _Getting Started with LLVM: http://llvm.org/docs/GettingStarted.html -.. _LLVM Coding Standards: http://llvm.org/docs/CodingStandards.html -.. _LLVM Phabricator: http://llvm.org/docs/Phabricator.html - -Next, you need to decide which module the check belongs to. Modules -are located in subdirectories of `clang-tidy/ -`_ -and contain checks targeting a certain aspect of code quality (performance, -readability, etc.), certain coding style or standard (Google, LLVM, CERT, etc.) -or a widely used API (e.g. MPI). Their names are same as user-facing check -groups names described :ref:`above `. - -After choosing the module and the name for the check, run the -``clang-tidy/add_new_check.py`` script to create the skeleton of the check and -plug it to :program:`clang-tidy`. It's the recommended way of adding new checks. - -If we want to create a `readability-awesome-function-names`, we would run: - -.. code-block:: console - - $ clang-tidy/add_new_check.py readability awesome-function-names - - -The ``add_new_check.py`` script will: - * create the class for your check inside the specified module's directory and - register it in the module and in the build system; - * create a lit test file in the ``test/clang-tidy/`` directory; - * create a documentation file and include it into the - ``docs/clang-tidy/checks/list.rst``. - -Let's see in more detail at the check class definition: - -.. code-block:: c++ - - ... - - #include "../ClangTidy.h" - - namespace clang { - namespace tidy { - namespace readability { - - ... - class AwesomeFunctionNamesCheck : public ClangTidyCheck { - public: - AwesomeFunctionNamesCheck(StringRef Name, ClangTidyContext *Context) - : ClangTidyCheck(Name, Context) {} - void registerMatchers(ast_matchers::MatchFinder *Finder) override; - void check(const ast_matchers::MatchFinder::MatchResult &Result) override; - }; - - } // namespace readability - } // namespace tidy - } // namespace clang - - ... - -Constructor of the check receives the ``Name`` and ``Context`` parameters, and -must forward them to the ``ClangTidyCheck`` constructor. - -In our case the check needs to operate on the AST level and it overrides the -``registerMatchers`` and ``check`` methods. If we wanted to analyze code on the -preprocessor level, we'd need instead to override the ``registerPPCallbacks`` -method. - -In the ``registerMatchers`` method we create an AST Matcher (see `AST Matchers`_ -for more information) that will find the pattern in the AST that we want to -inspect. The results of the matching are passed to the ``check`` method, which -can further inspect them and report diagnostics. - -.. code-block:: c++ - - using namespace ast_matchers; - - void AwesomeFunctionNamesCheck::registerMatchers(MatchFinder *Finder) { - Finder->addMatcher(functionDecl().bind("x"), this); - } - - void AwesomeFunctionNamesCheck::check(const MatchFinder::MatchResult &Result) { - const auto *MatchedDecl = Result.Nodes.getNodeAs("x"); - if (MatchedDecl->getName().startswith("awesome_")) - return; - diag(MatchedDecl->getLocation(), "function %0 is insufficiently awesome") - << MatchedDecl - << FixItHint::CreateInsertion(MatchedDecl->getLocation(), "awesome_"); - } - -(If you want to see an example of a useful check, look at -`clang-tidy/google/ExplicitConstructorCheck.h -`_ -and `clang-tidy/google/ExplicitConstructorCheck.cpp -`_). - - -Registering your Check ----------------------- - -(The ``add_new_check.py`` takes care of registering the check in an existing -module. If you want to create a new module or know the details, read on.) - -The check should be registered in the corresponding module with a distinct name: - -.. code-block:: c++ - - class MyModule : public ClangTidyModule { - public: - void addCheckFactories(ClangTidyCheckFactories &CheckFactories) override { - CheckFactories.registerCheck( - "my-explicit-constructor"); - } - }; - -Now we need to register the module in the ``ClangTidyModuleRegistry`` using a -statically initialized variable: - -.. code-block:: c++ - - static ClangTidyModuleRegistry::Add X("my-module", - "Adds my lint checks."); - - -When using LLVM build system, we need to use the following hack to ensure the -module is linked into the :program:`clang-tidy` binary: - -Add this near the ``ClangTidyModuleRegistry::Add`` variable: - -.. code-block:: c++ - - // This anchor is used to force the linker to link in the generated object file - // and thus register the MyModule. - volatile int MyModuleAnchorSource = 0; - -And this to the main translation unit of the :program:`clang-tidy` binary (or -the binary you link the ``clang-tidy`` library in) -``clang-tidy/tool/ClangTidyMain.cpp``: - -.. code-block:: c++ - - // This anchor is used to force the linker to link the MyModule. - extern volatile int MyModuleAnchorSource; - static int MyModuleAnchorDestination = MyModuleAnchorSource; - - -Configuring Checks ------------------- - -If a check needs configuration options, it can access check-specific options -using the ``Options.get("SomeOption", DefaultValue)`` call in the check -constructor. In this case the check should also override the -``ClangTidyCheck::storeOptions`` method to make the options provided by the -check discoverable. This method lets :program:`clang-tidy` know which options -the check implements and what the current values are (e.g. for the -``-dump-config`` command line option). - -.. code-block:: c++ - - class MyCheck : public ClangTidyCheck { - const unsigned SomeOption1; - const std::string SomeOption2; - - public: - MyCheck(StringRef Name, ClangTidyContext *Context) - : ClangTidyCheck(Name, Context), - SomeOption(Options.get("SomeOption1", -1U)), - SomeOption(Options.get("SomeOption2", "some default")) {} - - void storeOptions(ClangTidyOptions::OptionMap &Opts) override { - Options.store(Opts, "SomeOption1", SomeOption1); - Options.store(Opts, "SomeOption2", SomeOption2); - } - ... - -Assuming the check is registered with the name "my-check", the option can then -be set in a ``.clang-tidy`` file in the following way: - -.. code-block:: yaml - - CheckOptions: - - key: my-check.SomeOption1 - value: 123 - - key: my-check.SomeOption2 - value: 'some other value' - -If you need to specify check options on a command line, you can use the inline -YAML format: - -.. code-block:: console - - $ clang-tidy -config="{CheckOptions: [{key: a, value: b}, {key: x, value: y}]}" ... - - -Testing Checks --------------- - -To run tests for :program:`clang-tidy` use the command: - -.. code-block:: console - - $ ninja check-clang-tools - -:program:`clang-tidy` checks can be tested using either unit tests or -`lit`_ tests. Unit tests may be more convenient to test complex replacements -with strict checks. `Lit`_ tests allow using partial text matching and regular -expressions which makes them more suitable for writing compact tests for -diagnostic messages. - -The ``check_clang_tidy.py`` script provides an easy way to test both -diagnostic messages and fix-its. It filters out ``CHECK`` lines from the test -file, runs :program:`clang-tidy` and verifies messages and fixes with two -separate `FileCheck`_ invocations: once with FileCheck's directive -prefix set to ``CHECK-MESSAGES``, validating the diagnostic messages, -and once with the directive prefix set to ``CHECK-FIXES``, running -against the fixed code (i.e., the code after generated fix-its are -applied). In particular, ``CHECK-FIXES:`` can be used to check -that code was not modified by fix-its, by checking that it is present -unchanged in the fixed code. The full set of `FileCheck`_ directives -is available (e.g., ``CHECK-MESSAGES-SAME:``, ``CHECK-MESSAGES-NOT:``), though -typically the basic ``CHECK`` forms (``CHECK-MESSAGES`` and ``CHECK-FIXES``) -are sufficient for clang-tidy tests. Note that the `FileCheck`_ -documentation mostly assumes the default prefix (``CHECK``), and hence -describes the directive as ``CHECK:``, ``CHECK-SAME:``, ``CHECK-NOT:``, etc. -Replace ``CHECK`` by either ``CHECK-FIXES`` or ``CHECK-MESSAGES`` for -clang-tidy tests. - -An additional check enabled by ``check_clang_tidy.py`` ensures that -if `CHECK-MESSAGES:` is used in a file then every warning or error -must have an associated CHECK in that file. Or, you can use ``CHECK-NOTES:`` -instead, if you want to **also** ensure that all the notes are checked. - -To use the ``check_clang_tidy.py`` script, put a .cpp file with the -appropriate ``RUN`` line in the ``test/clang-tidy`` directory. Use -``CHECK-MESSAGES:`` and ``CHECK-FIXES:`` lines to write checks against -diagnostic messages and fixed code. - -It's advised to make the checks as specific as possible to avoid checks matching -to incorrect parts of the input. Use ``[[@LINE+X]]``/``[[@LINE-X]]`` -substitutions and distinct function and variable names in the test code. - -Here's an example of a test using the ``check_clang_tidy.py`` script (the full -source code is at `test/clang-tidy/google-readability-casting.cpp`_): - -.. code-block:: c++ - - // RUN: %check_clang_tidy %s google-readability-casting %t - - void f(int a) { - int b = (int)a; - // CHECK-MESSAGES: :[[@LINE-1]]:11: warning: redundant cast to the same type [google-readability-casting] - // CHECK-FIXES: int b = a; - } - -To check more than one scenario in the same test file use -``-check-suffix=SUFFIX-NAME`` on ``check_clang_tidy.py`` command line or -``-check-suffixes=SUFFIX-NAME-1,SUFFIX-NAME-2,...``. -With ``-check-suffix[es]=SUFFIX-NAME`` you need to replace your ``CHECK-*`` -directives with ``CHECK-MESSAGES-SUFFIX-NAME`` and ``CHECK-FIXES-SUFFIX-NAME``. - -Here's an example: - -.. code-block:: c++ - - // RUN: %check_clang_tidy -check-suffix=USING-A %s misc-unused-using-decls %t -- -- -DUSING_A - // RUN: %check_clang_tidy -check-suffix=USING-B %s misc-unused-using-decls %t -- -- -DUSING_B - // RUN: %check_clang_tidy %s misc-unused-using-decls %t - ... - // CHECK-MESSAGES-USING-A: :[[@LINE-8]]:10: warning: using decl 'A' {{.*}} - // CHECK-MESSAGES-USING-B: :[[@LINE-7]]:10: warning: using decl 'B' {{.*}} - // CHECK-MESSAGES: :[[@LINE-6]]:10: warning: using decl 'C' {{.*}} - // CHECK-FIXES-USING-A-NOT: using a::A;$ - // CHECK-FIXES-USING-B-NOT: using a::B;$ - // CHECK-FIXES-NOT: using a::C;$ - - -There are many dark corners in the C++ language, and it may be difficult to make -your check work perfectly in all cases, especially if it issues fix-it hints. The -most frequent pitfalls are macros and templates: - -1. code written in a macro body/template definition may have a different meaning - depending on the macro expansion/template instantiation; -2. multiple macro expansions/template instantiations may result in the same code - being inspected by the check multiple times (possibly, with different - meanings, see 1), and the same warning (or a slightly different one) may be - issued by the check multiple times; :program:`clang-tidy` will deduplicate - _identical_ warnings, but if the warnings are slightly different, all of them - will be shown to the user (and used for applying fixes, if any); -3. making replacements to a macro body/template definition may be fine for some - macro expansions/template instantiations, but easily break some other - expansions/instantiations. - -.. _lit: http://llvm.org/docs/CommandGuide/lit.html -.. _FileCheck: http://llvm.org/docs/CommandGuide/FileCheck.html -.. _test/clang-tidy/google-readability-casting.cpp: http://reviews.llvm.org/diffusion/L/browse/clang-tools-extra/trunk/test/clang-tidy/google-readability-casting.cpp - - -Running clang-tidy on LLVM --------------------------- - -To test a check it's best to try it out on a larger code base. LLVM and Clang -are the natural targets as you already have the source code around. The most -convenient way to run :program:`clang-tidy` is with a compile command database; -CMake can automatically generate one, for a description of how to enable it see -`How To Setup Tooling For LLVM`_. Once ``compile_commands.json`` is in place and -a working version of :program:`clang-tidy` is in ``PATH`` the entire code base -can be analyzed with ``clang-tidy/tool/run-clang-tidy.py``. The script executes -:program:`clang-tidy` with the default set of checks on every translation unit -in the compile command database and displays the resulting warnings and errors. -The script provides multiple configuration flags. - -* The default set of checks can be overridden using the ``-checks`` argument, - taking the identical format as :program:`clang-tidy` does. For example - ``-checks=-*,modernize-use-override`` will run the ``modernize-use-override`` - check only. - -* To restrict the files examined you can provide one or more regex arguments - that the file names are matched against. - ``run-clang-tidy.py clang-tidy/.*Check\.cpp`` will only analyze clang-tidy - checks. It may also be necessary to restrict the header files warnings are - displayed from using the ``-header-filter`` flag. It has the same behavior - as the corresponding :program:`clang-tidy` flag. - -* To apply suggested fixes ``-fix`` can be passed as an argument. This gathers - all changes in a temporary directory and applies them. Passing ``-format`` - will run clang-format over changed lines. - - -On checks profiling -------------------- - -:program:`clang-tidy` can collect per-check profiling info, and output it -for each processed source file (translation unit). - -To enable profiling info collection, use the ``-enable-check-profile`` argument. -The timings will be output to ``stderr`` as a table. Example output: - -.. code-block:: console - - $ clang-tidy -enable-check-profile -checks=-*,readability-function-size source.cpp - ===-------------------------------------------------------------------------=== - clang-tidy checks profiling - ===-------------------------------------------------------------------------=== - Total Execution Time: 1.0282 seconds (1.0258 wall clock) - - ---User Time--- --System Time-- --User+System-- ---Wall Time--- --- Name --- - 0.9136 (100.0%) 0.1146 (100.0%) 1.0282 (100.0%) 1.0258 (100.0%) readability-function-size - 0.9136 (100.0%) 0.1146 (100.0%) 1.0282 (100.0%) 1.0258 (100.0%) Total - -It can also store that data as JSON files for further processing. Example output: - -.. code-block:: console - - $ clang-tidy -enable-check-profile -store-check-profile=. -checks=-*,readability-function-size source.cpp - $ # Note that there won't be timings table printed to the console. - $ ls /tmp/out/ - 20180516161318717446360-source.cpp.json - $ cat 20180516161318717446360-source.cpp.json - { - "file": "/path/to/source.cpp", - "timestamp": "2018-05-16 16:13:18.717446360", - "profile": { - "time.clang-tidy.readability-function-size.wall": 1.0421266555786133e+00, - "time.clang-tidy.readability-function-size.user": 9.2088400000005421e-01, - "time.clang-tidy.readability-function-size.sys": 1.2418899999999974e-01 - } - } - -There is only one argument that controls profile storage: - -* ``-store-check-profile=`` - - By default reports are printed in tabulated format to stderr. When this option - is passed, these per-TU profiles are instead stored as JSON. - If the prefix is not an absolute path, it is considered to be relative to the - directory from where you have run :program:`clang-tidy`. All ``.`` and ``..`` - patterns in the path are collapsed, and symlinks are resolved. - - Example: - Let's suppose you have a source file named ``example.cpp``, located in the - ``/source`` directory. Only the input filename is used, not the full path - to the source file. Additionally, it is prefixed with the current timestamp. - - * If you specify ``-store-check-profile=/tmp``, then the profile will be saved - to ``/tmp/-example.cpp.json`` - - * If you run :program:`clang-tidy` from within ``/foo`` directory, and specify - ``-store-check-profile=.``, then the profile will still be saved to - ``/foo/-example.cpp.json`` From e264daec97935db606c312b10e43f4e35ac39b58 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 18 Jan 2019 10:57:19 +0000 Subject: [PATCH 021/125] Merging r351426: ------------------------------------------------------------------------ r351426 | d0k | 2019-01-17 11:25:18 +0100 (Thu, 17 Jan 2019) | 1 line [MC] Remove unused variable ------------------------------------------------------------------------ llvm-svn: 351543 --- llvm/lib/MC/MCWin64EH.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/MC/MCWin64EH.cpp b/llvm/lib/MC/MCWin64EH.cpp index 2e4eea3411f4bd..8bc1f08c88750e 100644 --- a/llvm/lib/MC/MCWin64EH.cpp +++ b/llvm/lib/MC/MCWin64EH.cpp @@ -517,7 +517,6 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) { auto &EpilogInstrs = I.second; uint32_t CodeBytes = ARM64CountOfUnwindCodes(EpilogInstrs); - uint32_t NumUnwindCodes = EpilogInstrs.size(); MCSymbol* MatchingEpilog = FindMatchingEpilog(EpilogInstrs, AddedEpilogs, info); if (MatchingEpilog) { From 119d8a51d9175e528dfaa2a5f2170ab206699a81 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 22 Jan 2019 16:59:31 +0000 Subject: [PATCH 022/125] Merging r351580: ------------------------------------------------------------------------ r351580 | kli | 2019-01-18 20:57:37 +0100 (Fri, 18 Jan 2019) | 4 lines [OPENMP][DOCS] Release notes/OpenMP support updates, NFC. Differential Revision: https://reviews.llvm.org/D56733 ------------------------------------------------------------------------ llvm-svn: 351839 --- clang/docs/OpenMPSupport.rst | 76 ++++++++++++++++-------------------- clang/docs/ReleaseNotes.rst | 22 ++++++++--- 2 files changed, 50 insertions(+), 48 deletions(-) diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst index 04a9648ca29420..7b567c966ee53a 100644 --- a/clang/docs/OpenMPSupport.rst +++ b/clang/docs/OpenMPSupport.rst @@ -17,60 +17,50 @@ OpenMP Support ================== -Clang fully supports OpenMP 4.5. Clang supports offloading to X86_64, AArch64, -PPC64[LE] and has `basic support for Cuda devices`_. - -Standalone directives -===================== - -* #pragma omp [for] simd: :good:`Complete`. - -* #pragma omp declare simd: :partial:`Partial`. We support parsing/semantic - analysis + generation of special attributes for X86 target, but still - missing the LLVM pass for vectorization. - -* #pragma omp taskloop [simd]: :good:`Complete`. - -* #pragma omp target [enter|exit] data: :good:`Complete`. - -* #pragma omp target update: :good:`Complete`. - -* #pragma omp target: :good:`Complete`. +Clang supports the following OpenMP 5.0 features -* #pragma omp declare target: :good:`Complete`. +* The `reduction`-based clauses in the `task` and `target`-based directives. -* #pragma omp teams: :good:`Complete`. +* Support relational-op != (not-equal) as one of the canonical forms of random + access iterator. -* #pragma omp distribute [simd]: :good:`Complete`. +* Support for mapping of the lambdas in target regions. -* #pragma omp distribute parallel for [simd]: :good:`Complete`. +* Parsing/sema analysis for the requires directive. -Combined directives -=================== +* Nested declare target directives. -* #pragma omp parallel for simd: :good:`Complete`. +* Make the `this` pointer implicitly mapped as `map(this[:1])`. -* #pragma omp target parallel: :good:`Complete`. +* The `close` *map-type-modifier*. -* #pragma omp target parallel for [simd]: :good:`Complete`. - -* #pragma omp target simd: :good:`Complete`. - -* #pragma omp target teams: :good:`Complete`. - -* #pragma omp teams distribute [simd]: :good:`Complete`. - -* #pragma omp target teams distribute [simd]: :good:`Complete`. - -* #pragma omp teams distribute parallel for [simd]: :good:`Complete`. - -* #pragma omp target teams distribute parallel for [simd]: :good:`Complete`. +Clang fully supports OpenMP 4.5. Clang supports offloading to X86_64, AArch64, +PPC64[LE] and has `basic support for Cuda devices`_. -Clang does not support any constructs/updates from OpenMP 5.0 except -for `reduction`-based clauses in the `task` and `target`-based directives. +* #pragma omp declare simd: :partial:`Partial`. We support parsing/semantic + analysis + generation of special attributes for X86 target, but still + missing the LLVM pass for vectorization. In addition, the LLVM OpenMP runtime `libomp` supports the OpenMP Tools -Interface (OMPT) on x86, x86_64, AArch64, and PPC64 on Linux, Windows, and mac OS. +Interface (OMPT) on x86, x86_64, AArch64, and PPC64 on Linux, Windows, and macOS. + +General improvements +-------------------- +- New collapse clause scheme to avoid expensive remainder operations. + Compute loop index variables after collapsing a loop nest via the + collapse clause by replacing the expensive remainder operation with + multiplications and additions. + +- The default schedules for the `distribute` and `for` constructs in a + parallel region and in SPMD mode have changed to ensure coalesced + accesses. For the `distribute` construct, a static schedule is used + with a chunk size equal to the number of threads per team (default + value of threads or as specified by the `thread_limit` clause if + present). For the `for` construct, the schedule is static with chunk + size of one. + +- Simplified SPMD code generation for `distribute parallel for` when + the new default schedules are applicable. .. _basic support for Cuda devices: diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index b6a405dbc78b2e..b4f290e05d7876 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -233,12 +233,15 @@ ABI Changes in Clang OpenMP Support in Clang ---------------------------------- -- Support relational-op != (not-equal) as one of the canonical forms of random - access iterator. +- OpenMP 5.0 features -- Added support for mapping of the lambdas in target regions. - -- Added parsing/sema analysis for OpenMP 5.0 requires directive. + - Support relational-op != (not-equal) as one of the canonical forms of random + access iterator. + - Added support for mapping of the lambdas in target regions. + - Added parsing/sema analysis for the requires directive. + - Support nested declare target directives. + - Make the `this` pointer implicitly mapped as `map(this[:1])`. + - Added the `close` *map-type-modifier*. - Various bugfixes and improvements. @@ -250,6 +253,15 @@ New features supported for Cuda devices: - Fixed support for lastprivate/reduction variables in SPMD constructs. +- New collapse clause scheme to avoid expensive remainder operations. + +- New default schedule for distribute and parallel constructs. + +- Simplified code generation for distribute and parallel in SPMD mode. + +- Flag (``-fopenmp_optimistic_collapse``) for user to limit collapsed + loop counter width when safe to do so. + - General performance improvement. CUDA Support in Clang From eb74b596b7dee5157be24f6e3693ab66930065de Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 22 Jan 2019 17:32:41 +0000 Subject: [PATCH 023/125] Merging r351686: ------------------------------------------------------------------------ r351686 | vmiklos | 2019-01-20 15:28:27 +0100 (Sun, 20 Jan 2019) | 5 lines [clang-tidy] misc-non-private-member-variables-in-classes: ignore implicit methods Otherwise we don't warn on a struct containing a single public int, but we warn on a struct containing a single public std::string, which is inconsistent. ------------------------------------------------------------------------ llvm-svn: 351844 --- .../NonPrivateMemberVariablesInClassesCheck.cpp | 11 ++++++----- ...-non-private-member-variables-in-classes.rst | 10 +++++----- ...-non-private-member-variables-in-classes.cpp | 17 +++++++++++++++++ 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/clang-tools-extra/clang-tidy/misc/NonPrivateMemberVariablesInClassesCheck.cpp b/clang-tools-extra/clang-tidy/misc/NonPrivateMemberVariablesInClassesCheck.cpp index c0bdbfbfe04248..bac7f898785a01 100644 --- a/clang-tools-extra/clang-tidy/misc/NonPrivateMemberVariablesInClassesCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/NonPrivateMemberVariablesInClassesCheck.cpp @@ -23,8 +23,8 @@ AST_MATCHER(CXXRecordDecl, hasMethods) { return std::distance(Node.method_begin(), Node.method_end()) != 0; } -AST_MATCHER(CXXRecordDecl, hasNonStaticMethod) { - return hasMethod(unless(isStaticStorageClass())) +AST_MATCHER(CXXRecordDecl, hasNonStaticNonImplicitMethod) { + return hasMethod(unless(anyOf(isStaticStorageClass(), isImplicit()))) .matches(Node, Finder, Builder); } @@ -67,10 +67,11 @@ void NonPrivateMemberVariablesInClassesCheck::registerMatchers( IgnorePublicMemberVariables ? isProtected() : unless(isPrivate())); // We only want the records that not only contain the mutable data (non-static - // member variables), but also have some logic (non-static member functions). - // We may optionally ignore records where all the member variables are public. + // member variables), but also have some logic (non-static, non-implicit + // member functions). We may optionally ignore records where all the member + // variables are public. Finder->addMatcher(cxxRecordDecl(anyOf(isStruct(), isClass()), hasMethods(), - hasNonStaticMethod(), + hasNonStaticNonImplicitMethod(), unless(ShouldIgnoreRecord), forEach(InterestingField.bind("field"))) .bind("record"), diff --git a/clang-tools-extra/docs/clang-tidy/checks/misc-non-private-member-variables-in-classes.rst b/clang-tools-extra/docs/clang-tidy/checks/misc-non-private-member-variables-in-classes.rst index db88c9b1cffd81..57990622e60cdd 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/misc-non-private-member-variables-in-classes.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/misc-non-private-member-variables-in-classes.rst @@ -6,11 +6,11 @@ misc-non-private-member-variables-in-classes `cppcoreguidelines-non-private-member-variables-in-classes` redirects here as an alias for this check. -Finds classes that contain non-static data members in addition to non-static -member functions and diagnose all data members declared with a non-``public`` -access specifier. The data members should be declared as ``private`` and -accessed through member functions instead of exposed to derived classes or -class consumers. +Finds classes that contain non-static data members in addition to user-declared +non-static member functions and diagnose all data members declared with a +non-``public`` access specifier. The data members should be declared as +``private`` and accessed through member functions instead of exposed to derived +classes or class consumers. Options ------- diff --git a/clang-tools-extra/test/clang-tidy/misc-non-private-member-variables-in-classes.cpp b/clang-tools-extra/test/clang-tidy/misc-non-private-member-variables-in-classes.cpp index 31052716d28733..2a93ff6a18c38d 100644 --- a/clang-tools-extra/test/clang-tidy/misc-non-private-member-variables-in-classes.cpp +++ b/clang-tools-extra/test/clang-tidy/misc-non-private-member-variables-in-classes.cpp @@ -35,6 +35,23 @@ class S1 { int S1_v3; }; +// Only data and implicit or static methods, do not warn + +class C { +public: + C() {} + ~C() {} +}; + +struct S1Implicit { + C S1Implicit_v0; +}; + +struct S1ImplicitAndStatic { + C S1Implicit_v0; + static void s() {} +}; + //----------------------------------------------------------------------------// // All functions are static, do not warn. From 2bebfebe54decbe7b91c9e5dc552bc0eaeef703b Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 22 Jan 2019 19:02:30 +0000 Subject: [PATCH 024/125] Merging r351753: ------------------------------------------------------------------------ r351753 | spatel | 2019-01-21 18:30:14 +0100 (Mon, 21 Jan 2019) | 8 lines [DAGCombiner] fix crash when converting build vector to shuffle The regression test is reduced from the example shown in D56281. This does raise a question as noted in the test file: do we want to handle this pattern? I don't have a motivating example for that on x86 yet, but it seems like we could have that pattern there too, so we could avoid the back-and-forth using a shuffle. ------------------------------------------------------------------------ llvm-svn: 351857 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 16 +++++++++----- .../CodeGen/AArch64/build-vector-extract.ll | 22 +++++++++++++++++++ 2 files changed, 33 insertions(+), 5 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/build-vector-extract.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index ff5505c9772137..6af01423ca1069 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -16214,23 +16214,29 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) { // The build vector contains some number of undef elements and exactly // one other element. That other element must be a zero-extended scalar // extracted from a vector at a constant index to turn this into a shuffle. + // Also, require that the build vector does not implicitly truncate/extend + // its elements. // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND. + EVT VT = BV->getValueType(0); SDValue Zext = BV->getOperand(ZextElt); if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() || Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT || - !isa(Zext.getOperand(0).getOperand(1))) + !isa(Zext.getOperand(0).getOperand(1)) || + Zext.getValueSizeInBits() != VT.getScalarSizeInBits()) return SDValue(); - // The zero-extend must be a multiple of the source size. + // The zero-extend must be a multiple of the source size, and we must be + // building a vector of the same size as the source of the extract element. SDValue Extract = Zext.getOperand(0); unsigned DestSize = Zext.getValueSizeInBits(); unsigned SrcSize = Extract.getValueSizeInBits(); - if (DestSize % SrcSize != 0) + if (DestSize % SrcSize != 0 || + Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits()) return SDValue(); // Create a shuffle mask that will combine the extracted element with zeros // and undefs. - int ZextRatio = DestSize / SrcSize; + int ZextRatio = DestSize / SrcSize; int NumMaskElts = NumBVOps * ZextRatio; SmallVector ShufMask(NumMaskElts, -1); for (int i = 0; i != NumMaskElts; ++i) { @@ -16260,7 +16266,7 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) { SDValue ZeroVec = DAG.getConstant(0, DL, VecVT); SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec, ShufMask); - return DAG.getBitcast(BV->getValueType(0), Shuf); + return DAG.getBitcast(VT, Shuf); } // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT diff --git a/llvm/test/CodeGen/AArch64/build-vector-extract.ll b/llvm/test/CodeGen/AArch64/build-vector-extract.ll new file mode 100644 index 00000000000000..bba3a22cf33ea9 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/build-vector-extract.ll @@ -0,0 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s + +; This would crash because we did not expect to create +; a shuffle for a vector where the source operand is +; not the same size as the result. +; TODO: Should we handle this pattern? Ie, is moving to/from +; registers the optimal code? + +define <4 x i32> @larger_bv_than_source(<4 x i16> %t0) { +; CHECK-LABEL: larger_bv_than_source: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: umov w8, v0.h[2] +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: ret + %t1 = extractelement <4 x i16> %t0, i32 2 + %vgetq_lane = zext i16 %t1 to i32 + %t2 = insertelement <4 x i32> undef, i32 %vgetq_lane, i64 0 + ret <4 x i32> %t2 +} + From ee231d02587208edbb3381b8186f03b97ac9934c Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 22 Jan 2019 19:04:30 +0000 Subject: [PATCH 025/125] Merging r351754: ------------------------------------------------------------------------ r351754 | spatel | 2019-01-21 18:46:35 +0100 (Mon, 21 Jan 2019) | 6 lines [AArch64] add more tests for buildvec to shuffle transform; NFC These are copied from the sibling x86 file. I'm not sure which of the current outputs (if any) is considered optimal, but someone more familiar with AArch may want to take a look. ------------------------------------------------------------------------ llvm-svn: 351858 --- .../CodeGen/AArch64/build-vector-extract.ll | 419 ++++++++++++++++++ 1 file changed, 419 insertions(+) diff --git a/llvm/test/CodeGen/AArch64/build-vector-extract.ll b/llvm/test/CodeGen/AArch64/build-vector-extract.ll index bba3a22cf33ea9..a785533e8db9be 100644 --- a/llvm/test/CodeGen/AArch64/build-vector-extract.ll +++ b/llvm/test/CodeGen/AArch64/build-vector-extract.ll @@ -1,6 +1,425 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s +define <2 x i64> @extract0_i32_zext_insert0_i64_undef(<4 x i32> %x) { +; CHECK-LABEL: extract0_i32_zext_insert0_i64_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: zip1 v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %e = extractelement <4 x i32> %x, i32 0 + %z = zext i32 %e to i64 + %r = insertelement <2 x i64> undef, i64 %z, i32 0 + ret <2 x i64> %r +} + +define <2 x i64> @extract0_i32_zext_insert0_i64_zero(<4 x i32> %x) { +; CHECK-LABEL: extract0_i32_zext_insert0_i64_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 +; CHECK-NEXT: ret + %e = extractelement <4 x i32> %x, i32 0 + %z = zext i32 %e to i64 + %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0 + ret <2 x i64> %r +} + +define <2 x i64> @extract1_i32_zext_insert0_i64_undef(<4 x i32> %x) { +; CHECK-LABEL: extract1_i32_zext_insert0_i64_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: zip1 v0.4s, v0.4s, v0.4s +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #12 +; CHECK-NEXT: ret + %e = extractelement <4 x i32> %x, i32 1 + %z = zext i32 %e to i64 + %r = insertelement <2 x i64> undef, i64 %z, i32 0 + ret <2 x i64> %r +} + +define <2 x i64> @extract1_i32_zext_insert0_i64_zero(<4 x i32> %x) { +; CHECK-LABEL: extract1_i32_zext_insert0_i64_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, v0.s[1] +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 +; CHECK-NEXT: ret + %e = extractelement <4 x i32> %x, i32 1 + %z = zext i32 %e to i64 + %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0 + ret <2 x i64> %r +} + +define <2 x i64> @extract2_i32_zext_insert0_i64_undef(<4 x i32> %x) { +; CHECK-LABEL: extract2_i32_zext_insert0_i64_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #12 +; CHECK-NEXT: ret + %e = extractelement <4 x i32> %x, i32 2 + %z = zext i32 %e to i64 + %r = insertelement <2 x i64> undef, i64 %z, i32 0 + ret <2 x i64> %r +} + +define <2 x i64> @extract2_i32_zext_insert0_i64_zero(<4 x i32> %x) { +; CHECK-LABEL: extract2_i32_zext_insert0_i64_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, v0.s[2] +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 +; CHECK-NEXT: ret + %e = extractelement <4 x i32> %x, i32 2 + %z = zext i32 %e to i64 + %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0 + ret <2 x i64> %r +} + +define <2 x i64> @extract3_i32_zext_insert0_i64_undef(<4 x i32> %x) { +; CHECK-LABEL: extract3_i32_zext_insert0_i64_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #12 +; CHECK-NEXT: ret + %e = extractelement <4 x i32> %x, i32 3 + %z = zext i32 %e to i64 + %r = insertelement <2 x i64> undef, i64 %z, i32 0 + ret <2 x i64> %r +} + +define <2 x i64> @extract3_i32_zext_insert0_i64_zero(<4 x i32> %x) { +; CHECK-LABEL: extract3_i32_zext_insert0_i64_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, v0.s[3] +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 +; CHECK-NEXT: ret + %e = extractelement <4 x i32> %x, i32 3 + %z = zext i32 %e to i64 + %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0 + ret <2 x i64> %r +} + +define <2 x i64> @extract0_i32_zext_insert1_i64_undef(<4 x i32> %x) { +; CHECK-LABEL: extract0_i32_zext_insert1_i64_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: zip1 v1.4s, v0.4s, v1.4s +; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #8 +; CHECK-NEXT: ret + %e = extractelement <4 x i32> %x, i32 0 + %z = zext i32 %e to i64 + %r = insertelement <2 x i64> undef, i64 %z, i32 1 + ret <2 x i64> %r +} + +define <2 x i64> @extract0_i32_zext_insert1_i64_zero(<4 x i32> %x) { +; CHECK-LABEL: extract0_i32_zext_insert1_i64_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 +; CHECK-NEXT: ret + %e = extractelement <4 x i32> %x, i32 0 + %z = zext i32 %e to i64 + %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1 + ret <2 x i64> %r +} + +define <2 x i64> @extract1_i32_zext_insert1_i64_undef(<4 x i32> %x) { +; CHECK-LABEL: extract1_i32_zext_insert1_i64_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #4 +; CHECK-NEXT: ret + %e = extractelement <4 x i32> %x, i32 1 + %z = zext i32 %e to i64 + %r = insertelement <2 x i64> undef, i64 %z, i32 1 + ret <2 x i64> %r +} + +define <2 x i64> @extract1_i32_zext_insert1_i64_zero(<4 x i32> %x) { +; CHECK-LABEL: extract1_i32_zext_insert1_i64_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, v0.s[1] +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 +; CHECK-NEXT: ret + %e = extractelement <4 x i32> %x, i32 1 + %z = zext i32 %e to i64 + %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1 + ret <2 x i64> %r +} + +define <2 x i64> @extract2_i32_zext_insert1_i64_undef(<4 x i32> %x) { +; CHECK-LABEL: extract2_i32_zext_insert1_i64_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: mov v0.s[3], wzr +; CHECK-NEXT: ret + %e = extractelement <4 x i32> %x, i32 2 + %z = zext i32 %e to i64 + %r = insertelement <2 x i64> undef, i64 %z, i32 1 + ret <2 x i64> %r +} + +define <2 x i64> @extract2_i32_zext_insert1_i64_zero(<4 x i32> %x) { +; CHECK-LABEL: extract2_i32_zext_insert1_i64_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, v0.s[2] +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 +; CHECK-NEXT: ret + %e = extractelement <4 x i32> %x, i32 2 + %z = zext i32 %e to i64 + %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1 + ret <2 x i64> %r +} + +define <2 x i64> @extract3_i32_zext_insert1_i64_undef(<4 x i32> %x) { +; CHECK-LABEL: extract3_i32_zext_insert1_i64_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #4 +; CHECK-NEXT: ret + %e = extractelement <4 x i32> %x, i32 3 + %z = zext i32 %e to i64 + %r = insertelement <2 x i64> undef, i64 %z, i32 1 + ret <2 x i64> %r +} + +define <2 x i64> @extract3_i32_zext_insert1_i64_zero(<4 x i32> %x) { +; CHECK-LABEL: extract3_i32_zext_insert1_i64_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, v0.s[3] +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 +; CHECK-NEXT: ret + %e = extractelement <4 x i32> %x, i32 3 + %z = zext i32 %e to i64 + %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1 + ret <2 x i64> %r +} + +define <2 x i64> @extract0_i16_zext_insert0_i64_undef(<8 x i16> %x) { +; CHECK-LABEL: extract0_i16_zext_insert0_i64_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: umov w8, v0.h[0] +; CHECK-NEXT: and x8, x8, #0xffff +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ret + %e = extractelement <8 x i16> %x, i32 0 + %z = zext i16 %e to i64 + %r = insertelement <2 x i64> undef, i64 %z, i32 0 + ret <2 x i64> %r +} + +define <2 x i64> @extract0_i16_zext_insert0_i64_zero(<8 x i16> %x) { +; CHECK-LABEL: extract0_i16_zext_insert0_i64_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: umov w8, v0.h[0] +; CHECK-NEXT: and x8, x8, #0xffff +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 +; CHECK-NEXT: ret + %e = extractelement <8 x i16> %x, i32 0 + %z = zext i16 %e to i64 + %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0 + ret <2 x i64> %r +} + +define <2 x i64> @extract1_i16_zext_insert0_i64_undef(<8 x i16> %x) { +; CHECK-LABEL: extract1_i16_zext_insert0_i64_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: umov w8, v0.h[1] +; CHECK-NEXT: and x8, x8, #0xffff +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ret + %e = extractelement <8 x i16> %x, i32 1 + %z = zext i16 %e to i64 + %r = insertelement <2 x i64> undef, i64 %z, i32 0 + ret <2 x i64> %r +} + +define <2 x i64> @extract1_i16_zext_insert0_i64_zero(<8 x i16> %x) { +; CHECK-LABEL: extract1_i16_zext_insert0_i64_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: umov w8, v0.h[1] +; CHECK-NEXT: and x8, x8, #0xffff +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 +; CHECK-NEXT: ret + %e = extractelement <8 x i16> %x, i32 1 + %z = zext i16 %e to i64 + %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0 + ret <2 x i64> %r +} + +define <2 x i64> @extract2_i16_zext_insert0_i64_undef(<8 x i16> %x) { +; CHECK-LABEL: extract2_i16_zext_insert0_i64_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: umov w8, v0.h[2] +; CHECK-NEXT: and x8, x8, #0xffff +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ret + %e = extractelement <8 x i16> %x, i32 2 + %z = zext i16 %e to i64 + %r = insertelement <2 x i64> undef, i64 %z, i32 0 + ret <2 x i64> %r +} + +define <2 x i64> @extract2_i16_zext_insert0_i64_zero(<8 x i16> %x) { +; CHECK-LABEL: extract2_i16_zext_insert0_i64_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: umov w8, v0.h[2] +; CHECK-NEXT: and x8, x8, #0xffff +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 +; CHECK-NEXT: ret + %e = extractelement <8 x i16> %x, i32 2 + %z = zext i16 %e to i64 + %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0 + ret <2 x i64> %r +} + +define <2 x i64> @extract3_i16_zext_insert0_i64_undef(<8 x i16> %x) { +; CHECK-LABEL: extract3_i16_zext_insert0_i64_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: umov w8, v0.h[3] +; CHECK-NEXT: and x8, x8, #0xffff +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ret + %e = extractelement <8 x i16> %x, i32 3 + %z = zext i16 %e to i64 + %r = insertelement <2 x i64> undef, i64 %z, i32 0 + ret <2 x i64> %r +} + +define <2 x i64> @extract3_i16_zext_insert0_i64_zero(<8 x i16> %x) { +; CHECK-LABEL: extract3_i16_zext_insert0_i64_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: umov w8, v0.h[3] +; CHECK-NEXT: and x8, x8, #0xffff +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 +; CHECK-NEXT: ret + %e = extractelement <8 x i16> %x, i32 3 + %z = zext i16 %e to i64 + %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0 + ret <2 x i64> %r +} + +define <2 x i64> @extract0_i16_zext_insert1_i64_undef(<8 x i16> %x) { +; CHECK-LABEL: extract0_i16_zext_insert1_i64_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: umov w8, v0.h[0] +; CHECK-NEXT: and x8, x8, #0xffff +; CHECK-NEXT: dup v0.2d, x8 +; CHECK-NEXT: ret + %e = extractelement <8 x i16> %x, i32 0 + %z = zext i16 %e to i64 + %r = insertelement <2 x i64> undef, i64 %z, i32 1 + ret <2 x i64> %r +} + +define <2 x i64> @extract0_i16_zext_insert1_i64_zero(<8 x i16> %x) { +; CHECK-LABEL: extract0_i16_zext_insert1_i64_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: umov w8, v0.h[0] +; CHECK-NEXT: and x8, x8, #0xffff +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 +; CHECK-NEXT: ret + %e = extractelement <8 x i16> %x, i32 0 + %z = zext i16 %e to i64 + %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1 + ret <2 x i64> %r +} + +define <2 x i64> @extract1_i16_zext_insert1_i64_undef(<8 x i16> %x) { +; CHECK-LABEL: extract1_i16_zext_insert1_i64_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: umov w8, v0.h[1] +; CHECK-NEXT: and x8, x8, #0xffff +; CHECK-NEXT: dup v0.2d, x8 +; CHECK-NEXT: ret + %e = extractelement <8 x i16> %x, i32 1 + %z = zext i16 %e to i64 + %r = insertelement <2 x i64> undef, i64 %z, i32 1 + ret <2 x i64> %r +} + +define <2 x i64> @extract1_i16_zext_insert1_i64_zero(<8 x i16> %x) { +; CHECK-LABEL: extract1_i16_zext_insert1_i64_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: umov w8, v0.h[1] +; CHECK-NEXT: and x8, x8, #0xffff +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 +; CHECK-NEXT: ret + %e = extractelement <8 x i16> %x, i32 1 + %z = zext i16 %e to i64 + %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1 + ret <2 x i64> %r +} + +define <2 x i64> @extract2_i16_zext_insert1_i64_undef(<8 x i16> %x) { +; CHECK-LABEL: extract2_i16_zext_insert1_i64_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: umov w8, v0.h[2] +; CHECK-NEXT: and x8, x8, #0xffff +; CHECK-NEXT: dup v0.2d, x8 +; CHECK-NEXT: ret + %e = extractelement <8 x i16> %x, i32 2 + %z = zext i16 %e to i64 + %r = insertelement <2 x i64> undef, i64 %z, i32 1 + ret <2 x i64> %r +} + +define <2 x i64> @extract2_i16_zext_insert1_i64_zero(<8 x i16> %x) { +; CHECK-LABEL: extract2_i16_zext_insert1_i64_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: umov w8, v0.h[2] +; CHECK-NEXT: and x8, x8, #0xffff +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 +; CHECK-NEXT: ret + %e = extractelement <8 x i16> %x, i32 2 + %z = zext i16 %e to i64 + %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1 + ret <2 x i64> %r +} + +define <2 x i64> @extract3_i16_zext_insert1_i64_undef(<8 x i16> %x) { +; CHECK-LABEL: extract3_i16_zext_insert1_i64_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: umov w8, v0.h[3] +; CHECK-NEXT: and x8, x8, #0xffff +; CHECK-NEXT: dup v0.2d, x8 +; CHECK-NEXT: ret + %e = extractelement <8 x i16> %x, i32 3 + %z = zext i16 %e to i64 + %r = insertelement <2 x i64> undef, i64 %z, i32 1 + ret <2 x i64> %r +} + +define <2 x i64> @extract3_i16_zext_insert1_i64_zero(<8 x i16> %x) { +; CHECK-LABEL: extract3_i16_zext_insert1_i64_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: umov w8, v0.h[3] +; CHECK-NEXT: and x8, x8, #0xffff +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 +; CHECK-NEXT: ret + %e = extractelement <8 x i16> %x, i32 3 + %z = zext i16 %e to i64 + %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1 + ret <2 x i64> %r +} + ; This would crash because we did not expect to create ; a shuffle for a vector where the source operand is ; not the same size as the result. From 642a1732ef4f120cae6a55725b83bf74c2fa9323 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 22 Jan 2019 19:07:58 +0000 Subject: [PATCH 026/125] Merging r351788: ------------------------------------------------------------------------ r351788 | kadircet | 2019-01-22 10:10:20 +0100 (Tue, 22 Jan 2019) | 15 lines [clangd] Filter out plugin related flags and move all commandline manipulations into OverlayCDB. Summary: Some projects make use of clang plugins when building, but clangd is not aware of those plugins therefore can't work with the same compile command arguments. There were multiple places clangd performed commandline manipulations, this one also moves them all into OverlayCDB. Reviewers: ilya-biryukov Subscribers: klimek, sammccall, ioeric, MaskRay, jkorous, arphaman, cfe-commits Differential Revision: https://reviews.llvm.org/D56841 ------------------------------------------------------------------------ llvm-svn: 351860 --- clang-tools-extra/clangd/ClangdLSPServer.cpp | 3 +- clang-tools-extra/clangd/ClangdServer.cpp | 13 +----- .../clangd/GlobalCompilationDatabase.cpp | 41 +++++++++++++++++-- .../clangd/GlobalCompilationDatabase.h | 5 ++- clang-tools-extra/clangd/index/Background.cpp | 10 ++--- clang-tools-extra/clangd/index/Background.h | 5 +-- .../unittests/clangd/BackgroundIndexTests.cpp | 24 +++++------ .../unittests/clangd/ClangdTests.cpp | 23 +++++++++++ .../clangd/GlobalCompilationDatabaseTests.cpp | 4 +- 9 files changed, 86 insertions(+), 42 deletions(-) diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp index 80792eddc69a9b..97a5e89c47837b 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.cpp +++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp @@ -290,7 +290,8 @@ void ClangdLSPServer::onInitialize(const InitializeParams &Params, if (UseDirBasedCDB) BaseCDB = llvm::make_unique( CompileCommandsDir); - CDB.emplace(BaseCDB.get(), Params.initializationOptions.fallbackFlags); + CDB.emplace(BaseCDB.get(), Params.initializationOptions.fallbackFlags, + ClangdServerOpts.ResourceDir); Server.emplace(*CDB, FSProvider, static_cast(*this), ClangdServerOpts); applyConfiguration(Params.initializationOptions.ConfigSettings); diff --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp index 53a27bdb91d5f4..a4ab622e101ed6 100644 --- a/clang-tools-extra/clangd/ClangdServer.cpp +++ b/clang-tools-extra/clangd/ClangdServer.cpp @@ -38,11 +38,6 @@ namespace clang { namespace clangd { namespace { -std::string getStandardResourceDir() { - static int Dummy; // Just an address in this process. - return CompilerInvocation::GetResourcesPath("clangd", (void *)&Dummy); -} - class RefactoringResultCollector final : public tooling::RefactoringResultConsumer { public: @@ -108,8 +103,6 @@ ClangdServer::ClangdServer(const GlobalCompilationDatabase &CDB, DiagnosticsConsumer &DiagConsumer, const Options &Opts) : CDB(CDB), FSProvider(FSProvider), - ResourceDir(Opts.ResourceDir ? *Opts.ResourceDir - : getStandardResourceDir()), DynamicIdx(Opts.BuildDynamicSymbolIndex ? new FileIndex(Opts.HeavyweightDynamicSymbolIndex) : nullptr), @@ -137,7 +130,7 @@ ClangdServer::ClangdServer(const GlobalCompilationDatabase &CDB, AddIndex(Opts.StaticIndex); if (Opts.BackgroundIndex) { BackgroundIdx = llvm::make_unique( - Context::current().clone(), ResourceDir, FSProvider, CDB, + Context::current().clone(), FSProvider, CDB, BackgroundIndexStorage::createDiskBackedStorageFactory(), Opts.BackgroundIndexRebuildPeriodMs); AddIndex(BackgroundIdx.get()); @@ -462,10 +455,6 @@ tooling::CompileCommand ClangdServer::getCompileCommand(PathRef File) { llvm::Optional C = CDB.getCompileCommand(File); if (!C) // FIXME: Suppress diagnostics? Let the user know? C = CDB.getFallbackCommand(File); - - // Inject the resource dir. - // FIXME: Don't overwrite it if it's already there. - C->CommandLine.push_back("-resource-dir=" + ResourceDir); return std::move(*C); } diff --git a/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp b/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp index c2fff7b20f37bb..8c7aa194a1f8e2 100644 --- a/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp +++ b/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp @@ -9,12 +9,36 @@ #include "GlobalCompilationDatabase.h" #include "Logger.h" +#include "clang/Frontend/CompilerInvocation.h" +#include "clang/Tooling/ArgumentsAdjusters.h" #include "clang/Tooling/CompilationDatabase.h" +#include "llvm/ADT/Optional.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" namespace clang { namespace clangd { +namespace { + +void adjustArguments(tooling::CompileCommand &Cmd, + llvm::StringRef ResourceDir) { + // Strip plugin related command line arguments. Clangd does + // not support plugins currently. Therefore it breaks if + // compiler tries to load plugins. + Cmd.CommandLine = + tooling::getStripPluginsAdjuster()(Cmd.CommandLine, Cmd.Filename); + // Inject the resource dir. + // FIXME: Don't overwrite it if it's already there. + if (!ResourceDir.empty()) + Cmd.CommandLine.push_back(("-resource-dir=" + ResourceDir).str()); +} + +std::string getStandardResourceDir() { + static int Dummy; // Just an address in this process. + return CompilerInvocation::GetResourcesPath("clangd", (void *)&Dummy); +} + +} // namespace static std::string getFallbackClangPath() { static int Dummy; @@ -106,8 +130,11 @@ DirectoryBasedGlobalCompilationDatabase::getCDBForFile( } OverlayCDB::OverlayCDB(const GlobalCompilationDatabase *Base, - std::vector FallbackFlags) - : Base(Base), FallbackFlags(std::move(FallbackFlags)) { + std::vector FallbackFlags, + llvm::Optional ResourceDir) + : Base(Base), ResourceDir(ResourceDir ? std::move(*ResourceDir) + : getStandardResourceDir()), + FallbackFlags(std::move(FallbackFlags)) { if (Base) BaseChanged = Base->watch([this](const std::vector Changes) { OnCommandChanged.broadcast(Changes); @@ -116,16 +143,22 @@ OverlayCDB::OverlayCDB(const GlobalCompilationDatabase *Base, llvm::Optional OverlayCDB::getCompileCommand(PathRef File, ProjectInfo *Project) const { + llvm::Optional Cmd; { std::lock_guard Lock(Mutex); auto It = Commands.find(File); if (It != Commands.end()) { if (Project) Project->SourceRoot = ""; - return It->second; + Cmd = It->second; } } - return Base ? Base->getCompileCommand(File, Project) : None; + if (!Cmd && Base) + Cmd = Base->getCompileCommand(File, Project); + if (!Cmd) + return llvm::None; + adjustArguments(*Cmd, ResourceDir); + return Cmd; } tooling::CompileCommand OverlayCDB::getFallbackCommand(PathRef File) const { diff --git a/clang-tools-extra/clangd/GlobalCompilationDatabase.h b/clang-tools-extra/clangd/GlobalCompilationDatabase.h index 181b1781f73b35..6411fd48ced006 100644 --- a/clang-tools-extra/clangd/GlobalCompilationDatabase.h +++ b/clang-tools-extra/clangd/GlobalCompilationDatabase.h @@ -12,6 +12,7 @@ #include "Function.h" #include "Path.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/StringMap.h" #include #include @@ -98,7 +99,8 @@ class OverlayCDB : public GlobalCompilationDatabase { // Base may be null, in which case no entries are inherited. // FallbackFlags are added to the fallback compile command. OverlayCDB(const GlobalCompilationDatabase *Base, - std::vector FallbackFlags = {}); + std::vector FallbackFlags = {}, + llvm::Optional ResourceDir = llvm::None); llvm::Optional getCompileCommand(PathRef File, ProjectInfo * = nullptr) const override; @@ -113,6 +115,7 @@ class OverlayCDB : public GlobalCompilationDatabase { mutable std::mutex Mutex; llvm::StringMap Commands; /* GUARDED_BY(Mut) */ const GlobalCompilationDatabase *Base; + std::string ResourceDir; std::vector FallbackFlags; CommandChanged::Subscription BaseChanged; }; diff --git a/clang-tools-extra/clangd/index/Background.cpp b/clang-tools-extra/clangd/index/Background.cpp index b9b945c52bdaf4..752d5c2876e000 100644 --- a/clang-tools-extra/clangd/index/Background.cpp +++ b/clang-tools-extra/clangd/index/Background.cpp @@ -127,13 +127,12 @@ llvm::SmallString<128> getAbsolutePath(const tooling::CompileCommand &Cmd) { } // namespace BackgroundIndex::BackgroundIndex( - Context BackgroundContext, llvm::StringRef ResourceDir, - const FileSystemProvider &FSProvider, const GlobalCompilationDatabase &CDB, + Context BackgroundContext, const FileSystemProvider &FSProvider, + const GlobalCompilationDatabase &CDB, BackgroundIndexStorage::Factory IndexStorageFactory, size_t BuildIndexPeriodMs, size_t ThreadPoolSize) - : SwapIndex(llvm::make_unique()), ResourceDir(ResourceDir), - FSProvider(FSProvider), CDB(CDB), - BackgroundContext(std::move(BackgroundContext)), + : SwapIndex(llvm::make_unique()), FSProvider(FSProvider), + CDB(CDB), BackgroundContext(std::move(BackgroundContext)), BuildIndexPeriodMs(BuildIndexPeriodMs), SymbolsUpdatedSinceLastIndex(false), IndexStorageFactory(std::move(IndexStorageFactory)), @@ -230,7 +229,6 @@ void BackgroundIndex::enqueue(tooling::CompileCommand Cmd, BackgroundIndexStorage *Storage) { enqueueTask(Bind( [this, Storage](tooling::CompileCommand Cmd) { - Cmd.CommandLine.push_back("-resource-dir=" + ResourceDir); // We can't use llvm::StringRef here since we are going to // move from Cmd during the call below. const std::string FileName = Cmd.Filename; diff --git a/clang-tools-extra/clangd/index/Background.h b/clang-tools-extra/clangd/index/Background.h index 1a1fee68a571ef..81675be55b5a05 100644 --- a/clang-tools-extra/clangd/index/Background.h +++ b/clang-tools-extra/clangd/index/Background.h @@ -68,9 +68,7 @@ class BackgroundIndex : public SwapIndex { /// If BuildIndexPeriodMs is greater than 0, the symbol index will only be /// rebuilt periodically (one per \p BuildIndexPeriodMs); otherwise, index is /// rebuilt for each indexed file. - // FIXME: resource-dir injection should be hoisted somewhere common. - BackgroundIndex(Context BackgroundContext, llvm::StringRef ResourceDir, - const FileSystemProvider &, + BackgroundIndex(Context BackgroundContext, const FileSystemProvider &, const GlobalCompilationDatabase &CDB, BackgroundIndexStorage::Factory IndexStorageFactory, size_t BuildIndexPeriodMs = 0, @@ -99,7 +97,6 @@ class BackgroundIndex : public SwapIndex { BackgroundIndexStorage *IndexStorage); // configuration - std::string ResourceDir; const FileSystemProvider &FSProvider; const GlobalCompilationDatabase &CDB; Context BackgroundContext; diff --git a/clang-tools-extra/unittests/clangd/BackgroundIndexTests.cpp b/clang-tools-extra/unittests/clangd/BackgroundIndexTests.cpp index 639d35c876ac7f..09a117dbbe7d32 100644 --- a/clang-tools-extra/unittests/clangd/BackgroundIndexTests.cpp +++ b/clang-tools-extra/unittests/clangd/BackgroundIndexTests.cpp @@ -76,7 +76,7 @@ TEST_F(BackgroundIndexTest, NoCrashOnErrorFile) { size_t CacheHits = 0; MemoryShardStorage MSS(Storage, CacheHits); OverlayCDB CDB(/*Base=*/nullptr); - BackgroundIndex Idx(Context::empty(), "", FS, CDB, + BackgroundIndex Idx(Context::empty(), FS, CDB, [&](llvm::StringRef) { return &MSS; }); tooling::CompileCommand Cmd; @@ -113,7 +113,7 @@ TEST_F(BackgroundIndexTest, IndexTwoFiles) { size_t CacheHits = 0; MemoryShardStorage MSS(Storage, CacheHits); OverlayCDB CDB(/*Base=*/nullptr); - BackgroundIndex Idx(Context::empty(), "", FS, CDB, + BackgroundIndex Idx(Context::empty(), FS, CDB, [&](llvm::StringRef) { return &MSS; }); tooling::CompileCommand Cmd; @@ -168,7 +168,7 @@ TEST_F(BackgroundIndexTest, ShardStorageTest) { // Check nothing is loaded from Storage, but A.cc and A.h has been stored. { OverlayCDB CDB(/*Base=*/nullptr); - BackgroundIndex Idx(Context::empty(), "", FS, CDB, + BackgroundIndex Idx(Context::empty(), FS, CDB, [&](llvm::StringRef) { return &MSS; }); CDB.setCompileCommand(testPath("root/A.cc"), Cmd); ASSERT_TRUE(Idx.blockUntilIdleForTest()); @@ -178,7 +178,7 @@ TEST_F(BackgroundIndexTest, ShardStorageTest) { { OverlayCDB CDB(/*Base=*/nullptr); - BackgroundIndex Idx(Context::empty(), "", FS, CDB, + BackgroundIndex Idx(Context::empty(), FS, CDB, [&](llvm::StringRef) { return &MSS; }); CDB.setCompileCommand(testPath("root"), Cmd); ASSERT_TRUE(Idx.blockUntilIdleForTest()); @@ -224,7 +224,7 @@ TEST_F(BackgroundIndexTest, DirectIncludesTest) { Cmd.CommandLine = {"clang++", testPath("root/A.cc")}; { OverlayCDB CDB(/*Base=*/nullptr); - BackgroundIndex Idx(Context::empty(), "", FS, CDB, + BackgroundIndex Idx(Context::empty(), FS, CDB, [&](llvm::StringRef) { return &MSS; }); CDB.setCompileCommand(testPath("root/A.cc"), Cmd); ASSERT_TRUE(Idx.blockUntilIdleForTest()); @@ -262,7 +262,7 @@ TEST_F(BackgroundIndexTest, DISABLED_PeriodicalIndex) { MemoryShardStorage MSS(Storage, CacheHits); OverlayCDB CDB(/*Base=*/nullptr); BackgroundIndex Idx( - Context::empty(), "", FS, CDB, [&](llvm::StringRef) { return &MSS; }, + Context::empty(), FS, CDB, [&](llvm::StringRef) { return &MSS; }, /*BuildIndexPeriodMs=*/500); FS.Files[testPath("root/A.cc")] = "#include \"A.h\""; @@ -310,7 +310,7 @@ TEST_F(BackgroundIndexTest, ShardStorageLoad) { // Check nothing is loaded from Storage, but A.cc and A.h has been stored. { OverlayCDB CDB(/*Base=*/nullptr); - BackgroundIndex Idx(Context::empty(), "", FS, CDB, + BackgroundIndex Idx(Context::empty(), FS, CDB, [&](llvm::StringRef) { return &MSS; }); CDB.setCompileCommand(testPath("root/A.cc"), Cmd); ASSERT_TRUE(Idx.blockUntilIdleForTest()); @@ -325,7 +325,7 @@ TEST_F(BackgroundIndexTest, ShardStorageLoad) { )cpp"; { OverlayCDB CDB(/*Base=*/nullptr); - BackgroundIndex Idx(Context::empty(), "", FS, CDB, + BackgroundIndex Idx(Context::empty(), FS, CDB, [&](llvm::StringRef) { return &MSS; }); CDB.setCompileCommand(testPath("root"), Cmd); ASSERT_TRUE(Idx.blockUntilIdleForTest()); @@ -343,7 +343,7 @@ TEST_F(BackgroundIndexTest, ShardStorageLoad) { { CacheHits = 0; OverlayCDB CDB(/*Base=*/nullptr); - BackgroundIndex Idx(Context::empty(), "", FS, CDB, + BackgroundIndex Idx(Context::empty(), FS, CDB, [&](llvm::StringRef) { return &MSS; }); CDB.setCompileCommand(testPath("root"), Cmd); ASSERT_TRUE(Idx.blockUntilIdleForTest()); @@ -384,7 +384,7 @@ TEST_F(BackgroundIndexTest, ShardStorageEmptyFile) { // Check that A.cc, A.h and B.h has been stored. { OverlayCDB CDB(/*Base=*/nullptr); - BackgroundIndex Idx(Context::empty(), "", FS, CDB, + BackgroundIndex Idx(Context::empty(), FS, CDB, [&](llvm::StringRef) { return &MSS; }); CDB.setCompileCommand(testPath("root/A.cc"), Cmd); ASSERT_TRUE(Idx.blockUntilIdleForTest()); @@ -400,7 +400,7 @@ TEST_F(BackgroundIndexTest, ShardStorageEmptyFile) { { CacheHits = 0; OverlayCDB CDB(/*Base=*/nullptr); - BackgroundIndex Idx(Context::empty(), "", FS, CDB, + BackgroundIndex Idx(Context::empty(), FS, CDB, [&](llvm::StringRef) { return &MSS; }); CDB.setCompileCommand(testPath("root/A.cc"), Cmd); ASSERT_TRUE(Idx.blockUntilIdleForTest()); @@ -416,7 +416,7 @@ TEST_F(BackgroundIndexTest, ShardStorageEmptyFile) { { CacheHits = 0; OverlayCDB CDB(/*Base=*/nullptr); - BackgroundIndex Idx(Context::empty(), "", FS, CDB, + BackgroundIndex Idx(Context::empty(), FS, CDB, [&](llvm::StringRef) { return &MSS; }); CDB.setCompileCommand(testPath("root/A.cc"), Cmd); ASSERT_TRUE(Idx.blockUntilIdleForTest()); diff --git a/clang-tools-extra/unittests/clangd/ClangdTests.cpp b/clang-tools-extra/unittests/clangd/ClangdTests.cpp index c1cc623a05a112..356efe529dde0e 100644 --- a/clang-tools-extra/unittests/clangd/ClangdTests.cpp +++ b/clang-tools-extra/unittests/clangd/ClangdTests.cpp @@ -10,6 +10,7 @@ #include "Annotations.h" #include "ClangdLSPServer.h" #include "ClangdServer.h" +#include "GlobalCompilationDatabase.h" #include "Matchers.h" #include "SyncAPI.h" #include "TestFS.h" @@ -1037,6 +1038,28 @@ TEST(ClangdTests, PreambleVFSStatCache) { } #endif +TEST_F(ClangdVFSTest, FlagsWithPlugins) { + MockFSProvider FS; + ErrorCheckingDiagConsumer DiagConsumer; + MockCompilationDatabase CDB; + CDB.ExtraClangFlags = { + "-Xclang", + "-add-plugin", + "-Xclang", + "random-plugin", + }; + OverlayCDB OCDB(&CDB); + ClangdServer Server(OCDB, FS, DiagConsumer, ClangdServer::optsForTest()); + + auto FooCpp = testPath("foo.cpp"); + const auto SourceContents = "int main() { return 0; }"; + FS.Files[FooCpp] = FooCpp; + Server.addDocument(FooCpp, SourceContents); + auto Result = dumpASTWithoutMemoryLocs(Server, FooCpp); + EXPECT_TRUE(Server.blockUntilIdleForTest()) << "Waiting for diagnostics"; + EXPECT_NE(Result, ""); +} + } // namespace } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/unittests/clangd/GlobalCompilationDatabaseTests.cpp b/clang-tools-extra/unittests/clangd/GlobalCompilationDatabaseTests.cpp index b0052c751c27b9..41c28bf35d5a34 100644 --- a/clang-tools-extra/unittests/clangd/GlobalCompilationDatabaseTests.cpp +++ b/clang-tools-extra/unittests/clangd/GlobalCompilationDatabaseTests.cpp @@ -65,7 +65,7 @@ class OverlayCDBTest : public ::testing::Test { }; TEST_F(OverlayCDBTest, GetCompileCommand) { - OverlayCDB CDB(Base.get()); + OverlayCDB CDB(Base.get(), {}, std::string("")); EXPECT_EQ(CDB.getCompileCommand(testPath("foo.cc")), Base->getCompileCommand(testPath("foo.cc"))); EXPECT_EQ(CDB.getCompileCommand(testPath("missing.cc")), llvm::None); @@ -85,7 +85,7 @@ TEST_F(OverlayCDBTest, GetFallbackCommand) { } TEST_F(OverlayCDBTest, NoBase) { - OverlayCDB CDB(nullptr, {"-DA=6"}); + OverlayCDB CDB(nullptr, {"-DA=6"}, std::string("")); EXPECT_EQ(CDB.getCompileCommand(testPath("bar.cc")), None); auto Override = cmd(testPath("bar.cc"), "-DA=5"); CDB.setCompileCommand(testPath("bar.cc"), Override); From a05d35f16368b81d8e434cabd44a7c5b285306a4 Mon Sep 17 00:00:00 2001 From: Martin Storsjo Date: Tue, 22 Jan 2019 20:32:43 +0000 Subject: [PATCH 027/125] [docs] Amend the release notes with more things I've contributed since the last release Differential Revision: https://reviews.llvm.org/D57005 llvm-svn: 351868 --- lld/docs/ReleaseNotes.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index dc5df6795d9936..a146bddd543af9 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -66,6 +66,13 @@ MinGW Improvements linked in a different order than with GNU ld, inserting a DWARF exception table terminator too early.) +* lld now supports COFF embedded directives for linking to nondefault + libraries, just like for the normal COFF target. + +* Actually generate a codeview build id signature, even if not creating a PDB. + Previously, the ``--build-id`` option did not actually generate a build id + unless ``--pdb`` was specified. + MachO Improvements ------------------ From 78ecdd7abc4998da861d2bd7c0b9cc4d38c1dae0 Mon Sep 17 00:00:00 2001 From: Martin Storsjo Date: Tue, 22 Jan 2019 20:36:23 +0000 Subject: [PATCH 028/125] [docs] Add release notes for notable things I've contributed since last release Differential Revision: https://reviews.llvm.org/D57003 llvm-svn: 351870 --- llvm/docs/ReleaseNotes.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index db9cf51949520a..cb80459546f540 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -48,6 +48,10 @@ Non-comprehensive list of changes in this release functionality. See `Writing an LLVM Pass `_. +* For MinGW, references to data variables that might need to be imported + from a dll are accessed via a stub, to allow the linker to convert it to + a dllimport if needed. + .. NOTE If you would like to document a larger change, then you can add a subsection about it right here. You can copy the following boilerplate @@ -62,6 +66,13 @@ Changes to the LLVM IR ---------------------- +Changes to the AArch64 Target +----------------------------- + +* Added support for the ``.arch_extension`` assembler directive, just like + on ARM. + + Changes to the ARM Backend -------------------------- From 1e7e3996e3d1ba12a159c378922c360ef874c6c6 Mon Sep 17 00:00:00 2001 From: Martin Storsjo Date: Tue, 22 Jan 2019 20:41:51 +0000 Subject: [PATCH 029/125] [docs] Add release notes for notable things I've contributed since last release Differential Revision: https://reviews.llvm.org/D57004 llvm-svn: 351872 --- clang/docs/ReleaseNotes.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index b4f290e05d7876..28be16677eceef 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -136,6 +136,9 @@ New Compiler Flags instrumenting for gcov-based profiling. See the :doc:`UsersManual` for details. +- When using a custom stack alignment, the ``stackrealign`` attribute is now + implicitly set on the main function. + - ... Deprecated Compiler Flags @@ -179,6 +182,15 @@ Windows Support `dllexport` and `dllimport` attributes not apply to inline member functions. This can significantly reduce compile and link times. See the `User's Manual `_ for more info. + +- For MinGW, ``-municode`` now correctly defines ``UNICODE`` during + preprocessing. + +- For MinGW, clang now produces vtables and RTTI for dllexported classes + without key functions. This fixes building Qt in debug mode. + +- Allow using Address Sanitizer and Undefined Behaviour Sanitizer on MinGW. + - ... From d0fae09cb63df0498dc6cf2581113416a8fe3cde Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Wed, 23 Jan 2019 16:18:07 +0000 Subject: [PATCH 030/125] Merging r351531: ------------------------------------------------------------------------ r351531 | kadircet | 2019-01-18 10:00:31 +0100 (Fri, 18 Jan 2019) | 11 lines [tooling] Add a new argument adjuster for deleting plugin related command line args Summary: Currently both clangd and clang-tidy makes use of this mechanism so putting it into tooling so that all tools can make use of it. Reviewers: ilya-biryukov, sammccall Subscribers: ioeric, cfe-commits Differential Revision: https://reviews.llvm.org/D56856 ------------------------------------------------------------------------ llvm-svn: 351961 --- .../clang/Tooling/ArgumentsAdjusters.h | 4 +++ clang/lib/Tooling/ArgumentsAdjusters.cpp | 22 +++++++++++++ clang/unittests/Tooling/ToolingTest.cpp | 31 +++++++++++++++++++ 3 files changed, 57 insertions(+) diff --git a/clang/include/clang/Tooling/ArgumentsAdjusters.h b/clang/include/clang/Tooling/ArgumentsAdjusters.h index 94ccf1f34e5760..e31839b9a8f733 100644 --- a/clang/include/clang/Tooling/ArgumentsAdjusters.h +++ b/clang/include/clang/Tooling/ArgumentsAdjusters.h @@ -61,6 +61,10 @@ ArgumentsAdjuster getInsertArgumentAdjuster( const char *Extra, ArgumentInsertPosition Pos = ArgumentInsertPosition::END); +/// Gets an argument adjuster which strips plugin related command line +/// arguments. +ArgumentsAdjuster getStripPluginsAdjuster(); + /// Gets an argument adjuster which adjusts the arguments in sequence /// with the \p First adjuster and then with the \p Second one. ArgumentsAdjuster combineAdjusters(ArgumentsAdjuster First, diff --git a/clang/lib/Tooling/ArgumentsAdjusters.cpp b/clang/lib/Tooling/ArgumentsAdjusters.cpp index c8e9c167422e80..f5040b8a09d571 100644 --- a/clang/lib/Tooling/ArgumentsAdjusters.cpp +++ b/clang/lib/Tooling/ArgumentsAdjusters.cpp @@ -108,5 +108,27 @@ ArgumentsAdjuster combineAdjusters(ArgumentsAdjuster First, }; } +ArgumentsAdjuster getStripPluginsAdjuster() { + return [](const CommandLineArguments &Args, StringRef /*unused*/) { + CommandLineArguments AdjustedArgs; + for (size_t I = 0, E = Args.size(); I != E; I++) { + // According to https://clang.llvm.org/docs/ClangPlugins.html + // plugin arguments are in the form: + // -Xclang {-load, -plugin, -plugin-arg-, -add-plugin} + // -Xclang + if (I + 4 < E && Args[I] == "-Xclang" && + (Args[I + 1] == "-load" || Args[I + 1] == "-plugin" || + llvm::StringRef(Args[I + 1]).startswith("-plugin-arg-") || + Args[I + 1] == "-add-plugin") && + Args[I + 2] == "-Xclang") { + I += 3; + continue; + } + AdjustedArgs.push_back(Args[I]); + } + return AdjustedArgs; + }; +} + } // end namespace tooling } // end namespace clang diff --git a/clang/unittests/Tooling/ToolingTest.cpp b/clang/unittests/Tooling/ToolingTest.cpp index 186463f80af733..5813552a6cd323 100644 --- a/clang/unittests/Tooling/ToolingTest.cpp +++ b/clang/unittests/Tooling/ToolingTest.cpp @@ -450,6 +450,37 @@ TEST(ClangToolTest, StripDependencyFileAdjuster) { EXPECT_TRUE(HasFlag("-w")); } +// Check getClangStripPluginsAdjuster strips plugin related args. +TEST(ClangToolTest, StripPluginsAdjuster) { + FixedCompilationDatabase Compilations( + "/", {"-Xclang", "-add-plugin", "-Xclang", "random-plugin"}); + + ClangTool Tool(Compilations, std::vector(1, "/a.cc")); + Tool.mapVirtualFile("/a.cc", "void a() {}"); + + std::unique_ptr Action( + newFrontendActionFactory()); + + CommandLineArguments FinalArgs; + ArgumentsAdjuster CheckFlagsAdjuster = + [&FinalArgs](const CommandLineArguments &Args, StringRef /*unused*/) { + FinalArgs = Args; + return Args; + }; + Tool.clearArgumentsAdjusters(); + Tool.appendArgumentsAdjuster(getStripPluginsAdjuster()); + Tool.appendArgumentsAdjuster(CheckFlagsAdjuster); + Tool.run(Action.get()); + + auto HasFlag = [&FinalArgs](const std::string &Flag) { + return std::find(FinalArgs.begin(), FinalArgs.end(), Flag) != + FinalArgs.end(); + }; + EXPECT_FALSE(HasFlag("-Xclang")); + EXPECT_FALSE(HasFlag("-add-plugin")); + EXPECT_FALSE(HasFlag("-random-plugin")); +} + namespace { /// Find a target name such that looking for it in TargetRegistry by that name /// returns the same target. We expect that there is at least one target From f669262c405bd90294fe9c4c96fb6e7b1482d848 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Wed, 23 Jan 2019 16:21:21 +0000 Subject: [PATCH 031/125] Merging r351738: ------------------------------------------------------------------------ r351738 | kadircet | 2019-01-21 11:10:18 +0100 (Mon, 21 Jan 2019) | 9 lines [clang-tidy] Use getStripPluginsAdjuster Summary: See rC351531 for the introduction of getStripPluginsAdjuster. Reviewers: alexfh Subscribers: xazax.hun, cfe-commits Differential Revision: https://reviews.llvm.org/D56902 ------------------------------------------------------------------------ llvm-svn: 351962 --- clang-tools-extra/clang-tidy/ClangTidy.cpp | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/clang-tools-extra/clang-tidy/ClangTidy.cpp b/clang-tools-extra/clang-tidy/ClangTidy.cpp index 0feda605ea66bf..50ff149e38776c 100644 --- a/clang-tools-extra/clang-tidy/ClangTidy.cpp +++ b/clang-tools-extra/clang-tidy/ClangTidy.cpp @@ -529,24 +529,8 @@ runClangTidy(clang::tidy::ClangTidyContext &Context, return AdjustedArgs; }; - // Remove plugins arguments. - ArgumentsAdjuster PluginArgumentsRemover = - [](const CommandLineArguments &Args, StringRef Filename) { - CommandLineArguments AdjustedArgs; - for (size_t I = 0, E = Args.size(); I < E; ++I) { - if (I + 4 < Args.size() && Args[I] == "-Xclang" && - (Args[I + 1] == "-load" || Args[I + 1] == "-add-plugin" || - StringRef(Args[I + 1]).startswith("-plugin-arg-")) && - Args[I + 2] == "-Xclang") { - I += 3; - } else - AdjustedArgs.push_back(Args[I]); - } - return AdjustedArgs; - }; - Tool.appendArgumentsAdjuster(PerFileExtraArgumentsInserter); - Tool.appendArgumentsAdjuster(PluginArgumentsRemover); + Tool.appendArgumentsAdjuster(getStripPluginsAdjuster()); Context.setEnableProfiling(EnableCheckProfile); Context.setProfileStoragePrefix(StoreCheckProfile); From 40cbc004e802b8b66b1802bb4af9a9513bcda9fe Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Wed, 23 Jan 2019 16:24:00 +0000 Subject: [PATCH 032/125] Merging r351898: ------------------------------------------------------------------------ r351898 | pcc | 2019-01-23 00:51:35 +0100 (Wed, 23 Jan 2019) | 13 lines COFF, ELF: Adjust ICF hash computation to account for self relocations. It turns out that sections in PGO instrumented object files on Windows contain a large number of relocations pointing to themselves. With r347429 this can cause many sections to receive the same hash (usually zero) as a result of a section's hash being xor'ed with itself. This patch causes the COFF and ELF linkers to avoid this problem by adding the hash of the relocated section instead of xor'ing it. On my machine this causes the regressing test case provided by Mozilla to terminate in 2m41s. Differential Revision: https://reviews.llvm.org/D56955 ------------------------------------------------------------------------ llvm-svn: 351963 --- lld/COFF/ICF.cpp | 2 +- lld/ELF/ICF.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lld/COFF/ICF.cpp b/lld/COFF/ICF.cpp index 34ea360fa92584..e8091dcbef8ccb 100644 --- a/lld/COFF/ICF.cpp +++ b/lld/COFF/ICF.cpp @@ -272,7 +272,7 @@ void ICF::run(ArrayRef Vec) { uint32_t Hash = SC->Class[1]; for (Symbol *B : SC->symbols()) if (auto *Sym = dyn_cast_or_null(B)) - Hash ^= Sym->getChunk()->Class[1]; + Hash += Sym->getChunk()->Class[1]; // Set MSB to 1 to avoid collisions with non-hash classs. SC->Class[0] = Hash | (1U << 31); }); diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp index e917ae76a689e0..ba04de017688e2 100644 --- a/lld/ELF/ICF.cpp +++ b/lld/ELF/ICF.cpp @@ -432,7 +432,7 @@ static void combineRelocHashes(InputSection *IS, ArrayRef Rels) { Symbol &S = IS->template getFile()->getRelocTargetSym(Rel); if (auto *D = dyn_cast(&S)) if (auto *RelSec = dyn_cast_or_null(D->Section)) - Hash ^= RelSec->Class[1]; + Hash += RelSec->Class[1]; } // Set MSB to 1 to avoid collisions with non-hash IDs. IS->Class[0] = Hash | (1U << 31); From b00b2b84b23acca31e02820ce64dbffd3b5f9a43 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Wed, 23 Jan 2019 16:25:15 +0000 Subject: [PATCH 033/125] Merging r351899: ------------------------------------------------------------------------ r351899 | pcc | 2019-01-23 00:54:49 +0100 (Wed, 23 Jan 2019) | 26 lines COFF, ELF: ICF: Perform 2 rounds of relocation hash propagation. LLD's performance on PGO instrumented Windows binaries was still not great even with the fix in D56955; out of the 2m41s linker runtime, around 2 minutes were still being spent in ICF. I looked into this more closely and discovered that the vast majority of the runtime was being spent segregating .pdata sections with the following relocation chain: .pdata -> identical .text -> unique PGO counter (not eligible for ICF) This patch causes us to perform 2 rounds of relocation hash propagation, which allows the hash for the .pdata sections to incorporate the identifier from the PGO counter. With that, the amount of time spent in ICF was reduced to about 2 seconds. I also found that the same change led to a significant ICF performance improvement in a regular release build of Chromium's chrome_child.dll, where ICF time was reduced from around 1s to around 700ms. With the same change applied to the ELF linker, median of 100 runs for lld-speed-test/chrome reduced from 4.53s to 4.45s on my machine. I also experimented with increasing the number of propagation rounds further, but I did not observe any further significant performance improvements linking Chromium or Firefox. Differential Revision: https://reviews.llvm.org/D56986 ------------------------------------------------------------------------ llvm-svn: 351964 --- lld/COFF/ICF.cpp | 20 +++++++++++--------- lld/ELF/ICF.cpp | 25 ++++++++++++++----------- 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/lld/COFF/ICF.cpp b/lld/COFF/ICF.cpp index e8091dcbef8ccb..f6904eb7d24f26 100644 --- a/lld/COFF/ICF.cpp +++ b/lld/COFF/ICF.cpp @@ -263,19 +263,21 @@ void ICF::run(ArrayRef Vec) { // Initially, we use hash values to partition sections. parallelForEach(Chunks, [&](SectionChunk *SC) { - SC->Class[1] = xxHash64(SC->getContents()); + SC->Class[0] = xxHash64(SC->getContents()); }); // Combine the hashes of the sections referenced by each section into its // hash. - parallelForEach(Chunks, [&](SectionChunk *SC) { - uint32_t Hash = SC->Class[1]; - for (Symbol *B : SC->symbols()) - if (auto *Sym = dyn_cast_or_null(B)) - Hash += Sym->getChunk()->Class[1]; - // Set MSB to 1 to avoid collisions with non-hash classs. - SC->Class[0] = Hash | (1U << 31); - }); + for (unsigned Cnt = 0; Cnt != 2; ++Cnt) { + parallelForEach(Chunks, [&](SectionChunk *SC) { + uint32_t Hash = SC->Class[Cnt % 2]; + for (Symbol *B : SC->symbols()) + if (auto *Sym = dyn_cast_or_null(B)) + Hash += Sym->getChunk()->Class[Cnt % 2]; + // Set MSB to 1 to avoid collisions with non-hash classs. + SC->Class[(Cnt + 1) % 2] = Hash | (1U << 31); + }); + } // From now on, sections in Chunks are ordered so that sections in // the same group are consecutive in the vector. diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp index ba04de017688e2..d08ac73ded80e8 100644 --- a/lld/ELF/ICF.cpp +++ b/lld/ELF/ICF.cpp @@ -426,16 +426,17 @@ void ICF::forEachClass(llvm::function_ref Fn) { // Combine the hashes of the sections referenced by the given section into its // hash. template -static void combineRelocHashes(InputSection *IS, ArrayRef Rels) { - uint32_t Hash = IS->Class[1]; +static void combineRelocHashes(unsigned Cnt, InputSection *IS, + ArrayRef Rels) { + uint32_t Hash = IS->Class[Cnt % 2]; for (RelTy Rel : Rels) { Symbol &S = IS->template getFile()->getRelocTargetSym(Rel); if (auto *D = dyn_cast(&S)) if (auto *RelSec = dyn_cast_or_null(D->Section)) - Hash += RelSec->Class[1]; + Hash += RelSec->Class[Cnt % 2]; } // Set MSB to 1 to avoid collisions with non-hash IDs. - IS->Class[0] = Hash | (1U << 31); + IS->Class[(Cnt + 1) % 2] = Hash | (1U << 31); } static void print(const Twine &S) { @@ -453,15 +454,17 @@ template void ICF::run() { // Initially, we use hash values to partition sections. parallelForEach(Sections, [&](InputSection *S) { - S->Class[1] = xxHash64(S->data()); + S->Class[0] = xxHash64(S->data()); }); - parallelForEach(Sections, [&](InputSection *S) { - if (S->AreRelocsRela) - combineRelocHashes(S, S->template relas()); - else - combineRelocHashes(S, S->template rels()); - }); + for (unsigned Cnt = 0; Cnt != 2; ++Cnt) { + parallelForEach(Sections, [&](InputSection *S) { + if (S->AreRelocsRela) + combineRelocHashes(Cnt, S, S->template relas()); + else + combineRelocHashes(Cnt, S, S->template rels()); + }); + } // From now on, sections in Sections vector are ordered so that sections // in the same equivalence class are consecutive in the vector. From 17c9824ff19a56a1b07eb97574e15cf2084b145b Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Wed, 23 Jan 2019 21:11:36 +0000 Subject: [PATCH 034/125] Merging r351670: ------------------------------------------------------------------------ r351670 | ericwf | 2019-01-20 02:21:35 +0100 (Sun, 20 Jan 2019) | 7 lines Fix aligned allocation availability XFAILs after D56445. D56445 bumped the minimum Mac OS X version required for aligned allocation from 10.13 to 10.14. This caused libc++ tests depending on the old value to break. This patch updates the XFAILs for those tests to include 10.13. ------------------------------------------------------------------------ llvm-svn: 351980 --- .../new.delete.array/delete_align_val_t_replace.pass.cpp | 8 +++++--- .../new.delete/new.delete.array/new_align_val_t.pass.cpp | 8 +++++--- .../new.delete.array/new_align_val_t_nothrow.pass.cpp | 8 +++++--- .../new_align_val_t_nothrow_replace.pass.cpp | 8 +++++--- .../new.delete.single/delete_align_val_t_replace.pass.cpp | 8 +++++--- .../new.delete/new.delete.single/new_align_val_t.pass.cpp | 8 +++++--- .../new.delete.single/new_align_val_t_nothrow.pass.cpp | 8 +++++--- .../new_align_val_t_nothrow_replace.pass.cpp | 8 +++++--- 8 files changed, 40 insertions(+), 24 deletions(-) diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/delete_align_val_t_replace.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/delete_align_val_t_replace.pass.cpp index 4dd9390c4fdcfa..bd20e090637d0c 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/delete_align_val_t_replace.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/delete_align_val_t_replace.pass.cpp @@ -17,9 +17,11 @@ // None of the current GCC compilers support this. // UNSUPPORTED: gcc-5, gcc-6 -// Aligned allocation was not provided before macosx10.12 and as a result we -// get availability errors when the deployment target is older than macosx10.13. -// However, AppleClang 10 (and older) don't trigger availability errors. +// Aligned allocation was not provided before macosx10.14 and as a result we +// get availability errors when the deployment target is older than macosx10.14. +// However, AppleClang 10 (and older) don't trigger availability errors, and +// Clang < 8.0 doesn't warn for 10.13. +// XFAIL: !(apple-clang-9 || apple-clang-10 || clang-7) && availability=macosx10.13 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.12 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.11 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.10 diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t.pass.cpp index d6194b00aa0255..6b4e1c1924b4a2 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t.pass.cpp @@ -15,9 +15,11 @@ // FIXME change this to XFAIL. // UNSUPPORTED: no-aligned-allocation && !gcc -// Aligned allocation was not provided before macosx10.12 and as a result we -// get availability errors when the deployment target is older than macosx10.13. -// However, AppleClang 10 (and older) don't trigger availability errors. +// Aligned allocation was not provided before macosx10.14 and as a result we +// get availability errors when the deployment target is older than macosx10.14. +// However, AppleClang 10 (and older) don't trigger availability errors, and +// Clang < 8.0 doesn't warn for 10.13. +// XFAIL: !(apple-clang-9 || apple-clang-10 || clang-7) && availability=macosx10.13 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.12 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.11 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.10 diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow.pass.cpp index 59878aefd18a20..3188cc587dde92 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow.pass.cpp @@ -15,9 +15,11 @@ // FIXME turn this into an XFAIL // UNSUPPORTED: no-aligned-allocation && !gcc -// Aligned allocation was not provided before macosx10.12 and as a result we -// get availability errors when the deployment target is older than macosx10.13. -// However, AppleClang 10 (and older) don't trigger availability errors. +// Aligned allocation was not provided before macosx10.14 and as a result we +// get availability errors when the deployment target is older than macosx10.14. +// However, AppleClang 10 (and older) don't trigger availability errors, and +// Clang < 8.0 doesn't warn for 10.13. +// XFAIL: !(apple-clang-9 || apple-clang-10 || clang-7) && availability=macosx10.13 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.12 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.11 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.10 diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow_replace.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow_replace.pass.cpp index fc713dbf8ed833..29d8fd06a701e0 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow_replace.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow_replace.pass.cpp @@ -10,9 +10,11 @@ // UNSUPPORTED: c++98, c++03, c++11, c++14 // UNSUPPORTED: sanitizer-new-delete -// Aligned allocation was not provided before macosx10.12 and as a result we -// get availability errors when the deployment target is older than macosx10.13. -// However, AppleClang 10 (and older) don't trigger availability errors. +// Aligned allocation was not provided before macosx10.14 and as a result we +// get availability errors when the deployment target is older than macosx10.14. +// However, AppleClang 10 (and older) don't trigger availability errors, and +// Clang < 8.0 doesn't warn for 10.13. +// XFAIL: !(apple-clang-9 || apple-clang-10 || clang-7) && availability=macosx10.13 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.12 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.11 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.10 diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/delete_align_val_t_replace.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/delete_align_val_t_replace.pass.cpp index 19cabcce1edd98..c01e39915ec013 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/delete_align_val_t_replace.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/delete_align_val_t_replace.pass.cpp @@ -16,9 +16,11 @@ // None of the current GCC compilers support this. // UNSUPPORTED: gcc-5, gcc-6 -// Aligned allocation was not provided before macosx10.12 and as a result we -// get availability errors when the deployment target is older than macosx10.13. -// However, AppleClang 10 (and older) don't trigger availability errors. +// Aligned allocation was not provided before macosx10.14 and as a result we +// get availability errors when the deployment target is older than macosx10.14. +// However, AppleClang 10 (and older) don't trigger availability errors, and +// Clang < 8.0 doesn't warn for 10.13 +// XFAIL: !(apple-clang-9 || apple-clang-10 || clang-7) && availability=macosx10.13 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.12 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.11 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.10 diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t.pass.cpp index 7cf1aca3b9f8ea..8cb40885c466f3 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t.pass.cpp @@ -9,9 +9,11 @@ // UNSUPPORTED: c++98, c++03, c++11, c++14 -// Aligned allocation was not provided before macosx10.12 and as a result we -// get availability errors when the deployment target is older than macosx10.13. -// However, AppleClang 10 (and older) don't trigger availability errors. +// Aligned allocation was not provided before macosx10.14 and as a result we +// get availability errors when the deployment target is older than macosx10.14. +// However, AppleClang 10 (and older) don't trigger availability errors, and +// Clang < 8.0 doesn't warn for 10.13. +// XFAIL: !(apple-clang-9 || apple-clang-10 || clang-7) && availability=macosx10.13 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.12 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.11 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.10 diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow.pass.cpp index dd2666e00aad4e..9d7f13bee32889 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow.pass.cpp @@ -9,9 +9,11 @@ // UNSUPPORTED: c++98, c++03, c++11, c++14 -// Aligned allocation was not provided before macosx10.12 and as a result we -// get availability errors when the deployment target is older than macosx10.13. -// However, AppleClang 10 (and older) don't trigger availability errors. +// Aligned allocation was not provided before macosx10.14 and as a result we +// get availability errors when the deployment target is older than macosx10.14. +// However, AppleClang 10 (and older) don't trigger availability errors, and +// Clang < 8.0 doesn't warn for 10.13 +// XFAIL: !(apple-clang-9 || apple-clang-10 || clang-7) && availability=macosx10.13 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.12 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.11 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.10 diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow_replace.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow_replace.pass.cpp index 514a2b8afc8c36..82367d7de093b1 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow_replace.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow_replace.pass.cpp @@ -10,9 +10,11 @@ // UNSUPPORTED: c++98, c++03, c++11, c++14 // UNSUPPORTED: sanitizer-new-delete -// Aligned allocation was not provided before macosx10.12 and as a result we -// get availability errors when the deployment target is older than macosx10.13. -// However, AppleClang 10 (and older) don't trigger availability errors. +// Aligned allocation was not provided before macosx10.14 and as a result we +// get availability errors when the deployment target is older than macosx10.14. +// However, AppleClang 10 (and older) don't trigger availability errors, and +// Clang < 8.0 doesn't warn for 10.13 +// XFAIL: !(apple-clang-9 || apple-clang-10 || clang-7) && availability=macosx10.13 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.12 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.11 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.10 From 22c2b2b1a6325db33a3db6c0d307856ec831cc4a Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 24 Jan 2019 22:07:01 +0000 Subject: [PATCH 035/125] Merging r351930 and r351932: ------------------------------------------------------------------------ r351930 | kbeyls | 2019-01-23 09:18:39 +0100 (Wed, 23 Jan 2019) | 30 lines [SLH] AArch64: correctly pick temporary register to mask SP As part of speculation hardening, the stack pointer gets masked with the taint register (X16) before a function call or before a function return. Since there are no instructions that can directly mask writing to the stack pointer, the stack pointer must first be transferred to another register, where it can be masked, before that value is transferred back to the stack pointer. Before, that temporary register was always picked to be x17, since the ABI allows clobbering x17 on any function call, resulting in the following instruction pattern being inserted before function calls and returns/tail calls: mov x17, sp and x17, x17, x16 mov sp, x17 However, x17 can be live in those locations, for example when the call is an indirect call, using x17 as the target address (blr x17). To fix this, this patch looks for an available register just before the call or terminator instruction and uses that. In the rare case when no register turns out to be available (this situation is only encountered twice across the whole test-suite), just insert a full speculation barrier at the start of the basic block where this occurs. Differential Revision: https://reviews.llvm.org/D56717 ------------------------------------------------------------------------ ------------------------------------------------------------------------ r351932 | kbeyls | 2019-01-23 10:10:12 +0100 (Wed, 23 Jan 2019) | 3 lines [SLH][AArch64] Remove accidentally retained -debug-only line from test. ------------------------------------------------------------------------ llvm-svn: 352115 --- .../AArch64/AArch64SpeculationHardening.cpp | 175 ++++++++++++------ .../AArch64/speculation-hardening-loads.ll | 42 ++--- .../CodeGen/AArch64/speculation-hardening.ll | 60 +++--- .../CodeGen/AArch64/speculation-hardening.mir | 85 +++++++++ 4 files changed, 258 insertions(+), 104 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp b/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp index e9699b0367d3d5..50300305abe337 100644 --- a/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp +++ b/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp @@ -103,6 +103,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/IR/DebugLoc.h" #include "llvm/Pass.h" #include "llvm/Support/CodeGen.h" @@ -146,25 +147,31 @@ class AArch64SpeculationHardening : public MachineFunctionPass { BitVector RegsAlreadyMasked; bool functionUsesHardeningRegister(MachineFunction &MF) const; - bool instrumentControlFlow(MachineBasicBlock &MBB); + bool instrumentControlFlow(MachineBasicBlock &MBB, + bool &UsesFullSpeculationBarrier); bool endsWithCondControlFlow(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, AArch64CC::CondCode &CondCode) const; void insertTrackingCode(MachineBasicBlock &SplitEdgeBB, AArch64CC::CondCode &CondCode, DebugLoc DL) const; - void insertSPToRegTaintPropagation(MachineBasicBlock *MBB, + void insertSPToRegTaintPropagation(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const; - void insertRegToSPTaintPropagation(MachineBasicBlock *MBB, + void insertRegToSPTaintPropagation(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned TmpReg) const; + void insertFullSpeculationBarrier(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL) const; bool slhLoads(MachineBasicBlock &MBB); bool makeGPRSpeculationSafe(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineInstr &MI, unsigned Reg); - bool lowerSpeculationSafeValuePseudos(MachineBasicBlock &MBB); + bool lowerSpeculationSafeValuePseudos(MachineBasicBlock &MBB, + bool UsesFullSpeculationBarrier); bool expandSpeculationSafeValue(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI); + MachineBasicBlock::iterator MBBI, + bool UsesFullSpeculationBarrier); bool insertCSDB(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL); }; @@ -207,15 +214,19 @@ bool AArch64SpeculationHardening::endsWithCondControlFlow( return true; } +void AArch64SpeculationHardening::insertFullSpeculationBarrier( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + DebugLoc DL) const { + // A full control flow speculation barrier consists of (DSB SYS + ISB) + BuildMI(MBB, MBBI, DL, TII->get(AArch64::DSB)).addImm(0xf); + BuildMI(MBB, MBBI, DL, TII->get(AArch64::ISB)).addImm(0xf); +} + void AArch64SpeculationHardening::insertTrackingCode( MachineBasicBlock &SplitEdgeBB, AArch64CC::CondCode &CondCode, DebugLoc DL) const { if (UseControlFlowSpeculationBarrier) { - // insert full control flow speculation barrier (DSB SYS + ISB) - BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::ISB)) - .addImm(0xf); - BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::DSB)) - .addImm(0xf); + insertFullSpeculationBarrier(SplitEdgeBB, SplitEdgeBB.begin(), DL); } else { BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::CSELXr)) .addDef(MisspeculatingTaintReg) @@ -227,7 +238,7 @@ void AArch64SpeculationHardening::insertTrackingCode( } bool AArch64SpeculationHardening::instrumentControlFlow( - MachineBasicBlock &MBB) { + MachineBasicBlock &MBB, bool &UsesFullSpeculationBarrier) { LLVM_DEBUG(dbgs() << "Instrument control flow tracking on MBB: " << MBB); bool Modified = false; @@ -263,55 +274,105 @@ bool AArch64SpeculationHardening::instrumentControlFlow( } // Perform correct code generation around function calls and before returns. - { - SmallVector ReturnInstructions; - SmallVector CallInstructions; + // The below variables record the return/terminator instructions and the call + // instructions respectively; including which register is available as a + // temporary register just before the recorded instructions. + SmallVector, 4> ReturnInstructions; + SmallVector, 4> CallInstructions; + // if a temporary register is not available for at least one of the + // instructions for which we need to transfer taint to the stack pointer, we + // need to insert a full speculation barrier. + // TmpRegisterNotAvailableEverywhere tracks that condition. + bool TmpRegisterNotAvailableEverywhere = false; + + RegScavenger RS; + RS.enterBasicBlock(MBB); + + for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); I++) { + MachineInstr &MI = *I; + if (!MI.isReturn() && !MI.isCall()) + continue; - for (MachineInstr &MI : MBB) { - if (MI.isReturn()) - ReturnInstructions.push_back(&MI); - else if (MI.isCall()) - CallInstructions.push_back(&MI); - } + // The RegScavenger represents registers available *after* the MI + // instruction pointed to by RS.getCurrentPosition(). + // We need to have a register that is available *before* the MI is executed. + if (I != MBB.begin()) + RS.forward(std::prev(I)); + // FIXME: The below just finds *a* unused register. Maybe code could be + // optimized more if this looks for the register that isn't used for the + // longest time around this place, to enable more scheduling freedom. Not + // sure if that would actually result in a big performance difference + // though. Maybe RegisterScavenger::findSurvivorBackwards has some logic + // already to do this - but it's unclear if that could easily be used here. + unsigned TmpReg = RS.FindUnusedReg(&AArch64::GPR64commonRegClass); + LLVM_DEBUG(dbgs() << "RS finds " + << ((TmpReg == 0) ? "no register " : "register "); + if (TmpReg != 0) dbgs() << printReg(TmpReg, TRI) << " "; + dbgs() << "to be available at MI " << MI); + if (TmpReg == 0) + TmpRegisterNotAvailableEverywhere = true; + if (MI.isReturn()) + ReturnInstructions.push_back({&MI, TmpReg}); + else if (MI.isCall()) + CallInstructions.push_back({&MI, TmpReg}); + } - Modified |= - (ReturnInstructions.size() > 0) || (CallInstructions.size() > 0); + if (TmpRegisterNotAvailableEverywhere) { + // When a temporary register is not available everywhere in this basic + // basic block where a propagate-taint-to-sp operation is needed, just + // emit a full speculation barrier at the start of this basic block, which + // renders the taint/speculation tracking in this basic block unnecessary. + insertFullSpeculationBarrier(MBB, MBB.begin(), + (MBB.begin())->getDebugLoc()); + UsesFullSpeculationBarrier = true; + Modified = true; + } else { + for (auto MI_Reg : ReturnInstructions) { + assert(MI_Reg.second != 0); + LLVM_DEBUG( + dbgs() + << " About to insert Reg to SP taint propagation with temp register " + << printReg(MI_Reg.second, TRI) + << " on instruction: " << *MI_Reg.first); + insertRegToSPTaintPropagation(MBB, MI_Reg.first, MI_Reg.second); + Modified = true; + } - for (MachineInstr *Return : ReturnInstructions) - insertRegToSPTaintPropagation(Return->getParent(), Return, AArch64::X17); - for (MachineInstr *Call : CallInstructions) { + for (auto MI_Reg : CallInstructions) { + assert(MI_Reg.second != 0); + LLVM_DEBUG(dbgs() << " About to insert Reg to SP and back taint " + "propagation with temp register " + << printReg(MI_Reg.second, TRI) + << " around instruction: " << *MI_Reg.first); // Just after the call: - MachineBasicBlock::iterator i = Call; - i++; - insertSPToRegTaintPropagation(Call->getParent(), i); + insertSPToRegTaintPropagation( + MBB, std::next((MachineBasicBlock::iterator)MI_Reg.first)); // Just before the call: - insertRegToSPTaintPropagation(Call->getParent(), Call, AArch64::X17); + insertRegToSPTaintPropagation(MBB, MI_Reg.first, MI_Reg.second); + Modified = true; } } - return Modified; } void AArch64SpeculationHardening::insertSPToRegTaintPropagation( - MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI) const { + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { // If full control flow speculation barriers are used, emit a control flow // barrier to block potential miss-speculation in flight coming in to this // function. if (UseControlFlowSpeculationBarrier) { - // insert full control flow speculation barrier (DSB SYS + ISB) - BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::DSB)).addImm(0xf); - BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ISB)).addImm(0xf); + insertFullSpeculationBarrier(MBB, MBBI, DebugLoc()); return; } // CMP SP, #0 === SUBS xzr, SP, #0 - BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::SUBSXri)) + BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::SUBSXri)) .addDef(AArch64::XZR) .addUse(AArch64::SP) .addImm(0) .addImm(0); // no shift // CSETM x16, NE === CSINV x16, xzr, xzr, EQ - BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::CSINVXr)) + BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::CSINVXr)) .addDef(MisspeculatingTaintReg) .addUse(AArch64::XZR) .addUse(AArch64::XZR) @@ -319,7 +380,7 @@ void AArch64SpeculationHardening::insertSPToRegTaintPropagation( } void AArch64SpeculationHardening::insertRegToSPTaintPropagation( - MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned TmpReg) const { // If full control flow speculation barriers are used, there will not be // miss-speculation when returning from this function, and therefore, also @@ -328,19 +389,19 @@ void AArch64SpeculationHardening::insertRegToSPTaintPropagation( return; // mov Xtmp, SP === ADD Xtmp, SP, #0 - BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri)) + BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri)) .addDef(TmpReg) .addUse(AArch64::SP) .addImm(0) .addImm(0); // no shift // and Xtmp, Xtmp, TaintReg === AND Xtmp, Xtmp, TaintReg, #0 - BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ANDXrs)) + BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ANDXrs)) .addDef(TmpReg, RegState::Renamable) .addUse(TmpReg, RegState::Kill | RegState::Renamable) .addUse(MisspeculatingTaintReg, RegState::Kill) .addImm(0); // mov SP, Xtmp === ADD SP, Xtmp, #0 - BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri)) + BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri)) .addDef(AArch64::SP) .addUse(TmpReg, RegState::Kill) .addImm(0) @@ -484,7 +545,8 @@ bool AArch64SpeculationHardening::slhLoads(MachineBasicBlock &MBB) { /// \brief If MBBI references a pseudo instruction that should be expanded /// here, do the expansion and return true. Otherwise return false. bool AArch64SpeculationHardening::expandSpeculationSafeValue( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + bool UsesFullSpeculationBarrier) { MachineInstr &MI = *MBBI; unsigned Opcode = MI.getOpcode(); bool Is64Bit = true; @@ -499,7 +561,7 @@ bool AArch64SpeculationHardening::expandSpeculationSafeValue( // Just remove the SpeculationSafe pseudo's if control flow // miss-speculation isn't happening because we're already inserting barriers // to guarantee that. - if (!UseControlFlowSpeculationBarrier) { + if (!UseControlFlowSpeculationBarrier && !UsesFullSpeculationBarrier) { unsigned DstReg = MI.getOperand(0).getReg(); unsigned SrcReg = MI.getOperand(1).getReg(); // Mark this register and all its aliasing registers as needing to be @@ -537,7 +599,7 @@ bool AArch64SpeculationHardening::insertCSDB(MachineBasicBlock &MBB, } bool AArch64SpeculationHardening::lowerSpeculationSafeValuePseudos( - MachineBasicBlock &MBB) { + MachineBasicBlock &MBB, bool UsesFullSpeculationBarrier) { bool Modified = false; RegsNeedingCSDBBeforeUse.reset(); @@ -572,15 +634,16 @@ bool AArch64SpeculationHardening::lowerSpeculationSafeValuePseudos( break; } - if (NeedToEmitBarrier) + if (NeedToEmitBarrier && !UsesFullSpeculationBarrier) Modified |= insertCSDB(MBB, MBBI, DL); - Modified |= expandSpeculationSafeValue(MBB, MBBI); + Modified |= + expandSpeculationSafeValue(MBB, MBBI, UsesFullSpeculationBarrier); MBBI = NMBBI; } - if (RegsNeedingCSDBBeforeUse.any()) + if (RegsNeedingCSDBBeforeUse.any() && !UsesFullSpeculationBarrier) Modified |= insertCSDB(MBB, MBBI, DL); return Modified; @@ -609,7 +672,7 @@ bool AArch64SpeculationHardening::runOnMachineFunction(MachineFunction &MF) { Modified |= slhLoads(MBB); } - // 2.a Add instrumentation code to function entry and exits. + // 2. Add instrumentation code to function entry and exits. LLVM_DEBUG( dbgs() << "***** AArch64SpeculationHardening - track control flow *****\n"); @@ -620,17 +683,15 @@ bool AArch64SpeculationHardening::runOnMachineFunction(MachineFunction &MF) { EntryBlocks.push_back(LPI.LandingPadBlock); for (auto Entry : EntryBlocks) insertSPToRegTaintPropagation( - Entry, Entry->SkipPHIsLabelsAndDebug(Entry->begin())); - - // 2.b Add instrumentation code to every basic block. - for (auto &MBB : MF) - Modified |= instrumentControlFlow(MBB); + *Entry, Entry->SkipPHIsLabelsAndDebug(Entry->begin())); - LLVM_DEBUG(dbgs() << "***** AArch64SpeculationHardening - Lowering " - "SpeculationSafeValue Pseudos *****\n"); - // Step 3: Lower SpeculationSafeValue pseudo instructions. - for (auto &MBB : MF) - Modified |= lowerSpeculationSafeValuePseudos(MBB); + // 3. Add instrumentation code to every basic block. + for (auto &MBB : MF) { + bool UsesFullSpeculationBarrier = false; + Modified |= instrumentControlFlow(MBB, UsesFullSpeculationBarrier); + Modified |= + lowerSpeculationSafeValuePseudos(MBB, UsesFullSpeculationBarrier); + } return Modified; } diff --git a/llvm/test/CodeGen/AArch64/speculation-hardening-loads.ll b/llvm/test/CodeGen/AArch64/speculation-hardening-loads.ll index 0b8f8d31b3162e..e90fb19a522a1f 100644 --- a/llvm/test/CodeGen/AArch64/speculation-hardening-loads.ll +++ b/llvm/test/CodeGen/AArch64/speculation-hardening-loads.ll @@ -11,10 +11,10 @@ entry: ; CHECK-NEXT: and x8, x8, x16 ; CHECK-NEXT: and x1, x1, x16 ; CHECK-NEXT: csdb -; CHECK-NEXT: mov x17, sp -; CHECK-NEXT: and x17, x17, x16 +; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp +; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16 ; CHECK-NEXT: mov x0, x8 -; CHECK-NEXT: mov sp, x17 +; CHECK-NEXT: mov sp, [[TMPREG]] ; CHECK-NEXT: ret } @@ -29,9 +29,9 @@ entry: ; CHECK-NEXT: and x0, x0, x16 ; CHECK-NEXT: csdb ; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: mov x17, sp -; CHECK-NEXT: and x17, x17, x16 -; CHECK-NEXT: mov sp, x17 +; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp +; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16 +; CHECK-NEXT: mov sp, [[TMPREG]] ; CHECK-NEXT: ret } @@ -51,12 +51,12 @@ entry: ; CHECK-NEXT: and x8, x8, x16 ; csdb instruction must occur before the add instruction with w8 as operand. ; CHECK-NEXT: csdb -; CHECK-NEXT: mov x17, sp ; CHECK-NEXT: add w9, w1, w8 ; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: and x17, x17, x16 ; CHECK-NEXT: csel w0, w1, w9, eq -; CHECK-NEXT: mov sp, x17 +; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp +; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16 +; CHECK-NEXT: mov sp, [[TMPREG]] ; CHECK-NEXT: ret } @@ -76,12 +76,12 @@ entry: ; CHECK-NEXT: and w8, w8, w16 ; csdb instruction must occur before the add instruction with x8 as operand. ; CHECK-NEXT: csdb -; CHECK-NEXT: mov x17, sp ; CHECK-NEXT: add x9, x1, x8 ; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: and x17, x17, x16 ; CHECK-NEXT: csel x0, x1, x9, eq -; CHECK-NEXT: mov sp, x17 +; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp +; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16 +; CHECK-NEXT: mov sp, [[TMPREG]] ; CHECK-NEXT: ret } @@ -112,11 +112,11 @@ entry: ; CHECK-NEXT: and x1, x1, x16 ; CHECK-NEXT: csdb ; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: mov x17, sp -; CHECK-NEXT: and x17, x17, x16 ; CHECK-NEXT: mov v0.d[1], v0.d[0] ; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: mov sp, x17 +; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp +; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16 +; CHECK-NEXT: mov sp, [[TMPREG]] ; CHECK-NEXT: ret } @@ -129,9 +129,9 @@ entry: ; CHECK-NEXT: and x1, x1, x16 ; CHECK-NEXT: csdb ; CHECK-NEXT: ld1 { v0.d }[0], [x1] -; CHECK-NEXT: mov x17, sp -; CHECK-NEXT: and x17, x17, x16 -; CHECK-NEXT: mov sp, x17 +; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp +; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16 +; CHECK-NEXT: mov sp, [[TMPREG]] ; CHECK-NEXT: ret %0 = load double, double* %b, align 16 %vld1_lane = insertelement <2 x double> , double %0, i32 0 @@ -147,9 +147,9 @@ entry: ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: ldr w8, [sp, #12] ; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: mov x17, sp -; CHECK-NEXT: and x17, x17, x16 -; CHECK-NEXT: mov sp, x17 +; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp +; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16 +; CHECK-NEXT: mov sp, [[TMPREG]] ; CHECK-NEXT: ret %a = alloca i32, align 4 %val = load volatile i32, i32* %a, align 4 diff --git a/llvm/test/CodeGen/AArch64/speculation-hardening.ll b/llvm/test/CodeGen/AArch64/speculation-hardening.ll index 3535b63c32cc8f..51156f68dec8c9 100644 --- a/llvm/test/CodeGen/AArch64/speculation-hardening.ll +++ b/llvm/test/CodeGen/AArch64/speculation-hardening.ll @@ -1,9 +1,9 @@ -; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure -; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure -; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure -; RUN sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure -; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure -; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure +; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,SLH,NOGISELSLH --dump-input-on-failure +; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOSLH,NOGISELNOSLH --dump-input-on-failure +; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,SLH,GISELSLH --dump-input-on-failure +; RUN sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,NOSLH,GISELNOSLH --dump-input-on-failure +; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,SLH,NOGISELSLH --dump-input-on-failure +; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,NOSLH,NOGISELNOSLH --dump-input-on-failure define i32 @f(i8* nocapture readonly %p, i32 %i, i32 %N) local_unnamed_addr SLHATTR { ; CHECK-LABEL: f @@ -13,12 +13,12 @@ entry: ; NOSLH-NOT: cmp sp, #0 ; NOSLH-NOT: csetm x16, ne -; SLH: mov x17, sp -; SLH: and x17, x17, x16 -; SLH: mov sp, x17 -; NOSLH-NOT: mov x17, sp -; NOSLH-NOT: and x17, x17, x16 -; NOSLH-NOT: mov sp, x17 +; SLH: mov [[TMPREG:x[0-9]+]], sp +; SLH: and [[TMPREG]], [[TMPREG]], x16 +; SLH: mov sp, [[TMPREG]] +; NOSLH-NOT: mov [[TMPREG:x[0-9]+]], sp +; NOSLH-NOT: and [[TMPREG]], [[TMPREG]], x16 +; NOSLH-NOT: mov sp, [[TMPREG]] %call = tail call i32 @tail_callee(i32 %i) ; SLH: cmp sp, #0 ; SLH: csetm x16, ne @@ -43,29 +43,37 @@ if.then: ; preds = %entry ; NOSLH-NOT: csel x16, x16, xzr, [[COND]] return: ; preds = %entry, %if.then %retval.0 = phi i32 [ %conv, %if.then ], [ 0, %entry ] -; SLH: mov x17, sp -; SLH: and x17, x17, x16 -; SLH: mov sp, x17 -; NOSLH-NOT: mov x17, sp -; NOSLH-NOT: and x17, x17, x16 -; NOSLH-NOT: mov sp, x17 +; SLH: mov [[TMPREG:x[0-9]+]], sp +; SLH: and [[TMPREG]], [[TMPREG]], x16 +; SLH: mov sp, [[TMPREG]] +; NOSLH-NOT: mov [[TMPREG:x[0-9]+]], sp +; NOSLH-NOT: and [[TMPREG]], [[TMPREG]], x16 +; NOSLH-NOT: mov sp, [[TMPREG]] ret i32 %retval.0 } ; Make sure that for a tail call, taint doesn't get put into SP twice. define i32 @tail_caller(i32 %a) local_unnamed_addr SLHATTR { ; CHECK-LABEL: tail_caller: -; SLH: mov x17, sp -; SLH: and x17, x17, x16 -; SLH: mov sp, x17 -; NOSLH-NOT: mov x17, sp -; NOSLH-NOT: and x17, x17, x16 -; NOSLH-NOT: mov sp, x17 +; NOGISELSLH: mov [[TMPREG:x[0-9]+]], sp +; NOGISELSLH: and [[TMPREG]], [[TMPREG]], x16 +; NOGISELSLH: mov sp, [[TMPREG]] +; NOGISELNOSLH-NOT: mov [[TMPREG:x[0-9]+]], sp +; NOGISELNOSLH-NOT: and [[TMPREG]], [[TMPREG]], x16 +; NOGISELNOSLH-NOT: mov sp, [[TMPREG]] +; GISELSLH: mov [[TMPREG:x[0-9]+]], sp +; GISELSLH: and [[TMPREG]], [[TMPREG]], x16 +; GISELSLH: mov sp, [[TMPREG]] +; GISELNOSLH-NOT: mov [[TMPREG:x[0-9]+]], sp +; GISELNOSLH-NOT: and [[TMPREG]], [[TMPREG]], x16 +; GISELNOSLH-NOT: mov sp, [[TMPREG]] ; GlobalISel doesn't optimize tail calls (yet?), so only check that ; cross-call taint register setup code is missing if a tail call was ; actually produced. -; SLH: {{(bl tail_callee[[:space:]] cmp sp, #0)|(b tail_callee)}} -; SLH-NOT: cmp sp, #0 +; NOGISELSLH: b tail_callee +; GISELSLH: bl tail_callee +; GISELSLH: cmp sp, #0 +; SLH-NOT: cmp sp, #0 %call = tail call i32 @tail_callee(i32 %a) ret i32 %call } diff --git a/llvm/test/CodeGen/AArch64/speculation-hardening.mir b/llvm/test/CodeGen/AArch64/speculation-hardening.mir index cf8357d9558b00..5991c4df0407f7 100644 --- a/llvm/test/CodeGen/AArch64/speculation-hardening.mir +++ b/llvm/test/CodeGen/AArch64/speculation-hardening.mir @@ -25,6 +25,22 @@ define void @indirectbranch(i32 %a, i32 %b) speculative_load_hardening { ret void } + ; Also check that a non-default temporary register gets picked correctly to + ; transfer the SP to to and it with the taint register when the default + ; temporary isn't available. + define void @indirect_call_x17(i32 %a, i32 %b) speculative_load_hardening { + ret void + } + @g = common dso_local local_unnamed_addr global i64 (...)* null, align 8 + define void @indirect_tailcall_x17(i32 %a, i32 %b) speculative_load_hardening { + ret void + } + define void @indirect_call_lr(i32 %a, i32 %b) speculative_load_hardening { + ret void + } + define void @RS_cannot_find_available_regs() speculative_load_hardening { + ret void + } ... --- name: nobranch_fallthrough @@ -115,3 +131,72 @@ body: | ; CHECK-NOT: csel RET undef $lr, implicit $x0 ... +--- +name: indirect_call_x17 +tracksRegLiveness: true +body: | + bb.0: + liveins: $x17 + ; CHECK-LABEL: indirect_call_x17 + ; CHECK: mov x0, sp + ; CHECK: and x0, x0, x16 + ; CHECK: mov sp, x0 + ; CHECK: blr x17 + BLR killed renamable $x17, implicit-def dead $lr, implicit $sp + RET undef $lr, implicit undef $w0 +... +--- +name: indirect_tailcall_x17 +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + ; CHECK-LABEL: indirect_tailcall_x17 + ; CHECK: mov x1, sp + ; CHECK: and x1, x1, x16 + ; CHECK: mov sp, x1 + ; CHECK: br x17 + $x8 = ADRP target-flags(aarch64-page) @g + $x17 = LDRXui killed $x8, target-flags(aarch64-pageoff, aarch64-nc) @g + TCRETURNri killed $x17, 0, implicit $sp, implicit $x0 +... +--- +name: indirect_call_lr +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: indirect_call_lr + ; CHECK: mov x1, sp + ; CHECK-NEXT: and x1, x1, x16 + ; CHECK-NEXT: mov sp, x1 + ; CHECK-NEXT: blr x30 + liveins: $x0, $lr + BLR killed renamable $lr, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 + $w0 = nsw ADDWri killed $w0, 1, 0 + RET undef $lr, implicit $w0 +... +--- +name: RS_cannot_find_available_regs +tracksRegLiveness: true +body: | + bb.0: + ; In the rare case when no free temporary register is available for the + ; propagate taint-to-sp operation, just put in a full speculation barrier + ; (isb+dsb sy) at the start of the basic block. And don't put masks on + ; instructions for the rest of the basic block, since speculation in that + ; basic block was already done, so no need to do masking. + ; CHECK-LABEL: RS_cannot_find_available_regs + ; CHECK: dsb sy + ; CHECK-NEXT: isb + ; CHECK-NEXT: ldr x0, [x0] + ; The following 2 instructions come from propagating the taint encoded in + ; sp at function entry to x16. It turns out the taint info in x16 is not + ; used in this function, so those instructions could be optimized away. An + ; optimization for later if it turns out this situation occurs often enough. + ; CHECK-NEXT: cmp sp, #0 + ; CHECK-NEXT: csetm x16, ne + ; CHECK-NEXT: ret + liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $fp, $lr + $x0 = LDRXui killed $x0, 0 + RET undef $lr, implicit $x0 +... From a008fbedf069ff1caa02f239d6396791b45bcd8f Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 24 Jan 2019 22:26:09 +0000 Subject: [PATCH 036/125] Merging r352040: ------------------------------------------------------------------------ r352040 | ibiryukov | 2019-01-24 11:41:43 +0100 (Thu, 24 Jan 2019) | 9 lines [CodeComplete] [clangd] Fix crash on ValueDecl with a null type Reviewers: kadircet Reviewed By: kadircet Subscribers: ioeric, MaskRay, jkorous, arphaman, cfe-commits Differential Revision: https://reviews.llvm.org/D57093 ------------------------------------------------------------------------ llvm-svn: 352118 --- clang/lib/Sema/SemaCodeComplete.cpp | 3 ++- clang/test/CodeCompletion/crash-null-type.cpp | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 clang/test/CodeCompletion/crash-null-type.cpp diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp index d9f007a46da5e0..980d7b455ecec8 100644 --- a/clang/lib/Sema/SemaCodeComplete.cpp +++ b/clang/lib/Sema/SemaCodeComplete.cpp @@ -681,7 +681,8 @@ QualType clang::getDeclUsageType(ASTContext &C, const NamedDecl *ND) { T = Property->getType(); else if (const auto *Value = dyn_cast(ND)) T = Value->getType(); - else + + if (T.isNull()) return QualType(); // Dig through references, function pointers, and block pointers to diff --git a/clang/test/CodeCompletion/crash-null-type.cpp b/clang/test/CodeCompletion/crash-null-type.cpp new file mode 100644 index 00000000000000..c5b3d1e79390c3 --- /dev/null +++ b/clang/test/CodeCompletion/crash-null-type.cpp @@ -0,0 +1,8 @@ +void test() { + for (auto [loopVar] : y) { // y has to be unresolved + loopVa + } +} +// RUN: not %clang_cc1 -fsyntax-only -code-completion-at=%s:3:11 %s -o - \ +// RUN: | FileCheck %s +// CHECK: COMPLETION: loopVar From de8481e60799ccaf21a5e94f9ab2007fe43c6f6d Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 24 Jan 2019 22:26:52 +0000 Subject: [PATCH 037/125] Merging r352040: ------------------------------------------------------------------------ r352040 | ibiryukov | 2019-01-24 11:41:43 +0100 (Thu, 24 Jan 2019) | 9 lines [CodeComplete] [clangd] Fix crash on ValueDecl with a null type Reviewers: kadircet Reviewed By: kadircet Subscribers: ioeric, MaskRay, jkorous, arphaman, cfe-commits Differential Revision: https://reviews.llvm.org/D57093 ------------------------------------------------------------------------ llvm-svn: 352120 --- clang-tools-extra/clangd/ExpectedTypes.cpp | 4 +++- .../unittests/clangd/CodeCompleteTests.cpp | 11 +++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/clang-tools-extra/clangd/ExpectedTypes.cpp b/clang-tools-extra/clangd/ExpectedTypes.cpp index 4bbf0651260b6e..59d9e149162a69 100644 --- a/clang-tools-extra/clangd/ExpectedTypes.cpp +++ b/clang-tools-extra/clangd/ExpectedTypes.cpp @@ -35,8 +35,10 @@ static llvm::Optional typeOfCompletion(const CodeCompletionResult &R) { auto *VD = dyn_cast_or_null(R.Declaration); if (!VD) - return None; // We handle only variables and functions below. + return llvm::None; // We handle only variables and functions below. auto T = VD->getType(); + if (T.isNull()) + return llvm::None; if (auto FuncT = T->getAs()) { // Functions are a special case. They are completed as 'foo()' and we want // to match their return type rather than the function type itself. diff --git a/clang-tools-extra/unittests/clangd/CodeCompleteTests.cpp b/clang-tools-extra/unittests/clangd/CodeCompleteTests.cpp index 02f12eab7dd6a2..f26181decb80fa 100644 --- a/clang-tools-extra/unittests/clangd/CodeCompleteTests.cpp +++ b/clang-tools-extra/unittests/clangd/CodeCompleteTests.cpp @@ -2320,6 +2320,17 @@ TEST(CompletionTest, ObjectiveCMethodTwoArgumentsFromMiddle) { EXPECT_THAT(C, ElementsAre(SnippetSuffix("${1:(unsigned int)}"))); } +TEST(CompletionTest, WorksWithNullType) { + auto R = completions(R"cpp( + int main() { + for (auto [loopVar] : y ) { // y has to be unresolved. + int z = loopV^; + } + } + )cpp"); + EXPECT_THAT(R.Completions, ElementsAre(Named("loopVar"))); +} + } // namespace } // namespace clangd } // namespace clang From 400f8a39269550dc02d98f55974e1ef517c35762 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 24 Jan 2019 23:44:31 +0000 Subject: [PATCH 038/125] Merging r352102: ------------------------------------------------------------------------ r352102 | rsmith | 2019-01-24 21:52:56 +0100 (Thu, 24 Jan 2019) | 2 lines Add a triple to this test so it passes for targets where alignof(double) really should be equal to alignof(float). ------------------------------------------------------------------------ llvm-svn: 352132 --- clang/test/CXX/dcl.dcl/dcl.attr/dcl.align/p8.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/CXX/dcl.dcl/dcl.attr/dcl.align/p8.cpp b/clang/test/CXX/dcl.dcl/dcl.attr/dcl.align/p8.cpp index 686aac2802adaf..e435bee2c88ee1 100644 --- a/clang/test/CXX/dcl.dcl/dcl.attr/dcl.align/p8.cpp +++ b/clang/test/CXX/dcl.dcl/dcl.attr/dcl.align/p8.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -std=c++11 -verify %s +// RUN: %clang_cc1 -std=c++11 -verify %s -triple x86_64-linux-gnu alignas(double) void f(); // expected-error {{'alignas' attribute only applies to variables, data members and tag types}} alignas(double) unsigned char c[sizeof(double)]; // expected-note {{previous}} From 24479b1157d357b03d8a00ff5088a4ec15b3367c Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 25 Jan 2019 00:05:14 +0000 Subject: [PATCH 039/125] Merging r352116: ------------------------------------------------------------------------ r352116 | mgorny | 2019-01-24 23:20:47 +0100 (Thu, 24 Jan 2019) | 9 lines [Process/NetBSD] Add missing linkage to -lutil Add missing linkage to fix build failure with LLD: ld: error: undefined symbol: kinfo_getvmmap >>> referenced by NativeProcessNetBSD.cpp >>> NativeProcessNetBSD.cpp.o:(lldb_private::process_netbsd::NativeProcessNetBSD::PopulateMemoryRegionCache()) in archive lib/liblldbPluginProcessNetBSD.a Differential Revision: https://reviews.llvm.org/D57193 ------------------------------------------------------------------------ llvm-svn: 352134 --- lldb/source/Plugins/Process/NetBSD/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/lldb/source/Plugins/Process/NetBSD/CMakeLists.txt b/lldb/source/Plugins/Process/NetBSD/CMakeLists.txt index e131e6d70468bf..586725bb7a566d 100644 --- a/lldb/source/Plugins/Process/NetBSD/CMakeLists.txt +++ b/lldb/source/Plugins/Process/NetBSD/CMakeLists.txt @@ -11,6 +11,7 @@ add_lldb_library(lldbPluginProcessNetBSD PLUGIN lldbUtility lldbPluginProcessPOSIX lldbPluginProcessUtility + util LINK_COMPONENTS Support ) From 22f92b55faf54b64463be3576c38a036a4f7ce2e Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 25 Jan 2019 00:07:12 +0000 Subject: [PATCH 040/125] Merging r352082: ------------------------------------------------------------------------ r352082 | ruiu | 2019-01-24 20:02:31 +0100 (Thu, 24 Jan 2019) | 15 lines Fix broken export table if .rdata is merged with .text. Previously, we assumed that .rdata is zero-filled, so when writing an COFF import table, we didn't write anything if the data is zero. That assumption was wrong because .rdata can be merged with .text. If .rdata is merged with .text, they are initialized with 0xcc which is a trap instruction. This patch removes that assumption from code. Should be merged to 8.0 branch as this is a regression. Fixes https://bugs.llvm.org/show_bug.cgi?id=39826 Differential Revision: https://reviews.llvm.org/D57168 ------------------------------------------------------------------------ llvm-svn: 352135 --- lld/COFF/DLL.cpp | 16 +++++++++++++++- lld/test/COFF/imports.test | 13 +++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/lld/COFF/DLL.cpp b/lld/COFF/DLL.cpp index 599cc5892a16f5..c06027d3e5c340 100644 --- a/lld/COFF/DLL.cpp +++ b/lld/COFF/DLL.cpp @@ -47,6 +47,7 @@ class HintNameChunk : public Chunk { } void writeTo(uint8_t *Buf) const override { + memset(Buf + OutputSectionOff, 0, getSize()); write16le(Buf + OutputSectionOff, Hint); memcpy(Buf + OutputSectionOff + 2, Name.data(), Name.size()); } @@ -63,7 +64,10 @@ class LookupChunk : public Chunk { size_t getSize() const override { return Config->Wordsize; } void writeTo(uint8_t *Buf) const override { - write32le(Buf + OutputSectionOff, HintName->getRVA()); + if (Config->is64()) + write64le(Buf + OutputSectionOff, HintName->getRVA()); + else + write32le(Buf + OutputSectionOff, HintName->getRVA()); } Chunk *HintName; @@ -99,6 +103,8 @@ class ImportDirectoryChunk : public Chunk { size_t getSize() const override { return sizeof(ImportDirectoryTableEntry); } void writeTo(uint8_t *Buf) const override { + memset(Buf + OutputSectionOff, 0, getSize()); + auto *E = (coff_import_directory_table_entry *)(Buf + OutputSectionOff); E->ImportLookupTableRVA = LookupTab->getRVA(); E->NameRVA = DLLName->getRVA(); @@ -118,6 +124,10 @@ class NullChunk : public Chunk { bool hasData() const override { return false; } size_t getSize() const override { return Size; } + void writeTo(uint8_t *Buf) const override { + memset(Buf + OutputSectionOff, 0, Size); + } + private: size_t Size; }; @@ -160,6 +170,8 @@ class DelayDirectoryChunk : public Chunk { } void writeTo(uint8_t *Buf) const override { + memset(Buf + OutputSectionOff, 0, getSize()); + auto *E = (delay_import_directory_table_entry *)(Buf + OutputSectionOff); E->Attributes = 1; E->Name = DLLName->getRVA(); @@ -392,6 +404,8 @@ class ExportDirectoryChunk : public Chunk { } void writeTo(uint8_t *Buf) const override { + memset(Buf + OutputSectionOff, 0, getSize()); + auto *E = (export_directory_table_entry *)(Buf + OutputSectionOff); E->NameRVA = DLLName->getRVA(); E->OrdinalBase = 0; diff --git a/lld/test/COFF/imports.test b/lld/test/COFF/imports.test index 64f3900a1c2f3e..f54bdfd88dfab1 100644 --- a/lld/test/COFF/imports.test +++ b/lld/test/COFF/imports.test @@ -34,3 +34,16 @@ IMPORT-NEXT: Symbol: ExitProcess (0) IMPORT-NEXT: Symbol: (50) IMPORT-NEXT: Symbol: MessageBoxA (1) IMPORT-NEXT: } + +# RUN: lld-link /out:%t.exe /entry:main /subsystem:console /merge:.rdata=.text \ +# RUN: %p/Inputs/hello64.obj %p/Inputs/std64.lib /include:ExitProcess +# RUN: llvm-readobj -coff-imports %t.exe | FileCheck -check-prefix=MERGE %s + +MERGE: Import { +MERGE-NEXT: Name: std64.dll +MERGE-NEXT: ImportLookupTableRVA: 0x1090 +MERGE-NEXT: ImportAddressTableRVA: 0x10B0 +MERGE-NEXT: Symbol: ExitProcess (0) +MERGE-NEXT: Symbol: (50) +MERGE-NEXT: Symbol: MessageBoxA (1) +MERGE-NEXT: } From 4474451973eac95b5858830bbb172b088b324897 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 25 Jan 2019 00:12:01 +0000 Subject: [PATCH 041/125] Merging r351485: ------------------------------------------------------------------------ r351485 | vstefanovic | 2019-01-17 22:50:37 +0100 (Thu, 17 Jan 2019) | 10 lines [mips] Emit .reloc R_{MICRO}MIPS_JALR along with j(al)r(c) $25 The callee address is added as an optional operand (MCSymbol) in AdjustInstrPostInstrSelection() and then used by asm printer to insert: '.reloc tmplabel, R_MIPS_JALR, symbol tmplabel:'. Controlled with '-mips-jalr-reloc', default is true. Differential revision: https://reviews.llvm.org/D56694 ------------------------------------------------------------------------ llvm-svn: 352137 --- llvm/lib/CodeGen/MachineInstr.cpp | 5 +- .../Target/Mips/AsmParser/MipsAsmParser.cpp | 5 +- .../Target/Mips/MCTargetDesc/MipsABIInfo.cpp | 7 + .../Target/Mips/MCTargetDesc/MipsBaseInfo.h | 5 +- .../lib/Target/Mips/MicroMips32r6InstrInfo.td | 1 + llvm/lib/Target/Mips/MicroMipsInstrInfo.td | 1 + llvm/lib/Target/Mips/Mips32r6InstrInfo.td | 2 +- llvm/lib/Target/Mips/MipsAsmPrinter.cpp | 41 +++++ llvm/lib/Target/Mips/MipsFastISel.cpp | 13 ++ llvm/lib/Target/Mips/MipsISelLowering.cpp | 53 +++++- llvm/lib/Target/Mips/MipsISelLowering.h | 3 + llvm/lib/Target/Mips/MipsInstrInfo.cpp | 13 +- llvm/lib/Target/Mips/MipsInstrInfo.td | 12 +- llvm/lib/Target/Mips/MipsMCInstLower.cpp | 2 + llvm/test/CodeGen/Mips/cconv/vector.ll | 8 +- llvm/test/CodeGen/Mips/gprestore.ll | 12 +- llvm/test/CodeGen/Mips/llvm-ir/sdiv.ll | 36 ++-- llvm/test/CodeGen/Mips/llvm-ir/srem.ll | 36 ++-- llvm/test/CodeGen/Mips/llvm-ir/udiv.ll | 36 ++-- llvm/test/CodeGen/Mips/llvm-ir/urem.ll | 36 ++-- llvm/test/CodeGen/Mips/long-call-attr.ll | 8 +- llvm/test/CodeGen/Mips/long-call-mcount.ll | 4 +- llvm/test/CodeGen/Mips/msa/f16-llvm-ir.ll | 12 +- llvm/test/CodeGen/Mips/o32_cc_byval.ll | 3 +- llvm/test/CodeGen/Mips/reloc-jalr.ll | 154 ++++++++++++++++++ llvm/test/CodeGen/Mips/shrink-wrapping.ll | 8 +- 26 files changed, 408 insertions(+), 108 deletions(-) create mode 100644 llvm/test/CodeGen/Mips/reloc-jalr.ll diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index 764a84c7e1327e..dc1ad953e71d46 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -225,12 +225,13 @@ void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) { } #ifndef NDEBUG - bool isMetaDataOp = Op.getType() == MachineOperand::MO_Metadata; + bool isDebugOp = Op.getType() == MachineOperand::MO_Metadata || + Op.getType() == MachineOperand::MO_MCSymbol; // OpNo now points as the desired insertion point. Unless this is a variadic // instruction, only implicit regs are allowed beyond MCID->getNumOperands(). // RegMask operands go between the explicit and implicit operands. assert((isImpReg || Op.isRegMask() || MCID->isVariadic() || - OpNo < MCID->getNumOperands() || isMetaDataOp) && + OpNo < MCID->getNumOperands() || isDebugOp) && "Trying to add an operand to a machine instr that is already done!"); #endif diff --git a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index d2fed6861477b9..f10d100bfe11ce 100644 --- a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -65,10 +65,7 @@ class MCInstrInfo; } // end namespace llvm -static cl::opt -EmitJalrReloc("mips-jalr-reloc", cl::Hidden, - cl::desc("MIPS: Emit R_{MICRO}MIPS_JALR relocation with jalr"), - cl::init(true)); +extern cl::opt EmitJalrReloc; namespace { diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp index 18d7dd99be34a7..2f2dd4e03c40ae 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp @@ -15,6 +15,13 @@ using namespace llvm; +// Note: this option is defined here to be visible from libLLVMMipsAsmParser +// and libLLVMMipsCodeGen +cl::opt +EmitJalrReloc("mips-jalr-reloc", cl::Hidden, + cl::desc("MIPS: Emit R_{MICRO}MIPS_JALR relocation with jalr"), + cl::init(true)); + namespace { static const MCPhysReg O32IntRegs[4] = {Mips::A0, Mips::A1, Mips::A2, Mips::A3}; diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h index a90db2384c4664..ab8a6753eadcf2 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h @@ -89,7 +89,10 @@ namespace MipsII { MO_GOT_HI16, MO_GOT_LO16, MO_CALL_HI16, - MO_CALL_LO16 + MO_CALL_LO16, + + /// Helper operand used to generate R_MIPS_JALR + MO_JALR }; enum { diff --git a/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td b/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td index 814918d25e70d7..c441aa76ad40f7 100644 --- a/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td +++ b/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td @@ -460,6 +460,7 @@ class JALRC16_MMR6_DESC_BASE let isCall = 1; let hasDelaySlot = 0; let Defs = [RA]; + let hasPostISelHook = 1; } class JALRC16_MMR6_DESC : JALRC16_MMR6_DESC_BASE<"jalr", GPR32Opnd>; diff --git a/llvm/lib/Target/Mips/MicroMipsInstrInfo.td b/llvm/lib/Target/Mips/MicroMipsInstrInfo.td index af380a0ec71e0a..ccc4f04bb92d82 100644 --- a/llvm/lib/Target/Mips/MicroMipsInstrInfo.td +++ b/llvm/lib/Target/Mips/MicroMipsInstrInfo.td @@ -426,6 +426,7 @@ class JumpLinkRegMM16 : let isCall = 1; let hasDelaySlot = 1; let Defs = [RA]; + let hasPostISelHook = 1; } // 16-bit Jump Reg diff --git a/llvm/lib/Target/Mips/Mips32r6InstrInfo.td b/llvm/lib/Target/Mips/Mips32r6InstrInfo.td index 2bd0cf2d59a64d..fb239f572ef22c 100644 --- a/llvm/lib/Target/Mips/Mips32r6InstrInfo.td +++ b/llvm/lib/Target/Mips/Mips32r6InstrInfo.td @@ -1105,7 +1105,7 @@ def : MipsPat<(select i32:$cond, immz, i32:$f), // Pseudo instructions let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, hasDelaySlot = 1, - hasExtraSrcRegAllocReq = 1, isCTI = 1, Defs = [AT] in { + hasExtraSrcRegAllocReq = 1, isCTI = 1, Defs = [AT], hasPostISelHook = 1 in { class TailCallRegR6 : PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>, PseudoInstExpansion<(JumpInst RT:$rt, RO:$rs)>; diff --git a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp index 362431fd42a6c0..a7a748b0840e19 100644 --- a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp @@ -68,6 +68,8 @@ using namespace llvm; #define DEBUG_TYPE "mips-asm-printer" +extern cl::opt EmitJalrReloc; + MipsTargetStreamer &MipsAsmPrinter::getTargetStreamer() const { return static_cast(*OutStreamer->getTargetStreamer()); } @@ -148,6 +150,40 @@ void MipsAsmPrinter::emitPseudoIndirectBranch(MCStreamer &OutStreamer, EmitToStreamer(OutStreamer, TmpInst0); } +// If there is an MO_JALR operand, insert: +// +// .reloc tmplabel, R_{MICRO}MIPS_JALR, symbol +// tmplabel: +// +// This is an optimization hint for the linker which may then replace +// an indirect call with a direct branch. +static void emitDirectiveRelocJalr(const MachineInstr &MI, + MCContext &OutContext, + TargetMachine &TM, + MCStreamer &OutStreamer, + const MipsSubtarget &Subtarget) { + for (unsigned int I = MI.getDesc().getNumOperands(), E = MI.getNumOperands(); + I < E; ++I) { + MachineOperand MO = MI.getOperand(I); + if (MO.isMCSymbol() && (MO.getTargetFlags() & MipsII::MO_JALR)) { + MCSymbol *Callee = MO.getMCSymbol(); + if (Callee && !Callee->getName().empty()) { + MCSymbol *OffsetLabel = OutContext.createTempSymbol(); + const MCExpr *OffsetExpr = + MCSymbolRefExpr::create(OffsetLabel, OutContext); + const MCExpr *CaleeExpr = + MCSymbolRefExpr::create(Callee, OutContext); + OutStreamer.EmitRelocDirective + (*OffsetExpr, + Subtarget.inMicroMipsMode() ? "R_MICROMIPS_JALR" : "R_MIPS_JALR", + CaleeExpr, SMLoc(), *TM.getMCSubtargetInfo()); + OutStreamer.EmitLabel(OffsetLabel); + return; + } + } + } +} + void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { MipsTargetStreamer &TS = getTargetStreamer(); unsigned Opc = MI->getOpcode(); @@ -207,6 +243,11 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } + if (EmitJalrReloc && + (MI->isReturn() || MI->isCall() || MI->isIndirectBranch())) { + emitDirectiveRelocJalr(*MI, OutContext, TM, *OutStreamer, *Subtarget); + } + MachineBasicBlock::const_instr_iterator I = MI->getIterator(); MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); diff --git a/llvm/lib/Target/Mips/MipsFastISel.cpp b/llvm/lib/Target/Mips/MipsFastISel.cpp index 22ade31a72cdbb..a18416b9e86100 100644 --- a/llvm/lib/Target/Mips/MipsFastISel.cpp +++ b/llvm/lib/Target/Mips/MipsFastISel.cpp @@ -56,6 +56,7 @@ #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSymbol.h" @@ -75,6 +76,8 @@ using namespace llvm; +extern cl::opt EmitJalrReloc; + namespace { class MipsFastISel final : public FastISel { @@ -1551,6 +1554,16 @@ bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) { CLI.Call = MIB; + if (EmitJalrReloc && !Subtarget->inMips16Mode()) { + // Attach callee address to the instruction, let asm printer emit + // .reloc R_MIPS_JALR. + if (Symbol) + MIB.addSym(Symbol, MipsII::MO_JALR); + else + MIB.addSym(FuncInfo.MF->getContext().getOrCreateSymbol( + Addr.getGlobalValue()->getName()), MipsII::MO_JALR); + } + // Finish off the call including any return values. return finishCall(CLI, RetVT, NumBytes); } diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp index 8c2a364cdfa951..0f9c075ba0ccf3 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -57,6 +57,7 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" @@ -91,6 +92,8 @@ NoZeroDivCheck("mno-check-zero-division", cl::Hidden, cl::desc("MIPS: Don't trap on integer division by zero."), cl::init(false)); +extern cl::opt EmitJalrReloc; + static const MCPhysReg Mips64DPRegs[8] = { Mips::D12_64, Mips::D13_64, Mips::D14_64, Mips::D15_64, Mips::D16_64, Mips::D17_64, Mips::D18_64, Mips::D19_64 @@ -2879,6 +2882,54 @@ getOpndList(SmallVectorImpl &Ops, Ops.push_back(InFlag); } +void MipsTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, + SDNode *Node) const { + switch (MI.getOpcode()) { + default: + return; + case Mips::JALR: + case Mips::JALRPseudo: + case Mips::JALR64: + case Mips::JALR64Pseudo: + case Mips::JALR16_MM: + case Mips::JALRC16_MMR6: + case Mips::TAILCALLREG: + case Mips::TAILCALLREG64: + case Mips::TAILCALLR6REG: + case Mips::TAILCALL64R6REG: + case Mips::TAILCALLREG_MM: + case Mips::TAILCALLREG_MMR6: { + if (!EmitJalrReloc || + Subtarget.inMips16Mode() || + !isPositionIndependent() || + Node->getNumOperands() < 1 || + Node->getOperand(0).getNumOperands() < 2) { + return; + } + // We are after the callee address, set by LowerCall(). + // If added to MI, asm printer will emit .reloc R_MIPS_JALR for the + // symbol. + const SDValue TargetAddr = Node->getOperand(0).getOperand(1); + StringRef Sym; + if (const GlobalAddressSDNode *G = + dyn_cast_or_null(TargetAddr)) { + Sym = G->getGlobal()->getName(); + } + else if (const ExternalSymbolSDNode *ES = + dyn_cast_or_null(TargetAddr)) { + Sym = ES->getSymbol(); + } + + if (Sym.empty()) + return; + + MachineFunction *MF = MI.getParent()->getParent(); + MCSymbol *S = MF->getContext().getOrCreateSymbol(Sym); + MI.addOperand(MachineOperand::CreateMCSymbol(S, MipsII::MO_JALR)); + } + } +} + /// LowerCall - functions arguments are copied from virtual regs to /// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. SDValue @@ -2930,7 +2981,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // the maximum out going argument area (including the reserved area), and // preallocates the stack space on entrance to the caller. // - // FIXME: We should do the same for efficency and space. + // FIXME: We should do the same for efficiency and space. // Note: The check on the calling convention below must match // MipsABIInfo::GetCalleeAllocdArgSizeInBytes(). diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h index e043f133a09fd9..c88633be02b77c 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.h +++ b/llvm/lib/Target/Mips/MipsISelLowering.h @@ -341,6 +341,9 @@ class TargetRegisterClass; EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override; + void AdjustInstrPostInstrSelection(MachineInstr &MI, + SDNode *Node) const override; + void HandleByVal(CCState *, unsigned &, unsigned) const override; unsigned getRegisterByName(const char* RegName, EVT VT, diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.cpp b/llvm/lib/Target/Mips/MipsInstrInfo.cpp index bfb4c775205de6..e38bef4663a7d3 100644 --- a/llvm/lib/Target/Mips/MipsInstrInfo.cpp +++ b/llvm/lib/Target/Mips/MipsInstrInfo.cpp @@ -653,6 +653,16 @@ MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc, MIB.addImm(0); + // If I has an MCSymbol operand (used by asm printer, to emit R_MIPS_JALR), + // add it to the new instruction. + for (unsigned J = I->getDesc().getNumOperands(), E = I->getNumOperands(); + J < E; ++J) { + const MachineOperand &MO = I->getOperand(J); + if (MO.isMCSymbol() && (MO.getTargetFlags() & MipsII::MO_JALR)) + MIB.addSym(MO.getMCSymbol(), MipsII::MO_JALR); + } + + } else { for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) { if (BranchWithZeroOperand && (unsigned)ZeroOperandPosition == J) @@ -825,7 +835,8 @@ MipsInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { {MO_GOT_HI16, "mips-got-hi16"}, {MO_GOT_LO16, "mips-got-lo16"}, {MO_CALL_HI16, "mips-call-hi16"}, - {MO_CALL_LO16, "mips-call-lo16"} + {MO_CALL_LO16, "mips-call-lo16"}, + {MO_JALR, "mips-jalr"} }; return makeArrayRef(Flags); } diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.td b/llvm/lib/Target/Mips/MipsInstrInfo.td index d9398b7d6024a5..46721e6cb9e5fa 100644 --- a/llvm/lib/Target/Mips/MipsInstrInfo.td +++ b/llvm/lib/Target/Mips/MipsInstrInfo.td @@ -1623,11 +1623,15 @@ let isCall=1, hasDelaySlot=1, isCTI=1, Defs = [RA] in { class JumpLinkRegPseudo: PseudoSE<(outs), (ins RO:$rs), [(MipsJmpLink RO:$rs)], II_JALR>, - PseudoInstExpansion<(JALRInst RetReg, ResRO:$rs)>; + PseudoInstExpansion<(JALRInst RetReg, ResRO:$rs)> { + let hasPostISelHook = 1; + } class JumpLinkReg: InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"), - [], II_JALR, FrmR, opstr>; + [], II_JALR, FrmR, opstr> { + let hasPostISelHook = 1; + } class BGEZAL_FT : @@ -1646,7 +1650,9 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, hasDelaySlot = 1, class TailCallReg : PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>, - PseudoInstExpansion<(JumpInst RO:$rs)>; + PseudoInstExpansion<(JumpInst RO:$rs)> { + let hasPostISelHook = 1; + } } class BAL_BR_Pseudo : diff --git a/llvm/lib/Target/Mips/MipsMCInstLower.cpp b/llvm/lib/Target/Mips/MipsMCInstLower.cpp index 46b37ceae39181..4a7c0ce2be19b8 100644 --- a/llvm/lib/Target/Mips/MipsMCInstLower.cpp +++ b/llvm/lib/Target/Mips/MipsMCInstLower.cpp @@ -117,6 +117,8 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, case MipsII::MO_CALL_LO16: TargetKind = MipsMCExpr::MEK_CALL_LO16; break; + case MipsII::MO_JALR: + return MCOperand(); } switch (MOTy) { diff --git a/llvm/test/CodeGen/Mips/cconv/vector.ll b/llvm/test/CodeGen/Mips/cconv/vector.ll index 6a07c4f34564de..5c7c3f424c380f 100644 --- a/llvm/test/CodeGen/Mips/cconv/vector.ll +++ b/llvm/test/CodeGen/Mips/cconv/vector.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=mips-unknown-linux-gnu -mcpu=mips32 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32,MIPS32EB -; RUN: llc < %s -mtriple=mips64-unknown-linux-gnu -relocation-model=pic -mcpu=mips64 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64,MIPS64EB +; RUN: llc < %s -mtriple=mips64-unknown-linux-gnu -relocation-model=pic -mcpu=mips64 -disable-mips-delay-filler -mips-jalr-reloc=false | FileCheck %s --check-prefixes=ALL,MIPS64,MIPS64EB ; RUN: llc < %s -mtriple=mips-unknown-linux-gnu -mcpu=mips32r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32R5,MIPS32R5EB -; RUN: llc < %s -mtriple=mips64-unknown-linux-gnu -relocation-model=pic -mcpu=mips64r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64R5,MIPS64R5EB +; RUN: llc < %s -mtriple=mips64-unknown-linux-gnu -relocation-model=pic -mcpu=mips64r5 -mattr=+fp64,+msa -disable-mips-delay-filler -mips-jalr-reloc=false | FileCheck %s --check-prefixes=ALL,MIPS64R5,MIPS64R5EB ; RUN: llc < %s -mtriple=mipsel-unknown-linux-gnu -mcpu=mips32 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32,MIPS32EL -; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -relocation-model=pic -mcpu=mips64 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64,MIPS64EL +; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -relocation-model=pic -mcpu=mips64 -disable-mips-delay-filler -mips-jalr-reloc=false | FileCheck %s --check-prefixes=ALL,MIPS64,MIPS64EL ; RUN: llc < %s -mtriple=mipsel-unknown-linux-gnu -mcpu=mips32r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32R5,MIPS32R5EL -; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -relocation-model=pic -mcpu=mips64r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64R5,MIPS64R5EL +; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -relocation-model=pic -mcpu=mips64r5 -mattr=+fp64,+msa -disable-mips-delay-filler -mips-jalr-reloc=false | FileCheck %s --check-prefixes=ALL,MIPS64R5,MIPS64R5EL ; Test that vector types are passed through the integer register set whether or ; not MSA is enabled. This is a ABI requirement for MIPS. For GCC compatibility diff --git a/llvm/test/CodeGen/Mips/gprestore.ll b/llvm/test/CodeGen/Mips/gprestore.ll index 88ac047b660954..a1e696b0ac08b8 100644 --- a/llvm/test/CodeGen/Mips/gprestore.ll +++ b/llvm/test/CodeGen/Mips/gprestore.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=mips-mti-linux-gnu < %s -relocation-model=pic | FileCheck %s --check-prefix=O32 -; RUN: llc -mtriple=mips64-mti-linux-gnu < %s -relocation-model=pic | FileCheck %s --check-prefix=N64 -; RUN: llc -mtriple=mips64-mti-linux-gnu < %s -relocation-model=pic -target-abi n32 | FileCheck %s --check-prefix=N32 -; RUN: llc -mtriple=mips-mti-linux-gnu < %s -relocation-model=pic -O3 | FileCheck %s --check-prefix=O3O32 -; RUN: llc -mtriple=mips64-mti-linux-gnu < %s -relocation-model=pic -O3 | FileCheck %s --check-prefix=O3N64 -; RUN: llc -mtriple=mips64-mti-linux-gnu < %s -relocation-model=pic -target-abi n32 -O3 | FileCheck %s --check-prefix=O3N32 +; RUN: llc -mtriple=mips-mti-linux-gnu < %s -relocation-model=pic -mips-jalr-reloc=false | FileCheck %s --check-prefix=O32 +; RUN: llc -mtriple=mips64-mti-linux-gnu < %s -relocation-model=pic -mips-jalr-reloc=false | FileCheck %s --check-prefix=N64 +; RUN: llc -mtriple=mips64-mti-linux-gnu < %s -relocation-model=pic -target-abi n32 -mips-jalr-reloc=false | FileCheck %s --check-prefix=N32 +; RUN: llc -mtriple=mips-mti-linux-gnu < %s -relocation-model=pic -O3 -mips-jalr-reloc=false | FileCheck %s --check-prefix=O3O32 +; RUN: llc -mtriple=mips64-mti-linux-gnu < %s -relocation-model=pic -O3 -mips-jalr-reloc=false | FileCheck %s --check-prefix=O3N64 +; RUN: llc -mtriple=mips64-mti-linux-gnu < %s -relocation-model=pic -target-abi n32 -O3 -mips-jalr-reloc=false | FileCheck %s --check-prefix=O3N32 ; Test that PIC calls use the $25 register. This is an ABI requirement. diff --git a/llvm/test/CodeGen/Mips/llvm-ir/sdiv.ll b/llvm/test/CodeGen/Mips/llvm-ir/sdiv.ll index e54eaa63222a05..af3d4f50f3fe4f 100644 --- a/llvm/test/CodeGen/Mips/llvm-ir/sdiv.ll +++ b/llvm/test/CodeGen/Mips/llvm-ir/sdiv.ll @@ -1,36 +1,38 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=mips -mcpu=mips2 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP32,GP32R0R2 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R0R2 ; RUN: llc < %s -mtriple=mips -mcpu=mips32 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP32,GP32R0R2 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R0R2 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r2 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP32,GP32R2R5 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP32,GP32R2R5 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r5 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP32,GP32R2R5 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefix=GP32R6 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefix=GP32R6 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips3 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP64,GP64R0R1 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips4 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP64,GP64R0R1 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP64,GP64R0R1 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r2 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP64,GP64R2R5 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r3 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP64,GP64R2R5 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r5 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP64,GP64R2R5 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r6 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefix=GP64R6 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefix=GP64R6 -; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -mattr=+micromips -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefix=MMR3 -; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -mattr=+micromips -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefix=MMR6 +; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -mattr=+micromips \ +; RUN: -relocation-model=pic -mips-jalr-reloc=false | \ +; RUN: FileCheck %s -check-prefix=MMR3 +; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -mattr=+micromips \ +; RUN: -relocation-model=pic -mips-jalr-reloc=false | \ +; RUN: FileCheck %s -check-prefix=MMR6 define signext i1 @sdiv_i1(i1 signext %a, i1 signext %b) { ; GP32-LABEL: sdiv_i1: diff --git a/llvm/test/CodeGen/Mips/llvm-ir/srem.ll b/llvm/test/CodeGen/Mips/llvm-ir/srem.ll index ef0502c85d59bb..487a5b9b6cbc5e 100644 --- a/llvm/test/CodeGen/Mips/llvm-ir/srem.ll +++ b/llvm/test/CodeGen/Mips/llvm-ir/srem.ll @@ -1,36 +1,38 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=mips -mcpu=mips2 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP32,GP32R0R2 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R0R2 ; RUN: llc < %s -mtriple=mips -mcpu=mips32 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP32,GP32R0R2 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R0R2 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r2 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP32,GP32R2R5 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP32,GP32R2R5 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r5 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP32,GP32R2R5 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefix=GP32R6 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefix=GP32R6 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips3 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP64,GP64R0R1 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips4 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP64,GP64R0R1 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP64,GP64R0R1 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r2 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP64,GP64R2R5 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r3 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP64,GP64R2R5 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r5 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP64,GP64R2R5 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r6 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefix=GP64R6 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefix=GP64R6 -; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -mattr=+micromips -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefix=MMR3 -; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -mattr=+micromips -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefix=MMR6 +; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -mattr=+micromips \ +; RUN: -relocation-model=pic -mips-jalr-reloc=false | \ +; RUN: FileCheck %s -check-prefix=MMR3 +; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -mattr=+micromips \ +; RUN: -relocation-model=pic -mips-jalr-reloc=false | \ +; RUN: FileCheck %s -check-prefix=MMR6 define signext i1 @srem_i1(i1 signext %a, i1 signext %b) { ; GP32-LABEL: srem_i1: diff --git a/llvm/test/CodeGen/Mips/llvm-ir/udiv.ll b/llvm/test/CodeGen/Mips/llvm-ir/udiv.ll index 8694a9f92b65ab..3b7243712024b6 100644 --- a/llvm/test/CodeGen/Mips/llvm-ir/udiv.ll +++ b/llvm/test/CodeGen/Mips/llvm-ir/udiv.ll @@ -1,36 +1,38 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=mips -mcpu=mips2 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP32,GP32R0R1 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R0R1 ; RUN: llc < %s -mtriple=mips -mcpu=mips32 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP32,GP32R0R1 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R0R1 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r2 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP32,GP32R2R5 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP32,GP32R2R5 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r5 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP32,GP32R2R5 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefix=GP32R6 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefix=GP32R6 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips3 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP64,GP64R0R1 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips4 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP64,GP64R0R1 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP64,GP64R0R2 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R2 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r2 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP64,GP64R2R5 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r3 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP64,GP64R2R5 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r5 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP64,GP64R2R5 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r6 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefix=GP64R6 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefix=GP64R6 -; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -mattr=+micromips -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefix=MMR3 -; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -mattr=+micromips -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefix=MMR6 +; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -mattr=+micromips \ +; RUN: -relocation-model=pic -mips-jalr-reloc=false | \ +; RUN: FileCheck %s -check-prefix=MMR3 +; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -mattr=+micromips \ +; RUN: -relocation-model=pic -mips-jalr-reloc=false | \ +; RUN: FileCheck %s -check-prefix=MMR6 define zeroext i1 @udiv_i1(i1 zeroext %a, i1 zeroext %b) { ; GP32-LABEL: udiv_i1: diff --git a/llvm/test/CodeGen/Mips/llvm-ir/urem.ll b/llvm/test/CodeGen/Mips/llvm-ir/urem.ll index b744f706cbf9ce..4105d67da6f1ac 100644 --- a/llvm/test/CodeGen/Mips/llvm-ir/urem.ll +++ b/llvm/test/CodeGen/Mips/llvm-ir/urem.ll @@ -1,36 +1,38 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=mips -mcpu=mips2 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP32,GP32R0R2 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R0R2 ; RUN: llc < %s -mtriple=mips -mcpu=mips32 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP32,GP32R0R2 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R0R2 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r2 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP32,GP32R2R5 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP32,GP32R2R5 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r5 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP32,GP32R2R5 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefix=GP32R6 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefix=GP32R6 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips3 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP64,GP64R0R1 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips4 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP64,GP64R0R1 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP64,GP64R0R1 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r2 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP64,GP64R2R5 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r3 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP64,GP64R2R5 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r5 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefixes=GP64,GP64R2R5 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r6 -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefix=GP64R6 +; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefix=GP64R6 -; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -mattr=+micromips -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefix=MMR3 -; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -mattr=+micromips -relocation-model=pic \ -; RUN: | FileCheck %s -check-prefix=MMR6 +; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -mattr=+micromips \ +; RUN: -relocation-model=pic -mips-jalr-reloc=false | \ +; RUN: FileCheck %s -check-prefix=MMR3 +; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -mattr=+micromips \ +; RUN: -relocation-model=pic -mips-jalr-reloc=false | \ +; RUN: FileCheck %s -check-prefix=MMR6 define signext i1 @urem_i1(i1 signext %a, i1 signext %b) { ; GP32-LABEL: urem_i1: diff --git a/llvm/test/CodeGen/Mips/long-call-attr.ll b/llvm/test/CodeGen/Mips/long-call-attr.ll index 5b6ba94aaa3567..beda290a9725b4 100644 --- a/llvm/test/CodeGen/Mips/long-call-attr.ll +++ b/llvm/test/CodeGen/Mips/long-call-attr.ll @@ -1,11 +1,11 @@ ; RUN: llc -march=mips -target-abi o32 --mattr=+long-calls,+noabicalls < %s \ -; RUN: | FileCheck -check-prefix=O32 %s +; RUN: -mips-jalr-reloc=false | FileCheck -check-prefix=O32 %s ; RUN: llc -march=mips -target-abi o32 --mattr=-long-calls,+noabicalls < %s \ -; RUN: | FileCheck -check-prefix=O32 %s +; RUN: -mips-jalr-reloc=false | FileCheck -check-prefix=O32 %s ; RUN: llc -march=mips64 -target-abi n64 --mattr=+long-calls,+noabicalls < %s \ -; RUN: | FileCheck -check-prefix=N64 %s +; RUN: -mips-jalr-reloc=false | FileCheck -check-prefix=N64 %s ; RUN: llc -march=mips64 -target-abi n64 --mattr=-long-calls,+noabicalls < %s \ -; RUN: | FileCheck -check-prefix=N64 %s +; RUN: -mips-jalr-reloc=false | FileCheck -check-prefix=N64 %s declare void @far() #0 diff --git a/llvm/test/CodeGen/Mips/long-call-mcount.ll b/llvm/test/CodeGen/Mips/long-call-mcount.ll index 70a4410d060ba3..580f452526f736 100644 --- a/llvm/test/CodeGen/Mips/long-call-mcount.ll +++ b/llvm/test/CodeGen/Mips/long-call-mcount.ll @@ -1,8 +1,8 @@ ; Check call to mcount in case of long/short call options. ; RUN: llc -march=mips -target-abi o32 --mattr=+long-calls,+noabicalls < %s \ -; RUN: | FileCheck -check-prefixes=CHECK,LONG %s +; RUN: -mips-jalr-reloc=false | FileCheck -check-prefixes=CHECK,LONG %s ; RUN: llc -march=mips -target-abi o32 --mattr=-long-calls,+noabicalls < %s \ -; RUN: | FileCheck -check-prefixes=CHECK,SHORT %s +; RUN: -mips-jalr-reloc=false | FileCheck -check-prefixes=CHECK,SHORT %s ; Function Attrs: noinline nounwind optnone define void @foo() #0 { diff --git a/llvm/test/CodeGen/Mips/msa/f16-llvm-ir.ll b/llvm/test/CodeGen/Mips/msa/f16-llvm-ir.ll index 9105e9249d4f02..8544a75c50a628 100644 --- a/llvm/test/CodeGen/Mips/msa/f16-llvm-ir.ll +++ b/llvm/test/CodeGen/Mips/msa/f16-llvm-ir.ll @@ -1,22 +1,22 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -relocation-model=pic -mtriple=mipsel-- -mcpu=mips32r5 \ -; RUN: -mattr=+fp64,+msa -verify-machineinstrs < %s | FileCheck %s \ +; RUN: -mattr=+fp64,+msa -verify-machineinstrs -mips-jalr-reloc=false < %s | FileCheck %s \ ; RUN: --check-prefixes=ALL,MIPS32,MIPSR5,MIPS32-O32,MIPS32R5-O32 ; RUN: llc -relocation-model=pic -mtriple=mips64el-- -mcpu=mips64r5 \ -; RUN: -mattr=+fp64,+msa -verify-machineinstrs -target-abi n32 < %s | FileCheck %s \ +; RUN: -mattr=+fp64,+msa -verify-machineinstrs -target-abi n32 -mips-jalr-reloc=false < %s | FileCheck %s \ ; RUN: --check-prefixes=ALL,MIPS64,MIPSR5,MIPS64-N32,MIPS64R5-N32 ; RUN: llc -relocation-model=pic -mtriple=mips64el-- -mcpu=mips64r5 \ -; RUN: -mattr=+fp64,+msa -verify-machineinstrs -target-abi n64 < %s | FileCheck %s \ +; RUN: -mattr=+fp64,+msa -verify-machineinstrs -target-abi n64 -mips-jalr-reloc=false < %s | FileCheck %s \ ; RUN: --check-prefixes=ALL,MIPS64,MIPSR5,MIPS64-N64,MIPS64R5-N64 ; RUN: llc -relocation-model=pic -mtriple=mipsel-- -mcpu=mips32r6 \ -; RUN: -mattr=+fp64,+msa -verify-machineinstrs < %s | FileCheck %s \ +; RUN: -mattr=+fp64,+msa -verify-machineinstrs -mips-jalr-reloc=false < %s | FileCheck %s \ ; RUN: --check-prefixes=ALL,MIPS32,MIPSR6,MIPSR6-O32 ; RUN: llc -relocation-model=pic -mtriple=mips64el-- -mcpu=mips64r6 \ -; RUN: -mattr=+fp64,+msa -verify-machineinstrs -target-abi n32 < %s | FileCheck %s \ +; RUN: -mattr=+fp64,+msa -verify-machineinstrs -target-abi n32 -mips-jalr-reloc=false < %s | FileCheck %s \ ; RUN: --check-prefixes=ALL,MIPS64,MIPSR6,MIPS64-N32,MIPSR6-N32 ; RUN: llc -relocation-model=pic -mtriple=mips64el-- -mcpu=mips64r6 \ -; RUN: -mattr=+fp64,+msa -verify-machineinstrs -target-abi n64 < %s | FileCheck %s \ +; RUN: -mattr=+fp64,+msa -verify-machineinstrs -target-abi n64 -mips-jalr-reloc=false < %s | FileCheck %s \ ; RUN: --check-prefixes=ALL,MIPS64,MIPSR6,MIPS64-N64,MIPSR6-N64 diff --git a/llvm/test/CodeGen/Mips/o32_cc_byval.ll b/llvm/test/CodeGen/Mips/o32_cc_byval.ll index 19eb80b79bafe8..d9951ebeaf3a99 100644 --- a/llvm/test/CodeGen/Mips/o32_cc_byval.ll +++ b/llvm/test/CodeGen/Mips/o32_cc_byval.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=mipsel-unknown-linux-gnu -relocation-model=pic < %s | FileCheck %s +; RUN: llc -mtriple=mipsel-unknown-linux-gnu -relocation-model=pic \ +; RUN: -mips-jalr-reloc=false < %s | FileCheck %s %0 = type { i8, i16, i32, i64, double, i32, [4 x i8] } %struct.S1 = type { i8, i16, i32, i64, double, i32 } diff --git a/llvm/test/CodeGen/Mips/reloc-jalr.ll b/llvm/test/CodeGen/Mips/reloc-jalr.ll new file mode 100644 index 00000000000000..f8fd903110045d --- /dev/null +++ b/llvm/test/CodeGen/Mips/reloc-jalr.ll @@ -0,0 +1,154 @@ +; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic -mips-tail-calls=1 \ +; RUN: -O2 < %s | \ +; RUN: FileCheck %s -check-prefixes=ALL,JALR-32R2,TAILCALL-32R2 + +; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=pic -mips-tail-calls=1 \ +; RUN: -O2 < %s | \ +; RUN: FileCheck %s -check-prefixes=ALL,JALR-64R2,TAILCALL-64R2 + +; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic -mips-tail-calls=1 \ +; RUN: -O2 -mcpu=mips32r6 -mips-compact-branches=always < %s | \ +; RUN: FileCheck %s -check-prefixes=ALL,JALR-32R6,TAILCALL-32R6 + +; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=pic -mips-tail-calls=1 \ +; RUN: -O2 -mcpu=mips64r6 -mips-compact-branches=always < %s | \ +; RUN: FileCheck %s -check-prefixes=ALL,JALR-64R6,TAILCALL-64R6 + +; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic -mips-tail-calls=1 \ +; RUN: -O2 -mcpu=mips32r6 -mips-compact-branches=never < %s | \ +; RUN: FileCheck %s -check-prefixes=ALL,JALR-32R2,TAILCALL-32R2 + +; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=pic -mips-tail-calls=1 \ +; RUN: -O2 -mcpu=mips64r6 -mips-compact-branches=never < %s | \ +; RUN: FileCheck %s -check-prefixes=ALL,JALR-64R2,TAILCALL-64R2 + +; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic -mips-tail-calls=1 \ +; RUN: -O2 -mattr=+micromips -mcpu=mips32r2 < %s | \ +; RUN: FileCheck %s -check-prefixes=ALL,JALR-MM,TAILCALL-MM + +; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic -mips-tail-calls=1 \ +; RUN: -O2 -mattr=+micromips -mcpu=mips32r6 < %s | \ +; RUN: FileCheck %s -check-prefixes=ALL,JALR-MM + +; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic \ +; RUN: -O0 < %s | FileCheck %s -check-prefixes=ALL,JALR-32R2 + +; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=pic \ +; RUN: -O0 < %s | FileCheck %s -check-prefixes=ALL,JALR-64R2 + +; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic \ +; RUN: -O0 -mcpu=mips32r6 -mips-compact-branches=always < %s | \ +; RUN: FileCheck %s -check-prefixes=ALL,JALR-32R6 + +; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=pic \ +; RUN: -O0 -mcpu=mips64r6 -mips-compact-branches=always < %s | \ +; RUN: FileCheck %s -check-prefixes=ALL,JALR-64R6 + +; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic \ +; RUN: -O0 -mcpu=mips32r6 -mips-compact-branches=never < %s | \ +; RUN: FileCheck %s -check-prefixes=ALL,JALR-32R2 + +; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=pic \ +; RUN: -O0 -mcpu=mips64r6 -mips-compact-branches=never < %s | \ +; RUN: FileCheck %s -check-prefixes=ALL,JALR-64R2 + +; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic \ +; RUN: -O0 -mattr=+micromips -mcpu=mips32r2 < %s | \ +; RUN: FileCheck %s -check-prefixes=ALL,JALR-MM + +; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic \ +; RUN: -O0 -mattr=+micromips -mcpu=mips32r6 < %s | \ +; RUN: FileCheck %s -check-prefixes=ALL,JALR-MM + +; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic -mips-tail-calls=1 \ +; RUN: -O2 -mips-jalr-reloc=false < %s | \ +; RUN: FileCheck %s -check-prefixes=ALL,NORELOC + +; RUN: llc -mtriple=mips-linux-gnu -relocation-model=static -mips-tail-calls=1 \ +; RUN: -O2 < %s | \ +; RUN: FileCheck %s -check-prefixes=ALL,NORELOC + +; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic -mips-tail-calls=1 \ +; RUN: -O0 -mips-jalr-reloc=false < %s | \ +; RUN: FileCheck %s -check-prefixes=ALL,NORELOC + +; RUN: llc -mtriple=mips-linux-gnu -relocation-model=static -mips-tail-calls=1 \ +; RUN: -O0 < %s | \ +; RUN: FileCheck %s -check-prefixes=ALL,NORELOC + +; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=pic -mips-tail-calls=1 \ +; RUN: -O2 -mips-jalr-reloc=false < %s | \ +; RUN: FileCheck %s -check-prefixes=ALL,NORELOC + +; RUN: llc -mtriple=mips64-linux-gnu -mips-tail-calls=1 \ +; RUN: -O2 -relocation-model=static < %s | \ +; RUN: FileCheck %s -check-prefixes=ALL,NORELOC + +; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=pic \ +; RUN: -O0 -mips-jalr-reloc=false < %s | \ +; RUN: FileCheck %s -check-prefixes=ALL,NORELOC + +; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static \ +; RUN: -O0 < %s | \ +; RUN: FileCheck %s -check-prefixes=ALL,NORELOC + +define internal void @foo() noinline { +entry: + ret void +} + +define void @checkCall() { +entry: +; ALL-LABEL: checkCall: + call void @foo() +; JALR-32R2: .reloc ([[TMPLABEL:.*]]), R_MIPS_JALR, foo +; JALR-32R2-NEXT: [[TMPLABEL]]: +; JALR-32R2-NEXT: jalr $25 + +; JALR-64R2: .reloc [[TMPLABEL:.*]], R_MIPS_JALR, foo +; JALR-64R2-NEXT: [[TMPLABEL]]: +; JALR-64R2-NEXT: jalr $25 + +; JALR-MM: .reloc ([[TMPLABEL:.*]]), R_MICROMIPS_JALR, foo +; JALR-MM-NEXT: [[TMPLABEL]]: +; JALR-MM-NEXT: jalr $25 + +; JALR-32R6: .reloc ([[TMPLABEL:.*]]), R_MIPS_JALR, foo +; JALR-32R6-NEXT: [[TMPLABEL]]: +; JALR-32R6-NEXT: jalrc $25 + +; JALR-64R6: .reloc [[TMPLABEL:.*]], R_MIPS_JALR, foo +; JALR-64R6-NEXT: [[TMPLABEL]]: +; JALR-64R6-NEXT: jalrc $25 + +; NORELOC-NOT: R_MIPS_JALR + ret void +} + +define void @checkTailCall() { +entry: +; ALL-LABEL: checkTailCall: + tail call void @foo() +; TAILCALL-32R2: .reloc ([[TMPLABEL:.*]]), R_MIPS_JALR, foo +; TAILCALL-32R2-NEXT: [[TMPLABEL]]: +; TAILCALL-32R2-NEXT: jr $25 + +; TAILCALL-64R2: .reloc [[TMPLABEL:.*]], R_MIPS_JALR, foo +; TAILCALL-64R2-NEXT: [[TMPLABEL]]: +; TAILCALL-64R2-NEXT: jr $25 + +; TAILCALL-MM: .reloc ([[TMPLABEL:.*]]), R_MICROMIPS_JALR, foo +; TAILCALL-MM-NEXT: [[TMPLABEL]]: +; TAILCALL-MM-NEXT: jrc $25 + +; TAILCALL-32R6: .reloc ([[TMPLABEL:.*]]), R_MIPS_JALR, foo +; TAILCALL-32R6-NEXT: [[TMPLABEL]]: +; TAILCALL-32R6-NEXT: jrc $25 + +; TAILCALL-64R6: .reloc [[TMPLABEL:.*]], R_MIPS_JALR, foo +; TAILCALL-64R6-NEXT: [[TMPLABEL]]: +; TAILCALL-64R6-NEXT: jrc $25 + +; NORELOC-NOT: R_MIPS_JALR + ret void +} diff --git a/llvm/test/CodeGen/Mips/shrink-wrapping.ll b/llvm/test/CodeGen/Mips/shrink-wrapping.ll index 54ae8699d1c19c..b08d2f1b64678f 100644 --- a/llvm/test/CodeGen/Mips/shrink-wrapping.ll +++ b/llvm/test/CodeGen/Mips/shrink-wrapping.ll @@ -9,11 +9,11 @@ ; RUN: FileCheck %s -check-prefix=NO-SHRINK-WRAP-STATIC ; RUN: llc -mtriple=mips-unknown-linux-gnu -enable-shrink-wrap=true \ -; RUN: -relocation-model=pic < %s | \ +; RUN: -relocation-model=pic -mips-jalr-reloc=false < %s | \ ; RUN: FileCheck %s -check-prefix=SHRINK-WRAP-PIC ; RUN: llc -mtriple=mips-unknown-linux-gnu -enable-shrink-wrap=false \ -; RUN: -relocation-model=pic < %s | \ +; RUN: -relocation-model=pic -mips-jalr-reloc=false < %s | \ ; RUN: FileCheck %s -check-prefix=NO-SHRINK-WRAP-PIC ; RUN: llc -mtriple=mips64-unknown-linux-gnu -enable-shrink-wrap=true \ @@ -25,11 +25,11 @@ ; RUN: FileCheck %s -check-prefix=NO-SHRINK-WRAP-64-STATIC ; RUN: llc -mtriple=mips64-unknown-linux-gnu -enable-shrink-wrap=true \ -; RUN: -relocation-model=pic < %s | \ +; RUN: -relocation-model=pic -mips-jalr-reloc=false < %s | \ ; RUN: FileCheck %s -check-prefix=SHRINK-WRAP-64-PIC ; RUN: llc -mtriple=mips64-unknown-linux-gnu -enable-shrink-wrap=false \ -; RUN: -relocation-model=pic < %s | \ +; RUN: -relocation-model=pic -mips-jalr-reloc=false < %s | \ ; RUN: FileCheck %s -check-prefix=NO-SHRINK-WRAP-64-PIC declare void @f(i32 signext) From b7988f706e2227af6fd540caad7efa34d00ebb6b Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 25 Jan 2019 00:15:41 +0000 Subject: [PATCH 042/125] Merging r351579: ------------------------------------------------------------------------ r351579 | vstefanovic | 2019-01-18 20:54:51 +0100 (Fri, 18 Jan 2019) | 9 lines [mips] Add '-mrelax-pic-calls', '-mno-relax-pic-calls' These two options enable/disable emission of R_{MICRO}MIPS_JALR fixups along with PIC calls. The linker may then try to turn PIC calls into direct jumps. By default, these fixups do get emitted by the backend, use '-mno-relax-pic-calls' to omit them. Differential revision: https://reviews.llvm.org/D56878 ------------------------------------------------------------------------ llvm-svn: 352139 --- clang/include/clang/Driver/Options.td | 8 ++++++++ clang/lib/Driver/ToolChains/Clang.cpp | 8 ++++++++ clang/test/Driver/mips-features.c | 12 ++++++++++++ 3 files changed, 28 insertions(+) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index f02a7190f5a7a9..d8155a95775c29 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2418,6 +2418,14 @@ def modd_spreg : Flag<["-"], "modd-spreg">, Group, def mno_odd_spreg : Flag<["-"], "mno-odd-spreg">, Group, HelpText<"Disable odd single-precision floating point registers">, Flags<[HelpHidden]>; +def mrelax_pic_calls : Flag<["-"], "mrelax-pic-calls">, + Group, + HelpText<"Try turning PIC calls (j{al}r{c} $25) into direct calls " + "(MIPS only)">, Flags<[HelpHidden]>; +def mno_relax_pic_calls : Flag<["-"], "mno-relax-pic-calls">, + Group, + HelpText<"Do not try turning PIC calls (j{al}r{c} $25) into direct calls " + "(MIPS only)">, Flags<[HelpHidden]>; def mglibc : Flag<["-"], "mglibc">, Group, Flags<[HelpHidden]>; def muclibc : Flag<["-"], "muclibc">, Group, Flags<[HelpHidden]>; def module_file_info : Flag<["-"], "module-file-info">, Flags<[DriverOption,CC1Option]>, Group, diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 75f16898dfaf66..589f53b119217f 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -1716,6 +1716,14 @@ void Clang::AddMIPSTargetArgs(const ArgList &Args, } else D.Diag(diag::warn_target_unsupported_compact_branches) << CPUName; } + + if (Arg *A = Args.getLastArg(options::OPT_mrelax_pic_calls, + options::OPT_mno_relax_pic_calls)) { + if (A->getOption().matches(options::OPT_mno_relax_pic_calls)) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-mips-jalr-reloc=0"); + } + } } void Clang::AddPPCTargetArgs(const ArgList &Args, diff --git a/clang/test/Driver/mips-features.c b/clang/test/Driver/mips-features.c index f63fb8de55d6c9..19725bc096b5d6 100644 --- a/clang/test/Driver/mips-features.c +++ b/clang/test/Driver/mips-features.c @@ -444,3 +444,15 @@ // RUN: -mginv -mno-ginv 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-NO-GINV %s // CHECK-NO-GINV: "-target-feature" "-ginv" +// +// -mrelax-pic-calls +// RUN: %clang -target mips-unknown-linux-gnu -### -c %s \ +// RUN: -mno-relax-pic-calls -mrelax-pic-calls 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-RELAX-PIC-CALLS %s +// CHECK-RELAX-PIC-CALLS-NOT: "-mllvm" "-mips-jalr-reloc=0" +// +// -mno-relax-pic-calls +// RUN: %clang -target mips-unknown-linux-gnu -### -c %s \ +// RUN: -mrelax-pic-calls -mno-relax-pic-calls 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-NO-RELAX-PIC-CALLS %s +// CHECK-NO-RELAX-PIC-CALLS: "-mllvm" "-mips-jalr-reloc=0" From e25507d4378c85983ec4f55d9f2a606864752950 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 25 Jan 2019 00:18:40 +0000 Subject: [PATCH 043/125] Merging r352034: ------------------------------------------------------------------------ r352034 | atanasyan | 2019-01-24 10:13:14 +0100 (Thu, 24 Jan 2019) | 18 lines Reapply: [mips] Handle MipsMCExpr sub-expression for the MEK_DTPREL tag This reapplies commit r351987 with a failed test fix. Now the test accepts both DW_OP_GNU_push_tls_address and DW_OP_form_tls_address opcode. Original commit message: ``` This is a fix for a regression introduced by the rL348194 commit. In that change new type (MEK_DTPREL) of MipsMCExpr expression was added, but in some places of the code this type of expression considered as unexpected. This change fixes the bug. The MEK_DTPREL type of expression is used for marking TLS DIEExpr only and contains a regular sub-expression. Where we need to handle the expression, we retrieve the sub-expression and handle it in a common way. ``` ------------------------------------------------------------------------ llvm-svn: 352140 --- .../Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 5 ++-- .../Target/Mips/MCTargetDesc/MipsMCExpr.cpp | 14 +++++----- llvm/test/DebugInfo/Mips/dwarfdump-tls.ll | 26 +++++++++++++++++-- 3 files changed, 35 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index f43a4d980f92aa..e3dcbaccfd0806 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -614,8 +614,9 @@ getExprOpValue(const MCExpr *Expr, SmallVectorImpl &Fixups, llvm_unreachable("Unhandled fixup kind!"); break; case MipsMCExpr::MEK_DTPREL: - llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only"); - break; + // MEK_DTPREL is used for marking TLS DIEExpr only + // and contains a regular sub-expression. + return getExprOpValue(MipsExpr->getSubExpr(), Fixups, STI); case MipsMCExpr::MEK_CALL_HI16: FixupKind = Mips::fixup_Mips_CALL_HI16; break; diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp index 99857e083c6ca8..2d7312725205fc 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp @@ -44,8 +44,10 @@ void MipsMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { llvm_unreachable("MEK_None and MEK_Special are invalid"); break; case MEK_DTPREL: - llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only"); - break; + // MEK_DTPREL is used for marking TLS DIEExpr only + // and contains a regular sub-expression. + getSubExpr()->print(OS, MAI, true); + return; case MEK_CALL_HI16: OS << "%call_hi"; break; @@ -161,7 +163,9 @@ MipsMCExpr::evaluateAsRelocatableImpl(MCValue &Res, case MEK_Special: llvm_unreachable("MEK_None and MEK_Special are invalid"); case MEK_DTPREL: - llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only"); + // MEK_DTPREL is used for marking TLS DIEExpr only + // and contains a regular sub-expression. + return getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup); case MEK_DTPREL_HI: case MEK_DTPREL_LO: case MEK_GOT: @@ -249,9 +253,6 @@ void MipsMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { case MEK_Special: llvm_unreachable("MEK_None and MEK_Special are invalid"); break; - case MEK_DTPREL: - llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only"); - break; case MEK_CALL_HI16: case MEK_CALL_LO16: case MEK_GOT: @@ -274,6 +275,7 @@ void MipsMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { if (const MipsMCExpr *E = dyn_cast(getSubExpr())) E->fixELFSymbolsInTLSFixups(Asm); break; + case MEK_DTPREL: case MEK_DTPREL_HI: case MEK_DTPREL_LO: case MEK_TLSLDM: diff --git a/llvm/test/DebugInfo/Mips/dwarfdump-tls.ll b/llvm/test/DebugInfo/Mips/dwarfdump-tls.ll index 6aa429adb417b1..8d8af8c5124a9f 100644 --- a/llvm/test/DebugInfo/Mips/dwarfdump-tls.ll +++ b/llvm/test/DebugInfo/Mips/dwarfdump-tls.ll @@ -1,12 +1,34 @@ -; RUN: llc -O0 -march=mips -mcpu=mips32r2 -filetype=obj -o=%t-32.o < %s +; RUN: llc -O0 -march=mips -mcpu=mips32r2 -filetype=obj \ +; RUN: -split-dwarf-file=foo.dwo -o=%t-32.o < %s ; RUN: llvm-dwarfdump %t-32.o 2>&1 | FileCheck %s -; RUN: llc -O0 -march=mips64 -mcpu=mips64r2 -filetype=obj -o=%t-64.o < %s +; RUN: llc -O0 -march=mips64 -mcpu=mips64r2 -filetype=obj \ +; RUN: -split-dwarf-file=foo.dwo -o=%t-64.o < %s ; RUN: llvm-dwarfdump %t-64.o 2>&1 | FileCheck %s +; RUN: llc -O0 -march=mips -mcpu=mips32r2 -filetype=asm \ +; RUN: -split-dwarf-file=foo.dwo < %s | FileCheck -check-prefix=ASM32 %s +; RUN: llc -O0 -march=mips64 -mcpu=mips64r2 -filetype=asm \ +; RUN: -split-dwarf-file=foo.dwo < %s | FileCheck -check-prefix=ASM64 %s + @x = thread_local global i32 5, align 4, !dbg !0 ; CHECK-NOT: error: failed to compute relocation: R_MIPS_TLS_DTPREL +; CHECK: DW_AT_name ("x") +; CHECK-NEXT: DW_AT_type (0x00000025 "int") +; CHECK-NEXT: DW_AT_external (true) +; CHECK-NEXT: DW_AT_decl_file (0x01) +; CHECK-NEXT: DW_AT_decl_line (1) +; CHECK-NEXT: DW_AT_location (DW_OP_GNU_const_index 0x0, {{DW_OP_GNU_push_tls_address|DW_OP_form_tls_address}}) + +; ASM32: .section .debug_addr +; ASM32-NEXT: $addr_table_base0: +; ASM32-NEXT: .4byte x+32768 + +; ASM64: .section .debug_addr +; ASM64-NEXT: .Laddr_table_base0: +; ASM64-NEXT: .8byte x+32768 + !llvm.dbg.cu = !{!2} !llvm.module.flags = !{!7, !8} From af62a72c724c620cbf704315761359fb2db6071f Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 25 Jan 2019 17:01:21 +0000 Subject: [PATCH 044/125] Merging r352068: ------------------------------------------------------------------------ r352068 | serge_sans_paille | 2019-01-24 18:56:08 +0100 (Thu, 24 Jan 2019) | 7 lines Partial support of SHT_GROUP without flag This does *not* implement full SHT_GROUP semantic, yet it is a simple step forward: Sections within a group are still considered valid, but they do not behave as specified by the standard in case of garbage collection. Differential Revision: https://reviews.llvm.org/D56437 ------------------------------------------------------------------------ llvm-svn: 352218 --- lld/ELF/InputFiles.cpp | 41 +++++++++++------------ lld/test/ELF/sht-group-empty.test | 55 +++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 22 deletions(-) create mode 100644 lld/test/ELF/sht-group-empty.test diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index e4d1dec7cbcbd6..117dd17894eb0b 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -320,17 +320,6 @@ StringRef ObjFile::getShtGroupSignature(ArrayRef Sections, return Signature; } -template -ArrayRef::Elf_Word> -ObjFile::getShtGroupEntries(const Elf_Shdr &Sec) { - const ELFFile &Obj = this->getObj(); - ArrayRef Entries = - CHECK(Obj.template getSectionContentsAsArray(&Sec), this); - if (Entries.empty() || Entries[0] != GRP_COMDAT) - fatal(toString(this) + ": unsupported SHT_GROUP format"); - return Entries.slice(1); -} - template bool ObjFile::shouldMerge(const Elf_Shdr &Sec) { // On a regular link we don't merge sections if -O0 (default is -O1). This // sometimes makes the linker significantly faster, although the output will @@ -440,26 +429,34 @@ void ObjFile::initializeSections( case SHT_GROUP: { // De-duplicate section groups by their signatures. StringRef Signature = getShtGroupSignature(ObjSections, Sec); - bool IsNew = ComdatGroups.insert(CachedHashStringRef(Signature)).second; this->Sections[I] = &InputSection::Discarded; - // We only support GRP_COMDAT type of group. Get the all entries of the - // section here to let getShtGroupEntries to check the type early for us. - ArrayRef Entries = getShtGroupEntries(Sec); - // If it is a new section group, we want to keep group members. - // Group leader sections, which contain indices of group members, are - // discarded because they are useless beyond this point. The only - // exception is the -r option because in order to produce re-linkable - // object files, we want to pass through basically everything. + ArrayRef Entries = + CHECK(Obj.template getSectionContentsAsArray(&Sec), this); + if (Entries.empty()) + fatal(toString(this) + ": empty SHT_GROUP"); + + // The first word of a SHT_GROUP section contains flags. Currently, + // the standard defines only "GRP_COMDAT" flag for the COMDAT group. + // An group with the empty flag doesn't define anything; such sections + // are just skipped. + if (Entries[0] == 0) + continue; + + if (Entries[0] != GRP_COMDAT) + fatal(toString(this) + ": unsupported SHT_GROUP format"); + + bool IsNew = ComdatGroups.insert(CachedHashStringRef(Signature)).second; if (IsNew) { if (Config->Relocatable) this->Sections[I] = createInputSection(Sec); - continue; + continue; } + // Otherwise, discard group members. - for (uint32_t SecIndex : Entries) { + for (uint32_t SecIndex : Entries.slice(1)) { if (SecIndex >= Size) fatal(toString(this) + ": invalid section index in group: " + Twine(SecIndex)); diff --git a/lld/test/ELF/sht-group-empty.test b/lld/test/ELF/sht-group-empty.test new file mode 100644 index 00000000000000..46c77f332e7e70 --- /dev/null +++ b/lld/test/ELF/sht-group-empty.test @@ -0,0 +1,55 @@ +# RUN: yaml2obj %s -o %t.o +# RUN: ld.lld %t.o %t.o -o %t -r +# RUN: llvm-readobj -s %t | FileCheck %s + +# CHECK: Name: .text.foo +# CHECK: Name: .rela.text.foo + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .group + Type: SHT_GROUP + Link: .symtab + Info: foo + Members: + - SectionOrType: GRP_COMDAT + - SectionOrType: .text.foo + - SectionOrType: .text.bar + - SectionOrType: .note + - Name: .note + Type: SHT_NOTE + Flags: [ SHF_GROUP ] + - Name: .text.foo + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR, SHF_GROUP ] + - Name: .text.bar + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR, SHF_GROUP ] + - Name: .rela.text.foo + Type: SHT_RELA + Flags: [ SHF_INFO_LINK, SHF_GROUP ] + Link: .symtab + Info: .text.foo + Relocations: + - Offset: 0x0000000000000000 + Symbol: foo + Type: R_X86_64_64 + - Name: .rela.text.bar + Type: SHT_RELA + Flags: [ SHF_INFO_LINK, SHF_GROUP ] + Link: .symtab + Info: .text.bar + Relocations: + - Offset: 0x0000000000000000 + Symbol: bar + Type: R_X86_64_64 +Symbols: + Global: + - Name: foo + - Name: bar + From 4ba28fc323461e9c7016f484be5e241f98d97841 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 25 Jan 2019 18:18:53 +0000 Subject: [PATCH 045/125] Merging r352079: ------------------------------------------------------------------------ r352079 | sammccall | 2019-01-24 19:55:24 +0100 (Thu, 24 Jan 2019) | 33 lines [FileManager] Revert r347205 to avoid PCH file-descriptor leak. Summary: r347205 fixed a bug in FileManager: first calling getFile(shouldOpen=false) and then getFile(shouldOpen=true) results in the file not being open. Unfortunately, some code was (inadvertently?) relying on this bug: when building with a PCH, the file entries are obtained first by passing shouldOpen=false, and then later shouldOpen=true, without any intention of reading them. After r347205, they do get unneccesarily opened. Aside from extra operations, this means they need to be closed. Normally files are closed when their contents are read. As these files are never read, they stay open until clang exits. On platforms with a low open-files limit (e.g. Mac), this can lead to spurious file-not-found errors when building large projects with PCH enabled, e.g. https://bugs.chromium.org/p/chromium/issues/detail?id=924225 Fixing the callsites to pass shouldOpen=false when the file won't be read is not quite trivial (that info isn't available at the direct callsite), and passing shouldOpen=false is a performance regression (it results in open+fstat pairs being replaced by stat+open). So an ideal fix is going to be a little risky and we need some fix soon (especially for the llvm 8 branch). The problem addressed by r347205 is rare and has only been observed in clangd. It was present in llvm-7, so we can live with it for now. Reviewers: bkramer, thakis Subscribers: ilya-biryukov, ioeric, kadircet, cfe-commits Differential Revision: https://reviews.llvm.org/D57165 ------------------------------------------------------------------------ llvm-svn: 352225 --- clang/include/clang/Basic/FileManager.h | 5 ++-- clang/lib/Basic/FileManager.cpp | 35 +++++++---------------- clang/test/PCH/leakfiles | 29 +++++++++++++++++++ clang/unittests/Basic/FileManagerTest.cpp | 27 ----------------- 4 files changed, 42 insertions(+), 54 deletions(-) create mode 100644 clang/test/PCH/leakfiles diff --git a/clang/include/clang/Basic/FileManager.h b/clang/include/clang/Basic/FileManager.h index e7891baf530479..6a71289f74c1de 100644 --- a/clang/include/clang/Basic/FileManager.h +++ b/clang/include/clang/Basic/FileManager.h @@ -70,15 +70,14 @@ class FileEntry { bool IsNamedPipe; bool InPCH; bool IsValid; // Is this \c FileEntry initialized and valid? - bool DeferredOpen; // Created by getFile(OpenFile=0); may open later. /// The open file, if it is owned by the \p FileEntry. mutable std::unique_ptr File; public: FileEntry() - : UniqueID(0, 0), IsNamedPipe(false), InPCH(false), IsValid(false), - DeferredOpen(false) {} + : UniqueID(0, 0), IsNamedPipe(false), InPCH(false), IsValid(false) + {} FileEntry(const FileEntry &) = delete; FileEntry &operator=(const FileEntry &) = delete; diff --git a/clang/lib/Basic/FileManager.cpp b/clang/lib/Basic/FileManager.cpp index f5a2d4894c13e5..01ec1d69cde53f 100644 --- a/clang/lib/Basic/FileManager.cpp +++ b/clang/lib/Basic/FileManager.cpp @@ -189,21 +189,15 @@ const FileEntry *FileManager::getFile(StringRef Filename, bool openFile, *SeenFileEntries.insert(std::make_pair(Filename, nullptr)).first; // See if there is already an entry in the map. - if (NamedFileEnt.second) { - if (NamedFileEnt.second == NON_EXISTENT_FILE) - return nullptr; - // Entry exists: return it *unless* it wasn't opened and open is requested. - if (!(NamedFileEnt.second->DeferredOpen && openFile)) - return NamedFileEnt.second; - // We previously stat()ed the file, but didn't open it: do that below. - // FIXME: the below does other redundant work too (stats the dir and file). - } else { - // By default, initialize it to invalid. - NamedFileEnt.second = NON_EXISTENT_FILE; - } + if (NamedFileEnt.second) + return NamedFileEnt.second == NON_EXISTENT_FILE ? nullptr + : NamedFileEnt.second; ++NumFileCacheMisses; + // By default, initialize it to invalid. + NamedFileEnt.second = NON_EXISTENT_FILE; + // Get the null-terminated file name as stored as the key of the // SeenFileEntries map. StringRef InterndFileName = NamedFileEnt.first(); @@ -241,7 +235,6 @@ const FileEntry *FileManager::getFile(StringRef Filename, bool openFile, // It exists. See if we have already opened a file with the same inode. // This occurs when one dir is symlinked to another, for example. FileEntry &UFE = UniqueRealFiles[Data.UniqueID]; - UFE.DeferredOpen = !openFile; NamedFileEnt.second = &UFE; @@ -258,15 +251,6 @@ const FileEntry *FileManager::getFile(StringRef Filename, bool openFile, InterndFileName = NamedFileEnt.first().data(); } - // If we opened the file for the first time, record the resulting info. - // Do this even if the cache entry was valid, maybe we didn't previously open. - if (F && !UFE.File) { - if (auto PathName = F->getName()) - fillRealPathName(&UFE, *PathName); - UFE.File = std::move(F); - assert(!UFE.DeferredOpen && "we just opened it!"); - } - if (UFE.isValid()) { // Already have an entry with this inode, return it. // FIXME: this hack ensures that if we look up a file by a virtual path in @@ -297,9 +281,13 @@ const FileEntry *FileManager::getFile(StringRef Filename, bool openFile, UFE.UniqueID = Data.UniqueID; UFE.IsNamedPipe = Data.IsNamedPipe; UFE.InPCH = Data.InPCH; + UFE.File = std::move(F); UFE.IsValid = true; - // Note File and DeferredOpen were initialized above. + if (UFE.File) { + if (auto PathName = UFE.File->getName()) + fillRealPathName(&UFE, *PathName); + } return &UFE; } @@ -371,7 +359,6 @@ FileManager::getVirtualFile(StringRef Filename, off_t Size, UFE->UID = NextFileUID++; UFE->IsValid = true; UFE->File.reset(); - UFE->DeferredOpen = false; return UFE; } diff --git a/clang/test/PCH/leakfiles b/clang/test/PCH/leakfiles new file mode 100644 index 00000000000000..90b279026bc1ab --- /dev/null +++ b/clang/test/PCH/leakfiles @@ -0,0 +1,29 @@ +// Test that compiling using a PCH doesn't leak file descriptors. +// https://bugs.chromium.org/p/chromium/issues/detail?id=924225 +// +// This test requires bash loops and ulimit. +// REQUIRES: shell +// UNSUPPORTED: win32 +// +// Set up source files. lib/lib.h includes lots of lib*.h files in that dir. +// client.c includes lib/lib.h, and also the individual files directly. +// +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: cd %t +// RUN: mkdir lib +// RUN: for i in {1..300}; do touch lib/lib$i.h; done +// RUN: for i in {1..300}; do echo "#include \"lib$i.h\"" >> lib/lib.h; done +// RUN: echo "#include \"lib/lib.h\"" > client.c +// RUN: for i in {1..300}; do echo "#include \"lib/lib$i.h\"" >> client.c; done +// +// We want to verify that we don't hold all the files open at the same time. +// This is important e.g. on mac, which has a low default FD limit. +// RUN: ulimit -n 100 +// +// Test without PCH. +// RUN: %clang_cc1 -fsyntax-only -Ilib/ client.c +// +// Test with PCH. +// RUN: %clang_cc1 -emit-pch -o pch -Ilib/ client.c +// RUN: %clang_cc1 -include-pch pch -Ilib/ client.c -fsyntax-only diff --git a/clang/unittests/Basic/FileManagerTest.cpp b/clang/unittests/Basic/FileManagerTest.cpp index 746d9ad5e89bb1..8e98f44fa46d8e 100644 --- a/clang/unittests/Basic/FileManagerTest.cpp +++ b/clang/unittests/Basic/FileManagerTest.cpp @@ -222,33 +222,6 @@ TEST_F(FileManagerTest, getFileReturnsNULLForNonexistentFile) { EXPECT_EQ(nullptr, file); } -// When calling getFile(OpenFile=false); getFile(OpenFile=true) the file is -// opened for the second call. -TEST_F(FileManagerTest, getFileDefersOpen) { - llvm::IntrusiveRefCntPtr FS( - new llvm::vfs::InMemoryFileSystem()); - FS->addFile("/tmp/test", 0, llvm::MemoryBuffer::getMemBufferCopy("test")); - FS->addFile("/tmp/testv", 0, llvm::MemoryBuffer::getMemBufferCopy("testv")); - FileManager manager(options, FS); - - const FileEntry *file = manager.getFile("/tmp/test", /*OpenFile=*/false); - ASSERT_TRUE(file != nullptr); - ASSERT_TRUE(file->isValid()); - // "real path name" reveals whether the file was actually opened. - EXPECT_FALSE(file->isOpenForTests()); - - file = manager.getFile("/tmp/test", /*OpenFile=*/true); - ASSERT_TRUE(file != nullptr); - ASSERT_TRUE(file->isValid()); - EXPECT_TRUE(file->isOpenForTests()); - - // However we should never try to open a file previously opened as virtual. - ASSERT_TRUE(manager.getVirtualFile("/tmp/testv", 5, 0)); - ASSERT_TRUE(manager.getFile("/tmp/testv", /*OpenFile=*/false)); - file = manager.getFile("/tmp/testv", /*OpenFile=*/true); - EXPECT_FALSE(file->isOpenForTests()); -} - // The following tests apply to Unix-like system only. #ifndef _WIN32 From 9fbd743487e6d52976f1c50ff3f243b91eb45fdd Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 25 Jan 2019 19:31:17 +0000 Subject: [PATCH 046/125] Merging r352204: ------------------------------------------------------------------------ r352204 | sammccall | 2019-01-25 16:05:33 +0100 (Fri, 25 Jan 2019) | 7 lines [JSON] Work around excess-precision issue when comparing T_Integer numbers. Reviewers: bkramer Subscribers: kristina, llvm-commits Differential Revision: https://reviews.llvm.org/D57237 ------------------------------------------------------------------------ llvm-svn: 352233 --- llvm/include/llvm/Support/JSON.h | 1 + llvm/lib/Support/JSON.cpp | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/llvm/include/llvm/Support/JSON.h b/llvm/include/llvm/Support/JSON.h index 7a04fd52bc50e4..2b95a174e018cb 100644 --- a/llvm/include/llvm/Support/JSON.h +++ b/llvm/include/llvm/Support/JSON.h @@ -481,6 +481,7 @@ class Value { mutable llvm::AlignedCharArrayUnion Union; + friend bool operator==(const Value &, const Value &); }; bool operator==(const Value &, const Value &); diff --git a/llvm/lib/Support/JSON.cpp b/llvm/lib/Support/JSON.cpp index d468013fb94a53..07a556814915d1 100644 --- a/llvm/lib/Support/JSON.cpp +++ b/llvm/lib/Support/JSON.cpp @@ -182,6 +182,12 @@ bool operator==(const Value &L, const Value &R) { case Value::Boolean: return *L.getAsBoolean() == *R.getAsBoolean(); case Value::Number: + // Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=323 + // The same integer must convert to the same double, per the standard. + // However we see 64-vs-80-bit precision comparisons with gcc-7 -O3 -m32. + // So we avoid floating point promotion for exact comparisons. + if (L.Type == Value::T_Integer || R.Type == Value::T_Integer) + return L.getAsInteger() == R.getAsInteger(); return *L.getAsNumber() == *R.getAsNumber(); case Value::String: return *L.getAsString() == *R.getAsString(); From 30d3a54b74cf304a82d4f4ab2ef39efdc017ed78 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 25 Jan 2019 22:55:41 +0000 Subject: [PATCH 047/125] Merging r352234: ------------------------------------------------------------------------ r352234 | dim | 2019-01-25 20:36:47 +0100 (Fri, 25 Jan 2019) | 38 lines Fix XRayTest link on FreeBSD (and likely NetBSD too) Summary: As reported on llvm-testers, during 8.0.0-rc1 testing I got errors while building of `XRayTest`, during `check-all`: ``` [100%] Generating XRayTest-x86_64-Test /home/dim/llvm/8.0.0/rc1/Phase3/Release/llvmCore-8.0.0-rc1.obj/./lib/libLLVMSupport.a(Signals.cpp.o): In function `llvm::sys::PrintStackTrace(llvm::raw_ostream&)': Signals.cpp:(.text._ZN4llvm3sys15PrintStackTraceERNS_11raw_ostreamE+0x24): undefined reference to `backtrace' Signals.cpp:(.text._ZN4llvm3sys15PrintStackTraceERNS_11raw_ostreamE+0x254): undefined reference to `llvm::itaniumDemangle(char const*, char*, unsigned long*, int*)' clang-8: error: linker command failed with exit code 1 (use -v to see invocation) gmake[3]: *** [projects/compiler-rt/lib/xray/tests/unit/CMakeFiles/TXRayTest-x86_64-Test.dir/build.make:73: projects/compiler-rt/lib/xray/tests/unit/XRayTest-x86_64-Test] Error 1 gmake[3]: Target 'projects/compiler-rt/lib/xray/tests/unit/CMakeFiles/TXRayTest-x86_64-Test.dir/build' not remade because of errors. gmake[2]: *** [CMakeFiles/Makefile2:33513: projects/compiler-rt/lib/xray/tests/unit/CMakeFiles/TXRayTest-x86_64-Test.dir/all] Error 2 gmake[2]: Target 'CMakeFiles/check-all.dir/all' not remade because of errors. gmake[1]: *** [CMakeFiles/Makefile2:737: CMakeFiles/check-all.dir/rule] Error 2 gmake[1]: Target 'check-all' not remade because of errors. gmake: *** [Makefile:277: check-all] Error 2 [Release Phase3] check-all failed ``` This is because the `backtrace` function requires `-lexecinfo` on BSD platforms. To fix this, detect the `execinfo` library in `cmake/config-ix.cmake`, and add it to the unit test link flags. Additionally, since the code in `sys::PrintStackTrace` makes use of `itaniumDemangle`, also add `-lLLVMDemangle`. (Note that this is more of a general problem with libLLVMSupport, but I'm looking for a quick fix now so it can be merged to the 8.0 branch.) Reviewers: dberris, hans, mgorny, samsonov Reviewed By: dberris Subscribers: krytarowski, delcypher, erik.pilkington, #sanitizers, emaste, llvm-commits Differential Revision: https://reviews.llvm.org/D57181 ------------------------------------------------------------------------ llvm-svn: 352251 --- compiler-rt/cmake/config-ix.cmake | 1 + compiler-rt/lib/xray/tests/CMakeLists.txt | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake index db5c4645dc0a58..5b94338771780e 100644 --- a/compiler-rt/cmake/config-ix.cmake +++ b/compiler-rt/cmake/config-ix.cmake @@ -118,6 +118,7 @@ check_library_exists(dl dlopen "" COMPILER_RT_HAS_LIBDL) check_library_exists(rt shm_open "" COMPILER_RT_HAS_LIBRT) check_library_exists(m pow "" COMPILER_RT_HAS_LIBM) check_library_exists(pthread pthread_create "" COMPILER_RT_HAS_LIBPTHREAD) +check_library_exists(execinfo backtrace "" COMPILER_RT_HAS_LIBEXECINFO) # Look for terminfo library, used in unittests that depend on LLVMSupport. if(LLVM_ENABLE_TERMINFO) diff --git a/compiler-rt/lib/xray/tests/CMakeLists.txt b/compiler-rt/lib/xray/tests/CMakeLists.txt index 89a2b3b01ed8ae..deddc5101e76b2 100644 --- a/compiler-rt/lib/xray/tests/CMakeLists.txt +++ b/compiler-rt/lib/xray/tests/CMakeLists.txt @@ -71,13 +71,14 @@ if (NOT APPLE) endforeach() # We also add the actual libraries to link as dependencies. - list(APPEND XRAY_UNITTEST_LINK_FLAGS -lLLVMXRay -lLLVMSupport -lLLVMTestingSupport) + list(APPEND XRAY_UNITTEST_LINK_FLAGS -lLLVMXRay -lLLVMSupport -lLLVMDemangle -lLLVMTestingSupport) endif() append_list_if(COMPILER_RT_HAS_LIBM -lm XRAY_UNITTEST_LINK_FLAGS) append_list_if(COMPILER_RT_HAS_LIBRT -lrt XRAY_UNITTEST_LINK_FLAGS) append_list_if(COMPILER_RT_HAS_LIBDL -ldl XRAY_UNITTEST_LINK_FLAGS) append_list_if(COMPILER_RT_HAS_LIBPTHREAD -pthread XRAY_UNITTEST_LINK_FLAGS) + append_list_if(COMPILER_RT_HAS_LIBEXECINFO -lexecinfo XRAY_UNITTEST_LINK_FLAGS) endif() macro(add_xray_unittest testname) From f149bcab8da432dc0a0d7e580447d6c259d71170 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Mon, 28 Jan 2019 13:24:40 +0000 Subject: [PATCH 048/125] Merging r352257: Redirecting to URL 'https://llvm.org/svn/llvm-project/lld/trunk': ------------------------------------------------------------------------ r352257 | ruiu | 2019-01-26 01:31:49 +0100 (Sat, 26 Jan 2019) | 1 line Remove dead declaration. ------------------------------------------------------------------------ llvm-svn: 352352 --- lld/ELF/InputFiles.h | 1 - 1 file changed, 1 deletion(-) diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index 5094ddd804a5fe..d7cbbc67a36562 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -175,7 +175,6 @@ template class ObjFile : public ELFFileBase { StringRef getShtGroupSignature(ArrayRef Sections, const Elf_Shdr &Sec); - ArrayRef getShtGroupEntries(const Elf_Shdr &Sec); public: static bool classof(const InputFile *F) { return F->kind() == Base::ObjKind; } From 01ddd7c63a07d9c0eece2ce0f205de38df535b4c Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Mon, 28 Jan 2019 13:32:03 +0000 Subject: [PATCH 049/125] Remove failing test expectations. See discussions on the cfe-commits & clangd-dev thread about r352079. llvm-svn: 352354 --- clang-tools-extra/unittests/clangd/XRefsTests.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/clang-tools-extra/unittests/clangd/XRefsTests.cpp b/clang-tools-extra/unittests/clangd/XRefsTests.cpp index 88394b6cc5126e..8616fbdb179eff 100644 --- a/clang-tools-extra/unittests/clangd/XRefsTests.cpp +++ b/clang-tools-extra/unittests/clangd/XRefsTests.cpp @@ -1082,8 +1082,6 @@ TEST(GoToInclude, All) { // Test include outside of preamble. Locations = runFindDefinitions(Server, FooCpp, SourceAnnotations.point("6")); ASSERT_TRUE(bool(Locations)) << "findDefinitions returned an error"; - EXPECT_THAT(*Locations, - ElementsAre(FileRange(FooH, HeaderAnnotations.range()))); // Test a few positions that do not result in Locations. Locations = runFindDefinitions(Server, FooCpp, SourceAnnotations.point("4")); @@ -1092,13 +1090,9 @@ TEST(GoToInclude, All) { Locations = runFindDefinitions(Server, FooCpp, SourceAnnotations.point("5")); ASSERT_TRUE(bool(Locations)) << "findDefinitions returned an error"; - EXPECT_THAT(*Locations, - ElementsAre(FileRange(FooH, HeaderAnnotations.range()))); Locations = runFindDefinitions(Server, FooCpp, SourceAnnotations.point("7")); ASSERT_TRUE(bool(Locations)) << "findDefinitions returned an error"; - EXPECT_THAT(*Locations, - ElementsAre(FileRange(FooH, HeaderAnnotations.range()))); } TEST(GoToDefinition, WithPreamble) { From 8f1376fec9817523affa196f4faa5d073380736c Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Mon, 28 Jan 2019 13:38:10 +0000 Subject: [PATCH 050/125] Merging r352302: Redirecting to URL 'https://llvm.org/svn/llvm-project/lld/trunk': ------------------------------------------------------------------------ r352302 | nickdesaulniers | 2019-01-27 03:54:23 +0100 (Sun, 27 Jan 2019) | 33 lines lld: elf: discard more specific .gnu.linkonce section Summary: lld discards .gnu.linonce.* sections work around a bug in glibc. https://sourceware.org/bugzilla/show_bug.cgi?id=20543 Unfortunately, the Linux kernel uses a section named .gnu.linkonce.this_module to store infomation about kernel modules. The kernel reads data from this section when loading kernel modules, and errors if it fails to find this section. The current behavior of lld discards this section when kernel modules are linked, so kernel modules linked with lld are unloadable by the linux kernel. The Linux kernel should use a comdat section instead of .gnu.linkonce. The minimum version of binutils supported by the kernel supports comdat sections. The kernel is also not relying on the old linkonce behavior; it seems to have chosen a name that contains a deprecated GNU feature. Changing the section name now in the kernel would require all kernel modules to be recompiled to make use of the new section name. Instead, rather than discarding .gnu.linkonce.*, let's discard the more specific section name to continue working around the glibc issue while supporting linking Linux kernel modules. Link: https://github.com/ClangBuiltLinux/linux/issues/329 Reviewers: pcc, espindola Reviewed By: pcc Subscribers: nathanchance, emaste, arichardson, void, srhines Differential Revision: https://reviews.llvm.org/D57294 ------------------------------------------------------------------------ llvm-svn: 352355 --- lld/ELF/InputFiles.cpp | 3 +- lld/test/ELF/comdat-linkonce.s | 7 ++++- lld/test/ELF/no-discard-this_module.s | 41 +++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 2 deletions(-) create mode 100644 lld/test/ELF/no-discard-this_module.s diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index 117dd17894eb0b..bc7e61072e6422 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -736,7 +736,8 @@ InputSectionBase *ObjFile::createInputSection(const Elf_Shdr &Sec) { // sections. Drop those sections to avoid duplicate symbol errors. // FIXME: This is glibc PR20543, we should remove this hack once that has been // fixed for a while. - if (Name.startswith(".gnu.linkonce.")) + if (Name == ".gnu.linkonce.t.__x86.get_pc_thunk.bx" || + Name == ".gnu.linkonce.t.__i686.get_pc_thunk.bx") return &InputSection::Discarded; // If we are creating a new .build-id section, strip existing .build-id diff --git a/lld/test/ELF/comdat-linkonce.s b/lld/test/ELF/comdat-linkonce.s index 8721f58bb20ce1..8b1d4b362e86c3 100644 --- a/lld/test/ELF/comdat-linkonce.s +++ b/lld/test/ELF/comdat-linkonce.s @@ -4,7 +4,12 @@ // RUN: ld.lld -shared %t.o %t2.o -o %t // RUN: ld.lld -shared %t2.o %t.o -o %t -.section .gnu.linkonce.t.zed +.section .gnu.linkonce.t.__x86.get_pc_thunk.bx .globl abc abc: nop + +.section .gnu.linkonce.t.__i686.get_pc_thunk.bx +.globl def +def: +nop diff --git a/lld/test/ELF/no-discard-this_module.s b/lld/test/ELF/no-discard-this_module.s new file mode 100644 index 00000000000000..3ce56d165fc1a6 --- /dev/null +++ b/lld/test/ELF/no-discard-this_module.s @@ -0,0 +1,41 @@ +// REQUIRES: x86 +// RUN: llvm-mc -filetype=obj -triple=x86_64-linux-gnu -save-temp-labels %s -o %t +// RUN: ld.lld %t -o %t2 +// RUN: llvm-readobj -s -sd -t %t2 | FileCheck %s + +.global _start +_start: + +// This section and symbol is used by Linux kernel modules. Ensure it's not +// accidentally discarded. +.section .gnu.linkonce.this_module: +__this_module: +.byte 0x00 + +// CHECK: Section { +// CHECK: Index: +// CHECK: Name: .gnu.linkonce.this_module +// CHECK-NEXT: Type: SHT_PROGBITS +// CHECK-NEXT: Flags [ +// CHECK-NEXT: ] +// CHECK-NEXT: Address: +// CHECK-NEXT: Offset: +// CHECK-NEXT: Size: +// CHECK-NEXT: Link: +// CHECK-NEXT: Info: +// CHECK-NEXT: AddressAlignment: +// CHECK-NEXT: EntrySize: +// CHECK-NEXT: SectionData ( +// CHECK-NEXT: 0000: 00 |.| +// CHECK-NEXT: ) +// CHECK-NEXT: } + +// CHECK: Symbol { +// CHECK: Name: __this_module +// CHECK-NEXT: Value: +// CHECK-NEXT: Size: +// CHECK-NEXT: Binding: Local +// CHECK-NEXT: Type: None +// CHECK-NEXT: Other: +// CHECK-NEXT: Section: .gnu.linkonce.this_module: +// CHECK-NEXT: } From f86097d7e57fd146877b6d5bc1be12e133106cc9 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Mon, 28 Jan 2019 13:47:09 +0000 Subject: [PATCH 051/125] Merging r352323: Redirecting to URL 'https://llvm.org/svn/llvm-project/cfe/trunk': ------------------------------------------------------------------------ r352323 | rakete1111 | 2019-01-27 20:19:59 +0100 (Sun, 27 Jan 2019) | 31 lines [SemaCXX] Fix ICE with structure bindings to members of template Summary: Trying to use structure binding with a structure that doesn't implement std::tuple_size, should unpack the data members. When the struct is a template though, clang might hit an assertion (if the type has not been completed before), because CXXRecordDecl::DefinitionData is nullptr. This commit fixes the problem by completing the type while trying to decompose the structured binding. The ICE happens in real world code, for example, when trying to iterate a protobuf generated map with a range-based for loop and structure bindings (because google::protobuf::MapPair is a template and doesn't support std::tuple_size). Reported-by: nicholas.sun@nlsun.com Patch by Daniele Di Proietto Reviewers: #clang, rsmith Reviewed By: #clang, rsmith Subscribers: cpplearner, Rakete1111, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D56974 ------------------------------------------------------------------------ llvm-svn: 352356 --- clang/lib/Sema/SemaDeclCXX.cpp | 4 ++++ clang/test/SemaCXX/cxx1z-decomposition.cpp | 17 +++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 43b289d8d0de13..e66ee43307036f 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -1301,6 +1301,10 @@ static DeclAccessPair findDecomposableBaseClass(Sema &S, SourceLocation Loc, static bool checkMemberDecomposition(Sema &S, ArrayRef Bindings, ValueDecl *Src, QualType DecompType, const CXXRecordDecl *OrigRD) { + if (S.RequireCompleteType(Src->getLocation(), DecompType, + diag::err_incomplete_type)) + return true; + CXXCastPath BasePath; DeclAccessPair BasePair = findDecomposableBaseClass(S, Src->getLocation(), OrigRD, BasePath); diff --git a/clang/test/SemaCXX/cxx1z-decomposition.cpp b/clang/test/SemaCXX/cxx1z-decomposition.cpp index 3c9b181f1c8f0e..8b5fd6809bb4ce 100644 --- a/clang/test/SemaCXX/cxx1z-decomposition.cpp +++ b/clang/test/SemaCXX/cxx1z-decomposition.cpp @@ -81,4 +81,21 @@ struct PR37352 { void f() { static auto [a] = *this; } // expected-error {{cannot be declared 'static'}} }; +namespace instantiate_template { + +template +struct pair { + T1 a; + T2 b; +}; + +const pair &f1(); + +int f2() { + const auto &[a, b] = f1(); + return a + b; +} + +} // namespace instantiate_template + // FIXME: by-value array copies From 246bce481ff9c666f7c7409415b2bfa6da927e55 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Mon, 28 Jan 2019 13:52:09 +0000 Subject: [PATCH 052/125] Merging r352099: Redirecting to URL 'https://llvm.org/svn/llvm-project/cfe/trunk': ------------------------------------------------------------------------ r352099 | djg | 2019-01-24 21:31:11 +0100 (Thu, 24 Jan 2019) | 7 lines [WebAssembly] Factor commonality between wasm32 and wasm64 in test/Preprocessor/init.c Use the -check-prefixes= feature to merge most of the WEBASSEMBLY32 and WEBASSEMBLY64 test checks into a shared WEBASSEMBLY test check. Differential Revision: https://reviews.llvm.org/D57153 ------------------------------------------------------------------------ llvm-svn: 352359 --- clang/test/Preprocessor/init.c | 975 ++++++++++++--------------------- 1 file changed, 342 insertions(+), 633 deletions(-) diff --git a/clang/test/Preprocessor/init.c b/clang/test/Preprocessor/init.c index 940dddade33786..d852102424ec38 100644 --- a/clang/test/Preprocessor/init.c +++ b/clang/test/Preprocessor/init.c @@ -9110,667 +9110,376 @@ // // RUN: %clang_cc1 -E -dM -ffreestanding -triple=wasm32-unknown-unknown \ // RUN: < /dev/null \ -// RUN: | FileCheck -match-full-lines -check-prefix=WEBASSEMBLY32 %s +// RUN: | FileCheck -match-full-lines -check-prefixes=WEBASSEMBLY,WEBASSEMBLY32 %s +// RUN: %clang_cc1 -E -dM -ffreestanding -triple=wasm64-unknown-unknown \ +// RUN: < /dev/null \ +// RUN: | FileCheck -match-full-lines -check-prefixes=WEBASSEMBLY,WEBASSEMBLY64 %s // // WEBASSEMBLY32:#define _ILP32 1 // WEBASSEMBLY32-NOT:#define _LP64 -// WEBASSEMBLY32-NEXT:#define __ATOMIC_ACQUIRE 2 -// WEBASSEMBLY32-NEXT:#define __ATOMIC_ACQ_REL 4 -// WEBASSEMBLY32-NEXT:#define __ATOMIC_CONSUME 1 -// WEBASSEMBLY32-NEXT:#define __ATOMIC_RELAXED 0 -// WEBASSEMBLY32-NEXT:#define __ATOMIC_RELEASE 3 -// WEBASSEMBLY32-NEXT:#define __ATOMIC_SEQ_CST 5 -// WEBASSEMBLY32-NEXT:#define __BIGGEST_ALIGNMENT__ 16 -// WEBASSEMBLY32-NEXT:#define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__ -// WEBASSEMBLY32-NEXT:#define __CHAR16_TYPE__ unsigned short -// WEBASSEMBLY32-NEXT:#define __CHAR32_TYPE__ unsigned int -// WEBASSEMBLY32-NEXT:#define __CHAR_BIT__ 8 -// WEBASSEMBLY32-NOT:#define __CHAR_UNSIGNED__ -// WEBASSEMBLY32-NEXT:#define __CLANG_ATOMIC_BOOL_LOCK_FREE 2 -// WEBASSEMBLY32-NEXT:#define __CLANG_ATOMIC_CHAR16_T_LOCK_FREE 2 -// WEBASSEMBLY32-NEXT:#define __CLANG_ATOMIC_CHAR32_T_LOCK_FREE 2 -// WEBASSEMBLY32-NEXT:#define __CLANG_ATOMIC_CHAR_LOCK_FREE 2 -// WEBASSEMBLY32-NEXT:#define __CLANG_ATOMIC_INT_LOCK_FREE 2 -// WEBASSEMBLY32-NEXT:#define __CLANG_ATOMIC_LLONG_LOCK_FREE 2 -// WEBASSEMBLY32-NEXT:#define __CLANG_ATOMIC_LONG_LOCK_FREE 2 -// WEBASSEMBLY32-NEXT:#define __CLANG_ATOMIC_POINTER_LOCK_FREE 2 -// WEBASSEMBLY32-NEXT:#define __CLANG_ATOMIC_SHORT_LOCK_FREE 2 -// WEBASSEMBLY32-NEXT:#define __CLANG_ATOMIC_WCHAR_T_LOCK_FREE 2 -// WEBASSEMBLY32-NEXT:#define __CONSTANT_CFSTRINGS__ 1 -// WEBASSEMBLY32-NEXT:#define __DBL_DECIMAL_DIG__ 17 -// WEBASSEMBLY32-NEXT:#define __DBL_DENORM_MIN__ 4.9406564584124654e-324 -// WEBASSEMBLY32-NEXT:#define __DBL_DIG__ 15 -// WEBASSEMBLY32-NEXT:#define __DBL_EPSILON__ 2.2204460492503131e-16 -// WEBASSEMBLY32-NEXT:#define __DBL_HAS_DENORM__ 1 -// WEBASSEMBLY32-NEXT:#define __DBL_HAS_INFINITY__ 1 -// WEBASSEMBLY32-NEXT:#define __DBL_HAS_QUIET_NAN__ 1 -// WEBASSEMBLY32-NEXT:#define __DBL_MANT_DIG__ 53 -// WEBASSEMBLY32-NEXT:#define __DBL_MAX_10_EXP__ 308 -// WEBASSEMBLY32-NEXT:#define __DBL_MAX_EXP__ 1024 -// WEBASSEMBLY32-NEXT:#define __DBL_MAX__ 1.7976931348623157e+308 -// WEBASSEMBLY32-NEXT:#define __DBL_MIN_10_EXP__ (-307) -// WEBASSEMBLY32-NEXT:#define __DBL_MIN_EXP__ (-1021) -// WEBASSEMBLY32-NEXT:#define __DBL_MIN__ 2.2250738585072014e-308 -// WEBASSEMBLY32-NEXT:#define __DECIMAL_DIG__ __LDBL_DECIMAL_DIG__ -// WEBASSEMBLY32-NOT:#define __ELF__ -// WEBASSEMBLY32-NEXT:#define __FINITE_MATH_ONLY__ 0 -// WEBASSEMBLY32:#define __FLT_DECIMAL_DIG__ 9 -// WEBASSEMBLY32-NEXT:#define __FLT_DENORM_MIN__ 1.40129846e-45F -// WEBASSEMBLY32-NEXT:#define __FLT_DIG__ 6 -// WEBASSEMBLY32-NEXT:#define __FLT_EPSILON__ 1.19209290e-7F -// WEBASSEMBLY32-NEXT:#define __FLT_EVAL_METHOD__ 0 -// WEBASSEMBLY32-NEXT:#define __FLT_HAS_DENORM__ 1 -// WEBASSEMBLY32-NEXT:#define __FLT_HAS_INFINITY__ 1 -// WEBASSEMBLY32-NEXT:#define __FLT_HAS_QUIET_NAN__ 1 -// WEBASSEMBLY32-NEXT:#define __FLT_MANT_DIG__ 24 -// WEBASSEMBLY32-NEXT:#define __FLT_MAX_10_EXP__ 38 -// WEBASSEMBLY32-NEXT:#define __FLT_MAX_EXP__ 128 -// WEBASSEMBLY32-NEXT:#define __FLT_MAX__ 3.40282347e+38F -// WEBASSEMBLY32-NEXT:#define __FLT_MIN_10_EXP__ (-37) -// WEBASSEMBLY32-NEXT:#define __FLT_MIN_EXP__ (-125) -// WEBASSEMBLY32-NEXT:#define __FLT_MIN__ 1.17549435e-38F -// WEBASSEMBLY32-NEXT:#define __FLT_RADIX__ 2 -// WEBASSEMBLY32-NEXT:#define __GCC_ATOMIC_BOOL_LOCK_FREE 2 -// WEBASSEMBLY32-NEXT:#define __GCC_ATOMIC_CHAR16_T_LOCK_FREE 2 -// WEBASSEMBLY32-NEXT:#define __GCC_ATOMIC_CHAR32_T_LOCK_FREE 2 -// WEBASSEMBLY32-NEXT:#define __GCC_ATOMIC_CHAR_LOCK_FREE 2 -// WEBASSEMBLY32-NEXT:#define __GCC_ATOMIC_INT_LOCK_FREE 2 -// WEBASSEMBLY32-NEXT:#define __GCC_ATOMIC_LLONG_LOCK_FREE 2 -// WEBASSEMBLY32-NEXT:#define __GCC_ATOMIC_LONG_LOCK_FREE 2 -// WEBASSEMBLY32-NEXT:#define __GCC_ATOMIC_POINTER_LOCK_FREE 2 -// WEBASSEMBLY32-NEXT:#define __GCC_ATOMIC_SHORT_LOCK_FREE 2 -// WEBASSEMBLY32-NEXT:#define __GCC_ATOMIC_TEST_AND_SET_TRUEVAL 1 -// WEBASSEMBLY32-NEXT:#define __GCC_ATOMIC_WCHAR_T_LOCK_FREE 2 -// WEBASSEMBLY32-NEXT:#define __GNUC_MINOR__ {{.*}} -// WEBASSEMBLY32-NEXT:#define __GNUC_PATCHLEVEL__ {{.*}} -// WEBASSEMBLY32-NEXT:#define __GNUC_STDC_INLINE__ 1 -// WEBASSEMBLY32-NEXT:#define __GNUC__ {{.*}} -// WEBASSEMBLY32-NEXT:#define __GXX_ABI_VERSION 1002 +// WEBASSEMBLY64-NOT:#define _ILP32 +// WEBASSEMBLY64:#define _LP64 1 +// WEBASSEMBLY-NEXT:#define __ATOMIC_ACQUIRE 2 +// WEBASSEMBLY-NEXT:#define __ATOMIC_ACQ_REL 4 +// WEBASSEMBLY-NEXT:#define __ATOMIC_CONSUME 1 +// WEBASSEMBLY-NEXT:#define __ATOMIC_RELAXED 0 +// WEBASSEMBLY-NEXT:#define __ATOMIC_RELEASE 3 +// WEBASSEMBLY-NEXT:#define __ATOMIC_SEQ_CST 5 +// WEBASSEMBLY-NEXT:#define __BIGGEST_ALIGNMENT__ 16 +// WEBASSEMBLY-NEXT:#define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__ +// WEBASSEMBLY-NEXT:#define __CHAR16_TYPE__ unsigned short +// WEBASSEMBLY-NEXT:#define __CHAR32_TYPE__ unsigned int +// WEBASSEMBLY-NEXT:#define __CHAR_BIT__ 8 +// WEBASSEMBLY-NOT:#define __CHAR_UNSIGNED__ +// WEBASSEMBLY-NEXT:#define __CLANG_ATOMIC_BOOL_LOCK_FREE 2 +// WEBASSEMBLY-NEXT:#define __CLANG_ATOMIC_CHAR16_T_LOCK_FREE 2 +// WEBASSEMBLY-NEXT:#define __CLANG_ATOMIC_CHAR32_T_LOCK_FREE 2 +// WEBASSEMBLY-NEXT:#define __CLANG_ATOMIC_CHAR_LOCK_FREE 2 +// WEBASSEMBLY-NEXT:#define __CLANG_ATOMIC_INT_LOCK_FREE 2 +// WEBASSEMBLY-NEXT:#define __CLANG_ATOMIC_LLONG_LOCK_FREE 2 +// WEBASSEMBLY-NEXT:#define __CLANG_ATOMIC_LONG_LOCK_FREE 2 +// WEBASSEMBLY-NEXT:#define __CLANG_ATOMIC_POINTER_LOCK_FREE 2 +// WEBASSEMBLY-NEXT:#define __CLANG_ATOMIC_SHORT_LOCK_FREE 2 +// WEBASSEMBLY-NEXT:#define __CLANG_ATOMIC_WCHAR_T_LOCK_FREE 2 +// WEBASSEMBLY-NEXT:#define __CONSTANT_CFSTRINGS__ 1 +// WEBASSEMBLY-NEXT:#define __DBL_DECIMAL_DIG__ 17 +// WEBASSEMBLY-NEXT:#define __DBL_DENORM_MIN__ 4.9406564584124654e-324 +// WEBASSEMBLY-NEXT:#define __DBL_DIG__ 15 +// WEBASSEMBLY-NEXT:#define __DBL_EPSILON__ 2.2204460492503131e-16 +// WEBASSEMBLY-NEXT:#define __DBL_HAS_DENORM__ 1 +// WEBASSEMBLY-NEXT:#define __DBL_HAS_INFINITY__ 1 +// WEBASSEMBLY-NEXT:#define __DBL_HAS_QUIET_NAN__ 1 +// WEBASSEMBLY-NEXT:#define __DBL_MANT_DIG__ 53 +// WEBASSEMBLY-NEXT:#define __DBL_MAX_10_EXP__ 308 +// WEBASSEMBLY-NEXT:#define __DBL_MAX_EXP__ 1024 +// WEBASSEMBLY-NEXT:#define __DBL_MAX__ 1.7976931348623157e+308 +// WEBASSEMBLY-NEXT:#define __DBL_MIN_10_EXP__ (-307) +// WEBASSEMBLY-NEXT:#define __DBL_MIN_EXP__ (-1021) +// WEBASSEMBLY-NEXT:#define __DBL_MIN__ 2.2250738585072014e-308 +// WEBASSEMBLY-NEXT:#define __DECIMAL_DIG__ __LDBL_DECIMAL_DIG__ +// WEBASSEMBLY-NOT:#define __ELF__ +// WEBASSEMBLY-NEXT:#define __FINITE_MATH_ONLY__ 0 +// WEBASSEMBLY-NEXT:#define __FLT16_DECIMAL_DIG__ 5 +// WEBASSEMBLY-NEXT:#define __FLT16_DENORM_MIN__ 5.9604644775390625e-8F16 +// WEBASSEMBLY-NEXT:#define __FLT16_DIG__ 3 +// WEBASSEMBLY-NEXT:#define __FLT16_EPSILON__ 9.765625e-4F16 +// WEBASSEMBLY-NEXT:#define __FLT16_HAS_DENORM__ 1 +// WEBASSEMBLY-NEXT:#define __FLT16_HAS_INFINITY__ 1 +// WEBASSEMBLY-NEXT:#define __FLT16_HAS_QUIET_NAN__ 1 +// WEBASSEMBLY-NEXT:#define __FLT16_MANT_DIG__ 11 +// WEBASSEMBLY-NEXT:#define __FLT16_MAX_10_EXP__ 4 +// WEBASSEMBLY-NEXT:#define __FLT16_MAX_EXP__ 15 +// WEBASSEMBLY-NEXT:#define __FLT16_MAX__ 6.5504e+4F16 +// WEBASSEMBLY-NEXT:#define __FLT16_MIN_10_EXP__ (-13) +// WEBASSEMBLY-NEXT:#define __FLT16_MIN_EXP__ (-14) +// WEBASSEMBLY-NEXT:#define __FLT16_MIN__ 6.103515625e-5F16 +// WEBASSEMBLY-NEXT:#define __FLT_DECIMAL_DIG__ 9 +// WEBASSEMBLY-NEXT:#define __FLT_DENORM_MIN__ 1.40129846e-45F +// WEBASSEMBLY-NEXT:#define __FLT_DIG__ 6 +// WEBASSEMBLY-NEXT:#define __FLT_EPSILON__ 1.19209290e-7F +// WEBASSEMBLY-NEXT:#define __FLT_EVAL_METHOD__ 0 +// WEBASSEMBLY-NEXT:#define __FLT_HAS_DENORM__ 1 +// WEBASSEMBLY-NEXT:#define __FLT_HAS_INFINITY__ 1 +// WEBASSEMBLY-NEXT:#define __FLT_HAS_QUIET_NAN__ 1 +// WEBASSEMBLY-NEXT:#define __FLT_MANT_DIG__ 24 +// WEBASSEMBLY-NEXT:#define __FLT_MAX_10_EXP__ 38 +// WEBASSEMBLY-NEXT:#define __FLT_MAX_EXP__ 128 +// WEBASSEMBLY-NEXT:#define __FLT_MAX__ 3.40282347e+38F +// WEBASSEMBLY-NEXT:#define __FLT_MIN_10_EXP__ (-37) +// WEBASSEMBLY-NEXT:#define __FLT_MIN_EXP__ (-125) +// WEBASSEMBLY-NEXT:#define __FLT_MIN__ 1.17549435e-38F +// WEBASSEMBLY-NEXT:#define __FLT_RADIX__ 2 +// WEBASSEMBLY-NEXT:#define __GCC_ATOMIC_BOOL_LOCK_FREE 2 +// WEBASSEMBLY-NEXT:#define __GCC_ATOMIC_CHAR16_T_LOCK_FREE 2 +// WEBASSEMBLY-NEXT:#define __GCC_ATOMIC_CHAR32_T_LOCK_FREE 2 +// WEBASSEMBLY-NEXT:#define __GCC_ATOMIC_CHAR_LOCK_FREE 2 +// WEBASSEMBLY-NEXT:#define __GCC_ATOMIC_INT_LOCK_FREE 2 +// WEBASSEMBLY-NEXT:#define __GCC_ATOMIC_LLONG_LOCK_FREE 2 +// WEBASSEMBLY-NEXT:#define __GCC_ATOMIC_LONG_LOCK_FREE 2 +// WEBASSEMBLY-NEXT:#define __GCC_ATOMIC_POINTER_LOCK_FREE 2 +// WEBASSEMBLY-NEXT:#define __GCC_ATOMIC_SHORT_LOCK_FREE 2 +// WEBASSEMBLY-NEXT:#define __GCC_ATOMIC_TEST_AND_SET_TRUEVAL 1 +// WEBASSEMBLY-NEXT:#define __GCC_ATOMIC_WCHAR_T_LOCK_FREE 2 +// WEBASSEMBLY-NEXT:#define __GNUC_MINOR__ {{.*}} +// WEBASSEMBLY-NEXT:#define __GNUC_PATCHLEVEL__ {{.*}} +// WEBASSEMBLY-NEXT:#define __GNUC_STDC_INLINE__ 1 +// WEBASSEMBLY-NEXT:#define __GNUC__ {{.*}} +// WEBASSEMBLY-NEXT:#define __GXX_ABI_VERSION 1002 // WEBASSEMBLY32-NEXT:#define __ILP32__ 1 -// WEBASSEMBLY32-NEXT:#define __INT16_C_SUFFIX__ -// WEBASSEMBLY32-NEXT:#define __INT16_FMTd__ "hd" -// WEBASSEMBLY32-NEXT:#define __INT16_FMTi__ "hi" -// WEBASSEMBLY32-NEXT:#define __INT16_MAX__ 32767 -// WEBASSEMBLY32-NEXT:#define __INT16_TYPE__ short -// WEBASSEMBLY32-NEXT:#define __INT32_C_SUFFIX__ -// WEBASSEMBLY32-NEXT:#define __INT32_FMTd__ "d" -// WEBASSEMBLY32-NEXT:#define __INT32_FMTi__ "i" -// WEBASSEMBLY32-NEXT:#define __INT32_MAX__ 2147483647 -// WEBASSEMBLY32-NEXT:#define __INT32_TYPE__ int -// WEBASSEMBLY32-NEXT:#define __INT64_C_SUFFIX__ LL -// WEBASSEMBLY32-NEXT:#define __INT64_FMTd__ "lld" -// WEBASSEMBLY32-NEXT:#define __INT64_FMTi__ "lli" -// WEBASSEMBLY32-NEXT:#define __INT64_MAX__ 9223372036854775807LL -// WEBASSEMBLY32-NEXT:#define __INT64_TYPE__ long long int -// WEBASSEMBLY32-NEXT:#define __INT8_C_SUFFIX__ -// WEBASSEMBLY32-NEXT:#define __INT8_FMTd__ "hhd" -// WEBASSEMBLY32-NEXT:#define __INT8_FMTi__ "hhi" -// WEBASSEMBLY32-NEXT:#define __INT8_MAX__ 127 -// WEBASSEMBLY32-NEXT:#define __INT8_TYPE__ signed char -// WEBASSEMBLY32-NEXT:#define __INTMAX_C_SUFFIX__ LL -// WEBASSEMBLY32-NEXT:#define __INTMAX_FMTd__ "lld" -// WEBASSEMBLY32-NEXT:#define __INTMAX_FMTi__ "lli" -// WEBASSEMBLY32-NEXT:#define __INTMAX_MAX__ 9223372036854775807LL -// WEBASSEMBLY32-NEXT:#define __INTMAX_TYPE__ long long int -// WEBASSEMBLY32-NEXT:#define __INTMAX_WIDTH__ 64 -// WEBASSEMBLY32-NEXT:#define __INTPTR_FMTd__ "ld" -// WEBASSEMBLY32-NEXT:#define __INTPTR_FMTi__ "li" +// WEBASSEMBLY64-NOT:#define __ILP32__ +// WEBASSEMBLY-NEXT:#define __INT16_C_SUFFIX__ +// WEBASSEMBLY-NEXT:#define __INT16_FMTd__ "hd" +// WEBASSEMBLY-NEXT:#define __INT16_FMTi__ "hi" +// WEBASSEMBLY-NEXT:#define __INT16_MAX__ 32767 +// WEBASSEMBLY-NEXT:#define __INT16_TYPE__ short +// WEBASSEMBLY-NEXT:#define __INT32_C_SUFFIX__ +// WEBASSEMBLY-NEXT:#define __INT32_FMTd__ "d" +// WEBASSEMBLY-NEXT:#define __INT32_FMTi__ "i" +// WEBASSEMBLY-NEXT:#define __INT32_MAX__ 2147483647 +// WEBASSEMBLY-NEXT:#define __INT32_TYPE__ int +// WEBASSEMBLY-NEXT:#define __INT64_C_SUFFIX__ LL +// WEBASSEMBLY-NEXT:#define __INT64_FMTd__ "lld" +// WEBASSEMBLY-NEXT:#define __INT64_FMTi__ "lli" +// WEBASSEMBLY-NEXT:#define __INT64_MAX__ 9223372036854775807LL +// WEBASSEMBLY-NEXT:#define __INT64_TYPE__ long long int +// WEBASSEMBLY-NEXT:#define __INT8_C_SUFFIX__ +// WEBASSEMBLY-NEXT:#define __INT8_FMTd__ "hhd" +// WEBASSEMBLY-NEXT:#define __INT8_FMTi__ "hhi" +// WEBASSEMBLY-NEXT:#define __INT8_MAX__ 127 +// WEBASSEMBLY-NEXT:#define __INT8_TYPE__ signed char +// WEBASSEMBLY-NEXT:#define __INTMAX_C_SUFFIX__ LL +// WEBASSEMBLY-NEXT:#define __INTMAX_FMTd__ "lld" +// WEBASSEMBLY-NEXT:#define __INTMAX_FMTi__ "lli" +// WEBASSEMBLY-NEXT:#define __INTMAX_MAX__ 9223372036854775807LL +// WEBASSEMBLY-NEXT:#define __INTMAX_TYPE__ long long int +// WEBASSEMBLY-NEXT:#define __INTMAX_WIDTH__ 64 +// WEBASSEMBLY-NEXT:#define __INTPTR_FMTd__ "ld" +// WEBASSEMBLY-NEXT:#define __INTPTR_FMTi__ "li" // WEBASSEMBLY32-NEXT:#define __INTPTR_MAX__ 2147483647L -// WEBASSEMBLY32-NEXT:#define __INTPTR_TYPE__ long int +// WEBASSEMBLY64-NEXT:#define __INTPTR_MAX__ 9223372036854775807L +// WEBASSEMBLY-NEXT:#define __INTPTR_TYPE__ long int // WEBASSEMBLY32-NEXT:#define __INTPTR_WIDTH__ 32 -// WEBASSEMBLY32-NEXT:#define __INT_FAST16_FMTd__ "hd" -// WEBASSEMBLY32-NEXT:#define __INT_FAST16_FMTi__ "hi" -// WEBASSEMBLY32-NEXT:#define __INT_FAST16_MAX__ 32767 -// WEBASSEMBLY32-NEXT:#define __INT_FAST16_TYPE__ short -// WEBASSEMBLY32-NEXT:#define __INT_FAST32_FMTd__ "d" -// WEBASSEMBLY32-NEXT:#define __INT_FAST32_FMTi__ "i" -// WEBASSEMBLY32-NEXT:#define __INT_FAST32_MAX__ 2147483647 -// WEBASSEMBLY32-NEXT:#define __INT_FAST32_TYPE__ int -// WEBASSEMBLY32-NEXT:#define __INT_FAST64_FMTd__ "lld" -// WEBASSEMBLY32-NEXT:#define __INT_FAST64_FMTi__ "lli" -// WEBASSEMBLY32-NEXT:#define __INT_FAST64_MAX__ 9223372036854775807LL -// WEBASSEMBLY32-NEXT:#define __INT_FAST64_TYPE__ long long int -// WEBASSEMBLY32-NEXT:#define __INT_FAST8_FMTd__ "hhd" -// WEBASSEMBLY32-NEXT:#define __INT_FAST8_FMTi__ "hhi" -// WEBASSEMBLY32-NEXT:#define __INT_FAST8_MAX__ 127 -// WEBASSEMBLY32-NEXT:#define __INT_FAST8_TYPE__ signed char -// WEBASSEMBLY32-NEXT:#define __INT_LEAST16_FMTd__ "hd" -// WEBASSEMBLY32-NEXT:#define __INT_LEAST16_FMTi__ "hi" -// WEBASSEMBLY32-NEXT:#define __INT_LEAST16_MAX__ 32767 -// WEBASSEMBLY32-NEXT:#define __INT_LEAST16_TYPE__ short -// WEBASSEMBLY32-NEXT:#define __INT_LEAST32_FMTd__ "d" -// WEBASSEMBLY32-NEXT:#define __INT_LEAST32_FMTi__ "i" -// WEBASSEMBLY32-NEXT:#define __INT_LEAST32_MAX__ 2147483647 -// WEBASSEMBLY32-NEXT:#define __INT_LEAST32_TYPE__ int -// WEBASSEMBLY32-NEXT:#define __INT_LEAST64_FMTd__ "lld" -// WEBASSEMBLY32-NEXT:#define __INT_LEAST64_FMTi__ "lli" -// WEBASSEMBLY32-NEXT:#define __INT_LEAST64_MAX__ 9223372036854775807LL -// WEBASSEMBLY32-NEXT:#define __INT_LEAST64_TYPE__ long long int -// WEBASSEMBLY32-NEXT:#define __INT_LEAST8_FMTd__ "hhd" -// WEBASSEMBLY32-NEXT:#define __INT_LEAST8_FMTi__ "hhi" -// WEBASSEMBLY32-NEXT:#define __INT_LEAST8_MAX__ 127 -// WEBASSEMBLY32-NEXT:#define __INT_LEAST8_TYPE__ signed char -// WEBASSEMBLY32-NEXT:#define __INT_MAX__ 2147483647 -// WEBASSEMBLY32-NEXT:#define __LDBL_DECIMAL_DIG__ 36 -// WEBASSEMBLY32-NEXT:#define __LDBL_DENORM_MIN__ 6.47517511943802511092443895822764655e-4966L -// WEBASSEMBLY32-NEXT:#define __LDBL_DIG__ 33 -// WEBASSEMBLY32-NEXT:#define __LDBL_EPSILON__ 1.92592994438723585305597794258492732e-34L -// WEBASSEMBLY32-NEXT:#define __LDBL_HAS_DENORM__ 1 -// WEBASSEMBLY32-NEXT:#define __LDBL_HAS_INFINITY__ 1 -// WEBASSEMBLY32-NEXT:#define __LDBL_HAS_QUIET_NAN__ 1 -// WEBASSEMBLY32-NEXT:#define __LDBL_MANT_DIG__ 113 -// WEBASSEMBLY32-NEXT:#define __LDBL_MAX_10_EXP__ 4932 -// WEBASSEMBLY32-NEXT:#define __LDBL_MAX_EXP__ 16384 -// WEBASSEMBLY32-NEXT:#define __LDBL_MAX__ 1.18973149535723176508575932662800702e+4932L -// WEBASSEMBLY32-NEXT:#define __LDBL_MIN_10_EXP__ (-4931) -// WEBASSEMBLY32-NEXT:#define __LDBL_MIN_EXP__ (-16381) -// WEBASSEMBLY32-NEXT:#define __LDBL_MIN__ 3.36210314311209350626267781732175260e-4932L -// WEBASSEMBLY32-NEXT:#define __LITTLE_ENDIAN__ 1 -// WEBASSEMBLY32-NEXT:#define __LONG_LONG_MAX__ 9223372036854775807LL +// WEBASSEMBLY64-NEXT:#define __INTPTR_WIDTH__ 64 +// WEBASSEMBLY-NEXT:#define __INT_FAST16_FMTd__ "hd" +// WEBASSEMBLY-NEXT:#define __INT_FAST16_FMTi__ "hi" +// WEBASSEMBLY-NEXT:#define __INT_FAST16_MAX__ 32767 +// WEBASSEMBLY-NEXT:#define __INT_FAST16_TYPE__ short +// WEBASSEMBLY-NEXT:#define __INT_FAST32_FMTd__ "d" +// WEBASSEMBLY-NEXT:#define __INT_FAST32_FMTi__ "i" +// WEBASSEMBLY-NEXT:#define __INT_FAST32_MAX__ 2147483647 +// WEBASSEMBLY-NEXT:#define __INT_FAST32_TYPE__ int +// WEBASSEMBLY-NEXT:#define __INT_FAST64_FMTd__ "lld" +// WEBASSEMBLY-NEXT:#define __INT_FAST64_FMTi__ "lli" +// WEBASSEMBLY-NEXT:#define __INT_FAST64_MAX__ 9223372036854775807LL +// WEBASSEMBLY-NEXT:#define __INT_FAST64_TYPE__ long long int +// WEBASSEMBLY-NEXT:#define __INT_FAST8_FMTd__ "hhd" +// WEBASSEMBLY-NEXT:#define __INT_FAST8_FMTi__ "hhi" +// WEBASSEMBLY-NEXT:#define __INT_FAST8_MAX__ 127 +// WEBASSEMBLY-NEXT:#define __INT_FAST8_TYPE__ signed char +// WEBASSEMBLY-NEXT:#define __INT_LEAST16_FMTd__ "hd" +// WEBASSEMBLY-NEXT:#define __INT_LEAST16_FMTi__ "hi" +// WEBASSEMBLY-NEXT:#define __INT_LEAST16_MAX__ 32767 +// WEBASSEMBLY-NEXT:#define __INT_LEAST16_TYPE__ short +// WEBASSEMBLY-NEXT:#define __INT_LEAST32_FMTd__ "d" +// WEBASSEMBLY-NEXT:#define __INT_LEAST32_FMTi__ "i" +// WEBASSEMBLY-NEXT:#define __INT_LEAST32_MAX__ 2147483647 +// WEBASSEMBLY-NEXT:#define __INT_LEAST32_TYPE__ int +// WEBASSEMBLY-NEXT:#define __INT_LEAST64_FMTd__ "lld" +// WEBASSEMBLY-NEXT:#define __INT_LEAST64_FMTi__ "lli" +// WEBASSEMBLY-NEXT:#define __INT_LEAST64_MAX__ 9223372036854775807LL +// WEBASSEMBLY-NEXT:#define __INT_LEAST64_TYPE__ long long int +// WEBASSEMBLY-NEXT:#define __INT_LEAST8_FMTd__ "hhd" +// WEBASSEMBLY-NEXT:#define __INT_LEAST8_FMTi__ "hhi" +// WEBASSEMBLY-NEXT:#define __INT_LEAST8_MAX__ 127 +// WEBASSEMBLY-NEXT:#define __INT_LEAST8_TYPE__ signed char +// WEBASSEMBLY-NEXT:#define __INT_MAX__ 2147483647 +// WEBASSEMBLY-NEXT:#define __LDBL_DECIMAL_DIG__ 36 +// WEBASSEMBLY-NEXT:#define __LDBL_DENORM_MIN__ 6.47517511943802511092443895822764655e-4966L +// WEBASSEMBLY-NEXT:#define __LDBL_DIG__ 33 +// WEBASSEMBLY-NEXT:#define __LDBL_EPSILON__ 1.92592994438723585305597794258492732e-34L +// WEBASSEMBLY-NEXT:#define __LDBL_HAS_DENORM__ 1 +// WEBASSEMBLY-NEXT:#define __LDBL_HAS_INFINITY__ 1 +// WEBASSEMBLY-NEXT:#define __LDBL_HAS_QUIET_NAN__ 1 +// WEBASSEMBLY-NEXT:#define __LDBL_MANT_DIG__ 113 +// WEBASSEMBLY-NEXT:#define __LDBL_MAX_10_EXP__ 4932 +// WEBASSEMBLY-NEXT:#define __LDBL_MAX_EXP__ 16384 +// WEBASSEMBLY-NEXT:#define __LDBL_MAX__ 1.18973149535723176508575932662800702e+4932L +// WEBASSEMBLY-NEXT:#define __LDBL_MIN_10_EXP__ (-4931) +// WEBASSEMBLY-NEXT:#define __LDBL_MIN_EXP__ (-16381) +// WEBASSEMBLY-NEXT:#define __LDBL_MIN__ 3.36210314311209350626267781732175260e-4932L +// WEBASSEMBLY-NEXT:#define __LITTLE_ENDIAN__ 1 +// WEBASSEMBLY-NEXT:#define __LONG_LONG_MAX__ 9223372036854775807LL // WEBASSEMBLY32-NEXT:#define __LONG_MAX__ 2147483647L // WEBASSEMBLY32-NOT:#define __LP64__ -// WEBASSEMBLY32-NEXT:#define __NO_INLINE__ 1 -// WEBASSEMBLY32-NEXT:#define __OBJC_BOOL_IS_BOOL 0 -// WEBASSEMBLY32-NEXT:#define __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES 3 -// WEBASSEMBLY32-NEXT:#define __OPENCL_MEMORY_SCOPE_DEVICE 2 -// WEBASSEMBLY32-NEXT:#define __OPENCL_MEMORY_SCOPE_SUB_GROUP 4 -// WEBASSEMBLY32-NEXT:#define __OPENCL_MEMORY_SCOPE_WORK_GROUP 1 -// WEBASSEMBLY32-NEXT:#define __OPENCL_MEMORY_SCOPE_WORK_ITEM 0 -// WEBASSEMBLY32-NEXT:#define __ORDER_BIG_ENDIAN__ 4321 -// WEBASSEMBLY32-NEXT:#define __ORDER_LITTLE_ENDIAN__ 1234 -// WEBASSEMBLY32-NEXT:#define __ORDER_PDP_ENDIAN__ 3412 +// WEBASSEMBLY64-NEXT:#define __LONG_MAX__ 9223372036854775807L +// WEBASSEMBLY64-NEXT:#define __LP64__ 1 +// WEBASSEMBLY-NEXT:#define __NO_INLINE__ 1 +// WEBASSEMBLY-NEXT:#define __OBJC_BOOL_IS_BOOL 0 +// WEBASSEMBLY-NEXT:#define __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES 3 +// WEBASSEMBLY-NEXT:#define __OPENCL_MEMORY_SCOPE_DEVICE 2 +// WEBASSEMBLY-NEXT:#define __OPENCL_MEMORY_SCOPE_SUB_GROUP 4 +// WEBASSEMBLY-NEXT:#define __OPENCL_MEMORY_SCOPE_WORK_GROUP 1 +// WEBASSEMBLY-NEXT:#define __OPENCL_MEMORY_SCOPE_WORK_ITEM 0 +// WEBASSEMBLY-NEXT:#define __ORDER_BIG_ENDIAN__ 4321 +// WEBASSEMBLY-NEXT:#define __ORDER_LITTLE_ENDIAN__ 1234 +// WEBASSEMBLY-NEXT:#define __ORDER_PDP_ENDIAN__ 3412 // WEBASSEMBLY32-NEXT:#define __POINTER_WIDTH__ 32 -// WEBASSEMBLY32-NEXT:#define __PRAGMA_REDEFINE_EXTNAME 1 -// WEBASSEMBLY32-NEXT:#define __PTRDIFF_FMTd__ "ld" -// WEBASSEMBLY32-NEXT:#define __PTRDIFF_FMTi__ "li" +// WEBASSEMBLY64-NEXT:#define __POINTER_WIDTH__ 64 +// WEBASSEMBLY-NEXT:#define __PRAGMA_REDEFINE_EXTNAME 1 +// WEBASSEMBLY-NEXT:#define __PTRDIFF_FMTd__ "ld" +// WEBASSEMBLY-NEXT:#define __PTRDIFF_FMTi__ "li" // WEBASSEMBLY32-NEXT:#define __PTRDIFF_MAX__ 2147483647L -// WEBASSEMBLY32-NEXT:#define __PTRDIFF_TYPE__ long int +// WEBASSEMBLY64-NEXT:#define __PTRDIFF_MAX__ 9223372036854775807L +// WEBASSEMBLY-NEXT:#define __PTRDIFF_TYPE__ long int // WEBASSEMBLY32-NEXT:#define __PTRDIFF_WIDTH__ 32 -// WEBASSEMBLY32-NOT:#define __REGISTER_PREFIX__ -// WEBASSEMBLY32-NEXT:#define __SCHAR_MAX__ 127 -// WEBASSEMBLY32-NEXT:#define __SHRT_MAX__ 32767 +// WEBASSEMBLY64-NEXT:#define __PTRDIFF_WIDTH__ 64 +// WEBASSEMBLY-NOT:#define __REGISTER_PREFIX__ +// WEBASSEMBLY-NEXT:#define __SCHAR_MAX__ 127 +// WEBASSEMBLY-NEXT:#define __SHRT_MAX__ 32767 // WEBASSEMBLY32-NEXT:#define __SIG_ATOMIC_MAX__ 2147483647L // WEBASSEMBLY32-NEXT:#define __SIG_ATOMIC_WIDTH__ 32 -// WEBASSEMBLY32-NEXT:#define __SIZEOF_DOUBLE__ 8 -// WEBASSEMBLY32-NEXT:#define __SIZEOF_FLOAT__ 4 -// WEBASSEMBLY32-NEXT:#define __SIZEOF_INT128__ 16 -// WEBASSEMBLY32-NEXT:#define __SIZEOF_INT__ 4 -// WEBASSEMBLY32-NEXT:#define __SIZEOF_LONG_DOUBLE__ 16 -// WEBASSEMBLY32-NEXT:#define __SIZEOF_LONG_LONG__ 8 +// WEBASSEMBLY64-NEXT:#define __SIG_ATOMIC_MAX__ 9223372036854775807L +// WEBASSEMBLY64-NEXT:#define __SIG_ATOMIC_WIDTH__ 64 +// WEBASSEMBLY-NEXT:#define __SIZEOF_DOUBLE__ 8 +// WEBASSEMBLY-NEXT:#define __SIZEOF_FLOAT__ 4 +// WEBASSEMBLY-NEXT:#define __SIZEOF_INT128__ 16 +// WEBASSEMBLY-NEXT:#define __SIZEOF_INT__ 4 +// WEBASSEMBLY-NEXT:#define __SIZEOF_LONG_DOUBLE__ 16 +// WEBASSEMBLY-NEXT:#define __SIZEOF_LONG_LONG__ 8 // WEBASSEMBLY32-NEXT:#define __SIZEOF_LONG__ 4 // WEBASSEMBLY32-NEXT:#define __SIZEOF_POINTER__ 4 // WEBASSEMBLY32-NEXT:#define __SIZEOF_PTRDIFF_T__ 4 -// WEBASSEMBLY32-NEXT:#define __SIZEOF_SHORT__ 2 -// WEBASSEMBLY32-NEXT:#define __SIZEOF_SIZE_T__ 4 -// WEBASSEMBLY32-NEXT:#define __SIZEOF_WCHAR_T__ 4 -// WEBASSEMBLY32-NEXT:#define __SIZEOF_WINT_T__ 4 -// WEBASSEMBLY32-NEXT:#define __SIZE_FMTX__ "lX" -// WEBASSEMBLY32-NEXT:#define __SIZE_FMTo__ "lo" -// WEBASSEMBLY32-NEXT:#define __SIZE_FMTu__ "lu" -// WEBASSEMBLY32-NEXT:#define __SIZE_FMTx__ "lx" -// WEBASSEMBLY32-NEXT:#define __SIZE_MAX__ 4294967295UL -// WEBASSEMBLY32-NEXT:#define __SIZE_TYPE__ long unsigned int -// WEBASSEMBLY32-NEXT:#define __SIZE_WIDTH__ 32 -// WEBASSEMBLY32-NEXT:#define __STDC_HOSTED__ 0 -// WEBASSEMBLY32-NOT:#define __STDC_MB_MIGHT_NEQ_WC__ -// WEBASSEMBLY32-NOT:#define __STDC_NO_ATOMICS__ -// WEBASSEMBLY32-NOT:#define __STDC_NO_COMPLEX__ -// WEBASSEMBLY32-NOT:#define __STDC_NO_VLA__ -// WEBASSEMBLY32-NOT:#define __STDC_NO_THREADS__ -// WEBASSEMBLY32-NEXT:#define __STDC_UTF_16__ 1 -// WEBASSEMBLY32-NEXT:#define __STDC_UTF_32__ 1 -// WEBASSEMBLY32-NEXT:#define __STDC_VERSION__ 201112L -// WEBASSEMBLY32-NEXT:#define __STDC__ 1 -// WEBASSEMBLY32-NEXT:#define __UINT16_C_SUFFIX__ -// WEBASSEMBLY32-NEXT:#define __UINT16_FMTX__ "hX" -// WEBASSEMBLY32-NEXT:#define __UINT16_FMTo__ "ho" -// WEBASSEMBLY32-NEXT:#define __UINT16_FMTu__ "hu" -// WEBASSEMBLY32-NEXT:#define __UINT16_FMTx__ "hx" -// WEBASSEMBLY32-NEXT:#define __UINT16_MAX__ 65535 -// WEBASSEMBLY32-NEXT:#define __UINT16_TYPE__ unsigned short -// WEBASSEMBLY32-NEXT:#define __UINT32_C_SUFFIX__ U -// WEBASSEMBLY32-NEXT:#define __UINT32_FMTX__ "X" -// WEBASSEMBLY32-NEXT:#define __UINT32_FMTo__ "o" -// WEBASSEMBLY32-NEXT:#define __UINT32_FMTu__ "u" -// WEBASSEMBLY32-NEXT:#define __UINT32_FMTx__ "x" -// WEBASSEMBLY32-NEXT:#define __UINT32_MAX__ 4294967295U -// WEBASSEMBLY32-NEXT:#define __UINT32_TYPE__ unsigned int -// WEBASSEMBLY32-NEXT:#define __UINT64_C_SUFFIX__ ULL -// WEBASSEMBLY32-NEXT:#define __UINT64_FMTX__ "llX" -// WEBASSEMBLY32-NEXT:#define __UINT64_FMTo__ "llo" -// WEBASSEMBLY32-NEXT:#define __UINT64_FMTu__ "llu" -// WEBASSEMBLY32-NEXT:#define __UINT64_FMTx__ "llx" -// WEBASSEMBLY32-NEXT:#define __UINT64_MAX__ 18446744073709551615ULL -// WEBASSEMBLY32-NEXT:#define __UINT64_TYPE__ long long unsigned int -// WEBASSEMBLY32-NEXT:#define __UINT8_C_SUFFIX__ -// WEBASSEMBLY32-NEXT:#define __UINT8_FMTX__ "hhX" -// WEBASSEMBLY32-NEXT:#define __UINT8_FMTo__ "hho" -// WEBASSEMBLY32-NEXT:#define __UINT8_FMTu__ "hhu" -// WEBASSEMBLY32-NEXT:#define __UINT8_FMTx__ "hhx" -// WEBASSEMBLY32-NEXT:#define __UINT8_MAX__ 255 -// WEBASSEMBLY32-NEXT:#define __UINT8_TYPE__ unsigned char -// WEBASSEMBLY32-NEXT:#define __UINTMAX_C_SUFFIX__ ULL -// WEBASSEMBLY32-NEXT:#define __UINTMAX_FMTX__ "llX" -// WEBASSEMBLY32-NEXT:#define __UINTMAX_FMTo__ "llo" -// WEBASSEMBLY32-NEXT:#define __UINTMAX_FMTu__ "llu" -// WEBASSEMBLY32-NEXT:#define __UINTMAX_FMTx__ "llx" -// WEBASSEMBLY32-NEXT:#define __UINTMAX_MAX__ 18446744073709551615ULL -// WEBASSEMBLY32-NEXT:#define __UINTMAX_TYPE__ long long unsigned int -// WEBASSEMBLY32-NEXT:#define __UINTMAX_WIDTH__ 64 -// WEBASSEMBLY32-NEXT:#define __UINTPTR_FMTX__ "lX" -// WEBASSEMBLY32-NEXT:#define __UINTPTR_FMTo__ "lo" -// WEBASSEMBLY32-NEXT:#define __UINTPTR_FMTu__ "lu" -// WEBASSEMBLY32-NEXT:#define __UINTPTR_FMTx__ "lx" -// WEBASSEMBLY32-NEXT:#define __UINTPTR_MAX__ 4294967295UL -// WEBASSEMBLY32-NEXT:#define __UINTPTR_TYPE__ long unsigned int -// WEBASSEMBLY32-NEXT:#define __UINTPTR_WIDTH__ 32 -// WEBASSEMBLY32-NEXT:#define __UINT_FAST16_FMTX__ "hX" -// WEBASSEMBLY32-NEXT:#define __UINT_FAST16_FMTo__ "ho" -// WEBASSEMBLY32-NEXT:#define __UINT_FAST16_FMTu__ "hu" -// WEBASSEMBLY32-NEXT:#define __UINT_FAST16_FMTx__ "hx" -// WEBASSEMBLY32-NEXT:#define __UINT_FAST16_MAX__ 65535 -// WEBASSEMBLY32-NEXT:#define __UINT_FAST16_TYPE__ unsigned short -// WEBASSEMBLY32-NEXT:#define __UINT_FAST32_FMTX__ "X" -// WEBASSEMBLY32-NEXT:#define __UINT_FAST32_FMTo__ "o" -// WEBASSEMBLY32-NEXT:#define __UINT_FAST32_FMTu__ "u" -// WEBASSEMBLY32-NEXT:#define __UINT_FAST32_FMTx__ "x" -// WEBASSEMBLY32-NEXT:#define __UINT_FAST32_MAX__ 4294967295U -// WEBASSEMBLY32-NEXT:#define __UINT_FAST32_TYPE__ unsigned int -// WEBASSEMBLY32-NEXT:#define __UINT_FAST64_FMTX__ "llX" -// WEBASSEMBLY32-NEXT:#define __UINT_FAST64_FMTo__ "llo" -// WEBASSEMBLY32-NEXT:#define __UINT_FAST64_FMTu__ "llu" -// WEBASSEMBLY32-NEXT:#define __UINT_FAST64_FMTx__ "llx" -// WEBASSEMBLY32-NEXT:#define __UINT_FAST64_MAX__ 18446744073709551615ULL -// WEBASSEMBLY32-NEXT:#define __UINT_FAST64_TYPE__ long long unsigned int -// WEBASSEMBLY32-NEXT:#define __UINT_FAST8_FMTX__ "hhX" -// WEBASSEMBLY32-NEXT:#define __UINT_FAST8_FMTo__ "hho" -// WEBASSEMBLY32-NEXT:#define __UINT_FAST8_FMTu__ "hhu" -// WEBASSEMBLY32-NEXT:#define __UINT_FAST8_FMTx__ "hhx" -// WEBASSEMBLY32-NEXT:#define __UINT_FAST8_MAX__ 255 -// WEBASSEMBLY32-NEXT:#define __UINT_FAST8_TYPE__ unsigned char -// WEBASSEMBLY32-NEXT:#define __UINT_LEAST16_FMTX__ "hX" -// WEBASSEMBLY32-NEXT:#define __UINT_LEAST16_FMTo__ "ho" -// WEBASSEMBLY32-NEXT:#define __UINT_LEAST16_FMTu__ "hu" -// WEBASSEMBLY32-NEXT:#define __UINT_LEAST16_FMTx__ "hx" -// WEBASSEMBLY32-NEXT:#define __UINT_LEAST16_MAX__ 65535 -// WEBASSEMBLY32-NEXT:#define __UINT_LEAST16_TYPE__ unsigned short -// WEBASSEMBLY32-NEXT:#define __UINT_LEAST32_FMTX__ "X" -// WEBASSEMBLY32-NEXT:#define __UINT_LEAST32_FMTo__ "o" -// WEBASSEMBLY32-NEXT:#define __UINT_LEAST32_FMTu__ "u" -// WEBASSEMBLY32-NEXT:#define __UINT_LEAST32_FMTx__ "x" -// WEBASSEMBLY32-NEXT:#define __UINT_LEAST32_MAX__ 4294967295U -// WEBASSEMBLY32-NEXT:#define __UINT_LEAST32_TYPE__ unsigned int -// WEBASSEMBLY32-NEXT:#define __UINT_LEAST64_FMTX__ "llX" -// WEBASSEMBLY32-NEXT:#define __UINT_LEAST64_FMTo__ "llo" -// WEBASSEMBLY32-NEXT:#define __UINT_LEAST64_FMTu__ "llu" -// WEBASSEMBLY32-NEXT:#define __UINT_LEAST64_FMTx__ "llx" -// WEBASSEMBLY32-NEXT:#define __UINT_LEAST64_MAX__ 18446744073709551615ULL -// WEBASSEMBLY32-NEXT:#define __UINT_LEAST64_TYPE__ long long unsigned int -// WEBASSEMBLY32-NEXT:#define __UINT_LEAST8_FMTX__ "hhX" -// WEBASSEMBLY32-NEXT:#define __UINT_LEAST8_FMTo__ "hho" -// WEBASSEMBLY32-NEXT:#define __UINT_LEAST8_FMTu__ "hhu" -// WEBASSEMBLY32-NEXT:#define __UINT_LEAST8_FMTx__ "hhx" -// WEBASSEMBLY32-NEXT:#define __UINT_LEAST8_MAX__ 255 -// WEBASSEMBLY32-NEXT:#define __UINT_LEAST8_TYPE__ unsigned char -// WEBASSEMBLY32-NEXT:#define __USER_LABEL_PREFIX__ -// WEBASSEMBLY32-NEXT:#define __VERSION__ "{{.*}}" -// WEBASSEMBLY32-NEXT:#define __WCHAR_MAX__ 2147483647 -// WEBASSEMBLY32-NEXT:#define __WCHAR_TYPE__ int -// WEBASSEMBLY32-NOT:#define __WCHAR_UNSIGNED__ -// WEBASSEMBLY32-NEXT:#define __WCHAR_WIDTH__ 32 -// WEBASSEMBLY32-NEXT:#define __WINT_MAX__ 2147483647 -// WEBASSEMBLY32-NEXT:#define __WINT_TYPE__ int -// WEBASSEMBLY32-NOT:#define __WINT_UNSIGNED__ -// WEBASSEMBLY32-NEXT:#define __WINT_WIDTH__ 32 -// WEBASSEMBLY32-NEXT:#define __clang__ 1 -// WEBASSEMBLY32-NEXT:#define __clang_major__ {{.*}} -// WEBASSEMBLY32-NEXT:#define __clang_minor__ {{.*}} -// WEBASSEMBLY32-NEXT:#define __clang_patchlevel__ {{.*}} -// WEBASSEMBLY32-NEXT:#define __clang_version__ "{{.*}}" -// WEBASSEMBLY32-NEXT:#define __llvm__ 1 -// WEBASSEMBLY32-NOT:#define __wasm_simd128__ -// WEBASSEMBLY32-NOT:#define __wasm_simd256__ -// WEBASSEMBLY32-NOT:#define __wasm_simd512__ -// WEBASSEMBLY32-NOT:#define __unix -// WEBASSEMBLY32-NOT:#define __unix__ -// WEBASSEMBLY32-NEXT:#define __wasm 1 -// WEBASSEMBLY32-NEXT:#define __wasm32 1 -// WEBASSEMBLY32-NEXT:#define __wasm32__ 1 -// WEBASSEMBLY32-NOT:#define __wasm64 -// WEBASSEMBLY32-NOT:#define __wasm64__ -// WEBASSEMBLY32-NEXT:#define __wasm__ 1 -// -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=wasm64-unknown-unknown \ -// RUN: < /dev/null \ -// RUN: | FileCheck -match-full-lines -check-prefix=WEBASSEMBLY64 %s -// -// WEBASSEMBLY64-NOT:#define _ILP32 -// WEBASSEMBLY64:#define _LP64 1 -// WEBASSEMBLY64-NEXT:#define __ATOMIC_ACQUIRE 2 -// WEBASSEMBLY64-NEXT:#define __ATOMIC_ACQ_REL 4 -// WEBASSEMBLY64-NEXT:#define __ATOMIC_CONSUME 1 -// WEBASSEMBLY64-NEXT:#define __ATOMIC_RELAXED 0 -// WEBASSEMBLY64-NEXT:#define __ATOMIC_RELEASE 3 -// WEBASSEMBLY64-NEXT:#define __ATOMIC_SEQ_CST 5 -// WEBASSEMBLY64-NEXT:#define __BIGGEST_ALIGNMENT__ 16 -// WEBASSEMBLY64-NEXT:#define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__ -// WEBASSEMBLY64-NEXT:#define __CHAR16_TYPE__ unsigned short -// WEBASSEMBLY64-NEXT:#define __CHAR32_TYPE__ unsigned int -// WEBASSEMBLY64-NEXT:#define __CHAR_BIT__ 8 -// WEBASSEMBLY64-NOT:#define __CHAR_UNSIGNED__ -// WEBASSEMBLY64-NEXT:#define __CLANG_ATOMIC_BOOL_LOCK_FREE 2 -// WEBASSEMBLY64-NEXT:#define __CLANG_ATOMIC_CHAR16_T_LOCK_FREE 2 -// WEBASSEMBLY64-NEXT:#define __CLANG_ATOMIC_CHAR32_T_LOCK_FREE 2 -// WEBASSEMBLY64-NEXT:#define __CLANG_ATOMIC_CHAR_LOCK_FREE 2 -// WEBASSEMBLY64-NEXT:#define __CLANG_ATOMIC_INT_LOCK_FREE 2 -// WEBASSEMBLY64-NEXT:#define __CLANG_ATOMIC_LLONG_LOCK_FREE 2 -// WEBASSEMBLY64-NEXT:#define __CLANG_ATOMIC_LONG_LOCK_FREE 2 -// WEBASSEMBLY64-NEXT:#define __CLANG_ATOMIC_POINTER_LOCK_FREE 2 -// WEBASSEMBLY64-NEXT:#define __CLANG_ATOMIC_SHORT_LOCK_FREE 2 -// WEBASSEMBLY64-NEXT:#define __CLANG_ATOMIC_WCHAR_T_LOCK_FREE 2 -// WEBASSEMBLY64-NEXT:#define __CONSTANT_CFSTRINGS__ 1 -// WEBASSEMBLY64-NEXT:#define __DBL_DECIMAL_DIG__ 17 -// WEBASSEMBLY64-NEXT:#define __DBL_DENORM_MIN__ 4.9406564584124654e-324 -// WEBASSEMBLY64-NEXT:#define __DBL_DIG__ 15 -// WEBASSEMBLY64-NEXT:#define __DBL_EPSILON__ 2.2204460492503131e-16 -// WEBASSEMBLY64-NEXT:#define __DBL_HAS_DENORM__ 1 -// WEBASSEMBLY64-NEXT:#define __DBL_HAS_INFINITY__ 1 -// WEBASSEMBLY64-NEXT:#define __DBL_HAS_QUIET_NAN__ 1 -// WEBASSEMBLY64-NEXT:#define __DBL_MANT_DIG__ 53 -// WEBASSEMBLY64-NEXT:#define __DBL_MAX_10_EXP__ 308 -// WEBASSEMBLY64-NEXT:#define __DBL_MAX_EXP__ 1024 -// WEBASSEMBLY64-NEXT:#define __DBL_MAX__ 1.7976931348623157e+308 -// WEBASSEMBLY64-NEXT:#define __DBL_MIN_10_EXP__ (-307) -// WEBASSEMBLY64-NEXT:#define __DBL_MIN_EXP__ (-1021) -// WEBASSEMBLY64-NEXT:#define __DBL_MIN__ 2.2250738585072014e-308 -// WEBASSEMBLY64-NEXT:#define __DECIMAL_DIG__ __LDBL_DECIMAL_DIG__ -// WEBASSEMBLY64-NOT:#define __ELF__ -// WEBASSEMBLY64-NEXT:#define __FINITE_MATH_ONLY__ 0 -// WEBASSEMBLY64:#define __FLT_DECIMAL_DIG__ 9 -// WEBASSEMBLY64-NEXT:#define __FLT_DENORM_MIN__ 1.40129846e-45F -// WEBASSEMBLY64-NEXT:#define __FLT_DIG__ 6 -// WEBASSEMBLY64-NEXT:#define __FLT_EPSILON__ 1.19209290e-7F -// WEBASSEMBLY64-NEXT:#define __FLT_EVAL_METHOD__ 0 -// WEBASSEMBLY64-NEXT:#define __FLT_HAS_DENORM__ 1 -// WEBASSEMBLY64-NEXT:#define __FLT_HAS_INFINITY__ 1 -// WEBASSEMBLY64-NEXT:#define __FLT_HAS_QUIET_NAN__ 1 -// WEBASSEMBLY64-NEXT:#define __FLT_MANT_DIG__ 24 -// WEBASSEMBLY64-NEXT:#define __FLT_MAX_10_EXP__ 38 -// WEBASSEMBLY64-NEXT:#define __FLT_MAX_EXP__ 128 -// WEBASSEMBLY64-NEXT:#define __FLT_MAX__ 3.40282347e+38F -// WEBASSEMBLY64-NEXT:#define __FLT_MIN_10_EXP__ (-37) -// WEBASSEMBLY64-NEXT:#define __FLT_MIN_EXP__ (-125) -// WEBASSEMBLY64-NEXT:#define __FLT_MIN__ 1.17549435e-38F -// WEBASSEMBLY64-NEXT:#define __FLT_RADIX__ 2 -// WEBASSEMBLY64-NEXT:#define __GCC_ATOMIC_BOOL_LOCK_FREE 2 -// WEBASSEMBLY64-NEXT:#define __GCC_ATOMIC_CHAR16_T_LOCK_FREE 2 -// WEBASSEMBLY64-NEXT:#define __GCC_ATOMIC_CHAR32_T_LOCK_FREE 2 -// WEBASSEMBLY64-NEXT:#define __GCC_ATOMIC_CHAR_LOCK_FREE 2 -// WEBASSEMBLY64-NEXT:#define __GCC_ATOMIC_INT_LOCK_FREE 2 -// WEBASSEMBLY64-NEXT:#define __GCC_ATOMIC_LLONG_LOCK_FREE 2 -// WEBASSEMBLY64-NEXT:#define __GCC_ATOMIC_LONG_LOCK_FREE 2 -// WEBASSEMBLY64-NEXT:#define __GCC_ATOMIC_POINTER_LOCK_FREE 2 -// WEBASSEMBLY64-NEXT:#define __GCC_ATOMIC_SHORT_LOCK_FREE 2 -// WEBASSEMBLY64-NEXT:#define __GCC_ATOMIC_TEST_AND_SET_TRUEVAL 1 -// WEBASSEMBLY64-NEXT:#define __GCC_ATOMIC_WCHAR_T_LOCK_FREE 2 -// WEBASSEMBLY64-NEXT:#define __GNUC_MINOR__ {{.*}} -// WEBASSEMBLY64-NEXT:#define __GNUC_PATCHLEVEL__ {{.*}} -// WEBASSEMBLY64-NEXT:#define __GNUC_STDC_INLINE__ 1 -// WEBASSEMBLY64-NEXT:#define __GNUC__ {{.}} -// WEBASSEMBLY64-NEXT:#define __GXX_ABI_VERSION 1002 -// WEBASSEMBLY64-NOT:#define __ILP32__ -// WEBASSEMBLY64-NEXT:#define __INT16_C_SUFFIX__ -// WEBASSEMBLY64-NEXT:#define __INT16_FMTd__ "hd" -// WEBASSEMBLY64-NEXT:#define __INT16_FMTi__ "hi" -// WEBASSEMBLY64-NEXT:#define __INT16_MAX__ 32767 -// WEBASSEMBLY64-NEXT:#define __INT16_TYPE__ short -// WEBASSEMBLY64-NEXT:#define __INT32_C_SUFFIX__ -// WEBASSEMBLY64-NEXT:#define __INT32_FMTd__ "d" -// WEBASSEMBLY64-NEXT:#define __INT32_FMTi__ "i" -// WEBASSEMBLY64-NEXT:#define __INT32_MAX__ 2147483647 -// WEBASSEMBLY64-NEXT:#define __INT32_TYPE__ int -// WEBASSEMBLY64-NEXT:#define __INT64_C_SUFFIX__ LL -// WEBASSEMBLY64-NEXT:#define __INT64_FMTd__ "lld" -// WEBASSEMBLY64-NEXT:#define __INT64_FMTi__ "lli" -// WEBASSEMBLY64-NEXT:#define __INT64_MAX__ 9223372036854775807LL -// WEBASSEMBLY64-NEXT:#define __INT64_TYPE__ long long int -// WEBASSEMBLY64-NEXT:#define __INT8_C_SUFFIX__ -// WEBASSEMBLY64-NEXT:#define __INT8_FMTd__ "hhd" -// WEBASSEMBLY64-NEXT:#define __INT8_FMTi__ "hhi" -// WEBASSEMBLY64-NEXT:#define __INT8_MAX__ 127 -// WEBASSEMBLY64-NEXT:#define __INT8_TYPE__ signed char -// WEBASSEMBLY64-NEXT:#define __INTMAX_C_SUFFIX__ LL -// WEBASSEMBLY64-NEXT:#define __INTMAX_FMTd__ "lld" -// WEBASSEMBLY64-NEXT:#define __INTMAX_FMTi__ "lli" -// WEBASSEMBLY64-NEXT:#define __INTMAX_MAX__ 9223372036854775807LL -// WEBASSEMBLY64-NEXT:#define __INTMAX_TYPE__ long long int -// WEBASSEMBLY64-NEXT:#define __INTMAX_WIDTH__ 64 -// WEBASSEMBLY64-NEXT:#define __INTPTR_FMTd__ "ld" -// WEBASSEMBLY64-NEXT:#define __INTPTR_FMTi__ "li" -// WEBASSEMBLY64-NEXT:#define __INTPTR_MAX__ 9223372036854775807L -// WEBASSEMBLY64-NEXT:#define __INTPTR_TYPE__ long int -// WEBASSEMBLY64-NEXT:#define __INTPTR_WIDTH__ 64 -// WEBASSEMBLY64-NEXT:#define __INT_FAST16_FMTd__ "hd" -// WEBASSEMBLY64-NEXT:#define __INT_FAST16_FMTi__ "hi" -// WEBASSEMBLY64-NEXT:#define __INT_FAST16_MAX__ 32767 -// WEBASSEMBLY64-NEXT:#define __INT_FAST16_TYPE__ short -// WEBASSEMBLY64-NEXT:#define __INT_FAST32_FMTd__ "d" -// WEBASSEMBLY64-NEXT:#define __INT_FAST32_FMTi__ "i" -// WEBASSEMBLY64-NEXT:#define __INT_FAST32_MAX__ 2147483647 -// WEBASSEMBLY64-NEXT:#define __INT_FAST32_TYPE__ int -// WEBASSEMBLY64-NEXT:#define __INT_FAST64_FMTd__ "lld" -// WEBASSEMBLY64-NEXT:#define __INT_FAST64_FMTi__ "lli" -// WEBASSEMBLY64-NEXT:#define __INT_FAST64_MAX__ 9223372036854775807LL -// WEBASSEMBLY64-NEXT:#define __INT_FAST64_TYPE__ long long int -// WEBASSEMBLY64-NEXT:#define __INT_FAST8_FMTd__ "hhd" -// WEBASSEMBLY64-NEXT:#define __INT_FAST8_FMTi__ "hhi" -// WEBASSEMBLY64-NEXT:#define __INT_FAST8_MAX__ 127 -// WEBASSEMBLY64-NEXT:#define __INT_FAST8_TYPE__ signed char -// WEBASSEMBLY64-NEXT:#define __INT_LEAST16_FMTd__ "hd" -// WEBASSEMBLY64-NEXT:#define __INT_LEAST16_FMTi__ "hi" -// WEBASSEMBLY64-NEXT:#define __INT_LEAST16_MAX__ 32767 -// WEBASSEMBLY64-NEXT:#define __INT_LEAST16_TYPE__ short -// WEBASSEMBLY64-NEXT:#define __INT_LEAST32_FMTd__ "d" -// WEBASSEMBLY64-NEXT:#define __INT_LEAST32_FMTi__ "i" -// WEBASSEMBLY64-NEXT:#define __INT_LEAST32_MAX__ 2147483647 -// WEBASSEMBLY64-NEXT:#define __INT_LEAST32_TYPE__ int -// WEBASSEMBLY64-NEXT:#define __INT_LEAST64_FMTd__ "lld" -// WEBASSEMBLY64-NEXT:#define __INT_LEAST64_FMTi__ "lli" -// WEBASSEMBLY64-NEXT:#define __INT_LEAST64_MAX__ 9223372036854775807LL -// WEBASSEMBLY64-NEXT:#define __INT_LEAST64_TYPE__ long long int -// WEBASSEMBLY64-NEXT:#define __INT_LEAST8_FMTd__ "hhd" -// WEBASSEMBLY64-NEXT:#define __INT_LEAST8_FMTi__ "hhi" -// WEBASSEMBLY64-NEXT:#define __INT_LEAST8_MAX__ 127 -// WEBASSEMBLY64-NEXT:#define __INT_LEAST8_TYPE__ signed char -// WEBASSEMBLY64-NEXT:#define __INT_MAX__ 2147483647 -// WEBASSEMBLY64-NEXT:#define __LDBL_DECIMAL_DIG__ 36 -// WEBASSEMBLY64-NEXT:#define __LDBL_DENORM_MIN__ 6.47517511943802511092443895822764655e-4966L -// WEBASSEMBLY64-NEXT:#define __LDBL_DIG__ 33 -// WEBASSEMBLY64-NEXT:#define __LDBL_EPSILON__ 1.92592994438723585305597794258492732e-34L -// WEBASSEMBLY64-NEXT:#define __LDBL_HAS_DENORM__ 1 -// WEBASSEMBLY64-NEXT:#define __LDBL_HAS_INFINITY__ 1 -// WEBASSEMBLY64-NEXT:#define __LDBL_HAS_QUIET_NAN__ 1 -// WEBASSEMBLY64-NEXT:#define __LDBL_MANT_DIG__ 113 -// WEBASSEMBLY64-NEXT:#define __LDBL_MAX_10_EXP__ 4932 -// WEBASSEMBLY64-NEXT:#define __LDBL_MAX_EXP__ 16384 -// WEBASSEMBLY64-NEXT:#define __LDBL_MAX__ 1.18973149535723176508575932662800702e+4932L -// WEBASSEMBLY64-NEXT:#define __LDBL_MIN_10_EXP__ (-4931) -// WEBASSEMBLY64-NEXT:#define __LDBL_MIN_EXP__ (-16381) -// WEBASSEMBLY64-NEXT:#define __LDBL_MIN__ 3.36210314311209350626267781732175260e-4932L -// WEBASSEMBLY64-NEXT:#define __LITTLE_ENDIAN__ 1 -// WEBASSEMBLY64-NEXT:#define __LONG_LONG_MAX__ 9223372036854775807LL -// WEBASSEMBLY64-NEXT:#define __LONG_MAX__ 9223372036854775807L -// WEBASSEMBLY64-NEXT:#define __LP64__ 1 -// WEBASSEMBLY64-NEXT:#define __NO_INLINE__ 1 -// WEBASSEMBLY64-NEXT:#define __OBJC_BOOL_IS_BOOL 0 -// WEBASSEMBLY64-NEXT:#define __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES 3 -// WEBASSEMBLY64-NEXT:#define __OPENCL_MEMORY_SCOPE_DEVICE 2 -// WEBASSEMBLY64-NEXT:#define __OPENCL_MEMORY_SCOPE_SUB_GROUP 4 -// WEBASSEMBLY64-NEXT:#define __OPENCL_MEMORY_SCOPE_WORK_GROUP 1 -// WEBASSEMBLY64-NEXT:#define __OPENCL_MEMORY_SCOPE_WORK_ITEM 0 -// WEBASSEMBLY64-NEXT:#define __ORDER_BIG_ENDIAN__ 4321 -// WEBASSEMBLY64-NEXT:#define __ORDER_LITTLE_ENDIAN__ 1234 -// WEBASSEMBLY64-NEXT:#define __ORDER_PDP_ENDIAN__ 3412 -// WEBASSEMBLY64-NEXT:#define __POINTER_WIDTH__ 64 -// WEBASSEMBLY64-NEXT:#define __PRAGMA_REDEFINE_EXTNAME 1 -// WEBASSEMBLY64-NEXT:#define __PTRDIFF_FMTd__ "ld" -// WEBASSEMBLY64-NEXT:#define __PTRDIFF_FMTi__ "li" -// WEBASSEMBLY64-NEXT:#define __PTRDIFF_MAX__ 9223372036854775807L -// WEBASSEMBLY64-NEXT:#define __PTRDIFF_TYPE__ long int -// WEBASSEMBLY64-NEXT:#define __PTRDIFF_WIDTH__ 64 -// WEBASSEMBLY64-NOT:#define __REGISTER_PREFIX__ -// WEBASSEMBLY64-NEXT:#define __SCHAR_MAX__ 127 -// WEBASSEMBLY64-NEXT:#define __SHRT_MAX__ 32767 -// WEBASSEMBLY64-NEXT:#define __SIG_ATOMIC_MAX__ 9223372036854775807L -// WEBASSEMBLY64-NEXT:#define __SIG_ATOMIC_WIDTH__ 64 -// WEBASSEMBLY64-NEXT:#define __SIZEOF_DOUBLE__ 8 -// WEBASSEMBLY64-NEXT:#define __SIZEOF_FLOAT__ 4 -// WEBASSEMBLY64-NEXT:#define __SIZEOF_INT128__ 16 -// WEBASSEMBLY64-NEXT:#define __SIZEOF_INT__ 4 -// WEBASSEMBLY64-NEXT:#define __SIZEOF_LONG_DOUBLE__ 16 -// WEBASSEMBLY64-NEXT:#define __SIZEOF_LONG_LONG__ 8 // WEBASSEMBLY64-NEXT:#define __SIZEOF_LONG__ 8 // WEBASSEMBLY64-NEXT:#define __SIZEOF_POINTER__ 8 // WEBASSEMBLY64-NEXT:#define __SIZEOF_PTRDIFF_T__ 8 -// WEBASSEMBLY64-NEXT:#define __SIZEOF_SHORT__ 2 +// WEBASSEMBLY-NEXT:#define __SIZEOF_SHORT__ 2 +// WEBASSEMBLY32-NEXT:#define __SIZEOF_SIZE_T__ 4 // WEBASSEMBLY64-NEXT:#define __SIZEOF_SIZE_T__ 8 -// WEBASSEMBLY64-NEXT:#define __SIZEOF_WCHAR_T__ 4 -// WEBASSEMBLY64-NEXT:#define __SIZEOF_WINT_T__ 4 -// WEBASSEMBLY64-NEXT:#define __SIZE_FMTX__ "lX" -// WEBASSEMBLY64-NEXT:#define __SIZE_FMTo__ "lo" -// WEBASSEMBLY64-NEXT:#define __SIZE_FMTu__ "lu" -// WEBASSEMBLY64-NEXT:#define __SIZE_FMTx__ "lx" +// WEBASSEMBLY-NEXT:#define __SIZEOF_WCHAR_T__ 4 +// WEBASSEMBLY-NEXT:#define __SIZEOF_WINT_T__ 4 +// WEBASSEMBLY-NEXT:#define __SIZE_FMTX__ "lX" +// WEBASSEMBLY-NEXT:#define __SIZE_FMTo__ "lo" +// WEBASSEMBLY-NEXT:#define __SIZE_FMTu__ "lu" +// WEBASSEMBLY-NEXT:#define __SIZE_FMTx__ "lx" +// WEBASSEMBLY32-NEXT:#define __SIZE_MAX__ 4294967295UL // WEBASSEMBLY64-NEXT:#define __SIZE_MAX__ 18446744073709551615UL -// WEBASSEMBLY64-NEXT:#define __SIZE_TYPE__ long unsigned int +// WEBASSEMBLY-NEXT:#define __SIZE_TYPE__ long unsigned int +// WEBASSEMBLY32-NEXT:#define __SIZE_WIDTH__ 32 // WEBASSEMBLY64-NEXT:#define __SIZE_WIDTH__ 64 -// WEBASSEMBLY64-NEXT:#define __STDC_HOSTED__ 0 -// WEBASSEMBLY64-NOT:#define __STDC_MB_MIGHT_NEQ_WC__ -// WEBASSEMBLY64-NOT:#define __STDC_NO_ATOMICS__ -// WEBASSEMBLY64-NOT:#define __STDC_NO_COMPLEX__ -// WEBASSEMBLY64-NOT:#define __STDC_NO_VLA__ -// WEBASSEMBLY64-NOT:#define __STDC_NO_THREADS__ -// WEBASSEMBLY64-NEXT:#define __STDC_UTF_16__ 1 -// WEBASSEMBLY64-NEXT:#define __STDC_UTF_32__ 1 -// WEBASSEMBLY64-NEXT:#define __STDC_VERSION__ 201112L -// WEBASSEMBLY64-NEXT:#define __STDC__ 1 -// WEBASSEMBLY64-NEXT:#define __UINT16_C_SUFFIX__ -// WEBASSEMBLY64-NEXT:#define __UINT16_FMTX__ "hX" -// WEBASSEMBLY64-NEXT:#define __UINT16_FMTo__ "ho" -// WEBASSEMBLY64-NEXT:#define __UINT16_FMTu__ "hu" -// WEBASSEMBLY64-NEXT:#define __UINT16_FMTx__ "hx" -// WEBASSEMBLY64-NEXT:#define __UINT16_MAX__ 65535 -// WEBASSEMBLY64-NEXT:#define __UINT16_TYPE__ unsigned short -// WEBASSEMBLY64-NEXT:#define __UINT32_C_SUFFIX__ U -// WEBASSEMBLY64-NEXT:#define __UINT32_FMTX__ "X" -// WEBASSEMBLY64-NEXT:#define __UINT32_FMTo__ "o" -// WEBASSEMBLY64-NEXT:#define __UINT32_FMTu__ "u" -// WEBASSEMBLY64-NEXT:#define __UINT32_FMTx__ "x" -// WEBASSEMBLY64-NEXT:#define __UINT32_MAX__ 4294967295U -// WEBASSEMBLY64-NEXT:#define __UINT32_TYPE__ unsigned int -// WEBASSEMBLY64-NEXT:#define __UINT64_C_SUFFIX__ ULL -// WEBASSEMBLY64-NEXT:#define __UINT64_FMTX__ "llX" -// WEBASSEMBLY64-NEXT:#define __UINT64_FMTo__ "llo" -// WEBASSEMBLY64-NEXT:#define __UINT64_FMTu__ "llu" -// WEBASSEMBLY64-NEXT:#define __UINT64_FMTx__ "llx" -// WEBASSEMBLY64-NEXT:#define __UINT64_MAX__ 18446744073709551615ULL -// WEBASSEMBLY64-NEXT:#define __UINT64_TYPE__ long long unsigned int -// WEBASSEMBLY64-NEXT:#define __UINT8_C_SUFFIX__ -// WEBASSEMBLY64-NEXT:#define __UINT8_FMTX__ "hhX" -// WEBASSEMBLY64-NEXT:#define __UINT8_FMTo__ "hho" -// WEBASSEMBLY64-NEXT:#define __UINT8_FMTu__ "hhu" -// WEBASSEMBLY64-NEXT:#define __UINT8_FMTx__ "hhx" -// WEBASSEMBLY64-NEXT:#define __UINT8_MAX__ 255 -// WEBASSEMBLY64-NEXT:#define __UINT8_TYPE__ unsigned char -// WEBASSEMBLY64-NEXT:#define __UINTMAX_C_SUFFIX__ ULL -// WEBASSEMBLY64-NEXT:#define __UINTMAX_FMTX__ "llX" -// WEBASSEMBLY64-NEXT:#define __UINTMAX_FMTo__ "llo" -// WEBASSEMBLY64-NEXT:#define __UINTMAX_FMTu__ "llu" -// WEBASSEMBLY64-NEXT:#define __UINTMAX_FMTx__ "llx" -// WEBASSEMBLY64-NEXT:#define __UINTMAX_MAX__ 18446744073709551615ULL -// WEBASSEMBLY64-NEXT:#define __UINTMAX_TYPE__ long long unsigned int -// WEBASSEMBLY64-NEXT:#define __UINTMAX_WIDTH__ 64 -// WEBASSEMBLY64-NEXT:#define __UINTPTR_FMTX__ "lX" -// WEBASSEMBLY64-NEXT:#define __UINTPTR_FMTo__ "lo" -// WEBASSEMBLY64-NEXT:#define __UINTPTR_FMTu__ "lu" -// WEBASSEMBLY64-NEXT:#define __UINTPTR_FMTx__ "lx" +// WEBASSEMBLY-NEXT:#define __STDC_HOSTED__ 0 +// WEBASSEMBLY-NOT:#define __STDC_MB_MIGHT_NEQ_WC__ +// WEBASSEMBLY-NOT:#define __STDC_NO_ATOMICS__ +// WEBASSEMBLY-NOT:#define __STDC_NO_COMPLEX__ +// WEBASSEMBLY-NOT:#define __STDC_NO_VLA__ +// WEBASSEMBLY-NOT:#define __STDC_NO_THREADS__ +// WEBASSEMBLY-NEXT:#define __STDC_UTF_16__ 1 +// WEBASSEMBLY-NEXT:#define __STDC_UTF_32__ 1 +// WEBASSEMBLY-NEXT:#define __STDC_VERSION__ 201112L +// WEBASSEMBLY-NEXT:#define __STDC__ 1 +// WEBASSEMBLY-NEXT:#define __UINT16_C_SUFFIX__ +// WEBASSEMBLY-NEXT:#define __UINT16_FMTX__ "hX" +// WEBASSEMBLY-NEXT:#define __UINT16_FMTo__ "ho" +// WEBASSEMBLY-NEXT:#define __UINT16_FMTu__ "hu" +// WEBASSEMBLY-NEXT:#define __UINT16_FMTx__ "hx" +// WEBASSEMBLY-NEXT:#define __UINT16_MAX__ 65535 +// WEBASSEMBLY-NEXT:#define __UINT16_TYPE__ unsigned short +// WEBASSEMBLY-NEXT:#define __UINT32_C_SUFFIX__ U +// WEBASSEMBLY-NEXT:#define __UINT32_FMTX__ "X" +// WEBASSEMBLY-NEXT:#define __UINT32_FMTo__ "o" +// WEBASSEMBLY-NEXT:#define __UINT32_FMTu__ "u" +// WEBASSEMBLY-NEXT:#define __UINT32_FMTx__ "x" +// WEBASSEMBLY-NEXT:#define __UINT32_MAX__ 4294967295U +// WEBASSEMBLY-NEXT:#define __UINT32_TYPE__ unsigned int +// WEBASSEMBLY-NEXT:#define __UINT64_C_SUFFIX__ ULL +// WEBASSEMBLY-NEXT:#define __UINT64_FMTX__ "llX" +// WEBASSEMBLY-NEXT:#define __UINT64_FMTo__ "llo" +// WEBASSEMBLY-NEXT:#define __UINT64_FMTu__ "llu" +// WEBASSEMBLY-NEXT:#define __UINT64_FMTx__ "llx" +// WEBASSEMBLY-NEXT:#define __UINT64_MAX__ 18446744073709551615ULL +// WEBASSEMBLY-NEXT:#define __UINT64_TYPE__ long long unsigned int +// WEBASSEMBLY-NEXT:#define __UINT8_C_SUFFIX__ +// WEBASSEMBLY-NEXT:#define __UINT8_FMTX__ "hhX" +// WEBASSEMBLY-NEXT:#define __UINT8_FMTo__ "hho" +// WEBASSEMBLY-NEXT:#define __UINT8_FMTu__ "hhu" +// WEBASSEMBLY-NEXT:#define __UINT8_FMTx__ "hhx" +// WEBASSEMBLY-NEXT:#define __UINT8_MAX__ 255 +// WEBASSEMBLY-NEXT:#define __UINT8_TYPE__ unsigned char +// WEBASSEMBLY-NEXT:#define __UINTMAX_C_SUFFIX__ ULL +// WEBASSEMBLY-NEXT:#define __UINTMAX_FMTX__ "llX" +// WEBASSEMBLY-NEXT:#define __UINTMAX_FMTo__ "llo" +// WEBASSEMBLY-NEXT:#define __UINTMAX_FMTu__ "llu" +// WEBASSEMBLY-NEXT:#define __UINTMAX_FMTx__ "llx" +// WEBASSEMBLY-NEXT:#define __UINTMAX_MAX__ 18446744073709551615ULL +// WEBASSEMBLY-NEXT:#define __UINTMAX_TYPE__ long long unsigned int +// WEBASSEMBLY-NEXT:#define __UINTMAX_WIDTH__ 64 +// WEBASSEMBLY-NEXT:#define __UINTPTR_FMTX__ "lX" +// WEBASSEMBLY-NEXT:#define __UINTPTR_FMTo__ "lo" +// WEBASSEMBLY-NEXT:#define __UINTPTR_FMTu__ "lu" +// WEBASSEMBLY-NEXT:#define __UINTPTR_FMTx__ "lx" +// WEBASSEMBLY32-NEXT:#define __UINTPTR_MAX__ 4294967295UL // WEBASSEMBLY64-NEXT:#define __UINTPTR_MAX__ 18446744073709551615UL -// WEBASSEMBLY64-NEXT:#define __UINTPTR_TYPE__ long unsigned int +// WEBASSEMBLY-NEXT:#define __UINTPTR_TYPE__ long unsigned int +// WEBASSEMBLY32-NEXT:#define __UINTPTR_WIDTH__ 32 // WEBASSEMBLY64-NEXT:#define __UINTPTR_WIDTH__ 64 -// WEBASSEMBLY64-NEXT:#define __UINT_FAST16_FMTX__ "hX" -// WEBASSEMBLY64-NEXT:#define __UINT_FAST16_FMTo__ "ho" -// WEBASSEMBLY64-NEXT:#define __UINT_FAST16_FMTu__ "hu" -// WEBASSEMBLY64-NEXT:#define __UINT_FAST16_FMTx__ "hx" -// WEBASSEMBLY64-NEXT:#define __UINT_FAST16_MAX__ 65535 -// WEBASSEMBLY64-NEXT:#define __UINT_FAST16_TYPE__ unsigned short -// WEBASSEMBLY64-NEXT:#define __UINT_FAST32_FMTX__ "X" -// WEBASSEMBLY64-NEXT:#define __UINT_FAST32_FMTo__ "o" -// WEBASSEMBLY64-NEXT:#define __UINT_FAST32_FMTu__ "u" -// WEBASSEMBLY64-NEXT:#define __UINT_FAST32_FMTx__ "x" -// WEBASSEMBLY64-NEXT:#define __UINT_FAST32_MAX__ 4294967295U -// WEBASSEMBLY64-NEXT:#define __UINT_FAST32_TYPE__ unsigned int -// WEBASSEMBLY64-NEXT:#define __UINT_FAST64_FMTX__ "llX" -// WEBASSEMBLY64-NEXT:#define __UINT_FAST64_FMTo__ "llo" -// WEBASSEMBLY64-NEXT:#define __UINT_FAST64_FMTu__ "llu" -// WEBASSEMBLY64-NEXT:#define __UINT_FAST64_FMTx__ "llx" -// WEBASSEMBLY64-NEXT:#define __UINT_FAST64_MAX__ 18446744073709551615ULL -// WEBASSEMBLY64-NEXT:#define __UINT_FAST64_TYPE__ long long unsigned int -// WEBASSEMBLY64-NEXT:#define __UINT_FAST8_FMTX__ "hhX" -// WEBASSEMBLY64-NEXT:#define __UINT_FAST8_FMTo__ "hho" -// WEBASSEMBLY64-NEXT:#define __UINT_FAST8_FMTu__ "hhu" -// WEBASSEMBLY64-NEXT:#define __UINT_FAST8_FMTx__ "hhx" -// WEBASSEMBLY64-NEXT:#define __UINT_FAST8_MAX__ 255 -// WEBASSEMBLY64-NEXT:#define __UINT_FAST8_TYPE__ unsigned char -// WEBASSEMBLY64-NEXT:#define __UINT_LEAST16_FMTX__ "hX" -// WEBASSEMBLY64-NEXT:#define __UINT_LEAST16_FMTo__ "ho" -// WEBASSEMBLY64-NEXT:#define __UINT_LEAST16_FMTu__ "hu" -// WEBASSEMBLY64-NEXT:#define __UINT_LEAST16_FMTx__ "hx" -// WEBASSEMBLY64-NEXT:#define __UINT_LEAST16_MAX__ 65535 -// WEBASSEMBLY64-NEXT:#define __UINT_LEAST16_TYPE__ unsigned short -// WEBASSEMBLY64-NEXT:#define __UINT_LEAST32_FMTX__ "X" -// WEBASSEMBLY64-NEXT:#define __UINT_LEAST32_FMTo__ "o" -// WEBASSEMBLY64-NEXT:#define __UINT_LEAST32_FMTu__ "u" -// WEBASSEMBLY64-NEXT:#define __UINT_LEAST32_FMTx__ "x" -// WEBASSEMBLY64-NEXT:#define __UINT_LEAST32_MAX__ 4294967295U -// WEBASSEMBLY64-NEXT:#define __UINT_LEAST32_TYPE__ unsigned int -// WEBASSEMBLY64-NEXT:#define __UINT_LEAST64_FMTX__ "llX" -// WEBASSEMBLY64-NEXT:#define __UINT_LEAST64_FMTo__ "llo" -// WEBASSEMBLY64-NEXT:#define __UINT_LEAST64_FMTu__ "llu" -// WEBASSEMBLY64-NEXT:#define __UINT_LEAST64_FMTx__ "llx" -// WEBASSEMBLY64-NEXT:#define __UINT_LEAST64_MAX__ 18446744073709551615ULL -// WEBASSEMBLY64-NEXT:#define __UINT_LEAST64_TYPE__ long long unsigned int -// WEBASSEMBLY64-NEXT:#define __UINT_LEAST8_FMTX__ "hhX" -// WEBASSEMBLY64-NEXT:#define __UINT_LEAST8_FMTo__ "hho" -// WEBASSEMBLY64-NEXT:#define __UINT_LEAST8_FMTu__ "hhu" -// WEBASSEMBLY64-NEXT:#define __UINT_LEAST8_FMTx__ "hhx" -// WEBASSEMBLY64-NEXT:#define __UINT_LEAST8_MAX__ 255 -// WEBASSEMBLY64-NEXT:#define __UINT_LEAST8_TYPE__ unsigned char -// WEBASSEMBLY64-NEXT:#define __USER_LABEL_PREFIX__ -// WEBASSEMBLY64-NEXT:#define __VERSION__ "{{.*}}" -// WEBASSEMBLY64-NEXT:#define __WCHAR_MAX__ 2147483647 -// WEBASSEMBLY64-NEXT:#define __WCHAR_TYPE__ int -// WEBASSEMBLY64-NOT:#define __WCHAR_UNSIGNED__ -// WEBASSEMBLY64-NEXT:#define __WCHAR_WIDTH__ 32 -// WEBASSEMBLY64-NEXT:#define __WINT_MAX__ 2147483647 -// WEBASSEMBLY64-NEXT:#define __WINT_TYPE__ int -// WEBASSEMBLY64-NOT:#define __WINT_UNSIGNED__ -// WEBASSEMBLY64-NEXT:#define __WINT_WIDTH__ 32 -// WEBASSEMBLY64-NEXT:#define __clang__ 1 -// WEBASSEMBLY64-NEXT:#define __clang_major__ {{.*}} -// WEBASSEMBLY64-NEXT:#define __clang_minor__ {{.*}} -// WEBASSEMBLY64-NEXT:#define __clang_patchlevel__ {{.*}} -// WEBASSEMBLY64-NEXT:#define __clang_version__ "{{.*}}" -// WEBASSEMBLY64-NEXT:#define __llvm__ 1 -// WEBASSEMBLY64-NOT:#define __wasm_simd128__ -// WEBASSEMBLY64-NOT:#define __wasm_simd256__ -// WEBASSEMBLY64-NOT:#define __wasm_simd512__ -// WEBASSEMBLY64-NOT:#define __unix -// WEBASSEMBLY64-NOT:#define __unix__ -// WEBASSEMBLY64-NEXT:#define __wasm 1 +// WEBASSEMBLY-NEXT:#define __UINT_FAST16_FMTX__ "hX" +// WEBASSEMBLY-NEXT:#define __UINT_FAST16_FMTo__ "ho" +// WEBASSEMBLY-NEXT:#define __UINT_FAST16_FMTu__ "hu" +// WEBASSEMBLY-NEXT:#define __UINT_FAST16_FMTx__ "hx" +// WEBASSEMBLY-NEXT:#define __UINT_FAST16_MAX__ 65535 +// WEBASSEMBLY-NEXT:#define __UINT_FAST16_TYPE__ unsigned short +// WEBASSEMBLY-NEXT:#define __UINT_FAST32_FMTX__ "X" +// WEBASSEMBLY-NEXT:#define __UINT_FAST32_FMTo__ "o" +// WEBASSEMBLY-NEXT:#define __UINT_FAST32_FMTu__ "u" +// WEBASSEMBLY-NEXT:#define __UINT_FAST32_FMTx__ "x" +// WEBASSEMBLY-NEXT:#define __UINT_FAST32_MAX__ 4294967295U +// WEBASSEMBLY-NEXT:#define __UINT_FAST32_TYPE__ unsigned int +// WEBASSEMBLY-NEXT:#define __UINT_FAST64_FMTX__ "llX" +// WEBASSEMBLY-NEXT:#define __UINT_FAST64_FMTo__ "llo" +// WEBASSEMBLY-NEXT:#define __UINT_FAST64_FMTu__ "llu" +// WEBASSEMBLY-NEXT:#define __UINT_FAST64_FMTx__ "llx" +// WEBASSEMBLY-NEXT:#define __UINT_FAST64_MAX__ 18446744073709551615ULL +// WEBASSEMBLY-NEXT:#define __UINT_FAST64_TYPE__ long long unsigned int +// WEBASSEMBLY-NEXT:#define __UINT_FAST8_FMTX__ "hhX" +// WEBASSEMBLY-NEXT:#define __UINT_FAST8_FMTo__ "hho" +// WEBASSEMBLY-NEXT:#define __UINT_FAST8_FMTu__ "hhu" +// WEBASSEMBLY-NEXT:#define __UINT_FAST8_FMTx__ "hhx" +// WEBASSEMBLY-NEXT:#define __UINT_FAST8_MAX__ 255 +// WEBASSEMBLY-NEXT:#define __UINT_FAST8_TYPE__ unsigned char +// WEBASSEMBLY-NEXT:#define __UINT_LEAST16_FMTX__ "hX" +// WEBASSEMBLY-NEXT:#define __UINT_LEAST16_FMTo__ "ho" +// WEBASSEMBLY-NEXT:#define __UINT_LEAST16_FMTu__ "hu" +// WEBASSEMBLY-NEXT:#define __UINT_LEAST16_FMTx__ "hx" +// WEBASSEMBLY-NEXT:#define __UINT_LEAST16_MAX__ 65535 +// WEBASSEMBLY-NEXT:#define __UINT_LEAST16_TYPE__ unsigned short +// WEBASSEMBLY-NEXT:#define __UINT_LEAST32_FMTX__ "X" +// WEBASSEMBLY-NEXT:#define __UINT_LEAST32_FMTo__ "o" +// WEBASSEMBLY-NEXT:#define __UINT_LEAST32_FMTu__ "u" +// WEBASSEMBLY-NEXT:#define __UINT_LEAST32_FMTx__ "x" +// WEBASSEMBLY-NEXT:#define __UINT_LEAST32_MAX__ 4294967295U +// WEBASSEMBLY-NEXT:#define __UINT_LEAST32_TYPE__ unsigned int +// WEBASSEMBLY-NEXT:#define __UINT_LEAST64_FMTX__ "llX" +// WEBASSEMBLY-NEXT:#define __UINT_LEAST64_FMTo__ "llo" +// WEBASSEMBLY-NEXT:#define __UINT_LEAST64_FMTu__ "llu" +// WEBASSEMBLY-NEXT:#define __UINT_LEAST64_FMTx__ "llx" +// WEBASSEMBLY-NEXT:#define __UINT_LEAST64_MAX__ 18446744073709551615ULL +// WEBASSEMBLY-NEXT:#define __UINT_LEAST64_TYPE__ long long unsigned int +// WEBASSEMBLY-NEXT:#define __UINT_LEAST8_FMTX__ "hhX" +// WEBASSEMBLY-NEXT:#define __UINT_LEAST8_FMTo__ "hho" +// WEBASSEMBLY-NEXT:#define __UINT_LEAST8_FMTu__ "hhu" +// WEBASSEMBLY-NEXT:#define __UINT_LEAST8_FMTx__ "hhx" +// WEBASSEMBLY-NEXT:#define __UINT_LEAST8_MAX__ 255 +// WEBASSEMBLY-NEXT:#define __UINT_LEAST8_TYPE__ unsigned char +// WEBASSEMBLY-NEXT:#define __USER_LABEL_PREFIX__ +// WEBASSEMBLY-NEXT:#define __VERSION__ "{{.*}}" +// WEBASSEMBLY-NEXT:#define __WCHAR_MAX__ 2147483647 +// WEBASSEMBLY-NEXT:#define __WCHAR_TYPE__ int +// WEBASSEMBLY-NOT:#define __WCHAR_UNSIGNED__ +// WEBASSEMBLY-NEXT:#define __WCHAR_WIDTH__ 32 +// WEBASSEMBLY-NEXT:#define __WINT_MAX__ 2147483647 +// WEBASSEMBLY-NEXT:#define __WINT_TYPE__ int +// WEBASSEMBLY-NOT:#define __WINT_UNSIGNED__ +// WEBASSEMBLY-NEXT:#define __WINT_WIDTH__ 32 +// WEBASSEMBLY-NEXT:#define __clang__ 1 +// WEBASSEMBLY-NEXT:#define __clang_major__ {{.*}} +// WEBASSEMBLY-NEXT:#define __clang_minor__ {{.*}} +// WEBASSEMBLY-NEXT:#define __clang_patchlevel__ {{.*}} +// WEBASSEMBLY-NEXT:#define __clang_version__ "{{.*}}" +// WEBASSEMBLY-NEXT:#define __llvm__ 1 +// WEBASSEMBLY-NOT:#define __unix +// WEBASSEMBLY-NOT:#define __unix__ +// WEBASSEMBLY-NOT:#define __wasm_simd128__ +// WEBASSEMBLY-NOT:#define __wasm_simd256__ +// WEBASSEMBLY-NOT:#define __wasm_simd512__ +// WEBASSEMBLY-NEXT:#define __wasm 1 +// WEBASSEMBLY32-NEXT:#define __wasm32 1 // WEBASSEMBLY64-NOT:#define __wasm32 +// WEBASSEMBLY32-NEXT:#define __wasm32__ 1 // WEBASSEMBLY64-NOT:#define __wasm32__ +// WEBASSEMBLY32-NOT:#define __wasm64__ +// WEBASSEMBLY32-NOT:#define __wasm64 // WEBASSEMBLY64-NEXT:#define __wasm64 1 // WEBASSEMBLY64-NEXT:#define __wasm64__ 1 -// WEBASSEMBLY64-NEXT:#define __wasm__ 1 +// WEBASSEMBLY-NEXT:#define __wasm__ 1 // RUN: %clang_cc1 -E -dM -ffreestanding -triple i686-windows-cygnus < /dev/null | FileCheck -match-full-lines -check-prefix CYGWIN-X32 %s // CYGWIN-X32: #define __USER_LABEL_PREFIX__ _ From b73d7051297ef90ed66b4b6a27a13fe9d471e880 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Mon, 28 Jan 2019 13:54:22 +0000 Subject: [PATCH 053/125] Merging r352105: Redirecting to URL 'https://llvm.org/svn/llvm-project/cfe/trunk': ------------------------------------------------------------------------ r352105 | djg | 2019-01-24 22:05:11 +0100 (Thu, 24 Jan 2019) | 7 lines [WebAssembly] Add a __wasi__ target macro This adds a `__wasi__` macro for the wasi OS, similar to `__linux__` etc. for other OS's. Differential Revision: https://reviews.llvm.org/D57155 ------------------------------------------------------------------------ llvm-svn: 352360 --- clang/lib/Basic/Targets.cpp | 24 ++++++++++++++++-------- clang/lib/Basic/Targets/OSTargets.h | 18 +++++++++++++++++- clang/test/Preprocessor/init.c | 7 +++++++ 3 files changed, 40 insertions(+), 9 deletions(-) diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp index cf87bc484621cd..3c139d724796ee 100644 --- a/clang/lib/Basic/Targets.cpp +++ b/clang/lib/Basic/Targets.cpp @@ -570,19 +570,27 @@ TargetInfo *AllocateTarget(const llvm::Triple &Triple, Triple.getVendor() != llvm::Triple::UnknownVendor || !Triple.isOSBinFormatWasm()) return nullptr; - if (Triple.getOS() != llvm::Triple::UnknownOS && - Triple.getOS() != llvm::Triple::WASI) - return nullptr; - return new WebAssemblyOSTargetInfo(Triple, Opts); + switch (Triple.getOS()) { + case llvm::Triple::WASI: + return new WASITargetInfo(Triple, Opts); + case llvm::Triple::UnknownOS: + return new WebAssemblyOSTargetInfo(Triple, Opts); + default: + return nullptr; + } case llvm::Triple::wasm64: if (Triple.getSubArch() != llvm::Triple::NoSubArch || Triple.getVendor() != llvm::Triple::UnknownVendor || !Triple.isOSBinFormatWasm()) return nullptr; - if (Triple.getOS() != llvm::Triple::UnknownOS && - Triple.getOS() != llvm::Triple::WASI) - return nullptr; - return new WebAssemblyOSTargetInfo(Triple, Opts); + switch (Triple.getOS()) { + case llvm::Triple::WASI: + return new WASITargetInfo(Triple, Opts); + case llvm::Triple::UnknownOS: + return new WebAssemblyOSTargetInfo(Triple, Opts); + default: + return nullptr; + } case llvm::Triple::renderscript32: return new LinuxTargetInfo(Triple, Opts); diff --git a/clang/lib/Basic/Targets/OSTargets.h b/clang/lib/Basic/Targets/OSTargets.h index 085efa02cc5f17..09867d82c382e9 100644 --- a/clang/lib/Basic/Targets/OSTargets.h +++ b/clang/lib/Basic/Targets/OSTargets.h @@ -764,8 +764,9 @@ class LLVM_LIBRARY_VISIBILITY FuchsiaTargetInfo : public OSTargetInfo { template class LLVM_LIBRARY_VISIBILITY WebAssemblyOSTargetInfo : public OSTargetInfo { +protected: void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple, - MacroBuilder &Builder) const final { + MacroBuilder &Builder) const { // A common platform macro. if (Opts.POSIXThreads) Builder.defineMacro("_REENTRANT"); @@ -783,6 +784,21 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyOSTargetInfo } }; +// WASI target +template +class LLVM_LIBRARY_VISIBILITY WASITargetInfo + : public WebAssemblyOSTargetInfo { + void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple, + MacroBuilder &Builder) const final { + WebAssemblyOSTargetInfo::getOSDefines(Opts, Triple, Builder); + Builder.defineMacro("__wasi__"); + } + +public: + explicit WASITargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) + : WebAssemblyOSTargetInfo(Triple, Opts) {} +}; + } // namespace targets } // namespace clang #endif // LLVM_CLANG_LIB_BASIC_TARGETS_OSTARGETS_H diff --git a/clang/test/Preprocessor/init.c b/clang/test/Preprocessor/init.c index d852102424ec38..770e52cc784355 100644 --- a/clang/test/Preprocessor/init.c +++ b/clang/test/Preprocessor/init.c @@ -9114,6 +9114,12 @@ // RUN: %clang_cc1 -E -dM -ffreestanding -triple=wasm64-unknown-unknown \ // RUN: < /dev/null \ // RUN: | FileCheck -match-full-lines -check-prefixes=WEBASSEMBLY,WEBASSEMBLY64 %s +// RUN: %clang_cc1 -E -dM -ffreestanding -triple=wasm32-unknown-wasi \ +// RUN: < /dev/null \ +// RUN: | FileCheck -match-full-lines -check-prefixes=WEBASSEMBLY,WEBASSEMBLY32,WEBASSEMBLY-WASI %s +// RUN: %clang_cc1 -E -dM -ffreestanding -triple=wasm64-unknown-wasi \ +// RUN: < /dev/null \ +// RUN: | FileCheck -match-full-lines -check-prefixes=WEBASSEMBLY,WEBASSEMBLY64,WEBASSEMBLY-WASI %s // // WEBASSEMBLY32:#define _ILP32 1 // WEBASSEMBLY32-NOT:#define _LP64 @@ -9467,6 +9473,7 @@ // WEBASSEMBLY-NEXT:#define __llvm__ 1 // WEBASSEMBLY-NOT:#define __unix // WEBASSEMBLY-NOT:#define __unix__ +// WEBASSEMBLY-WASI-NEXT:#define __wasi__ 1 // WEBASSEMBLY-NOT:#define __wasm_simd128__ // WEBASSEMBLY-NOT:#define __wasm_simd256__ // WEBASSEMBLY-NOT:#define __wasm_simd512__ From 1680cb52675ab7dd420887d1e0445c15b7a198a6 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Mon, 28 Jan 2019 14:07:41 +0000 Subject: [PATCH 054/125] Merging r352221 and r352222: ------------------------------------------------------------------------ r352221 | erichkeane | 2019-01-25 18:27:57 +0100 (Fri, 25 Jan 2019) | 12 lines Disable _Float16 for non ARM/SPIR Targets As Discussed here: http://lists.llvm.org/pipermail/llvm-dev/2019-January/129543.html There are problems exposing the _Float16 type on architectures that haven't defined the ABI/ISel for the type yet, so we're temporarily disabling the type and making it opt-in. Differential Revision: https://reviews.llvm.org/D57188 Change-Id: I5db7366dedf1deb9485adb8948b1deb7e612a736 ------------------------------------------------------------------------ ------------------------------------------------------------------------ r352222 | erichkeane | 2019-01-25 18:39:57 +0100 (Fri, 25 Jan 2019) | 3 lines Fix incorrect indent from r352221 Change-Id: I0a7b1443eb6912ef7bea1a4cf2f696fc01726557 ------------------------------------------------------------------------ llvm-svn: 352363 --- clang/docs/LanguageExtensions.rst | 90 +++++++++++-------- clang/include/clang/Basic/TargetInfo.h | 4 + clang/lib/Basic/TargetInfo.cpp | 1 + clang/lib/Basic/Targets/AArch64.cpp | 1 + clang/lib/Basic/Targets/ARM.cpp | 1 + clang/lib/Basic/Targets/SPIR.h | 1 + clang/lib/Sema/SemaType.cpp | 7 +- clang/test/AST/float16.cpp | 4 +- .../test/CodeGenCXX/float16-declarations.cpp | 6 -- clang/test/CodeGenCXX/mangle-ms.cpp | 5 +- clang/test/Lexer/half-literal.cpp | 2 +- clang/test/Sema/Float16.c | 11 +++ 12 files changed, 84 insertions(+), 49 deletions(-) create mode 100644 clang/test/Sema/Float16.c diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index e155cefb7890d8..5782edd353701b 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -474,44 +474,58 @@ Half-Precision Floating Point ============================= Clang supports two half-precision (16-bit) floating point types: ``__fp16`` and -``_Float16``. ``__fp16`` is defined in the ARM C Language Extensions (`ACLE -`_) -and ``_Float16`` in ISO/IEC TS 18661-3:2015. - -``__fp16`` is a storage and interchange format only. This means that values of -``__fp16`` promote to (at least) float when used in arithmetic operations. -There are two ``__fp16`` formats. Clang supports the IEEE 754-2008 format and -not the ARM alternative format. - -ISO/IEC TS 18661-3:2015 defines C support for additional floating point types. -``_FloatN`` is defined as a binary floating type, where the N suffix denotes -the number of bits and is 16, 32, 64, or greater and equal to 128 and a -multiple of 32. Clang supports ``_Float16``. The difference from ``__fp16`` is -that arithmetic on ``_Float16`` is performed in half-precision, thus it is not -a storage-only format. ``_Float16`` is available as a source language type in -both C and C++ mode. - -It is recommended that portable code use the ``_Float16`` type because -``__fp16`` is an ARM C-Language Extension (ACLE), whereas ``_Float16`` is -defined by the C standards committee, so using ``_Float16`` will not prevent -code from being ported to architectures other than Arm. Also, ``_Float16`` -arithmetic and operations will directly map on half-precision instructions when -they are available (e.g. Armv8.2-A), avoiding conversions to/from -single-precision, and thus will result in more performant code. If -half-precision instructions are unavailable, values will be promoted to -single-precision, similar to the semantics of ``__fp16`` except that the -results will be stored in single-precision. - -In an arithmetic operation where one operand is of ``__fp16`` type and the -other is of ``_Float16`` type, the ``_Float16`` type is first converted to -``__fp16`` type and then the operation is completed as if both operands were of -``__fp16`` type. - -To define a ``_Float16`` literal, suffix ``f16`` can be appended to the compile-time -constant declaration. There is no default argument promotion for ``_Float16``; this -applies to the standard floating types only. As a consequence, for example, an -explicit cast is required for printing a ``_Float16`` value (there is no string -format specifier for ``_Float16``). +``_Float16``. These types are supported in all language modes. + +``__fp16`` is supported on every target, as it is purely a storage format; see below. +``_Float16`` is currently only supported on the following targets, with further +targets pending ABI standardization: +- 32-bit ARM +- 64-bit ARM (AArch64) +- SPIR +``_Float16`` will be supported on more targets as they define ABIs for it. + +``__fp16`` is a storage and interchange format only. This means that values of +``__fp16`` are immediately promoted to (at least) ``float`` when used in arithmetic +operations, so that e.g. the result of adding two ``__fp16`` values has type ``float``. +The behavior of ``__fp16`` is specified by the ARM C Language Extensions (`ACLE `_). +Clang uses the ``binary16`` format from IEEE 754-2008 for ``__fp16``, not the ARM +alternative format. + +``_Float16`` is an extended floating-point type. This means that, just like arithmetic on +``float`` or ``double``, arithmetic on ``_Float16`` operands is formally performed in the +``_Float16`` type, so that e.g. the result of adding two ``_Float16`` values has type +``_Float16``. The behavior of ``_Float16`` is specified by ISO/IEC TS 18661-3:2015 +("Floating-point extensions for C"). As with ``__fp16``, Clang uses the ``binary16`` +format from IEEE 754-2008 for ``_Float16``. + +``_Float16`` arithmetic will be performed using native half-precision support +when available on the target (e.g. on ARMv8.2a); otherwise it will be performed +at a higher precision (currently always ``float``) and then truncated down to +``_Float16``. Note that C and C++ allow intermediate floating-point operands +of an expression to be computed with greater precision than is expressible in +their type, so Clang may avoid intermediate truncations in certain cases; this may +lead to results that are inconsistent with native arithmetic. + +It is recommended that portable code use ``_Float16`` instead of ``__fp16``, +as it has been defined by the C standards committee and has behavior that is +more familiar to most programmers. + +Because ``__fp16`` operands are always immediately promoted to ``float``, the +common real type of ``__fp16`` and ``_Float16`` for the purposes of the usual +arithmetic conversions is ``float``. + +A literal can be given ``_Float16`` type using the suffix ``f16``; for example: +``` +3.14f16 +``` + +Because default argument promotion only applies to the standard floating-point +types, ``_Float16`` values are not promoted to ``double`` when passed as variadic +or untyped arguments. As a consequence, some caution must be taken when using +certain library facilities with ``_Float16``; for example, there is no ``printf`` format +specifier for ``_Float16``, and (unlike ``float``) it will not be implicitly promoted to +``double`` when passed to ``printf``, so the programmer must explicitly cast it to +``double`` before using it with an ``%f`` or similar specifier. Messages on ``deprecated`` and ``unavailable`` Attributes ========================================================= diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index 786b1c251ca822..1e835d992bbeb8 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -64,6 +64,7 @@ class TargetInfo : public RefCountedBase { bool HasLegalHalfType; // True if the backend supports operations on the half // LLVM IR type. bool HasFloat128; + bool HasFloat16; unsigned char PointerWidth, PointerAlign; unsigned char BoolWidth, BoolAlign; unsigned char IntWidth, IntAlign; @@ -517,6 +518,9 @@ class TargetInfo : public RefCountedBase { /// Determine whether the __float128 type is supported on this target. virtual bool hasFloat128Type() const { return HasFloat128; } + /// Determine whether the _Float16 type is supported on this target. + virtual bool hasFloat16Type() const { return HasFloat16; } + /// Return the alignment that is suitable for storing any /// object with a fundamental alignment requirement. unsigned getSuitableAlign() const { return SuitableAlign; } diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp index 269fad38b8d57f..8b7621d7962e12 100644 --- a/clang/lib/Basic/TargetInfo.cpp +++ b/clang/lib/Basic/TargetInfo.cpp @@ -35,6 +35,7 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : TargetOpts(), Triple(T) { NoAsmVariants = false; HasLegalHalfType = false; HasFloat128 = false; + HasFloat16 = false; PointerWidth = PointerAlign = 32; BoolWidth = BoolAlign = 8; IntWidth = IntAlign = 32; diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index 62919a02dcb9fe..6297f23c5aa471 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -50,6 +50,7 @@ AArch64TargetInfo::AArch64TargetInfo(const llvm::Triple &Triple, // All AArch64 implementations support ARMv8 FP, which makes half a legal type. HasLegalHalfType = true; + HasFloat16 = true; LongWidth = LongAlign = PointerWidth = PointerAlign = 64; MaxVectorAlign = 128; diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp index 16644ace108b79..23eee34eaa93de 100644 --- a/clang/lib/Basic/Targets/ARM.cpp +++ b/clang/lib/Basic/Targets/ARM.cpp @@ -397,6 +397,7 @@ bool ARMTargetInfo::handleTargetFeatures(std::vector &Features, SoftFloat = SoftFloatABI = false; HWDiv = 0; DotProd = 0; + HasFloat16 = true; // This does not diagnose illegal cases like having both // "+vfpv2" and "+vfpv3" or having "+neon" and "+fp-only-sp". diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h index 9815292fc276da..e8d92f11a1224c 100644 --- a/clang/lib/Basic/Targets/SPIR.h +++ b/clang/lib/Basic/Targets/SPIR.h @@ -48,6 +48,7 @@ class LLVM_LIBRARY_VISIBILITY SPIRTargetInfo : public TargetInfo { AddrSpaceMap = &SPIRAddrSpaceMap; UseAddrSpaceMapMangling = true; HasLegalHalfType = true; + HasFloat16 = true; // Define available target features // These must be defined in sorted order! NoAsmVariants = true; diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index b4c075e9c46d42..1ae94c8aec9991 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -1442,7 +1442,12 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) { else Result = Context.Int128Ty; break; - case DeclSpec::TST_float16: Result = Context.Float16Ty; break; + case DeclSpec::TST_float16: + if (!S.Context.getTargetInfo().hasFloat16Type()) + S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported) + << "_Float16"; + Result = Context.Float16Ty; + break; case DeclSpec::TST_half: Result = Context.HalfTy; break; case DeclSpec::TST_float: Result = Context.FloatTy; break; case DeclSpec::TST_double: diff --git a/clang/test/AST/float16.cpp b/clang/test/AST/float16.cpp index aa65270c75d4c6..2f428e7085ff1e 100644 --- a/clang/test/AST/float16.cpp +++ b/clang/test/AST/float16.cpp @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -std=c++11 -ast-dump %s | FileCheck %s --strict-whitespace -// RUN: %clang_cc1 -std=c++11 -ast-dump -fnative-half-type %s | FileCheck %s --check-prefix=CHECK-NATIVE --strict-whitespace +// RUN: %clang_cc1 -std=c++11 -ast-dump -triple aarch64-linux-gnu %s | FileCheck %s --strict-whitespace +// RUN: %clang_cc1 -std=c++11 -ast-dump -triple aarch64-linux-gnu -fnative-half-type %s | FileCheck %s --check-prefix=CHECK-NATIVE --strict-whitespace /* Various contexts where type _Float16 can appear. */ diff --git a/clang/test/CodeGenCXX/float16-declarations.cpp b/clang/test/CodeGenCXX/float16-declarations.cpp index 7e1c1e8db93caf..7d07eac48111f5 100644 --- a/clang/test/CodeGenCXX/float16-declarations.cpp +++ b/clang/test/CodeGenCXX/float16-declarations.cpp @@ -1,5 +1,4 @@ // RUN: %clang -std=c++11 --target=aarch64-arm--eabi -S -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64 -// RUN: %clang -std=c++11 --target=x86_64 -S -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-X86 /* Various contexts where type _Float16 can appear. */ @@ -15,7 +14,6 @@ namespace { _Float16 arr1n[10]; // CHECK-AARCH64-DAG: @_ZN12_GLOBAL__N_15arr1nE = internal global [10 x half] zeroinitializer, align 2 -// CHECK-X86-DAG: @_ZN12_GLOBAL__N_15arr1nE = internal global [10 x half] zeroinitializer, align 16 _Float16 arr2n[] = { 1.2, 3.0, 3.e4 }; // CHECK-DAG: @_ZN12_GLOBAL__N_15arr2nE = internal global [3 x half] [half 0xH3CCD, half 0xH4200, half 0xH7753], align 2 @@ -30,14 +28,12 @@ namespace { _Float16 f1f; // CHECK-AARCH64-DAG: @f1f = dso_local global half 0xH0000, align 2 -// CHECK-X86-DAG: @f1f = dso_local global half 0xH0000, align 2 _Float16 f2f = 32.4; // CHECK-DAG: @f2f = dso_local global half 0xH500D, align 2 _Float16 arr1f[10]; // CHECK-AARCH64-DAG: @arr1f = dso_local global [10 x half] zeroinitializer, align 2 -// CHECK-X86-DAG: @arr1f = dso_local global [10 x half] zeroinitializer, align 16 _Float16 arr2f[] = { -1.2, -3.0, -3.e4 }; // CHECK-DAG: @arr2f = dso_local global [3 x half] [half 0xHBCCD, half 0xHC200, half 0xHF753], align 2 @@ -137,8 +133,6 @@ int main(void) { long double cvtld = f2n; //CHECK-AARCh64-DAG: [[H2LD:%[a-z0-9]+]] = fpext half {{%[0-9]+}} to fp128 //CHECK-AARCh64-DAG: store fp128 [[H2LD]], fp128* %{{.*}}, align 16 -//CHECK-X86-DAG: [[H2LD:%[a-z0-9]+]] = fpext half {{%[0-9]+}} to x86_fp80 -//CHECK-X86-DAG: store x86_fp80 [[H2LD]], x86_fp80* %{{.*}}, align 16 _Float16 f2h = 42.0f; //CHECK-DAG: store half 0xH5140, half* %{{.*}}, align 2 diff --git a/clang/test/CodeGenCXX/mangle-ms.cpp b/clang/test/CodeGenCXX/mangle-ms.cpp index e128c944315320..0175b961e5e90a 100644 --- a/clang/test/CodeGenCXX/mangle-ms.cpp +++ b/clang/test/CodeGenCXX/mangle-ms.cpp @@ -1,5 +1,6 @@ // RUN: %clang_cc1 -fblocks -emit-llvm %s -o - -triple=i386-pc-win32 -std=c++98 | FileCheck %s // RUN: %clang_cc1 -fblocks -emit-llvm %s -o - -triple=x86_64-pc-win32 -std=c++98| FileCheck -check-prefix X64 %s +// RUN: %clang_cc1 -fblocks -emit-llvm %s -o - -triple=aarch64-pc-win32 -std=c++98 -DARM | FileCheck -check-prefixes=X64,ARM %s int a; // CHECK-DAG: @"?a@@3HA" @@ -466,10 +467,12 @@ namespace Complex { // CHECK-DAG: define dso_local void @"?f@Complex@@YAXU?$_Complex@H@__clang@@@Z"( void f(_Complex int) {} } +#ifdef ARM namespace Float16 { -// CHECK-DAG: define dso_local void @"?f@Float16@@YAXU_Float16@__clang@@@Z"( +// ARM-DAG: define dso_local void @"?f@Float16@@YAXU_Float16@__clang@@@Z"( void f(_Float16) {} } +#endif // ARM namespace PR26029 { template diff --git a/clang/test/Lexer/half-literal.cpp b/clang/test/Lexer/half-literal.cpp index 8e0034d491dd72..2f1cf9589fab0a 100644 --- a/clang/test/Lexer/half-literal.cpp +++ b/clang/test/Lexer/half-literal.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsyntax-only -verify -pedantic %s +// RUN: %clang_cc1 -fsyntax-only -verify -pedantic -triple aarch64-linux-gnu %s float a = 1.0h; // expected-error{{no matching literal operator for call to 'operator""h' with argument of type 'long double' or 'const char *', and no matching literal operator template}} float b = 1.0H; // expected-error{{invalid suffix 'H' on floating constant}} diff --git a/clang/test/Sema/Float16.c b/clang/test/Sema/Float16.c new file mode 100644 index 00000000000000..bdfb01702c3715 --- /dev/null +++ b/clang/test/Sema/Float16.c @@ -0,0 +1,11 @@ +// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s +// RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s -DHAVE +// RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s -DHAVE +// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s -DHAVE + +#ifdef HAVE +// expected-no-diagnostics +#else +// expected-error@+2{{_Float16 is not supported on this target}} +#endif // HAVE +_Float16 f; From accabdd5fa69f7370538d23e61eb4b5396375b2a Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Mon, 28 Jan 2019 14:10:30 +0000 Subject: [PATCH 055/125] Merging r352229: Redirecting to URL 'https://llvm.org/svn/llvm-project/cfe/trunk': ------------------------------------------------------------------------ r352229 | erichkeane | 2019-01-25 19:36:20 +0100 (Fri, 25 Jan 2019) | 7 lines Remove F16 literal support based on Float16 support. Float16 support was disabled recently on many platforms, however that commit still allowed literals of Float16 type to work. This commit removes those based on the same logic as Float16 disable. Change-Id: I72243048ae2db3dc47bd3d699843e3edf9c395ea ------------------------------------------------------------------------ llvm-svn: 352365 --- clang/lib/Lex/LiteralSupport.cpp | 9 +++++---- clang/test/SemaCXX/Float16.cpp | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+), 4 deletions(-) create mode 100644 clang/test/SemaCXX/Float16.cpp diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp index fa0815eb9c6c5c..3c1da075b0e268 100644 --- a/clang/lib/Lex/LiteralSupport.cpp +++ b/clang/lib/Lex/LiteralSupport.cpp @@ -617,10 +617,11 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, if (isHalf || isFloat || isLong || isFloat128) break; // HF, FF, LF, QF invalid. - if (s + 2 < ThisTokEnd && s[1] == '1' && s[2] == '6') { - s += 2; // success, eat up 2 characters. - isFloat16 = true; - continue; + if (PP.getTargetInfo().hasFloat16Type() && s + 2 < ThisTokEnd && + s[1] == '1' && s[2] == '6') { + s += 2; // success, eat up 2 characters. + isFloat16 = true; + continue; } isFloat = true; diff --git a/clang/test/SemaCXX/Float16.cpp b/clang/test/SemaCXX/Float16.cpp new file mode 100644 index 00000000000000..f27c3839854e10 --- /dev/null +++ b/clang/test/SemaCXX/Float16.cpp @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s +// RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s -DHAVE +// RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s -DHAVE +// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s -DHAVE + +#ifdef HAVE +// expected-no-diagnostics +#endif // HAVE + +#ifndef HAVE +// expected-error@+2{{_Float16 is not supported on this target}} +#endif // !HAVE +_Float16 f; + +#ifndef HAVE +// expected-error@+2{{invalid suffix 'F16' on floating constant}} +#endif // !HAVE +const auto g = 1.1F16; From d1ba949f23d9ccfc9064eb6553df0642212d2bba Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Mon, 28 Jan 2019 14:13:58 +0000 Subject: [PATCH 056/125] Merging r352231: Redirecting to URL 'https://llvm.org/svn/llvm-project/clang-tools-extra/trunk': ------------------------------------------------------------------------ r352231 | jonastoth | 2019-01-25 20:05:12 +0100 (Fri, 25 Jan 2019) | 13 lines [clang-tidy] fix unit tests for dropped _Float16 support in X86 Summary: Because _Float16 was disabled for X86 targets the unit-tests started failing. Extract the pieces for _Float16 and run theses tests under AArch64. Reviewers: aaron.ballman, erichkeane, lebedev.ri Reviewed By: erichkeane Subscribers: javed.absar, xazax.hun, kristof.beyls, cfe-commits Differential Revision: https://reviews.llvm.org/D57249 ------------------------------------------------------------------------ llvm-svn: 352368 --- ...ility-uppercase-literal-suffix-float16.cpp | 51 +++++++++++++++++++ ...ppercase-literal-suffix-floating-point.cpp | 28 ---------- ...eral-suffix-hexadecimal-floating-point.cpp | 15 ------ 3 files changed, 51 insertions(+), 43 deletions(-) create mode 100644 clang-tools-extra/test/clang-tidy/readability-uppercase-literal-suffix-float16.cpp diff --git a/clang-tools-extra/test/clang-tidy/readability-uppercase-literal-suffix-float16.cpp b/clang-tools-extra/test/clang-tidy/readability-uppercase-literal-suffix-float16.cpp new file mode 100644 index 00000000000000..b2b858f9345b5e --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/readability-uppercase-literal-suffix-float16.cpp @@ -0,0 +1,51 @@ +// RUN: %check_clang_tidy %s readability-uppercase-literal-suffix %t -- -- -target aarch64-linux-gnu -I %S + +#include "readability-uppercase-literal-suffix.h" + +void float16_normal_literals() { + // _Float16 + + static constexpr auto v14 = 1.f16; + // CHECK-MESSAGES: :[[@LINE-1]]:31: warning: floating point literal has suffix 'f16', which is not uppercase + // CHECK-MESSAGES-NEXT: static constexpr auto v14 = 1.f16; + // CHECK-MESSAGES-NEXT: ^ ~ + // CHECK-MESSAGES-NEXT: {{^ *}}F16{{$}} + // CHECK-FIXES: static constexpr auto v14 = 1.F16; + static_assert(is_same::value, ""); + static_assert(v14 == 1.F16, ""); + + static constexpr auto v15 = 1.e0f16; + // CHECK-MESSAGES: :[[@LINE-1]]:31: warning: floating point literal has suffix 'f16', which is not uppercase + // CHECK-MESSAGES-NEXT: static constexpr auto v15 = 1.e0f16; + // CHECK-MESSAGES-NEXT: ^ ~ + // CHECK-MESSAGES-NEXT: {{^ *}}F16{{$}} + // CHECK-FIXES: static constexpr auto v15 = 1.e0F16; + static_assert(is_same::value, ""); + static_assert(v15 == 1.F16, ""); + + static constexpr auto v16 = 1.F16; // OK. + static_assert(is_same::value, ""); + static_assert(v16 == 1.F16, ""); + + static constexpr auto v17 = 1.e0F16; // OK. + static_assert(is_same::value, ""); + static_assert(v17 == 1.F16, ""); +} + +void float16_hexadecimal_literals() { +// _Float16 + + static constexpr auto v13 = 0xfp0f16; + // CHECK-MESSAGES: :[[@LINE-1]]:31: warning: floating point literal has suffix 'f16', which is not uppercase + // CHECK-MESSAGES-NEXT: static constexpr auto v13 = 0xfp0f16; + // CHECK-MESSAGES-NEXT: ^ ~ + // CHECK-MESSAGES-NEXT: {{^ *}}F16{{$}} + // CHECK-FIXES: static constexpr auto v13 = 0xfp0F16; + static_assert(is_same::value, ""); + static_assert(v13 == 0xfp0F16, ""); + + static constexpr auto v14 = 0xfp0F16; // OK. + static_assert(is_same::value, ""); + static_assert(v14 == 0xfp0F16, ""); + +} diff --git a/clang-tools-extra/test/clang-tidy/readability-uppercase-literal-suffix-floating-point.cpp b/clang-tools-extra/test/clang-tidy/readability-uppercase-literal-suffix-floating-point.cpp index 4d41db7a5ec646..50e75fae6ae40f 100644 --- a/clang-tools-extra/test/clang-tidy/readability-uppercase-literal-suffix-floating-point.cpp +++ b/clang-tools-extra/test/clang-tidy/readability-uppercase-literal-suffix-floating-point.cpp @@ -97,34 +97,6 @@ void floating_point_suffix() { static constexpr auto v13 = 1.e0Q; // OK. static_assert(is_same::value, ""); static_assert(v13 == 1., ""); - - // _Float16 - - static constexpr auto v14 = 1.f16; - // CHECK-MESSAGES: :[[@LINE-1]]:31: warning: floating point literal has suffix 'f16', which is not uppercase - // CHECK-MESSAGES-NEXT: static constexpr auto v14 = 1.f16; - // CHECK-MESSAGES-NEXT: ^ ~ - // CHECK-MESSAGES-NEXT: {{^ *}}F16{{$}} - // CHECK-FIXES: static constexpr auto v14 = 1.F16; - static_assert(is_same::value, ""); - static_assert(v14 == 1.F16, ""); - - static constexpr auto v15 = 1.e0f16; - // CHECK-MESSAGES: :[[@LINE-1]]:31: warning: floating point literal has suffix 'f16', which is not uppercase - // CHECK-MESSAGES-NEXT: static constexpr auto v15 = 1.e0f16; - // CHECK-MESSAGES-NEXT: ^ ~ - // CHECK-MESSAGES-NEXT: {{^ *}}F16{{$}} - // CHECK-FIXES: static constexpr auto v15 = 1.e0F16; - static_assert(is_same::value, ""); - static_assert(v15 == 1.F16, ""); - - static constexpr auto v16 = 1.F16; // OK. - static_assert(is_same::value, ""); - static_assert(v16 == 1.F16, ""); - - static constexpr auto v17 = 1.e0F16; // OK. - static_assert(is_same::value, ""); - static_assert(v17 == 1.F16, ""); } void floating_point_complex_suffix() { diff --git a/clang-tools-extra/test/clang-tidy/readability-uppercase-literal-suffix-hexadecimal-floating-point.cpp b/clang-tools-extra/test/clang-tidy/readability-uppercase-literal-suffix-hexadecimal-floating-point.cpp index 4cc9d6d2a70406..415c6d8e7915f9 100644 --- a/clang-tools-extra/test/clang-tidy/readability-uppercase-literal-suffix-hexadecimal-floating-point.cpp +++ b/clang-tools-extra/test/clang-tidy/readability-uppercase-literal-suffix-hexadecimal-floating-point.cpp @@ -93,21 +93,6 @@ void floating_point_suffix() { static constexpr auto v12 = 0xfp0Q; // OK. static_assert(is_same::value, ""); static_assert(v12 == 0xfp0, ""); - - // _Float16 - - static constexpr auto v13 = 0xfp0f16; - // CHECK-MESSAGES: :[[@LINE-1]]:31: warning: floating point literal has suffix 'f16', which is not uppercase - // CHECK-MESSAGES-NEXT: static constexpr auto v13 = 0xfp0f16; - // CHECK-MESSAGES-NEXT: ^ ~ - // CHECK-MESSAGES-NEXT: {{^ *}}F16{{$}} - // CHECK-FIXES: static constexpr auto v13 = 0xfp0F16; - static_assert(is_same::value, ""); - static_assert(v13 == 0xfp0F16, ""); - - static constexpr auto v14 = 0xfp0F16; // OK. - static_assert(is_same::value, ""); - static_assert(v14 == 0xfp0F16, ""); } void floating_point_complex_suffix() { From be5f5b31c4fda9240669f903710756befdb15054 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 29 Jan 2019 13:22:08 +0000 Subject: [PATCH 057/125] Merging r352413: ------------------------------------------------------------------------ r352413 | pcc | 2019-01-28 20:29:41 +0100 (Mon, 28 Jan 2019) | 8 lines ELF: Set sh_info on RelaIplt to point to the IgotPlt output section. Previously we were setting it to the GotPlt output section, which is incorrect on ARM where this section is in .got. In static binaries this can lead to sh_info being set to -1 (because there is no .got.plt) which results in various tools rejecting the output file. Differential Revision: https://reviews.llvm.org/D57274 ------------------------------------------------------------------------ llvm-svn: 352489 --- lld/ELF/SyntheticSections.cpp | 4 +++- lld/test/ELF/arm-gnu-ifunc.s | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index f459c1b6b47920..b1a3f8bc70aec7 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -1513,8 +1513,10 @@ void RelocationBaseSection::finalizeContents() { else getParent()->Link = 0; - if (In.RelaIplt == this || In.RelaPlt == this) + if (In.RelaPlt == this) getParent()->Info = In.GotPlt->getParent()->SectionIndex; + if (In.RelaIplt == this) + getParent()->Info = In.IgotPlt->getParent()->SectionIndex; } RelrBaseSection::RelrBaseSection() diff --git a/lld/test/ELF/arm-gnu-ifunc.s b/lld/test/ELF/arm-gnu-ifunc.s index 8a7cb0ae237a79..92f87b5d5faefd 100644 --- a/lld/test/ELF/arm-gnu-ifunc.s +++ b/lld/test/ELF/arm-gnu-ifunc.s @@ -35,6 +35,8 @@ _start: // CHECK-NEXT: Address: 0x100F4 // CHECK-NEXT: Offset: 0xF4 // CHECK-NEXT: Size: 16 +// CHECK-NEXT: Link: +// CHECK-NEXT: Info: 4 // CHECK: Name: .plt // CHECK-NEXT: Type: SHT_PROGBITS // CHECK-NEXT: Flags [ @@ -44,7 +46,8 @@ _start: // CHECK-NEXT: Address: 0x11020 // CHECK-NEXT: Offset: 0x1020 // CHECK-NEXT: Size: 32 -// CHECK: Name: .got +// CHECK: Index: 4 +// CHECK-NEXT: Name: .got // CHECK-NEXT: Type: SHT_PROGBITS // CHECK-NEXT: Flags [ // CHECK-NEXT: SHF_ALLOC From 53ba53bed4e52c9b49297bce06c61e4cb4006b51 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 29 Jan 2019 13:31:43 +0000 Subject: [PATCH 058/125] Merging r352381: ------------------------------------------------------------------------ r352381 | stefan.graenitz | 2019-01-28 17:14:57 +0100 (Mon, 28 Jan 2019) | 5 lines [CMake] Quick-Fix FileCheck target does not exist when building against LLVM install-tree with COMPILER_RT_INCLUDE_TESTS=ON The issue came up during release testing for LLVM 8: https://bugs.llvm.org/show_bug.cgi?id=40443 Differential Revision: https://reviews.llvm.org/D57224 ------------------------------------------------------------------------ llvm-svn: 352490 --- compiler-rt/test/CMakeLists.txt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/compiler-rt/test/CMakeLists.txt b/compiler-rt/test/CMakeLists.txt index 2e239d54e29c8b..7070fb789520c6 100644 --- a/compiler-rt/test/CMakeLists.txt +++ b/compiler-rt/test/CMakeLists.txt @@ -14,10 +14,6 @@ if(COMPILER_RT_BUILD_PROFILE AND COMPILER_RT_HAS_PROFILE) list(APPEND SANITIZER_COMMON_LIT_TEST_DEPS profile) endif() -if(COMPILER_RT_STANDALONE_BUILD) - list(APPEND SANITIZER_COMMON_LIT_TEST_DEPS FileCheck) -endif() - # When ANDROID, we build tests with the host compiler (i.e. CMAKE_C_COMPILER), # and run tests with tools from the host toolchain. if(NOT ANDROID) From caadcebad0ee115a689c8b000e59f73d85af84c9 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 29 Jan 2019 13:32:41 +0000 Subject: [PATCH 059/125] Merging r352382: ------------------------------------------------------------------------ r352382 | stefan.graenitz | 2019-01-28 17:15:27 +0100 (Mon, 28 Jan 2019) | 5 lines [CMake] Quick-Fix targets don't exist when building against LLVM install-tree with LLDB_INCLUDE_TESTS=ON The issue came up during release testing for LLVM 8: https://bugs.llvm.org/show_bug.cgi?id=40443 Differential Revision: https://reviews.llvm.org/D57233 ------------------------------------------------------------------------ llvm-svn: 352491 --- lldb/cmake/modules/AddLLDB.cmake | 4 +++- lldb/lit/CMakeLists.txt | 11 ++++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/lldb/cmake/modules/AddLLDB.cmake b/lldb/cmake/modules/AddLLDB.cmake index f7cac3dad83758..f82c11d3d317e1 100644 --- a/lldb/cmake/modules/AddLLDB.cmake +++ b/lldb/cmake/modules/AddLLDB.cmake @@ -88,7 +88,9 @@ function(add_lldb_library name) # Hack: only some LLDB libraries depend on the clang autogenerated headers, # but it is simple enough to make all of LLDB depend on some of those # headers without negatively impacting much of anything. - add_dependencies(${name} clang-tablegen-targets) + if(NOT LLDB_BUILT_STANDALONE) + add_dependencies(${name} clang-tablegen-targets) + endif() # Add in any extra C++ compilation flags for this library. target_compile_options(${name} PRIVATE ${PARAM_EXTRA_CXXFLAGS}) diff --git a/lldb/lit/CMakeLists.txt b/lldb/lit/CMakeLists.txt index 1ac013b2eed304..804e950b3bb033 100644 --- a/lldb/lit/CMakeLists.txt +++ b/lldb/lit/CMakeLists.txt @@ -26,9 +26,6 @@ list(APPEND LLDB_TEST_DEPS llvm-config llvm-mc llvm-objcopy - FileCheck - count - not ) if(TARGET lld) @@ -55,6 +52,14 @@ configure_lit_site_cfg( ${CMAKE_CURRENT_SOURCE_DIR}/Suite/lit.site.cfg.in ${CMAKE_CURRENT_BINARY_DIR}/Suite/lit.site.cfg) +if(NOT LLDB_BUILT_STANDALONE) + list(APPEND LLDB_TEST_DEPS + FileCheck + count + not + ) +endif() + add_lit_testsuite(check-lldb-lit "Running lldb lit test suite" ${CMAKE_CURRENT_BINARY_DIR} DEPENDS ${LLDB_TEST_DEPS} From 84d97e30428dfcbdac11b909094226f6bde2d521 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 29 Jan 2019 13:46:59 +0000 Subject: [PATCH 060/125] Merging r352459: ------------------------------------------------------------------------ r352459 | mstorsjo | 2019-01-29 09:38:48 +0100 (Tue, 29 Jan 2019) | 7 lines [MinGW] Ignore the --plugin and --plugin-opt option GCC can use LLD with -fuse-ld=lld for MinGW these days, but by default these options are passed to the linker (unless -fno-lto is passed to the GCC driver). Differential Revision: https://reviews.llvm.org/D57304 ------------------------------------------------------------------------ llvm-svn: 352493 --- lld/MinGW/Options.td | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lld/MinGW/Options.td b/lld/MinGW/Options.td index 948faa6875211e..0cda2447e5223b 100644 --- a/lld/MinGW/Options.td +++ b/lld/MinGW/Options.td @@ -78,3 +78,9 @@ def version: F<"version">, HelpText<"Display the version number and exit">; def alias_entry_e: JoinedOrSeparate<["-"], "e">, Alias; def alias_strip_s: Flag<["-"], "s">, Alias; def alias_strip_S: Flag<["-"], "S">, Alias; + +// Ignored options +def: S<"plugin">; +def: J<"plugin=">; +def: S<"plugin-opt">; +def: J<"plugin-opt=">; From 71b117ec6f77b2da1c234572ce92e336923c3ba6 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 29 Jan 2019 14:23:17 +0000 Subject: [PATCH 061/125] Merging r352374: ------------------------------------------------------------------------ r352374 | mgorny | 2019-01-28 16:16:03 +0100 (Mon, 28 Jan 2019) | 18 lines [cmake] Fix get_llvm_lit_path() to respect LLVM_EXTERNAL_LIT always Refactor the get_llvm_lit_path() logic to respect LLVM_EXTERNAL_LIT, and require the fallback to be defined explicitly as LLVM_DEFAULT_EXTERNAL_LIT. This fixes building libcxx standalone after r346888. The old logic was using LLVM_EXTERNAL_LIT both as user-defined cache variable and an optional pre-definition of default value from caller (e.g. libcxx). It included a hack to make this work by assigning the value back and forth but it was fragile and stopped working in libcxx. The new logic is simpler and more transparent. Default value is provided in a separate variable, and used only when user-specified variable is empty (i.e. not overriden). Differential Revision: https://reviews.llvm.org/D57282 ------------------------------------------------------------------------ llvm-svn: 352495 --- llvm/cmake/modules/AddLLVM.cmake | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake index 4dbc0ddaf4f018..0df6845aaa71d3 100644 --- a/llvm/cmake/modules/AddLLVM.cmake +++ b/llvm/cmake/modules/AddLLVM.cmake @@ -1280,7 +1280,6 @@ function(get_llvm_lit_path base_dir file_name) cmake_parse_arguments(ARG "ALLOW_EXTERNAL" "" "" ${ARGN}) if (ARG_ALLOW_EXTERNAL) - set(LLVM_DEFAULT_EXTERNAL_LIT "${LLVM_EXTERNAL_LIT}") set (LLVM_EXTERNAL_LIT "" CACHE STRING "Command used to spawn lit") if ("${LLVM_EXTERNAL_LIT}" STREQUAL "") set(LLVM_EXTERNAL_LIT "${LLVM_DEFAULT_EXTERNAL_LIT}") From 5751c2bae7e14a3a90a38a45cb8788c91b694b6e Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 29 Jan 2019 14:23:34 +0000 Subject: [PATCH 062/125] Merging r352374: ------------------------------------------------------------------------ r352374 | mgorny | 2019-01-28 16:16:03 +0100 (Mon, 28 Jan 2019) | 18 lines [cmake] Fix get_llvm_lit_path() to respect LLVM_EXTERNAL_LIT always Refactor the get_llvm_lit_path() logic to respect LLVM_EXTERNAL_LIT, and require the fallback to be defined explicitly as LLVM_DEFAULT_EXTERNAL_LIT. This fixes building libcxx standalone after r346888. The old logic was using LLVM_EXTERNAL_LIT both as user-defined cache variable and an optional pre-definition of default value from caller (e.g. libcxx). It included a hack to make this work by assigning the value back and forth but it was fragile and stopped working in libcxx. The new logic is simpler and more transparent. Default value is provided in a separate variable, and used only when user-specified variable is empty (i.e. not overriden). Differential Revision: https://reviews.llvm.org/D57282 ------------------------------------------------------------------------ llvm-svn: 352496 --- lldb/cmake/modules/LLDBStandalone.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/cmake/modules/LLDBStandalone.cmake b/lldb/cmake/modules/LLDBStandalone.cmake index e63b2694e6a4c5..a9059dd5f9eb41 100644 --- a/lldb/cmake/modules/LLDBStandalone.cmake +++ b/lldb/cmake/modules/LLDBStandalone.cmake @@ -58,7 +58,7 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) set(LLVM_DIR ${LLVM_OBJ_ROOT}/cmake/modules/CMakeFiles CACHE PATH "Path to LLVM build tree CMake files") set(LLVM_BINARY_DIR ${LLVM_OBJ_ROOT} CACHE PATH "Path to LLVM build tree") set(LLVM_MAIN_SRC_DIR ${MAIN_SRC_DIR} CACHE PATH "Path to LLVM source tree") - set(LLVM_EXTERNAL_LIT ${LLVM_TOOLS_BINARY_DIR}/llvm-lit CACHE PATH "Path to llvm-lit") + set(LLVM_DEFAULT_EXTERNAL_LIT ${LLVM_TOOLS_BINARY_DIR}/llvm-lit CACHE PATH "Path to llvm-lit") find_program(LLVM_TABLEGEN_EXE "llvm-tblgen" ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH) From 2cc5c39442b0a353dedbcbbf4318e7f89378aaa6 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 29 Jan 2019 14:23:48 +0000 Subject: [PATCH 063/125] Merging r352374: ------------------------------------------------------------------------ r352374 | mgorny | 2019-01-28 16:16:03 +0100 (Mon, 28 Jan 2019) | 18 lines [cmake] Fix get_llvm_lit_path() to respect LLVM_EXTERNAL_LIT always Refactor the get_llvm_lit_path() logic to respect LLVM_EXTERNAL_LIT, and require the fallback to be defined explicitly as LLVM_DEFAULT_EXTERNAL_LIT. This fixes building libcxx standalone after r346888. The old logic was using LLVM_EXTERNAL_LIT both as user-defined cache variable and an optional pre-definition of default value from caller (e.g. libcxx). It included a hack to make this work by assigning the value back and forth but it was fragile and stopped working in libcxx. The new logic is simpler and more transparent. Default value is provided in a separate variable, and used only when user-specified variable is empty (i.e. not overriden). Differential Revision: https://reviews.llvm.org/D57282 ------------------------------------------------------------------------ llvm-svn: 352497 --- libcxx/cmake/Modules/HandleOutOfTreeLLVM.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxx/cmake/Modules/HandleOutOfTreeLLVM.cmake b/libcxx/cmake/Modules/HandleOutOfTreeLLVM.cmake index 70eed1d70ba1b6..11c13315585bfc 100644 --- a/libcxx/cmake/Modules/HandleOutOfTreeLLVM.cmake +++ b/libcxx/cmake/Modules/HandleOutOfTreeLLVM.cmake @@ -116,7 +116,7 @@ macro(configure_out_of_tree_llvm) # Required LIT Configuration ------------------------------------------------ # Define the default arguments to use with 'lit', and an option for the user # to override. - set(LLVM_EXTERNAL_LIT "${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py") + set(LLVM_DEFAULT_EXTERNAL_LIT "${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py") set(LIT_ARGS_DEFAULT "-sv --show-xfail --show-unsupported") if (MSVC OR XCODE) set(LIT_ARGS_DEFAULT "${LIT_ARGS_DEFAULT} --no-progress-bar") From c1d99f8f81dbb804f06f77011799ca60bc61679f Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 29 Jan 2019 14:24:10 +0000 Subject: [PATCH 064/125] Merging r352374: ------------------------------------------------------------------------ r352374 | mgorny | 2019-01-28 16:16:03 +0100 (Mon, 28 Jan 2019) | 18 lines [cmake] Fix get_llvm_lit_path() to respect LLVM_EXTERNAL_LIT always Refactor the get_llvm_lit_path() logic to respect LLVM_EXTERNAL_LIT, and require the fallback to be defined explicitly as LLVM_DEFAULT_EXTERNAL_LIT. This fixes building libcxx standalone after r346888. The old logic was using LLVM_EXTERNAL_LIT both as user-defined cache variable and an optional pre-definition of default value from caller (e.g. libcxx). It included a hack to make this work by assigning the value back and forth but it was fragile and stopped working in libcxx. The new logic is simpler and more transparent. Default value is provided in a separate variable, and used only when user-specified variable is empty (i.e. not overriden). Differential Revision: https://reviews.llvm.org/D57282 ------------------------------------------------------------------------ llvm-svn: 352498 --- libcxxabi/cmake/Modules/HandleOutOfTreeLLVM.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxxabi/cmake/Modules/HandleOutOfTreeLLVM.cmake b/libcxxabi/cmake/Modules/HandleOutOfTreeLLVM.cmake index e50d0262f80418..0283a59ac1db2c 100644 --- a/libcxxabi/cmake/Modules/HandleOutOfTreeLLVM.cmake +++ b/libcxxabi/cmake/Modules/HandleOutOfTreeLLVM.cmake @@ -117,7 +117,7 @@ macro(configure_out_of_tree_llvm) # Required LIT Configuration ------------------------------------------------ # Define the default arguments to use with 'lit', and an option for the user # to override. - set(LLVM_EXTERNAL_LIT "${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py") + set(LLVM_DEFAULT_EXTERNAL_LIT "${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py") set(LIT_ARGS_DEFAULT "-sv --show-xfail --show-unsupported") if (MSVC OR XCODE) set(LIT_ARGS_DEFAULT "${LIT_ARGS_DEFAULT} --no-progress-bar") From f103e430684b784f5f4ce185e3e4c7df3c5320f5 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 29 Jan 2019 21:21:14 +0000 Subject: [PATCH 065/125] Merging r351910: ------------------------------------------------------------------------ r351910 | cuviper | 2019-01-23 01:53:22 +0100 (Wed, 23 Jan 2019) | 18 lines [CodeView] Allow empty types in member functions Summary: `CodeViewDebug::lowerTypeMemberFunction` used to default to a `Void` return type if the function's type array was empty. After D54667, it started blindly indexing the 0th item for the return type, which fails in `getOperand` for empty arrays if assertions are enabled. This patch restores the `Void` return type for empty type arrays, and adds a test generated by Rust in line-only debuginfo mode. Reviewers: zturner, rnk Reviewed By: rnk Subscribers: hiraditya, JDevlieghere, llvm-commits Differential Revision: https://reviews.llvm.org/D57070 ------------------------------------------------------------------------ llvm-svn: 352546 --- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 5 +- .../DebugInfo/COFF/types-empty-member-fn.ll | 72 +++++++++++++++++++ 2 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 llvm/test/DebugInfo/COFF/types-empty-member-fn.ll diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 8cabad4ad31239..154f81f2622d4b 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -1836,7 +1836,10 @@ TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty, unsigned Index = 0; SmallVector ArgTypeIndices; - TypeIndex ReturnTypeIndex = getTypeIndex(ReturnAndArgs[Index++]); + TypeIndex ReturnTypeIndex = TypeIndex::Void(); + if (ReturnAndArgs.size() > Index) { + ReturnTypeIndex = getTypeIndex(ReturnAndArgs[Index++]); + } // If the first argument is a pointer type and this isn't a static method, // treat it as the special 'this' parameter, which is encoded separately from diff --git a/llvm/test/DebugInfo/COFF/types-empty-member-fn.ll b/llvm/test/DebugInfo/COFF/types-empty-member-fn.ll new file mode 100644 index 00000000000000..87cba9d62099e8 --- /dev/null +++ b/llvm/test/DebugInfo/COFF/types-empty-member-fn.ll @@ -0,0 +1,72 @@ +; RUN: llc < %s -filetype=obj | llvm-readobj - -codeview | FileCheck %s + +; ModuleID = 'foo.3a1fbbbh-cgu.0' +source_filename = "foo.3a1fbbbh-cgu.0" +target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-windows-msvc" + +; Rust source to regenerate: +; $ cat foo.rs +; pub struct Foo; +; impl Foo { +; pub fn foo() {} +; } +; $ rustc foo.rs --crate-type lib -Cdebuginfo=1 --emit=llvm-ir + +; CHECK: CodeViewTypes [ +; CHECK: MemberFunction (0x1006) { +; CHECK-NEXT: TypeLeafKind: LF_MFUNCTION (0x1009) +; CHECK-NEXT: ReturnType: void (0x3) +; CHECK-NEXT: ClassType: foo::Foo (0x1000) +; CHECK-NEXT: ThisType: 0x0 +; CHECK-NEXT: CallingConvention: NearC (0x0) +; CHECK-NEXT: FunctionOptions [ (0x0) +; CHECK-NEXT: ] +; CHECK-NEXT: NumParameters: 0 +; CHECK-NEXT: ArgListType: () (0x1005) +; CHECK-NEXT: ThisAdjustment: 0 +; CHECK-NEXT: } +; CHECK-NEXT: MemberFuncId (0x1007) { +; CHECK-NEXT: TypeLeafKind: LF_MFUNC_ID (0x1602) +; CHECK-NEXT: ClassType: foo::Foo (0x1000) +; CHECK-NEXT: FunctionType: void foo::Foo::() (0x1006) +; CHECK-NEXT: Name: foo +; CHECK-NEXT: } +; CHECK: CodeViewDebugInfo [ +; CHECK: FunctionLineTable [ +; CHECK-NEXT: LinkageName: _ZN3foo3Foo3foo17hc557c2121772885bE +; CHECK-NEXT: Flags: 0x0 +; CHECK-NEXT: CodeSize: 0x1 +; CHECK-NEXT: FilenameSegment [ +; CHECK-NEXT: Filename: D:\rust\foo.rs (0x0) +; CHECK-NEXT: +0x0 [ +; CHECK-NEXT: LineNumberStart: 3 +; CHECK-NEXT: LineNumberEndDelta: 0 +; CHECK-NEXT: IsStatement: No +; CHECK-NEXT: ] +; CHECK-NEXT: ] +; CHECK-NEXT: ] + +; foo::Foo::foo +; Function Attrs: uwtable +define void @_ZN3foo3Foo3foo17hc557c2121772885bE() unnamed_addr #0 !dbg !5 { +start: + ret void, !dbg !10 +} + +attributes #0 = { uwtable "target-cpu"="x86-64" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} + +!0 = distinct !DICompileUnit(language: DW_LANG_Rust, file: !1, producer: "clang LLVM (rustc version 1.33.0-nightly (8b0f0156e 2019-01-22))", isOptimized: false, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2) +!1 = !DIFile(filename: "foo.rs", directory: "D:\5Crust") +!2 = !{} +!3 = !{i32 2, !"CodeView", i32 1} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = distinct !DISubprogram(name: "foo", linkageName: "_ZN3foo3Foo3foo17hc557c2121772885bE", scope: !6, file: !1, line: 3, type: !9, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, templateParams: !2, retainedNodes: !2) +!6 = !DICompositeType(tag: DW_TAG_structure_type, name: "Foo", scope: !8, file: !7, align: 8, elements: !2, templateParams: !2, identifier: "5105d9fe1a2a3c68518268151b672274") +!7 = !DIFile(filename: "", directory: "") +!8 = !DINamespace(name: "foo", scope: null) +!9 = !DISubroutineType(types: !2) +!10 = !DILocation(line: 3, scope: !5) From 3c554e2ecf08bd7c7e3b24fb2f777f90d4475ce3 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 29 Jan 2019 21:30:42 +0000 Subject: [PATCH 066/125] Merging r352539: ------------------------------------------------------------------------ r352539 | arsenm | 2019-01-29 21:49:47 +0100 (Tue, 29 Jan 2019) | 9 lines Revert "OpenCL: Extend argument promotion rules to vector types" This reverts r348083. This was based on a misreading of the spec for printf specifiers. Also revert r343653, as without a subsequent patch, a correctly specified format for a vector will incorrectly warn. Fixes bug 40491. ------------------------------------------------------------------------ llvm-svn: 352547 --- clang/lib/Headers/opencl-c.h | 2 +- clang/lib/Sema/SemaExpr.cpp | 23 ++++--------------- clang/test/CodeGenOpenCL/printf.cl | 20 ++++++++-------- .../printf-format-string-warnings.cl | 9 ++++---- .../test/SemaOpenCL/printf-format-strings.cl | 4 ++-- 5 files changed, 22 insertions(+), 36 deletions(-) diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h index 160bae807174c3..3d3dfb74905f65 100644 --- a/clang/lib/Headers/opencl-c.h +++ b/clang/lib/Headers/opencl-c.h @@ -14470,7 +14470,7 @@ half16 __ovld __cnfn shuffle2(half16 x, half16 y, ushort16 mask); #if __OPENCL_C_VERSION__ >= CL_VERSION_1_2 // OpenCL v1.2 s6.12.13, v2.0 s6.13.13 - printf -int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2))); +int printf(__constant const char* st, ...); #endif // OpenCL v1.1 s6.11.3, v1.2 s6.12.14, v2.0 s6.13.14 - Image Read and Write Functions diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index d5416d4d057c73..2bcd47abe35699 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -738,33 +738,20 @@ ExprResult Sema::DefaultArgumentPromotion(Expr *E) { return ExprError(); E = Res.get(); - QualType ScalarTy = Ty; - unsigned NumElts = 0; - if (const ExtVectorType *VecTy = Ty->getAs()) { - NumElts = VecTy->getNumElements(); - ScalarTy = VecTy->getElementType(); - } - // If this is a 'float' or '__fp16' (CVR qualified or typedef) // promote to double. // Note that default argument promotion applies only to float (and // half/fp16); it does not apply to _Float16. - const BuiltinType *BTy = ScalarTy->getAs(); + const BuiltinType *BTy = Ty->getAs(); if (BTy && (BTy->getKind() == BuiltinType::Half || BTy->getKind() == BuiltinType::Float)) { if (getLangOpts().OpenCL && !getOpenCLOptions().isEnabled("cl_khr_fp64")) { - if (BTy->getKind() == BuiltinType::Half) { - QualType Ty = Context.FloatTy; - if (NumElts != 0) - Ty = Context.getExtVectorType(Ty, NumElts); - E = ImpCastExprToType(E, Ty, CK_FloatingCast).get(); - } + if (BTy->getKind() == BuiltinType::Half) { + E = ImpCastExprToType(E, Context.FloatTy, CK_FloatingCast).get(); + } } else { - QualType Ty = Context.DoubleTy; - if (NumElts != 0) - Ty = Context.getExtVectorType(Ty, NumElts); - E = ImpCastExprToType(E, Ty, CK_FloatingCast).get(); + E = ImpCastExprToType(E, Context.DoubleTy, CK_FloatingCast).get(); } } diff --git a/clang/test/CodeGenOpenCL/printf.cl b/clang/test/CodeGenOpenCL/printf.cl index 346f6c35bae469..fc139d776db6ef 100644 --- a/clang/test/CodeGenOpenCL/printf.cl +++ b/clang/test/CodeGenOpenCL/printf.cl @@ -12,28 +12,26 @@ int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2))) // ALL-LABEL: @test_printf_float2( -// FP64: %conv = fpext <2 x float> %0 to <2 x double> -// FP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x double> %conv) +// FP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([7 x i8], [7 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x float> %0) -// NOFP64: call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x float> %0) + +// NOFP64: call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([7 x i8], [7 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x float> %0) kernel void test_printf_float2(float2 arg) { - printf("%v2f", arg); + printf("%v2hlf", arg); } // ALL-LABEL: @test_printf_half2( -// FP64: %conv = fpext <2 x half> %0 to <2 x double> -// FP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x double> %conv) #2 +// FP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(2)* @.str.1, i32 0, i32 0), <2 x half> %0) -// NOFP64: %conv = fpext <2 x half> %0 to <2 x float> -// NOFP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x float> %conv) #2 +// NOFP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(2)* @.str.1, i32 0, i32 0), <2 x half> %0) kernel void test_printf_half2(half2 arg) { - printf("%v2f", arg); + printf("%v2hf", arg); } #ifdef cl_khr_fp64 // FP64-LABEL: @test_printf_double2( -// FP64: call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x double> %0) #2 +// FP64: call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(2)* @.str.2, i32 0, i32 0), <2 x double> %0) kernel void test_printf_double2(double2 arg) { - printf("%v2f", arg); + printf("%v2lf", arg); } #endif diff --git a/clang/test/SemaOpenCL/printf-format-string-warnings.cl b/clang/test/SemaOpenCL/printf-format-string-warnings.cl index 2b9c5cc3f319f2..39b859402702f3 100644 --- a/clang/test/SemaOpenCL/printf-format-string-warnings.cl +++ b/clang/test/SemaOpenCL/printf-format-string-warnings.cl @@ -1,13 +1,14 @@ // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -finclude-default-header -// Make sure warnings are produced based on printf format strings. +// FIXME: Make sure warnings are produced based on printf format strings. +// expected-no-diagnostics kernel void format_string_warnings(__constant char* arg) { - printf("%d", arg); // expected-warning {{format specifies type 'int' but the argument has type '__constant char *'}} + printf("%d", arg); - printf("not enough arguments %d %d", 4); // expected-warning {{more '%' conversions than data arguments}} + printf("not enough arguments %d %d", 4); - printf("too many arguments", 4); // expected-warning {{data argument not used by format string}} + printf("too many arguments", 4); } diff --git a/clang/test/SemaOpenCL/printf-format-strings.cl b/clang/test/SemaOpenCL/printf-format-strings.cl index 079a83495685e1..212e1f8981cbef 100644 --- a/clang/test/SemaOpenCL/printf-format-strings.cl +++ b/clang/test/SemaOpenCL/printf-format-strings.cl @@ -13,10 +13,10 @@ int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2))) kernel void format_v4f32(float4 arg) { #ifdef cl_khr_fp64 - printf("%v4f\n", arg); + printf("%v4f\n", arg); // expected-warning{{format specifies type 'double __attribute__((ext_vector_type(4)))' but the argument has type 'float4' (vector of 4 'float' values)}} // Precision modifier - printf("%.2v4f\n", arg); + printf("%.2v4f\n", arg); // expected-warning{{format specifies type 'double __attribute__((ext_vector_type(4)))' but the argument has type 'float4' (vector of 4 'float' values)}} #else // FIXME: These should not warn, and the type should be expected to be float. printf("%v4f\n", arg); // expected-warning {{double __attribute__((ext_vector_type(4)))' but the argument has type 'float4' (vector of 4 'float' values)}} From 88481d545c3e9ac4da0ffec17660200055d31894 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Wed, 30 Jan 2019 19:13:49 +0000 Subject: [PATCH 067/125] Merging r352610: ------------------------------------------------------------------------ r352610 | mgorny | 2019-01-30 09:20:24 +0100 (Wed, 30 Jan 2019) | 9 lines [clang] [Driver] [NetBSD] Append -rpath for shared compiler-rt runtimes Append appropriate -rpath when using shared compiler-rt runtimes, e.g. '-fsanitize=address -shared-libasan'. There's already a similar logic in CommonArgs.cpp but it uses non-standard arch-suffixed installation directory while we want our driver to work with standard installation paths. Differential Revision: https://reviews.llvm.org/D57303 ------------------------------------------------------------------------ llvm-svn: 352650 --- clang/lib/Driver/ToolChains/NetBSD.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/clang/lib/Driver/ToolChains/NetBSD.cpp b/clang/lib/Driver/ToolChains/NetBSD.cpp index b1321cacaf7a3a..c1eae5b05acec6 100644 --- a/clang/lib/Driver/ToolChains/NetBSD.cpp +++ b/clang/lib/Driver/ToolChains/NetBSD.cpp @@ -256,6 +256,13 @@ void netbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, bool NeedsXRayDeps = addXRayRuntime(ToolChain, Args, CmdArgs); AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); + const SanitizerArgs &SanArgs = ToolChain.getSanitizerArgs(); + if (SanArgs.needsSharedRt()) { + CmdArgs.push_back("-rpath"); + CmdArgs.push_back(Args.MakeArgString( + ToolChain.getCompilerRTPath().c_str())); + } + unsigned Major, Minor, Micro; ToolChain.getTriple().getOSVersion(Major, Minor, Micro); bool useLibgcc = true; From dbbe63346ce6ac0c51ee6ba756a75f0a73e9109f Mon Sep 17 00:00:00 2001 From: Simon Atanasyan Date: Wed, 30 Jan 2019 21:18:03 +0000 Subject: [PATCH 068/125] [docs][mips] Add MIPS specific release notes for LLD 8.0 Differential Revision: http://reviews.llvm.org/D57459 llvm-svn: 352674 --- lld/docs/ReleaseNotes.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index a146bddd543af9..c02cc586c795ea 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -40,6 +40,9 @@ ELF Improvements * The following flags have been added: ``-z interpose``, ``-z global`` +* lld now uses the ``sigrie`` instruction as a trap instruction for + MIPS targets. + COFF Improvements ----------------- From dfc033d9a486a8c1bfd54f829112df65a53d8053 Mon Sep 17 00:00:00 2001 From: Simon Atanasyan Date: Wed, 30 Jan 2019 21:19:40 +0000 Subject: [PATCH 069/125] [docs][mips] Clang 8.0 Release notes MIPS specific part of Clang 8.0 Release notes. Feel free to add more notes if I miss something. Differential Revision: http://reviews.llvm.org/D57458 llvm-svn: 352675 --- clang/docs/ReleaseNotes.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 28be16677eceef..50bf636a51f437 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -127,6 +127,10 @@ Non-comprehensive list of changes in this release manually and rely on the old behaviour you will need to add appropriate compiler flags for finding the corresponding libc++ include directory. +- The integrated assembler is used now by default for all MIPS targets. + +- Improved support for MIPS N32 ABI and MIPS R6 target triples. + New Compiler Flags ------------------ @@ -139,6 +143,10 @@ New Compiler Flags - When using a custom stack alignment, the ``stackrealign`` attribute is now implicitly set on the main function. +- Emission of ``R_MIPS_JALR`` and ``R_MICROMIPS_JALR`` relocations can now + be controlled by the ``-mrelax-pic-calls`` and ``-mno-relax-pic-calls`` + options. + - ... Deprecated Compiler Flags From 8a11e14ef3568455b0e61467a8d578bbb760d3bb Mon Sep 17 00:00:00 2001 From: Simon Atanasyan Date: Wed, 30 Jan 2019 22:45:12 +0000 Subject: [PATCH 070/125] [docs][mips] 8.0 Release notes MIPS specific part of LLVM 8.0 Release notes. Differential Revision: http://reviews.llvm.org/D57457 llvm-svn: 352682 --- llvm/docs/ReleaseNotes.rst | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index cb80459546f540..82e4d56d48f467 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -52,6 +52,8 @@ Non-comprehensive list of changes in this release from a dll are accessed via a stub, to allow the linker to convert it to a dllimport if needed. +* Added support for labels as offsets in ``.reloc`` directive. + .. NOTE If you would like to document a larger change, then you can add a subsection about it right here. You can copy the following boilerplate @@ -82,8 +84,23 @@ Changes to the ARM Backend Changes to the MIPS Target -------------------------- - During this release ... +* Improved support of GlobalISel instruction selection framework. + +* Implemented emission of ``R_MIPS_JALR`` and ``R_MICROMIPS_JALR`` + relocations. These relocations provide hints to a linker for optimization + of jumps to protected symbols. + +* ORC JIT has been supported for MIPS and MIPS64 architectures. + +* Assembler now suggests alternative MIPS instruction mnemonics when + an invalid one is specified. + +* Improved support for MIPS N32 ABI. + +* Added new instructions (``pll.ps``, ``plu.ps``, ``cvt.s.pu``, + ``cvt.s.pl``, ``cvt.ps``, ``sigrie``). +* Numerous bug fixes and code cleanups. Changes to the PowerPC Target ----------------------------- From 6dcd982f4bcb32c9b0c62173ef5460079d58e8b1 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 1 Feb 2019 10:45:20 +0000 Subject: [PATCH 071/125] Merging r352407: ------------------------------------------------------------------------ r352407 | ruiu | 2019-01-28 20:11:52 +0100 (Mon, 28 Jan 2019) | 1 line Refactoring. NFC. ------------------------------------------------------------------------ llvm-svn: 352850 --- lld/ELF/ScriptParser.cpp | 60 ++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 36 deletions(-) diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp index eee3f0e330cc3d..84571db25714a7 100644 --- a/lld/ELF/ScriptParser.cpp +++ b/lld/ELF/ScriptParser.cpp @@ -385,39 +385,23 @@ void ScriptParser::readOutputArch() { skip(); } -std::tuple ScriptParser::readBfdName() { - StringRef S = unquote(next()); - if (S == "elf32-i386") - return std::make_tuple(ELF32LEKind, EM_386, false); - if (S == "elf32-iamcu") - return std::make_tuple(ELF32LEKind, EM_IAMCU, false); - if (S == "elf32-littlearm") - return std::make_tuple(ELF32LEKind, EM_ARM, false); - if (S == "elf32-x86-64") - return std::make_tuple(ELF32LEKind, EM_X86_64, false); - if (S == "elf64-littleaarch64") - return std::make_tuple(ELF64LEKind, EM_AARCH64, false); - if (S == "elf64-powerpc") - return std::make_tuple(ELF64BEKind, EM_PPC64, false); - if (S == "elf64-powerpcle") - return std::make_tuple(ELF64LEKind, EM_PPC64, false); - if (S == "elf64-x86-64") - return std::make_tuple(ELF64LEKind, EM_X86_64, false); - if (S == "elf32-tradbigmips") - return std::make_tuple(ELF32BEKind, EM_MIPS, false); - if (S == "elf32-ntradbigmips") - return std::make_tuple(ELF32BEKind, EM_MIPS, true); - if (S == "elf32-tradlittlemips") - return std::make_tuple(ELF32LEKind, EM_MIPS, false); - if (S == "elf32-ntradlittlemips") - return std::make_tuple(ELF32LEKind, EM_MIPS, true); - if (S == "elf64-tradbigmips") - return std::make_tuple(ELF64BEKind, EM_MIPS, false); - if (S == "elf64-tradlittlemips") - return std::make_tuple(ELF64LEKind, EM_MIPS, false); - - setError("unknown output format name: " + S); - return std::make_tuple(ELFNoneKind, EM_NONE, false); +static std::tuple parseBfdName(StringRef S) { + return StringSwitch>(S) + .Case("elf32-i386", {ELF32LEKind, EM_386}) + .Case("elf32-iamcu", {ELF32LEKind, EM_IAMCU}) + .Case("elf32-littlearm", {ELF32LEKind, EM_ARM}) + .Case("elf32-x86-64", {ELF32LEKind, EM_X86_64}) + .Case("elf64-littleaarch64", {ELF64LEKind, EM_AARCH64}) + .Case("elf64-powerpc", {ELF64BEKind, EM_PPC64}) + .Case("elf64-powerpcle", {ELF64LEKind, EM_PPC64}) + .Case("elf64-x86-64", {ELF64LEKind, EM_X86_64}) + .Case("elf32-tradbigmips", {ELF32BEKind, EM_MIPS}) + .Case("elf32-ntradbigmips", {ELF32BEKind, EM_MIPS}) + .Case("elf32-tradlittlemips", {ELF32LEKind, EM_MIPS}) + .Case("elf32-ntradlittlemips", {ELF32LEKind, EM_MIPS}) + .Case("elf64-tradbigmips", {ELF64BEKind, EM_MIPS}) + .Case("elf64-tradlittlemips", {ELF64LEKind, EM_MIPS}) + .Default({ELFNoneKind, EM_NONE}); } // Parse OUTPUT_FORMAT(bfdname) or OUTPUT_FORMAT(bfdname, big, little). @@ -425,9 +409,13 @@ std::tuple ScriptParser::readBfdName() { void ScriptParser::readOutputFormat() { expect("("); - std::tuple BfdTuple = readBfdName(); - if (Config->EKind == ELFNoneKind) - std::tie(Config->EKind, Config->EMachine, Config->MipsN32Abi) = BfdTuple; + StringRef S = unquote(next()); + + std::tie(Config->EKind, Config->EMachine) = parseBfdName(S); + if (Config->EMachine == EM_NONE) + setError("unknown output format name: " + S); + if (S == "elf32-ntradlittlemips" || S == "elf32-ntradbigmips") + Config->MipsN32Abi = true; if (consume(")")) return; From 9f0ae69afedb9562c24a8a92da347e843057c0d4 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 1 Feb 2019 10:46:13 +0000 Subject: [PATCH 072/125] Merging r352435: ------------------------------------------------------------------------ r352435 | ruiu | 2019-01-28 22:45:50 +0100 (Mon, 28 Jan 2019) | 1 line Attempt to fix build failure with GCC 5.4. ------------------------------------------------------------------------ llvm-svn: 352851 --- lld/ELF/ScriptParser.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp index 84571db25714a7..9e59fe7c6776ff 100644 --- a/lld/ELF/ScriptParser.cpp +++ b/lld/ELF/ScriptParser.cpp @@ -385,8 +385,8 @@ void ScriptParser::readOutputArch() { skip(); } -static std::tuple parseBfdName(StringRef S) { - return StringSwitch>(S) +static std::pair parseBfdName(StringRef S) { + return StringSwitch>(S) .Case("elf32-i386", {ELF32LEKind, EM_386}) .Case("elf32-iamcu", {ELF32LEKind, EM_IAMCU}) .Case("elf32-littlearm", {ELF32LEKind, EM_ARM}) From 15decd1d488aac686faaa4afbb98254e1152a212 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 1 Feb 2019 10:47:13 +0000 Subject: [PATCH 073/125] Merging r352482: ------------------------------------------------------------------------ r352482 | grimar | 2019-01-29 12:46:00 +0100 (Tue, 29 Jan 2019) | 3 lines [ELF] - Remove dead `readBfdName` declaration. NFC. `readBfdName` was removed recently. ------------------------------------------------------------------------ llvm-svn: 352852 --- lld/ELF/ScriptParser.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp index 9e59fe7c6776ff..91c8aac8d8dc5f 100644 --- a/lld/ELF/ScriptParser.cpp +++ b/lld/ELF/ScriptParser.cpp @@ -94,7 +94,6 @@ class ScriptParser final : ScriptLexer { SortSectionPolicy readSortKind(); SymbolAssignment *readProvideHidden(bool Provide, bool Hidden); SymbolAssignment *readAssignment(StringRef Tok); - std::tuple readBfdName(); void readSort(); Expr readAssert(); Expr readConstant(); From 538ee7319dead9662507c59d87384be178c30c9e Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 1 Feb 2019 10:48:08 +0000 Subject: [PATCH 074/125] Merging r352606: ------------------------------------------------------------------------ r352606 | dim | 2019-01-30 07:31:52 +0100 (Wed, 30 Jan 2019) | 27 lines Recognize FreeBSD specific BFD names in OUTPUT_FORMAT Summary: After rLLD344952 ("Add OUTPUT_FORMAT linker script directive support"), using BFD names such as `elf64-x86-64-freebsd` the `OUTPUT_FORMAT` linker script command does not work anymore, resulting in errors like: ``` ld: error: /home/dim/src/clang800-import/stand/efi/loader/arch/amd64/ldscript.amd64:2: unknown output format name: elf64-x86-64-freebsd >>> OUTPUT_FORMAT("elf64-x86-64-freebsd", "elf64-x86-64-freebsd", "elf64-x86-64-freebsd") >>> ^ ``` To fix this, recognize a `-freebsd` suffix in BFD names, and also set `Configuration::OSABI` to `ELFOSABI_FREEBSD` for those cases. Add and/or update several test cases to check for the correct results of these new `OUTPUT_FORMAT` arguments. Reviewers: ruiu, atanasyan, grimar, hokein, emaste, espindola Reviewed By: ruiu Subscribers: nemanjai, javed.absar, arichardson, krytarowski, kristof.beyls, kbarton, llvm-commits Differential Revision: https://reviews.llvm.org/D57283 ------------------------------------------------------------------------ llvm-svn: 352853 --- lld/ELF/ScriptParser.cpp | 8 ++++++-- lld/test/ELF/emulation-aarch64.s | 23 +++++++++++++++++++++++ lld/test/ELF/emulation-ppc.s | 32 ++++++++++++++++++++++++++++++++ lld/test/ELF/emulation-x86.s | 10 ++++++++-- 4 files changed, 69 insertions(+), 4 deletions(-) diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp index 91c8aac8d8dc5f..7cce94659c9e9d 100644 --- a/lld/ELF/ScriptParser.cpp +++ b/lld/ELF/ScriptParser.cpp @@ -390,6 +390,7 @@ static std::pair parseBfdName(StringRef S) { .Case("elf32-iamcu", {ELF32LEKind, EM_IAMCU}) .Case("elf32-littlearm", {ELF32LEKind, EM_ARM}) .Case("elf32-x86-64", {ELF32LEKind, EM_X86_64}) + .Case("elf64-aarch64", {ELF64LEKind, EM_AARCH64}) .Case("elf64-littleaarch64", {ELF64LEKind, EM_AARCH64}) .Case("elf64-powerpc", {ELF64BEKind, EM_PPC64}) .Case("elf64-powerpcle", {ELF64LEKind, EM_PPC64}) @@ -408,11 +409,14 @@ static std::pair parseBfdName(StringRef S) { void ScriptParser::readOutputFormat() { expect("("); - StringRef S = unquote(next()); + StringRef Name = unquote(next()); + StringRef S = Name; + if (S.consume_back("-freebsd")) + Config->OSABI = ELFOSABI_FREEBSD; std::tie(Config->EKind, Config->EMachine) = parseBfdName(S); if (Config->EMachine == EM_NONE) - setError("unknown output format name: " + S); + setError("unknown output format name: " + Name); if (S == "elf32-ntradlittlemips" || S == "elf32-ntradbigmips") Config->MipsN32Abi = true; diff --git a/lld/test/ELF/emulation-aarch64.s b/lld/test/ELF/emulation-aarch64.s index b9a6428fa953e0..c0edc9a69d36de 100644 --- a/lld/test/ELF/emulation-aarch64.s +++ b/lld/test/ELF/emulation-aarch64.s @@ -30,5 +30,28 @@ # AARCH64-NEXT: Flags [ (0x0) # AARCH64-NEXT: ] +# RUN: llvm-mc -filetype=obj -triple=aarch64-unknown-freebsd %s -o %taarch64fbsd +# RUN: echo 'OUTPUT_FORMAT(elf64-aarch64-freebsd)' > %taarch64fbsd.script +# RUN: ld.lld %taarch64fbsd.script %taarch64fbsd -o %t2aarch64fbsd +# RUN: llvm-readobj -file-headers %t2aarch64fbsd | FileCheck --check-prefix=AARCH64-FBSD %s +# AARCH64-FBSD: ElfHeader { +# AARCH64-FBSD-NEXT: Ident { +# AARCH64-FBSD-NEXT: Magic: (7F 45 4C 46) +# AARCH64-FBSD-NEXT: Class: 64-bit (0x2) +# AARCH64-FBSD-NEXT: DataEncoding: LittleEndian (0x1) +# AARCH64-FBSD-NEXT: FileVersion: 1 +# AARCH64-FBSD-NEXT: OS/ABI: FreeBSD (0x9) +# AARCH64-FBSD-NEXT: ABIVersion: 0 +# AARCH64-FBSD-NEXT: Unused: (00 00 00 00 00 00 00) +# AARCH64-FBSD-NEXT: } +# AARCH64-FBSD-NEXT: Type: Executable (0x2) +# AARCH64-FBSD-NEXT: Machine: EM_AARCH64 (0xB7) +# AARCH64-FBSD-NEXT: Version: 1 +# AARCH64-FBSD-NEXT: Entry: +# AARCH64-FBSD-NEXT: ProgramHeaderOffset: 0x40 +# AARCH64-FBSD-NEXT: SectionHeaderOffset: +# AARCH64-FBSD-NEXT: Flags [ (0x0) +# AARCH64-FBSD-NEXT: ] + .globl _start _start: diff --git a/lld/test/ELF/emulation-ppc.s b/lld/test/ELF/emulation-ppc.s index 12e84782252fdf..843e77604779b1 100644 --- a/lld/test/ELF/emulation-ppc.s +++ b/lld/test/ELF/emulation-ppc.s @@ -35,6 +35,38 @@ # PPC64-NEXT: StringTableSectionIndex: # PPC64-NEXT: } +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-freebsd %s -o %tppc64fbsd +# RUN: echo 'OUTPUT_FORMAT(elf64-powerpc-freebsd)' > %tppc64fbsd.script +# RUN: ld.lld %tppc64fbsd.script %tppc64fbsd -o %t2ppc64fbsd +# RUN: llvm-readobj -file-headers %t2ppc64fbsd | FileCheck --check-prefix=PPC64-FBSD %s + +# PPC64-FBSD: ElfHeader { +# PPC64-FBSD-NEXT: Ident { +# PPC64-FBSD-NEXT: Magic: (7F 45 4C 46) +# PPC64-FBSD-NEXT: Class: 64-bit (0x2) +# PPC64-FBSD-NEXT: DataEncoding: BigEndian (0x2) +# PPC64-FBSD-NEXT: FileVersion: 1 +# PPC64-FBSD-NEXT: OS/ABI: FreeBSD (0x9) +# PPC64-FBSD-NEXT: ABIVersion: 0 +# PPC64-FBSD-NEXT: Unused: (00 00 00 00 00 00 00) +# PPC64-FBSD-NEXT: } +# PPC64-FBSD-NEXT: Type: Executable (0x2) +# PPC64-FBSD-NEXT: Machine: EM_PPC64 (0x15) +# PPC64-FBSD-NEXT: Version: 1 +# PPC64-FBSD-NEXT: Entry: +# PPC64-FBSD-NEXT: ProgramHeaderOffset: 0x40 +# PPC64-FBSD-NEXT: SectionHeaderOffset: +# PPC64-FBSD-NEXT: Flags [ (0x2) +# PPC64-FBSD-NEXT: 0x2 +# PPC64-FBSD-NEXT: ] +# PPC64-FBSD-NEXT: HeaderSize: 64 +# PPC64-FBSD-NEXT: ProgramHeaderEntrySize: 56 +# PPC64-FBSD-NEXT: ProgramHeaderCount: +# PPC64-FBSD-NEXT: SectionHeaderEntrySize: 64 +# PPC64-FBSD-NEXT: SectionHeaderCount: +# PPC64-FBSD-NEXT: StringTableSectionIndex: +# PPC64-FBSD-NEXT: } + # RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %tppc64le # RUN: ld.lld -m elf64lppc %tppc64le -o %t2ppc64le # RUN: llvm-readobj -file-headers %t2ppc64le | FileCheck --check-prefix=PPC64LE %s diff --git a/lld/test/ELF/emulation-x86.s b/lld/test/ELF/emulation-x86.s index 65d807c67f2f0c..02b89435669287 100644 --- a/lld/test/ELF/emulation-x86.s +++ b/lld/test/ELF/emulation-x86.s @@ -7,6 +7,9 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.sysv # RUN: ld.lld -m elf_amd64_fbsd %t.sysv -o %t.freebsd # RUN: llvm-readobj -file-headers %t.freebsd | FileCheck --check-prefix=AMD64 %s +# RUN: echo 'OUTPUT_FORMAT(elf64-x86-64-freebsd)' > %t4x64.script +# RUN: ld.lld %t4x64.script %tx64 -o %t4x64 +# RUN: llvm-readobj -file-headers %t4x64 | FileCheck --check-prefix=AMD64 %s # AMD64: ElfHeader { # AMD64-NEXT: Ident { # AMD64-NEXT: Magic: (7F 45 4C 46) @@ -137,10 +140,13 @@ # X86-NEXT: } # RUN: llvm-mc -filetype=obj -triple=i686-unknown-freebsd %s -o %tx86fbsd -# RUN: ld.lld -m elf_i386_fbsd %tx86fbsd -o %t2x86_fbsd -# RUN: llvm-readobj -file-headers %t2x86_fbsd | FileCheck --check-prefix=X86FBSD %s +# RUN: ld.lld -m elf_i386_fbsd %tx86fbsd -o %t2x86fbsd +# RUN: llvm-readobj -file-headers %t2x86fbsd | FileCheck --check-prefix=X86FBSD %s # RUN: ld.lld %tx86fbsd -o %t3x86fbsd # RUN: llvm-readobj -file-headers %t3x86fbsd | FileCheck --check-prefix=X86FBSD %s +# RUN: echo 'OUTPUT_FORMAT(elf32-i386-freebsd)' > %t4x86fbsd.script +# RUN: ld.lld %t4x86fbsd.script %tx86fbsd -o %t4x86fbsd +# RUN: llvm-readobj -file-headers %t4x86fbsd | FileCheck --check-prefix=X86FBSD %s # X86FBSD: ElfHeader { # X86FBSD-NEXT: Ident { # X86FBSD-NEXT: Magic: (7F 45 4C 46) From 2a08347d7fa20368c2d86a3fd56ad9398fad944b Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 1 Feb 2019 10:50:43 +0000 Subject: [PATCH 075/125] Merging r352672: ------------------------------------------------------------------------ r352672 | epilk | 2019-01-30 22:14:08 +0100 (Wed, 30 Jan 2019) | 4 lines Don't define __has_feature(objc_fixed_enum) in non-objc mode This is only a formal language feature in ObjC, otherwise its just an extension. Making this change was also an ABI break. ------------------------------------------------------------------------ llvm-svn: 352854 --- clang/include/clang/Basic/Features.def | 2 +- clang/test/SemaObjC/enum-fixed-type.m | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/Features.def b/clang/include/clang/Basic/Features.def index 05464ed85f1360..8b3b59b51c5424 100644 --- a/clang/include/clang/Basic/Features.def +++ b/clang/include/clang/Basic/Features.def @@ -96,7 +96,7 @@ FEATURE(objc_arc, LangOpts.ObjCAutoRefCount) FEATURE(objc_arc_fields, true) FEATURE(objc_arc_weak, LangOpts.ObjCWeak) FEATURE(objc_default_synthesize_properties, LangOpts.ObjC) -FEATURE(objc_fixed_enum, true) +FEATURE(objc_fixed_enum, LangOpts.ObjC) FEATURE(objc_instancetype, LangOpts.ObjC) FEATURE(objc_kindof, LangOpts.ObjC) FEATURE(objc_modules, LangOpts.ObjC && LangOpts.Modules) diff --git a/clang/test/SemaObjC/enum-fixed-type.m b/clang/test/SemaObjC/enum-fixed-type.m index 88c895a33982d2..b4135a555a23ec 100644 --- a/clang/test/SemaObjC/enum-fixed-type.m +++ b/clang/test/SemaObjC/enum-fixed-type.m @@ -1,9 +1,11 @@ // RUN: %clang_cc1 -fsyntax-only -pedantic -verify %s // RUN: %clang_cc1 -fsyntax-only -verify -xc %s +#ifdef __OBJC__ #if !__has_feature(objc_fixed_enum) # error Enumerations with a fixed underlying type are not supported #endif +#endif #if !__has_extension(cxx_fixed_enum) # error Enumerations with a fixed underlying type are not supported From e26a2f2368bf6844ab6ce3ba83ed6a34b15969e4 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 1 Feb 2019 10:57:17 +0000 Subject: [PATCH 076/125] Merging r352822: ------------------------------------------------------------------------ r352822 | ahatanak | 2019-02-01 01:12:06 +0100 (Fri, 01 Feb 2019) | 8 lines Revert "[Sema] Make canPassInRegisters return true if the CXXRecordDecl passed" This reverts commit r350920 as it is not clear whether we should force a class to be returned in registers when copy and move constructors are both deleted. For more background, see the following discussion: http://lists.llvm.org/pipermail/cfe-commits/Week-of-Mon-20190128/259907.html ------------------------------------------------------------------------ llvm-svn: 352855 --- clang/lib/Sema/SemaDeclCXX.cpp | 3 --- clang/test/CodeGenCXX/trivial_abi.cpp | 19 ++----------------- 2 files changed, 2 insertions(+), 20 deletions(-) diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index e66ee43307036f..8b3556f715bfdc 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -5890,9 +5890,6 @@ static bool canPassInRegisters(Sema &S, CXXRecordDecl *D, if (D->isDependentType() || D->isInvalidDecl()) return false; - if (D->hasAttr()) - return true; - // Clang <= 4 used the pre-C++11 rule, which ignores move operations. // The PS4 platform ABI follows the behavior of Clang 3.2. if (CCK == TargetInfo::CCK_ClangABI4OrPS4) diff --git a/clang/test/CodeGenCXX/trivial_abi.cpp b/clang/test/CodeGenCXX/trivial_abi.cpp index e37c8ff615a268..2cf07b22581a2c 100644 --- a/clang/test/CodeGenCXX/trivial_abi.cpp +++ b/clang/test/CodeGenCXX/trivial_abi.cpp @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -triple arm64-apple-ios11 -std=c++17 -fcxx-exceptions -fexceptions -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple arm64-apple-ios11 -std=c++17 -fcxx-exceptions -fexceptions -fclang-abi-compat=4.0 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple arm64-apple-ios11 -std=c++11 -fcxx-exceptions -fexceptions -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple arm64-apple-ios11 -std=c++11 -fcxx-exceptions -fexceptions -fclang-abi-compat=4.0 -emit-llvm -o - %s | FileCheck %s // CHECK: %[[STRUCT_SMALL:.*]] = type { i32* } // CHECK: %[[STRUCT_LARGE:.*]] = type { i32*, [128 x i32] } @@ -43,13 +43,6 @@ struct HasNonTrivial { NonTrivial m; }; -struct __attribute__((trivial_abi)) CopyMoveDeleted { - CopyMoveDeleted(int); - CopyMoveDeleted(const CopyMoveDeleted &) = delete; - CopyMoveDeleted(CopyMoveDeleted &&) = delete; - int a; -}; - // CHECK: define void @_Z14testParamSmall5Small(i64 %[[A_COERCE:.*]]) // CHECK: %[[A:.*]] = alloca %[[STRUCT_SMALL]], align 8 // CHECK: %[[COERCE_DIVE:.*]] = getelementptr inbounds %[[STRUCT_SMALL]], %[[STRUCT_SMALL]]* %[[A]], i32 0, i32 0 @@ -244,11 +237,3 @@ void calleeExceptionLarge(Large, Large); void testExceptionLarge() { calleeExceptionLarge(Large(), Large()); } - -// A class with deleted copy and move constructors can still be passed or -// returned in registers if the class is annotated with trivial_abi. - -// CHECK: define i64 @_Z19testCopyMoveDeletedi(i32 % -CopyMoveDeleted testCopyMoveDeleted(int a) { - return a; -} From d4c2ff4db45544c93026eb475c74783ecf92ce8f Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 1 Feb 2019 11:01:12 +0000 Subject: [PATCH 077/125] Merging r352770: ------------------------------------------------------------------------ r352770 | tejohnson | 2019-01-31 18:18:11 +0100 (Thu, 31 Jan 2019) | 3 lines Recommit "[ThinLTO] Rename COMDATs for COFF when promoting/renaming COMDAT leader" Recommit of r352763 with fix for use after free. ------------------------------------------------------------------------ llvm-svn: 352856 --- .../Transforms/Utils/FunctionImportUtils.h | 5 +++ .../Transforms/Utils/FunctionImportUtils.cpp | 18 +++++++++++ .../FunctionImport/Inputs/comdat.ll | 10 ++++++ llvm/test/Transforms/FunctionImport/comdat.ll | 32 +++++++++++++++++++ 4 files changed, 65 insertions(+) create mode 100644 llvm/test/Transforms/FunctionImport/Inputs/comdat.ll create mode 100644 llvm/test/Transforms/FunctionImport/comdat.ll diff --git a/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h b/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h index e24398b90012b3..fe13559768189e 100644 --- a/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h +++ b/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h @@ -44,6 +44,11 @@ class FunctionImportGlobalProcessing { /// to promote any non-renamable values. SmallPtrSet Used; + /// Keep track of any COMDATs that require renaming (because COMDAT + /// leader was promoted and renamed). Maps from original COMDAT to one + /// with new name. + DenseMap RenamedComdats; + /// Check if we should promote the given local value to global scope. bool shouldPromoteLocalToGlobal(const GlobalValue *SGV); diff --git a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp index a9772e31da5095..81d63ee80394ef 100644 --- a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp +++ b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp @@ -249,6 +249,8 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) { bool DoPromote = false; if (GV.hasLocalLinkage() && ((DoPromote = shouldPromoteLocalToGlobal(&GV)) || isPerformingImport())) { + // Save the original name string before we rename GV below. + auto Name = GV.getName().str(); // Once we change the name or linkage it is difficult to determine // again whether we should promote since shouldPromoteLocalToGlobal needs // to locate the summary (based on GUID from name and linkage). Therefore, @@ -257,6 +259,12 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) { GV.setLinkage(getLinkage(&GV, DoPromote)); if (!GV.hasLocalLinkage()) GV.setVisibility(GlobalValue::HiddenVisibility); + + // If we are renaming a COMDAT leader, ensure that we record the COMDAT + // for later renaming as well. This is required for COFF. + if (const auto *C = GV.getComdat()) + if (C->getName() == Name) + RenamedComdats.try_emplace(C, M.getOrInsertComdat(GV.getName())); } else GV.setLinkage(getLinkage(&GV, /* DoPromote */ false)); @@ -281,6 +289,16 @@ void FunctionImportGlobalProcessing::processGlobalsForThinLTO() { processGlobalForThinLTO(SF); for (GlobalAlias &GA : M.aliases()) processGlobalForThinLTO(GA); + + // Replace any COMDATS that required renaming (because the COMDAT leader was + // promoted and renamed). + if (!RenamedComdats.empty()) + for (auto &GO : M.global_objects()) + if (auto *C = GO.getComdat()) { + auto Replacement = RenamedComdats.find(C); + if (Replacement != RenamedComdats.end()) + GO.setComdat(Replacement->second); + } } bool FunctionImportGlobalProcessing::run() { diff --git a/llvm/test/Transforms/FunctionImport/Inputs/comdat.ll b/llvm/test/Transforms/FunctionImport/Inputs/comdat.ll new file mode 100644 index 00000000000000..1df6f25351e587 --- /dev/null +++ b/llvm/test/Transforms/FunctionImport/Inputs/comdat.ll @@ -0,0 +1,10 @@ +target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-windows-msvc19.0.24215" + +define void @main() { +entry: + call i8* @lwt_fun() + ret void +} + +declare i8* @lwt_fun() diff --git a/llvm/test/Transforms/FunctionImport/comdat.ll b/llvm/test/Transforms/FunctionImport/comdat.ll new file mode 100644 index 00000000000000..29e8cb538ab665 --- /dev/null +++ b/llvm/test/Transforms/FunctionImport/comdat.ll @@ -0,0 +1,32 @@ +; Test to ensure that comdat is renamed consistently when comdat leader is +; promoted and renamed due to an import. Required by COFF. + +; REQUIRES: x86-registered-target + +; RUN: opt -thinlto-bc -o %t1.bc %s +; RUN: opt -thinlto-bc -o %t2.bc %S/Inputs/comdat.ll +; RUN: llvm-lto2 run -save-temps -o %t3 %t1.bc %t2.bc \ +; RUN: -r %t1.bc,lwt_fun,plx \ +; RUN: -r %t2.bc,main,plx \ +; RUN: -r %t2.bc,lwt_fun, +; RUN: llvm-dis -o - %t3.1.3.import.bc | FileCheck %s + +target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-windows-msvc19.0.24215" + +; CHECK: $lwt.llvm.[[HASH:[0-9]+]] = comdat any +$lwt = comdat any + +; CHECK: @lwt_aliasee = private unnamed_addr global {{.*}}, comdat($lwt.llvm.[[HASH]]) +@lwt_aliasee = private unnamed_addr global [1 x i8*] [i8* null], comdat($lwt) + +; CHECK: @lwt.llvm.[[HASH]] = hidden unnamed_addr alias +@lwt = internal unnamed_addr alias [1 x i8*], [1 x i8*]* @lwt_aliasee + +; Below function should get imported into other module, resulting in @lwt being +; promoted and renamed. +define i8* @lwt_fun() { + %1 = getelementptr inbounds [1 x i8*], [1 x i8*]* @lwt, i32 0, i32 0 + %2 = load i8*, i8** %1 + ret i8* %2 +} From c081a5fb340214719bb91bae196c2850181ca358 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 1 Feb 2019 11:12:06 +0000 Subject: [PATCH 078/125] Merging r352156: ------------------------------------------------------------------------ r352156 | phosek | 2019-01-25 03:42:30 +0100 (Fri, 25 Jan 2019) | 12 lines [AArch64] Make the test for rsr and rsr64 stricter ACLE specifies that return type for rsr and rsr64 is uint32_t and uint64_t respectively. D56852 change the return type of rsr64 from unsigned long to unsigned long long which at least on Linux doesn't match uint64_t, but the test isn't strict enough to detect that because compiler implicitly converts unsigned long long to uint64_t, but it breaks other uses such as printf with PRIx64 type specifier. This change makes the test stricter enforcing that the return type of rsr and rsr64 builtins is what is actually specified in ACLE. Differential Revision: https://reviews.llvm.org/D57210 ------------------------------------------------------------------------ llvm-svn: 352859 --- clang/test/CodeGen/builtins-arm64.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/clang/test/CodeGen/builtins-arm64.c b/clang/test/CodeGen/builtins-arm64.c index 7027a6e220f3d8..1a2cc2a77b96e5 100644 --- a/clang/test/CodeGen/builtins-arm64.c +++ b/clang/test/CodeGen/builtins-arm64.c @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -triple arm64-unknown-linux -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +#include void f0(void *a, void *b) { __clear_cache(a,b); @@ -49,13 +50,17 @@ void prefetch() { // CHECK: call {{.*}} @llvm.prefetch(i8* null, i32 0, i32 3, i32 0) } -unsigned rsr() { +__typeof__(__builtin_arm_rsr("1:2:3:4:5")) rsr(void); + +uint32_t rsr() { // CHECK: [[V0:[%A-Za-z0-9.]+]] = call i64 @llvm.read_register.i64(metadata ![[M0:[0-9]]]) // CHECK-NEXT: trunc i64 [[V0]] to i32 return __builtin_arm_rsr("1:2:3:4:5"); } -unsigned long rsr64() { +__typeof__(__builtin_arm_rsr64("1:2:3:4:5")) rsr64(void); + +uint64_t rsr64() { // CHECK: call i64 @llvm.read_register.i64(metadata ![[M0:[0-9]]]) return __builtin_arm_rsr64("1:2:3:4:5"); } From f66fbcfb52f58e41d91fc73835cff3b9b076d74d Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 1 Feb 2019 11:13:56 +0000 Subject: [PATCH 079/125] Merging r352463: ------------------------------------------------------------------------ r352463 | sam_parker | 2019-01-29 10:04:03 +0100 (Tue, 29 Jan 2019) | 6 lines [AArch64] Update int64_t ACLE builtin arguments Re-applying r351740 with fixes (changing LL to W). Differential Revision: https://reviews.llvm.org/D56852 ------------------------------------------------------------------------ llvm-svn: 352860 --- clang/include/clang/Basic/BuiltinsAArch64.def | 10 +++++----- clang/test/CodeGen/arm64-crc32.c | 19 ++++++++++-------- clang/test/CodeGen/builtins-arm64.c | 20 +++++++++++++++---- 3 files changed, 32 insertions(+), 17 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def index 1892ff11a31dc8..690d547f7f3ef7 100644 --- a/clang/include/clang/Basic/BuiltinsAArch64.def +++ b/clang/include/clang/Basic/BuiltinsAArch64.def @@ -33,7 +33,7 @@ BUILTIN(__builtin_arm_clrex, "v", "") // Bit manipulation BUILTIN(__builtin_arm_rbit, "UiUi", "nc") -BUILTIN(__builtin_arm_rbit64, "LUiLUi", "nc") +BUILTIN(__builtin_arm_rbit64, "WUiWUi", "nc") // HINT BUILTIN(__builtin_arm_nop, "v", "") @@ -50,8 +50,8 @@ BUILTIN(__builtin_arm_crc32h, "UiUiUs", "nc") BUILTIN(__builtin_arm_crc32ch, "UiUiUs", "nc") BUILTIN(__builtin_arm_crc32w, "UiUiUi", "nc") BUILTIN(__builtin_arm_crc32cw, "UiUiUi", "nc") -BUILTIN(__builtin_arm_crc32d, "UiUiLUi", "nc") -BUILTIN(__builtin_arm_crc32cd, "UiUiLUi", "nc") +BUILTIN(__builtin_arm_crc32d, "UiUiWUi", "nc") +BUILTIN(__builtin_arm_crc32cd, "UiUiWUi", "nc") // Memory barrier BUILTIN(__builtin_arm_dmb, "vUi", "nc") @@ -63,10 +63,10 @@ BUILTIN(__builtin_arm_prefetch, "vvC*UiUiUiUi", "nc") // System Registers BUILTIN(__builtin_arm_rsr, "UicC*", "nc") -BUILTIN(__builtin_arm_rsr64, "LUicC*", "nc") +BUILTIN(__builtin_arm_rsr64, "WUicC*", "nc") BUILTIN(__builtin_arm_rsrp, "v*cC*", "nc") BUILTIN(__builtin_arm_wsr, "vcC*Ui", "nc") -BUILTIN(__builtin_arm_wsr64, "vcC*LUi", "nc") +BUILTIN(__builtin_arm_wsr64, "vcC*WUi", "nc") BUILTIN(__builtin_arm_wsrp, "vcC*vC*", "nc") // MSVC diff --git a/clang/test/CodeGen/arm64-crc32.c b/clang/test/CodeGen/arm64-crc32.c index 2d913fb123b7c8..26d69a23b6a1a6 100644 --- a/clang/test/CodeGen/arm64-crc32.c +++ b/clang/test/CodeGen/arm64-crc32.c @@ -1,54 +1,57 @@ // REQUIRES: aarch64-registered-target // RUN: %clang_cc1 -triple arm64-none-linux-gnu \ // RUN: -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-windows \ +// RUN: -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +#include -int crc32b(int a, char b) +uint32_t crc32b(uint32_t a, uint8_t b) { return __builtin_arm_crc32b(a,b); // CHECK: [[T0:%[0-9]+]] = zext i8 %b to i32 // CHECK: call i32 @llvm.aarch64.crc32b(i32 %a, i32 [[T0]]) } -int crc32cb(int a, char b) +uint32_t crc32cb(uint32_t a, uint8_t b) { return __builtin_arm_crc32cb(a,b); // CHECK: [[T0:%[0-9]+]] = zext i8 %b to i32 // CHECK: call i32 @llvm.aarch64.crc32cb(i32 %a, i32 [[T0]]) } -int crc32h(int a, short b) +uint32_t crc32h(uint32_t a, uint16_t b) { return __builtin_arm_crc32h(a,b); // CHECK: [[T0:%[0-9]+]] = zext i16 %b to i32 // CHECK: call i32 @llvm.aarch64.crc32h(i32 %a, i32 [[T0]]) } -int crc32ch(int a, short b) +uint32_t crc32ch(uint32_t a, uint16_t b) { return __builtin_arm_crc32ch(a,b); // CHECK: [[T0:%[0-9]+]] = zext i16 %b to i32 // CHECK: call i32 @llvm.aarch64.crc32ch(i32 %a, i32 [[T0]]) } -int crc32w(int a, int b) +uint32_t crc32w(uint32_t a, uint32_t b) { return __builtin_arm_crc32w(a,b); // CHECK: call i32 @llvm.aarch64.crc32w(i32 %a, i32 %b) } -int crc32cw(int a, int b) +uint32_t crc32cw(uint32_t a, uint32_t b) { return __builtin_arm_crc32cw(a,b); // CHECK: call i32 @llvm.aarch64.crc32cw(i32 %a, i32 %b) } -int crc32d(int a, long b) +uint32_t crc32d(uint32_t a, uint64_t b) { return __builtin_arm_crc32d(a,b); // CHECK: call i32 @llvm.aarch64.crc32x(i32 %a, i64 %b) } -int crc32cd(int a, long b) +uint32_t crc32cd(uint32_t a, uint64_t b) { return __builtin_arm_crc32cd(a,b); // CHECK: call i32 @llvm.aarch64.crc32cx(i32 %a, i64 %b) diff --git a/clang/test/CodeGen/builtins-arm64.c b/clang/test/CodeGen/builtins-arm64.c index 1a2cc2a77b96e5..f164c2f6f3647c 100644 --- a/clang/test/CodeGen/builtins-arm64.c +++ b/clang/test/CodeGen/builtins-arm64.c @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -triple arm64-unknown-linux -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple arm64-unknown-linux -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LINUX +// RUN: %clang_cc1 -triple aarch64-windows -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-WIN #include void f0(void *a, void *b) { @@ -16,8 +17,15 @@ unsigned rbit(unsigned a) { return __builtin_arm_rbit(a); } +// CHECK-WIN: [[A64:%[^ ]+]] = zext i32 %a to i64 +// CHECK-WIN: call i64 @llvm.bitreverse.i64(i64 [[A64]]) +// CHECK-LINUX: call i64 @llvm.bitreverse.i64(i64 %a) +unsigned long rbitl(unsigned long a) { + return __builtin_arm_rbit64(a); +} + // CHECK: call {{.*}} @llvm.bitreverse.i64(i64 %a) -unsigned long long rbit64(unsigned long long a) { +uint64_t rbit64(uint64_t a) { return __builtin_arm_rbit64(a); } @@ -60,7 +68,7 @@ uint32_t rsr() { __typeof__(__builtin_arm_rsr64("1:2:3:4:5")) rsr64(void); -uint64_t rsr64() { +uint64_t rsr64(void) { // CHECK: call i64 @llvm.read_register.i64(metadata ![[M0:[0-9]]]) return __builtin_arm_rsr64("1:2:3:4:5"); } @@ -71,13 +79,17 @@ void *rsrp() { return __builtin_arm_rsrp("1:2:3:4:5"); } +__typeof__(__builtin_arm_wsr("1:2:3:4:5", 0)) wsr(unsigned); + void wsr(unsigned v) { // CHECK: [[V0:[%A-Za-z0-9.]+]] = zext i32 %v to i64 // CHECK-NEXT: call void @llvm.write_register.i64(metadata ![[M0:[0-9]]], i64 [[V0]]) __builtin_arm_wsr("1:2:3:4:5", v); } -void wsr64(unsigned long v) { +__typeof__(__builtin_arm_wsr64("1:2:3:4:5", 0)) wsr64(uint64_t); + +void wsr64(uint64_t v) { // CHECK: call void @llvm.write_register.i64(metadata ![[M0:[0-9]]], i64 %v) __builtin_arm_wsr64("1:2:3:4:5", v); } From 61ff0b639cca25747bcbae8bf5a213d702141bc5 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 1 Feb 2019 12:44:23 +0000 Subject: [PATCH 080/125] Merging r352246: ------------------------------------------------------------------------ r352246 | mtrofin | 2019-01-25 22:49:54 +0100 (Fri, 25 Jan 2019) | 23 lines [llvm] Opt-in flag for X86DiscriminateMemOps Summary: Currently, if an instruction with a memory operand has no debug information, X86DiscriminateMemOps will generate one based on the first line of the enclosing function, or the last seen debug info. This may cause confusion in certain debugging scenarios. The long term approach would be to use the line number '0' in such cases, however, that brings in challenges: the base discriminator value range is limited (4096 values). For the short term, adding an opt-in flag for this feature. See bug 40319 (https://bugs.llvm.org/show_bug.cgi?id=40319) Reviewers: dblaikie, jmorse, gbedwell Reviewed By: dblaikie Subscribers: aprantl, eraman, hiraditya Differential Revision: https://reviews.llvm.org/D57257 ------------------------------------------------------------------------ llvm-svn: 352867 --- llvm/lib/Target/X86/X86DiscriminateMemOps.cpp | 11 +++++++++++ llvm/lib/Target/X86/X86InsertPrefetch.cpp | 3 ++- llvm/test/CodeGen/X86/discriminate-mem-ops.ll | 2 +- llvm/test/CodeGen/X86/insert-prefetch-inline.ll | 2 +- .../test/CodeGen/X86/insert-prefetch-invalid-instr.ll | 2 +- llvm/test/CodeGen/X86/insert-prefetch.ll | 4 ++-- 6 files changed, 18 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp b/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp index 3654bf04f4e988..6bee20b617dde9 100644 --- a/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp +++ b/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp @@ -27,6 +27,14 @@ using namespace llvm; #define DEBUG_TYPE "x86-discriminate-memops" +static cl::opt EnableDiscriminateMemops( + DEBUG_TYPE, cl::init(false), + cl::desc("Generate unique debug info for each instruction with a memory " + "operand. Should be enabled for profile-drived cache prefetching, " + "both in the build of the binary being profiled, as well as in " + "the build of the binary consuming the profile."), + cl::Hidden); + namespace { using Location = std::pair; @@ -67,6 +75,9 @@ char X86DiscriminateMemOps::ID = 0; X86DiscriminateMemOps::X86DiscriminateMemOps() : MachineFunctionPass(ID) {} bool X86DiscriminateMemOps::runOnMachineFunction(MachineFunction &MF) { + if (!EnableDiscriminateMemops) + return false; + DISubprogram *FDI = MF.getFunction().getSubprogram(); if (!FDI || !FDI->getUnit()->getDebugInfoForProfiling()) return false; diff --git a/llvm/lib/Target/X86/X86InsertPrefetch.cpp b/llvm/lib/Target/X86/X86InsertPrefetch.cpp index 30b46a09ef0f90..8bd57aa2278bad 100644 --- a/llvm/lib/Target/X86/X86InsertPrefetch.cpp +++ b/llvm/lib/Target/X86/X86InsertPrefetch.cpp @@ -34,7 +34,8 @@ using namespace sampleprof; static cl::opt PrefetchHintsFile("prefetch-hints-file", - cl::desc("Path to the prefetch hints profile."), + cl::desc("Path to the prefetch hints profile. See also " + "-x86-discriminate-memops"), cl::Hidden); namespace { diff --git a/llvm/test/CodeGen/X86/discriminate-mem-ops.ll b/llvm/test/CodeGen/X86/discriminate-mem-ops.ll index b77a91fafd2cd8..a30dc22a0d9c02 100644 --- a/llvm/test/CodeGen/X86/discriminate-mem-ops.ll +++ b/llvm/test/CodeGen/X86/discriminate-mem-ops.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s | FileCheck %s +; RUN: llc -x86-discriminate-memops < %s | FileCheck %s ; ; original source, compiled with -O3 -gmlt -fdebug-info-for-profiling: ; int sum(int* arr, int pos1, int pos2) { diff --git a/llvm/test/CodeGen/X86/insert-prefetch-inline.ll b/llvm/test/CodeGen/X86/insert-prefetch-inline.ll index 5f8373f9480c96..62c02fa33291d2 100644 --- a/llvm/test/CodeGen/X86/insert-prefetch-inline.ll +++ b/llvm/test/CodeGen/X86/insert-prefetch-inline.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -prefetch-hints-file=%S/insert-prefetch-inline.afdo | FileCheck %s +; RUN: llc < %s -x86-discriminate-memops -prefetch-hints-file=%S/insert-prefetch-inline.afdo | FileCheck %s ; ; Verify we can insert prefetch instructions in code belonging to inlined ; functions. diff --git a/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.ll b/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.ll index 004fb56a56eb8a..d0c4ac378b63b2 100644 --- a/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.ll +++ b/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -prefetch-hints-file=%S/insert-prefetch-invalid-instr.afdo | FileCheck %s +; RUN: llc < %s -x86-discriminate-memops -prefetch-hints-file=%S/insert-prefetch-invalid-instr.afdo | FileCheck %s ; ModuleID = 'prefetch.cc' source_filename = "prefetch.cc" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/CodeGen/X86/insert-prefetch.ll b/llvm/test/CodeGen/X86/insert-prefetch.ll index 9e77772df7746b..fe0fd9877f193e 100644 --- a/llvm/test/CodeGen/X86/insert-prefetch.ll +++ b/llvm/test/CodeGen/X86/insert-prefetch.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -prefetch-hints-file=%S/insert-prefetch.afdo | FileCheck %s -; RUN: llc < %s -prefetch-hints-file=%S/insert-prefetch-other.afdo | FileCheck %s -check-prefix=OTHERS +; RUN: llc < %s -x86-discriminate-memops -prefetch-hints-file=%S/insert-prefetch.afdo | FileCheck %s +; RUN: llc < %s -x86-discriminate-memops -prefetch-hints-file=%S/insert-prefetch-other.afdo | FileCheck %s -check-prefix=OTHERS ; ; original source, compiled with -O3 -gmlt -fdebug-info-for-profiling: ; int sum(int* arr, int pos1, int pos2) { From b96362ffa9ceecf2256373f2e86a3e62780c599f Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Fri, 1 Feb 2019 20:55:52 +0000 Subject: [PATCH 081/125] [Hexagon] Update release notes with the changes to the Hexagon backend llvm-svn: 352915 --- llvm/docs/ReleaseNotes.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 82e4d56d48f467..7fc18774851dbd 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -81,6 +81,11 @@ Changes to the ARM Backend During this release ... +Changes to the Hexagon Target +-------------------------- + +* Added support for Hexagon/HVX V66 ISA. + Changes to the MIPS Target -------------------------- From 36e9afbd214703adf5177eeae095f8634f64bb51 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Mon, 4 Feb 2019 08:51:28 +0000 Subject: [PATCH 082/125] Release Notes: Add Zig to External Open Source Projects Using LLVM 8 Zig has all tests passing in the llvm8 branch with rc1. Zig 0.4.0 is scheduled to be released 1 week after LLVM 8.0.0, and it will depend on LLVM 8. Patch by Andrew Kelley! Differential revision: https://reviews.llvm.org/D57586 llvm-svn: 353019 --- llvm/docs/ReleaseNotes.rst | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 7fc18774851dbd..2bd435ca9d2961 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -156,7 +156,16 @@ Changes to the DAG infrastructure External Open Source Projects Using LLVM 8 ========================================== -* A project... +Zig Programming Language +------------------------ + +`Zig `_ is a system programming language intended to be +an alternative to C. It provides high level features such as generics, compile +time function execution, and partial evaluation, while exposing low level LLVM +IR features such as aliases and intrinsics. Zig uses Clang to provide automatic +import of .h symbols, including inline functions and simple macros. Zig uses +LLD combined with lazily building compiler-rt to provide out-of-the-box +cross-compiling for all supported targets. Additional Information From 8cc77b4abfde78057798de445a9e7d46d9b77819 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Mon, 4 Feb 2019 09:31:37 +0000 Subject: [PATCH 083/125] Merging r352889: ------------------------------------------------------------------------ r352889 | wolfgangp | 2019-02-01 18:11:58 +0100 (Fri, 01 Feb 2019) | 6 lines [DWARF v5] Fix DWARF emitter and consumer to produce/expect a uleb for a location description's length. Reviewer: davide, JDevliegere Differential Revision: https://reviews.llvm.org/D57550 ------------------------------------------------------------------------ llvm-svn: 353029 --- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 6 ++++-- llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp | 3 ++- llvm/test/CodeGen/X86/debug-loclists.ll | 6 +++--- llvm/test/DebugInfo/X86/dwarfdump-debug-loclists.test | 8 ++++---- .../llvm-dwarfdump/X86/debug_loclists_startx_length.s | 4 ++-- 5 files changed, 15 insertions(+), 12 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 1de2ffb6cfa11f..18c5fe27b1a8fe 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1956,8 +1956,10 @@ void DebugLocEntry::finalize(const AsmPrinter &AP, void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry) { // Emit the size. Asm->OutStreamer->AddComment("Loc expr size"); - Asm->emitInt16(DebugLocs.getBytes(Entry).size()); - + if (getDwarfVersion() >= 5) + Asm->EmitULEB128(DebugLocs.getBytes(Entry).size()); + else + Asm->emitInt16(DebugLocs.getBytes(Entry).size()); // Emit the entry. APByteStreamer Streamer(*Asm); emitDebugLocEntry(Streamer, Entry); diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp index f8b5ff6ec8fb60..94df6946f3ae2e 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp @@ -184,7 +184,8 @@ DWARFDebugLoclists::parseOneLocationList(DataExtractor Data, unsigned *Offset, } if (Kind != dwarf::DW_LLE_base_address) { - unsigned Bytes = Data.getU16(Offset); + unsigned Bytes = + Version >= 5 ? Data.getULEB128(Offset) : Data.getU16(Offset); // A single location description describing the location of the object... StringRef str = Data.getData().substr(*Offset, Bytes); *Offset += Bytes; diff --git a/llvm/test/CodeGen/X86/debug-loclists.ll b/llvm/test/CodeGen/X86/debug-loclists.ll index 20bc0c40378b90..0c2ab3dfad5a90 100644 --- a/llvm/test/CodeGen/X86/debug-loclists.ll +++ b/llvm/test/CodeGen/X86/debug-loclists.ll @@ -11,7 +11,7 @@ ; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x0040 => {0x00000040} "A") ; CHECK: .debug_loclists contents: -; CHECK-NEXT: 0x00000000: locations list header: length = 0x00000017, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000000 +; CHECK-NEXT: 0x00000000: locations list header: length = 0x00000015, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000000 ; CHECK-NEXT: 0x00000000: ; CHECK-NEXT: [0x0000000000000000, 0x0000000000000004): DW_OP_breg5 RDI+0 ; CHECK-NEXT: [0x0000000000000004, 0x0000000000000012): DW_OP_breg3 RBX+0 @@ -32,13 +32,13 @@ ; ASM-NEXT: .byte 4 # DW_LLE_offset_pair ; ASM-NEXT: .uleb128 .Lfunc_begin0-.Lfunc_begin0 # starting offset ; ASM-NEXT: .uleb128 .Ltmp0-.Lfunc_begin0 # ending offset -; ASM-NEXT: .short 2 # Loc expr size +; ASM-NEXT: .byte 2 # Loc expr size ; ASM-NEXT: .byte 117 # DW_OP_breg5 ; ASM-NEXT: .byte 0 # 0 ; ASM-NEXT: .byte 4 # DW_LLE_offset_pair ; ASM-NEXT: .uleb128 .Ltmp0-.Lfunc_begin0 # starting offset ; ASM-NEXT: .uleb128 .Ltmp1-.Lfunc_begin0 # ending offset -; ASM-NEXT: .short 2 # Loc expr size +; ASM-NEXT: .byte 2 # Loc expr size ; ASM-NEXT: .byte 115 # DW_OP_breg3 ; ASM-NEXT: .byte 0 # 0 ; ASM-NEXT: .byte 0 # DW_LLE_end_of_list diff --git a/llvm/test/DebugInfo/X86/dwarfdump-debug-loclists.test b/llvm/test/DebugInfo/X86/dwarfdump-debug-loclists.test index 669607fe557a36..32f2482b5117c0 100644 --- a/llvm/test/DebugInfo/X86/dwarfdump-debug-loclists.test +++ b/llvm/test/DebugInfo/X86/dwarfdump-debug-loclists.test @@ -9,7 +9,7 @@ # CHECK-NEXT: [0x0000000000000700, 0x0000000000000710): DW_OP_breg5 RDI+0 # CHECK: .debug_loclists contents: -# CHECK-NEXT: 0x00000000: locations list header: length = 0x0000002f, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000000 +# CHECK-NEXT: 0x00000000: locations list header: length = 0x0000002c, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000000 # CHECK-NEXT: 0x00000000: # CHECK-NEXT: [0x0000000000000000, 0x0000000000000010): DW_OP_breg5 RDI+0 # CHECK-NEXT: [0x0000000000000530, 0x0000000000000540): DW_OP_breg6 RBP-8, DW_OP_deref @@ -37,7 +37,7 @@ .byte 4 # DW_LLE_offset_pair .uleb128 0x0 # starting offset .uleb128 0x10 # ending offset - .short 2 # Loc expr size + .byte 2 # Loc expr size .byte 117 # DW_OP_breg5 .byte 0 # 0 @@ -47,7 +47,7 @@ .byte 4 # DW_LLE_offset_pair .uleb128 0x30 # starting offset .uleb128 0x40 # ending offset - .short 3 # Loc expr size + .byte 3 # Loc expr size .byte 118 # DW_OP_breg6 .byte 120 # -8 .byte 6 # DW_OP_deref @@ -55,7 +55,7 @@ .byte 8 # DW_LLE_start_length .quad 0x700 # Some address .uleb128 0x10 # length - .short 2 # Loc expr size + .byte 2 # Loc expr size .byte 117 # DW_OP_breg5 .byte 0 # 0 diff --git a/llvm/test/tools/llvm-dwarfdump/X86/debug_loclists_startx_length.s b/llvm/test/tools/llvm-dwarfdump/X86/debug_loclists_startx_length.s index 07c68ab2618f15..0b2ae5f8e7a4fb 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/debug_loclists_startx_length.s +++ b/llvm/test/tools/llvm-dwarfdump/X86/debug_loclists_startx_length.s @@ -6,7 +6,7 @@ # the final version which uses ULEB128 and not the U32. # CHECK: .debug_loclists contents: -# CHECK-NEXT: 0x00000000: locations list header: length = 0x0000000f, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000000 +# CHECK-NEXT: 0x00000000: locations list header: length = 0x0000000e, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000000 # CHECK-NEXT: 0x00000000: # CHECK-NEXT: Addr idx 1 (w/ length 16): DW_OP_reg5 RDI @@ -21,7 +21,7 @@ .byte 3 # DW_LLE_startx_length .byte 0x01 # Index .uleb128 0x10 # Length - .short 1 # Loc expr size + .byte 1 # Loc expr size .byte 85 # DW_OP_reg5 .byte 0 # DW_LLE_end_of_list .Ldebug_loclist_table_end0: From 44c793021d5746a3860aaa241e7f0756a48df744 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Mon, 4 Feb 2019 09:52:31 +0000 Subject: [PATCH 084/125] Merging r352307: ------------------------------------------------------------------------ r352307 | void | 2019-01-27 08:24:03 +0100 (Sun, 27 Jan 2019) | 11 lines Remove Expr sugar decorating the CXXUuidofExpr node. Summary: Sugar, like ConstantExpr, causes an infinite expansion of the template object. Reviewers: rsmith, aaron.ballman Reviewed By: aaron.ballman Subscribers: riccibruno, aaron.ballman, cfe-commits, tzik, rnk Differential Revision: https://reviews.llvm.org/D57114 ------------------------------------------------------------------------ llvm-svn: 353031 --- clang/lib/Sema/SemaTemplate.cpp | 2 +- clang/test/SemaCXX/PR40395.cpp | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 clang/test/SemaCXX/PR40395.cpp diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index 3f9dc989103faa..f974bedffe0057 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -6309,7 +6309,7 @@ ExprResult Sema::CheckTemplateArgument(NonTypeTemplateParmDecl *Param, // -- a predefined __func__ variable if (auto *E = Value.getLValueBase().dyn_cast()) { if (isa(E)) { - Converted = TemplateArgument(ArgResult.get()); + Converted = TemplateArgument(ArgResult.get()->IgnoreImpCasts()); break; } Diag(Arg->getBeginLoc(), diag::err_template_arg_not_decl_ref) diff --git a/clang/test/SemaCXX/PR40395.cpp b/clang/test/SemaCXX/PR40395.cpp new file mode 100644 index 00000000000000..469c86d56209ca --- /dev/null +++ b/clang/test/SemaCXX/PR40395.cpp @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 -std=c++17 -fms-extensions -triple=x86_64-pc-win32 -verify %s +// expected-no-diagnostics + +// PR40395 - ConstantExpr shouldn't cause the template object to infinitely +// expand. +struct _GUID {}; +struct __declspec(uuid("{AAAAAAAA-AAAA-AAAA-AAAA-AAAAAAAAAAAA}")) B {}; + +template +struct A { + virtual void baz() { A(); } +}; + +void f() { + A<&__uuidof(B)>(); +} From ca26c44df0122e65a56661bef56c9a63901eb699 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 5 Feb 2019 11:15:26 +0000 Subject: [PATCH 085/125] Merging r352928: ------------------------------------------------------------------------ r352928 | mstorsjo | 2019-02-01 23:08:03 +0100 (Fri, 01 Feb 2019) | 9 lines [COFF] Fix crashes when writing a PDB after adding thunks. When writing a PDB, the OutputSection of all chunks need to be set. The thunks are added directly to OutputSection after the normal machinery that sets it for all other chunks. This fixes part of PR40467. Differential Revision: https://reviews.llvm.org/D57574 ------------------------------------------------------------------------ llvm-svn: 353157 --- lld/COFF/Writer.cpp | 11 ++++++----- lld/test/COFF/arm-thumb-thunks-pdb.s | 18 ++++++++++++++++++ 2 files changed, 24 insertions(+), 5 deletions(-) create mode 100644 lld/test/COFF/arm-thumb-thunks-pdb.s diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index 258796ea6057b1..71c24f6172abd0 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -344,14 +344,14 @@ getThunk(DenseMap &LastThunks, Defined *Target, uint64_t P, // After adding thunks, we verify that all relocations are in range (with // no extra margin requirements). If this failed, we restart (throwing away // the previously created thunks) and retry with a wider margin. -static bool createThunks(std::vector &Chunks, int Margin) { +static bool createThunks(OutputSection *OS, int Margin) { bool AddressesChanged = false; DenseMap LastThunks; size_t ThunksSize = 0; // Recheck Chunks.size() each iteration, since we can insert more // elements into it. - for (size_t I = 0; I != Chunks.size(); ++I) { - SectionChunk *SC = dyn_cast_or_null(Chunks[I]); + for (size_t I = 0; I != OS->Chunks.size(); ++I) { + SectionChunk *SC = dyn_cast_or_null(OS->Chunks[I]); if (!SC) continue; size_t ThunkInsertionSpot = I + 1; @@ -388,7 +388,8 @@ static bool createThunks(std::vector &Chunks, int Margin) { Chunk *ThunkChunk = Thunk->getChunk(); ThunkChunk->setRVA( ThunkInsertionRVA); // Estimate of where it will be located. - Chunks.insert(Chunks.begin() + ThunkInsertionSpot, ThunkChunk); + ThunkChunk->setOutputSection(OS); + OS->Chunks.insert(OS->Chunks.begin() + ThunkInsertionSpot, ThunkChunk); ThunkInsertionSpot++; ThunksSize += ThunkChunk->getSize(); ThunkInsertionRVA += ThunkChunk->getSize(); @@ -477,7 +478,7 @@ void Writer::finalizeAddresses() { // to avoid things going out of range due to the added thunks. bool AddressesChanged = false; for (OutputSection *Sec : OutputSections) - AddressesChanged |= createThunks(Sec->Chunks, Margin); + AddressesChanged |= createThunks(Sec, Margin); // If the verification above thought we needed thunks, we should have // added some. assert(AddressesChanged); diff --git a/lld/test/COFF/arm-thumb-thunks-pdb.s b/lld/test/COFF/arm-thumb-thunks-pdb.s new file mode 100644 index 00000000000000..9e972a78d37fff --- /dev/null +++ b/lld/test/COFF/arm-thumb-thunks-pdb.s @@ -0,0 +1,18 @@ +// REQUIRES: arm +// RUN: llvm-mc -filetype=obj -triple=thumbv7-windows %s -o %t.obj +// RUN: lld-link -entry:main -subsystem:console %t.obj -out:%t.exe -debug -pdb:%t.pdb -verbose 2>&1 | FileCheck %s --check-prefix=VERBOSE + +// VERBOSE: Added 1 thunks with margin {{.*}} in {{.*}} passes + + .syntax unified + .globl main + .globl func1 + .text +main: + bne func1 + bx lr + .section .text$a, "xr" + .space 0x100000 + .section .text$b, "xr" +func1: + bx lr From 9c110d55f93a7367ab1d949df5aee1239be340e0 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 5 Feb 2019 11:19:42 +0000 Subject: [PATCH 086/125] Merging r352929: ------------------------------------------------------------------------ r352929 | mstorsjo | 2019-02-01 23:08:09 +0100 (Fri, 01 Feb 2019) | 11 lines [COFF] Create range extension thunks for ARM64 On ARM64, this is normally necessary only after a module exceeds 128 MB in size (while the limit for thumb is 16 MB). For conditional branches, the range limit is only 1 MB though (the same as for thumb), and for the tbz instruction, the range is only 32 KB, which allows for a test much smaller than the full 128 MB. This fixes PR40467. Differential Revision: https://reviews.llvm.org/D57575 ------------------------------------------------------------------------ llvm-svn: 353158 --- lld/COFF/Chunks.cpp | 24 +++++++++++-- lld/COFF/Chunks.h | 13 +++++-- lld/COFF/Writer.cpp | 49 ++++++++++++++++++++------- lld/test/COFF/arm64-branch-range.test | 16 --------- lld/test/COFF/arm64-thunks.s | 27 +++++++++++++++ 5 files changed, 97 insertions(+), 32 deletions(-) delete mode 100644 lld/test/COFF/arm64-branch-range.test create mode 100644 lld/test/COFF/arm64-thunks.s diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp index 29131d7eb8db4b..2bb9aa01e53992 100644 --- a/lld/COFF/Chunks.cpp +++ b/lld/COFF/Chunks.cpp @@ -669,18 +669,38 @@ const uint8_t ArmThunk[] = { 0xe7, 0x44, // L1: add pc, ip }; -size_t RangeExtensionThunk::getSize() const { +size_t RangeExtensionThunkARM::getSize() const { assert(Config->Machine == ARMNT); return sizeof(ArmThunk); } -void RangeExtensionThunk::writeTo(uint8_t *Buf) const { +void RangeExtensionThunkARM::writeTo(uint8_t *Buf) const { assert(Config->Machine == ARMNT); uint64_t Offset = Target->getRVA() - RVA - 12; memcpy(Buf + OutputSectionOff, ArmThunk, sizeof(ArmThunk)); applyMOV32T(Buf + OutputSectionOff, uint32_t(Offset)); } +// A position independent ARM64 adrp+add thunk, with a maximum range of +// +/- 4 GB, which is enough for any PE-COFF. +const uint8_t Arm64Thunk[] = { + 0x10, 0x00, 0x00, 0x90, // adrp x16, Dest + 0x10, 0x02, 0x00, 0x91, // add x16, x16, :lo12:Dest + 0x00, 0x02, 0x1f, 0xd6, // br x16 +}; + +size_t RangeExtensionThunkARM64::getSize() const { + assert(Config->Machine == ARM64); + return sizeof(Arm64Thunk); +} + +void RangeExtensionThunkARM64::writeTo(uint8_t *Buf) const { + assert(Config->Machine == ARM64); + memcpy(Buf + OutputSectionOff, Arm64Thunk, sizeof(Arm64Thunk)); + applyArm64Addr(Buf + OutputSectionOff + 0, Target->getRVA(), RVA, 12); + applyArm64Imm(Buf + OutputSectionOff + 4, Target->getRVA() & 0xfff, 0); +} + void LocalImportChunk::getBaserels(std::vector *Res) { Res->emplace_back(getRVA()); } diff --git a/lld/COFF/Chunks.h b/lld/COFF/Chunks.h index f8a0ddd8ef3b2c..e132fdf8adfafa 100644 --- a/lld/COFF/Chunks.h +++ b/lld/COFF/Chunks.h @@ -355,9 +355,18 @@ class ImportThunkChunkARM64 : public Chunk { Defined *ImpSymbol; }; -class RangeExtensionThunk : public Chunk { +class RangeExtensionThunkARM : public Chunk { public: - explicit RangeExtensionThunk(Defined *T) : Target(T) {} + explicit RangeExtensionThunkARM(Defined *T) : Target(T) {} + size_t getSize() const override; + void writeTo(uint8_t *Buf) const override; + + Defined *Target; +}; + +class RangeExtensionThunkARM64 : public Chunk { +public: + explicit RangeExtensionThunkARM64(Defined *T) : Target(T) {} size_t getSize() const override; void writeTo(uint8_t *Buf) const override; diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index 71c24f6172abd0..6acfaf9a44545f 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -306,16 +306,31 @@ void OutputSection::writeHeaderTo(uint8_t *Buf) { // Check whether the target address S is in range from a relocation // of type RelType at address P. static bool isInRange(uint16_t RelType, uint64_t S, uint64_t P, int Margin) { - assert(Config->Machine == ARMNT); - int64_t Diff = AbsoluteDifference(S, P + 4) + Margin; - switch (RelType) { - case IMAGE_REL_ARM_BRANCH20T: - return isInt<21>(Diff); - case IMAGE_REL_ARM_BRANCH24T: - case IMAGE_REL_ARM_BLX23T: - return isInt<25>(Diff); - default: - return true; + if (Config->Machine == ARMNT) { + int64_t Diff = AbsoluteDifference(S, P + 4) + Margin; + switch (RelType) { + case IMAGE_REL_ARM_BRANCH20T: + return isInt<21>(Diff); + case IMAGE_REL_ARM_BRANCH24T: + case IMAGE_REL_ARM_BLX23T: + return isInt<25>(Diff); + default: + return true; + } + } else if (Config->Machine == ARM64) { + int64_t Diff = AbsoluteDifference(S, P) + Margin; + switch (RelType) { + case IMAGE_REL_ARM64_BRANCH26: + return isInt<28>(Diff); + case IMAGE_REL_ARM64_BRANCH19: + return isInt<21>(Diff); + case IMAGE_REL_ARM64_BRANCH14: + return isInt<16>(Diff); + default: + return true; + } + } else { + llvm_unreachable("Unexpected architecture"); } } @@ -327,7 +342,17 @@ getThunk(DenseMap &LastThunks, Defined *Target, uint64_t P, Defined *&LastThunk = LastThunks[Target->getRVA()]; if (LastThunk && isInRange(Type, LastThunk->getRVA(), P, Margin)) return {LastThunk, false}; - RangeExtensionThunk *C = make(Target); + Chunk *C; + switch (Config->Machine) { + case ARMNT: + C = make(Target); + break; + case ARM64: + C = make(Target); + break; + default: + llvm_unreachable("Unexpected architecture"); + } Defined *D = make("", C); LastThunk = D; return {D, true}; @@ -429,7 +454,7 @@ static bool verifyRanges(const std::vector Chunks) { // Assign addresses and add thunks if necessary. void Writer::finalizeAddresses() { assignAddresses(); - if (Config->Machine != ARMNT) + if (Config->Machine != ARMNT && Config->Machine != ARM64) return; size_t OrigNumChunks = 0; diff --git a/lld/test/COFF/arm64-branch-range.test b/lld/test/COFF/arm64-branch-range.test deleted file mode 100644 index 0b581e9c464dc8..00000000000000 --- a/lld/test/COFF/arm64-branch-range.test +++ /dev/null @@ -1,16 +0,0 @@ -// REQUIRES: aarch64 - -// RUN: echo -e '.globl _start\n _start:\n bl too_far26\n' > %t.main26.s -// RUN: echo -e '.globl _start\n _start:\n b.ne too_far19\n' > %t.main19.s -// RUN: echo -e '.globl _start\n _start:\n tbz x0, #0, too_far14\n' > %t.main14.s - -// RUN: llvm-mc -filetype=obj -triple=aarch64-windows %t.main26.s -o %t.main26.obj -// RUN: llvm-mc -filetype=obj -triple=aarch64-windows %t.main19.s -o %t.main19.obj -// RUN: llvm-mc -filetype=obj -triple=aarch64-windows %t.main14.s -o %t.main14.obj -// RUN: llvm-mc -filetype=obj -triple=aarch64-windows %S/Inputs/far-arm64-abs.s -o %t.far.obj - -// RUN: not lld-link -base:0x10000 -entry:_start -subsystem:console %t.main26.obj %t.far.obj -out:%t.exe 2>&1 | FileCheck %s -// RUN: not lld-link -base:0x10000 -entry:_start -subsystem:console %t.main19.obj %t.far.obj -out:%t.exe 2>&1 | FileCheck %s -// RUN: not lld-link -base:0x10000 -entry:_start -subsystem:console %t.main14.obj %t.far.obj -out:%t.exe 2>&1 | FileCheck %s - -// CHECK: relocation out of range diff --git a/lld/test/COFF/arm64-thunks.s b/lld/test/COFF/arm64-thunks.s new file mode 100644 index 00000000000000..49004544c43893 --- /dev/null +++ b/lld/test/COFF/arm64-thunks.s @@ -0,0 +1,27 @@ +// REQUIRES: aarch64 +// RUN: llvm-mc -filetype=obj -triple=aarch64-windows %s -o %t.obj +// RUN: lld-link -entry:main -subsystem:console %t.obj -out:%t.exe -verbose 2>&1 | FileCheck -check-prefix=VERBOSE %s +// RUN: llvm-objdump -d %t.exe | FileCheck -check-prefix=DISASM %s + +// VERBOSE: Added 1 thunks with margin {{.*}} in 1 passes + + .globl main + .globl func1 + .text +main: + tbz w0, #0, func1 + ret + .section .text$a, "xr" + .space 0x8000 + .section .text$b, "xr" +func1: + ret + +// DISASM: 0000000140001000 .text: +// DISASM: 140001000: 40 00 00 36 tbz w0, #0, #8 <.text+0x8> +// DISASM: 140001004: c0 03 5f d6 ret +// DISASM: 140001008: 50 00 00 90 adrp x16, #32768 +// DISASM: 14000100c: 10 52 00 91 add x16, x16, #20 +// DISASM: 140001010: 00 02 1f d6 br x16 + +// DISASM: 140009014: c0 03 5f d6 ret From f7f9945cadc09cf8f6c8dc16be254eb8d96e5c11 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 5 Feb 2019 11:27:12 +0000 Subject: [PATCH 087/125] Merging r352945: ------------------------------------------------------------------------ r352945 | mgrang | 2019-02-02 02:32:48 +0100 (Sat, 02 Feb 2019) | 13 lines [AutoUpgrade] Fix AutoUpgrade for x86.seh.recoverfp Summary: This fixes the bug in https://reviews.llvm.org/D56747#inline-502711. Reviewers: efriedma Reviewed By: efriedma Subscribers: javed.absar, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D57614 ------------------------------------------------------------------------ llvm-svn: 353159 --- llvm/lib/IR/AutoUpgrade.cpp | 9 +++++---- llvm/test/CodeGen/AArch64/eh_recoverfp.ll | 11 +++++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/eh_recoverfp.ll diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index b2eb8b09982e89..27064154221f47 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -469,6 +469,11 @@ static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name, } } + if (Name == "seh.recoverfp") { + NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp); + return true; + } + return false; } @@ -544,10 +549,6 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer); return true; } - if (Name == "x86.seh.recoverfp") { - NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp); - return true; - } break; } diff --git a/llvm/test/CodeGen/AArch64/eh_recoverfp.ll b/llvm/test/CodeGen/AArch64/eh_recoverfp.ll new file mode 100644 index 00000000000000..777bcee543827b --- /dev/null +++ b/llvm/test/CodeGen/AArch64/eh_recoverfp.ll @@ -0,0 +1,11 @@ +; RUN: llc -mtriple arm64-windows %s -o - 2>&1 | FileCheck %s + +define i8* @foo(i8* %a) { +; CHECK-LABEL: foo +; CHECK-NOT: llvm.x86.seh.recoverfp + %1 = call i8* @llvm.x86.seh.recoverfp(i8* bitcast (i32 ()* @f to i8*), i8* %a) + ret i8* %1 +} + +declare i8* @llvm.x86.seh.recoverfp(i8*, i8*) +declare i32 @f() From a10d2d2390f5cff0bb5af6fc408d6292d14e2033 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 5 Feb 2019 12:51:49 +0000 Subject: [PATCH 088/125] Merging r353082: ------------------------------------------------------------------------ r353082 | meinersbur | 2019-02-04 20:55:59 +0100 (Mon, 04 Feb 2019) | 10 lines [WarnMissedTransforms] Do not warn about already vectorized loops. LoopVectorize adds llvm.loop.isvectorized, but leaves llvm.loop.vectorize.enable. Do not consider such a loop for user-forced vectorization since vectorization already happened -- by prioritizing llvm.loop.isvectorized except for TM_SuppressedByUser. Fixes http://llvm.org/PR40546 Differential Revision: https://reviews.llvm.org/D57542 ------------------------------------------------------------------------ llvm-svn: 353166 --- llvm/lib/Transforms/Utils/LoopUtils.cpp | 14 ++++---- .../enable_and_isvectorized.ll | 33 +++++++++++++++++++ 2 files changed, 40 insertions(+), 7 deletions(-) create mode 100644 llvm/test/Transforms/LoopTransformWarning/enable_and_isvectorized.ll diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index a93d1aeb62ef91..4bc353989efbed 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -376,17 +376,17 @@ TransformationMode llvm::hasVectorizeTransformation(Loop *L) { Optional InterleaveCount = getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count"); - if (Enable == true) { - // 'Forcing' vector width and interleave count to one effectively disables - // this tranformation. - if (VectorizeWidth == 1 && InterleaveCount == 1) - return TM_SuppressedByUser; - return TM_ForcedByUser; - } + // 'Forcing' vector width and interleave count to one effectively disables + // this tranformation. + if (Enable == true && VectorizeWidth == 1 && InterleaveCount == 1) + return TM_SuppressedByUser; if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized")) return TM_Disable; + if (Enable == true) + return TM_ForcedByUser; + if (VectorizeWidth == 1 && InterleaveCount == 1) return TM_Disable; diff --git a/llvm/test/Transforms/LoopTransformWarning/enable_and_isvectorized.ll b/llvm/test/Transforms/LoopTransformWarning/enable_and_isvectorized.ll new file mode 100644 index 00000000000000..77d09ad53f7887 --- /dev/null +++ b/llvm/test/Transforms/LoopTransformWarning/enable_and_isvectorized.ll @@ -0,0 +1,33 @@ +; RUN: opt -transform-warning -disable-output < %s 2>&1 | FileCheck -allow-empty %s +; +; llvm.org/PR40546 +; Do not warn about about leftover llvm.loop.vectorize.enable for already +; vectorized loops. + +target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" + +define void @test(i32 %n) { +entry: + %cmp = icmp eq i32 %n, 0 + br i1 %cmp, label %simd.if.end, label %omp.inner.for.body.preheader + +omp.inner.for.body.preheader: + %wide.trip.count = zext i32 %n to i64 + br label %omp.inner.for.body + +omp.inner.for.body: + %indvars.iv = phi i64 [ 0, %omp.inner.for.body.preheader ], [ %indvars.iv.next, %omp.inner.for.body ] + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond, label %simd.if.end, label %omp.inner.for.body, !llvm.loop !0 + +simd.if.end: + ret void +} + +!0 = distinct !{!0, !1, !2} +!1 = !{!"llvm.loop.vectorize.enable", i1 true} +!2 = !{!"llvm.loop.isvectorized"} + + +; CHECK-NOT: loop not vectorized From a9a9c27e1c25996a210ff212bd46f14fd483e02b Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 5 Feb 2019 12:55:45 +0000 Subject: [PATCH 089/125] Merging r352555: ------------------------------------------------------------------------ r352555 | asbirlea | 2019-01-29 23:33:20 +0100 (Tue, 29 Jan 2019) | 12 lines Check bool attribute value in getOptionalBoolLoopAttribute. Summary: Check the bool value of the attribute in getOptionalBoolLoopAttribute not just its existance. Eliminates the warning noise generated when vectorization is explicitly disabled. Reviewers: Meinersbur, hfinkel, dmgreen Subscribers: jlebar, sanjoy, llvm-commits Differential Revision: https://reviews.llvm.org/D57260 ------------------------------------------------------------------------ llvm-svn: 353167 --- llvm/lib/Transforms/Utils/LoopUtils.cpp | 5 +- .../no_switch_disable_vectorization.ll | 95 +++++++++++++++++++ 2 files changed, 99 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/LoopVectorize/no_switch_disable_vectorization.ll diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 4bc353989efbed..112e80d27e345d 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -217,7 +217,10 @@ static Optional getOptionalBoolLoopAttribute(const Loop *TheLoop, // When the value is absent it is interpreted as 'attribute set'. return true; case 2: - return mdconst::extract_or_null(MD->getOperand(1).get()); + if (ConstantInt *IntMD = + mdconst::extract_or_null(MD->getOperand(1).get())) + return IntMD->getZExtValue(); + return true; } llvm_unreachable("unexpected number of options"); } diff --git a/llvm/test/Transforms/LoopVectorize/no_switch_disable_vectorization.ll b/llvm/test/Transforms/LoopVectorize/no_switch_disable_vectorization.ll new file mode 100644 index 00000000000000..424ef3846224a3 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/no_switch_disable_vectorization.ll @@ -0,0 +1,95 @@ +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -transform-warning -S 2>&1 | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-width=1 -transform-warning -S 2>&1 | FileCheck %s -check-prefix=NOANALYSIS +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -transform-warning -pass-remarks-missed='loop-vectorize' -S 2>&1 | FileCheck %s -check-prefix=MOREINFO + +; This test is a copy of no_switch.ll, with the "llvm.loop.vectorize.enable" metadata set to false. +; It tests that vectorization is explicitly disabled and no warnings are emitted. + +; CHECK-NOT: remark: source.cpp:4:5: loop not vectorized: loop contains a switch statement +; CHECK-NOT: warning: source.cpp:4:5: loop not vectorized: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering + +; NOANALYSIS-NOT: remark: {{.*}} +; NOANALYSIS-NOT: warning: source.cpp:4:5: loop not vectorized: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering + +; MOREINFO: remark: source.cpp:4:5: loop not vectorized: vectorization is explicitly disabled +; MOREINFO-NOT: warning: source.cpp:4:5: loop not vectorized: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering + +; CHECK: _Z11test_switchPii +; CHECK-NOT: x i32> +; CHECK: ret + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +; Function Attrs: nounwind optsize ssp uwtable +define void @_Z11test_switchPii(i32* nocapture %A, i32 %Length) #0 !dbg !4 { +entry: + %cmp18 = icmp sgt i32 %Length, 0, !dbg !10 + br i1 %cmp18, label %for.body.preheader, label %for.end, !dbg !10, !llvm.loop !12 + +for.body.preheader: ; preds = %entry + br label %for.body, !dbg !14 + +for.body: ; preds = %for.body.preheader, %for.inc + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !14 + %0 = load i32, i32* %arrayidx, align 4, !dbg !14, !tbaa !16 + switch i32 %0, label %for.inc [ + i32 0, label %sw.bb + i32 1, label %sw.bb3 + ], !dbg !14 + +sw.bb: ; preds = %for.body + %1 = trunc i64 %indvars.iv to i32, !dbg !20 + %mul = shl nsw i32 %1, 1, !dbg !20 + br label %for.inc, !dbg !22 + +sw.bb3: ; preds = %for.body + %2 = trunc i64 %indvars.iv to i32, !dbg !23 + store i32 %2, i32* %arrayidx, align 4, !dbg !23, !tbaa !16 + br label %for.inc, !dbg !23 + +for.inc: ; preds = %sw.bb3, %for.body, %sw.bb + %storemerge = phi i32 [ %mul, %sw.bb ], [ 0, %for.body ], [ 0, %sw.bb3 ] + store i32 %storemerge, i32* %arrayidx, align 4, !dbg !20, !tbaa !16 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !10 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !10 + %exitcond = icmp eq i32 %lftr.wideiv, %Length, !dbg !10 + br i1 %exitcond, label %for.end.loopexit, label %for.body, !dbg !10, !llvm.loop !12 + +for.end.loopexit: ; preds = %for.inc + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void, !dbg !24 +} + +attributes #0 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!7, !8} +!llvm.ident = !{!9} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, runtimeVersion: 6, emissionKind: LineTablesOnly, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2) +!1 = !DIFile(filename: "source.cpp", directory: ".") +!2 = !{} +!4 = distinct !DISubprogram(name: "test_switch", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 1, file: !1, scope: !5, type: !6, retainedNodes: !2) +!5 = !DIFile(filename: "source.cpp", directory: ".") +!6 = !DISubroutineType(types: !2) +!7 = !{i32 2, !"Dwarf Version", i32 2} +!8 = !{i32 2, !"Debug Info Version", i32 3} +!9 = !{!"clang version 3.5.0"} +!10 = !DILocation(line: 3, column: 8, scope: !11) +!11 = distinct !DILexicalBlock(line: 3, column: 3, file: !1, scope: !4) +!12 = !{!12, !13, !13} +!13 = !{!"llvm.loop.vectorize.enable", i1 false} +!14 = !DILocation(line: 4, column: 5, scope: !15) +!15 = distinct !DILexicalBlock(line: 3, column: 36, file: !1, scope: !11) +!16 = !{!17, !17, i64 0} +!17 = !{!"int", !18, i64 0} +!18 = !{!"omnipotent char", !19, i64 0} +!19 = !{!"Simple C/C++ TBAA"} +!20 = !DILocation(line: 6, column: 7, scope: !21) +!21 = distinct !DILexicalBlock(line: 4, column: 18, file: !1, scope: !15) +!22 = !DILocation(line: 7, column: 5, scope: !21) +!23 = !DILocation(line: 9, column: 7, scope: !21) +!24 = !DILocation(line: 14, column: 1, scope: !4) From 30ce79ed996240027fabbe3d46fb4936642f662a Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Tue, 5 Feb 2019 21:25:23 +0000 Subject: [PATCH 090/125] Merging rr353218: ------------------------------------------------------------------------ r353218 | rnk | 2019-02-05 13:14:09 -0800 (Tue, 05 Feb 2019) | 19 lines [MC] Don't error on numberless .file directives on MachO Summary: Before r349976, MC ignored such directives when producing an object file and asserted when re-producing textual assembly output. I turned this assertion into a hard error in both cases in r349976, but this makes it unnecessarily difficult to write a single assembly file that supports both MachO and other object formats that support .file. A user reported this as PR40578, and we decided to go back to ignoring the directive. Fixes PR40578 Reviewers: mstorsjo Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D57772 ------------------------------------------------------------------------ llvm-svn: 353220 --- llvm/lib/MC/MCParser/AsmParser.cpp | 9 +++++---- llvm/test/MC/MachO/file-single.s | 8 -------- llvm/test/MC/MachO/file.s | 3 +++ 3 files changed, 8 insertions(+), 12 deletions(-) delete mode 100644 llvm/test/MC/MachO/file-single.s diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index cf42a6f7075b9a..a0506715be3726 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -3364,10 +3364,11 @@ bool AsmParser::parseDirectiveFile(SMLoc DirectiveLoc) { } if (FileNumber == -1) { - if (!getContext().getAsmInfo()->hasSingleParameterDotFile()) - return Error(DirectiveLoc, - "target does not support '.file' without a number"); - getStreamer().EmitFileDirective(Filename); + // Ignore the directive if there is no number and the target doesn't support + // numberless .file directives. This allows some portability of assembler + // between different object file formats. + if (getContext().getAsmInfo()->hasSingleParameterDotFile()) + getStreamer().EmitFileDirective(Filename); } else { // In case there is a -g option as well as debug info from directive .file, // we turn off the -g option, directly use the existing debug info instead. diff --git a/llvm/test/MC/MachO/file-single.s b/llvm/test/MC/MachO/file-single.s deleted file mode 100644 index 747af22750af7b..00000000000000 --- a/llvm/test/MC/MachO/file-single.s +++ /dev/null @@ -1,8 +0,0 @@ -// RUN: not llvm-mc -triple i386-apple-darwin9 %s -o /dev/null 2>&1 | FileCheck %s - -// Previously this crashed MC. - -// CHECK: error: target does not support '.file' without a number - - .file "dir/foo" - nop diff --git a/llvm/test/MC/MachO/file.s b/llvm/test/MC/MachO/file.s index 3ddfb2efe22468..eddbb599d97a27 100644 --- a/llvm/test/MC/MachO/file.s +++ b/llvm/test/MC/MachO/file.s @@ -1,5 +1,8 @@ // RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -s -section-data | FileCheck %s +// This number-less file directive is ignored on MachO. + .file "bar/baz.s" + .file 1 "dir/foo" nop From 35f4f2f7220a796ada2499e9ea49b7f9531040a4 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Wed, 6 Feb 2019 08:31:22 +0000 Subject: [PATCH 091/125] Merging r353155: ------------------------------------------------------------------------ r353155 | hans | 2019-02-05 12:01:54 +0100 (Tue, 05 Feb 2019) | 11 lines Fix format string in bindings/go/llvm/ir_test.go (PR40561) The test started failing for me recently. I don't see any changes around this code, so maybe it's my local go version that changed or something. The error seems real to me: we're trying to print an Attribute with %d. The test talks about "attribute masks" I'm not sure what that refers to, but I suppose we could print the raw pointer value, since that's what the test seems to be comparing. Differential revision: https://reviews.llvm.org/D57672 ------------------------------------------------------------------------ llvm-svn: 353279 --- llvm/bindings/go/llvm/ir_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/bindings/go/llvm/ir_test.go b/llvm/bindings/go/llvm/ir_test.go index 10f4968ba89f2c..4d559c33671580 100644 --- a/llvm/bindings/go/llvm/ir_test.go +++ b/llvm/bindings/go/llvm/ir_test.go @@ -31,7 +31,7 @@ func testAttribute(t *testing.T, name string) { fn.AddFunctionAttr(attr) newattr := fn.GetEnumFunctionAttribute(kind) if attr != newattr { - t.Errorf("got attribute mask %d, want %d", newattr, attr) + t.Errorf("got attribute %p, want %p", newattr.C, attr.C) } text := mod.String() @@ -42,7 +42,7 @@ func testAttribute(t *testing.T, name string) { fn.RemoveEnumFunctionAttribute(kind) newattr = fn.GetEnumFunctionAttribute(kind) if !newattr.IsNil() { - t.Errorf("got attribute mask %d, want 0", newattr) + t.Errorf("got attribute %p, want 0", newattr.C) } } From 5581990eb08ddd98e511041fca75a13971eeb6b4 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Wed, 6 Feb 2019 10:22:11 +0000 Subject: [PATCH 092/125] Merging r352016: ------------------------------------------------------------------------ r352016 | phosek | 2019-01-24 04:04:42 +0100 (Thu, 24 Jan 2019) | 12 lines [libunwind] Don't abort if encoutering invalid .eh_frame_hdr Recent Linux kernel release has introduced a bug as part of the ORC rollout where the vDSO has a valid .eh_frame section, but it's missing the .eh_frame_hdr section and GNU_EH_FRAME segment has zero size. This causes libunwind to abort which breaks programs that use libunwind. The other unwinder implementation (libgcc, non-gnu) instead silently bail out unless being compiled as debug. This change modifies libunwind to use the same strategy. Differential Revision: https://reviews.llvm.org/D57081 ------------------------------------------------------------------------ llvm-svn: 353287 --- libunwind/src/AddressSpace.hpp | 6 +++--- libunwind/src/EHHeaderParser.hpp | 16 +++++++++++----- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/libunwind/src/AddressSpace.hpp b/libunwind/src/AddressSpace.hpp index 30ad35995aece2..49bb360d794111 100644 --- a/libunwind/src/AddressSpace.hpp +++ b/libunwind/src/AddressSpace.hpp @@ -534,11 +534,11 @@ inline bool LocalAddressSpace::findUnwindSections(pint_t targetAddr, #endif cbdata->sects->dwarf_index_section = eh_frame_hdr_start; cbdata->sects->dwarf_index_section_length = phdr->p_memsz; - EHHeaderParser::decodeEHHdr( + found_hdr = EHHeaderParser::decodeEHHdr( *cbdata->addressSpace, eh_frame_hdr_start, phdr->p_memsz, hdrInfo); - cbdata->sects->dwarf_section = hdrInfo.eh_frame_ptr; - found_hdr = true; + if (found_hdr) + cbdata->sects->dwarf_section = hdrInfo.eh_frame_ptr; } } diff --git a/libunwind/src/EHHeaderParser.hpp b/libunwind/src/EHHeaderParser.hpp index 9bdaf5505ff04d..6b3e7dead86643 100644 --- a/libunwind/src/EHHeaderParser.hpp +++ b/libunwind/src/EHHeaderParser.hpp @@ -36,7 +36,7 @@ template class EHHeaderParser { uint8_t table_enc; }; - static void decodeEHHdr(A &addressSpace, pint_t ehHdrStart, pint_t ehHdrEnd, + static bool decodeEHHdr(A &addressSpace, pint_t ehHdrStart, pint_t ehHdrEnd, EHHeaderInfo &ehHdrInfo); static bool findFDE(A &addressSpace, pint_t pc, pint_t ehHdrStart, uint32_t sectionLength, @@ -53,12 +53,14 @@ template class EHHeaderParser { }; template -void EHHeaderParser::decodeEHHdr(A &addressSpace, pint_t ehHdrStart, +bool EHHeaderParser::decodeEHHdr(A &addressSpace, pint_t ehHdrStart, pint_t ehHdrEnd, EHHeaderInfo &ehHdrInfo) { pint_t p = ehHdrStart; uint8_t version = addressSpace.get8(p++); - if (version != 1) - _LIBUNWIND_ABORT("Unsupported .eh_frame_hdr version"); + if (version != 1) { + _LIBUNWIND_LOG0("Unsupported .eh_frame_hdr version"); + return false; + } uint8_t eh_frame_ptr_enc = addressSpace.get8(p++); uint8_t fde_count_enc = addressSpace.get8(p++); @@ -71,6 +73,8 @@ void EHHeaderParser::decodeEHHdr(A &addressSpace, pint_t ehHdrStart, ? 0 : addressSpace.getEncodedP(p, ehHdrEnd, fde_count_enc, ehHdrStart); ehHdrInfo.table = p; + + return true; } template @@ -102,7 +106,9 @@ bool EHHeaderParser::findFDE(A &addressSpace, pint_t pc, pint_t ehHdrStart, pint_t ehHdrEnd = ehHdrStart + sectionLength; EHHeaderParser::EHHeaderInfo hdrInfo; - EHHeaderParser::decodeEHHdr(addressSpace, ehHdrStart, ehHdrEnd, hdrInfo); + if (!EHHeaderParser::decodeEHHdr(addressSpace, ehHdrStart, ehHdrEnd, + hdrInfo)) + return false; size_t tableEntrySize = getTableEntrySize(hdrInfo.table_enc); pint_t tableEntry; From a2f46031fd0302a572eea856d2d7d247e1aa88d8 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Wed, 6 Feb 2019 12:39:08 +0000 Subject: [PATCH 093/125] Merging r353224: ------------------------------------------------------------------------ r353224 | kamil | 2019-02-05 23:20:25 +0100 (Tue, 05 Feb 2019) | 2 lines Update the ioctl(2) list in sanitizers with NetBSD 8.99.34 ------------------------------------------------------------------------ llvm-svn: 353292 --- .../sanitizer_interceptors_ioctl_netbsd.inc | 9 +++------ .../sanitizer_common/sanitizer_platform_limits_netbsd.cc | 5 ----- .../sanitizer_common/sanitizer_platform_limits_netbsd.h | 4 ---- compiler-rt/utils/generate_netbsd_ioctls.awk | 1 - 4 files changed, 3 insertions(+), 16 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_interceptors_ioctl_netbsd.inc b/compiler-rt/lib/sanitizer_common/sanitizer_interceptors_ioctl_netbsd.inc index 86cb440476069f..bddc26d20019d0 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_interceptors_ioctl_netbsd.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_interceptors_ioctl_netbsd.inc @@ -25,7 +25,7 @@ struct ioctl_desc { const char *name; }; -const unsigned ioctl_table_max = 1202; +const unsigned ioctl_table_max = 1200; static ioctl_desc ioctl_table[ioctl_table_max]; static unsigned ioctl_table_size = 0; @@ -298,9 +298,6 @@ static void ioctl_table_fill() { _(IRFRAMETTY_GET_DEVICE, WRITE, sizeof(unsigned int)); _(IRFRAMETTY_GET_DONGLE, WRITE, sizeof(unsigned int)); _(IRFRAMETTY_SET_DONGLE, READ, sizeof(unsigned int)); - /* Entries from file: dev/isa/satlinkio.h */ - _(SATIORESET, NONE, 0); - _(SATIOGID, WRITE, struct_satlink_id_sz); /* Entries from file: dev/isa/isvio.h */ _(ISV_CMD, READWRITE, struct_isv_cmd_sz); /* Entries from file: dev/isa/wtreg.h */ @@ -649,8 +646,8 @@ static void ioctl_table_fill() { _(SPKRTUNE, NONE, 0); _(SPKRGETVOL, WRITE, sizeof(unsigned int)); _(SPKRSETVOL, READ, sizeof(unsigned int)); - /* Entries from file: dev/nvmm/nvmm_ioctl.h */ #if 0 /* WIP */ + /* Entries from file: dev/nvmm/nvmm_ioctl.h */ _(NVMM_IOC_CAPABILITY, WRITE, struct_nvmm_ioc_capability_sz); _(NVMM_IOC_MACHINE_CREATE, READWRITE, struct_nvmm_ioc_machine_create_sz); _(NVMM_IOC_MACHINE_DESTROY, READ, struct_nvmm_ioc_machine_destroy_sz); @@ -659,7 +656,7 @@ static void ioctl_table_fill() { _(NVMM_IOC_VCPU_DESTROY, READ, struct_nvmm_ioc_vcpu_destroy_sz); _(NVMM_IOC_VCPU_SETSTATE, READ, struct_nvmm_ioc_vcpu_setstate_sz); _(NVMM_IOC_VCPU_GETSTATE, READ, struct_nvmm_ioc_vcpu_getstate_sz); - _(NVMM_IOC_VCPU_INJECT, READWRITE, struct_nvmm_ioc_vcpu_inject_sz); + _(NVMM_IOC_VCPU_INJECT, READ, struct_nvmm_ioc_vcpu_inject_sz); _(NVMM_IOC_VCPU_RUN, READWRITE, struct_nvmm_ioc_vcpu_run_sz); _(NVMM_IOC_GPA_MAP, READ, struct_nvmm_ioc_gpa_map_sz); _(NVMM_IOC_GPA_UNMAP, READ, struct_nvmm_ioc_gpa_unmap_sz); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cc b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cc index b23b430d9e5bc4..c112e044b1d82c 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cc @@ -122,7 +122,6 @@ #include #include #include -#include #include #include #include @@ -639,7 +638,6 @@ unsigned struct_rf_recon_req_sz = sizeof(rf_recon_req); unsigned struct_rio_conf_sz = sizeof(rio_conf); unsigned struct_rio_interface_sz = sizeof(rio_interface); unsigned struct_rio_stats_sz = sizeof(rio_stats); -unsigned struct_satlink_id_sz = sizeof(satlink_id); unsigned struct_scan_io_sz = sizeof(scan_io); unsigned struct_scbusaccel_args_sz = sizeof(scbusaccel_args); unsigned struct_scbusiodetach_args_sz = sizeof(scbusiodetach_args); @@ -1105,9 +1103,6 @@ unsigned IOCTL_IRDA_GET_TURNAROUNDMASK = IRDA_GET_TURNAROUNDMASK; unsigned IOCTL_IRFRAMETTY_GET_DEVICE = IRFRAMETTY_GET_DEVICE; unsigned IOCTL_IRFRAMETTY_GET_DONGLE = IRFRAMETTY_GET_DONGLE; unsigned IOCTL_IRFRAMETTY_SET_DONGLE = IRFRAMETTY_SET_DONGLE; -unsigned IOCTL_SATIORESET = SATIORESET; -unsigned IOCTL_SATIOGID = SATIOGID; -unsigned IOCTL_SATIOSBUFSIZE = SATIOSBUFSIZE; unsigned IOCTL_ISV_CMD = ISV_CMD; unsigned IOCTL_WTQICMD = WTQICMD; unsigned IOCTL_ISCSI_GET_VERSION = ISCSI_GET_VERSION; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h index 0c0c8a837b8b3d..594cfa6c0d47b9 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h @@ -803,7 +803,6 @@ extern unsigned struct_rf_recon_req_sz; extern unsigned struct_rio_conf_sz; extern unsigned struct_rio_interface_sz; extern unsigned struct_rio_stats_sz; -extern unsigned struct_satlink_id_sz; extern unsigned struct_scan_io_sz; extern unsigned struct_scbusaccel_args_sz; extern unsigned struct_scbusiodetach_args_sz; @@ -1266,9 +1265,6 @@ extern unsigned IOCTL_IRDA_GET_TURNAROUNDMASK; extern unsigned IOCTL_IRFRAMETTY_GET_DEVICE; extern unsigned IOCTL_IRFRAMETTY_GET_DONGLE; extern unsigned IOCTL_IRFRAMETTY_SET_DONGLE; -extern unsigned IOCTL_SATIORESET; -extern unsigned IOCTL_SATIOGID; -extern unsigned IOCTL_SATIOSBUFSIZE; extern unsigned IOCTL_ISV_CMD; extern unsigned IOCTL_WTQICMD; extern unsigned IOCTL_ISCSI_GET_VERSION; diff --git a/compiler-rt/utils/generate_netbsd_ioctls.awk b/compiler-rt/utils/generate_netbsd_ioctls.awk index 82b1992143772e..38fe88fb0cde4f 100755 --- a/compiler-rt/utils/generate_netbsd_ioctls.awk +++ b/compiler-rt/utils/generate_netbsd_ioctls.awk @@ -152,7 +152,6 @@ FNR == 1 { $0 ~ /JOY_GET_X_OFFSET/ || $0 ~ /CHIOGPICKER/ || $0 ~ /SLIOCGUNIT/ || - $0 ~ /SATIOSBUFSIZE/ || $0 ~ /TUNSLMODE/ || $0 ~ /CBQ_IF_ATTACH/ || $0 ~ /CDNR_IF_ATTACH/ || From 7ec84db44f05e9f19ef5c04b6ba1147b2d2cba9c Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Wed, 6 Feb 2019 14:47:31 +0000 Subject: [PATCH 094/125] Merging r353250: ------------------------------------------------------------------------ r353250 | zturner | 2019-02-06 01:50:35 +0100 (Wed, 06 Feb 2019) | 24 lines [PDB] Remove dots and normalize slashes with /PDBSOURCEPATH. In a previous patch, I made changes so that PDBs which were generated on non-Windows platforms contained sensical paths for the host. While this is an esoteric use case, we need it to be supported for certain cross compilation scenarios especially with LLDB, which can debug things on non-Windows platforms. However, this regressed a case where you specify /PDBSOURCEPATH and use a windows-style path. Previously, we would still remove dots and canonicalize slashes to backslashes, but since my change intentionally tried to support non-backslash paths, this was broken. This patch fixes the situation by trying to guess which path style the user is specifying when /PDBSOURCEPATH is passed. It is intentionally conservative, erring on the side of a Windows path style unless absolutely certain. All dots are removed and slashes canonicalized to whatever the deduced path style is after appending the file path to the /PDBSOURCEPATH argument. Differential Revision: https://reviews.llvm.org/D57769 ------------------------------------------------------------------------ llvm-svn: 353300 --- lld/COFF/PDB.cpp | 18 ++++-- lld/test/COFF/pdb-relative-source-lines.test | 60 ++++++++++---------- 2 files changed, 42 insertions(+), 36 deletions(-) diff --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp index 7862b6ce4cc5af..7757b89e2b36d5 100644 --- a/lld/COFF/PDB.cpp +++ b/lld/COFF/PDB.cpp @@ -288,18 +288,24 @@ static void pdbMakeAbsolute(SmallVectorImpl &FileName) { // It's not absolute in any path syntax. Relative paths necessarily refer to // the local file system, so we can make it native without ending up with a // nonsensical path. - sys::path::native(FileName); if (Config->PDBSourcePath.empty()) { + sys::path::native(FileName); sys::fs::make_absolute(FileName); return; } - // Only apply native and dot removal to the relative file path. We want to - // leave the path the user specified untouched since we assume they specified - // it for a reason. - sys::path::remove_dots(FileName, /*remove_dot_dots=*/true); + // Try to guess whether /PDBSOURCEPATH is a unix path or a windows path. + // Since PDB's are more of a Windows thing, we make this conservative and only + // decide that it's a unix path if we're fairly certain. Specifically, if + // it starts with a forward slash. SmallString<128> AbsoluteFileName = Config->PDBSourcePath; - sys::path::append(AbsoluteFileName, FileName); + sys::path::Style GuessedStyle = AbsoluteFileName.startswith("/") + ? sys::path::Style::posix + : sys::path::Style::windows; + sys::path::append(AbsoluteFileName, GuessedStyle, FileName); + sys::path::native(AbsoluteFileName, GuessedStyle); + sys::path::remove_dots(AbsoluteFileName, true, GuessedStyle); + FileName = std::move(AbsoluteFileName); } diff --git a/lld/test/COFF/pdb-relative-source-lines.test b/lld/test/COFF/pdb-relative-source-lines.test index 865d7a6d8a0a4f..547056785962ad 100644 --- a/lld/test/COFF/pdb-relative-source-lines.test +++ b/lld/test/COFF/pdb-relative-source-lines.test @@ -37,26 +37,26 @@ RUN: llvm-pdbutil pdb2yaml -modules -module-files -module-syms -subsections=line RUN: ./lld-link -debug "-pdbsourcepath:/usr/src" -entry:main -nodefaultlib -out:out.exe -pdb:out.pdb pdb_lines_1_relative.obj pdb_lines_2_relative.obj RUN: llvm-pdbutil pdb2yaml -modules -module-files -module-syms -subsections=lines,fc %t/out.pdb | FileCheck --check-prefix=POSIX %s -CHECK-LABEL: - Module: 'c:\src{{[\\/]}}pdb_lines_1_relative.obj' -CHECK-NEXT: ObjFile: 'c:\src{{[\\/]}}pdb_lines_1_relative.obj' +CHECK-LABEL: - Module: 'c:\src\pdb_lines_1_relative.obj' +CHECK-NEXT: ObjFile: 'c:\src\pdb_lines_1_relative.obj' CHECK: SourceFiles: -CHECK-NEXT: - 'c:\src{{[\\/]}}pdb_lines_1.c' -CHECK-NEXT: - 'c:\src{{[\\/]}}foo.h' +CHECK-NEXT: - 'c:\src\pdb_lines_1.c' +CHECK-NEXT: - 'c:\src\foo.h' CHECK: Subsections: -CHECK: - FileName: 'c:\src{{[\\/]}}pdb_lines_1.c' -CHECK: - FileName: 'c:\src{{[\\/]}}foo.h' +CHECK: - FileName: 'c:\src\pdb_lines_1.c' +CHECK: - FileName: 'c:\src\foo.h' CHECK: - !FileChecksums -CHECK: - FileName: 'c:\src{{[\\/]}}pdb_lines_1.c' -CHECK: - FileName: 'c:\src{{[\\/]}}foo.h' +CHECK: - FileName: 'c:\src\pdb_lines_1.c' +CHECK: - FileName: 'c:\src\foo.h' -CHECK-LABEL: - Module: 'c:\src{{[\\/]}}pdb_lines_2_relative.obj' -CHECK-NEXT: ObjFile: 'c:\src{{[\\/]}}pdb_lines_2_relative.obj' +CHECK-LABEL: - Module: 'c:\src\pdb_lines_2_relative.obj' +CHECK-NEXT: ObjFile: 'c:\src\pdb_lines_2_relative.obj' CHECK: SourceFiles: -CHECK-NEXT: - 'c:\src{{[\\/]}}pdb_lines_2.c' +CHECK-NEXT: - 'c:\src\pdb_lines_2.c' CHECK: Subsections: -CHECK: - FileName: 'c:\src{{[\\/]}}pdb_lines_2.c' +CHECK: - FileName: 'c:\src\pdb_lines_2.c' CHECK: - !FileChecksums -CHECK: - FileName: 'c:\src{{[\\/]}}pdb_lines_2.c' +CHECK: - FileName: 'c:\src\pdb_lines_2.c' CHECK-LABEL: - Kind: S_ENVBLOCK CHECK-NEXT: EnvBlockSym: @@ -64,33 +64,33 @@ CHECK-NEXT: Entries: CHECK-NEXT: - cwd CHECK-NEXT: - 'c:\src' CHECK-NEXT: - exe -CHECK-NEXT: - 'c:\src{{[\\/]}}lld-link' +CHECK-NEXT: - 'c:\src\lld-link' CHECK-NEXT: - pdb -CHECK-NEXT: - 'c:\src{{[\\/]}}out.pdb' +CHECK-NEXT: - 'c:\src\out.pdb' CHECK-NEXT: - cmd CHECK-NEXT: - '-debug -pdbsourcepath:c:\src -entry:main -nodefaultlib -out:out.exe -pdb:out.pdb pdb_lines_1_relative.obj pdb_lines_2_relative.obj' -POSIX-LABEL: - Module: '/usr/src{{[\\/]}}pdb_lines_1_relative.obj' -POSIX-NEXT: ObjFile: '/usr/src{{[\\/]}}pdb_lines_1_relative.obj' +POSIX-LABEL: - Module: '/usr/src/pdb_lines_1_relative.obj' +POSIX-NEXT: ObjFile: '/usr/src/pdb_lines_1_relative.obj' POSIX: SourceFiles: -POSIX-NEXT: - '/usr/src{{[\\/]}}pdb_lines_1.c' -POSIX-NEXT: - '/usr/src{{[\\/]}}foo.h' +POSIX-NEXT: - '/usr/src/pdb_lines_1.c' +POSIX-NEXT: - '/usr/src/foo.h' POSIX: Subsections: -POSIX: - FileName: '/usr/src{{[\\/]}}pdb_lines_1.c' -POSIX: - FileName: '/usr/src{{[\\/]}}foo.h' +POSIX: - FileName: '/usr/src/pdb_lines_1.c' +POSIX: - FileName: '/usr/src/foo.h' POSIX: - !FileChecksums -POSIX: - FileName: '/usr/src{{[\\/]}}pdb_lines_1.c' -POSIX: - FileName: '/usr/src{{[\\/]}}foo.h' +POSIX: - FileName: '/usr/src/pdb_lines_1.c' +POSIX: - FileName: '/usr/src/foo.h' -POSIX-LABEL: - Module: '/usr/src{{[\\/]}}pdb_lines_2_relative.obj' -POSIX-NEXT: ObjFile: '/usr/src{{[\\/]}}pdb_lines_2_relative.obj' +POSIX-LABEL: - Module: '/usr/src/pdb_lines_2_relative.obj' +POSIX-NEXT: ObjFile: '/usr/src/pdb_lines_2_relative.obj' POSIX: SourceFiles: -POSIX-NEXT: - '/usr/src{{[\\/]}}pdb_lines_2.c' +POSIX-NEXT: - '/usr/src/pdb_lines_2.c' POSIX: Subsections: -POSIX: - FileName: '/usr/src{{[\\/]}}pdb_lines_2.c' +POSIX: - FileName: '/usr/src/pdb_lines_2.c' POSIX: - !FileChecksums -POSIX: - FileName: '/usr/src{{[\\/]}}pdb_lines_2.c' +POSIX: - FileName: '/usr/src/pdb_lines_2.c' POSIX-LABEL: - Kind: S_ENVBLOCK POSIX-NEXT: EnvBlockSym: @@ -98,8 +98,8 @@ POSIX-NEXT: Entries: POSIX-NEXT: - cwd POSIX-NEXT: - '/usr/src' POSIX-NEXT: - exe -POSIX-NEXT: - '/usr/src{{[\\/]}}lld-link' +POSIX-NEXT: - '/usr/src/lld-link' POSIX-NEXT: - pdb -POSIX-NEXT: - '/usr/src{{[\\/]}}out.pdb' +POSIX-NEXT: - '/usr/src/out.pdb' POSIX-NEXT: - cmd POSIX-NEXT: - '-debug -pdbsourcepath:/usr/src -entry:main -nodefaultlib -out:out.exe -pdb:out.pdb pdb_lines_1_relative.obj pdb_lines_2_relative.obj' From f58220669d09654f1df9b47161588d5e2f6bfc8a Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 7 Feb 2019 08:47:14 +0000 Subject: [PATCH 095/125] Merging r353304: ------------------------------------------------------------------------ r353304 | uweigand | 2019-02-06 16:10:13 +0100 (Wed, 06 Feb 2019) | 18 lines [SystemZ] Do not return INT_MIN from strcmp/memcmp The IPM sequence currently generated to compute the strcmp/memcmp result will return INT_MIN for the "less than zero" case. While this is in compliance with the standard, strictly speaking, it turns out that common applications cannot handle this, e.g. because they negate a comparison result in order to implement reverse compares. This patch changes code to use a different sequence that will result in -2 for the "less than zero" case (same as GCC). However, this requires that the two source operands of the compare instructions are inverted, which breaks the optimization in removeIPMBasedCompare. Therefore, I've removed this (and all of optimizeCompareInstr), and replaced it with a mostly equivalent optimization in combineCCMask at the DAGcombine level. ------------------------------------------------------------------------ llvm-svn: 353379 --- .../Target/SystemZ/SystemZISelLowering.cpp | 121 ++++++++++++------ llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp | 74 ----------- llvm/lib/Target/SystemZ/SystemZInstrInfo.h | 3 - .../SystemZ/SystemZSelectionDAGInfo.cpp | 18 +-- llvm/test/CodeGen/SystemZ/memcmp-01.ll | 58 ++++----- llvm/test/CodeGen/SystemZ/strcmp-01.ll | 18 +-- 6 files changed, 129 insertions(+), 163 deletions(-) diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 2a825c1316f3bb..607e55bf71c872 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -5618,55 +5618,96 @@ SDValue SystemZTargetLowering::combineBSWAP( static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code // set by the CCReg instruction using the CCValid / CCMask masks, - // If the CCReg instruction is itself a (ICMP (SELECT_CCMASK)) testing - // the condition code set by some other instruction, see whether we - // can directly use that condition code. - bool Invert = false; + // If the CCReg instruction is itself a ICMP testing the condition + // code set by some other instruction, see whether we can directly + // use that condition code. - // Verify that we have an appropriate mask for a EQ or NE comparison. + // Verify that we have an ICMP against some constant. if (CCValid != SystemZ::CCMASK_ICMP) return false; - if (CCMask == SystemZ::CCMASK_CMP_NE) - Invert = !Invert; - else if (CCMask != SystemZ::CCMASK_CMP_EQ) - return false; - - // Verify that we have an ICMP that is the user of a SELECT_CCMASK. - SDNode *ICmp = CCReg.getNode(); + auto *ICmp = CCReg.getNode(); if (ICmp->getOpcode() != SystemZISD::ICMP) return false; - SDNode *Select = ICmp->getOperand(0).getNode(); - if (Select->getOpcode() != SystemZISD::SELECT_CCMASK) + auto *CompareLHS = ICmp->getOperand(0).getNode(); + auto *CompareRHS = dyn_cast(ICmp->getOperand(1)); + if (!CompareRHS) return false; - // Verify that the ICMP compares against one of select values. - auto *CompareVal = dyn_cast(ICmp->getOperand(1)); - if (!CompareVal) - return false; - auto *TrueVal = dyn_cast(Select->getOperand(0)); - if (!TrueVal) - return false; - auto *FalseVal = dyn_cast(Select->getOperand(1)); - if (!FalseVal) - return false; - if (CompareVal->getZExtValue() == FalseVal->getZExtValue()) - Invert = !Invert; - else if (CompareVal->getZExtValue() != TrueVal->getZExtValue()) - return false; + // Optimize the case where CompareLHS is a SELECT_CCMASK. + if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) { + // Verify that we have an appropriate mask for a EQ or NE comparison. + bool Invert = false; + if (CCMask == SystemZ::CCMASK_CMP_NE) + Invert = !Invert; + else if (CCMask != SystemZ::CCMASK_CMP_EQ) + return false; - // Compute the effective CC mask for the new branch or select. - auto *NewCCValid = dyn_cast(Select->getOperand(2)); - auto *NewCCMask = dyn_cast(Select->getOperand(3)); - if (!NewCCValid || !NewCCMask) - return false; - CCValid = NewCCValid->getZExtValue(); - CCMask = NewCCMask->getZExtValue(); - if (Invert) - CCMask ^= CCValid; + // Verify that the ICMP compares against one of select values. + auto *TrueVal = dyn_cast(CompareLHS->getOperand(0)); + if (!TrueVal) + return false; + auto *FalseVal = dyn_cast(CompareLHS->getOperand(1)); + if (!FalseVal) + return false; + if (CompareRHS->getZExtValue() == FalseVal->getZExtValue()) + Invert = !Invert; + else if (CompareRHS->getZExtValue() != TrueVal->getZExtValue()) + return false; - // Return the updated CCReg link. - CCReg = Select->getOperand(4); - return true; + // Compute the effective CC mask for the new branch or select. + auto *NewCCValid = dyn_cast(CompareLHS->getOperand(2)); + auto *NewCCMask = dyn_cast(CompareLHS->getOperand(3)); + if (!NewCCValid || !NewCCMask) + return false; + CCValid = NewCCValid->getZExtValue(); + CCMask = NewCCMask->getZExtValue(); + if (Invert) + CCMask ^= CCValid; + + // Return the updated CCReg link. + CCReg = CompareLHS->getOperand(4); + return true; + } + + // Optimize the case where CompareRHS is (SRA (SHL (IPM))). + if (CompareLHS->getOpcode() == ISD::SRA) { + auto *SRACount = dyn_cast(CompareLHS->getOperand(1)); + if (!SRACount || SRACount->getZExtValue() != 30) + return false; + auto *SHL = CompareLHS->getOperand(0).getNode(); + if (SHL->getOpcode() != ISD::SHL) + return false; + auto *SHLCount = dyn_cast(SHL->getOperand(1)); + if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC) + return false; + auto *IPM = SHL->getOperand(0).getNode(); + if (IPM->getOpcode() != SystemZISD::IPM) + return false; + + // Avoid introducing CC spills (because SRA would clobber CC). + if (!CompareLHS->hasOneUse()) + return false; + // Verify that the ICMP compares against zero. + if (CompareRHS->getZExtValue() != 0) + return false; + + // Compute the effective CC mask for the new branch or select. + switch (CCMask) { + case SystemZ::CCMASK_CMP_EQ: break; + case SystemZ::CCMASK_CMP_NE: break; + case SystemZ::CCMASK_CMP_LT: CCMask = SystemZ::CCMASK_CMP_GT; break; + case SystemZ::CCMASK_CMP_GT: CCMask = SystemZ::CCMASK_CMP_LT; break; + case SystemZ::CCMASK_CMP_LE: CCMask = SystemZ::CCMASK_CMP_GE; break; + case SystemZ::CCMASK_CMP_GE: CCMask = SystemZ::CCMASK_CMP_LE; break; + default: return false; + } + + // Return the updated CCReg link. + CCReg = IPM->getOperand(0); + return true; + } + + return false; } SDValue SystemZTargetLowering::combineBR_CCMASK( diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index b03b4edaa4abc8..8aab5c2c49998e 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -557,80 +557,6 @@ bool SystemZInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, return false; } -// If Reg is a virtual register, return its definition, otherwise return null. -static MachineInstr *getDef(unsigned Reg, - const MachineRegisterInfo *MRI) { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) - return nullptr; - return MRI->getUniqueVRegDef(Reg); -} - -// Return true if MI is a shift of type Opcode by Imm bits. -static bool isShift(MachineInstr *MI, unsigned Opcode, int64_t Imm) { - return (MI->getOpcode() == Opcode && - !MI->getOperand(2).getReg() && - MI->getOperand(3).getImm() == Imm); -} - -// If the destination of MI has no uses, delete it as dead. -static void eraseIfDead(MachineInstr *MI, const MachineRegisterInfo *MRI) { - if (MRI->use_nodbg_empty(MI->getOperand(0).getReg())) - MI->eraseFromParent(); -} - -// Compare compares SrcReg against zero. Check whether SrcReg contains -// the result of an IPM sequence whose input CC survives until Compare, -// and whether Compare is therefore redundant. Delete it and return -// true if so. -static bool removeIPMBasedCompare(MachineInstr &Compare, unsigned SrcReg, - const MachineRegisterInfo *MRI, - const TargetRegisterInfo *TRI) { - MachineInstr *LGFR = nullptr; - MachineInstr *RLL = getDef(SrcReg, MRI); - if (RLL && RLL->getOpcode() == SystemZ::LGFR) { - LGFR = RLL; - RLL = getDef(LGFR->getOperand(1).getReg(), MRI); - } - if (!RLL || !isShift(RLL, SystemZ::RLL, 31)) - return false; - - MachineInstr *SRL = getDef(RLL->getOperand(1).getReg(), MRI); - if (!SRL || !isShift(SRL, SystemZ::SRL, SystemZ::IPM_CC)) - return false; - - MachineInstr *IPM = getDef(SRL->getOperand(1).getReg(), MRI); - if (!IPM || IPM->getOpcode() != SystemZ::IPM) - return false; - - // Check that there are no assignments to CC between the IPM and Compare, - if (IPM->getParent() != Compare.getParent()) - return false; - MachineBasicBlock::iterator MBBI = IPM, MBBE = Compare.getIterator(); - for (++MBBI; MBBI != MBBE; ++MBBI) { - MachineInstr &MI = *MBBI; - if (MI.modifiesRegister(SystemZ::CC, TRI)) - return false; - } - - Compare.eraseFromParent(); - if (LGFR) - eraseIfDead(LGFR, MRI); - eraseIfDead(RLL, MRI); - eraseIfDead(SRL, MRI); - eraseIfDead(IPM, MRI); - - return true; -} - -bool SystemZInstrInfo::optimizeCompareInstr( - MachineInstr &Compare, unsigned SrcReg, unsigned SrcReg2, int Mask, - int Value, const MachineRegisterInfo *MRI) const { - assert(!SrcReg2 && "Only optimizing constant comparisons so far"); - bool IsLogical = (Compare.getDesc().TSFlags & SystemZII::IsLogical) != 0; - return Value == 0 && !IsLogical && - removeIPMBasedCompare(Compare, SrcReg, MRI, &RI); -} - bool SystemZInstrInfo::canInsertSelect(const MachineBasicBlock &MBB, ArrayRef Pred, unsigned TrueReg, unsigned FalseReg, diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h index 216139eb7c7969..0392430ed872c3 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h @@ -208,9 +208,6 @@ class SystemZInstrInfo : public SystemZGenInstrInfo { int *BytesAdded = nullptr) const override; bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, unsigned &SrcReg2, int &Mask, int &Value) const override; - bool optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, - unsigned SrcReg2, int Mask, int Value, - const MachineRegisterInfo *MRI) const override; bool canInsertSelect(const MachineBasicBlock&, ArrayRef Cond, unsigned, unsigned, int&, int&, int&) const override; void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index e0d7bca9a94b9e..4592e82eea718b 100644 --- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -164,17 +164,17 @@ static SDValue emitCLC(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, } // Convert the current CC value into an integer that is 0 if CC == 0, -// less than zero if CC == 1 and greater than zero if CC >= 2. +// greater than zero if CC == 1 and less than zero if CC >= 2. // The sequence starts with IPM, which puts CC into bits 29 and 28 // of an integer and clears bits 30 and 31. static SDValue addIPMSequence(const SDLoc &DL, SDValue CCReg, SelectionDAG &DAG) { SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg); - SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, - DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); - SDValue ROTL = DAG.getNode(ISD::ROTL, DL, MVT::i32, SRL, - DAG.getConstant(31, DL, MVT::i32)); - return ROTL; + SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, IPM, + DAG.getConstant(30 - SystemZ::IPM_CC, DL, MVT::i32)); + SDValue SRA = DAG.getNode(ISD::SRA, DL, MVT::i32, SHL, + DAG.getConstant(30, DL, MVT::i32)); + return SRA; } std::pair SystemZSelectionDAGInfo::EmitTargetCodeForMemcmp( @@ -184,7 +184,8 @@ std::pair SystemZSelectionDAGInfo::EmitTargetCodeForMemcmp( if (auto *CSize = dyn_cast(Size)) { uint64_t Bytes = CSize->getZExtValue(); assert(Bytes > 0 && "Caller should have handled 0-size case"); - SDValue CCReg = emitCLC(DAG, DL, Chain, Src1, Src2, Bytes); + // Swap operands to invert CC == 1 vs. CC == 2 cases. + SDValue CCReg = emitCLC(DAG, DL, Chain, Src2, Src1, Bytes); Chain = CCReg.getValue(1); return std::make_pair(addIPMSequence(DL, CCReg, DAG), Chain); } @@ -232,7 +233,8 @@ std::pair SystemZSelectionDAGInfo::EmitTargetCodeForStrcmp( SDValue Src2, MachinePointerInfo Op1PtrInfo, MachinePointerInfo Op2PtrInfo) const { SDVTList VTs = DAG.getVTList(Src1.getValueType(), MVT::i32, MVT::Other); - SDValue Unused = DAG.getNode(SystemZISD::STRCMP, DL, VTs, Chain, Src1, Src2, + // Swap operands to invert CC == 1 vs. CC == 2 cases. + SDValue Unused = DAG.getNode(SystemZISD::STRCMP, DL, VTs, Chain, Src2, Src1, DAG.getConstant(0, DL, MVT::i32)); SDValue CCReg = Unused.getValue(1); Chain = Unused.getValue(2); diff --git a/llvm/test/CodeGen/SystemZ/memcmp-01.ll b/llvm/test/CodeGen/SystemZ/memcmp-01.ll index ac980e49d60bc1..740a86750dd8a6 100644 --- a/llvm/test/CodeGen/SystemZ/memcmp-01.ll +++ b/llvm/test/CodeGen/SystemZ/memcmp-01.ll @@ -16,10 +16,10 @@ define i32 @f1(i8 *%src1, i8 *%src2) { ; Check a case where the result is used as an integer. define i32 @f2(i8 *%src1, i8 *%src2) { ; CHECK-LABEL: f2: -; CHECK: clc 0(2,%r2), 0(%r3) -; CHECK: ipm [[REG:%r[0-5]]] -; CHECK: srl [[REG]], 28 -; CHECK: rll %r2, [[REG]], 31 +; CHECK: clc 0(2,%r3), 0(%r2) +; CHECK: ipm %r2 +; CHECK: sll %r2, 2 +; CHECK: sra %r2, 30 ; CHECK: br %r14 %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 2) ret i32 %res @@ -28,7 +28,7 @@ define i32 @f2(i8 *%src1, i8 *%src2) { ; Check a case where the result is tested for equality. define void @f3(i8 *%src1, i8 *%src2, i32 *%dest) { ; CHECK-LABEL: f3: -; CHECK: clc 0(3,%r2), 0(%r3) +; CHECK: clc 0(3,%r3), 0(%r2) ; CHECK-NEXT: ber %r14 ; CHECK: br %r14 %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 3) @@ -46,7 +46,7 @@ exit: ; Check a case where the result is tested for inequality. define void @f4(i8 *%src1, i8 *%src2, i32 *%dest) { ; CHECK-LABEL: f4: -; CHECK: clc 0(4,%r2), 0(%r3) +; CHECK: clc 0(4,%r3), 0(%r2) ; CHECK-NEXT: blhr %r14 ; CHECK: br %r14 entry: @@ -65,8 +65,8 @@ exit: ; Check a case where the result is tested via slt. define void @f5(i8 *%src1, i8 *%src2, i32 *%dest) { ; CHECK-LABEL: f5: -; CHECK: clc 0(5,%r2), 0(%r3) -; CHECK-NEXT: blr %r14 +; CHECK: clc 0(5,%r3), 0(%r2) +; CHECK-NEXT: bhr %r14 ; CHECK: br %r14 entry: %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 5) @@ -84,8 +84,8 @@ exit: ; Check a case where the result is tested for sgt. define void @f6(i8 *%src1, i8 *%src2, i32 *%dest) { ; CHECK-LABEL: f6: -; CHECK: clc 0(6,%r2), 0(%r3) -; CHECK-NEXT: bhr %r14 +; CHECK: clc 0(6,%r3), 0(%r2) +; CHECK-NEXT: blr %r14 ; CHECK: br %r14 entry: %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 6) @@ -104,10 +104,10 @@ exit: ; an integer and for branching. define i32 @f7(i8 *%src1, i8 *%src2, i32 *%dest) { ; CHECK-LABEL: f7: -; CHECK: clc 0(256,%r2), 0(%r3) -; CHECK: ipm [[REG:%r[0-5]]] -; CHECK: srl [[REG]], 28 -; CHECK: rll %r2, [[REG]], 31 +; CHECK: clc 0(256,%r3), 0(%r2) +; CHECK: ipm %r2 +; CHECK: sll %r2, 2 +; CHECK: sra %r2, 30 ; CHECK: blr %r14 ; CHECK: br %r14 entry: @@ -126,9 +126,9 @@ exit: ; 257 bytes needs two CLCs. define i32 @f8(i8 *%src1, i8 *%src2) { ; CHECK-LABEL: f8: -; CHECK: clc 0(256,%r2), 0(%r3) +; CHECK: clc 0(256,%r3), 0(%r2) ; CHECK: jlh [[LABEL:\..*]] -; CHECK: clc 256(1,%r2), 256(%r3) +; CHECK: clc 256(1,%r3), 256(%r2) ; CHECK: [[LABEL]]: ; CHECK: ipm [[REG:%r[0-5]]] ; CHECK: br %r14 @@ -139,11 +139,11 @@ define i32 @f8(i8 *%src1, i8 *%src2) { ; Test a comparison of 258 bytes in which the CC result can be used directly. define void @f9(i8 *%src1, i8 *%src2, i32 *%dest) { ; CHECK-LABEL: f9: -; CHECK: clc 0(256,%r2), 0(%r3) +; CHECK: clc 0(256,%r3), 0(%r2) ; CHECK: jlh [[LABEL:\..*]] -; CHECK: clc 256(1,%r2), 256(%r3) +; CHECK: clc 256(1,%r3), 256(%r2) ; CHECK: [[LABEL]]: -; CHECK-NEXT: blr %r14 +; CHECK-NEXT: bhr %r14 ; CHECK: br %r14 entry: %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 257) @@ -161,9 +161,9 @@ exit: ; Test the largest size that can use two CLCs. define i32 @f10(i8 *%src1, i8 *%src2) { ; CHECK-LABEL: f10: -; CHECK: clc 0(256,%r2), 0(%r3) +; CHECK: clc 0(256,%r3), 0(%r2) ; CHECK: jlh [[LABEL:\..*]] -; CHECK: clc 256(256,%r2), 256(%r3) +; CHECK: clc 256(256,%r3), 256(%r2) ; CHECK: [[LABEL]]: ; CHECK: ipm [[REG:%r[0-5]]] ; CHECK: br %r14 @@ -174,11 +174,11 @@ define i32 @f10(i8 *%src1, i8 *%src2) { ; Test the smallest size that needs 3 CLCs. define i32 @f11(i8 *%src1, i8 *%src2) { ; CHECK-LABEL: f11: -; CHECK: clc 0(256,%r2), 0(%r3) +; CHECK: clc 0(256,%r3), 0(%r2) ; CHECK: jlh [[LABEL:\..*]] -; CHECK: clc 256(256,%r2), 256(%r3) +; CHECK: clc 256(256,%r3), 256(%r2) ; CHECK: jlh [[LABEL]] -; CHECK: clc 512(1,%r2), 512(%r3) +; CHECK: clc 512(1,%r3), 512(%r2) ; CHECK: [[LABEL]]: ; CHECK: ipm [[REG:%r[0-5]]] ; CHECK: br %r14 @@ -189,11 +189,11 @@ define i32 @f11(i8 *%src1, i8 *%src2) { ; Test the largest size than can use 3 CLCs. define i32 @f12(i8 *%src1, i8 *%src2) { ; CHECK-LABEL: f12: -; CHECK: clc 0(256,%r2), 0(%r3) +; CHECK: clc 0(256,%r3), 0(%r2) ; CHECK: jlh [[LABEL:\..*]] -; CHECK: clc 256(256,%r2), 256(%r3) +; CHECK: clc 256(256,%r3), 256(%r2) ; CHECK: jlh [[LABEL]] -; CHECK: clc 512(256,%r2), 512(%r3) +; CHECK: clc 512(256,%r3), 512(%r2) ; CHECK: [[LABEL]]: ; CHECK: ipm [[REG:%r[0-5]]] ; CHECK: br %r14 @@ -207,12 +207,12 @@ define i32 @f13(i8 *%src1, i8 *%src2) { ; CHECK-LABEL: f13: ; CHECK: lghi [[COUNT:%r[0-5]]], 3 ; CHECK: [[LOOP:.L[^:]*]]: -; CHECK: clc 0(256,%r2), 0(%r3) +; CHECK: clc 0(256,%r3), 0(%r2) ; CHECK: jlh [[LABEL:\..*]] ; CHECK-DAG: la %r2, 256(%r2) ; CHECK-DAG: la %r3, 256(%r3) ; CHECK: brctg [[COUNT]], [[LOOP]] -; CHECK: clc 0(1,%r2), 0(%r3) +; CHECK: clc 0(1,%r3), 0(%r2) ; CHECK: [[LABEL]]: ; CHECK: ipm [[REG:%r[0-5]]] ; CHECK: br %r14 diff --git a/llvm/test/CodeGen/SystemZ/strcmp-01.ll b/llvm/test/CodeGen/SystemZ/strcmp-01.ll index ef05d832e73e95..a3e3bbbb23be7f 100644 --- a/llvm/test/CodeGen/SystemZ/strcmp-01.ll +++ b/llvm/test/CodeGen/SystemZ/strcmp-01.ll @@ -9,12 +9,12 @@ define i32 @f1(i8 *%src1, i8 *%src2) { ; CHECK-LABEL: f1: ; CHECK: lhi %r0, 0 ; CHECK: [[LABEL:\.[^:]*]]: -; CHECK: clst %r2, %r3 +; CHECK: clst %r3, %r2 ; CHECK-NEXT: jo [[LABEL]] ; CHECK-NEXT: %bb.{{[0-9]+}} -; CHECK-NEXT: ipm [[REG:%r[0-5]]] -; CHECK: srl [[REG]], 28 -; CHECK: rll %r2, [[REG]], 31 +; CHECK-NEXT: ipm %r2 +; CHECK: sll %r2, 2 +; CHECK: sra %r2, 30 ; CHECK: br %r14 %res = call i32 @strcmp(i8 *%src1, i8 *%src2) ret i32 %res @@ -25,7 +25,7 @@ define void @f2(i8 *%src1, i8 *%src2, i32 *%dest) { ; CHECK-LABEL: f2: ; CHECK: lhi %r0, 0 ; CHECK: [[LABEL:\.[^:]*]]: -; CHECK: clst %r2, %r3 +; CHECK: clst %r3, %r2 ; CHECK-NEXT: jo [[LABEL]] ; CHECK-NEXT: %bb.{{[0-9]+}} ; CHECK-NEXT: ber %r14 @@ -48,12 +48,12 @@ define i32 @f3(i8 *%src1, i8 *%src2, i32 *%dest) { ; CHECK-LABEL: f3: ; CHECK: lhi %r0, 0 ; CHECK: [[LABEL:\.[^:]*]]: -; CHECK: clst %r2, %r3 +; CHECK: clst %r3, %r2 ; CHECK-NEXT: jo [[LABEL]] ; CHECK-NEXT: %bb.{{[0-9]+}} -; CHECK-NEXT: ipm [[REG:%r[0-5]]] -; CHECK: srl [[REG]], 28 -; CHECK: rll %r2, [[REG]], 31 +; CHECK-NEXT: ipm %r2 +; CHECK: sll %r2, 2 +; CHECK: sra %r2, 30 ; CHECK: blr %r14 ; CHECK: br %r14 entry: From 238045f8cab0e3131a4864f7b4e620470557aba3 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 7 Feb 2019 10:53:51 +0000 Subject: [PATCH 096/125] lld-link: Add some entries to the 8.0 release notes By Nico Weber! Differential revision: https://reviews.llvm.org/D57818 llvm-svn: 353387 --- lld/docs/ReleaseNotes.rst | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index c02cc586c795ea..77971b25ac483f 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -49,12 +49,30 @@ COFF Improvements * PDB GUID is set to hash of PDB contents instead to a random byte sequence for build reproducibility. +* ``/pdbsourcepath:`` is now also used to make ``"cwd"``, ``"exe"``, ``"pdb"`` + in the env block of PDB outputs absolute if they are relative, and to make + paths to obj files referenced in PDB outputs absolute if they are relative. + Together with the previous item, this makes it possible to generate + executables and PDBs that are fully deterministic and independent of the + absolute path to the build directory, so that different machines building + the same code in different directories can produce exactly the same output. + * The following flags have been added: ``/force:multiple`` * lld now can link against import libraries produced by GNU tools. * lld can create thunks for ARM, to allow linking images over 16 MB. +* Several speed and memory usage improvements. + +* lld now creates debug info for typedefs. + +* lld can now link obj files produced by ``cl.exe /Z7 /Yc``. + +* lld now understands ``%_PDB%`` and ``%_EXT%`` in ``/pdbaltpath:``. + +* Undefined symbols are now printed in demangled form in addition to raw form. + MinGW Improvements ------------------ From 33fc712af30e4b1963ca8b086bc698446a079f9f Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 7 Feb 2019 10:58:25 +0000 Subject: [PATCH 097/125] Merging r353367: ------------------------------------------------------------------------ r353367 | brad | 2019-02-07 03:06:58 +0100 (Thu, 07 Feb 2019) | 2 lines Add OpenBSD support to be able to get the thread name ------------------------------------------------------------------------ llvm-svn: 353388 --- llvm/lib/Support/Unix/Threading.inc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/llvm/lib/Support/Unix/Threading.inc b/llvm/lib/Support/Unix/Threading.inc index 2d49ce1ad747a0..92bec36d6a2dae 100644 --- a/llvm/lib/Support/Unix/Threading.inc +++ b/llvm/lib/Support/Unix/Threading.inc @@ -202,6 +202,12 @@ void llvm::get_thread_name(SmallVectorImpl &Name) { char buf[len]; ::pthread_getname_np(::pthread_self(), buf, len); + Name.append(buf, buf + strlen(buf)); +#elif defined(__OpenBSD__) + constexpr uint32_t len = get_max_thread_name_length_impl(); + char buf[len]; + ::pthread_get_name_np(::pthread_self(), buf, len); + Name.append(buf, buf + strlen(buf)); #elif defined(__linux__) #if HAVE_PTHREAD_GETNAME_NP From 7399f70fb2be1d1dc3da819ea3c671bd0b45ed4b Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 7 Feb 2019 11:04:04 +0000 Subject: [PATCH 098/125] Merging r353327: ------------------------------------------------------------------------ r353327 | lebedevri | 2019-02-06 20:17:30 +0100 (Wed, 06 Feb 2019) | 18 lines [clang-tidy] modernize-avoid-c-arrays: avoid main function (PR40604) Summary: The check should ignore the main function, the program entry point. It is not possible to use `std::array<>` for the `argv`. The alternative is to use `char** argv`. Fixes [[ https://bugs.llvm.org/show_bug.cgi?id=40604 | PR40604 ]] Reviewers: JonasToth, aaron.ballman Reviewed By: aaron.ballman Subscribers: xazax.hun, hans, cfe-commits Tags: #clang-tools-extra, #clang Differential Revision: https://reviews.llvm.org/D57787 ------------------------------------------------------------------------ llvm-svn: 353391 --- .../modernize/AvoidCArraysCheck.cpp | 9 ++++++++- .../checks/modernize-avoid-c-arrays.rst | 4 ++++ .../modernize-avoid-c-arrays-ignores-main.cpp | 18 +++++++++++++++++ ...-avoid-c-arrays-ignores-three-arg-main.cpp | 20 +++++++++++++++++++ 4 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 clang-tools-extra/test/clang-tidy/modernize-avoid-c-arrays-ignores-main.cpp create mode 100644 clang-tools-extra/test/clang-tidy/modernize-avoid-c-arrays-ignores-three-arg-main.cpp diff --git a/clang-tools-extra/clang-tidy/modernize/AvoidCArraysCheck.cpp b/clang-tools-extra/clang-tidy/modernize/AvoidCArraysCheck.cpp index dd19483295e8db..6ef45ee855244b 100644 --- a/clang-tools-extra/clang-tidy/modernize/AvoidCArraysCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/AvoidCArraysCheck.cpp @@ -31,6 +31,12 @@ AST_MATCHER(clang::RecordDecl, isExternCContext) { return Node.isExternCContext(); } +AST_MATCHER(clang::ParmVarDecl, isArgvOfMain) { + const clang::DeclContext *DC = Node.getDeclContext(); + const auto *FD = llvm::dyn_cast(DC); + return FD ? FD->isMain() : false; +} + } // namespace namespace clang { @@ -44,7 +50,8 @@ void AvoidCArraysCheck::registerMatchers(MatchFinder *Finder) { Finder->addMatcher( typeLoc(hasValidBeginLoc(), hasType(arrayType()), - unless(anyOf(hasParent(varDecl(isExternC())), + unless(anyOf(hasParent(parmVarDecl(isArgvOfMain())), + hasParent(varDecl(isExternC())), hasParent(fieldDecl( hasParent(recordDecl(isExternCContext())))), hasAncestor(functionDecl(isExternC()))))) diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize-avoid-c-arrays.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize-avoid-c-arrays.rst index 8f856a524b2d51..d7bc7474e27f87 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/modernize-avoid-c-arrays.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/modernize-avoid-c-arrays.rst @@ -54,3 +54,7 @@ such headers between C code, and C++ code. } } + +Similarly, the ``main()`` function is ignored. Its second and third parameters +can be either ``char* argv[]`` or ``char** argv``, but can not be +``std::array<>``. diff --git a/clang-tools-extra/test/clang-tidy/modernize-avoid-c-arrays-ignores-main.cpp b/clang-tools-extra/test/clang-tidy/modernize-avoid-c-arrays-ignores-main.cpp new file mode 100644 index 00000000000000..6549422f393aaa --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/modernize-avoid-c-arrays-ignores-main.cpp @@ -0,0 +1,18 @@ +// RUN: %check_clang_tidy %s modernize-avoid-c-arrays %t + +int not_main(int argc, char *argv[]) { + // CHECK-MESSAGES: :[[@LINE-1]]:24: warning: do not declare C-style arrays, use std::array<> instead + int f4[] = {1, 2}; + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not declare C-style arrays, use std::array<> instead +} + +int main(int argc, char *argv[]) { + int f5[] = {1, 2}; + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not declare C-style arrays, use std::array<> instead + + auto not_main = [](int argc, char *argv[]) { + // CHECK-MESSAGES: :[[@LINE-1]]:32: warning: do not declare C-style arrays, use std::array<> instead + int f6[] = {1, 2}; + // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: do not declare C-style arrays, use std::array<> instead + }; +} diff --git a/clang-tools-extra/test/clang-tidy/modernize-avoid-c-arrays-ignores-three-arg-main.cpp b/clang-tools-extra/test/clang-tidy/modernize-avoid-c-arrays-ignores-three-arg-main.cpp new file mode 100644 index 00000000000000..22a4016f79f4da --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/modernize-avoid-c-arrays-ignores-three-arg-main.cpp @@ -0,0 +1,20 @@ +// RUN: %check_clang_tidy %s modernize-avoid-c-arrays %t + +int not_main(int argc, char *argv[], char *argw[]) { + // CHECK-MESSAGES: :[[@LINE-1]]:24: warning: do not declare C-style arrays, use std::array<> instead + // CHECK-MESSAGES: :[[@LINE-2]]:38: warning: do not declare C-style arrays, use std::array<> instead + int f4[] = {1, 2}; + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not declare C-style arrays, use std::array<> instead +} + +int main(int argc, char *argv[], char *argw[]) { + int f5[] = {1, 2}; + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not declare C-style arrays, use std::array<> instead + + auto not_main = [](int argc, char *argv[], char *argw[]) { + // CHECK-MESSAGES: :[[@LINE-1]]:32: warning: do not declare C-style arrays, use std::array<> instead + // CHECK-MESSAGES: :[[@LINE-2]]:46: warning: do not declare C-style arrays, use std::array<> instead + int f6[] = {1, 2}; + // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: do not declare C-style arrays, use std::array<> instead + }; +} From 104c654aa953e08c548e44855072447c0dc2ee63 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 7 Feb 2019 11:14:24 +0000 Subject: [PATCH 099/125] Merging r353393: ------------------------------------------------------------------------ r353393 | hans | 2019-02-07 12:13:28 +0100 (Thu, 07 Feb 2019) | 1 line Typo: s/follwing/following ------------------------------------------------------------------------ llvm-svn: 353394 --- clang/include/clang/Driver/Options.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index d8155a95775c29..d02d9744d78d43 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -552,9 +552,9 @@ def cuda_compile_host_device : Flag<["--"], "cuda-compile-host-device">, HelpText<"Compile CUDA code for both host and device (default). Has no " "effect on non-CUDA compilations.">; def cuda_include_ptx_EQ : Joined<["--"], "cuda-include-ptx=">, Flags<[DriverOption]>, - HelpText<"Include PTX for the follwing GPU architecture (e.g. sm_35) or 'all'. May be specified more than once.">; + HelpText<"Include PTX for the following GPU architecture (e.g. sm_35) or 'all'. May be specified more than once.">; def no_cuda_include_ptx_EQ : Joined<["--"], "no-cuda-include-ptx=">, Flags<[DriverOption]>, - HelpText<"Do not include PTX for the follwing GPU architecture (e.g. sm_35) or 'all'. May be specified more than once.">; + HelpText<"Do not include PTX for the following GPU architecture (e.g. sm_35) or 'all'. May be specified more than once.">; def cuda_gpu_arch_EQ : Joined<["--"], "cuda-gpu-arch=">, Flags<[DriverOption]>, HelpText<"CUDA GPU architecture (e.g. sm_35). May be specified more than once.">; def hip_link : Flag<["--"], "hip-link">, From 8c33d4fb556779b13ca6d8af60631f1a149fc3bc Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 7 Feb 2019 11:15:27 +0000 Subject: [PATCH 100/125] Re-generate docs/ClangCommandLineReference.rst $ bin/clang-tblgen -gen-opt-docs -I../cfe.src/include -I../cfe.src/include/clang/Driver -I../llvm.src/include ../cfe.src/include/clang/Driver/ClangOptionDocs.td -o ../cfe.src/docs/ClangCommandLineReference.rst llvm-svn: 353395 --- clang/docs/ClangCommandLineReference.rst | 192 ++++++++++++++++------- 1 file changed, 131 insertions(+), 61 deletions(-) diff --git a/clang/docs/ClangCommandLineReference.rst b/clang/docs/ClangCommandLineReference.rst index e852c3e3879855..3dafac54a02bda 100644 --- a/clang/docs/ClangCommandLineReference.rst +++ b/clang/docs/ClangCommandLineReference.rst @@ -198,6 +198,10 @@ Filename (or -) to write dependency output to Emit Clang AST files for source inputs +.. option:: -enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang + +Trivial automatic variable initialization to zero is only here for benchmarks, it'll eventually be removed, and I'm OK with that because I'm only using it to benchmark + .. option:: -exported\_symbols\_list .. option:: -faligned-new= @@ -210,10 +214,6 @@ Use approximate transcendental functions Flush denormal floating point values to zero in CUDA device mode. -.. option:: -fcuda-rdc, -fno-cuda-rdc - -Generate relocatable device code, also known as separate compilation mode. - .. option:: -fcuda-short-ptr, -fno-cuda-short-ptr Use 32-bit pointers for accessing const/local/shared address spaces. @@ -222,6 +222,10 @@ Use 32-bit pointers for accessing const/local/shared address spaces. Reserve register r19 (Hexagon only) +.. option:: -fgpu-rdc, -fcuda-rdc, -fno-gpu-rdc + +Generate relocatable device code, also known as separate compilation mode. + .. option:: -fheinous-gnu-extensions .. option:: -flat\_namespace @@ -254,6 +258,10 @@ Use the gcc toolchain at the given directory Generate CodeView debug information +.. option:: -gcodeview-ghash, -gno-codeview-ghash + +Emit type record hashes in a .debug$H section + .. option:: -headerpad\_max\_install\_names .. option:: -help, --help @@ -288,6 +296,10 @@ Make the next included directory (-I or -F) an indexer header map .. option:: -mbig-endian, -EB +.. option:: -mbranch-protection= + +Enforce targets of indirect branches and function returns + .. option:: --migrate Run the migrator @@ -792,15 +804,7 @@ Don't use blacklist file for sanitizers .. option:: -fparse-all-comments -.. option:: -frecord-command-line, -frecord-gcc-switches, -fno-record-command-line, -fno-record-gcc-switches - -Generate a section named ".GCC.command.line" containing the clang driver -command-line. After linking, the section may contain multiple command lines, -which will be individually terminated by null bytes. Separate arguments within -a command line are combined with spaces; spaces and backslashes within an -argument are escaped with backslashes. This format differs from the format of -the equivalent section produced by GCC with the -frecord-gcc-switches flag. -This option is currently only supported on ELF targets. +.. option:: -frecord-command-line, -fno-record-command-line, -frecord-gcc-switches .. option:: -fsanitize-address-field-padding= @@ -810,20 +814,18 @@ Level of field padding for AddressSanitizer Enable linker dead stripping of globals in AddressSanitizer -.. option:: -fsanitize-address-use-odr-indicator, -fno-sanitize-address-use-odr-indicator - -Enable ODR indicator globals to avoid false ODR violation reports in partially sanitized programs at the cost of an increase in binary size - .. option:: -fsanitize-address-poison-custom-array-cookie, -fno-sanitize-address-poison-custom-array-cookie -Enable "poisoning" array cookies when allocating arrays with a custom operator new\[\] in Address Sanitizer, preventing accesses to the cookies from user code. An array cookie is a small implementation-defined header added to certain array allocations to record metadata such as the length of the array. Accesses to array cookies from user code are technically allowed by the standard but are more likely to be the result of an out-of-bounds array access. - -An operator new\[\] is "custom" if it is not one of the allocation functions provided by the C++ standard library. Array cookies from non-custom allocation functions are always poisoned. +Enable poisoning array cookies when using custom operator new\[\] in AddressSanitizer .. option:: -fsanitize-address-use-after-scope, -fno-sanitize-address-use-after-scope Enable use-after-scope detection in AddressSanitizer +.. option:: -fsanitize-address-use-odr-indicator, -fno-sanitize-address-use-odr-indicator + +Enable ODR indicator globals to avoid false ODR violation reports in partially sanitized programs at the cost of an increase in binary size + .. option:: -fsanitize-blacklist= Path to blacklist file for sanitizers @@ -840,6 +842,10 @@ Generalize pointers in CFI indirect call type signature checks Specify the type of coverage instrumentation for Sanitizers +.. option:: -fsanitize-hwaddress-abi= + +Select the HWAddressSanitizer ABI to target (interceptor or platform, default interceptor) + .. option:: -fsanitize-link-c++-runtime .. option:: -fsanitize-memory-track-origins, -fno-sanitize-memory-track-origins @@ -1072,6 +1078,10 @@ Set directory to include search path with prefix Add directory to SYSTEM include search path, absolute paths are relative to -isysroot +.. option:: --libomptarget-nvptx-path= + +Path to libomptarget-nvptx libraries + .. option:: --ptxas-path= Path to ptxas (used for compiling CUDA code) @@ -1283,6 +1293,8 @@ Enable C++ static destructor registration (the default) Instrument control-flow architecture protection. Options: return, branch, full, none. +.. option:: -fcf-runtime-abi= + .. option:: -fchar8\_t, -fno-char8\_t Enable C++ builtin type char8\_t @@ -1343,6 +1355,10 @@ Emit macro debug information remap file source paths in debug info +.. option:: -fdebug-ranges-base-address, -fno-debug-ranges-base-address + +Use DWARF base address selection entries in debug\_ranges + .. option:: -fdebug-types-section, -fno-debug-types-section Place debug types in their own section (ELF Only) @@ -1651,6 +1667,8 @@ Synthesize retain and release calls for Objective-C pointers Use EH-safe code when synthesizing retains and releases in -fobjc-arc +.. option:: -fobjc-convert-messages-to-runtime-calls, -fno-objc-convert-messages-to-runtime-calls + .. option:: -fobjc-exceptions, -fno-objc-exceptions Enable Objective-C exceptions @@ -1737,6 +1755,14 @@ Load the named plugin (dynamic shared object) .. option:: -fprofile-dir= +.. option:: -fprofile-exclude-files= + +Instrument only functions from files where names don't match all the regexes separated by a semi-colon + +.. option:: -fprofile-filter-files= + +Instrument only functions from files where names match any regex separated by a semi-colon + .. option:: -fprofile-generate, -fno-profile-generate Generate instrumented code to collect execution counts into default.profraw (overridden by LLVM\_PROFILE\_FILE env var) @@ -1765,6 +1791,10 @@ Generate instrumented code to collect execution counts into (overridden b Use instrumentation data for profile-guided optimization +.. option:: -fprofile-remapping-file=, -fprofile-remapping-file + +Use the remappings described in to match the profile data against names in the program + .. option:: -fprofile-sample-accurate, -fauto-profile-accurate, -fno-profile-sample-accurate Specifies that the sample profile is accurate. If the sample @@ -1876,19 +1906,23 @@ Enable the superword-level parallelism vectorization passes Provide minimal debug info in the object/executable to facilitate online symbolication/stack traces in the absence of .dwo/.dwp files when using Split DWARF +.. option:: -fsplit-lto-unit, -fno-split-lto-unit + +Enables splitting of the LTO unit. + .. option:: -fsplit-stack .. option:: -fstack-protector, -fno-stack-protector -Enable stack protectors for functions potentially vulnerable to stack smashing +Enable stack protectors for some functions vulnerable to stack smashing. This uses a loose heuristic which considers functions vulnerable if they contain a char (or 8bit integer) array or constant sized calls to alloca, which are of greater size than ssp-buffer-size (default: 8 bytes). All variable sized calls to alloca are considered vulnerable .. option:: -fstack-protector-all -Force the usage of stack protectors for all functions +Enable stack protectors for all functions .. option:: -fstack-protector-strong -Use a strong heuristic to apply stack protectors to functions +Enable stack protectors for some functions vulnerable to stack smashing. Compared to -fstack-protector, this uses a stronger heuristic that includes functions containing arrays of any size (and any type), as well as any calls to alloca or the taking of an address from a local variable .. option:: -fstack-size-section, -fno-stack-size-section @@ -1962,6 +1996,10 @@ Specify the function to be called on overflow Process trigraph sequences +.. option:: -ftrivial-auto-var-init= + +Initialize trivial automatic stack variables: uninitialized (default) \| pattern + .. option:: -funique-section-names, -fno-unique-section-names Use unique names for text and data sections (ELF Only) @@ -2000,6 +2038,10 @@ Enable the loop vectorization passes .. option:: -fverbose-asm, -fno-verbose-asm +.. option:: -fvisibility-global-new-delete-hidden + +Give global C++ operator new and delete declarations hidden visibility + .. option:: -fvisibility-inlines-hidden Give inline C++ member functions hidden visibility by default @@ -2162,7 +2204,7 @@ Link stack frames through backchain on System Z .. option:: -mconsole -.. option:: -mcpu=, -mv5 (equivalent to -mcpu=hexagonv5), -mv55 (equivalent to -mcpu=hexagonv55), -mv60 (equivalent to -mcpu=hexagonv60), -mv62 (equivalent to -mcpu=hexagonv62), -mv65 (equivalent to -mcpu=hexagonv65) +.. option:: -mcpu=, -mv5 (equivalent to -mcpu=hexagonv5), -mv55 (equivalent to -mcpu=hexagonv55), -mv60 (equivalent to -mcpu=hexagonv60), -mv62 (equivalent to -mcpu=hexagonv62), -mv65 (equivalent to -mcpu=hexagonv65), -mv66 (equivalent to -mcpu=hexagonv66) .. option:: -mcrc, -mno-crc @@ -2196,6 +2238,8 @@ Enable merging of globals .. option:: -mhwdiv=, --mhwdiv , --mhwdiv= +.. option:: -mhwmult= + .. option:: -miamcu, -mno-iamcu Use Intel MCU ABI @@ -2272,6 +2316,8 @@ Select return address signing scope Use software floating point +.. option:: -mspeculative-load-hardening, -mno-speculative-load-hardening + .. option:: -mstack-alignment= Set the stack alignment @@ -2296,6 +2342,10 @@ The thread model to use, e.g. posix, single (posix by default) .. option:: -mthumb, -mno-thumb +.. option:: -mtls-direct-seg-refs, -mno-tls-direct-seg-refs + +Enable direct TLS access through segment registers (default) + .. option:: -mtune= .. option:: -mtvos-version-min=, -mappletvos-version-min= @@ -2314,41 +2364,33 @@ The thread model to use, e.g. posix, single (posix by default) AARCH64 ------- -.. option:: -ffixed-x1 - -Reserve the x1 register (AArch64 only) - -.. option:: -ffixed-x2 - -Reserve the x2 register (AArch64 only) - -.. option:: -ffixed-x3 +.. option:: -fcall-saved-x10 -Reserve the x3 register (AArch64 only) +Make the x10 register call-saved (AArch64 only) -.. option:: -ffixed-x4 +.. option:: -fcall-saved-x11 -Reserve the x4 register (AArch64 only) +Make the x11 register call-saved (AArch64 only) -.. option:: -ffixed-x5 +.. option:: -fcall-saved-x12 -Reserve the x5 register (AArch64 only) +Make the x12 register call-saved (AArch64 only) -.. option:: -ffixed-x6 +.. option:: -fcall-saved-x13 -Reserve the x6 register (AArch64 only) +Make the x13 register call-saved (AArch64 only) -.. option:: -ffixed-x7 +.. option:: -fcall-saved-x14 -Reserve the x7 register (AArch64 only) +Make the x14 register call-saved (AArch64 only) -.. option:: -ffixed-x18 +.. option:: -fcall-saved-x15 -Reserve the x18 register (AArch64 only) +Make the x15 register call-saved (AArch64 only) -.. option:: -ffixed-x20 +.. option:: -fcall-saved-x18 -Reserve the x20 register (AArch64 only) +Make the x18 register call-saved (AArch64 only) .. option:: -fcall-saved-x8 @@ -2358,33 +2400,41 @@ Make the x8 register call-saved (AArch64 only) Make the x9 register call-saved (AArch64 only) -.. option:: -fcall-saved-x10 +.. option:: -ffixed-x1 -Make the x10 register call-saved (AArch64 only) +Reserve the 1 register (AArch64 only) -.. option:: -fcall-saved-x11 +.. option:: -ffixed-x18 -Make the x11 register call-saved (AArch64 only) +Reserve the 18 register (AArch64 only) -.. option:: -fcall-saved-x12 +.. option:: -ffixed-x2 -Make the x12 register call-saved (AArch64 only) +Reserve the 2 register (AArch64 only) -.. option:: -fcall-saved-x13 +.. option:: -ffixed-x20 -Make the x13 register call-saved (AArch64 only) +Reserve the 20 register (AArch64 only) -.. option:: -fcall-saved-x14 +.. option:: -ffixed-x3 -Make the x14 register call-saved (AArch64 only) +Reserve the 3 register (AArch64 only) -.. option:: -fcall-saved-x15 +.. option:: -ffixed-x4 -Make the x15 register call-saved (AArch64 only) +Reserve the 4 register (AArch64 only) -.. option:: -fcall-saved-x18 +.. option:: -ffixed-x5 -Make the x18 register call-saved (AArch64 only) +Reserve the 5 register (AArch64 only) + +.. option:: -ffixed-x6 + +Reserve the 6 register (AArch64 only) + +.. option:: -ffixed-x7 + +Reserve the 7 register (AArch64 only) .. option:: -mfix-cortex-a53-835769, -mno-fix-cortex-a53-835769 @@ -2396,6 +2446,14 @@ Generate code which only uses the general purpose registers (AArch64 only) AMDGPU ------ +.. option:: -mcode-object-v3, -mno-code-object-v3 + +Enable code object v3 (AMDGPU only) + +.. option:: -msram-ecc, -mno-sram-ecc + +Enable SRAM ECC (AMDGPU only) + .. option:: -mxnack, -mno-xnack Enable XNACK (AMDGPU only) @@ -2594,6 +2652,8 @@ WebAssembly .. option:: -msimd128, -mno-simd128 +.. option:: -munimplemented-simd128, -mno-unimplemented-simd128 + X86 --- .. option:: -m3dnow, -mno-3dnow @@ -2811,6 +2871,10 @@ ___________ .. option:: -ggdb3 +.. option:: -gline-directives-only + +Emit debug line info directives only + .. option:: -gline-tables-only, -g1, -gmlt Emit debug line number tables only @@ -2841,10 +2905,16 @@ Embed source text in DWARF debug sections .. option:: -gpubnames, -gno-pubnames -.. option:: -grecord-command-line, -grecord-gcc-switches, -gno-record-command-line, -gno-record-gcc-switches +.. option:: -grecord-command-line, -gno-record-command-line, -grecord-gcc-switches .. option:: -gsplit-dwarf +.. program:: clang1 +.. option:: -gsplit-dwarf= +.. program:: clang + +Set DWARF fission mode to either 'split' or 'single' + .. option:: -gstrict-dwarf, -gno-strict-dwarf .. option:: -gz From cb2d048c9de8f9085efd9f0a1aca6c39cef5c44b Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 7 Feb 2019 11:16:32 +0000 Subject: [PATCH 101/125] Generate docs/AttributeReference.rst $ bin/clang-tblgen -gen-attr-docs -I../cfe.src/include ../cfe.src/include/clang/Basic/Attr.td -o ../cfe.src/docs/AttributeReference.rst llvm-svn: 353396 --- clang/docs/AttributeReference.rst | 5175 ++++++++++++++++++++++++++++- 1 file changed, 5169 insertions(+), 6 deletions(-) diff --git a/clang/docs/AttributeReference.rst b/clang/docs/AttributeReference.rst index a763ddeaeb106d..01938f64f56c15 100644 --- a/clang/docs/AttributeReference.rst +++ b/clang/docs/AttributeReference.rst @@ -1,13 +1,5176 @@ .. ------------------------------------------------------------------- NOTE: This file is automatically generated by running clang-tblgen - -gen-attr-docs. Do not edit this file by hand!! The contents for - this file are automatically generated by a server-side process. - - Please do not commit this file. The file exists for local testing - purposes only. + -gen-attr-docs. Do not edit this file by hand!! ------------------------------------------------------------------- =================== Attributes in Clang -=================== \ No newline at end of file +=================== +.. contents:: + :local: + +.. |br| raw:: html + +
+ +Introduction +============ + +This page lists the attributes currently supported by Clang. + +Function Attributes +=================== + + +#pragma omp declare simd +------------------------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "","","","","","``omp declare simd``","" + +The `declare simd` construct can be applied to a function to enable the creation +of one or more versions that can process multiple arguments using SIMD +instructions from a single invocation in a SIMD loop. The `declare simd` +directive is a declarative directive. There may be multiple `declare simd` +directives for a function. The use of a `declare simd` construct on a function +enables the creation of SIMD versions of the associated function that can be +used to process multiple arguments from a single invocation from a SIMD loop +concurrently. +The syntax of the `declare simd` construct is as follows: + + .. code-block:: none + + #pragma omp declare simd [clause[[,] clause] ...] new-line + [#pragma omp declare simd [clause[[,] clause] ...] new-line] + [...] + function definition or declaration + +where clause is one of the following: + + .. code-block:: none + + simdlen(length) + linear(argument-list[:constant-linear-step]) + aligned(argument-list[:alignment]) + uniform(argument-list) + inbranch + notinbranch + + +#pragma omp declare target +-------------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "","","","","","``omp declare target``","" + +The `declare target` directive specifies that variables and functions are mapped +to a device for OpenMP offload mechanism. + +The syntax of the declare target directive is as follows: + + .. code-block:: c + + #pragma omp declare target new-line + declarations-definition-seq + #pragma omp end declare target new-line + + +_Noreturn +--------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "","","","","``_Noreturn``","","" + +A function declared as ``_Noreturn`` shall not return to its caller. The +compiler will generate a diagnostic for a function declared as ``_Noreturn`` +that appears to be capable of returning to its caller. + + +abi_tag +------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``abi_tag``","``gnu::abi_tag``","","","","","Yes" + +The ``abi_tag`` attribute can be applied to a function, variable, class or +inline namespace declaration to modify the mangled name of the entity. It gives +the ability to distinguish between different versions of the same entity but +with different ABI versions supported. For example, a newer version of a class +could have a different set of data members and thus have a different size. Using +the ``abi_tag`` attribute, it is possible to have different mangled names for +a global variable of the class type. Therefore, the old code could keep using +the old manged name and the new code will use the new mangled name with tags. + + +acquire_capability, acquire_shared_capability +--------------------------------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``acquire_capability`` |br| ``acquire_shared_capability`` |br| ``exclusive_lock_function`` |br| ``shared_lock_function``","``clang::acquire_capability`` |br| ``clang::acquire_shared_capability``","","","","","" + +Marks a function as acquiring a capability. + + +alloc_align +----------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``alloc_align``","``gnu::alloc_align``","","","","","" + +Use ``__attribute__((alloc_align())`` on a function +declaration to specify that the return value of the function (which must be a +pointer type) is at least as aligned as the value of the indicated parameter. The +parameter is given by its index in the list of formal parameters; the first +parameter has index 1 unless the function is a C++ non-static member function, +in which case the first parameter has index 2 to account for the implicit ``this`` +parameter. + +.. code-block:: c++ + + // The returned pointer has the alignment specified by the first parameter. + void *a(size_t align) __attribute__((alloc_align(1))); + + // The returned pointer has the alignment specified by the second parameter. + void *b(void *v, size_t align) __attribute__((alloc_align(2))); + + // The returned pointer has the alignment specified by the second visible + // parameter, however it must be adjusted for the implicit 'this' parameter. + void *Foo::b(void *v, size_t align) __attribute__((alloc_align(3))); + +Note that this attribute merely informs the compiler that a function always +returns a sufficiently aligned pointer. It does not cause the compiler to +emit code to enforce that alignment. The behavior is undefined if the returned +poitner is not sufficiently aligned. + + +alloc_size +---------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``alloc_size``","``gnu::alloc_size``","","","","","Yes" + +The ``alloc_size`` attribute can be placed on functions that return pointers in +order to hint to the compiler how many bytes of memory will be available at the +returned pointer. ``alloc_size`` takes one or two arguments. + +- ``alloc_size(N)`` implies that argument number N equals the number of + available bytes at the returned pointer. +- ``alloc_size(N, M)`` implies that the product of argument number N and + argument number M equals the number of available bytes at the returned + pointer. + +Argument numbers are 1-based. + +An example of how to use ``alloc_size`` + +.. code-block:: c + + void *my_malloc(int a) __attribute__((alloc_size(1))); + void *my_calloc(int a, int b) __attribute__((alloc_size(1, 2))); + + int main() { + void *const p = my_malloc(100); + assert(__builtin_object_size(p, 0) == 100); + void *const a = my_calloc(20, 5); + assert(__builtin_object_size(a, 0) == 100); + } + +.. Note:: This attribute works differently in clang than it does in GCC. + Specifically, clang will only trace ``const`` pointers (as above); we give up + on pointers that are not marked as ``const``. In the vast majority of cases, + this is unimportant, because LLVM has support for the ``alloc_size`` + attribute. However, this may cause mildly unintuitive behavior when used with + other attributes, such as ``enable_if``. + + +artificial +---------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``artificial``","``gnu::artificial``","","","","","" + +The ``artificial`` attribute can be applied to an inline function. If such a +function is inlined, the attribute indicates that debuggers should associate +the resulting instructions with the call site, rather than with the +corresponding line within the inlined callee. + + +assert_capability, assert_shared_capability +------------------------------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``assert_capability`` |br| ``assert_shared_capability``","``clang::assert_capability`` |br| ``clang::assert_shared_capability``","","","","","" + +Marks a function that dynamically tests whether a capability is held, and halts +the program if it is not held. + + +assume_aligned +-------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``assume_aligned``","``gnu::assume_aligned``","","","","","Yes" + +Use ``__attribute__((assume_aligned([,]))`` on a function +declaration to specify that the return value of the function (which must be a +pointer type) has the specified offset, in bytes, from an address with the +specified alignment. The offset is taken to be zero if omitted. + +.. code-block:: c++ + + // The returned pointer value has 32-byte alignment. + void *a() __attribute__((assume_aligned (32))); + + // The returned pointer value is 4 bytes greater than an address having + // 32-byte alignment. + void *b() __attribute__((assume_aligned (32, 4))); + +Note that this attribute provides information to the compiler regarding a +condition that the code already ensures is true. It does not cause the compiler +to enforce the provided alignment assumption. + + +availability +------------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``availability``","``clang::availability``","``clang::availability``","","","","Yes" + +The ``availability`` attribute can be placed on declarations to describe the +lifecycle of that declaration relative to operating system versions. Consider +the function declaration for a hypothetical function ``f``: + +.. code-block:: c++ + + void f(void) __attribute__((availability(macos,introduced=10.4,deprecated=10.6,obsoleted=10.7))); + +The availability attribute states that ``f`` was introduced in macOS 10.4, +deprecated in macOS 10.6, and obsoleted in macOS 10.7. This information +is used by Clang to determine when it is safe to use ``f``: for example, if +Clang is instructed to compile code for macOS 10.5, a call to ``f()`` +succeeds. If Clang is instructed to compile code for macOS 10.6, the call +succeeds but Clang emits a warning specifying that the function is deprecated. +Finally, if Clang is instructed to compile code for macOS 10.7, the call +fails because ``f()`` is no longer available. + +The availability attribute is a comma-separated list starting with the +platform name and then including clauses specifying important milestones in the +declaration's lifetime (in any order) along with additional information. Those +clauses can be: + +introduced=\ *version* + The first version in which this declaration was introduced. + +deprecated=\ *version* + The first version in which this declaration was deprecated, meaning that + users should migrate away from this API. + +obsoleted=\ *version* + The first version in which this declaration was obsoleted, meaning that it + was removed completely and can no longer be used. + +unavailable + This declaration is never available on this platform. + +message=\ *string-literal* + Additional message text that Clang will provide when emitting a warning or + error about use of a deprecated or obsoleted declaration. Useful to direct + users to replacement APIs. + +replacement=\ *string-literal* + Additional message text that Clang will use to provide Fix-It when emitting + a warning about use of a deprecated declaration. The Fix-It will replace + the deprecated declaration with the new declaration specified. + +Multiple availability attributes can be placed on a declaration, which may +correspond to different platforms. Only the availability attribute with the +platform corresponding to the target platform will be used; any others will be +ignored. If no availability attribute specifies availability for the current +target platform, the availability attributes are ignored. Supported platforms +are: + +``ios`` + Apple's iOS operating system. The minimum deployment target is specified by + the ``-mios-version-min=*version*`` or ``-miphoneos-version-min=*version*`` + command-line arguments. + +``macos`` + Apple's macOS operating system. The minimum deployment target is + specified by the ``-mmacosx-version-min=*version*`` command-line argument. + ``macosx`` is supported for backward-compatibility reasons, but it is + deprecated. + +``tvos`` + Apple's tvOS operating system. The minimum deployment target is specified by + the ``-mtvos-version-min=*version*`` command-line argument. + +``watchos`` + Apple's watchOS operating system. The minimum deployment target is specified by + the ``-mwatchos-version-min=*version*`` command-line argument. + +A declaration can typically be used even when deploying back to a platform +version prior to when the declaration was introduced. When this happens, the +declaration is `weakly linked +`_, +as if the ``weak_import`` attribute were added to the declaration. A +weakly-linked declaration may or may not be present a run-time, and a program +can determine whether the declaration is present by checking whether the +address of that declaration is non-NULL. + +The flag ``strict`` disallows using API when deploying back to a +platform version prior to when the declaration was introduced. An +attempt to use such API before its introduction causes a hard error. +Weakly-linking is almost always a better API choice, since it allows +users to query availability at runtime. + +If there are multiple declarations of the same entity, the availability +attributes must either match on a per-platform basis or later +declarations must not have availability attributes for that +platform. For example: + +.. code-block:: c + + void g(void) __attribute__((availability(macos,introduced=10.4))); + void g(void) __attribute__((availability(macos,introduced=10.4))); // okay, matches + void g(void) __attribute__((availability(ios,introduced=4.0))); // okay, adds a new platform + void g(void); // okay, inherits both macos and ios availability from above. + void g(void) __attribute__((availability(macos,introduced=10.5))); // error: mismatch + +When one method overrides another, the overriding method can be more widely available than the overridden method, e.g.,: + +.. code-block:: objc + + @interface A + - (id)method __attribute__((availability(macos,introduced=10.4))); + - (id)method2 __attribute__((availability(macos,introduced=10.4))); + @end + + @interface B : A + - (id)method __attribute__((availability(macos,introduced=10.3))); // okay: method moved into base class later + - (id)method __attribute__((availability(macos,introduced=10.5))); // error: this method was available via the base class in 10.4 + @end + +Starting with the macOS 10.12 SDK, the ``API_AVAILABLE`` macro from +```` can simplify the spelling: + +.. code-block:: objc + + @interface A + - (id)method API_AVAILABLE(macos(10.11))); + - (id)otherMethod API_AVAILABLE(macos(10.11), ios(11.0)); + @end + +Also see the documentation for `@available +`_ + + +carries_dependency +------------------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``carries_dependency``","``carries_dependency``","","","","","Yes" + +The ``carries_dependency`` attribute specifies dependency propagation into and +out of functions. + +When specified on a function or Objective-C method, the ``carries_dependency`` +attribute means that the return value carries a dependency out of the function, +so that the implementation need not constrain ordering upon return from that +function. Implementations of the function and its caller may choose to preserve +dependencies instead of emitting memory ordering instructions such as fences. + +Note, this attribute does not change the meaning of the program, but may result +in generation of more efficient code. + + +cf_consumed +----------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``cf_consumed``","``clang::cf_consumed``","``clang::cf_consumed``","","","","Yes" + +The behavior of a function with respect to reference counting for Foundation +(Objective-C), CoreFoundation (C) and OSObject (C++) is determined by a naming +convention (e.g. functions starting with "get" are assumed to return at +``+0``). + +It can be overriden using a family of the following attributes. In +Objective-C, the annotation ``__attribute__((ns_returns_retained))`` applied to +a function communicates that the object is returned at ``+1``, and the caller +is responsible for freeing it. +Similiarly, the annotation ``__attribute__((ns_returns_not_retained))`` +specifies that the object is returned at ``+0`` and the ownership remains with +the callee. +The annotation ``__attribute__((ns_consumes_self))`` specifies that +the Objective-C method call consumes the reference to ``self``, e.g. by +attaching it to a supplied parameter. +Additionally, parameters can have an annotation +``__attribute__((ns_consumed))``, which specifies that passing an owned object +as that parameter effectively transfers the ownership, and the caller is no +longer responsible for it. +These attributes affect code generation when interacting with ARC code, and +they are used by the Clang Static Analyzer. + +In C programs using CoreFoundation, a similar set of attributes: +``__attribute__((cf_returns_not_retained))``, +``__attribute__((cf_returns_retained))`` and ``__attribute__((cf_consumed))`` +have the same respective semantics when applied to CoreFoundation objects. +These attributes affect code generation when interacting with ARC code, and +they are used by the Clang Static Analyzer. + +Finally, in C++ interacting with XNU kernel (objects inheriting from OSObject), +the same attribute family is present: +``__attribute__((os_returns_not_retained))``, +``__attribute__((os_returns_retained))`` and ``__attribute__((os_consumed))``, +with the same respective semantics. +Similar to ``__attribute__((ns_consumes_self))``, +``__attribute__((os_consumes_this))`` specifies that the method call consumes +the reference to "this" (e.g., when attaching it to a different object supplied +as a parameter). +Out parameters (parameters the function is meant to write into, +either via pointers-to-pointers or references-to-pointers) +may be annotated with ``__attribute__((os_returns_retained))`` +or ``__attribute__((os_returns_not_retained))`` which specifies that the object +written into the out parameter should (or respectively should not) be released +after use. +Since often out parameters may or may not be written depending on the exit +code of the function, +annotations ``__attribute__((os_returns_retained_on_zero))`` +and ``__attribute__((os_returns_retained_on_non_zero))`` specify that +an out parameter at ``+1`` is written if and only if the function returns a zero +(respectively non-zero) error code. +Observe that return-code-dependent out parameter annotations are only +available for retained out parameters, as non-retained object do not have to be +released by the callee. +These attributes are only used by the Clang Static Analyzer. + +The family of attributes ``X_returns_X_retained`` can be added to functions, +C++ methods, and Objective-C methods and properties. +Attributes ``X_consumed`` can be added to parameters of methods, functions, +and Objective-C methods. + + +cf_returns_not_retained +----------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``cf_returns_not_retained``","``clang::cf_returns_not_retained``","``clang::cf_returns_not_retained``","","","","" + +The behavior of a function with respect to reference counting for Foundation +(Objective-C), CoreFoundation (C) and OSObject (C++) is determined by a naming +convention (e.g. functions starting with "get" are assumed to return at +``+0``). + +It can be overriden using a family of the following attributes. In +Objective-C, the annotation ``__attribute__((ns_returns_retained))`` applied to +a function communicates that the object is returned at ``+1``, and the caller +is responsible for freeing it. +Similiarly, the annotation ``__attribute__((ns_returns_not_retained))`` +specifies that the object is returned at ``+0`` and the ownership remains with +the callee. +The annotation ``__attribute__((ns_consumes_self))`` specifies that +the Objective-C method call consumes the reference to ``self``, e.g. by +attaching it to a supplied parameter. +Additionally, parameters can have an annotation +``__attribute__((ns_consumed))``, which specifies that passing an owned object +as that parameter effectively transfers the ownership, and the caller is no +longer responsible for it. +These attributes affect code generation when interacting with ARC code, and +they are used by the Clang Static Analyzer. + +In C programs using CoreFoundation, a similar set of attributes: +``__attribute__((cf_returns_not_retained))``, +``__attribute__((cf_returns_retained))`` and ``__attribute__((cf_consumed))`` +have the same respective semantics when applied to CoreFoundation objects. +These attributes affect code generation when interacting with ARC code, and +they are used by the Clang Static Analyzer. + +Finally, in C++ interacting with XNU kernel (objects inheriting from OSObject), +the same attribute family is present: +``__attribute__((os_returns_not_retained))``, +``__attribute__((os_returns_retained))`` and ``__attribute__((os_consumed))``, +with the same respective semantics. +Similar to ``__attribute__((ns_consumes_self))``, +``__attribute__((os_consumes_this))`` specifies that the method call consumes +the reference to "this" (e.g., when attaching it to a different object supplied +as a parameter). +Out parameters (parameters the function is meant to write into, +either via pointers-to-pointers or references-to-pointers) +may be annotated with ``__attribute__((os_returns_retained))`` +or ``__attribute__((os_returns_not_retained))`` which specifies that the object +written into the out parameter should (or respectively should not) be released +after use. +Since often out parameters may or may not be written depending on the exit +code of the function, +annotations ``__attribute__((os_returns_retained_on_zero))`` +and ``__attribute__((os_returns_retained_on_non_zero))`` specify that +an out parameter at ``+1`` is written if and only if the function returns a zero +(respectively non-zero) error code. +Observe that return-code-dependent out parameter annotations are only +available for retained out parameters, as non-retained object do not have to be +released by the callee. +These attributes are only used by the Clang Static Analyzer. + +The family of attributes ``X_returns_X_retained`` can be added to functions, +C++ methods, and Objective-C methods and properties. +Attributes ``X_consumed`` can be added to parameters of methods, functions, +and Objective-C methods. + + +cf_returns_retained +------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``cf_returns_retained``","``clang::cf_returns_retained``","``clang::cf_returns_retained``","","","","" + +The behavior of a function with respect to reference counting for Foundation +(Objective-C), CoreFoundation (C) and OSObject (C++) is determined by a naming +convention (e.g. functions starting with "get" are assumed to return at +``+0``). + +It can be overriden using a family of the following attributes. In +Objective-C, the annotation ``__attribute__((ns_returns_retained))`` applied to +a function communicates that the object is returned at ``+1``, and the caller +is responsible for freeing it. +Similiarly, the annotation ``__attribute__((ns_returns_not_retained))`` +specifies that the object is returned at ``+0`` and the ownership remains with +the callee. +The annotation ``__attribute__((ns_consumes_self))`` specifies that +the Objective-C method call consumes the reference to ``self``, e.g. by +attaching it to a supplied parameter. +Additionally, parameters can have an annotation +``__attribute__((ns_consumed))``, which specifies that passing an owned object +as that parameter effectively transfers the ownership, and the caller is no +longer responsible for it. +These attributes affect code generation when interacting with ARC code, and +they are used by the Clang Static Analyzer. + +In C programs using CoreFoundation, a similar set of attributes: +``__attribute__((cf_returns_not_retained))``, +``__attribute__((cf_returns_retained))`` and ``__attribute__((cf_consumed))`` +have the same respective semantics when applied to CoreFoundation objects. +These attributes affect code generation when interacting with ARC code, and +they are used by the Clang Static Analyzer. + +Finally, in C++ interacting with XNU kernel (objects inheriting from OSObject), +the same attribute family is present: +``__attribute__((os_returns_not_retained))``, +``__attribute__((os_returns_retained))`` and ``__attribute__((os_consumed))``, +with the same respective semantics. +Similar to ``__attribute__((ns_consumes_self))``, +``__attribute__((os_consumes_this))`` specifies that the method call consumes +the reference to "this" (e.g., when attaching it to a different object supplied +as a parameter). +Out parameters (parameters the function is meant to write into, +either via pointers-to-pointers or references-to-pointers) +may be annotated with ``__attribute__((os_returns_retained))`` +or ``__attribute__((os_returns_not_retained))`` which specifies that the object +written into the out parameter should (or respectively should not) be released +after use. +Since often out parameters may or may not be written depending on the exit +code of the function, +annotations ``__attribute__((os_returns_retained_on_zero))`` +and ``__attribute__((os_returns_retained_on_non_zero))`` specify that +an out parameter at ``+1`` is written if and only if the function returns a zero +(respectively non-zero) error code. +Observe that return-code-dependent out parameter annotations are only +available for retained out parameters, as non-retained object do not have to be +released by the callee. +These attributes are only used by the Clang Static Analyzer. + +The family of attributes ``X_returns_X_retained`` can be added to functions, +C++ methods, and Objective-C methods and properties. +Attributes ``X_consumed`` can be added to parameters of methods, functions, +and Objective-C methods. + + +code_seg +-------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "","","","``code_seg``","","","" + +The ``__declspec(code_seg)`` attribute enables the placement of code into separate +named segments that can be paged or locked in memory individually. This attribute +is used to control the placement of instantiated templates and compiler-generated +code. See the documentation for `__declspec(code_seg)`_ on MSDN. + +.. _`__declspec(code_seg)`: http://msdn.microsoft.com/en-us/library/dn636922.aspx + + +convergent +---------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``convergent``","``clang::convergent``","``clang::convergent``","","","","Yes" + +The ``convergent`` attribute can be placed on a function declaration. It is +translated into the LLVM ``convergent`` attribute, which indicates that the call +instructions of a function with this attribute cannot be made control-dependent +on any additional values. + +In languages designed for SPMD/SIMT programming model, e.g. OpenCL or CUDA, +the call instructions of a function with this attribute must be executed by +all work items or threads in a work group or sub group. + +This attribute is different from ``noduplicate`` because it allows duplicating +function calls if it can be proved that the duplicated function calls are +not made control-dependent on any additional values, e.g., unrolling a loop +executed by all work items. + +Sample usage: +.. code-block:: c + + void convfunc(void) __attribute__((convergent)); + // Setting it as a C++11 attribute is also valid in a C++ program. + // void convfunc(void) [[clang::convergent]]; + + +cpu_dispatch +------------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``cpu_dispatch``","``clang::cpu_dispatch``","``clang::cpu_dispatch``","``cpu_dispatch``","","","Yes" + +The ``cpu_specific`` and ``cpu_dispatch`` attributes are used to define and +resolve multiversioned functions. This form of multiversioning provides a +mechanism for declaring versions across translation units and manually +specifying the resolved function list. A specified CPU defines a set of minimum +features that are required for the function to be called. The result of this is +that future processors execute the most restrictive version of the function the +new processor can execute. + +Function versions are defined with ``cpu_specific``, which takes one or more CPU +names as a parameter. For example: + +.. code-block:: c + + // Declares and defines the ivybridge version of single_cpu. + __attribute__((cpu_specific(ivybridge))) + void single_cpu(void){} + + // Declares and defines the atom version of single_cpu. + __attribute__((cpu_specific(atom))) + void single_cpu(void){} + + // Declares and defines both the ivybridge and atom version of multi_cpu. + __attribute__((cpu_specific(ivybridge, atom))) + void multi_cpu(void){} + +A dispatching (or resolving) function can be declared anywhere in a project's +source code with ``cpu_dispatch``. This attribute takes one or more CPU names +as a parameter (like ``cpu_specific``). Functions marked with ``cpu_dispatch`` +are not expected to be defined, only declared. If such a marked function has a +definition, any side effects of the function are ignored; trivial function +bodies are permissible for ICC compatibility. + +.. code-block:: c + + // Creates a resolver for single_cpu above. + __attribute__((cpu_dispatch(ivybridge, atom))) + void single_cpu(void){} + + // Creates a resolver for multi_cpu, but adds a 3rd version defined in another + // translation unit. + __attribute__((cpu_dispatch(ivybridge, atom, sandybridge))) + void multi_cpu(void){} + +Note that it is possible to have a resolving function that dispatches based on +more or fewer options than are present in the program. Specifying fewer will +result in the omitted options not being considered during resolution. Specifying +a version for resolution that isn't defined in the program will result in a +linking failure. + +It is also possible to specify a CPU name of ``generic`` which will be resolved +if the executing processor doesn't satisfy the features required in the CPU +name. The behavior of a program executing on a processor that doesn't satisfy +any option of a multiversioned function is undefined. + + +cpu_specific +------------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``cpu_specific``","``clang::cpu_specific``","``clang::cpu_specific``","``cpu_specific``","","","Yes" + +The ``cpu_specific`` and ``cpu_dispatch`` attributes are used to define and +resolve multiversioned functions. This form of multiversioning provides a +mechanism for declaring versions across translation units and manually +specifying the resolved function list. A specified CPU defines a set of minimum +features that are required for the function to be called. The result of this is +that future processors execute the most restrictive version of the function the +new processor can execute. + +Function versions are defined with ``cpu_specific``, which takes one or more CPU +names as a parameter. For example: + +.. code-block:: c + + // Declares and defines the ivybridge version of single_cpu. + __attribute__((cpu_specific(ivybridge))) + void single_cpu(void){} + + // Declares and defines the atom version of single_cpu. + __attribute__((cpu_specific(atom))) + void single_cpu(void){} + + // Declares and defines both the ivybridge and atom version of multi_cpu. + __attribute__((cpu_specific(ivybridge, atom))) + void multi_cpu(void){} + +A dispatching (or resolving) function can be declared anywhere in a project's +source code with ``cpu_dispatch``. This attribute takes one or more CPU names +as a parameter (like ``cpu_specific``). Functions marked with ``cpu_dispatch`` +are not expected to be defined, only declared. If such a marked function has a +definition, any side effects of the function are ignored; trivial function +bodies are permissible for ICC compatibility. + +.. code-block:: c + + // Creates a resolver for single_cpu above. + __attribute__((cpu_dispatch(ivybridge, atom))) + void single_cpu(void){} + + // Creates a resolver for multi_cpu, but adds a 3rd version defined in another + // translation unit. + __attribute__((cpu_dispatch(ivybridge, atom, sandybridge))) + void multi_cpu(void){} + +Note that it is possible to have a resolving function that dispatches based on +more or fewer options than are present in the program. Specifying fewer will +result in the omitted options not being considered during resolution. Specifying +a version for resolution that isn't defined in the program will result in a +linking failure. + +It is also possible to specify a CPU name of ``generic`` which will be resolved +if the executing processor doesn't satisfy the features required in the CPU +name. The behavior of a program executing on a processor that doesn't satisfy +any option of a multiversioned function is undefined. + + +deprecated +---------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``deprecated``","``gnu::deprecated`` |br| ``deprecated``","``deprecated``","``deprecated``","","","" + +The ``deprecated`` attribute can be applied to a function, a variable, or a +type. This is useful when identifying functions, variables, or types that are +expected to be removed in a future version of a program. + +Consider the function declaration for a hypothetical function ``f``: + +.. code-block:: c++ + + void f(void) __attribute__((deprecated("message", "replacement"))); + +When spelled as `__attribute__((deprecated))`, the deprecated attribute can have +two optional string arguments. The first one is the message to display when +emitting the warning; the second one enables the compiler to provide a Fix-It +to replace the deprecated name with a new name. Otherwise, when spelled as +`[[gnu::deprecated]] or [[deprecated]]`, the attribute can have one optional +string argument which is the message to display when emitting the warning. + + +diagnose_if +----------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``diagnose_if``","","","","","","" + +The ``diagnose_if`` attribute can be placed on function declarations to emit +warnings or errors at compile-time if calls to the attributed function meet +certain user-defined criteria. For example: + +.. code-block:: c + + int abs(int a) + __attribute__((diagnose_if(a >= 0, "Redundant abs call", "warning"))); + int must_abs(int a) + __attribute__((diagnose_if(a >= 0, "Redundant abs call", "error"))); + + int val = abs(1); // warning: Redundant abs call + int val2 = must_abs(1); // error: Redundant abs call + int val3 = abs(val); + int val4 = must_abs(val); // Because run-time checks are not emitted for + // diagnose_if attributes, this executes without + // issue. + + +``diagnose_if`` is closely related to ``enable_if``, with a few key differences: + +* Overload resolution is not aware of ``diagnose_if`` attributes: they're + considered only after we select the best candidate from a given candidate set. +* Function declarations that differ only in their ``diagnose_if`` attributes are + considered to be redeclarations of the same function (not overloads). +* If the condition provided to ``diagnose_if`` cannot be evaluated, no + diagnostic will be emitted. + +Otherwise, ``diagnose_if`` is essentially the logical negation of ``enable_if``. + +As a result of bullet number two, ``diagnose_if`` attributes will stack on the +same function. For example: + +.. code-block:: c + + int foo() __attribute__((diagnose_if(1, "diag1", "warning"))); + int foo() __attribute__((diagnose_if(1, "diag2", "warning"))); + + int bar = foo(); // warning: diag1 + // warning: diag2 + int (*fooptr)(void) = foo; // warning: diag1 + // warning: diag2 + + constexpr int supportsAPILevel(int N) { return N < 5; } + int baz(int a) + __attribute__((diagnose_if(!supportsAPILevel(10), + "Upgrade to API level 10 to use baz", "error"))); + int baz(int a) + __attribute__((diagnose_if(!a, "0 is not recommended.", "warning"))); + + int (*bazptr)(int) = baz; // error: Upgrade to API level 10 to use baz + int v = baz(0); // error: Upgrade to API level 10 to use baz + +Query for this feature with ``__has_attribute(diagnose_if)``. + + +disable_tail_calls +------------------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``disable_tail_calls``","``clang::disable_tail_calls``","``clang::disable_tail_calls``","","","","Yes" + +The ``disable_tail_calls`` attribute instructs the backend to not perform tail call optimization inside the marked function. + +For example: + + .. code-block:: c + + int callee(int); + + int foo(int a) __attribute__((disable_tail_calls)) { + return callee(a); // This call is not tail-call optimized. + } + +Marking virtual functions as ``disable_tail_calls`` is legal. + + .. code-block:: c++ + + int callee(int); + + class Base { + public: + [[clang::disable_tail_calls]] virtual int foo1() { + return callee(); // This call is not tail-call optimized. + } + }; + + class Derived1 : public Base { + public: + int foo1() override { + return callee(); // This call is tail-call optimized. + } + }; + + +enable_if +--------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``enable_if``","","","","","","Yes" + +.. Note:: Some features of this attribute are experimental. The meaning of + multiple enable_if attributes on a single declaration is subject to change in + a future version of clang. Also, the ABI is not standardized and the name + mangling may change in future versions. To avoid that, use asm labels. + +The ``enable_if`` attribute can be placed on function declarations to control +which overload is selected based on the values of the function's arguments. +When combined with the ``overloadable`` attribute, this feature is also +available in C. + +.. code-block:: c++ + + int isdigit(int c); + int isdigit(int c) __attribute__((enable_if(c <= -1 || c > 255, "chosen when 'c' is out of range"))) __attribute__((unavailable("'c' must have the value of an unsigned char or EOF"))); + + void foo(char c) { + isdigit(c); + isdigit(10); + isdigit(-10); // results in a compile-time error. + } + +The enable_if attribute takes two arguments, the first is an expression written +in terms of the function parameters, the second is a string explaining why this +overload candidate could not be selected to be displayed in diagnostics. The +expression is part of the function signature for the purposes of determining +whether it is a redeclaration (following the rules used when determining +whether a C++ template specialization is ODR-equivalent), but is not part of +the type. + +The enable_if expression is evaluated as if it were the body of a +bool-returning constexpr function declared with the arguments of the function +it is being applied to, then called with the parameters at the call site. If the +result is false or could not be determined through constant expression +evaluation, then this overload will not be chosen and the provided string may +be used in a diagnostic if the compile fails as a result. + +Because the enable_if expression is an unevaluated context, there are no global +state changes, nor the ability to pass information from the enable_if +expression to the function body. For example, suppose we want calls to +strnlen(strbuf, maxlen) to resolve to strnlen_chk(strbuf, maxlen, size of +strbuf) only if the size of strbuf can be determined: + +.. code-block:: c++ + + __attribute__((always_inline)) + static inline size_t strnlen(const char *s, size_t maxlen) + __attribute__((overloadable)) + __attribute__((enable_if(__builtin_object_size(s, 0) != -1))), + "chosen when the buffer size is known but 'maxlen' is not"))) + { + return strnlen_chk(s, maxlen, __builtin_object_size(s, 0)); + } + +Multiple enable_if attributes may be applied to a single declaration. In this +case, the enable_if expressions are evaluated from left to right in the +following manner. First, the candidates whose enable_if expressions evaluate to +false or cannot be evaluated are discarded. If the remaining candidates do not +share ODR-equivalent enable_if expressions, the overload resolution is +ambiguous. Otherwise, enable_if overload resolution continues with the next +enable_if attribute on the candidates that have not been discarded and have +remaining enable_if attributes. In this way, we pick the most specific +overload out of a number of viable overloads using enable_if. + +.. code-block:: c++ + + void f() __attribute__((enable_if(true, ""))); // #1 + void f() __attribute__((enable_if(true, ""))) __attribute__((enable_if(true, ""))); // #2 + + void g(int i, int j) __attribute__((enable_if(i, ""))); // #1 + void g(int i, int j) __attribute__((enable_if(j, ""))) __attribute__((enable_if(true))); // #2 + +In this example, a call to f() is always resolved to #2, as the first enable_if +expression is ODR-equivalent for both declarations, but #1 does not have another +enable_if expression to continue evaluating, so the next round of evaluation has +only a single candidate. In a call to g(1, 1), the call is ambiguous even though +#2 has more enable_if attributes, because the first enable_if expressions are +not ODR-equivalent. + +Query for this feature with ``__has_attribute(enable_if)``. + +Note that functions with one or more ``enable_if`` attributes may not have +their address taken, unless all of the conditions specified by said +``enable_if`` are constants that evaluate to ``true``. For example: + +.. code-block:: c + + const int TrueConstant = 1; + const int FalseConstant = 0; + int f(int a) __attribute__((enable_if(a > 0, ""))); + int g(int a) __attribute__((enable_if(a == 0 || a != 0, ""))); + int h(int a) __attribute__((enable_if(1, ""))); + int i(int a) __attribute__((enable_if(TrueConstant, ""))); + int j(int a) __attribute__((enable_if(FalseConstant, ""))); + + void fn() { + int (*ptr)(int); + ptr = &f; // error: 'a > 0' is not always true + ptr = &g; // error: 'a == 0 || a != 0' is not a truthy constant + ptr = &h; // OK: 1 is a truthy constant + ptr = &i; // OK: 'TrueConstant' is a truthy constant + ptr = &j; // error: 'FalseConstant' is a constant, but not truthy + } + +Because ``enable_if`` evaluation happens during overload resolution, +``enable_if`` may give unintuitive results when used with templates, depending +on when overloads are resolved. In the example below, clang will emit a +diagnostic about no viable overloads for ``foo`` in ``bar``, but not in ``baz``: + +.. code-block:: c++ + + double foo(int i) __attribute__((enable_if(i > 0, ""))); + void *foo(int i) __attribute__((enable_if(i <= 0, ""))); + template + auto bar() { return foo(I); } + + template + auto baz() { return foo(T::number); } + + struct WithNumber { constexpr static int number = 1; }; + void callThem() { + bar(); + baz(); + } + +This is because, in ``bar``, ``foo`` is resolved prior to template +instantiation, so the value for ``I`` isn't known (thus, both ``enable_if`` +conditions for ``foo`` fail). However, in ``baz``, ``foo`` is resolved during +template instantiation, so the value for ``T::number`` is known. + + +exclude_from_explicit_instantiation +----------------------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``exclude_from_explicit_instantiation``","``clang::exclude_from_explicit_instantiation``","``clang::exclude_from_explicit_instantiation``","","","","Yes" + +The ``exclude_from_explicit_instantiation`` attribute opts-out a member of a +class template from being part of explicit template instantiations of that +class template. This means that an explicit instantiation will not instantiate +members of the class template marked with the attribute, but also that code +where an extern template declaration of the enclosing class template is visible +will not take for granted that an external instantiation of the class template +would provide those members (which would otherwise be a link error, since the +explicit instantiation won't provide those members). For example, let's say we +don't want the ``data()`` method to be part of libc++'s ABI. To make sure it +is not exported from the dylib, we give it hidden visibility: + + .. code-block:: c++ + + // in + template + class basic_string { + public: + __attribute__((__visibility__("hidden"))) + const value_type* data() const noexcept { ... } + }; + + template class basic_string; + +Since an explicit template instantiation declaration for ``basic_string`` +is provided, the compiler is free to assume that ``basic_string::data()`` +will be provided by another translation unit, and it is free to produce an +external call to this function. However, since ``data()`` has hidden visibility +and the explicit template instantiation is provided in a shared library (as +opposed to simply another translation unit), ``basic_string::data()`` +won't be found and a link error will ensue. This happens because the compiler +assumes that ``basic_string::data()`` is part of the explicit template +instantiation declaration, when it really isn't. To tell the compiler that +``data()`` is not part of the explicit template instantiation declaration, the +``exclude_from_explicit_instantiation`` attribute can be used: + + .. code-block:: c++ + + // in + template + class basic_string { + public: + __attribute__((__visibility__("hidden"))) + __attribute__((exclude_from_explicit_instantiation)) + const value_type* data() const noexcept { ... } + }; + + template class basic_string; + +Now, the compiler won't assume that ``basic_string::data()`` is provided +externally despite there being an explicit template instantiation declaration: +the compiler will implicitly instantiate ``basic_string::data()`` in the +TUs where it is used. + +This attribute can be used on static and non-static member functions of class +templates, static data members of class templates and member classes of class +templates. + + +external_source_symbol +---------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``external_source_symbol``","``clang::external_source_symbol``","``clang::external_source_symbol``","","","","Yes" + +The ``external_source_symbol`` attribute specifies that a declaration originates +from an external source and describes the nature of that source. + +The fact that Clang is capable of recognizing declarations that were defined +externally can be used to provide better tooling support for mixed-language +projects or projects that rely on auto-generated code. For instance, an IDE that +uses Clang and that supports mixed-language projects can use this attribute to +provide a correct 'jump-to-definition' feature. For a concrete example, +consider a protocol that's defined in a Swift file: + +.. code-block:: swift + + @objc public protocol SwiftProtocol { + func method() + } + +This protocol can be used from Objective-C code by including a header file that +was generated by the Swift compiler. The declarations in that header can use +the ``external_source_symbol`` attribute to make Clang aware of the fact +that ``SwiftProtocol`` actually originates from a Swift module: + +.. code-block:: objc + + __attribute__((external_source_symbol(language="Swift",defined_in="module"))) + @protocol SwiftProtocol + @required + - (void) method; + @end + +Consequently, when 'jump-to-definition' is performed at a location that +references ``SwiftProtocol``, the IDE can jump to the original definition in +the Swift source file rather than jumping to the Objective-C declaration in the +auto-generated header file. + +The ``external_source_symbol`` attribute is a comma-separated list that includes +clauses that describe the origin and the nature of the particular declaration. +Those clauses can be: + +language=\ *string-literal* + The name of the source language in which this declaration was defined. + +defined_in=\ *string-literal* + The name of the source container in which the declaration was defined. The + exact definition of source container is language-specific, e.g. Swift's + source containers are modules, so ``defined_in`` should specify the Swift + module name. + +generated_declaration + This declaration was automatically generated by some tool. + +The clauses can be specified in any order. The clauses that are listed above are +all optional, but the attribute has to have at least one clause. + + +flatten +------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``flatten``","``gnu::flatten``","","","","","Yes" + +The ``flatten`` attribute causes calls within the attributed function to +be inlined unless it is impossible to do so, for example if the body of the +callee is unavailable or if the callee has the ``noinline`` attribute. + + +force_align_arg_pointer +----------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``force_align_arg_pointer``","``gnu::force_align_arg_pointer``","","","","","" + +Use this attribute to force stack alignment. + +Legacy x86 code uses 4-byte stack alignment. Newer aligned SSE instructions +(like 'movaps') that work with the stack require operands to be 16-byte aligned. +This attribute realigns the stack in the function prologue to make sure the +stack can be used with SSE instructions. + +Note that the x86_64 ABI forces 16-byte stack alignment at the call site. +Because of this, 'force_align_arg_pointer' is not needed on x86_64, except in +rare cases where the caller does not align the stack properly (e.g. flow +jumps from i386 arch code). + + .. code-block:: c + + __attribute__ ((force_align_arg_pointer)) + void f () { + ... + } + + +format +------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``format``","``gnu::format``","","","","","" + +Clang supports the ``format`` attribute, which indicates that the function +accepts a ``printf`` or ``scanf``-like format string and corresponding +arguments or a ``va_list`` that contains these arguments. + +Please see `GCC documentation about format attribute +`_ to find details +about attribute syntax. + +Clang implements two kinds of checks with this attribute. + +#. Clang checks that the function with the ``format`` attribute is called with + a format string that uses format specifiers that are allowed, and that + arguments match the format string. This is the ``-Wformat`` warning, it is + on by default. + +#. Clang checks that the format string argument is a literal string. This is + the ``-Wformat-nonliteral`` warning, it is off by default. + + Clang implements this mostly the same way as GCC, but there is a difference + for functions that accept a ``va_list`` argument (for example, ``vprintf``). + GCC does not emit ``-Wformat-nonliteral`` warning for calls to such + functions. Clang does not warn if the format string comes from a function + parameter, where the function is annotated with a compatible attribute, + otherwise it warns. For example: + + .. code-block:: c + + __attribute__((__format__ (__scanf__, 1, 3))) + void foo(const char* s, char *buf, ...) { + va_list ap; + va_start(ap, buf); + + vprintf(s, ap); // warning: format string is not a string literal + } + + In this case we warn because ``s`` contains a format string for a + ``scanf``-like function, but it is passed to a ``printf``-like function. + + If the attribute is removed, clang still warns, because the format string is + not a string literal. + + Another example: + + .. code-block:: c + + __attribute__((__format__ (__printf__, 1, 3))) + void foo(const char* s, char *buf, ...) { + va_list ap; + va_start(ap, buf); + + vprintf(s, ap); // warning + } + + In this case Clang does not warn because the format string ``s`` and + the corresponding arguments are annotated. If the arguments are + incorrect, the caller of ``foo`` will receive a warning. + + +gnu_inline +---------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``gnu_inline``","``gnu::gnu_inline``","","","","","Yes" + +The ``gnu_inline`` changes the meaning of ``extern inline`` to use GNU inline +semantics, meaning: + +* If any declaration that is declared ``inline`` is not declared ``extern``, +then the ``inline`` keyword is just a hint. In particular, an out-of-line +definition is still emitted for a function with external linkage, even if all +call sites are inlined, unlike in C99 and C++ inline semantics. + +* If all declarations that are declared ``inline`` are also declared +``extern``, then the function body is present only for inlining and no +out-of-line version is emitted. + +Some important consequences: ``static inline`` emits an out-of-line +version if needed, a plain ``inline`` definition emits an out-of-line version +always, and an ``extern inline`` definition (in a header) followed by a +(non-``extern``) ``inline`` declaration in a source file emits an out-of-line +version of the function in that source file but provides the function body for +inlining to all includers of the header. + +Either ``__GNUC_GNU_INLINE__`` (GNU inline semantics) or +``__GNUC_STDC_INLINE__`` (C99 semantics) will be defined (they are mutually +exclusive). If ``__GNUC_STDC_INLINE__`` is defined, then the ``gnu_inline`` +function attribute can be used to get GNU inline semantics on a per function +basis. If ``__GNUC_GNU_INLINE__`` is defined, then the translation unit is +already being compiled with GNU inline semantics as the implied default. It is +unspecified which macro is defined in a C++ compilation. + +GNU inline semantics are the default behavior with ``-std=gnu89``, +``-std=c89``, ``-std=c94``, or ``-fgnu89-inline``. + + +ifunc +----- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``ifunc``","``gnu::ifunc``","","","","","Yes" + +``__attribute__((ifunc("resolver")))`` is used to mark that the address of a declaration should be resolved at runtime by calling a resolver function. + +The symbol name of the resolver function is given in quotes. A function with this name (after mangling) must be defined in the current translation unit; it may be ``static``. The resolver function should return a pointer. + +The ``ifunc`` attribute may only be used on a function declaration. A function declaration with an ``ifunc`` attribute is considered to be a definition of the declared entity. The entity must not have weak linkage; for example, in C++, it cannot be applied to a declaration if a definition at that location would be considered inline. + +Not all targets support this attribute. ELF target support depends on both the linker and runtime linker, and is available in at least lld 4.0 and later, binutils 2.20.1 and later, glibc v2.11.1 and later, and FreeBSD 9.1 and later. Non-ELF targets currently do not support this attribute. + + +internal_linkage +---------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``internal_linkage``","``clang::internal_linkage``","``clang::internal_linkage``","","","","Yes" + +The ``internal_linkage`` attribute changes the linkage type of the declaration to internal. +This is similar to C-style ``static``, but can be used on classes and class methods. When applied to a class definition, +this attribute affects all methods and static data members of that class. +This can be used to contain the ABI of a C++ library by excluding unwanted class methods from the export tables. + + +interrupt (ARM) +--------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``interrupt``","``gnu::interrupt``","","","","","" + +Clang supports the GNU style ``__attribute__((interrupt("TYPE")))`` attribute on +ARM targets. This attribute may be attached to a function definition and +instructs the backend to generate appropriate function entry/exit code so that +it can be used directly as an interrupt service routine. + +The parameter passed to the interrupt attribute is optional, but if +provided it must be a string literal with one of the following values: "IRQ", +"FIQ", "SWI", "ABORT", "UNDEF". + +The semantics are as follows: + +- If the function is AAPCS, Clang instructs the backend to realign the stack to + 8 bytes on entry. This is a general requirement of the AAPCS at public + interfaces, but may not hold when an exception is taken. Doing this allows + other AAPCS functions to be called. +- If the CPU is M-class this is all that needs to be done since the architecture + itself is designed in such a way that functions obeying the normal AAPCS ABI + constraints are valid exception handlers. +- If the CPU is not M-class, the prologue and epilogue are modified to save all + non-banked registers that are used, so that upon return the user-mode state + will not be corrupted. Note that to avoid unnecessary overhead, only + general-purpose (integer) registers are saved in this way. If VFP operations + are needed, that state must be saved manually. + + Specifically, interrupt kinds other than "FIQ" will save all core registers + except "lr" and "sp". "FIQ" interrupts will save r0-r7. +- If the CPU is not M-class, the return instruction is changed to one of the + canonical sequences permitted by the architecture for exception return. Where + possible the function itself will make the necessary "lr" adjustments so that + the "preferred return address" is selected. + + Unfortunately the compiler is unable to make this guarantee for an "UNDEF" + handler, where the offset from "lr" to the preferred return address depends on + the execution state of the code which generated the exception. In this case + a sequence equivalent to "movs pc, lr" will be used. + + +interrupt (AVR) +--------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``interrupt``","``gnu::interrupt``","","","","","Yes" + +Clang supports the GNU style ``__attribute__((interrupt))`` attribute on +AVR targets. This attribute may be attached to a function definition and instructs +the backend to generate appropriate function entry/exit code so that it can be used +directly as an interrupt service routine. + +On the AVR, the hardware globally disables interrupts when an interrupt is executed. +The first instruction of an interrupt handler declared with this attribute is a SEI +instruction to re-enable interrupts. See also the signal attribute that +does not insert a SEI instruction. + + +interrupt (MIPS) +---------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``interrupt``","``gnu::interrupt``","","","","","Yes" + +Clang supports the GNU style ``__attribute__((interrupt("ARGUMENT")))`` attribute on +MIPS targets. This attribute may be attached to a function definition and instructs +the backend to generate appropriate function entry/exit code so that it can be used +directly as an interrupt service routine. + +By default, the compiler will produce a function prologue and epilogue suitable for +an interrupt service routine that handles an External Interrupt Controller (eic) +generated interrupt. This behaviour can be explicitly requested with the "eic" +argument. + +Otherwise, for use with vectored interrupt mode, the argument passed should be +of the form "vector=LEVEL" where LEVEL is one of the following values: +"sw0", "sw1", "hw0", "hw1", "hw2", "hw3", "hw4", "hw5". The compiler will +then set the interrupt mask to the corresponding level which will mask all +interrupts up to and including the argument. + +The semantics are as follows: + +- The prologue is modified so that the Exception Program Counter (EPC) and + Status coprocessor registers are saved to the stack. The interrupt mask is + set so that the function can only be interrupted by a higher priority + interrupt. The epilogue will restore the previous values of EPC and Status. + +- The prologue and epilogue are modified to save and restore all non-kernel + registers as necessary. + +- The FPU is disabled in the prologue, as the floating pointer registers are not + spilled to the stack. + +- The function return sequence is changed to use an exception return instruction. + +- The parameter sets the interrupt mask for the function corresponding to the + interrupt level specified. If no mask is specified the interrupt mask + defaults to "eic". + + +interrupt (RISCV) +----------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``interrupt``","``gnu::interrupt``","","","","","Yes" + +Clang supports the GNU style ``__attribute__((interrupt))`` attribute on RISCV +targets. This attribute may be attached to a function definition and instructs +the backend to generate appropriate function entry/exit code so that it can be +used directly as an interrupt service routine. + +Permissible values for this parameter are ``user``, ``supervisor``, +and ``machine``. If there is no parameter, then it defaults to machine. + +Repeated interrupt attribute on the same declaration will cause a warning +to be emitted. In case of repeated declarations, the last one prevails. + +Refer to: +https://gcc.gnu.org/onlinedocs/gcc/RISC-V-Function-Attributes.html +https://riscv.org/specifications/privileged-isa/ +The RISC-V Instruction Set Manual Volume II: Privileged Architecture +Version 1.10. + + +kernel +------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``kernel``","","","","","","Yes" + +``__attribute__((kernel))`` is used to mark a ``kernel`` function in +RenderScript. + +In RenderScript, ``kernel`` functions are used to express data-parallel +computations. The RenderScript runtime efficiently parallelizes ``kernel`` +functions to run on computational resources such as multi-core CPUs and GPUs. +See the RenderScript_ documentation for more information. + +.. _RenderScript: https://developer.android.com/guide/topics/renderscript/compute.html + + +lifetimebound +------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``lifetimebound``","``clang::lifetimebound``","","","","","" + +The ``lifetimebound`` attribute indicates that a resource owned by +a function parameter or implicit object parameter +is retained by the return value of the annotated function +(or, for a parameter of a constructor, in the value of the constructed object). +It is only supported in C++. + +This attribute provides an experimental implementation of the facility +described in the C++ committee paper [http://wg21.link/p0936r0](P0936R0), +and is subject to change as the design of the corresponding functionality +changes. + + +long_call, far +-------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``long_call`` |br| ``far``","``gnu::long_call`` |br| ``gnu::far``","","","","","Yes" + +Clang supports the ``__attribute__((long_call))``, ``__attribute__((far))``, +and ``__attribute__((near))`` attributes on MIPS targets. These attributes may +only be added to function declarations and change the code generated +by the compiler when directly calling the function. The ``near`` attribute +allows calls to the function to be made using the ``jal`` instruction, which +requires the function to be located in the same naturally aligned 256MB +segment as the caller. The ``long_call`` and ``far`` attributes are synonyms +and require the use of a different call sequence that works regardless +of the distance between the functions. + +These attributes have no effect for position-independent code. + +These attributes take priority over command line switches such +as ``-mlong-calls`` and ``-mno-long-calls``. + + +micromips +--------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``micromips``","``gnu::micromips``","","","","","Yes" + +Clang supports the GNU style ``__attribute__((micromips))`` and +``__attribute__((nomicromips))`` attributes on MIPS targets. These attributes +may be attached to a function definition and instructs the backend to generate +or not to generate microMIPS code for that function. + +These attributes override the `-mmicromips` and `-mno-micromips` options +on the command line. + + +min_vector_width +---------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``min_vector_width``","``clang::min_vector_width``","``clang::min_vector_width``","","","","Yes" + +Clang supports the ``__attribute__((min_vector_width(width)))`` attribute. This +attribute may be attached to a function and informs the backend that this +function desires vectors of at least this width to be generated. Target-specific +maximum vector widths still apply. This means even if you ask for something +larger than the target supports, you will only get what the target supports. +This attribute is meant to be a hint to control target heuristics that may +generate narrower vectors than what the target hardware supports. + +This is currently used by the X86 target to allow some CPUs that support 512-bit +vectors to be limited to using 256-bit vectors to avoid frequency penalties. +This is currently enabled with the ``-prefer-vector-width=256`` command line +option. The ``min_vector_width`` attribute can be used to prevent the backend +from trying to split vector operations to match the ``prefer-vector-width``. All +X86 vector intrinsics from x86intrin.h already set this attribute. Additionally, +use of any of the X86-specific vector builtins will implicitly set this +attribute on the calling function. The intent is that explicitly writing vector +code using the X86 intrinsics will prevent ``prefer-vector-width`` from +affecting the code. + + +no_caller_saved_registers +------------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``no_caller_saved_registers``","``gnu::no_caller_saved_registers``","","","","","" + +Use this attribute to indicate that the specified function has no +caller-saved registers. That is, all registers are callee-saved except for +registers used for passing parameters to the function or returning parameters +from the function. +The compiler saves and restores any modified registers that were not used for +passing or returning arguments to the function. + +The user can call functions specified with the 'no_caller_saved_registers' +attribute from an interrupt handler without saving and restoring all +call-clobbered registers. + +Note that 'no_caller_saved_registers' attribute is not a calling convention. +In fact, it only overrides the decision of which registers should be saved by +the caller, but not how the parameters are passed from the caller to the callee. + +For example: + + .. code-block:: c + + __attribute__ ((no_caller_saved_registers, fastcall)) + void f (int arg1, int arg2) { + ... + } + + In this case parameters 'arg1' and 'arg2' will be passed in registers. + In this case, on 32-bit x86 targets, the function 'f' will use ECX and EDX as + register parameters. However, it will not assume any scratch registers and + should save and restore any modified registers except for ECX and EDX. + + +no_sanitize +----------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``no_sanitize``","``clang::no_sanitize``","``clang::no_sanitize``","","","","Yes" + +Use the ``no_sanitize`` attribute on a function or a global variable +declaration to specify that a particular instrumentation or set of +instrumentations should not be applied. The attribute takes a list of +string literals, which have the same meaning as values accepted by the +``-fno-sanitize=`` flag. For example, +``__attribute__((no_sanitize("address", "thread")))`` specifies that +AddressSanitizer and ThreadSanitizer should not be applied to the +function or variable. + +See :ref:`Controlling Code Generation ` for a +full list of supported sanitizer flags. + + +no_sanitize_address, no_address_safety_analysis +----------------------------------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``no_address_safety_analysis`` |br| ``no_sanitize_address`` |br| ``no_sanitize_thread`` |br| ``no_sanitize_memory``","``gnu::no_address_safety_analysis`` |br| ``gnu::no_sanitize_address`` |br| ``gnu::no_sanitize_thread`` |br| ``clang::no_sanitize_memory``","``clang::no_sanitize_memory``","","","","Yes" + +.. _langext-address_sanitizer: + +Use ``__attribute__((no_sanitize_address))`` on a function or a global +variable declaration to specify that address safety instrumentation +(e.g. AddressSanitizer) should not be applied. + + +no_sanitize_memory +------------------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``no_address_safety_analysis`` |br| ``no_sanitize_address`` |br| ``no_sanitize_thread`` |br| ``no_sanitize_memory``","``gnu::no_address_safety_analysis`` |br| ``gnu::no_sanitize_address`` |br| ``gnu::no_sanitize_thread`` |br| ``clang::no_sanitize_memory``","``clang::no_sanitize_memory``","","","","Yes" + +.. _langext-memory_sanitizer: + +Use ``__attribute__((no_sanitize_memory))`` on a function declaration to +specify that checks for uninitialized memory should not be inserted +(e.g. by MemorySanitizer). The function may still be instrumented by the tool +to avoid false positives in other places. + + +no_sanitize_thread +------------------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``no_address_safety_analysis`` |br| ``no_sanitize_address`` |br| ``no_sanitize_thread`` |br| ``no_sanitize_memory``","``gnu::no_address_safety_analysis`` |br| ``gnu::no_sanitize_address`` |br| ``gnu::no_sanitize_thread`` |br| ``clang::no_sanitize_memory``","``clang::no_sanitize_memory``","","","","Yes" + +.. _langext-thread_sanitizer: + +Use ``__attribute__((no_sanitize_thread))`` on a function declaration to +specify that checks for data races on plain (non-atomic) memory accesses should +not be inserted by ThreadSanitizer. The function is still instrumented by the +tool to avoid false positives and provide meaningful stack traces. + + +no_split_stack +-------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``no_split_stack``","``gnu::no_split_stack``","","","","","Yes" + +The ``no_split_stack`` attribute disables the emission of the split stack +preamble for a particular function. It has no effect if ``-fsplit-stack`` +is not specified. + + +no_stack_protector +------------------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``no_stack_protector``","``clang::no_stack_protector``","``clang::no_stack_protector``","","","","Yes" + +Clang supports the ``__attribute__((no_stack_protector))`` attribute which disables +the stack protector on the specified function. This attribute is useful for +selectively disabling the stack protector on some functions when building with +``-fstack-protector`` compiler option. + +For example, it disables the stack protector for the function ``foo`` but function +``bar`` will still be built with the stack protector with the ``-fstack-protector`` +option. + +.. code-block:: c + + int __attribute__((no_stack_protector)) + foo (int x); // stack protection will be disabled for foo. + + int bar(int y); // bar can be built with the stack protector. + + +noalias +------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "","","","``noalias``","","","" + +The ``noalias`` attribute indicates that the only memory accesses inside +function are loads and stores from objects pointed to by its pointer-typed +arguments, with arbitrary offsets. + + +nocf_check +---------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``nocf_check``","``gnu::nocf_check``","","","","","Yes" + +Jump Oriented Programming attacks rely on tampering with addresses used by +indirect call / jmp, e.g. redirect control-flow to non-programmer +intended bytes in the binary. +X86 Supports Indirect Branch Tracking (IBT) as part of Control-Flow +Enforcement Technology (CET). IBT instruments ENDBR instructions used to +specify valid targets of indirect call / jmp. +The ``nocf_check`` attribute has two roles: +1. Appertains to a function - do not add ENDBR instruction at the beginning of +the function. +2. Appertains to a function pointer - do not track the target function of this +pointer (by adding nocf_check prefix to the indirect-call instruction). + + +nodiscard, warn_unused_result +----------------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``warn_unused_result``","``nodiscard`` |br| ``clang::warn_unused_result`` |br| ``gnu::warn_unused_result``","``nodiscard``","","","","Yes" + +Clang supports the ability to diagnose when the results of a function call +expression are discarded under suspicious circumstances. A diagnostic is +generated when a function or its return type is marked with ``[[nodiscard]]`` +(or ``__attribute__((warn_unused_result))``) and the function call appears as a +potentially-evaluated discarded-value expression that is not explicitly cast to +`void`. + +.. code-block: c++ + struct [[nodiscard]] error_info { /*...*/ }; + error_info enable_missile_safety_mode(); + + void launch_missiles(); + void test_missiles() { + enable_missile_safety_mode(); // diagnoses + launch_missiles(); + } + error_info &foo(); + void f() { foo(); } // Does not diagnose, error_info is a reference. + + +noduplicate +----------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``noduplicate``","``clang::noduplicate``","``clang::noduplicate``","","","","Yes" + +The ``noduplicate`` attribute can be placed on function declarations to control +whether function calls to this function can be duplicated or not as a result of +optimizations. This is required for the implementation of functions with +certain special requirements, like the OpenCL "barrier" function, that might +need to be run concurrently by all the threads that are executing in lockstep +on the hardware. For example this attribute applied on the function +"nodupfunc" in the code below avoids that: + +.. code-block:: c + + void nodupfunc() __attribute__((noduplicate)); + // Setting it as a C++11 attribute is also valid + // void nodupfunc() [[clang::noduplicate]]; + void foo(); + void bar(); + + nodupfunc(); + if (a > n) { + foo(); + } else { + bar(); + } + +gets possibly modified by some optimizations into code similar to this: + +.. code-block:: c + + if (a > n) { + nodupfunc(); + foo(); + } else { + nodupfunc(); + bar(); + } + +where the call to "nodupfunc" is duplicated and sunk into the two branches +of the condition. + + +nomicromips +----------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``nomicromips``","``gnu::nomicromips``","","","","","Yes" + +Clang supports the GNU style ``__attribute__((micromips))`` and +``__attribute__((nomicromips))`` attributes on MIPS targets. These attributes +may be attached to a function definition and instructs the backend to generate +or not to generate microMIPS code for that function. + +These attributes override the `-mmicromips` and `-mno-micromips` options +on the command line. + + +noreturn +-------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "","``noreturn``","","","","","Yes" + +A function declared as ``[[noreturn]]`` shall not return to its caller. The +compiler will generate a diagnostic for a function declared as ``[[noreturn]]`` +that appears to be capable of returning to its caller. + + +not_tail_called +--------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``not_tail_called``","``clang::not_tail_called``","``clang::not_tail_called``","","","","Yes" + +The ``not_tail_called`` attribute prevents tail-call optimization on statically bound calls. It has no effect on indirect calls. Virtual functions, objective-c methods, and functions marked as ``always_inline`` cannot be marked as ``not_tail_called``. + +For example, it prevents tail-call optimization in the following case: + + .. code-block:: c + + int __attribute__((not_tail_called)) foo1(int); + + int foo2(int a) { + return foo1(a); // No tail-call optimization on direct calls. + } + +However, it doesn't prevent tail-call optimization in this case: + + .. code-block:: c + + int __attribute__((not_tail_called)) foo1(int); + + int foo2(int a) { + int (*fn)(int) = &foo1; + + // not_tail_called has no effect on an indirect call even if the call can be + // resolved at compile time. + return (*fn)(a); + } + +Marking virtual functions as ``not_tail_called`` is an error: + + .. code-block:: c++ + + class Base { + public: + // not_tail_called on a virtual function is an error. + [[clang::not_tail_called]] virtual int foo1(); + + virtual int foo2(); + + // Non-virtual functions can be marked ``not_tail_called``. + [[clang::not_tail_called]] int foo3(); + }; + + class Derived1 : public Base { + public: + int foo1() override; + + // not_tail_called on a virtual function is an error. + [[clang::not_tail_called]] int foo2() override; + }; + + +nothrow +------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``nothrow``","``gnu::nothrow``","","``nothrow``","","","Yes" + +Clang supports the GNU style ``__attribute__((nothrow))`` and Microsoft style +``__declspec(nothrow)`` attribute as an equivalent of `noexcept` on function +declarations. This attribute informs the compiler that the annotated function +does not throw an exception. This prevents exception-unwinding. This attribute +is particularly useful on functions in the C Standard Library that are +guaranteed to not throw an exception. + + +ns_consumed +----------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``ns_consumed``","``clang::ns_consumed``","``clang::ns_consumed``","","","","Yes" + +The behavior of a function with respect to reference counting for Foundation +(Objective-C), CoreFoundation (C) and OSObject (C++) is determined by a naming +convention (e.g. functions starting with "get" are assumed to return at +``+0``). + +It can be overriden using a family of the following attributes. In +Objective-C, the annotation ``__attribute__((ns_returns_retained))`` applied to +a function communicates that the object is returned at ``+1``, and the caller +is responsible for freeing it. +Similiarly, the annotation ``__attribute__((ns_returns_not_retained))`` +specifies that the object is returned at ``+0`` and the ownership remains with +the callee. +The annotation ``__attribute__((ns_consumes_self))`` specifies that +the Objective-C method call consumes the reference to ``self``, e.g. by +attaching it to a supplied parameter. +Additionally, parameters can have an annotation +``__attribute__((ns_consumed))``, which specifies that passing an owned object +as that parameter effectively transfers the ownership, and the caller is no +longer responsible for it. +These attributes affect code generation when interacting with ARC code, and +they are used by the Clang Static Analyzer. + +In C programs using CoreFoundation, a similar set of attributes: +``__attribute__((cf_returns_not_retained))``, +``__attribute__((cf_returns_retained))`` and ``__attribute__((cf_consumed))`` +have the same respective semantics when applied to CoreFoundation objects. +These attributes affect code generation when interacting with ARC code, and +they are used by the Clang Static Analyzer. + +Finally, in C++ interacting with XNU kernel (objects inheriting from OSObject), +the same attribute family is present: +``__attribute__((os_returns_not_retained))``, +``__attribute__((os_returns_retained))`` and ``__attribute__((os_consumed))``, +with the same respective semantics. +Similar to ``__attribute__((ns_consumes_self))``, +``__attribute__((os_consumes_this))`` specifies that the method call consumes +the reference to "this" (e.g., when attaching it to a different object supplied +as a parameter). +Out parameters (parameters the function is meant to write into, +either via pointers-to-pointers or references-to-pointers) +may be annotated with ``__attribute__((os_returns_retained))`` +or ``__attribute__((os_returns_not_retained))`` which specifies that the object +written into the out parameter should (or respectively should not) be released +after use. +Since often out parameters may or may not be written depending on the exit +code of the function, +annotations ``__attribute__((os_returns_retained_on_zero))`` +and ``__attribute__((os_returns_retained_on_non_zero))`` specify that +an out parameter at ``+1`` is written if and only if the function returns a zero +(respectively non-zero) error code. +Observe that return-code-dependent out parameter annotations are only +available for retained out parameters, as non-retained object do not have to be +released by the callee. +These attributes are only used by the Clang Static Analyzer. + +The family of attributes ``X_returns_X_retained`` can be added to functions, +C++ methods, and Objective-C methods and properties. +Attributes ``X_consumed`` can be added to parameters of methods, functions, +and Objective-C methods. + + +ns_consumes_self +---------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``ns_consumes_self``","``clang::ns_consumes_self``","``clang::ns_consumes_self``","","","","Yes" + +The behavior of a function with respect to reference counting for Foundation +(Objective-C), CoreFoundation (C) and OSObject (C++) is determined by a naming +convention (e.g. functions starting with "get" are assumed to return at +``+0``). + +It can be overriden using a family of the following attributes. In +Objective-C, the annotation ``__attribute__((ns_returns_retained))`` applied to +a function communicates that the object is returned at ``+1``, and the caller +is responsible for freeing it. +Similiarly, the annotation ``__attribute__((ns_returns_not_retained))`` +specifies that the object is returned at ``+0`` and the ownership remains with +the callee. +The annotation ``__attribute__((ns_consumes_self))`` specifies that +the Objective-C method call consumes the reference to ``self``, e.g. by +attaching it to a supplied parameter. +Additionally, parameters can have an annotation +``__attribute__((ns_consumed))``, which specifies that passing an owned object +as that parameter effectively transfers the ownership, and the caller is no +longer responsible for it. +These attributes affect code generation when interacting with ARC code, and +they are used by the Clang Static Analyzer. + +In C programs using CoreFoundation, a similar set of attributes: +``__attribute__((cf_returns_not_retained))``, +``__attribute__((cf_returns_retained))`` and ``__attribute__((cf_consumed))`` +have the same respective semantics when applied to CoreFoundation objects. +These attributes affect code generation when interacting with ARC code, and +they are used by the Clang Static Analyzer. + +Finally, in C++ interacting with XNU kernel (objects inheriting from OSObject), +the same attribute family is present: +``__attribute__((os_returns_not_retained))``, +``__attribute__((os_returns_retained))`` and ``__attribute__((os_consumed))``, +with the same respective semantics. +Similar to ``__attribute__((ns_consumes_self))``, +``__attribute__((os_consumes_this))`` specifies that the method call consumes +the reference to "this" (e.g., when attaching it to a different object supplied +as a parameter). +Out parameters (parameters the function is meant to write into, +either via pointers-to-pointers or references-to-pointers) +may be annotated with ``__attribute__((os_returns_retained))`` +or ``__attribute__((os_returns_not_retained))`` which specifies that the object +written into the out parameter should (or respectively should not) be released +after use. +Since often out parameters may or may not be written depending on the exit +code of the function, +annotations ``__attribute__((os_returns_retained_on_zero))`` +and ``__attribute__((os_returns_retained_on_non_zero))`` specify that +an out parameter at ``+1`` is written if and only if the function returns a zero +(respectively non-zero) error code. +Observe that return-code-dependent out parameter annotations are only +available for retained out parameters, as non-retained object do not have to be +released by the callee. +These attributes are only used by the Clang Static Analyzer. + +The family of attributes ``X_returns_X_retained`` can be added to functions, +C++ methods, and Objective-C methods and properties. +Attributes ``X_consumed`` can be added to parameters of methods, functions, +and Objective-C methods. + + +ns_returns_autoreleased +----------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``ns_returns_autoreleased``","``clang::ns_returns_autoreleased``","``clang::ns_returns_autoreleased``","","","","" + +The behavior of a function with respect to reference counting for Foundation +(Objective-C), CoreFoundation (C) and OSObject (C++) is determined by a naming +convention (e.g. functions starting with "get" are assumed to return at +``+0``). + +It can be overriden using a family of the following attributes. In +Objective-C, the annotation ``__attribute__((ns_returns_retained))`` applied to +a function communicates that the object is returned at ``+1``, and the caller +is responsible for freeing it. +Similiarly, the annotation ``__attribute__((ns_returns_not_retained))`` +specifies that the object is returned at ``+0`` and the ownership remains with +the callee. +The annotation ``__attribute__((ns_consumes_self))`` specifies that +the Objective-C method call consumes the reference to ``self``, e.g. by +attaching it to a supplied parameter. +Additionally, parameters can have an annotation +``__attribute__((ns_consumed))``, which specifies that passing an owned object +as that parameter effectively transfers the ownership, and the caller is no +longer responsible for it. +These attributes affect code generation when interacting with ARC code, and +they are used by the Clang Static Analyzer. + +In C programs using CoreFoundation, a similar set of attributes: +``__attribute__((cf_returns_not_retained))``, +``__attribute__((cf_returns_retained))`` and ``__attribute__((cf_consumed))`` +have the same respective semantics when applied to CoreFoundation objects. +These attributes affect code generation when interacting with ARC code, and +they are used by the Clang Static Analyzer. + +Finally, in C++ interacting with XNU kernel (objects inheriting from OSObject), +the same attribute family is present: +``__attribute__((os_returns_not_retained))``, +``__attribute__((os_returns_retained))`` and ``__attribute__((os_consumed))``, +with the same respective semantics. +Similar to ``__attribute__((ns_consumes_self))``, +``__attribute__((os_consumes_this))`` specifies that the method call consumes +the reference to "this" (e.g., when attaching it to a different object supplied +as a parameter). +Out parameters (parameters the function is meant to write into, +either via pointers-to-pointers or references-to-pointers) +may be annotated with ``__attribute__((os_returns_retained))`` +or ``__attribute__((os_returns_not_retained))`` which specifies that the object +written into the out parameter should (or respectively should not) be released +after use. +Since often out parameters may or may not be written depending on the exit +code of the function, +annotations ``__attribute__((os_returns_retained_on_zero))`` +and ``__attribute__((os_returns_retained_on_non_zero))`` specify that +an out parameter at ``+1`` is written if and only if the function returns a zero +(respectively non-zero) error code. +Observe that return-code-dependent out parameter annotations are only +available for retained out parameters, as non-retained object do not have to be +released by the callee. +These attributes are only used by the Clang Static Analyzer. + +The family of attributes ``X_returns_X_retained`` can be added to functions, +C++ methods, and Objective-C methods and properties. +Attributes ``X_consumed`` can be added to parameters of methods, functions, +and Objective-C methods. + + +ns_returns_not_retained +----------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``ns_returns_not_retained``","``clang::ns_returns_not_retained``","``clang::ns_returns_not_retained``","","","","" + +The behavior of a function with respect to reference counting for Foundation +(Objective-C), CoreFoundation (C) and OSObject (C++) is determined by a naming +convention (e.g. functions starting with "get" are assumed to return at +``+0``). + +It can be overriden using a family of the following attributes. In +Objective-C, the annotation ``__attribute__((ns_returns_retained))`` applied to +a function communicates that the object is returned at ``+1``, and the caller +is responsible for freeing it. +Similiarly, the annotation ``__attribute__((ns_returns_not_retained))`` +specifies that the object is returned at ``+0`` and the ownership remains with +the callee. +The annotation ``__attribute__((ns_consumes_self))`` specifies that +the Objective-C method call consumes the reference to ``self``, e.g. by +attaching it to a supplied parameter. +Additionally, parameters can have an annotation +``__attribute__((ns_consumed))``, which specifies that passing an owned object +as that parameter effectively transfers the ownership, and the caller is no +longer responsible for it. +These attributes affect code generation when interacting with ARC code, and +they are used by the Clang Static Analyzer. + +In C programs using CoreFoundation, a similar set of attributes: +``__attribute__((cf_returns_not_retained))``, +``__attribute__((cf_returns_retained))`` and ``__attribute__((cf_consumed))`` +have the same respective semantics when applied to CoreFoundation objects. +These attributes affect code generation when interacting with ARC code, and +they are used by the Clang Static Analyzer. + +Finally, in C++ interacting with XNU kernel (objects inheriting from OSObject), +the same attribute family is present: +``__attribute__((os_returns_not_retained))``, +``__attribute__((os_returns_retained))`` and ``__attribute__((os_consumed))``, +with the same respective semantics. +Similar to ``__attribute__((ns_consumes_self))``, +``__attribute__((os_consumes_this))`` specifies that the method call consumes +the reference to "this" (e.g., when attaching it to a different object supplied +as a parameter). +Out parameters (parameters the function is meant to write into, +either via pointers-to-pointers or references-to-pointers) +may be annotated with ``__attribute__((os_returns_retained))`` +or ``__attribute__((os_returns_not_retained))`` which specifies that the object +written into the out parameter should (or respectively should not) be released +after use. +Since often out parameters may or may not be written depending on the exit +code of the function, +annotations ``__attribute__((os_returns_retained_on_zero))`` +and ``__attribute__((os_returns_retained_on_non_zero))`` specify that +an out parameter at ``+1`` is written if and only if the function returns a zero +(respectively non-zero) error code. +Observe that return-code-dependent out parameter annotations are only +available for retained out parameters, as non-retained object do not have to be +released by the callee. +These attributes are only used by the Clang Static Analyzer. + +The family of attributes ``X_returns_X_retained`` can be added to functions, +C++ methods, and Objective-C methods and properties. +Attributes ``X_consumed`` can be added to parameters of methods, functions, +and Objective-C methods. + + +ns_returns_retained +------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``ns_returns_retained``","``clang::ns_returns_retained``","``clang::ns_returns_retained``","","","","" + +The behavior of a function with respect to reference counting for Foundation +(Objective-C), CoreFoundation (C) and OSObject (C++) is determined by a naming +convention (e.g. functions starting with "get" are assumed to return at +``+0``). + +It can be overriden using a family of the following attributes. In +Objective-C, the annotation ``__attribute__((ns_returns_retained))`` applied to +a function communicates that the object is returned at ``+1``, and the caller +is responsible for freeing it. +Similiarly, the annotation ``__attribute__((ns_returns_not_retained))`` +specifies that the object is returned at ``+0`` and the ownership remains with +the callee. +The annotation ``__attribute__((ns_consumes_self))`` specifies that +the Objective-C method call consumes the reference to ``self``, e.g. by +attaching it to a supplied parameter. +Additionally, parameters can have an annotation +``__attribute__((ns_consumed))``, which specifies that passing an owned object +as that parameter effectively transfers the ownership, and the caller is no +longer responsible for it. +These attributes affect code generation when interacting with ARC code, and +they are used by the Clang Static Analyzer. + +In C programs using CoreFoundation, a similar set of attributes: +``__attribute__((cf_returns_not_retained))``, +``__attribute__((cf_returns_retained))`` and ``__attribute__((cf_consumed))`` +have the same respective semantics when applied to CoreFoundation objects. +These attributes affect code generation when interacting with ARC code, and +they are used by the Clang Static Analyzer. + +Finally, in C++ interacting with XNU kernel (objects inheriting from OSObject), +the same attribute family is present: +``__attribute__((os_returns_not_retained))``, +``__attribute__((os_returns_retained))`` and ``__attribute__((os_consumed))``, +with the same respective semantics. +Similar to ``__attribute__((ns_consumes_self))``, +``__attribute__((os_consumes_this))`` specifies that the method call consumes +the reference to "this" (e.g., when attaching it to a different object supplied +as a parameter). +Out parameters (parameters the function is meant to write into, +either via pointers-to-pointers or references-to-pointers) +may be annotated with ``__attribute__((os_returns_retained))`` +or ``__attribute__((os_returns_not_retained))`` which specifies that the object +written into the out parameter should (or respectively should not) be released +after use. +Since often out parameters may or may not be written depending on the exit +code of the function, +annotations ``__attribute__((os_returns_retained_on_zero))`` +and ``__attribute__((os_returns_retained_on_non_zero))`` specify that +an out parameter at ``+1`` is written if and only if the function returns a zero +(respectively non-zero) error code. +Observe that return-code-dependent out parameter annotations are only +available for retained out parameters, as non-retained object do not have to be +released by the callee. +These attributes are only used by the Clang Static Analyzer. + +The family of attributes ``X_returns_X_retained`` can be added to functions, +C++ methods, and Objective-C methods and properties. +Attributes ``X_consumed`` can be added to parameters of methods, functions, +and Objective-C methods. + + +objc_boxable +------------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``objc_boxable``","``clang::objc_boxable``","``clang::objc_boxable``","","","","Yes" + +Structs and unions marked with the ``objc_boxable`` attribute can be used +with the Objective-C boxed expression syntax, ``@(...)``. + +**Usage**: ``__attribute__((objc_boxable))``. This attribute +can only be placed on a declaration of a trivially-copyable struct or union: + +.. code-block:: objc + + struct __attribute__((objc_boxable)) some_struct { + int i; + }; + union __attribute__((objc_boxable)) some_union { + int i; + float f; + }; + typedef struct __attribute__((objc_boxable)) _some_struct some_struct; + + // ... + + some_struct ss; + NSValue *boxed = @(ss); + + +objc_method_family +------------------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``objc_method_family``","``clang::objc_method_family``","``clang::objc_method_family``","","","","Yes" + +Many methods in Objective-C have conventional meanings determined by their +selectors. It is sometimes useful to be able to mark a method as having a +particular conventional meaning despite not having the right selector, or as +not having the conventional meaning that its selector would suggest. For these +use cases, we provide an attribute to specifically describe the "method family" +that a method belongs to. + +**Usage**: ``__attribute__((objc_method_family(X)))``, where ``X`` is one of +``none``, ``alloc``, ``copy``, ``init``, ``mutableCopy``, or ``new``. This +attribute can only be placed at the end of a method declaration: + +.. code-block:: objc + + - (NSString *)initMyStringValue __attribute__((objc_method_family(none))); + +Users who do not wish to change the conventional meaning of a method, and who +merely want to document its non-standard retain and release semantics, should +use the retaining behavior attributes (``ns_returns_retained``, +``ns_returns_not_retained``, etc). + +Query for this feature with ``__has_attribute(objc_method_family)``. + + +objc_requires_super +------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``objc_requires_super``","``clang::objc_requires_super``","``clang::objc_requires_super``","","","","Yes" + +Some Objective-C classes allow a subclass to override a particular method in a +parent class but expect that the overriding method also calls the overridden +method in the parent class. For these cases, we provide an attribute to +designate that a method requires a "call to ``super``" in the overriding +method in the subclass. + +**Usage**: ``__attribute__((objc_requires_super))``. This attribute can only +be placed at the end of a method declaration: + +.. code-block:: objc + + - (void)foo __attribute__((objc_requires_super)); + +This attribute can only be applied the method declarations within a class, and +not a protocol. Currently this attribute does not enforce any placement of +where the call occurs in the overriding method (such as in the case of +``-dealloc`` where the call must appear at the end). It checks only that it +exists. + +Note that on both OS X and iOS that the Foundation framework provides a +convenience macro ``NS_REQUIRES_SUPER`` that provides syntactic sugar for this +attribute: + +.. code-block:: objc + + - (void)foo NS_REQUIRES_SUPER; + +This macro is conditionally defined depending on the compiler's support for +this attribute. If the compiler does not support the attribute the macro +expands to nothing. + +Operationally, when a method has this annotation the compiler will warn if the +implementation of an override in a subclass does not call super. For example: + +.. code-block:: objc + + warning: method possibly missing a [super AnnotMeth] call + - (void) AnnotMeth{}; + ^ + + +objc_runtime_name +----------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``objc_runtime_name``","``clang::objc_runtime_name``","``clang::objc_runtime_name``","","","","Yes" + +By default, the Objective-C interface or protocol identifier is used +in the metadata name for that object. The `objc_runtime_name` +attribute allows annotated interfaces or protocols to use the +specified string argument in the object's metadata name instead of the +default name. + +**Usage**: ``__attribute__((objc_runtime_name("MyLocalName")))``. This attribute +can only be placed before an @protocol or @interface declaration: + +.. code-block:: objc + + __attribute__((objc_runtime_name("MyLocalName"))) + @interface Message + @end + + +objc_runtime_visible +-------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``objc_runtime_visible``","``clang::objc_runtime_visible``","``clang::objc_runtime_visible``","","","","Yes" + +This attribute specifies that the Objective-C class to which it applies is visible to the Objective-C runtime but not to the linker. Classes annotated with this attribute cannot be subclassed and cannot have categories defined for them. + + +optnone +------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``optnone``","``clang::optnone``","``clang::optnone``","","","","Yes" + +The ``optnone`` attribute suppresses essentially all optimizations +on a function or method, regardless of the optimization level applied to +the compilation unit as a whole. This is particularly useful when you +need to debug a particular function, but it is infeasible to build the +entire application without optimization. Avoiding optimization on the +specified function can improve the quality of the debugging information +for that function. + +This attribute is incompatible with the ``always_inline`` and ``minsize`` +attributes. + + +os_consumed +----------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``os_consumed``","``clang::os_consumed``","``clang::os_consumed``","","","","Yes" + +The behavior of a function with respect to reference counting for Foundation +(Objective-C), CoreFoundation (C) and OSObject (C++) is determined by a naming +convention (e.g. functions starting with "get" are assumed to return at +``+0``). + +It can be overriden using a family of the following attributes. In +Objective-C, the annotation ``__attribute__((ns_returns_retained))`` applied to +a function communicates that the object is returned at ``+1``, and the caller +is responsible for freeing it. +Similiarly, the annotation ``__attribute__((ns_returns_not_retained))`` +specifies that the object is returned at ``+0`` and the ownership remains with +the callee. +The annotation ``__attribute__((ns_consumes_self))`` specifies that +the Objective-C method call consumes the reference to ``self``, e.g. by +attaching it to a supplied parameter. +Additionally, parameters can have an annotation +``__attribute__((ns_consumed))``, which specifies that passing an owned object +as that parameter effectively transfers the ownership, and the caller is no +longer responsible for it. +These attributes affect code generation when interacting with ARC code, and +they are used by the Clang Static Analyzer. + +In C programs using CoreFoundation, a similar set of attributes: +``__attribute__((cf_returns_not_retained))``, +``__attribute__((cf_returns_retained))`` and ``__attribute__((cf_consumed))`` +have the same respective semantics when applied to CoreFoundation objects. +These attributes affect code generation when interacting with ARC code, and +they are used by the Clang Static Analyzer. + +Finally, in C++ interacting with XNU kernel (objects inheriting from OSObject), +the same attribute family is present: +``__attribute__((os_returns_not_retained))``, +``__attribute__((os_returns_retained))`` and ``__attribute__((os_consumed))``, +with the same respective semantics. +Similar to ``__attribute__((ns_consumes_self))``, +``__attribute__((os_consumes_this))`` specifies that the method call consumes +the reference to "this" (e.g., when attaching it to a different object supplied +as a parameter). +Out parameters (parameters the function is meant to write into, +either via pointers-to-pointers or references-to-pointers) +may be annotated with ``__attribute__((os_returns_retained))`` +or ``__attribute__((os_returns_not_retained))`` which specifies that the object +written into the out parameter should (or respectively should not) be released +after use. +Since often out parameters may or may not be written depending on the exit +code of the function, +annotations ``__attribute__((os_returns_retained_on_zero))`` +and ``__attribute__((os_returns_retained_on_non_zero))`` specify that +an out parameter at ``+1`` is written if and only if the function returns a zero +(respectively non-zero) error code. +Observe that return-code-dependent out parameter annotations are only +available for retained out parameters, as non-retained object do not have to be +released by the callee. +These attributes are only used by the Clang Static Analyzer. + +The family of attributes ``X_returns_X_retained`` can be added to functions, +C++ methods, and Objective-C methods and properties. +Attributes ``X_consumed`` can be added to parameters of methods, functions, +and Objective-C methods. + + +os_consumes_this +---------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``os_consumes_this``","``clang::os_consumes_this``","``clang::os_consumes_this``","","","","" + +The behavior of a function with respect to reference counting for Foundation +(Objective-C), CoreFoundation (C) and OSObject (C++) is determined by a naming +convention (e.g. functions starting with "get" are assumed to return at +``+0``). + +It can be overriden using a family of the following attributes. In +Objective-C, the annotation ``__attribute__((ns_returns_retained))`` applied to +a function communicates that the object is returned at ``+1``, and the caller +is responsible for freeing it. +Similiarly, the annotation ``__attribute__((ns_returns_not_retained))`` +specifies that the object is returned at ``+0`` and the ownership remains with +the callee. +The annotation ``__attribute__((ns_consumes_self))`` specifies that +the Objective-C method call consumes the reference to ``self``, e.g. by +attaching it to a supplied parameter. +Additionally, parameters can have an annotation +``__attribute__((ns_consumed))``, which specifies that passing an owned object +as that parameter effectively transfers the ownership, and the caller is no +longer responsible for it. +These attributes affect code generation when interacting with ARC code, and +they are used by the Clang Static Analyzer. + +In C programs using CoreFoundation, a similar set of attributes: +``__attribute__((cf_returns_not_retained))``, +``__attribute__((cf_returns_retained))`` and ``__attribute__((cf_consumed))`` +have the same respective semantics when applied to CoreFoundation objects. +These attributes affect code generation when interacting with ARC code, and +they are used by the Clang Static Analyzer. + +Finally, in C++ interacting with XNU kernel (objects inheriting from OSObject), +the same attribute family is present: +``__attribute__((os_returns_not_retained))``, +``__attribute__((os_returns_retained))`` and ``__attribute__((os_consumed))``, +with the same respective semantics. +Similar to ``__attribute__((ns_consumes_self))``, +``__attribute__((os_consumes_this))`` specifies that the method call consumes +the reference to "this" (e.g., when attaching it to a different object supplied +as a parameter). +Out parameters (parameters the function is meant to write into, +either via pointers-to-pointers or references-to-pointers) +may be annotated with ``__attribute__((os_returns_retained))`` +or ``__attribute__((os_returns_not_retained))`` which specifies that the object +written into the out parameter should (or respectively should not) be released +after use. +Since often out parameters may or may not be written depending on the exit +code of the function, +annotations ``__attribute__((os_returns_retained_on_zero))`` +and ``__attribute__((os_returns_retained_on_non_zero))`` specify that +an out parameter at ``+1`` is written if and only if the function returns a zero +(respectively non-zero) error code. +Observe that return-code-dependent out parameter annotations are only +available for retained out parameters, as non-retained object do not have to be +released by the callee. +These attributes are only used by the Clang Static Analyzer. + +The family of attributes ``X_returns_X_retained`` can be added to functions, +C++ methods, and Objective-C methods and properties. +Attributes ``X_consumed`` can be added to parameters of methods, functions, +and Objective-C methods. + + +os_returns_not_retained +----------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``os_returns_not_retained``","``clang::os_returns_not_retained``","``clang::os_returns_not_retained``","","","","Yes" + +The behavior of a function with respect to reference counting for Foundation +(Objective-C), CoreFoundation (C) and OSObject (C++) is determined by a naming +convention (e.g. functions starting with "get" are assumed to return at +``+0``). + +It can be overriden using a family of the following attributes. In +Objective-C, the annotation ``__attribute__((ns_returns_retained))`` applied to +a function communicates that the object is returned at ``+1``, and the caller +is responsible for freeing it. +Similiarly, the annotation ``__attribute__((ns_returns_not_retained))`` +specifies that the object is returned at ``+0`` and the ownership remains with +the callee. +The annotation ``__attribute__((ns_consumes_self))`` specifies that +the Objective-C method call consumes the reference to ``self``, e.g. by +attaching it to a supplied parameter. +Additionally, parameters can have an annotation +``__attribute__((ns_consumed))``, which specifies that passing an owned object +as that parameter effectively transfers the ownership, and the caller is no +longer responsible for it. +These attributes affect code generation when interacting with ARC code, and +they are used by the Clang Static Analyzer. + +In C programs using CoreFoundation, a similar set of attributes: +``__attribute__((cf_returns_not_retained))``, +``__attribute__((cf_returns_retained))`` and ``__attribute__((cf_consumed))`` +have the same respective semantics when applied to CoreFoundation objects. +These attributes affect code generation when interacting with ARC code, and +they are used by the Clang Static Analyzer. + +Finally, in C++ interacting with XNU kernel (objects inheriting from OSObject), +the same attribute family is present: +``__attribute__((os_returns_not_retained))``, +``__attribute__((os_returns_retained))`` and ``__attribute__((os_consumed))``, +with the same respective semantics. +Similar to ``__attribute__((ns_consumes_self))``, +``__attribute__((os_consumes_this))`` specifies that the method call consumes +the reference to "this" (e.g., when attaching it to a different object supplied +as a parameter). +Out parameters (parameters the function is meant to write into, +either via pointers-to-pointers or references-to-pointers) +may be annotated with ``__attribute__((os_returns_retained))`` +or ``__attribute__((os_returns_not_retained))`` which specifies that the object +written into the out parameter should (or respectively should not) be released +after use. +Since often out parameters may or may not be written depending on the exit +code of the function, +annotations ``__attribute__((os_returns_retained_on_zero))`` +and ``__attribute__((os_returns_retained_on_non_zero))`` specify that +an out parameter at ``+1`` is written if and only if the function returns a zero +(respectively non-zero) error code. +Observe that return-code-dependent out parameter annotations are only +available for retained out parameters, as non-retained object do not have to be +released by the callee. +These attributes are only used by the Clang Static Analyzer. + +The family of attributes ``X_returns_X_retained`` can be added to functions, +C++ methods, and Objective-C methods and properties. +Attributes ``X_consumed`` can be added to parameters of methods, functions, +and Objective-C methods. + + +os_returns_retained +------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``os_returns_retained``","``clang::os_returns_retained``","``clang::os_returns_retained``","","","","Yes" + +The behavior of a function with respect to reference counting for Foundation +(Objective-C), CoreFoundation (C) and OSObject (C++) is determined by a naming +convention (e.g. functions starting with "get" are assumed to return at +``+0``). + +It can be overriden using a family of the following attributes. In +Objective-C, the annotation ``__attribute__((ns_returns_retained))`` applied to +a function communicates that the object is returned at ``+1``, and the caller +is responsible for freeing it. +Similiarly, the annotation ``__attribute__((ns_returns_not_retained))`` +specifies that the object is returned at ``+0`` and the ownership remains with +the callee. +The annotation ``__attribute__((ns_consumes_self))`` specifies that +the Objective-C method call consumes the reference to ``self``, e.g. by +attaching it to a supplied parameter. +Additionally, parameters can have an annotation +``__attribute__((ns_consumed))``, which specifies that passing an owned object +as that parameter effectively transfers the ownership, and the caller is no +longer responsible for it. +These attributes affect code generation when interacting with ARC code, and +they are used by the Clang Static Analyzer. + +In C programs using CoreFoundation, a similar set of attributes: +``__attribute__((cf_returns_not_retained))``, +``__attribute__((cf_returns_retained))`` and ``__attribute__((cf_consumed))`` +have the same respective semantics when applied to CoreFoundation objects. +These attributes affect code generation when interacting with ARC code, and +they are used by the Clang Static Analyzer. + +Finally, in C++ interacting with XNU kernel (objects inheriting from OSObject), +the same attribute family is present: +``__attribute__((os_returns_not_retained))``, +``__attribute__((os_returns_retained))`` and ``__attribute__((os_consumed))``, +with the same respective semantics. +Similar to ``__attribute__((ns_consumes_self))``, +``__attribute__((os_consumes_this))`` specifies that the method call consumes +the reference to "this" (e.g., when attaching it to a different object supplied +as a parameter). +Out parameters (parameters the function is meant to write into, +either via pointers-to-pointers or references-to-pointers) +may be annotated with ``__attribute__((os_returns_retained))`` +or ``__attribute__((os_returns_not_retained))`` which specifies that the object +written into the out parameter should (or respectively should not) be released +after use. +Since often out parameters may or may not be written depending on the exit +code of the function, +annotations ``__attribute__((os_returns_retained_on_zero))`` +and ``__attribute__((os_returns_retained_on_non_zero))`` specify that +an out parameter at ``+1`` is written if and only if the function returns a zero +(respectively non-zero) error code. +Observe that return-code-dependent out parameter annotations are only +available for retained out parameters, as non-retained object do not have to be +released by the callee. +These attributes are only used by the Clang Static Analyzer. + +The family of attributes ``X_returns_X_retained`` can be added to functions, +C++ methods, and Objective-C methods and properties. +Attributes ``X_consumed`` can be added to parameters of methods, functions, +and Objective-C methods. + + +os_returns_retained_on_non_zero +------------------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``os_returns_retained_on_non_zero``","``clang::os_returns_retained_on_non_zero``","``clang::os_returns_retained_on_non_zero``","","","","Yes" + +The behavior of a function with respect to reference counting for Foundation +(Objective-C), CoreFoundation (C) and OSObject (C++) is determined by a naming +convention (e.g. functions starting with "get" are assumed to return at +``+0``). + +It can be overriden using a family of the following attributes. In +Objective-C, the annotation ``__attribute__((ns_returns_retained))`` applied to +a function communicates that the object is returned at ``+1``, and the caller +is responsible for freeing it. +Similiarly, the annotation ``__attribute__((ns_returns_not_retained))`` +specifies that the object is returned at ``+0`` and the ownership remains with +the callee. +The annotation ``__attribute__((ns_consumes_self))`` specifies that +the Objective-C method call consumes the reference to ``self``, e.g. by +attaching it to a supplied parameter. +Additionally, parameters can have an annotation +``__attribute__((ns_consumed))``, which specifies that passing an owned object +as that parameter effectively transfers the ownership, and the caller is no +longer responsible for it. +These attributes affect code generation when interacting with ARC code, and +they are used by the Clang Static Analyzer. + +In C programs using CoreFoundation, a similar set of attributes: +``__attribute__((cf_returns_not_retained))``, +``__attribute__((cf_returns_retained))`` and ``__attribute__((cf_consumed))`` +have the same respective semantics when applied to CoreFoundation objects. +These attributes affect code generation when interacting with ARC code, and +they are used by the Clang Static Analyzer. + +Finally, in C++ interacting with XNU kernel (objects inheriting from OSObject), +the same attribute family is present: +``__attribute__((os_returns_not_retained))``, +``__attribute__((os_returns_retained))`` and ``__attribute__((os_consumed))``, +with the same respective semantics. +Similar to ``__attribute__((ns_consumes_self))``, +``__attribute__((os_consumes_this))`` specifies that the method call consumes +the reference to "this" (e.g., when attaching it to a different object supplied +as a parameter). +Out parameters (parameters the function is meant to write into, +either via pointers-to-pointers or references-to-pointers) +may be annotated with ``__attribute__((os_returns_retained))`` +or ``__attribute__((os_returns_not_retained))`` which specifies that the object +written into the out parameter should (or respectively should not) be released +after use. +Since often out parameters may or may not be written depending on the exit +code of the function, +annotations ``__attribute__((os_returns_retained_on_zero))`` +and ``__attribute__((os_returns_retained_on_non_zero))`` specify that +an out parameter at ``+1`` is written if and only if the function returns a zero +(respectively non-zero) error code. +Observe that return-code-dependent out parameter annotations are only +available for retained out parameters, as non-retained object do not have to be +released by the callee. +These attributes are only used by the Clang Static Analyzer. + +The family of attributes ``X_returns_X_retained`` can be added to functions, +C++ methods, and Objective-C methods and properties. +Attributes ``X_consumed`` can be added to parameters of methods, functions, +and Objective-C methods. + + +os_returns_retained_on_zero +--------------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``os_returns_retained_on_zero``","``clang::os_returns_retained_on_zero``","``clang::os_returns_retained_on_zero``","","","","Yes" + +The behavior of a function with respect to reference counting for Foundation +(Objective-C), CoreFoundation (C) and OSObject (C++) is determined by a naming +convention (e.g. functions starting with "get" are assumed to return at +``+0``). + +It can be overriden using a family of the following attributes. In +Objective-C, the annotation ``__attribute__((ns_returns_retained))`` applied to +a function communicates that the object is returned at ``+1``, and the caller +is responsible for freeing it. +Similiarly, the annotation ``__attribute__((ns_returns_not_retained))`` +specifies that the object is returned at ``+0`` and the ownership remains with +the callee. +The annotation ``__attribute__((ns_consumes_self))`` specifies that +the Objective-C method call consumes the reference to ``self``, e.g. by +attaching it to a supplied parameter. +Additionally, parameters can have an annotation +``__attribute__((ns_consumed))``, which specifies that passing an owned object +as that parameter effectively transfers the ownership, and the caller is no +longer responsible for it. +These attributes affect code generation when interacting with ARC code, and +they are used by the Clang Static Analyzer. + +In C programs using CoreFoundation, a similar set of attributes: +``__attribute__((cf_returns_not_retained))``, +``__attribute__((cf_returns_retained))`` and ``__attribute__((cf_consumed))`` +have the same respective semantics when applied to CoreFoundation objects. +These attributes affect code generation when interacting with ARC code, and +they are used by the Clang Static Analyzer. + +Finally, in C++ interacting with XNU kernel (objects inheriting from OSObject), +the same attribute family is present: +``__attribute__((os_returns_not_retained))``, +``__attribute__((os_returns_retained))`` and ``__attribute__((os_consumed))``, +with the same respective semantics. +Similar to ``__attribute__((ns_consumes_self))``, +``__attribute__((os_consumes_this))`` specifies that the method call consumes +the reference to "this" (e.g., when attaching it to a different object supplied +as a parameter). +Out parameters (parameters the function is meant to write into, +either via pointers-to-pointers or references-to-pointers) +may be annotated with ``__attribute__((os_returns_retained))`` +or ``__attribute__((os_returns_not_retained))`` which specifies that the object +written into the out parameter should (or respectively should not) be released +after use. +Since often out parameters may or may not be written depending on the exit +code of the function, +annotations ``__attribute__((os_returns_retained_on_zero))`` +and ``__attribute__((os_returns_retained_on_non_zero))`` specify that +an out parameter at ``+1`` is written if and only if the function returns a zero +(respectively non-zero) error code. +Observe that return-code-dependent out parameter annotations are only +available for retained out parameters, as non-retained object do not have to be +released by the callee. +These attributes are only used by the Clang Static Analyzer. + +The family of attributes ``X_returns_X_retained`` can be added to functions, +C++ methods, and Objective-C methods and properties. +Attributes ``X_consumed`` can be added to parameters of methods, functions, +and Objective-C methods. + + +overloadable +------------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``overloadable``","``clang::overloadable``","``clang::overloadable``","","","","Yes" + +Clang provides support for C++ function overloading in C. Function overloading +in C is introduced using the ``overloadable`` attribute. For example, one +might provide several overloaded versions of a ``tgsin`` function that invokes +the appropriate standard function computing the sine of a value with ``float``, +``double``, or ``long double`` precision: + +.. code-block:: c + + #include + float __attribute__((overloadable)) tgsin(float x) { return sinf(x); } + double __attribute__((overloadable)) tgsin(double x) { return sin(x); } + long double __attribute__((overloadable)) tgsin(long double x) { return sinl(x); } + +Given these declarations, one can call ``tgsin`` with a ``float`` value to +receive a ``float`` result, with a ``double`` to receive a ``double`` result, +etc. Function overloading in C follows the rules of C++ function overloading +to pick the best overload given the call arguments, with a few C-specific +semantics: + +* Conversion from ``float`` or ``double`` to ``long double`` is ranked as a + floating-point promotion (per C99) rather than as a floating-point conversion + (as in C++). + +* A conversion from a pointer of type ``T*`` to a pointer of type ``U*`` is + considered a pointer conversion (with conversion rank) if ``T`` and ``U`` are + compatible types. + +* A conversion from type ``T`` to a value of type ``U`` is permitted if ``T`` + and ``U`` are compatible types. This conversion is given "conversion" rank. + +* If no viable candidates are otherwise available, we allow a conversion from a + pointer of type ``T*`` to a pointer of type ``U*``, where ``T`` and ``U`` are + incompatible. This conversion is ranked below all other types of conversions. + Please note: ``U`` lacking qualifiers that are present on ``T`` is sufficient + for ``T`` and ``U`` to be incompatible. + +The declaration of ``overloadable`` functions is restricted to function +declarations and definitions. If a function is marked with the ``overloadable`` +attribute, then all declarations and definitions of functions with that name, +except for at most one (see the note below about unmarked overloads), must have +the ``overloadable`` attribute. In addition, redeclarations of a function with +the ``overloadable`` attribute must have the ``overloadable`` attribute, and +redeclarations of a function without the ``overloadable`` attribute must *not* +have the ``overloadable`` attribute. e.g., + +.. code-block:: c + + int f(int) __attribute__((overloadable)); + float f(float); // error: declaration of "f" must have the "overloadable" attribute + int f(int); // error: redeclaration of "f" must have the "overloadable" attribute + + int g(int) __attribute__((overloadable)); + int g(int) { } // error: redeclaration of "g" must also have the "overloadable" attribute + + int h(int); + int h(int) __attribute__((overloadable)); // error: declaration of "h" must not + // have the "overloadable" attribute + +Functions marked ``overloadable`` must have prototypes. Therefore, the +following code is ill-formed: + +.. code-block:: c + + int h() __attribute__((overloadable)); // error: h does not have a prototype + +However, ``overloadable`` functions are allowed to use a ellipsis even if there +are no named parameters (as is permitted in C++). This feature is particularly +useful when combined with the ``unavailable`` attribute: + +.. code-block:: c++ + + void honeypot(...) __attribute__((overloadable, unavailable)); // calling me is an error + +Functions declared with the ``overloadable`` attribute have their names mangled +according to the same rules as C++ function names. For example, the three +``tgsin`` functions in our motivating example get the mangled names +``_Z5tgsinf``, ``_Z5tgsind``, and ``_Z5tgsine``, respectively. There are two +caveats to this use of name mangling: + +* Future versions of Clang may change the name mangling of functions overloaded + in C, so you should not depend on an specific mangling. To be completely + safe, we strongly urge the use of ``static inline`` with ``overloadable`` + functions. + +* The ``overloadable`` attribute has almost no meaning when used in C++, + because names will already be mangled and functions are already overloadable. + However, when an ``overloadable`` function occurs within an ``extern "C"`` + linkage specification, it's name *will* be mangled in the same way as it + would in C. + +For the purpose of backwards compatibility, at most one function with the same +name as other ``overloadable`` functions may omit the ``overloadable`` +attribute. In this case, the function without the ``overloadable`` attribute +will not have its name mangled. + +For example: + +.. code-block:: c + + // Notes with mangled names assume Itanium mangling. + int f(int); + int f(double) __attribute__((overloadable)); + void foo() { + f(5); // Emits a call to f (not _Z1fi, as it would with an overload that + // was marked with overloadable). + f(1.0); // Emits a call to _Z1fd. + } + +Support for unmarked overloads is not present in some versions of clang. You may +query for it using ``__has_extension(overloadable_unmarked)``. + +Query for this attribute with ``__has_attribute(overloadable)``. + + +reinitializes +------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``reinitializes``","``clang::reinitializes``","","","","","" + +The ``reinitializes`` attribute can be applied to a non-static, non-const C++ +member function to indicate that this member function reinitializes the entire +object to a known state, independent of the previous state of the object. + +This attribute can be interpreted by static analyzers that warn about uses of an +object that has been left in an indeterminate state by a move operation. If a +member function marked with the ``reinitializes`` attribute is called on a +moved-from object, the analyzer can conclude that the object is no longer in an +indeterminate state. + +A typical example where this attribute would be used is on functions that clear +a container class: + +.. code-block:: c++ + + template + class Container { + public: + ... + [[clang::reinitializes]] void Clear(); + ... + }; + + +release_capability, release_shared_capability +--------------------------------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``release_capability`` |br| ``release_shared_capability`` |br| ``release_generic_capability`` |br| ``unlock_function``","``clang::release_capability`` |br| ``clang::release_shared_capability`` |br| ``clang::release_generic_capability`` |br| ``clang::unlock_function``","","","","","" + +Marks a function as releasing a capability. + + +short_call, near +---------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``short_call`` |br| ``near``","``gnu::short_call`` |br| ``gnu::near``","","","","","Yes" + +Clang supports the ``__attribute__((long_call))``, ``__attribute__((far))``, +``__attribute__((short__call))``, and ``__attribute__((near))`` attributes +on MIPS targets. These attributes may only be added to function declarations +and change the code generated by the compiler when directly calling +the function. The ``short_call`` and ``near`` attributes are synonyms and +allow calls to the function to be made using the ``jal`` instruction, which +requires the function to be located in the same naturally aligned 256MB segment +as the caller. The ``long_call`` and ``far`` attributes are synonyms and +require the use of a different call sequence that works regardless +of the distance between the functions. + +These attributes have no effect for position-independent code. + +These attributes take priority over command line switches such +as ``-mlong-calls`` and ``-mno-long-calls``. + + +signal +------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``signal``","``gnu::signal``","","","","","Yes" + +Clang supports the GNU style ``__attribute__((signal))`` attribute on +AVR targets. This attribute may be attached to a function definition and instructs +the backend to generate appropriate function entry/exit code so that it can be used +directly as an interrupt service routine. + +Interrupt handler functions defined with the signal attribute do not re-enable interrupts. + + +speculative_load_hardening +-------------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``speculative_load_hardening``","``clang::speculative_load_hardening``","``clang::speculative_load_hardening``","","","","Yes" + +This attribute can be applied to a function declaration in order to indicate + that `Speculative Load Hardening `_ + should be enabled for the function body. This can also be applied to a method + in Objective C. + + Speculative Load Hardening is a best-effort mitigation against + information leak attacks that make use of control flow + miss-speculation - specifically miss-speculation of whether a branch + is taken or not. Typically vulnerabilities enabling such attacks are + classified as "Spectre variant #1". Notably, this does not attempt to + mitigate against miss-speculation of branch target, classified as + "Spectre variant #2" vulnerabilities. + + When inlining, the attribute is sticky. Inlining a function that + carries this attribute will cause the caller to gain the + attribute. This is intended to provide a maximally conservative model + where the code in a function annotated with this attribute will always + (even after inlining) end up hardened. + + +target +------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``target``","``gnu::target``","","","","","Yes" + +Clang supports the GNU style ``__attribute__((target("OPTIONS")))`` attribute. +This attribute may be attached to a function definition and instructs +the backend to use different code generation options than were passed on the +command line. + +The current set of options correspond to the existing "subtarget features" for +the target with or without a "-mno-" in front corresponding to the absence +of the feature, as well as ``arch="CPU"`` which will change the default "CPU" +for the function. + +Example "subtarget features" from the x86 backend include: "mmx", "sse", "sse4.2", +"avx", "xop" and largely correspond to the machine specific options handled by +the front end. + +Additionally, this attribute supports function multiversioning for ELF based +x86/x86-64 targets, which can be used to create multiple implementations of the +same function that will be resolved at runtime based on the priority of their +``target`` attribute strings. A function is considered a multiversioned function +if either two declarations of the function have different ``target`` attribute +strings, or if it has a ``target`` attribute string of ``default``. For +example: + + .. code-block:: c++ + + __attribute__((target("arch=atom"))) + void foo() {} // will be called on 'atom' processors. + __attribute__((target("default"))) + void foo() {} // will be called on any other processors. + +All multiversioned functions must contain a ``default`` (fallback) +implementation, otherwise usages of the function are considered invalid. +Additionally, a function may not become multiversioned after its first use. + + +try_acquire_capability, try_acquire_shared_capability +----------------------------------------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``try_acquire_capability`` |br| ``try_acquire_shared_capability``","``clang::try_acquire_capability`` |br| ``clang::try_acquire_shared_capability``","","","","","" + +Marks a function that attempts to acquire a capability. This function may fail to +actually acquire the capability; they accept a Boolean value determining +whether acquiring the capability means success (true), or failing to acquire +the capability means success (false). + + +xray_always_instrument, xray_never_instrument, xray_log_args +------------------------------------------------------------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``xray_always_instrument`` |br| ``xray_never_instrument``","``clang::xray_always_instrument`` |br| ``clang::xray_never_instrument``","``clang::xray_always_instrument`` |br| ``clang::xray_never_instrument``","","","","Yes" + +``__attribute__((xray_always_instrument))`` or ``[[clang::xray_always_instrument]]`` is used to mark member functions (in C++), methods (in Objective C), and free functions (in C, C++, and Objective C) to be instrumented with XRay. This will cause the function to always have space at the beginning and exit points to allow for runtime patching. + +Conversely, ``__attribute__((xray_never_instrument))`` or ``[[clang::xray_never_instrument]]`` will inhibit the insertion of these instrumentation points. + +If a function has neither of these attributes, they become subject to the XRay heuristics used to determine whether a function should be instrumented or otherwise. + +``__attribute__((xray_log_args(N)))`` or ``[[clang::xray_log_args(N)]]`` is used to preserve N function arguments for the logging function. Currently, only N==1 is supported. + + +xray_always_instrument, xray_never_instrument, xray_log_args +------------------------------------------------------------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``xray_log_args``","``clang::xray_log_args``","``clang::xray_log_args``","","","","Yes" + +``__attribute__((xray_always_instrument))`` or ``[[clang::xray_always_instrument]]`` is used to mark member functions (in C++), methods (in Objective C), and free functions (in C, C++, and Objective C) to be instrumented with XRay. This will cause the function to always have space at the beginning and exit points to allow for runtime patching. + +Conversely, ``__attribute__((xray_never_instrument))`` or ``[[clang::xray_never_instrument]]`` will inhibit the insertion of these instrumentation points. + +If a function has neither of these attributes, they become subject to the XRay heuristics used to determine whether a function should be instrumented or otherwise. + +``__attribute__((xray_log_args(N)))`` or ``[[clang::xray_log_args(N)]]`` is used to preserve N function arguments for the logging function. Currently, only N==1 is supported. + + +Variable Attributes +=================== + + +always_destroy +-------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``always_destroy``","``clang::always_destroy``","","","","","Yes" + +The ``always_destroy`` attribute specifies that a variable with static or thread +storage duration should have its exit-time destructor run. This attribute is the +default unless clang was invoked with -fno-c++-static-destructors. + + +dllexport +--------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``dllexport``","``gnu::dllexport``","","``dllexport``","","","Yes" + +The ``__declspec(dllexport)`` attribute declares a variable, function, or +Objective-C interface to be exported from the module. It is available under the +``-fdeclspec`` flag for compatibility with various compilers. The primary use +is for COFF object files which explicitly specify what interfaces are available +for external use. See the dllexport_ documentation on MSDN for more +information. + +.. _dllexport: https://msdn.microsoft.com/en-us/library/3y1sfaz2.aspx + + +dllimport +--------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``dllimport``","``gnu::dllimport``","","``dllimport``","","","Yes" + +The ``__declspec(dllimport)`` attribute declares a variable, function, or +Objective-C interface to be imported from an external module. It is available +under the ``-fdeclspec`` flag for compatibility with various compilers. The +primary use is for COFF object files which explicitly specify what interfaces +are imported from external modules. See the dllimport_ documentation on MSDN +for more information. + +.. _dllimport: https://msdn.microsoft.com/en-us/library/3y1sfaz2.aspx + + +init_seg +-------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "","","","","","``init_seg``","" + +The attribute applied by ``pragma init_seg()`` controls the section into +which global initialization function pointers are emitted. It is only +available with ``-fms-extensions``. Typically, this function pointer is +emitted into ``.CRT$XCU`` on Windows. The user can change the order of +initialization by using a different section name with the same +``.CRT$XC`` prefix and a suffix that sorts lexicographically before or +after the standard ``.CRT$XCU`` sections. See the init_seg_ +documentation on MSDN for more information. + +.. _init_seg: http://msdn.microsoft.com/en-us/library/7977wcck(v=vs.110).aspx + + +maybe_unused, unused +-------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``unused``","``maybe_unused`` |br| ``gnu::unused``","``maybe_unused``","","","","" + +When passing the ``-Wunused`` flag to Clang, entities that are unused by the +program may be diagnosed. The ``[[maybe_unused]]`` (or +``__attribute__((unused))``) attribute can be used to silence such diagnostics +when the entity cannot be removed. For instance, a local variable may exist +solely for use in an ``assert()`` statement, which makes the local variable +unused when ``NDEBUG`` is defined. + +The attribute may be applied to the declaration of a class, a typedef, a +variable, a function or method, a function parameter, an enumeration, an +enumerator, a non-static data member, or a label. + +.. code-block: c++ + #include + + [[maybe_unused]] void f([[maybe_unused]] bool thing1, + [[maybe_unused]] bool thing2) { + [[maybe_unused]] bool b = thing1 && thing2; + assert(b); + } + + +no_destroy +---------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``no_destroy``","``clang::no_destroy``","","","","","Yes" + +The ``no_destroy`` attribute specifies that a variable with static or thread +storage duration shouldn't have its exit-time destructor run. Annotating every +static and thread duration variable with this attribute is equivalent to +invoking clang with -fno-c++-static-destructors. + + +nodebug +------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``nodebug``","``gnu::nodebug``","","","","","Yes" + +The ``nodebug`` attribute allows you to suppress debugging information for a +function or method, or for a variable that is not a parameter or a non-static +data member. + + +noescape +-------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``noescape``","``clang::noescape``","``clang::noescape``","","","","Yes" + +``noescape`` placed on a function parameter of a pointer type is used to inform +the compiler that the pointer cannot escape: that is, no reference to the object +the pointer points to that is derived from the parameter value will survive +after the function returns. Users are responsible for making sure parameters +annotated with ``noescape`` do not actuallly escape. + +For example: + +.. code-block:: c + + int *gp; + + void nonescapingFunc(__attribute__((noescape)) int *p) { + *p += 100; // OK. + } + + void escapingFunc(__attribute__((noescape)) int *p) { + gp = p; // Not OK. + } + +Additionally, when the parameter is a `block pointer +`, the same restriction +applies to copies of the block. For example: + +.. code-block:: c + + typedef void (^BlockTy)(); + BlockTy g0, g1; + + void nonescapingFunc(__attribute__((noescape)) BlockTy block) { + block(); // OK. + } + + void escapingFunc(__attribute__((noescape)) BlockTy block) { + g0 = block; // Not OK. + g1 = Block_copy(block); // Not OK either. + } + + +nosvm +----- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``nosvm``","","","","","","Yes" + +OpenCL 2.0 supports the optional ``__attribute__((nosvm))`` qualifier for +pointer variable. It informs the compiler that the pointer does not refer +to a shared virtual memory region. See OpenCL v2.0 s6.7.2 for details. + +Since it is not widely used and has been removed from OpenCL 2.1, it is ignored +by Clang. + + +objc_externally_retained +------------------------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``objc_externally_retained``","``clang::objc_externally_retained``","``clang::objc_externally_retained``","","","","Yes" + +The ``objc_externally_retained`` attribute can be applied to strong local +variables, functions, methods, or blocks to opt into +`externally-retained semantics +`_. + +When applied to the definition of a function, method, or block, every parameter +of the function with implicit strong retainable object pointer type is +considered externally-retained, and becomes ``const``. By explicitly annotating +a parameter with ``__strong``, you can opt back into the default +non-externally-retained behaviour for that parameter. For instance, +``first_param`` is externally-retained below, but not ``second_param``: + +.. code-block:: objc + + __attribute__((objc_externally_retained)) + void f(NSArray *first_param, __strong NSArray *second_param) { + // ... + } + +Likewise, when applied to a strong local variable, that variable becomes +``const`` and is considered externally-retained. + +When compiled without ``-fobjc-arc``, this attribute is ignored. + + +pass_object_size +---------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``pass_object_size``","``clang::pass_object_size``","``clang::pass_object_size``","","","","Yes" + +.. Note:: The mangling of functions with parameters that are annotated with + ``pass_object_size`` is subject to change. You can get around this by + using ``__asm__("foo")`` to explicitly name your functions, thus preserving + your ABI; also, non-overloadable C functions with ``pass_object_size`` are + not mangled. + +The ``pass_object_size(Type)`` attribute can be placed on function parameters to +instruct clang to call ``__builtin_object_size(param, Type)`` at each callsite +of said function, and implicitly pass the result of this call in as an invisible +argument of type ``size_t`` directly after the parameter annotated with +``pass_object_size``. Clang will also replace any calls to +``__builtin_object_size(param, Type)`` in the function by said implicit +parameter. + +Example usage: + +.. code-block:: c + + int bzero1(char *const p __attribute__((pass_object_size(0)))) + __attribute__((noinline)) { + int i = 0; + for (/**/; i < (int)__builtin_object_size(p, 0); ++i) { + p[i] = 0; + } + return i; + } + + int main() { + char chars[100]; + int n = bzero1(&chars[0]); + assert(n == sizeof(chars)); + return 0; + } + +If successfully evaluating ``__builtin_object_size(param, Type)`` at the +callsite is not possible, then the "failed" value is passed in. So, using the +definition of ``bzero1`` from above, the following code would exit cleanly: + +.. code-block:: c + + int main2(int argc, char *argv[]) { + int n = bzero1(argv); + assert(n == -1); + return 0; + } + +``pass_object_size`` plays a part in overload resolution. If two overload +candidates are otherwise equally good, then the overload with one or more +parameters with ``pass_object_size`` is preferred. This implies that the choice +between two identical overloads both with ``pass_object_size`` on one or more +parameters will always be ambiguous; for this reason, having two such overloads +is illegal. For example: + +.. code-block:: c++ + + #define PS(N) __attribute__((pass_object_size(N))) + // OK + void Foo(char *a, char *b); // Overload A + // OK -- overload A has no parameters with pass_object_size. + void Foo(char *a PS(0), char *b PS(0)); // Overload B + // Error -- Same signature (sans pass_object_size) as overload B, and both + // overloads have one or more parameters with the pass_object_size attribute. + void Foo(void *a PS(0), void *b); + + // OK + void Bar(void *a PS(0)); // Overload C + // OK + void Bar(char *c PS(1)); // Overload D + + void main() { + char known[10], *unknown; + Foo(unknown, unknown); // Calls overload B + Foo(known, unknown); // Calls overload B + Foo(unknown, known); // Calls overload B + Foo(known, known); // Calls overload B + + Bar(known); // Calls overload D + Bar(unknown); // Calls overload D + } + +Currently, ``pass_object_size`` is a bit restricted in terms of its usage: + +* Only one use of ``pass_object_size`` is allowed per parameter. + +* It is an error to take the address of a function with ``pass_object_size`` on + any of its parameters. If you wish to do this, you can create an overload + without ``pass_object_size`` on any parameters. + +* It is an error to apply the ``pass_object_size`` attribute to parameters that + are not pointers. Additionally, any parameter that ``pass_object_size`` is + applied to must be marked ``const`` at its function's definition. + + +require_constant_initialization +------------------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``require_constant_initialization``","``clang::require_constant_initialization``","","","","","Yes" + +This attribute specifies that the variable to which it is attached is intended +to have a `constant initializer `_ +according to the rules of [basic.start.static]. The variable is required to +have static or thread storage duration. If the initialization of the variable +is not a constant initializer an error will be produced. This attribute may +only be used in C++. + +Note that in C++03 strict constant expression checking is not done. Instead +the attribute reports if Clang can emit the variable as a constant, even if it's +not technically a 'constant initializer'. This behavior is non-portable. + +Static storage duration variables with constant initializers avoid hard-to-find +bugs caused by the indeterminate order of dynamic initialization. They can also +be safely used during dynamic initialization across translation units. + +This attribute acts as a compile time assertion that the requirements +for constant initialization have been met. Since these requirements change +between dialects and have subtle pitfalls it's important to fail fast instead +of silently falling back on dynamic initialization. + +.. code-block:: c++ + + // -std=c++14 + #define SAFE_STATIC [[clang::require_constant_initialization]] + struct T { + constexpr T(int) {} + ~T(); // non-trivial + }; + SAFE_STATIC T x = {42}; // Initialization OK. Doesn't check destructor. + SAFE_STATIC T y = 42; // error: variable does not have a constant initializer + // copy initialization is not a constant expression on a non-literal type. + + +section, __declspec(allocate) +----------------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``section``","``gnu::section``","","``allocate``","","","Yes" + +The ``section`` attribute allows you to specify a specific section a +global variable or function should be in after translation. + + +swift_context +------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``swift_context``","``clang::swift_context``","``clang::swift_context``","","","","Yes" + +The ``swift_context`` attribute marks a parameter of a ``swiftcall`` +function as having the special context-parameter ABI treatment. + +This treatment generally passes the context value in a special register +which is normally callee-preserved. + +A ``swift_context`` parameter must either be the last parameter or must be +followed by a ``swift_error_result`` parameter (which itself must always be +the last parameter). + +A context parameter must have pointer or reference type. + + +swift_error_result +------------------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``swift_error_result``","``clang::swift_error_result``","``clang::swift_error_result``","","","","Yes" + +The ``swift_error_result`` attribute marks a parameter of a ``swiftcall`` +function as having the special error-result ABI treatment. + +This treatment generally passes the underlying error value in and out of +the function through a special register which is normally callee-preserved. +This is modeled in C by pretending that the register is addressable memory: + +- The caller appears to pass the address of a variable of pointer type. + The current value of this variable is copied into the register before + the call; if the call returns normally, the value is copied back into the + variable. + +- The callee appears to receive the address of a variable. This address + is actually a hidden location in its own stack, initialized with the + value of the register upon entry. When the function returns normally, + the value in that hidden location is written back to the register. + +A ``swift_error_result`` parameter must be the last parameter, and it must be +preceded by a ``swift_context`` parameter. + +A ``swift_error_result`` parameter must have type ``T**`` or ``T*&`` for some +type T. Note that no qualifiers are permitted on the intermediate level. + +It is undefined behavior if the caller does not pass a pointer or +reference to a valid object. + +The standard convention is that the error value itself (that is, the +value stored in the apparent argument) will be null upon function entry, +but this is not enforced by the ABI. + + +swift_indirect_result +--------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``swift_indirect_result``","``clang::swift_indirect_result``","``clang::swift_indirect_result``","","","","Yes" + +The ``swift_indirect_result`` attribute marks a parameter of a ``swiftcall`` +function as having the special indirect-result ABI treatment. + +This treatment gives the parameter the target's normal indirect-result +ABI treatment, which may involve passing it differently from an ordinary +parameter. However, only the first indirect result will receive this +treatment. Furthermore, low-level lowering may decide that a direct result +must be returned indirectly; if so, this will take priority over the +``swift_indirect_result`` parameters. + +A ``swift_indirect_result`` parameter must either be the first parameter or +follow another ``swift_indirect_result`` parameter. + +A ``swift_indirect_result`` parameter must have type ``T*`` or ``T&`` for +some object type ``T``. If ``T`` is a complete type at the point of +definition of a function, it is undefined behavior if the argument +value does not point to storage of adequate size and alignment for a +value of type ``T``. + +Making indirect results explicit in the signature allows C functions to +directly construct objects into them without relying on language +optimizations like C++'s named return value optimization (NRVO). + + +swiftcall +--------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``swiftcall``","``clang::swiftcall``","``clang::swiftcall``","","","","" + +The ``swiftcall`` attribute indicates that a function should be called +using the Swift calling convention for a function or function pointer. + +The lowering for the Swift calling convention, as described by the Swift +ABI documentation, occurs in multiple phases. The first, "high-level" +phase breaks down the formal parameters and results into innately direct +and indirect components, adds implicit paraameters for the generic +signature, and assigns the context and error ABI treatments to parameters +where applicable. The second phase breaks down the direct parameters +and results from the first phase and assigns them to registers or the +stack. The ``swiftcall`` convention only handles this second phase of +lowering; the C function type must accurately reflect the results +of the first phase, as follows: + +- Results classified as indirect by high-level lowering should be + represented as parameters with the ``swift_indirect_result`` attribute. + +- Results classified as direct by high-level lowering should be represented + as follows: + + - First, remove any empty direct results. + + - If there are no direct results, the C result type should be ``void``. + + - If there is one direct result, the C result type should be a type with + the exact layout of that result type. + + - If there are a multiple direct results, the C result type should be + a struct type with the exact layout of a tuple of those results. + +- Parameters classified as indirect by high-level lowering should be + represented as parameters of pointer type. + +- Parameters classified as direct by high-level lowering should be + omitted if they are empty types; otherwise, they should be represented + as a parameter type with a layout exactly matching the layout of the + Swift parameter type. + +- The context parameter, if present, should be represented as a trailing + parameter with the ``swift_context`` attribute. + +- The error result parameter, if present, should be represented as a + trailing parameter (always following a context parameter) with the + ``swift_error_result`` attribute. + +``swiftcall`` does not support variadic arguments or unprototyped functions. + +The parameter ABI treatment attributes are aspects of the function type. +A function type which which applies an ABI treatment attribute to a +parameter is a different type from an otherwise-identical function type +that does not. A single parameter may not have multiple ABI treatment +attributes. + +Support for this feature is target-dependent, although it should be +supported on every target that Swift supports. Query for this support +with ``__has_attribute(swiftcall)``. This implies support for the +``swift_context``, ``swift_error_result``, and ``swift_indirect_result`` +attributes. + + +thread +------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "","","","``thread``","","","" + +The ``__declspec(thread)`` attribute declares a variable with thread local +storage. It is available under the ``-fms-extensions`` flag for MSVC +compatibility. See the documentation for `__declspec(thread)`_ on MSDN. + +.. _`__declspec(thread)`: http://msdn.microsoft.com/en-us/library/9w1sdazb.aspx + +In Clang, ``__declspec(thread)`` is generally equivalent in functionality to the +GNU ``__thread`` keyword. The variable must not have a destructor and must have +a constant initializer, if any. The attribute only applies to variables +declared with static storage duration, such as globals, class static data +members, and static locals. + + +tls_model +--------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``tls_model``","``gnu::tls_model``","","","","","Yes" + +The ``tls_model`` attribute allows you to specify which thread-local storage +model to use. It accepts the following strings: + +* global-dynamic +* local-dynamic +* initial-exec +* local-exec + +TLS models are mutually exclusive. + + +trivial_abi +----------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``trivial_abi``","``clang::trivial_abi``","","","","","Yes" + +The ``trivial_abi`` attribute can be applied to a C++ class, struct, or union. +It instructs the compiler to pass and return the type using the C ABI for the +underlying type when the type would otherwise be considered non-trivial for the +purpose of calls. +A class annotated with `trivial_abi` can have non-trivial destructors or copy/move constructors without automatically becoming non-trivial for the purposes of calls. For example: + + .. code-block:: c++ + + // A is trivial for the purposes of calls because `trivial_abi` makes the + // user-provided special functions trivial. + struct __attribute__((trivial_abi)) A { + ~A(); + A(const A &); + A(A &&); + int x; + }; + + // B's destructor and copy/move constructor are considered trivial for the + // purpose of calls because A is trivial. + struct B { + A a; + }; + +If a type is trivial for the purposes of calls, has a non-trivial destructor, +and is passed as an argument by value, the convention is that the callee will +destroy the object before returning. + +Attribute ``trivial_abi`` has no effect in the following cases: + +- The class directly declares a virtual base or virtual methods. +- The class has a base class that is non-trivial for the purposes of calls. +- The class has a non-static data member whose type is non-trivial for the purposes of calls, which includes: + + - classes that are non-trivial for the purposes of calls + - __weak-qualified types in Objective-C++ + - arrays of any of the above + + +uninitialized +------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``uninitialized``","``clang::uninitialized``","","","","","" + +The command-line parameter ``-ftrivial-auto-var-init=*`` can be used to +initialize trivial automatic stack variables. By default, trivial automatic +stack variables are uninitialized. This attribute is used to override the +command-line parameter, forcing variables to remain uninitialized. It has no +semantic meaning in that using uninitialized values is undefined behavior, +it rather documents the programmer's intent. + + +Type Attributes +=============== + + +__single_inhertiance, __multiple_inheritance, __virtual_inheritance +------------------------------------------------------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "","","","","``__single_inheritance`` |br| ``__multiple_inheritance`` |br| ``__virtual_inheritance`` |br| ``__unspecified_inheritance``","","" + +This collection of keywords is enabled under ``-fms-extensions`` and controls +the pointer-to-member representation used on ``*-*-win32`` targets. + +The ``*-*-win32`` targets utilize a pointer-to-member representation which +varies in size and alignment depending on the definition of the underlying +class. + +However, this is problematic when a forward declaration is only available and +no definition has been made yet. In such cases, Clang is forced to utilize the +most general representation that is available to it. + +These keywords make it possible to use a pointer-to-member representation other +than the most general one regardless of whether or not the definition will ever +be present in the current translation unit. + +This family of keywords belong between the ``class-key`` and ``class-name``: + +.. code-block:: c++ + + struct __single_inheritance S; + int S::*i; + struct S {}; + +This keyword can be applied to class templates but only has an effect when used +on full specializations: + +.. code-block:: c++ + + template struct __single_inheritance A; // warning: inheritance model ignored on primary template + template struct __multiple_inheritance A; // warning: inheritance model ignored on partial specialization + template <> struct __single_inheritance A; + +Note that choosing an inheritance model less general than strictly necessary is +an error: + +.. code-block:: c++ + + struct __multiple_inheritance S; // error: inheritance model does not match definition + int S::*i; + struct S {}; + + +align_value +----------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``align_value``","","","","","","Yes" + +The align_value attribute can be added to the typedef of a pointer type or the +declaration of a variable of pointer or reference type. It specifies that the +pointer will point to, or the reference will bind to, only objects with at +least the provided alignment. This alignment value must be some positive power +of 2. + + .. code-block:: c + + typedef double * aligned_double_ptr __attribute__((align_value(64))); + void foo(double & x __attribute__((align_value(128)), + aligned_double_ptr y) { ... } + +If the pointer value does not have the specified alignment at runtime, the +behavior of the program is undefined. + + +empty_bases +----------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "","","","``empty_bases``","","","" + +The empty_bases attribute permits the compiler to utilize the +empty-base-optimization more frequently. +This attribute only applies to struct, class, and union types. +It is only supported when using the Microsoft C++ ABI. + + +enum_extensibility +------------------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``enum_extensibility``","``clang::enum_extensibility``","``clang::enum_extensibility``","","","","Yes" + +Attribute ``enum_extensibility`` is used to distinguish between enum definitions +that are extensible and those that are not. The attribute can take either +``closed`` or ``open`` as an argument. ``closed`` indicates a variable of the +enum type takes a value that corresponds to one of the enumerators listed in the +enum definition or, when the enum is annotated with ``flag_enum``, a value that +can be constructed using values corresponding to the enumerators. ``open`` +indicates a variable of the enum type can take any values allowed by the +standard and instructs clang to be more lenient when issuing warnings. + +.. code-block:: c + + enum __attribute__((enum_extensibility(closed))) ClosedEnum { + A0, A1 + }; + + enum __attribute__((enum_extensibility(open))) OpenEnum { + B0, B1 + }; + + enum __attribute__((enum_extensibility(closed),flag_enum)) ClosedFlagEnum { + C0 = 1 << 0, C1 = 1 << 1 + }; + + enum __attribute__((enum_extensibility(open),flag_enum)) OpenFlagEnum { + D0 = 1 << 0, D1 = 1 << 1 + }; + + void foo1() { + enum ClosedEnum ce; + enum OpenEnum oe; + enum ClosedFlagEnum cfe; + enum OpenFlagEnum ofe; + + ce = A1; // no warnings + ce = 100; // warning issued + oe = B1; // no warnings + oe = 100; // no warnings + cfe = C0 | C1; // no warnings + cfe = C0 | C1 | 4; // warning issued + ofe = D0 | D1; // no warnings + ofe = D0 | D1 | 4; // no warnings + } + + +flag_enum +--------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``flag_enum``","``clang::flag_enum``","``clang::flag_enum``","","","","Yes" + +This attribute can be added to an enumerator to signal to the compiler that it +is intended to be used as a flag type. This will cause the compiler to assume +that the range of the type includes all of the values that you can get by +manipulating bits of the enumerator when issuing warnings. + + +layout_version +-------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "","","","``layout_version``","","","" + +The layout_version attribute requests that the compiler utilize the class +layout rules of a particular compiler version. +This attribute only applies to struct, class, and union types. +It is only supported when using the Microsoft C++ ABI. + + +lto_visibility_public +--------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``lto_visibility_public``","``clang::lto_visibility_public``","``clang::lto_visibility_public``","","","","Yes" + +See :doc:`LTOVisibility`. + + +noderef +------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``noderef``","``clang::noderef``","``clang::noderef``","","","","" + +The ``noderef`` attribute causes clang to diagnose dereferences of annotated pointer types. +This is ideally used with pointers that point to special memory which cannot be read +from or written to, but allowing for the pointer to be used in pointer arithmetic. +The following are examples of valid expressions where dereferences are diagnosed: + +.. code-block:: c + + int __attribute__((noderef)) *p; + int x = *p; // warning + + int __attribute__((noderef)) **p2; + x = **p2; // warning + + int * __attribute__((noderef)) *p3; + p = *p3; // warning + + struct S { + int a; + }; + struct S __attribute__((noderef)) *s; + x = s->a; // warning + x = (*s).a; // warning + +Not all dereferences may diagnose a warning if the value directed by the pointer may not be +accessed. The following are examples of valid expressions where may not be diagnosed: + +.. code-block:: c + + int *q; + int __attribute__((noderef)) *p; + q = &*p; + q = *&p; + + struct S { + int a; + }; + struct S __attribute__((noderef)) *s; + p = &s->a; + p = &(*s).a; + +``noderef`` is currently only supported for pointers and arrays and not usable for +references or Objective-C object pointers. + +.. code-block: c++ + + int x = 2; + int __attribute__((noderef)) &y = x; // warning: 'noderef' can only be used on an array or pointer type + +.. code-block: objc + + id __attribute__((noderef)) obj = [NSObject new]; // warning: 'noderef' can only be used on an array or pointer type + + +novtable +-------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "","","","``novtable``","","","" + +This attribute can be added to a class declaration or definition to signal to +the compiler that constructors and destructors will not reference the virtual +function table. It is only supported when using the Microsoft C++ ABI. + + +objc_subclassing_restricted +--------------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``objc_subclassing_restricted``","``clang::objc_subclassing_restricted``","``clang::objc_subclassing_restricted``","","","","Yes" + +This attribute can be added to an Objective-C ``@interface`` declaration to +ensure that this class cannot be subclassed. + + +selectany +--------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``selectany``","``gnu::selectany``","","``selectany``","","","" + +This attribute appertains to a global symbol, causing it to have a weak +definition ( +`linkonce `_ +), allowing the linker to select any definition. + +For more information see +`gcc documentation `_ +or `msvc documentation `_. + + +transparent_union +----------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``transparent_union``","``gnu::transparent_union``","","","","","" + +This attribute can be applied to a union to change the behaviour of calls to +functions that have an argument with a transparent union type. The compiler +behaviour is changed in the following manner: + +- A value whose type is any member of the transparent union can be passed as an + argument without the need to cast that value. + +- The argument is passed to the function using the calling convention of the + first member of the transparent union. Consequently, all the members of the + transparent union should have the same calling convention as its first member. + +Transparent unions are not supported in C++. + + +Statement Attributes +==================== + + +#pragma clang loop +------------------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "","","","","","``clang loop`` |br| ``unroll`` |br| ``nounroll`` |br| ``unroll_and_jam`` |br| ``nounroll_and_jam``","" + +The ``#pragma clang loop`` directive allows loop optimization hints to be +specified for the subsequent loop. The directive allows pipelining to be +disabled, or vectorization, interleaving, and unrolling to be enabled or disabled. +Vector width, interleave count, unrolling count, and the initiation interval +for pipelining can be explicitly specified. See `language extensions +`_ +for details. + + +#pragma unroll, #pragma nounroll +-------------------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "","","","","","``clang loop`` |br| ``unroll`` |br| ``nounroll`` |br| ``unroll_and_jam`` |br| ``nounroll_and_jam``","" + +Loop unrolling optimization hints can be specified with ``#pragma unroll`` and +``#pragma nounroll``. The pragma is placed immediately before a for, while, +do-while, or c++11 range-based for loop. + +Specifying ``#pragma unroll`` without a parameter directs the loop unroller to +attempt to fully unroll the loop if the trip count is known at compile time and +attempt to partially unroll the loop if the trip count is not known at compile +time: + +.. code-block:: c++ + + #pragma unroll + for (...) { + ... + } + +Specifying the optional parameter, ``#pragma unroll _value_``, directs the +unroller to unroll the loop ``_value_`` times. The parameter may optionally be +enclosed in parentheses: + +.. code-block:: c++ + + #pragma unroll 16 + for (...) { + ... + } + + #pragma unroll(16) + for (...) { + ... + } + +Specifying ``#pragma nounroll`` indicates that the loop should not be unrolled: + +.. code-block:: c++ + + #pragma nounroll + for (...) { + ... + } + +``#pragma unroll`` and ``#pragma unroll _value_`` have identical semantics to +``#pragma clang loop unroll(full)`` and +``#pragma clang loop unroll_count(_value_)`` respectively. ``#pragma nounroll`` +is equivalent to ``#pragma clang loop unroll(disable)``. See +`language extensions +`_ +for further details including limitations of the unroll hints. + + +__read_only, __write_only, __read_write (read_only, write_only, read_write) +--------------------------------------------------------------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "","","","","``__read_only`` |br| ``read_only`` |br| ``__write_only`` |br| ``write_only`` |br| ``__read_write`` |br| ``read_write``","","" + +The access qualifiers must be used with image object arguments or pipe arguments +to declare if they are being read or written by a kernel or function. + +The read_only/__read_only, write_only/__write_only and read_write/__read_write +names are reserved for use as access qualifiers and shall not be used otherwise. + +.. code-block:: c + + kernel void + foo (read_only image2d_t imageA, + write_only image2d_t imageB) { + ... + } + +In the above example imageA is a read-only 2D image object, and imageB is a +write-only 2D image object. + +The read_write (or __read_write) qualifier can not be used with pipe. + +More details can be found in the OpenCL C language Spec v2.0, Section 6.6. + + +fallthrough +----------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "","``fallthrough`` |br| ``clang::fallthrough``","``fallthrough``","","","","" + +The ``fallthrough`` (or ``clang::fallthrough``) attribute is used +to annotate intentional fall-through +between switch labels. It can only be applied to a null statement placed at a +point of execution between any statement and the next switch label. It is +common to mark these places with a specific comment, but this attribute is +meant to replace comments with a more strict annotation, which can be checked +by the compiler. This attribute doesn't change semantics of the code and can +be used wherever an intended fall-through occurs. It is designed to mimic +control-flow statements like ``break;``, so it can be placed in most places +where ``break;`` can, but only if there are no statements on the execution path +between it and the next switch label. + +By default, Clang does not warn on unannotated fallthrough from one ``switch`` +case to another. Diagnostics on fallthrough without a corresponding annotation +can be enabled with the ``-Wimplicit-fallthrough`` argument. + +Here is an example: + +.. code-block:: c++ + + // compile with -Wimplicit-fallthrough + switch (n) { + case 22: + case 33: // no warning: no statements between case labels + f(); + case 44: // warning: unannotated fall-through + g(); + [[clang::fallthrough]]; + case 55: // no warning + if (x) { + h(); + break; + } + else { + i(); + [[clang::fallthrough]]; + } + case 66: // no warning + p(); + [[clang::fallthrough]]; // warning: fallthrough annotation does not + // directly precede case label + q(); + case 77: // warning: unannotated fall-through + r(); + } + + +intel_reqd_sub_group_size +------------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``intel_reqd_sub_group_size``","","","","","","Yes" + +The optional attribute intel_reqd_sub_group_size can be used to indicate that +the kernel must be compiled and executed with the specified subgroup size. When +this attribute is present, get_max_sub_group_size() is guaranteed to return the +specified integer value. This is important for the correctness of many subgroup +algorithms, and in some cases may be used by the compiler to generate more optimal +code. See `cl_intel_required_subgroup_size +` +for details. + + +opencl_unroll_hint +------------------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``opencl_unroll_hint``","","","","","","" + +The opencl_unroll_hint attribute qualifier can be used to specify that a loop +(for, while and do loops) can be unrolled. This attribute qualifier can be +used to specify full unrolling or partial unrolling by a specified amount. +This is a compiler hint and the compiler may ignore this directive. See +`OpenCL v2.0 `_ +s6.11.5 for details. + + +suppress +-------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "","``gsl::suppress``","","","","","" + +The ``[[gsl::suppress]]`` attribute suppresses specific +clang-tidy diagnostics for rules of the `C++ Core Guidelines`_ in a portable +way. The attribute can be attached to declarations, statements, and at +namespace scope. + +.. code-block:: c++ + + [[gsl::suppress("Rh-public")]] + void f_() { + int *p; + [[gsl::suppress("type")]] { + p = reinterpret_cast(7); + } + } + namespace N { + [[clang::suppress("type", "bounds")]]; + ... + } + +.. _`C++ Core Guidelines`: https://github.com/isocpp/CppCoreGuidelines/blob/master/CppCoreGuidelines.md#inforce-enforcement + + +AMD GPU Attributes +================== + + +amdgpu_flat_work_group_size +--------------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``amdgpu_flat_work_group_size``","``clang::amdgpu_flat_work_group_size``","","","","","Yes" + +The flat work-group size is the number of work-items in the work-group size +specified when the kernel is dispatched. It is the product of the sizes of the +x, y, and z dimension of the work-group. + +Clang supports the +``__attribute__((amdgpu_flat_work_group_size(, )))`` attribute for the +AMDGPU target. This attribute may be attached to a kernel function definition +and is an optimization hint. + +```` parameter specifies the minimum flat work-group size, and ```` +parameter specifies the maximum flat work-group size (must be greater than +````) to which all dispatches of the kernel will conform. Passing ``0, 0`` +as ``, `` implies the default behavior (``128, 256``). + +If specified, the AMDGPU target backend might be able to produce better machine +code for barriers and perform scratch promotion by estimating available group +segment size. + +An error will be given if: + - Specified values violate subtarget specifications; + - Specified values are not compatible with values provided through other + attributes. + + +amdgpu_num_sgpr +--------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``amdgpu_num_sgpr``","``clang::amdgpu_num_sgpr``","","","","","Yes" + +Clang supports the ``__attribute__((amdgpu_num_sgpr()))`` and +``__attribute__((amdgpu_num_vgpr()))`` attributes for the AMDGPU +target. These attributes may be attached to a kernel function definition and are +an optimization hint. + +If these attributes are specified, then the AMDGPU target backend will attempt +to limit the number of SGPRs and/or VGPRs used to the specified value(s). The +number of used SGPRs and/or VGPRs may further be rounded up to satisfy the +allocation requirements or constraints of the subtarget. Passing ``0`` as +``num_sgpr`` and/or ``num_vgpr`` implies the default behavior (no limits). + +These attributes can be used to test the AMDGPU target backend. It is +recommended that the ``amdgpu_waves_per_eu`` attribute be used to control +resources such as SGPRs and VGPRs since it is aware of the limits for different +subtargets. + +An error will be given if: + - Specified values violate subtarget specifications; + - Specified values are not compatible with values provided through other + attributes; + - The AMDGPU target backend is unable to create machine code that can meet the + request. + + +amdgpu_num_vgpr +--------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``amdgpu_num_vgpr``","``clang::amdgpu_num_vgpr``","","","","","Yes" + +Clang supports the ``__attribute__((amdgpu_num_sgpr()))`` and +``__attribute__((amdgpu_num_vgpr()))`` attributes for the AMDGPU +target. These attributes may be attached to a kernel function definition and are +an optimization hint. + +If these attributes are specified, then the AMDGPU target backend will attempt +to limit the number of SGPRs and/or VGPRs used to the specified value(s). The +number of used SGPRs and/or VGPRs may further be rounded up to satisfy the +allocation requirements or constraints of the subtarget. Passing ``0`` as +``num_sgpr`` and/or ``num_vgpr`` implies the default behavior (no limits). + +These attributes can be used to test the AMDGPU target backend. It is +recommended that the ``amdgpu_waves_per_eu`` attribute be used to control +resources such as SGPRs and VGPRs since it is aware of the limits for different +subtargets. + +An error will be given if: + - Specified values violate subtarget specifications; + - Specified values are not compatible with values provided through other + attributes; + - The AMDGPU target backend is unable to create machine code that can meet the + request. + + +amdgpu_waves_per_eu +------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``amdgpu_waves_per_eu``","``clang::amdgpu_waves_per_eu``","","","","","Yes" + +A compute unit (CU) is responsible for executing the wavefronts of a work-group. +It is composed of one or more execution units (EU), which are responsible for +executing the wavefronts. An EU can have enough resources to maintain the state +of more than one executing wavefront. This allows an EU to hide latency by +switching between wavefronts in a similar way to symmetric multithreading on a +CPU. In order to allow the state for multiple wavefronts to fit on an EU, the +resources used by a single wavefront have to be limited. For example, the number +of SGPRs and VGPRs. Limiting such resources can allow greater latency hiding, +but can result in having to spill some register state to memory. + +Clang supports the ``__attribute__((amdgpu_waves_per_eu([, ])))`` +attribute for the AMDGPU target. This attribute may be attached to a kernel +function definition and is an optimization hint. + +```` parameter specifies the requested minimum number of waves per EU, and +*optional* ```` parameter specifies the requested maximum number of waves +per EU (must be greater than ```` if specified). If ```` is omitted, +then there is no restriction on the maximum number of waves per EU other than +the one dictated by the hardware for which the kernel is compiled. Passing +``0, 0`` as ``, `` implies the default behavior (no limits). + +If specified, this attribute allows an advanced developer to tune the number of +wavefronts that are capable of fitting within the resources of an EU. The AMDGPU +target backend can use this information to limit resources, such as number of +SGPRs, number of VGPRs, size of available group and private memory segments, in +such a way that guarantees that at least ```` wavefronts and at most +```` wavefronts are able to fit within the resources of an EU. Requesting +more wavefronts can hide memory latency but limits available registers which +can result in spilling. Requesting fewer wavefronts can help reduce cache +thrashing, but can reduce memory latency hiding. + +This attribute controls the machine code generated by the AMDGPU target backend +to ensure it is capable of meeting the requested values. However, when the +kernel is executed, there may be other reasons that prevent meeting the request, +for example, there may be wavefronts from other kernels executing on the EU. + +An error will be given if: + - Specified values violate subtarget specifications; + - Specified values are not compatible with values provided through other + attributes; + - The AMDGPU target backend is unable to create machine code that can meet the + request. + + +OpenCL Address Spaces +===================== +The address space qualifier may be used to specify the region of memory that is +used to allocate the object. OpenCL supports the following address spaces: +__generic(generic), __global(global), __local(local), __private(private), +__constant(constant). + + .. code-block:: c + + __constant int c = ...; + + __generic int* foo(global int* g) { + __local int* l; + private int p; + ... + return l; + } + +More details can be found in the OpenCL C language Spec v2.0, Section 6.5. + +constant +-------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "","","","","``__constant`` |br| ``constant``","","" + +The constant address space attribute signals that an object is located in +a constant (non-modifiable) memory region. It is available to all work items. +Any type can be annotated with the constant address space attribute. Objects +with the constant address space qualifier can be declared in any scope and must +have an initializer. + + +generic +------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "","","","","``__generic`` |br| ``generic``","","" + +The generic address space attribute is only available with OpenCL v2.0 and later. +It can be used with pointer types. Variables in global and local scope and +function parameters in non-kernel functions can have the generic address space +type attribute. It is intended to be a placeholder for any other address space +except for '__constant' in OpenCL code which can be used with multiple address +spaces. + + +global +------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "","","","","``__global`` |br| ``global``","","" + +The global address space attribute specifies that an object is allocated in +global memory, which is accessible by all work items. The content stored in this +memory area persists between kernel executions. Pointer types to the global +address space are allowed as function parameters or local variables. Starting +with OpenCL v2.0, the global address space can be used with global (program +scope) variables and static local variable as well. + + +local +----- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "","","","","``__local`` |br| ``local``","","" + +The local address space specifies that an object is allocated in the local (work +group) memory area, which is accessible to all work items in the same work +group. The content stored in this memory region is not accessible after +the kernel execution ends. In a kernel function scope, any variable can be in +the local address space. In other scopes, only pointer types to the local address +space are allowed. Local address space variables cannot have an initializer. + + +private +------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "","","","","``__private`` |br| ``private``","","" + +The private address space specifies that an object is allocated in the private +(work item) memory. Other work items cannot access the same memory area and its +content is destroyed after work item execution ends. Local variables can be +declared in the private address space. Function arguments are always in the +private address space. Kernel function arguments of a pointer or an array type +cannot point to the private address space. + + +Calling Conventions +=================== +Clang supports several different calling conventions, depending on the target +platform and architecture. The calling convention used for a function determines +how parameters are passed, how results are returned to the caller, and other +low-level details of calling a function. + +aarch64_vector_pcs +------------------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``aarch64_vector_pcs``","``clang::aarch64_vector_pcs``","``clang::aarch64_vector_pcs``","","","","" + +On AArch64 targets, this attribute changes the calling convention of a +function to preserve additional floating-point and Advanced SIMD registers +relative to the default calling convention used for AArch64. + +This means it is more efficient to call such functions from code that performs +extensive floating-point and vector calculations, because fewer live SIMD and FP +registers need to be saved. This property makes it well-suited for e.g. +floating-point or vector math library functions, which are typically leaf +functions that require a small number of registers. + +However, using this attribute also means that it is more expensive to call +a function that adheres to the default calling convention from within such +a function. Therefore, it is recommended that this attribute is only used +for leaf functions. + +For more information, see the documentation for `aarch64_vector_pcs`_ on +the Arm Developer website. + +.. _`aarch64_vector_pcs`: https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi + + +fastcall +-------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``fastcall``","``gnu::fastcall``","","","``__fastcall`` |br| ``_fastcall``","","" + +On 32-bit x86 targets, this attribute changes the calling convention of a +function to use ECX and EDX as register parameters and clear parameters off of +the stack on return. This convention does not support variadic calls or +unprototyped functions in C, and has no effect on x86_64 targets. This calling +convention is supported primarily for compatibility with existing code. Users +seeking register parameters should use the ``regparm`` attribute, which does +not require callee-cleanup. See the documentation for `__fastcall`_ on MSDN. + +.. _`__fastcall`: http://msdn.microsoft.com/en-us/library/6xa169sk.aspx + + +ms_abi +------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``ms_abi``","``gnu::ms_abi``","","","","","" + +On non-Windows x86_64 targets, this attribute changes the calling convention of +a function to match the default convention used on Windows x86_64. This +attribute has no effect on Windows targets or non-x86_64 targets. + + +pcs +--- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``pcs``","``gnu::pcs``","","","","","" + +On ARM targets, this attribute can be used to select calling conventions +similar to ``stdcall`` on x86. Valid parameter values are "aapcs" and +"aapcs-vfp". + + +preserve_all +------------ +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``preserve_all``","``clang::preserve_all``","``clang::preserve_all``","","","","" + +On X86-64 and AArch64 targets, this attribute changes the calling convention of +a function. The ``preserve_all`` calling convention attempts to make the code +in the caller even less intrusive than the ``preserve_most`` calling convention. +This calling convention also behaves identical to the ``C`` calling convention +on how arguments and return values are passed, but it uses a different set of +caller/callee-saved registers. This removes the burden of saving and +recovering a large register set before and after the call in the caller. If +the arguments are passed in callee-saved registers, then they will be +preserved by the callee across the call. This doesn't apply for values +returned in callee-saved registers. + +- On X86-64 the callee preserves all general purpose registers, except for + R11. R11 can be used as a scratch register. Furthermore it also preserves + all floating-point registers (XMMs/YMMs). + +The idea behind this convention is to support calls to runtime functions +that don't need to call out to any other functions. + +This calling convention, like the ``preserve_most`` calling convention, will be +used by a future version of the Objective-C runtime and should be considered +experimental at this time. + + +preserve_most +------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``preserve_most``","``clang::preserve_most``","``clang::preserve_most``","","","","" + +On X86-64 and AArch64 targets, this attribute changes the calling convention of +a function. The ``preserve_most`` calling convention attempts to make the code +in the caller as unintrusive as possible. This convention behaves identically +to the ``C`` calling convention on how arguments and return values are passed, +but it uses a different set of caller/callee-saved registers. This alleviates +the burden of saving and recovering a large register set before and after the +call in the caller. If the arguments are passed in callee-saved registers, +then they will be preserved by the callee across the call. This doesn't +apply for values returned in callee-saved registers. + +- On X86-64 the callee preserves all general purpose registers, except for + R11. R11 can be used as a scratch register. Floating-point registers + (XMMs/YMMs) are not preserved and need to be saved by the caller. + +The idea behind this convention is to support calls to runtime functions +that have a hot path and a cold path. The hot path is usually a small piece +of code that doesn't use many registers. The cold path might need to call out to +another function and therefore only needs to preserve the caller-saved +registers, which haven't already been saved by the caller. The +`preserve_most` calling convention is very similar to the ``cold`` calling +convention in terms of caller/callee-saved registers, but they are used for +different types of function calls. ``coldcc`` is for function calls that are +rarely executed, whereas `preserve_most` function calls are intended to be +on the hot path and definitely executed a lot. Furthermore ``preserve_most`` +doesn't prevent the inliner from inlining the function call. + +This calling convention will be used by a future version of the Objective-C +runtime and should therefore still be considered experimental at this time. +Although this convention was created to optimize certain runtime calls to +the Objective-C runtime, it is not limited to this runtime and might be used +by other runtimes in the future too. The current implementation only +supports X86-64 and AArch64, but the intention is to support more architectures +in the future. + + +regcall +------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``regcall``","``gnu::regcall``","","","``__regcall``","","" + +On x86 targets, this attribute changes the calling convention to +`__regcall`_ convention. This convention aims to pass as many arguments +as possible in registers. It also tries to utilize registers for the +return value whenever it is possible. + +.. _`__regcall`: https://software.intel.com/en-us/node/693069 + + +regparm +------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``regparm``","``gnu::regparm``","","","","","" + +On 32-bit x86 targets, the regparm attribute causes the compiler to pass +the first three integer parameters in EAX, EDX, and ECX instead of on the +stack. This attribute has no effect on variadic functions, and all parameters +are passed via the stack as normal. + + +stdcall +------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``stdcall``","``gnu::stdcall``","","","``__stdcall`` |br| ``_stdcall``","","" + +On 32-bit x86 targets, this attribute changes the calling convention of a +function to clear parameters off of the stack on return. This convention does +not support variadic calls or unprototyped functions in C, and has no effect on +x86_64 targets. This calling convention is used widely by the Windows API and +COM applications. See the documentation for `__stdcall`_ on MSDN. + +.. _`__stdcall`: http://msdn.microsoft.com/en-us/library/zxk0tw93.aspx + + +thiscall +-------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``thiscall``","``gnu::thiscall``","","","``__thiscall`` |br| ``_thiscall``","","" + +On 32-bit x86 targets, this attribute changes the calling convention of a +function to use ECX for the first parameter (typically the implicit ``this`` +parameter of C++ methods) and clear parameters off of the stack on return. This +convention does not support variadic calls or unprototyped functions in C, and +has no effect on x86_64 targets. See the documentation for `__thiscall`_ on +MSDN. + +.. _`__thiscall`: http://msdn.microsoft.com/en-us/library/ek8tkfbw.aspx + + +vectorcall +---------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``vectorcall``","``clang::vectorcall``","``clang::vectorcall``","","``__vectorcall`` |br| ``_vectorcall``","","" + +On 32-bit x86 *and* x86_64 targets, this attribute changes the calling +convention of a function to pass vector parameters in SSE registers. + +On 32-bit x86 targets, this calling convention is similar to ``__fastcall``. +The first two integer parameters are passed in ECX and EDX. Subsequent integer +parameters are passed in memory, and callee clears the stack. On x86_64 +targets, the callee does *not* clear the stack, and integer parameters are +passed in RCX, RDX, R8, and R9 as is done for the default Windows x64 calling +convention. + +On both 32-bit x86 and x86_64 targets, vector and floating point arguments are +passed in XMM0-XMM5. Homogeneous vector aggregates of up to four elements are +passed in sequential SSE registers if enough are available. If AVX is enabled, +256 bit vectors are passed in YMM0-YMM5. Any vector or aggregate type that +cannot be passed in registers for any reason is passed by reference, which +allows the caller to align the parameter memory. + +See the documentation for `__vectorcall`_ on MSDN for more details. + +.. _`__vectorcall`: http://msdn.microsoft.com/en-us/library/dn375768.aspx + + +Consumed Annotation Checking +============================ +Clang supports additional attributes for checking basic resource management +properties, specifically for unique objects that have a single owning reference. +The following attributes are currently supported, although **the implementation +for these annotations is currently in development and are subject to change.** + +callable_when +------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``callable_when``","``clang::callable_when``","","","","","Yes" + +Use ``__attribute__((callable_when(...)))`` to indicate what states a method +may be called in. Valid states are unconsumed, consumed, or unknown. Each +argument to this attribute must be a quoted string. E.g.: + +``__attribute__((callable_when("unconsumed", "unknown")))`` + + +consumable +---------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``consumable``","``clang::consumable``","","","","","Yes" + +Each ``class`` that uses any of the typestate annotations must first be marked +using the ``consumable`` attribute. Failure to do so will result in a warning. + +This attribute accepts a single parameter that must be one of the following: +``unknown``, ``consumed``, or ``unconsumed``. + + +param_typestate +--------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``param_typestate``","``clang::param_typestate``","","","","","Yes" + +This attribute specifies expectations about function parameters. Calls to an +function with annotated parameters will issue a warning if the corresponding +argument isn't in the expected state. The attribute is also used to set the +initial state of the parameter when analyzing the function's body. + + +return_typestate +---------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``return_typestate``","``clang::return_typestate``","","","","","Yes" + +The ``return_typestate`` attribute can be applied to functions or parameters. +When applied to a function the attribute specifies the state of the returned +value. The function's body is checked to ensure that it always returns a value +in the specified state. On the caller side, values returned by the annotated +function are initialized to the given state. + +When applied to a function parameter it modifies the state of an argument after +a call to the function returns. The function's body is checked to ensure that +the parameter is in the expected state before returning. + + +set_typestate +------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``set_typestate``","``clang::set_typestate``","","","","","Yes" + +Annotate methods that transition an object into a new state with +``__attribute__((set_typestate(new_state)))``. The new state must be +unconsumed, consumed, or unknown. + + +test_typestate +-------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``test_typestate``","``clang::test_typestate``","","","","","Yes" + +Use ``__attribute__((test_typestate(tested_state)))`` to indicate that a method +returns true if the object is in the specified state.. + + +Type Safety Checking +==================== +Clang supports additional attributes to enable checking type safety properties +that can't be enforced by the C type system. To see warnings produced by these +checks, ensure that -Wtype-safety is enabled. Use cases include: + +* MPI library implementations, where these attributes enable checking that + the buffer type matches the passed ``MPI_Datatype``; +* for HDF5 library there is a similar use case to MPI; +* checking types of variadic functions' arguments for functions like + ``fcntl()`` and ``ioctl()``. + +You can detect support for these attributes with ``__has_attribute()``. For +example: + +.. code-block:: c++ + + #if defined(__has_attribute) + # if __has_attribute(argument_with_type_tag) && \ + __has_attribute(pointer_with_type_tag) && \ + __has_attribute(type_tag_for_datatype) + # define ATTR_MPI_PWT(buffer_idx, type_idx) __attribute__((pointer_with_type_tag(mpi,buffer_idx,type_idx))) + /* ... other macros ... */ + # endif + #endif + + #if !defined(ATTR_MPI_PWT) + # define ATTR_MPI_PWT(buffer_idx, type_idx) + #endif + + int MPI_Send(void *buf, int count, MPI_Datatype datatype /*, other args omitted */) + ATTR_MPI_PWT(1,3); + +argument_with_type_tag +---------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``argument_with_type_tag`` |br| ``pointer_with_type_tag``","``clang::argument_with_type_tag`` |br| ``clang::pointer_with_type_tag``","``clang::argument_with_type_tag`` |br| ``clang::pointer_with_type_tag``","","","","" + +Use ``__attribute__((argument_with_type_tag(arg_kind, arg_idx, +type_tag_idx)))`` on a function declaration to specify that the function +accepts a type tag that determines the type of some other argument. + +This attribute is primarily useful for checking arguments of variadic functions +(``pointer_with_type_tag`` can be used in most non-variadic cases). + +In the attribute prototype above: + * ``arg_kind`` is an identifier that should be used when annotating all + applicable type tags. + * ``arg_idx`` provides the position of a function argument. The expected type of + this function argument will be determined by the function argument specified + by ``type_tag_idx``. In the code example below, "3" means that the type of the + function's third argument will be determined by ``type_tag_idx``. + * ``type_tag_idx`` provides the position of a function argument. This function + argument will be a type tag. The type tag will determine the expected type of + the argument specified by ``arg_idx``. In the code example below, "2" means + that the type tag associated with the function's second argument should agree + with the type of the argument specified by ``arg_idx``. + +For example: + +.. code-block:: c++ + + int fcntl(int fd, int cmd, ...) + __attribute__(( argument_with_type_tag(fcntl,3,2) )); + // The function's second argument will be a type tag; this type tag will + // determine the expected type of the function's third argument. + + +pointer_with_type_tag +--------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``argument_with_type_tag`` |br| ``pointer_with_type_tag``","``clang::argument_with_type_tag`` |br| ``clang::pointer_with_type_tag``","``clang::argument_with_type_tag`` |br| ``clang::pointer_with_type_tag``","","","","" + +Use ``__attribute__((pointer_with_type_tag(ptr_kind, ptr_idx, type_tag_idx)))`` +on a function declaration to specify that the function accepts a type tag that +determines the pointee type of some other pointer argument. + +In the attribute prototype above: + * ``ptr_kind`` is an identifier that should be used when annotating all + applicable type tags. + * ``ptr_idx`` provides the position of a function argument; this function + argument will have a pointer type. The expected pointee type of this pointer + type will be determined by the function argument specified by + ``type_tag_idx``. In the code example below, "1" means that the pointee type + of the function's first argument will be determined by ``type_tag_idx``. + * ``type_tag_idx`` provides the position of a function argument; this function + argument will be a type tag. The type tag will determine the expected pointee + type of the pointer argument specified by ``ptr_idx``. In the code example + below, "3" means that the type tag associated with the function's third + argument should agree with the pointee type of the pointer argument specified + by ``ptr_idx``. + +For example: + +.. code-block:: c++ + + typedef int MPI_Datatype; + int MPI_Send(void *buf, int count, MPI_Datatype datatype /*, other args omitted */) + __attribute__(( pointer_with_type_tag(mpi,1,3) )); + // The function's 3rd argument will be a type tag; this type tag will + // determine the expected pointee type of the function's 1st argument. + + +type_tag_for_datatype +--------------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``type_tag_for_datatype``","``clang::type_tag_for_datatype``","``clang::type_tag_for_datatype``","","","","" + +When declaring a variable, use +``__attribute__((type_tag_for_datatype(kind, type)))`` to create a type tag that +is tied to the ``type`` argument given to the attribute. + +In the attribute prototype above: + * ``kind`` is an identifier that should be used when annotating all applicable + type tags. + * ``type`` indicates the name of the type. + +Clang supports annotating type tags of two forms. + + * **Type tag that is a reference to a declared identifier.** + Use ``__attribute__((type_tag_for_datatype(kind, type)))`` when declaring that + identifier: + + .. code-block:: c++ + + typedef int MPI_Datatype; + extern struct mpi_datatype mpi_datatype_int + __attribute__(( type_tag_for_datatype(mpi,int) )); + #define MPI_INT ((MPI_Datatype) &mpi_datatype_int) + // &mpi_datatype_int is a type tag. It is tied to type "int". + + * **Type tag that is an integral literal.** + Declare a ``static const`` variable with an initializer value and attach + ``__attribute__((type_tag_for_datatype(kind, type)))`` on that declaration: + + .. code-block:: c++ + + typedef int MPI_Datatype; + static const MPI_Datatype mpi_datatype_int + __attribute__(( type_tag_for_datatype(mpi,int) )) = 42; + #define MPI_INT ((MPI_Datatype) 42) + // The number 42 is a type tag. It is tied to type "int". + + +The ``type_tag_for_datatype`` attribute also accepts an optional third argument +that determines how the type of the function argument specified by either +``arg_idx`` or ``ptr_idx`` is compared against the type associated with the type +tag. (Recall that for the ``argument_with_type_tag`` attribute, the type of the +function argument specified by ``arg_idx`` is compared against the type +associated with the type tag. Also recall that for the ``pointer_with_type_tag`` +attribute, the pointee type of the function argument specified by ``ptr_idx`` is +compared against the type associated with the type tag.) There are two supported +values for this optional third argument: + + * ``layout_compatible`` will cause types to be compared according to + layout-compatibility rules (In C++11 [class.mem] p 17, 18, see the + layout-compatibility rules for two standard-layout struct types and for two + standard-layout union types). This is useful when creating a type tag + associated with a struct or union type. For example: + + .. code-block:: c++ + + /* In mpi.h */ + typedef int MPI_Datatype; + struct internal_mpi_double_int { double d; int i; }; + extern struct mpi_datatype mpi_datatype_double_int + __attribute__(( type_tag_for_datatype(mpi, + struct internal_mpi_double_int, layout_compatible) )); + + #define MPI_DOUBLE_INT ((MPI_Datatype) &mpi_datatype_double_int) + + int MPI_Send(void *buf, int count, MPI_Datatype datatype, ...) + __attribute__(( pointer_with_type_tag(mpi,1,3) )); + + /* In user code */ + struct my_pair { double a; int b; }; + struct my_pair *buffer; + MPI_Send(buffer, 1, MPI_DOUBLE_INT /*, ... */); // no warning because the + // layout of my_pair is + // compatible with that of + // internal_mpi_double_int + + struct my_int_pair { int a; int b; } + struct my_int_pair *buffer2; + MPI_Send(buffer2, 1, MPI_DOUBLE_INT /*, ... */); // warning because the + // layout of my_int_pair + // does not match that of + // internal_mpi_double_int + + * ``must_be_null`` specifies that the function argument specified by either + ``arg_idx`` (for the ``argument_with_type_tag`` attribute) or ``ptr_idx`` (for + the ``pointer_with_type_tag`` attribute) should be a null pointer constant. + The second argument to the ``type_tag_for_datatype`` attribute is ignored. For + example: + + .. code-block:: c++ + + /* In mpi.h */ + typedef int MPI_Datatype; + extern struct mpi_datatype mpi_datatype_null + __attribute__(( type_tag_for_datatype(mpi, void, must_be_null) )); + + #define MPI_DATATYPE_NULL ((MPI_Datatype) &mpi_datatype_null) + int MPI_Send(void *buf, int count, MPI_Datatype datatype, ...) + __attribute__(( pointer_with_type_tag(mpi,1,3) )); + + /* In user code */ + struct my_pair { double a; int b; }; + struct my_pair *buffer; + MPI_Send(buffer, 1, MPI_DATATYPE_NULL /*, ... */); // warning: MPI_DATATYPE_NULL + // was specified but buffer + // is not a null pointer + + +Nullability Attributes +====================== +Whether a particular pointer may be "null" is an important concern when working with pointers in the C family of languages. The various nullability attributes indicate whether a particular pointer can be null or not, which makes APIs more expressive and can help static analysis tools identify bugs involving null pointers. Clang supports several kinds of nullability attributes: the ``nonnull`` and ``returns_nonnull`` attributes indicate which function or method parameters and result types can never be null, while nullability type qualifiers indicate which pointer types can be null (``_Nullable``) or cannot be null (``_Nonnull``). + +The nullability (type) qualifiers express whether a value of a given pointer type can be null (the ``_Nullable`` qualifier), doesn't have a defined meaning for null (the ``_Nonnull`` qualifier), or for which the purpose of null is unclear (the ``_Null_unspecified`` qualifier). Because nullability qualifiers are expressed within the type system, they are more general than the ``nonnull`` and ``returns_nonnull`` attributes, allowing one to express (for example) a nullable pointer to an array of nonnull pointers. Nullability qualifiers are written to the right of the pointer to which they apply. For example: + + .. code-block:: c + + // No meaningful result when 'ptr' is null (here, it happens to be undefined behavior). + int fetch(int * _Nonnull ptr) { return *ptr; } + + // 'ptr' may be null. + int fetch_or_zero(int * _Nullable ptr) { + return ptr ? *ptr : 0; + } + + // A nullable pointer to non-null pointers to const characters. + const char *join_strings(const char * _Nonnull * _Nullable strings, unsigned n); + +In Objective-C, there is an alternate spelling for the nullability qualifiers that can be used in Objective-C methods and properties using context-sensitive, non-underscored keywords. For example: + + .. code-block:: objective-c + + @interface NSView : NSResponder + - (nullable NSView *)ancestorSharedWithView:(nonnull NSView *)aView; + @property (assign, nullable) NSView *superview; + @property (readonly, nonnull) NSArray *subviews; + @end + +_Nonnull +-------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "","","","","``_Nonnull``","","" + +The ``_Nonnull`` nullability qualifier indicates that null is not a meaningful value for a value of the ``_Nonnull`` pointer type. For example, given a declaration such as: + + .. code-block:: c + + int fetch(int * _Nonnull ptr); + +a caller of ``fetch`` should not provide a null value, and the compiler will produce a warning if it sees a literal null value passed to ``fetch``. Note that, unlike the declaration attribute ``nonnull``, the presence of ``_Nonnull`` does not imply that passing null is undefined behavior: ``fetch`` is free to consider null undefined behavior or (perhaps for backward-compatibility reasons) defensively handle null. + + +_Null_unspecified +----------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "","","","","``_Null_unspecified``","","" + +The ``_Null_unspecified`` nullability qualifier indicates that neither the ``_Nonnull`` nor ``_Nullable`` qualifiers make sense for a particular pointer type. It is used primarily to indicate that the role of null with specific pointers in a nullability-annotated header is unclear, e.g., due to overly-complex implementations or historical factors with a long-lived API. + + +_Nullable +--------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "","","","","``_Nullable``","","" + +The ``_Nullable`` nullability qualifier indicates that a value of the ``_Nullable`` pointer type can be null. For example, given: + + .. code-block:: c + + int fetch_or_zero(int * _Nullable ptr); + +a caller of ``fetch_or_zero`` can provide null. + + +nonnull +------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``nonnull``","``gnu::nonnull``","","","","","" + +The ``nonnull`` attribute indicates that some function parameters must not be null, and can be used in several different ways. It's original usage (`from GCC `_) is as a function (or Objective-C method) attribute that specifies which parameters of the function are nonnull in a comma-separated list. For example: + + .. code-block:: c + + extern void * my_memcpy (void *dest, const void *src, size_t len) + __attribute__((nonnull (1, 2))); + +Here, the ``nonnull`` attribute indicates that parameters 1 and 2 +cannot have a null value. Omitting the parenthesized list of parameter indices means that all parameters of pointer type cannot be null: + + .. code-block:: c + + extern void * my_memcpy (void *dest, const void *src, size_t len) + __attribute__((nonnull)); + +Clang also allows the ``nonnull`` attribute to be placed directly on a function (or Objective-C method) parameter, eliminating the need to specify the parameter index ahead of type. For example: + + .. code-block:: c + + extern void * my_memcpy (void *dest __attribute__((nonnull)), + const void *src __attribute__((nonnull)), size_t len); + +Note that the ``nonnull`` attribute indicates that passing null to a non-null parameter is undefined behavior, which the optimizer may take advantage of to, e.g., remove null checks. The ``_Nonnull`` type qualifier indicates that a pointer cannot be null in a more general manner (because it is part of the type system) and does not imply undefined behavior, making it more widely applicable. + + +returns_nonnull +--------------- +.. csv-table:: Supported Syntaxes + :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``" + + "``returns_nonnull``","``gnu::returns_nonnull``","","","","","Yes" + +The ``returns_nonnull`` attribute indicates that a particular function (or Objective-C method) always returns a non-null pointer. For example, a particular system ``malloc`` might be defined to terminate a process when memory is not available rather than returning a null pointer: + + .. code-block:: c + + extern void * malloc (size_t size) __attribute__((returns_nonnull)); + +The ``returns_nonnull`` attribute implies that returning a null pointer is undefined behavior, which the optimizer may take advantage of. The ``_Nonnull`` type qualifier indicates that a pointer cannot be null in a more general manner (because it is part of the type system) and does not imply undefined behavior, making it more widely applicable + + From 829bb6fd5a8050acd9e35083f1ef2c0bcae97e04 Mon Sep 17 00:00:00 2001 From: Martin Storsjo Date: Thu, 7 Feb 2019 11:29:02 +0000 Subject: [PATCH 102/125] [docs] Update the release notes for the backported feature with thunks for ARM64 llvm-svn: 353397 --- lld/docs/ReleaseNotes.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index 77971b25ac483f..9849111e0930b3 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -61,7 +61,8 @@ COFF Improvements * lld now can link against import libraries produced by GNU tools. -* lld can create thunks for ARM, to allow linking images over 16 MB. +* lld can create thunks for ARM and ARM64, to allow linking larger images + (over 16 MB for ARM and over 128 MB for ARM64) * Several speed and memory usage improvements. From ac29b659cda7f36265d31a5c3a549efa8feaaa5f Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 7 Feb 2019 12:32:55 +0000 Subject: [PATCH 103/125] Fix sphinx warning llvm-svn: 353398 --- llvm/docs/ReleaseNotes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 2bd435ca9d2961..c0a8ea6f9e3216 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -82,7 +82,7 @@ Changes to the ARM Backend Changes to the Hexagon Target --------------------------- +----------------------------- * Added support for Hexagon/HVX V66 ISA. From 98ebe7460199b9cd79eb562b5e8705ad28f5513f Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 7 Feb 2019 12:40:33 +0000 Subject: [PATCH 104/125] Merging r353399: ------------------------------------------------------------------------ r353399 | hans | 2019-02-07 13:39:35 +0100 (Thu, 07 Feb 2019) | 1 line docs: add missingkeyfunction to doctree, fix title ------------------------------------------------------------------------ llvm-svn: 353400 --- lld/docs/index.rst | 1 + lld/docs/missingkeyfunction.rst | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/lld/docs/index.rst b/lld/docs/index.rst index da1c894f3d8339..2564e9b6310fd7 100644 --- a/lld/docs/index.rst +++ b/lld/docs/index.rst @@ -173,4 +173,5 @@ document soon. AtomLLD WebAssembly windows_support + missingkeyfunction ReleaseNotes diff --git a/lld/docs/missingkeyfunction.rst b/lld/docs/missingkeyfunction.rst index 410c749c3b036d..54ad3251f794e7 100644 --- a/lld/docs/missingkeyfunction.rst +++ b/lld/docs/missingkeyfunction.rst @@ -1,5 +1,5 @@ -Missing Key Method -================== +Missing Key Function +==================== If your build failed with a linker error something like this:: From 2699311ea8e3da5f0df7925c6438375c2161ede5 Mon Sep 17 00:00:00 2001 From: Kai Nacke Date: Thu, 7 Feb 2019 21:09:53 +0000 Subject: [PATCH 105/125] Add external project LDC to release notes. LDC, the LLVM-based D compiler, is already ready for LLVM 8.0.0. llvm-svn: 353466 --- llvm/docs/ReleaseNotes.rst | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index c0a8ea6f9e3216..561faa71fd5dbf 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -156,6 +156,21 @@ Changes to the DAG infrastructure External Open Source Projects Using LLVM 8 ========================================== +LDC - the LLVM-based D compiler +------------------------------- + +`D `_ is a language with C-like syntax and static typing. It +pragmatically combines efficiency, control, and modeling power, with safety and +programmer productivity. D supports powerful concepts like Compile-Time Function +Execution (CTFE) and Template Meta-Programming, provides an innovative approach +to concurrency and offers many classical paradigms. + +`LDC `_ uses the frontend from the reference compiler +combined with LLVM as backend to produce efficient native code. LDC targets +x86/x86_64 systems like Linux, OS X, FreeBSD and Windows and also Linux on ARM +and PowerPC (32/64 bit). Ports to other architectures like AArch64 and MIPS64 +are underway. + Zig Programming Language ------------------------ From ce52769ae20b269ad4cedae12bc7ceb4e6732506 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 8 Feb 2019 11:06:27 +0000 Subject: [PATCH 106/125] Merging r351387, r351765, and r353374: ------------------------------------------------------------------------ r351387 | jfb | 2019-01-16 23:22:38 +0100 (Wed, 16 Jan 2019) | 7 lines [NFC] Factor out + document build requirements Summary: This change factors out compiler checking / warning, and documents LLVM_FORCE_USE_OLD_TOOLCHAIN. It doesn't introduce any functional changes nor policy changes, these will come late. Subscribers: mgorny, jkorous, dexonsmith, llvm-commits Differential Revision: https://reviews.llvm.org/D56799 ------------------------------------------------------------------------ ------------------------------------------------------------------------ r351765 | jfb | 2019-01-22 00:53:52 +0100 (Tue, 22 Jan 2019) | 24 lines Document toolchain update policy Summary: Capture the current agreed-upon toolchain update policy based on the following discussions: - LLVM dev meeting 2018 BoF "Migrating to C++14, and beyond!" llvm.org/devmtg/2018-10/talk-abstracts.html#bof3 - A Short Policy Proposal Regarding Host Compilers lists.llvm.org/pipermail/llvm-dev/2018-May/123238.html - Using C++14 code in LLVM (2018) lists.llvm.org/pipermail/llvm-dev/2018-May/123182.html - Using C++14 code in LLVM (2017) lists.llvm.org/pipermail/llvm-dev/2017-October/118673.html - Using C++14 code in LLVM (2016) lists.llvm.org/pipermail/llvm-dev/2016-October/105483.html - Document and Enforce new Host Compiler Policy llvm.org/D47073 - Require GCC 5.1 and LLVM 3.5 at a minimum llvm.org/D46723 Subscribers: jkorous, dexonsmith, llvm-commits Differential Revision: https://reviews.llvm.org/D56819 ------------------------------------------------------------------------ ------------------------------------------------------------------------ r353374 | jfb | 2019-02-07 06:20:00 +0100 (Thu, 07 Feb 2019) | 12 lines Bump minimum toolchain version Summary: The RFC on moving past C++11 got good traction: http://lists.llvm.org/pipermail/llvm-dev/2019-January/129452.html This patch therefore bumps the toolchain versions according to our policy: llvm.org/docs/DeveloperPolicy.html#toolchain Subscribers: mgorny, jkorous, dexonsmith, llvm-commits, mehdi_amini, jyknight, rsmith, chandlerc, smeenai, hans, reames, lattner, lhames, erichkeane Differential Revision: https://reviews.llvm.org/D57264 ------------------------------------------------------------------------ llvm-svn: 353512 --- llvm/CMakeLists.txt | 5 +- llvm/cmake/modules/CheckCompilerVersion.cmake | 128 ++++++++++++------ llvm/docs/CMake.rst | 9 ++ llvm/docs/DeveloperPolicy.rst | 43 +++++- llvm/docs/GettingStarted.rst | 61 +++++---- 5 files changed, 177 insertions(+), 69 deletions(-) diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 6e5221ebfd339b..27754f339493bd 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -383,9 +383,12 @@ option(LLVM_ENABLE_EXPENSIVE_CHECKS "Enable expensive checks" OFF) set(LLVM_ABI_BREAKING_CHECKS "WITH_ASSERTS" CACHE STRING "Enable abi-breaking checks. Can be WITH_ASSERTS, FORCE_ON or FORCE_OFF.") -option(LLVM_FORCE_USE_OLD_HOST_TOOLCHAIN +option(LLVM_FORCE_USE_OLD_TOOLCHAIN "Set to ON to force using an old, unsupported host toolchain." OFF) +option(LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN + "Set to ON to only warn when using a toolchain which is about to be deprecated, instead of emitting an error." OFF) + option(LLVM_USE_INTEL_JITEVENTS "Use Intel JIT API to inform Intel(R) VTune(TM) Amplifier XE 2011 about JIT code" OFF) diff --git a/llvm/cmake/modules/CheckCompilerVersion.cmake b/llvm/cmake/modules/CheckCompilerVersion.cmake index adf500ad53a722..b1cb5527422896 100644 --- a/llvm/cmake/modules/CheckCompilerVersion.cmake +++ b/llvm/cmake/modules/CheckCompilerVersion.cmake @@ -1,52 +1,94 @@ -# Check if the host compiler is new enough. LLVM requires at least GCC 4.8, -# MSVC 2015 (Update 3), or Clang 3.1. +# Check if the host compiler is new enough. +# These versions are updated based on the following policy: +# llvm.org/docs/DeveloperPolicy.html#toolchain include(CheckCXXSourceCompiles) -if(NOT DEFINED LLVM_COMPILER_CHECKED) - set(LLVM_COMPILER_CHECKED ON) +set(GCC_MIN 4.8) +set(GCC_SOFT_ERROR 5.1) +set(CLANG_MIN 3.1) +set(CLANG_SOFT_ERROR 3.5) +set(APPLECLANG_MIN 3.1) +set(APPLECLANG_SOFT_ERROR 6.0) +set(MSVC_MIN 19.00.24213.1) +set(MSVC_SOFT_ERROR 19.1) - if(NOT LLVM_FORCE_USE_OLD_TOOLCHAIN) - if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8) - message(FATAL_ERROR "Host GCC version must be at least 4.8!") - endif() - elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 3.1) - message(FATAL_ERROR "Host Clang version must be at least 3.1!") - endif() +# Map the above GCC versions to dates: https://gcc.gnu.org/develop.html#timeline +set(GCC_MIN_DATE 20130322) +set(GCC_SOFT_ERROR_DATE 20150422) - if (CMAKE_CXX_SIMULATE_ID MATCHES "MSVC") - if (CMAKE_CXX_SIMULATE_VERSION VERSION_LESS 19.0) - message(FATAL_ERROR "Host Clang must have at least -fms-compatibility-version=19.0") - endif() - set(CLANG_CL 1) - elseif(NOT LLVM_ENABLE_LIBCXX) - # Otherwise, test that we aren't using too old of a version of libstdc++ - # with the Clang compiler. This is tricky as there is no real way to - # check the version of libstdc++ directly. Instead we test for a known - # bug in libstdc++4.6 that is fixed in libstdc++4.7. - set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) - set(OLD_CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES}) - set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -std=c++0x") - check_cxx_source_compiles(" -#include -std::atomic x(0.0f); -int main() { return (float)x; }" - LLVM_NO_OLD_LIBSTDCXX) - if(NOT LLVM_NO_OLD_LIBSTDCXX) - message(FATAL_ERROR "Host Clang must be able to find libstdc++4.8 or newer!") - endif() - set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS}) - set(CMAKE_REQUIRED_LIBRARIES ${OLD_CMAKE_REQUIRED_LIBRARIES}) - endif() - elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC") - if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 19.0) - message(FATAL_ERROR "Host Visual Studio must be at least 2015") - elseif(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 19.00.24213.1) - message(WARNING "Host Visual Studio should at least be 2015 Update 3 (MSVC 19.00.24213.1)" - " due to miscompiles from earlier versions") + +if(DEFINED LLVM_COMPILER_CHECKED) + return() +endif() +set(LLVM_COMPILER_CHECKED ON) + +if(LLVM_FORCE_USE_OLD_TOOLCHAIN) + return() +endif() + +function(check_compiler_version NAME NICE_NAME MINIMUM_VERSION SOFT_ERROR_VERSION) + if(NOT CMAKE_CXX_COMPILER_ID STREQUAL NAME) + return() + endif() + if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS MINIMUM_VERSION) + message(FATAL_ERROR "Host ${NICE_NAME} version must be at least ${MINIMUM_VERSION}, your version is ${CMAKE_CXX_COMPILER_VERSION}.") + elseif(CMAKE_CXX_COMPILER_VERSION VERSION_LESS SOFT_ERROR_VERSION) + if(LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN) + message(WARNING "Host ${NICE_NAME} version should be at least ${SOFT_ERROR_VERSION} because LLVM will soon use new C++ features which your toolchain version doesn't support. Your version is ${CMAKE_CXX_COMPILER_VERSION}. Ignoring because you've set LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN, but very soon your toolchain won't be supported.") + else() + message(FATAL_ERROR "Host ${NICE_NAME} version should be at least ${SOFT_ERROR_VERSION} because LLVM will soon use new C++ features which your toolchain version doesn't support. Your version is ${CMAKE_CXX_COMPILER_VERSION}. You can temporarily opt out using LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN, but very soon your toolchain won't be supported.") + endif() + endif() +endfunction(check_compiler_version) + +check_compiler_version("GNU" "GCC" ${GCC_MIN} ${GCC_SOFT_ERROR}) +check_compiler_version("Clang" "Clang" ${CLANG_MIN} ${CLANG_SOFT_ERROR}) +check_compiler_version("AppleClang" "Apple Clang" ${APPLECLANG_MIN} ${APPLECLANG_SOFT_ERROR}) +check_compiler_version("MSVC" "Visual Studio" ${MSVC_MIN} ${MSVC_SOFT_ERROR}) + +if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + if (CMAKE_CXX_SIMULATE_ID MATCHES "MSVC") + if (CMAKE_CXX_SIMULATE_VERSION VERSION_LESS MSVC_MIN) + message(FATAL_ERROR "Host Clang must have at least -fms-compatibility-version=${MSVC_MIN}, your version is ${CMAKE_CXX_COMPILER_VERSION}.") + endif() + set(CLANG_CL 1) + elseif(NOT LLVM_ENABLE_LIBCXX) + # Test that we aren't using too old of a version of libstdc++. + set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) + set(OLD_CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES}) + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -std=c++0x") + check_cxx_source_compiles(" +#include +#if defined(__GLIBCXX__) +#if __GLIBCXX__ < ${GCC_MIN_DATE} +#error Unsupported libstdc++ version +#endif +#endif +int main() { return 0; } +" + LLVM_LIBSTDCXX_MIN) + if(NOT LLVM_LIBSTDCXX_MIN) + message(FATAL_ERROR "libstdc++ version must be at least ${GCC_MIN}.") + endif() + check_cxx_source_compiles(" +#include +#if defined(__GLIBCXX__) +#if __GLIBCXX__ < ${GCC_SOFT_ERROR_DATE} +#error Unsupported libstdc++ version +#endif +#endif +int main() { return 0; } +" + LLVM_LIBSTDCXX_SOFT_ERROR) + if(NOT LLVM_LIBSTDCXX_SOFT_ERROR) + if(LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN) + message(WARNING "libstdc++ version should be at least ${GCC_SOFT_ERROR} because LLVM will soon use new C++ features which your toolchain version doesn't support. Ignoring because you've set LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN, but very soon your toolchain won't be supported.") + else() + message(FATAL_ERROR "libstdc++ version should be at least ${GCC_SOFT_ERROR} because LLVM will soon use new C++ features which your toolchain version doesn't support. You can temporarily opt out using LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN, but very soon your toolchain won't be supported.") endif() endif() + set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS}) + set(CMAKE_REQUIRED_LIBRARIES ${OLD_CMAKE_REQUIRED_LIBRARIES}) endif() endif() diff --git a/llvm/docs/CMake.rst b/llvm/docs/CMake.rst index a5a574e5707777..eb219c58560b62 100644 --- a/llvm/docs/CMake.rst +++ b/llvm/docs/CMake.rst @@ -573,6 +573,15 @@ LLVM-specific variables options, which are passed to the CCACHE_MAXSIZE and CCACHE_DIR environment variables, respectively. +**LLVM_FORCE_USE_OLD_TOOLCHAIN**:BOOL + If enabled, the compiler and standard library versions won't be checked. LLVM + may not compile at all, or might fail at runtime due to known bugs in these + toolchains. + +**LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN**:BOOL + If enabled, the compiler version check will only warn when using a toolchain + which is about to be deprecated, instead of emitting an error. + CMake Caches ============ diff --git a/llvm/docs/DeveloperPolicy.rst b/llvm/docs/DeveloperPolicy.rst index 09912940cde2d2..4ed67cbf1ef1de 100644 --- a/llvm/docs/DeveloperPolicy.rst +++ b/llvm/docs/DeveloperPolicy.rst @@ -22,7 +22,7 @@ This policy is also designed to accomplish the following objectives: #. Make life as simple and easy for contributors as possible. -#. Keep the tip of tree as stable as possible. +#. Keep the top of tree as stable as possible. #. Establish awareness of the project's :ref:`copyright, license, and patent policies ` with contributors to the project. @@ -638,6 +638,47 @@ In essences, these rules are necessary for targets to gain and retain their status, but also markers to define bit-rot, and will be used to clean up the tree from unmaintained targets. +.. _toolchain: + +Updating Toolchain Requirements +------------------------------- + +We intend to require newer toolchains as time goes by. This means LLVM's +codebase can use newer versions of C++ as they get standardized. Requiring newer +toolchains to build LLVM can be painful for those building LLVM; therefore, it +will only be done through the following process: + + * Generally, try to support LLVM and GCC versions from the last 3 years at a + minimum. This time-based guideline is not strict: we may support much older + compilers, or decide to support fewer versions. + + * An RFC is sent to the `llvm-dev mailing list `_ + + - Detail upsides of the version increase (e.g. which newer C++ language or + library features LLVM should use; avoid miscompiles in particular compiler + versions, etc). + - Detail downsides on important platforms (e.g. Ubuntu LTS status). + + * Once the RFC reaches consensus, update the CMake toolchain version checks as + well as the :doc:`getting started` guide. We want to + soft-error when developers compile LLVM. We say "soft-error" because the + error can be turned into a warning using a CMake flag. This is an important + step: LLVM still doesn't have code which requires the new toolchains, but it + soon will. If you compile LLVM but don't read the mailing list, we should + tell you! + + * Ensure that at least one LLVM release has had this soft-error. Not all + developers compile LLVM top-of-tree. These release-bound developers should + also be told about upcoming changes. + + * Turn the soft-error into a hard-error after said LLVM release has branched. + + * Update the :doc:`coding standards` to allow the new + features we've explicitly approved in the RFC. + + * Start using the new features in LLVM's codebase. + + .. _copyright-license-patents: Copyright, License, and Patents diff --git a/llvm/docs/GettingStarted.rst b/llvm/docs/GettingStarted.rst index b714cc6601a046..c22e821e3b7a05 100644 --- a/llvm/docs/GettingStarted.rst +++ b/llvm/docs/GettingStarted.rst @@ -170,7 +170,7 @@ uses the package and provides other details. Package Version Notes =========================================================== ============ ========================================== `GNU Make `_ 3.79, 3.79.1 Makefile/build processor -`GCC `_ >=4.8.0 C/C++ compiler\ :sup:`1` +`GCC `_ >=5.1.0 C/C++ compiler\ :sup:`1` `python `_ >=2.7 Automated test suite\ :sup:`2` `zlib `_ >=1.2.3.4 Compression library\ :sup:`3` =========================================================== ============ ========================================== @@ -220,15 +220,25 @@ Host C++ Toolchain, both Compiler and Standard Library ------------------------------------------------------ LLVM is very demanding of the host C++ compiler, and as such tends to expose -bugs in the compiler. We are also planning to follow improvements and -developments in the C++ language and library reasonably closely. As such, we -require a modern host C++ toolchain, both compiler and standard library, in -order to build LLVM. +bugs in the compiler. We also attempt to follow improvements and developments in +the C++ language and library reasonably closely. As such, we require a modern +host C++ toolchain, both compiler and standard library, in order to build LLVM. -For the most popular host toolchains we check for specific minimum versions in -our build systems: +LLVM is written using the subset of C++ documented in :doc:`coding +standards`. To enforce this language version, we check the most +popular host toolchains for specific minimum versions in our build systems: + +* Clang 3.5 +* Apple Clang 6.0 +* GCC 5.1 +* Visual Studio 2017 + +The below versions currently soft-error as we transition to the new compiler +versions listed above. The LLVM codebase is currently known to compile correctly +with the following compilers, though this will change in the near future: * Clang 3.1 +* Apple Clang 3.1 * GCC 4.8 * Visual Studio 2015 (Update 3) @@ -282,33 +292,36 @@ The first step is to get a recent GCC toolchain installed. The most common distribution on which users have struggled with the version requirements is Ubuntu Precise, 12.04 LTS. For this distribution, one easy option is to install the `toolchain testing PPA`_ and use it to install a modern GCC. There is -a really nice discussions of this on the `ask ubuntu stack exchange`_. However, -not all users can use PPAs and there are many other distributions, so it may be -necessary (or just useful, if you're here you *are* doing compiler development -after all) to build and install GCC from source. It is also quite easy to do -these days. +a really nice discussions of this on the `ask ubuntu stack exchange`_ and a +`github gist`_ with updated commands. However, not all users can use PPAs and +there are many other distributions, so it may be necessary (or just useful, if +you're here you *are* doing compiler development after all) to build and install +GCC from source. It is also quite easy to do these days. .. _toolchain testing PPA: https://launchpad.net/~ubuntu-toolchain-r/+archive/test .. _ask ubuntu stack exchange: - http://askubuntu.com/questions/271388/how-to-install-gcc-4-8-in-ubuntu-12-04-from-the-terminal + https://askubuntu.com/questions/466651/how-do-i-use-the-latest-gcc-on-ubuntu/581497#58149 +.. _github gist: + https://gist.github.com/application2000/73fd6f4bf1be6600a2cf9f56315a2d91 -Easy steps for installing GCC 4.8.2: +Easy steps for installing GCC 5.1.0: .. code-block:: console - % wget https://ftp.gnu.org/gnu/gcc/gcc-4.8.2/gcc-4.8.2.tar.bz2 - % wget https://ftp.gnu.org/gnu/gcc/gcc-4.8.2/gcc-4.8.2.tar.bz2.sig + % gcc_version=5.1.0 + % wget https://ftp.gnu.org/gnu/gcc/gcc-${gcc_version}/gcc-${gcc_version}.tar.bz2 + % wget https://ftp.gnu.org/gnu/gcc/gcc-${gcc_version}/gcc-${gcc_version}.tar.bz2.sig % wget https://ftp.gnu.org/gnu/gnu-keyring.gpg - % signature_invalid=`gpg --verify --no-default-keyring --keyring ./gnu-keyring.gpg gcc-4.8.2.tar.bz2.sig` + % signature_invalid=`gpg --verify --no-default-keyring --keyring ./gnu-keyring.gpg gcc-${gcc_version}.tar.bz2.sig` % if [ $signature_invalid ]; then echo "Invalid signature" ; exit 1 ; fi - % tar -xvjf gcc-4.8.2.tar.bz2 - % cd gcc-4.8.2 + % tar -xvjf gcc-${gcc_version}.tar.bz2 + % cd gcc-${gcc_version} % ./contrib/download_prerequisites % cd .. - % mkdir gcc-4.8.2-build - % cd gcc-4.8.2-build - % $PWD/../gcc-4.8.2/configure --prefix=$HOME/toolchains --enable-languages=c,c++ + % mkdir gcc-${gcc_version}-build + % cd gcc-${gcc_version}-build + % $PWD/../gcc-${gcc_version}/configure --prefix=$HOME/toolchains --enable-languages=c,c++ % make -j$(nproc) % make install @@ -316,7 +329,7 @@ For more details, check out the excellent `GCC wiki entry`_, where I got most of this information from. .. _GCC wiki entry: - http://gcc.gnu.org/wiki/InstallingGCC + https://gcc.gnu.org/wiki/InstallingGCC Once you have a GCC toolchain, configure your build of LLVM to use the new toolchain for your host compiler and C++ standard library. Because the new @@ -336,7 +349,7 @@ If you fail to set rpath, most LLVM binaries will fail on startup with a message from the loader similar to ``libstdc++.so.6: version `GLIBCXX_3.4.20' not found``. This means you need to tweak the -rpath linker flag. -When you build Clang, you will need to give *it* access to modern C++11 +When you build Clang, you will need to give *it* access to modern C++ standard library in order to use it as your new host in part of a bootstrap. There are two easy ways to do this, either build (and install) libc++ along with Clang and then use it with the ``-stdlib=libc++`` compile and link flag, From 22558d3318659d5f7f33b1397abb8f238573c387 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 8 Feb 2019 14:17:53 +0000 Subject: [PATCH 107/125] Merging r353463: ------------------------------------------------------------------------ r353463 | smeenai | 2019-02-07 21:58:04 +0100 (Thu, 07 Feb 2019) | 4 lines [cmake] Pass LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN to NATIVE configure We should propagate this down to host builds so that e.g. people using an optimized tablegen can do the sub-configure successfully. ------------------------------------------------------------------------ llvm-svn: 353517 --- llvm/cmake/modules/CrossCompile.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/cmake/modules/CrossCompile.cmake b/llvm/cmake/modules/CrossCompile.cmake index b239816c825392..bc3b210f01859d 100644 --- a/llvm/cmake/modules/CrossCompile.cmake +++ b/llvm/cmake/modules/CrossCompile.cmake @@ -52,6 +52,7 @@ function(llvm_create_cross_target_internal target_name toolchain buildtype) -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD="${experimental_targets_to_build_arg}" -DLLVM_DEFAULT_TARGET_TRIPLE="${TARGET_TRIPLE}" -DLLVM_TARGET_ARCH="${LLVM_TARGET_ARCH}" + -DLLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN="${LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN}" ${build_type_flags} ${linker_flag} ${external_clang_dir} WORKING_DIRECTORY ${LLVM_${target_name}_BUILD} DEPENDS CREATE_LLVM_${target_name} From 15f159c37f077fcff868a3d805608a671f7f9d02 Mon Sep 17 00:00:00 2001 From: Rui Ueyama Date: Fri, 8 Feb 2019 22:03:46 +0000 Subject: [PATCH 108/125] - Update ReleaseNotes for lld 8.0.0. llvm-svn: 353574 --- lld/docs/ReleaseNotes.rst | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index 9849111e0930b3..2b2aa1e75b39f8 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -13,10 +13,12 @@ lld 8.0.0 Release Notes Introduction ============ -This document contains the release notes for the lld linker, release 8.0.0. -Here we describe the status of lld, including major improvements -from the previous release. All lld releases may be downloaded -from the `LLVM releases web site `_. +lld is a high-performance linker that supports ELF (Unix), COFF (Windows), +Mach-O (macOS), MinGW and WebAssembly. lld is command-line-compatible with +GNU linkers and Microsoft link.exe and is significantly faster than the +system default linkers. + +nlld 8.0.0 has lots of feature improvements and bug fixes. Non-comprehensive list of changes in this release ================================================= @@ -33,16 +35,35 @@ ELF Improvements non-superpages to a superpage if they are aligned to the superpage size. (`r342746 `_) +* lld now attempts to place a ``.note`` segment in the first page of a + generated file, so that you can find some important information + (``.note.gnu.build-id`` in particular) in a core file even if a core + file is truncated by ulimit. + (`r349524 `_) + +* lld now reports an error if ``_GLOBAL_OFFSET_TABLE_`` symbol is + defined by an input object file, as the symbol is supposed to be + synthesized by the linker. + (`r347854 `_) + * lld/Hexagon can now link Linux kernel and musl libc for Qualcomm Hexagon ISA. * Initial MSP430 ISA support has landed. -* The following flags have been added: ``-z interpose``, ``-z global`` - * lld now uses the ``sigrie`` instruction as a trap instruction for MIPS targets. +* lld now creates a TLS segment for AArch64 with a slightly larger + alignment requirement, so that the loader makes a few bytes room + before each TLS segment at runtime. The aim of this change is to + make room to accomodate nonstandard Android TLS slots while keeping + the compatibility with the standard AArch64 ABI. + (`r350681 `_) + +* The following flags have been added: ``-z interpose``, ``-z global``, ``-z + nodefaultlib`` + COFF Improvements ----------------- @@ -95,11 +116,6 @@ MinGW Improvements Previously, the ``--build-id`` option did not actually generate a build id unless ``--pdb`` was specified. -MachO Improvements ------------------- - -* Item 1. - WebAssembly Improvements ------------------------ From 62ead032aaa88d89db56294449c54d73c80d3204 Mon Sep 17 00:00:00 2001 From: Rui Ueyama Date: Mon, 11 Feb 2019 21:26:23 +0000 Subject: [PATCH 109/125] Minor update to lld/ReleaseNotes. llvm-svn: 353749 --- lld/docs/ReleaseNotes.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index 2b2aa1e75b39f8..0bebfb3fb1cec8 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -61,8 +61,9 @@ ELF Improvements the compatibility with the standard AArch64 ABI. (`r350681 `_) -* The following flags have been added: ``-z interpose``, ``-z global``, ``-z - nodefaultlib`` +* The following flags have been added: ``--call-graph-profile``, + ``--no-call-graph-profile``, ``--warn-ifunc-textrel``, + ``-z interpose``, ``-z global``, ``-z nodefaultlib`` COFF Improvements ----------------- From e055d7ffe123c04bcd2fcc9d31c6ff2e3ce7a08a Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 12 Feb 2019 08:35:38 +0000 Subject: [PATCH 110/125] Merging r353495: ------------------------------------------------------------------------ r353495 | jfb | 2019-02-08 02:29:17 +0100 (Fri, 08 Feb 2019) | 32 lines Variable auto-init: fix __block initialization Summary: Automatic initialization [1] of __block variables was trampling over the block's headers after they'd been initialized, which caused self-init usage to crash, such as here: typedef struct XYZ { void (^block)(); } *xyz_t; __attribute__((noinline)) xyz_t create(void (^block)()) { xyz_t myself = malloc(sizeof(struct XYZ)); myself->block = block; return myself; } int main() { __block xyz_t captured = create(^(){ (void)captured; }); } This type of code shouldn't be broken by variable auto-init, even if it's sketchy. [1] With -ftrivial-auto-var-init=pattern Reviewers: rjmccall, pcc, kcc Subscribers: jkorous, dexonsmith, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D57797 ------------------------------------------------------------------------ llvm-svn: 353807 --- clang/lib/CodeGen/CGDecl.cpp | 10 ++++--- .../test/CodeGenCXX/trivial-auto-var-init.cpp | 26 +++++++++++++++++++ 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp index 5959d889b45516..b98657ffd8006c 100644 --- a/clang/lib/CodeGen/CGDecl.cpp +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -1631,11 +1631,15 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { ? LangOptions::TrivialAutoVarInitKind::Uninitialized : getContext().getLangOpts().getTrivialAutoVarInit())); - auto initializeWhatIsTechnicallyUninitialized = [&]() { + auto initializeWhatIsTechnicallyUninitialized = [&](Address Loc) { if (trivialAutoVarInit == LangOptions::TrivialAutoVarInitKind::Uninitialized) return; + // Only initialize a __block's storage: we always initialize the header. + if (emission.IsEscapingByRef) + Loc = emitBlockByrefAddress(Loc, &D, /*follow=*/false); + CharUnits Size = getContext().getTypeSizeInChars(type); if (!Size.isZero()) { switch (trivialAutoVarInit) { @@ -1713,7 +1717,7 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { }; if (isTrivialInitializer(Init)) { - initializeWhatIsTechnicallyUninitialized(); + initializeWhatIsTechnicallyUninitialized(Loc); return; } @@ -1727,7 +1731,7 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { } if (!constant) { - initializeWhatIsTechnicallyUninitialized(); + initializeWhatIsTechnicallyUninitialized(Loc); LValue lv = MakeAddrLValue(Loc, type); lv.setNonGC(true); return EmitExprAsInit(Init, &D, lv, capturedByInit); diff --git a/clang/test/CodeGenCXX/trivial-auto-var-init.cpp b/clang/test/CodeGenCXX/trivial-auto-var-init.cpp index b795c0755bd41d..37ff770abf57eb 100644 --- a/clang/test/CodeGenCXX/trivial-auto-var-init.cpp +++ b/clang/test/CodeGenCXX/trivial-auto-var-init.cpp @@ -30,6 +30,32 @@ void test_block() { used(block); } +// Using the variable being initialized is typically UB in C, but for blocks we +// can be nice: they imply extra book-keeping and we can do the auto-init before +// any of said book-keeping. +// +// UNINIT-LABEL: test_block_self_init( +// ZERO-LABEL: test_block_self_init( +// ZERO: %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>, align 8 +// ZERO: %captured1 = getelementptr inbounds %struct.__block_byref_captured, %struct.__block_byref_captured* %captured, i32 0, i32 4 +// ZERO-NEXT: store %struct.XYZ* null, %struct.XYZ** %captured1, align 8 +// ZERO: %call = call %struct.XYZ* @create( +// PATTERN-LABEL: test_block_self_init( +// PATTERN: %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>, align 8 +// PATTERN: %captured1 = getelementptr inbounds %struct.__block_byref_captured, %struct.__block_byref_captured* %captured, i32 0, i32 4 +// PATTERN-NEXT: store %struct.XYZ* inttoptr (i64 -6148914691236517206 to %struct.XYZ*), %struct.XYZ** %captured1, align 8 +// PATTERN: %call = call %struct.XYZ* @create( +void test_block_self_init() { + using Block = void (^)(); + typedef struct XYZ { + Block block; + } * xyz_t; + extern xyz_t create(Block block); + __block xyz_t captured = create(^() { + (void)captured; + }); +} + // This type of code is currently not handled by zero / pattern initialization. // The test will break when that is fixed. // UNINIT-LABEL: test_goto_unreachable_value( From 0cbe0b9fa2d5ccf4e3a05d6fc31fcb8c5123c1aa Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 12 Feb 2019 08:45:37 +0000 Subject: [PATCH 111/125] ReleaseNotes about the toolchain version cmake check Based on text from JF! llvm-svn: 353808 --- llvm/docs/ReleaseNotes.rst | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 561faa71fd5dbf..9e2ee95c651944 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -40,6 +40,22 @@ Non-comprehensive list of changes in this release functionality, or simply have a lot to talk about), see the `NOTE` below for adding a new subsection. +* As `discussed on the mailing list + `_, + building LLVM will soon require more recent toolchains as follows: + + ============= ==== + Clang 3.5 + Apple Clang 6.0 + GCC 5.1 + Visual Studio 2017 + ============= ==== + + A new CMake check when configuring LLVM provides a soft-error if your + toolchain will become unsupported soon. You can opt out of the soft-error by + setting the ``LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN`` CMake variable to + ``ON``. + * The **llvm-cov** tool can now export lcov trace files using the `-format=lcov` option of the `export` command. From 75ff7d07215a502f909ff305e04d514f1752a1d7 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 12 Feb 2019 09:11:50 +0000 Subject: [PATCH 112/125] Merging r353551 and r353809: Also removed the text about Clang 9. ------------------------------------------------------------------------ r353551 | metzman | 2019-02-08 20:35:04 +0100 (Fri, 08 Feb 2019) | 13 lines Document libFuzzer on Windows. Summary: Document that libFuzzer supports Windows, how to get it, and its limitations. Reviewers: kcc, morehouse, rnk, metzman Reviewed By: kcc, rnk, metzman Subscribers: hans, rnk Differential Revision: https://reviews.llvm.org/D57597 ------------------------------------------------------------------------ ------------------------------------------------------------------------ r353809 | hans | 2019-02-12 10:08:52 +0100 (Tue, 12 Feb 2019) | 1 line LibFuzzer.rst: double backticks ------------------------------------------------------------------------ llvm-svn: 353811 --- llvm/docs/LibFuzzer.rst | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/llvm/docs/LibFuzzer.rst b/llvm/docs/LibFuzzer.rst index 0737fbbcd93050..37b0833599a945 100644 --- a/llvm/docs/LibFuzzer.rst +++ b/llvm/docs/LibFuzzer.rst @@ -645,10 +645,20 @@ coverage set of the process (since the fuzzer is in-process). In other words, by using more external dependencies we will slow down the fuzzer while the main reason for it to exist is extreme speed. -Q. What about Windows then? The fuzzer contains code that does not build on Windows. +Q. Does libFuzzer Support Windows? ------------------------------------------------------------------------------------ -Volunteers are welcome. +Yes, libFuzzer now supports Windows. Initial support was added in r341082. +You can download a build of Clang for Windows +that has libFuzzer from +`LLVM Snapshot Builds `_. + +Using libFuzzer on Windows without ASAN is unsupported. Building fuzzers with the +``/MD`` (dynamic runtime library) compile option is unsupported. Support for these +may be added in the future. Linking fuzzers with the ``/INCREMENTAL`` link option +(or the ``/DEBUG`` option which implies it) is also unsupported. + +Send any questions or comments to the mailing list: libfuzzer(#)googlegroups.com Q. When libFuzzer is not a good solution for a problem? --------------------------------------------------------- From 018cd5fdf09cf9ff67754497659b9a3cb2bddcd5 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 12 Feb 2019 10:13:48 +0000 Subject: [PATCH 113/125] Merging r352607 r352608 r353015 r353061 r353138 r353141 r353334 r353489 And re-generate expectations for a test: $ utils/update_llc_test_checks.py --llc-binary ../build.release/bin/llc test/CodeGen/X86/x87-schedule.ll Will also merge cfe r353142 for a clang-side test. This merge was requested in PR40667. ------------------------------------------------------------------------ r352607 | ctopper | 2019-01-30 08:08:44 +0100 (Wed, 30 Jan 2019) | 1 line [X86] Add FPSW as a Def on some FP instructions that were missing it. ------------------------------------------------------------------------ ------------------------------------------------------------------------ r352608 | ctopper | 2019-01-30 08:33:24 +0100 (Wed, 30 Jan 2019) | 5 lines [X86] Remove a couple places where we unnecessarily pass 0 to the EmitPriority of some FP instruction aliases. NFC As far as I can tell we already won't emit these aliases due to an operand count check in the tablegen code. Removing these because I couldn't make sense of the inconsistency between fadd and fmul from reading the code. I checked the AsmMatcher and AsmWriter files before and after this change and there were no differences. ------------------------------------------------------------------------ ------------------------------------------------------------------------ r353015 | ctopper | 2019-02-04 05:15:10 +0100 (Mon, 04 Feb 2019) | 3 lines [X86] Print %st(0) as %st when its implicit to the instruction. Continue printing it as %st(0) when its encoded in the instruction. This is a step back from the change I made in r352985. This appears to be more consistent with gcc and objdump behavior. ------------------------------------------------------------------------ ------------------------------------------------------------------------ r353061 | ctopper | 2019-02-04 18:28:18 +0100 (Mon, 04 Feb 2019) | 5 lines [X86] Print all register forms of x87 fadd/fsub/fdiv/fmul as having two arguments where on is %st. All of these instructions consume one encoded register and the other register is %st. They either write the result to %st or the encoded register. Previously we printed both arguments when the encoded register was written. And we printed one argument when the result was written to %st. For the stack popping forms the encoded register is always the destination and we didn't print both operands. This was inconsistent with gcc and objdump and just makes the output assembly code harder to read. This patch changes things to always print both operands making us consistent with gcc and objdump. The parser should still be able to handle the single register forms just as it did before. This also matches the GNU assembler behavior. ------------------------------------------------------------------------ ------------------------------------------------------------------------ r353138 | ctopper | 2019-02-05 05:48:23 +0100 (Tue, 05 Feb 2019) | 1 line [X86] Add test case from PR40529. NFC ------------------------------------------------------------------------ ------------------------------------------------------------------------ r353141 | ctopper | 2019-02-05 07:13:06 +0100 (Tue, 05 Feb 2019) | 16 lines [X86] Connect the default fpsr and dirflag clobbers in inline assembly to the registers we have defined for them. Summary: We don't currently map these constraints to physical register numbers so they don't make it to the MachineIR representation of inline assembly. This could have problems for proper dependency tracking in the machine schedulers though I don't have a test case that shows that. Reviewers: rnk Reviewed By: rnk Subscribers: eraman, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D57641 ------------------------------------------------------------------------ ------------------------------------------------------------------------ r353334 | ctopper | 2019-02-06 20:50:59 +0100 (Wed, 06 Feb 2019) | 1 line [X86] Change the CPU on the test case for pr40529.ll to really show the bug. NFC ------------------------------------------------------------------------ ------------------------------------------------------------------------ r353489 | ctopper | 2019-02-08 01:44:39 +0100 (Fri, 08 Feb 2019) | 14 lines [X86] Add FPCW as a register and start using it as an implicit use on floating point instructions. Summary: FPCW contains the rounding mode control which we manipulate to implement fp to integer conversion by changing the roudning mode, storing the value to the stack, and then changing the rounding mode back. Because we didn't model FPCW and its dependency chain, other instructions could be scheduled into the middle of the sequence. This patch introduces the register and adds it as an implciit def of FLDCW and implicit use of the FP binary arithmetic instructions and store instructions. There are more instructions that need to be updated, but this is a good start. I believe this fixes at least the reduced test case from PR40529. Reviewers: RKSimon, spatel, rnk, efriedma, andrew.w.kaylor Subscribers: dim, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D57735 ------------------------------------------------------------------------ llvm-svn: 353818 --- .../lib/Target/X86/AsmParser/X86AsmParser.cpp | 3 +- .../X86/InstPrinter/X86ATTInstPrinter.cpp | 11 + .../X86/InstPrinter/X86ATTInstPrinter.h | 1 + .../X86/InstPrinter/X86IntelInstPrinter.cpp | 11 + .../X86/InstPrinter/X86IntelInstPrinter.h | 1 + llvm/lib/Target/X86/X86ISelLowering.cpp | 8 + llvm/lib/Target/X86/X86InstrFPStack.td | 156 ++-- llvm/lib/Target/X86/X86InstrInfo.td | 46 +- llvm/lib/Target/X86/X86RegisterInfo.cpp | 3 + llvm/lib/Target/X86/X86RegisterInfo.td | 10 +- llvm/test/CodeGen/MIR/X86/memory-operands.mir | 4 +- llvm/test/CodeGen/X86/and-su.ll | 2 +- .../test/CodeGen/X86/avx512-regcall-NoMask.ll | 18 +- llvm/test/CodeGen/X86/fcmove.ll | 2 +- llvm/test/CodeGen/X86/fmf-flags.ll | 4 +- llvm/test/CodeGen/X86/fp-cvt.ll | 8 +- .../X86/inline-asm-default-clobbers.ll | 8 + llvm/test/CodeGen/X86/inline-asm-fpstack.ll | 38 +- llvm/test/CodeGen/X86/ipra-reg-usage.ll | 2 +- llvm/test/CodeGen/X86/pr13577.ll | 2 +- llvm/test/CodeGen/X86/pr33349.ll | 16 +- llvm/test/CodeGen/X86/pr34080.ll | 18 +- llvm/test/CodeGen/X86/pr34177.ll | 16 +- llvm/test/CodeGen/X86/pr40529.ll | 43 + llvm/test/CodeGen/X86/scalar-fp-to-i64.ll | 24 +- llvm/test/CodeGen/X86/select.ll | 24 +- llvm/test/CodeGen/X86/sincos-opt.ll | 4 +- llvm/test/CodeGen/X86/x87-schedule.ll | 792 +++++++++--------- llvm/test/MC/Disassembler/X86/fp-stack.txt | 416 ++++----- llvm/test/MC/Disassembler/X86/x86-16.txt | 4 +- llvm/test/MC/X86/PPRO-32.s | 32 +- llvm/test/MC/X86/PPRO-64.s | 32 +- llvm/test/MC/X86/X87-32.s | 48 +- llvm/test/MC/X86/X87-64.s | 46 +- llvm/test/MC/X86/intel-syntax-2.s | 12 +- llvm/test/MC/X86/intel-syntax.s | 96 +-- llvm/test/MC/X86/x86-16.s | 4 +- llvm/test/MC/X86/x86-32-coverage.s | 38 +- llvm/test/MC/X86/x86-32.s | 2 +- llvm/test/MC/X86/x86-64.s | 172 ++-- .../tools/llvm-mca/X86/Atom/resources-x87.s | 172 ++-- .../tools/llvm-mca/X86/BdVer2/resources-x87.s | 172 ++-- .../llvm-mca/X86/Broadwell/resources-x87.s | 172 ++-- .../tools/llvm-mca/X86/BtVer2/resources-x87.s | 172 ++-- .../llvm-mca/X86/Generic/resources-x87.s | 172 ++-- .../llvm-mca/X86/Haswell/resources-x87.s | 172 ++-- .../tools/llvm-mca/X86/SLM/resources-x87.s | 172 ++-- .../llvm-mca/X86/SandyBridge/resources-x87.s | 172 ++-- .../X86/SkylakeClient/resources-x87.s | 172 ++-- .../X86/SkylakeServer/resources-x87.s | 172 ++-- .../tools/llvm-mca/X86/Znver1/resources-x87.s | 172 ++-- llvm/utils/TableGen/X86RecognizableInstr.cpp | 2 + 52 files changed, 2082 insertions(+), 1989 deletions(-) create mode 100644 llvm/test/CodeGen/X86/inline-asm-default-clobbers.ll create mode 100644 llvm/test/CodeGen/X86/pr40529.ll diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index 899b50d0f78f3e..81391b96d12676 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -1115,8 +1115,7 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo, } // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens. - if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) { - RegNo = X86::ST0; + if (RegNo == X86::ST0) { Parser.Lex(); // Eat 'st' // Check to see if we have '(4)' after %st. diff --git a/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index 0e861d5ddbc9d9..3a074818c762b6 100644 --- a/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -200,3 +200,14 @@ void X86ATTInstPrinter::printU8Imm(const MCInst *MI, unsigned Op, O << markup("getOperand(Op).getImm() & 0xff) << markup(">"); } + +void X86ATTInstPrinter::printSTiRegOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &OS) { + const MCOperand &Op = MI->getOperand(OpNo); + unsigned Reg = Op.getReg(); + // Override the default printing to print st(0) instead st. + if (Reg == X86::ST0) + OS << markup(""); + else + printRegName(OS, Reg); +} diff --git a/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h index 57422bc9a0b2a3..584dc9c286e60e 100644 --- a/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h +++ b/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h @@ -44,6 +44,7 @@ class X86ATTInstPrinter final : public X86InstPrinterCommon { void printSrcIdx(const MCInst *MI, unsigned Op, raw_ostream &O); void printDstIdx(const MCInst *MI, unsigned Op, raw_ostream &O); void printU8Imm(const MCInst *MI, unsigned Op, raw_ostream &OS); + void printSTiRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS); void printanymem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { printMemReference(MI, OpNo, O); diff --git a/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index 044b715641520e..b31f8ab80838d9 100644 --- a/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -160,3 +160,14 @@ void X86IntelInstPrinter::printU8Imm(const MCInst *MI, unsigned Op, O << formatImm(MI->getOperand(Op).getImm() & 0xff); } + +void X86IntelInstPrinter::printSTiRegOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &OS) { + const MCOperand &Op = MI->getOperand(OpNo); + unsigned Reg = Op.getReg(); + // Override the default printing to print st(0) instead st. + if (Reg == X86::ST0) + OS << "st(0)"; + else + printRegName(OS, Reg); +} diff --git a/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h index 3b34a8052becba..fe52bd482a262d 100644 --- a/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h +++ b/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h @@ -39,6 +39,7 @@ class X86IntelInstPrinter final : public X86InstPrinterCommon { void printSrcIdx(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printDstIdx(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU8Imm(const MCInst *MI, unsigned Op, raw_ostream &O); + void printSTiRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS); void printanymem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { printMemReference(MI, OpNo, O); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 3637562c8ec35e..f4f37a894620ed 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -42507,6 +42507,14 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, if (StringRef("{flags}").equals_lower(Constraint)) return std::make_pair(X86::EFLAGS, &X86::CCRRegClass); + // dirflag -> DF + if (StringRef("{dirflag}").equals_lower(Constraint)) + return std::make_pair(X86::DF, &X86::DFCCRRegClass); + + // fpsr -> FPSW + if (StringRef("{fpsr}").equals_lower(Constraint)) + return std::make_pair(X86::FPSW, &X86::FPCCRRegClass); + // 'A' means [ER]AX + [ER]DX. if (Constraint == "A") { if (Subtarget.is64Bit()) diff --git a/llvm/lib/Target/X86/X86InstrFPStack.td b/llvm/lib/Target/X86/X86InstrFPStack.td index 5912a31996131b..8e12efff77eab8 100644 --- a/llvm/lib/Target/X86/X86InstrFPStack.td +++ b/llvm/lib/Target/X86/X86InstrFPStack.td @@ -230,7 +230,7 @@ def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src), } // mayLoad = 1, hasSideEffects = 1 } -let Defs = [FPSW] in { +let Defs = [FPSW], Uses = [FPCW] in { // FPBinary_rr just defines pseudo-instructions, no need to set a scheduling // resources. let hasNoSchedulingInfo = 1 in { @@ -258,42 +258,42 @@ defm DIVR: FPBinary; } // Defs = [FPSW] class FPST0rInst - : FPI<0xD8, fp, (outs), (ins RST:$op), asm>; + : FPI<0xD8, fp, (outs), (ins RSTi:$op), asm>; class FPrST0Inst - : FPI<0xDC, fp, (outs), (ins RST:$op), asm>; + : FPI<0xDC, fp, (outs), (ins RSTi:$op), asm>; class FPrST0PInst - : FPI<0xDE, fp, (outs), (ins RST:$op), asm>; + : FPI<0xDE, fp, (outs), (ins RSTi:$op), asm>; // NOTE: GAS and apparently all other AT&T style assemblers have a broken notion // of some of the 'reverse' forms of the fsub and fdiv instructions. As such, // we have to put some 'r's in and take them out of weird places. -let SchedRW = [WriteFAdd] in { -def ADD_FST0r : FPST0rInst ; -def ADD_FrST0 : FPrST0Inst ; -def ADD_FPrST0 : FPrST0PInst; -def SUBR_FST0r : FPST0rInst ; -def SUB_FrST0 : FPrST0Inst ; -def SUB_FPrST0 : FPrST0PInst; -def SUB_FST0r : FPST0rInst ; -def SUBR_FrST0 : FPrST0Inst ; -def SUBR_FPrST0 : FPrST0PInst; +let SchedRW = [WriteFAdd], Defs = [FPSW], Uses = [FPCW] in { +def ADD_FST0r : FPST0rInst ; +def ADD_FrST0 : FPrST0Inst ; +def ADD_FPrST0 : FPrST0PInst; +def SUBR_FST0r : FPST0rInst ; +def SUB_FrST0 : FPrST0Inst ; +def SUB_FPrST0 : FPrST0PInst; +def SUB_FST0r : FPST0rInst ; +def SUBR_FrST0 : FPrST0Inst ; +def SUBR_FPrST0 : FPrST0PInst; } // SchedRW -let SchedRW = [WriteFCom] in { +let SchedRW = [WriteFCom], Defs = [FPSW], Uses = [FPCW] in { def COM_FST0r : FPST0rInst ; def COMP_FST0r : FPST0rInst ; } // SchedRW -let SchedRW = [WriteFMul] in { -def MUL_FST0r : FPST0rInst ; -def MUL_FrST0 : FPrST0Inst ; -def MUL_FPrST0 : FPrST0PInst; +let SchedRW = [WriteFMul], Defs = [FPSW], Uses = [FPCW] in { +def MUL_FST0r : FPST0rInst ; +def MUL_FrST0 : FPrST0Inst ; +def MUL_FPrST0 : FPrST0PInst; } // SchedRW -let SchedRW = [WriteFDiv] in { -def DIVR_FST0r : FPST0rInst ; -def DIV_FrST0 : FPrST0Inst ; -def DIV_FPrST0 : FPrST0PInst; -def DIV_FST0r : FPST0rInst ; -def DIVR_FrST0 : FPrST0Inst ; -def DIVR_FPrST0 : FPrST0PInst; +let SchedRW = [WriteFDiv], Defs = [FPSW], Uses = [FPCW] in { +def DIVR_FST0r : FPST0rInst ; +def DIV_FrST0 : FPrST0Inst ; +def DIV_FPrST0 : FPrST0PInst; +def DIV_FST0r : FPST0rInst ; +def DIVR_FrST0 : FPrST0Inst ; +def DIVR_FPrST0 : FPrST0PInst; } // SchedRW // Unary operations. @@ -307,7 +307,7 @@ def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src), OneArgFPRW, def _F : FPI<0xD9, fp, (outs), (ins), asmstring>; } -let Defs = [FPSW] in { +let Defs = [FPSW], Uses = [FPCW] in { let SchedRW = [WriteFSign] in { defm CHS : FPUnary; @@ -335,7 +335,7 @@ def TST_F : FPI<0xD9, MRM_E4, (outs), (ins), "ftst">; // Versions of FP instructions that take a single memory operand. Added for the // disassembler; remove as they are included with patterns elsewhere. -let SchedRW = [WriteFComLd] in { +let SchedRW = [WriteFComLd], Defs = [FPSW], Uses = [FPCW] in { def FCOM32m : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom{s}\t$src">; def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp{s}\t$src">; @@ -398,22 +398,22 @@ defm CMOVNP : FPCMov; let Predicates = [HasCMov] in { // These are not factored because there's no clean way to pass DA/DB. -def CMOVB_F : FPI<0xDA, MRM0r, (outs), (ins RST:$op), - "fcmovb\t{$op, %st(0)|st(0), $op}">; -def CMOVBE_F : FPI<0xDA, MRM2r, (outs), (ins RST:$op), - "fcmovbe\t{$op, %st(0)|st(0), $op}">; -def CMOVE_F : FPI<0xDA, MRM1r, (outs), (ins RST:$op), - "fcmove\t{$op, %st(0)|st(0), $op}">; -def CMOVP_F : FPI<0xDA, MRM3r, (outs), (ins RST:$op), - "fcmovu\t{$op, %st(0)|st(0), $op}">; -def CMOVNB_F : FPI<0xDB, MRM0r, (outs), (ins RST:$op), - "fcmovnb\t{$op, %st(0)|st(0), $op}">; -def CMOVNBE_F: FPI<0xDB, MRM2r, (outs), (ins RST:$op), - "fcmovnbe\t{$op, %st(0)|st(0), $op}">; -def CMOVNE_F : FPI<0xDB, MRM1r, (outs), (ins RST:$op), - "fcmovne\t{$op, %st(0)|st(0), $op}">; -def CMOVNP_F : FPI<0xDB, MRM3r, (outs), (ins RST:$op), - "fcmovnu\t{$op, %st(0)|st(0), $op}">; +def CMOVB_F : FPI<0xDA, MRM0r, (outs), (ins RSTi:$op), + "fcmovb\t{$op, %st|st, $op}">; +def CMOVBE_F : FPI<0xDA, MRM2r, (outs), (ins RSTi:$op), + "fcmovbe\t{$op, %st|st, $op}">; +def CMOVE_F : FPI<0xDA, MRM1r, (outs), (ins RSTi:$op), + "fcmove\t{$op, %st|st, $op}">; +def CMOVP_F : FPI<0xDA, MRM3r, (outs), (ins RSTi:$op), + "fcmovu\t{$op, %st|st, $op}">; +def CMOVNB_F : FPI<0xDB, MRM0r, (outs), (ins RSTi:$op), + "fcmovnb\t{$op, %st|st, $op}">; +def CMOVNBE_F: FPI<0xDB, MRM2r, (outs), (ins RSTi:$op), + "fcmovnbe\t{$op, %st|st, $op}">; +def CMOVNE_F : FPI<0xDB, MRM1r, (outs), (ins RSTi:$op), + "fcmovne\t{$op, %st|st, $op}">; +def CMOVNP_F : FPI<0xDB, MRM3r, (outs), (ins RSTi:$op), + "fcmovnu\t{$op, %st|st, $op}">; } // Predicates = [HasCMov] } // SchedRW @@ -454,7 +454,7 @@ def ILD_Fp64m80: FpI_<(outs RFP80:$dst), (ins i64mem:$src), ZeroArgFP, [(set RFP80:$dst, (X86fild addr:$src, i64))]>; } // SchedRW -let SchedRW = [WriteStore] in { +let SchedRW = [WriteStore], Uses = [FPCW] in { def ST_Fp32m : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP, [(store RFP32:$src, addr:$op)]>; def ST_Fp64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP, @@ -489,7 +489,7 @@ def IST_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP, []>; def IST_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, []>; def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>; } // mayStore -} // SchedRW +} // SchedRW, Uses = [FPCW] let mayLoad = 1, SchedRW = [WriteLoad] in { def LD_F32m : FPI<0xD9, MRM0m, (outs), (ins f32mem:$src), "fld{s}\t$src">; @@ -499,7 +499,7 @@ def ILD_F16m : FPI<0xDF, MRM0m, (outs), (ins i16mem:$src), "fild{s}\t$src">; def ILD_F32m : FPI<0xDB, MRM0m, (outs), (ins i32mem:$src), "fild{l}\t$src">; def ILD_F64m : FPI<0xDF, MRM5m, (outs), (ins i64mem:$src), "fild{ll}\t$src">; } -let mayStore = 1, SchedRW = [WriteStore] in { +let mayStore = 1, SchedRW = [WriteStore], Uses = [FPCW] in { def ST_F32m : FPI<0xD9, MRM2m, (outs), (ins f32mem:$dst), "fst{s}\t$dst">; def ST_F64m : FPI<0xDD, MRM2m, (outs), (ins f64mem:$dst), "fst{l}\t$dst">; def ST_FP32m : FPI<0xD9, MRM3m, (outs), (ins f32mem:$dst), "fstp{s}\t$dst">; @@ -513,7 +513,7 @@ def IST_FP64m : FPI<0xDF, MRM7m, (outs), (ins i64mem:$dst), "fistp{ll}\t$dst">; } // FISTTP requires SSE3 even though it's a FPStack op. -let Predicates = [HasSSE3], SchedRW = [WriteStore] in { +let Predicates = [HasSSE3], SchedRW = [WriteStore], Uses = [FPCW] in { def ISTT_Fp16m32 : FpI_<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP, [(X86fp_to_i16mem RFP32:$src, addr:$op)]>; def ISTT_Fp32m32 : FpI_<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP, @@ -534,7 +534,7 @@ def ISTT_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, [(X86fp_to_i64mem RFP80:$src, addr:$op)]>; } // Predicates = [HasSSE3] -let mayStore = 1, SchedRW = [WriteStore] in { +let mayStore = 1, SchedRW = [WriteStore], Uses = [FPCW] in { def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst">; def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l}\t$dst">; def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst), "fisttp{ll}\t$dst">; @@ -542,10 +542,10 @@ def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst), "fisttp{ll}\t$dst"> // FP Stack manipulation instructions. let SchedRW = [WriteMove] in { -def LD_Frr : FPI<0xD9, MRM0r, (outs), (ins RST:$op), "fld\t$op">; -def ST_Frr : FPI<0xDD, MRM2r, (outs), (ins RST:$op), "fst\t$op">; -def ST_FPrr : FPI<0xDD, MRM3r, (outs), (ins RST:$op), "fstp\t$op">; -def XCH_F : FPI<0xD9, MRM1r, (outs), (ins RST:$op), "fxch\t$op">; +def LD_Frr : FPI<0xD9, MRM0r, (outs), (ins RSTi:$op), "fld\t$op">; +def ST_Frr : FPI<0xDD, MRM2r, (outs), (ins RSTi:$op), "fst\t$op">; +def ST_FPrr : FPI<0xDD, MRM3r, (outs), (ins RSTi:$op), "fstp\t$op">; +def XCH_F : FPI<0xD9, MRM1r, (outs), (ins RSTi:$op), "fxch\t$op">; } // Floating point constant loads. @@ -570,7 +570,7 @@ def LD_F0 : FPI<0xD9, MRM_EE, (outs), (ins), "fldz">; let SchedRW = [WriteFLD1] in def LD_F1 : FPI<0xD9, MRM_E8, (outs), (ins), "fld1">; -let SchedRW = [WriteFLDC], Defs = [FPSW] in { +let SchedRW = [WriteFLDC] in { def FLDL2T : I<0xD9, MRM_E9, (outs), (ins), "fldl2t", []>; def FLDL2E : I<0xD9, MRM_EA, (outs), (ins), "fldl2e", []>; def FLDPI : I<0xD9, MRM_EB, (outs), (ins), "fldpi", []>; @@ -579,7 +579,7 @@ def FLDLN2 : I<0xD9, MRM_ED, (outs), (ins), "fldln2", []>; } // SchedRW // Floating point compares. -let SchedRW = [WriteFCom] in { +let SchedRW = [WriteFCom], Uses = [FPCW] in { def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP, [(set FPSW, (trunc (X86cmp RFP32:$lhs, RFP32:$rhs)))]>; def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP, @@ -591,37 +591,37 @@ def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP, let SchedRW = [WriteFCom] in { // CC = ST(0) cmp ST(i) -let Defs = [EFLAGS, FPSW] in { -let Predicates = [FPStackf32, HasCMov] in -def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP, - [(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>; -let Predicates = [FPStackf64, HasCMov] in -def UCOM_FpIr64: FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP, - [(set EFLAGS, (X86cmp RFP64:$lhs, RFP64:$rhs))]>; -let Predicates = [HasCMov] in +let Defs = [EFLAGS, FPSW], Uses = [FPCW] in { +def UCOM_FpIr32: FpI_<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP, + [(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>, + Requires<[FPStackf32, HasCMov]>; +def UCOM_FpIr64: FpI_<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP, + [(set EFLAGS, (X86cmp RFP64:$lhs, RFP64:$rhs))]>, + Requires<[FPStackf64, HasCMov]>; def UCOM_FpIr80: FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP, - [(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>; + [(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>, + Requires<[HasCMov]>; } -let Defs = [FPSW], Uses = [ST0] in { +let Defs = [FPSW], Uses = [ST0, FPCW] in { def UCOM_Fr : FPI<0xDD, MRM4r, // FPSW = cmp ST(0) with ST(i) - (outs), (ins RST:$reg), "fucom\t$reg">; + (outs), (ins RSTi:$reg), "fucom\t$reg">; def UCOM_FPr : FPI<0xDD, MRM5r, // FPSW = cmp ST(0) with ST(i), pop - (outs), (ins RST:$reg), "fucomp\t$reg">; + (outs), (ins RSTi:$reg), "fucomp\t$reg">; def UCOM_FPPr : FPI<0xDA, MRM_E9, // cmp ST(0) with ST(1), pop, pop (outs), (ins), "fucompp">; } -let Defs = [EFLAGS, FPSW], Uses = [ST0] in { +let Defs = [EFLAGS, FPSW], Uses = [ST0, FPCW] in { def UCOM_FIr : FPI<0xDB, MRM5r, // CC = cmp ST(0) with ST(i) - (outs), (ins RST:$reg), "fucomi\t$reg">; + (outs), (ins RSTi:$reg), "fucomi\t{$reg, %st|st, $reg}">; def UCOM_FIPr : FPI<0xDF, MRM5r, // CC = cmp ST(0) with ST(i), pop - (outs), (ins RST:$reg), "fucompi\t$reg">; -} + (outs), (ins RSTi:$reg), "fucompi\t{$reg, %st|st, $reg}">; -let Defs = [EFLAGS, FPSW] in { -def COM_FIr : FPI<0xDB, MRM6r, (outs), (ins RST:$reg), "fcomi\t$reg">; -def COM_FIPr : FPI<0xDF, MRM6r, (outs), (ins RST:$reg), "fcompi\t$reg">; +def COM_FIr : FPI<0xDB, MRM6r, (outs), (ins RSTi:$reg), + "fcomi\t{$reg, %st|st, $reg}">; +def COM_FIPr : FPI<0xDF, MRM6r, (outs), (ins RSTi:$reg), + "fcompi\t{$reg, %st|st, $reg}">; } } // SchedRW @@ -631,12 +631,12 @@ let Defs = [AX], Uses = [FPSW] in def FNSTSW16r : I<0xDF, MRM_E0, // AX = fp flags (outs), (ins), "fnstsw\t{%ax|ax}", [(set AX, (X86fp_stsw FPSW))]>; -let Defs = [FPSW] in +let Defs = [FPSW], Uses = [FPCW] in def FNSTCW16m : I<0xD9, MRM7m, // [mem16] = X87 control world (outs), (ins i16mem:$dst), "fnstcw\t$dst", [(X86fp_cwd_get16 addr:$dst)]>; } // SchedRW -let Defs = [FPSW], mayLoad = 1 in +let Defs = [FPSW,FPCW], mayLoad = 1 in def FLDCW16m : I<0xD9, MRM5m, // X87 control world = [mem16] (outs), (ins i16mem:$dst), "fldcw\t$dst", []>, Sched<[WriteLoad]>; @@ -645,8 +645,8 @@ def FLDCW16m : I<0xD9, MRM5m, // X87 control world = [mem16] let SchedRW = [WriteMicrocoded] in { let Defs = [FPSW] in { def FNINIT : I<0xDB, MRM_E3, (outs), (ins), "fninit", []>; -def FFREE : FPI<0xDD, MRM0r, (outs), (ins RST:$reg), "ffree\t$reg">; -def FFREEP : FPI<0xDF, MRM0r, (outs), (ins RST:$reg), "ffreep\t$reg">; +def FFREE : FPI<0xDD, MRM0r, (outs), (ins RSTi:$reg), "ffree\t$reg">; +def FFREEP : FPI<0xDF, MRM0r, (outs), (ins RSTi:$reg), "ffreep\t$reg">; // Clear exceptions def FNCLEX : I<0xDB, MRM_E2, (outs), (ins), "fnclex", []>; diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index e53f83baa3c62d..4ec4d566ca9986 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -3231,39 +3231,39 @@ def : InstAlias<"fucompi", (UCOM_FIPr ST1), 0>; // instructions like "fadd %st(0), %st(0)" as "fadd %st(0)" for consistency with // gas. multiclass FpUnaryAlias { - def : InstAlias; - def : InstAlias; + def : InstAlias; } -defm : FpUnaryAlias<"fadd", ADD_FST0r>; +defm : FpUnaryAlias<"fadd", ADD_FST0r, 0>; defm : FpUnaryAlias<"faddp", ADD_FPrST0, 0>; -defm : FpUnaryAlias<"fsub", SUB_FST0r>; -defm : FpUnaryAlias<"fsub{|r}p", SUBR_FPrST0>; -defm : FpUnaryAlias<"fsubr", SUBR_FST0r>; -defm : FpUnaryAlias<"fsub{r|}p", SUB_FPrST0>; -defm : FpUnaryAlias<"fmul", MUL_FST0r>; -defm : FpUnaryAlias<"fmulp", MUL_FPrST0>; -defm : FpUnaryAlias<"fdiv", DIV_FST0r>; -defm : FpUnaryAlias<"fdiv{|r}p", DIVR_FPrST0>; -defm : FpUnaryAlias<"fdivr", DIVR_FST0r>; -defm : FpUnaryAlias<"fdiv{r|}p", DIV_FPrST0>; +defm : FpUnaryAlias<"fsub", SUB_FST0r, 0>; +defm : FpUnaryAlias<"fsub{|r}p", SUBR_FPrST0, 0>; +defm : FpUnaryAlias<"fsubr", SUBR_FST0r, 0>; +defm : FpUnaryAlias<"fsub{r|}p", SUB_FPrST0, 0>; +defm : FpUnaryAlias<"fmul", MUL_FST0r, 0>; +defm : FpUnaryAlias<"fmulp", MUL_FPrST0, 0>; +defm : FpUnaryAlias<"fdiv", DIV_FST0r, 0>; +defm : FpUnaryAlias<"fdiv{|r}p", DIVR_FPrST0, 0>; +defm : FpUnaryAlias<"fdivr", DIVR_FST0r, 0>; +defm : FpUnaryAlias<"fdiv{r|}p", DIV_FPrST0, 0>; defm : FpUnaryAlias<"fcomi", COM_FIr, 0>; defm : FpUnaryAlias<"fucomi", UCOM_FIr, 0>; -defm : FpUnaryAlias<"fcompi", COM_FIPr>; -defm : FpUnaryAlias<"fucompi", UCOM_FIPr>; +defm : FpUnaryAlias<"fcompi", COM_FIPr, 0>; +defm : FpUnaryAlias<"fucompi", UCOM_FIPr, 0>; -// Handle "f{mulp,addp} st(0), $op" the same as "f{mulp,addp} $op", since they +// Handle "f{mulp,addp} $op, %st(0)" the same as "f{mulp,addp} $op", since they // commute. We also allow fdiv[r]p/fsubrp even though they don't commute, // solely because gas supports it. -def : InstAlias<"faddp\t{%st(0), $op|$op, st(0)}", (ADD_FPrST0 RST:$op), 0>; -def : InstAlias<"fmulp\t{%st(0), $op|$op, st(0)}", (MUL_FPrST0 RST:$op)>; -def : InstAlias<"fsub{|r}p\t{%st(0), $op|$op, st(0)}", (SUBR_FPrST0 RST:$op)>; -def : InstAlias<"fsub{r|}p\t{%st(0), $op|$op, st(0)}", (SUB_FPrST0 RST:$op)>; -def : InstAlias<"fdiv{|r}p\t{%st(0), $op|$op, st(0)}", (DIVR_FPrST0 RST:$op)>; -def : InstAlias<"fdiv{r|}p\t{%st(0), $op|$op, st(0)}", (DIV_FPrST0 RST:$op)>; +def : InstAlias<"faddp\t{$op, %st|st, $op}", (ADD_FPrST0 RSTi:$op), 0>; +def : InstAlias<"fmulp\t{$op, %st|st, $op}", (MUL_FPrST0 RSTi:$op), 0>; +def : InstAlias<"fsub{|r}p\t{$op, %st|st, $op}", (SUBR_FPrST0 RSTi:$op), 0>; +def : InstAlias<"fsub{r|}p\t{$op, %st|st, $op}", (SUB_FPrST0 RSTi:$op), 0>; +def : InstAlias<"fdiv{|r}p\t{$op, %st|st, $op}", (DIVR_FPrST0 RSTi:$op), 0>; +def : InstAlias<"fdiv{r|}p\t{$op, %st|st, $op}", (DIV_FPrST0 RSTi:$op), 0>; def : InstAlias<"fnstsw" , (FNSTSW16r), 0>; diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp index 55842a4a209144..bc39cee34c4adb 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -497,6 +497,9 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); const X86FrameLowering *TFI = getFrameLowering(MF); + // Set the floating point control register as reserved. + Reserved.set(X86::FPCW); + // Set the stack-pointer register and its aliases as reserved. for (MCSubRegIterator I(X86::RSP, this, /*IncludeSelf=*/true); I.isValid(); ++I) diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td index aa20273f89abea..6a0538138528b0 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.td +++ b/llvm/lib/Target/X86/X86RegisterInfo.td @@ -278,7 +278,7 @@ def K7 : X86Reg<"k7", 7>, DwarfRegNum<[125, 100, 100]>; // pseudo registers, but we still mark them as aliasing FP registers. That // way both kinds can be live without exceeding the stack depth. ST registers // are only live around inline assembly. -def ST0 : X86Reg<"st(0)", 0>, DwarfRegNum<[33, 12, 11]>; +def ST0 : X86Reg<"st", 0>, DwarfRegNum<[33, 12, 11]>; def ST1 : X86Reg<"st(1)", 1>, DwarfRegNum<[34, 13, 12]>; def ST2 : X86Reg<"st(2)", 2>, DwarfRegNum<[35, 14, 13]>; def ST3 : X86Reg<"st(3)", 3>, DwarfRegNum<[36, 15, 14]>; @@ -288,7 +288,10 @@ def ST6 : X86Reg<"st(6)", 6>, DwarfRegNum<[39, 18, 17]>; def ST7 : X86Reg<"st(7)", 7>, DwarfRegNum<[40, 19, 18]>; // Floating-point status word -def FPSW : X86Reg<"fpsw", 0>; +def FPSW : X86Reg<"fpsr", 0>; + +// Floating-point control word +def FPCW : X86Reg<"fpcr", 0>; // Status flags register. // @@ -539,6 +542,9 @@ def RST : RegisterClass<"X86", [f80, f64, f32], 32, (sequence "ST%u", 0, 7)> { let isAllocatable = 0; } +// Helper to allow %st to print as %st(0) when its encoded in the instruction. +def RSTi : RegisterOperand; + // Generic vector registers: VR64 and VR128. // Ensure that float types are declared first - only float is legal on SSE1. def VR64: RegisterClass<"X86", [x86mmx], 64, (sequence "MM%u", 0, 7)>; diff --git a/llvm/test/CodeGen/MIR/X86/memory-operands.mir b/llvm/test/CodeGen/MIR/X86/memory-operands.mir index 2ac7bea2fc9b91..89b28126b9167b 100644 --- a/llvm/test/CodeGen/MIR/X86/memory-operands.mir +++ b/llvm/test/CodeGen/MIR/X86/memory-operands.mir @@ -359,8 +359,8 @@ body: | CFI_INSTRUCTION def_cfa_offset 32 LD_F80m $rsp, 1, $noreg, 32, $noreg, implicit-def dead $fpsw ; CHECK: name: stack_psv - ; CHECK: ST_FP80m $rsp, 1, $noreg, 0, $noreg, implicit-def dead $fpsw :: (store 10 into stack, align 16) - ST_FP80m $rsp, 1, _, 0, _, implicit-def dead $fpsw :: (store 10 into stack, align 16) + ; CHECK: ST_FP80m $rsp, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (store 10 into stack, align 16) + ST_FP80m $rsp, 1, _, 0, _, implicit-def dead $fpsw, implicit $fpcw :: (store 10 into stack, align 16) CALL64pcrel32 &cosl, csr_64, implicit $rsp, implicit-def $rsp, implicit-def $fp0 $rsp = ADD64ri8 $rsp, 24, implicit-def dead $eflags RETQ diff --git a/llvm/test/CodeGen/X86/and-su.ll b/llvm/test/CodeGen/X86/and-su.ll index 55bfa8def44f2e..de384368bfca5f 100644 --- a/llvm/test/CodeGen/X86/and-su.ll +++ b/llvm/test/CodeGen/X86/and-su.ll @@ -49,7 +49,7 @@ define fastcc double @bar(i32 %hash, double %x, double %y) nounwind { ; CHECK-NEXT: fchs ; CHECK-NEXT: fxch %st(1) ; CHECK-NEXT: .LBB1_5: # %bb16 -; CHECK-NEXT: faddp %st(1) +; CHECK-NEXT: faddp %st, %st(1) ; CHECK-NEXT: movl %ebp, %esp ; CHECK-NEXT: popl %ebp ; CHECK-NEXT: retl diff --git a/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll b/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll index 1136a3a50693e8..985860166a3f23 100644 --- a/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll +++ b/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll @@ -508,17 +508,17 @@ define x86_regcallcc double @test_CallargRetDouble(double %a) { define x86_regcallcc x86_fp80 @test_argRetf80(x86_fp80 %a0) nounwind { ; X32-LABEL: test_argRetf80: ; X32: # %bb.0: -; X32-NEXT: fadd %st(0), %st(0) +; X32-NEXT: fadd %st, %st(0) ; X32-NEXT: retl ; ; WIN64-LABEL: test_argRetf80: ; WIN64: # %bb.0: -; WIN64-NEXT: fadd %st(0), %st(0) +; WIN64-NEXT: fadd %st, %st(0) ; WIN64-NEXT: retq ; ; LINUXOSX64-LABEL: test_argRetf80: ; LINUXOSX64: # %bb.0: -; LINUXOSX64-NEXT: fadd %st(0), %st(0) +; LINUXOSX64-NEXT: fadd %st, %st(0) ; LINUXOSX64-NEXT: retq %r0 = fadd x86_fp80 %a0, %a0 ret x86_fp80 %r0 @@ -529,9 +529,9 @@ define x86_regcallcc x86_fp80 @test_CallargRetf80(x86_fp80 %a) { ; X32-LABEL: test_CallargRetf80: ; X32: # %bb.0: ; X32-NEXT: pushl %esp -; X32-NEXT: fadd %st(0), %st(0) +; X32-NEXT: fadd %st, %st(0) ; X32-NEXT: calll _test_argRetf80 -; X32-NEXT: fadd %st(0), %st(0) +; X32-NEXT: fadd %st, %st(0) ; X32-NEXT: popl %esp ; X32-NEXT: retl ; @@ -540,9 +540,9 @@ define x86_regcallcc x86_fp80 @test_CallargRetf80(x86_fp80 %a) { ; WIN64-NEXT: pushq %rsp ; WIN64-NEXT: .seh_pushreg 4 ; WIN64-NEXT: .seh_endprologue -; WIN64-NEXT: fadd %st(0), %st(0) +; WIN64-NEXT: fadd %st, %st(0) ; WIN64-NEXT: callq test_argRetf80 -; WIN64-NEXT: fadd %st(0), %st(0) +; WIN64-NEXT: fadd %st, %st(0) ; WIN64-NEXT: popq %rsp ; WIN64-NEXT: retq ; WIN64-NEXT: .seh_handlerdata @@ -554,9 +554,9 @@ define x86_regcallcc x86_fp80 @test_CallargRetf80(x86_fp80 %a) { ; LINUXOSX64-NEXT: pushq %rsp ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 ; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 -; LINUXOSX64-NEXT: fadd %st(0), %st(0) +; LINUXOSX64-NEXT: fadd %st, %st(0) ; LINUXOSX64-NEXT: callq test_argRetf80 -; LINUXOSX64-NEXT: fadd %st(0), %st(0) +; LINUXOSX64-NEXT: fadd %st, %st(0) ; LINUXOSX64-NEXT: popq %rsp ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8 ; LINUXOSX64-NEXT: retq diff --git a/llvm/test/CodeGen/X86/fcmove.ll b/llvm/test/CodeGen/X86/fcmove.ll index 35dbb68117ba27..6bb014858d0485 100644 --- a/llvm/test/CodeGen/X86/fcmove.ll +++ b/llvm/test/CodeGen/X86/fcmove.ll @@ -6,7 +6,7 @@ target triple = "x86_64-unknown-unknown" ; Test that we can generate an fcmove, and also that it passes verification. ; CHECK-LABEL: cmove_f -; CHECK: fcmove %st({{[0-7]}}), %st(0) +; CHECK: fcmove %st({{[0-7]}}), %st define x86_fp80 @cmove_f(x86_fp80 %a, x86_fp80 %b, i32 %c) { %test = icmp eq i32 %c, 0 %add = fadd x86_fp80 %a, %b diff --git a/llvm/test/CodeGen/X86/fmf-flags.ll b/llvm/test/CodeGen/X86/fmf-flags.ll index 4fb2040b338d93..bb883e92dc1104 100644 --- a/llvm/test/CodeGen/X86/fmf-flags.ll +++ b/llvm/test/CodeGen/X86/fmf-flags.ll @@ -20,7 +20,7 @@ define float @fast_recip_sqrt(float %x) { ; X86-NEXT: flds {{[0-9]+}}(%esp) ; X86-NEXT: fsqrt ; X86-NEXT: fld1 -; X86-NEXT: fdivp %st(1) +; X86-NEXT: fdivp %st, %st(1) ; X86-NEXT: retl %y = call fast float @llvm.sqrt.f32(float %x) %z = fdiv fast float 1.0, %y @@ -95,7 +95,7 @@ define float @not_so_fast_recip_sqrt(float %x) { ; X86-NEXT: flds {{[0-9]+}}(%esp) ; X86-NEXT: fsqrt ; X86-NEXT: fld1 -; X86-NEXT: fdiv %st(1) +; X86-NEXT: fdiv %st(1), %st ; X86-NEXT: fxch %st(1) ; X86-NEXT: fstps sqrt1 ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/fp-cvt.ll b/llvm/test/CodeGen/X86/fp-cvt.ll index ab3d40ddcaa527..71738cb85d2e42 100644 --- a/llvm/test/CodeGen/X86/fp-cvt.ll +++ b/llvm/test/CodeGen/X86/fp-cvt.ll @@ -486,7 +486,7 @@ define i64 @fptoui_i64_fp80(x86_fp80 %a0) nounwind { ; X64-X87-NEXT: xorl %eax, %eax ; X64-X87-NEXT: fxch %st(1) ; X64-X87-NEXT: fucompi %st(2) -; X64-X87-NEXT: fcmovnbe %st(1), %st(0) +; X64-X87-NEXT: fcmovnbe %st(1), %st ; X64-X87-NEXT: fstp %st(1) ; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp) ; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx @@ -509,7 +509,7 @@ define i64 @fptoui_i64_fp80(x86_fp80 %a0) nounwind { ; X64-SSSE3-NEXT: xorl %eax, %eax ; X64-SSSE3-NEXT: fxch %st(1) ; X64-SSSE3-NEXT: fucompi %st(2) -; X64-SSSE3-NEXT: fcmovnbe %st(1), %st(0) +; X64-SSSE3-NEXT: fcmovnbe %st(1), %st ; X64-SSSE3-NEXT: fstp %st(1) ; X64-SSSE3-NEXT: fisttpll -{{[0-9]+}}(%rsp) ; X64-SSSE3-NEXT: setbe %al @@ -568,7 +568,7 @@ define i64 @fptoui_i64_fp80_ld(x86_fp80 *%a0) nounwind { ; X64-X87-NEXT: xorl %eax, %eax ; X64-X87-NEXT: fxch %st(1) ; X64-X87-NEXT: fucompi %st(2) -; X64-X87-NEXT: fcmovnbe %st(1), %st(0) +; X64-X87-NEXT: fcmovnbe %st(1), %st ; X64-X87-NEXT: fstp %st(1) ; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp) ; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx @@ -591,7 +591,7 @@ define i64 @fptoui_i64_fp80_ld(x86_fp80 *%a0) nounwind { ; X64-SSSE3-NEXT: xorl %eax, %eax ; X64-SSSE3-NEXT: fxch %st(1) ; X64-SSSE3-NEXT: fucompi %st(2) -; X64-SSSE3-NEXT: fcmovnbe %st(1), %st(0) +; X64-SSSE3-NEXT: fcmovnbe %st(1), %st ; X64-SSSE3-NEXT: fstp %st(1) ; X64-SSSE3-NEXT: fisttpll -{{[0-9]+}}(%rsp) ; X64-SSSE3-NEXT: setbe %al diff --git a/llvm/test/CodeGen/X86/inline-asm-default-clobbers.ll b/llvm/test/CodeGen/X86/inline-asm-default-clobbers.ll new file mode 100644 index 00000000000000..34a77ea5fecdbe --- /dev/null +++ b/llvm/test/CodeGen/X86/inline-asm-default-clobbers.ll @@ -0,0 +1,8 @@ +; RUN: llc < %s -mtriple=i686 -stop-after=expand-isel-pseudos | FileCheck %s + +; CHECK: INLINEASM &"", 1, 12, implicit-def early-clobber $df, 12, implicit-def early-clobber $fpsw, 12, implicit-def early-clobber $eflags +define void @foo() { +entry: + call void asm sideeffect "", "~{dirflag},~{fpsr},~{flags}"() + ret void +} diff --git a/llvm/test/CodeGen/X86/inline-asm-fpstack.ll b/llvm/test/CodeGen/X86/inline-asm-fpstack.ll index 1c36d31c480b58..db6127acb0ae72 100644 --- a/llvm/test/CodeGen/X86/inline-asm-fpstack.ll +++ b/llvm/test/CodeGen/X86/inline-asm-fpstack.ll @@ -75,20 +75,20 @@ define void @test6(double %A, double %B, double %C, double %D, double %E) nounwi ; CHECK-NEXT: fldl {{[0-9]+}}(%esp) ; CHECK-NEXT: fldl {{[0-9]+}}(%esp) ; CHECK-NEXT: ## InlineAsm Start -; CHECK-NEXT: foo %st(0) %st(0) +; CHECK-NEXT: foo %st %st ; CHECK-NEXT: ## InlineAsm End ; CHECK-NEXT: fstp %st(0) ; CHECK-NEXT: ## InlineAsm Start -; CHECK-NEXT: bar %st(1) %st(0) +; CHECK-NEXT: bar %st(1) %st ; CHECK-NEXT: ## InlineAsm End ; CHECK-NEXT: fstp %st(1) ; CHECK-NEXT: fstp %st(0) ; CHECK-NEXT: ## InlineAsm Start -; CHECK-NEXT: baz %st(1) %st(0) +; CHECK-NEXT: baz %st(1) %st ; CHECK-NEXT: ## InlineAsm End ; CHECK-NEXT: fstp %st(0) ; CHECK-NEXT: ## InlineAsm Start -; CHECK-NEXT: baz %st(0) +; CHECK-NEXT: baz %st ; CHECK-NEXT: ## InlineAsm End ; CHECK-NEXT: fstp %st(0) ; CHECK-NEXT: retl @@ -117,10 +117,10 @@ define void @testPR4185() { ; CHECK-NEXT: flds LCPI6_0 ; CHECK-NEXT: fld %st(0) ; CHECK-NEXT: ## InlineAsm Start -; CHECK-NEXT: fistpl %st(0) +; CHECK-NEXT: fistpl %st ; CHECK-NEXT: ## InlineAsm End ; CHECK-NEXT: ## InlineAsm Start -; CHECK-NEXT: fistpl %st(0) +; CHECK-NEXT: fistpl %st ; CHECK-NEXT: ## InlineAsm End ; CHECK-NEXT: retl return: @@ -138,10 +138,10 @@ define void @testPR4185b() { ; CHECK: ## %bb.0: ## %return ; CHECK-NEXT: flds LCPI7_0 ; CHECK-NEXT: ## InlineAsm Start -; CHECK-NEXT: fistl %st(0) +; CHECK-NEXT: fistl %st ; CHECK-NEXT: ## InlineAsm End ; CHECK-NEXT: ## InlineAsm Start -; CHECK-NEXT: fistpl %st(0) +; CHECK-NEXT: fistpl %st ; CHECK-NEXT: ## InlineAsm End ; CHECK-NEXT: retl return: @@ -163,7 +163,7 @@ define void @testPR4459(x86_fp80 %a) { ; CHECK-NEXT: fld %st(0) ; CHECK-NEXT: fxch %st(1) ; CHECK-NEXT: ## InlineAsm Start -; CHECK-NEXT: fistpl %st(0) +; CHECK-NEXT: fistpl %st ; CHECK-NEXT: ## InlineAsm End ; CHECK-NEXT: fstpt (%esp) ; CHECK-NEXT: calll _test3 @@ -191,7 +191,7 @@ define void @testPR4484(x86_fp80 %a) { ; CHECK-NEXT: calll _test1 ; CHECK-NEXT: fldt {{[0-9]+}}(%esp) ## 10-byte Folded Reload ; CHECK-NEXT: ## InlineAsm Start -; CHECK-NEXT: fistpl %st(0) +; CHECK-NEXT: fistpl %st ; CHECK-NEXT: ## InlineAsm End ; CHECK-NEXT: fstpt (%esp) ; CHECK-NEXT: calll _test3 @@ -211,18 +211,18 @@ define void @testPR4485(x86_fp80* %a) { ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: fldt (%eax) ; CHECK-NEXT: flds LCPI10_0 -; CHECK-NEXT: fmul %st(0), %st(1) +; CHECK-NEXT: fmul %st, %st(1) ; CHECK-NEXT: flds LCPI10_1 -; CHECK-NEXT: fmul %st(0), %st(2) +; CHECK-NEXT: fmul %st, %st(2) ; CHECK-NEXT: fxch %st(2) ; CHECK-NEXT: ## InlineAsm Start -; CHECK-NEXT: fistpl %st(0) +; CHECK-NEXT: fistpl %st ; CHECK-NEXT: ## InlineAsm End ; CHECK-NEXT: fldt (%eax) -; CHECK-NEXT: fmulp %st(1) -; CHECK-NEXT: fmulp %st(1) +; CHECK-NEXT: fmulp %st, %st(1) +; CHECK-NEXT: fmulp %st, %st(1) ; CHECK-NEXT: ## InlineAsm Start -; CHECK-NEXT: fistpl %st(0) +; CHECK-NEXT: fistpl %st ; CHECK-NEXT: ## InlineAsm End ; CHECK-NEXT: retl entry: @@ -422,7 +422,7 @@ define i32 @PR10602() nounwind ssp { ; CHECK-NEXT: fld %st(0) ; CHECK-NEXT: fxch %st(1) ; CHECK-NEXT: ## InlineAsm Start -; CHECK-NEXT: fcomi %st(1), %st(0); pushf; pop %eax +; CHECK-NEXT: fcomi %st(1), %st; pushf; pop %eax ; CHECK-NEXT: ## InlineAsm End ; CHECK-NEXT: fstp %st(0) ; CHECK-NEXT: fstp %st(0) @@ -505,9 +505,9 @@ define double @test_operand_rewrite() { ; CHECK-LABEL: test_operand_rewrite: ; CHECK: ## %bb.0: ## %entry ; CHECK-NEXT: ## InlineAsm Start -; CHECK-NEXT: foo %st(0), %st(1) +; CHECK-NEXT: foo %st, %st(1) ; CHECK-NEXT: ## InlineAsm End -; CHECK-NEXT: fsubp %st(1) +; CHECK-NEXT: fsubp %st, %st(1) ; CHECK-NEXT: retl entry: %0 = tail call { double, double } asm sideeffect "foo $0, $1", "={st},={st(1)},~{dirflag},~{fpsr},~{flags}"() diff --git a/llvm/test/CodeGen/X86/ipra-reg-usage.ll b/llvm/test/CodeGen/X86/ipra-reg-usage.ll index 3e57ef21844267..2a557f2902a21d 100644 --- a/llvm/test/CodeGen/X86/ipra-reg-usage.ll +++ b/llvm/test/CodeGen/X86/ipra-reg-usage.ll @@ -3,7 +3,7 @@ target triple = "x86_64-unknown-unknown" declare void @bar1() define preserve_allcc void @foo()#0 { -; CHECK: foo Clobbered Registers: $cs $df $ds $eflags $eip $eiz $es $fpsw $fs $gs $hip $ip $rip $riz $ss $ssp $bnd0 $bnd1 $bnd2 $bnd3 $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $xmm16 $xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $r11b $r11bh $r11d $r11w $r11wh +; CHECK: foo Clobbered Registers: $cs $df $ds $eflags $eip $eiz $es $fpcw $fpsw $fs $gs $hip $ip $rip $riz $ss $ssp $bnd0 $bnd1 $bnd2 $bnd3 $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $xmm16 $xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $r11b $r11bh $r11d $r11w $r11wh call void @bar1() call void @bar2() ret void diff --git a/llvm/test/CodeGen/X86/pr13577.ll b/llvm/test/CodeGen/X86/pr13577.ll index e0e90f81bc42c3..3f9e2f953bae37 100644 --- a/llvm/test/CodeGen/X86/pr13577.ll +++ b/llvm/test/CodeGen/X86/pr13577.ll @@ -14,7 +14,7 @@ define x86_fp80 @foo(x86_fp80 %a) { ; CHECK-NEXT: testb $-128, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: flds {{.*}}(%rip) ; CHECK-NEXT: flds {{.*}}(%rip) -; CHECK-NEXT: fcmovne %st(1), %st(0) +; CHECK-NEXT: fcmovne %st(1), %st ; CHECK-NEXT: fstp %st(1) ; CHECK-NEXT: retq %1 = tail call x86_fp80 @copysignl(x86_fp80 0xK7FFF8000000000000000, x86_fp80 %a) nounwind readnone diff --git a/llvm/test/CodeGen/X86/pr33349.ll b/llvm/test/CodeGen/X86/pr33349.ll index 63edae044f87cb..9aa28384f4e81d 100644 --- a/llvm/test/CodeGen/X86/pr33349.ll +++ b/llvm/test/CodeGen/X86/pr33349.ll @@ -19,18 +19,18 @@ target triple = "x86_64-unknown-linux-gnu" ; KNL-NEXT: fld1 ; KNL-NEXT: fldz ; KNL-NEXT: fld %st(0) -; KNL-NEXT: fcmovne %st(2), %st(0) +; KNL-NEXT: fcmovne %st(2), %st ; KNL-NEXT: testb $1, %cl ; KNL-NEXT: fld %st(1) -; KNL-NEXT: fcmovne %st(3), %st(0) +; KNL-NEXT: fcmovne %st(3), %st ; KNL-NEXT: kmovw %k2, %eax ; KNL-NEXT: testb $1, %al ; KNL-NEXT: fld %st(2) -; KNL-NEXT: fcmovne %st(4), %st(0) +; KNL-NEXT: fcmovne %st(4), %st ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: testb $1, %al ; KNL-NEXT: fxch %st(3) -; KNL-NEXT: fcmovne %st(4), %st(0) +; KNL-NEXT: fcmovne %st(4), %st ; KNL-NEXT: fstp %st(4) ; KNL-NEXT: fxch %st(3) ; KNL-NEXT: fstpt (%rdi) @@ -55,18 +55,18 @@ target triple = "x86_64-unknown-linux-gnu" ; SKX-NEXT: fld1 ; SKX-NEXT: fldz ; SKX-NEXT: fld %st(0) -; SKX-NEXT: fcmovne %st(2), %st(0) +; SKX-NEXT: fcmovne %st(2), %st ; SKX-NEXT: testb $1, %cl ; SKX-NEXT: fld %st(1) -; SKX-NEXT: fcmovne %st(3), %st(0) +; SKX-NEXT: fcmovne %st(3), %st ; SKX-NEXT: kmovd %k2, %eax ; SKX-NEXT: testb $1, %al ; SKX-NEXT: fld %st(2) -; SKX-NEXT: fcmovne %st(4), %st(0) +; SKX-NEXT: fcmovne %st(4), %st ; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: testb $1, %al ; SKX-NEXT: fxch %st(3) -; SKX-NEXT: fcmovne %st(4), %st(0) +; SKX-NEXT: fcmovne %st(4), %st ; SKX-NEXT: fstp %st(4) ; SKX-NEXT: fxch %st(3) ; SKX-NEXT: fstpt (%rdi) diff --git a/llvm/test/CodeGen/X86/pr34080.ll b/llvm/test/CodeGen/X86/pr34080.ll index a709a4840e5303..0b23ab7d4b5d79 100644 --- a/llvm/test/CodeGen/X86/pr34080.ll +++ b/llvm/test/CodeGen/X86/pr34080.ll @@ -27,7 +27,7 @@ define void @_Z1fe(x86_fp80 %z) local_unnamed_addr #0 { ; SSE2-NEXT: movsd %xmm0, -32(%rbp) ; SSE2-NEXT: fsubl -32(%rbp) ; SSE2-NEXT: flds {{.*}}(%rip) -; SSE2-NEXT: fmul %st(0), %st(1) +; SSE2-NEXT: fmul %st, %st(1) ; SSE2-NEXT: fnstcw -2(%rbp) ; SSE2-NEXT: movzwl -2(%rbp), %eax ; SSE2-NEXT: movw $3199, -2(%rbp) ## imm = 0xC7F @@ -41,7 +41,7 @@ define void @_Z1fe(x86_fp80 %z) local_unnamed_addr #0 { ; SSE2-NEXT: movsd %xmm0, -56(%rbp) ; SSE2-NEXT: movsd %xmm0, -24(%rbp) ; SSE2-NEXT: fsubl -24(%rbp) -; SSE2-NEXT: fmulp %st(1) +; SSE2-NEXT: fmulp %st, %st(1) ; SSE2-NEXT: fstpl -48(%rbp) ; SSE2-NEXT: popq %rbp ; SSE2-NEXT: retq @@ -65,12 +65,12 @@ define void @_Z1fe(x86_fp80 %z) local_unnamed_addr #0 { ; SSE2-SCHEDULE-NEXT: movsd %xmm0, -64(%rbp) ; SSE2-SCHEDULE-NEXT: movsd %xmm0, -32(%rbp) ; SSE2-SCHEDULE-NEXT: fsubl -32(%rbp) -; SSE2-SCHEDULE-NEXT: fnstcw -2(%rbp) ; SSE2-SCHEDULE-NEXT: flds {{.*}}(%rip) +; SSE2-SCHEDULE-NEXT: fnstcw -2(%rbp) +; SSE2-SCHEDULE-NEXT: fmul %st, %st(1) ; SSE2-SCHEDULE-NEXT: movzwl -2(%rbp), %eax ; SSE2-SCHEDULE-NEXT: movw $3199, -2(%rbp) ## imm = 0xC7F ; SSE2-SCHEDULE-NEXT: fldcw -2(%rbp) -; SSE2-SCHEDULE-NEXT: fmul %st(0), %st(1) ; SSE2-SCHEDULE-NEXT: movw %ax, -2(%rbp) ; SSE2-SCHEDULE-NEXT: fxch %st(1) ; SSE2-SCHEDULE-NEXT: fistl -12(%rbp) @@ -80,7 +80,7 @@ define void @_Z1fe(x86_fp80 %z) local_unnamed_addr #0 { ; SSE2-SCHEDULE-NEXT: movsd %xmm0, -56(%rbp) ; SSE2-SCHEDULE-NEXT: movsd %xmm0, -24(%rbp) ; SSE2-SCHEDULE-NEXT: fsubl -24(%rbp) -; SSE2-SCHEDULE-NEXT: fmulp %st(1) +; SSE2-SCHEDULE-NEXT: fmulp %st, %st(1) ; SSE2-SCHEDULE-NEXT: fstpl -48(%rbp) ; SSE2-SCHEDULE-NEXT: popq %rbp ; SSE2-SCHEDULE-NEXT: retq @@ -100,7 +100,7 @@ define void @_Z1fe(x86_fp80 %z) local_unnamed_addr #0 { ; SSE3-NEXT: movsd %xmm0, -24(%rbp) ; SSE3-NEXT: fsubl -24(%rbp) ; SSE3-NEXT: flds {{.*}}(%rip) -; SSE3-NEXT: fmul %st(0), %st(1) +; SSE3-NEXT: fmul %st, %st(1) ; SSE3-NEXT: fld %st(1) ; SSE3-NEXT: fisttpl -8(%rbp) ; SSE3-NEXT: xorps %xmm0, %xmm0 @@ -109,7 +109,7 @@ define void @_Z1fe(x86_fp80 %z) local_unnamed_addr #0 { ; SSE3-NEXT: movsd %xmm0, -16(%rbp) ; SSE3-NEXT: fxch %st(1) ; SSE3-NEXT: fsubl -16(%rbp) -; SSE3-NEXT: fmulp %st(1) +; SSE3-NEXT: fmulp %st, %st(1) ; SSE3-NEXT: fstpl -32(%rbp) ; SSE3-NEXT: popq %rbp ; SSE3-NEXT: retq @@ -129,7 +129,7 @@ define void @_Z1fe(x86_fp80 %z) local_unnamed_addr #0 { ; AVX-NEXT: vmovsd %xmm0, -24(%rbp) ; AVX-NEXT: fsubl -24(%rbp) ; AVX-NEXT: flds {{.*}}(%rip) -; AVX-NEXT: fmul %st(0), %st(1) +; AVX-NEXT: fmul %st, %st(1) ; AVX-NEXT: fld %st(1) ; AVX-NEXT: fisttpl -8(%rbp) ; AVX-NEXT: vcvtsi2sdl -8(%rbp), %xmm1, %xmm0 @@ -137,7 +137,7 @@ define void @_Z1fe(x86_fp80 %z) local_unnamed_addr #0 { ; AVX-NEXT: vmovsd %xmm0, -16(%rbp) ; AVX-NEXT: fxch %st(1) ; AVX-NEXT: fsubl -16(%rbp) -; AVX-NEXT: fmulp %st(1) +; AVX-NEXT: fmulp %st, %st(1) ; AVX-NEXT: fstpl -32(%rbp) ; AVX-NEXT: popq %rbp ; AVX-NEXT: retq diff --git a/llvm/test/CodeGen/X86/pr34177.ll b/llvm/test/CodeGen/X86/pr34177.ll index 3fe56277fb4b3e..f6b8dec3c3b39d 100644 --- a/llvm/test/CodeGen/X86/pr34177.ll +++ b/llvm/test/CodeGen/X86/pr34177.ll @@ -20,17 +20,17 @@ define void @test(<4x i64> %a, <4 x x86_fp80> %b, <8 x x86_fp80>* %c) local_unna ; CHECK-NEXT: fld1 ; CHECK-NEXT: fldz ; CHECK-NEXT: fld %st(0) -; CHECK-NEXT: fcmove %st(2), %st(0) +; CHECK-NEXT: fcmove %st(2), %st ; CHECK-NEXT: cmpq %rax, %rsi ; CHECK-NEXT: fld %st(1) -; CHECK-NEXT: fcmove %st(3), %st(0) +; CHECK-NEXT: fcmove %st(3), %st ; CHECK-NEXT: cmpq %rdx, %r9 ; CHECK-NEXT: fld %st(2) -; CHECK-NEXT: fcmove %st(4), %st(0) +; CHECK-NEXT: fcmove %st(4), %st ; CHECK-NEXT: movl $1, %eax ; CHECK-NEXT: cmpq %r8, %rax ; CHECK-NEXT: fxch %st(3) -; CHECK-NEXT: fcmove %st(4), %st(0) +; CHECK-NEXT: fcmove %st(4), %st ; CHECK-NEXT: fstp %st(4) ; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) ; CHECK-NEXT: fstpt 70(%rdi) @@ -40,15 +40,15 @@ define void @test(<4x i64> %a, <4 x x86_fp80> %b, <8 x x86_fp80>* %c) local_unna ; CHECK-NEXT: fstpt 30(%rdi) ; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) ; CHECK-NEXT: fstpt 10(%rdi) -; CHECK-NEXT: fadd %st(0), %st(0) +; CHECK-NEXT: fadd %st, %st(0) ; CHECK-NEXT: fstpt 60(%rdi) ; CHECK-NEXT: fxch %st(1) -; CHECK-NEXT: fadd %st(0), %st(0) +; CHECK-NEXT: fadd %st, %st(0) ; CHECK-NEXT: fstpt 40(%rdi) ; CHECK-NEXT: fxch %st(1) -; CHECK-NEXT: fadd %st(0), %st(0) +; CHECK-NEXT: fadd %st, %st(0) ; CHECK-NEXT: fstpt 20(%rdi) -; CHECK-NEXT: fadd %st(0), %st(0) +; CHECK-NEXT: fadd %st, %st(0) ; CHECK-NEXT: fstpt (%rdi) %1 = icmp eq <4 x i64> , %a %2 = select <4 x i1> %1, <4 x x86_fp80> , <4 x x86_fp80> zeroinitializer diff --git a/llvm/test/CodeGen/X86/pr40529.ll b/llvm/test/CodeGen/X86/pr40529.ll new file mode 100644 index 00000000000000..9520ac22d7491c --- /dev/null +++ b/llvm/test/CodeGen/X86/pr40529.ll @@ -0,0 +1,43 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux -mcpu=x86-64 | FileCheck %s + +define x86_fp80 @rem_pio2l_min(x86_fp80 %z) { +; CHECK-LABEL: rem_pio2l_min: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fnstcw -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F +; CHECK-NEXT: fldcw -{{[0-9]+}}(%rsp) +; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) +; CHECK-NEXT: movw %ax, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: fistl -{{[0-9]+}}(%rsp) +; CHECK-NEXT: fldcw -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movl -{{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: fisubl -{{[0-9]+}}(%rsp) +; CHECK-NEXT: flds {{.*}}(%rip) +; CHECK-NEXT: fnstcw -{{[0-9]+}}(%rsp) +; CHECK-NEXT: fmul %st, %st(1) +; CHECK-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F +; CHECK-NEXT: fldcw -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movw %ax, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fistl -{{[0-9]+}}(%rsp) +; CHECK-NEXT: fldcw -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movl -{{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: fisubl -{{[0-9]+}}(%rsp) +; CHECK-NEXT: fmulp %st, %st(1) +; CHECK-NEXT: retq +entry: + %conv = fptosi x86_fp80 %z to i32 + %conv1 = sitofp i32 %conv to x86_fp80 + %sub = fsub x86_fp80 %z, %conv1 + %mul = fmul x86_fp80 %sub, 0xK40178000000000000000 + %conv2 = fptosi x86_fp80 %mul to i32 + %conv3 = sitofp i32 %conv2 to x86_fp80 + %sub4 = fsub x86_fp80 %mul, %conv3 + %mul5 = fmul x86_fp80 %sub4, 0xK40178000000000000000 + ret x86_fp80 %mul5 +} diff --git a/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll b/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll index 7ed61f8fdc7d24..92361efa49fac3 100644 --- a/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll +++ b/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll @@ -1028,7 +1028,7 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind { ; AVX512_32_WIN-NEXT: xorl %edx, %edx ; AVX512_32_WIN-NEXT: fxch %st(1) ; AVX512_32_WIN-NEXT: fucompi %st(2) -; AVX512_32_WIN-NEXT: fcmovnbe %st(1), %st(0) +; AVX512_32_WIN-NEXT: fcmovnbe %st(1), %st ; AVX512_32_WIN-NEXT: fstp %st(1) ; AVX512_32_WIN-NEXT: fisttpll (%esp) ; AVX512_32_WIN-NEXT: setbe %dl @@ -1049,7 +1049,7 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind { ; AVX512_32_LIN-NEXT: xorl %edx, %edx ; AVX512_32_LIN-NEXT: fxch %st(1) ; AVX512_32_LIN-NEXT: fucompi %st(2) -; AVX512_32_LIN-NEXT: fcmovnbe %st(1), %st(0) +; AVX512_32_LIN-NEXT: fcmovnbe %st(1), %st ; AVX512_32_LIN-NEXT: fstp %st(1) ; AVX512_32_LIN-NEXT: fisttpll (%esp) ; AVX512_32_LIN-NEXT: setbe %dl @@ -1069,7 +1069,7 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind { ; AVX512_64_WIN-NEXT: xorl %ecx, %ecx ; AVX512_64_WIN-NEXT: fxch %st(1) ; AVX512_64_WIN-NEXT: fucompi %st(2) -; AVX512_64_WIN-NEXT: fcmovnbe %st(1), %st(0) +; AVX512_64_WIN-NEXT: fcmovnbe %st(1), %st ; AVX512_64_WIN-NEXT: fstp %st(1) ; AVX512_64_WIN-NEXT: fisttpll (%rsp) ; AVX512_64_WIN-NEXT: setbe %cl @@ -1090,7 +1090,7 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind { ; AVX512_64_LIN-NEXT: xorl %ecx, %ecx ; AVX512_64_LIN-NEXT: fxch %st(1) ; AVX512_64_LIN-NEXT: fucompi %st(2) -; AVX512_64_LIN-NEXT: fcmovnbe %st(1), %st(0) +; AVX512_64_LIN-NEXT: fcmovnbe %st(1), %st ; AVX512_64_LIN-NEXT: fstp %st(1) ; AVX512_64_LIN-NEXT: fisttpll -{{[0-9]+}}(%rsp) ; AVX512_64_LIN-NEXT: setbe %cl @@ -1114,7 +1114,7 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind { ; SSE3_32_WIN-NEXT: xorl %edx, %edx ; SSE3_32_WIN-NEXT: fxch %st(1) ; SSE3_32_WIN-NEXT: fucompi %st(2) -; SSE3_32_WIN-NEXT: fcmovnbe %st(1), %st(0) +; SSE3_32_WIN-NEXT: fcmovnbe %st(1), %st ; SSE3_32_WIN-NEXT: fstp %st(1) ; SSE3_32_WIN-NEXT: fisttpll (%esp) ; SSE3_32_WIN-NEXT: setbe %dl @@ -1135,7 +1135,7 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind { ; SSE3_32_LIN-NEXT: xorl %edx, %edx ; SSE3_32_LIN-NEXT: fxch %st(1) ; SSE3_32_LIN-NEXT: fucompi %st(2) -; SSE3_32_LIN-NEXT: fcmovnbe %st(1), %st(0) +; SSE3_32_LIN-NEXT: fcmovnbe %st(1), %st ; SSE3_32_LIN-NEXT: fstp %st(1) ; SSE3_32_LIN-NEXT: fisttpll (%esp) ; SSE3_32_LIN-NEXT: setbe %dl @@ -1155,7 +1155,7 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind { ; SSE3_64_WIN-NEXT: xorl %eax, %eax ; SSE3_64_WIN-NEXT: fxch %st(1) ; SSE3_64_WIN-NEXT: fucompi %st(2) -; SSE3_64_WIN-NEXT: fcmovnbe %st(1), %st(0) +; SSE3_64_WIN-NEXT: fcmovnbe %st(1), %st ; SSE3_64_WIN-NEXT: fstp %st(1) ; SSE3_64_WIN-NEXT: fisttpll (%rsp) ; SSE3_64_WIN-NEXT: setbe %al @@ -1173,7 +1173,7 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind { ; SSE3_64_LIN-NEXT: xorl %eax, %eax ; SSE3_64_LIN-NEXT: fxch %st(1) ; SSE3_64_LIN-NEXT: fucompi %st(2) -; SSE3_64_LIN-NEXT: fcmovnbe %st(1), %st(0) +; SSE3_64_LIN-NEXT: fcmovnbe %st(1), %st ; SSE3_64_LIN-NEXT: fstp %st(1) ; SSE3_64_LIN-NEXT: fisttpll -{{[0-9]+}}(%rsp) ; SSE3_64_LIN-NEXT: setbe %al @@ -1194,7 +1194,7 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind { ; SSE2_32_WIN-NEXT: xorl %edx, %edx ; SSE2_32_WIN-NEXT: fxch %st(1) ; SSE2_32_WIN-NEXT: fucompi %st(2) -; SSE2_32_WIN-NEXT: fcmovnbe %st(1), %st(0) +; SSE2_32_WIN-NEXT: fcmovnbe %st(1), %st ; SSE2_32_WIN-NEXT: fstp %st(1) ; SSE2_32_WIN-NEXT: fnstcw {{[0-9]+}}(%esp) ; SSE2_32_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax @@ -1221,7 +1221,7 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind { ; SSE2_32_LIN-NEXT: xorl %edx, %edx ; SSE2_32_LIN-NEXT: fxch %st(1) ; SSE2_32_LIN-NEXT: fucompi %st(2) -; SSE2_32_LIN-NEXT: fcmovnbe %st(1), %st(0) +; SSE2_32_LIN-NEXT: fcmovnbe %st(1), %st ; SSE2_32_LIN-NEXT: fstp %st(1) ; SSE2_32_LIN-NEXT: fnstcw {{[0-9]+}}(%esp) ; SSE2_32_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax @@ -1247,7 +1247,7 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind { ; SSE2_64_WIN-NEXT: xorl %eax, %eax ; SSE2_64_WIN-NEXT: fxch %st(1) ; SSE2_64_WIN-NEXT: fucompi %st(2) -; SSE2_64_WIN-NEXT: fcmovnbe %st(1), %st(0) +; SSE2_64_WIN-NEXT: fcmovnbe %st(1), %st ; SSE2_64_WIN-NEXT: fstp %st(1) ; SSE2_64_WIN-NEXT: fnstcw {{[0-9]+}}(%rsp) ; SSE2_64_WIN-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx @@ -1271,7 +1271,7 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind { ; SSE2_64_LIN-NEXT: xorl %eax, %eax ; SSE2_64_LIN-NEXT: fxch %st(1) ; SSE2_64_LIN-NEXT: fucompi %st(2) -; SSE2_64_LIN-NEXT: fcmovnbe %st(1), %st(0) +; SSE2_64_LIN-NEXT: fcmovnbe %st(1), %st ; SSE2_64_LIN-NEXT: fstp %st(1) ; SSE2_64_LIN-NEXT: fnstcw -{{[0-9]+}}(%rsp) ; SSE2_64_LIN-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll index 947c95137206de..9429e1854d44c3 100644 --- a/llvm/test/CodeGen/X86/select.ll +++ b/llvm/test/CodeGen/X86/select.ll @@ -293,25 +293,25 @@ define void @test6(i32 %C, <4 x float>* %A, <4 x float>* %B) nounwind { ; ATHLON-NEXT: flds 4(%ecx) ; ATHLON-NEXT: flds (%ecx) ; ATHLON-NEXT: flds (%eax) -; ATHLON-NEXT: fmul %st(0), %st(0) +; ATHLON-NEXT: fmul %st, %st(0) ; ATHLON-NEXT: cmpl $0, {{[0-9]+}}(%esp) ; ATHLON-NEXT: fxch %st(1) -; ATHLON-NEXT: fcmove %st(1), %st(0) +; ATHLON-NEXT: fcmove %st(1), %st ; ATHLON-NEXT: fstp %st(1) ; ATHLON-NEXT: flds 4(%eax) -; ATHLON-NEXT: fmul %st(0), %st(0) +; ATHLON-NEXT: fmul %st, %st(0) ; ATHLON-NEXT: fxch %st(2) -; ATHLON-NEXT: fcmove %st(2), %st(0) +; ATHLON-NEXT: fcmove %st(2), %st ; ATHLON-NEXT: fstp %st(2) ; ATHLON-NEXT: flds 8(%eax) -; ATHLON-NEXT: fmul %st(0), %st(0) +; ATHLON-NEXT: fmul %st, %st(0) ; ATHLON-NEXT: fxch %st(3) -; ATHLON-NEXT: fcmove %st(3), %st(0) +; ATHLON-NEXT: fcmove %st(3), %st ; ATHLON-NEXT: fstp %st(3) ; ATHLON-NEXT: flds 12(%eax) -; ATHLON-NEXT: fmul %st(0), %st(0) +; ATHLON-NEXT: fmul %st, %st(0) ; ATHLON-NEXT: fxch %st(4) -; ATHLON-NEXT: fcmove %st(4), %st(0) +; ATHLON-NEXT: fcmove %st(4), %st ; ATHLON-NEXT: fstp %st(4) ; ATHLON-NEXT: fxch %st(3) ; ATHLON-NEXT: fstps 12(%ecx) @@ -332,13 +332,13 @@ define void @test6(i32 %C, <4 x float>* %A, <4 x float>* %B) nounwind { ; MCU-NEXT: flds 4(%ecx) ; MCU-NEXT: flds 8(%ecx) ; MCU-NEXT: flds 12(%ecx) -; MCU-NEXT: fmul %st(0), %st(0) +; MCU-NEXT: fmul %st, %st(0) ; MCU-NEXT: fxch %st(1) -; MCU-NEXT: fmul %st(0), %st(0) +; MCU-NEXT: fmul %st, %st(0) ; MCU-NEXT: fxch %st(2) -; MCU-NEXT: fmul %st(0), %st(0) +; MCU-NEXT: fmul %st, %st(0) ; MCU-NEXT: fxch %st(3) -; MCU-NEXT: fmul %st(0), %st(0) +; MCU-NEXT: fmul %st, %st(0) ; MCU-NEXT: testl %eax, %eax ; MCU-NEXT: flds (%edx) ; MCU-NEXT: je .LBB5_2 diff --git a/llvm/test/CodeGen/X86/sincos-opt.ll b/llvm/test/CodeGen/X86/sincos-opt.ll index b4330ea58ea5a3..b64450863427ba 100644 --- a/llvm/test/CodeGen/X86/sincos-opt.ll +++ b/llvm/test/CodeGen/X86/sincos-opt.ll @@ -115,13 +115,13 @@ entry: ; GNU_SINCOS: callq sincosl ; GNU_SINCOS: fldt 16(%rsp) ; GNU_SINCOS: fldt 32(%rsp) -; GNU_SINCOS: faddp %st(1) +; GNU_SINCOS: faddp %st, %st(1) ; GNU_SINCOS_FASTMATH-LABEL: test3: ; GNU_SINCOS_FASTMATH: callq sincosl ; GNU_SINCOS_FASTMATH: fldt 16(%{{[re]}}sp) ; GNU_SINCOS_FASTMATH: fldt 32(%{{[re]}}sp) -; GNU_SINCOS_FASTMATH: faddp %st(1) +; GNU_SINCOS_FASTMATH: faddp %st, %st(1) %call = tail call x86_fp80 @sinl(x86_fp80 %x) readnone %call1 = tail call x86_fp80 @cosl(x86_fp80 %x) readnone %add = fadd x86_fp80 %call, %call1 diff --git a/llvm/test/CodeGen/X86/x87-schedule.ll b/llvm/test/CodeGen/X86/x87-schedule.ll index 1921f8c75a3d10..599f313b13025f 100644 --- a/llvm/test/CodeGen/X86/x87-schedule.ll +++ b/llvm/test/CodeGen/X86/x87-schedule.ll @@ -180,8 +180,8 @@ define void @test_fadd(float *%a0, double *%a1) optsize { ; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax ; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx ; GENERIC-NEXT: #APP -; GENERIC-NEXT: fadd %st(0), %st(1) -; GENERIC-NEXT: fadd %st(2) +; GENERIC-NEXT: fadd %st, %st(1) +; GENERIC-NEXT: fadd %st(2), %st ; GENERIC-NEXT: fadds (%ecx) ; GENERIC-NEXT: faddl (%eax) ; GENERIC-NEXT: #NO_APP @@ -192,8 +192,8 @@ define void @test_fadd(float *%a0, double *%a1) optsize { ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] ; ATOM-NEXT: #APP -; ATOM-NEXT: fadd %st(0), %st(1) # sched: [5:5.00] -; ATOM-NEXT: fadd %st(2) # sched: [5:5.00] +; ATOM-NEXT: fadd %st, %st(1) # sched: [5:5.00] +; ATOM-NEXT: fadd %st(2), %st # sched: [5:5.00] ; ATOM-NEXT: fadds (%ecx) # sched: [5:5.00] ; ATOM-NEXT: faddl (%eax) # sched: [5:5.00] ; ATOM-NEXT: #NO_APP @@ -204,8 +204,8 @@ define void @test_fadd(float *%a0, double *%a1) optsize { ; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] ; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] ; SLM-NEXT: #APP -; SLM-NEXT: fadd %st(0), %st(1) # sched: [3:1.00] -; SLM-NEXT: fadd %st(2) # sched: [3:1.00] +; SLM-NEXT: fadd %st, %st(1) # sched: [3:1.00] +; SLM-NEXT: fadd %st(2), %st # sched: [3:1.00] ; SLM-NEXT: fadds (%ecx) # sched: [6:1.00] ; SLM-NEXT: faddl (%eax) # sched: [6:1.00] ; SLM-NEXT: #NO_APP @@ -216,8 +216,8 @@ define void @test_fadd(float *%a0, double *%a1) optsize { ; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SANDY-NEXT: #APP -; SANDY-NEXT: fadd %st(0), %st(1) # sched: [3:1.00] -; SANDY-NEXT: fadd %st(2) # sched: [3:1.00] +; SANDY-NEXT: fadd %st, %st(1) # sched: [3:1.00] +; SANDY-NEXT: fadd %st(2), %st # sched: [3:1.00] ; SANDY-NEXT: fadds (%ecx) # sched: [10:1.00] ; SANDY-NEXT: faddl (%eax) # sched: [10:1.00] ; SANDY-NEXT: #NO_APP @@ -228,8 +228,8 @@ define void @test_fadd(float *%a0, double *%a1) optsize { ; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; HASWELL-NEXT: #APP -; HASWELL-NEXT: fadd %st(0), %st(1) # sched: [3:1.00] -; HASWELL-NEXT: fadd %st(2) # sched: [3:1.00] +; HASWELL-NEXT: fadd %st, %st(1) # sched: [3:1.00] +; HASWELL-NEXT: fadd %st(2), %st # sched: [3:1.00] ; HASWELL-NEXT: fadds (%ecx) # sched: [10:1.00] ; HASWELL-NEXT: faddl (%eax) # sched: [10:1.00] ; HASWELL-NEXT: #NO_APP @@ -240,8 +240,8 @@ define void @test_fadd(float *%a0, double *%a1) optsize { ; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fadd %st(0), %st(1) # sched: [3:1.00] -; BROADWELL-NEXT: fadd %st(2) # sched: [3:1.00] +; BROADWELL-NEXT: fadd %st, %st(1) # sched: [3:1.00] +; BROADWELL-NEXT: fadd %st(2), %st # sched: [3:1.00] ; BROADWELL-NEXT: fadds (%ecx) # sched: [9:1.00] ; BROADWELL-NEXT: faddl (%eax) # sched: [9:1.00] ; BROADWELL-NEXT: #NO_APP @@ -252,8 +252,8 @@ define void @test_fadd(float *%a0, double *%a1) optsize { ; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fadd %st(0), %st(1) # sched: [3:1.00] -; SKYLAKE-NEXT: fadd %st(2) # sched: [3:1.00] +; SKYLAKE-NEXT: fadd %st, %st(1) # sched: [3:1.00] +; SKYLAKE-NEXT: fadd %st(2), %st # sched: [3:1.00] ; SKYLAKE-NEXT: fadds (%ecx) # sched: [10:1.00] ; SKYLAKE-NEXT: faddl (%eax) # sched: [10:1.00] ; SKYLAKE-NEXT: #NO_APP @@ -264,8 +264,8 @@ define void @test_fadd(float *%a0, double *%a1) optsize { ; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SKX-NEXT: #APP -; SKX-NEXT: fadd %st(0), %st(1) # sched: [3:1.00] -; SKX-NEXT: fadd %st(2) # sched: [3:1.00] +; SKX-NEXT: fadd %st, %st(1) # sched: [3:1.00] +; SKX-NEXT: fadd %st(2), %st # sched: [3:1.00] ; SKX-NEXT: fadds (%ecx) # sched: [10:1.00] ; SKX-NEXT: faddl (%eax) # sched: [10:1.00] ; SKX-NEXT: #NO_APP @@ -276,8 +276,8 @@ define void @test_fadd(float *%a0, double *%a1) optsize { ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fadd %st(0), %st(1) # sched: [5:1.00] -; BDVER2-NEXT: fadd %st(2) # sched: [5:1.00] +; BDVER2-NEXT: fadd %st, %st(1) # sched: [5:1.00] +; BDVER2-NEXT: fadd %st(2), %st # sched: [5:1.00] ; BDVER2-NEXT: fadds (%ecx) # sched: [10:1.00] ; BDVER2-NEXT: faddl (%eax) # sched: [10:1.00] ; BDVER2-NEXT: #NO_APP @@ -288,8 +288,8 @@ define void @test_fadd(float *%a0, double *%a1) optsize { ; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00] ; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:1.00] ; BTVER2-NEXT: #APP -; BTVER2-NEXT: fadd %st(0), %st(1) # sched: [3:1.00] -; BTVER2-NEXT: fadd %st(2) # sched: [3:1.00] +; BTVER2-NEXT: fadd %st, %st(1) # sched: [3:1.00] +; BTVER2-NEXT: fadd %st(2), %st # sched: [3:1.00] ; BTVER2-NEXT: fadds (%ecx) # sched: [8:1.00] ; BTVER2-NEXT: faddl (%eax) # sched: [8:1.00] ; BTVER2-NEXT: #NO_APP @@ -300,8 +300,8 @@ define void @test_fadd(float *%a0, double *%a1) optsize { ; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] ; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] ; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fadd %st(0), %st(1) # sched: [3:1.00] -; ZNVER1-NEXT: fadd %st(2) # sched: [3:1.00] +; ZNVER1-NEXT: fadd %st, %st(1) # sched: [3:1.00] +; ZNVER1-NEXT: fadd %st(2), %st # sched: [3:1.00] ; ZNVER1-NEXT: fadds (%ecx) # sched: [10:1.00] ; ZNVER1-NEXT: faddl (%eax) # sched: [10:1.00] ; ZNVER1-NEXT: #NO_APP @@ -316,8 +316,8 @@ define void @test_faddp_fiadd(i16 *%a0, i32 *%a1) optsize { ; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax ; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx ; GENERIC-NEXT: #APP -; GENERIC-NEXT: faddp %st(1) -; GENERIC-NEXT: faddp %st(2) +; GENERIC-NEXT: faddp %st, %st(1) +; GENERIC-NEXT: faddp %st, %st(2) ; GENERIC-NEXT: fiadds (%ecx) ; GENERIC-NEXT: fiaddl (%eax) ; GENERIC-NEXT: #NO_APP @@ -328,8 +328,8 @@ define void @test_faddp_fiadd(i16 *%a0, i32 *%a1) optsize { ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] ; ATOM-NEXT: #APP -; ATOM-NEXT: faddp %st(1) # sched: [5:5.00] -; ATOM-NEXT: faddp %st(2) # sched: [5:5.00] +; ATOM-NEXT: faddp %st, %st(1) # sched: [5:5.00] +; ATOM-NEXT: faddp %st, %st(2) # sched: [5:5.00] ; ATOM-NEXT: fiadds (%ecx) # sched: [5:5.00] ; ATOM-NEXT: fiaddl (%eax) # sched: [5:5.00] ; ATOM-NEXT: #NO_APP @@ -340,8 +340,8 @@ define void @test_faddp_fiadd(i16 *%a0, i32 *%a1) optsize { ; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] ; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] ; SLM-NEXT: #APP -; SLM-NEXT: faddp %st(1) # sched: [3:1.00] -; SLM-NEXT: faddp %st(2) # sched: [3:1.00] +; SLM-NEXT: faddp %st, %st(1) # sched: [3:1.00] +; SLM-NEXT: faddp %st, %st(2) # sched: [3:1.00] ; SLM-NEXT: fiadds (%ecx) # sched: [6:1.00] ; SLM-NEXT: fiaddl (%eax) # sched: [6:1.00] ; SLM-NEXT: #NO_APP @@ -352,8 +352,8 @@ define void @test_faddp_fiadd(i16 *%a0, i32 *%a1) optsize { ; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SANDY-NEXT: #APP -; SANDY-NEXT: faddp %st(1) # sched: [3:1.00] -; SANDY-NEXT: faddp %st(2) # sched: [3:1.00] +; SANDY-NEXT: faddp %st, %st(1) # sched: [3:1.00] +; SANDY-NEXT: faddp %st, %st(2) # sched: [3:1.00] ; SANDY-NEXT: fiadds (%ecx) # sched: [13:2.00] ; SANDY-NEXT: fiaddl (%eax) # sched: [13:2.00] ; SANDY-NEXT: #NO_APP @@ -364,8 +364,8 @@ define void @test_faddp_fiadd(i16 *%a0, i32 *%a1) optsize { ; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; HASWELL-NEXT: #APP -; HASWELL-NEXT: faddp %st(1) # sched: [3:1.00] -; HASWELL-NEXT: faddp %st(2) # sched: [3:1.00] +; HASWELL-NEXT: faddp %st, %st(1) # sched: [3:1.00] +; HASWELL-NEXT: faddp %st, %st(2) # sched: [3:1.00] ; HASWELL-NEXT: fiadds (%ecx) # sched: [13:2.00] ; HASWELL-NEXT: fiaddl (%eax) # sched: [13:2.00] ; HASWELL-NEXT: #NO_APP @@ -376,8 +376,8 @@ define void @test_faddp_fiadd(i16 *%a0, i32 *%a1) optsize { ; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: faddp %st(1) # sched: [3:1.00] -; BROADWELL-NEXT: faddp %st(2) # sched: [3:1.00] +; BROADWELL-NEXT: faddp %st, %st(1) # sched: [3:1.00] +; BROADWELL-NEXT: faddp %st, %st(2) # sched: [3:1.00] ; BROADWELL-NEXT: fiadds (%ecx) # sched: [12:2.00] ; BROADWELL-NEXT: fiaddl (%eax) # sched: [12:2.00] ; BROADWELL-NEXT: #NO_APP @@ -388,8 +388,8 @@ define void @test_faddp_fiadd(i16 *%a0, i32 *%a1) optsize { ; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: faddp %st(1) # sched: [3:1.00] -; SKYLAKE-NEXT: faddp %st(2) # sched: [3:1.00] +; SKYLAKE-NEXT: faddp %st, %st(1) # sched: [3:1.00] +; SKYLAKE-NEXT: faddp %st, %st(2) # sched: [3:1.00] ; SKYLAKE-NEXT: fiadds (%ecx) # sched: [13:2.00] ; SKYLAKE-NEXT: fiaddl (%eax) # sched: [13:2.00] ; SKYLAKE-NEXT: #NO_APP @@ -400,8 +400,8 @@ define void @test_faddp_fiadd(i16 *%a0, i32 *%a1) optsize { ; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SKX-NEXT: #APP -; SKX-NEXT: faddp %st(1) # sched: [3:1.00] -; SKX-NEXT: faddp %st(2) # sched: [3:1.00] +; SKX-NEXT: faddp %st, %st(1) # sched: [3:1.00] +; SKX-NEXT: faddp %st, %st(2) # sched: [3:1.00] ; SKX-NEXT: fiadds (%ecx) # sched: [13:2.00] ; SKX-NEXT: fiaddl (%eax) # sched: [13:2.00] ; SKX-NEXT: #NO_APP @@ -412,8 +412,8 @@ define void @test_faddp_fiadd(i16 *%a0, i32 *%a1) optsize { ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: faddp %st(1) # sched: [5:1.00] -; BDVER2-NEXT: faddp %st(2) # sched: [5:1.00] +; BDVER2-NEXT: faddp %st, %st(1) # sched: [5:1.00] +; BDVER2-NEXT: faddp %st, %st(2) # sched: [5:1.00] ; BDVER2-NEXT: fiadds (%ecx) # sched: [10:1.00] ; BDVER2-NEXT: fiaddl (%eax) # sched: [10:1.00] ; BDVER2-NEXT: #NO_APP @@ -424,8 +424,8 @@ define void @test_faddp_fiadd(i16 *%a0, i32 *%a1) optsize { ; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00] ; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:1.00] ; BTVER2-NEXT: #APP -; BTVER2-NEXT: faddp %st(1) # sched: [3:1.00] -; BTVER2-NEXT: faddp %st(2) # sched: [3:1.00] +; BTVER2-NEXT: faddp %st, %st(1) # sched: [3:1.00] +; BTVER2-NEXT: faddp %st, %st(2) # sched: [3:1.00] ; BTVER2-NEXT: fiadds (%ecx) # sched: [8:1.00] ; BTVER2-NEXT: fiaddl (%eax) # sched: [8:1.00] ; BTVER2-NEXT: #NO_APP @@ -436,8 +436,8 @@ define void @test_faddp_fiadd(i16 *%a0, i32 *%a1) optsize { ; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] ; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] ; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: faddp %st(1) # sched: [3:1.00] -; ZNVER1-NEXT: faddp %st(2) # sched: [3:1.00] +; ZNVER1-NEXT: faddp %st, %st(1) # sched: [3:1.00] +; ZNVER1-NEXT: faddp %st, %st(2) # sched: [3:1.00] ; ZNVER1-NEXT: fiadds (%ecx) # sched: [10:1.00] ; ZNVER1-NEXT: fiaddl (%eax) # sched: [10:1.00] ; ZNVER1-NEXT: #NO_APP @@ -807,154 +807,154 @@ define void @test_fcmov() optsize { ; GENERIC-LABEL: test_fcmov: ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP -; GENERIC-NEXT: fcmovb %st(1), %st(0) -; GENERIC-NEXT: fcmovbe %st(1), %st(0) -; GENERIC-NEXT: fcmove %st(1), %st(0) -; GENERIC-NEXT: fcmovnb %st(1), %st(0) -; GENERIC-NEXT: fcmovnbe %st(1), %st(0) -; GENERIC-NEXT: fcmovne %st(1), %st(0) -; GENERIC-NEXT: fcmovnu %st(1), %st(0) -; GENERIC-NEXT: fcmovu %st(1), %st(0) +; GENERIC-NEXT: fcmovb %st(1), %st +; GENERIC-NEXT: fcmovbe %st(1), %st +; GENERIC-NEXT: fcmove %st(1), %st +; GENERIC-NEXT: fcmovnb %st(1), %st +; GENERIC-NEXT: fcmovnbe %st(1), %st +; GENERIC-NEXT: fcmovne %st(1), %st +; GENERIC-NEXT: fcmovnu %st(1), %st +; GENERIC-NEXT: fcmovu %st(1), %st ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retl ; ; ATOM-LABEL: test_fcmov: ; ATOM: # %bb.0: ; ATOM-NEXT: #APP -; ATOM-NEXT: fcmovb %st(1), %st(0) # sched: [9:4.50] -; ATOM-NEXT: fcmovbe %st(1), %st(0) # sched: [9:4.50] -; ATOM-NEXT: fcmove %st(1), %st(0) # sched: [9:4.50] -; ATOM-NEXT: fcmovnb %st(1), %st(0) # sched: [9:4.50] -; ATOM-NEXT: fcmovnbe %st(1), %st(0) # sched: [9:4.50] -; ATOM-NEXT: fcmovne %st(1), %st(0) # sched: [9:4.50] -; ATOM-NEXT: fcmovnu %st(1), %st(0) # sched: [9:4.50] -; ATOM-NEXT: fcmovu %st(1), %st(0) # sched: [9:4.50] +; ATOM-NEXT: fcmovb %st(1), %st # sched: [9:4.50] +; ATOM-NEXT: fcmovbe %st(1), %st # sched: [9:4.50] +; ATOM-NEXT: fcmove %st(1), %st # sched: [9:4.50] +; ATOM-NEXT: fcmovnb %st(1), %st # sched: [9:4.50] +; ATOM-NEXT: fcmovnbe %st(1), %st # sched: [9:4.50] +; ATOM-NEXT: fcmovne %st(1), %st # sched: [9:4.50] +; ATOM-NEXT: fcmovnu %st(1), %st # sched: [9:4.50] +; ATOM-NEXT: fcmovu %st(1), %st # sched: [9:4.50] ; ATOM-NEXT: #NO_APP ; ATOM-NEXT: retl # sched: [79:39.50] ; ; SLM-LABEL: test_fcmov: ; SLM: # %bb.0: ; SLM-NEXT: #APP -; SLM-NEXT: fcmovb %st(1), %st(0) # sched: [3:1.00] -; SLM-NEXT: fcmovbe %st(1), %st(0) # sched: [3:1.00] -; SLM-NEXT: fcmove %st(1), %st(0) # sched: [3:1.00] -; SLM-NEXT: fcmovnb %st(1), %st(0) # sched: [3:1.00] -; SLM-NEXT: fcmovnbe %st(1), %st(0) # sched: [3:1.00] -; SLM-NEXT: fcmovne %st(1), %st(0) # sched: [3:1.00] -; SLM-NEXT: fcmovnu %st(1), %st(0) # sched: [3:1.00] -; SLM-NEXT: fcmovu %st(1), %st(0) # sched: [3:1.00] +; SLM-NEXT: fcmovb %st(1), %st # sched: [3:1.00] +; SLM-NEXT: fcmovbe %st(1), %st # sched: [3:1.00] +; SLM-NEXT: fcmove %st(1), %st # sched: [3:1.00] +; SLM-NEXT: fcmovnb %st(1), %st # sched: [3:1.00] +; SLM-NEXT: fcmovnbe %st(1), %st # sched: [3:1.00] +; SLM-NEXT: fcmovne %st(1), %st # sched: [3:1.00] +; SLM-NEXT: fcmovnu %st(1), %st # sched: [3:1.00] +; SLM-NEXT: fcmovu %st(1), %st # sched: [3:1.00] ; SLM-NEXT: #NO_APP ; SLM-NEXT: retl # sched: [4:1.00] ; ; SANDY-LABEL: test_fcmov: ; SANDY: # %bb.0: ; SANDY-NEXT: #APP -; SANDY-NEXT: fcmovb %st(1), %st(0) # sched: [3:2.00] -; SANDY-NEXT: fcmovbe %st(1), %st(0) # sched: [3:2.00] -; SANDY-NEXT: fcmove %st(1), %st(0) # sched: [3:2.00] -; SANDY-NEXT: fcmovnb %st(1), %st(0) # sched: [3:2.00] -; SANDY-NEXT: fcmovnbe %st(1), %st(0) # sched: [3:2.00] -; SANDY-NEXT: fcmovne %st(1), %st(0) # sched: [3:2.00] -; SANDY-NEXT: fcmovnu %st(1), %st(0) # sched: [3:2.00] -; SANDY-NEXT: fcmovu %st(1), %st(0) # sched: [3:2.00] +; SANDY-NEXT: fcmovb %st(1), %st # sched: [3:2.00] +; SANDY-NEXT: fcmovbe %st(1), %st # sched: [3:2.00] +; SANDY-NEXT: fcmove %st(1), %st # sched: [3:2.00] +; SANDY-NEXT: fcmovnb %st(1), %st # sched: [3:2.00] +; SANDY-NEXT: fcmovnbe %st(1), %st # sched: [3:2.00] +; SANDY-NEXT: fcmovne %st(1), %st # sched: [3:2.00] +; SANDY-NEXT: fcmovnu %st(1), %st # sched: [3:2.00] +; SANDY-NEXT: fcmovu %st(1), %st # sched: [3:2.00] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fcmov: ; HASWELL: # %bb.0: ; HASWELL-NEXT: #APP -; HASWELL-NEXT: fcmovb %st(1), %st(0) # sched: [3:1.00] -; HASWELL-NEXT: fcmovbe %st(1), %st(0) # sched: [3:1.00] -; HASWELL-NEXT: fcmove %st(1), %st(0) # sched: [3:1.00] -; HASWELL-NEXT: fcmovnb %st(1), %st(0) # sched: [3:1.00] -; HASWELL-NEXT: fcmovnbe %st(1), %st(0) # sched: [3:1.00] -; HASWELL-NEXT: fcmovne %st(1), %st(0) # sched: [3:1.00] -; HASWELL-NEXT: fcmovnu %st(1), %st(0) # sched: [3:1.00] -; HASWELL-NEXT: fcmovu %st(1), %st(0) # sched: [3:1.00] +; HASWELL-NEXT: fcmovb %st(1), %st # sched: [3:1.00] +; HASWELL-NEXT: fcmovbe %st(1), %st # sched: [3:1.00] +; HASWELL-NEXT: fcmove %st(1), %st # sched: [3:1.00] +; HASWELL-NEXT: fcmovnb %st(1), %st # sched: [3:1.00] +; HASWELL-NEXT: fcmovnbe %st(1), %st # sched: [3:1.00] +; HASWELL-NEXT: fcmovne %st(1), %st # sched: [3:1.00] +; HASWELL-NEXT: fcmovnu %st(1), %st # sched: [3:1.00] +; HASWELL-NEXT: fcmovu %st(1), %st # sched: [3:1.00] ; HASWELL-NEXT: #NO_APP ; HASWELL-NEXT: retl # sched: [7:1.00] ; ; BROADWELL-LABEL: test_fcmov: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fcmovb %st(1), %st(0) # sched: [3:1.00] -; BROADWELL-NEXT: fcmovbe %st(1), %st(0) # sched: [3:1.00] -; BROADWELL-NEXT: fcmove %st(1), %st(0) # sched: [3:1.00] -; BROADWELL-NEXT: fcmovnb %st(1), %st(0) # sched: [3:1.00] -; BROADWELL-NEXT: fcmovnbe %st(1), %st(0) # sched: [3:1.00] -; BROADWELL-NEXT: fcmovne %st(1), %st(0) # sched: [3:1.00] -; BROADWELL-NEXT: fcmovnu %st(1), %st(0) # sched: [3:1.00] -; BROADWELL-NEXT: fcmovu %st(1), %st(0) # sched: [3:1.00] +; BROADWELL-NEXT: fcmovb %st(1), %st # sched: [3:1.00] +; BROADWELL-NEXT: fcmovbe %st(1), %st # sched: [3:1.00] +; BROADWELL-NEXT: fcmove %st(1), %st # sched: [3:1.00] +; BROADWELL-NEXT: fcmovnb %st(1), %st # sched: [3:1.00] +; BROADWELL-NEXT: fcmovnbe %st(1), %st # sched: [3:1.00] +; BROADWELL-NEXT: fcmovne %st(1), %st # sched: [3:1.00] +; BROADWELL-NEXT: fcmovnu %st(1), %st # sched: [3:1.00] +; BROADWELL-NEXT: fcmovu %st(1), %st # sched: [3:1.00] ; BROADWELL-NEXT: #NO_APP ; BROADWELL-NEXT: retl # sched: [6:0.50] ; ; SKYLAKE-LABEL: test_fcmov: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fcmovb %st(1), %st(0) # sched: [3:1.00] -; SKYLAKE-NEXT: fcmovbe %st(1), %st(0) # sched: [3:1.00] -; SKYLAKE-NEXT: fcmove %st(1), %st(0) # sched: [3:1.00] -; SKYLAKE-NEXT: fcmovnb %st(1), %st(0) # sched: [3:1.00] -; SKYLAKE-NEXT: fcmovnbe %st(1), %st(0) # sched: [3:1.00] -; SKYLAKE-NEXT: fcmovne %st(1), %st(0) # sched: [3:1.00] -; SKYLAKE-NEXT: fcmovnu %st(1), %st(0) # sched: [3:1.00] -; SKYLAKE-NEXT: fcmovu %st(1), %st(0) # sched: [3:1.00] +; SKYLAKE-NEXT: fcmovb %st(1), %st # sched: [3:1.00] +; SKYLAKE-NEXT: fcmovbe %st(1), %st # sched: [3:1.00] +; SKYLAKE-NEXT: fcmove %st(1), %st # sched: [3:1.00] +; SKYLAKE-NEXT: fcmovnb %st(1), %st # sched: [3:1.00] +; SKYLAKE-NEXT: fcmovnbe %st(1), %st # sched: [3:1.00] +; SKYLAKE-NEXT: fcmovne %st(1), %st # sched: [3:1.00] +; SKYLAKE-NEXT: fcmovnu %st(1), %st # sched: [3:1.00] +; SKYLAKE-NEXT: fcmovu %st(1), %st # sched: [3:1.00] ; SKYLAKE-NEXT: #NO_APP ; SKYLAKE-NEXT: retl # sched: [6:0.50] ; ; SKX-LABEL: test_fcmov: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: fcmovb %st(1), %st(0) # sched: [3:1.00] -; SKX-NEXT: fcmovbe %st(1), %st(0) # sched: [3:1.00] -; SKX-NEXT: fcmove %st(1), %st(0) # sched: [3:1.00] -; SKX-NEXT: fcmovnb %st(1), %st(0) # sched: [3:1.00] -; SKX-NEXT: fcmovnbe %st(1), %st(0) # sched: [3:1.00] -; SKX-NEXT: fcmovne %st(1), %st(0) # sched: [3:1.00] -; SKX-NEXT: fcmovnu %st(1), %st(0) # sched: [3:1.00] -; SKX-NEXT: fcmovu %st(1), %st(0) # sched: [3:1.00] +; SKX-NEXT: fcmovb %st(1), %st # sched: [3:1.00] +; SKX-NEXT: fcmovbe %st(1), %st # sched: [3:1.00] +; SKX-NEXT: fcmove %st(1), %st # sched: [3:1.00] +; SKX-NEXT: fcmovnb %st(1), %st # sched: [3:1.00] +; SKX-NEXT: fcmovnbe %st(1), %st # sched: [3:1.00] +; SKX-NEXT: fcmovne %st(1), %st # sched: [3:1.00] +; SKX-NEXT: fcmovnu %st(1), %st # sched: [3:1.00] +; SKX-NEXT: fcmovu %st(1), %st # sched: [3:1.00] ; SKX-NEXT: #NO_APP ; SKX-NEXT: retl # sched: [6:0.50] ; ; BDVER2-LABEL: test_fcmov: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fcmovb %st(1), %st(0) # sched: [1:1.00] -; BDVER2-NEXT: fcmovbe %st(1), %st(0) # sched: [1:1.00] -; BDVER2-NEXT: fcmove %st(1), %st(0) # sched: [1:1.00] -; BDVER2-NEXT: fcmovnb %st(1), %st(0) # sched: [1:1.00] -; BDVER2-NEXT: fcmovnbe %st(1), %st(0) # sched: [1:1.00] -; BDVER2-NEXT: fcmovne %st(1), %st(0) # sched: [1:1.00] -; BDVER2-NEXT: fcmovnu %st(1), %st(0) # sched: [1:1.00] -; BDVER2-NEXT: fcmovu %st(1), %st(0) # sched: [1:1.00] +; BDVER2-NEXT: fcmovb %st(1), %st # sched: [1:1.00] +; BDVER2-NEXT: fcmovbe %st(1), %st # sched: [1:1.00] +; BDVER2-NEXT: fcmove %st(1), %st # sched: [1:1.00] +; BDVER2-NEXT: fcmovnb %st(1), %st # sched: [1:1.00] +; BDVER2-NEXT: fcmovnbe %st(1), %st # sched: [1:1.00] +; BDVER2-NEXT: fcmovne %st(1), %st # sched: [1:1.00] +; BDVER2-NEXT: fcmovnu %st(1), %st # sched: [1:1.00] +; BDVER2-NEXT: fcmovu %st(1), %st # sched: [1:1.00] ; BDVER2-NEXT: #NO_APP ; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fcmov: ; BTVER2: # %bb.0: ; BTVER2-NEXT: #APP -; BTVER2-NEXT: fcmovb %st(1), %st(0) # sched: [3:1.00] -; BTVER2-NEXT: fcmovbe %st(1), %st(0) # sched: [3:1.00] -; BTVER2-NEXT: fcmove %st(1), %st(0) # sched: [3:1.00] -; BTVER2-NEXT: fcmovnb %st(1), %st(0) # sched: [3:1.00] -; BTVER2-NEXT: fcmovnbe %st(1), %st(0) # sched: [3:1.00] -; BTVER2-NEXT: fcmovne %st(1), %st(0) # sched: [3:1.00] -; BTVER2-NEXT: fcmovnu %st(1), %st(0) # sched: [3:1.00] -; BTVER2-NEXT: fcmovu %st(1), %st(0) # sched: [3:1.00] +; BTVER2-NEXT: fcmovb %st(1), %st # sched: [3:1.00] +; BTVER2-NEXT: fcmovbe %st(1), %st # sched: [3:1.00] +; BTVER2-NEXT: fcmove %st(1), %st # sched: [3:1.00] +; BTVER2-NEXT: fcmovnb %st(1), %st # sched: [3:1.00] +; BTVER2-NEXT: fcmovnbe %st(1), %st # sched: [3:1.00] +; BTVER2-NEXT: fcmovne %st(1), %st # sched: [3:1.00] +; BTVER2-NEXT: fcmovnu %st(1), %st # sched: [3:1.00] +; BTVER2-NEXT: fcmovu %st(1), %st # sched: [3:1.00] ; BTVER2-NEXT: #NO_APP ; BTVER2-NEXT: retl # sched: [4:1.00] ; ; ZNVER1-LABEL: test_fcmov: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fcmovb %st(1), %st(0) # sched: [100:0.25] -; ZNVER1-NEXT: fcmovbe %st(1), %st(0) # sched: [100:0.25] -; ZNVER1-NEXT: fcmove %st(1), %st(0) # sched: [100:0.25] -; ZNVER1-NEXT: fcmovnb %st(1), %st(0) # sched: [100:0.25] -; ZNVER1-NEXT: fcmovnbe %st(1), %st(0) # sched: [100:0.25] -; ZNVER1-NEXT: fcmovne %st(1), %st(0) # sched: [100:0.25] -; ZNVER1-NEXT: fcmovnu %st(1), %st(0) # sched: [100:0.25] -; ZNVER1-NEXT: fcmovu %st(1), %st(0) # sched: [100:0.25] +; ZNVER1-NEXT: fcmovb %st(1), %st # sched: [100:0.25] +; ZNVER1-NEXT: fcmovbe %st(1), %st # sched: [100:0.25] +; ZNVER1-NEXT: fcmove %st(1), %st # sched: [100:0.25] +; ZNVER1-NEXT: fcmovnb %st(1), %st # sched: [100:0.25] +; ZNVER1-NEXT: fcmovnbe %st(1), %st # sched: [100:0.25] +; ZNVER1-NEXT: fcmovne %st(1), %st # sched: [100:0.25] +; ZNVER1-NEXT: fcmovnu %st(1), %st # sched: [100:0.25] +; ZNVER1-NEXT: fcmovu %st(1), %st # sched: [100:0.25] ; ZNVER1-NEXT: #NO_APP ; ZNVER1-NEXT: retl # sched: [1:0.50] tail call void asm sideeffect "fcmovb %st(1), %st(0) \0A\09 fcmovbe %st(1), %st(0) \0A\09 fcmove %st(1), %st(0) \0A\09 fcmovnb %st(1), %st(0) \0A\09 fcmovnbe %st(1), %st(0) \0A\09 fcmovne %st(1), %st(0) \0A\09 fcmovnu %st(1), %st(0) \0A\09 fcmovu %st(1), %st(0)", ""() nounwind @@ -1248,88 +1248,88 @@ define void @test_fcomi_fcomip() optsize { ; GENERIC-LABEL: test_fcomi_fcomip: ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP -; GENERIC-NEXT: fcomi %st(3) -; GENERIC-NEXT: fcompi %st(3) +; GENERIC-NEXT: fcomi %st(3), %st +; GENERIC-NEXT: fcompi %st(3), %st ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retl ; ; ATOM-LABEL: test_fcomi_fcomip: ; ATOM: # %bb.0: ; ATOM-NEXT: #APP -; ATOM-NEXT: fcomi %st(3) # sched: [9:4.50] -; ATOM-NEXT: fcompi %st(3) # sched: [9:4.50] +; ATOM-NEXT: fcomi %st(3), %st # sched: [9:4.50] +; ATOM-NEXT: fcompi %st(3), %st # sched: [9:4.50] ; ATOM-NEXT: #NO_APP ; ATOM-NEXT: retl # sched: [79:39.50] ; ; SLM-LABEL: test_fcomi_fcomip: ; SLM: # %bb.0: ; SLM-NEXT: #APP -; SLM-NEXT: fcomi %st(3) # sched: [3:1.00] -; SLM-NEXT: fcompi %st(3) # sched: [3:1.00] +; SLM-NEXT: fcomi %st(3), %st # sched: [3:1.00] +; SLM-NEXT: fcompi %st(3), %st # sched: [3:1.00] ; SLM-NEXT: #NO_APP ; SLM-NEXT: retl # sched: [4:1.00] ; ; SANDY-LABEL: test_fcomi_fcomip: ; SANDY: # %bb.0: ; SANDY-NEXT: #APP -; SANDY-NEXT: fcomi %st(3) # sched: [3:1.00] -; SANDY-NEXT: fcompi %st(3) # sched: [3:1.00] +; SANDY-NEXT: fcomi %st(3), %st # sched: [3:1.00] +; SANDY-NEXT: fcompi %st(3), %st # sched: [3:1.00] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fcomi_fcomip: ; HASWELL: # %bb.0: ; HASWELL-NEXT: #APP -; HASWELL-NEXT: fcomi %st(3) # sched: [1:0.50] -; HASWELL-NEXT: fcompi %st(3) # sched: [1:0.50] +; HASWELL-NEXT: fcomi %st(3), %st # sched: [1:0.50] +; HASWELL-NEXT: fcompi %st(3), %st # sched: [1:0.50] ; HASWELL-NEXT: #NO_APP ; HASWELL-NEXT: retl # sched: [7:1.00] ; ; BROADWELL-LABEL: test_fcomi_fcomip: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fcomi %st(3) # sched: [3:1.00] -; BROADWELL-NEXT: fcompi %st(3) # sched: [3:1.00] +; BROADWELL-NEXT: fcomi %st(3), %st # sched: [3:1.00] +; BROADWELL-NEXT: fcompi %st(3), %st # sched: [3:1.00] ; BROADWELL-NEXT: #NO_APP ; BROADWELL-NEXT: retl # sched: [6:0.50] ; ; SKYLAKE-LABEL: test_fcomi_fcomip: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fcomi %st(3) # sched: [2:1.00] -; SKYLAKE-NEXT: fcompi %st(3) # sched: [2:1.00] +; SKYLAKE-NEXT: fcomi %st(3), %st # sched: [2:1.00] +; SKYLAKE-NEXT: fcompi %st(3), %st # sched: [2:1.00] ; SKYLAKE-NEXT: #NO_APP ; SKYLAKE-NEXT: retl # sched: [6:0.50] ; ; SKX-LABEL: test_fcomi_fcomip: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: fcomi %st(3) # sched: [2:1.00] -; SKX-NEXT: fcompi %st(3) # sched: [2:1.00] +; SKX-NEXT: fcomi %st(3), %st # sched: [2:1.00] +; SKX-NEXT: fcompi %st(3), %st # sched: [2:1.00] ; SKX-NEXT: #NO_APP ; SKX-NEXT: retl # sched: [6:0.50] ; ; BDVER2-LABEL: test_fcomi_fcomip: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fcomi %st(3) # sched: [1:1.00] -; BDVER2-NEXT: fcompi %st(3) # sched: [1:1.00] +; BDVER2-NEXT: fcomi %st(3), %st # sched: [1:1.00] +; BDVER2-NEXT: fcompi %st(3), %st # sched: [1:1.00] ; BDVER2-NEXT: #NO_APP ; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fcomi_fcomip: ; BTVER2: # %bb.0: ; BTVER2-NEXT: #APP -; BTVER2-NEXT: fcomi %st(3) # sched: [3:1.00] -; BTVER2-NEXT: fcompi %st(3) # sched: [3:1.00] +; BTVER2-NEXT: fcomi %st(3), %st # sched: [3:1.00] +; BTVER2-NEXT: fcompi %st(3), %st # sched: [3:1.00] ; BTVER2-NEXT: #NO_APP ; BTVER2-NEXT: retl # sched: [4:1.00] ; ; ZNVER1-LABEL: test_fcomi_fcomip: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fcomi %st(3) # sched: [9:0.50] -; ZNVER1-NEXT: fcompi %st(3) # sched: [9:0.50] +; ZNVER1-NEXT: fcomi %st(3), %st # sched: [9:0.50] +; ZNVER1-NEXT: fcompi %st(3), %st # sched: [9:0.50] ; ZNVER1-NEXT: #NO_APP ; ZNVER1-NEXT: retl # sched: [1:0.50] tail call void asm sideeffect "fcomi %st(3) \0A\09 fcomip %st(3)", ""() nounwind @@ -1504,8 +1504,8 @@ define void @test_fdiv(float *%a0, double *%a1) optsize { ; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax ; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx ; GENERIC-NEXT: #APP -; GENERIC-NEXT: fdiv %st(0), %st(1) -; GENERIC-NEXT: fdiv %st(2) +; GENERIC-NEXT: fdiv %st, %st(1) +; GENERIC-NEXT: fdiv %st(2), %st ; GENERIC-NEXT: fdivs (%ecx) ; GENERIC-NEXT: fdivl (%eax) ; GENERIC-NEXT: #NO_APP @@ -1516,8 +1516,8 @@ define void @test_fdiv(float *%a0, double *%a1) optsize { ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] ; ATOM-NEXT: #APP -; ATOM-NEXT: fdiv %st(0), %st(1) # sched: [34:17.00] -; ATOM-NEXT: fdiv %st(2) # sched: [34:17.00] +; ATOM-NEXT: fdiv %st, %st(1) # sched: [34:17.00] +; ATOM-NEXT: fdiv %st(2), %st # sched: [34:17.00] ; ATOM-NEXT: fdivs (%ecx) # sched: [34:17.00] ; ATOM-NEXT: fdivl (%eax) # sched: [34:17.00] ; ATOM-NEXT: #NO_APP @@ -1528,8 +1528,8 @@ define void @test_fdiv(float *%a0, double *%a1) optsize { ; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] ; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] ; SLM-NEXT: #APP -; SLM-NEXT: fdiv %st(0), %st(1) # sched: [19:17.00] -; SLM-NEXT: fdiv %st(2) # sched: [19:17.00] +; SLM-NEXT: fdiv %st, %st(1) # sched: [19:17.00] +; SLM-NEXT: fdiv %st(2), %st # sched: [19:17.00] ; SLM-NEXT: fdivs (%ecx) # sched: [22:17.00] ; SLM-NEXT: fdivl (%eax) # sched: [22:17.00] ; SLM-NEXT: #NO_APP @@ -1540,8 +1540,8 @@ define void @test_fdiv(float *%a0, double *%a1) optsize { ; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SANDY-NEXT: #APP -; SANDY-NEXT: fdiv %st(0), %st(1) # sched: [14:14.00] -; SANDY-NEXT: fdiv %st(2) # sched: [14:14.00] +; SANDY-NEXT: fdiv %st, %st(1) # sched: [14:14.00] +; SANDY-NEXT: fdiv %st(2), %st # sched: [14:14.00] ; SANDY-NEXT: fdivs (%ecx) # sched: [31:1.00] ; SANDY-NEXT: fdivl (%eax) # sched: [31:1.00] ; SANDY-NEXT: #NO_APP @@ -1552,8 +1552,8 @@ define void @test_fdiv(float *%a0, double *%a1) optsize { ; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; HASWELL-NEXT: #APP -; HASWELL-NEXT: fdiv %st(0), %st(1) # sched: [24:1.00] -; HASWELL-NEXT: fdiv %st(2) # sched: [20:1.00] +; HASWELL-NEXT: fdiv %st, %st(1) # sched: [24:1.00] +; HASWELL-NEXT: fdiv %st(2), %st # sched: [20:1.00] ; HASWELL-NEXT: fdivs (%ecx) # sched: [31:1.00] ; HASWELL-NEXT: fdivl (%eax) # sched: [31:1.00] ; HASWELL-NEXT: #NO_APP @@ -1564,8 +1564,8 @@ define void @test_fdiv(float *%a0, double *%a1) optsize { ; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fdiv %st(0), %st(1) # sched: [15:1.00] -; BROADWELL-NEXT: fdiv %st(2) # sched: [20:1.00] +; BROADWELL-NEXT: fdiv %st, %st(1) # sched: [15:1.00] +; BROADWELL-NEXT: fdiv %st(2), %st # sched: [20:1.00] ; BROADWELL-NEXT: fdivs (%ecx) # sched: [21:1.00] ; BROADWELL-NEXT: fdivl (%eax) # sched: [21:1.00] ; BROADWELL-NEXT: #NO_APP @@ -1576,8 +1576,8 @@ define void @test_fdiv(float *%a0, double *%a1) optsize { ; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fdiv %st(0), %st(1) # sched: [15:1.00] -; SKYLAKE-NEXT: fdiv %st(2) # sched: [20:1.00] +; SKYLAKE-NEXT: fdiv %st, %st(1) # sched: [15:1.00] +; SKYLAKE-NEXT: fdiv %st(2), %st # sched: [20:1.00] ; SKYLAKE-NEXT: fdivs (%ecx) # sched: [22:1.00] ; SKYLAKE-NEXT: fdivl (%eax) # sched: [22:1.00] ; SKYLAKE-NEXT: #NO_APP @@ -1588,8 +1588,8 @@ define void @test_fdiv(float *%a0, double *%a1) optsize { ; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SKX-NEXT: #APP -; SKX-NEXT: fdiv %st(0), %st(1) # sched: [15:1.00] -; SKX-NEXT: fdiv %st(2) # sched: [20:1.00] +; SKX-NEXT: fdiv %st, %st(1) # sched: [15:1.00] +; SKX-NEXT: fdiv %st(2), %st # sched: [20:1.00] ; SKX-NEXT: fdivs (%ecx) # sched: [22:1.00] ; SKX-NEXT: fdivl (%eax) # sched: [22:1.00] ; SKX-NEXT: #NO_APP @@ -1600,8 +1600,8 @@ define void @test_fdiv(float *%a0, double *%a1) optsize { ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fdiv %st(0), %st(1) # sched: [9:9.50] -; BDVER2-NEXT: fdiv %st(2) # sched: [9:9.50] +; BDVER2-NEXT: fdiv %st, %st(1) # sched: [9:9.50] +; BDVER2-NEXT: fdiv %st(2), %st # sched: [9:9.50] ; BDVER2-NEXT: fdivs (%ecx) # sched: [14:9.50] ; BDVER2-NEXT: fdivl (%eax) # sched: [14:9.50] ; BDVER2-NEXT: #NO_APP @@ -1612,8 +1612,8 @@ define void @test_fdiv(float *%a0, double *%a1) optsize { ; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00] ; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:1.00] ; BTVER2-NEXT: #APP -; BTVER2-NEXT: fdiv %st(0), %st(1) # sched: [19:19.00] -; BTVER2-NEXT: fdiv %st(2) # sched: [19:19.00] +; BTVER2-NEXT: fdiv %st, %st(1) # sched: [19:19.00] +; BTVER2-NEXT: fdiv %st(2), %st # sched: [19:19.00] ; BTVER2-NEXT: fdivs (%ecx) # sched: [24:19.00] ; BTVER2-NEXT: fdivl (%eax) # sched: [24:19.00] ; BTVER2-NEXT: #NO_APP @@ -1624,8 +1624,8 @@ define void @test_fdiv(float *%a0, double *%a1) optsize { ; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] ; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] ; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fdiv %st(0), %st(1) # sched: [15:1.00] -; ZNVER1-NEXT: fdiv %st(2) # sched: [15:1.00] +; ZNVER1-NEXT: fdiv %st, %st(1) # sched: [15:1.00] +; ZNVER1-NEXT: fdiv %st(2), %st # sched: [15:1.00] ; ZNVER1-NEXT: fdivs (%ecx) # sched: [22:1.00] ; ZNVER1-NEXT: fdivl (%eax) # sched: [22:1.00] ; ZNVER1-NEXT: #NO_APP @@ -1640,8 +1640,8 @@ define void @test_fdivp_fidiv(i16 *%a0, i32 *%a1) optsize { ; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax ; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx ; GENERIC-NEXT: #APP -; GENERIC-NEXT: fdivp %st(1) -; GENERIC-NEXT: fdivp %st(2) +; GENERIC-NEXT: fdivp %st, %st(1) +; GENERIC-NEXT: fdivp %st, %st(2) ; GENERIC-NEXT: fidivs (%ecx) ; GENERIC-NEXT: fidivl (%eax) ; GENERIC-NEXT: #NO_APP @@ -1652,8 +1652,8 @@ define void @test_fdivp_fidiv(i16 *%a0, i32 *%a1) optsize { ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] ; ATOM-NEXT: #APP -; ATOM-NEXT: fdivp %st(1) # sched: [34:17.00] -; ATOM-NEXT: fdivp %st(2) # sched: [34:17.00] +; ATOM-NEXT: fdivp %st, %st(1) # sched: [34:17.00] +; ATOM-NEXT: fdivp %st, %st(2) # sched: [34:17.00] ; ATOM-NEXT: fidivs (%ecx) # sched: [34:17.00] ; ATOM-NEXT: fidivl (%eax) # sched: [34:17.00] ; ATOM-NEXT: #NO_APP @@ -1664,8 +1664,8 @@ define void @test_fdivp_fidiv(i16 *%a0, i32 *%a1) optsize { ; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] ; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] ; SLM-NEXT: #APP -; SLM-NEXT: fdivp %st(1) # sched: [19:17.00] -; SLM-NEXT: fdivp %st(2) # sched: [19:17.00] +; SLM-NEXT: fdivp %st, %st(1) # sched: [19:17.00] +; SLM-NEXT: fdivp %st, %st(2) # sched: [19:17.00] ; SLM-NEXT: fidivs (%ecx) # sched: [22:17.00] ; SLM-NEXT: fidivl (%eax) # sched: [22:17.00] ; SLM-NEXT: #NO_APP @@ -1676,8 +1676,8 @@ define void @test_fdivp_fidiv(i16 *%a0, i32 *%a1) optsize { ; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SANDY-NEXT: #APP -; SANDY-NEXT: fdivp %st(1) # sched: [14:14.00] -; SANDY-NEXT: fdivp %st(2) # sched: [14:14.00] +; SANDY-NEXT: fdivp %st, %st(1) # sched: [14:14.00] +; SANDY-NEXT: fdivp %st, %st(2) # sched: [14:14.00] ; SANDY-NEXT: fidivs (%ecx) # sched: [34:1.00] ; SANDY-NEXT: fidivl (%eax) # sched: [34:1.00] ; SANDY-NEXT: #NO_APP @@ -1688,8 +1688,8 @@ define void @test_fdivp_fidiv(i16 *%a0, i32 *%a1) optsize { ; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; HASWELL-NEXT: #APP -; HASWELL-NEXT: fdivp %st(1) # sched: [24:1.00] -; HASWELL-NEXT: fdivp %st(2) # sched: [24:1.00] +; HASWELL-NEXT: fdivp %st, %st(1) # sched: [24:1.00] +; HASWELL-NEXT: fdivp %st, %st(2) # sched: [24:1.00] ; HASWELL-NEXT: fidivs (%ecx) # sched: [34:1.00] ; HASWELL-NEXT: fidivl (%eax) # sched: [34:1.00] ; HASWELL-NEXT: #NO_APP @@ -1700,8 +1700,8 @@ define void @test_fdivp_fidiv(i16 *%a0, i32 *%a1) optsize { ; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fdivp %st(1) # sched: [15:1.00] -; BROADWELL-NEXT: fdivp %st(2) # sched: [15:1.00] +; BROADWELL-NEXT: fdivp %st, %st(1) # sched: [15:1.00] +; BROADWELL-NEXT: fdivp %st, %st(2) # sched: [15:1.00] ; BROADWELL-NEXT: fidivs (%ecx) # sched: [24:1.00] ; BROADWELL-NEXT: fidivl (%eax) # sched: [24:1.00] ; BROADWELL-NEXT: #NO_APP @@ -1712,8 +1712,8 @@ define void @test_fdivp_fidiv(i16 *%a0, i32 *%a1) optsize { ; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fdivp %st(1) # sched: [15:1.00] -; SKYLAKE-NEXT: fdivp %st(2) # sched: [15:1.00] +; SKYLAKE-NEXT: fdivp %st, %st(1) # sched: [15:1.00] +; SKYLAKE-NEXT: fdivp %st, %st(2) # sched: [15:1.00] ; SKYLAKE-NEXT: fidivs (%ecx) # sched: [25:1.00] ; SKYLAKE-NEXT: fidivl (%eax) # sched: [25:1.00] ; SKYLAKE-NEXT: #NO_APP @@ -1724,8 +1724,8 @@ define void @test_fdivp_fidiv(i16 *%a0, i32 *%a1) optsize { ; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SKX-NEXT: #APP -; SKX-NEXT: fdivp %st(1) # sched: [15:1.00] -; SKX-NEXT: fdivp %st(2) # sched: [15:1.00] +; SKX-NEXT: fdivp %st, %st(1) # sched: [15:1.00] +; SKX-NEXT: fdivp %st, %st(2) # sched: [15:1.00] ; SKX-NEXT: fidivs (%ecx) # sched: [25:1.00] ; SKX-NEXT: fidivl (%eax) # sched: [25:1.00] ; SKX-NEXT: #NO_APP @@ -1736,8 +1736,8 @@ define void @test_fdivp_fidiv(i16 *%a0, i32 *%a1) optsize { ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fdivp %st(1) # sched: [9:9.50] -; BDVER2-NEXT: fdivp %st(2) # sched: [9:9.50] +; BDVER2-NEXT: fdivp %st, %st(1) # sched: [9:9.50] +; BDVER2-NEXT: fdivp %st, %st(2) # sched: [9:9.50] ; BDVER2-NEXT: fidivs (%ecx) # sched: [14:9.50] ; BDVER2-NEXT: fidivl (%eax) # sched: [14:9.50] ; BDVER2-NEXT: #NO_APP @@ -1748,8 +1748,8 @@ define void @test_fdivp_fidiv(i16 *%a0, i32 *%a1) optsize { ; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00] ; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:1.00] ; BTVER2-NEXT: #APP -; BTVER2-NEXT: fdivp %st(1) # sched: [19:19.00] -; BTVER2-NEXT: fdivp %st(2) # sched: [19:19.00] +; BTVER2-NEXT: fdivp %st, %st(1) # sched: [19:19.00] +; BTVER2-NEXT: fdivp %st, %st(2) # sched: [19:19.00] ; BTVER2-NEXT: fidivs (%ecx) # sched: [24:19.00] ; BTVER2-NEXT: fidivl (%eax) # sched: [24:19.00] ; BTVER2-NEXT: #NO_APP @@ -1760,8 +1760,8 @@ define void @test_fdivp_fidiv(i16 *%a0, i32 *%a1) optsize { ; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] ; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] ; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fdivp %st(1) # sched: [15:1.00] -; ZNVER1-NEXT: fdivp %st(2) # sched: [15:1.00] +; ZNVER1-NEXT: fdivp %st, %st(1) # sched: [15:1.00] +; ZNVER1-NEXT: fdivp %st, %st(2) # sched: [15:1.00] ; ZNVER1-NEXT: fidivs (%ecx) # sched: [22:1.00] ; ZNVER1-NEXT: fidivl (%eax) # sched: [22:1.00] ; ZNVER1-NEXT: #NO_APP @@ -1776,8 +1776,8 @@ define void @test_fdivr(float *%a0, double *%a1) optsize { ; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax ; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx ; GENERIC-NEXT: #APP -; GENERIC-NEXT: fdivr %st(0), %st(1) -; GENERIC-NEXT: fdivr %st(2) +; GENERIC-NEXT: fdivr %st, %st(1) +; GENERIC-NEXT: fdivr %st(2), %st ; GENERIC-NEXT: fdivrs (%ecx) ; GENERIC-NEXT: fdivrl (%eax) ; GENERIC-NEXT: #NO_APP @@ -1788,8 +1788,8 @@ define void @test_fdivr(float *%a0, double *%a1) optsize { ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] ; ATOM-NEXT: #APP -; ATOM-NEXT: fdivr %st(0), %st(1) # sched: [34:17.00] -; ATOM-NEXT: fdivr %st(2) # sched: [34:17.00] +; ATOM-NEXT: fdivr %st, %st(1) # sched: [34:17.00] +; ATOM-NEXT: fdivr %st(2), %st # sched: [34:17.00] ; ATOM-NEXT: fdivrs (%ecx) # sched: [34:17.00] ; ATOM-NEXT: fdivrl (%eax) # sched: [34:17.00] ; ATOM-NEXT: #NO_APP @@ -1800,8 +1800,8 @@ define void @test_fdivr(float *%a0, double *%a1) optsize { ; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] ; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] ; SLM-NEXT: #APP -; SLM-NEXT: fdivr %st(0), %st(1) # sched: [19:17.00] -; SLM-NEXT: fdivr %st(2) # sched: [19:17.00] +; SLM-NEXT: fdivr %st, %st(1) # sched: [19:17.00] +; SLM-NEXT: fdivr %st(2), %st # sched: [19:17.00] ; SLM-NEXT: fdivrs (%ecx) # sched: [22:17.00] ; SLM-NEXT: fdivrl (%eax) # sched: [22:17.00] ; SLM-NEXT: #NO_APP @@ -1812,8 +1812,8 @@ define void @test_fdivr(float *%a0, double *%a1) optsize { ; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SANDY-NEXT: #APP -; SANDY-NEXT: fdivr %st(0), %st(1) # sched: [14:14.00] -; SANDY-NEXT: fdivr %st(2) # sched: [14:14.00] +; SANDY-NEXT: fdivr %st, %st(1) # sched: [14:14.00] +; SANDY-NEXT: fdivr %st(2), %st # sched: [14:14.00] ; SANDY-NEXT: fdivrs (%ecx) # sched: [31:1.00] ; SANDY-NEXT: fdivrl (%eax) # sched: [31:1.00] ; SANDY-NEXT: #NO_APP @@ -1824,8 +1824,8 @@ define void @test_fdivr(float *%a0, double *%a1) optsize { ; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; HASWELL-NEXT: #APP -; HASWELL-NEXT: fdivr %st(0), %st(1) # sched: [20:1.00] -; HASWELL-NEXT: fdivr %st(2) # sched: [24:1.00] +; HASWELL-NEXT: fdivr %st, %st(1) # sched: [20:1.00] +; HASWELL-NEXT: fdivr %st(2), %st # sched: [24:1.00] ; HASWELL-NEXT: fdivrs (%ecx) # sched: [27:1.00] ; HASWELL-NEXT: fdivrl (%eax) # sched: [27:1.00] ; HASWELL-NEXT: #NO_APP @@ -1836,8 +1836,8 @@ define void @test_fdivr(float *%a0, double *%a1) optsize { ; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fdivr %st(0), %st(1) # sched: [20:1.00] -; BROADWELL-NEXT: fdivr %st(2) # sched: [15:1.00] +; BROADWELL-NEXT: fdivr %st, %st(1) # sched: [20:1.00] +; BROADWELL-NEXT: fdivr %st(2), %st # sched: [15:1.00] ; BROADWELL-NEXT: fdivrs (%ecx) # sched: [26:1.00] ; BROADWELL-NEXT: fdivrl (%eax) # sched: [26:1.00] ; BROADWELL-NEXT: #NO_APP @@ -1848,8 +1848,8 @@ define void @test_fdivr(float *%a0, double *%a1) optsize { ; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fdivr %st(0), %st(1) # sched: [20:1.00] -; SKYLAKE-NEXT: fdivr %st(2) # sched: [15:1.00] +; SKYLAKE-NEXT: fdivr %st, %st(1) # sched: [20:1.00] +; SKYLAKE-NEXT: fdivr %st(2), %st # sched: [15:1.00] ; SKYLAKE-NEXT: fdivrs (%ecx) # sched: [27:1.00] ; SKYLAKE-NEXT: fdivrl (%eax) # sched: [27:1.00] ; SKYLAKE-NEXT: #NO_APP @@ -1860,8 +1860,8 @@ define void @test_fdivr(float *%a0, double *%a1) optsize { ; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SKX-NEXT: #APP -; SKX-NEXT: fdivr %st(0), %st(1) # sched: [20:1.00] -; SKX-NEXT: fdivr %st(2) # sched: [15:1.00] +; SKX-NEXT: fdivr %st, %st(1) # sched: [20:1.00] +; SKX-NEXT: fdivr %st(2), %st # sched: [15:1.00] ; SKX-NEXT: fdivrs (%ecx) # sched: [27:1.00] ; SKX-NEXT: fdivrl (%eax) # sched: [27:1.00] ; SKX-NEXT: #NO_APP @@ -1872,8 +1872,8 @@ define void @test_fdivr(float *%a0, double *%a1) optsize { ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fdivr %st(0), %st(1) # sched: [9:9.50] -; BDVER2-NEXT: fdivr %st(2) # sched: [9:9.50] +; BDVER2-NEXT: fdivr %st, %st(1) # sched: [9:9.50] +; BDVER2-NEXT: fdivr %st(2), %st # sched: [9:9.50] ; BDVER2-NEXT: fdivrs (%ecx) # sched: [14:9.50] ; BDVER2-NEXT: fdivrl (%eax) # sched: [14:9.50] ; BDVER2-NEXT: #NO_APP @@ -1884,8 +1884,8 @@ define void @test_fdivr(float *%a0, double *%a1) optsize { ; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00] ; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:1.00] ; BTVER2-NEXT: #APP -; BTVER2-NEXT: fdivr %st(0), %st(1) # sched: [19:19.00] -; BTVER2-NEXT: fdivr %st(2) # sched: [19:19.00] +; BTVER2-NEXT: fdivr %st, %st(1) # sched: [19:19.00] +; BTVER2-NEXT: fdivr %st(2), %st # sched: [19:19.00] ; BTVER2-NEXT: fdivrs (%ecx) # sched: [24:19.00] ; BTVER2-NEXT: fdivrl (%eax) # sched: [24:19.00] ; BTVER2-NEXT: #NO_APP @@ -1896,8 +1896,8 @@ define void @test_fdivr(float *%a0, double *%a1) optsize { ; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] ; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] ; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fdivr %st(0), %st(1) # sched: [15:1.00] -; ZNVER1-NEXT: fdivr %st(2) # sched: [15:1.00] +; ZNVER1-NEXT: fdivr %st, %st(1) # sched: [15:1.00] +; ZNVER1-NEXT: fdivr %st(2), %st # sched: [15:1.00] ; ZNVER1-NEXT: fdivrs (%ecx) # sched: [22:1.00] ; ZNVER1-NEXT: fdivrl (%eax) # sched: [22:1.00] ; ZNVER1-NEXT: #NO_APP @@ -1912,8 +1912,8 @@ define void @test_fdivrp_fidivr(i16 *%a0, i32 *%a1) optsize { ; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax ; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx ; GENERIC-NEXT: #APP -; GENERIC-NEXT: fdivrp %st(1) -; GENERIC-NEXT: fdivrp %st(2) +; GENERIC-NEXT: fdivrp %st, %st(1) +; GENERIC-NEXT: fdivrp %st, %st(2) ; GENERIC-NEXT: fidivrs (%ecx) ; GENERIC-NEXT: fidivrl (%eax) ; GENERIC-NEXT: #NO_APP @@ -1924,8 +1924,8 @@ define void @test_fdivrp_fidivr(i16 *%a0, i32 *%a1) optsize { ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] ; ATOM-NEXT: #APP -; ATOM-NEXT: fdivrp %st(1) # sched: [34:17.00] -; ATOM-NEXT: fdivrp %st(2) # sched: [34:17.00] +; ATOM-NEXT: fdivrp %st, %st(1) # sched: [34:17.00] +; ATOM-NEXT: fdivrp %st, %st(2) # sched: [34:17.00] ; ATOM-NEXT: fidivrs (%ecx) # sched: [34:17.00] ; ATOM-NEXT: fidivrl (%eax) # sched: [34:17.00] ; ATOM-NEXT: #NO_APP @@ -1936,8 +1936,8 @@ define void @test_fdivrp_fidivr(i16 *%a0, i32 *%a1) optsize { ; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] ; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] ; SLM-NEXT: #APP -; SLM-NEXT: fdivrp %st(1) # sched: [19:17.00] -; SLM-NEXT: fdivrp %st(2) # sched: [19:17.00] +; SLM-NEXT: fdivrp %st, %st(1) # sched: [19:17.00] +; SLM-NEXT: fdivrp %st, %st(2) # sched: [19:17.00] ; SLM-NEXT: fidivrs (%ecx) # sched: [22:17.00] ; SLM-NEXT: fidivrl (%eax) # sched: [22:17.00] ; SLM-NEXT: #NO_APP @@ -1948,8 +1948,8 @@ define void @test_fdivrp_fidivr(i16 *%a0, i32 *%a1) optsize { ; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SANDY-NEXT: #APP -; SANDY-NEXT: fdivrp %st(1) # sched: [14:14.00] -; SANDY-NEXT: fdivrp %st(2) # sched: [14:14.00] +; SANDY-NEXT: fdivrp %st, %st(1) # sched: [14:14.00] +; SANDY-NEXT: fdivrp %st, %st(2) # sched: [14:14.00] ; SANDY-NEXT: fidivrs (%ecx) # sched: [34:1.00] ; SANDY-NEXT: fidivrl (%eax) # sched: [34:1.00] ; SANDY-NEXT: #NO_APP @@ -1960,8 +1960,8 @@ define void @test_fdivrp_fidivr(i16 *%a0, i32 *%a1) optsize { ; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; HASWELL-NEXT: #APP -; HASWELL-NEXT: fdivrp %st(1) # sched: [20:1.00] -; HASWELL-NEXT: fdivrp %st(2) # sched: [20:1.00] +; HASWELL-NEXT: fdivrp %st, %st(1) # sched: [20:1.00] +; HASWELL-NEXT: fdivrp %st, %st(2) # sched: [20:1.00] ; HASWELL-NEXT: fidivrs (%ecx) # sched: [30:1.00] ; HASWELL-NEXT: fidivrl (%eax) # sched: [30:1.00] ; HASWELL-NEXT: #NO_APP @@ -1972,8 +1972,8 @@ define void @test_fdivrp_fidivr(i16 *%a0, i32 *%a1) optsize { ; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fdivrp %st(1) # sched: [20:1.00] -; BROADWELL-NEXT: fdivrp %st(2) # sched: [20:1.00] +; BROADWELL-NEXT: fdivrp %st, %st(1) # sched: [20:1.00] +; BROADWELL-NEXT: fdivrp %st, %st(2) # sched: [20:1.00] ; BROADWELL-NEXT: fidivrs (%ecx) # sched: [29:1.00] ; BROADWELL-NEXT: fidivrl (%eax) # sched: [29:1.00] ; BROADWELL-NEXT: #NO_APP @@ -1984,8 +1984,8 @@ define void @test_fdivrp_fidivr(i16 *%a0, i32 *%a1) optsize { ; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fdivrp %st(1) # sched: [20:1.00] -; SKYLAKE-NEXT: fdivrp %st(2) # sched: [20:1.00] +; SKYLAKE-NEXT: fdivrp %st, %st(1) # sched: [20:1.00] +; SKYLAKE-NEXT: fdivrp %st, %st(2) # sched: [20:1.00] ; SKYLAKE-NEXT: fidivrs (%ecx) # sched: [30:1.00] ; SKYLAKE-NEXT: fidivrl (%eax) # sched: [30:1.00] ; SKYLAKE-NEXT: #NO_APP @@ -1996,8 +1996,8 @@ define void @test_fdivrp_fidivr(i16 *%a0, i32 *%a1) optsize { ; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SKX-NEXT: #APP -; SKX-NEXT: fdivrp %st(1) # sched: [20:1.00] -; SKX-NEXT: fdivrp %st(2) # sched: [20:1.00] +; SKX-NEXT: fdivrp %st, %st(1) # sched: [20:1.00] +; SKX-NEXT: fdivrp %st, %st(2) # sched: [20:1.00] ; SKX-NEXT: fidivrs (%ecx) # sched: [30:1.00] ; SKX-NEXT: fidivrl (%eax) # sched: [30:1.00] ; SKX-NEXT: #NO_APP @@ -2008,8 +2008,8 @@ define void @test_fdivrp_fidivr(i16 *%a0, i32 *%a1) optsize { ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fdivrp %st(1) # sched: [9:9.50] -; BDVER2-NEXT: fdivrp %st(2) # sched: [9:9.50] +; BDVER2-NEXT: fdivrp %st, %st(1) # sched: [9:9.50] +; BDVER2-NEXT: fdivrp %st, %st(2) # sched: [9:9.50] ; BDVER2-NEXT: fidivrs (%ecx) # sched: [14:9.50] ; BDVER2-NEXT: fidivrl (%eax) # sched: [14:9.50] ; BDVER2-NEXT: #NO_APP @@ -2020,8 +2020,8 @@ define void @test_fdivrp_fidivr(i16 *%a0, i32 *%a1) optsize { ; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00] ; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:1.00] ; BTVER2-NEXT: #APP -; BTVER2-NEXT: fdivrp %st(1) # sched: [19:19.00] -; BTVER2-NEXT: fdivrp %st(2) # sched: [19:19.00] +; BTVER2-NEXT: fdivrp %st, %st(1) # sched: [19:19.00] +; BTVER2-NEXT: fdivrp %st, %st(2) # sched: [19:19.00] ; BTVER2-NEXT: fidivrs (%ecx) # sched: [24:19.00] ; BTVER2-NEXT: fidivrl (%eax) # sched: [24:19.00] ; BTVER2-NEXT: #NO_APP @@ -2032,8 +2032,8 @@ define void @test_fdivrp_fidivr(i16 *%a0, i32 *%a1) optsize { ; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] ; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] ; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fdivrp %st(1) # sched: [15:1.00] -; ZNVER1-NEXT: fdivrp %st(2) # sched: [15:1.00] +; ZNVER1-NEXT: fdivrp %st, %st(1) # sched: [15:1.00] +; ZNVER1-NEXT: fdivrp %st, %st(2) # sched: [15:1.00] ; ZNVER1-NEXT: fidivrs (%ecx) # sched: [22:1.00] ; ZNVER1-NEXT: fidivrl (%eax) # sched: [22:1.00] ; ZNVER1-NEXT: #NO_APP @@ -3243,8 +3243,8 @@ define void @test_fmul(float *%a0, double *%a1) optsize { ; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax ; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx ; GENERIC-NEXT: #APP -; GENERIC-NEXT: fmul %st(0), %st(1) -; GENERIC-NEXT: fmul %st(2) +; GENERIC-NEXT: fmul %st, %st(1) +; GENERIC-NEXT: fmul %st(2), %st ; GENERIC-NEXT: fmuls (%ecx) ; GENERIC-NEXT: fmull (%eax) ; GENERIC-NEXT: #NO_APP @@ -3255,8 +3255,8 @@ define void @test_fmul(float *%a0, double *%a1) optsize { ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] ; ATOM-NEXT: #APP -; ATOM-NEXT: fmul %st(0), %st(1) # sched: [4:4.00] -; ATOM-NEXT: fmul %st(2) # sched: [4:4.00] +; ATOM-NEXT: fmul %st, %st(1) # sched: [4:4.00] +; ATOM-NEXT: fmul %st(2), %st # sched: [4:4.00] ; ATOM-NEXT: fmuls (%ecx) # sched: [4:4.00] ; ATOM-NEXT: fmull (%eax) # sched: [4:4.00] ; ATOM-NEXT: #NO_APP @@ -3267,8 +3267,8 @@ define void @test_fmul(float *%a0, double *%a1) optsize { ; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] ; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] ; SLM-NEXT: #APP -; SLM-NEXT: fmul %st(0), %st(1) # sched: [5:2.00] -; SLM-NEXT: fmul %st(2) # sched: [5:2.00] +; SLM-NEXT: fmul %st, %st(1) # sched: [5:2.00] +; SLM-NEXT: fmul %st(2), %st # sched: [5:2.00] ; SLM-NEXT: fmuls (%ecx) # sched: [8:2.00] ; SLM-NEXT: fmull (%eax) # sched: [8:2.00] ; SLM-NEXT: #NO_APP @@ -3279,8 +3279,8 @@ define void @test_fmul(float *%a0, double *%a1) optsize { ; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SANDY-NEXT: #APP -; SANDY-NEXT: fmul %st(0), %st(1) # sched: [5:1.00] -; SANDY-NEXT: fmul %st(2) # sched: [5:1.00] +; SANDY-NEXT: fmul %st, %st(1) # sched: [5:1.00] +; SANDY-NEXT: fmul %st(2), %st # sched: [5:1.00] ; SANDY-NEXT: fmuls (%ecx) # sched: [12:1.00] ; SANDY-NEXT: fmull (%eax) # sched: [12:1.00] ; SANDY-NEXT: #NO_APP @@ -3291,8 +3291,8 @@ define void @test_fmul(float *%a0, double *%a1) optsize { ; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; HASWELL-NEXT: #APP -; HASWELL-NEXT: fmul %st(0), %st(1) # sched: [5:1.00] -; HASWELL-NEXT: fmul %st(2) # sched: [5:1.00] +; HASWELL-NEXT: fmul %st, %st(1) # sched: [5:1.00] +; HASWELL-NEXT: fmul %st(2), %st # sched: [5:1.00] ; HASWELL-NEXT: fmuls (%ecx) # sched: [12:1.00] ; HASWELL-NEXT: fmull (%eax) # sched: [12:1.00] ; HASWELL-NEXT: #NO_APP @@ -3303,8 +3303,8 @@ define void @test_fmul(float *%a0, double *%a1) optsize { ; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fmul %st(0), %st(1) # sched: [5:1.00] -; BROADWELL-NEXT: fmul %st(2) # sched: [5:1.00] +; BROADWELL-NEXT: fmul %st, %st(1) # sched: [5:1.00] +; BROADWELL-NEXT: fmul %st(2), %st # sched: [5:1.00] ; BROADWELL-NEXT: fmuls (%ecx) # sched: [11:1.00] ; BROADWELL-NEXT: fmull (%eax) # sched: [11:1.00] ; BROADWELL-NEXT: #NO_APP @@ -3315,8 +3315,8 @@ define void @test_fmul(float *%a0, double *%a1) optsize { ; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fmul %st(0), %st(1) # sched: [4:1.00] -; SKYLAKE-NEXT: fmul %st(2) # sched: [4:1.00] +; SKYLAKE-NEXT: fmul %st, %st(1) # sched: [4:1.00] +; SKYLAKE-NEXT: fmul %st(2), %st # sched: [4:1.00] ; SKYLAKE-NEXT: fmuls (%ecx) # sched: [11:1.00] ; SKYLAKE-NEXT: fmull (%eax) # sched: [11:1.00] ; SKYLAKE-NEXT: #NO_APP @@ -3327,8 +3327,8 @@ define void @test_fmul(float *%a0, double *%a1) optsize { ; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SKX-NEXT: #APP -; SKX-NEXT: fmul %st(0), %st(1) # sched: [4:1.00] -; SKX-NEXT: fmul %st(2) # sched: [4:1.00] +; SKX-NEXT: fmul %st, %st(1) # sched: [4:1.00] +; SKX-NEXT: fmul %st(2), %st # sched: [4:1.00] ; SKX-NEXT: fmuls (%ecx) # sched: [11:1.00] ; SKX-NEXT: fmull (%eax) # sched: [11:1.00] ; SKX-NEXT: #NO_APP @@ -3339,8 +3339,8 @@ define void @test_fmul(float *%a0, double *%a1) optsize { ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fmul %st(0), %st(1) # sched: [5:1.00] -; BDVER2-NEXT: fmul %st(2) # sched: [5:1.00] +; BDVER2-NEXT: fmul %st, %st(1) # sched: [5:1.00] +; BDVER2-NEXT: fmul %st(2), %st # sched: [5:1.00] ; BDVER2-NEXT: fmuls (%ecx) # sched: [10:1.00] ; BDVER2-NEXT: fmull (%eax) # sched: [10:1.00] ; BDVER2-NEXT: #NO_APP @@ -3351,8 +3351,8 @@ define void @test_fmul(float *%a0, double *%a1) optsize { ; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00] ; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:1.00] ; BTVER2-NEXT: #APP -; BTVER2-NEXT: fmul %st(0), %st(1) # sched: [2:1.00] -; BTVER2-NEXT: fmul %st(2) # sched: [2:1.00] +; BTVER2-NEXT: fmul %st, %st(1) # sched: [2:1.00] +; BTVER2-NEXT: fmul %st(2), %st # sched: [2:1.00] ; BTVER2-NEXT: fmuls (%ecx) # sched: [7:1.00] ; BTVER2-NEXT: fmull (%eax) # sched: [7:1.00] ; BTVER2-NEXT: #NO_APP @@ -3363,8 +3363,8 @@ define void @test_fmul(float *%a0, double *%a1) optsize { ; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] ; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] ; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fmul %st(0), %st(1) # sched: [3:0.50] -; ZNVER1-NEXT: fmul %st(2) # sched: [3:0.50] +; ZNVER1-NEXT: fmul %st, %st(1) # sched: [3:0.50] +; ZNVER1-NEXT: fmul %st(2), %st # sched: [3:0.50] ; ZNVER1-NEXT: fmuls (%ecx) # sched: [10:0.50] ; ZNVER1-NEXT: fmull (%eax) # sched: [10:0.50] ; ZNVER1-NEXT: #NO_APP @@ -3379,8 +3379,8 @@ define void @test_fmulp_fimul(i16 *%a0, i32 *%a1) optsize { ; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax ; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx ; GENERIC-NEXT: #APP -; GENERIC-NEXT: fmulp %st(1) -; GENERIC-NEXT: fmulp %st(2) +; GENERIC-NEXT: fmulp %st, %st(1) +; GENERIC-NEXT: fmulp %st, %st(2) ; GENERIC-NEXT: fimuls (%ecx) ; GENERIC-NEXT: fimull (%eax) ; GENERIC-NEXT: #NO_APP @@ -3391,8 +3391,8 @@ define void @test_fmulp_fimul(i16 *%a0, i32 *%a1) optsize { ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] ; ATOM-NEXT: #APP -; ATOM-NEXT: fmulp %st(1) # sched: [4:4.00] -; ATOM-NEXT: fmulp %st(2) # sched: [4:4.00] +; ATOM-NEXT: fmulp %st, %st(1) # sched: [4:4.00] +; ATOM-NEXT: fmulp %st, %st(2) # sched: [4:4.00] ; ATOM-NEXT: fimuls (%ecx) # sched: [4:4.00] ; ATOM-NEXT: fimull (%eax) # sched: [4:4.00] ; ATOM-NEXT: #NO_APP @@ -3403,8 +3403,8 @@ define void @test_fmulp_fimul(i16 *%a0, i32 *%a1) optsize { ; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] ; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] ; SLM-NEXT: #APP -; SLM-NEXT: fmulp %st(1) # sched: [5:2.00] -; SLM-NEXT: fmulp %st(2) # sched: [5:2.00] +; SLM-NEXT: fmulp %st, %st(1) # sched: [5:2.00] +; SLM-NEXT: fmulp %st, %st(2) # sched: [5:2.00] ; SLM-NEXT: fimuls (%ecx) # sched: [8:2.00] ; SLM-NEXT: fimull (%eax) # sched: [8:2.00] ; SLM-NEXT: #NO_APP @@ -3415,8 +3415,8 @@ define void @test_fmulp_fimul(i16 *%a0, i32 *%a1) optsize { ; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SANDY-NEXT: #APP -; SANDY-NEXT: fmulp %st(1) # sched: [5:1.00] -; SANDY-NEXT: fmulp %st(2) # sched: [5:1.00] +; SANDY-NEXT: fmulp %st, %st(1) # sched: [5:1.00] +; SANDY-NEXT: fmulp %st, %st(2) # sched: [5:1.00] ; SANDY-NEXT: fimuls (%ecx) # sched: [15:1.00] ; SANDY-NEXT: fimull (%eax) # sched: [15:1.00] ; SANDY-NEXT: #NO_APP @@ -3427,8 +3427,8 @@ define void @test_fmulp_fimul(i16 *%a0, i32 *%a1) optsize { ; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; HASWELL-NEXT: #APP -; HASWELL-NEXT: fmulp %st(1) # sched: [5:1.00] -; HASWELL-NEXT: fmulp %st(2) # sched: [5:1.00] +; HASWELL-NEXT: fmulp %st, %st(1) # sched: [5:1.00] +; HASWELL-NEXT: fmulp %st, %st(2) # sched: [5:1.00] ; HASWELL-NEXT: fimuls (%ecx) # sched: [15:1.00] ; HASWELL-NEXT: fimull (%eax) # sched: [15:1.00] ; HASWELL-NEXT: #NO_APP @@ -3439,8 +3439,8 @@ define void @test_fmulp_fimul(i16 *%a0, i32 *%a1) optsize { ; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fmulp %st(1) # sched: [5:1.00] -; BROADWELL-NEXT: fmulp %st(2) # sched: [5:1.00] +; BROADWELL-NEXT: fmulp %st, %st(1) # sched: [5:1.00] +; BROADWELL-NEXT: fmulp %st, %st(2) # sched: [5:1.00] ; BROADWELL-NEXT: fimuls (%ecx) # sched: [14:1.00] ; BROADWELL-NEXT: fimull (%eax) # sched: [14:1.00] ; BROADWELL-NEXT: #NO_APP @@ -3451,8 +3451,8 @@ define void @test_fmulp_fimul(i16 *%a0, i32 *%a1) optsize { ; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fmulp %st(1) # sched: [4:1.00] -; SKYLAKE-NEXT: fmulp %st(2) # sched: [4:1.00] +; SKYLAKE-NEXT: fmulp %st, %st(1) # sched: [4:1.00] +; SKYLAKE-NEXT: fmulp %st, %st(2) # sched: [4:1.00] ; SKYLAKE-NEXT: fimuls (%ecx) # sched: [14:1.00] ; SKYLAKE-NEXT: fimull (%eax) # sched: [14:1.00] ; SKYLAKE-NEXT: #NO_APP @@ -3463,8 +3463,8 @@ define void @test_fmulp_fimul(i16 *%a0, i32 *%a1) optsize { ; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SKX-NEXT: #APP -; SKX-NEXT: fmulp %st(1) # sched: [4:1.00] -; SKX-NEXT: fmulp %st(2) # sched: [4:1.00] +; SKX-NEXT: fmulp %st, %st(1) # sched: [4:1.00] +; SKX-NEXT: fmulp %st, %st(2) # sched: [4:1.00] ; SKX-NEXT: fimuls (%ecx) # sched: [14:1.00] ; SKX-NEXT: fimull (%eax) # sched: [14:1.00] ; SKX-NEXT: #NO_APP @@ -3475,8 +3475,8 @@ define void @test_fmulp_fimul(i16 *%a0, i32 *%a1) optsize { ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fmulp %st(1) # sched: [5:1.00] -; BDVER2-NEXT: fmulp %st(2) # sched: [5:1.00] +; BDVER2-NEXT: fmulp %st, %st(1) # sched: [5:1.00] +; BDVER2-NEXT: fmulp %st, %st(2) # sched: [5:1.00] ; BDVER2-NEXT: fimuls (%ecx) # sched: [10:1.00] ; BDVER2-NEXT: fimull (%eax) # sched: [10:1.00] ; BDVER2-NEXT: #NO_APP @@ -3487,8 +3487,8 @@ define void @test_fmulp_fimul(i16 *%a0, i32 *%a1) optsize { ; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00] ; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:1.00] ; BTVER2-NEXT: #APP -; BTVER2-NEXT: fmulp %st(1) # sched: [2:1.00] -; BTVER2-NEXT: fmulp %st(2) # sched: [2:1.00] +; BTVER2-NEXT: fmulp %st, %st(1) # sched: [2:1.00] +; BTVER2-NEXT: fmulp %st, %st(2) # sched: [2:1.00] ; BTVER2-NEXT: fimuls (%ecx) # sched: [7:1.00] ; BTVER2-NEXT: fimull (%eax) # sched: [7:1.00] ; BTVER2-NEXT: #NO_APP @@ -3499,8 +3499,8 @@ define void @test_fmulp_fimul(i16 *%a0, i32 *%a1) optsize { ; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] ; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] ; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fmulp %st(1) # sched: [3:0.50] -; ZNVER1-NEXT: fmulp %st(2) # sched: [3:0.50] +; ZNVER1-NEXT: fmulp %st, %st(1) # sched: [3:0.50] +; ZNVER1-NEXT: fmulp %st, %st(2) # sched: [3:0.50] ; ZNVER1-NEXT: fimuls (%ecx) # sched: [10:0.50] ; ZNVER1-NEXT: fimull (%eax) # sched: [10:0.50] ; ZNVER1-NEXT: #NO_APP @@ -4983,8 +4983,8 @@ define void @test_fsub(float *%a0, double *%a1) optsize { ; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax ; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx ; GENERIC-NEXT: #APP -; GENERIC-NEXT: fsub %st(0), %st(1) -; GENERIC-NEXT: fsub %st(2) +; GENERIC-NEXT: fsub %st, %st(1) +; GENERIC-NEXT: fsub %st(2), %st ; GENERIC-NEXT: fsubs (%ecx) ; GENERIC-NEXT: fsubl (%eax) ; GENERIC-NEXT: #NO_APP @@ -4995,8 +4995,8 @@ define void @test_fsub(float *%a0, double *%a1) optsize { ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] ; ATOM-NEXT: #APP -; ATOM-NEXT: fsub %st(0), %st(1) # sched: [5:5.00] -; ATOM-NEXT: fsub %st(2) # sched: [5:5.00] +; ATOM-NEXT: fsub %st, %st(1) # sched: [5:5.00] +; ATOM-NEXT: fsub %st(2), %st # sched: [5:5.00] ; ATOM-NEXT: fsubs (%ecx) # sched: [5:5.00] ; ATOM-NEXT: fsubl (%eax) # sched: [5:5.00] ; ATOM-NEXT: #NO_APP @@ -5007,8 +5007,8 @@ define void @test_fsub(float *%a0, double *%a1) optsize { ; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] ; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] ; SLM-NEXT: #APP -; SLM-NEXT: fsub %st(0), %st(1) # sched: [3:1.00] -; SLM-NEXT: fsub %st(2) # sched: [3:1.00] +; SLM-NEXT: fsub %st, %st(1) # sched: [3:1.00] +; SLM-NEXT: fsub %st(2), %st # sched: [3:1.00] ; SLM-NEXT: fsubs (%ecx) # sched: [6:1.00] ; SLM-NEXT: fsubl (%eax) # sched: [6:1.00] ; SLM-NEXT: #NO_APP @@ -5019,8 +5019,8 @@ define void @test_fsub(float *%a0, double *%a1) optsize { ; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SANDY-NEXT: #APP -; SANDY-NEXT: fsub %st(0), %st(1) # sched: [3:1.00] -; SANDY-NEXT: fsub %st(2) # sched: [3:1.00] +; SANDY-NEXT: fsub %st, %st(1) # sched: [3:1.00] +; SANDY-NEXT: fsub %st(2), %st # sched: [3:1.00] ; SANDY-NEXT: fsubs (%ecx) # sched: [10:1.00] ; SANDY-NEXT: fsubl (%eax) # sched: [10:1.00] ; SANDY-NEXT: #NO_APP @@ -5031,8 +5031,8 @@ define void @test_fsub(float *%a0, double *%a1) optsize { ; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; HASWELL-NEXT: #APP -; HASWELL-NEXT: fsub %st(0), %st(1) # sched: [3:1.00] -; HASWELL-NEXT: fsub %st(2) # sched: [3:1.00] +; HASWELL-NEXT: fsub %st, %st(1) # sched: [3:1.00] +; HASWELL-NEXT: fsub %st(2), %st # sched: [3:1.00] ; HASWELL-NEXT: fsubs (%ecx) # sched: [10:1.00] ; HASWELL-NEXT: fsubl (%eax) # sched: [10:1.00] ; HASWELL-NEXT: #NO_APP @@ -5043,8 +5043,8 @@ define void @test_fsub(float *%a0, double *%a1) optsize { ; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fsub %st(0), %st(1) # sched: [3:1.00] -; BROADWELL-NEXT: fsub %st(2) # sched: [3:1.00] +; BROADWELL-NEXT: fsub %st, %st(1) # sched: [3:1.00] +; BROADWELL-NEXT: fsub %st(2), %st # sched: [3:1.00] ; BROADWELL-NEXT: fsubs (%ecx) # sched: [9:1.00] ; BROADWELL-NEXT: fsubl (%eax) # sched: [9:1.00] ; BROADWELL-NEXT: #NO_APP @@ -5055,8 +5055,8 @@ define void @test_fsub(float *%a0, double *%a1) optsize { ; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fsub %st(0), %st(1) # sched: [3:1.00] -; SKYLAKE-NEXT: fsub %st(2) # sched: [3:1.00] +; SKYLAKE-NEXT: fsub %st, %st(1) # sched: [3:1.00] +; SKYLAKE-NEXT: fsub %st(2), %st # sched: [3:1.00] ; SKYLAKE-NEXT: fsubs (%ecx) # sched: [10:1.00] ; SKYLAKE-NEXT: fsubl (%eax) # sched: [10:1.00] ; SKYLAKE-NEXT: #NO_APP @@ -5067,8 +5067,8 @@ define void @test_fsub(float *%a0, double *%a1) optsize { ; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SKX-NEXT: #APP -; SKX-NEXT: fsub %st(0), %st(1) # sched: [3:1.00] -; SKX-NEXT: fsub %st(2) # sched: [3:1.00] +; SKX-NEXT: fsub %st, %st(1) # sched: [3:1.00] +; SKX-NEXT: fsub %st(2), %st # sched: [3:1.00] ; SKX-NEXT: fsubs (%ecx) # sched: [10:1.00] ; SKX-NEXT: fsubl (%eax) # sched: [10:1.00] ; SKX-NEXT: #NO_APP @@ -5079,8 +5079,8 @@ define void @test_fsub(float *%a0, double *%a1) optsize { ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fsub %st(0), %st(1) # sched: [5:1.00] -; BDVER2-NEXT: fsub %st(2) # sched: [5:1.00] +; BDVER2-NEXT: fsub %st, %st(1) # sched: [5:1.00] +; BDVER2-NEXT: fsub %st(2), %st # sched: [5:1.00] ; BDVER2-NEXT: fsubs (%ecx) # sched: [10:1.00] ; BDVER2-NEXT: fsubl (%eax) # sched: [10:1.00] ; BDVER2-NEXT: #NO_APP @@ -5091,8 +5091,8 @@ define void @test_fsub(float *%a0, double *%a1) optsize { ; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00] ; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:1.00] ; BTVER2-NEXT: #APP -; BTVER2-NEXT: fsub %st(0), %st(1) # sched: [3:1.00] -; BTVER2-NEXT: fsub %st(2) # sched: [3:1.00] +; BTVER2-NEXT: fsub %st, %st(1) # sched: [3:1.00] +; BTVER2-NEXT: fsub %st(2), %st # sched: [3:1.00] ; BTVER2-NEXT: fsubs (%ecx) # sched: [8:1.00] ; BTVER2-NEXT: fsubl (%eax) # sched: [8:1.00] ; BTVER2-NEXT: #NO_APP @@ -5103,8 +5103,8 @@ define void @test_fsub(float *%a0, double *%a1) optsize { ; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] ; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] ; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fsub %st(0), %st(1) # sched: [3:1.00] -; ZNVER1-NEXT: fsub %st(2) # sched: [3:1.00] +; ZNVER1-NEXT: fsub %st, %st(1) # sched: [3:1.00] +; ZNVER1-NEXT: fsub %st(2), %st # sched: [3:1.00] ; ZNVER1-NEXT: fsubs (%ecx) # sched: [10:1.00] ; ZNVER1-NEXT: fsubl (%eax) # sched: [10:1.00] ; ZNVER1-NEXT: #NO_APP @@ -5119,8 +5119,8 @@ define void @test_fsubp_fisub(i16 *%a0, i32 *%a1) optsize { ; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax ; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx ; GENERIC-NEXT: #APP -; GENERIC-NEXT: fsubp %st(1) -; GENERIC-NEXT: fsubp %st(2) +; GENERIC-NEXT: fsubp %st, %st(1) +; GENERIC-NEXT: fsubp %st, %st(2) ; GENERIC-NEXT: fisubs (%ecx) ; GENERIC-NEXT: fisubl (%eax) ; GENERIC-NEXT: #NO_APP @@ -5131,8 +5131,8 @@ define void @test_fsubp_fisub(i16 *%a0, i32 *%a1) optsize { ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] ; ATOM-NEXT: #APP -; ATOM-NEXT: fsubp %st(1) # sched: [5:5.00] -; ATOM-NEXT: fsubp %st(2) # sched: [5:5.00] +; ATOM-NEXT: fsubp %st, %st(1) # sched: [5:5.00] +; ATOM-NEXT: fsubp %st, %st(2) # sched: [5:5.00] ; ATOM-NEXT: fisubs (%ecx) # sched: [5:5.00] ; ATOM-NEXT: fisubl (%eax) # sched: [5:5.00] ; ATOM-NEXT: #NO_APP @@ -5143,8 +5143,8 @@ define void @test_fsubp_fisub(i16 *%a0, i32 *%a1) optsize { ; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] ; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] ; SLM-NEXT: #APP -; SLM-NEXT: fsubp %st(1) # sched: [3:1.00] -; SLM-NEXT: fsubp %st(2) # sched: [3:1.00] +; SLM-NEXT: fsubp %st, %st(1) # sched: [3:1.00] +; SLM-NEXT: fsubp %st, %st(2) # sched: [3:1.00] ; SLM-NEXT: fisubs (%ecx) # sched: [6:1.00] ; SLM-NEXT: fisubl (%eax) # sched: [6:1.00] ; SLM-NEXT: #NO_APP @@ -5155,8 +5155,8 @@ define void @test_fsubp_fisub(i16 *%a0, i32 *%a1) optsize { ; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SANDY-NEXT: #APP -; SANDY-NEXT: fsubp %st(1) # sched: [3:1.00] -; SANDY-NEXT: fsubp %st(2) # sched: [3:1.00] +; SANDY-NEXT: fsubp %st, %st(1) # sched: [3:1.00] +; SANDY-NEXT: fsubp %st, %st(2) # sched: [3:1.00] ; SANDY-NEXT: fisubs (%ecx) # sched: [13:2.00] ; SANDY-NEXT: fisubl (%eax) # sched: [13:2.00] ; SANDY-NEXT: #NO_APP @@ -5167,8 +5167,8 @@ define void @test_fsubp_fisub(i16 *%a0, i32 *%a1) optsize { ; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; HASWELL-NEXT: #APP -; HASWELL-NEXT: fsubp %st(1) # sched: [3:1.00] -; HASWELL-NEXT: fsubp %st(2) # sched: [3:1.00] +; HASWELL-NEXT: fsubp %st, %st(1) # sched: [3:1.00] +; HASWELL-NEXT: fsubp %st, %st(2) # sched: [3:1.00] ; HASWELL-NEXT: fisubs (%ecx) # sched: [13:2.00] ; HASWELL-NEXT: fisubl (%eax) # sched: [13:2.00] ; HASWELL-NEXT: #NO_APP @@ -5179,8 +5179,8 @@ define void @test_fsubp_fisub(i16 *%a0, i32 *%a1) optsize { ; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fsubp %st(1) # sched: [3:1.00] -; BROADWELL-NEXT: fsubp %st(2) # sched: [3:1.00] +; BROADWELL-NEXT: fsubp %st, %st(1) # sched: [3:1.00] +; BROADWELL-NEXT: fsubp %st, %st(2) # sched: [3:1.00] ; BROADWELL-NEXT: fisubs (%ecx) # sched: [12:2.00] ; BROADWELL-NEXT: fisubl (%eax) # sched: [12:2.00] ; BROADWELL-NEXT: #NO_APP @@ -5191,8 +5191,8 @@ define void @test_fsubp_fisub(i16 *%a0, i32 *%a1) optsize { ; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fsubp %st(1) # sched: [3:1.00] -; SKYLAKE-NEXT: fsubp %st(2) # sched: [3:1.00] +; SKYLAKE-NEXT: fsubp %st, %st(1) # sched: [3:1.00] +; SKYLAKE-NEXT: fsubp %st, %st(2) # sched: [3:1.00] ; SKYLAKE-NEXT: fisubs (%ecx) # sched: [13:2.00] ; SKYLAKE-NEXT: fisubl (%eax) # sched: [13:2.00] ; SKYLAKE-NEXT: #NO_APP @@ -5203,8 +5203,8 @@ define void @test_fsubp_fisub(i16 *%a0, i32 *%a1) optsize { ; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SKX-NEXT: #APP -; SKX-NEXT: fsubp %st(1) # sched: [3:1.00] -; SKX-NEXT: fsubp %st(2) # sched: [3:1.00] +; SKX-NEXT: fsubp %st, %st(1) # sched: [3:1.00] +; SKX-NEXT: fsubp %st, %st(2) # sched: [3:1.00] ; SKX-NEXT: fisubs (%ecx) # sched: [13:2.00] ; SKX-NEXT: fisubl (%eax) # sched: [13:2.00] ; SKX-NEXT: #NO_APP @@ -5215,8 +5215,8 @@ define void @test_fsubp_fisub(i16 *%a0, i32 *%a1) optsize { ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fsubp %st(1) # sched: [5:1.00] -; BDVER2-NEXT: fsubp %st(2) # sched: [5:1.00] +; BDVER2-NEXT: fsubp %st, %st(1) # sched: [5:1.00] +; BDVER2-NEXT: fsubp %st, %st(2) # sched: [5:1.00] ; BDVER2-NEXT: fisubs (%ecx) # sched: [10:1.00] ; BDVER2-NEXT: fisubl (%eax) # sched: [10:1.00] ; BDVER2-NEXT: #NO_APP @@ -5227,8 +5227,8 @@ define void @test_fsubp_fisub(i16 *%a0, i32 *%a1) optsize { ; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00] ; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:1.00] ; BTVER2-NEXT: #APP -; BTVER2-NEXT: fsubp %st(1) # sched: [3:1.00] -; BTVER2-NEXT: fsubp %st(2) # sched: [3:1.00] +; BTVER2-NEXT: fsubp %st, %st(1) # sched: [3:1.00] +; BTVER2-NEXT: fsubp %st, %st(2) # sched: [3:1.00] ; BTVER2-NEXT: fisubs (%ecx) # sched: [8:1.00] ; BTVER2-NEXT: fisubl (%eax) # sched: [8:1.00] ; BTVER2-NEXT: #NO_APP @@ -5239,8 +5239,8 @@ define void @test_fsubp_fisub(i16 *%a0, i32 *%a1) optsize { ; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] ; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] ; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fsubp %st(1) # sched: [3:1.00] -; ZNVER1-NEXT: fsubp %st(2) # sched: [3:1.00] +; ZNVER1-NEXT: fsubp %st, %st(1) # sched: [3:1.00] +; ZNVER1-NEXT: fsubp %st, %st(2) # sched: [3:1.00] ; ZNVER1-NEXT: fisubs (%ecx) # sched: [10:1.00] ; ZNVER1-NEXT: fisubl (%eax) # sched: [10:1.00] ; ZNVER1-NEXT: #NO_APP @@ -5255,8 +5255,8 @@ define void @test_fsubr(float *%a0, double *%a1) optsize { ; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax ; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx ; GENERIC-NEXT: #APP -; GENERIC-NEXT: fsubr %st(0), %st(1) -; GENERIC-NEXT: fsubr %st(2) +; GENERIC-NEXT: fsubr %st, %st(1) +; GENERIC-NEXT: fsubr %st(2), %st ; GENERIC-NEXT: fsubrs (%ecx) ; GENERIC-NEXT: fsubrl (%eax) ; GENERIC-NEXT: #NO_APP @@ -5267,8 +5267,8 @@ define void @test_fsubr(float *%a0, double *%a1) optsize { ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] ; ATOM-NEXT: #APP -; ATOM-NEXT: fsubr %st(0), %st(1) # sched: [5:5.00] -; ATOM-NEXT: fsubr %st(2) # sched: [5:5.00] +; ATOM-NEXT: fsubr %st, %st(1) # sched: [5:5.00] +; ATOM-NEXT: fsubr %st(2), %st # sched: [5:5.00] ; ATOM-NEXT: fsubrs (%ecx) # sched: [5:5.00] ; ATOM-NEXT: fsubrl (%eax) # sched: [5:5.00] ; ATOM-NEXT: #NO_APP @@ -5279,8 +5279,8 @@ define void @test_fsubr(float *%a0, double *%a1) optsize { ; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] ; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] ; SLM-NEXT: #APP -; SLM-NEXT: fsubr %st(0), %st(1) # sched: [3:1.00] -; SLM-NEXT: fsubr %st(2) # sched: [3:1.00] +; SLM-NEXT: fsubr %st, %st(1) # sched: [3:1.00] +; SLM-NEXT: fsubr %st(2), %st # sched: [3:1.00] ; SLM-NEXT: fsubrs (%ecx) # sched: [6:1.00] ; SLM-NEXT: fsubrl (%eax) # sched: [6:1.00] ; SLM-NEXT: #NO_APP @@ -5291,8 +5291,8 @@ define void @test_fsubr(float *%a0, double *%a1) optsize { ; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SANDY-NEXT: #APP -; SANDY-NEXT: fsubr %st(0), %st(1) # sched: [3:1.00] -; SANDY-NEXT: fsubr %st(2) # sched: [3:1.00] +; SANDY-NEXT: fsubr %st, %st(1) # sched: [3:1.00] +; SANDY-NEXT: fsubr %st(2), %st # sched: [3:1.00] ; SANDY-NEXT: fsubrs (%ecx) # sched: [10:1.00] ; SANDY-NEXT: fsubrl (%eax) # sched: [10:1.00] ; SANDY-NEXT: #NO_APP @@ -5303,8 +5303,8 @@ define void @test_fsubr(float *%a0, double *%a1) optsize { ; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; HASWELL-NEXT: #APP -; HASWELL-NEXT: fsubr %st(0), %st(1) # sched: [3:1.00] -; HASWELL-NEXT: fsubr %st(2) # sched: [3:1.00] +; HASWELL-NEXT: fsubr %st, %st(1) # sched: [3:1.00] +; HASWELL-NEXT: fsubr %st(2), %st # sched: [3:1.00] ; HASWELL-NEXT: fsubrs (%ecx) # sched: [10:1.00] ; HASWELL-NEXT: fsubrl (%eax) # sched: [10:1.00] ; HASWELL-NEXT: #NO_APP @@ -5315,8 +5315,8 @@ define void @test_fsubr(float *%a0, double *%a1) optsize { ; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fsubr %st(0), %st(1) # sched: [3:1.00] -; BROADWELL-NEXT: fsubr %st(2) # sched: [3:1.00] +; BROADWELL-NEXT: fsubr %st, %st(1) # sched: [3:1.00] +; BROADWELL-NEXT: fsubr %st(2), %st # sched: [3:1.00] ; BROADWELL-NEXT: fsubrs (%ecx) # sched: [9:1.00] ; BROADWELL-NEXT: fsubrl (%eax) # sched: [9:1.00] ; BROADWELL-NEXT: #NO_APP @@ -5327,8 +5327,8 @@ define void @test_fsubr(float *%a0, double *%a1) optsize { ; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fsubr %st(0), %st(1) # sched: [3:1.00] -; SKYLAKE-NEXT: fsubr %st(2) # sched: [3:1.00] +; SKYLAKE-NEXT: fsubr %st, %st(1) # sched: [3:1.00] +; SKYLAKE-NEXT: fsubr %st(2), %st # sched: [3:1.00] ; SKYLAKE-NEXT: fsubrs (%ecx) # sched: [10:1.00] ; SKYLAKE-NEXT: fsubrl (%eax) # sched: [10:1.00] ; SKYLAKE-NEXT: #NO_APP @@ -5339,8 +5339,8 @@ define void @test_fsubr(float *%a0, double *%a1) optsize { ; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SKX-NEXT: #APP -; SKX-NEXT: fsubr %st(0), %st(1) # sched: [3:1.00] -; SKX-NEXT: fsubr %st(2) # sched: [3:1.00] +; SKX-NEXT: fsubr %st, %st(1) # sched: [3:1.00] +; SKX-NEXT: fsubr %st(2), %st # sched: [3:1.00] ; SKX-NEXT: fsubrs (%ecx) # sched: [10:1.00] ; SKX-NEXT: fsubrl (%eax) # sched: [10:1.00] ; SKX-NEXT: #NO_APP @@ -5351,8 +5351,8 @@ define void @test_fsubr(float *%a0, double *%a1) optsize { ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fsubr %st(0), %st(1) # sched: [5:1.00] -; BDVER2-NEXT: fsubr %st(2) # sched: [5:1.00] +; BDVER2-NEXT: fsubr %st, %st(1) # sched: [5:1.00] +; BDVER2-NEXT: fsubr %st(2), %st # sched: [5:1.00] ; BDVER2-NEXT: fsubrs (%ecx) # sched: [10:1.00] ; BDVER2-NEXT: fsubrl (%eax) # sched: [10:1.00] ; BDVER2-NEXT: #NO_APP @@ -5363,8 +5363,8 @@ define void @test_fsubr(float *%a0, double *%a1) optsize { ; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00] ; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:1.00] ; BTVER2-NEXT: #APP -; BTVER2-NEXT: fsubr %st(0), %st(1) # sched: [3:1.00] -; BTVER2-NEXT: fsubr %st(2) # sched: [3:1.00] +; BTVER2-NEXT: fsubr %st, %st(1) # sched: [3:1.00] +; BTVER2-NEXT: fsubr %st(2), %st # sched: [3:1.00] ; BTVER2-NEXT: fsubrs (%ecx) # sched: [8:1.00] ; BTVER2-NEXT: fsubrl (%eax) # sched: [8:1.00] ; BTVER2-NEXT: #NO_APP @@ -5375,8 +5375,8 @@ define void @test_fsubr(float *%a0, double *%a1) optsize { ; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] ; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] ; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fsubr %st(0), %st(1) # sched: [3:1.00] -; ZNVER1-NEXT: fsubr %st(2) # sched: [3:1.00] +; ZNVER1-NEXT: fsubr %st, %st(1) # sched: [3:1.00] +; ZNVER1-NEXT: fsubr %st(2), %st # sched: [3:1.00] ; ZNVER1-NEXT: fsubrs (%ecx) # sched: [10:1.00] ; ZNVER1-NEXT: fsubrl (%eax) # sched: [10:1.00] ; ZNVER1-NEXT: #NO_APP @@ -5391,8 +5391,8 @@ define void @test_fsubrp_fisubr(i16 *%a0, i32 *%a1) optsize { ; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax ; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx ; GENERIC-NEXT: #APP -; GENERIC-NEXT: fsubrp %st(1) -; GENERIC-NEXT: fsubrp %st(2) +; GENERIC-NEXT: fsubrp %st, %st(1) +; GENERIC-NEXT: fsubrp %st, %st(2) ; GENERIC-NEXT: fisubrs (%ecx) ; GENERIC-NEXT: fisubrl (%eax) ; GENERIC-NEXT: #NO_APP @@ -5403,8 +5403,8 @@ define void @test_fsubrp_fisubr(i16 *%a0, i32 *%a1) optsize { ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] ; ATOM-NEXT: #APP -; ATOM-NEXT: fsubrp %st(1) # sched: [5:5.00] -; ATOM-NEXT: fsubrp %st(2) # sched: [5:5.00] +; ATOM-NEXT: fsubrp %st, %st(1) # sched: [5:5.00] +; ATOM-NEXT: fsubrp %st, %st(2) # sched: [5:5.00] ; ATOM-NEXT: fisubrs (%ecx) # sched: [5:5.00] ; ATOM-NEXT: fisubrl (%eax) # sched: [5:5.00] ; ATOM-NEXT: #NO_APP @@ -5415,8 +5415,8 @@ define void @test_fsubrp_fisubr(i16 *%a0, i32 *%a1) optsize { ; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] ; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] ; SLM-NEXT: #APP -; SLM-NEXT: fsubrp %st(1) # sched: [3:1.00] -; SLM-NEXT: fsubrp %st(2) # sched: [3:1.00] +; SLM-NEXT: fsubrp %st, %st(1) # sched: [3:1.00] +; SLM-NEXT: fsubrp %st, %st(2) # sched: [3:1.00] ; SLM-NEXT: fisubrs (%ecx) # sched: [6:1.00] ; SLM-NEXT: fisubrl (%eax) # sched: [6:1.00] ; SLM-NEXT: #NO_APP @@ -5427,8 +5427,8 @@ define void @test_fsubrp_fisubr(i16 *%a0, i32 *%a1) optsize { ; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SANDY-NEXT: #APP -; SANDY-NEXT: fsubrp %st(1) # sched: [3:1.00] -; SANDY-NEXT: fsubrp %st(2) # sched: [3:1.00] +; SANDY-NEXT: fsubrp %st, %st(1) # sched: [3:1.00] +; SANDY-NEXT: fsubrp %st, %st(2) # sched: [3:1.00] ; SANDY-NEXT: fisubrs (%ecx) # sched: [13:2.00] ; SANDY-NEXT: fisubrl (%eax) # sched: [13:2.00] ; SANDY-NEXT: #NO_APP @@ -5439,8 +5439,8 @@ define void @test_fsubrp_fisubr(i16 *%a0, i32 *%a1) optsize { ; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; HASWELL-NEXT: #APP -; HASWELL-NEXT: fsubrp %st(1) # sched: [3:1.00] -; HASWELL-NEXT: fsubrp %st(2) # sched: [3:1.00] +; HASWELL-NEXT: fsubrp %st, %st(1) # sched: [3:1.00] +; HASWELL-NEXT: fsubrp %st, %st(2) # sched: [3:1.00] ; HASWELL-NEXT: fisubrs (%ecx) # sched: [13:2.00] ; HASWELL-NEXT: fisubrl (%eax) # sched: [13:2.00] ; HASWELL-NEXT: #NO_APP @@ -5451,8 +5451,8 @@ define void @test_fsubrp_fisubr(i16 *%a0, i32 *%a1) optsize { ; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fsubrp %st(1) # sched: [3:1.00] -; BROADWELL-NEXT: fsubrp %st(2) # sched: [3:1.00] +; BROADWELL-NEXT: fsubrp %st, %st(1) # sched: [3:1.00] +; BROADWELL-NEXT: fsubrp %st, %st(2) # sched: [3:1.00] ; BROADWELL-NEXT: fisubrs (%ecx) # sched: [12:2.00] ; BROADWELL-NEXT: fisubrl (%eax) # sched: [12:2.00] ; BROADWELL-NEXT: #NO_APP @@ -5463,8 +5463,8 @@ define void @test_fsubrp_fisubr(i16 *%a0, i32 *%a1) optsize { ; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fsubrp %st(1) # sched: [3:1.00] -; SKYLAKE-NEXT: fsubrp %st(2) # sched: [3:1.00] +; SKYLAKE-NEXT: fsubrp %st, %st(1) # sched: [3:1.00] +; SKYLAKE-NEXT: fsubrp %st, %st(2) # sched: [3:1.00] ; SKYLAKE-NEXT: fisubrs (%ecx) # sched: [13:2.00] ; SKYLAKE-NEXT: fisubrl (%eax) # sched: [13:2.00] ; SKYLAKE-NEXT: #NO_APP @@ -5475,8 +5475,8 @@ define void @test_fsubrp_fisubr(i16 *%a0, i32 *%a1) optsize { ; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; SKX-NEXT: #APP -; SKX-NEXT: fsubrp %st(1) # sched: [3:1.00] -; SKX-NEXT: fsubrp %st(2) # sched: [3:1.00] +; SKX-NEXT: fsubrp %st, %st(1) # sched: [3:1.00] +; SKX-NEXT: fsubrp %st, %st(2) # sched: [3:1.00] ; SKX-NEXT: fisubrs (%ecx) # sched: [13:2.00] ; SKX-NEXT: fisubrl (%eax) # sched: [13:2.00] ; SKX-NEXT: #NO_APP @@ -5487,8 +5487,8 @@ define void @test_fsubrp_fisubr(i16 *%a0, i32 *%a1) optsize { ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fsubrp %st(1) # sched: [5:1.00] -; BDVER2-NEXT: fsubrp %st(2) # sched: [5:1.00] +; BDVER2-NEXT: fsubrp %st, %st(1) # sched: [5:1.00] +; BDVER2-NEXT: fsubrp %st, %st(2) # sched: [5:1.00] ; BDVER2-NEXT: fisubrs (%ecx) # sched: [10:1.00] ; BDVER2-NEXT: fisubrl (%eax) # sched: [10:1.00] ; BDVER2-NEXT: #NO_APP @@ -5499,8 +5499,8 @@ define void @test_fsubrp_fisubr(i16 *%a0, i32 *%a1) optsize { ; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00] ; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:1.00] ; BTVER2-NEXT: #APP -; BTVER2-NEXT: fsubrp %st(1) # sched: [3:1.00] -; BTVER2-NEXT: fsubrp %st(2) # sched: [3:1.00] +; BTVER2-NEXT: fsubrp %st, %st(1) # sched: [3:1.00] +; BTVER2-NEXT: fsubrp %st, %st(2) # sched: [3:1.00] ; BTVER2-NEXT: fisubrs (%ecx) # sched: [8:1.00] ; BTVER2-NEXT: fisubrl (%eax) # sched: [8:1.00] ; BTVER2-NEXT: #NO_APP @@ -5511,8 +5511,8 @@ define void @test_fsubrp_fisubr(i16 *%a0, i32 *%a1) optsize { ; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] ; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] ; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fsubrp %st(1) # sched: [3:1.00] -; ZNVER1-NEXT: fsubrp %st(2) # sched: [3:1.00] +; ZNVER1-NEXT: fsubrp %st, %st(1) # sched: [3:1.00] +; ZNVER1-NEXT: fsubrp %st, %st(2) # sched: [3:1.00] ; ZNVER1-NEXT: fisubrs (%ecx) # sched: [10:1.00] ; ZNVER1-NEXT: fisubrl (%eax) # sched: [10:1.00] ; ZNVER1-NEXT: #NO_APP @@ -5731,88 +5731,88 @@ define void @test_fucomi_fucomip() optsize { ; GENERIC-LABEL: test_fucomi_fucomip: ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP -; GENERIC-NEXT: fucomi %st(3) -; GENERIC-NEXT: fucompi %st(3) +; GENERIC-NEXT: fucomi %st(3), %st +; GENERIC-NEXT: fucompi %st(3), %st ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retl ; ; ATOM-LABEL: test_fucomi_fucomip: ; ATOM: # %bb.0: ; ATOM-NEXT: #APP -; ATOM-NEXT: fucomi %st(3) # sched: [9:4.50] -; ATOM-NEXT: fucompi %st(3) # sched: [9:4.50] +; ATOM-NEXT: fucomi %st(3), %st # sched: [9:4.50] +; ATOM-NEXT: fucompi %st(3), %st # sched: [9:4.50] ; ATOM-NEXT: #NO_APP ; ATOM-NEXT: retl # sched: [79:39.50] ; ; SLM-LABEL: test_fucomi_fucomip: ; SLM: # %bb.0: ; SLM-NEXT: #APP -; SLM-NEXT: fucomi %st(3) # sched: [3:1.00] -; SLM-NEXT: fucompi %st(3) # sched: [3:1.00] +; SLM-NEXT: fucomi %st(3), %st # sched: [3:1.00] +; SLM-NEXT: fucompi %st(3), %st # sched: [3:1.00] ; SLM-NEXT: #NO_APP ; SLM-NEXT: retl # sched: [4:1.00] ; ; SANDY-LABEL: test_fucomi_fucomip: ; SANDY: # %bb.0: ; SANDY-NEXT: #APP -; SANDY-NEXT: fucomi %st(3) # sched: [3:1.00] -; SANDY-NEXT: fucompi %st(3) # sched: [3:1.00] +; SANDY-NEXT: fucomi %st(3), %st # sched: [3:1.00] +; SANDY-NEXT: fucompi %st(3), %st # sched: [3:1.00] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fucomi_fucomip: ; HASWELL: # %bb.0: ; HASWELL-NEXT: #APP -; HASWELL-NEXT: fucomi %st(3) # sched: [1:0.50] -; HASWELL-NEXT: fucompi %st(3) # sched: [1:0.50] +; HASWELL-NEXT: fucomi %st(3), %st # sched: [1:0.50] +; HASWELL-NEXT: fucompi %st(3), %st # sched: [1:0.50] ; HASWELL-NEXT: #NO_APP ; HASWELL-NEXT: retl # sched: [7:1.00] ; ; BROADWELL-LABEL: test_fucomi_fucomip: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fucomi %st(3) # sched: [3:1.00] -; BROADWELL-NEXT: fucompi %st(3) # sched: [3:1.00] +; BROADWELL-NEXT: fucomi %st(3), %st # sched: [3:1.00] +; BROADWELL-NEXT: fucompi %st(3), %st # sched: [3:1.00] ; BROADWELL-NEXT: #NO_APP ; BROADWELL-NEXT: retl # sched: [6:0.50] ; ; SKYLAKE-LABEL: test_fucomi_fucomip: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fucomi %st(3) # sched: [2:1.00] -; SKYLAKE-NEXT: fucompi %st(3) # sched: [2:1.00] +; SKYLAKE-NEXT: fucomi %st(3), %st # sched: [2:1.00] +; SKYLAKE-NEXT: fucompi %st(3), %st # sched: [2:1.00] ; SKYLAKE-NEXT: #NO_APP ; SKYLAKE-NEXT: retl # sched: [6:0.50] ; ; SKX-LABEL: test_fucomi_fucomip: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: fucomi %st(3) # sched: [2:1.00] -; SKX-NEXT: fucompi %st(3) # sched: [2:1.00] +; SKX-NEXT: fucomi %st(3), %st # sched: [2:1.00] +; SKX-NEXT: fucompi %st(3), %st # sched: [2:1.00] ; SKX-NEXT: #NO_APP ; SKX-NEXT: retl # sched: [6:0.50] ; ; BDVER2-LABEL: test_fucomi_fucomip: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fucomi %st(3) # sched: [1:1.00] -; BDVER2-NEXT: fucompi %st(3) # sched: [1:1.00] +; BDVER2-NEXT: fucomi %st(3), %st # sched: [1:1.00] +; BDVER2-NEXT: fucompi %st(3), %st # sched: [1:1.00] ; BDVER2-NEXT: #NO_APP ; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fucomi_fucomip: ; BTVER2: # %bb.0: ; BTVER2-NEXT: #APP -; BTVER2-NEXT: fucomi %st(3) # sched: [3:1.00] -; BTVER2-NEXT: fucompi %st(3) # sched: [3:1.00] +; BTVER2-NEXT: fucomi %st(3), %st # sched: [3:1.00] +; BTVER2-NEXT: fucompi %st(3), %st # sched: [3:1.00] ; BTVER2-NEXT: #NO_APP ; BTVER2-NEXT: retl # sched: [4:1.00] ; ; ZNVER1-LABEL: test_fucomi_fucomip: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fucomi %st(3) # sched: [9:0.50] -; ZNVER1-NEXT: fucompi %st(3) # sched: [9:0.50] +; ZNVER1-NEXT: fucomi %st(3), %st # sched: [9:0.50] +; ZNVER1-NEXT: fucompi %st(3), %st # sched: [9:0.50] ; ZNVER1-NEXT: #NO_APP ; ZNVER1-NEXT: retl # sched: [1:0.50] tail call void asm sideeffect "fucomi %st(3) \0A\09 fucomip %st(3)", ""() nounwind diff --git a/llvm/test/MC/Disassembler/X86/fp-stack.txt b/llvm/test/MC/Disassembler/X86/fp-stack.txt index 8c4ad47eb8732a..1b1687b2a204b6 100644 --- a/llvm/test/MC/Disassembler/X86/fp-stack.txt +++ b/llvm/test/MC/Disassembler/X86/fp-stack.txt @@ -1,52 +1,52 @@ # RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s # RUN: llvm-mc --disassemble %s -triple=i686-apple-darwin9 | FileCheck %s -# CHECK: fadd %st(0) +# CHECK: fadd %st(0), %st 0xd8,0xc0 -# CHECK: fadd %st(1) +# CHECK: fadd %st(1), %st 0xd8,0xc1 -# CHECK: fadd %st(2) +# CHECK: fadd %st(2), %st 0xd8,0xc2 -# CHECK: fadd %st(3) +# CHECK: fadd %st(3), %st 0xd8,0xc3 -# CHECK: fadd %st(4) +# CHECK: fadd %st(4), %st 0xd8,0xc4 -# CHECK: fadd %st(5) +# CHECK: fadd %st(5), %st 0xd8,0xc5 -# CHECK: fadd %st(6) +# CHECK: fadd %st(6), %st 0xd8,0xc6 -# CHECK: fadd %st(7) +# CHECK: fadd %st(7), %st 0xd8,0xc7 -# CHECK: fmul %st(0) +# CHECK: fmul %st(0), %st 0xd8,0xc8 -# CHECK: fmul %st(1) +# CHECK: fmul %st(1), %st 0xd8,0xc9 -# CHECK: fmul %st(2) +# CHECK: fmul %st(2), %st 0xd8,0xca -# CHECK: fmul %st(3) +# CHECK: fmul %st(3), %st 0xd8,0xcb -# CHECK: fmul %st(4) +# CHECK: fmul %st(4), %st 0xd8,0xcc -# CHECK: fmul %st(5) +# CHECK: fmul %st(5), %st 0xd8,0xcd -# CHECK: fmul %st(6) +# CHECK: fmul %st(6), %st 0xd8,0xce -# CHECK: fmul %st(7) +# CHECK: fmul %st(7), %st 0xd8,0xcf # CHECK: fcom %st(0) @@ -97,100 +97,100 @@ # CHECK: fcomp %st(7) 0xd8,0xdf -# CHECK: fsub %st(0) +# CHECK: fsub %st(0), %st 0xd8,0xe0 -# CHECK: fsub %st(1) +# CHECK: fsub %st(1), %st 0xd8,0xe1 -# CHECK: fsub %st(2) +# CHECK: fsub %st(2), %st 0xd8,0xe2 -# CHECK: fsub %st(3) +# CHECK: fsub %st(3), %st 0xd8,0xe3 -# CHECK: fsub %st(4) +# CHECK: fsub %st(4), %st 0xd8,0xe4 -# CHECK: fsub %st(5) +# CHECK: fsub %st(5), %st 0xd8,0xe5 -# CHECK: fsub %st(6) +# CHECK: fsub %st(6), %st 0xd8,0xe6 -# CHECK: fsub %st(7) +# CHECK: fsub %st(7), %st 0xd8,0xe7 -# CHECK: fsubr %st(0) +# CHECK: fsubr %st(0), %st 0xd8,0xe8 -# CHECK: fsubr %st(1) +# CHECK: fsubr %st(1), %st 0xd8,0xe9 -# CHECK: fsubr %st(2) +# CHECK: fsubr %st(2), %st 0xd8,0xea -# CHECK: fsubr %st(3) +# CHECK: fsubr %st(3), %st 0xd8,0xeb -# CHECK: fsubr %st(4) +# CHECK: fsubr %st(4), %st 0xd8,0xec -# CHECK: fsubr %st(5) +# CHECK: fsubr %st(5), %st 0xd8,0xed -# CHECK: fsubr %st(6) +# CHECK: fsubr %st(6), %st 0xd8,0xee -# CHECK: fsubr %st(7) +# CHECK: fsubr %st(7), %st 0xd8,0xef -# CHECK: fdiv %st(0) +# CHECK: fdiv %st(0), %st 0xd8,0xf0 -# CHECK: fdiv %st(1) +# CHECK: fdiv %st(1), %st 0xd8,0xf1 -# CHECK: fdiv %st(2) +# CHECK: fdiv %st(2), %st 0xd8,0xf2 -# CHECK: fdiv %st(3) +# CHECK: fdiv %st(3), %st 0xd8,0xf3 -# CHECK: fdiv %st(4) +# CHECK: fdiv %st(4), %st 0xd8,0xf4 -# CHECK: fdiv %st(5) +# CHECK: fdiv %st(5), %st 0xd8,0xf5 -# CHECK: fdiv %st(6) +# CHECK: fdiv %st(6), %st 0xd8,0xf6 -# CHECK: fdiv %st(7) +# CHECK: fdiv %st(7), %st 0xd8,0xf7 -# CHECK: fdivr %st(0) +# CHECK: fdivr %st(0), %st 0xd8,0xf8 -# CHECK: fdivr %st(1) +# CHECK: fdivr %st(1), %st 0xd8,0xf9 -# CHECK: fdivr %st(2) +# CHECK: fdivr %st(2), %st 0xd8,0xfa -# CHECK: fdivr %st(3) +# CHECK: fdivr %st(3), %st 0xd8,0xfb -# CHECK: fdivr %st(4) +# CHECK: fdivr %st(4), %st 0xd8,0xfc -# CHECK: fdivr %st(5) +# CHECK: fdivr %st(5), %st 0xd8,0xfd -# CHECK: fdivr %st(6) +# CHECK: fdivr %st(6), %st 0xd8,0xfe -# CHECK: fdivr %st(7) +# CHECK: fdivr %st(7), %st 0xd8,0xff # CHECK: fld %st(0) @@ -325,199 +325,199 @@ # CHECK: fcos 0xd9,0xff -# CHECK: fcmovb %st(0), %st(0) +# CHECK: fcmovb %st(0), %st 0xda,0xc0 -# CHECK: fcmovb %st(1), %st(0) +# CHECK: fcmovb %st(1), %st 0xda,0xc1 -# CHECK: fcmovb %st(2), %st(0) +# CHECK: fcmovb %st(2), %st 0xda,0xc2 -# CHECK: fcmovb %st(3), %st(0) +# CHECK: fcmovb %st(3), %st 0xda,0xc3 -# CHECK: fcmovb %st(4), %st(0) +# CHECK: fcmovb %st(4), %st 0xda,0xc4 -# CHECK: fcmovb %st(5), %st(0) +# CHECK: fcmovb %st(5), %st 0xda,0xc5 -# CHECK: fcmovb %st(6), %st(0) +# CHECK: fcmovb %st(6), %st 0xda,0xc6 -# CHECK: fcmovb %st(7), %st(0) +# CHECK: fcmovb %st(7), %st 0xda,0xc7 -# CHECK: fcmove %st(0), %st(0) +# CHECK: fcmove %st(0), %st 0xda,0xc8 -# CHECK: fcmove %st(1), %st(0) +# CHECK: fcmove %st(1), %st 0xda,0xc9 -# CHECK: fcmove %st(2), %st(0) +# CHECK: fcmove %st(2), %st 0xda,0xca -# CHECK: fcmove %st(3), %st(0) +# CHECK: fcmove %st(3), %st 0xda,0xcb -# CHECK: fcmove %st(4), %st(0) +# CHECK: fcmove %st(4), %st 0xda,0xcc -# CHECK: fcmove %st(5), %st(0) +# CHECK: fcmove %st(5), %st 0xda,0xcd -# CHECK: fcmove %st(6), %st(0) +# CHECK: fcmove %st(6), %st 0xda,0xce -# CHECK: fcmove %st(7), %st(0) +# CHECK: fcmove %st(7), %st 0xda,0xcf -# CHECK: fcmovbe %st(0), %st(0) +# CHECK: fcmovbe %st(0), %st 0xda,0xd0 -# CHECK: fcmovbe %st(1), %st(0) +# CHECK: fcmovbe %st(1), %st 0xda,0xd1 -# CHECK: fcmovbe %st(2), %st(0) +# CHECK: fcmovbe %st(2), %st 0xda,0xd2 -# CHECK: fcmovbe %st(3), %st(0) +# CHECK: fcmovbe %st(3), %st 0xda,0xd3 -# CHECK: fcmovbe %st(4), %st(0) +# CHECK: fcmovbe %st(4), %st 0xda,0xd4 -# CHECK: fcmovbe %st(5), %st(0) +# CHECK: fcmovbe %st(5), %st 0xda,0xd5 -# CHECK: fcmovbe %st(6), %st(0) +# CHECK: fcmovbe %st(6), %st 0xda,0xd6 -# CHECK: fcmovbe %st(7), %st(0) +# CHECK: fcmovbe %st(7), %st 0xda,0xd7 -# CHECK: fcmovu %st(0), %st(0) +# CHECK: fcmovu %st(0), %st 0xda,0xd8 -# CHECK: fcmovu %st(1), %st(0) +# CHECK: fcmovu %st(1), %st 0xda,0xd9 -# CHECK: fcmovu %st(2), %st(0) +# CHECK: fcmovu %st(2), %st 0xda,0xda -# CHECK: fcmovu %st(3), %st(0) +# CHECK: fcmovu %st(3), %st 0xda,0xdb -# CHECK: fcmovu %st(4), %st(0) +# CHECK: fcmovu %st(4), %st 0xda,0xdc -# CHECK: fcmovu %st(5), %st(0) +# CHECK: fcmovu %st(5), %st 0xda,0xdd -# CHECK: fcmovu %st(6), %st(0) +# CHECK: fcmovu %st(6), %st 0xda,0xde -# CHECK: fcmovu %st(7), %st(0) +# CHECK: fcmovu %st(7), %st 0xda,0xdf # CHECK: fucompp 0xda,0xe9 -# CHECK: fcmovnb %st(0), %st(0) +# CHECK: fcmovnb %st(0), %st 0xdb,0xc0 -# CHECK: fcmovnb %st(1), %st(0) +# CHECK: fcmovnb %st(1), %st 0xdb,0xc1 -# CHECK: fcmovnb %st(2), %st(0) +# CHECK: fcmovnb %st(2), %st 0xdb,0xc2 -# CHECK: fcmovnb %st(3), %st(0) +# CHECK: fcmovnb %st(3), %st 0xdb,0xc3 -# CHECK: fcmovnb %st(4), %st(0) +# CHECK: fcmovnb %st(4), %st 0xdb,0xc4 -# CHECK: fcmovnb %st(5), %st(0) +# CHECK: fcmovnb %st(5), %st 0xdb,0xc5 -# CHECK: fcmovnb %st(6), %st(0) +# CHECK: fcmovnb %st(6), %st 0xdb,0xc6 -# CHECK: fcmovnb %st(7), %st(0) +# CHECK: fcmovnb %st(7), %st 0xdb,0xc7 -# CHECK: fcmovne %st(0), %st(0) +# CHECK: fcmovne %st(0), %st 0xdb,0xc8 -# CHECK: fcmovne %st(1), %st(0) +# CHECK: fcmovne %st(1), %st 0xdb,0xc9 -# CHECK: fcmovne %st(2), %st(0) +# CHECK: fcmovne %st(2), %st 0xdb,0xca -# CHECK: fcmovne %st(3), %st(0) +# CHECK: fcmovne %st(3), %st 0xdb,0xcb -# CHECK: fcmovne %st(4), %st(0) +# CHECK: fcmovne %st(4), %st 0xdb,0xcc -# CHECK: fcmovne %st(5), %st(0) +# CHECK: fcmovne %st(5), %st 0xdb,0xcd -# CHECK: fcmovne %st(6), %st(0) +# CHECK: fcmovne %st(6), %st 0xdb,0xce -# CHECK: fcmovne %st(7), %st(0) +# CHECK: fcmovne %st(7), %st 0xdb,0xcf -# CHECK: fcmovnbe %st(0), %st(0) +# CHECK: fcmovnbe %st(0), %st 0xdb,0xd0 -# CHECK: fcmovnbe %st(1), %st(0) +# CHECK: fcmovnbe %st(1), %st 0xdb,0xd1 -# CHECK: fcmovnbe %st(2), %st(0) +# CHECK: fcmovnbe %st(2), %st 0xdb,0xd2 -# CHECK: fcmovnbe %st(3), %st(0) +# CHECK: fcmovnbe %st(3), %st 0xdb,0xd3 -# CHECK: fcmovnbe %st(4), %st(0) +# CHECK: fcmovnbe %st(4), %st 0xdb,0xd4 -# CHECK: fcmovnbe %st(5), %st(0) +# CHECK: fcmovnbe %st(5), %st 0xdb,0xd5 -# CHECK: fcmovnbe %st(6), %st(0) +# CHECK: fcmovnbe %st(6), %st 0xdb,0xd6 -# CHECK: fcmovnbe %st(7), %st(0) +# CHECK: fcmovnbe %st(7), %st 0xdb,0xd7 -# CHECK: fcmovnu %st(0), %st(0) +# CHECK: fcmovnu %st(0), %st 0xdb,0xd8 -# CHECK: fcmovnu %st(1), %st(0) +# CHECK: fcmovnu %st(1), %st 0xdb,0xd9 -# CHECK: fcmovnu %st(2), %st(0) +# CHECK: fcmovnu %st(2), %st 0xdb,0xda -# CHECK: fcmovnu %st(3), %st(0) +# CHECK: fcmovnu %st(3), %st 0xdb,0xdb -# CHECK: fcmovnu %st(4), %st(0) +# CHECK: fcmovnu %st(4), %st 0xdb,0xdc -# CHECK: fcmovnu %st(5), %st(0) +# CHECK: fcmovnu %st(5), %st 0xdb,0xdd -# CHECK: fcmovnu %st(6), %st(0) +# CHECK: fcmovnu %st(6), %st 0xdb,0xde -# CHECK: fcmovnu %st(7), %st(0) +# CHECK: fcmovnu %st(7), %st 0xdb,0xdf # CHECK: fnclex @@ -574,148 +574,148 @@ # CHECK: fcomi %st(7) 0xdb,0xf7 -# CHECK: fadd %st(0), %st(0) +# CHECK: fadd %st, %st(0) 0xdc,0xc0 -# CHECK: fadd %st(0), %st(1) +# CHECK: fadd %st, %st(1) 0xdc,0xc1 -# CHECK: fadd %st(0), %st(2) +# CHECK: fadd %st, %st(2) 0xdc,0xc2 -# CHECK: fadd %st(0), %st(3) +# CHECK: fadd %st, %st(3) 0xdc,0xc3 -# CHECK: fadd %st(0), %st(4) +# CHECK: fadd %st, %st(4) 0xdc,0xc4 -# CHECK: fadd %st(0), %st(5) +# CHECK: fadd %st, %st(5) 0xdc,0xc5 -# CHECK: fadd %st(0), %st(6) +# CHECK: fadd %st, %st(6) 0xdc,0xc6 -# CHECK: fadd %st(0), %st(7) +# CHECK: fadd %st, %st(7) 0xdc,0xc7 -# CHECK: fmul %st(0), %st(0) +# CHECK: fmul %st, %st(0) 0xdc,0xc8 -# CHECK: fmul %st(0), %st(1) +# CHECK: fmul %st, %st(1) 0xdc,0xc9 -# CHECK: fmul %st(0), %st(2) +# CHECK: fmul %st, %st(2) 0xdc,0xca -# CHECK: fmul %st(0), %st(3) +# CHECK: fmul %st, %st(3) 0xdc,0xcb -# CHECK: fmul %st(0), %st(4) +# CHECK: fmul %st, %st(4) 0xdc,0xcc -# CHECK: fmul %st(0), %st(5) +# CHECK: fmul %st, %st(5) 0xdc,0xcd -# CHECK: fmul %st(0), %st(6) +# CHECK: fmul %st, %st(6) 0xdc,0xce -# CHECK: fmul %st(0), %st(7) +# CHECK: fmul %st, %st(7) 0xdc,0xcf -# CHECK: fsub %st(0), %st(0) +# CHECK: fsub %st, %st(0) 0xdc,0xe0 -# CHECK: fsub %st(0), %st(1) +# CHECK: fsub %st, %st(1) 0xdc,0xe1 -# CHECK: fsub %st(0), %st(2) +# CHECK: fsub %st, %st(2) 0xdc,0xe2 -# CHECK: fsub %st(0), %st(3) +# CHECK: fsub %st, %st(3) 0xdc,0xe3 -# CHECK: fsub %st(0), %st(4) +# CHECK: fsub %st, %st(4) 0xdc,0xe4 -# CHECK: fsub %st(0), %st(5) +# CHECK: fsub %st, %st(5) 0xdc,0xe5 -# CHECK: fsub %st(0), %st(6) +# CHECK: fsub %st, %st(6) 0xdc,0xe6 -# CHECK: fsub %st(0), %st(7) +# CHECK: fsub %st, %st(7) 0xdc,0xe7 -# CHECK: fsubr %st(0), %st(0) +# CHECK: fsubr %st, %st(0) 0xdc,0xe8 -# CHECK: fsubr %st(0), %st(1) +# CHECK: fsubr %st, %st(1) 0xdc,0xe9 -# CHECK: fsubr %st(0), %st(2) +# CHECK: fsubr %st, %st(2) 0xdc,0xea -# CHECK: fsubr %st(0), %st(3) +# CHECK: fsubr %st, %st(3) 0xdc,0xeb -# CHECK: fsubr %st(0), %st(4) +# CHECK: fsubr %st, %st(4) 0xdc,0xec -# CHECK: fsubr %st(0), %st(5) +# CHECK: fsubr %st, %st(5) 0xdc,0xed -# CHECK: fsubr %st(0), %st(6) +# CHECK: fsubr %st, %st(6) 0xdc,0xee -# CHECK: fsubr %st(0), %st(7) +# CHECK: fsubr %st, %st(7) 0xdc,0xef -# CHECK: fdiv %st(0), %st(0) +# CHECK: fdiv %st, %st(0) 0xdc,0xf0 -# CHECK: fdiv %st(0), %st(1) +# CHECK: fdiv %st, %st(1) 0xdc,0xf1 -# CHECK: fdiv %st(0), %st(2) +# CHECK: fdiv %st, %st(2) 0xdc,0xf2 -# CHECK: fdiv %st(0), %st(3) +# CHECK: fdiv %st, %st(3) 0xdc,0xf3 -# CHECK: fdiv %st(0), %st(4) +# CHECK: fdiv %st, %st(4) 0xdc,0xf4 -# CHECK: fdiv %st(0), %st(5) +# CHECK: fdiv %st, %st(5) 0xdc,0xf5 -# CHECK: fdiv %st(0), %st(6) +# CHECK: fdiv %st, %st(6) 0xdc,0xf6 -# CHECK: fdiv %st(0), %st(7) +# CHECK: fdiv %st, %st(7) 0xdc,0xf7 -# CHECK: fdivr %st(0), %st(0) +# CHECK: fdivr %st, %st(0) 0xdc,0xf8 -# CHECK: fdivr %st(0), %st(1) +# CHECK: fdivr %st, %st(1) 0xdc,0xf9 -# CHECK: fdivr %st(0), %st(2) +# CHECK: fdivr %st, %st(2) 0xdc,0xfa -# CHECK: fdivr %st(0), %st(3) +# CHECK: fdivr %st, %st(3) 0xdc,0xfb -# CHECK: fdivr %st(0), %st(4) +# CHECK: fdivr %st, %st(4) 0xdc,0xfc -# CHECK: fdivr %st(0), %st(5) +# CHECK: fdivr %st, %st(5) 0xdc,0xfd -# CHECK: fdivr %st(0), %st(6) +# CHECK: fdivr %st, %st(6) 0xdc,0xfe -# CHECK: fdivr %st(0), %st(7) +# CHECK: fdivr %st, %st(7) 0xdc,0xff # CHECK: ffree %st(0) @@ -838,151 +838,151 @@ # CHECK: fucomp %st(7) 0xdd,0xef -# CHECK: faddp %st(0) +# CHECK: faddp %st, %st(0) 0xde,0xc0 -# CHECK: faddp %st(1) +# CHECK: faddp %st, %st(1) 0xde,0xc1 -# CHECK: faddp %st(2) +# CHECK: faddp %st, %st(2) 0xde,0xc2 -# CHECK: faddp %st(3) +# CHECK: faddp %st, %st(3) 0xde,0xc3 -# CHECK: faddp %st(4) +# CHECK: faddp %st, %st(4) 0xde,0xc4 -# CHECK: faddp %st(5) +# CHECK: faddp %st, %st(5) 0xde,0xc5 -# CHECK: faddp %st(6) +# CHECK: faddp %st, %st(6) 0xde,0xc6 -# CHECK: faddp %st(7) +# CHECK: faddp %st, %st(7) 0xde,0xc7 -# CHECK: fmulp %st(0) +# CHECK: fmulp %st, %st(0) 0xde,0xc8 -# CHECK: fmulp %st(1) +# CHECK: fmulp %st, %st(1) 0xde,0xc9 -# CHECK: fmulp %st(2) +# CHECK: fmulp %st, %st(2) 0xde,0xca -# CHECK: fmulp %st(3) +# CHECK: fmulp %st, %st(3) 0xde,0xcb -# CHECK: fmulp %st(4) +# CHECK: fmulp %st, %st(4) 0xde,0xcc -# CHECK: fmulp %st(5) +# CHECK: fmulp %st, %st(5) 0xde,0xcd -# CHECK: fmulp %st(6) +# CHECK: fmulp %st, %st(6) 0xde,0xce -# CHECK: fmulp %st(7) +# CHECK: fmulp %st, %st(7) 0xde,0xcf # CHECK: fcompp 0xde,0xd9 -# CHECK: fsubp %st(0) +# CHECK: fsubp %st, %st(0) 0xde,0xe0 -# CHECK: fsubp %st(1) +# CHECK: fsubp %st, %st(1) 0xde,0xe1 -# CHECK: fsubp %st(2) +# CHECK: fsubp %st, %st(2) 0xde,0xe2 -# CHECK: fsubp %st(3) +# CHECK: fsubp %st, %st(3) 0xde,0xe3 -# CHECK: fsubp %st(4) +# CHECK: fsubp %st, %st(4) 0xde,0xe4 -# CHECK: fsubp %st(5) +# CHECK: fsubp %st, %st(5) 0xde,0xe5 -# CHECK: fsubp %st(6) +# CHECK: fsubp %st, %st(6) 0xde,0xe6 -# CHECK: fsubp %st(7) +# CHECK: fsubp %st, %st(7) 0xde,0xe7 -# CHECK: fsubrp %st(0) +# CHECK: fsubrp %st, %st(0) 0xde,0xe8 -# CHECK: fsubrp %st(1) +# CHECK: fsubrp %st, %st(1) 0xde,0xe9 -# CHECK: fsubrp %st(2) +# CHECK: fsubrp %st, %st(2) 0xde,0xea -# CHECK: fsubrp %st(3) +# CHECK: fsubrp %st, %st(3) 0xde,0xeb -# CHECK: fsubrp %st(4) +# CHECK: fsubrp %st, %st(4) 0xde,0xec -# CHECK: fsubrp %st(5) +# CHECK: fsubrp %st, %st(5) 0xde,0xed -# CHECK: fsubrp %st(6) +# CHECK: fsubrp %st, %st(6) 0xde,0xee -# CHECK: fsubrp %st(7) +# CHECK: fsubrp %st, %st(7) 0xde,0xef -# CHECK: fdivp %st(0) +# CHECK: fdivp %st, %st(0) 0xde,0xf0 -# CHECK: fdivp %st(1) +# CHECK: fdivp %st, %st(1) 0xde,0xf1 -# CHECK: fdivp %st(2) +# CHECK: fdivp %st, %st(2) 0xde,0xf2 -# CHECK: fdivp %st(3) +# CHECK: fdivp %st, %st(3) 0xde,0xf3 -# CHECK: fdivp %st(4) +# CHECK: fdivp %st, %st(4) 0xde,0xf4 -# CHECK: fdivp %st(5) +# CHECK: fdivp %st, %st(5) 0xde,0xf5 -# CHECK: fdivp %st(6) +# CHECK: fdivp %st, %st(6) 0xde,0xf6 -# CHECK: fdivp %st(7) +# CHECK: fdivp %st, %st(7) 0xde,0xf7 -# CHECK: fdivrp %st(0) +# CHECK: fdivrp %st, %st(0) 0xde,0xf8 -# CHECK: fdivrp %st(1) +# CHECK: fdivrp %st, %st(1) 0xde,0xf9 -# CHECK: fdivrp %st(2) +# CHECK: fdivrp %st, %st(2) 0xde,0xfa -# CHECK: fdivrp %st(3) +# CHECK: fdivrp %st, %st(3) 0xde,0xfb -# CHECK: fdivrp %st(4) +# CHECK: fdivrp %st, %st(4) 0xde,0xfc -# CHECK: fdivrp %st(5) +# CHECK: fdivrp %st, %st(5) 0xde,0xfd -# CHECK: fdivrp %st(6) +# CHECK: fdivrp %st, %st(6) 0xde,0xfe -# CHECK: fdivrp %st(7) +# CHECK: fdivrp %st, %st(7) 0xde,0xff # CHECK: ffreep %st(0) diff --git a/llvm/test/MC/Disassembler/X86/x86-16.txt b/llvm/test/MC/Disassembler/X86/x86-16.txt index 43cd09516c3b16..286aa88489cb48 100644 --- a/llvm/test/MC/Disassembler/X86/x86-16.txt +++ b/llvm/test/MC/Disassembler/X86/x86-16.txt @@ -759,10 +759,10 @@ # CHECK: strl %eax 0x66 0x0f 0x00 0xc8 -# CHECK: fsubp %st(1) +# CHECK: fsubp %st, %st(1) 0xde 0xe1 -# CHECK: fsubp %st(2) +# CHECK: fsubp %st, %st(2) 0xde 0xe2 # CHECKX: nop diff --git a/llvm/test/MC/X86/PPRO-32.s b/llvm/test/MC/X86/PPRO-32.s index bbd933e58af093..6deea6de9c449b 100644 --- a/llvm/test/MC/X86/PPRO-32.s +++ b/llvm/test/MC/X86/PPRO-32.s @@ -64,37 +64,37 @@ cmovpl %eax, %eax // CHECK: encoding: [0x0f,0x48,0xc0] cmovsl %eax, %eax -// CHECK: fcmovbe %st(4), %st(0) +// CHECK: fcmovbe %st(4), %st // CHECK: encoding: [0xda,0xd4] -fcmovbe %st(4), %st(0) +fcmovbe %st(4), %st -// CHECK: fcmovb %st(4), %st(0) +// CHECK: fcmovb %st(4), %st // CHECK: encoding: [0xda,0xc4] -fcmovb %st(4), %st(0) +fcmovb %st(4), %st -// CHECK: fcmove %st(4), %st(0) +// CHECK: fcmove %st(4), %st // CHECK: encoding: [0xda,0xcc] -fcmove %st(4), %st(0) +fcmove %st(4), %st -// CHECK: fcmovnbe %st(4), %st(0) +// CHECK: fcmovnbe %st(4), %st // CHECK: encoding: [0xdb,0xd4] -fcmovnbe %st(4), %st(0) +fcmovnbe %st(4), %st -// CHECK: fcmovnb %st(4), %st(0) +// CHECK: fcmovnb %st(4), %st // CHECK: encoding: [0xdb,0xc4] -fcmovnb %st(4), %st(0) +fcmovnb %st(4), %st -// CHECK: fcmovne %st(4), %st(0) +// CHECK: fcmovne %st(4), %st // CHECK: encoding: [0xdb,0xcc] -fcmovne %st(4), %st(0) +fcmovne %st(4), %st -// CHECK: fcmovnu %st(4), %st(0) +// CHECK: fcmovnu %st(4), %st // CHECK: encoding: [0xdb,0xdc] -fcmovnu %st(4), %st(0) +fcmovnu %st(4), %st -// CHECK: fcmovu %st(4), %st(0) +// CHECK: fcmovu %st(4), %st // CHECK: encoding: [0xda,0xdc] -fcmovu %st(4), %st(0) +fcmovu %st(4), %st // CHECK: fcomi %st(4) // CHECK: encoding: [0xdb,0xf4] diff --git a/llvm/test/MC/X86/PPRO-64.s b/llvm/test/MC/X86/PPRO-64.s index a41d4a0f93cc98..8004772e8f3def 100644 --- a/llvm/test/MC/X86/PPRO-64.s +++ b/llvm/test/MC/X86/PPRO-64.s @@ -64,37 +64,37 @@ cmovpl %r13d, %r13d // CHECK: encoding: [0x45,0x0f,0x48,0xed] cmovsl %r13d, %r13d -// CHECK: fcmovbe %st(4), %st(0) +// CHECK: fcmovbe %st(4), %st // CHECK: encoding: [0xda,0xd4] -fcmovbe %st(4), %st(0) +fcmovbe %st(4), %st -// CHECK: fcmovb %st(4), %st(0) +// CHECK: fcmovb %st(4), %st // CHECK: encoding: [0xda,0xc4] -fcmovb %st(4), %st(0) +fcmovb %st(4), %st -// CHECK: fcmove %st(4), %st(0) +// CHECK: fcmove %st(4), %st // CHECK: encoding: [0xda,0xcc] -fcmove %st(4), %st(0) +fcmove %st(4), %st -// CHECK: fcmovnbe %st(4), %st(0) +// CHECK: fcmovnbe %st(4), %st // CHECK: encoding: [0xdb,0xd4] -fcmovnbe %st(4), %st(0) +fcmovnbe %st(4), %st -// CHECK: fcmovnb %st(4), %st(0) +// CHECK: fcmovnb %st(4), %st // CHECK: encoding: [0xdb,0xc4] -fcmovnb %st(4), %st(0) +fcmovnb %st(4), %st -// CHECK: fcmovne %st(4), %st(0) +// CHECK: fcmovne %st(4), %st // CHECK: encoding: [0xdb,0xcc] -fcmovne %st(4), %st(0) +fcmovne %st(4), %st -// CHECK: fcmovnu %st(4), %st(0) +// CHECK: fcmovnu %st(4), %st // CHECK: encoding: [0xdb,0xdc] -fcmovnu %st(4), %st(0) +fcmovnu %st(4), %st -// CHECK: fcmovu %st(4), %st(0) +// CHECK: fcmovu %st(4), %st // CHECK: encoding: [0xda,0xdc] -fcmovu %st(4), %st(0) +fcmovu %st(4), %st // CHECK: fcomi %st(4) // CHECK: encoding: [0xdb,0xf4] diff --git a/llvm/test/MC/X86/X87-32.s b/llvm/test/MC/X86/X87-32.s index 967763de93e057..d414ab65b7dc45 100755 --- a/llvm/test/MC/X86/X87-32.s +++ b/llvm/test/MC/X86/X87-32.s @@ -31,7 +31,7 @@ faddl 64(%edx,%eax) // CHECK: encoding: [0xdc,0x02] faddl (%edx) -// CHECK: faddp %st(4) +// CHECK: faddp %st, %st(4) // CHECK: encoding: [0xde,0xc4] faddp %st(4) @@ -59,11 +59,11 @@ fadds 64(%edx,%eax) // CHECK: encoding: [0xd8,0x02] fadds (%edx) -// CHECK: fadd %st(0), %st(4) +// CHECK: fadd %st, %st(4) // CHECK: encoding: [0xdc,0xc4] -fadd %st(0), %st(4) +fadd %st, %st(4) -// CHECK: fadd %st(4) +// CHECK: fadd %st(4), %st // CHECK: encoding: [0xd8,0xc4] fadd %st(4) @@ -259,7 +259,7 @@ fdivl 64(%edx,%eax) // CHECK: encoding: [0xdc,0x32] fdivl (%edx) -// CHECK: fdivp %st(4) +// CHECK: fdivp %st, %st(4) // CHECK: encoding: [0xde,0xf4] fdivp %st(4) @@ -287,7 +287,7 @@ fdivrl 64(%edx,%eax) // CHECK: encoding: [0xdc,0x3a] fdivrl (%edx) -// CHECK: fdivrp %st(4) +// CHECK: fdivrp %st, %st(4) // CHECK: encoding: [0xde,0xfc] fdivrp %st(4) @@ -315,11 +315,11 @@ fdivrs 64(%edx,%eax) // CHECK: encoding: [0xd8,0x3a] fdivrs (%edx) -// CHECK: fdivr %st(0), %st(4) +// CHECK: fdivr %st, %st(4) // CHECK: encoding: [0xdc,0xfc] -fdivr %st(0), %st(4) +fdivr %st, %st(4) -// CHECK: fdivr %st(4) +// CHECK: fdivr %st(4), %st // CHECK: encoding: [0xd8,0xfc] fdivr %st(4) @@ -347,11 +347,11 @@ fdivs 64(%edx,%eax) // CHECK: encoding: [0xd8,0x32] fdivs (%edx) -// CHECK: fdiv %st(0), %st(4) +// CHECK: fdiv %st, %st(4) // CHECK: encoding: [0xdc,0xf4] -fdiv %st(0), %st(4) +fdiv %st, %st(4) -// CHECK: fdiv %st(4) +// CHECK: fdiv %st(4), %st // CHECK: encoding: [0xd8,0xf4] fdiv %st(4) @@ -1119,7 +1119,7 @@ fmull 64(%edx,%eax) // CHECK: encoding: [0xdc,0x0a] fmull (%edx) -// CHECK: fmulp %st(4) +// CHECK: fmulp %st, %st(4) // CHECK: encoding: [0xde,0xcc] fmulp %st(4) @@ -1147,11 +1147,11 @@ fmuls 64(%edx,%eax) // CHECK: encoding: [0xd8,0x0a] fmuls (%edx) -// CHECK: fmul %st(0), %st(4) +// CHECK: fmul %st, %st(4) // CHECK: encoding: [0xdc,0xcc] -fmul %st(0), %st(4) +fmul %st, %st(4) -// CHECK: fmul %st(4) +// CHECK: fmul %st(4), %st // CHECK: encoding: [0xd8,0xcc] fmul %st(4) @@ -1479,7 +1479,7 @@ fsubl 64(%edx,%eax) // CHECK: encoding: [0xdc,0x22] fsubl (%edx) -// CHECK: fsubp %st(4) +// CHECK: fsubp %st, %st(4) // CHECK: encoding: [0xde,0xe4] fsubp %st(4) @@ -1507,7 +1507,7 @@ fsubrl 64(%edx,%eax) // CHECK: encoding: [0xdc,0x2a] fsubrl (%edx) -// CHECK: fsubrp %st(4) +// CHECK: fsubrp %st, %st(4) // CHECK: encoding: [0xde,0xec] fsubrp %st(4) @@ -1535,11 +1535,11 @@ fsubrs 64(%edx,%eax) // CHECK: encoding: [0xd8,0x2a] fsubrs (%edx) -// CHECK: fsubr %st(0), %st(4) +// CHECK: fsubr %st, %st(4) // CHECK: encoding: [0xdc,0xec] -fsubr %st(0), %st(4) +fsubr %st, %st(4) -// CHECK: fsubr %st(4) +// CHECK: fsubr %st(4), %st // CHECK: encoding: [0xd8,0xec] fsubr %st(4) @@ -1567,11 +1567,11 @@ fsubs 64(%edx,%eax) // CHECK: encoding: [0xd8,0x22] fsubs (%edx) -// CHECK: fsub %st(0), %st(4) +// CHECK: fsub %st, %st(4) // CHECK: encoding: [0xdc,0xe4] -fsub %st(0), %st(4) +fsub %st, %st(4) -// CHECK: fsub %st(4) +// CHECK: fsub %st(4), %st // CHECK: encoding: [0xd8,0xe4] fsub %st(4) diff --git a/llvm/test/MC/X86/X87-64.s b/llvm/test/MC/X86/X87-64.s index a3b76b4e4b0ad4..1103f326ea047a 100755 --- a/llvm/test/MC/X86/X87-64.s +++ b/llvm/test/MC/X86/X87-64.s @@ -31,7 +31,7 @@ faddl 64(%rdx,%rax) // CHECK: encoding: [0xdc,0x02] faddl (%rdx) -// CHECK: faddp %st(4) +// CHECK: faddp %st, %st(4) // CHECK: encoding: [0xde,0xc4] faddp %st(4) @@ -59,11 +59,11 @@ fadds 64(%rdx,%rax) // CHECK: encoding: [0xd8,0x02] fadds (%rdx) -// CHECK: fadd %st(0), %st(4) +// CHECK: fadd %st, %st(4) // CHECK: encoding: [0xdc,0xc4] -fadd %st(0), %st(4) +fadd %st, %st(4) -// CHECK: fadd %st(4) +// CHECK: fadd %st(4), %st // CHECK: encoding: [0xd8,0xc4] fadd %st(4) @@ -259,7 +259,7 @@ fdivl 64(%rdx,%rax) // CHECK: encoding: [0xdc,0x32] fdivl (%rdx) -// CHECK: fdivp %st(4) +// CHECK: fdivp %st, %st(4) // CHECK: encoding: [0xde,0xf4] fdivp %st(4) @@ -287,7 +287,7 @@ fdivrl 64(%rdx,%rax) // CHECK: encoding: [0xdc,0x3a] fdivrl (%rdx) -// CHECK: fdivrp %st(4) +// CHECK: fdivrp %st, %st(4) // CHECK: encoding: [0xde,0xfc] fdivrp %st(4) @@ -315,11 +315,11 @@ fdivrs 64(%rdx,%rax) // CHECK: encoding: [0xd8,0x3a] fdivrs (%rdx) -// CHECK: fdivr %st(0), %st(4) +// CHECK: fdivr %st, %st(4) // CHECK: encoding: [0xdc,0xfc] -fdivr %st(0), %st(4) +fdivr %st, %st(4) -// CHECK: fdivr %st(4) +// CHECK: fdivr %st(4), %st // CHECK: encoding: [0xd8,0xfc] fdivr %st(4) @@ -347,11 +347,11 @@ fdivs 64(%rdx,%rax) // CHECK: encoding: [0xd8,0x32] fdivs (%rdx) -// CHECK: fdiv %st(0), %st(4) +// CHECK: fdiv %st, %st(4) // CHECK: encoding: [0xdc,0xf4] -fdiv %st(0), %st(4) +fdiv %st, %st(4) -// CHECK: fdiv %st(4) +// CHECK: fdiv %st(4), %st // CHECK: encoding: [0xd8,0xf4] fdiv %st(4) @@ -1119,7 +1119,7 @@ fmull 64(%rdx,%rax) // CHECK: encoding: [0xdc,0x0a] fmull (%rdx) -// CHECK: fmulp %st(4) +// CHECK: fmulp %st, %st(4) // CHECK: encoding: [0xde,0xcc] fmulp %st(4) @@ -1147,9 +1147,9 @@ fmuls 64(%rdx,%rax) // CHECK: encoding: [0xd8,0x0a] fmuls (%rdx) -// CHECK: fmul %st(0), %st(4) +// CHECK: fmul %st, %st(4) // CHECK: encoding: [0xdc,0xcc] -fmul %st(0), %st(4) +fmul %st, %st(4) // CHECK: fmul %st(4) // CHECK: encoding: [0xd8,0xcc] @@ -1479,7 +1479,7 @@ fsubl 64(%rdx,%rax) // CHECK: encoding: [0xdc,0x22] fsubl (%rdx) -// CHECK: fsubp %st(4) +// CHECK: fsubp %st, %st(4) // CHECK: encoding: [0xde,0xe4] fsubp %st(4) @@ -1507,7 +1507,7 @@ fsubrl 64(%rdx,%rax) // CHECK: encoding: [0xdc,0x2a] fsubrl (%rdx) -// CHECK: fsubrp %st(4) +// CHECK: fsubrp %st, %st(4) // CHECK: encoding: [0xde,0xec] fsubrp %st(4) @@ -1535,11 +1535,11 @@ fsubrs 64(%rdx,%rax) // CHECK: encoding: [0xd8,0x2a] fsubrs (%rdx) -// CHECK: fsubr %st(0), %st(4) +// CHECK: fsubr %st, %st(4) // CHECK: encoding: [0xdc,0xec] -fsubr %st(0), %st(4) +fsubr %st, %st(4) -// CHECK: fsubr %st(4) +// CHECK: fsubr %st(4), %st // CHECK: encoding: [0xd8,0xec] fsubr %st(4) @@ -1567,11 +1567,11 @@ fsubs 64(%rdx,%rax) // CHECK: encoding: [0xd8,0x22] fsubs (%rdx) -// CHECK: fsub %st(0), %st(4) +// CHECK: fsub %st, %st(4) // CHECK: encoding: [0xdc,0xe4] -fsub %st(0), %st(4) +fsub %st, %st(4) -// CHECK: fsub %st(4) +// CHECK: fsub %st(4), %st // CHECK: encoding: [0xd8,0xe4] fsub %st(4) diff --git a/llvm/test/MC/X86/intel-syntax-2.s b/llvm/test/MC/X86/intel-syntax-2.s index aead5766db4d52..b23965ae52e88e 100644 --- a/llvm/test/MC/X86/intel-syntax-2.s +++ b/llvm/test/MC/X86/intel-syntax-2.s @@ -18,14 +18,14 @@ _test2: _test3: fadd -// CHECK: faddp %st(1) +// CHECK: faddp %st, %st(1) fmul -// CHECK: fmulp %st(1) +// CHECK: fmulp %st, %st(1) fsub -// CHECK: fsubp %st(1) +// CHECK: fsubp %st, %st(1) fsubr -// CHECK: fsubrp %st(1) +// CHECK: fsubrp %st, %st(1) fdiv -// CHECK: fdivp %st(1) +// CHECK: fdivp %st, %st(1) fdivr -// CHECK: fdivrp %st(1) +// CHECK: fdivrp %st, %st(1) diff --git a/llvm/test/MC/X86/intel-syntax.s b/llvm/test/MC/X86/intel-syntax.s index 428a7e4ec41f1f..171357e1f5212e 100644 --- a/llvm/test/MC/X86/intel-syntax.s +++ b/llvm/test/MC/X86/intel-syntax.s @@ -556,12 +556,12 @@ fnstsw fnstsw AX fnstsw WORD PTR [EAX] -// CHECK: faddp %st(1) -// CHECK: fmulp %st(1) -// CHECK: fsubrp %st(1) -// CHECK: fsubp %st(1) -// CHECK: fdivrp %st(1) -// CHECK: fdivp %st(1) +// CHECK: faddp %st, %st(1) +// CHECK: fmulp %st, %st(1) +// CHECK: fsubrp %st, %st(1) +// CHECK: fsubp %st, %st(1) +// CHECK: fdivrp %st, %st(1) +// CHECK: fdivp %st, %st(1) faddp ST(1), ST(0) fmulp ST(1), ST(0) fsubp ST(1), ST(0) @@ -569,12 +569,12 @@ fsubrp ST(1), ST(0) fdivp ST(1), ST(0) fdivrp ST(1), ST(0) -// CHECK: faddp %st(1) -// CHECK: fmulp %st(1) -// CHECK: fsubrp %st(1) -// CHECK: fsubp %st(1) -// CHECK: fdivrp %st(1) -// CHECK: fdivp %st(1) +// CHECK: faddp %st, %st(1) +// CHECK: fmulp %st, %st(1) +// CHECK: fsubrp %st, %st(1) +// CHECK: fsubp %st, %st(1) +// CHECK: fdivrp %st, %st(1) +// CHECK: fdivp %st, %st(1) faddp ST(0), ST(1) fmulp ST(0), ST(1) fsubp ST(0), ST(1) @@ -582,12 +582,12 @@ fsubrp ST(0), ST(1) fdivp ST(0), ST(1) fdivrp ST(0), ST(1) -// CHECK: faddp %st(1) -// CHECK: fmulp %st(1) -// CHECK: fsubrp %st(1) -// CHECK: fsubp %st(1) -// CHECK: fdivrp %st(1) -// CHECK: fdivp %st(1) +// CHECK: faddp %st, %st(1) +// CHECK: fmulp %st, %st(1) +// CHECK: fsubrp %st, %st(1) +// CHECK: fsubp %st, %st(1) +// CHECK: fdivrp %st, %st(1) +// CHECK: fdivp %st, %st(1) faddp ST(1) fmulp ST(1) fsubp ST(1) @@ -596,12 +596,12 @@ fdivp ST(1) fdivrp ST(1) -// CHECK: faddp %st(1) -// CHECK: fmulp %st(1) -// CHECK: fsubrp %st(1) -// CHECK: fsubp %st(1) -// CHECK: fdivrp %st(1) -// CHECK: fdivp %st(1) +// CHECK: faddp %st, %st(1) +// CHECK: fmulp %st, %st(1) +// CHECK: fsubrp %st, %st(1) +// CHECK: fsubp %st, %st(1) +// CHECK: fdivrp %st, %st(1) +// CHECK: fdivp %st, %st(1) fadd fmul fsub @@ -609,12 +609,12 @@ fsubr fdiv fdivr -// CHECK: faddp %st(1) -// CHECK: fmulp %st(1) -// CHECK: fsubrp %st(1) -// CHECK: fsubp %st(1) -// CHECK: fdivrp %st(1) -// CHECK: fdivp %st(1) +// CHECK: faddp %st, %st(1) +// CHECK: fmulp %st, %st(1) +// CHECK: fsubrp %st, %st(1) +// CHECK: fsubp %st, %st(1) +// CHECK: fdivrp %st, %st(1) +// CHECK: fdivp %st, %st(1) faddp fmulp fsubp @@ -622,12 +622,12 @@ fsubrp fdivp fdivrp -// CHECK: fadd %st(1) -// CHECK: fmul %st(1) -// CHECK: fsub %st(1) -// CHECK: fsubr %st(1) -// CHECK: fdiv %st(1) -// CHECK: fdivr %st(1) +// CHECK: fadd %st(1), %st +// CHECK: fmul %st(1), %st +// CHECK: fsub %st(1), %st +// CHECK: fsubr %st(1), %st +// CHECK: fdiv %st(1), %st +// CHECK: fdivr %st(1), %st fadd ST(0), ST(1) fmul ST(0), ST(1) fsub ST(0), ST(1) @@ -635,12 +635,12 @@ fsubr ST(0), ST(1) fdiv ST(0), ST(1) fdivr ST(0), ST(1) -// CHECK: fadd %st(0), %st(1) -// CHECK: fmul %st(0), %st(1) -// CHECK: fsubr %st(0), %st(1) -// CHECK: fsub %st(0), %st(1) -// CHECK: fdivr %st(0), %st(1) -// CHECK: fdiv %st(0), %st(1) +// CHECK: fadd %st, %st(1) +// CHECK: fmul %st, %st(1) +// CHECK: fsubr %st, %st(1) +// CHECK: fsub %st, %st(1) +// CHECK: fdivr %st, %st(1) +// CHECK: fdiv %st, %st(1) fadd ST(1), ST(0) fmul ST(1), ST(0) fsub ST(1), ST(0) @@ -648,12 +648,12 @@ fsubr ST(1), ST(0) fdiv ST(1), ST(0) fdivr ST(1), ST(0) -// CHECK: fadd %st(1) -// CHECK: fmul %st(1) -// CHECK: fsub %st(1) -// CHECK: fsubr %st(1) -// CHECK: fdiv %st(1) -// CHECK: fdivr %st(1) +// CHECK: fadd %st(1), %st +// CHECK: fmul %st(1), %st +// CHECK: fsub %st(1), %st +// CHECK: fsubr %st(1), %st +// CHECK: fdiv %st(1), %st +// CHECK: fdivr %st(1), %st fadd ST(1) fmul ST(1) fsub ST(1) diff --git a/llvm/test/MC/X86/x86-16.s b/llvm/test/MC/X86/x86-16.s index 50263beaa67567..88ee77a6d4143a 100644 --- a/llvm/test/MC/X86/x86-16.s +++ b/llvm/test/MC/X86/x86-16.s @@ -920,11 +920,11 @@ pshufw $90, %mm4, %mm0 str %eax -// CHECK: fsubp +// CHECK: fsubp %st, %st(1) // CHECK: encoding: [0xde,0xe1] fsubp %st,%st(1) -// CHECK: fsubp %st(2) +// CHECK: fsubp %st, %st(2) // CHECK: encoding: [0xde,0xe2] fsubp %st, %st(2) diff --git a/llvm/test/MC/X86/x86-32-coverage.s b/llvm/test/MC/X86/x86-32-coverage.s index 32281f41216445..ba59fb357b9741 100644 --- a/llvm/test/MC/X86/x86-32-coverage.s +++ b/llvm/test/MC/X86/x86-32-coverage.s @@ -2472,11 +2472,11 @@ // CHECK: encoding: [0xda,0x05,0x78,0x56,0x34,0x12] fiaddl 0x12345678 -// CHECK: faddp %st(2) +// CHECK: faddp %st, %st(2) // CHECK: encoding: [0xde,0xc2] faddp %st(2) -// CHECK: fsub %st(2) +// CHECK: fsub %st(2), %st // CHECK: encoding: [0xd8,0xe2] fsub %st(2) @@ -2504,11 +2504,11 @@ // CHECK: encoding: [0xda,0x25,0x78,0x56,0x34,0x12] fisubl 0x12345678 -// CHECK: fsubp %st(2) +// CHECK: fsubp %st, %st(2) // CHECK: encoding: [0xde,0xe2] fsubp %st(2) -// CHECK: fsubr %st(2) +// CHECK: fsubr %st(2), %st // CHECK: encoding: [0xd8,0xea] fsubr %st(2) @@ -2536,11 +2536,11 @@ // CHECK: encoding: [0xda,0x2d,0x78,0x56,0x34,0x12] fisubrl 0x12345678 -// CHECK: fsubrp %st(2) +// CHECK: fsubrp %st, %st(2) // CHECK: encoding: [0xde,0xea] fsubrp %st(2) -// CHECK: fmul %st(2) +// CHECK: fmul %st(2), %st // CHECK: encoding: [0xd8,0xca] fmul %st(2) @@ -2568,11 +2568,11 @@ // CHECK: encoding: [0xda,0x0d,0x78,0x56,0x34,0x12] fimull 0x12345678 -// CHECK: fmulp %st(2) +// CHECK: fmulp %st, %st(2) // CHECK: encoding: [0xde,0xca] fmulp %st(2) -// CHECK: fdiv %st(2) +// CHECK: fdiv %st(2), %st // CHECK: encoding: [0xd8,0xf2] fdiv %st(2) @@ -2600,11 +2600,11 @@ // CHECK: encoding: [0xda,0x35,0x78,0x56,0x34,0x12] fidivl 0x12345678 -// CHECK: fdivp %st(2) +// CHECK: fdivp %st, %st(2) // CHECK: encoding: [0xde,0xf2] fdivp %st(2) -// CHECK: fdivr %st(2) +// CHECK: fdivr %st(2), %st // CHECK: encoding: [0xd8,0xfa] fdivr %st(2) @@ -2632,7 +2632,7 @@ // CHECK: encoding: [0xda,0x3d,0x78,0x56,0x34,0x12] fidivrl 0x12345678 -// CHECK: fdivrp %st(2) +// CHECK: fdivrp %st, %st(2) // CHECK: encoding: [0xde,0xfa] fdivrp %st(2) @@ -2876,35 +2876,35 @@ // CHECK: encoding: [0x0f,0x0b] ud2 -// CHECK: fcmovb %st(2), %st(0) +// CHECK: fcmovb %st(2), %st // CHECK: encoding: [0xda,0xc2] fcmovb %st(2),%st -// CHECK: fcmove %st(2), %st(0) +// CHECK: fcmove %st(2), %st // CHECK: encoding: [0xda,0xca] fcmove %st(2),%st -// CHECK: fcmovbe %st(2), %st(0) +// CHECK: fcmovbe %st(2), %st // CHECK: encoding: [0xda,0xd2] fcmovbe %st(2),%st -// CHECK: fcmovu %st(2), %st(0) +// CHECK: fcmovu %st(2), %st // CHECK: encoding: [0xda,0xda] fcmovu %st(2),%st -// CHECK: fcmovnb %st(2), %st(0) +// CHECK: fcmovnb %st(2), %st // CHECK: encoding: [0xdb,0xc2] fcmovnb %st(2),%st -// CHECK: fcmovne %st(2), %st(0) +// CHECK: fcmovne %st(2), %st // CHECK: encoding: [0xdb,0xca] fcmovne %st(2),%st -// CHECK: fcmovnbe %st(2), %st(0) +// CHECK: fcmovnbe %st(2), %st // CHECK: encoding: [0xdb,0xd2] fcmovnbe %st(2),%st -// CHECK: fcmovnu %st(2), %st(0) +// CHECK: fcmovnu %st(2), %st // CHECK: encoding: [0xdb,0xda] fcmovnu %st(2),%st diff --git a/llvm/test/MC/X86/x86-32.s b/llvm/test/MC/X86/x86-32.s index 69d36032593453..1593c26b9d0297 100644 --- a/llvm/test/MC/X86/x86-32.s +++ b/llvm/test/MC/X86/x86-32.s @@ -1055,7 +1055,7 @@ pshufw $90, %mm4, %mm0 fsubp %st,%st(1) // PR9164 -// CHECK: fsubp %st(2) +// CHECK: fsubp %st, %st(2) // CHECK: encoding: [0xde,0xe2] fsubp %st, %st(2) diff --git a/llvm/test/MC/X86/x86-64.s b/llvm/test/MC/X86/x86-64.s index df811a6c4cf2ee..e35ba19fc4f2ec 100644 --- a/llvm/test/MC/X86/x86-64.s +++ b/llvm/test/MC/X86/x86-64.s @@ -307,13 +307,13 @@ insl (%dx), %es:(%rdi) // CHECK: fxch %st(1) // CHECK: fucom %st(1) // CHECK: fucomp %st(1) -// CHECK: faddp %st(1) -// CHECK: faddp %st(0) -// CHECK: fsubp %st(1) -// CHECK: fsubrp %st(1) -// CHECK: fmulp %st(1) -// CHECK: fdivp %st(1) -// CHECK: fdivrp %st(1) +// CHECK: faddp %st, %st(1) +// CHECK: faddp %st, %st(0) +// CHECK: fsubp %st, %st(1) +// CHECK: fsubrp %st, %st(1) +// CHECK: fmulp %st, %st(1) +// CHECK: fdivp %st, %st(1) +// CHECK: fdivrp %st, %st(1) fxch fucom @@ -416,21 +416,21 @@ enter $0x7ace,$0x7f mov %cs, %ax // rdar://8456391 -fcmovb %st(1), %st(0) // CHECK: fcmovb %st(1), %st(0) -fcmove %st(1), %st(0) // CHECK: fcmove %st(1), %st(0) -fcmovbe %st(1), %st(0) // CHECK: fcmovbe %st(1), %st(0) -fcmovu %st(1), %st(0) // CHECK: fcmovu %st(1), %st(0) +fcmovb %st(1), %st // CHECK: fcmovb %st(1), %st +fcmove %st(1), %st // CHECK: fcmove %st(1), %st +fcmovbe %st(1), %st // CHECK: fcmovbe %st(1), %st +fcmovu %st(1), %st // CHECK: fcmovu %st(1), %st -fcmovnb %st(1), %st(0) // CHECK: fcmovnb %st(1), %st(0) -fcmovne %st(1), %st(0) // CHECK: fcmovne %st(1), %st(0) -fcmovnbe %st(1), %st(0) // CHECK: fcmovnbe %st(1), %st(0) -fcmovnu %st(1), %st(0) // CHECK: fcmovnu %st(1), %st(0) +fcmovnb %st(1), %st // CHECK: fcmovnb %st(1), %st +fcmovne %st(1), %st // CHECK: fcmovne %st(1), %st +fcmovnbe %st(1), %st // CHECK: fcmovnbe %st(1), %st +fcmovnu %st(1), %st // CHECK: fcmovnu %st(1), %st -fcmovnae %st(1), %st(0) // CHECK: fcmovb %st(1), %st(0) -fcmovna %st(1), %st(0) // CHECK: fcmovbe %st(1), %st(0) +fcmovnae %st(1), %st // CHECK: fcmovb %st(1), %st +fcmovna %st(1), %st // CHECK: fcmovbe %st(1), %st -fcmovae %st(1), %st(0) // CHECK: fcmovnb %st(1), %st(0) -fcmova %st(1), %st(0) // CHECK: fcmovnbe %st(1), %st(0) +fcmovae %st(1), %st // CHECK: fcmovnb %st(1), %st +fcmova %st(1), %st // CHECK: fcmovnbe %st(1), %st // rdar://8456417 .byte (88 + 1) & 15 // CHECK: .byte 9 @@ -456,20 +456,20 @@ mov %rdx, %db15 // CHECK: encoding: [0x44,0x0f,0x23,0xfa] // rdar://8456371 - Handle commutable instructions written backward. -// CHECK: faddp %st(1) -// CHECK: fmulp %st(2) +// CHECK: faddp %st, %st(1) +// CHECK: fmulp %st, %st(2) faddp %st, %st(1) fmulp %st, %st(2) // rdar://8468087 - Encode these accurately, they are not synonyms. -// CHECK: fmul %st(0), %st(1) +// CHECK: fmul %st, %st(1) // CHECK: encoding: [0xdc,0xc9] // CHECK: fmul %st(1) // CHECK: encoding: [0xd8,0xc9] fmul %st, %st(1) fmul %st(1), %st -// CHECK: fadd %st(0), %st(1) +// CHECK: fadd %st, %st(1) // CHECK: encoding: [0xdc,0xc1] // CHECK: fadd %st(1) // CHECK: encoding: [0xd8,0xc1] @@ -582,15 +582,15 @@ movmskpd %xmm6, %eax // CHECK: encoding: [0x66,0x0f,0x50,0xc6] // rdar://8491845 - Gas supports commuted forms of non-commutable instructions. -fdivrp %st(0), %st(1) // CHECK: encoding: [0xde,0xf9] -fdivrp %st(1), %st(0) // CHECK: encoding: [0xde,0xf9] +fdivrp %st, %st(1) // CHECK: encoding: [0xde,0xf9] +fdivrp %st(1), %st // CHECK: encoding: [0xde,0xf9] -fsubrp %st(0), %st(1) // CHECK: encoding: [0xde,0xe9] -fsubrp %st(1), %st(0) // CHECK: encoding: [0xde,0xe9] +fsubrp %st, %st(1) // CHECK: encoding: [0xde,0xe9] +fsubrp %st(1), %st // CHECK: encoding: [0xde,0xe9] // also PR8861 -fdivp %st(0), %st(1) // CHECK: encoding: [0xde,0xf1] -fdivp %st(1), %st(0) // CHECK: encoding: [0xde,0xf1] +fdivp %st, %st(1) // CHECK: encoding: [0xde,0xf1] +fdivp %st(1), %st // CHECK: encoding: [0xde,0xf1] movl foo(%rip), %eax @@ -1391,38 +1391,38 @@ clac // CHECK: encoding: [0x0f,0x01,0xcb] stac -// CHECK: faddp %st(1) -// CHECK: fmulp %st(1) -// CHECK: fsubp %st(1) -// CHECK: fsubrp %st(1) -// CHECK: fdivp %st(1) -// CHECK: fdivrp %st(1) -faddp %st(0), %st(1) -fmulp %st(0), %st(1) -fsubp %st(0), %st(1) -fsubrp %st(0), %st(1) -fdivp %st(0), %st(1) -fdivrp %st(0), %st(1) - -// CHECK: faddp %st(1) -// CHECK: fmulp %st(1) -// CHECK: fsubp %st(1) -// CHECK: fsubrp %st(1) -// CHECK: fdivp %st(1) -// CHECK: fdivrp %st(1) -faddp %st(1), %st(0) -fmulp %st(1), %st(0) -fsubp %st(1), %st(0) -fsubrp %st(1), %st(0) -fdivp %st(1), %st(0) -fdivrp %st(1), %st(0) - -// CHECK: faddp %st(1) -// CHECK: fmulp %st(1) -// CHECK: fsubp %st(1) -// CHECK: fsubrp %st(1) -// CHECK: fdivp %st(1) -// CHECK: fdivrp %st(1) +// CHECK: faddp %st, %st(1) +// CHECK: fmulp %st, %st(1) +// CHECK: fsubp %st, %st(1) +// CHECK: fsubrp %st, %st(1) +// CHECK: fdivp %st, %st(1) +// CHECK: fdivrp %st, %st(1) +faddp %st, %st(1) +fmulp %st, %st(1) +fsubp %st, %st(1) +fsubrp %st, %st(1) +fdivp %st, %st(1) +fdivrp %st, %st(1) + +// CHECK: faddp %st, %st(1) +// CHECK: fmulp %st, %st(1) +// CHECK: fsubp %st, %st(1) +// CHECK: fsubrp %st, %st(1) +// CHECK: fdivp %st, %st(1) +// CHECK: fdivrp %st, %st(1) +faddp %st(1), %st +fmulp %st(1), %st +fsubp %st(1), %st +fsubrp %st(1), %st +fdivp %st(1), %st +fdivrp %st(1), %st + +// CHECK: faddp %st, %st(1) +// CHECK: fmulp %st, %st(1) +// CHECK: fsubp %st, %st(1) +// CHECK: fsubrp %st, %st(1) +// CHECK: fdivp %st, %st(1) +// CHECK: fdivrp %st, %st(1) faddp %st(1) fmulp %st(1) fsubp %st(1) @@ -1430,12 +1430,12 @@ fsubrp %st(1) fdivp %st(1) fdivrp %st(1) -// CHECK: faddp %st(1) -// CHECK: fmulp %st(1) -// CHECK: fsubp %st(1) -// CHECK: fsubrp %st(1) -// CHECK: fdivp %st(1) -// CHECK: fdivrp %st(1) +// CHECK: faddp %st, %st(1) +// CHECK: fmulp %st, %st(1) +// CHECK: fsubp %st, %st(1) +// CHECK: fsubrp %st, %st(1) +// CHECK: fdivp %st, %st(1) +// CHECK: fdivrp %st, %st(1) faddp fmulp fsubp @@ -1449,25 +1449,25 @@ fdivrp // CHECK: fsubr %st(1) // CHECK: fdiv %st(1) // CHECK: fdivr %st(1) -fadd %st(1), %st(0) -fmul %st(1), %st(0) -fsub %st(1), %st(0) -fsubr %st(1), %st(0) -fdiv %st(1), %st(0) -fdivr %st(1), %st(0) - -// CHECK: fadd %st(0), %st(1) -// CHECK: fmul %st(0), %st(1) -// CHECK: fsub %st(0), %st(1) -// CHECK: fsubr %st(0), %st(1) -// CHECK: fdiv %st(0), %st(1) -// CHECK: fdivr %st(0), %st(1) -fadd %st(0), %st(1) -fmul %st(0), %st(1) -fsub %st(0), %st(1) -fsubr %st(0), %st(1) -fdiv %st(0), %st(1) -fdivr %st(0), %st(1) +fadd %st(1), %st +fmul %st(1), %st +fsub %st(1), %st +fsubr %st(1), %st +fdiv %st(1), %st +fdivr %st(1), %st + +// CHECK: fadd %st, %st(1) +// CHECK: fmul %st, %st(1) +// CHECK: fsub %st, %st(1) +// CHECK: fsubr %st, %st(1) +// CHECK: fdiv %st, %st(1) +// CHECK: fdivr %st, %st(1) +fadd %st, %st(1) +fmul %st, %st(1) +fsub %st, %st(1) +fsubr %st, %st(1) +fdiv %st, %st(1) +fdivr %st, %st(1) // CHECK: fadd %st(1) // CHECK: fmul %st(1) diff --git a/llvm/test/tools/llvm-mca/X86/Atom/resources-x87.s b/llvm/test/tools/llvm-mca/X86/Atom/resources-x87.s index c7a990a892eb67..eda0fdebb7d3c2 100644 --- a/llvm/test/tools/llvm-mca/X86/Atom/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/Atom/resources-x87.s @@ -5,7 +5,7 @@ f2xm1 fabs -fadd %st(0), %st(1) +fadd %st, %st(1) fadd %st(2) fadds (%ecx) faddl (%ecx) @@ -21,14 +21,14 @@ fchs fnclex -fcmovb %st(1), %st(0) -fcmovbe %st(1), %st(0) -fcmove %st(1), %st(0) -fcmovnb %st(1), %st(0) -fcmovnbe %st(1), %st(0) -fcmovne %st(1), %st(0) -fcmovnu %st(1), %st(0) -fcmovu %st(1), %st(0) +fcmovb %st(1), %st +fcmovbe %st(1), %st +fcmove %st(1), %st +fcmovnb %st(1), %st +fcmovnbe %st(1), %st +fcmovne %st(1), %st +fcmovnu %st(1), %st +fcmovu %st(1), %st fcom %st(1) fcom %st(3) @@ -47,7 +47,7 @@ fcos fdecstp -fdiv %st(0), %st(1) +fdiv %st, %st(1) fdiv %st(2) fdivs (%ecx) fdivl (%eax) @@ -56,7 +56,7 @@ fdivp %st(2) fidivs (%ecx) fidivl (%eax) -fdivr %st(0), %st(1) +fdivr %st, %st(1) fdivr %st(2) fdivrs (%ecx) fdivrl (%eax) @@ -106,7 +106,7 @@ fldln2 fldpi fldz -fmul %st(0), %st(1) +fmul %st, %st(1) fmul %st(2) fmuls (%ecx) fmull (%eax) @@ -153,7 +153,7 @@ fnstsw (%eax) frstor (%eax) fsave (%eax) -fsub %st(0), %st(1) +fsub %st, %st(1) fsub %st(2) fsubs (%ecx) fsubl (%eax) @@ -162,7 +162,7 @@ fsubp %st(2) fisubs (%ecx) fisubl (%eax) -fsubr %st(0), %st(1) +fsubr %st, %st(1) fsubr %st(2) fsubrs (%ecx) fsubrl (%eax) @@ -208,26 +208,26 @@ fyl2xp1 # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 99 49.50 U f2xm1 # CHECK-NEXT: 1 1 1.00 U fabs -# CHECK-NEXT: 1 5 5.00 U fadd %st(0), %st(1) -# CHECK-NEXT: 1 5 5.00 U fadd %st(2) +# CHECK-NEXT: 1 5 5.00 U fadd %st, %st(1) +# CHECK-NEXT: 1 5 5.00 U fadd %st(2), %st # CHECK-NEXT: 1 5 5.00 * U fadds (%ecx) # CHECK-NEXT: 1 5 5.00 * U faddl (%ecx) -# CHECK-NEXT: 1 5 5.00 U faddp %st(1) -# CHECK-NEXT: 1 5 5.00 U faddp %st(2) +# CHECK-NEXT: 1 5 5.00 U faddp %st, %st(1) +# CHECK-NEXT: 1 5 5.00 U faddp %st, %st(2) # CHECK-NEXT: 1 5 5.00 * U fiadds (%ecx) # CHECK-NEXT: 1 5 5.00 * U fiaddl (%ecx) # CHECK-NEXT: 1 100 0.50 U fbld (%ecx) # CHECK-NEXT: 1 100 0.50 U fbstp (%eax) # CHECK-NEXT: 1 1 1.00 U fchs # CHECK-NEXT: 1 25 12.50 U fnclex -# CHECK-NEXT: 1 9 4.50 U fcmovb %st(1), %st(0) -# CHECK-NEXT: 1 9 4.50 U fcmovbe %st(1), %st(0) -# CHECK-NEXT: 1 9 4.50 U fcmove %st(1), %st(0) -# CHECK-NEXT: 1 9 4.50 U fcmovnb %st(1), %st(0) -# CHECK-NEXT: 1 9 4.50 U fcmovnbe %st(1), %st(0) -# CHECK-NEXT: 1 9 4.50 U fcmovne %st(1), %st(0) -# CHECK-NEXT: 1 9 4.50 U fcmovnu %st(1), %st(0) -# CHECK-NEXT: 1 9 4.50 U fcmovu %st(1), %st(0) +# CHECK-NEXT: 1 9 4.50 U fcmovb %st(1), %st +# CHECK-NEXT: 1 9 4.50 U fcmovbe %st(1), %st +# CHECK-NEXT: 1 9 4.50 U fcmove %st(1), %st +# CHECK-NEXT: 1 9 4.50 U fcmovnb %st(1), %st +# CHECK-NEXT: 1 9 4.50 U fcmovnbe %st(1), %st +# CHECK-NEXT: 1 9 4.50 U fcmovne %st(1), %st +# CHECK-NEXT: 1 9 4.50 U fcmovnu %st(1), %st +# CHECK-NEXT: 1 9 4.50 U fcmovu %st(1), %st # CHECK-NEXT: 1 5 5.00 U fcom %st(1) # CHECK-NEXT: 1 5 5.00 U fcom %st(3) # CHECK-NEXT: 1 5 5.00 U fcoms (%ecx) @@ -237,24 +237,24 @@ fyl2xp1 # CHECK-NEXT: 1 5 5.00 U fcomps (%ecx) # CHECK-NEXT: 1 5 5.00 U fcompl (%eax) # CHECK-NEXT: 1 1 1.00 U fcompp -# CHECK-NEXT: 1 9 4.50 U fcomi %st(3) -# CHECK-NEXT: 1 9 4.50 U fcompi %st(3) +# CHECK-NEXT: 1 9 4.50 U fcomi %st(3), %st +# CHECK-NEXT: 1 9 4.50 U fcompi %st(3), %st # CHECK-NEXT: 1 174 87.00 U fcos # CHECK-NEXT: 1 1 0.50 U fdecstp -# CHECK-NEXT: 1 34 17.00 U fdiv %st(0), %st(1) -# CHECK-NEXT: 1 34 17.00 U fdiv %st(2) +# CHECK-NEXT: 1 34 17.00 U fdiv %st, %st(1) +# CHECK-NEXT: 1 34 17.00 U fdiv %st(2), %st # CHECK-NEXT: 1 34 17.00 * U fdivs (%ecx) # CHECK-NEXT: 1 34 17.00 * U fdivl (%eax) -# CHECK-NEXT: 1 34 17.00 U fdivp %st(1) -# CHECK-NEXT: 1 34 17.00 U fdivp %st(2) +# CHECK-NEXT: 1 34 17.00 U fdivp %st, %st(1) +# CHECK-NEXT: 1 34 17.00 U fdivp %st, %st(2) # CHECK-NEXT: 1 34 17.00 * U fidivs (%ecx) # CHECK-NEXT: 1 34 17.00 * U fidivl (%eax) -# CHECK-NEXT: 1 34 17.00 U fdivr %st(0), %st(1) -# CHECK-NEXT: 1 34 17.00 U fdivr %st(2) +# CHECK-NEXT: 1 34 17.00 U fdivr %st, %st(1) +# CHECK-NEXT: 1 34 17.00 U fdivr %st(2), %st # CHECK-NEXT: 1 34 17.00 * U fdivrs (%ecx) # CHECK-NEXT: 1 34 17.00 * U fdivrl (%eax) -# CHECK-NEXT: 1 34 17.00 U fdivrp %st(1) -# CHECK-NEXT: 1 34 17.00 U fdivrp %st(2) +# CHECK-NEXT: 1 34 17.00 U fdivrp %st, %st(1) +# CHECK-NEXT: 1 34 17.00 U fdivrp %st, %st(2) # CHECK-NEXT: 1 34 17.00 * U fidivrs (%ecx) # CHECK-NEXT: 1 34 17.00 * U fidivrl (%eax) # CHECK-NEXT: 1 1 0.50 U ffree %st(0) @@ -288,12 +288,12 @@ fyl2xp1 # CHECK-NEXT: 1 10 5.00 U fldln2 # CHECK-NEXT: 1 10 5.00 U fldpi # CHECK-NEXT: 1 1 0.50 U fldz -# CHECK-NEXT: 1 4 4.00 U fmul %st(0), %st(1) -# CHECK-NEXT: 1 4 4.00 U fmul %st(2) +# CHECK-NEXT: 1 4 4.00 U fmul %st, %st(1) +# CHECK-NEXT: 1 4 4.00 U fmul %st(2), %st # CHECK-NEXT: 1 4 4.00 * U fmuls (%ecx) # CHECK-NEXT: 1 4 4.00 * U fmull (%eax) -# CHECK-NEXT: 1 4 4.00 U fmulp %st(1) -# CHECK-NEXT: 1 4 4.00 U fmulp %st(2) +# CHECK-NEXT: 1 4 4.00 U fmulp %st, %st(1) +# CHECK-NEXT: 1 4 4.00 U fmulp %st, %st(2) # CHECK-NEXT: 1 4 4.00 * U fimuls (%ecx) # CHECK-NEXT: 1 4 4.00 * U fimull (%eax) # CHECK-NEXT: 1 1 0.50 U fnop @@ -321,20 +321,20 @@ fyl2xp1 # CHECK-NEXT: 1 100 0.50 U frstor (%eax) # CHECK-NEXT: 1 1 0.50 U wait # CHECK-NEXT: 1 100 0.50 U fnsave (%eax) -# CHECK-NEXT: 1 5 5.00 U fsub %st(0), %st(1) -# CHECK-NEXT: 1 5 5.00 U fsub %st(2) +# CHECK-NEXT: 1 5 5.00 U fsub %st, %st(1) +# CHECK-NEXT: 1 5 5.00 U fsub %st(2), %st # CHECK-NEXT: 1 5 5.00 * U fsubs (%ecx) # CHECK-NEXT: 1 5 5.00 * U fsubl (%eax) -# CHECK-NEXT: 1 5 5.00 U fsubp %st(1) -# CHECK-NEXT: 1 5 5.00 U fsubp %st(2) +# CHECK-NEXT: 1 5 5.00 U fsubp %st, %st(1) +# CHECK-NEXT: 1 5 5.00 U fsubp %st, %st(2) # CHECK-NEXT: 1 5 5.00 * U fisubs (%ecx) # CHECK-NEXT: 1 5 5.00 * U fisubl (%eax) -# CHECK-NEXT: 1 5 5.00 U fsubr %st(0), %st(1) -# CHECK-NEXT: 1 5 5.00 U fsubr %st(2) +# CHECK-NEXT: 1 5 5.00 U fsubr %st, %st(1) +# CHECK-NEXT: 1 5 5.00 U fsubr %st(2), %st # CHECK-NEXT: 1 5 5.00 * U fsubrs (%ecx) # CHECK-NEXT: 1 5 5.00 * U fsubrl (%eax) -# CHECK-NEXT: 1 5 5.00 U fsubrp %st(1) -# CHECK-NEXT: 1 5 5.00 U fsubrp %st(2) +# CHECK-NEXT: 1 5 5.00 U fsubrp %st, %st(1) +# CHECK-NEXT: 1 5 5.00 U fsubrp %st, %st(2) # CHECK-NEXT: 1 5 5.00 * U fisubrs (%ecx) # CHECK-NEXT: 1 5 5.00 * U fisubrl (%eax) # CHECK-NEXT: 1 9 4.50 U ftst @@ -343,8 +343,8 @@ fyl2xp1 # CHECK-NEXT: 1 1 1.00 U fucomp %st(1) # CHECK-NEXT: 1 1 1.00 U fucomp %st(3) # CHECK-NEXT: 1 1 1.00 U fucompp -# CHECK-NEXT: 1 9 4.50 U fucomi %st(3) -# CHECK-NEXT: 1 9 4.50 U fucompi %st(3) +# CHECK-NEXT: 1 9 4.50 U fucomi %st(3), %st +# CHECK-NEXT: 1 9 4.50 U fucompi %st(3), %st # CHECK-NEXT: 1 1 0.50 U wait # CHECK-NEXT: 1 1 1.00 U fxam # CHECK-NEXT: 1 1 1.00 U fxch %st(1) @@ -367,26 +367,26 @@ fyl2xp1 # CHECK-NEXT: [0] [1] Instructions: # CHECK-NEXT: 49.50 49.50 f2xm1 # CHECK-NEXT: - 1.00 fabs -# CHECK-NEXT: 5.00 - fadd %st(0), %st(1) -# CHECK-NEXT: 5.00 - fadd %st(2) +# CHECK-NEXT: 5.00 - fadd %st, %st(1) +# CHECK-NEXT: 5.00 - fadd %st(2), %st # CHECK-NEXT: 5.00 - fadds (%ecx) # CHECK-NEXT: 5.00 - faddl (%ecx) -# CHECK-NEXT: 5.00 - faddp %st(1) -# CHECK-NEXT: 5.00 - faddp %st(2) +# CHECK-NEXT: 5.00 - faddp %st, %st(1) +# CHECK-NEXT: 5.00 - faddp %st, %st(2) # CHECK-NEXT: 5.00 - fiadds (%ecx) # CHECK-NEXT: 5.00 - fiaddl (%ecx) # CHECK-NEXT: 0.50 0.50 fbld (%ecx) # CHECK-NEXT: 0.50 0.50 fbstp (%eax) # CHECK-NEXT: - 1.00 fchs # CHECK-NEXT: 12.50 12.50 fnclex -# CHECK-NEXT: 4.50 4.50 fcmovb %st(1), %st(0) -# CHECK-NEXT: 4.50 4.50 fcmovbe %st(1), %st(0) -# CHECK-NEXT: 4.50 4.50 fcmove %st(1), %st(0) -# CHECK-NEXT: 4.50 4.50 fcmovnb %st(1), %st(0) -# CHECK-NEXT: 4.50 4.50 fcmovnbe %st(1), %st(0) -# CHECK-NEXT: 4.50 4.50 fcmovne %st(1), %st(0) -# CHECK-NEXT: 4.50 4.50 fcmovnu %st(1), %st(0) -# CHECK-NEXT: 4.50 4.50 fcmovu %st(1), %st(0) +# CHECK-NEXT: 4.50 4.50 fcmovb %st(1), %st +# CHECK-NEXT: 4.50 4.50 fcmovbe %st(1), %st +# CHECK-NEXT: 4.50 4.50 fcmove %st(1), %st +# CHECK-NEXT: 4.50 4.50 fcmovnb %st(1), %st +# CHECK-NEXT: 4.50 4.50 fcmovnbe %st(1), %st +# CHECK-NEXT: 4.50 4.50 fcmovne %st(1), %st +# CHECK-NEXT: 4.50 4.50 fcmovnu %st(1), %st +# CHECK-NEXT: 4.50 4.50 fcmovu %st(1), %st # CHECK-NEXT: 5.00 - fcom %st(1) # CHECK-NEXT: 5.00 - fcom %st(3) # CHECK-NEXT: 5.00 - fcoms (%ecx) @@ -396,24 +396,24 @@ fyl2xp1 # CHECK-NEXT: 5.00 - fcomps (%ecx) # CHECK-NEXT: 5.00 - fcompl (%eax) # CHECK-NEXT: - 1.00 fcompp -# CHECK-NEXT: 4.50 4.50 fcomi %st(3) -# CHECK-NEXT: 4.50 4.50 fcompi %st(3) +# CHECK-NEXT: 4.50 4.50 fcomi %st(3), %st +# CHECK-NEXT: 4.50 4.50 fcompi %st(3), %st # CHECK-NEXT: 87.00 87.00 fcos # CHECK-NEXT: 0.50 0.50 fdecstp -# CHECK-NEXT: 17.00 17.00 fdiv %st(0), %st(1) -# CHECK-NEXT: 17.00 17.00 fdiv %st(2) +# CHECK-NEXT: 17.00 17.00 fdiv %st, %st(1) +# CHECK-NEXT: 17.00 17.00 fdiv %st(2), %st # CHECK-NEXT: 17.00 17.00 fdivs (%ecx) # CHECK-NEXT: 17.00 17.00 fdivl (%eax) -# CHECK-NEXT: 17.00 17.00 fdivp %st(1) -# CHECK-NEXT: 17.00 17.00 fdivp %st(2) +# CHECK-NEXT: 17.00 17.00 fdivp %st, %st(1) +# CHECK-NEXT: 17.00 17.00 fdivp %st, %st(2) # CHECK-NEXT: 17.00 17.00 fidivs (%ecx) # CHECK-NEXT: 17.00 17.00 fidivl (%eax) -# CHECK-NEXT: 17.00 17.00 fdivr %st(0), %st(1) -# CHECK-NEXT: 17.00 17.00 fdivr %st(2) +# CHECK-NEXT: 17.00 17.00 fdivr %st, %st(1) +# CHECK-NEXT: 17.00 17.00 fdivr %st(2), %st # CHECK-NEXT: 17.00 17.00 fdivrs (%ecx) # CHECK-NEXT: 17.00 17.00 fdivrl (%eax) -# CHECK-NEXT: 17.00 17.00 fdivrp %st(1) -# CHECK-NEXT: 17.00 17.00 fdivrp %st(2) +# CHECK-NEXT: 17.00 17.00 fdivrp %st, %st(1) +# CHECK-NEXT: 17.00 17.00 fdivrp %st, %st(2) # CHECK-NEXT: 17.00 17.00 fidivrs (%ecx) # CHECK-NEXT: 17.00 17.00 fidivrl (%eax) # CHECK-NEXT: 0.50 0.50 ffree %st(0) @@ -447,12 +447,12 @@ fyl2xp1 # CHECK-NEXT: 5.00 5.00 fldln2 # CHECK-NEXT: 5.00 5.00 fldpi # CHECK-NEXT: 0.50 0.50 fldz -# CHECK-NEXT: 4.00 - fmul %st(0), %st(1) -# CHECK-NEXT: 4.00 - fmul %st(2) +# CHECK-NEXT: 4.00 - fmul %st, %st(1) +# CHECK-NEXT: 4.00 - fmul %st(2), %st # CHECK-NEXT: 4.00 - fmuls (%ecx) # CHECK-NEXT: 4.00 - fmull (%eax) -# CHECK-NEXT: 4.00 - fmulp %st(1) -# CHECK-NEXT: 4.00 - fmulp %st(2) +# CHECK-NEXT: 4.00 - fmulp %st, %st(1) +# CHECK-NEXT: 4.00 - fmulp %st, %st(2) # CHECK-NEXT: 4.00 - fimuls (%ecx) # CHECK-NEXT: 4.00 - fimull (%eax) # CHECK-NEXT: 0.50 0.50 fnop @@ -480,20 +480,20 @@ fyl2xp1 # CHECK-NEXT: 0.50 0.50 frstor (%eax) # CHECK-NEXT: 0.50 0.50 wait # CHECK-NEXT: 0.50 0.50 fnsave (%eax) -# CHECK-NEXT: 5.00 - fsub %st(0), %st(1) -# CHECK-NEXT: 5.00 - fsub %st(2) +# CHECK-NEXT: 5.00 - fsub %st, %st(1) +# CHECK-NEXT: 5.00 - fsub %st(2), %st # CHECK-NEXT: 5.00 - fsubs (%ecx) # CHECK-NEXT: 5.00 - fsubl (%eax) -# CHECK-NEXT: 5.00 - fsubp %st(1) -# CHECK-NEXT: 5.00 - fsubp %st(2) +# CHECK-NEXT: 5.00 - fsubp %st, %st(1) +# CHECK-NEXT: 5.00 - fsubp %st, %st(2) # CHECK-NEXT: 5.00 - fisubs (%ecx) # CHECK-NEXT: 5.00 - fisubl (%eax) -# CHECK-NEXT: 5.00 - fsubr %st(0), %st(1) -# CHECK-NEXT: 5.00 - fsubr %st(2) +# CHECK-NEXT: 5.00 - fsubr %st, %st(1) +# CHECK-NEXT: 5.00 - fsubr %st(2), %st # CHECK-NEXT: 5.00 - fsubrs (%ecx) # CHECK-NEXT: 5.00 - fsubrl (%eax) -# CHECK-NEXT: 5.00 - fsubrp %st(1) -# CHECK-NEXT: 5.00 - fsubrp %st(2) +# CHECK-NEXT: 5.00 - fsubrp %st, %st(1) +# CHECK-NEXT: 5.00 - fsubrp %st, %st(2) # CHECK-NEXT: 5.00 - fisubrs (%ecx) # CHECK-NEXT: 5.00 - fisubrl (%eax) # CHECK-NEXT: 4.50 4.50 ftst @@ -502,8 +502,8 @@ fyl2xp1 # CHECK-NEXT: - 1.00 fucomp %st(1) # CHECK-NEXT: - 1.00 fucomp %st(3) # CHECK-NEXT: - 1.00 fucompp -# CHECK-NEXT: 4.50 4.50 fucomi %st(3) -# CHECK-NEXT: 4.50 4.50 fucompi %st(3) +# CHECK-NEXT: 4.50 4.50 fucomi %st(3), %st +# CHECK-NEXT: 4.50 4.50 fucompi %st(3), %st # CHECK-NEXT: 0.50 0.50 wait # CHECK-NEXT: 1.00 - fxam # CHECK-NEXT: 1.00 1.00 fxch %st(1) diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/resources-x87.s b/llvm/test/tools/llvm-mca/X86/BdVer2/resources-x87.s index ad72714c74c1bb..4cdddf01104b04 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/resources-x87.s @@ -5,7 +5,7 @@ f2xm1 fabs -fadd %st(0), %st(1) +fadd %st, %st(1) fadd %st(2) fadds (%ecx) faddl (%ecx) @@ -21,14 +21,14 @@ fchs fnclex -fcmovb %st(1), %st(0) -fcmovbe %st(1), %st(0) -fcmove %st(1), %st(0) -fcmovnb %st(1), %st(0) -fcmovnbe %st(1), %st(0) -fcmovne %st(1), %st(0) -fcmovnu %st(1), %st(0) -fcmovu %st(1), %st(0) +fcmovb %st(1), %st +fcmovbe %st(1), %st +fcmove %st(1), %st +fcmovnb %st(1), %st +fcmovnbe %st(1), %st +fcmovne %st(1), %st +fcmovnu %st(1), %st +fcmovu %st(1), %st fcom %st(1) fcom %st(3) @@ -47,7 +47,7 @@ fcos fdecstp -fdiv %st(0), %st(1) +fdiv %st, %st(1) fdiv %st(2) fdivs (%ecx) fdivl (%eax) @@ -56,7 +56,7 @@ fdivp %st(2) fidivs (%ecx) fidivl (%eax) -fdivr %st(0), %st(1) +fdivr %st, %st(1) fdivr %st(2) fdivrs (%ecx) fdivrl (%eax) @@ -106,7 +106,7 @@ fldln2 fldpi fldz -fmul %st(0), %st(1) +fmul %st, %st(1) fmul %st(2) fmuls (%ecx) fmull (%eax) @@ -153,7 +153,7 @@ fnstsw (%eax) frstor (%eax) fsave (%eax) -fsub %st(0), %st(1) +fsub %st, %st(1) fsub %st(2) fsubs (%ecx) fsubl (%eax) @@ -162,7 +162,7 @@ fsubp %st(2) fisubs (%ecx) fisubl (%eax) -fsubr %st(0), %st(1) +fsubr %st, %st(1) fsubr %st(2) fsubrs (%ecx) fsubrl (%eax) @@ -208,26 +208,26 @@ fyl2xp1 # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 100 0.50 U f2xm1 # CHECK-NEXT: 1 1 1.00 U fabs -# CHECK-NEXT: 1 5 1.00 U fadd %st(0), %st(1) -# CHECK-NEXT: 1 5 1.00 U fadd %st(2) +# CHECK-NEXT: 1 5 1.00 U fadd %st, %st(1) +# CHECK-NEXT: 1 5 1.00 U fadd %st(2), %st # CHECK-NEXT: 1 10 1.00 * U fadds (%ecx) # CHECK-NEXT: 1 10 1.00 * U faddl (%ecx) -# CHECK-NEXT: 1 5 1.00 U faddp %st(1) -# CHECK-NEXT: 1 5 1.00 U faddp %st(2) +# CHECK-NEXT: 1 5 1.00 U faddp %st, %st(1) +# CHECK-NEXT: 1 5 1.00 U faddp %st, %st(2) # CHECK-NEXT: 1 10 1.00 * U fiadds (%ecx) # CHECK-NEXT: 1 10 1.00 * U fiaddl (%ecx) # CHECK-NEXT: 1 100 0.50 U fbld (%ecx) # CHECK-NEXT: 1 100 0.50 U fbstp (%eax) # CHECK-NEXT: 1 1 1.00 U fchs # CHECK-NEXT: 1 100 0.50 U fnclex -# CHECK-NEXT: 1 1 1.00 U fcmovb %st(1), %st(0) -# CHECK-NEXT: 1 1 1.00 U fcmovbe %st(1), %st(0) -# CHECK-NEXT: 1 1 1.00 U fcmove %st(1), %st(0) -# CHECK-NEXT: 1 1 1.00 U fcmovnb %st(1), %st(0) -# CHECK-NEXT: 1 1 1.00 U fcmovnbe %st(1), %st(0) -# CHECK-NEXT: 1 1 1.00 U fcmovne %st(1), %st(0) -# CHECK-NEXT: 1 1 1.00 U fcmovnu %st(1), %st(0) -# CHECK-NEXT: 1 1 1.00 U fcmovu %st(1), %st(0) +# CHECK-NEXT: 1 1 1.00 U fcmovb %st(1), %st +# CHECK-NEXT: 1 1 1.00 U fcmovbe %st(1), %st +# CHECK-NEXT: 1 1 1.00 U fcmove %st(1), %st +# CHECK-NEXT: 1 1 1.00 U fcmovnb %st(1), %st +# CHECK-NEXT: 1 1 1.00 U fcmovnbe %st(1), %st +# CHECK-NEXT: 1 1 1.00 U fcmovne %st(1), %st +# CHECK-NEXT: 1 1 1.00 U fcmovnu %st(1), %st +# CHECK-NEXT: 1 1 1.00 U fcmovu %st(1), %st # CHECK-NEXT: 2 1 1.00 U fcom %st(1) # CHECK-NEXT: 2 1 1.00 U fcom %st(3) # CHECK-NEXT: 1 6 1.00 U fcoms (%ecx) @@ -237,24 +237,24 @@ fyl2xp1 # CHECK-NEXT: 1 6 1.00 U fcomps (%ecx) # CHECK-NEXT: 1 6 1.00 U fcompl (%eax) # CHECK-NEXT: 1 100 0.50 U fcompp -# CHECK-NEXT: 2 1 1.00 U fcomi %st(3) -# CHECK-NEXT: 2 1 1.00 U fcompi %st(3) +# CHECK-NEXT: 2 1 1.00 U fcomi %st(3), %st +# CHECK-NEXT: 2 1 1.00 U fcompi %st(3), %st # CHECK-NEXT: 1 100 0.50 U fcos # CHECK-NEXT: 1 100 0.50 U fdecstp -# CHECK-NEXT: 1 9 9.50 U fdiv %st(0), %st(1) -# CHECK-NEXT: 1 9 9.50 U fdiv %st(2) +# CHECK-NEXT: 1 9 9.50 U fdiv %st, %st(1) +# CHECK-NEXT: 1 9 9.50 U fdiv %st(2), %st # CHECK-NEXT: 1 14 9.50 * U fdivs (%ecx) # CHECK-NEXT: 1 14 9.50 * U fdivl (%eax) -# CHECK-NEXT: 1 9 9.50 U fdivp %st(1) -# CHECK-NEXT: 1 9 9.50 U fdivp %st(2) +# CHECK-NEXT: 1 9 9.50 U fdivp %st, %st(1) +# CHECK-NEXT: 1 9 9.50 U fdivp %st, %st(2) # CHECK-NEXT: 1 14 9.50 * U fidivs (%ecx) # CHECK-NEXT: 1 14 9.50 * U fidivl (%eax) -# CHECK-NEXT: 1 9 9.50 U fdivr %st(0), %st(1) -# CHECK-NEXT: 1 9 9.50 U fdivr %st(2) +# CHECK-NEXT: 1 9 9.50 U fdivr %st, %st(1) +# CHECK-NEXT: 1 9 9.50 U fdivr %st(2), %st # CHECK-NEXT: 1 14 9.50 * U fdivrs (%ecx) # CHECK-NEXT: 1 14 9.50 * U fdivrl (%eax) -# CHECK-NEXT: 1 9 9.50 U fdivrp %st(1) -# CHECK-NEXT: 1 9 9.50 U fdivrp %st(2) +# CHECK-NEXT: 1 9 9.50 U fdivrp %st, %st(1) +# CHECK-NEXT: 1 9 9.50 U fdivrp %st, %st(2) # CHECK-NEXT: 1 14 9.50 * U fidivrs (%ecx) # CHECK-NEXT: 1 14 9.50 * U fidivrl (%eax) # CHECK-NEXT: 1 100 0.50 U ffree %st(0) @@ -288,12 +288,12 @@ fyl2xp1 # CHECK-NEXT: 1 3 1.00 U fldln2 # CHECK-NEXT: 1 3 1.00 U fldpi # CHECK-NEXT: 1 3 1.00 U fldz -# CHECK-NEXT: 1 5 1.00 U fmul %st(0), %st(1) -# CHECK-NEXT: 1 5 1.00 U fmul %st(2) +# CHECK-NEXT: 1 5 1.00 U fmul %st, %st(1) +# CHECK-NEXT: 1 5 1.00 U fmul %st(2), %st # CHECK-NEXT: 1 10 1.00 * U fmuls (%ecx) # CHECK-NEXT: 1 10 1.00 * U fmull (%eax) -# CHECK-NEXT: 1 5 1.00 U fmulp %st(1) -# CHECK-NEXT: 1 5 1.00 U fmulp %st(2) +# CHECK-NEXT: 1 5 1.00 U fmulp %st, %st(1) +# CHECK-NEXT: 1 5 1.00 U fmulp %st, %st(2) # CHECK-NEXT: 1 10 1.00 * U fimuls (%ecx) # CHECK-NEXT: 1 10 1.00 * U fimull (%eax) # CHECK-NEXT: 1 1 0.50 U fnop @@ -321,20 +321,20 @@ fyl2xp1 # CHECK-NEXT: 1 100 0.50 U frstor (%eax) # CHECK-NEXT: 1 100 0.50 U wait # CHECK-NEXT: 1 100 0.50 U fnsave (%eax) -# CHECK-NEXT: 1 5 1.00 U fsub %st(0), %st(1) -# CHECK-NEXT: 1 5 1.00 U fsub %st(2) +# CHECK-NEXT: 1 5 1.00 U fsub %st, %st(1) +# CHECK-NEXT: 1 5 1.00 U fsub %st(2), %st # CHECK-NEXT: 1 10 1.00 * U fsubs (%ecx) # CHECK-NEXT: 1 10 1.00 * U fsubl (%eax) -# CHECK-NEXT: 1 5 1.00 U fsubp %st(1) -# CHECK-NEXT: 1 5 1.00 U fsubp %st(2) +# CHECK-NEXT: 1 5 1.00 U fsubp %st, %st(1) +# CHECK-NEXT: 1 5 1.00 U fsubp %st, %st(2) # CHECK-NEXT: 1 10 1.00 * U fisubs (%ecx) # CHECK-NEXT: 1 10 1.00 * U fisubl (%eax) -# CHECK-NEXT: 1 5 1.00 U fsubr %st(0), %st(1) -# CHECK-NEXT: 1 5 1.00 U fsubr %st(2) +# CHECK-NEXT: 1 5 1.00 U fsubr %st, %st(1) +# CHECK-NEXT: 1 5 1.00 U fsubr %st(2), %st # CHECK-NEXT: 1 10 1.00 * U fsubrs (%ecx) # CHECK-NEXT: 1 10 1.00 * U fsubrl (%eax) -# CHECK-NEXT: 1 5 1.00 U fsubrp %st(1) -# CHECK-NEXT: 1 5 1.00 U fsubrp %st(2) +# CHECK-NEXT: 1 5 1.00 U fsubrp %st, %st(1) +# CHECK-NEXT: 1 5 1.00 U fsubrp %st, %st(2) # CHECK-NEXT: 1 10 1.00 * U fisubrs (%ecx) # CHECK-NEXT: 1 10 1.00 * U fisubrl (%eax) # CHECK-NEXT: 1 1 1.00 U ftst @@ -343,8 +343,8 @@ fyl2xp1 # CHECK-NEXT: 2 1 1.00 U fucomp %st(1) # CHECK-NEXT: 2 1 1.00 U fucomp %st(3) # CHECK-NEXT: 1 1 1.00 U fucompp -# CHECK-NEXT: 2 1 1.00 U fucomi %st(3) -# CHECK-NEXT: 2 1 1.00 U fucompi %st(3) +# CHECK-NEXT: 2 1 1.00 U fucomi %st(3), %st +# CHECK-NEXT: 2 1 1.00 U fucompi %st(3), %st # CHECK-NEXT: 1 100 0.50 U wait # CHECK-NEXT: 1 100 0.50 U fxam # CHECK-NEXT: 1 1 0.50 U fxch %st(1) @@ -388,26 +388,26 @@ fyl2xp1 # CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions: # CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - f2xm1 # CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - 1.00 - - - - - - - fabs -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fadd %st(0), %st(1) -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fadd %st(2) +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fadd %st, %st(1) +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fadd %st(2), %st # CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - - - 1.00 - - - - 0.50 0.50 - - fadds (%ecx) # CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - - - 1.00 - - - - 0.50 0.50 - - faddl (%ecx) -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - faddp %st(1) -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - faddp %st(2) +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - faddp %st, %st(1) +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - faddp %st, %st(2) # CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - - - 1.00 - - - - 0.50 0.50 - - fiadds (%ecx) # CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - - - 1.00 - - - - 0.50 0.50 - - fiaddl (%ecx) # CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - fbld (%ecx) # CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - fbstp (%eax) # CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - 1.00 - - - - - - - fchs # CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - fnclex -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fcmovb %st(1), %st(0) -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fcmovbe %st(1), %st(0) -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fcmove %st(1), %st(0) -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fcmovnb %st(1), %st(0) -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fcmovnbe %st(1), %st(0) -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fcmovne %st(1), %st(0) -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fcmovnu %st(1), %st(0) -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fcmovu %st(1), %st(0) +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fcmovb %st(1), %st +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fcmovbe %st(1), %st +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fcmove %st(1), %st +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fcmovnb %st(1), %st +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fcmovnbe %st(1), %st +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fcmovne %st(1), %st +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fcmovnu %st(1), %st +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fcmovu %st(1), %st # CHECK-NEXT: - - - - - 1.00 - - 0.50 0.50 - - - - 1.00 - - - - - - - - fcom %st(1) # CHECK-NEXT: - - - - - 1.00 - - 0.50 0.50 - - - - 1.00 - - - - - - - - fcom %st(3) # CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - 1.00 - - - - - - - fcoms (%ecx) @@ -417,24 +417,24 @@ fyl2xp1 # CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - 1.00 - - - - - - - fcomps (%ecx) # CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - 1.00 - - - - - - - fcompl (%eax) # CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - fcompp -# CHECK-NEXT: - - - - - 1.00 - - 0.50 0.50 - - - - 1.00 - - - - - - - - fcomi %st(3) -# CHECK-NEXT: - - - - - 1.00 - - 0.50 0.50 - - - - 1.00 - - - - - - - - fcompi %st(3) +# CHECK-NEXT: - - - - - 1.00 - - 0.50 0.50 - - - - 1.00 - - - - - - - - fcomi %st(3), %st +# CHECK-NEXT: - - - - - 1.00 - - 0.50 0.50 - - - - 1.00 - - - - - - - - fcompi %st(3), %st # CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - fcos # CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - fdecstp -# CHECK-NEXT: - - - - - - - - 9.50 9.50 - - - - - 1.00 - - - - - - - fdiv %st(0), %st(1) -# CHECK-NEXT: - - - - - - - - 9.50 9.50 - - - - - 1.00 - - - - - - - fdiv %st(2) +# CHECK-NEXT: - - - - - - - - 9.50 9.50 - - - - - 1.00 - - - - - - - fdiv %st, %st(1) +# CHECK-NEXT: - - - - - - - - 9.50 9.50 - - - - - 1.00 - - - - - - - fdiv %st(2), %st # CHECK-NEXT: 0.50 0.50 - - - - - - 9.50 9.50 - - - - - 1.00 - - - 0.50 0.50 - - fdivs (%ecx) # CHECK-NEXT: 0.50 0.50 - - - - - - 9.50 9.50 - - - - - 1.00 - - - 0.50 0.50 - - fdivl (%eax) -# CHECK-NEXT: - - - - - - - - 9.50 9.50 - - - - - 1.00 - - - - - - - fdivp %st(1) -# CHECK-NEXT: - - - - - - - - 9.50 9.50 - - - - - 1.00 - - - - - - - fdivp %st(2) +# CHECK-NEXT: - - - - - - - - 9.50 9.50 - - - - - 1.00 - - - - - - - fdivp %st, %st(1) +# CHECK-NEXT: - - - - - - - - 9.50 9.50 - - - - - 1.00 - - - - - - - fdivp %st, %st(2) # CHECK-NEXT: 0.50 0.50 - - - - - - 9.50 9.50 - - - - - 1.00 - - - 0.50 0.50 - - fidivs (%ecx) # CHECK-NEXT: 0.50 0.50 - - - - - - 9.50 9.50 - - - - - 1.00 - - - 0.50 0.50 - - fidivl (%eax) -# CHECK-NEXT: - - - - - - - - 9.50 9.50 - - - - - 1.00 - - - - - - - fdivr %st(0), %st(1) -# CHECK-NEXT: - - - - - - - - 9.50 9.50 - - - - - 1.00 - - - - - - - fdivr %st(2) +# CHECK-NEXT: - - - - - - - - 9.50 9.50 - - - - - 1.00 - - - - - - - fdivr %st, %st(1) +# CHECK-NEXT: - - - - - - - - 9.50 9.50 - - - - - 1.00 - - - - - - - fdivr %st(2), %st # CHECK-NEXT: 0.50 0.50 - - - - - - 9.50 9.50 - - - - - 1.00 - - - 0.50 0.50 - - fdivrs (%ecx) # CHECK-NEXT: 0.50 0.50 - - - - - - 9.50 9.50 - - - - - 1.00 - - - 0.50 0.50 - - fdivrl (%eax) -# CHECK-NEXT: - - - - - - - - 9.50 9.50 - - - - - 1.00 - - - - - - - fdivrp %st(1) -# CHECK-NEXT: - - - - - - - - 9.50 9.50 - - - - - 1.00 - - - - - - - fdivrp %st(2) +# CHECK-NEXT: - - - - - - - - 9.50 9.50 - - - - - 1.00 - - - - - - - fdivrp %st, %st(1) +# CHECK-NEXT: - - - - - - - - 9.50 9.50 - - - - - 1.00 - - - - - - - fdivrp %st, %st(2) # CHECK-NEXT: 0.50 0.50 - - - - - - 9.50 9.50 - - - - - 1.00 - - - 0.50 0.50 - - fidivrs (%ecx) # CHECK-NEXT: 0.50 0.50 - - - - - - 9.50 9.50 - - - - - 1.00 - - - 0.50 0.50 - - fidivrl (%eax) # CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - ffree %st(0) @@ -468,12 +468,12 @@ fyl2xp1 # CHECK-NEXT: - - - - - - - - - - - - - 1.00 - 1.00 - - - - - - - fldln2 # CHECK-NEXT: - - - - - - - - - - - - - 1.00 - 1.00 - - - - - - - fldpi # CHECK-NEXT: - - - - - - - - - - - - - 1.00 - 1.00 - - - - - - - fldz -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - 1.00 - - - - - - - fmul %st(0), %st(1) -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - 1.00 - - - - - - - fmul %st(2) +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - 1.00 - - - - - - - fmul %st, %st(1) +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - 1.00 - - - - - - - fmul %st(2), %st # CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - - - - 1.00 - - - 0.50 0.50 - - fmuls (%ecx) # CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - - - - 1.00 - - - 0.50 0.50 - - fmull (%eax) -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - 1.00 - - - - - - - fmulp %st(1) -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - 1.00 - - - - - - - fmulp %st(2) +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - 1.00 - - - - - - - fmulp %st, %st(1) +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - 1.00 - - - - - - - fmulp %st, %st(2) # CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - - - - 1.00 - - - 0.50 0.50 - - fimuls (%ecx) # CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - - - - 1.00 - - - 0.50 0.50 - - fimull (%eax) # CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - fnop @@ -501,20 +501,20 @@ fyl2xp1 # CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - frstor (%eax) # CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - wait # CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - fnsave (%eax) -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fsub %st(0), %st(1) -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fsub %st(2) +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fsub %st, %st(1) +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fsub %st(2), %st # CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - - - 1.00 - - - - 0.50 0.50 - - fsubs (%ecx) # CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - - - 1.00 - - - - 0.50 0.50 - - fsubl (%eax) -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fsubp %st(1) -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fsubp %st(2) +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fsubp %st, %st(1) +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fsubp %st, %st(2) # CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - - - 1.00 - - - - 0.50 0.50 - - fisubs (%ecx) # CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - - - 1.00 - - - - 0.50 0.50 - - fisubl (%eax) -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fsubr %st(0), %st(1) -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fsubr %st(2) +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fsubr %st, %st(1) +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fsubr %st(2), %st # CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - - - 1.00 - - - - 0.50 0.50 - - fsubrs (%ecx) # CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - - - 1.00 - - - - 0.50 0.50 - - fsubrl (%eax) -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fsubrp %st(1) -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fsubrp %st(2) +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fsubrp %st, %st(1) +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - fsubrp %st, %st(2) # CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - - - 1.00 - - - - 0.50 0.50 - - fisubrs (%ecx) # CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - - - 1.00 - - - - 0.50 0.50 - - fisubrl (%eax) # CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - 1.00 - - - - - - - ftst @@ -523,8 +523,8 @@ fyl2xp1 # CHECK-NEXT: - - - - - 1.00 - - 0.50 0.50 - - - - 1.00 - - - - - - - - fucomp %st(1) # CHECK-NEXT: - - - - - 1.00 - - 0.50 0.50 - - - - 1.00 - - - - - - - - fucomp %st(3) # CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - 1.00 - - - - - - - fucompp -# CHECK-NEXT: - - - - - 1.00 - - 0.50 0.50 - - - - 1.00 - - - - - - - - fucomi %st(3) -# CHECK-NEXT: - - - - - 1.00 - - 0.50 0.50 - - - - 1.00 - - - - - - - - fucompi %st(3) +# CHECK-NEXT: - - - - - 1.00 - - 0.50 0.50 - - - - 1.00 - - - - - - - - fucomi %st(3), %st +# CHECK-NEXT: - - - - - 1.00 - - 0.50 0.50 - - - - 1.00 - - - - - - - - fucompi %st(3), %st # CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - wait # CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - fxam # CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - fxch %st(1) diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-x87.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-x87.s index 5cb92be47eabb0..2f3a69da99d074 100644 --- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-x87.s @@ -5,7 +5,7 @@ f2xm1 fabs -fadd %st(0), %st(1) +fadd %st, %st(1) fadd %st(2) fadds (%ecx) faddl (%ecx) @@ -21,14 +21,14 @@ fchs fnclex -fcmovb %st(1), %st(0) -fcmovbe %st(1), %st(0) -fcmove %st(1), %st(0) -fcmovnb %st(1), %st(0) -fcmovnbe %st(1), %st(0) -fcmovne %st(1), %st(0) -fcmovnu %st(1), %st(0) -fcmovu %st(1), %st(0) +fcmovb %st(1), %st +fcmovbe %st(1), %st +fcmove %st(1), %st +fcmovnb %st(1), %st +fcmovnbe %st(1), %st +fcmovne %st(1), %st +fcmovnu %st(1), %st +fcmovu %st(1), %st fcom %st(1) fcom %st(3) @@ -47,7 +47,7 @@ fcos fdecstp -fdiv %st(0), %st(1) +fdiv %st, %st(1) fdiv %st(2) fdivs (%ecx) fdivl (%eax) @@ -56,7 +56,7 @@ fdivp %st(2) fidivs (%ecx) fidivl (%eax) -fdivr %st(0), %st(1) +fdivr %st, %st(1) fdivr %st(2) fdivrs (%ecx) fdivrl (%eax) @@ -106,7 +106,7 @@ fldln2 fldpi fldz -fmul %st(0), %st(1) +fmul %st, %st(1) fmul %st(2) fmuls (%ecx) fmull (%eax) @@ -153,7 +153,7 @@ fnstsw (%eax) frstor (%eax) fsave (%eax) -fsub %st(0), %st(1) +fsub %st, %st(1) fsub %st(2) fsubs (%ecx) fsubl (%eax) @@ -162,7 +162,7 @@ fsubp %st(2) fisubs (%ecx) fisubl (%eax) -fsubr %st(0), %st(1) +fsubr %st, %st(1) fsubr %st(2) fsubrs (%ecx) fsubrl (%eax) @@ -208,26 +208,26 @@ fyl2xp1 # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 100 0.25 U f2xm1 # CHECK-NEXT: 1 1 1.00 U fabs -# CHECK-NEXT: 1 3 1.00 U fadd %st(0), %st(1) -# CHECK-NEXT: 1 3 1.00 U fadd %st(2) +# CHECK-NEXT: 1 3 1.00 U fadd %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fadd %st(2), %st # CHECK-NEXT: 2 9 1.00 * U fadds (%ecx) # CHECK-NEXT: 2 9 1.00 * U faddl (%ecx) -# CHECK-NEXT: 1 3 1.00 U faddp %st(1) -# CHECK-NEXT: 1 3 1.00 U faddp %st(2) +# CHECK-NEXT: 1 3 1.00 U faddp %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U faddp %st, %st(2) # CHECK-NEXT: 3 12 2.00 * U fiadds (%ecx) # CHECK-NEXT: 3 12 2.00 * U fiaddl (%ecx) # CHECK-NEXT: 1 100 0.25 U fbld (%ecx) # CHECK-NEXT: 2 1 1.00 U fbstp (%eax) # CHECK-NEXT: 1 1 1.00 U fchs # CHECK-NEXT: 4 4 1.00 U fnclex -# CHECK-NEXT: 1 3 1.00 U fcmovb %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovbe %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmove %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovnb %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovnbe %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovne %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovnu %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovu %st(1), %st(0) +# CHECK-NEXT: 1 3 1.00 U fcmovb %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovbe %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmove %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovnb %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovnbe %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovne %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovnu %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovu %st(1), %st # CHECK-NEXT: 1 1 1.00 U fcom %st(1) # CHECK-NEXT: 1 1 1.00 U fcom %st(3) # CHECK-NEXT: 2 7 1.00 U fcoms (%ecx) @@ -237,24 +237,24 @@ fyl2xp1 # CHECK-NEXT: 2 7 1.00 U fcomps (%ecx) # CHECK-NEXT: 2 7 1.00 U fcompl (%eax) # CHECK-NEXT: 1 100 0.25 U fcompp -# CHECK-NEXT: 1 3 1.00 U fcomi %st(3) -# CHECK-NEXT: 1 3 1.00 U fcompi %st(3) +# CHECK-NEXT: 1 3 1.00 U fcomi %st(3), %st +# CHECK-NEXT: 1 3 1.00 U fcompi %st(3), %st # CHECK-NEXT: 1 100 0.25 U fcos # CHECK-NEXT: 2 2 1.00 U fdecstp -# CHECK-NEXT: 1 15 1.00 U fdiv %st(0), %st(1) -# CHECK-NEXT: 1 20 1.00 U fdiv %st(2) +# CHECK-NEXT: 1 15 1.00 U fdiv %st, %st(1) +# CHECK-NEXT: 1 20 1.00 U fdiv %st(2), %st # CHECK-NEXT: 2 21 1.00 * U fdivs (%ecx) # CHECK-NEXT: 2 21 1.00 * U fdivl (%eax) -# CHECK-NEXT: 1 15 1.00 U fdivp %st(1) -# CHECK-NEXT: 1 15 1.00 U fdivp %st(2) +# CHECK-NEXT: 1 15 1.00 U fdivp %st, %st(1) +# CHECK-NEXT: 1 15 1.00 U fdivp %st, %st(2) # CHECK-NEXT: 3 24 1.00 * U fidivs (%ecx) # CHECK-NEXT: 3 24 1.00 * U fidivl (%eax) -# CHECK-NEXT: 1 20 1.00 U fdivr %st(0), %st(1) -# CHECK-NEXT: 1 15 1.00 U fdivr %st(2) +# CHECK-NEXT: 1 20 1.00 U fdivr %st, %st(1) +# CHECK-NEXT: 1 15 1.00 U fdivr %st(2), %st # CHECK-NEXT: 2 26 1.00 * U fdivrs (%ecx) # CHECK-NEXT: 2 26 1.00 * U fdivrl (%eax) -# CHECK-NEXT: 1 20 1.00 U fdivrp %st(1) -# CHECK-NEXT: 1 20 1.00 U fdivrp %st(2) +# CHECK-NEXT: 1 20 1.00 U fdivrp %st, %st(1) +# CHECK-NEXT: 1 20 1.00 U fdivrp %st, %st(2) # CHECK-NEXT: 3 29 1.00 * U fidivrs (%ecx) # CHECK-NEXT: 3 29 1.00 * U fidivrl (%eax) # CHECK-NEXT: 1 100 0.25 U ffree %st(0) @@ -288,12 +288,12 @@ fyl2xp1 # CHECK-NEXT: 2 1 1.00 U fldln2 # CHECK-NEXT: 2 1 1.00 U fldpi # CHECK-NEXT: 1 1 0.50 U fldz -# CHECK-NEXT: 1 5 1.00 U fmul %st(0), %st(1) -# CHECK-NEXT: 1 5 1.00 U fmul %st(2) +# CHECK-NEXT: 1 5 1.00 U fmul %st, %st(1) +# CHECK-NEXT: 1 5 1.00 U fmul %st(2), %st # CHECK-NEXT: 2 11 1.00 * U fmuls (%ecx) # CHECK-NEXT: 2 11 1.00 * U fmull (%eax) -# CHECK-NEXT: 1 5 1.00 U fmulp %st(1) -# CHECK-NEXT: 1 5 1.00 U fmulp %st(2) +# CHECK-NEXT: 1 5 1.00 U fmulp %st, %st(1) +# CHECK-NEXT: 1 5 1.00 U fmulp %st, %st(2) # CHECK-NEXT: 3 14 1.00 * U fimuls (%ecx) # CHECK-NEXT: 3 14 1.00 * U fimull (%eax) # CHECK-NEXT: 1 1 0.50 U fnop @@ -321,20 +321,20 @@ fyl2xp1 # CHECK-NEXT: 1 100 0.25 U frstor (%eax) # CHECK-NEXT: 2 2 0.50 U wait # CHECK-NEXT: 1 100 0.25 U fnsave (%eax) -# CHECK-NEXT: 1 3 1.00 U fsub %st(0), %st(1) -# CHECK-NEXT: 1 3 1.00 U fsub %st(2) +# CHECK-NEXT: 1 3 1.00 U fsub %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsub %st(2), %st # CHECK-NEXT: 2 9 1.00 * U fsubs (%ecx) # CHECK-NEXT: 2 9 1.00 * U fsubl (%eax) -# CHECK-NEXT: 1 3 1.00 U fsubp %st(1) -# CHECK-NEXT: 1 3 1.00 U fsubp %st(2) +# CHECK-NEXT: 1 3 1.00 U fsubp %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsubp %st, %st(2) # CHECK-NEXT: 3 12 2.00 * U fisubs (%ecx) # CHECK-NEXT: 3 12 2.00 * U fisubl (%eax) -# CHECK-NEXT: 1 3 1.00 U fsubr %st(0), %st(1) -# CHECK-NEXT: 1 3 1.00 U fsubr %st(2) +# CHECK-NEXT: 1 3 1.00 U fsubr %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsubr %st(2), %st # CHECK-NEXT: 2 9 1.00 * U fsubrs (%ecx) # CHECK-NEXT: 2 9 1.00 * U fsubrl (%eax) -# CHECK-NEXT: 1 3 1.00 U fsubrp %st(1) -# CHECK-NEXT: 1 3 1.00 U fsubrp %st(2) +# CHECK-NEXT: 1 3 1.00 U fsubrp %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsubrp %st, %st(2) # CHECK-NEXT: 3 12 2.00 * U fisubrs (%ecx) # CHECK-NEXT: 3 12 2.00 * U fisubrl (%eax) # CHECK-NEXT: 1 3 1.00 U ftst @@ -343,8 +343,8 @@ fyl2xp1 # CHECK-NEXT: 1 1 1.00 U fucomp %st(1) # CHECK-NEXT: 1 1 1.00 U fucomp %st(3) # CHECK-NEXT: 1 3 1.00 U fucompp -# CHECK-NEXT: 1 3 1.00 U fucomi %st(3) -# CHECK-NEXT: 1 3 1.00 U fucompi %st(3) +# CHECK-NEXT: 1 3 1.00 U fucomi %st(3), %st +# CHECK-NEXT: 1 3 1.00 U fucompi %st(3), %st # CHECK-NEXT: 2 2 0.50 U wait # CHECK-NEXT: 1 100 0.25 U fxam # CHECK-NEXT: 12 14 4.00 U fxch %st(1) @@ -375,26 +375,26 @@ fyl2xp1 # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - f2xm1 # CHECK-NEXT: - - - - - - - 1.00 - - fabs -# CHECK-NEXT: - - - 1.00 - - - - - - fadd %st(0), %st(1) -# CHECK-NEXT: - - - 1.00 - - - - - - fadd %st(2) +# CHECK-NEXT: - - - 1.00 - - - - - - fadd %st, %st(1) +# CHECK-NEXT: - - - 1.00 - - - - - - fadd %st(2), %st # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - fadds (%ecx) # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - faddl (%ecx) -# CHECK-NEXT: - - - 1.00 - - - - - - faddp %st(1) -# CHECK-NEXT: - - - 1.00 - - - - - - faddp %st(2) +# CHECK-NEXT: - - - 1.00 - - - - - - faddp %st, %st(1) +# CHECK-NEXT: - - - 1.00 - - - - - - faddp %st, %st(2) # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - fiadds (%ecx) # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - fiaddl (%ecx) # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fbld (%ecx) # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 fbstp (%eax) # CHECK-NEXT: - - - - - - - 1.00 - - fchs # CHECK-NEXT: - - 1.00 1.00 - - - 1.00 1.00 - fnclex -# CHECK-NEXT: - - - 1.00 - - - - - - fcmovb %st(1), %st(0) -# CHECK-NEXT: - - - 1.00 - - - - - - fcmovbe %st(1), %st(0) -# CHECK-NEXT: - - - 1.00 - - - - - - fcmove %st(1), %st(0) -# CHECK-NEXT: - - - 1.00 - - - - - - fcmovnb %st(1), %st(0) -# CHECK-NEXT: - - - 1.00 - - - - - - fcmovnbe %st(1), %st(0) -# CHECK-NEXT: - - - 1.00 - - - - - - fcmovne %st(1), %st(0) -# CHECK-NEXT: - - - 1.00 - - - - - - fcmovnu %st(1), %st(0) -# CHECK-NEXT: - - - 1.00 - - - - - - fcmovu %st(1), %st(0) +# CHECK-NEXT: - - - 1.00 - - - - - - fcmovb %st(1), %st +# CHECK-NEXT: - - - 1.00 - - - - - - fcmovbe %st(1), %st +# CHECK-NEXT: - - - 1.00 - - - - - - fcmove %st(1), %st +# CHECK-NEXT: - - - 1.00 - - - - - - fcmovnb %st(1), %st +# CHECK-NEXT: - - - 1.00 - - - - - - fcmovnbe %st(1), %st +# CHECK-NEXT: - - - 1.00 - - - - - - fcmovne %st(1), %st +# CHECK-NEXT: - - - 1.00 - - - - - - fcmovnu %st(1), %st +# CHECK-NEXT: - - - 1.00 - - - - - - fcmovu %st(1), %st # CHECK-NEXT: - - - 1.00 - - - - - - fcom %st(1) # CHECK-NEXT: - - - 1.00 - - - - - - fcom %st(3) # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - fcoms (%ecx) @@ -404,24 +404,24 @@ fyl2xp1 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - fcomps (%ecx) # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - fcompl (%eax) # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fcompp -# CHECK-NEXT: - - - 1.00 - - - - - - fcomi %st(3) -# CHECK-NEXT: - - - 1.00 - - - - - - fcompi %st(3) +# CHECK-NEXT: - - - 1.00 - - - - - - fcomi %st(3), %st +# CHECK-NEXT: - - - 1.00 - - - - - - fcompi %st(3), %st # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fcos # CHECK-NEXT: - - 1.00 1.00 - - - - - - fdecstp -# CHECK-NEXT: - - 1.00 - - - - - - - fdiv %st(0), %st(1) -# CHECK-NEXT: - - 1.00 - - - - - - - fdiv %st(2) +# CHECK-NEXT: - - 1.00 - - - - - - - fdiv %st, %st(1) +# CHECK-NEXT: - - 1.00 - - - - - - - fdiv %st(2), %st # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - fdivs (%ecx) # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - fdivl (%eax) -# CHECK-NEXT: - - 1.00 - - - - - - - fdivp %st(1) -# CHECK-NEXT: - - 1.00 - - - - - - - fdivp %st(2) +# CHECK-NEXT: - - 1.00 - - - - - - - fdivp %st, %st(1) +# CHECK-NEXT: - - 1.00 - - - - - - - fdivp %st, %st(2) # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - fidivs (%ecx) # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - fidivl (%eax) -# CHECK-NEXT: - - 1.00 - - - - - - - fdivr %st(0), %st(1) -# CHECK-NEXT: - - 1.00 - - - - - - - fdivr %st(2) +# CHECK-NEXT: - - 1.00 - - - - - - - fdivr %st, %st(1) +# CHECK-NEXT: - - 1.00 - - - - - - - fdivr %st(2), %st # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - fdivrs (%ecx) # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - fdivrl (%eax) -# CHECK-NEXT: - - 1.00 - - - - - - - fdivrp %st(1) -# CHECK-NEXT: - - 1.00 - - - - - - - fdivrp %st(2) +# CHECK-NEXT: - - 1.00 - - - - - - - fdivrp %st, %st(1) +# CHECK-NEXT: - - 1.00 - - - - - - - fdivrp %st, %st(2) # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - fidivrs (%ecx) # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - fidivrl (%eax) # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - ffree %st(0) @@ -455,12 +455,12 @@ fyl2xp1 # CHECK-NEXT: - - 1.00 1.00 - - - - - - fldln2 # CHECK-NEXT: - - 1.00 1.00 - - - - - - fldpi # CHECK-NEXT: - - 0.50 0.50 - - - - - - fldz -# CHECK-NEXT: - - 1.00 - - - - - - - fmul %st(0), %st(1) -# CHECK-NEXT: - - 1.00 - - - - - - - fmul %st(2) +# CHECK-NEXT: - - 1.00 - - - - - - - fmul %st, %st(1) +# CHECK-NEXT: - - 1.00 - - - - - - - fmul %st(2), %st # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - fmuls (%ecx) # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - fmull (%eax) -# CHECK-NEXT: - - 1.00 - - - - - - - fmulp %st(1) -# CHECK-NEXT: - - 1.00 - - - - - - - fmulp %st(2) +# CHECK-NEXT: - - 1.00 - - - - - - - fmulp %st, %st(1) +# CHECK-NEXT: - - 1.00 - - - - - - - fmulp %st, %st(2) # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - fimuls (%ecx) # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - fimull (%eax) # CHECK-NEXT: - - 0.50 0.50 - - - - - - fnop @@ -488,20 +488,20 @@ fyl2xp1 # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - frstor (%eax) # CHECK-NEXT: - - 0.50 0.50 - - - 0.50 0.50 - wait # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fnsave (%eax) -# CHECK-NEXT: - - - 1.00 - - - - - - fsub %st(0), %st(1) -# CHECK-NEXT: - - - 1.00 - - - - - - fsub %st(2) +# CHECK-NEXT: - - - 1.00 - - - - - - fsub %st, %st(1) +# CHECK-NEXT: - - - 1.00 - - - - - - fsub %st(2), %st # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - fsubs (%ecx) # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - fsubl (%eax) -# CHECK-NEXT: - - - 1.00 - - - - - - fsubp %st(1) -# CHECK-NEXT: - - - 1.00 - - - - - - fsubp %st(2) +# CHECK-NEXT: - - - 1.00 - - - - - - fsubp %st, %st(1) +# CHECK-NEXT: - - - 1.00 - - - - - - fsubp %st, %st(2) # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - fisubs (%ecx) # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - fisubl (%eax) -# CHECK-NEXT: - - - 1.00 - - - - - - fsubr %st(0), %st(1) -# CHECK-NEXT: - - - 1.00 - - - - - - fsubr %st(2) +# CHECK-NEXT: - - - 1.00 - - - - - - fsubr %st, %st(1) +# CHECK-NEXT: - - - 1.00 - - - - - - fsubr %st(2), %st # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - fsubrs (%ecx) # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - fsubrl (%eax) -# CHECK-NEXT: - - - 1.00 - - - - - - fsubrp %st(1) -# CHECK-NEXT: - - - 1.00 - - - - - - fsubrp %st(2) +# CHECK-NEXT: - - - 1.00 - - - - - - fsubrp %st, %st(1) +# CHECK-NEXT: - - - 1.00 - - - - - - fsubrp %st, %st(2) # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - fisubrs (%ecx) # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - fisubrl (%eax) # CHECK-NEXT: - - - 1.00 - - - - - - ftst @@ -510,8 +510,8 @@ fyl2xp1 # CHECK-NEXT: - - - 1.00 - - - - - - fucomp %st(1) # CHECK-NEXT: - - - 1.00 - - - - - - fucomp %st(3) # CHECK-NEXT: - - - 1.00 - - - - - - fucompp -# CHECK-NEXT: - - - 1.00 - - - - - - fucomi %st(3) -# CHECK-NEXT: - - - 1.00 - - - - - - fucompi %st(3) +# CHECK-NEXT: - - - 1.00 - - - - - - fucomi %st(3), %st +# CHECK-NEXT: - - - 1.00 - - - - - - fucompi %st(3), %st # CHECK-NEXT: - - 0.50 0.50 - - - 0.50 0.50 - wait # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fxam # CHECK-NEXT: - - 3.25 2.25 - - - 1.25 5.25 - fxch %st(1) diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-x87.s b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-x87.s index a0e431f6dfe45d..2b6b2c49727315 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-x87.s @@ -5,7 +5,7 @@ f2xm1 fabs -fadd %st(0), %st(1) +fadd %st, %st(1) fadd %st(2) fadds (%ecx) faddl (%ecx) @@ -21,14 +21,14 @@ fchs fnclex -fcmovb %st(1), %st(0) -fcmovbe %st(1), %st(0) -fcmove %st(1), %st(0) -fcmovnb %st(1), %st(0) -fcmovnbe %st(1), %st(0) -fcmovne %st(1), %st(0) -fcmovnu %st(1), %st(0) -fcmovu %st(1), %st(0) +fcmovb %st(1), %st +fcmovbe %st(1), %st +fcmove %st(1), %st +fcmovnb %st(1), %st +fcmovnbe %st(1), %st +fcmovne %st(1), %st +fcmovnu %st(1), %st +fcmovu %st(1), %st fcom %st(1) fcom %st(3) @@ -47,7 +47,7 @@ fcos fdecstp -fdiv %st(0), %st(1) +fdiv %st, %st(1) fdiv %st(2) fdivs (%ecx) fdivl (%eax) @@ -56,7 +56,7 @@ fdivp %st(2) fidivs (%ecx) fidivl (%eax) -fdivr %st(0), %st(1) +fdivr %st, %st(1) fdivr %st(2) fdivrs (%ecx) fdivrl (%eax) @@ -106,7 +106,7 @@ fldln2 fldpi fldz -fmul %st(0), %st(1) +fmul %st, %st(1) fmul %st(2) fmuls (%ecx) fmull (%eax) @@ -153,7 +153,7 @@ fnstsw (%eax) frstor (%eax) fsave (%eax) -fsub %st(0), %st(1) +fsub %st, %st(1) fsub %st(2) fsubs (%ecx) fsubl (%eax) @@ -162,7 +162,7 @@ fsubp %st(2) fisubs (%ecx) fisubl (%eax) -fsubr %st(0), %st(1) +fsubr %st, %st(1) fsubr %st(2) fsubrs (%ecx) fsubrl (%eax) @@ -208,26 +208,26 @@ fyl2xp1 # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 100 0.50 U f2xm1 # CHECK-NEXT: 1 2 1.00 U fabs -# CHECK-NEXT: 1 3 1.00 U fadd %st(0), %st(1) -# CHECK-NEXT: 1 3 1.00 U fadd %st(2) +# CHECK-NEXT: 1 3 1.00 U fadd %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fadd %st(2), %st # CHECK-NEXT: 1 8 1.00 * U fadds (%ecx) # CHECK-NEXT: 1 8 1.00 * U faddl (%ecx) -# CHECK-NEXT: 1 3 1.00 U faddp %st(1) -# CHECK-NEXT: 1 3 1.00 U faddp %st(2) +# CHECK-NEXT: 1 3 1.00 U faddp %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U faddp %st, %st(2) # CHECK-NEXT: 1 8 1.00 * U fiadds (%ecx) # CHECK-NEXT: 1 8 1.00 * U fiaddl (%ecx) # CHECK-NEXT: 1 100 0.50 U fbld (%ecx) # CHECK-NEXT: 1 100 0.50 U fbstp (%eax) # CHECK-NEXT: 1 2 1.00 U fchs # CHECK-NEXT: 1 100 0.50 U fnclex -# CHECK-NEXT: 1 3 1.00 U fcmovb %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovbe %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmove %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovnb %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovnbe %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovne %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovnu %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovu %st(1), %st(0) +# CHECK-NEXT: 1 3 1.00 U fcmovb %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovbe %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmove %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovnb %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovnbe %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovne %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovnu %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovu %st(1), %st # CHECK-NEXT: 1 3 1.00 U fcom %st(1) # CHECK-NEXT: 1 3 1.00 U fcom %st(3) # CHECK-NEXT: 1 8 1.00 U fcoms (%ecx) @@ -237,24 +237,24 @@ fyl2xp1 # CHECK-NEXT: 1 8 1.00 U fcomps (%ecx) # CHECK-NEXT: 1 8 1.00 U fcompl (%eax) # CHECK-NEXT: 1 100 0.50 U fcompp -# CHECK-NEXT: 1 3 1.00 U fcomi %st(3) -# CHECK-NEXT: 1 3 1.00 U fcompi %st(3) +# CHECK-NEXT: 1 3 1.00 U fcomi %st(3), %st +# CHECK-NEXT: 1 3 1.00 U fcompi %st(3), %st # CHECK-NEXT: 1 100 0.50 U fcos # CHECK-NEXT: 1 100 0.50 U fdecstp -# CHECK-NEXT: 1 19 19.00 U fdiv %st(0), %st(1) -# CHECK-NEXT: 1 19 19.00 U fdiv %st(2) +# CHECK-NEXT: 1 19 19.00 U fdiv %st, %st(1) +# CHECK-NEXT: 1 19 19.00 U fdiv %st(2), %st # CHECK-NEXT: 1 24 19.00 * U fdivs (%ecx) # CHECK-NEXT: 1 24 19.00 * U fdivl (%eax) -# CHECK-NEXT: 1 19 19.00 U fdivp %st(1) -# CHECK-NEXT: 1 19 19.00 U fdivp %st(2) +# CHECK-NEXT: 1 19 19.00 U fdivp %st, %st(1) +# CHECK-NEXT: 1 19 19.00 U fdivp %st, %st(2) # CHECK-NEXT: 1 24 19.00 * U fidivs (%ecx) # CHECK-NEXT: 1 24 19.00 * U fidivl (%eax) -# CHECK-NEXT: 1 19 19.00 U fdivr %st(0), %st(1) -# CHECK-NEXT: 1 19 19.00 U fdivr %st(2) +# CHECK-NEXT: 1 19 19.00 U fdivr %st, %st(1) +# CHECK-NEXT: 1 19 19.00 U fdivr %st(2), %st # CHECK-NEXT: 1 24 19.00 * U fdivrs (%ecx) # CHECK-NEXT: 1 24 19.00 * U fdivrl (%eax) -# CHECK-NEXT: 1 19 19.00 U fdivrp %st(1) -# CHECK-NEXT: 1 19 19.00 U fdivrp %st(2) +# CHECK-NEXT: 1 19 19.00 U fdivrp %st, %st(1) +# CHECK-NEXT: 1 19 19.00 U fdivrp %st, %st(2) # CHECK-NEXT: 1 24 19.00 * U fidivrs (%ecx) # CHECK-NEXT: 1 24 19.00 * U fidivrl (%eax) # CHECK-NEXT: 1 100 0.50 U ffree %st(0) @@ -288,12 +288,12 @@ fyl2xp1 # CHECK-NEXT: 1 3 1.00 U fldln2 # CHECK-NEXT: 1 3 1.00 U fldpi # CHECK-NEXT: 1 3 1.00 U fldz -# CHECK-NEXT: 1 2 1.00 U fmul %st(0), %st(1) -# CHECK-NEXT: 1 2 1.00 U fmul %st(2) +# CHECK-NEXT: 1 2 1.00 U fmul %st, %st(1) +# CHECK-NEXT: 1 2 1.00 U fmul %st(2), %st # CHECK-NEXT: 1 7 1.00 * U fmuls (%ecx) # CHECK-NEXT: 1 7 1.00 * U fmull (%eax) -# CHECK-NEXT: 1 2 1.00 U fmulp %st(1) -# CHECK-NEXT: 1 2 1.00 U fmulp %st(2) +# CHECK-NEXT: 1 2 1.00 U fmulp %st, %st(1) +# CHECK-NEXT: 1 2 1.00 U fmulp %st, %st(2) # CHECK-NEXT: 1 7 1.00 * U fimuls (%ecx) # CHECK-NEXT: 1 7 1.00 * U fimull (%eax) # CHECK-NEXT: 1 1 0.50 U fnop @@ -321,20 +321,20 @@ fyl2xp1 # CHECK-NEXT: 1 100 0.50 U frstor (%eax) # CHECK-NEXT: 1 100 0.50 U wait # CHECK-NEXT: 1 100 0.50 U fnsave (%eax) -# CHECK-NEXT: 1 3 1.00 U fsub %st(0), %st(1) -# CHECK-NEXT: 1 3 1.00 U fsub %st(2) +# CHECK-NEXT: 1 3 1.00 U fsub %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsub %st(2), %st # CHECK-NEXT: 1 8 1.00 * U fsubs (%ecx) # CHECK-NEXT: 1 8 1.00 * U fsubl (%eax) -# CHECK-NEXT: 1 3 1.00 U fsubp %st(1) -# CHECK-NEXT: 1 3 1.00 U fsubp %st(2) +# CHECK-NEXT: 1 3 1.00 U fsubp %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsubp %st, %st(2) # CHECK-NEXT: 1 8 1.00 * U fisubs (%ecx) # CHECK-NEXT: 1 8 1.00 * U fisubl (%eax) -# CHECK-NEXT: 1 3 1.00 U fsubr %st(0), %st(1) -# CHECK-NEXT: 1 3 1.00 U fsubr %st(2) +# CHECK-NEXT: 1 3 1.00 U fsubr %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsubr %st(2), %st # CHECK-NEXT: 1 8 1.00 * U fsubrs (%ecx) # CHECK-NEXT: 1 8 1.00 * U fsubrl (%eax) -# CHECK-NEXT: 1 3 1.00 U fsubrp %st(1) -# CHECK-NEXT: 1 3 1.00 U fsubrp %st(2) +# CHECK-NEXT: 1 3 1.00 U fsubrp %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsubrp %st, %st(2) # CHECK-NEXT: 1 8 1.00 * U fisubrs (%ecx) # CHECK-NEXT: 1 8 1.00 * U fisubrl (%eax) # CHECK-NEXT: 1 3 1.00 U ftst @@ -343,8 +343,8 @@ fyl2xp1 # CHECK-NEXT: 1 3 1.00 U fucomp %st(1) # CHECK-NEXT: 1 3 1.00 U fucomp %st(3) # CHECK-NEXT: 1 3 1.00 U fucompp -# CHECK-NEXT: 1 3 1.00 U fucomi %st(3) -# CHECK-NEXT: 1 3 1.00 U fucompi %st(3) +# CHECK-NEXT: 1 3 1.00 U fucomi %st(3), %st +# CHECK-NEXT: 1 3 1.00 U fucompi %st(3), %st # CHECK-NEXT: 1 100 0.50 U wait # CHECK-NEXT: 1 100 0.50 U fxam # CHECK-NEXT: 1 1 0.50 U fxch %st(1) @@ -379,26 +379,26 @@ fyl2xp1 # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - f2xm1 # CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - fabs -# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fadd %st(0), %st(1) -# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fadd %st(2) +# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fadd %st, %st(1) +# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fadd %st(2), %st # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - fadds (%ecx) # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - faddl (%ecx) -# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - faddp %st(1) -# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - faddp %st(2) +# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - faddp %st, %st(1) +# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - faddp %st, %st(2) # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - fiadds (%ecx) # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - fiaddl (%ecx) # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - fbld (%ecx) # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - fbstp (%eax) # CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - fchs # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - fnclex -# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fcmovb %st(1), %st(0) -# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fcmovbe %st(1), %st(0) -# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fcmove %st(1), %st(0) -# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fcmovnb %st(1), %st(0) -# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fcmovnbe %st(1), %st(0) -# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fcmovne %st(1), %st(0) -# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fcmovnu %st(1), %st(0) -# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fcmovu %st(1), %st(0) +# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fcmovb %st(1), %st +# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fcmovbe %st(1), %st +# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fcmove %st(1), %st +# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fcmovnb %st(1), %st +# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fcmovnbe %st(1), %st +# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fcmovne %st(1), %st +# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fcmovnu %st(1), %st +# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fcmovu %st(1), %st # CHECK-NEXT: 1.00 - - 1.00 - 1.00 - - - - - - - - fcom %st(1) # CHECK-NEXT: 1.00 - - 1.00 - 1.00 - - - - - - - - fcom %st(3) # CHECK-NEXT: 1.00 - - 1.00 - 1.00 - 1.00 - - - - - - fcoms (%ecx) @@ -408,24 +408,24 @@ fyl2xp1 # CHECK-NEXT: 1.00 - - 1.00 - 1.00 - 1.00 - - - - - - fcomps (%ecx) # CHECK-NEXT: 1.00 - - 1.00 - 1.00 - 1.00 - - - - - - fcompl (%eax) # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - fcompp -# CHECK-NEXT: 1.00 - - 1.00 - 1.00 - - - - - - - - fcomi %st(3) -# CHECK-NEXT: 1.00 - - 1.00 - 1.00 - - - - - - - - fcompi %st(3) +# CHECK-NEXT: 1.00 - - 1.00 - 1.00 - - - - - - - - fcomi %st(3), %st +# CHECK-NEXT: 1.00 - - 1.00 - 1.00 - - - - - - - - fcompi %st(3), %st # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - fcos # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - fdecstp -# CHECK-NEXT: - - - - 19.00 - 1.00 - - - - - - - fdiv %st(0), %st(1) -# CHECK-NEXT: - - - - 19.00 - 1.00 - - - - - - - fdiv %st(2) +# CHECK-NEXT: - - - - 19.00 - 1.00 - - - - - - - fdiv %st, %st(1) +# CHECK-NEXT: - - - - 19.00 - 1.00 - - - - - - - fdiv %st(2), %st # CHECK-NEXT: - - - - 19.00 - 1.00 1.00 - - - - - - fdivs (%ecx) # CHECK-NEXT: - - - - 19.00 - 1.00 1.00 - - - - - - fdivl (%eax) -# CHECK-NEXT: - - - - 19.00 - 1.00 - - - - - - - fdivp %st(1) -# CHECK-NEXT: - - - - 19.00 - 1.00 - - - - - - - fdivp %st(2) +# CHECK-NEXT: - - - - 19.00 - 1.00 - - - - - - - fdivp %st, %st(1) +# CHECK-NEXT: - - - - 19.00 - 1.00 - - - - - - - fdivp %st, %st(2) # CHECK-NEXT: - - - - 19.00 - 1.00 1.00 - - - - - - fidivs (%ecx) # CHECK-NEXT: - - - - 19.00 - 1.00 1.00 - - - - - - fidivl (%eax) -# CHECK-NEXT: - - - - 19.00 - 1.00 - - - - - - - fdivr %st(0), %st(1) -# CHECK-NEXT: - - - - 19.00 - 1.00 - - - - - - - fdivr %st(2) +# CHECK-NEXT: - - - - 19.00 - 1.00 - - - - - - - fdivr %st, %st(1) +# CHECK-NEXT: - - - - 19.00 - 1.00 - - - - - - - fdivr %st(2), %st # CHECK-NEXT: - - - - 19.00 - 1.00 1.00 - - - - - - fdivrs (%ecx) # CHECK-NEXT: - - - - 19.00 - 1.00 1.00 - - - - - - fdivrl (%eax) -# CHECK-NEXT: - - - - 19.00 - 1.00 - - - - - - - fdivrp %st(1) -# CHECK-NEXT: - - - - 19.00 - 1.00 - - - - - - - fdivrp %st(2) +# CHECK-NEXT: - - - - 19.00 - 1.00 - - - - - - - fdivrp %st, %st(1) +# CHECK-NEXT: - - - - 19.00 - 1.00 - - - - - - - fdivrp %st, %st(2) # CHECK-NEXT: - - - - 19.00 - 1.00 1.00 - - - - - - fidivrs (%ecx) # CHECK-NEXT: - - - - 19.00 - 1.00 1.00 - - - - - - fidivrl (%eax) # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - ffree %st(0) @@ -459,12 +459,12 @@ fyl2xp1 # CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - fldln2 # CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - fldpi # CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - fldz -# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - fmul %st(0), %st(1) -# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - fmul %st(2) +# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - fmul %st, %st(1) +# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - fmul %st(2), %st # CHECK-NEXT: - - - - 1.00 - 1.00 1.00 - - - - - - fmuls (%ecx) # CHECK-NEXT: - - - - 1.00 - 1.00 1.00 - - - - - - fmull (%eax) -# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - fmulp %st(1) -# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - fmulp %st(2) +# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - fmulp %st, %st(1) +# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - fmulp %st, %st(2) # CHECK-NEXT: - - - - 1.00 - 1.00 1.00 - - - - - - fimuls (%ecx) # CHECK-NEXT: - - - - 1.00 - 1.00 1.00 - - - - - - fimull (%eax) # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - fnop @@ -492,20 +492,20 @@ fyl2xp1 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - frstor (%eax) # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - wait # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - fnsave (%eax) -# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fsub %st(0), %st(1) -# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fsub %st(2) +# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fsub %st, %st(1) +# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fsub %st(2), %st # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - fsubs (%ecx) # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - fsubl (%eax) -# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fsubp %st(1) -# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fsubp %st(2) +# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fsubp %st, %st(1) +# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fsubp %st, %st(2) # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - fisubs (%ecx) # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - fisubl (%eax) -# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fsubr %st(0), %st(1) -# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fsubr %st(2) +# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fsubr %st, %st(1) +# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fsubr %st(2), %st # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - fsubrs (%ecx) # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - fsubrl (%eax) -# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fsubrp %st(1) -# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fsubrp %st(2) +# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fsubrp %st, %st(1) +# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - fsubrp %st, %st(2) # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - fisubrs (%ecx) # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - fisubrl (%eax) # CHECK-NEXT: 1.00 - - 1.00 - 1.00 - - - - - - - - ftst @@ -514,8 +514,8 @@ fyl2xp1 # CHECK-NEXT: 1.00 - - 1.00 - 1.00 - - - - - - - - fucomp %st(1) # CHECK-NEXT: 1.00 - - 1.00 - 1.00 - - - - - - - - fucomp %st(3) # CHECK-NEXT: 1.00 - - 1.00 - 1.00 - - - - - - - - fucompp -# CHECK-NEXT: 1.00 - - 1.00 - 1.00 - - - - - - - - fucomi %st(3) -# CHECK-NEXT: 1.00 - - 1.00 - 1.00 - - - - - - - - fucompi %st(3) +# CHECK-NEXT: 1.00 - - 1.00 - 1.00 - - - - - - - - fucomi %st(3), %st +# CHECK-NEXT: 1.00 - - 1.00 - 1.00 - - - - - - - - fucompi %st(3), %st # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - wait # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - fxam # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - fxch %st(1) diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-x87.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-x87.s index 1cba9a7d77fc28..1f3e51e58b33c0 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-x87.s @@ -5,7 +5,7 @@ f2xm1 fabs -fadd %st(0), %st(1) +fadd %st, %st(1) fadd %st(2) fadds (%ecx) faddl (%ecx) @@ -21,14 +21,14 @@ fchs fnclex -fcmovb %st(1), %st(0) -fcmovbe %st(1), %st(0) -fcmove %st(1), %st(0) -fcmovnb %st(1), %st(0) -fcmovnbe %st(1), %st(0) -fcmovne %st(1), %st(0) -fcmovnu %st(1), %st(0) -fcmovu %st(1), %st(0) +fcmovb %st(1), %st +fcmovbe %st(1), %st +fcmove %st(1), %st +fcmovnb %st(1), %st +fcmovnbe %st(1), %st +fcmovne %st(1), %st +fcmovnu %st(1), %st +fcmovu %st(1), %st fcom %st(1) fcom %st(3) @@ -47,7 +47,7 @@ fcos fdecstp -fdiv %st(0), %st(1) +fdiv %st, %st(1) fdiv %st(2) fdivs (%ecx) fdivl (%eax) @@ -56,7 +56,7 @@ fdivp %st(2) fidivs (%ecx) fidivl (%eax) -fdivr %st(0), %st(1) +fdivr %st, %st(1) fdivr %st(2) fdivrs (%ecx) fdivrl (%eax) @@ -106,7 +106,7 @@ fldln2 fldpi fldz -fmul %st(0), %st(1) +fmul %st, %st(1) fmul %st(2) fmuls (%ecx) fmull (%eax) @@ -153,7 +153,7 @@ fnstsw (%eax) frstor (%eax) fsave (%eax) -fsub %st(0), %st(1) +fsub %st, %st(1) fsub %st(2) fsubs (%ecx) fsubl (%eax) @@ -162,7 +162,7 @@ fsubp %st(2) fisubs (%ecx) fisubl (%eax) -fsubr %st(0), %st(1) +fsubr %st, %st(1) fsubr %st(2) fsubrs (%ecx) fsubrl (%eax) @@ -208,26 +208,26 @@ fyl2xp1 # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 100 0.33 U f2xm1 # CHECK-NEXT: 1 1 1.00 U fabs -# CHECK-NEXT: 1 3 1.00 U fadd %st(0), %st(1) -# CHECK-NEXT: 1 3 1.00 U fadd %st(2) +# CHECK-NEXT: 1 3 1.00 U fadd %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fadd %st(2), %st # CHECK-NEXT: 2 10 1.00 * U fadds (%ecx) # CHECK-NEXT: 2 10 1.00 * U faddl (%ecx) -# CHECK-NEXT: 1 3 1.00 U faddp %st(1) -# CHECK-NEXT: 1 3 1.00 U faddp %st(2) +# CHECK-NEXT: 1 3 1.00 U faddp %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U faddp %st, %st(2) # CHECK-NEXT: 3 13 2.00 * U fiadds (%ecx) # CHECK-NEXT: 3 13 2.00 * U fiaddl (%ecx) # CHECK-NEXT: 1 100 0.33 U fbld (%ecx) # CHECK-NEXT: 1 100 0.33 U fbstp (%eax) # CHECK-NEXT: 1 1 1.00 U fchs # CHECK-NEXT: 1 100 0.33 U fnclex -# CHECK-NEXT: 3 3 2.00 U fcmovb %st(1), %st(0) -# CHECK-NEXT: 3 3 2.00 U fcmovbe %st(1), %st(0) -# CHECK-NEXT: 3 3 2.00 U fcmove %st(1), %st(0) -# CHECK-NEXT: 3 3 2.00 U fcmovnb %st(1), %st(0) -# CHECK-NEXT: 3 3 2.00 U fcmovnbe %st(1), %st(0) -# CHECK-NEXT: 3 3 2.00 U fcmovne %st(1), %st(0) -# CHECK-NEXT: 3 3 2.00 U fcmovnu %st(1), %st(0) -# CHECK-NEXT: 3 3 2.00 U fcmovu %st(1), %st(0) +# CHECK-NEXT: 3 3 2.00 U fcmovb %st(1), %st +# CHECK-NEXT: 3 3 2.00 U fcmovbe %st(1), %st +# CHECK-NEXT: 3 3 2.00 U fcmove %st(1), %st +# CHECK-NEXT: 3 3 2.00 U fcmovnb %st(1), %st +# CHECK-NEXT: 3 3 2.00 U fcmovnbe %st(1), %st +# CHECK-NEXT: 3 3 2.00 U fcmovne %st(1), %st +# CHECK-NEXT: 3 3 2.00 U fcmovnu %st(1), %st +# CHECK-NEXT: 3 3 2.00 U fcmovu %st(1), %st # CHECK-NEXT: 1 1 1.00 U fcom %st(1) # CHECK-NEXT: 1 1 1.00 U fcom %st(3) # CHECK-NEXT: 2 8 1.00 U fcoms (%ecx) @@ -237,24 +237,24 @@ fyl2xp1 # CHECK-NEXT: 2 8 1.00 U fcomps (%ecx) # CHECK-NEXT: 2 8 1.00 U fcompl (%eax) # CHECK-NEXT: 1 100 0.33 U fcompp -# CHECK-NEXT: 3 3 1.00 U fcomi %st(3) -# CHECK-NEXT: 3 3 1.00 U fcompi %st(3) +# CHECK-NEXT: 3 3 1.00 U fcomi %st(3), %st +# CHECK-NEXT: 3 3 1.00 U fcompi %st(3), %st # CHECK-NEXT: 1 100 0.33 U fcos # CHECK-NEXT: 1 1 1.00 U fdecstp -# CHECK-NEXT: 1 14 14.00 U fdiv %st(0), %st(1) -# CHECK-NEXT: 1 14 14.00 U fdiv %st(2) +# CHECK-NEXT: 1 14 14.00 U fdiv %st, %st(1) +# CHECK-NEXT: 1 14 14.00 U fdiv %st(2), %st # CHECK-NEXT: 2 31 1.00 * U fdivs (%ecx) # CHECK-NEXT: 2 31 1.00 * U fdivl (%eax) -# CHECK-NEXT: 1 14 14.00 U fdivp %st(1) -# CHECK-NEXT: 1 14 14.00 U fdivp %st(2) +# CHECK-NEXT: 1 14 14.00 U fdivp %st, %st(1) +# CHECK-NEXT: 1 14 14.00 U fdivp %st, %st(2) # CHECK-NEXT: 3 34 1.00 * U fidivs (%ecx) # CHECK-NEXT: 3 34 1.00 * U fidivl (%eax) -# CHECK-NEXT: 1 14 14.00 U fdivr %st(0), %st(1) -# CHECK-NEXT: 1 14 14.00 U fdivr %st(2) +# CHECK-NEXT: 1 14 14.00 U fdivr %st, %st(1) +# CHECK-NEXT: 1 14 14.00 U fdivr %st(2), %st # CHECK-NEXT: 2 31 1.00 * U fdivrs (%ecx) # CHECK-NEXT: 2 31 1.00 * U fdivrl (%eax) -# CHECK-NEXT: 1 14 14.00 U fdivrp %st(1) -# CHECK-NEXT: 1 14 14.00 U fdivrp %st(2) +# CHECK-NEXT: 1 14 14.00 U fdivrp %st, %st(1) +# CHECK-NEXT: 1 14 14.00 U fdivrp %st, %st(2) # CHECK-NEXT: 3 34 1.00 * U fidivrs (%ecx) # CHECK-NEXT: 3 34 1.00 * U fidivrl (%eax) # CHECK-NEXT: 1 1 1.00 U ffree %st(0) @@ -288,12 +288,12 @@ fyl2xp1 # CHECK-NEXT: 2 1 1.00 U fldln2 # CHECK-NEXT: 2 1 1.00 U fldpi # CHECK-NEXT: 1 1 1.00 U fldz -# CHECK-NEXT: 1 5 1.00 U fmul %st(0), %st(1) -# CHECK-NEXT: 1 5 1.00 U fmul %st(2) +# CHECK-NEXT: 1 5 1.00 U fmul %st, %st(1) +# CHECK-NEXT: 1 5 1.00 U fmul %st(2), %st # CHECK-NEXT: 2 12 1.00 * U fmuls (%ecx) # CHECK-NEXT: 2 12 1.00 * U fmull (%eax) -# CHECK-NEXT: 1 5 1.00 U fmulp %st(1) -# CHECK-NEXT: 1 5 1.00 U fmulp %st(2) +# CHECK-NEXT: 1 5 1.00 U fmulp %st, %st(1) +# CHECK-NEXT: 1 5 1.00 U fmulp %st, %st(2) # CHECK-NEXT: 3 15 1.00 * U fimuls (%ecx) # CHECK-NEXT: 3 15 1.00 * U fimull (%eax) # CHECK-NEXT: 1 1 1.00 U fnop @@ -321,20 +321,20 @@ fyl2xp1 # CHECK-NEXT: 1 100 0.33 U frstor (%eax) # CHECK-NEXT: 1 100 0.33 U wait # CHECK-NEXT: 1 100 0.33 U fnsave (%eax) -# CHECK-NEXT: 1 3 1.00 U fsub %st(0), %st(1) -# CHECK-NEXT: 1 3 1.00 U fsub %st(2) +# CHECK-NEXT: 1 3 1.00 U fsub %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsub %st(2), %st # CHECK-NEXT: 2 10 1.00 * U fsubs (%ecx) # CHECK-NEXT: 2 10 1.00 * U fsubl (%eax) -# CHECK-NEXT: 1 3 1.00 U fsubp %st(1) -# CHECK-NEXT: 1 3 1.00 U fsubp %st(2) +# CHECK-NEXT: 1 3 1.00 U fsubp %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsubp %st, %st(2) # CHECK-NEXT: 3 13 2.00 * U fisubs (%ecx) # CHECK-NEXT: 3 13 2.00 * U fisubl (%eax) -# CHECK-NEXT: 1 3 1.00 U fsubr %st(0), %st(1) -# CHECK-NEXT: 1 3 1.00 U fsubr %st(2) +# CHECK-NEXT: 1 3 1.00 U fsubr %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsubr %st(2), %st # CHECK-NEXT: 2 10 1.00 * U fsubrs (%ecx) # CHECK-NEXT: 2 10 1.00 * U fsubrl (%eax) -# CHECK-NEXT: 1 3 1.00 U fsubrp %st(1) -# CHECK-NEXT: 1 3 1.00 U fsubrp %st(2) +# CHECK-NEXT: 1 3 1.00 U fsubrp %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsubrp %st, %st(2) # CHECK-NEXT: 3 13 2.00 * U fisubrs (%ecx) # CHECK-NEXT: 3 13 2.00 * U fisubrl (%eax) # CHECK-NEXT: 1 3 1.00 U ftst @@ -343,8 +343,8 @@ fyl2xp1 # CHECK-NEXT: 1 1 1.00 U fucomp %st(1) # CHECK-NEXT: 1 1 1.00 U fucomp %st(3) # CHECK-NEXT: 1 3 1.00 U fucompp -# CHECK-NEXT: 3 3 1.00 U fucomi %st(3) -# CHECK-NEXT: 3 3 1.00 U fucompi %st(3) +# CHECK-NEXT: 3 3 1.00 U fucomi %st(3), %st +# CHECK-NEXT: 3 3 1.00 U fucompi %st(3), %st # CHECK-NEXT: 1 100 0.33 U wait # CHECK-NEXT: 1 100 0.33 U fxam # CHECK-NEXT: 1 1 0.33 U fxch %st(1) @@ -373,26 +373,26 @@ fyl2xp1 # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - f2xm1 # CHECK-NEXT: - - - - - 1.00 - - fabs -# CHECK-NEXT: - - - 1.00 - - - - fadd %st(0), %st(1) -# CHECK-NEXT: - - - 1.00 - - - - fadd %st(2) +# CHECK-NEXT: - - - 1.00 - - - - fadd %st, %st(1) +# CHECK-NEXT: - - - 1.00 - - - - fadd %st(2), %st # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fadds (%ecx) # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 faddl (%ecx) -# CHECK-NEXT: - - - 1.00 - - - - faddp %st(1) -# CHECK-NEXT: - - - 1.00 - - - - faddp %st(2) +# CHECK-NEXT: - - - 1.00 - - - - faddp %st, %st(1) +# CHECK-NEXT: - - - 1.00 - - - - faddp %st, %st(2) # CHECK-NEXT: - - - 2.00 - - 0.50 0.50 fiadds (%ecx) # CHECK-NEXT: - - - 2.00 - - 0.50 0.50 fiaddl (%ecx) # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fbld (%ecx) # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fbstp (%eax) # CHECK-NEXT: - - - - - 1.00 - - fchs # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fnclex -# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovb %st(1), %st(0) -# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovbe %st(1), %st(0) -# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmove %st(1), %st(0) -# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovnb %st(1), %st(0) -# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovnbe %st(1), %st(0) -# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovne %st(1), %st(0) -# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovnu %st(1), %st(0) -# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovu %st(1), %st(0) +# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovb %st(1), %st +# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovbe %st(1), %st +# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmove %st(1), %st +# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovnb %st(1), %st +# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovnbe %st(1), %st +# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovne %st(1), %st +# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovnu %st(1), %st +# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovu %st(1), %st # CHECK-NEXT: - - - 1.00 - - - - fcom %st(1) # CHECK-NEXT: - - - 1.00 - - - - fcom %st(3) # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fcoms (%ecx) @@ -402,24 +402,24 @@ fyl2xp1 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fcomps (%ecx) # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fcompl (%eax) # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fcompp -# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - fcomi %st(3) -# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - fcompi %st(3) +# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - fcomi %st(3), %st +# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - fcompi %st(3), %st # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fcos # CHECK-NEXT: - - - - - 1.00 - - fdecstp -# CHECK-NEXT: - 14.00 1.00 - - - - - fdiv %st(0), %st(1) -# CHECK-NEXT: - 14.00 1.00 - - - - - fdiv %st(2) +# CHECK-NEXT: - 14.00 1.00 - - - - - fdiv %st, %st(1) +# CHECK-NEXT: - 14.00 1.00 - - - - - fdiv %st(2), %st # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 fdivs (%ecx) # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 fdivl (%eax) -# CHECK-NEXT: - 14.00 1.00 - - - - - fdivp %st(1) -# CHECK-NEXT: - 14.00 1.00 - - - - - fdivp %st(2) +# CHECK-NEXT: - 14.00 1.00 - - - - - fdivp %st, %st(1) +# CHECK-NEXT: - 14.00 1.00 - - - - - fdivp %st, %st(2) # CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 fidivs (%ecx) # CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 fidivl (%eax) -# CHECK-NEXT: - 14.00 1.00 - - - - - fdivr %st(0), %st(1) -# CHECK-NEXT: - 14.00 1.00 - - - - - fdivr %st(2) +# CHECK-NEXT: - 14.00 1.00 - - - - - fdivr %st, %st(1) +# CHECK-NEXT: - 14.00 1.00 - - - - - fdivr %st(2), %st # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 fdivrs (%ecx) # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 fdivrl (%eax) -# CHECK-NEXT: - 14.00 1.00 - - - - - fdivrp %st(1) -# CHECK-NEXT: - 14.00 1.00 - - - - - fdivrp %st(2) +# CHECK-NEXT: - 14.00 1.00 - - - - - fdivrp %st, %st(1) +# CHECK-NEXT: - 14.00 1.00 - - - - - fdivrp %st, %st(2) # CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 fidivrs (%ecx) # CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 fidivrl (%eax) # CHECK-NEXT: - - - - - 1.00 - - ffree %st(0) @@ -453,12 +453,12 @@ fyl2xp1 # CHECK-NEXT: - - 1.00 1.00 - - - - fldln2 # CHECK-NEXT: - - 1.00 1.00 - - - - fldpi # CHECK-NEXT: - - - - - 1.00 - - fldz -# CHECK-NEXT: - - 1.00 - - - - - fmul %st(0), %st(1) -# CHECK-NEXT: - - 1.00 - - - - - fmul %st(2) +# CHECK-NEXT: - - 1.00 - - - - - fmul %st, %st(1) +# CHECK-NEXT: - - 1.00 - - - - - fmul %st(2), %st # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 fmuls (%ecx) # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 fmull (%eax) -# CHECK-NEXT: - - 1.00 - - - - - fmulp %st(1) -# CHECK-NEXT: - - 1.00 - - - - - fmulp %st(2) +# CHECK-NEXT: - - 1.00 - - - - - fmulp %st, %st(1) +# CHECK-NEXT: - - 1.00 - - - - - fmulp %st, %st(2) # CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 fimuls (%ecx) # CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 fimull (%eax) # CHECK-NEXT: - - - - - 1.00 - - fnop @@ -486,20 +486,20 @@ fyl2xp1 # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - frstor (%eax) # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - wait # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fnsave (%eax) -# CHECK-NEXT: - - - 1.00 - - - - fsub %st(0), %st(1) -# CHECK-NEXT: - - - 1.00 - - - - fsub %st(2) +# CHECK-NEXT: - - - 1.00 - - - - fsub %st, %st(1) +# CHECK-NEXT: - - - 1.00 - - - - fsub %st(2), %st # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fsubs (%ecx) # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fsubl (%eax) -# CHECK-NEXT: - - - 1.00 - - - - fsubp %st(1) -# CHECK-NEXT: - - - 1.00 - - - - fsubp %st(2) +# CHECK-NEXT: - - - 1.00 - - - - fsubp %st, %st(1) +# CHECK-NEXT: - - - 1.00 - - - - fsubp %st, %st(2) # CHECK-NEXT: - - - 2.00 - - 0.50 0.50 fisubs (%ecx) # CHECK-NEXT: - - - 2.00 - - 0.50 0.50 fisubl (%eax) -# CHECK-NEXT: - - - 1.00 - - - - fsubr %st(0), %st(1) -# CHECK-NEXT: - - - 1.00 - - - - fsubr %st(2) +# CHECK-NEXT: - - - 1.00 - - - - fsubr %st, %st(1) +# CHECK-NEXT: - - - 1.00 - - - - fsubr %st(2), %st # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fsubrs (%ecx) # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fsubrl (%eax) -# CHECK-NEXT: - - - 1.00 - - - - fsubrp %st(1) -# CHECK-NEXT: - - - 1.00 - - - - fsubrp %st(2) +# CHECK-NEXT: - - - 1.00 - - - - fsubrp %st, %st(1) +# CHECK-NEXT: - - - 1.00 - - - - fsubrp %st, %st(2) # CHECK-NEXT: - - - 2.00 - - 0.50 0.50 fisubrs (%ecx) # CHECK-NEXT: - - - 2.00 - - 0.50 0.50 fisubrl (%eax) # CHECK-NEXT: - - - 1.00 - - - - ftst @@ -508,8 +508,8 @@ fyl2xp1 # CHECK-NEXT: - - - 1.00 - - - - fucomp %st(1) # CHECK-NEXT: - - - 1.00 - - - - fucomp %st(3) # CHECK-NEXT: - - - 1.00 - - - - fucompp -# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - fucomi %st(3) -# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - fucompi %st(3) +# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - fucomi %st(3), %st +# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - fucompi %st(3), %st # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - wait # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fxam # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fxch %st(1) diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-x87.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-x87.s index 53006bbc3296e1..7da8b2802a9a16 100644 --- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-x87.s @@ -5,7 +5,7 @@ f2xm1 fabs -fadd %st(0), %st(1) +fadd %st, %st(1) fadd %st(2) fadds (%ecx) faddl (%ecx) @@ -21,14 +21,14 @@ fchs fnclex -fcmovb %st(1), %st(0) -fcmovbe %st(1), %st(0) -fcmove %st(1), %st(0) -fcmovnb %st(1), %st(0) -fcmovnbe %st(1), %st(0) -fcmovne %st(1), %st(0) -fcmovnu %st(1), %st(0) -fcmovu %st(1), %st(0) +fcmovb %st(1), %st +fcmovbe %st(1), %st +fcmove %st(1), %st +fcmovnb %st(1), %st +fcmovnbe %st(1), %st +fcmovne %st(1), %st +fcmovnu %st(1), %st +fcmovu %st(1), %st fcom %st(1) fcom %st(3) @@ -47,7 +47,7 @@ fcos fdecstp -fdiv %st(0), %st(1) +fdiv %st, %st(1) fdiv %st(2) fdivs (%ecx) fdivl (%eax) @@ -56,7 +56,7 @@ fdivp %st(2) fidivs (%ecx) fidivl (%eax) -fdivr %st(0), %st(1) +fdivr %st, %st(1) fdivr %st(2) fdivrs (%ecx) fdivrl (%eax) @@ -106,7 +106,7 @@ fldln2 fldpi fldz -fmul %st(0), %st(1) +fmul %st, %st(1) fmul %st(2) fmuls (%ecx) fmull (%eax) @@ -153,7 +153,7 @@ fnstsw (%eax) frstor (%eax) fsave (%eax) -fsub %st(0), %st(1) +fsub %st, %st(1) fsub %st(2) fsubs (%ecx) fsubl (%eax) @@ -162,7 +162,7 @@ fsubp %st(2) fisubs (%ecx) fisubl (%eax) -fsubr %st(0), %st(1) +fsubr %st, %st(1) fsubr %st(2) fsubrs (%ecx) fsubrl (%eax) @@ -208,26 +208,26 @@ fyl2xp1 # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 100 0.25 U f2xm1 # CHECK-NEXT: 1 1 1.00 U fabs -# CHECK-NEXT: 1 3 1.00 U fadd %st(0), %st(1) -# CHECK-NEXT: 1 3 1.00 U fadd %st(2) +# CHECK-NEXT: 1 3 1.00 U fadd %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fadd %st(2), %st # CHECK-NEXT: 2 10 1.00 * U fadds (%ecx) # CHECK-NEXT: 2 10 1.00 * U faddl (%ecx) -# CHECK-NEXT: 1 3 1.00 U faddp %st(1) -# CHECK-NEXT: 1 3 1.00 U faddp %st(2) +# CHECK-NEXT: 1 3 1.00 U faddp %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U faddp %st, %st(2) # CHECK-NEXT: 3 13 2.00 * U fiadds (%ecx) # CHECK-NEXT: 3 13 2.00 * U fiaddl (%ecx) # CHECK-NEXT: 43 47 10.75 U fbld (%ecx) # CHECK-NEXT: 2 1 1.00 U fbstp (%eax) # CHECK-NEXT: 1 1 1.00 U fchs # CHECK-NEXT: 4 4 1.00 U fnclex -# CHECK-NEXT: 1 3 1.00 U fcmovb %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovbe %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmove %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovnb %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovnbe %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovne %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovnu %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovu %st(1), %st(0) +# CHECK-NEXT: 1 3 1.00 U fcmovb %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovbe %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmove %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovnb %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovnbe %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovne %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovnu %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovu %st(1), %st # CHECK-NEXT: 1 1 1.00 U fcom %st(1) # CHECK-NEXT: 1 1 1.00 U fcom %st(3) # CHECK-NEXT: 2 8 1.00 U fcoms (%ecx) @@ -237,24 +237,24 @@ fyl2xp1 # CHECK-NEXT: 2 8 1.00 U fcomps (%ecx) # CHECK-NEXT: 2 8 1.00 U fcompl (%eax) # CHECK-NEXT: 2 1 0.50 U fcompp -# CHECK-NEXT: 3 1 0.50 U fcomi %st(3) -# CHECK-NEXT: 3 1 0.50 U fcompi %st(3) +# CHECK-NEXT: 3 1 0.50 U fcomi %st(3), %st +# CHECK-NEXT: 3 1 0.50 U fcompi %st(3), %st # CHECK-NEXT: 1 100 0.25 U fcos # CHECK-NEXT: 2 2 1.00 U fdecstp -# CHECK-NEXT: 1 24 1.00 U fdiv %st(0), %st(1) -# CHECK-NEXT: 1 20 1.00 U fdiv %st(2) +# CHECK-NEXT: 1 24 1.00 U fdiv %st, %st(1) +# CHECK-NEXT: 1 20 1.00 U fdiv %st(2), %st # CHECK-NEXT: 2 31 1.00 * U fdivs (%ecx) # CHECK-NEXT: 2 31 1.00 * U fdivl (%eax) -# CHECK-NEXT: 1 24 1.00 U fdivp %st(1) -# CHECK-NEXT: 1 24 1.00 U fdivp %st(2) +# CHECK-NEXT: 1 24 1.00 U fdivp %st, %st(1) +# CHECK-NEXT: 1 24 1.00 U fdivp %st, %st(2) # CHECK-NEXT: 3 34 1.00 * U fidivs (%ecx) # CHECK-NEXT: 3 34 1.00 * U fidivl (%eax) -# CHECK-NEXT: 1 20 1.00 U fdivr %st(0), %st(1) -# CHECK-NEXT: 1 24 1.00 U fdivr %st(2) +# CHECK-NEXT: 1 20 1.00 U fdivr %st, %st(1) +# CHECK-NEXT: 1 24 1.00 U fdivr %st(2), %st # CHECK-NEXT: 2 27 1.00 * U fdivrs (%ecx) # CHECK-NEXT: 2 27 1.00 * U fdivrl (%eax) -# CHECK-NEXT: 1 20 1.00 U fdivrp %st(1) -# CHECK-NEXT: 1 20 1.00 U fdivrp %st(2) +# CHECK-NEXT: 1 20 1.00 U fdivrp %st, %st(1) +# CHECK-NEXT: 1 20 1.00 U fdivrp %st, %st(2) # CHECK-NEXT: 3 30 1.00 * U fidivrs (%ecx) # CHECK-NEXT: 3 30 1.00 * U fidivrl (%eax) # CHECK-NEXT: 1 1 0.50 U ffree %st(0) @@ -288,12 +288,12 @@ fyl2xp1 # CHECK-NEXT: 2 1 1.00 U fldln2 # CHECK-NEXT: 2 1 1.00 U fldpi # CHECK-NEXT: 1 1 0.50 U fldz -# CHECK-NEXT: 1 5 1.00 U fmul %st(0), %st(1) -# CHECK-NEXT: 1 5 1.00 U fmul %st(2) +# CHECK-NEXT: 1 5 1.00 U fmul %st, %st(1) +# CHECK-NEXT: 1 5 1.00 U fmul %st(2), %st # CHECK-NEXT: 2 12 1.00 * U fmuls (%ecx) # CHECK-NEXT: 2 12 1.00 * U fmull (%eax) -# CHECK-NEXT: 1 5 1.00 U fmulp %st(1) -# CHECK-NEXT: 1 5 1.00 U fmulp %st(2) +# CHECK-NEXT: 1 5 1.00 U fmulp %st, %st(1) +# CHECK-NEXT: 1 5 1.00 U fmulp %st, %st(2) # CHECK-NEXT: 3 15 1.00 * U fimuls (%ecx) # CHECK-NEXT: 3 15 1.00 * U fimull (%eax) # CHECK-NEXT: 1 1 0.50 U fnop @@ -321,20 +321,20 @@ fyl2xp1 # CHECK-NEXT: 90 1 22.50 U frstor (%eax) # CHECK-NEXT: 2 2 0.50 U wait # CHECK-NEXT: 147 1 36.75 U fnsave (%eax) -# CHECK-NEXT: 1 3 1.00 U fsub %st(0), %st(1) -# CHECK-NEXT: 1 3 1.00 U fsub %st(2) +# CHECK-NEXT: 1 3 1.00 U fsub %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsub %st(2), %st # CHECK-NEXT: 2 10 1.00 * U fsubs (%ecx) # CHECK-NEXT: 2 10 1.00 * U fsubl (%eax) -# CHECK-NEXT: 1 3 1.00 U fsubp %st(1) -# CHECK-NEXT: 1 3 1.00 U fsubp %st(2) +# CHECK-NEXT: 1 3 1.00 U fsubp %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsubp %st, %st(2) # CHECK-NEXT: 3 13 2.00 * U fisubs (%ecx) # CHECK-NEXT: 3 13 2.00 * U fisubl (%eax) -# CHECK-NEXT: 1 3 1.00 U fsubr %st(0), %st(1) -# CHECK-NEXT: 1 3 1.00 U fsubr %st(2) +# CHECK-NEXT: 1 3 1.00 U fsubr %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsubr %st(2), %st # CHECK-NEXT: 2 10 1.00 * U fsubrs (%ecx) # CHECK-NEXT: 2 10 1.00 * U fsubrl (%eax) -# CHECK-NEXT: 1 3 1.00 U fsubrp %st(1) -# CHECK-NEXT: 1 3 1.00 U fsubrp %st(2) +# CHECK-NEXT: 1 3 1.00 U fsubrp %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsubrp %st, %st(2) # CHECK-NEXT: 3 13 2.00 * U fisubrs (%ecx) # CHECK-NEXT: 3 13 2.00 * U fisubrl (%eax) # CHECK-NEXT: 1 1 1.00 U ftst @@ -343,8 +343,8 @@ fyl2xp1 # CHECK-NEXT: 1 1 1.00 U fucomp %st(1) # CHECK-NEXT: 1 1 1.00 U fucomp %st(3) # CHECK-NEXT: 2 1 0.50 U fucompp -# CHECK-NEXT: 3 1 0.50 U fucomi %st(3) -# CHECK-NEXT: 3 1 0.50 U fucompi %st(3) +# CHECK-NEXT: 3 1 0.50 U fucomi %st(3), %st +# CHECK-NEXT: 3 1 0.50 U fucompi %st(3), %st # CHECK-NEXT: 2 2 0.50 U wait # CHECK-NEXT: 2 1 2.00 U fxam # CHECK-NEXT: 15 17 4.00 U fxch %st(1) @@ -375,26 +375,26 @@ fyl2xp1 # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - f2xm1 # CHECK-NEXT: - - 1.00 - - - - - - - fabs -# CHECK-NEXT: - - - 1.00 - - - - - - fadd %st(0), %st(1) -# CHECK-NEXT: - - - 1.00 - - - - - - fadd %st(2) +# CHECK-NEXT: - - - 1.00 - - - - - - fadd %st, %st(1) +# CHECK-NEXT: - - - 1.00 - - - - - - fadd %st(2), %st # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - fadds (%ecx) # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - faddl (%ecx) -# CHECK-NEXT: - - - 1.00 - - - - - - faddp %st(1) -# CHECK-NEXT: - - - 1.00 - - - - - - faddp %st(2) +# CHECK-NEXT: - - - 1.00 - - - - - - faddp %st, %st(1) +# CHECK-NEXT: - - - 1.00 - - - - - - faddp %st, %st(2) # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - fiadds (%ecx) # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - fiaddl (%ecx) # CHECK-NEXT: - - - - - - - - - - fbld (%ecx) # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 fbstp (%eax) # CHECK-NEXT: - - 1.00 - - - - - - - fchs # CHECK-NEXT: - - 1.00 1.00 - - - 1.00 1.00 - fnclex -# CHECK-NEXT: - - - 1.00 - - - - - - fcmovb %st(1), %st(0) -# CHECK-NEXT: - - - 1.00 - - - - - - fcmovbe %st(1), %st(0) -# CHECK-NEXT: - - - 1.00 - - - - - - fcmove %st(1), %st(0) -# CHECK-NEXT: - - - 1.00 - - - - - - fcmovnb %st(1), %st(0) -# CHECK-NEXT: - - - 1.00 - - - - - - fcmovnbe %st(1), %st(0) -# CHECK-NEXT: - - - 1.00 - - - - - - fcmovne %st(1), %st(0) -# CHECK-NEXT: - - - 1.00 - - - - - - fcmovnu %st(1), %st(0) -# CHECK-NEXT: - - - 1.00 - - - - - - fcmovu %st(1), %st(0) +# CHECK-NEXT: - - - 1.00 - - - - - - fcmovb %st(1), %st +# CHECK-NEXT: - - - 1.00 - - - - - - fcmovbe %st(1), %st +# CHECK-NEXT: - - - 1.00 - - - - - - fcmove %st(1), %st +# CHECK-NEXT: - - - 1.00 - - - - - - fcmovnb %st(1), %st +# CHECK-NEXT: - - - 1.00 - - - - - - fcmovnbe %st(1), %st +# CHECK-NEXT: - - - 1.00 - - - - - - fcmovne %st(1), %st +# CHECK-NEXT: - - - 1.00 - - - - - - fcmovnu %st(1), %st +# CHECK-NEXT: - - - 1.00 - - - - - - fcmovu %st(1), %st # CHECK-NEXT: - - - 1.00 - - - - - - fcom %st(1) # CHECK-NEXT: - - - 1.00 - - - - - - fcom %st(3) # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - fcoms (%ecx) @@ -404,24 +404,24 @@ fyl2xp1 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - fcomps (%ecx) # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - fcompl (%eax) # CHECK-NEXT: - - 0.50 0.50 - - - - - - fcompp -# CHECK-NEXT: - - 0.50 0.50 - - - - - - fcomi %st(3) -# CHECK-NEXT: - - 0.50 0.50 - - - - - - fcompi %st(3) +# CHECK-NEXT: - - 0.50 0.50 - - - - - - fcomi %st(3), %st +# CHECK-NEXT: - - 0.50 0.50 - - - - - - fcompi %st(3), %st # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fcos # CHECK-NEXT: - - 1.00 1.00 - - - - - - fdecstp -# CHECK-NEXT: - - 1.00 - - - - - - - fdiv %st(0), %st(1) -# CHECK-NEXT: - - 1.00 - - - - - - - fdiv %st(2) +# CHECK-NEXT: - - 1.00 - - - - - - - fdiv %st, %st(1) +# CHECK-NEXT: - - 1.00 - - - - - - - fdiv %st(2), %st # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - fdivs (%ecx) # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - fdivl (%eax) -# CHECK-NEXT: - - 1.00 - - - - - - - fdivp %st(1) -# CHECK-NEXT: - - 1.00 - - - - - - - fdivp %st(2) +# CHECK-NEXT: - - 1.00 - - - - - - - fdivp %st, %st(1) +# CHECK-NEXT: - - 1.00 - - - - - - - fdivp %st, %st(2) # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - fidivs (%ecx) # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - fidivl (%eax) -# CHECK-NEXT: - - 1.00 - - - - - - - fdivr %st(0), %st(1) -# CHECK-NEXT: - - 1.00 - - - - - - - fdivr %st(2) +# CHECK-NEXT: - - 1.00 - - - - - - - fdivr %st, %st(1) +# CHECK-NEXT: - - 1.00 - - - - - - - fdivr %st(2), %st # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - fdivrs (%ecx) # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - fdivrl (%eax) -# CHECK-NEXT: - - 1.00 - - - - - - - fdivrp %st(1) -# CHECK-NEXT: - - 1.00 - - - - - - - fdivrp %st(2) +# CHECK-NEXT: - - 1.00 - - - - - - - fdivrp %st, %st(1) +# CHECK-NEXT: - - 1.00 - - - - - - - fdivrp %st, %st(2) # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - fidivrs (%ecx) # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - fidivrl (%eax) # CHECK-NEXT: - - 0.50 0.50 - - - - - - ffree %st(0) @@ -455,12 +455,12 @@ fyl2xp1 # CHECK-NEXT: - - 1.00 1.00 - - - - - - fldln2 # CHECK-NEXT: - - 1.00 1.00 - - - - - - fldpi # CHECK-NEXT: - - 0.50 0.50 - - - - - - fldz -# CHECK-NEXT: - - 1.00 - - - - - - - fmul %st(0), %st(1) -# CHECK-NEXT: - - 1.00 - - - - - - - fmul %st(2) +# CHECK-NEXT: - - 1.00 - - - - - - - fmul %st, %st(1) +# CHECK-NEXT: - - 1.00 - - - - - - - fmul %st(2), %st # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - fmuls (%ecx) # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - fmull (%eax) -# CHECK-NEXT: - - 1.00 - - - - - - - fmulp %st(1) -# CHECK-NEXT: - - 1.00 - - - - - - - fmulp %st(2) +# CHECK-NEXT: - - 1.00 - - - - - - - fmulp %st, %st(1) +# CHECK-NEXT: - - 1.00 - - - - - - - fmulp %st, %st(2) # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - fimuls (%ecx) # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - fimull (%eax) # CHECK-NEXT: - - 0.50 0.50 - - - - - - fnop @@ -488,20 +488,20 @@ fyl2xp1 # CHECK-NEXT: - - - - - - - - - - frstor (%eax) # CHECK-NEXT: - - 0.50 0.50 - - - 0.50 0.50 - wait # CHECK-NEXT: - - - - - - - - - - fnsave (%eax) -# CHECK-NEXT: - - - 1.00 - - - - - - fsub %st(0), %st(1) -# CHECK-NEXT: - - - 1.00 - - - - - - fsub %st(2) +# CHECK-NEXT: - - - 1.00 - - - - - - fsub %st, %st(1) +# CHECK-NEXT: - - - 1.00 - - - - - - fsub %st(2), %st # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - fsubs (%ecx) # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - fsubl (%eax) -# CHECK-NEXT: - - - 1.00 - - - - - - fsubp %st(1) -# CHECK-NEXT: - - - 1.00 - - - - - - fsubp %st(2) +# CHECK-NEXT: - - - 1.00 - - - - - - fsubp %st, %st(1) +# CHECK-NEXT: - - - 1.00 - - - - - - fsubp %st, %st(2) # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - fisubs (%ecx) # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - fisubl (%eax) -# CHECK-NEXT: - - - 1.00 - - - - - - fsubr %st(0), %st(1) -# CHECK-NEXT: - - - 1.00 - - - - - - fsubr %st(2) +# CHECK-NEXT: - - - 1.00 - - - - - - fsubr %st, %st(1) +# CHECK-NEXT: - - - 1.00 - - - - - - fsubr %st(2), %st # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - fsubrs (%ecx) # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - fsubrl (%eax) -# CHECK-NEXT: - - - 1.00 - - - - - - fsubrp %st(1) -# CHECK-NEXT: - - - 1.00 - - - - - - fsubrp %st(2) +# CHECK-NEXT: - - - 1.00 - - - - - - fsubrp %st, %st(1) +# CHECK-NEXT: - - - 1.00 - - - - - - fsubrp %st, %st(2) # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - fisubrs (%ecx) # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - fisubrl (%eax) # CHECK-NEXT: - - - 1.00 - - - - - - ftst @@ -510,8 +510,8 @@ fyl2xp1 # CHECK-NEXT: - - - 1.00 - - - - - - fucomp %st(1) # CHECK-NEXT: - - - 1.00 - - - - - - fucomp %st(3) # CHECK-NEXT: - - 0.50 0.50 - - - - - - fucompp -# CHECK-NEXT: - - 0.50 0.50 - - - - - - fucomi %st(3) -# CHECK-NEXT: - - 0.50 0.50 - - - - - - fucompi %st(3) +# CHECK-NEXT: - - 0.50 0.50 - - - - - - fucomi %st(3), %st +# CHECK-NEXT: - - 0.50 0.50 - - - - - - fucompi %st(3), %st # CHECK-NEXT: - - 0.50 0.50 - - - 0.50 0.50 - wait # CHECK-NEXT: - - - 2.00 - - - - - - fxam # CHECK-NEXT: - - 4.00 3.00 - - - 3.00 5.00 - fxch %st(1) diff --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-x87.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-x87.s index fe5de61296f569..d6d42e957bfe34 100644 --- a/llvm/test/tools/llvm-mca/X86/SLM/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-x87.s @@ -5,7 +5,7 @@ f2xm1 fabs -fadd %st(0), %st(1) +fadd %st, %st(1) fadd %st(2) fadds (%ecx) faddl (%ecx) @@ -21,14 +21,14 @@ fchs fnclex -fcmovb %st(1), %st(0) -fcmovbe %st(1), %st(0) -fcmove %st(1), %st(0) -fcmovnb %st(1), %st(0) -fcmovnbe %st(1), %st(0) -fcmovne %st(1), %st(0) -fcmovnu %st(1), %st(0) -fcmovu %st(1), %st(0) +fcmovb %st(1), %st +fcmovbe %st(1), %st +fcmove %st(1), %st +fcmovnb %st(1), %st +fcmovnbe %st(1), %st +fcmovne %st(1), %st +fcmovnu %st(1), %st +fcmovu %st(1), %st fcom %st(1) fcom %st(3) @@ -47,7 +47,7 @@ fcos fdecstp -fdiv %st(0), %st(1) +fdiv %st, %st(1) fdiv %st(2) fdivs (%ecx) fdivl (%eax) @@ -56,7 +56,7 @@ fdivp %st(2) fidivs (%ecx) fidivl (%eax) -fdivr %st(0), %st(1) +fdivr %st, %st(1) fdivr %st(2) fdivrs (%ecx) fdivrl (%eax) @@ -106,7 +106,7 @@ fldln2 fldpi fldz -fmul %st(0), %st(1) +fmul %st, %st(1) fmul %st(2) fmuls (%ecx) fmull (%eax) @@ -153,7 +153,7 @@ fnstsw (%eax) frstor (%eax) fsave (%eax) -fsub %st(0), %st(1) +fsub %st, %st(1) fsub %st(2) fsubs (%ecx) fsubl (%eax) @@ -162,7 +162,7 @@ fsubp %st(2) fisubs (%ecx) fisubl (%eax) -fsubr %st(0), %st(1) +fsubr %st, %st(1) fsubr %st(2) fsubrs (%ecx) fsubrl (%eax) @@ -208,26 +208,26 @@ fyl2xp1 # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 100 1.00 U f2xm1 # CHECK-NEXT: 1 1 0.50 U fabs -# CHECK-NEXT: 1 3 1.00 U fadd %st(0), %st(1) -# CHECK-NEXT: 1 3 1.00 U fadd %st(2) +# CHECK-NEXT: 1 3 1.00 U fadd %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fadd %st(2), %st # CHECK-NEXT: 1 6 1.00 * U fadds (%ecx) # CHECK-NEXT: 1 6 1.00 * U faddl (%ecx) -# CHECK-NEXT: 1 3 1.00 U faddp %st(1) -# CHECK-NEXT: 1 3 1.00 U faddp %st(2) +# CHECK-NEXT: 1 3 1.00 U faddp %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U faddp %st, %st(2) # CHECK-NEXT: 1 6 1.00 * U fiadds (%ecx) # CHECK-NEXT: 1 6 1.00 * U fiaddl (%ecx) # CHECK-NEXT: 1 100 1.00 U fbld (%ecx) # CHECK-NEXT: 1 100 1.00 U fbstp (%eax) # CHECK-NEXT: 1 1 0.50 U fchs # CHECK-NEXT: 1 100 1.00 U fnclex -# CHECK-NEXT: 1 3 1.00 U fcmovb %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovbe %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmove %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovnb %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovnbe %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovne %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovnu %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovu %st(1), %st(0) +# CHECK-NEXT: 1 3 1.00 U fcmovb %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovbe %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmove %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovnb %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovnbe %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovne %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovnu %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovu %st(1), %st # CHECK-NEXT: 1 3 1.00 U fcom %st(1) # CHECK-NEXT: 1 3 1.00 U fcom %st(3) # CHECK-NEXT: 1 6 1.00 U fcoms (%ecx) @@ -237,24 +237,24 @@ fyl2xp1 # CHECK-NEXT: 1 6 1.00 U fcomps (%ecx) # CHECK-NEXT: 1 6 1.00 U fcompl (%eax) # CHECK-NEXT: 1 100 1.00 U fcompp -# CHECK-NEXT: 1 3 1.00 U fcomi %st(3) -# CHECK-NEXT: 1 3 1.00 U fcompi %st(3) +# CHECK-NEXT: 1 3 1.00 U fcomi %st(3), %st +# CHECK-NEXT: 1 3 1.00 U fcompi %st(3), %st # CHECK-NEXT: 1 100 1.00 U fcos # CHECK-NEXT: 1 100 1.00 U fdecstp -# CHECK-NEXT: 1 19 17.00 U fdiv %st(0), %st(1) -# CHECK-NEXT: 1 19 17.00 U fdiv %st(2) +# CHECK-NEXT: 1 19 17.00 U fdiv %st, %st(1) +# CHECK-NEXT: 1 19 17.00 U fdiv %st(2), %st # CHECK-NEXT: 1 22 17.00 * U fdivs (%ecx) # CHECK-NEXT: 1 22 17.00 * U fdivl (%eax) -# CHECK-NEXT: 1 19 17.00 U fdivp %st(1) -# CHECK-NEXT: 1 19 17.00 U fdivp %st(2) +# CHECK-NEXT: 1 19 17.00 U fdivp %st, %st(1) +# CHECK-NEXT: 1 19 17.00 U fdivp %st, %st(2) # CHECK-NEXT: 1 22 17.00 * U fidivs (%ecx) # CHECK-NEXT: 1 22 17.00 * U fidivl (%eax) -# CHECK-NEXT: 1 19 17.00 U fdivr %st(0), %st(1) -# CHECK-NEXT: 1 19 17.00 U fdivr %st(2) +# CHECK-NEXT: 1 19 17.00 U fdivr %st, %st(1) +# CHECK-NEXT: 1 19 17.00 U fdivr %st(2), %st # CHECK-NEXT: 1 22 17.00 * U fdivrs (%ecx) # CHECK-NEXT: 1 22 17.00 * U fdivrl (%eax) -# CHECK-NEXT: 1 19 17.00 U fdivrp %st(1) -# CHECK-NEXT: 1 19 17.00 U fdivrp %st(2) +# CHECK-NEXT: 1 19 17.00 U fdivrp %st, %st(1) +# CHECK-NEXT: 1 19 17.00 U fdivrp %st, %st(2) # CHECK-NEXT: 1 22 17.00 * U fidivrs (%ecx) # CHECK-NEXT: 1 22 17.00 * U fidivrl (%eax) # CHECK-NEXT: 1 100 1.00 U ffree %st(0) @@ -288,12 +288,12 @@ fyl2xp1 # CHECK-NEXT: 2 1 1.00 U fldln2 # CHECK-NEXT: 2 1 1.00 U fldpi # CHECK-NEXT: 1 1 0.50 U fldz -# CHECK-NEXT: 1 5 2.00 U fmul %st(0), %st(1) -# CHECK-NEXT: 1 5 2.00 U fmul %st(2) +# CHECK-NEXT: 1 5 2.00 U fmul %st, %st(1) +# CHECK-NEXT: 1 5 2.00 U fmul %st(2), %st # CHECK-NEXT: 1 8 2.00 * U fmuls (%ecx) # CHECK-NEXT: 1 8 2.00 * U fmull (%eax) -# CHECK-NEXT: 1 5 2.00 U fmulp %st(1) -# CHECK-NEXT: 1 5 2.00 U fmulp %st(2) +# CHECK-NEXT: 1 5 2.00 U fmulp %st, %st(1) +# CHECK-NEXT: 1 5 2.00 U fmulp %st, %st(2) # CHECK-NEXT: 1 8 2.00 * U fimuls (%ecx) # CHECK-NEXT: 1 8 2.00 * U fimull (%eax) # CHECK-NEXT: 1 1 0.50 U fnop @@ -321,20 +321,20 @@ fyl2xp1 # CHECK-NEXT: 1 100 1.00 U frstor (%eax) # CHECK-NEXT: 1 100 1.00 U wait # CHECK-NEXT: 1 100 1.00 U fnsave (%eax) -# CHECK-NEXT: 1 3 1.00 U fsub %st(0), %st(1) -# CHECK-NEXT: 1 3 1.00 U fsub %st(2) +# CHECK-NEXT: 1 3 1.00 U fsub %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsub %st(2), %st # CHECK-NEXT: 1 6 1.00 * U fsubs (%ecx) # CHECK-NEXT: 1 6 1.00 * U fsubl (%eax) -# CHECK-NEXT: 1 3 1.00 U fsubp %st(1) -# CHECK-NEXT: 1 3 1.00 U fsubp %st(2) +# CHECK-NEXT: 1 3 1.00 U fsubp %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsubp %st, %st(2) # CHECK-NEXT: 1 6 1.00 * U fisubs (%ecx) # CHECK-NEXT: 1 6 1.00 * U fisubl (%eax) -# CHECK-NEXT: 1 3 1.00 U fsubr %st(0), %st(1) -# CHECK-NEXT: 1 3 1.00 U fsubr %st(2) +# CHECK-NEXT: 1 3 1.00 U fsubr %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsubr %st(2), %st # CHECK-NEXT: 1 6 1.00 * U fsubrs (%ecx) # CHECK-NEXT: 1 6 1.00 * U fsubrl (%eax) -# CHECK-NEXT: 1 3 1.00 U fsubrp %st(1) -# CHECK-NEXT: 1 3 1.00 U fsubrp %st(2) +# CHECK-NEXT: 1 3 1.00 U fsubrp %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsubrp %st, %st(2) # CHECK-NEXT: 1 6 1.00 * U fisubrs (%ecx) # CHECK-NEXT: 1 6 1.00 * U fisubrl (%eax) # CHECK-NEXT: 1 3 1.00 U ftst @@ -343,8 +343,8 @@ fyl2xp1 # CHECK-NEXT: 1 3 1.00 U fucomp %st(1) # CHECK-NEXT: 1 3 1.00 U fucomp %st(3) # CHECK-NEXT: 1 3 1.00 U fucompp -# CHECK-NEXT: 1 3 1.00 U fucomi %st(3) -# CHECK-NEXT: 1 3 1.00 U fucompi %st(3) +# CHECK-NEXT: 1 3 1.00 U fucomi %st(3), %st +# CHECK-NEXT: 1 3 1.00 U fucompi %st(3), %st # CHECK-NEXT: 1 100 1.00 U wait # CHECK-NEXT: 1 100 1.00 U fxam # CHECK-NEXT: 1 1 0.50 U fxch %st(1) @@ -373,26 +373,26 @@ fyl2xp1 # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions: # CHECK-NEXT: - - - 1.00 - - - - f2xm1 # CHECK-NEXT: - - - 0.50 0.50 - - - fabs -# CHECK-NEXT: - - - - 1.00 - - - fadd %st(0), %st(1) -# CHECK-NEXT: - - - - 1.00 - - - fadd %st(2) +# CHECK-NEXT: - - - - 1.00 - - - fadd %st, %st(1) +# CHECK-NEXT: - - - - 1.00 - - - fadd %st(2), %st # CHECK-NEXT: - - - - 1.00 - - 1.00 fadds (%ecx) # CHECK-NEXT: - - - - 1.00 - - 1.00 faddl (%ecx) -# CHECK-NEXT: - - - - 1.00 - - - faddp %st(1) -# CHECK-NEXT: - - - - 1.00 - - - faddp %st(2) +# CHECK-NEXT: - - - - 1.00 - - - faddp %st, %st(1) +# CHECK-NEXT: - - - - 1.00 - - - faddp %st, %st(2) # CHECK-NEXT: - - - - 1.00 - - 1.00 fiadds (%ecx) # CHECK-NEXT: - - - - 1.00 - - 1.00 fiaddl (%ecx) # CHECK-NEXT: - - - 1.00 - - - - fbld (%ecx) # CHECK-NEXT: - - - 1.00 - - - - fbstp (%eax) # CHECK-NEXT: - - - 0.50 0.50 - - - fchs # CHECK-NEXT: - - - 1.00 - - - - fnclex -# CHECK-NEXT: - - - - 1.00 - - - fcmovb %st(1), %st(0) -# CHECK-NEXT: - - - - 1.00 - - - fcmovbe %st(1), %st(0) -# CHECK-NEXT: - - - - 1.00 - - - fcmove %st(1), %st(0) -# CHECK-NEXT: - - - - 1.00 - - - fcmovnb %st(1), %st(0) -# CHECK-NEXT: - - - - 1.00 - - - fcmovnbe %st(1), %st(0) -# CHECK-NEXT: - - - - 1.00 - - - fcmovne %st(1), %st(0) -# CHECK-NEXT: - - - - 1.00 - - - fcmovnu %st(1), %st(0) -# CHECK-NEXT: - - - - 1.00 - - - fcmovu %st(1), %st(0) +# CHECK-NEXT: - - - - 1.00 - - - fcmovb %st(1), %st +# CHECK-NEXT: - - - - 1.00 - - - fcmovbe %st(1), %st +# CHECK-NEXT: - - - - 1.00 - - - fcmove %st(1), %st +# CHECK-NEXT: - - - - 1.00 - - - fcmovnb %st(1), %st +# CHECK-NEXT: - - - - 1.00 - - - fcmovnbe %st(1), %st +# CHECK-NEXT: - - - - 1.00 - - - fcmovne %st(1), %st +# CHECK-NEXT: - - - - 1.00 - - - fcmovnu %st(1), %st +# CHECK-NEXT: - - - - 1.00 - - - fcmovu %st(1), %st # CHECK-NEXT: - - - - 1.00 - - - fcom %st(1) # CHECK-NEXT: - - - - 1.00 - - - fcom %st(3) # CHECK-NEXT: - - - - 1.00 - - 1.00 fcoms (%ecx) @@ -402,24 +402,24 @@ fyl2xp1 # CHECK-NEXT: - - - - 1.00 - - 1.00 fcomps (%ecx) # CHECK-NEXT: - - - - 1.00 - - 1.00 fcompl (%eax) # CHECK-NEXT: - - - 1.00 - - - - fcompp -# CHECK-NEXT: - - - - 1.00 - - - fcomi %st(3) -# CHECK-NEXT: - - - - 1.00 - - - fcompi %st(3) +# CHECK-NEXT: - - - - 1.00 - - - fcomi %st(3), %st +# CHECK-NEXT: - - - - 1.00 - - - fcompi %st(3), %st # CHECK-NEXT: - - - 1.00 - - - - fcos # CHECK-NEXT: - - - 1.00 - - - - fdecstp -# CHECK-NEXT: - 17.00 - 1.00 - - - - fdiv %st(0), %st(1) -# CHECK-NEXT: - 17.00 - 1.00 - - - - fdiv %st(2) +# CHECK-NEXT: - 17.00 - 1.00 - - - - fdiv %st, %st(1) +# CHECK-NEXT: - 17.00 - 1.00 - - - - fdiv %st(2), %st # CHECK-NEXT: - 17.00 - 1.00 - - - 1.00 fdivs (%ecx) # CHECK-NEXT: - 17.00 - 1.00 - - - 1.00 fdivl (%eax) -# CHECK-NEXT: - 17.00 - 1.00 - - - - fdivp %st(1) -# CHECK-NEXT: - 17.00 - 1.00 - - - - fdivp %st(2) +# CHECK-NEXT: - 17.00 - 1.00 - - - - fdivp %st, %st(1) +# CHECK-NEXT: - 17.00 - 1.00 - - - - fdivp %st, %st(2) # CHECK-NEXT: - 17.00 - 1.00 - - - 1.00 fidivs (%ecx) # CHECK-NEXT: - 17.00 - 1.00 - - - 1.00 fidivl (%eax) -# CHECK-NEXT: - 17.00 - 1.00 - - - - fdivr %st(0), %st(1) -# CHECK-NEXT: - 17.00 - 1.00 - - - - fdivr %st(2) +# CHECK-NEXT: - 17.00 - 1.00 - - - - fdivr %st, %st(1) +# CHECK-NEXT: - 17.00 - 1.00 - - - - fdivr %st(2), %st # CHECK-NEXT: - 17.00 - 1.00 - - - 1.00 fdivrs (%ecx) # CHECK-NEXT: - 17.00 - 1.00 - - - 1.00 fdivrl (%eax) -# CHECK-NEXT: - 17.00 - 1.00 - - - - fdivrp %st(1) -# CHECK-NEXT: - 17.00 - 1.00 - - - - fdivrp %st(2) +# CHECK-NEXT: - 17.00 - 1.00 - - - - fdivrp %st, %st(1) +# CHECK-NEXT: - 17.00 - 1.00 - - - - fdivrp %st, %st(2) # CHECK-NEXT: - 17.00 - 1.00 - - - 1.00 fidivrs (%ecx) # CHECK-NEXT: - 17.00 - 1.00 - - - 1.00 fidivrl (%eax) # CHECK-NEXT: - - - 1.00 - - - - ffree %st(0) @@ -453,12 +453,12 @@ fyl2xp1 # CHECK-NEXT: - - - 1.00 1.00 - - - fldln2 # CHECK-NEXT: - - - 1.00 1.00 - - - fldpi # CHECK-NEXT: - - - 0.50 0.50 - - - fldz -# CHECK-NEXT: - - 2.00 1.00 - - - - fmul %st(0), %st(1) -# CHECK-NEXT: - - 2.00 1.00 - - - - fmul %st(2) +# CHECK-NEXT: - - 2.00 1.00 - - - - fmul %st, %st(1) +# CHECK-NEXT: - - 2.00 1.00 - - - - fmul %st(2), %st # CHECK-NEXT: - - 2.00 1.00 - - - 1.00 fmuls (%ecx) # CHECK-NEXT: - - 2.00 1.00 - - - 1.00 fmull (%eax) -# CHECK-NEXT: - - 2.00 1.00 - - - - fmulp %st(1) -# CHECK-NEXT: - - 2.00 1.00 - - - - fmulp %st(2) +# CHECK-NEXT: - - 2.00 1.00 - - - - fmulp %st, %st(1) +# CHECK-NEXT: - - 2.00 1.00 - - - - fmulp %st, %st(2) # CHECK-NEXT: - - 2.00 1.00 - - - 1.00 fimuls (%ecx) # CHECK-NEXT: - - 2.00 1.00 - - - 1.00 fimull (%eax) # CHECK-NEXT: - - - - - - - - fnop @@ -486,20 +486,20 @@ fyl2xp1 # CHECK-NEXT: - - - 1.00 - - - - frstor (%eax) # CHECK-NEXT: - - - 1.00 - - - - wait # CHECK-NEXT: - - - 1.00 - - - - fnsave (%eax) -# CHECK-NEXT: - - - - 1.00 - - - fsub %st(0), %st(1) -# CHECK-NEXT: - - - - 1.00 - - - fsub %st(2) +# CHECK-NEXT: - - - - 1.00 - - - fsub %st, %st(1) +# CHECK-NEXT: - - - - 1.00 - - - fsub %st(2), %st # CHECK-NEXT: - - - - 1.00 - - 1.00 fsubs (%ecx) # CHECK-NEXT: - - - - 1.00 - - 1.00 fsubl (%eax) -# CHECK-NEXT: - - - - 1.00 - - - fsubp %st(1) -# CHECK-NEXT: - - - - 1.00 - - - fsubp %st(2) +# CHECK-NEXT: - - - - 1.00 - - - fsubp %st, %st(1) +# CHECK-NEXT: - - - - 1.00 - - - fsubp %st, %st(2) # CHECK-NEXT: - - - - 1.00 - - 1.00 fisubs (%ecx) # CHECK-NEXT: - - - - 1.00 - - 1.00 fisubl (%eax) -# CHECK-NEXT: - - - - 1.00 - - - fsubr %st(0), %st(1) -# CHECK-NEXT: - - - - 1.00 - - - fsubr %st(2) +# CHECK-NEXT: - - - - 1.00 - - - fsubr %st, %st(1) +# CHECK-NEXT: - - - - 1.00 - - - fsubr %st(2), %st # CHECK-NEXT: - - - - 1.00 - - 1.00 fsubrs (%ecx) # CHECK-NEXT: - - - - 1.00 - - 1.00 fsubrl (%eax) -# CHECK-NEXT: - - - - 1.00 - - - fsubrp %st(1) -# CHECK-NEXT: - - - - 1.00 - - - fsubrp %st(2) +# CHECK-NEXT: - - - - 1.00 - - - fsubrp %st, %st(1) +# CHECK-NEXT: - - - - 1.00 - - - fsubrp %st, %st(2) # CHECK-NEXT: - - - - 1.00 - - 1.00 fisubrs (%ecx) # CHECK-NEXT: - - - - 1.00 - - 1.00 fisubrl (%eax) # CHECK-NEXT: - - - - 1.00 - - - ftst @@ -508,8 +508,8 @@ fyl2xp1 # CHECK-NEXT: - - - - 1.00 - - - fucomp %st(1) # CHECK-NEXT: - - - - 1.00 - - - fucomp %st(3) # CHECK-NEXT: - - - - 1.00 - - - fucompp -# CHECK-NEXT: - - - - 1.00 - - - fucomi %st(3) -# CHECK-NEXT: - - - - 1.00 - - - fucompi %st(3) +# CHECK-NEXT: - - - - 1.00 - - - fucomi %st(3), %st +# CHECK-NEXT: - - - - 1.00 - - - fucompi %st(3), %st # CHECK-NEXT: - - - 1.00 - - - - wait # CHECK-NEXT: - - - 1.00 - - - - fxam # CHECK-NEXT: - - - - - 0.50 0.50 - fxch %st(1) diff --git a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-x87.s b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-x87.s index 332f365f1fdd3a..1bed53326ced68 100644 --- a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-x87.s @@ -5,7 +5,7 @@ f2xm1 fabs -fadd %st(0), %st(1) +fadd %st, %st(1) fadd %st(2) fadds (%ecx) faddl (%ecx) @@ -21,14 +21,14 @@ fchs fnclex -fcmovb %st(1), %st(0) -fcmovbe %st(1), %st(0) -fcmove %st(1), %st(0) -fcmovnb %st(1), %st(0) -fcmovnbe %st(1), %st(0) -fcmovne %st(1), %st(0) -fcmovnu %st(1), %st(0) -fcmovu %st(1), %st(0) +fcmovb %st(1), %st +fcmovbe %st(1), %st +fcmove %st(1), %st +fcmovnb %st(1), %st +fcmovnbe %st(1), %st +fcmovne %st(1), %st +fcmovnu %st(1), %st +fcmovu %st(1), %st fcom %st(1) fcom %st(3) @@ -47,7 +47,7 @@ fcos fdecstp -fdiv %st(0), %st(1) +fdiv %st, %st(1) fdiv %st(2) fdivs (%ecx) fdivl (%eax) @@ -56,7 +56,7 @@ fdivp %st(2) fidivs (%ecx) fidivl (%eax) -fdivr %st(0), %st(1) +fdivr %st, %st(1) fdivr %st(2) fdivrs (%ecx) fdivrl (%eax) @@ -106,7 +106,7 @@ fldln2 fldpi fldz -fmul %st(0), %st(1) +fmul %st, %st(1) fmul %st(2) fmuls (%ecx) fmull (%eax) @@ -153,7 +153,7 @@ fnstsw (%eax) frstor (%eax) fsave (%eax) -fsub %st(0), %st(1) +fsub %st, %st(1) fsub %st(2) fsubs (%ecx) fsubl (%eax) @@ -162,7 +162,7 @@ fsubp %st(2) fisubs (%ecx) fisubl (%eax) -fsubr %st(0), %st(1) +fsubr %st, %st(1) fsubr %st(2) fsubrs (%ecx) fsubrl (%eax) @@ -208,26 +208,26 @@ fyl2xp1 # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 100 0.33 U f2xm1 # CHECK-NEXT: 1 1 1.00 U fabs -# CHECK-NEXT: 1 3 1.00 U fadd %st(0), %st(1) -# CHECK-NEXT: 1 3 1.00 U fadd %st(2) +# CHECK-NEXT: 1 3 1.00 U fadd %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fadd %st(2), %st # CHECK-NEXT: 2 10 1.00 * U fadds (%ecx) # CHECK-NEXT: 2 10 1.00 * U faddl (%ecx) -# CHECK-NEXT: 1 3 1.00 U faddp %st(1) -# CHECK-NEXT: 1 3 1.00 U faddp %st(2) +# CHECK-NEXT: 1 3 1.00 U faddp %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U faddp %st, %st(2) # CHECK-NEXT: 3 13 2.00 * U fiadds (%ecx) # CHECK-NEXT: 3 13 2.00 * U fiaddl (%ecx) # CHECK-NEXT: 1 100 0.33 U fbld (%ecx) # CHECK-NEXT: 1 100 0.33 U fbstp (%eax) # CHECK-NEXT: 1 1 1.00 U fchs # CHECK-NEXT: 1 100 0.33 U fnclex -# CHECK-NEXT: 3 3 2.00 U fcmovb %st(1), %st(0) -# CHECK-NEXT: 3 3 2.00 U fcmovbe %st(1), %st(0) -# CHECK-NEXT: 3 3 2.00 U fcmove %st(1), %st(0) -# CHECK-NEXT: 3 3 2.00 U fcmovnb %st(1), %st(0) -# CHECK-NEXT: 3 3 2.00 U fcmovnbe %st(1), %st(0) -# CHECK-NEXT: 3 3 2.00 U fcmovne %st(1), %st(0) -# CHECK-NEXT: 3 3 2.00 U fcmovnu %st(1), %st(0) -# CHECK-NEXT: 3 3 2.00 U fcmovu %st(1), %st(0) +# CHECK-NEXT: 3 3 2.00 U fcmovb %st(1), %st +# CHECK-NEXT: 3 3 2.00 U fcmovbe %st(1), %st +# CHECK-NEXT: 3 3 2.00 U fcmove %st(1), %st +# CHECK-NEXT: 3 3 2.00 U fcmovnb %st(1), %st +# CHECK-NEXT: 3 3 2.00 U fcmovnbe %st(1), %st +# CHECK-NEXT: 3 3 2.00 U fcmovne %st(1), %st +# CHECK-NEXT: 3 3 2.00 U fcmovnu %st(1), %st +# CHECK-NEXT: 3 3 2.00 U fcmovu %st(1), %st # CHECK-NEXT: 1 1 1.00 U fcom %st(1) # CHECK-NEXT: 1 1 1.00 U fcom %st(3) # CHECK-NEXT: 2 8 1.00 U fcoms (%ecx) @@ -237,24 +237,24 @@ fyl2xp1 # CHECK-NEXT: 2 8 1.00 U fcomps (%ecx) # CHECK-NEXT: 2 8 1.00 U fcompl (%eax) # CHECK-NEXT: 1 100 0.33 U fcompp -# CHECK-NEXT: 3 3 1.00 U fcomi %st(3) -# CHECK-NEXT: 3 3 1.00 U fcompi %st(3) +# CHECK-NEXT: 3 3 1.00 U fcomi %st(3), %st +# CHECK-NEXT: 3 3 1.00 U fcompi %st(3), %st # CHECK-NEXT: 1 100 0.33 U fcos # CHECK-NEXT: 1 1 1.00 U fdecstp -# CHECK-NEXT: 1 14 14.00 U fdiv %st(0), %st(1) -# CHECK-NEXT: 1 14 14.00 U fdiv %st(2) +# CHECK-NEXT: 1 14 14.00 U fdiv %st, %st(1) +# CHECK-NEXT: 1 14 14.00 U fdiv %st(2), %st # CHECK-NEXT: 2 31 1.00 * U fdivs (%ecx) # CHECK-NEXT: 2 31 1.00 * U fdivl (%eax) -# CHECK-NEXT: 1 14 14.00 U fdivp %st(1) -# CHECK-NEXT: 1 14 14.00 U fdivp %st(2) +# CHECK-NEXT: 1 14 14.00 U fdivp %st, %st(1) +# CHECK-NEXT: 1 14 14.00 U fdivp %st, %st(2) # CHECK-NEXT: 3 34 1.00 * U fidivs (%ecx) # CHECK-NEXT: 3 34 1.00 * U fidivl (%eax) -# CHECK-NEXT: 1 14 14.00 U fdivr %st(0), %st(1) -# CHECK-NEXT: 1 14 14.00 U fdivr %st(2) +# CHECK-NEXT: 1 14 14.00 U fdivr %st, %st(1) +# CHECK-NEXT: 1 14 14.00 U fdivr %st(2), %st # CHECK-NEXT: 2 31 1.00 * U fdivrs (%ecx) # CHECK-NEXT: 2 31 1.00 * U fdivrl (%eax) -# CHECK-NEXT: 1 14 14.00 U fdivrp %st(1) -# CHECK-NEXT: 1 14 14.00 U fdivrp %st(2) +# CHECK-NEXT: 1 14 14.00 U fdivrp %st, %st(1) +# CHECK-NEXT: 1 14 14.00 U fdivrp %st, %st(2) # CHECK-NEXT: 3 34 1.00 * U fidivrs (%ecx) # CHECK-NEXT: 3 34 1.00 * U fidivrl (%eax) # CHECK-NEXT: 1 1 1.00 U ffree %st(0) @@ -288,12 +288,12 @@ fyl2xp1 # CHECK-NEXT: 2 1 1.00 U fldln2 # CHECK-NEXT: 2 1 1.00 U fldpi # CHECK-NEXT: 1 1 1.00 U fldz -# CHECK-NEXT: 1 5 1.00 U fmul %st(0), %st(1) -# CHECK-NEXT: 1 5 1.00 U fmul %st(2) +# CHECK-NEXT: 1 5 1.00 U fmul %st, %st(1) +# CHECK-NEXT: 1 5 1.00 U fmul %st(2), %st # CHECK-NEXT: 2 12 1.00 * U fmuls (%ecx) # CHECK-NEXT: 2 12 1.00 * U fmull (%eax) -# CHECK-NEXT: 1 5 1.00 U fmulp %st(1) -# CHECK-NEXT: 1 5 1.00 U fmulp %st(2) +# CHECK-NEXT: 1 5 1.00 U fmulp %st, %st(1) +# CHECK-NEXT: 1 5 1.00 U fmulp %st, %st(2) # CHECK-NEXT: 3 15 1.00 * U fimuls (%ecx) # CHECK-NEXT: 3 15 1.00 * U fimull (%eax) # CHECK-NEXT: 1 1 1.00 U fnop @@ -321,20 +321,20 @@ fyl2xp1 # CHECK-NEXT: 1 100 0.33 U frstor (%eax) # CHECK-NEXT: 1 100 0.33 U wait # CHECK-NEXT: 1 100 0.33 U fnsave (%eax) -# CHECK-NEXT: 1 3 1.00 U fsub %st(0), %st(1) -# CHECK-NEXT: 1 3 1.00 U fsub %st(2) +# CHECK-NEXT: 1 3 1.00 U fsub %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsub %st(2), %st # CHECK-NEXT: 2 10 1.00 * U fsubs (%ecx) # CHECK-NEXT: 2 10 1.00 * U fsubl (%eax) -# CHECK-NEXT: 1 3 1.00 U fsubp %st(1) -# CHECK-NEXT: 1 3 1.00 U fsubp %st(2) +# CHECK-NEXT: 1 3 1.00 U fsubp %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsubp %st, %st(2) # CHECK-NEXT: 3 13 2.00 * U fisubs (%ecx) # CHECK-NEXT: 3 13 2.00 * U fisubl (%eax) -# CHECK-NEXT: 1 3 1.00 U fsubr %st(0), %st(1) -# CHECK-NEXT: 1 3 1.00 U fsubr %st(2) +# CHECK-NEXT: 1 3 1.00 U fsubr %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsubr %st(2), %st # CHECK-NEXT: 2 10 1.00 * U fsubrs (%ecx) # CHECK-NEXT: 2 10 1.00 * U fsubrl (%eax) -# CHECK-NEXT: 1 3 1.00 U fsubrp %st(1) -# CHECK-NEXT: 1 3 1.00 U fsubrp %st(2) +# CHECK-NEXT: 1 3 1.00 U fsubrp %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsubrp %st, %st(2) # CHECK-NEXT: 3 13 2.00 * U fisubrs (%ecx) # CHECK-NEXT: 3 13 2.00 * U fisubrl (%eax) # CHECK-NEXT: 1 3 1.00 U ftst @@ -343,8 +343,8 @@ fyl2xp1 # CHECK-NEXT: 1 1 1.00 U fucomp %st(1) # CHECK-NEXT: 1 1 1.00 U fucomp %st(3) # CHECK-NEXT: 1 3 1.00 U fucompp -# CHECK-NEXT: 3 3 1.00 U fucomi %st(3) -# CHECK-NEXT: 3 3 1.00 U fucompi %st(3) +# CHECK-NEXT: 3 3 1.00 U fucomi %st(3), %st +# CHECK-NEXT: 3 3 1.00 U fucompi %st(3), %st # CHECK-NEXT: 1 100 0.33 U wait # CHECK-NEXT: 1 100 0.33 U fxam # CHECK-NEXT: 1 1 0.33 U fxch %st(1) @@ -373,26 +373,26 @@ fyl2xp1 # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - f2xm1 # CHECK-NEXT: - - - - - 1.00 - - fabs -# CHECK-NEXT: - - - 1.00 - - - - fadd %st(0), %st(1) -# CHECK-NEXT: - - - 1.00 - - - - fadd %st(2) +# CHECK-NEXT: - - - 1.00 - - - - fadd %st, %st(1) +# CHECK-NEXT: - - - 1.00 - - - - fadd %st(2), %st # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fadds (%ecx) # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 faddl (%ecx) -# CHECK-NEXT: - - - 1.00 - - - - faddp %st(1) -# CHECK-NEXT: - - - 1.00 - - - - faddp %st(2) +# CHECK-NEXT: - - - 1.00 - - - - faddp %st, %st(1) +# CHECK-NEXT: - - - 1.00 - - - - faddp %st, %st(2) # CHECK-NEXT: - - - 2.00 - - 0.50 0.50 fiadds (%ecx) # CHECK-NEXT: - - - 2.00 - - 0.50 0.50 fiaddl (%ecx) # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fbld (%ecx) # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fbstp (%eax) # CHECK-NEXT: - - - - - 1.00 - - fchs # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fnclex -# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovb %st(1), %st(0) -# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovbe %st(1), %st(0) -# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmove %st(1), %st(0) -# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovnb %st(1), %st(0) -# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovnbe %st(1), %st(0) -# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovne %st(1), %st(0) -# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovnu %st(1), %st(0) -# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovu %st(1), %st(0) +# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovb %st(1), %st +# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovbe %st(1), %st +# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmove %st(1), %st +# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovnb %st(1), %st +# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovnbe %st(1), %st +# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovne %st(1), %st +# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovnu %st(1), %st +# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovu %st(1), %st # CHECK-NEXT: - - - 1.00 - - - - fcom %st(1) # CHECK-NEXT: - - - 1.00 - - - - fcom %st(3) # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fcoms (%ecx) @@ -402,24 +402,24 @@ fyl2xp1 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fcomps (%ecx) # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fcompl (%eax) # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fcompp -# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - fcomi %st(3) -# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - fcompi %st(3) +# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - fcomi %st(3), %st +# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - fcompi %st(3), %st # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fcos # CHECK-NEXT: - - - - - 1.00 - - fdecstp -# CHECK-NEXT: - 14.00 1.00 - - - - - fdiv %st(0), %st(1) -# CHECK-NEXT: - 14.00 1.00 - - - - - fdiv %st(2) +# CHECK-NEXT: - 14.00 1.00 - - - - - fdiv %st, %st(1) +# CHECK-NEXT: - 14.00 1.00 - - - - - fdiv %st(2), %st # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 fdivs (%ecx) # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 fdivl (%eax) -# CHECK-NEXT: - 14.00 1.00 - - - - - fdivp %st(1) -# CHECK-NEXT: - 14.00 1.00 - - - - - fdivp %st(2) +# CHECK-NEXT: - 14.00 1.00 - - - - - fdivp %st, %st(1) +# CHECK-NEXT: - 14.00 1.00 - - - - - fdivp %st, %st(2) # CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 fidivs (%ecx) # CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 fidivl (%eax) -# CHECK-NEXT: - 14.00 1.00 - - - - - fdivr %st(0), %st(1) -# CHECK-NEXT: - 14.00 1.00 - - - - - fdivr %st(2) +# CHECK-NEXT: - 14.00 1.00 - - - - - fdivr %st, %st(1) +# CHECK-NEXT: - 14.00 1.00 - - - - - fdivr %st(2), %st # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 fdivrs (%ecx) # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 fdivrl (%eax) -# CHECK-NEXT: - 14.00 1.00 - - - - - fdivrp %st(1) -# CHECK-NEXT: - 14.00 1.00 - - - - - fdivrp %st(2) +# CHECK-NEXT: - 14.00 1.00 - - - - - fdivrp %st, %st(1) +# CHECK-NEXT: - 14.00 1.00 - - - - - fdivrp %st, %st(2) # CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 fidivrs (%ecx) # CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 fidivrl (%eax) # CHECK-NEXT: - - - - - 1.00 - - ffree %st(0) @@ -453,12 +453,12 @@ fyl2xp1 # CHECK-NEXT: - - 1.00 1.00 - - - - fldln2 # CHECK-NEXT: - - 1.00 1.00 - - - - fldpi # CHECK-NEXT: - - - - - 1.00 - - fldz -# CHECK-NEXT: - - 1.00 - - - - - fmul %st(0), %st(1) -# CHECK-NEXT: - - 1.00 - - - - - fmul %st(2) +# CHECK-NEXT: - - 1.00 - - - - - fmul %st, %st(1) +# CHECK-NEXT: - - 1.00 - - - - - fmul %st(2), %st # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 fmuls (%ecx) # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 fmull (%eax) -# CHECK-NEXT: - - 1.00 - - - - - fmulp %st(1) -# CHECK-NEXT: - - 1.00 - - - - - fmulp %st(2) +# CHECK-NEXT: - - 1.00 - - - - - fmulp %st, %st(1) +# CHECK-NEXT: - - 1.00 - - - - - fmulp %st, %st(2) # CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 fimuls (%ecx) # CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 fimull (%eax) # CHECK-NEXT: - - - - - 1.00 - - fnop @@ -486,20 +486,20 @@ fyl2xp1 # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - frstor (%eax) # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - wait # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fnsave (%eax) -# CHECK-NEXT: - - - 1.00 - - - - fsub %st(0), %st(1) -# CHECK-NEXT: - - - 1.00 - - - - fsub %st(2) +# CHECK-NEXT: - - - 1.00 - - - - fsub %st, %st(1) +# CHECK-NEXT: - - - 1.00 - - - - fsub %st(2), %st # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fsubs (%ecx) # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fsubl (%eax) -# CHECK-NEXT: - - - 1.00 - - - - fsubp %st(1) -# CHECK-NEXT: - - - 1.00 - - - - fsubp %st(2) +# CHECK-NEXT: - - - 1.00 - - - - fsubp %st, %st(1) +# CHECK-NEXT: - - - 1.00 - - - - fsubp %st, %st(2) # CHECK-NEXT: - - - 2.00 - - 0.50 0.50 fisubs (%ecx) # CHECK-NEXT: - - - 2.00 - - 0.50 0.50 fisubl (%eax) -# CHECK-NEXT: - - - 1.00 - - - - fsubr %st(0), %st(1) -# CHECK-NEXT: - - - 1.00 - - - - fsubr %st(2) +# CHECK-NEXT: - - - 1.00 - - - - fsubr %st, %st(1) +# CHECK-NEXT: - - - 1.00 - - - - fsubr %st(2), %st # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fsubrs (%ecx) # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fsubrl (%eax) -# CHECK-NEXT: - - - 1.00 - - - - fsubrp %st(1) -# CHECK-NEXT: - - - 1.00 - - - - fsubrp %st(2) +# CHECK-NEXT: - - - 1.00 - - - - fsubrp %st, %st(1) +# CHECK-NEXT: - - - 1.00 - - - - fsubrp %st, %st(2) # CHECK-NEXT: - - - 2.00 - - 0.50 0.50 fisubrs (%ecx) # CHECK-NEXT: - - - 2.00 - - 0.50 0.50 fisubrl (%eax) # CHECK-NEXT: - - - 1.00 - - - - ftst @@ -508,8 +508,8 @@ fyl2xp1 # CHECK-NEXT: - - - 1.00 - - - - fucomp %st(1) # CHECK-NEXT: - - - 1.00 - - - - fucomp %st(3) # CHECK-NEXT: - - - 1.00 - - - - fucompp -# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - fucomi %st(3) -# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - fucompi %st(3) +# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - fucomi %st(3), %st +# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - fucompi %st(3), %st # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - wait # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fxam # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fxch %st(1) diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-x87.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-x87.s index 7be9d699573ac6..6cd4439a25c1f3 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-x87.s @@ -5,7 +5,7 @@ f2xm1 fabs -fadd %st(0), %st(1) +fadd %st, %st(1) fadd %st(2) fadds (%ecx) faddl (%ecx) @@ -21,14 +21,14 @@ fchs fnclex -fcmovb %st(1), %st(0) -fcmovbe %st(1), %st(0) -fcmove %st(1), %st(0) -fcmovnb %st(1), %st(0) -fcmovnbe %st(1), %st(0) -fcmovne %st(1), %st(0) -fcmovnu %st(1), %st(0) -fcmovu %st(1), %st(0) +fcmovb %st(1), %st +fcmovbe %st(1), %st +fcmove %st(1), %st +fcmovnb %st(1), %st +fcmovnbe %st(1), %st +fcmovne %st(1), %st +fcmovnu %st(1), %st +fcmovu %st(1), %st fcom %st(1) fcom %st(3) @@ -47,7 +47,7 @@ fcos fdecstp -fdiv %st(0), %st(1) +fdiv %st, %st(1) fdiv %st(2) fdivs (%ecx) fdivl (%eax) @@ -56,7 +56,7 @@ fdivp %st(2) fidivs (%ecx) fidivl (%eax) -fdivr %st(0), %st(1) +fdivr %st, %st(1) fdivr %st(2) fdivrs (%ecx) fdivrl (%eax) @@ -106,7 +106,7 @@ fldln2 fldpi fldz -fmul %st(0), %st(1) +fmul %st, %st(1) fmul %st(2) fmuls (%ecx) fmull (%eax) @@ -153,7 +153,7 @@ fnstsw (%eax) frstor (%eax) fsave (%eax) -fsub %st(0), %st(1) +fsub %st, %st(1) fsub %st(2) fsubs (%ecx) fsubl (%eax) @@ -162,7 +162,7 @@ fsubp %st(2) fisubs (%ecx) fisubl (%eax) -fsubr %st(0), %st(1) +fsubr %st, %st(1) fsubr %st(2) fsubrs (%ecx) fsubrl (%eax) @@ -208,26 +208,26 @@ fyl2xp1 # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 100 0.25 U f2xm1 # CHECK-NEXT: 1 1 1.00 U fabs -# CHECK-NEXT: 1 3 1.00 U fadd %st(0), %st(1) -# CHECK-NEXT: 1 3 1.00 U fadd %st(2) +# CHECK-NEXT: 1 3 1.00 U fadd %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fadd %st(2), %st # CHECK-NEXT: 2 10 1.00 * U fadds (%ecx) # CHECK-NEXT: 2 10 1.00 * U faddl (%ecx) -# CHECK-NEXT: 1 3 1.00 U faddp %st(1) -# CHECK-NEXT: 1 3 1.00 U faddp %st(2) +# CHECK-NEXT: 1 3 1.00 U faddp %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U faddp %st, %st(2) # CHECK-NEXT: 3 13 2.00 * U fiadds (%ecx) # CHECK-NEXT: 3 13 2.00 * U fiaddl (%ecx) # CHECK-NEXT: 1 100 0.25 U fbld (%ecx) # CHECK-NEXT: 2 1 1.00 U fbstp (%eax) # CHECK-NEXT: 1 1 1.00 U fchs # CHECK-NEXT: 4 4 1.00 U fnclex -# CHECK-NEXT: 1 3 1.00 U fcmovb %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovbe %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmove %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovnb %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovnbe %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovne %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovnu %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovu %st(1), %st(0) +# CHECK-NEXT: 1 3 1.00 U fcmovb %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovbe %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmove %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovnb %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovnbe %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovne %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovnu %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovu %st(1), %st # CHECK-NEXT: 1 1 1.00 U fcom %st(1) # CHECK-NEXT: 1 1 1.00 U fcom %st(3) # CHECK-NEXT: 2 8 1.00 U fcoms (%ecx) @@ -237,24 +237,24 @@ fyl2xp1 # CHECK-NEXT: 2 8 1.00 U fcomps (%ecx) # CHECK-NEXT: 2 8 1.00 U fcompl (%eax) # CHECK-NEXT: 1 100 0.25 U fcompp -# CHECK-NEXT: 1 2 1.00 U fcomi %st(3) -# CHECK-NEXT: 1 2 1.00 U fcompi %st(3) +# CHECK-NEXT: 1 2 1.00 U fcomi %st(3), %st +# CHECK-NEXT: 1 2 1.00 U fcompi %st(3), %st # CHECK-NEXT: 1 100 0.25 U fcos # CHECK-NEXT: 2 2 1.00 U fdecstp -# CHECK-NEXT: 1 15 1.00 U fdiv %st(0), %st(1) -# CHECK-NEXT: 1 20 1.00 U fdiv %st(2) +# CHECK-NEXT: 1 15 1.00 U fdiv %st, %st(1) +# CHECK-NEXT: 1 20 1.00 U fdiv %st(2), %st # CHECK-NEXT: 2 22 1.00 * U fdivs (%ecx) # CHECK-NEXT: 2 22 1.00 * U fdivl (%eax) -# CHECK-NEXT: 1 15 1.00 U fdivp %st(1) -# CHECK-NEXT: 1 15 1.00 U fdivp %st(2) +# CHECK-NEXT: 1 15 1.00 U fdivp %st, %st(1) +# CHECK-NEXT: 1 15 1.00 U fdivp %st, %st(2) # CHECK-NEXT: 3 25 1.00 * U fidivs (%ecx) # CHECK-NEXT: 3 25 1.00 * U fidivl (%eax) -# CHECK-NEXT: 1 20 1.00 U fdivr %st(0), %st(1) -# CHECK-NEXT: 1 15 1.00 U fdivr %st(2) +# CHECK-NEXT: 1 20 1.00 U fdivr %st, %st(1) +# CHECK-NEXT: 1 15 1.00 U fdivr %st(2), %st # CHECK-NEXT: 2 27 1.00 * U fdivrs (%ecx) # CHECK-NEXT: 2 27 1.00 * U fdivrl (%eax) -# CHECK-NEXT: 1 20 1.00 U fdivrp %st(1) -# CHECK-NEXT: 1 20 1.00 U fdivrp %st(2) +# CHECK-NEXT: 1 20 1.00 U fdivrp %st, %st(1) +# CHECK-NEXT: 1 20 1.00 U fdivrp %st, %st(2) # CHECK-NEXT: 3 30 1.00 * U fidivrs (%ecx) # CHECK-NEXT: 3 30 1.00 * U fidivrl (%eax) # CHECK-NEXT: 1 100 0.25 U ffree %st(0) @@ -288,12 +288,12 @@ fyl2xp1 # CHECK-NEXT: 2 1 1.00 U fldln2 # CHECK-NEXT: 2 1 1.00 U fldpi # CHECK-NEXT: 1 1 0.50 U fldz -# CHECK-NEXT: 1 4 1.00 U fmul %st(0), %st(1) -# CHECK-NEXT: 1 4 1.00 U fmul %st(2) +# CHECK-NEXT: 1 4 1.00 U fmul %st, %st(1) +# CHECK-NEXT: 1 4 1.00 U fmul %st(2), %st # CHECK-NEXT: 2 11 1.00 * U fmuls (%ecx) # CHECK-NEXT: 2 11 1.00 * U fmull (%eax) -# CHECK-NEXT: 1 4 1.00 U fmulp %st(1) -# CHECK-NEXT: 1 4 1.00 U fmulp %st(2) +# CHECK-NEXT: 1 4 1.00 U fmulp %st, %st(1) +# CHECK-NEXT: 1 4 1.00 U fmulp %st, %st(2) # CHECK-NEXT: 3 14 1.00 * U fimuls (%ecx) # CHECK-NEXT: 3 14 1.00 * U fimull (%eax) # CHECK-NEXT: 1 1 0.50 U fnop @@ -321,20 +321,20 @@ fyl2xp1 # CHECK-NEXT: 1 100 0.25 U frstor (%eax) # CHECK-NEXT: 2 2 0.50 U wait # CHECK-NEXT: 1 100 0.25 U fnsave (%eax) -# CHECK-NEXT: 1 3 1.00 U fsub %st(0), %st(1) -# CHECK-NEXT: 1 3 1.00 U fsub %st(2) +# CHECK-NEXT: 1 3 1.00 U fsub %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsub %st(2), %st # CHECK-NEXT: 2 10 1.00 * U fsubs (%ecx) # CHECK-NEXT: 2 10 1.00 * U fsubl (%eax) -# CHECK-NEXT: 1 3 1.00 U fsubp %st(1) -# CHECK-NEXT: 1 3 1.00 U fsubp %st(2) +# CHECK-NEXT: 1 3 1.00 U fsubp %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsubp %st, %st(2) # CHECK-NEXT: 3 13 2.00 * U fisubs (%ecx) # CHECK-NEXT: 3 13 2.00 * U fisubl (%eax) -# CHECK-NEXT: 1 3 1.00 U fsubr %st(0), %st(1) -# CHECK-NEXT: 1 3 1.00 U fsubr %st(2) +# CHECK-NEXT: 1 3 1.00 U fsubr %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsubr %st(2), %st # CHECK-NEXT: 2 10 1.00 * U fsubrs (%ecx) # CHECK-NEXT: 2 10 1.00 * U fsubrl (%eax) -# CHECK-NEXT: 1 3 1.00 U fsubrp %st(1) -# CHECK-NEXT: 1 3 1.00 U fsubrp %st(2) +# CHECK-NEXT: 1 3 1.00 U fsubrp %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsubrp %st, %st(2) # CHECK-NEXT: 3 13 2.00 * U fisubrs (%ecx) # CHECK-NEXT: 3 13 2.00 * U fisubrl (%eax) # CHECK-NEXT: 1 2 1.00 U ftst @@ -343,8 +343,8 @@ fyl2xp1 # CHECK-NEXT: 1 1 1.00 U fucomp %st(1) # CHECK-NEXT: 1 1 1.00 U fucomp %st(3) # CHECK-NEXT: 1 2 1.00 U fucompp -# CHECK-NEXT: 1 2 1.00 U fucomi %st(3) -# CHECK-NEXT: 1 2 1.00 U fucompi %st(3) +# CHECK-NEXT: 1 2 1.00 U fucomi %st(3), %st +# CHECK-NEXT: 1 2 1.00 U fucompi %st(3), %st # CHECK-NEXT: 2 2 0.50 U wait # CHECK-NEXT: 1 100 0.25 U fxam # CHECK-NEXT: 15 17 4.00 U fxch %st(1) @@ -375,26 +375,26 @@ fyl2xp1 # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - f2xm1 # CHECK-NEXT: - - 1.00 - - - - - - - fabs -# CHECK-NEXT: - - - - - - - 1.00 - - fadd %st(0), %st(1) -# CHECK-NEXT: - - - - - - - 1.00 - - fadd %st(2) +# CHECK-NEXT: - - - - - - - 1.00 - - fadd %st, %st(1) +# CHECK-NEXT: - - - - - - - 1.00 - - fadd %st(2), %st # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - fadds (%ecx) # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - faddl (%ecx) -# CHECK-NEXT: - - - - - - - 1.00 - - faddp %st(1) -# CHECK-NEXT: - - - - - - - 1.00 - - faddp %st(2) +# CHECK-NEXT: - - - - - - - 1.00 - - faddp %st, %st(1) +# CHECK-NEXT: - - - - - - - 1.00 - - faddp %st, %st(2) # CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - fiadds (%ecx) # CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - fiaddl (%ecx) # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fbld (%ecx) # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 fbstp (%eax) # CHECK-NEXT: - - 1.00 - - - - - - - fchs # CHECK-NEXT: - - 1.00 1.00 - - - 1.00 1.00 - fnclex -# CHECK-NEXT: - - - 1.00 - - - - - - fcmovb %st(1), %st(0) -# CHECK-NEXT: - - - 1.00 - - - - - - fcmovbe %st(1), %st(0) -# CHECK-NEXT: - - - 1.00 - - - - - - fcmove %st(1), %st(0) -# CHECK-NEXT: - - - 1.00 - - - - - - fcmovnb %st(1), %st(0) -# CHECK-NEXT: - - - 1.00 - - - - - - fcmovnbe %st(1), %st(0) -# CHECK-NEXT: - - - 1.00 - - - - - - fcmovne %st(1), %st(0) -# CHECK-NEXT: - - - 1.00 - - - - - - fcmovnu %st(1), %st(0) -# CHECK-NEXT: - - - 1.00 - - - - - - fcmovu %st(1), %st(0) +# CHECK-NEXT: - - - 1.00 - - - - - - fcmovb %st(1), %st +# CHECK-NEXT: - - - 1.00 - - - - - - fcmovbe %st(1), %st +# CHECK-NEXT: - - - 1.00 - - - - - - fcmove %st(1), %st +# CHECK-NEXT: - - - 1.00 - - - - - - fcmovnb %st(1), %st +# CHECK-NEXT: - - - 1.00 - - - - - - fcmovnbe %st(1), %st +# CHECK-NEXT: - - - 1.00 - - - - - - fcmovne %st(1), %st +# CHECK-NEXT: - - - 1.00 - - - - - - fcmovnu %st(1), %st +# CHECK-NEXT: - - - 1.00 - - - - - - fcmovu %st(1), %st # CHECK-NEXT: - - - - - - - 1.00 - - fcom %st(1) # CHECK-NEXT: - - - - - - - 1.00 - - fcom %st(3) # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - fcoms (%ecx) @@ -404,24 +404,24 @@ fyl2xp1 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - fcomps (%ecx) # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - fcompl (%eax) # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fcompp -# CHECK-NEXT: - - 1.00 - - - - - - - fcomi %st(3) -# CHECK-NEXT: - - 1.00 - - - - - - - fcompi %st(3) +# CHECK-NEXT: - - 1.00 - - - - - - - fcomi %st(3), %st +# CHECK-NEXT: - - 1.00 - - - - - - - fcompi %st(3), %st # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fcos # CHECK-NEXT: - - 1.00 - - - - 1.00 - - fdecstp -# CHECK-NEXT: - - 1.00 - - - - - - - fdiv %st(0), %st(1) -# CHECK-NEXT: - - 1.00 - - - - - - - fdiv %st(2) +# CHECK-NEXT: - - 1.00 - - - - - - - fdiv %st, %st(1) +# CHECK-NEXT: - - 1.00 - - - - - - - fdiv %st(2), %st # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - fdivs (%ecx) # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - fdivl (%eax) -# CHECK-NEXT: - - 1.00 - - - - - - - fdivp %st(1) -# CHECK-NEXT: - - 1.00 - - - - - - - fdivp %st(2) +# CHECK-NEXT: - - 1.00 - - - - - - - fdivp %st, %st(1) +# CHECK-NEXT: - - 1.00 - - - - - - - fdivp %st, %st(2) # CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - fidivs (%ecx) # CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - fidivl (%eax) -# CHECK-NEXT: - - 1.00 - - - - - - - fdivr %st(0), %st(1) -# CHECK-NEXT: - - 1.00 - - - - - - - fdivr %st(2) +# CHECK-NEXT: - - 1.00 - - - - - - - fdivr %st, %st(1) +# CHECK-NEXT: - - 1.00 - - - - - - - fdivr %st(2), %st # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - fdivrs (%ecx) # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - fdivrl (%eax) -# CHECK-NEXT: - - 1.00 - - - - - - - fdivrp %st(1) -# CHECK-NEXT: - - 1.00 - - - - - - - fdivrp %st(2) +# CHECK-NEXT: - - 1.00 - - - - - - - fdivrp %st, %st(1) +# CHECK-NEXT: - - 1.00 - - - - - - - fdivrp %st, %st(2) # CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - fidivrs (%ecx) # CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - fidivrl (%eax) # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - ffree %st(0) @@ -455,12 +455,12 @@ fyl2xp1 # CHECK-NEXT: - - 1.00 - - - - 1.00 - - fldln2 # CHECK-NEXT: - - 1.00 - - - - 1.00 - - fldpi # CHECK-NEXT: - - 0.50 - - - - 0.50 - - fldz -# CHECK-NEXT: - - 1.00 - - - - - - - fmul %st(0), %st(1) -# CHECK-NEXT: - - 1.00 - - - - - - - fmul %st(2) +# CHECK-NEXT: - - 1.00 - - - - - - - fmul %st, %st(1) +# CHECK-NEXT: - - 1.00 - - - - - - - fmul %st(2), %st # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - fmuls (%ecx) # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - fmull (%eax) -# CHECK-NEXT: - - 1.00 - - - - - - - fmulp %st(1) -# CHECK-NEXT: - - 1.00 - - - - - - - fmulp %st(2) +# CHECK-NEXT: - - 1.00 - - - - - - - fmulp %st, %st(1) +# CHECK-NEXT: - - 1.00 - - - - - - - fmulp %st, %st(2) # CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - fimuls (%ecx) # CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - fimull (%eax) # CHECK-NEXT: - - 0.50 - - - - 0.50 - - fnop @@ -488,20 +488,20 @@ fyl2xp1 # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - frstor (%eax) # CHECK-NEXT: - - 0.50 0.50 - - - 0.50 0.50 - wait # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fnsave (%eax) -# CHECK-NEXT: - - - - - - - 1.00 - - fsub %st(0), %st(1) -# CHECK-NEXT: - - - - - - - 1.00 - - fsub %st(2) +# CHECK-NEXT: - - - - - - - 1.00 - - fsub %st, %st(1) +# CHECK-NEXT: - - - - - - - 1.00 - - fsub %st(2), %st # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - fsubs (%ecx) # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - fsubl (%eax) -# CHECK-NEXT: - - - - - - - 1.00 - - fsubp %st(1) -# CHECK-NEXT: - - - - - - - 1.00 - - fsubp %st(2) +# CHECK-NEXT: - - - - - - - 1.00 - - fsubp %st, %st(1) +# CHECK-NEXT: - - - - - - - 1.00 - - fsubp %st, %st(2) # CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - fisubs (%ecx) # CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - fisubl (%eax) -# CHECK-NEXT: - - - - - - - 1.00 - - fsubr %st(0), %st(1) -# CHECK-NEXT: - - - - - - - 1.00 - - fsubr %st(2) +# CHECK-NEXT: - - - - - - - 1.00 - - fsubr %st, %st(1) +# CHECK-NEXT: - - - - - - - 1.00 - - fsubr %st(2), %st # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - fsubrs (%ecx) # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - fsubrl (%eax) -# CHECK-NEXT: - - - - - - - 1.00 - - fsubrp %st(1) -# CHECK-NEXT: - - - - - - - 1.00 - - fsubrp %st(2) +# CHECK-NEXT: - - - - - - - 1.00 - - fsubrp %st, %st(1) +# CHECK-NEXT: - - - - - - - 1.00 - - fsubrp %st, %st(2) # CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - fisubrs (%ecx) # CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - fisubrl (%eax) # CHECK-NEXT: - - 1.00 - - - - - - - ftst @@ -510,8 +510,8 @@ fyl2xp1 # CHECK-NEXT: - - - - - - - 1.00 - - fucomp %st(1) # CHECK-NEXT: - - - - - - - 1.00 - - fucomp %st(3) # CHECK-NEXT: - - 1.00 - - - - - - - fucompp -# CHECK-NEXT: - - 1.00 - - - - - - - fucomi %st(3) -# CHECK-NEXT: - - 1.00 - - - - - - - fucompi %st(3) +# CHECK-NEXT: - - 1.00 - - - - - - - fucomi %st(3), %st +# CHECK-NEXT: - - 1.00 - - - - - - - fucompi %st(3), %st # CHECK-NEXT: - - 0.50 0.50 - - - 0.50 0.50 - wait # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fxam # CHECK-NEXT: - - 4.00 2.00 - - - 4.00 5.00 - fxch %st(1) diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-x87.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-x87.s index aecb4a7ab9211b..75cca5297704b2 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-x87.s @@ -5,7 +5,7 @@ f2xm1 fabs -fadd %st(0), %st(1) +fadd %st, %st(1) fadd %st(2) fadds (%ecx) faddl (%ecx) @@ -21,14 +21,14 @@ fchs fnclex -fcmovb %st(1), %st(0) -fcmovbe %st(1), %st(0) -fcmove %st(1), %st(0) -fcmovnb %st(1), %st(0) -fcmovnbe %st(1), %st(0) -fcmovne %st(1), %st(0) -fcmovnu %st(1), %st(0) -fcmovu %st(1), %st(0) +fcmovb %st(1), %st +fcmovbe %st(1), %st +fcmove %st(1), %st +fcmovnb %st(1), %st +fcmovnbe %st(1), %st +fcmovne %st(1), %st +fcmovnu %st(1), %st +fcmovu %st(1), %st fcom %st(1) fcom %st(3) @@ -47,7 +47,7 @@ fcos fdecstp -fdiv %st(0), %st(1) +fdiv %st, %st(1) fdiv %st(2) fdivs (%ecx) fdivl (%eax) @@ -56,7 +56,7 @@ fdivp %st(2) fidivs (%ecx) fidivl (%eax) -fdivr %st(0), %st(1) +fdivr %st, %st(1) fdivr %st(2) fdivrs (%ecx) fdivrl (%eax) @@ -106,7 +106,7 @@ fldln2 fldpi fldz -fmul %st(0), %st(1) +fmul %st, %st(1) fmul %st(2) fmuls (%ecx) fmull (%eax) @@ -153,7 +153,7 @@ fnstsw (%eax) frstor (%eax) fsave (%eax) -fsub %st(0), %st(1) +fsub %st, %st(1) fsub %st(2) fsubs (%ecx) fsubl (%eax) @@ -162,7 +162,7 @@ fsubp %st(2) fisubs (%ecx) fisubl (%eax) -fsubr %st(0), %st(1) +fsubr %st, %st(1) fsubr %st(2) fsubrs (%ecx) fsubrl (%eax) @@ -208,26 +208,26 @@ fyl2xp1 # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 100 0.25 U f2xm1 # CHECK-NEXT: 1 1 1.00 U fabs -# CHECK-NEXT: 1 3 1.00 U fadd %st(0), %st(1) -# CHECK-NEXT: 1 3 1.00 U fadd %st(2) +# CHECK-NEXT: 1 3 1.00 U fadd %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fadd %st(2), %st # CHECK-NEXT: 2 10 1.00 * U fadds (%ecx) # CHECK-NEXT: 2 10 1.00 * U faddl (%ecx) -# CHECK-NEXT: 1 3 1.00 U faddp %st(1) -# CHECK-NEXT: 1 3 1.00 U faddp %st(2) +# CHECK-NEXT: 1 3 1.00 U faddp %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U faddp %st, %st(2) # CHECK-NEXT: 3 13 2.00 * U fiadds (%ecx) # CHECK-NEXT: 3 13 2.00 * U fiaddl (%ecx) # CHECK-NEXT: 1 100 0.25 U fbld (%ecx) # CHECK-NEXT: 2 1 1.00 U fbstp (%eax) # CHECK-NEXT: 1 1 1.00 U fchs # CHECK-NEXT: 4 4 1.00 U fnclex -# CHECK-NEXT: 1 3 1.00 U fcmovb %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovbe %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmove %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovnb %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovnbe %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovne %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovnu %st(1), %st(0) -# CHECK-NEXT: 1 3 1.00 U fcmovu %st(1), %st(0) +# CHECK-NEXT: 1 3 1.00 U fcmovb %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovbe %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmove %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovnb %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovnbe %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovne %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovnu %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcmovu %st(1), %st # CHECK-NEXT: 1 1 1.00 U fcom %st(1) # CHECK-NEXT: 1 1 1.00 U fcom %st(3) # CHECK-NEXT: 2 8 1.00 U fcoms (%ecx) @@ -237,24 +237,24 @@ fyl2xp1 # CHECK-NEXT: 2 8 1.00 U fcomps (%ecx) # CHECK-NEXT: 2 8 1.00 U fcompl (%eax) # CHECK-NEXT: 1 100 0.25 U fcompp -# CHECK-NEXT: 1 2 1.00 U fcomi %st(3) -# CHECK-NEXT: 1 2 1.00 U fcompi %st(3) +# CHECK-NEXT: 1 2 1.00 U fcomi %st(3), %st +# CHECK-NEXT: 1 2 1.00 U fcompi %st(3), %st # CHECK-NEXT: 1 100 0.25 U fcos # CHECK-NEXT: 2 2 1.00 U fdecstp -# CHECK-NEXT: 1 15 1.00 U fdiv %st(0), %st(1) -# CHECK-NEXT: 1 20 1.00 U fdiv %st(2) +# CHECK-NEXT: 1 15 1.00 U fdiv %st, %st(1) +# CHECK-NEXT: 1 20 1.00 U fdiv %st(2), %st # CHECK-NEXT: 2 22 1.00 * U fdivs (%ecx) # CHECK-NEXT: 2 22 1.00 * U fdivl (%eax) -# CHECK-NEXT: 1 15 1.00 U fdivp %st(1) -# CHECK-NEXT: 1 15 1.00 U fdivp %st(2) +# CHECK-NEXT: 1 15 1.00 U fdivp %st, %st(1) +# CHECK-NEXT: 1 15 1.00 U fdivp %st, %st(2) # CHECK-NEXT: 3 25 1.00 * U fidivs (%ecx) # CHECK-NEXT: 3 25 1.00 * U fidivl (%eax) -# CHECK-NEXT: 1 20 1.00 U fdivr %st(0), %st(1) -# CHECK-NEXT: 1 15 1.00 U fdivr %st(2) +# CHECK-NEXT: 1 20 1.00 U fdivr %st, %st(1) +# CHECK-NEXT: 1 15 1.00 U fdivr %st(2), %st # CHECK-NEXT: 2 27 1.00 * U fdivrs (%ecx) # CHECK-NEXT: 2 27 1.00 * U fdivrl (%eax) -# CHECK-NEXT: 1 20 1.00 U fdivrp %st(1) -# CHECK-NEXT: 1 20 1.00 U fdivrp %st(2) +# CHECK-NEXT: 1 20 1.00 U fdivrp %st, %st(1) +# CHECK-NEXT: 1 20 1.00 U fdivrp %st, %st(2) # CHECK-NEXT: 3 30 1.00 * U fidivrs (%ecx) # CHECK-NEXT: 3 30 1.00 * U fidivrl (%eax) # CHECK-NEXT: 1 100 0.25 U ffree %st(0) @@ -288,12 +288,12 @@ fyl2xp1 # CHECK-NEXT: 2 1 1.00 U fldln2 # CHECK-NEXT: 2 1 1.00 U fldpi # CHECK-NEXT: 1 1 0.50 U fldz -# CHECK-NEXT: 1 4 1.00 U fmul %st(0), %st(1) -# CHECK-NEXT: 1 4 1.00 U fmul %st(2) +# CHECK-NEXT: 1 4 1.00 U fmul %st, %st(1) +# CHECK-NEXT: 1 4 1.00 U fmul %st(2), %st # CHECK-NEXT: 2 11 1.00 * U fmuls (%ecx) # CHECK-NEXT: 2 11 1.00 * U fmull (%eax) -# CHECK-NEXT: 1 4 1.00 U fmulp %st(1) -# CHECK-NEXT: 1 4 1.00 U fmulp %st(2) +# CHECK-NEXT: 1 4 1.00 U fmulp %st, %st(1) +# CHECK-NEXT: 1 4 1.00 U fmulp %st, %st(2) # CHECK-NEXT: 3 14 1.00 * U fimuls (%ecx) # CHECK-NEXT: 3 14 1.00 * U fimull (%eax) # CHECK-NEXT: 1 1 0.50 U fnop @@ -321,20 +321,20 @@ fyl2xp1 # CHECK-NEXT: 1 100 0.25 U frstor (%eax) # CHECK-NEXT: 2 2 0.50 U wait # CHECK-NEXT: 1 100 0.25 U fnsave (%eax) -# CHECK-NEXT: 1 3 1.00 U fsub %st(0), %st(1) -# CHECK-NEXT: 1 3 1.00 U fsub %st(2) +# CHECK-NEXT: 1 3 1.00 U fsub %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsub %st(2), %st # CHECK-NEXT: 2 10 1.00 * U fsubs (%ecx) # CHECK-NEXT: 2 10 1.00 * U fsubl (%eax) -# CHECK-NEXT: 1 3 1.00 U fsubp %st(1) -# CHECK-NEXT: 1 3 1.00 U fsubp %st(2) +# CHECK-NEXT: 1 3 1.00 U fsubp %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsubp %st, %st(2) # CHECK-NEXT: 3 13 2.00 * U fisubs (%ecx) # CHECK-NEXT: 3 13 2.00 * U fisubl (%eax) -# CHECK-NEXT: 1 3 1.00 U fsubr %st(0), %st(1) -# CHECK-NEXT: 1 3 1.00 U fsubr %st(2) +# CHECK-NEXT: 1 3 1.00 U fsubr %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsubr %st(2), %st # CHECK-NEXT: 2 10 1.00 * U fsubrs (%ecx) # CHECK-NEXT: 2 10 1.00 * U fsubrl (%eax) -# CHECK-NEXT: 1 3 1.00 U fsubrp %st(1) -# CHECK-NEXT: 1 3 1.00 U fsubrp %st(2) +# CHECK-NEXT: 1 3 1.00 U fsubrp %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsubrp %st, %st(2) # CHECK-NEXT: 3 13 2.00 * U fisubrs (%ecx) # CHECK-NEXT: 3 13 2.00 * U fisubrl (%eax) # CHECK-NEXT: 1 2 1.00 U ftst @@ -343,8 +343,8 @@ fyl2xp1 # CHECK-NEXT: 1 1 1.00 U fucomp %st(1) # CHECK-NEXT: 1 1 1.00 U fucomp %st(3) # CHECK-NEXT: 1 2 1.00 U fucompp -# CHECK-NEXT: 1 2 1.00 U fucomi %st(3) -# CHECK-NEXT: 1 2 1.00 U fucompi %st(3) +# CHECK-NEXT: 1 2 1.00 U fucomi %st(3), %st +# CHECK-NEXT: 1 2 1.00 U fucompi %st(3), %st # CHECK-NEXT: 2 2 0.50 U wait # CHECK-NEXT: 1 100 0.25 U fxam # CHECK-NEXT: 15 17 4.00 U fxch %st(1) @@ -375,26 +375,26 @@ fyl2xp1 # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - f2xm1 # CHECK-NEXT: - - 1.00 - - - - - - - fabs -# CHECK-NEXT: - - - - - - - 1.00 - - fadd %st(0), %st(1) -# CHECK-NEXT: - - - - - - - 1.00 - - fadd %st(2) +# CHECK-NEXT: - - - - - - - 1.00 - - fadd %st, %st(1) +# CHECK-NEXT: - - - - - - - 1.00 - - fadd %st(2), %st # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - fadds (%ecx) # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - faddl (%ecx) -# CHECK-NEXT: - - - - - - - 1.00 - - faddp %st(1) -# CHECK-NEXT: - - - - - - - 1.00 - - faddp %st(2) +# CHECK-NEXT: - - - - - - - 1.00 - - faddp %st, %st(1) +# CHECK-NEXT: - - - - - - - 1.00 - - faddp %st, %st(2) # CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - fiadds (%ecx) # CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - fiaddl (%ecx) # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fbld (%ecx) # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 fbstp (%eax) # CHECK-NEXT: - - 1.00 - - - - - - - fchs # CHECK-NEXT: - - 1.00 1.00 - - - 1.00 1.00 - fnclex -# CHECK-NEXT: - - - 1.00 - - - - - - fcmovb %st(1), %st(0) -# CHECK-NEXT: - - - 1.00 - - - - - - fcmovbe %st(1), %st(0) -# CHECK-NEXT: - - - 1.00 - - - - - - fcmove %st(1), %st(0) -# CHECK-NEXT: - - - 1.00 - - - - - - fcmovnb %st(1), %st(0) -# CHECK-NEXT: - - - 1.00 - - - - - - fcmovnbe %st(1), %st(0) -# CHECK-NEXT: - - - 1.00 - - - - - - fcmovne %st(1), %st(0) -# CHECK-NEXT: - - - 1.00 - - - - - - fcmovnu %st(1), %st(0) -# CHECK-NEXT: - - - 1.00 - - - - - - fcmovu %st(1), %st(0) +# CHECK-NEXT: - - - 1.00 - - - - - - fcmovb %st(1), %st +# CHECK-NEXT: - - - 1.00 - - - - - - fcmovbe %st(1), %st +# CHECK-NEXT: - - - 1.00 - - - - - - fcmove %st(1), %st +# CHECK-NEXT: - - - 1.00 - - - - - - fcmovnb %st(1), %st +# CHECK-NEXT: - - - 1.00 - - - - - - fcmovnbe %st(1), %st +# CHECK-NEXT: - - - 1.00 - - - - - - fcmovne %st(1), %st +# CHECK-NEXT: - - - 1.00 - - - - - - fcmovnu %st(1), %st +# CHECK-NEXT: - - - 1.00 - - - - - - fcmovu %st(1), %st # CHECK-NEXT: - - - - - - - 1.00 - - fcom %st(1) # CHECK-NEXT: - - - - - - - 1.00 - - fcom %st(3) # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - fcoms (%ecx) @@ -404,24 +404,24 @@ fyl2xp1 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - fcomps (%ecx) # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - fcompl (%eax) # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fcompp -# CHECK-NEXT: - - 1.00 - - - - - - - fcomi %st(3) -# CHECK-NEXT: - - 1.00 - - - - - - - fcompi %st(3) +# CHECK-NEXT: - - 1.00 - - - - - - - fcomi %st(3), %st +# CHECK-NEXT: - - 1.00 - - - - - - - fcompi %st(3), %st # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fcos # CHECK-NEXT: - - 1.00 - - - - 1.00 - - fdecstp -# CHECK-NEXT: - - 1.00 - - - - - - - fdiv %st(0), %st(1) -# CHECK-NEXT: - - 1.00 - - - - - - - fdiv %st(2) +# CHECK-NEXT: - - 1.00 - - - - - - - fdiv %st, %st(1) +# CHECK-NEXT: - - 1.00 - - - - - - - fdiv %st(2), %st # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - fdivs (%ecx) # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - fdivl (%eax) -# CHECK-NEXT: - - 1.00 - - - - - - - fdivp %st(1) -# CHECK-NEXT: - - 1.00 - - - - - - - fdivp %st(2) +# CHECK-NEXT: - - 1.00 - - - - - - - fdivp %st, %st(1) +# CHECK-NEXT: - - 1.00 - - - - - - - fdivp %st, %st(2) # CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - fidivs (%ecx) # CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - fidivl (%eax) -# CHECK-NEXT: - - 1.00 - - - - - - - fdivr %st(0), %st(1) -# CHECK-NEXT: - - 1.00 - - - - - - - fdivr %st(2) +# CHECK-NEXT: - - 1.00 - - - - - - - fdivr %st, %st(1) +# CHECK-NEXT: - - 1.00 - - - - - - - fdivr %st(2), %st # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - fdivrs (%ecx) # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - fdivrl (%eax) -# CHECK-NEXT: - - 1.00 - - - - - - - fdivrp %st(1) -# CHECK-NEXT: - - 1.00 - - - - - - - fdivrp %st(2) +# CHECK-NEXT: - - 1.00 - - - - - - - fdivrp %st, %st(1) +# CHECK-NEXT: - - 1.00 - - - - - - - fdivrp %st, %st(2) # CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - fidivrs (%ecx) # CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - fidivrl (%eax) # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - ffree %st(0) @@ -455,12 +455,12 @@ fyl2xp1 # CHECK-NEXT: - - 1.00 - - - - 1.00 - - fldln2 # CHECK-NEXT: - - 1.00 - - - - 1.00 - - fldpi # CHECK-NEXT: - - 0.50 - - - - 0.50 - - fldz -# CHECK-NEXT: - - 1.00 - - - - - - - fmul %st(0), %st(1) -# CHECK-NEXT: - - 1.00 - - - - - - - fmul %st(2) +# CHECK-NEXT: - - 1.00 - - - - - - - fmul %st, %st(1) +# CHECK-NEXT: - - 1.00 - - - - - - - fmul %st(2), %st # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - fmuls (%ecx) # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - fmull (%eax) -# CHECK-NEXT: - - 1.00 - - - - - - - fmulp %st(1) -# CHECK-NEXT: - - 1.00 - - - - - - - fmulp %st(2) +# CHECK-NEXT: - - 1.00 - - - - - - - fmulp %st, %st(1) +# CHECK-NEXT: - - 1.00 - - - - - - - fmulp %st, %st(2) # CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - fimuls (%ecx) # CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - fimull (%eax) # CHECK-NEXT: - - 0.50 - - - - 0.50 - - fnop @@ -488,20 +488,20 @@ fyl2xp1 # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - frstor (%eax) # CHECK-NEXT: - - 0.50 0.50 - - - 0.50 0.50 - wait # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fnsave (%eax) -# CHECK-NEXT: - - - - - - - 1.00 - - fsub %st(0), %st(1) -# CHECK-NEXT: - - - - - - - 1.00 - - fsub %st(2) +# CHECK-NEXT: - - - - - - - 1.00 - - fsub %st, %st(1) +# CHECK-NEXT: - - - - - - - 1.00 - - fsub %st(2), %st # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - fsubs (%ecx) # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - fsubl (%eax) -# CHECK-NEXT: - - - - - - - 1.00 - - fsubp %st(1) -# CHECK-NEXT: - - - - - - - 1.00 - - fsubp %st(2) +# CHECK-NEXT: - - - - - - - 1.00 - - fsubp %st, %st(1) +# CHECK-NEXT: - - - - - - - 1.00 - - fsubp %st, %st(2) # CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - fisubs (%ecx) # CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - fisubl (%eax) -# CHECK-NEXT: - - - - - - - 1.00 - - fsubr %st(0), %st(1) -# CHECK-NEXT: - - - - - - - 1.00 - - fsubr %st(2) +# CHECK-NEXT: - - - - - - - 1.00 - - fsubr %st, %st(1) +# CHECK-NEXT: - - - - - - - 1.00 - - fsubr %st(2), %st # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - fsubrs (%ecx) # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - fsubrl (%eax) -# CHECK-NEXT: - - - - - - - 1.00 - - fsubrp %st(1) -# CHECK-NEXT: - - - - - - - 1.00 - - fsubrp %st(2) +# CHECK-NEXT: - - - - - - - 1.00 - - fsubrp %st, %st(1) +# CHECK-NEXT: - - - - - - - 1.00 - - fsubrp %st, %st(2) # CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - fisubrs (%ecx) # CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - fisubrl (%eax) # CHECK-NEXT: - - 1.00 - - - - - - - ftst @@ -510,8 +510,8 @@ fyl2xp1 # CHECK-NEXT: - - - - - - - 1.00 - - fucomp %st(1) # CHECK-NEXT: - - - - - - - 1.00 - - fucomp %st(3) # CHECK-NEXT: - - 1.00 - - - - - - - fucompp -# CHECK-NEXT: - - 1.00 - - - - - - - fucomi %st(3) -# CHECK-NEXT: - - 1.00 - - - - - - - fucompi %st(3) +# CHECK-NEXT: - - 1.00 - - - - - - - fucomi %st(3), %st +# CHECK-NEXT: - - 1.00 - - - - - - - fucompi %st(3), %st # CHECK-NEXT: - - 0.50 0.50 - - - 0.50 0.50 - wait # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fxam # CHECK-NEXT: - - 4.00 2.00 - - - 4.00 5.00 - fxch %st(1) diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-x87.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-x87.s index 2f5f6ef08f1c72..030b71fb7b63f7 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-x87.s @@ -5,7 +5,7 @@ f2xm1 fabs -fadd %st(0), %st(1) +fadd %st, %st(1) fadd %st(2) fadds (%ecx) faddl (%ecx) @@ -21,14 +21,14 @@ fchs fnclex -fcmovb %st(1), %st(0) -fcmovbe %st(1), %st(0) -fcmove %st(1), %st(0) -fcmovnb %st(1), %st(0) -fcmovnbe %st(1), %st(0) -fcmovne %st(1), %st(0) -fcmovnu %st(1), %st(0) -fcmovu %st(1), %st(0) +fcmovb %st(1), %st +fcmovbe %st(1), %st +fcmove %st(1), %st +fcmovnb %st(1), %st +fcmovnbe %st(1), %st +fcmovne %st(1), %st +fcmovnu %st(1), %st +fcmovu %st(1), %st fcom %st(1) fcom %st(3) @@ -47,7 +47,7 @@ fcos fdecstp -fdiv %st(0), %st(1) +fdiv %st, %st(1) fdiv %st(2) fdivs (%ecx) fdivl (%eax) @@ -56,7 +56,7 @@ fdivp %st(2) fidivs (%ecx) fidivl (%eax) -fdivr %st(0), %st(1) +fdivr %st, %st(1) fdivr %st(2) fdivrs (%ecx) fdivrl (%eax) @@ -106,7 +106,7 @@ fldln2 fldpi fldz -fmul %st(0), %st(1) +fmul %st, %st(1) fmul %st(2) fmuls (%ecx) fmull (%eax) @@ -153,7 +153,7 @@ fnstsw (%eax) frstor (%eax) fsave (%eax) -fsub %st(0), %st(1) +fsub %st, %st(1) fsub %st(2) fsubs (%ecx) fsubl (%eax) @@ -162,7 +162,7 @@ fsubp %st(2) fisubs (%ecx) fisubl (%eax) -fsubr %st(0), %st(1) +fsubr %st, %st(1) fsubr %st(2) fsubrs (%ecx) fsubrl (%eax) @@ -208,26 +208,26 @@ fyl2xp1 # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 100 0.25 U f2xm1 # CHECK-NEXT: 1 2 1.00 U fabs -# CHECK-NEXT: 1 3 1.00 U fadd %st(0), %st(1) -# CHECK-NEXT: 1 3 1.00 U fadd %st(2) +# CHECK-NEXT: 1 3 1.00 U fadd %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fadd %st(2), %st # CHECK-NEXT: 1 10 1.00 * U fadds (%ecx) # CHECK-NEXT: 1 10 1.00 * U faddl (%ecx) -# CHECK-NEXT: 1 3 1.00 U faddp %st(1) -# CHECK-NEXT: 1 3 1.00 U faddp %st(2) +# CHECK-NEXT: 1 3 1.00 U faddp %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U faddp %st, %st(2) # CHECK-NEXT: 1 10 1.00 * U fiadds (%ecx) # CHECK-NEXT: 1 10 1.00 * U fiaddl (%ecx) # CHECK-NEXT: 1 100 0.25 U fbld (%ecx) # CHECK-NEXT: 1 100 0.25 U fbstp (%eax) # CHECK-NEXT: 1 1 1.00 U fchs # CHECK-NEXT: 1 100 0.25 U fnclex -# CHECK-NEXT: 1 100 0.25 U fcmovb %st(1), %st(0) -# CHECK-NEXT: 1 100 0.25 U fcmovbe %st(1), %st(0) -# CHECK-NEXT: 1 100 0.25 U fcmove %st(1), %st(0) -# CHECK-NEXT: 1 100 0.25 U fcmovnb %st(1), %st(0) -# CHECK-NEXT: 1 100 0.25 U fcmovnbe %st(1), %st(0) -# CHECK-NEXT: 1 100 0.25 U fcmovne %st(1), %st(0) -# CHECK-NEXT: 1 100 0.25 U fcmovnu %st(1), %st(0) -# CHECK-NEXT: 1 100 0.25 U fcmovu %st(1), %st(0) +# CHECK-NEXT: 1 100 0.25 U fcmovb %st(1), %st +# CHECK-NEXT: 1 100 0.25 U fcmovbe %st(1), %st +# CHECK-NEXT: 1 100 0.25 U fcmove %st(1), %st +# CHECK-NEXT: 1 100 0.25 U fcmovnb %st(1), %st +# CHECK-NEXT: 1 100 0.25 U fcmovnbe %st(1), %st +# CHECK-NEXT: 1 100 0.25 U fcmovne %st(1), %st +# CHECK-NEXT: 1 100 0.25 U fcmovnu %st(1), %st +# CHECK-NEXT: 1 100 0.25 U fcmovu %st(1), %st # CHECK-NEXT: 1 1 1.00 U fcom %st(1) # CHECK-NEXT: 1 1 1.00 U fcom %st(3) # CHECK-NEXT: 1 8 1.00 U fcoms (%ecx) @@ -237,24 +237,24 @@ fyl2xp1 # CHECK-NEXT: 1 8 1.00 U fcomps (%ecx) # CHECK-NEXT: 1 8 1.00 U fcompl (%eax) # CHECK-NEXT: 1 1 1.00 U fcompp -# CHECK-NEXT: 1 9 0.50 U fcomi %st(3) -# CHECK-NEXT: 1 9 0.50 U fcompi %st(3) +# CHECK-NEXT: 1 9 0.50 U fcomi %st(3), %st +# CHECK-NEXT: 1 9 0.50 U fcompi %st(3), %st # CHECK-NEXT: 1 100 0.25 U fcos # CHECK-NEXT: 1 11 1.00 U fdecstp -# CHECK-NEXT: 1 15 1.00 U fdiv %st(0), %st(1) -# CHECK-NEXT: 1 15 1.00 U fdiv %st(2) +# CHECK-NEXT: 1 15 1.00 U fdiv %st, %st(1) +# CHECK-NEXT: 1 15 1.00 U fdiv %st(2), %st # CHECK-NEXT: 1 22 1.00 * U fdivs (%ecx) # CHECK-NEXT: 1 22 1.00 * U fdivl (%eax) -# CHECK-NEXT: 1 15 1.00 U fdivp %st(1) -# CHECK-NEXT: 1 15 1.00 U fdivp %st(2) +# CHECK-NEXT: 1 15 1.00 U fdivp %st, %st(1) +# CHECK-NEXT: 1 15 1.00 U fdivp %st, %st(2) # CHECK-NEXT: 1 22 1.00 * U fidivs (%ecx) # CHECK-NEXT: 1 22 1.00 * U fidivl (%eax) -# CHECK-NEXT: 1 15 1.00 U fdivr %st(0), %st(1) -# CHECK-NEXT: 1 15 1.00 U fdivr %st(2) +# CHECK-NEXT: 1 15 1.00 U fdivr %st, %st(1) +# CHECK-NEXT: 1 15 1.00 U fdivr %st(2), %st # CHECK-NEXT: 1 22 1.00 * U fdivrs (%ecx) # CHECK-NEXT: 1 22 1.00 * U fdivrl (%eax) -# CHECK-NEXT: 1 15 1.00 U fdivrp %st(1) -# CHECK-NEXT: 1 15 1.00 U fdivrp %st(2) +# CHECK-NEXT: 1 15 1.00 U fdivrp %st, %st(1) +# CHECK-NEXT: 1 15 1.00 U fdivrp %st, %st(2) # CHECK-NEXT: 1 22 1.00 * U fidivrs (%ecx) # CHECK-NEXT: 1 22 1.00 * U fidivrl (%eax) # CHECK-NEXT: 1 11 1.00 U ffree %st(0) @@ -288,12 +288,12 @@ fyl2xp1 # CHECK-NEXT: 1 11 1.00 U fldln2 # CHECK-NEXT: 1 11 1.00 U fldpi # CHECK-NEXT: 1 8 0.50 U fldz -# CHECK-NEXT: 1 3 0.50 U fmul %st(0), %st(1) -# CHECK-NEXT: 1 3 0.50 U fmul %st(2) +# CHECK-NEXT: 1 3 0.50 U fmul %st, %st(1) +# CHECK-NEXT: 1 3 0.50 U fmul %st(2), %st # CHECK-NEXT: 2 10 0.50 * U fmuls (%ecx) # CHECK-NEXT: 2 10 0.50 * U fmull (%eax) -# CHECK-NEXT: 1 3 0.50 U fmulp %st(1) -# CHECK-NEXT: 1 3 0.50 U fmulp %st(2) +# CHECK-NEXT: 1 3 0.50 U fmulp %st, %st(1) +# CHECK-NEXT: 1 3 0.50 U fmulp %st, %st(2) # CHECK-NEXT: 2 10 0.50 * U fimuls (%ecx) # CHECK-NEXT: 2 10 0.50 * U fimull (%eax) # CHECK-NEXT: 1 1 1.00 U fnop @@ -321,20 +321,20 @@ fyl2xp1 # CHECK-NEXT: 1 100 0.25 U frstor (%eax) # CHECK-NEXT: 1 1 1.00 U wait # CHECK-NEXT: 1 100 0.25 U fnsave (%eax) -# CHECK-NEXT: 1 3 1.00 U fsub %st(0), %st(1) -# CHECK-NEXT: 1 3 1.00 U fsub %st(2) +# CHECK-NEXT: 1 3 1.00 U fsub %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsub %st(2), %st # CHECK-NEXT: 1 10 1.00 * U fsubs (%ecx) # CHECK-NEXT: 1 10 1.00 * U fsubl (%eax) -# CHECK-NEXT: 1 3 1.00 U fsubp %st(1) -# CHECK-NEXT: 1 3 1.00 U fsubp %st(2) +# CHECK-NEXT: 1 3 1.00 U fsubp %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsubp %st, %st(2) # CHECK-NEXT: 1 10 1.00 * U fisubs (%ecx) # CHECK-NEXT: 1 10 1.00 * U fisubl (%eax) -# CHECK-NEXT: 1 3 1.00 U fsubr %st(0), %st(1) -# CHECK-NEXT: 1 3 1.00 U fsubr %st(2) +# CHECK-NEXT: 1 3 1.00 U fsubr %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsubr %st(2), %st # CHECK-NEXT: 1 10 1.00 * U fsubrs (%ecx) # CHECK-NEXT: 1 10 1.00 * U fsubrl (%eax) -# CHECK-NEXT: 1 3 1.00 U fsubrp %st(1) -# CHECK-NEXT: 1 3 1.00 U fsubrp %st(2) +# CHECK-NEXT: 1 3 1.00 U fsubrp %st, %st(1) +# CHECK-NEXT: 1 3 1.00 U fsubrp %st, %st(2) # CHECK-NEXT: 1 10 1.00 * U fisubrs (%ecx) # CHECK-NEXT: 1 10 1.00 * U fisubrl (%eax) # CHECK-NEXT: 1 1 1.00 U ftst @@ -343,8 +343,8 @@ fyl2xp1 # CHECK-NEXT: 1 1 1.00 U fucomp %st(1) # CHECK-NEXT: 1 1 1.00 U fucomp %st(3) # CHECK-NEXT: 1 1 1.00 U fucompp -# CHECK-NEXT: 1 9 0.50 U fucomi %st(3) -# CHECK-NEXT: 1 9 0.50 U fucompi %st(3) +# CHECK-NEXT: 1 9 0.50 U fucomi %st(3), %st +# CHECK-NEXT: 1 9 0.50 U fucompi %st(3), %st # CHECK-NEXT: 1 1 1.00 U wait # CHECK-NEXT: 1 1 1.00 U fxam # CHECK-NEXT: 1 1 0.25 U fxch %st(1) @@ -377,26 +377,26 @@ fyl2xp1 # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: # CHECK-NEXT: - - - - - - - - - - - - f2xm1 # CHECK-NEXT: - - - - - - - - - - 1.00 - fabs -# CHECK-NEXT: - - - - - - - 1.00 - - - - fadd %st(0), %st(1) -# CHECK-NEXT: - - - - - - - 1.00 - - - - fadd %st(2) +# CHECK-NEXT: - - - - - - - 1.00 - - - - fadd %st, %st(1) +# CHECK-NEXT: - - - - - - - 1.00 - - - - fadd %st(2), %st # CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - fadds (%ecx) # CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - faddl (%ecx) -# CHECK-NEXT: - - - - - - - 1.00 - - - - faddp %st(1) -# CHECK-NEXT: - - - - - - - 1.00 - - - - faddp %st(2) +# CHECK-NEXT: - - - - - - - 1.00 - - - - faddp %st, %st(1) +# CHECK-NEXT: - - - - - - - 1.00 - - - - faddp %st, %st(2) # CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - fiadds (%ecx) # CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - fiaddl (%ecx) # CHECK-NEXT: - - - - - - - - - - - - fbld (%ecx) # CHECK-NEXT: - - - - - - - - - - - - fbstp (%eax) # CHECK-NEXT: - - - - - - - - - - 1.00 - fchs # CHECK-NEXT: - - - - - - - - - - - - fnclex -# CHECK-NEXT: - - - - - - - - - - - - fcmovb %st(1), %st(0) -# CHECK-NEXT: - - - - - - - - - - - - fcmovbe %st(1), %st(0) -# CHECK-NEXT: - - - - - - - - - - - - fcmove %st(1), %st(0) -# CHECK-NEXT: - - - - - - - - - - - - fcmovnb %st(1), %st(0) -# CHECK-NEXT: - - - - - - - - - - - - fcmovnbe %st(1), %st(0) -# CHECK-NEXT: - - - - - - - - - - - - fcmovne %st(1), %st(0) -# CHECK-NEXT: - - - - - - - - - - - - fcmovnu %st(1), %st(0) -# CHECK-NEXT: - - - - - - - - - - - - fcmovu %st(1), %st(0) +# CHECK-NEXT: - - - - - - - - - - - - fcmovb %st(1), %st +# CHECK-NEXT: - - - - - - - - - - - - fcmovbe %st(1), %st +# CHECK-NEXT: - - - - - - - - - - - - fcmove %st(1), %st +# CHECK-NEXT: - - - - - - - - - - - - fcmovnb %st(1), %st +# CHECK-NEXT: - - - - - - - - - - - - fcmovnbe %st(1), %st +# CHECK-NEXT: - - - - - - - - - - - - fcmovne %st(1), %st +# CHECK-NEXT: - - - - - - - - - - - - fcmovnu %st(1), %st +# CHECK-NEXT: - - - - - - - - - - - - fcmovu %st(1), %st # CHECK-NEXT: - - - - - - - 1.00 - - - - fcom %st(1) # CHECK-NEXT: - - - - - - - 1.00 - - - - fcom %st(3) # CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - fcoms (%ecx) @@ -406,24 +406,24 @@ fyl2xp1 # CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - fcomps (%ecx) # CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - fcompl (%eax) # CHECK-NEXT: - - - - - - - 1.00 - - - - fcompp -# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 - 0.50 - - fcomi %st(3) -# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 - 0.50 - - fcompi %st(3) +# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 - 0.50 - - fcomi %st(3), %st +# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 - 0.50 - - fcompi %st(3), %st # CHECK-NEXT: - - - - - - - - - - - - fcos # CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - fdecstp -# CHECK-NEXT: - - - - - - - - - - 1.00 - fdiv %st(0), %st(1) -# CHECK-NEXT: - - - - - - - - - - 1.00 - fdiv %st(2) +# CHECK-NEXT: - - - - - - - - - - 1.00 - fdiv %st, %st(1) +# CHECK-NEXT: - - - - - - - - - - 1.00 - fdiv %st(2), %st # CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - fdivs (%ecx) # CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - fdivl (%eax) -# CHECK-NEXT: - - - - - - - - - - 1.00 - fdivp %st(1) -# CHECK-NEXT: - - - - - - - - - - 1.00 - fdivp %st(2) +# CHECK-NEXT: - - - - - - - - - - 1.00 - fdivp %st, %st(1) +# CHECK-NEXT: - - - - - - - - - - 1.00 - fdivp %st, %st(2) # CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - fidivs (%ecx) # CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - fidivl (%eax) -# CHECK-NEXT: - - - - - - - - - - 1.00 - fdivr %st(0), %st(1) -# CHECK-NEXT: - - - - - - - - - - 1.00 - fdivr %st(2) +# CHECK-NEXT: - - - - - - - - - - 1.00 - fdivr %st, %st(1) +# CHECK-NEXT: - - - - - - - - - - 1.00 - fdivr %st(2), %st # CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - fdivrs (%ecx) # CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - fdivrl (%eax) -# CHECK-NEXT: - - - - - - - - - - 1.00 - fdivrp %st(1) -# CHECK-NEXT: - - - - - - - - - - 1.00 - fdivrp %st(2) +# CHECK-NEXT: - - - - - - - - - - 1.00 - fdivrp %st, %st(1) +# CHECK-NEXT: - - - - - - - - - - 1.00 - fdivrp %st, %st(2) # CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - fidivrs (%ecx) # CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - fidivrl (%eax) # CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - ffree %st(0) @@ -457,12 +457,12 @@ fyl2xp1 # CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - fldln2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - fldpi # CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 - 0.50 - fldz -# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmul %st(0), %st(1) -# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmul %st(2) +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmul %st, %st(1) +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmul %st(2), %st # CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 - - - fmuls (%ecx) # CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 - - - fmull (%eax) -# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmulp %st(1) -# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmulp %st(2) +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmulp %st, %st(1) +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmulp %st, %st(2) # CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 - - - fimuls (%ecx) # CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 - - - fimull (%eax) # CHECK-NEXT: - - - - - - - 1.00 - - - - fnop @@ -490,20 +490,20 @@ fyl2xp1 # CHECK-NEXT: - - - - - - - - - - - - frstor (%eax) # CHECK-NEXT: - - - - - - - 1.00 - - - - wait # CHECK-NEXT: - - - - - - - - - - - - fnsave (%eax) -# CHECK-NEXT: - - - - - - - 1.00 - - - - fsub %st(0), %st(1) -# CHECK-NEXT: - - - - - - - 1.00 - - - - fsub %st(2) +# CHECK-NEXT: - - - - - - - 1.00 - - - - fsub %st, %st(1) +# CHECK-NEXT: - - - - - - - 1.00 - - - - fsub %st(2), %st # CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - fsubs (%ecx) # CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - fsubl (%eax) -# CHECK-NEXT: - - - - - - - 1.00 - - - - fsubp %st(1) -# CHECK-NEXT: - - - - - - - 1.00 - - - - fsubp %st(2) +# CHECK-NEXT: - - - - - - - 1.00 - - - - fsubp %st, %st(1) +# CHECK-NEXT: - - - - - - - 1.00 - - - - fsubp %st, %st(2) # CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - fisubs (%ecx) # CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - fisubl (%eax) -# CHECK-NEXT: - - - - - - - 1.00 - - - - fsubr %st(0), %st(1) -# CHECK-NEXT: - - - - - - - 1.00 - - - - fsubr %st(2) +# CHECK-NEXT: - - - - - - - 1.00 - - - - fsubr %st, %st(1) +# CHECK-NEXT: - - - - - - - 1.00 - - - - fsubr %st(2), %st # CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - fsubrs (%ecx) # CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - fsubrl (%eax) -# CHECK-NEXT: - - - - - - - 1.00 - - - - fsubrp %st(1) -# CHECK-NEXT: - - - - - - - 1.00 - - - - fsubrp %st(2) +# CHECK-NEXT: - - - - - - - 1.00 - - - - fsubrp %st, %st(1) +# CHECK-NEXT: - - - - - - - 1.00 - - - - fsubrp %st, %st(2) # CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - fisubrs (%ecx) # CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - fisubrl (%eax) # CHECK-NEXT: - - - - - - - 1.00 - - - - ftst @@ -512,8 +512,8 @@ fyl2xp1 # CHECK-NEXT: - - - - - - - 1.00 - - - - fucomp %st(1) # CHECK-NEXT: - - - - - - - 1.00 - - - - fucomp %st(3) # CHECK-NEXT: - - - - - - - 1.00 - - - - fucompp -# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 - 0.50 - - fucomi %st(3) -# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 - 0.50 - - fucompi %st(3) +# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 - 0.50 - - fucomi %st(3), %st +# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 - 0.50 - - fucompi %st(3), %st # CHECK-NEXT: - - - - - - - 1.00 - - - - wait # CHECK-NEXT: - - - - - - - - - - 1.00 - fxam # CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - fxch %st(1) diff --git a/llvm/utils/TableGen/X86RecognizableInstr.cpp b/llvm/utils/TableGen/X86RecognizableInstr.cpp index 2f9b428b8cfe8b..463609edb73b33 100644 --- a/llvm/utils/TableGen/X86RecognizableInstr.cpp +++ b/llvm/utils/TableGen/X86RecognizableInstr.cpp @@ -842,6 +842,7 @@ OperandType RecognizableInstr::typeFromString(const std::string &s, TYPE("f32mem", TYPE_M) TYPE("ssmem", TYPE_M) TYPE("RST", TYPE_ST) + TYPE("RSTi", TYPE_ST) TYPE("i128mem", TYPE_M) TYPE("i256mem", TYPE_M) TYPE("i512mem", TYPE_M) @@ -964,6 +965,7 @@ OperandEncoding RecognizableInstr::rmRegisterEncodingFromString(const std::string &s, uint8_t OpSize) { ENCODING("RST", ENCODING_FP) + ENCODING("RSTi", ENCODING_FP) ENCODING("GR16", ENCODING_RM) ENCODING("GR32", ENCODING_RM) ENCODING("GR32orGR64", ENCODING_RM) From e5523662b5b2583e592e8e74d0b435e0240adde1 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 12 Feb 2019 10:14:10 +0000 Subject: [PATCH 114/125] Merging r353142: ------------------------------------------------------------------------ r353142 | ctopper | 2019-02-05 07:13:14 +0100 (Tue, 05 Feb 2019) | 13 lines [X86] Change MS inline asm clobber list filter to check for 'fpsr' instead of 'fpsw' after D57641. Summary: The backend used to print the x87 FPSW register as 'fpsw', but gcc inline asm uses 'fpsr'. After D57641, the backend now uses 'fpsr' to match. Reviewers: rnk Reviewed By: rnk Subscribers: eraman, cfe-commits, llvm-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D57642 ------------------------------------------------------------------------ llvm-svn: 353819 --- clang/lib/Parse/ParseStmtAsm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Parse/ParseStmtAsm.cpp b/clang/lib/Parse/ParseStmtAsm.cpp index 9b96c5150e5695..3f5af7d44f36ce 100644 --- a/clang/lib/Parse/ParseStmtAsm.cpp +++ b/clang/lib/Parse/ParseStmtAsm.cpp @@ -637,7 +637,7 @@ StmtResult Parser::ParseMicrosoftAsmStatement(SourceLocation AsmLoc) { // Filter out "fpsw" and "mxcsr". They aren't valid GCC asm clobber // constraints. Clang always adds fpsr to the clobber list anyway. llvm::erase_if(Clobbers, [](const std::string &C) { - return C == "fpsw" || C == "mxcsr"; + return C == "fpsr" || C == "mxcsr"; }); // Build the vector of clobber StringRefs. From 2ccd8c108d13c0dce937be23d5ed563317f2566a Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 12 Feb 2019 10:45:41 +0000 Subject: [PATCH 115/125] Merging r353308 and r353383: ------------------------------------------------------------------------ r353308 | tnorthover | 2019-02-06 16:26:35 +0100 (Wed, 06 Feb 2019) | 5 lines AArch64: enforce even/odd register pairs for CASP instructions. ARMv8.1a CASP instructions need the first of the pair to be an even register (otherwise the encoding is unallocated). We enforced this during assembly, but not CodeGen before. ------------------------------------------------------------------------ ------------------------------------------------------------------------ r353383 | tnorthover | 2019-02-07 11:35:34 +0100 (Thu, 07 Feb 2019) | 4 lines AArch64: implement copy for paired GPR registers. When doing 128-bit atomics using CASP we might need to copy a GPRPair to a different register, but that was unimplemented up to now. ------------------------------------------------------------------------ llvm-svn: 353822 --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 41 +++++++++++++++++++ llvm/lib/Target/AArch64/AArch64InstrInfo.h | 4 ++ .../lib/Target/AArch64/AArch64RegisterInfo.td | 10 +++-- .../Disassembler/AArch64Disassembler.cpp | 4 +- .../CodeGen/AArch64/cmpxchg-lse-even-regs.ll | 17 ++++++++ llvm/test/CodeGen/AArch64/seqpaircopy.mir | 23 +++++++++++ 6 files changed, 93 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/cmpxchg-lse-even-regs.ll create mode 100644 llvm/test/CodeGen/AArch64/seqpaircopy.mir diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index ada06788857293..50316ebe218b70 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2292,6 +2292,31 @@ void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB, } } +void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + DebugLoc DL, unsigned DestReg, + unsigned SrcReg, bool KillSrc, + unsigned Opcode, unsigned ZeroReg, + llvm::ArrayRef Indices) const { + const TargetRegisterInfo *TRI = &getRegisterInfo(); + unsigned NumRegs = Indices.size(); + +#ifndef NDEBUG + uint16_t DestEncoding = TRI->getEncodingValue(DestReg); + uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg); + assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 && + "GPR reg sequences should not be able to overlap"); +#endif + + for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) { + const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode)); + AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI); + MIB.addReg(ZeroReg); + AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI); + MIB.addImm(0); + } +} + void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, unsigned DestReg, @@ -2431,6 +2456,22 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } + if (AArch64::XSeqPairsClassRegClass.contains(DestReg) && + AArch64::XSeqPairsClassRegClass.contains(SrcReg)) { + static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64}; + copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs, + AArch64::XZR, Indices); + return; + } + + if (AArch64::WSeqPairsClassRegClass.contains(DestReg) && + AArch64::WSeqPairsClassRegClass.contains(SrcReg)) { + static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32}; + copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs, + AArch64::WZR, Indices); + return; + } + if (AArch64::FPR128RegClass.contains(DestReg) && AArch64::FPR128RegClass.contains(SrcReg)) { if (Subtarget.hasNEON()) { diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 9954669d567508..e48c26d4a84a03 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -122,6 +122,10 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode, llvm::ArrayRef Indices) const; + void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + DebugLoc DL, unsigned DestReg, unsigned SrcReg, + bool KillSrc, unsigned Opcode, unsigned ZeroReg, + llvm::ArrayRef Indices) const; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const override; diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td index d3710cea0687ec..8e6aa69eae854f 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td @@ -649,10 +649,12 @@ def FPR128Op : RegisterOperand { // ARMv8.1a atomic CASP register operands -def WSeqPairs : RegisterTuples<[sube32, subo32], - [(rotl GPR32, 0), (rotl GPR32, 1)]>; -def XSeqPairs : RegisterTuples<[sube64, subo64], - [(rotl GPR64, 0), (rotl GPR64, 1)]>; +def WSeqPairs : RegisterTuples<[sube32, subo32], + [(decimate (rotl GPR32, 0), 2), + (decimate (rotl GPR32, 1), 2)]>; +def XSeqPairs : RegisterTuples<[sube64, subo64], + [(decimate (rotl GPR64, 0), 2), + (decimate (rotl GPR64, 1), 2)]>; def WSeqPairsClass : RegisterClass<"AArch64", [untyped], 32, (add WSeqPairs)>{ diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index 4102f1eb5cc12c..64afabd450c1fa 100644 --- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -1779,8 +1779,8 @@ static DecodeStatus DecodeGPRSeqPairsClassRegisterClass(MCInst &Inst, if (RegNo & 0x1) return Fail; - unsigned Register = AArch64MCRegisterClasses[RegClassID].getRegister(RegNo); - Inst.addOperand(MCOperand::createReg(Register)); + unsigned Reg = AArch64MCRegisterClasses[RegClassID].getRegister(RegNo / 2); + Inst.addOperand(MCOperand::createReg(Reg)); return Success; } diff --git a/llvm/test/CodeGen/AArch64/cmpxchg-lse-even-regs.ll b/llvm/test/CodeGen/AArch64/cmpxchg-lse-even-regs.ll new file mode 100644 index 00000000000000..9c6d8cc2053312 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/cmpxchg-lse-even-regs.ll @@ -0,0 +1,17 @@ +; RUN: llc -mtriple arm64-apple-ios -mattr=+lse %s -o - | FileCheck %s + +; Only "even,even+1" pairs are valid for CASP instructions. Make sure LLVM +; doesn't allocate odd ones and that it can copy them around properly. N.b. we +; don't actually check that they're sequential because FileCheck can't; odd/even +; will have to be good enough. +define void @test_atomic_cmpxchg_i128_register_shuffling(i128* %addr, i128 %desired, i128 %new) nounwind { +; CHECK-LABEL: test_atomic_cmpxchg_i128_register_shuffling: +; CHECK-DAG: mov [[DESIRED_LO:x[0-9]*[02468]]], x1 +; CHECK-DAG: mov [[DESIRED_HI:x[0-9]*[13579]]], x2 +; CHECK-DAG: mov [[NEW_LO:x[0-9]*[02468]]], x3 +; CHECK-DAG: mov [[NEW_HI:x[0-9]*[13579]]], x4 +; CHECK: caspal [[DESIRED_LO]], [[DESIRED_HI]], [[NEW_LO]], [[NEW_HI]], [x0] + + %res = cmpxchg i128* %addr, i128 %desired, i128 %new seq_cst seq_cst + ret void +} diff --git a/llvm/test/CodeGen/AArch64/seqpaircopy.mir b/llvm/test/CodeGen/AArch64/seqpaircopy.mir new file mode 100644 index 00000000000000..89511cbf726bd0 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/seqpaircopy.mir @@ -0,0 +1,23 @@ +# RUN: llc -o - %s -mtriple=aarch64-- -mattr=+v8.1a -run-pass=postrapseudos | FileCheck %s +--- +# CHECK-LABEL: name: copy_xseqpairs +name: copy_xseqpairs +body: | + bb.0: + ; CHECK: $x4_x5 = CASPALX $x4_x5, $x2_x3, $x0 + ; CHECK: $x0 = ORRXrs $xzr, $x4, 0 + ; CHECK: $x1 = ORRXrs $xzr, $x5, 0 + $x4_x5 = CASPALX $x4_x5, $x2_x3, $x0 + $x0_x1 = COPY $x4_x5 +... +--- +# CHECK-LABEL: name: copy_wseqpairs +name: copy_wseqpairs +body: | + bb.0: + ; CHECK: $w4_w5 = CASPALW $w4_w5, $w2_w3, $x0 + ; CHECK: $w0 = ORRWrs $wzr, $w4, 0 + ; CHECK: $w1 = ORRWrs $wzr, $w5, 0 + $w4_w5 = CASPALW $w4_w5, $w2_w3, $x0 + $w0_w1 = COPY $w4_w5 +... From df368fd7b9b92cf6e4034b6cd4a5313df1c72219 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 12 Feb 2019 10:56:21 +0000 Subject: [PATCH 116/125] Merging r353411: ------------------------------------------------------------------------ r353411 | erichkeane | 2019-02-07 16:14:11 +0100 (Thu, 07 Feb 2019) | 7 lines Fix r350643 to limit COFF emission to <= 32 BYTES instead of BITS. The patch in r350643 incorrectly sets the COFF emission based on bits instead of bytes. This patch converts the 32 via CharUnits to bits to compare the correct values. Change-Id: Icf38a16470ad5ae3531374969c033557ddb0d323 ------------------------------------------------------------------------ llvm-svn: 353825 --- clang/lib/CodeGen/CodeGenModule.cpp | 8 +++++--- clang/test/CodeGen/microsoft-no-common-align.c | 3 +++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 244738042cef09..2ac59fb4de25f0 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -3762,13 +3762,15 @@ static bool isVarDeclStrongDefinition(const ASTContext &Context, } } - // Microsoft's link.exe doesn't support alignments greater than 32 for common - // symbols, so symbols with greater alignment requirements cannot be common. + // Microsoft's link.exe doesn't support alignments greater than 32 bytes for + // common symbols, so symbols with greater alignment requirements cannot be + // common. // Other COFF linkers (ld.bfd and LLD) support arbitrary power-of-two // alignments for common symbols via the aligncomm directive, so this // restriction only applies to MSVC environments. if (Context.getTargetInfo().getTriple().isKnownWindowsMSVCEnvironment() && - Context.getTypeAlignIfKnown(D->getType()) > 32) + Context.getTypeAlignIfKnown(D->getType()) > + Context.toBits(CharUnits::fromQuantity(32))) return true; return false; diff --git a/clang/test/CodeGen/microsoft-no-common-align.c b/clang/test/CodeGen/microsoft-no-common-align.c index fc46946c00ed23..a7a27a062704fa 100644 --- a/clang/test/CodeGen/microsoft-no-common-align.c +++ b/clang/test/CodeGen/microsoft-no-common-align.c @@ -6,3 +6,6 @@ TooLargeAlignment TooBig; // CHECK: @TooBig = dso_local global <16 x float> zeroinitializer, align 64 NormalAlignment JustRight; // CHECK: @JustRight = common dso_local global <1 x float> zeroinitializer, align 4 + +TooLargeAlignment *IsAPointer; +// CHECK: @IsAPointer = common dso_local global <16 x float>* null, align 8 From e57cf6c5dc507872a693141f9009dff98e614f0f Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 12 Feb 2019 11:02:43 +0000 Subject: [PATCH 117/125] Merging r353431: ------------------------------------------------------------------------ r353431 | stulova | 2019-02-07 18:32:37 +0100 (Thu, 07 Feb 2019) | 9 lines [OpenCL][PR40603] In C++ preserve compatibility with OpenCL C v2.0 Valid OpenCL C code should still compile in C++ mode. This change enables extensions and OpenCL types. Differential Revision: https://reviews.llvm.org/D57824 ------------------------------------------------------------------------ llvm-svn: 353826 --- clang/include/clang/Basic/OpenCLOptions.h | 29 +++--- clang/lib/Frontend/InitPreprocessor.cpp | 7 +- clang/lib/Parse/ParsePragma.cpp | 9 +- clang/lib/Sema/Sema.cpp | 7 +- clang/test/SemaOpenCL/extension-version.cl | 114 ++++++++++----------- clang/test/SemaOpenCL/extensions.cl | 9 +- 6 files changed, 92 insertions(+), 83 deletions(-) diff --git a/clang/include/clang/Basic/OpenCLOptions.h b/clang/include/clang/Basic/OpenCLOptions.h index cc4e9922dca03c..c76fa88092b9be 100644 --- a/clang/include/clang/Basic/OpenCLOptions.h +++ b/clang/include/clang/Basic/OpenCLOptions.h @@ -15,6 +15,7 @@ #ifndef LLVM_CLANG_BASIC_OPENCLOPTIONS_H #define LLVM_CLANG_BASIC_OPENCLOPTIONS_H +#include "clang/Basic/LangOptions.h" #include "llvm/ADT/StringMap.h" namespace clang { @@ -42,25 +43,29 @@ class OpenCLOptions { // Is supported as either an extension or an (optional) core feature for // OpenCL version \p CLVer. - bool isSupported(llvm::StringRef Ext, unsigned CLVer) const { + bool isSupported(llvm::StringRef Ext, LangOptions LO) const { + // In C++ mode all extensions should work at least as in v2.0. + auto CLVer = LO.OpenCLCPlusPlus ? 200 : LO.OpenCLVersion; auto I = OptMap.find(Ext)->getValue(); return I.Supported && I.Avail <= CLVer; } // Is supported (optional) OpenCL core features for OpenCL version \p CLVer. // For supported extension, return false. - bool isSupportedCore(llvm::StringRef Ext, unsigned CLVer) const { + bool isSupportedCore(llvm::StringRef Ext, LangOptions LO) const { + // In C++ mode all extensions should work at least as in v2.0. + auto CLVer = LO.OpenCLCPlusPlus ? 200 : LO.OpenCLVersion; auto I = OptMap.find(Ext)->getValue(); - return I.Supported && I.Avail <= CLVer && - I.Core != ~0U && CLVer >= I.Core; + return I.Supported && I.Avail <= CLVer && I.Core != ~0U && CLVer >= I.Core; } // Is supported OpenCL extension for OpenCL version \p CLVer. // For supported (optional) core feature, return false. - bool isSupportedExtension(llvm::StringRef Ext, unsigned CLVer) const { + bool isSupportedExtension(llvm::StringRef Ext, LangOptions LO) const { + // In C++ mode all extensions should work at least as in v2.0. + auto CLVer = LO.OpenCLCPlusPlus ? 200 : LO.OpenCLVersion; auto I = OptMap.find(Ext)->getValue(); - return I.Supported && I.Avail <= CLVer && - (I.Core == ~0U || CLVer < I.Core); + return I.Supported && I.Avail <= CLVer && (I.Core == ~0U || CLVer < I.Core); } void enable(llvm::StringRef Ext, bool V = true) { @@ -122,10 +127,10 @@ class OpenCLOptions { I->second.Enabled = false; } - void enableSupportedCore(unsigned CLVer) { - for (llvm::StringMap::iterator I = OptMap.begin(), - E = OptMap.end(); I != E; ++I) - if (isSupportedCore(I->getKey(), CLVer)) + void enableSupportedCore(LangOptions LO) { + for (llvm::StringMap::iterator I = OptMap.begin(), E = OptMap.end(); + I != E; ++I) + if (isSupportedCore(I->getKey(), LO)) I->second.Enabled = true; } @@ -133,6 +138,6 @@ class OpenCLOptions { friend class ASTReader; }; -} // end namespace clang +} // end namespace clang #endif diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index 66807b097d4075..4cde22ce9aa445 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -1059,10 +1059,9 @@ static void InitializePredefinedMacros(const TargetInfo &TI, // OpenCL definitions. if (LangOpts.OpenCL) { -#define OPENCLEXT(Ext) \ - if (TI.getSupportedOpenCLOpts().isSupported(#Ext, \ - LangOpts.OpenCLVersion)) \ - Builder.defineMacro(#Ext); +#define OPENCLEXT(Ext) \ + if (TI.getSupportedOpenCLOpts().isSupported(#Ext, LangOpts)) \ + Builder.defineMacro(#Ext); #include "clang/Basic/OpenCLExtensions.def" auto Arch = TI.getTriple().getArch(); diff --git a/clang/lib/Parse/ParsePragma.cpp b/clang/lib/Parse/ParsePragma.cpp index 380eb64997a711..7e9b1011e81af2 100644 --- a/clang/lib/Parse/ParsePragma.cpp +++ b/clang/lib/Parse/ParsePragma.cpp @@ -693,13 +693,12 @@ void Parser::HandlePragmaOpenCLExtension() { if (Name == "all") { if (State == Disable) { Opt.disableAll(); - Opt.enableSupportedCore(getLangOpts().OpenCLVersion); + Opt.enableSupportedCore(getLangOpts()); } else { PP.Diag(NameLoc, diag::warn_pragma_expected_predicate) << 1; } } else if (State == Begin) { - if (!Opt.isKnown(Name) || - !Opt.isSupported(Name, getLangOpts().OpenCLVersion)) { + if (!Opt.isKnown(Name) || !Opt.isSupported(Name, getLangOpts())) { Opt.support(Name); } Actions.setCurrentOpenCLExtension(Name); @@ -709,9 +708,9 @@ void Parser::HandlePragmaOpenCLExtension() { Actions.setCurrentOpenCLExtension(""); } else if (!Opt.isKnown(Name)) PP.Diag(NameLoc, diag::warn_pragma_unknown_extension) << Ident; - else if (Opt.isSupportedExtension(Name, getLangOpts().OpenCLVersion)) + else if (Opt.isSupportedExtension(Name, getLangOpts())) Opt.enable(Name, State == Enable); - else if (Opt.isSupportedCore(Name, getLangOpts().OpenCLVersion)) + else if (Opt.isSupportedCore(Name, getLangOpts())) PP.Diag(NameLoc, diag::warn_pragma_extension_is_core) << Ident; else PP.Diag(NameLoc, diag::warn_pragma_unsupported_extension) << Ident; diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 9fa39968625a60..9d33ec51909240 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -256,11 +256,12 @@ void Sema::Initialize() { // Initialize predefined OpenCL types and supported extensions and (optional) // core features. if (getLangOpts().OpenCL) { - getOpenCLOptions().addSupport(Context.getTargetInfo().getSupportedOpenCLOpts()); - getOpenCLOptions().enableSupportedCore(getLangOpts().OpenCLVersion); + getOpenCLOptions().addSupport( + Context.getTargetInfo().getSupportedOpenCLOpts()); + getOpenCLOptions().enableSupportedCore(getLangOpts()); addImplicitTypedef("sampler_t", Context.OCLSamplerTy); addImplicitTypedef("event_t", Context.OCLEventTy); - if (getLangOpts().OpenCLVersion >= 200) { + if (getLangOpts().OpenCLCPlusPlus || getLangOpts().OpenCLVersion >= 200) { addImplicitTypedef("clk_event_t", Context.OCLClkEventTy); addImplicitTypedef("queue_t", Context.OCLQueueTy); addImplicitTypedef("reserve_id_t", Context.OCLReserveIDTy); diff --git a/clang/test/SemaOpenCL/extension-version.cl b/clang/test/SemaOpenCL/extension-version.cl index a587f1db99af20..d976cfb3a4354c 100644 --- a/clang/test/SemaOpenCL/extension-version.cl +++ b/clang/test/SemaOpenCL/extension-version.cl @@ -2,12 +2,14 @@ // RUN: %clang_cc1 -x cl -cl-std=CL1.1 %s -verify -triple spir-unknown-unknown // RUN: %clang_cc1 -x cl -cl-std=CL1.2 %s -verify -triple spir-unknown-unknown // RUN: %clang_cc1 -x cl -cl-std=CL2.0 %s -verify -triple spir-unknown-unknown +// RUN: %clang_cc1 -x cl -cl-std=c++ %s -verify -triple spir-unknown-unknown // RUN: %clang_cc1 -x cl -cl-std=CL %s -verify -triple spir-unknown-unknown -Wpedantic-core-features -DTEST_CORE_FEATURES // RUN: %clang_cc1 -x cl -cl-std=CL1.1 %s -verify -triple spir-unknown-unknown -Wpedantic-core-features -DTEST_CORE_FEATURES // RUN: %clang_cc1 -x cl -cl-std=CL1.2 %s -verify -triple spir-unknown-unknown -Wpedantic-core-features -DTEST_CORE_FEATURES // RUN: %clang_cc1 -x cl -cl-std=CL2.0 %s -verify -triple spir-unknown-unknown -Wpedantic-core-features -DTEST_CORE_FEATURES +// RUN: %clang_cc1 -x cl -cl-std=c++ %s -verify -triple spir-unknown-unknown -Wpedantic-core-features -DTEST_CORE_FEATURES -#if __OPENCL_C_VERSION__ >= 200 && ! defined TEST_CORE_FEATURES +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) && !defined(TEST_CORE_FEATURES) // expected-no-diagnostics #endif @@ -47,44 +49,44 @@ #ifndef cl_khr_byte_addressable_store #error "Missing cl_khr_byte_addressable_store define" #endif -#pragma OPENCL EXTENSION cl_khr_byte_addressable_store: enable -#if (__OPENCL_C_VERSION__ >= 110) && defined TEST_CORE_FEATURES +#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 110) && defined TEST_CORE_FEATURES // expected-warning@-2{{OpenCL extension 'cl_khr_byte_addressable_store' is core feature or supported optional core feature - ignoring}} #endif #ifndef cl_khr_global_int32_base_atomics #error "Missing cl_khr_global_int32_base_atomics define" #endif -#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics: enable -#if (__OPENCL_C_VERSION__ >= 110) && defined TEST_CORE_FEATURES +#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 110) && defined TEST_CORE_FEATURES // expected-warning@-2{{OpenCL extension 'cl_khr_global_int32_base_atomics' is core feature or supported optional core feature - ignoring}} #endif #ifndef cl_khr_global_int32_extended_atomics #error "Missing cl_khr_global_int32_extended_atomics define" #endif -#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics: enable -#if (__OPENCL_C_VERSION__ >= 110) && defined TEST_CORE_FEATURES +#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 110) && defined TEST_CORE_FEATURES // expected-warning@-2{{OpenCL extension 'cl_khr_global_int32_extended_atomics' is core feature or supported optional core feature - ignoring}} #endif #ifndef cl_khr_local_int32_base_atomics #error "Missing cl_khr_local_int32_base_atomics define" #endif -#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics: enable -#if (__OPENCL_C_VERSION__ >= 110) && defined TEST_CORE_FEATURES +#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 110) && defined TEST_CORE_FEATURES // expected-warning@-2{{OpenCL extension 'cl_khr_local_int32_base_atomics' is core feature or supported optional core feature - ignoring}} #endif #ifndef cl_khr_local_int32_extended_atomics #error "Missing cl_khr_local_int32_extended_atomics define" #endif -#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics: enable -#if (__OPENCL_C_VERSION__ >= 110) && defined TEST_CORE_FEATURES +#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 110) && defined TEST_CORE_FEATURES // expected-warning@-2{{OpenCL extension 'cl_khr_local_int32_extended_atomics' is core feature or supported optional core feature - ignoring}} #endif -#if (__OPENCL_C_VERSION__ < 110) +#if (defined(__OPENCL_C_VERSION__) && __OPENCL_C_VERSION__ < 110) // Deprecated abvoe 1.0 #ifndef cl_khr_select_fprounding_mode #error "Missing cl_khr_select_fp_rounding_mode define" @@ -97,8 +99,8 @@ #ifndef cl_khr_fp64 #error "Missing cl_khr_fp64 define" #endif -#pragma OPENCL EXTENSION cl_khr_fp64: enable -#if (__OPENCL_C_VERSION__ >= 120) && defined TEST_CORE_FEATURES +#pragma OPENCL EXTENSION cl_khr_fp64 : enable +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 120) && defined TEST_CORE_FEATURES // expected-warning@-2{{OpenCL extension 'cl_khr_fp64' is core feature or supported optional core feature - ignoring}} #endif @@ -106,131 +108,129 @@ #ifndef cl_khr_3d_image_writes #error "Missing cl_khr_3d_image_writes define" #endif -#pragma OPENCL EXTENSION cl_khr_3d_image_writes: enable -#if (__OPENCL_C_VERSION__ >= 200) && defined TEST_CORE_FEATURES +#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) && defined TEST_CORE_FEATURES // expected-warning@-2{{OpenCL extension 'cl_khr_3d_image_writes' is core feature or supported optional core feature - ignoring}} #endif - - -#if (__OPENCL_C_VERSION__ >= 110) +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 110) #ifndef cl_khr_gl_event #error "Missing cl_khr_gl_event define" #endif #else // expected-warning@+2{{unsupported OpenCL extension 'cl_khr_gl_event' - ignoring}} #endif -#pragma OPENCL EXTENSION cl_khr_gl_event: enable +#pragma OPENCL EXTENSION cl_khr_gl_event : enable -#if (__OPENCL_C_VERSION__ >= 110) +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 110) #ifndef cl_khr_d3d10_sharing #error "Missing cl_khr_d3d10_sharing define" #endif #else // expected-warning@+2{{unsupported OpenCL extension 'cl_khr_d3d10_sharing' - ignoring}} #endif -#pragma OPENCL EXTENSION cl_khr_d3d10_sharing: enable +#pragma OPENCL EXTENSION cl_khr_d3d10_sharing : enable -#if (__OPENCL_C_VERSION__ >= 110) +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 110) #ifndef cles_khr_int64 #error "Missing cles_khr_int64 define" #endif #else // expected-warning@+2{{unsupported OpenCL extension 'cles_khr_int64' - ignoring}} #endif -#pragma OPENCL EXTENSION cles_khr_int64: enable +#pragma OPENCL EXTENSION cles_khr_int64 : enable -#if (__OPENCL_C_VERSION__ >= 120) +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 120) #ifndef cl_khr_context_abort #error "Missing cl_context_abort define" #endif #else // expected-warning@+2{{unsupported OpenCL extension 'cl_khr_context_abort' - ignoring}} #endif -#pragma OPENCL EXTENSION cl_khr_context_abort: enable +#pragma OPENCL EXTENSION cl_khr_context_abort : enable -#if (__OPENCL_C_VERSION__ >= 120) +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 120) #ifndef cl_khr_d3d11_sharing #error "Missing cl_khr_d3d11_sharing define" #endif #else // expected-warning@+2{{unsupported OpenCL extension 'cl_khr_d3d11_sharing' - ignoring}} #endif -#pragma OPENCL EXTENSION cl_khr_d3d11_sharing: enable +#pragma OPENCL EXTENSION cl_khr_d3d11_sharing : enable -#if (__OPENCL_C_VERSION__ >= 120) +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 120) #ifndef cl_khr_dx9_media_sharing #error "Missing cl_khr_dx9_media_sharing define" #endif #else // expected-warning@+2{{unsupported OpenCL extension 'cl_khr_dx9_media_sharing' - ignoring}} #endif -#pragma OPENCL EXTENSION cl_khr_dx9_media_sharing: enable +#pragma OPENCL EXTENSION cl_khr_dx9_media_sharing : enable -#if (__OPENCL_C_VERSION__ >= 120) +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 120) #ifndef cl_khr_image2d_from_buffer #error "Missing cl_khr_image2d_from_buffer define" #endif #else // expected-warning@+2{{unsupported OpenCL extension 'cl_khr_image2d_from_buffer' - ignoring}} #endif -#pragma OPENCL EXTENSION cl_khr_image2d_from_buffer: enable +#pragma OPENCL EXTENSION cl_khr_image2d_from_buffer : enable -#if (__OPENCL_C_VERSION__ >= 120) +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 120) #ifndef cl_khr_initialize_memory #error "Missing cl_khr_initialize_memory define" #endif #else // expected-warning@+2{{unsupported OpenCL extension 'cl_khr_initialize_memory' - ignoring}} #endif -#pragma OPENCL EXTENSION cl_khr_initialize_memory: enable +#pragma OPENCL EXTENSION cl_khr_initialize_memory : enable -#if (__OPENCL_C_VERSION__ >= 120) +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 120) #ifndef cl_khr_gl_depth_images #error "Missing cl_khr_gl_depth_images define" #endif #else // expected-warning@+2{{unsupported OpenCL extension 'cl_khr_gl_depth_images' - ignoring}} #endif -#pragma OPENCL EXTENSION cl_khr_gl_depth_images: enable +#pragma OPENCL EXTENSION cl_khr_gl_depth_images : enable -#if (__OPENCL_C_VERSION__ >= 120) +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 120) #ifndef cl_khr_gl_msaa_sharing #error "Missing cl_khr_gl_msaa_sharing define" #endif #else // expected-warning@+2{{unsupported OpenCL extension 'cl_khr_gl_msaa_sharing' - ignoring}} #endif -#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing: enable +#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable -#if (__OPENCL_C_VERSION__ >= 120) +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 120) #ifndef cl_khr_spir #error "Missing cl_khr_spir define" #endif #else // expected-warning@+2{{unsupported OpenCL extension 'cl_khr_spir' - ignoring}} #endif -#pragma OPENCL EXTENSION cl_khr_spir: enable +#pragma OPENCL EXTENSION cl_khr_spir : enable -#if (__OPENCL_C_VERSION__ >= 200) +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) #ifndef cl_khr_egl_event #error "Missing cl_khr_egl_event define" #endif #else // expected-warning@+2{{unsupported OpenCL extension 'cl_khr_egl_event' - ignoring}} #endif -#pragma OPENCL EXTENSION cl_khr_egl_event: enable +#pragma OPENCL EXTENSION cl_khr_egl_event : enable -#if (__OPENCL_C_VERSION__ >= 200) +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) #ifndef cl_khr_egl_image #error "Missing cl_khr_egl_image define" #endif #else // expected-warning@+2{{unsupported OpenCL extension 'cl_khr_egl_image' - ignoring}} #endif -#pragma OPENCL EXTENSION cl_khr_egl_image: enable +#pragma OPENCL EXTENSION cl_khr_egl_image : enable -#if (__OPENCL_C_VERSION__ >= 200) +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) #ifndef cl_khr_mipmap_image #error "Missing cl_khr_mipmap_image define" #endif @@ -240,18 +240,18 @@ #endif // expected-warning@+2{{unsupported OpenCL extension 'cl_khr_mipmap_image' - ignoring}} #endif -#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable +#pragma OPENCL EXTENSION cl_khr_mipmap_image : enable -#if (__OPENCL_C_VERSION__ >= 200) +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) #ifndef cl_khr_srgb_image_writes #error "Missing cl_khr_srgb_image_writes define" #endif #else // expected-warning@+2{{unsupported OpenCL extension 'cl_khr_srgb_image_writes' - ignoring}} #endif -#pragma OPENCL EXTENSION cl_khr_srgb_image_writes: enable +#pragma OPENCL EXTENSION cl_khr_srgb_image_writes : enable -#if (__OPENCL_C_VERSION__ >= 200) +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) #ifndef cl_khr_subgroups #error "Missing cl_khr_subgroups define" #endif @@ -261,9 +261,9 @@ #endif // expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroups' - ignoring}} #endif -#pragma OPENCL EXTENSION cl_khr_subgroups: enable +#pragma OPENCL EXTENSION cl_khr_subgroups : enable -#if (__OPENCL_C_VERSION__ >= 200) +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) #ifndef cl_khr_terminate_context #error "Missing cl_khr_terminate_context define" #endif @@ -280,9 +280,9 @@ #ifndef cl_amd_media_ops2 #error "Missing cl_amd_media_ops2 define" #endif -#pragma OPENCL EXTENSION cl_amd_media_ops2: enable +#pragma OPENCL EXTENSION cl_amd_media_ops2 : enable -#if (__OPENCL_C_VERSION__ >= 120) +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 120) #ifndef cl_khr_depth_images #error "Missing cl_khr_depth_images define" #endif @@ -292,9 +292,9 @@ #endif // expected-warning@+2{{unsupported OpenCL extension 'cl_khr_depth_images' - ignoring}} #endif -#pragma OPENCL EXTENSION cl_khr_depth_images: enable +#pragma OPENCL EXTENSION cl_khr_depth_images : enable -#if (__OPENCL_C_VERSION__ >= 120) +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 120) #ifndef cl_intel_subgroups #error "Missing cl_intel_subgroups define" #endif @@ -303,7 +303,7 @@ #endif #pragma OPENCL EXTENSION cl_intel_subgroups : enable -#if (__OPENCL_C_VERSION__ >= 120) +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 120) #ifndef cl_intel_subgroups_short #error "Missing cl_intel_subgroups_short define" #endif @@ -312,7 +312,7 @@ #endif #pragma OPENCL EXTENSION cl_intel_subgroups_short : enable -#if (__OPENCL_C_VERSION__ >= 120) +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 120) #ifndef cl_intel_device_side_avc_motion_estimation #error "Missing cl_intel_device_side_avc_motion_estimation define" #endif diff --git a/clang/test/SemaOpenCL/extensions.cl b/clang/test/SemaOpenCL/extensions.cl index 5f95e32d4a549b..e9dba69ecd7c94 100644 --- a/clang/test/SemaOpenCL/extensions.cl +++ b/clang/test/SemaOpenCL/extensions.cl @@ -28,6 +28,7 @@ // enabled by default with -cl-std=CL2.0). // // RUN: %clang_cc1 %s -triple amdgcn-unknown-unknown -verify -pedantic -fsyntax-only -cl-std=CL2.0 -finclude-default-header +// RUN: %clang_cc1 %s -triple spir-unknown-unknown -verify -pedantic -fsyntax-only -cl-std=c++ #ifdef _OPENCL_H_ // expected-no-diagnostics @@ -37,7 +38,11 @@ // expected-no-diagnostics #endif -#if __OPENCL_C_VERSION__ < 120 +#ifdef __OPENCL_CPP_VERSION__ +// expected-no-diagnostics +#endif + +#if (defined(__OPENCL_C_VERSION__) && __OPENCL_C_VERSION__ < 120) void f1(double da) { // expected-error {{type 'double' requires cl_khr_fp64 extension}} double d; // expected-error {{type 'double' requires cl_khr_fp64 extension}} (void) 1.0; // expected-warning {{double precision constant requires cl_khr_fp64}} @@ -89,7 +94,7 @@ void f2(void) { // expected-warning@-2{{unsupported OpenCL extension 'cl_khr_fp64' - ignoring}} #endif -#if __OPENCL_C_VERSION__ < 120 +#if (defined(__OPENCL_C_VERSION__) && __OPENCL_C_VERSION__ < 120) void f3(void) { double d; // expected-error {{type 'double' requires cl_khr_fp64 extension}} } From dfde9d6b350c6793352e2e88a9463675b38962cb Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 12 Feb 2019 11:05:22 +0000 Subject: [PATCH 118/125] Merging r353480: ------------------------------------------------------------------------ r353480 | petarj | 2019-02-07 23:57:33 +0100 (Thu, 07 Feb 2019) | 15 lines [mips][micromips] Fix how values in .gcc_except_table are calculated When a landing pad is calculated in a program that is compiled for micromips with -fPIC flag, it will point to an even address. Such an error will cause a segmentation fault, as the instructions in micromips are aligned on odd addresses. This patch sets the last bit of the offset where a landing pad is, to 1, which will effectively be an odd address and point to the instruction exactly. r344591 fixed this issue for -static compilation. Patch by Aleksandar Beserminji. Differential Revision: https://reviews.llvm.org/D57677 ------------------------------------------------------------------------ llvm-svn: 353827 --- llvm/lib/MC/MCExpr.cpp | 5 +++++ llvm/test/CodeGen/Mips/micromips-b-range.ll | 8 +++---- llvm/test/DebugInfo/Mips/eh_frame.ll | 24 +++++++++++++++------ 3 files changed, 27 insertions(+), 10 deletions(-) diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp index 3c022199145fba..0e4174a7a4c947 100644 --- a/llvm/lib/MC/MCExpr.cpp +++ b/llvm/lib/MC/MCExpr.cpp @@ -559,6 +559,11 @@ static void AttemptToFoldSymbolOffsetDifference( if (Asm->isThumbFunc(&SA)) Addend |= 1; + // If symbol is labeled as micromips, we set low-bit to ensure + // correct offset in .gcc_except_table + if (Asm->getBackend().isMicroMips(&SA)) + Addend |= 1; + // Clear the symbol expr pointers to indicate we have folded these // operands. A = B = nullptr; diff --git a/llvm/test/CodeGen/Mips/micromips-b-range.ll b/llvm/test/CodeGen/Mips/micromips-b-range.ll index 5831ae81baedad..27a0db545f743f 100644 --- a/llvm/test/CodeGen/Mips/micromips-b-range.ll +++ b/llvm/test/CodeGen/Mips/micromips-b-range.ll @@ -13,7 +13,7 @@ ; CHECK-NEXT: 1e: fb fd 00 00 sw $ra, 0($sp) ; CHECK-NEXT: 22: 41 a1 00 01 lui $1, 1 ; CHECK-NEXT: 26: 40 60 00 02 bal 8 -; CHECK-NEXT: 2a: 30 21 04 68 addiu $1, $1, 1128 +; CHECK-NEXT: 2a: 30 21 04 69 addiu $1, $1, 1129 ; CHECK-NEXT: 2e: 00 3f 09 50 addu $1, $ra, $1 ; CHECK-NEXT: 32: ff fd 00 00 lw $ra, 0($sp) ; CHECK-NEXT: 36: 00 01 0f 3c jr $1 @@ -27,7 +27,7 @@ ; CHECK-NEXT: 56: fb fd 00 00 sw $ra, 0($sp) ; CHECK-NEXT: 5a: 41 a1 00 01 lui $1, 1 ; CHECK-NEXT: 5e: 40 60 00 02 bal 8 -; CHECK-NEXT: 62: 30 21 04 5c addiu $1, $1, 1116 +; CHECK-NEXT: 62: 30 21 04 5d addiu $1, $1, 1117 ; CHECK-NEXT: 66: 00 3f 09 50 addu $1, $ra, $1 ; CHECK-NEXT: 6a: ff fd 00 00 lw $ra, 0($sp) ; CHECK-NEXT: 6e: 00 01 0f 3c jr $1 @@ -39,7 +39,7 @@ ; CHECK-NEXT: 86: fb fd 00 00 sw $ra, 0($sp) ; CHECK-NEXT: 8a: 41 a1 00 01 lui $1, 1 ; CHECK-NEXT: 8e: 40 60 00 02 bal 8 -; CHECK-NEXT: 92: 30 21 04 2c addiu $1, $1, 1068 +; CHECK-NEXT: 92: 30 21 04 2d addiu $1, $1, 1069 ; CHECK-NEXT: 96: 00 3f 09 50 addu $1, $ra, $1 ; CHECK-NEXT: 9a: ff fd 00 00 lw $ra, 0($sp) ; CHECK-NEXT: 9e: 00 01 0f 3c jr $1 @@ -51,7 +51,7 @@ ; CHECK-NEXT: 10476: fb fd 00 00 sw $ra, 0($sp) ; CHECK-NEXT: 1047a: 41 a1 00 01 lui $1, 1 ; CHECK-NEXT: 1047e: 40 60 00 02 bal 8 -; CHECK-NEXT: 10482: 30 21 04 00 addiu $1, $1, 1024 +; CHECK-NEXT: 10482: 30 21 04 01 addiu $1, $1, 1025 ; CHECK-NEXT: 10486: 00 3f 09 50 addu $1, $ra, $1 ; CHECK-NEXT: 1048a: ff fd 00 00 lw $ra, 0($sp) ; CHECK-NEXT: 1048e: 00 01 0f 3c jr $1 diff --git a/llvm/test/DebugInfo/Mips/eh_frame.ll b/llvm/test/DebugInfo/Mips/eh_frame.ll index 4687443cb1cff2..122d0a7f6ab2ab 100644 --- a/llvm/test/DebugInfo/Mips/eh_frame.ll +++ b/llvm/test/DebugInfo/Mips/eh_frame.ll @@ -1,9 +1,21 @@ -; RUN: llc -mtriple mips-unknown-linux-gnu -mattr=+micromips -O3 -filetype=obj -o - %s | llvm-readelf -r | FileCheck %s - -; CHECK: .rel.eh_frame -; CHECK: DW.ref.__gxx_personality_v0 -; CHECK-NEXT: .text -; CHECK-NEXT: .gcc_except_table +; RUN: llc -mtriple mips-unknown-linux-gnu -mattr=+micromips -relocation-model=static -O3 -filetype=obj -o - %s | \ +; RUN: llvm-readelf -r | FileCheck %s --check-prefix=CHECK-READELF +; RUN: llc -mtriple mips-unknown-linux-gnu -mattr=+micromips -relocation-model=pic -O3 -filetype=obj -o - %s | \ +; RUN: llvm-readelf -r | FileCheck %s --check-prefix=CHECK-READELF +; RUN: llc -mtriple mips-unknown-linux-gnu -mattr=+micromips -relocation-model=static -O3 -filetype=obj -o - %s | \ +; RUN: llvm-objdump -s -j .gcc_except_table - | FileCheck %s --check-prefix=CHECK-EXCEPT-TABLE-STATIC +; RUN: llc -mtriple mips-unknown-linux-gnu -mattr=+micromips -relocation-model=pic -O3 -filetype=obj -o - %s | \ +; RUN: llvm-objdump -s -j .gcc_except_table - | FileCheck %s --check-prefix=CHECK-EXCEPT-TABLE-PIC + +; CHECK-READELF: .rel.eh_frame +; CHECK-READELF: DW.ref.__gxx_personality_v0 +; CHECK-READELF-NEXT: .text +; CHECK-READELF-NEXT: .gcc_except_table + +; CHECK-EXCEPT-TABLE-STATIC: 0000 ff9b1501 0c011500 00150e23 01231e00 ...........#.#.. +; CHECK-EXCEPT-TABLE-STATIC: 0010 00010000 00000000 +; CHECK-EXCEPT-TABLE-PIC: 0000 ff9b1501 0c012d00 002d133f 013f2a00 ......-..-.?.?*. +; CHECK-EXCEPT-TABLE-PIC: 0010 00010000 00000000 ........ @_ZTIi = external constant i8* From eb0faeb15455088b681b90578afcc7932424dd32 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 12 Feb 2019 11:09:24 +0000 Subject: [PATCH 119/125] Merging r353493: ------------------------------------------------------------------------ r353493 | efriedma | 2019-02-08 02:17:49 +0100 (Fri, 08 Feb 2019) | 9 lines [COFF, ARM64] Fix types for _ReadStatusReg, _WriteStatusReg r344765 added those intrinsics, but used the wrong types. Patch by Mike Hommey Differential Revision: https://reviews.llvm.org/D57636 ------------------------------------------------------------------------ llvm-svn: 353828 --- clang/include/clang/Basic/BuiltinsAArch64.def | 4 +- clang/lib/CodeGen/CGBuiltin.cpp | 5 +- clang/lib/Headers/intrin.h | 4 +- .../CodeGen/arm64-microsoft-status-reg.cpp | 108 +++++++++++------- 4 files changed, 71 insertions(+), 50 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def index 690d547f7f3ef7..054662e6883174 100644 --- a/clang/include/clang/Basic/BuiltinsAArch64.def +++ b/clang/include/clang/Basic/BuiltinsAArch64.def @@ -204,8 +204,8 @@ TARGET_HEADER_BUILTIN(_InterlockedDecrement64_rel, "LLiLLiD*", "nh", "intrin.h", TARGET_HEADER_BUILTIN(_ReadWriteBarrier, "v", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(__getReg, "ULLii", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_ReadStatusReg, "ii", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_WriteStatusReg, "vii", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_ReadStatusReg, "LLii", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_WriteStatusReg, "viLLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_AddressOfReturnAddress, "v*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") #undef BUILTIN diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index a718f2f19aa65c..ccc657493b28b1 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -7052,19 +7052,16 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); llvm::Type *RegisterType = Int64Ty; - llvm::Type *ValueType = Int32Ty; llvm::Type *Types[] = { RegisterType }; if (BuiltinID == AArch64::BI_ReadStatusReg) { llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); - llvm::Value *Call = Builder.CreateCall(F, Metadata); - return Builder.CreateTrunc(Call, ValueType); + return Builder.CreateCall(F, Metadata); } llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1)); - ArgValue = Builder.CreateZExt(ArgValue, RegisterType); return Builder.CreateCall(F, { Metadata, ArgValue }); } diff --git a/clang/lib/Headers/intrin.h b/clang/lib/Headers/intrin.h index c86f41faeb88a9..966258bab4b329 100644 --- a/clang/lib/Headers/intrin.h +++ b/clang/lib/Headers/intrin.h @@ -564,8 +564,8 @@ __nop(void) { #if defined(__aarch64__) unsigned __int64 __getReg(int); long _InterlockedAdd(long volatile *Addend, long Value); -int _ReadStatusReg(int); -void _WriteStatusReg(int, int); +__int64 _ReadStatusReg(int); +void _WriteStatusReg(int, __int64); static inline unsigned short _byteswap_ushort (unsigned short val) { return __builtin_bswap16(val); diff --git a/clang/test/CodeGen/arm64-microsoft-status-reg.cpp b/clang/test/CodeGen/arm64-microsoft-status-reg.cpp index eb59bae50f0ae3..524b5af120c52a 100644 --- a/clang/test/CodeGen/arm64-microsoft-status-reg.cpp +++ b/clang/test/CodeGen/arm64-microsoft-status-reg.cpp @@ -23,88 +23,112 @@ #define ARM64_TPIDRRO_EL0 ARM64_SYSREG(3,3,13, 0,3) // Thread ID Register, User Read Only [CP15_TPIDRURO] #define ARM64_TPIDR_EL1 ARM64_SYSREG(3,0,13, 0,4) // Thread ID Register, Privileged Only [CP15_TPIDRPRW] -void check_ReadWriteStatusReg(int v) { - int ret; +// From intrin.h +__int64 _ReadStatusReg(int); +void _WriteStatusReg(int, __int64); + +void check_ReadWriteStatusReg(__int64 v) { + __int64 ret; ret = _ReadStatusReg(ARM64_CNTVCT); -// CHECK-ASM: mrs x8, CNTVCT_EL0 -// CHECK-IR: call i64 @llvm.read_register.i64(metadata ![[MD2:.*]]) +// CHECK-ASM: mrs x0, CNTVCT_EL0 +// CHECK-IR: %[[VAR:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD2:.*]]) +// CHECK-IR-NEXT: store i64 %[[VAR]] ret = _ReadStatusReg(ARM64_PMCCNTR_EL0); -// CHECK-ASM: mrs x8, PMCCNTR_EL0 -// CHECK-IR: call i64 @llvm.read_register.i64(metadata ![[MD3:.*]]) +// CHECK-ASM: mrs x0, PMCCNTR_EL0 +// CHECK-IR: %[[VAR:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD3:.*]]) +// CHECK-IR-NEXT: store i64 %[[VAR]] ret = _ReadStatusReg(ARM64_PMSELR_EL0); -// CHECK-ASM: mrs x8, PMSELR_EL0 -// CHECK-IR: call i64 @llvm.read_register.i64(metadata ![[MD4:.*]]) +// CHECK-ASM: mrs x0, PMSELR_EL0 +// CHECK-IR: %[[VAR:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD4:.*]]) +// CHECK-IR-NEXT: store i64 %[[VAR]] ret = _ReadStatusReg(ARM64_PMXEVCNTR_EL0); -// CHECK-ASM: mrs x8, PMXEVCNTR_EL0 -// CHECK-IR: call i64 @llvm.read_register.i64(metadata ![[MD5:.*]]) +// CHECK-ASM: mrs x0, PMXEVCNTR_EL0 +// CHECK-IR: %[[VAR:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD5:.*]]) +// CHECK-IR-NEXT: store i64 %[[VAR]] ret = _ReadStatusReg(ARM64_PMXEVCNTRn_EL0(0)); -// CHECK-ASM: mrs x8, PMEVCNTR0_EL0 -// CHECK-IR: call i64 @llvm.read_register.i64(metadata ![[MD6:.*]]) +// CHECK-ASM: mrs x0, PMEVCNTR0_EL0 +// CHECK-IR: %[[VAR:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD6:.*]]) +// CHECK-IR-NEXT: store i64 %[[VAR]] ret = _ReadStatusReg(ARM64_PMXEVCNTRn_EL0(1)); -// CHECK-ASM: mrs x8, PMEVCNTR1_EL0 -// CHECK-IR: call i64 @llvm.read_register.i64(metadata ![[MD7:.*]]) +// CHECK-ASM: mrs x0, PMEVCNTR1_EL0 +// CHECK-IR: %[[VAR:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD7:.*]]) +// CHECK-IR-NEXT: store i64 %[[VAR]] ret = _ReadStatusReg(ARM64_PMXEVCNTRn_EL0(30)); -// CHECK-ASM: mrs x8, PMEVCNTR30_EL0 -// CHECK-IR: call i64 @llvm.read_register.i64(metadata ![[MD8:.*]]) +// CHECK-ASM: mrs x0, PMEVCNTR30_EL0 +// CHECK-IR: %[[VAR:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD8:.*]]) +// CHECK-IR-NEXT: store i64 %[[VAR]] ret = _ReadStatusReg(ARM64_TPIDR_EL0); -// CHECK-ASM: mrs x8, TPIDR_EL0 -// CHECK-IR: call i64 @llvm.read_register.i64(metadata ![[MD9:.*]]) +// CHECK-ASM: mrs x0, TPIDR_EL0 +// CHECK-IR: %[[VAR:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD9:.*]]) +// CHECK-IR-NEXT: store i64 %[[VAR]] ret = _ReadStatusReg(ARM64_TPIDRRO_EL0); -// CHECK-ASM: mrs x8, TPIDRRO_EL0 -// CHECK-IR: call i64 @llvm.read_register.i64(metadata ![[MD10:.*]]) +// CHECK-ASM: mrs x0, TPIDRRO_EL0 +// CHECK-IR: %[[VAR:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD10:.*]]) +// CHECK-IR-NEXT: store i64 %[[VAR]] ret = _ReadStatusReg(ARM64_TPIDR_EL1); -// CHECK-ASM: mrs x8, TPIDR_EL1 -// CHECK-IR: call i64 @llvm.read_register.i64(metadata ![[MD11:.*]]) +// CHECK-ASM: mrs x0, TPIDR_EL1 +// CHECK-IR: %[[VAR:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD11:.*]]) +// CHECK-IR-NEXT: store i64 %[[VAR]] _WriteStatusReg(ARM64_CNTVCT, v); -// CHECK-ASM: msr S3_3_C14_C0_2, x8 -// CHECK-IR: call void @llvm.write_register.i64(metadata ![[MD2:.*]], i64 {{%.*}}) +// CHECK-ASM: msr S3_3_C14_C0_2, x0 +// CHECK-IR: %[[VAR:.*]] = load i64, +// CHECK-IR-NEXT: call void @llvm.write_register.i64(metadata ![[MD2:.*]], i64 %[[VAR]]) _WriteStatusReg(ARM64_PMCCNTR_EL0, v); -// CHECK-ASM: msr PMCCNTR_EL0, x8 -// CHECK-IR: call void @llvm.write_register.i64(metadata ![[MD3:.*]], i64 {{%.*}}) +// CHECK-ASM: msr PMCCNTR_EL0, x0 +// CHECK-IR: %[[VAR:.*]] = load i64, +// CHECK-IR-NEXT: call void @llvm.write_register.i64(metadata ![[MD3:.*]], i64 %[[VAR]]) _WriteStatusReg(ARM64_PMSELR_EL0, v); -// CHECK-ASM: msr PMSELR_EL0, x8 -// CHECK-IR: call void @llvm.write_register.i64(metadata ![[MD4:.*]], i64 {{%.*}}) +// CHECK-ASM: msr PMSELR_EL0, x0 +// CHECK-IR: %[[VAR:.*]] = load i64, +// CHECK-IR-NEXT: call void @llvm.write_register.i64(metadata ![[MD4:.*]], i64 %[[VAR]]) _WriteStatusReg(ARM64_PMXEVCNTR_EL0, v); -// CHECK-ASM: msr PMXEVCNTR_EL0, x8 -// CHECK-IR: call void @llvm.write_register.i64(metadata ![[MD5:.*]], i64 {{%.*}}) +// CHECK-ASM: msr PMXEVCNTR_EL0, x0 +// CHECK-IR: %[[VAR:.*]] = load i64, +// CHECK-IR-NEXT: call void @llvm.write_register.i64(metadata ![[MD5:.*]], i64 %[[VAR]]) _WriteStatusReg(ARM64_PMXEVCNTRn_EL0(0), v); -// CHECK-ASM: msr PMEVCNTR0_EL0, x8 -// CHECK-IR: call void @llvm.write_register.i64(metadata ![[MD6:.*]], i64 {{%.*}}) +// CHECK-ASM: msr PMEVCNTR0_EL0, x0 +// CHECK-IR: %[[VAR:.*]] = load i64, +// CHECK-IR-NEXT: call void @llvm.write_register.i64(metadata ![[MD6:.*]], i64 %[[VAR]]) _WriteStatusReg(ARM64_PMXEVCNTRn_EL0(1), v); -// CHECK-ASM: msr PMEVCNTR1_EL0, x8 -// CHECK-IR: call void @llvm.write_register.i64(metadata ![[MD7:.*]], i64 {{%.*}}) +// CHECK-ASM: msr PMEVCNTR1_EL0, x0 +// CHECK-IR: %[[VAR:.*]] = load i64, +// CHECK-IR-NEXT: call void @llvm.write_register.i64(metadata ![[MD7:.*]], i64 %[[VAR]]) _WriteStatusReg(ARM64_PMXEVCNTRn_EL0(30), v); -// CHECK-ASM: msr PMEVCNTR30_EL0, x8 -// CHECK-IR: call void @llvm.write_register.i64(metadata ![[MD8:.*]], i64 {{%.*}}) +// CHECK-ASM: msr PMEVCNTR30_EL0, x0 +// CHECK-IR: %[[VAR:.*]] = load i64, +// CHECK-IR-NEXT: call void @llvm.write_register.i64(metadata ![[MD8:.*]], i64 %[[VAR]]) _WriteStatusReg(ARM64_TPIDR_EL0, v); -// CHECK-ASM: msr TPIDR_EL0, x8 -// CHECK-IR: call void @llvm.write_register.i64(metadata ![[MD9:.*]], i64 {{%.*}}) +// CHECK-ASM: msr TPIDR_EL0, x0 +// CHECK-IR: %[[VAR:.*]] = load i64, +// CHECK-IR-NEXT: call void @llvm.write_register.i64(metadata ![[MD9:.*]], i64 %[[VAR]]) _WriteStatusReg(ARM64_TPIDRRO_EL0, v); -// CHECK-ASM: msr TPIDRRO_EL0, x8 -// CHECK-IR: call void @llvm.write_register.i64(metadata ![[MD10:.*]], i64 {{%.*}}) +// CHECK-ASM: msr TPIDRRO_EL0, x0 +// CHECK-IR: %[[VAR:.*]] = load i64, +// CHECK-IR-NEXT: call void @llvm.write_register.i64(metadata ![[MD10:.*]], i64 %[[VAR]]) _WriteStatusReg(ARM64_TPIDR_EL1, v); -// CHECK-ASM: msr TPIDR_EL1, x8 -// CHECK-IR: call void @llvm.write_register.i64(metadata ![[MD11:.*]], i64 {{%.*}}) +// CHECK-ASM: msr TPIDR_EL1, x0 +// CHECK-IR: %[[VAR:.*]] = load i64, +// CHECK-IR-NEXT: call void @llvm.write_register.i64(metadata ![[MD11:.*]], i64 %[[VAR]]) } // CHECK-IR: ![[MD2]] = !{!"3:3:14:0:2"} From a576b44d09080661fc9f83a94f66eccb44d5f3ce Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 12 Feb 2019 11:12:23 +0000 Subject: [PATCH 120/125] Merging r353402: ------------------------------------------------------------------------ r353402 | mstorsjo | 2019-02-07 13:46:49 +0100 (Thu, 07 Feb 2019) | 7 lines [clang-cl] support /Oy- on aarch64 MSVC supports /Oy- on aarch64, so clang-cl should too. Patch by Nathan Froyd! Differential Revision: https://reviews.llvm.org/D57838 ------------------------------------------------------------------------ llvm-svn: 353829 --- clang/lib/Driver/ToolChains/MSVC.cpp | 8 ++++---- clang/test/Driver/cl-options.c | 4 ++++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/clang/lib/Driver/ToolChains/MSVC.cpp b/clang/lib/Driver/ToolChains/MSVC.cpp index 7e34b0df5c8cfa..a164fd68e22e7c 100644 --- a/clang/lib/Driver/ToolChains/MSVC.cpp +++ b/clang/lib/Driver/ToolChains/MSVC.cpp @@ -1408,10 +1408,10 @@ static void TranslateOptArg(Arg *A, llvm::opt::DerivedArgList &DAL, DAL.AddFlagArg( A, Opts.getOption(options::OPT_fno_omit_frame_pointer)); } else { - // Don't warn about /Oy- in 64-bit builds (where + // Don't warn about /Oy- in x86-64 builds (where // SupportsForcingFramePointer is false). The flag having no effect // there is a compiler-internal optimization, and people shouldn't have - // to special-case their build files for 64-bit clang-cl. + // to special-case their build files for x86-64 clang-cl. A->claim(); } break; @@ -1442,8 +1442,8 @@ MSVCToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs()); const OptTable &Opts = getDriver().getOpts(); - // /Oy and /Oy- only has an effect under X86-32. - bool SupportsForcingFramePointer = getArch() == llvm::Triple::x86; + // /Oy and /Oy- don't have an effect on X86-64 + bool SupportsForcingFramePointer = getArch() != llvm::Triple::x86_64; // The -O[12xd] flag actually expands to several flags. We must desugar the // flags so that options embedded can be negated. For example, the '-O2' flag diff --git a/clang/test/Driver/cl-options.c b/clang/test/Driver/cl-options.c index f5171d5c040c5a..d8db081ac8a684 100644 --- a/clang/test/Driver/cl-options.c +++ b/clang/test/Driver/cl-options.c @@ -178,6 +178,10 @@ // Oy_2: -momit-leaf-frame-pointer // Oy_2: -O2 +// RUN: %clang_cl --target=aarch64-pc-windows-msvc -Werror /Oy- /O2 -### -- %s 2>&1 | FileCheck -check-prefix=Oy_aarch64 %s +// Oy_aarch64: -mdisable-fp-elim +// Oy_aarch64: -O2 + // RUN: %clang_cl --target=i686-pc-win32 -Werror /O2 /O2 -### -- %s 2>&1 | FileCheck -check-prefix=O2O2 %s // O2O2: "-O2" From ae61627a39b48f4131a27106567a7613be948948 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 12 Feb 2019 11:17:08 +0000 Subject: [PATCH 121/125] Merging r351322: ------------------------------------------------------------------------ r351322 | pfaffe | 2019-01-16 12:14:07 +0100 (Wed, 16 Jan 2019) | 9 lines [MSan] Apply the ctor creation scheme of TSan Summary: To avoid adding an extern function to the global ctors list, apply the changes of D56538 also to MSan. Reviewers: chandlerc, vitalybuka, fedor.sergeev, leonardchan Subscribers: hiraditya, bollu, llvm-commits Differential Revision: https://reviews.llvm.org/D56734 ------------------------------------------------------------------------ llvm-svn: 353830 --- .../Instrumentation/MemorySanitizer.cpp | 24 ++++++++++++++++++- .../MemorySanitizer/global_ctors_2to3.ll | 18 ++++++++++++++ .../MemorySanitizer/msan_basic.ll | 5 ++-- .../MemorySanitizer/msan_llvm_is_constant.ll | 3 +++ 4 files changed, 47 insertions(+), 3 deletions(-) create mode 100644 llvm/test/Instrumentation/MemorySanitizer/global_ctors_2to3.ll diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index e6573af2077dcc..b2230afa13d8a1 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -321,6 +321,7 @@ static cl::opt ClOriginBase("msan-origin-base", cl::desc("Define custom MSan OriginBase"), cl::Hidden, cl::init(0)); +static const char *const kMsanModuleCtorName = "msan.module_ctor"; static const char *const kMsanInitName = "__msan_init"; namespace { @@ -586,6 +587,8 @@ class MemorySanitizer { /// An empty volatile inline asm that prevents callback merge. InlineAsm *EmptyAsm; + + Function *MsanCtorFunction; }; /// A legacy function pass for msan instrumentation. @@ -839,6 +842,8 @@ Value *MemorySanitizer::getKmsanShadowOriginAccessFn(bool isStore, int size) { } /// Module-level initialization. +/// +/// inserts a call to __msan_init to the module's constructor list. void MemorySanitizer::initializeModule(Module &M) { auto &DL = M.getDataLayout(); @@ -913,7 +918,22 @@ void MemorySanitizer::initializeModule(Module &M) { OriginStoreWeights = MDBuilder(*C).createBranchWeights(1, 1000); if (!CompileKernel) { - getOrCreateInitFunction(M, kMsanInitName); + std::tie(MsanCtorFunction, std::ignore) = + getOrCreateSanitizerCtorAndInitFunctions( + M, kMsanModuleCtorName, kMsanInitName, + /*InitArgTypes=*/{}, + /*InitArgs=*/{}, + // This callback is invoked when the functions are created the first + // time. Hook them into the global ctors list in that case: + [&](Function *Ctor, Function *) { + if (!ClWithComdat) { + appendToGlobalCtors(M, Ctor, 0); + return; + } + Comdat *MsanCtorComdat = M.getOrInsertComdat(kMsanModuleCtorName); + Ctor->setComdat(MsanCtorComdat); + appendToGlobalCtors(M, Ctor, 0, Ctor); + }); if (TrackOrigins) M.getOrInsertGlobal("__msan_track_origins", IRB.getInt32Ty(), [&] { @@ -4458,6 +4478,8 @@ static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan, } bool MemorySanitizer::sanitizeFunction(Function &F, TargetLibraryInfo &TLI) { + if (!CompileKernel && (&F == MsanCtorFunction)) + return false; MemorySanitizerVisitor Visitor(F, *this, TLI); // Clear out readonly/readnone attributes. diff --git a/llvm/test/Instrumentation/MemorySanitizer/global_ctors_2to3.ll b/llvm/test/Instrumentation/MemorySanitizer/global_ctors_2to3.ll new file mode 100644 index 00000000000000..d841c6c05c9b76 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/global_ctors_2to3.ll @@ -0,0 +1,18 @@ +; MSan converts 2-element global_ctors to 3-element when adding the new entry. +; RUN: opt < %s -msan-with-comdat -S -passes=msan 2>&1 | FileCheck %s +; RUN: opt < %s -msan -msan-with-comdat -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK: $msan.module_ctor = comdat any +; CHECK: @llvm.global_ctors = appending global [2 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @f, i8* null }, { i32, void ()*, i8* } { i32 0, void ()* @msan.module_ctor, i8* bitcast (void ()* @msan.module_ctor to i8*) }] + +@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @f }] + +define internal void @f() { +entry: + ret void +} + +; CHECK: define internal void @msan.module_ctor() comdat { diff --git a/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll b/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll index f4cbc637ef1aac..569c2320c5c914 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll @@ -9,7 +9,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -; CHECK: @llvm.global_ctors {{.*}} { i32 0, void ()* @__msan_init, i8* null } +; CHECK: @llvm.global_ctors {{.*}} { i32 0, void ()* @msan.module_ctor, i8* null } ; Check the presence and the linkage type of __msan_track_origins and ; other interface symbols. @@ -991,4 +991,5 @@ define i8* @MismatchingCallMustTailCall(i32 %a) sanitize_memory { ; CHECK-NEXT: ret i8* -; CHECK: declare void @__msan_init() +; CHECK-LABEL: define internal void @msan.module_ctor() { +; CHECK: call void @__msan_init() diff --git a/llvm/test/Instrumentation/MemorySanitizer/msan_llvm_is_constant.ll b/llvm/test/Instrumentation/MemorySanitizer/msan_llvm_is_constant.ll index b7847db06ac27c..4f316be2357987 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/msan_llvm_is_constant.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/msan_llvm_is_constant.ll @@ -1,6 +1,9 @@ ; Make sure MSan doesn't insert shadow checks for @llvm.is.constant.* arguments. +; RUN: opt < %s -msan-kernel=1 -S -passes=msan 2>&1 | FileCheck \ +; RUN: -check-prefixes=CHECK %s ; RUN: opt < %s -msan -msan-kernel=1 -S | FileCheck -check-prefixes=CHECK %s +; RUN: opt < %s -S -passes=msan 2>&1 | FileCheck -check-prefixes=CHECK %s ; RUN: opt < %s -msan -S | FileCheck -check-prefixes=CHECK %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" From 33e25307a0562a64247a55dd2a4c774647f8913b Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 12 Feb 2019 11:19:21 +0000 Subject: [PATCH 122/125] Merging r353656: ------------------------------------------------------------------------ r353656 | brad | 2019-02-11 03:53:16 +0100 (Mon, 11 Feb 2019) | 4 lines long double is double on OpenBSD/NetBSD/PPC. Patch by George Koehler. ------------------------------------------------------------------------ llvm-svn: 353831 --- clang/lib/Basic/Targets/PPC.h | 8 +++++++- clang/test/CodeGen/powerpc_types.c | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h index 058970a0e098b5..cbe7a9a2fa85ec 100644 --- a/clang/lib/Basic/Targets/PPC.h +++ b/clang/lib/Basic/Targets/PPC.h @@ -331,9 +331,15 @@ class LLVM_LIBRARY_VISIBILITY PPC32TargetInfo : public PPCTargetInfo { break; } - if (getTriple().isOSFreeBSD()) { + switch (getTriple().getOS()) { + case llvm::Triple::FreeBSD: + case llvm::Triple::NetBSD: + case llvm::Triple::OpenBSD: LongDoubleWidth = LongDoubleAlign = 64; LongDoubleFormat = &llvm::APFloat::IEEEdouble(); + break; + default: + break; } // PPC32 supports atomics up to 4 bytes. diff --git a/clang/test/CodeGen/powerpc_types.c b/clang/test/CodeGen/powerpc_types.c index b7d0f5de49859d..86eb7f8356801d 100644 --- a/clang/test/CodeGen/powerpc_types.c +++ b/clang/test/CodeGen/powerpc_types.c @@ -1,4 +1,6 @@ // RUN: %clang_cc1 -triple powerpc-unknown-freebsd -emit-llvm -o - %s| FileCheck -check-prefix=SVR4-CHECK %s +// RUN: %clang_cc1 -triple powerpc-unknown-netbsd -emit-llvm -o - %s| FileCheck -check-prefix=SVR4-CHECK %s +// RUN: %clang_cc1 -triple powerpc-unknown-openbsd -emit-llvm -o - %s| FileCheck -check-prefix=SVR4-CHECK %s #include From ef182d5fb158ba871cdd9a42c0e6e694ba42ecaf Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 12 Feb 2019 12:24:52 +0000 Subject: [PATCH 123/125] [WebAssembly] Backport custom import name changes for lld to 8.0. Specifically, this backports r352645, r352828, and r353473 to the 8.0 branch. The trunk patches don't apply cleanly to 8.0 due to some contemporaneous mass-rename and mass-clang-tidy patches, so this merges them to simplify rebasing. r352645 [WebAssembly] Fix crash with LTO + relocatable + undefined symbols r352828 [WebAssembly] Support imports from custom module names r353473 [WebAssembly] Fix imported function symbol names that differ from their import names in the .o format By Dan Gohman! llvm-svn: 353833 --- lld/test/wasm/data-layout.ll | 6 +- lld/test/wasm/import-module.ll | 21 +++ lld/test/wasm/import-names.ll | 27 ++++ lld/test/wasm/init-fini.ll | 86 ++++++------ lld/test/wasm/locals-duplicate.test | 146 ++++++++++----------- lld/test/wasm/lto/relocatable-undefined.ll | 36 +++++ lld/test/wasm/weak-alias.ll | 28 ++-- lld/wasm/Driver.cpp | 4 +- lld/wasm/InputChunks.cpp | 6 +- lld/wasm/InputFiles.cpp | 11 +- lld/wasm/LTO.cpp | 5 +- lld/wasm/LTO.h | 1 + lld/wasm/MarkLive.cpp | 2 +- lld/wasm/SymbolTable.cpp | 14 +- lld/wasm/SymbolTable.h | 10 +- lld/wasm/Symbols.h | 19 ++- lld/wasm/Writer.cpp | 106 +++++++++------ lld/wasm/Writer.h | 2 + 18 files changed, 332 insertions(+), 198 deletions(-) create mode 100644 lld/test/wasm/import-module.ll create mode 100644 lld/test/wasm/import-names.ll create mode 100644 lld/test/wasm/lto/relocatable-undefined.ll diff --git a/lld/test/wasm/data-layout.ll b/lld/test/wasm/data-layout.ll index b01c13ac9b82a0..7c215efb0d8f4c 100644 --- a/lld/test/wasm/data-layout.ll +++ b/lld/test/wasm/data-layout.ll @@ -85,10 +85,10 @@ target triple = "wasm32-unknown-unknown" ; RELOC: - Type: DATA ; RELOC-NEXT: Relocations: ; RELOC-NEXT: - Type: R_WEBASSEMBLY_MEMORY_ADDR_I32 -; RELOC-NEXT: Index: 6 +; RELOC-NEXT: Index: 3 ; RELOC-NEXT: Offset: 0x00000018 ; RELOC-NEXT: - Type: R_WEBASSEMBLY_MEMORY_ADDR_I32 -; RELOC-NEXT: Index: 3 +; RELOC-NEXT: Index: 4 ; RELOC-NEXT: Offset: 0x0000002E ; RELOC-NEXT: Addend: 4 ; RELOC-NEXT: Segments: @@ -148,7 +148,7 @@ target triple = "wasm32-unknown-unknown" ; RELOC-NEXT: Flags: [ ] ; RELOC-NEXT: Segment: 2 ; RELOC-NEXT: Size: 4 -; RELOC: - Index: 6 +; RELOC-NEXT: - Index: 3 ; RELOC-NEXT: Kind: DATA ; RELOC-NEXT: Name: hello_str ; RELOC-NEXT: Flags: [ ] diff --git a/lld/test/wasm/import-module.ll b/lld/test/wasm/import-module.ll new file mode 100644 index 00000000000000..9a473194ce2c37 --- /dev/null +++ b/lld/test/wasm/import-module.ll @@ -0,0 +1,21 @@ +; RUN: llc -filetype=obj %s -o %t.o +; RUN: wasm-ld --allow-undefined -o %t.wasm %t.o +; RUN: obj2yaml %t.wasm | FileCheck %s + +target triple = "wasm32-unknown-unknown-wasm" + +define void @_start() { + call void @foo(); + ret void +} + +declare void @foo() #0 + +attributes #0 = { "wasm-import-module"="bar" } + +; CHECK: - Type: IMPORT +; CHECK-NEXT: Imports: +; CHECK-NEXT: - Module: bar +; CHECK-NEXT: Field: foo +; CHECK-NEXT: Kind: FUNCTION +; CHECK-NEXT: SigIndex: 0 diff --git a/lld/test/wasm/import-names.ll b/lld/test/wasm/import-names.ll new file mode 100644 index 00000000000000..a3953d3356198b --- /dev/null +++ b/lld/test/wasm/import-names.ll @@ -0,0 +1,27 @@ +; RUN: llc -filetype=obj %s -o %t.o +; RUN: wasm-ld --allow-undefined -o %t.wasm %t.o +; RUN: obj2yaml %t.wasm | FileCheck %s + +target triple = "wasm32-unknown-unknown" + +declare void @f0() #0 + +define void @_start() { + call void @f0() + ret void +} + +attributes #0 = { "wasm-import-module"="somewhere" "wasm-import-name"="something" } + +; CHECK: - Type: IMPORT +; CHECK-NEXT: Imports: +; CHECK-NEXT: - Module: somewhere +; CHECK-NEXT: Field: something +; CHECK-NEXT: Kind: FUNCTION +; CHECK-NEXT: SigIndex: 0 + +; CHECK: - Type: CUSTOM +; CHECK-NEXT: Name: name +; CHECK-NEXT: FunctionNames: +; CHECK-NEXT: - Index: 0 +; CHECK-NEXT: Name: f0 diff --git a/lld/test/wasm/init-fini.ll b/lld/test/wasm/init-fini.ll index 9a7f5357ef0157..b17020b177c71a 100644 --- a/lld/test/wasm/init-fini.ll +++ b/lld/test/wasm/init-fini.ll @@ -163,64 +163,64 @@ entry: ; RELOC-NEXT: Flags: [ VISIBILITY_HIDDEN ] ; RELOC-NEXT: Function: 7 ; RELOC-NEXT: - Index: 6 +; RELOC-NEXT: Kind: DATA +; RELOC-NEXT: Name: __dso_handle +; RELOC-NEXT: Flags: [ BINDING_WEAK, VISIBILITY_HIDDEN, UNDEFINED ] +; RELOC-NEXT: - Index: 7 +; RELOC-NEXT: Kind: FUNCTION +; RELOC-NEXT: Name: externDtor +; RELOC-NEXT: Flags: [ VISIBILITY_HIDDEN, UNDEFINED ] +; RELOC-NEXT: Function: 0 +; RELOC-NEXT: - Index: 8 +; RELOC-NEXT: Kind: FUNCTION +; RELOC-NEXT: Name: externCtor +; RELOC-NEXT: Flags: [ VISIBILITY_HIDDEN, UNDEFINED ] +; RELOC-NEXT: Function: 1 +; RELOC-NEXT: - Index: 9 +; RELOC-NEXT: Kind: FUNCTION +; RELOC-NEXT: Name: myctor +; RELOC-NEXT: Flags: [ VISIBILITY_HIDDEN ] +; RELOC-NEXT: Function: 14 +; RELOC-NEXT: - Index: 10 +; RELOC-NEXT: Kind: FUNCTION +; RELOC-NEXT: Name: mydtor +; RELOC-NEXT: Flags: [ VISIBILITY_HIDDEN ] +; RELOC-NEXT: Function: 15 +; RELOC-NEXT: - Index: 11 +; RELOC-NEXT: Kind: GLOBAL +; RELOC-NEXT: Name: __stack_pointer +; RELOC-NEXT: Flags: [ UNDEFINED ] +; RELOC-NEXT: Global: 0 +; RELOC-NEXT: - Index: 12 ; RELOC-NEXT: Kind: FUNCTION ; RELOC-NEXT: Name: .Lcall_dtors.101 ; RELOC-NEXT: Flags: [ BINDING_LOCAL ] ; RELOC-NEXT: Function: 8 -; RELOC-NEXT: - Index: 7 +; RELOC-NEXT: - Index: 13 ; RELOC-NEXT: Kind: FUNCTION ; RELOC-NEXT: Name: .Lregister_call_dtors.101 ; RELOC-NEXT: Flags: [ BINDING_LOCAL ] ; RELOC-NEXT: Function: 9 -; RELOC-NEXT: - Index: 8 -; RELOC-NEXT: Kind: DATA -; RELOC-NEXT: Name: __dso_handle -; RELOC-NEXT: Flags: [ BINDING_WEAK, VISIBILITY_HIDDEN, UNDEFINED ] -; RELOC-NEXT: - Index: 9 +; RELOC-NEXT: - Index: 14 ; RELOC-NEXT: Kind: FUNCTION ; RELOC-NEXT: Name: .Lcall_dtors.1001 ; RELOC-NEXT: Flags: [ BINDING_LOCAL ] ; RELOC-NEXT: Function: 10 -; RELOC-NEXT: - Index: 10 +; RELOC-NEXT: - Index: 15 ; RELOC-NEXT: Kind: FUNCTION ; RELOC-NEXT: Name: .Lregister_call_dtors.1001 ; RELOC-NEXT: Flags: [ BINDING_LOCAL ] ; RELOC-NEXT: Function: 11 -; RELOC-NEXT: - Index: 11 +; RELOC-NEXT: - Index: 16 ; RELOC-NEXT: Kind: FUNCTION ; RELOC-NEXT: Name: .Lcall_dtors.4000 ; RELOC-NEXT: Flags: [ BINDING_LOCAL ] ; RELOC-NEXT: Function: 12 -; RELOC-NEXT: - Index: 12 -; RELOC-NEXT: Kind: FUNCTION -; RELOC-NEXT: Name: externDtor -; RELOC-NEXT: Flags: [ VISIBILITY_HIDDEN, UNDEFINED ] -; RELOC-NEXT: Function: 0 -; RELOC-NEXT: - Index: 13 +; RELOC-NEXT: - Index: 17 ; RELOC-NEXT: Kind: FUNCTION ; RELOC-NEXT: Name: .Lregister_call_dtors.4000 ; RELOC-NEXT: Flags: [ BINDING_LOCAL ] ; RELOC-NEXT: Function: 13 -; RELOC-NEXT: - Index: 14 -; RELOC-NEXT: Kind: FUNCTION -; RELOC-NEXT: Name: externCtor -; RELOC-NEXT: Flags: [ VISIBILITY_HIDDEN, UNDEFINED ] -; RELOC-NEXT: Function: 1 -; RELOC-NEXT: - Index: 15 -; RELOC-NEXT: Kind: FUNCTION -; RELOC-NEXT: Name: myctor -; RELOC-NEXT: Flags: [ VISIBILITY_HIDDEN ] -; RELOC-NEXT: Function: 14 -; RELOC-NEXT: - Index: 16 -; RELOC-NEXT: Kind: FUNCTION -; RELOC-NEXT: Name: mydtor -; RELOC-NEXT: Flags: [ VISIBILITY_HIDDEN ] -; RELOC-NEXT: Function: 15 -; RELOC-NEXT: - Index: 17 -; RELOC-NEXT: Kind: GLOBAL -; RELOC-NEXT: Name: __stack_pointer -; RELOC-NEXT: Flags: [ UNDEFINED ] -; RELOC-NEXT: Global: 0 ; RELOC-NEXT: - Index: 18 ; RELOC-NEXT: Kind: FUNCTION ; RELOC-NEXT: Name: .Lcall_dtors.101 @@ -251,36 +251,36 @@ entry: ; RELOC-NEXT: Name: .Lregister_call_dtors.2002 ; RELOC-NEXT: Flags: [ BINDING_LOCAL ] ; RELOC-NEXT: Function: 21 -; RELOC-NEXT: InitFunctions: +; RELOC-NEXT: InitFunctions: ; RELOC-NEXT: - Priority: 101 ; RELOC-NEXT: Symbol: 0 ; RELOC-NEXT: - Priority: 101 ; RELOC-NEXT: Symbol: 1 ; RELOC-NEXT: - Priority: 101 -; RELOC-NEXT: Symbol: 7 +; RELOC-NEXT: Symbol: 13 ; RELOC-NEXT: - Priority: 101 -; RELOC-NEXT: Symbol: 15 +; RELOC-NEXT: Symbol: 9 ; RELOC-NEXT: - Priority: 101 ; RELOC-NEXT: Symbol: 19 ; RELOC-NEXT: - Priority: 202 -; RELOC-NEXT: Symbol: 15 +; RELOC-NEXT: Symbol: 9 ; RELOC-NEXT: - Priority: 202 ; RELOC-NEXT: Symbol: 21 ; RELOC-NEXT: - Priority: 1001 ; RELOC-NEXT: Symbol: 0 ; RELOC-NEXT: - Priority: 1001 -; RELOC-NEXT: Symbol: 10 -; RELOC-NEXT: - Priority: 2002 ; RELOC-NEXT: Symbol: 15 ; RELOC-NEXT: - Priority: 2002 +; RELOC-NEXT: Symbol: 9 +; RELOC-NEXT: - Priority: 2002 ; RELOC-NEXT: Symbol: 23 ; RELOC-NEXT: - Priority: 4000 -; RELOC-NEXT: Symbol: 14 +; RELOC-NEXT: Symbol: 8 ; RELOC-NEXT: - Priority: 4000 -; RELOC-NEXT: Symbol: 13 +; RELOC-NEXT: Symbol: 17 ; RELOC-NEXT: - Type: CUSTOM ; RELOC-NEXT: Name: name -; RELOC-NEXT: FunctionNames: +; RELOC-NEXT: FunctionNames: ; RELOC-NEXT: - Index: 0 ; RELOC-NEXT: Name: externDtor ; RELOC-NEXT: - Index: 1 diff --git a/lld/test/wasm/locals-duplicate.test b/lld/test/wasm/locals-duplicate.test index 2d6bd0df5314cd..74383bf429f69b 100644 --- a/lld/test/wasm/locals-duplicate.test +++ b/lld/test/wasm/locals-duplicate.test @@ -270,40 +270,40 @@ ; RELOC-NEXT: - Type: CODE ; RELOC-NEXT: Relocations: ; RELOC-NEXT: - Type: R_WEBASSEMBLY_MEMORY_ADDR_SLEB -; RELOC-NEXT: Index: 4 +; RELOC-NEXT: Index: 18 ; RELOC-NEXT: Offset: 0x00000013 ; RELOC-NEXT: - Type: R_WEBASSEMBLY_MEMORY_ADDR_SLEB -; RELOC-NEXT: Index: 6 +; RELOC-NEXT: Index: 3 ; RELOC-NEXT: Offset: 0x0000001C ; RELOC-NEXT: - Type: R_WEBASSEMBLY_MEMORY_ADDR_SLEB -; RELOC-NEXT: Index: 8 +; RELOC-NEXT: Index: 19 ; RELOC-NEXT: Offset: 0x00000025 ; RELOC-NEXT: - Type: R_WEBASSEMBLY_TABLE_INDEX_SLEB -; RELOC-NEXT: Index: 0 +; RELOC-NEXT: Index: 16 ; RELOC-NEXT: Offset: 0x0000002E ; RELOC-NEXT: - Type: R_WEBASSEMBLY_TABLE_INDEX_SLEB -; RELOC-NEXT: Index: 1 +; RELOC-NEXT: Index: 0 ; RELOC-NEXT: Offset: 0x00000037 ; RELOC-NEXT: - Type: R_WEBASSEMBLY_TABLE_INDEX_SLEB -; RELOC-NEXT: Index: 2 +; RELOC-NEXT: Index: 17 ; RELOC-NEXT: Offset: 0x00000040 ; RELOC-NEXT: - Type: R_WEBASSEMBLY_MEMORY_ADDR_SLEB -; RELOC-NEXT: Index: 16 +; RELOC-NEXT: Index: 10 ; RELOC-NEXT: Offset: 0x00000058 ; RELOC-NEXT: - Type: R_WEBASSEMBLY_MEMORY_ADDR_SLEB -; RELOC-NEXT: Index: 18 +; RELOC-NEXT: Index: 22 ; RELOC-NEXT: Offset: 0x00000061 ; RELOC-NEXT: - Type: R_WEBASSEMBLY_MEMORY_ADDR_SLEB -; RELOC-NEXT: Index: 20 +; RELOC-NEXT: Index: 23 ; RELOC-NEXT: Offset: 0x0000006A ; RELOC-NEXT: - Type: R_WEBASSEMBLY_TABLE_INDEX_SLEB -; RELOC-NEXT: Index: 12 +; RELOC-NEXT: Index: 8 ; RELOC-NEXT: Offset: 0x00000073 ; RELOC-NEXT: - Type: R_WEBASSEMBLY_TABLE_INDEX_SLEB -; RELOC-NEXT: Index: 13 +; RELOC-NEXT: Index: 20 ; RELOC-NEXT: Offset: 0x0000007C ; RELOC-NEXT: - Type: R_WEBASSEMBLY_TABLE_INDEX_SLEB -; RELOC-NEXT: Index: 14 +; RELOC-NEXT: Index: 21 ; RELOC-NEXT: Offset: 0x00000085 ; RELOC-NEXT: Functions: ; RELOC-NEXT: - Index: 0 @@ -386,133 +386,133 @@ ; RELOC-NEXT: SymbolTable: ; RELOC-NEXT: - Index: 0 ; RELOC-NEXT: Kind: FUNCTION -; RELOC-NEXT: Name: colliding_func1 -; RELOC-NEXT: Flags: [ BINDING_LOCAL ] -; RELOC-NEXT: Function: 0 -; RELOC-NEXT: - Index: 1 -; RELOC-NEXT: Kind: FUNCTION ; RELOC-NEXT: Name: colliding_func2 ; RELOC-NEXT: Flags: [ ] ; RELOC-NEXT: Function: 1 -; RELOC-NEXT: - Index: 2 -; RELOC-NEXT: Kind: FUNCTION -; RELOC-NEXT: Name: colliding_func3 -; RELOC-NEXT: Flags: [ BINDING_LOCAL ] -; RELOC-NEXT: Function: 2 -; RELOC-NEXT: - Index: 3 +; RELOC-NEXT: - Index: 1 ; RELOC-NEXT: Kind: FUNCTION ; RELOC-NEXT: Name: get_global1A ; RELOC-NEXT: Flags: [ ] ; RELOC-NEXT: Function: 3 -; RELOC-NEXT: - Index: 4 -; RELOC-NEXT: Kind: DATA -; RELOC-NEXT: Name: colliding_global1 -; RELOC-NEXT: Flags: [ BINDING_LOCAL ] -; RELOC-NEXT: Segment: 0 -; RELOC-NEXT: Size: 4 -; RELOC-NEXT: - Index: 5 +; RELOC-NEXT: - Index: 2 ; RELOC-NEXT: Kind: FUNCTION ; RELOC-NEXT: Name: get_global2A ; RELOC-NEXT: Flags: [ ] ; RELOC-NEXT: Function: 4 -; RELOC-NEXT: - Index: 6 +; RELOC-NEXT: - Index: 3 ; RELOC-NEXT: Kind: DATA ; RELOC-NEXT: Name: colliding_global2 ; RELOC-NEXT: Flags: [ ] ; RELOC-NEXT: Segment: 1 ; RELOC-NEXT: Size: 4 -; RELOC-NEXT: - Index: 7 +; RELOC-NEXT: - Index: 4 ; RELOC-NEXT: Kind: FUNCTION ; RELOC-NEXT: Name: get_global3A ; RELOC-NEXT: Flags: [ ] ; RELOC-NEXT: Function: 5 -; RELOC-NEXT: - Index: 8 -; RELOC-NEXT: Kind: DATA -; RELOC-NEXT: Name: colliding_global3 -; RELOC-NEXT: Flags: [ BINDING_LOCAL ] -; RELOC-NEXT: Segment: 2 -; RELOC-NEXT: Size: 4 -; RELOC-NEXT: - Index: 9 +; RELOC-NEXT: - Index: 5 ; RELOC-NEXT: Kind: FUNCTION ; RELOC-NEXT: Name: get_func1A ; RELOC-NEXT: Flags: [ ] ; RELOC-NEXT: Function: 6 -; RELOC-NEXT: - Index: 10 +; RELOC-NEXT: - Index: 6 ; RELOC-NEXT: Kind: FUNCTION ; RELOC-NEXT: Name: get_func2A ; RELOC-NEXT: Flags: [ ] ; RELOC-NEXT: Function: 7 -; RELOC-NEXT: - Index: 11 +; RELOC-NEXT: - Index: 7 ; RELOC-NEXT: Kind: FUNCTION ; RELOC-NEXT: Name: get_func3A ; RELOC-NEXT: Flags: [ ] ; RELOC-NEXT: Function: 8 -; RELOC-NEXT: - Index: 12 +; RELOC-NEXT: - Index: 8 ; RELOC-NEXT: Kind: FUNCTION ; RELOC-NEXT: Name: colliding_func1 ; RELOC-NEXT: Flags: [ ] ; RELOC-NEXT: Function: 9 -; RELOC-NEXT: - Index: 13 -; RELOC-NEXT: Kind: FUNCTION -; RELOC-NEXT: Name: colliding_func2 -; RELOC-NEXT: Flags: [ BINDING_LOCAL ] -; RELOC-NEXT: Function: 10 -; RELOC-NEXT: - Index: 14 -; RELOC-NEXT: Kind: FUNCTION -; RELOC-NEXT: Name: colliding_func3 -; RELOC-NEXT: Flags: [ BINDING_LOCAL ] -; RELOC-NEXT: Function: 11 -; RELOC-NEXT: - Index: 15 +; RELOC-NEXT: - Index: 9 ; RELOC-NEXT: Kind: FUNCTION ; RELOC-NEXT: Name: get_global1B ; RELOC-NEXT: Flags: [ ] ; RELOC-NEXT: Function: 12 -; RELOC-NEXT: - Index: 16 +; RELOC-NEXT: - Index: 10 ; RELOC-NEXT: Kind: DATA ; RELOC-NEXT: Name: colliding_global1 ; RELOC-NEXT: Flags: [ ] ; RELOC-NEXT: Segment: 0 ; RELOC-NEXT: Offset: 4 ; RELOC-NEXT: Size: 4 -; RELOC-NEXT: - Index: 17 +; RELOC-NEXT: - Index: 11 ; RELOC-NEXT: Kind: FUNCTION ; RELOC-NEXT: Name: get_global2B ; RELOC-NEXT: Flags: [ ] ; RELOC-NEXT: Function: 13 -; RELOC-NEXT: - Index: 18 -; RELOC-NEXT: Kind: DATA -; RELOC-NEXT: Name: colliding_global2 -; RELOC-NEXT: Flags: [ BINDING_LOCAL ] -; RELOC-NEXT: Segment: 1 -; RELOC-NEXT: Offset: 4 -; RELOC-NEXT: Size: 4 -; RELOC-NEXT: - Index: 19 +; RELOC-NEXT: - Index: 12 ; RELOC-NEXT: Kind: FUNCTION ; RELOC-NEXT: Name: get_global3B ; RELOC-NEXT: Flags: [ ] ; RELOC-NEXT: Function: 14 -; RELOC-NEXT: - Index: 20 -; RELOC-NEXT: Kind: DATA -; RELOC-NEXT: Name: colliding_global3 -; RELOC-NEXT: Flags: [ BINDING_LOCAL ] -; RELOC-NEXT: Segment: 2 -; RELOC-NEXT: Offset: 4 -; RELOC-NEXT: Size: 4 -; RELOC-NEXT: - Index: 21 +; RELOC-NEXT: - Index: 13 ; RELOC-NEXT: Kind: FUNCTION ; RELOC-NEXT: Name: get_func1B ; RELOC-NEXT: Flags: [ ] ; RELOC-NEXT: Function: 15 -; RELOC-NEXT: - Index: 22 +; RELOC-NEXT: - Index: 14 ; RELOC-NEXT: Kind: FUNCTION ; RELOC-NEXT: Name: get_func2B ; RELOC-NEXT: Flags: [ ] ; RELOC-NEXT: Function: 16 -; RELOC-NEXT: - Index: 23 +; RELOC-NEXT: - Index: 15 ; RELOC-NEXT: Kind: FUNCTION ; RELOC-NEXT: Name: get_func3B ; RELOC-NEXT: Flags: [ ] ; RELOC-NEXT: Function: 17 +; RELOC-NEXT: - Index: 16 +; RELOC-NEXT: Kind: FUNCTION +; RELOC-NEXT: Name: colliding_func1 +; RELOC-NEXT: Flags: [ BINDING_LOCAL ] +; RELOC-NEXT: Function: 0 +; RELOC-NEXT: - Index: 17 +; RELOC-NEXT: Kind: FUNCTION +; RELOC-NEXT: Name: colliding_func3 +; RELOC-NEXT: Flags: [ BINDING_LOCAL ] +; RELOC-NEXT: Function: 2 +; RELOC-NEXT: - Index: 18 +; RELOC-NEXT: Kind: DATA +; RELOC-NEXT: Name: colliding_global1 +; RELOC-NEXT: Flags: [ BINDING_LOCAL ] +; RELOC-NEXT: Segment: 0 +; RELOC-NEXT: Size: 4 +; RELOC-NEXT: - Index: 19 +; RELOC-NEXT: Kind: DATA +; RELOC-NEXT: Name: colliding_global3 +; RELOC-NEXT: Flags: [ BINDING_LOCAL ] +; RELOC-NEXT: Segment: 2 +; RELOC-NEXT: Size: 4 +; RELOC-NEXT: - Index: 20 +; RELOC-NEXT: Kind: FUNCTION +; RELOC-NEXT: Name: colliding_func2 +; RELOC-NEXT: Flags: [ BINDING_LOCAL ] +; RELOC-NEXT: Function: 10 +; RELOC-NEXT: - Index: 21 +; RELOC-NEXT: Kind: FUNCTION +; RELOC-NEXT: Name: colliding_func3 +; RELOC-NEXT: Flags: [ BINDING_LOCAL ] +; RELOC-NEXT: Function: 11 +; RELOC-NEXT: - Index: 22 +; RELOC-NEXT: Kind: DATA +; RELOC-NEXT: Name: colliding_global2 +; RELOC-NEXT: Flags: [ BINDING_LOCAL ] +; RELOC-NEXT: Segment: 1 +; RELOC-NEXT: Offset: 4 +; RELOC-NEXT: Size: 4 +; RELOC-NEXT: - Index: 23 +; RELOC-NEXT: Kind: DATA +; RELOC-NEXT: Name: colliding_global3 +; RELOC-NEXT: Flags: [ BINDING_LOCAL ] +; RELOC-NEXT: Segment: 2 +; RELOC-NEXT: Offset: 4 +; RELOC-NEXT: Size: 4 ; RELOC-NEXT: SegmentInfo: ; RELOC-NEXT: - Index: 0 ; RELOC-NEXT: Name: .bss.colliding_global1 diff --git a/lld/test/wasm/lto/relocatable-undefined.ll b/lld/test/wasm/lto/relocatable-undefined.ll new file mode 100644 index 00000000000000..b9780ee0309b26 --- /dev/null +++ b/lld/test/wasm/lto/relocatable-undefined.ll @@ -0,0 +1,36 @@ +; RUN: llvm-as %s -o %t.o +; RUN: wasm-ld -r -o %t.wasm %t.o +; RUN: obj2yaml %t.wasm | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +@missing_data = external global i32 +declare i32 @missing_func() local_unnamed_addr + +define i32 @foo() { +entry: + %0 = call i32 @missing_func() + %1 = load i32, i32* @missing_data, align 4 + ret i32 %1 +} + + +; CHECK: - Type: CUSTOM +; CHECK-NEXT: Name: linking +; CHECK-NEXT: Version: 2 +; CHECK-NEXT: SymbolTable: +; CHECK-NEXT: - Index: 0 +; CHECK-NEXT: Kind: FUNCTION +; CHECK-NEXT: Name: missing_func +; CHECK-NEXT: Flags: [ UNDEFINED ] +; CHECK-NEXT: Function: 0 +; CHECK-NEXT: - Index: 1 +; CHECK-NEXT: Kind: FUNCTION +; CHECK-NEXT: Name: foo +; CHECK-NEXT: Flags: [ ] +; CHECK-NEXT: Function: 1 +; CHECK-NEXT: - Index: 2 +; CHECK-NEXT: Kind: DATA +; CHECK-NEXT: Name: missing_data +; CHECK-NEXT: Flags: [ UNDEFINED ] diff --git a/lld/test/wasm/weak-alias.ll b/lld/test/wasm/weak-alias.ll index 0c856e1eafa090..a925c10ccda413 100644 --- a/lld/test/wasm/weak-alias.ll +++ b/lld/test/wasm/weak-alias.ll @@ -187,13 +187,13 @@ entry: ; RELOC-NEXT: - Type: CODE ; RELOC-NEXT: Relocations: ; RELOC-NEXT: - Type: R_WEBASSEMBLY_FUNCTION_INDEX_LEB -; RELOC-NEXT: Index: 4 +; RELOC-NEXT: Index: 1 ; RELOC-NEXT: Offset: 0x00000004 ; RELOC-NEXT: - Type: R_WEBASSEMBLY_FUNCTION_INDEX_LEB -; RELOC-NEXT: Index: 1 +; RELOC-NEXT: Index: 2 ; RELOC-NEXT: Offset: 0x00000013 ; RELOC-NEXT: - Type: R_WEBASSEMBLY_FUNCTION_INDEX_LEB -; RELOC-NEXT: Index: 4 +; RELOC-NEXT: Index: 1 ; RELOC-NEXT: Offset: 0x0000001C ; RELOC-NEXT: - Type: R_WEBASSEMBLY_GLOBAL_INDEX_LEB ; RELOC-NEXT: Index: 6 @@ -202,10 +202,10 @@ entry: ; RELOC-NEXT: Index: 6 ; RELOC-NEXT: Offset: 0x00000032 ; RELOC-NEXT: - Type: R_WEBASSEMBLY_TABLE_INDEX_SLEB -; RELOC-NEXT: Index: 4 +; RELOC-NEXT: Index: 1 ; RELOC-NEXT: Offset: 0x0000003A ; RELOC-NEXT: - Type: R_WEBASSEMBLY_FUNCTION_INDEX_LEB -; RELOC-NEXT: Index: 4 +; RELOC-NEXT: Index: 1 ; RELOC-NEXT: Offset: 0x00000043 ; RELOC-NEXT: - Type: R_WEBASSEMBLY_GLOBAL_INDEX_LEB ; RELOC-NEXT: Index: 6 @@ -217,10 +217,10 @@ entry: ; RELOC-NEXT: Index: 6 ; RELOC-NEXT: Offset: 0x00000068 ; RELOC-NEXT: - Type: R_WEBASSEMBLY_TABLE_INDEX_SLEB -; RELOC-NEXT: Index: 1 +; RELOC-NEXT: Index: 2 ; RELOC-NEXT: Offset: 0x00000070 ; RELOC-NEXT: - Type: R_WEBASSEMBLY_FUNCTION_INDEX_LEB -; RELOC-NEXT: Index: 1 +; RELOC-NEXT: Index: 2 ; RELOC-NEXT: Offset: 0x00000079 ; RELOC-NEXT: - Type: R_WEBASSEMBLY_GLOBAL_INDEX_LEB ; RELOC-NEXT: Index: 6 @@ -259,24 +259,24 @@ entry: ; RELOC-NEXT: Function: 0 ; RELOC-NEXT: - Index: 1 ; RELOC-NEXT: Kind: FUNCTION +; RELOC-NEXT: Name: alias_fn +; RELOC-NEXT: Flags: [ BINDING_WEAK ] +; RELOC-NEXT: Function: 1 +; RELOC-NEXT: - Index: 2 +; RELOC-NEXT: Kind: FUNCTION ; RELOC-NEXT: Name: direct_fn ; RELOC-NEXT: Flags: [ ] ; RELOC-NEXT: Function: 1 -; RELOC-NEXT: - Index: 2 +; RELOC-NEXT: - Index: 3 ; RELOC-NEXT: Kind: FUNCTION ; RELOC-NEXT: Name: call_direct ; RELOC-NEXT: Flags: [ ] ; RELOC-NEXT: Function: 2 -; RELOC-NEXT: - Index: 3 +; RELOC-NEXT: - Index: 4 ; RELOC-NEXT: Kind: FUNCTION ; RELOC-NEXT: Name: call_alias ; RELOC-NEXT: Flags: [ ] ; RELOC-NEXT: Function: 3 -; RELOC-NEXT: - Index: 4 -; RELOC-NEXT: Kind: FUNCTION -; RELOC-NEXT: Name: alias_fn -; RELOC-NEXT: Flags: [ BINDING_WEAK ] -; RELOC-NEXT: Function: 1 ; RELOC-NEXT: - Index: 5 ; RELOC-NEXT: Kind: FUNCTION ; RELOC-NEXT: Name: call_alias_ptr diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index fab4c0c4ed8bb7..ade15a19f66e0f 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -434,7 +434,9 @@ static Symbol *handleUndefined(StringRef Name) { static UndefinedGlobal * createUndefinedGlobal(StringRef Name, llvm::wasm::WasmGlobalType *Type) { auto *Sym = - cast(Symtab->addUndefinedGlobal(Name, 0, nullptr, Type)); + cast(Symtab->addUndefinedGlobal(Name, Name, + DefaultModule, 0, + nullptr, Type)); Config->AllowUndefinedSymbols.insert(Sym->getName()); Sym->IsUsedInRegularObj = true; return Sym; diff --git a/lld/wasm/InputChunks.cpp b/lld/wasm/InputChunks.cpp index 1145c670253c81..f5884a1beea4b8 100644 --- a/lld/wasm/InputChunks.cpp +++ b/lld/wasm/InputChunks.cpp @@ -23,7 +23,7 @@ using namespace llvm::support::endian; using namespace lld; using namespace lld::wasm; -static StringRef ReloctTypeToString(uint8_t RelocType) { +static StringRef reloctTypeToString(uint8_t RelocType) { switch (RelocType) { #define WASM_RELOC(NAME, REL) \ case REL: \ @@ -77,7 +77,7 @@ void InputChunk::verifyRelocTargets() const { warn("expected LEB at relocation site be 5-byte padded"); uint32_t ExpectedValue = File->calcExpectedValue(Rel); if (ExpectedValue != ExistingValue) - warn("unexpected existing value for " + ReloctTypeToString(Rel.Type) + + warn("unexpected existing value for " + reloctTypeToString(Rel.Type) + ": existing=" + Twine(ExistingValue) + " expected=" + Twine(ExpectedValue)); } @@ -103,7 +103,7 @@ void InputChunk::writeTo(uint8_t *Buf) const { for (const WasmRelocation &Rel : Relocations) { uint8_t *Loc = Buf + Rel.Offset + Off; uint32_t Value = File->calcNewValue(Rel); - LLVM_DEBUG(dbgs() << "apply reloc: type=" << ReloctTypeToString(Rel.Type) + LLVM_DEBUG(dbgs() << "apply reloc: type=" << reloctTypeToString(Rel.Type) << " addend=" << Rel.Addend << " index=" << Rel.Index << " value=" << Value << " offset=" << Rel.Offset << "\n"); diff --git a/lld/wasm/InputFiles.cpp b/lld/wasm/InputFiles.cpp index e5da23db37739c..1e54272163545d 100644 --- a/lld/wasm/InputFiles.cpp +++ b/lld/wasm/InputFiles.cpp @@ -377,11 +377,15 @@ Symbol *ObjFile::createUndefined(const WasmSymbol &Sym) { switch (Sym.Info.Kind) { case WASM_SYMBOL_TYPE_FUNCTION: - return Symtab->addUndefinedFunction(Name, Flags, this, Sym.Signature); + return Symtab->addUndefinedFunction(Name, Sym.Info.ImportName, + Sym.Info.ImportModule, Flags, this, + Sym.Signature); case WASM_SYMBOL_TYPE_DATA: return Symtab->addUndefinedData(Name, Flags, this); case WASM_SYMBOL_TYPE_GLOBAL: - return Symtab->addUndefinedGlobal(Name, Flags, this, Sym.GlobalType); + return Symtab->addUndefinedGlobal(Name, Sym.Info.ImportName, + Sym.Info.ImportModule, Flags, this, + Sym.GlobalType); case WASM_SYMBOL_TYPE_SECTION: llvm_unreachable("section symbols cannot be undefined"); } @@ -445,7 +449,8 @@ static Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &ObjSym, if (ObjSym.isUndefined()) { if (ObjSym.isExecutable()) - return Symtab->addUndefinedFunction(Name, Flags, &F, nullptr); + return Symtab->addUndefinedFunction(Name, Name, DefaultModule, Flags, &F, + nullptr); return Symtab->addUndefinedData(Name, Flags, &F); } diff --git a/lld/wasm/LTO.cpp b/lld/wasm/LTO.cpp index 96a947e29d4124..e994691cceb2e0 100644 --- a/lld/wasm/LTO.cpp +++ b/lld/wasm/LTO.cpp @@ -79,8 +79,9 @@ BitcodeCompiler::~BitcodeCompiler() = default; static void undefine(Symbol *S) { if (auto F = dyn_cast(S)) - replaceSymbol(F, F->getName(), 0, F->getFile(), - F->Signature); + replaceSymbol(F, F->getName(), F->getName(), + DefaultModule, 0, + F->getFile(), F->Signature); else if (isa(S)) replaceSymbol(S, S->getName(), 0, S->getFile()); else diff --git a/lld/wasm/LTO.h b/lld/wasm/LTO.h index cf726de5643ae9..d771301f224dca 100644 --- a/lld/wasm/LTO.h +++ b/lld/wasm/LTO.h @@ -23,6 +23,7 @@ #include "lld/Common/LLVM.h" #include "llvm/ADT/SmallString.h" +#include "Writer.h" #include #include diff --git a/lld/wasm/MarkLive.cpp b/lld/wasm/MarkLive.cpp index 3bbd1148f6ad35..723ac4e3c6baa4 100644 --- a/lld/wasm/MarkLive.cpp +++ b/lld/wasm/MarkLive.cpp @@ -85,7 +85,7 @@ void lld::wasm::markLive() { // equal to null pointer, only reachable via direct call). if (Reloc.Type == R_WEBASSEMBLY_TABLE_INDEX_SLEB || Reloc.Type == R_WEBASSEMBLY_TABLE_INDEX_I32) { - FunctionSymbol *FuncSym = cast(Sym); + auto *FuncSym = cast(Sym); if (FuncSym->hasTableIndex() && FuncSym->getTableIndex() == 0) continue; } diff --git a/lld/wasm/SymbolTable.cpp b/lld/wasm/SymbolTable.cpp index c7983196db36c8..65441d293b50be 100644 --- a/lld/wasm/SymbolTable.cpp +++ b/lld/wasm/SymbolTable.cpp @@ -314,8 +314,9 @@ Symbol *SymbolTable::addDefinedEvent(StringRef Name, uint32_t Flags, return S; } -Symbol *SymbolTable::addUndefinedFunction(StringRef Name, uint32_t Flags, - InputFile *File, +Symbol *SymbolTable::addUndefinedFunction(StringRef Name, StringRef ImportName, + StringRef ImportModule, + uint32_t Flags, InputFile *File, const WasmSignature *Sig) { LLVM_DEBUG(dbgs() << "addUndefinedFunction: " << Name << " [" << (Sig ? toString(*Sig) : "none") << "]\n"); @@ -325,7 +326,8 @@ Symbol *SymbolTable::addUndefinedFunction(StringRef Name, uint32_t Flags, std::tie(S, WasInserted) = insert(Name, File); if (WasInserted) - replaceSymbol(S, Name, Flags, File, Sig); + replaceSymbol(S, Name, ImportName, ImportModule, Flags, + File, Sig); else if (auto *Lazy = dyn_cast(S)) Lazy->fetch(); else @@ -351,7 +353,8 @@ Symbol *SymbolTable::addUndefinedData(StringRef Name, uint32_t Flags, return S; } -Symbol *SymbolTable::addUndefinedGlobal(StringRef Name, uint32_t Flags, +Symbol *SymbolTable::addUndefinedGlobal(StringRef Name, StringRef ImportName, + StringRef ImportModule, uint32_t Flags, InputFile *File, const WasmGlobalType *Type) { LLVM_DEBUG(dbgs() << "addUndefinedGlobal: " << Name << "\n"); @@ -361,7 +364,8 @@ Symbol *SymbolTable::addUndefinedGlobal(StringRef Name, uint32_t Flags, std::tie(S, WasInserted) = insert(Name, File); if (WasInserted) - replaceSymbol(S, Name, Flags, File, Type); + replaceSymbol(S, Name, ImportName, ImportModule, Flags, + File, Type); else if (auto *Lazy = dyn_cast(S)) Lazy->fetch(); else if (S->isDefined()) diff --git a/lld/wasm/SymbolTable.h b/lld/wasm/SymbolTable.h index 5e38e30692abe8..64678aee50055c 100644 --- a/lld/wasm/SymbolTable.h +++ b/lld/wasm/SymbolTable.h @@ -59,11 +59,13 @@ class SymbolTable { Symbol *addDefinedEvent(StringRef Name, uint32_t Flags, InputFile *File, InputEvent *E); - Symbol *addUndefinedFunction(StringRef Name, uint32_t Flags, InputFile *File, - const WasmSignature *Signature); + Symbol *addUndefinedFunction(StringRef Name, StringRef ImportName, + StringRef ImportModule, uint32_t Flags, + InputFile *File, const WasmSignature *Signature); Symbol *addUndefinedData(StringRef Name, uint32_t Flags, InputFile *File); - Symbol *addUndefinedGlobal(StringRef Name, uint32_t Flags, InputFile *File, - const WasmGlobalType *Type); + Symbol *addUndefinedGlobal(StringRef Name, StringRef ImportName, + StringRef ImportModule, uint32_t Flags, + InputFile *File, const WasmGlobalType *Type); void addLazy(ArchiveFile *F, const llvm::object::Archive::Symbol *Sym); diff --git a/lld/wasm/Symbols.h b/lld/wasm/Symbols.h index 11ee66550cdccb..a065338ac1e42c 100644 --- a/lld/wasm/Symbols.h +++ b/lld/wasm/Symbols.h @@ -149,13 +149,19 @@ class DefinedFunction : public FunctionSymbol { class UndefinedFunction : public FunctionSymbol { public: - UndefinedFunction(StringRef Name, uint32_t Flags, InputFile *File = nullptr, + UndefinedFunction(StringRef Name, StringRef ImportName, + StringRef ImportModule, uint32_t Flags, + InputFile *File = nullptr, const WasmSignature *Type = nullptr) - : FunctionSymbol(Name, UndefinedFunctionKind, Flags, File, Type) {} + : FunctionSymbol(Name, UndefinedFunctionKind, Flags, File, Type), + ImportName(ImportName), ImportModule(ImportModule) {} static bool classof(const Symbol *S) { return S->kind() == UndefinedFunctionKind; } + + StringRef ImportName; + StringRef ImportModule; }; class SectionSymbol : public Symbol { @@ -261,13 +267,18 @@ class DefinedGlobal : public GlobalSymbol { class UndefinedGlobal : public GlobalSymbol { public: - UndefinedGlobal(StringRef Name, uint32_t Flags, InputFile *File = nullptr, + UndefinedGlobal(StringRef Name, StringRef ImportName, StringRef ImportModule, + uint32_t Flags, InputFile *File = nullptr, const WasmGlobalType *Type = nullptr) - : GlobalSymbol(Name, UndefinedGlobalKind, Flags, File, Type) {} + : GlobalSymbol(Name, UndefinedGlobalKind, Flags, File, Type), + ImportName(ImportName), ImportModule(ImportModule) {} static bool classof(const Symbol *S) { return S->kind() == UndefinedGlobalKind; } + + StringRef ImportName; + StringRef ImportModule; }; // Wasm events are features that suspend the current execution and transfer the diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp index 819d4298fef290..902ca61ca19b71 100644 --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -39,8 +39,9 @@ using namespace llvm::wasm; using namespace lld; using namespace lld::wasm; -static constexpr int kStackAlignment = 16; -static constexpr const char *kFunctionTableName = "__indirect_function_table"; +static constexpr int StackAlignment = 16; +static constexpr const char *FunctionTableName = "__indirect_function_table"; +const char *lld::wasm::DefaultModule = "env"; namespace { @@ -155,7 +156,7 @@ void Writer::createImportSection() { if (Config->ImportMemory) { WasmImport Import; - Import.Module = "env"; + Import.Module = DefaultModule; Import.Field = "memory"; Import.Kind = WASM_EXTERNAL_MEMORY; Import.Memory.Flags = 0; @@ -172,8 +173,8 @@ void Writer::createImportSection() { if (Config->ImportTable) { uint32_t TableSize = TableBase + IndirectFunctions.size(); WasmImport Import; - Import.Module = "env"; - Import.Field = kFunctionTableName; + Import.Module = DefaultModule; + Import.Field = FunctionTableName; Import.Kind = WASM_EXTERNAL_TABLE; Import.Table.ElemType = WASM_TYPE_FUNCREF; Import.Table.Limits = {0, TableSize, 0}; @@ -182,8 +183,17 @@ void Writer::createImportSection() { for (const Symbol *Sym : ImportedSymbols) { WasmImport Import; - Import.Module = "env"; - Import.Field = Sym->getName(); + if (auto *F = dyn_cast(Sym)) { + Import.Field = F->ImportName; + Import.Module = F->ImportModule; + } else if (auto *G = dyn_cast(Sym)) { + Import.Field = G->ImportName; + Import.Module = G->ImportModule; + } else { + Import.Field = Sym->getName(); + Import.Module = DefaultModule; + } + if (auto *FunctionSym = dyn_cast(Sym)) { Import.Kind = WASM_EXTERNAL_FUNCTION; Import.SigIndex = lookupType(*FunctionSym->Signature); @@ -441,6 +451,13 @@ static uint32_t getWasmFlags(const Symbol *Sym) { Flags |= WASM_SYMBOL_VISIBILITY_HIDDEN; if (Sym->isUndefined()) Flags |= WASM_SYMBOL_UNDEFINED; + if (auto *F = dyn_cast(Sym)) { + if (F->getName() != F->ImportName) + Flags |= WASM_SYMBOL_EXPLICIT_NAME; + } else if (auto *G = dyn_cast(Sym)) { + if (G->getName() != G->ImportName) + Flags |= WASM_SYMBOL_EXPLICIT_NAME; + } return Flags; } @@ -506,15 +523,18 @@ void Writer::createLinkingSection() { if (auto *F = dyn_cast(Sym)) { writeUleb128(Sub.OS, F->getFunctionIndex(), "index"); - if (Sym->isDefined()) + if (Sym->isDefined() || + (Flags & WASM_SYMBOL_EXPLICIT_NAME) != 0) writeStr(Sub.OS, Sym->getName(), "sym name"); } else if (auto *G = dyn_cast(Sym)) { writeUleb128(Sub.OS, G->getGlobalIndex(), "index"); - if (Sym->isDefined()) + if (Sym->isDefined() || + (Flags & WASM_SYMBOL_EXPLICIT_NAME) != 0) writeStr(Sub.OS, Sym->getName(), "sym name"); } else if (auto *E = dyn_cast(Sym)) { writeUleb128(Sub.OS, E->getEventIndex(), "index"); - if (Sym->isDefined()) + if (Sym->isDefined() || + (Flags & WASM_SYMBOL_EXPLICIT_NAME) != 0) writeStr(Sub.OS, Sym->getName(), "sym name"); } else if (isa(Sym)) { writeStr(Sub.OS, Sym->getName(), "sym name"); @@ -663,9 +683,9 @@ void Writer::layoutMemory() { auto PlaceStack = [&]() { if (Config->Relocatable || Config->Shared) return; - MemoryPtr = alignTo(MemoryPtr, kStackAlignment); - if (Config->ZStackSize != alignTo(Config->ZStackSize, kStackAlignment)) - error("stack size must be " + Twine(kStackAlignment) + "-byte aligned"); + MemoryPtr = alignTo(MemoryPtr, StackAlignment); + if (Config->ZStackSize != alignTo(Config->ZStackSize, StackAlignment)) + error("stack size must be " + Twine(StackAlignment) + "-byte aligned"); log("mem: stack size = " + Twine(Config->ZStackSize)); log("mem: stack base = " + Twine(MemoryPtr)); MemoryPtr += Config->ZStackSize; @@ -814,7 +834,7 @@ void Writer::calculateExports() { Exports.push_back(WasmExport{"memory", WASM_EXTERNAL_MEMORY, 0}); if (!Config->Relocatable && Config->ExportTable) - Exports.push_back(WasmExport{kFunctionTableName, WASM_EXTERNAL_TABLE, 0}); + Exports.push_back(WasmExport{FunctionTableName, WASM_EXTERNAL_TABLE, 0}); unsigned FakeGlobalIndex = NumImportedGlobals + InputGlobals.size(); @@ -858,40 +878,42 @@ void Writer::assignSymtab() { StringMap SectionSymbolIndices; unsigned SymbolIndex = SymtabEntries.size(); - for (ObjFile *File : Symtab->ObjectFiles) { - LLVM_DEBUG(dbgs() << "Symtab entries: " << File->getName() << "\n"); - for (Symbol *Sym : File->getSymbols()) { - if (Sym->getFile() != File) - continue; - - if (auto *S = dyn_cast(Sym)) { - StringRef Name = S->getName(); - if (CustomSectionMapping.count(Name) == 0) - continue; - - auto SSI = SectionSymbolIndices.find(Name); - if (SSI != SectionSymbolIndices.end()) { - Sym->setOutputSymbolIndex(SSI->second); - continue; - } - SectionSymbolIndices[Name] = SymbolIndex; - CustomSectionSymbols[Name] = cast(Sym); + auto AddSymbol = [&](Symbol *Sym) { + if (auto *S = dyn_cast(Sym)) { + StringRef Name = S->getName(); + if (CustomSectionMapping.count(Name) == 0) + return; - Sym->markLive(); + auto SSI = SectionSymbolIndices.find(Name); + if (SSI != SectionSymbolIndices.end()) { + Sym->setOutputSymbolIndex(SSI->second); + return; } - // (Since this is relocatable output, GC is not performed so symbols must - // be live.) - assert(Sym->isLive()); - Sym->setOutputSymbolIndex(SymbolIndex++); - SymtabEntries.emplace_back(Sym); + SectionSymbolIndices[Name] = SymbolIndex; + CustomSectionSymbols[Name] = cast(Sym); + + Sym->markLive(); } - } - // For the moment, relocatable output doesn't contain any synthetic functions, - // so no need to look through the Symtab for symbols not referenced by - // Symtab->ObjectFiles. + // (Since this is relocatable output, GC is not performed so symbols must + // be live.) + assert(Sym->isLive()); + Sym->setOutputSymbolIndex(SymbolIndex++); + SymtabEntries.emplace_back(Sym); + }; + + for (Symbol *Sym : Symtab->getSymbols()) + if (!Sym->isLazy()) + AddSymbol(Sym); + + for (ObjFile *File : Symtab->ObjectFiles) { + LLVM_DEBUG(dbgs() << "Local symtab entries: " << File->getName() << "\n"); + for (Symbol *Sym : File->getSymbols()) + if (Sym->isLocal()) + AddSymbol(Sym); + } } uint32_t Writer::lookupType(const WasmSignature &Sig) { diff --git a/lld/wasm/Writer.h b/lld/wasm/Writer.h index a931ba9c29a890..e62f470642285d 100644 --- a/lld/wasm/Writer.h +++ b/lld/wasm/Writer.h @@ -15,6 +15,8 @@ namespace wasm { void writeResult(); +extern const char *DefaultModule; + } // namespace wasm } // namespace lld From 16b7c0877ddfda5b4d6de21fcbce687854c5216d Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 12 Feb 2019 12:26:10 +0000 Subject: [PATCH 124/125] [WebAssembly] Backport custom import name changes for clang to 8.0. Specifically, this backports r352106, r352108, r352930, and r352936 to the 8.0 branch. The trunk patches don't apply cleanly to 8.0 due to some contemporaneous mass-rename and mass-clang-tidy patches, so this merges them to simplify rebasing. r352106 [WebAssembly] Add an import_module function attribute r352108 [WebAssembly] Add WebAssemblyImportModule to pragma-attribute-supported-attributes-list.test r352930 [WebAssembly] Add an import_field function attribute r352936 [WebAssembly] Fix ImportName's position in this test. By Dan Gohman! llvm-svn: 353834 --- clang/include/clang/Basic/Attr.td | 17 +++++++ clang/include/clang/Basic/AttrDocs.td | 35 ++++++++++++- clang/lib/CodeGen/TargetInfo.cpp | 16 ++++++ clang/lib/Sema/SemaDeclAttr.cpp | 51 +++++++++++++++++++ clang/test/CodeGen/wasm-import-module.c | 11 ++++ clang/test/CodeGen/wasm-import-name.c | 11 ++++ ...a-attribute-supported-attributes-list.test | 2 + 7 files changed, 142 insertions(+), 1 deletion(-) create mode 100644 clang/test/CodeGen/wasm-import-module.c create mode 100644 clang/test/CodeGen/wasm-import-name.c diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 1fe1dd39948a1b..bf1068019b77bd 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -329,6 +329,7 @@ def TargetMSP430 : TargetArch<["msp430"]>; def TargetRISCV : TargetArch<["riscv32", "riscv64"]>; def TargetX86 : TargetArch<["x86"]>; def TargetAnyX86 : TargetArch<["x86", "x86_64"]>; +def TargetWebAssembly : TargetArch<["wasm32", "wasm64"]>; def TargetWindows : TargetArch<["x86", "x86_64", "arm", "thumb", "aarch64"]> { let OSes = ["Win32"]; } @@ -1500,6 +1501,22 @@ def AMDGPUNumVGPR : InheritableAttr { let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">; } +def WebAssemblyImportModule : InheritableAttr, + TargetSpecificAttr { + let Spellings = [Clang<"import_module">]; + let Args = [StringArgument<"ImportModule">]; + let Documentation = [WebAssemblyImportModuleDocs]; + let Subjects = SubjectList<[Function], ErrorDiag>; +} + +def WebAssemblyImportName : InheritableAttr, + TargetSpecificAttr { + let Spellings = [Clang<"import_name">]; + let Args = [StringArgument<"ImportName">]; + let Documentation = [WebAssemblyImportNameDocs]; + let Subjects = SubjectList<[Function], ErrorDiag>; +} + def NoSplitStack : InheritableAttr { let Spellings = [GCC<"no_split_stack">]; let Subjects = SubjectList<[Function], ErrorDiag>; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 5773a92c9c15dd..94c8343d2368f5 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -3652,7 +3652,40 @@ definition ( For more information see `gcc documentation `_ or `msvc documentation `_. -}]; +}]; } + +def WebAssemblyImportModuleDocs : Documentation { + let Category = DocCatFunction; + let Content = [{ +Clang supports the ``__attribute__((import_module()))`` +attribute for the WebAssembly target. This attribute may be attached to a +function declaration, where it modifies how the symbol is to be imported +within the WebAssembly linking environment. + +WebAssembly imports use a two-level namespace scheme, consisting of a module +name, which typically identifies a module from which to import, and a field +name, which typically identifies a field from that module to import. By +default, module names for C/C++ symbols are assigned automatically by the +linker. This attribute can be used to override the default behavior, and +reuqest a specific module name be used instead. + }]; +} + +def WebAssemblyImportNameDocs : Documentation { + let Category = DocCatFunction; + let Content = [{ +Clang supports the ``__attribute__((import_name()))`` +attribute for the WebAssembly target. This attribute may be attached to a +function declaration, where it modifies how the symbol is to be imported +within the WebAssembly linking environment. + +WebAssembly imports use a two-level namespace scheme, consisting of a module +name, which typically identifies a module from which to import, and a field +name, which typically identifies a field from that module to import. By +default, field names for C/C++ symbols are the same as their C/C++ symbol +names. This attribute can be used to override the default behavior, and +reuqest a specific field name be used instead. + }]; } def ArtificialDocs : Documentation { diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index f5a770ed9d8444..94fccb15ff6ec8 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -761,6 +761,22 @@ class WebAssemblyTargetCodeGenInfo final : public TargetCodeGenInfo { void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override { + TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); + if (const auto *FD = dyn_cast_or_null(D)) { + if (const auto *Attr = FD->getAttr()) { + llvm::Function *Fn = cast(GV); + llvm::AttrBuilder B; + B.addAttribute("wasm-import-module", Attr->getImportModule()); + Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); + } + if (const auto *Attr = FD->getAttr()) { + llvm::Function *Fn = cast(GV); + llvm::AttrBuilder B; + B.addAttribute("wasm-import-name", Attr->getImportName()); + Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); + } + } + if (auto *FD = dyn_cast_or_null(D)) { llvm::Function *Fn = cast(GV); if (!FD->doesThisDeclarationHaveABody() && !FD->hasPrototype()) diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 139ac8aab4333c..8819f0396a270d 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -5577,6 +5577,51 @@ static void handleAVRSignalAttr(Sema &S, Decl *D, const ParsedAttr &AL) { handleSimpleAttribute(S, D, AL); } +static void handleWebAssemblyImportModuleAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + if (!isFunctionOrMethod(D)) { + S.Diag(D->getLocation(), diag::warn_attribute_wrong_decl_type) + << "'import_module'" << ExpectedFunction; + return; + } + + auto *FD = cast(D); + if (FD->isThisDeclarationADefinition()) { + S.Diag(D->getLocation(), diag::err_alias_is_definition) << FD << 0; + return; + } + + StringRef Str; + SourceLocation ArgLoc; + if (!S.checkStringLiteralArgumentAttr(AL, 0, Str, &ArgLoc)) + return; + + FD->addAttr(::new (S.Context) WebAssemblyImportModuleAttr( + AL.getRange(), S.Context, Str, + AL.getAttributeSpellingListIndex())); +} + +static void handleWebAssemblyImportNameAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + if (!isFunctionOrMethod(D)) { + S.Diag(D->getLocation(), diag::warn_attribute_wrong_decl_type) + << "'import_name'" << ExpectedFunction; + return; + } + + auto *FD = cast(D); + if (FD->isThisDeclarationADefinition()) { + S.Diag(D->getLocation(), diag::err_alias_is_definition) << FD << 0; + return; + } + + StringRef Str; + SourceLocation ArgLoc; + if (!S.checkStringLiteralArgumentAttr(AL, 0, Str, &ArgLoc)) + return; + + FD->addAttr(::new (S.Context) WebAssemblyImportNameAttr( + AL.getRange(), S.Context, Str, + AL.getAttributeSpellingListIndex())); +} static void handleRISCVInterruptAttr(Sema &S, Decl *D, const ParsedAttr &AL) { @@ -6330,6 +6375,12 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, case ParsedAttr::AT_AVRSignal: handleAVRSignalAttr(S, D, AL); break; + case ParsedAttr::AT_WebAssemblyImportModule: + handleWebAssemblyImportModuleAttr(S, D, AL); + break; + case ParsedAttr::AT_WebAssemblyImportName: + handleWebAssemblyImportNameAttr(S, D, AL); + break; case ParsedAttr::AT_IBAction: handleSimpleAttribute(S, D, AL); break; diff --git a/clang/test/CodeGen/wasm-import-module.c b/clang/test/CodeGen/wasm-import-module.c new file mode 100644 index 00000000000000..866a3a459949bc --- /dev/null +++ b/clang/test/CodeGen/wasm-import-module.c @@ -0,0 +1,11 @@ +// RUN: %clang_cc1 -triple wasm32-unknown-unknown-wasm -emit-llvm -o - %s | FileCheck %s + +void __attribute__((import_module("bar"))) foo(void); + +void call(void) { + foo(); +} + +// CHECK: declare void @foo() [[A:#[0-9]+]] + +// CHECK: attributes [[A]] = {{{.*}} "wasm-import-module"="bar" {{.*}}} diff --git a/clang/test/CodeGen/wasm-import-name.c b/clang/test/CodeGen/wasm-import-name.c new file mode 100644 index 00000000000000..7c3b094b9e435c --- /dev/null +++ b/clang/test/CodeGen/wasm-import-name.c @@ -0,0 +1,11 @@ +// RUN: %clang_cc1 -triple wasm32-unknown-unknown-wasm -emit-llvm -o - %s | FileCheck %s + +void __attribute__((import_name("bar"))) foo(void); + +void call(void) { + foo(); +} + +// CHECK: declare void @foo() [[A:#[0-9]+]] + +// CHECK: attributes [[A]] = {{{.*}} "wasm-import-name"="bar" {{.*}}} diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test index 9a6bcca1bd36bf..98935fc21355b9 100644 --- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test +++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test @@ -136,6 +136,8 @@ // CHECK-NEXT: WarnUnusedResult (SubjectMatchRule_objc_method, SubjectMatchRule_enum, SubjectMatchRule_record, SubjectMatchRule_hasType_functionType) // CHECK-NEXT: Weak (SubjectMatchRule_variable, SubjectMatchRule_function, SubjectMatchRule_record) // CHECK-NEXT: WeakRef (SubjectMatchRule_variable, SubjectMatchRule_function) +// CHECK-NEXT: WebAssemblyImportModule (SubjectMatchRule_function) +// CHECK-NEXT: WebAssemblyImportName (SubjectMatchRule_function) // CHECK-NEXT: WorkGroupSizeHint (SubjectMatchRule_function) // CHECK-NEXT: XRayInstrument (SubjectMatchRule_function, SubjectMatchRule_objc_method) // CHECK-NEXT: XRayLogArgs (SubjectMatchRule_function, SubjectMatchRule_objc_method) From 33a2b521c91475e12126fbe1d5b800d26f90a2fc Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 12 Feb 2019 12:27:08 +0000 Subject: [PATCH 125/125] [WebAssembly] Backport custom import name changes for LLVM to 8.0. Specifically, this backports r352479, r352931, r353474, and r353476 to the 8.0 branch. The trunk patches don't apply cleanly to 8.0 due to some contemporaneous mass-rename and mass-clang-tidy patches, so this merges them to simplify rebasing. r352479 [WebAssembly] Re-enable main-function signature rewriting r352931 [WebAssembly] Add codegen support for the import_field attribute r353474 [WebAssembly] Fix imported function symbol names that differ from their import names in the .o format r353476 [WebAssembly] Update test output after rL353474. NFC. By Dan Gohman! llvm-svn: 353835 --- llvm/include/llvm/BinaryFormat/Wasm.h | 4 ++- llvm/include/llvm/MC/MCSymbolWasm.h | 22 +++++++++--- llvm/lib/MC/WasmObjectWriter.cpp | 25 +++++++------ llvm/lib/Object/WasmObjectFile.cpp | 22 +++++++++--- .../WebAssemblyTargetStreamer.cpp | 11 ++++-- .../MCTargetDesc/WebAssemblyTargetStreamer.h | 13 +++++-- .../WebAssembly/WebAssemblyAsmPrinter.cpp | 9 ++++- .../WebAssemblyFixFunctionBitcasts.cpp | 36 ++++++++++++------- llvm/test/CodeGen/WebAssembly/call.ll | 4 +-- .../WebAssembly/function-bitcasts-varargs.ll | 2 +- .../CodeGen/WebAssembly/function-bitcasts.ll | 2 +- .../test/CodeGen/WebAssembly/import-module.ll | 3 +- .../CodeGen/WebAssembly/main-declaration.ll | 16 ++++----- llvm/test/CodeGen/WebAssembly/main-no-args.ll | 13 +++---- .../CodeGen/WebAssembly/main-three-args.ll | 16 +++++++++ .../CodeGen/WebAssembly/main-with-args.ll | 2 +- .../MC/WebAssembly/external-func-address.ll | 4 +-- llvm/test/MC/WebAssembly/import-module.ll | 31 ++++++++++++++++ llvm/tools/yaml2obj/yaml2wasm.cpp | 3 +- 19 files changed, 176 insertions(+), 62 deletions(-) create mode 100644 llvm/test/CodeGen/WebAssembly/main-three-args.ll create mode 100644 llvm/test/MC/WebAssembly/import-module.ll diff --git a/llvm/include/llvm/BinaryFormat/Wasm.h b/llvm/include/llvm/BinaryFormat/Wasm.h index d9f0f94b298d4e..b02ddb6b7e299d 100644 --- a/llvm/include/llvm/BinaryFormat/Wasm.h +++ b/llvm/include/llvm/BinaryFormat/Wasm.h @@ -165,7 +165,8 @@ struct WasmSymbolInfo { StringRef Name; uint8_t Kind; uint32_t Flags; - StringRef Module; // For undefined symbols the module name of the import + StringRef ImportModule; // For undefined symbols the module of the import + StringRef ImportName; // For undefined symbols the name of the import union { // For function or global symbols, the index in function or global index // space. @@ -284,6 +285,7 @@ const unsigned WASM_SYMBOL_BINDING_LOCAL = 0x2; const unsigned WASM_SYMBOL_VISIBILITY_DEFAULT = 0x0; const unsigned WASM_SYMBOL_VISIBILITY_HIDDEN = 0x4; const unsigned WASM_SYMBOL_UNDEFINED = 0x10; +const unsigned WASM_SYMBOL_EXPLICIT_NAME = 0x40; #define WASM_RELOC(name, value) name = value, diff --git a/llvm/include/llvm/MC/MCSymbolWasm.h b/llvm/include/llvm/MC/MCSymbolWasm.h index 8e66dc881d0fba..34639b6ebb6408 100644 --- a/llvm/include/llvm/MC/MCSymbolWasm.h +++ b/llvm/include/llvm/MC/MCSymbolWasm.h @@ -19,7 +19,8 @@ class MCSymbolWasm : public MCSymbol { bool IsWeak = false; bool IsHidden = false; bool IsComdat = false; - std::string ModuleName; + Optional ImportModule; + Optional ImportName; wasm::WasmSignature *Signature = nullptr; Optional GlobalType; Optional EventType; @@ -32,7 +33,7 @@ class MCSymbolWasm : public MCSymbol { // Use a module name of "env" for now, for compatibility with existing tools. // This is temporary, and may change, as the ABI is not yet stable. MCSymbolWasm(const StringMapEntry *Name, bool isTemporary) - : MCSymbol(SymbolKindWasm, Name, isTemporary), ModuleName("env") {} + : MCSymbol(SymbolKindWasm, Name, isTemporary) {} static bool classof(const MCSymbol *S) { return S->isWasm(); } const MCExpr *getSize() const { return SymbolSize; } @@ -55,8 +56,21 @@ class MCSymbolWasm : public MCSymbol { bool isComdat() const { return IsComdat; } void setComdat(bool isComdat) { IsComdat = isComdat; } - const StringRef getModuleName() const { return ModuleName; } - void setModuleName(StringRef Name) { ModuleName = Name; } + const StringRef getImportModule() const { + if (ImportModule.hasValue()) { + return ImportModule.getValue(); + } + return "env"; + } + void setImportModule(StringRef Name) { ImportModule = Name; } + + const StringRef getImportName() const { + if (ImportName.hasValue()) { + return ImportName.getValue(); + } + return getName(); + } + void setImportName(StringRef Name) { ImportName = Name; } const wasm::WasmSignature *getSignature() const { return Signature; } void setSignature(wasm::WasmSignature *Sig) { Signature = Sig; } diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp index 0cca3757be907f..333748db91904c 100644 --- a/llvm/lib/MC/WasmObjectWriter.cpp +++ b/llvm/lib/MC/WasmObjectWriter.cpp @@ -982,7 +982,8 @@ void WasmObjectWriter::writeLinkingMetaDataSection( case wasm::WASM_SYMBOL_TYPE_GLOBAL: case wasm::WASM_SYMBOL_TYPE_EVENT: encodeULEB128(Sym.ElementIndex, W.OS); - if ((Sym.Flags & wasm::WASM_SYMBOL_UNDEFINED) == 0) + if ((Sym.Flags & wasm::WASM_SYMBOL_UNDEFINED) == 0 || + (Sym.Flags & wasm::WASM_SYMBOL_EXPLICIT_NAME) != 0) writeString(Sym.Name); break; case wasm::WASM_SYMBOL_TYPE_DATA: @@ -1162,8 +1163,8 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm, MCSymbolWasm *MemorySym = cast(Ctx.getOrCreateSymbol("__linear_memory")); wasm::WasmImport MemImport; - MemImport.Module = MemorySym->getModuleName(); - MemImport.Field = MemorySym->getName(); + MemImport.Module = MemorySym->getImportModule(); + MemImport.Field = MemorySym->getImportName(); MemImport.Kind = wasm::WASM_EXTERNAL_MEMORY; Imports.push_back(MemImport); @@ -1173,8 +1174,8 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm, MCSymbolWasm *TableSym = cast(Ctx.getOrCreateSymbol("__indirect_function_table")); wasm::WasmImport TableImport; - TableImport.Module = TableSym->getModuleName(); - TableImport.Field = TableSym->getName(); + TableImport.Module = TableSym->getImportModule(); + TableImport.Field = TableSym->getImportName(); TableImport.Kind = wasm::WASM_EXTERNAL_TABLE; TableImport.Table.ElemType = wasm::WASM_TYPE_FUNCREF; Imports.push_back(TableImport); @@ -1200,8 +1201,8 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm, if (!WS.isDefined() && !WS.isComdat()) { if (WS.isFunction()) { wasm::WasmImport Import; - Import.Module = WS.getModuleName(); - Import.Field = WS.getName(); + Import.Module = WS.getImportModule(); + Import.Field = WS.getImportName(); Import.Kind = wasm::WASM_EXTERNAL_FUNCTION; Import.SigIndex = getFunctionType(WS); Imports.push_back(Import); @@ -1211,8 +1212,8 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm, report_fatal_error("undefined global symbol cannot be weak"); wasm::WasmImport Import; - Import.Module = WS.getModuleName(); - Import.Field = WS.getName(); + Import.Module = WS.getImportModule(); + Import.Field = WS.getImportName(); Import.Kind = wasm::WASM_EXTERNAL_GLOBAL; Import.Global = WS.getGlobalType(); Imports.push_back(Import); @@ -1222,8 +1223,8 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm, report_fatal_error("undefined event symbol cannot be weak"); wasm::WasmImport Import; - Import.Module = WS.getModuleName(); - Import.Field = WS.getName(); + Import.Module = WS.getImportModule(); + Import.Field = WS.getImportName(); Import.Kind = wasm::WASM_EXTERNAL_EVENT; Import.Event.Attribute = wasm::WASM_EVENT_ATTRIBUTE_EXCEPTION; Import.Event.SigIndex = getEventType(WS); @@ -1448,6 +1449,8 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm, Flags |= wasm::WASM_SYMBOL_BINDING_LOCAL; if (WS.isUndefined()) Flags |= wasm::WASM_SYMBOL_UNDEFINED; + if (WS.getName() != WS.getImportName()) + Flags |= wasm::WASM_SYMBOL_EXPLICIT_NAME; wasm::WasmSymbolInfo Info; Info.Name = WS.getName(); diff --git a/llvm/lib/Object/WasmObjectFile.cpp b/llvm/lib/Object/WasmObjectFile.cpp index d84cb48c9fbd5d..66a53becbb0561 100644 --- a/llvm/lib/Object/WasmObjectFile.cpp +++ b/llvm/lib/Object/WasmObjectFile.cpp @@ -505,9 +505,13 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) { Function.SymbolName = Info.Name; } else { wasm::WasmImport &Import = *ImportedFunctions[Info.ElementIndex]; + if ((Info.Flags & wasm::WASM_SYMBOL_EXPLICIT_NAME) != 0) + Info.Name = readString(Ctx); + else + Info.Name = Import.Field; Signature = &Signatures[Import.SigIndex]; - Info.Name = Import.Field; - Info.Module = Import.Module; + Info.ImportName = Import.Field; + Info.ImportModule = Import.Module; } break; @@ -530,8 +534,13 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) { Global.SymbolName = Info.Name; } else { wasm::WasmImport &Import = *ImportedGlobals[Info.ElementIndex]; - Info.Name = Import.Field; + if ((Info.Flags & wasm::WASM_SYMBOL_EXPLICIT_NAME) != 0) + Info.Name = readString(Ctx); + else + Info.Name = Import.Field; GlobalType = &Import.Global; + Info.ImportName = Import.Field; + Info.ImportModule = Import.Module; } break; @@ -585,9 +594,14 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) { } else { wasm::WasmImport &Import = *ImportedEvents[Info.ElementIndex]; + if ((Info.Flags & wasm::WASM_SYMBOL_EXPLICIT_NAME) != 0) + Info.Name = readString(Ctx); + else + Info.Name = Import.Field; EventType = &Import.Event; Signature = &Signatures[EventType->SigIndex]; - Info.Name = Import.Field; + Info.ImportName = Import.Field; + Info.ImportModule = Import.Module; } break; } diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp index 50143fb0ece362..7caeebb1a9aad7 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp @@ -113,8 +113,15 @@ void WebAssemblyTargetAsmStreamer::emitEventType(const MCSymbolWasm *Sym) { } void WebAssemblyTargetAsmStreamer::emitImportModule(const MCSymbolWasm *Sym, - StringRef ModuleName) { - OS << "\t.import_module\t" << Sym->getName() << ", " << ModuleName << '\n'; + StringRef ImportModule) { + OS << "\t.import_module\t" << Sym->getName() << ", " + << ImportModule << '\n'; +} + +void WebAssemblyTargetAsmStreamer::emitImportName(const MCSymbolWasm *Sym, + StringRef ImportName) { + OS << "\t.import_name\t" << Sym->getName() << ", " + << ImportName << '\n'; } void WebAssemblyTargetAsmStreamer::emitIndIdx(const MCExpr *Value) { diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h index 3073938118b458..2ee9956c8e3840 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h @@ -45,7 +45,10 @@ class WebAssemblyTargetStreamer : public MCTargetStreamer { virtual void emitEventType(const MCSymbolWasm *Sym) = 0; /// .import_module virtual void emitImportModule(const MCSymbolWasm *Sym, - StringRef ModuleName) = 0; + StringRef ImportModule) = 0; + /// .import_name + virtual void emitImportName(const MCSymbolWasm *Sym, + StringRef ImportName) = 0; protected: void emitValueType(wasm::ValType Type); @@ -67,7 +70,8 @@ class WebAssemblyTargetAsmStreamer final : public WebAssemblyTargetStreamer { void emitIndIdx(const MCExpr *Value) override; void emitGlobalType(const MCSymbolWasm *Sym) override; void emitEventType(const MCSymbolWasm *Sym) override; - void emitImportModule(const MCSymbolWasm *Sym, StringRef ModuleName) override; + void emitImportModule(const MCSymbolWasm *Sym, StringRef ImportModule) override; + void emitImportName(const MCSymbolWasm *Sym, StringRef ImportName) override; }; /// This part is for Wasm object output @@ -82,7 +86,9 @@ class WebAssemblyTargetWasmStreamer final : public WebAssemblyTargetStreamer { void emitGlobalType(const MCSymbolWasm *Sym) override {} void emitEventType(const MCSymbolWasm *Sym) override {} void emitImportModule(const MCSymbolWasm *Sym, - StringRef ModuleName) override {} + StringRef ImportModule) override {} + void emitImportName(const MCSymbolWasm *Sym, + StringRef ImportName) override {} }; /// This part is for null output @@ -98,6 +104,7 @@ class WebAssemblyTargetNullStreamer final : public WebAssemblyTargetStreamer { void emitGlobalType(const MCSymbolWasm *) override {} void emitEventType(const MCSymbolWasm *) override {} void emitImportModule(const MCSymbolWasm *, StringRef) override {} + void emitImportName(const MCSymbolWasm *, StringRef) override {} }; } // end namespace llvm diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index c4f03dfa7f9e45..b492d114695072 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -111,9 +111,16 @@ void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) { F.hasFnAttribute("wasm-import-module")) { StringRef Name = F.getFnAttribute("wasm-import-module").getValueAsString(); - Sym->setModuleName(Name); + Sym->setImportModule(Name); getTargetStreamer()->emitImportModule(Sym, Name); } + if (TM.getTargetTriple().isOSBinFormatWasm() && + F.hasFnAttribute("wasm-import-name")) { + StringRef Name = + F.getFnAttribute("wasm-import-name").getValueAsString(); + Sym->setImportName(Name); + getTargetStreamer()->emitImportName(Sym, Name); + } } } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp index 1a416520f97d09..13f37f611ed058 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp @@ -36,11 +36,6 @@ using namespace llvm; #define DEBUG_TYPE "wasm-fix-function-bitcasts" -static cl::opt - TemporaryWorkarounds("wasm-temporary-workarounds", - cl::desc("Apply certain temporary workarounds"), - cl::init(true), cl::Hidden); - namespace { class FixFunctionBitcasts final : public ModulePass { StringRef getPassName() const override { @@ -227,6 +222,17 @@ static Function *CreateWrapper(Function *F, FunctionType *Ty) { return Wrapper; } +// Test whether a main function with type FuncTy should be rewritten to have +// type MainTy. +bool shouldFixMainFunction(FunctionType *FuncTy, FunctionType *MainTy) { + // Only fix the main function if it's the standard zero-arg form. That way, + // the standard cases will work as expected, and users will see signature + // mismatches from the linker for non-standard cases. + return FuncTy->getReturnType() == MainTy->getReturnType() && + FuncTy->getNumParams() == 0 && + !FuncTy->isVarArg(); +} + bool FixFunctionBitcasts::runOnModule(Module &M) { LLVM_DEBUG(dbgs() << "********** Fix Function Bitcasts **********\n"); @@ -243,14 +249,14 @@ bool FixFunctionBitcasts::runOnModule(Module &M) { // "int main(int argc, char *argv[])", create an artificial call with it // bitcasted to that type so that we generate a wrapper for it, so that // the C runtime can call it. - if (!TemporaryWorkarounds && !F.isDeclaration() && F.getName() == "main") { + if (F.getName() == "main") { Main = &F; LLVMContext &C = M.getContext(); Type *MainArgTys[] = {Type::getInt32Ty(C), PointerType::get(Type::getInt8PtrTy(C), 0)}; FunctionType *MainTy = FunctionType::get(Type::getInt32Ty(C), MainArgTys, /*isVarArg=*/false); - if (F.getFunctionType() != MainTy) { + if (shouldFixMainFunction(F.getFunctionType(), MainTy)) { LLVM_DEBUG(dbgs() << "Found `main` function with incorrect type: " << *F.getFunctionType() << "\n"); Value *Args[] = {UndefValue::get(MainArgTys[0]), @@ -298,12 +304,18 @@ bool FixFunctionBitcasts::runOnModule(Module &M) { Main->setName("__original_main"); Function *MainWrapper = cast(CallMain->getCalledValue()->stripPointerCasts()); - MainWrapper->setName("main"); - MainWrapper->setLinkage(Main->getLinkage()); - MainWrapper->setVisibility(Main->getVisibility()); - Main->setLinkage(Function::PrivateLinkage); - Main->setVisibility(Function::DefaultVisibility); delete CallMain; + if (Main->isDeclaration()) { + // The wrapper is not needed in this case as we don't need to export + // it to anyone else. + MainWrapper->eraseFromParent(); + } else { + // Otherwise give the wrapper the same linkage as the original main + // function, so that it can be called from the same places. + MainWrapper->setName("main"); + MainWrapper->setLinkage(Main->getLinkage()); + MainWrapper->setVisibility(Main->getVisibility()); + } } return true; diff --git a/llvm/test/CodeGen/WebAssembly/call.ll b/llvm/test/CodeGen/WebAssembly/call.ll index db666a6c36686e..77f17c850edab4 100644 --- a/llvm/test/CodeGen/WebAssembly/call.ll +++ b/llvm/test/CodeGen/WebAssembly/call.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers -wasm-temporary-workarounds=false -mattr=+sign-ext,+simd128 | FileCheck %s -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers -fast-isel -fast-isel-abort=1 -wasm-temporary-workarounds=false -mattr=+sign-ext,+simd128 | FileCheck %s +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers -mattr=+sign-ext,+simd128 | FileCheck %s +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers -fast-isel -fast-isel-abort=1 -mattr=+sign-ext,+simd128 | FileCheck %s ; Test that basic call operations assemble as expected. diff --git a/llvm/test/CodeGen/WebAssembly/function-bitcasts-varargs.ll b/llvm/test/CodeGen/WebAssembly/function-bitcasts-varargs.ll index 515c5703d86c06..b542276e068f6d 100644 --- a/llvm/test/CodeGen/WebAssembly/function-bitcasts-varargs.ll +++ b/llvm/test/CodeGen/WebAssembly/function-bitcasts-varargs.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -asm-verbose=false -wasm-temporary-workarounds=false -wasm-keep-registers | FileCheck %s +; RUN: llc < %s -asm-verbose=false -wasm-keep-registers | FileCheck %s ; Test that function pointer casts casting away varargs are replaced with ; wrappers. diff --git a/llvm/test/CodeGen/WebAssembly/function-bitcasts.ll b/llvm/test/CodeGen/WebAssembly/function-bitcasts.ll index a779cbe414225e..813e8420ae54bb 100644 --- a/llvm/test/CodeGen/WebAssembly/function-bitcasts.ll +++ b/llvm/test/CodeGen/WebAssembly/function-bitcasts.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -asm-verbose=false -wasm-disable-explicit-locals -wasm-keep-registers -enable-emscripten-cxx-exceptions -wasm-temporary-workarounds=false | FileCheck %s +; RUN: llc < %s -asm-verbose=false -wasm-disable-explicit-locals -wasm-keep-registers -enable-emscripten-cxx-exceptions | FileCheck %s ; Test that function pointer casts are replaced with wrappers. diff --git a/llvm/test/CodeGen/WebAssembly/import-module.ll b/llvm/test/CodeGen/WebAssembly/import-module.ll index a8202a77acb5ae..0cf0f2f25e0b93 100644 --- a/llvm/test/CodeGen/WebAssembly/import-module.ll +++ b/llvm/test/CodeGen/WebAssembly/import-module.ll @@ -12,8 +12,9 @@ define void @test() { declare void @foo() #0 declare void @plain() -attributes #0 = { "wasm-import-module"="bar" } +attributes #0 = { "wasm-import-module"="bar" "wasm-import-name"="qux" } ; CHECK-NOT: .import_module plain ; CHECK: .import_module foo, bar +; CHECK: .import_name foo, qux ; CHECK-NOT: .import_module plain diff --git a/llvm/test/CodeGen/WebAssembly/main-declaration.ll b/llvm/test/CodeGen/WebAssembly/main-declaration.ll index f9d68db2bae8e7..544f5588c5043c 100644 --- a/llvm/test/CodeGen/WebAssembly/main-declaration.ll +++ b/llvm/test/CodeGen/WebAssembly/main-declaration.ll @@ -1,20 +1,18 @@ -; RUN: llc < %s -asm-verbose=false -wasm-temporary-workarounds=false | FileCheck %s +; RUN: llc < %s -asm-verbose=false | FileCheck %s ; Test main functions with alternate signatures. target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown" -declare void @main() +declare i32 @main() -define void @foo() { - call void @main() - ret void +define i32 @foo() { + %t = call i32 @main() + ret i32 %t } -; CHECK-NOT: __original_main ; CHECK-LABEL: foo: -; CHECK-NEXT: .functype foo () -> () -; CHECK-NEXT: call main@FUNCTION +; CHECK-NEXT: .functype foo () -> (i32) +; CHECK-NEXT: call __original_main@FUNCTION ; CHECK-NEXT: end_function -; CHECK-NOT: __original_main diff --git a/llvm/test/CodeGen/WebAssembly/main-no-args.ll b/llvm/test/CodeGen/WebAssembly/main-no-args.ll index 0bc46717d97be3..97023e269454b6 100644 --- a/llvm/test/CodeGen/WebAssembly/main-no-args.ll +++ b/llvm/test/CodeGen/WebAssembly/main-no-args.ll @@ -1,18 +1,19 @@ -; RUN: llc < %s -asm-verbose=false -wasm-temporary-workarounds=false | FileCheck %s +; RUN: llc < %s -asm-verbose=false | FileCheck %s ; Test main functions with alternate signatures. target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown" -define void @main() { - ret void +define i32 @main() { + ret i32 0 } -; CHECK-LABEL: .L__original_main: -; CHECK-NEXT: .functype .L__original_main () -> () +; CHECK-LABEL: __original_main: +; CHECK-NEXT: .functype __original_main () -> (i32) +; CHECK-NEXT: i32.const 0 ; CHECK-NEXT: end_function ; CHECK-LABEL: main: ; CHECK-NEXT: .functype main (i32, i32) -> (i32) -; CHECK: call .L__original_main@FUNCTION +; CHECK: call __original_main@FUNCTION diff --git a/llvm/test/CodeGen/WebAssembly/main-three-args.ll b/llvm/test/CodeGen/WebAssembly/main-three-args.ll new file mode 100644 index 00000000000000..77b3e5b8c30631 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/main-three-args.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s -asm-verbose=false | FileCheck %s + +; Test that main function with a non-standard third argument is +; not wrapped. + +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +define i32 @main(i32 %a, i8** %b, i8** %c) { + ret i32 0 +} + +; CHECK-LABEL: main: +; CHECK-NEXT: .functype main (i32, i32, i32) -> (i32) + +; CHECK-NOT: __original_main: diff --git a/llvm/test/CodeGen/WebAssembly/main-with-args.ll b/llvm/test/CodeGen/WebAssembly/main-with-args.ll index d4a11ef14d46e5..205cb133f8ca68 100644 --- a/llvm/test/CodeGen/WebAssembly/main-with-args.ll +++ b/llvm/test/CodeGen/WebAssembly/main-with-args.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -asm-verbose=false -wasm-temporary-workarounds=false | FileCheck %s +; RUN: llc < %s -asm-verbose=false | FileCheck %s ; Test that main function with expected signature is not wrapped diff --git a/llvm/test/MC/WebAssembly/external-func-address.ll b/llvm/test/MC/WebAssembly/external-func-address.ll index 60ec23a1a8ed3d..8e36c76e84f3b9 100644 --- a/llvm/test/MC/WebAssembly/external-func-address.ll +++ b/llvm/test/MC/WebAssembly/external-func-address.ll @@ -8,7 +8,7 @@ target triple = "wasm32-unknown-unknown" declare void @f0(i32) #0 @ptr_to_f0 = hidden global void (i32)* @f0, align 4 -attributes #0 = { "wasm-import-module"="somewhere" } +attributes #0 = { "wasm-import-module"="somewhere" "wasm-import-name"="something" } declare void @f1(i32) #1 @ptr_to_f1 = hidden global void (i32)* @f1, align 4 @@ -47,7 +47,7 @@ define void @call(i32) { ; CHECK-NEXT: Kind: FUNCTION ; CHECK-NEXT: SigIndex: 1 ; CHECK: - Module: somewhere -; CHECK-NEXT: Field: f0 +; CHECK-NEXT: Field: something ; CHECK: - Module: env ; CHECK-NEXT: Field: f1 ; CHECK-NEXT: Kind: FUNCTION diff --git a/llvm/test/MC/WebAssembly/import-module.ll b/llvm/test/MC/WebAssembly/import-module.ll new file mode 100644 index 00000000000000..461d5c20ae9b78 --- /dev/null +++ b/llvm/test/MC/WebAssembly/import-module.ll @@ -0,0 +1,31 @@ +; RUN: llc -filetype=obj %s -o - | obj2yaml | FileCheck %s + +target triple = "wasm32-unknown-unknown" + +define void @test() { + call void @foo() + call void @plain() + ret void +} + +declare void @foo() #0 +declare void @plain() + +attributes #0 = { "wasm-import-module"="bar" "wasm-import-name"="qux" } + +; CHECK: - Type: IMPORT +; CHECK-NEXT: Imports: +; CHECK: - Module: bar +; CHECK-NEXT: Field: qux +; CHECK-NEXT: Kind: FUNCTION + +; CHECK: - Module: env +; CHECK-NEXT: Field: plain +; CHECK-NEXT: Kind: FUNCTION + +; CHECK: - Type: CUSTOM +; CHECK: Name: foo +; CHECK-NEXT: Flags: [ UNDEFINED ] + +; CHECK: Name: plain +; CHECK-NEXT: Flags: [ UNDEFINED ] diff --git a/llvm/tools/yaml2obj/yaml2wasm.cpp b/llvm/tools/yaml2obj/yaml2wasm.cpp index 2d3e3b71f0868e..7d08e62bcedd81 100644 --- a/llvm/tools/yaml2obj/yaml2wasm.cpp +++ b/llvm/tools/yaml2obj/yaml2wasm.cpp @@ -172,7 +172,8 @@ int WasmWriter::writeSectionContent(raw_ostream &OS, case wasm::WASM_SYMBOL_TYPE_GLOBAL: case wasm::WASM_SYMBOL_TYPE_EVENT: encodeULEB128(Info.ElementIndex, SubSection.GetStream()); - if ((Info.Flags & wasm::WASM_SYMBOL_UNDEFINED) == 0) + if ((Info.Flags & wasm::WASM_SYMBOL_UNDEFINED) == 0 || + (Info.Flags & wasm::WASM_SYMBOL_EXPLICIT_NAME) != 0) writeStringRef(Info.Name, SubSection.GetStream()); break; case wasm::WASM_SYMBOL_TYPE_DATA: