Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

arm floating point argument failures #1088

Closed
honggyukim opened this issue Jan 12, 2020 · 12 comments · Fixed by #1135
Closed

arm floating point argument failures #1088

honggyukim opened this issue Jan 12, 2020 · 12 comments · Fixed by #1135

Comments

@honggyukim
Copy link
Collaborator

honggyukim commented Jan 12, 2020

Here is a test program.

$ cat float.c
#include <stdio.h>

float float_add(float a, float b)
{
        fprintf(stderr, "a = %f, b = %f\n", a, b);
        return a + b;
}

int main(int argc, char *argv[])
{
        double a;

        a = float_add(-0.1, 0.2);
        return a > 0;
}

It shows floating point numbers in normal execution.

$ gcc -pg -g float.c

$ file a.out
a.out: ELF 32-bit LSB executable, ARM, EABI5 version 1 (SYSV), ...

$ ./a.out
a = -0.100000, b = 0.200000

It also shows correct numbers with simple uftrace record execution.

$ uftrace record --no-libcall --no-event a.out
a = -0.100000, b = 0.200000

$ uftrace record --no-libcall --no-event --debug-domain uftrace:1 a.out
uftrace: checking binary a.out
uftrace: removing uftrace.data.old directory
uftrace: creating 1 thread(s) for recording
uftrace: using /home/honggyu/usr/lib/libmcount.so library for tracing
a = -0.100000, b = 0.200000
uftrace: child terminated with exit code: 1
uftrace: reading uftrace.data/task.txt file
uftrace: flushing /uftrace-3a8039083d891c45-17880-000

However, the floating point numbers are somehow printed with incorrect values when uftrace tries to record an argument.

$ uftrace record -A float_add@fparg1 --no-libcall --no-event a.out
a = 0.000000, b = 0.000000

$ uftrace replay
# DURATION     TID     FUNCTION
            [ 17914] | main() {
  47.760 us [ 17914] |   float_add(0.000000);
  54.740 us [ 17914] | } /* main */

The result is the same with --auto-args.

$ uftrace record -a --no-libcall --no-event a.out
a = 0.000000, b = 0.000000
@honggyukim
Copy link
Collaborator Author

Here is another problem in return value recording.

$ uftrace -R float_add@retval/f32 --no-libcall --no-event a.out
a = -0.100000, b = 0.200000
# DURATION     TID     FUNCTION
            [  1009] | main() {
   1.247 ms [  1009] |   float_add() = -nan;
   1.268 ms [  1009] | } /* main */

@honggyukim
Copy link
Collaborator Author

honggyukim commented Feb 9, 2020

It seems that the call to memset@plt clears s0 and s1 to zero.

 1│ Dump of assembler code for function save_argument:
 2│ /home/honggyu/work/uftrace/libmcount/record.c:
 3503     {
 40x76f8a518 <+0>:     push    {r4, r11, lr}
 50x76f8a51c <+4>:     add     r11, sp, #8
 60x76f8a520 <+8>:     sub     sp, sp, #84     ; 0x54
 70x76f8a524 <+12>:    str     r0, [r11, #-80] ; 0xffffffb0
 80x76f8a528 <+16>:    str     r1, [r11, #-84] ; 0xffffffac
 90x76f8a52c <+20>:    str     r2, [r11, #-88] ; 0xffffffa8
100x76f8a530 <+24>:    str     r3, [r11, #-92] ; 0xffffffa4
110x76f8a534 <+28>:    sub     r3, r11, #12
120x76f8a538 <+32>:    sub     r3, r3, #64     ; 0x40
130x76f8a53c <+36>:    add     r3, r3, #15
140x76f8a540 <+40>:    lsr     r3, r3, #4
150x76f8a544 <+44>:    lsl     r4, r3, #4
16
17504             void *argbuf = get_argbuf(mtdp, rstack);
180x76f8a548 <+48>:    ldr     r1, [r11, #-84] ; 0xffffffac
190x76f8a54c <+52>:    ldr     r0, [r11, #-80] ; 0xffffffb0
200x76f8a550 <+56>:    bl      0x76f89ac8 <get_argbuf>
210x76f8a554 <+60>:    str     r0, [r11, #-16]
22
23505             unsigned size;
24506             struct mcount_arg_context ctx = {
250x76f8a558 <+64>:    mov     r0, r4
260x76f8a55c <+68>:    mov     r3, #48 ; 0x30
270x76f8a560 <+72>:    mov     r2, r3
280x76f8a564 <+76>:    mov     r1, #0
290x76f8a568 <+80>:    bl      0x76f7e384 <memset@plt>
30├──> 0x76f8a56c <+84>:    ldr     r3, [r11, #-92] ; 0xffffffa4
310x76f8a570 <+88>:    str     r3, [r4]
32
33507                     .regs = regs,
34508                     .stack_base = rstack->parent_loc,
350x76f8a574 <+92>:    ldr     r3, [r11, #-84] ; 0xffffffac
360x76f8a578 <+96>:    ldr     r3, [r3]
37
38506             struct mcount_arg_context ctx = {
390x76f8a57c <+100>:   str     r3, [r4, #4]
40
41509                     .regions = &mtdp->mem_regions,
420x76f8a580 <+104>:   ldr     r3, [r11, #-80] ; 0xffffffb0
430x76f8a584 <+108>:   add     r3, r3, #4160   ; 0x1040
440x76f8a588 <+112>:   add     r3, r3, #36     ; 0x24

The original C source code doesn't have such a memset call, but it seems to be implicitly inserted by the compiler for the struct initialization.

 499│ void save_argument(struct mcount_thread_data *mtdp,
 500│                    struct mcount_ret_stack *rstack,
 501│                    struct list_head *args_spec,
 502│                    struct mcount_regs *regs)
 503│ {
 504│         void *argbuf = get_argbuf(mtdp, rstack);
 505│         unsigned size;
 506├───────> struct mcount_arg_context ctx = {
 507│                 .regs = regs,
 508│                 .stack_base = rstack->parent_loc,
 509│                 .regions = &mtdp->mem_regions,
 510│                 .arch = &mtdp->arch,
 511│         };
 512│
 513│         size = save_to_argbuf(argbuf, args_spec, &ctx);
 514│         if (size == -1U) {
 515│                 pr_warn("argument data is too big\n");
 516│                 return;
 517│         }
 518│
 519│         *(unsigned *)argbuf = size;
 520│         rstack->flags |= MCOUNT_FL_ARGUMENT;
 521│ }

@honggyukim
Copy link
Collaborator Author

Here is the memset code in /usr/lib/arm-linux-gnueabihf/libarmmem-v7l.so.

The vdup.8 q0, r1 instruction on line 3 overwrites the s0 and s1 registers.

 1│ Dump of assembler code for function memset:
 20x76f690f8 <+0>:     mov     r3, r0
 3├──> 0x76f690fc <+4>:     vdup.8  q0, r1
 40x76f69100 <+8>:     cmp     r2, #79 ; 0x4f
 50x76f69104 <+12>:    vdup.8  q1, r1
 60x76f69108 <+16>:    bcc     0x76f69190 <memset+152>
 70x76f6910c <+20>:    ands    r12, r0, #15
 80x76f69110 <+24>:    beq     0x76f69140 <memset+72>
 90x76f69114 <+28>:    rsb     r12, r12, #16
100x76f69118 <+32>:    sub     r2, r2, r12
110x76f6911c <+36>:                    ; <UNDEFINED> instruction: 0xe6ffcf3c
120x76f69120 <+40>:    tst     r0, #1
130x76f69124 <+44>:    strbne  r1, [r3], #1
140x76f69128 <+48>:    lsls    r12, r12, #2
150x76f6912c <+52>:    strbcs  r1, [r3, #1]
160x76f69130 <+56>:    strbcs  r1, [r3], #2
170x76f69134 <+60>:    vstmiami        r3!, {s0}
180x76f69138 <+64>:    lsls    r12, r12, #2
190x76f6913c <+68>:    vstmiacs        r3!, {d0}
200x76f69140 <+72>:    mov     r12, #64        ; 0x40
210x76f69144 <+76>:    sub     r2, r2, #64     ; 0x40
220x76f69148 <+80>:    add     r1, r3, #32
230x76f6914c <+84>:    vst1.8  {d0-d3}, [r3 :128], r12
240x76f69150 <+88>:    subs    r2, r2, #64     ; 0x40
250x76f69154 <+92>:    vst1.8  {d0-d3}, [r1 :128], r12
260x76f69158 <+96>:    bcs     0x76f6914c <memset+84>
270x76f6915c <+100>:   vmov.32 r1, d0[0]
280x76f69160 <+104>:   lsls    r2, r2, #27
290x76f69164 <+108>:   bcc     0x76f6916c <memset+116>
300x76f69168 <+112>:   vst1.8  {d0-d3}, [r3]!
310x76f6916c <+116>:   bpl     0x76f69174 <memset+124>
320x76f69170 <+120>:   vst1.8  {d0-d1}, [r3]!
330x76f69174 <+124>:   lsls    r2, r2, #2
340x76f69178 <+128>:   vstmiacs        r3!, {d0}
350x76f6917c <+132>:   strmi   r1, [r3], #4
360x76f69180 <+136>:   lsls    r2, r2, #2
370x76f69184 <+140>:   strhcs  r1, [r3], #2
380x76f69188 <+144>:   strbmi  r1, [r3]
390x76f6918c <+148>:   bx      lr
400x76f69190 <+152>:   tst     r3, #3
410x76f69194 <+156>:   beq     0x76f691ac <memset+180>
420x76f69198 <+160>:   subs    r2, r2, #1
430x76f6919c <+164>:   bcc     0x76f6918c <memset+148>
440x76f691a0 <+168>:   strb    r1, [r3], #1
450x76f691a4 <+172>:   tst     r3, #3
460x76f691a8 <+176>:   bne     0x76f69198 <memset+160>
470x76f691ac <+180>:   cmp     r2, #32
480x76f691b0 <+184>:   bcc     0x76f6915c <memset+100>
490x76f691b4 <+188>:   vst1.8  {d0-d3}, [r3]!
500x76f691b8 <+192>:   sub     r2, r2, #32
510x76f691bc <+196>:   b       0x76f6915c <memset+100>
52│ End of assembler dump.

@honggyukim
Copy link
Collaborator Author

honggyukim commented Feb 9, 2020

I manually changed the code as follows:

diff --git a/libmcount/record.c b/libmcount/record.c
index 4bf383c5..bdf4711b 100644
--- a/libmcount/record.c
+++ b/libmcount/record.c
@@ -503,12 +503,21 @@ void save_argument(struct mcount_thread_data *mtdp,
 {
        void *argbuf = get_argbuf(mtdp, rstack);
        unsigned size;
+#if 0
        struct mcount_arg_context ctx = {
                .regs = regs,
                .stack_base = rstack->parent_loc,
                .regions = &mtdp->mem_regions,
                .arch = &mtdp->arch,
        };
+#else
+       struct mcount_arg_context ctx;
+       mcount_memset1(&ctx, 0, sizeof(ctx));
+       ctx.regs = regs;
+       ctx.stack_base = rstack->parent_loc;
+       ctx.regions = &mtdp->mem_regions;
+       ctx.arch = &mtdp->arch;
+#endif

        size = save_to_argbuf(argbuf, args_spec, &ctx);
        if (size == -1U) {

At least it doesn't change the original value printed by the program.

$ uftrace record -A float_add@fparg1 --no-libcall --no-event a.out
a = -0.100000, b = 0.200000

However, it still doesn't record the argument properly.

$ uftrace replay
# DURATION     TID     FUNCTION
            [  5371] | main() {
  85.364 us [  5371] |   float_add(0.000000);
 104.792 us [  5371] | } /* main */

There may be another place that needs to be fixed as well.

$ uftrace -R float_add@retval/f32 --no-libcall --no-event a.out
a = -0.100000, b = 0.200000
# DURATION     TID     FUNCTION
            [  5580] | main() {
 169.479 us [  5580] |   float_add() = -nan;
 201.145 us [  5580] | } /* main */

@honggyukim
Copy link
Collaborator Author

honggyukim commented Feb 9, 2020

I think the right fix would be to disable the implicit memset call that the compiler emits for designated struct initializers, but I don't know whether there is a way to do that.

@honggyukim
Copy link
Collaborator Author

honggyukim commented Feb 9, 2020

The return value can be fixed with the following change.

diff --git a/arch/arm/mcount-support.c b/arch/arm/mcount-support.c
index e609c2f9..4335e854 100644
--- a/arch/arm/mcount-support.c
+++ b/arch/arm/mcount-support.c
@@ -488,8 +488,10 @@ void mcount_arch_get_retval(struct mcount_arg_context *ctx,
        /* type of return value cannot be FLOAT, so check format instead */
 #ifdef HAVE_ARM_HARDFP
        if (spec->fmt == ARG_FMT_FLOAT && use_hard_float) {
-               /* d0 register (64 bit) was saved below the r0 */
-               memcpy(ctx->val.v, ctx->retval - 2, spec->size);
+               /* d0, d1 registers (64 bit) were saved below the r0 */
+               long *float_retval = ctx->retval - 4;
+
+               memcpy(ctx->val.v, float_retval, spec->size);
        }
        else
 #endif /* HAVE_ARM_HARDFP */

The following commit should have also modified it as above.

  • ffb69ce arm: Handle struct return type by keeping more regs on stack

The return value is correct now.

$ uftrace -R float_add@retval/f32 --no-libcall --no-event a.out
a = -0.100000, b = 0.200000
# DURATION     TID     FUNCTION
            [ 16135] | main() {
  68.176 us [ 16135] |   float_add() = 0.100000;
 178.802 us [ 16135] | } /* main */

@honggyukim
Copy link
Collaborator Author

The summarized change is as follows:

diff --git a/arch/arm/mcount-support.c b/arch/arm/mcount-support.c
index e609c2f9..4335e854 100644
--- a/arch/arm/mcount-support.c
+++ b/arch/arm/mcount-support.c
@@ -488,8 +488,10 @@ void mcount_arch_get_retval(struct mcount_arg_context *ctx,
        /* type of return value cannot be FLOAT, so check format instead */
 #ifdef HAVE_ARM_HARDFP
        if (spec->fmt == ARG_FMT_FLOAT && use_hard_float) {
-               /* d0 register (64 bit) was saved below the r0 */
-               memcpy(ctx->val.v, ctx->retval - 2, spec->size);
+               /* d0, d1 registers (64 bit) were saved below the r0 */
+               long *float_retval = ctx->retval - 4;
+
+               memcpy(ctx->val.v, float_retval, spec->size);
        }
        else
 #endif /* HAVE_ARM_HARDFP */
diff --git a/libmcount/record.c b/libmcount/record.c
index 4bf383c5..b3ea9dcd 100644
--- a/libmcount/record.c
+++ b/libmcount/record.c
@@ -503,12 +503,13 @@ void save_argument(struct mcount_thread_data *mtdp,
 {
        void *argbuf = get_argbuf(mtdp, rstack);
        unsigned size;
-       struct mcount_arg_context ctx = {
-               .regs = regs,
-               .stack_base = rstack->parent_loc,
-               .regions = &mtdp->mem_regions,
-               .arch = &mtdp->arch,
-       };
+       struct mcount_arg_context ctx;
+
+       mcount_memset1(&ctx, 0, sizeof(ctx));
+       ctx.regs = regs;
+       ctx.stack_base = rstack->parent_loc;
+       ctx.regions = &mtdp->mem_regions;
+       ctx.arch = &mtdp->arch;

        size = save_to_argbuf(argbuf, args_spec, &ctx);
        if (size == -1U) {
@@ -526,11 +527,12 @@ void save_retval(struct mcount_thread_data *mtdp,
        struct list_head *args_spec = rstack->pargs;
        void *argbuf = get_argbuf(mtdp, rstack);
        unsigned size;
-       struct mcount_arg_context ctx = {
-               .retval = retval,
-               .regions = &mtdp->mem_regions,
-               .arch = &mtdp->arch,
-       };
+       struct mcount_arg_context ctx;
+
+       mcount_memset1(&ctx, 0, sizeof(ctx));
+       ctx.retval = retval;
+       ctx.regions = &mtdp->mem_regions;
+       ctx.arch = &mtdp->arch;

        size = save_to_argbuf(argbuf, args_spec, &ctx);
        if (size == -1U) {

@honggyukim
Copy link
Collaborator Author

honggyukim commented Feb 9, 2020

However, the above change doesn't fix the following example.

$ cat float2.c
#include <stdio.h>
#include <math.h>

int main(int argc, char *argv[])
{
        float a = 1.0f;
        float e = expf(a);
        fprintf(stderr, "expf(%f) = %f\n", a, e);
        double one = log(e);
        fprintf(stderr, "log(%f) = %lf\n", e, one);

        return one - 1;
}
$ gcc -pg -fno-builtin float2.c -lm

$ ./a.out
expf(1.000000) = 2.718282
log(2.718282) = 1.000000

$ uftrace -A expf@fparg1/32 -R expf@retval/f32 -A log@fparg1/64 -R log@retval/f64 -N fprintf a.out
expf(1.000000) = 1.000000
log(1.000000) = -inf
# DURATION     TID     FUNCTION
            [ 16572] | main() {
 126.302 us [ 16572] |   expf(0.000000) = 1.000000;
  55.104 us [ 16572] |   log(0.000000) = -inf;
 323.749 us [ 16572] | } /* main */

It's a slightly modified example based on t198_lib_arg_float.py.

@honggyukim
Copy link
Collaborator Author

To fix the above problem, a further change is needed in the plthook routine as well.

diff --git a/libmcount/plthook.c b/libmcount/plthook.c
index 2ce62cce..160a0c3c 100644
--- a/libmcount/plthook.c
+++ b/libmcount/plthook.c
@@ -732,9 +732,6 @@ static unsigned long __plthook_entry(unsigned long *ret_addr,
        struct sym *sym;
        struct mcount_thread_data *mtdp = NULL;
        struct mcount_ret_stack *rstack;
-       struct uftrace_trigger tr = {
-               .flags = 0,
-       };
        bool skip = false;
        bool recursion = true;
        enum filter_result filtered;
@@ -742,6 +739,10 @@ static unsigned long __plthook_entry(unsigned long *ret_addr,
        struct plthook_special_func *func;
        unsigned long special_flag = 0;
        unsigned long real_addr = 0;
+       struct uftrace_trigger tr;
+
+       mcount_memset1(&tr, 0, sizeof(tr));
+       tr.flags = 0;

        // if necessary, implement it by architecture.
        child_idx = mcount_arch_child_idx(child_idx);

Now, the result is correct.

$ ./a.out
expf(1.000000) = 2.718282
log(2.718282) = 1.000000

$ uftrace -A expf@fparg1/32 -R expf@retval/f32 -A log@fparg1/64 -R log@retval/f64 -N fprintf a.out
expf(1.000000) = 2.718282
log(2.718282) = 1.000000
# DURATION     TID     FUNCTION
            [ 17613] | main() {
  88.697 us [ 17613] |   expf(1.000000) = 2.718282;
 120.780 us [ 17613] |   log(2.718282) = 1.000000;
 337.082 us [ 17613] | } /* main */

@honggyukim
Copy link
Collaborator Author

honggyukim commented Feb 9, 2020

The above change fixes floating point related tests in ARM 32-bit.

$ ./runtest.py float
Start 2 tests with 2 worker
Test case                 pg             finstrument-fu
------------------------: O0 O1 O2 O3 Os O0 O1 O2 O3 Os
083 arg_float           : OK OK OK OK OK SK SK SK SK SK
198 lib_arg_float       : OK OK OK OK OK SK SK SK SK SK

But it requires the following test fix as well.

diff --git a/tests/t083_arg_float.py b/tests/t083_arg_float.py
index e4f2de8b..02410436 100644
--- a/tests/t083_arg_float.py
+++ b/tests/t083_arg_float.py
@@ -31,7 +31,5 @@ class TestCase(TestBase):
             # argument count follows the size of type
             argopt = argopt.replace('float_mul@fparg1/64,fparg2/32',
                                     'float_mul@fparg1/64,fparg3/32')
-            argopt = argopt.replace('float_div@fparg1,fparg2',
-                                    'float_div@fparg1,fparg3')

         return '%s %s %s' % (TestBase.uftrace_cmd, argopt, 't-' + self.name)

@namhyung What do you think about this approach?

@honggyukim
Copy link
Collaborator Author

@namhyung Could you please take a look at this?

@namhyung
Copy link
Owner

namhyung commented Mar 3, 2020

Sorry for the late reply. Both changes look good.

The offset between *retval and the d0 register changed with ffb69ce. Also, I think we can use mcount_memset4() instead because it's guaranteed that both the address and the size will be multiples of 4.

honggyukim added a commit to honggyukim/uftrace that referenced this issue Mar 5, 2020
The designated initializer in C makes implicit memset internally in arm
architecture.  Due to the memset, VFP registers are clobbered
unexpectedly, so this patch fixes the problem by replacing the
designated initializer to explicit 'mcount_memset4'.

Source:
  #include <stdio.h>

  float float_add(float a, float b)
  {
          fprintf(stderr, "a = %f, b = %f\n", a, b);
          return a + b;
  }

  int main(int argc, char *argv[])
  {
          double c;

          c = float_add(-0.1, 0.2);
          fprintf(stderr, "c = %f\n", c);
          return c > 0;
  }

Before:
  $ uftrace -a -F main a.out
  a = 0.000000, b = 0.000000
  c = 0.000000
  # DURATION     TID     FUNCTION
              [ 25362] | main(1, 0x7ea25344) {
              [ 25362] |   float_add(0.000000, 0.000000) {
   503.228 us [ 25362] |     fprintf(&_IO_2_1_stderr_, "a = %f, b = %f\n") = 27;
   511.197 us [ 25362] |   } = 0.000000; /* float_add */
     9.687 us [ 25362] |   fprintf(&_IO_2_1_stderr_, "c = %f\n") = 13;
   531.977 us [ 25362] | } = 0; /* main */

After:
  $ uftrace -a -F main a.out
  a = -0.100000, b = 0.200000
  c = 0.100000
  # DURATION     TID     FUNCTION
              [ 25146] | main(1, 0x7edbb344) {
              [ 25146] |   float_add(-0.100000, 0.200000) {
   501.769 us [ 25146] |     fprintf(&_IO_2_1_stderr_, "a = %f, b = %f\n") = 28;
   509.321 us [ 25146] |   } = 0.100000; /* float_add */
    12.500 us [ 25146] |   fprintf(&_IO_2_1_stderr_, "c = %f\n") = 13;
   533.539 us [ 25146] | } = 1; /* main */

Fixed: namhyung#1088

Signed-off-by: Honggyu Kim <[email protected]>
honggyukim added a commit to honggyukim/uftrace that referenced this issue Mar 5, 2020
The designated initializer in C makes implicit memset internally in arm
architecture.  Due to the memset, VFP registers are clobbered
unexpectedly, so this patch fixes the problem by replacing the
designated initializer to explicit 'mcount_memset4'.

In addition, the offset of the VFP registers was incorrect because the
following commit pushes the d1 register on top of the original d0.

  ffb69ce arm: Handle struct return type by keeping more regs on stack

So the offset is adjusted from -2 to -4 to cope with the change.

Source:
  #include <stdio.h>

  float float_add(float a, float b)
  {
          fprintf(stderr, "a = %f, b = %f\n", a, b);
          return a + b;
  }

  int main(int argc, char *argv[])
  {
          double c;

          c = float_add(-0.1, 0.2);
          fprintf(stderr, "c = %f\n", c);
          return c > 0;
  }

Before:
  $ uftrace -a -F main a.out
  a = 0.000000, b = 0.000000
  c = 0.000000
  # DURATION     TID     FUNCTION
              [ 25362] | main(1, 0x7ea25344) {
              [ 25362] |   float_add(0.000000, 0.000000) {
   503.228 us [ 25362] |     fprintf(&_IO_2_1_stderr_, "a = %f, b = %f\n") = 27;
   511.197 us [ 25362] |   } = 0.000000; /* float_add */
     9.687 us [ 25362] |   fprintf(&_IO_2_1_stderr_, "c = %f\n") = 13;
   531.977 us [ 25362] | } = 0; /* main */

After:
  $ uftrace -a -F main a.out
  a = -0.100000, b = 0.200000
  c = 0.100000
  # DURATION     TID     FUNCTION
              [ 25146] | main(1, 0x7edbb344) {
              [ 25146] |   float_add(-0.100000, 0.200000) {
   501.769 us [ 25146] |     fprintf(&_IO_2_1_stderr_, "a = %f, b = %f\n") = 28;
   509.321 us [ 25146] |   } = 0.100000; /* float_add */
    12.500 us [ 25146] |   fprintf(&_IO_2_1_stderr_, "c = %f\n") = 13;
   533.539 us [ 25146] | } = 1; /* main */

Fixed: namhyung#1088

Signed-off-by: Honggyu Kim <[email protected]>
honggyukim added a commit to honggyukim/uftrace that referenced this issue Mar 9, 2020
The designated initializer in C makes implicit memset internally in arm
architecture.  Due to the memset, VFP registers are clobbered
unexpectedly, so this patch fixes the problem by replacing the
designated initializer to explicit 'mcount_memset4'.

In addition, the offset of the VFP registers was incorrect because the
following commit pushes the d1 register on top of the original d0.

  ffb69ce arm: Handle struct return type by keeping more regs on stack

So the offset is adjusted from -2 to -4 to cope with the change.

Source:
  #include <stdio.h>

  float float_add(float a, float b)
  {
          fprintf(stderr, "a = %f, b = %f\n", a, b);
          return a + b;
  }

  int main(int argc, char *argv[])
  {
          double c;

          c = float_add(-0.1, 0.2);
          fprintf(stderr, "c = %f\n", c);
          return c > 0;
  }

Before:
  $ uftrace -a -F main a.out
  a = 0.000000, b = 0.000000
  c = 0.000000
  # DURATION     TID     FUNCTION
              [ 25362] | main(1, 0x7ea25344) {
              [ 25362] |   float_add(0.000000, 0.000000) {
   503.228 us [ 25362] |     fprintf(&_IO_2_1_stderr_, "a = %f, b = %f\n") = 27;
   511.197 us [ 25362] |   } = 0.000000; /* float_add */
     9.687 us [ 25362] |   fprintf(&_IO_2_1_stderr_, "c = %f\n") = 13;
   531.977 us [ 25362] | } = 0; /* main */

After:
  $ uftrace -a -F main a.out
  a = -0.100000, b = 0.200000
  c = 0.100000
  # DURATION     TID     FUNCTION
              [ 25146] | main(1, 0x7edbb344) {
              [ 25146] |   float_add(-0.100000, 0.200000) {
   501.769 us [ 25146] |     fprintf(&_IO_2_1_stderr_, "a = %f, b = %f\n") = 28;
   509.321 us [ 25146] |   } = 0.100000; /* float_add */
    12.500 us [ 25146] |   fprintf(&_IO_2_1_stderr_, "c = %f\n") = 13;
   533.539 us [ 25146] | } = 1; /* main */

Fixed: namhyung#1088

Signed-off-by: Honggyu Kim <[email protected]>
honggyukim added a commit to honggyukim/uftrace that referenced this issue Mar 15, 2020
The designated initializer in C makes implicit memset internally in arm
architecture.  Due to the memset, VFP registers are clobbered
unexpectedly, so this patch fixes the problem by replacing the
designated initializer to explicit 'mcount_memset4'.

In addition, the offset of the VFP registers was incorrect because the
following commit pushes the d1 register on top of the original d0.

  ffb69ce arm: Handle struct return type by keeping more regs on stack

So the offset is adjusted from -2 to -4 to cope with the change.

Source:
  #include <stdio.h>

  float float_add(float a, float b)
  {
          fprintf(stderr, "a = %f, b = %f\n", a, b);
          return a + b;
  }

  int main(int argc, char *argv[])
  {
          double c;

          c = float_add(-0.1, 0.2);
          fprintf(stderr, "c = %f\n", c);
          return c > 0;
  }

Before:
  $ uftrace -a -F main a.out
  a = 0.000000, b = 0.000000
  c = 0.000000
  # DURATION     TID     FUNCTION
              [ 25362] | main(1, 0x7ea25344) {
              [ 25362] |   float_add(0.000000, 0.000000) {
   503.228 us [ 25362] |     fprintf(&_IO_2_1_stderr_, "a = %f, b = %f\n") = 27;
   511.197 us [ 25362] |   } = 0.000000; /* float_add */
     9.687 us [ 25362] |   fprintf(&_IO_2_1_stderr_, "c = %f\n") = 13;
   531.977 us [ 25362] | } = 0; /* main */

After:
  $ uftrace -a -F main a.out
  a = -0.100000, b = 0.200000
  c = 0.100000
  # DURATION     TID     FUNCTION
              [ 25146] | main(1, 0x7edbb344) {
              [ 25146] |   float_add(-0.100000, 0.200000) {
   501.769 us [ 25146] |     fprintf(&_IO_2_1_stderr_, "a = %f, b = %f\n") = 28;
   509.321 us [ 25146] |   } = 0.100000; /* float_add */
    12.500 us [ 25146] |   fprintf(&_IO_2_1_stderr_, "c = %f\n") = 13;
   533.539 us [ 25146] | } = 1; /* main */

Fixed: namhyung#1088

Signed-off-by: Honggyu Kim <[email protected]>
namhyung pushed a commit that referenced this issue Mar 16, 2020
The designated initializer in C makes implicit memset internally in arm
architecture.  Due to the memset, VFP registers are clobbered
unexpectedly, so this patch fixes the problem by replacing the
designated initializer to explicit 'mcount_memset4'.

In addition, the offset of the VFP registers was incorrect because the
following commit pushes the d1 register on top of the original d0.

  ffb69ce arm: Handle struct return type by keeping more regs on stack

So the offset is adjusted from -2 to -4 to cope with the change.

Source:
  #include <stdio.h>

  float float_add(float a, float b)
  {
          fprintf(stderr, "a = %f, b = %f\n", a, b);
          return a + b;
  }

  int main(int argc, char *argv[])
  {
          double c;

          c = float_add(-0.1, 0.2);
          fprintf(stderr, "c = %f\n", c);
          return c > 0;
  }

Before:
  $ uftrace -a -F main a.out
  a = 0.000000, b = 0.000000
  c = 0.000000
  # DURATION     TID     FUNCTION
              [ 25362] | main(1, 0x7ea25344) {
              [ 25362] |   float_add(0.000000, 0.000000) {
   503.228 us [ 25362] |     fprintf(&_IO_2_1_stderr_, "a = %f, b = %f\n") = 27;
   511.197 us [ 25362] |   } = 0.000000; /* float_add */
     9.687 us [ 25362] |   fprintf(&_IO_2_1_stderr_, "c = %f\n") = 13;
   531.977 us [ 25362] | } = 0; /* main */

After:
  $ uftrace -a -F main a.out
  a = -0.100000, b = 0.200000
  c = 0.100000
  # DURATION     TID     FUNCTION
              [ 25146] | main(1, 0x7edbb344) {
              [ 25146] |   float_add(-0.100000, 0.200000) {
   501.769 us [ 25146] |     fprintf(&_IO_2_1_stderr_, "a = %f, b = %f\n") = 28;
   509.321 us [ 25146] |   } = 0.100000; /* float_add */
    12.500 us [ 25146] |   fprintf(&_IO_2_1_stderr_, "c = %f\n") = 13;
   533.539 us [ 25146] | } = 1; /* main */

Fixed: #1088

Signed-off-by: Honggyu Kim <[email protected]>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging a pull request may close this issue.

2 participants