-
Notifications
You must be signed in to change notification settings - Fork 474
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
arm floating point argument failures #1088
Comments
Here is another problem in return value recording.
|
It seems that 1│ Dump of assembler code for function save_argument:
2│ /home/honggyu/work/uftrace/libmcount/record.c:
3│ 503 {
4│ 0x76f8a518 <+0>: push {r4, r11, lr}
5│ 0x76f8a51c <+4>: add r11, sp, #8
6│ 0x76f8a520 <+8>: sub sp, sp, #84 ; 0x54
7│ 0x76f8a524 <+12>: str r0, [r11, #-80] ; 0xffffffb0
8│ 0x76f8a528 <+16>: str r1, [r11, #-84] ; 0xffffffac
9│ 0x76f8a52c <+20>: str r2, [r11, #-88] ; 0xffffffa8
10│ 0x76f8a530 <+24>: str r3, [r11, #-92] ; 0xffffffa4
11│ 0x76f8a534 <+28>: sub r3, r11, #12
12│ 0x76f8a538 <+32>: sub r3, r3, #64 ; 0x40
13│ 0x76f8a53c <+36>: add r3, r3, #15
14│ 0x76f8a540 <+40>: lsr r3, r3, #4
15│ 0x76f8a544 <+44>: lsl r4, r3, #4
16│
17│ 504 void *argbuf = get_argbuf(mtdp, rstack);
18│ 0x76f8a548 <+48>: ldr r1, [r11, #-84] ; 0xffffffac
19│ 0x76f8a54c <+52>: ldr r0, [r11, #-80] ; 0xffffffb0
20│ 0x76f8a550 <+56>: bl 0x76f89ac8 <get_argbuf>
21│ 0x76f8a554 <+60>: str r0, [r11, #-16]
22│
23│ 505 unsigned size;
24│ 506 struct mcount_arg_context ctx = {
25│ 0x76f8a558 <+64>: mov r0, r4
26│ 0x76f8a55c <+68>: mov r3, #48 ; 0x30
27│ 0x76f8a560 <+72>: mov r2, r3
28│ 0x76f8a564 <+76>: mov r1, #0
29│ 0x76f8a568 <+80>: bl 0x76f7e384 <memset@plt>
30├──> 0x76f8a56c <+84>: ldr r3, [r11, #-92] ; 0xffffffa4
31│ 0x76f8a570 <+88>: str r3, [r4]
32│
33│ 507 .regs = regs,
34│ 508 .stack_base = rstack->parent_loc,
35│ 0x76f8a574 <+922>: ldr r3, [r11, #-84] ; 0xffffffac
36│ 0x76f8a578 <+966>: ldr r3, [r3]
37│
38│ 506 struct mcount_arg_context ctx = {
39│ 0x76f8a57c <+100>: str r3, [r4, #4]
40│
41│ 509 .regions = &mtdp->mem_regions,
42│ 0x76f8a580 <+104>: ldr r3, [r11, #-80] ; 0xffffffb0
43│ 0x76f8a584 <+108>: add r3, r3, #4160 ; 0x1040
44│ 0x76f8a588 <+112>: add r3, r3, #36 ; 0x24 The original source code in C doesn't have such 499│ void save_argument(struct mcount_thread_data *mtdp,
500│ struct mcount_ret_stack *rstack,
501│ struct list_head *args_spec,
502│ struct mcount_regs *regs)
503│ {
504│ void *argbuf = get_argbuf(mtdp, rstack);
505│ unsigned size;
506├───────> struct mcount_arg_context ctx = {
507│ .regs = regs,
508│ .stack_base = rstack->parent_loc,
509│ .regions = &mtdp->mem_regions,
510│ .arch = &mtdp->arch,
511│ };
512│
513│ size = save_to_argbuf(argbuf, args_spec, &ctx);
514│ if (size == -1U) {
515│ pr_warn("argument data is too big\n");
516│ return;
517│ }
518│
519│ *(unsigned *)argbuf = size;
520│ rstack->flags |= MCOUNT_FL_ARGUMENT;
521│ } |
Here is the The instruction in line 3 overwrites 1│ Dump of assembler code for function memset:
2│ 0x76f690f8 <+0>: mov r3, r0
3├──> 0x76f690fc <+4>: vdup.8 q0, r1
4│ 0x76f69100 <+8>: cmp r2, #79 ; 0x4f
5│ 0x76f69104 <+12>: vdup.8 q1, r1
6│ 0x76f69108 <+16>: bcc 0x76f69190 <memset+152>
7│ 0x76f6910c <+20>: ands r12, r0, #15
8│ 0x76f69110 <+24>: beq 0x76f69140 <memset+72>
9│ 0x76f69114 <+28>: rsb r12, r12, #16
10│ 0x76f69118 <+32>: sub r2, r2, r12
11│ 0x76f6911c <+36>: ; <UNDEFINED> instruction: 0xe6ffcf3c
12│ 0x76f69120 <+40>: tst r0, #1
13│ 0x76f69124 <+44>: strbne r1, [r3], #1
14│ 0x76f69128 <+48>: lsls r12, r12, #2
15│ 0x76f6912c <+52>: strbcs r1, [r3, #1]
16│ 0x76f69130 <+56>: strbcs r1, [r3], #2
17│ 0x76f69134 <+60>: vstmiami r3!, {s0}
18│ 0x76f69138 <+64>: lsls r12, r12, #2
19│ 0x76f6913c <+68>: vstmiacs r3!, {d0}
20│ 0x76f69140 <+72>: mov r12, #64 ; 0x40
21│ 0x76f69144 <+76>: sub r2, r2, #64 ; 0x40
22│ 0x76f69148 <+80>: add r1, r3, #32
23│ 0x76f6914c <+84>: vst1.8 {d0-d3}, [r3 :128], r12
24│ 0x76f69150 <+88>: subs r2, r2, #64 ; 0x40
25│ 0x76f69154 <+92>: vst1.8 {d0-d3}, [r1 :128], r12
26│ 0x76f69158 <+96>: bcs 0x76f6914c <memset+84>
27│ 0x76f6915c <+100>: vmov.32 r1, d0[0]
28│ 0x76f69160 <+104>: lsls r2, r2, #27
29│ 0x76f69164 <+108>: bcc 0x76f6916c <memset+116>
30│ 0x76f69168 <+112>: vst1.8 {d0-d3}, [r3]!
31│ 0x76f6916c <+116>: bpl 0x76f69174 <memset+124>
32│ 0x76f69170 <+120>: vst1.8 {d0-d1}, [r3]!
33│ 0x76f69174 <+124>: lsls r2, r2, #2
34│ 0x76f69178 <+128>: vstmiacs r3!, {d0}
35│ 0x76f6917c <+132>: strmi r1, [r3], #4
36│ 0x76f69180 <+136>: lsls r2, r2, #2
37│ 0x76f69184 <+140>: strhcs r1, [r3], #2
38│ 0x76f69188 <+144>: strbmi r1, [r3]
39│ 0x76f6918c <+148>: bx lr
40│ 0x76f69190 <+152>: tst r3, #3
41│ 0x76f69194 <+156>: beq 0x76f691ac <memset+180>
42│ 0x76f69198 <+160>: subs r2, r2, #1
43│ 0x76f6919c <+164>: bcc 0x76f6918c <memset+148>
44│ 0x76f691a0 <+168>: strb r1, [r3], #1
45│ 0x76f691a4 <+172>: tst r3, #3
46│ 0x76f691a8 <+176>: bne 0x76f69198 <memset+160>
47│ 0x76f691ac <+180>: cmp r2, #32
48│ 0x76f691b0 <+184>: bcc 0x76f6915c <memset+100>
49│ 0x76f691b4 <+188>: vst1.8 {d0-d3}, [r3]!
50│ 0x76f691b8 <+192>: sub r2, r2, #32
51│ 0x76f691bc <+196>: b 0x76f6915c <memset+100>
52│ End of assembler dump. |
I manually changed the code as follows: diff --git a/libmcount/record.c b/libmcount/record.c
index 4bf383c5..bdf4711b 100644
--- a/libmcount/record.c
+++ b/libmcount/record.c
@@ -503,12 +503,21 @@ void save_argument(struct mcount_thread_data *mtdp,
{
void *argbuf = get_argbuf(mtdp, rstack);
unsigned size;
+#if 0
struct mcount_arg_context ctx = {
.regs = regs,
.stack_base = rstack->parent_loc,
.regions = &mtdp->mem_regions,
.arch = &mtdp->arch,
};
+#else
+ struct mcount_arg_context ctx;
+ mcount_memset1(&ctx, 0, sizeof(ctx));
+ ctx.regs = regs;
+ ctx.stack_base = rstack->parent_loc;
+ ctx.regions = &mtdp->mem_regions;
+ ctx.arch = &mtdp->arch;
+#endif
size = save_to_argbuf(argbuf, args_spec, &ctx);
if (size == -1U) { At least it doesn't change the original value printed by the program.
However, it still doesn't record the argument properly.
There might be somewhere else to be fixed together.
|
I think the right fix might be possible if there is a way to disable such implicit usage of |
The return value can be fixed with the following change. diff --git a/arch/arm/mcount-support.c b/arch/arm/mcount-support.c
index e609c2f9..4335e854 100644
--- a/arch/arm/mcount-support.c
+++ b/arch/arm/mcount-support.c
@@ -488,8 +488,10 @@ void mcount_arch_get_retval(struct mcount_arg_context *ctx,
/* type of return value cannot be FLOAT, so check format instead */
#ifdef HAVE_ARM_HARDFP
if (spec->fmt == ARG_FMT_FLOAT && use_hard_float) {
- /* d0 register (64 bit) was saved below the r0 */
- memcpy(ctx->val.v, ctx->retval - 2, spec->size);
+ /* d0, d1 registers (64 bit) were saved below the r0 */
+ long *float_retval = ctx->retval - 4;
+
+ memcpy(ctx->val.v, float_retval, spec->size);
}
else
#endif /* HAVE_ARM_HARDFP */ The following commit should have also modified it as above.
The return value is correct now.
|
The summarized change is as follows: diff --git a/arch/arm/mcount-support.c b/arch/arm/mcount-support.c
index e609c2f9..4335e854 100644
--- a/arch/arm/mcount-support.c
+++ b/arch/arm/mcount-support.c
@@ -488,8 +488,10 @@ void mcount_arch_get_retval(struct mcount_arg_context *ctx,
/* type of return value cannot be FLOAT, so check format instead */
#ifdef HAVE_ARM_HARDFP
if (spec->fmt == ARG_FMT_FLOAT && use_hard_float) {
- /* d0 register (64 bit) was saved below the r0 */
- memcpy(ctx->val.v, ctx->retval - 2, spec->size);
+ /* d0, d1 registers (64 bit) were saved below the r0 */
+ long *float_retval = ctx->retval - 4;
+
+ memcpy(ctx->val.v, float_retval, spec->size);
}
else
#endif /* HAVE_ARM_HARDFP */
diff --git a/libmcount/record.c b/libmcount/record.c
index 4bf383c5..b3ea9dcd 100644
--- a/libmcount/record.c
+++ b/libmcount/record.c
@@ -503,12 +503,13 @@ void save_argument(struct mcount_thread_data *mtdp,
{
void *argbuf = get_argbuf(mtdp, rstack);
unsigned size;
- struct mcount_arg_context ctx = {
- .regs = regs,
- .stack_base = rstack->parent_loc,
- .regions = &mtdp->mem_regions,
- .arch = &mtdp->arch,
- };
+ struct mcount_arg_context ctx;
+
+ mcount_memset1(&ctx, 0, sizeof(ctx));
+ ctx.regs = regs;
+ ctx.stack_base = rstack->parent_loc;
+ ctx.regions = &mtdp->mem_regions;
+ ctx.arch = &mtdp->arch;
size = save_to_argbuf(argbuf, args_spec, &ctx);
if (size == -1U) {
@@ -526,11 +527,12 @@ void save_retval(struct mcount_thread_data *mtdp,
struct list_head *args_spec = rstack->pargs;
void *argbuf = get_argbuf(mtdp, rstack);
unsigned size;
- struct mcount_arg_context ctx = {
- .retval = retval,
- .regions = &mtdp->mem_regions,
- .arch = &mtdp->arch,
- };
+ struct mcount_arg_context ctx;
+
+ mcount_memset1(&ctx, 0, sizeof(ctx));
+ ctx.retval = retval;
+ ctx.regions = &mtdp->mem_regions;
+ ctx.arch = &mtdp->arch;
size = save_to_argbuf(argbuf, args_spec, &ctx);
if (size == -1U) { |
However, the above change doesn't fix the following example. $ cat float2.c
#include <stdio.h>
#include <math.h>
int main(int argc, char *argv[])
{
float a = 1.0f;
float e = expf(a);
fprintf(stderr, "expf(%f) = %f\n", a, e);
double one = log(e);
fprintf(stderr, "log(%f) = %lf\n", e, one);
return one - 1;
}
It's slightly changed example based on |
To fix the above problem, more change is needed in plthook routine as well. diff --git a/libmcount/plthook.c b/libmcount/plthook.c
index 2ce62cce..160a0c3c 100644
--- a/libmcount/plthook.c
+++ b/libmcount/plthook.c
@@ -732,9 +732,6 @@ static unsigned long __plthook_entry(unsigned long *ret_addr,
struct sym *sym;
struct mcount_thread_data *mtdp = NULL;
struct mcount_ret_stack *rstack;
- struct uftrace_trigger tr = {
- .flags = 0,
- };
bool skip = false;
bool recursion = true;
enum filter_result filtered;
@@ -742,6 +739,10 @@ static unsigned long __plthook_entry(unsigned long *ret_addr,
struct plthook_special_func *func;
unsigned long special_flag = 0;
unsigned long real_addr = 0;
+ struct uftrace_trigger tr;
+
+ mcount_memset1(&tr, 0, sizeof(tr));
+ tr.flags = 0;
// if necessary, implement it by architecture.
child_idx = mcount_arch_child_idx(child_idx); Now, the result is correct.
|
The above change fixes floating point related tests in ARM 32-bit.
But it requires the following test fix as well. diff --git a/tests/t083_arg_float.py b/tests/t083_arg_float.py
index e4f2de8b..02410436 100644
--- a/tests/t083_arg_float.py
+++ b/tests/t083_arg_float.py
@@ -31,7 +31,5 @@ class TestCase(TestBase):
# argument count follows the size of type
argopt = argopt.replace('float_mul@fparg1/64,fparg2/32',
'float_mul@fparg1/64,fparg3/32')
- argopt = argopt.replace('float_div@fparg1,fparg2',
- 'float_div@fparg1,fparg3')
return '%s %s %s' % (TestBase.uftrace_cmd, argopt, 't-' + self.name) @namhyung What do you think about this approach? |
@namhyung Could you please take a look at this? |
Sorry for the late reply. Both changes are look good. Since ffb69ce offset from |
The designated initializer in C makes implicit memset internally in arm architecture. Due to the memset, VFP registers are clobbered unexpectedly, so this patch fixes the problem by replacing the designated initializer to explicit 'mcount_memset4'. Source: #include <stdio.h> float float_add(float a, float b) { fprintf(stderr, "a = %f, b = %f\n", a, b); return a + b; } int main(int argc, char *argv[]) { double c; c = float_add(-0.1, 0.2); fprintf(stderr, "c = %f\n", c); return c > 0; } Before: $ uftrace -a -F main a.out a = 0.000000, b = 0.000000 c = 0.000000 # DURATION TID FUNCTION [ 25362] | main(1, 0x7ea25344) { [ 25362] | float_add(0.000000, 0.000000) { 503.228 us [ 25362] | fprintf(&_IO_2_1_stderr_, "a = %f, b = %f\n") = 27; 511.197 us [ 25362] | } = 0.000000; /* float_add */ 9.687 us [ 25362] | fprintf(&_IO_2_1_stderr_, "c = %f\n") = 13; 531.977 us [ 25362] | } = 0; /* main */ After: $ uftrace -a -F main a.out a = -0.100000, b = 0.200000 c = 0.100000 # DURATION TID FUNCTION [ 25146] | main(1, 0x7edbb344) { [ 25146] | float_add(-0.100000, 0.200000) { 501.769 us [ 25146] | fprintf(&_IO_2_1_stderr_, "a = %f, b = %f\n") = 28; 509.321 us [ 25146] | } = 0.100000; /* float_add */ 12.500 us [ 25146] | fprintf(&_IO_2_1_stderr_, "c = %f\n") = 13; 533.539 us [ 25146] | } = 1; /* main */ Fixed: namhyung#1088 Signed-off-by: Honggyu Kim <[email protected]>
The designated initializer in C makes implicit memset internally in arm architecture. Due to the memset, VFP registers are clobbered unexpectedly, so this patch fixes the problem by replacing the designated initializer to explicit 'mcount_memset4'. In addition, the offset of VFP registers were incorrect because the following commit pushs d1 register on top of the original d0. ffb69ce arm: Handle struct return type by keeping more regs on stack So the offset is adjusted from -2 to -4 to cope with the change. Source: #include <stdio.h> float float_add(float a, float b) { fprintf(stderr, "a = %f, b = %f\n", a, b); return a + b; } int main(int argc, char *argv[]) { double c; c = float_add(-0.1, 0.2); fprintf(stderr, "c = %f\n", c); return c > 0; } Before: $ uftrace -a -F main a.out a = 0.000000, b = 0.000000 c = 0.000000 # DURATION TID FUNCTION [ 25362] | main(1, 0x7ea25344) { [ 25362] | float_add(0.000000, 0.000000) { 503.228 us [ 25362] | fprintf(&_IO_2_1_stderr_, "a = %f, b = %f\n") = 27; 511.197 us [ 25362] | } = 0.000000; /* float_add */ 9.687 us [ 25362] | fprintf(&_IO_2_1_stderr_, "c = %f\n") = 13; 531.977 us [ 25362] | } = 0; /* main */ After: $ uftrace -a -F main a.out a = -0.100000, b = 0.200000 c = 0.100000 # DURATION TID FUNCTION [ 25146] | main(1, 0x7edbb344) { [ 25146] | float_add(-0.100000, 0.200000) { 501.769 us [ 25146] | fprintf(&_IO_2_1_stderr_, "a = %f, b = %f\n") = 28; 509.321 us [ 25146] | } = 0.100000; /* float_add */ 12.500 us [ 25146] | fprintf(&_IO_2_1_stderr_, "c = %f\n") = 13; 533.539 us [ 25146] | } = 1; /* main */ Fixed: namhyung#1088 Signed-off-by: Honggyu Kim <[email protected]>
The designated initializer in C makes implicit memset internally in arm architecture. Due to the memset, VFP registers are clobbered unexpectedly, so this patch fixes the problem by replacing the designated initializer to explicit 'mcount_memset4'. In addition, the offset of VFP registers were incorrect because the following commit pushs d1 register on top of the original d0. ffb69ce arm: Handle struct return type by keeping more regs on stack So the offset is adjusted from -2 to -4 to cope with the change. Source: #include <stdio.h> float float_add(float a, float b) { fprintf(stderr, "a = %f, b = %f\n", a, b); return a + b; } int main(int argc, char *argv[]) { double c; c = float_add(-0.1, 0.2); fprintf(stderr, "c = %f\n", c); return c > 0; } Before: $ uftrace -a -F main a.out a = 0.000000, b = 0.000000 c = 0.000000 # DURATION TID FUNCTION [ 25362] | main(1, 0x7ea25344) { [ 25362] | float_add(0.000000, 0.000000) { 503.228 us [ 25362] | fprintf(&_IO_2_1_stderr_, "a = %f, b = %f\n") = 27; 511.197 us [ 25362] | } = 0.000000; /* float_add */ 9.687 us [ 25362] | fprintf(&_IO_2_1_stderr_, "c = %f\n") = 13; 531.977 us [ 25362] | } = 0; /* main */ After: $ uftrace -a -F main a.out a = -0.100000, b = 0.200000 c = 0.100000 # DURATION TID FUNCTION [ 25146] | main(1, 0x7edbb344) { [ 25146] | float_add(-0.100000, 0.200000) { 501.769 us [ 25146] | fprintf(&_IO_2_1_stderr_, "a = %f, b = %f\n") = 28; 509.321 us [ 25146] | } = 0.100000; /* float_add */ 12.500 us [ 25146] | fprintf(&_IO_2_1_stderr_, "c = %f\n") = 13; 533.539 us [ 25146] | } = 1; /* main */ Fixed: namhyung#1088 Signed-off-by: Honggyu Kim <[email protected]>
The designated initializer in C makes implicit memset internally in arm architecture. Due to the memset, VFP registers are clobbered unexpectedly, so this patch fixes the problem by replacing the designated initializer to explicit 'mcount_memset4'. In addition, the offset of VFP registers were incorrect because the following commit pushs d1 register on top of the original d0. ffb69ce arm: Handle struct return type by keeping more regs on stack So the offset is adjusted from -2 to -4 to cope with the change. Source: #include <stdio.h> float float_add(float a, float b) { fprintf(stderr, "a = %f, b = %f\n", a, b); return a + b; } int main(int argc, char *argv[]) { double c; c = float_add(-0.1, 0.2); fprintf(stderr, "c = %f\n", c); return c > 0; } Before: $ uftrace -a -F main a.out a = 0.000000, b = 0.000000 c = 0.000000 # DURATION TID FUNCTION [ 25362] | main(1, 0x7ea25344) { [ 25362] | float_add(0.000000, 0.000000) { 503.228 us [ 25362] | fprintf(&_IO_2_1_stderr_, "a = %f, b = %f\n") = 27; 511.197 us [ 25362] | } = 0.000000; /* float_add */ 9.687 us [ 25362] | fprintf(&_IO_2_1_stderr_, "c = %f\n") = 13; 531.977 us [ 25362] | } = 0; /* main */ After: $ uftrace -a -F main a.out a = -0.100000, b = 0.200000 c = 0.100000 # DURATION TID FUNCTION [ 25146] | main(1, 0x7edbb344) { [ 25146] | float_add(-0.100000, 0.200000) { 501.769 us [ 25146] | fprintf(&_IO_2_1_stderr_, "a = %f, b = %f\n") = 28; 509.321 us [ 25146] | } = 0.100000; /* float_add */ 12.500 us [ 25146] | fprintf(&_IO_2_1_stderr_, "c = %f\n") = 13; 533.539 us [ 25146] | } = 1; /* main */ Fixed: namhyung#1088 Signed-off-by: Honggyu Kim <[email protected]>
The designated initializer in C makes implicit memset internally in arm architecture. Due to the memset, VFP registers are clobbered unexpectedly, so this patch fixes the problem by replacing the designated initializer to explicit 'mcount_memset4'. In addition, the offset of VFP registers were incorrect because the following commit pushs d1 register on top of the original d0. ffb69ce arm: Handle struct return type by keeping more regs on stack So the offset is adjusted from -2 to -4 to cope with the change. Source: #include <stdio.h> float float_add(float a, float b) { fprintf(stderr, "a = %f, b = %f\n", a, b); return a + b; } int main(int argc, char *argv[]) { double c; c = float_add(-0.1, 0.2); fprintf(stderr, "c = %f\n", c); return c > 0; } Before: $ uftrace -a -F main a.out a = 0.000000, b = 0.000000 c = 0.000000 # DURATION TID FUNCTION [ 25362] | main(1, 0x7ea25344) { [ 25362] | float_add(0.000000, 0.000000) { 503.228 us [ 25362] | fprintf(&_IO_2_1_stderr_, "a = %f, b = %f\n") = 27; 511.197 us [ 25362] | } = 0.000000; /* float_add */ 9.687 us [ 25362] | fprintf(&_IO_2_1_stderr_, "c = %f\n") = 13; 531.977 us [ 25362] | } = 0; /* main */ After: $ uftrace -a -F main a.out a = -0.100000, b = 0.200000 c = 0.100000 # DURATION TID FUNCTION [ 25146] | main(1, 0x7edbb344) { [ 25146] | float_add(-0.100000, 0.200000) { 501.769 us [ 25146] | fprintf(&_IO_2_1_stderr_, "a = %f, b = %f\n") = 28; 509.321 us [ 25146] | } = 0.100000; /* float_add */ 12.500 us [ 25146] | fprintf(&_IO_2_1_stderr_, "c = %f\n") = 13; 533.539 us [ 25146] | } = 1; /* main */ Fixed: #1088 Signed-off-by: Honggyu Kim <[email protected]>
Here is a test program.
It shows floating point numbers in normal execution.
It also shows correct numbers with simple uftrace record execution.
However, the floating point numbers are somehow printed incorrect values if uftrace tries to record an argument.
The result is same with
--auto-args
.The text was updated successfully, but these errors were encountered: