Immutable constructors should be inlined even when they have a lot of arguments #12165

simonster · 2015-07-15T17:42:35Z

e.g.:

julia> immutable X
           a::Int
           b::Int
           c::Int
           d::Int
           e::Int
           f::Int
           g::Int
           h::Int
           i::Int
       end

julia> f() = X(1, 2, 3, 4, 5, 6, 7, 8, 9);

julia> @code_llvm f()

define %X @julia_f_21095() {
top:
  %0 = load %jl_value_t** inttoptr (i64 4589611400 to %jl_value_t**), align 8
  %1 = call %X @julia_call_21096(%jl_value_t* %0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9)
  ret %X %1
}

The work being done here still needs to be done even if the constructor is not inlined. All making a function call to the constructor does is needlessly move memory around and make it harder for LLVM to optimize things.

JeffBezanson · 2015-07-15T17:47:23Z

Inlining big things everywhere has a cost in code size.

JeffBezanson · 2015-07-15T17:48:04Z

Removing performance tag until there is performance data.

vtjnash · 2015-07-15T17:52:08Z

there's a likelihood that the inlining threshold should be proportional to the number of arguments to the function.

simonster · 2015-07-15T17:52:45Z

The code should actually be smaller for this case if you inline. Here's what it looks like just below the inlining threshold:

julia> immutable X
           a::Int
           b::Int
           c::Int
           d::Int
           e::Int
           f::Int
           g::Int
       end

julia> f() = X(1, 2, 3, 4, 5, 6, 7);

julia> @code_native f()
    .section    __TEXT,__text,regular,pure_instructions
Filename: none
Source line: 1
    pushq   %rbp
    movq    %rsp, %rbp
Source line: 1
    movq    $7, 48(%rdi)
    movq    $6, 40(%rdi)
    movq    $5, 32(%rdi)
    movq    $4, 24(%rdi)
    movq    $3, 16(%rdi)
    movq    $2, 8(%rdi)
    movq    $1, (%rdi)
    popq    %rbp
    ret

and here's an example just above the inlining threshold:

julia> immutable X
           a::Int
           b::Int
           c::Int
           d::Int
           e::Int
           f::Int
           g::Int
           i::Int
       end

julia> f() = X(1, 2, 3, 4, 5, 6, 7, 8);

julia> @code_native f()
    .section    __TEXT,__text,regular,pure_instructions
Filename: none
Source line: 1
    pushq   %rbp
    movq    %rsp, %rbp
    pushq   %rbx
    subq    $104, %rsp
    movq    %rdi, %rbx
    movabsq $4560738376, %rax       ## imm = 0x10FD75848
Source line: 1
    movq    (%rax), %rsi
    movq    $8, 24(%rsp)
    movq    $7, 16(%rsp)
    movq    $6, 8(%rsp)
    movq    $5, (%rsp)
    leaq    -72(%rbp), %rdi
    movabsq $13335795376, %rax      ## imm = 0x31AE016B0
    movl    $1, %edx
    movl    $2, %ecx
    movl    $3, %r8d
    movl    $4, %r9d
    callq   *%rax
    movq    -40(%rbp), %rax
    movq    -32(%rbp), %rcx
    movq    -24(%rbp), %rdx
    movq    -16(%rbp), %rsi
    movq    %rsi, 56(%rbx)
    movq    %rdx, 48(%rbx)
    movq    %rcx, 40(%rbx)
    movq    %rax, 32(%rbx)
    movq    -48(%rbp), %rax
    movq    %rax, 24(%rbx)
    movq    -56(%rbp), %rax
    movq    %rax, 16(%rbx)
    movq    -72(%rbp), %rax
    movq    -64(%rbp), %rcx
    movq    %rcx, 8(%rbx)
    movq    %rax, (%rbx)
    addq    $104, %rsp
    popq    %rbx
    popq    %rbp
    ret

JeffBezanson · 2015-07-15T17:56:47Z

Yes, @vtjnash is probably right that having more arguments should increase the chance of inlining, especially once you get to more than ~5 arguments or so.

Keno · 2015-07-15T18:03:35Z

Another big issue here is the lack of sret on that return parameter causing the extra copy after the callq.

simonster · 2015-07-15T18:50:27Z

Okay, here's a stripped down version of the code where I noticed that the code I got was not what I expected. When this can be inlined, not only is there less unnecessary memory movement going on, but LLVM can perform fewer comparisons because it knows exactly what needs to be compared.

immutable FloatingPointDatatype
    class::UInt8
    bitfield1::UInt8
    bitfield2::UInt8
    bitfield3::UInt8
    size::UInt32
    bitoffset::UInt16
    bitprecision::UInt16
    exponentlocation::UInt8
    exponentsize::UInt8
    mantissalocation::UInt8
    mantissasize::UInt8
    exponentbias::UInt32
end

h5type(::Type{Float16}) =
    FloatingPointDatatype(0x00, 0x20, 0x0f, 0x00, UInt32(2), 0x0000, UInt16(16), UInt8(10), 0x05, 0x00, UInt32(10), 0x0000000f)
h5type(::Type{Float32}) =
    FloatingPointDatatype(0x00, 0x20, 0x1f, 0x00, UInt32(4), 0x0000, UInt16(32), UInt8(23), 0x08, 0x00, UInt32(23), 0x0000007f)
h5type(::Type{Float64}) =
    FloatingPointDatatype(0x00, 0x20, 0x3f, 0x00, UInt32(8), 0x0000, UInt16(64), UInt8(52), 0x0b, 0x00, UInt32(52), 0x000003ff)

immutable UnsupportedFeatureException <: Exception end

function jltype(dt::FloatingPointDatatype)
    if dt == h5type(Float64)
        return 64
    elseif dt == h5type(Float32)
        return 32
    elseif dt == h5type(Float16)
        return 16
    else
        throw(UnsupportedFeatureException())
    end
end

function jltype_test()
    x = fill(h5type(Float16), 1000000)
    y = 0
    @time for i = 1:length(x)
        y += jltype(x[i])
    end
    y
end

I can force inlining by defining:

@eval function jltype(dt::FloatingPointDatatype)
    if dt == $(Expr(:new, :FloatingPointDatatype, 0x00, 0x20, 0x3f, 0x00, UInt32(8), 0x0000, UInt16(64), UInt8(52), 0x0b, 0x00, UInt32(52), 0x000003ff))
        return 64
    elseif dt == $(Expr(:new, :FloatingPointDatatype, 0x00, 0x20, 0x1f, 0x00, UInt32(4), 0x0000, UInt16(32), UInt8(23), 0x08, 0x00, UInt32(23), 0x0000007f))
        return 32
    elseif dt == $(Expr(:new, :FloatingPointDatatype, 0x00, 0x20, 0x0f, 0x00, UInt32(2), 0x0000, UInt16(16), UInt8(10), 0x05, 0x00, UInt32(10), 0x0000000f))
        return 16
    else
        throw(UnsupportedFeatureException())
    end
end

Without inlining:

julia> jltype_test();
  39.989 milliseconds

With inlining:

julia> jltype_test();
  11.151 milliseconds

timholy · 2015-07-15T19:28:47Z

@vtjnash's proposal would help a lot with #9622.

KristofferC · 2017-01-22T12:26:51Z

This looks fixed now:

julia> @code_native f()
	.text
Filename: REPL[25]
	pushq	%rbp
	movq	%rsp, %rbp
Source line: 1
	movq	$1, (%rdi)
	movq	$2, 8(%rdi)
	movq	$3, 16(%rdi)
	movq	$4, 24(%rdi)
	movq	$5, 32(%rdi)
	movq	$6, 40(%rdi)
	movq	$7, 48(%rdi)
	movq	$8, 56(%rdi)
	movq	$9, 64(%rdi)
	movq	%rdi, %rax
	popq	%rbp
	retq

julia> @code_native h5type(Float32)
	.text
Filename: REPL[30]
	pushq	%rbp
	movq	%rsp, %rbp
Source line: 1
	movb	$0, (%rdi)
	movb	$32, 1(%rdi)
	movb	$31, 2(%rdi)
	movb	$0, 3(%rdi)
	movl	$4, 4(%rdi)
	movw	$0, 8(%rdi)
	movw	$32, 10(%rdi)
	movb	$23, 12(%rdi)
	movb	$8, 13(%rdi)
	movb	$0, 14(%rdi)
	movb	$23, 15(%rdi)
	movl	$127, 16(%rdi)
	movq	%rdi, %rax
	popq	%rbp
	retq
	nopw	%cs:(%rax,%rax)

julia> f() = h5type(Float32) == h5type(Float16)
f (generic function with 1 method)

julia> @code_native f()
	.text
Filename: REPL[15]
	pushq	%rbp
	movq	%rsp, %rbp
Source line: 1
	xorl	%eax, %eax
	popq	%rbp
	retq
	nopl	(%rax,%rax)

simonster added the performance Must go faster label Jul 15, 2015

JeffBezanson removed the performance Must go faster label Jul 15, 2015

JeffBezanson added the compiler:codegen Generation of LLVM IR and native code label Jul 15, 2015

timholy added the performance Must go faster label Jul 15, 2015

KristofferC closed this as completed Jan 22, 2017

tkelman added the potential benchmark Could make a good benchmark in BaseBenchmarks label Jan 22, 2017

KristofferC mentioned this issue Aug 17, 2018

add some regression tests from base JuliaCI/BaseBenchmarks.jl#226

Merged

KristofferC removed the potential benchmark Could make a good benchmark in BaseBenchmarks label Oct 8, 2018

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Immutable constructors should be inlined even when they have a lot of arguments #12165

Immutable constructors should be inlined even when they have a lot of arguments #12165

simonster commented Jul 15, 2015

JeffBezanson commented Jul 15, 2015

JeffBezanson commented Jul 15, 2015

vtjnash commented Jul 15, 2015

simonster commented Jul 15, 2015

JeffBezanson commented Jul 15, 2015

Keno commented Jul 15, 2015

simonster commented Jul 15, 2015

timholy commented Jul 15, 2015

KristofferC commented Jan 22, 2017

Immutable constructors should be inlined even when they have a lot of arguments #12165

Immutable constructors should be inlined even when they have a lot of arguments #12165

Comments

simonster commented Jul 15, 2015

JeffBezanson commented Jul 15, 2015

JeffBezanson commented Jul 15, 2015

vtjnash commented Jul 15, 2015

simonster commented Jul 15, 2015

JeffBezanson commented Jul 15, 2015

Keno commented Jul 15, 2015

simonster commented Jul 15, 2015

timholy commented Jul 15, 2015

KristofferC commented Jan 22, 2017