diff --git a/base/floatfuncs.jl b/base/floatfuncs.jl
index 24b998a2a22d3..14b3ac7a25da5 100644
--- a/base/floatfuncs.jl
+++ b/base/floatfuncs.jl
@@ -2,19 +2,19 @@
 
 ## floating-point functions ##
 
-copysign(x::Float64, y::Float64) = box(Float64,copysign_float(unbox(Float64,x),unbox(Float64,y)))
-copysign(x::Float32, y::Float32) = box(Float32,copysign_float(unbox(Float32,x),unbox(Float32,y)))
+copysign(x::Float64, y::Float64) = box(Float64, copysign_float(x, y))
+copysign(x::Float32, y::Float32) = box(Float32, copysign_float(x, y))
 copysign(x::Float32, y::Real) = copysign(x, Float32(y))
 copysign(x::Float64, y::Real) = copysign(x, Float64(y))
 
-flipsign(x::Float64, y::Float64) = box(Float64,xor_int(unbox(Float64,x),and_int(unbox(Float64,y),0x8000000000000000)))
-flipsign(x::Float32, y::Float32) = box(Float32,xor_int(unbox(Float32,x),and_int(unbox(Float32,y),0x80000000)))
+flipsign(x::Float64, y::Float64) = box(Float64, xor_int(box(UInt64, x), and_int(box(UInt64, y), 0x8000000000000000)))
+flipsign(x::Float32, y::Float32) = box(Float32, xor_int(box(UInt32, x), and_int(box(UInt32, y), 0x80000000)))
 flipsign(x::Float32, y::Real) = flipsign(x, Float32(y))
 flipsign(x::Float64, y::Real) = flipsign(x, Float64(y))
 
-signbit(x::Float64) = signbit(reinterpret(Int64,x))
-signbit(x::Float32) = signbit(reinterpret(Int32,x))
-signbit(x::Float16) = signbit(reinterpret(Int16,x))
+signbit(x::Float64) = signbit(box(Int64, x))
+signbit(x::Float32) = signbit(box(Int32, x))
+signbit(x::Float16) = signbit(box(Int16, x))
 
 maxintfloat(::Type{Float64}) = 9007199254740992.
 maxintfloat(::Type{Float32}) = Float32(16777216.)
@@ -22,20 +22,20 @@ maxintfloat(::Type{Float16}) = Float16(2048f0)
 maxintfloat{T<:AbstractFloat}(x::T)  = maxintfloat(T)
 maxintfloat() = maxintfloat(Float64)
 
-isinteger(x::AbstractFloat) = x-trunc(x) == 0
+isinteger(x::AbstractFloat) = (x - trunc(x) == 0)
 
-num2hex(x::Float16) = hex(reinterpret(UInt16,x), 4)
-num2hex(x::Float32) = hex(box(UInt32,unbox(Float32,x)),8)
-num2hex(x::Float64) = hex(box(UInt64,unbox(Float64,x)),16)
+num2hex(x::Float16) = hex(box(UInt16, x), 4)
+num2hex(x::Float32) = hex(box(UInt32, x), 8)
+num2hex(x::Float64) = hex(box(UInt64, x), 16)
 
 function hex2num(s::AbstractString)
     if length(s) <= 4
-        return box(Float16,unbox(UInt16,parse(UInt16,s,16)))
+        return box(Float16, parse(UInt16, s, 16))
     end
     if length(s) <= 8
-        return box(Float32,unbox(UInt32,parse(UInt32,s,16)))
+        return box(Float32, parse(UInt32, s, 16))
     end
-    return box(Float64,unbox(UInt64,parse(UInt64,s,16)))
+    return box(Float64, parse(UInt64, s, 16))
 end
 
 """
@@ -249,9 +249,9 @@ fma_libm(x::Float32, y::Float32, z::Float32) =
 fma_libm(x::Float64, y::Float64, z::Float64) =
     ccall(("fma", libm_name), Float64, (Float64,Float64,Float64), x, y, z)
 fma_llvm(x::Float32, y::Float32, z::Float32) =
-    box(Float32,fma_float(unbox(Float32,x),unbox(Float32,y),unbox(Float32,z)))
+    box(Float32, fma_float(x, y, z))
 fma_llvm(x::Float64, y::Float64, z::Float64) =
-    box(Float64,fma_float(unbox(Float64,x),unbox(Float64,y),unbox(Float64,z)))
+    box(Float64, fma_float(x, y, z))
 # Disable LLVM's fma if it is incorrect, e.g. because LLVM falls back
 # onto a broken system libm; if so, use openlibm's fma instead
 # 1.0000305f0 = 1 + 1/2^15
diff --git a/base/int.jl b/base/int.jl
index 33836165e1ca0..4ee2bbae2575d 100644
--- a/base/int.jl
+++ b/base/int.jl
@@ -7,12 +7,12 @@
 # they are also used elsewhere where Int128/UInt128 support is separated out,
 # such as in hashing2.jl
 
-const BitSigned64_types   = (Int8,Int16,Int32,Int64)
-const BitUnsigned64_types = (UInt8,UInt16,UInt32,UInt64)
-const BitInteger64_types  = (BitSigned64_types...,BitUnsigned64_types...)
-const BitSigned_types     = (BitSigned64_types...,Int128)
-const BitUnsigned_types   = (BitUnsigned64_types...,UInt128)
-const BitInteger_types    = (BitSigned_types...,BitUnsigned_types...)
+const BitSigned64_types   = (Int8, Int16, Int32, Int64)
+const BitUnsigned64_types = (UInt8, UInt16, UInt32, UInt64)
+const BitInteger64_types  = (BitSigned64_types..., BitUnsigned64_types...)
+const BitSigned_types     = (BitSigned64_types..., Int128)
+const BitUnsigned_types   = (BitUnsigned64_types..., UInt128)
+const BitInteger_types    = (BitSigned_types..., BitUnsigned_types...)
 
 typealias BitSigned64   Union{BitSigned64_types...}
 typealias BitUnsigned64 Union{BitUnsigned64_types...}
@@ -20,20 +20,20 @@ typealias BitInteger64  Union{BitInteger64_types...}
 typealias BitSigned     Union{BitSigned_types...}
 typealias BitUnsigned   Union{BitUnsigned_types...}
 typealias BitInteger    Union{BitInteger_types...}
-typealias BitSigned64T  Union{Type{Int8},Type{Int16},Type{Int32},Type{Int64}}
-typealias BitUnsigned64T Union{Type{UInt8},Type{UInt16},Type{UInt32},Type{UInt64}}
+typealias BitSigned64T  Union{Type{Int8}, Type{Int16}, Type{Int32}, Type{Int64}}
+typealias BitUnsigned64T Union{Type{UInt8}, Type{UInt16}, Type{UInt32}, Type{UInt64}}
 
 ## integer comparisons ##
 
-<{T<:BitSigned}(x::T, y::T)  = slt_int(unbox(T,x),unbox(T,y))
+<{T<:BitSigned}(x::T, y::T)  = slt_int(x, y)
 
--{T<:BitInteger}(x::T)       = box(T, neg_int(unbox(T,x)))
--{T<:BitInteger}(x::T, y::T) = box(T, sub_int(unbox(T,x),unbox(T,y)))
-+{T<:BitInteger}(x::T, y::T) = box(T, add_int(unbox(T,x),unbox(T,y)))
-*{T<:BitInteger}(x::T, y::T) = box(T, mul_int(unbox(T,x),unbox(T,y)))
+-{T<:BitInteger}(x::T)       = box(T, neg_int(x))
+-{T<:BitInteger}(x::T, y::T) = box(T, sub_int(x, y))
++{T<:BitInteger}(x::T, y::T) = box(T, add_int(x, y))
+*{T<:BitInteger}(x::T, y::T) = box(T, mul_int(x, y))
 
-/(x::Integer, y::Integer) = float(x)/float(y)
-inv(x::Integer) = float(one(x))/float(x)
+/(x::Integer, y::Integer) = float(x) / float(y)
+inv(x::Integer) = float(one(x)) / float(x)
 
 """
     isodd(x::Integer) -> Bool
@@ -48,7 +48,7 @@ julia> isodd(10)
 false
 ```
 """
-isodd(n::Integer) = rem(n,2) != 0
+isodd(n::Integer) = rem(n, 2) != 0
 
 """
     iseven(x::Integer) -> Bool
@@ -68,19 +68,19 @@ iseven(n::Integer) = !isodd(n)
 signbit(x::Integer) = x < 0
 signbit(x::Unsigned) = false
 
-flipsign{T<:BitSigned}(x::T, y::T) = box(T,flipsign_int(unbox(T,x),unbox(T,y)))
+flipsign{T<:BitSigned}(x::T, y::T) = box(T, flipsign_int(x, y))
 
-flipsign(x::Signed, y::Signed)  = convert(typeof(x), flipsign(promote(x,y)...))
-flipsign(x::Signed, y::Float16) = flipsign(x, reinterpret(Int16,y))
-flipsign(x::Signed, y::Float32) = flipsign(x, reinterpret(Int32,y))
-flipsign(x::Signed, y::Float64) = flipsign(x, reinterpret(Int64,y))
-flipsign(x::Signed, y::Real)    = flipsign(x, -oftype(x,signbit(y)))
+flipsign(x::Signed, y::Signed)  = convert(typeof(x), flipsign(promote(x, y)...))
+flipsign(x::Signed, y::Float16) = flipsign(x, box(Int16, y))
+flipsign(x::Signed, y::Float32) = flipsign(x, box(Int32, y))
+flipsign(x::Signed, y::Float64) = flipsign(x, box(Int64, y))
+flipsign(x::Signed, y::Real)    = flipsign(x, -oftype(x, signbit(y)))
 
-copysign(x::Signed, y::Signed)  = flipsign(x, x$y)
-copysign(x::Signed, y::Float16) = copysign(x, reinterpret(Int16,y))
-copysign(x::Signed, y::Float32) = copysign(x, reinterpret(Int32,y))
-copysign(x::Signed, y::Float64) = copysign(x, reinterpret(Int64,y))
-copysign(x::Signed, y::Real)    = copysign(x, -oftype(x,signbit(y)))
+copysign(x::Signed, y::Signed)  = flipsign(x, x $ y)
+copysign(x::Signed, y::Float16) = copysign(x, box(Int16, y))
+copysign(x::Signed, y::Float32) = copysign(x, box(Int32, y))
+copysign(x::Signed, y::Float64) = copysign(x, box(Int64, y))
+copysign(x::Signed, y::Real)    = copysign(x, -oftype(x, signbit(y)))
 
 """
     abs(x)
@@ -100,20 +100,20 @@ abs(x::Signed) = flipsign(x,x)
 
 ~(n::Integer) = -n-1
 
-unsigned(x::Signed) = reinterpret(typeof(convert(Unsigned,zero(x))), x)
+unsigned(x::Signed) = reinterpret(typeof(convert(Unsigned, zero(x))), x)
 unsigned(x::Bool) = convert(Unsigned, x)
 unsigned(x) = convert(Unsigned, x)
-signed(x::Unsigned) = reinterpret(typeof(convert(Signed,zero(x))), x)
+signed(x::Unsigned) = reinterpret(typeof(convert(Signed, zero(x))), x)
 signed(x) = convert(Signed, x)
 
-div(x::Signed, y::Unsigned) = flipsign(signed(div(unsigned(abs(x)),y)),x)
-div(x::Unsigned, y::Signed) = unsigned(flipsign(signed(div(x,unsigned(abs(y)))),y))
+div(x::Signed, y::Unsigned) = flipsign(signed(div(unsigned(abs(x)), y)), x)
+div(x::Unsigned, y::Signed) = unsigned(flipsign(signed(div(x, unsigned(abs(y)))), y))
 
-rem(x::Signed, y::Unsigned) = flipsign(signed(rem(unsigned(abs(x)),y)),x)
-rem(x::Unsigned, y::Signed) = rem(x,unsigned(abs(y)))
+rem(x::Signed, y::Unsigned) = flipsign(signed(rem(unsigned(abs(x)), y)), x)
+rem(x::Unsigned, y::Signed) = rem(x, unsigned(abs(y)))
 
-fld(x::Signed, y::Unsigned) = div(x,y)-(signbit(x)&(rem(x,y)!=0))
-fld(x::Unsigned, y::Signed) = div(x,y)-(signbit(y)&(rem(x,y)!=0))
+fld(x::Signed, y::Unsigned) = div(x, y) - (signbit(x) & (rem(x, y) != 0))
+fld(x::Unsigned, y::Signed) = div(x, y) - (signbit(y) & (rem(x, y) != 0))
 
 
 """
@@ -128,50 +128,50 @@ x == fld(x,y)*y + mod(x,y)
 """
 function mod{T<:Integer}(x::T, y::T)
     y == -1 && return T(0)   # avoid potential overflow in fld
-    x - fld(x,y)*y
+    return x - fld(x, y) * y
 end
-mod(x::Signed, y::Unsigned) = rem(y+unsigned(rem(x,y)),y)
-mod(x::Unsigned, y::Signed) = rem(y+signed(rem(x,y)),y)
-mod{T<:Unsigned}(x::T, y::T) = rem(x,y)
+mod(x::Signed, y::Unsigned) = rem(y + unsigned(rem(x, y)), y)
+mod(x::Unsigned, y::Signed) = rem(y + signed(rem(x, y)), y)
+mod{T<:Unsigned}(x::T, y::T) = rem(x, y)
 
-cld(x::Signed, y::Unsigned) = div(x,y)+(!signbit(x)&(rem(x,y)!=0))
-cld(x::Unsigned, y::Signed) = div(x,y)+(!signbit(y)&(rem(x,y)!=0))
+cld(x::Signed, y::Unsigned) = div(x, y) + (!signbit(x) & (rem(x, y) != 0))
+cld(x::Unsigned, y::Signed) = div(x, y) + (!signbit(y) & (rem(x, y) != 0))
 
 # Don't promote integers for div/rem/mod since there is no danger of overflow,
 # while there is a substantial performance penalty to 64-bit promotion.
-div{T<:BitSigned64}(x::T, y::T) = box(T,checked_sdiv_int(unbox(T,x),unbox(T,y)))
-rem{T<:BitSigned64}(x::T, y::T) = box(T,checked_srem_int(unbox(T,x),unbox(T,y)))
-div{T<:BitUnsigned64}(x::T, y::T) = box(T,checked_udiv_int(unbox(T,x),unbox(T,y)))
-rem{T<:BitUnsigned64}(x::T, y::T) = box(T,checked_urem_int(unbox(T,x),unbox(T,y)))
+div{T<:BitSigned64}(x::T, y::T) = box(T, checked_sdiv_int(x, y))
+rem{T<:BitSigned64}(x::T, y::T) = box(T, checked_srem_int(x, y))
+div{T<:BitUnsigned64}(x::T, y::T) = box(T, checked_udiv_int(x, y))
+rem{T<:BitUnsigned64}(x::T, y::T) = box(T, checked_urem_int(x, y))
 
 
 # fld(x,y) == div(x,y) - ((x>=0) != (y>=0) && rem(x,y) != 0 ? 1 : 0)
 fld{T<:Unsigned}(x::T, y::T) = div(x,y)
 function fld{T<:Integer}(x::T, y::T)
-    d = div(x,y)
-    d - (signbit(x$y) & (d*y!=x))
+    d = div(x, y)
+    return d - (signbit(x $ y) & (d * y != x))
 end
 
 # cld(x,y) = div(x,y) + ((x>0) == (y>0) && rem(x,y) != 0 ? 1 : 0)
 function cld{T<:Unsigned}(x::T, y::T)
-    d = div(x,y)
-    d + (d*y!=x)
+    d = div(x, y)
+    return d + (d * y != x)
 end
 function cld{T<:Integer}(x::T, y::T)
-    d = div(x,y)
-    d + (((x>0) == (y>0)) & (d*y!=x))
+    d = div(x, y)
+    return d + (((x > 0) == (y > 0)) & (d * y != x))
 end
 
 ## integer bitwise operations ##
 
-(~){T<:BitInteger}(x::T)       = box(T,not_int(unbox(T,x)))
-(&){T<:BitInteger}(x::T, y::T) = box(T,and_int(unbox(T,x),unbox(T,y)))
-(|){T<:BitInteger}(x::T, y::T) = box(T, or_int(unbox(T,x),unbox(T,y)))
-($){T<:BitInteger}(x::T, y::T) = box(T,xor_int(unbox(T,x),unbox(T,y)))
+(~){T<:BitInteger}(x::T)       = box(T, not_int(x))
+(&){T<:BitInteger}(x::T, y::T) = box(T, and_int(x, y))
+(|){T<:BitInteger}(x::T, y::T) = box(T, or_int(x, y))
+($){T<:BitInteger}(x::T, y::T) = box(T, xor_int(x, y))
 
 bswap{T<:Union{Int8,UInt8}}(x::T) = x
 bswap{T<:Union{Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128}}(x::T) =
-    box(T,bswap_int(unbox(T,x)))
+    box(T, bswap_int(x))
 
 """
     count_ones(x::Integer) -> Integer
@@ -183,7 +183,7 @@ julia> count_ones(7)
 3
 ```
 """
-count_ones{T<:BitInteger}(x::T) = Int(box(T,ctpop_int(unbox(T,x))))
+count_ones{T<:BitInteger}(x::T) = Int(box(T, ctpop_int(x)))
 
 """
     leading_zeros(x::Integer) -> Integer
@@ -195,7 +195,7 @@ julia> leading_zeros(Int32(1))
 31
 ```
 """
-leading_zeros{T<:BitInteger}(x::T) = Int(box(T,ctlz_int(unbox(T,x))))
+leading_zeros{T<:BitInteger}(x::T) = Int(box(T, ctlz_int(x)))
 
 """
     trailing_zeros(x::Integer) -> Integer
@@ -207,7 +207,7 @@ julia> trailing_zeros(2)
 1
 ```
 """
-trailing_zeros{T<:BitInteger}(x::T) = Int(box(T,cttz_int(unbox(T,x))))
+trailing_zeros{T<:BitInteger}(x::T) = Int(box(T, cttz_int(x)))
 
 """
     count_zeros(x::Integer) -> Integer
@@ -247,9 +247,9 @@ trailing_ones(x::Integer) = trailing_zeros(~x)
 
 ## integer comparisons ##
 
-<{T<:BitUnsigned}(x::T, y::T)  = ult_int(unbox(T,x),unbox(T,y))
-<={T<:BitSigned}(x::T, y::T)   = sle_int(unbox(T,x),unbox(T,y))
-<={T<:BitUnsigned}(x::T, y::T) = ule_int(unbox(T,x),unbox(T,y))
+<{T<:BitUnsigned}(x::T, y::T)  = ult_int(x, y)
+<={T<:BitSigned}(x::T, y::T)   = sle_int(x, y)
+<={T<:BitUnsigned}(x::T, y::T) = ule_int(x, y)
 
 ==(x::Signed,   y::Unsigned) = (x >= 0) & (unsigned(x) == y)
 ==(x::Unsigned, y::Signed  ) = (y >= 0) & (x == unsigned(y))
@@ -262,13 +262,13 @@ trailing_ones(x::Integer) = trailing_zeros(~x)
 
 # unsigned shift counts always shift in the same direction
 >>{T<:BitSigned,S<:BitUnsigned}(x::T, y::S) =
-    box(T,ashr_int(unbox(T,x),unbox(S,y)))
+    box(T, ashr_int(x, y))
 >>{T<:BitUnsigned,S<:BitUnsigned}(x::T, y::S) =
-    box(T,lshr_int(unbox(T,x),unbox(S,y)))
+    box(T, lshr_int(x, y))
 <<{T<:BitInteger,S<:BitUnsigned}(x::T,  y::S) =
-    box(T, shl_int(unbox(T,x),unbox(S,y)))
+    box(T, shl_int(x, y))
 >>>{T<:BitInteger,S<:BitUnsigned}(x::T, y::S) =
-    box(T,lshr_int(unbox(T,x),unbox(S,y)))
+    box(T, lshr_int(x, y))
 # signed shift counts can shift in either direction
 # note: this early during bootstrap, `>=` is not yet available
 # note: we only define Int shift counts here; the generic case is handled later
@@ -281,50 +281,50 @@ trailing_ones(x::Integer) = trailing_zeros(~x)
 
 ## integer conversions ##
 
-for to in BitInteger_types, from in (BitInteger_types...,Bool)
+for to in BitInteger_types, from in (BitInteger_types..., Bool)
     if !(to === from)
         if to.size < from.size
             if issubtype(to, Signed)
                 if issubtype(from, Unsigned)
                     @eval convert(::Type{$to}, x::($from)) =
-                        box($to,checked_trunc_sint($to,check_top_bit(unbox($from,x))))
+                        box($to, checked_trunc_sint($to, check_top_bit(x)))
                 else
                     @eval convert(::Type{$to}, x::($from)) =
-                        box($to,checked_trunc_sint($to,unbox($from,x)))
+                        box($to, checked_trunc_sint($to, x))
                 end
             else
                 @eval convert(::Type{$to}, x::($from)) =
-                    box($to,checked_trunc_uint($to,unbox($from,x)))
+                    box($to, checked_trunc_uint($to, x))
             end
-            @eval rem(x::($from), ::Type{$to}) = box($to,trunc_int($to,unbox($from,x)))
+            @eval rem(x::($from), ::Type{$to}) = box($to, trunc_int($to, x))
         elseif from.size < to.size || from === Bool
             if issubtype(from, Signed)
                 if issubtype(to, Unsigned)
                     @eval convert(::Type{$to}, x::($from)) =
-                        box($to,sext_int($to,check_top_bit(unbox($from,x))))
+                        box($to, sext_int($to, check_top_bit(x)))
                 else
                     @eval convert(::Type{$to}, x::($from)) =
-                        box($to,sext_int($to,unbox($from,x)))
+                        box($to, sext_int($to, x))
                 end
-                @eval rem(x::($from), ::Type{$to}) = box($to,sext_int($to,unbox($from,x)))
+                @eval rem(x::($from), ::Type{$to}) = box($to, sext_int($to, x))
             else
-                @eval convert(::Type{$to}, x::($from)) = box($to,zext_int($to,unbox($from,x)))
-                @eval rem(x::($from), ::Type{$to}) = convert($to,x)
+                @eval convert(::Type{$to}, x::($from)) = box($to, zext_int($to, x))
+                @eval rem(x::($from), ::Type{$to}) = convert($to, x)
             end
         else
-            if !(issubtype(from,Signed) === issubtype(to,Signed))
+            if !(issubtype(from, Signed) === issubtype(to, Signed))
                 # raise InexactError if x's top bit is set
-                @eval convert(::Type{$to}, x::($from)) = box($to,check_top_bit(unbox($from,x)))
+                @eval convert(::Type{$to}, x::($from)) = box($to, check_top_bit(x))
             else
-                @eval convert(::Type{$to}, x::($from)) = box($to,unbox($from,x))
+                @eval convert(::Type{$to}, x::($from)) = box($to, x)
             end
-            @eval rem(x::($from), ::Type{$to}) = box($to,unbox($from,x))
+            @eval rem(x::($from), ::Type{$to}) = box($to, x)
         end
     end
 end
 
 rem{T<:Integer}(x::T, ::Type{T}) = x
-rem(x::Integer, ::Type{Bool}) = ((x&1)!=0)
+rem(x::Integer, ::Type{Bool}) = ((x & 1) != 0)
 mod{T<:Integer}(x::Integer, ::Type{T}) = rem(x, T)
 
 unsafe_trunc{T<:Integer}(::Type{T}, x::Integer) = rem(x, T)
@@ -333,39 +333,39 @@ for (Ts, Tu) in ((Int8, UInt8), (Int16, UInt16), (Int32, UInt32), (Int64, UInt64
     @eval convert(::Type{Unsigned}, x::$Ts) = convert($Tu, x)
 end
 
-convert{T<:Union{Float32, Float64, Bool}}(::Type{Signed}, x::T) = convert(Int,x)
-convert{T<:Union{Float32, Float64, Bool}}(::Type{Unsigned}, x::T) = convert(UInt,x)
+convert{T<:Union{Float32, Float64, Bool}}(::Type{Signed}, x::T) = convert(Int, x)
+convert{T<:Union{Float32, Float64, Bool}}(::Type{Unsigned}, x::T) = convert(UInt, x)
 
 convert(::Type{Integer}, x::Integer) = x
-convert(::Type{Integer}, x::Real) = convert(Signed,x)
+convert(::Type{Integer}, x::Real) = convert(Signed, x)
 
 round(x::Integer) = x
 trunc(x::Integer) = x
 floor(x::Integer) = x
  ceil(x::Integer) = x
 
-round{T<:Integer}(::Type{T},x::Integer) = convert(T,x)
-trunc{T<:Integer}(::Type{T},x::Integer) = convert(T,x)
-floor{T<:Integer}(::Type{T},x::Integer) = convert(T,x)
- ceil{T<:Integer}(::Type{T},x::Integer) = convert(T,x)
+round{T<:Integer}(::Type{T}, x::Integer) = convert(T, x)
+trunc{T<:Integer}(::Type{T}, x::Integer) = convert(T, x)
+floor{T<:Integer}(::Type{T}, x::Integer) = convert(T, x)
+ ceil{T<:Integer}(::Type{T}, x::Integer) = convert(T, x)
 
 ## integer construction ##
 
 macro int128_str(s)
-    parse(Int128,s)
+    return parse(Int128, s)
 end
 
 macro uint128_str(s)
-    parse(UInt128,s)
+    return parse(UInt128, s)
 end
 
 macro big_str(s)
-    n = tryparse(BigInt,s)
+    n = tryparse(BigInt, s)
     !isnull(n) && return get(n)
-    n = tryparse(BigFloat,s)
+    n = tryparse(BigFloat, s)
     !isnull(n) && return get(n)
     message = "invalid number format $s for BigInt or BigFloat"
-    :(throw(ArgumentError($message)))
+    return :(throw(ArgumentError($message)))
 end
 
 ## integer promotions ##
@@ -411,9 +411,9 @@ typemax(::Type{Int64 }) = 9223372036854775807
 typemin(::Type{UInt64}) = UInt64(0)
 typemax(::Type{UInt64}) = 0xffffffffffffffff
 @eval typemin(::Type{UInt128}) = $(convert(UInt128, 0))
-@eval typemax(::Type{UInt128}) = $(box(UInt128,unbox(Int128,convert(Int128,-1))))
-@eval typemin(::Type{Int128} ) = $(convert(Int128,1)<<127)
-@eval typemax(::Type{Int128} ) = $(box(Int128,unbox(UInt128,typemax(UInt128)>>1)))
+@eval typemax(::Type{UInt128}) = $(box(UInt128, convert(Int128, -1)))
+@eval typemin(::Type{Int128} ) = $(convert(Int128, 1) << 127)
+@eval typemax(::Type{Int128} ) = $(box(Int128, typemax(UInt128) >> 1))
 
 widen{T<:Union{Int8, Int16}}(::Type{T}) = Int32
 widen(::Type{Int32}) = Int64
@@ -425,12 +425,12 @@ widen(::Type{UInt64}) = UInt128
 # a few special cases,
 # Int64*UInt64 => Int128
 # |x|<=2^(k-1), |y|<=2^k-1   =>   |x*y|<=2^(2k-1)-1
-widemul(x::Signed,y::Unsigned) = widen(x)*signed(widen(y))
-widemul(x::Unsigned,y::Signed) = signed(widen(x))*widen(y)
+widemul(x::Signed,y::Unsigned) = widen(x) * signed(widen(y))
+widemul(x::Unsigned,y::Signed) = signed(widen(x)) * widen(y)
 # multplication by Bool doesn't require widening
-widemul(x::Bool,y::Bool) = x*y
-widemul(x::Bool,y::Number) = x*y
-widemul(x::Number,y::Bool) = x*y
+widemul(x::Bool,y::Bool) = x * y
+widemul(x::Bool,y::Number) = x * y
+widemul(x::Number,y::Bool) = x * y
 
 
 ## wide multiplication, Int128 multiply and divide ##
@@ -440,41 +440,41 @@ if Core.sizeof(Int) == 4
         local u0::UInt64, v0::UInt64, w0::UInt64
         local u1::Int64, v1::Int64, w1::UInt64, w2::Int64, t::UInt64
 
-        u0 = u&0xffffffff; u1 = u>>32
-        v0 = v&0xffffffff; v1 = v>>32
-        w0 = u0*v0
-        t = reinterpret(UInt64,u1)*v0 + (w0>>>32)
-        w2 = reinterpret(Int64,t) >> 32
-        w1 = u0*reinterpret(UInt64,v1) + (t&0xffffffff)
-        hi = u1*v1 + w2 + (reinterpret(Int64,w1) >> 32)
-        lo = w0&0xffffffff + (w1 << 32)
-        Int128(hi)<<64 + Int128(lo)
+        u0 = u & 0xffffffff; u1 = u >> 32
+        v0 = v & 0xffffffff; v1 = v >> 32
+        w0 = u0 * v0
+        t = reinterpret(UInt64, u1) * v0 + (w0 >>> 32)
+        w2 = reinterpret(Int64, t) >> 32
+        w1 = u0 * reinterpret(UInt64, v1) + (t & 0xffffffff)
+        hi = u1 * v1 + w2 + (reinterpret(Int64, w1) >> 32)
+        lo = w0 & 0xffffffff + (w1 << 32)
+        return Int128(hi) << 64 + Int128(lo)
     end
 
     function widemul(u::UInt64, v::UInt64)
         local u0::UInt64, v0::UInt64, w0::UInt64
         local u1::UInt64, v1::UInt64, w1::UInt64, w2::UInt64, t::UInt64
 
-        u0 = u&0xffffffff; u1 = u>>>32
-        v0 = v&0xffffffff; v1 = v>>>32
-        w0 = u0*v0
-        t = u1*v0 + (w0>>>32)
-        w2 = t>>>32
-        w1 = u0*v1 + (t&0xffffffff)
-        hi = u1*v1 + w2 + (w1 >>> 32)
-        lo = w0&0xffffffff + (w1 << 32)
-        UInt128(hi)<<64 + UInt128(lo)
+        u0 = u & 0xffffffff; u1 = u >>> 32  # low / high 32-bit halves of u
+        v0 = v & 0xffffffff; v1 = v >>> 32  # low / high 32-bit halves of v
+        w0 = u0 * v0  # low x low partial product
+        t = u1 * v0 + (w0 >>> 32)  # high x low, plus the upper half of w0
+        w2 = t >>> 32
+        w1 = u0 * v1 + (t & 0xffffffff)  # low x high, plus the lower half of t
+        hi = u1 * v1 + w2 + (w1 >>> 32)
+        lo = w0 & 0xffffffff + (w1 << 32)  # groups as (w0 & 0xffffffff) + (w1 << 32): `&` binds tighter than `+`
+        return UInt128(hi) << 64 + UInt128(lo)  # groups as (UInt128(hi) << 64) + UInt128(lo): `<<` binds tighter than `+`
     end
 
     function *(u::Int128, v::Int128)
-        u0 = u % UInt64; u1 = Int64(u>>64)
-        v0 = v % UInt64; v1 = Int64(v>>64)
+        u0 = u % UInt64; u1 = Int64(u >> 64)  # u0: low 64 bits (truncating rem); u1: upper 64 bits as Int64
+        v0 = v % UInt64; v1 = Int64(v >> 64)  # same split for v
         lolo = widemul(u0, v0)
-        lohi = widemul(reinterpret(Int64,u0), v1)
-        hilo = widemul(u1, reinterpret(Int64,v0))
-        t = reinterpret(UInt128,hilo) + (lolo>>>64)
-        w1 = reinterpret(UInt128,lohi) + (t&0xffffffffffffffff)
-        Int128(lolo&0xffffffffffffffff) + reinterpret(Int128,w1)<<64
+        lohi = widemul(reinterpret(Int64, u0), v1)  # low(u) x high(v); u0's bits are reinterpreted as Int64
+        hilo = widemul(u1, reinterpret(Int64, v0))  # high(u) x low(v); v0's bits are reinterpreted as Int64
+        t = reinterpret(UInt128, hilo) + (lolo >>> 64)
+        w1 = reinterpret(UInt128, lohi) + (t & 0xffffffffffffffff)
+        return Int128(lolo & 0xffffffffffffffff) + reinterpret(Int128, w1) << 64  # `<<` binds tighter than `+`: only the second term is shifted
     end
 
     function *(u::UInt128, v::UInt128)
@@ -483,35 +483,35 @@ if Core.sizeof(Int) == 4
         lolo = widemul(u0, v0)
         lohi = widemul(u0, v1)
         hilo = widemul(u1, v0)
-        t = hilo + (lolo>>>64)
-        w1 = lohi + (t&0xffffffffffffffff)
-        (lolo&0xffffffffffffffff) + UInt128(w1)<<64
+        t = hilo + (lolo >>> 64)
+        w1 = lohi + (t & 0xffffffffffffffff)
+        return (lolo & 0xffffffffffffffff) + UInt128(w1) << 64
     end
 
     function div(x::Int128, y::Int128)
         (x == typemin(Int128)) & (y == -1) && throw(DivideError())
-        Int128(div(BigInt(x),BigInt(y)))
+        return Int128(div(BigInt(x), BigInt(y)))
     end
     function div(x::UInt128, y::UInt128)
-        UInt128(div(BigInt(x),BigInt(y)))
+        return UInt128(div(BigInt(x), BigInt(y)))
     end
 
     function rem(x::Int128, y::Int128)
-        Int128(rem(BigInt(x),BigInt(y)))
+        return Int128(rem(BigInt(x), BigInt(y)))
     end
     function rem(x::UInt128, y::UInt128)
-        UInt128(rem(BigInt(x),BigInt(y)))
+        return UInt128(rem(BigInt(x), BigInt(y)))
     end
 
     function mod(x::Int128, y::Int128)
-        Int128(mod(BigInt(x),BigInt(y)))
+        return Int128(mod(BigInt(x), BigInt(y)))
     end
 else
-    *{T<:Union{Int128,UInt128}}(x::T, y::T)  = box(T,mul_int(unbox(T,x),unbox(T,y)))
+    *{T<:Union{Int128,UInt128}}(x::T, y::T)  = box(T, mul_int(x, y))
 
-    div(x::Int128,  y::Int128)  = box(Int128,checked_sdiv_int(unbox(Int128,x),unbox(Int128,y)))
-    div(x::UInt128, y::UInt128) = box(UInt128,checked_udiv_int(unbox(UInt128,x),unbox(UInt128,y)))
+    div(x::Int128,  y::Int128)  = box(Int128, checked_sdiv_int(x, y))
+    div(x::UInt128, y::UInt128) = box(UInt128, checked_udiv_int(x, y))
 
-    rem(x::Int128,  y::Int128)  = box(Int128,checked_srem_int(unbox(Int128,x),unbox(Int128,y)))
-    rem(x::UInt128, y::UInt128) = box(UInt128,checked_urem_int(unbox(UInt128,x),unbox(UInt128,y)))
+    rem(x::Int128,  y::Int128)  = box(Int128, checked_srem_int(x, y))
+    rem(x::UInt128, y::UInt128) = box(UInt128, checked_urem_int(x, y))
 end
diff --git a/base/sysinfo.jl b/base/sysinfo.jl
index 6554ec6283bca..633520d082339 100644
--- a/base/sysinfo.jl
+++ b/base/sysinfo.jl
@@ -133,7 +133,7 @@ function cpu_info()
     Base.uv_error("uv_cpu_info",ccall(:uv_cpu_info, Int32, (Ptr{Ptr{UV_cpu_info_t}}, Ptr{Int32}), UVcpus, count))
     cpus = Array{CPUinfo}(count[1])
     for i = 1:length(cpus)
-        cpus[i] = CPUinfo(unsafe_load(UVcpus[1],i))
+        cpus[i] = CPUinfo(unsafe_load(UVcpus[1], i))
     end
     ccall(:uv_free_cpu_info, Void, (Ptr{UV_cpu_info_t}, Int32), UVcpus[1], count[1])
     return cpus
diff --git a/src/codegen.cpp b/src/codegen.cpp
index f5c2cc5564e5f..bf041bf54e33e 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -556,7 +556,7 @@ static Value *make_jlcall(ArrayRef<const jl_cgval_t*> args, jl_codectx_t *ctx);
 static Value *global_binding_pointer(jl_module_t *m, jl_sym_t *s,
                                      jl_binding_t **pbnd, bool assign, jl_codectx_t *ctx);
 static jl_cgval_t emit_checked_var(Value *bp, jl_sym_t *name, jl_codectx_t *ctx, bool isvol, MDNode *tbaa);
-static Value *emit_condition(jl_value_t *cond, const std::string &msg, jl_codectx_t *ctx);
+static Value *emit_condition(const jl_cgval_t &condV, const std::string &msg, jl_codectx_t *ctx);
 static void allocate_gc_frame(BasicBlock *b0, jl_codectx_t *ctx);
 static GlobalVariable *prepare_global(GlobalVariable *G, Module *M = jl_builderModule);
 static Value *prepare_call(Value *Callee);
@@ -2134,9 +2134,10 @@ static Value *emit_bits_compare(const jl_cgval_t &arg1, const jl_cgval_t &arg2,
     Type *at = julia_type_to_llvm(arg1.typ);
 
     if (at->isIntegerTy() || at->isPointerTy() || at->isFloatingPointTy()) {
-        Value *varg1 = emit_unbox(at, arg1, arg1.typ);
-        Value *varg2 = emit_unbox(at, arg2, arg2.typ);
-        return builder.CreateICmpEQ(JL_INT(varg1),JL_INT(varg2));
+        Type *at_int = INTT(at);
+        Value *varg1 = emit_unbox(at_int, arg1, arg1.typ);
+        Value *varg2 = emit_unbox(at_int, arg2, arg2.typ);
+        return builder.CreateICmpEQ(varg1, varg2);
     }
 
     if (at->isVectorTy()) {
@@ -2145,11 +2146,11 @@ static Value *emit_bits_compare(const jl_cgval_t &arg1, const jl_cgval_t &arg2,
         Value *varg1 = emit_unbox(at, arg1, arg1.typ);
         Value *varg2 = emit_unbox(at, arg2, arg2.typ);
         size_t l = jl_svec_len(types);
-        for(unsigned i=0; i < l; i++) {
-            jl_value_t *fldty = jl_svecref(types,i);
+        for (unsigned i = 0; i < l; i++) {
+            jl_value_t *fldty = jl_svecref(types, i);
             Value *subAns, *fld1, *fld2;
-            fld1 = builder.CreateExtractElement(varg1, ConstantInt::get(T_int32,i)),
-            fld2 = builder.CreateExtractElement(varg2, ConstantInt::get(T_int32,i)),
+            fld1 = builder.CreateExtractElement(varg1, ConstantInt::get(T_int32, i)),
+            fld2 = builder.CreateExtractElement(varg2, ConstantInt::get(T_int32, i)),
             subAns = emit_bits_compare(mark_julia_type(fld1, false, fldty, ctx), mark_julia_type(fld2, false, fldty, ctx), ctx);
             answer = builder.CreateAnd(answer, subAns);
         }
@@ -2196,7 +2197,7 @@ static Value *emit_bits_compare(const jl_cgval_t &arg1, const jl_cgval_t &arg2,
         }
     }
     assert(0 && "what is this llvm type?");
-    return 0;
+    abort();
 }
 
 // emit code for is (===).
diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp
index 33eb2282b489e..773890d2e9284 100644
--- a/src/intrinsics.cpp
+++ b/src/intrinsics.cpp
@@ -8,6 +8,7 @@ namespace JL_I {
 
 using namespace JL_I;
 static Function *runtime_func[num_intrinsics];
+static bool float_func[num_intrinsics];
 static void jl_init_intrinsic_functions_codegen(Module *m)
 {
     std::vector<Type *> args1(0); \
@@ -38,6 +39,41 @@ static void jl_init_intrinsic_functions_codegen(Module *m)
 #undef ADD_I
 #undef ADD_HIDDEN
 #undef ALIAS
+
+    float_func[neg_float] = true;
+    float_func[neg_float_fast] = true;
+    float_func[add_float] = true;
+    float_func[sub_float] = true;
+    float_func[mul_float] = true;
+    float_func[div_float] = true;
+    float_func[rem_float] = true;
+    float_func[add_float_fast] = true;
+    float_func[sub_float_fast] = true;
+    float_func[mul_float_fast] = true;
+    float_func[div_float_fast] = true;
+    float_func[rem_float_fast] = true;
+    float_func[fma_float] = true;
+    float_func[muladd_float] = true;
+    float_func[eq_float] = true;
+    float_func[ne_float] = true;
+    float_func[lt_float] = true;
+    float_func[le_float] = true;
+    float_func[eq_float_fast] = true;
+    float_func[ne_float_fast] = true;
+    float_func[lt_float_fast] = true;
+    float_func[le_float_fast] = true;
+    float_func[fpiseq] = true;
+    float_func[fpislt] = true;
+    float_func[abs_float] = true;
+    //float_func[copysign_float] = false;
+    float_func[ceil_llvm] = true;
+    float_func[floor_llvm] = true;
+    float_func[trunc_llvm] = true;
+    float_func[rint_llvm] = true;
+    float_func[sqrt_llvm] = true;
+    float_func[sqrt_llvm_fast] = true;
+
+    float_func[powi_llvm] = true;
 }
 
 extern "C" JL_DLLEXPORT uint32_t jl_get_LLVM_VERSION(void);
@@ -70,85 +106,80 @@ extern "C" JL_DLLEXPORT int8_t jl_is_memdebug() {
     where the box is needed.
 */
 
-static Type *FTnbits(size_t nb)
+// convert an llvm type to same-size float type (pointers are treated as word-sized)
+static Type *FLOATT(Type *t)
 {
+    if (t->isFloatingPointTy())
+        return t;
+    unsigned nb = (t->isPointerTy() ? sizeof(void*) * 8 : t->getPrimitiveSizeInBits());
+    if (nb == 64)
+        return T_float64;
+    if (nb == 32)
+        return T_float32;
 #ifndef DISABLE_FLOAT16
     if (nb == 16)
         return T_float16;
-    else
 #endif
-    if (nb == 32)
-        return T_float32;
-    else if (nb == 64)
-        return T_float64;
-    else if (nb == 128)
+    if (nb == 128)
         return T_float128;
-    else
-        jl_error("Unsupported Float Size");
-}
-// convert int type to same-size float type
-static Type *FT(Type *t)
-{
-    if (t->isFloatingPointTy())
-        return t;
-    return FTnbits(t->getPrimitiveSizeInBits());
-}
-
-// reinterpret-cast to float
-static Value *FP(Value *v)
-{
-    if (v->getType()->isFloatingPointTy())
-        return v;
-    return emit_bitcast(v, FT(v->getType()));
+    jl_error("Unsupported Float Size");
 }
 
-// convert float type to same-size int type
-static Type *JL_INTT(Type *t)
+// convert an llvm type to same-size int type
+static Type *INTT(Type *t)
 {
     if (t->isIntegerTy())
         return t;
     if (t->isPointerTy())
         return T_size;
-    if (t == T_float32) return T_int32;
-    if (t == T_float64) return T_int64;
-    assert(t == T_void);
-    return T_void;
+    if (t == T_float64)
+        return T_int64;
+    if (t == T_float32)
+        return T_int32;
+    if (t == T_float16)
+        return T_int16;
+    //if (t == T_float128)
+    //    return T_int128;
+    assert(0 && "unhandled llvm type!");
+    abort();
 }
+
 // convert float type to same-size int type (as a Julia type)
 static jl_value_t *JL_JLUINTT(Type *t)
 {
     assert(!t->isIntegerTy());
-    if (t == T_float32) return (jl_value_t*)jl_uint32_type;
-    if (t == T_float64) return (jl_value_t*)jl_uint64_type;
-    if (t == T_float16) return (jl_value_t*)jl_uint16_type;
+    if (t == T_float64)
+        return (jl_value_t*)jl_uint64_type;
+    if (t == T_float32)
+        return (jl_value_t*)jl_uint32_type;
+    if (t == T_float16)
+        return (jl_value_t*)jl_uint16_type;
+    //if (t == T_float128)
+    //   return (jl_value_t*)jl_uint128_type;
     assert(t == T_void);
     return jl_bottom_type;
 }
+
 static jl_value_t *JL_JLSINTT(Type *t)
 {
     assert(!t->isIntegerTy());
-    if (t == T_float32) return (jl_value_t*)jl_int32_type;
-    if (t == T_float64) return (jl_value_t*)jl_int64_type;
-    if (t == T_float16) return (jl_value_t*)jl_int16_type;
+    if (t == T_float64)
+        return (jl_value_t*)jl_int64_type;
+    if (t == T_float32)
+        return (jl_value_t*)jl_int32_type;
+    if (t == T_float16)
+        return (jl_value_t*)jl_int16_type;
+    //if (t == T_float128)
+    //   return (jl_value_t*)jl_int128_type;
     assert(t == T_void);
     return jl_bottom_type;
 }
 
-// reinterpret-cast to int
-static Value *JL_INT(Value *v)
-{
-    Type *t = v->getType();
-    if (t->isIntegerTy())
-        return v;
-    if (t->isPointerTy())
-        return builder.CreatePtrToInt(v, JL_INTT(t));
-    return emit_bitcast(v, JL_INTT(t));
-}
-
 static Value *uint_cnvt(Type *to, Value *x)
 {
     Type *t = x->getType();
-    if (t == to) return x;
+    if (t == to)
+        return x;
     if (to->getPrimitiveSizeInBits() < x->getType()->getPrimitiveSizeInBits())
         return builder.CreateTrunc(x, to);
     return builder.CreateZExt(x, to);
@@ -267,6 +298,9 @@ static Value *emit_unbox(Type *to, const jl_cgval_t &x, jl_value_t *jt, Value *d
     assert(to != T_pjlvalue);
     // TODO: fully validate that x.typ == jt?
     if (x.isghost) {
+        // this can happen when a branch yielding a different type ends
+        // up being dead code, and type inference knows that the other
+        // branch's type is the only one that matters.
         if (type_is_ghost(to)) {
             return NULL;
         }
@@ -278,23 +312,30 @@ static Value *emit_unbox(Type *to, const jl_cgval_t &x, jl_value_t *jt, Value *d
     if (!x.ispointer() || c) { // already unboxed, but sometimes need conversion
         Value *unboxed = c ? c : x.V;
         Type *ty = unboxed->getType();
-        // bools are stored internally as int8 (for now)
-        if (ty == T_int1 && to == T_int8)
-            unboxed = builder.CreateZExt(unboxed, T_int8);
-        else if (ty->isPointerTy() && !to->isPointerTy())
-            unboxed = builder.CreatePtrToInt(unboxed, to);
-        else if (!ty->isPointerTy() && to->isPointerTy())
+        assert(ty != T_void);
+        bool frompointer = ty->isPointerTy();
+        bool topointer = to->isPointerTy();
+        if (frompointer && topointer) {
+            unboxed = emit_bitcast(unboxed, to);
+        }
+        else if (frompointer) {
+            Type *INTT_to = INTT(to);
+            unboxed = builder.CreatePtrToInt(unboxed, INTT_to);
+            if (INTT_to != to)
+                unboxed = builder.CreateBitCast(unboxed, to);
+        }
+        else if (topointer) {
+            Type *INTT_to = INTT(to);
+            if (to != INTT_to)
+                unboxed = builder.CreateBitCast(unboxed, INTT_to);
             unboxed = builder.CreateIntToPtr(unboxed, to);
-        else if (ty->isPointerTy() && to->isPointerTy())
-            // pointer types are going away anyways, and this can come up in ccall argument conversion
-            unboxed = builder.CreatePointerCast(unboxed, to);
-        else if (ty != to) {
-            // this can happen when a branch yielding a different type ends
-            // up being dead code, and type inference knows that the other
-            // branch's type is the only one that matters.
-            // assert(ty == T_void);
-            //emit_error("emit_unbox: a type mismatch error in occurred during codegen", ctx);
-            unboxed = UndefValue::get(to); // type mismatch error
+        }
+        else if (ty == T_int1 && to == T_int8) {
+            // bools may be stored internally as int8
+            unboxed = builder.CreateZExt(unboxed, T_int8);
+        }
+        else {
+            unboxed = builder.CreateBitCast(unboxed, to);
         }
         if (!dest)
             return unboxed;
@@ -355,104 +396,63 @@ static Value *emit_unbox(Type *to, const jl_cgval_t &x, jl_value_t *jt, Value *d
     }
 }
 
-// unbox, trying to determine correct bitstype automatically
-// returns some sort of raw, unboxed numeric type (e.g. in registers)
-static Value *auto_unbox(const jl_cgval_t &v, jl_codectx_t *ctx)
-{
-    jl_value_t *bt = v.typ;
-    if (!jl_is_bitstype(bt)) {
-        // This can be reached with a direct invalid call to an Intrinsic, such as:
-        //   Intrinsics.neg_int("")
-        emit_error("auto_unbox: unable to determine argument type", ctx);
-        return UndefValue::get(T_void);
-    }
-    bool isboxed;
-    Type *to = julia_type_to_llvm(v.typ, &isboxed);
-    if (to == NULL || isboxed) {
-        // might be some sort of incomplete (but valid) Ptr{T} type, for example
-        unsigned int nb = jl_datatype_nbits(bt);
-        to = IntegerType::get(jl_LLVMContext, nb);
-    }
-    if (type_is_ghost(to)) {
-        return NULL;
-    }
-    assert(!to->isAggregateType()); // expecting some sort of jl_bitstype
-    return emit_unbox(to, v, bt);
-}
-static Value *auto_unbox(jl_value_t *x, jl_codectx_t *ctx)
-{
-    jl_cgval_t v = emit_expr(x, ctx);
-    return auto_unbox(v, ctx);
-}
-
-static jl_value_t *staticeval_bitstype(jl_value_t *targ, const char *fname, jl_codectx_t *ctx)
+static jl_value_t *staticeval_bitstype(const jl_cgval_t &targ)
 {
     // evaluate an argument at compile time to determine what type it is
-    jl_cgval_t bt_value = emit_expr(targ, ctx);
-    jl_value_t *bt = NULL;
-    if (jl_is_type_type(bt_value.typ))
-        bt = jl_tparam0(bt_value.typ);
-    if (!bt || !jl_is_bitstype(bt)) {
-        emit_error("expected bits type as first argument", ctx);
-        return NULL;
+    if (jl_is_type_type(targ.typ)) {
+        jl_value_t *bt = jl_tparam0(targ.typ);
+        if (jl_is_bitstype(bt))
+            return bt;
     }
-    return bt;
+    return NULL;
 }
 
-static Type *staticeval_bitstype(jl_value_t *bt)
+static Type *bitstype_to_llvm(jl_value_t *bt)
 {
     assert(jl_is_bitstype(bt));
     bool isboxed;
     Type *to = julia_type_to_llvm(bt, &isboxed);
     if (to == NULL || isboxed) {
+        // might be some sort of incomplete (but valid) Ptr{T} type, for example
         unsigned int nb = jl_datatype_nbits(bt);
         to = IntegerType::get(jl_LLVMContext, nb);
     }
+    assert(!type_is_ghost(to)); // checked after the fallback so `to` is never NULL here
     assert(!to->isAggregateType()); // expecting a bits type
-    return to;
+    return to; // IntegerType, FloatingPointType, or PointerType
 }
 
-// figure out how many bits a bitstype has at compile time
-static int get_bitstype_nbits(jl_value_t *bt)
+static jl_cgval_t emit_runtime_call(intrinsic f, const jl_cgval_t *argv, size_t nargs, jl_codectx_t *ctx)
 {
-    assert(jl_is_bitstype(bt));
-    return jl_datatype_nbits(bt);
+    Value *func = prepare_call(runtime_func[f]);
+    Value **argvalues = (Value**)alloca(sizeof(Value*) * nargs);
+    for (size_t i = 0; i < nargs; ++i) {
+        argvalues[i] = boxed(argv[i], ctx);
+    }
+    Value *r = builder.CreateCall(func, makeArrayRef(argvalues, nargs));
+    return mark_julia_type(r, true, (jl_value_t*)jl_any_type, ctx);
 }
 
 // put a bits type tag on some value (despite the name, this doesn't necessarily actually "box" the value however)
-static jl_cgval_t generic_box(jl_value_t *targ, jl_value_t *x, jl_codectx_t *ctx)
+static jl_cgval_t generic_reinterpret(const jl_cgval_t *argv, jl_codectx_t *ctx)
 {
-    // Examine the first argument //
-    jl_cgval_t bt_value = emit_expr(targ, ctx);
-    jl_cgval_t v = emit_expr(x, ctx);
-    jl_value_t *bt = NULL;
-    if (jl_is_type_type(bt_value.typ))
-        bt = jl_tparam0(bt_value.typ);
-
-    if (!bt || !jl_is_bitstype(bt)) {
-        // it's easier to throw a good error from C than llvm
-        Value *arg1 = boxed(bt_value, ctx);
-        Value *arg2 = boxed(v, ctx);
-        Value *func = prepare_call(runtime_func[reinterpret]);
-#if JL_LLVM_VERSION >= 30700
-        Value *r = builder.CreateCall(func, {arg1, arg2});
-#else
-        Value *r = builder.CreateCall2(func, arg1, arg2);
-#endif
-        jl_value_t *et = expr_type(targ, ctx);
-        return mark_julia_type(r, true, jl_is_type_type(et) ? jl_tparam0(et) : (jl_value_t*)jl_any_type, ctx);
-    }
+    // Give the arguments names //
+    const jl_cgval_t &bt_value = argv[0];
+    const jl_cgval_t &v = argv[1];
+    jl_value_t *bt = staticeval_bitstype(bt_value);
 
-    Type *llvmt = staticeval_bitstype(bt);
+    // it's easier to throw a good error from C than llvm
+    if (!bt)
+        return emit_runtime_call(reinterpret, argv, 2, ctx);
+
+    Type *llvmt = bitstype_to_llvm(bt);
     int nb = jl_datatype_size(bt);
 
     // Examine the second argument //
     bool isboxed;
     Type *vxt = julia_type_to_llvm(v.typ, &isboxed);
 
-    if (!jl_is_datatype(v.typ)
-        || !jl_is_bitstype(v.typ)
-        || jl_datatype_size(v.typ) != nb) {
+    if (!jl_is_bitstype(v.typ) || jl_datatype_size(v.typ) != nb) {
         Value *typ = emit_typeof_boxed(v, ctx);
         if (!jl_is_bitstype(v.typ)) {
             if (isboxed) {
@@ -516,191 +516,141 @@ static jl_cgval_t generic_box(jl_value_t *targ, jl_value_t *x, jl_codectx_t *ctx
             true, bt, ctx);
 }
 
-// put a bits type tag on some value
-static jl_cgval_t generic_unbox(jl_value_t *targ, jl_value_t *x, jl_codectx_t *ctx)
+static jl_cgval_t generic_cast(
+        intrinsic f, Value *(*generic)(Type*, Value*, jl_codectx_t*),
+        const jl_cgval_t *argv, jl_codectx_t *ctx, bool toint, bool fromint)
 {
-    // Examine the first argument //
-    jl_cgval_t bt_value = emit_expr(targ, ctx);
-    jl_value_t *bt = NULL;
-    if (jl_is_type_type(bt_value.typ))
-        bt = jl_tparam0(bt_value.typ);
+    const jl_cgval_t &targ = argv[0];
+    const jl_cgval_t &v = argv[1];
+    jl_value_t *jlto = staticeval_bitstype(targ);
+    if (!jlto || !jl_is_bitstype(v.typ))
+        return emit_runtime_call(f, argv, 2, ctx);
+    Type *to = bitstype_to_llvm(jlto);
+    Type *vt = bitstype_to_llvm(v.typ);
+    if (toint)
+        to = INTT(to);
+    else
+        to = FLOATT(to);
+    if (fromint)
+        vt = INTT(vt);
+    else
+        vt = FLOATT(vt);
+    Value *from = emit_unbox(vt, v, v.typ);
+    Value *ans = generic(to, from, ctx);
+    return mark_julia_type(ans, false, jlto, ctx);
+}
 
-    // Examine the second argument //
-    jl_cgval_t v = emit_expr(x, ctx);
-
-    if (bt == NULL || !jl_is_leaf_type(bt)) {
-        // dynamically-determined type; evaluate.
-        int nb, alignment;
-        Type *llvmt;
-        if (bt && jl_is_bitstype(bt)) {
-            // always fixed size
-            nb = jl_datatype_size(bt);
-            llvmt = staticeval_bitstype(bt);
-            alignment = ((jl_datatype_t*)bt)->layout->alignment;
-        }
-        else {
-            bt = v.typ;
-            if (!jl_is_leaf_type(bt) && !jl_is_bitstype(bt)) {
-                // TODO: currently doesn't handle the case where the type of neither argument is understood at compile time
-                // since codegen has no idea what size it might have
-                jl_error("codegen: failed during evaluation of a call to unbox");
-                return jl_cgval_t();
-            }
-            nb = jl_datatype_size(bt);
-            llvmt = staticeval_bitstype(bt);
-            alignment = ((jl_datatype_t*)bt)->layout->alignment;
-        }
-        Value *runtime_bt = boxed(bt_value, ctx);
-        // XXX: emit type validity check on runtime_bt (bitstype of size nb)
+static Value *generic_trunc(Type *to, Value *x, jl_codectx_t *ctx)
+{
+    return builder.CreateTrunc(x, to);
+}
 
-        Value *newobj = emit_allocobj(ctx, nb, runtime_bt);
-        if (!v.ispointer()) {
-            tbaa_decorate(tbaa_value, builder.CreateAlignedStore(emit_unbox(llvmt, v, v.typ), builder.CreatePointerCast(newobj, llvmt->getPointerTo()), alignment));
-        }
-        else {
-            prepare_call(builder.CreateMemCpy(newobj, data_pointer(v, ctx, T_pint8), nb, alignment)->getCalledValue());
-            mark_gc_use(v);
-        }
-        return mark_julia_type(newobj, true, bt ? bt : (jl_value_t*)jl_any_type, ctx);
-    }
+static Value *generic_trunc_uchecked(Type *to, Value *x, jl_codectx_t *ctx)
+{
+    Value *ans = builder.CreateTrunc(x, to);
+    Value *back = builder.CreateZExt(ans, x->getType());
+    raise_exception_unless(builder.CreateICmpEQ(back, x),
+                           literal_pointer_val(jl_inexact_exception), ctx);
+    return ans;
+}
 
-    if (!jl_is_bitstype(bt)) {
-        // TODO: to accept arbitrary types, replace this function with a call to llvm_type_rewrite
-        emit_error("unbox: expected bits type as first argument", ctx);
-        return jl_cgval_t();
-    }
+static Value *generic_trunc_schecked(Type *to, Value *x, jl_codectx_t *ctx)
+{
+    Value *ans = builder.CreateTrunc(x, to);
+    Value *back = builder.CreateSExt(ans, x->getType());
+    raise_exception_unless(builder.CreateICmpEQ(back, x),
+                           literal_pointer_val(jl_inexact_exception), ctx);
+    return ans;
+}
 
-    Type *llvmt = staticeval_bitstype(bt);
-    if (v.typ == bt)
-        return v;
+static Value *generic_sext(Type *to, Value *x, jl_codectx_t *ctx)
+{
+    return builder.CreateSExt(x, to);
+}
 
-    Value *vx;
-    if (v.ispointer()) {
-        vx = tbaa_decorate(v.tbaa, builder.CreateLoad(data_pointer(v, ctx, llvmt->getPointerTo())));
-    }
-    else {
-        vx = v.V;
-        if (!jl_is_bitstype(v.typ)) {
-            emit_error("unbox: expected bits type value for second argument", ctx);
-            return jl_cgval_t();
-        }
-    }
+static Value *generic_zext(Type *to, Value *x, jl_codectx_t *ctx)
+{
+    return builder.CreateZExt(x, to);
+}
 
-    Type *vxt = vx->getType();
-    if (llvmt == T_int1) {
-        vx = builder.CreateTrunc(vx, llvmt);
-    }
-    else if (vxt == T_int1 && llvmt == T_int8) {
-        vx = builder.CreateZExt(vx, llvmt);
-    }
-    else if (vxt != llvmt) {
-        // getPrimitiveSizeInBits() == 0 for pointers
-        // PtrToInt and IntToPtr ignore size differences
-        if (vxt->getPrimitiveSizeInBits() != llvmt->getPrimitiveSizeInBits() &&
-            !(vxt->isPointerTy() && llvmt->getPrimitiveSizeInBits() == sizeof(void*)*8) &&
-            !(llvmt->isPointerTy() && vxt->getPrimitiveSizeInBits() == sizeof(void*)*8)) {
-            emit_error("unbox: argument is of incorrect size", ctx);
-            return jl_cgval_t();
-        }
-        if (vxt->isPointerTy() && !llvmt->isPointerTy())
-            vx = builder.CreatePtrToInt(vx, llvmt);
-        else if (!vxt->isPointerTy() && llvmt->isPointerTy())
-            vx = builder.CreateIntToPtr(vx, llvmt);
-        else
-            vx = emit_bitcast(vx, llvmt);
-    }
+static Value *generic_uitofp(Type *to, Value *x, jl_codectx_t *ctx)
+{
+    return builder.CreateUIToFP(x, to);
+}
 
-    return mark_julia_type(vx, false, bt, ctx);
+static Value *generic_sitofp(Type *to, Value *x, jl_codectx_t *ctx)
+{
+    return builder.CreateSIToFP(x, to);
 }
 
-// NOTE: signd (signed) only relevant if check == true
-static jl_cgval_t generic_trunc(jl_value_t *targ, jl_value_t *x, jl_codectx_t *ctx, bool check, bool signd)
+static Value *generic_fptoui(Type *to, Value *x, jl_codectx_t *ctx)
 {
-    jl_value_t *jlto = staticeval_bitstype(targ, "trunc_int", ctx);
-    if (!jlto) return jl_cgval_t(); // jlto threw an error
-    Type *to = staticeval_bitstype(jlto);
-    Value *ix = JL_INT(auto_unbox(x, ctx));
-    if (ix->getType() == T_void) return jl_cgval_t(); // auto_unbox threw an error
-    Value *ans = builder.CreateTrunc(ix, to);
-    if (check) {
-        Value *back = signd ? builder.CreateSExt(ans, ix->getType()) :
-            builder.CreateZExt(ans, ix->getType());
-        raise_exception_unless(builder.CreateICmpEQ(back, ix),
-                               literal_pointer_val(jl_inexact_exception), ctx);
-    }
-    return mark_julia_type(ans, false, jlto, ctx);
+    return builder.CreateFPToUI(x, to);
 }
 
-static jl_cgval_t generic_sext(jl_value_t *targ, jl_value_t *x, jl_codectx_t *ctx)
+static Value *generic_fptosi(Type *to, Value *x, jl_codectx_t *ctx)
 {
-    jl_value_t *jlto = staticeval_bitstype(targ, "sext_int", ctx);
-    if (!jlto) return jl_cgval_t(); // jlto threw an error
-    Type *to = staticeval_bitstype(jlto);
-    Value *ix = JL_INT(auto_unbox(x, ctx));
-    if (ix->getType() == T_void) return jl_cgval_t(); // auto_unbox threw an error
-    Value *ans = builder.CreateSExt(ix, to);
-    return mark_julia_type(ans, false, jlto, ctx);
+    return builder.CreateFPToSI(x, to);
 }
 
-static jl_cgval_t generic_zext(jl_value_t *targ, jl_value_t *x, jl_codectx_t *ctx)
+static Value *generic_fptrunc(Type *to, Value *x, jl_codectx_t *ctx)
 {
-    jl_value_t *jlto = staticeval_bitstype(targ, "zext_int", ctx);
-    if (!jlto) return jl_cgval_t(); // jlto threw an error
-    Type *to = staticeval_bitstype(jlto);
-    Value *ix = JL_INT(auto_unbox(x, ctx));
-    if (ix->getType() == T_void) return jl_cgval_t(); // auto_unbox threw an error
-    Value *ans = builder.CreateZExt(ix, to);
-    return mark_julia_type(ans, false, jlto, ctx);
+    return builder.CreateFPTrunc(x, to);
 }
 
-static jl_cgval_t emit_runtime_pointerref(jl_value_t *e, jl_value_t *i, jl_value_t *align, jl_codectx_t *ctx)
+static Value *generic_fpext(Type *to, Value *x, jl_codectx_t *ctx)
 {
-    jl_cgval_t parg = emit_expr(e, ctx);
-    Value *iarg = boxed(emit_expr(i, ctx), ctx);
-    Value *alignarg = boxed(emit_expr(align, ctx), ctx);
-#if JL_LLVM_VERSION >= 30700
-    Value *ret = builder.CreateCall(prepare_call(runtime_func[pointerref]), { boxed(parg, ctx), iarg, alignarg });
-#else
-    Value *ret = builder.CreateCall3(prepare_call(runtime_func[pointerref]), boxed(parg, ctx), iarg, alignarg);
+#ifdef JL_NEED_FLOATTEMP_VAR
+    // Target platform might carry extra precision.
+    // Force rounding to single precision first. The reason is that it's
+    // fine to keep working in extended precision as long as it's
+    // understood that everything is implicitly rounded to 23 bits,
+    // but if we start looking at more bits we need to actually do the
+    // rounding first instead of carrying around incorrect low bits.
+    Value *jlfloattemp_var = emit_static_alloca(x->getType());
+    builder.CreateStore(x, jlfloattemp_var);
+    x  = builder.CreateLoad(jlfloattemp_var, true);
 #endif
-    jl_value_t *ety;
-    if (jl_is_cpointer_type(parg.typ)) {
-        ety = jl_tparam0(parg.typ);
-    }
-    else {
-        ety = (jl_value_t*)jl_any_type;
-    }
-    return mark_julia_type(ret, true, ety, ctx);
+    return builder.CreateFPExt(x, to);
+}
+
+static jl_cgval_t emit_runtime_pointerref(jl_cgval_t *argv, jl_codectx_t *ctx)
+{
+    return emit_runtime_call(pointerref, argv, 3, ctx);
 }
 
-static jl_cgval_t emit_pointerref(jl_value_t *e, jl_value_t *i, jl_value_t *align, jl_codectx_t *ctx)
+static jl_cgval_t emit_pointerref(jl_cgval_t *argv, jl_codectx_t *ctx)
 {
-    jl_value_t *aty = expr_type(e, ctx);
+    const jl_cgval_t &e = argv[0];
+    const jl_cgval_t &i = argv[1];
+    const jl_cgval_t &align = argv[2];
+
+    if (align.constant == NULL || !jl_is_long(align.constant))
+        return emit_runtime_pointerref(argv, ctx);
+    unsigned align_nb = jl_unbox_long(align.constant);
+
+    if (i.typ != (jl_value_t*)jl_long_type)
+        return emit_runtime_pointerref(argv, ctx);
+    jl_value_t *aty = e.typ;
     if (!jl_is_cpointer_type(aty))
-        return emit_runtime_pointerref(e, i, align, ctx);
-        //jl_error("pointerref: expected pointer type as first argument");
+        return emit_runtime_pointerref(argv, ctx);
     jl_value_t *ety = jl_tparam0(aty);
     if (jl_is_typevar(ety))
-        return emit_runtime_pointerref(e, i, align, ctx);
-        //jl_error("pointerref: invalid pointer");
-    if (expr_type(i, ctx) != (jl_value_t*)jl_long_type)
-        return emit_runtime_pointerref(e, i, align, ctx);
-        //jl_error("pointerref: invalid index type");
-    jl_cgval_t align_val = emit_expr(align, ctx);
-    if (align_val.constant == NULL || !jl_is_long(align_val.constant))
-        return emit_runtime_pointerref(e, i, align, ctx);
-        //jl_error("pointerref: invalid or non-statically evaluatable alignment")
-    Value *thePtr = auto_unbox(e,ctx);
-    Value *idx = emit_unbox(T_size, emit_expr(i, ctx), (jl_value_t*)jl_long_type);
+        return emit_runtime_pointerref(argv, ctx);
+    if (!jl_is_datatype(ety))
+        ety = (jl_value_t*)jl_any_type;
+
+    Value *idx = emit_unbox(T_size, i, (jl_value_t*)jl_long_type);
     Value *im1 = builder.CreateSub(idx, ConstantInt::get(T_size, 1));
+
     if (!jl_isbits(ety)) {
-        if (ety == (jl_value_t*)jl_any_type)
+        if (ety == (jl_value_t*)jl_any_type) {
+            Value *thePtr = emit_unbox(T_ppjlvalue, e, e.typ);
             return mark_julia_type(
-                    builder.CreateAlignedLoad(builder.CreateGEP(
-                        emit_bitcast(thePtr, T_ppjlvalue),
-                        im1), jl_unbox_long(align_val.constant)),
+                    builder.CreateAlignedLoad(builder.CreateGEP(thePtr, im1), align_nb),
                     true,
                     ety, ctx);
+        }
         if (!jl_is_structtype(ety) || jl_is_array_type(ety) || !jl_is_leaf_type(ety)) {
             emit_error("pointerref: invalid pointer type", ctx);
             return jl_cgval_t();
@@ -711,78 +661,72 @@ static jl_cgval_t emit_pointerref(jl_value_t *e, jl_value_t *i, jl_value_t *alig
                                      literal_pointer_val((jl_value_t*)ety));
         im1 = builder.CreateMul(im1, ConstantInt::get(T_size,
                     LLT_ALIGN(size, ((jl_datatype_t*)ety)->layout->alignment)));
+        Value *thePtr = emit_unbox(T_pint8, e, e.typ);
         thePtr = builder.CreateGEP(emit_bitcast(thePtr, T_pint8), im1);
-        prepare_call(builder.CreateMemCpy(emit_bitcast(strct, T_pint8),
-                             thePtr, size, 1)->getCalledValue());
+        builder.CreateMemCpy(emit_bitcast(strct, T_pint8), thePtr, size, 1);
         return mark_julia_type(strct, true, ety, ctx);
     }
-    return typed_load(thePtr, im1, ety, ctx, tbaa_data, jl_unbox_long(align_val.constant));
+
+    bool isboxed;
+    Type *ptrty = julia_type_to_llvm(e.typ, &isboxed);
+    assert(!isboxed);
+    Value *thePtr = emit_unbox(ptrty, e, e.typ);
+    return typed_load(thePtr, im1, ety, ctx, tbaa_data, align_nb);
 }
 
-static jl_cgval_t emit_runtime_pointerset(jl_value_t *e, jl_value_t *x, jl_value_t *i, jl_value_t *align, jl_codectx_t *ctx)
+static jl_cgval_t emit_runtime_pointerset(jl_cgval_t *argv, jl_codectx_t *ctx)
 {
-    jl_cgval_t parg = emit_expr(e, ctx);
-    Value *xarg = boxed(emit_expr(x, ctx), ctx);
-    Value *iarg = boxed(emit_expr(i, ctx), ctx);
-    Value *alignarg = boxed(emit_expr(align, ctx), ctx);
-#if JL_LLVM_VERSION >= 30700
-    builder.CreateCall(prepare_call(runtime_func[pointerset]), { boxed(parg, ctx), xarg, iarg, alignarg });
-#else
-    builder.CreateCall4(prepare_call(runtime_func[pointerset]), boxed(parg, ctx), xarg, iarg, alignarg);
-#endif
-    return parg;
+    return emit_runtime_call(pointerset, argv, 4, ctx);
 }
 
 // e[i] = x
-static jl_cgval_t emit_pointerset(jl_value_t *e, jl_value_t *x, jl_value_t *i, jl_value_t *align, jl_codectx_t *ctx)
+static jl_cgval_t emit_pointerset(jl_cgval_t *argv, jl_codectx_t *ctx)
 {
-    jl_value_t *aty = expr_type(e, ctx);
+    const jl_cgval_t &e = argv[0];
+    const jl_cgval_t &x = argv[1];
+    const jl_cgval_t &i = argv[2];
+    const jl_cgval_t &align = argv[3];
+
+    if (align.constant == NULL || !jl_is_long(align.constant))
+        return emit_runtime_pointerset(argv, ctx);
+    unsigned align_nb = jl_unbox_long(align.constant);
+
+    if (i.typ != (jl_value_t*)jl_long_type)
+        return emit_runtime_pointerset(argv, ctx);
+    jl_value_t *aty = e.typ;
     if (!jl_is_cpointer_type(aty))
-        return emit_runtime_pointerset(e, x, i, align, ctx);
-        //jl_error("pointerset: expected pointer type as first argument");
+        return emit_runtime_pointerset(argv, ctx);
     jl_value_t *ety = jl_tparam0(aty);
     if (jl_is_typevar(ety))
-        return emit_runtime_pointerset(e, x, i, align, ctx);
-        //jl_error("pointerset: invalid pointer");
-    jl_value_t *xty = expr_type(x, ctx);
-    jl_cgval_t val;
-    bool emitted = false;
-    if (!jl_subtype(xty, ety, 0)) {
-        emitted = true;
-        val = emit_expr(x, ctx);
-        emit_typecheck(val, ety, "pointerset: type mismatch in assign", ctx);
-    }
-    if (expr_type(i, ctx) != (jl_value_t*)jl_long_type)
-        return emit_runtime_pointerset(e, x, i, align, ctx);
-        //jl_error("pointerset: invalid index type");
-    jl_cgval_t align_val = emit_expr(align, ctx);
-    if (align_val.constant == NULL || !jl_is_long(align_val.constant))
-        return emit_runtime_pointerset(e, x, i, align, ctx);
-        //jl_error("pointerset: invalid or non-statically evaluatable alignment")
-    Value *idx = emit_unbox(T_size, emit_expr(i, ctx),(jl_value_t*)jl_long_type);
+        return emit_runtime_pointerset(argv, ctx);
+    if (!jl_is_datatype(ety))
+        ety = (jl_value_t*)jl_any_type;
+    jl_value_t *xty = x.typ;
+    if (!jl_subtype(xty, ety, 0))
+        emit_typecheck(x, ety, "pointerset: type mismatch in assign", ctx);
+
+    Value *idx = emit_unbox(T_size, i, (jl_value_t*)jl_long_type);
     Value *im1 = builder.CreateSub(idx, ConstantInt::get(T_size, 1));
-    Value *thePtr = auto_unbox(e,ctx);
+
+    Value *thePtr;
     if (!jl_isbits(ety) && ety != (jl_value_t*)jl_any_type) {
         if (!jl_is_structtype(ety) || jl_is_array_type(ety) || !jl_is_leaf_type(ety)) {
             emit_error("pointerset: invalid pointer type", ctx);
             return jl_cgval_t();
         }
-        if (!emitted)
-            val = emit_expr(x, ctx);
-        assert(val.isboxed);
-        assert(jl_is_datatype(ety));
+        thePtr = emit_unbox(T_pint8, e, e.typ);
         uint64_t size = jl_datatype_size(ety);
         im1 = builder.CreateMul(im1, ConstantInt::get(T_size,
                     LLT_ALIGN(size, ((jl_datatype_t*)ety)->layout->alignment)));
-        prepare_call(builder.CreateMemCpy(builder.CreateGEP(emit_bitcast(thePtr, T_pint8), im1),
-                             data_pointer(val, ctx, T_pint8), size, jl_unbox_long(align_val.constant))->getCalledValue());
+        builder.CreateMemCpy(builder.CreateGEP(thePtr, im1),
+                             data_pointer(x, ctx, T_pint8), size, align_nb);
     }
     else {
-        if (!emitted) {
-            val = emit_expr(x, ctx);
-        }
-        assert(jl_is_datatype(ety));
-        typed_store(thePtr, im1, val, ety, ctx, tbaa_data, NULL, jl_unbox_long(align_val.constant));
+        bool isboxed;
+        Type *ptrty = julia_type_to_llvm(e.typ, &isboxed);
+        assert(!isboxed);
+        thePtr = emit_unbox(ptrty, e, e.typ);
+        typed_store(thePtr, im1, x, ety, ctx, tbaa_data, NULL, align_nb);
     }
     return mark_julia_type(thePtr, false, aty, ctx);
 }
@@ -839,7 +783,7 @@ struct math_builder {
     }
 };
 
-static Value *emit_untyped_intrinsic(intrinsic f, Value *x, Value *y, Value *z, size_t nargs,
+static Value *emit_untyped_intrinsic(intrinsic f, Value **argvalues, size_t nargs,
                                      jl_codectx_t *ctx, jl_datatype_t **newtyp);
 static jl_cgval_t emit_intrinsic(intrinsic f, jl_value_t **args, size_t nargs,
                                  jl_codectx_t *ctx)
@@ -858,14 +802,15 @@ static jl_cgval_t emit_intrinsic(intrinsic f, jl_value_t **args, size_t nargs,
 
     if (f == llvmcall)
         return emit_llvmcall(args, nargs, ctx);
-    if (f == unbox)
-        return generic_unbox(args[1], args[2], ctx); // TODO: replace with generic_box
+    if (f == cglobal_auto || f == cglobal)
+        return emit_cglobal(args, nargs, ctx);
 
-#if 0 // this section enables runtime-intrinsics (e.g. for testing), and disables their llvm counterparts
     jl_cgval_t *argv = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * nargs);
     for (size_t i = 0; i < nargs; ++i) {
         argv[i] = emit_expr(args[i + 1], ctx);
     }
+
+#if 0 // this section enables runtime-intrinsics (e.g. for testing)
     Value *func = prepare_call(runtime_func[f]);
     Value **argvalues = (Value**)alloca(sizeof(Value*) * nargs);
     for (size_t i = 0; i < nargs; ++i) {
@@ -873,128 +818,83 @@ static jl_cgval_t emit_intrinsic(intrinsic f, jl_value_t **args, size_t nargs,
     }
     Value *r = builder.CreateCall(func, makeArrayRef(argvalues, nargs));
     return mark_julia_type(r, true, (jl_value_t*)jl_any_type, ctx);
-#else
+#endif
+
     switch (f) {
     case arraylen:
-        return mark_julia_type(emit_arraylen(emit_expr(args[1], ctx), args[1], ctx), false,
-                               jl_long_type, ctx);
-    case cglobal_auto:
-    case cglobal:
-        return emit_cglobal(args, nargs, ctx);
+        return mark_julia_type(emit_arraylen(argv[0], args[1], ctx), false, jl_long_type, ctx);
     case pointerref:
-        return emit_pointerref(args[1], args[2], args[3], ctx);
+        return emit_pointerref(argv, ctx);
     case pointerset:
-        return emit_pointerset(args[1], args[2], args[3], args[4], ctx);
+        return emit_pointerset(argv, ctx);
     case box:
-        return generic_box(args[1], args[2], ctx);
+    case unbox:
+        return generic_reinterpret(argv, ctx);
     case trunc_int:
-        return generic_trunc(args[1], args[2], ctx, false, false);
-    case checked_trunc_sint:
-        return generic_trunc(args[1], args[2], ctx, true, true);
+        return generic_cast(f, generic_trunc, argv, ctx, true, true);
     case checked_trunc_uint:
-        return generic_trunc(args[1], args[2], ctx, true, false);
+        return generic_cast(f, generic_trunc_uchecked, argv, ctx, true, true);
+    case checked_trunc_sint:
+        return generic_cast(f, generic_trunc_schecked, argv, ctx, true, true);
     case sext_int:
-        return generic_sext(args[1], args[2], ctx);
+        return generic_cast(f, generic_sext, argv, ctx, true, true);
     case zext_int:
-        return generic_zext(args[1], args[2], ctx);
-
-    case uitofp: {
-        jl_value_t *bt = staticeval_bitstype(args[1], "uitofp", ctx);
-        if (!bt) return jl_cgval_t();
-        int nb = get_bitstype_nbits(bt);
-        Value *xi = JL_INT(auto_unbox(args[2],ctx));
-        if (xi->getType() == T_void) return jl_cgval_t(); // auto_unbox threw an error
-        return mark_julia_type(builder.CreateUIToFP(xi, FTnbits(nb)), false, bt, ctx);
-    }
-
-    case sitofp: {
-        jl_value_t *bt = staticeval_bitstype(args[1], "sitofp", ctx);
-        if (!bt) return jl_cgval_t();
-        int nb = get_bitstype_nbits(bt);
-        Value *xi = JL_INT(auto_unbox(args[2],ctx));
-        if (xi->getType() == T_void) return jl_cgval_t(); // auto_unbox threw an error
-        return mark_julia_type(builder.CreateSIToFP(xi, FTnbits(nb)), false, bt, ctx);
-    }
+        return generic_cast(f, generic_zext, argv, ctx, true, true);
+    case uitofp:
+        return generic_cast(f, generic_uitofp, argv, ctx, false, true);
+    case sitofp:
+        return generic_cast(f, generic_sitofp, argv, ctx, false, true);
+    case fptoui:
+        return generic_cast(f, generic_fptoui, argv, ctx, true, false);
+    case fptosi:
+        return generic_cast(f, generic_fptosi, argv, ctx, true, false);
+    case fptrunc:
+        return generic_cast(f, generic_fptrunc, argv, ctx, false, false);
+    case fpext:
+        return generic_cast(f, generic_fpext, argv, ctx, false, false);
 
     case fptoui_auto: {
-        Value *x = FP(auto_unbox(args[1], ctx));
-        if (x->getType() == T_void) return jl_cgval_t(); // auto_unbox threw an error
-        return mark_julia_type(
-                builder.CreateFPToUI(FP(x), JL_INTT(x->getType())),
-                false,
-                JL_JLUINTT(x->getType()), ctx);
-    }
-    case fptoui: {
-        jl_value_t *bt = staticeval_bitstype(args[1], "sitofp", ctx);
-        if (!bt) return jl_cgval_t();
-        int nb = get_bitstype_nbits(bt);
-        Value *xf = FP(auto_unbox(args[2],ctx));
-        if (xf->getType() == T_void) return jl_cgval_t(); // auto_unbox threw an error
-        return mark_julia_type(builder.CreateFPToUI(xf, Type::getIntNTy(jl_LLVMContext, nb)), false, bt, ctx);
+        const jl_cgval_t &x = argv[0];
+        if (!jl_is_bitstype(x.typ))
+            return emit_runtime_call(f, argv, nargs, ctx);
+        Type *xt = bitstype_to_llvm(x.typ);
+        Type *to = INTT(xt);
+        xt = FLOATT(xt);
+        Value *from = emit_unbox(xt, x, x.typ);
+        Value *ans = builder.CreateFPToUI(from, to);
+        return mark_julia_type(ans, false, JL_JLUINTT(xt), ctx);
     }
 
     case fptosi_auto: {
-        Value *x = FP(auto_unbox(args[1], ctx));
-        return mark_julia_type(
-                builder.CreateFPToSI(FP(x), JL_INTT(x->getType())),
-                false,
-                JL_JLSINTT(x->getType()), ctx);
-    }
-    case fptosi: {
-        jl_value_t *bt = staticeval_bitstype(args[1], "sitofp", ctx);
-        if (!bt) return jl_cgval_t();
-        int nb = get_bitstype_nbits(bt);
-        Value *xf = FP(auto_unbox(args[2],ctx));
-        if (xf->getType() == T_void) return jl_cgval_t(); // auto_unbox threw an error
-        return mark_julia_type(builder.CreateFPToSI(xf, Type::getIntNTy(jl_LLVMContext, nb)), false, bt, ctx);
-    }
-
-    case fptrunc: {
-        jl_value_t *bt = staticeval_bitstype(args[1], "sitofp", ctx);
-        if (!bt) return jl_cgval_t();
-        int nb = get_bitstype_nbits(bt);
-        Value *xf = FP(auto_unbox(args[2],ctx));
-        if (xf->getType() == T_void) return jl_cgval_t(); // auto_unbox threw an error
-        return mark_julia_type(builder.CreateFPTrunc(xf, FTnbits(nb)), false, bt, ctx);
-    }
-
-    case fpext: {
-        jl_value_t *bt = staticeval_bitstype(args[1], "sitofp", ctx);
-        if (!bt) return jl_cgval_t();
-        int nb = get_bitstype_nbits(bt);
-        Value *x = auto_unbox(args[2],ctx);
-        if (x->getType() == T_void) return jl_cgval_t(); // auto_unbox threw an error
-#ifdef JL_NEED_FLOATTEMP_VAR
-        // Target platform might carry extra precision.
-        // Force rounding to single precision first. The reason is that it's
-        // fine to keep working in extended precision as long as it's
-        // understood that everything is implicitly rounded to 23 bits,
-        // but if we start looking at more bits we need to actually do the
-        // rounding first instead of carrying around incorrect low bits.
-        Value *jlfloattemp_var = emit_static_alloca(FT(x->getType()));
-        builder.CreateStore(FP(x), jlfloattemp_var);
-        x  = builder.CreateLoad(jlfloattemp_var, true);
-#endif
-        return mark_julia_type(builder.CreateFPExt(x, FTnbits(nb)), false, bt, ctx);
+        const jl_cgval_t &x = argv[0];
+        if (!jl_is_bitstype(x.typ))
+            return emit_runtime_call(f, argv, nargs, ctx);
+        Type *xt = bitstype_to_llvm(x.typ);
+        Type *to = INTT(xt);
+        xt = FLOATT(xt);
+        Value *from = emit_unbox(xt, x, x.typ);
+        Value *ans = builder.CreateFPToSI(from, to);
+        return mark_julia_type(ans, false, JL_JLSINTT(xt), ctx);
     }
 
     case select_value: {
-        Value *isfalse = emit_condition(args[1], "select_value", ctx); // emit the first argument
-        jl_value_t *t1 = expr_type(args[2], ctx);
-        jl_value_t *t2 = expr_type(args[3], ctx);
-        bool isboxed;
-        Type *llt1 = julia_type_to_llvm(t1, &isboxed);
-        Value *ifelse_result;
+        Value *isfalse = emit_condition(argv[0], "select_value", ctx); // condition from the already-emitted first argument
         // emit X and Y arguments
-        jl_cgval_t x = emit_expr(args[2], ctx);
-        jl_cgval_t y = emit_expr(args[3], ctx);
+        const jl_cgval_t &x = argv[1];
+        const jl_cgval_t &y = argv[2];
+        jl_value_t *t1 = x.typ;
+        jl_value_t *t2 = y.typ;
         // check the return value was valid
-        if (x.typ == jl_bottom_type && y.typ == jl_bottom_type)
+        if (t1 == jl_bottom_type && t2 == jl_bottom_type)
             return jl_cgval_t(); // undefined
-        if (x.typ == jl_bottom_type)
+        if (t1 == jl_bottom_type)
             return y;
-        if (y.typ == jl_bottom_type)
+        if (t2 == jl_bottom_type)
             return x;
+
+        Value *ifelse_result;
+        bool isboxed;
+        Type *llt1 = julia_type_to_llvm(t1, &isboxed);
         if (t1 != t2)
             isboxed = true;
         if (!isboxed) {
@@ -1015,128 +915,161 @@ static jl_cgval_t emit_intrinsic(intrinsic f, jl_value_t **args, size_t nargs,
         return mark_julia_type(ifelse_result, isboxed, jt, ctx);
     }
 
+    case not_int: {
+        const jl_cgval_t &x = argv[0];
+        if (!jl_is_bitstype(x.typ))
+            return emit_runtime_call(f, argv, nargs, ctx);
+        Type *xt = INTT(bitstype_to_llvm(x.typ));
+        Value *from = emit_unbox(xt, x, x.typ);
+        Value *ans;
+        if (x.typ == (jl_value_t*)jl_bool_type)
+            ans = builder.CreateXor(from, ConstantInt::get(T_int8, 1, true));
+        else
+            ans = builder.CreateXor(from, ConstantInt::get(xt, -1, true));
+        return mark_julia_type(ans, false, x.typ, ctx);
+    }
+
+    case powi_llvm: {
+        const jl_cgval_t &x = argv[0];
+        const jl_cgval_t &y = argv[1];
+        if (!jl_is_bitstype(x.typ) || !jl_is_bitstype(y.typ) || jl_datatype_size(y.typ) != 4)
+            return emit_runtime_call(f, argv, nargs, ctx);
+        Type *xt = FLOATT(bitstype_to_llvm(x.typ));
+        Type *yt = T_int32;
+
+        Value *xv = emit_unbox(xt, x, x.typ);
+        Value *yv = emit_unbox(yt, y, y.typ);
+#if JL_LLVM_VERSION >= 30600
+        Value *powi = Intrinsic::getDeclaration(jl_Module, Intrinsic::powi, makeArrayRef(xt));
+#if JL_LLVM_VERSION >= 30700
+        Value *ans = builder.CreateCall(powi, {xv, yv});
+#else
+        Value *ans = builder.CreateCall2(powi, xv, yv);
+#endif
+#else
+        // issue #6506
+        Value *ans = builder.CreateCall2(prepare_call(xt == T_float64 ? jlpow_func : jlpowf_func),
+                xv, builder.CreateSIToFP(yv, xt));
+#endif
+        return mark_julia_type(ans, false, x.typ, ctx);
+    }
+
     default: {
-        if (nargs < 1) jl_error("invalid intrinsic call");
-        jl_cgval_t xinfo = emit_expr(args[1], ctx);
-        Value *x = auto_unbox(xinfo, ctx);
-        if (!x || type_is_ghost(x->getType())) {
-            emit_error("invalid intrinsic argument at 1", ctx);
-            return jl_cgval_t();
+        assert(nargs >= 1 && "invalid nargs for intrinsic call");
+        const jl_cgval_t &xinfo = argv[0];
+
+        // verify argument types
+        if (!jl_is_bitstype(xinfo.typ))
+            return emit_runtime_call(f, argv, nargs, ctx);
+        Type *xtyp = bitstype_to_llvm(xinfo.typ);
+        if (float_func[f])
+            xtyp = FLOATT(xtyp);
+        else
+            xtyp = INTT(xtyp);
+
+        Type **argt = (Type**)alloca(sizeof(Type*) * nargs);
+        argt[0] = xtyp;
+
+        if (f == shl_int || f == lshr_int || f == ashr_int) {
+            if (!jl_is_bitstype(argv[1].typ))
+                return emit_runtime_call(f, argv, nargs, ctx);
+            argt[1] = INTT(bitstype_to_llvm(argv[1].typ));
         }
-        Value *y = NULL;
-        if (nargs>1) {
-            y = auto_unbox(args[2], ctx);
-            if (!y || type_is_ghost(y->getType())) {
-                emit_error("invalid intrinsic argument at 2", ctx);
-                return jl_cgval_t();
+        else {
+            for (size_t i = 1; i < nargs; ++i) {
+                if (xinfo.typ != argv[i].typ)
+                    return emit_runtime_call(f, argv, nargs, ctx);
+                argt[i] = xtyp;
             }
         }
-        Value *z = NULL;
-        if (nargs>2) {
-            z = auto_unbox(args[3], ctx);
-            if (!z || type_is_ghost(z->getType())) {
-                emit_error("invalid intrinsic argument at 3", ctx);
-                return jl_cgval_t();
-            }
+
+        // unbox the arguments
+        Value **argvalues = (Value**)alloca(sizeof(Value*) * nargs);
+        for (size_t i = 0; i < nargs; ++i) {
+            argvalues[i] = emit_unbox(argt[i], argv[i], argv[i].typ);
         }
+
+        // call the intrinsic
         jl_value_t *newtyp = NULL;
-        // TODO: compare the type validity of x,y,z before emitting the intrinsic
-        Value *r;
-        if (f == not_int && xinfo.typ == (jl_value_t*)jl_bool_type)
-            r = builder.CreateXor(x, ConstantInt::get(T_int8, 1, true));
-        else
-            r = emit_untyped_intrinsic(f, x, y, z, nargs, ctx, (jl_datatype_t**)&newtyp);
-        if (!newtyp && r->getType() != x->getType())
-            // cast back to the exact original type (e.g. float vs. int) before remarking as a julia type
-            r = emit_bitcast(r, x->getType());
+        Value *r = emit_untyped_intrinsic(f, argvalues, nargs, ctx, (jl_datatype_t**)&newtyp);
         if (r->getType() == T_int1)
             r = builder.CreateZExt(r, T_int8);
         return mark_julia_type(r, false, newtyp ? newtyp : xinfo.typ, ctx);
     }
     }
-#endif
-    abort(); // unreachable
+    assert(0 && "unreachable");
 }
 
-static Value *emit_untyped_intrinsic(intrinsic f, Value *x, Value *y, Value *z, size_t nargs,
+static Value *emit_untyped_intrinsic(intrinsic f, Value **argvalues, size_t nargs,
                                      jl_codectx_t *ctx, jl_datatype_t **newtyp)
 {
+    Value *x = nargs > 0 ? argvalues[0] : NULL;
+    Value *y = nargs > 1 ? argvalues[1] : NULL;
+    Value *z = nargs > 2 ? argvalues[2] : NULL;
     Type *t = x->getType();
-    Value *fy;
-    Value *den;
-    Value *typemin;
+
     switch (f) {
     case neg_int:
 #if JL_LLVM_VERSION >= 30700
-     return builder.CreateNeg(JL_INT(x));
+     return builder.CreateNeg(x);
 #else
-     return builder.CreateSub(ConstantInt::get(t, 0), JL_INT(x));
+     return builder.CreateSub(ConstantInt::get(t, 0), x);
 #endif
-    case add_int: return builder.CreateAdd(JL_INT(x), JL_INT(y));
-    case sub_int: return builder.CreateSub(JL_INT(x), JL_INT(y));
-    case mul_int: return builder.CreateMul(JL_INT(x), JL_INT(y));
-    case sdiv_int: return builder.CreateSDiv(JL_INT(x), JL_INT(y));
-    case udiv_int: return builder.CreateUDiv(JL_INT(x), JL_INT(y));
-    case srem_int: return builder.CreateSRem(JL_INT(x), JL_INT(y));
-    case urem_int: return builder.CreateURem(JL_INT(x), JL_INT(y));
+    case add_int: return builder.CreateAdd(x, y);
+    case sub_int: return builder.CreateSub(x, y);
+    case mul_int: return builder.CreateMul(x, y);
+    case sdiv_int: return builder.CreateSDiv(x, y);
+    case udiv_int: return builder.CreateUDiv(x, y);
+    case srem_int: return builder.CreateSRem(x, y);
+    case urem_int: return builder.CreateURem(x, y);
 
 // Implements IEEE negate. Unfortunately there is no compliant way
 // to implement this in LLVM 3.4, though there are two different idioms
 // that do the correct thing on LLVM <= 3.3 and >= 3.5 respectively.
 // See issue #7868
 #if JL_LLVM_VERSION >= 30500
-    case neg_float: return math_builder(ctx)().CreateFSub(ConstantFP::get(FT(t), -0.0), FP(x));
-    case neg_float_fast: return math_builder(ctx, true)().CreateFNeg(FP(x));
+    case neg_float: return math_builder(ctx)().CreateFSub(ConstantFP::get(t, -0.0), x);
+    case neg_float_fast: return math_builder(ctx, true)().CreateFNeg(x);
 #else
     case neg_float:
-        return math_builder(ctx)().CreateFMul(ConstantFP::get(FT(t), -1.0), FP(x));
+        return math_builder(ctx)().CreateFMul(ConstantFP::get(t, -1.0), x);
     case neg_float_fast:
-        return math_builder(ctx, true)().CreateFMul(ConstantFP::get(FT(t), -1.0), FP(x));
+        return math_builder(ctx, true)().CreateFMul(ConstantFP::get(t, -1.0), x);
 #endif
-    case add_float: return math_builder(ctx)().CreateFAdd(FP(x), FP(y));
-    case sub_float: return math_builder(ctx)().CreateFSub(FP(x), FP(y));
-    case mul_float: return math_builder(ctx)().CreateFMul(FP(x), FP(y));
-    case div_float: return math_builder(ctx)().CreateFDiv(FP(x), FP(y));
-    case rem_float: return math_builder(ctx)().CreateFRem(FP(x), FP(y));
-    case add_float_fast: return math_builder(ctx, true)().CreateFAdd(FP(x), FP(y));
-    case sub_float_fast: return math_builder(ctx, true)().CreateFSub(FP(x), FP(y));
-    case mul_float_fast: return math_builder(ctx, true)().CreateFMul(FP(x), FP(y));
-    case div_float_fast: return math_builder(ctx, true)().CreateFDiv(FP(x), FP(y));
-    case rem_float_fast: return math_builder(ctx, true)().CreateFRem(FP(x), FP(y));
+    case add_float: return math_builder(ctx)().CreateFAdd(x, y);
+    case sub_float: return math_builder(ctx)().CreateFSub(x, y);
+    case mul_float: return math_builder(ctx)().CreateFMul(x, y);
+    case div_float: return math_builder(ctx)().CreateFDiv(x, y);
+    case rem_float: return math_builder(ctx)().CreateFRem(x, y);
+    case add_float_fast: return math_builder(ctx, true)().CreateFAdd(x, y);
+    case sub_float_fast: return math_builder(ctx, true)().CreateFSub(x, y);
+    case mul_float_fast: return math_builder(ctx, true)().CreateFMul(x, y);
+    case div_float_fast: return math_builder(ctx, true)().CreateFDiv(x, y);
+    case rem_float_fast: return math_builder(ctx, true)().CreateFRem(x, y);
     case fma_float: {
       assert(y->getType() == x->getType());
       assert(z->getType() == y->getType());
-      Value *fmaintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::fma,
-                                   ArrayRef<Type*>(x->getType()));
+      Value *fmaintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::fma, makeArrayRef(t));
 #if JL_LLVM_VERSION >= 30700
-      return builder.CreateCall(fmaintr,{ FP(x), FP(y), FP(z) });
+      return builder.CreateCall(fmaintr, {x, y, z});
 #else
-      return builder.CreateCall3(fmaintr, FP(x), FP(y), FP(z));
+      return builder.CreateCall3(fmaintr, x, y, z);
 #endif
     }
-    case muladd_float:
+    case muladd_float: {
 #if JL_LLVM_VERSION >= 30400
-    {
       assert(y->getType() == x->getType());
       assert(z->getType() == y->getType());
+      Value *muladdintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::fmuladd, makeArrayRef(t));
 #if JL_LLVM_VERSION >= 30700
-      return builder.CreateCall
+      return builder.CreateCall(muladdintr, {x, y, z});
 #else
-      return builder.CreateCall3
+      return builder.CreateCall3(muladdintr, x, y, z);
 #endif
-        (Intrinsic::getDeclaration(jl_Module, Intrinsic::fmuladd,
-                                   ArrayRef<Type*>(x->getType())),
-         #if JL_LLVM_VERSION >= 30700
-         {FP(x), FP(y), FP(z)}
-         #else
-         FP(x), FP(y), FP(z)
-         #endif
-        );
-    }
 #else
-      return math_builder(ctx, true)().
-        CreateFAdd(builder.CreateFMul(FP(x), FP(y)), FP(z));
+      return math_builder(ctx, true)().CreateFAdd(builder.CreateFMul(x, y), z);
 #endif
+    }
 
     case checked_sadd_int:
     case checked_uadd_int:
@@ -1144,8 +1077,7 @@ static Value *emit_untyped_intrinsic(intrinsic f, Value *x, Value *y, Value *z,
     case checked_usub_int:
     case checked_smul_int:
     case checked_umul_int: {
-        Value *ix = JL_INT(x); Value *iy = JL_INT(y);
-        assert(ix->getType() == iy->getType());
+        assert(x->getType() == y->getType());
         Value *intr =
             Intrinsic::getDeclaration(jl_Module,
                 f==checked_sadd_int ?
@@ -1159,296 +1091,216 @@ static Value *emit_untyped_intrinsic(intrinsic f, Value *x, Value *y, Value *z,
                    (f==checked_smul_int ?
                     Intrinsic::smul_with_overflow :
                     Intrinsic::umul_with_overflow)))),
-                ArrayRef<Type*>(ix->getType()));
+                makeArrayRef(t));
 #if JL_LLVM_VERSION >= 30700
-        Value *res = builder.CreateCall(intr,{ix, iy});
+        Value *res = builder.CreateCall(intr, {x, y});
 #else
-        Value *res = builder.CreateCall2(intr, ix, iy);
+        Value *res = builder.CreateCall2(intr, x, y);
 #endif
         Value *obit = builder.CreateExtractValue(res, ArrayRef<unsigned>(1));
         raise_exception_if(obit, literal_pointer_val(jl_overflow_exception), ctx);
         return builder.CreateExtractValue(res, ArrayRef<unsigned>(0));
     }
 
-    case checked_sdiv_int:
-        den = JL_INT(y);
-        t = den->getType();
-        x = JL_INT(x);
-
-        typemin = builder.CreateShl(ConstantInt::get(t,1),
-                                    x->getType()->getPrimitiveSizeInBits()-1);
-        raise_exception_unless(builder.
-                               CreateAnd(builder.
-                                         CreateICmpNE(den, ConstantInt::get(t,0)),
-                                         builder.
-                                         CreateOr(builder.
-                                                  CreateICmpNE(den,
-                                                               ConstantInt::get(t,-1,true)),
-                                                  builder.CreateICmpNE(x, typemin))),
-                               literal_pointer_val(jl_diverror_exception), ctx);
+    case checked_sdiv_int: {
+        Value *typemin = builder.CreateShl(ConstantInt::get(t, 1), t->getPrimitiveSizeInBits() - 1);
+        raise_exception_unless(
+                builder.CreateAnd(
+                    builder.CreateICmpNE(y, ConstantInt::get(t, 0)),
+                    builder.CreateOr(
+                        builder.CreateICmpNE(y, ConstantInt::get(t, -1, true)),
+                        builder.CreateICmpNE(x, typemin))),
+                literal_pointer_val(jl_diverror_exception), ctx);
 
-        return builder.CreateSDiv(x, den);
+        return builder.CreateSDiv(x, y);
+    }
     case checked_udiv_int:
-        den = JL_INT(y);
-        t = den->getType();
-        raise_exception_unless(builder.CreateICmpNE(den, ConstantInt::get(t,0)),
+        raise_exception_unless(builder.CreateICmpNE(y, ConstantInt::get(t, 0)),
                                literal_pointer_val(jl_diverror_exception), ctx);
-        return builder.CreateUDiv(JL_INT(x), den);
+        return builder.CreateUDiv(x, y);
 
     case checked_srem_int:
-        return emit_checked_srem_int(JL_INT(x), JL_INT(y), ctx);
+        return emit_checked_srem_int(x, y, ctx);
 
     case checked_urem_int:
-        den = JL_INT(y);
-        t = den->getType();
-        raise_exception_unless(builder.CreateICmpNE(den, ConstantInt::get(t,0)),
+        raise_exception_unless(builder.CreateICmpNE(y, ConstantInt::get(t, 0)),
                                literal_pointer_val(jl_diverror_exception), ctx);
-        return builder.CreateURem(JL_INT(x), den);
+        return builder.CreateURem(x, y);
 
     case check_top_bit:
         // raise InexactError if argument's top bit is set
-        x = JL_INT(x);
-        raise_exception_if(builder.
-                           CreateTrunc(builder.
-                                       CreateLShr(x, ConstantInt::get(t, t->getPrimitiveSizeInBits()-1)),
-                                       T_int1),
-                           literal_pointer_val(jl_inexact_exception), ctx);
+        raise_exception_if(
+                builder.CreateTrunc(
+                    builder.CreateLShr(x, ConstantInt::get(t, t->getPrimitiveSizeInBits() - 1)),
+                    T_int1),
+                literal_pointer_val(jl_inexact_exception), ctx);
         return x;
 
-    case eq_int:  *newtyp = jl_bool_type; return builder.CreateICmpEQ(JL_INT(x), JL_INT(y));
-    case ne_int:  *newtyp = jl_bool_type; return builder.CreateICmpNE(JL_INT(x), JL_INT(y));
-    case slt_int: *newtyp = jl_bool_type; return builder.CreateICmpSLT(JL_INT(x), JL_INT(y));
-    case ult_int: *newtyp = jl_bool_type; return builder.CreateICmpULT(JL_INT(x), JL_INT(y));
-    case sle_int: *newtyp = jl_bool_type; return builder.CreateICmpSLE(JL_INT(x), JL_INT(y));
-    case ule_int: *newtyp = jl_bool_type; return builder.CreateICmpULE(JL_INT(x), JL_INT(y));
+    case eq_int:  *newtyp = jl_bool_type; return builder.CreateICmpEQ(x, y);
+    case ne_int:  *newtyp = jl_bool_type; return builder.CreateICmpNE(x, y);
+    case slt_int: *newtyp = jl_bool_type; return builder.CreateICmpSLT(x, y);
+    case ult_int: *newtyp = jl_bool_type; return builder.CreateICmpULT(x, y);
+    case sle_int: *newtyp = jl_bool_type; return builder.CreateICmpSLE(x, y);
+    case ule_int: *newtyp = jl_bool_type; return builder.CreateICmpULE(x, y);
 
-    case eq_float: *newtyp = jl_bool_type; return math_builder(ctx)().CreateFCmpOEQ(FP(x), FP(y));
-    case ne_float: *newtyp = jl_bool_type; return math_builder(ctx)().CreateFCmpUNE(FP(x), FP(y));
-    case lt_float: *newtyp = jl_bool_type; return math_builder(ctx)().CreateFCmpOLT(FP(x), FP(y));
-    case le_float: *newtyp = jl_bool_type; return math_builder(ctx)().CreateFCmpOLE(FP(x), FP(y));
+    case eq_float: *newtyp = jl_bool_type; return math_builder(ctx)().CreateFCmpOEQ(x, y);
+    case ne_float: *newtyp = jl_bool_type; return math_builder(ctx)().CreateFCmpUNE(x, y);
+    case lt_float: *newtyp = jl_bool_type; return math_builder(ctx)().CreateFCmpOLT(x, y);
+    case le_float: *newtyp = jl_bool_type; return math_builder(ctx)().CreateFCmpOLE(x, y);
 
-    case eq_float_fast: *newtyp = jl_bool_type; return math_builder(ctx, true)().CreateFCmpOEQ(FP(x), FP(y));
-    case ne_float_fast: *newtyp = jl_bool_type; return math_builder(ctx, true)().CreateFCmpUNE(FP(x), FP(y));
-    case lt_float_fast: *newtyp = jl_bool_type; return math_builder(ctx, true)().CreateFCmpOLT(FP(x), FP(y));
-    case le_float_fast: *newtyp = jl_bool_type; return math_builder(ctx, true)().CreateFCmpOLE(FP(x), FP(y));
+    case eq_float_fast: *newtyp = jl_bool_type; return math_builder(ctx, true)().CreateFCmpOEQ(x, y);
+    case ne_float_fast: *newtyp = jl_bool_type; return math_builder(ctx, true)().CreateFCmpUNE(x, y);
+    case lt_float_fast: *newtyp = jl_bool_type; return math_builder(ctx, true)().CreateFCmpOLT(x, y);
+    case le_float_fast: *newtyp = jl_bool_type; return math_builder(ctx, true)().CreateFCmpOLE(x, y);
 
     case fpiseq: {
         *newtyp = jl_bool_type;
-        Value *xi = JL_INT(x);
-        Value *yi = JL_INT(y);
-        x = FP(x);
-        fy = FP(y);
+        Type *it = INTT(t);
+        Value *xi = builder.CreateBitCast(x, it);
+        Value *yi = builder.CreateBitCast(y, it);
         return builder.CreateOr(builder.CreateAnd(builder.CreateFCmpUNO(x, x),
-                                                  builder.CreateFCmpUNO(fy, fy)),
+                                                  builder.CreateFCmpUNO(y, y)),
                                 builder.CreateICmpEQ(xi, yi));
     }
 
     case fpislt: {
         *newtyp = jl_bool_type;
-        Value *xi = JL_INT(x);
-        Value *yi = JL_INT(y);
-        x = FP(x);
-        fy = FP(y);
+        Type *it = INTT(t);
+        Value *xi = builder.CreateBitCast(x, it);
+        Value *yi = builder.CreateBitCast(y, it);
         return builder.CreateOr(
             builder.CreateAnd(
                 builder.CreateFCmpORD(x, x),
-                builder.CreateFCmpUNO(fy, fy)
-            ),
+                builder.CreateFCmpUNO(y, y)),
             builder.CreateAnd(
-                builder.CreateFCmpORD(x, fy),
+                builder.CreateFCmpORD(x, y),
                 builder.CreateOr(
                     builder.CreateAnd(
-                        builder.CreateICmpSGE(xi, ConstantInt::get(xi->getType(), 0)),
-                        builder.CreateICmpSLT(xi, yi)
-                    ),
+                        builder.CreateICmpSGE(xi, ConstantInt::get(it, 0)),
+                        builder.CreateICmpSLT(xi, yi)),
                     builder.CreateAnd(
-                        builder.CreateICmpSLT(xi, ConstantInt::get(xi->getType(), 0)),
-                        builder.CreateICmpUGT(xi, yi)
-                    )
-                )
-            )
-        );
+                        builder.CreateICmpSLT(xi, ConstantInt::get(it, 0)),
+                        builder.CreateICmpUGT(xi, yi)))));
     }
 
-    case and_int: return builder.CreateAnd(JL_INT(x), JL_INT(y));
-    case or_int:  return builder.CreateOr(JL_INT(x), JL_INT(y));
-    case xor_int: return builder.CreateXor(JL_INT(x), JL_INT(y));
-    case not_int: return builder.CreateXor(JL_INT(x), ConstantInt::get(t, -1, true));
+    case and_int: return builder.CreateAnd(x, y);
+    case or_int:  return builder.CreateOr(x, y);
+    case xor_int: return builder.CreateXor(x, y);
+
     case shl_int:
-        x = JL_INT(x); y = JL_INT(y);
-        return builder.
-            CreateSelect(builder.
-                         CreateICmpUGE(y, ConstantInt::get(y->getType(),
-                                                           x->getType()->getPrimitiveSizeInBits())),
-                         ConstantInt::get(x->getType(),0),
-                         builder.CreateShl(x, uint_cnvt(t,y)));
+        return builder.CreateSelect(
+                builder.CreateICmpUGE(y, ConstantInt::get(y->getType(),
+                                                          t->getPrimitiveSizeInBits())),
+                ConstantInt::get(t, 0),
+                builder.CreateShl(x, uint_cnvt(t, y)));
     case lshr_int:
-        x = JL_INT(x); y = JL_INT(y);
-        return builder.
-            CreateSelect(builder.
-                         CreateICmpUGE(y, ConstantInt::get(y->getType(),
-                                                           x->getType()->getPrimitiveSizeInBits())),
-                         ConstantInt::get(x->getType(),0),
-                         builder.CreateLShr(x, uint_cnvt(t,y)));
+        return builder.CreateSelect(
+                builder.CreateICmpUGE(y, ConstantInt::get(y->getType(),
+                                                          t->getPrimitiveSizeInBits())),
+                ConstantInt::get(t, 0),
+                builder.CreateLShr(x, uint_cnvt(t, y)));
     case ashr_int:
-        x = JL_INT(x); y = JL_INT(y);
-        return builder.
-            CreateSelect(builder.
-                         CreateICmpUGE(y, ConstantInt::get(y->getType(),
-                                                           x->getType()->getPrimitiveSizeInBits())),
-                         builder.CreateAShr(x, ConstantInt::get(x->getType(),
-                                                                x->getType()->getPrimitiveSizeInBits()-1)),
-                         builder.CreateAShr(x, uint_cnvt(t,y)));
-    case bswap_int:
-        x = JL_INT(x);
-        return builder.CreateCall(
-            Intrinsic::getDeclaration(jl_Module, Intrinsic::bswap,
-                                      ArrayRef<Type*>(x->getType())), x);
-    case ctpop_int:
-        x = JL_INT(x);
-        return builder.CreateCall(
-            Intrinsic::getDeclaration(jl_Module, Intrinsic::ctpop,
-                                      ArrayRef<Type*>(x->getType())), x);
+        return builder.CreateSelect(
+                builder.CreateICmpUGE(y, ConstantInt::get(y->getType(),
+                                                          t->getPrimitiveSizeInBits())),
+                builder.CreateAShr(x, ConstantInt::get(t, t->getPrimitiveSizeInBits() - 1)),
+                builder.CreateAShr(x, uint_cnvt(t, y)));
+
+    case bswap_int: {
+        Value *bswapintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::bswap, makeArrayRef(t));
+        return builder.CreateCall(bswapintr, x);
+    }
+    case ctpop_int: {
+        Value *ctpopintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::ctpop, makeArrayRef(t));
+        return builder.CreateCall(ctpopintr, x);
+    }
     case ctlz_int: {
-        x = JL_INT(x);
-        Type *types[1] = {x->getType()};
-        Value *ctlz = Intrinsic::getDeclaration(jl_Module, Intrinsic::ctlz,
-                                      ArrayRef<Type*>(types));
+        Value *ctlz = Intrinsic::getDeclaration(jl_Module, Intrinsic::ctlz, makeArrayRef(t));
+        y = ConstantInt::get(T_int1, 0);
 #if JL_LLVM_VERSION >= 30700
-        return builder.CreateCall(ctlz, {x, ConstantInt::get(T_int1,0)});
+        return builder.CreateCall(ctlz, {x, y});
 #else
-        return builder.CreateCall2(ctlz, x, ConstantInt::get(T_int1,0));
+        return builder.CreateCall2(ctlz, x, y);
 #endif
     }
     case cttz_int: {
-        x = JL_INT(x);
-        Type *types[1] = {x->getType()};
-        Value *cttz = Intrinsic::getDeclaration(jl_Module, Intrinsic::cttz, ArrayRef<Type*>(types));
+        Value *cttz = Intrinsic::getDeclaration(jl_Module, Intrinsic::cttz, makeArrayRef(t));
+        y = ConstantInt::get(T_int1, 0);
 #if JL_LLVM_VERSION >= 30700
-        return builder.CreateCall(cttz, {x, ConstantInt::get(T_int1, 0)});
+        return builder.CreateCall(cttz, {x, y});
 #else
-        return builder.CreateCall2(cttz, x, ConstantInt::get(T_int1, 0));
+        return builder.CreateCall2(cttz, x, y);
 #endif
     }
 
-    case abs_float:
-    {
-        x = FP(x);
+    case abs_float: {
+        Value *absintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::fabs, makeArrayRef(t));
 #if JL_LLVM_VERSION >= 30400
-        return builder.CreateCall(
-            Intrinsic::getDeclaration(jl_Module, Intrinsic::fabs,
-                                                            ArrayRef<Type*>(x->getType())),
-                                  x);
+        return builder.CreateCall(absintr, x);
 #else
-        Type *intt = JL_INTT(x->getType());
-        Value *bits = emit_bitcast(FP(x), intt);
+        Type *intt = INTT(t);
+        Value *bits = builder.CreateBitCast(x, intt);
         Value *absbits =
             builder.CreateAnd(bits,
-                              ConstantInt::get(intt, APInt::getSignedMaxValue(((IntegerType*)intt)->getBitWidth())));
-        return emit_bitcast(absbits, x->getType());
+                              ConstantInt::get(intt, APInt::getSignedMaxValue(cast<IntegerType>(intt)->getBitWidth())));
+        return builder.CreateBitCast(absbits, t);
 #endif
     }
-    case copysign_float:
-    {
-        x = FP(x);
-        fy = FP(y);
-        Type *intt = JL_INTT(x->getType());
-        Value *bits = emit_bitcast(x, intt);
-        Value *sbits = emit_bitcast(fy, intt);
-        unsigned nb = ((IntegerType*)intt)->getBitWidth();
+    case copysign_float: {
+        Value *bits = builder.CreateBitCast(x, t);
+        Value *sbits = builder.CreateBitCast(y, t);
+        unsigned nb = cast<IntegerType>(t)->getBitWidth();
         APInt notsignbit = APInt::getSignedMaxValue(nb);
-        APInt signbit0(nb, 0); signbit0.setBit(nb-1);
-        Value *rbits =
-            builder.CreateOr(builder.CreateAnd(bits,
-                                               ConstantInt::get(intt,
-                                                                notsignbit)),
-                             builder.CreateAnd(sbits,
-                                               ConstantInt::get(intt,
-                                                                signbit0)));
-        return emit_bitcast(rbits, x->getType());
+        APInt signbit0(nb, 0); signbit0.setBit(nb - 1);
+        return builder.CreateOr(
+                    builder.CreateAnd(bits, ConstantInt::get(t, notsignbit)),
+                    builder.CreateAnd(sbits, ConstantInt::get(t, signbit0)));
     }
-    case flipsign_int:
-    {
-        x = JL_INT(x);
-        fy = JL_INT(y);
-        Type *intt = x->getType();
+    case flipsign_int: {
         ConstantInt *cx = dyn_cast<ConstantInt>(x);
-        ConstantInt *cy = dyn_cast<ConstantInt>(fy);
+        ConstantInt *cy = dyn_cast<ConstantInt>(y);
         if (cx && cy) {
             APInt ix = cx->getValue();
             APInt iy = cy->getValue();
-            return ConstantInt::get(intt, iy.isNonNegative() ? ix : -ix);
+            return ConstantInt::get(t, iy.isNonNegative() ? ix : -ix);
         }
         if (cy) {
             APInt iy = cy->getValue();
-            return iy.isNonNegative() ? x : builder.CreateSub(ConstantInt::get(intt,0), x);
+            return iy.isNonNegative() ? x : builder.CreateSub(ConstantInt::get(t, 0), x);
         }
-        Value *tmp = builder.CreateAShr(fy, ConstantInt::get(intt,((IntegerType*)intt)->getBitWidth()-1));
-        return builder.CreateXor(builder.CreateAdd(x,tmp),tmp);
+        Value *tmp = builder.CreateAShr(y, ConstantInt::get(t, cast<IntegerType>(t)->getBitWidth() - 1));
+        return builder.CreateXor(builder.CreateAdd(x, tmp), tmp);
     }
     case ceil_llvm: {
-        x = FP(x);
-        return builder.CreateCall(Intrinsic::getDeclaration(jl_Module, Intrinsic::ceil,
-                                                            ArrayRef<Type*>(x->getType())),
-                                  x);
+        Value *ceilintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::ceil, makeArrayRef(t));
+        return builder.CreateCall(ceilintr, x);
     }
     case floor_llvm: {
-        x = FP(x);
-        return builder.CreateCall(Intrinsic::getDeclaration(jl_Module, Intrinsic::floor,
-                                                            ArrayRef<Type*>(x->getType())),
-                                  x);
+        Value *floorintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::floor, makeArrayRef(t));
+        return builder.CreateCall(floorintr, x);
     }
     case trunc_llvm: {
-        x = FP(x);
-        return builder.CreateCall(Intrinsic::getDeclaration(jl_Module, Intrinsic::trunc,
-                                                            ArrayRef<Type*>(x->getType())),
-                                  x);
+        Value *truncintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::trunc, makeArrayRef(t));
+        return builder.CreateCall(truncintr, x);
     }
     case rint_llvm: {
-        x = FP(x);
-        return builder.CreateCall(Intrinsic::getDeclaration(jl_Module, Intrinsic::rint,
-                                                            ArrayRef<Type*>(x->getType())),
-                                  x);
+        Value *rintintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::rint, makeArrayRef(t));
+        return builder.CreateCall(rintintr, x);
     }
-    case sqrt_llvm: {
-        x = FP(x);
-        raise_exception_unless(builder.CreateFCmpUGE(x, ConstantFP::get(x->getType(),0.0)),
+    case sqrt_llvm:
+        raise_exception_unless(builder.CreateFCmpUGE(x, ConstantFP::get(t, 0.0)),
                                literal_pointer_val(jl_domain_exception), ctx);
-        return builder.CreateCall(Intrinsic::getDeclaration(jl_Module, Intrinsic::sqrt,
-                                                            ArrayRef<Type*>(x->getType())),
-                                  x);
-    }
-    case powi_llvm: {
-        x = FP(x);
-        y = JL_INT(y);
-        Type *tx = x->getType(); // TODO: LLVM expects this to be i32
-#if JL_LLVM_VERSION >= 30600
-        Type *ts[1] = { tx };
-        Value *powi = Intrinsic::getDeclaration(jl_Module, Intrinsic::powi,
-            ArrayRef<Type*>(ts));
-#if JL_LLVM_VERSION >= 30700
-        return builder.CreateCall(powi, {x, y});
-#else
-        return builder.CreateCall2(powi, x, y);
-#endif
-#else
-        // issue #6506
-        return builder.CreateCall2(prepare_call(tx == T_float64 ? jlpow_func : jlpowf_func),
-                x, builder.CreateSIToFP(y, tx));
-#endif
-    }
+        // fall-through
     case sqrt_llvm_fast: {
-        x = FP(x);
-        return builder.CreateCall(Intrinsic::getDeclaration(jl_Module, Intrinsic::sqrt,
-                                                            ArrayRef<Type*>(x->getType())),
-                                  x);
+        Value *sqrtintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::sqrt, makeArrayRef(t));
+        return builder.CreateCall(sqrtintr, x);
     }
 
     default:
-        assert(false);
+        assert(0 && "invalid intrinsic");
+        abort();
     }
-    assert(false);
-    return NULL;
+    assert(0 && "unreachable");
 }
 
 #define BOX_F(ct,jl_ct)                                                 \