Skip to content

Commit

Permalink
auto merge of #15998 : luqmana/rust/nmnnbd, r=thestinger
Browse files Browse the repository at this point in the history
LLVM recently added a new attribute, dereferenceable: http://reviews.llvm.org/D4449

>This patch adds a dereferenceable attribute. In some sense, this is a companion to the nonnull attribute, but specifies that the pointer is known to be dereferenceable in the same sense as a pointer generated by alloca is known to be dereferenceable.

In Rust, everywhere that we previously marked a pointer as `nonnull` we can now mark it as `dereferenceable` (which implies `nonnull`), since we know the size of the pointee. There is one exception: generating calls for TyVisitor. There, the self type does not appear to have been substituted yet (we still have a `ty_param`) and the argument is treated as an opaque pointer, so I left that case as `nonnull`.

With this, LLVM can for example hoist a load out of a loop where it previously couldn't:

```Rust
// Returns the sum of `*c * i` over every `i` in `range(0, n)` with `i > 0`.
pub fn baz(c: &uint, n: uint) -> uint {
    let mut res = 0;
    for i in range(0, n) {
        // `*c` is only read on this conditional path, so the load through
        // `c` sits inside the branch rather than at the top of the loop.
        if i > 0 {
            res += *c * i;
        }
    }
    res
}
```

Before:
```llvm
define i64 @baz(i64* noalias nocapture nonnull readonly, i64) unnamed_addr #0 {
entry-block:
  br label %for_loopback.outer

for_loopback.outer:                               ; preds = %then-block-33-, %entry-block
  %.ph = phi i64 [ %.lcssa, %then-block-33- ], [ 0, %entry-block ]
  %res.0.ph = phi i64 [ %8, %then-block-33- ], [ 0, %entry-block ]
  br label %for_loopback

for_exit:                                         ; preds = %for_loopback
  %res.0.ph.lcssa = phi i64 [ %res.0.ph, %for_loopback ]
  ret i64 %res.0.ph.lcssa

for_loopback:                                     ; preds = %for_loopback.outer, %for_body
  %2 = phi i64 [ %4, %for_body ], [ %.ph, %for_loopback.outer ]
  %3 = icmp ult i64 %2, %1
  br i1 %3, label %for_body, label %for_exit

for_body:                                         ; preds = %for_loopback
  %4 = add i64 %2, 1
  %5 = icmp eq i64 %2, 0
  br i1 %5, label %for_loopback, label %then-block-33-

then-block-33-:                                   ; preds = %for_body
  %.lcssa = phi i64 [ %4, %for_body ]
  %.lcssa15 = phi i64 [ %2, %for_body ]
  %6 = load i64* %0, align 8                     ; <------- this load
  %7 = mul i64 %6, %.lcssa15
  %8 = add i64 %7, %res.0.ph
  br label %for_loopback.outer
}
```

After:
```llvm
define i64 @baz(i64* noalias nocapture readonly dereferenceable(8), i64) unnamed_addr #0 {
entry-block:
  %2 = load i64* %0, align 8                    ; <------- load once instead
  br label %for_loopback.outer

for_loopback.outer:                               ; preds = %then-block-33-, %entry-block
  %.ph = phi i64 [ %.lcssa, %then-block-33- ], [ 0, %entry-block ]
  %res.0.ph = phi i64 [ %8, %then-block-33- ], [ 0, %entry-block ]
  br label %for_loopback

for_exit:                                         ; preds = %for_loopback
  %res.0.ph.lcssa = phi i64 [ %res.0.ph, %for_loopback ]
  ret i64 %res.0.ph.lcssa

for_loopback:                                     ; preds = %for_loopback.outer, %for_body
  %3 = phi i64 [ %5, %for_body ], [ %.ph, %for_loopback.outer ]
  %4 = icmp ult i64 %3, %1
  br i1 %4, label %for_body, label %for_exit

for_body:                                         ; preds = %for_loopback
  %5 = add i64 %3, 1
  %6 = icmp eq i64 %3, 0
  br i1 %6, label %for_loopback, label %then-block-33-

then-block-33-:                                   ; preds = %for_body
  %.lcssa = phi i64 [ %5, %for_body ]
  %.lcssa15 = phi i64 [ %3, %for_body ]
  %7 = mul i64 %2, %.lcssa15
  %8 = add i64 %7, %res.0.ph
  br label %for_loopback.outer
}
```
  • Loading branch information
bors committed Jul 26, 2014
2 parents 50c62b4 + a78c0f1 commit 7aa4079
Show file tree
Hide file tree
Showing 10 changed files with 237 additions and 81 deletions.
122 changes: 77 additions & 45 deletions src/librustc/middle/trans/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -281,11 +281,7 @@ pub fn decl_rust_fn(ccx: &CrateContext, fn_ty: ty::t, name: &str) -> ValueRef {

let llfn = decl_fn(ccx, name, llvm::CCallConv, llfty, output);
let attrs = get_fn_llvm_attributes(ccx, fn_ty);
for &(idx, attr) in attrs.iter() {
unsafe {
llvm::LLVMAddFunctionAttribute(llfn, idx as c_uint, attr);
}
}
attrs.apply_llfn(llfn);

llfn
}
Expand Down Expand Up @@ -962,7 +958,7 @@ pub fn invoke<'a>(
llargs.as_slice(),
normal_bcx.llbb,
landing_pad,
attributes.as_slice());
Some(attributes));
return (llresult, normal_bcx);
} else {
debug!("calling {} at {}", llfn, bcx.llbb);
Expand All @@ -975,7 +971,7 @@ pub fn invoke<'a>(
None => debuginfo::clear_source_location(bcx.fcx)
};

let llresult = Call(bcx, llfn, llargs.as_slice(), attributes.as_slice());
let llresult = Call(bcx, llfn, llargs.as_slice(), Some(attributes));
return (llresult, bcx);
}
}
Expand Down Expand Up @@ -1081,7 +1077,7 @@ pub fn call_lifetime_start(cx: &Block, ptr: ValueRef) {
let llsize = C_u64(ccx, machine::llsize_of_alloc(ccx, val_ty(ptr).element_type()));
let ptr = PointerCast(cx, ptr, Type::i8p(ccx));
let lifetime_start = ccx.get_intrinsic(&"llvm.lifetime.start");
Call(cx, lifetime_start, [llsize, ptr], []);
Call(cx, lifetime_start, [llsize, ptr], None);
}

pub fn call_lifetime_end(cx: &Block, ptr: ValueRef) {
Expand All @@ -1095,7 +1091,7 @@ pub fn call_lifetime_end(cx: &Block, ptr: ValueRef) {
let llsize = C_u64(ccx, machine::llsize_of_alloc(ccx, val_ty(ptr).element_type()));
let ptr = PointerCast(cx, ptr, Type::i8p(ccx));
let lifetime_end = ccx.get_intrinsic(&"llvm.lifetime.end");
Call(cx, lifetime_end, [llsize, ptr], []);
Call(cx, lifetime_end, [llsize, ptr], None);
}

pub fn call_memcpy(cx: &Block, dst: ValueRef, src: ValueRef, n_bytes: ValueRef, align: u32) {
Expand All @@ -1111,7 +1107,7 @@ pub fn call_memcpy(cx: &Block, dst: ValueRef, src: ValueRef, n_bytes: ValueRef,
let size = IntCast(cx, n_bytes, ccx.int_type);
let align = C_i32(ccx, align as i32);
let volatile = C_bool(ccx, false);
Call(cx, memcpy, [dst_ptr, src_ptr, size, align, volatile], []);
Call(cx, memcpy, [dst_ptr, src_ptr, size, align, volatile], None);
}

pub fn memcpy_ty(bcx: &Block, dst: ValueRef, src: ValueRef, t: ty::t) {
Expand Down Expand Up @@ -1156,7 +1152,7 @@ fn memzero(b: &Builder, llptr: ValueRef, ty: Type) {
let size = machine::llsize_of(ccx, ty);
let align = C_i32(ccx, llalign_of_min(ccx, ty) as i32);
let volatile = C_bool(ccx, false);
b.call(llintrinsicfn, [llptr, llzeroval, size, align, volatile], []);
b.call(llintrinsicfn, [llptr, llzeroval, size, align, volatile], None);
}

pub fn alloc_ty(bcx: &Block, t: ty::t, name: &str) -> ValueRef {
Expand Down Expand Up @@ -2040,7 +2036,7 @@ fn register_fn(ccx: &CrateContext,
}

pub fn get_fn_llvm_attributes(ccx: &CrateContext, fn_ty: ty::t)
-> Vec<(uint, u64)> {
-> llvm::AttrBuilder {
use middle::ty::{BrAnon, ReLateBound};

let (fn_sig, abi, has_env) = match ty::get(fn_ty).sty {
Expand All @@ -2056,31 +2052,33 @@ pub fn get_fn_llvm_attributes(ccx: &CrateContext, fn_ty: ty::t)
_ => fail!("expected closure or function.")
};

// Since index 0 is the return value of the llvm func, we start
// at either 1 or 2 depending on whether there's an env slot or not
let mut first_arg_offset = if has_env { 2 } else { 1 };
let mut attrs = llvm::AttrBuilder::new();
let ret_ty = fn_sig.output;

// These have an odd calling convention, so we skip them for now.
//
// FIXME(pcwalton): We don't have to skip them; just untuple the result.
if abi == RustCall {
return Vec::new()
return attrs;
}

// Since index 0 is the return value of the llvm func, we start
// at either 1 or 2 depending on whether there's an env slot or not
let mut first_arg_offset = if has_env { 2 } else { 1 };
let mut attrs = Vec::new();
let ret_ty = fn_sig.output;

// A function pointer is called without the declaration
// available, so we have to apply any attributes with ABI
// implications directly to the call instruction. Right now,
// the only attribute we need to worry about is `sret`.
if type_of::return_uses_outptr(ccx, ret_ty) {
attrs.push((1, llvm::StructRetAttribute as u64));
let llret_sz = llsize_of_real(ccx, type_of::type_of(ccx, ret_ty));

// The outptr can be noalias and nocapture because it's entirely
// invisible to the program. We can also mark it as nonnull
attrs.push((1, llvm::NoAliasAttribute as u64));
attrs.push((1, llvm::NoCaptureAttribute as u64));
attrs.push((1, llvm::NonNullAttribute as u64));
// invisible to the program. We also know it's nonnull as well
// as how many bytes we can dereference
attrs.arg(1, llvm::StructRetAttribute)
.arg(1, llvm::NoAliasAttribute)
.arg(1, llvm::NoCaptureAttribute)
.arg(1, llvm::DereferenceableAttribute(llret_sz));

// Add one more since there's an outptr
first_arg_offset += 1;
Expand All @@ -2094,27 +2092,28 @@ pub fn get_fn_llvm_attributes(ccx: &CrateContext, fn_ty: ty::t)
ty::ty_str | ty::ty_vec(..) | ty::ty_trait(..) => true, _ => false
} => {}
ty::ty_uniq(_) => {
attrs.push((llvm::ReturnIndex as uint, llvm::NoAliasAttribute as u64));
attrs.ret(llvm::NoAliasAttribute);
}
_ => {}
}

// We can also mark the return value as `nonnull` in certain cases
// We can also mark the return value as `dereferenceable` in certain cases
match ty::get(ret_ty).sty {
// These are not really pointers but pairs, (pointer, len)
ty::ty_uniq(it) |
ty::ty_rptr(_, ty::mt { ty: it, .. }) if match ty::get(it).sty {
ty::ty_str | ty::ty_vec(..) | ty::ty_trait(..) => true, _ => false
} => {}
ty::ty_uniq(_) | ty::ty_rptr(_, _) => {
attrs.push((llvm::ReturnIndex as uint, llvm::NonNullAttribute as u64));
ty::ty_uniq(inner) | ty::ty_rptr(_, ty::mt { ty: inner, .. }) => {
let llret_sz = llsize_of_real(ccx, type_of::type_of(ccx, inner));
attrs.ret(llvm::DereferenceableAttribute(llret_sz));
}
_ => {}
}

match ty::get(ret_ty).sty {
ty::ty_bool => {
attrs.push((llvm::ReturnIndex as uint, llvm::ZExtAttribute as u64));
attrs.ret(llvm::ZExtAttribute);
}
_ => {}
}
Expand All @@ -2124,44 +2123,77 @@ pub fn get_fn_llvm_attributes(ccx: &CrateContext, fn_ty: ty::t)
match ty::get(t).sty {
// this needs to be first to prevent fat pointers from falling through
_ if !type_is_immediate(ccx, t) => {
let llarg_sz = llsize_of_real(ccx, type_of::type_of(ccx, t));

// For non-immediate arguments the callee gets its own copy of
// the value on the stack, so there are no aliases. It's also
// program-invisible so can't possibly capture
attrs.push((idx, llvm::NoAliasAttribute as u64));
attrs.push((idx, llvm::NoCaptureAttribute as u64));
attrs.push((idx, llvm::NonNullAttribute as u64));
attrs.arg(idx, llvm::NoAliasAttribute)
.arg(idx, llvm::NoCaptureAttribute)
.arg(idx, llvm::DereferenceableAttribute(llarg_sz));
}

ty::ty_bool => {
attrs.push((idx, llvm::ZExtAttribute as u64));
attrs.arg(idx, llvm::ZExtAttribute);
}

// `~` pointer parameters never alias because ownership is transferred
ty::ty_uniq(_) => {
attrs.push((idx, llvm::NoAliasAttribute as u64));
attrs.push((idx, llvm::NonNullAttribute as u64));
ty::ty_uniq(inner) => {
let llsz = llsize_of_real(ccx, type_of::type_of(ccx, inner));

attrs.arg(idx, llvm::NoAliasAttribute)
.arg(idx, llvm::DereferenceableAttribute(llsz));
}

// The visit glue deals only with opaque pointers so we don't
// actually know the concrete type of Self thus we don't know how
// many bytes to mark as dereferenceable so instead we just mark
// it as nonnull which still holds true
ty::ty_rptr(b, ty::mt { ty: it, mutbl }) if match ty::get(it).sty {
ty::ty_param(_) => true, _ => false
} && mutbl == ast::MutMutable => {
attrs.arg(idx, llvm::NoAliasAttribute)
.arg(idx, llvm::NonNullAttribute);

match b {
ReLateBound(_, BrAnon(_)) => {
attrs.arg(idx, llvm::NoCaptureAttribute);
}
_ => {}
}
}

// `&mut` pointer parameters never alias other parameters, or mutable global data
// `&` pointer parameters never alias either (for LLVM's purposes) as long as the
// interior is safe
ty::ty_rptr(b, mt) if mt.mutbl == ast::MutMutable ||
!ty::type_contents(ccx.tcx(), mt.ty).interior_unsafe() => {
attrs.push((idx, llvm::NoAliasAttribute as u64));
attrs.push((idx, llvm::NonNullAttribute as u64));

let llsz = llsize_of_real(ccx, type_of::type_of(ccx, mt.ty));
attrs.arg(idx, llvm::NoAliasAttribute)
.arg(idx, llvm::DereferenceableAttribute(llsz));

match b {
ReLateBound(_, BrAnon(_)) => {
attrs.push((idx, llvm::NoCaptureAttribute as u64));
attrs.arg(idx, llvm::NoCaptureAttribute);
}
_ => {}
}
}

// When a reference in an argument has no named lifetime, it's impossible for that
// reference to escape this function (returned or stored beyond the call by a closure).
ty::ty_rptr(ReLateBound(_, BrAnon(_)), _) => {
attrs.push((idx, llvm::NoCaptureAttribute as u64));
attrs.push((idx, llvm::NonNullAttribute as u64));
ty::ty_rptr(ReLateBound(_, BrAnon(_)), mt) => {
let llsz = llsize_of_real(ccx, type_of::type_of(ccx, mt.ty));
attrs.arg(idx, llvm::NoCaptureAttribute)
.arg(idx, llvm::DereferenceableAttribute(llsz));
}
// & pointer parameters are never null
ty::ty_rptr(_, _) => {
attrs.push((idx, llvm::NonNullAttribute as u64));

// & pointer parameters are also never null and we know exactly how
// many bytes we can dereference
ty::ty_rptr(_, mt) => {
let llsz = llsize_of_real(ccx, type_of::type_of(ccx, mt.ty));
attrs.arg(idx, llvm::DereferenceableAttribute(llsz));
}
_ => ()
}
Expand Down
8 changes: 4 additions & 4 deletions src/librustc/middle/trans/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#![allow(non_snake_case_functions)]

use llvm;
use llvm::{CallConv, AtomicBinOp, AtomicOrdering, AsmDialect};
use llvm::{CallConv, AtomicBinOp, AtomicOrdering, AsmDialect, AttrBuilder};
use llvm::{Opcode, IntPredicate, RealPredicate};
use llvm::{ValueRef, BasicBlockRef};
use middle::trans::common::*;
Expand Down Expand Up @@ -113,7 +113,7 @@ pub fn Invoke(cx: &Block,
args: &[ValueRef],
then: BasicBlockRef,
catch: BasicBlockRef,
attributes: &[(uint, u64)])
attributes: Option<AttrBuilder>)
-> ValueRef {
if cx.unreachable.get() {
return C_null(Type::i8(cx.ccx()));
Expand Down Expand Up @@ -681,13 +681,13 @@ pub fn InlineAsmCall(cx: &Block, asm: *const c_char, cons: *const c_char,
}

pub fn Call(cx: &Block, fn_: ValueRef, args: &[ValueRef],
attributes: &[(uint, u64)]) -> ValueRef {
attributes: Option<AttrBuilder>) -> ValueRef {
if cx.unreachable.get() { return _UndefReturn(cx, fn_); }
B(cx).call(fn_, args, attributes)
}

pub fn CallWithConv(cx: &Block, fn_: ValueRef, args: &[ValueRef], conv: CallConv,
attributes: &[(uint, u64)]) -> ValueRef {
attributes: Option<AttrBuilder>) -> ValueRef {
if cx.unreachable.get() { return _UndefReturn(cx, fn_); }
B(cx).call_with_conv(fn_, args, conv, attributes)
}
Expand Down
22 changes: 12 additions & 10 deletions src/librustc/middle/trans/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#![allow(dead_code)] // FFI wrappers

use llvm;
use llvm::{CallConv, AtomicBinOp, AtomicOrdering, AsmDialect};
use llvm::{CallConv, AtomicBinOp, AtomicOrdering, AsmDialect, AttrBuilder};
use llvm::{Opcode, IntPredicate, RealPredicate, False};
use llvm::{ValueRef, BasicBlockRef, BuilderRef, ModuleRef};
use middle::trans::base;
Expand Down Expand Up @@ -155,7 +155,7 @@ impl<'a> Builder<'a> {
args: &[ValueRef],
then: BasicBlockRef,
catch: BasicBlockRef,
attributes: &[(uint, u64)])
attributes: Option<AttrBuilder>)
-> ValueRef {
self.count_insn("invoke");

Expand All @@ -174,8 +174,9 @@ impl<'a> Builder<'a> {
then,
catch,
noname());
for &(idx, attr) in attributes.iter() {
llvm::LLVMAddCallSiteAttribute(v, idx as c_uint, attr);
match attributes {
Some(a) => a.apply_callsite(v),
None => {}
}
v
}
Expand Down Expand Up @@ -777,7 +778,7 @@ impl<'a> Builder<'a> {
c, noname(), False, False)
}
});
self.call(asm, [], []);
self.call(asm, [], None);
}
}

Expand All @@ -802,12 +803,12 @@ impl<'a> Builder<'a> {
unsafe {
let v = llvm::LLVMInlineAsm(
fty.to_ref(), asm, cons, volatile, alignstack, dia as c_uint);
self.call(v, inputs, [])
self.call(v, inputs, None)
}
}

pub fn call(&self, llfn: ValueRef, args: &[ValueRef],
attributes: &[(uint, u64)]) -> ValueRef {
attributes: Option<AttrBuilder>) -> ValueRef {
self.count_insn("call");

debug!("Call {} with args ({})",
Expand All @@ -820,15 +821,16 @@ impl<'a> Builder<'a> {
unsafe {
let v = llvm::LLVMBuildCall(self.llbuilder, llfn, args.as_ptr(),
args.len() as c_uint, noname());
for &(idx, attr) in attributes.iter() {
llvm::LLVMAddCallSiteAttribute(v, idx as c_uint, attr);
match attributes {
Some(a) => a.apply_callsite(v),
None => {}
}
v
}
}

pub fn call_with_conv(&self, llfn: ValueRef, args: &[ValueRef],
conv: CallConv, attributes: &[(uint, u64)]) -> ValueRef {
conv: CallConv, attributes: Option<AttrBuilder>) -> ValueRef {
self.count_insn("callwithconv");
let v = self.call(llfn, args, attributes);
llvm::SetInstructionCallConv(v, conv);
Expand Down
2 changes: 1 addition & 1 deletion src/librustc/middle/trans/closure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -588,7 +588,7 @@ pub fn get_wrapper_for_bare_fn(ccx: &CrateContext,
}
llargs.extend(args.iter().map(|arg| arg.val));

let retval = Call(bcx, fn_ptr, llargs.as_slice(), []);
let retval = Call(bcx, fn_ptr, llargs.as_slice(), None);
if type_is_zero_size(ccx, f.sig.output) || fcx.llretptr.get().is_some() {
RetVoid(bcx);
} else {
Expand Down
2 changes: 1 addition & 1 deletion src/librustc/middle/trans/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -553,7 +553,7 @@ fn trans_index<'a>(bcx: &'a Block<'a>,
let expected = Call(bcx,
expect,
[bounds_check, C_bool(ccx, false)],
[]);
None);
bcx = with_cond(bcx, expected, |bcx| {
controlflow::trans_fail_bounds_check(bcx,
index_expr.span,
Expand Down
Loading

0 comments on commit 7aa4079

Please sign in to comment.