diff --git a/src/librustc/session/config.rs b/src/librustc/session/config.rs index 7c1d457a6eeca..6ef64ef3d96d9 100644 --- a/src/librustc/session/config.rs +++ b/src/librustc/session/config.rs @@ -409,9 +409,7 @@ impl_stable_hash_for!(struct self::OutputFilenames { outputs }); -/// Codegen unit names generated by the numbered naming scheme will contain this -/// marker right before the index of the codegen unit. -pub const NUMBERED_CODEGEN_UNIT_MARKER: &'static str = ".cgu-"; +pub const RUST_CGU_EXT: &str = "rust-cgu"; impl OutputFilenames { pub fn path(&self, flavor: OutputType) -> PathBuf { @@ -442,22 +440,14 @@ impl OutputFilenames { let mut extension = String::new(); if let Some(codegen_unit_name) = codegen_unit_name { - if codegen_unit_name.contains(NUMBERED_CODEGEN_UNIT_MARKER) { - // If we use the numbered naming scheme for modules, we don't want - // the files to look like ... - // but simply .. - let marker_offset = codegen_unit_name.rfind(NUMBERED_CODEGEN_UNIT_MARKER) - .unwrap(); - let index_offset = marker_offset + NUMBERED_CODEGEN_UNIT_MARKER.len(); - extension.push_str(&codegen_unit_name[index_offset .. ]); - } else { - extension.push_str(codegen_unit_name); - }; + extension.push_str(codegen_unit_name); } if !ext.is_empty() { if !extension.is_empty() { extension.push_str("."); + extension.push_str(RUST_CGU_EXT); + extension.push_str("."); } extension.push_str(ext); @@ -1105,6 +1095,8 @@ options! 
{DebuggingOptions, DebuggingSetter, basic_debugging_options, "run the non-lexical lifetimes MIR pass"), trans_time_graph: bool = (false, parse_bool, [UNTRACKED], "generate a graphical HTML report of time spent in trans and LLVM"), + thinlto: bool = (false, parse_bool, [TRACKED], + "enable ThinLTO when possible"), } pub fn default_lib_output() -> CrateType { diff --git a/src/librustc_llvm/build.rs b/src/librustc_llvm/build.rs index dde7a38efc796..676bbc87b6cdb 100644 --- a/src/librustc_llvm/build.rs +++ b/src/librustc_llvm/build.rs @@ -115,6 +115,7 @@ fn main() { "linker", "asmparser", "mcjit", + "lto", "interpreter", "instrumentation"]; diff --git a/src/librustc_llvm/ffi.rs b/src/librustc_llvm/ffi.rs index fd4a136f50ba4..3399bf2acd891 100644 --- a/src/librustc_llvm/ffi.rs +++ b/src/librustc_llvm/ffi.rs @@ -345,6 +345,20 @@ pub enum PassKind { Module, } +/// LLVMRustThinLTOData +pub enum ThinLTOData {} + +/// LLVMRustThinLTOBuffer +pub enum ThinLTOBuffer {} + +/// LLVMRustThinLTOModule +#[repr(C)] +pub struct ThinLTOModule { + pub identifier: *const c_char, + pub data: *const u8, + pub len: usize, +} + // Opaque pointer types #[allow(missing_copy_implementations)] pub enum Module_opaque {} @@ -1271,6 +1285,9 @@ extern "C" { PM: PassManagerRef, Internalize: Bool, RunInliner: Bool); + pub fn LLVMRustPassManagerBuilderPopulateThinLTOPassManager( + PMB: PassManagerBuilderRef, + PM: PassManagerRef) -> bool; // Stuff that's in rustllvm/ because it's not upstream yet. 
@@ -1685,4 +1702,43 @@ extern "C" { pub fn LLVMRustModuleBufferLen(p: *const ModuleBuffer) -> usize; pub fn LLVMRustModuleBufferFree(p: *mut ModuleBuffer); pub fn LLVMRustModuleCost(M: ModuleRef) -> u64; + + pub fn LLVMRustThinLTOAvailable() -> bool; + pub fn LLVMRustWriteThinBitcodeToFile(PMR: PassManagerRef, + M: ModuleRef, + BC: *const c_char) -> bool; + pub fn LLVMRustThinLTOBufferCreate(M: ModuleRef) -> *mut ThinLTOBuffer; + pub fn LLVMRustThinLTOBufferFree(M: *mut ThinLTOBuffer); + pub fn LLVMRustThinLTOBufferPtr(M: *const ThinLTOBuffer) -> *const c_char; + pub fn LLVMRustThinLTOBufferLen(M: *const ThinLTOBuffer) -> size_t; + pub fn LLVMRustCreateThinLTOData( + Modules: *const ThinLTOModule, + NumModules: c_uint, + PreservedSymbols: *const *const c_char, + PreservedSymbolsLen: c_uint, + ) -> *mut ThinLTOData; + pub fn LLVMRustPrepareThinLTORename( + Data: *const ThinLTOData, + Module: ModuleRef, + ) -> bool; + pub fn LLVMRustPrepareThinLTOResolveWeak( + Data: *const ThinLTOData, + Module: ModuleRef, + ) -> bool; + pub fn LLVMRustPrepareThinLTOInternalize( + Data: *const ThinLTOData, + Module: ModuleRef, + ) -> bool; + pub fn LLVMRustPrepareThinLTOImport( + Data: *const ThinLTOData, + Module: ModuleRef, + ) -> bool; + pub fn LLVMRustFreeThinLTOData(Data: *mut ThinLTOData); + pub fn LLVMRustParseBitcodeForThinLTO( + Context: ContextRef, + Data: *const u8, + len: usize, + Identifier: *const c_char, + ) -> ModuleRef; + pub fn LLVMGetModuleIdentifier(M: ModuleRef, size: *mut usize) -> *const c_char; } diff --git a/src/librustc_trans/back/link.rs b/src/librustc_trans/back/link.rs index 3badc1b9a6986..3f25c182fa22a 100644 --- a/src/librustc_trans/back/link.rs +++ b/src/librustc_trans/back/link.rs @@ -16,6 +16,7 @@ use super::rpath::RPathConfig; use super::rpath; use metadata::METADATA_FILENAME; use rustc::session::config::{self, NoDebugInfo, OutputFilenames, OutputType, PrintRequest}; +use rustc::session::config::RUST_CGU_EXT; use rustc::session::filesearch; use 
rustc::session::search_paths::PathKind; use rustc::session::Session; @@ -45,13 +46,9 @@ use syntax::attr; /// The LLVM module name containing crate-metadata. This includes a `.` on /// purpose, so it cannot clash with the name of a user-defined module. pub const METADATA_MODULE_NAME: &'static str = "crate.metadata"; -/// The name of the crate-metadata object file the compiler generates. Must -/// match up with `METADATA_MODULE_NAME`. -pub const METADATA_OBJ_NAME: &'static str = "crate.metadata.o"; // same as for metadata above, but for allocator shim pub const ALLOCATOR_MODULE_NAME: &'static str = "crate.allocator"; -pub const ALLOCATOR_OBJ_NAME: &'static str = "crate.allocator.o"; pub use rustc_trans_utils::link::{find_crate_name, filename_for_input, default_output_for_target, invalid_output_for_target, build_link_meta, out_filename, @@ -129,6 +126,14 @@ fn command_path(sess: &Session) -> OsString { env::join_paths(new_path).unwrap() } +fn metadata_obj(outputs: &OutputFilenames) -> PathBuf { + outputs.temp_path(OutputType::Object, Some(METADATA_MODULE_NAME)) +} + +fn allocator_obj(outputs: &OutputFilenames) -> PathBuf { + outputs.temp_path(OutputType::Object, Some(ALLOCATOR_MODULE_NAME)) +} + pub fn remove(sess: &Session, path: &Path) { match fs::remove_file(path) { Ok(..) => {} @@ -174,9 +179,9 @@ pub fn link_binary(sess: &Session, remove(sess, &obj.object); } } - remove(sess, &outputs.with_extension(METADATA_OBJ_NAME)); + remove(sess, &metadata_obj(outputs)); if trans.allocator_module.is_some() { - remove(sess, &outputs.with_extension(ALLOCATOR_OBJ_NAME)); + remove(sess, &allocator_obj(outputs)); } } @@ -478,7 +483,7 @@ fn link_rlib<'a>(sess: &'a Session, RlibFlavor::StaticlibBase => { if trans.allocator_module.is_some() { - ab.add_file(&outputs.with_extension(ALLOCATOR_OBJ_NAME)); + ab.add_file(&allocator_obj(outputs)); } } } @@ -908,11 +913,11 @@ fn link_args(cmd: &mut Linker, // object file, so we link that in here. 
if crate_type == config::CrateTypeDylib || crate_type == config::CrateTypeProcMacro { - cmd.add_object(&outputs.with_extension(METADATA_OBJ_NAME)); + cmd.add_object(&metadata_obj(outputs)); } if trans.allocator_module.is_some() { - cmd.add_object(&outputs.with_extension(ALLOCATOR_OBJ_NAME)); + cmd.add_object(&allocator_obj(outputs)); } // Try to strip as much out of the generated object by removing unused @@ -1265,11 +1270,23 @@ fn add_upstream_rust_crates(cmd: &mut Linker, let canonical = f.replace("-", "_"); let canonical_name = name.replace("-", "_"); + // Look for `.rust-cgu.o` at the end of the filename to conclude + // that this is a Rust-related object file. + fn looks_like_rust(s: &str) -> bool { + let path = Path::new(s); + let ext = path.extension().and_then(|s| s.to_str()); + if ext != Some(OutputType::Object.extension()) { + return false + } + let ext2 = path.file_stem() + .and_then(|s| Path::new(s).extension()) + .and_then(|s| s.to_str()); + ext2 == Some(RUST_CGU_EXT) + } + let is_rust_object = - canonical.starts_with(&canonical_name) && { - let num = &f[name.len()..f.len() - 2]; - num.len() > 0 && num[1..].parse::().is_ok() - }; + canonical.starts_with(&canonical_name) && + looks_like_rust(&f); // If we've been requested to skip all native object files // (those not generated by the rust compiler) then we can skip diff --git a/src/librustc_trans/back/lto.rs b/src/librustc_trans/back/lto.rs index 8651b95b12a5e..8f75b891a302c 100644 --- a/src/librustc_trans/back/lto.rs +++ b/src/librustc_trans/back/lto.rs @@ -9,23 +9,25 @@ // except according to those terms. 
use back::bytecode::{DecodedBytecode, RLIB_BYTECODE_EXTENSION}; -use back::write; use back::symbol_export; -use rustc::session::config; +use back::write::{ModuleConfig, with_llvm_pmb, CodegenContext}; +use back::write; use errors::{FatalError, Handler}; -use llvm; use llvm::archive_ro::ArchiveRO; use llvm::{ModuleRef, TargetMachineRef, True, False}; +use llvm; +use rustc::hir::def_id::LOCAL_CRATE; use rustc::middle::exported_symbols::SymbolExportLevel; +use rustc::session::config; use rustc::util::common::time; -use rustc::hir::def_id::LOCAL_CRATE; -use back::write::{ModuleConfig, with_llvm_pmb, CodegenContext}; -use {ModuleTranslation, ModuleKind}; +use time_graph::Timeline; +use {ModuleTranslation, ModuleLlvm, ModuleKind, ModuleSource}; use libc; use std::ffi::CString; use std::slice; +use std::sync::Arc; pub fn crate_type_allows_lto(crate_type: config::CrateType) -> bool { match crate_type { @@ -45,14 +47,14 @@ pub enum LtoModuleTranslation { _serialized_bitcode: Vec, }, - // Note the lack of other entries in this enum! Ideally one day this gap is - // intended to be filled with a "Thin" LTO variant. + Thin(ThinModule), } impl LtoModuleTranslation { pub fn name(&self) -> &str { match *self { LtoModuleTranslation::Fat { .. } => "everything", + LtoModuleTranslation::Thin(ref m) => m.name(), } } @@ -62,7 +64,9 @@ impl LtoModuleTranslation { /// points to LLVM data structures owned by this `LtoModuleTranslation`. /// It's intended that the module returned is immediately code generated and /// dropped, and then this LTO module is dropped. 
- pub unsafe fn optimize(&mut self, cgcx: &CodegenContext) + pub unsafe fn optimize(&mut self, + cgcx: &CodegenContext, + timeline: &mut Timeline) -> Result { match *self { @@ -71,9 +75,11 @@ impl LtoModuleTranslation { let config = cgcx.config(trans.kind); let llmod = trans.llvm().unwrap().llmod; let tm = trans.llvm().unwrap().tm; - run_pass_manager(cgcx, tm, llmod, config); + run_pass_manager(cgcx, tm, llmod, config, false); + timeline.record("fat-done"); Ok(trans) } + LtoModuleTranslation::Thin(ref mut thin) => thin.optimize(cgcx, timeline), } } @@ -83,33 +89,31 @@ impl LtoModuleTranslation { match *self { // Only one module with fat LTO, so the cost doesn't matter. LtoModuleTranslation::Fat { .. } => 0, + LtoModuleTranslation::Thin(ref m) => m.cost(), } } } -pub fn run(cgcx: &CodegenContext, modules: Vec) +pub enum LTOMode { + WholeCrateGraph, + JustThisCrate, +} + +pub fn run(cgcx: &CodegenContext, + modules: Vec, + mode: LTOMode, + timeline: &mut Timeline) -> Result, FatalError> { let diag_handler = cgcx.create_diag_handler(); - if cgcx.opts.cg.prefer_dynamic { - diag_handler.struct_err("cannot prefer dynamic linking when performing LTO") - .note("only 'staticlib', 'bin', and 'cdylib' outputs are \ - supported with LTO") - .emit(); - return Err(FatalError) - } - - // Make sure we actually can run LTO - for crate_type in cgcx.crate_types.iter() { - if !crate_type_allows_lto(*crate_type) { - let e = diag_handler.fatal("lto can only be run for executables, cdylibs and \ - static library outputs"); - return Err(e) + let export_threshold = match mode { + LTOMode::WholeCrateGraph => { + symbol_export::crates_export_threshold(&cgcx.crate_types) } - } - - let export_threshold = - symbol_export::crates_export_threshold(&cgcx.crate_types); + LTOMode::JustThisCrate => { + SymbolExportLevel::Rust + } + }; let symbol_filter = &|&(ref name, _, level): &(String, _, SymbolExportLevel)| { if level.is_below_threshold(export_threshold) { @@ -121,55 +125,81 @@ pub fn run(cgcx: 
&CodegenContext, modules: Vec) } }; - let mut symbol_white_list: Vec = cgcx.exported_symbols[&LOCAL_CRATE] + let mut symbol_white_list = cgcx.exported_symbols[&LOCAL_CRATE] .iter() .filter_map(symbol_filter) - .collect(); - info!("{} symbols in whitelist", symbol_white_list.len()); + .collect::>(); + timeline.record("whitelist"); - // For each of our upstream dependencies, find the corresponding rlib and - // load the bitcode from the archive. Then merge it into the current LLVM - // module that we've got. + // If we're performing LTO for the entire crate graph, then for each of our + // upstream dependencies, find the corresponding rlib and load the bitcode + // from the archive. + // + // We save off all the bytecode and LLVM module ids for later processing + // with either fat or thin LTO let mut upstream_modules = Vec::new(); - for &(cnum, ref path) in cgcx.each_linked_rlib_for_lto.iter() { - symbol_white_list.extend( - cgcx.exported_symbols[&cnum] - .iter() - .filter_map(symbol_filter)); - info!("{} symbols in whitelist after {}", symbol_white_list.len(), cnum); - - let archive = ArchiveRO::open(&path).expect("wanted an rlib"); - let bytecodes = archive.iter().filter_map(|child| { - child.ok().and_then(|c| c.name().map(|name| (name, c))) - }).filter(|&(name, _)| name.ends_with(RLIB_BYTECODE_EXTENSION)); - for (name, data) in bytecodes { - info!("adding bytecode {}", name); - let bc_encoded = data.data(); - - let (bc, id) = time(cgcx.time_passes, &format!("decode {}", name), || { - match DecodedBytecode::new(bc_encoded) { - Ok(b) => Ok((b.bytecode(), b.identifier().to_string())), - Err(e) => Err(diag_handler.fatal(&e)), - } - })?; - let bc = SerializedModule::FromRlib(bc); - upstream_modules.push((bc, CString::new(id).unwrap())); + if let LTOMode::WholeCrateGraph = mode { + if cgcx.opts.cg.prefer_dynamic { + diag_handler.struct_err("cannot prefer dynamic linking when performing LTO") + .note("only 'staticlib', 'bin', and 'cdylib' outputs are \ + supported with 
LTO") + .emit(); + return Err(FatalError) + } + + // Make sure we actually can run LTO + for crate_type in cgcx.crate_types.iter() { + if !crate_type_allows_lto(*crate_type) { + let e = diag_handler.fatal("lto can only be run for executables, cdylibs and \ + static library outputs"); + return Err(e) + } } - } - // Internalize everything but the exported symbols of the current module - let arr: Vec<*const libc::c_char> = symbol_white_list.iter() - .map(|c| c.as_ptr()) - .collect(); + for &(cnum, ref path) in cgcx.each_linked_rlib_for_lto.iter() { + symbol_white_list.extend( + cgcx.exported_symbols[&cnum] + .iter() + .filter_map(symbol_filter)); + + let archive = ArchiveRO::open(&path).expect("wanted an rlib"); + let bytecodes = archive.iter().filter_map(|child| { + child.ok().and_then(|c| c.name().map(|name| (name, c))) + }).filter(|&(name, _)| name.ends_with(RLIB_BYTECODE_EXTENSION)); + for (name, data) in bytecodes { + info!("adding bytecode {}", name); + let bc_encoded = data.data(); + + let (bc, id) = time(cgcx.time_passes, &format!("decode {}", name), || { + match DecodedBytecode::new(bc_encoded) { + Ok(b) => Ok((b.bytecode(), b.identifier().to_string())), + Err(e) => Err(diag_handler.fatal(&e)), + } + })?; + let bc = SerializedModule::FromRlib(bc); + upstream_modules.push((bc, CString::new(id).unwrap())); + } + timeline.record(&format!("load: {}", path.display())); + } + } - fat_lto(cgcx, &diag_handler, modules, upstream_modules, &arr) + let arr = symbol_white_list.iter().map(|c| c.as_ptr()).collect::>(); + match mode { + LTOMode::WholeCrateGraph if !cgcx.thinlto => { + fat_lto(cgcx, &diag_handler, modules, upstream_modules, &arr, timeline) + } + _ => { + thin_lto(&diag_handler, modules, upstream_modules, &arr, timeline) + } + } } fn fat_lto(cgcx: &CodegenContext, diag_handler: &Handler, mut modules: Vec, mut serialized_modules: Vec<(SerializedModule, CString)>, - symbol_white_list: &[*const libc::c_char]) + symbol_white_list: &[*const libc::c_char], + 
timeline: &mut Timeline) -> Result, FatalError> { info!("going for a fat lto"); @@ -228,6 +258,7 @@ fn fat_lto(cgcx: &CodegenContext, Err(write::llvm_err(&diag_handler, msg)) } })?; + timeline.record(&format!("link {:?}", name)); serialized_bitcode.push(bc_decoded); } cgcx.save_temp_bitcode(&module, "lto.input"); @@ -248,6 +279,7 @@ fn fat_lto(cgcx: &CodegenContext, } cgcx.save_temp_bitcode(&module, "lto.after-nounwind"); } + timeline.record("passes"); Ok(vec![LtoModuleTranslation::Fat { module: Some(module), @@ -255,11 +287,143 @@ fn fat_lto(cgcx: &CodegenContext, }]) } +/// Prepare "thin" LTO to get run on these modules. +/// +/// The general structure of ThinLTO is quite different from the structure of +/// "fat" LTO above. With "fat" LTO all LLVM modules in question are merged into +/// one giant LLVM module, and then we run more optimization passes over this +/// big module after internalizing most symbols. Thin LTO, on the other hand, +/// avoid this large bottleneck through more targeted optimization. +/// +/// At a high level Thin LTO looks like: +/// +/// 1. Prepare a "summary" of each LLVM module in question which describes +/// the values inside, cost of the values, etc. +/// 2. Merge the summaries of all modules in question into one "index" +/// 3. Perform some global analysis on this index +/// 4. For each module, use the index and analysis calculated previously to +/// perform local transformations on the module, for example inlining +/// small functions from other modules. +/// 5. Run thin-specific optimization passes over each module, and then code +/// generate everything at the end. +/// +/// The summary for each module is intended to be quite cheap, and the global +/// index is relatively quite cheap to create as well. As a result, the goal of +/// ThinLTO is to reduce the bottleneck on LTO and enable LTO to be used in more +/// situations. 
For example one cheap optimization is that we can parallelize +/// all codegen modules, easily making use of all the cores on a machine. +/// +/// With all that in mind, the function here is designed at specifically just +/// calculating the *index* for ThinLTO. This index will then be shared amongst +/// all of the `LtoModuleTranslation` units returned below and destroyed once +/// they all go out of scope. +fn thin_lto(diag_handler: &Handler, + modules: Vec, + serialized_modules: Vec<(SerializedModule, CString)>, + symbol_white_list: &[*const libc::c_char], + timeline: &mut Timeline) + -> Result, FatalError> +{ + unsafe { + info!("going for that thin, thin LTO"); + + let mut thin_buffers = Vec::new(); + let mut module_names = Vec::new(); + let mut thin_modules = Vec::new(); + + // FIXME: right now, like with fat LTO, we serialize all in-memory + // modules before working with them and ThinLTO. We really + // shouldn't do this, however, and instead figure out how to + // extract a summary from an in-memory module and then merge that + // into the global index. It turns out that this loop is by far + // the most expensive portion of this small bit of global + // analysis! + for (i, module) in modules.iter().enumerate() { + info!("local module: {} - {}", i, module.llmod_id); + let llvm = module.llvm().expect("can't lto pretranslated module"); + let name = CString::new(module.llmod_id.clone()).unwrap(); + let buffer = llvm::LLVMRustThinLTOBufferCreate(llvm.llmod); + let buffer = ThinBuffer(buffer); + thin_modules.push(llvm::ThinLTOModule { + identifier: name.as_ptr(), + data: buffer.data().as_ptr(), + len: buffer.data().len(), + }); + thin_buffers.push(buffer); + module_names.push(name); + timeline.record(&module.llmod_id); + } + + // FIXME: All upstream crates are deserialized internally in the + // function below to extract their summary and modules. 
Note that + // unlike the loop above we *must* decode and/or read something + // here as these are all just serialized files on disk. An + // improvement, however, to make here would be to store the + // module summary separately from the actual module itself. Right + // now this is store in one large bitcode file, and the entire + // file is deflate-compressed. We could try to bypass some of the + // decompression by storing the index uncompressed and only + // lazily decompressing the bytecode if necessary. + // + // Note that truly taking advantage of this optimization will + // likely be further down the road. We'd have to implement + // incremental ThinLTO first where we could actually avoid + // looking at upstream modules entirely sometimes (the contents, + // we must always unconditionally look at the index). + let mut serialized = Vec::new(); + for (module, name) in serialized_modules { + info!("foreign module {:?}", name); + thin_modules.push(llvm::ThinLTOModule { + identifier: name.as_ptr(), + data: module.data().as_ptr(), + len: module.data().len(), + }); + serialized.push(module); + module_names.push(name); + } + + // Delegate to the C++ bindings to create some data here. Once this is a + // tried-and-true interface we may wish to try to upstream some of this + // to LLVM itself, right now we reimplement a lot of what they do + // upstream... + let data = llvm::LLVMRustCreateThinLTOData( + thin_modules.as_ptr(), + thin_modules.len() as u32, + symbol_white_list.as_ptr(), + symbol_white_list.len() as u32, + ); + if data.is_null() { + let msg = format!("failed to prepare thin LTO context"); + return Err(write::llvm_err(&diag_handler, msg)) + } + let data = ThinData(data); + info!("thin LTO data created"); + timeline.record("data"); + + // Throw our data in an `Arc` as we'll be sharing it across threads. We + // also put all memory referenced by the C++ data (buffers, ids, etc) + // into the arc as well. 
After this we'll create a thin module + // translation per module in this data. + let shared = Arc::new(ThinShared { + data, + thin_buffers, + serialized_modules: serialized, + module_names, + }); + Ok((0..shared.module_names.len()).map(|i| { + LtoModuleTranslation::Thin(ThinModule { + shared: shared.clone(), + idx: i, + }) + }).collect()) + } +} + fn run_pass_manager(cgcx: &CodegenContext, tm: TargetMachineRef, llmod: ModuleRef, - config: &ModuleConfig) { - + config: &ModuleConfig, + thin: bool) { // Now we have one massive module inside of llmod. Time to run the // LTO-specific optimization passes that LLVM provides. // @@ -274,9 +438,15 @@ fn run_pass_manager(cgcx: &CodegenContext, llvm::LLVMRustAddPass(pm, pass); with_llvm_pmb(llmod, config, &mut |b| { - llvm::LLVMPassManagerBuilderPopulateLTOPassManager(b, pm, - /* Internalize = */ False, - /* RunInliner = */ True); + if thin { + if !llvm::LLVMRustPassManagerBuilderPopulateThinLTOPassManager(b, pm) { + panic!("this version of LLVM does not support ThinLTO"); + } + } else { + llvm::LLVMPassManagerBuilderPopulateLTOPassManager(b, pm, + /* Internalize = */ False, + /* RunInliner = */ True); + } }); let pass = llvm::LLVMRustFindAndCreatePass("verify\0".as_ptr() as *const _); @@ -331,3 +501,158 @@ impl Drop for ModuleBuffer { unsafe { llvm::LLVMRustModuleBufferFree(self.0); } } } + +pub struct ThinModule { + shared: Arc, + idx: usize, +} + +struct ThinShared { + data: ThinData, + thin_buffers: Vec, + serialized_modules: Vec, + module_names: Vec, +} + +struct ThinData(*mut llvm::ThinLTOData); + +unsafe impl Send for ThinData {} +unsafe impl Sync for ThinData {} + +impl Drop for ThinData { + fn drop(&mut self) { + unsafe { + llvm::LLVMRustFreeThinLTOData(self.0); + } + } +} + +struct ThinBuffer(*mut llvm::ThinLTOBuffer); + +unsafe impl Send for ThinBuffer {} +unsafe impl Sync for ThinBuffer {} + +impl ThinBuffer { + fn data(&self) -> &[u8] { + unsafe { + let ptr = llvm::LLVMRustThinLTOBufferPtr(self.0) as *const _; + 
let len = llvm::LLVMRustThinLTOBufferLen(self.0); + slice::from_raw_parts(ptr, len) + } + } +} + +impl Drop for ThinBuffer { + fn drop(&mut self) { + unsafe { + llvm::LLVMRustThinLTOBufferFree(self.0); + } + } +} + +impl ThinModule { + fn name(&self) -> &str { + self.shared.module_names[self.idx].to_str().unwrap() + } + + fn cost(&self) -> u64 { + // Yes, that's correct, we're using the size of the bytecode as an + // indicator for how costly this codegen unit is. + self.data().len() as u64 + } + + fn data(&self) -> &[u8] { + let a = self.shared.thin_buffers.get(self.idx).map(|b| b.data()); + a.unwrap_or_else(|| { + let len = self.shared.thin_buffers.len(); + self.shared.serialized_modules[self.idx - len].data() + }) + } + + unsafe fn optimize(&mut self, cgcx: &CodegenContext, timeline: &mut Timeline) + -> Result + { + let diag_handler = cgcx.create_diag_handler(); + let tm = (cgcx.tm_factory)().map_err(|e| { + write::llvm_err(&diag_handler, e) + })?; + + // Right now the implementation we've got only works over serialized + // modules, so we create a fresh new LLVM context and parse the module + // into that context. One day, however, we may do this for upstream + // crates but for locally translated modules we may be able to reuse + // that LLVM Context and Module. + let llcx = llvm::LLVMContextCreate(); + let llmod = llvm::LLVMRustParseBitcodeForThinLTO( + llcx, + self.data().as_ptr(), + self.data().len(), + self.shared.module_names[self.idx].as_ptr(), + ); + assert!(!llmod.is_null()); + let mtrans = ModuleTranslation { + source: ModuleSource::Translated(ModuleLlvm { + llmod, + llcx, + tm, + }), + llmod_id: self.name().to_string(), + name: self.name().to_string(), + kind: ModuleKind::Regular, + }; + cgcx.save_temp_bitcode(&mtrans, "thin-lto-input"); + + // Like with "fat" LTO, get some better optimizations if landing pads + // are disabled by removing all landing pads. 
+ if cgcx.no_landing_pads { + llvm::LLVMRustMarkAllFunctionsNounwind(llmod); + cgcx.save_temp_bitcode(&mtrans, "thin-lto-after-nounwind"); + timeline.record("nounwind"); + } + + // Up next comes the per-module local analyses that we do for Thin LTO. + // Each of these functions is basically copied from the LLVM + // implementation and then tailored to suit this implementation. Ideally + // each of these would be supported by upstream LLVM but that's perhaps + // a patch for another day! + // + // You can find some more comments about these functions in the LLVM + // bindings we've got (currently `PassWrapper.cpp`) + if !llvm::LLVMRustPrepareThinLTORename(self.shared.data.0, llmod) { + let msg = format!("failed to prepare thin LTO module"); + return Err(write::llvm_err(&diag_handler, msg)) + } + cgcx.save_temp_bitcode(&mtrans, "thin-lto-after-rename"); + timeline.record("rename"); + if !llvm::LLVMRustPrepareThinLTOResolveWeak(self.shared.data.0, llmod) { + let msg = format!("failed to prepare thin LTO module"); + return Err(write::llvm_err(&diag_handler, msg)) + } + cgcx.save_temp_bitcode(&mtrans, "thin-lto-after-resolve"); + timeline.record("resolve"); + if !llvm::LLVMRustPrepareThinLTOInternalize(self.shared.data.0, llmod) { + let msg = format!("failed to prepare thin LTO module"); + return Err(write::llvm_err(&diag_handler, msg)) + } + cgcx.save_temp_bitcode(&mtrans, "thin-lto-after-internalize"); + timeline.record("internalize"); + if !llvm::LLVMRustPrepareThinLTOImport(self.shared.data.0, llmod) { + let msg = format!("failed to prepare thin LTO module"); + return Err(write::llvm_err(&diag_handler, msg)) + } + cgcx.save_temp_bitcode(&mtrans, "thin-lto-after-import"); + timeline.record("import"); + + // Alright now that we've done everything related to the ThinLTO + // analysis it's time to run some optimizations! 
Here we use the same + // `run_pass_manager` as the "fat" LTO above except that we tell it to + // populate a thin-specific pass manager, which presumably LLVM treats a + // little differently. + info!("running thin lto passes over {}", mtrans.name); + let config = cgcx.config(mtrans.kind); + run_pass_manager(cgcx, tm, llmod, config, true); + cgcx.save_temp_bitcode(&mtrans, "thin-lto-after-pm"); + timeline.record("thin-done"); + Ok(mtrans) + } +} diff --git a/src/librustc_trans/back/write.rs b/src/librustc_trans/back/write.rs index 1988b9f76e401..b3b2384a02766 100644 --- a/src/librustc_trans/back/write.rs +++ b/src/librustc_trans/back/write.rs @@ -19,7 +19,7 @@ use rustc::session::config::{self, OutputFilenames, OutputType, OutputTypes, Pas AllPasses, Sanitizer}; use rustc::session::Session; use rustc::util::nodemap::FxHashMap; -use time_graph::{self, TimeGraph}; +use time_graph::{self, TimeGraph, Timeline}; use llvm; use llvm::{ModuleRef, TargetMachineRef, PassManagerRef, DiagnosticInfoRef}; use llvm::{SMDiagnosticRef, ContextRef}; @@ -303,6 +303,7 @@ pub struct CodegenContext { // Resouces needed when running LTO pub time_passes: bool, pub lto: bool, + pub thinlto: bool, pub no_landing_pads: bool, pub save_temps: bool, pub exported_symbols: Arc, @@ -315,6 +316,8 @@ pub struct CodegenContext { allocator_module_config: Arc, pub tm_factory: Arc Result + Send + Sync>, + // Number of cgus excluding the allocator/metadata modules + pub total_cgus: usize, // Handler to use for diagnostics produced during codegen. pub diag_emitter: SharedEmitter, // LLVM passes added by plugins. 
@@ -450,7 +453,8 @@ unsafe extern "C" fn diagnostic_handler(info: DiagnosticInfoRef, user: *mut c_vo unsafe fn optimize(cgcx: &CodegenContext, diag_handler: &Handler, mtrans: &ModuleTranslation, - config: &ModuleConfig) + config: &ModuleConfig, + timeline: &mut Timeline) -> Result<(), FatalError> { let (llmod, llcx, tm) = match mtrans.source { @@ -529,6 +533,7 @@ unsafe fn optimize(cgcx: &CodegenContext, // Finally, run the actual optimization passes time(config.time_passes, &format!("llvm function passes [{}]", module_name.unwrap()), || llvm::LLVMRustRunFunctionPassManager(fpm, llmod)); + timeline.record("fpm"); time(config.time_passes, &format!("llvm module passes [{}]", module_name.unwrap()), || llvm::LLVMRunPassManager(mpm, llmod)); @@ -543,7 +548,18 @@ fn generate_lto_work(cgcx: &CodegenContext, modules: Vec) -> Vec<(WorkItem, u64)> { - let lto_modules = lto::run(cgcx, modules).unwrap_or_else(|e| panic!(e)); + let mut timeline = cgcx.time_graph.as_ref().map(|tg| { + tg.start(TRANS_WORKER_TIMELINE, + TRANS_WORK_PACKAGE_KIND, + "generate lto") + }).unwrap_or(Timeline::noop()); + let mode = if cgcx.lto { + lto::LTOMode::WholeCrateGraph + } else { + lto::LTOMode::JustThisCrate + }; + let lto_modules = lto::run(cgcx, modules, mode, &mut timeline) + .unwrap_or_else(|e| panic!(e)); lto_modules.into_iter().map(|module| { let cost = module.cost(); @@ -554,9 +570,11 @@ fn generate_lto_work(cgcx: &CodegenContext, unsafe fn codegen(cgcx: &CodegenContext, diag_handler: &Handler, mtrans: ModuleTranslation, - config: &ModuleConfig) + config: &ModuleConfig, + timeline: &mut Timeline) -> Result { + timeline.record("codegen"); let (llmod, llcx, tm) = match mtrans.source { ModuleSource::Translated(ref llvm) => (llvm.llmod, llvm.llcx, llvm.tm), ModuleSource::Preexisting(_) => { @@ -601,7 +619,18 @@ unsafe fn codegen(cgcx: &CodegenContext, if write_bc { let bc_out_c = path2cstr(&bc_out); - llvm::LLVMWriteBitcodeToFile(llmod, bc_out_c.as_ptr()); + if 
llvm::LLVMRustThinLTOAvailable() { + with_codegen(tm, llmod, config.no_builtins, |cpm| { + llvm::LLVMRustWriteThinBitcodeToFile( + cpm, + llmod, + bc_out_c.as_ptr(), + ) + }); + } else { + llvm::LLVMWriteBitcodeToFile(llmod, bc_out_c.as_ptr()); + } + timeline.record("bc"); } time(config.time_passes, &format!("codegen passes [{}]", module_name.unwrap()), @@ -644,7 +673,8 @@ unsafe fn codegen(cgcx: &CodegenContext, with_codegen(tm, llmod, config.no_builtins, |cpm| { llvm::LLVMRustPrintModule(cpm, llmod, out.as_ptr(), demangle_callback); llvm::LLVMDisposePassManager(cpm); - }) + }); + timeline.record("ir"); } if config.emit_asm { @@ -665,6 +695,7 @@ unsafe fn codegen(cgcx: &CodegenContext, if config.emit_obj { llvm::LLVMDisposeModule(llmod); } + timeline.record("asm"); } if write_obj { @@ -672,6 +703,7 @@ unsafe fn codegen(cgcx: &CodegenContext, write_output_file(diag_handler, tm, cpm, llmod, &obj_out, llvm::FileType::ObjectFile) })?; + timeline.record("obj"); } Ok(()) @@ -712,7 +744,8 @@ pub fn start_async_translation(tcx: TyCtxt, time_graph: Option, link: LinkMeta, metadata: EncodedMetadata, - coordinator_receive: Receiver>) + coordinator_receive: Receiver>, + total_cgus: usize) -> OngoingCrateTranslation { let sess = tcx.sess; let crate_output = tcx.output_filenames(LOCAL_CRATE); @@ -836,6 +869,7 @@ pub fn start_async_translation(tcx: TyCtxt, shared_emitter, trans_worker_send, coordinator_receive, + total_cgus, client, time_graph.clone(), Arc::new(modules_config), @@ -1080,7 +1114,9 @@ enum WorkItemResult { NeedsLTO(ModuleTranslation), } -fn execute_work_item(cgcx: &CodegenContext, work_item: WorkItem) +fn execute_work_item(cgcx: &CodegenContext, + work_item: WorkItem, + timeline: &mut Timeline) -> Result { let diag_handler = cgcx.create_diag_handler(); @@ -1089,8 +1125,8 @@ fn execute_work_item(cgcx: &CodegenContext, work_item: WorkItem) WorkItem::Optimize(mtrans) => mtrans, WorkItem::LTO(mut lto) => { unsafe { - let module = lto.optimize(cgcx)?; - let module = 
codegen(cgcx, &diag_handler, module, config)?; + let module = lto.optimize(cgcx, timeline)?; + let module = codegen(cgcx, &diag_handler, module, config, timeline)?; return Ok(WorkItemResult::Compiled(module)) } } @@ -1140,9 +1176,27 @@ fn execute_work_item(cgcx: &CodegenContext, work_item: WorkItem) debug!("llvm-optimizing {:?}", module_name); unsafe { - optimize(cgcx, &diag_handler, &mtrans, config)?; - if !cgcx.lto || mtrans.kind == ModuleKind::Metadata { - let module = codegen(cgcx, &diag_handler, mtrans, config)?; + optimize(cgcx, &diag_handler, &mtrans, config, timeline)?; + + let lto = cgcx.lto; + + let auto_thin_lto = + cgcx.thinlto && + cgcx.total_cgus > 1 && + mtrans.kind != ModuleKind::Allocator; + + // If we're a metadata module we never participate in LTO. + // + // If LTO was explicitly requested on the command line, we always + // LTO everything else. + // + // If LTO *wasn't* explicitly requested and we're not a metdata + // module, then we may automatically do ThinLTO if we've got + // multiple codegen units. Note, however, that the allocator module + // doesn't participate here automatically because of linker + // shenanigans later on. 
+ if mtrans.kind == ModuleKind::Metadata || (!lto && !auto_thin_lto) { + let module = codegen(cgcx, &diag_handler, mtrans, config, timeline)?; Ok(WorkItemResult::Compiled(module)) } else { Ok(WorkItemResult::NeedsLTO(mtrans)) @@ -1187,6 +1241,7 @@ fn start_executing_work(tcx: TyCtxt, shared_emitter: SharedEmitter, trans_worker_send: Sender, coordinator_receive: Receiver>, + total_cgus: usize, jobserver: Client, time_graph: Option, modules_config: Arc, @@ -1229,6 +1284,7 @@ fn start_executing_work(tcx: TyCtxt, crate_types: sess.crate_types.borrow().clone(), each_linked_rlib_for_lto, lto: sess.lto(), + thinlto: sess.opts.debugging_opts.thinlto, no_landing_pads: sess.no_landing_pads(), save_temps: sess.opts.cg.save_temps, opts: Arc::new(sess.opts.clone()), @@ -1246,6 +1302,7 @@ fn start_executing_work(tcx: TyCtxt, metadata_module_config: metadata_config, allocator_module_config: allocator_config, tm_factory: target_machine_factory(tcx.sess), + total_cgus, }; // This is the "main loop" of parallel work happening for parallel codegen. @@ -1743,12 +1800,13 @@ fn spawn_work(cgcx: CodegenContext, work: WorkItem) { // as a diagnostic was already sent off to the main thread - just // surface that there was an error in this worker. 
bomb.result = { - let _timing_guard = cgcx.time_graph.as_ref().map(|tg| { + let timeline = cgcx.time_graph.as_ref().map(|tg| { tg.start(time_graph::TimelineId(cgcx.worker), LLVM_WORK_PACKAGE_KIND, &work.name()) }); - execute_work_item(&cgcx, work).ok() + let mut timeline = timeline.unwrap_or(Timeline::noop()); + execute_work_item(&cgcx, work, &mut timeline).ok() }; }); } diff --git a/src/librustc_trans/base.rs b/src/librustc_trans/base.rs index 8c39f579b3ea6..5764b0b34d1c9 100644 --- a/src/librustc_trans/base.rs +++ b/src/librustc_trans/base.rs @@ -886,6 +886,11 @@ pub fn trans_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, check_for_rustc_errors_attr(tcx); + if tcx.sess.opts.debugging_opts.thinlto { + if unsafe { !llvm::LLVMRustThinLTOAvailable() } { + tcx.sess.fatal("this compiler's LLVM does not support ThinLTO"); + } + } let crate_hash = tcx.dep_graph .fingerprint_of(&DepNode::new_no_params(DepKind::Krate)); @@ -925,7 +930,8 @@ pub fn trans_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, time_graph.clone(), link_meta, metadata, - rx); + rx, + 1); ongoing_translation.submit_pre_translated_module_to_llvm(tcx, metadata_module); ongoing_translation.translation_finished(tcx); @@ -961,7 +967,8 @@ pub fn trans_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, time_graph.clone(), link_meta, metadata, - rx); + rx, + codegen_units.len()); // Translate an allocator shim, if any let allocator_module = if let Some(kind) = tcx.sess.allocator_kind.get() { @@ -1372,7 +1379,9 @@ fn compile_codegen_unit<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, // crashes if the module identifier is same as other symbols // such as a function name in the module. // 1. http://llvm.org/bugs/show_bug.cgi?id=11479 - let llmod_id = format!("{}.rs", cgu.name()); + let llmod_id = format!("{}-{}.rs", + cgu.name(), + tcx.crate_disambiguator(LOCAL_CRATE)); // Instantiate translation items without filling out definitions yet... 
let scx = SharedCrateContext::new(tcx); diff --git a/src/librustc_trans/partitioning.rs b/src/librustc_trans/partitioning.rs index 7b6daa7d13328..82ec1aaa41364 100644 --- a/src/librustc_trans/partitioning.rs +++ b/src/librustc_trans/partitioning.rs @@ -108,7 +108,6 @@ use rustc::dep_graph::{DepNode, WorkProductId}; use rustc::hir::def_id::DefId; use rustc::hir::map::DefPathData; use rustc::middle::trans::{Linkage, Visibility}; -use rustc::session::config::NUMBERED_CODEGEN_UNIT_MARKER; use rustc::ty::{self, TyCtxt, InstanceDef}; use rustc::ty::item_path::characteristic_def_id_of_type; use rustc::util::nodemap::{FxHashMap, FxHashSet}; @@ -627,7 +626,7 @@ fn compute_codegen_unit_name<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, } fn numbered_codegen_unit_name(crate_name: &str, index: usize) -> InternedString { - Symbol::intern(&format!("{}{}{}", crate_name, NUMBERED_CODEGEN_UNIT_MARKER, index)).as_str() + Symbol::intern(&format!("{}{}", crate_name, index)).as_str() } fn debug_dump<'a, 'b, 'tcx, I>(tcx: TyCtxt<'a, 'tcx, 'tcx>, diff --git a/src/librustc_trans/time_graph.rs b/src/librustc_trans/time_graph.rs index ec57af888e5f2..a8502682a806b 100644 --- a/src/librustc_trans/time_graph.rs +++ b/src/librustc_trans/time_graph.rs @@ -9,11 +9,12 @@ // except according to those terms. 
use std::collections::HashMap; +use std::fs::File; +use std::io::prelude::*; use std::marker::PhantomData; +use std::mem; use std::sync::{Arc, Mutex}; use std::time::Instant; -use std::io::prelude::*; -use std::fs::File; const OUTPUT_WIDTH_IN_PX: u64 = 1000; const TIME_LINE_HEIGHT_IN_PX: u64 = 20; @@ -25,6 +26,7 @@ struct Timing { end: Instant, work_package_kind: WorkPackageKind, name: String, + events: Vec<(String, Instant)>, } #[derive(Clone, Copy, Hash, Eq, PartialEq, Debug)] @@ -44,9 +46,14 @@ pub struct TimeGraph { #[derive(Clone, Copy)] pub struct WorkPackageKind(pub &'static [&'static str]); -pub struct RaiiToken { +pub struct Timeline { + token: Option, +} + +struct RaiiToken { graph: TimeGraph, timeline: TimelineId, + events: Vec<(String, Instant)>, // The token must not be Send: _marker: PhantomData<*const ()> } @@ -54,7 +61,7 @@ pub struct RaiiToken { impl Drop for RaiiToken { fn drop(&mut self) { - self.graph.end(self.timeline); + self.graph.end(self.timeline, mem::replace(&mut self.events, Vec::new())); } } @@ -68,7 +75,7 @@ impl TimeGraph { pub fn start(&self, timeline: TimelineId, work_package_kind: WorkPackageKind, - name: &str) -> RaiiToken { + name: &str) -> Timeline { { let mut table = self.data.lock().unwrap(); @@ -81,14 +88,17 @@ impl TimeGraph { data.open_work_package = Some((Instant::now(), work_package_kind, name.to_string())); } - RaiiToken { - graph: self.clone(), - timeline, - _marker: PhantomData, + Timeline { + token: Some(RaiiToken { + graph: self.clone(), + timeline, + events: Vec::new(), + _marker: PhantomData, + }), } } - fn end(&self, timeline: TimelineId) { + fn end(&self, timeline: TimelineId, events: Vec<(String, Instant)>) { let end = Instant::now(); let mut table = self.data.lock().unwrap(); @@ -100,6 +110,7 @@ impl TimeGraph { end, work_package_kind, name, + events, }); } else { bug!("end timing without start?") @@ -113,13 +124,13 @@ impl TimeGraph { assert!(data.open_work_package.is_none()); } - let mut timelines: Vec = + 
let mut threads: Vec = table.values().map(|data| data.clone()).collect(); - timelines.sort_by_key(|timeline| timeline.timings[0].start); + threads.sort_by_key(|timeline| timeline.timings[0].start); - let earliest_instant = timelines[0].timings[0].start; - let latest_instant = timelines.iter() + let earliest_instant = threads[0].timings[0].start; + let latest_instant = threads.iter() .map(|timeline| timeline.timings .last() .unwrap() @@ -130,16 +141,46 @@ impl TimeGraph { let mut file = File::create(format!("{}.html", output_filename)).unwrap(); - writeln!(file, "").unwrap(); - writeln!(file, "").unwrap(); - writeln!(file, "").unwrap(); + writeln!(file, " + + + + + +
+ ", + total_height = threads.len() * TIME_LINE_HEIGHT_STRIDE_IN_PX, + width = OUTPUT_WIDTH_IN_PX, + ).unwrap(); let mut color = 0; - - for (line_index, timeline) in timelines.iter().enumerate() { + for (line_index, thread) in threads.iter().enumerate() { let line_top = line_index * TIME_LINE_HEIGHT_STRIDE_IN_PX; - for span in &timeline.timings { + for span in &thread.timings { let start = distance(earliest_instant, span.start); let end = distance(earliest_instant, span.end); @@ -148,13 +189,13 @@ impl TimeGraph { let colors = span.work_package_kind.0; - writeln!(file, "
{}
", + writeln!(file, "{}", + color, line_top, start, end - start, @@ -167,8 +208,61 @@ impl TimeGraph { } } - writeln!(file, "").unwrap(); - writeln!(file, "").unwrap(); + writeln!(file, " +
+ ").unwrap(); + + let mut idx = 0; + for thread in threads.iter() { + for timing in &thread.timings { + let colors = timing.work_package_kind.0; + let height = TIME_LINE_HEIGHT_STRIDE_IN_PX * timing.events.len(); + writeln!(file, "
", + idx, + colors[idx % colors.len()], + height).unwrap(); + idx += 1; + let max = distance(timing.start, timing.end); + for (i, &(ref event, time)) in timing.events.iter().enumerate() { + let i = i as u64; + let time = distance(timing.start, time); + let at = normalize(time, max, OUTPUT_WIDTH_IN_PX); + writeln!(file, "{}", + at, + TIME_LINE_HEIGHT_IN_PX * i, + event).unwrap(); + } + writeln!(file, "
").unwrap(); + } + } + + writeln!(file, " + + + ").unwrap(); + } +} + +impl Timeline { + pub fn noop() -> Timeline { + Timeline { token: None } + } + + /// Record an event which happened at this moment on this timeline. + /// + /// Events are displayed in the eventual HTML output where you can click on + /// a particular timeline and it'll expand to all of the events that + /// happened on that timeline. This can then be used to drill into a + /// particular timeline and see what events are happening and taking the + /// most time. + pub fn record(&mut self, name: &str) { + if let Some(ref mut token) = self.token { + token.events.push((name.to_string(), Instant::now())); + } } } diff --git a/src/rustllvm/PassWrapper.cpp b/src/rustllvm/PassWrapper.cpp index 2f966e5a1c5e2..e37f048dd4757 100644 --- a/src/rustllvm/PassWrapper.cpp +++ b/src/rustllvm/PassWrapper.cpp @@ -26,7 +26,11 @@ #include "llvm/Transforms/IPO/PassManagerBuilder.h" #if LLVM_VERSION_GE(4, 0) +#include "llvm/Object/ModuleSummaryIndexObjectFile.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" +#include "llvm/Transforms/IPO/FunctionImport.h" +#include "llvm/Transforms/Utils/FunctionImportUtils.h" +#include "llvm/LTO/LTO.h" #endif #include "llvm-c/Transforms/PassManagerBuilder.h" @@ -102,6 +106,19 @@ extern "C" void LLVMRustAddPass(LLVMPassManagerRef PMR, LLVMPassRef RustPass) { PMB->add(Pass); } +extern "C" +bool LLVMRustPassManagerBuilderPopulateThinLTOPassManager( + LLVMPassManagerBuilderRef PMBR, + LLVMPassManagerRef PMR +) { +#if LLVM_VERSION_GE(4, 0) + unwrap(PMBR)->populateThinLTOPassManager(*unwrap(PMR)); + return true; +#else + return false; +#endif +} + #ifdef LLVM_COMPONENT_X86 #define SUBTARGET_X86 SUBTARGET(X86) #else @@ -740,3 +757,447 @@ extern "C" void LLVMRustSetModulePIELevel(LLVMModuleRef M) { unwrap(M)->setPIELevel(PIELevel::Level::Large); #endif } + +extern "C" bool +LLVMRustThinLTOAvailable() { +#if LLVM_VERSION_GE(4, 0) + return true; +#else + return false; +#endif +} + +#if 
LLVM_VERSION_GE(4, 0)
+
+// Here you'll find an implementation of ThinLTO as used by the Rust compiler
+// right now. This ThinLTO support is only enabled on "recent-ish" versions of
+// LLVM, and otherwise it's just blanket rejected from other compilers.
+//
+// Most of this implementation is straight copied from LLVM. At the time of
+// this writing it wasn't *quite* suitable to reuse more code from upstream
+// for our purposes, but we should strive to upstream this support once it's
+// ready to go! I figure we may want a bit of testing locally first before
+// sending this upstream to LLVM. I hear though they're quite eager to receive
+// feedback like this!
+//
+// If you're reading this code and wondering "what in the world" or you're
+// working "good lord my LLVM upgrade is *still* failing due to these bindings"
+// then fear not! (ok maybe fear a little). All code here is mostly based
+// on `lib/LTO/ThinLTOCodeGenerator.cpp` in LLVM.
+//
+// You'll find that the general layout here roughly corresponds to the `run`
+// method in that file as well as `ProcessThinLTOModule`. Functions are
+// specifically commented below as well, but if you're updating this code
+// or otherwise trying to understand it, the LLVM source will be useful in
+// interpreting the mysteries within.
+//
+// Otherwise I'll apologize in advance, it probably requires a relatively
+// significant investment on your part to "truly understand" what's going on
+// here. Not saying I do myself, but it took me a while staring at LLVM's source
+// and various online resources about ThinLTO to make heads or tails of all
+// this.
+ +extern "C" bool +LLVMRustWriteThinBitcodeToFile(LLVMPassManagerRef PMR, + LLVMModuleRef M, + const char *BcFile) { + llvm::legacy::PassManager *PM = unwrap(PMR); + std::error_code EC; + llvm::raw_fd_ostream bc(BcFile, EC, llvm::sys::fs::F_None); + if (EC) { + LLVMRustSetLastError(EC.message().c_str()); + return false; + } + PM->add(createWriteThinLTOBitcodePass(bc)); + PM->run(*unwrap(M)); + delete PM; + return true; +} + +// This is a shared data structure which *must* be threadsafe to share +// read-only amongst threads. This also corresponds basically to the arguments +// of the `ProcessThinLTOModule` function in the LLVM source. +struct LLVMRustThinLTOData { + // The combined index that is the global analysis over all modules we're + // performing ThinLTO for. This is mostly managed by LLVM. + ModuleSummaryIndex Index; + + // All modules we may look at, stored as in-memory serialized versions. This + // is later used when inlining to ensure we can extract any module to inline + // from. + StringMap ModuleMap; + + // A set that we manage of everything we *don't* want internalized. Note that + // this includes all transitive references right now as well, but it may not + // always! + DenseSet GUIDPreservedSymbols; + + // Not 100% sure what these are, but they impact what's internalized and + // what's inlined across modules, I believe. + StringMap ImportLists; + StringMap ExportLists; + StringMap ModuleToDefinedGVSummaries; +}; + +// Just an argument to the `LLVMRustCreateThinLTOData` function below. +struct LLVMRustThinLTOModule { + const char *identifier; + const char *data; + size_t len; +}; + +// This is copied from `lib/LTO/ThinLTOCodeGenerator.cpp`, not sure what it +// does. 
+static const GlobalValueSummary * +getFirstDefinitionForLinker(const GlobalValueSummaryList &GVSummaryList) { + auto StrongDefForLinker = llvm::find_if( + GVSummaryList, [](const std::unique_ptr &Summary) { + auto Linkage = Summary->linkage(); + return !GlobalValue::isAvailableExternallyLinkage(Linkage) && + !GlobalValue::isWeakForLinker(Linkage); + }); + if (StrongDefForLinker != GVSummaryList.end()) + return StrongDefForLinker->get(); + + auto FirstDefForLinker = llvm::find_if( + GVSummaryList, [](const std::unique_ptr &Summary) { + auto Linkage = Summary->linkage(); + return !GlobalValue::isAvailableExternallyLinkage(Linkage); + }); + if (FirstDefForLinker == GVSummaryList.end()) + return nullptr; + return FirstDefForLinker->get(); +} + +// This is a helper function we added that isn't present in LLVM's source. +// +// The way LTO works in Rust is that we typically have a number of symbols that +// we know ahead of time need to be preserved. We want to ensure that ThinLTO +// doesn't accidentally internalize any of these and otherwise is always +// ready to keep them linking correctly. +// +// This function will recursively walk the `GUID` provided and all of its +// references, as specified in the `Index`. In other words, we're taking a +// `GUID` as input, adding it to `Preserved`, and then taking all `GUID` +// items that the input references and recursing. 
+static void +addPreservedGUID(const ModuleSummaryIndex &Index, + DenseSet &Preserved, + GlobalValue::GUID GUID) { + if (Preserved.count(GUID)) + return; + Preserved.insert(GUID); + + auto SummaryList = Index.findGlobalValueSummaryList(GUID); + if (SummaryList == Index.end()) + return; + for (auto &Summary : SummaryList->second) { + for (auto &Ref : Summary->refs()) { + if (Ref.isGUID()) { + addPreservedGUID(Index, Preserved, Ref.getGUID()); + } else { + auto Value = Ref.getValue(); + addPreservedGUID(Index, Preserved, Value->getGUID()); + } + } + + GlobalValueSummary *GVSummary = Summary.get(); + if (isa(GVSummary)) { + FunctionSummary *FS = cast(GVSummary); + for (auto &Call: FS->calls()) { + if (Call.first.isGUID()) { + addPreservedGUID(Index, Preserved, Call.first.getGUID()); + } else { + auto Value = Call.first.getValue(); + addPreservedGUID(Index, Preserved, Value->getGUID()); + } + } + for (auto &GUID: FS->type_tests()) { + addPreservedGUID(Index, Preserved, GUID); + } + } + } +} + +// The main entry point for creating the global ThinLTO analysis. The structure +// here is basically the same as before threads are spawned in the `run` +// function of `lib/LTO/ThinLTOCodeGenerator.cpp`. 
+extern "C" LLVMRustThinLTOData* +LLVMRustCreateThinLTOData(LLVMRustThinLTOModule *modules, + int num_modules, + const char **preserved_symbols, + int num_symbols) { + auto Ret = llvm::make_unique(); + + // Load each module's summary and merge it into one combined index + for (int i = 0; i < num_modules; i++) { + auto module = &modules[i]; + StringRef buffer(module->data, module->len); + MemoryBufferRef mem_buffer(buffer, module->identifier); + + Ret->ModuleMap[module->identifier] = mem_buffer; + + Expected> ObjOrErr = + object::ModuleSummaryIndexObjectFile::create(mem_buffer); + if (!ObjOrErr) { + LLVMRustSetLastError(toString(ObjOrErr.takeError()).c_str()); + return nullptr; + } + auto Index = (*ObjOrErr)->takeIndex(); + Ret->Index.mergeFrom(std::move(Index), i); + } + + // Collect for each module the list of function it defines (GUID -> Summary) + Ret->Index.collectDefinedGVSummariesPerModule(Ret->ModuleToDefinedGVSummaries); + + // Convert the preserved symbols set from string to GUID, this is then needed + // for internalization. We use `addPreservedGUID` to include any transitively + // used symbol as well. + for (int i = 0; i < num_symbols; i++) { + addPreservedGUID(Ret->Index, + Ret->GUIDPreservedSymbols, + GlobalValue::getGUID(preserved_symbols[i])); + } + + // Collect the import/export lists for all modules from the call-graph in the + // combined index + // + // This is copied from `lib/LTO/ThinLTOCodeGenerator.cpp` + computeDeadSymbols(Ret->Index, Ret->GUIDPreservedSymbols); + ComputeCrossModuleImport( + Ret->Index, + Ret->ModuleToDefinedGVSummaries, + Ret->ImportLists, + Ret->ExportLists + ); + + // Resolve LinkOnce/Weak symbols, this has to be computed early be cause it + // impacts the caching. 
+ // + // This is copied from `lib/LTO/ThinLTOCodeGenerator.cpp` + StringMap> ResolvedODR; + DenseMap PrevailingCopy; + for (auto &I : Ret->Index) { + if (I.second.size() > 1) + PrevailingCopy[I.first] = getFirstDefinitionForLinker(I.second); + } + auto isPrevailing = [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) { + const auto &Prevailing = PrevailingCopy.find(GUID); + if (Prevailing == PrevailingCopy.end()) + return true; + return Prevailing->second == S; + }; + auto recordNewLinkage = [&](StringRef ModuleIdentifier, + GlobalValue::GUID GUID, + GlobalValue::LinkageTypes NewLinkage) { + ResolvedODR[ModuleIdentifier][GUID] = NewLinkage; + }; + thinLTOResolveWeakForLinkerInIndex(Ret->Index, isPrevailing, recordNewLinkage); + auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) { + const auto &ExportList = Ret->ExportLists.find(ModuleIdentifier); + return (ExportList != Ret->ExportLists.end() && + ExportList->second.count(GUID)) || + Ret->GUIDPreservedSymbols.count(GUID); + }; + thinLTOInternalizeAndPromoteInIndex(Ret->Index, isExported); + + return Ret.release(); +} + +extern "C" void +LLVMRustFreeThinLTOData(LLVMRustThinLTOData *Data) { + delete Data; +} + +// Below are the various passes that happen *per module* when doing ThinLTO. +// +// In other words, these are the functions that are all run concurrently +// with one another, one per module. The passes here correspond to the analysis +// passes in `lib/LTO/ThinLTOCodeGenerator.cpp`, currently found in the +// `ProcessThinLTOModule` function. Here they're split up into separate steps +// so rustc can save off the intermediate bytecode between each step. 
+ +extern "C" bool +LLVMRustPrepareThinLTORename(const LLVMRustThinLTOData *Data, LLVMModuleRef M) { + Module &Mod = *unwrap(M); + if (renameModuleForThinLTO(Mod, Data->Index)) { + LLVMRustSetLastError("renameModuleForThinLTO failed"); + return false; + } + return true; +} + +extern "C" bool +LLVMRustPrepareThinLTOResolveWeak(const LLVMRustThinLTOData *Data, LLVMModuleRef M) { + Module &Mod = *unwrap(M); + const auto &DefinedGlobals = Data->ModuleToDefinedGVSummaries.lookup(Mod.getModuleIdentifier()); + thinLTOResolveWeakForLinkerModule(Mod, DefinedGlobals); + return true; +} + +extern "C" bool +LLVMRustPrepareThinLTOInternalize(const LLVMRustThinLTOData *Data, LLVMModuleRef M) { + Module &Mod = *unwrap(M); + const auto &DefinedGlobals = Data->ModuleToDefinedGVSummaries.lookup(Mod.getModuleIdentifier()); + thinLTOInternalizeModule(Mod, DefinedGlobals); + return true; +} + +extern "C" bool +LLVMRustPrepareThinLTOImport(const LLVMRustThinLTOData *Data, LLVMModuleRef M) { + Module &Mod = *unwrap(M); + const auto &ImportList = Data->ImportLists.lookup(Mod.getModuleIdentifier()); + auto Loader = [&](StringRef Identifier) { + const auto &Memory = Data->ModuleMap.lookup(Identifier); + auto &Context = Mod.getContext(); + return getLazyBitcodeModule(Memory, Context, true, true); + }; + FunctionImporter Importer(Data->Index, Loader); + Expected Result = Importer.importFunctions(Mod, ImportList); + if (!Result) { + LLVMRustSetLastError(toString(Result.takeError()).c_str()); + return false; + } + return true; +} + +// This struct and various functions are sort of a hack right now, but the +// problem is that we've got in-memory LLVM modules after we generate and +// optimize all codegen-units for one compilation in rustc. To be compatible +// with the LTO support above we need to serialize the modules plus their +// ThinLTO summary into memory. +// +// This structure is basically an owned version of a serialize module, with +// a ThinLTO summary attached. 
+struct LLVMRustThinLTOBuffer { + std::string data; +}; + +extern "C" LLVMRustThinLTOBuffer* +LLVMRustThinLTOBufferCreate(LLVMModuleRef M) { + auto Ret = llvm::make_unique(); + { + raw_string_ostream OS(Ret->data); + { + legacy::PassManager PM; + PM.add(createWriteThinLTOBitcodePass(OS)); + PM.run(*unwrap(M)); + } + } + return Ret.release(); +} + +extern "C" void +LLVMRustThinLTOBufferFree(LLVMRustThinLTOBuffer *Buffer) { + delete Buffer; +} + +extern "C" const void* +LLVMRustThinLTOBufferPtr(const LLVMRustThinLTOBuffer *Buffer) { + return Buffer->data.data(); +} + +extern "C" size_t +LLVMRustThinLTOBufferLen(const LLVMRustThinLTOBuffer *Buffer) { + return Buffer->data.length(); +} + +// This is what we used to parse upstream bitcode for actual ThinLTO +// processing. We'll call this once per module optimized through ThinLTO, and +// it'll be called concurrently on many threads. +extern "C" LLVMModuleRef +LLVMRustParseBitcodeForThinLTO(LLVMContextRef Context, + const char *data, + size_t len, + const char *identifier) { + StringRef Data(data, len); + MemoryBufferRef Buffer(Data, identifier); + unwrap(Context)->enableDebugTypeODRUniquing(); + Expected> SrcOrError = + parseBitcodeFile(Buffer, *unwrap(Context)); + if (!SrcOrError) { + LLVMRustSetLastError(toString(SrcOrError.takeError()).c_str()); + return nullptr; + } + return wrap(std::move(*SrcOrError).release()); +} + +#else + +extern "C" bool +LLVMRustWriteThinBitcodeToFile(LLVMPassManagerRef PMR, + LLVMModuleRef M, + const char *BcFile) { + llvm_unreachable("ThinLTO not available"); +} + +struct LLVMRustThinLTOData { +}; + +struct LLVMRustThinLTOModule { +}; + +extern "C" LLVMRustThinLTOData* +LLVMRustCreateThinLTOData(LLVMRustThinLTOModule *modules, + int num_modules, + const char **preserved_symbols, + int num_symbols) { + llvm_unreachable("ThinLTO not available"); +} + +extern "C" bool +LLVMRustPrepareThinLTORename(const LLVMRustThinLTOData *Data, LLVMModuleRef M) { + llvm_unreachable("ThinLTO not 
available"); +} + +extern "C" bool +LLVMRustPrepareThinLTOResolveWeak(const LLVMRustThinLTOData *Data, LLVMModuleRef M) { + llvm_unreachable("ThinLTO not available"); +} + +extern "C" bool +LLVMRustPrepareThinLTOInternalize(const LLVMRustThinLTOData *Data, LLVMModuleRef M) { + llvm_unreachable("ThinLTO not available"); +} + +extern "C" bool +LLVMRustPrepareThinLTOImport(const LLVMRustThinLTOData *Data, LLVMModuleRef M) { + llvm_unreachable("ThinLTO not available"); +} + +extern "C" void +LLVMRustFreeThinLTOData(LLVMRustThinLTOData *Data) { + llvm_unreachable("ThinLTO not available"); +} + +struct LLVMRustThinLTOBuffer { +}; + +extern "C" LLVMRustThinLTOBuffer* +LLVMRustThinLTOBufferCreate(LLVMModuleRef M) { + llvm_unreachable("ThinLTO not available"); +} + +extern "C" void +LLVMRustThinLTOBufferFree(LLVMRustThinLTOBuffer *Buffer) { + llvm_unreachable("ThinLTO not available"); +} + +extern "C" const void* +LLVMRustThinLTOBufferPtr(const LLVMRustThinLTOBuffer *Buffer) { + llvm_unreachable("ThinLTO not available"); +} + +extern "C" size_t +LLVMRustThinLTOBufferLen(const LLVMRustThinLTOBuffer *Buffer) { + llvm_unreachable("ThinLTO not available"); +} + +extern "C" LLVMModuleRef +LLVMRustParseBitcodeForThinLTO(LLVMContextRef Context, + const char *data, + size_t len, + const char *identifier) { + llvm_unreachable("ThinLTO not available"); +} +#endif // LLVM_VERSION_GE(4, 0) diff --git a/src/test/codegen-units/item-collection/drop_in_place_intrinsic.rs b/src/test/codegen-units/item-collection/drop_in_place_intrinsic.rs index d8e6028b799fb..bd1325e7501c1 100644 --- a/src/test/codegen-units/item-collection/drop_in_place_intrinsic.rs +++ b/src/test/codegen-units/item-collection/drop_in_place_intrinsic.rs @@ -11,7 +11,7 @@ // ignore-tidy-linelength // compile-flags:-Zprint-trans-items=eager -//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0] @@ drop_in_place_intrinsic.cgu-0[Internal] +//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0] @@ drop_in_place_intrinsic0[Internal] struct 
StructWithDtor(u32); impl Drop for StructWithDtor { @@ -22,7 +22,7 @@ impl Drop for StructWithDtor { //~ TRANS_ITEM fn drop_in_place_intrinsic::main[0] fn main() { - //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<[drop_in_place_intrinsic::StructWithDtor[0]; 2]> @@ drop_in_place_intrinsic.cgu-0[Internal] + //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<[drop_in_place_intrinsic::StructWithDtor[0]; 2]> @@ drop_in_place_intrinsic0[Internal] let x = [StructWithDtor(0), StructWithDtor(1)]; drop_slice_in_place(&x); @@ -34,7 +34,7 @@ fn drop_slice_in_place(x: &[StructWithDtor]) { // This is the interesting thing in this test case: Normally we would // not have drop-glue for the unsized [StructWithDtor]. This has to be // generated though when the drop_in_place() intrinsic is used. - //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<[drop_in_place_intrinsic::StructWithDtor[0]]> @@ drop_in_place_intrinsic.cgu-0[Internal] + //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<[drop_in_place_intrinsic::StructWithDtor[0]]> @@ drop_in_place_intrinsic0[Internal] ::std::ptr::drop_in_place(x as *const _ as *mut [StructWithDtor]); } } diff --git a/src/test/codegen-units/item-collection/generic-drop-glue.rs b/src/test/codegen-units/item-collection/generic-drop-glue.rs index 06e02b100152e..108a8b570dedd 100644 --- a/src/test/codegen-units/item-collection/generic-drop-glue.rs +++ b/src/test/codegen-units/item-collection/generic-drop-glue.rs @@ -45,7 +45,7 @@ enum EnumNoDrop { struct NonGenericNoDrop(i32); struct NonGenericWithDrop(i32); -//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0] @@ generic_drop_glue.cgu-0[Internal] +//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0] @@ generic_drop_glue0[Internal] impl Drop for NonGenericWithDrop { //~ TRANS_ITEM fn generic_drop_glue::{{impl}}[2]::drop[0] @@ -54,11 +54,11 @@ impl Drop for NonGenericWithDrop { //~ TRANS_ITEM fn generic_drop_glue::main[0] fn main() { - //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]> @@ 
generic_drop_glue.cgu-0[Internal] + //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]> @@ generic_drop_glue0[Internal] //~ TRANS_ITEM fn generic_drop_glue::{{impl}}[0]::drop[0] let _ = StructWithDrop { x: 0i8, y: 'a' }.x; - //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]> @@ generic_drop_glue.cgu-0[Internal] + //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]> @@ generic_drop_glue0[Internal] //~ TRANS_ITEM fn generic_drop_glue::{{impl}}[0]::drop[0]<&str, generic_drop_glue::NonGenericNoDrop[0]> let _ = StructWithDrop { x: "&str", y: NonGenericNoDrop(0) }.y; @@ -67,17 +67,17 @@ fn main() { // This is supposed to generate drop-glue because it contains a field that // needs to be dropped. - //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]> @@ generic_drop_glue.cgu-0[Internal] + //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]> @@ generic_drop_glue0[Internal] let _ = StructNoDrop { x: NonGenericWithDrop(0), y: 0f64 }.y; - //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]> @@ generic_drop_glue.cgu-0[Internal] + //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]> @@ generic_drop_glue0[Internal] //~ TRANS_ITEM fn generic_drop_glue::{{impl}}[1]::drop[0] let _ = match EnumWithDrop::A::(0) { EnumWithDrop::A(x) => x, EnumWithDrop::B(x) => x as i32 }; - //~TRANS_ITEM fn core::ptr[0]::drop_in_place[0]> @@ generic_drop_glue.cgu-0[Internal] + //~TRANS_ITEM fn core::ptr[0]::drop_in_place[0]> @@ generic_drop_glue0[Internal] //~ TRANS_ITEM fn generic_drop_glue::{{impl}}[1]::drop[0] let _ = match EnumWithDrop::B::(1.0) { EnumWithDrop::A(x) => x, diff --git a/src/test/codegen-units/item-collection/instantiation-through-vtable.rs b/src/test/codegen-units/item-collection/instantiation-through-vtable.rs index 9c6bdb6624eae..875cacb3907e0 100644 --- a/src/test/codegen-units/item-collection/instantiation-through-vtable.rs +++ b/src/test/codegen-units/item-collection/instantiation-through-vtable.rs @@ -31,13 +31,13 @@ impl Trait for Struct { fn main() { let s1 = Struct { _a: 0u32 }; - //~ 
TRANS_ITEM fn core::ptr[0]::drop_in_place[0]> @@ instantiation_through_vtable.cgu-0[Internal] + //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]> @@ instantiation_through_vtable0[Internal] //~ TRANS_ITEM fn instantiation_through_vtable::{{impl}}[0]::foo[0] //~ TRANS_ITEM fn instantiation_through_vtable::{{impl}}[0]::bar[0] let _ = &s1 as &Trait; let s1 = Struct { _a: 0u64 }; - //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]> @@ instantiation_through_vtable.cgu-0[Internal] + //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]> @@ instantiation_through_vtable0[Internal] //~ TRANS_ITEM fn instantiation_through_vtable::{{impl}}[0]::foo[0] //~ TRANS_ITEM fn instantiation_through_vtable::{{impl}}[0]::bar[0] let _ = &s1 as &Trait; diff --git a/src/test/codegen-units/item-collection/non-generic-drop-glue.rs b/src/test/codegen-units/item-collection/non-generic-drop-glue.rs index 5f70ff396ddd5..a6081b2c5eb23 100644 --- a/src/test/codegen-units/item-collection/non-generic-drop-glue.rs +++ b/src/test/codegen-units/item-collection/non-generic-drop-glue.rs @@ -13,7 +13,7 @@ #![deny(dead_code)] -//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0] @@ non_generic_drop_glue.cgu-0[Internal] +//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0] @@ non_generic_drop_glue0[Internal] struct StructWithDrop { x: i32 } @@ -27,7 +27,7 @@ struct StructNoDrop { x: i32 } -//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0] @@ non_generic_drop_glue.cgu-0[Internal] +//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0] @@ non_generic_drop_glue0[Internal] enum EnumWithDrop { A(i32) } diff --git a/src/test/codegen-units/item-collection/transitive-drop-glue.rs b/src/test/codegen-units/item-collection/transitive-drop-glue.rs index e41cb34eec6ab..8eef5f00f2a78 100644 --- a/src/test/codegen-units/item-collection/transitive-drop-glue.rs +++ b/src/test/codegen-units/item-collection/transitive-drop-glue.rs @@ -13,11 +13,11 @@ #![deny(dead_code)] -//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0] @@ 
transitive_drop_glue.cgu-0[Internal] +//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0] @@ transitive_drop_glue0[Internal] struct Root(Intermediate); -//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0] @@ transitive_drop_glue.cgu-0[Internal] +//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0] @@ transitive_drop_glue0[Internal] struct Intermediate(Leaf); -//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0] @@ transitive_drop_glue.cgu-0[Internal] +//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0] @@ transitive_drop_glue0[Internal] struct Leaf; impl Drop for Leaf { @@ -38,15 +38,15 @@ fn main() { let _ = Root(Intermediate(Leaf)); - //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]> @@ transitive_drop_glue.cgu-0[Internal] - //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]> @@ transitive_drop_glue.cgu-0[Internal] - //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]> @@ transitive_drop_glue.cgu-0[Internal] + //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]> @@ transitive_drop_glue0[Internal] + //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]> @@ transitive_drop_glue0[Internal] + //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]> @@ transitive_drop_glue0[Internal] //~ TRANS_ITEM fn transitive_drop_glue::{{impl}}[1]::drop[0] let _ = RootGen(IntermediateGen(LeafGen(0u32))); - //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]> @@ transitive_drop_glue.cgu-0[Internal] - //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]> @@ transitive_drop_glue.cgu-0[Internal] - //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]> @@ transitive_drop_glue.cgu-0[Internal] + //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]> @@ transitive_drop_glue0[Internal] + //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]> @@ transitive_drop_glue0[Internal] + //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]> @@ transitive_drop_glue0[Internal] //~ TRANS_ITEM fn transitive_drop_glue::{{impl}}[1]::drop[0] let _ = RootGen(IntermediateGen(LeafGen(0i16))); } diff --git a/src/test/codegen-units/item-collection/tuple-drop-glue.rs 
b/src/test/codegen-units/item-collection/tuple-drop-glue.rs index 39043cf87cbec..7edb1c14525a9 100644 --- a/src/test/codegen-units/item-collection/tuple-drop-glue.rs +++ b/src/test/codegen-units/item-collection/tuple-drop-glue.rs @@ -13,7 +13,7 @@ #![deny(dead_code)] -//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0] @@ tuple_drop_glue.cgu-0[Internal] +//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0] @@ tuple_drop_glue0[Internal] struct Dropped; impl Drop for Dropped { @@ -23,10 +23,10 @@ impl Drop for Dropped { //~ TRANS_ITEM fn tuple_drop_glue::main[0] fn main() { - //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<(u32, tuple_drop_glue::Dropped[0])> @@ tuple_drop_glue.cgu-0[Internal] + //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<(u32, tuple_drop_glue::Dropped[0])> @@ tuple_drop_glue0[Internal] let x = (0u32, Dropped); - //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<(i16, (tuple_drop_glue::Dropped[0], bool))> @@ tuple_drop_glue.cgu-0[Internal] - //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<(tuple_drop_glue::Dropped[0], bool)> @@ tuple_drop_glue.cgu-0[Internal] + //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<(i16, (tuple_drop_glue::Dropped[0], bool))> @@ tuple_drop_glue0[Internal] + //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<(tuple_drop_glue::Dropped[0], bool)> @@ tuple_drop_glue0[Internal] let x = (0i16, (Dropped, true)); } diff --git a/src/test/codegen-units/item-collection/unsizing.rs b/src/test/codegen-units/item-collection/unsizing.rs index de7613741b27b..cf0c6643238b1 100644 --- a/src/test/codegen-units/item-collection/unsizing.rs +++ b/src/test/codegen-units/item-collection/unsizing.rs @@ -57,13 +57,13 @@ fn main() { // simple case let bool_sized = &true; - //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0] @@ unsizing.cgu-0[Internal] + //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0] @@ unsizing0[Internal] //~ TRANS_ITEM fn unsizing::{{impl}}[0]::foo[0] let _bool_unsized = bool_sized as &Trait; let char_sized = &'a'; - //~ TRANS_ITEM fn 
core::ptr[0]::drop_in_place[0] @@ unsizing.cgu-0[Internal] + //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0] @@ unsizing0[Internal] //~ TRANS_ITEM fn unsizing::{{impl}}[1]::foo[0] let _char_unsized = char_sized as &Trait; @@ -73,13 +73,13 @@ fn main() _b: 2, _c: 3.0f64 }; - //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0] @@ unsizing.cgu-0[Internal] + //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0] @@ unsizing0[Internal] //~ TRANS_ITEM fn unsizing::{{impl}}[2]::foo[0] let _struct_unsized = struct_sized as &Struct; // custom coercion let wrapper_sized = Wrapper(&0u32); - //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0] @@ unsizing.cgu-0[Internal] + //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0] @@ unsizing0[Internal] //~ TRANS_ITEM fn unsizing::{{impl}}[3]::foo[0] let _wrapper_sized = wrapper_sized as Wrapper; } diff --git a/src/test/run-make/extra-filename-with-temp-outputs/Makefile b/src/test/run-make/extra-filename-with-temp-outputs/Makefile index d33c18a6f3c2c..13ca397eaf23e 100644 --- a/src/test/run-make/extra-filename-with-temp-outputs/Makefile +++ b/src/test/run-make/extra-filename-with-temp-outputs/Makefile @@ -2,5 +2,5 @@ all: $(RUSTC) -C extra-filename=bar foo.rs -C save-temps - rm $(TMPDIR)/foobar.0.o + rm $(TMPDIR)/foobar.foo0.rust-cgu.o rm $(TMPDIR)/$(call BIN,foobar) diff --git a/src/test/run-make/sepcomp-cci-copies/Makefile b/src/test/run-make/sepcomp-cci-copies/Makefile index 189088219d5b3..8324a074d6c72 100644 --- a/src/test/run-make/sepcomp-cci-copies/Makefile +++ b/src/test/run-make/sepcomp-cci-copies/Makefile @@ -6,4 +6,4 @@ all: $(RUSTC) cci_lib.rs $(RUSTC) foo.rs --emit=llvm-ir -C codegen-units=3 - [ "$$(cat "$(TMPDIR)"/foo.?.ll | grep -c define\ .*cci_fn)" -eq "2" ] + [ "$$(cat "$(TMPDIR)"/foo.*.ll | grep -c define\ .*cci_fn)" -eq "2" ] diff --git a/src/test/run-make/sepcomp-inlining/Makefile b/src/test/run-make/sepcomp-inlining/Makefile index 720dfff2c0438..6dc837b8a7874 100644 --- a/src/test/run-make/sepcomp-inlining/Makefile +++ 
b/src/test/run-make/sepcomp-inlining/Makefile @@ -8,7 +8,7 @@ all: $(RUSTC) foo.rs --emit=llvm-ir -C codegen-units=3 - [ "$$(cat "$(TMPDIR)"/foo.?.ll | grep -c define\ i32\ .*inlined)" -eq "0" ] - [ "$$(cat "$(TMPDIR)"/foo.?.ll | grep -c define\ internal\ i32\ .*inlined)" -eq "2" ] - [ "$$(cat "$(TMPDIR)"/foo.?.ll | grep -c define\ hidden\ i32\ .*normal)" -eq "1" ] - [ "$$(cat "$(TMPDIR)"/foo.?.ll | grep -c declare\ hidden\ i32\ .*normal)" -eq "2" ] + [ "$$(cat "$(TMPDIR)"/foo.*.ll | grep -c define\ i32\ .*inlined)" -eq "0" ] + [ "$$(cat "$(TMPDIR)"/foo.*.ll | grep -c define\ internal\ i32\ .*inlined)" -eq "2" ] + [ "$$(cat "$(TMPDIR)"/foo.*.ll | grep -c define\ hidden\ i32\ .*normal)" -eq "1" ] + [ "$$(cat "$(TMPDIR)"/foo.*.ll | grep -c declare\ hidden\ i32\ .*normal)" -eq "2" ] diff --git a/src/test/run-make/sepcomp-separate/Makefile b/src/test/run-make/sepcomp-separate/Makefile index a475bdfd74a24..5b8bdb0fad8cd 100644 --- a/src/test/run-make/sepcomp-separate/Makefile +++ b/src/test/run-make/sepcomp-separate/Makefile @@ -6,4 +6,4 @@ all: $(RUSTC) foo.rs --emit=llvm-ir -C codegen-units=3 - [ "$$(cat "$(TMPDIR)"/foo.?.ll | grep -c define\ .*magic_fn)" -eq "3" ] + [ "$$(cat "$(TMPDIR)"/foo.*.ll | grep -c define\ .*magic_fn)" -eq "3" ] diff --git a/src/test/run-pass/auxiliary/thin-lto-inlines-aux.rs b/src/test/run-pass/auxiliary/thin-lto-inlines-aux.rs new file mode 100644 index 0000000000000..ccbb0e7a71863 --- /dev/null +++ b/src/test/run-pass/auxiliary/thin-lto-inlines-aux.rs @@ -0,0 +1,17 @@ +// Copyright 2017 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms.
+ +// no-prefer-dynamic + +#![crate_type = "rlib"] + +pub fn bar() -> u32 { + 3 +} diff --git a/src/test/run-pass/thin-lto-inlines.rs b/src/test/run-pass/thin-lto-inlines.rs new file mode 100644 index 0000000000000..3135a682d869d --- /dev/null +++ b/src/test/run-pass/thin-lto-inlines.rs @@ -0,0 +1,39 @@ +// Copyright 2017 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// compile-flags: -Z thinlto -C codegen-units=8 -O +// min-llvm-version 4.0 +// ignore-emscripten + +// We want to assert here that ThinLTO will inline across codegen units. There's +// not really a great way to do that in general so we sort of hack around it by +// praying two functions go into separate codegen units and then assuming that +// if inlining *doesn't* happen the first byte of the functions will differ. + +pub fn foo() -> u32 { + bar::bar() +} + +mod bar { + pub fn bar() -> u32 { + 3 + } +} + +fn main() { + println!("{} {}", foo(), bar::bar()); + + unsafe { + let foo = foo as usize as *const u8; + let bar = bar::bar as usize as *const u8; + + assert_eq!(*foo, *bar); + } +} diff --git a/src/test/run-pass/thin-lto-inlines2.rs b/src/test/run-pass/thin-lto-inlines2.rs new file mode 100644 index 0000000000000..ed899d2b115b6 --- /dev/null +++ b/src/test/run-pass/thin-lto-inlines2.rs @@ -0,0 +1,38 @@ +// Copyright 2017 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms.
+ +// compile-flags: -Z thinlto -C codegen-units=8 -O -C lto +// aux-build:thin-lto-inlines-aux.rs +// min-llvm-version 4.0 +// no-prefer-dynamic +// ignore-emscripten + +// We want to assert here that ThinLTO will inline across codegen units. There's +// not really a great way to do that in general so we sort of hack around it by +// praying two functions go into separate codegen units and then assuming that +// if inlining *doesn't* happen the first byte of the functions will differ. + +extern crate thin_lto_inlines_aux as bar; + +pub fn foo() -> u32 { + bar::bar() +} + +fn main() { + println!("{} {}", foo(), bar::bar()); + + unsafe { + let foo = foo as usize as *const u8; + let bar = bar::bar as usize as *const u8; + + assert_eq!(*foo, *bar); + } +} +