Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Assertion failed: (MI && "No instruction defining live value"), function computeDeadValues #26

Closed
shepmaster opened this issue Feb 15, 2017 · 22 comments
Labels
A-libcore Affects compiling the core library A-llvm Affects the LLVM AVR backend has-llvm-commit This issue should be fixed in upstream LLVM has-reduced-testcase A small LLVM IR file exists that demonstrates the problem

Comments

@shepmaster
Copy link
Member

shepmaster commented Feb 15, 2017

A reproduction:

; ModuleID = 'bugpoint-reduced-simplified.bc'
source_filename = "bugpoint-output-016b51a.bc"
target datalayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-n8"
target triple = "avr-atmel-none"

%"num::bignum::Big32x40.ALPHA.0.9.10.15.16.17.18.22.27.31.32.36.38.49.64.71.85.88.93.98.100.0.10.12.14.15.16.19.25.26.28.33.34.35.48.63.74.79.88.93.107.119" = type { i16, [2 x i8], [40 x i32], [0 x i8] }

@core_num_flt2dec_strategy_dragon_POW10TO128 = external constant [14 x i32], align 4

define nonnull dereferenceable(164) %"num::bignum::Big32x40.ALPHA.0.9.10.15.16.17.18.22.27.31.32.36.38.49.64.71.85.88.93.98.100.0.10.12.14.15.16.19.25.26.28.33.34.35.48.63.74.79.88.93.107.119"* @core_num_flt2dec_strategy_dragon(%"num::bignum::Big32x40.ALPHA.0.9.10.15.16.17.18.22.27.31.32.36.38.49.64.71.85.88.93.98.100.0.10.12.14.15.16.19.25.26.28.33.34.35.48.63.74.79.88.93.107.119"* returned dereferenceable(164), i16) unnamed_addr {
start:
  %ret.i = alloca [40 x i32], align 4
  %2 = icmp eq i16 undef, 0
  br i1 %2, label %bb5, label %bb1

bb1:                                              ; preds = %start
  unreachable

bb5:                                              ; preds = %start
  br i1 undef, label %bb14, label %bb11

bb11:                                             ; preds = %bb5
  unreachable

bb14:                                             ; preds = %bb5
  %3 = bitcast [40 x i32]* %ret.i to i8*
  call void @llvm.memset.p0i8.i16(i8* nonnull %3, i8 0, i16 160, i32 4, i1 false)
  %4 = getelementptr inbounds %"num::bignum::Big32x40.ALPHA.0.9.10.15.16.17.18.22.27.31.32.36.38.49.64.71.85.88.93.98.100.0.10.12.14.15.16.19.25.26.28.33.34.35.48.63.74.79.88.93.107.119", %"num::bignum::Big32x40.ALPHA.0.9.10.15.16.17.18.22.27.31.32.36.38.49.64.71.85.88.93.98.100.0.10.12.14.15.16.19.25.26.28.33.34.35.48.63.74.79.88.93.107.119"* %0, i16 0, i32 0
  %5 = load i16, i16* %4, align 2
  %6 = icmp ult i16 %5, 14
  %7 = getelementptr inbounds %"num::bignum::Big32x40.ALPHA.0.9.10.15.16.17.18.22.27.31.32.36.38.49.64.71.85.88.93.98.100.0.10.12.14.15.16.19.25.26.28.33.34.35.48.63.74.79.88.93.107.119", %"num::bignum::Big32x40.ALPHA.0.9.10.15.16.17.18.22.27.31.32.36.38.49.64.71.85.88.93.98.100.0.10.12.14.15.16.19.25.26.28.33.34.35.48.63.74.79.88.93.107.119"* %0, i16 0, i32 2, i16 0
  br i1 %6, label %bb2.i38, label %bb3.i39

bb2.i38:                                          ; preds = %bb14
  %8 = getelementptr inbounds %"num::bignum::Big32x40.ALPHA.0.9.10.15.16.17.18.22.27.31.32.36.38.49.64.71.85.88.93.98.100.0.10.12.14.15.16.19.25.26.28.33.34.35.48.63.74.79.88.93.107.119", %"num::bignum::Big32x40.ALPHA.0.9.10.15.16.17.18.22.27.31.32.36.38.49.64.71.85.88.93.98.100.0.10.12.14.15.16.19.25.26.28.33.34.35.48.63.74.79.88.93.107.119"* %0, i16 0, i32 2, i16 %5
  %9 = ptrtoint i32* %7 to i16
  br label %bb4.outer.i122

bb4.outer.i122:                                   ; preds = %bb24.i141, %bb2.i38
  %iter.sroa.0.0.ph.i119 = phi i16 [ %13, %bb24.i141 ], [ %9, %bb2.i38 ]
  %retsz.0.ph.i121 = phi i16 [ %.retsz.0.i140, %bb24.i141 ], [ 0, %bb2.i38 ]
  br label %bb4.i125

bb4.i125:                                         ; preds = %core.iter.Enumerate.ALPHA, %bb4.outer.i122
  %iter.sroa.0.0.i123 = phi i16 [ %13, %core.iter.Enumerate.ALPHA ], [ %iter.sroa.0.0.ph.i119, %bb4.outer.i122 ]
  %10 = inttoptr i16 %iter.sroa.0.0.i123 to i32*
  %11 = icmp eq i32* %10, %8
  br i1 %11, label %core.num.bignum.Big32x40.exit44, label %core.iter.Enumerate.ALPHA

core.iter.Enumerate.ALPHA:                        ; preds = %bb4.i125
  %12 = getelementptr inbounds i32, i32* %10, i16 1
  %13 = ptrtoint i32* %12 to i16
  %14 = load i32, i32* %10, align 4
  %15 = icmp eq i32 %14, 0
  br i1 %15, label %bb4.i125, label %core..iter..Enumerate.exit17

core..iter..Enumerate.exit17:                     ; preds = %core.iter.Enumerate.ALPHA
  %16 = zext i32 %14 to i64
  br label %core..iter..Enumerate.exit17.i132

core..iter..Enumerate.exit17.i132:                ; preds = %core_slice_IndexMut.exit13, %core..iter..Enumerate.exit17
  %carry.085.i129 = phi i32 [ 0, %core..iter..Enumerate.exit17 ], [ %29, %core_slice_IndexMut.exit13 ]
  %17 = icmp ult i16 undef, 40
  br i1 %17, label %core_slice_IndexMut.exit13, label %panic.i.i14.i134

bb16.i133:                                        ; preds = %core_slice_IndexMut.exit13
  %18 = icmp eq i32 %29, 0
  br i1 %18, label %bb24.i141, label %bb21.i136

panic.i.i14.i134:                                 ; preds = %core..iter..Enumerate.exit17.i132
  unreachable

core_slice_IndexMut.exit13:                       ; preds = %core..iter..Enumerate.exit17.i132
  %19 = load i32, i32* null, align 4
  %20 = getelementptr inbounds [40 x i32], [40 x i32]* %ret.i, i16 0, i16 undef
  %21 = load i32, i32* %20, align 4
  %22 = zext i32 %19 to i64
  %23 = mul nuw i64 %22, %16
  %24 = zext i32 %21 to i64
  %25 = zext i32 %carry.085.i129 to i64
  %26 = add nuw nsw i64 %24, %25
  %27 = add i64 %26, %23
  %28 = lshr i64 %27, 32
  %29 = trunc i64 %28 to i32
  %30 = icmp eq i32* undef, getelementptr inbounds ([14 x i32], [14 x i32]* @core_num_flt2dec_strategy_dragon_POW10TO128, i16 1, i16 0)
  br i1 %30, label %bb16.i133, label %core..iter..Enumerate.exit17.i132

bb21.i136:                                        ; preds = %bb16.i133
  %31 = icmp ult i16 undef, 40
  br i1 %31, label %"_ZN4core5slice70_$LT$impl$u20$core..ops..IndexMut$LT$I$GT$$u20$for$u20$$u5b$T$u5d$$GT$9index_mut17h8eccc0af1ec6f971E.exit.i138", label %panic.i.i.i137

panic.i.i.i137:                                   ; preds = %bb21.i136
  unreachable

"_ZN4core5slice70_$LT$impl$u20$core..ops..IndexMut$LT$I$GT$$u20$for$u20$$u5b$T$u5d$$GT$9index_mut17h8eccc0af1ec6f971E.exit.i138": ; preds = %bb21.i136
  store i32 %29, i32* undef, align 4
  br label %bb24.i141

bb24.i141:                                        ; preds = %"_ZN4core5slice70_$LT$impl$u20$core..ops..IndexMut$LT$I$GT$$u20$for$u20$$u5b$T$u5d$$GT$9index_mut17h8eccc0af1ec6f971E.exit.i138", %bb16.i133
  %sz.0.i139 = phi i16 [ 15, %"_ZN4core5slice70_$LT$impl$u20$core..ops..IndexMut$LT$I$GT$$u20$for$u20$$u5b$T$u5d$$GT$9index_mut17h8eccc0af1ec6f971E.exit.i138" ], [ 14, %bb16.i133 ]
  %32 = add i16 %sz.0.i139, 0
  %33 = icmp ult i16 %retsz.0.ph.i121, %32
  %.retsz.0.i140 = select i1 %33, i16 %32, i16 %retsz.0.ph.i121
  br label %bb4.outer.i122

bb3.i39:                                          ; preds = %bb14
  %34 = call fastcc i16 @_ZN4core3num6bignum8Big32x4010mul_digits9mul_inner17h5d3461bce04d16ccE([40 x i32]* nonnull dereferenceable(160) %ret.i, i32* noalias nonnull readonly getelementptr inbounds ([14 x i32], [14 x i32]* @core_num_flt2dec_strategy_dragon_POW10TO128, i16 0, i16 0), i16 14, i32* noalias nonnull readonly %7, i16 %5)
  br label %core.num.bignum.Big32x40.exit44

core.num.bignum.Big32x40.exit44:                  ; preds = %bb3.i39, %bb4.i125
  %retsz.0.i40 = phi i16 [ %34, %bb3.i39 ], [ %retsz.0.ph.i121, %bb4.i125 ]
  call void @llvm.memcpy.p0i8.p0i8.i16(i8* undef, i8* nonnull %3, i16 160, i32 4, i1 false)
  store i16 %retsz.0.i40, i16* %4, align 2
  %35 = and i16 %1, 256
  %36 = icmp eq i16 %35, 0
  br i1 %36, label %bb30, label %bb27

bb27:                                             ; preds = %core.num.bignum.Big32x40.exit44
  unreachable

bb30:                                             ; preds = %core.num.bignum.Big32x40.exit44
  ret %"num::bignum::Big32x40.ALPHA.0.9.10.15.16.17.18.22.27.31.32.36.38.49.64.71.85.88.93.98.100.0.10.12.14.15.16.19.25.26.28.33.34.35.48.63.74.79.88.93.107.119"* %0
}

declare fastcc i16 @_ZN4core3num6bignum8Big32x4010mul_digits9mul_inner17h5d3461bce04d16ccE([40 x i32]* nocapture dereferenceable(160), i32* noalias nonnull readonly, i16, i32* noalias nonnull readonly, i16) unnamed_addr

; Function Attrs: argmemonly nounwind
declare void @llvm.memset.p0i8.i16(i8* nocapture writeonly, i8, i16, i32, i1) #0

; Function Attrs: argmemonly nounwind
declare void @llvm.memcpy.p0i8.p0i8.i16(i8* nocapture writeonly, i8* nocapture readonly, i16, i32, i1) #0

attributes #0 = { argmemonly nounwind }
Original
; ModuleID = 'bugpoint-reduced-simplified.bc'
source_filename = "bugpoint-output-7b72b44.bc"
target datalayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-n8"
target triple = "avr"

%BN1.0.5.14.17.18.19.21.26.29.31.34.40.42.57.71.80.84.89.91.95.105 = type { i16, [2 x i8], [40 x i32], [0 x i8] }

@DragonCodeAlpha = external constant [14 x i32], align 4

; Function Attrs: uwtable
define nonnull dereferenceable(164) %BN1.0.5.14.17.18.19.21.26.29.31.34.40.42.57.71.80.84.89.91.95.105* @DragonAlpha(%BN1.0.5.14.17.18.19.21.26.29.31.34.40.42.57.71.80.84.89.91.95.105* returned dereferenceable(164), i16) unnamed_addr #0 personality i32 (...)* @rust_eh_personality {
start:
  %ret.i = alloca [40 x i32], align 4
  br i1 undef, label %bb5, label %bb1

bb1:                                              ; preds = %start
  unreachable

bb5:                                              ; preds = %start
  %2 = icmp eq i16 undef, 0
  br i1 %2, label %bb10, label %bb8

bb8:                                              ; preds = %bb5
  unreachable

bb10:                                             ; preds = %bb5
  %3 = bitcast [40 x i32]* %ret.i to i8*
  %4 = load i16, i16* undef, align 2
  %5 = getelementptr inbounds %BN1.0.5.14.17.18.19.21.26.29.31.34.40.42.57.71.80.84.89.91.95.105, %BN1.0.5.14.17.18.19.21.26.29.31.34.40.42.57.71.80.84.89.91.95.105* %0, i16 0, i32 2, i16 0
  br i1 undef, label %bb2.i38, label %bb3.i39

bb2.i38:                                          ; preds = %bb10
  %6 = getelementptr inbounds %BN1.0.5.14.17.18.19.21.26.29.31.34.40.42.57.71.80.84.89.91.95.105, %BN1.0.5.14.17.18.19.21.26.29.31.34.40.42.57.71.80.84.89.91.95.105* %0, i16 0, i32 2, i16 %4
  br label %bb4.outer.i122

bb4.outer.i122:                                   ; preds = %bb24.i141, %bb2.i38
  %iter.sroa.7.0.ph.i120 = phi i16 [ %8, %bb24.i141 ], [ 0, %bb2.i38 ]
  %retsz.0.ph.i121 = phi i16 [ %.retsz.0.i140, %bb24.i141 ], [ 0, %bb2.i38 ]
  br label %bb4.i125

bb4.i125:                                         ; preds = %EnumerateAlpha, %bb4.outer.i122
  %iter.sroa.7.0.i124 = phi i16 [ %8, %EnumerateAlpha ], [ %iter.sroa.7.0.ph.i120, %bb4.outer.i122 ]
  %7 = icmp eq i32* undef, %6
  br i1 %7, label %_ZN4core3num6bignum8Big32x4010mul_digits17hb016ab455b44b71bE.exit44, label %EnumerateAlpha

EnumerateAlpha:                                   ; preds = %bb4.i125
  %8 = add i16 %iter.sroa.7.0.i124, 1
  %9 = icmp eq i32 undef, 0
  br i1 %9, label %bb4.i125, label %EnumerateBeta

EnumerateBeta:                                    ; preds = %EnumerateAlpha
  %10 = zext i32 undef to i64
  br label %EnumerateGamma

EnumerateGamma:                                   ; preds = %EnumerateBeta
  %11 = icmp ult i16 undef, 40
  br i1 %11, label %SliceMutAlpha, label %panic.i.i14.i134, !prof !0

bb16.i133:                                        ; preds = %SliceMutAlpha
  br i1 undef, label %bb24.i141, label %bb21.i136

panic.i.i14.i134:                                 ; preds = %EnumerateGamma
  unreachable

SliceMutAlpha:                                    ; preds = %EnumerateGamma
  %12 = load i32, i32* undef, align 4, !alias.scope !1, !noalias !4
  %13 = zext i32 %12 to i64
  %14 = mul nuw i64 %13, %10
  %15 = add i64 0, %14
  %16 = lshr i64 %15, 32
  %17 = trunc i64 %15 to i32
  store i32 %17, i32* undef, align 4, !noalias !6
  br label %bb16.i133

bb21.i136:                                        ; preds = %bb16.i133
  br i1 undef, label %SliceMutBeta, label %panic.i.i.i137, !prof !0

panic.i.i.i137:                                   ; preds = %bb21.i136
  unreachable

SliceMutBeta:                                     ; preds = %bb21.i136
  br label %bb24.i141

bb24.i141:                                        ; preds = %SliceMutBeta, %bb16.i133
  %18 = add i16 0, %iter.sroa.7.0.i124
  %19 = icmp ult i16 %retsz.0.ph.i121, %18
  %.retsz.0.i140 = select i1 %19, i16 %18, i16 %retsz.0.ph.i121
  br label %bb4.outer.i122

bb3.i39:                                          ; preds = %bb10
  %20 = call fastcc i16 @BignumInnerAlpha([40 x i32]* nonnull dereferenceable(160) %ret.i, i32* noalias nonnull readonly getelementptr inbounds ([14 x i32], [14 x i32]* @DragonCodeAlpha, i16 0, i16 0), i16 14, i32* noalias nonnull readonly %5, i16 %4)
  br label %_ZN4core3num6bignum8Big32x4010mul_digits17hb016ab455b44b71bE.exit44

_ZN4core3num6bignum8Big32x4010mul_digits17hb016ab455b44b71bE.exit44: ; preds = %bb3.i39, %bb4.i125
  %retsz.0.i40 = phi i16 [ %20, %bb3.i39 ], [ %retsz.0.ph.i121, %bb4.i125 ]
  %21 = getelementptr inbounds %BN1.0.5.14.17.18.19.21.26.29.31.34.40.42.57.71.80.84.89.91.95.105, %BN1.0.5.14.17.18.19.21.26.29.31.34.40.42.57.71.80.84.89.91.95.105* %0, i16 0, i32 2
  %22 = bitcast [40 x i32]* %21 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i16(i8* %22, i8* nonnull %3, i16 160, i32 4, i1 false), !noalias !7
  store i16 %retsz.0.i40, i16* undef, align 2, !noalias !7
  %23 = and i16 %1, 256
  %24 = icmp eq i16 %23, 0
  br i1 %24, label %bb30, label %bb27

bb27:                                             ; preds = %_ZN4core3num6bignum8Big32x4010mul_digits17hb016ab455b44b71bE.exit44
  unreachable

bb30:                                             ; preds = %_ZN4core3num6bignum8Big32x4010mul_digits17hb016ab455b44b71bE.exit44
  ret %BN1.0.5.14.17.18.19.21.26.29.31.34.40.42.57.71.80.84.89.91.95.105* %0
}

; Function Attrs: uwtable
declare fastcc i16 @BignumInnerAlpha([40 x i32]* nocapture dereferenceable(160), i32* noalias nonnull readonly, i16, i32* noalias nonnull readonly, i16) unnamed_addr #0

; Function Attrs: argmemonly nounwind
declare void @llvm.memcpy.p0i8.p0i8.i16(i8* nocapture writeonly, i8* nocapture readonly, i16, i32, i1) #1

declare i32 @rust_eh_personality(...) unnamed_addr

attributes #0 = { uwtable }
attributes #1 = { argmemonly nounwind }

!0 = !{!"branch_weights", i32 2000, i32 1}
!1 = !{!2}
!2 = distinct !{!2, !3, !"BignumInnerAlpha: argument 1"}
!3 = distinct !{!3, !"BignumInnerAlpha"}
!4 = !{!5}
!5 = distinct !{!5, !3, !"BignumInnerAlpha: argument 0"}
!6 = !{!5, !2}
!7 = !{!8}
!8 = distinct !{!8, !9, !"_ZN4core3num6bignum8Big32x4010mul_digits17hb016ab455b44b71bE: argument 0"}
!9 = distinct !{!9, !"_ZN4core3num6bignum8Big32x4010mul_digits17hb016ab455b44b71bE"}
@dylanmckay
Copy link
Member

This sounds like it might be a bug mentioned to me on llvm-commits a while ago. I'll fetch more details later.

@dylanmckay
Copy link
Member

Here's the bug I was thinking of

http://llvm.org/viewvc/llvm-project?view=revision&revision=293934

[LiveRangeEdit] Don't mess up with LiveInterval when a new vreg is created.

In r283838, we added the capability of splitting unspillable register.
When doing so we had to make sure the split live-ranges were also
unspillable and we did that by marking the related live-ranges in the
delegate method that is called when a new vreg is created.
However, by accessing the live-range there, we also triggered their lazy
computation (LiveIntervalAnalysis::getInterval) which is not what we
want in general. Indeed, later code in LiveRangeEdit is going to build
the live-ranges this lazy computation may mess up that computation
resulting in assertion failures. Namely, the createEmptyIntervalFrom
method expect that the live-range is going to be empty, not computed.

Thanks to Mikael Holmén [email protected] for noticing and
reporting the problem.

It doesn't sound as relevant now that I remember it.

@shepmaster
Copy link
Member Author

I didn't re-bugpoint, but a segfault is still present even with the proposed LLVM 4.0 merge into Rust:

* thread #2, name = 'rustc', stop reason = EXC_BAD_ACCESS (code=1, address=0x28)
  * frame #0: 0x0000000102d97326 librustc_llvm-8af32171c42232fc.dylib`llvm::MachineInstr::addRegisterDead(unsigned int, llvm::TargetRegisterInfo const*, bool) + 230
    frame #1: 0x0000000102d3781b librustc_llvm-8af32171c42232fc.dylib`llvm::LiveIntervals::computeDeadValues(llvm::LiveInterval&, llvm::SmallVectorImpl<llvm::MachineInstr*>*) + 491
    frame #2: 0x0000000102d37e1c librustc_llvm-8af32171c42232fc.dylib`llvm::LiveIntervals::shrinkToUses(llvm::LiveInterval*, llvm::SmallVectorImpl<llvm::MachineInstr*>*) + 972
    frame #3: 0x0000000102d4d882 librustc_llvm-8af32171c42232fc.dylib`llvm::LiveRangeEdit::eliminateDeadDefs(llvm::SmallVectorImpl<llvm::MachineInstr*>&, llvm::ArrayRef<unsigned int>, llvm::AAResults*) + 514
    frame #4: 0x0000000102d19206 librustc_llvm-8af32171c42232fc.dylib`(anonymous namespace)::InlineSpiller::postOptimization() + 14502
    frame #5: 0x0000000102e28210 librustc_llvm-8af32171c42232fc.dylib`llvm::RegAllocBase::postOptimization() + 32
    frame #6: 0x0000000102e30114 librustc_llvm-8af32171c42232fc.dylib`(anonymous namespace)::RAGreedy::runOnMachineFunction(llvm::MachineFunction&) + 3412
    frame #7: 0x0000000102d8c716 librustc_llvm-8af32171c42232fc.dylib`llvm::MachineFunctionPass::runOnFunction(llvm::Function&) + 134
    frame #8: 0x0000000103493352 librustc_llvm-8af32171c42232fc.dylib`llvm::FPPassManager::runOnFunction(llvm::Function&) + 530
    frame #9: 0x0000000103493553 librustc_llvm-8af32171c42232fc.dylib`llvm::FPPassManager::runOnModule(llvm::Module&) + 51
    frame #10: 0x00000001034939ea librustc_llvm-8af32171c42232fc.dylib`llvm::legacy::PassManagerImpl::run(llvm::Module&) + 922
    frame #11: 0x00000001020bcbed librustc_llvm-8af32171c42232fc.dylib`::LLVMRustWriteOutputFile(Target=0x000000011b146800, PMR=0x0000000104d03d50, M=0x0000000107c49de0, Path=<unavailable>, RustFileType=<unavailable>) at PassWrapper.cpp:477 [opt]

@shepmaster shepmaster added A-libcore Affects compiling the core library A-llvm Affects the LLVM AVR backend labels Apr 25, 2017
@shepmaster shepmaster changed the title Segfault in Greedy Register Allocator Assertion failed: (MI && "No instruction defining live value"), function computeDeadValues Apr 28, 2017
@shepmaster
Copy link
Member Author

I tried cherry-picking the commit you mentioned (llvm-mirror/llvm@5340452) but it seems like I get the same assertion.

@shepmaster
Copy link
Member Author

shepmaster commented Apr 28, 2017

Looking in the debugger, it seems like some data got out of agreement somewhere. I think it's saying that the store came from offset 1676, but that's blank:

(lldb) p Def.dump()
1676r
(lldb) p Indexes->dump()
1640 %vreg89<def> = COPY %vreg88; DREGS:%vreg89,%vreg88
1648 CPWRdRr %vreg89, %vreg67, %SREG<imp-def>; DREGS:%vreg89,%vreg67
1664
1672 %vreg71<earlyclobber,def> = LDDWRdYQ <fi#4>, 0; mem:LD2[FixedStack4](align=1) DREGS:%vreg71
1676
1684 %vreg84<earlyclobber,def> = LDDWRdYQ <fi#2>, 0; mem:LD2[FixedStack2](align=1) IWREGS:%vreg84
1692 BRLOk <BB#1>, %SREG<imp-use,kill>
1696 RJMPk <BB#2>
1712

@shepmaster shepmaster added the has-reduced-testcase A small LLVM IR file exists that demonstrates the problem label Apr 28, 2017
@brainlag
Copy link

I tried to debug this quite a bit. With the latest llvm this changed a bit. Now Def.dump() is 1832r which does not point to an empty machine instruction but between two machine instructions which is not valid(?). The offset between two machine instructions is always 16 but 1832 % 16 is 8 and not 0. I tried to figure out where this invalid live interval is created but these breakpoints are never hit. For me this looks like some kind of corruption but I don't have enough experience with C++ to make an educated guess.

@brainlag
Copy link

brainlag commented Mar 5, 2018

So after a lot of debugging I still don't know whats going on but here are my findings.
There is no corruption just some iterator magic which leads to a machine instruction at 1832.

Here is the instruction map before 1832 gets removed:

1800B	  %138:dregs = COPY %136:dregs
1808B	  %139:dldregs = LDIWRdK 14
1824B	  %102:dregs = COPY %138:dregs
1832B	  early-clobber %109:dldregs = LDDWRdYQ %stack.2, 0; mem:LD2[FixedStack2](align=1)
1840B	  early-clobber %120:iwregs = LDDWRdYQ %stack.1, 0; mem:LD2[FixedStack1](align=1)
1848B	  RJMPk %bb.22

The function which removes it is InlineSpiller::coalesceStackAccess. So I tried to prevent that function to remove the instruction but the result is the same. The InlineSpiller remvoes the instruction somewhere in the postOptimization step.

For some reason we end up with this total broken live range which then blows up:

%149 [1832r,1832d:0)[2744r,2744d:1)[2760B,2760d:2)  0@1832r 1@2744r 2@2760B-phi weight:1.106830e-03`

All 3 indexes point to same machine instruction:

early-clobber %XXX:dldregs = LDDWRdYQ %stack.2, 0; mem:LD2[FixedStack2](align=1)

I also found this somewhat related bug:

http://lists.llvm.org/pipermail/llvm-dev/2012-July/051741.html
https://bugs.llvm.org/show_bug.cgi?id=13375

Maybe someone has an idea what is going on?

@dylanmckay
Copy link
Member

Interestingly, the OP of the mailing list post you linked is one of the three original people that worked on AVR-LLVM, so it's fairly likely it's related, if not the same!

@dylanmckay
Copy link
Member

Cherry picking this from the mailing list thread

OK, I see what is happening. We don't expect foldMemoryOperand to turn a normal def into an early-clobber and vice versa.

This is not easy to fix, could you file a PR, please?

As a workaround, you can use a pseudo-instruction in loadRegFromStackSlot() that doesn't have the early-clobber flag. Then replace it with the real instruction in expandPostRAPseudo().

/jakob

For what it's worth, the original llvm-avr sources had a comment on LDDWRdYQ to the effect of it being a workaround for something. It was never clear what it was a workaround for though.

@dylanmckay
Copy link
Member

We could fix this properly, or we could work around it by adding a guard condition to skip the call to foldMemoryOperand if the slot is not an earlyclobber slot and the folded instruction has earlyclobber operands.

I suspect that it will be unnecessary to check for earlyclobber slots because if the existing slots always come from COPY pseudo instructions, then earlyclobber will never be present on this generic instruction.

Would be nice to fix it properly though

@dylanmckay
Copy link
Member

Good news!

The mailing list thread and bug report shown in @brainlag's investigation reminded me of a very old FIXME comment left in the AVR backend since 2010. At one point, I removed a small number of historic TODOs/FIXMEs, because most of them were very vague and short, no longer making sense to anybody.

At some point, the earlyclobber flag was added to LDDWRdYQ, which was originally introduced to work around this bug. albeit with a completely different way of showing itself.

I've removed the earlyclobber flag which fixes this issue. At some point it would be nice to see the register allocator esoteric handle cases like this though.

@dylanmckay
Copy link
Member

Here's the important bit

llvm-mirror/llvm@d45c0f1

@dylanmckay dylanmckay added the has-llvm-commit This issue should be fixed in upstream LLVM label Mar 6, 2018
@dylanmckay
Copy link
Member

Thanks for all of the hours spent @brainlag, it took a few hours to put all of the pieces together, but the debugging you did filled the last piece of the puzzle!

@dylanmckay
Copy link
Member

We should cherry-pick this, but it's probably not worth it until #90 lands

@brainlag
Copy link

brainlag commented Mar 6, 2018

I think i saw a similar workaround in MIPS and/or ARM but those didn't mention the bug.

@brainlag
Copy link

brainlag commented Mar 18, 2018

I still don't know what this earlyclobber flag is supposed to do but don't you think we also should remove it from LDDWRdPtrQ too (AVRInstrInfo.td#L1224)? This fixes #37 for me and also #95. And then there is still LDDRdPtrQ, which also has a earlyclobber flag but didn't trigger a error yet.

@shepmaster
Copy link
Member Author

This fixes #37 for me and also #95

That's super exciting!

@dylanmckay
Copy link
Member

The @earlyclobber flag is there to tell LLVM that the output register and input registers cannot be the same because the instruction itself "clobbers the register's value early", sometime before the actual instruction completes.

In AVR, this is used on pseudo instructions because some of the expansion patterns introduce additional instructions which overwrite some registers.

For example, we often turn 16-bit load pseudo instructions into something like this

LDD r12, X+ ; load from pointer into r12, postincrement the pointer value
LDD r13, X

Because the expanded instruction sequence always write to the source pointer register, that means that none of the other registers in the instruction can overlap with the X register, namely r27:r26.

I believe that removing the earlyclobber hint will remove this constraint from the register allocator. This would reduce register pressure, but it will also corrupt the values we store into registers. This could explain why it fixes #37 and #95.

Here is the mailing list posting where it was originally introduced.

This is my understanding of earlyclobber at least.

@brainlag
Copy link

So do you think LDDWRdPtrQ is different to LDDWRdYQ or are we getting away without adding earlyclobber to it? This workaround for bug 13375 is for both instructions.

@brainlag
Copy link

brainlag commented Mar 19, 2018

Ok nevermind, LDDWRdYQ is already a special case of LDDWRdPtrQ.

@dylanmckay
Copy link
Member

As you've implied but I'll be concrete

Leaving earlyclobber off LDDWRdYQ only works because the pseudo expansion pass will directly replace it with LDDWRdPtrQ prior to register allocation, when the constraint actually matters.

This works around the bug in foldMemoryOperand that doesn't handle earlyclobber loads. With the workaround, it never handles earlyclobber loads - we essentially add earlyclobber after the fact by replacing the instruction opcode with one that does specify earlyclobber.

@shepmaster
Copy link
Member Author

Here's a smaller testcase:

; ModuleID = 'bugpoint-reduced-simplified.bc'
source_filename = "bugpoint-output-3cab020.bc"
target datalayout = "e-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8"
target triple = "avr-unknown-unknown"

%"num::bignum::Big32x40" = type { [0 x i8], i16, [0 x i8], [40 x i32], [0 x i8] }

define void @"core::num::flt2dec::strategy::dragon::format_exact"() unnamed_addr {
start:
  %_72 = alloca %"num::bignum::Big32x40", align 1
  %0 = getelementptr inbounds %"num::bignum::Big32x40", %"num::bignum::Big32x40"* null, i16 0, i32 3
  %1 = bitcast [40 x i32]* %0 to i8*
  %2 = getelementptr inbounds %"num::bignum::Big32x40", %"num::bignum::Big32x40"* %_72, i16 0, i32 3
  %3 = bitcast [40 x i32]* %2 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i16(i8* nonnull %3, i8* nonnull %1, i16 160, i32 1, i1 false)
  %4 = getelementptr inbounds %"num::bignum::Big32x40", %"num::bignum::Big32x40"* %_72, i16 0, i32 3, i16 0
  br label %"core::slice::<impl core::ops::index::IndexMut<I> for [T]>::index_mut.exit.i.i"

"core::slice::<impl core::ops::index::IndexMut<I> for [T]>::index_mut.exit.i.i": ; preds = %"core::num::bignum::Big32x40::div_rem_small.exit.i.bb2.i.i_crit_edge", %start
  br i1 undef, label %"core::num::bignum::Big32x40::div_rem_small.exit.i", label %"<u32 as core::num::bignum::FullOps>::full_div_rem.exit.lr.ph.i.i"

"<u32 as core::num::bignum::FullOps>::full_div_rem.exit.lr.ph.i.i": ; preds = %"core::slice::<impl core::ops::index::IndexMut<I> for [T]>::index_mut.exit.i.i"
  br label %"<u32 as core::num::bignum::FullOps>::full_div_rem.exit.i.i"

"<u32 as core::num::bignum::FullOps>::full_div_rem.exit.i.i": ; preds = %"<u32 as core::num::bignum::FullOps>::full_div_rem.exit.i.i", %"<u32 as core::num::bignum::FullOps>::full_div_rem.exit.lr.ph.i.i"
  %5 = load i32, i32* undef, align 1
  %6 = zext i32 %5 to i64
  %7 = or i64 0, %6
  %8 = udiv i64 %7, 1000000000
  %9 = trunc i64 %8 to i32
  store i32 %9, i32* undef, align 1
  %10 = icmp eq i32* undef, %4
  br i1 %10, label %"core::num::bignum::Big32x40::div_rem_small.exit.i", label %"<u32 as core::num::bignum::FullOps>::full_div_rem.exit.i.i"

"core::num::bignum::Big32x40::div_rem_small.exit.i": ; preds = %"<u32 as core::num::bignum::FullOps>::full_div_rem.exit.i.i", %"core::slice::<impl core::ops::index::IndexMut<I> for [T]>::index_mut.exit.i.i"
  %11 = icmp ugt i16 undef, 9
  br i1 %11, label %"core::num::bignum::Big32x40::div_rem_small.exit.i.bb2.i.i_crit_edge", label %bb7.i74

"core::num::bignum::Big32x40::div_rem_small.exit.i.bb2.i.i_crit_edge": ; preds = %"core::num::bignum::Big32x40::div_rem_small.exit.i"
  br label %"core::slice::<impl core::ops::index::IndexMut<I> for [T]>::index_mut.exit.i.i"

bb7.i74:                                          ; preds = %"core::num::bignum::Big32x40::div_rem_small.exit.i"
  unreachable
}

; Function Attrs: argmemonly nounwind
declare void @llvm.memcpy.p0i8.p0i8.i16(i8* nocapture writeonly, i8* nocapture readonly, i16, i32, i1) #0

attributes #0 = { argmemonly nounwind }

@shepmaster shepmaster added A-libcore Affects compiling the core library and removed A-libcore Affects compiling the core library labels May 4, 2018
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
A-libcore Affects compiling the core library A-llvm Affects the LLVM AVR backend has-llvm-commit This issue should be fixed in upstream LLVM has-reduced-testcase A small LLVM IR file exists that demonstrates the problem
Projects
None yet
Development

No branches or pull requests

3 participants