-
-
Notifications
You must be signed in to change notification settings - Fork 2.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
result location mechanism (part of no-copy semantics) #2602
Conversation
```zig export fn entry() void { var x: Foo = foo(); } ``` ```llvm define void @entry() #2 !dbg !37 { Entry: %x = alloca %Foo, align 4 call fastcc void @foo(%Foo* sret %x), !dbg !48 call void @llvm.dbg.declare(metadata %Foo* %x, metadata !41, metadata !DIExpression()), !dbg !49 ret void, !dbg !50 } ```
```zig export fn entry() void { var c = true; var x = if (c) u8(4) else u32(10); } ``` ```llvm define void @entry() #2 !dbg !35 { Entry: %c = alloca i1, align 1 %x = alloca i32, align 4 store i1 true, i1* %c, align 1, !dbg !44 call void @llvm.dbg.declare(metadata i1* %c, metadata !39, metadata !DIExpression()), !dbg !45 %0 = load i1, i1* %c, align 1, !dbg !46 br i1 %0, label %Then, label %Else, !dbg !46 Then: ; preds = %Entry br label %EndIf, !dbg !47 Else: ; preds = %Entry br label %EndIf, !dbg !47 EndIf: ; preds = %Else, %Then %1 = phi i32 [ 4, %Then ], [ 10, %Else ], !dbg !47 store i32 %1, i32* %x, align 4, !dbg !47 call void @llvm.dbg.declare(metadata i32* %x, metadata !42, metadata !DIExpression()), !dbg !48 ret void, !dbg !49 } ```
```zig export fn entry() void { var c = true; var x = if (c) u8(4) else if (c) u16(100) else u32(10); } ``` ```llvm define void @entry() #2 !dbg !35 { Entry: %c = alloca i1, align 1 %x = alloca i32, align 4 store i1 true, i1* %c, align 1, !dbg !44 call void @llvm.dbg.declare(metadata i1* %c, metadata !39, metadata !DIExpression()), !dbg !45 %0 = load i1, i1* %c, align 1, !dbg !46 br i1 %0, label %Then2, label %Else, !dbg !46 Else: ; preds = %Entry %1 = load i1, i1* %c, align 1, !dbg !47 br i1 %1, label %Then, label %Else1, !dbg !47 Then: ; preds = %Else br label %EndIf, !dbg !48 Else1: ; preds = %Else br label %EndIf, !dbg !48 Then2: ; preds = %Entry br label %EndIf3, !dbg !49 EndIf: ; preds = %Else1, %Then %2 = phi i32 [ 100, %Then ], [ 10, %Else1 ], !dbg !48 br label %EndIf3, !dbg !49 EndIf3: ; preds = %EndIf, %Then2 %3 = phi i32 [ 4, %Then2 ], [ %2, %EndIf ], !dbg !49 store i32 %3, i32* %x, align 4, !dbg !49 call void @llvm.dbg.declare(metadata i32* %x, metadata !42, metadata !DIExpression()), !dbg !50 ret void, !dbg !51 } ```
```zig export fn entry() void { var c = true; var a = u8(4); const x = if (c) a else u32(8); } ``` ```llvm define void @entry() #2 !dbg !35 { Entry: %c = alloca i1, align 1 %a = alloca i8, align 1 %x = alloca i32, align 4 store i1 true, i1* %c, align 1, !dbg !45 call void @llvm.dbg.declare(metadata i1* %c, metadata !39, metadata !DIExpression()), !dbg !46 store i8 4, i8* %a, align 1, !dbg !47 call void @llvm.dbg.declare(metadata i8* %a, metadata !42, metadata !DIExpression()), !dbg !48 %0 = load i1, i1* %c, align 1, !dbg !49 br i1 %0, label %Then, label %Else, !dbg !49 Then: ; preds = %Entry %1 = load i8, i8* %a, align 1, !dbg !50 %2 = zext i8 %1 to i32, !dbg !50 br label %EndIf, !dbg !51 Else: ; preds = %Entry br label %EndIf, !dbg !51 EndIf: ; preds = %Else, %Then %3 = phi i32 [ %2, %Then ], [ 8, %Else ], !dbg !51 store i32 %3, i32* %x, align 4, !dbg !51 call void @llvm.dbg.declare(metadata i32* %x, metadata !43, metadata !DIExpression()), !dbg !52 ret void, !dbg !53 } ```
```zig export fn entry() void { var x = foo(); } const Foo = struct { x: i32, }; fn foo() Foo { return Foo{ .x = 1234, }; } ``` ```llvm define void @entry() #2 !dbg !35 { Entry: %x = alloca %Foo, align 4 call fastcc void @foo(%Foo* sret %x), !dbg !45 call void @llvm.dbg.declare(metadata %Foo* %x, metadata !39, metadata !DIExpression()), !dbg !46 ret void, !dbg !47 } define internal fastcc void @foo(%Foo* nonnull sret) unnamed_addr #2 !dbg !48 { Entry: %1 = bitcast %Foo* %0 to i8*, !dbg !52 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 bitcast (%Foo* @0 to i8*), i64 4, i1 false), !dbg !52 ret void, !dbg !52 } ```
Note that neither the payload capture variable nor the error capture variable require a stack allocation. ```zig export fn entry() void { var c: anyerror!i32 = 1234; while (c) |hi| {} else |e| {} } ``` ```llvm define void @entry() #2 !dbg !39 { Entry: %c = alloca { i16, i32 }, align 4 %0 = bitcast { i16, i32 }* %c to i8*, !dbg !52 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 bitcast ({ i16, i32 }* @0 to i8*), i64 8, i1 false), !dbg !52 call void @llvm.dbg.declare(metadata { i16, i32 }* %c, metadata !43, metadata !DIExpression()), !dbg !52 br label %WhileCond, !dbg !53 WhileCond: ; preds = %WhileBody, %Entry %1 = getelementptr inbounds { i16, i32 }, { i16, i32 }* %c, i32 0, i32 0, !dbg !54 %2 = load i16, i16* %1, align 2, !dbg !54 %3 = icmp ne i16 %2, 0, !dbg !54 br i1 %3, label %WhileElse, label %WhileBody, !dbg !54 WhileBody: ; preds = %WhileCond %4 = getelementptr inbounds { i16, i32 }, { i16, i32 }* %c, i32 0, i32 1, !dbg !53 call void @llvm.dbg.declare(metadata i32* %4, metadata !50, metadata !DIExpression()), !dbg !53 br label %WhileCond, !dbg !53 WhileElse: ; preds = %WhileCond %5 = getelementptr inbounds { i16, i32 }, { i16, i32 }* %c, i32 0, i32 0, !dbg !55 call void @llvm.dbg.declare(metadata i16* %5, metadata !51, metadata !DIExpression()), !dbg !55 ret void, !dbg !56 } ```
Note that only the index variable requires a stack allocation, and the memcpy for the element is gone. ```zig export fn entry() void { var buf: [10]i32 = undefined; for (buf) |x| {} } ``` ```llvm define void @entry() #2 !dbg !35 { Entry: %buf = alloca [10 x i32], align 4 %i = alloca i64, align 8 %0 = bitcast [10 x i32]* %buf to i8*, !dbg !47 call void @llvm.memset.p0i8.i64(i8* align 4 %0, i8 -86, i64 40, i1 false), !dbg !47 call void @llvm.dbg.declare(metadata [10 x i32]* %buf, metadata !39, metadata !DIExpression()), !dbg !47 store i64 0, i64* %i, align 8, !dbg !48 call void @llvm.dbg.declare(metadata i64* %i, metadata !45, metadata !DIExpression()), !dbg !48 br label %ForCond, !dbg !48 ForCond: ; preds = %ForBody, %Entry %1 = load i64, i64* %i, align 8, !dbg !48 %2 = icmp ult i64 %1, 10, !dbg !48 br i1 %2, label %ForBody, label %ForEnd, !dbg !48 ForBody: ; preds = %ForCond %3 = getelementptr inbounds [10 x i32], [10 x i32]* %buf, i64 0, i64 %1, !dbg !48 call void @llvm.dbg.declare(metadata i32* %3, metadata !46, metadata !DIExpression()), !dbg !49 %4 = add nuw i64 %1, 1, !dbg !48 store i64 %4, i64* %i, align 8, !dbg !48 br label %ForCond, !dbg !48 ForEnd: ; preds = %ForCond ret void, !dbg !50 } ```
if expressions no longer introduce a stack allocation. ```zig export fn entry() void { var x: anyerror!i32 = 1234; if (x) |i| {} else |e| {} } ``` ```llvm define void @entry() #2 !dbg !39 { Entry: %x = alloca { i16, i32 }, align 4 %0 = bitcast { i16, i32 }* %x to i8*, !dbg !52 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 bitcast ({ i16, i32 }* @0 to i8*), i64 8, i1 false), !dbg !52 call void @llvm.dbg.declare(metadata { i16, i32 }* %x, metadata !43, metadata !DIExpression()), !dbg !52 %1 = getelementptr inbounds { i16, i32 }, { i16, i32 }* %x, i32 0, i32 0, !dbg !53 %2 = load i16, i16* %1, align 2, !dbg !53 %3 = icmp ne i16 %2, 0, !dbg !53 br i1 %3, label %TryElse, label %TryOk, !dbg !53 TryOk: ; preds = %Entry %4 = getelementptr inbounds { i16, i32 }, { i16, i32 }* %x, i32 0, i32 1, !dbg !53 call void @llvm.dbg.declare(metadata i32* %4, metadata !50, metadata !DIExpression()), !dbg !53 br label %TryEnd, !dbg !53 TryElse: ; preds = %Entry %5 = getelementptr inbounds { i16, i32 }, { i16, i32 }* %x, i32 0, i32 0, !dbg !53 call void @llvm.dbg.declare(metadata i16* %5, metadata !51, metadata !DIExpression()), !dbg !53 br label %TryEnd, !dbg !53 TryEnd: ; preds = %TryElse, %TryOk ret void, !dbg !54 } ```
except for switch expressions
```zig export fn entry() void { var c: i32 = 1234; var x = switch (c) { 1 => u8(1), 2...4 => u16(2), else => u32(3), }; } ``` ```llvm define void @entry() #2 !dbg !35 { Entry: %c = alloca i32, align 4 %x = alloca i32, align 4 store i32 1234, i32* %c, align 4, !dbg !44 call void @llvm.dbg.declare(metadata i32* %c, metadata !39, metadata !DIExpression()), !dbg !44 %0 = load i32, i32* %c, align 4, !dbg !45 %1 = icmp sge i32 %0, 2, !dbg !46 %2 = icmp sle i32 %0, 4, !dbg !46 %3 = and i1 %1, %2, !dbg !46 br i1 %3, label %SwitchRangeYes, label %SwitchRangeNo, !dbg !46 SwitchRangeYes: ; preds = %Entry br label %SwitchEnd, !dbg !45 SwitchElse: ; preds = %SwitchRangeNo br label %SwitchEnd, !dbg !45 SwitchProng: ; preds = %SwitchRangeNo br label %SwitchEnd, !dbg !45 SwitchEnd: ; preds = %SwitchProng, %SwitchElse, %SwitchRangeYes %4 = phi i32 [ 2, %SwitchRangeYes ], [ 3, %SwitchElse ], [ 1, %SwitchProng ], !dbg !45 store i32 %4, i32* %x, align 4, !dbg !45 call void @llvm.dbg.declare(metadata i32* %x, metadata !42, metadata !DIExpression()), !dbg !47 ret void, !dbg !48 SwitchRangeNo: ; preds = %Entry switch i32 %0, label %SwitchElse [ i32 1, label %SwitchProng ], !dbg !45 } ```
```zig export fn entry() void { var c = true; var x = if (c) foo() else bar(); } ``` ```llvm define void @entry() #2 !dbg !35 { Entry: %c = alloca i1, align 1 %x = alloca %Foo, align 4 store i1 true, i1* %c, align 1, !dbg !47 call void @llvm.dbg.declare(metadata i1* %c, metadata !39, metadata !DIExpression()), !dbg !48 %0 = load i1, i1* %c, align 1, !dbg !49 br i1 %0, label %Then, label %Else, !dbg !49 Then: ; preds = %Entry call fastcc void @foo(%Foo* sret %x), !dbg !50 br label %EndIf, !dbg !51 Else: ; preds = %Entry call fastcc void @bar(%Foo* sret %x), !dbg !52 br label %EndIf, !dbg !51 EndIf: ; preds = %Else, %Then call void @llvm.dbg.declare(metadata %Foo* %x, metadata !42, metadata !DIExpression()), !dbg !53 ret void, !dbg !54 } ```
```zig export fn entry() void { var c: anyerror!i32 = 1234; var x = while (c) |y| break foo() else |e| bar(); } ``` ```llvm define void @entry() #2 !dbg !39 { Entry: %c = alloca { i16, i32 }, align 4 %x = alloca %Foo, align 4 %0 = bitcast { i16, i32 }* %c to i8*, !dbg !56 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 bitcast ({ i16, i32 }* @0 to i8*), i64 8, i1 false), !dbg !56 call void @llvm.dbg.declare(metadata { i16, i32 }* %c, metadata !43, metadata !DIExpression()), !dbg !56 br label %WhileCond, !dbg !57 WhileCond: ; preds = %Entry %1 = getelementptr inbounds { i16, i32 }, { i16, i32 }* %c, i32 0, i32 0, !dbg !58 %2 = load i16, i16* %1, align 2, !dbg !58 %3 = icmp ne i16 %2, 0, !dbg !58 br i1 %3, label %WhileElse, label %WhileBody, !dbg !58 WhileBody: ; preds = %WhileCond %4 = getelementptr inbounds { i16, i32 }, { i16, i32 }* %c, i32 0, i32 1, !dbg !57 call void @llvm.dbg.declare(metadata i32* %4, metadata !50, metadata !DIExpression()), !dbg !57 call fastcc void @foo(%Foo* sret %x), !dbg !59 br label %WhileEnd, !dbg !60 WhileElse: ; preds = %WhileCond %5 = getelementptr inbounds { i16, i32 }, { i16, i32 }* %c, i32 0, i32 0, !dbg !61 call void @llvm.dbg.declare(metadata i16* %5, metadata !51, metadata !DIExpression()), !dbg !61 call fastcc void @bar(%Foo* sret %x), !dbg !61 br label %WhileEnd, !dbg !57 WhileEnd: ; preds = %WhileElse, %WhileBody call void @llvm.dbg.declare(metadata %Foo* %x, metadata !52, metadata !DIExpression()), !dbg !62 ret void, !dbg !63 } ```
```zig export fn entry() void { var buf: [10]u8 = undefined; var x = for (buf) |x| break foo() else bar(); } ``` ```llvm define void @entry() #2 !dbg !35 { Entry: %buf = alloca [10 x i8], align 1 %i = alloca i64, align 8 %x = alloca %Foo, align 4 %0 = bitcast [10 x i8]* %buf to i8*, !dbg !51 call void @llvm.memset.p0i8.i64(i8* align 1 %0, i8 -86, i64 10, i1 false), !dbg !51 call void @llvm.dbg.declare(metadata [10 x i8]* %buf, metadata !39, metadata !DIExpression()), !dbg !51 store i64 0, i64* %i, align 8, !dbg !52 call void @llvm.dbg.declare(metadata i64* %i, metadata !44, metadata !DIExpression()), !dbg !52 br label %ForCond, !dbg !52 ForCond: ; preds = %Entry %1 = load i64, i64* %i, align 8, !dbg !52 %2 = icmp ult i64 %1, 10, !dbg !52 br i1 %2, label %ForBody, label %ForElse, !dbg !52 ForBody: ; preds = %ForCond %3 = getelementptr inbounds [10 x i8], [10 x i8]* %buf, i64 0, i64 %1, !dbg !52 call void @llvm.dbg.declare(metadata i8* %3, metadata !45, metadata !DIExpression()), !dbg !53 call fastcc void @foo(%Foo* sret %x), !dbg !54 br label %ForEnd, !dbg !55 ForElse: ; preds = %ForCond call fastcc void @bar(%Foo* sret %x), !dbg !56 br label %ForEnd, !dbg !52 ForEnd: ; preds = %ForElse, %ForBody call void @llvm.dbg.declare(metadata %Foo* %x, metadata !46, metadata !DIExpression()), !dbg !57 ret void, !dbg !58 } ```
```zig export fn entry() void { var x = crap() catch bar(); } ``` ```llvm define void @entry() #2 !dbg !40 { Entry: %0 = alloca { i16, %Foo }, align 4 %x = alloca %Foo, align 4 call fastcc void @crap({ i16, %Foo }* sret %0), !dbg !50 %1 = getelementptr inbounds { i16, %Foo }, { i16, %Foo }* %0, i32 0, i32 0, !dbg !51 %2 = load i16, i16* %1, align 2, !dbg !51 %3 = icmp ne i16 %2, 0, !dbg !51 br i1 %3, label %UnwrapErrError, label %UnwrapErrOk, !dbg !51 UnwrapErrError: ; preds = %Entry call fastcc void @bar(%Foo* sret %x), !dbg !52 br label %UnwrapErrEnd, !dbg !51 UnwrapErrOk: ; preds = %Entry %4 = getelementptr inbounds { i16, %Foo }, { i16, %Foo }* %0, i32 0, i32 1, !dbg !51 %5 = bitcast %Foo* %4 to i8*, !dbg !51 %6 = bitcast %Foo* %x to i8*, !dbg !51 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %6, i8* align 4 %5, i64 4, i1 false), !dbg !51 br label %UnwrapErrEnd, !dbg !51 UnwrapErrEnd: ; preds = %UnwrapErrOk, %UnwrapErrError ret void, !dbg !53 } ```
```zig export fn entry() void { const static = Foo{ .x = 9, .bar = Bar{ .y = 10 }, }; const runtime = foo(true); } fn foo(c: bool) Foo { return Foo{ .x = 12, .bar = if (c) bar1() else bar2(), }; } fn bar1() Bar { return Bar{ .y = 34 }; } fn bar2() Bar { return Bar{ .y = 56 }; } ``` ```llvm @0 = internal unnamed_addr constant %Foo { i32 9, %Bar { i32 10 } }, align 4 @1 = internal unnamed_addr constant %Bar { i32 34 }, align 4 @2 = internal unnamed_addr constant %Bar { i32 56 }, align 4 define void @entry() #2 !dbg !35 { Entry: %runtime = alloca %Foo, align 4 call void @llvm.dbg.declare(metadata %Foo* @0, metadata !39, metadata !DIExpression()), !dbg !50 call fastcc void @foo(%Foo* sret %runtime, i1 true), !dbg !51 call void @llvm.dbg.declare(metadata %Foo* %runtime, metadata !49, metadata !DIExpression()), !dbg !52 ret void, !dbg !53 } define internal fastcc void @foo(%Foo* nonnull sret, i1) unnamed_addr #2 !dbg !54 { Entry: %c = alloca i1, align 1 store i1 %1, i1* %c, align 1 call void @llvm.dbg.declare(metadata i1* %c, metadata !60, metadata !DIExpression()), !dbg !61 %2 = getelementptr inbounds %Foo, %Foo* %0, i32 0, i32 0, !dbg !62 store i32 12, i32* %2, align 4, !dbg !62 %3 = getelementptr inbounds %Foo, %Foo* %0, i32 0, i32 1, !dbg !64 %4 = load i1, i1* %c, align 1, !dbg !65 br i1 %4, label %Then, label %Else, !dbg !65 Then: ; preds = %Entry call fastcc void @bar1(%Bar* sret %3), !dbg !66 br label %EndIf, !dbg !64 Else: ; preds = %Entry call fastcc void @bar2(%Bar* sret %3), !dbg !67 br label %EndIf, !dbg !64 EndIf: ; preds = %Else, %Then ret void, !dbg !68 } define internal fastcc void @bar1(%Bar* nonnull sret) unnamed_addr #2 !dbg !69 { Entry: %1 = bitcast %Bar* %0 to i8*, !dbg !73 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 bitcast (%Bar* @1 to i8*), i64 4, i1 false), !dbg !73 ret void, !dbg !73 } define internal fastcc void @bar2(%Bar* nonnull sret) unnamed_addr #2 !dbg !75 { Entry: %1 = bitcast %Bar* %0 to i8*, !dbg !76 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 bitcast (%Bar* @2 to i8*), i64 4, i1 false), !dbg !76 ret void, !dbg !76 } !39 = !DILocalVariable(name: "static", scope: !40, file: !5, line: 2, type: !41) !49 = !DILocalVariable(name: "runtime", scope: !40, file: !5, line: 6, type: !41) ```
```zig export fn entry() void { var x = [3]Bar{ bar(), bar(), Bar{ .y = 12 } }; } ``` ```llvm define void @entry() #2 !dbg !35 { Entry: %x = alloca [3 x %Bar], align 4 %0 = getelementptr inbounds [3 x %Bar], [3 x %Bar]* %x, i64 0, i64 0, !dbg !48 call fastcc void @bar(%Bar* sret %0), !dbg !48 %1 = getelementptr inbounds [3 x %Bar], [3 x %Bar]* %x, i64 0, i64 1, !dbg !49 call fastcc void @bar(%Bar* sret %1), !dbg !49 %2 = getelementptr inbounds [3 x %Bar], [3 x %Bar]* %x, i64 0, i64 2, !dbg !50 %3 = bitcast %Bar* %2 to i8*, !dbg !50 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %3, i8* align 4 bitcast (%Bar* @0 to i8*), i64 4, i1 false), !dbg !50 call void @llvm.dbg.declare(metadata [3 x %Bar]* %x, metadata !39, metadata !DIExpression()), !dbg !51 ret void, !dbg !52 } ```
these ones getting skipped need to get fixed before merging the branch
This comment has been minimized.
This comment has been minimized.
Brilliant, thanks @mikdusan for the patch, looks good. Do you want to make a pull request so you can get credit for it (you can select copy-elision-3 instead of master as your destination branch)? |
This comment has been minimized.
This comment has been minimized.
`../src/analyze.cpp:4716:63: error: ‘void* memcpy(void*, const void*, size_t)’ writing to an object of type ‘struct ConstExprValue’ with no trivial copy-assignment [-Werror=class-memaccess]` with gcc-9
Rather than fixing regressions with deprecated coroutines, I'm going to let them regress more until #2377 is solved.
in non-debug modes
This is the third attempt, which is ~~working beautifully~~ working.

After this is done, the door is opened to some important core language improvements, most notably the coroutine rewrite (#2377) and well-defined copy eliding semantics (#287). The main idea here is that you can look at any expression and easily understand where the result will be written to. For example:
With the new result location semantics, the function call `foo()` is guaranteed to directly initialize the memory that the variable `x` is associated with. Note that this is a semantic guarantee, regardless of optimization level. What's neat about this is that it works with complex expressions, for example:

Here, the function calls to `bar()` and `baz()` are guaranteed to directly initialize `x.field2`. Have a look at the earlier commit messages for more demonstrations of zig code and the corresponding LLVM IR it's generating.

Merge checklist:
- ~~Get the self-hosted compiler building~~ I'm allowing this to regress until The Coroutine Rewrite Issue #2377 is done.

Some more items, which I may postpone by filing issues rather than doing them in this branch:
whereas this one does not:
but all 3 should be semantically identical.
It creates an unnecessary stack variable.
Preliminary Performance Comparison
It's not a fair comparison because more tests have been added in the result location branch. However, note that the share of time spent in LLVM Emit Output is smaller, and the share of time spent in Semantic Analysis is higher. This is a good thing, because it means that Zig's IR code - which I hastily coded up without regard for performance in this branch - can be more easily improved than LLVM's perf. The LLVM IR that this branch produces is significantly simpler for LLVM to compile, and it results in significantly fewer calls to memcpy at runtime.